mirror of
https://github.com/stevenrobertson/cuburn.git
synced 2025-02-05 11:40:04 -05:00
Some amount of dynamic rendering
This commit is contained in:
parent
1302f31ec7
commit
088299423e
@ -1,19 +1,9 @@
|
|||||||
"""
|
"""
|
||||||
Contains the PTX fragments which will drive the device.
|
Contains the PTX fragments which will drive the device, and helper functions
|
||||||
|
to combine those fragments.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Basic headers, utility functions, and so on
|
import util
|
||||||
base = """
|
import mwc
|
||||||
#include<cuda.h>
|
import iter
|
||||||
#include<stdint.h>
|
|
||||||
|
|
||||||
// TODO: use launch parameter preconfig to eliminate unnecessary parts
|
|
||||||
__device__
|
|
||||||
uint32_t gtid() {
|
|
||||||
return threadIdx.x + blockDim.x *
|
|
||||||
(threadIdx.y + blockDim.y *
|
|
||||||
(threadIdx.z + blockDim.z *
|
|
||||||
(blockIdx.x + (gridDim.x * blockIdx.y))));
|
|
||||||
}
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
@ -7,62 +7,86 @@ from pycuda.driver import In, Out, InOut
|
|||||||
from pycuda.compiler import SourceModule
|
from pycuda.compiler import SourceModule
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from cuburn import code
|
|
||||||
from cuburn.code import mwc
|
from cuburn.code import mwc
|
||||||
|
from cuburn.code.util import *
|
||||||
|
|
||||||
src = r"""
|
import tempita
|
||||||
#define FUSE 20
|
|
||||||
#define MAXOOB 10
|
|
||||||
|
|
||||||
typedef struct {
|
class IterCode(HunkOCode):
|
||||||
// Number of iterations to perform, *per thread*.
|
def __init__(self, features):
|
||||||
uint32_t niters;
|
self.features = features
|
||||||
|
self.packer = DataPacker('iter_info')
|
||||||
|
iterbody = self._iterbody()
|
||||||
|
bodies = [self._xfbody(i,x) for i,x in enumerate(self.features.xforms)]
|
||||||
|
bodies.append(iterbody)
|
||||||
|
self.defs = '\n'.join(bodies)
|
||||||
|
|
||||||
// Number of accumulators per row and column in the accum buffer
|
def _xfbody(self, xfid, xform):
|
||||||
uint32_t accwidth, accheight;
|
px = self.packer.view('info', 'xf%d_' % xfid)
|
||||||
} iter_info;
|
px.sub('xf', 'cp.xforms[%d]' % xfid)
|
||||||
|
|
||||||
|
tmpl = tempita.Template("""
|
||||||
|
__device__
|
||||||
|
void apply_xf{{xfid}}(float *ix, float *iy, float *icolor,
|
||||||
|
const iter_info *info) {
|
||||||
|
float tx, ty, ox = *ix, oy = *iy;
|
||||||
|
{{apply_affine('ox', 'oy', 'tx', 'ty', px, 'xf.c', 'pre')}}
|
||||||
|
|
||||||
|
// tiny little TODO: variations
|
||||||
|
|
||||||
|
*ix = tx;
|
||||||
|
*iy = ty;
|
||||||
|
|
||||||
|
float csp = {{px.get('xf.color_speed')}};
|
||||||
|
*icolor = *icolor * (1.0f - csp) + {{px.get('xf.color')}} * csp;
|
||||||
|
};
|
||||||
|
""")
|
||||||
|
g = dict(globals())
|
||||||
|
g.update(locals())
|
||||||
|
return tmpl.substitute(g)
|
||||||
|
|
||||||
|
def _iterbody(self):
|
||||||
|
tmpl = tempita.Template("""
|
||||||
__global__
|
__global__
|
||||||
void silly(mwc_st *msts, iter_info *infos, float *accbuf, float *denbuf) {
|
void iter(mwc_st *msts, const iter_info *infos, float *accbuf, float *denbuf) {
|
||||||
mwc_st rctx = msts[gtid()];
|
mwc_st rctx = msts[gtid()];
|
||||||
iter_info *info = &(infos[blockIdx.x]);
|
const iter_info *info = &(infos[blockIdx.x]);
|
||||||
|
|
||||||
float consec_bad = -FUSE;
|
int consec_bad = -{{features.fuse}};
|
||||||
float nsamps = info->niters;
|
int nsamps = 500;
|
||||||
|
|
||||||
float x, y, color;
|
float x, y, color;
|
||||||
x = mwc_next_11(&rctx);
|
x = mwc_next_11(&rctx);
|
||||||
y = mwc_next_11(&rctx);
|
y = mwc_next_11(&rctx);
|
||||||
color = mwc_next_01(&rctx);
|
color = mwc_next_01(&rctx);
|
||||||
|
|
||||||
while (nsamps > 0.0f) {
|
while (nsamps > 0) {
|
||||||
float xfsel = mwc_next_01(&rctx);
|
float xfsel = mwc_next_01(&rctx);
|
||||||
|
|
||||||
x *= 0.5f;
|
{{for xfid, xform in enumerate(features.xforms)}}
|
||||||
y *= 0.5f;
|
if (xfsel < {{packer.get('cp.norm_density[%d]' % xfid)}}) {
|
||||||
color *= 0.5f;
|
apply_xf{{xfid}}(&x, &y, &color, info);
|
||||||
if (xfsel < 0.33f) {
|
} else
|
||||||
color += 0.25f;
|
{{endfor}}
|
||||||
x += 0.5f;
|
{
|
||||||
} else if (xfsel < 0.66f) {
|
denbuf[0] = xfsel;
|
||||||
color += 0.5f;
|
break; // TODO: fail here
|
||||||
y += 0.5f;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (consec_bad < 0.0f) {
|
if (consec_bad < 0) {
|
||||||
consec_bad++;
|
consec_bad++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (x <= -1.0f || x >= 1.0f || y <= -1.0f || y >= 1.0f
|
if (x <= -1.0f || x >= 1.0f || y <= -1.0f || y >= 1.0f
|
||||||
|| consec_bad < 0.0f) {
|
|| consec_bad < 0) {
|
||||||
|
|
||||||
consec_bad++;
|
consec_bad++;
|
||||||
if (consec_bad > MAXOOB) {
|
if (consec_bad > {{features.max_oob}}) {
|
||||||
x = mwc_next_11(&rctx);
|
x = mwc_next_11(&rctx);
|
||||||
y = mwc_next_11(&rctx);
|
y = mwc_next_11(&rctx);
|
||||||
color = mwc_next_01(&rctx);
|
color = mwc_next_01(&rctx);
|
||||||
consec_bad = -FUSE;
|
consec_bad = -{{features.fuse}};
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -80,26 +104,28 @@ void silly(mwc_st *msts, iter_info *infos, float *accbuf, float *denbuf) {
|
|||||||
nsamps--;
|
nsamps--;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
"""
|
""")
|
||||||
|
return tmpl.substitute(
|
||||||
|
features = self.features,
|
||||||
|
packer = self.packer.view('info'))
|
||||||
|
|
||||||
def silly():
|
|
||||||
mod = SourceModule(code.base + mwc.src + src)
|
def silly(features, cp):
|
||||||
abuf = np.zeros((512, 512, 4), dtype=np.float32)
|
abuf = np.zeros((512, 512, 4), dtype=np.float32)
|
||||||
dbuf = np.zeros((512, 512), dtype=np.float32)
|
dbuf = np.zeros((512, 512), dtype=np.float32)
|
||||||
seeds = mwc.build_mwc_seeds(512 * 24, seed=5)
|
seeds = mwc.MWC.make_seeds(512 * 24)
|
||||||
|
|
||||||
info = np.zeros(3, dtype=np.uint32)
|
iter = IterCode(features)
|
||||||
info[0] = 5000
|
code = assemble_code(BaseCode, mwc.MWC, iter, iter.packer)
|
||||||
info[1] = 512
|
print code
|
||||||
info[2] = 512
|
mod = SourceModule(code)
|
||||||
info = np.repeat([info], 24, axis=0)
|
|
||||||
|
|
||||||
fun = mod.get_function("silly")
|
info = iter.packer.pack(cp=cp)
|
||||||
|
print info
|
||||||
|
|
||||||
|
fun = mod.get_function("iter")
|
||||||
fun(InOut(seeds), In(info), InOut(abuf), InOut(dbuf),
|
fun(InOut(seeds), In(info), InOut(abuf), InOut(dbuf),
|
||||||
block=(512,1,1), grid=(24,1), time_kernel=True)
|
block=(512,1,1), grid=(1,1), time_kernel=True)
|
||||||
|
|
||||||
print abuf
|
|
||||||
print dbuf
|
|
||||||
print sum(dbuf)
|
|
||||||
return abuf, dbuf
|
return abuf, dbuf
|
||||||
|
|
||||||
|
@ -2,23 +2,21 @@
|
|||||||
The multiply-with-carry random number generator.
|
The multiply-with-carry random number generator.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import time
|
|
||||||
|
|
||||||
import pycuda.driver as cuda
|
|
||||||
from pycuda.compiler import SourceModule
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import tempita
|
||||||
|
|
||||||
from jinja2 import Template
|
from cuburn.code.util import *
|
||||||
|
|
||||||
from cuburn import code
|
class MWC(HunkOCode):
|
||||||
|
decls = """
|
||||||
src = r"""
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
uint32_t mul;
|
uint32_t mul;
|
||||||
uint32_t state;
|
uint32_t state;
|
||||||
uint32_t carry;
|
uint32_t carry;
|
||||||
} mwc_st;
|
} mwc_st;
|
||||||
|
"""
|
||||||
|
|
||||||
|
defs = r"""
|
||||||
__device__ uint32_t mwc_next(mwc_st *st) {
|
__device__ uint32_t mwc_next(mwc_st *st) {
|
||||||
asm("{\n\t.reg .u64 val;\n\t"
|
asm("{\n\t.reg .u64 val;\n\t"
|
||||||
"cvt.u64.u32 val, %0;\n\t"
|
"cvt.u64.u32 val, %0;\n\t"
|
||||||
@ -35,22 +33,12 @@ __device__ float mwc_next_01(mwc_st *st) {
|
|||||||
__device__ float mwc_next_11(mwc_st *st) {
|
__device__ float mwc_next_11(mwc_st *st) {
|
||||||
return ((int32_t) mwc_next(st)) * (1.0f / 2147483648.0f);
|
return ((int32_t) mwc_next(st)) * (1.0f / 2147483648.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
testsrc = code.base + src + """
|
@staticmethod
|
||||||
__global__ void test_mwc(mwc_st *msts, uint64_t *sums, float nrounds) {
|
def make_seeds(nthreads, host_seed=None):
|
||||||
mwc_st rctx = msts[gtid()];
|
if host_seed:
|
||||||
uint64_t sum = 0;
|
rand = np.random.RandomState(host_seed)
|
||||||
for (float i = 0; i < nrounds; i++) sum += mwc_next(&rctx);
|
|
||||||
sums[gtid()] = sum;
|
|
||||||
msts[gtid()] = rctx;
|
|
||||||
}
|
|
||||||
"""
|
|
||||||
|
|
||||||
def build_mwc_seeds(nthreads, seed=None):
|
|
||||||
if seed:
|
|
||||||
rand = np.random.RandomState(seed)
|
|
||||||
else:
|
else:
|
||||||
rand = np.random
|
rand = np.random
|
||||||
|
|
||||||
@ -75,15 +63,28 @@ def build_mwc_seeds(nthreads, seed=None):
|
|||||||
|
|
||||||
return seeds
|
return seeds
|
||||||
|
|
||||||
def test_mwc():
|
class MWCTest(HunkOCode):
|
||||||
rounds = 5000
|
defs = """
|
||||||
nblocks = 64
|
__global__ void test_mwc(mwc_st *msts, uint64_t *sums, float nrounds) {
|
||||||
nthreads = 512 * nblocks
|
mwc_st rctx = msts[gtid()];
|
||||||
|
uint64_t sum = 0;
|
||||||
|
for (float i = 0; i < nrounds; i++) sum += mwc_next(&rctx);
|
||||||
|
sums[gtid()] = sum;
|
||||||
|
msts[gtid()] = rctx;
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
|
||||||
seeds = build_mwc_seeds(nthreads, seed = 5)
|
@classmethod
|
||||||
|
def test_mwc(cls, rounds=5000, nblocks=64, blockwidth=512):
|
||||||
|
import pycuda.driver as cuda
|
||||||
|
from pycuda.compiler import SourceModule
|
||||||
|
import time
|
||||||
|
|
||||||
|
nthreads = blockwidth * nblocks
|
||||||
|
seeds = MWC.make_seeds(nthreads, host_seed = 5)
|
||||||
dseeds = cuda.to_device(seeds)
|
dseeds = cuda.to_device(seeds)
|
||||||
|
|
||||||
mod = SourceModule(testsrc)
|
mod = SourceModule(assemble_code(BaseCode, MWC, cls))
|
||||||
|
|
||||||
for trial in range(2):
|
for trial in range(2):
|
||||||
print "Trial %d, on CPU: " % trial,
|
print "Trial %d, on CPU: " % trial,
|
||||||
@ -106,7 +107,8 @@ def test_mwc():
|
|||||||
dsums = cuda.mem_alloc(8*nthreads)
|
dsums = cuda.mem_alloc(8*nthreads)
|
||||||
fun = mod.get_function("test_mwc")
|
fun = mod.get_function("test_mwc")
|
||||||
dtime = fun(dseeds, dsums, np.float32(rounds),
|
dtime = fun(dseeds, dsums, np.float32(rounds),
|
||||||
block=(512,1,1), grid=(nblocks,1), time_kernel=True)
|
block=(blockwidth,1,1), grid=(nblocks,1),
|
||||||
|
time_kernel=True)
|
||||||
print "Took %g seconds." % dtime
|
print "Took %g seconds." % dtime
|
||||||
dsums = cuda.from_device(dsums, nthreads, np.uint64)
|
dsums = cuda.from_device(dsums, nthreads, np.uint64)
|
||||||
if not np.all(np.equal(sums, dsums)):
|
if not np.all(np.equal(sums, dsums)):
|
||||||
|
@ -14,7 +14,17 @@ from cuburn.variations import Variations
|
|||||||
Point = lambda x, y: np.array([x, y], dtype=np.double)
|
Point = lambda x, y: np.array([x, y], dtype=np.double)
|
||||||
|
|
||||||
class Genome(pyflam3.Genome):
|
class Genome(pyflam3.Genome):
|
||||||
pass
|
@classmethod
|
||||||
|
def from_string(cls, *args, **kwargs):
|
||||||
|
gnms = super(Genome, cls).from_string(*args, **kwargs)
|
||||||
|
for g in gnms: g._init()
|
||||||
|
return gnms
|
||||||
|
|
||||||
|
def _init(self):
|
||||||
|
self.xforms = [self.xform[i] for i in range(self.num_xforms)]
|
||||||
|
dens = np.array([x.density for x in self.xforms])
|
||||||
|
dens /= np.sum(dens)
|
||||||
|
self.norm_density = [np.sum(dens[:i+1]) for i in range(len(dens))]
|
||||||
|
|
||||||
class XForm(object):
|
class XForm(object):
|
||||||
"""
|
"""
|
||||||
@ -99,7 +109,7 @@ class Frame(object):
|
|||||||
cp.camera = Camera(self._frame, cp, filters)
|
cp.camera = Camera(self._frame, cp, filters)
|
||||||
cp.nsamples = (cp.camera.sample_density *
|
cp.nsamples = (cp.camera.sample_density *
|
||||||
center.width * center.height) / ncps
|
center.width * center.height) / ncps
|
||||||
cp.xforms = XForm.parse(cp)
|
|
||||||
|
|
||||||
print "Expected writes:", (
|
print "Expected writes:", (
|
||||||
cp.camera.sample_density * center.width * center.height)
|
cp.camera.sample_density * center.width * center.height)
|
||||||
@ -190,9 +200,10 @@ class Features(object):
|
|||||||
"""
|
"""
|
||||||
# Constant parameters which control handling of out-of-frame samples:
|
# Constant parameters which control handling of out-of-frame samples:
|
||||||
# Number of iterations to iterate without write after new point
|
# Number of iterations to iterate without write after new point
|
||||||
fuse = 2
|
fuse = 20
|
||||||
# Maximum consecutive out-of-frame points before picking new point
|
# Maximum consecutive out-of-bounds points before picking new point
|
||||||
max_bad = 3
|
max_oob = 10
|
||||||
|
max_nxforms = 12
|
||||||
|
|
||||||
# Height of the texture palette which gets uploaded to the GPU (assuming
|
# Height of the texture palette which gets uploaded to the GPU (assuming
|
||||||
# that palette-from-texture is enabled). For most genomes, this doesn't
|
# that palette-from-texture is enabled). For most genomes, this doesn't
|
||||||
@ -205,7 +216,6 @@ class Features(object):
|
|||||||
any = lambda l: bool(filter(None, map(l, genomes)))
|
any = lambda l: bool(filter(None, map(l, genomes)))
|
||||||
self.max_ntemporal_samples = max(
|
self.max_ntemporal_samples = max(
|
||||||
[cp.nbatches * cp.ntemporal_samples for cp in genomes])
|
[cp.nbatches * cp.ntemporal_samples for cp in genomes])
|
||||||
self.camera_rotation = any(lambda cp: cp.rotate)
|
|
||||||
self.non_box_temporal_filter = genomes[0].temporal_filter_type
|
self.non_box_temporal_filter = genomes[0].temporal_filter_type
|
||||||
self.palette_mode = genomes[0].palette_mode and "linear" or "nearest"
|
self.palette_mode = genomes[0].palette_mode and "linear" or "nearest"
|
||||||
|
|
||||||
@ -214,6 +224,7 @@ class Features(object):
|
|||||||
"number of xforms! (try running through flam3-genome first)")
|
"number of xforms! (try running through flam3-genome first)")
|
||||||
self.xforms = [XFormFeatures([x[i] for x in xforms], i)
|
self.xforms = [XFormFeatures([x[i] for x in xforms], i)
|
||||||
for i in range(len(xforms[0]))]
|
for i in range(len(xforms[0]))]
|
||||||
|
self.nxforms = len(self.xforms)
|
||||||
if any(lambda cp: cp.final_xform_enable):
|
if any(lambda cp: cp.final_xform_enable):
|
||||||
raise NotImplementedError("Final xform")
|
raise NotImplementedError("Final xform")
|
||||||
|
|
||||||
|
7
main.py
7
main.py
@ -24,16 +24,15 @@ import pyglet
|
|||||||
import pycuda.autoinit
|
import pycuda.autoinit
|
||||||
|
|
||||||
from cuburn.render import *
|
from cuburn.render import *
|
||||||
from cuburn.code.mwc import test_mwc
|
from cuburn.code.mwc import MWCTest
|
||||||
from cuburn.code.iter import silly
|
from cuburn.code.iter import silly
|
||||||
|
|
||||||
|
|
||||||
def main(args):
|
def main(args):
|
||||||
|
#MWCTest.test_mwc()
|
||||||
with open(args[-1]) as fp:
|
with open(args[-1]) as fp:
|
||||||
genomes = Genome.from_string(fp.read())
|
genomes = Genome.from_string(fp.read())
|
||||||
anim = Animation(genomes)
|
anim = Animation(genomes)
|
||||||
|
accum, den = silly(anim.features, genomes[0])
|
||||||
accum, den = silly()
|
|
||||||
|
|
||||||
if False:
|
if False:
|
||||||
bins = anim.render_frame()
|
bins = anim.render_frame()
|
||||||
|
Loading…
Reference in New Issue
Block a user