From 1302f31ec76c5bdd9d68b7a30a9be240d09f24f6 Mon Sep 17 00:00:00 2001 From: Steven Robertson Date: Fri, 29 Apr 2011 17:25:51 -0400 Subject: [PATCH] "Crappy whatever I hate it" edition of Sierpinski triangle --- cuburn/code/iter.py | 105 ++++++++++++++++++++++++++++++++++++++++++++ cuburn/code/mwc.py | 13 +++++- cuburn/render.py | 4 -- main.py | 47 ++++++++++---------- 4 files changed, 140 insertions(+), 29 deletions(-) create mode 100644 cuburn/code/iter.py diff --git a/cuburn/code/iter.py b/cuburn/code/iter.py new file mode 100644 index 0000000..4eabf38 --- /dev/null +++ b/cuburn/code/iter.py @@ -0,0 +1,105 @@ +""" +The main iteration loop. +""" + +import pycuda.driver as cuda +from pycuda.driver import In, Out, InOut +from pycuda.compiler import SourceModule +import numpy as np + +from cuburn import code +from cuburn.code import mwc + +src = r""" +#define FUSE 20 +#define MAXOOB 10 + +typedef struct { + // Number of iterations to perform, *per thread*. + uint32_t niters; + + // Number of accumulators per row and column in the accum buffer + uint32_t accwidth, accheight; +} iter_info; + +__global__ +void silly(mwc_st *msts, iter_info *infos, float *accbuf, float *denbuf) { + mwc_st rctx = msts[gtid()]; + iter_info *info = &(infos[blockIdx.x]); + + float consec_bad = -FUSE; + float nsamps = info->niters; + + float x, y, color; + x = mwc_next_11(&rctx); + y = mwc_next_11(&rctx); + color = mwc_next_01(&rctx); + + while (nsamps > 0.0f) { + float xfsel = mwc_next_01(&rctx); + + x *= 0.5f; + y *= 0.5f; + color *= 0.5f; + if (xfsel < 0.33f) { + color += 0.25f; + x += 0.5f; + } else if (xfsel < 0.66f) { + color += 0.5f; + y += 0.5f; + } + + if (consec_bad < 0.0f) { + consec_bad++; + continue; + } + + if (x <= -1.0f || x >= 1.0f || y <= -1.0f || y >= 1.0f + || consec_bad < 0.0f) { + + consec_bad++; + if (consec_bad > MAXOOB) { + x = mwc_next_11(&rctx); + y = mwc_next_11(&rctx); + color = mwc_next_01(&rctx); + consec_bad = -FUSE; + } + continue; + } + + // TODO: dither? + int i = ((int)((y + 1.0f) * 256.0f) * 512) + + (int)((x + 1.0f) * 256.0f); + accbuf[i*4] += color < 0.5f ? (1.0f - 2.0f * color) : 0.0f; + accbuf[i*4+1] += 1.0f - 2.0f * fabsf(0.5f - color); + accbuf[i*4+2] += color > 0.5f ? color * 2.0f - 1.0f : 0.0f; + accbuf[i*4+3] += 1.0f; + + denbuf[i] += 1.0f; + + nsamps--; + } +} +""" + +def silly(): + mod = SourceModule(code.base + mwc.src + src) + abuf = np.zeros((512, 512, 4), dtype=np.float32) + dbuf = np.zeros((512, 512), dtype=np.float32) + seeds = mwc.build_mwc_seeds(512 * 24, seed=5) + + info = np.zeros(3, dtype=np.uint32) + info[0] = 5000 + info[1] = 512 + info[2] = 512 + info = np.repeat([info], 24, axis=0) + + fun = mod.get_function("silly") + fun(InOut(seeds), In(info), InOut(abuf), InOut(dbuf), + block=(512,1,1), grid=(24,1), time_kernel=True) + + print abuf + print dbuf + print sum(dbuf) + return abuf, dbuf + diff --git a/cuburn/code/mwc.py b/cuburn/code/mwc.py index 4665967..216682c 100644 --- a/cuburn/code/mwc.py +++ b/cuburn/code/mwc.py @@ -20,13 +20,22 @@ typedef struct { } mwc_st; __device__ uint32_t mwc_next(mwc_st *st) { - asm(".reg .u64 val;\n\t" + asm("{\n\t.reg .u64 val;\n\t" "cvt.u64.u32 val, %0;\n\t" "mad.wide.u32 val, %1, %2, val;\n\t" - "mov.b64 {%1, %0}, val;\n\t" + "mov.b64 {%1, %0}, val;\n\t}\n\t" : "=r"(st->carry), "=r"(st->state) : "r"(st->mul)); return st->state; } + +__device__ float mwc_next_01(mwc_st *st) { + return mwc_next(st) * (1.0f / 4294967296.0f); +} + +__device__ float mwc_next_11(mwc_st *st) { + return ((int32_t) mwc_next(st)) * (1.0f / 2147483648.0f); +} + """ testsrc = code.base + src + """ diff --git a/cuburn/render.py b/cuburn/render.py index 0767110..b56b78a 100644 --- a/cuburn/render.py +++ b/cuburn/render.py @@ -85,10 +85,6 @@ class Frame(object): center = self.center_cp ncps = center.nbatches * center.ntemporal_samples - if ncps < ctx.nctas: - raise NotImplementedError( - "Distribution of a CP across multiple CTAs not yet done") - # TODO: isn't this leaking xforms from C all over the place? stream = StringIO() cp_list = [] diff --git a/main.py b/main.py index 496af8e..8867882 100644 --- a/main.py +++ b/main.py @@ -25,44 +25,45 @@ import pycuda.autoinit from cuburn.render import * from cuburn.code.mwc import test_mwc +from cuburn.code.iter import silly def main(args): - test_mwc() - return with open(args[-1]) as fp: - genomes, ngenomes = Genome.from_string(fp.read()) - anim = Animation(genomes, ngenomes) - anim.compile() - bins = anim.render_frame() - w, h = anim.features.hist_width, anim.features.hist_height - bins = bins[:,:w] - alpha = bins[...,3] - k2ish = (256./(np.mean(alpha)+1e-9)) - lses = 20 * np.log2(1.0 + alpha * k2ish) / (alpha+1e-6) - bins *= lses.reshape(h,w,1).repeat(4,2) - bins = np.minimum(bins, 255) - bins = bins.astype(np.uint8) + genomes = Genome.from_string(fp.read()) + anim = Animation(genomes) - if '-g' not in args: - return + accum, den = silly() + + if False: + bins = anim.render_frame() + w, h = anim.features.hist_width, anim.features.hist_height + bins = bins[:,:w] + alpha = bins[...,3] + k2ish = (256./(np.mean(alpha)+1e-9)) + lses = 20 * np.log2(1.0 + alpha * k2ish) / (alpha+1e-6) + bins *= lses.reshape(h,w,1).repeat(4,2) + bins = np.minimum(bins, 255) + bins = bins.astype(np.uint8) + + #if '-g' not in args: + # return + + imgbuf = (accum * 255).astype(np.uint8) window = pyglet.window.Window(1600, 900) - image = pyglet.image.ImageData(anim.features.hist_width, - anim.features.hist_height, - 'RGBA', - bins.tostring()) + image = pyglet.image.ImageData(512, 512, 'RGBA', imgbuf.tostring()) #-anim.features.hist_stride*4) tex = image.texture - pal = (anim.ctx.ptx.instances[PaletteLookup].pal * 255.).astype(np.uint8) - image2 = pyglet.image.ImageData(256, 16, 'RGBA', pal.tostring()) + #pal = (anim.ctx.ptx.instances[PaletteLookup].pal * 255.).astype(np.uint8) + #image2 = pyglet.image.ImageData(256, 16, 'RGBA', pal.tostring()) @window.event def on_draw(): window.clear() tex.blit(0, 0) - image2.blit(0, 0) + #image2.blit(0, 0) @window.event def on_key_press(sym, mod):