Review notes (patch tools skip any text ahead of the first "diff --git" line):
- Line breaks and indentation were restored from a whitespace-collapsed copy
  of this patch. Blank context lines were placed so that every hunk matches
  its original line counts; confirm them against the repository.
- cuburn/code/iter.py, second hunk: membench() now pads the device buffer so
  the highest stores stay in bounds, the kernel gained a second barrier so a
  run's shared base offset cannot be replaced while still being read, and
  print uses the call form. The new-side count went from 48 to 73 lines, so
  the "index" blob id below no longer describes the post-image.
- main.py hunks are unchanged. Note that main() now returns straight after
  membench(), which leaves the rest of its body unreachable, and that the
  PATH prefix is specific to one machine's gcc install.

diff --git a/cuburn/code/iter.py b/cuburn/code/iter.py
index 35d4ec6..e118ffa 100644
--- a/cuburn/code/iter.py
+++ b/cuburn/code/iter.py
@@ -154,7 +154,7 @@ def silly(features, cps):
 
     sampAt = [int(i/15.*(nsteps-1)) for i in range(16)]
     for n in range(nsteps):
-        flam3_interpolate(cps_as_array, 2, float(n)/nsteps/5 - 0.1, 0, byref(cp))
+        flam3_interpolate(cps_as_array, 2, float(n)/nsteps - 0.5, 0, byref(cp))
         cp._init()
         if n in sampAt:
             pidx = sampAt.index(n)
@@ -194,3 +194,73 @@ def silly(features, cps):
     dbuf = cuda.from_device_like(dbufd, dbuf)
 
     return abuf, dbuf
+
+# TODO: find a better place to stick this code
+class MemBench(HunkOCode):
+    """Scattered global-store benchmark kernels.
+
+    Emits one kernel, ``iter<W>``, for each W in ``WIDTHS``. Threads of a
+    block are grouped in runs of ``2**W``; on every pass the first thread
+    of a run draws a random word offset and each thread of the run stores
+    one random word at ``offset + lane``.
+    """
+
+    # Exponents W for which a kernel is generated.
+    WIDTHS = range(8)
+
+    decls = """
+__shared__ uint32_t coord[512];
+"""
+
+    defs_tmpl = tempita.Template("""
+__global__
+void iter{{W}}(mwc_st *mwcs, uint32_t *buf) {
+    mwc_st rctx = mwcs[gtid()];
+
+    int mask = (1 << {{W}}) - 1;
+    int smoff = threadIdx.x >> {{W}};
+    int writer = (threadIdx.x & mask) == 0;
+
+    for (int i = 0; i < 1024 * 32; i++) {
+        if (writer)
+            coord[smoff] = mwc_next(&rctx) & 0x7ffffff; // 512MB / 4 bytes
+        __syncthreads();
+        // The base may be the last word of the 512MB window and the lane
+        // adds up to `mask` more words: buf needs that much slack.
+        uint32_t *dst = buf + (coord[smoff] + (threadIdx.x & mask));
+        // All reads of coord[smoff] must finish before the writer replaces
+        // it on the next pass (runs spanning several warps would race).
+        __syncthreads();
+        uint32_t val = mwc_next(&rctx);
+        asm("st.global.u32 [%0], %1;" :: "l"(dst), "r"(val));
+    }
+}
+""")
+
+    @property
+    def defs(self):
+        """CUDA source of every ``iter<W>`` kernel, newline-joined."""
+        return '\n'.join(self.defs_tmpl.substitute(W=w) for w in self.WIDTHS)
+
+def membench():
+    """Compile the MemBench kernels and print how long each one ran.
+
+    Each kernel is launched once, 21 blocks of 512 threads, against one
+    shared device buffer; the value the timed launch returns is printed.
+    """
+    code = assemble_code(BaseCode, mwc.MWC, MemBench())
+    mod = SourceModule(code)
+
+    # Kernels store at (27-bit random word offset) + (lane < 2**W), so pad
+    # the 512MB buffer by 2**W words to keep the highest stores in bounds.
+    slack = 4 << max(MemBench.WIDTHS)
+    buf = cuda.mem_alloc((512 << 20) + slack)
+    seeds = mwc.MWC.make_seeds(512 * 21)
+
+    for w in MemBench.WIDTHS:
+        fun = mod.get_function('iter%d' % w)
+        print('Launching with W=%d' % w)
+        t = fun(cuda.In(seeds), buf,
+            block=(512, 1, 1), grid=(21, 1), time_kernel=True)
+        print('Completed in %g' % t)
+
diff --git a/main.py b/main.py
index 237c557..1fe9149 100644
--- a/main.py
+++ b/main.py
@@ -11,6 +11,7 @@
 
 import os
 import sys
+os.environ['PATH'] = '/usr/x86_64-pc-linux-gnu/gcc-bin/4.4.5:' + os.environ['PATH']
 
 from pprint import pprint
 from ctypes import *
@@ -27,9 +28,13 @@ import pycuda.gl.autoinit
 
 from cuburn.render import *
 from cuburn.code.mwc import MWCTest
-from cuburn.code.iter import silly
+from cuburn.code.iter import silly, membench
+
 
 def main(args):
+    membench()
+    return
+
     #MWCTest.test_mwc()
     with open(args[-1]) as fp:
         genomes = Genome.from_string(fp.read())