A memory benchmark (temporary)

2025-07-11 18:55:26 -04:00 · 2011-05-03 13:02:15 -04:00
parent 810b263aa2
commit 84c2583ba8
2 changed files with 52 additions and 2 deletions
--- a/cuburn/code/iter.py
+++ b/cuburn/code/iter.py
@ -154,7 +154,7 @@ def silly(features, cps):
    sampAt = [int(i/15.*(nsteps-1)) for i in range(16)]
    for n in range(nsteps):
-        flam3_interpolate(cps_as_array, 2, float(n)/nsteps/5 - 0.1, 0, byref(cp))
+        flam3_interpolate(cps_as_array, 2, float(n)/nsteps - 0.5, 0, byref(cp))
        cp._init()
        if n in sampAt:
            pidx = sampAt.index(n)
@ -194,3 +194,48 @@ def silly(features, cps):
    dbuf = cuda.from_device_like(dbufd, dbuf)
    return abuf, dbuf
 # TODO: find a better place to stick this code
 class MemBench(HunkOCode):
    """Code hunk providing the ``iter0`` .. ``iter7`` scatter-write kernels.

    One kernel is generated per value of W.  Within a block, threads are
    split into groups of ``1 << W`` consecutive threads.  On every loop
    iteration the first thread of each group draws a random word index
    into ``coord[]``; after the barrier every thread of the group stores
    one random value at that index plus its own position in the group.

    The final index is wrapped to 27 bits so that the per-thread offset
    (up to ``(1 << W) - 1``) can never push a store past the 0x8000000
    words (512MB / 4 bytes) that the random base index is drawn from.

    NOTE(review): there is no barrier between reading ``coord[smoff]`` and
    the next iteration's write to it; for large W a group presumably spans
    several warps, so a late reader may see the next base index.  Left
    as-is because an extra barrier would change what is being timed --
    confirm whether that matters for the measurement.
    """
    # Shared scratch array; each thread group uses the slot coord[smoff].
    decls = """
 __shared__ uint32_t coord[512];
 """
    # Kernel template; {{W}} is substituted with the group-size exponent.
    defs_tmpl = tempita.Template("""
 __global__
 void iter{{W}}(mwc_st *mwcs, uint32_t *buf) {
    mwc_st rctx = mwcs[gtid()];
    int mask = (1 << {{W}}) - 1;
    int smoff = threadIdx.x >> {{W}};
    int writer = (threadIdx.x & mask) == 0;
    for (int i = 0; i < 1024 * 32; i++) {
        if (writer)
            coord[smoff] = mwc_next(&rctx) & 0x7ffffff; // 512MB / 4 bytes
        __syncthreads();
        uint32_t *dst = buf + ((coord[smoff] + (threadIdx.x & mask)) & 0x7ffffff);
        uint32_t val = mwc_next(&rctx);
        asm("st.global.u32  [%0],   %1;" :: "l"(dst), "r"(val));
    }
 }
 """)
    @property
    def defs(self):
        """Return the kernel source for W = 0..7, joined by newlines."""
        return '\n'.join([self.defs_tmpl.substitute(W=w) for w in range(8)])
 def membench():
    """Compile the MemBench kernels and launch each one once, printing timings.

    Allocates a 512 << 20 byte device buffer and one seed per launched
    thread (512 threads per block, 21 blocks), then launches ``iter0``
    through ``iter7`` in turn and prints the value returned by each launch.
    """
    threads_per_block = 512
    num_blocks = 21

    source = assemble_code(BaseCode, mwc.MWC, MemBench())
    module = SourceModule(source)
    target = cuda.mem_alloc(512 << 20)
    rng_seeds = mwc.MWC.make_seeds(threads_per_block * num_blocks)

    for width in range(8):
        kernel = module.get_function('iter%d' % width)
        print('Launching with W=%d' % width)
        # time_kernel=True makes the launch hand back a timing value.
        elapsed = kernel(cuda.In(rng_seeds), target,
                         block=(threads_per_block, 1, 1),
                         grid=(num_blocks, 1),
                         time_kernel=True)
        print('Completed in %g' % elapsed)
--- a/main.py
+++ b/main.py
@ -11,6 +11,7 @@
 import os
 import sys
 os.environ['PATH'] = '/usr/x86_64-pc-linux-gnu/gcc-bin/4.4.5:' + os.environ['PATH']
 from pprint import pprint
 from ctypes import *
@ -27,9 +28,13 @@ import pycuda.gl.autoinit
 from cuburn.render import *
 from cuburn.code.mwc import MWCTest
-from cuburn.code.iter import silly
+from cuburn.code.iter import silly, membench
 def main(args):
    membench()
    return
    #MWCTest.test_mwc()
    with open(args[-1]) as fp:
        genomes = Genome.from_string(fp.read())