From b73461132ca01269aa1823073e817d3d72f1f581 Mon Sep 17 00:00:00 2001
From: Steven Robertson
Date: Thu, 8 Dec 2011 12:07:22 -0500
Subject: [PATCH] Use consts for image size instead of immediates.

This saves us from having to recompile if the frame size changes.
---
 cuburn/code/iter.py | 11 +++++++++--
 cuburn/render.py    |  4 ++++
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/cuburn/code/iter.py b/cuburn/code/iter.py
index a26b4fb..c93f630 100644
--- a/cuburn/code/iter.py
+++ b/cuburn/code/iter.py
@@ -129,6 +129,13 @@ texture palTex;
 __shared__ iter_params params;
 __device__ int rb_head, rb_tail, rb_size;
 
+typedef struct {
+    uint32_t width;
+    uint32_t height;
+    uint32_t stride;
+} acc_size_t;
+__constant__ acc_size_t acc_size;
+
 """
 
     def _xfbody(self, xfid, xform):
@@ -330,14 +337,14 @@ void iter(
 
         uint32_t ix = trunca(cx), iy = trunca(cy);
 
-        if (ix >= {{info.acc_width}} || iy >= {{info.acc_height}}) {
+        if (ix >= acc_size.width || iy >= acc_size.height) {
 {{if info.acc_mode == 'deferred'}}
             *log = 0xffffffff;
 {{endif}}
             continue;
         }
 
-        uint32_t i = iy * {{info.acc_stride}} + ix;
+        uint32_t i = iy * acc_size.stride + ix;
 
 {{if info.acc_mode == 'atomic'}}
         float4 outcol = tex2D(palTex, cc, time_frac);
diff --git a/cuburn/render.py b/cuburn/render.py
index 32aa68b..ed96d76 100644
--- a/cuburn/render.py
+++ b/cuburn/render.py
@@ -132,6 +132,10 @@ class Renderer(object):
         d_accum = cuda.mem_alloc(16 * nbins)
         d_out = cuda.mem_alloc(16 * nbins)
 
+        acc_size = np.array([info.acc_width, info.acc_height, info.acc_stride])
+        d_acc_size = self.mod.get_global('acc_size')[0]
+        cuda.memcpy_htod_async(d_acc_size, np.uint32(acc_size), write_stream)
+
         if info.acc_mode == 'deferred':
             # Having a fixed, power-of-two log size makes things much easier
             log_size = 64 << 20