From b73461132ca01269aa1823073e817d3d72f1f581 Mon Sep 17 00:00:00 2001
From: Steven Robertson <steven@strobe.cc>
Date: Thu, 8 Dec 2011 12:07:22 -0500
Subject: [PATCH] Use consts for image size instead of immediates.

This saves us from having to recompile if the frame size changes.
---
 cuburn/code/iter.py | 11 +++++++++--
 cuburn/render.py    |  4 ++++
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/cuburn/code/iter.py b/cuburn/code/iter.py
index a26b4fb..c93f630 100644
--- a/cuburn/code/iter.py
+++ b/cuburn/code/iter.py
@@ -129,6 +129,13 @@ texture<uchar4, cudaTextureType2D, cudaReadModeNormalizedFloat> palTex;
 __shared__ iter_params params;
 __device__ int rb_head, rb_tail, rb_size;
 
+typedef struct {
+    uint32_t width;   // exclusive upper bound tested against ix in iter()
+    uint32_t height;  // exclusive upper bound tested against iy in iter()
+    uint32_t stride;  // row multiplier: accumulator index is iy * stride + ix
+} acc_size_t;
+__constant__ acc_size_t acc_size;  // uploaded by the host as three uint32s (width, height, stride)
+
 """
 
     def _xfbody(self, xfid, xform):
@@ -330,14 +337,14 @@ void iter(
 
         uint32_t ix = trunca(cx), iy = trunca(cy);
 
-        if (ix >= {{info.acc_width}} || iy >= {{info.acc_height}}) {
+        if (ix >= acc_size.width || iy >= acc_size.height) {
 {{if info.acc_mode == 'deferred'}}
             *log = 0xffffffff;
 {{endif}}
             continue;
         }
 
-        uint32_t i = iy * {{info.acc_stride}} + ix;
+        uint32_t i = iy * acc_size.stride + ix;
 
 {{if info.acc_mode == 'atomic'}}
         float4 outcol = tex2D(palTex, cc, time_frac);
diff --git a/cuburn/render.py b/cuburn/render.py
index 32aa68b..ed96d76 100644
--- a/cuburn/render.py
+++ b/cuburn/render.py
@@ -132,6 +132,10 @@ class Renderer(object):
         d_accum = cuda.mem_alloc(16 * nbins)
         d_out = cuda.mem_alloc(16 * nbins)
 
+        acc_size = np.array([info.acc_width, info.acc_height, info.acc_stride])
+        d_acc_size = self.mod.get_global('acc_size')[0]
+        cuda.memcpy_htod_async(d_acc_size, np.uint32(acc_size), write_stream)
+
         if info.acc_mode == 'deferred':
             # Having a fixed, power-of-two log size makes things much easier
             log_size = 64 << 20