From 50b664b1f9d1e8600ddb9219b6e5cff5fa1180ed Mon Sep 17 00:00:00 2001
From: Erik Reckase <e.reckase@gmail.com>
Date: Fri, 24 Jun 2011 06:09:04 -0600
Subject: [PATCH] chaos support \0/

---
 cuburn/code/filtering.py |  7 +++----
 cuburn/code/iter.py      | 24 ++++++++++++++----------
 cuburn/render.py         | 19 +++++++++++++++++--
 3 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/cuburn/code/filtering.py b/cuburn/code/filtering.py
index 20ae061..3a64d48 100644
--- a/cuburn/code/filtering.py
+++ b/cuburn/code/filtering.py
@@ -102,7 +102,7 @@ class DensityEst(HunkOCode):
     def defs(self):
         return self.defs_tmpl.substitute(features=self.features, cp=self.cp)
 
-    defs_tmpl = Template("""
+    defs_tmpl = Template('''
 #define W 15        // Filter width (regardless of standard deviation chosen)
 #define W2 7        // Half of filter width, rounded down
 #define FW 46       // Width of local result storage (NW+W2+W2)
@@ -277,7 +277,7 @@ void density_est(float4 *pixbuf, float4 *outbuf, float *denbuf,
     }
 }
 
-""")
+''')
 
     def invoke(self, mod, cp, abufd, obufd, dbufd, stream=None):
         # TODO: add no-est version
@@ -285,8 +285,7 @@ void density_est(float4 *pixbuf, float4 *outbuf, float *denbuf,
 
         k1 = np.float32(cp.brightness * 268 / 256)
         area = self.features.width * self.features.height / cp.ppu ** 2
-        k2 = np.float32(1 / (area * cp.adj_density))
-        print k1, k2
+        k2 = np.float32(1 / (area * cp.adj_density))
 
         if self.cp.estimator == 0:
             nbins = self.features.acc_height * self.features.acc_stride
diff --git a/cuburn/code/iter.py b/cuburn/code/iter.py
index f124876..33ac96c 100644
--- a/cuburn/code/iter.py
+++ b/cuburn/code/iter.py
@@ -67,7 +67,7 @@ void apply_xf{{xfid}}(float *ix, float *iy, float *icolor, mwc_st *rctx) {
         return tmpl.substitute(g)
 
     def _iterbody(self):
-        tmpl = Template(r"""
+        tmpl = Template(r'''
 __global__
 void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
     mwc_st rctx = msts[gtid()];
@@ -81,9 +81,10 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
 
     int consec_bad = -{{features.fuse}};
     // TODO: remove '512' constant
-    int nsamps = {{packer.get('cp.width * cp.height / (cp.ntemporal_samples * 512.) * cp.adj_density')}};
+    int nsamps = {{packer.get("cp.width * cp.height / (cp.ntemporal_samples * 512.) * cp.adj_density")}};
 
     float x, y, color;
+    int last_xf_used = 0;
     x = mwc_next_11(&rctx);
     y = mwc_next_11(&rctx);
     color = mwc_next_01(&rctx);
@@ -91,14 +92,17 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
     while (nsamps > 0) {
         float xfsel = mwc_next_01(&rctx);
 
-        {{for xfid, xform in enumerate(features.xforms)}}
-        {{if xfid != features.final_xform_index}}
-        if (xfsel <= {{packer.get('cp.norm_density[%d]' % xfid)}}) {
-            apply_xf{{xfid}}(&x, &y, &color, &rctx);
+        {{for density_row_idx, prior_xform_idx in enumerate(features.std_xforms)}}
+        {{for density_col_idx, this_xform_idx in enumerate(features.std_xforms)}}
+        if (last_xf_used == {{prior_xform_idx}} &&
+                xfsel < {{packer.get("cp.chaos_densities[%d][%d]" % (density_row_idx, density_col_idx))}}) {
+            apply_xf{{this_xform_idx}}(&x, &y, &color, &rctx);
+            last_xf_used = {{this_xform_idx}};  // selects the chaos row for the next pick
         } else
-        {{endif}}
+        {{endfor}}
         {{endfor}}
         {
+            // No branch above matched: store xfsel in denbuf[0] and stop iterating.
             denbuf[0] = xfsel;
             break; // TODO: fail here
         }
@@ -125,7 +129,7 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
         {{endif}}
 
         // TODO: verify that constants get premultiplied
-        float ditherwidth = {{packer.get('0.33 * cp.spatial_filter_radius')}};
+        float ditherwidth = {{packer.get("0.33 * cp.spatial_filter_radius")}};
         float u0 = mwc_next_01(&rctx);
         float r = ditherwidth * sqrt(-2.0f * log2f(u0) / M_LOG2E);
 
@@ -150,7 +154,7 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
 
         int i = iy * {{features.acc_stride}} + ix;
 
-        float4 outcol = tex2D(palTex, color, {{packer.get('cp_step_frac')}});
+        float4 outcol = tex2D(palTex, color, {{packer.get("cp_step_frac")}});
         float4 pix = accbuf[i];
         pix.x += outcol.x;
         pix.y += outcol.y;
@@ -161,7 +165,7 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
     }
     asm volatile ("membar.cta;");
 }
-""")
+''')
         return tmpl.substitute(
                 features = self.features,
                 packer = self.packer.view('info'),
diff --git a/cuburn/render.py b/cuburn/render.py
index 508996b..3d98f84 100644
--- a/cuburn/render.py
+++ b/cuburn/render.py
@@ -41,8 +41,19 @@ class Genome(object):
         self.xforms = [self.xform[i] for i in range(self.num_xforms)]
         dens = np.array([x.density for i, x in enumerate(self.xforms)
                          if i != self.final_xform_index])
-        dens /= np.sum(dens)
-        self.norm_density = [np.sum(dens[:i+1]) for i in range(len(dens))]
+
+        # Chaos support: row r of chaos_densities holds the cumulative,
+        # normalized chance of each non-final xform following the r-th one.
+        num_std_xf = len(dens)
+        self.chaos_densities = np.zeros((num_std_xf, num_std_xf))
+        for r in range(num_std_xf):
+            # Scale every xform's base density by its chaos weight in row r.
+            chaos_row = dens * np.array([ctypes_genome.chaos[r][c] for c in range(num_std_xf)])
+            chaos_row /= np.sum(chaos_row)
+            # Running totals turn the normalized weights into a CDF that the
+            # iteration kernel compares against its random draw (xfsel).
+            self.chaos_densities[r, :] = np.cumsum(chaos_row)
+
         self.camera_transform = self.calc_camera_transform()
 
     scale = property(lambda cp: 2.0 ** cp.zoom)
@@ -368,6 +379,9 @@ class _AnimRenderer(object):
         g = a.features.gutter
         obuf_dim = (a.features.acc_height, a.features.acc_stride, 4)
         out = cuda.from_device(self.d_out, obuf_dim, np.float32)
+        # Debug aid (disabled): fetch self.d_accum and print the sum of channel 3.
+        #dacc = cuda.from_device(self.d_accum, obuf_dim, np.float32)
+        #print dacc[:,:,3].sum()
         # TODO: performance?
         g = a.features.gutter
         out = np.delete(out, np.s_[:g], axis=0)
@@ -438,6 +452,7 @@ class Features(object):
         self.acc_width = genomes[0].width + 2 * self.gutter
         self.acc_height = genomes[0].height + 2 * self.gutter
         self.acc_stride = 32 * int(math.ceil(self.acc_width / 32.))
+        self.std_xforms = [i for i in range(self.nxforms) if i != self.final_xform_index]  # ids of all non-final xforms
 
 class XFormFeatures(object):
     def __init__(self, xforms, xform_id):