From 3ee4640001c1df1a9e88afdab2cfc30755bdd0a7 Mon Sep 17 00:00:00 2001
From: Steven Robertson <steven@strobe.cc>
Date: Fri, 16 Mar 2012 20:38:50 -0700
Subject: [PATCH] Hack: add magnitude scaling to affine coefficients

---
 cuburn/code/interp.py | 74 ++++++++++++++++++++++++++++++++++++++-----
 cuburn/code/iter.py   |  8 ++---
 2 files changed, 70 insertions(+), 12 deletions(-)

diff --git a/cuburn/code/interp.py b/cuburn/code/interp.py
index dbe23e8..c3fca6d 100644
--- a/cuburn/code/interp.py
+++ b/cuburn/code/interp.py
@@ -107,6 +107,12 @@ class GenomePackerPrecalc(GenomePackerView):
         super(GenomePackerPrecalc, self).__init__(packer, 'out', wrapped, prefix)
     def __str__(self):
         return self.packer._require_pre(self.prefix)
+    def _magscale(self):
+        """
+        This is a temporary hack which turns on magnitude scaling for the
+        value on which it is called. Takes the place of __str__ serialization.
+        """
+        return self.packer._require_pre(self.prefix, True)
     def _set(self, name):
         fullname = self.prefix + (name,)
         self.packer._pre_alloc(fullname)
@@ -165,10 +171,11 @@ class GenomePacker(object):
         """
         self.packed_direct[name] = None
 
-    def _require_pre(self, name):
+    def _require_pre(self, name, mag_scaling=False):
         i = len(self.genome_precalc) << self.search_rounds
         self.genome_precalc[name] = None
-        return 'catmull_rom(&times[%d], &knots[%d], time)' % (i, i)
+        name = 'catmull_rom_mag' if mag_scaling else 'catmull_rom'
+        return '%s(&times[%d], &knots[%d], time)' % (name, i, i)
 
     def _pre_alloc(self, name):
         self.packed_precalc[name] = None
@@ -259,9 +266,39 @@ typedef struct {
 catmullromlib = devlib(deps=[binsearchlib], decls=r'''
 __device__ __noinline__
 float catmull_rom(const float *times, const float *knots, float t);
-''', defs=r'''
+
 __device__ __noinline__
-float catmull_rom(const float *times, const float *knots, float t) {
+float catmull_rom_mag(const float *times, const float *knots, float t);
+''', defs=r'''
+
+// ELBOW is the linearization threshold; above this magnitude, a value scales
+// logarithmically, and below it, linearly. ELOG1 is a constant used to make
+// this happen. See helpers/spline_mag_domain_interp.wxm for nice graphs.
+#define ELBOW 0.0625f   // 2^(-4)
+#define ELOG1 5.0f      // 1 - log2(elbow)
+
+// Transform from linear to magnitude domain
+__device__ float linlog(float x) {
+    if (x > ELBOW)  return   log2f(x)  + ELOG1;
+    if (x < -ELBOW) return -(log2f(-x) + ELOG1);
+    return x / ELBOW;
+}
+
+// Reverse of above
+__device__ float linexp(float v) {
+    if (v >= 1.0)   return  exp2f( v - ELOG1);
+    if (v <= -1.0)  return -exp2f(-v - ELOG1);
+    return v * ELBOW;
+}
+
+__device__ float linslope(float x, float m) {
+    if (x >=  ELBOW) return m /  x;
+    if (x <= -ELBOW) return m / -x;
+    return m / ELBOW;
+}
+
+__device__ float
+catmull_rom_base(const float *times, const float *knots, float t, bool mag) {
     int idx = bitwise_binsearch(times, t);
 
     // The left bias of the search means that we never have to worry about
@@ -281,12 +318,33 @@ float catmull_rom(const float *times, const float *knots, float t) {
     float m1 = (k2 - k0) / (1.0f - t0),
           m2 = (k3 - k1) / (t3);
 
+    if (mag) {
+        m1 = linslope(k1, m1);
+        m2 = linslope(k2, m2);
+        k1 = linlog(k1);
+        k2 = linlog(k2);
+    }
+
     float tt = t * t, ttt = tt * t;
 
-    return m1 * (      ttt - 2.0f*tt + t)
-         + k1 * ( 2.0f*ttt - 3.0f*tt + 1)
-         + m2 * (      ttt -      tt)
-         + k2 * (-2.0f*ttt + 3.0f*tt);
+    float r = m1 * (      ttt - 2.0f*tt + t)
+            + k1 * ( 2.0f*ttt - 3.0f*tt + 1)
+            + m2 * (      ttt -      tt)
+            + k2 * (-2.0f*ttt + 3.0f*tt);
+
+    if (mag) r = linexp(r);
+    return r;
+}
+
+// Variants with scaling domain logic inlined
+__device__ __noinline__
+float catmull_rom(const float *times, const float *knots, float t) {
+    return catmull_rom_base(times, knots, t, false);
+}
+
+__device__ __noinline__
+float catmull_rom_mag(const float *times, const float *knots, float t) {
+    return catmull_rom_base(times, knots, t, true);
 }
 ''')
 
diff --git a/cuburn/code/iter.py b/cuburn/code/iter.py
index 2f3df96..7b48348 100644
--- a/cuburn/code/iter.py
+++ b/cuburn/code/iter.py
@@ -85,15 +85,15 @@ def precalc_xf_affine(px):
         float pri = {{pre.angle}} * M_PI / 180.0f;
         float spr = {{pre.spread}} * M_PI / 180.0f;
 
-        float magx = {{pre.magnitude.x}};
-        float magy = {{pre.magnitude.y}};
+        float magx = {{pre.magnitude.x._magscale()}};
+        float magy = {{pre.magnitude.y._magscale()}};
 
         {{pre._set('xx')}} = magx * cos(pri-spr);
         {{pre._set('yx')}} = -magx * sin(pri-spr);
         {{pre._set('xy')}} = -magy * cos(pri+spr);
         {{pre._set('yy')}} = magy * sin(pri+spr);
-        {{pre._set('xo')}} = {{pre.offset.x}};
-        {{pre._set('yo')}} = -{{pre.offset.y}};
+        {{pre._set('xo')}} = {{pre.offset.x._magscale()}};
+        {{pre._set('yo')}} = -{{pre.offset.y._magscale()}};
     """, 'precalc_xf_affine').substitute(locals()))
 
 def apply_affine(x, y, xo, yo, packer):