From 3ee4640001c1df1a9e88afdab2cfc30755bdd0a7 Mon Sep 17 00:00:00 2001 From: Steven Robertson Date: Fri, 16 Mar 2012 20:38:50 -0700 Subject: [PATCH] Hack: add magnitude scaling to affine coefficients --- cuburn/code/interp.py | 74 ++++++++++++++++++++++++++++++++++++++----- cuburn/code/iter.py | 8 ++--- 2 files changed, 70 insertions(+), 12 deletions(-) diff --git a/cuburn/code/interp.py b/cuburn/code/interp.py index dbe23e8..c3fca6d 100644 --- a/cuburn/code/interp.py +++ b/cuburn/code/interp.py @@ -107,6 +107,12 @@ class GenomePackerPrecalc(GenomePackerView): super(GenomePackerPrecalc, self).__init__(packer, 'out', wrapped, prefix) def __str__(self): return self.packer._require_pre(self.prefix) + def _magscale(self): + """ + This is a temporary hack which turns on magnitude scaling for the + value on which it is called. Takes the place of __str__ serialization. + """ + return self.packer._require_pre(self.prefix, True) def _set(self, name): fullname = self.prefix + (name,) self.packer._pre_alloc(fullname) @@ -165,10 +171,11 @@ class GenomePacker(object): """ self.packed_direct[name] = None - def _require_pre(self, name): + def _require_pre(self, name, mag_scaling=False): i = len(self.genome_precalc) << self.search_rounds self.genome_precalc[name] = None - return 'catmull_rom(&times[%d], &knots[%d], time)' % (i, i) + name = 'catmull_rom_mag' if mag_scaling else 'catmull_rom' + return '%s(&times[%d], &knots[%d], time)' % (name, i, i) def _pre_alloc(self, name): self.packed_precalc[name] = None @@ -259,9 +266,39 @@ typedef struct { catmullromlib = devlib(deps=[binsearchlib], decls=r''' __device__ __noinline__ float catmull_rom(const float *times, const float *knots, float t); -''', defs=r''' + __device__ __noinline__ -float catmull_rom(const float *times, const float *knots, float t) { +float catmull_rom_mag(const float *times, const float *knots, float t); +''', defs=r''' + +// ELBOW is the linearization threshold; above this magnitude, a value scales +// logarithmically, and 
below it, linearly. ELOG1 is a constant used to make +// this happen. See helpers/spline_mag_domain_interp.wxm for nice graphs. +#define ELBOW 0.0625f // 2^(-4) +#define ELOG1 5.0f // 1 - log2(elbow) + +// Transform from linear to magnitude domain +__device__ float linlog(float x) { + if (x > ELBOW) return log2f(x) + ELOG1; + if (x < -ELBOW) return -(log2f(-x) + ELOG1); + return x / ELBOW; +} + +// Reverse of above +__device__ float linexp(float v) { + if (v >= 1.0) return exp2f( v - ELOG1); + if (v <= -1.0) return -exp2f(-v - ELOG1); + return v * ELBOW; +} + +__device__ float linslope(float x, float m) { + if (x >= ELBOW) return m / x; + if (x <= -ELBOW) return m / -x; + return m / ELBOW; +} + +__device__ float +catmull_rom_base(const float *times, const float *knots, float t, bool mag) { int idx = bitwise_binsearch(times, t); // The left bias of the search means that we never have to worry about @@ -281,12 +318,33 @@ float catmull_rom(const float *times, const float *knots, float t) { float m1 = (k2 - k0) / (1.0f - t0), m2 = (k3 - k1) / (t3); + if (mag) { + m1 = linslope(k1, m1); + m2 = linslope(k2, m2); + k1 = linlog(k1); + k2 = linlog(k2); + } + float tt = t * t, ttt = tt * t; - return m1 * ( ttt - 2.0f*tt + t) - + k1 * ( 2.0f*ttt - 3.0f*tt + 1) - + m2 * ( ttt - tt) - + k2 * (-2.0f*ttt + 3.0f*tt); + float r = m1 * ( ttt - 2.0f*tt + t) + + k1 * ( 2.0f*ttt - 3.0f*tt + 1) + + m2 * ( ttt - tt) + + k2 * (-2.0f*ttt + 3.0f*tt); + + if (mag) r = linexp(r); + return r; +} + +// Variants with scaling domain logic inlined +__device__ __noinline__ +float catmull_rom(const float *times, const float *knots, float t) { + return catmull_rom_base(times, knots, t, false); +} + +__device__ __noinline__ +float catmull_rom_mag(const float *times, const float *knots, float t) { + return catmull_rom_base(times, knots, t, true); } ''') diff --git a/cuburn/code/iter.py b/cuburn/code/iter.py index 2f3df96..7b48348 100644 --- a/cuburn/code/iter.py +++ b/cuburn/code/iter.py @@ -85,15 
+85,15 @@ def precalc_xf_affine(px): float pri = {{pre.angle}} * M_PI / 180.0f; float spr = {{pre.spread}} * M_PI / 180.0f; - float magx = {{pre.magnitude.x}}; - float magy = {{pre.magnitude.y}}; + float magx = {{pre.magnitude.x._magscale()}}; + float magy = {{pre.magnitude.y._magscale()}}; {{pre._set('xx')}} = magx * cos(pri-spr); {{pre._set('yx')}} = -magx * sin(pri-spr); {{pre._set('xy')}} = -magy * cos(pri+spr); {{pre._set('yy')}} = magy * sin(pri+spr); - {{pre._set('xo')}} = {{pre.offset.x}}; - {{pre._set('yo')}} = -{{pre.offset.y}}; + {{pre._set('xo')}} = {{pre.offset.x._magscale()}}; + {{pre._set('yo')}} = -{{pre.offset.y._magscale()}}; """, 'precalc_xf_affine').substitute(locals())) def apply_affine(x, y, xo, yo, packer):