Filter adjustments (density prefilter, gutter)

2026-02-16 16:20:27 -05:00 · 2011-06-11 15:58:15 -04:00
parent 1deb3105a0
commit 94c453d153
4 changed files with 31 additions and 26 deletions
--- a/cuburn/code/filter.py
+++ b/cuburn/code/filter.py
@ -117,7 +117,7 @@ void density_est(float4 *pixbuf, float4 *outbuf, float *denbuf,
        float den = denbuf[idx];

        if (in.w > 0 && den > 0) {
-            float ls = k1 * 12 * logf(1.0 + in.w * k2) / in.w;
+            float ls = k1 * logf(1.0f + in.w * k2) / in.w;
            in.x *= ls;
            in.y *= ls;
            in.z *= ls;
@ -200,19 +200,6 @@ void density_est(float4 *pixbuf, float4 *outbuf, float *denbuf,
            atomicAdd(out+3, de_a[si]);
        }

-        if (threadIdx.y == 5000) {
-            for (int i = threadIdx.x; i < FW; i += 32) {
-                idx = {{features.acc_stride}} * (imrow + 32)
-                    + blockIdx.x * 32 + i + W2;
-                int si = 32 * FW + i;
-                float *out = reinterpret_cast<float*>(&outbuf[idx]);
-                atomicAdd(out,   0.2 + de_r[si]);
-                atomicAdd(out+1, de_g[si]);
-                atomicAdd(out+2, de_b[si]);
-                atomicAdd(out+3, de_a[si]);
-            }
-        }
-
        __syncthreads();
        // TODO: shift instead of copying
        int tid = threadIdx.y * 32 + threadIdx.x;
@ -239,9 +226,11 @@ void density_est(float4 *pixbuf, float4 *outbuf, float *denbuf,
    def invoke(self, mod, abufd, obufd, dbufd):
        # TODO: add no-est version
        # TODO: come up with a general way to average these parameters
+
        k1 = self.cp.brightness * 268 / 256
-        area = self.features.width * self.features.height / self.cp.ppu ** 2
+        area = self.features.acc_width * self.features.acc_height / self.cp.ppu ** 2
        k2 = 1 / (area * self.cp.adj_density)
+        print k1, k2, area

        if self.cp.estimator == 0:
            fun = mod.get_function("logscale")
@ -251,7 +240,7 @@ void density_est(float4 *pixbuf, float4 *outbuf, float *denbuf,
        else:
            fun = mod.get_function("density_est")
            t = fun(abufd, obufd, dbufd, np.float32(k1), np.float32(k2),
-                    block=(32, 32, 1), grid=(self.features.acc_stride/32 - 1, 1),
+                    block=(32, 32, 1), grid=(self.features.acc_width/32, 1),
                    time_kernel=True)
            print "Density estimation: %g" % t

--- a/cuburn/code/iter.py
+++ b/cuburn/code/iter.py
@ -8,6 +8,7 @@ import pycuda.driver as cuda
 from pycuda.driver import In, Out, InOut
 from pycuda.compiler import SourceModule
 import numpy as np
+from scipy import ndimage

 from fr0stlib.pyflam3 import flam3_interpolate
 from cuburn.code import mwc, variations, filter
@ -125,8 +126,8 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {

        int ix = trunca(cx+ditherx), iy = trunca(cy+dithery);

-        if (ix < 0 || ix >= {{features.width}} ||
-            iy < 0 || iy >= {{features.height}} ) {
+        if (ix < 0 || ix >= {{features.acc_width}} ||
+            iy < 0 || iy >= {{features.acc_height}} ) {
            consec_bad++;
            if (consec_bad > {{features.max_oob}}) {
                x = mwc_next_11(&rctx);
@ -137,7 +138,7 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
            continue;
        }

-        int i = iy * {{features.width}} + ix;
+        int i = iy * {{features.acc_stride}} + ix;

        float4 outcol = tex2D(palTex, color, {{packer.get('cp_step_frac')}});
        float4 pix = accbuf[i];
@ -148,6 +149,7 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
        accbuf[i] = pix;    // TODO: atomic operations (or better)
        denbuf[i] += 1.0f;
    }
+    asm volatile ("membar.cta;");
 }
 """)
        return tmpl.substitute(
@ -158,8 +160,8 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
 def render(features, cps):
    # TODO: make this adjustable via genome
    nsteps = 1000
-    abuf = np.zeros((features.height, features.width, 4), dtype=np.float32)
-    dbuf = np.zeros((features.height, features.width), dtype=np.float32)
+    abuf = np.zeros((features.acc_height, features.acc_stride, 4), dtype=np.float32)
+    dbuf = np.zeros((features.acc_height, features.acc_stride), dtype=np.float32)
    seeds = mwc.MWC.make_seeds(512 * nsteps)

    iter = IterCode(features)
@ -221,9 +223,13 @@ def render(features, cps):

    f = np.float32

-    npix = features.width * features.height
+    npix = features.acc_width * features.acc_height

+    # TODO: just allocate
    obufd = cuda.to_device(abuf)
+    dbuf = cuda.from_device_like(dbufd, dbuf)
+    dbuf = ndimage.filters.gaussian_filter(dbuf, 0.6)
+    dbufd = cuda.to_device(dbuf)
    de.invoke(mod, abufd, obufd, dbufd)

    fun = mod.get_function("colorclip")
--- a/cuburn/render.py
+++ b/cuburn/render.py
@ -38,7 +38,9 @@ class Genome(pyflam3.Genome):
        """
        # TODO: when reading as a property during packing, this may be
        # calculated 6 times instead of 1
-        return ( affine.translate(0.5 * cp.width, 0.5 * cp.height)
+        # TODO: also requires knowing gutter width
+        g = Features.gutter
+        return ( affine.translate(0.5 * cp.width + g, 0.5 * cp.height + g)
               * affine.scale(cp.ppu, cp.ppu)
               * affine.translate(-cp._center[0], -cp._center[1])
               * affine.rotate(cp.rotate * 2 * np.pi / 360,
@ -90,6 +92,10 @@ class Features(object):
    # performance too much. Power-of-two, please.
    palette_height = 16

+    # Maximum width of DE and other spatial filters, and thus in turn the
+    # amount of padding applied to each edge of the accumulation buffer
+    gutter = 16
+
    def __init__(self, genomes):
        any = lambda l: bool(filter(None, map(l, genomes)))
        self.max_ntemporal_samples = max(
@ -112,9 +118,9 @@ class Features(object):

        self.width = genomes[0].width
        self.height = genomes[0].height
-        self.acc_width = genomes[0].width
-        self.acc_height = genomes[0].height
-        self.acc_stride = genomes[0].width
+        self.acc_width = genomes[0].width + 2 * self.gutter
+        self.acc_height = genomes[0].height + 2 * self.gutter
+        self.acc_stride = genomes[0].width + 2 * self.gutter

 class XFormFeatures(object):
    def __init__(self, xforms, xform_id):
--- a/main.py
+++ b/main.py
@ -44,6 +44,10 @@ def main(args):
        genomes = Genome.from_string(fp.read())
    anim = Animation(genomes)
    accum, den = render(anim.features, genomes)
+    accum = np.delete(accum, np.s_[:16], axis=0)
+    accum = np.delete(accum, np.s_[:16], axis=1)
+    accum = np.delete(accum, np.s_[-16:], axis=0)
+    accum = np.delete(accum, np.s_[-16:], axis=1)

    noalpha = np.delete(accum, 3, axis=2)
    scipy.misc.imsave('rendered.png', noalpha)