Factor texrefs out of bilateral filter.

This also corrects the fact that denblurlib wouldn't compile without bilaterallib.
2026-01-08 05:00:21 -05:00 · 2012-04-06 21:17:31 -07:00
parent b1b09c4bde
commit eca8a8e1d3
2 changed files with 28 additions and 27 deletions
--- a/cuburn/code/filters.py
+++ b/cuburn/code/filters.py
@ -69,8 +69,9 @@ fma_buf(float4 *dst, const float4 *src, int astride, float scale) {
 }
 ''')
-denblurlib = devlib(decls='''
+denblurlib = devlib(deps=[texshearlib], decls='''
-texture<float,  cudaTextureType2D> blur_src;
+texture<float4, cudaTextureType2D> chan4_src;
 texture<float,  cudaTextureType2D> chan1_src;
 __constant__ float gauss_coefs[7] = {
    0.00443305f,  0.05400558f,  0.24203623f,  0.39905028f,
@ -78,7 +79,7 @@ __constant__ float gauss_coefs[7] = {
 };
 ''', defs=r'''
 // Apply a Gaussian-esque blur to the density channel of the texture in
-// ``bilateral_src`` in the horizontal direction, and write it to ``dst``, a
+// ``chan4_src`` in the horizontal direction, and write it to ``dst``, a
 // one-channel buffer.
 __global__ void den_blur(float *dst, int pattern, int upsample) {
    int xi = blockIdx.x * blockDim.x + threadIdx.x;
@ -89,7 +90,7 @@ __global__ void den_blur(float *dst, int pattern, int upsample) {
    #pragma unroll
    for (int i = 0; i < 7; i++)
-        den += tex_shear(bilateral_src, pattern, x, y, (i - 3) << upsample).w
+        den += tex_shear(chan4_src, pattern, x, y, (i - 3) << upsample).w
             * gauss_coefs[i];
    dst[yi * (blockDim.x * gridDim.x) + xi] = den;
 }
@ -104,15 +105,13 @@ __global__ void den_blur_1c(float *dst, int pattern, int upsample) {
    #pragma unroll
    for (int i = 0; i < 7; i++)
-        den += tex_shear(blur_src, pattern, x, y, (i - 3) << upsample)
+        den += tex_shear(chan1_src, pattern, x, y, (i - 3) << upsample)
             * gauss_coefs[i];
    dst[yi * (blockDim.x * gridDim.x) + xi] = den;
 }
 ''')
-bilaterallib = devlib(deps=[logscalelib, texshearlib, denblurlib], decls='''
+bilaterallib = devlib(deps=[logscalelib, texshearlib, denblurlib], defs=r'''
 texture<float4, cudaTextureType2D> bilateral_src;
 ''', defs=r'''
 /* sstd:    spatial standard deviation (Gaussian filter)
 * cstd:    color standard deviation (Gaussian on the range [0, 1], where 1
 *          represents an "opposite" color).
@ -146,7 +145,7 @@ bilateral(float4 *dst, int pattern, int radius,
    // Gather the center point, and pre-average the color values for faster
    // comparison.
-    float4 cen = tex2D(bilateral_src, x, y);
+    float4 cen = tex2D(chan4_src, x, y);
    float cdrcp = 1.0f / (cen.w + 1.0e-6f);
    cen.x *= cdrcp;
    cen.y *= cdrcp;
@ -160,13 +159,13 @@ bilateral(float4 *dst, int pattern, int radius,
    // Be extra-sure spatial coefficients have been written
    __syncthreads();
-    float4 pix = tex_shear(bilateral_src, pattern, x, y, -radius - 1.0f);
+    float4 pix = tex_shear(chan4_src, pattern, x, y, -radius - 1.0f);
-    float4 next = tex_shear(bilateral_src, pattern, x, y, -radius);
+    float4 next = tex_shear(chan4_src, pattern, x, y, -radius);
    for (float r = -radius; r <= radius; r++) {
        float prev = pix.w;
        pix = next;
-        next = tex_shear(bilateral_src, pattern, x, y, r + 1.0f);
+        next = tex_shear(chan4_src, pattern, x, y, r + 1.0f);
        // This initial factor is arbitrary, but seems to do a decent job at
        // preventing excessive bleed-out from points inside an empty region.
@ -201,7 +200,7 @@ bilateral(float4 *dst, int pattern, int radius,
        //
        // Note that both the gradient and the blurred weight are calculated
        // in one dimension, along the current sampling vector.
-        float avg = tex_shear(blur_src, pattern, x, y, r);
+        float avg = tex_shear(chan1_src, pattern, x, y, r);
        float gradfact = (next.w - prev) / (avg + 1.0e-6f);
        if (r < 0) gradfact = -gradfact;
        gradfact = exp2f(-exp2f(gspeed * gradfact));
--- a/cuburn/filters.py
+++ b/cuburn/filters.py
@ -8,6 +8,18 @@ from pycuda.gpuarray import vec
 import code.filters
 from code.util import ClsMod, argset, launch
 def mktref(mod, n):
    """Look up the texture reference named ``n`` in ``mod`` and configure it.

    The reference is switched to point filtering, and wrap addressing is
    applied to dimensions 0 and 1, before it is handed back to the caller.
    """
    texref = mod.get_texref(n)
    texref.set_filter_mode(cuda.filter_mode.POINT)
    # Both addressing dimensions get the same wrap-around behaviour.
    for axis in (0, 1):
        texref.set_address_mode(axis, cuda.address_mode.WRAP)
    return texref
 def mkdsc(dim, ch):
    """Return a float ``ArrayDescriptor`` for a ``ch``-channel 2D array.

    Height is taken from ``dim.ah`` and width from ``dim.astride``; the
    fields are applied through ``argset`` (presumably attribute assignment
    on the descriptor -- confirm against ``code.util.argset``).
    """
    descriptor = cuda.ArrayDescriptor()
    fields = dict(height=dim.ah, width=dim.astride, num_channels=ch,
                  format=cuda.array_format.FLOAT)
    return argset(descriptor, **fields)
 class Filter(object):
    def apply(self, fb, gnm, dim, tc, stream=None):
        """
@ -34,20 +46,10 @@ class Bilateral(Filter, ClsMod):
        bs = sb * dim.ah
        bl, gr = (32, 8, 1), (dim.astride / 32, dim.ah / 8)
-        mkdsc = lambda c: argset(cuda.ArrayDescriptor(), height=dim.ah,
+        dsc = mkdsc(dim, 4)
-                                 width=dim.astride, num_channels=c,
+        tref = mktref(self.mod, 'chan4_src')
-                                 format=cuda.array_format.FLOAT)
+        grad_dsc = mkdsc(dim, 1)
-        def mktref(n):
+        grad_tref = mktref(self.mod, 'chan1_src')
            tref = self.mod.get_texref(n)
            tref.set_filter_mode(cuda.filter_mode.POINT)
            tref.set_address_mode(0, cuda.address_mode.WRAP)
            tref.set_address_mode(1, cuda.address_mode.WRAP)
            return tref
        dsc = mkdsc(4)
        tref = mktref('bilateral_src')
        grad_dsc = mkdsc(1)
        grad_tref = mktref('blur_src')
        for pattern in range(self.directions):
            # Scale spatial parameter so that a "pixel" is equivalent to an