Factor texrefs out of bilateral filter.

This also corrects the fact that denblurlib wouldn't compile without
bilaterallib.
This commit is contained in:
Steven Robertson 2012-04-06 21:17:31 -07:00
parent b1b09c4bde
commit eca8a8e1d3
2 changed files with 28 additions and 27 deletions

View File

@ -69,8 +69,9 @@ fma_buf(float4 *dst, const float4 *src, int astride, float scale) {
} }
''') ''')
denblurlib = devlib(decls=''' denblurlib = devlib(deps=[texshearlib], decls='''
texture<float, cudaTextureType2D> blur_src; texture<float4, cudaTextureType2D> chan4_src;
texture<float, cudaTextureType2D> chan1_src;
__constant__ float gauss_coefs[7] = { __constant__ float gauss_coefs[7] = {
0.00443305f, 0.05400558f, 0.24203623f, 0.39905028f, 0.00443305f, 0.05400558f, 0.24203623f, 0.39905028f,
@ -78,7 +79,7 @@ __constant__ float gauss_coefs[7] = {
}; };
''', defs=r''' ''', defs=r'''
// Apply a Gaussian-esque blur to the density channel of the texture in // Apply a Gaussian-esque blur to the density channel of the texture in
// ``bilateral_src`` in the horizontal direction, and write it to ``dst``, a // ``chan4_src`` in the horizontal direction, and write it to ``dst``, a
// one-channel buffer. // one-channel buffer.
__global__ void den_blur(float *dst, int pattern, int upsample) { __global__ void den_blur(float *dst, int pattern, int upsample) {
int xi = blockIdx.x * blockDim.x + threadIdx.x; int xi = blockIdx.x * blockDim.x + threadIdx.x;
@ -89,7 +90,7 @@ __global__ void den_blur(float *dst, int pattern, int upsample) {
#pragma unroll #pragma unroll
for (int i = 0; i < 7; i++) for (int i = 0; i < 7; i++)
den += tex_shear(bilateral_src, pattern, x, y, (i - 3) << upsample).w den += tex_shear(chan4_src, pattern, x, y, (i - 3) << upsample).w
* gauss_coefs[i]; * gauss_coefs[i];
dst[yi * (blockDim.x * gridDim.x) + xi] = den; dst[yi * (blockDim.x * gridDim.x) + xi] = den;
} }
@ -104,15 +105,13 @@ __global__ void den_blur_1c(float *dst, int pattern, int upsample) {
#pragma unroll #pragma unroll
for (int i = 0; i < 7; i++) for (int i = 0; i < 7; i++)
den += tex_shear(blur_src, pattern, x, y, (i - 3) << upsample) den += tex_shear(chan1_src, pattern, x, y, (i - 3) << upsample)
* gauss_coefs[i]; * gauss_coefs[i];
dst[yi * (blockDim.x * gridDim.x) + xi] = den; dst[yi * (blockDim.x * gridDim.x) + xi] = den;
} }
''') ''')
bilaterallib = devlib(deps=[logscalelib, texshearlib, denblurlib], decls=''' bilaterallib = devlib(deps=[logscalelib, texshearlib, denblurlib], defs=r'''
texture<float4, cudaTextureType2D> bilateral_src;
''', defs=r'''
/* sstd: spatial standard deviation (Gaussian filter) /* sstd: spatial standard deviation (Gaussian filter)
* cstd: color standard deviation (Gaussian on the range [0, 1], where 1 * cstd: color standard deviation (Gaussian on the range [0, 1], where 1
* represents an "opposite" color). * represents an "opposite" color).
@ -146,7 +145,7 @@ bilateral(float4 *dst, int pattern, int radius,
// Gather the center point, and pre-average the color values for faster // Gather the center point, and pre-average the color values for faster
// comparison. // comparison.
float4 cen = tex2D(bilateral_src, x, y); float4 cen = tex2D(chan4_src, x, y);
float cdrcp = 1.0f / (cen.w + 1.0e-6f); float cdrcp = 1.0f / (cen.w + 1.0e-6f);
cen.x *= cdrcp; cen.x *= cdrcp;
cen.y *= cdrcp; cen.y *= cdrcp;
@ -160,13 +159,13 @@ bilateral(float4 *dst, int pattern, int radius,
// Be extra-sure spatial coefficients have been written // Be extra-sure spatial coefficients have been written
__syncthreads(); __syncthreads();
float4 pix = tex_shear(bilateral_src, pattern, x, y, -radius - 1.0f); float4 pix = tex_shear(chan4_src, pattern, x, y, -radius - 1.0f);
float4 next = tex_shear(bilateral_src, pattern, x, y, -radius); float4 next = tex_shear(chan4_src, pattern, x, y, -radius);
for (float r = -radius; r <= radius; r++) { for (float r = -radius; r <= radius; r++) {
float prev = pix.w; float prev = pix.w;
pix = next; pix = next;
next = tex_shear(bilateral_src, pattern, x, y, r + 1.0f); next = tex_shear(chan4_src, pattern, x, y, r + 1.0f);
// This initial factor is arbitrary, but seems to do a decent job at // This initial factor is arbitrary, but seems to do a decent job at
// preventing excessive bleed-out from points inside an empty region. // preventing excessive bleed-out from points inside an empty region.
@ -201,7 +200,7 @@ bilateral(float4 *dst, int pattern, int radius,
// //
// Note that both the gradient and the blurred weight are calculated // Note that both the gradient and the blurred weight are calculated
// in one dimension, along the current sampling vector. // in one dimension, along the current sampling vector.
float avg = tex_shear(blur_src, pattern, x, y, r); float avg = tex_shear(chan1_src, pattern, x, y, r);
float gradfact = (next.w - prev) / (avg + 1.0e-6f); float gradfact = (next.w - prev) / (avg + 1.0e-6f);
if (r < 0) gradfact = -gradfact; if (r < 0) gradfact = -gradfact;
gradfact = exp2f(-exp2f(gspeed * gradfact)); gradfact = exp2f(-exp2f(gspeed * gradfact));

View File

@ -8,6 +8,18 @@ from pycuda.gpuarray import vec
import code.filters import code.filters
from code.util import ClsMod, argset, launch from code.util import ClsMod, argset, launch
def mktref(mod, n):
tref = mod.get_texref(n)
tref.set_filter_mode(cuda.filter_mode.POINT)
tref.set_address_mode(0, cuda.address_mode.WRAP)
tref.set_address_mode(1, cuda.address_mode.WRAP)
return tref
def mkdsc(dim, ch):
return argset(cuda.ArrayDescriptor(), height=dim.ah,
width=dim.astride, num_channels=ch,
format=cuda.array_format.FLOAT)
class Filter(object): class Filter(object):
def apply(self, fb, gnm, dim, tc, stream=None): def apply(self, fb, gnm, dim, tc, stream=None):
""" """
@ -34,20 +46,10 @@ class Bilateral(Filter, ClsMod):
bs = sb * dim.ah bs = sb * dim.ah
bl, gr = (32, 8, 1), (dim.astride / 32, dim.ah / 8) bl, gr = (32, 8, 1), (dim.astride / 32, dim.ah / 8)
mkdsc = lambda c: argset(cuda.ArrayDescriptor(), height=dim.ah, dsc = mkdsc(dim, 4)
width=dim.astride, num_channels=c, tref = mktref(self.mod, 'chan4_src')
format=cuda.array_format.FLOAT) grad_dsc = mkdsc(dim, 1)
def mktref(n): grad_tref = mktref(self.mod, 'chan1_src')
tref = self.mod.get_texref(n)
tref.set_filter_mode(cuda.filter_mode.POINT)
tref.set_address_mode(0, cuda.address_mode.WRAP)
tref.set_address_mode(1, cuda.address_mode.WRAP)
return tref
dsc = mkdsc(4)
tref = mktref('bilateral_src')
grad_dsc = mkdsc(1)
grad_tref = mktref('blur_src')
for pattern in range(self.directions): for pattern in range(self.directions):
# Scale spatial parameter so that a "pixel" is equivalent to an # Scale spatial parameter so that a "pixel" is equivalent to an