mirror of
https://github.com/stevenrobertson/cuburn.git
synced 2025-02-05 11:40:04 -05:00
Factor texrefs out of bilateral filter.
This also corrects the fact that denblurlib wouldn't compile without bilaterallib.
This commit is contained in:
parent
b1b09c4bde
commit
eca8a8e1d3
@ -69,8 +69,9 @@ fma_buf(float4 *dst, const float4 *src, int astride, float scale) {
|
|||||||
}
|
}
|
||||||
''')
|
''')
|
||||||
|
|
||||||
denblurlib = devlib(decls='''
|
denblurlib = devlib(deps=[texshearlib], decls='''
|
||||||
texture<float, cudaTextureType2D> blur_src;
|
texture<float4, cudaTextureType2D> chan4_src;
|
||||||
|
texture<float, cudaTextureType2D> chan1_src;
|
||||||
|
|
||||||
__constant__ float gauss_coefs[7] = {
|
__constant__ float gauss_coefs[7] = {
|
||||||
0.00443305f, 0.05400558f, 0.24203623f, 0.39905028f,
|
0.00443305f, 0.05400558f, 0.24203623f, 0.39905028f,
|
||||||
@ -78,7 +79,7 @@ __constant__ float gauss_coefs[7] = {
|
|||||||
};
|
};
|
||||||
''', defs=r'''
|
''', defs=r'''
|
||||||
// Apply a Gaussian-esque blur to the density channel of the texture in
|
// Apply a Gaussian-esque blur to the density channel of the texture in
|
||||||
// ``bilateral_src`` in the horizontal direction, and write it to ``dst``, a
|
// ``chan4_src`` in the horizontal direction, and write it to ``dst``, a
|
||||||
// one-channel buffer.
|
// one-channel buffer.
|
||||||
__global__ void den_blur(float *dst, int pattern, int upsample) {
|
__global__ void den_blur(float *dst, int pattern, int upsample) {
|
||||||
int xi = blockIdx.x * blockDim.x + threadIdx.x;
|
int xi = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
@ -89,7 +90,7 @@ __global__ void den_blur(float *dst, int pattern, int upsample) {
|
|||||||
|
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for (int i = 0; i < 7; i++)
|
for (int i = 0; i < 7; i++)
|
||||||
den += tex_shear(bilateral_src, pattern, x, y, (i - 3) << upsample).w
|
den += tex_shear(chan4_src, pattern, x, y, (i - 3) << upsample).w
|
||||||
* gauss_coefs[i];
|
* gauss_coefs[i];
|
||||||
dst[yi * (blockDim.x * gridDim.x) + xi] = den;
|
dst[yi * (blockDim.x * gridDim.x) + xi] = den;
|
||||||
}
|
}
|
||||||
@ -104,15 +105,13 @@ __global__ void den_blur_1c(float *dst, int pattern, int upsample) {
|
|||||||
|
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for (int i = 0; i < 7; i++)
|
for (int i = 0; i < 7; i++)
|
||||||
den += tex_shear(blur_src, pattern, x, y, (i - 3) << upsample)
|
den += tex_shear(chan1_src, pattern, x, y, (i - 3) << upsample)
|
||||||
* gauss_coefs[i];
|
* gauss_coefs[i];
|
||||||
dst[yi * (blockDim.x * gridDim.x) + xi] = den;
|
dst[yi * (blockDim.x * gridDim.x) + xi] = den;
|
||||||
}
|
}
|
||||||
''')
|
''')
|
||||||
|
|
||||||
bilaterallib = devlib(deps=[logscalelib, texshearlib, denblurlib], decls='''
|
bilaterallib = devlib(deps=[logscalelib, texshearlib, denblurlib], defs=r'''
|
||||||
texture<float4, cudaTextureType2D> bilateral_src;
|
|
||||||
''', defs=r'''
|
|
||||||
/* sstd: spatial standard deviation (Gaussian filter)
|
/* sstd: spatial standard deviation (Gaussian filter)
|
||||||
* cstd: color standard deviation (Gaussian on the range [0, 1], where 1
|
* cstd: color standard deviation (Gaussian on the range [0, 1], where 1
|
||||||
* represents an "opposite" color).
|
* represents an "opposite" color).
|
||||||
@ -146,7 +145,7 @@ bilateral(float4 *dst, int pattern, int radius,
|
|||||||
|
|
||||||
// Gather the center point, and pre-average the color values for faster
|
// Gather the center point, and pre-average the color values for faster
|
||||||
// comparison.
|
// comparison.
|
||||||
float4 cen = tex2D(bilateral_src, x, y);
|
float4 cen = tex2D(chan4_src, x, y);
|
||||||
float cdrcp = 1.0f / (cen.w + 1.0e-6f);
|
float cdrcp = 1.0f / (cen.w + 1.0e-6f);
|
||||||
cen.x *= cdrcp;
|
cen.x *= cdrcp;
|
||||||
cen.y *= cdrcp;
|
cen.y *= cdrcp;
|
||||||
@ -160,13 +159,13 @@ bilateral(float4 *dst, int pattern, int radius,
|
|||||||
// Be extra-sure spatial coefficients have been written
|
// Be extra-sure spatial coefficients have been written
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
float4 pix = tex_shear(bilateral_src, pattern, x, y, -radius - 1.0f);
|
float4 pix = tex_shear(chan4_src, pattern, x, y, -radius - 1.0f);
|
||||||
float4 next = tex_shear(bilateral_src, pattern, x, y, -radius);
|
float4 next = tex_shear(chan4_src, pattern, x, y, -radius);
|
||||||
|
|
||||||
for (float r = -radius; r <= radius; r++) {
|
for (float r = -radius; r <= radius; r++) {
|
||||||
float prev = pix.w;
|
float prev = pix.w;
|
||||||
pix = next;
|
pix = next;
|
||||||
next = tex_shear(bilateral_src, pattern, x, y, r + 1.0f);
|
next = tex_shear(chan4_src, pattern, x, y, r + 1.0f);
|
||||||
|
|
||||||
// This initial factor is arbitrary, but seems to do a decent job at
|
// This initial factor is arbitrary, but seems to do a decent job at
|
||||||
// preventing excessive bleed-out from points inside an empty region.
|
// preventing excessive bleed-out from points inside an empty region.
|
||||||
@ -201,7 +200,7 @@ bilateral(float4 *dst, int pattern, int radius,
|
|||||||
//
|
//
|
||||||
// Note that both the gradient and the blurred weight are calculated
|
// Note that both the gradient and the blurred weight are calculated
|
||||||
// in one dimension, along the current sampling vector.
|
// in one dimension, along the current sampling vector.
|
||||||
float avg = tex_shear(blur_src, pattern, x, y, r);
|
float avg = tex_shear(chan1_src, pattern, x, y, r);
|
||||||
float gradfact = (next.w - prev) / (avg + 1.0e-6f);
|
float gradfact = (next.w - prev) / (avg + 1.0e-6f);
|
||||||
if (r < 0) gradfact = -gradfact;
|
if (r < 0) gradfact = -gradfact;
|
||||||
gradfact = exp2f(-exp2f(gspeed * gradfact));
|
gradfact = exp2f(-exp2f(gspeed * gradfact));
|
||||||
|
@ -8,6 +8,18 @@ from pycuda.gpuarray import vec
|
|||||||
import code.filters
|
import code.filters
|
||||||
from code.util import ClsMod, argset, launch
|
from code.util import ClsMod, argset, launch
|
||||||
|
|
||||||
|
def mktref(mod, n):
|
||||||
|
tref = mod.get_texref(n)
|
||||||
|
tref.set_filter_mode(cuda.filter_mode.POINT)
|
||||||
|
tref.set_address_mode(0, cuda.address_mode.WRAP)
|
||||||
|
tref.set_address_mode(1, cuda.address_mode.WRAP)
|
||||||
|
return tref
|
||||||
|
|
||||||
|
def mkdsc(dim, ch):
|
||||||
|
return argset(cuda.ArrayDescriptor(), height=dim.ah,
|
||||||
|
width=dim.astride, num_channels=ch,
|
||||||
|
format=cuda.array_format.FLOAT)
|
||||||
|
|
||||||
class Filter(object):
|
class Filter(object):
|
||||||
def apply(self, fb, gnm, dim, tc, stream=None):
|
def apply(self, fb, gnm, dim, tc, stream=None):
|
||||||
"""
|
"""
|
||||||
@ -34,20 +46,10 @@ class Bilateral(Filter, ClsMod):
|
|||||||
bs = sb * dim.ah
|
bs = sb * dim.ah
|
||||||
bl, gr = (32, 8, 1), (dim.astride / 32, dim.ah / 8)
|
bl, gr = (32, 8, 1), (dim.astride / 32, dim.ah / 8)
|
||||||
|
|
||||||
mkdsc = lambda c: argset(cuda.ArrayDescriptor(), height=dim.ah,
|
dsc = mkdsc(dim, 4)
|
||||||
width=dim.astride, num_channels=c,
|
tref = mktref(self.mod, 'chan4_src')
|
||||||
format=cuda.array_format.FLOAT)
|
grad_dsc = mkdsc(dim, 1)
|
||||||
def mktref(n):
|
grad_tref = mktref(self.mod, 'chan1_src')
|
||||||
tref = self.mod.get_texref(n)
|
|
||||||
tref.set_filter_mode(cuda.filter_mode.POINT)
|
|
||||||
tref.set_address_mode(0, cuda.address_mode.WRAP)
|
|
||||||
tref.set_address_mode(1, cuda.address_mode.WRAP)
|
|
||||||
return tref
|
|
||||||
|
|
||||||
dsc = mkdsc(4)
|
|
||||||
tref = mktref('bilateral_src')
|
|
||||||
grad_dsc = mkdsc(1)
|
|
||||||
grad_tref = mktref('blur_src')
|
|
||||||
|
|
||||||
for pattern in range(self.directions):
|
for pattern in range(self.directions):
|
||||||
# Scale spatial parameter so that a "pixel" is equivalent to an
|
# Scale spatial parameter so that a "pixel" is equivalent to an
|
||||||
|
Loading…
Reference in New Issue
Block a user