mirror of
https://github.com/stevenrobertson/cuburn.git
synced 2025-04-21 00:51:31 -04:00
More experiments
This commit is contained in:
parent
7c37b2b688
commit
387dfd9f8c
@ -8,6 +8,38 @@ from pycuda.gpuarray import vec
|
|||||||
from cuburn.code.util import *
|
from cuburn.code.util import *
|
||||||
|
|
||||||
_CODE = r'''
|
_CODE = r'''
|
||||||
|
// Filter directions specified in degrees, using image/texture addressing
|
||||||
|
// [(0,0) is upper left corner, 90 degrees is down].
|
||||||
|
|
||||||
|
__constant__ float2 addressing_patterns[16] = {
|
||||||
|
{ 1.0f, 0.0f}, { 0.0f, 1.0f}, // 0, 1: 0, 90
|
||||||
|
{ 1.0f, 1.0f}, {-1.0f, 1.0f}, // 2, 3: 45, 135
|
||||||
|
{ 1.0f, 0.5f}, {-0.5f, 1.0f}, // 4, 5: 22.5, 112.5
|
||||||
|
{ 1.0f, -0.5f}, { 0.5f, 1.0f}, // 6, 7: -22.5, 67.5
|
||||||
|
{ 1.0f, 0.666667f}, {-0.666667f, 1.0f}, // 8, 9: 30, 120
|
||||||
|
{ 1.0f, -0.666667f}, { 0.666667f, 1.0f}, // 10, 11: -30, 60
|
||||||
|
{ 1.0f, 0.333333f}, {-0.333333f, 1.0f}, // 12, 13: 15, 105
|
||||||
|
{ 1.0f, -0.333333f}, { 0.333333f, 1.0f}, // 14, 15: -15, 75
|
||||||
|
};
|
||||||
|
|
||||||
|
// Mon dieu! A C++ feature?
|
||||||
|
template <typename T> __device__ T
|
||||||
|
tex_shear(texture<T, cudaTextureType2D> ref, int pattern,
|
||||||
|
float x, float y, float radius) {
|
||||||
|
float2 scale = addressing_patterns[pattern];
|
||||||
|
float i = scale.x * radius, j = scale.y * radius;
|
||||||
|
// Round i and j to the nearest integer, choosing the nearest even when
|
||||||
|
// equidistant. It's critical that this be done before adding 'x' and 'y',
|
||||||
|
// so that addressing patterns remain consistent across the grid.
|
||||||
|
asm("{\n\t"
|
||||||
|
"cvt.rni.ftz.f32.f32 %0, %0;\n\t"
|
||||||
|
"cvt.rni.ftz.f32.f32 %1, %1;\n\t"
|
||||||
|
"}\n" : "+f"(i), "+f"(j));
|
||||||
|
return tex2D(ref, x + i, y + j);
|
||||||
|
}
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
|
||||||
__global__
|
__global__
|
||||||
void colorclip(float4 *pixbuf, float gamma, float vibrance, float highpow,
|
void colorclip(float4 *pixbuf, float gamma, float vibrance, float highpow,
|
||||||
float linrange, float lingam, float3 bkgd,
|
float linrange, float lingam, float3 bkgd,
|
||||||
@ -127,48 +159,49 @@ void fma_buf(float4 *dst, const float4 *src, int astride, float scale) {
|
|||||||
texture<float4, cudaTextureType2D> bilateral_src;
|
texture<float4, cudaTextureType2D> bilateral_src;
|
||||||
texture<float, cudaTextureType2D> blur_src;
|
texture<float, cudaTextureType2D> blur_src;
|
||||||
|
|
||||||
|
__global__ void logscale_den(float *dst, float k2) {
|
||||||
|
int xi = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
|
int yi = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
|
float4 pix = tex2D(bilateral_src, xi, yi);
|
||||||
|
float out = logf(1.0f + pix.w * k2);
|
||||||
|
dst[yi * (blockDim.x * gridDim.x) + xi] = out;
|
||||||
|
}
|
||||||
|
|
||||||
|
__constant__ float gauss_coefs[9] = {
|
||||||
|
0.00443305f, 0.05400558f, 0.24203623f, 0.39905028f,
|
||||||
|
0.24203623f, 0.05400558f, 0.00443305f
|
||||||
|
};
|
||||||
|
|
||||||
// Apply a Gaussian-esque blur to the density channel of the texture in
|
// Apply a Gaussian-esque blur to the density channel of the texture in
|
||||||
// ``bilateral_src`` in the horizontal direction, and write it to ``dst``, a
|
// ``bilateral_src`` in the horizontal direction, and write it to ``dst``, a
|
||||||
// one-channel buffer
|
// one-channel buffer.
|
||||||
__global__ void blur_h(float *dst) {
|
__global__ void den_blur(float *dst, int pattern, int upsample) {
|
||||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
int xi = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
int yi = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
|
float x = xi, y = yi;
|
||||||
|
|
||||||
float den = 0.0f;
|
float den = 0.0f;
|
||||||
den += tex2D(bilateral_src, x - 2, y).w * 0.00135f;
|
|
||||||
den += tex2D(bilateral_src, x - 1, y).w * 0.1573f;
|
#pragma unroll
|
||||||
den += tex2D(bilateral_src, x, y).w * 0.6827f;
|
for (int i = 0; i < 9; i++)
|
||||||
den += tex2D(bilateral_src, x + 1, y).w * 0.1573f;
|
den += tex_shear(bilateral_src, pattern, x, y, (i - 4) << upsample).w
|
||||||
den += tex2D(bilateral_src, x + 2, y).w * 0.00135f;
|
* gauss_coefs[i];
|
||||||
dst[y * (blockDim.x * gridDim.x) + x] = den;
|
dst[yi * (blockDim.x * gridDim.x) + xi] = den;
|
||||||
}
|
}
|
||||||
|
|
||||||
// As above, but with a one-channel texture as source
|
// As above, but with the one-channel texture as source
|
||||||
__global__ void blur_h_1cp(float *dst) {
|
__global__ void den_blur_1c(float *dst, int pattern, int upsample) {
|
||||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
int xi = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
int yi = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
|
float x = xi, y = yi;
|
||||||
|
|
||||||
float den = 0.0f;
|
float den = 0.0f;
|
||||||
den += tex2D(blur_src, x - 2, y) * 0.00135f;
|
|
||||||
den += tex2D(blur_src, x - 1, y) * 0.1573f;
|
|
||||||
den += tex2D(blur_src, x, y) * 0.6827f;
|
|
||||||
den += tex2D(blur_src, x + 1, y) * 0.1573f;
|
|
||||||
den += tex2D(blur_src, x + 2, y) * 0.00135f;
|
|
||||||
dst[y * (blockDim.x * gridDim.x) + x] = den;
|
|
||||||
}
|
|
||||||
|
|
||||||
// As above, but in the vertical direction
|
#pragma unroll
|
||||||
__global__ void blur_v(float *dst) {
|
for (int i = 0; i < 9; i++)
|
||||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
den += tex_shear(blur_src, pattern, x, y, (i - 4) << upsample)
|
||||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
* gauss_coefs[i];
|
||||||
|
dst[yi * (blockDim.x * gridDim.x) + xi] = den;
|
||||||
float den = 0.0f;
|
|
||||||
den += tex2D(blur_src, x, y - 2) * 0.00135f;
|
|
||||||
den += tex2D(blur_src, x, y - 1) * 0.1573f;
|
|
||||||
den += tex2D(blur_src, x, y ) * 0.6827f;
|
|
||||||
den += tex2D(blur_src, x, y + 1) * 0.1573f;
|
|
||||||
den += tex2D(blur_src, x, y + 2) * 0.00135f;
|
|
||||||
dst[y * (blockDim.x * gridDim.x) + x] = den;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* sstd: spatial standard deviation (Gaussian filter)
|
/* sstd: spatial standard deviation (Gaussian filter)
|
||||||
@ -194,13 +227,17 @@ __global__ void blur_v(float *dst) {
|
|||||||
* cell.
|
* cell.
|
||||||
*/
|
*/
|
||||||
__global__
|
__global__
|
||||||
void bilateral(float4 *dst, int r, float sstd, float cstd, float angscale,
|
void bilateral(float4 *dst, int pattern, int radius,
|
||||||
float dhalfpt, float dspeed, float dpow) {
|
float sstd, float cstd, float angscale,
|
||||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
float dhalfpt, float dspeed, float dpow, float k2
|
||||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
) {
|
||||||
|
int xi = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
|
int yi = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
|
float x = xi, y = yi;
|
||||||
|
|
||||||
// Precalculate the spatial coeffecients.
|
// Precalculate the spatial coeffecients.
|
||||||
__shared__ float spa_coefs[32];
|
__shared__ float spa_coefs[32];
|
||||||
|
|
||||||
if (threadIdx.y == 0) {
|
if (threadIdx.y == 0) {
|
||||||
float df = threadIdx.x;
|
float df = threadIdx.x;
|
||||||
spa_coefs[threadIdx.x] = expf(df * df / (-M_SQRT2 * sstd));
|
spa_coefs[threadIdx.x] = expf(df * df / (-M_SQRT2 * sstd));
|
||||||
@ -212,27 +249,24 @@ void bilateral(float4 *dst, int r, float sstd, float cstd, float angscale,
|
|||||||
// Gather the center point, and pre-average the color values for easier
|
// Gather the center point, and pre-average the color values for easier
|
||||||
// comparison.
|
// comparison.
|
||||||
float4 cen = tex2D(bilateral_src, x, y);
|
float4 cen = tex2D(bilateral_src, x, y);
|
||||||
if (cen.w > 0.0f) {
|
float cdrcp = 1.0f / (cen.w + 1.0e-6f);
|
||||||
float cdrcp = 1.0f / cen.w;
|
cen.x *= cdrcp;
|
||||||
cen.x *= cdrcp;
|
cen.y *= cdrcp;
|
||||||
cen.y *= cdrcp;
|
cen.z *= cdrcp;
|
||||||
cen.z *= cdrcp;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Compute the Sobel directional derivative of a pre-blurred version of
|
float clogden = powf(cen.w, 0.8);
|
||||||
// the density channel at the center point.
|
//logf(1.0f + cen.w * k2);
|
||||||
float nw = tex2D(blur_src, x - 1, y - 1);
|
|
||||||
float ne = tex2D(blur_src, x + 1, y - 1);
|
|
||||||
float sw = tex2D(blur_src, x - 1, y + 1);
|
|
||||||
float se = tex2D(blur_src, x + 1, y + 1);
|
|
||||||
float h = ne + se + 2 * tex2D(blur_src, x + 1, y)
|
|
||||||
-(nw + sw + 2 * tex2D(blur_src, x - 1, y));
|
|
||||||
float v = se + sw + 2 * tex2D(blur_src, x, y + 1)
|
|
||||||
-(ne + nw + 2 * tex2D(blur_src, x, y - 1));
|
|
||||||
|
|
||||||
// TODO: figure out how to work `mag` in to scaling the degree to which
|
/*
|
||||||
// the directionality filter clamps
|
// Calculate the gradient from the pre-blurred density texture in the
|
||||||
float mag = sqrtf(h * h + v * v);
|
// "forward" and "crosswise" directions (separated by 90 degrees)
|
||||||
|
float cgrad_f = tex_shear(blur_src, pattern, x, y, 1)
|
||||||
|
- tex_shear(blur_src, pattern, x, y, -1);
|
||||||
|
float cgrad_c = tex_shear(blur_src, pattern ^ 1, x, y, 1)
|
||||||
|
- tex_shear(blur_src, pattern ^ 1, x, y, -1);
|
||||||
|
float gradrcp = 1.0f / sqrtf(cgrad_f * cgrad_f + cgrad_c * cgrad_c + 1.0e-6f);
|
||||||
|
float gradfact = cgrad_f * gradrcp;
|
||||||
|
*/
|
||||||
|
|
||||||
float4 out = make_float4(0, 0, 0, 0);
|
float4 out = make_float4(0, 0, 0, 0);
|
||||||
float weightsum = 0.0f;
|
float weightsum = 0.0f;
|
||||||
@ -240,33 +274,41 @@ void bilateral(float4 *dst, int r, float sstd, float cstd, float angscale,
|
|||||||
// Be extra-sure spatial coeffecients have been written
|
// Be extra-sure spatial coeffecients have been written
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
for (int i = -r; i <= r; i++) {
|
float4 pix = tex_shear(bilateral_src, pattern, x, y, -radius - 1.0f);
|
||||||
for (int j = -r; j <= r; j++) {
|
float4 next = tex_shear(bilateral_src, pattern, x, y, -radius);
|
||||||
float4 pix = tex2D(bilateral_src, x + i, y + j);
|
|
||||||
float cdiff = 0.5f;
|
|
||||||
|
|
||||||
if (pix.w > 0.0f && cen.w > 0.0f) {
|
for (float r = -radius; r <= radius; r++) {
|
||||||
float pdrcp = 1.0f / pix.w;
|
float prev = pix.w;
|
||||||
float yd = pix.x * pdrcp - cen.x;
|
pix = next;
|
||||||
float ud = pix.y * pdrcp - cen.y;
|
next = tex_shear(bilateral_src, pattern, x, y, r + 1.0f);
|
||||||
float vd = pix.z * pdrcp - cen.z;
|
|
||||||
cdiff = yd * yd + ud * ud + vd * vd;
|
|
||||||
}
|
|
||||||
float ddiff = dspeed * (dhalfpt - fabsf(pix.w - cen.w) - 1.0f);
|
|
||||||
float dsum = -powf(0.5f * (pix.w + cen.w + 1.0f), dpow);
|
|
||||||
float dfact = 1.0f - exp2f(dsum * exp2f(ddiff));
|
|
||||||
float angfact = (h * i + v * j) / (sqrtf(i*i + j*j) * mag + 1.0e-10f);
|
|
||||||
|
|
||||||
// Oh, this is ridiculous. But it works!
|
float cdiff = 0.5f;
|
||||||
float factor = spa_coefs[abs(i)] * spa_coefs[abs(j)]
|
|
||||||
* expf(cscale * cdiff) * dfact
|
if (pix.w > 0.0f && cen.w > 0.0f) {
|
||||||
* exp2f(angscale * (-angfact - 1.0f));
|
float pdrcp = 1.0f / pix.w;
|
||||||
weightsum += factor;
|
float yd = pix.x * pdrcp - cen.x;
|
||||||
out.x += factor * pix.x;
|
float ud = pix.y * pdrcp - cen.y;
|
||||||
out.y += factor * pix.y;
|
float vd = pix.z * pdrcp - cen.z;
|
||||||
out.z += factor * pix.z;
|
cdiff = yd * yd + ud * ud + vd * vd;
|
||||||
out.w += factor * pix.w;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//float logden = logf(1.0f + pix.w * k2);
|
||||||
|
float logden = powf(pix.w, 0.8);
|
||||||
|
float dfact = exp2f(-0.5f * fabsf(clogden - logden) * dhalfpt);
|
||||||
|
|
||||||
|
float avg = tex_shear(blur_src, pattern, x, y, r);
|
||||||
|
float yayfact = (prev - next.w) / (avg + 1.0e-6f);
|
||||||
|
yayfact = expf(-expf(0.5f * yayfact));
|
||||||
|
|
||||||
|
// Oh, this is ridiculous.
|
||||||
|
float factor = spa_coefs[(int) fabsf(r)];
|
||||||
|
if (r != 0) factor *= expf(cscale * cdiff) * dfact * yayfact;
|
||||||
|
// * expf(-cdrcp * expf((gradfact - 1.0f) * r));
|
||||||
|
weightsum += factor;
|
||||||
|
out.x += factor * pix.x;
|
||||||
|
out.y += factor * pix.y;
|
||||||
|
out.z += factor * pix.z;
|
||||||
|
out.w += factor * pix.w;
|
||||||
}
|
}
|
||||||
|
|
||||||
float weightrcp = 1.0f / (weightsum + 1e-10f);
|
float weightrcp = 1.0f / (weightsum + 1e-10f);
|
||||||
@ -275,17 +317,16 @@ void bilateral(float4 *dst, int r, float sstd, float cstd, float angscale,
|
|||||||
out.z *= weightrcp;
|
out.z *= weightrcp;
|
||||||
out.w *= weightrcp;
|
out.w *= weightrcp;
|
||||||
|
|
||||||
// Uncomment to write directional gradient using YUV colors
|
//out.x = out.w = tex_shear(blur_src, pattern, x, y, 0);
|
||||||
/*
|
//out.y = cgrad_f;
|
||||||
out.x = mag;
|
//out.z = cgrad_c;
|
||||||
out.y = h;
|
//out.y = gradfact * out.w;
|
||||||
out.z = v;
|
|
||||||
out.w = mag;
|
|
||||||
*/
|
|
||||||
|
|
||||||
const int astride = blockDim.x * gridDim.x;
|
const int astride = blockDim.x * gridDim.x;
|
||||||
dst[y * astride + x] = out;
|
dst[yi * astride + xi] = out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // end extern "C"
|
||||||
'''
|
'''
|
||||||
|
|
||||||
class Filtering(HunkOCode):
|
class Filtering(HunkOCode):
|
||||||
@ -296,12 +337,20 @@ class Filtering(HunkOCode):
|
|||||||
def init_mod(cls):
|
def init_mod(cls):
|
||||||
if cls.mod is None:
|
if cls.mod is None:
|
||||||
cls.mod = pycuda.compiler.SourceModule(assemble_code(BaseCode, cls),
|
cls.mod = pycuda.compiler.SourceModule(assemble_code(BaseCode, cls),
|
||||||
options=['-use_fast_math', '-maxrregcount', '32'])
|
options=['-use_fast_math', '-maxrregcount', '32'],
|
||||||
|
no_extern_c=True)
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.init_mod()
|
self.init_mod()
|
||||||
|
|
||||||
def de(self, ddst, dsrc, dscratch, gnm, dim, tc, nxf, stream=None):
|
def de(self, ddst, dsrc, dscratch, gnm, dim, tc, nxf, stream=None):
|
||||||
|
# Log-scale the accumulated buffer in `dsrc`.
|
||||||
|
k1 = f32(gnm.color.brightness(tc) * 268 / 256)
|
||||||
|
# Old definition of area is (w*h/(s*s)). Since new scale 'ns' is now
|
||||||
|
# s/w, new definition is (w*h/(s*s*w*w)) = (h/(s*s*w))
|
||||||
|
area = dim.h / (gnm.camera.scale(tc) ** 2 * dim.w)
|
||||||
|
k2 = f32(1.0 / (area * gnm.spp(tc)))
|
||||||
|
|
||||||
# Helper variables and functions to keep it clean
|
# Helper variables and functions to keep it clean
|
||||||
sb = 16 * dim.astride
|
sb = 16 * dim.astride
|
||||||
bs = sb * dim.ah
|
bs = sb * dim.ah
|
||||||
@ -324,47 +373,53 @@ class Filtering(HunkOCode):
|
|||||||
grad_dsc = mkdsc(1)
|
grad_dsc = mkdsc(1)
|
||||||
grad_tref = mktref('blur_src')
|
grad_tref = mktref('blur_src')
|
||||||
|
|
||||||
bilateral, blur_h, blur_h_1cp, blur_v, fma_buf = map(
|
bilateral, logscale_den, den_blur, den_blur_1c, fma_buf = map(
|
||||||
self.mod.get_function,
|
self.mod.get_function,
|
||||||
'bilateral blur_h blur_h_1cp blur_v fma_buf'.split())
|
'bilateral logscale_den den_blur den_blur_1c fma_buf'.split())
|
||||||
ROUNDS = 1 # TODO: user customizable?
|
|
||||||
|
# Number of different shear patterns to use when filtering. Must be
|
||||||
|
# even, since we depend on buffer bouncing (but I don't think that it's
|
||||||
|
# a requirement for the filter itself to get decent results).
|
||||||
|
DIRECTIONS = 8
|
||||||
|
|
||||||
|
def do_bilateral(bsrc, bdst, pattern, r=15, sstd=3, cstd=0.1,
|
||||||
|
angscale=2.5, dhalfpt=1, dspeed=2000000, dpow=0.6):
|
||||||
|
# Scale spatial parameters so that a "pixel" is equivalent to an
|
||||||
|
# actual pixel at 1080p
|
||||||
|
sstd *= 1920. / dim.w
|
||||||
|
|
||||||
def do_bilateral(bsrc, bdst):
|
|
||||||
tref.set_address_2d(bsrc, dsc, sb)
|
tref.set_address_2d(bsrc, dsc, sb)
|
||||||
launch(blur_h, np.intp(bdst), texrefs=[tref])
|
launch(den_blur, np.intp(bdst), i32(pattern), i32(0),
|
||||||
|
texrefs=[tref])
|
||||||
grad_tref.set_address_2d(bdst, grad_dsc, sb / 4)
|
grad_tref.set_address_2d(bdst, grad_dsc, sb / 4)
|
||||||
launch(blur_v, dscratch, texrefs=[grad_tref])
|
launch(den_blur_1c, dscratch, i32(pattern), i32(1),
|
||||||
|
texrefs=[grad_tref])
|
||||||
grad_tref.set_address_2d(dscratch, grad_dsc, sb / 4)
|
grad_tref.set_address_2d(dscratch, grad_dsc, sb / 4)
|
||||||
launch(blur_h_1cp, np.intp(bdst), texrefs=[tref])
|
launch(bilateral, np.intp(bdst), i32(pattern), i32(r),
|
||||||
grad_tref.set_address_2d(bdst, grad_dsc, sb / 4)
|
f32(sstd), f32(cstd), f32(angscale),
|
||||||
launch(blur_v, dscratch, texrefs=[grad_tref])
|
f32(dhalfpt), f32(dspeed), f32(dpow), k2,
|
||||||
grad_tref.set_address_2d(dscratch, grad_dsc, sb / 4)
|
texrefs=[tref, grad_tref])
|
||||||
launch(bilateral, np.intp(bdst), f32(4), f32(0.1),
|
|
||||||
f32(5), f32(0.4), f32(0.6), texrefs=[tref, grad_tref])
|
def do_bilateral_range(bsrc, bdst, npats, *args, **kwargs):
|
||||||
return bdst, bsrc
|
|
||||||
|
|
||||||
|
for i in range(npats):
|
||||||
|
do_bilateral(bsrc, bdst, i, *args, **kwargs)
|
||||||
|
bdst, bsrc = bsrc, bdst
|
||||||
|
if npats % 2:
|
||||||
|
cuda.memcpy_dtod_async(bdst, bsrc, bs, stream)
|
||||||
|
|
||||||
# Filter the first xform, using `ddst` as an intermediate buffer.
|
# Filter the first xform, using `ddst` as an intermediate buffer.
|
||||||
# End result is the filtered copy in `dsrc`.
|
# End result is the filtered copy in `dsrc`.
|
||||||
a, b = dsrc, ddst
|
do_bilateral_range(dsrc, ddst, DIRECTIONS)
|
||||||
for i in range(ROUNDS):
|
|
||||||
a, b = do_bilateral(a, b)
|
|
||||||
if ROUNDS % 2:
|
|
||||||
cuda.memcpy_dtod_async(b, a, bs, stream)
|
|
||||||
|
|
||||||
# Filter the remaining xforms, using `ddst` as an intermediate
|
# Filter the remaining xforms, using `ddst` as an intermediate
|
||||||
# buffer, then add the result to `dsrc` (at the zero'th xform).
|
# buffer, then add the result to `dsrc` (at the zero'th xform).
|
||||||
for x in range(1, nxf):
|
for x in range(1, nxf):
|
||||||
a, b = int(dsrc) + x * bs, ddst
|
src = int(dsrc) + x * bs
|
||||||
for i in range(ROUNDS):
|
do_bilateral_range(src, ddst, DIRECTIONS)
|
||||||
a, b = do_bilateral(a, b)
|
launch(fma_buf, dsrc, np.intp(src), i32(dim.astride), f32(1))
|
||||||
launch(fma_buf, dsrc, np.intp(a), i32(dim.astride), f32(1))
|
|
||||||
|
|
||||||
# Log-scale the accumulated buffer in `dsrc`.
|
|
||||||
k1 = f32(gnm.color.brightness(tc) * 268 / 256)
|
|
||||||
# Old definition of area is (w*h/(s*s)). Since new scale 'ns' is now
|
|
||||||
# s/w, new definition is (w*h/(s*s*w*w)) = (h/(s*s*w))
|
|
||||||
area = dim.h / (gnm.camera.scale(tc) ** 2 * dim.w)
|
|
||||||
k2 = f32(1.0 / (area * gnm.spp(tc)))
|
|
||||||
nbins = dim.ah * dim.astride
|
nbins = dim.ah * dim.astride
|
||||||
logscale = self.mod.get_function("logscale")
|
logscale = self.mod.get_function("logscale")
|
||||||
t = logscale(ddst, dsrc, k1, k2,
|
t = logscale(ddst, dsrc, k1, k2,
|
||||||
|
Loading…
Reference in New Issue
Block a user