mirror of
https://github.com/stevenrobertson/cuburn.git
synced 2025-02-05 11:40:04 -05:00
Okay, now I'm satisfied.
This commit is contained in:
parent
387dfd9f8c
commit
6fba14e2f7
@ -207,30 +207,19 @@ __global__ void den_blur_1c(float *dst, int pattern, int upsample) {
|
|||||||
/* sstd: spatial standard deviation (Gaussian filter)
|
/* sstd: spatial standard deviation (Gaussian filter)
|
||||||
* cstd: color standard deviation (Gaussian on the range [0, 1], where 1
|
* cstd: color standard deviation (Gaussian on the range [0, 1], where 1
|
||||||
* represents an "opposite" color).
|
* represents an "opposite" color).
|
||||||
* angstd: inverse standard deviation of negative of cosine of angle
|
* dstd: Standard deviation (exp2f) of density filter at density = 1.0.
|
||||||
* between current filter direction and density gradient direction
|
* dpow: Exponent applied to density values before taking difference.
|
||||||
* (yes, this is absurd; no, I'm not joking)
|
* At dpow=0.8, difference between 1000 and 1001 is about 0.2.
|
||||||
*
|
* Use bigger dstd and bigger dpow to blur low-density areas more
|
||||||
* Density is controlled by a power-of-two Gompertz distribution:
|
* without clobbering high-density areas.
|
||||||
* v = 1 - 2^(-sum^dpow * 2^((dhalfpt - x) * dspeed))
|
* gspeed: Speed of (exp2f) Gompertz distribution governing how much to
|
||||||
*
|
* tighten gradients. Zero and negative values OK.
|
||||||
* dhalfpt: The difference in density values between two points at which the
|
|
||||||
* filter admits 50% of the spatial and color kernels, when dpow
|
|
||||||
* is 0. `3` seems to be a good fit for most images at decent
|
|
||||||
* sampling levels.
|
|
||||||
* dspeed: The sharpness of the filter's cutoff around dhalfpt. At `1`, the
|
|
||||||
* filter admits 75% of a point that differs by one fewer than
|
|
||||||
* `dhalfpt` density steps from the current point (when dpow is 0);
|
|
||||||
* at `2`, it admits 93.75% of the same. `0.5` works pretty well.
|
|
||||||
* dpow: The change of filter intensity as density scales. This should be
|
|
||||||
* set automatically in response to changes in expected density per
|
|
||||||
* cell.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
__global__
|
__global__
|
||||||
void bilateral(float4 *dst, int pattern, int radius,
|
void bilateral(float4 *dst, int pattern, int radius,
|
||||||
float sstd, float cstd, float angscale,
|
float sstd, float cstd, float dstd, float dpow, float gspeed)
|
||||||
float dhalfpt, float dspeed, float dpow, float k2
|
{
|
||||||
) {
|
|
||||||
int xi = blockIdx.x * blockDim.x + threadIdx.x;
|
int xi = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
int yi = blockIdx.y * blockDim.y + threadIdx.y;
|
int yi = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
float x = xi, y = yi;
|
float x = xi, y = yi;
|
||||||
@ -245,8 +234,9 @@ void bilateral(float4 *dst, int pattern, int radius,
|
|||||||
|
|
||||||
// 3.0f compensates for [0,3] range of `cdiff`
|
// 3.0f compensates for [0,3] range of `cdiff`
|
||||||
float cscale = 1.0f / (-M_SQRT2 * 3.0f * cstd);
|
float cscale = 1.0f / (-M_SQRT2 * 3.0f * cstd);
|
||||||
|
float dscale = -0.5f / dstd;
|
||||||
|
|
||||||
// Gather the center point, and pre-average the color values for easier
|
// Gather the center point, and pre-average the color values for faster
|
||||||
// comparison.
|
// comparison.
|
||||||
float4 cen = tex2D(bilateral_src, x, y);
|
float4 cen = tex2D(bilateral_src, x, y);
|
||||||
float cdrcp = 1.0f / (cen.w + 1.0e-6f);
|
float cdrcp = 1.0f / (cen.w + 1.0e-6f);
|
||||||
@ -254,19 +244,7 @@ void bilateral(float4 *dst, int pattern, int radius,
|
|||||||
cen.y *= cdrcp;
|
cen.y *= cdrcp;
|
||||||
cen.z *= cdrcp;
|
cen.z *= cdrcp;
|
||||||
|
|
||||||
float clogden = powf(cen.w, 0.8);
|
float cpowden = powf(cen.w, dpow);
|
||||||
//logf(1.0f + cen.w * k2);
|
|
||||||
|
|
||||||
/*
|
|
||||||
// Calculate the gradient from the pre-blurred density texture in the
|
|
||||||
// "forward" and "crosswise" directions (separated by 90 degrees)
|
|
||||||
float cgrad_f = tex_shear(blur_src, pattern, x, y, 1)
|
|
||||||
- tex_shear(blur_src, pattern, x, y, -1);
|
|
||||||
float cgrad_c = tex_shear(blur_src, pattern ^ 1, x, y, 1)
|
|
||||||
- tex_shear(blur_src, pattern ^ 1, x, y, -1);
|
|
||||||
float gradrcp = 1.0f / sqrtf(cgrad_f * cgrad_f + cgrad_c * cgrad_c + 1.0e-6f);
|
|
||||||
float gradfact = cgrad_f * gradrcp;
|
|
||||||
*/
|
|
||||||
|
|
||||||
float4 out = make_float4(0, 0, 0, 0);
|
float4 out = make_float4(0, 0, 0, 0);
|
||||||
float weightsum = 0.0f;
|
float weightsum = 0.0f;
|
||||||
@ -282,9 +260,17 @@ void bilateral(float4 *dst, int pattern, int radius,
|
|||||||
pix = next;
|
pix = next;
|
||||||
next = tex_shear(bilateral_src, pattern, x, y, r + 1.0f);
|
next = tex_shear(bilateral_src, pattern, x, y, r + 1.0f);
|
||||||
|
|
||||||
|
// This initial factor is arbitrary, but seems to do a decent job at
|
||||||
|
// preventing excessive bleed-out from points inside an empty region.
|
||||||
|
// (It's used when either the center or the current point has no
|
||||||
|
// sample energy at all.)
|
||||||
float cdiff = 0.5f;
|
float cdiff = 0.5f;
|
||||||
|
|
||||||
if (pix.w > 0.0f && cen.w > 0.0f) {
|
if (pix.w > 0.0f && cen.w > 0.0f) {
|
||||||
|
// Compute the color difference as the simple magnitude difference
|
||||||
|
// between the YUV colors at the sampling location, unweighted by
|
||||||
|
// density. Essentially, this just identifies regions whose average
|
||||||
|
// color coordinates are similar.
|
||||||
float pdrcp = 1.0f / pix.w;
|
float pdrcp = 1.0f / pix.w;
|
||||||
float yd = pix.x * pdrcp - cen.x;
|
float yd = pix.x * pdrcp - cen.x;
|
||||||
float ud = pix.y * pdrcp - cen.y;
|
float ud = pix.y * pdrcp - cen.y;
|
||||||
@ -292,18 +278,29 @@ void bilateral(float4 *dst, int pattern, int radius,
|
|||||||
cdiff = yd * yd + ud * ud + vd * vd;
|
cdiff = yd * yd + ud * ud + vd * vd;
|
||||||
}
|
}
|
||||||
|
|
||||||
//float logden = logf(1.0f + pix.w * k2);
|
// Density factor
|
||||||
float logden = powf(pix.w, 0.8);
|
float powden = powf(pix.w, dpow);
|
||||||
float dfact = exp2f(-0.5f * fabsf(clogden - logden) * dhalfpt);
|
float dfact = exp2f(dscale * fabsf(cpowden - powden));
|
||||||
|
|
||||||
|
// Gradient energy factor. This favors points whose local energy
|
||||||
|
// gradient points towards the current point - in essence, it draws
|
||||||
|
// sampling energy "uphill" into denser regions rather than allowing
|
||||||
|
// it to be smeared in all directions. The effect is modulated by the
|
||||||
|
// average energy in the region (as determined from a blurred copy of
|
||||||
|
// the density map); weak gradients in dense image regions aren't
|
||||||
|
// affected as strongly. This is all very experimental, with little
|
||||||
|
// theoretical justification, but it seems to work very well.
|
||||||
|
//
|
||||||
|
// Note that both the gradient and the blurred weight are calculated
|
||||||
|
// in one dimension, along the current sampling vector.
|
||||||
float avg = tex_shear(blur_src, pattern, x, y, r);
|
float avg = tex_shear(blur_src, pattern, x, y, r);
|
||||||
float yayfact = (prev - next.w) / (avg + 1.0e-6f);
|
float gradfact = (next.w - prev) / (avg + 1.0e-6f);
|
||||||
yayfact = expf(-expf(0.5f * yayfact));
|
if (r < 0) gradfact = -gradfact;
|
||||||
|
gradfact = exp2f(-exp2f(gspeed * gradfact));
|
||||||
|
|
||||||
|
float factor = spa_coefs[(int) fabsf(r)] * expf(cscale * cdiff) * dfact;
|
||||||
|
if (r != 0) factor *= gradfact;
|
||||||
|
|
||||||
// Oh, this is ridiculous.
|
|
||||||
float factor = spa_coefs[(int) fabsf(r)];
|
|
||||||
if (r != 0) factor *= expf(cscale * cdiff) * dfact * yayfact;
|
|
||||||
// * expf(-cdrcp * expf((gradfact - 1.0f) * r));
|
|
||||||
weightsum += factor;
|
weightsum += factor;
|
||||||
out.x += factor * pix.x;
|
out.x += factor * pix.x;
|
||||||
out.y += factor * pix.y;
|
out.y += factor * pix.y;
|
||||||
@ -317,11 +314,6 @@ void bilateral(float4 *dst, int pattern, int radius,
|
|||||||
out.z *= weightrcp;
|
out.z *= weightrcp;
|
||||||
out.w *= weightrcp;
|
out.w *= weightrcp;
|
||||||
|
|
||||||
//out.x = out.w = tex_shear(blur_src, pattern, x, y, 0);
|
|
||||||
//out.y = cgrad_f;
|
|
||||||
//out.z = cgrad_c;
|
|
||||||
//out.y = gradfact * out.w;
|
|
||||||
|
|
||||||
const int astride = blockDim.x * gridDim.x;
|
const int astride = blockDim.x * gridDim.x;
|
||||||
dst[yi * astride + xi] = out;
|
dst[yi * astride + xi] = out;
|
||||||
}
|
}
|
||||||
@ -344,12 +336,6 @@ class Filtering(HunkOCode):
|
|||||||
self.init_mod()
|
self.init_mod()
|
||||||
|
|
||||||
def de(self, ddst, dsrc, dscratch, gnm, dim, tc, nxf, stream=None):
|
def de(self, ddst, dsrc, dscratch, gnm, dim, tc, nxf, stream=None):
|
||||||
# Log-scale the accumulated buffer in `dsrc`.
|
|
||||||
k1 = f32(gnm.color.brightness(tc) * 268 / 256)
|
|
||||||
# Old definition of area is (w*h/(s*s)). Since new scale 'ns' is now
|
|
||||||
# s/w, new definition is (w*h/(s*s*w*w)) = (h/(s*s*w))
|
|
||||||
area = dim.h / (gnm.camera.scale(tc) ** 2 * dim.w)
|
|
||||||
k2 = f32(1.0 / (area * gnm.spp(tc)))
|
|
||||||
|
|
||||||
# Helper variables and functions to keep it clean
|
# Helper variables and functions to keep it clean
|
||||||
sb = 16 * dim.astride
|
sb = 16 * dim.astride
|
||||||
@ -382,27 +368,28 @@ class Filtering(HunkOCode):
|
|||||||
# a requirement for the filter itself to get decent results).
|
# a requirement for the filter itself to get decent results).
|
||||||
DIRECTIONS = 8
|
DIRECTIONS = 8
|
||||||
|
|
||||||
def do_bilateral(bsrc, bdst, pattern, r=15, sstd=3, cstd=0.1,
|
def do_bilateral(bsrc, bdst, pattern, r=15, sstd=6, cstd=0.05,
|
||||||
angscale=2.5, dhalfpt=1, dspeed=2000000, dpow=0.6):
|
dstd=1.5, dpow=0.8, gspeed=4.0):
|
||||||
# Scale spatial parameters so that a "pixel" is equivalent to an
|
# Scale spatial parameter so that a "pixel" is equivalent to an
|
||||||
# actual pixel at 1080p
|
# actual pixel at 1080p
|
||||||
sstd *= 1920. / dim.w
|
sstd *= 1920. / dim.w
|
||||||
|
|
||||||
tref.set_address_2d(bsrc, dsc, sb)
|
tref.set_address_2d(bsrc, dsc, sb)
|
||||||
|
|
||||||
|
# Blur density two octaves along sampling vector, ultimately
|
||||||
|
# storing in `dscratch`
|
||||||
launch(den_blur, np.intp(bdst), i32(pattern), i32(0),
|
launch(den_blur, np.intp(bdst), i32(pattern), i32(0),
|
||||||
texrefs=[tref])
|
texrefs=[tref])
|
||||||
grad_tref.set_address_2d(bdst, grad_dsc, sb / 4)
|
grad_tref.set_address_2d(bdst, grad_dsc, sb / 4)
|
||||||
launch(den_blur_1c, dscratch, i32(pattern), i32(1),
|
launch(den_blur_1c, dscratch, i32(pattern), i32(1),
|
||||||
texrefs=[grad_tref])
|
texrefs=[grad_tref])
|
||||||
grad_tref.set_address_2d(dscratch, grad_dsc, sb / 4)
|
grad_tref.set_address_2d(dscratch, grad_dsc, sb / 4)
|
||||||
|
|
||||||
launch(bilateral, np.intp(bdst), i32(pattern), i32(r),
|
launch(bilateral, np.intp(bdst), i32(pattern), i32(r),
|
||||||
f32(sstd), f32(cstd), f32(angscale),
|
f32(sstd), f32(cstd), f32(dstd), f32(dpow), f32(gspeed),
|
||||||
f32(dhalfpt), f32(dspeed), f32(dpow), k2,
|
|
||||||
texrefs=[tref, grad_tref])
|
texrefs=[tref, grad_tref])
|
||||||
|
|
||||||
def do_bilateral_range(bsrc, bdst, npats, *args, **kwargs):
|
def do_bilateral_range(bsrc, bdst, npats, *args, **kwargs):
|
||||||
|
|
||||||
|
|
||||||
for i in range(npats):
|
for i in range(npats):
|
||||||
do_bilateral(bsrc, bdst, i, *args, **kwargs)
|
do_bilateral(bsrc, bdst, i, *args, **kwargs)
|
||||||
bdst, bsrc = bsrc, bdst
|
bdst, bsrc = bsrc, bdst
|
||||||
@ -420,6 +407,13 @@ class Filtering(HunkOCode):
|
|||||||
do_bilateral_range(src, ddst, DIRECTIONS)
|
do_bilateral_range(src, ddst, DIRECTIONS)
|
||||||
launch(fma_buf, dsrc, np.intp(src), i32(dim.astride), f32(1))
|
launch(fma_buf, dsrc, np.intp(src), i32(dim.astride), f32(1))
|
||||||
|
|
||||||
|
# Log-scale the accumulated buffer in `dsrc`.
|
||||||
|
k1 = f32(gnm.color.brightness(tc) * 268 / 256)
|
||||||
|
# Old definition of area is (w*h/(s*s)). Since new scale 'ns' is now
|
||||||
|
# s/w, new definition is (w*h/(s*s*w*w)) = (h/(s*s*w))
|
||||||
|
area = dim.h / (gnm.camera.scale(tc) ** 2 * dim.w)
|
||||||
|
k2 = f32(1.0 / (area * gnm.spp(tc)))
|
||||||
|
|
||||||
nbins = dim.ah * dim.astride
|
nbins = dim.ah * dim.astride
|
||||||
logscale = self.mod.get_function("logscale")
|
logscale = self.mod.get_function("logscale")
|
||||||
t = logscale(ddst, dsrc, k1, k2,
|
t = logscale(ddst, dsrc, k1, k2,
|
||||||
|
@ -58,7 +58,7 @@ class Renderer(object):
|
|||||||
# which further xforms will wrap to the first when writing. Currently it
|
# which further xforms will wrap to the first when writing. Currently it
|
||||||
# is compiled in, so power-of-two and no runtime maximization. Current
|
# is compiled in, so power-of-two and no runtime maximization. Current
|
||||||
# value of 16 fits into a 1GB card at 1080p.
|
# value of 16 fits into a 1GB card at 1080p.
|
||||||
max_nxf = 16
|
max_nxf = 1
|
||||||
|
|
||||||
# TODO
|
# TODO
|
||||||
chaos_used = False
|
chaos_used = False
|
||||||
|
Loading…
Reference in New Issue
Block a user