mirror of
https://github.com/stevenrobertson/cuburn.git
synced 2025-02-05 11:40:04 -05:00
Expand max filter radius to 21 pixels
This commit is contained in:
parent
f3a79b200c
commit
6b2cb024ac
@ -87,13 +87,10 @@ void colorclip(float4 *pixbuf, float gamma, float vibrancy, float highpow,
|
|||||||
class DensityEst(HunkOCode):
|
class DensityEst(HunkOCode):
|
||||||
"""
|
"""
|
||||||
NOTE: for now, this *must* be invoked with a block size of (32,32,1), and
|
NOTE: for now, this *must* be invoked with a block size of (32,32,1), and
|
||||||
a grid size of (W/32,1). At least 15 pixel gutters are required, and the
|
a grid size of (W/32,1). At least 21 pixel gutters are required, and the
|
||||||
stride and height probably need to be multiples of 32.
|
stride and height probably need to be multiples of 32.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Note, changing this does not yet have any effect, it's just informational
|
|
||||||
MAX_WIDTH=15
|
|
||||||
|
|
||||||
def __init__(self, info):
|
def __init__(self, info):
|
||||||
self.info = info
|
self.info = info
|
||||||
|
|
||||||
@ -103,9 +100,9 @@ class DensityEst(HunkOCode):
|
|||||||
return self.defs_tmpl.substitute(info=self.info)
|
return self.defs_tmpl.substitute(info=self.info)
|
||||||
|
|
||||||
defs_tmpl = Template('''
|
defs_tmpl = Template('''
|
||||||
#define W 15 // Filter width (regardless of standard deviation chosen)
|
#define W 21 // Filter width (regardless of standard deviation chosen)
|
||||||
#define W2 7 // Half of filter width, rounded down
|
#define W2 10 // Half of filter width, rounded down
|
||||||
#define FW 46 // Width of local result storage (NW+W2+W2)
|
#define FW 52 // Width of local result storage (NW+W2+W2)
|
||||||
#define FW2 (FW*FW)
|
#define FW2 (FW*FW)
|
||||||
|
|
||||||
__shared__ float de_r[FW2], de_g[FW2], de_b[FW2], de_a[FW2];
|
__shared__ float de_r[FW2], de_g[FW2], de_b[FW2], de_a[FW2];
|
||||||
@ -137,7 +134,7 @@ void logscale(float4 *pixbuf, float4 *outbuf, float k1, float k2) {
|
|||||||
|
|
||||||
// See helpers/filt_err.py for source of these values.
|
// See helpers/filt_err.py for source of these values.
|
||||||
#define MIN_SD 0.23299530f
|
#define MIN_SD 0.23299530f
|
||||||
#define MAX_SD 2.5f
|
#define MAX_SD 4.33333333f
|
||||||
|
|
||||||
__global__
|
__global__
|
||||||
void density_est(float4 *pixbuf, float4 *outbuf,
|
void density_est(float4 *pixbuf, float4 *outbuf,
|
||||||
@ -199,15 +196,15 @@ void density_est(float4 *pixbuf, float4 *outbuf,
|
|||||||
filtsum = filtsum * sd + 9.04126644f;
|
filtsum = filtsum * sd + 9.04126644f;
|
||||||
filtsum = filtsum * sd + 0.10304667f;
|
filtsum = filtsum * sd + 0.10304667f;
|
||||||
} else {
|
} else {
|
||||||
filtsum = -0.00403376f;
|
filtsum = 0.01162011f;
|
||||||
filtsum = filtsum * sd + 0.06608720f;
|
filtsum = filtsum * sd + -0.21552004f;
|
||||||
filtsum = filtsum * sd + -0.38924992f;
|
filtsum = filtsum * sd + 1.66545594f;
|
||||||
filtsum = filtsum * sd + 0.84797901f;
|
filtsum = filtsum * sd + -7.00809765f;
|
||||||
filtsum = filtsum * sd + 0.34173131f;
|
filtsum = filtsum * sd + 17.55487633f;
|
||||||
filtsum = filtsum * sd + -4.67077589f;
|
filtsum = filtsum * sd + -26.80626106f;
|
||||||
filtsum = filtsum * sd + 14.34595776f;
|
filtsum = filtsum * sd + 30.61903954f;
|
||||||
filtsum = filtsum * sd + -5.80082798f;
|
filtsum = filtsum * sd + -12.00870514f;
|
||||||
filtsum = filtsum * sd + 1.54098487f;
|
filtsum = filtsum * sd + 2.46708894f;
|
||||||
}
|
}
|
||||||
float filtscale = 1.0f / filtsum;
|
float filtscale = 1.0f / filtsum;
|
||||||
|
|
||||||
@ -225,6 +222,7 @@ void density_est(float4 *pixbuf, float4 *outbuf,
|
|||||||
float coeff = exp2f((jj2f + iif * iif) * rsd)
|
float coeff = exp2f((jj2f + iif * iif) * rsd)
|
||||||
* filtscale;
|
* filtscale;
|
||||||
if (coeff < 0.0001f) break;
|
if (coeff < 0.0001f) break;
|
||||||
|
iif += 1;
|
||||||
|
|
||||||
float4 scaled;
|
float4 scaled;
|
||||||
scaled.x = in.x * coeff;
|
scaled.x = in.x * coeff;
|
||||||
@ -247,7 +245,6 @@ void density_est(float4 *pixbuf, float4 *outbuf,
|
|||||||
de_add(si, -jj, -ii, scaled);
|
de_add(si, -jj, -ii, scaled);
|
||||||
de_add(si, jj, -ii, scaled);
|
de_add(si, jj, -ii, scaled);
|
||||||
|
|
||||||
iif += 1;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -96,7 +96,7 @@ class RenderInfo(object):
|
|||||||
# Maximum width of DE and other spatial filters, and thus in turn the
|
# Maximum width of DE and other spatial filters, and thus in turn the
|
||||||
# amount of padding applied. Note that, for now, this must not be changed!
|
# amount of padding applied. Note that, for now, this must not be changed!
|
||||||
# The filtering code makes deep assumptions about this value.
|
# The filtering code makes deep assumptions about this value.
|
||||||
gutter = 16
|
gutter = 22
|
||||||
|
|
||||||
# TODO: for now, we always throw away the alpha channel before writing.
|
# TODO: for now, we always throw away the alpha channel before writing.
|
||||||
# All code is in place to not do this, we just need to find a way to expose
|
# All code is in place to not do this, we just need to find a way to expose
|
||||||
@ -106,7 +106,6 @@ class RenderInfo(object):
|
|||||||
|
|
||||||
# TODO: fix these
|
# TODO: fix these
|
||||||
chaos_used = False
|
chaos_used = False
|
||||||
std_xforms = [0, 1, 2]
|
|
||||||
final_xform_index = 3
|
final_xform_index = 3
|
||||||
pal_has_alpha = False
|
pal_has_alpha = False
|
||||||
density = 2000
|
density = 2000
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
# The maximum number of coefficients that will ever be retained on the device
|
# The maximum number of coefficients that will ever be retained on the device
|
||||||
FWIDTH = 15
|
FWIDTH = 21
|
||||||
|
|
||||||
# The number of points on either side of the center in one dimension
|
# The number of points on either side of the center in one dimension
|
||||||
F2 = int(FWIDTH/2)
|
F2 = int(FWIDTH/2)
|
||||||
|
Loading…
Reference in New Issue
Block a user