mirror of
https://github.com/stevenrobertson/cuburn.git
synced 2025-02-05 11:40:04 -05:00
Make DE use current center CP instead of anim-wide CP; start debugging color
This commit is contained in:
parent
7ff0b65d81
commit
6b09e162a3
@ -11,23 +11,29 @@ void colorclip(float4 *pixbuf, float gamma, float vibrancy, float highpow,
|
|||||||
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
float4 pix = pixbuf[i];
|
float4 pix = pixbuf[i];
|
||||||
|
|
||||||
if (pix.w <= 0) return;
|
if (pix.w <= 0) {
|
||||||
|
pixbuf[i] = make_float4(0, 0, 0, 0);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
float4 opix = pix;
|
float4 opix = pix;
|
||||||
|
|
||||||
float alpha = pow(pix.w, gamma);
|
float alpha = powf(pix.w, gamma);
|
||||||
if (pix.w < linrange) {
|
if (pix.w < linrange) {
|
||||||
float frac = pix.w / linrange;
|
float frac = pix.w / linrange;
|
||||||
alpha = (1.0f - frac) * pix.w * lingam / linrange + frac * alpha;
|
alpha = (1.0f - frac) * pix.w * lingam + frac * alpha;
|
||||||
|
pixbuf[i] = make_float4(0, 0, 0, 0);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
float ls = vibrancy * alpha / pix.w;
|
float ls = vibrancy * alpha / pix.w;
|
||||||
|
alpha = fminf(1.0f, fmaxf(0.0f, alpha));
|
||||||
|
|
||||||
float maxc = fmaxf(pix.x, fmaxf(pix.y, pix.z));
|
float maxc = fmaxf(pix.x, fmaxf(pix.y, pix.z));
|
||||||
float newls = 1 / maxc;
|
float maxa = maxc * ls;
|
||||||
|
float newls = 1.0f / maxc;
|
||||||
|
|
||||||
if (maxc * ls > 1 && highpow >= 0) {
|
if (maxa > 1.0f && highpow >= 0.0f) {
|
||||||
// TODO: does CUDA autopromote the int here to a float before GPU?
|
|
||||||
float lsratio = powf(newls / ls, highpow);
|
float lsratio = powf(newls / ls, highpow);
|
||||||
|
|
||||||
pix.x *= newls;
|
pix.x *= newls;
|
||||||
@ -41,19 +47,32 @@ void colorclip(float4 *pixbuf, float gamma, float vibrancy, float highpow,
|
|||||||
pix.x = maxc - (maxc - pix.x) * lsratio;
|
pix.x = maxc - (maxc - pix.x) * lsratio;
|
||||||
pix.y = maxc - (maxc - pix.y) * lsratio;
|
pix.y = maxc - (maxc - pix.y) * lsratio;
|
||||||
pix.z = maxc - (maxc - pix.z) * lsratio;
|
pix.z = maxc - (maxc - pix.z) * lsratio;
|
||||||
|
pix.x = 1.0f;
|
||||||
|
pixbuf[i] = pix;
|
||||||
|
return;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
highpow = -highpow;
|
float adjhlp = -highpow;
|
||||||
if (highpow > 1 || maxc * ls <= 1) highpow = 1;
|
if (adjhlp > 1.0f || maxa <= 1.0f) adjhlp = 1.0f;
|
||||||
float adj = ((1.0 - highpow) * newls + highpow * ls);
|
float adj = ((1.0f - adjhlp) * newls + adjhlp * ls);
|
||||||
pix.x *= adj;
|
pix.x *= adj;
|
||||||
pix.y *= adj;
|
pix.y *= adj;
|
||||||
pix.z *= adj;
|
pix.z *= adj;
|
||||||
}
|
}
|
||||||
|
|
||||||
pix.x = fminf(1.0, pix.x + (1.0 - vibrancy) * powf(opix.x, gamma));
|
pix.x += (1.0f - vibrancy) * powf(opix.x, gamma);
|
||||||
pix.y = fminf(1.0, pix.y + (1.0 - vibrancy) * powf(opix.y, gamma));
|
pix.y += (1.0f - vibrancy) * powf(opix.y, gamma);
|
||||||
pix.z = fminf(1.0, pix.z + (1.0 - vibrancy) * powf(opix.z, gamma));
|
pix.z += (1.0f - vibrancy) * powf(opix.z, gamma);
|
||||||
|
|
||||||
|
if (alpha > 0.0f) {
|
||||||
|
pix.x = fminf(1.0f, pix.x * alpha);
|
||||||
|
pix.y = fminf(1.0f, pix.y * alpha);
|
||||||
|
pix.z = fminf(1.0f, pix.z * alpha);
|
||||||
|
} else {
|
||||||
|
pix.x = pix.y = pix.z = 0;
|
||||||
|
// color pixel green; likewise, don't think this code can be reached
|
||||||
|
pix.y = 1.0f;
|
||||||
|
}
|
||||||
|
|
||||||
pixbuf[i] = pix;
|
pixbuf[i] = pix;
|
||||||
}
|
}
|
||||||
@ -62,7 +81,7 @@ void colorclip(float4 *pixbuf, float gamma, float vibrancy, float highpow,
|
|||||||
class DensityEst(HunkOCode):
|
class DensityEst(HunkOCode):
|
||||||
"""
|
"""
|
||||||
NOTE: for now, this *must* be invoked with a block size of (32,32,1), and
|
NOTE: for now, this *must* be invoked with a block size of (32,32,1), and
|
||||||
a grid size of (W/32,1). At least 7 pixel gutters are required, and the
|
a grid size of (W/32,1). At least 15 pixel gutters are required, and the
|
||||||
stride and height probably need to be multiples of 32.
|
stride and height probably need to be multiples of 32.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@ -98,7 +117,7 @@ void logscale(float4 *pixbuf, float4 *outbuf, float k1, float k2) {
|
|||||||
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
float4 pix = pixbuf[i];
|
float4 pix = pixbuf[i];
|
||||||
|
|
||||||
float ls = fmaxf(0, k1 * logf(1.0 + pix.w * k2) / pix.w);
|
float ls = fmaxf(0, k1 * logf(1.0f + pix.w * k2) / pix.w);
|
||||||
pix.x *= ls;
|
pix.x *= ls;
|
||||||
pix.y *= ls;
|
pix.y *= ls;
|
||||||
pix.z *= ls;
|
pix.z *= ls;
|
||||||
@ -109,6 +128,7 @@ void logscale(float4 *pixbuf, float4 *outbuf, float k1, float k2) {
|
|||||||
|
|
||||||
__global__
|
__global__
|
||||||
void density_est(float4 *pixbuf, float4 *outbuf, float *denbuf,
|
void density_est(float4 *pixbuf, float4 *outbuf, float *denbuf,
|
||||||
|
float est_sd, float neg_est_curve, float est_min,
|
||||||
float k1, float k2) {
|
float k1, float k2) {
|
||||||
for (int i = threadIdx.x + 32*threadIdx.y; i < FW2; i += 32)
|
for (int i = threadIdx.x + 32*threadIdx.y; i < FW2; i += 32)
|
||||||
de_r[i] = de_g[i] = de_b[i] = de_a[i] = 0.0f;
|
de_r[i] = de_g[i] = de_b[i] = de_a[i] = 0.0f;
|
||||||
@ -129,22 +149,16 @@ void density_est(float4 *pixbuf, float4 *outbuf, float *denbuf,
|
|||||||
in.z *= ls;
|
in.z *= ls;
|
||||||
in.w *= ls;
|
in.w *= ls;
|
||||||
|
|
||||||
// Calculate standard deviation of Gaussian kernel.
|
// Calculate standard deviation of Gaussian kernel. The base SD is
|
||||||
// flam3_gaussian_filter() uses an implicit standard deviation of 0.5,
|
// then scaled in inverse proportion to the density of the point
|
||||||
// but the DE filters scale filter distance by the default spatial
|
// being scaled.
|
||||||
// support factor of 1.5, so the effective base SD is (0.5/1.5)=1/3.
|
float sd = est_sd * powf(den+1.0f, neg_est_curve);
|
||||||
float sd = {{cp.estimator / 3.}};
|
|
||||||
|
|
||||||
// The base SD is then scaled in inverse proportion to the density of
|
|
||||||
// the point being scaled.
|
|
||||||
sd *= powf(den+1.0f, {{-cp.estimator_curve}});
|
|
||||||
|
|
||||||
// Clamp the final standard deviation. Things will go badly if the
|
// Clamp the final standard deviation. Things will go badly if the
|
||||||
// minimum is undershot.
|
// minimum is undershot.
|
||||||
sd = fmaxf(sd, {{max(cp.estimator_minimum / 3., 0.3)}} );
|
sd = fmaxf(sd, est_min);
|
||||||
|
|
||||||
// This five-term polynomial approximates the sum of the filters with
|
// This five-term polynomial approximates the sum of the filters
|
||||||
// the clamping logic used here. See helpers/filt_err.py.
|
// with the clamping logic used here. See helpers/filt_err.py.
|
||||||
float filtsum;
|
float filtsum;
|
||||||
filtsum = -0.20885075f * sd + 0.90557721f;
|
filtsum = -0.20885075f * sd + 0.90557721f;
|
||||||
filtsum = filtsum * sd + 5.28363054f;
|
filtsum = filtsum * sd + 5.28363054f;
|
||||||
@ -229,22 +243,30 @@ void density_est(float4 *pixbuf, float4 *outbuf, float *denbuf,
|
|||||||
|
|
||||||
""")
|
""")
|
||||||
|
|
||||||
def invoke(self, mod, abufd, obufd, dbufd, stream=None):
|
def invoke(self, mod, cp, abufd, obufd, dbufd, stream=None):
|
||||||
# TODO: add no-est version
|
# TODO: add no-est version
|
||||||
# TODO: come up with a general way to average these parameters
|
# TODO: come up with a general way to average these parameters
|
||||||
|
|
||||||
k1 = self.cp.brightness * 268 / 256
|
k1 = np.float32(cp.brightness * 268 / 256)
|
||||||
area = self.features.acc_width * self.features.acc_height / self.cp.ppu ** 2
|
area = self.features.width * self.features.height / cp.ppu ** 2
|
||||||
k2 = 1 / (area * self.cp.adj_density)
|
k2 = np.float32(1 / (area * cp.adj_density))
|
||||||
|
print k1, k2
|
||||||
|
|
||||||
if self.cp.estimator == 0:
|
if self.cp.estimator == 0:
|
||||||
|
nbins = self.features.acc_height * self.features.acc_stride
|
||||||
fun = mod.get_function("logscale")
|
fun = mod.get_function("logscale")
|
||||||
t = fun(abufd, obufd, np.float32(k1), np.float32(k2),
|
t = fun(abufd, obufd, k1, k2,
|
||||||
block=(self.features.acc_width, 1, 1),
|
block=(512, 1, 1), grid=(nbins/512, 1), stream=stream)
|
||||||
grid=(self.features.acc_height, 1), stream=stream)
|
|
||||||
else:
|
else:
|
||||||
|
# flam3_gaussian_filter() uses an implicit standard deviation of
|
||||||
|
# 0.5, but the DE filters scale filter distance by the default
|
||||||
|
# spatial support factor of 1.5, so the effective base SD is
|
||||||
|
# (0.5/1.5)=1/3.
|
||||||
|
est_sd = np.float32(cp.estimator / 3.)
|
||||||
|
neg_est_curve = np.float32(-cp.estimator_curve)
|
||||||
|
est_min = np.float32(cp.estimator_minimum)
|
||||||
fun = mod.get_function("density_est")
|
fun = mod.get_function("density_est")
|
||||||
fun(abufd, obufd, dbufd, np.float32(k1), np.float32(k2),
|
fun(abufd, obufd, dbufd, est_sd, neg_est_curve, est_min, k1, k2,
|
||||||
block=(32, 32, 1), grid=(self.features.acc_width/32, 1),
|
block=(32, 32, 1), grid=(self.features.acc_width/32, 1),
|
||||||
stream=stream)
|
stream=stream)
|
||||||
|
|
||||||
|
@ -53,7 +53,13 @@ class Genome(object):
|
|||||||
and height. Assumes that width and height are constant.
|
and height. Assumes that width and height are constant.
|
||||||
"""
|
"""
|
||||||
g = Features.gutter
|
g = Features.gutter
|
||||||
return ( affine.translate(0.5 * (cp.width + g), 0.5 * (cp.height + g))
|
if cp.estimator:
|
||||||
|
# The filter shifts by this amount as a side effect of it being
|
||||||
|
# written in a confusing and sloppy manner
|
||||||
|
# TODO: this will be weird in an animation where one endpoint has
|
||||||
|
# a radius of 0, and the other does not
|
||||||
|
g -= Features.gutter / 2 - 1
|
||||||
|
return ( affine.translate(0.5 * cp.width + g, 0.5 * cp.height + g)
|
||||||
* affine.scale(cp.ppu, cp.ppu)
|
* affine.scale(cp.ppu, cp.ppu)
|
||||||
* affine.translate(-cp._center[0], -cp._center[1])
|
* affine.translate(-cp._center[0], -cp._center[1])
|
||||||
* affine.rotate(cp.rotate * 2 * np.pi / 360,
|
* affine.rotate(cp.rotate * 2 * np.pi / 360,
|
||||||
@ -247,7 +253,7 @@ class _AnimRenderer(object):
|
|||||||
iter_fun.set_cache_config(cuda.func_cache.PREFER_L1)
|
iter_fun.set_cache_config(cuda.func_cache.PREFER_L1)
|
||||||
|
|
||||||
# Must be accumulated over all CPs
|
# Must be accumulated over all CPs
|
||||||
gam, vib, hipow = 0, 0, 0
|
gam, vib = 0, 0
|
||||||
|
|
||||||
# This is gross, but there are a lot of fiddly corner cases with any
|
# This is gross, but there are a lot of fiddly corner cases with any
|
||||||
# index-based iteration scheme.
|
# index-based iteration scheme.
|
||||||
@ -262,7 +268,6 @@ class _AnimRenderer(object):
|
|||||||
infos.append(info)
|
infos.append(info)
|
||||||
gam += cp.gamma
|
gam += cp.gamma
|
||||||
vib += cp.vibrancy
|
vib += cp.vibrancy
|
||||||
hipow += cp.highlight_power
|
|
||||||
else:
|
else:
|
||||||
# Can't interpolate normally; just pack copies
|
# Can't interpolate normally; just pack copies
|
||||||
# TODO: this still packs the genome 20 times or so instead of
|
# TODO: this still packs the genome 20 times or so instead of
|
||||||
@ -271,7 +276,6 @@ class _AnimRenderer(object):
|
|||||||
infos = [packed] * len(block_times)
|
infos = [packed] * len(block_times)
|
||||||
gam += a.genomes[0].gamma * len(block_times)
|
gam += a.genomes[0].gamma * len(block_times)
|
||||||
vib += a.genomes[0].vibrancy * len(block_times)
|
vib += a.genomes[0].vibrancy * len(block_times)
|
||||||
hipow += a.genomes[0].highlight_power * len(block_times)
|
|
||||||
|
|
||||||
infos = np.concatenate(infos)
|
infos = np.concatenate(infos)
|
||||||
offset = b * packer.align * self.cps_per_block
|
offset = b * packer.align * self.cps_per_block
|
||||||
@ -304,7 +308,8 @@ class _AnimRenderer(object):
|
|||||||
util.BaseCode.zero_dptr(a.mod, self.d_out, 4 * self.nbins,
|
util.BaseCode.zero_dptr(a.mod, self.d_out, 4 * self.nbins,
|
||||||
self.stream)
|
self.stream)
|
||||||
self.stream.synchronize()
|
self.stream.synchronize()
|
||||||
a._de.invoke(a.mod, self.d_accum, self.d_out, self.d_den,
|
a._de.invoke(a.mod, Genome(cen_cp),
|
||||||
|
self.d_accum, self.d_out, self.d_den,
|
||||||
self.stream)
|
self.stream)
|
||||||
self.stream.synchronize()
|
self.stream.synchronize()
|
||||||
|
|
||||||
@ -312,9 +317,10 @@ class _AnimRenderer(object):
|
|||||||
n = f(self.ncps)
|
n = f(self.ncps)
|
||||||
gam = f(n / gam)
|
gam = f(n / gam)
|
||||||
vib = f(vib / n)
|
vib = f(vib / n)
|
||||||
hipow = f(hipow / n)
|
hipow = f(cen_cp.highlight_power)
|
||||||
lin = f(cen_cp.gam_lin_thresh)
|
lin = f(cen_cp.gam_lin_thresh)
|
||||||
lingam = f(math.pow(cen_cp.gam_lin_thresh, gam))
|
lingam = f(math.pow(cen_cp.gam_lin_thresh, gam-1.0))
|
||||||
|
print gam, lin, lingam, cen_cp.gamma
|
||||||
|
|
||||||
# TODO: get block size from colorclip class? It actually does not
|
# TODO: get block size from colorclip class? It actually does not
|
||||||
# depend on that being the case
|
# depend on that being the case
|
||||||
|
Loading…
Reference in New Issue
Block a user