Filter adjustments (density prefilter, gutter)

This commit is contained in:
Steven Robertson 2011-06-11 15:58:15 -04:00
parent 1deb3105a0
commit 94c453d153
4 changed files with 31 additions and 26 deletions

View File

@ -117,7 +117,7 @@ void density_est(float4 *pixbuf, float4 *outbuf, float *denbuf,
float den = denbuf[idx];
if (in.w > 0 && den > 0) {
float ls = k1 * 12 * logf(1.0 + in.w * k2) / in.w;
float ls = k1 * logf(1.0f + in.w * k2) / in.w;
in.x *= ls;
in.y *= ls;
in.z *= ls;
@ -200,19 +200,6 @@ void density_est(float4 *pixbuf, float4 *outbuf, float *denbuf,
atomicAdd(out+3, de_a[si]);
}
if (threadIdx.y == 5000) {
for (int i = threadIdx.x; i < FW; i += 32) {
idx = {{features.acc_stride}} * (imrow + 32)
+ blockIdx.x * 32 + i + W2;
int si = 32 * FW + i;
float *out = reinterpret_cast<float*>(&outbuf[idx]);
atomicAdd(out, 0.2 + de_r[si]);
atomicAdd(out+1, de_g[si]);
atomicAdd(out+2, de_b[si]);
atomicAdd(out+3, de_a[si]);
}
}
__syncthreads();
// TODO: shift instead of copying
int tid = threadIdx.y * 32 + threadIdx.x;
@ -239,9 +226,11 @@ void density_est(float4 *pixbuf, float4 *outbuf, float *denbuf,
def invoke(self, mod, abufd, obufd, dbufd):
# TODO: add no-est version
# TODO: come up with a general way to average these parameters
k1 = self.cp.brightness * 268 / 256
area = self.features.width * self.features.height / self.cp.ppu ** 2
area = self.features.acc_width * self.features.acc_height / self.cp.ppu ** 2
k2 = 1 / (area * self.cp.adj_density)
print k1, k2, area
if self.cp.estimator == 0:
fun = mod.get_function("logscale")
@ -251,7 +240,7 @@ void density_est(float4 *pixbuf, float4 *outbuf, float *denbuf,
else:
fun = mod.get_function("density_est")
t = fun(abufd, obufd, dbufd, np.float32(k1), np.float32(k2),
block=(32, 32, 1), grid=(self.features.acc_stride/32 - 1, 1),
block=(32, 32, 1), grid=(self.features.acc_width/32, 1),
time_kernel=True)
print "Density estimation: %g" % t

View File

@ -8,6 +8,7 @@ import pycuda.driver as cuda
from pycuda.driver import In, Out, InOut
from pycuda.compiler import SourceModule
import numpy as np
from scipy import ndimage
from fr0stlib.pyflam3 import flam3_interpolate
from cuburn.code import mwc, variations, filter
@ -125,8 +126,8 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
int ix = trunca(cx+ditherx), iy = trunca(cy+dithery);
if (ix < 0 || ix >= {{features.width}} ||
iy < 0 || iy >= {{features.height}} ) {
if (ix < 0 || ix >= {{features.acc_width}} ||
iy < 0 || iy >= {{features.acc_height}} ) {
consec_bad++;
if (consec_bad > {{features.max_oob}}) {
x = mwc_next_11(&rctx);
@ -137,7 +138,7 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
continue;
}
int i = iy * {{features.width}} + ix;
int i = iy * {{features.acc_stride}} + ix;
float4 outcol = tex2D(palTex, color, {{packer.get('cp_step_frac')}});
float4 pix = accbuf[i];
@ -148,6 +149,7 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
accbuf[i] = pix; // TODO: atomic operations (or better)
denbuf[i] += 1.0f;
}
asm volatile ("membar.cta;");
}
""")
return tmpl.substitute(
@ -158,8 +160,8 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
def render(features, cps):
# TODO: make this adjustable via genome
nsteps = 1000
abuf = np.zeros((features.height, features.width, 4), dtype=np.float32)
dbuf = np.zeros((features.height, features.width), dtype=np.float32)
abuf = np.zeros((features.acc_height, features.acc_stride, 4), dtype=np.float32)
dbuf = np.zeros((features.acc_height, features.acc_stride), dtype=np.float32)
seeds = mwc.MWC.make_seeds(512 * nsteps)
iter = IterCode(features)
@ -221,9 +223,13 @@ def render(features, cps):
f = np.float32
npix = features.width * features.height
npix = features.acc_width * features.acc_height
# TODO: just allocate
obufd = cuda.to_device(abuf)
dbuf = cuda.from_device_like(dbufd, dbuf)
dbuf = ndimage.filters.gaussian_filter(dbuf, 0.6)
dbufd = cuda.to_device(dbuf)
de.invoke(mod, abufd, obufd, dbufd)
fun = mod.get_function("colorclip")

View File

@ -38,7 +38,9 @@ class Genome(pyflam3.Genome):
"""
# TODO: when reading as a property during packing, this may be
# calculated 6 times instead of 1
return ( affine.translate(0.5 * cp.width, 0.5 * cp.height)
# TODO: also requires knowing gutter width
g = Features.gutter
return ( affine.translate(0.5 * cp.width + g, 0.5 * cp.height + g)
* affine.scale(cp.ppu, cp.ppu)
* affine.translate(-cp._center[0], -cp._center[1])
* affine.rotate(cp.rotate * 2 * np.pi / 360,
@ -90,6 +92,10 @@ class Features(object):
# performance too much. Power-of-two, please.
palette_height = 16
# Maximum width of the density estimation (DE) filter and other spatial
# filters, and therefore the amount of padding added along every edge of
# the accumulation buffer
gutter = 16
def __init__(self, genomes):
any = lambda l: bool(filter(None, map(l, genomes)))
self.max_ntemporal_samples = max(
@ -112,9 +118,9 @@ class Features(object):
self.width = genomes[0].width
self.height = genomes[0].height
self.acc_width = genomes[0].width
self.acc_height = genomes[0].height
self.acc_stride = genomes[0].width
self.acc_width = genomes[0].width + 2 * self.gutter
self.acc_height = genomes[0].height + 2 * self.gutter
self.acc_stride = genomes[0].width + 2 * self.gutter
class XFormFeatures(object):
def __init__(self, xforms, xform_id):

View File

@ -44,6 +44,10 @@ def main(args):
genomes = Genome.from_string(fp.read())
anim = Animation(genomes)
accum, den = render(anim.features, genomes)
accum = np.delete(accum, np.s_[:16], axis=0)
accum = np.delete(accum, np.s_[:16], axis=1)
accum = np.delete(accum, np.s_[-16:], axis=0)
accum = np.delete(accum, np.s_[-16:], axis=1)
noalpha = np.delete(accum, 3, axis=2)
scipy.misc.imsave('rendered.png', noalpha)