Filter adjustments (density prefilter, gutter)

This commit is contained in:
Steven Robertson 2011-06-11 15:58:15 -04:00
parent 1deb3105a0
commit 94c453d153
4 changed files with 31 additions and 26 deletions

View File

@ -117,7 +117,7 @@ void density_est(float4 *pixbuf, float4 *outbuf, float *denbuf,
float den = denbuf[idx]; float den = denbuf[idx];
if (in.w > 0 && den > 0) { if (in.w > 0 && den > 0) {
float ls = k1 * 12 * logf(1.0 + in.w * k2) / in.w; float ls = k1 * logf(1.0f + in.w * k2) / in.w;
in.x *= ls; in.x *= ls;
in.y *= ls; in.y *= ls;
in.z *= ls; in.z *= ls;
@ -200,19 +200,6 @@ void density_est(float4 *pixbuf, float4 *outbuf, float *denbuf,
atomicAdd(out+3, de_a[si]); atomicAdd(out+3, de_a[si]);
} }
if (threadIdx.y == 5000) {
for (int i = threadIdx.x; i < FW; i += 32) {
idx = {{features.acc_stride}} * (imrow + 32)
+ blockIdx.x * 32 + i + W2;
int si = 32 * FW + i;
float *out = reinterpret_cast<float*>(&outbuf[idx]);
atomicAdd(out, 0.2 + de_r[si]);
atomicAdd(out+1, de_g[si]);
atomicAdd(out+2, de_b[si]);
atomicAdd(out+3, de_a[si]);
}
}
__syncthreads(); __syncthreads();
// TODO: shift instead of copying // TODO: shift instead of copying
int tid = threadIdx.y * 32 + threadIdx.x; int tid = threadIdx.y * 32 + threadIdx.x;
@ -239,9 +226,11 @@ void density_est(float4 *pixbuf, float4 *outbuf, float *denbuf,
def invoke(self, mod, abufd, obufd, dbufd): def invoke(self, mod, abufd, obufd, dbufd):
# TODO: add no-est version # TODO: add no-est version
# TODO: come up with a general way to average these parameters # TODO: come up with a general way to average these parameters
k1 = self.cp.brightness * 268 / 256 k1 = self.cp.brightness * 268 / 256
area = self.features.width * self.features.height / self.cp.ppu ** 2 area = self.features.acc_width * self.features.acc_height / self.cp.ppu ** 2
k2 = 1 / (area * self.cp.adj_density) k2 = 1 / (area * self.cp.adj_density)
print k1, k2, area
if self.cp.estimator == 0: if self.cp.estimator == 0:
fun = mod.get_function("logscale") fun = mod.get_function("logscale")
@ -251,7 +240,7 @@ void density_est(float4 *pixbuf, float4 *outbuf, float *denbuf,
else: else:
fun = mod.get_function("density_est") fun = mod.get_function("density_est")
t = fun(abufd, obufd, dbufd, np.float32(k1), np.float32(k2), t = fun(abufd, obufd, dbufd, np.float32(k1), np.float32(k2),
block=(32, 32, 1), grid=(self.features.acc_stride/32 - 1, 1), block=(32, 32, 1), grid=(self.features.acc_width/32, 1),
time_kernel=True) time_kernel=True)
print "Density estimation: %g" % t print "Density estimation: %g" % t

View File

@ -8,6 +8,7 @@ import pycuda.driver as cuda
from pycuda.driver import In, Out, InOut from pycuda.driver import In, Out, InOut
from pycuda.compiler import SourceModule from pycuda.compiler import SourceModule
import numpy as np import numpy as np
from scipy import ndimage
from fr0stlib.pyflam3 import flam3_interpolate from fr0stlib.pyflam3 import flam3_interpolate
from cuburn.code import mwc, variations, filter from cuburn.code import mwc, variations, filter
@ -125,8 +126,8 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
int ix = trunca(cx+ditherx), iy = trunca(cy+dithery); int ix = trunca(cx+ditherx), iy = trunca(cy+dithery);
if (ix < 0 || ix >= {{features.width}} || if (ix < 0 || ix >= {{features.acc_width}} ||
iy < 0 || iy >= {{features.height}} ) { iy < 0 || iy >= {{features.acc_height}} ) {
consec_bad++; consec_bad++;
if (consec_bad > {{features.max_oob}}) { if (consec_bad > {{features.max_oob}}) {
x = mwc_next_11(&rctx); x = mwc_next_11(&rctx);
@ -137,7 +138,7 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
continue; continue;
} }
int i = iy * {{features.width}} + ix; int i = iy * {{features.acc_stride}} + ix;
float4 outcol = tex2D(palTex, color, {{packer.get('cp_step_frac')}}); float4 outcol = tex2D(palTex, color, {{packer.get('cp_step_frac')}});
float4 pix = accbuf[i]; float4 pix = accbuf[i];
@ -148,6 +149,7 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
accbuf[i] = pix; // TODO: atomic operations (or better) accbuf[i] = pix; // TODO: atomic operations (or better)
denbuf[i] += 1.0f; denbuf[i] += 1.0f;
} }
asm volatile ("membar.cta;");
} }
""") """)
return tmpl.substitute( return tmpl.substitute(
@ -158,8 +160,8 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
def render(features, cps): def render(features, cps):
# TODO: make this adjustable via genome # TODO: make this adjustable via genome
nsteps = 1000 nsteps = 1000
abuf = np.zeros((features.height, features.width, 4), dtype=np.float32) abuf = np.zeros((features.acc_height, features.acc_stride, 4), dtype=np.float32)
dbuf = np.zeros((features.height, features.width), dtype=np.float32) dbuf = np.zeros((features.acc_height, features.acc_stride), dtype=np.float32)
seeds = mwc.MWC.make_seeds(512 * nsteps) seeds = mwc.MWC.make_seeds(512 * nsteps)
iter = IterCode(features) iter = IterCode(features)
@ -221,9 +223,13 @@ def render(features, cps):
f = np.float32 f = np.float32
npix = features.width * features.height npix = features.acc_width * features.acc_height
# TODO: just allocate
obufd = cuda.to_device(abuf) obufd = cuda.to_device(abuf)
dbuf = cuda.from_device_like(dbufd, dbuf)
dbuf = ndimage.filters.gaussian_filter(dbuf, 0.6)
dbufd = cuda.to_device(dbuf)
de.invoke(mod, abufd, obufd, dbufd) de.invoke(mod, abufd, obufd, dbufd)
fun = mod.get_function("colorclip") fun = mod.get_function("colorclip")

View File

@ -38,7 +38,9 @@ class Genome(pyflam3.Genome):
""" """
# TODO: when reading as a property during packing, this may be # TODO: when reading as a property during packing, this may be
# calculated 6 times instead of 1 # calculated 6 times instead of 1
return ( affine.translate(0.5 * cp.width, 0.5 * cp.height) # TODO: also requires knowing gutter width
g = Features.gutter
return ( affine.translate(0.5 * cp.width + g, 0.5 * cp.height + g)
* affine.scale(cp.ppu, cp.ppu) * affine.scale(cp.ppu, cp.ppu)
* affine.translate(-cp._center[0], -cp._center[1]) * affine.translate(-cp._center[0], -cp._center[1])
* affine.rotate(cp.rotate * 2 * np.pi / 360, * affine.rotate(cp.rotate * 2 * np.pi / 360,
@ -90,6 +92,10 @@ class Features(object):
# performance too much. Power-of-two, please. # performance too much. Power-of-two, please.
palette_height = 16 palette_height = 16
# Maximum width of DE and other spatial filters, and thus in turn the
# amount of padding applied
gutter = 16
def __init__(self, genomes): def __init__(self, genomes):
any = lambda l: bool(filter(None, map(l, genomes))) any = lambda l: bool(filter(None, map(l, genomes)))
self.max_ntemporal_samples = max( self.max_ntemporal_samples = max(
@ -112,9 +118,9 @@ class Features(object):
self.width = genomes[0].width self.width = genomes[0].width
self.height = genomes[0].height self.height = genomes[0].height
self.acc_width = genomes[0].width self.acc_width = genomes[0].width + 2 * self.gutter
self.acc_height = genomes[0].height self.acc_height = genomes[0].height + 2 * self.gutter
self.acc_stride = genomes[0].width self.acc_stride = genomes[0].width + 2 * self.gutter
class XFormFeatures(object): class XFormFeatures(object):
def __init__(self, xforms, xform_id): def __init__(self, xforms, xform_id):

View File

@ -44,6 +44,10 @@ def main(args):
genomes = Genome.from_string(fp.read()) genomes = Genome.from_string(fp.read())
anim = Animation(genomes) anim = Animation(genomes)
accum, den = render(anim.features, genomes) accum, den = render(anim.features, genomes)
accum = np.delete(accum, np.s_[:16], axis=0)
accum = np.delete(accum, np.s_[:16], axis=1)
accum = np.delete(accum, np.s_[-16:], axis=0)
accum = np.delete(accum, np.s_[-16:], axis=1)
noalpha = np.delete(accum, 3, axis=2) noalpha = np.delete(accum, 3, axis=2)
scipy.misc.imsave('rendered.png', noalpha) scipy.misc.imsave('rendered.png', noalpha)