mirror of
https://github.com/stevenrobertson/cuburn.git
synced 2025-02-05 11:40:04 -05:00
Filter adjustments (density prefilter, gutter)
This commit is contained in:
parent
1deb3105a0
commit
94c453d153
@ -117,7 +117,7 @@ void density_est(float4 *pixbuf, float4 *outbuf, float *denbuf,
|
||||
float den = denbuf[idx];
|
||||
|
||||
if (in.w > 0 && den > 0) {
|
||||
float ls = k1 * 12 * logf(1.0 + in.w * k2) / in.w;
|
||||
float ls = k1 * logf(1.0f + in.w * k2) / in.w;
|
||||
in.x *= ls;
|
||||
in.y *= ls;
|
||||
in.z *= ls;
|
||||
@ -200,19 +200,6 @@ void density_est(float4 *pixbuf, float4 *outbuf, float *denbuf,
|
||||
atomicAdd(out+3, de_a[si]);
|
||||
}
|
||||
|
||||
if (threadIdx.y == 5000) {
|
||||
for (int i = threadIdx.x; i < FW; i += 32) {
|
||||
idx = {{features.acc_stride}} * (imrow + 32)
|
||||
+ blockIdx.x * 32 + i + W2;
|
||||
int si = 32 * FW + i;
|
||||
float *out = reinterpret_cast<float*>(&outbuf[idx]);
|
||||
atomicAdd(out, 0.2 + de_r[si]);
|
||||
atomicAdd(out+1, de_g[si]);
|
||||
atomicAdd(out+2, de_b[si]);
|
||||
atomicAdd(out+3, de_a[si]);
|
||||
}
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
// TODO: shift instead of copying
|
||||
int tid = threadIdx.y * 32 + threadIdx.x;
|
||||
@ -239,9 +226,11 @@ void density_est(float4 *pixbuf, float4 *outbuf, float *denbuf,
|
||||
def invoke(self, mod, abufd, obufd, dbufd):
|
||||
# TODO: add no-est version
|
||||
# TODO: come up with a general way to average these parameters
|
||||
|
||||
k1 = self.cp.brightness * 268 / 256
|
||||
area = self.features.width * self.features.height / self.cp.ppu ** 2
|
||||
area = self.features.acc_width * self.features.acc_height / self.cp.ppu ** 2
|
||||
k2 = 1 / (area * self.cp.adj_density)
|
||||
print k1, k2, area
|
||||
|
||||
if self.cp.estimator == 0:
|
||||
fun = mod.get_function("logscale")
|
||||
@ -251,7 +240,7 @@ void density_est(float4 *pixbuf, float4 *outbuf, float *denbuf,
|
||||
else:
|
||||
fun = mod.get_function("density_est")
|
||||
t = fun(abufd, obufd, dbufd, np.float32(k1), np.float32(k2),
|
||||
block=(32, 32, 1), grid=(self.features.acc_stride/32 - 1, 1),
|
||||
block=(32, 32, 1), grid=(self.features.acc_width/32, 1),
|
||||
time_kernel=True)
|
||||
print "Density estimation: %g" % t
|
||||
|
||||
|
@ -8,6 +8,7 @@ import pycuda.driver as cuda
|
||||
from pycuda.driver import In, Out, InOut
|
||||
from pycuda.compiler import SourceModule
|
||||
import numpy as np
|
||||
from scipy import ndimage
|
||||
|
||||
from fr0stlib.pyflam3 import flam3_interpolate
|
||||
from cuburn.code import mwc, variations, filter
|
||||
@ -125,8 +126,8 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
|
||||
|
||||
int ix = trunca(cx+ditherx), iy = trunca(cy+dithery);
|
||||
|
||||
if (ix < 0 || ix >= {{features.width}} ||
|
||||
iy < 0 || iy >= {{features.height}} ) {
|
||||
if (ix < 0 || ix >= {{features.acc_width}} ||
|
||||
iy < 0 || iy >= {{features.acc_height}} ) {
|
||||
consec_bad++;
|
||||
if (consec_bad > {{features.max_oob}}) {
|
||||
x = mwc_next_11(&rctx);
|
||||
@ -137,7 +138,7 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
|
||||
continue;
|
||||
}
|
||||
|
||||
int i = iy * {{features.width}} + ix;
|
||||
int i = iy * {{features.acc_stride}} + ix;
|
||||
|
||||
float4 outcol = tex2D(palTex, color, {{packer.get('cp_step_frac')}});
|
||||
float4 pix = accbuf[i];
|
||||
@ -148,6 +149,7 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
|
||||
accbuf[i] = pix; // TODO: atomic operations (or better)
|
||||
denbuf[i] += 1.0f;
|
||||
}
|
||||
asm volatile ("membar.cta;");
|
||||
}
|
||||
""")
|
||||
return tmpl.substitute(
|
||||
@ -158,8 +160,8 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
|
||||
def render(features, cps):
|
||||
# TODO: make this adjustable via genome
|
||||
nsteps = 1000
|
||||
abuf = np.zeros((features.height, features.width, 4), dtype=np.float32)
|
||||
dbuf = np.zeros((features.height, features.width), dtype=np.float32)
|
||||
abuf = np.zeros((features.acc_height, features.acc_stride, 4), dtype=np.float32)
|
||||
dbuf = np.zeros((features.acc_height, features.acc_stride), dtype=np.float32)
|
||||
seeds = mwc.MWC.make_seeds(512 * nsteps)
|
||||
|
||||
iter = IterCode(features)
|
||||
@ -221,9 +223,13 @@ def render(features, cps):
|
||||
|
||||
f = np.float32
|
||||
|
||||
npix = features.width * features.height
|
||||
npix = features.acc_width * features.acc_height
|
||||
|
||||
# TODO: just allocate
|
||||
obufd = cuda.to_device(abuf)
|
||||
dbuf = cuda.from_device_like(dbufd, dbuf)
|
||||
dbuf = ndimage.filters.gaussian_filter(dbuf, 0.6)
|
||||
dbufd = cuda.to_device(dbuf)
|
||||
de.invoke(mod, abufd, obufd, dbufd)
|
||||
|
||||
fun = mod.get_function("colorclip")
|
||||
|
@ -38,7 +38,9 @@ class Genome(pyflam3.Genome):
|
||||
"""
|
||||
# TODO: when reading as a property during packing, this may be
|
||||
# calculated 6 times instead of 1
|
||||
return ( affine.translate(0.5 * cp.width, 0.5 * cp.height)
|
||||
# TODO: also requires knowing gutter width
|
||||
g = Features.gutter
|
||||
return ( affine.translate(0.5 * cp.width + g, 0.5 * cp.height + g)
|
||||
* affine.scale(cp.ppu, cp.ppu)
|
||||
* affine.translate(-cp._center[0], -cp._center[1])
|
||||
* affine.rotate(cp.rotate * 2 * np.pi / 360,
|
||||
@ -90,6 +92,10 @@ class Features(object):
|
||||
# performance too much. Power-of-two, please.
|
||||
palette_height = 16
|
||||
|
||||
# Maximum width of DE and other spatial filters, and thus in turn the
|
||||
# amount of padding applied
|
||||
gutter = 16
|
||||
|
||||
def __init__(self, genomes):
|
||||
any = lambda l: bool(filter(None, map(l, genomes)))
|
||||
self.max_ntemporal_samples = max(
|
||||
@ -112,9 +118,9 @@ class Features(object):
|
||||
|
||||
self.width = genomes[0].width
|
||||
self.height = genomes[0].height
|
||||
self.acc_width = genomes[0].width
|
||||
self.acc_height = genomes[0].height
|
||||
self.acc_stride = genomes[0].width
|
||||
self.acc_width = genomes[0].width + 2 * self.gutter
|
||||
self.acc_height = genomes[0].height + 2 * self.gutter
|
||||
self.acc_stride = genomes[0].width + 2 * self.gutter
|
||||
|
||||
class XFormFeatures(object):
|
||||
def __init__(self, xforms, xform_id):
|
||||
|
4
main.py
4
main.py
@ -44,6 +44,10 @@ def main(args):
|
||||
genomes = Genome.from_string(fp.read())
|
||||
anim = Animation(genomes)
|
||||
accum, den = render(anim.features, genomes)
|
||||
accum = np.delete(accum, np.s_[:16], axis=0)
|
||||
accum = np.delete(accum, np.s_[:16], axis=1)
|
||||
accum = np.delete(accum, np.s_[-16:], axis=0)
|
||||
accum = np.delete(accum, np.s_[-16:], axis=1)
|
||||
|
||||
noalpha = np.delete(accum, 3, axis=2)
|
||||
scipy.misc.imsave('rendered.png', noalpha)
|
||||
|
Loading…
Reference in New Issue
Block a user