mirror of
https://github.com/stevenrobertson/cuburn.git
synced 2025-02-05 11:40:04 -05:00
Filter adjustments (density prefilter, gutter)
This commit is contained in:
parent
1deb3105a0
commit
94c453d153
@ -117,7 +117,7 @@ void density_est(float4 *pixbuf, float4 *outbuf, float *denbuf,
|
|||||||
float den = denbuf[idx];
|
float den = denbuf[idx];
|
||||||
|
|
||||||
if (in.w > 0 && den > 0) {
|
if (in.w > 0 && den > 0) {
|
||||||
float ls = k1 * 12 * logf(1.0 + in.w * k2) / in.w;
|
float ls = k1 * logf(1.0f + in.w * k2) / in.w;
|
||||||
in.x *= ls;
|
in.x *= ls;
|
||||||
in.y *= ls;
|
in.y *= ls;
|
||||||
in.z *= ls;
|
in.z *= ls;
|
||||||
@ -200,19 +200,6 @@ void density_est(float4 *pixbuf, float4 *outbuf, float *denbuf,
|
|||||||
atomicAdd(out+3, de_a[si]);
|
atomicAdd(out+3, de_a[si]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (threadIdx.y == 5000) {
|
|
||||||
for (int i = threadIdx.x; i < FW; i += 32) {
|
|
||||||
idx = {{features.acc_stride}} * (imrow + 32)
|
|
||||||
+ blockIdx.x * 32 + i + W2;
|
|
||||||
int si = 32 * FW + i;
|
|
||||||
float *out = reinterpret_cast<float*>(&outbuf[idx]);
|
|
||||||
atomicAdd(out, 0.2 + de_r[si]);
|
|
||||||
atomicAdd(out+1, de_g[si]);
|
|
||||||
atomicAdd(out+2, de_b[si]);
|
|
||||||
atomicAdd(out+3, de_a[si]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
// TODO: shift instead of copying
|
// TODO: shift instead of copying
|
||||||
int tid = threadIdx.y * 32 + threadIdx.x;
|
int tid = threadIdx.y * 32 + threadIdx.x;
|
||||||
@ -239,9 +226,11 @@ void density_est(float4 *pixbuf, float4 *outbuf, float *denbuf,
|
|||||||
def invoke(self, mod, abufd, obufd, dbufd):
|
def invoke(self, mod, abufd, obufd, dbufd):
|
||||||
# TODO: add no-est version
|
# TODO: add no-est version
|
||||||
# TODO: come up with a general way to average these parameters
|
# TODO: come up with a general way to average these parameters
|
||||||
|
|
||||||
k1 = self.cp.brightness * 268 / 256
|
k1 = self.cp.brightness * 268 / 256
|
||||||
area = self.features.width * self.features.height / self.cp.ppu ** 2
|
area = self.features.acc_width * self.features.acc_height / self.cp.ppu ** 2
|
||||||
k2 = 1 / (area * self.cp.adj_density)
|
k2 = 1 / (area * self.cp.adj_density)
|
||||||
|
print k1, k2, area
|
||||||
|
|
||||||
if self.cp.estimator == 0:
|
if self.cp.estimator == 0:
|
||||||
fun = mod.get_function("logscale")
|
fun = mod.get_function("logscale")
|
||||||
@ -251,7 +240,7 @@ void density_est(float4 *pixbuf, float4 *outbuf, float *denbuf,
|
|||||||
else:
|
else:
|
||||||
fun = mod.get_function("density_est")
|
fun = mod.get_function("density_est")
|
||||||
t = fun(abufd, obufd, dbufd, np.float32(k1), np.float32(k2),
|
t = fun(abufd, obufd, dbufd, np.float32(k1), np.float32(k2),
|
||||||
block=(32, 32, 1), grid=(self.features.acc_stride/32 - 1, 1),
|
block=(32, 32, 1), grid=(self.features.acc_width/32, 1),
|
||||||
time_kernel=True)
|
time_kernel=True)
|
||||||
print "Density estimation: %g" % t
|
print "Density estimation: %g" % t
|
||||||
|
|
||||||
|
@ -8,6 +8,7 @@ import pycuda.driver as cuda
|
|||||||
from pycuda.driver import In, Out, InOut
|
from pycuda.driver import In, Out, InOut
|
||||||
from pycuda.compiler import SourceModule
|
from pycuda.compiler import SourceModule
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from scipy import ndimage
|
||||||
|
|
||||||
from fr0stlib.pyflam3 import flam3_interpolate
|
from fr0stlib.pyflam3 import flam3_interpolate
|
||||||
from cuburn.code import mwc, variations, filter
|
from cuburn.code import mwc, variations, filter
|
||||||
@ -125,8 +126,8 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
|
|||||||
|
|
||||||
int ix = trunca(cx+ditherx), iy = trunca(cy+dithery);
|
int ix = trunca(cx+ditherx), iy = trunca(cy+dithery);
|
||||||
|
|
||||||
if (ix < 0 || ix >= {{features.width}} ||
|
if (ix < 0 || ix >= {{features.acc_width}} ||
|
||||||
iy < 0 || iy >= {{features.height}} ) {
|
iy < 0 || iy >= {{features.acc_height}} ) {
|
||||||
consec_bad++;
|
consec_bad++;
|
||||||
if (consec_bad > {{features.max_oob}}) {
|
if (consec_bad > {{features.max_oob}}) {
|
||||||
x = mwc_next_11(&rctx);
|
x = mwc_next_11(&rctx);
|
||||||
@ -137,7 +138,7 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
int i = iy * {{features.width}} + ix;
|
int i = iy * {{features.acc_stride}} + ix;
|
||||||
|
|
||||||
float4 outcol = tex2D(palTex, color, {{packer.get('cp_step_frac')}});
|
float4 outcol = tex2D(palTex, color, {{packer.get('cp_step_frac')}});
|
||||||
float4 pix = accbuf[i];
|
float4 pix = accbuf[i];
|
||||||
@ -148,6 +149,7 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
|
|||||||
accbuf[i] = pix; // TODO: atomic operations (or better)
|
accbuf[i] = pix; // TODO: atomic operations (or better)
|
||||||
denbuf[i] += 1.0f;
|
denbuf[i] += 1.0f;
|
||||||
}
|
}
|
||||||
|
asm volatile ("membar.cta;");
|
||||||
}
|
}
|
||||||
""")
|
""")
|
||||||
return tmpl.substitute(
|
return tmpl.substitute(
|
||||||
@ -158,8 +160,8 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
|
|||||||
def render(features, cps):
|
def render(features, cps):
|
||||||
# TODO: make this adjustable via genome
|
# TODO: make this adjustable via genome
|
||||||
nsteps = 1000
|
nsteps = 1000
|
||||||
abuf = np.zeros((features.height, features.width, 4), dtype=np.float32)
|
abuf = np.zeros((features.acc_height, features.acc_stride, 4), dtype=np.float32)
|
||||||
dbuf = np.zeros((features.height, features.width), dtype=np.float32)
|
dbuf = np.zeros((features.acc_height, features.acc_stride), dtype=np.float32)
|
||||||
seeds = mwc.MWC.make_seeds(512 * nsteps)
|
seeds = mwc.MWC.make_seeds(512 * nsteps)
|
||||||
|
|
||||||
iter = IterCode(features)
|
iter = IterCode(features)
|
||||||
@ -221,9 +223,13 @@ def render(features, cps):
|
|||||||
|
|
||||||
f = np.float32
|
f = np.float32
|
||||||
|
|
||||||
npix = features.width * features.height
|
npix = features.acc_width * features.acc_height
|
||||||
|
|
||||||
|
# TODO: just allocate
|
||||||
obufd = cuda.to_device(abuf)
|
obufd = cuda.to_device(abuf)
|
||||||
|
dbuf = cuda.from_device_like(dbufd, dbuf)
|
||||||
|
dbuf = ndimage.filters.gaussian_filter(dbuf, 0.6)
|
||||||
|
dbufd = cuda.to_device(dbuf)
|
||||||
de.invoke(mod, abufd, obufd, dbufd)
|
de.invoke(mod, abufd, obufd, dbufd)
|
||||||
|
|
||||||
fun = mod.get_function("colorclip")
|
fun = mod.get_function("colorclip")
|
||||||
|
@ -38,7 +38,9 @@ class Genome(pyflam3.Genome):
|
|||||||
"""
|
"""
|
||||||
# TODO: when reading as a property during packing, this may be
|
# TODO: when reading as a property during packing, this may be
|
||||||
# calculated 6 times instead of 1
|
# calculated 6 times instead of 1
|
||||||
return ( affine.translate(0.5 * cp.width, 0.5 * cp.height)
|
# TODO: also requires knowing gutter width
|
||||||
|
g = Features.gutter
|
||||||
|
return ( affine.translate(0.5 * cp.width + g, 0.5 * cp.height + g)
|
||||||
* affine.scale(cp.ppu, cp.ppu)
|
* affine.scale(cp.ppu, cp.ppu)
|
||||||
* affine.translate(-cp._center[0], -cp._center[1])
|
* affine.translate(-cp._center[0], -cp._center[1])
|
||||||
* affine.rotate(cp.rotate * 2 * np.pi / 360,
|
* affine.rotate(cp.rotate * 2 * np.pi / 360,
|
||||||
@ -90,6 +92,10 @@ class Features(object):
|
|||||||
# performance too much. Power-of-two, please.
|
# performance too much. Power-of-two, please.
|
||||||
palette_height = 16
|
palette_height = 16
|
||||||
|
|
||||||
|
# Maximum width of DE and other spatial filters, and thus in turn the
|
||||||
|
# amount of padding applied
|
||||||
|
gutter = 16
|
||||||
|
|
||||||
def __init__(self, genomes):
|
def __init__(self, genomes):
|
||||||
any = lambda l: bool(filter(None, map(l, genomes)))
|
any = lambda l: bool(filter(None, map(l, genomes)))
|
||||||
self.max_ntemporal_samples = max(
|
self.max_ntemporal_samples = max(
|
||||||
@ -112,9 +118,9 @@ class Features(object):
|
|||||||
|
|
||||||
self.width = genomes[0].width
|
self.width = genomes[0].width
|
||||||
self.height = genomes[0].height
|
self.height = genomes[0].height
|
||||||
self.acc_width = genomes[0].width
|
self.acc_width = genomes[0].width + 2 * self.gutter
|
||||||
self.acc_height = genomes[0].height
|
self.acc_height = genomes[0].height + 2 * self.gutter
|
||||||
self.acc_stride = genomes[0].width
|
self.acc_stride = genomes[0].width + 2 * self.gutter
|
||||||
|
|
||||||
class XFormFeatures(object):
|
class XFormFeatures(object):
|
||||||
def __init__(self, xforms, xform_id):
|
def __init__(self, xforms, xform_id):
|
||||||
|
4
main.py
4
main.py
@ -44,6 +44,10 @@ def main(args):
|
|||||||
genomes = Genome.from_string(fp.read())
|
genomes = Genome.from_string(fp.read())
|
||||||
anim = Animation(genomes)
|
anim = Animation(genomes)
|
||||||
accum, den = render(anim.features, genomes)
|
accum, den = render(anim.features, genomes)
|
||||||
|
accum = np.delete(accum, np.s_[:16], axis=0)
|
||||||
|
accum = np.delete(accum, np.s_[:16], axis=1)
|
||||||
|
accum = np.delete(accum, np.s_[-16:], axis=0)
|
||||||
|
accum = np.delete(accum, np.s_[-16:], axis=1)
|
||||||
|
|
||||||
noalpha = np.delete(accum, 3, axis=2)
|
noalpha = np.delete(accum, 3, axis=2)
|
||||||
scipy.misc.imsave('rendered.png', noalpha)
|
scipy.misc.imsave('rendered.png', noalpha)
|
||||||
|
Loading…
Reference in New Issue
Block a user