Add background color support, and tentatively disable density blurring.

This commit is contained in:
Steven Robertson 2011-06-19 00:30:54 -04:00
parent 883de380fc
commit c66cb463d4
2 changed files with 39 additions and 15 deletions

View File

@ -2,17 +2,20 @@
from cuburn.code.util import * from cuburn.code.util import *
class ColorClip(HunkOCode): class ColorClip(HunkOCode):
defs = """ def __init__(self, features):
self.defs = self.defs_tmpl.substitute(features=features)
defs_tmpl = Template("""
__global__ __global__
void colorclip(float4 *pixbuf, float gamma, float vibrancy, float highpow, void colorclip(float4 *pixbuf, float gamma, float vibrancy, float highpow,
float linrange, float lingam) { float linrange, float lingam, float3 bkgd) {
// TODO: test if over an edge of the framebuffer - currently gutters are // TODO: test if over an edge of the framebuffer - currently gutters are
// used and up to 256 pixels are ignored, which breaks when width<256 // used and up to 256 pixels are ignored, which breaks when width<256
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
float4 pix = pixbuf[i]; float4 pix = pixbuf[i];
if (pix.w <= 0) { if (pix.w <= 0) {
pixbuf[i] = make_float4(0, 0, 0, 0); pixbuf[i] = make_float4(bkgd.x, bkgd.y, bkgd.z, 0);
return; return;
} }
@ -57,6 +60,18 @@ void colorclip(float4 *pixbuf, float gamma, float vibrancy, float highpow,
pix.y += (1.0f - vibrancy) * powf(opix.y, gamma); pix.y += (1.0f - vibrancy) * powf(opix.y, gamma);
pix.z += (1.0f - vibrancy) * powf(opix.z, gamma); pix.z += (1.0f - vibrancy) * powf(opix.z, gamma);
{{if features.alpha_output_channel}}
float 1_alpha = 1 / alpha;
pix.x *= 1_alpha;
pix.y *= 1_alpha;
pix.z *= 1_alpha;
{{else}}
pix.x += (1.0f - alpha) * bkgd.x;
pix.y += (1.0f - alpha) * bkgd.y;
pix.z += (1.0f - alpha) * bkgd.z;
{{endif}}
pix.w = alpha;
// Clamp values. I think this is superfluous, but I'm not certain. // Clamp values. I think this is superfluous, but I'm not certain.
pix.x = fminf(1.0f, pix.x); pix.x = fminf(1.0f, pix.x);
pix.y = fminf(1.0f, pix.y); pix.y = fminf(1.0f, pix.y);
@ -64,7 +79,7 @@ void colorclip(float4 *pixbuf, float gamma, float vibrancy, float highpow,
pixbuf[i] = pix; pixbuf[i] = pix;
} }
""" """)
class DensityEst(HunkOCode): class DensityEst(HunkOCode):
""" """

View File

@ -14,6 +14,7 @@ from fr0stlib.pyflam3.constants import *
import pycuda.compiler import pycuda.compiler
import pycuda.driver as cuda import pycuda.driver as cuda
from pycuda.gpuarray import vec
from cuburn import affine from cuburn import affine
from cuburn.code import util, mwc, iter, filtering from cuburn.code import util, mwc, iter, filtering
@ -107,10 +108,11 @@ class Animation(object):
""" """
self._iter = iter.IterCode(self.features) self._iter = iter.IterCode(self.features)
self._de = filtering.DensityEst(self.features, self.genomes[0]) self._de = filtering.DensityEst(self.features, self.genomes[0])
cclip = filtering.ColorClip(self.features)
# TODO: make choice of filtering explicit # TODO: make choice of filtering explicit
# TODO: autoload dependent modules? # TODO: autoload dependent modules?
self.src = util.assemble_code(util.BaseCode, mwc.MWC, self._iter.packer, self.src = util.assemble_code(util.BaseCode, mwc.MWC, self._iter.packer,
self._iter, filtering.ColorClip, self._de) self._iter, cclip, self._de)
self.cubin = pycuda.compiler.compile(self.src, keep=keep, self.cubin = pycuda.compiler.compile(self.src, keep=keep,
options=list(cmp_options)) options=list(cmp_options))
return self.src return self.src
@ -256,6 +258,7 @@ class _AnimRenderer(object):
# Must be accumulated over all CPs # Must be accumulated over all CPs
gam, vib = 0, 0 gam, vib = 0, 0
bkgd = np.zeros(3)
# This is gross, but there are a lot of fiddly corner cases with any # This is gross, but there are a lot of fiddly corner cases with any
# index-based iteration scheme. # index-based iteration scheme.
@ -270,6 +273,7 @@ class _AnimRenderer(object):
infos.append(info) infos.append(info)
gam += cp.gamma gam += cp.gamma
vib += cp.vibrancy vib += cp.vibrancy
bkgd += np.array(cp.background)
else: else:
# Can't interpolate normally; just pack copies # Can't interpolate normally; just pack copies
# TODO: this still packs the genome 20 times or so instead of # TODO: this still packs the genome 20 times or so instead of
@ -278,6 +282,7 @@ class _AnimRenderer(object):
infos = [packed] * len(block_times) infos = [packed] * len(block_times)
gam += a.genomes[0].gamma * len(block_times) gam += a.genomes[0].gamma * len(block_times)
vib += a.genomes[0].vibrancy * len(block_times) vib += a.genomes[0].vibrancy * len(block_times)
bkgd += np.array(a.genomes[0].background) * len(block_times)
infos = np.concatenate(infos) infos = np.concatenate(infos)
offset = b * packer.align * self.cps_per_block offset = b * packer.align * self.cps_per_block
@ -293,7 +298,7 @@ class _AnimRenderer(object):
# TODO: get block config from IterCode # TODO: get block config from IterCode
# TODO: print timing information # TODO: print timing information
iter_fun(self.d_seeds[b], np.uint64(d_info_off), iter_fun(self.d_seeds[b], np.uint64(d_info_off),
self.d_accum, self.d_den, self.d_accum, self.d_den, texrefs=[tref],
block=(32, 16, 1), grid=(len(block_times), 1), block=(32, 16, 1), grid=(len(block_times), 1),
stream=self.stream) stream=self.stream)
@ -301,19 +306,17 @@ class _AnimRenderer(object):
# stream here. Later, once we've decided on a density-buffer prefilter, # stream here. Later, once we've decided on a density-buffer prefilter,
# we will move it to the GPU, allowing it to be embedded in the stream # we will move it to the GPU, allowing it to be embedded in the stream
# and letting the remaining code be asynchronous. # and letting the remaining code be asynchronous.
self.stream.synchronize() #self.stream.synchronize()
dbuf_dim = (a.features.acc_height, a.features.acc_stride) #dbuf_dim = (a.features.acc_height, a.features.acc_stride)
dbuf = cuda.from_device(self.d_den, dbuf_dim, np.float32) #dbuf = cuda.from_device(self.d_den, dbuf_dim, np.float32)
dbuf = ndimage.filters.gaussian_filter(dbuf, 0.6) #dbuf = ndimage.filters.gaussian_filter(dbuf, 0.6)
cuda.memcpy_htod(self.d_den, dbuf) #cuda.memcpy_htod(self.d_den, dbuf)
util.BaseCode.zero_dptr(a.mod, self.d_out, 4 * self.nbins, util.BaseCode.zero_dptr(a.mod, self.d_out, 4 * self.nbins,
self.stream) self.stream)
self.stream.synchronize()
a._de.invoke(a.mod, Genome(cen_cp), a._de.invoke(a.mod, Genome(cen_cp),
self.d_accum, self.d_out, self.d_den, self.d_accum, self.d_out, self.d_den,
self.stream) self.stream)
self.stream.synchronize()
f = np.float32 f = np.float32
n = f(self.ncps) n = f(self.ncps)
@ -322,12 +325,12 @@ class _AnimRenderer(object):
hipow = f(cen_cp.highlight_power) hipow = f(cen_cp.highlight_power)
lin = f(cen_cp.gam_lin_thresh) lin = f(cen_cp.gam_lin_thresh)
lingam = f(math.pow(cen_cp.gam_lin_thresh, gam-1.0) if lin > 0 else 0) lingam = f(math.pow(cen_cp.gam_lin_thresh, gam-1.0) if lin > 0 else 0)
print gam, vib, lin, lingam, cen_cp.gamma bkgd = vec.make_float3(*(bkgd / n))
# TODO: get block size from colorclip class? It actually does not # TODO: get block size from colorclip class? It actually does not
# depend on that being the case # depend on that being the case
color_fun = a.mod.get_function("colorclip") color_fun = a.mod.get_function("colorclip")
color_fun(self.d_out, gam, vib, hipow, lin, lingam, color_fun(self.d_out, gam, vib, hipow, lin, lingam, bkgd,
block=(256, 1, 1), grid=(self.nbins / 256, 1), block=(256, 1, 1), grid=(self.nbins / 256, 1),
stream=self.stream) stream=self.stream)
@ -402,6 +405,12 @@ class Features(object):
# The filtering code makes deep assumptions about this value. # The filtering code makes deep assumptions about this value.
gutter = 16 gutter = 16
# TODO: for now, we always throw away the alpha channel before writing.
# All the code needed to keep it is in place; we just need to find a way to
# expose this preference via the API (or push alpha blending entirely onto
# the client, which I'm not opposed to).
alpha_output_channel = False
def __init__(self, genomes): def __init__(self, genomes):
any = lambda l: bool(filter(None, map(l, genomes))) any = lambda l: bool(filter(None, map(l, genomes)))
self.max_ntemporal_samples = max( self.max_ntemporal_samples = max(