mirror of
https://github.com/stevenrobertson/cuburn.git
synced 2025-02-05 11:40:04 -05:00
Add background color support, and tentatively disable density blurring.
This commit is contained in:
parent
883de380fc
commit
c66cb463d4
@ -2,17 +2,20 @@
|
|||||||
from cuburn.code.util import *
|
from cuburn.code.util import *
|
||||||
|
|
||||||
class ColorClip(HunkOCode):
|
class ColorClip(HunkOCode):
|
||||||
defs = """
|
def __init__(self, features):
|
||||||
|
self.defs = self.defs_tmpl.substitute(features=features)
|
||||||
|
|
||||||
|
defs_tmpl = Template("""
|
||||||
__global__
|
__global__
|
||||||
void colorclip(float4 *pixbuf, float gamma, float vibrancy, float highpow,
|
void colorclip(float4 *pixbuf, float gamma, float vibrancy, float highpow,
|
||||||
float linrange, float lingam) {
|
float linrange, float lingam, float3 bkgd) {
|
||||||
// TODO: test if over an edge of the framebuffer - currently gutters are
|
// TODO: test if over an edge of the framebuffer - currently gutters are
|
||||||
// used and up to 256 pixels are ignored, which breaks when width<256
|
// used and up to 256 pixels are ignored, which breaks when width<256
|
||||||
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
float4 pix = pixbuf[i];
|
float4 pix = pixbuf[i];
|
||||||
|
|
||||||
if (pix.w <= 0) {
|
if (pix.w <= 0) {
|
||||||
pixbuf[i] = make_float4(0, 0, 0, 0);
|
pixbuf[i] = make_float4(bkgd.x, bkgd.y, bkgd.z, 0);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -57,6 +60,18 @@ void colorclip(float4 *pixbuf, float gamma, float vibrancy, float highpow,
|
|||||||
pix.y += (1.0f - vibrancy) * powf(opix.y, gamma);
|
pix.y += (1.0f - vibrancy) * powf(opix.y, gamma);
|
||||||
pix.z += (1.0f - vibrancy) * powf(opix.z, gamma);
|
pix.z += (1.0f - vibrancy) * powf(opix.z, gamma);
|
||||||
|
|
||||||
|
{{if features.alpha_output_channel}}
|
||||||
|
float inv_alpha = 1 / alpha;  // reciprocal of alpha, computed once and reused for all three color channels
|
||||||
|
pix.x *= inv_alpha;
|
||||||
|
pix.y *= inv_alpha;
|
||||||
|
pix.z *= inv_alpha;
|
||||||
|
{{else}}
|
||||||
|
pix.x += (1.0f - alpha) * bkgd.x;
|
||||||
|
pix.y += (1.0f - alpha) * bkgd.y;
|
||||||
|
pix.z += (1.0f - alpha) * bkgd.z;
|
||||||
|
{{endif}}
|
||||||
|
pix.w = alpha;
|
||||||
|
|
||||||
// Clamp values. I think this is superfluous, but I'm not certain.
|
// Clamp values. I think this is superfluous, but I'm not certain.
|
||||||
pix.x = fminf(1.0f, pix.x);
|
pix.x = fminf(1.0f, pix.x);
|
||||||
pix.y = fminf(1.0f, pix.y);
|
pix.y = fminf(1.0f, pix.y);
|
||||||
@ -64,7 +79,7 @@ void colorclip(float4 *pixbuf, float gamma, float vibrancy, float highpow,
|
|||||||
|
|
||||||
pixbuf[i] = pix;
|
pixbuf[i] = pix;
|
||||||
}
|
}
|
||||||
"""
|
""")
|
||||||
|
|
||||||
class DensityEst(HunkOCode):
|
class DensityEst(HunkOCode):
|
||||||
"""
|
"""
|
||||||
|
@ -14,6 +14,7 @@ from fr0stlib.pyflam3.constants import *
|
|||||||
|
|
||||||
import pycuda.compiler
|
import pycuda.compiler
|
||||||
import pycuda.driver as cuda
|
import pycuda.driver as cuda
|
||||||
|
from pycuda.gpuarray import vec
|
||||||
|
|
||||||
from cuburn import affine
|
from cuburn import affine
|
||||||
from cuburn.code import util, mwc, iter, filtering
|
from cuburn.code import util, mwc, iter, filtering
|
||||||
@ -107,10 +108,11 @@ class Animation(object):
|
|||||||
"""
|
"""
|
||||||
self._iter = iter.IterCode(self.features)
|
self._iter = iter.IterCode(self.features)
|
||||||
self._de = filtering.DensityEst(self.features, self.genomes[0])
|
self._de = filtering.DensityEst(self.features, self.genomes[0])
|
||||||
|
cclip = filtering.ColorClip(self.features)
|
||||||
# TODO: make choice of filtering explicit
|
# TODO: make choice of filtering explicit
|
||||||
# TODO: autoload dependent modules?
|
# TODO: autoload dependent modules?
|
||||||
self.src = util.assemble_code(util.BaseCode, mwc.MWC, self._iter.packer,
|
self.src = util.assemble_code(util.BaseCode, mwc.MWC, self._iter.packer,
|
||||||
self._iter, filtering.ColorClip, self._de)
|
self._iter, cclip, self._de)
|
||||||
self.cubin = pycuda.compiler.compile(self.src, keep=keep,
|
self.cubin = pycuda.compiler.compile(self.src, keep=keep,
|
||||||
options=list(cmp_options))
|
options=list(cmp_options))
|
||||||
return self.src
|
return self.src
|
||||||
@ -256,6 +258,7 @@ class _AnimRenderer(object):
|
|||||||
|
|
||||||
# Must be accumulated over all CPs
|
# Must be accumulated over all CPs
|
||||||
gam, vib = 0, 0
|
gam, vib = 0, 0
|
||||||
|
bkgd = np.zeros(3)
|
||||||
|
|
||||||
# This is gross, but there are a lot of fiddly corner cases with any
|
# This is gross, but there are a lot of fiddly corner cases with any
|
||||||
# index-based iteration scheme.
|
# index-based iteration scheme.
|
||||||
@ -270,6 +273,7 @@ class _AnimRenderer(object):
|
|||||||
infos.append(info)
|
infos.append(info)
|
||||||
gam += cp.gamma
|
gam += cp.gamma
|
||||||
vib += cp.vibrancy
|
vib += cp.vibrancy
|
||||||
|
bkgd += np.array(cp.background)
|
||||||
else:
|
else:
|
||||||
# Can't interpolate normally; just pack copies
|
# Can't interpolate normally; just pack copies
|
||||||
# TODO: this still packs the genome 20 times or so instead of
|
# TODO: this still packs the genome 20 times or so instead of
|
||||||
@ -278,6 +282,7 @@ class _AnimRenderer(object):
|
|||||||
infos = [packed] * len(block_times)
|
infos = [packed] * len(block_times)
|
||||||
gam += a.genomes[0].gamma * len(block_times)
|
gam += a.genomes[0].gamma * len(block_times)
|
||||||
vib += a.genomes[0].vibrancy * len(block_times)
|
vib += a.genomes[0].vibrancy * len(block_times)
|
||||||
|
bkgd += np.array(a.genomes[0].background) * len(block_times)
|
||||||
|
|
||||||
infos = np.concatenate(infos)
|
infos = np.concatenate(infos)
|
||||||
offset = b * packer.align * self.cps_per_block
|
offset = b * packer.align * self.cps_per_block
|
||||||
@ -293,7 +298,7 @@ class _AnimRenderer(object):
|
|||||||
# TODO: get block config from IterCode
|
# TODO: get block config from IterCode
|
||||||
# TODO: print timing information
|
# TODO: print timing information
|
||||||
iter_fun(self.d_seeds[b], np.uint64(d_info_off),
|
iter_fun(self.d_seeds[b], np.uint64(d_info_off),
|
||||||
self.d_accum, self.d_den,
|
self.d_accum, self.d_den, texrefs=[tref],
|
||||||
block=(32, 16, 1), grid=(len(block_times), 1),
|
block=(32, 16, 1), grid=(len(block_times), 1),
|
||||||
stream=self.stream)
|
stream=self.stream)
|
||||||
|
|
||||||
@ -301,19 +306,17 @@ class _AnimRenderer(object):
|
|||||||
# stream here. Later, once we've decided on a density-buffer prefilter,
|
# stream here. Later, once we've decided on a density-buffer prefilter,
|
||||||
# we will move it to the GPU, allowing it to be embedded in the stream
|
# we will move it to the GPU, allowing it to be embedded in the stream
|
||||||
# and letting the remaining code be asynchronous.
|
# and letting the remaining code be asynchronous.
|
||||||
self.stream.synchronize()
|
#self.stream.synchronize()
|
||||||
dbuf_dim = (a.features.acc_height, a.features.acc_stride)
|
#dbuf_dim = (a.features.acc_height, a.features.acc_stride)
|
||||||
dbuf = cuda.from_device(self.d_den, dbuf_dim, np.float32)
|
#dbuf = cuda.from_device(self.d_den, dbuf_dim, np.float32)
|
||||||
dbuf = ndimage.filters.gaussian_filter(dbuf, 0.6)
|
#dbuf = ndimage.filters.gaussian_filter(dbuf, 0.6)
|
||||||
cuda.memcpy_htod(self.d_den, dbuf)
|
#cuda.memcpy_htod(self.d_den, dbuf)
|
||||||
|
|
||||||
util.BaseCode.zero_dptr(a.mod, self.d_out, 4 * self.nbins,
|
util.BaseCode.zero_dptr(a.mod, self.d_out, 4 * self.nbins,
|
||||||
self.stream)
|
self.stream)
|
||||||
self.stream.synchronize()
|
|
||||||
a._de.invoke(a.mod, Genome(cen_cp),
|
a._de.invoke(a.mod, Genome(cen_cp),
|
||||||
self.d_accum, self.d_out, self.d_den,
|
self.d_accum, self.d_out, self.d_den,
|
||||||
self.stream)
|
self.stream)
|
||||||
self.stream.synchronize()
|
|
||||||
|
|
||||||
f = np.float32
|
f = np.float32
|
||||||
n = f(self.ncps)
|
n = f(self.ncps)
|
||||||
@ -322,12 +325,12 @@ class _AnimRenderer(object):
|
|||||||
hipow = f(cen_cp.highlight_power)
|
hipow = f(cen_cp.highlight_power)
|
||||||
lin = f(cen_cp.gam_lin_thresh)
|
lin = f(cen_cp.gam_lin_thresh)
|
||||||
lingam = f(math.pow(cen_cp.gam_lin_thresh, gam-1.0) if lin > 0 else 0)
|
lingam = f(math.pow(cen_cp.gam_lin_thresh, gam-1.0) if lin > 0 else 0)
|
||||||
print gam, vib, lin, lingam, cen_cp.gamma
|
bkgd = vec.make_float3(*(bkgd / n))
|
||||||
|
|
||||||
# TODO: get block size from colorclip class? It actually does not
|
# TODO: get block size from colorclip class? It actually does not
|
||||||
# depend on that being the case
|
# depend on that being the case
|
||||||
color_fun = a.mod.get_function("colorclip")
|
color_fun = a.mod.get_function("colorclip")
|
||||||
color_fun(self.d_out, gam, vib, hipow, lin, lingam,
|
color_fun(self.d_out, gam, vib, hipow, lin, lingam, bkgd,
|
||||||
block=(256, 1, 1), grid=(self.nbins / 256, 1),
|
block=(256, 1, 1), grid=(self.nbins / 256, 1),
|
||||||
stream=self.stream)
|
stream=self.stream)
|
||||||
|
|
||||||
@ -402,6 +405,12 @@ class Features(object):
|
|||||||
# The filtering code makes deep assumptions about this value.
|
# The filtering code makes deep assumptions about this value.
|
||||||
gutter = 16
|
gutter = 16
|
||||||
|
|
||||||
|
# TODO: for now, we always throw away the alpha channel before writing.
|
||||||
|
# All code is in place to not do this; we just need to find a way to expose
|
||||||
|
# this preference via the API (or push alpha blending entirely onto the client,
|
||||||
|
# which I'm not opposed to).
|
||||||
|
alpha_output_channel = False
|
||||||
|
|
||||||
def __init__(self, genomes):
|
def __init__(self, genomes):
|
||||||
any = lambda l: bool(filter(None, map(l, genomes)))
|
any = lambda l: bool(filter(None, map(l, genomes)))
|
||||||
self.max_ntemporal_samples = max(
|
self.max_ntemporal_samples = max(
|
||||||
|
Loading…
Reference in New Issue
Block a user