mirror of
https://github.com/stevenrobertson/cuburn.git
synced 2025-07-12 03:05:14 -04:00
Refactor API
--HG-- rename : cuburn/code/filter.py => cuburn/code/filtering.py
This commit is contained in:
@ -223,24 +223,22 @@ void density_est(float4 *pixbuf, float4 *outbuf, float *denbuf,
|
||||
|
||||
""")
|
||||
|
||||
def invoke(self, mod, abufd, obufd, dbufd, stream=None):
    """
    Launch the filter kernel selected by ``self.cp.estimator``.

    When the estimator is 0 the "logscale" function of ``mod`` is launched
    on ``abufd`` and ``obufd``; otherwise "density_est" is launched on
    ``abufd``, ``obufd`` and ``dbufd``. Both receive the scale factors
    ``k1`` and ``k2`` as 32-bit floats.

    ``stream`` is forwarded unchanged as the launch's ``stream`` keyword,
    so existing callers that omit it still work. Nothing is returned.
    """
    # TODO: add no-est version
    # TODO: come up with a general way to average these parameters

    # Float operands keep these factors from truncating under Python 2
    # integer division should a genome field arrive as an int.
    k1 = self.cp.brightness * 268 / 256.0
    area = (float(self.features.acc_width * self.features.acc_height)
            / self.cp.ppu ** 2)
    k2 = 1.0 / (area * self.cp.adj_density)

    if self.cp.estimator == 0:
        fun = mod.get_function("logscale")
        # One block per accumulator row, one thread per accumulator column.
        # NOTE(review): acc_width is used directly as the block width;
        # confirm it never exceeds the device's threads-per-block limit.
        fun(abufd, obufd, np.float32(k1), np.float32(k2),
            block=(self.features.acc_width, 1, 1),
            grid=(self.features.acc_height, 1), stream=stream)
    else:
        fun = mod.get_function("density_est")
        # Floor division keeps the grid dimension an integer regardless of
        # which division semantics are in effect.
        fun(abufd, obufd, dbufd, np.float32(k1), np.float32(k2),
            block=(32, 32, 1), grid=(self.features.acc_width // 32, 1),
            stream=stream)
|
||||
|
@ -2,20 +2,13 @@
|
||||
The main iteration loop.
|
||||
"""
|
||||
|
||||
from ctypes import byref, memset, sizeof
|
||||
|
||||
import pycuda.driver as cuda
|
||||
from pycuda.driver import In, Out, InOut
|
||||
from pycuda.compiler import SourceModule
|
||||
import numpy as np
|
||||
from scipy import ndimage
|
||||
|
||||
from fr0stlib.pyflam3 import flam3_interpolate
|
||||
from cuburn.code import mwc, variations, filter
|
||||
from cuburn.code import mwc, variations
|
||||
from cuburn.code.util import *
|
||||
from cuburn.render import Genome
|
||||
|
||||
class IterCode(HunkOCode):
|
||||
# The number of threads per block
|
||||
NTHREADS = 512
|
||||
|
||||
def __init__(self, features):
|
||||
self.features = features
|
||||
self.packer = DataPacker('iter_info')
|
||||
@ -69,14 +62,14 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
|
||||
iter_info *info_glob = &(infos[blockIdx.x]);
|
||||
|
||||
// load info to shared memory cooperatively
|
||||
for (int i = threadIdx.y * 32 + threadIdx.x;
|
||||
for (int i = threadIdx.y * blockDim.x + threadIdx.x;
|
||||
i * 4 < sizeof(iter_info); i += blockDim.x * blockDim.y)
|
||||
reinterpret_cast<float*>(&info)[i] =
|
||||
reinterpret_cast<float*>(info_glob)[i];
|
||||
|
||||
int consec_bad = -{{features.fuse}};
|
||||
// TODO: make nsteps adjustable via genome
|
||||
int nsamps = {{packer.get('cp.width * cp.height / 512000. * cp.adj_density')}};
|
||||
// TODO: remove '512' constant
|
||||
int nsamps = {{packer.get('cp.width * cp.height / (cp.ntemporal_samples * 512.) * cp.adj_density')}};
|
||||
|
||||
float x, y, color;
|
||||
x = mwc_next_11(&rctx);
|
||||
@ -157,86 +150,3 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
|
||||
packer = self.packer.view('info'),
|
||||
**globals())
|
||||
|
||||
def render(features, cps):
    """
    Render the control points in ``cps`` and return ``(abuf, dbuf)``.

    The generated source is assembled, printed with line numbers and
    compiled; one packed info record per step is built (interpolated when
    more than one control point is given, otherwise the single record is
    repeated); the "iter" kernel is run, the density buffer is blurred on
    the host, and the density-estimation and "colorclip" filters are
    applied. ``abuf`` is the filtered buffer read back from the device and
    ``dbuf`` is the blurred host copy of the density buffer.
    """
    # TODO: make this adjustable via genome
    nsteps = 1000
    acc_shape = (features.acc_height, features.acc_stride)
    abuf = np.zeros(acc_shape + (4,), dtype=np.float32)
    dbuf = np.zeros(acc_shape, dtype=np.float32)
    seeds = mwc.MWC.make_seeds(512 * nsteps)

    itercode = IterCode(features)
    de = filter.DensityEst(features, cps[0])
    code = assemble_code(BaseCode, mwc.MWC, itercode.packer, itercode,
                         filter.ColorClip, de)

    # Dump the generated source with line numbers before compiling it.
    for lno, line in enumerate(code.split('\n')):
        print('%3d %s' % (lno, line))
    mod = SourceModule(code,
                       options=['-use_fast_math', '-maxrregcount', '32'])

    cps_as_array = (Genome * len(cps))()
    for slot, genome in enumerate(cps):
        cps_as_array[slot] = genome

    infos = []
    pal = np.empty((16, 256, 4), dtype=np.uint8)

    # TODO: move this into a common function
    if len(cps) > 1:
        interp = Genome()
        memset(byref(interp), 0, sizeof(interp))

        # Map each sampled step to the first palette row that samples it.
        palette_row_at = {}
        for row in range(16):
            palette_row_at.setdefault(int(row / 15. * (nsteps - 1)), row)

        for n in range(nsteps):
            frac = float(n) / nsteps
            # NOTE(review): the control-point count is hard-coded to 2 even
            # though this branch is taken for any len(cps) > 1 - confirm.
            flam3_interpolate(cps_as_array, 2, frac - 0.5,
                              0, byref(interp))
            interp._init()
            row = palette_row_at.get(n)
            if row is not None:
                for i, entry in enumerate(interp.palette.entries):
                    pal[row][i] = np.uint8(np.array(entry.color) * 255.0)
            infos.append(itercode.packer.pack(cp=interp, cp_step_frac=frac))
        # The colour filter below reads its parameters from the genome left
        # by the last interpolation step.
        final_cp = interp
    else:
        only_cp = cps[0]
        for i, entry in enumerate(only_cp.palette.entries):
            pal[0][i] = np.uint8(np.array(entry.color) * 255.0)
        pal[1:] = pal[0]
        # A single control point: every step shares the same packed record.
        infos = [itercode.packer.pack(cp=only_cp, cp_step_frac=0)] * nsteps
        final_cp = only_cp

    infos = np.concatenate(infos)

    dpal = cuda.make_multichannel_2d_array(pal, 'C')
    tref = mod.get_texref('palTex')
    tref.set_array(dpal)
    tref.set_format(cuda.array_format.UNSIGNED_INT8, 4)
    tref.set_flags(cuda.TRSF_NORMALIZED_COORDINATES)
    tref.set_filter_mode(cuda.filter_mode.LINEAR)

    abufd = cuda.to_device(abuf)
    dbufd = cuda.to_device(dbuf)

    iter_fun = mod.get_function("iter")
    iter_fun.set_cache_config(cuda.func_cache.PREFER_L1)
    elapsed = iter_fun(InOut(seeds), InOut(infos), abufd, dbufd,
                       block=(32, 16, 1), grid=(nsteps, 1), time_kernel=True)
    print("Completed render in %g seconds" % elapsed)

    # TODO: just allocate
    obufd = cuda.to_device(abuf)
    # The density buffer makes a round trip through the host so it can be
    # blurred before the density-estimation pass reads it.
    dbuf = cuda.from_device_like(dbufd, dbuf)
    dbuf = ndimage.filters.gaussian_filter(dbuf, 0.6)
    dbufd = cuda.to_device(dbuf)
    de.invoke(mod, abufd, obufd, dbufd)

    f = np.float32
    npix = features.acc_width * features.acc_height
    clip_fun = mod.get_function("colorclip")
    # NOTE(review): the grid size rounds down, so a pixel count that is not
    # a multiple of 256 launches fewer than npix threads - confirm intended.
    elapsed = clip_fun(obufd, f(1 / final_cp.gamma), f(final_cp.vibrancy),
                       f(final_cp.highlight_power),
                       block=(256, 1, 1), grid=(npix // 256, 1),
                       time_kernel=True)
    print("Completed color filtering in %g seconds" % elapsed)

    abuf = cuda.from_device_like(obufd, abuf)
    return abuf, dbuf
|
||||
|
||||
|
@ -66,8 +66,26 @@ int trunca(float f) {
|
||||
asm("cvt.rni.s32.f32 %0, %1;" : "=r"(ret) : "f"(f));
|
||||
return ret;
|
||||
}
|
||||
|
||||
__global__
void zero_dptr(float* dptr, int size) {
    // Each thread clears the single element matching its flattened x index;
    // threads whose index is at or beyond `size` leave memory untouched.
    const int idx = threadIdx.x + blockDim.x * blockIdx.x;
    if (idx >= size)
        return;
    dptr[idx] = 0.0f;
}
|
||||
"""
|
||||
|
||||
@staticmethod
def zero_dptr(mod, dptr, size, stream=None):
    """
    A memory zeroer which can be embedded in a stream. Size is the
    number of 4-byte words in the pointer.

    Launches the "zero_dptr" function of ``mod`` with 1024-thread blocks,
    using just enough blocks to give every word its own thread. At least
    one block is always launched, so a size of zero still enqueues a
    launch that writes nothing. ``stream`` is forwarded unchanged as the
    launch's ``stream`` keyword.
    """
    threads = 1024
    size = int(size)
    # Integer ceiling division: the grid dimension stays an int under both
    # Python 2 and true-division semantics, and an exact multiple of the
    # block size no longer costs an extra, idle block.
    nblocks = max(1, (size + threads - 1) // threads)
    zero = mod.get_function("zero_dptr")
    zero(dptr, np.int32(size), stream=stream,
         block=(threads, 1, 1), grid=(nblocks, 1))
|
||||
|
||||
class DataPackerView(object):
|
||||
"""
|
||||
View of a data packer. Intended to be initialized using DataPacker.view().
|
||||
|
Reference in New Issue
Block a user