Review notes (text ahead of the first "diff --git" line is not part of any hunk):
  * This patch was reflowed from a whitespace-collapsed copy. Indentation and
    the position of blank context lines are a best-effort reconstruction --
    TODO confirm against the target tree before applying.
  * logfilt: newls is forced to 0 when maxc is 0, so an all-black pixel with
    non-zero density no longer turns into NaN (and then into 1.0 via fminf).
  * silly(): dbuf is copied back from the device alongside abuf, the unused
    local "area" is dropped, and float literals guard the Python 2 divisions.
  * Hunk line counts were updated for the edited lines. The "index" blob ids
    are carried over from the original patch and presumably no longer match
    the edited content.

diff --git a/cuburn/code/filter.py b/cuburn/code/filter.py
new file mode 100644
index 0000000..c34392f
--- /dev/null
+++ b/cuburn/code/filter.py
@@ -0,0 +1,71 @@
+
+from cuburn.code.util import *
+
+class ColorClip(HunkOCode):
+    """CUDA source hunk defining the logfilt color-filtering kernel."""
+    defs = """
+// Log-scales one float4 pixel of pixbuf in place, then applies gamma,
+// vibrancy and highlight power. Pixels with non-positive w are left untouched.
+__global__
+void logfilt(float4 *pixbuf, float k1, float k2,
+             float gamma, float vibrancy, float highpow) {
+    // TODO: test if over an edge of the framebuffer
+    int i = 512 * blockIdx.x + threadIdx.x;
+    float4 pix = pixbuf[i];
+
+    if (pix.w <= 0) return;
+
+    float ls = k1 * logf(1.0 + pix.w * k2) / pix.w;
+    pix.x *= ls;
+    pix.y *= ls;
+    pix.z *= ls;
+    pix.w *= ls;
+
+    float4 opix = pix;
+
+    // TODO: linearized bottom range
+    float alpha = powf(pix.w, gamma);
+    ls = vibrancy * alpha / pix.w;
+
+    float maxc = fmaxf(pix.x, fmaxf(pix.y, pix.z));
+    // When every color channel is zero, 1 / maxc is infinite and the
+    // 0 * inf product in the else branch below becomes NaN; use 0 instead.
+    float newls = (maxc != 0.0f) ? 1.0f / maxc : 0.0f;
+
+    // TODO: detect if highlight power is globally disabled and drop
+    // this branch
+
+    if (maxc * ls > 1 && highpow >= 0) {
+        // TODO: does CUDA autopromote the int here to a float before GPU?
+        float lsratio = powf(newls / ls, highpow);
+
+        pix.x *= newls;
+        pix.y *= newls;
+        pix.z *= newls;
+        maxc *= newls;
+
+        // Reduce saturation (according to the HSV model) by proportionally
+        // increasing the values of the other colors.
+
+        pix.x = maxc - (maxc - pix.x) * lsratio;
+        pix.y = maxc - (maxc - pix.y) * lsratio;
+        pix.z = maxc - (maxc - pix.z) * lsratio;
+
+    } else {
+        highpow = -highpow;
+        if (highpow > 1 || maxc * ls <= 1) highpow = 1;
+        float adj = ((1.0 - highpow) * newls + highpow * ls);
+        pix.x *= adj;
+        pix.y *= adj;
+        pix.z *= adj;
+    }
+
+    pix.x = fminf(1.0, pix.x + (1.0 - vibrancy) * powf(opix.x, gamma));
+    pix.y = fminf(1.0, pix.y + (1.0 - vibrancy) * powf(opix.y, gamma));
+    pix.z = fminf(1.0, pix.z + (1.0 - vibrancy) * powf(opix.z, gamma));
+
+    pixbuf[i] = pix;
+}
+"""
+
+
diff --git a/cuburn/code/iter.py b/cuburn/code/iter.py
index e8318b4..4d947dc 100644
--- a/cuburn/code/iter.py
+++ b/cuburn/code/iter.py
@@ -10,7 +10,7 @@ from pycuda.compiler import SourceModule
 import numpy as np
 from fr0stlib.pyflam3 import flam3_interpolate
 
-from cuburn.code import mwc, variations
+from cuburn.code import mwc, variations, filter
 from cuburn.code.util import *
 from cuburn.render import Genome
 
@@ -109,7 +109,7 @@ void iter(mwc_st *msts, const iter_info *infos, float *accbuf, float *denbuf) {
         }
 
         // TODO: dither?
-        int i = ((int)((y + 1.0f) * 255.0f) * 512)
+        int i = ((int)((1.0f - y) * 255.0f) * 512)
             + (int)((x + 1.0f) * 255.0f);
 
         // since info was declared const, C++ barfs unless it's loaded first
@@ -120,7 +120,6 @@ void iter(mwc_st *msts, const iter_info *infos, float *accbuf, float *denbuf) {
         accbuf[i*4+2] += outcol.z;
         accbuf[i*4+3] += outcol.w;
         denbuf[i] += 1.0f;
-
     }
 }
 """)
@@ -136,7 +135,7 @@ def silly(features, cps):
     seeds = mwc.MWC.make_seeds(512 * nsteps)
 
     iter = IterCode(features)
-    code = assemble_code(BaseCode, mwc.MWC, iter, iter.packer)
+    code = assemble_code(BaseCode, mwc.MWC, iter, iter.packer, filter.ColorClip)
     print code
     mod = SourceModule(code, options=['-use_fast_math'], keep=True)
 
@@ -168,10 +167,30 @@ def silly(features, cps):
     tref.set_format(cuda.array_format.UNSIGNED_INT8, 4)
     tref.set_flags(cuda.TRSF_NORMALIZED_COORDINATES)
 
+    abufd = cuda.to_device(abuf)
+    dbufd = cuda.to_device(dbuf)
+
     fun = mod.get_function("iter")
-    t = fun(InOut(seeds), In(infos), InOut(abuf), InOut(dbuf),
+    t = fun(InOut(seeds), In(infos), abufd, dbufd,
         block=(512,1,1), grid=(nsteps,1), time_kernel=True)
     print "Completed render in %g seconds" % t
 
+    f = np.float32
+
+    # Float literals keep these from truncating under Python 2 integer
+    # division should the genome values ever be ints.
+    k1 = cp.contrast * cp.brightness * 268.0 / 256
+    k2 = 4.0 / (cp.contrast * 5000)
+
+    fun = mod.get_function("logfilt")
+    t = fun(abufd, f(k1), f(k2),
+        f(1.0 / cp.gamma), f(cp.vibrancy), f(cp.highlight_power),
+        block=(512,1,1), grid=(512,1), time_kernel=True)
+    print "Completed color filtering in %g seconds" % t
+
+    # Both buffers were uploaded with to_device, so copy both back; the host
+    # dbuf is otherwise returned without the kernel's density updates.
+    abuf = cuda.from_device_like(abufd, abuf)
+    dbuf = cuda.from_device_like(dbufd, dbuf)
     return abuf, dbuf
 
diff --git a/main.py b/main.py
index 15ebd7d..028355c 100644
--- a/main.py
+++ b/main.py
@@ -48,7 +48,7 @@ def main(args):
     if '-g' not in args:
         return
 
-    imgbuf = (accum * 255).astype(np.uint8)
+    imgbuf = (np.minimum(accum * 255, 255)).astype(np.uint8)
 
     window = pyglet.window.Window(1600, 900)
     image = pyglet.image.ImageData(512, 512, 'RGBA', imgbuf.tostring())