mirror of
https://github.com/stevenrobertson/cuburn.git
synced 2025-02-05 11:40:04 -05:00
Log scaling and color clipping (in a sense)
This commit is contained in:
parent
b710de4865
commit
cd803cb3af
66
cuburn/code/filter.py
Normal file
66
cuburn/code/filter.py
Normal file
@ -0,0 +1,66 @@
|
||||
|
||||
from cuburn.code.util import *
|
||||
|
||||
class ColorClip(HunkOCode):
    """CUDA source hunk that defines the ``logfilt`` colour-filter kernel.

    The ``defs`` string is CUDA C source; it is meant to be concatenated
    with the other hunks and compiled as one module (NOTE(review): the
    exact contract of ``HunkOCode`` lives in ``cuburn.code.util`` and is
    not visible here -- confirm).

    ``logfilt`` rewrites each ``float4`` pixel of ``pixbuf`` in place:

    1. log-scales all four channels by ``k1 * log(1 + w * k2) / w``,
       where ``w`` is the incoming fourth channel;
    2. applies the ``gamma`` exponent to the scaled fourth channel to get
       a per-pixel colour scale (the caller is expected to pass the
       exponent already inverted, i.e. ``1 / genome gamma`` -- TODO
       confirm against the launch site);
    3. handles channels that would exceed 1.0 according to ``highpow``:
       non-negative values desaturate the pixel towards its brightest
       channel, negative values blend between the "normalise to 1" scale
       and the plain scale;
    4. adds the non-vibrant contribution ``(1 - vibrancy) * c ** gamma``
       per channel and clamps each colour channel to 1.0.

    Pixels whose fourth channel is <= 0 are left untouched.  The kernel
    indexes pixels as ``512 * blockIdx.x + threadIdx.x`` and performs no
    bounds check, so the launch must not cover more threads than the
    buffer has pixels.
    """

    defs = """
__global__
void logfilt(float4 *pixbuf, float k1, float k2,
             float gamma, float vibrancy, float highpow) {
    // TODO: test if over an edge of the framebuffer
    int i = 512 * blockIdx.x + threadIdx.x;
    float4 pix = pixbuf[i];

    // Nothing accumulated here; also avoids dividing by zero below.
    if (pix.w <= 0) return;

    // Log-scale every channel by the same density-derived factor.
    float ls = k1 * logf(1.0 + pix.w * k2) / pix.w;
    pix.x *= ls;
    pix.y *= ls;
    pix.z *= ls;
    pix.w *= ls;

    // Keep the log-scaled colour for the non-vibrant term at the end.
    float4 opix = pix;

    // TODO: linearized bottom range
    float alpha = powf(pix.w, gamma);
    ls = vibrancy * alpha / pix.w;

    float maxc = fmaxf(pix.x, fmaxf(pix.y, pix.z));

    // True when the brightest channel would exceed 1.0 after scaling.
    // Because ls is finite here, this also implies maxc > 0, so 1 / maxc
    // is only ever formed when it is finite.  Computing it
    // unconditionally turned black-but-hit pixels (maxc == 0) into
    // 0 * inf = NaN, which the final fminf() then mapped to 1.0 (white).
    bool clipped = maxc * ls > 1;

    // TODO: detect if highlight power is globally disabled and drop
    // this branch

    if (clipped && highpow >= 0) {
        float newls = 1 / maxc;

        // TODO: does CUDA autopromote the int here to a float before GPU?
        float lsratio = powf(newls / ls, highpow);

        pix.x *= newls;
        pix.y *= newls;
        pix.z *= newls;
        maxc *= newls;

        // Reduce saturation (according to the HSV model) by proportionally
        // increasing the values of the other colors.

        pix.x = maxc - (maxc - pix.x) * lsratio;
        pix.y = maxc - (maxc - pix.y) * lsratio;
        pix.z = maxc - (maxc - pix.z) * lsratio;

    } else {
        // Default: plain scaling.  This is what the blend below reduces
        // to whenever the pixel does not clip.
        float adj = ls;

        if (clipped) {
            // Only reachable with highpow < 0: blend between normalising
            // the brightest channel to 1.0 and plain scaling, with the
            // blend weight -highpow capped at 1.
            float blend = (-highpow > 1) ? 1 : -highpow;
            adj = ((1.0 - blend) * (1 / maxc) + blend * ls);
        }

        pix.x *= adj;
        pix.y *= adj;
        pix.z *= adj;
    }

    // Add the gamma-corrected non-vibrant part and clamp to 1.0.
    pix.x = fminf(1.0, pix.x + (1.0 - vibrancy) * powf(opix.x, gamma));
    pix.y = fminf(1.0, pix.y + (1.0 - vibrancy) * powf(opix.y, gamma));
    pix.z = fminf(1.0, pix.z + (1.0 - vibrancy) * powf(opix.z, gamma));

    pixbuf[i] = pix;
}
"""
|
||||
|
||||
|
@ -10,7 +10,7 @@ from pycuda.compiler import SourceModule
|
||||
import numpy as np
|
||||
|
||||
from fr0stlib.pyflam3 import flam3_interpolate
|
||||
from cuburn.code import mwc, variations
|
||||
from cuburn.code import mwc, variations, filter
|
||||
from cuburn.code.util import *
|
||||
from cuburn.render import Genome
|
||||
|
||||
@ -109,7 +109,7 @@ void iter(mwc_st *msts, const iter_info *infos, float *accbuf, float *denbuf) {
|
||||
}
|
||||
|
||||
// TODO: dither?
|
||||
int i = ((int)((y + 1.0f) * 255.0f) * 512)
|
||||
int i = ((int)((1.0f - y) * 255.0f) * 512)
|
||||
+ (int)((x + 1.0f) * 255.0f);
|
||||
|
||||
// since info was declared const, C++ barfs unless it's loaded first
|
||||
@ -120,7 +120,6 @@ void iter(mwc_st *msts, const iter_info *infos, float *accbuf, float *denbuf) {
|
||||
accbuf[i*4+2] += outcol.z;
|
||||
accbuf[i*4+3] += outcol.w;
|
||||
denbuf[i] += 1.0f;
|
||||
|
||||
}
|
||||
}
|
||||
""")
|
||||
@ -136,7 +135,7 @@ def silly(features, cps):
|
||||
seeds = mwc.MWC.make_seeds(512 * nsteps)
|
||||
|
||||
iter = IterCode(features)
|
||||
code = assemble_code(BaseCode, mwc.MWC, iter, iter.packer)
|
||||
code = assemble_code(BaseCode, mwc.MWC, iter, iter.packer, filter.ColorClip)
|
||||
print code
|
||||
mod = SourceModule(code, options=['-use_fast_math'], keep=True)
|
||||
|
||||
@ -168,10 +167,26 @@ def silly(features, cps):
|
||||
tref.set_format(cuda.array_format.UNSIGNED_INT8, 4)
|
||||
tref.set_flags(cuda.TRSF_NORMALIZED_COORDINATES)
|
||||
|
||||
abufd = cuda.to_device(abuf)
|
||||
dbufd = cuda.to_device(dbuf)
|
||||
|
||||
fun = mod.get_function("iter")
|
||||
t = fun(InOut(seeds), In(infos), InOut(abuf), InOut(dbuf),
|
||||
t = fun(InOut(seeds), In(infos), abufd, dbufd,
|
||||
block=(512,1,1), grid=(nsteps,1), time_kernel=True)
|
||||
print "Completed render in %g seconds" % t
|
||||
|
||||
f = np.float32
|
||||
|
||||
k1 = cp.contrast * cp.brightness * 268 / 256
|
||||
area = 1
|
||||
k2 = 4 / (cp.contrast * 5000)
|
||||
|
||||
fun = mod.get_function("logfilt")
|
||||
t = fun(abufd, f(k1), f(k2),
|
||||
f(1 / cp.gamma), f(cp.vibrancy), f(cp.highlight_power),
|
||||
block=(512,1,1), grid=(512,1), time_kernel=True)
|
||||
print "Completed color filtering in %g seconds" % t
|
||||
|
||||
abuf = cuda.from_device_like(abufd, abuf)
|
||||
return abuf, dbuf
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user