diff --git a/cuburn/code/filters.py b/cuburn/code/filters.py
index 4d03309..74067af 100644
--- a/cuburn/code/filters.py
+++ b/cuburn/code/filters.py
@@ -220,6 +220,50 @@ bilateral(float4 *dst, int pattern, int radius,
 }
 ''')
 
+# Device code for the halo-clip filter; depends on yuvlib and denblurlib.
+#   apply_gamma: for each float4 pixel of `src`, writes
+#       pix.x * max(0, pix.z)^gamma into the single-channel buffer `dst`.
+#   haloclip: zeroes pixels whose w component is <= 0; otherwise scales all
+#       four components by pix.z^gamma / max(1, denbuf[i]), subtracts
+#       0.5 * w from y and z, converts (x, y, z) with yuv2rgb and clamps
+#       the three results at zero. The scaled w is written back unchanged.
+halocliplib = devlib(deps=[yuvlib, denblurlib], defs=r'''
+__global__ void apply_gamma(float *dst, float4 *src, float gamma) {
+    GET_IDX(i);
+    float4 pix = src[i];
+    float ls = powf(fmaxf(0.0f, src[i].z), gamma);
+    dst[i] = ls * pix.x;
+}
+
+__global__ void
+haloclip(float4 *pixbuf, const float *denbuf, float gamma) {
+    GET_IDX(i);
+    float4 pix = pixbuf[i];
+    float areaval = denbuf[i];
+
+    if (pix.w <= 0) {
+        pixbuf[i] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+        return;
+    }
+
+    float ls = powf(pix.z, gamma) / fmaxf(1.0f, areaval);
+
+    pix.x *= ls;
+    pix.y *= ls;
+    pix.z *= ls;
+    pix.w *= ls;
+
+    pix.y -= 0.5f * pix.w;
+    pix.z -= 0.5f * pix.w;
+    float3 tmp = yuv2rgb(make_float3(pix.x, pix.y, pix.z));
+    pix.x = fmaxf(0.0f, tmp.x);
+    pix.y = fmaxf(0.0f, tmp.y);
+    pix.z = fmaxf(0.0f, tmp.z);
+
+    pixbuf[i] = pix;
+}
+''')
+
 colorcliplib = devlib(deps=[yuvlib], defs=r'''
 __global__ void
 colorclip(float4 *pixbuf, float gamma, float vibrance, float highpow,
diff --git a/cuburn/filters.py b/cuburn/filters.py
index f45b5df..0cc99d1 100644
--- a/cuburn/filters.py
+++ b/cuburn/filters.py
@@ -85,6 +85,37 @@ class Logscale(Filter, ClsMod):
         launch2('logscale', self.mod, stream, dim,
                 fb.d_front, fb.d_front, k1, k2)
 
+class HaloClip(Filter, ClsMod):
+    """Filter that runs the halocliplib kernels over the framebuffer.
+
+    Writes a gamma-weighted single-channel image of the front buffer into
+    the side buffer, passes it through two ``den_blur_1c`` launches, and
+    feeds the result to ``haloclip`` as its per-pixel ``denbuf`` input.
+    """
+    lib = code.filters.halocliplib
+    def apply(self, fb, gnm, dim, tc, stream=None):
+        # Exponent handed to both 'apply_gamma' and 'haloclip'.
+        gam = f32(1 / gnm.color.gamma(tc) - 1)
+
+        dsc = mkdsc(dim, 1)
+        tref = mktref(self.mod, 'chan1_src')
+
+        launch2('apply_gamma', self.mod, stream, dim,
+                fb.d_side, fb.d_front, gam)
+        # First den_blur_1c launch: texture bound to d_side, output d_back.
+        tref.set_address_2d(fb.d_side, dsc, 4 * dim.astride)
+        launch2('den_blur_1c', self.mod, stream, dim,
+                fb.d_back, i32(0), i32(0), texrefs=[tref])
+        # Second den_blur_1c launch: texture bound to d_back, output d_side.
+        tref.set_address_2d(fb.d_back, dsc, 4 * dim.astride)
+        launch2('den_blur_1c', self.mod, stream, dim,
+                fb.d_side, i32(1), i32(0), texrefs=[tref])
+
+        # The haloclip kernel is declared as (pixbuf, denbuf, gamma), so
+        # the exponent must be passed along with the two buffers.
+        launch2('haloclip', self.mod, stream, dim,
+                fb.d_front, fb.d_side, gam)
+
 class ColorClip(Filter, ClsMod):
     lib = code.filters.colorcliplib
     def apply(self, fb, gnm, dim, tc, stream=None):