# Patch summary (leading commentary; everything from the first "diff --git"
# onward is the unmodified patch text).
#
# Purpose: factor duplicated CUDA device code out of the clip kernels into
# two shared helpers.
#
# cuburn/code/color.py
#   Adds `__device__ void yuvo2rgb(float4& pix)`. It modifies `pix` in place:
#   subtracts 0.5f * pix.w from pix.y and pix.z, passes (x, y, z) through the
#   existing yuv2rgb(), and stores each returned channel clamped to >= 0 with
#   fmaxf. pix.w is read but not written.
#
# cuburn/code/util.py
#   Adds `__device__ void scale_float4(float4& pix, float scale)`, which
#   multiplies all four components of `pix` by `scale` in place.
#
# cuburn/code/filters.py
#   Hunk headed `haloclip(...)`: the four per-component `*= ls` statements
#   become `scale_float4(pix, ls)`, and the inline offset/convert/clamp
#   sequence becomes `yuvo2rgb(pix)`.
#   Hunk headed `colorclip(...)`: the inline offset/convert sequence and the
#   separate fmaxf clamp that followed it become a single `yuvo2rgb(pix)`.
#   The removed code assigned the yuv2rgb() result and then clamped; the
#   helper clamps while assigning, so the stored values are the same.
#
# NOTE(review): the hunks shown add only the function definitions. They do
#   not show a forward declaration for either helper, nor any change to the
#   library dependencies of the code in filters.py -- confirm that both
#   helpers are visible where haloclip/colorclip are compiled.
# NOTE(review): the line structure of this patch text has been flattened
#   (hunks run together on one line), so original indentation and blank
#   context lines cannot be recovered from it; regenerate from the repository
#   before attempting to apply it.
diff --git a/cuburn/code/color.py b/cuburn/code/color.py index 046bc84..0934c6d 100644 --- a/cuburn/code/color.py +++ b/cuburn/code/color.py @@ -28,6 +28,17 @@ __device__ float3 yuv2rgb(float3 yuv) { yuv.x - 0.34414f * yuv.y - 0.71414f * yuv.z, yuv.x + 1.772f * yuv.y); } + +// As used in the various cliplibs. +__device__ void yuvo2rgb(float4& pix) { + pix.y -= 0.5f * pix.w; + pix.z -= 0.5f * pix.w; + float3 tmp = yuv2rgb(make_float3(pix.x, pix.y, pix.z)); + pix.x = fmaxf(0.0f, tmp.x); + pix.y = fmaxf(0.0f, tmp.y); + pix.z = fmaxf(0.0f, tmp.z); +} + ''') hsvlib = devlib(decls=''' diff --git a/cuburn/code/filters.py b/cuburn/code/filters.py index 4de14e2..16d4733 100644 --- a/cuburn/code/filters.py +++ b/cuburn/code/filters.py @@ -241,17 +241,9 @@ haloclip(float4 *pixbuf, const float *denbuf, float gamma) { float ls = powf(pix.z, gamma) / fmaxf(1.0f, areaval); - pix.x *= ls; - pix.y *= ls; - pix.z *= ls; - pix.w *= ls; + scale_float4(pix, ls); - pix.y -= 0.5f * pix.w; - pix.z -= 0.5f * pix.w; - float3 tmp = yuv2rgb(make_float3(pix.x, pix.y, pix.z)); - pix.x = fmaxf(0.0f, tmp.x); - pix.y = fmaxf(0.0f, tmp.y); - pix.z = fmaxf(0.0f, tmp.z); + yuvo2rgb(pix); pixbuf[i] = pix; } @@ -269,17 +261,7 @@ colorclip(float4 *pixbuf, float gamma, float vibrance, float highpow, pixbuf[i] = make_float4(0, 0, 0, 0); return; } - pix.y -= 0.5f * pix.w; - pix.z -= 0.5f * pix.w; - float3 tmp = yuv2rgb(make_float3(pix.x, pix.y, pix.z)); - pix.x = tmp.x; - pix.y = tmp.y; - pix.z = tmp.z; - - pix.x = fmaxf(0.0f, pix.x); - pix.y = fmaxf(0.0f, pix.y); - pix.z = fmaxf(0.0f, pix.z); - + yuvo2rgb(pix); float4 opix = pix; float alpha = powf(pix.w, gamma); diff --git a/cuburn/code/util.py b/cuburn/code/util.py index 30bbd29..7b22502 100644 --- a/cuburn/code/util.py +++ b/cuburn/code/util.py @@ -181,6 +181,13 @@ __device__ uint32_t trunca(float f) { asm("cvt.rni.s32.f32 %0, %1;" : "=r"(ret) : "f"(f)); return ret; } + +__device__ void scale_float4(float4& pix, float scale) { + pix.x *= scale; + pix.y 
*= scale; + pix.z *= scale; + pix.w *= scale; +} ''') def mkbinsearchlib(rounds):