diff --git a/cuburn/code/output.py b/cuburn/code/output.py index 808698b..1644e74 100644 --- a/cuburn/code/output.py +++ b/cuburn/code/output.py @@ -1,7 +1,19 @@ from util import devlib, ringbuflib from mwc import mwclib -rgba8lib = devlib(deps=[ringbuflib, mwclib], defs=r''' +ditherlib = devlib(deps=[mwclib], defs=r''' +// Clamp an input between 0 and a given peak (inclusive), dithering its output, +// with full clamping for pixels that are true-black for compressibility. +__device__ float dclampf(mwc_st &rctx, float peak, float in) { + float ret = 0.0f; + if (in > 0.0f) { + ret = fminf(peak, fmaxf(0.0f, in * peak + 0.49f * mwc_next_11(rctx))); + } + return ret; +} +''') + +rgba8lib = devlib(deps=[ringbuflib, mwclib, ditherlib], defs=r''' // Perform a conversion from float32 values to uint8 ones, applying // pixel- and channel-independent dithering to reduce suprathreshold banding // artifacts. Clamps values larger than 1.0f. @@ -22,10 +34,10 @@ __global__ void f32_to_rgba_u8( float4 in = src[isrc]; uchar4 out = make_uchar4( - fminf(1.0f, in.x) * 255.0f + 0.49f * mwc_next_11(rctx), - fminf(1.0f, in.y) * 255.0f + 0.49f * mwc_next_11(rctx), - fminf(1.0f, in.z) * 255.0f + 0.49f * mwc_next_11(rctx), - fminf(1.0f, in.w) * 255.0f + 0.49f * mwc_next_11(rctx) + dclampf(rctx, 255.0f, in.x), + dclampf(rctx, 255.0f, in.y), + dclampf(rctx, 255.0f, in.z), + dclampf(rctx, 255.0f, in.w) ); int idst = dstride * y + x; @@ -34,7 +46,7 @@ __global__ void f32_to_rgba_u8( } ''') -rgba16lib = devlib(deps=[ringbuflib, mwclib], defs=r''' +rgba16lib = devlib(deps=[ringbuflib, mwclib, ditherlib], defs=r''' // Perform a conversion from float32 values to uint16 ones, as above. __global__ void f32_to_rgba_u16( ushort4 *dst, const float4 *src, @@ -51,10 +63,10 @@ __global__ void f32_to_rgba_u16( float4 in = src[isrc]; ushort4 out = make_ushort4( - fminf(1.0f, in.x) * 65535.0f + 0.49f * mwc_next_11(rctx), - fminf(1.0f, in.y) * 65535.0f + 0.49f * mwc_next_11(rctx), - fminf(1.0f, in.z) * 65535.0f + 0.49f * mwc_next_11(rctx), - fminf(1.0f, in.w) * 65535.0f + 0.49f * mwc_next_11(rctx) + dclampf(rctx, 65535.0f, in.x), + dclampf(rctx, 65535.0f, in.y), + dclampf(rctx, 65535.0f, in.z), + dclampf(rctx, 65535.0f, in.w) ); int idst = dstride * y + x;