diff --git a/cuburn/code/filtering.py b/cuburn/code/filtering.py index 10515bc..702c15a 100644 --- a/cuburn/code/filtering.py +++ b/cuburn/code/filtering.py @@ -8,10 +8,10 @@ class ColorClip(HunkOCode): defs_tmpl = Template(''' __global__ void colorclip(float4 *pixbuf, float gamma, float vibrancy, float highpow, - float linrange, float lingam, float3 bkgd) { - // TODO: test if over an edge of the framebuffer - currently gutters are - // used and up to 256 pixels are ignored, which breaks when width<256 - int i = (gridDim.x * blockIdx.y + blockIdx.x) * blockDim.x + threadIdx.x; + float linrange, float lingam, float3 bkgd, int fbsize) { + int i = gtid(); + if (i >= fbsize) return; + float4 pix = pixbuf[i]; if (pix.w <= 0) { diff --git a/cuburn/render.py b/cuburn/render.py index d79659d..28dd6bd 100644 --- a/cuburn/render.py +++ b/cuburn/render.py @@ -338,7 +338,7 @@ class Animation(object): color_fun = self.mod.get_function("colorclip") blocks = int(np.ceil(np.sqrt(nbins / 256))) - color_fun(d_out, gam, vib, hipow, lin, lingam, bkgd, + color_fun(d_out, gam, vib, hipow, lin, lingam, bkgd, np.int32(nbins), block=(256, 1, 1), grid=(blocks, blocks), stream=filt_stream) cuda.memcpy_dtoh_async(h_out, d_out, filt_stream)