diff --git a/cuburn/code/interp.py b/cuburn/code/interp.py index 2c1a2e4..e1c6968 100644 --- a/cuburn/code/interp.py +++ b/cuburn/code/interp.py @@ -325,7 +325,6 @@ __global__ void interp_palette_flat( float tstart, float tstep) { mwc_st rctx = rctxs[rb_incr(rb->head, threadIdx.x)]; - int gid = blockIdx.x * blockDim.x + threadIdx.x; float time = tstart + blockIdx.x * tstep; float4 yuva = interp_color(times, sources, time); diff --git a/cuburn/code/util.py b/cuburn/code/util.py index 062b751..e3501da 100644 --- a/cuburn/code/util.py +++ b/cuburn/code/util.py @@ -280,13 +280,13 @@ def mkringbuflib(rb_size): """ return devlib(headers="#define RB_SIZE_MASK %d" % (rb_size - 1), decls=''' typedef struct { - int head; - int tail; + uint32_t head; + uint32_t tail; } ringbuf; ''', defs=r''' -__shared__ int rb_idx; -__device__ int rb_incr(int &rb_base, int tidx) { - if (threadIdx.y == 1 && threadIdx.x == 1) +__shared__ uint32_t rb_idx; +__device__ uint32_t rb_incr(uint32_t &rb_base, int tidx) { + if (threadIdx.y == 0 && threadIdx.x == 0) rb_idx = 256 * (atomicAdd(&rb_base, 1) & RB_SIZE_MASK); __syncthreads(); return rb_idx + tidx;