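Fix rb_incr() when blockDim.y == 1.

The leader check compared threadIdx.x and threadIdx.y against 1, so in a
block with blockDim.y == 1 no thread matched and rb_idx was never written
before the barrier. The leader is now thread (0, 0). The ring buffer
head/tail fields, rb_idx, and the rb_incr() return type also change from
int to uint32_t.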

This commit is contained in:
Steven Robertson 2012-02-15 10:06:35 -05:00
parent 8210c8fc73
commit 88abefa4f4
2 changed files with 5 additions and 6 deletions

View File

@ -325,7 +325,6 @@ __global__ void interp_palette_flat(
float tstart, float tstep)
{
mwc_st rctx = rctxs[rb_incr(rb->head, threadIdx.x)];
int gid = blockIdx.x * blockDim.x + threadIdx.x;
float time = tstart + blockIdx.x * tstep;
float4 yuva = interp_color(times, sources, time);

View File

@ -280,13 +280,13 @@ def mkringbuflib(rb_size):
"""
return devlib(headers="#define RB_SIZE_MASK %d" % (rb_size - 1), decls='''
typedef struct {
int head;
int tail;
uint32_t head;
uint32_t tail;
} ringbuf;
''', defs=r'''
__shared__ int rb_idx;
__device__ int rb_incr(int &rb_base, int tidx) {
if (threadIdx.y == 1 && threadIdx.x == 1)
__shared__ uint32_t rb_idx;
__device__ uint32_t rb_incr(uint32_t &rb_base, int tidx) {
if (threadIdx.y == 0 && threadIdx.x == 0)
rb_idx = 256 * (atomicAdd(&rb_base, 1) & RB_SIZE_MASK);
__syncthreads();
return rb_idx + tidx;