Fix dumb overflow bug

This commit is contained in:
Steven Robertson 2011-11-11 17:54:33 -05:00
parent eb43b151dc
commit 9ef5363652

View File

@ -372,27 +372,20 @@ void iter(
#define SHAB 12 #define SHAB 12
#define SHAW (1<<SHAB) #define SHAW (1<<SHAB)
// These two accumulators, used in write_shmem, hold {density, red} and
// {green, blue} values as packed u16 pairs. The fixed size represents 4,096
// pixels in the accumulator.
__shared__ uint32_t s_acc_dr[SHAW];
__shared__ uint32_t s_acc_gb[SHAW];
// Read from the shm accumulators and write to the global ones. // Read from the shm accumulators and write to the global ones.
__device__ __device__
void write_shmem_helper( void write_shmem_helper(
float4 *acc, float4 *acc,
const int glo_base, const int glo_idx,
const int idx const int dr,
const int gb
) { ) {
float4 pix = acc[glo_base+idx]; float4 pix = acc[glo_idx];
uint32_t dr = s_acc_dr[idx];
pix.x += (dr & 0xffff) / 255.0f; pix.x += (dr & 0xffff) / 255.0f;
pix.w += dr >> 16; pix.w += dr >> 16;
uint32_t gb = s_acc_gb[idx];
pix.y += (gb & 0xffff) / 255.0f; pix.y += (gb & 0xffff) / 255.0f;
pix.z += (gb >> 16) / 255.0f; pix.z += (gb >> 16) / 255.0f;
acc[glo_base+idx] = pix; acc[glo_idx] = pix;
} }
// Read the point log, accumulate in shared memory, and write the results. // Read the point log, accumulate in shared memory, and write the results.
@ -417,6 +410,12 @@ write_shmem(
const int tid = threadIdx.x; const int tid = threadIdx.x;
const int bid = blockIdx.x; const int bid = blockIdx.x;
// These two accumulators, used in write_shmem, hold {density, red} and
// {green, blue} values as packed u16 pairs. The fixed size represents
// 4,096 pixels in the accumulator.
__shared__ uint32_t s_acc_dr[SHAW];
__shared__ uint32_t s_acc_gb[SHAW];
// TODO: doesn't respect SHAW/BS // TODO: doesn't respect SHAW/BS
// TODO: compare generated code with unrolled for-loop // TODO: compare generated code with unrolled for-loop
s_acc_dr[tid] = 0; s_acc_dr[tid] = 0;
@ -483,7 +482,7 @@ write_shmem(
if (d == 250) { if (d == 250) {
atomicSub(s_acc_dr + shr_addr, dr); atomicSub(s_acc_dr + shr_addr, dr);
atomicSub(s_acc_gb + shr_addr, gb); atomicSub(s_acc_gb + shr_addr, gb);
write_shmem_helper(acc, glo_base, shr_addr); write_shmem_helper(acc, glo_base + shr_addr, dr, gb);
} }
time += time_step; time += time_step;
} }
@ -491,7 +490,7 @@ write_shmem(
__syncthreads(); __syncthreads();
int idx = tid; int idx = tid;
for (int i = 0; i < (SHAW / BS); i++) { for (int i = 0; i < (SHAW / BS); i++) {
write_shmem_helper(acc, glo_base, idx); write_shmem_helper(acc, glo_base + idx, s_acc_dr[idx], s_acc_gb[idx]);
idx += BS; idx += BS;
} }
} }