From 9ef53636521c1799aa1db34cd7f4e75e52e22df7 Mon Sep 17 00:00:00 2001 From: Steven Robertson Date: Fri, 11 Nov 2011 17:54:33 -0500 Subject: [PATCH] Fix dumb overflow bug --- cuburn/code/iter.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/cuburn/code/iter.py b/cuburn/code/iter.py index 2399540..0825dbf 100644 --- a/cuburn/code/iter.py +++ b/cuburn/code/iter.py @@ -372,27 +372,20 @@ void iter( #define SHAB 12 #define SHAW (1<> 16; - uint32_t gb = s_acc_gb[idx]; pix.y += (gb & 0xffff) / 255.0f; pix.z += (gb >> 16) / 255.0f; - acc[glo_base+idx] = pix; + acc[glo_idx] = pix; } // Read the point log, accumulate in shared memory, and write the results. @@ -417,6 +410,12 @@ write_shmem( const int tid = threadIdx.x; const int bid = blockIdx.x; + // These two accumulators, used in write_shmem, hold {density, red} and + // {green, blue} values as packed u16 pairs. The fixed size represents + // 4,096 pixels in the accumulator. + __shared__ uint32_t s_acc_dr[SHAW]; + __shared__ uint32_t s_acc_gb[SHAW]; + // TODO: doesn't respect SHAW/BS // TODO: compare generated code with unrolled for-loop s_acc_dr[tid] = 0; @@ -483,7 +482,7 @@ write_shmem( if (d == 250) { atomicSub(s_acc_dr + shr_addr, dr); atomicSub(s_acc_gb + shr_addr, gb); - write_shmem_helper(acc, glo_base, shr_addr); + write_shmem_helper(acc, glo_base + shr_addr, dr, gb); } time += time_step; } @@ -491,7 +490,7 @@ write_shmem( __syncthreads(); int idx = tid; for (int i = 0; i < (SHAW / BS); i++) { - write_shmem_helper(acc, glo_base, idx); + write_shmem_helper(acc, glo_base + idx, s_acc_dr[idx], s_acc_gb[idx]); idx += BS; } }