mirror of
https://github.com/stevenrobertson/cuburn.git
synced 2025-02-05 11:40:04 -05:00
Fix dumb overflow bug
This commit is contained in:
parent
eb43b151dc
commit
9ef5363652
@ -372,27 +372,20 @@ void iter(
|
|||||||
#define SHAB 12
|
#define SHAB 12
|
||||||
#define SHAW (1<<SHAB)
|
#define SHAW (1<<SHAB)
|
||||||
|
|
||||||
// These two accumulators, used in write_shmem, hold {density, red} and
|
|
||||||
// {green, blue} values as packed u16 pairs. The fixed size represents 4,096
|
|
||||||
// pixels in the accumulator.
|
|
||||||
__shared__ uint32_t s_acc_dr[SHAW];
|
|
||||||
__shared__ uint32_t s_acc_gb[SHAW];
|
|
||||||
|
|
||||||
// Read from the shm accumulators and write to the global ones.
|
// Read from the shm accumulators and write to the global ones.
|
||||||
__device__
|
__device__
|
||||||
void write_shmem_helper(
|
void write_shmem_helper(
|
||||||
float4 *acc,
|
float4 *acc,
|
||||||
const int glo_base,
|
const int glo_idx,
|
||||||
const int idx
|
const int dr,
|
||||||
|
const int gb
|
||||||
) {
|
) {
|
||||||
float4 pix = acc[glo_base+idx];
|
float4 pix = acc[glo_idx];
|
||||||
uint32_t dr = s_acc_dr[idx];
|
|
||||||
pix.x += (dr & 0xffff) / 255.0f;
|
pix.x += (dr & 0xffff) / 255.0f;
|
||||||
pix.w += dr >> 16;
|
pix.w += dr >> 16;
|
||||||
uint32_t gb = s_acc_gb[idx];
|
|
||||||
pix.y += (gb & 0xffff) / 255.0f;
|
pix.y += (gb & 0xffff) / 255.0f;
|
||||||
pix.z += (gb >> 16) / 255.0f;
|
pix.z += (gb >> 16) / 255.0f;
|
||||||
acc[glo_base+idx] = pix;
|
acc[glo_idx] = pix;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read the point log, accumulate in shared memory, and write the results.
|
// Read the point log, accumulate in shared memory, and write the results.
|
||||||
@ -417,6 +410,12 @@ write_shmem(
|
|||||||
const int tid = threadIdx.x;
|
const int tid = threadIdx.x;
|
||||||
const int bid = blockIdx.x;
|
const int bid = blockIdx.x;
|
||||||
|
|
||||||
|
// These two accumulators, used in write_shmem, hold {density, red} and
|
||||||
|
// {green, blue} values as packed u16 pairs. The fixed size represents
|
||||||
|
// 4,096 pixels in the accumulator.
|
||||||
|
__shared__ uint32_t s_acc_dr[SHAW];
|
||||||
|
__shared__ uint32_t s_acc_gb[SHAW];
|
||||||
|
|
||||||
// TODO: doesn't respect SHAW/BS
|
// TODO: doesn't respect SHAW/BS
|
||||||
// TODO: compare generated code with unrolled for-loop
|
// TODO: compare generated code with unrolled for-loop
|
||||||
s_acc_dr[tid] = 0;
|
s_acc_dr[tid] = 0;
|
||||||
@ -483,7 +482,7 @@ write_shmem(
|
|||||||
if (d == 250) {
|
if (d == 250) {
|
||||||
atomicSub(s_acc_dr + shr_addr, dr);
|
atomicSub(s_acc_dr + shr_addr, dr);
|
||||||
atomicSub(s_acc_gb + shr_addr, gb);
|
atomicSub(s_acc_gb + shr_addr, gb);
|
||||||
write_shmem_helper(acc, glo_base, shr_addr);
|
write_shmem_helper(acc, glo_base + shr_addr, dr, gb);
|
||||||
}
|
}
|
||||||
time += time_step;
|
time += time_step;
|
||||||
}
|
}
|
||||||
@ -491,7 +490,7 @@ write_shmem(
|
|||||||
__syncthreads();
|
__syncthreads();
|
||||||
int idx = tid;
|
int idx = tid;
|
||||||
for (int i = 0; i < (SHAW / BS); i++) {
|
for (int i = 0; i < (SHAW / BS); i++) {
|
||||||
write_shmem_helper(acc, glo_base, idx);
|
write_shmem_helper(acc, glo_base + idx, s_acc_dr[idx], s_acc_gb[idx]);
|
||||||
idx += BS;
|
idx += BS;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user