From e08444f74b9400f73fe94bb04816feff2704a61b Mon Sep 17 00:00:00 2001 From: Steven Robertson Date: Sat, 14 Feb 2015 17:48:22 -0800 Subject: [PATCH] Work around an overflow condition for now. I'm not sure what's going wrong; the math still holds up at higher densities, but when you crank up the samples-per-pixel count the accumulators start overflowing stochastically, and when they do they dump nonsense into the output. Until I have time, take a small perf hit by flushing much more often. --- cuburn/code/iter.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cuburn/code/iter.py b/cuburn/code/iter.py index cfbaa66..7b71237 100644 --- a/cuburn/code/iter.py +++ b/cuburn/code/iter.py @@ -346,17 +346,17 @@ iter(uint64_t out_ptr, uint64_t atom_ptr, cvt.u64.u32 ptr, off; add.u64 ptr, ptr, %4; - // 97% of the time, do an atomic add, then jump to the end without + // 80% of the time, do an atomic add, then jump to the end without // stalling the thread waiting for the data value - setp.le.f32 p, %5, 0.97; + setp.le.f32 p, %5, 0.80; @p red.global.add.u64 [ptr], val; @p bra oflow_end; - // 3% of the time, do the atomic add, and wait for the results + // 20% of the time, do the atomic add, and wait for the results atom.global.add.u64 val, [ptr], val; mov.b64 {lo, hi}, val; - // If the density is less than 256, jump to the end + // If the density is less than 32, jump to the end setp.lo.u32 p, hi, (256 << 22); @p bra oflow_end;