From e08444f74b9400f73fe94bb04816feff2704a61b Mon Sep 17 00:00:00 2001
From: Steven Robertson <steven@strobe.cc>
Date: Sat, 14 Feb 2015 17:48:22 -0800
Subject: [PATCH] Work around an overflow condition for now.

I'm not sure what's going wrong; the math still holds up at higher
densities, but when you crank up the samples-per-pixel count the
accumulators start overflowing stochastically, and when they do
they dump nonsense into the output. Until I have time to track down
the root cause, take a small perf hit by flushing much more often.
---
 cuburn/code/iter.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/cuburn/code/iter.py b/cuburn/code/iter.py
index cfbaa66..7b71237 100644
--- a/cuburn/code/iter.py
+++ b/cuburn/code/iter.py
@@ -346,17 +346,17 @@ iter(uint64_t out_ptr, uint64_t atom_ptr,
     cvt.u64.u32         ptr,    off;
     add.u64             ptr,    ptr,    %4;
 
-    // 97% of the time, do an atomic add, then jump to the end without
+    // 80% of the time, do an atomic add, then jump to the end without
     // stalling the thread waiting for the data value
-    setp.le.f32         p,      %5,     0.97;
+    setp.le.f32         p,      %5,     0.80;
 @p  red.global.add.u64  [ptr],  val;
 @p  bra                 oflow_end;
 
-    // 3% of the time, do the atomic add, and wait for the results
+    // 20% of the time, do the atomic add, and wait for the results
     atom.global.add.u64 val,    [ptr],  val;
     mov.b64             {lo, hi},       val;
 
-    // If the density is less than 256, jump to the end
+    // If density < 32 (NOTE(review): compare is still 256 << 22), jump to the end
     setp.lo.u32         p,      hi,     (256 << 22);
 @p  bra                 oflow_end;