Crank up the dispatch params again after fix.

This commit is contained in:
Steven Robertson 2015-10-11 00:51:22 -07:00
parent 0e91d01528
commit 5f5e69f3a3

View File

@@ -346,18 +346,18 @@ iter(uint64_t out_ptr, uint64_t atom_ptr,
cvt.u64.u32 ptr, off; cvt.u64.u32 ptr, off;
add.u64 ptr, ptr, %4; add.u64 ptr, ptr, %4;
// 80% of the time, do an atomic add, then jump to the end without // 97% of the time, do an atomic add, then jump to the end without
// stalling the thread waiting for the data value // stalling the thread waiting for the data value
setp.le.f32 p, %5, 0.80; setp.le.f32 p, %5, 0.97;
@p red.global.add.u64 [ptr], val; @p red.global.add.u64 [ptr], val;
@p bra oflow_end; @p bra oflow_end;
// 20% of the time, do the atomic add, and wait for the results // 3% of the time, do the atomic add, and wait for the results
atom.global.add.u64 val, [ptr], val; atom.global.add.u64 val, [ptr], val;
mov.b64 {lo, hi}, val; mov.b64 {lo, hi}, val;
// If the density is less than 32, jump to the end // If the density is less than 64, jump to the end
setp.lo.u32 p, hi, (256 << 22); setp.lo.u32 p, hi, (256 << 23);
@p bra oflow_end; @p bra oflow_end;
// Atomically swap the integer cell with 0 and read its current value // Atomically swap the integer cell with 0 and read its current value