Use reordered, lossy bit handling

Steven Robertson
2011-12-09 14:14:36 -05:00
parent b592cda3db
commit 6bac3b3a95
2 changed files with 35 additions and 24 deletions


@@ -8,6 +8,7 @@ from itertools import cycle, repeat, chain, izip
 from ctypes import *
 from cStringIO import StringIO
 import numpy as np
+from numpy import int32 as i32
 from scipy import ndimage
 from fr0stlib import pyflam3
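The new i32 alias is used further down to pass the nbins scalar into the write kernel. PyCUDA sizes and packs each kernel argument from its numpy dtype, so bare Python ints are normally wrapped as numpy.int32 before a launch. A minimal, self-contained sketch of that pattern (the toy kernel and names are illustrative only, not part of this commit):

import numpy as np
from numpy import int32 as i32
import pycuda.autoinit
import pycuda.driver as cuda
from pycuda.compiler import SourceModule

mod = SourceModule("""
__global__ void scale(int *dst, int nbins)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < nbins)
        dst[i] *= 2;
}
""")
scale = mod.get_function("scale")

nbins = 1 << 10
d_dst = cuda.to_device(np.arange(nbins, dtype=np.int32))
# The scalar is passed as i32(nbins) so PyCUDA knows it is a 4-byte int.
scale(d_dst, i32(nbins), block=(256, 1, 1), grid=(nbins // 256, 1))
print(cuda.from_device(d_dst, (nbins,), np.int32)[:4])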
@@ -147,7 +148,9 @@ class Renderer(object):
 d_log = cuda.mem_alloc(log_size * 4)
 d_log_sorted = cuda.mem_alloc(log_size * 4)
 sorter = sort.Sorter(log_size)
-nwriteblocks = int(np.ceil(nbins / float(1<<12)))
+# We need to cover each unique tag - address bits 20-23 - with one
+# write block per sort bin. Or something like that.
+nwriteblocks = int(np.ceil(nbins / float(1<<20))) * 256
 # Calculate 'nslots', the number of simultaneous running threads that
 # can be active on the GPU during iteration (and thus the number of
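A rough sketch of the write-grid sizing above, with a made-up accumulator size: one block is launched per (1<<20)-point slice of the accumulator (the distinct values of address bits 20 and up), times the 256 sort bins the log sort produces.

import numpy as np

# Hypothetical accumulator dimensions; only the arithmetic matters here.
width, height = 1920, 1080
nbins = width * height                          # ~2.07M accumulator points
ngroups = int(np.ceil(nbins / float(1 << 20)))  # distinct values of bits 20+
nwriteblocks = ngroups * 256                    # one block per (group, sort bin)
print(ngroups, nwriteblocks)                    # -> 2 512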
@@ -267,10 +270,11 @@ class Renderer(object):
 block=(32, self._iter.NTHREADS/32, 1),
 grid=(ntemporal_samples, 1), stream=iter_stream)
 _sync_stream(write_stream, iter_stream)
-sorter.sort(d_log_sorted, d_log, log_size, 12, True,
+sorter.sort(d_log_sorted, d_log, log_size, 3, True,
 stream=write_stream)
+#print cuda.from_device(sorter.dglobal, (256,), np.uint32)
 _sync_stream(iter_stream, write_stream)
-write_fun(d_accum, d_log_sorted, sorter.dglobal,
+write_fun(d_accum, d_log_sorted, sorter.dglobal, i32(nbins),
 block=(1024, 1, 1), grid=(nwriteblocks, 1),
 stream=write_stream)
 else:
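The sort and scatter run on write_stream while the next batch of iterations is queued on iter_stream; _sync_stream orders the two without blocking the host. Its body is not part of this diff, but the usual PyCUDA pattern is an event recorded on one stream and waited on by the other, sketched here as an assumption:

import pycuda.autoinit
import pycuda.driver as cuda

def _sync_stream(dst, src):
    """Make work queued later on `dst` wait for work already queued on `src`."""
    evt = cuda.Event()
    evt.record(src)
    dst.wait_for_event(evt)

iter_stream, write_stream = cuda.Stream(), cuda.Stream()
# ... queue iteration kernels on iter_stream ...
_sync_stream(write_stream, iter_stream)  # sort may not start before the log is written
# ... queue sort + write_fun on write_stream ...
_sync_stream(iter_stream, write_stream)  # next batch may not reuse the log until sorted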
@@ -302,5 +306,5 @@ class Renderer(object):
 def _trim(self, result):
 g = self.info.gutter
-return result[g:-g,g:-g].copy()
+return result[g:-g,g:g+self.info.width].copy()
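The trim change only touches the x axis: instead of assuming the right gutter is exactly g columns wide, the slice now takes exactly width columns starting at the left gutter, which matters if the accumulator rows carry extra padding on the right. A small numpy illustration with made-up sizes:

import numpy as np

width, height, gutter = 640, 360, 10
padded_w = 704                          # hypothetical padded row width
acc = np.zeros((height + 2 * gutter, padded_w, 4), dtype=np.float32)

old = acc[gutter:-gutter, gutter:-gutter]         # keeps the padding: wrong width
new = acc[gutter:-gutter, gutter:gutter + width]  # exactly (height, width)
print(old.shape, new.shape)                       # (360, 684, 4) (360, 640, 4)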