Change synchronization model.

2025-06-10 09:21:29 -04:00 · 2011-10-12 14:08:13 -04:00 · 2011-10-12 14:08:13 -04:00 · 4834c9fdfa
commit 4834c9fdfa
parent 81f61d4d5d
1 changed file with 24 additions and 20 deletions
--- a/cuburn/render.py
+++ b/cuburn/render.py
@@ -170,11 +170,13 @@ class Animation(object):
            self.compile()
        self.mod = cuda.module_from_buffer(self.cubin, jit_options)
-    def render_frames(self, times=None):
+    def render_frames(self, times=None, block=True):
        """
        Render a flame for each genome in the iterable value 'genomes'.
-        Returns a Python generator object which will yield one NumPy array
+        Returns a Python generator object which will yield a 2-tuple of
-        for each rendered image.
+        ``(time, buf)``, where ``time`` is the central time of the frame and
        ``buf`` is a 3D (width, height, channel) NumPy array containing
        [0,1]-valued RGBA components.
        This method produces a considerable amount of side effects, and should
        not be used lightly. Things may go poorly for you if this method is not
@@ -189,6 +191,10 @@ class Animation(object):
        ``times`` is a sequence of center times at which to render, or ``None``
        to render one frame for each genome used to create the animation.
        ``block`` will cause this thread to spin, waiting for the GPU to
        finish the current task. Otherwise, this generator will yield ``None``
        until the GPU is finished, for filtering later.
        """
        # Don't see this changing, but empirical tests could prove me wrong
        NRENDERERS = 2
@@ -201,11 +207,14 @@ class Animation(object):
        # genomes at the end to flush all pending tasks
        times = times if times is not None else [cp.time for cp in self.genomes]
        exttimes = chain(times, repeat(None, NRENDERERS))
-        for rdr, time in izip(cycle(rdrs), exttimes):
+        for rdr, t in izip(cycle(rdrs), exttimes):
-            if rdr.wait():
+            if rdr.pending:
                if not block:
                    while not rdr.done():
                        yield None
                yield rdr.get_result()
-            if time is not None:
+            if t is not None:
-                rdr.render(time)
+                rdr.render(t)
    def _interp(self, time, cp):
        flam3_interpolate(self._g_arr, len(self._g_arr), time, 0, byref(cp))
@@ -225,10 +234,10 @@ class _AnimRenderer(object):
    # used, no matter the number of time steps.
    PAL_HEIGHT = 16
    def __init__(self, anim):
        self.anim = anim
        self.pending = False
        self.cen_time = None
        self.stream = cuda.Stream()
        self._nsms = cuda.Context.get_device().multiprocessor_count
@@ -262,6 +271,7 @@ class _AnimRenderer(object):
    def render(self, cen_time):
        assert not self.pending, "Tried to render with results pending!"
        self.pending = True
        self.cen_time = cen_time
        a = self.anim
        cen_cp = self._cen_cp
@@ -397,23 +407,17 @@ class _AnimRenderer(object):
            pal[1:] = pal[0]
        return pal
-    def wait(self):
+    def done(self):
-        if self.pending:
+        return self.stream.is_done()
            self.stream.synchronize()
            self.pending = False
            return True
        return False
    def get_result(self):
        self.stream.synchronize()
        self.pending = False
        a = self.anim
        g = a.features.gutter
        obuf_dim = (a.features.acc_height, a.features.acc_stride, 4)
        out = cuda.from_device(self.d_out, obuf_dim, np.float32)
-        out = np.delete(out, np.s_[:g], axis=0)
+        g = a.features.gutter
-        out = np.delete(out, np.s_[:g], axis=1)
+        return self.cen_time, out[g:-g,g:-g]
        out = np.delete(out, np.s_[-g:], axis=0)
        out = np.delete(out, np.s_[a.features.width:], axis=1)
        return out
    @staticmethod
    def _mk_dts(cen_time, cen_cp, ncps):