From 4834c9fdfaaaa6e44f8249139c854a309d2e70ac Mon Sep 17 00:00:00 2001 From: Steven Robertson Date: Wed, 12 Oct 2011 14:08:13 -0400 Subject: [PATCH] Change synchronization model. --- cuburn/render.py | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/cuburn/render.py b/cuburn/render.py index 97ef656..cc4c380 100644 --- a/cuburn/render.py +++ b/cuburn/render.py @@ -170,11 +170,13 @@ class Animation(object): self.compile() self.mod = cuda.module_from_buffer(self.cubin, jit_options) - def render_frames(self, times=None): + def render_frames(self, times=None, block=True): """ Render a flame for each genome in the iterable value 'genomes'. - Returns a Python generator object which will yield one NumPy array - for each rendered image. + Returns a Python generator object which will yield a 2-tuple of + ``(time, buf)``, where ``time`` is the central time of the frame and + ``buf`` is a 3D (width, height, channel) NumPy array containing + [0,1]-valued RGBA components. This method produces a considerable amount of side effects, and should not be used lightly. Things may go poorly for you if this method is not @@ -189,6 +191,10 @@ class Animation(object): ``times`` is a sequence of center times at which to render, or ``None`` to render one frame for each genome used to create the animation. + + ``block`` will cause this thread to spin, waiting for the GPU to + finish the current task. Otherwise, this generator will yield ``None`` + until the GPU is finished, for filtering later. """ # Don't see this changing, but empirical tests could prove me wrong NRENDERERS = 2 @@ -201,11 +207,14 @@ class Animation(object): # genomes at the end to flush all pending tasks times = times if times is not None else [cp.time for cp in self.genomes] exttimes = chain(times, repeat(None, NRENDERERS)) - for rdr, time in izip(cycle(rdrs), exttimes): - if rdr.wait(): + for rdr, t in izip(cycle(rdrs), exttimes): + if rdr.pending: + if not block: + while not rdr.done(): + yield None yield rdr.get_result() - if time is not None: - rdr.render(time) + if t is not None: + rdr.render(t) def _interp(self, time, cp): flam3_interpolate(self._g_arr, len(self._g_arr), time, 0, byref(cp)) @@ -225,10 +234,10 @@ class _AnimRenderer(object): # used, no matter the number of time steps. PAL_HEIGHT = 16 - def __init__(self, anim): self.anim = anim self.pending = False + self.cen_time = None self.stream = cuda.Stream() self._nsms = cuda.Context.get_device().multiprocessor_count @@ -262,6 +271,7 @@ class _AnimRenderer(object): def render(self, cen_time): assert not self.pending, "Tried to render with results pending!" self.pending = True + self.cen_time = cen_time a = self.anim cen_cp = self._cen_cp @@ -397,23 +407,17 @@ class _AnimRenderer(object): pal[1:] = pal[0] return pal - def wait(self): - if self.pending: - self.stream.synchronize() - self.pending = False - return True - return False + def done(self): + return self.stream.is_done() def get_result(self): + self.stream.synchronize() + self.pending = False a = self.anim - g = a.features.gutter obuf_dim = (a.features.acc_height, a.features.acc_stride, 4) out = cuda.from_device(self.d_out, obuf_dim, np.float32) - out = np.delete(out, np.s_[:g], axis=0) - out = np.delete(out, np.s_[:g], axis=1) - out = np.delete(out, np.s_[-g:], axis=0) - out = np.delete(out, np.s_[a.features.width:], axis=1) - return out + g = a.features.gutter + return self.cen_time, out[g:-g,g:-g] @staticmethod def _mk_dts(cen_time, cen_cp, ncps):