Support x264 10-bit output format.

2026-02-22 19:20:20 -05:00 · 2012-07-22 15:53:38 -07:00
parent 21f783730a
commit 3294ba10d6
10 changed files with 363 additions and 116 deletions
--- a/cuburn/code/output.py
+++ b/cuburn/code/output.py
@ -1,15 +1,16 @@
 from util import devlib, ringbuflib
 from mwc import mwclib
-f32tou8lib = devlib(deps=[ringbuflib, mwclib], defs=r'''
+rgba8lib = devlib(deps=[ringbuflib, mwclib], defs=r'''
 // Perform a conversion from float32 values to uint8 ones, applying
 // pixel- and channel-independent dithering to reduce suprathreshold banding
 // artifacts. Clamps values larger than 1.0f.
 // TODO: move to a separate module?
 // TODO: less ineffecient mwc_st handling?
-__global__ void f32_to_u8(
+__global__ void f32_to_rgba_u8(
-    ringbuf *rb, mwc_st *rctxs, uchar4 *dst, const float4 *src,
+    uchar4 *dst, const float4 *src,
-    int gutter, int dstride, int sstride, int height)
+    int gutter, int dstride, int sstride, int height,
    ringbuf *rb, mwc_st *rctxs)
 {
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
@ -32,3 +33,34 @@ __global__ void f32_to_u8(
    rctxs[rb_incr(rb->tail, tid)] = rctx;
 }
 ''')
 rgba16lib = devlib(deps=[ringbuflib, mwclib], defs=r'''
 // Convert float32 RGBA pixels to uint16 ones, adding a small random offset
 // to every channel before the conversion (dithering). Values larger than
 // 1.0f are clamped.
 //
 // `src` is addressed with `sstride` and skips `gutter` pixels in both x and
 // y; `dst` is addressed with `dstride`, which doubles as the output width.
 // Each thread loads its RNG state from the ring buffer head and stores the
 // advanced state at the ring buffer tail.
 __global__ void f32_to_rgba_u16(
    ushort4 *dst, const float4 *src,
    int gutter, int dstride, int sstride, int height,
    ringbuf *rb, mwc_st *rctxs)
 {
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    // Valid pixels are 0 <= x < dstride and 0 <= y < height. The grid may
    // overhang the image, so threads at x == dstride or y == height must
    // bail out too; otherwise they write past the row / past the buffer.
    if (x >= dstride || y >= height) return;
    int isrc = sstride * (y + gutter) + x + gutter;
    int tid = blockDim.x * threadIdx.y + threadIdx.x;
    mwc_st rctx = rctxs[rb_incr(rb->head, tid)];
    float4 in = src[isrc];
    // Scale to the full uint16 range, then dither by just under half a code
    // value (assumes mwc_next_11 yields values in [-1, 1] -- TODO confirm).
    ushort4 out = make_ushort4(
        fminf(1.0f, in.x) * 65535.0f + 0.49f * mwc_next_11(rctx),
        fminf(1.0f, in.y) * 65535.0f + 0.49f * mwc_next_11(rctx),
        fminf(1.0f, in.z) * 65535.0f + 0.49f * mwc_next_11(rctx),
        fminf(1.0f, in.w) * 65535.0f + 0.49f * mwc_next_11(rctx)
    );
    int idst = dstride * y + x;
    dst[idst] = out;
    rctxs[rb_incr(rb->tail, tid)] = rctx;
 }
 ''')
 pixfmtlib = devlib(deps=[rgba8lib, rgba16lib])
--- a/cuburn/genome/specs.py
+++ b/cuburn/genome/specs.py
@ -115,6 +115,8 @@ profile = (
  , 'end': Scalar(None, 'Last frame to render (1-indexed, exclusive; '
                  'negative indexes from the end)')
  , 'skip': Scalar(0, 'Skip this many frames between each rendered frame')
  , 'shard': Scalar(0, 'Pack this many frames in each output file '
                    '(causing start, end, and skip to be ignored)')
  , 'height': Scalar(1920, 'Output height in pixels')
  , 'width': Scalar(1080, 'Output width in pixels')
@ -123,7 +125,9 @@ profile = (
  , 'filter_order': list_(enum(filters.keys()), default_filters)
  , 'filters': prof_filters
-  , 'output_format': enum('jpg png tif', 'jpg')
+  # The other keys in the 'output' dictionary are format-specific and not
  # documented here.
  , 'output': {'type': enum('jpeg png tiff x264', 'jpeg')}
  })
 # Types recognized as independent units with a 'type' key
--- a/cuburn/output.py
+++ b/cuburn/output.py
@ -1,10 +1,14 @@
 import os
 import tempfile
 from cStringIO import StringIO
 from subprocess import Popen, PIPE
 import numpy as np
 from numpy import float32 as f32, int32 as i32
 import pycuda.driver as cuda
 from code.util import ClsMod, launch
-from code.output import f32tou8lib
+from code.output import pixfmtlib
 import scipy.misc
@ -12,39 +16,209 @@ if not hasattr(scipy.misc, 'toimage'):
    raise ImportError("Could not find scipy.misc.toimage. "
                      "Are scipy and PIL installed?")
 def launchC(name, mod, stream, dim, fb, *args):
    """
    Launch the conversion kernel `name` from `mod` on `stream`.

    The kernel is started with 32x8 thread blocks and enough blocks to cover
    `dim.w` x `dim.h`. It receives `fb.d_back`, `fb.d_front`, the gutter,
    `dim.w`, `dim.astride` and `dim.h` (the integers as int32), followed by
    any extra `args`.
    """
    block = (32, 8, 1)
    grid = (int(np.ceil(dim.w/32.)), int(np.ceil(dim.h/8.)))
    geometry = (i32(fb.gutter), i32(dim.w), i32(dim.astride), i32(dim.h))
    launch(name, mod, stream, block, grid, fb.d_back, fb.d_front,
           *(geometry + args))
 class Output(object):
    def convert(self, fb, gnm, dim, stream=None):
        """
        Convert a filtered buffer to whatever output format is needed by the
        writer.
        This function is intended for use by the Renderer, and should not be
        called by clients. It does not modify its instance.

        Subclasses must override this method; the base implementation always
        raises NotImplementedError.
        """
        raise NotImplementedError()
    def copy(self, fb, dim, pool, stream=None):
        """
        Schedule a copy from the device buffer to host memory, returning the
-        target buffer.
+        target buffer(s).
        This function is intended for use by the Renderer, and should not be
        called by clients. It does not modify its instance.
        """
        raise NotImplementedError()
    def encode(self, host_frame):
        """
        Push `host_frame` (as returned from `Output.copy`) into the encoding
        pipeline, and return any completed media segments. If `host_frame` is
        None, flush the encoding pipeline.

        The return value is a 2-tuple `(media, logs)`. `media` is a dictionary
        mapping channel names (appropriate for use as file suffixes) to
        file-like objects containing the encoded media segments. `logs` is a
        list of `(key, value)` log entries. Either or both can be empty at any
        time: non-temporal codecs typically populate them on every frame
        except the flush, while temporal codecs typically leave them empty on
        every frame except the flush.

        Media segments are discretely decodeable chunks of content. The
        mapping of media segments to individual frames is not specified.

        Subclasses must override this method; the base implementation always
        raises NotImplementedError.
        """
        raise NotImplementedError()
    @property
    def suffix(self):
        """
        Return the file suffix that will be used. If more than one suffix will
        be used, the value returned is the one considered to be "primary".

        Subclasses must override this property; the base implementation
        always raises NotImplementedError.
        """
        raise NotImplementedError()
 class PILOutput(Output, ClsMod):
-    lib = f32tou8lib
+    lib = pixfmtlib
    def __init__(self, codec='jpeg', quality=100, alpha=False):
        """
        Remember the image codec name, the encoder quality setting and
        whether the alpha channel should be written as well.
        """
        super(PILOutput, self).__init__()
        self.type = codec
        self.quality = quality
        self.alpha = alpha
    def convert(self, fb, gnm, dim, stream=None):
-        launch('f32_to_u8', self.mod, stream,
+        launchC('f32_to_rgba_u8', self.mod, stream, dim, fb,
-                (32, 8, 1), (int(np.ceil(dim.w/32.)), int(np.ceil(dim.h/8.))),
+                fb.d_rb, fb.d_seeds)
                fb.d_rb, fb.d_seeds, fb.d_back, fb.d_front,
                i32(fb.gutter), i32(dim.w), i32(dim.astride), i32(dim.h))
    def copy(self, fb, dim, pool, stream=None):
        """
        Allocate a `(dim.h, dim.w, 4)` uint8 host buffer from `pool`, schedule
        an asynchronous device-to-host copy of `fb.d_back` into it on
        `stream`, and return the buffer.
        """
        shape = (dim.h, dim.w, 4)
        host_frame = pool.allocate(shape, 'u1')
        cuda.memcpy_dtoh_async(host_frame, fb.d_back, stream)
        return host_frame
-    @staticmethod
+    def _convert_buf(self, buf):
-    def save(buf, name, type=None, quality=98):
+        out = StringIO()
        type = dict(jpg='jpeg', tif='tiff').get(type, type)
        if type == 'jpeg' or (type is None and name.endswith('.jpg')):
            buf = buf[:,:,:3]
        img = scipy.misc.toimage(buf, cmin=0, cmax=1)
-        img.save(name, type, quality=quality)
+        img.save(out, self.type, quality=self.quality)
        out.seek(0)
        return out
    def encode(self, buf):
        """
        Encode one host frame and return `(media, logs)`.

        `media` maps file suffixes to file-like objects produced by
        `_convert_buf`. For JPEG only the first three channels are encoded;
        when `self.alpha` is set the fourth channel is encoded as a second
        image. Other codecs receive the whole buffer. `logs` is always an
        empty list, so callers can concatenate it with other log lists.

        Passing `buf=None` (a flush request) returns `({}, [])`, since
        nothing is buffered between frames.
        """
        if buf is None: return {}, []
        if self.type == 'jpeg':
            out = self._convert_buf(buf[:,:,:3])
            if self.alpha:
                alpha = self._convert_buf(buf[:,:,3])
                return {'_color.jpg': out, '_alpha.jpg': alpha}, []
            # Was `{}`; every other path (and X264Output) returns a list.
            return {'.jpg': out}, []
        return {'.'+self.type: self._convert_buf(buf)}, []
    @property
    def suffix(self):
        if self.type == 'jpeg':
            if self.alpha: return '_color.jpg'
            return '.jpg'
        return '.'+self.type
 class X264Output(Output, ClsMod):
    """
    Output module that streams 16-bit frames into external `x264` processes.

    Frames are converted on the device to 16-bit RGBA, copied to the host and
    written as raw video to the stdin of an `x264` subprocess whose encoded
    output is collected in a temporary file. When `alpha` is set, a second
    encoder receives the alpha channel as the luma plane of a yv12 stream
    with constant chroma. Encoded streams are only handed back when the
    pipeline is flushed (explicitly, or because the frame size changed).
    """
    lib = pixfmtlib

    # Named sets of x264 command-line options, selected by `profile`.
    profiles = (
      { 'normal': '--profile high444 --level 4.2'
      , '': ''
      })
    # Options common to every encoder: raw bitstream to stdout, frames from
    # stdin ('-o - -'), 16-bit input samples.
    base = ('x264 --no-progress --input-depth 16 --sync-lookahead 0 '
            '--rc-lookahead 5 --muxer raw -o - - --log-level debug ')

    def __init__(self, profile='normal', csp='i444', crf=15,
                 x264opts='', alpha=False):
        """
        `profile` is a key of `X264Output.profiles`; `csp` is passed as
        `--output-csp` for the color stream; `crf` is passed as `--crf`;
        `x264opts` is a string of extra command-line options; `alpha` enables
        the second (alpha channel) encoder.
        """
        super(X264Output, self).__init__()
        self.args = ' '.join([self.base, self.profiles[profile],
                              '--crf', str(crf), x264opts]).split()
        self.alpha = alpha
        self.csp = csp

        # Encoder state; populated lazily by `_spawn` on the first frame.
        self.framesize = None
        self.zeros = None
        self.subp = None
        self.outf = None
        self.asubp = None
        self.aoutf = None

    def convert(self, fb, gnm, dim, stream=None):
        """Launch the float32 -> 16-bit RGBA conversion kernel."""
        launchC('f32_to_rgba_u16', self.mod, stream, dim, fb,
                fb.d_rb, fb.d_seeds)

    def copy(self, fb, dim, pool, stream=None):
        """
        Allocate a `(dim.h, dim.w, 4)` uint16 host buffer from `pool`,
        schedule an asynchronous copy of `fb.d_back` into it, and return it.
        """
        h_out = pool.allocate((dim.h, dim.w, 4), 'u2')
        cuda.memcpy_dtoh_async(h_out, fb.d_back, stream)
        return h_out

    def _spawn_sub(self, framesize, alpha):
        """
        Start one x264 process for `framesize` (`(height, width)`).

        Returns `(outf, subp)`: the temporary file receiving the encoder's
        stdout, and the `Popen` object.
        """
        res = '%dx%d' % (framesize[1], framesize[0])
        csp = 'yv12' if alpha else 'rgb'
        extras = ['--input-csp', csp, '--demuxer', 'raw', '--input-res', res]
        if alpha:
            extras += ['--output-csp', 'i420', '--chroma-qp-offset', '24']
        else:
            extras += ['--output-csp', self.csp]
        outf = tempfile.TemporaryFile(bufsize=0)
        # Hand Popen the file object itself. The previous
        # `os.dup(outf.fileno())` left one extra descriptor open in this
        # process for every encoder ever spawned.
        # NOTE(review): stderr is a pipe that is only drained at flush time;
        # with '--log-level debug' a long segment could fill the pipe and
        # stall the encoder -- confirm, or redirect stderr to a file.
        subp = Popen(self.args + extras, stdin=PIPE, stderr=PIPE,
                     stdout=outf)
        return outf, subp

    def _spawn(self, framesize):
        """Start the encoder(s) for a new segment of `framesize` frames."""
        self.framesize = framesize
        self.outf, self.subp = self._spawn_sub(framesize, False)
        if self.alpha:
            self.aoutf, self.asubp = self._spawn_sub(framesize, True)
            # Constant filler for the two chroma planes of the yv12 alpha
            # stream (together half as many samples as the luma plane).
            # `//` keeps this an integer regardless of division semantics.
            bufsz = framesize[0] * framesize[1] // 2
            self.zeros = np.empty(bufsz, dtype='u2')
            self.zeros.fill(32767)

    def _flush_sub(self, subp):
        """
        Close the encoder's stdin, wait for it to exit and return what it
        wrote to stderr. Raises IOError on a non-zero exit status.
        """
        (stdout, stderr) = subp.communicate()
        if subp.returncode:
            raise IOError("x264 exited with an error")
        return stderr

    def _flush(self):
        """
        Finish the running encoder(s) and return `(media, logs)` for the
        completed segment, or `({}, [])` if no encoder is running.
        """
        if self.subp is None:
            return {}, []
        log = self._flush_sub(self.subp)
        self.outf.seek(0)
        self.subp = None
        if self.alpha:
            alog = self._flush_sub(self.asubp)
            self.aoutf.seek(0)
            self.asubp = None
            return ({'_color.h264': self.outf, '_alpha.h264': self.aoutf},
                    [('x264_color', log), ('x264_alpha', alog)])
        # Was `stderr`, which is not defined in this scope (NameError).
        return {'.h264': self.outf}, [('x264_color', log)]

    def _write(self, buf, subp):
        """
        Write `buf` to the encoder's stdin. On IOError, print the encoder's
        stderr output before re-raising.
        """
        try:
            subp.stdin.write(buffer(buf))
        except IOError:
            print('Exception while writing. Log:')
            print(subp.stderr.read())
            raise

    def encode(self, buf):
        """
        Feed one host frame to the encoder(s) and return `(media, logs)`.

        The result is empty unless this call flushed a segment, which happens
        when `buf` is None or when the frame size differs from that of the
        running segment. Encoders are (re)started on demand.
        """
        out = ({}, [])
        if buf is None or self.framesize != buf.shape[:2]:
            out = self._flush()
        if buf is None:
            return out
        if self.subp is None:
            self._spawn(buf.shape[:2])
        # Color stream: drop the alpha channel, leaving packed RGB.
        self._write(np.delete(buf, 3, axis=2), self.subp)
        if self.alpha:
            # Alpha stream: alpha as the luma plane, then constant chroma.
            self._write(buf[:,:,3].tostring(), self.asubp)
            self._write(buffer(self.zeros), self.asubp)
        return out

    @property
    def suffix(self):
        """Primary file suffix: '_color.h264' with alpha, else '.h264'."""
        if self.alpha: return '_color.h264'
        return '.h264'
 def get_output_for_profile(gprof):
    """
    Build the output module described by `gprof.output`.

    The 'type' entry (default 'jpeg') selects the module; the remaining
    entries are passed to its constructor as keyword arguments. Raises
    ValueError for an unrecognized type.
    """
    settings = dict(gprof.output._val)
    kind = settings.pop('type', 'jpeg')
    if kind == 'x264':
        return X264Output(**settings)
    if kind not in ('jpeg', 'png', 'tiff'):
        raise ValueError('Invalid output type "%s".' % kind)
    return PILOutput(codec=kind, **settings)
--- a/cuburn/profile.py
+++ b/cuburn/profile.py
@ -37,6 +37,13 @@ def add_args(parser=None):
        help="Last frame to render (1-indexed, exclusive, negative from end)")
    tmp.add_argument('--skip', dest='skip', metavar='N', type=int,
        help="Skip N frames between each rendered frame")
    # TODO: eliminate the 'silently overwritten' bit.
    tmp.add_argument('--shard', dest='shard', metavar='SECS', type=float,
        help="Write SECS of output into each file, instead of one frame per "
             "file. If set, causes 'start', 'end', and 'skip' to be ignored. "
             "If output codecs don't support multi-file writing, files will "
             "be silently overwritten.")
    tmp.add_argument('--still', action='store_true',
        help='Override start, end, and temporal frame width to render one '
             'frame without motion blur.')
@ -48,7 +55,7 @@ def add_args(parser=None):
    spa.add_argument('--height', type=int, metavar='PX')
    out = parser.add_argument_group('Output options')
-    out.add_argument('--codec', choices=['jpg', 'png', 'tiff'])
+    out.add_argument('--codec', choices=['jpg', 'png', 'tiff', 'x264'])
    return parser
 def get_from_args(args):
@ -64,9 +71,11 @@ def get_from_args(args):
    if args.still:
        base.update(frame_width=0, start=1, end=2)
-    for arg in 'duration fps start end skip spp width height'.split():
+    for arg in 'duration fps start end skip shard spp width height'.split():
        if getattr(args, arg, None) is not None:
            base[arg] = getattr(args, arg)
    if args.codec is not None:
        base.setdefault('output', {})['type'] = args.codec
    return name, base
@ -82,13 +91,20 @@ def wrap(prof, gnm):
 def enumerate_times(gprof):
    """
-    Given a profile, return a list of `(frame_no, center_time)` pairs. Note
+    Given a profile, return a list of `(frame_no, center_times)` pairs. Note
    that the enumeration is applied before `start`, `end`, and `skip`, and so
    `frame_no` may be non-contiguous.
    """
    nframes = round(gprof.fps * gprof.duration)
    times = np.linspace(0, 1, nframes + 1)
-    times = list(enumerate(times[:-1] + 0.5 * (times[1] - times[0]), 1))
+    times = times[:-1] + 0.5 * (times[1] - times[0])
    if gprof.shard:
        s = max(1, int(round(gprof.fps * gprof.shard)))
        return [(i, times[t:t+s])
                for i, t in enumerate(range(0, len(times), s), 1)]
    else:
        times = [[t] for t in times]
    times = list(enumerate(times, 1))
    if gprof.end is not None:
        times = times[:gprof.end]
    if gprof.start is not None:
--- a/cuburn/render.py
+++ b/cuburn/render.py
@ -235,7 +235,7 @@ class Renderer(object):
        self.packer, self.lib, self.cubin = self.compile(gnm)
        self.mod = self.load(self.cubin)
        self.filts = filters.create(gprof)
-        self.out = output.PILOutput()
+        self.out = output.get_output_for_profile(gprof)
 class RenderManager(ClsMod):
    lib = devlib(deps=[interp.palintlib, filldptrlib, iter.flushatomlib])
@ -395,26 +395,3 @@ class RenderManager(ClsMod):
        self.info_a, self.info_b = self.info_b, self.info_a
        self.stream_a, self.stream_b = self.stream_b, self.stream_a
        return self.copy_evt, h_out
    def render(self, gnm, gprof, times):
        """
        A port of the old rendering function, retained for backwards
        compatibility. Some of this will be pulled into as-yet-undecided
        methods for more DRY.

        This is a generator. `times` is iterated as `(idx, tc)` pairs; each
        `tc` is queued with `queue_frame`, and a `RenderedImage(buf, idx,
        gpu_time)` for the previously queued frame is yielded once its event
        reports completion, so results lag queueing by one frame.
        """
        rdr = Renderer(gnm, gprof)
        last_evt = cuda.Event().record(self.stream_a)
        last_idx = None
        def wait(): # Times like these where you wish for a macro
            # Reads last_evt / two_evts_ago / last_buf / last_idx from the
            # enclosing scope at call time; polls instead of blocking.
            while not last_evt.query():
                time.sleep(0.01)
            gpu_time = last_evt.time_since(two_evts_ago)
            return RenderedImage(last_buf, last_idx, gpu_time)
        for idx, tc in times:
            # The last argument is True only for the first queued frame.
            evt, h_buf = self.queue_frame(rdr, gnm, gprof, tc, last_idx is None)
            # NOTE(review): truthiness test -- a frame whose idx is 0 would
            # never be yielded; confirm indices always start at 1.
            if last_idx:
                yield wait()
            two_evts_ago, last_evt = last_evt, evt
            last_buf, last_idx = h_buf, idx
        # Drain the final frame, which is still in flight after the loop.
        if last_idx:
            yield wait()
--- a/dist/client.py
+++ b/dist/client.py
@ -10,11 +10,15 @@ from gevent import spawn, queue, coros
 import zmq.green as zmq
 import _importhack
-from cuburn import profile
+from cuburn import profile, output
 from cuburn.genome import db, util
 from messages import *
 # TODO: remove this dependency (loading the output module to get the suffix
 # requires a compiler / default instance)
 import pycuda.autoinit
 class RenderClient(object):
    def __init__(self, task_addr, rsp_addr, ctx=None, start=True):
        ctx = zmq.Context() if ctx is None else ctx
@ -56,12 +60,12 @@ class RenderClient(object):
    def _deal_rsps(self):
        while True:
            rsp = self.rsock.recv_multipart(copy=False)
            assert len(rsp) == 2
            rq = self.taskmap.get(rsp[0].bytes, None)
-            if rq: rq.put(rsp[1])
+            if rq: rq.put((rsp[1].bytes, rsp[2].bytes.split('\0'), rsp[3:]))
 # Time (in seconds) before a job times out
-TIMEOUT=240
+# TODO: replace timeout mechanism with polling?
 TIMEOUT=2400
 # Max. queue length before request considered lost, as a multiple of the
 # number of in-flight requests
@ -92,21 +96,27 @@ def iter_genomes(prof, outpath, gpaths):
            os.makedirs(odir)
        with open(os.path.join(odir, 'NFRAMES'), 'w') as fp:
            fp.write(str(len(times)))
        outmod = output.get_output_for_profile(gprof)
        for i, t in times:
-            opath = os.path.join(odir, '%05d.%s' % (i, gprof.output_format))
+            opath = os.path.join(odir, '%05d' % i)
-            if not os.path.isfile(opath):
+            if not os.path.isfile(opath + outmod.suffix):
                yield Task(opath, ghash, prof, gnm, t)
 def get_result(cli, task, rq):
    try:
-        rsp = rq.get(timeout=TIMEOUT)
+        log, names, bufs = rq.get(timeout=TIMEOUT)
    except queue.Empty:
        cli.put(task, rq)
        print '>>', task.id
-        rsp = rq.get()
+        log, names, bufs = rq.get()
-    with open(task.id, 'wb') as fp:
+    with open(task.id + '.log', 'wb') as fp:
-        fp.write(buffer(rsp))
+        fp.write(log)
    for name in reversed(names):
        buf = bufs.pop()
        with open(task.id + name, 'wb') as fp:
            fp.write(buffer(buf))
    print '< ', task.id
 def main(addrs):
@ -128,6 +138,8 @@ def main(addrs):
    while cli.taskmap:
        print 'Still waiting on %d tasks...' % len(cli.taskmap)
        for i in cli.taskmap.items():
            print i
        gevent.sleep(3)
 if __name__ == "__main__":
--- a/dist/messages.py
+++ b/dist/messages.py
@ -1,5 +1,5 @@
 from collections import namedtuple
-Task = namedtuple('Task', 'id hash profile anim time')
+Task = namedtuple('Task', 'id hash profile anim times')
 AddressedTask = namedtuple('AddressedTask', 'addr task')
 FullTask = namedtuple('FullTask', 'addr task cubin packer')
--- a/dist/server.py
+++ b/dist/server.py
@ -1,4 +1,5 @@
 #!/usr/bin/env python2
 from itertools import takewhile
 import gevent
 from gevent import spawn, queue, event
@ -29,7 +30,6 @@ def setup_task_listeners(addrs, tq, rq):
                # losock to be added to the queue.
                loevt.set()
            task = hisock.recv_pyobj()
            print 'OOOOOH! Got a hiprio evt'
            loevt.clear() # Got message; pause listen_lo().
            tq.put(task)
            hisock.send('')
@ -77,7 +77,7 @@ def setup_worker_listener(addrs, tq, rq):
        while True:
            rsp = wsock.recv_multipart(copy=False)
            if rsp[2].bytes != '':
-                print '< ', ' '.join([r.bytes for r in rsp[2:-1]])
+                print '< ', rsp[2].bytes, rsp[3].bytes
                rq.put(rsp[2:])
            readyq.put(rsp[0])
--- a/dist/worker.py
+++ b/dist/worker.py
@ -1,5 +1,6 @@
 #!/usr/bin/env python2
 import sys
 import socket
 from cStringIO import StringIO
 import gevent
@ -37,20 +38,29 @@ def main(worker_addr):
        hash = None
        while True:
            log = [('worker', socket.gethostname() + ':' +
                    cuda.Context.get_current().get_device().pci_bus_id())]
            addr, task, cubin, packer = sock.recv_pyobj()
            gprof = profile.wrap(task.profile, task.anim)
            if hash != task.hash:
                rdr = PrecompiledRenderer(task.anim, gprof, packer, cubin)
-            evt, buf = rmgr.queue_frame(rdr, task.anim, gprof, task.time)
+            for t in task.times:
                evt, buf = rmgr.queue_frame(rdr, task.anim, gprof, t)
                while not evt.query():
                    gevent.sleep(0.01)
-            ofile = StringIO()
+                out, frame_log = rdr.out.encode(buf)
-            output.PILOutput.save(buf, ofile, task.id[-3:])
+                log += frame_log
            ofile.seek(0)
            sock.send_multipart(addr + [ofile.read()])
            hash = task.hash
                print 'Rendered', task.id, 'in', int(evt.time()), 'ms'
            final_out, final_log = rdr.out.encode(None)
            assert not (out and final_out), 'Got output from two sources!'
            out = out or final_out
            log += final_log
            log = '\0'.join([k + ' ' + v for k, v in log])
            suffixes, files = zip(*[(k, v.read())
                                    for k, v in sorted(out.items())])
            # TODO: reduce copies, generally spruce up the memory usage here
            sock.send_multipart(addr + [log, '\0'.join(suffixes)] + list(files))
    # Spawn two request loops to take advantage of CUDA pipelining.
    spawn(request_loop)
--- a/main.py
+++ b/main.py
@ -26,44 +26,16 @@ sys.path.insert(0, os.path.dirname(__file__))
 from cuburn import render, filters, output, profile
 from cuburn.genome import convert, use, db
-def save(out):
+def save(output_module, name, rendered_frame):
-    # Temporary! TODO: fix this
+    out, log = output_module.encode(rendered_frame)
-    output.PILOutput.save(out.buf, out.idx)
+    for suffix, file_like in out.items():
-    print out.idx, out.gpu_time
+        with open(name + suffix, 'w') as fp:
-
+            fp.write(file_like.read())
-def main(args, prof):
+    for key, val in log:
-    gdb = db.connect(args.genomedb)
+        print '\n=== %s ===' % key
-    gnm, basename = gdb.get_anim(args.flame, args.half)
+        print val
    if getattr(args, 'print'):
        print convert.to_json(gnm)
        return
    gprof = profile.wrap(prof, gnm)
    if args.name is not None:
        basename = args.name
    prefix = os.path.join(args.dir, basename)
    if args.subdir:
        if not os.path.isdir(prefix):
            os.mkdir(prefix)
        prefix += '/'
    else:
        prefix += '_'
    frames = [('%s%05d%s.jpg' % (prefix, (i+1), args.suffix), t)
              for i, t in profile.enumerate_times(gprof)]
    if args.resume:
        m = os.path.getmtime(args.flame)
        frames = (f for f in frames
                  if not os.path.isfile(f[0]) or m > os.path.getmtime(f[0]))
    import pycuda.autoinit
    rmgr = render.RenderManager()
    gen = rmgr.render(gnm, gprof, frames)
    if not args.gfx:
        for out in gen:
            save(out)
        return
 def pyglet_preview(args, gprof, itr):
    import pyglet
    import pyglet.gl as gl
    w, h = gprof.width, gprof.height
@ -92,39 +64,89 @@ def main(args, prof):
    last_time = [time.time()]
    def poll(dt):
-        out = next(gen, False)
+        out = next(itr, False)
        if out is False:
            if args.pause:
                label.text = "Done. ('q' to quit)"
                #pyglet.clock.unschedule(poll)
            else:
                pyglet.app.exit()
        elif out is not None:
            name, buf = out
            real_dt = time.time() - last_time[0]
            last_time[0] = time.time()
-            save(out)
+            if buf.dtype == np.uint8:
            if out.buf.dtype == np.uint8:
                fmt = gl.GL_UNSIGNED_BYTE
-            elif out.buf.dtype == np.uint16:
+            elif buf.dtype == np.uint16:
                fmt = gl.GL_UNSIGNED_SHORT
            else:
-                label.text = 'Unsupported format: ' + out.buf.dtype
+                label.text = 'Unsupported format: ' + buf.dtype
                return
-            h, w, ch = out.buf.shape
+            h, w, ch = buf.shape
            gl.glEnable(tex.target)
            gl.glBindTexture(tex.target, tex.id)
            gl.glTexImage2D(tex.target, 0, gl.GL_RGB8, w, h, 0, gl.GL_RGBA,
-                            fmt, out.buf.tostring())
+                            fmt, buf.tostring())
            gl.glDisable(tex.target)
-            label.text = '%s (%g fps)' % (out.idx, 1./real_dt)
+            label.text = '%s (%g fps)' % (name, 1./real_dt)
        else:
            label.text += '.'
-    pyglet.clock.set_fps_limit(30)
+    pyglet.clock.set_fps_limit(20)
-    pyglet.clock.schedule_interval(poll, 1/30.)
+    pyglet.clock.schedule_interval(poll, 1/20.)
    pyglet.app.run()
 def main(args, prof):
    gdb = db.connect(args.genomedb)
    gnm, basename = gdb.get_anim(args.flame, args.half)
    if getattr(args, 'print'):
        print convert.to_json(gnm)
        return
    gprof = profile.wrap(prof, gnm)
    if args.name is not None:
        basename = args.name
    prefix = os.path.join(args.dir, basename)
    if args.subdir:
        if not os.path.isdir(prefix):
            os.mkdir(prefix)
        prefix_plus = prefix + '/'
    else:
        prefix_plus = prefix + '_'
    frames = [('%s%05d%s' % (prefix_plus, i, args.suffix), t)
              for i, t in profile.enumerate_times(gprof)]
    # We don't initialize a CUDA context until here. This keeps other
    # functions like --help and --print snappy.
    import pycuda.autoinit
    rmgr = render.RenderManager()
    rdr = render.Renderer(gnm, gprof)
    def render_iter():
        m = os.path.getmtime(args.flame)
        first = True
        for name, times in frames:
            if args.resume:
                fp = name + rdr.out.suffix
                if os.path.isfile(fp) and m < os.path.getmtime(f[0]+ext):
                    continue
            for t in times:
                evt, buf = rmgr.queue_frame(rdr, gnm, gprof, t, first)
                first = False
                while not evt.query():
                    time.sleep(0.01)
                    yield None
                save(rdr.out, name, buf)
                print name, evt.time()
                yield name, buf
            save(rdr.out, name, None)
    if args.gfx:
        pyglet_preview(args, gprof, render_iter())
    else:
        for i in render_iter(): pass
 if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Render fractal flames.')