Support x264 10-bit output format.

Steven Robertson 2012-07-22 15:53:38 -07:00
parent 21f783730a
commit 3294ba10d6
10 changed files with 363 additions and 116 deletions


@ -1,15 +1,16 @@
from util import devlib, ringbuflib from util import devlib, ringbuflib
from mwc import mwclib from mwc import mwclib
f32tou8lib = devlib(deps=[ringbuflib, mwclib], defs=r''' rgba8lib = devlib(deps=[ringbuflib, mwclib], defs=r'''
// Perform a conversion from float32 values to uint8 ones, applying // Perform a conversion from float32 values to uint8 ones, applying
// pixel- and channel-independent dithering to reduce suprathreshold banding // pixel- and channel-independent dithering to reduce suprathreshold banding
// artifacts. Clamps values larger than 1.0f. // artifacts. Clamps values larger than 1.0f.
// TODO: move to a separate module? // TODO: move to a separate module?
// TODO: less inefficient mwc_st handling? // TODO: less inefficient mwc_st handling?
__global__ void f32_to_u8( __global__ void f32_to_rgba_u8(
ringbuf *rb, mwc_st *rctxs, uchar4 *dst, const float4 *src, uchar4 *dst, const float4 *src,
int gutter, int dstride, int sstride, int height) int gutter, int dstride, int sstride, int height,
ringbuf *rb, mwc_st *rctxs)
{ {
int x = blockIdx.x * blockDim.x + threadIdx.x; int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y; int y = blockIdx.y * blockDim.y + threadIdx.y;
@ -32,3 +33,34 @@ __global__ void f32_to_u8(
rctxs[rb_incr(rb->tail, tid)] = rctx; rctxs[rb_incr(rb->tail, tid)] = rctx;
} }
''') ''')
rgba16lib = devlib(deps=[ringbuflib, mwclib], defs=r'''
// Perform a conversion from float32 values to uint16 ones, as above.
__global__ void f32_to_rgba_u16(
ushort4 *dst, const float4 *src,
int gutter, int dstride, int sstride, int height,
ringbuf *rb, mwc_st *rctxs)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x > dstride || y > height) return;
int isrc = sstride * (y + gutter) + x + gutter;
int tid = blockDim.x * threadIdx.y + threadIdx.x;
mwc_st rctx = rctxs[rb_incr(rb->head, tid)];
float4 in = src[isrc];
ushort4 out = make_ushort4(
fminf(1.0f, in.x) * 65535.0f + 0.49f * mwc_next_11(rctx),
fminf(1.0f, in.y) * 65535.0f + 0.49f * mwc_next_11(rctx),
fminf(1.0f, in.z) * 65535.0f + 0.49f * mwc_next_11(rctx),
fminf(1.0f, in.w) * 65535.0f + 0.49f * mwc_next_11(rctx)
);
int idst = dstride * y + x;
dst[idst] = out;
rctxs[rb_incr(rb->tail, tid)] = rctx;
}
''')
pixfmtlib = devlib(deps=[rgba8lib, rgba16lib])
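The kernels above quantize by clamping each channel to 1.0, scaling to the full integer range, and adding up to +/-0.49 of one quantization step of per-sample noise before the float-to-integer conversion; the noise is what masks banding in smooth gradients. A rough host-side NumPy equivalent of the same step (function and variable names are illustrative, not part of this commit):

    import numpy as np

    def dither_quantize(frame, bits=16):
        # Clamp to 1.0, scale to the target range, and add sub-step noise
        # so smooth gradients don't collapse into visible bands.
        maxval = (1 << bits) - 1
        noise = np.random.uniform(-0.49, 0.49, frame.shape)
        out = np.minimum(frame, 1.0) * maxval + noise
        dtype = np.uint8 if bits <= 8 else np.uint16
        return np.clip(out, 0, maxval).astype(dtype)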


@ -115,6 +115,8 @@ profile = (
, 'end': Scalar(None, 'Last frame to render (1-indexed, exclusive; ' , 'end': Scalar(None, 'Last frame to render (1-indexed, exclusive; '
'negative indexes from the end)') 'negative indexes from the end)')
, 'skip': Scalar(0, 'Skip this many frames between each rendered frame') , 'skip': Scalar(0, 'Skip this many frames between each rendered frame')
, 'shard': Scalar(0, 'Pack this many seconds of output into each file '
'(causing start, end, and skip to be ignored)')
, 'height': Scalar(1920, 'Output height in pixels') , 'height': Scalar(1920, 'Output height in pixels')
, 'width': Scalar(1080, 'Output width in pixels') , 'width': Scalar(1080, 'Output width in pixels')
@ -123,7 +125,9 @@ profile = (
, 'filter_order': list_(enum(filters.keys()), default_filters) , 'filter_order': list_(enum(filters.keys()), default_filters)
, 'filters': prof_filters , 'filters': prof_filters
, 'output_format': enum('jpg png tif', 'jpg') # The other keys in the 'output' dictionary are format-specific and not
# documented here.
, 'output': {'type': enum('jpeg png tiff x264', 'jpeg')}
}) })
# Types recognized as independent units with a 'type' key # Types recognized as independent units with a 'type' key


@ -1,10 +1,14 @@
import os
import tempfile
from cStringIO import StringIO
from subprocess import Popen, PIPE
import numpy as np import numpy as np
from numpy import float32 as f32, int32 as i32 from numpy import float32 as f32, int32 as i32
import pycuda.driver as cuda import pycuda.driver as cuda
from code.util import ClsMod, launch from code.util import ClsMod, launch
from code.output import f32tou8lib from code.output import pixfmtlib
import scipy.misc import scipy.misc
@ -12,39 +16,209 @@ if not hasattr(scipy.misc, 'toimage'):
raise ImportError("Could not find scipy.misc.toimage. " raise ImportError("Could not find scipy.misc.toimage. "
"Are scipy and PIL installed?") "Are scipy and PIL installed?")
def launchC(name, mod, stream, dim, fb, *args):
launch(name, mod, stream,
(32, 8, 1), (int(np.ceil(dim.w/32.)), int(np.ceil(dim.h/8.))),
fb.d_back, fb.d_front,
i32(fb.gutter), i32(dim.w), i32(dim.astride), i32(dim.h),
*args)
class Output(object): class Output(object):
def convert(self, fb, gnm, dim, stream=None): def convert(self, fb, gnm, dim, stream=None):
""" """
Convert a filtered buffer to whatever output format is needed by the Convert a filtered buffer to whatever output format is needed by the
writer. writer.
This function is intended for use by the Renderer, and should not be
called by clients. It does not modify its instance.
""" """
raise NotImplementedError() raise NotImplementedError()
def copy(self, fb, dim, pool, stream=None): def copy(self, fb, dim, pool, stream=None):
""" """
Schedule a copy from the device buffer to host memory, returning the Schedule a copy from the device buffer to host memory, returning the
target buffer. target buffer(s).
This function is intended for use by the Renderer, and should not be
called by clients. It does not modify its instance.
""" """
raise NotImplementedError() raise NotImplementedError()
def encode(self, host_frame):
"""
Push `host_frame` (as returned from `Output.copy`) into the encoding
pipeline, and return any completed media segments. If `host_frame` is
None, flush the encoding pipeline.
The return value is a 2-tuple `(media, logs)`. `media` is a dictionary
mapping channel names (appropriate for use as file suffixes) to
file-like objects containing the encoded media segments. `logs` is a
list of `(name, text)` log entries. Either or both can be empty on any
call: non-temporal codecs typically produce media on every frame except
the flush, while temporal codecs produce media only on the flush.
Media segments are discretely decodeable chunks of content. The
mapping of media segments to individual frames is not specified.
"""
raise NotImplementedError()
@property
def suffix(self):
"""
Return the file suffix that will be used. If more than one suffix will
be used, the value returned is the one considered to be "primary".
"""
raise NotImplementedError()
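The contract above is easiest to see from the writer's side. A minimal sketch of driving an Output implementation, assuming `out` is such an instance and `frames` yields host buffers as returned by `Output.copy` (names are illustrative, not part of this commit):

    def write_media(out, frames, basename):
        def dump(media):
            for suffix, fobj in media.items():
                with open(basename + suffix, 'wb') as fp:
                    fp.write(fobj.read())
        for frame in frames:
            media, logs = out.encode(frame)   # often empty for temporal codecs
            dump(media)
        media, logs = out.encode(None)        # flush; temporal codecs emit here
        dump(media)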
class PILOutput(Output, ClsMod): class PILOutput(Output, ClsMod):
lib = f32tou8lib lib = pixfmtlib
def __init__(self, codec='jpeg', quality=100, alpha=False):
super(PILOutput, self).__init__()
self.type, self.quality, self.alpha = codec, quality, alpha
def convert(self, fb, gnm, dim, stream=None): def convert(self, fb, gnm, dim, stream=None):
launch('f32_to_u8', self.mod, stream, launchC('f32_to_rgba_u8', self.mod, stream, dim, fb,
(32, 8, 1), (int(np.ceil(dim.w/32.)), int(np.ceil(dim.h/8.))), fb.d_rb, fb.d_seeds)
fb.d_rb, fb.d_seeds, fb.d_back, fb.d_front,
i32(fb.gutter), i32(dim.w), i32(dim.astride), i32(dim.h))
def copy(self, fb, dim, pool, stream=None): def copy(self, fb, dim, pool, stream=None):
h_out = pool.allocate((dim.h, dim.w, 4), 'u1') h_out = pool.allocate((dim.h, dim.w, 4), 'u1')
cuda.memcpy_dtoh_async(h_out, fb.d_back, stream) cuda.memcpy_dtoh_async(h_out, fb.d_back, stream)
return h_out return h_out
@staticmethod def _convert_buf(self, buf):
def save(buf, name, type=None, quality=98): out = StringIO()
type = dict(jpg='jpeg', tif='tiff').get(type, type)
if type == 'jpeg' or (type is None and name.endswith('.jpg')):
buf = buf[:,:,:3]
img = scipy.misc.toimage(buf, cmin=0, cmax=1) img = scipy.misc.toimage(buf, cmin=0, cmax=1)
img.save(name, type, quality=quality) img.save(out, self.type, quality=self.quality)
out.seek(0)
return out
def encode(self, buf):
if buf is None: return {}, []
if self.type == 'jpeg':
out = self._convert_buf(buf[:,:,:3])
if self.alpha:
alpha = self._convert_buf(buf[:,:,3])
return {'_color.jpg': out, '_alpha.jpg': alpha}, []
return {'.jpg': out}, []
return {'.'+self.type: self._convert_buf(buf)}, []
@property
def suffix(self):
if self.type == 'jpeg':
if self.alpha: return '_color.jpg'
return '.jpg'
return '.'+self.type
class X264Output(Output, ClsMod):
lib = pixfmtlib
profiles = (
{ 'normal': '--profile high444 --level 4.2'
, '': ''
})
base = ('x264 --no-progress --input-depth 16 --sync-lookahead 0 '
'--rc-lookahead 5 --muxer raw -o - - --log-level debug ')
def __init__(self, profile='normal', csp='i444', crf=15,
x264opts='', alpha=False):
super(X264Output, self).__init__()
self.args = ' '.join([self.base, self.profiles[profile],
'--crf', str(crf), x264opts]).split()
self.alpha = alpha
self.csp = csp
self.framesize = None
self.zeros = None
self.subp = None
self.outf = None
self.asubp = None
self.aoutf = None
def convert(self, fb, gnm, dim, stream=None):
launchC('f32_to_rgba_u16', self.mod, stream, dim, fb,
fb.d_rb, fb.d_seeds)
def copy(self, fb, dim, pool, stream=None):
h_out = pool.allocate((dim.h, dim.w, 4), 'u2')
cuda.memcpy_dtoh_async(h_out, fb.d_back, stream)
return h_out
def _spawn_sub(self, framesize, alpha):
res = '%dx%d' % (framesize[1], framesize[0])
csp = 'yv12' if alpha else 'rgb'
extras = ['--input-csp', csp, '--demuxer', 'raw', '--input-res', res]
outf = tempfile.TemporaryFile(bufsize=0)
if alpha:
extras += ['--output-csp', 'i420', '--chroma-qp-offset', '24']
else:
extras += ['--output-csp', self.csp]
subp = Popen(self.args + extras, stdin=PIPE, stderr=PIPE,
stdout=os.dup(outf.fileno()))
return outf, subp
def _spawn(self, framesize):
self.framesize = framesize
self.outf, self.subp = self._spawn_sub(framesize, False)
if self.alpha:
self.aoutf, self.asubp = self._spawn_sub(framesize, True)
bufsz = framesize[0] * framesize[1] / 2
self.zeros = np.empty(bufsz, dtype='u2')
self.zeros.fill(32767)
def _flush_sub(self, subp):
(stdout, stderr) = subp.communicate()
if subp.returncode:
raise IOError("x264 exited with an error")
return stderr
def _flush(self):
if self.subp is None:
return {}, []
log = self._flush_sub(self.subp)
self.outf.seek(0)
self.subp = None
if self.alpha:
alog = self._flush_sub(self.asubp)
self.aoutf.seek(0)
self.asubp = None
return ({'_color.h264': self.outf, '_alpha.h264': self.aoutf},
[('x264_color', log), ('x264_alpha', alog)])
return {'.h264': self.outf}, [('x264_color', log)]
def _write(self, buf, subp):
try:
subp.stdin.write(buffer(buf))
except IOError, e:
print 'Exception while writing. Log:'
print subp.stderr.read()
raise e
def encode(self, buf):
out = ({}, [])
if buf is None or self.framesize != buf.shape[:2]:
out = self._flush()
if buf is None:
return out
if self.subp is None:
self._spawn(buf.shape[:2])
self._write(np.delete(buf, 3, axis=2), self.subp)
if self.alpha:
self._write(buf[:,:,3].tostring(), self.asubp)
self._write(buffer(self.zeros), self.asubp)
return out
@property
def suffix(self):
if self.alpha: return '_color.h264'
return '.h264'
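For reference, the per-frame data each subprocess receives works out as below: the color stream gets raw 16-bit RGB with the alpha channel deleted, and the alpha stream gets the alpha plane as luma plus the flat mid-grey chroma samples from `zeros`. A back-of-the-envelope sketch (frame size is illustrative):

    width, height = 1280, 720                    # illustrative dimensions
    color_bytes = height * width * 3 * 2         # 16-bit RGB per frame
    alpha_bytes = (height * width                # 16-bit alpha plane as luma
                   + height * width // 2) * 2    # plus the 32767-filled chroma samples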
def get_output_for_profile(gprof):
opts = dict(gprof.output._val)
handler = opts.pop('type', 'jpeg')
if handler in ('jpeg', 'png', 'tiff'):
return PILOutput(codec=handler, **opts)
elif handler == 'x264':
return X264Output(**opts)
raise ValueError('Invalid output type "%s".' % handler)
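A hypothetical 'output' profile section and the instance it resolves to (values are illustrative):

    output_section = {'type': 'x264', 'crf': 18, 'alpha': True}
    opts = dict(output_section)
    handler = opts.pop('type', 'jpeg')
    # handler == 'x264', so this resolves to X264Output(crf=18, alpha=True);
    # {'type': 'jpeg', 'quality': 95} would resolve to PILOutput(codec='jpeg', quality=95).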


@ -37,6 +37,13 @@ def add_args(parser=None):
help="Last frame to render (1-indexed, exclusive, negative from end)") help="Last frame to render (1-indexed, exclusive, negative from end)")
tmp.add_argument('--skip', dest='skip', metavar='N', type=int, tmp.add_argument('--skip', dest='skip', metavar='N', type=int,
help="Skip N frames between each rendered frame") help="Skip N frames between each rendered frame")
# TODO: eliminate the 'silently overwritten' bit.
tmp.add_argument('--shard', dest='shard', metavar='SECS', type=float,
help="Write SECS of output into each file, instead of one frame per "
"file. If set, causes 'start', 'end', and 'skip' to be ignored. "
"If output codecs don't support multi-file writing, files will "
"be silently overwritten.")
tmp.add_argument('--still', action='store_true', tmp.add_argument('--still', action='store_true',
help='Override start, end, and temporal frame width to render one ' help='Override start, end, and temporal frame width to render one '
'frame without motion blur.') 'frame without motion blur.')
@ -48,7 +55,7 @@ def add_args(parser=None):
spa.add_argument('--height', type=int, metavar='PX') spa.add_argument('--height', type=int, metavar='PX')
out = parser.add_argument_group('Output options') out = parser.add_argument_group('Output options')
out.add_argument('--codec', choices=['jpg', 'png', 'tiff']) out.add_argument('--codec', choices=['jpg', 'png', 'tiff', 'x264'])
return parser return parser
def get_from_args(args): def get_from_args(args):
@ -64,9 +71,11 @@ def get_from_args(args):
if args.still: if args.still:
base.update(frame_width=0, start=1, end=2) base.update(frame_width=0, start=1, end=2)
for arg in 'duration fps start end skip spp width height'.split(): for arg in 'duration fps start end skip shard spp width height'.split():
if getattr(args, arg, None) is not None: if getattr(args, arg, None) is not None:
base[arg] = getattr(args, arg) base[arg] = getattr(args, arg)
if args.codec is not None:
base.setdefault('output', {})['type'] = args.codec
return name, base return name, base
@ -82,13 +91,20 @@ def wrap(prof, gnm):
def enumerate_times(gprof): def enumerate_times(gprof):
""" """
Given a profile, return a list of `(frame_no, center_time)` pairs. Note Given a profile, return a list of `(frame_no, center_times)` pairs. Note
that the enumeration is applied before `start`, `end`, and `skip`, and so that the enumeration is applied before `start`, `end`, and `skip`, and so
`frame_no` may be non-contiguous. `frame_no` may be non-contiguous.
""" """
nframes = round(gprof.fps * gprof.duration) nframes = round(gprof.fps * gprof.duration)
times = np.linspace(0, 1, nframes + 1) times = np.linspace(0, 1, nframes + 1)
times = list(enumerate(times[:-1] + 0.5 * (times[1] - times[0]), 1)) times = times[:-1] + 0.5 * (times[1] - times[0])
if gprof.shard:
s = max(1, int(round(gprof.fps * gprof.shard)))
return [(i, times[t:t+s])
for i, t in enumerate(range(0, len(times), s), 1)]
else:
times = [[t] for t in times]
times = list(enumerate(times, 1))
if gprof.end is not None: if gprof.end is not None:
times = times[:gprof.end] times = times[:gprof.end]
if gprof.start is not None: if gprof.start is not None:
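A worked example of the sharding arithmetic above, assuming fps=24, duration=10 and shard=2.0 seconds (values are illustrative):

    import numpy as np

    fps, duration, shard = 24.0, 10.0, 2.0
    nframes = int(round(fps * duration))              # 240 center times in (0, 1)
    times = np.linspace(0, 1, nframes + 1)
    times = times[:-1] + 0.5 * (times[1] - times[0])
    s = max(1, int(round(fps * shard)))               # 48 frames per output file
    shards = [(i, times[t:t+s]) for i, t in enumerate(range(0, len(times), s), 1)]
    # -> 5 pairs: (1, times[0:48]) ... (5, times[192:240]); with shard unset,
    #    each frame gets its own (frame_no, [center_time]) pair and
    #    start/end/skip are applied to that list instead.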


@ -235,7 +235,7 @@ class Renderer(object):
self.packer, self.lib, self.cubin = self.compile(gnm) self.packer, self.lib, self.cubin = self.compile(gnm)
self.mod = self.load(self.cubin) self.mod = self.load(self.cubin)
self.filts = filters.create(gprof) self.filts = filters.create(gprof)
self.out = output.PILOutput() self.out = output.get_output_for_profile(gprof)
class RenderManager(ClsMod): class RenderManager(ClsMod):
lib = devlib(deps=[interp.palintlib, filldptrlib, iter.flushatomlib]) lib = devlib(deps=[interp.palintlib, filldptrlib, iter.flushatomlib])
@ -395,26 +395,3 @@ class RenderManager(ClsMod):
self.info_a, self.info_b = self.info_b, self.info_a self.info_a, self.info_b = self.info_b, self.info_a
self.stream_a, self.stream_b = self.stream_b, self.stream_a self.stream_a, self.stream_b = self.stream_b, self.stream_a
return self.copy_evt, h_out return self.copy_evt, h_out
def render(self, gnm, gprof, times):
"""
A port of the old rendering function, retained for backwards
compatibility. Some of this will be pulled into as-yet-undecided
methods for more DRY.
"""
rdr = Renderer(gnm, gprof)
last_evt = cuda.Event().record(self.stream_a)
last_idx = None
def wait(): # Times like these where you wish for a macro
while not last_evt.query():
time.sleep(0.01)
gpu_time = last_evt.time_since(two_evts_ago)
return RenderedImage(last_buf, last_idx, gpu_time)
for idx, tc in times:
evt, h_buf = self.queue_frame(rdr, gnm, gprof, tc, last_idx is None)
if last_idx:
yield wait()
two_evts_ago, last_evt = last_evt, evt
last_buf, last_idx = h_buf, idx
if last_idx:
yield wait()

dist/client.py (vendored, 32 changed lines)

@ -10,11 +10,15 @@ from gevent import spawn, queue, coros
import zmq.green as zmq import zmq.green as zmq
import _importhack import _importhack
from cuburn import profile from cuburn import profile, output
from cuburn.genome import db, util from cuburn.genome import db, util
from messages import * from messages import *
# TODO: remove this dependency (loading the output module to get the suffix
# requires a compiler / default instance)
import pycuda.autoinit
class RenderClient(object): class RenderClient(object):
def __init__(self, task_addr, rsp_addr, ctx=None, start=True): def __init__(self, task_addr, rsp_addr, ctx=None, start=True):
ctx = zmq.Context() if ctx is None else ctx ctx = zmq.Context() if ctx is None else ctx
@ -56,12 +60,12 @@ class RenderClient(object):
def _deal_rsps(self): def _deal_rsps(self):
while True: while True:
rsp = self.rsock.recv_multipart(copy=False) rsp = self.rsock.recv_multipart(copy=False)
assert len(rsp) == 2
rq = self.taskmap.get(rsp[0].bytes, None) rq = self.taskmap.get(rsp[0].bytes, None)
if rq: rq.put(rsp[1]) if rq: rq.put((rsp[1].bytes, rsp[2].bytes.split('\0'), rsp[3:]))
# Time (in seconds) before a job times out # Time (in seconds) before a job times out
TIMEOUT=240 # TODO: replace timeout mechanism with polling?
TIMEOUT=2400
# Max. queue length before request considered lost, as a multiple of the # Max. queue length before request considered lost, as a multiple of the
# number of in-flight requests # number of in-flight requests
@ -92,21 +96,27 @@ def iter_genomes(prof, outpath, gpaths):
os.makedirs(odir) os.makedirs(odir)
with open(os.path.join(odir, 'NFRAMES'), 'w') as fp: with open(os.path.join(odir, 'NFRAMES'), 'w') as fp:
fp.write(str(len(times))) fp.write(str(len(times)))
outmod = output.get_output_for_profile(gprof)
for i, t in times: for i, t in times:
opath = os.path.join(odir, '%05d.%s' % (i, gprof.output_format)) opath = os.path.join(odir, '%05d' % i)
if not os.path.isfile(opath): if not os.path.isfile(opath + outmod.suffix):
yield Task(opath, ghash, prof, gnm, t) yield Task(opath, ghash, prof, gnm, t)
def get_result(cli, task, rq): def get_result(cli, task, rq):
try: try:
rsp = rq.get(timeout=TIMEOUT) log, names, bufs = rq.get(timeout=TIMEOUT)
except queue.Empty: except queue.Empty:
cli.put(task, rq) cli.put(task, rq)
print '>>', task.id print '>>', task.id
rsp = rq.get() log, names, bufs = rq.get()
with open(task.id, 'wb') as fp: with open(task.id + '.log', 'wb') as fp:
fp.write(buffer(rsp)) fp.write(log)
for name in reversed(names):
buf = bufs.pop()
with open(task.id + name, 'wb') as fp:
fp.write(buffer(buf))
print '< ', task.id print '< ', task.id
def main(addrs): def main(addrs):
@ -128,6 +138,8 @@ def main(addrs):
while cli.taskmap: while cli.taskmap:
print 'Still waiting on %d tasks...' % len(cli.taskmap) print 'Still waiting on %d tasks...' % len(cli.taskmap)
for i in cli.taskmap.items():
print i
gevent.sleep(3) gevent.sleep(3)
if __name__ == "__main__": if __name__ == "__main__":

dist/messages.py (vendored, 2 changed lines)

@ -1,5 +1,5 @@
from collections import namedtuple from collections import namedtuple
Task = namedtuple('Task', 'id hash profile anim time') Task = namedtuple('Task', 'id hash profile anim times')
AddressedTask = namedtuple('AddressedTask', 'addr task') AddressedTask = namedtuple('AddressedTask', 'addr task')
FullTask = namedtuple('FullTask', 'addr task cubin packer') FullTask = namedtuple('FullTask', 'addr task cubin packer')

dist/server.py (vendored, 4 changed lines)

@ -1,4 +1,5 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
from itertools import takewhile
import gevent import gevent
from gevent import spawn, queue, event from gevent import spawn, queue, event
@ -29,7 +30,6 @@ def setup_task_listeners(addrs, tq, rq):
# losock to be added to the queue. # losock to be added to the queue.
loevt.set() loevt.set()
task = hisock.recv_pyobj() task = hisock.recv_pyobj()
print 'OOOOOH! Got a hiprio evt'
loevt.clear() # Got message; pause listen_lo(). loevt.clear() # Got message; pause listen_lo().
tq.put(task) tq.put(task)
hisock.send('') hisock.send('')
@ -77,7 +77,7 @@ def setup_worker_listener(addrs, tq, rq):
while True: while True:
rsp = wsock.recv_multipart(copy=False) rsp = wsock.recv_multipart(copy=False)
if rsp[2].bytes != '': if rsp[2].bytes != '':
print '< ', ' '.join([r.bytes for r in rsp[2:-1]]) print '< ', rsp[2].bytes, rsp[3].bytes
rq.put(rsp[2:]) rq.put(rsp[2:])
readyq.put(rsp[0]) readyq.put(rsp[0])

dist/worker.py (vendored, 28 changed lines)

@ -1,5 +1,6 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
import sys import sys
import socket
from cStringIO import StringIO from cStringIO import StringIO
import gevent import gevent
@ -37,20 +38,29 @@ def main(worker_addr):
hash = None hash = None
while True: while True:
log = [('worker', socket.gethostname() + ':' +
cuda.Context.get_current().get_device().pci_bus_id())]
addr, task, cubin, packer = sock.recv_pyobj() addr, task, cubin, packer = sock.recv_pyobj()
gprof = profile.wrap(task.profile, task.anim) gprof = profile.wrap(task.profile, task.anim)
if hash != task.hash: if hash != task.hash:
rdr = PrecompiledRenderer(task.anim, gprof, packer, cubin) rdr = PrecompiledRenderer(task.anim, gprof, packer, cubin)
evt, buf = rmgr.queue_frame(rdr, task.anim, gprof, task.time) for t in task.times:
while not evt.query(): evt, buf = rmgr.queue_frame(rdr, task.anim, gprof, t)
gevent.sleep(0.01) while not evt.query():
ofile = StringIO() gevent.sleep(0.01)
output.PILOutput.save(buf, ofile, task.id[-3:]) out, frame_log = rdr.out.encode(buf)
ofile.seek(0) log += frame_log
sock.send_multipart(addr + [ofile.read()]) print 'Rendered', task.id, 'in', int(evt.time()), 'ms'
hash = task.hash final_out, final_log = rdr.out.encode(None)
assert not (out and final_out), 'Got output from two sources!'
out = out or final_out
log += final_log
log = '\0'.join([k + ' ' + v for k, v in log])
print 'Rendered', task.id, 'in', int(evt.time()), 'ms' suffixes, files = zip(*[(k, v.read())
for k, v in sorted(out.items())])
# TODO: reduce copies, generally spruce up the memory usage here
sock.send_multipart(addr + [log, '\0'.join(suffixes)] + list(files))
# Spawn two request loops to take advantage of CUDA pipelining. # Spawn two request loops to take advantage of CUDA pipelining.
spawn(request_loop) spawn(request_loop)
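For reference, the worker's reply is framed as [log, '\0'-joined suffixes, one blob per suffix] after the routing envelope, which is what the client's _deal_rsps and get_result unpack. A small sketch of the receiving side (the function name is illustrative, not part of this commit):

    def unpack_response(frames):
        # frames[0]: '\0'-joined "name value" log entries
        # frames[1]: '\0'-joined file suffixes, e.g. '_alpha.h264\0_color.h264'
        # frames[2:]: encoded media, one blob per suffix, in the same order
        log = frames[0]
        names = frames[1].split('\0')
        return log, dict(zip(names, frames[2:]))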

main.py (118 changed lines)

@ -26,44 +26,16 @@ sys.path.insert(0, os.path.dirname(__file__))
from cuburn import render, filters, output, profile from cuburn import render, filters, output, profile
from cuburn.genome import convert, use, db from cuburn.genome import convert, use, db
def save(out): def save(output_module, name, rendered_frame):
# Temporary! TODO: fix this out, log = output_module.encode(rendered_frame)
output.PILOutput.save(out.buf, out.idx) for suffix, file_like in out.items():
print out.idx, out.gpu_time with open(name + suffix, 'w') as fp:
fp.write(file_like.read())
def main(args, prof): for key, val in log:
gdb = db.connect(args.genomedb) print '\n=== %s ===' % key
gnm, basename = gdb.get_anim(args.flame, args.half) print val
if getattr(args, 'print'):
print convert.to_json(gnm)
return
gprof = profile.wrap(prof, gnm)
if args.name is not None:
basename = args.name
prefix = os.path.join(args.dir, basename)
if args.subdir:
if not os.path.isdir(prefix):
os.mkdir(prefix)
prefix += '/'
else:
prefix += '_'
frames = [('%s%05d%s.jpg' % (prefix, (i+1), args.suffix), t)
for i, t in profile.enumerate_times(gprof)]
if args.resume:
m = os.path.getmtime(args.flame)
frames = (f for f in frames
if not os.path.isfile(f[0]) or m > os.path.getmtime(f[0]))
import pycuda.autoinit
rmgr = render.RenderManager()
gen = rmgr.render(gnm, gprof, frames)
if not args.gfx:
for out in gen:
save(out)
return
def pyglet_preview(args, gprof, itr):
import pyglet import pyglet
import pyglet.gl as gl import pyglet.gl as gl
w, h = gprof.width, gprof.height w, h = gprof.width, gprof.height
@ -92,39 +64,89 @@ def main(args, prof):
last_time = [time.time()] last_time = [time.time()]
def poll(dt): def poll(dt):
out = next(gen, False) out = next(itr, False)
if out is False: if out is False:
if args.pause: if args.pause:
label.text = "Done. ('q' to quit)" label.text = "Done. ('q' to quit)"
#pyglet.clock.unschedule(poll)
else: else:
pyglet.app.exit() pyglet.app.exit()
elif out is not None: elif out is not None:
name, buf = out
real_dt = time.time() - last_time[0] real_dt = time.time() - last_time[0]
last_time[0] = time.time() last_time[0] = time.time()
save(out) if buf.dtype == np.uint8:
if out.buf.dtype == np.uint8:
fmt = gl.GL_UNSIGNED_BYTE fmt = gl.GL_UNSIGNED_BYTE
elif out.buf.dtype == np.uint16: elif buf.dtype == np.uint16:
fmt = gl.GL_UNSIGNED_SHORT fmt = gl.GL_UNSIGNED_SHORT
else: else:
label.text = 'Unsupported format: ' + out.buf.dtype label.text = 'Unsupported format: ' + str(buf.dtype)
return return
h, w, ch = out.buf.shape h, w, ch = buf.shape
gl.glEnable(tex.target) gl.glEnable(tex.target)
gl.glBindTexture(tex.target, tex.id) gl.glBindTexture(tex.target, tex.id)
gl.glTexImage2D(tex.target, 0, gl.GL_RGB8, w, h, 0, gl.GL_RGBA, gl.glTexImage2D(tex.target, 0, gl.GL_RGB8, w, h, 0, gl.GL_RGBA,
fmt, out.buf.tostring()) fmt, buf.tostring())
gl.glDisable(tex.target) gl.glDisable(tex.target)
label.text = '%s (%g fps)' % (out.idx, 1./real_dt) label.text = '%s (%g fps)' % (name, 1./real_dt)
else: else:
label.text += '.' label.text += '.'
pyglet.clock.set_fps_limit(30) pyglet.clock.set_fps_limit(20)
pyglet.clock.schedule_interval(poll, 1/30.) pyglet.clock.schedule_interval(poll, 1/20.)
pyglet.app.run() pyglet.app.run()
def main(args, prof):
gdb = db.connect(args.genomedb)
gnm, basename = gdb.get_anim(args.flame, args.half)
if getattr(args, 'print'):
print convert.to_json(gnm)
return
gprof = profile.wrap(prof, gnm)
if args.name is not None:
basename = args.name
prefix = os.path.join(args.dir, basename)
if args.subdir:
if not os.path.isdir(prefix):
os.mkdir(prefix)
prefix_plus = prefix + '/'
else:
prefix_plus = prefix + '_'
frames = [('%s%05d%s' % (prefix_plus, i, args.suffix), t)
for i, t in profile.enumerate_times(gprof)]
# We don't initialize a CUDA context until here. This keeps other
# functions like --help and --print snappy.
import pycuda.autoinit
rmgr = render.RenderManager()
rdr = render.Renderer(gnm, gprof)
def render_iter():
m = os.path.getmtime(args.flame)
first = True
for name, times in frames:
if args.resume:
fp = name + rdr.out.suffix
if os.path.isfile(fp) and m < os.path.getmtime(fp):
continue
for t in times:
evt, buf = rmgr.queue_frame(rdr, gnm, gprof, t, first)
first = False
while not evt.query():
time.sleep(0.01)
yield None
save(rdr.out, name, buf)
print name, evt.time()
yield name, buf
save(rdr.out, name, None)
if args.gfx:
pyglet_preview(args, gprof, render_iter())
else:
for i in render_iter(): pass
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Render fractal flames.') parser = argparse.ArgumentParser(description='Render fractal flames.')