mirror of
https://github.com/stevenrobertson/cuburn.git
synced 2025-02-05 11:40:04 -05:00
Support x264 10-bit output format.
This commit is contained in:
parent
21f783730a
commit
3294ba10d6
@ -1,15 +1,16 @@
|
|||||||
from util import devlib, ringbuflib
|
from util import devlib, ringbuflib
|
||||||
from mwc import mwclib
|
from mwc import mwclib
|
||||||
|
|
||||||
f32tou8lib = devlib(deps=[ringbuflib, mwclib], defs=r'''
|
rgba8lib = devlib(deps=[ringbuflib, mwclib], defs=r'''
|
||||||
// Perform a conversion from float32 values to uint8 ones, applying
|
// Perform a conversion from float32 values to uint8 ones, applying
|
||||||
// pixel- and channel-independent dithering to reduce suprathreshold banding
|
// pixel- and channel-independent dithering to reduce suprathreshold banding
|
||||||
// artifacts. Clamps values larger than 1.0f.
|
// artifacts. Clamps values larger than 1.0f.
|
||||||
// TODO: move to a separate module?
|
// TODO: move to a separate module?
|
||||||
// TODO: less ineffecient mwc_st handling?
|
// TODO: less ineffecient mwc_st handling?
|
||||||
__global__ void f32_to_u8(
|
__global__ void f32_to_rgba_u8(
|
||||||
ringbuf *rb, mwc_st *rctxs, uchar4 *dst, const float4 *src,
|
uchar4 *dst, const float4 *src,
|
||||||
int gutter, int dstride, int sstride, int height)
|
int gutter, int dstride, int sstride, int height,
|
||||||
|
ringbuf *rb, mwc_st *rctxs)
|
||||||
{
|
{
|
||||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -32,3 +33,34 @@ __global__ void f32_to_u8(
|
|||||||
rctxs[rb_incr(rb->tail, tid)] = rctx;
|
rctxs[rb_incr(rb->tail, tid)] = rctx;
|
||||||
}
|
}
|
||||||
''')
|
''')
|
||||||
|
|
||||||
|
rgba16lib = devlib(deps=[ringbuflib, mwclib], defs=r'''
// Perform a conversion from float32 values to uint16 ones, applying
// pixel- and channel-independent dithering in the same way as the uint8
// conversion. Clamps values larger than 1.0f.
//
// One thread converts one output pixel. `src` is indexed with a row stride of
// `sstride` elements and is offset by `gutter` elements in both x and y;
// `dst` is indexed with a row stride of `dstride` elements and no gutter.
// `rb` and `rctxs` supply (and receive back) the per-thread RNG state used
// for dithering.
__global__ void f32_to_rgba_u16(
        ushort4 *dst, const float4 *src,
        int gutter, int dstride, int sstride, int height,
        ringbuf *rb, mwc_st *rctxs)
{
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    // Valid pixels are 0 <= x < dstride and 0 <= y < height. The guard must
    // be inclusive: a thread at x == dstride would otherwise overwrite the
    // first pixel of the next row, and a thread at y == height would write
    // one full row past the end of the image.
    if (x >= dstride || y >= height) return;
    int isrc = sstride * (y + gutter) + x + gutter;

    // Check out an RNG context for this thread from the ring buffer.
    int tid = blockDim.x * threadIdx.y + threadIdx.x;
    mwc_st rctx = rctxs[rb_incr(rb->head, tid)];

    float4 in = src[isrc];
    // Scale to the full uint16 range and add independent dither noise to
    // each channel before the float -> ushort conversion.
    ushort4 out = make_ushort4(
        fminf(1.0f, in.x) * 65535.0f + 0.49f * mwc_next_11(rctx),
        fminf(1.0f, in.y) * 65535.0f + 0.49f * mwc_next_11(rctx),
        fminf(1.0f, in.z) * 65535.0f + 0.49f * mwc_next_11(rctx),
        fminf(1.0f, in.w) * 65535.0f + 0.49f * mwc_next_11(rctx)
    );

    int idst = dstride * y + x;
    dst[idst] = out;
    // Return the advanced RNG context to the ring buffer.
    rctxs[rb_incr(rb->tail, tid)] = rctx;
}
''')
|
||||||
|
|
||||||
|
pixfmtlib = devlib(deps=[rgba8lib, rgba16lib])
|
||||||
|
@ -115,6 +115,8 @@ profile = (
|
|||||||
, 'end': Scalar(None, 'Last frame to render (1-indexed, exclusive; '
|
, 'end': Scalar(None, 'Last frame to render (1-indexed, exclusive; '
|
||||||
'negative indexes from the end)')
|
'negative indexes from the end)')
|
||||||
, 'skip': Scalar(0, 'Skip this many frames between each rendered frame')
|
, 'skip': Scalar(0, 'Skip this many frames between each rendered frame')
|
||||||
|
, 'shard': Scalar(0, 'Pack this many frames in each output file '
|
||||||
|
'(causing start, end, and skip to be ignored)')
|
||||||
|
|
||||||
, 'height': Scalar(1920, 'Output height in pixels')
|
, 'height': Scalar(1920, 'Output height in pixels')
|
||||||
, 'width': Scalar(1080, 'Output width in pixels')
|
, 'width': Scalar(1080, 'Output width in pixels')
|
||||||
@ -123,7 +125,9 @@ profile = (
|
|||||||
, 'filter_order': list_(enum(filters.keys()), default_filters)
|
, 'filter_order': list_(enum(filters.keys()), default_filters)
|
||||||
, 'filters': prof_filters
|
, 'filters': prof_filters
|
||||||
|
|
||||||
, 'output_format': enum('jpg png tif', 'jpg')
|
# The other keys in the 'output' dictionary are format-specific and not
|
||||||
|
# documented here.
|
||||||
|
, 'output': {'type': enum('jpeg png tiff x264', 'jpeg')}
|
||||||
})
|
})
|
||||||
|
|
||||||
# Types recognized as independent units with a 'type' key
|
# Types recognized as independent units with a 'type' key
|
||||||
|
200
cuburn/output.py
200
cuburn/output.py
@ -1,10 +1,14 @@
|
|||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
from cStringIO import StringIO
|
||||||
|
from subprocess import Popen, PIPE
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from numpy import float32 as f32, int32 as i32
|
from numpy import float32 as f32, int32 as i32
|
||||||
|
|
||||||
import pycuda.driver as cuda
|
import pycuda.driver as cuda
|
||||||
|
|
||||||
from code.util import ClsMod, launch
|
from code.util import ClsMod, launch
|
||||||
from code.output import f32tou8lib
|
from code.output import pixfmtlib
|
||||||
|
|
||||||
import scipy.misc
|
import scipy.misc
|
||||||
|
|
||||||
@ -12,39 +16,209 @@ if not hasattr(scipy.misc, 'toimage'):
|
|||||||
raise ImportError("Could not find scipy.misc.toimage. "
|
raise ImportError("Could not find scipy.misc.toimage. "
|
||||||
"Are scipy and PIL installed?")
|
"Are scipy and PIL installed?")
|
||||||
|
|
||||||
|
def launchC(name, mod, stream, dim, fb, *args):
    """
    Launch the conversion kernel `name` from `mod` on `stream`, covering a
    `dim.w` x `dim.h` frame with 32x8-thread blocks.

    The kernel is handed `fb.d_back` and `fb.d_front` followed by the gutter,
    output width, source stride and height (as int32), which matches the
    leading `(dst, src, gutter, dstride, sstride, height)` parameters of the
    pixel-format kernels. Any extra `args` are appended unchanged.
    """
    block = (32, 8, 1)
    # Round up so that partially-covered edge tiles still get a block.
    blocks_x = int(np.ceil(dim.w / 32.))
    blocks_y = int(np.ceil(dim.h / 8.))
    geometry = (i32(fb.gutter), i32(dim.w), i32(dim.astride), i32(dim.h))
    launch(name, mod, stream, block, (blocks_x, blocks_y),
           fb.d_back, fb.d_front, *(geometry + args))
|
||||||
|
|
||||||
class Output(object):
|
class Output(object):
|
||||||
def convert(self, fb, gnm, dim, stream=None):
|
def convert(self, fb, gnm, dim, stream=None):
|
||||||
"""
|
"""
|
||||||
Convert a filtered buffer to whatever output format is needed by the
|
Convert a filtered buffer to whatever output format is needed by the
|
||||||
writer.
|
writer.
|
||||||
|
|
||||||
|
This function is intended for use by the Renderer, and should not be
|
||||||
|
called by clients. It does not modify its instance.
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
def copy(self, fb, dim, pool, stream=None):
|
def copy(self, fb, dim, pool, stream=None):
|
||||||
"""
|
"""
|
||||||
Schedule a copy from the device buffer to host memory, returning the
|
Schedule a copy from the device buffer to host memory, returning the
|
||||||
target buffer.
|
target buffer(s).
|
||||||
|
|
||||||
|
This function is intended for use by the Renderer, and should not be
|
||||||
|
called by clients. It does not modify its instance.
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
|
def encode(self, host_frame):
|
||||||
|
"""
|
||||||
|
Push `host_frame` (as returned from `Output.copy`) into the encoding
|
||||||
|
pipeline, and return any completed media segments. If `host_frame` is
|
||||||
|
None, flush the encoding pipeline.
|
||||||
|
|
||||||
|
The return value is a 2-tuple `(media, logs)`. `media` is a dictionary
|
||||||
|
mapping channel names (appropriate for use as file suffixes) to
|
||||||
|
file-like objects containing the encoded media segments. `logs` is a
|
||||||
|
dictionary containing log entries. Either or both entries can be empty
|
||||||
|
at any time (and will typically be either populated on each frame
|
||||||
|
except the flush, for non-temporal codecs, or will be empty on all
|
||||||
|
frames except the flush, for temporal codecs.)
|
||||||
|
|
||||||
|
Media segments are discretely decodeable chunks of content. The
|
||||||
|
mapping of media segments to individual frames is not specified.
|
||||||
|
"""
|
||||||
|
raise NotImplementedError()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def suffix(self):
|
||||||
|
"""
|
||||||
|
Return the file suffix that will be used. If more than one suffix will
|
||||||
|
be used, the value returned is the one considered to be "primary".
|
||||||
|
"""
|
||||||
|
raise NotImplementedError()
|
||||||
|
|
||||||
|
|
||||||
class PILOutput(Output, ClsMod):
|
class PILOutput(Output, ClsMod):
|
||||||
lib = f32tou8lib
|
lib = pixfmtlib
|
||||||
|
|
||||||
|
def __init__(self, codec='jpeg', quality=100, alpha=False):
|
||||||
|
super(PILOutput, self).__init__()
|
||||||
|
self.type, self.quality, self.alpha = codec, quality, alpha
|
||||||
|
|
||||||
def convert(self, fb, gnm, dim, stream=None):
|
def convert(self, fb, gnm, dim, stream=None):
|
||||||
launch('f32_to_u8', self.mod, stream,
|
launchC('f32_to_rgba_u8', self.mod, stream, dim, fb,
|
||||||
(32, 8, 1), (int(np.ceil(dim.w/32.)), int(np.ceil(dim.h/8.))),
|
fb.d_rb, fb.d_seeds)
|
||||||
fb.d_rb, fb.d_seeds, fb.d_back, fb.d_front,
|
|
||||||
i32(fb.gutter), i32(dim.w), i32(dim.astride), i32(dim.h))
|
|
||||||
|
|
||||||
def copy(self, fb, dim, pool, stream=None):
|
def copy(self, fb, dim, pool, stream=None):
|
||||||
h_out = pool.allocate((dim.h, dim.w, 4), 'u1')
|
h_out = pool.allocate((dim.h, dim.w, 4), 'u1')
|
||||||
cuda.memcpy_dtoh_async(h_out, fb.d_back, stream)
|
cuda.memcpy_dtoh_async(h_out, fb.d_back, stream)
|
||||||
return h_out
|
return h_out
|
||||||
|
|
||||||
@staticmethod
|
def _convert_buf(self, buf):
|
||||||
def save(buf, name, type=None, quality=98):
|
out = StringIO()
|
||||||
type = dict(jpg='jpeg', tif='tiff').get(type, type)
|
|
||||||
if type == 'jpeg' or (type is None and name.endswith('.jpg')):
|
|
||||||
buf = buf[:,:,:3]
|
|
||||||
img = scipy.misc.toimage(buf, cmin=0, cmax=1)
|
img = scipy.misc.toimage(buf, cmin=0, cmax=1)
|
||||||
img.save(name, type, quality=quality)
|
img.save(out, self.type, quality=self.quality)
|
||||||
|
out.seek(0)
|
||||||
|
return out
|
||||||
|
|
||||||
|
def encode(self, buf):
|
||||||
|
if buf is None: return {}, []
|
||||||
|
if self.type == 'jpeg':
|
||||||
|
out = self._convert_buf(buf[:,:,:3])
|
||||||
|
if self.alpha:
|
||||||
|
alpha = self._convert_buf(buf[:,:,3])
|
||||||
|
return {'_color.jpg': out, '_alpha.jpg': alpha}, []
|
||||||
|
return {'.jpg': out}, {}
|
||||||
|
return {'.'+self.type: self._convert_buf(buf)}, []
|
||||||
|
|
||||||
|
@property
|
||||||
|
def suffix(self):
|
||||||
|
if self.type == 'jpeg':
|
||||||
|
if self.alpha: return '_color.jpg'
|
||||||
|
return '.jpg'
|
||||||
|
return '.'+self.type
|
||||||
|
|
||||||
|
class X264Output(Output, ClsMod):
    """
    Encode frames to raw H.264 by piping 16-bit samples into `x264`
    subprocesses.

    One subprocess encodes the color channels. When `alpha` is set, a second
    subprocess encodes the alpha channel as a separate stream. Encoded data is
    written by x264 into temporary files, which are returned from `encode`
    when the pipeline is flushed.
    """
    lib = pixfmtlib

    # Named groups of x264 arguments, selected by the `profile` argument.
    profiles = (
        { 'normal': '--profile high444 --level 4.2'
        , '': ''
        })
    # Arguments shared by every x264 invocation. The encoder is run with
    # stdin as its input and stdout as its output (see `_spawn_sub`).
    base = ('x264 --no-progress --input-depth 16 --sync-lookahead 0 '
            '--rc-lookahead 5 --muxer raw -o - - --log-level debug ')

    def __init__(self, profile='normal', csp='i444', crf=15,
                 x264opts='', alpha=False):
        """
        `profile` is a key of `X264Output.profiles`, `csp` is the output
        colorspace passed to x264 for the color stream, `crf` is the x264
        rate factor, `x264opts` is a string of extra command-line arguments,
        and `alpha` enables the second (alpha) stream.
        """
        super(X264Output, self).__init__()
        self.args = ' '.join([self.base, self.profiles[profile],
                              '--crf', str(crf), x264opts]).split()
        self.alpha = alpha
        self.csp = csp
        # (height, width) of the frames the running encoders were spawned for.
        self.framesize = None
        # Constant filler written after each alpha plane (see `_spawn`).
        self.zeros = None
        # Color encoder process and the temporary file receiving its output.
        self.subp = None
        self.outf = None
        # Alpha encoder process and its output file (only when `alpha`).
        self.asubp = None
        self.aoutf = None

    def convert(self, fb, gnm, dim, stream=None):
        """Convert the filtered float buffer to 16-bit RGBA on the device."""
        launchC('f32_to_rgba_u16', self.mod, stream, dim, fb,
                fb.d_rb, fb.d_seeds)

    def copy(self, fb, dim, pool, stream=None):
        """
        Schedule an asynchronous copy of the converted frame into a
        `(dim.h, dim.w, 4)` uint16 host buffer from `pool`, and return it.
        """
        h_out = pool.allocate((dim.h, dim.w, 4), 'u2')
        cuda.memcpy_dtoh_async(h_out, fb.d_back, stream)
        return h_out

    def _spawn_sub(self, framesize, alpha):
        """
        Start one x264 process for frames of `framesize` (height, width).

        Returns `(outf, subp)`: the temporary file x264 writes to, and the
        process object. `alpha` selects the alpha-stream configuration.
        """
        res = '%dx%d' % (framesize[1], framesize[0])
        csp = 'yv12' if alpha else 'rgb'
        extras = ['--input-csp', csp, '--demuxer', 'raw', '--input-res', res]
        outf = tempfile.TemporaryFile(bufsize=0)
        if alpha:
            extras += ['--output-csp', 'i420', '--chroma-qp-offset', '24']
        else:
            extras += ['--output-csp', self.csp]
        # NOTE(review): the duplicated descriptor is never closed in this
        # process; passing `outf` itself may be sufficient -- confirm.
        subp = Popen(self.args + extras, stdin=PIPE, stderr=PIPE,
                     stdout=os.dup(outf.fileno()))
        return outf, subp

    def _spawn(self, framesize):
        """Start the encoder process(es) for frames of `framesize`."""
        self.framesize = framesize
        self.outf, self.subp = self._spawn_sub(framesize, False)
        if self.alpha:
            self.aoutf, self.asubp = self._spawn_sub(framesize, True)
            # The alpha stream is fed as 'yv12': the alpha plane is followed
            # by half as many filler samples, all set to the constant 32767.
            bufsz = framesize[0] * framesize[1] / 2
            self.zeros = np.empty(bufsz, dtype='u2')
            self.zeros.fill(32767)

    def _flush_sub(self, subp):
        """
        Close `subp`'s input, wait for it to exit and return its stderr log.
        Raises IOError if x264 exited with a non-zero status.
        """
        (stdout, stderr) = subp.communicate()
        if subp.returncode:
            raise IOError("x264 exited with an error")
        return stderr

    def _flush(self):
        """
        Finish the running encoder(s) and return `(media, logs)` as described
        by `Output.encode`. Returns empty results if nothing is running.
        """
        if self.subp is None:
            return {}, []
        log = self._flush_sub(self.subp)
        self.outf.seek(0)
        self.subp = None
        if self.alpha:
            alog = self._flush_sub(self.asubp)
            self.aoutf.seek(0)
            self.asubp = None
            return ({'_color.h264': self.outf, '_alpha.h264': self.aoutf},
                    [('x264_color', log), ('x264_alpha', alog)])
        # Report the log collected above. This path previously referenced an
        # undefined name (`stderr`), raising NameError on every flush that
        # did not use an alpha stream.
        return {'.h264': self.outf}, [('x264_color', log)]

    def _write(self, buf, subp):
        """
        Write `buf` to the stdin of `subp`. On IOError, print the encoder's
        stderr to aid debugging and re-raise the original exception.
        """
        try:
            subp.stdin.write(buffer(buf))
        except IOError:
            print('Exception while writing. Log:')
            print(subp.stderr.read())
            # Bare raise keeps the original traceback.
            raise

    def encode(self, buf):
        """
        Feed one host frame to the encoder(s); see `Output.encode`.

        Passing None flushes the pipeline. A change in frame size also
        flushes the running encoders before new ones are started, in which
        case the flushed segments are returned from this call.
        """
        out = ({}, [])
        if buf is None or self.framesize != buf.shape[:2]:
            out = self._flush()
        if buf is None:
            return out
        if self.subp is None:
            self._spawn(buf.shape[:2])
        # Drop the alpha channel; the color encoder reads packed RGB.
        self._write(np.delete(buf, 3, axis=2), self.subp)
        if self.alpha:
            self._write(buf[:,:,3].tostring(), self.asubp)
            self._write(buffer(self.zeros), self.asubp)
        return out

    @property
    def suffix(self):
        """Primary file suffix: the color stream's suffix."""
        if self.alpha: return '_color.h264'
        return '.h264'
|
||||||
|
|
||||||
|
def get_output_for_profile(gprof):
    """
    Build the Output instance described by `gprof.output`.

    The 'type' key selects the handler ('jpeg', 'png', 'tiff' or 'x264',
    defaulting to 'jpeg'); every other key is passed to the handler's
    constructor as a keyword argument. The short names 'jpg' and 'tif' are
    accepted as aliases, since the command-line `--codec` option offers
    'jpg'.

    Raises ValueError for an unrecognized type.
    """
    opts = dict(gprof.output._val)
    handler = opts.pop('type', 'jpeg')
    # Normalize short aliases to the names the PIL-based handler expects.
    handler = {'jpg': 'jpeg', 'tif': 'tiff'}.get(handler, handler)
    if handler in ('jpeg', 'png', 'tiff'):
        return PILOutput(codec=handler, **opts)
    elif handler == 'x264':
        return X264Output(**opts)
    raise ValueError('Invalid output type "%s".' % handler)
|
||||||
|
@ -37,6 +37,13 @@ def add_args(parser=None):
|
|||||||
help="Last frame to render (1-indexed, exclusive, negative from end)")
|
help="Last frame to render (1-indexed, exclusive, negative from end)")
|
||||||
tmp.add_argument('--skip', dest='skip', metavar='N', type=int,
|
tmp.add_argument('--skip', dest='skip', metavar='N', type=int,
|
||||||
help="Skip N frames between each rendered frame")
|
help="Skip N frames between each rendered frame")
|
||||||
|
# TODO: eliminate the 'silently overwritten' bit.
|
||||||
|
tmp.add_argument('--shard', dest='shard', metavar='SECS', type=float,
|
||||||
|
help="Write SECS of output into each file, instead of one frame per "
|
||||||
|
"file. If set, causes 'start', 'end', and 'skip' to be ignored. "
|
||||||
|
"If output codecs don't support multi-file writing, files will "
|
||||||
|
"be silently overwritten.")
|
||||||
|
|
||||||
tmp.add_argument('--still', action='store_true',
|
tmp.add_argument('--still', action='store_true',
|
||||||
help='Override start, end, and temporal frame width to render one '
|
help='Override start, end, and temporal frame width to render one '
|
||||||
'frame without motion blur.')
|
'frame without motion blur.')
|
||||||
@ -48,7 +55,7 @@ def add_args(parser=None):
|
|||||||
spa.add_argument('--height', type=int, metavar='PX')
|
spa.add_argument('--height', type=int, metavar='PX')
|
||||||
|
|
||||||
out = parser.add_argument_group('Output options')
|
out = parser.add_argument_group('Output options')
|
||||||
out.add_argument('--codec', choices=['jpg', 'png', 'tiff'])
|
out.add_argument('--codec', choices=['jpg', 'png', 'tiff', 'x264'])
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
def get_from_args(args):
|
def get_from_args(args):
|
||||||
@ -64,9 +71,11 @@ def get_from_args(args):
|
|||||||
|
|
||||||
if args.still:
|
if args.still:
|
||||||
base.update(frame_width=0, start=1, end=2)
|
base.update(frame_width=0, start=1, end=2)
|
||||||
for arg in 'duration fps start end skip spp width height'.split():
|
for arg in 'duration fps start end skip shard spp width height'.split():
|
||||||
if getattr(args, arg, None) is not None:
|
if getattr(args, arg, None) is not None:
|
||||||
base[arg] = getattr(args, arg)
|
base[arg] = getattr(args, arg)
|
||||||
|
if args.codec is not None:
|
||||||
|
base.setdefault('output', {})['type'] = args.codec
|
||||||
|
|
||||||
return name, base
|
return name, base
|
||||||
|
|
||||||
@ -82,13 +91,20 @@ def wrap(prof, gnm):
|
|||||||
|
|
||||||
def enumerate_times(gprof):
|
def enumerate_times(gprof):
|
||||||
"""
|
"""
|
||||||
Given a profile, return a list of `(frame_no, center_time)` pairs. Note
|
Given a profile, return a list of `(frame_no, center_times)` pairs. Note
|
||||||
that the enumeration is applied before `start`, `end`, and `skip`, and so
|
that the enumeration is applied before `start`, `end`, and `skip`, and so
|
||||||
`frame_no` may be non-contiguous.
|
`frame_no` may be non-contiguous.
|
||||||
"""
|
"""
|
||||||
nframes = round(gprof.fps * gprof.duration)
|
nframes = round(gprof.fps * gprof.duration)
|
||||||
times = np.linspace(0, 1, nframes + 1)
|
times = np.linspace(0, 1, nframes + 1)
|
||||||
times = list(enumerate(times[:-1] + 0.5 * (times[1] - times[0]), 1))
|
times = times[:-1] + 0.5 * (times[1] - times[0])
|
||||||
|
if gprof.shard:
|
||||||
|
s = max(1, int(round(gprof.fps * gprof.shard)))
|
||||||
|
return [(i, times[t:t+s])
|
||||||
|
for i, t in enumerate(range(0, len(times), s), 1)]
|
||||||
|
else:
|
||||||
|
times = [[t] for t in times]
|
||||||
|
times = list(enumerate(times, 1))
|
||||||
if gprof.end is not None:
|
if gprof.end is not None:
|
||||||
times = times[:gprof.end]
|
times = times[:gprof.end]
|
||||||
if gprof.start is not None:
|
if gprof.start is not None:
|
||||||
|
@ -235,7 +235,7 @@ class Renderer(object):
|
|||||||
self.packer, self.lib, self.cubin = self.compile(gnm)
|
self.packer, self.lib, self.cubin = self.compile(gnm)
|
||||||
self.mod = self.load(self.cubin)
|
self.mod = self.load(self.cubin)
|
||||||
self.filts = filters.create(gprof)
|
self.filts = filters.create(gprof)
|
||||||
self.out = output.PILOutput()
|
self.out = output.get_output_for_profile(gprof)
|
||||||
|
|
||||||
class RenderManager(ClsMod):
|
class RenderManager(ClsMod):
|
||||||
lib = devlib(deps=[interp.palintlib, filldptrlib, iter.flushatomlib])
|
lib = devlib(deps=[interp.palintlib, filldptrlib, iter.flushatomlib])
|
||||||
@ -395,26 +395,3 @@ class RenderManager(ClsMod):
|
|||||||
self.info_a, self.info_b = self.info_b, self.info_a
|
self.info_a, self.info_b = self.info_b, self.info_a
|
||||||
self.stream_a, self.stream_b = self.stream_b, self.stream_a
|
self.stream_a, self.stream_b = self.stream_b, self.stream_a
|
||||||
return self.copy_evt, h_out
|
return self.copy_evt, h_out
|
||||||
|
|
||||||
def render(self, gnm, gprof, times):
|
|
||||||
"""
|
|
||||||
A port of the old rendering function, retained for backwards
|
|
||||||
compatibility. Some of this will be pulled into as-yet-undecided
|
|
||||||
methods for more DRY.
|
|
||||||
"""
|
|
||||||
rdr = Renderer(gnm, gprof)
|
|
||||||
last_evt = cuda.Event().record(self.stream_a)
|
|
||||||
last_idx = None
|
|
||||||
def wait(): # Times like these where you wish for a macro
|
|
||||||
while not last_evt.query():
|
|
||||||
time.sleep(0.01)
|
|
||||||
gpu_time = last_evt.time_since(two_evts_ago)
|
|
||||||
return RenderedImage(last_buf, last_idx, gpu_time)
|
|
||||||
for idx, tc in times:
|
|
||||||
evt, h_buf = self.queue_frame(rdr, gnm, gprof, tc, last_idx is None)
|
|
||||||
if last_idx:
|
|
||||||
yield wait()
|
|
||||||
two_evts_ago, last_evt = last_evt, evt
|
|
||||||
last_buf, last_idx = h_buf, idx
|
|
||||||
if last_idx:
|
|
||||||
yield wait()
|
|
||||||
|
32
dist/client.py
vendored
32
dist/client.py
vendored
@ -10,11 +10,15 @@ from gevent import spawn, queue, coros
|
|||||||
import zmq.green as zmq
|
import zmq.green as zmq
|
||||||
|
|
||||||
import _importhack
|
import _importhack
|
||||||
from cuburn import profile
|
from cuburn import profile, output
|
||||||
from cuburn.genome import db, util
|
from cuburn.genome import db, util
|
||||||
|
|
||||||
from messages import *
|
from messages import *
|
||||||
|
|
||||||
|
# TODO: remove this dependency (loading the output module to get the suffix
|
||||||
|
# requires a compiler / default instance)
|
||||||
|
import pycuda.autoinit
|
||||||
|
|
||||||
class RenderClient(object):
|
class RenderClient(object):
|
||||||
def __init__(self, task_addr, rsp_addr, ctx=None, start=True):
|
def __init__(self, task_addr, rsp_addr, ctx=None, start=True):
|
||||||
ctx = zmq.Context() if ctx is None else ctx
|
ctx = zmq.Context() if ctx is None else ctx
|
||||||
@ -56,12 +60,12 @@ class RenderClient(object):
|
|||||||
def _deal_rsps(self):
|
def _deal_rsps(self):
|
||||||
while True:
|
while True:
|
||||||
rsp = self.rsock.recv_multipart(copy=False)
|
rsp = self.rsock.recv_multipart(copy=False)
|
||||||
assert len(rsp) == 2
|
|
||||||
rq = self.taskmap.get(rsp[0].bytes, None)
|
rq = self.taskmap.get(rsp[0].bytes, None)
|
||||||
if rq: rq.put(rsp[1])
|
if rq: rq.put((rsp[1].bytes, rsp[2].bytes.split('\0'), rsp[3:]))
|
||||||
|
|
||||||
# Time (in seconds) before a job times out
|
# Time (in seconds) before a job times out
|
||||||
TIMEOUT=240
|
# TODO: replace timeout mechanism with polling?
|
||||||
|
TIMEOUT=2400
|
||||||
|
|
||||||
# Max. queue length before request considered lost, as a multiple of the
|
# Max. queue length before request considered lost, as a multiple of the
|
||||||
# number of in-flight requests
|
# number of in-flight requests
|
||||||
@ -92,21 +96,27 @@ def iter_genomes(prof, outpath, gpaths):
|
|||||||
os.makedirs(odir)
|
os.makedirs(odir)
|
||||||
with open(os.path.join(odir, 'NFRAMES'), 'w') as fp:
|
with open(os.path.join(odir, 'NFRAMES'), 'w') as fp:
|
||||||
fp.write(str(len(times)))
|
fp.write(str(len(times)))
|
||||||
|
outmod = output.get_output_for_profile(gprof)
|
||||||
for i, t in times:
|
for i, t in times:
|
||||||
opath = os.path.join(odir, '%05d.%s' % (i, gprof.output_format))
|
opath = os.path.join(odir, '%05d' % i)
|
||||||
if not os.path.isfile(opath):
|
if not os.path.isfile(opath + outmod.suffix):
|
||||||
yield Task(opath, ghash, prof, gnm, t)
|
yield Task(opath, ghash, prof, gnm, t)
|
||||||
|
|
||||||
def get_result(cli, task, rq):
|
def get_result(cli, task, rq):
|
||||||
try:
|
try:
|
||||||
rsp = rq.get(timeout=TIMEOUT)
|
log, names, bufs = rq.get(timeout=TIMEOUT)
|
||||||
except queue.Empty:
|
except queue.Empty:
|
||||||
cli.put(task, rq)
|
cli.put(task, rq)
|
||||||
print '>>', task.id
|
print '>>', task.id
|
||||||
rsp = rq.get()
|
log, names, bufs = rq.get()
|
||||||
|
|
||||||
with open(task.id, 'wb') as fp:
|
with open(task.id + '.log', 'wb') as fp:
|
||||||
fp.write(buffer(rsp))
|
fp.write(log)
|
||||||
|
|
||||||
|
for name in reversed(names):
|
||||||
|
buf = bufs.pop()
|
||||||
|
with open(task.id + name, 'wb') as fp:
|
||||||
|
fp.write(buffer(buf))
|
||||||
print '< ', task.id
|
print '< ', task.id
|
||||||
|
|
||||||
def main(addrs):
|
def main(addrs):
|
||||||
@ -128,6 +138,8 @@ def main(addrs):
|
|||||||
|
|
||||||
while cli.taskmap:
|
while cli.taskmap:
|
||||||
print 'Still waiting on %d tasks...' % len(cli.taskmap)
|
print 'Still waiting on %d tasks...' % len(cli.taskmap)
|
||||||
|
for i in cli.taskmap.items():
|
||||||
|
print i
|
||||||
gevent.sleep(3)
|
gevent.sleep(3)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
2
dist/messages.py
vendored
2
dist/messages.py
vendored
@ -1,5 +1,5 @@
|
|||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
|
|
||||||
Task = namedtuple('Task', 'id hash profile anim time')
|
Task = namedtuple('Task', 'id hash profile anim times')
|
||||||
AddressedTask = namedtuple('AddressedTask', 'addr task')
|
AddressedTask = namedtuple('AddressedTask', 'addr task')
|
||||||
FullTask = namedtuple('FullTask', 'addr task cubin packer')
|
FullTask = namedtuple('FullTask', 'addr task cubin packer')
|
||||||
|
4
dist/server.py
vendored
4
dist/server.py
vendored
@ -1,4 +1,5 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
|
from itertools import takewhile
|
||||||
|
|
||||||
import gevent
|
import gevent
|
||||||
from gevent import spawn, queue, event
|
from gevent import spawn, queue, event
|
||||||
@ -29,7 +30,6 @@ def setup_task_listeners(addrs, tq, rq):
|
|||||||
# losock to be added to the queue.
|
# losock to be added to the queue.
|
||||||
loevt.set()
|
loevt.set()
|
||||||
task = hisock.recv_pyobj()
|
task = hisock.recv_pyobj()
|
||||||
print 'OOOOOH! Got a hiprio evt'
|
|
||||||
loevt.clear() # Got message; pause listen_lo().
|
loevt.clear() # Got message; pause listen_lo().
|
||||||
tq.put(task)
|
tq.put(task)
|
||||||
hisock.send('')
|
hisock.send('')
|
||||||
@ -77,7 +77,7 @@ def setup_worker_listener(addrs, tq, rq):
|
|||||||
while True:
|
while True:
|
||||||
rsp = wsock.recv_multipart(copy=False)
|
rsp = wsock.recv_multipart(copy=False)
|
||||||
if rsp[2].bytes != '':
|
if rsp[2].bytes != '':
|
||||||
print '< ', ' '.join([r.bytes for r in rsp[2:-1]])
|
print '< ', rsp[2].bytes, rsp[3].bytes
|
||||||
rq.put(rsp[2:])
|
rq.put(rsp[2:])
|
||||||
readyq.put(rsp[0])
|
readyq.put(rsp[0])
|
||||||
|
|
||||||
|
24
dist/worker.py
vendored
24
dist/worker.py
vendored
@ -1,5 +1,6 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
import sys
|
import sys
|
||||||
|
import socket
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
|
|
||||||
import gevent
|
import gevent
|
||||||
@ -37,20 +38,29 @@ def main(worker_addr):
|
|||||||
|
|
||||||
hash = None
|
hash = None
|
||||||
while True:
|
while True:
|
||||||
|
log = [('worker', socket.gethostname() + ':' +
|
||||||
|
cuda.Context.get_current().get_device().pci_bus_id())]
|
||||||
addr, task, cubin, packer = sock.recv_pyobj()
|
addr, task, cubin, packer = sock.recv_pyobj()
|
||||||
gprof = profile.wrap(task.profile, task.anim)
|
gprof = profile.wrap(task.profile, task.anim)
|
||||||
if hash != task.hash:
|
if hash != task.hash:
|
||||||
rdr = PrecompiledRenderer(task.anim, gprof, packer, cubin)
|
rdr = PrecompiledRenderer(task.anim, gprof, packer, cubin)
|
||||||
evt, buf = rmgr.queue_frame(rdr, task.anim, gprof, task.time)
|
for t in task.times:
|
||||||
|
evt, buf = rmgr.queue_frame(rdr, task.anim, gprof, t)
|
||||||
while not evt.query():
|
while not evt.query():
|
||||||
gevent.sleep(0.01)
|
gevent.sleep(0.01)
|
||||||
ofile = StringIO()
|
out, frame_log = rdr.out.encode(buf)
|
||||||
output.PILOutput.save(buf, ofile, task.id[-3:])
|
log += frame_log
|
||||||
ofile.seek(0)
|
|
||||||
sock.send_multipart(addr + [ofile.read()])
|
|
||||||
hash = task.hash
|
|
||||||
|
|
||||||
print 'Rendered', task.id, 'in', int(evt.time()), 'ms'
|
print 'Rendered', task.id, 'in', int(evt.time()), 'ms'
|
||||||
|
final_out, final_log = rdr.out.encode(None)
|
||||||
|
assert not (out and final_out), 'Got output from two sources!'
|
||||||
|
out = out or final_out
|
||||||
|
log += final_log
|
||||||
|
log = '\0'.join([k + ' ' + v for k, v in log])
|
||||||
|
|
||||||
|
suffixes, files = zip(*[(k, v.read())
|
||||||
|
for k, v in sorted(out.items())])
|
||||||
|
# TODO: reduce copies, generally spruce up the memory usage here
|
||||||
|
sock.send_multipart(addr + [log, '\0'.join(suffixes)] + list(files))
|
||||||
|
|
||||||
# Spawn two request loops to take advantage of CUDA pipelining.
|
# Spawn two request loops to take advantage of CUDA pipelining.
|
||||||
spawn(request_loop)
|
spawn(request_loop)
|
||||||
|
118
main.py
118
main.py
@ -26,44 +26,16 @@ sys.path.insert(0, os.path.dirname(__file__))
|
|||||||
from cuburn import render, filters, output, profile
|
from cuburn import render, filters, output, profile
|
||||||
from cuburn.genome import convert, use, db
|
from cuburn.genome import convert, use, db
|
||||||
|
|
||||||
def save(out):
|
def save(output_module, name, rendered_frame):
|
||||||
# Temporary! TODO: fix this
|
out, log = output_module.encode(rendered_frame)
|
||||||
output.PILOutput.save(out.buf, out.idx)
|
for suffix, file_like in out.items():
|
||||||
print out.idx, out.gpu_time
|
with open(name + suffix, 'w') as fp:
|
||||||
|
fp.write(file_like.read())
|
||||||
def main(args, prof):
|
for key, val in log:
|
||||||
gdb = db.connect(args.genomedb)
|
print '\n=== %s ===' % key
|
||||||
gnm, basename = gdb.get_anim(args.flame, args.half)
|
print val
|
||||||
if getattr(args, 'print'):
|
|
||||||
print convert.to_json(gnm)
|
|
||||||
return
|
|
||||||
gprof = profile.wrap(prof, gnm)
|
|
||||||
|
|
||||||
if args.name is not None:
|
|
||||||
basename = args.name
|
|
||||||
prefix = os.path.join(args.dir, basename)
|
|
||||||
if args.subdir:
|
|
||||||
if not os.path.isdir(prefix):
|
|
||||||
os.mkdir(prefix)
|
|
||||||
prefix += '/'
|
|
||||||
else:
|
|
||||||
prefix += '_'
|
|
||||||
frames = [('%s%05d%s.jpg' % (prefix, (i+1), args.suffix), t)
|
|
||||||
for i, t in profile.enumerate_times(gprof)]
|
|
||||||
if args.resume:
|
|
||||||
m = os.path.getmtime(args.flame)
|
|
||||||
frames = (f for f in frames
|
|
||||||
if not os.path.isfile(f[0]) or m > os.path.getmtime(f[0]))
|
|
||||||
|
|
||||||
import pycuda.autoinit
|
|
||||||
rmgr = render.RenderManager()
|
|
||||||
gen = rmgr.render(gnm, gprof, frames)
|
|
||||||
|
|
||||||
if not args.gfx:
|
|
||||||
for out in gen:
|
|
||||||
save(out)
|
|
||||||
return
|
|
||||||
|
|
||||||
|
def pyglet_preview(args, gprof, itr):
|
||||||
import pyglet
|
import pyglet
|
||||||
import pyglet.gl as gl
|
import pyglet.gl as gl
|
||||||
w, h = gprof.width, gprof.height
|
w, h = gprof.width, gprof.height
|
||||||
@ -92,39 +64,89 @@ def main(args, prof):
|
|||||||
last_time = [time.time()]
|
last_time = [time.time()]
|
||||||
|
|
||||||
def poll(dt):
|
def poll(dt):
|
||||||
out = next(gen, False)
|
out = next(itr, False)
|
||||||
if out is False:
|
if out is False:
|
||||||
if args.pause:
|
if args.pause:
|
||||||
label.text = "Done. ('q' to quit)"
|
label.text = "Done. ('q' to quit)"
|
||||||
#pyglet.clock.unschedule(poll)
|
|
||||||
else:
|
else:
|
||||||
pyglet.app.exit()
|
pyglet.app.exit()
|
||||||
elif out is not None:
|
elif out is not None:
|
||||||
|
name, buf = out
|
||||||
real_dt = time.time() - last_time[0]
|
real_dt = time.time() - last_time[0]
|
||||||
last_time[0] = time.time()
|
last_time[0] = time.time()
|
||||||
save(out)
|
if buf.dtype == np.uint8:
|
||||||
if out.buf.dtype == np.uint8:
|
|
||||||
fmt = gl.GL_UNSIGNED_BYTE
|
fmt = gl.GL_UNSIGNED_BYTE
|
||||||
elif out.buf.dtype == np.uint16:
|
elif buf.dtype == np.uint16:
|
||||||
fmt = gl.GL_UNSIGNED_SHORT
|
fmt = gl.GL_UNSIGNED_SHORT
|
||||||
else:
|
else:
|
||||||
label.text = 'Unsupported format: ' + out.buf.dtype
|
label.text = 'Unsupported format: ' + buf.dtype
|
||||||
return
|
return
|
||||||
|
|
||||||
h, w, ch = out.buf.shape
|
h, w, ch = buf.shape
|
||||||
gl.glEnable(tex.target)
|
gl.glEnable(tex.target)
|
||||||
gl.glBindTexture(tex.target, tex.id)
|
gl.glBindTexture(tex.target, tex.id)
|
||||||
gl.glTexImage2D(tex.target, 0, gl.GL_RGB8, w, h, 0, gl.GL_RGBA,
|
gl.glTexImage2D(tex.target, 0, gl.GL_RGB8, w, h, 0, gl.GL_RGBA,
|
||||||
fmt, out.buf.tostring())
|
fmt, buf.tostring())
|
||||||
gl.glDisable(tex.target)
|
gl.glDisable(tex.target)
|
||||||
label.text = '%s (%g fps)' % (out.idx, 1./real_dt)
|
label.text = '%s (%g fps)' % (name, 1./real_dt)
|
||||||
else:
|
else:
|
||||||
label.text += '.'
|
label.text += '.'
|
||||||
|
|
||||||
pyglet.clock.set_fps_limit(30)
|
pyglet.clock.set_fps_limit(20)
|
||||||
pyglet.clock.schedule_interval(poll, 1/30.)
|
pyglet.clock.schedule_interval(poll, 1/20.)
|
||||||
pyglet.app.run()
|
pyglet.app.run()
|
||||||
|
|
||||||
|
def main(args, prof):
    """
    Render the animation named by `args.flame` using profile `prof`.

    Looks the genome up in the genome database, derives one output name per
    entry returned by `profile.enumerate_times`, then renders each entry and
    writes the encoded output next to that name. With `args.print` set, the
    genome is printed as JSON and nothing is rendered. With `args.gfx` set,
    frames are also shown in a pyglet preview window.
    """
    gdb = db.connect(args.genomedb)
    gnm, basename = gdb.get_anim(args.flame, args.half)
    if getattr(args, 'print'):
        print(convert.to_json(gnm))
        return
    gprof = profile.wrap(prof, gnm)

    if args.name is not None:
        basename = args.name
    prefix = os.path.join(args.dir, basename)
    if args.subdir:
        if not os.path.isdir(prefix):
            os.mkdir(prefix)
        prefix_plus = prefix + '/'
    else:
        prefix_plus = prefix + '_'

    frames = [('%s%05d%s' % (prefix_plus, i, args.suffix), t)
              for i, t in profile.enumerate_times(gprof)]

    # We don't initialize a CUDA context until here. This keeps other
    # functions like --help and --print snappy.
    import pycuda.autoinit
    rmgr = render.RenderManager()
    rdr = render.Renderer(gnm, gprof)

    def render_iter():
        # Yields None while waiting on the GPU and `(name, buf)` once a
        # frame has been rendered and handed to the encoder.
        m = os.path.getmtime(args.flame)
        first = True
        for name, times in frames:
            if args.resume:
                fp = name + rdr.out.suffix
                # Skip outputs that already exist and are newer than the
                # flame file. (This previously referenced the undefined
                # names `f` and `ext`, raising NameError on resume.)
                if os.path.isfile(fp) and m < os.path.getmtime(fp):
                    continue

            for t in times:
                evt, buf = rmgr.queue_frame(rdr, gnm, gprof, t, first)
                first = False
                while not evt.query():
                    time.sleep(0.01)
                    yield None
                save(rdr.out, name, buf)
                print('%s %s' % (name, evt.time()))
                yield name, buf
            # Flush the encoder so any buffered output for this name is
            # written before moving on to the next one.
            save(rdr.out, name, None)

    if args.gfx:
        pyglet_preview(args, gprof, render_iter())
    else:
        for i in render_iter(): pass
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser(description='Render fractal flames.')
|
parser = argparse.ArgumentParser(description='Render fractal flames.')
|
||||||
|
Loading…
Reference in New Issue
Block a user