Mirror of https://github.com/stevenrobertson/cuburn.git (synced 2025-02-05 11:40:04 -05:00)

Refactor host rendering code for better load

This commit is contained in:
  parent 8e99c9c463
  commit 9bafbda81a

cuburn/render.py (359 lines changed)
@@ -107,7 +107,19 @@ class Animation(object):
     In other words, it's best to use exactly one Animation for each
     interpolated sequence between one or two genomes.
     """
 
+    # Large launches lock the display for a considerable period and may be
+    # killed due to a device timeout; small launches are harder to load-balance
+    # on the GPU and incur overhead. This empirical value is multiplied by the
+    # number of SMs on the device to determine how many blocks should be in
+    # each launch. Extremely high quality, high resolution renders may still
+    # encounter a device timeout, requiring the user to increase the split
+    # amount. This factor is not used in async mode.
+    SM_FACTOR = 8
+
     cmp_options = ('-use_fast_math', '-maxrregcount', '42')
     keep = False
 
     def __init__(self, ctypes_genome_array):
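The launch-sizing rule described in the comment above amounts to a couple of lines of arithmetic. The snippet below is an illustrative sketch rather than code from this commit; it assumes PyCUDA is installed and a context has been created via pycuda.autoinit, and max_cps stands in for the per-frame control-point count that Features.max_cps provides in the real renderer.

    # Sketch: how SM_FACTOR turns into a launch size in sync mode.
    import math
    import pycuda.autoinit              # creates a context on the default device
    import pycuda.driver as cuda

    SM_FACTOR = 8                       # empirical factor from the class above
    max_cps = 1000                      # hypothetical control points per frame

    num_sm = cuda.Context.get_device().multiprocessor_count
    cps_per_block = num_sm * SM_FACTOR  # grid size scales with the SM count
    launches = int(math.ceil(max_cps / float(cps_per_block)))
    print('%d SMs -> %d CPs per launch -> %d launches per frame'
          % (num_sm, cps_per_block, launches))

In async mode the factor is ignored: the new render_frames body sets cps_per_block to Features.max_cps, so a frame's control points go to the card in one large launch.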
@@ -170,7 +182,8 @@ class Animation(object):
         self.compile()
         self.mod = cuda.module_from_buffer(self.cubin, jit_options)
 
-    def render_frames(self, times=None, block=True):
+    def render_frames(self, times=None, sync=False):
         """
         Render a flame for each genome in the iterable value 'genomes'.
         Returns a Python generator object which will yield a 2-tuple of
@@ -192,260 +205,182 @@ class Animation(object):
         ``times`` is a sequence of center times at which to render, or ``None``
         to render one frame for each genome used to create the animation.
 
-        ``block`` will cause this thread to spin, waiting for the GPU to
-        finish the current task. Otherwise, this generator will yield ``None``
-        until the GPU is finished, for filtering later.
+        If ``sync`` is True, the CPU will sync with the GPU after every block
+        of temporal samples and yield None until the frame is ready. This
+        allows a single-card system to avoid having to go thirty seconds
+        between window refreshes while rendering. Otherwise, tasks will be
+        piled asynchronously on the card so that it is always under load.
         """
+        f = self.features
 
         times = times if times is not None else [cp.time for cp in self.genomes]
-        if block:
-            rdr = _AnimRenderer(self)
-            for t in times:
-                rdr.render(t)
-                yield rdr.get_result()
-        else:
-            # TODO: share buffers.
-            rdrs = [_AnimRenderer(self) for i in range(2)]
-            # Zip up each genome with an alternating renderer, plus 2 empty
-            # genomes at the end to flush all pending tasks
-            exttimes = times[:] + [None, None]
-            for rdr, t in izip(cycle(rdrs), exttimes):
-                if rdr.pending:
-                    while not rdr.done():
-                        yield None
-                    yield rdr.get_result()
-                if t is not None:
-                    rdr.render(t)
-
-    def _interp(self, time, cp):
-        flam3_interpolate(self._g_arr, len(self._g_arr), time, 0, byref(cp))
-
-
-class _AnimRenderer(object):
-    # Large launches lock the display for a considerable period and may be
-    # killed due to a device timeout; small launches are harder to load-balance
-    # on the GPU and incur overhead. This empirical value is multiplied by the
-    # number of SMs on the device to determine how many blocks should be in
-    # each launch. Extremely high quality, high resolution renders may still
-    # encounter a device timeout, and no workaround is in place for that yet.
-    SM_FACTOR = 8
-
-    # Currently, palette interpolation is done independently of animation
-    # interpolation, so that the process is not biased and so we only need to
-    # mess about with one texture per renderer. This many steps will always be
-    # used, no matter the number of time steps.
-    PAL_HEIGHT = 16
-
-    # Use synchronous launches
-    sync = False
-    # Delay this long between iterations (only active when sync is True)
-    sleep = None
-
-    def __init__(self, anim):
-        self.anim = anim
-        self.pending = False
-        self.cen_time = None
-        self.stream = cuda.Stream()
-
-        self._nsms = cuda.Context.get_device().multiprocessor_count
-        self.cps_per_block = self._nsms * self.SM_FACTOR
-        self.ncps = anim.features.max_cps
-        self.nblocks = int(math.ceil(self.ncps / float(self.cps_per_block)))
-
-        # These are stored to avoid leaks, not to be stateful in method calls
-        self._dst_cp = pyflam3.Genome()
-        memset(byref(self._dst_cp), 0, sizeof(self._dst_cp))
-        self._cen_cp = pyflam3.Genome()
-        memset(byref(self._cen_cp), 0, sizeof(self._cen_cp))
-
-        self.nbins = anim.features.acc_height * anim.features.acc_stride
-        self.d_accum = cuda.mem_alloc(16 * self.nbins)
-        self.d_out = cuda.mem_alloc(16 * self.nbins)
-
-        info_size = anim._iter.packer.align * self.ncps
-        self.d_infos = cuda.mem_alloc(info_size)
-        # Defer generation of seeds until they're first needed
-        self.d_seeds = None
-
-        # During the main rendering loop, we alternate between two streams and
-        # two sets of seeds, synchronizing them at the end of rendering.
-        self.alt_stream = cuda.Stream()
-        self.d_alt_seeds = None
-
-        # It's less than ideal, but we lock some memory ahead of time
-        self.h_infos_locked = cuda.pagelocked_empty((info_size/4,), np.float32)
-
-        if self.sync:
-            self.stream = self.alt_stream = None
-
-    def render(self, cen_time):
-        assert not self.pending, "Tried to render with results pending!"
-        self.pending = True
-        self.cen_time = cen_time
-        a = self.anim
-
-        cen_cp = self._cen_cp
-        a._interp(cen_time, cen_cp)
-        palette = self._interp_colors(cen_time, cen_cp)
-
-        util.BaseCode.zero_dptr(a.mod, self.d_accum, 4 * self.nbins,
-                                self.stream)
-        # Ensure all main stream tasks are done before starting alt stream
-        if not self.sync:
-            self.alt_stream.wait_for_event(cuda.Event().record(self.stream))
-
-        dpal = cuda.make_multichannel_2d_array(palette, 'C')
-        tref = a.mod.get_texref('palTex')
-        tref.set_array(dpal)
-        tref.set_format(cuda.array_format.UNSIGNED_INT8, 4)
-        tref.set_flags(cuda.TRSF_NORMALIZED_COORDINATES)
-        tref.set_filter_mode(cuda.filter_mode.LINEAR)
-
-        cp = self._dst_cp
-        packer = a._iter.packer
-
-        iter_fun = a.mod.get_function("iter")
-        #iter_fun.set_cache_config(cuda.func_cache.PREFER_L1)
-
-        # Must be accumulated over all CPs
-        gam, vib = 0, 0
-        bkgd = np.zeros(3)
-
-        # This is gross, but there are a lot of fiddly corner cases with any
-        # index-based iteration scheme.
-        times = list(enumerate(self._mk_dts(cen_time, cen_cp, self.ncps)))
-        for b, block_times in enumerate(_chunk(times, self.cps_per_block)):
-            on_main = b % 2 == 0
-            stream = self.stream if on_main else self.alt_stream
-            d_seeds = self.d_seeds if on_main else self.d_alt_seeds
-
-            if not d_seeds:
-                seeds = mwc.MWC.make_seeds(a._iter.NTHREADS *
-                                           self.cps_per_block)
-                if self.sync:
-                    d_seeds = cuda.to_device(seeds)
-                else:
-                    size = seeds.dtype.itemsize * seeds.size
-                    d_seeds = cuda.mem_alloc(size)
-                    h_seeds = cuda.pagelocked_empty(seeds.shape, seeds.dtype)
-                    h_seeds[:] = seeds
-                    cuda.memcpy_htod_async(d_seeds, h_seeds, stream)
-                if on_main:
-                    self.d_seeds = d_seeds
-                else:
-                    self.d_alt_seeds = d_seeds
-
-            infos = []
-            if len(a.genomes) > 1:
-                for n, t in block_times:
-                    a._interp(t, cp)
-                    frac = float(n) / cen_cp.ntemporal_samples
-                    info = packer.pack(cp=Genome(cp), cp_step_frac=frac)
-                    infos.append(info)
-                    gam += cp.gamma
-                    vib += cp.vibrancy
-                    bkgd += np.array(cp.background)
-            else:
-                # Can't interpolate normally; just pack copies
-                packed = packer.pack(cp=a.genomes[0], cp_step_frac=0)
-                infos = [packed] * len(block_times)
-                gam += a.genomes[0].gamma * len(block_times)
-                vib += a.genomes[0].vibrancy * len(block_times)
-                bkgd += np.array(a.genomes[0].background) * len(block_times)
-
-            infos = np.concatenate(infos)
-            offset = b * packer.align * self.cps_per_block
-            d_info_off = int(self.d_infos) + offset
-            if self.sync:
-                cuda.memcpy_htod(d_info_off, infos)
-            else:
-                h_infos = self.h_infos_locked[offset/4:offset/4+len(infos)]
-                h_infos[:] = infos
-                cuda.memcpy_htod_async(d_info_off, h_infos, stream)
-
-            iter_fun(d_seeds, np.uintp(d_info_off), np.uint64(self.d_accum),
-                     block=(32, a._iter.NTHREADS/32, 1),
-                     grid=(len(block_times), 1),
-                     texrefs=[tref], stream=stream)
-
-            if self.sync and self.sleep:
-                time.sleep(self.sleep)
-
-        # Now ensure all alt stream tasks are done before continuing main
-        if not self.sync:
-            self.stream.wait_for_event(cuda.Event().record(self.alt_stream))
-
-        util.BaseCode.zero_dptr(a.mod, self.d_out, 4 * self.nbins,
-                                self.stream)
-        a._de.invoke(a.mod, Genome(cen_cp), self.d_accum, self.d_out,
-                     self.stream)
-
-        f = np.float32
-        n = f(self.ncps)
-        gam = f(n / gam)
-        vib = f(vib / n)
-        hipow = f(cen_cp.highlight_power)
-        lin = f(cen_cp.gam_lin_thresh)
-        lingam = f(math.pow(cen_cp.gam_lin_thresh, gam-1.0) if lin > 0 else 0)
-        bkgd = vec.make_float3(*(bkgd / n))
-
-        # TODO: get block size from colorclip class? It actually does not
-        # depend on that being the case
-        color_fun = a.mod.get_function("colorclip")
-        color_fun(self.d_out, gam, vib, hipow, lin, lingam, bkgd,
-                  block=(256, 1, 1), grid=(self.nbins / 256, 1),
-                  stream=self.stream)
-
-        # TODO: The stream seems to sync right here, automatically, before
-        # returning. I think PyCUDA is forcing a sync when something drops out
-        # of scope. Investigate.
-
-    def _pal_to_np(self, cp):
-        # Converting palettes by iteration has an enormous performance
-        # overhead. We cheat massively and dangerously here.
-        pal = cast(pointer(cp.palette), POINTER(c_double * (256 * 5)))
-        val = np.frombuffer(buffer(pal.contents), count=256*5)
-        return np.uint8(np.reshape(val, (256, 5))[:,1:] * 255.0)
-
-    def _interp_colors(self, cen_time, cen_cp):
-        # TODO: any visible difference between uint8 and richer formats?
-        pal = np.empty((self.PAL_HEIGHT, 256, 4), dtype=np.uint8)
-        a = self.anim
-
-        if len(a.genomes) > 1:
-            # The typical case; applying real motion blur
-            cp = self._dst_cp
-            times = self._mk_dts(cen_time, cen_cp, self.PAL_HEIGHT)
-            for n, t in enumerate(times):
-                a._interp(t, cp)
-                pal[n] = self._pal_to_np(cp)
-        else:
-            # Cannot call any interp functions on a single genome; rather than
-            # have alternate code-paths, just copy the same colors everywhere
-            pal[0] = self._pal_to_np(a.genomes[0])
-            pal[1:] = pal[0]
-        return pal
-
-    def done(self):
-        if self.sync:
-            return True
-        return self.stream.is_done()
-
-    def get_result(self):
-        if not self.sync:
-            self.stream.synchronize()
-        self.pending = False
-        a = self.anim
-        obuf_dim = (a.features.acc_height, a.features.acc_stride, 4)
-        out = cuda.from_device(self.d_out, obuf_dim, np.float32)
-        g = a.features.gutter
-        return self.cen_time, out[g:-g,g:-g]
-
-    @staticmethod
-    def _mk_dts(cen_time, cen_cp, ncps):
-        w = cen_cp.temporal_filter_width
-        return [cen_time + w * (t / (ncps - 1.0) - 0.5) for t in range(ncps)]
+        iter_stream = cuda.Stream()
+        filt_stream = cuda.Stream()
+        cen_cp = pyflam3.Genome()
+        dst_cp = pyflam3.Genome()
+
+        nbins = f.acc_height * f.acc_stride
+        d_accum = cuda.mem_alloc(16 * nbins)
+        d_out = cuda.mem_alloc(16 * nbins)
+
+        num_sm = cuda.Context.get_device().multiprocessor_count
+        if sync:
+            cps_per_block = num_sm * self.SM_FACTOR
+        else:
+            cps_per_block = f.max_cps
+
+        info_size = self._iter.packer.align * cps_per_block
+        d_infos = cuda.mem_alloc(info_size)
+        d_palmem = cuda.mem_alloc(256 * f.palette_height * 4)
+
+        seeds = mwc.MWC.make_seeds(self._iter.NTHREADS * cps_per_block)
+        d_seeds = cuda.to_device(seeds)
+
+        h_infos = cuda.pagelocked_empty((info_size / 4,), np.float32)
+        h_palmem = cuda.pagelocked_empty(
+                (f.palette_height, 256, 4), np.uint8)
+        h_out = cuda.pagelocked_empty((f.acc_height, f.acc_stride, 4), np.float32)
+
+        filter_done_event = None
+
+        packer = self._iter.packer
+        iter_fun = self.mod.get_function("iter")
+        #iter_fun.set_cache_config(cuda.func_cache.PREFER_L1)
+
+        util.BaseCode.zero_dptr(self.mod, d_accum, 4 * nbins, filt_stream)
+
+        last_time = times[0]
+
+        for time in times:
+            self._interp(cen_cp, time)
+
+            h_palmem[:] = self._interp_colors(dst_cp, time,
+                                              cen_cp.temporal_filter_width)
+            cuda.memcpy_htod_async(d_palmem, h_palmem, iter_stream)
+            tref = self.mod.get_texref('palTex')
+            array_info = cuda.ArrayDescriptor()
+            array_info.height = f.palette_height
+            array_info.width = 256
+            array_info.array_format = cuda.array_format.UNSIGNED_INT8
+            array_info.num_channels = 4
+            tref.set_address_2d(d_palmem, array_info, 1024)
+
+            tref.set_format(cuda.array_format.UNSIGNED_INT8, 4)
+            tref.set_flags(cuda.TRSF_NORMALIZED_COORDINATES)
+            tref.set_filter_mode(cuda.filter_mode.LINEAR)
+
+            # Must be accumulated over all CPs
+            gam, vib = 0, 0
+            bkgd = np.zeros(3)
+
+            mblur_times = enumerate( np.linspace(-0.5, 0.5, cen_cp.ntemporal_samples)
+                                     * cen_cp.temporal_filter_width + time )
+
+            for block_times in _chunk(list(mblur_times), cps_per_block):
+                infos = []
+                if len(self.genomes) > 1:
+                    for n, t in block_times:
+                        self._interp(dst_cp, t)
+                        frac = float(n) / cen_cp.ntemporal_samples
+                        info = packer.pack(cp=Genome(dst_cp), cp_step_frac=frac)
+                        infos.append(info)
+                        gam += dst_cp.gamma
+                        vib += dst_cp.vibrancy
+                        bkgd += np.array(dst_cp.background)
+                else:
+                    # Can't interpolate normally; just pack copies
+                    packed = packer.pack(cp=self.genomes[0], cp_step_frac=0)
+                    infos = [packed] * len(block_times)
+                    gam += self.genomes[0].gamma * len(block_times)
+                    vib += self.genomes[0].vibrancy * len(block_times)
+                    bkgd += np.array(self.genomes[0].background) * len(block_times)
+
+                infos = np.concatenate(infos)
+                h_infos[:len(infos)] = infos
+                cuda.memcpy_htod_async(d_infos, h_infos)
+
+                if filter_done_event:
+                    iter_stream.wait_for_event(filter_done_event)
+
+                # TODO: replace with option to split long runs shorter ones
+                # for interactivity
+                for i in range(1):
+                    iter_fun(d_seeds, d_infos, np.uint64(d_accum),
+                             block=(32, self._iter.NTHREADS/32, 1),
+                             grid=(len(block_times), 1),
+                             texrefs=[tref], stream=iter_stream)
+
+                if sync:
+                    iter_stream.synchronize()
+                    yield None
+
+            if filter_done_event and not sync:
+                filt_stream.synchronize()
+                yield last_time, self._trim(h_out)
+                last_time = time
+
+            util.BaseCode.zero_dptr(self.mod, d_out, 4 * nbins, filt_stream)
+            self._de.invoke(self.mod, Genome(cen_cp), d_accum, d_out, filt_stream)
+            util.BaseCode.zero_dptr(self.mod, d_accum, 4 * nbins, filt_stream)
+            filter_done_event = cuda.Event().record(filt_stream)
+
+            f32 = np.float32
+            n = f32(cen_cp.ntemporal_samples)
+            gam = f32(n / gam)
+            vib = f32(vib / n)
+            hipow = f32(cen_cp.highlight_power)
+            lin = f32(cen_cp.gam_lin_thresh)
+            lingam = f32(math.pow(cen_cp.gam_lin_thresh, gam-1.0) if lin > 0 else 0)
+            bkgd = vec.make_float3(*(bkgd / n))
+
+            color_fun = self.mod.get_function("colorclip")
+            color_fun(d_out, gam, vib, hipow, lin, lingam, bkgd,
+                      block=(256, 1, 1), grid=(nbins / 256, 1),
+                      stream=filt_stream)
+            cuda.memcpy_dtoh_async(h_out, d_out, filt_stream)
+
+            if sync:
+                filt_stream.synchronize()
+                yield time, self._trim(h_out)
+
+        if not sync:
+            filt_stream.synchronize()
+            yield time, self._trim(h_out)
+
+    def _interp(self, cp, time):
+        flam3_interpolate(self._g_arr, len(self._g_arr), time, 0, byref(cp))
+
+    @staticmethod
+    def _pal_to_np(cp):
+        # Converting palettes by iteration has an enormous performance
+        # overhead. We cheat massively and dangerously here.
+        pal = cast(pointer(cp.palette), POINTER(c_double * (256 * 5)))
+        val = np.frombuffer(buffer(pal.contents), count=256*5)
+        return np.uint8(np.reshape(val, (256, 5))[:,1:] * 255.0)
+
+    def _interp_colors(self, cp, time, twidth):
+        # TODO: any visible difference between uint8 and richer formats?
+        height = self.features.palette_height
+        pal = np.empty((height, 256, 4), dtype=np.uint8)
+
+        if len(self.genomes) > 1:
+            # The typical case; applying real motion blur
+            times = np.linspace(-0.5, 0.5, height) * twidth + time
+            for n, t in enumerate(times):
+                self._interp(cp, t)
+                pal[n] = self._pal_to_np(cp)
+        else:
+            # Cannot call any interp functions on a single genome; rather than
+            # have alternate code-paths, just copy the same colors everywhere
+            pal[0] = self._pal_to_np(self.genomes[0])
+            pal[1:] = pal[0]
+        return pal
+
+    def _trim(self, result):
+        g = self.features.gutter
+        return result[g:-g,g:-g].copy()
 
 
 class Features(object):
     """
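For reference, the generator contract spelled out in the new render_frames docstring can be consumed in two ways. This is an illustrative sketch, not part of the commit; anim stands for an Animation whose kernels have already been compiled and loaded, times for a sequence of center times, and save for any callable that writes a frame out, much as main.py does below.

    # Async mode (default): work is piled onto the card and every yielded
    # value is a finished (time, frame) pair.
    for t, frame in anim.render_frames(times):
        save(t, frame)

    # Sync mode: the generator also yields None after each block of temporal
    # samples, so a caller (e.g. a GUI poll loop) can redraw between launches.
    for result in anim.render_frames(times, sync=True):
        if result is None:
            continue                    # GPU still busy on the current frame
        t, frame = result
        save(t, frame)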
main.py (12 lines changed)
@@ -15,6 +15,7 @@ import argparse
 import multiprocessing
 from subprocess import Popen
 from ctypes import *
+from itertools import ifilter
 
 import numpy as np
 import Image
@@ -47,6 +48,7 @@ def save(args, time, raw):
     noalpha = raw[:,:,:3]
     if args.raw:
         real_stdout.write(buffer(np.uint8(noalpha * 255.0)))
+        sys.stderr.write('.')
         return
 
     name = fmt_filename(args, time)
@@ -161,7 +163,7 @@ def main(args):
         def on_mouse_motion(x, y, dx, dy):
             pass
 
-        frames = anim.render_frames(times, block=False)
+        frames = anim.render_frames(times, sync=args.sync)
         def poll(dt):
             out = next(frames, False)
             if out is False:
@@ -173,14 +175,20 @@ def main(args):
                 imgbuf = np.uint8(buf.flatten() * 255)
                 image.set_data('RGBA', -anim.features.width*4, imgbuf.tostring())
                 label.text = '%s %4g' % (args.name, time)
+            else:
+                label.text += '.'
+            if args.sleep:
+                time.sleep(args.sleep)
 
         pyglet.clock.set_fps_limit(30)
         pyglet.clock.schedule_interval(poll, 1/30.)
         pyglet.app.run()
 
     else:
-        for time, out in anim.render_frames(times):
+        for time, out in ifilter(None, anim.render_frames(times, sync=args.sync)):
             save(args, time, out)
+            if args.sleep:
+                time.sleep(args.sleep)
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='Render fractal flames.')
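One detail worth noting in the non-GUI branch above: passing None as the predicate to ifilter keeps only truthy items, so ifilter(None, ...) silently discards the bare None values that render_frames yields while a frame is still in flight in sync mode. A small illustration (itertools.ifilter is Python 2; Python 3 spells the same thing filter):

    from itertools import ifilter

    results = [None, None, (0.0, 'frame0'), None, (1.0, 'frame1')]
    for t, frame in ifilter(None, results):
        print('%g -> %s' % (t, frame))  # only the real (time, frame) pairs survive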