From e79df46c66183caaef4914d64668c0d68fb85133 Mon Sep 17 00:00:00 2001 From: Steven Robertson Date: Sat, 11 Jun 2011 15:59:10 -0400 Subject: [PATCH] Refactor API --HG-- rename : cuburn/code/filter.py => cuburn/code/filtering.py --- cuburn/_pyflam3_hacks.py | 2 + cuburn/code/{filter.py => filtering.py} | 12 +- cuburn/code/iter.py | 104 +------- cuburn/code/util.py | 18 ++ cuburn/render.py | 339 ++++++++++++++++++++++-- main.py | 29 +- 6 files changed, 362 insertions(+), 142 deletions(-) rename cuburn/code/{filter.py => filtering.py} (95%) diff --git a/cuburn/_pyflam3_hacks.py b/cuburn/_pyflam3_hacks.py index 8656e95..d98a474 100644 --- a/cuburn/_pyflam3_hacks.py +++ b/cuburn/_pyflam3_hacks.py @@ -14,6 +14,8 @@ from ctypes import * from fr0stlib.pyflam3 import constants from fr0stlib.pyflam3._flam3 import * +from cuburn import render + flam3_nvariations = constants.flam3_nvariations = 99 BaseXForm._fields_ = [('var', c_double * flam3_nvariations) diff --git a/cuburn/code/filter.py b/cuburn/code/filtering.py similarity index 95% rename from cuburn/code/filter.py rename to cuburn/code/filtering.py index a41674d..bd1ae73 100644 --- a/cuburn/code/filter.py +++ b/cuburn/code/filtering.py @@ -223,24 +223,22 @@ void density_est(float4 *pixbuf, float4 *outbuf, float *denbuf, """) - def invoke(self, mod, abufd, obufd, dbufd): + def invoke(self, mod, abufd, obufd, dbufd, stream=None): # TODO: add no-est version # TODO: come up with a general way to average these parameters k1 = self.cp.brightness * 268 / 256 area = self.features.acc_width * self.features.acc_height / self.cp.ppu ** 2 k2 = 1 / (area * self.cp.adj_density) - print k1, k2, area if self.cp.estimator == 0: fun = mod.get_function("logscale") t = fun(abufd, obufd, np.float32(k1), np.float32(k2), block=(self.features.acc_width, 1, 1), - grid=(self.features.acc_height, 1), time_kernel=True) + grid=(self.features.acc_height, 1), stream=stream) else: fun = mod.get_function("density_est") - t = fun(abufd, obufd, dbufd, np.float32(k1), np.float32(k2), - block=(32, 32, 1), grid=(self.features.acc_width/32, 1), - time_kernel=True) - print "Density estimation: %g" % t + fun(abufd, obufd, dbufd, np.float32(k1), np.float32(k2), + block=(32, 32, 1), grid=(self.features.acc_width/32, 1), + stream=stream) diff --git a/cuburn/code/iter.py b/cuburn/code/iter.py index 4e96c81..6ebab15 100644 --- a/cuburn/code/iter.py +++ b/cuburn/code/iter.py @@ -2,20 +2,13 @@ The main iteration loop. """ -from ctypes import byref, memset, sizeof - -import pycuda.driver as cuda -from pycuda.driver import In, Out, InOut -from pycuda.compiler import SourceModule -import numpy as np -from scipy import ndimage - -from fr0stlib.pyflam3 import flam3_interpolate -from cuburn.code import mwc, variations, filter +from cuburn.code import mwc, variations from cuburn.code.util import * -from cuburn.render import Genome class IterCode(HunkOCode): + # The number of threads per block + NTHREADS = 512 + def __init__(self, features): self.features = features self.packer = DataPacker('iter_info') @@ -69,14 +62,14 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) { iter_info *info_glob = &(infos[blockIdx.x]); // load info to shared memory cooperatively - for (int i = threadIdx.y * 32 + threadIdx.x; + for (int i = threadIdx.y * blockDim.x + threadIdx.x; i * 4 < sizeof(iter_info); i += blockDim.x * blockDim.y) reinterpret_cast(&info)[i] = reinterpret_cast(info_glob)[i]; int consec_bad = -{{features.fuse}}; - // TODO: make nsteps adjustable via genome - int nsamps = {{packer.get('cp.width * cp.height / 512000. * cp.adj_density')}}; + // TODO: remove '512' constant + int nsamps = {{packer.get('cp.width * cp.height / (cp.ntemporal_samples * 512.) * cp.adj_density')}}; float x, y, color; x = mwc_next_11(&rctx); @@ -157,86 +150,3 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) { packer = self.packer.view('info'), **globals()) -def render(features, cps): - # TODO: make this adjustable via genome - nsteps = 1000 - abuf = np.zeros((features.acc_height, features.acc_stride, 4), dtype=np.float32) - dbuf = np.zeros((features.acc_height, features.acc_stride), dtype=np.float32) - seeds = mwc.MWC.make_seeds(512 * nsteps) - - iter = IterCode(features) - de = filter.DensityEst(features, cps[0]) - code = assemble_code(BaseCode, mwc.MWC, iter.packer, iter, - filter.ColorClip, de) - - for lno, line in enumerate(code.split('\n')): - print '%3d %s' % (lno, line) - mod = SourceModule(code, - options=['-use_fast_math', '-maxrregcount', '32']) - - cps_as_array = (Genome * len(cps))() - for i, cp in enumerate(cps): - cps_as_array[i] = cp - - infos = [] - pal = np.empty((16, 256, 4), dtype=np.uint8) - - # TODO: move this into a common function - if len(cps) > 1: - cp = Genome() - memset(byref(cp), 0, sizeof(cp)) - - sampAt = [int(i/15.*(nsteps-1)) for i in range(16)] - for n in range(nsteps): - flam3_interpolate(cps_as_array, 2, float(n)/nsteps - 0.5, - 0, byref(cp)) - cp._init() - if n in sampAt: - pidx = sampAt.index(n) - for i, e in enumerate(cp.palette.entries): - pal[pidx][i] = np.uint8(np.array(e.color) * 255.0) - infos.append(iter.packer.pack(cp=cp, cp_step_frac=float(n)/nsteps)) - else: - for i, e in enumerate(cps[0].palette.entries): - pal[0][i] = np.uint8(np.array(e.color) * 255.0) - pal[1:] = pal[0] - infos.append(iter.packer.pack(cp=cps[0], cp_step_frac=0)) - infos *= nsteps - - infos = np.concatenate(infos) - - dpal = cuda.make_multichannel_2d_array(pal, 'C') - tref = mod.get_texref('palTex') - tref.set_array(dpal) - tref.set_format(cuda.array_format.UNSIGNED_INT8, 4) - tref.set_flags(cuda.TRSF_NORMALIZED_COORDINATES) - tref.set_filter_mode(cuda.filter_mode.LINEAR) - - abufd = cuda.to_device(abuf) - dbufd = cuda.to_device(dbuf) - - fun = mod.get_function("iter") - fun.set_cache_config(cuda.func_cache.PREFER_L1) - t = fun(InOut(seeds), InOut(infos), abufd, dbufd, - block=(32,16,1), grid=(nsteps,1), time_kernel=True) - print "Completed render in %g seconds" % t - - f = np.float32 - - npix = features.acc_width * features.acc_height - - # TODO: just allocate - obufd = cuda.to_device(abuf) - dbuf = cuda.from_device_like(dbufd, dbuf) - dbuf = ndimage.filters.gaussian_filter(dbuf, 0.6) - dbufd = cuda.to_device(dbuf) - de.invoke(mod, abufd, obufd, dbufd) - - fun = mod.get_function("colorclip") - t = fun(obufd, f(1 / cp.gamma), f(cp.vibrancy), f(cp.highlight_power), - block=(256,1,1), grid=(npix/256,1), time_kernel=True) - print "Completed color filtering in %g seconds" % t - - abuf = cuda.from_device_like(obufd, abuf) - return abuf, dbuf - diff --git a/cuburn/code/util.py b/cuburn/code/util.py index ebba41d..9c71873 100644 --- a/cuburn/code/util.py +++ b/cuburn/code/util.py @@ -66,8 +66,26 @@ int trunca(float f) { asm("cvt.rni.s32.f32 %0, %1;" : "=r"(ret) : "f"(f)); return ret; } + +__global__ +void zero_dptr(float* dptr, int size) { + int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i < size) { + dptr[i] = 0.0f; + } +} """ + @staticmethod + def zero_dptr(mod, dptr, size, stream=None): + """ + A memory zeroer which can be embedded in a stream. Size is the + number of 4-byte words in the pointer. + """ + zero = mod.get_function("zero_dptr") + zero(dptr, np.int32(size), stream=stream, + block=(1024, 1, 1), grid=(size/1024+1, 1)) + class DataPackerView(object): """ View of a data packer. Intended to be initialized using DataPacker.view(). diff --git a/cuburn/render.py b/cuburn/render.py index 9267f14..54e4ed3 100644 --- a/cuburn/render.py +++ b/cuburn/render.py @@ -1,44 +1,57 @@ import sys import math import re +from itertools import cycle, repeat, chain, izip from ctypes import * from cStringIO import StringIO import numpy as np +from scipy import ndimage from fr0stlib import pyflam3 from fr0stlib.pyflam3._flam3 import * from fr0stlib.pyflam3.constants import * +import pycuda.compiler +import pycuda.driver as cuda + from cuburn import affine -from cuburn.variations import Variations +from cuburn.code import util, mwc, iter, filtering -class Genome(pyflam3.Genome): - @classmethod - def from_string(cls, *args, **kwargs): - gnms = super(Genome, cls).from_string(*args, **kwargs) - for g in gnms: g._init() - return gnms +def _chunk(l, cs): + """ + Yield the contents of list ``l`` in chunks of size no more than ``cs``. + """ + for i in range(0, len(l), cs): + yield l[i:i+cs] - def _init(self): +class Genome(object): + """ + Normalizes and precalculates some properties of a Genome. Assumes that + Genome argument passed in will not change. + """ + # Fix the ctypes ugliness since switching to __getattribute__ in 2.7. + # There are more elegant ways to do this, but I can't be bothered. + def __getattr__(self, name): + return getattr(self.cp, name) + + def __init__(self, ctypes_genome): + self.cp = ctypes_genome self.xforms = [self.xform[i] for i in range(self.num_xforms)] dens = np.array([x.density for i, x in enumerate(self.xforms) if i != self.final_xform_index]) dens /= np.sum(dens) self.norm_density = [np.sum(dens[:i+1]) for i in range(len(dens))] + self.camera_transform = self.calc_camera_transform() scale = property(lambda cp: 2.0 ** cp.zoom) adj_density = property(lambda cp: cp.sample_density * (cp.scale ** 2)) ppu = property(lambda cp: cp.pixels_per_unit * cp.scale) - @property - def camera_transform(cp): + def calc_camera_transform(cp): """ An affine matrix which will transform IFS coordinates to image width and height. Assumes that width and height are constant. """ - # TODO: when reading as a property during packing, this may be - # calculated 6 times instead of 1 - # TODO: also requires knowing gutter width g = Features.gutter return ( affine.translate(0.5 * cp.width + g, 0.5 * cp.height + g) * affine.scale(cp.ppu, cp.ppu) @@ -65,13 +78,294 @@ class Animation(object): In other words, it's best to use exactly one Animation for each interpolated sequence between one or two genomes. """ - def __init__(self, genomes, ngenomes = None): - self.features = Features(genomes) + def __init__(self, ctypes_genome_array): + self._g_arr = ctypes_genome_array + self.genomes = map(Genome, ctypes_genome_array) + self.features = Features(self.genomes) + self._iter = self._de = self.src = self.cubin = self.mod = None - def compile(self): - pass - def render_frame(self, time=0): - pass + def compile(self, keep=False, + cmp_options=('-use_fast_math', '-maxrregcount', '32')): + """ + Compile a kernel capable of rendering every frame in this animation. + The resulting compiled kernel is stored in the ``cubin`` property; + the source is available as ``src``, and is also returned for + inspection and display. + + This operation is idempotent, and has no side effects outside of + setting properties on this instance (unless there's a compiler error, + which is a bug); it should therefore be threadsafe as well. + It is, however, rather slow. + """ + self._iter = iter.IterCode(self.features) + self._de = filtering.DensityEst(self.features, self.genomes[0]) + # TODO: make choice of filtering explicit + # TODO: autoload dependent modules? + self.src = util.assemble_code(util.BaseCode, mwc.MWC, self._iter.packer, + self._iter, filtering.ColorClip, self._de) + self.cubin = pycuda.compiler.compile(self.src, keep=False, + options=list(cmp_options)) + return self.src + + def copy(self): + """ + Return a copy of this animation without any references to the current + CUDA context. This can be used to load an animation in multiple CUDA + contexts without recompiling, so that rendering can proceed across + multiple devices - but managing that is up to you. + """ + import copy + new = copy.copy(self) + new.mod = None + return new + + def load(self, jit_options=[]): + """ + Replace the currently loaded CUDA module in the active CUDA context + with the compiled code's module. A reference is kept to the module, + meaning that rendering should henceforth only be called from the + thread and context in which this function was called. + """ + if self.cubin is None: + self.compile() + self.mod = cuda.module_from_buffer(self.cubin, jit_options) + + def render_frames(self, times=None): + """ + Render a flame for each genome in the iterable value 'genomes'. + Returns a Python generator object which will yield one NumPy array + for each rendered image. + + This method produces a considerable amount of side effects, and should + not be used lightly. Things may go poorly for you if this method is not + allowed to run until completion (by exhausting all items in the + generator object). + + A performance note: while any ready tasks will be scheduled on the GPU + before yielding a result, spending a lot of time before returning + control to this function can allow the GPU to become idle. It's best + to hand the resulting array to another thread after grabbing it from + the renderer for handling. + + ``times`` is a sequence of center times at which to render, or ``None`` + to render one frame for each genome used to create the animation. + """ + # Don't see this changing, but empirical tests could prove me wrong + NRENDERERS = 2 + # TODO: under a slightly modified sequencing, certain buffers can be + # shared (though this may be unimportant if a good AA technique which + # doesn't require full SS can be found) + rdrs = [_AnimRenderer(self) for i in range(NRENDERERS)] + + # Zip up each genome with an alternating renderer, plus enough empty + # genomes at the end to flush all pending tasks + times = times or [cp.time for cp in self.genomes] + exttimes = chain(times, repeat(None, NRENDERERS)) + for rdr, time in izip(cycle(rdrs), exttimes): + if rdr.wait(): + yield rdr.get_result() + if time is not None: + rdr.render(time) + + def _interp(self, time, cp): + flam3_interpolate(self._g_arr, len(self._g_arr), time, 0, byref(cp)) + + + +class _AnimRenderer(object): + # Large launches lock the display for a considerable period and may be + # killed due to a device timeout; small launches are harder to load-balance + # on the GPU and incur overhead. This empirical value is multiplied by the + # number of SMs on the device to determine how many blocks should be in + # each launch. Extremely high quality, high resolution renders may still + # encounter a device timeout, and no workaround is in place for that yet. + SM_FACTOR = 8 + + # Currently, palette interpolation is done independently of animation + # interpolation, so that the process is not biased and so we only need to + # mess about with one texture per renderer. This many steps will always be + # used, no matter the number of time steps. + PAL_HEIGHT = 16 + + + def __init__(self, anim): + self.anim = anim + self.pending = False + self.stream = cuda.Stream() + + self._nsms = cuda.Context.get_device().multiprocessor_count + self.cps_per_block = self._nsms * self.SM_FACTOR + self.ncps = anim.features.max_cps + self.nblocks = int(math.ceil(self.ncps / float(self.cps_per_block))) + + # These are stored to avoid leaks, not to be stateful in method calls + # TODO: ensure proper cleanup is done + self._dst_cp = pyflam3.Genome() + memset(byref(self._dst_cp), 0, sizeof(self._dst_cp)) + self._cen_cp = pyflam3.Genome() + memset(byref(self._cen_cp), 0, sizeof(self._cen_cp)) + + self.nbins = anim.features.acc_height * anim.features.acc_stride + self.d_den = cuda.mem_alloc(4 * self.nbins) + self.d_accum = cuda.mem_alloc(16 * self.nbins) + self.d_out = cuda.mem_alloc(16 * self.nbins) + self.d_infos = cuda.mem_alloc(anim._iter.packer.align * self.ncps) + # Defer allocation until first needed + self.d_seeds = [None] * self.nblocks + + def render(self, cen_time): + assert not self.pending, "Tried to render with results pending!" + self.pending = True + a = self.anim + + cen_cp = self._cen_cp + a._interp(cen_time, cen_cp) + palette = self._interp_colors(cen_time, cen_cp) + + util.BaseCode.zero_dptr(a.mod, self.d_den, self.nbins, + self.stream) + util.BaseCode.zero_dptr(a.mod, self.d_accum, 4 * self.nbins, + self.stream) + + # ------------------------------------------------------------ + # TODO WARNING TODO WARNING TODO WARNING TODO WARNING TODO + # This will replace the palette while it's in use by the other + # rendering function. Need to pass palette texref in function + # invocation. + # ------------------------------------------------------------ + dpal = cuda.make_multichannel_2d_array(palette, 'C') + tref = a.mod.get_texref('palTex') + tref.set_array(dpal) + tref.set_format(cuda.array_format.UNSIGNED_INT8, 4) + tref.set_flags(cuda.TRSF_NORMALIZED_COORDINATES) + tref.set_filter_mode(cuda.filter_mode.LINEAR) + + cp = self._dst_cp + packer = a._iter.packer + + iter_fun = a.mod.get_function("iter") + iter_fun.set_cache_config(cuda.func_cache.PREFER_L1) + + # Must be accumulated over all CPs + gam, vib, hipow = 0, 0, 0 + + # This is gross, but there are a lot of fiddly corner cases with any + # index-based iteration scheme. + times = list(enumerate(self._mk_dts(cen_time, cen_cp, self.ncps))) + for b, block_times in enumerate(_chunk(times, self.cps_per_block)): + infos = [] + if len(a.genomes) > 1: + for n, t in block_times: + a._interp(t, cp) + frac = float(n) / cen_cp.ntemporal_samples + info = packer.pack(cp=Genome(cp), cp_step_frac=frac) + infos.append(info) + gam += cp.gamma + vib += cp.vibrancy + hipow += cp.highlight_power + else: + # Can't interpolate normally; just pack copies + # TODO: this still packs the genome 20 times or so instead of + # once + packed = packer.pack(cp=a.genomes[0], cp_step_frac=0) + infos = [packed] * len(block_times) + gam += a.genomes[0].gamma * len(block_times) + vib += a.genomes[0].vibrancy * len(block_times) + hipow += a.genomes[0].highlight_power * len(block_times) + + infos = np.concatenate(infos) + offset = b * packer.align * self.cps_per_block + # TODO: portable across 32/64-bit arches? + d_info_off = int(self.d_infos) + offset + cuda.memcpy_htod(d_info_off, infos) + + if not self.d_seeds[b]: + seeds = mwc.MWC.make_seeds(iter.IterCode.NTHREADS * + self.cps_per_block) + self.d_seeds[b] = cuda.to_device(seeds) + + # TODO: get block config from IterCode + # TODO: print timing information + iter_fun(self.d_seeds[b], np.uint64(d_info_off), + self.d_accum, self.d_den, + block=(32, 16, 1), grid=(len(block_times), 1), + stream=self.stream) + + # MAJOR TODO: for now, we kill almost all parallelism by forcing the + # stream here. Later, once we've decided on a density-buffer prefilter, + # we will move it to the GPU, allowing it to be embedded in the stream + # and letting the remaining code be asynchronous. + self.stream.synchronize() + dbuf_dim = (a.features.acc_height, a.features.acc_stride) + dbuf = cuda.from_device(self.d_den, dbuf_dim, np.float32) + dbuf = ndimage.filters.gaussian_filter(dbuf, 0.6) + cuda.memcpy_htod(self.d_den, dbuf) + + util.BaseCode.zero_dptr(a.mod, self.d_out, 4 * self.nbins, + self.stream) + self.stream.synchronize() + a._de.invoke(a.mod, self.d_accum, self.d_out, self.d_den, + self.stream) + self.stream.synchronize() + + + n = np.float32(self.ncps) + gam = np.float32(n / gam) + vib = np.float32(vib / n) + hipow = np.float32(hipow / n) + + # TODO: get block size from colorclip class? It actually does not + # depend on that being the case + color_fun = a.mod.get_function("colorclip") + color_fun(self.d_out, gam, vib, hipow, + block=(256, 1, 1), grid=(self.nbins / 256, 1), + stream=self.stream) + + def _interp_colors(self, cen_time, cen_cp): + # TODO: any visible difference between uint8 and richer formats? + pal = np.empty((self.PAL_HEIGHT, 256, 4), dtype=np.uint8) + a = self.anim + + if len(a.genomes) > 1: + # The typical case; applying real motion blur + cp = self._dst_cp + times = self._mk_dts(cen_time, cen_cp, self.PAL_HEIGHT) + for n, t in enumerate(times): + a._interp(t, cp) + for i, e in enumerate(cp.palette.entries): + pal[n][i] = np.uint8(np.array(e.color) * 255.0) + else: + # Cannot call any interp functions on a single genome; rather than + # have alternate code-paths, just copy the same colors everywhere + for i, e in enumerate(a.genomes[0].palette.entries): + # TODO: This triggers a RuntimeWarning + pal[0][i] = np.uint8(np.array(e.color) * 255.0) + pal[1:] = pal[0] + return pal + + def wait(self): + if self.pending: + self.stream.synchronize() + self.pending = False + return True + return False + + def get_result(self): + a = self.anim + g = a.features.gutter + obuf_dim = (a.features.acc_height, a.features.acc_stride, 4) + out = cuda.from_device(self.d_out, obuf_dim, np.float32) + # TODO: performance? + out = np.delete(out, np.s_[:16], axis=0) + out = np.delete(out, np.s_[:16], axis=1) + out = np.delete(out, np.s_[-16:], axis=0) + out = np.delete(out, np.s_[-16:], axis=1) + return out + + @staticmethod + def _mk_dts(cen_time, cen_cp, ncps): + w = cen_cp.temporal_filter_width + return [w * (t / (ncps - 1.0) - 0.5) for t in range(ncps)] class Features(object): """ @@ -93,7 +387,8 @@ class Features(object): palette_height = 16 # Maximum width of DE and other spatial filters, and thus in turn the - # amount of padding applied + # amount of padding applied. Note that, for now, this must not be changed! + # The filtering code makes deep assumptions about this value. gutter = 16 def __init__(self, genomes): @@ -116,11 +411,13 @@ class Features(object): else: self.final_xform_index = None + self.max_cps = max([cp.ntemporal_samples for cp in genomes]) + self.width = genomes[0].width self.height = genomes[0].height self.acc_width = genomes[0].width + 2 * self.gutter self.acc_height = genomes[0].height + 2 * self.gutter - self.acc_stride = genomes[0].width + 2 * self.gutter + self.acc_stride = 32 * int(math.ceil(self.acc_width / 32.)) class XFormFeatures(object): def __init__(self, xforms, xform_id): diff --git a/main.py b/main.py index bf1ce85..731d944 100644 --- a/main.py +++ b/main.py @@ -22,13 +22,10 @@ import scipy import pyglet import pycuda.autoinit -from fr0stlib.pyflam3 import * -from fr0stlib.pyflam3._flam3 import * - import cuburn._pyflam3_hacks +from fr0stlib import pyflam3 from cuburn.render import * from cuburn.code.mwc import MWCTest -from cuburn.code.iter import render, membench # Required on my system; CUDA doesn't yet work with GCC 4.5 os.environ['PATH'] = ('/usr/x86_64-pc-linux-gnu/gcc-bin/4.4.5:' @@ -37,24 +34,22 @@ os.environ['PATH'] = ('/usr/x86_64-pc-linux-gnu/gcc-bin/4.4.5:' def main(args): if '-t' in args: MWCTest.test_mwc() - membench() - with open(args[1]) as fp: - genomes = Genome.from_string(fp.read()) + genome_ptr, ngenomes = pyflam3.Genome.from_string(fp.read()) + genomes = cast(genome_ptr, POINTER(pyflam3.Genome*ngenomes)).contents anim = Animation(genomes) - accum, den = render(anim.features, genomes) - accum = np.delete(accum, np.s_[:16], axis=0) - accum = np.delete(accum, np.s_[:16], axis=1) - accum = np.delete(accum, np.s_[-16:], axis=0) - accum = np.delete(accum, np.s_[-16:], axis=1) + anim.compile() + anim.load() + for n, out in enumerate(anim.render_frames()): + noalpha = np.delete(out, 3, axis=2) + scipy.misc.imsave('rendered_%03d.png' % n, noalpha) + scipy.misc.imsave('rendered_%03d.jpg' % n, noalpha) - noalpha = np.delete(accum, 3, axis=2) - scipy.misc.imsave('rendered.png', noalpha) - scipy.misc.imsave('rendered.jpg', noalpha) + return - if '-g' not in args: - return + #if '-g' not in args: + # return window = pyglet.window.Window(anim.features.width, anim.features.height) imgbuf = (np.minimum(accum * 255, 255)).astype(np.uint8)