mirror of
https://github.com/stevenrobertson/cuburn.git
synced 2025-02-05 11:40:04 -05:00
Lots-o-stuff.
This commit is contained in:
parent
1f7b00b61e
commit
086e4e4fb4
14
TODO
14
TODO
@ -2,17 +2,7 @@ Status: passes rudimentary tests
|
|||||||
|
|
||||||
Current goals:
|
Current goals:
|
||||||
|
|
||||||
- Draw some dang points!
|
|
||||||
- Allocate buffer (can it be pre-allocated?)
|
|
||||||
- Direct scatter linear points by GTID from flame number
|
|
||||||
- Re-enable preview window
|
|
||||||
- Execute frame, update texture, repeat
|
|
||||||
- Writeback of points to the buffer
|
|
||||||
- Define writeback class, args
|
|
||||||
- Do camera rotation across frameset
|
|
||||||
- Postpone other kinds of testing and address clamping for now
|
|
||||||
- Start xforms
|
- Start xforms
|
||||||
- At first, fixed Sierpinski triangle or something
|
|
||||||
- xform selection, pre- and post-transform in xform
|
- xform selection, pre- and post-transform in xform
|
||||||
- first of the variations
|
- first of the variations
|
||||||
|
|
||||||
@ -43,16 +33,12 @@ Things to do (rather severely incomplete):
|
|||||||
|
|
||||||
Things to test:
|
Things to test:
|
||||||
|
|
||||||
- DeviceStream allocator and proper handling of corner cases
|
|
||||||
- Debug flag/dict/whatever for entire project in general
|
- Debug flag/dict/whatever for entire project in general
|
||||||
- Iteration counters for IterThread
|
- Iteration counters for IterThread
|
||||||
|
|
||||||
Things to benchmark:
|
Things to benchmark:
|
||||||
|
|
||||||
- Kernel invocation and/or interrupt times (will high load freeze X?)
|
- Kernel invocation and/or interrupt times (will high load freeze X?)
|
||||||
- 1D/2D texture load+interpolation speeds vs constant memory loading
|
|
||||||
- Must test under high SFU load
|
|
||||||
- Tex uses separate cache? Has lower bandwidth penalty for gather?
|
|
||||||
- MWC float conversion
|
- MWC float conversion
|
||||||
- The entire scatter process
|
- The entire scatter process
|
||||||
- Radix sort of writeback coordinates
|
- Radix sort of writeback coordinates
|
||||||
|
@ -11,7 +11,7 @@ import numpy as np
|
|||||||
|
|
||||||
from cuburnlib.ptx import *
|
from cuburnlib.ptx import *
|
||||||
|
|
||||||
class IterThread(PTXTest):
|
class IterThread(PTXEntryPoint):
|
||||||
entry_name = 'iter_thread'
|
entry_name = 'iter_thread'
|
||||||
entry_params = []
|
entry_params = []
|
||||||
|
|
||||||
@ -19,7 +19,7 @@ class IterThread(PTXTest):
|
|||||||
self.cps_uploaded = False
|
self.cps_uploaded = False
|
||||||
|
|
||||||
def deps(self):
|
def deps(self):
|
||||||
return [MWCRNG, CPDataStream]
|
return [MWCRNG, CPDataStream, HistScatter]
|
||||||
|
|
||||||
@ptx_func
|
@ptx_func
|
||||||
def module_setup(self):
|
def module_setup(self):
|
||||||
@ -61,18 +61,19 @@ class IterThread(PTXTest):
|
|||||||
reg.u32('cp_idx cpA')
|
reg.u32('cp_idx cpA')
|
||||||
with block("Claim a CP"):
|
with block("Claim a CP"):
|
||||||
std.set_is_first_thread(reg.pred('p_is_first'))
|
std.set_is_first_thread(reg.pred('p_is_first'))
|
||||||
op.atom.inc.u32(cp_idx, addr(g_num_cps_started), 1, ifp=p_is_first)
|
op.atom.add.u32(cp_idx, addr(g_num_cps_started), 1, ifp=p_is_first)
|
||||||
op.st.shared.u32(addr(s_cp_idx), cp_idx, ifp=p_is_first)
|
op.st.shared.u32(addr(s_cp_idx), cp_idx, ifp=p_is_first)
|
||||||
|
op.st.shared.u32(addr(s_num_samples), 0, ifp=p_is_first)
|
||||||
|
|
||||||
comment("Load the CP index in all threads")
|
comment("Load the CP index in all threads")
|
||||||
op.bar.sync(0)
|
op.bar.sync(1)
|
||||||
op.ld.shared.u32(cp_idx, addr(s_cp_idx))
|
op.ld.shared.u32(cp_idx, addr(s_cp_idx))
|
||||||
|
|
||||||
with block("Check to see if this CP is valid (if not, we're done"):
|
with block("Check to see if this CP is valid (if not, we're done)"):
|
||||||
reg.u32('num_cps')
|
reg.u32('num_cps')
|
||||||
reg.pred('p_last_cp')
|
reg.pred('p_last_cp')
|
||||||
op.ldu.u32(num_cps, addr(g_num_cps))
|
op.ldu.u32(num_cps, addr(g_num_cps))
|
||||||
op.setp.ge.u32(p_last_cp, cp_idx, 1)
|
op.setp.ge.u32(p_last_cp, cp_idx, num_cps)
|
||||||
op.bra.uni('all_cps_done', ifp=p_last_cp)
|
op.bra.uni('all_cps_done', ifp=p_last_cp)
|
||||||
|
|
||||||
with block('Load CP address'):
|
with block('Load CP address'):
|
||||||
@ -85,33 +86,37 @@ class IterThread(PTXTest):
|
|||||||
with block("If still fusing, increment count unconditionally"):
|
with block("If still fusing, increment count unconditionally"):
|
||||||
std.set_is_first_thread(reg.pred('p_is_first'))
|
std.set_is_first_thread(reg.pred('p_is_first'))
|
||||||
op.red.shared.add.s32(addr(s_num_samples), 1, ifp=p_is_first)
|
op.red.shared.add.s32(addr(s_num_samples), 1, ifp=p_is_first)
|
||||||
op.bar.sync(0)
|
op.bar.sync(2)
|
||||||
|
|
||||||
label('iter_loop_start')
|
label('iter_loop_start')
|
||||||
|
|
||||||
comment('Do... well, most of everything')
|
comment('Do... well, most of everything')
|
||||||
|
|
||||||
|
mwc.next_f32_11(x_coord)
|
||||||
|
mwc.next_f32_11(y_coord)
|
||||||
|
mwc.next_f32_01(color_coord)
|
||||||
|
|
||||||
op.add.u32(num_rounds, num_rounds, 1)
|
op.add.u32(num_rounds, num_rounds, 1)
|
||||||
|
|
||||||
with block("Test if we're still in FUSE"):
|
with block("Test if we're still in FUSE"):
|
||||||
reg.s32('num_samples')
|
reg.s32('num_samples')
|
||||||
reg.pred('p_in_fuse')
|
reg.pred('p_in_fuse')
|
||||||
op.ld.shared.u32(num_samples, addr(s_num_samples))
|
op.ld.shared.s32(num_samples, addr(s_num_samples))
|
||||||
op.setp.lt.s32(p_in_fuse, num_samples, 0)
|
op.setp.lt.s32(p_in_fuse, num_samples, 0)
|
||||||
op.bra.uni(fuse_loop_start, ifp=p_in_fuse)
|
op.bra.uni(fuse_loop_start, ifp=p_in_fuse)
|
||||||
|
|
||||||
with block("Ordinarily, we'd write the result here"):
|
reg.pred('p_point_is_valid')
|
||||||
op.add.u32(num_writes, num_writes, 1)
|
with block("Write the result"):
|
||||||
|
hist.scatter(x_coord, y_coord, color_coord, 0, p_point_is_valid)
|
||||||
# For testing, declare and clear p_badval
|
op.add.u32(num_writes, num_writes, 1, ifp=p_point_is_valid)
|
||||||
reg.pred('p_goodval')
|
|
||||||
op.setp.eq.u32(p_goodval, 1, 1)
|
|
||||||
|
|
||||||
with block("Increment number of samples by number of good values"):
|
with block("Increment number of samples by number of good values"):
|
||||||
reg.b32('good_samples')
|
reg.b32('good_samples laneid')
|
||||||
op.vote.ballot.b32(good_samples, p_goodval)
|
reg.pred('p_is_first')
|
||||||
|
op.vote.ballot.b32(good_samples, p_point_is_valid)
|
||||||
op.popc.b32(good_samples, good_samples)
|
op.popc.b32(good_samples, good_samples)
|
||||||
std.set_is_first_thread(reg.pred('p_is_first'))
|
op.mov.u32(laneid, '%laneid')
|
||||||
|
op.setp.eq.u32(p_is_first, laneid, 0)
|
||||||
op.red.shared.add.s32(addr(s_num_samples), good_samples,
|
op.red.shared.add.s32(addr(s_num_samples), good_samples,
|
||||||
ifp=p_is_first)
|
ifp=p_is_first)
|
||||||
|
|
||||||
@ -138,6 +143,9 @@ class IterThread(PTXTest):
|
|||||||
|
|
||||||
num_cps_dp, num_cps_l = ctx.mod.get_global('g_num_cps')
|
num_cps_dp, num_cps_l = ctx.mod.get_global('g_num_cps')
|
||||||
cuda.memset_d32(num_cps_dp, num_cps, 1)
|
cuda.memset_d32(num_cps_dp, num_cps, 1)
|
||||||
|
# TODO: "if debug >= 3"
|
||||||
|
print "Uploaded stream to card:"
|
||||||
|
CPDataStream.print_record(ctx, cp_stream, 5)
|
||||||
self.cps_uploaded = True
|
self.cps_uploaded = True
|
||||||
|
|
||||||
@instmethod
|
@instmethod
|
||||||
@ -148,14 +156,228 @@ class IterThread(PTXTest):
|
|||||||
cuda.memset_d32(num_cps_st_dp, 0, 1)
|
cuda.memset_d32(num_cps_st_dp, 0, 1)
|
||||||
|
|
||||||
func = ctx.mod.get_function('iter_thread')
|
func = ctx.mod.get_function('iter_thread')
|
||||||
dtime = func(block=ctx.block, grid=ctx.grid, time_kernel=True)
|
tr = ctx.ptx.instances[PaletteLookup].texref
|
||||||
|
dtime = func(block=ctx.block, grid=ctx.grid, time_kernel=True,
|
||||||
|
texrefs=[tr])
|
||||||
|
|
||||||
|
shape = (ctx.grid[0], ctx.block[0]/32, 32)
|
||||||
num_rounds_dp, num_rounds_l = ctx.mod.get_global('g_num_rounds')
|
num_rounds_dp, num_rounds_l = ctx.mod.get_global('g_num_rounds')
|
||||||
num_writes_dp, num_writes_l = ctx.mod.get_global('g_num_writes')
|
num_writes_dp, num_writes_l = ctx.mod.get_global('g_num_writes')
|
||||||
rounds = cuda.from_device(num_rounds_dp, ctx.threads, np.uint32)
|
rounds = cuda.from_device(num_rounds_dp, shape, np.int32)
|
||||||
writes = cuda.from_device(num_writes_dp, ctx.threads, np.uint32)
|
writes = cuda.from_device(num_writes_dp, shape, np.int32)
|
||||||
print "Rounds:", rounds
|
print "Rounds:", sum(rounds)
|
||||||
print "Writes:", writes
|
print "Writes:", sum(writes)
|
||||||
|
print rounds
|
||||||
|
print writes
|
||||||
|
|
||||||
|
class CameraTransform(PTXFragment):
|
||||||
|
shortname = 'camera'
|
||||||
|
def deps(self):
|
||||||
|
return [CPDataStream]
|
||||||
|
|
||||||
|
@ptx_func
|
||||||
|
def rotate(self, rotated_x, rotated_y, x, y):
|
||||||
|
"""
|
||||||
|
Rotate an IFS-space coordinate as defined by the camera.
|
||||||
|
"""
|
||||||
|
if features.camera_rotation:
|
||||||
|
assert rotated_x.name != x.name and rotated_y.name != y.name
|
||||||
|
with block("Rotate %s, %s to camera alignment" % (x, y)):
|
||||||
|
reg.f32('rot_center_x rot_center_y')
|
||||||
|
cp.get_v2(cpA, rot_center_x, 'cp.rot_center[0]',
|
||||||
|
rot_center_y, 'cp.rot_center[1]')
|
||||||
|
op.sub.f32(x, x, rot_center_x)
|
||||||
|
op.sub.f32(y, y, rot_center_y)
|
||||||
|
|
||||||
|
reg.f32('rot_sin_t rot_cos_t rot_old_x rot_old_y')
|
||||||
|
cp.get_v2(cpA, rot_cos_t, 'cos(cp.rotate * 2 * pi / 360.)',
|
||||||
|
rot_sin_t, '-sin(cp.rotate * 2 * pi / 360.)')
|
||||||
|
|
||||||
|
comment('rotated_x = x * cos(t) - y * sin(t) + rot_center_x')
|
||||||
|
op.fma.rn.f32(rotated_x, x, rot_cos_t, rot_center_x)
|
||||||
|
op.fma.rn.f32(rotated_x, y, rot_sin_t, rotated_x)
|
||||||
|
|
||||||
|
op.neg.f32(rot_sin_t, rot_sin_t)
|
||||||
|
comment('rotated_y = x * sin(t) + y * cos(t) + rot_center_y')
|
||||||
|
op.fma.rn.f32(rotated_y, x, rot_sin_t, rot_center_y)
|
||||||
|
op.fma.rn.f32(rotated_y, y, rot_cos_t, rotated_y)
|
||||||
|
|
||||||
|
# TODO: if this is a register-critical section, reloading
|
||||||
|
# rot_center_[xy] here should save two regs. OTOH, if this is
|
||||||
|
# *not* reg-crit, moving the subtraction above to new variables
|
||||||
|
# may save a few clocks
|
||||||
|
op.add.f32(x, x, rot_center_x)
|
||||||
|
op.add.f32(y, y, rot_center_y)
|
||||||
|
else:
|
||||||
|
comment("No camera rotation in this kernel")
|
||||||
|
op.mov.f32(rotated_x, x)
|
||||||
|
op.mov.f32(rotated_y, y)
|
||||||
|
|
||||||
|
@ptx_func
|
||||||
|
def get_norm(self, norm_x, norm_y, x, y):
|
||||||
|
"""
|
||||||
|
Find the [0,1]-normalized floating-point histogram coordinates
|
||||||
|
``norm_x, norm_y`` from the given IFS-space coordinates ``x, y``.
|
||||||
|
"""
|
||||||
|
self.rotate(norm_x, norm_y, x, y)
|
||||||
|
with block("Scale rotated points to [0,1]-normalized coordinates"):
|
||||||
|
reg.f32('cam_scale cam_offset')
|
||||||
|
cp.get_v2(cpA, cam_scale, 'cp.camera.norm_scale[0]',
|
||||||
|
cam_offset, 'cp.camera.norm_offset[0]')
|
||||||
|
op.fma.f32(norm_x, norm_x, cam_scale, cam_offset)
|
||||||
|
cp.get_v2(cpA, cam_scale, 'cp.camera.norm_scale[1]',
|
||||||
|
cam_offset, 'cp.camera.norm_offset[1]')
|
||||||
|
op.fma.f32(norm_y, norm_y, cam_scale, cam_offset)
|
||||||
|
|
||||||
|
@ptx_func
|
||||||
|
def get_index(self, index, x, y, pred=None):
|
||||||
|
"""
|
||||||
|
Find the histogram index (as a u32) from the IFS spatial coordinate in
|
||||||
|
``x, y``.
|
||||||
|
|
||||||
|
If the coordinates are out of bounds, 0xffffffff will be stored to
|
||||||
|
``index``. If ``pred`` is given, it will be set if the point is valid,
|
||||||
|
and cleared if not.
|
||||||
|
"""
|
||||||
|
# A few instructions could probably be shaved off of this one
|
||||||
|
with block("Find histogram index"):
|
||||||
|
reg.f32('norm_x norm_y')
|
||||||
|
self.rotate(norm_x, norm_y, x, y)
|
||||||
|
comment('Scale and offset from IFS to index coordinates')
|
||||||
|
reg.f32('cam_scale cam_offset')
|
||||||
|
cp.get_v2(cpA, cam_scale, 'cp.camera.idx_scale[0]',
|
||||||
|
cam_offset, 'cp.camera.idx_offset[0]')
|
||||||
|
op.fma.rn.f32(norm_x, norm_x, cam_scale, cam_offset)
|
||||||
|
|
||||||
|
cp.get_v2(cpA, cam_scale, 'cp.camera.idx_scale[1]',
|
||||||
|
cam_offset, 'cp.camera.idx_offset[1]')
|
||||||
|
op.fma.rn.f32(norm_y, norm_y, cam_scale, cam_offset)
|
||||||
|
|
||||||
|
comment('Check for bad value')
|
||||||
|
reg.u32('index_x index_y')
|
||||||
|
if not pred:
|
||||||
|
pred = reg.pred('p_valid')
|
||||||
|
|
||||||
|
op.cvt.rzi.s32.f32(index_x, norm_x)
|
||||||
|
op.setp.ge.s32(pred, index_x, 0)
|
||||||
|
op.setp.lt.and_.s32(pred, index_x, features.hist_width, pred)
|
||||||
|
|
||||||
|
op.cvt.rzi.s32.f32(index_y, norm_y)
|
||||||
|
op.setp.ge.and_.s32(pred, index_y, 0, pred)
|
||||||
|
op.setp.lt.and_.s32(pred, index_y, features.hist_height, pred)
|
||||||
|
|
||||||
|
op.mad.lo.u32(index, index_y, features.hist_stride, index_x)
|
||||||
|
op.mov.u32(index, 0xffffffff, ifnotp=pred)
|
||||||
|
|
||||||
|
class PaletteLookup(PTXFragment):
|
||||||
|
shortname = "palette"
|
||||||
|
# Resolution of texture on device. Bigger = more palette rez, maybe slower
|
||||||
|
texheight = 16
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self.texref = None
|
||||||
|
|
||||||
|
def deps(self):
|
||||||
|
return [CPDataStream]
|
||||||
|
|
||||||
|
@ptx_func
|
||||||
|
def module_setup(self):
|
||||||
|
mem.global_.texref('t_palette')
|
||||||
|
|
||||||
|
@ptx_func
|
||||||
|
def look_up(self, r, g, b, a, color, norm_time):
|
||||||
|
"""
|
||||||
|
Look up the values of ``r, g, b, a`` corresponding to ``color``
|
||||||
|
at the CP selected by ``norm_time``. Note that both ``color``
|
||||||
|
and ``norm_time`` should be [0,1]-normalized floats.
|
||||||
|
"""
|
||||||
|
op.tex._2d.v4.f32.f32(vec(r, g, b, a),
|
||||||
|
addr([t_palette, ', ', vec(norm_time, color)]))
|
||||||
|
if features.non_box_temporal_filter:
|
||||||
|
raise NotImplementedError("Non-box temporal filters not supported")
|
||||||
|
|
||||||
|
@instmethod
|
||||||
|
def upload_palette(self, ctx, frame, cp_list):
|
||||||
|
"""
|
||||||
|
Extract the palette from the given list of interpolated CPs, and upload
|
||||||
|
it to the device as a texture.
|
||||||
|
"""
|
||||||
|
# TODO: figure out if storing the full list is an actual drag on
|
||||||
|
# performance/memory
|
||||||
|
if frame.center_cp.temporal_filter_type != 0:
|
||||||
|
# TODO: make texture sample based on time, not on CP index
|
||||||
|
raise NotImplementedError("Use box temporal filters for now")
|
||||||
|
pal = np.ndarray((self.texheight, 256, 4), dtype=np.float32)
|
||||||
|
inv = float(len(cp_list) - 1) / (self.texheight - 1)
|
||||||
|
for y in range(self.texheight):
|
||||||
|
for x in range(256):
|
||||||
|
for c in range(4):
|
||||||
|
# TODO: interpolate here?
|
||||||
|
cy = int(round(y * inv))
|
||||||
|
pal[y][x][c] = cp_list[cy].palette.entries[x].color[c]
|
||||||
|
dev_array = cuda.make_multichannel_2d_array(pal, "C")
|
||||||
|
self.texref = ctx.mod.get_texref('t_palette')
|
||||||
|
# TODO: float16? or can we still use interp with int storage?
|
||||||
|
self.texref.set_format(cuda.array_format.FLOAT, 4)
|
||||||
|
self.texref.set_flags(cuda.TRSF_NORMALIZED_COORDINATES)
|
||||||
|
self.texref.set_filter_mode(cuda.filter_mode.LINEAR)
|
||||||
|
self.texref.set_address_mode(0, cuda.address_mode.CLAMP)
|
||||||
|
self.texref.set_address_mode(1, cuda.address_mode.CLAMP)
|
||||||
|
self.texref.set_array(dev_array)
|
||||||
|
|
||||||
|
def device_init(self, ctx):
|
||||||
|
assert self.texref, "Must upload palette texture before launch!"
|
||||||
|
|
||||||
|
class HistScatter(PTXFragment):
|
||||||
|
shortname = "hist"
|
||||||
|
def deps(self):
|
||||||
|
return [CPDataStream, CameraTransform, PaletteLookup]
|
||||||
|
|
||||||
|
@ptx_func
|
||||||
|
def module_setup(self):
|
||||||
|
mem.global_.f32('g_hist_bins',
|
||||||
|
features.hist_height * features.hist_stride * 4)
|
||||||
|
|
||||||
|
@ptx_func
|
||||||
|
def entry_setup(self):
|
||||||
|
comment("For now, assume histogram bins have been cleared by host")
|
||||||
|
|
||||||
|
@ptx_func
|
||||||
|
def scatter(self, x, y, color, xf_idx, p_valid=None):
|
||||||
|
"""
|
||||||
|
Scatter the given point directly to the histogram bins. I think this
|
||||||
|
technique has the worst performance of all of 'em. Accesses ``cpA``
|
||||||
|
directly.
|
||||||
|
"""
|
||||||
|
with block("Scatter directly to buffer"):
|
||||||
|
if p_valid is None:
|
||||||
|
p_valid = reg.pred('p_valid')
|
||||||
|
reg.u32('hist_index')
|
||||||
|
camera.get_index(hist_index, x, y, p_valid)
|
||||||
|
reg.u32('hist_bin_addr')
|
||||||
|
op.mov.u32(hist_bin_addr, g_hist_bins)
|
||||||
|
op.mad.lo.u32(hist_bin_addr, hist_index, 16, hist_bin_addr)
|
||||||
|
|
||||||
|
reg.f32('r g b a norm_time')
|
||||||
|
cp.get(cpA, norm_time, 'cp.norm_time')
|
||||||
|
palette.look_up(r, g, b, a, color, norm_time)
|
||||||
|
# TODO: look up, scale by xform visibility
|
||||||
|
op.red.add.f32(addr(hist_bin_addr), r)
|
||||||
|
op.red.add.f32(addr(hist_bin_addr,4), g)
|
||||||
|
op.red.add.f32(addr(hist_bin_addr,8), b)
|
||||||
|
op.red.add.f32(addr(hist_bin_addr,12), a)
|
||||||
|
|
||||||
|
|
||||||
|
def device_init(self, ctx):
|
||||||
|
hist_bins_dp, hist_bins_l = ctx.mod.get_global('g_hist_bins')
|
||||||
|
cuda.memset_d32(hist_bins_dp, 0, hist_bins_l/4)
|
||||||
|
|
||||||
|
@instmethod
|
||||||
|
def get_bins(self, ctx, features):
|
||||||
|
hist_bins_dp, hist_bins_l = ctx.mod.get_global('g_hist_bins')
|
||||||
|
return cuda.from_device(hist_bins_dp,
|
||||||
|
(features.hist_height, features.hist_stride, 4),
|
||||||
|
dtype=np.float32)
|
||||||
|
|
||||||
class MWCRNG(PTXFragment):
|
class MWCRNG(PTXFragment):
|
||||||
shortname = "mwc"
|
shortname = "mwc"
|
||||||
@ -218,14 +440,15 @@ class MWCRNG(PTXFragment):
|
|||||||
with block('Load random float [0,1] into ' + dst_reg.name):
|
with block('Load random float [0,1] into ' + dst_reg.name):
|
||||||
self._next()
|
self._next()
|
||||||
op.cvt.rn.f32.u32(dst_reg, mwc_st)
|
op.cvt.rn.f32.u32(dst_reg, mwc_st)
|
||||||
op.mul.f32(dst_reg, dst_reg, '0f0000802F') # 1./(1<<32)
|
op.mul.f32(dst_reg, dst_reg, '0f2F800000') # 1./(1<<32)
|
||||||
|
|
||||||
@ptx_func
|
@ptx_func
|
||||||
def next_f32_11(self, dst_reg):
|
def next_f32_11(self, dst_reg):
|
||||||
with block('Load random float [-1,1) into ' + dst_reg.name):
|
with block('Load random float [-1,1) into ' + dst_reg.name):
|
||||||
|
reg.u32('mwc_to_float')
|
||||||
self._next()
|
self._next()
|
||||||
op.cvt.rn.f32.s32(dst_reg, mwc_st)
|
op.cvt.rn.f32.s32(dst_reg, mwc_st)
|
||||||
op.mul.f32(dst_reg, dst_reg, '0f00000030') # 1./(1<<31)
|
op.mul.f32(dst_reg, dst_reg, '0f30000000') # 1./(1<<31)
|
||||||
|
|
||||||
def device_init(self, ctx):
|
def device_init(self, ctx):
|
||||||
if self.threads_ready >= ctx.threads:
|
if self.threads_ready >= ctx.threads:
|
||||||
|
@ -14,6 +14,7 @@ import types
|
|||||||
import struct
|
import struct
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
|
from math import *
|
||||||
|
|
||||||
# Okay, so here's what's going on.
|
# Okay, so here's what's going on.
|
||||||
#
|
#
|
||||||
@ -137,7 +138,7 @@ class _Block(object):
|
|||||||
self.stack = [self.outer_ctx]
|
self.stack = [self.outer_ctx]
|
||||||
def clean_injectors(self):
|
def clean_injectors(self):
|
||||||
inj = self.stack[-1].injectors
|
inj = self.stack[-1].injectors
|
||||||
[inj.remove(i) for i in inj if i.dead]
|
[inj.remove(i) for i in list(inj) if i.dead]
|
||||||
def push_ctx(self):
|
def push_ctx(self):
|
||||||
self.clean_injectors()
|
self.clean_injectors()
|
||||||
self.stack.append(BlockCtx(dict(self.stack[-1].locals), [], []))
|
self.stack.append(BlockCtx(dict(self.stack[-1].locals), [], []))
|
||||||
@ -155,8 +156,6 @@ class _Block(object):
|
|||||||
def pop_ctx(self):
|
def pop_ctx(self):
|
||||||
self.clean_injectors()
|
self.clean_injectors()
|
||||||
bs = self.stack.pop()
|
bs = self.stack.pop()
|
||||||
# TODO: figure out why this next line is needed
|
|
||||||
[bs.injectors.remove(i) for i in bs.injectors if i.dead]
|
|
||||||
self.stack[-1].code.extend(bs.code)
|
self.stack[-1].code.extend(bs.code)
|
||||||
if len(self.stack) == 1:
|
if len(self.stack) == 1:
|
||||||
# We're on outer_ctx, so all injectors should be gone.
|
# We're on outer_ctx, so all injectors should be gone.
|
||||||
@ -337,8 +336,8 @@ class _CallChain(object):
|
|||||||
self.__chain = []
|
self.__chain = []
|
||||||
return r
|
return r
|
||||||
def __getattr__(self, name):
|
def __getattr__(self, name):
|
||||||
if name.endswith('_'):
|
# Work around keyword conflicts between python and ptx
|
||||||
name = name[:-1]
|
name = name.strip('_')
|
||||||
self.__chain.append(name)
|
self.__chain.append(name)
|
||||||
# Another great crime against the universe:
|
# Another great crime against the universe:
|
||||||
return self
|
return self
|
||||||
@ -455,20 +454,30 @@ class Mem(object):
|
|||||||
|
|
||||||
class _MemFactory(_CallChain):
|
class _MemFactory(_CallChain):
|
||||||
"""Actual `mem` object"""
|
"""Actual `mem` object"""
|
||||||
def _call(self, type, name, array=False, initializer=None):
|
def _call(self, type, name, array=False, init=None):
|
||||||
assert len(type) == 2
|
assert len(type) == 2
|
||||||
memobj = Mem(type, name, array, initializer)
|
memobj = Mem(type, name, array, init)
|
||||||
if array is True:
|
if array is True:
|
||||||
array = ['[]']
|
array = ['[]']
|
||||||
elif array:
|
elif array:
|
||||||
array = ['[', array, ']']
|
array = ['[', array, ']']
|
||||||
else:
|
else:
|
||||||
array = []
|
array = []
|
||||||
if initializer:
|
if init:
|
||||||
array += [' = ', initializer]
|
array += [' = ', init]
|
||||||
self.block.code(op=['.%s.%s ' % (type[0], type[1]), name, array])
|
self.block.code(op=['.%s.%s ' % (type[0], type[1]), name, array])
|
||||||
self.block.inject(name, memobj)
|
self.block.inject(name, memobj)
|
||||||
|
|
||||||
|
# TODO: move vec, addr here, or make this public
|
||||||
|
@staticmethod
|
||||||
|
def initializer(*args, **kwargs):
|
||||||
|
if args and kwargs:
|
||||||
|
raise ValueError("Cannot initialize in both list and struct style")
|
||||||
|
if args:
|
||||||
|
return ['{', _softjoin(args, ','), '}']
|
||||||
|
jkws = _softjoin([[k, ' = ', v] for k, v in kwargs.items()], ', ')
|
||||||
|
return ['{', jkws, '}']
|
||||||
|
|
||||||
class Label(object):
|
class Label(object):
|
||||||
"""
|
"""
|
||||||
Specifies the target for a branch.
|
Specifies the target for a branch.
|
||||||
@ -586,7 +595,7 @@ def instmethod(func):
|
|||||||
"""
|
"""
|
||||||
def wrap(cls, ctx, *args, **kwargs):
|
def wrap(cls, ctx, *args, **kwargs):
|
||||||
inst = ctx.ptx.instances[cls]
|
inst = ctx.ptx.instances[cls]
|
||||||
func(inst, ctx, *args, **kwargs)
|
return func(inst, ctx, *args, **kwargs)
|
||||||
return classmethod(wrap)
|
return classmethod(wrap)
|
||||||
|
|
||||||
class PTXEntryPoint(PTXFragment):
|
class PTXEntryPoint(PTXFragment):
|
||||||
@ -979,23 +988,22 @@ class DataStream(PTXFragment):
|
|||||||
assert self.cells[idx].texp is None
|
assert self.cells[idx].texp is None
|
||||||
offset = self.cells[idx].offset
|
offset = self.cells[idx].offset
|
||||||
self.cells[idx] = _DataCell(offset, vsize, texp)
|
self.cells[idx] = _DataCell(offset, vsize, texp)
|
||||||
|
self.free.pop(alloc)
|
||||||
# Now reinsert the fragmented free cells.
|
# Now reinsert the fragmented free cells.
|
||||||
fragments = alloc - vsize
|
fragments = alloc - vsize
|
||||||
foffset = offset + vsize
|
foffset = offset + vsize
|
||||||
fsize = 1
|
fsize = 1
|
||||||
fidx = idx
|
fidx = idx
|
||||||
while fsize <= self.alignment:
|
while fsize < self.alignment:
|
||||||
if fragments & fsize:
|
if fragments & fsize:
|
||||||
assert fsize not in self.free
|
assert fsize not in self.free
|
||||||
fidx += 1
|
fidx += 1
|
||||||
self.cells.insert(fidx, _DataCell(foffset, fsize, None))
|
self.cells.insert(fidx, _DataCell(foffset, fsize, None))
|
||||||
foffset += fsize
|
foffset += fsize
|
||||||
|
for k, v in filter(lambda (k, v): v >= fidx, self.free.items()):
|
||||||
|
self.free[k] = v+1
|
||||||
self.free[fsize] = fidx
|
self.free[fsize] = fidx
|
||||||
fsize *= 2
|
fsize *= 2
|
||||||
# Adjust indexes. This is ugly, but evidently unavoidable
|
|
||||||
if fidx-idx:
|
|
||||||
for k, v in filter(lambda (k, v): v > idx, self.free.items()):
|
|
||||||
self.free[k] = v+(fidx-idx)
|
|
||||||
return offset
|
return offset
|
||||||
|
|
||||||
@ptx_func
|
@ptx_func
|
||||||
@ -1011,7 +1019,7 @@ class DataStream(PTXFragment):
|
|||||||
opname = ['ldu', 'b%d' % (size*8)]
|
opname = ['ldu', 'b%d' % (size*8)]
|
||||||
if len(dregs) > 1:
|
if len(dregs) > 1:
|
||||||
opname.insert(1, 'v%d' % len(dregs))
|
opname.insert(1, 'v%d' % len(dregs))
|
||||||
dregs = vec(dregs)
|
dregs = vec(*dregs)
|
||||||
op._call(opname, dregs, addr(areg, offset), ifp=ifp, ifnotp=ifnotp)
|
op._call(opname, dregs, addr(areg, offset), ifp=ifp, ifnotp=ifnotp)
|
||||||
|
|
||||||
@ptx_func
|
@ptx_func
|
||||||
@ -1042,6 +1050,8 @@ class DataStream(PTXFragment):
|
|||||||
self.finalized = True
|
self.finalized = True
|
||||||
for dv in self.size_delayvars:
|
for dv in self.size_delayvars:
|
||||||
dv.val = self._size
|
dv.val = self._size
|
||||||
|
print "Finalized stream:"
|
||||||
|
self._print_format()
|
||||||
|
|
||||||
@instmethod
|
@instmethod
|
||||||
def pack(self, ctx, _out_file_ = None, **kwargs):
|
def pack(self, ctx, _out_file_ = None, **kwargs):
|
||||||
@ -1087,8 +1097,7 @@ class DataStream(PTXFragment):
|
|||||||
vals = []
|
vals = []
|
||||||
outfile.write(struct.pack(type, *vals))
|
outfile.write(struct.pack(type, *vals))
|
||||||
|
|
||||||
@instmethod
|
def _print_format(self, ctx=None, stream=None):
|
||||||
def print_record(self, ctx):
|
|
||||||
for cell in self.cells:
|
for cell in self.cells:
|
||||||
if cell.texp is None:
|
if cell.texp is None:
|
||||||
print '%3d %2d --' % (cell.offset, cell.size)
|
print '%3d %2d --' % (cell.offset, cell.size)
|
||||||
@ -1096,5 +1105,24 @@ class DataStream(PTXFragment):
|
|||||||
print '%3d %2d %4s %s' % (cell.offset, cell.size, cell.texp.type,
|
print '%3d %2d %4s %s' % (cell.offset, cell.size, cell.texp.type,
|
||||||
cell.texp.exprlist[0])
|
cell.texp.exprlist[0])
|
||||||
for exp in cell.texp.exprlist[1:]:
|
for exp in cell.texp.exprlist[1:]:
|
||||||
print '%12s %s' % ('', exp)
|
print '%11s %s' % ('', exp)
|
||||||
|
print_format = instmethod(_print_format)
|
||||||
|
|
||||||
|
@instmethod
|
||||||
|
def print_record(self, ctx, stream, limit=None):
|
||||||
|
for i in range(0, len(stream), self._size):
|
||||||
|
for cell in self.cells:
|
||||||
|
if cell.texp is None:
|
||||||
|
print '%3d %2d --' % (cell.offset, cell.size)
|
||||||
|
continue
|
||||||
|
print '%3d %2d %4s %s' % (cell.offset, cell.size,
|
||||||
|
cell.texp.type,
|
||||||
|
struct.unpack(cell.texp.type,
|
||||||
|
stream[cell.offset:cell.offset+cell.size]))
|
||||||
|
for exp in cell.texp.exprlist:
|
||||||
|
print '%11s %s' % ('', exp)
|
||||||
|
print '\n----\n'
|
||||||
|
if limit is not None:
|
||||||
|
limit -= 1
|
||||||
|
if limit <= 0: break
|
||||||
|
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
import math
|
||||||
from ctypes import *
|
from ctypes import *
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@ -7,53 +8,80 @@ from fr0stlib.pyflam3._flam3 import *
|
|||||||
from fr0stlib.pyflam3.constants import *
|
from fr0stlib.pyflam3.constants import *
|
||||||
|
|
||||||
from cuburnlib.cuda import LaunchContext
|
from cuburnlib.cuda import LaunchContext
|
||||||
from cuburnlib.device_code import IterThread, CPDataStream
|
from cuburnlib.device_code import *
|
||||||
|
|
||||||
Point = lambda x, y: np.array([x, y], dtype=np.double)
|
Point = lambda x, y: np.array([x, y], dtype=np.double)
|
||||||
|
|
||||||
class Genome(pyflam3.Genome):
|
class Genome(pyflam3.Genome):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class Frame(pyflam3.Frame):
|
class _Frame(pyflam3.Frame):
|
||||||
def interpolate(self, time, cp):
|
"""
|
||||||
flam3_interpolate(self.genomes, self.ngenomes, time, 0, byref(cp))
|
ctypes flam3_frame object used for genome interpolation and
|
||||||
|
spatial filter creation
|
||||||
|
"""
|
||||||
|
def __init__(self, genomes, *args, **kwargs):
|
||||||
|
pyflam3.Frame.__init__(self, *args, **kwargs)
|
||||||
|
self.genomes = (BaseGenome * len(genomes))()
|
||||||
|
for i in range(len(genomes)):
|
||||||
|
memmove(byref(self.genomes[i]), byref(genomes[i]),
|
||||||
|
sizeof(BaseGenome))
|
||||||
|
self.ngenomes = len(genomes)
|
||||||
|
|
||||||
def pack_stream(self, ctx, time):
|
# TODO: do this here?
|
||||||
"""
|
self.pixel_aspect_ratio = float(genomes[0].height) / genomes[0].width
|
||||||
Pack and return the control point data stream to render this frame.
|
|
||||||
"""
|
|
||||||
# Get the central control point, and calculate parameters that change
|
|
||||||
# once per frame
|
|
||||||
cp = BaseGenome()
|
|
||||||
self.interpolate(time, cp)
|
|
||||||
self.filt = Filters(self, cp)
|
|
||||||
rw = cp.spatial_oversample * cp.width + 2 * self.filt.gutter
|
|
||||||
rh = cp.spatial_oversample * cp.height + 2 * self.filt.gutter
|
|
||||||
|
|
||||||
if cp.nbatches * cp.ntemporal_samples < ctx.ctas:
|
def interpolate(self, time, stagger=0, cp=None):
|
||||||
|
cp = cp or BaseGenome()
|
||||||
|
flam3_interpolate(self.genomes, self.ngenomes, time,
|
||||||
|
stagger, byref(cp))
|
||||||
|
return cp
|
||||||
|
|
||||||
|
class Frame(object):
|
||||||
|
"""
|
||||||
|
Handler for a single frame of a rendered genome.
|
||||||
|
"""
|
||||||
|
def __init__(self, _frame, time):
|
||||||
|
self._frame = _frame
|
||||||
|
self.center_cp = self._frame.interpolate(time)
|
||||||
|
|
||||||
|
def upload_data(self, ctx, filters, time):
|
||||||
|
"""
|
||||||
|
Prepare and upload the data needed to render this frame to the device.
|
||||||
|
"""
|
||||||
|
center = self.center_cp
|
||||||
|
ncps = center.nbatches * center.ntemporal_samples
|
||||||
|
|
||||||
|
if ncps < ctx.ctas:
|
||||||
raise NotImplementedError(
|
raise NotImplementedError(
|
||||||
"Distribution of a CP across multiple CTAs not yet done")
|
"Distribution of a CP across multiple CTAs not yet done")
|
||||||
# Interpolate each time step, calculate per-step variables, and pack
|
|
||||||
# into the stream
|
# TODO: isn't this leaking ctypes xforms all over the place?
|
||||||
stream = StringIO()
|
stream = StringIO()
|
||||||
print "Data stream contents:"
|
cp_list = []
|
||||||
CPDataStream.print_record(ctx)
|
|
||||||
tcp = BaseGenome()
|
|
||||||
for batch_idx in range(cp.nbatches):
|
|
||||||
for time_idx in range(cp.ntemporal_samples):
|
|
||||||
idx = time_idx + batch_idx * cp.nbatches
|
|
||||||
cp_time = time + self.filt.temporal_deltas[idx]
|
|
||||||
self.interpolate(time, tcp)
|
|
||||||
tcp.camera = Camera(self, tcp, self.filt)
|
|
||||||
|
|
||||||
tcp.nsamples = (tcp.camera.sample_density *
|
for batch_idx in range(center.nbatches):
|
||||||
cp.width * cp.height) / (
|
for time_idx in range(center.ntemporal_samples):
|
||||||
cp.nbatches * cp.ntemporal_samples)
|
idx = time_idx + batch_idx * center.nbatches
|
||||||
|
time = time + filters.temporal_deltas[idx]
|
||||||
|
cp = self._frame.interpolate(time)
|
||||||
|
cp_list.append(cp)
|
||||||
|
|
||||||
CPDataStream.pack_into(ctx, stream,
|
cp.camera = Camera(self._frame, cp, filters)
|
||||||
frame=self, cp=tcp, cp_idx=idx)
|
cp.nsamples = (cp.camera.sample_density *
|
||||||
|
center.width * center.height) / ncps
|
||||||
|
|
||||||
|
print "Expected writes:", (
|
||||||
|
cp.camera.sample_density * center.width * center.height)
|
||||||
|
min_time = min(filters.temporal_deltas)
|
||||||
|
max_time = max(filters.temporal_deltas)
|
||||||
|
for i, cp in enumerate(cp_list):
|
||||||
|
cp.norm_time = (filters.temporal_deltas[i] - min_time) / (
|
||||||
|
max_time - min_time)
|
||||||
|
CPDataStream.pack_into(ctx, stream, frame=self, cp=cp, cp_idx=idx)
|
||||||
|
PaletteLookup.upload_palette(ctx, self, cp_list)
|
||||||
stream.seek(0)
|
stream.seek(0)
|
||||||
return (stream.read(), cp.nbatches * cp.ntemporal_samples)
|
IterThread.upload_cp_stream(ctx, stream.read(), ncps)
|
||||||
|
|
||||||
class Animation(object):
|
class Animation(object):
|
||||||
"""
|
"""
|
||||||
@ -74,15 +102,12 @@ class Animation(object):
|
|||||||
interpolated sequence between one or two genomes.
|
interpolated sequence between one or two genomes.
|
||||||
"""
|
"""
|
||||||
def __init__(self, genomes):
|
def __init__(self, genomes):
|
||||||
self.genomes = (Genome * len(genomes))()
|
# _frame is the ctypes frame object used only for interpolation
|
||||||
for i in range(len(genomes)):
|
self._frame = _Frame(genomes)
|
||||||
memmove(byref(self.genomes[i]), byref(genomes[i]),
|
|
||||||
sizeof(BaseGenome))
|
|
||||||
|
|
||||||
self.features = Features(genomes)
|
# Use the same set of filters throughout the anim, a la flam3
|
||||||
self.frame = Frame()
|
self.filters = Filters(self._frame, genomes[0])
|
||||||
self.frame.genomes = cast(self.genomes, POINTER(BaseGenome))
|
self.features = Features(genomes, self.filters)
|
||||||
self.frame.ngenomes = len(genomes)
|
|
||||||
|
|
||||||
self.ctx = None
|
self.ctx = None
|
||||||
|
|
||||||
@ -103,25 +128,17 @@ class Animation(object):
|
|||||||
# TODO: support more nuanced frame control than just 'time'
|
# TODO: support more nuanced frame control than just 'time'
|
||||||
# TODO: reuse more information between frames
|
# TODO: reuse more information between frames
|
||||||
# TODO: allow animation-long override of certain parameters (size, etc)
|
# TODO: allow animation-long override of certain parameters (size, etc)
|
||||||
cp_stream, num_cps = self.frame.pack_stream(self.ctx, time)
|
frame = Frame(self._frame, time)
|
||||||
iter_thread = self.ctx.ptx.instances[IterThread]
|
frame.upload_data(self.ctx, self.filters, time)
|
||||||
IterThread.upload_cp_stream(self.ctx, cp_stream, num_cps)
|
self.ctx.set_up()
|
||||||
IterThread.call(self.ctx)
|
IterThread.call(self.ctx)
|
||||||
|
return HistScatter.get_bins(self.ctx, self.features)
|
||||||
class Features(object):
|
|
||||||
"""
|
|
||||||
Determine features and constants required to render a particular set of
|
|
||||||
genomes. The values of this class are fixed before compilation begins.
|
|
||||||
"""
|
|
||||||
# Constant; number of rounds spent fusing points on first CP of a frame
|
|
||||||
num_fuse_samples = 25
|
|
||||||
|
|
||||||
def __init__(self, genomes):
|
|
||||||
self.max_ntemporal_samples = max(
|
|
||||||
[cp.nbatches * cp.ntemporal_samples for cp in genomes]) + 1
|
|
||||||
|
|
||||||
class Filters(object):
|
class Filters(object):
|
||||||
def __init__(self, frame, cp):
|
def __init__(self, frame, cp):
|
||||||
|
# Use one oversample per filter set, even over multiple timesteps
|
||||||
|
self.oversample = frame.genomes[0].spatial_oversample
|
||||||
|
|
||||||
# Ugh. I'd really like to replace this mess
|
# Ugh. I'd really like to replace this mess
|
||||||
spa_filt_ptr = POINTER(c_double)()
|
spa_filt_ptr = POINTER(c_double)()
|
||||||
spa_width = flam3_create_spatial_filter(byref(frame),
|
spa_width = flam3_create_spatial_filter(byref(frame),
|
||||||
@ -152,7 +169,32 @@ class Filters(object):
|
|||||||
flam3_free(tmp_deltas_ptr)
|
flam3_free(tmp_deltas_ptr)
|
||||||
|
|
||||||
# TODO: density estimation
|
# TODO: density estimation
|
||||||
self.gutter = (spa_width - cp.spatial_oversample) / 2
|
self.gutter = (spa_width - self.oversample) / 2
|
||||||
|
|
||||||
|
class Features(object):
|
||||||
|
"""
|
||||||
|
Determine features and constants required to render a particular set of
|
||||||
|
genomes. The values of this class are fixed before compilation begins.
|
||||||
|
"""
|
||||||
|
# Constant; number of rounds spent fusing points on first CP of a frame
|
||||||
|
num_fuse_samples = 25
|
||||||
|
|
||||||
|
def __init__(self, genomes, flt):
|
||||||
|
any = lambda l: bool(filter(None, map(l, genomes)))
|
||||||
|
self.max_ntemporal_samples = max(
|
||||||
|
[cp.nbatches * cp.ntemporal_samples for cp in genomes])
|
||||||
|
self.camera_rotation = any(lambda cp: cp.rotate)
|
||||||
|
self.non_box_temporal_filter = genomes[0].temporal_filter_type
|
||||||
|
self.palette_mode = genomes[0].palette_mode and "linear" or "nearest"
|
||||||
|
|
||||||
|
# Histogram (and log-density copy) width and height
|
||||||
|
self.hist_width = flt.oversample * genomes[0].width + 2 * flt.gutter
|
||||||
|
self.hist_height = flt.oversample * genomes[0].height + 2 * flt.gutter
|
||||||
|
# Histogram stride, for better filtering. This code assumes the
|
||||||
|
# 128-byte L1 cache line width of Fermi devices, and a 16-byte
|
||||||
|
# histogram bucket size. TODO: detect these things programmatically,
|
||||||
|
# particularly the histogram bucket size, which may be split soon
|
||||||
|
self.hist_stride = 8 * int(math.ceil(self.hist_width / 8.0))
|
||||||
|
|
||||||
class Camera(object):
|
class Camera(object):
|
||||||
"""Viewport and exposure."""
|
"""Viewport and exposure."""
|
||||||
@ -165,6 +207,7 @@ class Camera(object):
|
|||||||
|
|
||||||
center = Point(cp._center[0], cp._center[1])
|
center = Point(cp._center[0], cp._center[1])
|
||||||
size = Point(cp.width, cp.height)
|
size = Point(cp.width, cp.height)
|
||||||
|
|
||||||
# pix per unit, where 'unit' is '1.0' in IFS space
|
# pix per unit, where 'unit' is '1.0' in IFS space
|
||||||
self.ppu = Point(
|
self.ppu = Point(
|
||||||
cp.pixels_per_unit * scale / frame.pixel_aspect_ratio,
|
cp.pixels_per_unit * scale / frame.pixel_aspect_ratio,
|
||||||
@ -174,6 +217,8 @@ class Camera(object):
|
|||||||
cornerLL = center - (size / (2 * self.ppu))
|
cornerLL = center - (size / (2 * self.ppu))
|
||||||
self.lower_bounds = cornerLL - gutter
|
self.lower_bounds = cornerLL - gutter
|
||||||
self.upper_bounds = cornerLL + (size / self.ppu) + gutter
|
self.upper_bounds = cornerLL + (size / self.ppu) + gutter
|
||||||
self.ifs_space_size = 1.0 / (self.upper_bounds - self.lower_bounds)
|
self.norm_scale = 1.0 / (self.upper_bounds - self.lower_bounds)
|
||||||
# TODO: coordinate transforms in concert with GPU (rotation, size)
|
self.norm_offset = -self.norm_scale * self.lower_bounds
|
||||||
|
self.idx_scale = size * self.norm_scale
|
||||||
|
self.idx_offset = size * self.norm_offset
|
||||||
|
|
||||||
|
49
main.py
49
main.py
@ -21,6 +21,14 @@ from fr0stlib.pyflam3 import *
|
|||||||
from fr0stlib.pyflam3._flam3 import *
|
from fr0stlib.pyflam3._flam3 import *
|
||||||
from cuburnlib.render import *
|
from cuburnlib.render import *
|
||||||
|
|
||||||
|
import pyglet
|
||||||
|
|
||||||
|
def dump_3d(nda):
|
||||||
|
with open('/tmp/data.txt', 'w') as f:
|
||||||
|
for row in nda:
|
||||||
|
f.write(' | '.join([' '.join(
|
||||||
|
['%4.1g\t' % x for x in pt]) for pt in row]) + '\n')
|
||||||
|
|
||||||
def main(args):
|
def main(args):
|
||||||
verbose = 1
|
verbose = 1
|
||||||
if '-d' in args:
|
if '-d' in args:
|
||||||
@ -30,28 +38,37 @@ def main(args):
|
|||||||
genomes = Genome.from_string(fp.read())
|
genomes = Genome.from_string(fp.read())
|
||||||
anim = Animation(genomes)
|
anim = Animation(genomes)
|
||||||
anim.compile()
|
anim.compile()
|
||||||
anim.render_frame()
|
bins = anim.render_frame()
|
||||||
|
#dump_3d(bins)
|
||||||
|
bins /= ((np.mean(bins)+1e-9)/128.)
|
||||||
|
bins.astype(np.uint8)
|
||||||
|
|
||||||
|
|
||||||
#genome.width, genome.height = 512, 512
|
if '-g' not in args:
|
||||||
#genome.sample_density = 1000
|
return
|
||||||
#obuf, stats, frame = genome.render(estimator=3)
|
|
||||||
#gc.collect()
|
|
||||||
|
|
||||||
##q.put(str(obuf))
|
print anim.features.hist_width
|
||||||
##p = Process(target=render, args=(q, genome_path))
|
print anim.features.hist_height
|
||||||
##p.start()
|
print anim.features.hist_stride
|
||||||
|
window = pyglet.window.Window(800, 600)
|
||||||
|
image = pyglet.image.ImageData(anim.features.hist_width,
|
||||||
|
anim.features.hist_height,
|
||||||
|
'RGBA',
|
||||||
|
bins.tostring(),
|
||||||
|
anim.features.hist_stride*4)
|
||||||
|
tex = image.texture
|
||||||
|
|
||||||
#window = pyglet.window.Window()
|
@window.event
|
||||||
#image = pyglet.image.ImageData(genome.width, genome.height, 'RGB', obuf)
|
def on_draw():
|
||||||
#tex = image.texture
|
window.clear()
|
||||||
|
tex.blit(0, 0)
|
||||||
|
|
||||||
#@window.event
|
@window.event
|
||||||
#def on_draw():
|
def on_key_press(sym, mod):
|
||||||
#window.clear()
|
if sym == pyglet.window.key.Q:
|
||||||
#tex.blit(0, 0)
|
pyglet.app.exit()
|
||||||
|
|
||||||
#pyglet.app.run()
|
pyglet.app.run()
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
if len(sys.argv) < 2 or not os.path.isfile(sys.argv[-1]):
|
if len(sys.argv) < 2 or not os.path.isfile(sys.argv[-1]):
|
||||||
|
Loading…
Reference in New Issue
Block a user