Commit f3298e0bed (parent 27e7fd82a3): Finally runs again
Mirror of https://github.com/stevenrobertson/cuburn.git, synced 2025-02-05 11:40:04 -05:00
@@ -15,7 +15,8 @@ from cuburnlib.ptx import PTXModule
 class LaunchContext(object):
     """
     Context collecting the information needed to create, run, and gather the
-    results of a device computation.
+    results of a device computation. This may eventually also include an actual
+    CUDA context, but for now it just uses the global one.
 
     To create the fastest device code across multiple device families, this
     context may decide to iteratively refine the final PTX by regenerating
@@ -32,34 +33,27 @@ class LaunchContext(object):
     `mod`: Final compiled module. Unavailable during assembly.
 
     """
-    def __init__(self, entries, block=(1,1,1), grid=(1,1), seed=None,
-                 tests=False):
+    def __init__(self, entries, block=(1,1,1), grid=(1,1), tests=False):
         self.entry_types = entries
         self.block, self.grid, self.build_tests = block, grid, tests
-        self.rand = np.random.mtrand.RandomState(seed)
         self.setup_done = False
 
     @property
     def threads(self):
         return reduce(lambda a, b: a*b, self.block + self.grid)
 
-    def print_source(self):
-        print '\n'.join(["%03d %s" % (i+1, l) for (i, l) in
-                         enumerate(self.ptx.source.split('\n'))])
+    def compile(self, verbose=False, **kwargs):
+        kwargs['ctx'] = self
+        self.ptx = PTXModule(self.entry_types, kwargs, self.build_tests)
 
-    def compile(self, to_inject={}, verbose=False):
-        inj = dict(to_inject)
-        inj['ctx'] = self
-        self.ptx = PTXModule(self.entry_types, inj, self.build_tests)
         try:
             self.mod = cuda.module_from_buffer(self.ptx.source)
         except (cuda.CompileError, cuda.RuntimeError), e:
             print "Aww, dang, compile error. Here's the source:"
-            self.print_source()
+            self.ptx.print_source()
             raise e
         if verbose:
            if verbose >= 3:
-                self.print_source()
+                self.ptx.print_source()
            for entry in self.ptx.entries:
                func = self.mod.get_function(entry.entry_name)
                print "Compiled %s: used %d regs, %d sm, %d local" % (
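The compile() signature change above means callers now pass DSL namespace values as keywords rather than a to_inject dict. This is exercised later in this commit by Animation.compile(); trimmed to its essentials, the intended host-side usage looks like the sketch below (here `features` stands for the Features instance built elsewhere in the commit):

    from cuburnlib.cuda import LaunchContext
    from cuburnlib.device_code import IterThread

    ctx = LaunchContext([IterThread], block=(256,1,1), grid=(54,1), tests=True)
    # extra keywords land in the PTX DSL namespace (kwargs['ctx'] = self)
    ctx.compile(verbose=3, features=features)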
@@ -4,126 +4,34 @@ Contains the PTX fragments which will drive the device.
 
 import os
 import time
+import struct
 
 import pycuda.driver as cuda
 import numpy as np
 
 from cuburnlib.ptx import *
 
-"""
-Here's the current draft of the full algorithm implementation.
-
-declare xform jump table
-
-load random state
-
-clear x_coord, y_coord, z_coord, w_coord;
-store -(FUSE+1) to shared (per-warp) num_samples_sh
-clear badvals [1]
-
-load param (global_cp_idx_addr)
-index table start (global_cp_idx) [2]
-load count of indexes from global cp index =>
-store to qlocal current_cp_num [3]
-
-outermost loop start:
-    load current_cp_num
-    if current_cp_num <= 0:
-        exit
-
-    load param global_cp_idx_addr
-    calculate offset into address with current_cp_num, global_cp_idx_addr
-    load cp_base_address
-    stream_start (cp_base, cp_base_addr) [4]
-
-    FUSE_START:
-    num_samples += 1
-    if num_samples >= 0:
-        # Okay, we're done FUSEing, prepare to enter normal loop
-        load num_samples => store to shared (per-warp) num_samples
-
-
-    ITER_LOOP_START:
-    reg xform_addr, xform_stream_addr, xform_select
-
-    mwc_next_u32 to xform_select
-    # Performance test: roll/unroll this loop?
-    stream_load xform_prob (cp_stream)
-    if xform_select <= xform_prob:
-        bra.uni XFORM_1_LBL
-    ...
-    stream_load xform_prob (cp_stream)
-    if xform_select <= xform_prob:
-        bra.uni XFORM_N_LBL
-
-    XFORM_1_LBL:
-    stream_load xform_1_ (cp_stream)
-    ...
-    bra.uni XFORM_POST
-
-    XFORM_POST:
-    [if final_xform:]
-    [do final_xform]
-
-    if num_samples < 0:
-        # FUSE still in progress
-        bra.uni FUSE_START
-
-    FRAGMENT_WRITEBACK:
-    # Unknown at this time.
-
-    SHUFFLE:
-    # Unknown at this time.
-
-    load num_samples from num_samples_sh
-    num_samples -= 1
-    if num_samples > 0:
-        bra.uni ITER_LOOP_START
-
-
-[1] Tracking 'badvals' can put a pretty large hit on performance, particularly
-    for images that sample a small amount of the grid. So this might be cut
-    when rendering for performance. On the other hand, it might actually help
-    tune the algorithm later, so it'll definitely be an option.
-
-[2] Control points for each temporal sample will be preloaded to the
-    device in the compact DataStream format (more on this later). Their
-    locations are represented in an index table, which starts with a single
-    `.u32 length`, followed by `length` pointers. To avoid having to keep
-    reloading `length`, or worse, using a register to hold it in memory, we
-    instead count *down* to zero. This is a very common idiom.
-
-[3] 'qlocal' is quasi-local storage. it could easily be actual local storage,
-    depending on how local storage is implemented, but the extra 128-byte loads
-    for such values might make a performance difference. qlocal variables may
-    be identical across a warp or even a CTA, and so variables noted as
-    "qlocal" here might end up in shared memory or even a small per-warp or
-    per-CTA buffer in global memory created specifically for this purpose,
-    after benchmarking is done.
-
-[4] DataStreams are "opaque" data serialization structures defined below. The
-    structure of a stream is actually created while parsing the DSL by the load
-    statements themselves. Some benchmarks need to be done before DataStreams
-    stop being "opaque" and become simply "dynamic".
-"""
-
 class IterThread(PTXTest):
     entry_name = 'iter_thread'
     entry_params = []
 
+    def __init__(self):
+        self.cps_uploaded = False
+
     def deps(self):
         return [MWCRNG, CPDataStream]
 
     @ptx_func
     def module_setup(self):
         mem.global_.u32('g_cp_array',
-                        [features.max_ntemporal_samples,'*',cp_stream_size])
+                        cp_stream_size*features.max_ntemporal_samples)
         mem.global_.u32('g_num_cps')
         # TODO move into debug statement
         mem.global_.u32('g_num_rounds', ctx.threads)
         mem.global_.u32('g_num_writes', ctx.threads)
 
     @ptx_func
-    def entry():
+    def entry(self):
         reg.f32('x_coord y_coord color_coord alpha_coord')
 
         # TODO: temporary, for testing
@@ -158,8 +66,8 @@ class IterThread(PTXTest):
         op.mov.s32(num_samples, -(features.num_fuse_samples+1))
 
         # TODO: Move cp_num to qlocal storage (or spill it, rarely accessed)
-        reg.u32('cp_num cpA')
-        mov.u32(cp_num, 0)
+        reg.u32('cp_idx cpA')
+        op.mov.u32(cp_idx, 0)
 
         label('cp_loop_start')
         op.bar.sync(0)
@@ -168,19 +76,19 @@ class IterThread(PTXTest):
             reg.u32('num_cps')
             reg.pred('p_last_cp')
             op.ldu.u32(num_cps, addr(g_num_cps))
-            op.setp.lt.u32(p_last_cp, cp_num, num_cps)
+            op.setp.ge.u32(p_last_cp, cp_idx, num_cps)
             op.bra.uni('all_cps_done', ifp=p_last_cp)
 
         with block('Load CP address'):
             op.mov.u32(cpA, g_cp_array)
-            op.mad.lo.u32(cpA, cp_num, cp_stream_size, cpA)
+            op.mad.lo.u32(cpA, cp_idx, cp_stream_size, cpA)
 
-        with block('Increment CP number, load num_samples (unless in fuse)'):
-            reg.pred('p_in_fuse')
-            op.setp.lt.s32(p_in_fuse, num_samples, 0)
-            op.add.u32(cp_num, cp_num, 1, ifp=p_in_fuse)
-            cp_stream_get(cpA, num_samples, 'cp.samples_per_thread',
-                          ifp=p_in_fuse)
+        with block('Increment CP index, load num_samples (unless in fuse)'):
+            reg.pred('p_not_in_fuse')
+            op.setp.ge.s32(p_not_in_fuse, num_samples, 0)
+            op.add.u32(cp_idx, cp_idx, 1, ifp=p_not_in_fuse)
+            cp_stream_get(cpA, num_samples, 'samples_per_thread',
+                          ifp=p_not_in_fuse)
 
         label('fuse_loop_start')
         with block('FUSE-specific stuff'):
@@ -188,7 +96,7 @@ class IterThread(PTXTest):
             comment('If num_samples == -1, set it to 0 and jump back up')
             comment('This will start the normal CP loading machinery')
             op.setp.eq.s32(p_fuse, num_samples, -1)
-            op.mov.s32(p_fuse, 0, ifp=p_fuse)
+            op.mov.s32(num_samples, 0, ifp=p_fuse)
             op.bra.uni(cp_loop_start, ifp=p_fuse)
 
             comment('If num_samples < -1, still fusing, so increment')
@@ -204,33 +112,55 @@ class IterThread(PTXTest):
         with block("Test if we're still in FUSE"):
             reg.pred('p_in_fuse')
             op.setp.lt.s32(p_in_fuse, num_samples, 0)
-            op.bra.uni(fuse_start, ifp=p_in_fuse)
+            op.bra.uni(fuse_loop_start, ifp=p_in_fuse)
 
         with block("Ordinarily, we'd write the result here"):
             op.add.u32(num_writes, num_writes, 1)
 
         with block("Check to see if we're done with this CP"):
             reg.pred('p_cp_done')
+            op.add.s32(num_samples, num_samples, -1)
             op.setp.eq.s32(p_cp_done, num_samples, 0)
             op.bra.uni(cp_loop_start, ifp=p_cp_done)
 
         op.bra.uni(iter_loop_start)
 
+        label('all_cps_done')
         # TODO this is for testing, move it to a debug statement
         store_per_thread(g_num_rounds, num_rounds)
         store_per_thread(g_num_writes, num_writes)
 
-    def call(self, ctx):
-        raise HorribleDeathError("Okay I'm going to bed now")
+    def upload_cp_stream(self, ctx, cp_stream, num_cps):
+        cp_array_dp, cp_array_l = ctx.mod.get_global('g_cp_array')
+        assert len(cp_stream) <= cp_array_l, "Stream too big!"
+        cuda.memcpy_htod_async(cp_array_dp, cp_stream)
+        num_cps_dp, num_cps_l = ctx.mod.get_global('g_num_cps')
+        cuda.memcpy_htod_async(num_cps_dp, struct.pack('i', num_cps))
+        self.cps_uploaded = True
+
+    def call(self, ctx):
+        if not self.cps_uploaded:
+            raise Error("Cannot call IterThread before uploading CPs")
+        func = ctx.mod.get_function('iter_thread')
+        dtime = func(block=ctx.block, grid=ctx.grid, time_kernel=True)
+
+        num_rounds_dp, num_rounds_l = ctx.mod.get_global('g_num_rounds')
+        num_writes_dp, num_writes_l = ctx.mod.get_global('g_num_writes')
+        rounds = cuda.from_device(num_rounds_dp, ctx.threads, np.uint32)
+        writes = cuda.from_device(num_writes_dp, ctx.threads, np.uint32)
+        print "Rounds:", rounds
+        print "Writes:", writes
 
 class MWCRNG(PTXFragment):
     def __init__(self):
+        self.rand = np.random
         self.threads_ready = 0
         if not os.path.isfile('primes.bin'):
             raise EnvironmentError('primes.bin not found')
 
+    def set_seed(self, seed):
+        self.rand = np.random.mtrand.RandomState(seed)
+
     @ptx_func
     def module_setup(self):
         mem.global_.u32('mwc_rng_mults', ctx.threads)
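The former call() stub is split into an upload step and a launch step. Mirroring what render_frame() does later in this commit, the host-side round trip reduces to:

    iter_thread = ctx.ptx.instances[IterThread]
    iter_thread.upload_cp_stream(ctx, cp_stream, num_cps)  # copies into g_cp_array / g_num_cps
    iter_thread.call(ctx)                                  # launches the 'iter_thread' kernel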
@@ -284,13 +214,13 @@ class MWCRNG(PTXFragment):
         # Randomness in choosing multipliers is good, but larger multipliers
         # have longer periods, which is also good. This is a compromise.
         mults = np.array(mults[:ctx.threads*4])
-        ctx.rand.shuffle(mults)
+        self.rand.shuffle(mults)
         # Copy multipliers and seeds to the device
         multdp, multl = ctx.mod.get_global('mwc_rng_mults')
         cuda.memcpy_htod_async(multdp, mults.tostring()[:multl])
         # Intentionally excludes both 0 and (2^32-1), as they can lead to
         # degenerate sequences of period 0
-        states = np.array(ctx.rand.randint(1, 0xffffffff, size=2*ctx.threads),
+        states = np.array(self.rand.randint(1, 0xffffffff, size=2*ctx.threads),
                           dtype=np.uint32)
         statedp, statel = ctx.mod.get_global('mwc_rng_state')
         cuda.memcpy_htod_async(statedp, states.tostring())
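For orientation (this is not part of the commit): mwc_rng_state holds two 32-bit words per thread and mwc_rng_mults one multiplier, which is the usual multiply-with-carry layout. A host-side sketch of a single MWC step, assuming that (state, carry) layout:

    def mwc_next(mult, state, carry):
        # 64-bit product plus carry; the low word becomes the new state,
        # the high word becomes the new carry
        t = mult * state + carry
        return t & 0xffffffff, t >> 32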
@@ -376,7 +306,7 @@ class MWCRNGTest(PTXTest):
 class CameraCoordTransform(PTXFragment):
     pass
 
-class CPDataStream(PTXFragment):
+class CPDataStream(DataStream):
     """DataStream which stores the control points."""
     prefix = 'cp'
 
cuburnlib/ptx.py (125 lines changed)
@@ -11,7 +11,7 @@ easier to maintain using this system.
 # If you see 'import inspect', you know you're in for a good time
 import inspect
 import types
-import traceback
+import struct
 from cStringIO import StringIO
 from collections import namedtuple
 
@@ -116,6 +116,8 @@ class _BlockInjector(object):
         self.dead = False
         map(self.inject, self.to_inject.items())
     def __exit__(self, exc_type, exc_val, tb):
+        # Do some real exceptorin'
+        if exc_type is not None: return
         for k in self.injected:
             del self.inject_into[k]
         self.dead = True
@@ -137,17 +139,27 @@ class _Block(object):
         inj = self.stack[-1].injectors
         [inj.remove(i) for i in inj if i.dead]
     def push_ctx(self):
-        # Move most recent active injector to new context
         self.clean_injectors()
-        last_inj = self.stack[-1].injectors.pop()
-        self.stack.append(BlockCtx(dict(self.stack[-1].locals), [],
-                          [last_inj]))
+        self.stack.append(BlockCtx(dict(self.stack[-1].locals), [], []))
+        # The only reason we should have no injectors in the previous block is
+        # if we are hitting a new ptx_func entry point or global declaration at
+        # PTX module scope, which means the stack only contains the outer
+        # context and the current one (i.e. len(stack) == 2)
+        if len(self.stack[-2].injectors) == 0:
+            assert len(self.stack) == 2, "Empty injector list too early!"
+        # Otherwise, the active injector in the previous block is the one for
+        # the Python function which is currently creating a new PTX block, and
+        # and it needs to be promoted to the current block
+        else:
+            self.stack[-1].injectors.append(self.stack[-2].injectors.pop())
     def pop_ctx(self):
         self.clean_injectors()
         bs = self.stack.pop()
+        # TODO: figure out why this next line is needed
+        [bs.injectors.remove(i) for i in bs.injectors if i.dead]
         self.stack[-1].code.extend(bs.code)
         if len(self.stack) == 1:
-            # We're on outer_ctx, so all injectors should be gone
+            # We're on outer_ctx, so all injectors should be gone.
             assert len(bs.injectors) == 0, "Injector/context mismatch"
             return
         # The only injector should be the one added in push_ctx
@@ -186,7 +198,7 @@ class _Block(object):
         spacing. To keep things simple, nested lists and tuples will be reduced
         in this manner (but not other iterable types). Coercion will not happen
         until after the entire DSL call tree has been walked. This allows a
-        class to submit a mutable type (e.g. the trivial `StrVar`) when first
+        class to submit a mutable type (e.g. ``DelayVar``) when first
         walked with an undefined value, then substitute the correct value on
         being finalized.
 
@@ -196,14 +208,23 @@ class _Block(object):
         """
         self.stack[-1].code.append(PTXStmt(prefix, op, vars, semi, indent))
 
-class StrVar(object):
+class DelayVar(object):
     """
     Trivial wrapper to allow deferred variable substitution.
     """
     def __init__(self, val=None):
         self.val = val
     def __str__(self):
-        return str(val)
+        return str(self.val)
+    def __mul__(self, other):
+        # Oh this is truly egregious
+        return DelayVarProxy(self, "self.other.val*" + str(other))
+
+class DelayVarProxy(object):
+    def __init__(self, other, expr):
+        self.other, self.expr = other, expr
+    def __str__(self):
+        return str(eval(self.expr))
 
 class _PTXFuncWrapper(object):
     """Enables ptx_func"""
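A minimal sketch (not in the commit) of how the renamed DelayVar is meant to behave: it can be embedded in PTX statements while its value is still unknown, then filled in before the formatter stringifies the code, with DelayVarProxy handling the deferred arithmetic.

    size = DelayVar("not_yet_determined")
    stmt = ['ldu.b32', 'rval', size]    # emitted while the value is unknown
    size.val = 48                       # later, e.g. in DataStream.finalize_code()
    print str(size), str(size * 4)      # -> 48 192 (the proxy evaluates lazily)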
@@ -298,6 +319,9 @@ class Block(object):
             self.block.code(op=['// ', self.comment], semi=False)
             self.comment = None
     def __exit__(self, exc_type, exc_value, tb):
+        # Allow exceptions to be propagated; things get really messy if we try
+        # to pop the stack if things aren't ordered correctly
+        if exc_type is not None: return
         self.block.code(indent=-1)
         self.block.code(op='}', semi=False)
         self.block.pop_ctx()
@@ -370,12 +394,14 @@ class Op(_CallChain):
     """
     def _call(self, op, *args, **kwargs):
         pred = ''
-        if 'ifp' in kwargs:
-            if 'ifnotp' in kwargs:
+        ifp = kwargs.get('ifp')
+        ifnotp = kwargs.get('ifnotp')
+        if ifp:
+            if ifnotp:
                 raise SyntaxError("can't use both, fool")
-            pred = ['@', kwargs['ifp']]
-        if 'ifnotp' in kwargs:
-            pred = ['@!', kwargs['ifnotp']]
+            pred = ['@', ifp]
+        if ifnotp:
+            pred = ['@!', ifnotp]
         self.block.code(pred, '.'.join(op), _softjoin(args, ','))
 
 class Mem(object):
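The ifp/ifnotp keywords become PTX guard predicates: the prefix list ['@', ifp] (or ['@!', ifnotp]) is emitted ahead of the opcode, so a DSL call such as

    op.bra.uni('all_cps_done', ifp=p_last_cp)

renders roughly as "@p_last_cp bra.uni all_cps_done;" in the generated source.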
@@ -421,7 +447,7 @@ class Mem(object):
         >>> op.st.global.v2.u32(addr(areg), vec(reg1, reg2))
         >>> op.ld.global.v2.u32(vec(reg1, reg2), addr(areg, 8))
         """
-        return ['[', areg, aoffset and '+' or '', aoffset, ']']
+        return ['[', areg, aoffset is not '' and '+' or '', aoffset, ']']
 
 class _MemFactory(_CallChain):
     """Actual `mem` object"""
@@ -538,8 +564,8 @@ class PTXFragment(object):
         """
         Called after running all PTX DSL functions, but before code generation,
         to allow fragments which postponed variable evaluation (e.g. using
-        `StrVar`) to fill in the resulting values. Most fragments should not
-        use this.
+        ``DelayVar``) to fill in the resulting values. Most fragments should
+        not use this.
 
         If implemented, this function *may* use an @ptx_func decorator to
         access the global DSL scope, but pretty please don't emit any code
@@ -796,6 +822,13 @@ class PTXModule(object):
             raise ValueError("Too many recompiles scheduled!")
         self.__needs_recompilation = True
 
+    def print_source(self):
+        if not hasattr(self, 'source'):
+            raise ValueError("Not assembled yet!")
+        print '\n'.join(["%03d %s" % (i+1, l) for (i, l) in
+                         enumerate(self.source.split('\n'))])
+
+
 def _flatten(val):
     if isinstance(val, (list, tuple)):
         return ''.join(map(_flatten, val))
@@ -806,7 +839,7 @@ class PTXFormatter(object):
     Formats PTXStmt items into beautiful code. Well, the beautiful part is
     postponed for now.
     """
-    def __init__(self, indent_amt=2, oplen_max=20, varlen_max=12):
+    def __init__(self, indent_amt=4, oplen_max=20, varlen_max=12):
         self.idamt, self.opm, self.vm = indent_amt, oplen_max, varlen_max
     def format(self, code):
         out = []
@@ -844,7 +877,7 @@ class PTXFormatter(object):
 _TExp = namedtuple('_TExp', 'type exprlist')
 _DataCell = namedtuple('_DataCell', 'offset size texp')
 
-class DataStream(object):
+class DataStream(PTXFragment):
     """
     Simple interface between Python and PTX, designed to create and tightly
     pack control structs.
@@ -914,19 +947,19 @@ class DataStream(object):
         self.cells = []
         self.stream_size = 0
         self.free = {}
-        self.size_strvar = StrVar("not_yet_determined")
+        self.size_delayvars = []
 
     _types = dict(s8='b', u8='B', s16='h', u16='H', s32='i', u32='I', f32='f',
                   s64='l', u64='L', f64='d')
-    def _get_type(self, *regs):
+    def _get_type(self, regs):
         size = int(regs[0].type[1:])
-        for r in regs:
+        for reg in regs:
             if reg.type not in self._types:
                 raise TypeError("Register %s of type %s not supported" %
                                 (reg.name, reg.type))
-            if int(r.type[1:]) != size:
+            if int(reg.type[1:]) != size:
                 raise TypeError("Can't vector-load different size regs")
-        return size, ''.join([self._types.get(r.type) for r in regs])
+        return size/8, ''.join([self._types.get(r.type) for r in regs])
 
     def _alloc(self, vsize, texp):
         # A really crappy allocator. May later include optimizations for
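With this change _get_type() returns the register width in bytes plus a struct format string, which is what pack_into() ultimately hands to struct.pack(). A hypothetical example, assuming one stream_get call requests two .f32 registers:

    import struct
    # two .f32 registers -> (4, 'ff'); the host side then packs matching values
    data = struct.pack('ff', 1.5, 2.5)   # 8 bytes appended to the control-point stream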
@@ -939,7 +972,7 @@ class DataStream(object):
         if idx is None:
             # No aligned free cells, allocate a new `align`-byte free cell
             assert alloc not in self.free
-            self.free[alloc] = idx = len(self.stream_size)
+            self.free[alloc] = idx = len(self.cells)
             self.cells.append(_DataCell(self.stream_size, alloc, None))
             self.stream_size += alloc
         # Overwrite the free cell at `idx` with texp
@@ -958,27 +991,28 @@ class DataStream(object):
             self.cells.insert(fidx, _DataCell(foffset, fsize, None))
             foffset += fsize
             self.free[fsize] = fidx
+            fsize *= 2
         # Adjust indexes. This is ugly, but evidently unavoidable
         if fidx-idx:
-            for k, v in filter(lambda k, v: v > idx, self.free.items()):
+            for k, v in filter(lambda (k, v): v > idx, self.free.items()):
                 self.free[k] = v+(fidx-idx)
-        return self.offset
+        return offset
 
     @ptx_func
     def _stream_get_internal(self, areg, dregs, exprs, ifp, ifnotp):
         size, type = self._get_type(dregs)
         vsize = size * len(dregs)
-        texp = _TExp(type, [expr])
-        if texp in self.expr_map:
+        texp = _TExp(type, tuple(exprs))
+        if texp in self.texp_map:
             offset = self.texp_map[texp]
         else:
             offset = self._alloc(vsize, texp)
             self.texp_map[texp] = offset
-        vtype = {1: '', 2: '.v2', 4: '.v4'}.get(len(dregs))
-        if len(dregs) > 0:
+        opname = ['ldu', 'b%d' % (size*8)]
+        if len(dregs) > 1:
+            opname.insert(1, 'v%d' % len(dregs))
             dregs = vec(dregs)
-        op._call('ldu%s.b%d' % (vtype, size), dregs, addr(areg+off),
-                 ifp=ifp, ifnotp=ifnotp)
+        op._call(opname, dregs, addr(areg, offset), ifp=ifp, ifnotp=ifnotp)
 
     @ptx_func
     def _stream_get(self, areg, dreg, expr, ifp=None, ifnotp=None):
@@ -991,16 +1025,20 @@ class DataStream(object):
                                   ifp, ifnotp)
 
     @ptx_func
-    def _stream_get_v2(self, areg, d1, e1, d2, e2, d3, e3, d4, e4,
+    def _stream_get_v4(self, areg, d1, e1, d2, e2, d3, e3, d4, e4,
                        ifp=None, ifnotp=None):
         self._stream_get_internal(areg, [d1, d2, d3, d4], [e1, e2, e3, e4],
                                   ifp, ifnotp)
 
+    @property
     def _stream_size(self):
-        return self.size_strvar
+        x = DelayVar("not_yet_determined")
+        self.size_delayvars.append(x)
+        return x
 
     def finalize_code(self):
-        self.size_strvar.val = str(self.stream_size)
+        for dv in self.size_delayvars:
+            dv.val = self.stream_size
 
     def to_inject(self):
         return {self.prefix + '_stream_get': self._stream_get,
@@ -1039,9 +1077,20 @@ class DataStream(object):
         for offset, size, texp in self.cells:
             if texp:
                 type = texp.type
-                vals = [eval(e, globals(), kwargs) for e in texp.expr_list]
+                vals = [eval(e, globals(), kwargs) for e in texp.exprlist]
             else:
                 type = 'x'*size # Padding bytes
                 vals = []
-            out.write(struct.pack(type, *vals))
+            outfile.write(struct.pack(type, *vals))
+
+    def print_record(self):
+        for cell in self.cells:
+            if cell.texp is None:
+                print '%3d %2d --' % (cell.offset, cell.size)
+                continue
+            print '%3d %2d %4s %s' % (cell.offset, cell.size, cell.texp.type,
+                                      cell.texp.exprlist[0])
+            for exp in cell.texp.exprlist[1:]:
+                print '%12s %s' % ('', exp)
+
 
@@ -1,12 +1,62 @@
 
 from ctypes import *
+from cStringIO import StringIO
 import numpy as np
-from fr0stlib.pyflam3 import Genome, Frame
+from fr0stlib import pyflam3
 from fr0stlib.pyflam3._flam3 import *
 from fr0stlib.pyflam3.constants import *
 
+from cuburnlib.cuda import LaunchContext
+from cuburnlib.device_code import IterThread, CPDataStream
+
 Point = lambda x, y: np.array([x, y], dtype=np.double)
 
+class Genome(pyflam3.Genome):
+    pass
+
+class Frame(pyflam3.Frame):
+    def interpolate(self, time, cp):
+        flam3_interpolate(self.genomes, self.ngenomes, time, 0, byref(cp))
+
+    def pack_stream(self, ctx, time):
+        """
+        Pack and return the control point data stream to render this frame.
+        """
+        # Get the central control point, and calculate parameters that change
+        # once per frame
+        cp = BaseGenome()
+        self.interpolate(time, cp)
+        self.filt = Filters(self, cp)
+        rw = cp.spatial_oversample * cp.width + 2 * self.filt.gutter
+        rh = cp.spatial_oversample * cp.height + 2 * self.filt.gutter
+
+        # Interpolate each time step, calculate per-step variables, and pack
+        # into the stream
+        cp_streamer = ctx.ptx.instances[CPDataStream]
+        stream = StringIO()
+        print "Data stream contents:"
+        cp_streamer.print_record()
+        tcp = BaseGenome()
+        for batch_idx in range(cp.nbatches):
+            for time_idx in range(cp.ntemporal_samples):
+                idx = time_idx + batch_idx * cp.nbatches
+                cp_time = time + self.filt.temporal_deltas[idx]
+                self.interpolate(time, tcp)
+                tcp.camera = Camera(self, tcp, self.filt)
+
+                # TODO: figure out which object to pack this into
+                nsamples = ((tcp.camera.sample_density * cp.width * cp.height) /
+                            (cp.nbatches * cp.ntemporal_samples))
+                samples_per_thread = nsamples / ctx.threads + 15
+
+                cp_streamer.pack_into(stream,
+                                      frame=self,
+                                      cp=tcp,
+                                      cp_idx=idx,
+                                      samples_per_thread=samples_per_thread)
+        stream.seek(0)
+        return (stream.read(), cp.nbatches * cp.ntemporal_samples)
+
 class Animation(object):
     """
     Control structure for rendering a series of frames.
@@ -31,46 +81,46 @@ class Animation(object):
             memmove(byref(self.genomes[i]), byref(genomes[i]),
                     sizeof(BaseGenome))
 
-        self._frame = Frame()
-        self._frame.genomes = cast(self.genomes, POINTER(BaseGenome))
-        self._frame.ngenomes = len(genomes)
+        self.features = Features(genomes)
+        self.frame = Frame()
+        self.frame.genomes = cast(self.genomes, POINTER(BaseGenome))
+        self.frame.ngenomes = len(genomes)
+
+        self.ctx = None
+
+    def compile(self):
+        """
+        Create a PTX kernel optimized for this animation, compile it, and
+        attach it to a LaunchContext with a thread distribution optimized for
+        the active device.
+        """
+        # TODO: user-configurable test control
+        self.ctx = LaunchContext([IterThread], block=(256,1,1), grid=(54,1),
+                                 tests=True)
+        # TODO: user-configurable verbosity control
+        self.ctx.compile(verbose=3, anim=self, features=self.features)
+        # TODO: automatic optimization of block parameters
 
     def render_frame(self, time=0):
         # TODO: support more nuanced frame control than just 'time'
         # TODO: reuse more information between frames
         # TODO: allow animation-long override of certain parameters (size, etc)
+        cp_stream, num_cps = self.frame.pack_stream(self.ctx, time)
+        iter_thread = self.ctx.ptx.instances[IterThread]
+        iter_thread.upload_cp_stream(self.ctx, cp_stream, num_cps)
+        iter_thread.call(self.ctx)
 
-        cp = BaseGenome()
-        flam3_interpolate(self.frame.genomes, len(self.genomes), time, 0,
-                          byref(cp))
-        filt = Filters(self.frame, cp)
-        rw = cp.spatial_oversample * cp.width + 2 * filt.gutter
-        rh = cp.spatial_oversample * cp.height + 2 * filt.gutter
-
-        # Allocate buckets, accumulator
-        # Loop over all batches:
-        #   [density estimation]
-        #   Loop over all temporal samples:
-        #     Color scalar = temporal filter at index
-        #     Interpolate and get control point
-        #     Precalculate
-        #     Prepare xforms
-        #     Compute colormap
-        #     Run iterations
-        #   Accumulate vibrancy, gamma, background
-        # Calculate k1, k2
-        # If not DE, then do log filtering to accumulator
-        # Else, [density estimation]
-        # Do final clip and filter
-
-        # For now:
-        # Loop over all batches:
-        #   Loop over all temporal samples:
-        #     Interpolate and get control point
-        #     Read the
-        #     Dump noise into buckets
-        #   Do log filtering to accumulator
-        #   Do simplified final clip
+class Features(object):
+    """
+    Determine features and constants required to render a particular set of
+    genomes. The values of this class are fixed before compilation begins.
+    """
+    # Constant; number of rounds spent fusing points on first CP of a frame
+    num_fuse_samples = 25
+
+    def __init__(self, genomes):
+        self.max_ntemporal_samples = max(
+            [cp.nbatches * cp.ntemporal_samples for cp in genomes]) + 1
 
 class Filters(object):
     def __init__(self, frame, cp):
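Features.max_ntemporal_samples is what sizes g_cp_array in IterThread.module_setup(). A small illustrative calculation with hypothetical genome values:

    # e.g. every genome has nbatches=1, ntemporal_samples=27
    max_nts = max(cp.nbatches * cp.ntemporal_samples for cp in genomes) + 1   # 28
    cp_array_bytes = max_nts * cp_stream_size   # global memory reserved for packed CPs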
@@ -115,7 +165,7 @@ class Camera(object):
         scale = 2.0 ** cp.zoom
         self.sample_density = cp.sample_density * scale * scale
 
-        center = Point(cp.center[0], cp.center[1])
+        center = Point(cp._center[0], cp._center[1])
         size = Point(cp.width, cp.height)
         # pix per unit, where 'unit' is '1.0' in IFS space
         self.ppu = Point(
@@ -129,4 +179,3 @@ class Camera(object):
         self.ifs_space_size = 1.0 / (self.upper_bounds - self.lower_bounds)
         # TODO: coordinate transforms in concert with GPU (rotation, size)
 
-
main.py (5 lines changed)
@@ -25,15 +25,14 @@ def main(args):
     verbose = 1
     if '-d' in args:
         verbose = 3
-    ctx = LaunchContext([IterThread], block=(256,1,1), grid=(64,1), tests=True)
-    ctx.compile(verbose=verbose)
-    ctx.run_tests()
 
     with open(args[-1]) as fp:
         genomes = Genome.from_string(fp.read())
     anim = Animation(genomes)
+    anim.compile()
     anim.render_frame()
 
 
     #genome.width, genome.height = 512, 512
     #genome.sample_density = 1000
     #obuf, stats, frame = genome.render(estimator=3)