mirror of
https://github.com/stevenrobertson/cuburn.git
synced 2025-02-05 11:40:04 -05:00
instmethod decorator: another hack (to get around ctx.ptx.instances[])
This commit is contained in:
parent
094890c324
commit
1f7b00b61e
6
bench.py
6
bench.py
@ -7,7 +7,7 @@ Various micro-benchmarks and other experiments.
|
||||
import numpy as np
|
||||
import pycuda.autoinit
|
||||
import pycuda.driver as cuda
|
||||
from cuburnlib.ptx import PTXFragment, PTXTest, ptx_func
|
||||
from cuburnlib.ptx import PTXFragment, PTXTest, ptx_func, instmethod
|
||||
from cuburnlib.cuda import LaunchContext
|
||||
from cuburnlib.device_code import MWCRNG
|
||||
|
||||
@ -104,7 +104,7 @@ class L2WriteCombining(PTXTest):
|
||||
op.setp.ge.u32(p_done, x, 2)
|
||||
op.bra.uni(l2_restart, ifnotp=p_done)
|
||||
|
||||
|
||||
@instmethod
|
||||
def call(self, ctx):
|
||||
scratch = np.zeros(self.block_size*ctx.ctas/4, np.uint64)
|
||||
times_bytes = np.zeros((4, ctx.threads), np.uint64, 'F')
|
||||
@ -137,7 +137,7 @@ def main():
|
||||
ctx = LaunchContext([L2WriteCombining], block=(128,1,1), grid=(7*8,1),
|
||||
tests=True)
|
||||
ctx.compile(verbose=3)
|
||||
ctx.ptx.instances[L2WriteCombining].call(ctx)
|
||||
L2WriteCombining.call(ctx)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
@ -130,6 +130,7 @@ class IterThread(PTXTest):
|
||||
std.store_per_thread(g_num_rounds, num_rounds)
|
||||
std.store_per_thread(g_num_writes, num_writes)
|
||||
|
||||
@instmethod
|
||||
def upload_cp_stream(self, ctx, cp_stream, num_cps):
|
||||
cp_array_dp, cp_array_l = ctx.mod.get_global('g_cp_array')
|
||||
assert len(cp_stream) <= cp_array_l, "Stream too big!"
|
||||
@ -139,6 +140,7 @@ class IterThread(PTXTest):
|
||||
cuda.memset_d32(num_cps_dp, num_cps, 1)
|
||||
self.cps_uploaded = True
|
||||
|
||||
@instmethod
|
||||
def call(self, ctx):
|
||||
if not self.cps_uploaded:
|
||||
raise Error("Cannot call IterThread before uploading CPs")
|
||||
|
@ -500,6 +500,9 @@ class PTXFragment(object):
|
||||
An object containing PTX DSL functions. The object, and all its
|
||||
dependencies, will be instantiated by a PTX module. Each object will be
|
||||
bound to the name given by ``shortname`` in the DSL namespace.
|
||||
|
||||
Because of the instantiation weirdness, use the instmethod decorator on
|
||||
instance methods that will be called from regular Python code.
|
||||
"""
|
||||
|
||||
# Name under which to make this code available in ptx_funcs
|
||||
@ -575,6 +578,17 @@ class PTXFragment(object):
|
||||
"""
|
||||
pass
|
||||
|
||||
def instmethod(func):
    """
    Wrapper to allow instances to be retrieved from an active context. Use it
    on methods which depend on state created during a compilation phase, but
    are intended to be called from normal Python code.

    The decorated method is exposed as a classmethod. When it is called as
    ``SomeClass.method(ctx, ...)``, the instance registered for that class is
    fetched from ``ctx.ptx.instances`` and passed to ``func`` as ``self``,
    followed by ``ctx`` and any remaining arguments. Whatever ``func``
    returns is returned to the caller; a failed instance lookup propagates
    unchanged.
    """
    # Function-scope import so this block does not depend on the module's
    # import list.
    import functools

    @functools.wraps(func)
    def wrap(cls, ctx, *args, **kwargs):
        inst = ctx.ptx.instances[cls]
        # Propagate the result: decorated methods such as pack() return data.
        return func(inst, ctx, *args, **kwargs)
    return classmethod(wrap)
|
||||
|
||||
class PTXEntryPoint(PTXFragment):
|
||||
# Human-readable entry point name
|
||||
name = ""
|
||||
@ -591,6 +605,7 @@ class PTXEntryPoint(PTXFragment):
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
@instmethod
|
||||
def call(self, ctx):
|
||||
"""
|
||||
Calls the entry point on the device. Haven't worked out the details
|
||||
@ -819,7 +834,6 @@ class PTXModule(object):
|
||||
print '\n'.join(["%03d %s" % (i+1, l) for (i, l) in
|
||||
enumerate(self.source.split('\n'))])
|
||||
|
||||
|
||||
def _flatten(val):
|
||||
if isinstance(val, (list, tuple)):
|
||||
return ''.join(map(_flatten, val))
|
||||
@ -883,7 +897,7 @@ class DataStream(PTXFragment):
|
||||
>>> class ExampleDataStream(DataStream):
|
||||
>>> shortname = "ex"
|
||||
|
||||
Inside DSL functions, you can "retrieve" arbitrary Python expressions from
|
||||
Inside DSL functions, you can retrieve arbitrary Python expressions from
|
||||
the data stream.
|
||||
|
||||
>>> @ptx_func
|
||||
@ -892,22 +906,17 @@ class DataStream(PTXFragment):
|
||||
>>> op.mov.u32(regA, some_device_allocation_base_address)
|
||||
>>> # From the structure at the base address in 'regA', load the value
|
||||
>>> # of 'ctx.nthreads' into reg1
|
||||
>>> ex.get(regA, reg1, 'ctx.nthreads')
|
||||
>>> ex.get(regA, reg1, 'ctx.nthreads+padding')
|
||||
|
||||
The expressions will be stored as strings and mapped to particular
|
||||
positions in the struct. Later, the expressions will be evaluated and
|
||||
coerced into a type matching the destination register:
|
||||
|
||||
>>> # Fish the instance holding the data stream from the compiled module
|
||||
>>> ex_stream = launch_context.ptx.instances[ExampleDataStream]
|
||||
>>> # Evaluate the expressions in the current namespace, augmented with the
|
||||
>>> # supplied objects
|
||||
>>> data = ex_stream.pack(ctx=launch_context)
|
||||
>>> data = ExampleDataStream.pack(ctx, padding=4)
|
||||
|
||||
Expressions will be aligned and may be reused in such a way as to minimize
|
||||
access times when taking device caching into account. This also implies
|
||||
that the evaluated expressions should not modify any state, but that should
|
||||
be obvious, no?
|
||||
that the evaluated expressions should not modify any state.
|
||||
|
||||
>>> @ptx_func
|
||||
>>> def example_func_2():
|
||||
@ -1034,7 +1043,8 @@ class DataStream(PTXFragment):
|
||||
for dv in self.size_delayvars:
|
||||
dv.val = self._size
|
||||
|
||||
def pack(self, _out_file_ = None, **kwargs):
|
||||
@instmethod
|
||||
def pack(self, ctx, _out_file_ = None, **kwargs):
|
||||
"""
|
||||
Evaluates all statements in the context of **kwargs. Take this code,
|
||||
presumably inside a PTX func::
|
||||
@ -1043,25 +1053,31 @@ class DataStream(PTXFragment):
|
||||
|
||||
To pack this into a struct, call this method on an instance:
|
||||
|
||||
>>> ex_stream = launch_context.ptx.instances[ExampleDataStream]
|
||||
>>> data = ex_stream.pack(frob=4, xyz=xyz)
|
||||
>>> data = ExampleDataStream.pack(ctx, frob=4, xyz=xyz)
|
||||
|
||||
This evaluates each Python expression from the stream with the provided
|
||||
arguments as locals, coerces it to the appropriate type, and returns
|
||||
the resulting structure as a string.
|
||||
|
||||
The supplied LaunchContext is added to the namespace as ``ctx`` by
|
||||
default. To suppress this, override ``ctx`` in the keyword arguments:
|
||||
|
||||
>>> data = ExampleDataStream.pack(ctx, frob=5, xyz=xyz, ctx=None)
|
||||
"""
|
||||
out = StringIO()
|
||||
self.pack_into(out, kwargs)
|
||||
cls.pack_into(out, kwargs)
|
||||
return out.read()
|
||||
|
||||
def pack_into(self, outfile, **kwargs):
|
||||
@instmethod
|
||||
def pack_into(self, ctx, outfile, **kwargs):
|
||||
"""
|
||||
Like pack(), but write data to a file-like object at the file's current
|
||||
offset instead of returning it as a string.
|
||||
|
||||
>>> ex_stream.pack_into(strio_inst, frob=4, xyz=thing)
|
||||
>>> ex_stream.pack_into(strio_inst, frob=6, xyz=another_thing)
|
||||
>>> ex_stream.pack_into(ctx, strio_inst, frob=4, xyz=thing)
|
||||
>>> ex_stream.pack_into(ctx, strio_inst, frob=6, xyz=another_thing)
|
||||
"""
|
||||
kwargs.setdefault('ctx', ctx)
|
||||
for offset, size, texp in self.cells:
|
||||
if texp:
|
||||
type = texp.type
|
||||
@ -1071,7 +1087,8 @@ class DataStream(PTXFragment):
|
||||
vals = []
|
||||
outfile.write(struct.pack(type, *vals))
|
||||
|
||||
def print_record(self):
|
||||
@instmethod
|
||||
def print_record(self, ctx):
|
||||
for cell in self.cells:
|
||||
if cell.texp is None:
|
||||
print '%3d %2d --' % (cell.offset, cell.size)
|
||||
|
@ -35,10 +35,9 @@ class Frame(pyflam3.Frame):
|
||||
"Distribution of a CP across multiple CTAs not yet done")
|
||||
# Interpolate each time step, calculate per-step variables, and pack
|
||||
# into the stream
|
||||
cp_streamer = ctx.ptx.instances[CPDataStream]
|
||||
stream = StringIO()
|
||||
print "Data stream contents:"
|
||||
cp_streamer.print_record()
|
||||
CPDataStream.print_record(ctx)
|
||||
tcp = BaseGenome()
|
||||
for batch_idx in range(cp.nbatches):
|
||||
for time_idx in range(cp.ntemporal_samples):
|
||||
@ -51,10 +50,8 @@ class Frame(pyflam3.Frame):
|
||||
cp.width * cp.height) / (
|
||||
cp.nbatches * cp.ntemporal_samples)
|
||||
|
||||
cp_streamer.pack_into(stream,
|
||||
frame=self,
|
||||
cp=tcp,
|
||||
cp_idx=idx)
|
||||
CPDataStream.pack_into(ctx, stream,
|
||||
frame=self, cp=tcp, cp_idx=idx)
|
||||
stream.seek(0)
|
||||
return (stream.read(), cp.nbatches * cp.ntemporal_samples)
|
||||
|
||||
@ -108,8 +105,8 @@ class Animation(object):
|
||||
# TODO: allow animation-long override of certain parameters (size, etc)
|
||||
cp_stream, num_cps = self.frame.pack_stream(self.ctx, time)
|
||||
iter_thread = self.ctx.ptx.instances[IterThread]
|
||||
iter_thread.upload_cp_stream(self.ctx, cp_stream, num_cps)
|
||||
iter_thread.call(self.ctx)
|
||||
IterThread.upload_cp_stream(self.ctx, cp_stream, num_cps)
|
||||
IterThread.call(self.ctx)
|
||||
|
||||
class Features(object):
|
||||
"""
|
||||
|
Loading…
Reference in New Issue
Block a user