mirror of
https://github.com/stevenrobertson/cuburn.git
synced 2025-02-05 11:40:04 -05:00
Remove some dead code
This commit is contained in:
parent
a3660ec6e4
commit
32f68ea1d5
@ -34,7 +34,6 @@ class LaunchContext(object):
|
|||||||
"""
|
"""
|
||||||
def __init__(self, entries, block=(1,1,1), grid=(1,1), seed=None,
|
def __init__(self, entries, block=(1,1,1), grid=(1,1), seed=None,
|
||||||
tests=False):
|
tests=False):
|
||||||
self.devinfo = pycuda.tools.DeviceData()
|
|
||||||
self.entry_types = entries
|
self.entry_types = entries
|
||||||
self.block, self.grid, self.build_tests = block, grid, tests
|
self.block, self.grid, self.build_tests = block, grid, tests
|
||||||
self.rand = np.random.mtrand.RandomState(seed)
|
self.rand = np.random.mtrand.RandomState(seed)
|
||||||
|
@ -175,7 +175,6 @@ class MWCRNG(PTXFragment):
|
|||||||
states = np.array(ctx.rand.randint(1, 0xffffffff, size=2*ctx.threads),
|
states = np.array(ctx.rand.randint(1, 0xffffffff, size=2*ctx.threads),
|
||||||
dtype=np.uint32)
|
dtype=np.uint32)
|
||||||
statedp, statel = ctx.mod.get_global('mwc_rng_state')
|
statedp, statel = ctx.mod.get_global('mwc_rng_state')
|
||||||
print states, len(states.tostring())
|
|
||||||
cuda.memcpy_htod_async(statedp, states.tostring())
|
cuda.memcpy_htod_async(statedp, states.tostring())
|
||||||
self.threads_ready = ctx.threads
|
self.threads_ready = ctx.threads
|
||||||
|
|
||||||
@ -184,7 +183,7 @@ class MWCRNG(PTXFragment):
|
|||||||
|
|
||||||
class MWCRNGTest(PTXTest):
|
class MWCRNGTest(PTXTest):
|
||||||
name = "MWC RNG sum-of-threads"
|
name = "MWC RNG sum-of-threads"
|
||||||
rounds = 10000
|
rounds = 5000
|
||||||
entry_name = 'MWC_RNG_test'
|
entry_name = 'MWC_RNG_test'
|
||||||
entry_params = ''
|
entry_params = ''
|
||||||
|
|
||||||
@ -227,7 +226,7 @@ class MWCRNGTest(PTXTest):
|
|||||||
fullstates = cuda.from_device(statedp, ctx.threads, np.uint64)
|
fullstates = cuda.from_device(statedp, ctx.threads, np.uint64)
|
||||||
sums = np.zeros(ctx.threads, np.uint64)
|
sums = np.zeros(ctx.threads, np.uint64)
|
||||||
|
|
||||||
print "Running states forward %d rounds" % self.rounds
|
print "Running %d states forward %d rounds" % (len(mults), self.rounds)
|
||||||
ctime = time.time()
|
ctime = time.time()
|
||||||
for i in range(self.rounds):
|
for i in range(self.rounds):
|
||||||
states = fullstates & 0xffffffff
|
states = fullstates & 0xffffffff
|
||||||
@ -241,7 +240,6 @@ class MWCRNGTest(PTXTest):
|
|||||||
dtime = func(block=ctx.block, grid=ctx.grid, time_kernel=True)
|
dtime = func(block=ctx.block, grid=ctx.grid, time_kernel=True)
|
||||||
print "Done on device, took %g seconds (%gx)" % (dtime, ctime/dtime)
|
print "Done on device, took %g seconds (%gx)" % (dtime, ctime/dtime)
|
||||||
dfullstates = cuda.from_device(statedp, ctx.threads, np.uint64)
|
dfullstates = cuda.from_device(statedp, ctx.threads, np.uint64)
|
||||||
print dfullstates, fullstates
|
|
||||||
if not (dfullstates == fullstates).all():
|
if not (dfullstates == fullstates).all():
|
||||||
print "State discrepancy"
|
print "State discrepancy"
|
||||||
print dfullstates
|
print dfullstates
|
||||||
@ -250,7 +248,6 @@ class MWCRNGTest(PTXTest):
|
|||||||
|
|
||||||
sumdp, suml = ctx.mod.get_global('mwc_rng_test_sums')
|
sumdp, suml = ctx.mod.get_global('mwc_rng_test_sums')
|
||||||
dsums = cuda.from_device(sumdp, ctx.threads, np.uint64)
|
dsums = cuda.from_device(sumdp, ctx.threads, np.uint64)
|
||||||
print dsums, sums
|
|
||||||
if not (dsums == sums).all():
|
if not (dsums == sums).all():
|
||||||
print "Sum discrepancy"
|
print "Sum discrepancy"
|
||||||
print dsums
|
print dsums
|
||||||
@ -259,30 +256,7 @@ class MWCRNGTest(PTXTest):
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
class CameraCoordTransform(PTXFragment):
|
class CameraCoordTransform(PTXFragment):
|
||||||
# This is here until I get the device stream packer going, or decide on
|
# TODO finish
|
||||||
# how to handle C struct addressing if we go for unpacked structures
|
pass
|
||||||
prelude = ".global .u32 camera_coords[8];"
|
|
||||||
|
|
||||||
def _cam_coord_xf(self, x, y, dreg):
|
|
||||||
"""
|
|
||||||
Given `.f32 x, y`, a coordinate in IFS space, writes the integer
|
|
||||||
offset from the start of the sampling lattice into `.u32 dreg`.
|
|
||||||
"""
|
|
||||||
|
|
||||||
return """{
|
|
||||||
.pred is_badval;
|
|
||||||
// TODO: This will change when data streaming is done
|
|
||||||
.reg .u32 camera_coord_address;
|
|
||||||
mov.u32 camera_coord_address, camera_coords;
|
|
||||||
// TODO: see if preloading everything hurts register count
|
|
||||||
.reg .f32 width_scale, width_upper_bound, height_scale, height_upper_bound;
|
|
||||||
ldu.v4.f32 {width_scale, width_upper_bound,
|
|
||||||
height_scale, height_upper_bound},
|
|
||||||
[camera_coord_address+0];
|
|
||||||
.reg .f32 x_xf, y_xf;
|
|
||||||
mad.rz.f32 x_xf, x, width_scale"""
|
|
||||||
# TODO unfinished
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -51,7 +51,7 @@ from collections import namedtuple
|
|||||||
# reg.u32('hooray_reg')
|
# reg.u32('hooray_reg')
|
||||||
# load_zero(hooray_reg)
|
# load_zero(hooray_reg)
|
||||||
#
|
#
|
||||||
# But using blocks alone to track names, it would turn in to this ugliness::
|
# But using blocks alone to track names, it would turn in to this mess::
|
||||||
#
|
#
|
||||||
# def load_zero(block, dest_reg):
|
# def load_zero(block, dest_reg):
|
||||||
# block.op.mov.u32(block.op.dest_reg, 0)
|
# block.op.mov.u32(block.op.dest_reg, 0)
|
||||||
@ -229,9 +229,6 @@ class _PTXFuncWrapper(object):
|
|||||||
func = types.FunctionType(self.func.func_code, newglobals,
|
func = types.FunctionType(self.func.func_code, newglobals,
|
||||||
self.func.func_name, self.func.func_defaults,
|
self.func.func_name, self.func.func_defaults,
|
||||||
self.func.func_closure)
|
self.func.func_closure)
|
||||||
# TODO: if we generate a new dict every time, we can kill the
|
|
||||||
# _BlockInjector and move BI.inject() back to _Block, but I don't want
|
|
||||||
# to delete working code just yet
|
|
||||||
with block.injector(func.func_globals):
|
with block.injector(func.func_globals):
|
||||||
func(*args, **kwargs)
|
func(*args, **kwargs)
|
||||||
|
|
||||||
@ -348,37 +345,6 @@ class _RegFactory(_CallChain):
|
|||||||
self.block.code(op='.reg .' + type, vars=_softjoin(names, ', '))
|
self.block.code(op='.reg .' + type, vars=_softjoin(names, ', '))
|
||||||
[self.block.inject(r.name, r) for r in regs]
|
[self.block.inject(r.name, r) for r in regs]
|
||||||
|
|
||||||
# Pending resolution of the op(regs, guard=x) debate
|
|
||||||
#class Pred(object):
|
|
||||||
#"""
|
|
||||||
#Allows for predicated execution of operations.
|
|
||||||
|
|
||||||
#>>> pred('p_some_test p_another_test')
|
|
||||||
#>>> op.setp.eq.u32(p_some_test, reg1, reg2)
|
|
||||||
#>>> op.setp.and.eq.u32(p_another_test, reg1, reg2, p_some_test)
|
|
||||||
#>>> with p_some_test.is_set():
|
|
||||||
#>>> op.ld.global.u32(reg1, addr(areg))
|
|
||||||
|
|
||||||
#Predication supports nested function calls, and will cover all code
|
|
||||||
#generated inside the predicate block:
|
|
||||||
|
|
||||||
#>>> with p_another_test.is_unset():
|
|
||||||
#>>> some_ptxdsl_function(reg2)
|
|
||||||
#>>> op.st.global.u32(addr(areg), reg2)
|
|
||||||
|
|
||||||
#It is a syntax error to declare registers,
|
|
||||||
#However, multiple predicate blocks cannot be nested. Doing so is a syntax
|
|
||||||
#error.
|
|
||||||
|
|
||||||
#>>> with p_some_test.is_set():
|
|
||||||
#>>> with p_another_test.is_unset():
|
|
||||||
#>>> pass
|
|
||||||
#SyntaxError: ...
|
|
||||||
#"""
|
|
||||||
#def __init__(self, name):
|
|
||||||
#self.name = name
|
|
||||||
#def is_set(self, isnot=False):
|
|
||||||
|
|
||||||
class Op(_CallChain):
|
class Op(_CallChain):
|
||||||
"""
|
"""
|
||||||
Performs an operation.
|
Performs an operation.
|
||||||
@ -470,7 +436,7 @@ class _MemFactory(_CallChain):
|
|||||||
|
|
||||||
class Label(object):
|
class Label(object):
|
||||||
"""
|
"""
|
||||||
Specifies the target for a branch. Scoped in PTX? TODO: test that it is.
|
Specifies the target for a branch.
|
||||||
|
|
||||||
>>> label('infinite_loop')
|
>>> label('infinite_loop')
|
||||||
>>> op.bra.uni('label')
|
>>> op.bra.uni('label')
|
||||||
@ -704,7 +670,6 @@ class PTXModule(object):
|
|||||||
self.__needs_recompilation = False
|
self.__needs_recompilation = False
|
||||||
self.assemble(block, all_deps, entry_deps)
|
self.assemble(block, all_deps, entry_deps)
|
||||||
self.instances.pop(_PTXStdLib)
|
self.instances.pop(_PTXStdLib)
|
||||||
print self.instances
|
|
||||||
|
|
||||||
if not formatter:
|
if not formatter:
|
||||||
formatter = PTXFormatter()
|
formatter = PTXFormatter()
|
||||||
|
Loading…
Reference in New Issue
Block a user