mirror of
https://github.com/stevenrobertson/cuburn.git
synced 2025-02-05 11:40:04 -05:00
* Fix deptrace typos
* Add predicate support to DeviceStream fetches, making them even uglier
* Add `store_per_thread` to PTX stdlib
This commit is contained in:
parent
a68fc064a1
commit
2c26ff9ab6
@ -634,6 +634,16 @@ class _PTXStdLib(PTXFragment):
|
|||||||
op.add.u32(gtid, gtid, tid)
|
op.add.u32(gtid, gtid, tid)
|
||||||
op.mov.b32(dst, gtid)
|
op.mov.b32(dst, gtid)
|
||||||
|
|
||||||
|
@ptx_func
|
||||||
|
def _store_per_thread(self, base, val):
|
||||||
|
"""Store b32 at `base+gtid*4`. Super-common debug pattern."""
|
||||||
|
with block("Per-thread store of %s" % str(val)):
|
||||||
|
reg.u32('spt_base spt_offset')
|
||||||
|
op.mov.u32(spt_base, base)
|
||||||
|
get_gtid(spt_offset)
|
||||||
|
op.mad.lo.u32(spt_base, spt_offset, 4, spt_base)
|
||||||
|
op.st.b32(addr(spt_base), val)
|
||||||
|
|
||||||
def to_inject(self):
|
def to_inject(self):
|
||||||
return dict(
|
return dict(
|
||||||
_block=self.block,
|
_block=self.block,
|
||||||
@ -645,7 +655,8 @@ class _PTXStdLib(PTXFragment):
|
|||||||
vec=Mem.vec,
|
vec=Mem.vec,
|
||||||
label=_LabelFactory(self.block),
|
label=_LabelFactory(self.block),
|
||||||
comment=Comment(self.block),
|
comment=Comment(self.block),
|
||||||
get_gtid=self._get_gtid)
|
get_gtid=self._get_gtid,
|
||||||
|
store_per_thread=self._store_per_thread)
|
||||||
|
|
||||||
class PTXModule(object):
|
class PTXModule(object):
|
||||||
"""
|
"""
|
||||||
@ -747,7 +758,7 @@ class PTXModule(object):
|
|||||||
if test not in tests:
|
if test not in tests:
|
||||||
tests.add(test)
|
tests.add(test)
|
||||||
if test not in instances:
|
if test not in instances:
|
||||||
unvisisted_entries.append(tests)
|
unvisited_entries.append(test)
|
||||||
# For this entry, store insts of all dependencies in order.
|
# For this entry, store insts of all dependencies in order.
|
||||||
entry_deps[ent] = self.deporder(map(instances.get, seen),
|
entry_deps[ent] = self.deporder(map(instances.get, seen),
|
||||||
instances)
|
instances)
|
||||||
@ -954,7 +965,7 @@ class DataStream(object):
|
|||||||
return self.offset
|
return self.offset
|
||||||
|
|
||||||
@ptx_func
|
@ptx_func
|
||||||
def _stream_get_internal(self, areg, dregs, exprs):
|
def _stream_get_internal(self, areg, dregs, exprs, ifp, ifnotp):
|
||||||
size, type = self._get_type(dregs)
|
size, type = self._get_type(dregs)
|
||||||
vsize = size * len(dregs)
|
vsize = size * len(dregs)
|
||||||
texp = _TExp(type, [expr])
|
texp = _TExp(type, [expr])
|
||||||
@ -966,19 +977,24 @@ class DataStream(object):
|
|||||||
vtype = {1: '', 2: '.v2', 4: '.v4'}.get(len(dregs))
|
vtype = {1: '', 2: '.v2', 4: '.v4'}.get(len(dregs))
|
||||||
if len(dregs) > 0:
|
if len(dregs) > 0:
|
||||||
dregs = vec(dregs)
|
dregs = vec(dregs)
|
||||||
op._call('ldu%s.b%d' % (vtype, size), dregs, addr(areg+off))
|
op._call('ldu%s.b%d' % (vtype, size), dregs, addr(areg+off),
|
||||||
|
ifp=ifp, ifnotp=ifnotp)
|
||||||
|
|
||||||
@ptx_func
|
@ptx_func
|
||||||
def _stream_get(self, areg, dreg, expr):
|
def _stream_get(self, areg, dreg, expr, ifp=None, ifnotp=None):
|
||||||
self._stream_get_internal(areg, [dreg], [expr])
|
self._stream_get_internal(areg, [dreg], [expr], ifp, ifnotp)
|
||||||
|
|
||||||
@ptx_func
|
@ptx_func
|
||||||
def _stream_get_v2(self, areg, dreg1, expr1, dreg2, expr2):
|
def _stream_get_v2(self, areg, dreg1, expr1, dreg2, expr2,
|
||||||
self._stream_get_internal(areg, [dreg1, dreg2], [expr1, expr2])
|
ifp=None, ifnotp=None):
|
||||||
|
self._stream_get_internal(areg, [dreg1, dreg2], [expr1, expr2],
|
||||||
|
ifp, ifnotp)
|
||||||
|
|
||||||
@ptx_func
|
@ptx_func
|
||||||
def _stream_get_v2(self, areg, d1, e1, d2, e2, d3, e3, d4, e4):
|
def _stream_get_v2(self, areg, d1, e1, d2, e2, d3, e3, d4, e4,
|
||||||
self._stream_get_internal(areg, [d1, d2, d3, d4], [e1, e2, e3, e4])
|
ifp=None, ifnotp=None):
|
||||||
|
self._stream_get_internal(areg, [d1, d2, d3, d4], [e1, e2, e3, e4],
|
||||||
|
ifp, ifnotp)
|
||||||
|
|
||||||
def _stream_size(self):
|
def _stream_size(self):
|
||||||
return self.size_strvar
|
return self.size_strvar
|
||||||
|
Loading…
Reference in New Issue
Block a user