* Fix deptrace typos

* Add predicate support to DeviceStream fetches, making them even uglier
* Add `store_per_thread` to PTX stdlib
This commit is contained in:
Steven Robertson 2010-09-03 00:51:23 -04:00
parent a68fc064a1
commit 2c26ff9ab6

View File

@ -634,6 +634,16 @@ class _PTXStdLib(PTXFragment):
op.add.u32(gtid, gtid, tid) op.add.u32(gtid, gtid, tid)
op.mov.b32(dst, gtid) op.mov.b32(dst, gtid)
@ptx_func
def _store_per_thread(self, base, val):
    """Store b32 at `base+gtid*4`. Super-common debug pattern.

    Emits, inside a block labelled with `val`, the instructions that
    copy `base` into a scratch register, add four times the value
    produced by `get_gtid`, and store `val` at the resulting address.

    base -- starting address operand; it is copied rather than modified.
    val  -- operand written with a `st.b32`.

    NOTE(review): `block`, `reg`, `op`, `addr` and `get_gtid` are not
    defined in this method; presumably `@ptx_func` injects them (see
    `to_inject`) -- confirm against the decorator.
    """
    with block("Per-thread store of %s" % str(val)):
        # Two scratch u32 registers: the running address and the
        # per-thread index.
        reg.u32('spt_base spt_offset')
        # Work on a copy so the caller's `base` operand is left alone.
        op.mov.u32(spt_base, base)
        get_gtid(spt_offset)
        # spt_base = spt_offset * 4 + spt_base; 4 is the byte width of
        # one b32 slot.
        op.mad.lo.u32(spt_base, spt_offset, 4, spt_base)
        op.st.b32(addr(spt_base), val)
def to_inject(self): def to_inject(self):
return dict( return dict(
_block=self.block, _block=self.block,
@ -645,7 +655,8 @@ class _PTXStdLib(PTXFragment):
vec=Mem.vec, vec=Mem.vec,
label=_LabelFactory(self.block), label=_LabelFactory(self.block),
comment=Comment(self.block), comment=Comment(self.block),
get_gtid=self._get_gtid) get_gtid=self._get_gtid,
store_per_thread=self._store_per_thread)
class PTXModule(object): class PTXModule(object):
""" """
@ -747,7 +758,7 @@ class PTXModule(object):
if test not in tests: if test not in tests:
tests.add(test) tests.add(test)
if test not in instances: if test not in instances:
unvisisted_entries.append(tests) unvisited_entries.append(test)
# For this entry, store insts of all dependencies in order. # For this entry, store insts of all dependencies in order.
entry_deps[ent] = self.deporder(map(instances.get, seen), entry_deps[ent] = self.deporder(map(instances.get, seen),
instances) instances)
@ -954,7 +965,7 @@ class DataStream(object):
return self.offset return self.offset
@ptx_func @ptx_func
def _stream_get_internal(self, areg, dregs, exprs): def _stream_get_internal(self, areg, dregs, exprs, ifp, ifnotp):
size, type = self._get_type(dregs) size, type = self._get_type(dregs)
vsize = size * len(dregs) vsize = size * len(dregs)
texp = _TExp(type, [expr]) texp = _TExp(type, [expr])
@ -966,19 +977,24 @@ class DataStream(object):
vtype = {1: '', 2: '.v2', 4: '.v4'}.get(len(dregs)) vtype = {1: '', 2: '.v2', 4: '.v4'}.get(len(dregs))
if len(dregs) > 0: if len(dregs) > 0:
dregs = vec(dregs) dregs = vec(dregs)
op._call('ldu%s.b%d' % (vtype, size), dregs, addr(areg+off)) op._call('ldu%s.b%d' % (vtype, size), dregs, addr(areg+off),
ifp=ifp, ifnotp=ifnotp)
@ptx_func @ptx_func
def _stream_get(self, areg, dreg, expr): def _stream_get(self, areg, dreg, expr, ifp=None, ifnotp=None):
self._stream_get_internal(areg, [dreg], [expr]) self._stream_get_internal(areg, [dreg], [expr], ifp, ifnotp)
@ptx_func @ptx_func
def _stream_get_v2(self, areg, dreg1, expr1, dreg2, expr2): def _stream_get_v2(self, areg, dreg1, expr1, dreg2, expr2,
self._stream_get_internal(areg, [dreg1, dreg2], [expr1, expr2]) ifp=None, ifnotp=None):
self._stream_get_internal(areg, [dreg1, dreg2], [expr1, expr2],
ifp, ifnotp)
@ptx_func
def _stream_get_v4(self, areg, d1, e1, d2, e2, d3, e3, d4, e4,
                   ifp=None, ifnotp=None):
    """Fetch four stream values with a single call.

    Four-register counterpart of `_stream_get` and `_stream_get_v2`.
    Destination registers and their expressions are interleaved in the
    signature (`d1, e1, d2, e2, ...`) and regrouped into two parallel
    lists before being handed to `_stream_get_internal`.

    This wrapper was previously also named `_stream_get_v2`, which
    rebound that name in the class body and hid the two-register
    wrapper defined just above it; it is named `_stream_get_v4` so both
    wrappers are reachable.

    areg        -- address register, forwarded unchanged.
    d1..d4      -- destination registers.
    e1..e4      -- expressions paired with `d1..d4`, in the same order.
    ifp, ifnotp -- optional predicate arguments, forwarded unchanged.
    """
    self._stream_get_internal(areg, [d1, d2, d3, d4], [e1, e2, e3, e4],
                              ifp, ifnotp)
def _stream_size(self): def _stream_size(self):
return self.size_strvar return self.size_strvar