mirror of
https://github.com/stevenrobertson/cuburn.git
synced 2025-03-15 16:01:29 -04:00
...except I missed the file that actually contained the new method
This commit is contained in:
parent
6ed8907fcb
commit
7ef0d334ca
@ -110,3 +110,17 @@ class LaunchContext(object):
|
|||||||
all_okay = False
|
all_okay = False
|
||||||
return all_okay
|
return all_okay
|
||||||
|
|
||||||
|
def get_per_thread(self, name, dtype, shaped=False):
    """
    Fetch the global memory variable ``name`` from the device and return
    its contents as a numpy array of type ``dtype``. Intended for data
    such as might be stored by _PTXStdLib.store_per_thread.

    When ``shaped`` is True the returned array is 3D, indexed as
    (cta_no, warp_no, lane_no); otherwise the shape passed to the copy
    is simply ``self.nthreads``.
    """
    # Work out the requested array shape first, then look up the global.
    dims = (self.nctas, self.warps_per_cta, 32) if shaped else self.nthreads
    # get_global returns a pair; only the device pointer is used here.
    dev_ptr, _unused = self.mod.get_global(name)
    return cuda.from_device(dev_ptr, dims, dtype)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user