...except I missed the file that actually contained the new method

2025-12-20 20:17:06 -05:00 · 2010-09-12 14:06:07 -04:00
parent 6ed8907fcb
commit 7ef0d334ca
1 changed files with 14 additions and 0 deletions
--- a/cuburn/cuda.py
+++ b/cuburn/cuda.py
@ -110,3 +110,17 @@ class LaunchContext(object):
                all_okay = False
        return all_okay
    def get_per_thread(self, name, dtype, shaped=False):
        """
        Convenience function to get the contents of the global memory variable
        ``name`` from the device as a numpy array of type ``dtype``, as might
        be stored by _PTXStdLib.store_per_thread. If ``shaped`` is True, the
        array will be 3D, as (cta_no, warp_no, lane_no).
        """
        if shaped:
            shape = (self.nctas, self.warps_per_cta, 32)
        else:
            shape = self.nthreads
        dp, l = self.mod.get_global(name)
        return cuda.from_device(dp, shape, dtype)