Populate arch by default; add --keep

2026-02-18 17:20:44 -05:00 · 2017-04-24 16:39:15 -07:00
parent 582221dd0f
commit 9892acbc7f
3 changed files with 13 additions and 7 deletions
--- a/cuburn/code/util.py
+++ b/cuburn/code/util.py
@@ -96,7 +96,7 @@ def assemble_code(*libs):
 DEFAULT_CMP_OPTIONS = ('-use_fast_math', '-lineinfo')
 DEFAULT_SAVE_KERNEL = True
 def compile(name, src, opts=DEFAULT_CMP_OPTIONS, save=DEFAULT_SAVE_KERNEL,
-            arch=None):
+            arch=None, keep=False):
    """
    Compile a module. Returns a copy of the source (for inspection or
    display) and the compiled cubin.
@@ -105,7 +105,8 @@ def compile(name, src, opts=DEFAULT_CMP_OPTIONS, save=DEFAULT_SAVE_KERNEL,
    if save:
        with open(os.path.join(dir, name + '_kern.cu'), 'w') as fp:
            fp.write(src)
-    cubin = pycuda.compiler.compile(src, options=list(opts), arch=arch)
+    cubin = pycuda.compiler.compile(src, options=list(opts), arch=arch,
+                                    keep=keep)
    if save:
        with open(os.path.join(dir, name + '_kern.cubin'), 'w') as fp:
            fp.write(cubin)
--- a/cuburn/render.py
+++ b/cuburn/render.py
@@ -218,9 +218,9 @@ class Renderer(object):
    _modrefs = {}

    @classmethod
-    def compile(cls, gnm, arch=None):
+    def compile(cls, gnm, arch=None, keep=False):
        packer, lib = iter.mkiterlib(gnm)
-        cubin = util.compile('iter', assemble_code(lib), arch=arch)
+        cubin = util.compile('iter', assemble_code(lib), arch=arch, keep=keep)
        return packer, lib, cubin

    def load(self, cubin):
@@ -232,8 +232,8 @@ class Renderer(object):
        self._modrefs[cubin] = mod
        return mod

-    def __init__(self, gnm, gprof):
-        self.packer, self.lib, self.cubin = self.compile(gnm)
+    def __init__(self, gnm, gprof, keep=False, arch=None):
+        self.packer, self.lib, self.cubin = self.compile(gnm, keep=keep, arch=arch)
        self.mod = self.load(self.cubin)
        self.filts = filters.create(gprof)
        self.out = output.get_output_for_profile(gprof)
--- a/main.py
+++ b/main.py
@@ -125,7 +125,10 @@ def main(args, prof):

    try:
      rmgr = render.RenderManager()
-      rdr = render.Renderer(gnm, gprof)
+      arch = 'sm_{}{}'.format(
+          dev.get_attribute(cuda.device_attribute.COMPUTE_CAPABILITY_MAJOR),
+          dev.get_attribute(cuda.device_attribute.COMPUTE_CAPABILITY_MINOR))
+      rdr = render.Renderer(gnm, gprof, keep=args.keep, arch=arch)

      def render_iter():
          m = os.path.getmtime(args.flame)
@@ -191,6 +194,8 @@ if __name__ == "__main__":
        help="Print the blended animation and exit.")
    parser.add_argument('--device', metavar='NUM', type=int,
                        help="GPU device number to use (from nvidia-smi).")
+    parser.add_argument('--keep', action='store_true',
+                        help="Keep compiled kernels to help with profiling")
    profile.add_args(parser)

    args = parser.parse_args()