diff --git a/cuburn/code/util.py b/cuburn/code/util.py index b1e2dd0..a12bcea 100644 --- a/cuburn/code/util.py +++ b/cuburn/code/util.py @@ -96,7 +96,7 @@ def assemble_code(*libs): DEFAULT_CMP_OPTIONS = ('-use_fast_math', '-lineinfo') DEFAULT_SAVE_KERNEL = True def compile(name, src, opts=DEFAULT_CMP_OPTIONS, save=DEFAULT_SAVE_KERNEL, - arch=None): + arch=None, keep=False): """ Compile a module. Returns a copy of the source (for inspection or display) and the compiled cubin. @@ -105,7 +105,8 @@ def compile(name, src, opts=DEFAULT_CMP_OPTIONS, save=DEFAULT_SAVE_KERNEL, if save: with open(os.path.join(dir, name + '_kern.cu'), 'w') as fp: fp.write(src) - cubin = pycuda.compiler.compile(src, options=list(opts), arch=arch) + cubin = pycuda.compiler.compile(src, options=list(opts), arch=arch, + keep=keep) if save: with open(os.path.join(dir, name + '_kern.cubin'), 'w') as fp: fp.write(cubin) diff --git a/cuburn/render.py b/cuburn/render.py index 41fb2ed..7405fa2 100644 --- a/cuburn/render.py +++ b/cuburn/render.py @@ -218,9 +218,9 @@ class Renderer(object): _modrefs = {} @classmethod - def compile(cls, gnm, arch=None): + def compile(cls, gnm, arch=None, keep=False): packer, lib = iter.mkiterlib(gnm) - cubin = util.compile('iter', assemble_code(lib), arch=arch) + cubin = util.compile('iter', assemble_code(lib), arch=arch, keep=keep) return packer, lib, cubin def load(self, cubin): @@ -232,8 +232,8 @@ class Renderer(object): self._modrefs[cubin] = mod return mod - def __init__(self, gnm, gprof): - self.packer, self.lib, self.cubin = self.compile(gnm) + def __init__(self, gnm, gprof, keep=False, arch=None): + self.packer, self.lib, self.cubin = self.compile(gnm, keep=keep, arch=arch) self.mod = self.load(self.cubin) self.filts = filters.create(gprof) self.out = output.get_output_for_profile(gprof) diff --git a/main.py b/main.py index 6abfb65..a38830d 100755 --- a/main.py +++ b/main.py @@ -125,7 +125,10 @@ def main(args, prof): try: rmgr = render.RenderManager() - rdr = render.Renderer(gnm, gprof) + arch = 'sm_{}{}'.format( + dev.get_attribute(cuda.device_attribute.COMPUTE_CAPABILITY_MAJOR), + dev.get_attribute(cuda.device_attribute.COMPUTE_CAPABILITY_MINOR)) + rdr = render.Renderer(gnm, gprof, keep=args.keep, arch=arch) def render_iter(): m = os.path.getmtime(args.flame) @@ -191,6 +194,8 @@ if __name__ == "__main__": help="Print the blended animation and exit.") parser.add_argument('--device', metavar='NUM', type=int, help="GPU device number to use (from nvidia-smi).") + parser.add_argument('--keep', action='store_true', + help="Keep compiled kernels to help with profiling") profile.add_args(parser) args = parser.parse_args()