From 8c7db9d0fcb5669b0aa0ea32ff28b5ea4156beda Mon Sep 17 00:00:00 2001
From: Steven Robertson
Date: Sun, 20 May 2012 13:05:28 -0700
Subject: [PATCH] Changes to CUDA module loading

Modules may (once again) be compiled and loaded in separate stages,
including compiling without having a CUDA context on hand. Also, modules
will be reused if they are already loaded.
---
 cuburn/render.py | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/cuburn/render.py b/cuburn/render.py
index 8ffc5cc..e70c4de 100644
--- a/cuburn/render.py
+++ b/cuburn/render.py
@@ -200,17 +200,26 @@ class Renderer(object):
     # asynchronous, and avoid expensive CPU polling, this hangs on to
     # a number of (relatively small) CUDA modules and flushes them together.
     MAX_MODREFS = 20
-    _modrefs = []
+    _modrefs = {}
+
+    @classmethod
+    def compile(cls, gnm, arch=None):
+        packer, lib = iter.mkiterlib(gnm)
+        cubin = util.compile('iter', assemble_code(lib), arch=arch)
+        return packer, lib, cubin
+
+    def load(self, cubin):
+        if cubin in self._modrefs:
+            return self._modrefs[cubin]
+        mod = cuda.module_from_buffer(cubin)
+        if len(self._modrefs) > self.MAX_MODREFS:
+            self._modrefs.clear()
+        self._modrefs[cubin] = mod
+        return mod
 
     def __init__(self, gnm, gprof):
-        self.packer, self.lib = iter.mkiterlib(gnm)
-        cubin = util.compile('iter', assemble_code(self.lib))
-        self.mod = cuda.module_from_buffer(cubin)
-
-        if len(self._modrefs) > self.MAX_MODREFS:
-            del self._modrefs[:]
-        self._modrefs.append(self.mod)
-
+        self.packer, self.lib, self.cubin = self.compile(gnm)
+        self.mod = self.load(self.cubin)
         self.filts = filters.create(gprof)
         self.out = output.PILOutput()
 