Changes to CUDA module loading

Modules may (once again) be compiled and loaded in separate stages, including compiling without having a CUDA context on hand. Also, modules will be reused if they are already loaded.
2025-06-10 17:31:31 -04:00 · 2012-05-20 13:05:28 -07:00 · 2012-05-20 13:05:28 -07:00 · 8c7db9d0fc
commit 8c7db9d0fc
parent 5083fefad7
1 changed files with 18 additions and 9 deletions
--- a/cuburn/render.py
+++ b/cuburn/render.py
@@ -200,17 +200,26 @@ class Renderer(object):
    # asynchronous, and avoid expensive CPU polling, this hangs on to
    # a number of (relatively small) CUDA modules and flushes them together.
    MAX_MODREFS = 20
-    _modrefs = []
+    _modrefs = {}
    @classmethod
    def compile(cls, gnm, arch=None):
        """
        Build the 'iter' kernel binary for ``gnm`` without loading it.

        This only generates and compiles the code; turning the resulting
        binary into a loaded module is a separate step (see ``load``).

        gnm:    input handed to ``iter.mkiterlib`` to generate the library.
        arch:   optional value forwarded to ``util.compile`` as ``arch``.

        Returns the tuple ``(packer, lib, cubin)``.
        """
        packer, lib = iter.mkiterlib(gnm)
        source = assemble_code(lib)
        cubin = util.compile('iter', source, arch=arch)
        return (packer, lib, cubin)
    def load(self, cubin):
        """
        Return the CUDA module for ``cubin``, loading it only if needed.

        Loaded modules are kept in ``_modrefs``, keyed by the binary they
        were loaded from, so asking for an already-loaded binary returns the
        existing module instead of loading it again.

        cubin:  compiled module binary, as returned by ``compile``.

        Returns the module created by ``cuda.module_from_buffer``.
        """
        if cubin in self._modrefs:
            return self._modrefs[cubin]
        # Load the binary that was actually passed in. Using ``self.cubin``
        # here would cache the wrong module under this key whenever the two
        # differ, and would fail outright before ``self.cubin`` is assigned.
        mod = cuda.module_from_buffer(cubin)
        # Once the cache has grown past the limit, drop every held reference
        # in one go rather than evicting entries individually.
        if len(self._modrefs) > self.MAX_MODREFS:
            self._modrefs.clear()
        self._modrefs[cubin] = mod
        return mod
    def __init__(self, gnm, gprof):
-        self.packer, self.lib = iter.mkiterlib(gnm)
+        self.packer, self.lib, self.cubin = self.compile(gnm)
-        cubin = util.compile('iter', assemble_code(self.lib))
+        self.mod = self.load(self.cubin)
        self.mod = cuda.module_from_buffer(cubin)
        if len(self._modrefs) > self.MAX_MODREFS:
            del self._modrefs[:]
        self._modrefs.append(self.mod)
        self.filts = filters.create(gprof)
        self.out = output.PILOutput()