mirror of
https://github.com/stevenrobertson/cuburn.git
synced 2025-02-05 11:40:04 -05:00
Um, missed this file. Also, just fixed an obvious memory bug.
This commit is contained in:
parent
e68d6fca84
commit
1dad09fc03
166
cuburn/code/util.py
Normal file
166
cuburn/code/util.py
Normal file
@ -0,0 +1,166 @@
|
|||||||
|
"""
|
||||||
|
Provides tools and miscellaneous functions for building device code.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import tempita
|
||||||
|
|
||||||
|
class HunkOCode(object):
    """
    An apparently passive container for device code.

    A hunk exposes three pieces of source text as attributes: ``headers``,
    ``decls`` and ``defs``.  Each defaults to the empty string.
    """
    # Subclasses may swap any of these for a property object to make the
    # corresponding section dynamic.
    headers = decls = defs = ''
|
||||||
|
|
||||||
|
def assemble_code(*sections):
    """
    Concatenate the code held by ``sections`` into a single source string.

    Every section's ``headers`` come first, then every section's ``decls``,
    then every section's ``defs``.  Within each kind the sections keep the
    order in which they were passed, and all pieces are joined by newlines.
    """
    grouped = []
    for kind in ('headers', 'decls', 'defs'):
        pieces = [getattr(sect, kind) for sect in sections]
        grouped.append('\n'.join(pieces))
    return '\n'.join(grouped)
|
||||||
|
|
||||||
|
def apply_affine(x, y, xo, yo, packer, base_accessor, base_name):
    """
    Emit device code applying a 2D affine transform to ``(x, y)``.

    ``x``, ``y``, ``xo`` and ``yo`` are pasted into the generated code as-is:
    the first two as the input expressions, the last two as the assignment
    targets.  The six coefficients are loaded through ``packer.get()``, using
    ``base_accessor + '[i][j]'`` as the accessor and ``base_name`` plus one
    of ``_xx``, ``_xy``, ``_xo``, ``_yx``, ``_yy``, ``_yo`` as the field name.

    Returns the generated code as a string.
    """
    # The order of the packer.get() calls inside the template determines the
    # order in which the coefficients are requested from the packer.
    affine_tmpl = tempita.Template("""
    {{xo}} = {{packer.get(ba + '[0][0]', bn + '_xx')}} * {{x}}
           + {{packer.get(ba + '[1][0]', bn + '_xy')}} * {{y}}
           + {{packer.get(ba + '[2][0]', bn + '_xo')}};
    {{yo}} = {{packer.get(ba + '[0][1]', bn + '_yx')}} * {{x}}
           + {{packer.get(ba + '[1][1]', bn + '_yy')}} * {{y}}
           + {{packer.get(ba + '[2][1]', bn + '_yo')}};
    """)
    bindings = dict(x=x, y=y, xo=xo, yo=yo, packer=packer,
                    ba=base_accessor, bn=base_name)
    return affine_tmpl.substitute(**bindings)
|
||||||
|
|
||||||
|
class BaseCode(HunkOCode):
    """
    Hunk holding basic device code.

    ``headers`` includes ``cuda.h`` and ``stdint.h``; ``defs`` defines the
    device function ``gtid()``.  ``decls`` is left at the inherited value.
    """

    headers = """
#include<cuda.h>
#include<stdint.h>
"""

    # gtid() folds threadIdx.{x,y,z}, blockDim.{x,y,z}, blockIdx.{x,y} and
    # gridDim.x into a single uint32_t index, with threadIdx.x varying
    # fastest and blockIdx.y slowest.
    defs = """
// TODO: use launch parameter preconfig to eliminate unnecessary parts
__device__
uint32_t gtid() {
    return threadIdx.x + blockDim.x *
            (threadIdx.y + blockDim.y *
                (threadIdx.z + blockDim.z *
                    (blockIdx.x + (gridDim.x * blockIdx.y))));
}
"""
|
||||||
|
|
||||||
|
class DataPackerView(object):
    """
    View of a data packer.  Intended to be initialized using
    DataPacker.view().

    All views of a data packer share the same stream parameters, such as
    position and total size, but do not share other parameters, such as the
    pointer name used in emitted code lookups or the lookup context.
    """

    def __init__(self, packer, ptr, prefix, ns):
        """
        Store the owning ``packer``, the pointer name ``ptr`` used in emitted
        lookups, the ``prefix`` prepended to every field name, and ``ns``,
        the list of ``(source expression, destination name)`` substitutions.
        """
        self.packer = packer
        self.ptr = ptr
        self.prefix = prefix
        self.ns = ns

    def get(self, accessor, name=None):
        """
        Add an access to the stream, returning the formatted load expression
        for device use.  If 'name' is missing, the name components after the
        final dot in the accessor will be used.  Little effort is made to
        ensure that this is valid C.
        """
        if name is None:
            # Derive a field name from the accessor's last dotted component,
            # turning index brackets into underscores ('b[2]' -> 'b_2').
            tail = accessor.rsplit('.', 1)[-1]
            name = tail.replace('[', '_').replace(']', '')
        field = self.prefix + name
        self.packer._access(self, accessor, field)
        return '%s->%s' % (self.ptr, field)

    def sub(self, dst, src):
        """Add a substitution to the namespace."""
        # Stored as (source, destination); _apply_subs() unpacks in that order.
        self.ns.append((src, dst))

    def view(self, ptr_name, prefix=''):
        """
        As DataPacker.view(), but preserving the current set of namespace
        substitutions.
        """
        # The new view gets its own copy of the substitution list, so later
        # sub() calls on either view do not affect the other.
        inherited = list(self.ns)
        return DataPackerView(self.packer, ptr_name, prefix, inherited)

    def _apply_subs(self, ns):
        """
        Evaluate each recorded substitution against ``ns``, in the order the
        substitutions were added, binding every result under its destination
        name.  ``ns`` is modified in place and also returned.
        """
        for expr, target in self.ns:
            ns[target] = eval(expr, ns)
        return ns
|
||||||
|
|
||||||
|
class DataPacker(HunkOCode):
    """
    Packs 32-bit float values into a dynamic data structure, and emits
    accessors to those data values from device code.  Might get fancier in the
    future, but for now it's incredibly barebones.

    Fields are registered as a side effect of ``DataPackerView.get()``; the
    order of registration is the order of the fields in both the emitted
    struct (``decls``) and the packed array (``pack()``).
    """

    def __init__(self, tname, clsize=128):
        """
        Create a new DataPacker.

        ``tname`` is the name of the structure typedef that will be emitted
        via this object's ``decls`` property.

        ``clsize`` is the size of a cache line, in bytes.  The resulting
        data structure will be padded to that size.
        """
        self.tname = tname
        self.clsize = clsize
        # name -> (view, accessor) for every registered field
        self.packed = {}
        # Field names in registration order; fixes the struct layout.
        self.packed_order = []

    def view(self, ptr_name, prefix=''):
        """Create a DataPacker view.  See DataPackerView class for details."""
        return DataPackerView(self, ptr_name, prefix, list())

    def _access(self, view, accessor, name):
        """
        Register field ``name`` as the value of ``accessor`` in ``view``.

        Registering a name that already exists is a no-op when it comes from
        the same view and ``accessor`` is either None or identical to the
        stored accessor.  Any other reuse of a name raises ``ValueError``.
        """
        if name in self.packed:
            pview, paccessor = self.packed[name]
            if pview == view and (accessor is None or paccessor == accessor):
                return
            raise ValueError("Same name, different accessor or view: %s" % name)
        self.packed[name] = (view, accessor)
        self.packed_order.append(name)

    def __len__(self):
        """Return the number of registered fields."""
        return len(self.packed_order)

    @property
    def align(self):
        """
        Size of the packed structure in bytes: four bytes per field, rounded
        up to a whole number of cache lines.
        """
        # Floor division keeps this an integer under true division as well.
        return (4 * len(self) + self.clsize - 1) // self.clsize * self.clsize

    def pack(self, **kwargs):
        """
        Evaluate every registered accessor and return the packed values.

        ``kwargs`` forms the namespace in which the accessor expressions are
        evaluated, after the owning view's substitutions have been applied to
        a copy of it.  Returns a float32 array of ``align // 4`` elements,
        with field values in registration order and zeros in the padding.

        Any exception raised while evaluating an accessor is reported on
        stdout and then re-raised unchanged.
        """
        out = np.zeros(self.align // 4, dtype=np.float32)
        # Substituted namespaces are built once per view and reused.
        subbed_nses = {}

        for i, name in enumerate(self.packed_order):
            view, accessor = self.packed[name]
            if view not in subbed_nses:
                subbed_nses[view] = view._apply_subs(dict(kwargs))
            try:
                # NOTE(review): accessors are eval()'d as Python expressions;
                # they must only ever come from trusted code-generation input.
                val = eval(accessor, subbed_nses[view])
            except Exception:
                print('Error while evaluating accessor "%s"' % accessor)
                # Bare raise keeps the original traceback intact.
                raise
            out[i] = val
        return out

    @property
    def decls(self):
        """
        C declaration of the packed structure: one ``float`` member per
        registered field, in registration order, followed by enough padding
        floats to bring the struct up to ``align`` bytes.
        """
        tmpl = tempita.Template("""
typedef struct {

{{for name, accessor in values}}
    float   {{'%-20s' % name}}; // {{accessor}}
{{endfor}}
{{if padding > 0}}
    // Align to fill whole cache lines
    float   padding[{{padding}}];
{{endif}}

} {{tname}};
""")
        return tmpl.substitute(
                values = [(n, self.packed[n][1]) for n in self.packed_order],
                # Pad up to the same size pack() allocates, so host buffer
                # and device struct agree.
                padding = self.align // 4 - len(self),
                tname = self.tname
            )
|
||||||
|
|
Loading…
Reference in New Issue
Block a user