mirror of
				https://github.com/stevenrobertson/cuburn.git
				synced 2025-11-03 18:00:55 -05:00 
			
		
		
		
	Um, missed this file. Also, just fixed an obvious memory bug.
This commit is contained in:
		
							
								
								
									
										166
									
								
								cuburn/code/util.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										166
									
								
								cuburn/code/util.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,166 @@
 | 
				
			|||||||
 | 
					"""
 | 
				
			||||||
 | 
					Provides tools and miscellaneous functions for building device code.
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import numpy as np
 | 
				
			||||||
 | 
					import tempita
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class HunkOCode(object):
 | 
				
			||||||
 | 
					    """An apparently passive container for device code."""
 | 
				
			||||||
 | 
					    # Use property objects to make these dynamic
 | 
				
			||||||
 | 
					    headers = ''
 | 
				
			||||||
 | 
					    decls = ''
 | 
				
			||||||
 | 
					    defs = ''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def assemble_code(*sections):
 | 
				
			||||||
 | 
					    return '\n'.join(['\n'.join([getattr(sect, kind) for sect in sections])
 | 
				
			||||||
 | 
					                      for kind in ['headers', 'decls', 'defs']])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def apply_affine(x, y, xo, yo, packer, base_accessor, base_name):
 | 
				
			||||||
 | 
					    return tempita.Template("""
 | 
				
			||||||
 | 
					    {{xo}} = {{packer.get(ba + '[0][0]', bn + '_xx')}} * {{x}}
 | 
				
			||||||
 | 
					           + {{packer.get(ba + '[1][0]', bn + '_xy')}} * {{y}}
 | 
				
			||||||
 | 
					           + {{packer.get(ba + '[2][0]', bn + '_xo')}};
 | 
				
			||||||
 | 
					    {{yo}} = {{packer.get(ba + '[0][1]', bn + '_yx')}} * {{x}}
 | 
				
			||||||
 | 
					           + {{packer.get(ba + '[1][1]', bn + '_yy')}} * {{y}}
 | 
				
			||||||
 | 
					           + {{packer.get(ba + '[2][1]', bn + '_yo')}};
 | 
				
			||||||
 | 
					    """).substitute(x=x, y=y, xo=xo, yo=yo, packer=packer,
 | 
				
			||||||
 | 
					                    ba=base_accessor, bn=base_name)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class BaseCode(HunkOCode):
 | 
				
			||||||
 | 
					    headers = """
 | 
				
			||||||
 | 
					#include<cuda.h>
 | 
				
			||||||
 | 
					#include<stdint.h>
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    defs = """
 | 
				
			||||||
 | 
					// TODO: use launch parameter preconfig to eliminate unnecessary parts
 | 
				
			||||||
 | 
					__device__
 | 
				
			||||||
 | 
					uint32_t gtid() {
 | 
				
			||||||
 | 
					    return threadIdx.x + blockDim.x *
 | 
				
			||||||
 | 
					            (threadIdx.y + blockDim.y *
 | 
				
			||||||
 | 
					                (threadIdx.z + blockDim.z *
 | 
				
			||||||
 | 
					                    (blockIdx.x + (gridDim.x * blockIdx.y))));
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class DataPackerView(object):
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					    View of a data packer. Intended to be initialized using DataPacker.view().
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    All views of a data packer share the same stream parameters, such as
 | 
				
			||||||
 | 
					    position and total size, but do not share other parameters, such as the
 | 
				
			||||||
 | 
					    pointer name used in emitted code lookups or the lookup context.
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					    def __init__(self, packer, ptr, prefix, ns):
 | 
				
			||||||
 | 
					        self.packer, self.ptr, self.prefix, self.ns = packer, ptr, prefix, ns
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def get(self, accessor, name=None):
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        Add an access to the stream, returning the formatted load expression
 | 
				
			||||||
 | 
					        for device use. If 'name' is missing, the name components after the
 | 
				
			||||||
 | 
					        final dot in the accessor will be used. Little effort is made to
 | 
				
			||||||
 | 
					        ensure that this is valid C.
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        if name is None:
 | 
				
			||||||
 | 
					            name = accessor.rsplit('.', 1)[-1]
 | 
				
			||||||
 | 
					            name = name.replace('[', '_').replace(']', '')
 | 
				
			||||||
 | 
					        name = self.prefix + name
 | 
				
			||||||
 | 
					        self.packer._access(self, accessor, name)
 | 
				
			||||||
 | 
					        return '%s->%s' % (self.ptr, name)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def sub(self, dst, src):
 | 
				
			||||||
 | 
					        """Add a substitution to the namespace."""
 | 
				
			||||||
 | 
					        self.ns.append((src, dst))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def view(self, ptr_name, prefix=''):
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        As DataPacker.view(), but preserving the current set of namespace
 | 
				
			||||||
 | 
					        substitutions.
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        return DataPackerView(self.packer, ptr_name, prefix, list(self.ns))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def _apply_subs(self, ns):
 | 
				
			||||||
 | 
					        for s, d in self.ns:
 | 
				
			||||||
 | 
					            ns[d] = eval(s, ns)
 | 
				
			||||||
 | 
					        return ns
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class DataPacker(HunkOCode):
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					    Packs 32-bit float values into a dynamic data structure, and emits
 | 
				
			||||||
 | 
					    accessors to those data values from device code. Might get fancier in the
 | 
				
			||||||
 | 
					    future, but for now it's incredibly barebones.
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def __init__(self, tname, clsize=128):
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        Create a new DataPacker.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        ``tname`` is the name of the structure typedef that will be emitted
 | 
				
			||||||
 | 
					        via this object's ``decls`` property.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        ``clsize`` is the size of a cache line, in bytes. The resulting
 | 
				
			||||||
 | 
					        data structure will be padded to that size.
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        self.tname = tname
 | 
				
			||||||
 | 
					        self.clsize = clsize
 | 
				
			||||||
 | 
					        self.packed = {}
 | 
				
			||||||
 | 
					        self.packed_order = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def view(self, ptr_name, prefix=''):
 | 
				
			||||||
 | 
					        """Create a DataPacker view. See DataPackerView class for details."""
 | 
				
			||||||
 | 
					        return DataPackerView(self, ptr_name, prefix, list())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def _access(self, view, accessor, name):
 | 
				
			||||||
 | 
					        if name in self.packed:
 | 
				
			||||||
 | 
					            pview, paccessor = self.packed[name]
 | 
				
			||||||
 | 
					            if pview == view and (accessor is None or paccessor == accessor):
 | 
				
			||||||
 | 
					                return
 | 
				
			||||||
 | 
					            raise ValueError("Same name, different accessor or view: %s" % name)
 | 
				
			||||||
 | 
					        self.packed[name] = (view, accessor)
 | 
				
			||||||
 | 
					        self.packed_order.append(name)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def __len__(self):
 | 
				
			||||||
 | 
					        return len(self.packed_order)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @property
 | 
				
			||||||
 | 
					    def align(self):
 | 
				
			||||||
 | 
					        return (4 * len(self) + self.clsize - 1) / self.clsize * self.clsize
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def pack(self, **kwargs):
 | 
				
			||||||
 | 
					        out = np.zeros(self.align/4, dtype=np.float32)
 | 
				
			||||||
 | 
					        subbed_nses = {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for i, name in enumerate(self.packed_order):
 | 
				
			||||||
 | 
					            view, accessor = self.packed[name]
 | 
				
			||||||
 | 
					            if view not in subbed_nses:
 | 
				
			||||||
 | 
					                subbed_nses[view] = view._apply_subs(dict(kwargs))
 | 
				
			||||||
 | 
					            try:
 | 
				
			||||||
 | 
					                val = eval(accessor, subbed_nses[view])
 | 
				
			||||||
 | 
					            except Exception, e:
 | 
				
			||||||
 | 
					                print 'Error while evaluating accessor "%s"' % accessor
 | 
				
			||||||
 | 
					                raise e
 | 
				
			||||||
 | 
					            out[i] = val
 | 
				
			||||||
 | 
					        return out
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @property
 | 
				
			||||||
 | 
					    def decls(self):
 | 
				
			||||||
 | 
					        tmpl = tempita.Template("""
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					{{for name, accessor in values}}
 | 
				
			||||||
 | 
					    float   {{'%-20s' % name}}; // {{accessor}}
 | 
				
			||||||
 | 
					{{endfor}}
 | 
				
			||||||
 | 
					{{if padding > 0}}
 | 
				
			||||||
 | 
					    // Align to fill whole cache lines
 | 
				
			||||||
 | 
					    float   padding[{{padding}}];
 | 
				
			||||||
 | 
					{{endif}}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					} {{tname}};
 | 
				
			||||||
 | 
					""")
 | 
				
			||||||
 | 
					        return tmpl.substitute(
 | 
				
			||||||
 | 
					                values  = [(n, self.packed[n][1]) for n in self.packed_order],
 | 
				
			||||||
 | 
					                padding = len(self) % (self.clsize / 4),
 | 
				
			||||||
 | 
					                tname   = self.tname
 | 
				
			||||||
 | 
					            )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		Reference in New Issue
	
	Block a user