mirror of
				https://github.com/stevenrobertson/cuburn.git
				synced 2025-11-03 18:00:55 -05:00 
			
		
		
		
	Initial commit.
This commit is contained in:
		
							
								
								
									
										57
									
								
								helpers/genprimes.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										57
									
								
								helpers/genprimes.c
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,57 @@
 | 
			
		||||
/* Public domain, FWIW
 | 
			
		||||
 * gcc -o genprimes genprimes.c -lgmp; ./genprimes > primes.bin
 | 
			
		||||
 * see http://www.ast.cam.ac.uk/~stg20/cuda/random/index.html
 | 
			
		||||
 */
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <stdarg.h>
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
#include <gmp.h>
 | 
			
		||||
 | 
			
		||||
/* To verify the primes against the linked URL, compile this instead:
 | 
			
		||||
int main() {
 | 
			
		||||
    FILE *fp = fopen("primes.bin", "r");
 | 
			
		||||
    char stuff[5];
 | 
			
		||||
    while (fread(stuff, 4, 1, fp) == 1) {
 | 
			
		||||
        uint64_t i = *((uint32_t*)stuff);
 | 
			
		||||
        i = (i>>24) + (1<<8)*((i>>16)&0xff) + (1<<16)*((i>>8)&0xff) + (1<<24)*(i&0xff);
 | 
			
		||||
        uint64_t j = i * 4294967296L - 1;
 | 
			
		||||
        uint64_t k = (j-1)/2;
 | 
			
		||||
        printf("%lu %lu %lu\n", i, j, k);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
int main(int argc, char* argv[]) {
 | 
			
		||||
    fprintf(stderr, "Generating list of multipliers for mod(2^32) MWC RNG\n");
 | 
			
		||||
    mpz_t candidate, twotothethirtytwo;
 | 
			
		||||
    mpz_init(candidate);
 | 
			
		||||
    mpz_init_set_d(twotothethirtytwo, (double) (4294967296L));
 | 
			
		||||
 | 
			
		||||
    char bytes[5];
 | 
			
		||||
    bytes[4] = 0;
 | 
			
		||||
    unsigned int i, found=0;
 | 
			
		||||
    for (i = 4294967295L; i > 2147483648; i--) {
 | 
			
		||||
        mpz_set_ui(candidate, i);
 | 
			
		||||
        mpz_mul(candidate, candidate, twotothethirtytwo);
 | 
			
		||||
        mpz_sub_ui(candidate, candidate, 1);
 | 
			
		||||
        if(mpz_probab_prime_p(candidate, 200)) {
 | 
			
		||||
            mpz_sub_ui(candidate, candidate, 1);
 | 
			
		||||
            mpz_tdiv_q_ui(candidate, candidate, 2);
 | 
			
		||||
            if(mpz_probab_prime_p(candidate, 200)) {
 | 
			
		||||
                bytes[0] = (i>>24)&0xff;
 | 
			
		||||
                bytes[1] = (i>>16)&0xff;
 | 
			
		||||
                bytes[2] = (i>>8)&0xff;
 | 
			
		||||
                bytes[3] = i&0xff;
 | 
			
		||||
                fwrite(bytes, 4, 1, stdout);
 | 
			
		||||
                found++;
 | 
			
		||||
                if (!(found&0xff)) fprintf(stderr, ".");
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    fprintf(stderr, "\nFound %d multipliers.\n", found);
 | 
			
		||||
    mpz_clear(candidate);
 | 
			
		||||
    mpz_clear(twotothethirtytwo);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										257
									
								
								main.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										257
									
								
								main.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,257 @@
 | 
			
		||||
#!/usr/bin/python
 | 
			
		||||
#
 | 
			
		||||
# flam3cuda, one of a surprisingly large number of ports of the fractal flame
 | 
			
		||||
# algorithm to NVIDIA GPUs.
 | 
			
		||||
#
 | 
			
		||||
# This one is copyright 2010 Steven Robertson <steven@strobe.cc>
 | 
			
		||||
#
 | 
			
		||||
# This program is free software; you can redistribute it and/or modify
 | 
			
		||||
# it under the terms of the GNU General Public License version 2 or later
 | 
			
		||||
# as published by the Free Software Foundation.
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import sys
 | 
			
		||||
import ctypes
 | 
			
		||||
import struct
 | 
			
		||||
 | 
			
		||||
import tempita
 | 
			
		||||
 | 
			
		||||
# These imports are order-sensitive!
 | 
			
		||||
import pyglet
 | 
			
		||||
import pyglet.gl as gl
 | 
			
		||||
gl.get_current_context()
 | 
			
		||||
 | 
			
		||||
import pycuda.driver as cuda
 | 
			
		||||
import pycuda.gl as cudagl
 | 
			
		||||
import pycuda.gl.autoinit
 | 
			
		||||
from pycuda.compiler import SourceModule
 | 
			
		||||
 | 
			
		||||
from multiprocessing import Process, Queue
 | 
			
		||||
 | 
			
		||||
import numpy as np
 | 
			
		||||
 | 
			
		||||
from fr0stlib import pyflam3
 | 
			
		||||
 | 
			
		||||
# PTX header and functions used for debugging.
#
# The module text below declares PTX version 2.0 for target sm_20 and
# defines two symbols:
#   get_gtid         -- returns %ctaid.x * %ntid.x + %tid.x as a u32.
#   write_to_buffer  -- entry point; each thread stores its get_gtid()
#                       value as a 32-bit word at bufbase + 4 * that value.
prelude = """
.version 2.0
.target sm_20

.func (.reg .u32 $ret) get_gtid ()
{
    .reg .u16 tmp;
    .reg .u32 cta, ncta, tid, gtid;

    mov.u16         tmp,    %ctaid.x;
    cvt.u32.u16     cta,    tmp;
    mov.u16         tmp,    %ntid.x;
    cvt.u32.u16     ncta,   tmp;
    mul24.lo.u32    gtid,   cta,    ncta;

    mov.u16         tmp,    %tid.x;
    cvt.u32.u16     tid,    tmp;
    add.u32         gtid,   gtid,   tid;
    mov.b32         $ret,   gtid;
    ret;
}

.entry write_to_buffer ( .param .u32 bufbase )
{
    .reg .u32 base, gtid, off;

    ld.param.u32    base,       [bufbase];
    call.uni        (off),      get_gtid,   ();
    mad24.lo.u32    base,       off,        4,          base;
    st.volatile.global.b32      [base],     off;
}
"""
 | 
			
		||||
 | 
			
		||||
class CUGenome(pyflam3.Genome):
    """
    Genome subclass whose ``_render`` calls ``pyflam3.flam3_render`` directly.
    """

    def _render(self, frame, trans):
        """
        Render ``frame`` into a freshly allocated unsigned-byte buffer.

        The buffer holds ``(3 + trans) * self.width * self.height`` bytes.
        Returns the tuple ``(buffer, stats, frame)``, where ``stats`` is the
        ``pyflam3.RenderStats`` instance handed to the renderer.
        """
        channels = trans + 3
        pixel_count = self.width * self.height
        out_buffer = (ctypes.c_ubyte * (channels * pixel_count))()
        render_stats = pyflam3.RenderStats()
        pyflam3.flam3_render(
            ctypes.byref(frame),
            out_buffer,
            pyflam3.flam3_field_both,
            channels,
            trans,
            ctypes.byref(render_stats),
        )
        return out_buffer, render_stats, frame
 | 
			
		||||
 | 
			
		||||
class LaunchContext(object):
    """
    Launch state handed to PTX fragments as ``ctx``.

    Attributes assigned by this class:
        block, grid -- launch dimensions; ``None`` until ``set_size`` runs.
        threads     -- product of every block and grid dimension; ``None``
                       until ``set_size`` runs.
        stream      -- the ``cuda.Stream`` created in ``__init__``.
        rand        -- a ``numpy.random.RandomState`` seeded with ``seed``.
    """

    def __init__(self, seed=None):
        """
        Create the stream and the random generator.

        ``seed`` is passed straight to ``numpy.random.RandomState``; ``None``
        lets NumPy pick its own seed.
        """
        self.block, self.grid, self.threads = None, None, None
        self.stream = cuda.Stream()
        # Reach RandomState through the already-imported numpy package; the
        # bare name `mtrand` is never imported in this module.
        self.rand = np.random.RandomState(seed)

    def set_size(self, block, grid):
        """
        Record the launch dimensions and derive the total thread count.

        ``block`` and ``grid`` are sequences of integer dimensions.
        """
        self.block, self.grid = block, grid
        # Explicit loop rather than reduce(): needs no import on any Python
        # version and yields 1 instead of raising on empty dimensions.
        total = 1
        for dim in tuple(block) + tuple(grid):
            total *= dim
        self.threads = total
 | 
			
		||||
 | 
			
		||||
class PTXFragment(object):
    """
    Base class for a piece of templated PTX.

    The PTX module may be generated and compiled more than once, with
    different or incomplete launch context parameters each time; this gives
    the best optimization without a web of hard-coded parameters. Methods
    that produce template text should therefore not touch the GPU, and
    should not rely on context values marked as "tuned" in the LaunchContext
    docstring being present.

    On the final compilation pass every "tuned" value is guaranteed to be
    fixed at its final value for the stream.

    Templates are expanded recursively, always against the same namespace,
    until no "{{" remains.
    """

    def deps(self, ctx):
        """
        Return the list of PTXFragment objects this fragment needs in order
        to compile. Dependency cycles are not allowed.
        """
        return list()

    def subs(self, ctx):
        """
        Return a dict of entries to merge into the template substitution
        namespace. The namespace is fully assembled, dependencies included,
        before any template is evaluated.
        """
        return dict()

    def prelude(self, ctx):
        """
        Return a template string of module-scope code (most likely variable
        declarations). The preludes of all deps are placed above this one.
        """
        return ""

    def entryPrelude(self, ctx):
        """
        Return a template string to place at the top of every entry point
        that depends on this fragment. The entry preludes of all deps are
        placed above this one.
        """
        return ""

    def setUp(self, ctx):
        """
        Perform start-of-stream initialization, for example copying data to
        the device. The base implementation does nothing.
        """
        return None

    def test(self, ctx):
        """
        Run device tests. Return True on success and False on failure, or
        raise an exception. The base implementation always succeeds.
        """
        return True
 | 
			
		||||
 | 
			
		||||
class PTXEntryPoint(PTXFragment):
    """
    A PTXFragment that also provides a PTX entry point.
    """

    def entry(self, ctx):
        """
        Return the template string for this fragment's PTX entry point.
        The base implementation returns None.
        """
        return None

    def call(self, ctx):
        """
        Invoke the entry point on the device. The details are not worked
        out yet; the base implementation does nothing.
        """
        return None
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class DeviceHelpers(PTXFragment):
    """This one's included by default, no need to depend on it"""

    def subs(self, ctx):
        """
        Return the ``PTRT`` substitution: ``'.u64'`` when the host's
        ``void *`` is 8 bytes wide, ``'.u32'`` otherwise.
        """
        pointer_is_64bit = ctypes.sizeof(ctypes.c_void_p) == 8
        pointer_type = '.u64' if pointer_is_64bit else '.u32'
        return {'PTRT': pointer_type}
 | 
			
		||||
 | 
			
		||||
class MWCRandGen(PTXFragment):
 | 
			
		||||
 | 
			
		||||
    _prelude = """
 | 
			
		||||
    .const {{PTRT}} mwc_rng_mults_p;
 | 
			
		||||
    .const {{PTRT}} mwc_rng_seeds_p;
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    def __init__(self):
        """
        Check that ``primes.bin`` exists in the directory of this module.

        Raises EnvironmentError when the file is missing.
        """
        # The module path is spelled `__file__`; an upper-case `__FILE__`
        # is not defined in Python and raises NameError.
        primes_path = os.path.join(os.path.dirname(__file__), 'primes.bin')
        if not os.path.isfile(primes_path):
            raise EnvironmentError('primes.bin not found')
 | 
			
		||||
 | 
			
		||||
    def prelude(self, ctx=None):
        """
        Return the module-scope template declaring the ``mwc_rng_mults_p``
        and ``mwc_rng_seeds_p`` constants.

        ``ctx`` is accepted so the method can be called the same way as
        ``PTXFragment.prelude(self, ctx)``; it is not used, and it defaults
        to None so a call without it still works.
        """
        return self._prelude
 | 
			
		||||
 | 
			
		||||
    def setUp(self, ctx):
        """
        Copy RNG multipliers and random seeds to the device.

        Reads ``primes.bin`` from this module's directory as big-endian
        u32 values, shuffles the first ``ctx.threads * 4`` of them with
        ``ctx.rand``, and copies as many bytes as fit into the module
        global ``mwc_rng_mults``. The global ``mwc_rng_seeds`` is filled
        with bytes from ``ctx.rand``. Both copies are queued on
        ``ctx.stream``.
        """
        # Resolve the file relative to this module rather than the current
        # working directory, and read it in binary mode: it is raw data.
        primes_path = os.path.join(os.path.dirname(__file__), 'primes.bin')
        with open(primes_path, 'rb') as primefp:
            raw = np.frombuffer(primefp.read(), dtype=np.dtype('>u4'))
        # frombuffer() over an immutable string is read-only, so an in-place
        # shuffle would fail. astype() gives a writable copy and converts
        # the values to the host's native byte order, so the bytes uploaded
        # below carry the same numeric values that were read.
        # NOTE(review): assumes the device code reads these as native-order
        # u32 -- confirm against the PTX that consumes mwc_rng_mults.
        mults = raw.astype(np.uint32)
        # Randomness in choosing multipliers is good, but larger multipliers
        # have longer periods, which is also good. This is a compromise.
        ctx.rand.shuffle(mults[:ctx.threads*4])
        # Copy multipliers and seeds to the device
        # NOTE(review): the class prelude declares mwc_rng_mults_p and
        # mwc_rng_seeds_p, while the lookups below use the names without
        # the _p suffix -- confirm which symbols the module defines.
        devmp, devml = ctx.mod.get_global('mwc_rng_mults')
        cuda.memcpy_htod_async(devmp, mults.tostring()[:devml], ctx.stream)
        devsp, devsl = ctx.mod.get_global('mwc_rng_seeds')
        cuda.memcpy_htod_async(devsp, ctx.rand.bytes(devsl), ctx.stream)
 | 
			
		||||
 | 
			
		||||
    def _next_b32(self, dreg):
 | 
			
		||||
        return """
 | 
			
		||||
    mul.wide.u32    mwc_rng_
 | 
			
		||||
        mul.wide.u32
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def templates(self, ctx):
        """
        Return a dict mapping template names to the callables that
        produce them; here ``'mwc_next_b32'`` maps to ``self._next_b32``.
        """
        # A colon, not a comma: `{'name', value}` would build a two-element
        # set instead of a name -> callable mapping.
        return {'mwc_next_b32': self._next_b32}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def test(self, ctx):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def launch(self, ctx):
 | 
			
		||||
        if self.mults
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def main(genome_path):
    """
    Entry point for rendering the genome file at ``genome_path``.

    Currently a no-op that returns None: the rendering and display code
    below is disabled. The docstring also serves as the function body,
    since a ``def`` followed only by comments is a syntax error.
    """

    #with open(genome_path) as fp:
        #genome = CUGenome.from_string(fp.read())[0]
    #genome.width, genome.height = 512, 512
    #genome.sample_density = 1000
    #obuf, stats, frame = genome.render(estimator=3)
    #gc.collect()

        ##q.put(str(obuf))
    ##p = Process(target=render, args=(q, genome_path))
    ##p.start()

    #window = pyglet.window.Window()
    #image = pyglet.image.ImageData(genome.width, genome.height, 'RGB', obuf)
    #tex = image.texture

    #@window.event
    #def on_draw():
        #window.clear()
        #tex.blit(0, 0)

    #pyglet.app.run()
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
    # Require the first command-line argument to name an existing file;
    # otherwise print the usage message and exit with status 1.
    cli_args = sys.argv[1:]
    if not cli_args or not os.path.isfile(cli_args[0]):
        print("First argument must be a path to a genome file")
        sys.exit(1)
    main(cli_args[0])
 | 
			
		||||
 | 
			
		||||
		Reference in New Issue
	
	Block a user