mirror of
				https://github.com/stevenrobertson/cuburn.git
				synced 2025-11-03 18:00:55 -05:00 
			
		
		
		
	Initial commit.
This commit is contained in:
		
							
								
								
									
										57
									
								
								helpers/genprimes.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										57
									
								
								helpers/genprimes.c
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,57 @@
 | 
				
			|||||||
 | 
					/* Public domain, FWIW
 | 
				
			||||||
 | 
					 * gcc -o genprimes -lgmp genprimes.c; ./genprimes > primes.bin
 | 
				
			||||||
 | 
					 * see http://www.ast.cam.ac.uk/~stg20/cuda/random/index.html
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					#include <stdio.h>
 | 
				
			||||||
 | 
					#include <stdarg.h>
 | 
				
			||||||
 | 
					#include <stdlib.h>
 | 
				
			||||||
 | 
					#include <string.h>
 | 
				
			||||||
 | 
					#include <gmp.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* To verify the primes against the linked URL, compile this instead:
 | 
				
			||||||
 | 
					int main() {
 | 
				
			||||||
 | 
					    FILE *fp = fopen("primes.bin", "r");
 | 
				
			||||||
 | 
					    char stuff[5];
 | 
				
			||||||
 | 
					    while (fread(stuff, 4, 1, fp) == 1) {
 | 
				
			||||||
 | 
					        uint64_t i = *((uint32_t*)stuff);
 | 
				
			||||||
 | 
					        i = (i>>24) + (1<<8)*((i>>16)&0xff) + (1<<16)*((i>>8)&0xff) + (1<<24)*(i&0xff);
 | 
				
			||||||
 | 
					        uint64_t j = i * 4294967296L - 1;
 | 
				
			||||||
 | 
					        uint64_t k = (j-1)/2;
 | 
				
			||||||
 | 
					        printf("%lu %lu %lu\n", i, j, k);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int main(int argc, char* argv[]) {
 | 
				
			||||||
 | 
					    fprintf(stderr, "Generating list of multipliers for mod(2^32) MWC RNG\n");
 | 
				
			||||||
 | 
					    mpz_t candidate, twotothethirtytwo;
 | 
				
			||||||
 | 
					    mpz_init(candidate);
 | 
				
			||||||
 | 
					    mpz_init_set_d(twotothethirtytwo, (double) (4294967296L));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    char bytes[5];
 | 
				
			||||||
 | 
					    bytes[4] = 0;
 | 
				
			||||||
 | 
					    unsigned int i, found=0;
 | 
				
			||||||
 | 
					    for (i = 4294967295L; i > 2147483648; i--) {
 | 
				
			||||||
 | 
					        mpz_set_ui(candidate, i);
 | 
				
			||||||
 | 
					        mpz_mul(candidate, candidate, twotothethirtytwo);
 | 
				
			||||||
 | 
					        mpz_sub_ui(candidate, candidate, 1);
 | 
				
			||||||
 | 
					        if(mpz_probab_prime_p(candidate, 200)) {
 | 
				
			||||||
 | 
					            mpz_sub_ui(candidate, candidate, 1);
 | 
				
			||||||
 | 
					            mpz_tdiv_q_ui(candidate, candidate, 2);
 | 
				
			||||||
 | 
					            if(mpz_probab_prime_p(candidate, 200)) {
 | 
				
			||||||
 | 
					                bytes[0] = (i>>24)&0xff;
 | 
				
			||||||
 | 
					                bytes[1] = (i>>16)&0xff;
 | 
				
			||||||
 | 
					                bytes[2] = (i>>8)&0xff;
 | 
				
			||||||
 | 
					                bytes[3] = i&0xff;
 | 
				
			||||||
 | 
					                fwrite(bytes, 4, 1, stdout);
 | 
				
			||||||
 | 
					                found++;
 | 
				
			||||||
 | 
					                if (!(found&0xff)) fprintf(stderr, ".");
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    fprintf(stderr, "\nFound %d multipliers.\n", found);
 | 
				
			||||||
 | 
					    mpz_clear(candidate);
 | 
				
			||||||
 | 
					    mpz_clear(twotothethirtytwo);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										257
									
								
								main.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										257
									
								
								main.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,257 @@
 | 
				
			|||||||
 | 
					#!/usr/bin/python
 | 
				
			||||||
 | 
					#
 | 
				
			||||||
 | 
					# flam3cuda, one of a surprisingly large number of ports of the fractal flame
 | 
				
			||||||
 | 
					# algorithm to NVIDIA GPUs.
 | 
				
			||||||
 | 
					#
 | 
				
			||||||
 | 
					# This one is copyright 2010 Steven Robertson <steven@strobe.cc>
 | 
				
			||||||
 | 
					#
 | 
				
			||||||
 | 
					# This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					# it under the terms of the GNU General Public License version 2 or later
 | 
				
			||||||
 | 
					# as published by the Free Software Foundation.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import os
 | 
				
			||||||
 | 
					import sys
 | 
				
			||||||
 | 
					import ctypes
 | 
				
			||||||
 | 
					import struct
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import tempita
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# These imports are order-sensitive!
 | 
				
			||||||
 | 
					import pyglet
 | 
				
			||||||
 | 
					import pyglet.gl as gl
 | 
				
			||||||
 | 
					gl.get_current_context()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import pycuda.driver as cuda
 | 
				
			||||||
 | 
					import pycuda.gl as cudagl
 | 
				
			||||||
 | 
					import pycuda.gl.autoinit
 | 
				
			||||||
 | 
					from pycuda.compiler import SourceModule
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from multiprocessing import Process, Queue
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import numpy as np
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from fr0stlib import pyflam3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# PTX header and functions used for debugging.
 | 
				
			||||||
 | 
					prelude = """
 | 
				
			||||||
 | 
					.version 2.0
 | 
				
			||||||
 | 
					.target sm_20
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.func (.reg .u32 $ret) get_gtid ()
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    .reg .u16 tmp;
 | 
				
			||||||
 | 
					    .reg .u32 cta, ncta, tid, gtid;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    mov.u16         tmp,    %ctaid.x;
 | 
				
			||||||
 | 
					    cvt.u32.u16     cta,    tmp;
 | 
				
			||||||
 | 
					    mov.u16         tmp,    %ntid.x;
 | 
				
			||||||
 | 
					    cvt.u32.u16     ncta,   tmp;
 | 
				
			||||||
 | 
					    mul24.lo.u32    gtid,   cta,    ncta;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    mov.u16         tmp,    %tid.x;
 | 
				
			||||||
 | 
					    cvt.u32.u16     tid,    tmp;
 | 
				
			||||||
 | 
					    add.u32         gtid,   gtid,   tid;
 | 
				
			||||||
 | 
					    mov.b32         $ret,   gtid;
 | 
				
			||||||
 | 
					    ret;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.entry write_to_buffer ( .param .u32 bufbase )
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    .reg .u32 base, gtid, off;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ld.param.u32    base,       [bufbase];
 | 
				
			||||||
 | 
					    call.uni        (off),      get_gtid,   ();
 | 
				
			||||||
 | 
					    mad24.lo.u32    base,       off,        4,          base;
 | 
				
			||||||
 | 
					    st.volatile.global.b32      [base],     off;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class CUGenome(pyflam3.Genome):
 | 
				
			||||||
 | 
					    def _render(self, frame, trans):
 | 
				
			||||||
 | 
					        obuf = (ctypes.c_ubyte * ((3+trans)*self.width*self.height))()
 | 
				
			||||||
 | 
					        stats = pyflam3.RenderStats()
 | 
				
			||||||
 | 
					        pyflam3.flam3_render(ctypes.byref(frame), obuf, pyflam3.flam3_field_both,
 | 
				
			||||||
 | 
					                     trans+3, trans, ctypes.byref(stats))
 | 
				
			||||||
 | 
					        return obuf, stats, frame
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class LaunchContext(self):
 | 
				
			||||||
 | 
					    def __init__(self, seed=None):
 | 
				
			||||||
 | 
					        self.block, self.grid, self.threads = None, None, None
 | 
				
			||||||
 | 
					        self.stream = cuda.Stream()
 | 
				
			||||||
 | 
					        self.rand = mtrand.RandomState(seed)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def set_size(self, block, grid):
 | 
				
			||||||
 | 
					        self.block, self.grid = block, grid
 | 
				
			||||||
 | 
					        self.threads = reduce(lambda a, b: a*b, self.block + self.grid)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class PTXFragment(object):
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					    Wrapper for sections of template PTX.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    In order to provide the best optimization, and avoid a web of hard-coded
 | 
				
			||||||
 | 
					    parameters, the PTX module may be regenerated and recompiled several times
 | 
				
			||||||
 | 
					    with different or incomplete launch context parameters. To this end, avoid
 | 
				
			||||||
 | 
					    accessing the GPU in such functions, and do not depend on context values
 | 
				
			||||||
 | 
					    which are marked as "tuned" in the LaunchContext docstring being available.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    The final compilation pass is guaranteed to have all "tuned" values fixed
 | 
				
			||||||
 | 
					    in their final values for the stream.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Template code will be processed recursively until all "{{" instances have
 | 
				
			||||||
 | 
					    been replaced, using the same namespace each time.
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def deps(self, ctx):
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        Returns a list of PTXFragment objects on which this object depends
 | 
				
			||||||
 | 
					        for successful compilation. Circular dependencies are forbidden.
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        return []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def subs(self, ctx):
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        Returns a dict of items to add to the template substitution namespace.
 | 
				
			||||||
 | 
					        The entire dict will be assembled, including all dependencies, before
 | 
				
			||||||
 | 
					        any templates are evaluated.
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        return {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def prelude(self, ctx):
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        Returns a template string containing any code (variable declarations,
 | 
				
			||||||
 | 
					        probably) that should be inserted at module scope. The prelude of
 | 
				
			||||||
 | 
					        all deps will be inserted above this prelude.
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        return ""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def entryPrelude(self, ctx):
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        Returns a template string that should be inserted at the top of any
 | 
				
			||||||
 | 
					        entry point which depends on this method. The entry prelude of all
 | 
				
			||||||
 | 
					        deps will be inserted above this entry prelude.
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        return ""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def setUp(self, ctx):
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        Do start-of-stream initialization, such as copying data to the device.
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        pass
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test(self, ctx):
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        Perform device tests. Returns True on success, False on failure,
 | 
				
			||||||
 | 
					        or raises an exception.
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        return True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class PTXEntryPoint(PTXFragment):
 | 
				
			||||||
 | 
					    def entry(self, ctx):
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        Returns a template string corresponding to a PTX entry point.
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        pass
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def call(self, ctx):
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        Calls the entry point on the device. Haven't worked out the details
 | 
				
			||||||
 | 
					        of this one yet.
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        pass
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class DeviceHelpers(PTXFragment):
 | 
				
			||||||
 | 
					    """This one's included by default, no need to depend on it"""
 | 
				
			||||||
 | 
					    def subs(self, ctx):
 | 
				
			||||||
 | 
					        return {
 | 
				
			||||||
 | 
					            'PTRT': ctypes.sizeof(ctypes.c_void_p) == 8 and '.u64' or '.u32',
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class MWCRandGen(PTXFragment):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    _prelude = """
 | 
				
			||||||
 | 
					    .const {{PTRT}} mwc_rng_mults_p;
 | 
				
			||||||
 | 
					    .const {{PTRT}} mwc_rng_seeds_p;
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def __init__(self):
 | 
				
			||||||
 | 
					        if not os.path.isfile(os.path.join(os.path.dirname(__FILE__),
 | 
				
			||||||
 | 
					                                           'primes.bin')):
 | 
				
			||||||
 | 
					            raise EnvironmentError('primes.bin not found')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def prelude(self):
 | 
				
			||||||
 | 
					        return self._prelude
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def setUp(self, ctx):
 | 
				
			||||||
 | 
					        # Load raw big-endian u32 multipliers from primes.bin.
 | 
				
			||||||
 | 
					        with open('primes.bin') as primefp:
 | 
				
			||||||
 | 
					            dt = np.dtype(np.uint32).newbyteorder('B')
 | 
				
			||||||
 | 
					            mults = np.frombuffer(primefp.read(), dtype=dt)
 | 
				
			||||||
 | 
					        # Randomness in choosing multipliers is good, but larger multipliers
 | 
				
			||||||
 | 
					        # have longer periods, which is also good. This is a compromise.
 | 
				
			||||||
 | 
					        ctx.rand.shuffle(mults[:ctx.threads*4])
 | 
				
			||||||
 | 
					        # Copy multipliers and seeds to the device
 | 
				
			||||||
 | 
					        devmp, devml = ctx.mod.get_global('mwc_rng_mults')
 | 
				
			||||||
 | 
					        cuda.memcpy_htod_async(devmp, mults.tostring()[:devml], ctx.stream)
 | 
				
			||||||
 | 
					        devsp, devsl = ctx.mod.get_global('mwc_rng_seeds')
 | 
				
			||||||
 | 
					        cuda.memcpy_htod_async(devsp, ctx.rand.bytes(devsl), ctx.stream)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def _next_b32(self, dreg):
 | 
				
			||||||
 | 
					        return """
 | 
				
			||||||
 | 
					    mul.wide.u32    mwc_rng_
 | 
				
			||||||
 | 
					        mul.wide.u32
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def templates(self, ctx):
 | 
				
			||||||
 | 
					        return {'mwc_next_b32', self._next_b32}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test(self, ctx):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def launch(self, ctx):
 | 
				
			||||||
 | 
					        if self.mults
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def main(genome_path):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    #with open(genome_path) as fp:
 | 
				
			||||||
 | 
					        #genome = CUGenome.from_string(fp.read())[0]
 | 
				
			||||||
 | 
					    #genome.width, genome.height = 512, 512
 | 
				
			||||||
 | 
					    #genome.sample_density = 1000
 | 
				
			||||||
 | 
					    #obuf, stats, frame = genome.render(estimator=3)
 | 
				
			||||||
 | 
					    #gc.collect()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        ##q.put(str(obuf))
 | 
				
			||||||
 | 
					    ##p = Process(target=render, args=(q, genome_path))
 | 
				
			||||||
 | 
					    ##p.start()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    #window = pyglet.window.Window()
 | 
				
			||||||
 | 
					    #image = pyglet.image.ImageData(genome.width, genome.height, 'RGB', obuf)
 | 
				
			||||||
 | 
					    #tex = image.texture
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    #@window.event
 | 
				
			||||||
 | 
					    #def on_draw():
 | 
				
			||||||
 | 
					        #window.clear()
 | 
				
			||||||
 | 
					        #tex.blit(0, 0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    #pyglet.app.run()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if __name__ == "__main__":
 | 
				
			||||||
 | 
					    if len(sys.argv) < 2 or not os.path.isfile(sys.argv[1]):
 | 
				
			||||||
 | 
					        print "First argument must be a path to a genome file"
 | 
				
			||||||
 | 
					        sys.exit(1)
 | 
				
			||||||
 | 
					    main(sys.argv[1])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		Reference in New Issue
	
	Block a user