mirror of
https://github.com/stevenrobertson/cuburn.git
synced 2025-02-05 03:30:05 -05:00
Initial commit.
This commit is contained in:
commit
a23ebdcf5f
57
helpers/genprimes.c
Normal file
57
helpers/genprimes.c
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
/* Public domain, FWIW
|
||||||
|
* gcc -o genprimes genprimes.c -lgmp; ./genprimes > primes.bin
|
||||||
|
* see http://www.ast.cam.ac.uk/~stg20/cuda/random/index.html
|
||||||
|
*/
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <gmp.h>
|
||||||
|
|
||||||
|
/* To verify the primes against the linked URL, compile this instead:
|
||||||
|
int main() {
|
||||||
|
FILE *fp = fopen("primes.bin", "r");
|
||||||
|
char stuff[5];
|
||||||
|
while (fread(stuff, 4, 1, fp) == 1) {
|
||||||
|
uint64_t i = *((uint32_t*)stuff);
|
||||||
|
i = (i>>24) + (1<<8)*((i>>16)&0xff) + (1<<16)*((i>>8)&0xff) + (1<<24)*(i&0xff);
|
||||||
|
uint64_t j = i * 4294967296L - 1;
|
||||||
|
uint64_t k = (j-1)/2;
|
||||||
|
printf("%lu %lu %lu\n", i, j, k);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
int main(int argc, char* argv[]) {
|
||||||
|
fprintf(stderr, "Generating list of multipliers for mod(2^32) MWC RNG\n");
|
||||||
|
mpz_t candidate, twotothethirtytwo;
|
||||||
|
mpz_init(candidate);
|
||||||
|
mpz_init_set_d(twotothethirtytwo, (double) (4294967296L));
|
||||||
|
|
||||||
|
char bytes[5];
|
||||||
|
bytes[4] = 0;
|
||||||
|
unsigned int i, found=0;
|
||||||
|
for (i = 4294967295L; i > 2147483648; i--) {
|
||||||
|
mpz_set_ui(candidate, i);
|
||||||
|
mpz_mul(candidate, candidate, twotothethirtytwo);
|
||||||
|
mpz_sub_ui(candidate, candidate, 1);
|
||||||
|
if(mpz_probab_prime_p(candidate, 200)) {
|
||||||
|
mpz_sub_ui(candidate, candidate, 1);
|
||||||
|
mpz_tdiv_q_ui(candidate, candidate, 2);
|
||||||
|
if(mpz_probab_prime_p(candidate, 200)) {
|
||||||
|
bytes[0] = (i>>24)&0xff;
|
||||||
|
bytes[1] = (i>>16)&0xff;
|
||||||
|
bytes[2] = (i>>8)&0xff;
|
||||||
|
bytes[3] = i&0xff;
|
||||||
|
fwrite(bytes, 4, 1, stdout);
|
||||||
|
found++;
|
||||||
|
if (!(found&0xff)) fprintf(stderr, ".");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fprintf(stderr, "\nFound %d multipliers.\n", found);
|
||||||
|
mpz_clear(candidate);
|
||||||
|
mpz_clear(twotothethirtytwo);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
257
main.py
Normal file
257
main.py
Normal file
@ -0,0 +1,257 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
#
|
||||||
|
# flam3cuda, one of a surprisingly large number of ports of the fractal flame
|
||||||
|
# algorithm to NVIDIA GPUs.
|
||||||
|
#
|
||||||
|
# This one is copyright 2010 Steven Robertson <steven@strobe.cc>
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License version 2 or later
|
||||||
|
# as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import ctypes
|
||||||
|
import struct
|
||||||
|
|
||||||
|
import tempita
|
||||||
|
|
||||||
|
# These imports are order-sensitive!
|
||||||
|
import pyglet
|
||||||
|
import pyglet.gl as gl
|
||||||
|
gl.get_current_context()
|
||||||
|
|
||||||
|
import pycuda.driver as cuda
|
||||||
|
import pycuda.gl as cudagl
|
||||||
|
import pycuda.gl.autoinit
|
||||||
|
from pycuda.compiler import SourceModule
|
||||||
|
|
||||||
|
from multiprocessing import Process, Queue
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from fr0stlib import pyflam3
|
||||||
|
|
||||||
|
# PTX module header plus two routines used for debugging:
#   get_gtid        -- returns %ctaid.x * %ntid.x + %tid.x (24-bit multiply)
#   write_to_buffer -- stores that index as a 32-bit word at bufbase + 4*index
prelude = """
.version 2.0
.target sm_20

.func (.reg .u32 $ret) get_gtid ()
{
    .reg .u16 tmp;
    .reg .u32 cta, ncta, tid, gtid;

    mov.u16 tmp, %ctaid.x;
    cvt.u32.u16 cta, tmp;
    mov.u16 tmp, %ntid.x;
    cvt.u32.u16 ncta, tmp;
    mul24.lo.u32 gtid, cta, ncta;

    mov.u16 tmp, %tid.x;
    cvt.u32.u16 tid, tmp;
    add.u32 gtid, gtid, tid;
    mov.b32 $ret, gtid;
    ret;
}

.entry write_to_buffer ( .param .u32 bufbase )
{
    .reg .u32 base, gtid, off;

    ld.param.u32 base, [bufbase];
    call.uni (off), get_gtid, ();
    mad24.lo.u32 base, off, 4, base;
    st.volatile.global.b32 [base], off;
}
"""
|
||||||
|
|
||||||
|
class CUGenome(pyflam3.Genome):
    """Genome subclass whose ``_render`` goes through ``pyflam3.flam3_render``."""

    def _render(self, frame, trans):
        """
        Render ``frame`` into a newly allocated byte buffer.

        The buffer holds ``(3 + trans) * width * height`` unsigned bytes.
        NOTE(review): ``trans`` presumably flags an extra transparency
        channel (0 or 1) -- confirm against the pyflam3 caller.

        Returns the tuple ``(buffer, stats, frame)``.
        """
        channels = 3 + trans
        buffer_type = ctypes.c_ubyte * (channels * self.width * self.height)
        obuf = buffer_type()
        stats = pyflam3.RenderStats()
        pyflam3.flam3_render(
            ctypes.byref(frame),
            obuf,
            pyflam3.flam3_field_both,
            channels,
            trans,
            ctypes.byref(stats),
        )
        return obuf, stats, frame
|
||||||
|
|
||||||
|
class LaunchContext(object):
    """
    State shared by the PTX fragments for one launch stream.

    Attributes:
      block, grid -- launch dimensions; ``None`` until ``set_size`` is called
      threads     -- product of every block and grid dimension; ``None``
                     until ``set_size`` is called
      stream      -- the ``cuda.Stream`` created for this context
      rand        -- numpy ``RandomState`` seeded with ``seed``
    """

    def __init__(self, seed=None):
        """Create the stream and the random state; sizes start out unset."""
        self.block, self.grid, self.threads = None, None, None
        self.stream = cuda.Stream()
        # ``mtrand`` is never imported in this file; reach RandomState
        # through the numpy import that is.
        self.rand = np.random.RandomState(seed)

    def set_size(self, block, grid):
        """
        Record the block and grid dimensions and the total thread count.

        ``block`` and ``grid`` are sequences of integer dimensions; the
        thread count is the product of all of them.
        """
        self.block, self.grid = block, grid
        total = 1
        for dim in tuple(block) + tuple(grid):
            total *= dim
        self.threads = total
|
||||||
|
|
||||||
|
class PTXFragment(object):
    """
    Base class for a piece of templated PTX.

    The PTX module may be regenerated and recompiled several times with
    different or incomplete launch context parameters, so that optimization
    does not rely on a web of hard-coded values. Methods that produce
    template text should therefore not touch the GPU, and should not rely on
    context values marked as "tuned" in the LaunchContext docstring being
    available.

    On the final compilation pass every "tuned" value is guaranteed to hold
    its final value for the stream.

    Template code is processed recursively, with the same namespace each
    time, until no "{{" instances remain.
    """

    def deps(self, ctx):
        """
        List the PTXFragment objects this fragment needs in order to
        compile. Circular dependencies are forbidden.
        """
        return list()

    def subs(self, ctx):
        """
        Give the items this fragment adds to the template substitution
        namespace, as a dict. The complete dict, dependencies included, is
        assembled before any template is evaluated.
        """
        return dict()

    def prelude(self, ctx):
        """
        Give a template string of module-scope code (most likely variable
        declarations). Preludes of all deps are inserted above this one.
        """
        return str()

    def entryPrelude(self, ctx):
        """
        Give a template string to insert at the top of every entry point
        that depends on this fragment. Entry preludes of all deps are
        inserted above this one.
        """
        return str()

    def setUp(self, ctx):
        """
        Perform start-of-stream initialization, e.g. copying data to the
        device. The base implementation does nothing.
        """
        return None

    def test(self, ctx):
        """
        Run device tests: True on success, False on failure, or raise an
        exception. The base implementation always succeeds.
        """
        return True
|
||||||
|
|
||||||
|
class PTXEntryPoint(PTXFragment):
    """A PTXFragment that additionally supplies a PTX entry point."""

    def entry(self, ctx):
        """
        Give the template string for this fragment's PTX entry point.
        The base implementation returns None.
        """
        return None

    def call(self, ctx):
        """
        Invoke the entry point on the device. The details are not worked
        out yet; the base implementation does nothing.
        """
        return None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class DeviceHelpers(PTXFragment):
    """This one's included by default, no need to depend on it"""

    def subs(self, ctx):
        """
        Provide ``PTRT``: ``'.u64'`` when the host's ``void *`` is eight
        bytes wide, ``'.u32'`` otherwise.
        """
        if ctypes.sizeof(ctypes.c_void_p) == 8:
            pointer_type = '.u64'
        else:
            pointer_type = '.u32'
        return {
            'PTRT': pointer_type,
        }
|
||||||
|
|
||||||
|
class MWCRandGen(PTXFragment):
    """
    Multiply-with-carry random number generator fragment.

    Multipliers are read from ``primes.bin``, a file of raw big-endian
    32-bit unsigned integers located next to this module.

    NOTE(review): this class is unfinished in the original commit.
    ``_next_b32``, ``test`` and ``launch`` had no complete body and raise
    ``NotImplementedError`` here until they are written.
    """

    _prelude = """
.const {{PTRT}} mwc_rng_mults_p;
.const {{PTRT}} mwc_rng_seeds_p;
"""

    @staticmethod
    def _primes_path():
        """Return the path of ``primes.bin`` in this module's directory."""
        return os.path.join(os.path.dirname(__file__), 'primes.bin')

    def __init__(self):
        """Raise ``EnvironmentError`` if ``primes.bin`` is missing."""
        if not os.path.isfile(self._primes_path()):
            raise EnvironmentError('primes.bin not found')

    def prelude(self, ctx=None):
        """
        Return the module-scope declarations for this fragment.

        ``ctx`` is accepted (and ignored) so the override matches
        ``PTXFragment.prelude(self, ctx)``.
        """
        return self._prelude

    def setUp(self, ctx):
        """
        Copy shuffled multipliers and random seed bytes to the device.

        Reads ``ctx.rand``, ``ctx.threads``, ``ctx.mod`` and ``ctx.stream``.
        """
        # Load raw big-endian u32 multipliers from primes.bin. Binary mode
        # and the module directory match the check done in __init__.
        with open(self._primes_path(), 'rb') as primefp:
            raw = primefp.read()
        # frombuffer gives a read-only big-endian array; astype produces a
        # writable copy in native byte order, so the in-place shuffle works
        # and the device receives values rather than byte-swapped words.
        mults = np.frombuffer(raw, dtype=np.dtype('>u4')).astype(np.uint32)
        # Randomness in choosing multipliers is good, but larger multipliers
        # have longer periods, which is also good. This is a compromise:
        # only the leading ctx.threads*4 entries are shuffled.
        ctx.rand.shuffle(mults[:ctx.threads*4])
        # Copy multipliers and seeds to the device.
        # NOTE(review): the prelude declares 'mwc_rng_mults_p' and
        # 'mwc_rng_seeds_p', not the names looked up here -- confirm which
        # symbols the finished module is meant to export.
        devmp, devml = ctx.mod.get_global('mwc_rng_mults')
        cuda.memcpy_htod_async(devmp, mults.tobytes()[:devml], ctx.stream)
        devsp, devsl = ctx.mod.get_global('mwc_rng_seeds')
        cuda.memcpy_htod_async(devsp, ctx.rand.bytes(devsl), ctx.stream)

    def _next_b32(self, dreg):
        """
        Template callback registered as ``mwc_next_b32``; not written yet.

        NOTE(review): ``dreg`` presumably names the destination register --
        confirm once the template exists.
        """
        # The original draft breaks off inside the template string after
        # two 'mul.wide.u32' instructions, so there is nothing to return.
        raise NotImplementedError('MWCRandGen._next_b32 is not written yet')

    def templates(self, ctx):
        """Return the template callbacks of this fragment, keyed by name."""
        return {'mwc_next_b32': self._next_b32}

    def test(self, ctx):
        """Device test for the generator; not written yet."""
        # Raise rather than inherit the base class's unconditional True, so
        # a missing test is never reported as a pass.
        raise NotImplementedError('MWCRandGen.test is not written yet')

    def launch(self, ctx):
        """Launch hook; not written yet."""
        # The original draft stops at an incomplete 'if self.mults'.
        raise NotImplementedError('MWCRandGen.launch is not written yet')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def main(genome_path):
    """
    Entry point taking the path of a genome file.

    Currently does nothing: the render-and-display prototype below is
    disabled and kept only for reference.
    """
    #with open(genome_path) as fp:
    #    genome = CUGenome.from_string(fp.read())[0]
    #genome.width, genome.height = 512, 512
    #genome.sample_density = 1000
    #obuf, stats, frame = genome.render(estimator=3)
    #gc.collect()

    ##q.put(str(obuf))
    ##p = Process(target=render, args=(q, genome_path))
    ##p.start()

    #window = pyglet.window.Window()
    #image = pyglet.image.ImageData(genome.width, genome.height, 'RGB', obuf)
    #tex = image.texture

    #@window.event
    #def on_draw():
    #    window.clear()
    #    tex.blit(0, 0)

    #pyglet.app.run()
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # The first command-line argument must name an existing genome file.
    if len(sys.argv) < 2 or not os.path.isfile(sys.argv[1]):
        # sys.exit with a string writes it to stderr and exits with status 1;
        # unlike the print statement this parses on Python 2 and 3 alike.
        sys.exit("First argument must be a path to a genome file")
    main(sys.argv[1])
|
||||||
|
|
Loading…
Reference in New Issue
Block a user