mirror of
				https://github.com/stevenrobertson/cuburn.git
				synced 2025-11-03 18:00:55 -05:00 
			
		
		
		
	Some amount of dynamic rendering
This commit is contained in:
		@ -1,19 +1,9 @@
 | 
				
			|||||||
"""
 | 
					"""
 | 
				
			||||||
Contains the PTX fragments which will drive the device.
 | 
					Contains the PTX fragments which will drive the device, and helper functions
 | 
				
			||||||
 | 
					to combine those fragments.
 | 
				
			||||||
"""
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Basic headers, utility functions, and so on
 | 
					import util
 | 
				
			||||||
base = """
 | 
					import mwc
 | 
				
			||||||
#include<cuda.h>
 | 
					import iter
 | 
				
			||||||
#include<stdint.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// TODO: use launch parameter preconfig to eliminate unnecessary parts
 | 
					 | 
				
			||||||
__device__
 | 
					 | 
				
			||||||
uint32_t gtid() {
 | 
					 | 
				
			||||||
    return threadIdx.x + blockDim.x *
 | 
					 | 
				
			||||||
            (threadIdx.y + blockDim.y *
 | 
					 | 
				
			||||||
                (threadIdx.z + blockDim.z *
 | 
					 | 
				
			||||||
                    (blockIdx.x + (gridDim.x * blockIdx.y))));
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
"""
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -7,62 +7,86 @@ from pycuda.driver import In, Out, InOut
 | 
				
			|||||||
from pycuda.compiler import SourceModule
 | 
					from pycuda.compiler import SourceModule
 | 
				
			||||||
import numpy as np
 | 
					import numpy as np
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from cuburn import code
 | 
					 | 
				
			||||||
from cuburn.code import mwc
 | 
					from cuburn.code import mwc
 | 
				
			||||||
 | 
					from cuburn.code.util import *
 | 
				
			||||||
 | 
					
 | 
				
			||||||
src = r"""
 | 
					import tempita
 | 
				
			||||||
#define FUSE 20
 | 
					 | 
				
			||||||
#define MAXOOB 10
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
typedef struct {
 | 
					class IterCode(HunkOCode):
 | 
				
			||||||
    // Number of iterations to perform, *per thread*.
 | 
					    def __init__(self, features):
 | 
				
			||||||
    uint32_t    niters;
 | 
					        self.features = features
 | 
				
			||||||
 | 
					        self.packer = DataPacker('iter_info')
 | 
				
			||||||
 | 
					        iterbody = self._iterbody()
 | 
				
			||||||
 | 
					        bodies = [self._xfbody(i,x) for i,x in enumerate(self.features.xforms)]
 | 
				
			||||||
 | 
					        bodies.append(iterbody)
 | 
				
			||||||
 | 
					        self.defs = '\n'.join(bodies)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Number of accumulators per row and column in the accum buffer
 | 
					    def _xfbody(self, xfid, xform):
 | 
				
			||||||
    uint32_t    accwidth, accheight;
 | 
					        px = self.packer.view('info', 'xf%d_' % xfid)
 | 
				
			||||||
} iter_info;
 | 
					        px.sub('xf', 'cp.xforms[%d]' % xfid)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        tmpl = tempita.Template("""
 | 
				
			||||||
 | 
					__device__
 | 
				
			||||||
 | 
					void apply_xf{{xfid}}(float *ix, float *iy, float *icolor,
 | 
				
			||||||
 | 
					                      const iter_info *info) {
 | 
				
			||||||
 | 
					    float tx, ty, ox = *ix, oy = *iy;
 | 
				
			||||||
 | 
					    {{apply_affine('ox', 'oy', 'tx', 'ty', px, 'xf.c', 'pre')}}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // tiny little TODO: variations
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    *ix = tx;
 | 
				
			||||||
 | 
					    *iy = ty;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    float csp = {{px.get('xf.color_speed')}};
 | 
				
			||||||
 | 
					    *icolor = *icolor * (1.0f - csp) + {{px.get('xf.color')}} * csp;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					""")
 | 
				
			||||||
 | 
					        g = dict(globals())
 | 
				
			||||||
 | 
					        g.update(locals())
 | 
				
			||||||
 | 
					        return tmpl.substitute(g)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def _iterbody(self):
 | 
				
			||||||
 | 
					        tmpl = tempita.Template("""
 | 
				
			||||||
__global__
 | 
					__global__
 | 
				
			||||||
void silly(mwc_st *msts, iter_info *infos, float *accbuf, float *denbuf) {
 | 
					void iter(mwc_st *msts, const iter_info *infos, float *accbuf, float *denbuf) {
 | 
				
			||||||
    mwc_st rctx = msts[gtid()];
 | 
					    mwc_st rctx = msts[gtid()];
 | 
				
			||||||
    iter_info *info = &(infos[blockIdx.x]);
 | 
					    const iter_info *info = &(infos[blockIdx.x]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    float consec_bad = -FUSE;
 | 
					    int consec_bad = -{{features.fuse}};
 | 
				
			||||||
    float nsamps = info->niters;
 | 
					    int nsamps = 500;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    float x, y, color;
 | 
					    float x, y, color;
 | 
				
			||||||
    x = mwc_next_11(&rctx);
 | 
					    x = mwc_next_11(&rctx);
 | 
				
			||||||
    y = mwc_next_11(&rctx);
 | 
					    y = mwc_next_11(&rctx);
 | 
				
			||||||
    color = mwc_next_01(&rctx);
 | 
					    color = mwc_next_01(&rctx);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    while (nsamps > 0.0f) {
 | 
					    while (nsamps > 0) {
 | 
				
			||||||
        float xfsel = mwc_next_01(&rctx);
 | 
					        float xfsel = mwc_next_01(&rctx);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        x *= 0.5f;
 | 
					        {{for xfid, xform in enumerate(features.xforms)}}
 | 
				
			||||||
        y *= 0.5f;
 | 
					        if (xfsel < {{packer.get('cp.norm_density[%d]' % xfid)}}) {
 | 
				
			||||||
        color *= 0.5f;
 | 
					            apply_xf{{xfid}}(&x, &y, &color, info);
 | 
				
			||||||
        if (xfsel < 0.33f) {
 | 
					        } else
 | 
				
			||||||
            color += 0.25f;
 | 
					        {{endfor}}
 | 
				
			||||||
            x += 0.5f;
 | 
					        {
 | 
				
			||||||
        } else if (xfsel < 0.66f) {
 | 
					            denbuf[0] = xfsel;
 | 
				
			||||||
            color += 0.5f;
 | 
					            break; // TODO: fail here
 | 
				
			||||||
            y += 0.5f;
 | 
					 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if (consec_bad < 0.0f) {
 | 
					        if (consec_bad < 0) {
 | 
				
			||||||
            consec_bad++;
 | 
					            consec_bad++;
 | 
				
			||||||
            continue;
 | 
					            continue;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if (x <= -1.0f || x >= 1.0f || y <= -1.0f || y >= 1.0f
 | 
					        if (x <= -1.0f || x >= 1.0f || y <= -1.0f || y >= 1.0f
 | 
				
			||||||
            || consec_bad < 0.0f) {
 | 
					            || consec_bad < 0) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            consec_bad++;
 | 
					            consec_bad++;
 | 
				
			||||||
            if (consec_bad > MAXOOB) {
 | 
					            if (consec_bad > {{features.max_oob}}) {
 | 
				
			||||||
                x = mwc_next_11(&rctx);
 | 
					                x = mwc_next_11(&rctx);
 | 
				
			||||||
                y = mwc_next_11(&rctx);
 | 
					                y = mwc_next_11(&rctx);
 | 
				
			||||||
                color = mwc_next_01(&rctx);
 | 
					                color = mwc_next_01(&rctx);
 | 
				
			||||||
                consec_bad = -FUSE;
 | 
					                consec_bad = -{{features.fuse}};
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
            continue;
 | 
					            continue;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
@ -80,26 +104,28 @@ void silly(mwc_st *msts, iter_info *infos, float *accbuf, float *denbuf) {
 | 
				
			|||||||
        nsamps--;
 | 
					        nsamps--;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
"""
 | 
					""")
 | 
				
			||||||
 | 
					        return tmpl.substitute(
 | 
				
			||||||
 | 
					                features = self.features,
 | 
				
			||||||
 | 
					                packer = self.packer.view('info'))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def silly():
 | 
					
 | 
				
			||||||
    mod = SourceModule(code.base + mwc.src + src)
 | 
					def silly(features, cp):
 | 
				
			||||||
    abuf = np.zeros((512, 512, 4), dtype=np.float32)
 | 
					    abuf = np.zeros((512, 512, 4), dtype=np.float32)
 | 
				
			||||||
    dbuf = np.zeros((512, 512), dtype=np.float32)
 | 
					    dbuf = np.zeros((512, 512), dtype=np.float32)
 | 
				
			||||||
    seeds = mwc.build_mwc_seeds(512 * 24, seed=5)
 | 
					    seeds = mwc.MWC.make_seeds(512 * 24)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    info = np.zeros(3, dtype=np.uint32)
 | 
					    iter = IterCode(features)
 | 
				
			||||||
    info[0] = 5000
 | 
					    code = assemble_code(BaseCode, mwc.MWC, iter, iter.packer)
 | 
				
			||||||
    info[1] = 512
 | 
					    print code
 | 
				
			||||||
    info[2] = 512
 | 
					    mod = SourceModule(code)
 | 
				
			||||||
    info = np.repeat([info], 24, axis=0)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    fun = mod.get_function("silly")
 | 
					    info = iter.packer.pack(cp=cp)
 | 
				
			||||||
 | 
					    print info
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fun = mod.get_function("iter")
 | 
				
			||||||
    fun(InOut(seeds), In(info), InOut(abuf), InOut(dbuf),
 | 
					    fun(InOut(seeds), In(info), InOut(abuf), InOut(dbuf),
 | 
				
			||||||
        block=(512,1,1), grid=(24,1), time_kernel=True)
 | 
					        block=(512,1,1), grid=(1,1), time_kernel=True)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    print abuf
 | 
					 | 
				
			||||||
    print dbuf
 | 
					 | 
				
			||||||
    print sum(dbuf)
 | 
					 | 
				
			||||||
    return abuf, dbuf
 | 
					    return abuf, dbuf
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -2,23 +2,21 @@
 | 
				
			|||||||
The multiply-with-carry random number generator.
 | 
					The multiply-with-carry random number generator.
 | 
				
			||||||
"""
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import time
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
import pycuda.driver as cuda
 | 
					 | 
				
			||||||
from pycuda.compiler import SourceModule
 | 
					 | 
				
			||||||
import numpy as np
 | 
					import numpy as np
 | 
				
			||||||
 | 
					import tempita
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from jinja2 import Template
 | 
					from cuburn.code.util import *
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from cuburn import code
 | 
					class MWC(HunkOCode):
 | 
				
			||||||
 | 
					    decls = """
 | 
				
			||||||
src = r"""
 | 
					 | 
				
			||||||
typedef struct {
 | 
					typedef struct {
 | 
				
			||||||
    uint32_t    mul;
 | 
					    uint32_t    mul;
 | 
				
			||||||
    uint32_t    state;
 | 
					    uint32_t    state;
 | 
				
			||||||
    uint32_t    carry;
 | 
					    uint32_t    carry;
 | 
				
			||||||
} mwc_st;
 | 
					} mwc_st;
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    defs = r"""
 | 
				
			||||||
__device__ uint32_t mwc_next(mwc_st *st) {
 | 
					__device__ uint32_t mwc_next(mwc_st *st) {
 | 
				
			||||||
    asm("{\n\t.reg .u64 val;\n\t"
 | 
					    asm("{\n\t.reg .u64 val;\n\t"
 | 
				
			||||||
        "cvt.u64.u32  val, %0;\n\t"
 | 
					        "cvt.u64.u32  val, %0;\n\t"
 | 
				
			||||||
@ -35,10 +33,38 @@ __device__ float mwc_next_01(mwc_st *st) {
 | 
				
			|||||||
__device__ float mwc_next_11(mwc_st *st) {
 | 
					__device__ float mwc_next_11(mwc_st *st) {
 | 
				
			||||||
    return ((int32_t) mwc_next(st)) * (1.0f / 2147483648.0f);
 | 
					    return ((int32_t) mwc_next(st)) * (1.0f / 2147483648.0f);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					 | 
				
			||||||
"""
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
testsrc = code.base + src + """
 | 
					    @staticmethod
 | 
				
			||||||
 | 
					    def make_seeds(nthreads, host_seed=None):
 | 
				
			||||||
 | 
					        if host_seed:
 | 
				
			||||||
 | 
					            rand = np.random.RandomState(host_seed)
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            rand = np.random
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Load raw big-endian u32 multipliers from primes.bin.
 | 
				
			||||||
 | 
					        with open('primes.bin') as primefp:
 | 
				
			||||||
 | 
					            dt = np.dtype(np.uint32).newbyteorder('B')
 | 
				
			||||||
 | 
					            mults = np.frombuffer(primefp.read(), dtype=dt)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Create the seed structures. TODO: check that struct is 4-byte aligned
 | 
				
			||||||
 | 
					        seeds = np.empty((3, nthreads), dtype=np.uint32, order='F')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Randomness in choosing multipliers is good, but larger multipliers
 | 
				
			||||||
 | 
					        # have longer periods, which is also good. This is a compromise.
 | 
				
			||||||
 | 
					        mults = np.array(mults[:nthreads*4])
 | 
				
			||||||
 | 
					        rand.shuffle(mults)
 | 
				
			||||||
 | 
					        seeds[0][:] = mults[:nthreads]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Intentionally excludes both 0 and (2^32-1), as they can lead to
 | 
				
			||||||
 | 
					        # degenerate sequences of period 0
 | 
				
			||||||
 | 
					        seeds[1] = rand.randint(1, 0xffffffff, size=nthreads)
 | 
				
			||||||
 | 
					        seeds[2] = rand.randint(1, 0xffffffff, size=nthreads)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        return seeds
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class MWCTest(HunkOCode):
 | 
				
			||||||
 | 
					    defs = """
 | 
				
			||||||
__global__ void test_mwc(mwc_st *msts, uint64_t *sums, float nrounds) {
 | 
					__global__ void test_mwc(mwc_st *msts, uint64_t *sums, float nrounds) {
 | 
				
			||||||
    mwc_st rctx = msts[gtid()];
 | 
					    mwc_st rctx = msts[gtid()];
 | 
				
			||||||
    uint64_t sum = 0;
 | 
					    uint64_t sum = 0;
 | 
				
			||||||
@ -48,69 +74,45 @@ __global__ void test_mwc(mwc_st *msts, uint64_t *sums, float nrounds) {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
"""
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def build_mwc_seeds(nthreads, seed=None):
 | 
					    @classmethod
 | 
				
			||||||
    if seed:
 | 
					    def test_mwc(cls, rounds=5000, nblocks=64, blockwidth=512):
 | 
				
			||||||
        rand = np.random.RandomState(seed)
 | 
					        import pycuda.driver as cuda
 | 
				
			||||||
    else:
 | 
					        from pycuda.compiler import SourceModule
 | 
				
			||||||
        rand = np.random
 | 
					        import time
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Load raw big-endian u32 multipliers from primes.bin.
 | 
					        nthreads = blockwidth * nblocks
 | 
				
			||||||
    with open('primes.bin') as primefp:
 | 
					        seeds = MWC.make_seeds(nthreads, host_seed = 5)
 | 
				
			||||||
        dt = np.dtype(np.uint32).newbyteorder('B')
 | 
					        dseeds = cuda.to_device(seeds)
 | 
				
			||||||
        mults = np.frombuffer(primefp.read(), dtype=dt)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Create the seed structures. TODO: check that struct is 4-byte aligned
 | 
					        mod = SourceModule(assemble_code(BaseCode, MWC, cls))
 | 
				
			||||||
    seeds = np.empty((3, nthreads), dtype=np.uint32, order='F')
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Randomness in choosing multipliers is good, but larger multipliers
 | 
					        for trial in range(2):
 | 
				
			||||||
    # have longer periods, which is also good. This is a compromise.
 | 
					            print "Trial %d, on CPU: " % trial,
 | 
				
			||||||
    mults = np.array(mults[:nthreads*4])
 | 
					            sums = np.zeros(nthreads, dtype=np.uint64)
 | 
				
			||||||
    rand.shuffle(mults)
 | 
					            ctime = time.time()
 | 
				
			||||||
    seeds[0][:] = mults[:nthreads]
 | 
					            mults = seeds[0].astype(np.uint64)
 | 
				
			||||||
 | 
					            states = seeds[1]
 | 
				
			||||||
 | 
					            carries = seeds[2]
 | 
				
			||||||
 | 
					            for i in range(rounds):
 | 
				
			||||||
 | 
					                step = np.frombuffer((mults * states + carries).data,
 | 
				
			||||||
 | 
					                           dtype=np.uint32).reshape((2, nthreads), order='F')
 | 
				
			||||||
 | 
					                states[:] = step[0]
 | 
				
			||||||
 | 
					                carries[:] = step[1]
 | 
				
			||||||
 | 
					                sums += states
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Intentionally excludes both 0 and (2^32-1), as they can lead to
 | 
					            ctime = time.time() - ctime
 | 
				
			||||||
    # degenerate sequences of period 0
 | 
					            print "Took %g seconds." % ctime
 | 
				
			||||||
    seeds[1] = rand.randint(1, 0xffffffff, size=nthreads)
 | 
					 | 
				
			||||||
    seeds[2] = rand.randint(1, 0xffffffff, size=nthreads)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return seeds
 | 
					            print "Trial %d, on device: " % trial,
 | 
				
			||||||
 | 
					            dsums = cuda.mem_alloc(8*nthreads)
 | 
				
			||||||
def test_mwc():
 | 
					            fun = mod.get_function("test_mwc")
 | 
				
			||||||
    rounds = 5000
 | 
					            dtime = fun(dseeds, dsums, np.float32(rounds),
 | 
				
			||||||
    nblocks = 64
 | 
					                        block=(blockwidth,1,1), grid=(nblocks,1),
 | 
				
			||||||
    nthreads = 512 * nblocks
 | 
					                        time_kernel=True)
 | 
				
			||||||
 | 
					            print "Took %g seconds." % dtime
 | 
				
			||||||
    seeds = build_mwc_seeds(nthreads, seed = 5)
 | 
					            dsums = cuda.from_device(dsums, nthreads, np.uint64)
 | 
				
			||||||
    dseeds = cuda.to_device(seeds)
 | 
					            if not np.all(np.equal(sums, dsums)):
 | 
				
			||||||
 | 
					                print "Sum discrepancy!"
 | 
				
			||||||
    mod = SourceModule(testsrc)
 | 
					                print sums
 | 
				
			||||||
 | 
					                print dsums
 | 
				
			||||||
    for trial in range(2):
 | 
					 | 
				
			||||||
        print "Trial %d, on CPU: " % trial,
 | 
					 | 
				
			||||||
        sums = np.zeros(nthreads, dtype=np.uint64)
 | 
					 | 
				
			||||||
        ctime = time.time()
 | 
					 | 
				
			||||||
        mults = seeds[0].astype(np.uint64)
 | 
					 | 
				
			||||||
        states = seeds[1]
 | 
					 | 
				
			||||||
        carries = seeds[2]
 | 
					 | 
				
			||||||
        for i in range(rounds):
 | 
					 | 
				
			||||||
            step = np.frombuffer((mults * states + carries).data,
 | 
					 | 
				
			||||||
                       dtype=np.uint32).reshape((2, nthreads), order='F')
 | 
					 | 
				
			||||||
            states[:] = step[0]
 | 
					 | 
				
			||||||
            carries[:] = step[1]
 | 
					 | 
				
			||||||
            sums += states
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        ctime = time.time() - ctime
 | 
					 | 
				
			||||||
        print "Took %g seconds." % ctime
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        print "Trial %d, on device: " % trial,
 | 
					 | 
				
			||||||
        dsums = cuda.mem_alloc(8*nthreads)
 | 
					 | 
				
			||||||
        fun = mod.get_function("test_mwc")
 | 
					 | 
				
			||||||
        dtime = fun(dseeds, dsums, np.float32(rounds),
 | 
					 | 
				
			||||||
                    block=(512,1,1), grid=(nblocks,1), time_kernel=True)
 | 
					 | 
				
			||||||
        print "Took %g seconds." % dtime
 | 
					 | 
				
			||||||
        dsums = cuda.from_device(dsums, nthreads, np.uint64)
 | 
					 | 
				
			||||||
        if not np.all(np.equal(sums, dsums)):
 | 
					 | 
				
			||||||
            print "Sum discrepancy!"
 | 
					 | 
				
			||||||
            print sums
 | 
					 | 
				
			||||||
            print dsums
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -14,7 +14,17 @@ from cuburn.variations import Variations
 | 
				
			|||||||
Point = lambda x, y: np.array([x, y], dtype=np.double)
 | 
					Point = lambda x, y: np.array([x, y], dtype=np.double)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Genome(pyflam3.Genome):
 | 
					class Genome(pyflam3.Genome):
 | 
				
			||||||
    pass
 | 
					    @classmethod
 | 
				
			||||||
 | 
					    def from_string(cls, *args, **kwargs):
 | 
				
			||||||
 | 
					        gnms = super(Genome, cls).from_string(*args, **kwargs)
 | 
				
			||||||
 | 
					        for g in gnms: g._init()
 | 
				
			||||||
 | 
					        return gnms
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def _init(self):
 | 
				
			||||||
 | 
					        self.xforms = [self.xform[i] for i in range(self.num_xforms)]
 | 
				
			||||||
 | 
					        dens = np.array([x.density for x in self.xforms])
 | 
				
			||||||
 | 
					        dens /= np.sum(dens)
 | 
				
			||||||
 | 
					        self.norm_density = [np.sum(dens[:i+1]) for i in range(len(dens))]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class XForm(object):
 | 
					class XForm(object):
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
@ -99,7 +109,7 @@ class Frame(object):
 | 
				
			|||||||
                cp.camera = Camera(self._frame, cp, filters)
 | 
					                cp.camera = Camera(self._frame, cp, filters)
 | 
				
			||||||
                cp.nsamples = (cp.camera.sample_density *
 | 
					                cp.nsamples = (cp.camera.sample_density *
 | 
				
			||||||
                               center.width * center.height) / ncps
 | 
					                               center.width * center.height) / ncps
 | 
				
			||||||
                cp.xforms = XForm.parse(cp)
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        print "Expected writes:", (
 | 
					        print "Expected writes:", (
 | 
				
			||||||
                cp.camera.sample_density * center.width * center.height)
 | 
					                cp.camera.sample_density * center.width * center.height)
 | 
				
			||||||
@ -190,9 +200,10 @@ class Features(object):
 | 
				
			|||||||
    """
 | 
					    """
 | 
				
			||||||
    # Constant parameters which control handling of out-of-frame samples:
 | 
					    # Constant parameters which control handling of out-of-frame samples:
 | 
				
			||||||
    # Number of iterations to iterate without write after new point
 | 
					    # Number of iterations to iterate without write after new point
 | 
				
			||||||
    fuse = 2
 | 
					    fuse = 20
 | 
				
			||||||
    # Maximum consecutive out-of-frame points before picking new point
 | 
					    # Maximum consecutive out-of-bounds points before picking new point
 | 
				
			||||||
    max_bad = 3
 | 
					    max_oob = 10
 | 
				
			||||||
 | 
					    max_nxforms = 12
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Height of the texture pallete which gets uploaded to the GPU (assuming
 | 
					    # Height of the texture pallete which gets uploaded to the GPU (assuming
 | 
				
			||||||
    # that palette-from-texture is enabled). For most genomes, this doesn't
 | 
					    # that palette-from-texture is enabled). For most genomes, this doesn't
 | 
				
			||||||
@ -205,7 +216,6 @@ class Features(object):
 | 
				
			|||||||
        any = lambda l: bool(filter(None, map(l, genomes)))
 | 
					        any = lambda l: bool(filter(None, map(l, genomes)))
 | 
				
			||||||
        self.max_ntemporal_samples = max(
 | 
					        self.max_ntemporal_samples = max(
 | 
				
			||||||
                [cp.nbatches * cp.ntemporal_samples for cp in genomes])
 | 
					                [cp.nbatches * cp.ntemporal_samples for cp in genomes])
 | 
				
			||||||
        self.camera_rotation = any(lambda cp: cp.rotate)
 | 
					 | 
				
			||||||
        self.non_box_temporal_filter = genomes[0].temporal_filter_type
 | 
					        self.non_box_temporal_filter = genomes[0].temporal_filter_type
 | 
				
			||||||
        self.palette_mode = genomes[0].palette_mode and "linear" or "nearest"
 | 
					        self.palette_mode = genomes[0].palette_mode and "linear" or "nearest"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -214,6 +224,7 @@ class Features(object):
 | 
				
			|||||||
            "number of xforms! (try running through flam3-genome first)")
 | 
					            "number of xforms! (try running through flam3-genome first)")
 | 
				
			||||||
        self.xforms = [XFormFeatures([x[i] for x in xforms], i)
 | 
					        self.xforms = [XFormFeatures([x[i] for x in xforms], i)
 | 
				
			||||||
                       for i in range(len(xforms[0]))]
 | 
					                       for i in range(len(xforms[0]))]
 | 
				
			||||||
 | 
					        self.nxforms = len(self.xforms)
 | 
				
			||||||
        if any(lambda cp: cp.final_xform_enable):
 | 
					        if any(lambda cp: cp.final_xform_enable):
 | 
				
			||||||
            raise NotImplementedError("Final xform")
 | 
					            raise NotImplementedError("Final xform")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										7
									
								
								main.py
									
									
									
									
									
								
							
							
						
						
									
										7
									
								
								main.py
									
									
									
									
									
								
							@ -24,16 +24,15 @@ import pyglet
 | 
				
			|||||||
import pycuda.autoinit
 | 
					import pycuda.autoinit
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from cuburn.render import *
 | 
					from cuburn.render import *
 | 
				
			||||||
from cuburn.code.mwc import test_mwc
 | 
					from cuburn.code.mwc import MWCTest
 | 
				
			||||||
from cuburn.code.iter import silly
 | 
					from cuburn.code.iter import silly
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
def main(args):
 | 
					def main(args):
 | 
				
			||||||
 | 
					    #MWCTest.test_mwc()
 | 
				
			||||||
    with open(args[-1]) as fp:
 | 
					    with open(args[-1]) as fp:
 | 
				
			||||||
        genomes = Genome.from_string(fp.read())
 | 
					        genomes = Genome.from_string(fp.read())
 | 
				
			||||||
    anim = Animation(genomes)
 | 
					    anim = Animation(genomes)
 | 
				
			||||||
 | 
					    accum, den = silly(anim.features, genomes[0])
 | 
				
			||||||
    accum, den = silly()
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if False:
 | 
					    if False:
 | 
				
			||||||
        bins = anim.render_frame()
 | 
					        bins = anim.render_frame()
 | 
				
			||||||
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user