mirror of
				https://github.com/stevenrobertson/cuburn.git
				synced 2025-11-03 18:00:55 -05:00 
			
		
		
		
	New genome representation, and device interp.
This commit is contained in:
		@ -2,8 +2,8 @@
 | 
			
		||||
from cuburn.code.util import *
 | 
			
		||||
 | 
			
		||||
class ColorClip(HunkOCode):
 | 
			
		||||
    def __init__(self, features):
 | 
			
		||||
        self.defs = self.defs_tmpl.substitute(features=features)
 | 
			
		||||
    def __init__(self, info):
 | 
			
		||||
        self.defs = self.defs_tmpl.substitute(info=info)
 | 
			
		||||
 | 
			
		||||
    defs_tmpl = Template('''
 | 
			
		||||
__global__
 | 
			
		||||
@ -63,7 +63,7 @@ void colorclip(float4 *pixbuf, float gamma, float vibrancy, float highpow,
 | 
			
		||||
    pix.y += (1.0f - vibrancy) * powf(opix.y, gamma);
 | 
			
		||||
    pix.z += (1.0f - vibrancy) * powf(opix.z, gamma);
 | 
			
		||||
 | 
			
		||||
    {{if features.alpha_output_channel}}
 | 
			
		||||
    {{if info.alpha_output_channel}}
 | 
			
		||||
    float 1_alpha = 1 / alpha;
 | 
			
		||||
    pix.x *= 1_alpha;
 | 
			
		||||
    pix.y *= 1_alpha;
 | 
			
		||||
@ -94,13 +94,13 @@ class DensityEst(HunkOCode):
 | 
			
		||||
    # Note, changing this does not yet have any effect, it's just informational
 | 
			
		||||
    MAX_WIDTH=15
 | 
			
		||||
 | 
			
		||||
    def __init__(self, features, cp):
 | 
			
		||||
        self.features, self.cp = features, cp
 | 
			
		||||
    def __init__(self, info):
 | 
			
		||||
        self.info = info
 | 
			
		||||
 | 
			
		||||
    headers = "#include<math_constants.h>\n"
 | 
			
		||||
    @property
 | 
			
		||||
    def defs(self):
 | 
			
		||||
        return self.defs_tmpl.substitute(features=self.features, cp=self.cp)
 | 
			
		||||
        return self.defs_tmpl.substitute(info=self.info)
 | 
			
		||||
 | 
			
		||||
    defs_tmpl = Template('''
 | 
			
		||||
#define W 15        // Filter width (regardless of standard deviation chosen)
 | 
			
		||||
@ -147,9 +147,9 @@ void density_est(float4 *pixbuf, float4 *outbuf,
 | 
			
		||||
        de_r[i] = de_g[i] = de_b[i] = de_a[i] = 0.0f;
 | 
			
		||||
    __syncthreads();
 | 
			
		||||
 | 
			
		||||
    for (int imrow = threadIdx.y + W2; imrow < {{features.acc_height}}; imrow += 32)
 | 
			
		||||
    for (int imrow = threadIdx.y + W2; imrow < {{info.acc_height}}; imrow += 32)
 | 
			
		||||
    {
 | 
			
		||||
        int idx = {{features.acc_stride}} * imrow +
 | 
			
		||||
        int idx = {{info.acc_stride}} * imrow +
 | 
			
		||||
                + blockIdx.x * 32 + threadIdx.x + W2;
 | 
			
		||||
 | 
			
		||||
        float4 in = pixbuf[idx];
 | 
			
		||||
@ -249,7 +249,7 @@ void density_est(float4 *pixbuf, float4 *outbuf,
 | 
			
		||||
        __syncthreads();
 | 
			
		||||
        // TODO: could coalesce this, but what a pain
 | 
			
		||||
        for (int i = threadIdx.x; i < FW; i += 32) {
 | 
			
		||||
            idx = {{features.acc_stride}} * imrow + blockIdx.x * 32 + i + W2;
 | 
			
		||||
            idx = {{info.acc_stride}} * imrow + blockIdx.x * 32 + i + W2;
 | 
			
		||||
            int si = threadIdx.y * FW + i;
 | 
			
		||||
            float *out = reinterpret_cast<float*>(&outbuf[idx]);
 | 
			
		||||
            atomicAdd(out,   de_r[si]);
 | 
			
		||||
@ -285,12 +285,14 @@ void density_est(float4 *pixbuf, float4 *outbuf,
 | 
			
		||||
        # TODO: add no-est version
 | 
			
		||||
        # TODO: come up with a general way to average these parameters
 | 
			
		||||
 | 
			
		||||
        k1 = np.float32(cp.brightness * 268 / 256)
 | 
			
		||||
        area = self.features.width * self.features.height / cp.ppu ** 2
 | 
			
		||||
        k2 = np.float32(1 / (area * cp.adj_density ))
 | 
			
		||||
        k1 = np.float32(cp.color.brightness * 268 / 256)
 | 
			
		||||
        # Old definition of area is (w*h/(s*s)). Since new scale 'ns' is now
 | 
			
		||||
        # s/w, new definition is (w*h/(s*s*w*w)) = (h/(s*s*w))
 | 
			
		||||
        area = self.info.height / (cp.camera.scale ** 2 * self.info.width)
 | 
			
		||||
        k2 = np.float32(1 / (area * self.info.density ))
 | 
			
		||||
 | 
			
		||||
        if self.cp.estimator == 0:
 | 
			
		||||
            nbins = self.features.acc_height * self.features.acc_stride
 | 
			
		||||
        if cp.de.radius == 0:
 | 
			
		||||
            nbins = self.info.acc_height * self.info.acc_stride
 | 
			
		||||
            fun = mod.get_function("logscale")
 | 
			
		||||
            t = fun(abufd, obufd, k1, k2,
 | 
			
		||||
                    block=(512, 1, 1), grid=(nbins/512, 1), stream=stream)
 | 
			
		||||
@ -299,11 +301,11 @@ void density_est(float4 *pixbuf, float4 *outbuf,
 | 
			
		||||
            # 0.5, but the DE filters scale filter distance by the default
 | 
			
		||||
            # spatial support factor of 1.5, so the effective base SD is
 | 
			
		||||
            # (0.5/1.5)=1/3.
 | 
			
		||||
            est_sd = np.float32(cp.estimator / 3.)
 | 
			
		||||
            neg_est_curve = np.float32(-cp.estimator_curve)
 | 
			
		||||
            est_min = np.float32(cp.estimator_minimum / 3.)
 | 
			
		||||
            est_sd = np.float32(cp.de.radius / 3.)
 | 
			
		||||
            neg_est_curve = np.float32(-cp.de.curve)
 | 
			
		||||
            est_min = np.float32(cp.de.minimum / 3.)
 | 
			
		||||
            fun = mod.get_function("density_est")
 | 
			
		||||
            fun(abufd, obufd, est_sd, neg_est_curve, est_min, k1, k2,
 | 
			
		||||
                block=(32, 32, 1), grid=(self.features.acc_width/32, 1),
 | 
			
		||||
                block=(32, 32, 1), grid=(self.info.acc_width/32, 1),
 | 
			
		||||
                stream=stream)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										318
									
								
								cuburn/code/interp.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										318
									
								
								cuburn/code/interp.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,318 @@
 | 
			
		||||
from collections import OrderedDict
 | 
			
		||||
from itertools import cycle
 | 
			
		||||
import numpy as np
 | 
			
		||||
 | 
			
		||||
import tempita
 | 
			
		||||
from cuburn.code.util import HunkOCode, Template
 | 
			
		||||
from cuburn.genome import SplEval
 | 
			
		||||
 | 
			
		||||
class GenomePackerName(str):
 | 
			
		||||
    """Class to indicate that a property is precalculated on the device"""
 | 
			
		||||
    pass
 | 
			
		||||
 | 
			
		||||
class GenomePackerView(object):
 | 
			
		||||
    """
 | 
			
		||||
    Obtain accessors in generated code.
 | 
			
		||||
 | 
			
		||||
    Call ``GenomePacker.view(ptr_name, wrapped_obj, prefix)`` to generate a
 | 
			
		||||
    view, where ``ptr_name`` is the name of the CUDA pointer which holds the
 | 
			
		||||
    interpolated values, ``wrapped_obj`` is the base Genome instance which
 | 
			
		||||
    will be uploaded to the device for interpolating from, and ``prefix`` is
 | 
			
		||||
    the prefix that will be assigned to property accessors from this object.
 | 
			
		||||
 | 
			
		||||
    Accessing a property on the object synthesizes an accessor for use in your
 | 
			
		||||
    code and an interpolator for use in generating that code. This conversion
 | 
			
		||||
    is done when the property is coerced into a string by the templating
 | 
			
		||||
    mechanism, so you can easily nest objects by saying, for instance,
 | 
			
		||||
    {{pcp.camera.rotation}} from within templated code. The accessed property
 | 
			
		||||
    must be a SplEval object, or a precalculated value (see
 | 
			
		||||
    ``GenomePackerPrecalc``).
 | 
			
		||||
 | 
			
		||||
    Index operations are converted to property accesses as well, so that you
 | 
			
		||||
    don't have to make a mess with 'getattr' in your code: {{pcp.xforms[x]}}
 | 
			
		||||
    works just fine. This means, however, that no arrays can be packed
 | 
			
		||||
    directly; they must be converted to have string-based keys first, and
 | 
			
		||||
    any loops must be unrolled in your code.
 | 
			
		||||
    """
 | 
			
		||||
    def __init__(self, packer, ptr_name, wrapped, prefix=()):
 | 
			
		||||
        self.packer = packer
 | 
			
		||||
        self.ptr_name = ptr_name
 | 
			
		||||
        self.wrapped = wrapped
 | 
			
		||||
        self.prefix = prefix
 | 
			
		||||
    def __getattr__(self, name):
 | 
			
		||||
        w = getattr(self.wrapped, name)
 | 
			
		||||
        return type(self)(self.packer, self.ptr_name, w, self.prefix+(name,))
 | 
			
		||||
    # As with the Genome class, we're all-dict, no-array here
 | 
			
		||||
    __getitem__ = lambda s, n: getattr(s, str(n))
 | 
			
		||||
 | 
			
		||||
    def __str__(self):
 | 
			
		||||
        """
 | 
			
		||||
        Returns the packed name in a format suitable for embedding directly
 | 
			
		||||
        into device code.
 | 
			
		||||
        """
 | 
			
		||||
        # So evil. When the template calls __str__ to format the output, we
 | 
			
		||||
        # allocate things. This makes for neater embedded code, which is where
 | 
			
		||||
        # the real complexity lies, but it also means printf() debugging when
 | 
			
		||||
        # templating will screw with the allocation tables!
 | 
			
		||||
        if isinstance(self.wrapped, SplEval):
 | 
			
		||||
            self.packer._require(self.prefix)
 | 
			
		||||
        elif not isinstance(self.wrapped, GenomePackerName):
 | 
			
		||||
            raise TypeError("Tried to pack something that wasn't a spline or "
 | 
			
		||||
                            "a precalculated value")
 | 
			
		||||
        # TODO: verify namespace stomping, etc
 | 
			
		||||
        return '%s.%s' % (self.ptr_name, '_'.join(self.prefix))
 | 
			
		||||
 | 
			
		||||
    def _precalc(self):
 | 
			
		||||
        """Create a GenomePackerPrecalc object. See that class for details."""
 | 
			
		||||
        return GenomePackerPrecalc(self.packer, self.ptr_name,
 | 
			
		||||
                                   self.wrapped, self.prefix)
 | 
			
		||||
 | 
			
		||||
class GenomePackerPrecalc(GenomePackerView):
 | 
			
		||||
    """
 | 
			
		||||
    Insert precalculated values into the packed genome.
 | 
			
		||||
 | 
			
		||||
    Create a Precalc object by calling a view object's _precalc() method. The
 | 
			
		||||
    returned object shares the same referent dict as the view object, but
 | 
			
		||||
    instead of returning view accessors for use in the body of your code,
 | 
			
		||||
    accessing a property synthesizes a call to an interpolation function for
 | 
			
		||||
    use within the precalculating function. Use this in your precalculated
 | 
			
		||||
    code to obtain values from a genome.
 | 
			
		||||
 | 
			
		||||
    Once you've obtained the needed parameters and performed the
 | 
			
		||||
    precalculation, write them to the genome object with a call to the '_set'
 | 
			
		||||
    method in your precalc template. This method generates a new accessor to
 | 
			
		||||
    which you can assign a value in your precalculation function. It also
 | 
			
		||||
    makes this accessor available for use on the original packer object from
 | 
			
		||||
    within your main function.
 | 
			
		||||
 | 
			
		||||
    Finally, call the '_code' method on the precalc object with your generated
 | 
			
		||||
    precalculation code to add it to the precalculation function. The code
 | 
			
		||||
    will be wrapped in a C block to prevent namespace leakage, so that
 | 
			
		||||
    multiple invocations of the precalculation code on different code blocks
 | 
			
		||||
    can be performed.
 | 
			
		||||
 | 
			
		||||
    Example:
 | 
			
		||||
 | 
			
		||||
        def do_precalc(px):
 | 
			
		||||
            pcam = px._precalc()
 | 
			
		||||
            pcam._code(Template('''
 | 
			
		||||
                {{pcam._set('prop_sin')}} = sin({{pcam.prop}});
 | 
			
		||||
                ''').substitute(pcam=pcam))
 | 
			
		||||
 | 
			
		||||
        def gen_code(px):
 | 
			
		||||
            return Template('''
 | 
			
		||||
                {{do_precalc(px)}}
 | 
			
		||||
                printf("The sin of %g is %g.", {{px.prop}}, {{px.prop_sin}});
 | 
			
		||||
                ''').substitute(px=px)
 | 
			
		||||
    """
 | 
			
		||||
    def __init__(self, packer, ptr_name, wrapped, prefix):
 | 
			
		||||
        super(GenomePackerPrecalc, self).__init__(packer, 'out', wrapped, prefix)
 | 
			
		||||
    def __str__(self):
 | 
			
		||||
        return self.packer._require_pre(self.prefix)
 | 
			
		||||
    def _set(self, name):
 | 
			
		||||
        fullname = self.prefix + (name,)
 | 
			
		||||
        self.packer._pre_alloc(fullname)
 | 
			
		||||
        # This just modifies the underlying object, because I'm too lazy right
 | 
			
		||||
        # now to ghost the namespace
 | 
			
		||||
        self.wrapped[name] = GenomePackerName('_'.join(fullname))
 | 
			
		||||
        return '%s->%s' % (self.ptr_name, self.wrapped[name])
 | 
			
		||||
    def _code(self, code):
 | 
			
		||||
        self.packer.precalc_code.append(code)
 | 
			
		||||
 | 
			
		||||
class GenomePacker(HunkOCode):
 | 
			
		||||
    """
 | 
			
		||||
    Packs a genome for use in iteration.
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    # 2^search_rounds is the maximum number of control points, including
 | 
			
		||||
    # endpoints, that can be used in a single genome. It should be okay to
 | 
			
		||||
    # change this number without touching other code, but 32 samples fits
 | 
			
		||||
    # nicely on a single cache line.
 | 
			
		||||
    search_rounds = 5
 | 
			
		||||
 | 
			
		||||
    def __init__(self, tname):
 | 
			
		||||
        """
 | 
			
		||||
        Create a new DataPacker.
 | 
			
		||||
 | 
			
		||||
        ``tname`` is the name of the structure typedef that will be emitted
 | 
			
		||||
        via this object's ``decls`` property.
 | 
			
		||||
        """
 | 
			
		||||
        self.tname = tname
 | 
			
		||||
        # We could do this in the order that things are requested, but we want
 | 
			
		||||
        # to be able to treat the direct stuff as a list so this function
 | 
			
		||||
        # doesn't unroll any more than it has to. So we separate things into
 | 
			
		||||
        # direct requests, and those that need precalculation.
 | 
			
		||||
        # Values of OrderedDict are unused; basically, it's just OrderedSet.
 | 
			
		||||
        self.packed_direct = OrderedDict()
 | 
			
		||||
        self.genome_precalc = OrderedDict()
 | 
			
		||||
        self.packed_precalc = OrderedDict()
 | 
			
		||||
        self.precalc_code = []
 | 
			
		||||
 | 
			
		||||
        self.ns = {}
 | 
			
		||||
 | 
			
		||||
        self._len = None
 | 
			
		||||
        self.decls = None
 | 
			
		||||
        self.defs = None
 | 
			
		||||
 | 
			
		||||
        self.packed = None
 | 
			
		||||
        self.genome = None
 | 
			
		||||
 | 
			
		||||
    def __len__(self):
 | 
			
		||||
        assert self._len is not None, 'len() called before finalize()'
 | 
			
		||||
        return self._len
 | 
			
		||||
 | 
			
		||||
    def view(self, ptr_name, wrapped_obj, prefix):
 | 
			
		||||
        """Create a DataPacker view. See DataPackerView class for details."""
 | 
			
		||||
        self.ns[prefix] = wrapped_obj
 | 
			
		||||
        return GenomePackerView(self, ptr_name, wrapped_obj, (prefix,))
 | 
			
		||||
 | 
			
		||||
    def _require(self, name):
 | 
			
		||||
        """
 | 
			
		||||
        Called to indicate that the named parameter from the original genome
 | 
			
		||||
        must be available during interpolation.
 | 
			
		||||
        """
 | 
			
		||||
        self.packed_direct[name] = None
 | 
			
		||||
 | 
			
		||||
    def _require_pre(self, name):
 | 
			
		||||
        i = len(self.genome_precalc) << self.search_rounds
 | 
			
		||||
        self.genome_precalc[name] = None
 | 
			
		||||
        return 'catmull_rom(×[%d], &knots[%d], time)' % (i, i)
 | 
			
		||||
 | 
			
		||||
    def _pre_alloc(self, name):
 | 
			
		||||
        self.packed_precalc[name] = None
 | 
			
		||||
 | 
			
		||||
    def finalize(self):
 | 
			
		||||
        """
 | 
			
		||||
        Create the code to render this genome.
 | 
			
		||||
        """
 | 
			
		||||
        # At the risk of packing a few things more than once, we don't
 | 
			
		||||
        # uniquify the overall precalc order, sparing us the need to implement
 | 
			
		||||
        # recursive code generation
 | 
			
		||||
        self.packed = self.packed_direct.keys() + self.packed_precalc.keys()
 | 
			
		||||
        self.genome = self.packed_direct.keys() + self.genome_precalc.keys()
 | 
			
		||||
 | 
			
		||||
        self._len = len(self.packed)
 | 
			
		||||
 | 
			
		||||
        self.decls = self._decls.substitute(
 | 
			
		||||
                packed=self.packed, tname=self.tname,
 | 
			
		||||
                search_rounds=self.search_rounds)
 | 
			
		||||
        self.defs = self._defs.substitute(
 | 
			
		||||
                packed_direct=self.packed_direct, tname=self.tname,
 | 
			
		||||
                precalc_code=self.precalc_code,
 | 
			
		||||
                search_rounds=self.search_rounds)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def pack(self):
 | 
			
		||||
        """
 | 
			
		||||
        Return a packed copy of the genome ready for uploading to the GPU as a
 | 
			
		||||
        3D NDArray, with the first element being the times and the second
 | 
			
		||||
        being the values.
 | 
			
		||||
        """
 | 
			
		||||
        width = 1 << self.search_rounds
 | 
			
		||||
        out = np.empty((2, len(self.genome), width), dtype=np.float32)
 | 
			
		||||
        # Ensure that unused values at the top are always big (must be >2.0)
 | 
			
		||||
        out[0].fill(1e9)
 | 
			
		||||
 | 
			
		||||
        for idx, gname in enumerate(self.genome):
 | 
			
		||||
            attr = self.ns[gname[0]]
 | 
			
		||||
            for g in gname[1:]:
 | 
			
		||||
                attr = getattr(attr, g)
 | 
			
		||||
            if not isinstance(attr, SplEval):
 | 
			
		||||
                raise TypeError("%s isn't a spline" % '.'.join(gname))
 | 
			
		||||
            out[0][idx][:len(attr.knots[0])] = attr.knots[0]
 | 
			
		||||
            out[1][idx][:len(attr.knots[1])] = attr.knots[1]
 | 
			
		||||
        return out
 | 
			
		||||
 | 
			
		||||
    _defs = Template(r"""
 | 
			
		||||
 | 
			
		||||
__global__
 | 
			
		||||
void interp_{{tname}}({{tname}}* out, float *times, float *knots,
 | 
			
		||||
        float tstart, float tstep, mwc_st *rctxes) {
 | 
			
		||||
    int id = gtid();
 | 
			
		||||
    out = &out[id];
 | 
			
		||||
    mwc_st rctx = rctxes[id];
 | 
			
		||||
    float time = tstart + id * tstep;
 | 
			
		||||
 | 
			
		||||
    float *outf = reinterpret_cast<float*>(out);
 | 
			
		||||
 | 
			
		||||
    // TODO: unroll pragma?
 | 
			
		||||
    for (int i = 0; i < {{len(packed_direct)}}; i++) {
 | 
			
		||||
        int j = i << {{search_rounds}};
 | 
			
		||||
        outf[i] = catmull_rom(×[j], &knots[j], time);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Advance 'times' and 'knots' to the purely generated sections, so that
 | 
			
		||||
    // the pregenerated statements emitted by _require_pre are correct.
 | 
			
		||||
    times = ×[{{len(packed_direct)<<search_rounds}}];
 | 
			
		||||
    knots = &knots[{{len(packed_direct)<<search_rounds}}];
 | 
			
		||||
 | 
			
		||||
    {{for hunk in precalc_code}}
 | 
			
		||||
    if (1) {
 | 
			
		||||
    {{hunk}}
 | 
			
		||||
    }
 | 
			
		||||
    {{endfor}}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
""")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    _decls = Template(r"""
 | 
			
		||||
typedef struct {
 | 
			
		||||
{{for name in packed}}
 | 
			
		||||
    float   {{'_'.join(name)}};
 | 
			
		||||
{{endfor}}
 | 
			
		||||
} {{tname}};
 | 
			
		||||
 | 
			
		||||
/* Search through the fixed-size list 'hay' to find the rightmost index which
 | 
			
		||||
 * contains a value strictly smaller than the input 'needle'. The crazy
 | 
			
		||||
 * bitwise search is just for my own amusement.
 | 
			
		||||
 */
 | 
			
		||||
__device__
 | 
			
		||||
int bitwise_binsearch(float *hay, float needle) {
 | 
			
		||||
    int lo = 0;
 | 
			
		||||
 | 
			
		||||
    {{for i in range(search_rounds-1, -1, -1)}}
 | 
			
		||||
    if (needle > hay[lo | {{1 << i}}])
 | 
			
		||||
        lo |= {{1 << i}};
 | 
			
		||||
    {{endfor}}
 | 
			
		||||
    return lo;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
__device__
 | 
			
		||||
float catmull_rom(float *times, float *knots, float t) {
 | 
			
		||||
    int idx = bitwise_binsearch(times, t);
 | 
			
		||||
 | 
			
		||||
    // The left bias of the search means that we never have to worry about
 | 
			
		||||
    // overshooting unless the genome is corrupted
 | 
			
		||||
    idx = max(idx, 1);
 | 
			
		||||
 | 
			
		||||
    float t1 = times[idx], t2 = times[idx+1] - t1;
 | 
			
		||||
    float rt2 = 1.0f / t2;
 | 
			
		||||
    float t0 = (times[idx-1] - t1) * rt2, t3 = (times[idx+2] - t1) * rt2;
 | 
			
		||||
    t = (t - t1) * rt2;
 | 
			
		||||
 | 
			
		||||
    // Now t1 is effectively 0 and t2 is 1
 | 
			
		||||
 | 
			
		||||
    float k0 = knots[idx-1], k1 = knots[idx],
 | 
			
		||||
          k2 = knots[idx+1], k3 = knots[idx+2];
 | 
			
		||||
 | 
			
		||||
    float m1 = (k2 - k0) / (1.0f - t0),
 | 
			
		||||
          m2 = (k3 - k1) / (t3);
 | 
			
		||||
 | 
			
		||||
    float tt = t * t, ttt = tt * t;
 | 
			
		||||
 | 
			
		||||
    return m1 * (      ttt - 2.0f*tt + t)
 | 
			
		||||
         + k1 * ( 2.0f*ttt - 3.0f*tt + 1)
 | 
			
		||||
         + m2 * (      ttt -      tt)
 | 
			
		||||
         + k2 * (-2.0f*ttt + 3.0f*tt);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
__global__
 | 
			
		||||
void test_cr(float *times, float *knots, float *t, float *r) {
 | 
			
		||||
    int i = threadIdx.x + blockDim.x * blockIdx.x;
 | 
			
		||||
    r[i] = catmull_rom(times, knots, t[i]);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
""")
 | 
			
		||||
 | 
			
		||||
@ -2,26 +2,132 @@
 | 
			
		||||
The main iteration loop.
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
from cuburn.code import mwc, variations
 | 
			
		||||
from cuburn.code import mwc, variations, interp
 | 
			
		||||
from cuburn.code.util import *
 | 
			
		||||
 | 
			
		||||
def precalc_densities(pcp, std_xforms):
 | 
			
		||||
    # This pattern recurs a few times for precalc segments. Unfortunately,
 | 
			
		||||
    # namespace stuff means it's not easy to functionalize this boilerplate
 | 
			
		||||
    pre_cp = pcp._precalc()
 | 
			
		||||
    pre_cp._code(Template(r"""
 | 
			
		||||
 | 
			
		||||
        float sum = 0.0f;
 | 
			
		||||
 | 
			
		||||
        {{for n in std_xforms}}
 | 
			
		||||
        float den_{{n}} = {{pre_cp.xforms[n].density}};
 | 
			
		||||
        sum += den_{{n}};
 | 
			
		||||
        {{endfor}}
 | 
			
		||||
 | 
			
		||||
        float rsum = 1.0f / sum;
 | 
			
		||||
        sum = 0.0f;
 | 
			
		||||
 | 
			
		||||
        {{for n in std_xforms[:-1]}}
 | 
			
		||||
        sum += den_{{n}} * rsum;
 | 
			
		||||
        {{pre_cp._set('den_' + n)}} = sum;
 | 
			
		||||
        {{endfor}}
 | 
			
		||||
    """).substitute(locals()))
 | 
			
		||||
 | 
			
		||||
def precalc_chaos(pcp, std_xforms):
 | 
			
		||||
    pre_cp = pcp._precalc()
 | 
			
		||||
    pre_cp._code(Template("""
 | 
			
		||||
 | 
			
		||||
        float sum, rsum;
 | 
			
		||||
 | 
			
		||||
        {{for p in std_xforms}}
 | 
			
		||||
        sum = 0.0f;
 | 
			
		||||
 | 
			
		||||
        {{for n in std_xforms}}
 | 
			
		||||
        float den_{{p}}_{{n}} = {{pre_cp.xforms[p].chaos[n]}};
 | 
			
		||||
        sum += den_{{p}}_{{n}};
 | 
			
		||||
        {{endfor}}
 | 
			
		||||
 | 
			
		||||
        rsum = 1.0f / sum;
 | 
			
		||||
        sum = 0.0f;
 | 
			
		||||
 | 
			
		||||
        {{for n in std_xforms[:-1]}}
 | 
			
		||||
        sum += den_{{p}}_{{n}} * rsum;
 | 
			
		||||
        {{pre_cp._set('chaos_%s_%s' % (p, n))}} = sum;
 | 
			
		||||
        {{endfor}}
 | 
			
		||||
 | 
			
		||||
        {{endfor}}
 | 
			
		||||
 | 
			
		||||
    """).substitute(locals()))
 | 
			
		||||
 | 
			
		||||
def precalc_camera(info, pcam):
 | 
			
		||||
    pre_cam = pcam._precalc()
 | 
			
		||||
 | 
			
		||||
    # Maxima code to check my logic:
 | 
			
		||||
    #   matrix([1,0,0.5*width + g],[0,1,0.5*height+g],[0,0,1])
 | 
			
		||||
    # . matrix([width * scale,0,0], [0,width * scale,0], [0,0,1])
 | 
			
		||||
    # . matrix([cosr,-sinr,0], [sinr,cosr,0], [0,0,1])
 | 
			
		||||
    # . matrix([1,0,-cenx],[0,1,-ceny],[0,0,1])
 | 
			
		||||
    # . matrix([X],[Y],[1]);
 | 
			
		||||
 | 
			
		||||
    pre_cam._code(Template(r"""
 | 
			
		||||
 | 
			
		||||
        float rot = {{pre_cam.rotation}} * M_PI / 180.0f;
 | 
			
		||||
        float rotsin = sin(rot), rotcos = cos(rot);
 | 
			
		||||
        float cenx = {{pre_cam.center.x}}, ceny = {{pre_cam.center.y}};
 | 
			
		||||
        float scale = {{pre_cam.scale}} * {{info.width}};
 | 
			
		||||
 | 
			
		||||
        float ditherwidth = {{pre_cam.dither_width}} * 0.33f;
 | 
			
		||||
        float u0 = mwc_next_01(rctx);
 | 
			
		||||
        float r = ditherwidth * sqrtf(-2.0f * log2f(u0) / M_LOG2E);
 | 
			
		||||
 | 
			
		||||
        float u1 = 2.0f * M_PI * mwc_next_01(rctx);
 | 
			
		||||
        float ditherx = r * cos(u1);
 | 
			
		||||
        float dithery = r * sin(u1);
 | 
			
		||||
 | 
			
		||||
        {{pre_cam._set('xx')}} = scale * rotcos;
 | 
			
		||||
        {{pre_cam._set('xy')}} = scale * -rotsin;
 | 
			
		||||
        {{pre_cam._set('xo')}} = scale * (rotsin * ceny - rotcos * cenx)
 | 
			
		||||
                              + {{0.5 * info.width + info.gutter}} + ditherx;
 | 
			
		||||
 | 
			
		||||
        {{pre_cam._set('yx')}} = scale * rotsin;
 | 
			
		||||
        {{pre_cam._set('yy')}} = scale * rotcos;
 | 
			
		||||
        {{pre_cam._set('yo')}} = scale * -(rotsin * cenx + rotcos * ceny)
 | 
			
		||||
                              + {{0.5 * info.height + info.gutter}} + dithery;
 | 
			
		||||
 | 
			
		||||
    """).substitute(locals()))
 | 
			
		||||
 | 
			
		||||
def precalc_xf_affine(px):
 | 
			
		||||
    pre = px._precalc()
 | 
			
		||||
    pre._code(Template(r"""
 | 
			
		||||
 | 
			
		||||
        float pri = {{pre.angle}} * M_PI / 180.0f;
 | 
			
		||||
        float spr = {{pre.spread}} * M_PI / 180.0f;
 | 
			
		||||
 | 
			
		||||
        float magx = {{pre.magnitude.x}};
 | 
			
		||||
        float magy = {{pre.magnitude.y}};
 | 
			
		||||
 | 
			
		||||
        {{pre._set('xx')}} = magx * cos(pri-spr);
 | 
			
		||||
        {{pre._set('xy')}} = magx * sin(pri-spr);
 | 
			
		||||
        {{pre._set('yx')}} = magy * cos(pri+spr);
 | 
			
		||||
        {{pre._set('yy')}} = magy * sin(pri+spr);
 | 
			
		||||
        {{pre._set('xo')}} = {{pre.offset.x}};
 | 
			
		||||
        {{pre._set('yo')}} = {{pre.offset.y}};
 | 
			
		||||
 | 
			
		||||
    """).substitute(locals()))
 | 
			
		||||
 | 
			
		||||
class IterCode(HunkOCode):
 | 
			
		||||
    # The number of threads per block
 | 
			
		||||
    NTHREADS = 256
 | 
			
		||||
 | 
			
		||||
    def __init__(self, features):
 | 
			
		||||
        self.features = features
 | 
			
		||||
        self.packer = DataPacker('iter_info')
 | 
			
		||||
    def __init__(self, info):
 | 
			
		||||
        self.info = info
 | 
			
		||||
        self.packer = interp.GenomePacker('iter_params')
 | 
			
		||||
        self.pcp = self.packer.view('params', self.info.genome, 'cp')
 | 
			
		||||
 | 
			
		||||
        iterbody = self._iterbody()
 | 
			
		||||
        bodies = [self._xfbody(i,x) for i,x in enumerate(self.features.xforms)]
 | 
			
		||||
        bodies = [self._xfbody(i,x) for i,x in sorted(info.genome.xforms.items())]
 | 
			
		||||
        bodies.append(iterbody)
 | 
			
		||||
        self.defs = '\n'.join(bodies)
 | 
			
		||||
        self.decls += self.pix_helpers.substitute(features=features)
 | 
			
		||||
        self.decls += self.pix_helpers.substitute(info=info)
 | 
			
		||||
 | 
			
		||||
    decls = """
 | 
			
		||||
// Note: for normalized lookups, uchar4 actually returns floats
 | 
			
		||||
texture<uchar4, cudaTextureType2D, cudaReadModeNormalizedFloat> palTex;
 | 
			
		||||
__shared__ iter_info info;
 | 
			
		||||
__shared__ iter_params params;
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
@ -29,14 +135,14 @@ __shared__ iter_info info;
 | 
			
		||||
__device__
 | 
			
		||||
void read_pix(float4 &pix, float &den) {
 | 
			
		||||
    den = pix.w;
 | 
			
		||||
    {{if features.pal_has_alpha}}
 | 
			
		||||
    {{if info.pal_has_alpha}}
 | 
			
		||||
    read_half(pix.z, pix.w, pix.z, den);
 | 
			
		||||
    {{endif}}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
__device__
 | 
			
		||||
void write_pix(float4 &pix, float den) {
 | 
			
		||||
    {{if features.pal_has_alpha}}
 | 
			
		||||
    {{if info.pal_has_alpha}}
 | 
			
		||||
    write_half(pix.z, pix.z, pix.w, den);
 | 
			
		||||
    {{endif}}
 | 
			
		||||
    pix.w = den;
 | 
			
		||||
@ -44,7 +150,7 @@ void write_pix(float4 &pix, float den) {
 | 
			
		||||
 | 
			
		||||
__device__
 | 
			
		||||
void update_pix(uint64_t ptr, uint32_t i, float4 c) {
 | 
			
		||||
    {{if features.pal_has_alpha}}
 | 
			
		||||
    {{if info.pal_has_alpha}}
 | 
			
		||||
    asm volatile ({{crep('''
 | 
			
		||||
    {
 | 
			
		||||
        .reg .u16       sz, sw;
 | 
			
		||||
@ -98,34 +204,37 @@ void update_pix(uint64_t ptr, uint32_t i, float4 c) {
 | 
			
		||||
""")
 | 
			
		||||
 | 
			
		||||
    def _xfbody(self, xfid, xform):
 | 
			
		||||
        px = self.packer.view('info', 'xf%d_' % xfid)
 | 
			
		||||
        px.sub('xf', 'cp.xforms[%d]' % xfid)
 | 
			
		||||
 | 
			
		||||
        tmpl = Template("""
 | 
			
		||||
        px = self.pcp.xforms[xfid]
 | 
			
		||||
        tmpl = Template(r"""
 | 
			
		||||
__device__
 | 
			
		||||
void apply_xf{{xfid}}(float &ox, float &oy, float &color, mwc_st &rctx) {
 | 
			
		||||
void apply_xf_{{xfid}}(float &ox, float &oy, float &color, mwc_st &rctx) {
 | 
			
		||||
    float tx, ty;
 | 
			
		||||
 | 
			
		||||
    {{apply_affine_flam3('ox', 'oy', 'tx', 'ty', px, 'xf.c', 'pre')}}
 | 
			
		||||
    {{precalc_xf_affine(px.affine)}}
 | 
			
		||||
    {{apply_affine('ox', 'oy', 'tx', 'ty', px.affine)}}
 | 
			
		||||
 | 
			
		||||
    ox = 0;
 | 
			
		||||
    oy = 0;
 | 
			
		||||
 | 
			
		||||
    {{for v in xform.vars}}
 | 
			
		||||
    {{for name in xform.variations}}
 | 
			
		||||
    if (1) {
 | 
			
		||||
        float w = {{px.get('xf.var[%d]' % v)}};
 | 
			
		||||
        {{variations.var_code[variations.var_nos[v]].substitute(locals())}}
 | 
			
		||||
    {{py:pv = px.variations[name]}}
 | 
			
		||||
    float w = {{pv.weight}};
 | 
			
		||||
    {{variations.var_code[name].substitute(locals())}}
 | 
			
		||||
    }
 | 
			
		||||
    {{endfor}}
 | 
			
		||||
 | 
			
		||||
    {{if xform.has_post}}
 | 
			
		||||
    {{if 'post' in xform}}
 | 
			
		||||
    tx = ox;
 | 
			
		||||
    ty = oy;
 | 
			
		||||
    {{apply_affine_flam3('tx', 'ty', 'ox', 'oy', px, 'xf.post', 'post')}}
 | 
			
		||||
    {{precalc_xf_affine(px.post)}}
 | 
			
		||||
    {{apply_affine('tx', 'ty', 'ox', 'oy', px.post)}}
 | 
			
		||||
    {{endif}}
 | 
			
		||||
 | 
			
		||||
    float csp = {{px.get('xf.color_speed')}};
 | 
			
		||||
    color = color * (1.0f - csp) + {{px.get('xf.color')}} * csp;
 | 
			
		||||
    {{if 'color' in xform}}
 | 
			
		||||
    float csp = {{px.color_speed}};
 | 
			
		||||
    color = color * (1.0f - csp) + {{px.color}} * csp;
 | 
			
		||||
    {{endif}}
 | 
			
		||||
};
 | 
			
		||||
""")
 | 
			
		||||
        g = dict(globals())
 | 
			
		||||
@ -135,42 +244,38 @@ void apply_xf{{xfid}}(float &ox, float &oy, float &color, mwc_st &rctx) {
 | 
			
		||||
    def _iterbody(self):
 | 
			
		||||
        tmpl = Template(r'''
 | 
			
		||||
__global__
 | 
			
		||||
void iter(mwc_st *msts, iter_info *infos, uint64_t accbuf_ptr) {
 | 
			
		||||
    __shared__ int nsamps;
 | 
			
		||||
void iter(uint64_t accbuf_ptr, mwc_st *msts, iter_params *all_params,
 | 
			
		||||
          int nsamps_to_generate) {
 | 
			
		||||
    mwc_st rctx = msts[gtid()];
 | 
			
		||||
    iter_info *info_glob = &(infos[blockIdx.x]);
 | 
			
		||||
    iter_params *global_params = &(all_params[blockIdx.x]);
 | 
			
		||||
 | 
			
		||||
    // load info to shared memory cooperatively
 | 
			
		||||
    __shared__ int nsamps;
 | 
			
		||||
    nsamps = nsamps_to_generate;
 | 
			
		||||
 | 
			
		||||
    __shared__ float time_frac;
 | 
			
		||||
    time_frac = blockIdx.x / (float) gridDim.x;
 | 
			
		||||
 | 
			
		||||
    // load params to shared memory cooperatively
 | 
			
		||||
    for (int i = threadIdx.y * blockDim.x + threadIdx.x;
 | 
			
		||||
         i * 4 < sizeof(iter_info); i += blockDim.x * blockDim.y)
 | 
			
		||||
        reinterpret_cast<float*>(&info)[i] =
 | 
			
		||||
            reinterpret_cast<float*>(info_glob)[i];
 | 
			
		||||
         i * 4 < sizeof(iter_params); i += blockDim.x * blockDim.y)
 | 
			
		||||
        reinterpret_cast<float*>(¶ms)[i] =
 | 
			
		||||
            reinterpret_cast<float*>(global_params)[i];
 | 
			
		||||
 | 
			
		||||
    if (threadIdx.y == 0 && threadIdx.x == 0)
 | 
			
		||||
        nsamps = {{packer.get("cp.width * cp.height / cp.ntemporal_samples * cp.adj_density")}};
 | 
			
		||||
 | 
			
		||||
    {{if features.chaos_used}}
 | 
			
		||||
{{if info.chaos_used}}
 | 
			
		||||
    int last_xf_used = 0;
 | 
			
		||||
    {{else}}
 | 
			
		||||
    // Size can be reduced by a factor of four using a slower 4-stage reduce
 | 
			
		||||
{{else}}
 | 
			
		||||
    // Shared memory size can be reduced by a factor of four using a slower
 | 
			
		||||
    // 4-stage reduce, but on Fermi hardware shmem use isn't a bottleneck
 | 
			
		||||
    __shared__ float swap[{{4*NTHREADS}}];
 | 
			
		||||
    __shared__ float cosel[{{NWARPS}}];
 | 
			
		||||
 | 
			
		||||
    // This is normally done after the swap-sync in the main loop
 | 
			
		||||
    if (threadIdx.y == 0 && threadIdx.x < {{NWARPS}})
 | 
			
		||||
        cosel[threadIdx.x] = mwc_next_01(rctx);
 | 
			
		||||
    {{endif}}
 | 
			
		||||
{{endif}}
 | 
			
		||||
 | 
			
		||||
    __syncthreads();
 | 
			
		||||
    int consec_bad = -{{features.fuse}};
 | 
			
		||||
 | 
			
		||||
    if (threadIdx.y == 1 && threadIdx.x == 0) {
 | 
			
		||||
        float ditherwidth = {{packer.get("0.33 * cp.spatial_filter_radius")}};
 | 
			
		||||
        float u0 = mwc_next_01(rctx);
 | 
			
		||||
        float r = ditherwidth * sqrtf(-2.0f * log2f(u0) / M_LOG2E);
 | 
			
		||||
 | 
			
		||||
        float u1 = 2.0f * M_PI * mwc_next_01(rctx);
 | 
			
		||||
        info.cam_xo += r * cos(u1);
 | 
			
		||||
        info.cam_yo += r * sin(u1);
 | 
			
		||||
    }
 | 
			
		||||
    int consec_bad = -{{info.fuse}};
 | 
			
		||||
 | 
			
		||||
    float x, y, color;
 | 
			
		||||
    x = mwc_next_11(rctx);
 | 
			
		||||
@ -178,37 +283,44 @@ void iter(mwc_st *msts, iter_info *infos, uint64_t accbuf_ptr) {
 | 
			
		||||
    color = mwc_next_01(rctx);
 | 
			
		||||
 | 
			
		||||
    while (1) {
 | 
			
		||||
        {{if features.chaos_used}}
 | 
			
		||||
        // For now, we can't use the swap buffer with chaos enabled
 | 
			
		||||
        float xfsel = mwc_next_01(rctx);
 | 
			
		||||
        {{else}}
 | 
			
		||||
        float xfsel = cosel[threadIdx.y];
 | 
			
		||||
        {{endif}}
 | 
			
		||||
 | 
			
		||||
        {{if features.chaos_used}}
 | 
			
		||||
        {{for density_row_idx, prior_xform_idx in enumerate(features.std_xforms)}}
 | 
			
		||||
        {{for density_col_idx,  this_xform_idx in enumerate(features.std_xforms)}}
 | 
			
		||||
        if (last_xf_used == {{prior_xform_idx}} &&
 | 
			
		||||
                xfsel <= {{packer.get("cp.chaos_densities[%d][%d]" % (density_row_idx, density_col_idx))}}) {
 | 
			
		||||
            apply_xf{{this_xform_idx}}(x, y, color, rctx);
 | 
			
		||||
            last_xf_used = {{this_xform_idx}};
 | 
			
		||||
{{if info.chaos_used}}
 | 
			
		||||
 | 
			
		||||
        {{precalc_chaos(pcp, std_xforms)}}
 | 
			
		||||
 | 
			
		||||
        // For now, we don't attempt to use the swap buffer when chaos is used
 | 
			
		||||
        float xfsel = mwc_next_01(rctx);
 | 
			
		||||
 | 
			
		||||
        {{for prior_xform_idx, prior_xform_name in enumerate(std_xforms)}}
 | 
			
		||||
        if (last_xf_used == {{prior_xform_idx}}) {
 | 
			
		||||
            {{for xform_idx, xform_name in enumerate(std_xforms[:-1])}}
 | 
			
		||||
            if (xfsel <= {{pcp['chaos_'+prior_xform_name+'_'+xform_name]}}) {
 | 
			
		||||
                apply_xf_{{xform_name}}(x, y, color, rctx);
 | 
			
		||||
                last_xf_used = {{xform_idx}};
 | 
			
		||||
            } else
 | 
			
		||||
            {{endfor}}
 | 
			
		||||
            {
 | 
			
		||||
                apply_xf_{{std_xforms[-1]}}(x, y, color, rctx);
 | 
			
		||||
                last_xf_used = {{len(std_xforms)-1}};
 | 
			
		||||
            }
 | 
			
		||||
        } else
 | 
			
		||||
        {{endfor}}
 | 
			
		||||
        {{endfor}}
 | 
			
		||||
        {{else}}
 | 
			
		||||
        {{for density_col_idx, this_xform_idx in enumerate(features.std_xforms)}}
 | 
			
		||||
        if (xfsel <= {{packer.get("cp.norm_density[%d]" % (density_col_idx))}}) {
 | 
			
		||||
            apply_xf{{this_xform_idx}}(x, y, color, rctx);
 | 
			
		||||
        } else
 | 
			
		||||
        {{endfor}}
 | 
			
		||||
        {{endif}}
 | 
			
		||||
        {
 | 
			
		||||
            printf("Reached trap, aborting execution! %g (%d,%d,%d)\n",
 | 
			
		||||
                   xfsel, blockIdx.x, threadIdx.y, threadIdx.x);
 | 
			
		||||
            asm volatile ("trap;");
 | 
			
		||||
            printf("Something went *very* wrong.\n");
 | 
			
		||||
            asm("trap;");
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        {{if not features.chaos_used}}
 | 
			
		||||
{{else}}
 | 
			
		||||
        {{precalc_densities(pcp, std_xforms)}}
 | 
			
		||||
        float xfsel = cosel[threadIdx.y];
 | 
			
		||||
 | 
			
		||||
        {{for xform_name in std_xforms[:-1]}}
 | 
			
		||||
        if (xfsel <= {{pcp['den_'+xform_name]}}) {
 | 
			
		||||
            apply_xf_{{xform_name}}(x, y, color, rctx);
 | 
			
		||||
        } else
 | 
			
		||||
        {{endfor}}
 | 
			
		||||
            apply_xf_{{std_xforms[-1]}}(x, y, color, rctx);
 | 
			
		||||
 | 
			
		||||
        // Swap thread states here so that writeback skipping logic doesn't die
 | 
			
		||||
        int sw = (threadIdx.y * 32 + threadIdx.x * 33) & {{NTHREADS-1}};
 | 
			
		||||
        int sr = threadIdx.y * 32 + threadIdx.x;
 | 
			
		||||
@ -222,17 +334,16 @@ void iter(mwc_st *msts, iter_info *infos, uint64_t accbuf_ptr) {
 | 
			
		||||
        // required per loop.
 | 
			
		||||
        if (nsamps < 0) break;
 | 
			
		||||
 | 
			
		||||
        {{if not features.chaos_used}}
 | 
			
		||||
        // Similarly, we select the next xforms here.
 | 
			
		||||
        if (threadIdx.y == 0 && threadIdx.x < {{NWARPS}})
 | 
			
		||||
            cosel[threadIdx.x] = mwc_next_01(rctx);
 | 
			
		||||
        {{endif}}
 | 
			
		||||
 | 
			
		||||
        consec_bad = swap[sr];
 | 
			
		||||
        x = swap[sr+{{NTHREADS}}];
 | 
			
		||||
        y = swap[sr+{{2*NTHREADS}}];
 | 
			
		||||
        color = swap[sr+{{3*NTHREADS}}];
 | 
			
		||||
        {{endif}}
 | 
			
		||||
 | 
			
		||||
{{endif}}
 | 
			
		||||
 | 
			
		||||
        if (consec_bad < 0) {
 | 
			
		||||
            consec_bad++;
 | 
			
		||||
@ -242,45 +353,50 @@ void iter(mwc_st *msts, iter_info *infos, uint64_t accbuf_ptr) {
 | 
			
		||||
        int remain = __popc(__ballot(1));
 | 
			
		||||
        if (threadIdx.x == 0) atomicSub(&nsamps, remain);
 | 
			
		||||
 | 
			
		||||
        {{if features.final_xform_index}}
 | 
			
		||||
{{if 'final' in cp.xforms}}
 | 
			
		||||
        float fx = x, fy = y, fcolor;
 | 
			
		||||
        apply_xf{{features.final_xform_index}}(fx, fy, fcolor, rctx);
 | 
			
		||||
        {{endif}}
 | 
			
		||||
        apply_xf_final(fx, fy, fcolor, rctx);
 | 
			
		||||
{{endif}}
 | 
			
		||||
 | 
			
		||||
        float cx, cy;
 | 
			
		||||
        {{if features.final_xform_index}}
 | 
			
		||||
        {{apply_affine('fx', 'fy', 'cx', 'cy', packer,
 | 
			
		||||
                       'cp.camera_transform', 'cam')}}
 | 
			
		||||
        {{else}}
 | 
			
		||||
        {{apply_affine('x', 'y', 'cx', 'cy', packer,
 | 
			
		||||
                       'cp.camera_transform', 'cam')}}
 | 
			
		||||
        {{endif}}
 | 
			
		||||
 | 
			
		||||
        {{precalc_camera(info, pcp.camera)}}
 | 
			
		||||
 | 
			
		||||
{{if 'final' in cp.xforms}}
 | 
			
		||||
        {{apply_affine('fx', 'fy', 'cx', 'cy', pcp.camera)}}
 | 
			
		||||
{{else}}
 | 
			
		||||
        {{apply_affine('x', 'y', 'cx', 'cy', pcp.camera)}}
 | 
			
		||||
{{endif}}
 | 
			
		||||
 | 
			
		||||
        uint32_t ix = trunca(cx), iy = trunca(cy);
 | 
			
		||||
 | 
			
		||||
        if (ix >= {{features.acc_width}} || iy >= {{features.acc_height}} ) {
 | 
			
		||||
        if (ix >= {{info.acc_width}} || iy >= {{info.acc_height}} ) {
 | 
			
		||||
            consec_bad++;
 | 
			
		||||
            if (consec_bad > {{features.max_oob}}) {
 | 
			
		||||
            if (consec_bad > {{info.max_oob}}) {
 | 
			
		||||
                x = mwc_next_11(rctx);
 | 
			
		||||
                y = mwc_next_11(rctx);
 | 
			
		||||
                color = mwc_next_01(rctx);
 | 
			
		||||
                consec_bad = -{{features.fuse}};
 | 
			
		||||
                consec_bad = -{{info.fuse}};
 | 
			
		||||
            }
 | 
			
		||||
            continue;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        uint32_t i = iy * {{features.acc_stride}} + ix;
 | 
			
		||||
        uint32_t i = iy * {{info.acc_stride}} + ix;
 | 
			
		||||
 | 
			
		||||
        float4 outcol = tex2D(palTex, color, {{packer.get("cp_step_frac")}});
 | 
			
		||||
 | 
			
		||||
        float4 outcol = tex2D(palTex, color, time_frac);
 | 
			
		||||
        update_pix(accbuf_ptr, i, outcol);
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
    msts[gtid()] = rctx;
 | 
			
		||||
}
 | 
			
		||||
''')
 | 
			
		||||
        return tmpl.substitute(
 | 
			
		||||
                features = self.features,
 | 
			
		||||
                packer = self.packer.view('info'),
 | 
			
		||||
                info = self.info,
 | 
			
		||||
                cp = self.info.genome,
 | 
			
		||||
                pcp = self.pcp,
 | 
			
		||||
                NTHREADS = self.NTHREADS,
 | 
			
		||||
                NWARPS = self.NTHREADS / 32,
 | 
			
		||||
                std_xforms = [n for n in sorted(self.info.genome.xforms)
 | 
			
		||||
                              if n != 'final'],
 | 
			
		||||
                **globals())
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -24,28 +24,11 @@ def assemble_code(*sections):
 | 
			
		||||
    return ''.join([''.join([getattr(sect, kind) for sect in sections])
 | 
			
		||||
                    for kind in ['headers', 'decls', 'defs']])
 | 
			
		||||
 | 
			
		||||
def apply_affine(x, y, xo, yo, packer, base_accessor, base_name):
 | 
			
		||||
def apply_affine(x, y, xo, yo, packer):
 | 
			
		||||
    return Template("""
 | 
			
		||||
    {{xo}} = {{packer.get(ba + '[0,0]', bn + '_xx')}} * {{x}}
 | 
			
		||||
           + {{packer.get(ba + '[0,1]', bn + '_xy')}} * {{y}}
 | 
			
		||||
           + {{packer.get(ba + '[0,2]', bn + '_xo')}};
 | 
			
		||||
    {{yo}} = {{packer.get(ba + '[1,0]', bn + '_yx')}} * {{x}}
 | 
			
		||||
           + {{packer.get(ba + '[1,1]', bn + '_yy')}} * {{y}}
 | 
			
		||||
           + {{packer.get(ba + '[1,2]', bn + '_yo')}};
 | 
			
		||||
    """).substitute(x=x, y=y, xo=xo, yo=yo, packer=packer,
 | 
			
		||||
                    ba=base_accessor, bn=base_name)
 | 
			
		||||
 | 
			
		||||
def apply_affine_flam3(x, y, xo, yo, packer, base_accessor, base_name):
 | 
			
		||||
    """Read an affine transformation in *flam3 order* and apply it."""
 | 
			
		||||
    return tempita.Template("""
 | 
			
		||||
    {{xo}} = {{packer.get(ba + '[0][0]', bn + '_xx')}} * {{x}}
 | 
			
		||||
           + {{packer.get(ba + '[1][0]', bn + '_xy')}} * {{y}}
 | 
			
		||||
           + {{packer.get(ba + '[2][0]', bn + '_xo')}};
 | 
			
		||||
    {{yo}} = {{packer.get(ba + '[0][1]', bn + '_yx')}} * {{x}}
 | 
			
		||||
           + {{packer.get(ba + '[1][1]', bn + '_yy')}} * {{y}}
 | 
			
		||||
           + {{packer.get(ba + '[2][1]', bn + '_yo')}};
 | 
			
		||||
    """).substitute(x=x, y=y, xo=xo, yo=yo, packer=packer,
 | 
			
		||||
                    ba=base_accessor, bn=base_name)
 | 
			
		||||
    {{xo}} = {{packer.xx}} * {{x}} + {{packer.xy}} * {{y}} + {{packer.xo}};
 | 
			
		||||
    {{yo}} = {{packer.yx}} * {{x}} + {{packer.yy}} * {{y}} + {{packer.yo}};
 | 
			
		||||
    """).substitute(locals())
 | 
			
		||||
 | 
			
		||||
class BaseCode(HunkOCode):
 | 
			
		||||
    headers = """
 | 
			
		||||
@ -165,126 +148,4 @@ void write_half(float &xy, float x, float y, float den) {
 | 
			
		||||
        zero(dptr, np.int32(size), stream=stream,
 | 
			
		||||
             block=(1024, 1, 1), grid=(blocks, blocks, 1))
 | 
			
		||||
 | 
			
		||||
class DataPackerView(object):
 | 
			
		||||
    """
 | 
			
		||||
    View of a data packer. Intended to be initialized using DataPacker.view().
 | 
			
		||||
 | 
			
		||||
    All views of a data packer share the same stream parameters, such as
 | 
			
		||||
    position and total size, but do not share other parameters, such as the
 | 
			
		||||
    pointer name used in emitted code lookups or the lookup context.
 | 
			
		||||
    """
 | 
			
		||||
    def __init__(self, packer, ptr, prefix, ns):
 | 
			
		||||
        self.packer, self.ptr, self.prefix, self.ns = packer, ptr, prefix, ns
 | 
			
		||||
 | 
			
		||||
    def get(self, accessor, name=None):
 | 
			
		||||
        """
 | 
			
		||||
        Add an access to the stream, returning the formatted load expression
 | 
			
		||||
        for device use. If 'name' is missing, the name components after the
 | 
			
		||||
        final dot in the accessor will be used. Little effort is made to
 | 
			
		||||
        ensure that this is valid C.
 | 
			
		||||
        """
 | 
			
		||||
        if name is None:
 | 
			
		||||
            name = accessor.rsplit('.', 1)[-1]
 | 
			
		||||
            name = name.replace('[', '_').replace(']', '')
 | 
			
		||||
        name = self.prefix + name
 | 
			
		||||
        self.packer._access(self, accessor, name)
 | 
			
		||||
        return '%s.%s' % (self.ptr, name)
 | 
			
		||||
 | 
			
		||||
    def sub(self, dst, src):
 | 
			
		||||
        """Add a substitution to the namespace."""
 | 
			
		||||
        self.ns.append((src, dst))
 | 
			
		||||
 | 
			
		||||
    def view(self, ptr_name, prefix=''):
 | 
			
		||||
        """
 | 
			
		||||
        As DataPacker.view(), but preserving the current set of namespace
 | 
			
		||||
        substitutions.
 | 
			
		||||
        """
 | 
			
		||||
        return DataPackerView(self.packer, ptr_name, prefix, list(self.ns))
 | 
			
		||||
 | 
			
		||||
    def _apply_subs(self, ns):
 | 
			
		||||
        for s, d in self.ns:
 | 
			
		||||
            ns[d] = eval(s, ns)
 | 
			
		||||
        return ns
 | 
			
		||||
 | 
			
		||||
class DataPacker(HunkOCode):
 | 
			
		||||
    """
 | 
			
		||||
    Packs 32-bit float values into a dynamic data structure, and emits
 | 
			
		||||
    accessors to those data values from device code. Might get fancier in the
 | 
			
		||||
    future, but for now it's incredibly barebones.
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    default_namespace = {'np': np}
 | 
			
		||||
 | 
			
		||||
    def __init__(self, tname, clsize=4):
 | 
			
		||||
        """
 | 
			
		||||
        Create a new DataPacker.
 | 
			
		||||
 | 
			
		||||
        ``tname`` is the name of the structure typedef that will be emitted
 | 
			
		||||
        via this object's ``decls`` property.
 | 
			
		||||
 | 
			
		||||
        ``clsize`` is the size of a cache line, in bytes. The resulting
 | 
			
		||||
        data structure will be padded to that size.
 | 
			
		||||
        """
 | 
			
		||||
        self.tname = tname
 | 
			
		||||
        self.clsize = clsize
 | 
			
		||||
        self.packed = {}
 | 
			
		||||
        self.packed_order = []
 | 
			
		||||
 | 
			
		||||
    def view(self, ptr_name, prefix=''):
 | 
			
		||||
        """Create a DataPacker view. See DataPackerView class for details."""
 | 
			
		||||
        return DataPackerView(self, ptr_name, prefix, list())
 | 
			
		||||
 | 
			
		||||
    def _access(self, view, accessor, name):
 | 
			
		||||
        if name in self.packed:
 | 
			
		||||
            pview, paccessor, pcomp = self.packed[name]
 | 
			
		||||
            if pview == view and (accessor is None or paccessor == accessor):
 | 
			
		||||
                return
 | 
			
		||||
            raise ValueError("Same name, different accessor or view: %s" % name)
 | 
			
		||||
        comp_accessor = compile(accessor, '{{template}}', 'eval')
 | 
			
		||||
        self.packed[name] = (view, accessor, comp_accessor)
 | 
			
		||||
        self.packed_order.append(name)
 | 
			
		||||
 | 
			
		||||
    def __len__(self):
 | 
			
		||||
        return len(self.packed_order)
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def align(self):
 | 
			
		||||
        return (4 * len(self) + self.clsize - 1) / self.clsize * self.clsize
 | 
			
		||||
 | 
			
		||||
    def pack(self, **kwargs):
 | 
			
		||||
        base_ns = self.default_namespace.copy()
 | 
			
		||||
        base_ns.update(kwargs)
 | 
			
		||||
        out = np.zeros(self.align/4, dtype=np.float32)
 | 
			
		||||
        subbed_nses = {}
 | 
			
		||||
 | 
			
		||||
        for i, name in enumerate(self.packed_order):
 | 
			
		||||
            view, accessor, comp = self.packed[name]
 | 
			
		||||
            if view not in subbed_nses:
 | 
			
		||||
                subbed_nses[view] = view._apply_subs(dict(base_ns))
 | 
			
		||||
            try:
 | 
			
		||||
                val = eval(comp, subbed_nses[view])
 | 
			
		||||
            except Exception, e:
 | 
			
		||||
                print 'Error while evaluating accessor "%s"' % accessor
 | 
			
		||||
                raise e
 | 
			
		||||
            out[i] = val
 | 
			
		||||
        return out
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def decls(self):
 | 
			
		||||
        tmpl = Template("""
 | 
			
		||||
typedef struct {
 | 
			
		||||
{{for name, accessor in values}}
 | 
			
		||||
    float   {{'%-20s' % name}}; // {{accessor}}
 | 
			
		||||
{{endfor}}
 | 
			
		||||
{{if padding > 0}}
 | 
			
		||||
    // Align to fill whole cache lines
 | 
			
		||||
    float   padding[{{padding}}];
 | 
			
		||||
{{endif}}
 | 
			
		||||
} {{tname}};
 | 
			
		||||
""")
 | 
			
		||||
        return tmpl.substitute(
 | 
			
		||||
                values  = [(n, self.packed[n][1]) for n in self.packed_order],
 | 
			
		||||
                padding = self.align / 4 - len(self),
 | 
			
		||||
                tname   = self.tname
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -1,11 +1,32 @@
 | 
			
		||||
import tempita
 | 
			
		||||
import numpy as np
 | 
			
		||||
 | 
			
		||||
from cuburn.code.util import Template
 | 
			
		||||
 | 
			
		||||
var_nos = {}
 | 
			
		||||
var_code = {}
 | 
			
		||||
var_params = {}
 | 
			
		||||
 | 
			
		||||
def var(num, name, code):
 | 
			
		||||
    var_nos[num] = name
 | 
			
		||||
    var_code[name] = tempita.Template(code)
 | 
			
		||||
def var(num, name, code, params=None):
 | 
			
		||||
    var_code[name] = Template(code)
 | 
			
		||||
    if params is not None:
 | 
			
		||||
        r = {}
 | 
			
		||||
        for p in params.split():
 | 
			
		||||
            if '=' in p:
 | 
			
		||||
                p, default = p.split('=')
 | 
			
		||||
                if default == 'M_PI':
 | 
			
		||||
                    default = np.pi
 | 
			
		||||
                else:
 | 
			
		||||
                    default = float(default)
 | 
			
		||||
            else:
 | 
			
		||||
                default = 0.0
 | 
			
		||||
            r[p] = default
 | 
			
		||||
        var_params[name] = r
 | 
			
		||||
 | 
			
		||||
# TODO: This is a shitty hack
 | 
			
		||||
def precalc(name, code):
 | 
			
		||||
    def precalc_fun(pv):
 | 
			
		||||
        pre = pv._precalc()
 | 
			
		||||
        pre._code(Template(code).substitute(pre=pre))
 | 
			
		||||
    Template.default_namespace[name+'_precalc'] = precalc_fun
 | 
			
		||||
 | 
			
		||||
# Variables note: all functions will have their weights as 'w',
 | 
			
		||||
# input variables 'tx' and 'ty', and output 'ox' and 'oy' available
 | 
			
		||||
@ -119,10 +140,10 @@ var(14, 'bent', """
 | 
			
		||||
    """)
 | 
			
		||||
 | 
			
		||||
var(15, 'waves', """
 | 
			
		||||
    float c10 = {{px.get(None, 'pre_xy')}};
 | 
			
		||||
    float c11 = {{px.get(None, 'pre_yy')}};
 | 
			
		||||
    float dx = {{px.get(None, 'pre_xo')}};
 | 
			
		||||
    float dy = {{px.get(None, 'pre_yo')}};
 | 
			
		||||
    float c10 = {{px.affine.xy}};
 | 
			
		||||
    float c11 = {{px.affine.yy}};
 | 
			
		||||
    float dx = {{px.affine.xo}};
 | 
			
		||||
    float dy = {{px.affine.yo}};
 | 
			
		||||
    float dx2 = 1.0f / (dx * dx);
 | 
			
		||||
    float dy2 = 1.0f / (dy * dy);
 | 
			
		||||
 | 
			
		||||
@ -140,8 +161,8 @@ var(16, 'fisheye', """
 | 
			
		||||
var(17, 'popcorn', """
 | 
			
		||||
    float dx = tanf(3.0f*ty);
 | 
			
		||||
    float dy = tanf(3.0f*tx);
 | 
			
		||||
    ox += w * (tx + {{px.get(None, 'pre_xo')}} * sinf(dx));
 | 
			
		||||
    oy += w * (ty + {{px.get(None, 'pre_yo')}} * sinf(dy));
 | 
			
		||||
    ox += w * (tx + {{px.affine.xo}} * sinf(dx));
 | 
			
		||||
    oy += w * (ty + {{px.affine.yo}} * sinf(dy));
 | 
			
		||||
    """)
 | 
			
		||||
 | 
			
		||||
var(18, 'exponential', """
 | 
			
		||||
@ -166,7 +187,7 @@ var(20, 'cosine', """
 | 
			
		||||
    """)
 | 
			
		||||
 | 
			
		||||
var(21, 'rings', """
 | 
			
		||||
    float dx = {{px.get(None, 'pre_xo')}} * {{px.get(None, 'pre_xo')}};
 | 
			
		||||
    float dx = {{px.affine.xo}} * {{px.affine.xo}};
 | 
			
		||||
    float r = sqrtf(tx*tx + ty*ty);
 | 
			
		||||
    float a = atan2f(tx, ty);
 | 
			
		||||
    r = w * (fmodf(r+dx, 2.0f*dx) - dx + r * (1.0f - dx));
 | 
			
		||||
@ -175,9 +196,9 @@ var(21, 'rings', """
 | 
			
		||||
    """)
 | 
			
		||||
 | 
			
		||||
var(22, 'fan', """
 | 
			
		||||
    float dx = M_PI * ({{px.get(None, 'pre_xo')}} * {{px.get(None, 'pre_xo')}});
 | 
			
		||||
    float dx = M_PI * ({{px.affine.xo}} * {{px.affine.xo}});
 | 
			
		||||
    float dx2 = 0.5f * dx;
 | 
			
		||||
    float dy = {{px.get(None, 'pre_yo')}};
 | 
			
		||||
    float dy = {{px.affine.yo}};
 | 
			
		||||
    float a = atan2f(tx, ty);
 | 
			
		||||
    a += (fmodf(a+dy, dx) > dx2) ? -dx2 : dx2;
 | 
			
		||||
    float r = w * sqrtf(tx*tx + ty*ty);
 | 
			
		||||
@ -188,24 +209,24 @@ var(22, 'fan', """
 | 
			
		||||
var(23, 'blob', """
 | 
			
		||||
    float r = sqrtf(tx*tx + ty*ty);
 | 
			
		||||
    float a = atan2f(tx, ty);
 | 
			
		||||
    float bdiff = 0.5f * ({{px.get('xf.blob_high - xf.blob_low','blob_diff')}});
 | 
			
		||||
    r *= w * ({{px.get('xf.blob_low')}} + bdiff * (1.0f + sinf({{px.get('xf.blob_waves')}} * a)));
 | 
			
		||||
    float bdiff = 0.5f * ({{pv.high}} - {{pv.low}});
 | 
			
		||||
    r *= w * ({{pv.low}} + bdiff * (1.0f + sinf({{pv.waves}} * a)));
 | 
			
		||||
    ox += sinf(a) * r;
 | 
			
		||||
    oy += cosf(a) * r;
 | 
			
		||||
    """)
 | 
			
		||||
    """, 'low high=1 waves=1')
 | 
			
		||||
 | 
			
		||||
var(24, 'pdj', """
 | 
			
		||||
    float nx1 = cosf({{px.get('xf.pdj_b')}} * tx);
 | 
			
		||||
    float nx2 = sinf({{px.get('xf.pdj_c')}} * tx);
 | 
			
		||||
    float ny1 = sinf({{px.get('xf.pdj_a')}} * ty);
 | 
			
		||||
    float ny2 = cosf({{px.get('xf.pdj_d')}} * ty);
 | 
			
		||||
    float nx1 = cosf({{pv.b}} * tx);
 | 
			
		||||
    float nx2 = sinf({{pv.c}} * tx);
 | 
			
		||||
    float ny1 = sinf({{pv.a}} * ty);
 | 
			
		||||
    float ny2 = cosf({{pv.d}} * ty);
 | 
			
		||||
    ox += w * (ny1 - nx1);
 | 
			
		||||
    oy += w * (nx2 - ny2);
 | 
			
		||||
    """)
 | 
			
		||||
    """, 'a b c d')
 | 
			
		||||
 | 
			
		||||
var(25, 'fan2', """
 | 
			
		||||
    float dy = {{px.get('xf.fan2_y')}};
 | 
			
		||||
    float dx = M_PI * {{px.get('xf.fan2_x')}} * {{px.get('xf.fan2_x')}};
 | 
			
		||||
    float dy = {{pv.y}};
 | 
			
		||||
    float dx = M_PI * {{pv.x}} * {{pv.x}};
 | 
			
		||||
    float dx2 = 0.5f * dx;
 | 
			
		||||
    float a = atan2f(tx, ty);
 | 
			
		||||
    float r = w * sqrtf(tx*tx + ty*ty);
 | 
			
		||||
@ -217,16 +238,16 @@ var(25, 'fan2', """
 | 
			
		||||
 | 
			
		||||
    ox += r * sinf(a);
 | 
			
		||||
    oy += r * cosf(a);
 | 
			
		||||
    """)
 | 
			
		||||
    """, 'x y')
 | 
			
		||||
 | 
			
		||||
var(26, 'rings2', """
 | 
			
		||||
    float dx = {{px.get('xf.rings2_val')}} * {{px.get('xf.rings2_val')}};
 | 
			
		||||
    float dx = {{pv.val}} * {{pv.val}};
 | 
			
		||||
    float r = sqrtf(tx*tx + ty*ty);
 | 
			
		||||
    float a = atan2f(tx, ty);
 | 
			
		||||
    r += -2.0f * dx * (int)((r+dx)/(2.0f*dx)) + r * (1.0f - dx);
 | 
			
		||||
    ox += w * sinf(a) * r;
 | 
			
		||||
    oy += w * cosf(a) * r;
 | 
			
		||||
    """)
 | 
			
		||||
    """, 'val')
 | 
			
		||||
 | 
			
		||||
var(27, 'eyefish', """
 | 
			
		||||
    float r = 2.0f * w / (sqrtf(tx*tx + ty*ty) + 1.0f);
 | 
			
		||||
@ -245,16 +266,21 @@ var(29, 'cylinder', """
 | 
			
		||||
    oy += w * ty;
 | 
			
		||||
    """)
 | 
			
		||||
 | 
			
		||||
var(30, 'perspective', """
 | 
			
		||||
    float pdist = {{px.get('xf.perspective_dist')}};
 | 
			
		||||
    float pvsin = {{px.get('np.sin(xf.perspective_angle*np.pi/2)', 'pvsin')}};
 | 
			
		||||
    float pvfcos = {{px.get(
 | 
			
		||||
        'xf.perspective_dist*np.cos(xf.perspective_angle*np.pi/2)', 'pvfcos')}};
 | 
			
		||||
precalc('perspective', """
 | 
			
		||||
        float pang = {{pre.angle}} * M_PI_2;
 | 
			
		||||
        float pdist = fmaxf(1e-9, {{pre.dist}});
 | 
			
		||||
        {{pre._set('mdist')}} = pdist;
 | 
			
		||||
        {{pre._set('sin')}} = sin(pang);
 | 
			
		||||
        {{pre._set('cos')}} = pdist * cos(pang);
 | 
			
		||||
        """)
 | 
			
		||||
 | 
			
		||||
    float t = 1.0f / (pdist - ty * pvsin);
 | 
			
		||||
    ox += w * pdist * tx * t;
 | 
			
		||||
    oy += w * pvfcos * ty * t;
 | 
			
		||||
    """)
 | 
			
		||||
var(30, 'perspective', """
 | 
			
		||||
    {{perspective_precalc(pv)}}
 | 
			
		||||
 | 
			
		||||
    float t = 1.0f / ({{pv.mdist}} - ty * {{pv.sin}});
 | 
			
		||||
    ox += w * {{pv.mdist}} * tx * t;
 | 
			
		||||
    oy += w * {{pv.cos}} * ty * t;
 | 
			
		||||
    """, 'angle dist')
 | 
			
		||||
 | 
			
		||||
var(31, 'noise', """
 | 
			
		||||
    float tmpr = mwc_next_01(rctx) * 2.0f * M_PI;
 | 
			
		||||
@ -263,33 +289,39 @@ var(31, 'noise', """
 | 
			
		||||
    oy += ty * r * sinf(tmpr);
 | 
			
		||||
    """)
 | 
			
		||||
 | 
			
		||||
precalc('julian',
 | 
			
		||||
        "{{pre._set('cn')}} = {{pre.dist}} / (2.0f * {{pre.power}});\n")
 | 
			
		||||
 | 
			
		||||
var(32, 'julian', """
 | 
			
		||||
    float power = {{px.get('xf.julian_power')}};
 | 
			
		||||
    {{julina_precalc(pv)}}
 | 
			
		||||
    float power = {{pv.power}};
 | 
			
		||||
    float t_rnd = truncf(mwc_next_01(rctx) * fabsf(power));
 | 
			
		||||
    float a = atan2f(ty, tx);
 | 
			
		||||
    float tmpr = (a + 2.0f * M_PI * t_rnd) / power;
 | 
			
		||||
    float cn = {{px.get('xf.julian_dist / xf.julian_power / 2', 'julian_cn')}};
 | 
			
		||||
    float cn = {{pv.cn}};
 | 
			
		||||
    float r = w * powf(tx * tx + ty * ty, cn);
 | 
			
		||||
 | 
			
		||||
    ox += r * cosf(tmpr);
 | 
			
		||||
    oy += r * sinf(tmpr);
 | 
			
		||||
    """)
 | 
			
		||||
    """, 'power=1 dist=1')
 | 
			
		||||
 | 
			
		||||
precalc('juliascope',
 | 
			
		||||
        "{{pre._set('cn')}} = {{pre.dist}} / (2.0f * {{pre.power}});\n")
 | 
			
		||||
 | 
			
		||||
var(33, 'juliascope', """
 | 
			
		||||
    {{juliascope_precalc(pv)}}
 | 
			
		||||
 | 
			
		||||
    float ang = atan2f(ty, tx);
 | 
			
		||||
    float power = {{px.get('xf.juliascope_power', 'juscope_power')}};
 | 
			
		||||
    float power = {{pv.power}};
 | 
			
		||||
    float t_rnd = truncf(mwc_next_01(rctx) * fabsf(power));
 | 
			
		||||
    // TODO: don't draw the extra random number
 | 
			
		||||
    if (mwc_next(rctx) & 1) ang = -ang;
 | 
			
		||||
    float tmpr = (2.0f * M_PI * t_rnd + ang) / power;
 | 
			
		||||
 | 
			
		||||
    float cn = {{px.get('xf.juliascope_dist / xf.juliascope_power / 2',
 | 
			
		||||
                         'juscope_cn')}};
 | 
			
		||||
    float r = w * powf(tx * tx + ty * ty, cn);
 | 
			
		||||
    float r = w * powf(tx * tx + ty * ty, {{pv.cn}});
 | 
			
		||||
 | 
			
		||||
    ox += r * cosf(tmpr);
 | 
			
		||||
    oy += r * sinf(tmpr);
 | 
			
		||||
    """)
 | 
			
		||||
    """, 'power=1 dist=1')
 | 
			
		||||
 | 
			
		||||
var(34, 'blur', """
 | 
			
		||||
    float tmpr = mwc_next_01(rctx) * 2.0f * M_PI;
 | 
			
		||||
@ -300,40 +332,39 @@ var(34, 'blur', """
 | 
			
		||||
 | 
			
		||||
var(35, 'gaussian', """
 | 
			
		||||
    float ang = mwc_next_01(rctx) * 2.0f * M_PI;
 | 
			
		||||
    float r = w * ( mwc_next_01(rctx) + mwc_next_01(rctx)
 | 
			
		||||
                  + mwc_next_01(rctx) + mwc_next_01(rctx) - 2.0f );
 | 
			
		||||
    float r = w * sqrtf(-2.0f * log2f(mwc_next_01(rctx)) / M_LOG2E);
 | 
			
		||||
    ox += r * cosf(ang);
 | 
			
		||||
    oy += r * sinf(ang);
 | 
			
		||||
    """)
 | 
			
		||||
 | 
			
		||||
var(36, 'radial_blur', """
 | 
			
		||||
    float blur_angle = {{px.get('xf.radial_blur_angle')}} * M_PI * 0.5f;
 | 
			
		||||
    float blur_angle = {{pv.angle}} * M_PI * 0.5f;
 | 
			
		||||
    float spinvar = sinf(blur_angle);
 | 
			
		||||
    float zoomvar = cosf(blur_angle);
 | 
			
		||||
    float r = w * ( mwc_next_01(rctx) + mwc_next_01(rctx)
 | 
			
		||||
                   + mwc_next_01(rctx) + mwc_next_01(rctx) - 2.0f );
 | 
			
		||||
 | 
			
		||||
    float r = w * sqrtf(-2.0f * log2f(mwc_next_01(rctx)) / M_LOG2E);
 | 
			
		||||
    float ra = sqrtf(tx*tx + ty*ty);
 | 
			
		||||
    float tmpa = atan2f(ty, tx) + spinvar * r;
 | 
			
		||||
    float rz = zoomvar * r - 1.0f;
 | 
			
		||||
    ox += ra*cosf(tmpa) + rz*tx;
 | 
			
		||||
    oy += ra*sinf(tmpa) + rz*ty;
 | 
			
		||||
    """)
 | 
			
		||||
    """, 'angle')
 | 
			
		||||
 | 
			
		||||
var(37, 'pie', """
 | 
			
		||||
    float slices = {{px.get('xf.pie_slices')}};
 | 
			
		||||
    float slices = {{pv.slices}};
 | 
			
		||||
    float sl = truncf(mwc_next_01(rctx) * slices + 0.5f);
 | 
			
		||||
    float a = {{px.get('xf.pie_rotation')}} +
 | 
			
		||||
                2.0f * M_PI * (sl + mwc_next_01(rctx) * {{px.get('xf.pie_thickness')}}) / slices;
 | 
			
		||||
    float a = {{pv.rotation}} +
 | 
			
		||||
                2.0f * M_PI * (sl + mwc_next_01(rctx) * {{pv.thickness}}) / slices;
 | 
			
		||||
    float r = w * mwc_next_01(rctx);
 | 
			
		||||
    ox += r * cosf(a);
 | 
			
		||||
    oy += r * sinf(a);
 | 
			
		||||
    """)
 | 
			
		||||
    """, 'slices=6 rotation thickness=0.5')
 | 
			
		||||
 | 
			
		||||
var(38, 'ngon', """
 | 
			
		||||
    float power = {{px.get('xf.ngon_power')}} * 0.5f;
 | 
			
		||||
    float b = 2.0f * M_PI / {{px.get('xf.ngon_sides')}};
 | 
			
		||||
    float corners = {{px.get('xf.ngon_corners')}};
 | 
			
		||||
    float circle = {{px.get('xf.ngon_circle')}};
 | 
			
		||||
    float power = {{pv.power}} * 0.5f;
 | 
			
		||||
    float b = 2.0f * M_PI / {{pv.sides}};
 | 
			
		||||
    float corners = {{pv.corners}};
 | 
			
		||||
    float circle = {{pv.circle}};
 | 
			
		||||
 | 
			
		||||
    float r_factor = powf(tx*tx + ty*ty, power);
 | 
			
		||||
    float theta = atan2f(ty, tx);
 | 
			
		||||
@ -343,11 +374,11 @@ var(38, 'ngon', """
 | 
			
		||||
 | 
			
		||||
    ox += w * tx * amp;
 | 
			
		||||
    oy += w * ty * amp;
 | 
			
		||||
    """)
 | 
			
		||||
    """, 'sides=5 power=3 circle=1 corners=2')
 | 
			
		||||
 | 
			
		||||
var(39, 'curl', """
 | 
			
		||||
    float c1 = {{px.get('xf.curl_c1')}};
 | 
			
		||||
    float c2 = {{px.get('xf.curl_c2')}};
 | 
			
		||||
    float c1 = {{pv.c1}};
 | 
			
		||||
    float c2 = {{pv.c2}};
 | 
			
		||||
 | 
			
		||||
    float re = 1.0f + c1*tx + c2*(tx*tx - ty*ty);
 | 
			
		||||
    float im = c1*ty + 2.0f*c2*tx*ty;
 | 
			
		||||
@ -355,15 +386,15 @@ var(39, 'curl', """
 | 
			
		||||
 | 
			
		||||
    ox += r * (tx*re + ty*im);
 | 
			
		||||
    oy += r * (ty*re - tx*im);
 | 
			
		||||
    """)
 | 
			
		||||
    """, 'c1=1 c2')
 | 
			
		||||
 | 
			
		||||
var(40, 'rectangles', """
 | 
			
		||||
    float rx = {{px.get('xf.rectangles_x')}};
 | 
			
		||||
    float ry = {{px.get('xf.rectangles_y')}};
 | 
			
		||||
    float rx = {{pv.x}};
 | 
			
		||||
    float ry = {{pv.y}};
 | 
			
		||||
 | 
			
		||||
    ox += w * ( (rx==0.0f) ? tx : rx * (2.0f * floorf(tx/rx) + 1.0f) - tx);
 | 
			
		||||
    oy += w * ( (ry==0.0f) ? ty : ry * (2.0f * floorf(ty/ry) + 1.0f) - ty);
 | 
			
		||||
    """)
 | 
			
		||||
    """, 'x y')
 | 
			
		||||
 | 
			
		||||
var(41, 'arch', """
 | 
			
		||||
    float ang = mwc_next_01(rctx) * w * M_PI;
 | 
			
		||||
@ -417,9 +448,8 @@ var(48, 'cross', """
 | 
			
		||||
    """)
 | 
			
		||||
 | 
			
		||||
var(49, 'disc2', """
 | 
			
		||||
    float twist = {{px.get('xf.disc2_twist')}};
 | 
			
		||||
    float rotpi = {{px.get('xf.disc2_rot', 'disc2_rotpi')}};
 | 
			
		||||
    rotpi *= M_PI;
 | 
			
		||||
    float twist = {{pv.twist}}
 | 
			
		||||
    float rotpi = {{pv.rot}} * M_PI;
 | 
			
		||||
 | 
			
		||||
    float sintwist = sinf(twist);
 | 
			
		||||
    float costwist = cosf(twist) - 1.0f;
 | 
			
		||||
@ -441,70 +471,71 @@ var(49, 'disc2', """
 | 
			
		||||
 | 
			
		||||
    ox += r * (sinf(t) + costwist);
 | 
			
		||||
    oy += r * (cosf(t) + sintwist);
 | 
			
		||||
    """)
 | 
			
		||||
    """, 'rot twist')
 | 
			
		||||
 | 
			
		||||
var(50, 'super_shape', """
 | 
			
		||||
    float ang = atan2f(ty, tx);
 | 
			
		||||
    float theta = 0.25f * ({{px.get('xf.super_shape_m')}} * ang + M_PI);
 | 
			
		||||
    float theta = 0.25f * ({{pv.m}} * ang + M_PI);
 | 
			
		||||
    float t1 = fabsf(cosf(theta));
 | 
			
		||||
    float t2 = fabsf(sinf(theta));
 | 
			
		||||
    t1 = powf(t1,{{px.get('xf.super_shape_n2')}});
 | 
			
		||||
    t2 = powf(t2,{{px.get('xf.super_shape_n3')}});
 | 
			
		||||
    float myrnd = {{px.get('xf.super_shape_rnd')}};
 | 
			
		||||
    t1 = powf(t1, {{pv.n2}});
 | 
			
		||||
    t2 = powf(t2, {{pv.n3}});
 | 
			
		||||
    float myrnd = {{pv.rnd}};
 | 
			
		||||
    float d = sqrtf(tx*tx+ty*ty);
 | 
			
		||||
 | 
			
		||||
    float r = w * ((myrnd*mwc_next_01(rctx) + (1.0f-myrnd)*d) - {{px.get('xf.super_shape_holes')}})
 | 
			
		||||
                * powf(t1+t2, {{px.get('-1.0 / xf.super_shape_n1', 'super_shape_pneg')}}) / d;
 | 
			
		||||
    float r = w * ((myrnd*mwc_next_01(rctx) + (1.0f-myrnd)*d) - {{pv.holes}})
 | 
			
		||||
                * powf(t1+t2, -1.0f / {{pv.n1}}) / d;
 | 
			
		||||
 | 
			
		||||
    ox += r * tx;
 | 
			
		||||
    oy += r * ty;
 | 
			
		||||
    """)
 | 
			
		||||
    """, 'rnd m n1=1 n2=1 n3=1 holes')
 | 
			
		||||
 | 
			
		||||
var(51, 'flower', """
 | 
			
		||||
    float holes = {{px.get('xf.flower_holes')}};
 | 
			
		||||
    float petals = {{px.get('xf.flower_petals')}};
 | 
			
		||||
    float holes = {{pv.holes}};
 | 
			
		||||
    float petals = {{pv.petals}};
 | 
			
		||||
 | 
			
		||||
    float r = w * (mwc_next_01(rctx) - holes) * cosf(petals*atan2f(ty, tx)) / sqrtf(tx*tx + ty*ty);
 | 
			
		||||
    float r = w * (mwc_next_01(rctx) - holes)
 | 
			
		||||
                * cosf(petals*atan2f(ty, tx)) / sqrtf(tx*tx + ty*ty);
 | 
			
		||||
 | 
			
		||||
    ox += r * tx;
 | 
			
		||||
    oy += r * ty;
 | 
			
		||||
    """)
 | 
			
		||||
    """, 'holes petals')
 | 
			
		||||
 | 
			
		||||
var(52, 'conic', """
 | 
			
		||||
    float d = sqrtf(tx*tx + ty*ty);
 | 
			
		||||
    float ct = tx / d;
 | 
			
		||||
    float holes = {{px.get('xf.conic_holes')}};
 | 
			
		||||
    float eccen = {{px.get('xf.conic_eccentricity')}};
 | 
			
		||||
    float holes = {{pv.holes}};
 | 
			
		||||
    float eccen = {{pv.eccentricity}};
 | 
			
		||||
 | 
			
		||||
    float r = w * (mwc_next_01(rctx) - holes) * eccen / (1.0f + eccen*ct) / d;
 | 
			
		||||
 | 
			
		||||
    ox += r * tx;
 | 
			
		||||
    oy += r * ty;
 | 
			
		||||
    """)
 | 
			
		||||
    """, 'holes eccentricity=1')
 | 
			
		||||
 | 
			
		||||
var(53, 'parabola', """
 | 
			
		||||
    float r = sqrtf(tx*tx + ty*ty);
 | 
			
		||||
    float sr = sinf(r);
 | 
			
		||||
    float cr = cosf(r);
 | 
			
		||||
 | 
			
		||||
    ox += {{px.get('xf.parabola_height')}} * w * sr * sr * mwc_next_01(rctx);
 | 
			
		||||
    oy += {{px.get('xf.parabola_width')}} * w * cr * mwc_next_01(rctx);
 | 
			
		||||
    """)
 | 
			
		||||
    ox += {{pv.height}} * w * sr * sr * mwc_next_01(rctx);
 | 
			
		||||
    oy += {{pv.width}}  * w * cr * mwc_next_01(rctx);
 | 
			
		||||
    """, 'height width')
 | 
			
		||||
 | 
			
		||||
var(54, 'bent2', """
 | 
			
		||||
    float nx = 1.0f;
 | 
			
		||||
    if (tx < 0.0f) nx = {{px.get('xf.bent2_x')}};
 | 
			
		||||
    if (tx < 0.0f) nx = {{pv.x}};
 | 
			
		||||
    float ny = 1.0f;
 | 
			
		||||
    if (ty < 0.0f) ny = {{px.get('xf.bent2_y')}};
 | 
			
		||||
    if (ty < 0.0f) ny = {{pv.y}};
 | 
			
		||||
    ox += w * nx * tx;
 | 
			
		||||
    oy += w * ny * ty;
 | 
			
		||||
    """)
 | 
			
		||||
    """, 'x=1 y=1')
 | 
			
		||||
 | 
			
		||||
var(55, 'bipolar', """
 | 
			
		||||
    float x2y2 = tx*tx + ty*ty;
 | 
			
		||||
    float t = x2y2 + 1.0f;
 | 
			
		||||
    float x2 = tx * 2.0f;
 | 
			
		||||
    float ps = -M_PI_2 * {{px.get('xf.bipolar_shift')}};
 | 
			
		||||
    float ps = -M_PI_2 * {{pv.shift}};
 | 
			
		||||
    float y = 0.5f * atan2f(2.0f * ty, x2y2 - 1.0f) + ps;
 | 
			
		||||
 | 
			
		||||
    if (y > M_PI_2)
 | 
			
		||||
@ -514,7 +545,7 @@ var(55, 'bipolar', """
 | 
			
		||||
 | 
			
		||||
    ox += w * 0.25f * M_2_PI * logf( (t+x2) / (t-x2) );
 | 
			
		||||
    oy += w * M_2_PI * y;
 | 
			
		||||
    """)
 | 
			
		||||
    """, 'shift')
 | 
			
		||||
 | 
			
		||||
var(56, 'boarders', """
 | 
			
		||||
    float roundX = rintf(tx);
 | 
			
		||||
@ -556,7 +587,7 @@ var(57, 'butterfly', """
 | 
			
		||||
    """)
 | 
			
		||||
 | 
			
		||||
var(58, 'cell', """
 | 
			
		||||
    float cell_size = {{px.get('xf.cell_size')}};
 | 
			
		||||
    float cell_size = {{pv.size}};
 | 
			
		||||
    float inv_cell_size = 1.0f/cell_size;
 | 
			
		||||
 | 
			
		||||
    /* calculate input cell */
 | 
			
		||||
@ -588,31 +619,35 @@ var(58, 'cell', """
 | 
			
		||||
 | 
			
		||||
    ox += w * (dx + x*cell_size);
 | 
			
		||||
    oy -= w * (dy + y*cell_size);
 | 
			
		||||
    """)
 | 
			
		||||
    """, 'size=1')
 | 
			
		||||
 | 
			
		||||
var(59, 'cpow', """
 | 
			
		||||
    float a = atan2f(ty, tx);
 | 
			
		||||
    float lnr = 0.5f * logf(tx*tx+ty*ty);
 | 
			
		||||
    float power = {{px.get('xf.cpow_power')}};
 | 
			
		||||
    float power = {{pv.power}};
 | 
			
		||||
    float va = 2.0f * M_PI / power;
 | 
			
		||||
    float vc = {{px.get('xf.cpow_r')}} / power;
 | 
			
		||||
    float vd = {{px.get('xf.cpow_i')}} / power;
 | 
			
		||||
    float vc = {{pv.cpow_r}} / power;
 | 
			
		||||
    float vd = {{pv.cpow_i}} / power;
 | 
			
		||||
    float ang = vc*a + vd*lnr + va*floorf(power*mwc_next_01(rctx));
 | 
			
		||||
    float m = w * expf(vc * lnr - vd * a);
 | 
			
		||||
    ox += m * cosf(ang);
 | 
			
		||||
    oy += m * sinf(ang);
 | 
			
		||||
    """)
 | 
			
		||||
    """, 'r=1 i power=1')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
precalc('curve', '''
 | 
			
		||||
        float xl = {{pv.xlength}}, yl = {{pv.ylength}};
 | 
			
		||||
        {{pre._set('x2')}} = 1.0f / max(1e-20f, xl * xl);
 | 
			
		||||
        {{pre._set('y2')}} = 1.0f / max(1e-20f, yl * yl);
 | 
			
		||||
        ''')
 | 
			
		||||
 | 
			
		||||
var(60, 'curve', """
 | 
			
		||||
    float pc_xlen = {{px.get('xf.curve_xlength * xf.curve_xlength','pc_xlen')}};
 | 
			
		||||
    float pc_ylen = {{px.get('xf.curve_ylength * xf.curve_ylength','pc_ylen')}};
 | 
			
		||||
    {{curve_precalc()}}
 | 
			
		||||
    float pc_xlen = {{pv.x2}}, pc_ylen = {{pv.y2}};
 | 
			
		||||
 | 
			
		||||
    if (pc_xlen<1E-20f) pc_xlen = 1E-20f;
 | 
			
		||||
    if (pc_ylen<1E-20f) pc_ylen = 1E-20f;
 | 
			
		||||
 | 
			
		||||
    ox += w * (tx + {{px.get('xf.curve_xamp')}} * expf(-ty*ty/pc_xlen));
 | 
			
		||||
    oy += w * (ty + {{px.get('xf.curve_yamp')}} * expf(-tx*tx/pc_ylen));
 | 
			
		||||
    """)
 | 
			
		||||
    ox += w * (tx + {{pv.xamp}} * expf(-ty*ty*pc_xlen));
 | 
			
		||||
    oy += w * (ty + {{pv.yamp}} * expf(-tx*tx*pc_ylen));
 | 
			
		||||
    """, 'xamp yamp xlength=1 ylength=1')
 | 
			
		||||
 | 
			
		||||
var(61, 'edisc', """
 | 
			
		||||
    float tmp = tx*tx + ty*ty + 1.0f;
 | 
			
		||||
@ -662,7 +697,7 @@ var(62, 'elliptic', """
 | 
			
		||||
var(63, 'escher', """
 | 
			
		||||
    float a = atan2f(ty,tx);
 | 
			
		||||
    float lnr = 0.5f * logf(tx*tx + ty*ty);
 | 
			
		||||
    float ebeta = {{px.get('xf.escher_beta')}};
 | 
			
		||||
    float ebeta = {{pv.beta}};
 | 
			
		||||
    float seb = sinf(ebeta);
 | 
			
		||||
    float ceb = cosf(ebeta);
 | 
			
		||||
    float vc = 0.5f * (1.0f + ceb);
 | 
			
		||||
@ -672,7 +707,7 @@ var(63, 'escher', """
 | 
			
		||||
 | 
			
		||||
    ox += m * cosf(n);
 | 
			
		||||
    oy += m * sinf(n);
 | 
			
		||||
    """)
 | 
			
		||||
    """, 'beta')
 | 
			
		||||
 | 
			
		||||
var(64, 'foci', """
 | 
			
		||||
    float expx = expf(tx) * 0.5f;
 | 
			
		||||
@ -685,27 +720,26 @@ var(64, 'foci', """
 | 
			
		||||
    """)
 | 
			
		||||
 | 
			
		||||
var(65, 'lazysusan', """
 | 
			
		||||
    float lx = {{px.get('xf.lazysusan_x')}};
 | 
			
		||||
    float ly = {{px.get('xf.lazysusan_y')}};
 | 
			
		||||
    float lx = {{pv.x}};
 | 
			
		||||
    float ly = {{pv.y}};
 | 
			
		||||
    float x = tx - lx;
 | 
			
		||||
    float y = ty + ly;
 | 
			
		||||
    float r = sqrtf(x*x + y*y);
 | 
			
		||||
 | 
			
		||||
    if (r < w) {
 | 
			
		||||
        float a = atan2f(y,x) + {{px.get('xf.lazysusan_spin')}} +
 | 
			
		||||
                 {{px.get('xf.lazysusan_twist')}}*(w-r);
 | 
			
		||||
        float a = atan2f(y,x) + {{pv.spin}} + {{pv.twist}} * (w - r);
 | 
			
		||||
        r = w * r;
 | 
			
		||||
 | 
			
		||||
        ox += r * cosf(a) + lx;
 | 
			
		||||
        oy += r * sinf(a) - ly;
 | 
			
		||||
 | 
			
		||||
    } else {
 | 
			
		||||
        r = w * (1.0f + {{px.get('xf.lazysusan_space')}} / r);
 | 
			
		||||
        r = w * (1.0f + {{pv.space}} / r);
 | 
			
		||||
 | 
			
		||||
        ox += r * x + lx;
 | 
			
		||||
        oy += r * y - ly;
 | 
			
		||||
    }
 | 
			
		||||
    """)
 | 
			
		||||
    """, 'x y twist space spin')
 | 
			
		||||
 | 
			
		||||
var(66, 'loonie', """
 | 
			
		||||
    float r2 = tx*tx + ty*ty;;
 | 
			
		||||
@ -732,8 +766,7 @@ var(67, 'pre_blur', """
 | 
			
		||||
    """)
 | 
			
		||||
 | 
			
		||||
var(68, 'modulus', """
 | 
			
		||||
    float mx = {{px.get('xf.modulus_x')}};
 | 
			
		||||
    float my = {{px.get('xf.modulus_y')}};
 | 
			
		||||
    float mx = {{pv.x}}, my = {{pv.y}};
 | 
			
		||||
    float xr = 2.0f*mx;
 | 
			
		||||
    float yr = 2.0f*my;
 | 
			
		||||
 | 
			
		||||
@ -750,13 +783,13 @@ var(68, 'modulus', """
 | 
			
		||||
        oy += w * ( my - fmodf(my - ty, yr));
 | 
			
		||||
    else
 | 
			
		||||
        oy += w * ty;
 | 
			
		||||
    """)
 | 
			
		||||
    """, 'x y')
 | 
			
		||||
 | 
			
		||||
var(69, 'oscope', """
 | 
			
		||||
    float tpf = 2.0f * M_PI * {{px.get('xf.oscope_frequency')}};
 | 
			
		||||
    float amp = {{px.get('xf.oscope_amplitude')}};
 | 
			
		||||
    float sep = {{px.get('xf.oscope_separation')}};
 | 
			
		||||
    float dmp = {{px.get('xf.oscope_damping')}};
 | 
			
		||||
    float tpf = 2.0f * M_PI * {{pv.frequency}};
 | 
			
		||||
    float amp = {{pv.amplitude}};
 | 
			
		||||
    float sep = {{pv.separation}};
 | 
			
		||||
    float dmp = {{pv.damping}};
 | 
			
		||||
 | 
			
		||||
    float t = amp * expf(-fabsf(tx)*dmp) * cosf(tpf*tx) + sep;
 | 
			
		||||
 | 
			
		||||
@ -765,7 +798,7 @@ var(69, 'oscope', """
 | 
			
		||||
        oy -= w*ty;
 | 
			
		||||
    else
 | 
			
		||||
        oy += w*ty;
 | 
			
		||||
    """)
 | 
			
		||||
    """, 'separation=1 frequency=M_PI amplitude=1 damping')
 | 
			
		||||
 | 
			
		||||
var(70, 'polar2', """
 | 
			
		||||
    float p2v = w / M_PI;
 | 
			
		||||
@ -774,10 +807,10 @@ var(70, 'polar2', """
 | 
			
		||||
    """)
 | 
			
		||||
 | 
			
		||||
var(71, 'popcorn2', """
 | 
			
		||||
    float c = {{px.get('xf.popcorn2_c')}};
 | 
			
		||||
    ox += w * (tx + {{px.get('xf.popcorn2_x')}} * sinf(tanf(ty*c)));
 | 
			
		||||
    oy += w * (ty + {{px.get('xf.popcorn2_y')}} * sinf(tanf(tx*c)));
 | 
			
		||||
    """)
 | 
			
		||||
    float c = {{pv.c}};
 | 
			
		||||
    ox += w * (tx + {{pv.x}} * sinf(tanf(ty*c)));
 | 
			
		||||
    oy += w * (ty + {{pv.y}} * sinf(tanf(tx*c)));
 | 
			
		||||
    """, 'x y c')
 | 
			
		||||
 | 
			
		||||
var(72, 'scry', """
 | 
			
		||||
    /* note that scry does not multiply by weight, but as the */
 | 
			
		||||
@ -790,68 +823,61 @@ var(72, 'scry', """
 | 
			
		||||
    """)
 | 
			
		||||
 | 
			
		||||
var(73, 'separation', """
 | 
			
		||||
    float sx2 = {{px.get('xf.separation_x * xf.separation_x', 'sx2')}};
 | 
			
		||||
    float sy2 = {{px.get('xf.separation_y * xf.separation_y', 'sy2')}};
 | 
			
		||||
    float sx2 = {{pv.x}} * {{pv.x}};
 | 
			
		||||
    float sy2 = {{pv.y}} * {{pv.y}};
 | 
			
		||||
 | 
			
		||||
    if (tx > 0.0f)
 | 
			
		||||
        ox += w * (sqrtf(tx*tx + sx2) - tx*{{px.get('xf.separation_xinside')}});
 | 
			
		||||
        ox += w * (sqrtf(tx*tx + sx2) - tx*{{pv.xinside}});
 | 
			
		||||
    else
 | 
			
		||||
        ox -= w * (sqrtf(tx*tx + sx2) + tx*{{px.get('xf.separation_xinside')}});
 | 
			
		||||
        ox -= w * (sqrtf(tx*tx + sx2) + tx*{{pv.xinside}});
 | 
			
		||||
 | 
			
		||||
    if (ty > 0.0f)
 | 
			
		||||
        oy += w * (sqrtf(ty*ty + sy2) - ty*{{px.get('xf.separation_yinside')}});
 | 
			
		||||
        oy += w * (sqrtf(ty*ty + sy2) - ty*{{pv.yinside}});
 | 
			
		||||
    else
 | 
			
		||||
        oy -= w * (sqrtf(ty*ty + sy2) + ty*{{px.get('xf.separation_yinside')}});
 | 
			
		||||
    """)
 | 
			
		||||
        oy -= w * (sqrtf(ty*ty + sy2) + ty*{{pv.yinside}});
 | 
			
		||||
    """, 'x xinside y yinside')
 | 
			
		||||
 | 
			
		||||
var(74, 'split', """
 | 
			
		||||
    if (cosf(tx*{{px.get('xf.split_xsize')}}*M_PI) >= 0.0f)
 | 
			
		||||
    if (cosf(tx*{{pv.xsize}}*M_PI) >= 0.0f)
 | 
			
		||||
        oy += w*ty;
 | 
			
		||||
    else
 | 
			
		||||
        oy -= w*ty;
 | 
			
		||||
 | 
			
		||||
    if (cosf(ty*{{px.get('xf.split_ysize')}}*M_PI) >= 0.0f)
 | 
			
		||||
    if (cosf(ty*{{pv.ysize}}*M_PI) >= 0.0f)
 | 
			
		||||
        ox += w*tx;
 | 
			
		||||
    else
 | 
			
		||||
        ox -= w*tx;
 | 
			
		||||
    """)
 | 
			
		||||
    """, 'xsize ysize')
 | 
			
		||||
 | 
			
		||||
var(75, 'splits', """
 | 
			
		||||
    if (tx >= 0.0f)
 | 
			
		||||
        ox += w*(tx + {{px.get('xf.splits_x')}});
 | 
			
		||||
    else
 | 
			
		||||
        ox += w*(tx - {{px.get('xf.splits_x')}});
 | 
			
		||||
 | 
			
		||||
    if (ty >= 0)
 | 
			
		||||
        oy += w*(ty + {{px.get('xf.splits_y')}});
 | 
			
		||||
    else
 | 
			
		||||
        oy += w*(ty - {{px.get('xf.splits_y')}});
 | 
			
		||||
    """)
 | 
			
		||||
    ox += w*(tx + copysignf({{pv.x}}, tx));
 | 
			
		||||
    oy += w*(ty + copysignf({{pv.y}}, ty));
 | 
			
		||||
    """, 'x y')
 | 
			
		||||
 | 
			
		||||
var(76, 'stripes', """
 | 
			
		||||
    float roundx = floorf(tx + 0.5f);
 | 
			
		||||
    float offsetx = tx - roundx;
 | 
			
		||||
    ox += w * (offsetx * (1.0f - {{px.get('xf.stripes_space')}}) + roundx);
 | 
			
		||||
    oy += w * (ty + offsetx*offsetx*{{px.get('xf.stripes_warp')}});
 | 
			
		||||
    """)
 | 
			
		||||
    ox += w * (offsetx * (1.0f - {{pv.space}}) + roundx);
 | 
			
		||||
    oy += w * (ty + offsetx*offsetx*{{pv.warp}});
 | 
			
		||||
    """, 'space warp')
 | 
			
		||||
 | 
			
		||||
var(77, 'wedge', """
 | 
			
		||||
    float r = sqrtf(tx*tx + ty*ty);
 | 
			
		||||
    float a = atan2f(ty, tx) + {{px.get('xf.wedge_swirl')}} * r;
 | 
			
		||||
    float wc = {{px.get('xf.wedge_count')}};
 | 
			
		||||
    float wa = {{px.get('xf.wedge_angle')}};
 | 
			
		||||
    float a = atan2f(ty, tx) + {{pv.swirl}} * r;
 | 
			
		||||
    float wc = {{pv.count}};
 | 
			
		||||
    float wa = {{pv.angle}};
 | 
			
		||||
    float c = floorf((wc * a + M_PI) * M_1_PI * 0.5f);
 | 
			
		||||
    float comp_fac = 1 - wa * wc * M_1_PI * 0.5f;
 | 
			
		||||
    a = a * comp_fac + c * wa;
 | 
			
		||||
    r = w * (r + {{px.get('xf.wedge_hole')}});
 | 
			
		||||
    r = w * (r + {{pv.hole}});
 | 
			
		||||
    ox += r * cosf(a);
 | 
			
		||||
    oy += r * sinf(a);
 | 
			
		||||
    """)
 | 
			
		||||
    """, 'angle hole count=1 swirl')
 | 
			
		||||
 | 
			
		||||
var(81, 'waves2', """
 | 
			
		||||
    ox += w*(tx + {{px.get('xf.waves2_scalex')}}*sinf(ty * {{px.get('xf.waves2_freqx')}}));
 | 
			
		||||
    oy += w*(ty + {{px.get('xf.waves2_scaley')}}*sinf(tx * {{px.get('xf.waves2_freqy')}}));
 | 
			
		||||
    """)
 | 
			
		||||
    ox += w*(tx + {{pv.scalex}}*sinf(ty * {{pv.freqx}}));
 | 
			
		||||
    oy += w*(ty + {{pv.scaley}}*sinf(tx * {{pv.freqy}}));
 | 
			
		||||
    """, 'scalex scaley freqx freqy')
 | 
			
		||||
 | 
			
		||||
var(82, 'exp', """
 | 
			
		||||
    float expe = expf(tx);
 | 
			
		||||
@ -935,21 +961,22 @@ var(95, 'coth', """
 | 
			
		||||
var(97, 'flux', """
 | 
			
		||||
    float xpw = tx + w;
 | 
			
		||||
    float xmw = tx - w;
 | 
			
		||||
    float avgr = w * (2.0f + {{px.get('xf.flux_spread')}}) * sqrtf(sqrtf(ty*ty+xpw*xpw)/sqrtf(ty*ty+xmw*xmw));
 | 
			
		||||
    float avgr = w * (2.0f + {{pv.spread}})
 | 
			
		||||
               * sqrtf(sqrtf(ty*ty+xpw*xpw)/sqrtf(ty*ty+xmw*xmw));
 | 
			
		||||
    float avga = (atan2f(ty, xmw) - atan2f(ty,xpw))*0.5f;
 | 
			
		||||
    ox += avgr * cosf(avga);
 | 
			
		||||
    oy += avgr * sinf(avga);
 | 
			
		||||
    """)
 | 
			
		||||
    """, 'spread')
 | 
			
		||||
 | 
			
		||||
var(98, 'mobius', """
 | 
			
		||||
    float rea = {{px.get('xf.mobius_re_a')}};
 | 
			
		||||
    float ima = {{px.get('xf.mobius_im_a')}};
 | 
			
		||||
    float reb = {{px.get('xf.mobius_re_b')}};
 | 
			
		||||
    float imb = {{px.get('xf.mobius_im_b')}};
 | 
			
		||||
    float rec = {{px.get('xf.mobius_re_c')}};
 | 
			
		||||
    float imc = {{px.get('xf.mobius_im_c')}};
 | 
			
		||||
    float red = {{px.get('xf.mobius_re_d')}};
 | 
			
		||||
    float imd = {{px.get('xf.mobius_im_d')}};
 | 
			
		||||
    float rea = {{pv.re_a}};
 | 
			
		||||
    float ima = {{pv.im_a}};
 | 
			
		||||
    float reb = {{pv.re_b}};
 | 
			
		||||
    float imb = {{pv.im_b}};
 | 
			
		||||
    float rec = {{pv.re_c}};
 | 
			
		||||
    float imc = {{pv.im_c}};
 | 
			
		||||
    float red = {{pv.re_d}};
 | 
			
		||||
    float imd = {{pv.im_d}};
 | 
			
		||||
 | 
			
		||||
    float re_u, im_u, re_v, im_v, rad_v;
 | 
			
		||||
 | 
			
		||||
@ -962,5 +989,5 @@ var(98, 'mobius', """
 | 
			
		||||
 | 
			
		||||
    ox += rad_v * (re_u*re_v + im_u*im_v);
 | 
			
		||||
    oy += rad_v * (im_u*re_v - re_u*im_v);
 | 
			
		||||
    """)
 | 
			
		||||
    """, 're_a im_a re_b im_b re_c im_c re_d im_d')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		Reference in New Issue
	
	Block a user