New genome representation, and device interp.

This commit is contained in:
Steven Robertson
2011-10-25 15:44:39 -04:00
parent be31708c09
commit 8939a6343a
8 changed files with 1030 additions and 729 deletions

View File

@ -2,8 +2,8 @@
from cuburn.code.util import *
class ColorClip(HunkOCode):
def __init__(self, features):
self.defs = self.defs_tmpl.substitute(features=features)
def __init__(self, info):
self.defs = self.defs_tmpl.substitute(info=info)
defs_tmpl = Template('''
__global__
@ -63,7 +63,7 @@ void colorclip(float4 *pixbuf, float gamma, float vibrancy, float highpow,
pix.y += (1.0f - vibrancy) * powf(opix.y, gamma);
pix.z += (1.0f - vibrancy) * powf(opix.z, gamma);
{{if features.alpha_output_channel}}
{{if info.alpha_output_channel}}
float 1_alpha = 1 / alpha;
pix.x *= 1_alpha;
pix.y *= 1_alpha;
@ -94,13 +94,13 @@ class DensityEst(HunkOCode):
# Note, changing this does not yet have any effect, it's just informational
MAX_WIDTH=15
def __init__(self, features, cp):
self.features, self.cp = features, cp
def __init__(self, info):
self.info = info
headers = "#include<math_constants.h>\n"
@property
def defs(self):
return self.defs_tmpl.substitute(features=self.features, cp=self.cp)
return self.defs_tmpl.substitute(info=self.info)
defs_tmpl = Template('''
#define W 15 // Filter width (regardless of standard deviation chosen)
@ -147,9 +147,9 @@ void density_est(float4 *pixbuf, float4 *outbuf,
de_r[i] = de_g[i] = de_b[i] = de_a[i] = 0.0f;
__syncthreads();
for (int imrow = threadIdx.y + W2; imrow < {{features.acc_height}}; imrow += 32)
for (int imrow = threadIdx.y + W2; imrow < {{info.acc_height}}; imrow += 32)
{
int idx = {{features.acc_stride}} * imrow +
int idx = {{info.acc_stride}} * imrow +
+ blockIdx.x * 32 + threadIdx.x + W2;
float4 in = pixbuf[idx];
@ -249,7 +249,7 @@ void density_est(float4 *pixbuf, float4 *outbuf,
__syncthreads();
// TODO: could coalesce this, but what a pain
for (int i = threadIdx.x; i < FW; i += 32) {
idx = {{features.acc_stride}} * imrow + blockIdx.x * 32 + i + W2;
idx = {{info.acc_stride}} * imrow + blockIdx.x * 32 + i + W2;
int si = threadIdx.y * FW + i;
float *out = reinterpret_cast<float*>(&outbuf[idx]);
atomicAdd(out, de_r[si]);
@ -285,12 +285,14 @@ void density_est(float4 *pixbuf, float4 *outbuf,
# TODO: add no-est version
# TODO: come up with a general way to average these parameters
k1 = np.float32(cp.brightness * 268 / 256)
area = self.features.width * self.features.height / cp.ppu ** 2
k2 = np.float32(1 / (area * cp.adj_density ))
k1 = np.float32(cp.color.brightness * 268 / 256)
# Old definition of area is (w*h/(s*s)). Since new scale 'ns' is now
# s/w, new definition is (w*h/(s*s*w*w)) = (h/(s*s*w))
area = self.info.height / (cp.camera.scale ** 2 * self.info.width)
k2 = np.float32(1 / (area * self.info.density ))
if self.cp.estimator == 0:
nbins = self.features.acc_height * self.features.acc_stride
if cp.de.radius == 0:
nbins = self.info.acc_height * self.info.acc_stride
fun = mod.get_function("logscale")
t = fun(abufd, obufd, k1, k2,
block=(512, 1, 1), grid=(nbins/512, 1), stream=stream)
@ -299,11 +301,11 @@ void density_est(float4 *pixbuf, float4 *outbuf,
# 0.5, but the DE filters scale filter distance by the default
# spatial support factor of 1.5, so the effective base SD is
# (0.5/1.5)=1/3.
est_sd = np.float32(cp.estimator / 3.)
neg_est_curve = np.float32(-cp.estimator_curve)
est_min = np.float32(cp.estimator_minimum / 3.)
est_sd = np.float32(cp.de.radius / 3.)
neg_est_curve = np.float32(-cp.de.curve)
est_min = np.float32(cp.de.minimum / 3.)
fun = mod.get_function("density_est")
fun(abufd, obufd, est_sd, neg_est_curve, est_min, k1, k2,
block=(32, 32, 1), grid=(self.features.acc_width/32, 1),
block=(32, 32, 1), grid=(self.info.acc_width/32, 1),
stream=stream)

318
cuburn/code/interp.py Normal file
View File

@ -0,0 +1,318 @@
from collections import OrderedDict
from itertools import cycle
import numpy as np
import tempita
from cuburn.code.util import HunkOCode, Template
from cuburn.genome import SplEval
class GenomePackerName(str):
    """
    Marker string type for a property that is precalculated on the device.

    Instances behave exactly like ``str``; the separate type exists only so
    that ``isinstance`` checks can recognise a precalculated name.
    """
class GenomePackerView(object):
    """
    Accessor generator for packed genome values in templated device code.

    Views are normally obtained from ``GenomePacker.view(ptr_name,
    wrapped_obj, prefix)``. ``ptr_name`` is the name of the CUDA pointer that
    holds the interpolated values, ``wrapped_obj`` is the genome object being
    wrapped, and ``prefix`` is the leading component of every accessor name
    generated from this view.

    Attribute access returns a nested view of the same type, so templates can
    write ``{{pcp.camera.rotation}}``. Nothing is registered with the packer
    until the view is coerced to a string; at that point the wrapped value
    must be either a ``SplEval`` or a ``GenomePackerName`` (a precalculated
    value, see ``GenomePackerPrecalc``).

    Indexing is translated into attribute access on the stringified key, so
    ``{{pcp.xforms[x]}}`` works without ``getattr``. As a consequence arrays
    cannot be packed directly: they need string-based keys, and loops over
    them must be unrolled in the template.
    """

    def __init__(self, packer, ptr_name, wrapped, prefix=()):
        self.packer, self.ptr_name = packer, ptr_name
        self.wrapped, self.prefix = wrapped, prefix

    def __getattr__(self, name):
        # Only reached for names that are not real attributes of the view;
        # resolve them on the wrapped object and wrap the result again.
        child = getattr(self.wrapped, name)
        child_prefix = self.prefix + (name,)
        return type(self)(self.packer, self.ptr_name, child, child_prefix)

    def __getitem__(self, key):
        # As with the Genome class, we're all-dict, no-array here
        return getattr(self, str(key))

    def __str__(self):
        """
        Return the packed name in a form that can be embedded directly into
        device code.
        """
        # Formatting has a side effect: a spline is registered with the
        # packer the moment the template stringifies it. This keeps the
        # embedded code tidy, but it also means that stringifying a view for
        # debugging while templating alters the allocation tables.
        target = self.wrapped
        is_spline = isinstance(target, SplEval)
        if not is_spline and not isinstance(target, GenomePackerName):
            raise TypeError("Tried to pack something that wasn't a spline or "
                            "a precalculated value")
        if is_spline:
            self.packer._require(self.prefix)
        # TODO: verify namespace stomping, etc
        return '%s.%s' % (self.ptr_name, '_'.join(self.prefix))

    def _precalc(self):
        """Create a GenomePackerPrecalc object. See that class for details."""
        return GenomePackerPrecalc(self.packer, self.ptr_name,
                                   self.wrapped, self.prefix)
class GenomePackerPrecalc(GenomePackerView):
    """
    View used inside precalculation code to read and write packed values.

    Obtain one by calling ``_precalc()`` on a regular view. It wraps the same
    underlying object, but stringifying a property yields a call to the
    interpolation function (for use inside the precalculating function)
    rather than an accessor into the packed structure.

    After computing a value, store it with ``_set(name)``: this produces an
    assignable accessor for the precalculation function and also makes
    ``name`` available on the original view for use in the main code.

    Finally, pass the generated precalculation code to ``_code()`` to add it
    to the precalculation function. Each hunk is wrapped in its own C block,
    so several hunks can declare the same local names.

    Example:

        def do_precalc(px):
            pcam = px._precalc()
            pcam._code(Template('''
                {{pcam._set('prop_sin')}} = sin({{pcam.prop}});
            ''').substitute(pcam=pcam))

        def gen_code(px):
            return Template('''
                {{do_precalc(px)}}
                printf("The sin of %g is %g.", {{px.prop}}, {{px.prop_sin}});
            ''').substitute(px=px)
    """

    def __init__(self, packer, ptr_name, wrapped, prefix):
        # ``ptr_name`` is accepted for signature compatibility with the base
        # class but ignored: precalc accessors always go through 'out'.
        GenomePackerView.__init__(self, packer, 'out', wrapped, prefix)

    def __str__(self):
        # Registers the spline with the packer and returns the expression
        # the packer generates for it.
        return self.packer._require_pre(self.prefix)

    def _set(self, name):
        """
        Allocate the precalculated slot ``name`` under this view's prefix and
        return an assignable accessor for it.
        """
        path = self.prefix + (name,)
        self.packer._pre_alloc(path)
        # This just modifies the underlying object, because I'm too lazy right
        # now to ghost the namespace
        self.wrapped[name] = GenomePackerName('_'.join(path))
        return '%s->%s' % (self.ptr_name, self.wrapped[name])

    def _code(self, code):
        """Append a hunk of precalculation code to the packer."""
        self.packer.precalc_code.append(code)
class GenomePacker(HunkOCode):
    """
    Packs a genome for use in iteration.

    Views created with ``view()`` record which splines and precalculated
    values the generated code uses. ``finalize()`` then renders the structure
    declaration and the interpolation kernel, and ``pack()`` builds the array
    of spline knots to upload.
    """

    # 2^search_rounds is the maximum number of control points, including
    # endpoints, that can be used in a single genome. It should be okay to
    # change this number without touching other code, but 32 samples fits
    # nicely on a single cache line.
    search_rounds = 5

    def __init__(self, tname):
        """
        Create a new GenomePacker.

        ``tname`` is the name of the structure typedef that will be emitted
        via this object's ``decls`` property.
        """
        self.tname = tname
        # We could do this in the order that things are requested, but we want
        # to be able to treat the direct stuff as a list so this function
        # doesn't unroll any more than it has to. So we separate things into
        # direct requests, and those that need precalculation.
        # Values of OrderedDict are unused; basically, it's just OrderedSet.
        self.packed_direct = OrderedDict()
        self.genome_precalc = OrderedDict()
        self.packed_precalc = OrderedDict()
        self.precalc_code = []

        # Maps each view prefix to the object that was wrapped under it.
        self.ns = {}

        # All of these stay None until finalize() has run.
        self._len = None
        self.decls = None
        self.defs = None

        self.packed = None
        self.genome = None

    def __len__(self):
        assert self._len is not None, 'len() called before finalize()'
        return self._len

    def view(self, ptr_name, wrapped_obj, prefix):
        """
        Create a GenomePackerView. See the GenomePackerView class for details.
        """
        self.ns[prefix] = wrapped_obj
        return GenomePackerView(self, ptr_name, wrapped_obj, (prefix,))

    def _require(self, name):
        """
        Called to indicate that the named parameter from the original genome
        must be available during interpolation.
        """
        self.packed_direct[name] = None

    def _require_pre(self, name):
        """
        Register a spline that precalculation code reads and return the
        device expression that interpolates it.

        The index in the returned expression is the spline's slot within the
        precalc section, shifted by ``search_rounds``. A spline requested
        more than once keeps the slot it was first given; deriving the slot
        from the current dict length would point repeated requests one entry
        past the real slot.
        """
        if name not in self.genome_precalc:
            self.genome_precalc[name] = None
        slot = list(self.genome_precalc).index(name)
        i = slot << self.search_rounds
        return 'catmull_rom(&times[%d], &knots[%d], time)' % (i, i)

    def _pre_alloc(self, name):
        """Reserve a field in the packed structure for a precalculated value."""
        self.packed_precalc[name] = None

    def finalize(self):
        """
        Create the code to render this genome.

        Fixes the field order of the packed structure and the spline order of
        the uploaded genome, then renders ``decls`` and ``defs``.
        """
        # A spline may appear both in the direct list and in the precalc
        # list, so a few things can be packed more than once; accepting that
        # spares us the need to implement recursive code generation.
        # list() keeps the concatenation valid when keys() is a view.
        self.packed = list(self.packed_direct) + list(self.packed_precalc)
        self.genome = list(self.packed_direct) + list(self.genome_precalc)
        self._len = len(self.packed)

        self.decls = self._decls.substitute(
            packed=self.packed, tname=self.tname,
            search_rounds=self.search_rounds)
        self.defs = self._defs.substitute(
            packed_direct=self.packed_direct, tname=self.tname,
            precalc_code=self.precalc_code,
            search_rounds=self.search_rounds)

    def pack(self):
        """
        Return a packed copy of the genome ready for uploading to the GPU as a
        3D NDArray, with the first element being the times and the second
        being the values.

        Must be called after ``finalize()``. Raises ``TypeError`` if a
        requested genome entry is not a ``SplEval``.
        """
        width = 1 << self.search_rounds
        out = np.empty((2, len(self.genome), width), dtype=np.float32)
        # Ensure that unused values at the top are always big (must be >2.0)
        out[0].fill(1e9)
        # np.empty leaves memory uninitialized; give unused knot values a
        # defined content so the packed array is deterministic.
        out[1].fill(0.0)

        for idx, gname in enumerate(self.genome):
            # The first name component selects the wrapped object; the rest
            # are attribute lookups leading to the spline.
            attr = self.ns[gname[0]]
            for g in gname[1:]:
                attr = getattr(attr, g)
            if not isinstance(attr, SplEval):
                raise TypeError("%s isn't a spline" % '.'.join(gname))
            out[0][idx][:len(attr.knots[0])] = attr.knots[0]
            out[1][idx][:len(attr.knots[1])] = attr.knots[1]
        return out

    _defs = Template(r"""
__global__
void interp_{{tname}}({{tname}}* out, float *times, float *knots,
float tstart, float tstep, mwc_st *rctxes) {
int id = gtid();
out = &out[id];
mwc_st rctx = rctxes[id];
float time = tstart + id * tstep;
float *outf = reinterpret_cast<float*>(out);
// TODO: unroll pragma?
for (int i = 0; i < {{len(packed_direct)}}; i++) {
int j = i << {{search_rounds}};
outf[i] = catmull_rom(&times[j], &knots[j], time);
}
// Advance 'times' and 'knots' to the purely generated sections, so that
// the pregenerated statements emitted by _require_pre are correct.
times = &times[{{len(packed_direct)<<search_rounds}}];
knots = &knots[{{len(packed_direct)<<search_rounds}}];
{{for hunk in precalc_code}}
if (1) {
{{hunk}}
}
{{endfor}}
}
""")

    _decls = Template(r"""
typedef struct {
{{for name in packed}}
float {{'_'.join(name)}};
{{endfor}}
} {{tname}};
/* Search through the fixed-size list 'hay' to find the rightmost index which
* contains a value strictly smaller than the input 'needle'. The crazy
* bitwise search is just for my own amusement.
*/
__device__
int bitwise_binsearch(float *hay, float needle) {
int lo = 0;
{{for i in range(search_rounds-1, -1, -1)}}
if (needle > hay[lo | {{1 << i}}])
lo |= {{1 << i}};
{{endfor}}
return lo;
}
__device__
float catmull_rom(float *times, float *knots, float t) {
int idx = bitwise_binsearch(times, t);
// The left bias of the search means that we never have to worry about
// overshooting unless the genome is corrupted
idx = max(idx, 1);
float t1 = times[idx], t2 = times[idx+1] - t1;
float rt2 = 1.0f / t2;
float t0 = (times[idx-1] - t1) * rt2, t3 = (times[idx+2] - t1) * rt2;
t = (t - t1) * rt2;
// Now t1 is effectively 0 and t2 is 1
float k0 = knots[idx-1], k1 = knots[idx],
k2 = knots[idx+1], k3 = knots[idx+2];
float m1 = (k2 - k0) / (1.0f - t0),
m2 = (k3 - k1) / (t3);
float tt = t * t, ttt = tt * t;
return m1 * ( ttt - 2.0f*tt + t)
+ k1 * ( 2.0f*ttt - 3.0f*tt + 1)
+ m2 * ( ttt - tt)
+ k2 * (-2.0f*ttt + 3.0f*tt);
}
__global__
void test_cr(float *times, float *knots, float *t, float *r) {
int i = threadIdx.x + blockDim.x * blockIdx.x;
r[i] = catmull_rom(times, knots, t[i]);
}
""")

View File

@ -2,26 +2,132 @@
The main iteration loop.
"""
from cuburn.code import mwc, variations
from cuburn.code import mwc, variations, interp
from cuburn.code.util import *
def precalc_densities(pcp, std_xforms):
    """
    Emit precalculation code for the cumulative xform densities.

    The generated code sums the ``density`` of every xform named in
    ``std_xforms``, then stores the running normalized sum for all but the
    last one in ``den_<name>`` slots created through ``_set``.
    """
    # The same three steps (make a precalc view, render a template against
    # it, hand the result to _code) recur in every precalc helper; the
    # template namespace makes it awkward to factor them out.
    precalc_view = pcp._precalc()
    hunk = Template(r"""
float sum = 0.0f;
{{for n in std_xforms}}
float den_{{n}} = {{pre_cp.xforms[n].density}};
sum += den_{{n}};
{{endfor}}
float rsum = 1.0f / sum;
sum = 0.0f;
{{for n in std_xforms[:-1]}}
sum += den_{{n}} * rsum;
{{pre_cp._set('den_' + n)}} = sum;
{{endfor}}
""").substitute(pcp=pcp, std_xforms=std_xforms, pre_cp=precalc_view)
    precalc_view._code(hunk)
def precalc_chaos(pcp, std_xforms):
    """
    Emit precalculation code for the per-xform chaos selection tables.

    For every prior xform ``p`` in ``std_xforms`` the generated code sums the
    ``chaos`` entries towards each xform ``n``, then stores the running
    normalized sum for all but the last ``n`` in ``chaos_<p>_<n>`` slots
    created through ``_set``.
    """
    precalc_view = pcp._precalc()
    hunk = Template("""
float sum, rsum;
{{for p in std_xforms}}
sum = 0.0f;
{{for n in std_xforms}}
float den_{{p}}_{{n}} = {{pre_cp.xforms[p].chaos[n]}};
sum += den_{{p}}_{{n}};
{{endfor}}
rsum = 1.0f / sum;
sum = 0.0f;
{{for n in std_xforms[:-1]}}
sum += den_{{p}}_{{n}} * rsum;
{{pre_cp._set('chaos_%s_%s' % (p, n))}} = sum;
{{endfor}}
{{endfor}}
""").substitute(pcp=pcp, std_xforms=std_xforms, pre_cp=precalc_view)
    precalc_view._code(hunk)
def precalc_camera(info, pcam):
    """
    Emit precalculation code for the camera's affine transform.

    The generated code reads rotation, center, scale and dither width from
    the camera view, draws a random dither offset, and stores the six affine
    coefficients ``xx, xy, xo, yx, yy, yo`` through ``_set``. ``info``
    supplies ``width``, ``height`` and ``gutter``, which are evaluated while
    the template is rendered.
    """
    precalc_view = pcam._precalc()

    # Maxima code to check my logic:
    #   matrix([1,0,0.5*width + g],[0,1,0.5*height+g],[0,0,1])
    # . matrix([width * scale,0,0], [0,width * scale,0], [0,0,1])
    # . matrix([cosr,-sinr,0], [sinr,cosr,0], [0,0,1])
    # . matrix([1,0,-cenx],[0,1,-ceny],[0,0,1])
    # . matrix([X],[Y],[1]);
    hunk = Template(r"""
float rot = {{pre_cam.rotation}} * M_PI / 180.0f;
float rotsin = sin(rot), rotcos = cos(rot);
float cenx = {{pre_cam.center.x}}, ceny = {{pre_cam.center.y}};
float scale = {{pre_cam.scale}} * {{info.width}};
float ditherwidth = {{pre_cam.dither_width}} * 0.33f;
float u0 = mwc_next_01(rctx);
float r = ditherwidth * sqrtf(-2.0f * log2f(u0) / M_LOG2E);
float u1 = 2.0f * M_PI * mwc_next_01(rctx);
float ditherx = r * cos(u1);
float dithery = r * sin(u1);
{{pre_cam._set('xx')}} = scale * rotcos;
{{pre_cam._set('xy')}} = scale * -rotsin;
{{pre_cam._set('xo')}} = scale * (rotsin * ceny - rotcos * cenx)
+ {{0.5 * info.width + info.gutter}} + ditherx;
{{pre_cam._set('yx')}} = scale * rotsin;
{{pre_cam._set('yy')}} = scale * rotcos;
{{pre_cam._set('yo')}} = scale * -(rotsin * cenx + rotcos * ceny)
+ {{0.5 * info.height + info.gutter}} + dithery;
""").substitute(info=info, pcam=pcam, pre_cam=precalc_view)
    precalc_view._code(hunk)
def precalc_xf_affine(px):
    """
    Emit precalculation code for an xform's affine transform.

    The generated code reads ``angle``, ``spread``, ``magnitude`` and
    ``offset`` from the view ``px`` and stores the six coefficients
    ``xx, xy, yx, yy, xo, yo`` through ``_set``.
    """
    precalc_view = px._precalc()
    hunk = Template(r"""
float pri = {{pre.angle}} * M_PI / 180.0f;
float spr = {{pre.spread}} * M_PI / 180.0f;
float magx = {{pre.magnitude.x}};
float magy = {{pre.magnitude.y}};
{{pre._set('xx')}} = magx * cos(pri-spr);
{{pre._set('xy')}} = magx * sin(pri-spr);
{{pre._set('yx')}} = magy * cos(pri+spr);
{{pre._set('yy')}} = magy * sin(pri+spr);
{{pre._set('xo')}} = {{pre.offset.x}};
{{pre._set('yo')}} = {{pre.offset.y}};
""").substitute(px=px, pre=precalc_view)
    precalc_view._code(hunk)
class IterCode(HunkOCode):
# The number of threads per block
NTHREADS = 256
def __init__(self, features):
self.features = features
self.packer = DataPacker('iter_info')
def __init__(self, info):
self.info = info
self.packer = interp.GenomePacker('iter_params')
self.pcp = self.packer.view('params', self.info.genome, 'cp')
iterbody = self._iterbody()
bodies = [self._xfbody(i,x) for i,x in enumerate(self.features.xforms)]
bodies = [self._xfbody(i,x) for i,x in sorted(info.genome.xforms.items())]
bodies.append(iterbody)
self.defs = '\n'.join(bodies)
self.decls += self.pix_helpers.substitute(features=features)
self.decls += self.pix_helpers.substitute(info=info)
decls = """
// Note: for normalized lookups, uchar4 actually returns floats
texture<uchar4, cudaTextureType2D, cudaReadModeNormalizedFloat> palTex;
__shared__ iter_info info;
__shared__ iter_params params;
"""
@ -29,14 +135,14 @@ __shared__ iter_info info;
__device__
void read_pix(float4 &pix, float &den) {
den = pix.w;
{{if features.pal_has_alpha}}
{{if info.pal_has_alpha}}
read_half(pix.z, pix.w, pix.z, den);
{{endif}}
}
__device__
void write_pix(float4 &pix, float den) {
{{if features.pal_has_alpha}}
{{if info.pal_has_alpha}}
write_half(pix.z, pix.z, pix.w, den);
{{endif}}
pix.w = den;
@ -44,7 +150,7 @@ void write_pix(float4 &pix, float den) {
__device__
void update_pix(uint64_t ptr, uint32_t i, float4 c) {
{{if features.pal_has_alpha}}
{{if info.pal_has_alpha}}
asm volatile ({{crep('''
{
.reg .u16 sz, sw;
@ -98,34 +204,37 @@ void update_pix(uint64_t ptr, uint32_t i, float4 c) {
""")
def _xfbody(self, xfid, xform):
px = self.packer.view('info', 'xf%d_' % xfid)
px.sub('xf', 'cp.xforms[%d]' % xfid)
tmpl = Template("""
px = self.pcp.xforms[xfid]
tmpl = Template(r"""
__device__
void apply_xf{{xfid}}(float &ox, float &oy, float &color, mwc_st &rctx) {
void apply_xf_{{xfid}}(float &ox, float &oy, float &color, mwc_st &rctx) {
float tx, ty;
{{apply_affine_flam3('ox', 'oy', 'tx', 'ty', px, 'xf.c', 'pre')}}
{{precalc_xf_affine(px.affine)}}
{{apply_affine('ox', 'oy', 'tx', 'ty', px.affine)}}
ox = 0;
oy = 0;
{{for v in xform.vars}}
{{for name in xform.variations}}
if (1) {
float w = {{px.get('xf.var[%d]' % v)}};
{{variations.var_code[variations.var_nos[v]].substitute(locals())}}
{{py:pv = px.variations[name]}}
float w = {{pv.weight}};
{{variations.var_code[name].substitute(locals())}}
}
{{endfor}}
{{if xform.has_post}}
{{if 'post' in xform}}
tx = ox;
ty = oy;
{{apply_affine_flam3('tx', 'ty', 'ox', 'oy', px, 'xf.post', 'post')}}
{{precalc_xf_affine(px.post)}}
{{apply_affine('tx', 'ty', 'ox', 'oy', px.post)}}
{{endif}}
float csp = {{px.get('xf.color_speed')}};
color = color * (1.0f - csp) + {{px.get('xf.color')}} * csp;
{{if 'color' in xform}}
float csp = {{px.color_speed}};
color = color * (1.0f - csp) + {{px.color}} * csp;
{{endif}}
};
""")
g = dict(globals())
@ -135,42 +244,38 @@ void apply_xf{{xfid}}(float &ox, float &oy, float &color, mwc_st &rctx) {
def _iterbody(self):
tmpl = Template(r'''
__global__
void iter(mwc_st *msts, iter_info *infos, uint64_t accbuf_ptr) {
__shared__ int nsamps;
void iter(uint64_t accbuf_ptr, mwc_st *msts, iter_params *all_params,
int nsamps_to_generate) {
mwc_st rctx = msts[gtid()];
iter_info *info_glob = &(infos[blockIdx.x]);
iter_params *global_params = &(all_params[blockIdx.x]);
// load info to shared memory cooperatively
__shared__ int nsamps;
nsamps = nsamps_to_generate;
__shared__ float time_frac;
time_frac = blockIdx.x / (float) gridDim.x;
// load params to shared memory cooperatively
for (int i = threadIdx.y * blockDim.x + threadIdx.x;
i * 4 < sizeof(iter_info); i += blockDim.x * blockDim.y)
reinterpret_cast<float*>(&info)[i] =
reinterpret_cast<float*>(info_glob)[i];
i * 4 < sizeof(iter_params); i += blockDim.x * blockDim.y)
reinterpret_cast<float*>(&params)[i] =
reinterpret_cast<float*>(global_params)[i];
if (threadIdx.y == 0 && threadIdx.x == 0)
nsamps = {{packer.get("cp.width * cp.height / cp.ntemporal_samples * cp.adj_density")}};
{{if features.chaos_used}}
{{if info.chaos_used}}
int last_xf_used = 0;
{{else}}
// Size can be reduced by a factor of four using a slower 4-stage reduce
{{else}}
// Shared memory size can be reduced by a factor of four using a slower
// 4-stage reduce, but on Fermi hardware shmem use isn't a bottleneck
__shared__ float swap[{{4*NTHREADS}}];
__shared__ float cosel[{{NWARPS}}];
// This is normally done after the swap-sync in the main loop
if (threadIdx.y == 0 && threadIdx.x < {{NWARPS}})
cosel[threadIdx.x] = mwc_next_01(rctx);
{{endif}}
{{endif}}
__syncthreads();
int consec_bad = -{{features.fuse}};
if (threadIdx.y == 1 && threadIdx.x == 0) {
float ditherwidth = {{packer.get("0.33 * cp.spatial_filter_radius")}};
float u0 = mwc_next_01(rctx);
float r = ditherwidth * sqrtf(-2.0f * log2f(u0) / M_LOG2E);
float u1 = 2.0f * M_PI * mwc_next_01(rctx);
info.cam_xo += r * cos(u1);
info.cam_yo += r * sin(u1);
}
int consec_bad = -{{info.fuse}};
float x, y, color;
x = mwc_next_11(rctx);
@ -178,37 +283,44 @@ void iter(mwc_st *msts, iter_info *infos, uint64_t accbuf_ptr) {
color = mwc_next_01(rctx);
while (1) {
{{if features.chaos_used}}
// For now, we can't use the swap buffer with chaos enabled
float xfsel = mwc_next_01(rctx);
{{else}}
float xfsel = cosel[threadIdx.y];
{{endif}}
{{if features.chaos_used}}
{{for density_row_idx, prior_xform_idx in enumerate(features.std_xforms)}}
{{for density_col_idx, this_xform_idx in enumerate(features.std_xforms)}}
if (last_xf_used == {{prior_xform_idx}} &&
xfsel <= {{packer.get("cp.chaos_densities[%d][%d]" % (density_row_idx, density_col_idx))}}) {
apply_xf{{this_xform_idx}}(x, y, color, rctx);
last_xf_used = {{this_xform_idx}};
{{if info.chaos_used}}
{{precalc_chaos(pcp, std_xforms)}}
// For now, we don't attempt to use the swap buffer when chaos is used
float xfsel = mwc_next_01(rctx);
{{for prior_xform_idx, prior_xform_name in enumerate(std_xforms)}}
if (last_xf_used == {{prior_xform_idx}}) {
{{for xform_idx, xform_name in enumerate(std_xforms[:-1])}}
if (xfsel <= {{pcp['chaos_'+prior_xform_name+'_'+xform_name]}}) {
apply_xf_{{xform_name}}(x, y, color, rctx);
last_xf_used = {{xform_idx}};
} else
{{endfor}}
{
apply_xf_{{std_xforms[-1]}}(x, y, color, rctx);
last_xf_used = {{len(std_xforms)-1}};
}
} else
{{endfor}}
{{endfor}}
{{else}}
{{for density_col_idx, this_xform_idx in enumerate(features.std_xforms)}}
if (xfsel <= {{packer.get("cp.norm_density[%d]" % (density_col_idx))}}) {
apply_xf{{this_xform_idx}}(x, y, color, rctx);
} else
{{endfor}}
{{endif}}
{
printf("Reached trap, aborting execution! %g (%d,%d,%d)\n",
xfsel, blockIdx.x, threadIdx.y, threadIdx.x);
asm volatile ("trap;");
printf("Something went *very* wrong.\n");
asm("trap;");
}
{{if not features.chaos_used}}
{{else}}
{{precalc_densities(pcp, std_xforms)}}
float xfsel = cosel[threadIdx.y];
{{for xform_name in std_xforms[:-1]}}
if (xfsel <= {{pcp['den_'+xform_name]}}) {
apply_xf_{{xform_name}}(x, y, color, rctx);
} else
{{endfor}}
apply_xf_{{std_xforms[-1]}}(x, y, color, rctx);
// Swap thread states here so that writeback skipping logic doesn't die
int sw = (threadIdx.y * 32 + threadIdx.x * 33) & {{NTHREADS-1}};
int sr = threadIdx.y * 32 + threadIdx.x;
@ -222,17 +334,16 @@ void iter(mwc_st *msts, iter_info *infos, uint64_t accbuf_ptr) {
// required per loop.
if (nsamps < 0) break;
{{if not features.chaos_used}}
// Similarly, we select the next xforms here.
if (threadIdx.y == 0 && threadIdx.x < {{NWARPS}})
cosel[threadIdx.x] = mwc_next_01(rctx);
{{endif}}
consec_bad = swap[sr];
x = swap[sr+{{NTHREADS}}];
y = swap[sr+{{2*NTHREADS}}];
color = swap[sr+{{3*NTHREADS}}];
{{endif}}
{{endif}}
if (consec_bad < 0) {
consec_bad++;
@ -242,45 +353,50 @@ void iter(mwc_st *msts, iter_info *infos, uint64_t accbuf_ptr) {
int remain = __popc(__ballot(1));
if (threadIdx.x == 0) atomicSub(&nsamps, remain);
{{if features.final_xform_index}}
{{if 'final' in cp.xforms}}
float fx = x, fy = y, fcolor;
apply_xf{{features.final_xform_index}}(fx, fy, fcolor, rctx);
{{endif}}
apply_xf_final(fx, fy, fcolor, rctx);
{{endif}}
float cx, cy;
{{if features.final_xform_index}}
{{apply_affine('fx', 'fy', 'cx', 'cy', packer,
'cp.camera_transform', 'cam')}}
{{else}}
{{apply_affine('x', 'y', 'cx', 'cy', packer,
'cp.camera_transform', 'cam')}}
{{endif}}
{{precalc_camera(info, pcp.camera)}}
{{if 'final' in cp.xforms}}
{{apply_affine('fx', 'fy', 'cx', 'cy', pcp.camera)}}
{{else}}
{{apply_affine('x', 'y', 'cx', 'cy', pcp.camera)}}
{{endif}}
uint32_t ix = trunca(cx), iy = trunca(cy);
if (ix >= {{features.acc_width}} || iy >= {{features.acc_height}} ) {
if (ix >= {{info.acc_width}} || iy >= {{info.acc_height}} ) {
consec_bad++;
if (consec_bad > {{features.max_oob}}) {
if (consec_bad > {{info.max_oob}}) {
x = mwc_next_11(rctx);
y = mwc_next_11(rctx);
color = mwc_next_01(rctx);
consec_bad = -{{features.fuse}};
consec_bad = -{{info.fuse}};
}
continue;
}
uint32_t i = iy * {{features.acc_stride}} + ix;
uint32_t i = iy * {{info.acc_stride}} + ix;
float4 outcol = tex2D(palTex, color, {{packer.get("cp_step_frac")}});
float4 outcol = tex2D(palTex, color, time_frac);
update_pix(accbuf_ptr, i, outcol);
}
msts[gtid()] = rctx;
}
''')
return tmpl.substitute(
features = self.features,
packer = self.packer.view('info'),
info = self.info,
cp = self.info.genome,
pcp = self.pcp,
NTHREADS = self.NTHREADS,
NWARPS = self.NTHREADS / 32,
std_xforms = [n for n in sorted(self.info.genome.xforms)
if n != 'final'],
**globals())

View File

@ -24,28 +24,11 @@ def assemble_code(*sections):
return ''.join([''.join([getattr(sect, kind) for sect in sections])
for kind in ['headers', 'decls', 'defs']])
def apply_affine(x, y, xo, yo, packer, base_accessor, base_name):
def apply_affine(x, y, xo, yo, packer):
return Template("""
{{xo}} = {{packer.get(ba + '[0,0]', bn + '_xx')}} * {{x}}
+ {{packer.get(ba + '[0,1]', bn + '_xy')}} * {{y}}
+ {{packer.get(ba + '[0,2]', bn + '_xo')}};
{{yo}} = {{packer.get(ba + '[1,0]', bn + '_yx')}} * {{x}}
+ {{packer.get(ba + '[1,1]', bn + '_yy')}} * {{y}}
+ {{packer.get(ba + '[1,2]', bn + '_yo')}};
""").substitute(x=x, y=y, xo=xo, yo=yo, packer=packer,
ba=base_accessor, bn=base_name)
def apply_affine_flam3(x, y, xo, yo, packer, base_accessor, base_name):
"""Read an affine transformation in *flam3 order* and apply it."""
return tempita.Template("""
{{xo}} = {{packer.get(ba + '[0][0]', bn + '_xx')}} * {{x}}
+ {{packer.get(ba + '[1][0]', bn + '_xy')}} * {{y}}
+ {{packer.get(ba + '[2][0]', bn + '_xo')}};
{{yo}} = {{packer.get(ba + '[0][1]', bn + '_yx')}} * {{x}}
+ {{packer.get(ba + '[1][1]', bn + '_yy')}} * {{y}}
+ {{packer.get(ba + '[2][1]', bn + '_yo')}};
""").substitute(x=x, y=y, xo=xo, yo=yo, packer=packer,
ba=base_accessor, bn=base_name)
{{xo}} = {{packer.xx}} * {{x}} + {{packer.xy}} * {{y}} + {{packer.xo}};
{{yo}} = {{packer.yx}} * {{x}} + {{packer.yy}} * {{y}} + {{packer.yo}};
""").substitute(locals())
class BaseCode(HunkOCode):
headers = """
@ -165,126 +148,4 @@ void write_half(float &xy, float x, float y, float den) {
zero(dptr, np.int32(size), stream=stream,
block=(1024, 1, 1), grid=(blocks, blocks, 1))
class DataPackerView(object):
    """
    A view onto a DataPacker, normally created with ``DataPacker.view()``.

    Every view of one packer shares that packer's stream state (position and
    total size). The pointer name used in emitted lookups, the name prefix
    and the list of namespace substitutions belong to the individual view.
    """

    def __init__(self, packer, ptr, prefix, ns):
        self.packer = packer
        self.ptr = ptr
        self.prefix = prefix
        self.ns = ns

    def get(self, accessor, name=None):
        """
        Register an access with the packer and return the formatted load
        expression for device use.

        When ``name`` is omitted it is derived from the part of ``accessor``
        after the final dot, with ``[`` turned into ``_`` and ``]`` removed.
        Little effort is made to ensure that the result is valid C.
        """
        if name is None:
            tail = accessor.rpartition('.')[2]
            name = tail.replace('[', '_').replace(']', '')
        field = self.prefix + name
        self.packer._access(self, accessor, field)
        return '%s.%s' % (self.ptr, field)

    def sub(self, dst, src):
        """Add a substitution to the namespace."""
        # Stored as (source expression, destination name); see _apply_subs.
        self.ns.append((src, dst))

    def view(self, ptr_name, prefix=''):
        """
        As DataPacker.view(), but the new view starts with a copy of this
        view's namespace substitutions.
        """
        inherited = list(self.ns)
        return DataPackerView(self.packer, ptr_name, prefix, inherited)

    def _apply_subs(self, ns):
        """
        Evaluate each substitution in order inside ``ns``, bind the result to
        its destination name, and return ``ns``.
        """
        for expr, target in self.ns:
            ns[target] = eval(expr, ns)
        return ns
class DataPacker(HunkOCode):
    """
    Packs 32-bit float values into a dynamic data structure, and emits
    accessors to those data values from device code. Might get fancier in the
    future, but for now it's incredibly barebones.
    """

    # Names available to every accessor expression evaluated by pack().
    default_namespace = {'np': np}

    def __init__(self, tname, clsize=4):
        """
        Create a new DataPacker.

        ``tname`` is the name of the structure typedef that will be emitted
        via this object's ``decls`` property.

        ``clsize`` is the size of a cache line, in bytes. The resulting
        data structure will be padded to that size.
        """
        self.tname = tname
        self.clsize = clsize
        # name -> (view, accessor source, compiled accessor)
        self.packed = {}
        # Field names in the order they were first accessed.
        self.packed_order = []

    def view(self, ptr_name, prefix=''):
        """Create a DataPacker view. See DataPackerView class for details."""
        return DataPackerView(self, ptr_name, prefix, list())

    def _access(self, view, accessor, name):
        """
        Record that ``name`` is read through ``accessor`` from ``view``.

        Repeating an access already recorded for the same view is a no-op
        (an ``accessor`` of None matches whatever was recorded). Reusing a
        name with a different view or accessor raises ``ValueError``.
        """
        if name in self.packed:
            pview, paccessor, _ = self.packed[name]
            if pview == view and (accessor is None or paccessor == accessor):
                return
            raise ValueError("Same name, different accessor or view: %s" % name)
        comp_accessor = compile(accessor, '{{template}}', 'eval')
        self.packed[name] = (view, accessor, comp_accessor)
        self.packed_order.append(name)

    def __len__(self):
        return len(self.packed_order)

    @property
    def align(self):
        """Size of the packed structure in bytes, rounded up to ``clsize``."""
        # Explicit floor division: the rounding relies on integer arithmetic.
        return (4 * len(self) + self.clsize - 1) // self.clsize * self.clsize

    def pack(self, **kwargs):
        """
        Evaluate every recorded accessor and return the values as a float32
        array, padded with zeros up to the aligned size.

        ``kwargs`` are added to ``default_namespace`` to form the namespace
        the accessors are evaluated in; each view's substitutions are applied
        to its own copy of that namespace.
        """
        base_ns = self.default_namespace.copy()
        base_ns.update(kwargs)
        out = np.zeros(self.align // 4, dtype=np.float32)
        subbed_nses = {}

        for i, name in enumerate(self.packed_order):
            view, accessor, comp = self.packed[name]
            if view not in subbed_nses:
                subbed_nses[view] = view._apply_subs(dict(base_ns))
            try:
                val = eval(comp, subbed_nses[view])
            except Exception:
                print('Error while evaluating accessor "%s"' % accessor)
                # Bare raise keeps the original traceback.
                raise
            out[i] = val
        return out

    @property
    def decls(self):
        """Render the typedef for the packed structure, including padding."""
        tmpl = Template("""
typedef struct {
{{for name, accessor in values}}
float {{'%-20s' % name}}; // {{accessor}}
{{endfor}}
{{if padding > 0}}
// Align to fill whole cache lines
float padding[{{padding}}];
{{endif}}
} {{tname}};
""")
        return tmpl.substitute(
            values=[(n, self.packed[n][1]) for n in self.packed_order],
            padding=self.align // 4 - len(self),
            tname=self.tname
        )

View File

@ -1,11 +1,32 @@
import tempita
import numpy as np
from cuburn.code.util import Template
var_nos = {}
var_code = {}
var_params = {}
def var(num, name, code):
var_nos[num] = name
var_code[name] = tempita.Template(code)
def var(num, name, code, params=None):
    """
    Register the code template for the variation ``name``.

    ``code`` is compiled into a ``Template`` and stored in ``var_code``.
    ``params``, if given, is a whitespace-separated list of parameter specs,
    each either ``pname`` (default 0.0) or ``pname=default``, where the
    default is a float literal or the token ``M_PI``. The resulting
    name-to-default mapping is stored in ``var_params``. ``num`` is not used
    by this function.
    """
    var_code[name] = Template(code)
    if params is None:
        return

    defaults = {}
    for spec in params.split():
        if '=' not in spec:
            defaults[spec] = 0.0
            continue
        key, raw = spec.split('=')
        defaults[key] = np.pi if raw == 'M_PI' else float(raw)
    var_params[name] = defaults
# TODO: registering helpers through the template namespace is a hack
def precalc(name, code):
    """
    Register a precalculation helper for the variation ``name``.

    The helper is stored in ``Template.default_namespace`` under
    ``<name>_precalc``. When called with a view it creates a precalc view,
    renders ``code`` with that view bound to ``pre``, and adds the rendered
    text to the precalculation code.
    """
    def precalc_fun(pv):
        pre = pv._precalc()
        rendered = Template(code).substitute(pre=pre)
        pre._code(rendered)

    key = name+'_precalc'
    Template.default_namespace[key] = precalc_fun
# Variables note: all functions will have their weights as 'w',
# input variables 'tx' and 'ty', and output 'ox' and 'oy' available
@ -119,10 +140,10 @@ var(14, 'bent', """
""")
var(15, 'waves', """
float c10 = {{px.get(None, 'pre_xy')}};
float c11 = {{px.get(None, 'pre_yy')}};
float dx = {{px.get(None, 'pre_xo')}};
float dy = {{px.get(None, 'pre_yo')}};
float c10 = {{px.affine.xy}};
float c11 = {{px.affine.yy}};
float dx = {{px.affine.xo}};
float dy = {{px.affine.yo}};
float dx2 = 1.0f / (dx * dx);
float dy2 = 1.0f / (dy * dy);
@ -140,8 +161,8 @@ var(16, 'fisheye', """
var(17, 'popcorn', """
float dx = tanf(3.0f*ty);
float dy = tanf(3.0f*tx);
ox += w * (tx + {{px.get(None, 'pre_xo')}} * sinf(dx));
oy += w * (ty + {{px.get(None, 'pre_yo')}} * sinf(dy));
ox += w * (tx + {{px.affine.xo}} * sinf(dx));
oy += w * (ty + {{px.affine.yo}} * sinf(dy));
""")
var(18, 'exponential', """
@ -166,7 +187,7 @@ var(20, 'cosine', """
""")
var(21, 'rings', """
float dx = {{px.get(None, 'pre_xo')}} * {{px.get(None, 'pre_xo')}};
float dx = {{px.affine.xo}} * {{px.affine.xo}};
float r = sqrtf(tx*tx + ty*ty);
float a = atan2f(tx, ty);
r = w * (fmodf(r+dx, 2.0f*dx) - dx + r * (1.0f - dx));
@ -175,9 +196,9 @@ var(21, 'rings', """
""")
var(22, 'fan', """
float dx = M_PI * ({{px.get(None, 'pre_xo')}} * {{px.get(None, 'pre_xo')}});
float dx = M_PI * ({{px.affine.xo}} * {{px.affine.xo}});
float dx2 = 0.5f * dx;
float dy = {{px.get(None, 'pre_yo')}};
float dy = {{px.affine.yo}};
float a = atan2f(tx, ty);
a += (fmodf(a+dy, dx) > dx2) ? -dx2 : dx2;
float r = w * sqrtf(tx*tx + ty*ty);
@ -188,24 +209,24 @@ var(22, 'fan', """
var(23, 'blob', """
float r = sqrtf(tx*tx + ty*ty);
float a = atan2f(tx, ty);
float bdiff = 0.5f * ({{px.get('xf.blob_high - xf.blob_low','blob_diff')}});
r *= w * ({{px.get('xf.blob_low')}} + bdiff * (1.0f + sinf({{px.get('xf.blob_waves')}} * a)));
float bdiff = 0.5f * ({{pv.high}} - {{pv.low}});
r *= w * ({{pv.low}} + bdiff * (1.0f + sinf({{pv.waves}} * a)));
ox += sinf(a) * r;
oy += cosf(a) * r;
""")
""", 'low high=1 waves=1')
var(24, 'pdj', """
float nx1 = cosf({{px.get('xf.pdj_b')}} * tx);
float nx2 = sinf({{px.get('xf.pdj_c')}} * tx);
float ny1 = sinf({{px.get('xf.pdj_a')}} * ty);
float ny2 = cosf({{px.get('xf.pdj_d')}} * ty);
float nx1 = cosf({{pv.b}} * tx);
float nx2 = sinf({{pv.c}} * tx);
float ny1 = sinf({{pv.a}} * ty);
float ny2 = cosf({{pv.d}} * ty);
ox += w * (ny1 - nx1);
oy += w * (nx2 - ny2);
""")
""", 'a b c d')
var(25, 'fan2', """
float dy = {{px.get('xf.fan2_y')}};
float dx = M_PI * {{px.get('xf.fan2_x')}} * {{px.get('xf.fan2_x')}};
float dy = {{pv.y}};
float dx = M_PI * {{pv.x}} * {{pv.x}};
float dx2 = 0.5f * dx;
float a = atan2f(tx, ty);
float r = w * sqrtf(tx*tx + ty*ty);
@ -217,16 +238,16 @@ var(25, 'fan2', """
ox += r * sinf(a);
oy += r * cosf(a);
""")
""", 'x y')
var(26, 'rings2', """
float dx = {{px.get('xf.rings2_val')}} * {{px.get('xf.rings2_val')}};
float dx = {{pv.val}} * {{pv.val}};
float r = sqrtf(tx*tx + ty*ty);
float a = atan2f(tx, ty);
r += -2.0f * dx * (int)((r+dx)/(2.0f*dx)) + r * (1.0f - dx);
ox += w * sinf(a) * r;
oy += w * cosf(a) * r;
""")
""", 'val')
var(27, 'eyefish', """
float r = 2.0f * w / (sqrtf(tx*tx + ty*ty) + 1.0f);
@ -245,16 +266,21 @@ var(29, 'cylinder', """
oy += w * ty;
""")
var(30, 'perspective', """
float pdist = {{px.get('xf.perspective_dist')}};
float pvsin = {{px.get('np.sin(xf.perspective_angle*np.pi/2)', 'pvsin')}};
float pvfcos = {{px.get(
'xf.perspective_dist*np.cos(xf.perspective_angle*np.pi/2)', 'pvfcos')}};
precalc('perspective', """
float pang = {{pre.angle}} * M_PI_2;
float pdist = fmaxf(1e-9, {{pre.dist}});
{{pre._set('mdist')}} = pdist;
{{pre._set('sin')}} = sin(pang);
{{pre._set('cos')}} = pdist * cos(pang);
""")
float t = 1.0f / (pdist - ty * pvsin);
ox += w * pdist * tx * t;
oy += w * pvfcos * ty * t;
""")
var(30, 'perspective', """
{{perspective_precalc(pv)}}
float t = 1.0f / ({{pv.mdist}} - ty * {{pv.sin}});
ox += w * {{pv.mdist}} * tx * t;
oy += w * {{pv.cos}} * ty * t;
""", 'angle dist')
var(31, 'noise', """
float tmpr = mwc_next_01(rctx) * 2.0f * M_PI;
@ -263,33 +289,39 @@ var(31, 'noise', """
oy += ty * r * sinf(tmpr);
""")
precalc('julian',
"{{pre._set('cn')}} = {{pre.dist}} / (2.0f * {{pre.power}});\n")
var(32, 'julian', """
float power = {{px.get('xf.julian_power')}};
{{julian_precalc(pv)}}
float power = {{pv.power}};
float t_rnd = truncf(mwc_next_01(rctx) * fabsf(power));
float a = atan2f(ty, tx);
float tmpr = (a + 2.0f * M_PI * t_rnd) / power;
float cn = {{px.get('xf.julian_dist / xf.julian_power / 2', 'julian_cn')}};
float cn = {{pv.cn}};
float r = w * powf(tx * tx + ty * ty, cn);
ox += r * cosf(tmpr);
oy += r * sinf(tmpr);
""")
""", 'power=1 dist=1')
precalc('juliascope',
"{{pre._set('cn')}} = {{pre.dist}} / (2.0f * {{pre.power}});\n")
var(33, 'juliascope', """
{{juliascope_precalc(pv)}}
float ang = atan2f(ty, tx);
float power = {{px.get('xf.juliascope_power', 'juscope_power')}};
float power = {{pv.power}};
float t_rnd = truncf(mwc_next_01(rctx) * fabsf(power));
// TODO: don't draw the extra random number
if (mwc_next(rctx) & 1) ang = -ang;
float tmpr = (2.0f * M_PI * t_rnd + ang) / power;
float cn = {{px.get('xf.juliascope_dist / xf.juliascope_power / 2',
'juscope_cn')}};
float r = w * powf(tx * tx + ty * ty, cn);
float r = w * powf(tx * tx + ty * ty, {{pv.cn}});
ox += r * cosf(tmpr);
oy += r * sinf(tmpr);
""")
""", 'power=1 dist=1')
var(34, 'blur', """
float tmpr = mwc_next_01(rctx) * 2.0f * M_PI;
@ -300,40 +332,39 @@ var(34, 'blur', """
var(35, 'gaussian', """
float ang = mwc_next_01(rctx) * 2.0f * M_PI;
float r = w * ( mwc_next_01(rctx) + mwc_next_01(rctx)
+ mwc_next_01(rctx) + mwc_next_01(rctx) - 2.0f );
float r = w * sqrtf(-2.0f * log2f(mwc_next_01(rctx)) / M_LOG2E);
ox += r * cosf(ang);
oy += r * sinf(ang);
""")
var(36, 'radial_blur', """
float blur_angle = {{px.get('xf.radial_blur_angle')}} * M_PI * 0.5f;
float blur_angle = {{pv.angle}} * M_PI * 0.5f;
float spinvar = sinf(blur_angle);
float zoomvar = cosf(blur_angle);
float r = w * ( mwc_next_01(rctx) + mwc_next_01(rctx)
+ mwc_next_01(rctx) + mwc_next_01(rctx) - 2.0f );
float r = w * sqrtf(-2.0f * log2f(mwc_next_01(rctx)) / M_LOG2E);
float ra = sqrtf(tx*tx + ty*ty);
float tmpa = atan2f(ty, tx) + spinvar * r;
float rz = zoomvar * r - 1.0f;
ox += ra*cosf(tmpa) + rz*tx;
oy += ra*sinf(tmpa) + rz*ty;
""")
""", 'angle')
var(37, 'pie', """
float slices = {{px.get('xf.pie_slices')}};
float slices = {{pv.slices}};
float sl = truncf(mwc_next_01(rctx) * slices + 0.5f);
float a = {{px.get('xf.pie_rotation')}} +
2.0f * M_PI * (sl + mwc_next_01(rctx) * {{px.get('xf.pie_thickness')}}) / slices;
float a = {{pv.rotation}} +
2.0f * M_PI * (sl + mwc_next_01(rctx) * {{pv.thickness}}) / slices;
float r = w * mwc_next_01(rctx);
ox += r * cosf(a);
oy += r * sinf(a);
""")
""", 'slices=6 rotation thickness=0.5')
var(38, 'ngon', """
float power = {{px.get('xf.ngon_power')}} * 0.5f;
float b = 2.0f * M_PI / {{px.get('xf.ngon_sides')}};
float corners = {{px.get('xf.ngon_corners')}};
float circle = {{px.get('xf.ngon_circle')}};
float power = {{pv.power}} * 0.5f;
float b = 2.0f * M_PI / {{pv.sides}};
float corners = {{pv.corners}};
float circle = {{pv.circle}};
float r_factor = powf(tx*tx + ty*ty, power);
float theta = atan2f(ty, tx);
@ -343,11 +374,11 @@ var(38, 'ngon', """
ox += w * tx * amp;
oy += w * ty * amp;
""")
""", 'sides=5 power=3 circle=1 corners=2')
var(39, 'curl', """
float c1 = {{px.get('xf.curl_c1')}};
float c2 = {{px.get('xf.curl_c2')}};
float c1 = {{pv.c1}};
float c2 = {{pv.c2}};
float re = 1.0f + c1*tx + c2*(tx*tx - ty*ty);
float im = c1*ty + 2.0f*c2*tx*ty;
@ -355,15 +386,15 @@ var(39, 'curl', """
ox += r * (tx*re + ty*im);
oy += r * (ty*re - tx*im);
""")
""", 'c1=1 c2')
var(40, 'rectangles', """
float rx = {{px.get('xf.rectangles_x')}};
float ry = {{px.get('xf.rectangles_y')}};
float rx = {{pv.x}};
float ry = {{pv.y}};
ox += w * ( (rx==0.0f) ? tx : rx * (2.0f * floorf(tx/rx) + 1.0f) - tx);
oy += w * ( (ry==0.0f) ? ty : ry * (2.0f * floorf(ty/ry) + 1.0f) - ty);
""")
""", 'x y')
var(41, 'arch', """
float ang = mwc_next_01(rctx) * w * M_PI;
@ -417,9 +448,8 @@ var(48, 'cross', """
""")
var(49, 'disc2', """
float twist = {{px.get('xf.disc2_twist')}};
float rotpi = {{px.get('xf.disc2_rot', 'disc2_rotpi')}};
rotpi *= M_PI;
float twist = {{pv.twist}};
float rotpi = {{pv.rot}} * M_PI;
float sintwist = sinf(twist);
float costwist = cosf(twist) - 1.0f;
@ -441,70 +471,71 @@ var(49, 'disc2', """
ox += r * (sinf(t) + costwist);
oy += r * (cosf(t) + sintwist);
""")
""", 'rot twist')
var(50, 'super_shape', """
float ang = atan2f(ty, tx);
float theta = 0.25f * ({{px.get('xf.super_shape_m')}} * ang + M_PI);
float theta = 0.25f * ({{pv.m}} * ang + M_PI);
float t1 = fabsf(cosf(theta));
float t2 = fabsf(sinf(theta));
t1 = powf(t1,{{px.get('xf.super_shape_n2')}});
t2 = powf(t2,{{px.get('xf.super_shape_n3')}});
float myrnd = {{px.get('xf.super_shape_rnd')}};
t1 = powf(t1, {{pv.n2}});
t2 = powf(t2, {{pv.n3}});
float myrnd = {{pv.rnd}};
float d = sqrtf(tx*tx+ty*ty);
float r = w * ((myrnd*mwc_next_01(rctx) + (1.0f-myrnd)*d) - {{px.get('xf.super_shape_holes')}})
* powf(t1+t2, {{px.get('-1.0 / xf.super_shape_n1', 'super_shape_pneg')}}) / d;
float r = w * ((myrnd*mwc_next_01(rctx) + (1.0f-myrnd)*d) - {{pv.holes}})
* powf(t1+t2, -1.0f / {{pv.n1}}) / d;
ox += r * tx;
oy += r * ty;
""")
""", 'rnd m n1=1 n2=1 n3=1 holes')
var(51, 'flower', """
float holes = {{px.get('xf.flower_holes')}};
float petals = {{px.get('xf.flower_petals')}};
float holes = {{pv.holes}};
float petals = {{pv.petals}};
float r = w * (mwc_next_01(rctx) - holes) * cosf(petals*atan2f(ty, tx)) / sqrtf(tx*tx + ty*ty);
float r = w * (mwc_next_01(rctx) - holes)
* cosf(petals*atan2f(ty, tx)) / sqrtf(tx*tx + ty*ty);
ox += r * tx;
oy += r * ty;
""")
""", 'holes petals')
var(52, 'conic', """
float d = sqrtf(tx*tx + ty*ty);
float ct = tx / d;
float holes = {{px.get('xf.conic_holes')}};
float eccen = {{px.get('xf.conic_eccentricity')}};
float holes = {{pv.holes}};
float eccen = {{pv.eccentricity}};
float r = w * (mwc_next_01(rctx) - holes) * eccen / (1.0f + eccen*ct) / d;
ox += r * tx;
oy += r * ty;
""")
""", 'holes eccentricity=1')
var(53, 'parabola', """
float r = sqrtf(tx*tx + ty*ty);
float sr = sinf(r);
float cr = cosf(r);
ox += {{px.get('xf.parabola_height')}} * w * sr * sr * mwc_next_01(rctx);
oy += {{px.get('xf.parabola_width')}} * w * cr * mwc_next_01(rctx);
""")
ox += {{pv.height}} * w * sr * sr * mwc_next_01(rctx);
oy += {{pv.width}} * w * cr * mwc_next_01(rctx);
""", 'height width')
var(54, 'bent2', """
float nx = 1.0f;
if (tx < 0.0f) nx = {{px.get('xf.bent2_x')}};
if (tx < 0.0f) nx = {{pv.x}};
float ny = 1.0f;
if (ty < 0.0f) ny = {{px.get('xf.bent2_y')}};
if (ty < 0.0f) ny = {{pv.y}};
ox += w * nx * tx;
oy += w * ny * ty;
""")
""", 'x=1 y=1')
var(55, 'bipolar', """
float x2y2 = tx*tx + ty*ty;
float t = x2y2 + 1.0f;
float x2 = tx * 2.0f;
float ps = -M_PI_2 * {{px.get('xf.bipolar_shift')}};
float ps = -M_PI_2 * {{pv.shift}};
float y = 0.5f * atan2f(2.0f * ty, x2y2 - 1.0f) + ps;
if (y > M_PI_2)
@ -514,7 +545,7 @@ var(55, 'bipolar', """
ox += w * 0.25f * M_2_PI * logf( (t+x2) / (t-x2) );
oy += w * M_2_PI * y;
""")
""", 'shift')
var(56, 'boarders', """
float roundX = rintf(tx);
@ -556,7 +587,7 @@ var(57, 'butterfly', """
""")
var(58, 'cell', """
float cell_size = {{px.get('xf.cell_size')}};
float cell_size = {{pv.size}};
float inv_cell_size = 1.0f/cell_size;
/* calculate input cell */
@ -588,31 +619,35 @@ var(58, 'cell', """
ox += w * (dx + x*cell_size);
oy -= w * (dy + y*cell_size);
""")
""", 'size=1')
var(59, 'cpow', """
float a = atan2f(ty, tx);
float lnr = 0.5f * logf(tx*tx+ty*ty);
float power = {{px.get('xf.cpow_power')}};
float power = {{pv.power}};
float va = 2.0f * M_PI / power;
float vc = {{px.get('xf.cpow_r')}} / power;
float vd = {{px.get('xf.cpow_i')}} / power;
float vc = {{pv.r}} / power;
float vd = {{pv.i}} / power;
float ang = vc*a + vd*lnr + va*floorf(power*mwc_next_01(rctx));
float m = w * expf(vc * lnr - vd * a);
ox += m * cosf(ang);
oy += m * sinf(ang);
""")
""", 'r=1 i power=1')
# Precalculate the reciprocal squared lengths for 'curve'. The template is
# rendered with only `pre` supplied, so the parameters are read from `pre`.
precalc('curve', '''
float xl = {{pre.xlength}}, yl = {{pre.ylength}};
{{pre._set('x2')}} = 1.0f / max(1e-20f, xl * xl);
{{pre._set('y2')}} = 1.0f / max(1e-20f, yl * yl);
''')
var(60, 'curve', """
float pc_xlen = {{px.get('xf.curve_xlength * xf.curve_xlength','pc_xlen')}};
float pc_ylen = {{px.get('xf.curve_ylength * xf.curve_ylength','pc_ylen')}};
{{curve_precalc(pv)}}
float pc_xlen = {{pv.x2}}, pc_ylen = {{pv.y2}};
if (pc_xlen<1E-20f) pc_xlen = 1E-20f;
if (pc_ylen<1E-20f) pc_ylen = 1E-20f;
ox += w * (tx + {{px.get('xf.curve_xamp')}} * expf(-ty*ty/pc_xlen));
oy += w * (ty + {{px.get('xf.curve_yamp')}} * expf(-tx*tx/pc_ylen));
""")
ox += w * (tx + {{pv.xamp}} * expf(-ty*ty*pc_xlen));
oy += w * (ty + {{pv.yamp}} * expf(-tx*tx*pc_ylen));
""", 'xamp yamp xlength=1 ylength=1')
var(61, 'edisc', """
float tmp = tx*tx + ty*ty + 1.0f;
@ -662,7 +697,7 @@ var(62, 'elliptic', """
var(63, 'escher', """
float a = atan2f(ty,tx);
float lnr = 0.5f * logf(tx*tx + ty*ty);
float ebeta = {{px.get('xf.escher_beta')}};
float ebeta = {{pv.beta}};
float seb = sinf(ebeta);
float ceb = cosf(ebeta);
float vc = 0.5f * (1.0f + ceb);
@ -672,7 +707,7 @@ var(63, 'escher', """
ox += m * cosf(n);
oy += m * sinf(n);
""")
""", 'beta')
var(64, 'foci', """
float expx = expf(tx) * 0.5f;
@ -685,27 +720,26 @@ var(64, 'foci', """
""")
var(65, 'lazysusan', """
float lx = {{px.get('xf.lazysusan_x')}};
float ly = {{px.get('xf.lazysusan_y')}};
float lx = {{pv.x}};
float ly = {{pv.y}};
float x = tx - lx;
float y = ty + ly;
float r = sqrtf(x*x + y*y);
if (r < w) {
float a = atan2f(y,x) + {{px.get('xf.lazysusan_spin')}} +
{{px.get('xf.lazysusan_twist')}}*(w-r);
float a = atan2f(y,x) + {{pv.spin}} + {{pv.twist}} * (w - r);
r = w * r;
ox += r * cosf(a) + lx;
oy += r * sinf(a) - ly;
} else {
r = w * (1.0f + {{px.get('xf.lazysusan_space')}} / r);
r = w * (1.0f + {{pv.space}} / r);
ox += r * x + lx;
oy += r * y - ly;
}
""")
""", 'x y twist space spin')
var(66, 'loonie', """
float r2 = tx*tx + ty*ty;;
@ -732,8 +766,7 @@ var(67, 'pre_blur', """
""")
var(68, 'modulus', """
float mx = {{px.get('xf.modulus_x')}};
float my = {{px.get('xf.modulus_y')}};
float mx = {{pv.x}}, my = {{pv.y}};
float xr = 2.0f*mx;
float yr = 2.0f*my;
@ -750,13 +783,13 @@ var(68, 'modulus', """
oy += w * ( my - fmodf(my - ty, yr));
else
oy += w * ty;
""")
""", 'x y')
var(69, 'oscope', """
float tpf = 2.0f * M_PI * {{px.get('xf.oscope_frequency')}};
float amp = {{px.get('xf.oscope_amplitude')}};
float sep = {{px.get('xf.oscope_separation')}};
float dmp = {{px.get('xf.oscope_damping')}};
float tpf = 2.0f * M_PI * {{pv.frequency}};
float amp = {{pv.amplitude}};
float sep = {{pv.separation}};
float dmp = {{pv.damping}};
float t = amp * expf(-fabsf(tx)*dmp) * cosf(tpf*tx) + sep;
@ -765,7 +798,7 @@ var(69, 'oscope', """
oy -= w*ty;
else
oy += w*ty;
""")
""", 'separation=1 frequency=M_PI amplitude=1 damping')
var(70, 'polar2', """
float p2v = w / M_PI;
@ -774,10 +807,10 @@ var(70, 'polar2', """
""")
var(71, 'popcorn2', """
float c = {{px.get('xf.popcorn2_c')}};
ox += w * (tx + {{px.get('xf.popcorn2_x')}} * sinf(tanf(ty*c)));
oy += w * (ty + {{px.get('xf.popcorn2_y')}} * sinf(tanf(tx*c)));
""")
float c = {{pv.c}};
ox += w * (tx + {{pv.x}} * sinf(tanf(ty*c)));
oy += w * (ty + {{pv.y}} * sinf(tanf(tx*c)));
""", 'x y c')
var(72, 'scry', """
/* note that scry does not multiply by weight, but as the */
@ -790,68 +823,61 @@ var(72, 'scry', """
""")
var(73, 'separation', """
float sx2 = {{px.get('xf.separation_x * xf.separation_x', 'sx2')}};
float sy2 = {{px.get('xf.separation_y * xf.separation_y', 'sy2')}};
float sx2 = {{pv.x}} * {{pv.x}};
float sy2 = {{pv.y}} * {{pv.y}};
if (tx > 0.0f)
ox += w * (sqrtf(tx*tx + sx2) - tx*{{px.get('xf.separation_xinside')}});
ox += w * (sqrtf(tx*tx + sx2) - tx*{{pv.xinside}});
else
ox -= w * (sqrtf(tx*tx + sx2) + tx*{{px.get('xf.separation_xinside')}});
ox -= w * (sqrtf(tx*tx + sx2) + tx*{{pv.xinside}});
if (ty > 0.0f)
oy += w * (sqrtf(ty*ty + sy2) - ty*{{px.get('xf.separation_yinside')}});
oy += w * (sqrtf(ty*ty + sy2) - ty*{{pv.yinside}});
else
oy -= w * (sqrtf(ty*ty + sy2) + ty*{{px.get('xf.separation_yinside')}});
""")
oy -= w * (sqrtf(ty*ty + sy2) + ty*{{pv.yinside}});
""", 'x xinside y yinside')
var(74, 'split', """
if (cosf(tx*{{px.get('xf.split_xsize')}}*M_PI) >= 0.0f)
if (cosf(tx*{{pv.xsize}}*M_PI) >= 0.0f)
oy += w*ty;
else
oy -= w*ty;
if (cosf(ty*{{px.get('xf.split_ysize')}}*M_PI) >= 0.0f)
if (cosf(ty*{{pv.ysize}}*M_PI) >= 0.0f)
ox += w*tx;
else
ox -= w*tx;
""")
""", 'xsize ysize')
var(75, 'splits', """
if (tx >= 0.0f)
ox += w*(tx + {{px.get('xf.splits_x')}});
else
ox += w*(tx - {{px.get('xf.splits_x')}});
if (ty >= 0)
oy += w*(ty + {{px.get('xf.splits_y')}});
else
oy += w*(ty - {{px.get('xf.splits_y')}});
""")
ox += w*(tx + copysignf({{pv.x}}, tx));
oy += w*(ty + copysignf({{pv.y}}, ty));
""", 'x y')
var(76, 'stripes', """
float roundx = floorf(tx + 0.5f);
float offsetx = tx - roundx;
ox += w * (offsetx * (1.0f - {{px.get('xf.stripes_space')}}) + roundx);
oy += w * (ty + offsetx*offsetx*{{px.get('xf.stripes_warp')}});
""")
ox += w * (offsetx * (1.0f - {{pv.space}}) + roundx);
oy += w * (ty + offsetx*offsetx*{{pv.warp}});
""", 'space warp')
var(77, 'wedge', """
float r = sqrtf(tx*tx + ty*ty);
float a = atan2f(ty, tx) + {{px.get('xf.wedge_swirl')}} * r;
float wc = {{px.get('xf.wedge_count')}};
float wa = {{px.get('xf.wedge_angle')}};
float a = atan2f(ty, tx) + {{pv.swirl}} * r;
float wc = {{pv.count}};
float wa = {{pv.angle}};
float c = floorf((wc * a + M_PI) * M_1_PI * 0.5f);
float comp_fac = 1 - wa * wc * M_1_PI * 0.5f;
a = a * comp_fac + c * wa;
r = w * (r + {{px.get('xf.wedge_hole')}});
r = w * (r + {{pv.hole}});
ox += r * cosf(a);
oy += r * sinf(a);
""")
""", 'angle hole count=1 swirl')
var(81, 'waves2', """
ox += w*(tx + {{px.get('xf.waves2_scalex')}}*sinf(ty * {{px.get('xf.waves2_freqx')}}));
oy += w*(ty + {{px.get('xf.waves2_scaley')}}*sinf(tx * {{px.get('xf.waves2_freqy')}}));
""")
ox += w*(tx + {{pv.scalex}}*sinf(ty * {{pv.freqx}}));
oy += w*(ty + {{pv.scaley}}*sinf(tx * {{pv.freqy}}));
""", 'scalex scaley freqx freqy')
var(82, 'exp', """
float expe = expf(tx);
@ -935,21 +961,22 @@ var(95, 'coth', """
var(97, 'flux', """
float xpw = tx + w;
float xmw = tx - w;
float avgr = w * (2.0f + {{px.get('xf.flux_spread')}}) * sqrtf(sqrtf(ty*ty+xpw*xpw)/sqrtf(ty*ty+xmw*xmw));
float avgr = w * (2.0f + {{pv.spread}})
* sqrtf(sqrtf(ty*ty+xpw*xpw)/sqrtf(ty*ty+xmw*xmw));
float avga = (atan2f(ty, xmw) - atan2f(ty,xpw))*0.5f;
ox += avgr * cosf(avga);
oy += avgr * sinf(avga);
""")
""", 'spread')
var(98, 'mobius', """
float rea = {{px.get('xf.mobius_re_a')}};
float ima = {{px.get('xf.mobius_im_a')}};
float reb = {{px.get('xf.mobius_re_b')}};
float imb = {{px.get('xf.mobius_im_b')}};
float rec = {{px.get('xf.mobius_re_c')}};
float imc = {{px.get('xf.mobius_im_c')}};
float red = {{px.get('xf.mobius_re_d')}};
float imd = {{px.get('xf.mobius_im_d')}};
float rea = {{pv.re_a}};
float ima = {{pv.im_a}};
float reb = {{pv.re_b}};
float imb = {{pv.im_b}};
float rec = {{pv.re_c}};
float imc = {{pv.im_c}};
float red = {{pv.re_d}};
float imd = {{pv.im_d}};
float re_u, im_u, re_v, im_v, rad_v;
@ -962,5 +989,5 @@ var(98, 'mobius', """
ox += rad_v * (re_u*re_v + im_u*im_v);
oy += rad_v * (im_u*re_v - re_u*im_v);
""")
""", 're_a im_a re_b im_b re_c im_c re_d im_d')