2014-07-08 03:11:14 -04:00
# pragma once
# include "EmberCLPch.h"
# include "EmberCLStructs.h"
/// <summary>
/// OpenCL global function strings.
/// </summary>
namespace EmberCLns
{
/// <summary>
/// OpenCL equivalent of Palette::RgbToHsv().
/// Reads red, green, blue from the x, y, z members of rgb and writes
/// hue, saturation, value to the x, y, z members of hsv.
/// The w member of hsv is not written.
/// Hue is left at 0 whenever the computed saturation is 0 (max == 0 or max == min).
/// </summary>
static const char * RgbToHsvFunctionString =
//Input: rgb 0 - 1.
//Output: h 0 - 6, s 0 - 1, v 0 - 1.
" static inline void RgbToHsv(real4_bucket* rgb, real4_bucket* hsv) \n "
" { \n "
" real_bucket_t max, min, del, rc, gc, bc; \n "
" \n "
//Compute maximum of r, g, b.
" if ((*rgb).x >= (*rgb).y) \n "
" { \n "
" if ((*rgb).x >= (*rgb).z) \n "
" max = (*rgb).x; \n "
" else \n "
" max = (*rgb).z; \n "
" } \n "
" else \n "
" { \n "
" if ((*rgb).y >= (*rgb).z) \n "
" max = (*rgb).y; \n "
" else \n "
" max = (*rgb).z; \n "
" } \n "
" \n "
//Compute minimum of r, g, b.
" if ((*rgb).x <= (*rgb).y) \n "
" { \n "
" if ((*rgb).x <= (*rgb).z) \n "
" min = (*rgb).x; \n "
" else \n "
" min = (*rgb).z; \n "
" } \n "
" else \n "
" { \n "
" if ((*rgb).y <= (*rgb).z) \n "
" min = (*rgb).y; \n "
" else \n "
" min = (*rgb).z; \n "
" } \n "
" \n "
//Value is the largest channel; saturation is the channel spread relative to it,
//guarded against division by zero when max is 0.
" del = max - min; \n "
" (*hsv).z = max; \n "
" \n "
" if (max != 0) \n "
" (*hsv).y = del / max; \n "
" else \n "
" (*hsv).y = 0; \n "
" \n "
//Hue is only computed for non-zero saturation, which also guarantees del != 0 below.
" (*hsv).x = 0; \n "
" if ((*hsv).y != 0) \n "
" { \n "
" rc = (max - (*rgb).x) / del; \n "
" gc = (max - (*rgb).y) / del; \n "
" bc = (max - (*rgb).z) / del; \n "
" \n "
//The channel equal to max selects the hue offset (0, 2 or 4).
" if ((*rgb).x == max) \n "
" (*hsv).x = bc - gc; \n "
" else if ((*rgb).y == max) \n "
" (*hsv).x = 2 + rc - bc; \n "
" else if ((*rgb).z == max) \n "
" (*hsv).x = 4 + gc - rc; \n "
" \n "
//Wrap negative hues back into the positive range.
" if ((*hsv).x < 0) \n "
" (*hsv).x += 6; \n "
" } \n "
" } \n "
" \n " ;
/// <summary>
/// OpenCL equivalent of Palette::HsvToRgb().
/// Reads hue, saturation, value from the x, y, z members of hsv and writes
/// red, green, blue to the x, y, z members of rgb.
/// The hue stored in hsv is modified in place: it is shifted by multiples of 6
/// until it lies in [0, 6) before the conversion.
/// The w member of rgb is not written.
/// </summary>
static const char * HsvToRgbFunctionString =
//Input: h 0 - 6, s 0 - 1, v 0 - 1.
//Output: rgb 0 - 1.
" static inline void HsvToRgb(real4_bucket* hsv, real4_bucket* rgb) \n "
" { \n "
" int j; \n "
" real_bucket_t f, p, q, t; \n "
" \n "
//Wrap the hue into [0, 6).
" while ((*hsv).x >= 6) \n "
" (*hsv).x = (*hsv).x - 6; \n "
" \n "
" while ((*hsv).x < 0) \n "
" (*hsv).x = (*hsv).x + 6; \n "
" \n "
//j is the integer hue sector, f the fractional position within it.
" j = (int)floor((*hsv).x); \n "
" f = (*hsv).x - j; \n "
" p = (*hsv).z * (1 - (*hsv).y); \n "
" q = (*hsv).z * (1 - ((*hsv).y * f)); \n "
" t = (*hsv).z * (1 - ((*hsv).y * (1 - f))); \n "
" \n "
//Each sector assigns v, p, q, t to the channels differently; default mirrors case 0.
" switch (j) \n "
" { \n "
" case 0: (*rgb).x = (*hsv).z; (*rgb).y = t; (*rgb).z = p; break; \n "
" case 1: (*rgb).x = q; (*rgb).y = (*hsv).z; (*rgb).z = p; break; \n "
" case 2: (*rgb).x = p; (*rgb).y = (*hsv).z; (*rgb).z = t; break; \n "
" case 3: (*rgb).x = p; (*rgb).y = q; (*rgb).z = (*hsv).z; break; \n "
" case 4: (*rgb).x = t; (*rgb).y = p; (*rgb).z = (*hsv).z; break; \n "
" case 5: (*rgb).x = (*hsv).z; (*rgb).y = p; (*rgb).z = q; break; \n "
" default: (*rgb).x = (*hsv).z; (*rgb).y = t; (*rgb).z = p; break; \n "
" } \n "
" } \n "
" \n " ;
/// <summary>
/// OpenCL equivalent of Palette::CalcAlpha().
/// Computes an alpha value from density:
///   density <= 0               : 0.
///   0 < density < linrange     : a blend, weighted by frac = density / linrange, between the
///                                straight line density * (pow(linrange, gamma) / linrange)
///                                and pow(density, gamma).
///   density >= linrange        : pow(density, gamma).
/// NOTE(review): the function is declared to return real_t while every operand is
/// real_bucket_t; confirm the implicit conversion on return is intended.
/// </summary>
static const char * CalcAlphaFunctionString =
" static inline real_t CalcAlpha(real_bucket_t density, real_bucket_t gamma, real_bucket_t linrange) \n "
" { \n "
//funcval is the value of the power curve at the end of the linear range.
" real_bucket_t frac, alpha, funcval = pow(linrange, gamma); \n "
" \n "
" if (density > 0) \n "
" { \n "
" if (density < linrange) \n "
" { \n "
//Below linrange, fade from the linear segment to the power curve as density approaches linrange.
" frac = density / linrange; \n "
" alpha = (1.0 - frac) * density * (funcval / linrange) + frac * pow(density, gamma); \n "
" } \n "
" else \n "
" alpha = pow(density, gamma); \n "
" } \n "
" else \n "
" alpha = 0; \n "
" \n "
" return alpha; \n "
" } \n "
" \n " ;
2015-03-21 18:27:37 -04:00
/// <summary>
/// OpenCL equivalent of Renderer::CurveAdjust().
/// Only use float here instead of real_t because the output will be passed to write_imagef()
/// during final accumulation, which only takes floats.
/// The value pointed to by a is replaced by the result of two chained table lookups in csa:
/// the first uses *a as an index and reads m_Real4.x, the second uses that value as an index
/// and reads m_Reals[index]. Both indices are clamped to [0, COLORMAP_LENGTH_MINUS_1].
/// </summary>
static const char * CurveAdjustFunctionString =
" static inline void CurveAdjust(__constant real4reals_bucket* csa, float* a, uint index) \n "
" { \n "
//Indices are clamped with the built-in clamp() before being used to address csa.
" uint tempIndex = (uint)clamp(*a, (float)0.0, (float)COLORMAP_LENGTH_MINUS_1); \n "
" uint tempIndex2 = (uint)clamp((float)csa[tempIndex].m_Real4.x, (float)0.0, (float)COLORMAP_LENGTH_MINUS_1); \n "
" \n "
//The looked-up value is rounded before being written back through a.
" *a = (float)round(csa[tempIndex2].m_Reals[index]); \n "
" } \n " ;
2014-07-08 03:11:14 -04:00
/// <summary>
/// Use MWC 64 from David Thomas at the Imperial College of London for
/// random numbers in OpenCL, instead of ISAAC which was used
/// for CPU rendering.
/// The generator state is a uint2 passed by pointer and advanced on every call.
/// Functions defined:
///   MwcNext()         : next raw uint.
///   MwcNextRange()    : raw uint modulo val, or the raw uint when val is 0.
///   MwcNext01()       : raw uint scaled by 1 / 2^32.
///   MwcNextNeg1Pos1() : raw uint divided by UINT_MAX, mapped to -1 + 2 * f.
///   MwcNext0505()     : raw uint divided by UINT_MAX, shifted to -0.5 + f.
/// </summary>
static const char * RandFunctionString =
" enum { MWC64X_A = 4294883355u }; \n \n "
" inline uint MwcNext(uint2* s) \n "
" { \n "
" uint res = (*s).x ^ (*s).y; \n " //Calculate the result from the state before stepping.
" uint hi = mul_hi((*s).x, MWC64X_A); \n " //Step the RNG: high word of the multiply.
" (*s).x = (*s).x * MWC64X_A + (*s).y; \n " //Pack the state back up.
" (*s).y = hi + ((*s).x < (*s).y); \n " //The comparison adds 1 when the new x is below the old y.
" return res; \n " //Return the next result.
" } \n "
" \n "
" inline uint MwcNextRange(uint2* s, uint val) \n "
" { \n "
//A val of 0 returns the raw value, avoiding a modulo by zero.
" return (val == 0) ? MwcNext(s) : (MwcNext(s) % val); \n "
" } \n "
" \n "
" inline real_t MwcNext01(uint2* s) \n "
" { \n "
" return MwcNext(s) * (1.0 / 4294967296.0); \n "
" } \n "
" \n "
" inline real_t MwcNextNeg1Pos1(uint2* s) \n "
" { \n "
" real_t f = (real_t)MwcNext(s) / (real_t)UINT_MAX; \n "
" return -1.0 + (f * 2.0); \n "
" } \n "
" \n "
" inline real_t MwcNext0505(uint2* s) \n "
" { \n "
" real_t f = (real_t)MwcNext(s) / (real_t)UINT_MAX; \n "
" return -0.5 + f; \n "
" } \n "
" \n " ;
/// <summary>
/// OpenCL source for the bounds test used by the equivalent of Renderer::AddToAccum().
/// AccumCheck() returns true when the offset position (i + ii, j + jj) lies inside
/// a raster of superRasW columns by superRasH rows.
/// </summary>
static const char * AddToAccumWithCheckFunctionString =
    //Signature.
    " inline bool AccumCheck(int superRasW, int superRasH,"
    " int i, int ii, int j, int jj) \n "
    " { \n "
    //Row test first, then column test.
    " return (j + jj >= 0 && j + jj < superRasH &&"
    " i + ii >= 0 && i + ii < superRasW); \n "
    " } \n  \n " ;
/// <summary>
/// OpenCL equivalent of various CarToRas member functions.
/// Functions defined:
///   CarToRasConvertPointToSingle() : writes to *singleBufferIndex the sum of a column term,
///                                    (uint)(m_PixPerImageUnitW * x - m_RasLlX), and a row term,
///                                    m_RasWidth * (uint)(m_PixPerImageUnitH * y - m_RasLlY).
///   CarToRasInBounds()             : true when the point satisfies
///                                    m_CarLlX <= x < m_CarUrX and m_CarLlY <= y < m_CarUrY.
/// </summary>
static const char * CarToRasFunctionString =
" inline void CarToRasConvertPointToSingle(__constant CarToRasCL* carToRas, Point* point, uint* singleBufferIndex) \n "
" { \n "
//Each coordinate is truncated to uint separately before the row term is scaled by the raster width.
" *singleBufferIndex = (uint)(carToRas->m_PixPerImageUnitW * point->m_X - carToRas->m_RasLlX) + (carToRas->m_RasWidth * (uint)(carToRas->m_PixPerImageUnitH * point->m_Y - carToRas->m_RasLlY)); \n "
" } \n "
" \n "
" inline bool CarToRasInBounds(__constant CarToRasCL* carToRas, Point* point) \n "
" { \n "
//Lower bounds are inclusive, upper bounds are exclusive.
" return point->m_X >= carToRas->m_CarLlX && \n "
" point->m_X < carToRas->m_CarUrX && \n "
" point->m_Y < carToRas->m_CarUrY && \n "
" point->m_Y >= carToRas->m_CarLlY; \n "
" } \n "
" \n " ;
2015-08-12 21:51:07 -04:00
/// <summary>
/// Build the OpenCL source for AtomicAdd(), an atomic add on a real_bucket_t in global memory.
/// The generated function reads *source, adds operand, and uses atomic_cmpxchg() on the
/// integer view of the value (through a union) to store the sum, retrying until the value
/// it read is still the one in memory at the time of the exchange.
/// The union's integer member uses the type atomi, which is defined outside this function;
/// presumably it is an integer type of the same width as real_bucket_t - TODO confirm.
/// </summary>
/// <returns>The OpenCL source text of AtomicAdd()</returns>
static std::string AtomicString()
{
	std::ostringstream os;

	os <<
		" void AtomicAdd(volatile __global real_bucket_t* source, const real_bucket_t operand) \n "
		" { \n "
		" union \n "
		" { \n "
		" atomi intVal; \n "
		" real_bucket_t realVal; \n "
		" } newVal; \n "
		" \n "
		" union \n "
		" { \n "
		" atomi intVal; \n "
		" real_bucket_t realVal; \n "
		" } prevVal; \n "
		" \n "
		//Compare-and-swap loop: recompute the sum whenever another work item changed *source first.
		" do \n "
		" { \n "
		" prevVal.realVal = *source; \n "
		" newVal.realVal = prevVal.realVal + operand; \n "
		" } while (atomic_cmpxchg((volatile __global atomi*)source, prevVal.intVal, newVal.intVal) != prevVal.intVal); \n "
		" } \n ";

	return os.str();
}
}