2014-07-08 03:11:14 -04:00
# pragma once
# include "EmberCLPch.h"
# include "EmberCLStructs.h"
/// <summary>
/// OpenCL global function strings.
/// </summary>
namespace EmberCLns
{
/// <summary>
/// OpenCL equivalent of Palette::RgbToHsv().
/// </summary>
2017-07-05 02:08:06 -04:00
static const char * RgbToHsvFunctionString =
2014-07-08 03:11:14 -04:00
//rgb 0 - 1,
//h 0 - 6, s 0 - 1, v 0 - 1
2015-08-10 23:10:23 -04:00
" static inline void RgbToHsv(real4_bucket* rgb, real4_bucket* hsv) \n "
2014-07-08 03:11:14 -04:00
" { \n "
2015-08-10 23:10:23 -04:00
" real_bucket_t max, min, del, rc, gc, bc; \n "
2014-07-08 03:11:14 -04:00
" \n "
//Compute maximum of r, g, b.
" if ((*rgb).x >= (*rgb).y) \n "
" { \n "
" if ((*rgb).x >= (*rgb).z) \n "
" max = (*rgb).x; \n "
" else \n "
" max = (*rgb).z; \n "
" } \n "
" else \n "
" { \n "
" if ((*rgb).y >= (*rgb).z) \n "
" max = (*rgb).y; \n "
" else \n "
" max = (*rgb).z; \n "
" } \n "
" \n "
//Compute minimum of r, g, b.
" if ((*rgb).x <= (*rgb).y) \n "
" { \n "
" if ((*rgb).x <= (*rgb).z) \n "
" min = (*rgb).x; \n "
" else \n "
" min = (*rgb).z; \n "
" } \n "
" else \n "
" { \n "
" if ((*rgb).y <= (*rgb).z) \n "
" min = (*rgb).y; \n "
" else \n "
" min = (*rgb).z; \n "
" } \n "
" \n "
" del = max - min; \n "
" (*hsv).z = max; \n "
" \n "
" if (max != 0) \n "
" (*hsv).y = del / max; \n "
" else \n "
" (*hsv).y = 0; \n "
" \n "
" (*hsv).x = 0; \n "
" if ((*hsv).y != 0) \n "
" { \n "
" rc = (max - (*rgb).x) / del; \n "
" gc = (max - (*rgb).y) / del; \n "
" bc = (max - (*rgb).z) / del; \n "
" \n "
" if ((*rgb).x == max) \n "
" (*hsv).x = bc - gc; \n "
" else if ((*rgb).y == max) \n "
" (*hsv).x = 2 + rc - bc; \n "
" else if ((*rgb).z == max) \n "
" (*hsv).x = 4 + gc - rc; \n "
" \n "
" if ((*hsv).x < 0) \n "
" (*hsv).x += 6; \n "
" } \n "
" } \n "
" \n " ;
/// <summary>
/// OpenCL equivalent of Palette::HsvToRgb().
/// </summary>
2017-07-05 02:08:06 -04:00
static const char * HsvToRgbFunctionString =
2014-07-08 03:11:14 -04:00
//h 0 - 6, s 0 - 1, v 0 - 1
2017-07-05 02:08:06 -04:00
//rgb 0 - 1
2015-08-10 23:10:23 -04:00
" static inline void HsvToRgb(real4_bucket* hsv, real4_bucket* rgb) \n "
2014-07-08 03:11:14 -04:00
" { \n "
" int j; \n "
2015-08-10 23:10:23 -04:00
" real_bucket_t f, p, q, t; \n "
2014-07-08 03:11:14 -04:00
" \n "
" while ((*hsv).x >= 6) \n "
" (*hsv).x = (*hsv).x - 6; \n "
" \n "
" while ((*hsv).x < 0) \n "
" (*hsv).x = (*hsv).x + 6; \n "
" \n "
" j = (int)floor((*hsv).x); \n "
" f = (*hsv).x - j; \n "
" p = (*hsv).z * (1 - (*hsv).y); \n "
" q = (*hsv).z * (1 - ((*hsv).y * f)); \n "
" t = (*hsv).z * (1 - ((*hsv).y * (1 - f))); \n "
" \n "
" switch (j) \n "
" { \n "
" case 0: (*rgb).x = (*hsv).z; (*rgb).y = t; (*rgb).z = p; break; \n "
" case 1: (*rgb).x = q; (*rgb).y = (*hsv).z; (*rgb).z = p; break; \n "
" case 2: (*rgb).x = p; (*rgb).y = (*hsv).z; (*rgb).z = t; break; \n "
" case 3: (*rgb).x = p; (*rgb).y = q; (*rgb).z = (*hsv).z; break; \n "
" case 4: (*rgb).x = t; (*rgb).y = p; (*rgb).z = (*hsv).z; break; \n "
" case 5: (*rgb).x = (*hsv).z; (*rgb).y = p; (*rgb).z = q; break; \n "
" default: (*rgb).x = (*hsv).z; (*rgb).y = t; (*rgb).z = p; break; \n "
" } \n "
" } \n "
" \n " ;
/// <summary>
/// OpenCL equivalent of Palette::CalcAlpha().
/// </summary>
2017-07-05 02:08:06 -04:00
static const char * CalcAlphaFunctionString =
2015-08-10 23:10:23 -04:00
" static inline real_t CalcAlpha(real_bucket_t density, real_bucket_t gamma, real_bucket_t linrange) \n " //Not the slightest clue what this is doing.//DOC
2014-07-08 03:11:14 -04:00
" { \n "
2015-08-10 23:10:23 -04:00
" real_bucket_t frac, alpha, funcval = pow(linrange, gamma); \n "
2017-07-05 02:08:06 -04:00
" \n "
2014-07-08 03:11:14 -04:00
" if (density > 0) \n "
" { \n "
" if (density < linrange) \n "
" { \n "
" frac = density / linrange; \n "
" alpha = (1.0 - frac) * density * (funcval / linrange) + frac * pow(density, gamma); \n "
" } \n "
" else \n "
" alpha = pow(density, gamma); \n "
" } \n "
" else \n "
" alpha = 0; \n "
" \n "
" return alpha; \n "
" } \n "
" \n " ;
2015-03-21 18:27:37 -04:00
/// <summary>
/// OpenCL equivalent of Renderer::CurveAdjust().
/// Only use float here instead of real_t because the output will be passed to write_imagef()
/// during final accumulation, which only takes floats.
/// </summary>
static const char * CurveAdjustFunctionString =
2017-07-22 16:43:35 -04:00
" static inline void CurveAdjust(__global real4reals_bucket* csa, float* a, uint index) \n "
2017-07-05 02:08:06 -04:00
" { \n "
2017-07-22 16:43:35 -04:00
" uint tempIndex = (uint)clamp(*a * CURVES_LENGTH_M1, 0.0f, CURVES_LENGTH_M1); \n "
" uint tempIndex2 = (uint)clamp(csa[tempIndex].m_Real4.x * CURVES_LENGTH_M1, 0.0f, CURVES_LENGTH_M1); \n "
2017-07-05 02:08:06 -04:00
" \n "
" *a = (float)csa[tempIndex2].m_Reals[index]; \n "
2017-07-22 16:43:35 -04:00
" } \n "
" \n " ;
2015-03-21 18:27:37 -04:00
2014-07-08 03:11:14 -04:00
/// <summary>
/// Use MWC 64 from David Thomas at the Imperial College of London for
/// random numbers in OpenCL, instead of ISAAC which was used
/// for CPU rendering.
/// </summary>
static const char * RandFunctionString =
" enum { MWC64X_A = 4294883355u }; \n \n "
" inline uint MwcNext(uint2* s) \n "
" { \n "
" uint res = (*s).x ^ (*s).y; \n " //Calculate the result.
" uint hi = mul_hi((*s).x, MWC64X_A); \n " //Step the RNG.
" (*s).x = (*s).x * MWC64X_A + (*s).y; \n " //Pack the state back up.
" (*s).y = hi + ((*s).x < (*s).y); \n "
" return res; \n " //Return the next result.
" } \n "
" \n "
" inline uint MwcNextRange(uint2* s, uint val) \n "
" { \n "
" return (val == 0) ? MwcNext(s) : (MwcNext(s) % val); \n "
" } \n "
" \n "
" inline real_t MwcNext01(uint2* s) \n "
" { \n "
" return MwcNext(s) * (1.0 / 4294967296.0); \n "
" } \n "
" \n "
" inline real_t MwcNextNeg1Pos1(uint2* s) \n "
" { \n "
Numerous fixes
0.4.0.5 Beta 07/18/2014
--User Changes
Allow for vibrancy values > 1.
Add flatten and unflatten menu items.
Automatically flatten like Apophysis does.
Add plugin and new_linear tags to Xml to be compatible with Apophysis.
--Bug Fixes
Fix blur, blur3d, bubble, cropn, cross, curl, curl3d, epispiral, ho,
julia3d, julia3dz, loonie, mirror_x, mirror_y, mirror_z, rotate_x,
sinusoidal, spherical, spherical3d, stripes.
Unique filename on final render was completely broken.
Two severe OpenCL bugs. Random seeds were biased and fusing was being
reset too often leading to results that differ from the CPU.
Subtle, but sometimes severe bug in the setup of the xaos weights.
Use properly defined epsilon by getting the value from
std::numeric_limits, rather than hard coding 1e-6 or 1e-10.
Omit incorrect usage of epsilon everywhere. It should not be
automatically added to denominators. Rather, it should only be used if
the denominator is zero.
Force final render progress bars to 100 on completion. Sometimes they
didn't seem to make it there.
Make variation name and params comparisons be case insensitive.
--Code Changes
Make ForEach and FindIf wrappers around std::for_each and std::find_if.
2014-07-19 02:33:18 -04:00
" real_t f = (real_t)MwcNext(s) / (real_t)UINT_MAX; \n "
" return -1.0 + (f * 2.0); \n "
2014-07-08 03:11:14 -04:00
" } \n "
2014-07-26 20:26:15 -04:00
" \n "
" inline real_t MwcNext0505(uint2* s) \n "
" { \n "
" real_t f = (real_t)MwcNext(s) / (real_t)UINT_MAX; \n "
" return -0.5 + f; \n "
" } \n "
2014-07-08 03:11:14 -04:00
" \n " ;
/// <summary>
/// OpenCL equivalent Renderer::AddToAccum().
/// </summary>
2017-07-05 02:08:06 -04:00
static const char * AddToAccumWithCheckFunctionString =
2014-07-08 03:11:14 -04:00
" inline bool AccumCheck(int superRasW, int superRasH, int i, int ii, int j, int jj) \n "
" { \n "
" return (j + jj >= 0 && j + jj < superRasH && i + ii >= 0 && i + ii < superRasW); \n "
" } \n "
" \n " ;
/// <summary>
/// OpenCL equivalent various CarToRas member functions.
/// </summary>
2017-07-05 02:08:06 -04:00
static const char * CarToRasFunctionString =
2014-12-06 00:05:09 -05:00
" inline void CarToRasConvertPointToSingle(__constant CarToRasCL* carToRas, Point* point, uint* singleBufferIndex) \n "
2014-07-08 03:11:14 -04:00
" { \n "
2014-12-06 00:05:09 -05:00
" *singleBufferIndex = (uint)(carToRas->m_PixPerImageUnitW * point->m_X - carToRas->m_RasLlX) + (carToRas->m_RasWidth * (uint)(carToRas->m_PixPerImageUnitH * point->m_Y - carToRas->m_RasLlY)); \n "
2014-07-08 03:11:14 -04:00
" } \n "
" \n "
" inline bool CarToRasInBounds(__constant CarToRasCL* carToRas, Point* point) \n "
" { \n "
" return point->m_X >= carToRas->m_CarLlX && \n "
" point->m_X < carToRas->m_CarUrX && \n "
" point->m_Y < carToRas->m_CarUrY && \n "
" point->m_Y >= carToRas->m_CarLlY; \n "
" } \n "
" \n " ;
2015-08-12 21:51:07 -04:00
static string AtomicString ( )
2014-07-08 03:11:14 -04:00
{
ostringstream os ;
2015-08-12 21:51:07 -04:00
os < <
2017-07-05 02:08:06 -04:00
" void AtomicAdd(volatile __global real_bucket_t* source, const real_bucket_t operand) \n "
" { \n "
" union \n "
" { \n "
" atomi intVal; \n "
" real_bucket_t realVal; \n "
" } newVal; \n "
" \n "
" union \n "
" { \n "
" atomi intVal; \n "
" real_bucket_t realVal; \n "
" } prevVal; \n "
" \n "
" do \n "
" { \n "
" prevVal.realVal = *source; \n "
" newVal.realVal = prevVal.realVal + operand; \n "
" } while (atomic_cmpxchg((volatile __global atomi*)source, prevVal.intVal, newVal.intVal) != prevVal.intVal); \n "
" } \n " ;
2014-07-08 03:11:14 -04:00
return os . str ( ) ;
}
}