--User changes

-Add new variations: bubbleT3D, crob, hexaplay3D, hexcrop, hexes, hexnix3D, loonie2, loonie3, nBlur, octapol and synth.
 -Allow for pre/post versions of dc_bubble, dc_cylinder and dc_linear whereas before they were omitted.
 -When saving a file with multiple embers in it, detect if time values are all the same and if so, start them at zero and increment by 1 for each ember.
 -Allow for numerous quality increases to be coalesced into one. It will pick up at the end of the current render.
 -Show selection highlight on variations tree in response to mouse hover. This makes it easier to see for which variation or param the current mouse wheel action will apply.
 -Make default temporal samples be 100, whereas before it was 1000 which was overkill.
 -Require the shift key to be held with delete for deleting an ember to prevent it from triggering when the user enters delete in the edit box.
  -This wasn't otherwise fixable without writing a lot more code.

--Bug fixes
 -EmberGenome was crashing when generating a sequence from a source file with more than 2 embers in it.
 -EmberGenome was improperly handling the first frame of a merge after the last frame of the loop.
  -These bugs were due to a previous commit. Revert parts of that commit.
 -Prevent a zoom value of less than 0 when reading from xml.
 -Slight optimization of the crescents and mask variations, if the compiler wasn't doing it already.
 -Unique file naming was broken because it was looking for _# and the default names ended with -#.
 -Disallow renaming of an ember in the library tree to an empty string.
 -Severe bug that prevented some variations from being read correctly from params generated outside this program.
 -Severe OpenCL randomization bug. The first x coordinates of the first points in the first kernel call of the first ember of a render since the OpenCL renderer object was created were not random and were mostly -1.
 -Severe bug when populating xform selection distributions that could sometimes cause a crash due to roundoff error. Fix by using double.
 -Limit the max number of variations in a random ember to MAX_CL_VARS, which is 8. This ensures they'll look the same on CPU and GPU.
 -Prevent user from saving stylesheet to default.qss, it's a special reserved filename.

--Code changes
 -Generalize using the running sum output point inside of a variation for all cases: pre, reg and post.
 -Allow for array variables in variations where the address of each element is stored in m_Params.
 -Qualify all math functions with std::
 -No longer use our own Clamp() in OpenCL, instead use the standard clamp().
 -Redesign how functions are used in the variations OpenCL code.
 -Add tests to EmberTester to verify some of the new functionality.
 -Place more const and override qualifiers on functions where appropriate.
 -Add a global rand with a lock to be used very sparingly.
 -Use a map instead of a vector for bad param names in Xml parsing.
 -Prefix affine interpolation mode defines with "AFFINE_" to make their purpose more clear.
 -Allow for variations that change state during iteration by sending a separate copy of the ember to each rendering thread.
 -Implement this same functionality with a local struct in OpenCL. Its members are the total of all variables that need to change state within an ember.
 -Add Contains() function to Utils.h.
 -EmberRender: print names of kernels being printed with --dump_kernel option.
 -Clean up EmberTester to handle some of the recent changes.
 -Fix various casts.
 -Replace % 2 with & 1, even though the compiler was likely doing this already.
 -Add new file Variations06.h to accommodate new variations.
 -General cleanup.
This commit is contained in:
mfeemster
2015-11-22 14:15:07 -08:00
parent 04e72c27de
commit 330074cfb2
62 changed files with 8176 additions and 1877 deletions

View File

@ -787,7 +787,8 @@ string DEOpenCLKernelCreator::CreateGaussianDEKernelNoLocalCache(size_t ss)
"\n";
if (doScf)
os << " filterSelect *= scfact;\n";
os <<
" filterSelect *= scfact;\n";
}
else
{
@ -830,7 +831,7 @@ string DEOpenCLKernelCreator::CreateGaussianDEKernelNoLocalCache(size_t ss)
" }\n"
" }\n"//bucket.w != 0.
" }\n"//In bounds.
"\n"
//"\n"
//" barrier(CLK_GLOBAL_MEM_FENCE);\n"//Just to be safe.
"}\n";

View File

@ -149,8 +149,8 @@ static const char* CalcAlphaFunctionString =
static const char* CurveAdjustFunctionString =
"static inline void CurveAdjust(__constant real4reals_bucket* csa, float* a, uint index)\n"
"{\n"
" uint tempIndex = (uint)Clamp(*a, 0.0, (float)COLORMAP_LENGTH_MINUS_1);\n"
" uint tempIndex2 = (uint)Clamp(csa[tempIndex].m_Real4.x, 0.0, (real_t)COLORMAP_LENGTH_MINUS_1);\n"
" uint tempIndex = (uint)clamp(*a, (float)0.0, (float)COLORMAP_LENGTH_MINUS_1);\n"
" uint tempIndex2 = (uint)clamp((float)csa[tempIndex].m_Real4.x, (float)0.0, (float)COLORMAP_LENGTH_MINUS_1);\n"
"\n"
" *a = (float)round(csa[tempIndex2].m_Reals[index]);\n"
"}\n";
@ -194,135 +194,6 @@ static const char* RandFunctionString =
"}\n"
"\n";
/// <summary>
/// OpenCL equivalent of the global ClampRef().
/// </summary>
static const char* ClampRealFunctionString =
"inline real_t Clamp(real_t val, real_t min, real_t max)\n"
"{\n"
" if (val < min)\n"
" return min;\n"
" else if (val > max)\n"
" return max;\n"
" else\n"
" return val;\n"
"}\n"
"\n"
"inline void ClampRef(real_t* val, real_t min, real_t max)\n"
"{\n"
" if (*val < min)\n"
" *val = min;\n"
" else if (*val > max)\n"
" *val = max;\n"
"}\n"
"\n"
"inline real_t ClampGte(real_t val, real_t gte)\n"
"{\n"
" return (val < gte) ? gte : val;\n"
"}\n"
"\n";
/// <summary>
/// OpenCL equivalent of the global LRint().
/// </summary>
static const char* InlineMathFunctionsString =
"inline real_t LRint(real_t x)\n"
"{\n"
" intPrec temp = (x >= 0.0 ? (intPrec)(x + 0.5) : (intPrec)(x - 0.5));\n"
" return (real_t)temp;\n"
"}\n"
"\n"
"inline real_t Round(real_t r)\n"
"{\n"
" return (r > 0.0) ? floor(r + 0.5) : ceil(r - 0.5);\n"
"}\n"
"\n"
"inline real_t Sign(real_t v)\n"
"{\n"
" return (v < 0.0) ? -1 : (v > 0.0) ? 1 : 0.0;\n"
"}\n"
"\n"
"inline real_t SignNz(real_t v)\n"
"{\n"
" return (v < 0.0) ? -1.0 : 1.0;\n"
"}\n"
"\n"
"inline real_t Sqr(real_t v)\n"
"{\n"
" return v * v;\n"
"}\n"
"\n"
"inline real_t SafeSqrt(real_t x)\n"
"{\n"
" if (x <= 0.0)\n"
" return 0.0;\n"
"\n"
" return sqrt(x);\n"
"}\n"
"\n"
"inline real_t Cube(real_t v)\n"
"{\n"
" return v * v * v;\n"
"}\n"
"\n"
"inline real_t Hypot(real_t x, real_t y)\n"
"{\n"
" return sqrt(SQR(x) + SQR(y));\n"
"}\n"
"\n"
"inline real_t Spread(real_t x, real_t y)\n"
"{\n"
" return Hypot(x, y) * ((x) > 0.0 ? 1.0 : -1.0);\n"
"}\n"
"\n"
"inline real_t Powq4(real_t x, real_t y)\n"
"{\n"
" return pow(fabs(x), y) * SignNz(x);\n"
"}\n"
"\n"
"inline real_t Powq4c(real_t x, real_t y)\n"
"{\n"
" return y == 1.0 ? x : Powq4(x, y);\n"
"}\n"
"\n"
"inline real_t Zeps(real_t x)\n"
"{\n"
" return x == 0.0 ? EPS : x;\n"
"}\n"
"\n"
"inline real_t Lerp(real_t a, real_t b, real_t p)\n"
"{\n"
" return a + (b - a) * p;\n"
"}\n"
"\n"
"inline real_t Fabsmod(real_t v)\n"
"{\n"
" real_t dummy;\n"
"\n"
" return modf(v, &dummy);\n"
"}\n"
"\n"
"inline real_t Fosc(real_t p, real_t amp, real_t ph)\n"
"{\n"
" return 0.5 - cos(p * amp + ph) * 0.5;\n"
"}\n"
"\n"
"inline real_t Foscn(real_t p, real_t ph)\n"
"{\n"
" return 0.5 - cos(p + ph) * 0.5;\n"
"}\n"
"\n"
"inline real_t LogScale(real_t x)\n"
"{\n"
" return x == 0.0 ? 0.0 : log((fabs(x) + 1) * M_E) * SignNz(x) / M_E;\n"
"}\n"
"\n"
"inline real_t LogMap(real_t x)\n"
"{\n"
" return x == 0.0 ? 0.0 : (M_E + log(x * M_E)) * 0.25 * SignNz(x);\n"
"}\n"
"\n";
/// <summary>
/// OpenCL equivalent Renderer::AddToAccum().
/// </summary>

View File

@ -44,9 +44,11 @@
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <set>
#include <string>
#include <iterator>
#include <time.h>
#include <unordered_map>
#ifdef _WIN32
#if defined(BUILDING_EMBERCL)

View File

@ -13,15 +13,6 @@
namespace EmberCLns
{
//These two must always match.
#ifdef WIN32
#define ALIGN __declspec(align(16))
#else
#define ALIGN __attribute__ ((aligned (16)))
#endif
#define ALIGN_CL "((aligned (16)))"//The extra parens are necessary.
/// <summary>
/// Various constants needed for rendering.
/// </summary>
@ -32,20 +23,23 @@ static string ConstantDefinesString(bool doublePrecision)
if (doublePrecision)
{
os << "#if defined(cl_amd_fp64)\n"//AMD extension available?
<< " #pragma OPENCL EXTENSION cl_amd_fp64 : enable\n"
<< "#endif\n"
<< "#if defined(cl_khr_fp64)\n"//Khronos extension available?
<< " #pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
<< "#endif\n"
<< "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"//Only supported on nVidia.
<< "typedef long intPrec;\n"
<< "typedef uint atomi;\n"//Same size as real_bucket_t, always 4 bytes.
<< "typedef double real_t;\n"
<< "typedef float real_bucket_t;\n"//Assume buckets are always float, even though iter calcs are in double.
<< "typedef double4 real4;\n"
<< "typedef float4 real4_bucket;\n"//And here too.
<< "#define EPS (DBL_EPSILON)\n"
;
" #pragma OPENCL EXTENSION cl_amd_fp64 : enable\n"
"#endif\n"
"#if defined(cl_khr_fp64)\n"//Khronos extension available?
" #pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
"#endif\n"
"#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"//Only supported on nVidia.
"typedef long intPrec;\n"
"typedef uint atomi;\n"//Same size as real_bucket_t, always 4 bytes.
"typedef double real_t;\n"
"typedef float real_bucket_t;\n"//Assume buckets are always float, even though iter calcs are in double.
"typedef double2 real2;\n"
"typedef double4 real4;\n"
"typedef float4 real4_bucket;\n"//And here too.
"#define EPS (DBL_EPSILON)\n"
"#define TLOW (DBL_MIN)\n"
"#define TMAX (DBL_MAX)\n"
;
}
else
{
@ -53,9 +47,12 @@ static string ConstantDefinesString(bool doublePrecision)
"typedef uint atomi;\n"
"typedef float real_t;\n"
"typedef float real_bucket_t;\n"
"typedef float2 real2;\n"
"typedef float4 real4;\n"
"typedef float4 real4_bucket;\n"
"#define EPS (FLT_EPSILON)\n"
"#define TLOW (FLT_MIN)\n"
"#define TMAX (FLT_MAX)\n"
;
}

View File

@ -186,7 +186,6 @@ string FinalAccumOpenCLKernelCreator::CreateFinalAccumKernelString(bool earlyCli
os <<
ConstantDefinesString(m_DoublePrecision) <<
ClampRealFunctionString <<
UnionCLStructString <<
RgbToHsvFunctionString <<
HsvToRgbFunctionString <<
@ -484,7 +483,6 @@ string FinalAccumOpenCLKernelCreator::CreateGammaCorrectionKernelString(bool alp
os <<
ConstantDefinesString(m_DoublePrecision) <<
ClampRealFunctionString <<
UnionCLStructString <<
RgbToHsvFunctionString <<
HsvToRgbFunctionString <<

View File

@ -0,0 +1,151 @@
#include "EmberCLPch.h"
#include "FunctionMapper.h"
namespace EmberCLns
{
std::unordered_map<string, string> FunctionMapper::m_GlobalMap;
/// <summary>
/// Constructor that fills the static map of OpenCL helper function names
/// to their full source text.
/// The map is only populated when it is currently empty, so constructing
/// further instances leaves the existing entries untouched.
/// </summary>
FunctionMapper::FunctionMapper()
{
	//The map is static: if it already has entries there is nothing left to do.
	if (!m_GlobalMap.empty())
		return;

	m_GlobalMap.emplace("LRint",
"inline real_t LRint(real_t x)\n"
"{\n"
" intPrec temp = (x >= 0.0 ? (intPrec)(x + 0.5) : (intPrec)(x - 0.5));\n"
" return (real_t)temp;\n"
"}\n");
	m_GlobalMap.emplace("Round",
"inline real_t Round(real_t r)\n"
"{\n"
" return (r > 0.0) ? floor(r + 0.5) : ceil(r - 0.5);\n"
"}\n");
	m_GlobalMap.emplace("Sign",
"inline real_t Sign(real_t v)\n"
"{\n"
" return (v < 0.0) ? -1 : (v > 0.0) ? 1 : 0.0;\n"
"}\n");
	m_GlobalMap.emplace("SignNz",
"inline real_t SignNz(real_t v)\n"
"{\n"
" return (v < 0.0) ? -1.0 : 1.0;\n"
"}\n");
	m_GlobalMap.emplace("Sqr",
"inline real_t Sqr(real_t v)\n"
"{\n"
" return v * v;\n"
"}\n");
	m_GlobalMap.emplace("SafeSqrt",
"inline real_t SafeSqrt(real_t x)\n"
"{\n"
" if (x <= 0.0)\n"
" return 0.0;\n"
"\n"
" return sqrt(x);\n"
"}\n");
	m_GlobalMap.emplace("Cube",
"inline real_t Cube(real_t v)\n"
"{\n"
" return v * v * v;\n"
"}\n");
	m_GlobalMap.emplace("Hypot",
"inline real_t Hypot(real_t x, real_t y)\n"
"{\n"
" return sqrt(SQR(x) + SQR(y));\n"
"}\n");
	m_GlobalMap.emplace("Spread",
"inline real_t Spread(real_t x, real_t y)\n"
"{\n"
" return Hypot(x, y) * ((x) > 0.0 ? 1.0 : -1.0);\n"
"}\n");
	m_GlobalMap.emplace("Powq4",
"inline real_t Powq4(real_t x, real_t y)\n"
"{\n"
" return pow(fabs(x), y) * SignNz(x);\n"
"}\n");
	m_GlobalMap.emplace("Powq4c",
"inline real_t Powq4c(real_t x, real_t y)\n"
"{\n"
" return y == 1.0 ? x : Powq4(x, y);\n"
"}\n");
	m_GlobalMap.emplace("Zeps",
"inline real_t Zeps(real_t x)\n"
"{\n"
" return x == 0.0 ? EPS : x;\n"
"}\n");
	m_GlobalMap.emplace("Lerp",
"inline real_t Lerp(real_t a, real_t b, real_t p)\n"
"{\n"
" return a + (b - a) * p;\n"
"}\n");
	m_GlobalMap.emplace("Fabsmod",
"inline real_t Fabsmod(real_t v)\n"
"{\n"
" real_t dummy;\n"
"\n"
" return modf(v, &dummy);\n"
"}\n");
	m_GlobalMap.emplace("Fosc",
"inline real_t Fosc(real_t p, real_t amp, real_t ph)\n"
"{\n"
" return 0.5 - cos(p * amp + ph) * 0.5;\n"
"}\n");
	m_GlobalMap.emplace("Foscn",
"inline real_t Foscn(real_t p, real_t ph)\n"
"{\n"
" return 0.5 - cos(p + ph) * 0.5;\n"
"}\n");
	m_GlobalMap.emplace("LogScale",
"inline real_t LogScale(real_t x)\n"
"{\n"
" return x == 0.0 ? 0.0 : log((fabs(x) + 1) * M_E) * SignNz(x) / M_E;\n"
"}\n");
	m_GlobalMap.emplace("LogMap",
"inline real_t LogMap(real_t x)\n"
"{\n"
" return x == 0.0 ? 0.0 : (M_E + log(x * M_E)) * 0.25 * SignNz(x);\n"
"}\n");
	m_GlobalMap.emplace("ClampGte",
"inline real_t ClampGte(real_t val, real_t gte)\n"
"{\n"
" return (val < gte) ? gte : val;\n"
"}\n");
	m_GlobalMap.emplace("Swap",
"inline void Swap(real_t* val1, real_t* val2)\n"
"{\n"
" real_t tmp = *val1;\n"
" *val1 = *val2;\n"
" *val2 = tmp;\n"
"}\n");
}
/// <summary>
/// Look up the full OpenCL source text of a global helper function by name.
/// The static map searched here is filled by the FunctionMapper constructor.
/// </summary>
/// <param name="func">The name of the function to look up, such as "Zeps"</param>
/// <returns>A pointer to the function's source text held in the static map if found, else nullptr.</returns>
const string* FunctionMapper::GetGlobalFunc(const string& func)
{
	//find() returns a temporary iterator, so it must be held by value:
	//binding it to a non-const lvalue reference is not valid standard C++.
	const auto it = m_GlobalMap.find(func);

	return it != m_GlobalMap.end() ? &it->second : nullptr;
}
}

View File

@ -0,0 +1,21 @@
#pragma once
#include "EmberCLPch.h"
namespace EmberCLns
{
/// <summary>
/// Functionality to map OpenCL function names to their full function body program strings.
/// This is used to ensure only the functions that are needed by a program are included once
/// in the program string.
/// The name-to-text table is a single static map shared by every instance.
/// </summary>
class EMBERCL_API FunctionMapper
{
public:
//Fills the shared static map with the built-in function strings if it is still empty.
FunctionMapper();
//Returns a pointer to the program string stored for the passed in function name, or nullptr if the name is not in the map.
static const string* GetGlobalFunc(const string& func);
private:
//Function name -> full OpenCL function text, shared across all instances.
static std::unordered_map<string, string> m_GlobalMap;
};
}

View File

@ -2,7 +2,6 @@
#include "IterOpenCLKernelCreator.h"
//#define STRAIGHT_RAND 1
#define USE_CASE 1
namespace EmberCLns
{
@ -39,16 +38,20 @@ template <typename T> const string& IterOpenCLKernelCreator<T>::IterEntryPoint()
/// <param name="doAccum">Debugging parameter to include or omit accumulating to the histogram. Default: true.</param>
/// <returns>The kernel string</returns>
template <typename T>
string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, string& parVarDefines, bool lockAccum, bool doAccum)
string IterOpenCLKernelCreator<T>::CreateIterKernelString(const Ember<T>& ember, string& parVarDefines, bool lockAccum, bool doAccum)
{
bool doublePrecision = typeid(T) == typeid(double);
size_t i, v, varIndex, varCount, totalXformCount = ember.TotalXformCount();
ostringstream kernelIterBody, xformFuncs, os;
vector<Variation<T>*> variations;
xformFuncs << "\n" << parVarDefines << endl;
xformFuncs << VariationStateString(ember);
xformFuncs << parVarDefines << endl;
ember.GetPresentVariations(variations);
for (auto var : variations) if (var) xformFuncs << var->OpenCLFuncsString();
for (auto var : variations)
if (var)
xformFuncs << var->OpenCLFuncsString();
for (i = 0; i < totalXformCount; i++)
{
@ -62,7 +65,7 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
v = varIndex = varCount = 0;
xformFuncs <<
"void Xform" << i << "(__constant XformCL* xform, __constant real_t* parVars, Point* inPoint, Point* outPoint, uint2* mwc)\n" <<
"void Xform" << i << "(__constant XformCL* xform, __constant real_t* parVars, Point* inPoint, Point* outPoint, uint2* mwc, VariationState* varState)\n" <<
"{\n"
" real_t transX, transY, transZ;\n"
" real4 vIn, vOut = 0.0;\n";
@ -215,8 +218,7 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
os <<
ConstantDefinesString(doublePrecision) <<
InlineMathFunctionsString <<
ClampRealFunctionString <<
GlobalFunctionsString(ember) <<
RandFunctionString <<
PointCLStructString <<
XformCLStructString <<
@ -237,12 +239,13 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
" __constant EmberCL* ember,\n"
" __constant XformCL* xforms,\n"
" __constant real_t* parVars,\n"
" __global uchar* xformDistributions,\n"//Using uchar is quicker than uint. Can't be constant because the size can be too large to fit when using xaos.//FINALOPT
" __global uchar* xformDistributions,\n"//Using uchar is quicker than uint. Can't be constant because the size can be too large to fit when using xaos.
" __constant CarToRasCL* carToRas,\n"
" __global real4reals_bucket* histogram,\n"
" uint histSize,\n"
" __read_only image2d_t palette,\n"
" __global Point* points\n"
//" uint startRender\n"
"\t)\n"
"{\n"
" bool fuse, ok;\n"
@ -261,8 +264,9 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
" CLK_ADDRESS_CLAMP_TO_EDGE |\n"//Clamp to edge
" CLK_FILTER_NEAREST;\n"//Don't interpolate
" uint threadXY = (THREAD_ID_X + THREAD_ID_Y);\n"
" uint threadXDivRows = (THREAD_ID_X / (NTHREADS / THREADS_PER_WARP));\n"
" uint threadXDivRows = (THREAD_ID_X / NWARPS);\n"
" uint threadsMinus1 = NTHREADS - 1;\n"
" VariationState varState;\n"
;
os <<
@ -278,6 +282,9 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
" {\n"
" fuse = true;\n"
" itersToDo = fuseCount;\n"
//Calling MwcNextNeg1Pos1() twice is deliberate. The first call to mwc is not very random since it just does
//an xor. So it must be called twice to get it in a good random state.
" firstPoint.m_X = MwcNextNeg1Pos1(&mwc);\n"
" firstPoint.m_X = MwcNextNeg1Pos1(&mwc);\n"
" firstPoint.m_Y = MwcNextNeg1Pos1(&mwc);\n"
" firstPoint.m_Z = 0.0;\n"
@ -290,7 +297,13 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
" itersToDo = iterCount;\n"
" firstPoint = points[pointsIndex];\n"
" }\n"
"\n";
"\n"
;
auto varStateString = VariationStateInitString(ember);
if (!varStateString.empty())
os << varStateString << "\n\n";
//This is done once initially here and then again after each swap-sync in the main loop.
//This along with the randomness that the point shuffle provides gives sufficient randomness
@ -336,7 +349,6 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
for (i = 0; i < ember.XformCount(); i++)
{
#ifdef USE_CASE
if (i == 0)
{
os <<
@ -347,7 +359,7 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
os <<
" case " << i << ":\n"
" {\n" <<
" Xform" << i << "(&(xforms[" << i << "]), parVars, &firstPoint, &secondPoint, &mwc);\n" <<
" Xform" << i << "(&(xforms[" << i << "]), parVars, &firstPoint, &secondPoint, &mwc, &varState);\n" <<
" break;\n"
" }\n";
@ -356,19 +368,6 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
os <<
" }\n";
}
#else
if (i == 0)
os <<
" if (secondPoint.m_LastXfUsed == " << i << ")\n";
else
os <<
" else if (secondPoint.m_LastXfUsed == " << i << ")\n";
os <<
" {\n" <<
" Xform" << i << "(&(xforms[" << i << "]), parVars, &firstPoint, &secondPoint, &mwc);\n" <<
" }\n";
#endif
}
os <<
@ -438,7 +437,7 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
" if ((xforms[" << finalIndex << "].m_Opacity == 1) || (MwcNext01(&mwc) < xforms[" << finalIndex << "].m_Opacity))\n"
" {\n"
" tempPoint.m_LastXfUsed = secondPoint.m_LastXfUsed;\n"
" Xform" << finalIndex << "(&(xforms[" << finalIndex << "]), parVars, &secondPoint, &tempPoint, &mwc);\n"
" Xform" << finalIndex << "(&(xforms[" << finalIndex << "]), parVars, &secondPoint, &tempPoint, &mwc, &varState);\n"
" secondPoint = tempPoint;\n"
" }\n"
"\n";
@ -543,6 +542,50 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
return os.str();
}
/// <summary>
/// Build the OpenCL source text for every global helper function required by the passed in ember.
/// Function names are gathered from each xform and each of its variations, and a name is
/// only recorded the first time it is encountered. Bodies are then written out in that
/// first-seen order, which must be preserved because of nested function calls.
/// An xform that reports NeedPrecalcAngles() additionally requires "Zeps".
/// Names for which the function mapper has no entry are skipped.
/// </summary>
/// <param name="ember">The ember whose xforms and variations are examined</param>
/// <returns>The bodies of all required global functions, each followed by a newline</returns>
template <typename T>
string IterOpenCLKernelCreator<T>::GlobalFunctionsString(const Ember<T>& ember)
{
	static string zepsName = "Zeps";
	vector<string> orderedNames;//A vector rather than a set: sets sort, and insertion order must be kept.
	auto addOnce = [&orderedNames](const string& name)
	{
		if (!Contains(orderedNames, name))
			orderedNames.push_back(name);
	};
	const size_t totalXforms = ember.TotalXformCount();

	for (size_t xf = 0; xf < totalXforms; xf++)
	{
		auto xform = ember.GetTotalXform(xf);

		if (!xform)
			continue;

		const size_t variationCount = xform->TotalVariationCount();

		if (xform->NeedPrecalcAngles())
			addOnce(zepsName);

		for (size_t vi = 0; vi < variationCount; vi++)
		{
			auto var = xform->GetVariation(vi);

			if (!var)
				continue;

			auto names = var->OpenCLGlobalFuncNames();

			for (auto& name : names)
				addOnce(name);
		}
	}

	ostringstream os;

	for (auto& funcName : orderedNames)
	{
		auto text = m_FunctionMapper.GetGlobalFunc(funcName);

		if (text)
			os << *text << endl;
	}

	return os.str();
}
/// <summary>
/// Create an OpenCL string of #defines and a corresponding host side vector for parametric variation values.
/// Parametric variations present a special problem in the iteration code.
@ -571,9 +614,9 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
/// #define CURL_C2_2 3
/// #define BLOB_LOW_3 4
/// #define BLOB_HIGH_3 5
/// #define BLOB_WAVES_ 6
/// #define BLOB_WAVES_3 6
///
/// The variations the use these #defines by first looking up the index of the
/// The variations use these #defines by first looking up the index of the
/// xform they belong to in the parent ember and generating the OpenCL string based on that
/// in their overridden OpenCLString() functions.
/// Template argument expected to be float or double.
@ -583,10 +626,9 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
/// <param name="doVals">True if the vector should be populated, else false. Default: true.</param>
/// <param name="doString">True if the string should be populated, else false. Default: true.</param>
template <typename T>
void IterOpenCLKernelCreator<T>::ParVarIndexDefines(Ember<T>& ember, pair<string, vector<T>>& params, bool doVals, bool doString)
void IterOpenCLKernelCreator<T>::ParVarIndexDefines(const Ember<T>& ember, pair<string, vector<T>>& params, bool doVals, bool doString)
{
size_t i, j, k, size = 0, xformCount = ember.TotalXformCount();
Xform<T>* xform;
ostringstream os;
if (doVals)
@ -594,23 +636,26 @@ void IterOpenCLKernelCreator<T>::ParVarIndexDefines(Ember<T>& ember, pair<string
for (i = 0; i < xformCount; i++)
{
if ((xform = ember.GetTotalXform(i)))
if (auto xform = ember.GetTotalXform(i))
{
size_t varCount = xform->TotalVariationCount();
for (j = 0; j < varCount; j++)
{
if (ParametricVariation<T>* parVar = dynamic_cast<ParametricVariation<T>*>(xform->GetVariation(j)))
if (auto parVar = dynamic_cast<ParametricVariation<T>*>(xform->GetVariation(j)))
{
for (k = 0; k < parVar->ParamCount(); k++)
{
if (doString)
os << "#define " << ToUpper(parVar->Params()[k].Name()) << "_" << i << " " << size << endl;//Uniquely identify this param in this variation in this xform.
if (!parVar->Params()[k].IsState())
{
if (doString)
os << "#define " << ToUpper(parVar->Params()[k].Name()) << "_" << i << " " << size << endl;//Uniquely identify this param in this variation in this xform.
if (doVals)
params.second.push_back(parVar->Params()[k].ParamVal());
if (doVals)
params.second.push_back(parVar->Params()[k].ParamVal());
size++;
size++;
}
}
}
}
@ -624,6 +669,69 @@ void IterOpenCLKernelCreator<T>::ParVarIndexDefines(Ember<T>& ember, pair<string
}
}
/// <summary>
/// Build the OpenCL typedef for the VariationState struct, which holds the values
/// that change between iterations.
/// The struct body is the concatenation of StateOpenCLString() from every variation
/// of every xform in the ember, so it has no members when no variation supplies any.
/// </summary>
/// <param name="ember">The ember to generate the variation state struct string for</param>
/// <returns>The variation state struct string</returns>
template <typename T>
string IterOpenCLKernelCreator<T>::VariationStateString(const Ember<T>& ember)
{
	ostringstream structText;
	const size_t totalXforms = ember.TotalXformCount();

	structText << "typedef struct __attribute__ " ALIGN_CL " _VariationState\n{";

	for (size_t xf = 0; xf < totalXforms; xf++)
	{
		auto xform = ember.GetTotalXform(xf);

		if (!xform)
			continue;

		const size_t variationCount = xform->TotalVariationCount();

		for (size_t vi = 0; vi < variationCount; vi++)
		{
			auto var = xform->GetVariation(vi);

			if (var)
				structText << var->StateOpenCLString();
		}
	}

	structText << "\n} VariationState;\n\n";
	return structText.str();
}
/// <summary>
/// Build the OpenCL statements that set the initial values of the VariationState struct,
/// whose values change between iterations.
/// The result is the concatenation of StateInitOpenCLString() from every variation
/// of every xform in the ember, so it is empty when no variation supplies any.
/// It is meant to be placed at the beginning of each kernel.
/// </summary>
/// <param name="ember">The ember to generate the variation state struct init string for</param>
/// <returns>The variation state struct init string</returns>
template <typename T>
string IterOpenCLKernelCreator<T>::VariationStateInitString(const Ember<T>& ember)
{
	ostringstream initText;
	const size_t totalXforms = ember.TotalXformCount();

	for (size_t xf = 0; xf < totalXforms; xf++)
	{
		auto xform = ember.GetTotalXform(xf);

		if (!xform)
			continue;

		const size_t variationCount = xform->TotalVariationCount();

		for (size_t vi = 0; vi < variationCount; vi++)
		{
			auto var = xform->GetVariation(vi);

			if (var)
				initText << var->StateInitOpenCLString();
		}
	}

	return initText.str();
}
/// <summary>
/// Determine whether the two embers passed in differ enough
/// to require a rebuild of the iteration code.
@ -641,7 +749,7 @@ void IterOpenCLKernelCreator<T>::ParVarIndexDefines(Ember<T>& ember, pair<string
/// <param name="ember2">The second ember to compare</param>
/// <returns>True if a rebuild is required, else false</returns>
template <typename T>
bool IterOpenCLKernelCreator<T>::IsBuildRequired(Ember<T>& ember1, Ember<T>& ember2)
bool IterOpenCLKernelCreator<T>::IsBuildRequired(const Ember<T>& ember1, const Ember<T>& ember2)
{
size_t i, j, xformCount = ember1.TotalXformCount();
@ -688,7 +796,7 @@ bool IterOpenCLKernelCreator<T>::IsBuildRequired(Ember<T>& ember1, Ember<T>& emb
/// </summary>
/// <returns>The kernel string</returns>
template <typename T>
string IterOpenCLKernelCreator<T>::CreateZeroizeKernelString()
string IterOpenCLKernelCreator<T>::CreateZeroizeKernelString() const
{
ostringstream os;
@ -707,8 +815,16 @@ string IterOpenCLKernelCreator<T>::CreateZeroizeKernelString()
return os.str();
}
/// <summary>
/// Create the histogram summing kernel string.
/// This is used when running with multiple GPUs. It takes
/// two histograms present on a single device, source and dest,
/// and adds the values of source to dest.
/// It optionally sets all values of source to zero.
/// </summary>
/// <returns>The kernel string</returns>
template <typename T>
string IterOpenCLKernelCreator<T>::CreateSumHistKernelString()
string IterOpenCLKernelCreator<T>::CreateSumHistKernelString() const
{
ostringstream os;
@ -739,7 +855,7 @@ string IterOpenCLKernelCreator<T>::CreateSumHistKernelString()
/// <param name="ember">The ember to create the projection string for</param>
/// <returns>The kernel string</returns>
template <typename T>
string IterOpenCLKernelCreator<T>::CreateProjectionString(Ember<T>& ember)
string IterOpenCLKernelCreator<T>::CreateProjectionString(const Ember<T>& ember) const
{
size_t projBits = ember.ProjBits();
ostringstream os;

View File

@ -3,6 +3,7 @@
#include "EmberCLPch.h"
#include "EmberCLStructs.h"
#include "EmberCLFunctions.h"
#include "FunctionMapper.h"
/// <summary>
/// IterOpenCLKernelCreator class.
@ -29,20 +30,24 @@ public:
const string& SumHistKernel() const;
const string& SumHistEntryPoint() const;
const string& IterEntryPoint() const;
string CreateIterKernelString(Ember<T>& ember, string& parVarDefines, bool lockAccum = false, bool doAccum = true);
static void ParVarIndexDefines(Ember<T>& ember, pair<string, vector<T>>& params, bool doVals = true, bool doString = true);
static bool IsBuildRequired(Ember<T>& ember1, Ember<T>& ember2);
string CreateIterKernelString(const Ember<T>& ember, string& parVarDefines, bool lockAccum = false, bool doAccum = true);
string GlobalFunctionsString(const Ember<T>& ember);
static void ParVarIndexDefines(const Ember<T>& ember, pair<string, vector<T>>& params, bool doVals = true, bool doString = true);
static string VariationStateString(const Ember<T>& ember);
static string VariationStateInitString(const Ember<T>& ember);
static bool IsBuildRequired(const Ember<T>& ember1, const Ember<T>& ember2);
private:
string CreateZeroizeKernelString();
string CreateSumHistKernelString();
string CreateProjectionString(Ember<T>& ember);
string CreateZeroizeKernelString() const;
string CreateSumHistKernelString() const;
string CreateProjectionString(const Ember<T>& ember) const;
string m_IterEntryPoint;
string m_ZeroizeKernel;
string m_ZeroizeEntryPoint;
string m_SumHistKernel;
string m_SumHistEntryPoint;
FunctionMapper m_FunctionMapper;
};
#ifdef OPEN_CL_TEST_AREA

View File

@ -354,7 +354,7 @@ bool RendererCL<T, bucketT>::WritePoints(size_t device, vector<PointCL<T>>& vec)
#ifdef TEST_CL
template <typename T, typename bucketT>
bool RendererCL<T, bucketT>::WriteRandomPoints()
bool RendererCL<T, bucketT>::WriteRandomPoints(size_t device)
{
size_t size = IterGridKernelCount();
vector<PointCL<T>> vec(size);
@ -368,7 +368,7 @@ bool RendererCL<T, bucketT>::WriteRandomPoints()
vec[i].m_LastXfUsed = 0;
}
return WritePoints(vec);
return WritePoints(device, vec);
}
#endif
@ -613,6 +613,8 @@ vector<string> RendererCL<T, bucketT>::ErrorReport()
/// <summary>
/// Set the vector of random contexts on every device.
/// Call the base, and reset the seeds vector.
/// Used on the command line when the user wants a specific set of seeds to start with to
/// produce an exact result. Mostly for debugging.
/// </summary>
/// <param name="randVec">The vector of random contexts to assign</param>
/// <returns>True if the size of the vector matched the number of threads used for rendering and writing seeds to OpenCL succeeded, else false.</returns>
@ -707,9 +709,10 @@ bool RendererCL<T, bucketT>::ResetBuckets(bool resetHist, bool resetAccum)
/// <summary>
/// Perform log scale density filtering on the primary device.
/// </summary>
/// <param name="forceOutput">Whether this output was forced due to an interactive render</param>
/// <returns>True if success and not aborted, else false.</returns>
template <typename T, typename bucketT>
eRenderStatus RendererCL<T, bucketT>::LogScaleDensityFilter()
eRenderStatus RendererCL<T, bucketT>::LogScaleDensityFilter(bool forceOutput)
{
return RunLogScaleFilter();
}
@ -804,10 +807,10 @@ EmberStats RendererCL<T, bucketT>::Iterate(size_t iterCount, size_t temporalSamp
{
auto& wrapper = device->m_Wrapper;
if (b && !(b = wrapper.WriteBuffer (m_EmberBufferName, reinterpret_cast<void*>(&m_EmberCL), sizeof(m_EmberCL)))) { this->m_ErrorReport.push_back(loc); }
if (b && !(b = wrapper.WriteBuffer (m_XformsBufferName, reinterpret_cast<void*>(m_XformsCL.data()), sizeof(m_XformsCL[0]) * m_XformsCL.size()))) { this->m_ErrorReport.push_back(loc); }
if (b && !(b = wrapper.AddAndWriteBuffer(m_DistBufferName, reinterpret_cast<void*>(const_cast<byte*>(XformDistributions())), XformDistributionsSize()))) { this->m_ErrorReport.push_back(loc); }//Will be resized for xaos.
if (b && !(b = wrapper.WriteBuffer (m_CarToRasBufferName, reinterpret_cast<void*>(&m_CarToRasCL), sizeof(m_CarToRasCL)))) { this->m_ErrorReport.push_back(loc); }
if (b && !(b = wrapper.WriteBuffer (m_EmberBufferName, reinterpret_cast<void*>(&m_EmberCL), sizeof(m_EmberCL)))) { this->m_ErrorReport.push_back(loc); }
if (b && !(b = wrapper.WriteBuffer (m_XformsBufferName, reinterpret_cast<void*>(m_XformsCL.data()), sizeof(m_XformsCL[0]) * m_XformsCL.size()))) { this->m_ErrorReport.push_back(loc); }
if (b && !(b = wrapper.AddAndWriteBuffer(m_DistBufferName, reinterpret_cast<void*>(const_cast<byte*>(XformDistributions())), XformDistributionsSize()))) { this->m_ErrorReport.push_back(loc); }//Will be resized for xaos.
if (b && !(b = wrapper.WriteBuffer (m_CarToRasBufferName, reinterpret_cast<void*>(&m_CarToRasCL), sizeof(m_CarToRasCL)))) { this->m_ErrorReport.push_back(loc); }
if (b && !(b = wrapper.AddAndWriteImage("Palette", CL_MEM_READ_ONLY, m_PaletteFormat, m_Dmap.m_Entries.size(), 1, 0, m_Dmap.m_Entries.data()))) { this->m_ErrorReport.push_back(loc); }
@ -966,7 +969,7 @@ bool RendererCL<T, bucketT>::RunIter(size_t iterCount, size_t temporalSample, si
{
cl_uint argIndex = 0;
#ifdef TEST_CL
fuse = 0;
uint fuse = 0;
#else
uint fuse = uint((m_Devices[dev]->m_Calls % fuseFreq) == 0u ? FuseCount() : 0u);
#endif

View File

@ -129,7 +129,7 @@ public:
bool ClearAccum();
bool WritePoints(size_t device, vector<PointCL<T>>& vec);
#ifdef TEST_CL
bool WriteRandomPoints();
bool WriteRandomPoints(size_t device);
#endif
const string& IterKernel() const;
const string& DEKernel() const;
@ -159,7 +159,7 @@ protected:
//Protected virtual functions overridden from Renderer.
virtual bool Alloc(bool histOnly = false) override;
virtual bool ResetBuckets(bool resetHist = true, bool resetAccum = true) override;
virtual eRenderStatus LogScaleDensityFilter() override;
virtual eRenderStatus LogScaleDensityFilter(bool forceOutput = false) override;
virtual eRenderStatus GaussianDensityFilter() override;
virtual eRenderStatus AccumulatorToFinalImage(byte* pixels, size_t finalOffset) override;
virtual EmberStats Iterate(size_t iterCount, size_t temporalSample) override;