--User changes

-Add new variations: bubbleT3D, crob, hexaplay3D, hexcrop, hexes, hexnix3D, loonie2, loonie3, nBlur, octapol and synth.
 -Allow for pre/post versions of dc_bubble, dc_cylinder and dc_linear whereas before they were omitted.
 -When saving a file with multiple embers in it, detect if time values are all the same and if so, start them at zero and increment by 1 for each ember.
 -Allow for numerous quality increases to be coalesced into one. It will pick up at the end of the current render.
 -Show selection highlight on variations tree in response to mouse hover. This makes it easier to see for which variation or param the current mouse wheel action will apply.
 -Make default temporal samples be 100, whereas before it was 1000 which was overkill.
 -Require the shift key to be held with delete for deleting an ember to prevent it from triggering when the user enters delete in the edit box.
  -This wasn't otherwise fixable without writing a lot more code.

--Bug fixes
 -EmberGenome was crashing when generating a sequence from a source file with more than 2 embers in it.
 -EmberGenome was improperly handling the first frame of a merge after the last frame of the loop.
  -These bugs were due to a previous commit. Revert parts of that commit.
 -Prevent a zoom value of less than 0 when reading from xml.
 -Slight optimization of the crescents and mask variations, in case the compiler wasn't doing it already.
 -Unique file naming was broken because it was looking for _# and the default names ended with -#.
 -Disallow renaming of an ember in the library tree to an empty string.
 -Severe bug that prevented some variations from being read correctly from params generated outside this program.
 -Severe OpenCL randomization bug. The first x coordinates of the first points in the first kernel call of the first ember of a render since the OpenCL renderer object was created were not random and were mostly -1.
 -Severe bug when populating xform selection distributions that could sometimes cause a crash due to roundoff error. Fix by using double.
 -Limit the max number of variations in a random ember to MAX_CL_VARS, which is 8. This ensures they'll look the same on CPU and GPU.
 -Prevent user from saving stylesheet to default.qss, it's a special reserved filename.

--Code changes
 -Generalize using the running sum output point inside of a variation for all cases: pre, reg and post.
 -Allow for array variables in variations where the address of each element is stored in m_Params.
 -Qualify all math functions with std::
 -No longer use our own Clamp() in OpenCL, instead use the standard clamp().
 -Redesign how functions are used in the variations OpenCL code.
 -Add tests to EmberTester to verify some of the new functionality.
 -Place more const and override qualifiers on functions where appropriate.
 -Add a global rand with a lock to be used very sparingly.
 -Use a map instead of a vector for bad param names in Xml parsing.
 -Prefix affine interpolation mode defines with "AFFINE_" to make their purpose more clear.
 -Allow for variations that change state during iteration by sending a separate copy of the ember to each rendering thread.
 -Implement this same functionality with a local struct in OpenCL. Its members are the union of all variables that need to change state within an ember.
 -Add Contains() function to Utils.h.
 -EmberRender: print the names of the kernels being dumped with the --dump_kernel option.
 -Clean up EmberTester to handle some of the recent changes.
 -Fix various casts.
 -Replace % 2 with & 1, even though the compiler was likely doing this already.
 -Add new file Variations06.h to accommodate new variations.
 -General cleanup.
This commit is contained in:
mfeemster
2015-11-22 14:15:07 -08:00
parent 04e72c27de
commit 330074cfb2
62 changed files with 8176 additions and 1877 deletions

View File

@ -2,7 +2,6 @@
#include "IterOpenCLKernelCreator.h"
//#define STRAIGHT_RAND 1
#define USE_CASE 1
namespace EmberCLns
{
@ -39,16 +38,20 @@ template <typename T> const string& IterOpenCLKernelCreator<T>::IterEntryPoint()
/// <param name="doAccum">Debugging parameter to include or omit accumulating to the histogram. Default: true.</param>
/// <returns>The kernel string</returns>
template <typename T>
string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, string& parVarDefines, bool lockAccum, bool doAccum)
string IterOpenCLKernelCreator<T>::CreateIterKernelString(const Ember<T>& ember, string& parVarDefines, bool lockAccum, bool doAccum)
{
bool doublePrecision = typeid(T) == typeid(double);
size_t i, v, varIndex, varCount, totalXformCount = ember.TotalXformCount();
ostringstream kernelIterBody, xformFuncs, os;
vector<Variation<T>*> variations;
xformFuncs << "\n" << parVarDefines << endl;
xformFuncs << VariationStateString(ember);
xformFuncs << parVarDefines << endl;
ember.GetPresentVariations(variations);
for (auto var : variations) if (var) xformFuncs << var->OpenCLFuncsString();
for (auto var : variations)
if (var)
xformFuncs << var->OpenCLFuncsString();
for (i = 0; i < totalXformCount; i++)
{
@ -62,7 +65,7 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
v = varIndex = varCount = 0;
xformFuncs <<
"void Xform" << i << "(__constant XformCL* xform, __constant real_t* parVars, Point* inPoint, Point* outPoint, uint2* mwc)\n" <<
"void Xform" << i << "(__constant XformCL* xform, __constant real_t* parVars, Point* inPoint, Point* outPoint, uint2* mwc, VariationState* varState)\n" <<
"{\n"
" real_t transX, transY, transZ;\n"
" real4 vIn, vOut = 0.0;\n";
@ -215,8 +218,7 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
os <<
ConstantDefinesString(doublePrecision) <<
InlineMathFunctionsString <<
ClampRealFunctionString <<
GlobalFunctionsString(ember) <<
RandFunctionString <<
PointCLStructString <<
XformCLStructString <<
@ -237,12 +239,13 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
" __constant EmberCL* ember,\n"
" __constant XformCL* xforms,\n"
" __constant real_t* parVars,\n"
" __global uchar* xformDistributions,\n"//Using uchar is quicker than uint. Can't be constant because the size can be too large to fit when using xaos.//FINALOPT
" __global uchar* xformDistributions,\n"//Using uchar is quicker than uint. Can't be constant because the size can be too large to fit when using xaos.
" __constant CarToRasCL* carToRas,\n"
" __global real4reals_bucket* histogram,\n"
" uint histSize,\n"
" __read_only image2d_t palette,\n"
" __global Point* points\n"
//" uint startRender\n"
"\t)\n"
"{\n"
" bool fuse, ok;\n"
@ -261,8 +264,9 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
" CLK_ADDRESS_CLAMP_TO_EDGE |\n"//Clamp to edge
" CLK_FILTER_NEAREST;\n"//Don't interpolate
" uint threadXY = (THREAD_ID_X + THREAD_ID_Y);\n"
" uint threadXDivRows = (THREAD_ID_X / (NTHREADS / THREADS_PER_WARP));\n"
" uint threadXDivRows = (THREAD_ID_X / NWARPS);\n"
" uint threadsMinus1 = NTHREADS - 1;\n"
" VariationState varState;\n"
;
os <<
@ -278,6 +282,9 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
" {\n"
" fuse = true;\n"
" itersToDo = fuseCount;\n"
//Calling MwcNextNeg1Pos1() twice is deliberate. The first call to mwc is not very random since it just does
//an xor. So it must be called twice to get it in a good random state.
" firstPoint.m_X = MwcNextNeg1Pos1(&mwc);\n"
" firstPoint.m_X = MwcNextNeg1Pos1(&mwc);\n"
" firstPoint.m_Y = MwcNextNeg1Pos1(&mwc);\n"
" firstPoint.m_Z = 0.0;\n"
@ -290,7 +297,13 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
" itersToDo = iterCount;\n"
" firstPoint = points[pointsIndex];\n"
" }\n"
"\n";
"\n"
;
auto varStateString = VariationStateInitString(ember);
if (!varStateString.empty())
os << varStateString << "\n\n";
//This is done once initially here and then again after each swap-sync in the main loop.
//This along with the randomness that the point shuffle provides gives sufficient randomness
@ -336,7 +349,6 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
for (i = 0; i < ember.XformCount(); i++)
{
#ifdef USE_CASE
if (i == 0)
{
os <<
@ -347,7 +359,7 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
os <<
" case " << i << ":\n"
" {\n" <<
" Xform" << i << "(&(xforms[" << i << "]), parVars, &firstPoint, &secondPoint, &mwc);\n" <<
" Xform" << i << "(&(xforms[" << i << "]), parVars, &firstPoint, &secondPoint, &mwc, &varState);\n" <<
" break;\n"
" }\n";
@ -356,19 +368,6 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
os <<
" }\n";
}
#else
if (i == 0)
os <<
" if (secondPoint.m_LastXfUsed == " << i << ")\n";
else
os <<
" else if (secondPoint.m_LastXfUsed == " << i << ")\n";
os <<
" {\n" <<
" Xform" << i << "(&(xforms[" << i << "]), parVars, &firstPoint, &secondPoint, &mwc);\n" <<
" }\n";
#endif
}
os <<
@ -438,7 +437,7 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
" if ((xforms[" << finalIndex << "].m_Opacity == 1) || (MwcNext01(&mwc) < xforms[" << finalIndex << "].m_Opacity))\n"
" {\n"
" tempPoint.m_LastXfUsed = secondPoint.m_LastXfUsed;\n"
" Xform" << finalIndex << "(&(xforms[" << finalIndex << "]), parVars, &secondPoint, &tempPoint, &mwc);\n"
" Xform" << finalIndex << "(&(xforms[" << finalIndex << "]), parVars, &secondPoint, &tempPoint, &mwc, &varState);\n"
" secondPoint = tempPoint;\n"
" }\n"
"\n";
@ -543,6 +542,50 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
return os.str();
}
/// <summary>
/// Assemble the text of every global OpenCL helper function the passed in ember depends on.
/// Names are gathered from each variation of each xform via OpenCLGlobalFuncNames(),
/// and "Zeps" is added for any xform that reports NeedPrecalcAngles().
/// Each name is recorded only once, in the order it was first encountered, and is then
/// resolved to its function text through m_FunctionMapper. Names with no text are skipped.
/// </summary>
/// <param name="ember">The ember whose xforms and variations are examined</param>
/// <returns>The text of all required global functions, each followed by a newline</returns>
template <typename T>
string IterOpenCLKernelCreator<T>::GlobalFunctionsString(const Ember<T>& ember)
{
	static string zeps = "Zeps";
	//A vector is used rather than a set because a set would sort the names.
	//First-seen order must be kept since some functions call others that must already be defined.
	vector<string> orderedNames;
	size_t totalXforms = ember.TotalXformCount();

	for (size_t xformIndex = 0; xformIndex < totalXforms; xformIndex++)
	{
		auto xform = ember.GetTotalXform(xformIndex);

		if (!xform)
			continue;

		size_t variationCount = xform->TotalVariationCount();

		if (xform->NeedPrecalcAngles() && !Contains(orderedNames, zeps))
			orderedNames.push_back(zeps);

		for (size_t variationIndex = 0; variationIndex < variationCount; variationIndex++)
		{
			auto var = xform->GetVariation(variationIndex);

			if (!var)
				continue;

			auto names = var->OpenCLGlobalFuncNames();

			for (auto& name : names)
			{
				if (!Contains(orderedNames, name))
					orderedNames.push_back(name);
			}
		}
	}

	ostringstream os;

	for (auto& funcName : orderedNames)
	{
		auto text = m_FunctionMapper.GetGlobalFunc(funcName);

		if (text)
			os << *text << endl;
	}

	return os.str();
}
/// <summary>
/// Create an OpenCL string of #defines and a corresponding host side vector for parametric variation values.
/// Parametric variations present a special problem in the iteration code.
@ -571,9 +614,9 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
/// #define CURL_C2_2 3
/// #define BLOB_LOW_3 4
/// #define BLOB_HIGH_3 5
/// #define BLOB_WAVES_ 6
/// #define BLOB_WAVES_3 6
///
/// The variations the use these #defines by first looking up the index of the
/// The variations use these #defines by first looking up the index of the
/// xform they belong to in the parent ember and generating the OpenCL string based on that
/// in their overridden OpenCLString() functions.
/// Template argument expected to be float or double.
@ -583,10 +626,9 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
/// <param name="doVals">True if the vector should be populated, else false. Default: true.</param>
/// <param name="doString">True if the string should be populated, else false. Default: true.</param>
template <typename T>
void IterOpenCLKernelCreator<T>::ParVarIndexDefines(Ember<T>& ember, pair<string, vector<T>>& params, bool doVals, bool doString)
void IterOpenCLKernelCreator<T>::ParVarIndexDefines(const Ember<T>& ember, pair<string, vector<T>>& params, bool doVals, bool doString)
{
size_t i, j, k, size = 0, xformCount = ember.TotalXformCount();
Xform<T>* xform;
ostringstream os;
if (doVals)
@ -594,23 +636,26 @@ void IterOpenCLKernelCreator<T>::ParVarIndexDefines(Ember<T>& ember, pair<string
for (i = 0; i < xformCount; i++)
{
if ((xform = ember.GetTotalXform(i)))
if (auto xform = ember.GetTotalXform(i))
{
size_t varCount = xform->TotalVariationCount();
for (j = 0; j < varCount; j++)
{
if (ParametricVariation<T>* parVar = dynamic_cast<ParametricVariation<T>*>(xform->GetVariation(j)))
if (auto parVar = dynamic_cast<ParametricVariation<T>*>(xform->GetVariation(j)))
{
for (k = 0; k < parVar->ParamCount(); k++)
{
if (doString)
os << "#define " << ToUpper(parVar->Params()[k].Name()) << "_" << i << " " << size << endl;//Uniquely identify this param in this variation in this xform.
if (!parVar->Params()[k].IsState())
{
if (doString)
os << "#define " << ToUpper(parVar->Params()[k].Name()) << "_" << i << " " << size << endl;//Uniquely identify this param in this variation in this xform.
if (doVals)
params.second.push_back(parVar->Params()[k].ParamVal());
if (doVals)
params.second.push_back(parVar->Params()[k].ParamVal());
size++;
size++;
}
}
}
}
@ -624,6 +669,69 @@ void IterOpenCLKernelCreator<T>::ParVarIndexDefines(Ember<T>& ember, pair<string
}
}
/// <summary>
/// Build the OpenCL typedef of the VariationState struct, which holds the values
/// that variations change from one iteration to the next.
/// The struct body is the concatenation of StateOpenCLString() from every variation
/// of every xform in the ember, in xform order then variation order.
/// When no variation contributes any text, the struct is emitted with an empty body.
/// </summary>
/// <param name="ember">The ember whose variations supply the struct members</param>
/// <returns>The complete VariationState typedef as a string</returns>
template <typename T>
string IterOpenCLKernelCreator<T>::VariationStateString(const Ember<T>& ember)
{
	ostringstream structText;
	structText << "typedef struct __attribute__ " ALIGN_CL " _VariationState\n{";

	for (size_t xformIndex = 0; xformIndex < ember.TotalXformCount(); xformIndex++)
	{
		auto xform = ember.GetTotalXform(xformIndex);

		if (!xform)
			continue;

		for (size_t variationIndex = 0; variationIndex < xform->TotalVariationCount(); variationIndex++)
		{
			auto var = xform->GetVariation(variationIndex);

			if (var)
				structText << var->StateOpenCLString();
		}
	}

	structText << "\n} VariationState;\n\n";
	return structText.str();
}
/// <summary>
/// Build the OpenCL statements that give the VariationState struct its initial values.
/// The result is the concatenation of StateInitOpenCLString() from every variation
/// of every xform in the ember, in xform order then variation order.
/// When no variation contributes any text, an empty string is returned.
/// The caller places this text near the start of the iteration kernel.
/// </summary>
/// <param name="ember">The ember whose variations supply the initialization statements</param>
/// <returns>The variation state initialization code as a string</returns>
template <typename T>
string IterOpenCLKernelCreator<T>::VariationStateInitString(const Ember<T>& ember)
{
	ostringstream initText;

	for (size_t xformIndex = 0; xformIndex < ember.TotalXformCount(); xformIndex++)
	{
		auto xform = ember.GetTotalXform(xformIndex);

		if (!xform)
			continue;

		for (size_t variationIndex = 0; variationIndex < xform->TotalVariationCount(); variationIndex++)
		{
			auto var = xform->GetVariation(variationIndex);

			if (var)
				initText << var->StateInitOpenCLString();
		}
	}

	return initText.str();
}
/// <summary>
/// Determine whether the two embers passed in differ enough
/// to require a rebuild of the iteration code.
@ -641,7 +749,7 @@ void IterOpenCLKernelCreator<T>::ParVarIndexDefines(Ember<T>& ember, pair<string
/// <param name="ember2">The second ember to compare</param>
/// <returns>True if a rebuild is required, else false</returns>
template <typename T>
bool IterOpenCLKernelCreator<T>::IsBuildRequired(Ember<T>& ember1, Ember<T>& ember2)
bool IterOpenCLKernelCreator<T>::IsBuildRequired(const Ember<T>& ember1, const Ember<T>& ember2)
{
size_t i, j, xformCount = ember1.TotalXformCount();
@ -688,7 +796,7 @@ bool IterOpenCLKernelCreator<T>::IsBuildRequired(Ember<T>& ember1, Ember<T>& emb
/// </summary>
/// <returns>The kernel string</returns>
template <typename T>
string IterOpenCLKernelCreator<T>::CreateZeroizeKernelString()
string IterOpenCLKernelCreator<T>::CreateZeroizeKernelString() const
{
ostringstream os;
@ -707,8 +815,16 @@ string IterOpenCLKernelCreator<T>::CreateZeroizeKernelString()
return os.str();
}
/// <summary>
/// Create the histogram summing kernel string.
/// This is used when running with multiple GPUs. It takes
/// two histograms present on a single device, source and dest,
/// and adds the values of source to dest.
/// It optionally sets all values of source to zero.
/// </summary>
/// <returns>The kernel string</returns>
template <typename T>
string IterOpenCLKernelCreator<T>::CreateSumHistKernelString()
string IterOpenCLKernelCreator<T>::CreateSumHistKernelString() const
{
ostringstream os;
@ -739,7 +855,7 @@ string IterOpenCLKernelCreator<T>::CreateSumHistKernelString()
/// <param name="ember">The ember to create the projection string for</param>
/// <returns>The kernel string</returns>
template <typename T>
string IterOpenCLKernelCreator<T>::CreateProjectionString(Ember<T>& ember)
string IterOpenCLKernelCreator<T>::CreateProjectionString(const Ember<T>& ember) const
{
size_t projBits = ember.ProjBits();
ostringstream os;