--User changes

-Add new palettes from user Rubydeva.

--Bug fixes
 -Avoid an occasional divide by zero in the OpenCL renderer when using the interactive editor.

--Code changes
 -Use exact comparisons in IsID() and IsZero() in Affine2D.
 -When testing for bad point values while iterating, only test for NaN now.
 -For rendering with OpenCL on the command line and in the final render dialog, use an optimized kernel that does a direct assignment for any affines which are ID.
This commit is contained in:
Person
2018-09-21 22:42:18 -07:00
parent 6f11f7df92
commit 55a2c393cf
21 changed files with 75557 additions and 48 deletions

View File

@ -73,7 +73,7 @@ static string ConstantDefinesString(bool doublePrecision)
"#define COLORMAP_LENGTH 256u\n"
"#define COLORMAP_LENGTH_MINUS_1 255\n"
"#define DE_THRESH 100u\n"
"#define BadVal(x) (((x) != (x)) || ((x) > 1e20) || ((x) < -1e20))\n"
"#define BadVal(x) (isnan(x))\n"
"#define SQR(x) ((x) * (x))\n"
"#define CUBE(x) ((x) * (x) * (x))\n"
"#define MPI ((real_t)M_PI)\n"

View File

@ -32,10 +32,12 @@ template <typename T> const string& IterOpenCLKernelCreator<T>::IterEntryPoint()
/// </summary>
/// <param name="ember">The ember to create the kernel string for</param>
/// <param name="params">The parametric variation #define string</param>
/// <param name="optAffine">True to optimize with a simple assignment when the pre affine transform is empty, else false. True is better for final renders, false for interactive to reduce repeated compilations.</param>
/// <param name="lockAccum">Whether to lock when accumulating to the histogram. This is only for debugging. Default: false.</param>
/// <param name="doAccum">Debugging parameter to include or omit accumulating to the histogram. Default: true.</param>
/// <returns>The kernel string</returns>
template <typename T>
string IterOpenCLKernelCreator<T>::CreateIterKernelString(const Ember<T>& ember, const string& parVarDefines, const string& globalSharedDefines, bool lockAccum, bool doAccum)
string IterOpenCLKernelCreator<T>::CreateIterKernelString(const Ember<T>& ember, const string& parVarDefines, const string& globalSharedDefines, bool optAffine, bool lockAccum, bool doAccum)
{
bool doublePrecision = typeid(T) == typeid(double);
size_t i = 0, v, varIndex, varCount;
@ -102,18 +104,19 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(const Ember<T>& ember,
}
else
{
/* if (xform->m_Affine.IsID())
{
if (optAffine && xform->m_Affine.IsID())
{
xformFuncs <<
" transX = inPoint->m_X;\n" <<
" transY = inPoint->m_Y;\n";
}
else*/
}
else
{
xformFuncs <<
" transX = fma(xform->m_A, inPoint->m_X, fma(xform->m_B, inPoint->m_Y, xform->m_C));\n" <<
" transY = fma(xform->m_D, inPoint->m_X, fma(xform->m_E, inPoint->m_Y, xform->m_F));\n";
}
xformFuncs <<
" transZ = inPoint->m_Z;\n";
varCount = xform->PreVariationCount();
@ -375,7 +378,7 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(const Ember<T>& ember,
os <<
"\n"
" ok = !BadVal(secondPoint.m_X) && !BadVal(secondPoint.m_Y);\n"
//" ok = !BadVal(secondPoint.m_X) && !BadVal(secondPoint.m_Y) && !BadVal(secondPoint.m_Z);\n"
//" ok = !BadVal(secondPoint.m_X) && !BadVal(secondPoint.m_Y) && !isnan(secondPoint.m_Z);\n"
"\n"
" if (!ok)\n"
" {\n"
@ -803,9 +806,10 @@ string IterOpenCLKernelCreator<T>::VariationStateInitString(const Ember<T>& embe
/// </summary>
/// <param name="ember1">The first ember to compare</param>
/// <param name="ember2">The second ember to compare</param>
/// <param name="optAffine">True to optimize with a simple assignment when the pre affine transform is empty, else false. True is better for final renders, false for interactive to reduce repeated compilations.</param>
/// <returns>True if a rebuild is required, else false</returns>
template <typename T>
bool IterOpenCLKernelCreator<T>::IsBuildRequired(const Ember<T>& ember1, const Ember<T>& ember2)
bool IterOpenCLKernelCreator<T>::IsBuildRequired(const Ember<T>& ember1, const Ember<T>& ember2, bool optAffine)
{
size_t i, j, xformCount = ember1.TotalXformCount();
@ -830,8 +834,8 @@ bool IterOpenCLKernelCreator<T>::IsBuildRequired(const Ember<T>& ember1, const E
auto xform2 = ember2.GetTotalXform(i);
auto varCount = xform1->TotalVariationCount();
//if (xform1->m_Affine.IsID() != xform2->m_Affine.IsID())
// return true;
if (optAffine && (xform1->m_Affine.IsID() != xform2->m_Affine.IsID()))
return true;
if (xform1->HasPost() != xform2->HasPost())
return true;

View File

@ -30,13 +30,13 @@ public:
const string& SumHistKernel() const;
const string& SumHistEntryPoint() const;
const string& IterEntryPoint() const;
string CreateIterKernelString(const Ember<T>& ember, const string& parVarDefines, const string& globalSharedDefines, bool lockAccum = false, bool doAccum = true);
string CreateIterKernelString(const Ember<T>& ember, const string& parVarDefines, const string& globalSharedDefines, bool optAffine, bool lockAccum = false, bool doAccum = true);
string GlobalFunctionsString(const Ember<T>& ember);
static void ParVarIndexDefines(const Ember<T>& ember, pair<string, vector<T>>& params, bool doVals = true, bool doString = true);
static void SharedDataIndexDefines(const Ember<T>& ember, pair<string, vector<T>>& params, bool doVals = true, bool doString = true);
static string VariationStateString(const Ember<T>& ember);
static string VariationStateInitString(const Ember<T>& ember);
static bool IsBuildRequired(const Ember<T>& ember1, const Ember<T>& ember2);
static bool IsBuildRequired(const Ember<T>& ember1, const Ember<T>& ember2, bool optAffine);
private:
string CreateZeroizeKernelString() const;

View File

@ -798,7 +798,7 @@ EmberStats RendererCL<T, bucketT>::Iterate(size_t iterCount, size_t temporalSamp
ConvertCarToRas(CoordMap());
//Rebuilding is expensive, so only do it if it's required.
if (IterOpenCLKernelCreator<T>::IsBuildRequired(m_Ember, m_LastBuiltEmber))
if (IterOpenCLKernelCreator<T>::IsBuildRequired(m_Ember, m_LastBuiltEmber, m_OptAffine))
b = BuildIterProgramForEmber(true);
if (b)
@ -905,7 +905,7 @@ bool RendererCL<T, bucketT>::BuildIterProgramForEmber(bool doAccum)
if (b)
{
m_CompileBegun();
m_IterKernel = m_IterOpenCLKernelCreator.CreateIterKernelString(m_Ember, m_Params.first, m_GlobalShared.first, m_LockAccum, doAccum);
m_IterKernel = m_IterOpenCLKernelCreator.CreateIterKernelString(m_Ember, m_Params.first, m_GlobalShared.first, m_OptAffine, m_LockAccum, doAccum);
//cout << "Building: " << "\n" << iterProgram << "\n";
vector<std::thread> threads;
std::function<void(RendererClDevice*)> func = [&](RendererClDevice * dev)
@ -962,6 +962,9 @@ template <typename T, typename bucketT>
bool RendererCL<T, bucketT>::RunIter(size_t iterCount, size_t temporalSample, size_t& itersRan)
{
//Timing t;//, t2(4);
if (iterCount == 0)//In rare cases this can happen in the interactive renderer, so just assume it's finished iterating to avoid dividing by zero below.
return true;
bool success = !m_Devices.empty();
uint histSuperSize = uint(SuperSize());
size_t launches = size_t(ceil(double(iterCount) / IterCountPerGrid()));

View File

@ -22,7 +22,13 @@ public:
virtual bool ReadFinal(v4F* pixels) { return false; }
virtual bool ClearFinal() { return false; }
virtual bool AnyNvidia() const { return false; }
bool OptAffine() const { return m_OptAffine; }
void OptAffine(bool optAffine) { m_OptAffine = optAffine; }
std::function<void(void)> m_CompileBegun;
protected:
bool m_OptAffine = false;
};
/// <summary>