--User changes

-Add Simon Detheridge's name to the About Box.

--Bug fixes
 -Fix bug in OpenCL atomic string, which is never used.
 -Wrong hist and accum allocation size in RendererCL when using float-only buffers now.
 -Move some kernel initialization to a place where it's done once per render, rather than on every interactive iter chunk.

--Code changes
 -Make ConvertCarToRas() just assign to the member rather than return a struct.
 -Make kernel string accessor functions in IterOpenCLKernelCreator, FinalAccumOpenCLKernelCreator and DEOpenCLKernelCreator be const and return a const string reference.
 -Don't include atomic string unless locking on the GPU, which is never.
This commit is contained in:
mfeemster 2015-08-12 18:51:07 -07:00
parent 6b813c8dac
commit a4aae06b02
12 changed files with 132 additions and 155 deletions

View File

@ -56,8 +56,8 @@ DEOpenCLKernelCreator::DEOpenCLKernelCreator(bool doublePrecision, bool nVidia)
/// Kernel source and entry point properties, getters only.
/// </summary>
string DEOpenCLKernelCreator::LogScaleAssignDEKernel() { return m_LogScaleAssignDEKernel; }
string DEOpenCLKernelCreator::LogScaleAssignDEEntryPoint() { return m_LogScaleAssignDEEntryPoint; }
const string& DEOpenCLKernelCreator::LogScaleAssignDEKernel() const { return m_LogScaleAssignDEKernel; }
const string& DEOpenCLKernelCreator::LogScaleAssignDEEntryPoint() const { return m_LogScaleAssignDEEntryPoint; }
/// <summary>
/// Get the kernel source for the specified supersample and filterWidth.
@ -65,7 +65,7 @@ string DEOpenCLKernelCreator::LogScaleAssignDEEntryPoint() { return m_LogScaleAs
/// <param name="ss">The supersample being used</param>
/// <param name="filterWidth">Filter width</param>
/// <returns>The kernel source</returns>
string DEOpenCLKernelCreator::GaussianDEKernel(size_t ss, uint filterWidth)
const string& DEOpenCLKernelCreator::GaussianDEKernel(size_t ss, uint filterWidth) const
{
#ifndef ROW_ONLY_DE
if (filterWidth > MaxDEFilterSize())
@ -101,7 +101,7 @@ string DEOpenCLKernelCreator::GaussianDEKernel(size_t ss, uint filterWidth)
/// <param name="ss">The supersample being used</param>
/// <param name="filterWidth">Filter width</param>
/// <returns>The name of the density estimation filtering entry point kernel function</returns>
string DEOpenCLKernelCreator::GaussianDEEntryPoint(size_t ss, uint filterWidth)
const string& DEOpenCLKernelCreator::GaussianDEEntryPoint(size_t ss, uint filterWidth) const
{
#ifndef ROW_ONLY_DE
if (filterWidth > MaxDEFilterSize())

View File

@ -34,10 +34,10 @@ public:
DEOpenCLKernelCreator(bool doublePrecision, bool nVidia);
//Accessors.
string LogScaleAssignDEKernel();
string LogScaleAssignDEEntryPoint();
string GaussianDEKernel(size_t ss, uint filterWidth);
string GaussianDEEntryPoint(size_t ss, uint filterWidth);
const string& LogScaleAssignDEKernel() const;
const string& LogScaleAssignDEEntryPoint() const;
const string& GaussianDEKernel(size_t ss, uint filterWidth) const;
const string& GaussianDEEntryPoint(size_t ss, uint filterWidth) const;
//Miscellaneous static functions.
static uint MaxDEFilterSize();

View File

@ -351,14 +351,11 @@ static const char* CarToRasFunctionString =
"}\n"
"\n";
static string AtomicString(bool doublePrecision, bool dp64AtomicSupport)
static string AtomicString()
{
ostringstream os;
//If they want single precision, or if they want double precision and have dp atomic support.
if (!doublePrecision || dp64AtomicSupport)
{
os <<
os <<
"void AtomicAdd(volatile __global real_bucket_t* source, const real_bucket_t operand)\n"
"{\n"
" union\n"
@ -379,32 +376,6 @@ static string AtomicString(bool doublePrecision, bool dp64AtomicSupport)
" newVal.realVal = prevVal.realVal + operand;\n"
" } while (atomic_cmpxchg((volatile __global atomi*)source, prevVal.intVal, newVal.intVal) != prevVal.intVal);\n"
"}\n";
}
else//They want double precision and do not have dp atomic support.
{
os <<
"void AtomicAdd(volatile __global double* source, const double operand)\n"
"{\n"
" union\n"
" {\n"
" uint intVal[2];\n"
" double realVal;\n"
" } newVal;\n"
"\n"
" union\n"
" {\n"
" uint intVal[2];\n"
" double realVal;\n"
" } prevVal;\n"
"\n"
" do\n"
" {\n"
" prevVal.realVal = *source;\n"
" newVal.realVal = prevVal.realVal + operand;\n"
" } while ((atomic_cmpxchg((volatile __global uint*)source, prevVal.intVal[0], newVal.intVal[0]) != prevVal.intVal[0]) ||\n"
" (atomic_cmpxchg((volatile __global uint*)source + 1, prevVal.intVal[1], newVal.intVal[1]) != prevVal.intVal[1]));\n"
"}\n";
}
return os.str();
}

View File

@ -39,7 +39,7 @@ static string ConstantDefinesString(bool doublePrecision)
<< "#endif\n"
<< "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"//Only supported on nVidia.
<< "typedef long intPrec;\n"
<< "typedef ulong atomi;\n"
<< "typedef uint atomi;\n"//Same size as real_bucket_t, always 4 bytes.
<< "typedef double real_t;\n"
<< "typedef float real_bucket_t;\n"//Assume buckets are always float, even though iter calcs are in double.
<< "typedef double4 real4;\n"

View File

@ -37,24 +37,24 @@ FinalAccumOpenCLKernelCreator::FinalAccumOpenCLKernelCreator(bool doublePrecisio
/// Kernel source and entry point properties, getters only.
/// </summary>
string FinalAccumOpenCLKernelCreator::GammaCorrectionWithAlphaCalcKernel() { return m_GammaCorrectionWithAlphaCalcKernel; }
string FinalAccumOpenCLKernelCreator::GammaCorrectionWithAlphaCalcEntryPoint() { return m_GammaCorrectionWithAlphaCalcEntryPoint; }
string FinalAccumOpenCLKernelCreator::GammaCorrectionWithoutAlphaCalcKernel() { return m_GammaCorrectionWithoutAlphaCalcKernel; }
string FinalAccumOpenCLKernelCreator::GammaCorrectionWithoutAlphaCalcEntryPoint() { return m_GammaCorrectionWithoutAlphaCalcEntryPoint; }
const string& FinalAccumOpenCLKernelCreator::GammaCorrectionWithAlphaCalcKernel() const { return m_GammaCorrectionWithAlphaCalcKernel; }
const string& FinalAccumOpenCLKernelCreator::GammaCorrectionWithAlphaCalcEntryPoint() const { return m_GammaCorrectionWithAlphaCalcEntryPoint; }
const string& FinalAccumOpenCLKernelCreator::GammaCorrectionWithoutAlphaCalcKernel() const { return m_GammaCorrectionWithoutAlphaCalcKernel; }
const string& FinalAccumOpenCLKernelCreator::GammaCorrectionWithoutAlphaCalcEntryPoint() const { return m_GammaCorrectionWithoutAlphaCalcEntryPoint; }
string FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipKernel() { return m_FinalAccumEarlyClipKernel; }
string FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipEntryPoint() { return m_FinalAccumEarlyClipEntryPoint; }
string FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel() { return m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel; }
string FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint() { return m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint; }
string FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel() { return m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel; }
string FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint() { return m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint; }
const string& FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipKernel() const { return m_FinalAccumEarlyClipKernel; }
const string& FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipEntryPoint() const { return m_FinalAccumEarlyClipEntryPoint; }
const string& FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel() const { return m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel; }
const string& FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint() const { return m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint; }
const string& FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel() const { return m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel; }
const string& FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint() const { return m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint; }
string FinalAccumOpenCLKernelCreator::FinalAccumLateClipKernel() { return m_FinalAccumLateClipKernel; }
string FinalAccumOpenCLKernelCreator::FinalAccumLateClipEntryPoint() { return m_FinalAccumLateClipEntryPoint; }
string FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel() { return m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel; }
string FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint() { return m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint; }
string FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel() { return m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel; }
string FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint() { return m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint; }
const string& FinalAccumOpenCLKernelCreator::FinalAccumLateClipKernel() const { return m_FinalAccumLateClipKernel; }
const string& FinalAccumOpenCLKernelCreator::FinalAccumLateClipEntryPoint() const { return m_FinalAccumLateClipEntryPoint; }
const string& FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel() const { return m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel; }
const string& FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint() const { return m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint; }
const string& FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel() const { return m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel; }
const string& FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint() const { return m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint; }
/// <summary>
/// Get the gamma correction entry point.
@ -62,7 +62,7 @@ string FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithoutAlphaCalcWithAlph
/// <param name="channels">The number of channels used, 3 or 4.</param>
/// <param name="transparency">True if channels equals 4 and using transparency, else false.</param>
/// <returns>The name of the gamma correction entry point kernel function</returns>
string FinalAccumOpenCLKernelCreator::GammaCorrectionEntryPoint(size_t channels, bool transparency)
const string& FinalAccumOpenCLKernelCreator::GammaCorrectionEntryPoint(size_t channels, bool transparency) const
{
bool alphaCalc = ((channels > 3) && transparency);
return alphaCalc ? m_GammaCorrectionWithAlphaCalcEntryPoint : m_GammaCorrectionWithoutAlphaCalcEntryPoint;
@ -74,7 +74,7 @@ string FinalAccumOpenCLKernelCreator::GammaCorrectionEntryPoint(size_t channels,
/// <param name="channels">The number of channels used, 3 or 4.</param>
/// <param name="transparency">True if channels equals 4 and using transparency, else false.</param>
/// <returns>The gamma correction kernel string</returns>
string FinalAccumOpenCLKernelCreator::GammaCorrectionKernel(size_t channels, bool transparency)
const string& FinalAccumOpenCLKernelCreator::GammaCorrectionKernel(size_t channels, bool transparency) const
{
bool alphaCalc = ((channels > 3) && transparency);
return alphaCalc ? m_GammaCorrectionWithAlphaCalcKernel : m_GammaCorrectionWithoutAlphaCalcKernel;
@ -89,7 +89,7 @@ string FinalAccumOpenCLKernelCreator::GammaCorrectionKernel(size_t channels, boo
/// <param name="alphaBase">Storage for the alpha base value used in the kernel. 0 if transparency is true, else 255.</param>
/// <param name="alphaScale">Storage for the alpha scale value used in the kernel. 255 if transparency is true, else 0.</param>
/// <returns>The name of the final accumulation entry point kernel function</returns>
string FinalAccumOpenCLKernelCreator::FinalAccumEntryPoint(bool earlyClip, size_t channels, bool transparency, double& alphaBase, double& alphaScale)
const string& FinalAccumOpenCLKernelCreator::FinalAccumEntryPoint(bool earlyClip, size_t channels, bool transparency, double& alphaBase, double& alphaScale) const
{
bool alphaCalc = ((channels > 3) && transparency);
bool alphaAccum = channels > 3;
@ -109,7 +109,7 @@ string FinalAccumOpenCLKernelCreator::FinalAccumEntryPoint(bool earlyClip, size_
else if (!alphaCalc && alphaAccum)//Rgba output and !Transparency.
return FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint();
else
return "";//Cannot have alphaCalc and !alphaAccum, it makes no sense.
return m_Empty;//Cannot have alphaCalc and !alphaAccum, it makes no sense.
}
else
{
@ -120,7 +120,7 @@ string FinalAccumOpenCLKernelCreator::FinalAccumEntryPoint(bool earlyClip, size_
else if (!alphaCalc && alphaAccum)//Rgba output and !Transparency.
return FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint();
else
return "";//Cannot have alphaCalc and !alphaAccum, it makes no sense.
return m_Empty;//Cannot have alphaCalc and !alphaAccum, it makes no sense.
}
}
@ -131,7 +131,7 @@ string FinalAccumOpenCLKernelCreator::FinalAccumEntryPoint(bool earlyClip, size_
/// <param name="channels">The number of channels used, 3 or 4.</param>
/// <param name="transparency">True if channels equals 4 and using transparency, else false.</param>
/// <returns>The final accumulation kernel string</returns>
string FinalAccumOpenCLKernelCreator::FinalAccumKernel(bool earlyClip, size_t channels, bool transparency)
const string& FinalAccumOpenCLKernelCreator::FinalAccumKernel(bool earlyClip, size_t channels, bool transparency) const
{
bool alphaCalc = (channels > 3 && transparency);
bool alphaAccum = channels > 3;
@ -145,7 +145,7 @@ string FinalAccumOpenCLKernelCreator::FinalAccumKernel(bool earlyClip, size_t ch
else if (!alphaCalc && alphaAccum)//Rgba output and !Transparency.
return FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel();
else
return "";//Cannot have alphaCalc and !alphaAccum, it makes no sense.
return m_Empty;//Cannot have alphaCalc and !alphaAccum, it makes no sense.
}
else
{
@ -156,7 +156,7 @@ string FinalAccumOpenCLKernelCreator::FinalAccumKernel(bool earlyClip, size_t ch
else if (!alphaCalc && alphaAccum)//Rgba output and !Transparency.
return FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel();
else
return "";//Cannot have alphaCalc and !alphaAccum, it makes no sense.
return m_Empty;//Cannot have alphaCalc and !alphaAccum, it makes no sense.
}
}

View File

@ -25,29 +25,29 @@ class EMBERCL_API FinalAccumOpenCLKernelCreator
public:
FinalAccumOpenCLKernelCreator(bool doublePrecision);
string GammaCorrectionWithAlphaCalcKernel();
string GammaCorrectionWithAlphaCalcEntryPoint();
const string& GammaCorrectionWithAlphaCalcKernel() const;
const string& GammaCorrectionWithAlphaCalcEntryPoint() const;
string GammaCorrectionWithoutAlphaCalcKernel();
string GammaCorrectionWithoutAlphaCalcEntryPoint();
const string& GammaCorrectionWithoutAlphaCalcKernel() const;
const string& GammaCorrectionWithoutAlphaCalcEntryPoint() const;
string FinalAccumEarlyClipKernel();
string FinalAccumEarlyClipEntryPoint();
string FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel();
string FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint();
string FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel();
string FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint();
const string& FinalAccumEarlyClipKernel() const;
const string& FinalAccumEarlyClipEntryPoint() const;
const string& FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel() const;
const string& FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint() const;
const string& FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel() const;
const string& FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint() const;
string FinalAccumLateClipKernel();
string FinalAccumLateClipEntryPoint();
string FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel();
string FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint();
string FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel();
string FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint();
string GammaCorrectionEntryPoint(size_t channels, bool transparency);
string GammaCorrectionKernel(size_t channels, bool transparency);
string FinalAccumEntryPoint(bool earlyClip, size_t channels, bool transparency, double& alphaBase, double& alphaScale);
string FinalAccumKernel(bool earlyClip, size_t channels, bool transparency);
const string& FinalAccumLateClipKernel() const;
const string& FinalAccumLateClipEntryPoint() const;
const string& FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel() const;
const string& FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint() const;
const string& FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel() const;
const string& FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint() const;
const string& GammaCorrectionEntryPoint(size_t channels, bool transparency) const;
const string& GammaCorrectionKernel(size_t channels, bool transparency) const;
const string& FinalAccumEntryPoint(bool earlyClip, size_t channels, bool transparency, double& alphaBase, double& alphaScale) const;
const string& FinalAccumKernel(bool earlyClip, size_t channels, bool transparency) const;
private:
string CreateFinalAccumKernelString(bool earlyClip, size_t channels, bool transparency);
@ -56,6 +56,7 @@ private:
string CreateFinalAccumKernelString(bool earlyClip, bool alphaCalc, bool alphaAccum);
string CreateGammaCorrectionFunctionString(bool globalBucket, bool alphaCalc, bool alphaAccum, bool finalOut);
string CreateCalcNewRgbFunctionString(bool globalBucket);
string m_GammaCorrectionWithAlphaCalcKernel;
string m_GammaCorrectionWithAlphaCalcEntryPoint;
@ -76,6 +77,7 @@ private:
string m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel;//False, true.
string m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint;
string m_Empty;
bool m_DoublePrecision;
};
}

View File

@ -10,11 +10,9 @@ namespace EmberCLns
/// Constructor that sets up some basic entry point strings and creates
/// the zeroization kernel string since it requires no conditional inputs.
/// </summary>
/// <param name="nVidia">True if running on an nVidia card, else false.</param>
template <typename T>
IterOpenCLKernelCreator<T>::IterOpenCLKernelCreator(bool nVidia)
IterOpenCLKernelCreator<T>::IterOpenCLKernelCreator()
{
m_NVidia = nVidia;
m_IterEntryPoint = "IterateKernel";
m_ZeroizeEntryPoint = "ZeroizeKernel";
m_ZeroizeKernel = CreateZeroizeKernelString();
@ -24,9 +22,9 @@ IterOpenCLKernelCreator<T>::IterOpenCLKernelCreator(bool nVidia)
/// Accessors.
/// </summary>
template <typename T> string IterOpenCLKernelCreator<T>::ZeroizeKernel() { return m_ZeroizeKernel; }
template <typename T> string IterOpenCLKernelCreator<T>::ZeroizeEntryPoint() { return m_ZeroizeEntryPoint; }
template <typename T> string IterOpenCLKernelCreator<T>::IterEntryPoint() { return m_IterEntryPoint; }
template <typename T> const string& IterOpenCLKernelCreator<T>::ZeroizeKernel() const { return m_ZeroizeKernel; }
template <typename T> const string& IterOpenCLKernelCreator<T>::ZeroizeEntryPoint() const { return m_ZeroizeEntryPoint; }
template <typename T> const string& IterOpenCLKernelCreator<T>::IterEntryPoint() const { return m_IterEntryPoint; }
/// <summary>
/// Create the iteration kernel string using the Cuburn method.
@ -221,8 +219,12 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
EmberCLStructString <<
UnionCLStructString <<
CarToRasCLStructString <<
CarToRasFunctionString <<
AtomicString(doublePrecision, m_NVidia) <<
CarToRasFunctionString;
if (lockAccum)
os << AtomicString();
os <<
xformFuncs.str() <<
"__kernel void " << m_IterEntryPoint << "(\n" <<
" uint iterCount,\n"

View File

@ -23,10 +23,10 @@ template <typename T>
class EMBERCL_API IterOpenCLKernelCreator
{
public:
IterOpenCLKernelCreator(bool nVidia);
string ZeroizeKernel();
string ZeroizeEntryPoint();
string IterEntryPoint();
IterOpenCLKernelCreator();
const string& ZeroizeKernel() const;
const string& ZeroizeEntryPoint() const;
const string& IterEntryPoint() const;
string CreateIterKernelString(Ember<T>& ember, string& parVarDefines, bool lockAccum = false, bool doAccum = true);
static void ParVarIndexDefines(Ember<T>& ember, pair<string, vector<T>>& params, bool doVals = true, bool doString = true);
static bool IsBuildRequired(Ember<T>& ember1, Ember<T>& ember2);
@ -38,7 +38,6 @@ private:
string m_IterEntryPoint;
string m_ZeroizeKernel;
string m_ZeroizeEntryPoint;
bool m_NVidia;
};
#ifdef OPEN_CL_TEST_AREA

View File

@ -15,7 +15,7 @@ namespace EmberCLns
template <typename T, typename bucketT>
RendererCL<T, bucketT>::RendererCL(uint platform, uint device, bool shared, GLuint outputTexID)
:
m_IterOpenCLKernelCreator(false),
m_IterOpenCLKernelCreator(),
m_DEOpenCLKernelCreator(typeid(T) == typeid(double), false),
m_FinalAccumOpenCLKernelCreator(typeid(T) == typeid(double))
{
@ -105,7 +105,7 @@ bool RendererCL<T, bucketT>::Init(uint platform, uint device, bool shared, GLuin
{
m_NVidia = ToLower(m_Wrapper.DeviceAndPlatformNames()).find_first_of("nvidia") != string::npos && m_Wrapper.LocalMemSize() > (32 * 1024);
m_WarpSize = m_NVidia ? 32 : 64;
m_IterOpenCLKernelCreator = IterOpenCLKernelCreator<T>(m_NVidia);
m_IterOpenCLKernelCreator = IterOpenCLKernelCreator<T>();
m_DEOpenCLKernelCreator = DEOpenCLKernelCreator(m_DoublePrecision, m_NVidia);
string zeroizeProgram = m_IterOpenCLKernelCreator.ZeroizeKernel();
@ -286,7 +286,7 @@ bool RendererCL<T, bucketT>::WriteRandomPoints()
/// </summary>
/// <returns>The string representation of the kernel for the last built iter program.</returns>
template <typename T, typename bucketT>
string RendererCL<T, bucketT>::IterKernel() { return m_IterKernel; }
const string& RendererCL<T, bucketT>::IterKernel() const { return m_IterKernel; }
/// <summary>
@ -294,14 +294,14 @@ string RendererCL<T, bucketT>::IterKernel() { return m_IterKernel; }
/// </summary>
/// <returns>The string representation of the kernel for the last built density filtering program.</returns>
template <typename T, typename bucketT>
string RendererCL<T, bucketT>::DEKernel() { return m_DEOpenCLKernelCreator.GaussianDEKernel(Supersample(), m_DensityFilterCL.m_FilterWidth); }
const string& RendererCL<T, bucketT>::DEKernel() const { return m_DEOpenCLKernelCreator.GaussianDEKernel(Supersample(), m_DensityFilterCL.m_FilterWidth); }
/// <summary>
/// Get the kernel string for the last built final accumulation program.
/// </summary>
/// <returns>The string representation of the kernel for the last built final accumulation program.</returns>
template <typename T, typename bucketT>
string RendererCL<T, bucketT>::FinalAccumKernel() { return m_FinalAccumOpenCLKernelCreator.FinalAccumKernel(EarlyClip(), Renderer<T, bucketT>::NumChannels(), Transparency()); }
const string& RendererCL<T, bucketT>::FinalAccumKernel() const { return m_FinalAccumOpenCLKernelCreator.FinalAccumKernel(EarlyClip(), Renderer<T, bucketT>::NumChannels(), Transparency()); }
/// <summary>
/// Virtual functions overridden from RendererCLBase.
@ -567,8 +567,8 @@ bool RendererCL<T, bucketT>::Alloc()
m_XformsCL.resize(m_Ember.TotalXformCount());
bool b = true;
size_t histLength = SuperSize() * sizeof(v4T);
size_t accumLength = SuperSize() * sizeof(v4T);
size_t histLength = SuperSize() * sizeof(v4bT);
size_t accumLength = SuperSize() * sizeof(v4bT);
const char* loc = __FUNCTION__;
if (b && !(b = m_Wrapper.AddBuffer(m_EmberBufferName, sizeof(m_EmberCL)))) { this->m_ErrorReport.push_back(loc); }
@ -694,18 +694,37 @@ EmberStats RendererCL<T, bucketT>::Iterate(size_t iterCount, size_t temporalSamp
EmberStats stats;//Do not record bad vals with with GPU. If the user needs to investigate bad vals, use the CPU.
const char* loc = __FUNCTION__;
IterOpenCLKernelCreator<T>::ParVarIndexDefines(m_Ember, m_Params, true, false);//Always do this to get the values (but no string), regardless of whether a rebuild is necessary.
//Don't know the size of the parametric varations parameters buffer until the ember is examined.
//So set it up right before the run.
if (!m_Params.second.empty())
//Only need to do this once on the beginning of a new render. Last iter will always be 0 at the beginning of a full render or temporal sample.
if (m_LastIter == 0)
{
if (!m_Wrapper.AddAndWriteBuffer(m_ParVarsBufferName, m_Params.second.data(), m_Params.second.size() * sizeof(m_Params.second[0])))
ConvertEmber(m_Ember, m_EmberCL, m_XformsCL);
ConvertCarToRas(*CoordMap());
if (b && !(b = m_Wrapper.WriteBuffer(m_EmberBufferName, reinterpret_cast<void*>(&m_EmberCL), sizeof(m_EmberCL)))) { this->m_ErrorReport.push_back(loc); }
if (b && !(b = m_Wrapper.WriteBuffer(m_XformsBufferName, reinterpret_cast<void*>(m_XformsCL.data()), sizeof(m_XformsCL[0]) * m_XformsCL.size()))) { this->m_ErrorReport.push_back(loc); }
if (b && !(b = m_Wrapper.AddAndWriteBuffer(m_DistBufferName, reinterpret_cast<void*>(const_cast<byte*>(XformDistributions())), XformDistributionsSize()))) { this->m_ErrorReport.push_back(loc); }//Will be resized for xaos.
if (b && !(b = m_Wrapper.WriteBuffer(m_CarToRasBufferName, reinterpret_cast<void*>(&m_CarToRasCL), sizeof(m_CarToRasCL)))) { this->m_ErrorReport.push_back(loc); }
if (b && !(b = m_Wrapper.AddAndWriteImage("Palette", CL_MEM_READ_ONLY, m_PaletteFormat, m_DmapCL.m_Entries.size(), 1, 0, m_DmapCL.m_Entries.data()))) { this->m_ErrorReport.push_back(loc); }
if (b)
{
m_Abort = true;
this->m_ErrorReport.push_back(loc);
return stats;
IterOpenCLKernelCreator<T>::ParVarIndexDefines(m_Ember, m_Params, true, false);//Always do this to get the values (but no string), regardless of whether a rebuild is necessary.
//Don't know the size of the parametric varations parameters buffer until the ember is examined.
//So set it up right before the run.
if (!m_Params.second.empty())
{
if (!m_Wrapper.AddAndWriteBuffer(m_ParVarsBufferName, m_Params.second.data(), m_Params.second.size() * sizeof(m_Params.second[0])))
{
m_Abort = true;
this->m_ErrorReport.push_back(loc);
return stats;
}
}
}
else
return stats;
}
//Rebuilding is expensive, so only do it if it's required.
@ -716,7 +735,7 @@ EmberStats RendererCL<T, bucketT>::Iterate(size_t iterCount, size_t temporalSamp
{
m_IterTimer.Tic();//Tic() here to avoid including build time in iter time measurement.
if (m_Stats.m_Iters == 0)//Only reset the call count on the beginning of a new render. Do not reset on KEEP_ITERATING.
if (m_LastIter == 0)//Only reset the call count on the beginning of a new render. Do not reset on KEEP_ITERATING.
m_Calls = 0;
b = RunIter(iterCount, temporalSample, stats.m_Iters);
@ -772,10 +791,8 @@ bool RendererCL<T, bucketT>::BuildIterProgramForEmber(bool doAccum)
/// <summary>
/// Run the iteration kernel.
/// Fusing on the CPU is done once per sub batch, usually 10,000 iters, however
/// determining when to do it in OpenCL is much more difficult.
/// Currently it's done once every 4 kernel calls which seems to be a good balance
/// between quality of the final image and performance.
/// Fusing on the CPU is done once per sub batch, usually 10,000 iters. Here,
/// the same fusing frequency is kept, but is done per kernel thread.
/// </summary>
/// <param name="iterCount">The number of iterations to run</param>
/// <param name="temporalSample">The temporal sample this is running for</param>
@ -803,16 +820,6 @@ bool RendererCL<T, bucketT>::RunIter(size_t iterCount, size_t temporalSample, si
if (kernelIndex != -1)
{
ConvertEmber(m_Ember, m_EmberCL, m_XformsCL);
m_CarToRasCL = ConvertCarToRas(*CoordMap());
if (b && !(b = m_Wrapper.WriteBuffer (m_EmberBufferName, reinterpret_cast<void*>(&m_EmberCL), sizeof(m_EmberCL)))) { this->m_ErrorReport.push_back(loc); }
if (b && !(b = m_Wrapper.WriteBuffer (m_XformsBufferName, reinterpret_cast<void*>(m_XformsCL.data()), sizeof(m_XformsCL[0]) * m_XformsCL.size()))) { this->m_ErrorReport.push_back(loc); }
if (b && !(b = m_Wrapper.AddAndWriteBuffer(m_DistBufferName, reinterpret_cast<void*>(const_cast<byte*>(XformDistributions())), XformDistributionsSize()))) { this->m_ErrorReport.push_back(loc); }//Will be resized for xaos.
if (b && !(b = m_Wrapper.WriteBuffer (m_CarToRasBufferName, reinterpret_cast<void*>(&m_CarToRasCL), sizeof(m_CarToRasCL)))) { this->m_ErrorReport.push_back(loc); }
if (b && !(b = m_Wrapper.AddAndWriteImage("Palette", CL_MEM_READ_ONLY, m_PaletteFormat, m_DmapCL.m_Entries.size(), 1, 0, m_DmapCL.m_Entries.data()))) { this->m_ErrorReport.push_back(loc); }
//If animating, treat each temporal sample as a newly started render for fusing purposes.
if (temporalSample > 0)
m_Calls = 0;
@ -1255,13 +1262,13 @@ bool RendererCL<T, bucketT>::RunDensityFilterPrivate(uint kernelIndex, uint grid
template <typename T, typename bucketT>
int RendererCL<T, bucketT>::MakeAndGetDensityFilterProgram(size_t ss, uint filterWidth)
{
string deEntryPoint = m_DEOpenCLKernelCreator.GaussianDEEntryPoint(ss, filterWidth);
auto& deEntryPoint = m_DEOpenCLKernelCreator.GaussianDEEntryPoint(ss, filterWidth);
int kernelIndex = m_Wrapper.FindKernelIndex(deEntryPoint);
const char* loc = __FUNCTION__;
if (kernelIndex == -1)//Has not been built yet.
{
string kernel = m_DEOpenCLKernelCreator.GaussianDEKernel(ss, filterWidth);
auto& kernel = m_DEOpenCLKernelCreator.GaussianDEKernel(ss, filterWidth);
bool b = m_Wrapper.AddProgram(deEntryPoint, kernel, deEntryPoint, m_DoublePrecision);
if (b)
@ -1288,13 +1295,13 @@ int RendererCL<T, bucketT>::MakeAndGetDensityFilterProgram(size_t ss, uint filte
template <typename T, typename bucketT>
int RendererCL<T, bucketT>::MakeAndGetFinalAccumProgram(double& alphaBase, double& alphaScale)
{
string finalAccumEntryPoint = m_FinalAccumOpenCLKernelCreator.FinalAccumEntryPoint(EarlyClip(), Renderer<T, bucketT>::NumChannels(), Transparency(), alphaBase, alphaScale);
auto& finalAccumEntryPoint = m_FinalAccumOpenCLKernelCreator.FinalAccumEntryPoint(EarlyClip(), Renderer<T, bucketT>::NumChannels(), Transparency(), alphaBase, alphaScale);
int kernelIndex = m_Wrapper.FindKernelIndex(finalAccumEntryPoint);
const char* loc = __FUNCTION__;
if (kernelIndex == -1)//Has not been built yet.
{
string kernel = m_FinalAccumOpenCLKernelCreator.FinalAccumKernel(EarlyClip(), Renderer<T, bucketT>::NumChannels(), Transparency());
auto& kernel = m_FinalAccumOpenCLKernelCreator.FinalAccumKernel(EarlyClip(), Renderer<T, bucketT>::NumChannels(), Transparency());
bool b = m_Wrapper.AddProgram(finalAccumEntryPoint, kernel, finalAccumEntryPoint, m_DoublePrecision);
if (b)
@ -1313,13 +1320,13 @@ int RendererCL<T, bucketT>::MakeAndGetFinalAccumProgram(double& alphaBase, doubl
template <typename T, typename bucketT>
int RendererCL<T, bucketT>::MakeAndGetGammaCorrectionProgram()
{
string gammaEntryPoint = m_FinalAccumOpenCLKernelCreator.GammaCorrectionEntryPoint(Renderer<T, bucketT>::NumChannels(), Transparency());
auto& gammaEntryPoint = m_FinalAccumOpenCLKernelCreator.GammaCorrectionEntryPoint(Renderer<T, bucketT>::NumChannels(), Transparency());
int kernelIndex = m_Wrapper.FindKernelIndex(gammaEntryPoint);
const char* loc = __FUNCTION__;
if (kernelIndex == -1)//Has not been built yet.
{
string kernel = m_FinalAccumOpenCLKernelCreator.GammaCorrectionKernel(Renderer<T, bucketT>::NumChannels(), Transparency());
auto& kernel = m_FinalAccumOpenCLKernelCreator.GammaCorrectionKernel(Renderer<T, bucketT>::NumChannels(), Transparency());
bool b = m_Wrapper.AddProgram(gammaEntryPoint, kernel, gammaEntryPoint, m_DoublePrecision);
if (b)
@ -1454,21 +1461,17 @@ void RendererCL<T, bucketT>::ConvertEmber(Ember<T>& ember, EmberCL<T>& emberCL,
/// <param name="carToRas">The CarToRas object to convert</param>
/// <returns>The CarToRasCL object</returns>
template <typename T, typename bucketT>
CarToRasCL<T> RendererCL<T, bucketT>::ConvertCarToRas(const CarToRas<T>& carToRas)
void RendererCL<T, bucketT>::ConvertCarToRas(const CarToRas<T>& carToRas)
{
CarToRasCL<T> carToRasCL;
carToRasCL.m_RasWidth = uint(carToRas.RasWidth());
carToRasCL.m_PixPerImageUnitW = carToRas.PixPerImageUnitW();
carToRasCL.m_RasLlX = carToRas.RasLlX();
carToRasCL.m_PixPerImageUnitH = carToRas.PixPerImageUnitH();
carToRasCL.m_RasLlY = carToRas.RasLlY();
carToRasCL.m_CarLlX = carToRas.CarLlX();
carToRasCL.m_CarLlY = carToRas.CarLlY();
carToRasCL.m_CarUrX = carToRas.CarUrX();
carToRasCL.m_CarUrY = carToRas.CarUrY();
return carToRasCL;
m_CarToRasCL.m_RasWidth = uint(carToRas.RasWidth());
m_CarToRasCL.m_PixPerImageUnitW = carToRas.PixPerImageUnitW();
m_CarToRasCL.m_RasLlX = carToRas.RasLlX();
m_CarToRasCL.m_PixPerImageUnitH = carToRas.PixPerImageUnitH();
m_CarToRasCL.m_RasLlY = carToRas.RasLlY();
m_CarToRasCL.m_CarLlX = carToRas.CarLlX();
m_CarToRasCL.m_CarLlY = carToRas.CarLlY();
m_CarToRasCL.m_CarUrX = carToRas.CarUrX();
m_CarToRasCL.m_CarUrY = carToRas.CarUrY();
}
/// <summary>

View File

@ -124,9 +124,9 @@ public:
#ifdef TEST_CL
bool WriteRandomPoints();
#endif
string IterKernel();
string DEKernel();
string FinalAccumKernel();
const string& IterKernel() const;
const string& DEKernel() const;
const string& FinalAccumKernel() const;
//Virtual functions overridden from RendererCLBase.
virtual bool ReadFinal(byte* pixels);
@ -179,7 +179,7 @@ private:
void ConvertDensityFilter();
void ConvertSpatialFilter();
void ConvertEmber(Ember<T>& ember, EmberCL<T>& emberCL, vector<XformCL<T>>& xformsCL);
static CarToRasCL<T> ConvertCarToRas(const CarToRas<T>& carToRas);
void ConvertCarToRas(const CarToRas<T>& carToRas);
bool m_Init;
bool m_NVidia;

View File

@ -71,7 +71,7 @@ Ember<T> CreateBasicEmber(uint width, uint height, uint ss, T quality, T centerX
string GetEmberCLKernelString(Ember<float>& ember, bool iter, bool log, bool de, uint ss, bool accum)
{
ostringstream os;
IterOpenCLKernelCreator<float> iterCreator(false);
IterOpenCLKernelCreator<float> iterCreator;
DEOpenCLKernelCreator deCreator(false, false);
FinalAccumOpenCLKernelCreator accumCreator(false);
pair<string, vector<float>> pair;

View File

@ -7,7 +7,7 @@
<x>0</x>
<y>0</y>
<width>488</width>
<height>567</height>
<height>595</height>
</rect>
</property>
<property name="sizePolicy">
@ -58,7 +58,7 @@
</font>
</property>
<property name="text">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p align=&quot;center&quot;&gt;&lt;br/&gt;&lt;span style=&quot; font-size:12pt;&quot;&gt;Fractorium 0.4.1.9 Beta&lt;/span&gt;&lt;/p&gt;&lt;p align=&quot;center&quot;&gt;&lt;span style=&quot; font-size:10pt;&quot;&gt;&lt;br/&gt;A Qt-based fractal flame editor which uses a C++ re-write of the flam3 algorithm named Ember and a GPU capable version named EmberCL which implements a portion of the cuburn algorithm in OpenCL.&lt;/span&gt;&lt;/p&gt;&lt;p align=&quot;center&quot;&gt;&lt;span style=&quot; font-size:10pt;&quot;&gt;Matt Feemster&lt;/span&gt;&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p align=&quot;center&quot;&gt;&lt;br/&gt;Fractorium 0.4.1.9 Beta&lt;/p&gt;&lt;p align=&quot;center&quot;&gt;&lt;span style=&quot; font-size:10pt;&quot;&gt;&lt;br/&gt;A Qt-based fractal flame editor which uses a C++ re-write of the flam3 algorithm named Ember and a GPU capable version named EmberCL which implements a portion of the cuburn algorithm in OpenCL.&lt;/span&gt;&lt;/p&gt;&lt;p align=&quot;center&quot;&gt;&lt;span style=&quot; font-size:10pt;&quot;&gt;Lead: Matt Feemster&lt;/span&gt;&lt;/p&gt;&lt;p align=&quot;center&quot;&gt;&lt;span style=&quot; font-size:10pt;&quot;&gt;Contributors: Simon Detheridge&lt;/span&gt;&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
<property name="textFormat">
<enum>Qt::RichText</enum>