mirror of
https://bitbucket.org/mfeemster/fractorium.git
synced 2025-01-21 21:20:07 -05:00
--User changes
-Add Simon Detheridge's name to the About Box. --Bug fixes -Fix bug in OpenCL atomic string, which is never used. -Fix wrong hist and accum allocation size in RendererCL now that float-only buffers are used. -Move some kernel initialization to a place where it's done once per render, rather than on every interactive iter chunk. --Code changes -Make ConvertCarToRas() just assign to the member rather than return a struct. -Make kernel string accessor functions in IterOpenCLKernelCreator, FinalAccumOpenCLKernelCreator and DEOpenCLKernelCreator be const and return a const string reference. -Don't include atomic string unless locking on the GPU, which never happens.
This commit is contained in:
parent
6b813c8dac
commit
a4aae06b02
@ -56,8 +56,8 @@ DEOpenCLKernelCreator::DEOpenCLKernelCreator(bool doublePrecision, bool nVidia)
|
||||
/// Kernel source and entry point properties, getters only.
|
||||
/// </summary>
|
||||
|
||||
string DEOpenCLKernelCreator::LogScaleAssignDEKernel() { return m_LogScaleAssignDEKernel; }
|
||||
string DEOpenCLKernelCreator::LogScaleAssignDEEntryPoint() { return m_LogScaleAssignDEEntryPoint; }
|
||||
const string& DEOpenCLKernelCreator::LogScaleAssignDEKernel() const { return m_LogScaleAssignDEKernel; }
|
||||
const string& DEOpenCLKernelCreator::LogScaleAssignDEEntryPoint() const { return m_LogScaleAssignDEEntryPoint; }
|
||||
|
||||
/// <summary>
|
||||
/// Get the kernel source for the specified supersample and filterWidth.
|
||||
@ -65,7 +65,7 @@ string DEOpenCLKernelCreator::LogScaleAssignDEEntryPoint() { return m_LogScaleAs
|
||||
/// <param name="ss">The supersample being used</param>
|
||||
/// <param name="filterWidth">Filter width</param>
|
||||
/// <returns>The kernel source</returns>
|
||||
string DEOpenCLKernelCreator::GaussianDEKernel(size_t ss, uint filterWidth)
|
||||
const string& DEOpenCLKernelCreator::GaussianDEKernel(size_t ss, uint filterWidth) const
|
||||
{
|
||||
#ifndef ROW_ONLY_DE
|
||||
if (filterWidth > MaxDEFilterSize())
|
||||
@ -101,7 +101,7 @@ string DEOpenCLKernelCreator::GaussianDEKernel(size_t ss, uint filterWidth)
|
||||
/// <param name="ss">The supersample being used</param>
|
||||
/// <param name="filterWidth">Filter width</param>
|
||||
/// <returns>The name of the density estimation filtering entry point kernel function</returns>
|
||||
string DEOpenCLKernelCreator::GaussianDEEntryPoint(size_t ss, uint filterWidth)
|
||||
const string& DEOpenCLKernelCreator::GaussianDEEntryPoint(size_t ss, uint filterWidth) const
|
||||
{
|
||||
#ifndef ROW_ONLY_DE
|
||||
if (filterWidth > MaxDEFilterSize())
|
||||
|
@ -34,10 +34,10 @@ public:
|
||||
DEOpenCLKernelCreator(bool doublePrecision, bool nVidia);
|
||||
|
||||
//Accessors.
|
||||
string LogScaleAssignDEKernel();
|
||||
string LogScaleAssignDEEntryPoint();
|
||||
string GaussianDEKernel(size_t ss, uint filterWidth);
|
||||
string GaussianDEEntryPoint(size_t ss, uint filterWidth);
|
||||
const string& LogScaleAssignDEKernel() const;
|
||||
const string& LogScaleAssignDEEntryPoint() const;
|
||||
const string& GaussianDEKernel(size_t ss, uint filterWidth) const;
|
||||
const string& GaussianDEEntryPoint(size_t ss, uint filterWidth) const;
|
||||
|
||||
//Miscellaneous static functions.
|
||||
static uint MaxDEFilterSize();
|
||||
|
@ -351,13 +351,10 @@ static const char* CarToRasFunctionString =
|
||||
"}\n"
|
||||
"\n";
|
||||
|
||||
static string AtomicString(bool doublePrecision, bool dp64AtomicSupport)
|
||||
static string AtomicString()
|
||||
{
|
||||
ostringstream os;
|
||||
|
||||
//If they want single precision, or if they want double precision and have dp atomic support.
|
||||
if (!doublePrecision || dp64AtomicSupport)
|
||||
{
|
||||
os <<
|
||||
"void AtomicAdd(volatile __global real_bucket_t* source, const real_bucket_t operand)\n"
|
||||
"{\n"
|
||||
@ -379,32 +376,6 @@ static string AtomicString(bool doublePrecision, bool dp64AtomicSupport)
|
||||
" newVal.realVal = prevVal.realVal + operand;\n"
|
||||
" } while (atomic_cmpxchg((volatile __global atomi*)source, prevVal.intVal, newVal.intVal) != prevVal.intVal);\n"
|
||||
"}\n";
|
||||
}
|
||||
else//They want double precision and do not have dp atomic support.
|
||||
{
|
||||
os <<
|
||||
"void AtomicAdd(volatile __global double* source, const double operand)\n"
|
||||
"{\n"
|
||||
" union\n"
|
||||
" {\n"
|
||||
" uint intVal[2];\n"
|
||||
" double realVal;\n"
|
||||
" } newVal;\n"
|
||||
"\n"
|
||||
" union\n"
|
||||
" {\n"
|
||||
" uint intVal[2];\n"
|
||||
" double realVal;\n"
|
||||
" } prevVal;\n"
|
||||
"\n"
|
||||
" do\n"
|
||||
" {\n"
|
||||
" prevVal.realVal = *source;\n"
|
||||
" newVal.realVal = prevVal.realVal + operand;\n"
|
||||
" } while ((atomic_cmpxchg((volatile __global uint*)source, prevVal.intVal[0], newVal.intVal[0]) != prevVal.intVal[0]) ||\n"
|
||||
" (atomic_cmpxchg((volatile __global uint*)source + 1, prevVal.intVal[1], newVal.intVal[1]) != prevVal.intVal[1]));\n"
|
||||
"}\n";
|
||||
}
|
||||
|
||||
return os.str();
|
||||
}
|
||||
|
@ -39,7 +39,7 @@ static string ConstantDefinesString(bool doublePrecision)
|
||||
<< "#endif\n"
|
||||
<< "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"//Only supported on nVidia.
|
||||
<< "typedef long intPrec;\n"
|
||||
<< "typedef ulong atomi;\n"
|
||||
<< "typedef uint atomi;\n"//Same size as real_bucket_t, always 4 bytes.
|
||||
<< "typedef double real_t;\n"
|
||||
<< "typedef float real_bucket_t;\n"//Assume buckets are always float, even though iter calcs are in double.
|
||||
<< "typedef double4 real4;\n"
|
||||
|
@ -37,24 +37,24 @@ FinalAccumOpenCLKernelCreator::FinalAccumOpenCLKernelCreator(bool doublePrecisio
|
||||
/// Kernel source and entry point properties, getters only.
|
||||
/// </summary>
|
||||
|
||||
string FinalAccumOpenCLKernelCreator::GammaCorrectionWithAlphaCalcKernel() { return m_GammaCorrectionWithAlphaCalcKernel; }
|
||||
string FinalAccumOpenCLKernelCreator::GammaCorrectionWithAlphaCalcEntryPoint() { return m_GammaCorrectionWithAlphaCalcEntryPoint; }
|
||||
string FinalAccumOpenCLKernelCreator::GammaCorrectionWithoutAlphaCalcKernel() { return m_GammaCorrectionWithoutAlphaCalcKernel; }
|
||||
string FinalAccumOpenCLKernelCreator::GammaCorrectionWithoutAlphaCalcEntryPoint() { return m_GammaCorrectionWithoutAlphaCalcEntryPoint; }
|
||||
const string& FinalAccumOpenCLKernelCreator::GammaCorrectionWithAlphaCalcKernel() const { return m_GammaCorrectionWithAlphaCalcKernel; }
|
||||
const string& FinalAccumOpenCLKernelCreator::GammaCorrectionWithAlphaCalcEntryPoint() const { return m_GammaCorrectionWithAlphaCalcEntryPoint; }
|
||||
const string& FinalAccumOpenCLKernelCreator::GammaCorrectionWithoutAlphaCalcKernel() const { return m_GammaCorrectionWithoutAlphaCalcKernel; }
|
||||
const string& FinalAccumOpenCLKernelCreator::GammaCorrectionWithoutAlphaCalcEntryPoint() const { return m_GammaCorrectionWithoutAlphaCalcEntryPoint; }
|
||||
|
||||
string FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipKernel() { return m_FinalAccumEarlyClipKernel; }
|
||||
string FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipEntryPoint() { return m_FinalAccumEarlyClipEntryPoint; }
|
||||
string FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel() { return m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel; }
|
||||
string FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint() { return m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint; }
|
||||
string FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel() { return m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel; }
|
||||
string FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint() { return m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint; }
|
||||
const string& FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipKernel() const { return m_FinalAccumEarlyClipKernel; }
|
||||
const string& FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipEntryPoint() const { return m_FinalAccumEarlyClipEntryPoint; }
|
||||
const string& FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel() const { return m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel; }
|
||||
const string& FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint() const { return m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint; }
|
||||
const string& FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel() const { return m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel; }
|
||||
const string& FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint() const { return m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint; }
|
||||
|
||||
string FinalAccumOpenCLKernelCreator::FinalAccumLateClipKernel() { return m_FinalAccumLateClipKernel; }
|
||||
string FinalAccumOpenCLKernelCreator::FinalAccumLateClipEntryPoint() { return m_FinalAccumLateClipEntryPoint; }
|
||||
string FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel() { return m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel; }
|
||||
string FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint() { return m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint; }
|
||||
string FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel() { return m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel; }
|
||||
string FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint() { return m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint; }
|
||||
const string& FinalAccumOpenCLKernelCreator::FinalAccumLateClipKernel() const { return m_FinalAccumLateClipKernel; }
|
||||
const string& FinalAccumOpenCLKernelCreator::FinalAccumLateClipEntryPoint() const { return m_FinalAccumLateClipEntryPoint; }
|
||||
const string& FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel() const { return m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel; }
|
||||
const string& FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint() const { return m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint; }
|
||||
const string& FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel() const { return m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel; }
|
||||
const string& FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint() const { return m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint; }
|
||||
|
||||
/// <summary>
|
||||
/// Get the gamma correction entry point.
|
||||
@ -62,7 +62,7 @@ string FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithoutAlphaCalcWithAlph
|
||||
/// <param name="channels">The number of channels used, 3 or 4.</param>
|
||||
/// <param name="transparency">True if channels equals 4 and using transparency, else false.</param>
|
||||
/// <returns>The name of the gamma correction entry point kernel function</returns>
|
||||
string FinalAccumOpenCLKernelCreator::GammaCorrectionEntryPoint(size_t channels, bool transparency)
|
||||
const string& FinalAccumOpenCLKernelCreator::GammaCorrectionEntryPoint(size_t channels, bool transparency) const
|
||||
{
|
||||
bool alphaCalc = ((channels > 3) && transparency);
|
||||
return alphaCalc ? m_GammaCorrectionWithAlphaCalcEntryPoint : m_GammaCorrectionWithoutAlphaCalcEntryPoint;
|
||||
@ -74,7 +74,7 @@ string FinalAccumOpenCLKernelCreator::GammaCorrectionEntryPoint(size_t channels,
|
||||
/// <param name="channels">The number of channels used, 3 or 4.</param>
|
||||
/// <param name="transparency">True if channels equals 4 and using transparency, else false.</param>
|
||||
/// <returns>The gamma correction kernel string</returns>
|
||||
string FinalAccumOpenCLKernelCreator::GammaCorrectionKernel(size_t channels, bool transparency)
|
||||
const string& FinalAccumOpenCLKernelCreator::GammaCorrectionKernel(size_t channels, bool transparency) const
|
||||
{
|
||||
bool alphaCalc = ((channels > 3) && transparency);
|
||||
return alphaCalc ? m_GammaCorrectionWithAlphaCalcKernel : m_GammaCorrectionWithoutAlphaCalcKernel;
|
||||
@ -89,7 +89,7 @@ string FinalAccumOpenCLKernelCreator::GammaCorrectionKernel(size_t channels, boo
|
||||
/// <param name="alphaBase">Storage for the alpha base value used in the kernel. 0 if transparency is true, else 255.</param>
|
||||
/// <param name="alphaScale">Storage for the alpha scale value used in the kernel. 255 if transparency is true, else 0.</param>
|
||||
/// <returns>The name of the final accumulation entry point kernel function</returns>
|
||||
string FinalAccumOpenCLKernelCreator::FinalAccumEntryPoint(bool earlyClip, size_t channels, bool transparency, double& alphaBase, double& alphaScale)
|
||||
const string& FinalAccumOpenCLKernelCreator::FinalAccumEntryPoint(bool earlyClip, size_t channels, bool transparency, double& alphaBase, double& alphaScale) const
|
||||
{
|
||||
bool alphaCalc = ((channels > 3) && transparency);
|
||||
bool alphaAccum = channels > 3;
|
||||
@ -109,7 +109,7 @@ string FinalAccumOpenCLKernelCreator::FinalAccumEntryPoint(bool earlyClip, size_
|
||||
else if (!alphaCalc && alphaAccum)//Rgba output and !Transparency.
|
||||
return FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint();
|
||||
else
|
||||
return "";//Cannot have alphaCalc and !alphaAccum, it makes no sense.
|
||||
return m_Empty;//Cannot have alphaCalc and !alphaAccum, it makes no sense.
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -120,7 +120,7 @@ string FinalAccumOpenCLKernelCreator::FinalAccumEntryPoint(bool earlyClip, size_
|
||||
else if (!alphaCalc && alphaAccum)//Rgba output and !Transparency.
|
||||
return FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint();
|
||||
else
|
||||
return "";//Cannot have alphaCalc and !alphaAccum, it makes no sense.
|
||||
return m_Empty;//Cannot have alphaCalc and !alphaAccum, it makes no sense.
|
||||
}
|
||||
}
|
||||
|
||||
@ -131,7 +131,7 @@ string FinalAccumOpenCLKernelCreator::FinalAccumEntryPoint(bool earlyClip, size_
|
||||
/// <param name="channels">The number of channels used, 3 or 4.</param>
|
||||
/// <param name="transparency">True if channels equals 4 and using transparency, else false.</param>
|
||||
/// <returns>The final accumulation kernel string</returns>
|
||||
string FinalAccumOpenCLKernelCreator::FinalAccumKernel(bool earlyClip, size_t channels, bool transparency)
|
||||
const string& FinalAccumOpenCLKernelCreator::FinalAccumKernel(bool earlyClip, size_t channels, bool transparency) const
|
||||
{
|
||||
bool alphaCalc = (channels > 3 && transparency);
|
||||
bool alphaAccum = channels > 3;
|
||||
@ -145,7 +145,7 @@ string FinalAccumOpenCLKernelCreator::FinalAccumKernel(bool earlyClip, size_t ch
|
||||
else if (!alphaCalc && alphaAccum)//Rgba output and !Transparency.
|
||||
return FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel();
|
||||
else
|
||||
return "";//Cannot have alphaCalc and !alphaAccum, it makes no sense.
|
||||
return m_Empty;//Cannot have alphaCalc and !alphaAccum, it makes no sense.
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -156,7 +156,7 @@ string FinalAccumOpenCLKernelCreator::FinalAccumKernel(bool earlyClip, size_t ch
|
||||
else if (!alphaCalc && alphaAccum)//Rgba output and !Transparency.
|
||||
return FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel();
|
||||
else
|
||||
return "";//Cannot have alphaCalc and !alphaAccum, it makes no sense.
|
||||
return m_Empty;//Cannot have alphaCalc and !alphaAccum, it makes no sense.
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -25,29 +25,29 @@ class EMBERCL_API FinalAccumOpenCLKernelCreator
|
||||
public:
|
||||
FinalAccumOpenCLKernelCreator(bool doublePrecision);
|
||||
|
||||
string GammaCorrectionWithAlphaCalcKernel();
|
||||
string GammaCorrectionWithAlphaCalcEntryPoint();
|
||||
const string& GammaCorrectionWithAlphaCalcKernel() const;
|
||||
const string& GammaCorrectionWithAlphaCalcEntryPoint() const;
|
||||
|
||||
string GammaCorrectionWithoutAlphaCalcKernel();
|
||||
string GammaCorrectionWithoutAlphaCalcEntryPoint();
|
||||
const string& GammaCorrectionWithoutAlphaCalcKernel() const;
|
||||
const string& GammaCorrectionWithoutAlphaCalcEntryPoint() const;
|
||||
|
||||
string FinalAccumEarlyClipKernel();
|
||||
string FinalAccumEarlyClipEntryPoint();
|
||||
string FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel();
|
||||
string FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint();
|
||||
string FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel();
|
||||
string FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint();
|
||||
const string& FinalAccumEarlyClipKernel() const;
|
||||
const string& FinalAccumEarlyClipEntryPoint() const;
|
||||
const string& FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel() const;
|
||||
const string& FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint() const;
|
||||
const string& FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel() const;
|
||||
const string& FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint() const;
|
||||
|
||||
string FinalAccumLateClipKernel();
|
||||
string FinalAccumLateClipEntryPoint();
|
||||
string FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel();
|
||||
string FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint();
|
||||
string FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel();
|
||||
string FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint();
|
||||
string GammaCorrectionEntryPoint(size_t channels, bool transparency);
|
||||
string GammaCorrectionKernel(size_t channels, bool transparency);
|
||||
string FinalAccumEntryPoint(bool earlyClip, size_t channels, bool transparency, double& alphaBase, double& alphaScale);
|
||||
string FinalAccumKernel(bool earlyClip, size_t channels, bool transparency);
|
||||
const string& FinalAccumLateClipKernel() const;
|
||||
const string& FinalAccumLateClipEntryPoint() const;
|
||||
const string& FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel() const;
|
||||
const string& FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint() const;
|
||||
const string& FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel() const;
|
||||
const string& FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint() const;
|
||||
const string& GammaCorrectionEntryPoint(size_t channels, bool transparency) const;
|
||||
const string& GammaCorrectionKernel(size_t channels, bool transparency) const;
|
||||
const string& FinalAccumEntryPoint(bool earlyClip, size_t channels, bool transparency, double& alphaBase, double& alphaScale) const;
|
||||
const string& FinalAccumKernel(bool earlyClip, size_t channels, bool transparency) const;
|
||||
|
||||
private:
|
||||
string CreateFinalAccumKernelString(bool earlyClip, size_t channels, bool transparency);
|
||||
@ -56,6 +56,7 @@ private:
|
||||
string CreateFinalAccumKernelString(bool earlyClip, bool alphaCalc, bool alphaAccum);
|
||||
string CreateGammaCorrectionFunctionString(bool globalBucket, bool alphaCalc, bool alphaAccum, bool finalOut);
|
||||
string CreateCalcNewRgbFunctionString(bool globalBucket);
|
||||
|
||||
string m_GammaCorrectionWithAlphaCalcKernel;
|
||||
string m_GammaCorrectionWithAlphaCalcEntryPoint;
|
||||
|
||||
@ -76,6 +77,7 @@ private:
|
||||
string m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel;//False, true.
|
||||
string m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint;
|
||||
|
||||
string m_Empty;
|
||||
bool m_DoublePrecision;
|
||||
};
|
||||
}
|
||||
|
@ -10,11 +10,9 @@ namespace EmberCLns
|
||||
/// Constructor that sets up some basic entry point strings and creates
|
||||
/// the zeroization kernel string since it requires no conditional inputs.
|
||||
/// </summary>
|
||||
/// <param name="nVidia">True if running on an nVidia card, else false.</param>
|
||||
template <typename T>
|
||||
IterOpenCLKernelCreator<T>::IterOpenCLKernelCreator(bool nVidia)
|
||||
IterOpenCLKernelCreator<T>::IterOpenCLKernelCreator()
|
||||
{
|
||||
m_NVidia = nVidia;
|
||||
m_IterEntryPoint = "IterateKernel";
|
||||
m_ZeroizeEntryPoint = "ZeroizeKernel";
|
||||
m_ZeroizeKernel = CreateZeroizeKernelString();
|
||||
@ -24,9 +22,9 @@ IterOpenCLKernelCreator<T>::IterOpenCLKernelCreator(bool nVidia)
|
||||
/// Accessors.
|
||||
/// </summary>
|
||||
|
||||
template <typename T> string IterOpenCLKernelCreator<T>::ZeroizeKernel() { return m_ZeroizeKernel; }
|
||||
template <typename T> string IterOpenCLKernelCreator<T>::ZeroizeEntryPoint() { return m_ZeroizeEntryPoint; }
|
||||
template <typename T> string IterOpenCLKernelCreator<T>::IterEntryPoint() { return m_IterEntryPoint; }
|
||||
template <typename T> const string& IterOpenCLKernelCreator<T>::ZeroizeKernel() const { return m_ZeroizeKernel; }
|
||||
template <typename T> const string& IterOpenCLKernelCreator<T>::ZeroizeEntryPoint() const { return m_ZeroizeEntryPoint; }
|
||||
template <typename T> const string& IterOpenCLKernelCreator<T>::IterEntryPoint() const { return m_IterEntryPoint; }
|
||||
|
||||
/// <summary>
|
||||
/// Create the iteration kernel string using the Cuburn method.
|
||||
@ -221,8 +219,12 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
|
||||
EmberCLStructString <<
|
||||
UnionCLStructString <<
|
||||
CarToRasCLStructString <<
|
||||
CarToRasFunctionString <<
|
||||
AtomicString(doublePrecision, m_NVidia) <<
|
||||
CarToRasFunctionString;
|
||||
|
||||
if (lockAccum)
|
||||
os << AtomicString();
|
||||
|
||||
os <<
|
||||
xformFuncs.str() <<
|
||||
"__kernel void " << m_IterEntryPoint << "(\n" <<
|
||||
" uint iterCount,\n"
|
||||
|
@ -23,10 +23,10 @@ template <typename T>
|
||||
class EMBERCL_API IterOpenCLKernelCreator
|
||||
{
|
||||
public:
|
||||
IterOpenCLKernelCreator(bool nVidia);
|
||||
string ZeroizeKernel();
|
||||
string ZeroizeEntryPoint();
|
||||
string IterEntryPoint();
|
||||
IterOpenCLKernelCreator();
|
||||
const string& ZeroizeKernel() const;
|
||||
const string& ZeroizeEntryPoint() const;
|
||||
const string& IterEntryPoint() const;
|
||||
string CreateIterKernelString(Ember<T>& ember, string& parVarDefines, bool lockAccum = false, bool doAccum = true);
|
||||
static void ParVarIndexDefines(Ember<T>& ember, pair<string, vector<T>>& params, bool doVals = true, bool doString = true);
|
||||
static bool IsBuildRequired(Ember<T>& ember1, Ember<T>& ember2);
|
||||
@ -38,7 +38,6 @@ private:
|
||||
string m_IterEntryPoint;
|
||||
string m_ZeroizeKernel;
|
||||
string m_ZeroizeEntryPoint;
|
||||
bool m_NVidia;
|
||||
};
|
||||
|
||||
#ifdef OPEN_CL_TEST_AREA
|
||||
|
@ -15,7 +15,7 @@ namespace EmberCLns
|
||||
template <typename T, typename bucketT>
|
||||
RendererCL<T, bucketT>::RendererCL(uint platform, uint device, bool shared, GLuint outputTexID)
|
||||
:
|
||||
m_IterOpenCLKernelCreator(false),
|
||||
m_IterOpenCLKernelCreator(),
|
||||
m_DEOpenCLKernelCreator(typeid(T) == typeid(double), false),
|
||||
m_FinalAccumOpenCLKernelCreator(typeid(T) == typeid(double))
|
||||
{
|
||||
@ -105,7 +105,7 @@ bool RendererCL<T, bucketT>::Init(uint platform, uint device, bool shared, GLuin
|
||||
{
|
||||
m_NVidia = ToLower(m_Wrapper.DeviceAndPlatformNames()).find_first_of("nvidia") != string::npos && m_Wrapper.LocalMemSize() > (32 * 1024);
|
||||
m_WarpSize = m_NVidia ? 32 : 64;
|
||||
m_IterOpenCLKernelCreator = IterOpenCLKernelCreator<T>(m_NVidia);
|
||||
m_IterOpenCLKernelCreator = IterOpenCLKernelCreator<T>();
|
||||
m_DEOpenCLKernelCreator = DEOpenCLKernelCreator(m_DoublePrecision, m_NVidia);
|
||||
|
||||
string zeroizeProgram = m_IterOpenCLKernelCreator.ZeroizeKernel();
|
||||
@ -286,7 +286,7 @@ bool RendererCL<T, bucketT>::WriteRandomPoints()
|
||||
/// </summary>
|
||||
/// <returns>The string representation of the kernel for the last built iter program.</returns>
|
||||
template <typename T, typename bucketT>
|
||||
string RendererCL<T, bucketT>::IterKernel() { return m_IterKernel; }
|
||||
const string& RendererCL<T, bucketT>::IterKernel() const { return m_IterKernel; }
|
||||
|
||||
|
||||
/// <summary>
|
||||
@ -294,14 +294,14 @@ string RendererCL<T, bucketT>::IterKernel() { return m_IterKernel; }
|
||||
/// </summary>
|
||||
/// <returns>The string representation of the kernel for the last built density filtering program.</returns>
|
||||
template <typename T, typename bucketT>
|
||||
string RendererCL<T, bucketT>::DEKernel() { return m_DEOpenCLKernelCreator.GaussianDEKernel(Supersample(), m_DensityFilterCL.m_FilterWidth); }
|
||||
const string& RendererCL<T, bucketT>::DEKernel() const { return m_DEOpenCLKernelCreator.GaussianDEKernel(Supersample(), m_DensityFilterCL.m_FilterWidth); }
|
||||
|
||||
/// <summary>
|
||||
/// Get the kernel string for the last built final accumulation program.
|
||||
/// </summary>
|
||||
/// <returns>The string representation of the kernel for the last built final accumulation program.</returns>
|
||||
template <typename T, typename bucketT>
|
||||
string RendererCL<T, bucketT>::FinalAccumKernel() { return m_FinalAccumOpenCLKernelCreator.FinalAccumKernel(EarlyClip(), Renderer<T, bucketT>::NumChannels(), Transparency()); }
|
||||
const string& RendererCL<T, bucketT>::FinalAccumKernel() const { return m_FinalAccumOpenCLKernelCreator.FinalAccumKernel(EarlyClip(), Renderer<T, bucketT>::NumChannels(), Transparency()); }
|
||||
|
||||
/// <summary>
|
||||
/// Virtual functions overridden from RendererCLBase.
|
||||
@ -567,8 +567,8 @@ bool RendererCL<T, bucketT>::Alloc()
|
||||
m_XformsCL.resize(m_Ember.TotalXformCount());
|
||||
|
||||
bool b = true;
|
||||
size_t histLength = SuperSize() * sizeof(v4T);
|
||||
size_t accumLength = SuperSize() * sizeof(v4T);
|
||||
size_t histLength = SuperSize() * sizeof(v4bT);
|
||||
size_t accumLength = SuperSize() * sizeof(v4bT);
|
||||
const char* loc = __FUNCTION__;
|
||||
|
||||
if (b && !(b = m_Wrapper.AddBuffer(m_EmberBufferName, sizeof(m_EmberCL)))) { this->m_ErrorReport.push_back(loc); }
|
||||
@ -694,6 +694,21 @@ EmberStats RendererCL<T, bucketT>::Iterate(size_t iterCount, size_t temporalSamp
|
||||
EmberStats stats;//Do not record bad vals with the GPU. If the user needs to investigate bad vals, use the CPU.
|
||||
const char* loc = __FUNCTION__;
|
||||
|
||||
//Only need to do this once on the beginning of a new render. Last iter will always be 0 at the beginning of a full render or temporal sample.
|
||||
if (m_LastIter == 0)
|
||||
{
|
||||
ConvertEmber(m_Ember, m_EmberCL, m_XformsCL);
|
||||
ConvertCarToRas(*CoordMap());
|
||||
|
||||
if (b && !(b = m_Wrapper.WriteBuffer(m_EmberBufferName, reinterpret_cast<void*>(&m_EmberCL), sizeof(m_EmberCL)))) { this->m_ErrorReport.push_back(loc); }
|
||||
if (b && !(b = m_Wrapper.WriteBuffer(m_XformsBufferName, reinterpret_cast<void*>(m_XformsCL.data()), sizeof(m_XformsCL[0]) * m_XformsCL.size()))) { this->m_ErrorReport.push_back(loc); }
|
||||
if (b && !(b = m_Wrapper.AddAndWriteBuffer(m_DistBufferName, reinterpret_cast<void*>(const_cast<byte*>(XformDistributions())), XformDistributionsSize()))) { this->m_ErrorReport.push_back(loc); }//Will be resized for xaos.
|
||||
if (b && !(b = m_Wrapper.WriteBuffer(m_CarToRasBufferName, reinterpret_cast<void*>(&m_CarToRasCL), sizeof(m_CarToRasCL)))) { this->m_ErrorReport.push_back(loc); }
|
||||
|
||||
if (b && !(b = m_Wrapper.AddAndWriteImage("Palette", CL_MEM_READ_ONLY, m_PaletteFormat, m_DmapCL.m_Entries.size(), 1, 0, m_DmapCL.m_Entries.data()))) { this->m_ErrorReport.push_back(loc); }
|
||||
|
||||
if (b)
|
||||
{
|
||||
IterOpenCLKernelCreator<T>::ParVarIndexDefines(m_Ember, m_Params, true, false);//Always do this to get the values (but no string), regardless of whether a rebuild is necessary.
|
||||
|
||||
//Don't know the size of the parametric variations parameters buffer until the ember is examined.
|
||||
@ -707,6 +722,10 @@ EmberStats RendererCL<T, bucketT>::Iterate(size_t iterCount, size_t temporalSamp
|
||||
return stats;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
return stats;
|
||||
}
|
||||
|
||||
//Rebuilding is expensive, so only do it if it's required.
|
||||
if (IterOpenCLKernelCreator<T>::IsBuildRequired(m_Ember, m_LastBuiltEmber))
|
||||
@ -716,7 +735,7 @@ EmberStats RendererCL<T, bucketT>::Iterate(size_t iterCount, size_t temporalSamp
|
||||
{
|
||||
m_IterTimer.Tic();//Tic() here to avoid including build time in iter time measurement.
|
||||
|
||||
if (m_Stats.m_Iters == 0)//Only reset the call count on the beginning of a new render. Do not reset on KEEP_ITERATING.
|
||||
if (m_LastIter == 0)//Only reset the call count on the beginning of a new render. Do not reset on KEEP_ITERATING.
|
||||
m_Calls = 0;
|
||||
|
||||
b = RunIter(iterCount, temporalSample, stats.m_Iters);
|
||||
@ -772,10 +791,8 @@ bool RendererCL<T, bucketT>::BuildIterProgramForEmber(bool doAccum)
|
||||
|
||||
/// <summary>
|
||||
/// Run the iteration kernel.
|
||||
/// Fusing on the CPU is done once per sub batch, usually 10,000 iters, however
|
||||
/// determining when to do it in OpenCL is much more difficult.
|
||||
/// Currently it's done once every 4 kernel calls which seems to be a good balance
|
||||
/// between quality of the final image and performance.
|
||||
/// Fusing on the CPU is done once per sub batch, usually 10,000 iters. Here,
|
||||
/// the same fusing frequency is kept, but is done per kernel thread.
|
||||
/// </summary>
|
||||
/// <param name="iterCount">The number of iterations to run</param>
|
||||
/// <param name="temporalSample">The temporal sample this is running for</param>
|
||||
@ -803,16 +820,6 @@ bool RendererCL<T, bucketT>::RunIter(size_t iterCount, size_t temporalSample, si
|
||||
|
||||
if (kernelIndex != -1)
|
||||
{
|
||||
ConvertEmber(m_Ember, m_EmberCL, m_XformsCL);
|
||||
m_CarToRasCL = ConvertCarToRas(*CoordMap());
|
||||
|
||||
if (b && !(b = m_Wrapper.WriteBuffer (m_EmberBufferName, reinterpret_cast<void*>(&m_EmberCL), sizeof(m_EmberCL)))) { this->m_ErrorReport.push_back(loc); }
|
||||
if (b && !(b = m_Wrapper.WriteBuffer (m_XformsBufferName, reinterpret_cast<void*>(m_XformsCL.data()), sizeof(m_XformsCL[0]) * m_XformsCL.size()))) { this->m_ErrorReport.push_back(loc); }
|
||||
if (b && !(b = m_Wrapper.AddAndWriteBuffer(m_DistBufferName, reinterpret_cast<void*>(const_cast<byte*>(XformDistributions())), XformDistributionsSize()))) { this->m_ErrorReport.push_back(loc); }//Will be resized for xaos.
|
||||
if (b && !(b = m_Wrapper.WriteBuffer (m_CarToRasBufferName, reinterpret_cast<void*>(&m_CarToRasCL), sizeof(m_CarToRasCL)))) { this->m_ErrorReport.push_back(loc); }
|
||||
|
||||
if (b && !(b = m_Wrapper.AddAndWriteImage("Palette", CL_MEM_READ_ONLY, m_PaletteFormat, m_DmapCL.m_Entries.size(), 1, 0, m_DmapCL.m_Entries.data()))) { this->m_ErrorReport.push_back(loc); }
|
||||
|
||||
//If animating, treat each temporal sample as a newly started render for fusing purposes.
|
||||
if (temporalSample > 0)
|
||||
m_Calls = 0;
|
||||
@ -1255,13 +1262,13 @@ bool RendererCL<T, bucketT>::RunDensityFilterPrivate(uint kernelIndex, uint grid
|
||||
template <typename T, typename bucketT>
|
||||
int RendererCL<T, bucketT>::MakeAndGetDensityFilterProgram(size_t ss, uint filterWidth)
|
||||
{
|
||||
string deEntryPoint = m_DEOpenCLKernelCreator.GaussianDEEntryPoint(ss, filterWidth);
|
||||
auto& deEntryPoint = m_DEOpenCLKernelCreator.GaussianDEEntryPoint(ss, filterWidth);
|
||||
int kernelIndex = m_Wrapper.FindKernelIndex(deEntryPoint);
|
||||
const char* loc = __FUNCTION__;
|
||||
|
||||
if (kernelIndex == -1)//Has not been built yet.
|
||||
{
|
||||
string kernel = m_DEOpenCLKernelCreator.GaussianDEKernel(ss, filterWidth);
|
||||
auto& kernel = m_DEOpenCLKernelCreator.GaussianDEKernel(ss, filterWidth);
|
||||
bool b = m_Wrapper.AddProgram(deEntryPoint, kernel, deEntryPoint, m_DoublePrecision);
|
||||
|
||||
if (b)
|
||||
@ -1288,13 +1295,13 @@ int RendererCL<T, bucketT>::MakeAndGetDensityFilterProgram(size_t ss, uint filte
|
||||
template <typename T, typename bucketT>
|
||||
int RendererCL<T, bucketT>::MakeAndGetFinalAccumProgram(double& alphaBase, double& alphaScale)
|
||||
{
|
||||
string finalAccumEntryPoint = m_FinalAccumOpenCLKernelCreator.FinalAccumEntryPoint(EarlyClip(), Renderer<T, bucketT>::NumChannels(), Transparency(), alphaBase, alphaScale);
|
||||
auto& finalAccumEntryPoint = m_FinalAccumOpenCLKernelCreator.FinalAccumEntryPoint(EarlyClip(), Renderer<T, bucketT>::NumChannels(), Transparency(), alphaBase, alphaScale);
|
||||
int kernelIndex = m_Wrapper.FindKernelIndex(finalAccumEntryPoint);
|
||||
const char* loc = __FUNCTION__;
|
||||
|
||||
if (kernelIndex == -1)//Has not been built yet.
|
||||
{
|
||||
string kernel = m_FinalAccumOpenCLKernelCreator.FinalAccumKernel(EarlyClip(), Renderer<T, bucketT>::NumChannels(), Transparency());
|
||||
auto& kernel = m_FinalAccumOpenCLKernelCreator.FinalAccumKernel(EarlyClip(), Renderer<T, bucketT>::NumChannels(), Transparency());
|
||||
bool b = m_Wrapper.AddProgram(finalAccumEntryPoint, kernel, finalAccumEntryPoint, m_DoublePrecision);
|
||||
|
||||
if (b)
|
||||
@ -1313,13 +1320,13 @@ int RendererCL<T, bucketT>::MakeAndGetFinalAccumProgram(double& alphaBase, doubl
|
||||
template <typename T, typename bucketT>
|
||||
int RendererCL<T, bucketT>::MakeAndGetGammaCorrectionProgram()
|
||||
{
|
||||
string gammaEntryPoint = m_FinalAccumOpenCLKernelCreator.GammaCorrectionEntryPoint(Renderer<T, bucketT>::NumChannels(), Transparency());
|
||||
auto& gammaEntryPoint = m_FinalAccumOpenCLKernelCreator.GammaCorrectionEntryPoint(Renderer<T, bucketT>::NumChannels(), Transparency());
|
||||
int kernelIndex = m_Wrapper.FindKernelIndex(gammaEntryPoint);
|
||||
const char* loc = __FUNCTION__;
|
||||
|
||||
if (kernelIndex == -1)//Has not been built yet.
|
||||
{
|
||||
string kernel = m_FinalAccumOpenCLKernelCreator.GammaCorrectionKernel(Renderer<T, bucketT>::NumChannels(), Transparency());
|
||||
auto& kernel = m_FinalAccumOpenCLKernelCreator.GammaCorrectionKernel(Renderer<T, bucketT>::NumChannels(), Transparency());
|
||||
bool b = m_Wrapper.AddProgram(gammaEntryPoint, kernel, gammaEntryPoint, m_DoublePrecision);
|
||||
|
||||
if (b)
|
||||
@ -1454,21 +1461,17 @@ void RendererCL<T, bucketT>::ConvertEmber(Ember<T>& ember, EmberCL<T>& emberCL,
|
||||
/// <param name="carToRas">The CarToRas object to convert</param>
|
||||
/// <returns>The CarToRasCL object</returns>
|
||||
template <typename T, typename bucketT>
|
||||
CarToRasCL<T> RendererCL<T, bucketT>::ConvertCarToRas(const CarToRas<T>& carToRas)
|
||||
void RendererCL<T, bucketT>::ConvertCarToRas(const CarToRas<T>& carToRas)
|
||||
{
|
||||
CarToRasCL<T> carToRasCL;
|
||||
|
||||
carToRasCL.m_RasWidth = uint(carToRas.RasWidth());
|
||||
carToRasCL.m_PixPerImageUnitW = carToRas.PixPerImageUnitW();
|
||||
carToRasCL.m_RasLlX = carToRas.RasLlX();
|
||||
carToRasCL.m_PixPerImageUnitH = carToRas.PixPerImageUnitH();
|
||||
carToRasCL.m_RasLlY = carToRas.RasLlY();
|
||||
carToRasCL.m_CarLlX = carToRas.CarLlX();
|
||||
carToRasCL.m_CarLlY = carToRas.CarLlY();
|
||||
carToRasCL.m_CarUrX = carToRas.CarUrX();
|
||||
carToRasCL.m_CarUrY = carToRas.CarUrY();
|
||||
|
||||
return carToRasCL;
|
||||
m_CarToRasCL.m_RasWidth = uint(carToRas.RasWidth());
|
||||
m_CarToRasCL.m_PixPerImageUnitW = carToRas.PixPerImageUnitW();
|
||||
m_CarToRasCL.m_RasLlX = carToRas.RasLlX();
|
||||
m_CarToRasCL.m_PixPerImageUnitH = carToRas.PixPerImageUnitH();
|
||||
m_CarToRasCL.m_RasLlY = carToRas.RasLlY();
|
||||
m_CarToRasCL.m_CarLlX = carToRas.CarLlX();
|
||||
m_CarToRasCL.m_CarLlY = carToRas.CarLlY();
|
||||
m_CarToRasCL.m_CarUrX = carToRas.CarUrX();
|
||||
m_CarToRasCL.m_CarUrY = carToRas.CarUrY();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
@ -124,9 +124,9 @@ public:
|
||||
#ifdef TEST_CL
|
||||
bool WriteRandomPoints();
|
||||
#endif
|
||||
string IterKernel();
|
||||
string DEKernel();
|
||||
string FinalAccumKernel();
|
||||
const string& IterKernel() const;
|
||||
const string& DEKernel() const;
|
||||
const string& FinalAccumKernel() const;
|
||||
|
||||
//Virtual functions overridden from RendererCLBase.
|
||||
virtual bool ReadFinal(byte* pixels);
|
||||
@ -179,7 +179,7 @@ private:
|
||||
void ConvertDensityFilter();
|
||||
void ConvertSpatialFilter();
|
||||
void ConvertEmber(Ember<T>& ember, EmberCL<T>& emberCL, vector<XformCL<T>>& xformsCL);
|
||||
static CarToRasCL<T> ConvertCarToRas(const CarToRas<T>& carToRas);
|
||||
void ConvertCarToRas(const CarToRas<T>& carToRas);
|
||||
|
||||
bool m_Init;
|
||||
bool m_NVidia;
|
||||
|
@ -71,7 +71,7 @@ Ember<T> CreateBasicEmber(uint width, uint height, uint ss, T quality, T centerX
|
||||
string GetEmberCLKernelString(Ember<float>& ember, bool iter, bool log, bool de, uint ss, bool accum)
|
||||
{
|
||||
ostringstream os;
|
||||
IterOpenCLKernelCreator<float> iterCreator(false);
|
||||
IterOpenCLKernelCreator<float> iterCreator;
|
||||
DEOpenCLKernelCreator deCreator(false, false);
|
||||
FinalAccumOpenCLKernelCreator accumCreator(false);
|
||||
pair<string, vector<float>> pair;
|
||||
|
@ -7,7 +7,7 @@
|
||||
<x>0</x>
|
||||
<y>0</y>
|
||||
<width>488</width>
|
||||
<height>567</height>
|
||||
<height>595</height>
|
||||
</rect>
|
||||
</property>
|
||||
<property name="sizePolicy">
|
||||
@ -58,7 +58,7 @@
|
||||
</font>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string><html><head/><body><p align="center"><br/><span style=" font-size:12pt;">Fractorium 0.4.1.9 Beta</span></p><p align="center"><span style=" font-size:10pt;"><br/>A Qt-based fractal flame editor which uses a C++ re-write of the flam3 algorithm named Ember and a GPU capable version named EmberCL which implements a portion of the cuburn algorithm in OpenCL.</span></p><p align="center"><span style=" font-size:10pt;">Matt Feemster</span></p></body></html></string>
|
||||
<string><html><head/><body><p align="center"><br/>Fractorium 0.4.1.9 Beta</p><p align="center"><span style=" font-size:10pt;"><br/>A Qt-based fractal flame editor which uses a C++ re-write of the flam3 algorithm named Ember and a GPU capable version named EmberCL which implements a portion of the cuburn algorithm in OpenCL.</span></p><p align="center"><span style=" font-size:10pt;">Lead: Matt Feemster</span></p><p align="center"><span style=" font-size:10pt;">Contributors: Simon Detheridge</span></p></body></html></string>
|
||||
</property>
|
||||
<property name="textFormat">
|
||||
<enum>Qt::RichText</enum>
|
||||
|
Loading…
Reference in New Issue
Block a user