From a4aae06b028d17e4c3817471bbe6a773d3803982 Mon Sep 17 00:00:00 2001 From: mfeemster Date: Wed, 12 Aug 2015 18:51:07 -0700 Subject: [PATCH] --User changes -Add Simon Detheridge's name to the About Box. --Bug fixes -Fix bug in OpenCL atomic string, which is never used. -Wrong hist and accum allocation size in RendererCL when using float-only buffers now. -Move some kernel initialization to a place where it's done once per render, rather than on every interactive iter chunk. --Code changes -Make ConvertCarToRas() just assign to the member rather than return a struct. -Make kernel string accessor functions in IterOpenCLKernelCreator, FinalAccumOpenCLKernelCreator and DEOpenCLKernelCreator be const and return a const string reference. -Don't include atomic string unless locking on the GPU, which is never. --- Source/EmberCL/DEOpenCLKernelCreator.cpp | 8 +- Source/EmberCL/DEOpenCLKernelCreator.h | 8 +- Source/EmberCL/EmberCLFunctions.h | 33 +----- Source/EmberCL/EmberCLStructs.h | 2 +- .../EmberCL/FinalAccumOpenCLKernelCreator.cpp | 48 ++++---- .../EmberCL/FinalAccumOpenCLKernelCreator.h | 42 +++---- Source/EmberCL/IterOpenCLKernelCreator.cpp | 18 +-- Source/EmberCL/IterOpenCLKernelCreator.h | 9 +- Source/EmberCL/RendererCL.cpp | 105 +++++++++--------- Source/EmberCL/RendererCL.h | 8 +- Source/EmberTester/EmberTester.cpp | 2 +- Source/Fractorium/AboutDialog.ui | 4 +- 12 files changed, 132 insertions(+), 155 deletions(-) diff --git a/Source/EmberCL/DEOpenCLKernelCreator.cpp b/Source/EmberCL/DEOpenCLKernelCreator.cpp index 6dc5d9d..e5ad8f7 100644 --- a/Source/EmberCL/DEOpenCLKernelCreator.cpp +++ b/Source/EmberCL/DEOpenCLKernelCreator.cpp @@ -56,8 +56,8 @@ DEOpenCLKernelCreator::DEOpenCLKernelCreator(bool doublePrecision, bool nVidia) /// Kernel source and entry point properties, getters only. /// -string DEOpenCLKernelCreator::LogScaleAssignDEKernel() { return m_LogScaleAssignDEKernel; } -string DEOpenCLKernelCreator::LogScaleAssignDEEntryPoint() { return m_LogScaleAssignDEEntryPoint; } +const string& DEOpenCLKernelCreator::LogScaleAssignDEKernel() const { return m_LogScaleAssignDEKernel; } +const string& DEOpenCLKernelCreator::LogScaleAssignDEEntryPoint() const { return m_LogScaleAssignDEEntryPoint; } /// /// Get the kernel source for the specified supersample and filterWidth. @@ -65,7 +65,7 @@ string DEOpenCLKernelCreator::LogScaleAssignDEEntryPoint() { return m_LogScaleAs /// The supersample being used /// Filter width /// The kernel source -string DEOpenCLKernelCreator::GaussianDEKernel(size_t ss, uint filterWidth) +const string& DEOpenCLKernelCreator::GaussianDEKernel(size_t ss, uint filterWidth) const { #ifndef ROW_ONLY_DE if (filterWidth > MaxDEFilterSize()) @@ -101,7 +101,7 @@ string DEOpenCLKernelCreator::GaussianDEKernel(size_t ss, uint filterWidth) /// The supersample being used /// Filter width /// The name of the density estimation filtering entry point kernel function -string DEOpenCLKernelCreator::GaussianDEEntryPoint(size_t ss, uint filterWidth) +const string& DEOpenCLKernelCreator::GaussianDEEntryPoint(size_t ss, uint filterWidth) const { #ifndef ROW_ONLY_DE if (filterWidth > MaxDEFilterSize()) diff --git a/Source/EmberCL/DEOpenCLKernelCreator.h b/Source/EmberCL/DEOpenCLKernelCreator.h index 68305e0..4793036 100644 --- a/Source/EmberCL/DEOpenCLKernelCreator.h +++ b/Source/EmberCL/DEOpenCLKernelCreator.h @@ -34,10 +34,10 @@ public: DEOpenCLKernelCreator(bool doublePrecision, bool nVidia); //Accessors. - string LogScaleAssignDEKernel(); - string LogScaleAssignDEEntryPoint(); - string GaussianDEKernel(size_t ss, uint filterWidth); - string GaussianDEEntryPoint(size_t ss, uint filterWidth); + const string& LogScaleAssignDEKernel() const; + const string& LogScaleAssignDEEntryPoint() const; + const string& GaussianDEKernel(size_t ss, uint filterWidth) const; + const string& GaussianDEEntryPoint(size_t ss, uint filterWidth) const; //Miscellaneous static functions. static uint MaxDEFilterSize(); diff --git a/Source/EmberCL/EmberCLFunctions.h b/Source/EmberCL/EmberCLFunctions.h index 4230646..c9048c7 100644 --- a/Source/EmberCL/EmberCLFunctions.h +++ b/Source/EmberCL/EmberCLFunctions.h @@ -351,14 +351,11 @@ static const char* CarToRasFunctionString = "}\n" "\n"; -static string AtomicString(bool doublePrecision, bool dp64AtomicSupport) +static string AtomicString() { ostringstream os; - //If they want single precision, or if they want double precision and have dp atomic support. - if (!doublePrecision || dp64AtomicSupport) - { - os << + os << "void AtomicAdd(volatile __global real_bucket_t* source, const real_bucket_t operand)\n" "{\n" " union\n" @@ -379,32 +376,6 @@ static string AtomicString(bool doublePrecision, bool dp64AtomicSupport) " newVal.realVal = prevVal.realVal + operand;\n" " } while (atomic_cmpxchg((volatile __global atomi*)source, prevVal.intVal, newVal.intVal) != prevVal.intVal);\n" "}\n"; - } - else//They want double precision and do not have dp atomic support. - { - os << - "void AtomicAdd(volatile __global double* source, const double operand)\n" - "{\n" - " union\n" - " {\n" - " uint intVal[2];\n" - " double realVal;\n" - " } newVal;\n" - "\n" - " union\n" - " {\n" - " uint intVal[2];\n" - " double realVal;\n" - " } prevVal;\n" - "\n" - " do\n" - " {\n" - " prevVal.realVal = *source;\n" - " newVal.realVal = prevVal.realVal + operand;\n" - " } while ((atomic_cmpxchg((volatile __global uint*)source, prevVal.intVal[0], newVal.intVal[0]) != prevVal.intVal[0]) ||\n" - " (atomic_cmpxchg((volatile __global uint*)source + 1, prevVal.intVal[1], newVal.intVal[1]) != prevVal.intVal[1]));\n" - "}\n"; - } return os.str(); } diff --git a/Source/EmberCL/EmberCLStructs.h b/Source/EmberCL/EmberCLStructs.h index 59b7521..fa31933 100644 --- a/Source/EmberCL/EmberCLStructs.h +++ b/Source/EmberCL/EmberCLStructs.h @@ -39,7 +39,7 @@ static string ConstantDefinesString(bool doublePrecision) << "#endif\n" << "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"//Only supported on nVidia. << "typedef long intPrec;\n" - << "typedef ulong atomi;\n" + << "typedef uint atomi;\n"//Same size as real_bucket_t, always 4 bytes. << "typedef double real_t;\n" << "typedef float real_bucket_t;\n"//Assume buckets are always float, even though iter calcs are in double. << "typedef double4 real4;\n" diff --git a/Source/EmberCL/FinalAccumOpenCLKernelCreator.cpp b/Source/EmberCL/FinalAccumOpenCLKernelCreator.cpp index e06ce79..7ebf7ee 100644 --- a/Source/EmberCL/FinalAccumOpenCLKernelCreator.cpp +++ b/Source/EmberCL/FinalAccumOpenCLKernelCreator.cpp @@ -37,24 +37,24 @@ FinalAccumOpenCLKernelCreator::FinalAccumOpenCLKernelCreator(bool doublePrecisio /// Kernel source and entry point properties, getters only. /// -string FinalAccumOpenCLKernelCreator::GammaCorrectionWithAlphaCalcKernel() { return m_GammaCorrectionWithAlphaCalcKernel; } -string FinalAccumOpenCLKernelCreator::GammaCorrectionWithAlphaCalcEntryPoint() { return m_GammaCorrectionWithAlphaCalcEntryPoint; } -string FinalAccumOpenCLKernelCreator::GammaCorrectionWithoutAlphaCalcKernel() { return m_GammaCorrectionWithoutAlphaCalcKernel; } -string FinalAccumOpenCLKernelCreator::GammaCorrectionWithoutAlphaCalcEntryPoint() { return m_GammaCorrectionWithoutAlphaCalcEntryPoint; } +const string& FinalAccumOpenCLKernelCreator::GammaCorrectionWithAlphaCalcKernel() const { return m_GammaCorrectionWithAlphaCalcKernel; } +const string& FinalAccumOpenCLKernelCreator::GammaCorrectionWithAlphaCalcEntryPoint() const { return m_GammaCorrectionWithAlphaCalcEntryPoint; } +const string& FinalAccumOpenCLKernelCreator::GammaCorrectionWithoutAlphaCalcKernel() const { return m_GammaCorrectionWithoutAlphaCalcKernel; } +const string& FinalAccumOpenCLKernelCreator::GammaCorrectionWithoutAlphaCalcEntryPoint() const { return m_GammaCorrectionWithoutAlphaCalcEntryPoint; } -string FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipKernel() { return m_FinalAccumEarlyClipKernel; } -string FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipEntryPoint() { return m_FinalAccumEarlyClipEntryPoint; } -string FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel() { return m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel; } -string FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint() { return m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint; } -string FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel() { return m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel; } -string FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint() { return m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint; } +const string& FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipKernel() const { return m_FinalAccumEarlyClipKernel; } +const string& FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipEntryPoint() const { return m_FinalAccumEarlyClipEntryPoint; } +const string& FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel() const { return m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel; } +const string& FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint() const { return m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint; } +const string& FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel() const { return m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel; } +const string& FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint() const { return m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint; } -string FinalAccumOpenCLKernelCreator::FinalAccumLateClipKernel() { return m_FinalAccumLateClipKernel; } -string FinalAccumOpenCLKernelCreator::FinalAccumLateClipEntryPoint() { return m_FinalAccumLateClipEntryPoint; } -string FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel() { return m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel; } -string FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint() { return m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint; } -string FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel() { return m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel; } -string FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint() { return m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint; } +const string& FinalAccumOpenCLKernelCreator::FinalAccumLateClipKernel() const { return m_FinalAccumLateClipKernel; } +const string& FinalAccumOpenCLKernelCreator::FinalAccumLateClipEntryPoint() const { return m_FinalAccumLateClipEntryPoint; } +const string& FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel() const { return m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel; } +const string& FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint() const { return m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint; } +const string& FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel() const { return m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel; } +const string& FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint() const { return m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint; } /// /// Get the gamma correction entry point. @@ -62,7 +62,7 @@ string FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithoutAlphaCalcWithAlph /// The number of channels used, 3 or 4. /// True if channels equals 4 and using transparency, else false. /// The name of the gamma correction entry point kernel function -string FinalAccumOpenCLKernelCreator::GammaCorrectionEntryPoint(size_t channels, bool transparency) +const string& FinalAccumOpenCLKernelCreator::GammaCorrectionEntryPoint(size_t channels, bool transparency) const { bool alphaCalc = ((channels > 3) && transparency); return alphaCalc ? m_GammaCorrectionWithAlphaCalcEntryPoint : m_GammaCorrectionWithoutAlphaCalcEntryPoint; @@ -74,7 +74,7 @@ string FinalAccumOpenCLKernelCreator::GammaCorrectionEntryPoint(size_t channels, /// The number of channels used, 3 or 4. /// True if channels equals 4 and using transparency, else false. /// The gamma correction kernel string -string FinalAccumOpenCLKernelCreator::GammaCorrectionKernel(size_t channels, bool transparency) +const string& FinalAccumOpenCLKernelCreator::GammaCorrectionKernel(size_t channels, bool transparency) const { bool alphaCalc = ((channels > 3) && transparency); return alphaCalc ? m_GammaCorrectionWithAlphaCalcKernel : m_GammaCorrectionWithoutAlphaCalcKernel; @@ -89,7 +89,7 @@ string FinalAccumOpenCLKernelCreator::GammaCorrectionKernel(size_t channels, boo /// Storage for the alpha base value used in the kernel. 0 if transparency is true, else 255. /// Storage for the alpha scale value used in the kernel. 255 if transparency is true, else 0. /// The name of the final accumulation entry point kernel function -string FinalAccumOpenCLKernelCreator::FinalAccumEntryPoint(bool earlyClip, size_t channels, bool transparency, double& alphaBase, double& alphaScale) +const string& FinalAccumOpenCLKernelCreator::FinalAccumEntryPoint(bool earlyClip, size_t channels, bool transparency, double& alphaBase, double& alphaScale) const { bool alphaCalc = ((channels > 3) && transparency); bool alphaAccum = channels > 3; @@ -109,7 +109,7 @@ string FinalAccumOpenCLKernelCreator::FinalAccumEntryPoint(bool earlyClip, size_ else if (!alphaCalc && alphaAccum)//Rgba output and !Transparency. return FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint(); else - return "";//Cannot have alphaCalc and !alphaAccum, it makes no sense. + return m_Empty;//Cannot have alphaCalc and !alphaAccum, it makes no sense. } else { @@ -120,7 +120,7 @@ string FinalAccumOpenCLKernelCreator::FinalAccumEntryPoint(bool earlyClip, size_ else if (!alphaCalc && alphaAccum)//Rgba output and !Transparency. return FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint(); else - return "";//Cannot have alphaCalc and !alphaAccum, it makes no sense. + return m_Empty;//Cannot have alphaCalc and !alphaAccum, it makes no sense. } } @@ -131,7 +131,7 @@ string FinalAccumOpenCLKernelCreator::FinalAccumEntryPoint(bool earlyClip, size_ /// The number of channels used, 3 or 4. /// True if channels equals 4 and using transparency, else false. /// The final accumulation kernel string -string FinalAccumOpenCLKernelCreator::FinalAccumKernel(bool earlyClip, size_t channels, bool transparency) +const string& FinalAccumOpenCLKernelCreator::FinalAccumKernel(bool earlyClip, size_t channels, bool transparency) const { bool alphaCalc = (channels > 3 && transparency); bool alphaAccum = channels > 3; @@ -145,7 +145,7 @@ string FinalAccumOpenCLKernelCreator::FinalAccumKernel(bool earlyClip, size_t ch else if (!alphaCalc && alphaAccum)//Rgba output and !Transparency. return FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel(); else - return "";//Cannot have alphaCalc and !alphaAccum, it makes no sense. + return m_Empty;//Cannot have alphaCalc and !alphaAccum, it makes no sense. } else { @@ -156,7 +156,7 @@ string FinalAccumOpenCLKernelCreator::FinalAccumKernel(bool earlyClip, size_t ch else if (!alphaCalc && alphaAccum)//Rgba output and !Transparency. return FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel(); else - return "";//Cannot have alphaCalc and !alphaAccum, it makes no sense. + return m_Empty;//Cannot have alphaCalc and !alphaAccum, it makes no sense. } } diff --git a/Source/EmberCL/FinalAccumOpenCLKernelCreator.h b/Source/EmberCL/FinalAccumOpenCLKernelCreator.h index 6d6ee26..1018768 100644 --- a/Source/EmberCL/FinalAccumOpenCLKernelCreator.h +++ b/Source/EmberCL/FinalAccumOpenCLKernelCreator.h @@ -25,29 +25,29 @@ class EMBERCL_API FinalAccumOpenCLKernelCreator public: FinalAccumOpenCLKernelCreator(bool doublePrecision); - string GammaCorrectionWithAlphaCalcKernel(); - string GammaCorrectionWithAlphaCalcEntryPoint(); + const string& GammaCorrectionWithAlphaCalcKernel() const; + const string& GammaCorrectionWithAlphaCalcEntryPoint() const; - string GammaCorrectionWithoutAlphaCalcKernel(); - string GammaCorrectionWithoutAlphaCalcEntryPoint(); + const string& GammaCorrectionWithoutAlphaCalcKernel() const; + const string& GammaCorrectionWithoutAlphaCalcEntryPoint() const; - string FinalAccumEarlyClipKernel(); - string FinalAccumEarlyClipEntryPoint(); - string FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel(); - string FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint(); - string FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel(); - string FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint(); + const string& FinalAccumEarlyClipKernel() const; + const string& FinalAccumEarlyClipEntryPoint() const; + const string& FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel() const; + const string& FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint() const; + const string& FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel() const; + const string& FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint() const; - string FinalAccumLateClipKernel(); - string FinalAccumLateClipEntryPoint(); - string FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel(); - string FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint(); - string FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel(); - string FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint(); - string GammaCorrectionEntryPoint(size_t channels, bool transparency); - string GammaCorrectionKernel(size_t channels, bool transparency); - string FinalAccumEntryPoint(bool earlyClip, size_t channels, bool transparency, double& alphaBase, double& alphaScale); - string FinalAccumKernel(bool earlyClip, size_t channels, bool transparency); + const string& FinalAccumLateClipKernel() const; + const string& FinalAccumLateClipEntryPoint() const; + const string& FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel() const; + const string& FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint() const; + const string& FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel() const; + const string& FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint() const; + const string& GammaCorrectionEntryPoint(size_t channels, bool transparency) const; + const string& GammaCorrectionKernel(size_t channels, bool transparency) const; + const string& FinalAccumEntryPoint(bool earlyClip, size_t channels, bool transparency, double& alphaBase, double& alphaScale) const; + const string& FinalAccumKernel(bool earlyClip, size_t channels, bool transparency) const; private: string CreateFinalAccumKernelString(bool earlyClip, size_t channels, bool transparency); @@ -56,6 +56,7 @@ private: string CreateFinalAccumKernelString(bool earlyClip, bool alphaCalc, bool alphaAccum); string CreateGammaCorrectionFunctionString(bool globalBucket, bool alphaCalc, bool alphaAccum, bool finalOut); string CreateCalcNewRgbFunctionString(bool globalBucket); + string m_GammaCorrectionWithAlphaCalcKernel; string m_GammaCorrectionWithAlphaCalcEntryPoint; @@ -76,6 +77,7 @@ private: string m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel;//False, true. string m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint; + string m_Empty; bool m_DoublePrecision; }; } diff --git a/Source/EmberCL/IterOpenCLKernelCreator.cpp b/Source/EmberCL/IterOpenCLKernelCreator.cpp index d1a8e23..d634152 100644 --- a/Source/EmberCL/IterOpenCLKernelCreator.cpp +++ b/Source/EmberCL/IterOpenCLKernelCreator.cpp @@ -10,11 +10,9 @@ namespace EmberCLns /// Constructor that sets up some basic entry point strings and creates /// the zeroization kernel string since it requires no conditional inputs. /// -/// True if running on an nVidia card, else false. template -IterOpenCLKernelCreator::IterOpenCLKernelCreator(bool nVidia) +IterOpenCLKernelCreator::IterOpenCLKernelCreator() { - m_NVidia = nVidia; m_IterEntryPoint = "IterateKernel"; m_ZeroizeEntryPoint = "ZeroizeKernel"; m_ZeroizeKernel = CreateZeroizeKernelString(); @@ -24,9 +22,9 @@ IterOpenCLKernelCreator::IterOpenCLKernelCreator(bool nVidia) /// Accessors. /// -template string IterOpenCLKernelCreator::ZeroizeKernel() { return m_ZeroizeKernel; } -template string IterOpenCLKernelCreator::ZeroizeEntryPoint() { return m_ZeroizeEntryPoint; } -template string IterOpenCLKernelCreator::IterEntryPoint() { return m_IterEntryPoint; } +template const string& IterOpenCLKernelCreator::ZeroizeKernel() const { return m_ZeroizeKernel; } +template const string& IterOpenCLKernelCreator::ZeroizeEntryPoint() const { return m_ZeroizeEntryPoint; } +template const string& IterOpenCLKernelCreator::IterEntryPoint() const { return m_IterEntryPoint; } /// /// Create the iteration kernel string using the Cuburn method. @@ -221,8 +219,12 @@ string IterOpenCLKernelCreator::CreateIterKernelString(Ember& ember, strin EmberCLStructString << UnionCLStructString << CarToRasCLStructString << - CarToRasFunctionString << - AtomicString(doublePrecision, m_NVidia) << + CarToRasFunctionString; + + if (lockAccum) + os << AtomicString(); + + os << xformFuncs.str() << "__kernel void " << m_IterEntryPoint << "(\n" << " uint iterCount,\n" diff --git a/Source/EmberCL/IterOpenCLKernelCreator.h b/Source/EmberCL/IterOpenCLKernelCreator.h index 4eb1e2a..9054ce1 100644 --- a/Source/EmberCL/IterOpenCLKernelCreator.h +++ b/Source/EmberCL/IterOpenCLKernelCreator.h @@ -23,10 +23,10 @@ template class EMBERCL_API IterOpenCLKernelCreator { public: - IterOpenCLKernelCreator(bool nVidia); - string ZeroizeKernel(); - string ZeroizeEntryPoint(); - string IterEntryPoint(); + IterOpenCLKernelCreator(); + const string& ZeroizeKernel() const; + const string& ZeroizeEntryPoint() const; + const string& IterEntryPoint() const; string CreateIterKernelString(Ember& ember, string& parVarDefines, bool lockAccum = false, bool doAccum = true); static void ParVarIndexDefines(Ember& ember, pair>& params, bool doVals = true, bool doString = true); static bool IsBuildRequired(Ember& ember1, Ember& ember2); @@ -38,7 +38,6 @@ private: string m_IterEntryPoint; string m_ZeroizeKernel; string m_ZeroizeEntryPoint; - bool m_NVidia; }; #ifdef OPEN_CL_TEST_AREA diff --git a/Source/EmberCL/RendererCL.cpp b/Source/EmberCL/RendererCL.cpp index 118051b..c7faf68 100644 --- a/Source/EmberCL/RendererCL.cpp +++ b/Source/EmberCL/RendererCL.cpp @@ -15,7 +15,7 @@ namespace EmberCLns template RendererCL::RendererCL(uint platform, uint device, bool shared, GLuint outputTexID) : - m_IterOpenCLKernelCreator(false), + m_IterOpenCLKernelCreator(), m_DEOpenCLKernelCreator(typeid(T) == typeid(double), false), m_FinalAccumOpenCLKernelCreator(typeid(T) == typeid(double)) { @@ -105,7 +105,7 @@ bool RendererCL::Init(uint platform, uint device, bool shared, GLuin { m_NVidia = ToLower(m_Wrapper.DeviceAndPlatformNames()).find_first_of("nvidia") != string::npos && m_Wrapper.LocalMemSize() > (32 * 1024); m_WarpSize = m_NVidia ? 32 : 64; - m_IterOpenCLKernelCreator = IterOpenCLKernelCreator(m_NVidia); + m_IterOpenCLKernelCreator = IterOpenCLKernelCreator(); m_DEOpenCLKernelCreator = DEOpenCLKernelCreator(m_DoublePrecision, m_NVidia); string zeroizeProgram = m_IterOpenCLKernelCreator.ZeroizeKernel(); @@ -286,7 +286,7 @@ bool RendererCL::WriteRandomPoints() /// /// The string representation of the kernel for the last built iter program. template -string RendererCL::IterKernel() { return m_IterKernel; } +const string& RendererCL::IterKernel() const { return m_IterKernel; } /// @@ -294,14 +294,14 @@ string RendererCL::IterKernel() { return m_IterKernel; } /// /// The string representation of the kernel for the last built density filtering program. template -string RendererCL::DEKernel() { return m_DEOpenCLKernelCreator.GaussianDEKernel(Supersample(), m_DensityFilterCL.m_FilterWidth); } +const string& RendererCL::DEKernel() const { return m_DEOpenCLKernelCreator.GaussianDEKernel(Supersample(), m_DensityFilterCL.m_FilterWidth); } /// /// Get the kernel string for the last built final accumulation program. /// /// The string representation of the kernel for the last built final accumulation program. template -string RendererCL::FinalAccumKernel() { return m_FinalAccumOpenCLKernelCreator.FinalAccumKernel(EarlyClip(), Renderer::NumChannels(), Transparency()); } +const string& RendererCL::FinalAccumKernel() const { return m_FinalAccumOpenCLKernelCreator.FinalAccumKernel(EarlyClip(), Renderer::NumChannels(), Transparency()); } /// /// Virtual functions overridden from RendererCLBase. @@ -567,8 +567,8 @@ bool RendererCL::Alloc() m_XformsCL.resize(m_Ember.TotalXformCount()); bool b = true; - size_t histLength = SuperSize() * sizeof(v4T); - size_t accumLength = SuperSize() * sizeof(v4T); + size_t histLength = SuperSize() * sizeof(v4bT); + size_t accumLength = SuperSize() * sizeof(v4bT); const char* loc = __FUNCTION__; if (b && !(b = m_Wrapper.AddBuffer(m_EmberBufferName, sizeof(m_EmberCL)))) { this->m_ErrorReport.push_back(loc); } @@ -694,18 +694,37 @@ EmberStats RendererCL::Iterate(size_t iterCount, size_t temporalSamp EmberStats stats;//Do not record bad vals with with GPU. If the user needs to investigate bad vals, use the CPU. const char* loc = __FUNCTION__; - IterOpenCLKernelCreator::ParVarIndexDefines(m_Ember, m_Params, true, false);//Always do this to get the values (but no string), regardless of whether a rebuild is necessary. - - //Don't know the size of the parametric varations parameters buffer until the ember is examined. - //So set it up right before the run. - if (!m_Params.second.empty()) + //Only need to do this once on the beginning of a new render. Last iter will always be 0 at the beginning of a full render or temporal sample. + if (m_LastIter == 0) { - if (!m_Wrapper.AddAndWriteBuffer(m_ParVarsBufferName, m_Params.second.data(), m_Params.second.size() * sizeof(m_Params.second[0]))) + ConvertEmber(m_Ember, m_EmberCL, m_XformsCL); + ConvertCarToRas(*CoordMap()); + + if (b && !(b = m_Wrapper.WriteBuffer(m_EmberBufferName, reinterpret_cast(&m_EmberCL), sizeof(m_EmberCL)))) { this->m_ErrorReport.push_back(loc); } + if (b && !(b = m_Wrapper.WriteBuffer(m_XformsBufferName, reinterpret_cast(m_XformsCL.data()), sizeof(m_XformsCL[0]) * m_XformsCL.size()))) { this->m_ErrorReport.push_back(loc); } + if (b && !(b = m_Wrapper.AddAndWriteBuffer(m_DistBufferName, reinterpret_cast(const_cast(XformDistributions())), XformDistributionsSize()))) { this->m_ErrorReport.push_back(loc); }//Will be resized for xaos. + if (b && !(b = m_Wrapper.WriteBuffer(m_CarToRasBufferName, reinterpret_cast(&m_CarToRasCL), sizeof(m_CarToRasCL)))) { this->m_ErrorReport.push_back(loc); } + + if (b && !(b = m_Wrapper.AddAndWriteImage("Palette", CL_MEM_READ_ONLY, m_PaletteFormat, m_DmapCL.m_Entries.size(), 1, 0, m_DmapCL.m_Entries.data()))) { this->m_ErrorReport.push_back(loc); } + + if (b) { - m_Abort = true; - this->m_ErrorReport.push_back(loc); - return stats; + IterOpenCLKernelCreator::ParVarIndexDefines(m_Ember, m_Params, true, false);//Always do this to get the values (but no string), regardless of whether a rebuild is necessary. + + //Don't know the size of the parametric varations parameters buffer until the ember is examined. + //So set it up right before the run. + if (!m_Params.second.empty()) + { + if (!m_Wrapper.AddAndWriteBuffer(m_ParVarsBufferName, m_Params.second.data(), m_Params.second.size() * sizeof(m_Params.second[0]))) + { + m_Abort = true; + this->m_ErrorReport.push_back(loc); + return stats; + } + } } + else + return stats; } //Rebuilding is expensive, so only do it if it's required. @@ -716,7 +735,7 @@ EmberStats RendererCL::Iterate(size_t iterCount, size_t temporalSamp { m_IterTimer.Tic();//Tic() here to avoid including build time in iter time measurement. - if (m_Stats.m_Iters == 0)//Only reset the call count on the beginning of a new render. Do not reset on KEEP_ITERATING. + if (m_LastIter == 0)//Only reset the call count on the beginning of a new render. Do not reset on KEEP_ITERATING. m_Calls = 0; b = RunIter(iterCount, temporalSample, stats.m_Iters); @@ -772,10 +791,8 @@ bool RendererCL::BuildIterProgramForEmber(bool doAccum) /// /// Run the iteration kernel. -/// Fusing on the CPU is done once per sub batch, usually 10,000 iters, however -/// determining when to do it in OpenCL is much more difficult. -/// Currently it's done once every 4 kernel calls which seems to be a good balance -/// between quality of the final image and performance. +/// Fusing on the CPU is done once per sub batch, usually 10,000 iters. Here, +/// the same fusing frequency is kept, but is done per kernel thread. /// /// The number of iterations to run /// The temporal sample this is running for @@ -803,16 +820,6 @@ bool RendererCL::RunIter(size_t iterCount, size_t temporalSample, si if (kernelIndex != -1) { - ConvertEmber(m_Ember, m_EmberCL, m_XformsCL); - m_CarToRasCL = ConvertCarToRas(*CoordMap()); - - if (b && !(b = m_Wrapper.WriteBuffer (m_EmberBufferName, reinterpret_cast(&m_EmberCL), sizeof(m_EmberCL)))) { this->m_ErrorReport.push_back(loc); } - if (b && !(b = m_Wrapper.WriteBuffer (m_XformsBufferName, reinterpret_cast(m_XformsCL.data()), sizeof(m_XformsCL[0]) * m_XformsCL.size()))) { this->m_ErrorReport.push_back(loc); } - if (b && !(b = m_Wrapper.AddAndWriteBuffer(m_DistBufferName, reinterpret_cast(const_cast(XformDistributions())), XformDistributionsSize()))) { this->m_ErrorReport.push_back(loc); }//Will be resized for xaos. - if (b && !(b = m_Wrapper.WriteBuffer (m_CarToRasBufferName, reinterpret_cast(&m_CarToRasCL), sizeof(m_CarToRasCL)))) { this->m_ErrorReport.push_back(loc); } - - if (b && !(b = m_Wrapper.AddAndWriteImage("Palette", CL_MEM_READ_ONLY, m_PaletteFormat, m_DmapCL.m_Entries.size(), 1, 0, m_DmapCL.m_Entries.data()))) { this->m_ErrorReport.push_back(loc); } - //If animating, treat each temporal sample as a newly started render for fusing purposes. if (temporalSample > 0) m_Calls = 0; @@ -1255,13 +1262,13 @@ bool RendererCL::RunDensityFilterPrivate(uint kernelIndex, uint grid template int RendererCL::MakeAndGetDensityFilterProgram(size_t ss, uint filterWidth) { - string deEntryPoint = m_DEOpenCLKernelCreator.GaussianDEEntryPoint(ss, filterWidth); + auto& deEntryPoint = m_DEOpenCLKernelCreator.GaussianDEEntryPoint(ss, filterWidth); int kernelIndex = m_Wrapper.FindKernelIndex(deEntryPoint); const char* loc = __FUNCTION__; if (kernelIndex == -1)//Has not been built yet. { - string kernel = m_DEOpenCLKernelCreator.GaussianDEKernel(ss, filterWidth); + auto& kernel = m_DEOpenCLKernelCreator.GaussianDEKernel(ss, filterWidth); bool b = m_Wrapper.AddProgram(deEntryPoint, kernel, deEntryPoint, m_DoublePrecision); if (b) @@ -1288,13 +1295,13 @@ int RendererCL::MakeAndGetDensityFilterProgram(size_t ss, uint filte template int RendererCL::MakeAndGetFinalAccumProgram(double& alphaBase, double& alphaScale) { - string finalAccumEntryPoint = m_FinalAccumOpenCLKernelCreator.FinalAccumEntryPoint(EarlyClip(), Renderer::NumChannels(), Transparency(), alphaBase, alphaScale); + auto& finalAccumEntryPoint = m_FinalAccumOpenCLKernelCreator.FinalAccumEntryPoint(EarlyClip(), Renderer::NumChannels(), Transparency(), alphaBase, alphaScale); int kernelIndex = m_Wrapper.FindKernelIndex(finalAccumEntryPoint); const char* loc = __FUNCTION__; if (kernelIndex == -1)//Has not been built yet. { - string kernel = m_FinalAccumOpenCLKernelCreator.FinalAccumKernel(EarlyClip(), Renderer::NumChannels(), Transparency()); + auto& kernel = m_FinalAccumOpenCLKernelCreator.FinalAccumKernel(EarlyClip(), Renderer::NumChannels(), Transparency()); bool b = m_Wrapper.AddProgram(finalAccumEntryPoint, kernel, finalAccumEntryPoint, m_DoublePrecision); if (b) @@ -1313,13 +1320,13 @@ int RendererCL::MakeAndGetFinalAccumProgram(double& alphaBase, doubl template int RendererCL::MakeAndGetGammaCorrectionProgram() { - string gammaEntryPoint = m_FinalAccumOpenCLKernelCreator.GammaCorrectionEntryPoint(Renderer::NumChannels(), Transparency()); + auto& gammaEntryPoint = m_FinalAccumOpenCLKernelCreator.GammaCorrectionEntryPoint(Renderer::NumChannels(), Transparency()); int kernelIndex = m_Wrapper.FindKernelIndex(gammaEntryPoint); const char* loc = __FUNCTION__; if (kernelIndex == -1)//Has not been built yet. { - string kernel = m_FinalAccumOpenCLKernelCreator.GammaCorrectionKernel(Renderer::NumChannels(), Transparency()); + auto& kernel = m_FinalAccumOpenCLKernelCreator.GammaCorrectionKernel(Renderer::NumChannels(), Transparency()); bool b = m_Wrapper.AddProgram(gammaEntryPoint, kernel, gammaEntryPoint, m_DoublePrecision); if (b) @@ -1454,21 +1461,17 @@ void RendererCL::ConvertEmber(Ember& ember, EmberCL& emberCL, /// The CarToRas object to convert /// The CarToRasCL object template -CarToRasCL RendererCL::ConvertCarToRas(const CarToRas& carToRas) +void RendererCL::ConvertCarToRas(const CarToRas& carToRas) { - CarToRasCL carToRasCL; - - carToRasCL.m_RasWidth = uint(carToRas.RasWidth()); - carToRasCL.m_PixPerImageUnitW = carToRas.PixPerImageUnitW(); - carToRasCL.m_RasLlX = carToRas.RasLlX(); - carToRasCL.m_PixPerImageUnitH = carToRas.PixPerImageUnitH(); - carToRasCL.m_RasLlY = carToRas.RasLlY(); - carToRasCL.m_CarLlX = carToRas.CarLlX(); - carToRasCL.m_CarLlY = carToRas.CarLlY(); - carToRasCL.m_CarUrX = carToRas.CarUrX(); - carToRasCL.m_CarUrY = carToRas.CarUrY(); - - return carToRasCL; + m_CarToRasCL.m_RasWidth = uint(carToRas.RasWidth()); + m_CarToRasCL.m_PixPerImageUnitW = carToRas.PixPerImageUnitW(); + m_CarToRasCL.m_RasLlX = carToRas.RasLlX(); + m_CarToRasCL.m_PixPerImageUnitH = carToRas.PixPerImageUnitH(); + m_CarToRasCL.m_RasLlY = carToRas.RasLlY(); + m_CarToRasCL.m_CarLlX = carToRas.CarLlX(); + m_CarToRasCL.m_CarLlY = carToRas.CarLlY(); + m_CarToRasCL.m_CarUrX = carToRas.CarUrX(); + m_CarToRasCL.m_CarUrY = carToRas.CarUrY(); } /// diff --git a/Source/EmberCL/RendererCL.h b/Source/EmberCL/RendererCL.h index b1b3e6d..bf714ba 100644 --- a/Source/EmberCL/RendererCL.h +++ b/Source/EmberCL/RendererCL.h @@ -124,9 +124,9 @@ public: #ifdef TEST_CL bool WriteRandomPoints(); #endif - string IterKernel(); - string DEKernel(); - string FinalAccumKernel(); + const string& IterKernel() const; + const string& DEKernel() const; + const string& FinalAccumKernel() const; //Virtual functions overridden from RendererCLBase. virtual bool ReadFinal(byte* pixels); @@ -179,7 +179,7 @@ private: void ConvertDensityFilter(); void ConvertSpatialFilter(); void ConvertEmber(Ember& ember, EmberCL& emberCL, vector>& xformsCL); - static CarToRasCL ConvertCarToRas(const CarToRas& carToRas); + void ConvertCarToRas(const CarToRas& carToRas); bool m_Init; bool m_NVidia; diff --git a/Source/EmberTester/EmberTester.cpp b/Source/EmberTester/EmberTester.cpp index 4aee375..21ba90b 100644 --- a/Source/EmberTester/EmberTester.cpp +++ b/Source/EmberTester/EmberTester.cpp @@ -71,7 +71,7 @@ Ember CreateBasicEmber(uint width, uint height, uint ss, T quality, T centerX string GetEmberCLKernelString(Ember& ember, bool iter, bool log, bool de, uint ss, bool accum) { ostringstream os; - IterOpenCLKernelCreator iterCreator(false); + IterOpenCLKernelCreator iterCreator; DEOpenCLKernelCreator deCreator(false, false); FinalAccumOpenCLKernelCreator accumCreator(false); pair> pair; diff --git a/Source/Fractorium/AboutDialog.ui b/Source/Fractorium/AboutDialog.ui index 4a13f20..5384bf2 100644 --- a/Source/Fractorium/AboutDialog.ui +++ b/Source/Fractorium/AboutDialog.ui @@ -7,7 +7,7 @@ 0 0 488 - 567 + 595 @@ -58,7 +58,7 @@ - <html><head/><body><p align="center"><br/><span style=" font-size:12pt;">Fractorium 0.4.1.9 Beta</span></p><p align="center"><span style=" font-size:10pt;"><br/>A Qt-based fractal flame editor which uses a C++ re-write of the flam3 algorithm named Ember and a GPU capable version named EmberCL which implements a portion of the cuburn algorithm in OpenCL.</span></p><p align="center"><span style=" font-size:10pt;">Matt Feemster</span></p></body></html> + <html><head/><body><p align="center"><br/>Fractorium 0.4.1.9 Beta</p><p align="center"><span style=" font-size:10pt;"><br/>A Qt-based fractal flame editor which uses a C++ re-write of the flam3 algorithm named Ember and a GPU capable version named EmberCL which implements a portion of the cuburn algorithm in OpenCL.</span></p><p align="center"><span style=" font-size:10pt;">Lead: Matt Feemster</span></p><p align="center"><span style=" font-size:10pt;">Contributors: Simon Detheridge</span></p></body></html> Qt::RichText