diff --git a/Source/Ember/Ember.cpp b/Source/Ember/Ember.cpp index 89009c6..2bd55eb 100644 --- a/Source/Ember/Ember.cpp +++ b/Source/Ember/Ember.cpp @@ -412,6 +412,6 @@ EXPORT_TWO_TYPE_EMBER(float, float) #ifdef DO_DOUBLE EXPORT_SINGLE_TYPE_EMBER(double) - EXPORT_TWO_TYPE_EMBER(double, double) + EXPORT_TWO_TYPE_EMBER(double, float) #endif } diff --git a/Source/Ember/Ember.h b/Source/Ember/Ember.h index af394ae..ccaacaf 100644 --- a/Source/Ember/Ember.h +++ b/Source/Ember/Ember.h @@ -121,7 +121,6 @@ public: m_CenterY = T(ember.m_CenterY); m_RotCenterY = T(ember.m_RotCenterY); m_Rotate = T(ember.m_Rotate); - m_Hue = T(ember.m_Hue); m_Brightness = T(ember.m_Brightness); m_Gamma = T(ember.m_Gamma); m_Vibrancy = T(ember.m_Vibrancy); @@ -219,7 +218,6 @@ public: m_CenterY = 0; m_RotCenterY = 0; m_Rotate = 0; - m_Hue = 0; m_Brightness = 4; m_Gamma = 4; m_Vibrancy = 1; @@ -793,7 +791,6 @@ public: InterpT<&Ember::m_CenterY>(embers, coefs, size); InterpT<&Ember::m_RotCenterY>(embers, coefs, size); InterpT<&Ember::m_Rotate>(embers, coefs, size); - InterpT<&Ember::m_Hue>(embers, coefs, size); InterpT<&Ember::m_Brightness>(embers, coefs, size); InterpT<&Ember::m_Gamma>(embers, coefs, size); InterpT<&Ember::m_Vibrancy>(embers, coefs, size); @@ -1335,9 +1332,6 @@ public: case FLAME_MOTION_ROTATE: APP_FMP(m_Rotate); break; - case FLAME_MOTION_HUE: - APP_FMP(m_Hue); - break; case FLAME_MOTION_BRIGHTNESS: APP_FMP(m_Brightness); break; @@ -1381,7 +1375,6 @@ public: m_Vibrancy = 1; m_Brightness = 4; m_Symmetry = 0; - m_Hue = 0; m_Rotate = 0; m_PixelsPerUnit = 50; m_Interp = EMBER_INTERP_LINEAR; @@ -1502,7 +1495,6 @@ public: << "CenterY: " << m_CenterY << endl << "RotCenterY: " << m_RotCenterY << endl << "Rotate: " << m_Rotate << endl - << "Hue: " << m_Hue << endl << "Brightness: " << m_Brightness << endl << "Gamma: " << m_Gamma << endl << "Vibrancy: " << m_Vibrancy << endl @@ -1646,11 +1638,6 @@ public: //Xml field: "rotate". 
T m_Rotate; - //When specifying the palette as an index in the palette file, rather than inserted in the Xml, it can optionally have its hue - //rotated by this amount. - //Xml field: "hue". - T m_Hue; - //Determine how bright to make the image during final accumulation. //Xml field: "brightness". T m_Brightness; diff --git a/Source/Ember/EmberDefines.h b/Source/Ember/EmberDefines.h index 43107eb..d11f8cf 100644 --- a/Source/Ember/EmberDefines.h +++ b/Source/Ember/EmberDefines.h @@ -96,6 +96,7 @@ static inline size_t NowMs() #define v2T glm::tvec2 #define v3T glm::tvec3 #define v4T glm::tvec4 + #define v4bT glm::tvec4 #define m2T glm::tmat2x2 #define m3T glm::tmat3x3 #define m4T glm::tmat4x4 @@ -104,6 +105,7 @@ static inline size_t NowMs() #define v2T glm::detail::tvec2 #define v3T glm::detail::tvec3 #define v4T glm::detail::tvec4 + #define v4bT glm::detail::tvec4 #define m2T glm::detail::tmat2x2 #define m3T glm::detail::tmat3x3 #define m4T glm::detail::tmat4x4 @@ -132,14 +134,13 @@ enum eEmberMotionParam : uint FLAME_MOTION_CENTER_X = 7, FLAME_MOTION_CENTER_Y = 8, FLAME_MOTION_ROTATE = 9, - FLAME_MOTION_HUE = 10, - FLAME_MOTION_BRIGHTNESS = 11, - FLAME_MOTION_GAMMA = 12, - FLAME_MOTION_GAMMA_THRESH = 13, - FLAME_MOTION_HIGHLIGHT_POWER = 14, - FLAME_MOTION_BACKGROUND_R = 15, - FLAME_MOTION_BACKGROUND_G = 16, - FLAME_MOTION_BACKGROUND_B = 17, - FLAME_MOTION_VIBRANCY = 18 + FLAME_MOTION_BRIGHTNESS = 10, + FLAME_MOTION_GAMMA = 11, + FLAME_MOTION_GAMMA_THRESH = 12, + FLAME_MOTION_HIGHLIGHT_POWER = 13, + FLAME_MOTION_BACKGROUND_R = 14, + FLAME_MOTION_BACKGROUND_G = 15, + FLAME_MOTION_BACKGROUND_B = 16, + FLAME_MOTION_VIBRANCY = 17 }; } diff --git a/Source/Ember/EmberToXml.h b/Source/Ember/EmberToXml.h index 4080b74..37a0301 100644 --- a/Source/Ember/EmberToXml.h +++ b/Source/Ember/EmberToXml.h @@ -792,9 +792,6 @@ private: case FLAME_MOTION_ROTATE: os << " rotate=\"" << motion.m_MotionParams[i].second << "\""; break; - case FLAME_MOTION_HUE: - os << " hue=\"" << 
motion.m_MotionParams[i].second << "\""; - break; case FLAME_MOTION_BRIGHTNESS: os << " brightness=\"" << motion.m_MotionParams[i].second << "\""; break; diff --git a/Source/Ember/Isaac.h b/Source/Ember/Isaac.h index 887e5bd..2e4de38 100644 --- a/Source/Ember/Isaac.h +++ b/Source/Ember/Isaac.h @@ -276,7 +276,7 @@ public: } Isaac(ctx); //Fill in the first set of results. - ctx->randcnt = N;//TODO//0;//Prepare to use the first set of results. + ctx->randcnt = N;//0;//Prepare to use the first set of results. } /// diff --git a/Source/Ember/Renderer.cpp b/Source/Ember/Renderer.cpp index 4b1a12e..294f63f 100644 --- a/Source/Ember/Renderer.cpp +++ b/Source/Ember/Renderer.cpp @@ -212,7 +212,7 @@ bool Renderer::CreateDEFilter(bool& newAlloc) (m_Ember.m_CurveDE != m_DensityFilter->Curve()) || (m_Ember.m_Supersample != m_DensityFilter->Supersample())) { - m_DensityFilter = unique_ptr>(new DensityFilter(m_Ember.m_MinRadDE, m_Ember.m_MaxRadDE, m_Ember.m_CurveDE, m_Ember.m_Supersample)); + m_DensityFilter = unique_ptr>(new DensityFilter(bucketT(m_Ember.m_MinRadDE), bucketT(m_Ember.m_MaxRadDE), bucketT(m_Ember.m_CurveDE), m_Ember.m_Supersample)); newAlloc = true; } @@ -251,8 +251,8 @@ bool Renderer::CreateSpatialFilter(bool& newAlloc) (m_Ember.m_Supersample != m_SpatialFilter->Supersample()) || (m_PixelAspectRatio != m_SpatialFilter->PixelAspectRatio())) { - m_SpatialFilter = unique_ptr>( - SpatialFilterCreator::Create(m_Ember.m_SpatialFilterType, m_Ember.m_SpatialFilterRadius, m_Ember.m_Supersample, m_PixelAspectRatio)); + m_SpatialFilter = unique_ptr>( + SpatialFilterCreator::Create(m_Ember.m_SpatialFilterType, bucketT(m_Ember.m_SpatialFilterRadius), m_Ember.m_Supersample, bucketT(m_PixelAspectRatio))); m_Ember.m_SpatialFilterRadius = m_SpatialFilter->FilterRadius();//It may have been changed internally if it was too small, so ensure they're synced. 
newAlloc = true; @@ -386,8 +386,8 @@ eRenderStatus Renderer::Run(vector& finalImage, double time, s if ((filterAndAccumOnly || accumOnly) && TemporalSamples() == 1)//Disallow jumping when temporal samples > 1. { m_Ember = m_Embers[0]; - m_Vibrancy = m_Ember.m_Vibrancy; - m_Gamma = m_Ember.m_Gamma; + m_Vibrancy = Vibrancy(); + m_Gamma = Gamma(); m_Background = m_Ember.m_Background; if (filterAndAccumOnly) @@ -517,11 +517,11 @@ eRenderStatus Renderer::Run(vector& finalImage, double time, s //Allow for incremental rendering by only taking action if the iter loop for this temporal sample is completely done. if (m_LastIter >= itersPerTemporalSample) { - m_Vibrancy += m_Ember.m_Vibrancy; - m_Gamma += m_Ember.m_Gamma; - m_Background.r += m_Ember.m_Background.r; - m_Background.g += m_Ember.m_Background.g; - m_Background.b += m_Ember.m_Background.b; + m_Vibrancy += Vibrancy(); + m_Gamma += Gamma(); + m_Background.r += bucketT(m_Ember.m_Background.r); + m_Background.g += bucketT(m_Ember.m_Background.g); + m_Background.b += bucketT(m_Ember.m_Background.b); m_VibGamCount++; m_LastIter = 0; temporalSample++; @@ -554,7 +554,7 @@ FilterAndAccum: eRenderStatus fullRun = RENDER_OK;//Whether density filtering was run to completion without aborting prematurely or triggering an error. T area = FinalRasW() * FinalRasH() / (m_PixelsPerUnitX * m_PixelsPerUnitY);//Need to use temps from field if ever implemented. - m_K1 = (Brightness() * T(268.0)) / 256; + m_K1 = bucketT((Brightness() * 268) / 256); //When doing an interactive render, force output early on in the render process, before all iterations are done. 
//This presents a problem with the normal calculation of K2 since it relies on the quality value; it will scale the colors @@ -562,10 +562,10 @@ FilterAndAccum: if (forceOutput) { T quality = (T(m_Stats.m_Iters) / T(FinalDimensions())) * (m_Scale * m_Scale); - m_K2 = (Supersample() * Supersample()) / (area * quality * m_TemporalFilter->SumFilt()); + m_K2 = bucketT((Supersample() * Supersample()) / (area * quality * m_TemporalFilter->SumFilt())); } else - m_K2 = (Supersample() * Supersample()) / (area * m_ScaledQuality * m_TemporalFilter->SumFilt()); + m_K2 = bucketT((Supersample() * Supersample()) / (area * m_ScaledQuality * m_TemporalFilter->SumFilt())); ResetBuckets(false, true);//Only the histogram was reset above, now reset the density filtering buffer. //t.Tic(); @@ -824,11 +824,11 @@ eRenderStatus Renderer::LogScaleDensityFilter() //Check for visibility first before doing anything else to avoid all possible unnecessary calculations. if (m_HistBuckets[index].a != 0) { - T logScale = (m_K1 * log(1 + m_HistBuckets[index].a * m_K2)) / m_HistBuckets[index].a; + bucketT logScale = (m_K1 * log(1 + m_HistBuckets[index].a * m_K2)) / m_HistBuckets[index].a; //Original did a temporary assignment, then *= logScale, then passed the result to bump_no_overflow(). //Combine here into one operation for a slight speedup. 
- m_AccumulatorBuckets[index] = m_HistBuckets[index] * bucketT(logScale); + m_AccumulatorBuckets[index] = m_HistBuckets[index] * logScale; } } }); @@ -850,7 +850,7 @@ eRenderStatus Renderer::GaussianDensityFilter() Timing totalTime, localTime; bool scf = !(Supersample() & 1); intmax_t ss = Floor(Supersample() / T(2)); - T scfact = pow(Supersample() / (Supersample() + T(1.0)), T(2.0)); + T scfact = pow(Supersample() / (Supersample() + T(1)), T(2)); size_t threads = m_ThreadsToUse; size_t startRow = Supersample() - 1; @@ -874,8 +874,8 @@ eRenderStatus Renderer::GaussianDensityFilter() size_t bucketRowStart = j * m_SuperRasW;//Pull out of inner loop for optimization. const tvec4* bucket; const tvec4* buckets = m_HistBuckets.data(); - const T* filterCoefs = m_DensityFilter->Coefs(); - const T* filterWidths = m_DensityFilter->Widths(); + const bucketT* filterCoefs = m_DensityFilter->Coefs(); + const bucketT* filterWidths = m_DensityFilter->Widths(); for (intmax_t i = startCol; i < endCol; i++) { @@ -888,7 +888,7 @@ eRenderStatus Renderer::GaussianDensityFilter() if (bucket->a == 0) continue; - T cacheLog = (m_K1 * log(T(1.0) + bucket->a * m_K2)) / bucket->a;//Caching this calculation gives a 30% speedup. + bucketT cacheLog = (m_K1 * log(1 + bucket->a * m_K2)) / bucket->a;//Caching this calculation gives a 30% speedup. if (ss == 0) { @@ -938,10 +938,10 @@ eRenderStatus Renderer::GaussianDensityFilter() if (filterCoefs[filterCoefIndex] == 0) continue; - T logScale = filterCoefs[filterCoefIndex] * cacheLog; + bucketT logScale = filterCoefs[filterCoefIndex] * cacheLog; //Original first assigned the fields, then scaled them. Combine into a single step for a 1% optimization. 
- logScaleBucket = (*bucket * bucketT(logScale)); + logScaleBucket = (*bucket * logScale); if (jj == 0 && ii == 0) { @@ -1036,8 +1036,8 @@ eRenderStatus Renderer::AccumulatorToFinalImage(byte* pixels, size_t EnterFinalAccum(); //Timing t(4); size_t filterWidth = m_SpatialFilter->FinalFilterWidth(); - T g, linRange, vibrancy; - Color background; + bucketT g, linRange, vibrancy; + Color background; pixels += finalOffset; PrepFinalAccumVals(background, g, linRange, vibrancy); @@ -1090,7 +1090,7 @@ eRenderStatus Renderer::AccumulatorToFinalImage(byte* pixels, size_t for (ii = 0; ii < filterWidth; ii++) { //Need to dereference the spatial filter pointer object to use the [] operator. Makes no speed difference. - bucketT k = bucketT((*m_SpatialFilter)[ii + filterKRowIndex]); + bucketT k = ((*m_SpatialFilter)[ii + filterKRowIndex]); newBucket += (m_AccumulatorBuckets[(x + ii) + accumRowIndex] * k); } @@ -1340,12 +1340,12 @@ void Renderer::PixelAspectRatio(T pixelAspectRatio) template T Renderer::Scale() const { return m_Scale; } template T Renderer::PixelsPerUnitX() const { return m_PixelsPerUnitX; } template T Renderer::PixelsPerUnitY() const { return m_PixelsPerUnitY; } -template T Renderer::K1() const { return m_K1; } -template T Renderer::K2() const { return m_K2; } +template bucketT Renderer::K1() const { return m_K1; } +template bucketT Renderer::K2() const { return m_K2; } template const CarToRas* Renderer::CoordMap() const { return &m_CarToRas; } template tvec4* Renderer::HistBuckets() { return m_HistBuckets.data(); } template tvec4* Renderer::AccumulatorBuckets() { return m_AccumulatorBuckets.data(); } -template SpatialFilter* Renderer::GetSpatialFilter() { return m_SpatialFilter.get(); } +template SpatialFilter* Renderer::GetSpatialFilter() { return m_SpatialFilter.get(); } template TemporalFilter* Renderer::GetTemporalFilter() { return m_TemporalFilter.get(); } /// @@ -1374,12 +1374,11 @@ template T Renderer:: template T Renderer::CenterX() const { return 
m_Ember.m_CenterX; } template T Renderer::CenterY() const { return m_Ember.m_CenterY; } template T Renderer::Rotate() const { return m_Ember.m_Rotate; } -template T Renderer::Hue() const { return m_Ember.m_Hue; } -template T Renderer::Brightness() const { return m_Ember.m_Brightness; } -template T Renderer::Gamma() const { return m_Ember.m_Gamma; } -template T Renderer::Vibrancy() const { return m_Ember.m_Vibrancy; } -template T Renderer::GammaThresh() const { return m_Ember.m_GammaThresh; } -template T Renderer::HighlightPower() const { return m_Ember.m_HighlightPower; } +template bucketT Renderer::Brightness() const { return bucketT(m_Ember.m_Brightness); } +template bucketT Renderer::Gamma() const { return bucketT(m_Ember.m_Gamma); } +template bucketT Renderer::Vibrancy() const { return bucketT(m_Ember.m_Vibrancy); } +template bucketT Renderer::GammaThresh() const { return bucketT(m_Ember.m_GammaThresh); } +template bucketT Renderer::HighlightPower() const { return bucketT(m_Ember.m_HighlightPower); } template Color Renderer::Background() const { return m_Ember.m_Background; } template const Xform* Renderer::Xforms() const { return m_Ember.Xforms(); } template Xform* Renderer::NonConstXforms() { return m_Ember.NonConstXforms(); } @@ -1420,20 +1419,20 @@ template Point* Renderer::Sample /// The computed linear range /// The computed vibrancy template -void Renderer::PrepFinalAccumVals(Color& background, T& g, T& linRange, T& vibrancy) +void Renderer::PrepFinalAccumVals(Color& background, bucketT& g, bucketT& linRange, bucketT& vibrancy) { //If they are doing incremental rendering, they can get here without doing a full temporal //sample, which means the values will be zero. - vibrancy = m_Vibrancy == 0 ? m_Ember.m_Vibrancy : m_Vibrancy; + vibrancy = m_Vibrancy == 0 ? Vibrancy() : m_Vibrancy; size_t vibGamCount = m_VibGamCount == 0 ? 1 : m_VibGamCount; - T gamma = m_Gamma == 0 ? 
m_Ember.m_Gamma : m_Gamma; - g = T(1.0) / ClampGte(gamma / vibGamCount, T(0.01));//Ensure a divide by zero doesn't occur. + bucketT gamma = m_Gamma == 0 ? Gamma() : m_Gamma; + g = 1 / ClampGte(gamma / vibGamCount, bucketT(0.01));//Ensure a divide by zero doesn't occur. linRange = GammaThresh(); vibrancy /= vibGamCount; - background.x = (IsNearZero(m_Background.r) ? m_Ember.m_Background.r : m_Background.r) / (vibGamCount / T(256.0));//Background is [0, 1]. - background.y = (IsNearZero(m_Background.g) ? m_Ember.m_Background.g : m_Background.g) / (vibGamCount / T(256.0)); - background.z = (IsNearZero(m_Background.b) ? m_Ember.m_Background.b : m_Background.b) / (vibGamCount / T(256.0)); + background.x = (IsNearZero(m_Background.r) ? bucketT(m_Ember.m_Background.r) : m_Background.r) / (vibGamCount / bucketT(256.0));//Background is [0, 1]. + background.y = (IsNearZero(m_Background.g) ? bucketT(m_Ember.m_Background.g) : m_Background.g) / (vibGamCount / bucketT(256.0)); + background.z = (IsNearZero(m_Background.b) ? bucketT(m_Ember.m_Background.b) : m_Background.b) / (vibGamCount / bucketT(256.0)); } /// @@ -1570,7 +1569,7 @@ void Renderer::AddToAccum(const tvec4& bucke /// Because this code is used in both early and late clipping, a few extra arguments are passed /// to specify what actions to take. Coupled with an additional template argument, this allows /// using one function to perform all color clipping, gamma correction and final accumulation. -/// Template argument accumT is expected to match T for the case of early clipping, byte for late clip for +/// Template argument accumT is expected to match bucketT for the case of early clipping, byte for late clip for /// images with one byte per channel and unsigned short for images with two bytes per channel. 
/// /// The pixel to correct @@ -1583,11 +1582,10 @@ void Renderer::AddToAccum(const tvec4& bucke /// The storage space for the corrected values to be written to template template -void Renderer::GammaCorrection(tvec4& bucket, Color& background, T g, T linRange, T vibrancy, bool doAlpha, bool scale, accumT* correctedChannels) +void Renderer::GammaCorrection(tvec4& bucket, Color& background, bucketT g, bucketT linRange, bucketT vibrancy, bool doAlpha, bool scale, accumT* correctedChannels) { - T alpha, ls, a; - bucketT newRgb[3];//Would normally use a Color, but don't want to call a needless constructor every time this function is called, which is once per pixel. - static T scaleVal = (numeric_limits::max() + 1) / T(256.0); + bucketT alpha, ls, a, newRgb[3];//Would normally use a Color, but don't want to call a needless constructor every time this function is called, which is once per pixel. + static bucketT scaleVal = (numeric_limits::max() + 1) / bucketT(256.0); if (bucket.a <= 0) { @@ -1596,20 +1594,20 @@ void Renderer::GammaCorrection(tvec4& bucket } else { - alpha = Palette::CalcAlpha(bucket.a, g, linRange); - ls = vibrancy * T(255) * alpha / bucket.a; - ClampRef(alpha, 0, 1); + alpha = Palette::CalcAlpha(bucket.a, g, linRange); + ls = vibrancy * 255 * alpha / bucket.a; + ClampRef(alpha, 0, 1); } - Palette::template CalcNewRgb(&bucket[0], ls, HighlightPower(), newRgb); + Palette::template CalcNewRgb(&bucket[0], ls, HighlightPower(), newRgb); for (glm::length_t rgbi = 0; rgbi < 3; rgbi++) { - a = newRgb[rgbi] + ((T(1.0) - vibrancy) * T(255) * pow(T(bucket[rgbi]), g)); + a = newRgb[rgbi] + ((1 - vibrancy) * 255 * pow(bucket[rgbi], g)); if (NumChannels() <= 3 || !Transparency()) { - a += ((T(1.0) - alpha) * background[rgbi]); + a += (1 - alpha) * background[rgbi]; } else { @@ -1621,14 +1619,14 @@ void Renderer::GammaCorrection(tvec4& bucket if (!scale) { - correctedChannels[rgbi] = accumT(Clamp(a, 0, 255));//Early clip, just assign directly. 
+ correctedChannels[rgbi] = accumT(Clamp(a, 0, 255));//Early clip, just assign directly. } else { if (m_CurvesSet) CurveAdjust(a, rgbi + 1); - correctedChannels[rgbi] = accumT(Clamp(a, 0, 255) * scaleVal);//Final accum, multiply by 1 for 8 bpc, or 256 for 16 bpc. + correctedChannels[rgbi] = accumT(Clamp(a, 0, 255) * scaleVal);//Final accum, multiply by 1 for 8 bpc, or 256 for 16 bpc. } } @@ -1644,10 +1642,10 @@ void Renderer::GammaCorrection(tvec4& bucket } template -void Renderer::CurveAdjust(T& a, const glm::length_t& index) +void Renderer::CurveAdjust(bucketT& a, const glm::length_t& index) { - size_t tempIndex = size_t(Clamp(a, 0, COLORMAP_LENGTH_MINUS_1)); - size_t tempIndex2 = size_t(Clamp(m_Csa[tempIndex].x, 0, COLORMAP_LENGTH_MINUS_1)); + size_t tempIndex = size_t(Clamp(a, 0, COLORMAP_LENGTH_MINUS_1)); + size_t tempIndex2 = size_t(Clamp(m_Csa[tempIndex].x, 0, COLORMAP_LENGTH_MINUS_1)); a = std::round(m_Csa[tempIndex2][index]); } @@ -1658,6 +1656,6 @@ void Renderer::CurveAdjust(T& a, const glm::length_t& index) template EMBER_API class Renderer; #ifdef DO_DOUBLE - template EMBER_API class Renderer; + template EMBER_API class Renderer; #endif } diff --git a/Source/Ember/Renderer.h b/Source/Ember/Renderer.h index 61c04f0..c0ded37 100644 --- a/Source/Ember/Renderer.h +++ b/Source/Ember/Renderer.h @@ -87,12 +87,12 @@ public: inline T Scale() const; inline T PixelsPerUnitX() const; inline T PixelsPerUnitY() const; - inline T K1() const; - inline T K2() const; + inline bucketT K1() const; + inline bucketT K2() const; inline const CarToRas* CoordMap() const; inline tvec4* HistBuckets(); inline tvec4* AccumulatorBuckets(); - inline SpatialFilter* GetSpatialFilter(); + inline SpatialFilter* GetSpatialFilter(); inline TemporalFilter* GetTemporalFilter(); //Virtual renderer properties overridden from RendererBase, getters only. 
@@ -116,12 +116,12 @@ public: inline T CenterY() const; inline T Rotate() const; inline T Hue() const; - inline T Brightness() const; - inline T Contrast() const; - inline T Gamma() const; - inline T Vibrancy() const; - inline T GammaThresh() const; - inline T HighlightPower() const; + inline bucketT Brightness() const; + inline bucketT Contrast() const; + inline bucketT Gamma() const; + inline bucketT Vibrancy() const; + inline bucketT GammaThresh() const; + inline bucketT HighlightPower() const; inline Color Background() const; inline const Xform* Xforms() const; inline Xform* NonConstXforms(); @@ -146,14 +146,14 @@ public: protected: //Non-virtual functions that might be needed by a derived class. - void PrepFinalAccumVals(Color& background, T& g, T& linRange, T& vibrancy); + void PrepFinalAccumVals(Color& background, bucketT& g, bucketT& linRange, bucketT& vibrancy); private: //Miscellaneous non-virtual functions used only in this class. void Accumulate(QTIsaac& rand, Point* samples, size_t sampleCount, const Palette* palette); /*inline*/ void AddToAccum(const tvec4& bucket, intmax_t i, intmax_t ii, intmax_t j, intmax_t jj); - template void GammaCorrection(tvec4& bucket, Color& background, T g, T linRange, T vibrancy, bool doAlpha, bool scale, accumT* correctedChannels); - void CurveAdjust(T& a, const glm::length_t& index); + template void GammaCorrection(tvec4& bucket, Color& background, bucketT g, bucketT linRange, bucketT vibrancy, bool doAlpha, bool scale, accumT* correctedChannels); + void CurveAdjust(bucketT& a, const glm::length_t& index); protected: T m_Scale; @@ -164,12 +164,12 @@ protected: T m_LowerLeftY; T m_UpperRightX; T m_UpperRightY; - T m_K1; - T m_K2; - T m_Vibrancy;//Accumulate these after each temporal sample. - T m_Gamma; + bucketT m_K1; + bucketT m_K2; + bucketT m_Vibrancy;//Accumulate these after each temporal sample. 
+ bucketT m_Gamma; T m_ScaledQuality; - Color m_Background; + Color m_Background;//This is a scaled copy of the m_Background member of m_Ember, but with a type of bucketT. Affine2D m_RotMat; Ember m_Ember; Ember m_TempEmber; @@ -182,9 +182,9 @@ protected: Palette m_Dmap, m_Csa; vector> m_HistBuckets; vector> m_AccumulatorBuckets; - unique_ptr> m_SpatialFilter; + unique_ptr> m_SpatialFilter; unique_ptr> m_TemporalFilter; - unique_ptr> m_DensityFilter; + unique_ptr> m_DensityFilter; vector>> m_Samples; EmberToXml m_EmberToXml; }; @@ -192,9 +192,4 @@ protected: //This class had to be implemented in a cpp file because the compiler was breaking. //So the explicit instantiation must be declared here rather than in Ember.cpp where //all of the other classes are done. -//template EMBER_API class Renderer; - -//#ifdef DO_DOUBLE -// template EMBER_API class Renderer; -//#endif } diff --git a/Source/Ember/SheepTools.h b/Source/Ember/SheepTools.h index 07c3d2f..d9472eb 100644 --- a/Source/Ember/SheepTools.h +++ b/Source/Ember/SheepTools.h @@ -384,12 +384,8 @@ public: } else//Randomize palette only. { - Palette palette; - if (m_PaletteList.Size()) - palette = *m_PaletteList.GetRandomPalette(); - - palette.MakeHueAdjustedPalette(ember.m_Palette, ember.m_Hue); + ember.m_Palette = *m_PaletteList.GetRandomPalette(); //If the palette retrieval fails, skip the mutation. if (ember.m_Palette.m_Index >= 0) @@ -398,8 +394,7 @@ public: } else { - palette.Clear(false); - ember.m_Palette = palette; + ember.m_Palette.Clear(false); cout << "Failure getting random palette, palette set to white\n"; } } @@ -638,12 +633,10 @@ public: }; ember.Clear(); - ember.m_Hue = (m_Rand.Rand() & 7) ? 
0 : m_Rand.Frand01(); if (m_PaletteList.Size()) - palette = *m_PaletteList.GetRandomPalette(); + ember.m_Palette = *m_PaletteList.GetRandomPalette(); - palette.MakeHueAdjustedPalette(ember.m_Palette, ember.m_Hue); ember.m_Time = 0; ember.m_Interp = EMBER_INTERP_LINEAR; ember.m_PaletteInterp = INTERP_HSV; @@ -922,16 +915,9 @@ public: if (changePalette) { - Palette* palette = nullptr; - - ember.m_Hue = 0.0; - if (m_PaletteList.Size()) - palette = m_PaletteList.GetRandomPalette(); - - if (palette) { - palette->MakeHueAdjustedPalette(ember.m_Palette, ember.m_Hue); + ember.m_Palette = *m_PaletteList.GetRandomPalette(); } else { diff --git a/Source/Ember/XmlToEmber.h b/Source/Ember/XmlToEmber.h index 9612876..87afb53 100644 --- a/Source/Ember/XmlToEmber.h +++ b/Source/Ember/XmlToEmber.h @@ -522,10 +522,10 @@ private: if (currentEmber.PaletteIndex() != -1) { - if (!m_PaletteList.GetHueAdjustedPalette(PaletteList::m_DefaultFilename, currentEmber.PaletteIndex(), currentEmber.m_Hue, currentEmber.m_Palette)) - { + if (auto pal = m_PaletteList.GetPalette(PaletteList::m_DefaultFilename, currentEmber.PaletteIndex())) + currentEmber.m_Palette = *pal; + else m_ErrorReport.push_back(string(loc) + " : Error assigning palette with index " + Itos(currentEmber.PaletteIndex())); - } } //if (!Interpolater::InterpMissingColors(currentEmber.m_Palette.m_Entries)) @@ -712,11 +712,6 @@ private: currentEmber.m_Background[1] = T(vals[1]); currentEmber.m_Background[2] = T(vals[2]); } - else if (!Compare(curAtt->name, "hue")) - { - Atof(attStr, currentEmber.m_Hue); - currentEmber.m_Hue = fmod(currentEmber.m_Hue, T(0.5));//Orig did fmod 1, but want it in the range -0.5 - 0.5. 
- } else if (!Compare(curAtt->name, "curves")) { stringstream ss(attStr); @@ -1055,8 +1050,6 @@ private: ret = ret && AttToEmberMotionFloat(att, attStr, FLAME_MOTION_DEPTH_BLUR, motion); else if (!Compare(curAtt->name, "rotate")) ret = ret && AttToEmberMotionFloat(att, attStr, FLAME_MOTION_ROTATE, motion); - else if (!Compare(curAtt->name, "hue")) - ret = ret && AttToEmberMotionFloat(att, attStr, FLAME_MOTION_HUE, motion); else if (!Compare(curAtt->name, "brightness")) ret = ret && AttToEmberMotionFloat(att, attStr, FLAME_MOTION_BRIGHTNESS, motion); else if (!Compare(curAtt->name, "gamma")) diff --git a/Source/EmberAnimate/EmberAnimate.cpp b/Source/EmberAnimate/EmberAnimate.cpp index b79ef83..e36ed1b 100644 --- a/Source/EmberAnimate/EmberAnimate.cpp +++ b/Source/EmberAnimate/EmberAnimate.cpp @@ -387,7 +387,7 @@ int _tmain(int argc, _TCHAR* argv[]) #ifdef DO_DOUBLE if (opt.Bits() == 64) { - b = EmberAnimate(opt); + b = EmberAnimate(opt); } else #endif diff --git a/Source/EmberCL/DEOpenCLKernelCreator.cpp b/Source/EmberCL/DEOpenCLKernelCreator.cpp index f940caf..6dc5d9d 100644 --- a/Source/EmberCL/DEOpenCLKernelCreator.cpp +++ b/Source/EmberCL/DEOpenCLKernelCreator.cpp @@ -4,58 +4,22 @@ namespace EmberCLns { /// -/// Empty constructor that does nothing. The user must call the one which takes a bool -/// argument before using this class. -/// This constructor only exists so the class can be a member of a class. -/// -template -DEOpenCLKernelCreator::DEOpenCLKernelCreator() -{ -} - -/// -/// Constructor for float template type that sets all kernel entry points as well as composes -/// all kernel source strings. -/// No program compilation is done here, the user must explicitly do it. -/// The caller must specify whether they are using an nVidia or AMD card because it changes -/// the amount of local memory available. -/// -/// True if running on an nVidia card, else false. 
-template <> -DEOpenCLKernelCreator::DEOpenCLKernelCreator(bool nVidia) -{ - m_NVidia = nVidia; - m_LogScaleAssignDEEntryPoint = "LogScaleAssignDensityFilterKernel"; - m_GaussianDEWithoutSsEntryPoint = "GaussianDEWithoutSsKernel"; - m_GaussianDESsWithScfEntryPoint = "GaussianDESsWithScfKernel"; - m_GaussianDESsWithoutScfEntryPoint = "GaussianDESsWithoutScfKernel"; - m_GaussianDEWithoutSsNoCacheEntryPoint = "GaussianDEWithoutSsNoCacheKernel"; - m_GaussianDESsWithScfNoCacheEntryPoint = "GaussianDESsWithScfNoCacheKernel"; - m_GaussianDESsWithoutScfNoCacheEntryPoint = "GaussianDESsWithoutScfNoCacheKernel"; - m_LogScaleAssignDEKernel = CreateLogScaleAssignDEKernelString(); - m_GaussianDEWithoutSsKernel = CreateGaussianDEKernel(1); - m_GaussianDESsWithScfKernel = CreateGaussianDEKernel(2); - m_GaussianDESsWithoutScfKernel = CreateGaussianDEKernel(3); - m_GaussianDEWithoutSsNoCacheKernel = CreateGaussianDEKernelNoLocalCache(1); - m_GaussianDESsWithScfNoCacheKernel = CreateGaussianDEKernelNoLocalCache(2); - m_GaussianDESsWithoutScfNoCacheKernel = CreateGaussianDEKernelNoLocalCache(3); -} - -/// -/// Constructor for double template type that sets all kernel entry points as well as composes +/// Constructor that sets all kernel entry points as well as composes /// all kernel source strings. /// Note that no versions of kernels that use the cache are compiled because /// the cache is not big enough to hold double4. /// No program compilation is done here, the user must explicitly do it. -/// Specifying true or false for the bool parameter has no effect since no local memory -/// is used when instantiated with type double. +/// The caller must specify whether they are using an nVidia or AMD card because it changes +/// the amount of local memory available. /// -/// True if running on an nVidia card, else false. Ignored. -template <> -DEOpenCLKernelCreator::DEOpenCLKernelCreator(bool nVidia) +/// True if double precision, else false for float. 
+/// True if running on an nVidia card, else false. +DEOpenCLKernelCreator::DEOpenCLKernelCreator(bool doublePrecision, bool nVidia) { -#ifdef ROW_ONLY_DE + m_DoublePrecision = doublePrecision; m_NVidia = nVidia; + +#ifdef ROW_ONLY_DE m_LogScaleAssignDEEntryPoint = "LogScaleAssignDensityFilterKernel"; m_GaussianDEWithoutSsEntryPoint = "GaussianDEWithoutSsKernel"; m_GaussianDESsWithScfEntryPoint = "GaussianDESsWithScfKernel"; @@ -71,24 +35,29 @@ DEOpenCLKernelCreator::DEOpenCLKernelCreator(bool nVidia) m_GaussianDESsWithScfNoCacheKernel = CreateGaussianDEKernelNoLocalCache(2); m_GaussianDESsWithoutScfNoCacheKernel = CreateGaussianDEKernelNoLocalCache(3); #else - m_NVidia = nVidia; - m_LogScaleAssignDEEntryPoint = "LogScaleAssignDensityFilterKernel"; - m_GaussianDEWithoutSsNoCacheEntryPoint = "GaussianDEWithoutSsNoCacheKernel"; - m_GaussianDESsWithScfNoCacheEntryPoint = "GaussianDESsWithScfNoCacheKernel"; + m_LogScaleAssignDEEntryPoint = "LogScaleAssignDensityFilterKernel"; + m_GaussianDEWithoutSsEntryPoint = "GaussianDEWithoutSsKernel"; + m_GaussianDESsWithScfEntryPoint = "GaussianDESsWithScfKernel"; + m_GaussianDESsWithoutScfEntryPoint = "GaussianDESsWithoutScfKernel"; + m_GaussianDEWithoutSsNoCacheEntryPoint = "GaussianDEWithoutSsNoCacheKernel"; + m_GaussianDESsWithScfNoCacheEntryPoint = "GaussianDESsWithScfNoCacheKernel"; m_GaussianDESsWithoutScfNoCacheEntryPoint = "GaussianDESsWithoutScfNoCacheKernel"; - m_LogScaleAssignDEKernel = CreateLogScaleAssignDEKernelString(); - m_GaussianDEWithoutSsNoCacheKernel = CreateGaussianDEKernelNoLocalCache(1); - m_GaussianDESsWithScfNoCacheKernel = CreateGaussianDEKernelNoLocalCache(2); - m_GaussianDESsWithoutScfNoCacheKernel = CreateGaussianDEKernelNoLocalCache(3); -#endif + m_LogScaleAssignDEKernel = CreateLogScaleAssignDEKernelString(); + m_GaussianDEWithoutSsKernel = CreateGaussianDEKernel(1); + m_GaussianDESsWithScfKernel = CreateGaussianDEKernel(2); + m_GaussianDESsWithoutScfKernel = CreateGaussianDEKernel(3); + 
m_GaussianDEWithoutSsNoCacheKernel = CreateGaussianDEKernelNoLocalCache(1); + m_GaussianDESsWithScfNoCacheKernel = CreateGaussianDEKernelNoLocalCache(2); + m_GaussianDESsWithoutScfNoCacheKernel = CreateGaussianDEKernelNoLocalCache(3); +#endif } /// /// Kernel source and entry point properties, getters only. /// -template string DEOpenCLKernelCreator::LogScaleAssignDEKernel() { return m_LogScaleAssignDEKernel; } -template string DEOpenCLKernelCreator::LogScaleAssignDEEntryPoint() { return m_LogScaleAssignDEEntryPoint; } +string DEOpenCLKernelCreator::LogScaleAssignDEKernel() { return m_LogScaleAssignDEKernel; } +string DEOpenCLKernelCreator::LogScaleAssignDEEntryPoint() { return m_LogScaleAssignDEEntryPoint; } /// /// Get the kernel source for the specified supersample and filterWidth. @@ -96,11 +65,10 @@ template string DEOpenCLKernelCreator::LogScaleAssignDEEntryPoin /// The supersample being used /// Filter width /// The kernel source -template -string DEOpenCLKernelCreator::GaussianDEKernel(size_t ss, uint filterWidth) +string DEOpenCLKernelCreator::GaussianDEKernel(size_t ss, uint filterWidth) { #ifndef ROW_ONLY_DE - if ((typeid(T) == typeid(double)) || (filterWidth > MaxDEFilterSize()))//Type double does not use cache. + if (filterWidth > MaxDEFilterSize()) { if (ss > 1) { @@ -133,11 +101,10 @@ string DEOpenCLKernelCreator::GaussianDEKernel(size_t ss, uint filterWidth) /// The supersample being used /// Filter width /// The name of the density estimation filtering entry point kernel function -template -string DEOpenCLKernelCreator::GaussianDEEntryPoint(size_t ss, uint filterWidth) +string DEOpenCLKernelCreator::GaussianDEEntryPoint(size_t ss, uint filterWidth) { #ifndef ROW_ONLY_DE - if ((typeid(T) == typeid(double)) || (filterWidth > MaxDEFilterSize()))//Type double does not use cache. 
+ if (filterWidth > MaxDEFilterSize()) { if (ss > 1) { @@ -169,8 +136,7 @@ string DEOpenCLKernelCreator::GaussianDEEntryPoint(size_t ss, uint filterWidt /// Filters larger than this value will run the version without local memory caching. /// /// The maximum filter size allowed for running the local memory version of density filtering -template -uint DEOpenCLKernelCreator::MaxDEFilterSize() { return 9; }//The true max would be (maxBoxSize - 1) / 2, but that's impractical because it can give us a tiny block size. +uint DEOpenCLKernelCreator::MaxDEFilterSize() { return 9; }//The true max would be (maxBoxSize - 1) / 2, but that's impractical because it can give us a tiny block size. /// /// Solve for the maximum filter radius. @@ -185,8 +151,7 @@ uint DEOpenCLKernelCreator::MaxDEFilterSize() { return 9; }//The true max wou /// Size of the desired filter. /// The supersample being used /// The maximum filter radius allowed -template -T DEOpenCLKernelCreator::SolveMaxDERad(uint maxBoxSize, T desiredFilterSize, T ss) +double DEOpenCLKernelCreator::SolveMaxDERad(uint maxBoxSize, double desiredFilterSize, double ss) { uint finalFilterSize = uint((ceil(desiredFilterSize) * ss) + (ss - 1.0)); @@ -195,7 +160,7 @@ T DEOpenCLKernelCreator::SolveMaxDERad(uint maxBoxSize, T desiredFilterSize, return desiredFilterSize; //The final size doesn't fit, so scale the original down until it fits. - return T(floor((MaxDEFilterSize() - (ss - 1.0)) / ss)); + return floor((MaxDEFilterSize() - (ss - 1.0)) / ss); } /// @@ -204,10 +169,9 @@ T DEOpenCLKernelCreator::SolveMaxDERad(uint maxBoxSize, T desiredFilterSize, /// /// The local memory available to a block /// The maximum filter box size allowed -template -uint DEOpenCLKernelCreator::SolveMaxBoxSize(uint localMem) +uint DEOpenCLKernelCreator::SolveMaxBoxSize(uint localMem) { - return uint(floor(std::sqrt(floor(T(localMem) / 16.0))));//Divide by 16 because each element is float4. 
+ return uint(floor(std::sqrt(floor(localMem / 16.0))));//Divide by 16 because each element is float4. } /// @@ -215,17 +179,16 @@ uint DEOpenCLKernelCreator::SolveMaxBoxSize(uint localMem) /// Use this when Passes == 1. /// /// The kernel string -template -string DEOpenCLKernelCreator::CreateLogScaleAssignDEKernelString() +string DEOpenCLKernelCreator::CreateLogScaleAssignDEKernelString() { ostringstream os; os << - ConstantDefinesString(typeid(T) == typeid(double)) << + ConstantDefinesString(m_DoublePrecision) << DensityFilterCLStructString << "__kernel void " << m_LogScaleAssignDEEntryPoint << "(\n" - " const __global real4* histogram,\n" - " __global real4* accumulator,\n" + " const __global real4_bucket* histogram,\n" + " __global real4_bucket* accumulator,\n" " __constant DensityFilterCL* logFilter\n" "\t)\n" "{\n" @@ -235,7 +198,7 @@ string DEOpenCLKernelCreator::CreateLogScaleAssignDEKernelString() "\n" " if (histogram[index].w != 0)\n" " {\n" - " real_t logScale = (logFilter->m_K1 * log(1.0 + histogram[index].w * logFilter->m_K2)) / histogram[index].w;\n" + " real_bucket_t logScale = (logFilter->m_K1 * log(1.0 + histogram[index].w * logFilter->m_K2)) / histogram[index].w;\n" "\n" " accumulator[index] = histogram[index] * logScale;\n"//Using a single real4 vector operation doubles the speed from doing each component individually. 
" }\n" @@ -248,23 +211,22 @@ string DEOpenCLKernelCreator::CreateLogScaleAssignDEKernelString() } #ifdef ROW_ONLY_DE -template -string DEOpenCLKernelCreator::CreateGaussianDEKernel(size_t ss) +string DEOpenCLKernelCreator::CreateGaussianDEKernel(size_t ss) { bool doSS = ss > 1; bool doScf = !(ss & 1); ostringstream os; os << - ConstantDefinesString(typeid(T) == typeid(double)) << + ConstantDefinesString(m_DoublePrecision) << DensityFilterCLStructString << UnionCLStructString << "__kernel void " << GaussianDEEntryPoint(ss, MaxDEFilterSize()) << "(\n" << - " const __global real4* histogram,\n" - " __global real4reals* accumulator,\n" + " const __global real4_bucket* histogram,\n" + " __global real4reals_bucket* accumulator,\n" " __constant DensityFilterCL* densityFilter,\n" - " const __global real_t* filterCoefs,\n" - " const __global real_t* filterWidths,\n" + " const __global real_bucket_t* filterCoefs,\n" + " const __global real_bucket_t* filterWidths,\n" " const __global uint* coefIndices,\n" " const uint chunkSizeW,\n" " const uint chunkSizeH,\n" @@ -282,7 +244,7 @@ string DEOpenCLKernelCreator::CreateGaussianDEKernel(size_t ss) if (doSS) { os << - " uint ss = (uint)floor((real_t)densityFilter->m_Supersample / 2.0);\n" + " uint ss = (uint)floor((real_bucket_t)densityFilter->m_Supersample / 2.0);\n" " int densityBoxLeftX;\n" " int densityBoxRightX;\n" " int densityBoxTopY;\n" @@ -291,7 +253,7 @@ string DEOpenCLKernelCreator::CreateGaussianDEKernel(size_t ss) if (doScf) os << - " real_t scfact = pow(densityFilter->m_Supersample / (densityFilter->m_Supersample + (real_t)1.0), (real_t)2.0);\n"; + " real_bucket_t scfact = pow(densityFilter->m_Supersample / (densityFilter->m_Supersample + (real_bucket_t)1.0), (real_bucket_t)2.0);\n"; } os << @@ -320,7 +282,7 @@ string DEOpenCLKernelCreator::CreateGaussianDEKernel(size_t ss) "\n" //Last, the indices in the global accumulator that the local bounds will be writing to. 
" accumWriteStartCol = blockHistStartCol - min(densityFilter->m_FilterWidth, blockHistStartCol);\n"//The first column in the accumulator this block will write to. - " colsToWrite = ceil((real_t)(boxReadEndCol - boxReadStartCol) / (real_t)BLOCK_SIZE_X);\n"//Elements per thread to be written to the accumulator. + " colsToWrite = ceil((real_bucket_t)(boxReadEndCol - boxReadStartCol) / (real_bucket_t)BLOCK_SIZE_X);\n"//Elements per thread to be written to the accumulator. " histCol = blockHistStartCol + THREAD_ID_X;\n"//The histogram column this individual thread will be reading from. "\n" " if (histCol >= rightBound)\n" @@ -331,15 +293,15 @@ string DEOpenCLKernelCreator::CreateGaussianDEKernel(size_t ss) //These are the local indices for the local data that are temporarily accumulated to before //writing out to the global accumulator. " uint boxCol = densityFilter->m_FilterWidth + THREAD_ID_X;\n" - " uint colsToZeroOffset, colsToZero = ceil((real_t)fullTempBoxWidth / (real_t)(BLOCK_SIZE_X));\n"//Usually is 2. + " uint colsToZeroOffset, colsToZero = ceil((real_bucket_t)fullTempBoxWidth / (real_bucket_t)(BLOCK_SIZE_X));\n"//Usually is 2. " int i, j, k, jmin, jmax;\n" " uint filterSelectInt, filterCoefIndex;\n" - " real_t cacheLog;\n" - " real_t filterSelect;\n" - " real4 bucket;\n" + " real_bucket_t cacheLog;\n" + " real_bucket_t filterSelect;\n" + " real4_bucket bucket;\n" ; - os << " __local real4reals filterBox[192];\n";//Must be >= fullTempBoxWidth. + os << " __local real4reals_bucket filterBox[192];\n";//Must be >= fullTempBoxWidth. 
os << "\n" @@ -389,7 +351,7 @@ string DEOpenCLKernelCreator::CreateGaussianDEKernel(size_t ss) " else if (filterSelect <= DE_THRESH)\n" " filterSelectInt = (int)ceil(filterSelect) - 1;\n" " else if (filterSelect != 0)\n" - " filterSelectInt = (int)DE_THRESH + (int)floor(pow((real_t)(filterSelect - DE_THRESH), densityFilter->m_Curve));\n" + " filterSelectInt = (int)DE_THRESH + (int)floor(pow((real_bucket_t)(filterSelect - DE_THRESH), densityFilter->m_Curve));\n" " else\n" " filterSelectInt = 0;\n" "\n" @@ -477,23 +439,22 @@ string DEOpenCLKernelCreator::CreateGaussianDEKernel(size_t ss) /// /// The supersample being used /// The kernel string -template -string DEOpenCLKernelCreator::CreateGaussianDEKernel(size_t ss) +string DEOpenCLKernelCreator::CreateGaussianDEKernel(size_t ss) { bool doSS = ss > 1; bool doScf = !(ss & 1); ostringstream os; os << - ConstantDefinesString(typeid(T) == typeid(double)) << + ConstantDefinesString(m_DoublePrecision) << DensityFilterCLStructString << UnionCLStructString << "__kernel void " << GaussianDEEntryPoint(ss, MaxDEFilterSize()) << "(\n" << - " const __global real4* histogram,\n" - " __global real4reals* accumulator,\n" + " const __global real4_bucket* histogram,\n" + " __global real4reals_bucket* accumulator,\n" " __constant DensityFilterCL* densityFilter,\n" - " const __global real_t* filterCoefs,\n" - " const __global real_t* filterWidths,\n" + " const __global real_bucket_t* filterCoefs,\n" + " const __global real_bucket_t* filterWidths,\n" " const __global uint* coefIndices,\n" " const uint chunkSizeW,\n" " const uint chunkSizeH,\n" @@ -509,7 +470,7 @@ string DEOpenCLKernelCreator::CreateGaussianDEKernel(size_t ss) if (doSS) { os << - " uint ss = (uint)floor((real_t)densityFilter->m_Supersample / 2.0);\n" + " uint ss = (uint)floor((real_bucket_t)densityFilter->m_Supersample / 2.0);\n" " int densityBoxLeftX;\n" " int densityBoxRightX;\n" " int densityBoxTopY;\n" @@ -518,7 +479,7 @@ string 
DEOpenCLKernelCreator::CreateGaussianDEKernel(size_t ss) if (doScf) os << - " real_t scfact = pow(densityFilter->m_Supersample / (densityFilter->m_Supersample + (real_t)1.0), (real_t)2.0);\n"; + " real_bucket_t scfact = pow(densityFilter->m_Supersample / (densityFilter->m_Supersample + (real_bucket_t)1.0), (real_bucket_t)2.0);\n"; } //Compute the size of the temporary box which is the block width + 2 * filter width x block height + 2 * filter width. @@ -561,7 +522,7 @@ string DEOpenCLKernelCreator::CreateGaussianDEKernel(size_t ss) //Last, the indices in the global accumulator that the local bounds will be writing to. " accumWriteStartRow = blockHistStartRow - min(densityFilter->m_FilterWidth, blockHistStartRow);\n"//Will be fw - 0 except for boundary columns, it will be less. " accumWriteStartCol = blockHistStartCol - min(densityFilter->m_FilterWidth, blockHistStartCol);\n" - " colsToWrite = ceil((real_t)(boxReadEndCol - boxReadStartCol) / (real_t)BLOCK_SIZE_X);\n" + " colsToWrite = ceil((real_bucket_t)(boxReadEndCol - boxReadStartCol) / (real_bucket_t)BLOCK_SIZE_X);\n" "\n" " uint threadHistRow = blockHistStartRow + THREAD_ID_Y;\n"//The histogram row this individual thread will be reading from. " uint threadHistCol = blockHistStartCol + THREAD_ID_X;\n"//The histogram column this individual thread will be reading from. @@ -573,19 +534,19 @@ string DEOpenCLKernelCreator::CreateGaussianDEKernel(size_t ss) //writing out to the global accumulator. " uint boxRow = densityFilter->m_FilterWidth + THREAD_ID_Y;\n" " uint boxCol = densityFilter->m_FilterWidth + THREAD_ID_X;\n" - " uint colElementsToZero = ceil((real_t)fullTempBoxWidth / (real_t)(BLOCK_SIZE_X));\n"//Usually is 2. + " uint colElementsToZero = ceil((real_bucket_t)fullTempBoxWidth / (real_bucket_t)(BLOCK_SIZE_X));\n"//Usually is 2. 
" int i, j, k;\n" " uint filterSelectInt, filterCoefIndex;\n" - " real_t cacheLog;\n" - " real_t filterSelect;\n" - " real4 bucket;\n" + " real_bucket_t cacheLog;\n" + " real_bucket_t filterSelect;\n" + " real4_bucket bucket;\n" ; //This will be treated as having dimensions of (BLOCK_SIZE_X + (fw * 2)) x (BLOCK_SIZE_Y + (fw * 2)). if (m_NVidia) - os << " __local real4reals filterBox[3000];\n"; + os << " __local real4reals_bucket filterBox[3000];\n"; else - os << " __local real4reals filterBox[1200];\n"; + os << " __local real4reals_bucket filterBox[1200];\n"; os << //Zero the temp buffers first. This splits the zeroization evenly across all threads (columns) in the first block row. @@ -662,7 +623,7 @@ string DEOpenCLKernelCreator::CreateGaussianDEKernel(size_t ss) " else if (filterSelect <= DE_THRESH)\n" " filterSelectInt = (int)ceil(filterSelect) - 1;\n" " else\n" - " filterSelectInt = (int)DE_THRESH + (int)floor(pow((real_t)(filterSelect - DE_THRESH), densityFilter->m_Curve));\n" + " filterSelectInt = (int)DE_THRESH + (int)floor(pow((real_bucket_t)(filterSelect - DE_THRESH), densityFilter->m_Curve));\n" "\n" " if (filterSelectInt > densityFilter->m_MaxFilterIndex)\n" " filterSelectInt = densityFilter->m_MaxFilterIndex;\n" @@ -736,24 +697,23 @@ string DEOpenCLKernelCreator::CreateGaussianDEKernel(size_t ss) /// /// The supersample being used /// The kernel string -template -string DEOpenCLKernelCreator::CreateGaussianDEKernelNoLocalCache(size_t ss) +string DEOpenCLKernelCreator::CreateGaussianDEKernelNoLocalCache(size_t ss) { bool doSS = ss > 1; bool doScf = !(ss & 1); ostringstream os; os << - ConstantDefinesString(typeid(T) == typeid(double)) << + ConstantDefinesString(m_DoublePrecision) << DensityFilterCLStructString << UnionCLStructString << AddToAccumWithCheckFunctionString << "__kernel void " << GaussianDEEntryPoint(ss, MaxDEFilterSize() + 1) << "(\n" << - " const __global real4* histogram,\n" - " __global real4reals* accumulator,\n" + " const __global 
real4_bucket* histogram,\n" + " __global real4reals_bucket* accumulator,\n" " __constant DensityFilterCL* densityFilter,\n" - " const __global real_t* filterCoefs,\n" - " const __global real_t* filterWidths,\n" + " const __global real_bucket_t* filterCoefs,\n" + " const __global real_bucket_t* filterWidths,\n" " const __global uint* coefIndices,\n" " const uint chunkSizeW,\n" " const uint chunkSizeH,\n" @@ -769,14 +729,14 @@ string DEOpenCLKernelCreator::CreateGaussianDEKernelNoLocalCache(size_t ss) if (doSS) { os << - " uint ss = (uint)floor((real_t)densityFilter->m_Supersample / 2.0);\n" + " uint ss = (uint)floor((real_bucket_t)densityFilter->m_Supersample / 2.0);\n" " int densityBoxLeftX;\n" " int densityBoxRightX;\n" " int densityBoxTopY;\n" " int densityBoxBottomY;\n"; if (doScf) - os << " real_t scfact = pow((real_t)densityFilter->m_Supersample / ((real_t)densityFilter->m_Supersample + (real_t)1.0), (real_t)2.0);\n"; + os << " real_bucket_t scfact = pow((real_bucket_t)densityFilter->m_Supersample / ((real_bucket_t)densityFilter->m_Supersample + (real_bucket_t)1.0), (real_bucket_t)2.0);\n"; } os << @@ -796,10 +756,9 @@ string DEOpenCLKernelCreator::CreateGaussianDEKernelNoLocalCache(size_t ss) "\n" " int i, j;\n" " uint filterSelectInt, filterCoefIndex;\n" - " real_t cacheLog;\n" - " real_t logScale;\n" - " real_t filterSelect;\n" - " real4 bucket;\n" + " real_bucket_t cacheLog;\n" + " real_bucket_t filterSelect;\n" + " real4_bucket bucket;\n" "\n" " if (threadHistRow < botBound && threadHistCol < rightBound)\n" " {\n" @@ -843,7 +802,7 @@ string DEOpenCLKernelCreator::CreateGaussianDEKernelNoLocalCache(size_t ss) " else if (filterSelect <= DE_THRESH)\n" " filterSelectInt = (int)ceil(filterSelect) - 1;\n" " else\n" - " filterSelectInt = (int)DE_THRESH + (int)floor(pow((real_t)(filterSelect - DE_THRESH), densityFilter->m_Curve));\n" + " filterSelectInt = (int)DE_THRESH + (int)floor(pow((real_bucket_t)(filterSelect - DE_THRESH), densityFilter->m_Curve));\n" "\n" 
" if (filterSelectInt > densityFilter->m_MaxFilterIndex)\n" " filterSelectInt = densityFilter->m_MaxFilterIndex;\n" @@ -877,10 +836,4 @@ string DEOpenCLKernelCreator::CreateGaussianDEKernelNoLocalCache(size_t ss) return os.str(); } - -template EMBERCL_API class DEOpenCLKernelCreator; - -#ifdef DO_DOUBLE - template EMBERCL_API class DEOpenCLKernelCreator; -#endif } diff --git a/Source/EmberCL/DEOpenCLKernelCreator.h b/Source/EmberCL/DEOpenCLKernelCreator.h index 075fd3b..68305e0 100644 --- a/Source/EmberCL/DEOpenCLKernelCreator.h +++ b/Source/EmberCL/DEOpenCLKernelCreator.h @@ -27,14 +27,11 @@ namespace EmberCLns /// ends up being not much faster than doing it on the CPU. /// String members are kept for the program source and entry points /// for each version of the program. -/// Template argument expected to be float or double. /// -template class EMBERCL_API DEOpenCLKernelCreator { public: - DEOpenCLKernelCreator(); - DEOpenCLKernelCreator(bool nVidia); + DEOpenCLKernelCreator(bool doublePrecision, bool nVidia); //Accessors. string LogScaleAssignDEKernel(); @@ -44,7 +41,7 @@ public: //Miscellaneous static functions. 
static uint MaxDEFilterSize(); - static T SolveMaxDERad(uint maxBoxSize, T desiredFilterSize, T ss); + static double SolveMaxDERad(uint maxBoxSize, double desiredFilterSize, double ss); static uint SolveMaxBoxSize(uint localMem); private: @@ -74,6 +71,7 @@ private: string m_GaussianDESsWithoutScfNoCacheKernel; string m_GaussianDESsWithoutScfNoCacheEntryPoint; + bool m_DoublePrecision; bool m_NVidia; }; } diff --git a/Source/EmberCL/EmberCLFunctions.h b/Source/EmberCL/EmberCLFunctions.h index 5f0f897..4230646 100644 --- a/Source/EmberCL/EmberCLFunctions.h +++ b/Source/EmberCL/EmberCLFunctions.h @@ -15,9 +15,9 @@ namespace EmberCLns static const char* RgbToHsvFunctionString = //rgb 0 - 1, //h 0 - 6, s 0 - 1, v 0 - 1 - "static inline void RgbToHsv(real4* rgb, real4* hsv)\n" + "static inline void RgbToHsv(real4_bucket* rgb, real4_bucket* hsv)\n" "{\n" - " real_t max, min, del, rc, gc, bc;\n" + " real_bucket_t max, min, del, rc, gc, bc;\n" "\n" //Compute maximum of r, g, b. " if ((*rgb).x >= (*rgb).y)\n" @@ -85,10 +85,10 @@ static const char* RgbToHsvFunctionString = static const char* HsvToRgbFunctionString = //h 0 - 6, s 0 - 1, v 0 - 1 //rgb 0 - 1 - "static inline void HsvToRgb(real4* hsv, real4* rgb)\n" + "static inline void HsvToRgb(real4_bucket* hsv, real4_bucket* rgb)\n" "{\n" " int j;\n" - " real_t f, p, q, t;\n" + " real_bucket_t f, p, q, t;\n" "\n" " while ((*hsv).x >= 6)\n" " (*hsv).x = (*hsv).x - 6;\n" @@ -119,9 +119,9 @@ static const char* HsvToRgbFunctionString = /// OpenCL equivalent of Palette::CalcAlpha(). 
/// static const char* CalcAlphaFunctionString = - "static inline real_t CalcAlpha(real_t density, real_t gamma, real_t linrange)\n"//Not the slightest clue what this is doing.//DOC + "static inline real_t CalcAlpha(real_bucket_t density, real_bucket_t gamma, real_bucket_t linrange)\n"//Not the slightest clue what this is doing.//DOC "{\n" - " real_t frac, alpha, funcval = pow(linrange, gamma);\n" + " real_bucket_t frac, alpha, funcval = pow(linrange, gamma);\n" "\n" " if (density > 0)\n" " {\n" @@ -147,7 +147,7 @@ static const char* CalcAlphaFunctionString = /// during final accumulation, which only takes floats. /// static const char* CurveAdjustFunctionString = -"static inline void CurveAdjust(__constant real4reals* csa, float* a, uint index)\n" +"static inline void CurveAdjust(__constant real4reals_bucket* csa, float* a, uint index)\n" "{\n" " uint tempIndex = (uint)Clamp(*a, 0.0, (float)COLORMAP_LENGTH_MINUS_1);\n" " uint tempIndex2 = (uint)Clamp(csa[tempIndex].m_Real4.x, 0.0, (real_t)COLORMAP_LENGTH_MINUS_1);\n" @@ -359,18 +359,18 @@ static string AtomicString(bool doublePrecision, bool dp64AtomicSupport) if (!doublePrecision || dp64AtomicSupport) { os << - "void AtomicAdd(volatile __global real_t* source, const real_t operand)\n" + "void AtomicAdd(volatile __global real_bucket_t* source, const real_bucket_t operand)\n" "{\n" " union\n" " {\n" " atomi intVal;\n" - " real_t realVal;\n" + " real_bucket_t realVal;\n" " } newVal;\n" "\n" " union\n" " {\n" " atomi intVal;\n" - " real_t realVal;\n" + " real_bucket_t realVal;\n" " } prevVal;\n" "\n" " do\n" @@ -383,18 +383,18 @@ static string AtomicString(bool doublePrecision, bool dp64AtomicSupport) else//They want double precision and do not have dp atomic support. 
{ os << - "void AtomicAdd(volatile __global real_t* source, const real_t operand)\n" + "void AtomicAdd(volatile __global double* source, const double operand)\n" "{\n" " union\n" " {\n" " uint intVal[2];\n" - " real_t realVal;\n" + " double realVal;\n" " } newVal;\n" "\n" " union\n" " {\n" " uint intVal[2];\n" - " real_t realVal;\n" + " double realVal;\n" " } prevVal;\n" "\n" " do\n" @@ -408,27 +408,4 @@ static string AtomicString(bool doublePrecision, bool dp64AtomicSupport) return os.str(); } - -#ifdef GRAVEYARD -/*"void AtomicLocalAdd(volatile __local real_t* source, const real_t operand)\n" - "{\n" - " union\n" - " {\n" - " atomi intVal;\n" - " real_t realVal;\n" - " } newVal;\n" - "\n" - " union\n" - " {\n" - " atomi intVal;\n" - " real_t realVal;\n" - " } prevVal;\n" - "\n" - " do\n" - " {\n" - " prevVal.realVal = *source;\n" - " newVal.realVal = prevVal.realVal + operand;\n" - " } while (atomic_cmpxchg((volatile __local atomi*)source, prevVal.intVal, newVal.intVal) != prevVal.intVal);\n" - "}\n"*/ -#endif } \ No newline at end of file diff --git a/Source/EmberCL/EmberCLStructs.h b/Source/EmberCL/EmberCLStructs.h index 6409c1c..59b7521 100644 --- a/Source/EmberCL/EmberCLStructs.h +++ b/Source/EmberCL/EmberCLStructs.h @@ -41,7 +41,9 @@ static string ConstantDefinesString(bool doublePrecision) << "typedef long intPrec;\n" << "typedef ulong atomi;\n" << "typedef double real_t;\n" + << "typedef float real_bucket_t;\n"//Assume buckets are always float, even though iter calcs are in double. << "typedef double4 real4;\n" + << "typedef float4 real4_bucket;\n"//And here too. 
<< "#define EPS (DBL_EPSILON)\n" ; } @@ -50,7 +52,9 @@ static string ConstantDefinesString(bool doublePrecision) os << "typedef int intPrec;\n" "typedef uint atomi;\n" "typedef float real_t;\n" + "typedef float real_bucket_t;\n" "typedef float4 real4;\n" + "typedef float4 real4_bucket;\n" "#define EPS (FLT_EPSILON)\n" ; } @@ -284,9 +288,9 @@ struct ALIGN DensityFilterCL static const char* DensityFilterCLStructString = "typedef struct __attribute__ " ALIGN_CL " _DensityFilterCL\n" "{\n" -" real_t m_Curve;\n" -" real_t m_K1;\n" -" real_t m_K2;\n" +" real_bucket_t m_Curve;\n" +" real_bucket_t m_K1;\n" +" real_bucket_t m_K2;\n" " uint m_Supersample;\n" " uint m_SuperRasW;\n" " uint m_SuperRasH;\n" @@ -340,11 +344,11 @@ static const char* SpatialFilterCLStructString = " uint m_DensityFilterOffset;\n" " uint m_Transparency;\n" " uint m_YAxisUp;\n" -" real_t m_Vibrancy;\n" -" real_t m_HighlightPower;\n" -" real_t m_Gamma;\n" -" real_t m_LinRange;\n" -" real_t m_Background[4];\n"//For some reason, using float4/double4 here does not align no matter what. So just use an array of 4. +" real_bucket_t m_Vibrancy;\n" +" real_bucket_t m_HighlightPower;\n" +" real_bucket_t m_Gamma;\n" +" real_bucket_t m_LinRange;\n" +" real_bucket_t m_Background[4];\n"//For some reason, using float4/double4 here does not align no matter what. So just use an array of 4. "} SpatialFilterCL;\n" "\n"; @@ -383,5 +387,11 @@ static const char* UnionCLStructString = " real4 m_Real4;\n" " real_t m_Reals[4];\n" "} real4reals;\n" +"\n" +"typedef union\n"//Used to match the bucket template type. 
+"{\n" +" real4_bucket m_Real4;\n" +" real_bucket_t m_Reals[4];\n" +"} real4reals_bucket;\n" "\n"; } diff --git a/Source/EmberCL/FinalAccumOpenCLKernelCreator.cpp b/Source/EmberCL/FinalAccumOpenCLKernelCreator.cpp index ffb4140..e06ce79 100644 --- a/Source/EmberCL/FinalAccumOpenCLKernelCreator.cpp +++ b/Source/EmberCL/FinalAccumOpenCLKernelCreator.cpp @@ -7,9 +7,9 @@ namespace EmberCLns /// Constructor that creates all kernel strings. /// The caller will access these strings through the accessor functions. /// -template -FinalAccumOpenCLKernelCreator::FinalAccumOpenCLKernelCreator() +FinalAccumOpenCLKernelCreator::FinalAccumOpenCLKernelCreator(bool doublePrecision) { + m_DoublePrecision = doublePrecision; m_GammaCorrectionWithAlphaCalcEntryPoint = "GammaCorrectionWithAlphaCalcKernel"; m_GammaCorrectionWithoutAlphaCalcEntryPoint = "GammaCorrectionWithoutAlphaCalcKernel"; @@ -37,24 +37,24 @@ FinalAccumOpenCLKernelCreator::FinalAccumOpenCLKernelCreator() /// Kernel source and entry point properties, getters only. 
/// -template string FinalAccumOpenCLKernelCreator::GammaCorrectionWithAlphaCalcKernel() { return m_GammaCorrectionWithAlphaCalcKernel; } -template string FinalAccumOpenCLKernelCreator::GammaCorrectionWithAlphaCalcEntryPoint() { return m_GammaCorrectionWithAlphaCalcEntryPoint; } -template string FinalAccumOpenCLKernelCreator::GammaCorrectionWithoutAlphaCalcKernel() { return m_GammaCorrectionWithoutAlphaCalcKernel; } -template string FinalAccumOpenCLKernelCreator::GammaCorrectionWithoutAlphaCalcEntryPoint() { return m_GammaCorrectionWithoutAlphaCalcEntryPoint; } +string FinalAccumOpenCLKernelCreator::GammaCorrectionWithAlphaCalcKernel() { return m_GammaCorrectionWithAlphaCalcKernel; } +string FinalAccumOpenCLKernelCreator::GammaCorrectionWithAlphaCalcEntryPoint() { return m_GammaCorrectionWithAlphaCalcEntryPoint; } +string FinalAccumOpenCLKernelCreator::GammaCorrectionWithoutAlphaCalcKernel() { return m_GammaCorrectionWithoutAlphaCalcKernel; } +string FinalAccumOpenCLKernelCreator::GammaCorrectionWithoutAlphaCalcEntryPoint() { return m_GammaCorrectionWithoutAlphaCalcEntryPoint; } -template string FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipKernel() { return m_FinalAccumEarlyClipKernel; } -template string FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipEntryPoint() { return m_FinalAccumEarlyClipEntryPoint; } -template string FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel() { return m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel; } -template string FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint() { return m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint; } -template string FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel() { return m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel; } -template string FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint() { return 
m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint; } +string FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipKernel() { return m_FinalAccumEarlyClipKernel; } +string FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipEntryPoint() { return m_FinalAccumEarlyClipEntryPoint; } +string FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel() { return m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel; } +string FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint() { return m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint; } +string FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel() { return m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel; } +string FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint() { return m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint; } -template string FinalAccumOpenCLKernelCreator::FinalAccumLateClipKernel() { return m_FinalAccumLateClipKernel; } -template string FinalAccumOpenCLKernelCreator::FinalAccumLateClipEntryPoint() { return m_FinalAccumLateClipEntryPoint; } -template string FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel() { return m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel; } -template string FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint() { return m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint; } -template string FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel() { return m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel; } -template string FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint() { return m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint; } +string FinalAccumOpenCLKernelCreator::FinalAccumLateClipKernel() { return m_FinalAccumLateClipKernel; } 
+string FinalAccumOpenCLKernelCreator::FinalAccumLateClipEntryPoint() { return m_FinalAccumLateClipEntryPoint; } +string FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel() { return m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel; } +string FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint() { return m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint; } +string FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel() { return m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel; } +string FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint() { return m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint; } /// /// Get the gamma correction entry point. @@ -62,8 +62,7 @@ template string FinalAccumOpenCLKernelCreator::FinalAccumLateCli /// The number of channels used, 3 or 4. /// True if channels equals 4 and using transparency, else false. /// The name of the gamma correction entry point kernel function -template -string FinalAccumOpenCLKernelCreator::GammaCorrectionEntryPoint(size_t channels, bool transparency) +string FinalAccumOpenCLKernelCreator::GammaCorrectionEntryPoint(size_t channels, bool transparency) { bool alphaCalc = ((channels > 3) && transparency); return alphaCalc ? m_GammaCorrectionWithAlphaCalcEntryPoint : m_GammaCorrectionWithoutAlphaCalcEntryPoint; @@ -75,8 +74,7 @@ string FinalAccumOpenCLKernelCreator::GammaCorrectionEntryPoint(size_t channe /// The number of channels used, 3 or 4. /// True if channels equals 4 and using transparency, else false. /// The gamma correction kernel string -template -string FinalAccumOpenCLKernelCreator::GammaCorrectionKernel(size_t channels, bool transparency) +string FinalAccumOpenCLKernelCreator::GammaCorrectionKernel(size_t channels, bool transparency) { bool alphaCalc = ((channels > 3) && transparency); return alphaCalc ? 
m_GammaCorrectionWithAlphaCalcKernel : m_GammaCorrectionWithoutAlphaCalcKernel; @@ -91,16 +89,15 @@ string FinalAccumOpenCLKernelCreator::GammaCorrectionKernel(size_t channels, /// Storage for the alpha base value used in the kernel. 0 if transparency is true, else 255. /// Storage for the alpha scale value used in the kernel. 255 if transparency is true, else 0. /// The name of the final accumulation entry point kernel function -template -string FinalAccumOpenCLKernelCreator::FinalAccumEntryPoint(bool earlyClip, size_t channels, bool transparency, T& alphaBase, T& alphaScale) +string FinalAccumOpenCLKernelCreator::FinalAccumEntryPoint(bool earlyClip, size_t channels, bool transparency, double& alphaBase, double& alphaScale) { bool alphaCalc = ((channels > 3) && transparency); bool alphaAccum = channels > 3; if (alphaAccum) { - alphaBase = transparency ? 0.0f : 255.0f;//See the table below. - alphaScale = transparency ? 255.0f : 0.0f; + alphaBase = transparency ? 0 : 255;//See the table below. + alphaScale = transparency ? 255 : 0; } if (earlyClip) @@ -134,8 +131,7 @@ string FinalAccumOpenCLKernelCreator::FinalAccumEntryPoint(bool earlyClip, si /// The number of channels used, 3 or 4. /// True if channels equals 4 and using transparency, else false. /// The final accumulation kernel string -template -string FinalAccumOpenCLKernelCreator::FinalAccumKernel(bool earlyClip, size_t channels, bool transparency) +string FinalAccumOpenCLKernelCreator::FinalAccumKernel(bool earlyClip, size_t channels, bool transparency) { bool alphaCalc = (channels > 3 && transparency); bool alphaAccum = channels > 3; @@ -171,8 +167,7 @@ string FinalAccumOpenCLKernelCreator::FinalAccumKernel(bool earlyClip, size_t /// The number of channels used, 3 or 4. /// True if channels equals 4 and using transparency, else false. 
/// The final accumulation kernel string -template -string FinalAccumOpenCLKernelCreator::CreateFinalAccumKernelString(bool earlyClip, size_t channels, bool transparency) +string FinalAccumOpenCLKernelCreator::CreateFinalAccumKernelString(bool earlyClip, size_t channels, bool transparency) { return CreateFinalAccumKernelString(earlyClip, (channels > 3 && transparency), channels > 3); } @@ -184,14 +179,13 @@ string FinalAccumOpenCLKernelCreator::CreateFinalAccumKernelString(bool early /// True if channels equals 4 and transparency is desired, else false. /// True if channels equals 4 /// The final accumulation kernel string -template -string FinalAccumOpenCLKernelCreator::CreateFinalAccumKernelString(bool earlyClip, bool alphaCalc, bool alphaAccum) +string FinalAccumOpenCLKernelCreator::CreateFinalAccumKernelString(bool earlyClip, bool alphaCalc, bool alphaAccum) { ostringstream os; string channels = alphaAccum ? "4" : "3"; os << - ConstantDefinesString(typeid(T) == typeid(double)) << + ConstantDefinesString(m_DoublePrecision) << ClampRealFunctionString << UnionCLStructString << RgbToHsvFunctionString << @@ -228,14 +222,14 @@ string FinalAccumOpenCLKernelCreator::CreateFinalAccumKernelString(bool early } os << - " const __global real4reals* accumulator,\n" + " const __global real4reals_bucket* accumulator,\n" " __write_only image2d_t pixels,\n" " __constant SpatialFilterCL* spatialFilter,\n" - " __constant real_t* filterCoefs,\n" - " __constant real4reals* csa,\n" + " __constant real_bucket_t* filterCoefs,\n" + " __constant real4reals_bucket* csa,\n" " const uint doCurves,\n" - " const real_t alphaBase,\n" - " const real_t alphaScale\n" + " const real_bucket_t alphaBase,\n" + " const real_bucket_t alphaScale\n" "\t)\n" "{\n" "\n" @@ -250,8 +244,8 @@ string FinalAccumOpenCLKernelCreator::CreateFinalAccumKernelString(bool early " float4floats finalColor;\n" " int ii, jj;\n" " uint filterKRowIndex;\n" - " const __global real4reals* accumBucket;\n" - " real4reals 
newBucket;\n" + " const __global real4reals_bucket* accumBucket;\n" + " real4reals_bucket newBucket;\n" " newBucket.m_Real4 = 0;\n" "\n" " for (jj = 0; jj < spatialFilter->m_FilterWidth; jj++)\n" @@ -260,7 +254,7 @@ string FinalAccumOpenCLKernelCreator::CreateFinalAccumKernelString(bool early "\n" " for (ii = 0; ii < spatialFilter->m_FilterWidth; ii++)\n" " {\n" - " real_t k = filterCoefs[ii + filterKRowIndex];\n" + " real_bucket_t k = filterCoefs[ii + filterKRowIndex];\n" "\n" " accumBucket = accumulator + (accumX + ii) + ((accumY + jj) * spatialFilter->m_SuperRasW);\n" " newBucket.m_Real4 += (k * accumBucket->m_Real4);\n" @@ -287,10 +281,10 @@ string FinalAccumOpenCLKernelCreator::CreateFinalAccumKernelString(bool early else { //Late clip, so must gamma correct from the temp new bucket to temp float4. - if (typeid(T) == typeid(double)) + if (m_DoublePrecision) { os << - " real4reals realFinal;\n" + " real4reals_bucket realFinal;\n" "\n" " GammaCorrectionFloats(&newBucket, &(spatialFilter->m_Background[0]), spatialFilter->m_Gamma, spatialFilter->m_LinRange, spatialFilter->m_Vibrancy, spatialFilter->m_HighlightPower, alphaBase, alphaScale, &(realFinal.m_Reals[0]));\n" " finalColor.m_Float4.x = (float)realFinal.m_Real4.x;\n" @@ -333,21 +327,20 @@ string FinalAccumOpenCLKernelCreator::CreateFinalAccumKernelString(bool early /// True if channels equals 4 /// True if writing to global buffer (late clip), else false (early clip). /// The gamma correction function string -template -string FinalAccumOpenCLKernelCreator::CreateGammaCorrectionFunctionString(bool globalBucket, bool alphaCalc, bool alphaAccum, bool finalOut) +string FinalAccumOpenCLKernelCreator::CreateGammaCorrectionFunctionString(bool globalBucket, bool alphaCalc, bool alphaAccum, bool finalOut) { ostringstream os; string dataType; string unionMember; - dataType = "real_t"; + dataType = "real_bucket_t"; //Use real_t for all cases, early clip and final accum. 
- os << "void GammaCorrectionFloats(" << (globalBucket ? "__global " : "") << "real4reals* bucket, __constant real_t* background, real_t g, real_t linRange, real_t vibrancy, real_t highlightPower, real_t alphaBase, real_t alphaScale, " << (finalOut ? "" : "__global") << " real_t* correctedChannels)\n"; + os << "void GammaCorrectionFloats(" << (globalBucket ? "__global " : "") << "real4reals_bucket* bucket, __constant real_bucket_t* background, real_bucket_t g, real_bucket_t linRange, real_bucket_t vibrancy, real_bucket_t highlightPower, real_bucket_t alphaBase, real_bucket_t alphaScale, " << (finalOut ? "" : "__global") << " real_bucket_t* correctedChannels)\n"; os << "{\n" - << " real_t alpha, ls, tmp, a;\n" - << " real4reals newRgb;\n" + << " real_bucket_t alpha, ls, tmp, a;\n" + << " real4reals_bucket newRgb;\n" << "\n" << " if (bucket->m_Reals[3] <= 0)\n" << " {\n" @@ -359,7 +352,7 @@ string FinalAccumOpenCLKernelCreator::CreateGammaCorrectionFunctionString(boo << " tmp = bucket->m_Reals[3];\n" << " alpha = CalcAlpha(tmp, g, linRange);\n" << " ls = vibrancy * 256.0 * alpha / tmp;\n" - << " ClampRef(&alpha, 0.0, 1.0);\n" + << " alpha = clamp(alpha, (real_bucket_t)0.0, (real_bucket_t)1.0);\n" << " }\n" << "\n" << " CalcNewRgb(bucket, ls, highlightPower, &newRgb);\n" @@ -385,7 +378,7 @@ string FinalAccumOpenCLKernelCreator::CreateGammaCorrectionFunctionString(boo os << "\n" - " correctedChannels[rgbi] = (" << dataType << ")clamp(a, (real_t)0.0, (real_t)255.0);\n" + " correctedChannels[rgbi] = (" << dataType << ")clamp(a, (real_bucket_t)0.0, (real_bucket_t)255.0);\n" " }\n" "\n"; @@ -416,19 +409,18 @@ string FinalAccumOpenCLKernelCreator::CreateGammaCorrectionFunctionString(boo /// /// True if writing the corrected value to a global buffer (early clip), else false (late clip). 
/// The CalcNewRgb function string -template -string FinalAccumOpenCLKernelCreator::CreateCalcNewRgbFunctionString(bool globalBucket) +string FinalAccumOpenCLKernelCreator::CreateCalcNewRgbFunctionString(bool globalBucket) { ostringstream os; os << - "static void CalcNewRgb(" << (globalBucket ? "__global " : "") << "real4reals* oldRgb, real_t ls, real_t highPow, real4reals* newRgb)\n" + "static void CalcNewRgb(" << (globalBucket ? "__global " : "") << "real4reals_bucket* oldRgb, real_bucket_t ls, real_bucket_t highPow, real4reals_bucket* newRgb)\n" "{\n" " int rgbi;\n" - " real_t newls, lsratio;\n" - " real4reals newHsv;\n" - " real_t maxa, maxc;\n" - " real_t adjhlp;\n" + " real_bucket_t newls, lsratio;\n" + " real4reals_bucket newHsv;\n" + " real_bucket_t maxa, maxc;\n" + " real_bucket_t adjhlp;\n" "\n" " if (ls == 0 || (oldRgb->m_Real4.x == 0 && oldRgb->m_Real4.y == 0 && oldRgb->m_Real4.z == 0))\n"//Can't do a vector compare to zero. " {\n" @@ -485,14 +477,13 @@ string FinalAccumOpenCLKernelCreator::CreateCalcNewRgbFunctionString(bool glo /// /// True if channels equals 4 and transparency is desired, else false. /// The gamma correction kernel string used for early clipping -template -string FinalAccumOpenCLKernelCreator::CreateGammaCorrectionKernelString(bool alphaCalc) +string FinalAccumOpenCLKernelCreator::CreateGammaCorrectionKernelString(bool alphaCalc) { ostringstream os; string dataType; os << - ConstantDefinesString(typeid(T) == typeid(double)) << + ConstantDefinesString(m_DoublePrecision) << ClampRealFunctionString << UnionCLStructString << RgbToHsvFunctionString << @@ -503,7 +494,7 @@ string FinalAccumOpenCLKernelCreator::CreateGammaCorrectionKernelString(bool CreateGammaCorrectionFunctionString(true, alphaCalc, true, false);//Will only be used with float in this case, early clip. Will always alpha accum. os << "__kernel void " << (alphaCalc ? 
m_GammaCorrectionWithAlphaCalcEntryPoint : m_GammaCorrectionWithoutAlphaCalcEntryPoint) << "(\n" << - " __global real4reals* accumulator,\n" + " __global real4reals_bucket* accumulator,\n" " __constant SpatialFilterCL* spatialFilter\n" ")\n" "{\n" @@ -513,7 +504,7 @@ string FinalAccumOpenCLKernelCreator::CreateGammaCorrectionKernelString(bool " return;\n" "\n" " uint superIndex = (GLOBAL_ID_Y * spatialFilter->m_SuperRasW) + GLOBAL_ID_X;\n" - " __global real4reals* bucket = accumulator + superIndex;\n" + " __global real4reals_bucket* bucket = accumulator + superIndex;\n" //Pass in an alphaBase and alphaScale of 0, 1 which means to just directly assign the computed alpha value. " GammaCorrectionFloats(bucket, &(spatialFilter->m_Background[0]), spatialFilter->m_Gamma, spatialFilter->m_LinRange, spatialFilter->m_Vibrancy, spatialFilter->m_HighlightPower, 0.0, 1.0, &(bucket->m_Reals[0]));\n" "}\n" @@ -521,10 +512,4 @@ string FinalAccumOpenCLKernelCreator::CreateGammaCorrectionKernelString(bool return os.str(); } - -template EMBERCL_API class FinalAccumOpenCLKernelCreator; - -#ifdef DO_DOUBLE - template EMBERCL_API class FinalAccumOpenCLKernelCreator; -#endif } diff --git a/Source/EmberCL/FinalAccumOpenCLKernelCreator.h b/Source/EmberCL/FinalAccumOpenCLKernelCreator.h index c52c00f..6d6ee26 100644 --- a/Source/EmberCL/FinalAccumOpenCLKernelCreator.h +++ b/Source/EmberCL/FinalAccumOpenCLKernelCreator.h @@ -19,13 +19,11 @@ namespace EmberCLns /// Early clip/late clip /// Alpha channel, no alpha channel /// Alpha with/without transparency -/// Template argument expected to be float or double. 
/// -template class EMBERCL_API FinalAccumOpenCLKernelCreator { public: - FinalAccumOpenCLKernelCreator(); + FinalAccumOpenCLKernelCreator(bool doublePrecision); string GammaCorrectionWithAlphaCalcKernel(); string GammaCorrectionWithAlphaCalcEntryPoint(); @@ -48,7 +46,7 @@ public: string FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint(); string GammaCorrectionEntryPoint(size_t channels, bool transparency); string GammaCorrectionKernel(size_t channels, bool transparency); - string FinalAccumEntryPoint(bool earlyClip, size_t channels, bool transparency, T& alphaBase, T& alphaScale); + string FinalAccumEntryPoint(bool earlyClip, size_t channels, bool transparency, double& alphaBase, double& alphaScale); string FinalAccumKernel(bool earlyClip, size_t channels, bool transparency); private: @@ -77,5 +75,7 @@ private: string m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint; string m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel;//False, true. string m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint; + + bool m_DoublePrecision; }; } diff --git a/Source/EmberCL/IterOpenCLKernelCreator.cpp b/Source/EmberCL/IterOpenCLKernelCreator.cpp index fd3773d..d1a8e23 100644 --- a/Source/EmberCL/IterOpenCLKernelCreator.cpp +++ b/Source/EmberCL/IterOpenCLKernelCreator.cpp @@ -6,20 +6,11 @@ namespace EmberCLns { -/// -/// Empty constructor that does nothing. The user must call the one which takes a bool -/// argument before using this class. -/// This constructor only exists so the class can be a member of a class. -/// -template -IterOpenCLKernelCreator::IterOpenCLKernelCreator() -{ -} - /// /// Constructor that sets up some basic entry point strings and creates /// the zeroization kernel string since it requires no conditional inputs. /// +/// True if running on an nVidia card, else false. 
template IterOpenCLKernelCreator::IterOpenCLKernelCreator(bool nVidia) { @@ -242,7 +233,7 @@ string IterOpenCLKernelCreator::CreateIterKernelString(Ember& ember, strin " __constant real_t* parVars,\n" " __global uchar* xformDistributions,\n"//Using uchar is quicker than uint. Can't be constant because the size can be too large to fit when using xaos.//FINALOPT " __constant CarToRasCL* carToRas,\n" - " __global real4reals* histogram,\n" + " __global real4reals_bucket* histogram,\n" " uint histSize,\n" " __read_only image2d_t palette,\n" " __global Point* points\n" @@ -506,41 +497,16 @@ string IterOpenCLKernelCreator::CreateIterKernelString(Ember& ember, strin if (lockAccum) { - if (typeid(T) == typeid(double)) - { - os << - " AtomicAdd(&(histogram[histIndex].m_Reals[0]), (real_t)palColor1.x * xforms[secondPoint.m_LastXfUsed].m_VizAdjusted);\n"//Always apply opacity, even though it's usually 1. - " AtomicAdd(&(histogram[histIndex].m_Reals[1]), (real_t)palColor1.y * xforms[secondPoint.m_LastXfUsed].m_VizAdjusted);\n" - " AtomicAdd(&(histogram[histIndex].m_Reals[2]), (real_t)palColor1.z * xforms[secondPoint.m_LastXfUsed].m_VizAdjusted);\n" - " AtomicAdd(&(histogram[histIndex].m_Reals[3]), (real_t)palColor1.w * xforms[secondPoint.m_LastXfUsed].m_VizAdjusted);\n"; - } - else - { - os << - " AtomicAdd(&(histogram[histIndex].m_Reals[0]), palColor1.x * xforms[secondPoint.m_LastXfUsed].m_VizAdjusted);\n"//Always apply opacity, even though it's usually 1. 
- " AtomicAdd(&(histogram[histIndex].m_Reals[1]), palColor1.y * xforms[secondPoint.m_LastXfUsed].m_VizAdjusted);\n" - " AtomicAdd(&(histogram[histIndex].m_Reals[2]), palColor1.z * xforms[secondPoint.m_LastXfUsed].m_VizAdjusted);\n" - " AtomicAdd(&(histogram[histIndex].m_Reals[3]), palColor1.w * xforms[secondPoint.m_LastXfUsed].m_VizAdjusted);\n"; - } + os << + " AtomicAdd(&(histogram[histIndex].m_Reals[0]), palColor1.x * (real_bucket_t)xforms[secondPoint.m_LastXfUsed].m_VizAdjusted);\n"//Always apply opacity, even though it's usually 1. + " AtomicAdd(&(histogram[histIndex].m_Reals[1]), palColor1.y * (real_bucket_t)xforms[secondPoint.m_LastXfUsed].m_VizAdjusted);\n" + " AtomicAdd(&(histogram[histIndex].m_Reals[2]), palColor1.z * (real_bucket_t)xforms[secondPoint.m_LastXfUsed].m_VizAdjusted);\n" + " AtomicAdd(&(histogram[histIndex].m_Reals[3]), palColor1.w * (real_bucket_t)xforms[secondPoint.m_LastXfUsed].m_VizAdjusted);\n"; } else { - if (typeid(T) == typeid(double)) - { - os << - " real4 realColor;\n" - "\n" - " realColor.x = (real_t)palColor1.x;\n" - " realColor.y = (real_t)palColor1.y;\n" - " realColor.z = (real_t)palColor1.z;\n" - " realColor.w = (real_t)palColor1.w;\n" - " histogram[histIndex].m_Real4 += (realColor * xforms[secondPoint.m_LastXfUsed].m_VizAdjusted);\n"; - } - else - { - os << - " histogram[histIndex].m_Real4 += (palColor1 * xforms[secondPoint.m_LastXfUsed].m_VizAdjusted);\n"; - } + os << + " histogram[histIndex].m_Real4 += (palColor1 * (real_bucket_t)xforms[secondPoint.m_LastXfUsed].m_VizAdjusted);\n";//real_bucket_t should always be float. 
} os << diff --git a/Source/EmberCL/IterOpenCLKernelCreator.h b/Source/EmberCL/IterOpenCLKernelCreator.h index 6e641c2..4eb1e2a 100644 --- a/Source/EmberCL/IterOpenCLKernelCreator.h +++ b/Source/EmberCL/IterOpenCLKernelCreator.h @@ -23,7 +23,6 @@ template class EMBERCL_API IterOpenCLKernelCreator { public: - IterOpenCLKernelCreator(); IterOpenCLKernelCreator(bool nVidia); string ZeroizeKernel(); string ZeroizeEntryPoint(); @@ -41,22 +40,6 @@ private: string m_ZeroizeEntryPoint; bool m_NVidia; }; -// -//template EMBERCL_API class IterOpenCLKernelCreator; -// -//#ifdef DO_DOUBLE -// template EMBERCL_API class IterOpenCLKernelCreator; -//#endif - -// -//template EMBERCL_API string IterOpenCLKernelCreator::CreateIterKernelString(Ember& ember, string& parVarDefines, bool lockAccum, bool doAccum); -//template EMBERCL_API string IterOpenCLKernelCreator::CreateIterKernelString(Ember& ember, string& parVarDefines, bool lockAccum, bool doAccum); -// -//template EMBERCL_API void IterOpenCLKernelCreator::ParVarIndexDefines(Ember& ember, pair>& params, bool doVals, bool doString); -//template EMBERCL_API void IterOpenCLKernelCreator::ParVarIndexDefines(Ember& ember, pair>& params, bool doVals, bool doString); -// -//template EMBERCL_API bool IterOpenCLKernelCreator::IsBuildRequired(Ember& ember1, Ember& ember2); -//template EMBERCL_API bool IterOpenCLKernelCreator::IsBuildRequired(Ember& ember1, Ember& ember2); #ifdef OPEN_CL_TEST_AREA typedef void (*KernelFuncPointer) (uint gridWidth, uint gridHeight, uint blockWidth, uint blockHeight, diff --git a/Source/EmberCL/RendererCL.cpp b/Source/EmberCL/RendererCL.cpp index 41fb0a5..d1465d3 100644 --- a/Source/EmberCL/RendererCL.cpp +++ b/Source/EmberCL/RendererCL.cpp @@ -6,13 +6,18 @@ namespace EmberCLns /// /// Constructor that inintializes various buffer names, block dimensions, image formats /// and finally initializes OpenCL using the passed in parameters. +/// Kernel creators are set to be non-nvidia by default. 
Will be properly set in Init(). /// /// The index platform of the platform to use. Default: 0. /// The index device of the device to use. Default: 0. /// True if shared with OpenGL, else false. Default: false. /// The texture ID of the shared OpenGL texture if shared. Default: 0. -template -RendererCL::RendererCL(uint platform, uint device, bool shared, GLuint outputTexID) +template +RendererCL::RendererCL(uint platform, uint device, bool shared, GLuint outputTexID) + : + m_IterOpenCLKernelCreator(false), + m_DEOpenCLKernelCreator(typeid(T) == typeid(double), false), + m_FinalAccumOpenCLKernelCreator(typeid(T) == typeid(double)) { m_Init = false; m_NVidia = false; @@ -61,8 +66,8 @@ RendererCL::RendererCL(uint platform, uint device, bool shared, GLuint output /// /// Virtual destructor. /// -template -RendererCL::~RendererCL() +template +RendererCL::~RendererCL() { } @@ -82,8 +87,8 @@ RendererCL::~RendererCL() /// True if shared with OpenGL, else false. /// The texture ID of the shared OpenGL texture if shared /// True if success, else false. -template -bool RendererCL::Init(uint platform, uint device, bool shared, GLuint outputTexID) +template +bool RendererCL::Init(uint platform, uint device, bool shared, GLuint outputTexID) { //Timing t; bool b = true; @@ -101,12 +106,12 @@ bool RendererCL::Init(uint platform, uint device, bool shared, GLuint outputT m_NVidia = ToLower(m_Wrapper.DeviceAndPlatformNames()).find_first_of("nvidia") != string::npos && m_Wrapper.LocalMemSize() > (32 * 1024); m_WarpSize = m_NVidia ? 32 : 64; m_IterOpenCLKernelCreator = IterOpenCLKernelCreator(m_NVidia); - m_DEOpenCLKernelCreator = DEOpenCLKernelCreator(m_NVidia); + m_DEOpenCLKernelCreator = DEOpenCLKernelCreator(m_DoublePrecision, m_NVidia); string zeroizeProgram = m_IterOpenCLKernelCreator.ZeroizeKernel(); string logAssignProgram = m_DEOpenCLKernelCreator.LogScaleAssignDEKernel();//Build a couple of simple programs to ensure OpenCL is working right. 
- if (b && !(b = m_Wrapper.AddProgram(m_IterOpenCLKernelCreator.ZeroizeEntryPoint(), zeroizeProgram, m_IterOpenCLKernelCreator.ZeroizeEntryPoint(), m_DoublePrecision))) { m_ErrorReport.push_back(loc); } + if (b && !(b = m_Wrapper.AddProgram(m_IterOpenCLKernelCreator.ZeroizeEntryPoint(), zeroizeProgram, m_IterOpenCLKernelCreator.ZeroizeEntryPoint(), m_DoublePrecision))) { m_ErrorReport.push_back(loc); } if (b && !(b = m_Wrapper.AddProgram(m_DEOpenCLKernelCreator.LogScaleAssignDEEntryPoint(), logAssignProgram, m_DEOpenCLKernelCreator.LogScaleAssignDEEntryPoint(), m_DoublePrecision))) { m_ErrorReport.push_back(loc); } if (b && !(b = m_Wrapper.AddAndWriteImage("Palette", CL_MEM_READ_ONLY, m_PaletteFormat, 256, 1, 0, nullptr))) { m_ErrorReport.push_back(loc); } if (b && !(b = m_Wrapper.AddAndWriteBuffer(m_SeedsBufferName, reinterpret_cast(m_Seeds.data()), SizeOf(m_Seeds)))) { m_ErrorReport.push_back(loc); } @@ -130,8 +135,8 @@ bool RendererCL::Init(uint platform, uint device, bool shared, GLuint outputT /// /// The texture ID of the shared OpenGL texture if shared /// True if success, else false. -template -bool RendererCL::SetOutputTexture(GLuint outputTexID) +template +bool RendererCL::SetOutputTexture(GLuint outputTexID) { bool success = true; const char* loc = __FUNCTION__; @@ -157,38 +162,38 @@ bool RendererCL::SetOutputTexture(GLuint outputTexID) /// //Iters per kernel/block/grid. 
-template uint RendererCL::IterCountPerKernel() const { return m_IterCountPerKernel; } -template uint RendererCL::IterCountPerBlock() const { return IterCountPerKernel() * IterBlockKernelCount(); } -template uint RendererCL::IterCountPerGrid() const { return IterCountPerKernel() * IterGridKernelCount(); } +template uint RendererCL::IterCountPerKernel() const { return m_IterCountPerKernel; } +template uint RendererCL::IterCountPerBlock() const { return IterCountPerKernel() * IterBlockKernelCount(); } +template uint RendererCL::IterCountPerGrid() const { return IterCountPerKernel() * IterGridKernelCount(); } //Kernels per block. -template uint RendererCL::IterBlockKernelWidth() const { return m_IterBlockWidth; } -template uint RendererCL::IterBlockKernelHeight() const { return m_IterBlockHeight; } -template uint RendererCL::IterBlockKernelCount() const { return IterBlockKernelWidth() * IterBlockKernelHeight(); } +template uint RendererCL::IterBlockKernelWidth() const { return m_IterBlockWidth; } +template uint RendererCL::IterBlockKernelHeight() const { return m_IterBlockHeight; } +template uint RendererCL::IterBlockKernelCount() const { return IterBlockKernelWidth() * IterBlockKernelHeight(); } //Kernels per grid. -template uint RendererCL::IterGridKernelWidth() const { return IterGridBlockWidth() * IterBlockKernelWidth(); } -template uint RendererCL::IterGridKernelHeight() const { return IterGridBlockHeight() * IterBlockKernelHeight(); } -template uint RendererCL::IterGridKernelCount() const { return IterGridKernelWidth() * IterGridKernelHeight(); } +template uint RendererCL::IterGridKernelWidth() const { return IterGridBlockWidth() * IterBlockKernelWidth(); } +template uint RendererCL::IterGridKernelHeight() const { return IterGridBlockHeight() * IterBlockKernelHeight(); } +template uint RendererCL::IterGridKernelCount() const { return IterGridKernelWidth() * IterGridKernelHeight(); } //Blocks per grid. 
-template uint RendererCL::IterGridBlockWidth() const { return m_IterBlocksWide; } -template uint RendererCL::IterGridBlockHeight() const { return m_IterBlocksHigh; } -template uint RendererCL::IterGridBlockCount() const { return IterGridBlockWidth() * IterGridBlockHeight(); } +template uint RendererCL::IterGridBlockWidth() const { return m_IterBlocksWide; } +template uint RendererCL::IterGridBlockHeight() const { return m_IterBlocksHigh; } +template uint RendererCL::IterGridBlockCount() const { return IterGridBlockWidth() * IterGridBlockHeight(); } -template uint RendererCL::PlatformIndex() { return m_Wrapper.PlatformIndex(); } -template uint RendererCL::DeviceIndex() { return m_Wrapper.DeviceIndex(); } +template uint RendererCL::PlatformIndex() { return m_Wrapper.PlatformIndex(); } +template uint RendererCL::DeviceIndex() { return m_Wrapper.DeviceIndex(); } /// /// Read the histogram into the host side CPU buffer. /// Used for debugging. /// /// True if success, else false. -template -bool RendererCL::ReadHist() +template +bool RendererCL::ReadHist() { - if (Renderer::Alloc())//Allocate the memory to read into. - return m_Wrapper.ReadBuffer(m_HistBufferName, reinterpret_cast(HistBuckets()), SuperSize() * sizeof(v4T)); + if (Renderer::Alloc())//Allocate the memory to read into. + return m_Wrapper.ReadBuffer(m_HistBufferName, reinterpret_cast(HistBuckets()), SuperSize() * sizeof(v4bT)); return false; } @@ -198,11 +203,11 @@ bool RendererCL::ReadHist() /// Used for debugging. /// /// True if success, else false. -template -bool RendererCL::ReadAccum() +template +bool RendererCL::ReadAccum() { - if (Renderer::Alloc())//Allocate the memory to read into. - return m_Wrapper.ReadBuffer(m_AccumBufferName, reinterpret_cast(AccumulatorBuckets()), SuperSize() * sizeof(v4T)); + if (Renderer::Alloc())//Allocate the memory to read into. 
+ return m_Wrapper.ReadBuffer(m_AccumBufferName, reinterpret_cast(AccumulatorBuckets()), SuperSize() * sizeof(v4bT)); return false; } @@ -213,8 +218,8 @@ bool RendererCL::ReadAccum() /// /// The host side buffer to read into /// True if success, else false. -template -bool RendererCL::ReadPoints(vector>& vec) +template +bool RendererCL::ReadPoints(vector>& vec) { vec.resize(IterGridKernelCount());//Allocate the memory to read into. @@ -228,20 +233,20 @@ bool RendererCL::ReadPoints(vector>& vec) /// Clear the histogram buffer with all zeroes. /// /// True if success, else false. -template -bool RendererCL::ClearHist() +template +bool RendererCL::ClearHist() { - return ClearBuffer(m_HistBufferName, uint(SuperRasW()), uint(SuperRasH()), sizeof(v4T)); + return ClearBuffer(m_HistBufferName, uint(SuperRasW()), uint(SuperRasH()), sizeof(v4bT)); } /// /// Clear the desnity filtering buffer with all zeroes. /// /// True if success, else false. -template -bool RendererCL::ClearAccum() +template +bool RendererCL::ClearAccum() { - return ClearBuffer(m_AccumBufferName, uint(SuperRasW()), uint(SuperRasH()), sizeof(v4T)); + return ClearBuffer(m_AccumBufferName, uint(SuperRasW()), uint(SuperRasH()), sizeof(v4bT)); } /// @@ -250,15 +255,15 @@ bool RendererCL::ClearAccum() /// /// The host side buffer whose values to write /// True if success, else false. -template -bool RendererCL::WritePoints(vector>& vec) +template +bool RendererCL::WritePoints(vector>& vec) { return m_Wrapper.WriteBuffer(m_PointsBufferName, reinterpret_cast(vec.data()), SizeOf(vec)); } #ifdef TEST_CL -template -bool RendererCL::WriteRandomPoints() +template +bool RendererCL::WriteRandomPoints() { size_t size = IterGridKernelCount(); vector> vec(size); @@ -280,23 +285,23 @@ bool RendererCL::WriteRandomPoints() /// Get the kernel string for the last built iter program. /// /// The string representation of the kernel for the last built iter program. 
-template -string RendererCL::IterKernel() { return m_IterKernel; } +template +string RendererCL::IterKernel() { return m_IterKernel; } /// /// Get the kernel string for the last built density filtering program. /// /// The string representation of the kernel for the last built density filtering program. -template -string RendererCL::DEKernel() { return m_DEOpenCLKernelCreator.GaussianDEKernel(Supersample(), m_DensityFilterCL.m_FilterWidth); } +template +string RendererCL::DEKernel() { return m_DEOpenCLKernelCreator.GaussianDEKernel(Supersample(), m_DensityFilterCL.m_FilterWidth); } /// /// Get the kernel string for the last built final accumulation program. /// /// The string representation of the kernel for the last built final accumulation program. -template -string RendererCL::FinalAccumKernel() { return m_FinalAccumOpenCLKernelCreator.FinalAccumKernel(EarlyClip(), Renderer::NumChannels(), Transparency()); } +template +string RendererCL::FinalAccumKernel() { return m_FinalAccumOpenCLKernelCreator.FinalAccumKernel(EarlyClip(), Renderer::NumChannels(), Transparency()); } /// /// Virtual functions overridden from RendererCLBase. @@ -308,8 +313,8 @@ string RendererCL::FinalAccumKernel() { return m_FinalAccumOpenCLKernelCreato /// /// The host side buffer to read into /// True if success, else false. -template -bool RendererCL::ReadFinal(byte* pixels) +template +bool RendererCL::ReadFinal(byte* pixels) { if (pixels) return m_Wrapper.ReadImage(m_FinalImageName, FinalRasW(), FinalRasH(), 0, m_Wrapper.Shared(), pixels); @@ -322,8 +327,8 @@ bool RendererCL::ReadFinal(byte* pixels) /// Slow, but never used because the final output image is always completely overwritten. /// /// True if success, else false. 
-template -bool RendererCL::ClearFinal() +template +bool RendererCL::ClearFinal() { vector v; uint index = m_Wrapper.FindImageIndex(m_FinalImageName, m_Wrapper.Shared()); @@ -349,8 +354,8 @@ bool RendererCL::ClearFinal() /// The amount of video RAM available on the GPU to render with. /// /// An unsigned 64-bit integer specifying how much video memory is available -template -size_t RendererCL::MemoryAvailable() +template +size_t RendererCL::MemoryAvailable() { return Ok() ? m_Wrapper.GlobalMemSize() : 0ULL; } @@ -359,8 +364,8 @@ size_t RendererCL::MemoryAvailable() /// Return whether OpenCL has been properly initialized. /// /// True if OpenCL has been properly initialized, else false. -template -bool RendererCL::Ok() const +template +bool RendererCL::Ok() const { return m_Init; } @@ -370,8 +375,8 @@ bool RendererCL::Ok() const /// since the output is actually an image rather than just a buffer. /// /// The number of channels, ignored. -template -void RendererCL::NumChannels(size_t numChannels) +template +void RendererCL::NumChannels(size_t numChannels) { m_NumChannels = 4; } @@ -379,8 +384,8 @@ void RendererCL::NumChannels(size_t numChannels) /// /// Dump the error report for this class as well as the OpenCLWrapper member. /// -template -void RendererCL::DumpErrorReport() +template +void RendererCL::DumpErrorReport() { EmberReport::DumpErrorReport(); m_Wrapper.DumpErrorReport(); @@ -389,8 +394,8 @@ void RendererCL::DumpErrorReport() /// /// Clear the error report for this class as well as the OpenCLWrapper member. /// -template -void RendererCL::ClearErrorReport() +template +void RendererCL::ClearErrorReport() { EmberReport::ClearErrorReport(); m_Wrapper.ClearErrorReport(); @@ -402,8 +407,8 @@ void RendererCL::ClearErrorReport() /// change this. 
/// /// The number of iterations ran in a single kernel call -template -size_t RendererCL::SubBatchSize() const +template +size_t RendererCL::SubBatchSize() const { return IterCountPerGrid(); } @@ -413,8 +418,8 @@ size_t RendererCL::SubBatchSize() const /// the kernel internally runs many threads. /// /// 1 -template -size_t RendererCL::ThreadCount() const +template +size_t RendererCL::ThreadCount() const { return 1; } @@ -425,22 +430,21 @@ size_t RendererCL::ThreadCount() const /// /// True if a new filter instance was created, else false. /// True if success, else false. -template -bool RendererCL::CreateDEFilter(bool& newAlloc) +template +bool RendererCL::CreateDEFilter(bool& newAlloc) { bool b = true; - if (Renderer::CreateDEFilter(newAlloc)) + if (Renderer::CreateDEFilter(newAlloc)) { //Copy coefs and widths here. Convert and copy the other filter params right before calling the filtering kernel. if (newAlloc) { const char* loc = __FUNCTION__; - DensityFilter* filter = dynamic_cast*>(GetDensityFilter()); - if (b && !(b = m_Wrapper.AddAndWriteBuffer(m_DECoefsBufferName, reinterpret_cast(const_cast(filter->Coefs())), filter->CoefsSizeBytes()))) { m_ErrorReport.push_back(loc); } - if (b && !(b = m_Wrapper.AddAndWriteBuffer(m_DEWidthsBufferName, reinterpret_cast(const_cast(filter->Widths())), filter->WidthsSizeBytes()))) { m_ErrorReport.push_back(loc); } - if (b && !(b = m_Wrapper.AddAndWriteBuffer(m_DECoefIndicesBufferName, reinterpret_cast(const_cast(filter->CoefIndices())), filter->CoefsIndicesSizeBytes()))) { m_ErrorReport.push_back(loc); } + if (b && !(b = m_Wrapper.AddAndWriteBuffer(m_DECoefsBufferName, reinterpret_cast(const_cast(m_DensityFilter->Coefs())), m_DensityFilter->CoefsSizeBytes()))) { m_ErrorReport.push_back(loc); } + if (b && !(b = m_Wrapper.AddAndWriteBuffer(m_DEWidthsBufferName, reinterpret_cast(const_cast(m_DensityFilter->Widths())), m_DensityFilter->WidthsSizeBytes()))) { m_ErrorReport.push_back(loc); } + if (b && !(b = 
m_Wrapper.AddAndWriteBuffer(m_DECoefIndicesBufferName, reinterpret_cast(const_cast(m_DensityFilter->CoefIndices())), m_DensityFilter->CoefsIndicesSizeBytes()))) { m_ErrorReport.push_back(loc); } } } else @@ -455,15 +459,15 @@ bool RendererCL::CreateDEFilter(bool& newAlloc) /// /// True if a new filter instance was created, else false. /// True if success, else false. -template -bool RendererCL::CreateSpatialFilter(bool& newAlloc) +template +bool RendererCL::CreateSpatialFilter(bool& newAlloc) { bool b = true; - if (Renderer::CreateSpatialFilter(newAlloc)) + if (Renderer::CreateSpatialFilter(newAlloc)) { if (newAlloc) - if (b && !(b = m_Wrapper.AddAndWriteBuffer(m_SpatialFilterCoefsBufferName, reinterpret_cast(GetSpatialFilter()->Filter()), GetSpatialFilter()->BufferSizeBytes()))) { m_ErrorReport.push_back(__FUNCTION__); } + if (b && !(b = m_Wrapper.AddAndWriteBuffer(m_SpatialFilterCoefsBufferName, reinterpret_cast(m_SpatialFilter->Filter()), m_SpatialFilter->BufferSizeBytes()))) { m_ErrorReport.push_back(__FUNCTION__); } } else @@ -476,8 +480,8 @@ bool RendererCL::CreateSpatialFilter(bool& newAlloc) /// Get the renderer type enum. /// /// OPENCL_RENDERER -template -eRendererType RendererCL::RendererType() const +template +eRendererType RendererCL::RendererType() const { return OPENCL_RENDERER; } @@ -487,8 +491,8 @@ eRendererType RendererCL::RendererType() const /// OpenCLWrapper member as a single string. /// /// The concatenated error report string -template -string RendererCL::ErrorReportString() +template +string RendererCL::ErrorReportString() { return EmberReport::ErrorReportString() + m_Wrapper.ErrorReportString(); } @@ -498,8 +502,8 @@ string RendererCL::ErrorReportString() /// OpenCLWrapper member as a vector of strings. 
/// /// The concatenated error report vector of strings -template -vector RendererCL::ErrorReport() +template +vector RendererCL::ErrorReport() { auto ours = EmberReport::ErrorReport(); auto wrappers = m_Wrapper.ErrorReport(); @@ -514,10 +518,10 @@ vector RendererCL::ErrorReport() /// /// The vector of random contexts to assign /// True if the size of the vector matched the number of threads used for rendering and writing seeds to OpenCL succeeded, else false. -template -bool RendererCL::RandVec(vector>& randVec) +template +bool RendererCL::RandVec(vector>& randVec) { - bool b = Renderer::RandVec(randVec); + bool b = Renderer::RandVec(randVec); const char* loc = __FUNCTION__; if (m_Wrapper.Ok()) @@ -540,8 +544,8 @@ bool RendererCL::RandVec(vector>& randVec) /// only supports floats for texture images. /// /// The color scalar to multiply the ember's palette by -template -void RendererCL::MakeDmap(T colorScalar) +template +void RendererCL::MakeDmap(T colorScalar) { //m_Ember.m_Palette.MakeDmap(m_DmapCL, colorScalar); m_Ember.m_Palette.MakeDmap(m_DmapCL, colorScalar); @@ -553,8 +557,8 @@ void RendererCL::MakeDmap(T colorScalar) /// 2D image. /// /// True if success, else false. -template -bool RendererCL::Alloc() +template +bool RendererCL::Alloc() { if (!m_Wrapper.Ok()) return false; @@ -567,17 +571,17 @@ bool RendererCL::Alloc() size_t accumLength = SuperSize() * sizeof(v4T); const char* loc = __FUNCTION__; - if (b && !(b = m_Wrapper.AddBuffer(m_EmberBufferName, sizeof(m_EmberCL)))) { m_ErrorReport.push_back(loc); } - if (b && !(b = m_Wrapper.AddBuffer(m_XformsBufferName, SizeOf(m_XformsCL)))) { m_ErrorReport.push_back(loc); } - if (b && !(b = m_Wrapper.AddBuffer(m_ParVarsBufferName, 128 * sizeof(T)))) { m_ErrorReport.push_back(loc); } - if (b && !(b = m_Wrapper.AddBuffer(m_DistBufferName, CHOOSE_XFORM_GRAIN))) { m_ErrorReport.push_back(loc); }//Will be resized for xaos. 
- if (b && !(b = m_Wrapper.AddBuffer(m_CarToRasBufferName, sizeof(m_CarToRasCL)))) { m_ErrorReport.push_back(loc); } - if (b && !(b = m_Wrapper.AddBuffer(m_DEFilterParamsBufferName, sizeof(m_DensityFilterCL)))) { m_ErrorReport.push_back(loc); } - if (b && !(b = m_Wrapper.AddBuffer(m_SpatialFilterParamsBufferName, sizeof(m_SpatialFilterCL)))) { m_ErrorReport.push_back(loc); } - if (b && !(b = m_Wrapper.AddBuffer(m_CurvesCsaName, SizeOf(m_Csa.m_Entries)))) { m_ErrorReport.push_back(loc); } - if (b && !(b = m_Wrapper.AddBuffer(m_HistBufferName, histLength))) { m_ErrorReport.push_back(loc); }//Histogram. Will memset to zero later. - if (b && !(b = m_Wrapper.AddBuffer(m_AccumBufferName, accumLength))) { m_ErrorReport.push_back(loc); }//Accum buffer. - if (b && !(b = m_Wrapper.AddBuffer(m_PointsBufferName, IterGridKernelCount() * sizeof(PointCL)))) { m_ErrorReport.push_back(loc); }//Points between iter calls. + if (b && !(b = m_Wrapper.AddBuffer(m_EmberBufferName, sizeof(m_EmberCL)))) { m_ErrorReport.push_back(loc); } + if (b && !(b = m_Wrapper.AddBuffer(m_XformsBufferName, SizeOf(m_XformsCL)))) { m_ErrorReport.push_back(loc); } + if (b && !(b = m_Wrapper.AddBuffer(m_ParVarsBufferName, 128 * sizeof(T)))) { m_ErrorReport.push_back(loc); } + if (b && !(b = m_Wrapper.AddBuffer(m_DistBufferName, CHOOSE_XFORM_GRAIN))) { m_ErrorReport.push_back(loc); }//Will be resized for xaos. + if (b && !(b = m_Wrapper.AddBuffer(m_CarToRasBufferName, sizeof(m_CarToRasCL)))) { m_ErrorReport.push_back(loc); } + if (b && !(b = m_Wrapper.AddBuffer(m_DEFilterParamsBufferName, sizeof(m_DensityFilterCL)))) { m_ErrorReport.push_back(loc); } + if (b && !(b = m_Wrapper.AddBuffer(m_SpatialFilterParamsBufferName, sizeof(m_SpatialFilterCL)))) { m_ErrorReport.push_back(loc); } + if (b && !(b = m_Wrapper.AddBuffer(m_CurvesCsaName, SizeOf(m_Csa.m_Entries)))) { m_ErrorReport.push_back(loc); } + if (b && !(b = m_Wrapper.AddBuffer(m_HistBufferName, histLength))) { m_ErrorReport.push_back(loc); }//Histogram. 
Will memset to zero later. + if (b && !(b = m_Wrapper.AddBuffer(m_AccumBufferName, accumLength))) { m_ErrorReport.push_back(loc); }//Accum buffer. + if (b && !(b = m_Wrapper.AddBuffer(m_PointsBufferName, IterGridKernelCount() * sizeof(PointCL)))) { m_ErrorReport.push_back(loc); }//Points between iter calls. LeaveResize(); @@ -592,8 +596,8 @@ bool RendererCL::Alloc() /// Clear histogram if true, else don't. /// Clear density filtering buffer if true, else don't. /// True if success, else false. -template -bool RendererCL::ResetBuckets(bool resetHist, bool resetAccum) +template +bool RendererCL::ResetBuckets(bool resetHist, bool resetAccum) { bool b = true; @@ -610,8 +614,8 @@ bool RendererCL::ResetBuckets(bool resetHist, bool resetAccum) /// Perform log scale density filtering. /// /// True if success and not aborted, else false. -template -eRenderStatus RendererCL::LogScaleDensityFilter() +template +eRenderStatus RendererCL::LogScaleDensityFilter() { return RunLogScaleFilter(); } @@ -620,8 +624,8 @@ eRenderStatus RendererCL::LogScaleDensityFilter() /// Run gaussian density estimation filtering. /// /// True if success and not aborted, else false. -template -eRenderStatus RendererCL::GaussianDensityFilter() +template +eRenderStatus RendererCL::GaussianDensityFilter() { //This commented section is for debugging density filtering by making it run on the CPU //then copying the results back to the GPU. 
@@ -630,8 +634,8 @@ eRenderStatus RendererCL::GaussianDensityFilter() // uint accumLength = SuperSize() * sizeof(glm::detail::tvec4); // const char* loc = __FUNCTION__; // - // Renderer::ResetBuckets(false, true); - // Renderer::GaussianDensityFilter(); + // Renderer::ResetBuckets(false, true); + // Renderer::GaussianDensityFilter(); // // if (!m_Wrapper.WriteBuffer(m_AccumBufferName, AccumulatorBuckets(), accumLength)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; } // return RENDER_OK; @@ -656,8 +660,8 @@ eRenderStatus RendererCL::GaussianDensityFilter() /// The pixels to copy the final image to if not nullptr /// Offset in the buffer to store the pixels to /// True if success and not aborted, else false. -template -eRenderStatus RendererCL::AccumulatorToFinalImage(byte* pixels, size_t finalOffset) +template +eRenderStatus RendererCL::AccumulatorToFinalImage(byte* pixels, size_t finalOffset) { eRenderStatus status = RunFinalAccum(); @@ -683,8 +687,8 @@ eRenderStatus RendererCL::AccumulatorToFinalImage(byte* pixels, size_t finalO /// The number of iterations to run /// The temporal sample within the current pass this is running for /// Rendering statistics -template -EmberStats RendererCL::Iterate(size_t iterCount, size_t temporalSample) +template +EmberStats RendererCL::Iterate(size_t iterCount, size_t temporalSample) { bool b = true; EmberStats stats;//Do not record bad vals with with GPU. If the user needs to investigate bad vals, use the CPU. @@ -740,8 +744,8 @@ EmberStats RendererCL::Iterate(size_t iterCount, size_t temporalSample) /// /// Whether to build in accumulation, only for debugging. Default: true. /// True if success, else false. 
-template -bool RendererCL::BuildIterProgramForEmber(bool doAccum) +template +bool RendererCL::BuildIterProgramForEmber(bool doAccum) { //Timing t; const char* loc = __FUNCTION__; @@ -777,8 +781,8 @@ bool RendererCL::BuildIterProgramForEmber(bool doAccum) /// The temporal sample this is running for /// The storage for the number of iterations ran /// True if success, else false. -template -bool RendererCL::RunIter(size_t iterCount, size_t temporalSample, size_t& itersRan) +template +bool RendererCL::RunIter(size_t iterCount, size_t temporalSample, size_t& itersRan) { Timing t;//, t2(4); bool b = true; @@ -787,7 +791,7 @@ bool RendererCL::RunIter(size_t iterCount, size_t temporalSample, size_t& ite uint iterCountPerBlock = IterCountPerBlock(); uint supersize = uint(SuperSize()); int kernelIndex = m_Wrapper.FindKernelIndex(m_IterOpenCLKernelCreator.IterEntryPoint()); - size_t fuseFreq = Renderer::SubBatchSize() / m_IterCountPerKernel;//Use the base sbs to determine when to fuse. + size_t fuseFreq = Renderer::SubBatchSize() / m_IterCountPerKernel;//Use the base sbs to determine when to fuse. size_t itersRemaining; double percent, etaMs; const char* loc = __FUNCTION__; @@ -802,10 +806,10 @@ bool RendererCL::RunIter(size_t iterCount, size_t temporalSample, size_t& ite ConvertEmber(m_Ember, m_EmberCL, m_XformsCL); m_CarToRasCL = ConvertCarToRas(*CoordMap()); - if (b && !(b = m_Wrapper.WriteBuffer (m_EmberBufferName, reinterpret_cast(&m_EmberCL), sizeof(m_EmberCL)))) { m_ErrorReport.push_back(loc); } - if (b && !(b = m_Wrapper.WriteBuffer (m_XformsBufferName, reinterpret_cast(m_XformsCL.data()), sizeof(m_XformsCL[0]) * m_XformsCL.size()))) { m_ErrorReport.push_back(loc); } - if (b && !(b = m_Wrapper.AddAndWriteBuffer(m_DistBufferName, reinterpret_cast(const_cast(XformDistributions())), XformDistributionsSize()))) { m_ErrorReport.push_back(loc); }//Will be resized for xaos. 
- if (b && !(b = m_Wrapper.WriteBuffer (m_CarToRasBufferName, reinterpret_cast(&m_CarToRasCL), sizeof(m_CarToRasCL)))) { m_ErrorReport.push_back(loc); } + if (b && !(b = m_Wrapper.WriteBuffer (m_EmberBufferName, reinterpret_cast(&m_EmberCL), sizeof(m_EmberCL)))) { m_ErrorReport.push_back(loc); } + if (b && !(b = m_Wrapper.WriteBuffer (m_XformsBufferName, reinterpret_cast(m_XformsCL.data()), sizeof(m_XformsCL[0]) * m_XformsCL.size()))) { m_ErrorReport.push_back(loc); } + if (b && !(b = m_Wrapper.AddAndWriteBuffer(m_DistBufferName, reinterpret_cast(const_cast(XformDistributions())), XformDistributionsSize()))) { m_ErrorReport.push_back(loc); }//Will be resized for xaos. + if (b && !(b = m_Wrapper.WriteBuffer (m_CarToRasBufferName, reinterpret_cast(&m_CarToRasCL), sizeof(m_CarToRasCL)))) { m_ErrorReport.push_back(loc); } if (b && !(b = m_Wrapper.AddAndWriteImage("Palette", CL_MEM_READ_ONLY, m_PaletteFormat, m_DmapCL.m_Entries.size(), 1, 0, m_DmapCL.m_Entries.data()))) { m_ErrorReport.push_back(loc); } @@ -825,7 +829,7 @@ bool RendererCL::RunIter(size_t iterCount, size_t temporalSample, size_t& ite //fuse = ((m_Calls % 4) == 0 ? 100u : 0u); #endif itersRemaining = iterCount - itersRan; - uint gridW = uint(std::min(ceil(double(itersRemaining) / double(iterCountPerBlock)), double(IterGridBlockWidth()))); + uint gridW = uint(std::min(ceil(double(itersRemaining) / double(iterCountPerBlock)), double(IterGridBlockWidth()))); uint gridH = uint(std::min(ceil(double(itersRemaining) / double(gridW * iterCountPerBlock)), double(IterGridBlockHeight()))); uint iterCountThisLaunch = iterCountPerBlock * gridW * gridH; @@ -910,8 +914,8 @@ bool RendererCL::RunIter(size_t iterCount, size_t temporalSample, size_t& ite /// Run the log scale filter. /// /// True if success, else false. 
-template -eRenderStatus RendererCL::RunLogScaleFilter() +template +eRenderStatus RendererCL::RunLogScaleFilter() { //Timing t(4); bool b = true; @@ -920,7 +924,7 @@ eRenderStatus RendererCL::RunLogScaleFilter() if (kernelIndex != -1) { - m_DensityFilterCL = ConvertDensityFilter(); + ConvertDensityFilter(); uint argIndex = 0; uint blockW = m_WarpSize; uint blockH = 4;//A height of 4 seems to run the fastest. @@ -953,15 +957,15 @@ eRenderStatus RendererCL::RunLogScaleFilter() /// /// Run the Gaussian density filter. -/// Method 7: Each block processes a 32x32 block and exits. No column or row advancements happen. +/// Method 7: Each block processes a 16x16(AMD) or 32x32(Nvidia) block and exits. No column or row advancements happen. /// /// True if success and not aborted, else false. -template -eRenderStatus RendererCL::RunDensityFilter() +template +eRenderStatus RendererCL::RunDensityFilter() { bool b = true; Timing t(4);// , t2(4); - m_DensityFilterCL = ConvertDensityFilter(); + ConvertDensityFilter(); int kernelIndex = MakeAndGetDensityFilterProgram(Supersample(), m_DensityFilterCL.m_FilterWidth); const char* loc = __FUNCTION__; @@ -1074,13 +1078,13 @@ eRenderStatus RendererCL::RunDensityFilter() /// Run final accumulation to the 2D output image. /// /// True if success and not aborted, else false. -template -eRenderStatus RendererCL::RunFinalAccum() +template +eRenderStatus RendererCL::RunFinalAccum() { //Timing t(4); bool b = true; - T alphaBase; - T alphaScale; + double alphaBase; + double alphaScale; int accumKernelIndex = MakeAndGetFinalAccumProgram(alphaBase, alphaScale); uint argIndex; uint gridW; @@ -1093,10 +1097,10 @@ eRenderStatus RendererCL::RunFinalAccum() if (!m_Abort && accumKernelIndex != -1) { //This is needed with or without early clip. 
- m_SpatialFilterCL = ConvertSpatialFilter(); + ConvertSpatialFilter(); if (b && !(b = m_Wrapper.AddAndWriteBuffer(m_SpatialFilterParamsBufferName, reinterpret_cast(&m_SpatialFilterCL), sizeof(m_SpatialFilterCL)))) { m_ErrorReport.push_back(loc); } - if (b && !(b = m_Wrapper.AddAndWriteBuffer(m_CurvesCsaName, m_Csa.m_Entries.data(), SizeOf(m_Csa.m_Entries)))) { m_ErrorReport.push_back(loc); } + if (b && !(b = m_Wrapper.AddAndWriteBuffer(m_CurvesCsaName, m_Csa.m_Entries.data(), SizeOf(m_Csa.m_Entries)))) { m_ErrorReport.push_back(loc); } //Since early clip requires gamma correcting the entire accumulator first, //it can't be done inside of the normal final accumulation kernel, so @@ -1140,8 +1144,8 @@ eRenderStatus RendererCL::RunFinalAccum() if (b && !(b = m_Wrapper.SetBufferArg(accumKernelIndex, argIndex++, m_CurvesCsaName))) { m_ErrorReport.push_back(loc); }//Curve points. if (b && !(b = m_Wrapper.SetArg (accumKernelIndex, argIndex++, curvesSet))) { m_ErrorReport.push_back(loc); }//Do curves. - if (b && !(b = m_Wrapper.SetArg (accumKernelIndex, argIndex++, alphaBase))) { m_ErrorReport.push_back(loc); }//Alpha base. - if (b && !(b = m_Wrapper.SetArg (accumKernelIndex, argIndex++, alphaScale))) { m_ErrorReport.push_back(loc); }//Alpha scale. + if (b && !(b = m_Wrapper.SetArg (accumKernelIndex, argIndex++, bucketT(alphaBase)))) { m_ErrorReport.push_back(loc); }//Alpha base. + if (b && !(b = m_Wrapper.SetArg (accumKernelIndex, argIndex++, bucketT(alphaScale)))) { m_ErrorReport.push_back(loc); }//Alpha scale. if (b && m_Wrapper.Shared()) if (b && !(b = m_Wrapper.EnqueueAcquireGLObjects(m_FinalImageName))) { m_ErrorReport.push_back(loc); } @@ -1170,8 +1174,8 @@ eRenderStatus RendererCL::RunFinalAccum() /// Height in elements /// Size of each element /// True if success, else false. 
-template -bool RendererCL::ClearBuffer(const string& bufferName, uint width, uint height, uint elementSize) +template +bool RendererCL::ClearBuffer(const string& bufferName, uint width, uint height, uint elementSize) { bool b = true; int kernelIndex = m_Wrapper.FindKernelIndex(m_IterOpenCLKernelCreator.ZeroizeEntryPoint()); @@ -1215,8 +1219,8 @@ bool RendererCL::ClearBuffer(const string& bufferName, uint width, uint heigh /// Row parity /// Column parity /// True if success, else false. -template -bool RendererCL::RunDensityFilterPrivate(uint kernelIndex, uint gridW, uint gridH, uint blockW, uint blockH, uint chunkSizeW, uint chunkSizeH, uint chunkW, uint chunkH) +template +bool RendererCL::RunDensityFilterPrivate(uint kernelIndex, uint gridW, uint gridH, uint blockW, uint blockH, uint chunkSizeW, uint chunkSizeH, uint chunkW, uint chunkH) { //Timing t(4); bool b = true; @@ -1248,8 +1252,8 @@ bool RendererCL::RunDensityFilterPrivate(uint kernelIndex, uint gridW, uint g /// The supersample being used for the current ember /// Width of the gaussian filter /// The kernel index if successful, else -1. -template -int RendererCL::MakeAndGetDensityFilterProgram(size_t ss, uint filterWidth) +template +int RendererCL::MakeAndGetDensityFilterProgram(size_t ss, uint filterWidth) { string deEntryPoint = m_DEOpenCLKernelCreator.GaussianDEEntryPoint(ss, filterWidth); int kernelIndex = m_Wrapper.FindKernelIndex(deEntryPoint); @@ -1281,16 +1285,16 @@ int RendererCL::MakeAndGetDensityFilterProgram(size_t ss, uint filterWidth) /// Storage for the alpha base value used in the kernel. 0 if transparency is true, else 255. /// Storage for the alpha scale value used in the kernel. 255 if transparency is true, else 0. /// The kernel index if successful, else -1. 
-template -int RendererCL::MakeAndGetFinalAccumProgram(T& alphaBase, T& alphaScale) +template +int RendererCL::MakeAndGetFinalAccumProgram(double& alphaBase, double& alphaScale) { - string finalAccumEntryPoint = m_FinalAccumOpenCLKernelCreator.FinalAccumEntryPoint(EarlyClip(), Renderer::NumChannels(), Transparency(), alphaBase, alphaScale); + string finalAccumEntryPoint = m_FinalAccumOpenCLKernelCreator.FinalAccumEntryPoint(EarlyClip(), Renderer::NumChannels(), Transparency(), alphaBase, alphaScale); int kernelIndex = m_Wrapper.FindKernelIndex(finalAccumEntryPoint); const char* loc = __FUNCTION__; if (kernelIndex == -1)//Has not been built yet. { - string kernel = m_FinalAccumOpenCLKernelCreator.FinalAccumKernel(EarlyClip(), Renderer::NumChannels(), Transparency()); + string kernel = m_FinalAccumOpenCLKernelCreator.FinalAccumKernel(EarlyClip(), Renderer::NumChannels(), Transparency()); bool b = m_Wrapper.AddProgram(finalAccumEntryPoint, kernel, finalAccumEntryPoint, m_DoublePrecision); if (b) @@ -1306,16 +1310,16 @@ int RendererCL::MakeAndGetFinalAccumProgram(T& alphaBase, T& alphaScale) /// Make the gamma correction program for early clipping and return its index. /// /// The kernel index if successful, else -1. -template -int RendererCL::MakeAndGetGammaCorrectionProgram() +template +int RendererCL::MakeAndGetGammaCorrectionProgram() { - string gammaEntryPoint = m_FinalAccumOpenCLKernelCreator.GammaCorrectionEntryPoint(Renderer::NumChannels(), Transparency()); + string gammaEntryPoint = m_FinalAccumOpenCLKernelCreator.GammaCorrectionEntryPoint(Renderer::NumChannels(), Transparency()); int kernelIndex = m_Wrapper.FindKernelIndex(gammaEntryPoint); const char* loc = __FUNCTION__; if (kernelIndex == -1)//Has not been built yet. 
{ - string kernel = m_FinalAccumOpenCLKernelCreator.GammaCorrectionKernel(Renderer::NumChannels(), Transparency()); + string kernel = m_FinalAccumOpenCLKernelCreator.GammaCorrectionKernel(Renderer::NumChannels(), Transparency()); bool b = m_Wrapper.AddProgram(gammaEntryPoint, kernel, gammaEntryPoint, m_DoublePrecision); if (b) @@ -1336,28 +1340,22 @@ int RendererCL::MakeAndGetGammaCorrectionProgram() /// for passing to OpenCL. /// /// The DensityFilterCL object -template -DensityFilterCL RendererCL::ConvertDensityFilter() +template +void RendererCL::ConvertDensityFilter() { - DensityFilterCL filterCL; - DensityFilter* densityFilter = dynamic_cast*>(GetDensityFilter()); - - filterCL.m_Supersample = uint(Supersample()); - filterCL.m_SuperRasW = uint(SuperRasW()); - filterCL.m_SuperRasH = uint(SuperRasH()); - filterCL.m_K1 = K1(); - filterCL.m_K2 = K2(); - - if (densityFilter) + if (m_DensityFilter.get()) { - filterCL.m_Curve = densityFilter->Curve(); - filterCL.m_KernelSize = uint(densityFilter->KernelSize()); - filterCL.m_MaxFilterIndex = uint(densityFilter->MaxFilterIndex()); - filterCL.m_MaxFilteredCounts = uint(densityFilter->MaxFilteredCounts()); - filterCL.m_FilterWidth = uint(densityFilter->FilterWidth()); + m_DensityFilterCL.m_Supersample = uint(Supersample()); + m_DensityFilterCL.m_SuperRasW = uint(SuperRasW()); + m_DensityFilterCL.m_SuperRasH = uint(SuperRasH()); + m_DensityFilterCL.m_K1 = K1(); + m_DensityFilterCL.m_K2 = K2(); + m_DensityFilterCL.m_Curve = m_DensityFilter->Curve(); + m_DensityFilterCL.m_KernelSize = uint(m_DensityFilter->KernelSize()); + m_DensityFilterCL.m_MaxFilterIndex = uint(m_DensityFilter->MaxFilterIndex()); + m_DensityFilterCL.m_MaxFilteredCounts = uint(m_DensityFilter->MaxFilteredCounts()); + m_DensityFilterCL.m_FilterWidth = uint(m_DensityFilter->FilterWidth()); } - - return filterCL; } /// @@ -1365,33 +1363,33 @@ DensityFilterCL RendererCL::ConvertDensityFilter() /// for passing to OpenCL. 
/// /// The SpatialFilterCL object -template -SpatialFilterCL RendererCL::ConvertSpatialFilter() +template +void RendererCL::ConvertSpatialFilter() { - T g, linRange, vibrancy; - Color background; - SpatialFilterCL filterCL; + bucketT g, linRange, vibrancy; + Color background; - this->PrepFinalAccumVals(background, g, linRange, vibrancy); + if (m_SpatialFilter.get()) + { + this->PrepFinalAccumVals(background, g, linRange, vibrancy); - filterCL.m_SuperRasW = uint(SuperRasW()); - filterCL.m_SuperRasH = uint(SuperRasH()); - filterCL.m_FinalRasW = uint(FinalRasW()); - filterCL.m_FinalRasH = uint(FinalRasH()); - filterCL.m_Supersample = uint(Supersample()); - filterCL.m_FilterWidth = uint(GetSpatialFilter()->FinalFilterWidth()); - filterCL.m_NumChannels = uint(Renderer::NumChannels()); - filterCL.m_BytesPerChannel = uint(BytesPerChannel()); - filterCL.m_DensityFilterOffset = uint(DensityFilterOffset()); - filterCL.m_Transparency = Transparency(); - filterCL.m_YAxisUp = uint(m_YAxisUp); - filterCL.m_Vibrancy = vibrancy; - filterCL.m_HighlightPower = HighlightPower(); - filterCL.m_Gamma = g; - filterCL.m_LinRange = linRange; - filterCL.m_Background = background; - - return filterCL; + m_SpatialFilterCL.m_SuperRasW = uint(SuperRasW()); + m_SpatialFilterCL.m_SuperRasH = uint(SuperRasH()); + m_SpatialFilterCL.m_FinalRasW = uint(FinalRasW()); + m_SpatialFilterCL.m_FinalRasH = uint(FinalRasH()); + m_SpatialFilterCL.m_Supersample = uint(Supersample()); + m_SpatialFilterCL.m_FilterWidth = uint(m_SpatialFilter->FinalFilterWidth()); + m_SpatialFilterCL.m_NumChannels = uint(Renderer::NumChannels()); + m_SpatialFilterCL.m_BytesPerChannel = uint(BytesPerChannel()); + m_SpatialFilterCL.m_DensityFilterOffset = uint(DensityFilterOffset()); + m_SpatialFilterCL.m_Transparency = Transparency(); + m_SpatialFilterCL.m_YAxisUp = uint(m_YAxisUp); + m_SpatialFilterCL.m_Vibrancy = vibrancy; + m_SpatialFilterCL.m_HighlightPower = HighlightPower(); + m_SpatialFilterCL.m_Gamma = g; + 
m_SpatialFilterCL.m_LinRange = linRange; + m_SpatialFilterCL.m_Background = background; + } } /// @@ -1401,8 +1399,8 @@ SpatialFilterCL RendererCL::ConvertSpatialFilter() /// The Ember object to convert /// The converted EmberCL /// The converted vector of XformCL -template -void RendererCL::ConvertEmber(Ember& ember, EmberCL& emberCL, vector>& xformsCL) +template +void RendererCL::ConvertEmber(Ember& ember, EmberCL& emberCL, vector>& xformsCL) { memset(&emberCL, 0, sizeof(EmberCL));//Might not really be needed. @@ -1455,8 +1453,8 @@ void RendererCL::ConvertEmber(Ember& ember, EmberCL& emberCL, vector /// The CarToRas object to convert /// The CarToRasCL object -template -CarToRasCL RendererCL::ConvertCarToRas(const CarToRas& carToRas) +template +CarToRasCL RendererCL::ConvertCarToRas(const CarToRas& carToRas) { CarToRasCL carToRasCL; @@ -1479,8 +1477,8 @@ CarToRasCL RendererCL::ConvertCarToRas(const CarToRas& carToRas) /// Note, WriteBuffer() must be called after this to actually copy the /// data from the host to the device. /// -template -void RendererCL::FillSeeds() +template +void RendererCL::FillSeeds() { double start, delta = std::floor((double)std::numeric_limits::max() / (IterGridKernelCount() * 2)); m_Seeds.resize(IterGridKernelCount()); @@ -1495,9 +1493,9 @@ void RendererCL::FillSeeds() } } -template EMBERCL_API class RendererCL; +template EMBERCL_API class RendererCL; #ifdef DO_DOUBLE - template EMBERCL_API class RendererCL; + template EMBERCL_API class RendererCL; #endif } diff --git a/Source/EmberCL/RendererCL.h b/Source/EmberCL/RendererCL.h index 3d22474..43e0b38 100644 --- a/Source/EmberCL/RendererCL.h +++ b/Source/EmberCL/RendererCL.h @@ -33,55 +33,55 @@ public: /// It does not support different types for T and bucketT, so it only has one template argument /// and uses both for the base. 
/// -template -class EMBERCL_API RendererCL : public Renderer, public RendererCLBase +template +class EMBERCL_API RendererCL : public Renderer, public RendererCLBase { -using EmberNs::Renderer::RendererBase::Abort; -using EmberNs::Renderer::RendererBase::EarlyClip; -using EmberNs::Renderer::RendererBase::Transparency; -using EmberNs::Renderer::RendererBase::EnterResize; -using EmberNs::Renderer::RendererBase::LeaveResize; -using EmberNs::Renderer::RendererBase::FinalRasW; -using EmberNs::Renderer::RendererBase::FinalRasH; -using EmberNs::Renderer::RendererBase::SuperRasW; -using EmberNs::Renderer::RendererBase::SuperRasH; -using EmberNs::Renderer::RendererBase::SuperSize; -using EmberNs::Renderer::RendererBase::BytesPerChannel; -using EmberNs::Renderer::RendererBase::TemporalSamples; -using EmberNs::Renderer::RendererBase::ItersPerTemporalSample; -using EmberNs::Renderer::RendererBase::FuseCount; -using EmberNs::Renderer::RendererBase::DensityFilterOffset; -using EmberNs::Renderer::RendererBase::m_ProgressParameter; -using EmberNs::Renderer::RendererBase::m_YAxisUp; -using EmberNs::Renderer::RendererBase::m_LockAccum; -using EmberNs::Renderer::RendererBase::m_Abort; -using EmberNs::Renderer::RendererBase::m_NumChannels; -using EmberNs::Renderer::RendererBase::m_LastIter; -using EmberNs::Renderer::RendererBase::m_LastIterPercent; -using EmberNs::Renderer::RendererBase::m_Stats; -using EmberNs::Renderer::RendererBase::m_Callback; -using EmberNs::Renderer::RendererBase::m_Rand; -using EmberNs::Renderer::RendererBase::m_RenderTimer; -using EmberNs::Renderer::RendererBase::m_IterTimer; -using EmberNs::Renderer::RendererBase::m_ProgressTimer; -using EmberNs::Renderer::RendererBase::EmberReport::m_ErrorReport; -using EmberNs::Renderer::m_RotMat; -using EmberNs::Renderer::m_Ember; -using EmberNs::Renderer::m_Csa; -using EmberNs::Renderer::m_CurvesSet; -using EmberNs::Renderer::CenterX; -using EmberNs::Renderer::CenterY; -using EmberNs::Renderer::K1; -using 
EmberNs::Renderer::K2; -using EmberNs::Renderer::Supersample; -using EmberNs::Renderer::HighlightPower; -using EmberNs::Renderer::HistBuckets; -using EmberNs::Renderer::AccumulatorBuckets; -using EmberNs::Renderer::GetDensityFilter; -using EmberNs::Renderer::GetSpatialFilter; -using EmberNs::Renderer::CoordMap; -using EmberNs::Renderer::XformDistributions; -using EmberNs::Renderer::XformDistributionsSize; +using EmberNs::Renderer::RendererBase::Abort; +using EmberNs::Renderer::RendererBase::EarlyClip; +using EmberNs::Renderer::RendererBase::Transparency; +using EmberNs::Renderer::RendererBase::EnterResize; +using EmberNs::Renderer::RendererBase::LeaveResize; +using EmberNs::Renderer::RendererBase::FinalRasW; +using EmberNs::Renderer::RendererBase::FinalRasH; +using EmberNs::Renderer::RendererBase::SuperRasW; +using EmberNs::Renderer::RendererBase::SuperRasH; +using EmberNs::Renderer::RendererBase::SuperSize; +using EmberNs::Renderer::RendererBase::BytesPerChannel; +using EmberNs::Renderer::RendererBase::TemporalSamples; +using EmberNs::Renderer::RendererBase::ItersPerTemporalSample; +using EmberNs::Renderer::RendererBase::FuseCount; +using EmberNs::Renderer::RendererBase::DensityFilterOffset; +using EmberNs::Renderer::RendererBase::m_ProgressParameter; +using EmberNs::Renderer::RendererBase::m_YAxisUp; +using EmberNs::Renderer::RendererBase::m_LockAccum; +using EmberNs::Renderer::RendererBase::m_Abort; +using EmberNs::Renderer::RendererBase::m_NumChannels; +using EmberNs::Renderer::RendererBase::m_LastIter; +using EmberNs::Renderer::RendererBase::m_LastIterPercent; +using EmberNs::Renderer::RendererBase::m_Stats; +using EmberNs::Renderer::RendererBase::m_Callback; +using EmberNs::Renderer::RendererBase::m_Rand; +using EmberNs::Renderer::RendererBase::m_RenderTimer; +using EmberNs::Renderer::RendererBase::m_IterTimer; +using EmberNs::Renderer::RendererBase::m_ProgressTimer; +using EmberNs::Renderer::RendererBase::EmberReport::m_ErrorReport; +using 
EmberNs::Renderer::m_RotMat; +using EmberNs::Renderer::m_Ember; +using EmberNs::Renderer::m_Csa; +using EmberNs::Renderer::m_CurvesSet; +using EmberNs::Renderer::CenterX; +using EmberNs::Renderer::CenterY; +using EmberNs::Renderer::K1; +using EmberNs::Renderer::K2; +using EmberNs::Renderer::Supersample; +using EmberNs::Renderer::HighlightPower; +using EmberNs::Renderer::HistBuckets; +using EmberNs::Renderer::AccumulatorBuckets; +using EmberNs::Renderer::GetDensityFilter; +using EmberNs::Renderer::GetSpatialFilter; +using EmberNs::Renderer::CoordMap; +using EmberNs::Renderer::XformDistributions; +using EmberNs::Renderer::XformDistributionsSize; public: RendererCL(uint platform = 0, uint device = 0, bool shared = false, GLuint outputTexID = 0); @@ -169,13 +169,13 @@ private: bool ClearBuffer(const string& bufferName, uint width, uint height, uint elementSize); bool RunDensityFilterPrivate(uint kernelIndex, uint gridW, uint gridH, uint blockW, uint blockH, uint chunkSizeW, uint chunkSizeH, uint chunkW, uint chunkH); int MakeAndGetDensityFilterProgram(size_t ss, uint filterWidth); - int MakeAndGetFinalAccumProgram(T& alphaBase, T& alphaScale); + int MakeAndGetFinalAccumProgram(double& alphaBase, double& alphaScale); int MakeAndGetGammaCorrectionProgram(); void FillSeeds(); //Private functions passing data to OpenCL programs. - DensityFilterCL ConvertDensityFilter(); - SpatialFilterCL ConvertSpatialFilter(); + void ConvertDensityFilter(); + void ConvertSpatialFilter(); void ConvertEmber(Ember& ember, EmberCL& emberCL, vector>& xformsCL); static CarToRasCL ConvertCarToRas(const CarToRas& carToRas); @@ -221,13 +221,13 @@ private: EmberCL m_EmberCL; vector> m_XformsCL; vector m_Seeds; - Palette m_DmapCL;//Used instead of the base class' m_Dmap because OpenCL only supports float textures. + Palette m_DmapCL;//Used instead of the base class' m_Dmap because OpenCL only supports float textures. Likely not needed if we switch to float only hist. 
CarToRasCL m_CarToRasCL; - DensityFilterCL m_DensityFilterCL; - SpatialFilterCL m_SpatialFilterCL; + DensityFilterCL m_DensityFilterCL; + SpatialFilterCL m_SpatialFilterCL; IterOpenCLKernelCreator m_IterOpenCLKernelCreator; - DEOpenCLKernelCreator m_DEOpenCLKernelCreator; - FinalAccumOpenCLKernelCreator m_FinalAccumOpenCLKernelCreator; + DEOpenCLKernelCreator m_DEOpenCLKernelCreator; + FinalAccumOpenCLKernelCreator m_FinalAccumOpenCLKernelCreator; pair> m_Params; Ember m_LastBuiltEmber; }; diff --git a/Source/EmberCommon/EmberCommon.h b/Source/EmberCommon/EmberCommon.h index a96f12a..b8dd006 100644 --- a/Source/EmberCommon/EmberCommon.h +++ b/Source/EmberCommon/EmberCommon.h @@ -259,7 +259,7 @@ static Renderer* CreateRenderer(eRendererType renderType, uint platf else if (renderType == OPENCL_RENDERER) { s = "OpenCL"; - renderer = unique_ptr>(new RendererCL(platform, device, shared, texId)); + renderer = unique_ptr>(new RendererCL(platform, device, shared, texId)); if (!renderer.get() || !renderer->Ok()) { diff --git a/Source/EmberGenome/EmberGenome.cpp b/Source/EmberGenome/EmberGenome.cpp index d62bc17..21f7612 100644 --- a/Source/EmberGenome/EmberGenome.cpp +++ b/Source/EmberGenome/EmberGenome.cpp @@ -787,7 +787,7 @@ int _tmain(int argc, _TCHAR* argv[]) #ifdef DO_DOUBLE if (opt.Bits() == 64) { - b = EmberGenome(opt); + b = EmberGenome(opt); } else #endif diff --git a/Source/EmberRender/EmberRender.cpp b/Source/EmberRender/EmberRender.cpp index e0b5822..e189864 100644 --- a/Source/EmberRender/EmberRender.cpp +++ b/Source/EmberRender/EmberRender.cpp @@ -302,7 +302,15 @@ bool EmberRender(EmberOptions& opt) }); if (opt.EmberCL() && opt.DumpKernel()) - cout << "Iteration kernel: \n" << reinterpret_cast*>(renderer.get())->IterKernel() << endl; + { + if (auto rendererCL = dynamic_cast*>(renderer.get())) + { + cout << "Iteration kernel: \n" << + rendererCL->IterKernel() << "\n\n" << + rendererCL->DEKernel() << "\n\n" << + rendererCL->FinalAccumKernel() << endl; + } + } 
VerbosePrint("Done."); } @@ -339,7 +347,7 @@ int _tmain(int argc, _TCHAR* argv[]) #ifdef DO_DOUBLE if (opt.Bits() == 64) { - b = EmberRender(opt); + b = EmberRender(opt); } else #endif diff --git a/Source/EmberTester/EmberTester.cpp b/Source/EmberTester/EmberTester.cpp index 381c48b..4aee375 100644 --- a/Source/EmberTester/EmberTester.cpp +++ b/Source/EmberTester/EmberTester.cpp @@ -71,9 +71,9 @@ Ember CreateBasicEmber(uint width, uint height, uint ss, T quality, T centerX string GetEmberCLKernelString(Ember& ember, bool iter, bool log, bool de, uint ss, bool accum) { ostringstream os; - IterOpenCLKernelCreator iterCreator; - DEOpenCLKernelCreator deCreator; - FinalAccumOpenCLKernelCreator accumCreator; + IterOpenCLKernelCreator iterCreator(false); + DEOpenCLKernelCreator deCreator(false, false); + FinalAccumOpenCLKernelCreator accumCreator(false); pair> pair; iterCreator.ParVarIndexDefines(ember, pair); diff --git a/Source/Fractorium/FinalRenderEmberController.cpp b/Source/Fractorium/FinalRenderEmberController.cpp index 13b9b00..ec5060a 100644 --- a/Source/Fractorium/FinalRenderEmberController.cpp +++ b/Source/Fractorium/FinalRenderEmberController.cpp @@ -92,7 +92,7 @@ template FinalRenderEmberController::FinalRenderEmberController(FractoriumFinalRenderDialog* finalRender) : FinalRenderEmberControllerBase(finalRender) { - m_FinalPreviewRenderer = unique_ptr>(new EmberNs::Renderer()); + m_FinalPreviewRenderer = unique_ptr>(new EmberNs::Renderer()); m_FinalPreviewRenderer->Callback(nullptr); m_FinalPreviewRenderer->NumChannels(4); @@ -431,7 +431,7 @@ bool FinalRenderEmberController::CreateRenderer(eRendererType renderType, uin m_OutputTexID = 0;//Don't care about tex ID when doing final render. 
m_Shared = shared; - m_Renderer = unique_ptr(::CreateRenderer(renderType, platform, device, shared, m_OutputTexID, emberReport)); + m_Renderer = unique_ptr(::CreateRenderer(renderType, platform, device, shared, m_OutputTexID, emberReport)); errorReport = emberReport.ErrorReport(); if (!errorReport.empty()) diff --git a/Source/Fractorium/FinalRenderEmberController.h b/Source/Fractorium/FinalRenderEmberController.h index 597d946..41233a1 100644 --- a/Source/Fractorium/FinalRenderEmberController.h +++ b/Source/Fractorium/FinalRenderEmberController.h @@ -138,6 +138,6 @@ protected: Ember m_PreviewEmber; EmberFile m_EmberFile; EmberToXml m_XmlWriter; - unique_ptr> m_FinalPreviewRenderer; + unique_ptr> m_FinalPreviewRenderer; }; diff --git a/Source/Fractorium/Fractorium.cpp b/Source/Fractorium/Fractorium.cpp index c9d1c0c..8c8ba93 100644 --- a/Source/Fractorium/Fractorium.cpp +++ b/Source/Fractorium/Fractorium.cpp @@ -117,6 +117,7 @@ Fractorium::Fractorium(QWidget* p) m_Controller = unique_ptr(new FractoriumEmberController(this)); m_Controller->SetupVariationTree(); + m_Controller->FilteredVariations(); if (m_Wrapper.CheckOpenCL() && m_Settings->OpenCL() && m_QualitySpin->value() < 30) m_QualitySpin->setValue(30); diff --git a/Source/Fractorium/Fractorium.ui b/Source/Fractorium/Fractorium.ui index 2bd0bb0..75cd285 100644 --- a/Source/Fractorium/Fractorium.ui +++ b/Source/Fractorium/Fractorium.ui @@ -75,7 +75,7 @@ 0 0 1175 - 861 + 859 @@ -2484,6 +2484,9 @@ false + + + QFrame::Panel @@ -2596,7 +2599,7 @@ - true + false QTabWidget::pane @@ -3262,7 +3265,7 @@ SpinBox - true + false Affine @@ -3289,7 +3292,7 @@ SpinBox - true + false QFrame::NoFrame @@ -3305,8 +3308,8 @@ SpinBox 0 0 - 243 - 745 + 118 + 618 @@ -3349,7 +3352,7 @@ SpinBox - true + false Pre Affine Transform @@ -3864,7 +3867,7 @@ SpinBox - true + false Show @@ -3938,7 +3941,7 @@ SpinBox - true + false Post Affine Transform @@ -4498,7 +4501,7 @@ SpinBox true - true + false Show @@ -4555,7 +4558,7 @@ SpinBox - true 
+ false Pivot @@ -4907,7 +4910,7 @@ SpinBox - true + false Select Xforms @@ -4920,16 +4923,16 @@ SpinBox 0 - 0 + 2 - 0 + 2 - 0 + 2 - 0 + 2 @@ -4962,8 +4965,8 @@ SpinBox 0 0 - 243 - 680 + 133 + 52 @@ -5095,6 +5098,9 @@ SpinBox 0 + + true + 0 @@ -5119,6 +5125,9 @@ SpinBox 0 + + false + QTabWidget::Triangular @@ -5350,12 +5359,12 @@ SpinBox - + Collapse - + Expand @@ -6107,7 +6116,7 @@ SpinBox 0 0 256 - 830 + 828 @@ -6585,7 +6594,7 @@ SpinBox DP - Use DP to render + Use double precision to render diff --git a/Source/Fractorium/FractoriumEmberController.cpp b/Source/Fractorium/FractoriumEmberController.cpp index b2526bb..c3c0aa2 100644 --- a/Source/Fractorium/FractoriumEmberController.cpp +++ b/Source/Fractorium/FractoriumEmberController.cpp @@ -72,11 +72,11 @@ FractoriumEmberController::FractoriumEmberController(Fractorium* fractorium) { m_PreviewRun = false; m_PreviewRunning = false; - m_SheepTools = unique_ptr>(new SheepTools( + m_SheepTools = unique_ptr>(new SheepTools( QString(QApplication::applicationDirPath() + "flam3-palettes.xml").toLocal8Bit().data(), - new EmberNs::Renderer())); + new EmberNs::Renderer())); m_GLController = unique_ptr>(new GLEmberController(fractorium, fractorium->ui.GLDisplay, this)); - m_PreviewRenderer = unique_ptr>(new EmberNs::Renderer()); + m_PreviewRenderer = unique_ptr>(new EmberNs::Renderer()); //Initial combo change event to fill the palette table will be called automatically later. if (!InitPaletteList(QCoreApplication::applicationDirPath().toLocal8Bit().data())) diff --git a/Source/Fractorium/FractoriumEmberController.h b/Source/Fractorium/FractoriumEmberController.h index 8b7bff4..152f048 100644 --- a/Source/Fractorium/FractoriumEmberController.h +++ b/Source/Fractorium/FractoriumEmberController.h @@ -185,6 +185,7 @@ public: virtual void SetupVariationTree() { } virtual void ClearVariationsTree() { } virtual void VariationSpinBoxValueChanged(double d) { } + virtual void FilteredVariations() { } //Xforms Selection. 
@@ -230,7 +231,7 @@ protected: void AddProcessAction(eProcessAction action); eProcessAction CondenseAndClearProcessActions(); eProcessState ProcessState() { return m_Renderer.get() ? m_Renderer->ProcessState() : NONE; } - + //Non-templated members. bool m_Rendering; bool m_Shared; @@ -255,6 +256,7 @@ protected: vector m_FinalImage[2]; vector m_PreviewFinalImage; vector m_ProcessActions; + vector m_FilteredVariations; unique_ptr m_Renderer; QTIsaac m_Rand; Fractorium* m_Fractorium; @@ -423,6 +425,7 @@ public: virtual void SetupVariationTree() override; virtual void ClearVariationsTree() override; virtual void VariationSpinBoxValueChanged(double d) override; + virtual void FilteredVariations() override; void FillVariationTreeWithXform(Xform* xform); //Xforms Xaos. @@ -492,9 +495,9 @@ private: Palette m_TempPalette; PaletteList m_PaletteList; VariationList m_VariationList; - unique_ptr> m_SheepTools; + unique_ptr> m_SheepTools; unique_ptr> m_GLController; - unique_ptr> m_PreviewRenderer; + unique_ptr> m_PreviewRenderer; QFuture m_PreviewResult; std::function m_PreviewRenderFunc; }; diff --git a/Source/Fractorium/FractoriumLibrary.cpp b/Source/Fractorium/FractoriumLibrary.cpp index c208ced..789ac72 100644 --- a/Source/Fractorium/FractoriumLibrary.cpp +++ b/Source/Fractorium/FractoriumLibrary.cpp @@ -253,6 +253,8 @@ void Fractorium::OnEmberTreeItemChanged(QTreeWidgetItem* item, int col) { m_Cont /// Clears the undo state. /// Resets the rendering process. /// Called when the user double clicks on a library tree item. +/// This will get called twice for some reason, and there's no way to prevent it. +/// Doesn't seem to cause any problems. /// /// The item double clicked on /// The column clicked, ignored. 
diff --git a/Source/Fractorium/FractoriumMenus.cpp b/Source/Fractorium/FractoriumMenus.cpp index 3458f49..534ffc7 100644 --- a/Source/Fractorium/FractoriumMenus.cpp +++ b/Source/Fractorium/FractoriumMenus.cpp @@ -65,7 +65,7 @@ void FractoriumEmberController::NewFlock(uint count) for (uint i = 0; i < count; i++) { - m_SheepTools->Random(ember); + m_SheepTools->Random(ember, m_FilteredVariations, static_cast(QTIsaac::GlobalRand->Frand(-2, 2)), 0); ParamsToEmber(ember); ember.m_Index = i; ember.m_Name = m_EmberFile.m_Filename.toStdString() + "-" + ToString(i + 1).toStdString(); @@ -126,7 +126,7 @@ void FractoriumEmberController::NewRandomFlameInCurrentFile() Ember ember; StopPreviewRender(); - m_SheepTools->Random(ember); + m_SheepTools->Random(ember, m_FilteredVariations, static_cast(QTIsaac::GlobalRand->Frand(-2, 2)), 0); ParamsToEmber(ember); ember.m_Name = EmberFile::DefaultEmberName(m_EmberFile.Size() + 1).toStdString(); ember.m_Index = m_EmberFile.Size(); diff --git a/Source/Fractorium/FractoriumPalette.cpp b/Source/Fractorium/FractoriumPalette.cpp index 7daf3ca..678a0af 100644 --- a/Source/Fractorium/FractoriumPalette.cpp +++ b/Source/Fractorium/FractoriumPalette.cpp @@ -162,11 +162,10 @@ void FractoriumEmberController::ApplyPaletteToEmber() double sat = double(m_Fractorium->m_PaletteSaturationSpin->value() / 100.0); double brightness = double(m_Fractorium->m_PaletteBrightnessSpin->value() / 255.0); double contrast = double(m_Fractorium->m_PaletteContrastSpin->value() > 0 ? (m_Fractorium->m_PaletteContrastSpin->value() * 2) : m_Fractorium->m_PaletteContrastSpin->value()) / 100.0; - - m_Ember.m_Hue = double(m_Fractorium->m_PaletteHueSpin->value()) / 360.0;//This is the only palette adjustment value that gets saved with the ember, so just assign it here. + double hue = double(m_Fractorium->m_PaletteHueSpin->value()) / 360.0; //Use the temp palette as the base and apply the adjustments gotten from the GUI and save the result in the ember palette. 
- m_TempPalette.MakeAdjustedPalette(m_Ember.m_Palette, 0, m_Ember.m_Hue, sat, brightness, contrast, blur, freq); + m_TempPalette.MakeAdjustedPalette(m_Ember.m_Palette, 0, hue, sat, brightness, contrast, blur, freq); } /// diff --git a/Source/Fractorium/FractoriumParams.cpp b/Source/Fractorium/FractoriumParams.cpp index f24c856..fa30372 100644 --- a/Source/Fractorium/FractoriumParams.cpp +++ b/Source/Fractorium/FractoriumParams.cpp @@ -550,18 +550,28 @@ void FractoriumEmberController::FillParamTablesAndPalette() FillXaos(); //Palette. - m_Fractorium->ResetPaletteControls(); - m_Fractorium->m_PaletteHueSpin->SetValueStealth(NormalizeDeg180(m_Ember.m_Hue * 360.0));//Convert -0.5 to 0.5 range to -180 - 180. - //Use the ember's embedded palette, rather than one from the list, so assign it directly to the controls without applying adjustments. - //Normally, the temp palette is assigned whenever the user clicks on a palette cell. But since that is skipped here, must do it manually. + //The temp palette is assigned the palette read when the file was parsed/saved. The user can apply adjustments on the GUI later. + //These adjustments will be applied to the temp palette, then assigned back to m_Ember.m_Palette. + //Normally, the temp palette is assigned whenever the user clicks on a palette cell. But since this is not + //called in response to that event, it is skipped here so must do it manually. m_TempPalette = m_Ember.m_Palette; - auto temp = m_Ember.m_Palette.m_Filename; + + //Palette controls are reset on each ember load. This means that if the palette was adjusted, saved, the selected ember + //changed to another, then back, the previously adjusted palette will now be considered the base, and all adjustments set to 0. + //To fix this, the caller must preserve the temp palette and the adjustment values and reassign. See Fractorium::CreateControllerFromOptions() + //for an example. 
+ m_Fractorium->ResetPaletteControls(); + auto temp = m_Ember.m_Palette.m_Filename; + if (temp.get()) m_Fractorium->SetPaletteFileComboIndex(*temp.get()); - UpdateAdjustedPaletteGUI(m_Ember.m_Palette);//Setting the palette will trigger a full render. + //Update the palette preview widget. + //Since the controls were cleared above, the adjusted palette will be identical to the base palette. + //Callers can set, apply and display palette adjustments after this function exits if needed. + UpdateAdjustedPaletteGUI(m_Ember.m_Palette);//Updating the palette GUI will trigger a full render. } /// diff --git a/Source/Fractorium/FractoriumRender.cpp b/Source/Fractorium/FractoriumRender.cpp index a8b79bc..e4ef869 100644 --- a/Source/Fractorium/FractoriumRender.cpp +++ b/Source/Fractorium/FractoriumRender.cpp @@ -277,7 +277,7 @@ bool FractoriumEmberController::SyncSizes() { bool changed = false; GLWidget* gl = m_Fractorium->ui.GLDisplay; - RendererCL* rendererCL = nullptr; + RendererCL* rendererCL = nullptr; if (!m_GLController->SizesMatch()) { @@ -286,7 +286,7 @@ bool FractoriumEmberController::SyncSizes() gl->Allocate(); gl->SetViewport(); - if (m_Renderer->RendererType() == OPENCL_RENDERER && (rendererCL = dynamic_cast*>(m_Renderer.get()))) + if (m_Renderer->RendererType() == OPENCL_RENDERER && (rendererCL = dynamic_cast*>(m_Renderer.get()))) rendererCL->SetOutputTexture(gl->OutputTexID()); m_Fractorium->CenterScrollbars(); @@ -308,11 +308,11 @@ bool FractoriumEmberController::Render() bool success = true; GLWidget* gl = m_Fractorium->ui.GLDisplay; - RendererCL* rendererCL = nullptr; + RendererCL* rendererCL = nullptr; eProcessAction action = CondenseAndClearProcessActions(); if (m_Renderer->RendererType() == OPENCL_RENDERER) - rendererCL = dynamic_cast*>(m_Renderer.get()); + rendererCL = dynamic_cast*>(m_Renderer.get()); //Force temporal samples to always be 1. Perhaps change later when animation is implemented. 
m_Ember.m_TemporalSamples = 1; @@ -524,7 +524,7 @@ bool FractoriumEmberController::CreateRenderer(eRendererType renderType, uint DeleteRenderer();//Delete the renderer and refresh the textures. //Before starting, must take care of allocations. gl->Allocate(true);//Forcing a realloc of the texture is necessary on AMD, but not on nVidia. - m_Renderer = unique_ptr(::CreateRenderer(renderType, platform, device, shared, gl->OutputTexID(), emberReport)); + m_Renderer = unique_ptr(::CreateRenderer(renderType, platform, device, shared, gl->OutputTexID(), emberReport));//Always make bucket type float. errorReport = emberReport.ErrorReport(); if (errorReport.empty()) @@ -699,20 +699,26 @@ bool Fractorium::CreateControllerFromOptions() //Restore the ember and ember file. if (m_Controller.get()) { - m_Controller->SetEmber(ed);//Convert float to double or set double verbatim; + ed.m_Palette = tempPalette;//Restore base temp palette. Adjustments will then be applied and stored back in m_Ember.m_Palette below. + m_Controller->SetEmber(ed);//Convert float to double or set double verbatim. This will assign m_Ember.m_Palette (which was just tempPalette) to m_TempPalette. m_Controller->SetEmberFile(efd); - //Template specific palette table and variations tree setup in controller constructor, but - //must manually setup the library tree here because it's after the embers were assigned. - m_Controller->FillLibraryTree(index.row());//Passing row re-selects the item that was previously selected. - m_Controller->SetTempPalette(tempPalette);//Restore palette. + //Setting these and updating the GUI overwrites the work of clearing them done in SetEmber() above. + //It's a corner case, but doesn't seem to matter.
m_PaletteHueSpin->SetValueStealth(hue); m_PaletteSaturationSpin->SetValueStealth(sat); m_PaletteBrightnessSpin->SetValueStealth(bright); m_PaletteContrastSpin->SetValueStealth(con); m_PaletteBlurSpin->SetValueStealth(blur); m_PaletteFrequencySpin->SetValueStealth(freq); - m_Controller->PaletteAdjust();//Fills in the palette. + m_Controller->PaletteAdjust();//Applies the adjustments to temp and saves in m_Ember.m_Palette, then fills in the palette preview widget. + + //Template specific palette table and variations tree setup in controller constructor, but + //must manually setup the library tree here because it's after the embers were assigned. + //Passing row re-selects the item that was previously selected. + //This will eventually call FillParamTablesAndPalette(), which in addition to filling in various fields, + //will apply the palette adjustments. + m_Controller->FillLibraryTree(index.row()); } } diff --git a/Source/Fractorium/FractoriumToolbar.cpp b/Source/Fractorium/FractoriumToolbar.cpp index b1f94b9..4e8f5e6 100644 --- a/Source/Fractorium/FractoriumToolbar.cpp +++ b/Source/Fractorium/FractoriumToolbar.cpp @@ -15,7 +15,7 @@ void Fractorium::InitToolbarUI() spGroup->addAction(ui.ActionDP); SyncOptionsToToolbar(); - connect(ui.ActionCpu, SIGNAL(triggered(bool)), this, SLOT(OnActionCpu(bool)), Qt::QueuedConnection);//Need to sync these with options dialog.//TODO + connect(ui.ActionCpu, SIGNAL(triggered(bool)), this, SLOT(OnActionCpu(bool)), Qt::QueuedConnection); connect(ui.ActionCL, SIGNAL(triggered(bool)), this, SLOT(OnActionCL(bool)), Qt::QueuedConnection); connect(ui.ActionSP, SIGNAL(triggered(bool)), this, SLOT(OnActionSP(bool)), Qt::QueuedConnection); connect(ui.ActionDP, SIGNAL(triggered(bool)), this, SLOT(OnActionDP(bool)), Qt::QueuedConnection); diff --git a/Source/Fractorium/FractoriumXformsVariations.cpp b/Source/Fractorium/FractoriumXformsVariations.cpp index 93e2a68..fe38714 100644 --- a/Source/Fractorium/FractoriumXformsVariations.cpp +++ 
b/Source/Fractorium/FractoriumXformsVariations.cpp @@ -27,7 +27,10 @@ void Fractorium::InitXformsVariationsUI() void Fractorium::OnActionVariationsDialog(bool checked) { if (m_VarDialog->exec()) + { + m_Controller->FilteredVariations(); Filter(); + } } /// @@ -77,6 +80,20 @@ void Fractorium::Filter() m_Controller->Filter(ui.VariationsFilterLineEdit->text()); } +template +void FractoriumEmberController::FilteredVariations() +{ + auto& map = m_Fractorium->m_VarDialog->Map(); + + m_FilteredVariations.clear(); + m_FilteredVariations.reserve(map.size()); + + for (auto i = 0; i < m_VariationList.Size(); i++) + if (auto var = m_VariationList.GetVariation(i)) + if (map.contains(var->Name().c_str()) && map[var->Name().c_str()].toBool()) + m_FilteredVariations.push_back(var->VariationId()); +} + /// /// Dynamically populate the variation tree widget with VariationTreeWidgetItem and VariationTreeDoubleSpinBox /// templated with the correct type. @@ -222,6 +239,7 @@ void FractoriumEmberController::VariationSpinBoxValueChanged(double d)//Would if (xformVar) xform->DeleteVariationById(var->VariationId()); + //widgetItem->setBackgroundColor(0, Qt::darkGray);//Ensure background is always white if weight goes to zero. widgetItem->setBackgroundColor(0, QColor(255, 255, 255));//Ensure background is always white if weight goes to zero. } else @@ -238,6 +256,7 @@ void FractoriumEmberController::VariationSpinBoxValueChanged(double d)//Would newVar->m_Weight = d; xform->AddVariation(newVar); + //widgetItem->setBackgroundColor(0, Qt::darkGray);//Set background to gray when a variation has non-zero weight in this xform. widgetItem->setBackgroundColor(0, QColor(200, 200, 200));//Set background to gray when a variation has non-zero weight in this xform. //If they've added a new parametric variation, then grab the values currently in the spinners @@ -295,6 +314,7 @@ void FractoriumEmberController::FillVariationTreeWithXform(Xform* xform) item->setHidden(false); spinBox->SetValueStealth(var ? 
var->m_Weight : 0);//If the variation was present, set the spin box to its weight, else zero. + //item->setBackgroundColor(0, var ? Qt::darkGray : Qt::lightGray);//Ensure background is always white if the value goes to zero, else gray if var present. item->setBackgroundColor(0, var ? QColor(200, 200, 200) : QColor(255, 255, 255));//Ensure background is always white if the value goes to zero, else gray if var present. for (uint j = 0; j < item->childCount(); j++)//Iterate through all of the children, which will be the params if it was a parametric variation. diff --git a/Source/Fractorium/Main.cpp b/Source/Fractorium/Main.cpp index e929aee..999f2a5 100644 --- a/Source/Fractorium/Main.cpp +++ b/Source/Fractorium/Main.cpp @@ -48,11 +48,50 @@ int main(int argc, char *argv[]) try { + //a.setStyle(QStyleFactory::create("Fusion")); + //QPalette darkPalette; + /*darkPalette.setColor(QPalette::Window, QColor(53, 53, 53)); + darkPalette.setColor(QPalette::WindowText, Qt::white); + darkPalette.setColor(QPalette::Base, QColor(25, 25, 25)); + darkPalette.setColor(QPalette::AlternateBase, QColor(53, 53, 53)); + darkPalette.setColor(QPalette::ToolTipBase, Qt::white); + darkPalette.setColor(QPalette::ToolTipText, Qt::white); + darkPalette.setColor(QPalette::Text, Qt::white); + darkPalette.setColor(QPalette::Button, QColor(53, 53, 53)); + darkPalette.setColor(QPalette::ButtonText, Qt::white); + darkPalette.setColor(QPalette::BrightText, Qt::red); + darkPalette.setColor(QPalette::Link, QColor(42, 130, 218)); + + darkPalette.setColor(QPalette::Highlight, QColor(42, 130, 218)); + darkPalette.setColor(QPalette::HighlightedText, Qt::black);;*/ + + //darkPalette.setColor(QPalette::, Qt::lightGray); + //darkPalette.setColor(QPalette::Window, Qt::darkGray); + //darkPalette.setColor(QPalette::Disabled, QPalette::WindowText, Qt::red); + //darkPalette.setColor(QPalette::Disabled, QPalette::ButtonText, Qt::blue);//Works for disabled buttons, but not for disabled menus. 
+ + //a.setPalette(darkPalette); + //a.setStyleSheet("QToolTip { color: #ffffff; background-color: darkgray; border: 1px solid white; }"); + //a.setStyleSheet("QTableWidget { border-color: darkgray; }") + //QString s; + + //s = "QTableView, QSpinBox, QDoubleSpinBox, QGroupBox, QTreeWidget { background-color: darkGray; } "; + //s += "QComboBox, QTextEdit, QLineEdit { background - color: lightGray; } "; + //s += "QTabWidget { window-color: darkGray; } "; + //a.setStyleSheet("{ color: rgb(85, 170, 0); }"); + //a.setStyleSheet("GLWidget { background-color: darkgray; }"); + //a.setStyleSheet("QTableView, QDoubleSpinBox { background-color: darkgray; }");//Works! + //a.setStyleSheet(s);//Works! + //a.setStyleSheet("QTableView, QSpinBox, QDoubleSpinBox, QTreeWidget, QTreeWidgetItem { background-color: darkgray; }");//QTreeWidgetItem not needed. + //a.setStyleSheet("QTableView, DoubleSpinBox { background-color: darkgray; }");//Works! + Fractorium w; w.show(); a.installEventFilter(&w); rv = a.exec(); - } catch (const char *e) { + } + catch (const char* e) + { QMessageBox::critical(0, "Fatal Error", e); }