diff --git a/Data/Bench/EmberBench.ps1 b/Data/Bench/EmberBench.ps1 index 7543381..9e3753b 100644 --- a/Data/Bench/EmberBench.ps1 +++ b/Data/Bench/EmberBench.ps1 @@ -72,8 +72,8 @@ function TestFileSupersamples([string]$filename, [string]$precision, [string]$su function BenchAllForFile([string]$filename) { #if you want to test extreme speed on your GPU, add this option, --sbpctth=1.0, to increase the amount of each sub batch that is done on each opencl thread per kernel launch. - #set the value from somewhere between 0.025 (the default) and 1.0 (the max). - #this usuall results in a roughly 1% speed improvement. + #set the value from somewhere between 0.025 (the default) and 1.0 (the max). Values above 0.3 don't make much of a difference. + #this usually results in a roughly 1% speed improvement. #however, it can cause the render to fail, especially on the golubaja_rippingfrominside_complexcode and zy0rg_six_bigcomplexcode flames when using double precision. $misc = "--opencl --device=" + $devices TestFileSupersamples $filename "--sp" "_f32_cpu" $script:cpuquality "" diff --git a/Source/Ember/Ember.h b/Source/Ember/Ember.h index a4c85e3..013cec0 100644 --- a/Source/Ember/Ember.h +++ b/Source/Ember/Ember.h @@ -273,7 +273,6 @@ public: if (UseFinalXform())//Caller wanted one and this ember has one. { ember.m_FinalXform = m_FinalXform; - ember.m_FinalXform.m_ColorX = T(XformCount() & 1); } else//Caller wanted one and this ember doesn't have one. { @@ -779,7 +778,24 @@ public: InterpT<&Ember::m_MinRadDE>(embers, coefs, size); InterpT<&Ember::m_CurveDE>(embers, coefs, size); InterpT<&Ember::m_SpatialFilterRadius>(embers, coefs, size); - InterpX, &Ember::m_Curves>(embers, coefs, size); + + //At this point, all of the curves at a given curve index (0 - 3) should have the same number of spline points across all embers. + for (size_t i = 0; i < embers[0].m_Curves.m_Points.size(); i++)//4 point arrays. 
+ { + while (m_Curves.m_Points[i].size() < embers[0].m_Curves.m_Points[i].size()) + m_Curves.m_Points[i].push_back(v2F(0)); + + for (size_t j = 0; j < embers[0].m_Curves.m_Points[i].size(); j++)//Same number of points for this curve across all embers, so just use the first one. + { + v2F x(0); + + for (size_t k = 0; k < size; k++)//Iterate over all embers. + x += float(coefs[k]) * embers[k].m_Curves.m_Points[i][j]; + + m_Curves.m_Points[i][j] = x; + } + } + //Normally done in assignment, must manually do here. SetProjFunc(); //An extra step needed here due to the OOD that was not needed in the original. diff --git a/Source/Ember/Interpolate.h b/Source/Ember/Interpolate.h index db6f1aa..4ec90fd 100644 --- a/Source/Ember/Interpolate.h +++ b/Source/Ember/Interpolate.h @@ -61,13 +61,13 @@ public: { bool aligned = true; bool currentFinal, hasFinal = sourceEmbers[0].UseFinalXform(); - size_t i, xf, currentCount, maxCount = sourceEmbers[0].XformCount(); + size_t xf, currentCount, maxCount = sourceEmbers[0].XformCount(); Xform* destOtherXform; auto variationList = VariationList::Instance(); //Determine the max number of xforms present in sourceEmbers. //Also check if final xforms are used in any of them. - for (i = 1; i < count; i++) + for (size_t i = 1; i < count; i++) { currentCount = sourceEmbers[i].XformCount(); @@ -89,19 +89,31 @@ public: } //Copy them using the max xform count, and do final if any had final. - for (i = 0; i < count; i++) + for (size_t i = 0; i < count; i++) destEmbers[i] = sourceEmbers[i].Copy(maxCount, hasFinal); if (hasFinal) maxCount++; + std::array<size_t, 4> maxCurvePoints = { 0, 0, 0, 0 }; + + //Find the maximum number of points for each curve type in all curves. + for (size_t e = 0; e < count; e++) + for (size_t j = 0; j < sourceEmbers[0].m_Curves.m_Points.size(); j++)//Should always be 4 for every ember. 
+ maxCurvePoints[j] = std::max(maxCurvePoints[j], sourceEmbers[e].m_Curves.m_Points[j].size()); + //Check to see if there's a parametric variation present in one xform //but not in an aligned xform. If this is the case, use the parameters //from the xform with the variation as the defaults for the blank one. //All embers will have the same number of xforms at this point. - for (i = 0; i < count; i++) + for (size_t i = 0; i < count; i++) { intmax_t ii; + destEmbers[i].m_Curves = sourceEmbers[i].m_Curves; + + for (size_t j = 0; j < sourceEmbers[0].m_Curves.m_Points.size(); j++)//Should always be 4 for every ember. + while (destEmbers[i].m_Curves.m_Points[j].size() < maxCurvePoints[j]) + destEmbers[i].m_Curves.m_Points[j].push_back(sourceEmbers[i].m_Curves.m_Points[j].back()); for (xf = 0; xf < maxCount; xf++)//This will include both normal xforms and the final. { diff --git a/Source/Ember/Renderer.cpp b/Source/Ember/Renderer.cpp index 05ae646..a446496 100644 --- a/Source/Ember/Renderer.cpp +++ b/Source/Ember/Renderer.cpp @@ -1733,25 +1733,18 @@ void Renderer::ComputeCurves() { if (m_CurvesSet) { - //Timing t; auto st = m_Csa.size(); - vector> vals; - vals.reserve(m_Ember.m_Curves.m_Points[0].size()); for (glm::length_t i = 0; i < m_Ember.m_Curves.m_Points.size(); i++)//Overall, r, g, b. { - for (auto& p : m_Ember.m_Curves.m_Points[i]) - vals.push_back(p); + if (!m_Ember.m_Curves.m_Points[i].empty()) + { + Spline spline(m_Ember.m_Curves.m_Points[i]);//Will internally sort. - Spline spline(vals);//Will internally sort. 
- - for (glm::length_t j = 0; j < st; j++) - m_Csa[j][i] = spline.Interpolate(j * ONE_OVER_CURVES_LENGTH_M1); - - vals.clear(); + for (glm::length_t j = 0; j < st; j++) + m_Csa[j][i] = spline.Interpolate(j * ONE_OVER_CURVES_LENGTH_M1); + } } - - //t.Toc("ComputeCurves"); } } diff --git a/Source/EmberCL/RendererCL.cpp b/Source/EmberCL/RendererCL.cpp index 1a5471a..2ff5ffd 100644 --- a/Source/EmberCL/RendererCL.cpp +++ b/Source/EmberCL/RendererCL.cpp @@ -853,7 +853,7 @@ EmberStats RendererCL::Iterate(size_t iterCount, size_t temporalSamp break; } - if (b && !(b = wrapper.AddAndWriteImage("Palette", CL_MEM_READ_ONLY, m_PaletteFormat, m_Dmap.Size(), 1, 0, m_Dmap.m_Entries.data()))) + if (b && !(b = wrapper.AddAndWriteImage("Palette", CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, m_PaletteFormat, m_Dmap.Size(), 1, 0, m_Dmap.m_Entries.data()))) { ErrorStr(loc, "Write palette buffer failed", device.get()); break; diff --git a/Source/Fractorium/FinalRenderDialog.ui b/Source/Fractorium/FinalRenderDialog.ui index 1329375..ae75abb 100644 --- a/Source/Fractorium/FinalRenderDialog.ui +++ b/Source/Fractorium/FinalRenderDialog.ui @@ -345,7 +345,7 @@ - + 0 0 diff --git a/Source/Fractorium/FractoriumPalette.cpp b/Source/Fractorium/FractoriumPalette.cpp index 45467d7..37d9ae6 100644 --- a/Source/Fractorium/FractoriumPalette.cpp +++ b/Source/Fractorium/FractoriumPalette.cpp @@ -584,7 +584,10 @@ void FractoriumEmberController::ClearColorCurves(int i) { Update([&] { - m_Ember.m_Curves.Init(i); + if (i < 0) + m_Ember.m_Curves.Init(); + else + m_Ember.m_Curves.Init(i); }, true, m_Renderer->EarlyClip() ? eProcessAction::FILTER_AND_ACCUM : eProcessAction::ACCUM_ONLY); FillCurvesControl(); @@ -607,10 +610,7 @@ void Fractorium::OnResetCurvesButtonClicked(bool checked) } else { - m_Controller->ClearColorCurves(0); - m_Controller->ClearColorCurves(1); - m_Controller->ClearColorCurves(2); - m_Controller->ClearColorCurves(3); + m_Controller->ClearColorCurves(-1); } }