Features:

--Add support for Exr files which use 32-bit floats for each RGBA channel. Seems to come out too washed out.
--Allow for clearing an individual color curve.
--Allow for saving multiple image types in EmberRender and EmberAnimate. All writes are threaded.
--Remove --bpc command line argument. Add format png16 as a replacement.
--Remove --enable_jpg_comments and --enable_png_comments command line arguments, and replace them with --enable_comments which applies to jpg, png and exr.
--Add menu items to variations and affine spinners which allow for easy entry of specific numeric values like pi.
--Make final render dialog be wider rather than so tall.

Bug fixes:
--Fix some OpenCL compile errors on Mac.
--Remove ability to save bitmap files on all platforms but Windows.

Code changes:
--New dependency on OpenEXR.
--Allow Curves class to interact with objects of a different template type.
--Make m_Curves member of Ember always use float as template type.
--Set the length of the curves array to always be 2^17 which should offer enough precision with new 32-bit float pixel types.
--Set pixel types to always be 32-bit float. This results in a major reduction of code in the final accumulation part of Renderer.h/cpp.
--Remove corresponding code from RendererCL and FinalAccumOpenCLKernelCreator.
--Remove Transparency, NumChannels and BytesPerPixel setters from Renderer.h/cpp.
--Add new global functions to format final image buffers and place all alpha calculation and scaling code in them.
--Blending is no longer needed in OpenGLWidget because of the new pixel type.
--Make new class, AffineDoubleSpinBox.
--Attempt to make file save dialog code work the same on all OSes.
--Remove some unused functions.
This commit is contained in:
Person
2017-07-22 13:43:35 -07:00
parent d5760e451a
commit de613404de
68 changed files with 1755 additions and 1276 deletions

View File

@ -9,7 +9,9 @@ namespace EmberNs
template <typename T, typename bucketT>
Renderer<T, bucketT>::Renderer()
{
m_Csa.resize(size_t(std::pow(size_t(256), BytesPerChannel())));//Need to at least have something here so the derived RendererCL can do the initial buffer allocation.
//Use a very large number regardless of the size of the output pixels. This should be sufficient granularity, even though
//it's technically less than the number of distinct values representable by a 32-bit float.
m_Csa.resize(size_t(CURVES_LENGTH));
}
/// <summary>
@ -344,7 +346,7 @@ bool Renderer<T, bucketT>::CreateTemporalFilter(bool& newAlloc)
/// <param name="finalOffset">Offset in finalImage to store the pixels to. Default: 0.</param>
/// <returns>True if nothing went wrong, else false.</returns>
template <typename T, typename bucketT>
eRenderStatus Renderer<T, bucketT>::Run(vector<byte>& finalImage, double time, size_t subBatchCountOverride, bool forceOutput, size_t finalOffset)
eRenderStatus Renderer<T, bucketT>::Run(vector<v4F>& finalImage, double time, size_t subBatchCountOverride, bool forceOutput, size_t finalOffset)
{
m_InRender = true;
EnterRender();
@ -645,7 +647,7 @@ AccumOnly:
CreateSpatialFilter(newFilterAlloc);
m_DensityFilterOffset = m_GutterWidth - size_t(Clamp<T>((T(m_SpatialFilter->FinalFilterWidth()) - T(Supersample())) / 2, 0, T(m_GutterWidth)));
m_CurvesSet = m_Ember.m_Curves.CurvesSet();
ComputeCurves(true);//Color curves must be re-calculated as well.
ComputeCurves();//Color curves must be re-calculated as well.
if (AccumulatorToFinalImage(finalImage, finalOffset) == eRenderStatus::RENDER_OK)
{
@ -822,6 +824,7 @@ bool Renderer<T, bucketT>::ResetBuckets(bool resetHist, bool resetAccum)
}
/// <summary>
/// THIS IS UNUSED.
/// Log scales a single row with a specially structured loop that will be vectorized by the compiler.
/// Note this adds an epsilon to the denomiator used to compute the logScale
/// value because the conditional check for zero would have prevented the loop from
@ -882,7 +885,7 @@ eRenderStatus Renderer<T, bucketT>::LogScaleDensityFilter(bool forceOutput)
bucketT* __restrict hist = glm::value_ptr(m_HistBuckets[i]);//Vectorizer can't tell these point to different locations.
bucketT* __restrict acc = glm::value_ptr(m_AccumulatorBuckets[i]);
for (size_t v = 0; v < 4; v++)
for (size_t v = 0; v < 4; v++)//Vectorized by compiler.
acc[v] = hist[v] * logScale;
}
}
@ -1063,7 +1066,7 @@ eRenderStatus Renderer<T, bucketT>::GaussianDensityFilter()
/// <param name="finalOffset">Offset in the buffer to store the pixels to</param>
/// <returns>True if not prematurely aborted, else false.</returns>
template <typename T, typename bucketT>
eRenderStatus Renderer<T, bucketT>::AccumulatorToFinalImage(vector<byte>& pixels, size_t finalOffset)
eRenderStatus Renderer<T, bucketT>::AccumulatorToFinalImage(vector<v4F>& pixels, size_t finalOffset)
{
if (PrepFinalAccumVector(pixels))
return AccumulatorToFinalImage(pixels.data(), finalOffset);
@ -1079,14 +1082,13 @@ eRenderStatus Renderer<T, bucketT>::AccumulatorToFinalImage(vector<byte>& pixels
/// <param name="finalOffset">Offset in the buffer to store the pixels to. Default: 0.</param>
/// <returns>True if not prematurely aborted, else false.</returns>
template <typename T, typename bucketT>
eRenderStatus Renderer<T, bucketT>::AccumulatorToFinalImage(byte* pixels, size_t finalOffset)
eRenderStatus Renderer<T, bucketT>::AccumulatorToFinalImage(v4F* pixels, size_t finalOffset)
{
if (!pixels)
return eRenderStatus::RENDER_ERROR;
EnterFinalAccum();
//Timing t(4);
bool doAlpha = NumChannels() > 3;
size_t filterWidth = m_SpatialFilter->FinalFilterWidth();
bucketT g, linRange, vibrancy;
Color<bucketT> background;
@ -1104,7 +1106,7 @@ eRenderStatus Renderer<T, bucketT>::AccumulatorToFinalImage(byte* pixels, size_t
while (rowStart < rowEnd && !m_Abort)//Use the pointer itself as the offset to save an extra addition per iter.
{
GammaCorrection(*rowStart, background, g, linRange, vibrancy, true, false, glm::value_ptr(*rowStart));//Write back in place.
GammaCorrection(*rowStart, background, g, linRange, vibrancy, false, glm::value_ptr(*rowStart));//Write back in place.
rowStart++;
}
});
@ -1123,11 +1125,12 @@ eRenderStatus Renderer<T, bucketT>::AccumulatorToFinalImage(byte* pixels, size_t
parallel_for(size_t(0), FinalRasH(), size_t(1), [&](size_t j)
{
Color<bucketT> newBucket;
size_t pixelsRowStart = (m_YAxisUp ? ((FinalRasH() - j) - 1) : j) * FinalRowSize();//Pull out of inner loop for optimization.
size_t pixelsRowStart = (m_YAxisUp ? ((FinalRasH() - j) - 1) : j) * FinalRasW();//Pull out of inner loop for optimization.
size_t y = m_DensityFilterOffset + (j * Supersample());//Start at the beginning row of each super sample block.
size_t clampedFilterH = std::min(filterWidth, m_SuperRasH - y);//Make sure the filter doesn't go past the bottom of the gutter.
auto pv4T = pixels + pixelsRowStart;
for (size_t i = 0; i < FinalRasW(); i++, pixelsRowStart += PixelSize())
for (size_t i = 0; i < FinalRasW(); i++, pv4T++)
{
size_t ii, jj;
size_t x = m_DensityFilterOffset + (i * Supersample());//Start at the beginning column of each super sample block.
@ -1149,68 +1152,8 @@ eRenderStatus Renderer<T, bucketT>::AccumulatorToFinalImage(byte* pixels, size_t
}
}
if (BytesPerChannel() == 2)
{
auto p16 = reinterpret_cast<glm::uint16*>(pixels + pixelsRowStart);
if (EarlyClip())
{
newBucket *= bucketT(65535);
if (m_CurvesSet)
{
CurveAdjust(newBucket.r, 1);
CurveAdjust(newBucket.g, 2);
CurveAdjust(newBucket.b, 3);
}
p16[0] = glm::uint16(Clamp<bucketT>(newBucket.r, 0, 65535));
p16[1] = glm::uint16(Clamp<bucketT>(newBucket.g, 0, 65535));
p16[2] = glm::uint16(Clamp<bucketT>(newBucket.b, 0, 65535));
if (doAlpha)
{
if (Transparency())
p16[3] = byte(Clamp<bucketT>(newBucket.a, 0, 65535));
else
p16[3] = 65535;
}
}
else
{
GammaCorrection(*(reinterpret_cast<tvec4<bucketT, glm::defaultp>*>(&newBucket)), background, g, linRange, vibrancy, doAlpha, true, p16);
}
}
else
{
if (EarlyClip())
{
newBucket *= bucketT(255);
if (m_CurvesSet)
{
CurveAdjust(newBucket.r, 1);
CurveAdjust(newBucket.g, 2);
CurveAdjust(newBucket.b, 3);
}
pixels[pixelsRowStart] = byte(Clamp<bucketT>(newBucket.r, 0, 255));
pixels[pixelsRowStart + 1] = byte(Clamp<bucketT>(newBucket.g, 0, 255));
pixels[pixelsRowStart + 2] = byte(Clamp<bucketT>(newBucket.b, 0, 255));
if (doAlpha)
{
if (Transparency())
pixels[pixelsRowStart + 3] = byte(Clamp<bucketT>(newBucket.a, 0, 255));
else
pixels[pixelsRowStart + 3] = 255;
}
}
else
{
GammaCorrection(*(reinterpret_cast<tvec4<bucketT, glm::defaultp>*>(&newBucket)), background, g, linRange, vibrancy, doAlpha, true, pixels + pixelsRowStart);
}
}
auto pf = reinterpret_cast<float*>(pv4T);
GammaCorrection(*(reinterpret_cast<tvec4<bucketT, glm::defaultp>*>(&newBucket)), background, g, linRange, vibrancy, true, pf);
}
});
@ -1222,30 +1165,15 @@ eRenderStatus Renderer<T, bucketT>::AccumulatorToFinalImage(byte* pixels, size_t
if (ph >= FinalRasH())
ph = FinalRasH();
if (BytesPerChannel() == 1)
for (j = 0; j < ph; j++)
{
for (j = 0; j < ph; j++)
for (i = 0; i < FinalRasW(); i++)
{
for (i = 0; i < FinalRasW(); i++)
{
auto p = pixels + (NumChannels() * (i + j * FinalRasW()));
p[0] = byte(m_TempEmber.m_Palette[i * 256 / FinalRasW()][0] * WHITE);//The palette is [0..1], output image is [0..255].
p[1] = byte(m_TempEmber.m_Palette[i * 256 / FinalRasW()][1] * WHITE);
p[2] = byte(m_TempEmber.m_Palette[i * 256 / FinalRasW()][2] * WHITE);
}
}
}
else//BPC == 2.
{
for (j = 0; j < ph; j++)
{
for (i = 0; i < FinalRasW(); i++)
{
auto p16 = reinterpret_cast<glm::uint16*>(pixels + (PixelSize() * (i + j * FinalRasW())));
p16[0] = glm::uint16(m_TempEmber.m_Palette[i * 256 / FinalRasW()][0] * WHITE * bucketT(256)); //The palette is [0..1], output image is [0..65535].
p16[1] = glm::uint16(m_TempEmber.m_Palette[i * 256 / FinalRasW()][1] * WHITE * bucketT(256));
p16[2] = glm::uint16(m_TempEmber.m_Palette[i * 256 / FinalRasW()][2] * WHITE * bucketT(256));
}
auto p = pixels + (i + j * FinalRasW());
p->r = m_TempEmber.m_Palette[i * 256 / FinalRasW()][0];
p->g = m_TempEmber.m_Palette[i * 256 / FinalRasW()][1];
p->b = m_TempEmber.m_Palette[i * 256 / FinalRasW()][2];
p->a = 1;
}
}
}
@ -1322,7 +1250,6 @@ EmberStats Renderer<T, bucketT>::Iterate(size_t iterCount, size_t temporalSample
//t.Tic();
//Iterating, loop 3.
m_BadVals[threadIndex] += m_Iterator->Iterate(m_ThreadEmbers[threadIndex], params, m_Samples[threadIndex].data(), m_Rand[threadIndex]);
//m_BadVals[threadIndex] += m_Iterator->Iterate(m_Ember, params, m_Samples[threadIndex].data(), m_Rand[threadIndex]);
//iterationTime += t.Toc();
if (m_LockAccum)
@ -1677,112 +1604,97 @@ void Renderer<T, bucketT>::AddToAccum(const tvec4<bucketT, glm::defaultp>& bucke
/// Because this code is used in both early and late clipping, a few extra arguments are passed
/// to specify what actions to take. Coupled with an additional template argument, this allows
/// using one function to perform all color clipping, gamma correction and final accumulation.
/// Template argument accumT is expected to match bucketT for the case of early clipping, byte for late clip for
/// images with one byte per channel and unsigned short for images with two bytes per channel.
/// Template argument accumT is expected to always be float4.
/// </summary>
/// <param name="bucket">The pixel to correct</param>
/// <param name="background">The background color</param>
/// <param name="g">The gamma to use</param>
/// <param name="linRange">The linear range to use</param>
/// <param name="vibrancy">The vibrancy to use</param>
/// <param name="doAlpha">True if either early clip, or late clip with 4 channel output, else false.</param>
/// <param name="scale">True if late clip, else false.</param>
/// <param name="correctedChannels">The storage space for the corrected values to be written to</param>
template <typename T, typename bucketT>
template <typename accumT>
void Renderer<T, bucketT>::GammaCorrection(tvec4<bucketT, glm::defaultp>& bucket, Color<bucketT>& background, bucketT g, bucketT linRange, bucketT vibrancy, bool doAlpha, bool scale, accumT* correctedChannels)
void Renderer<T, bucketT>::GammaCorrection(tvec4<bucketT, glm::defaultp>& bucket, Color<bucketT>& background, bucketT g, bucketT linRange, bucketT vibrancy, bool scale, accumT* correctedChannels)
{
bucketT alpha, ls, a, newRgb[3];//Would normally use a Color<bucketT>, but don't want to call a needless constructor every time this function is called, which is once per pixel.
static bucketT scaleVal = numeric_limits<accumT>::max();
auto bt1 = bucketT(1);
if (bucket.a <= 0)
if (scale && EarlyClip())
{
alpha = 0;
ls = 0;
if (m_CurvesSet)
{
CurveAdjust(bucket.r, 1);
CurveAdjust(bucket.g, 2);
CurveAdjust(bucket.b, 3);
}
correctedChannels[0] = accumT(Clamp<bucketT>(bucket.r, 0, bt1));
correctedChannels[1] = accumT(Clamp<bucketT>(bucket.g, 0, bt1));
correctedChannels[2] = accumT(Clamp<bucketT>(bucket.b, 0, bt1));
correctedChannels[3] = accumT(Clamp<bucketT>(bucket.a, 0, bt1));
}
else
{
alpha = Palette<bucketT>::CalcAlpha(bucket.a, g, linRange);
ls = vibrancy * alpha / bucket.a;
ClampRef<bucketT>(alpha, 0, 1);
}
bucketT alpha, ls, a, newRgb[3];//Would normally use a Color<bucketT>, but don't want to call a needless constructor every time this function is called, which is once per pixel.
Palette<bucketT>::template CalcNewRgb<bucketT>(glm::value_ptr(bucket), ls, HighlightPower(), newRgb);
for (glm::length_t rgbi = 0; rgbi < 3; rgbi++)
{
a = newRgb[rgbi] + ((1 - vibrancy) * std::pow(std::abs(bucket[rgbi]), g));//Must use abs(), else it it could be a negative value and return NAN.
if (NumChannels() <= 3 || !Transparency())
if (bucket.a <= 0)
{
alpha = 0;
ls = 0;
}
else
{
alpha = Palette<bucketT>::CalcAlpha(bucket.a, g, linRange);
ls = vibrancy * alpha / bucket.a;
ClampRef<bucketT>(alpha, 0, 1);
}
Palette<bucketT>::template CalcNewRgb<bucketT>(glm::value_ptr(bucket), ls, HighlightPower(), newRgb);
for (glm::length_t rgbi = 0; rgbi < 3; rgbi++)
{
a = newRgb[rgbi] + ((1 - vibrancy) * std::pow(std::abs(bucket[rgbi]), g));//Must use abs(), else it it could be a negative value and return NAN.
a += (1 - alpha) * background[rgbi];
}
else
{
if (alpha > 0)
a /= alpha;
else
a = 0;
}
if (!scale)
{
correctedChannels[rgbi] = accumT(Clamp<bucketT>(a, 0, 1.0));//Early clip, just assign directly.
}
else
{
a *= scaleVal;
if (m_CurvesSet)
if (scale && m_CurvesSet)
CurveAdjust(a, rgbi + 1);
correctedChannels[rgbi] = accumT(Clamp<bucketT>(a, 0, scaleVal));//Final accum, multiply by 255 for 8 bpc (0-255), or 65535 for 16 bpc (0-65535).
correctedChannels[rgbi] = accumT(Clamp<bucketT>(a, 0, bt1));//Early clip, just assign directly.
}
}
if (doAlpha)
{
if (!scale)
correctedChannels[3] = accumT(alpha);//Early clip, just assign alpha directly.
else if (Transparency())
correctedChannels[3] = accumT(alpha * scaleVal);//Final accum, 4 channels, using transparency. Scale alpha from 0-1 to 0-255 for 8 bpc or 0-65535 for 16 bpc.
else
correctedChannels[3] = accumT(scaleVal);//Final accum, 4 channels, but not using transparency. 255 for 8 bpc, 65535 for 16 bpc.
correctedChannels[3] = accumT(alpha);
}
}
/// <summary>
/// Setup the curve values when they are being used.
/// This will be either 255 values for bpc=8, or 65535 values for bpc=16.
/// </summary>
/// <param name="scale">Whether to scale from 0-1 to 0-255 or 0-65535</param>
template <typename T, typename bucketT>
void Renderer<T, bucketT>::ComputeCurves(bool scale)
void Renderer<T, bucketT>::ComputeCurves()
{
if (m_CurvesSet)
{
m_Csa.resize(size_t(std::pow(size_t(256), BytesPerChannel())));
Timing t;
auto st = m_Csa.size();
auto stm1 = st - 1;
T tscale = scale ? T(stm1) : T(1);
for (size_t i = 0; i < st; i++)
m_Csa[i] = m_Ember.m_Curves.BezierFunc(i / T(stm1)) * tscale;
m_Csa[i] = m_Ember.m_Curves.BezierFunc(i * ONE_OVER_CURVES_LENGTH_M1);
t.Toc("ComputeCurves");
}
}
/// <summary>
/// Apply the curve adjustment to a single channel.
/// </summary>
/// <param name="aScaled">The value of the channel to apply curve adjustment to, scaled to either 255 or 65535, depending on bpc.</param>
/// <param name="aScaled">The value of the channel to apply curve adjustment to.</param>
/// <param name="index">The index of the channel to apply curve adjustment to</param>
template <typename T, typename bucketT>
void Renderer<T, bucketT>::CurveAdjust(bucketT& aScaled, const glm::length_t& index)
void Renderer<T, bucketT>::CurveAdjust(bucketT& a, const glm::length_t& index)
{
bucketT stm1 = bucketT(m_Csa.size() - 1);
size_t tempIndex = size_t(Clamp<bucketT>(aScaled, 0, stm1));
size_t tempIndex2 = size_t(Clamp<bucketT>(m_Csa[tempIndex].x, 0, stm1));
aScaled = m_Csa[tempIndex2][index];
size_t tempIndex = size_t(Clamp<bucketT>(a * CURVES_LENGTH_M1, 0, CURVES_LENGTH_M1));
size_t tempIndex2 = size_t(Clamp<bucketT>(m_Csa[tempIndex].x * CURVES_LENGTH_M1, 0, CURVES_LENGTH_M1));
a = m_Csa[tempIndex2][index];
}
//This class had to be implemented in a cpp file because the compiler was breaking.