Features:

--Add support for Exr files which use 32-bit floats for each RGBA channel. Seems to come out too washed out.
--Allow for clearing an individual color curve.
--Allow for saving multiple image types in EmberRender and EmberAnimate. All writes are threaded.
--Remove --bpc command line argument. Add format png16 as a replacement.
--Remove --enable_jpg_comments and --enable_png_comments command line arguments, and replace them with --enable_comments which applies to jpg, png and exr.
--Add menu items to variations and affine spinners which allow for easy entry of specific numeric values like pi.
--Make final render dialog be wider rather than so tall.

Bug fixes:
--Fix some OpenCL compile errors on Mac.
--Remove ability to save bitmap files on all platforms but Windows.

Code changes:
--New dependency on OpenEXR.
--Allow Curves class to interact with objects of a different template type.
--Make m_Curves member of Ember always use float as template type.
--Set the length of the curves array to always be 2^17 which should offer enough precision with new 32-bit float pixel types.
--Set pixel types to always be 32-bit float. This results in a major reduction of code in the final accumulation part of Renderer.h/cpp.
--Remove corresponding code from RendererCL and FinalAccumOpenCLKernelCreator.
--Remove Transparency, NumChannels and BytesPerPixel setters from Renderer.h/cpp.
--Add new global functions to format final image buffers and place all alpha calculation and scaling code in them.
--Blending is no longer needed in OpenGLWidget because of the new pixel type.
--Make new class, AffineDoubleSpinBox.
--Attempt to make file save dialog code work the same on all OSes.
--Remove some unused functions.
This commit is contained in:
Person
2017-07-22 13:43:35 -07:00
parent d5760e451a
commit de613404de
68 changed files with 1755 additions and 1276 deletions

View File

@ -147,13 +147,14 @@ static const char* CalcAlphaFunctionString =
/// during final accumulation, which only takes floats.
/// </summary>
static const char* CurveAdjustFunctionString =
"static inline void CurveAdjust(__constant real4reals_bucket* csa, float* a, uint index)\n"
"static inline void CurveAdjust(__global real4reals_bucket* csa, float* a, uint index)\n"
"{\n"
" uint tempIndex = (uint)clamp(*a * (float)COLORMAP_LENGTH_MINUS_1, (float)0.0, (float)COLORMAP_LENGTH_MINUS_1);\n"
" uint tempIndex2 = (uint)clamp((float)csa[tempIndex].m_Real4.x * (float)COLORMAP_LENGTH_MINUS_1, (float)0.0, (float)COLORMAP_LENGTH_MINUS_1);\n"
" uint tempIndex = (uint)clamp(*a * CURVES_LENGTH_M1, 0.0f, CURVES_LENGTH_M1);\n"
" uint tempIndex2 = (uint)clamp(csa[tempIndex].m_Real4.x * CURVES_LENGTH_M1, 0.0f, CURVES_LENGTH_M1);\n"
"\n"
" *a = (float)csa[tempIndex2].m_Reals[index];\n"
"}\n";
"}\n"
"\n";
/// <summary>
/// Use MWC 64 from David Thomas at the Imperial College of London for

View File

@ -85,6 +85,8 @@ static string ConstantDefinesString(bool doublePrecision)
"#define SQRT5 2.2360679774997896964091736687313\n"
"#define M_PHI 1.61803398874989484820458683436563\n"
"#define DEG_2_RAD (MPI / 180)\n"
"#define CURVES_LENGTH_M1 131071.0f\n"
"#define ONE_OVER_CURVES_LENGTH_M1 7.62945273935e-6f\n"
"\n"
"//Index in each dimension of a thread within a block.\n"
"#define THREAD_ID_X (get_local_id(0))\n"
@ -314,10 +316,7 @@ struct ALIGN SpatialFilterCL
uint m_FinalRasH;
uint m_Supersample;
uint m_FilterWidth;
uint m_NumChannels;
uint m_BytesPerChannel;
uint m_DensityFilterOffset;
uint m_Transparency;
uint m_YAxisUp;
T m_Vibrancy;
T m_HighlightPower;
@ -339,10 +338,7 @@ static const char* SpatialFilterCLStructString =
" uint m_FinalRasH;\n"
" uint m_Supersample;\n"
" uint m_FilterWidth;\n"
" uint m_NumChannels;\n"
" uint m_BytesPerChannel;\n"
" uint m_DensityFilterOffset;\n"
" uint m_Transparency;\n"
" uint m_YAxisUp;\n"
" real_bucket_t m_Vibrancy;\n"
" real_bucket_t m_HighlightPower;\n"

View File

@ -10,166 +10,58 @@ namespace EmberCLns
FinalAccumOpenCLKernelCreator::FinalAccumOpenCLKernelCreator(bool doublePrecision)
{
m_DoublePrecision = doublePrecision;
m_GammaCorrectionWithAlphaCalcKernel = CreateGammaCorrectionKernelString(true);
m_GammaCorrectionWithoutAlphaCalcKernel = CreateGammaCorrectionKernelString(false);
m_FinalAccumEarlyClipKernel = CreateFinalAccumKernelString(true, false, false);
m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel = CreateFinalAccumKernelString(true, true, true);
m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel = CreateFinalAccumKernelString(true, false, true);
m_FinalAccumLateClipKernel = CreateFinalAccumKernelString(false, false, false);
m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel = CreateFinalAccumKernelString(false, true, true);
m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel = CreateFinalAccumKernelString(false, false, true);
m_GammaCorrectionWithoutAlphaCalcKernel = CreateGammaCorrectionKernelString();
m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel = CreateFinalAccumKernelString(true);
m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel = CreateFinalAccumKernelString(false);
}
/// <summary>
/// Kernel source and entry point properties, getters only.
/// </summary>
const string& FinalAccumOpenCLKernelCreator::GammaCorrectionWithAlphaCalcKernel() const { return m_GammaCorrectionWithAlphaCalcKernel; }
const string& FinalAccumOpenCLKernelCreator::GammaCorrectionWithAlphaCalcEntryPoint() const { return m_GammaCorrectionWithAlphaCalcEntryPoint; }
const string& FinalAccumOpenCLKernelCreator::GammaCorrectionWithoutAlphaCalcKernel() const { return m_GammaCorrectionWithoutAlphaCalcKernel; }
const string& FinalAccumOpenCLKernelCreator::GammaCorrectionWithoutAlphaCalcEntryPoint() const { return m_GammaCorrectionWithoutAlphaCalcEntryPoint; }
const string& FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipKernel() const { return m_FinalAccumEarlyClipKernel; }
const string& FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipEntryPoint() const { return m_FinalAccumEarlyClipEntryPoint; }
const string& FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel() const { return m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel; }
const string& FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint() const { return m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint; }
const string& FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel() const { return m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel; }
const string& FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint() const { return m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint; }
const string& FinalAccumOpenCLKernelCreator::FinalAccumLateClipKernel() const { return m_FinalAccumLateClipKernel; }
const string& FinalAccumOpenCLKernelCreator::FinalAccumLateClipEntryPoint() const { return m_FinalAccumLateClipEntryPoint; }
const string& FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel() const { return m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel; }
const string& FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint() const { return m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint; }
const string& FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel() const { return m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel; }
const string& FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint() const { return m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint; }
/// <summary>
/// Get the gamma correction entry point.
/// </summary>
/// <param name="channels">The number of channels used, 3 or 4.</param>
/// <param name="transparency">True if channels equals 4 and using transparency, else false.</param>
/// <returns>The name of the gamma correction entry point kernel function</returns>
const string& FinalAccumOpenCLKernelCreator::GammaCorrectionEntryPoint(size_t channels, bool transparency) const
{
bool alphaCalc = ((channels > 3) && transparency);
return alphaCalc ? m_GammaCorrectionWithAlphaCalcEntryPoint : m_GammaCorrectionWithoutAlphaCalcEntryPoint;
}
/// <summary>
/// Get the gamma correction kernel string.
/// </summary>
/// <param name="channels">The number of channels used, 3 or 4.</param>
/// <param name="transparency">True if channels equals 4 and using transparency, else false.</param>
/// <returns>The gamma correction kernel string</returns>
const string& FinalAccumOpenCLKernelCreator::GammaCorrectionKernel(size_t channels, bool transparency) const
{
bool alphaCalc = ((channels > 3) && transparency);
return alphaCalc ? m_GammaCorrectionWithAlphaCalcKernel : m_GammaCorrectionWithoutAlphaCalcKernel;
}
const string& FinalAccumOpenCLKernelCreator::GammaCorrectionEntryPoint() const { return m_GammaCorrectionWithoutAlphaCalcEntryPoint; }
const string& FinalAccumOpenCLKernelCreator::GammaCorrectionKernel() const { return m_GammaCorrectionWithoutAlphaCalcKernel; }
/// <summary>
/// Get the final accumulation entry point.
/// </summary>
/// <param name="earlyClip">True if early clip is desired, else false.</param>
/// <param name="channels">The number of channels used, 3 or 4.</param>
/// <param name="transparency">True if channels equals 4 and using transparency, else false.</param>
/// <param name="alphaBase">Storage for the alpha base value used in the kernel. 0 if transparency is true, else 255.</param>
/// <param name="alphaScale">Storage for the alpha scale value used in the kernel. 255 if transparency is true, else 0.</param>
/// <returns>The name of the final accumulation entry point kernel function</returns>
const string& FinalAccumOpenCLKernelCreator::FinalAccumEntryPoint(bool earlyClip, size_t channels, bool transparency, double& alphaBase, double& alphaScale) const
const string& FinalAccumOpenCLKernelCreator::FinalAccumEntryPoint(bool earlyClip) const
{
bool alphaCalc = ((channels > 3) && transparency);
bool alphaAccum = channels > 3;
if (alphaAccum)
{
alphaBase = transparency ? 0 : 1;//See the table below.
alphaScale = transparency ? 1 : 0;
}
if (earlyClip)
{
if (!alphaCalc && !alphaAccum)//Rgb output, the most common case.
return FinalAccumEarlyClipEntryPoint();
else if (alphaCalc && alphaAccum)//Rgba output and Transparency.
return FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint();
else if (!alphaCalc && alphaAccum)//Rgba output and !Transparency.
return FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint();
else
return m_Empty;//Cannot have alphaCalc and !alphaAccum, it makes no sense.
}
return FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint();
else
{
if (!alphaCalc && !alphaAccum)//Rgb output, the most common case.
return FinalAccumLateClipEntryPoint();
else if (alphaCalc && alphaAccum)//Rgba output and Transparency.
return FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint();
else if (!alphaCalc && alphaAccum)//Rgba output and !Transparency.
return FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint();
else
return m_Empty;//Cannot have alphaCalc and !alphaAccum, it makes no sense.
}
return FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint();
}
/// <summary>
/// Get the final accumulation kernel string.
/// </summary>
/// <param name="earlyClip">True if early clip is desired, else false.</param>
/// <param name="channels">The number of channels used, 3 or 4.</param>
/// <param name="transparency">True if channels equals 4 and using transparency, else false.</param>
/// <returns>The final accumulation kernel string</returns>
const string& FinalAccumOpenCLKernelCreator::FinalAccumKernel(bool earlyClip, size_t channels, bool transparency) const
const string& FinalAccumOpenCLKernelCreator::FinalAccumKernel(bool earlyClip) const
{
bool alphaCalc = (channels > 3 && transparency);
bool alphaAccum = channels > 3;
if (earlyClip)
{
if (!alphaCalc && !alphaAccum)//Rgb output, the most common case.
return FinalAccumEarlyClipKernel();
else if (alphaCalc && alphaAccum)//Rgba output and Transparency.
return FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel();
else if (!alphaCalc && alphaAccum)//Rgba output and !Transparency.
return FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel();
else
return m_Empty;//Cannot have alphaCalc and !alphaAccum, it makes no sense.
}
return FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel();
else
{
if (!alphaCalc && !alphaAccum)//Rgb output, the most common case.
return FinalAccumLateClipKernel();
else if (alphaCalc && alphaAccum)//Rgba output and Transparency.
return FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel();
else if (!alphaCalc && alphaAccum)//Rgba output and !Transparency.
return FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel();
else
return m_Empty;//Cannot have alphaCalc and !alphaAccum, it makes no sense.
}
}
/// <summary>
/// Wrapper around CreateFinalAccumKernelString().
/// </summary>
/// <param name="earlyClip">True if early clip is desired, else false.</param>
/// <param name="channels">The number of channels used, 3 or 4.</param>
/// <param name="transparency">True if channels equals 4 and using transparency, else false.</param>
/// <returns>The final accumulation kernel string</returns>
string FinalAccumOpenCLKernelCreator::CreateFinalAccumKernelString(bool earlyClip, size_t channels, bool transparency)
{
return CreateFinalAccumKernelString(earlyClip, (channels > 3 && transparency), channels > 3);
return FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel();
}
/// <summary>
/// Create the final accumulation kernel string
/// </summary>
/// <param name="earlyClip">True if early clip is desired, else false.</param>
/// <param name="alphaCalc">True if channels equals 4 and transparency is desired, else false.</param>
/// <param name="alphaAccum">True if channels equals 4</param>
/// <returns>The final accumulation kernel string</returns>
string FinalAccumOpenCLKernelCreator::CreateFinalAccumKernelString(bool earlyClip, bool alphaCalc, bool alphaAccum)
string FinalAccumOpenCLKernelCreator::CreateFinalAccumKernelString(bool earlyClip)
{
ostringstream os;
string channels = alphaAccum ? "4" : "3";
os <<
ConstantDefinesString(m_DoublePrecision) <<
UnionCLStructString <<
@ -181,29 +73,14 @@ string FinalAccumOpenCLKernelCreator::CreateFinalAccumKernelString(bool earlyCli
if (earlyClip)
{
if (!alphaCalc && !alphaAccum)//Rgb output, the most common case.
os << "__kernel void " << m_FinalAccumEarlyClipEntryPoint << "(\n";
else if (alphaCalc && alphaAccum)//Rgba output and Transparency.
os << "__kernel void " << m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint << "(\n";
else if (!alphaCalc && alphaAccum)//Rgba output and !Transparency.
os << "__kernel void " << m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint << "(\n";
else
return "";//Cannot have alphaCalc and !alphaAccum, it makes no sense.
os << "__kernel void " << m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint << "(\n";
}
else
{
os <<
CreateCalcNewRgbFunctionString(false) <<
CreateGammaCorrectionFunctionString(false, alphaCalc, alphaAccum, true);
if (!alphaCalc && !alphaAccum)//Rgb output, the most common case.
os << "__kernel void " << m_FinalAccumLateClipEntryPoint << "(\n";
else if (alphaCalc && alphaAccum)//Rgba output and Transparency.
os << "__kernel void " << m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint << "(\n";
else if (!alphaCalc && alphaAccum)//Rgba output and !Transparency.
os << "__kernel void " << m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint << "(\n";
else
return "";//Cannot have alphaCalc and !alphaAccum, it makes no sense.
CreateGammaCorrectionFunctionString(false, true) <<
"__kernel void " << m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint << "(\n";
}
os <<
@ -211,10 +88,8 @@ string FinalAccumOpenCLKernelCreator::CreateFinalAccumKernelString(bool earlyCli
" __write_only image2d_t pixels,\n"
" __constant SpatialFilterCL* spatialFilter,\n"
" __constant real_bucket_t* filterCoefs,\n"
" __constant real4reals_bucket* csa,\n"
" const uint doCurves,\n"
" const real_bucket_t alphaBase,\n"
" const real_bucket_t alphaScale\n"
" __global real4reals_bucket* csa,\n"
" const uint doCurves\n"
"\t)\n"
"{\n"
"\n"
@ -249,21 +124,13 @@ string FinalAccumOpenCLKernelCreator::CreateFinalAccumKernelString(bool earlyCli
" }\n"
"\n";
//Not supporting 2 bytes per channel on the GPU. If the user wants it, run on the CPU.
if (earlyClip)//If early clip, simply assign values directly to the temp float4 since they've been gamma corrected already, then write it straight to the output image below.
{
os <<
" finalColor.m_Float4.x = (float)newBucket.m_Real4.x;\n"//CPU side clamps, skip here because write_imagef() does the clamping for us.
" finalColor.m_Float4.y = (float)newBucket.m_Real4.y;\n"
" finalColor.m_Float4.z = (float)newBucket.m_Real4.z;\n";
if (alphaAccum)
{
if (alphaCalc)
os << " finalColor.m_Float4.w = (float)newBucket.m_Real4.w;\n";
else
os << " finalColor.m_Float4.w = 1.0f;\n";
}
" finalColor.m_Float4.z = (float)newBucket.m_Real4.z;\n"
" finalColor.m_Float4.w = (float)newBucket.m_Real4.w;\n";
}
else
{
@ -273,7 +140,7 @@ string FinalAccumOpenCLKernelCreator::CreateFinalAccumKernelString(bool earlyCli
os <<
" real4reals_bucket realFinal;\n"
"\n"
" GammaCorrectionFloats(&newBucket, &(spatialFilter->m_Background[0]), spatialFilter->m_Gamma, spatialFilter->m_LinRange, spatialFilter->m_Vibrancy, spatialFilter->m_HighlightPower, alphaBase, alphaScale, &(realFinal.m_Reals[0]));\n"
" GammaCorrectionFloats(&newBucket, &(spatialFilter->m_Background[0]), spatialFilter->m_Gamma, spatialFilter->m_LinRange, spatialFilter->m_Vibrancy, spatialFilter->m_HighlightPower, &(realFinal.m_Reals[0]));\n"
" finalColor.m_Float4.x = (float)realFinal.m_Real4.x;\n"
" finalColor.m_Float4.y = (float)realFinal.m_Real4.y;\n"
" finalColor.m_Float4.z = (float)realFinal.m_Real4.z;\n"
@ -283,7 +150,7 @@ string FinalAccumOpenCLKernelCreator::CreateFinalAccumKernelString(bool earlyCli
else
{
os <<
" GammaCorrectionFloats(&newBucket, &(spatialFilter->m_Background[0]), spatialFilter->m_Gamma, spatialFilter->m_LinRange, spatialFilter->m_Vibrancy, spatialFilter->m_HighlightPower, alphaBase, alphaScale, &(finalColor.m_Floats[0]));\n";
" GammaCorrectionFloats(&newBucket, &(spatialFilter->m_Background[0]), spatialFilter->m_Gamma, spatialFilter->m_LinRange, spatialFilter->m_Vibrancy, spatialFilter->m_HighlightPower, &(finalColor.m_Floats[0]));\n";
}
}
@ -308,81 +175,45 @@ string FinalAccumOpenCLKernelCreator::CreateFinalAccumKernelString(bool earlyCli
/// This is not a full kernel, just a function that is used in the kernels.
/// </summary>
/// <param name="globalBucket">True if writing to a global buffer (early clip), else false (late clip).</param>
/// <param name="alphaCalc">True if channels equals 4 and transparency is desired, else false.</param>
/// <param name="alphaAccum">True if channels equals 4</param>
/// <param name="finalOut">True if writing to global buffer (late clip), else false (early clip).</param>
/// <returns>The gamma correction function string</returns>
string FinalAccumOpenCLKernelCreator::CreateGammaCorrectionFunctionString(bool globalBucket, bool alphaCalc, bool alphaAccum, bool finalOut)
string FinalAccumOpenCLKernelCreator::CreateGammaCorrectionFunctionString(bool globalBucket, bool finalOut)
{
ostringstream os;
string dataType;
string unionMember;
dataType = "real_bucket_t";
//Use real_t for all cases, early clip and final accum.
os << "void GammaCorrectionFloats(" << (globalBucket ? "__global " : "") << "real4reals_bucket* bucket, __constant real_bucket_t* background, real_bucket_t g, real_bucket_t linRange, real_bucket_t vibrancy, real_bucket_t highlightPower, real_bucket_t alphaBase, real_bucket_t alphaScale, " << (finalOut ? "" : "__global") << " real_bucket_t* correctedChannels)\n";
os
<< "{\n"
<< " real_bucket_t alpha, ls, tmp, a;\n"
<< " real4reals_bucket newRgb;\n"
<< "\n"
<< " if (bucket->m_Reals[3] <= 0)\n"
<< " {\n"
<< " alpha = 0;\n"
<< " ls = 0;\n"
<< " }\n"
<< " else\n"
<< " {\n"
<< " tmp = bucket->m_Reals[3];\n"
<< " alpha = CalcAlpha(tmp, g, linRange);\n"
<< " ls = vibrancy * alpha / tmp;\n"
<< " alpha = clamp(alpha, (real_bucket_t)0.0, (real_bucket_t)1.0);\n"
<< " }\n"
<< "\n"
<< " CalcNewRgb(bucket, ls, highlightPower, &newRgb);\n"
<< "\n"
<< " for (uint rgbi = 0; rgbi < 3; rgbi++)\n"
<< " {\n"
<< " a = newRgb.m_Reals[rgbi] + ((1.0 - vibrancy) * pow(fabs(bucket->m_Reals[rgbi]), g));\n"
<< "\n";
if (!alphaCalc)
{
os <<
" a += ((1.0 - alpha) * background[rgbi]);\n";
}
else
{
os
<< " if (alpha > 0)\n"
<< " a /= alpha;\n"
<< " else\n"
<< " a = 0;\n";
}
os <<
"\n"
" correctedChannels[rgbi] = (" << dataType << ")clamp(a, (real_bucket_t)0.0, (real_bucket_t)1.0);\n"
" }\n"
"\n";
//The CPU code has 3 cases for assigning alpha:
//[3] = alpha.//Early clip.
//[3] = alpha * 255.//Final Rgba with transparency.
//[3] = 255.//Final Rgba without transparency.
//Putting conditionals in GPU code is to be avoided. So do base + alpha * scale which will
//work for all 3 cases without using a conditional, which should be faster on a GPU. This gives:
//Base = 0, scale = 1. [3] = (0 + (alpha * 1)). [3] = alpha.
//Base = 0, scale = 255. [3] = (0 + (alpha * 255)). [3] = alpha * 255.
//Base = 255, scale = 0. [3] = (255 + (alpha * 0)). [3] = 255.
if (alphaAccum)
{
os
<< " correctedChannels[3] = (" << dataType << ")(alphaBase + (alpha * alphaScale));\n";
}
os <<
"}\n"
"\n";
os << "void GammaCorrectionFloats(" << (globalBucket ? "__global " : "") << "real4reals_bucket* bucket, __constant real_bucket_t* background, real_bucket_t g, real_bucket_t linRange, real_bucket_t vibrancy, real_bucket_t highlightPower, " << (finalOut ? "" : "__global") << " real_bucket_t* correctedChannels)\n";
os << "{\n"
<< " real_bucket_t alpha, ls, tmp, a;\n"
<< " real4reals_bucket newRgb;\n"
<< "\n"
<< " if (bucket->m_Reals[3] <= 0)\n"
<< " {\n"
<< " alpha = 0;\n"
<< " ls = 0;\n"
<< " }\n"
<< " else\n"
<< " {\n"
<< " tmp = bucket->m_Reals[3];\n"
<< " alpha = CalcAlpha(tmp, g, linRange);\n"
<< " ls = vibrancy * alpha / tmp;\n"
<< " alpha = clamp(alpha, (real_bucket_t)0.0, (real_bucket_t)1.0);\n"
<< " }\n"
<< "\n"
<< " CalcNewRgb(bucket, ls, highlightPower, &newRgb);\n"
<< "\n"
<< " for (uint rgbi = 0; rgbi < 3; rgbi++)\n"
<< " {\n"
<< " a = newRgb.m_Reals[rgbi] + ((1.0 - vibrancy) * pow(fabs(bucket->m_Reals[rgbi]), g));\n"
<< " a += ((1.0 - alpha) * background[rgbi]);\n"
<< " correctedChannels[rgbi] = (" << dataType << ")clamp(a, (real_bucket_t)0.0, (real_bucket_t)1.0);\n"
<< " }\n"
<< "\n"
<< " correctedChannels[3] = (" << dataType << ")alpha;\n"
<< "}\n"
<< "\n";
return os.str();
}
@ -451,9 +282,8 @@ string FinalAccumOpenCLKernelCreator::CreateCalcNewRgbFunctionString(bool global
/// <summary>
/// Create the gamma correction kernel string used for early clipping.
/// </summary>
/// <param name="alphaCalc">True if channels equals 4 and transparency is desired, else false.</param>
/// <returns>The gamma correction kernel string used for early clipping</returns>
string FinalAccumOpenCLKernelCreator::CreateGammaCorrectionKernelString(bool alphaCalc)
string FinalAccumOpenCLKernelCreator::CreateGammaCorrectionKernelString()
{
ostringstream os;
string dataType;
@ -465,8 +295,8 @@ string FinalAccumOpenCLKernelCreator::CreateGammaCorrectionKernelString(bool alp
CalcAlphaFunctionString <<
CreateCalcNewRgbFunctionString(true) <<
SpatialFilterCLStructString <<
CreateGammaCorrectionFunctionString(true, alphaCalc, true, false);//Will only be used with float in this case, early clip. Will always alpha accum.
os << "__kernel void " << (alphaCalc ? m_GammaCorrectionWithAlphaCalcEntryPoint : m_GammaCorrectionWithoutAlphaCalcEntryPoint) << "(\n" <<
CreateGammaCorrectionFunctionString(true, false);//Will only be used with float in this case, early clip. Will always alpha accum.
os << "__kernel void " << m_GammaCorrectionWithoutAlphaCalcEntryPoint << "(\n" <<
" __global real4reals_bucket* accumulator,\n"
" __constant SpatialFilterCL* spatialFilter\n"
")\n"
@ -478,8 +308,7 @@ string FinalAccumOpenCLKernelCreator::CreateGammaCorrectionKernelString(bool alp
"\n"
" uint superIndex = (GLOBAL_ID_Y * spatialFilter->m_SuperRasW) + GLOBAL_ID_X;\n"
" __global real4reals_bucket* bucket = accumulator + superIndex;\n"
//Pass in an alphaBase and alphaScale of 0, 1 which means to just directly assign the computed alpha value.
" GammaCorrectionFloats(bucket, &(spatialFilter->m_Background[0]), spatialFilter->m_Gamma, spatialFilter->m_LinRange, spatialFilter->m_Vibrancy, spatialFilter->m_HighlightPower, 0.0, 1.0, &(bucket->m_Reals[0]));\n"
" GammaCorrectionFloats(bucket, &(spatialFilter->m_Background[0]), spatialFilter->m_Gamma, spatialFilter->m_LinRange, spatialFilter->m_Vibrancy, spatialFilter->m_HighlightPower, &(bucket->m_Reals[0]));\n"
"}\n"
;
return os.str();

View File

@ -17,64 +17,35 @@ namespace EmberCLns
/// with all conditionals and unnecessary calculations stripped out.
/// The conditionals are:
/// Early clip/late clip
/// Alpha channel, no alpha channel
/// Alpha with/without transparency
/// </summary>
class EMBERCL_API FinalAccumOpenCLKernelCreator
{
public:
FinalAccumOpenCLKernelCreator(bool doublePrecision);
const string& GammaCorrectionWithAlphaCalcKernel() const;
const string& GammaCorrectionWithAlphaCalcEntryPoint() const;
const string& GammaCorrectionWithoutAlphaCalcKernel() const;
const string& GammaCorrectionWithoutAlphaCalcEntryPoint() const;
const string& FinalAccumEarlyClipKernel() const;
const string& FinalAccumEarlyClipEntryPoint() const;
const string& FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel() const;
const string& FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint() const;
const string& FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel() const;
const string& FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint() const;
const string& FinalAccumLateClipKernel() const;
const string& FinalAccumLateClipEntryPoint() const;
const string& FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel() const;
const string& FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint() const;
const string& FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel() const;
const string& FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint() const;
const string& GammaCorrectionEntryPoint(size_t channels, bool transparency) const;
const string& GammaCorrectionKernel(size_t channels, bool transparency) const;
const string& FinalAccumEntryPoint(bool earlyClip, size_t channels, bool transparency, double& alphaBase, double& alphaScale) const;
const string& FinalAccumKernel(bool earlyClip, size_t channels, bool transparency) const;
const string& GammaCorrectionEntryPoint() const;
const string& GammaCorrectionKernel() const;
const string& FinalAccumEntryPoint(bool earlyClip) const;
const string& FinalAccumKernel(bool earlyClip) const;
private:
string CreateFinalAccumKernelString(bool earlyClip, size_t channels, bool transparency);
string CreateGammaCorrectionKernelString(bool alphaCalc);
string CreateFinalAccumKernelString(bool earlyClip);
string CreateGammaCorrectionKernelString();
string CreateFinalAccumKernelString(bool earlyClip, bool alphaCalc, bool alphaAccum);
string CreateGammaCorrectionFunctionString(bool globalBucket, bool alphaCalc, bool alphaAccum, bool finalOut);
string CreateGammaCorrectionFunctionString(bool globalBucket, bool finalOut);
string CreateCalcNewRgbFunctionString(bool globalBucket);
string m_GammaCorrectionWithAlphaCalcKernel;
string m_GammaCorrectionWithAlphaCalcEntryPoint = "GammaCorrectionWithAlphaCalcKernel";
string m_GammaCorrectionWithoutAlphaCalcKernel;
string m_GammaCorrectionWithoutAlphaCalcEntryPoint = "GammaCorrectionWithoutAlphaCalcKernel";
string m_FinalAccumEarlyClipKernel;//False, false.
string m_FinalAccumEarlyClipEntryPoint = "FinalAccumEarlyClipKernel";
string m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel;//True, true.
string m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint = "FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel";
string m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel;//False, true.
string m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel;
string m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint = "FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel";
string m_FinalAccumLateClipKernel;//False, false.
string m_FinalAccumLateClipEntryPoint = "FinalAccumLateClipKernel";
string m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel;//True, true.
string m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint = "FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel";
string m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel;//False, true.
string m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel;
string m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint = "FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel";
string m_Empty;

View File

@ -40,7 +40,6 @@ void RendererCL<T, bucketT>::Init()
{
m_Init = false;
m_DoublePrecision = typeid(T) == typeid(double);
m_NumChannels = 4;
//Buffer names.
m_EmberBufferName = "Ember";
m_XformsBufferName = "Xforms";
@ -73,7 +72,7 @@ void RendererCL<T, bucketT>::Init()
m_PaletteFormat.image_channel_order = CL_RGBA;
m_PaletteFormat.image_channel_data_type = CL_FLOAT;
m_FinalFormat.image_channel_order = CL_RGBA;
m_FinalFormat.image_channel_data_type = CL_UNORM_INT8;//Change if this ever supports 2BPC outputs for PNG.
m_FinalFormat.image_channel_data_type = CL_FLOAT;
}
/// <summary>
@ -386,7 +385,7 @@ const string& RendererCL<T, bucketT>::DEKernel() const { return m_DEOpenCLKernel
/// </summary>
/// <returns>The string representation of the kernel for the last built final accumulation program.</returns>
template <typename T, typename bucketT>
const string& RendererCL<T, bucketT>::FinalAccumKernel() const { return m_FinalAccumOpenCLKernelCreator.FinalAccumKernel(EarlyClip(), Renderer<T, bucketT>::NumChannels(), Transparency()); }
const string& RendererCL<T, bucketT>::FinalAccumKernel() const { return m_FinalAccumOpenCLKernelCreator.FinalAccumKernel(EarlyClip()); }
/// <summary>
/// Get the a const referece to the devices this renderer will use.
@ -407,7 +406,7 @@ const vector<unique_ptr<RendererClDevice>>& RendererCL<T, bucketT>::Devices() co
/// <param name="pixels">The host side buffer to read into</param>
/// <returns>True if success, else false.</returns>
template <typename T, typename bucketT>
bool RendererCL<T, bucketT>::ReadFinal(byte* pixels)
bool RendererCL<T, bucketT>::ReadFinal(v4F* pixels)
{
if (pixels && !m_Devices.empty())
return m_Devices[0]->m_Wrapper.ReadImage(m_FinalImageName, FinalRasW(), FinalRasH(), 0, m_Devices[0]->m_Wrapper.Shared(), pixels);
@ -423,7 +422,7 @@ bool RendererCL<T, bucketT>::ReadFinal(byte* pixels)
template <typename T, typename bucketT>
bool RendererCL<T, bucketT>::ClearFinal()
{
vector<byte> v;
vector<v4F> v;
if (!m_Devices.empty())
{
@ -470,17 +469,6 @@ bool RendererCL<T, bucketT>::Ok() const
return !m_Devices.empty() && m_Init;
}
/// <summary>
/// Override to force num channels to be 4 because RGBA is always used for OpenCL
/// since the output is actually an image rather than just a buffer.
/// </summary>
/// <param name="numChannels">The number of channels, ignored.</param>
template <typename T, typename bucketT>
void RendererCL<T, bucketT>::NumChannels(size_t numChannels)
{
m_NumChannels = 4;
}
/// <summary>
/// Clear the error report for this class as well as the OpenCLWrapper members of each device.
/// </summary>
@ -771,7 +759,7 @@ eRenderStatus RendererCL<T, bucketT>::GaussianDensityFilter()
/// <param name="finalOffset">Offset in the buffer to store the pixels to</param>
/// <returns>True if success and not aborted, else false.</returns>
template <typename T, typename bucketT>
eRenderStatus RendererCL<T, bucketT>::AccumulatorToFinalImage(byte* pixels, size_t finalOffset)
eRenderStatus RendererCL<T, bucketT>::AccumulatorToFinalImage(v4F* pixels, size_t finalOffset)
{
auto status = RunFinalAccum();
@ -877,17 +865,6 @@ EmberStats RendererCL<T, bucketT>::Iterate(size_t iterCount, size_t temporalSamp
return stats;
}
/// <summary>
/// Override which just passes false to the base.
/// This is because curves are scaled from 0-1 to 0-255 or 0-65535 on the CPU, but need to be kept as 0-1 for OpenCL because the texture expects normalized values.
/// </summary>
/// <param name="scale">Ignored</param>
template <typename T, typename bucketT>
void RendererCL<T, bucketT>::ComputeCurves(bool scale)
{
Renderer<T, bucketT>::ComputeCurves(false);
}
/// <summary>
/// Private functions for making and running OpenCL programs.
/// </summary>
@ -1304,9 +1281,7 @@ eRenderStatus RendererCL<T, bucketT>::RunFinalAccum()
{
//Timing t(4);
bool b = true;
double alphaBase;
double alphaScale;
int accumKernelIndex = MakeAndGetFinalAccumProgram(alphaBase, alphaScale);
int accumKernelIndex = MakeAndGetFinalAccumProgram();
cl_uint argIndex;
size_t gridW;
size_t gridH;
@ -1323,7 +1298,7 @@ eRenderStatus RendererCL<T, bucketT>::RunFinalAccum()
if (b && !(b = wrapper.AddAndWriteBuffer(m_SpatialFilterParamsBufferName, reinterpret_cast<void*>(&m_SpatialFilterCL), sizeof(m_SpatialFilterCL)))) { AddToReport(loc); }
if (b && !(b = wrapper.AddAndWriteBuffer(m_CurvesCsaName, m_Csa.data(), SizeOf(m_Csa)))) { AddToReport(loc); }
if (b && !(b = wrapper.AddAndWriteBuffer(m_CurvesCsaName, m_Csa.data(), SizeOf(m_Csa)))) { AddToReport(loc); }
//Since early clip requires gamma correcting the entire accumulator first,
//it can't be done inside of the normal final accumulation kernel, so
@ -1373,10 +1348,6 @@ eRenderStatus RendererCL<T, bucketT>::RunFinalAccum()
if (b && !(b = wrapper.SetArg (accumKernelIndex, argIndex++, curvesSet))) { AddToReport(loc); }//Do curves.
if (b && !(b = wrapper.SetArg (accumKernelIndex, argIndex++, bucketT(alphaBase)))) { AddToReport(loc); }//Alpha base.
if (b && !(b = wrapper.SetArg (accumKernelIndex, argIndex++, bucketT(alphaScale)))) { AddToReport(loc); }//Alpha scale.
if (b && wrapper.Shared())
if (b && !(b = wrapper.EnqueueAcquireGLObjects(m_FinalImageName))) { AddToReport(loc); }
@ -1534,26 +1505,22 @@ int RendererCL<T, bucketT>::MakeAndGetDensityFilterProgram(size_t ss, uint filte
/// <summary>
/// Make the final accumulation on the primary device program and return its index.
/// There are many different kernels for final accum, depending on early clip, alpha channel, and transparency.
/// Loading all of these in the beginning is too much, so only load the one for the current case being worked with.
/// </summary>
/// <param name="alphaBase">Storage for the alpha base value used in the kernel. 0 if transparency is true, else 255.</param>
/// <param name="alphaScale">Storage for the alpha scale value used in the kernel. 255 if transparency is true, else 0.</param>
/// <returns>The kernel index if successful, else -1.</returns>
template <typename T, typename bucketT>
int RendererCL<T, bucketT>::MakeAndGetFinalAccumProgram(double& alphaBase, double& alphaScale)
int RendererCL<T, bucketT>::MakeAndGetFinalAccumProgram()
{
int kernelIndex = -1;
if (!m_Devices.empty())
{
auto& wrapper = m_Devices[0]->m_Wrapper;
auto& finalAccumEntryPoint = m_FinalAccumOpenCLKernelCreator.FinalAccumEntryPoint(EarlyClip(), Renderer<T, bucketT>::NumChannels(), Transparency(), alphaBase, alphaScale);
auto& finalAccumEntryPoint = m_FinalAccumOpenCLKernelCreator.FinalAccumEntryPoint(EarlyClip());
const char* loc = __FUNCTION__;
if ((kernelIndex = wrapper.FindKernelIndex(finalAccumEntryPoint)) == -1)//Has not been built yet.
{
auto& kernel = m_FinalAccumOpenCLKernelCreator.FinalAccumKernel(EarlyClip(), Renderer<T, bucketT>::NumChannels(), Transparency());
auto& kernel = m_FinalAccumOpenCLKernelCreator.FinalAccumKernel(EarlyClip());
if (wrapper.AddProgram(finalAccumEntryPoint, kernel, finalAccumEntryPoint, m_DoublePrecision))
kernelIndex = wrapper.FindKernelIndex(finalAccumEntryPoint);//Try to find it again, it will be present if successfully built.
@ -1575,13 +1542,13 @@ int RendererCL<T, bucketT>::MakeAndGetGammaCorrectionProgram()
if (!m_Devices.empty())
{
auto& wrapper = m_Devices[0]->m_Wrapper;
auto& gammaEntryPoint = m_FinalAccumOpenCLKernelCreator.GammaCorrectionEntryPoint(Renderer<T, bucketT>::NumChannels(), Transparency());
auto& gammaEntryPoint = m_FinalAccumOpenCLKernelCreator.GammaCorrectionEntryPoint();
int kernelIndex = wrapper.FindKernelIndex(gammaEntryPoint);
const char* loc = __FUNCTION__;
if (kernelIndex == -1)//Has not been built yet.
{
auto& kernel = m_FinalAccumOpenCLKernelCreator.GammaCorrectionKernel(Renderer<T, bucketT>::NumChannels(), Transparency());
auto& kernel = m_FinalAccumOpenCLKernelCreator.GammaCorrectionKernel();
bool b = wrapper.AddProgram(gammaEntryPoint, kernel, gammaEntryPoint, m_DoublePrecision);
if (b)
@ -1735,10 +1702,7 @@ void RendererCL<T, bucketT>::ConvertSpatialFilter()
m_SpatialFilterCL.m_FinalRasH = uint(FinalRasH());
m_SpatialFilterCL.m_Supersample = uint(Supersample());
m_SpatialFilterCL.m_FilterWidth = uint(m_SpatialFilter->FinalFilterWidth());
m_SpatialFilterCL.m_NumChannels = uint(Renderer<T, bucketT>::NumChannels());
m_SpatialFilterCL.m_BytesPerChannel = uint(BytesPerChannel());
m_SpatialFilterCL.m_DensityFilterOffset = uint(DensityFilterOffset());
m_SpatialFilterCL.m_Transparency = Transparency();
m_SpatialFilterCL.m_YAxisUp = uint(m_YAxisUp);
m_SpatialFilterCL.m_Vibrancy = vibrancy;
m_SpatialFilterCL.m_HighlightPower = HighlightPower();

View File

@ -19,7 +19,7 @@ class EMBERCL_API RendererCLBase
{
public:
virtual ~RendererCLBase() { }
virtual bool ReadFinal(byte* pixels) = 0;
virtual bool ReadFinal(v4F* pixels) = 0;
virtual bool ClearFinal() = 0;
};
@ -43,7 +43,6 @@ class EMBERCL_API RendererCL : public Renderer<T, bucketT>, public RendererCLBas
{
using EmberNs::Renderer<T, bucketT>::RendererBase::Abort;
using EmberNs::Renderer<T, bucketT>::RendererBase::EarlyClip;
using EmberNs::Renderer<T, bucketT>::RendererBase::Transparency;
using EmberNs::Renderer<T, bucketT>::RendererBase::EnterResize;
using EmberNs::Renderer<T, bucketT>::RendererBase::LeaveResize;
using EmberNs::Renderer<T, bucketT>::RendererBase::FinalRasW;
@ -60,7 +59,6 @@ class EMBERCL_API RendererCL : public Renderer<T, bucketT>, public RendererCLBas
using EmberNs::Renderer<T, bucketT>::RendererBase::m_YAxisUp;
using EmberNs::Renderer<T, bucketT>::RendererBase::m_LockAccum;
using EmberNs::Renderer<T, bucketT>::RendererBase::m_Abort;
using EmberNs::Renderer<T, bucketT>::RendererBase::m_NumChannels;
using EmberNs::Renderer<T, bucketT>::RendererBase::m_LastIter;
using EmberNs::Renderer<T, bucketT>::RendererBase::m_LastIterPercent;
using EmberNs::Renderer<T, bucketT>::RendererBase::m_Stats;
@ -141,13 +139,12 @@ public:
const vector<unique_ptr<RendererClDevice>>& Devices() const;
//Virtual functions overridden from RendererCLBase.
virtual bool ReadFinal(byte* pixels);
virtual bool ReadFinal(v4F* pixels);
virtual bool ClearFinal();
//Public virtual functions overridden from Renderer or RendererBase.
virtual size_t MemoryAvailable() override;
virtual bool Ok() const override;
virtual void NumChannels(size_t numChannels) override;
virtual void ClearErrorReport() override;
virtual size_t SubBatchSize() const override;
virtual size_t ThreadCount() const override;
@ -166,9 +163,8 @@ protected:
virtual bool ResetBuckets(bool resetHist = true, bool resetAccum = true) override;
virtual eRenderStatus LogScaleDensityFilter(bool forceOutput = false) override;
virtual eRenderStatus GaussianDensityFilter() override;
virtual eRenderStatus AccumulatorToFinalImage(byte* pixels, size_t finalOffset) override;
virtual eRenderStatus AccumulatorToFinalImage(v4F* pixels, size_t finalOffset) override;
virtual EmberStats Iterate(size_t iterCount, size_t temporalSample) override;
virtual void ComputeCurves(bool scale) override;
#ifndef TEST_CL
private:
@ -183,7 +179,7 @@ private:
bool ClearBuffer(size_t device, const string& bufferName, uint width, uint height, uint elementSize);
bool RunDensityFilterPrivate(size_t kernelIndex, size_t gridW, size_t gridH, size_t blockW, size_t blockH, uint chunkSizeW, uint chunkSizeH, uint colChunkPass, uint rowChunkPass);
int MakeAndGetDensityFilterProgram(size_t ss, uint filterWidth);
int MakeAndGetFinalAccumProgram(double& alphaBase, double& alphaScale);
int MakeAndGetFinalAccumProgram();
int MakeAndGetGammaCorrectionProgram();
bool CreateHostBuffer();
bool SumDeviceHist();