fractorium/Source/EmberCL/FinalAccumOpenCLKernelCreator.cpp

531 lines
23 KiB
C++
Raw Normal View History

#include "EmberCLPch.h"
#include "FinalAccumOpenCLKernelCreator.h"
namespace EmberCLns
{
/// <summary>
/// Constructor that creates all kernel strings.
/// The caller will access these strings through the accessor functions.
/// </summary>
template <typename T>
FinalAccumOpenCLKernelCreator<T>::FinalAccumOpenCLKernelCreator()
{
m_GammaCorrectionWithAlphaCalcEntryPoint = "GammaCorrectionWithAlphaCalcKernel";
m_GammaCorrectionWithoutAlphaCalcEntryPoint = "GammaCorrectionWithoutAlphaCalcKernel";
m_GammaCorrectionWithAlphaCalcKernel = CreateGammaCorrectionKernelString(true);
m_GammaCorrectionWithoutAlphaCalcKernel = CreateGammaCorrectionKernelString(false);
m_FinalAccumEarlyClipEntryPoint = "FinalAccumEarlyClipKernel";
m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint = "FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel";
m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint = "FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel";
m_FinalAccumEarlyClipKernel = CreateFinalAccumKernelString(true, false, false);
m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel = CreateFinalAccumKernelString(true, true, true);
m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel = CreateFinalAccumKernelString(true, false, true);
m_FinalAccumLateClipEntryPoint = "FinalAccumLateClipKernel";
m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint = "FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel";
m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint = "FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel";
m_FinalAccumLateClipKernel = CreateFinalAccumKernelString(false, false, false);
m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel = CreateFinalAccumKernelString(false, true, true);
m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel = CreateFinalAccumKernelString(false, false, true);
}
/// <summary>
/// Kernel source and entry point properties, getters only.
/// </summary>
template <typename T> string FinalAccumOpenCLKernelCreator<T>::GammaCorrectionWithAlphaCalcKernel() { return m_GammaCorrectionWithAlphaCalcKernel; }
template <typename T> string FinalAccumOpenCLKernelCreator<T>::GammaCorrectionWithAlphaCalcEntryPoint() { return m_GammaCorrectionWithAlphaCalcEntryPoint; }
template <typename T> string FinalAccumOpenCLKernelCreator<T>::GammaCorrectionWithoutAlphaCalcKernel() { return m_GammaCorrectionWithoutAlphaCalcKernel; }
template <typename T> string FinalAccumOpenCLKernelCreator<T>::GammaCorrectionWithoutAlphaCalcEntryPoint() { return m_GammaCorrectionWithoutAlphaCalcEntryPoint; }
template <typename T> string FinalAccumOpenCLKernelCreator<T>::FinalAccumEarlyClipKernel() { return m_FinalAccumEarlyClipKernel; }
template <typename T> string FinalAccumOpenCLKernelCreator<T>::FinalAccumEarlyClipEntryPoint() { return m_FinalAccumEarlyClipEntryPoint; }
template <typename T> string FinalAccumOpenCLKernelCreator<T>::FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel() { return m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel; }
template <typename T> string FinalAccumOpenCLKernelCreator<T>::FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint() { return m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint; }
template <typename T> string FinalAccumOpenCLKernelCreator<T>::FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel() { return m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel; }
template <typename T> string FinalAccumOpenCLKernelCreator<T>::FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint() { return m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint; }
template <typename T> string FinalAccumOpenCLKernelCreator<T>::FinalAccumLateClipKernel() { return m_FinalAccumLateClipKernel; }
template <typename T> string FinalAccumOpenCLKernelCreator<T>::FinalAccumLateClipEntryPoint() { return m_FinalAccumLateClipEntryPoint; }
template <typename T> string FinalAccumOpenCLKernelCreator<T>::FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel() { return m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel; }
template <typename T> string FinalAccumOpenCLKernelCreator<T>::FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint() { return m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint; }
template <typename T> string FinalAccumOpenCLKernelCreator<T>::FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel() { return m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel; }
template <typename T> string FinalAccumOpenCLKernelCreator<T>::FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint() { return m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint; }
/// <summary>
/// Get the gamma correction entry point.
/// </summary>
/// <param name="channels">The number of channels used, 3 or 4.</param>
/// <param name="transparency">True if channels equals 4 and using transparency, else false.</param>
/// <returns>The name of the gamma correction entry point kernel function</returns>
template <typename T>
0.4.1.3 Beta 10/14/2014 --User Changes Size is no longer fixed to the window size. Size scaling is done differently in the final render dialog. This fixes several bugs. Remove Xml saving size from settings and options dialog, it no longer applies. Final render can be broken into strips. Set default save path to the desktop if none is found in the settings file. Set default output size to 1920x1080 if none is found in the settings file. --Bug Fixes Better memory size reporting in final render dialog. --Code Changes Migrate to C++11, Qt 5.3.1, and Visual Studio 2013. Change most instances of unsigned int to size_t, and int to intmax_t. Add m_OrigPixPerUnit and m_ScaleType to Ember for scaling purposes. Replace some sprintf_s() calls in XmlToEmber with ostringstream. Move more non-templated members into RendererBase. Add CopyVec() overload that takes a per element function pointer. Add vector Memset(). Replace '&' with '+' instead of "&amp;" in XmlToEmber for much faster parsing. Break strips rendering out into EmberCommon and call from EmberRender and Fractorium. Make AddAndWriteBuffer() just call WriteBuffer(). Make AddAndWriteImage() delete the existing image first before replacing it. Add SetOutputTexture() to RendererCL to support making new textures in response to resize events. Remove multiple return statements in RendererCL, and replace with a bool that tracks results. Add ToDouble(), MakeEnd(), ToString() and Exists() wrappers in Fractorium. Add Size() wrapper in EmberFile. Make QString function arguments const QString&, and string with const string&. Make ShowCritical() wrapper for invoking a message box from another thread. Add combo box to TwoButtonWidget and rename.
2014-10-14 11:53:15 -04:00
string FinalAccumOpenCLKernelCreator<T>::GammaCorrectionEntryPoint(size_t channels, bool transparency)
{
bool alphaCalc = ((channels > 3) && transparency);
return alphaCalc ? m_GammaCorrectionWithAlphaCalcEntryPoint : m_GammaCorrectionWithoutAlphaCalcEntryPoint;
}
/// <summary>
/// Get the gamma correction kernel string.
/// </summary>
/// <param name="channels">The number of channels used, 3 or 4.</param>
/// <param name="transparency">True if channels equals 4 and using transparency, else false.</param>
/// <returns>The gamma correction kernel string</returns>
template <typename T>
0.4.1.3 Beta 10/14/2014 --User Changes Size is no longer fixed to the window size. Size scaling is done differently in the final render dialog. This fixes several bugs. Remove Xml saving size from settings and options dialog, it no longer applies. Final render can be broken into strips. Set default save path to the desktop if none is found in the settings file. Set default output size to 1920x1080 if none is found in the settings file. --Bug Fixes Better memory size reporting in final render dialog. --Code Changes Migrate to C++11, Qt 5.3.1, and Visual Studio 2013. Change most instances of unsigned int to size_t, and int to intmax_t. Add m_OrigPixPerUnit and m_ScaleType to Ember for scaling purposes. Replace some sprintf_s() calls in XmlToEmber with ostringstream. Move more non-templated members into RendererBase. Add CopyVec() overload that takes a per element function pointer. Add vector Memset(). Replace '&' with '+' instead of "&amp;" in XmlToEmber for much faster parsing. Break strips rendering out into EmberCommon and call from EmberRender and Fractorium. Make AddAndWriteBuffer() just call WriteBuffer(). Make AddAndWriteImage() delete the existing image first before replacing it. Add SetOutputTexture() to RendererCL to support making new textures in response to resize events. Remove multiple return statements in RendererCL, and replace with a bool that tracks results. Add ToDouble(), MakeEnd(), ToString() and Exists() wrappers in Fractorium. Add Size() wrapper in EmberFile. Make QString function arguments const QString&, and string with const string&. Make ShowCritical() wrapper for invoking a message box from another thread. Add combo box to TwoButtonWidget and rename.
2014-10-14 11:53:15 -04:00
string FinalAccumOpenCLKernelCreator<T>::GammaCorrectionKernel(size_t channels, bool transparency)
{
bool alphaCalc = ((channels > 3) && transparency);
return alphaCalc ? m_GammaCorrectionWithAlphaCalcKernel : m_GammaCorrectionWithoutAlphaCalcKernel;
}
/// <summary>
/// Get the final accumulation entry point.
/// </summary>
/// <param name="earlyClip">True if early clip is desired, else false.</param>
/// <param name="channels">The number of channels used, 3 or 4.</param>
/// <param name="transparency">True if channels equals 4 and using transparency, else false.</param>
/// <param name="alphaBase">Storage for the alpha base value used in the kernel. 0 if transparency is true, else 255.</param>
/// <param name="alphaScale">Storage for the alpha scale value used in the kernel. 255 if transparency is true, else 0.</param>
/// <returns>The name of the final accumulation entry point kernel function</returns>
template <typename T>
0.4.1.3 Beta 10/14/2014 --User Changes Size is no longer fixed to the window size. Size scaling is done differently in the final render dialog. This fixes several bugs. Remove Xml saving size from settings and options dialog, it no longer applies. Final render can be broken into strips. Set default save path to the desktop if none is found in the settings file. Set default output size to 1920x1080 if none is found in the settings file. --Bug Fixes Better memory size reporting in final render dialog. --Code Changes Migrate to C++11, Qt 5.3.1, and Visual Studio 2013. Change most instances of unsigned int to size_t, and int to intmax_t. Add m_OrigPixPerUnit and m_ScaleType to Ember for scaling purposes. Replace some sprintf_s() calls in XmlToEmber with ostringstream. Move more non-templated members into RendererBase. Add CopyVec() overload that takes a per element function pointer. Add vector Memset(). Replace '&' with '+' instead of "&amp;" in XmlToEmber for much faster parsing. Break strips rendering out into EmberCommon and call from EmberRender and Fractorium. Make AddAndWriteBuffer() just call WriteBuffer(). Make AddAndWriteImage() delete the existing image first before replacing it. Add SetOutputTexture() to RendererCL to support making new textures in response to resize events. Remove multiple return statements in RendererCL, and replace with a bool that tracks results. Add ToDouble(), MakeEnd(), ToString() and Exists() wrappers in Fractorium. Add Size() wrapper in EmberFile. Make QString function arguments const QString&, and string with const string&. Make ShowCritical() wrapper for invoking a message box from another thread. Add combo box to TwoButtonWidget and rename.
2014-10-14 11:53:15 -04:00
string FinalAccumOpenCLKernelCreator<T>::FinalAccumEntryPoint(bool earlyClip, size_t channels, bool transparency, T& alphaBase, T& alphaScale)
{
bool alphaCalc = ((channels > 3) && transparency);
bool alphaAccum = channels > 3;
if (alphaAccum)
{
alphaBase = transparency ? 0.0f : 255.0f;//See the table below.
alphaScale = transparency ? 255.0f : 0.0f;
}
if (earlyClip)
{
if (!alphaCalc && !alphaAccum)//Rgb output, the most common case.
return FinalAccumEarlyClipEntryPoint();
else if (alphaCalc && alphaAccum)//Rgba output and Transparency.
return FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint();
else if (!alphaCalc && alphaAccum)//Rgba output and !Transparency.
return FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint();
else
return "";//Cannot have alphaCalc and !alphaAccum, it makes no sense.
}
else
{
if (!alphaCalc && !alphaAccum)//Rgb output, the most common case.
return FinalAccumLateClipEntryPoint();
else if (alphaCalc && alphaAccum)//Rgba output and Transparency.
return FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint();
else if (!alphaCalc && alphaAccum)//Rgba output and !Transparency.
return FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint();
else
return "";//Cannot have alphaCalc and !alphaAccum, it makes no sense.
}
}
/// <summary>
/// Get the final accumulation kernel string.
/// </summary>
/// <param name="earlyClip">True if early clip is desired, else false.</param>
/// <param name="channels">The number of channels used, 3 or 4.</param>
/// <param name="transparency">True if channels equals 4 and using transparency, else false.</param>
/// <returns>The final accumulation kernel string</returns>
template <typename T>
0.4.1.3 Beta 10/14/2014 --User Changes Size is no longer fixed to the window size. Size scaling is done differently in the final render dialog. This fixes several bugs. Remove Xml saving size from settings and options dialog, it no longer applies. Final render can be broken into strips. Set default save path to the desktop if none is found in the settings file. Set default output size to 1920x1080 if none is found in the settings file. --Bug Fixes Better memory size reporting in final render dialog. --Code Changes Migrate to C++11, Qt 5.3.1, and Visual Studio 2013. Change most instances of unsigned int to size_t, and int to intmax_t. Add m_OrigPixPerUnit and m_ScaleType to Ember for scaling purposes. Replace some sprintf_s() calls in XmlToEmber with ostringstream. Move more non-templated members into RendererBase. Add CopyVec() overload that takes a per element function pointer. Add vector Memset(). Replace '&' with '+' instead of "&amp;" in XmlToEmber for much faster parsing. Break strips rendering out into EmberCommon and call from EmberRender and Fractorium. Make AddAndWriteBuffer() just call WriteBuffer(). Make AddAndWriteImage() delete the existing image first before replacing it. Add SetOutputTexture() to RendererCL to support making new textures in response to resize events. Remove multiple return statements in RendererCL, and replace with a bool that tracks results. Add ToDouble(), MakeEnd(), ToString() and Exists() wrappers in Fractorium. Add Size() wrapper in EmberFile. Make QString function arguments const QString&, and string with const string&. Make ShowCritical() wrapper for invoking a message box from another thread. Add combo box to TwoButtonWidget and rename.
2014-10-14 11:53:15 -04:00
string FinalAccumOpenCLKernelCreator<T>::FinalAccumKernel(bool earlyClip, size_t channels, bool transparency)
{
bool alphaCalc = (channels > 3 && transparency);
bool alphaAccum = channels > 3;
if (earlyClip)
{
if (!alphaCalc && !alphaAccum)//Rgb output, the most common case.
return FinalAccumEarlyClipKernel();
else if (alphaCalc && alphaAccum)//Rgba output and Transparency.
return FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel();
else if (!alphaCalc && alphaAccum)//Rgba output and !Transparency.
return FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel();
else
return "";//Cannot have alphaCalc and !alphaAccum, it makes no sense.
}
else
{
if (!alphaCalc && !alphaAccum)//Rgb output, the most common case.
return FinalAccumLateClipKernel();
else if (alphaCalc && alphaAccum)//Rgba output and Transparency.
return FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel();
else if (!alphaCalc && alphaAccum)//Rgba output and !Transparency.
return FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel();
else
return "";//Cannot have alphaCalc and !alphaAccum, it makes no sense.
}
}
/// <summary>
/// Wrapper around CreateFinalAccumKernelString().
/// </summary>
/// <param name="earlyClip">True if early clip is desired, else false.</param>
/// <param name="channels">The number of channels used, 3 or 4.</param>
/// <param name="transparency">True if channels equals 4 and using transparency, else false.</param>
/// <returns>The final accumulation kernel string</returns>
template <typename T>
0.4.1.3 Beta 10/14/2014 --User Changes Size is no longer fixed to the window size. Size scaling is done differently in the final render dialog. This fixes several bugs. Remove Xml saving size from settings and options dialog, it no longer applies. Final render can be broken into strips. Set default save path to the desktop if none is found in the settings file. Set default output size to 1920x1080 if none is found in the settings file. --Bug Fixes Better memory size reporting in final render dialog. --Code Changes Migrate to C++11, Qt 5.3.1, and Visual Studio 2013. Change most instances of unsigned int to size_t, and int to intmax_t. Add m_OrigPixPerUnit and m_ScaleType to Ember for scaling purposes. Replace some sprintf_s() calls in XmlToEmber with ostringstream. Move more non-templated members into RendererBase. Add CopyVec() overload that takes a per element function pointer. Add vector Memset(). Replace '&' with '+' instead of "&amp;" in XmlToEmber for much faster parsing. Break strips rendering out into EmberCommon and call from EmberRender and Fractorium. Make AddAndWriteBuffer() just call WriteBuffer(). Make AddAndWriteImage() delete the existing image first before replacing it. Add SetOutputTexture() to RendererCL to support making new textures in response to resize events. Remove multiple return statements in RendererCL, and replace with a bool that tracks results. Add ToDouble(), MakeEnd(), ToString() and Exists() wrappers in Fractorium. Add Size() wrapper in EmberFile. Make QString function arguments const QString&, and string with const string&. Make ShowCritical() wrapper for invoking a message box from another thread. Add combo box to TwoButtonWidget and rename.
2014-10-14 11:53:15 -04:00
string FinalAccumOpenCLKernelCreator<T>::CreateFinalAccumKernelString(bool earlyClip, size_t channels, bool transparency)
{
return CreateFinalAccumKernelString(earlyClip, (channels > 3 && transparency), channels > 3);
}
/// <summary>
/// Create the final accumulation kernel string
/// </summary>
/// <param name="earlyClip">True if early clip is desired, else false.</param>
/// <param name="alphaCalc">True if channels equals 4 and transparency is desired, else false.</param>
/// <param name="alphaAccum">True if channels equals 4</param>
/// <returns>The final accumulation kernel string</returns>
template <typename T>
string FinalAccumOpenCLKernelCreator<T>::CreateFinalAccumKernelString(bool earlyClip, bool alphaCalc, bool alphaAccum)
{
ostringstream os;
string channels = alphaAccum ? "4" : "3";
os <<
ConstantDefinesString(typeid(T) == typeid(double)) <<
ClampRealFunctionString <<
UnionCLStructString <<
RgbToHsvFunctionString <<
HsvToRgbFunctionString <<
CalcAlphaFunctionString <<
CurveAdjustFunctionString <<
SpatialFilterCLStructString;
if (earlyClip)
{
if (!alphaCalc && !alphaAccum)//Rgb output, the most common case.
os << "__kernel void " << m_FinalAccumEarlyClipEntryPoint << "(\n";
else if (alphaCalc && alphaAccum)//Rgba output and Transparency.
os << "__kernel void " << m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint << "(\n";
else if (!alphaCalc && alphaAccum)//Rgba output and !Transparency.
os << "__kernel void " << m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint << "(\n";
else
return "";//Cannot have alphaCalc and !alphaAccum, it makes no sense.
}
else
{
os <<
CreateCalcNewRgbFunctionString(false) <<
CreateGammaCorrectionFunctionString(false, alphaCalc, alphaAccum, true);
if (!alphaCalc && !alphaAccum)//Rgb output, the most common case.
os << "__kernel void " << m_FinalAccumLateClipEntryPoint << "(\n";
else if (alphaCalc && alphaAccum)//Rgba output and Transparency.
os << "__kernel void " << m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint << "(\n";
else if (!alphaCalc && alphaAccum)//Rgba output and !Transparency.
os << "__kernel void " << m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint << "(\n";
else
return "";//Cannot have alphaCalc and !alphaAccum, it makes no sense.
}
os <<
" const __global real4reals* accumulator,\n"
" __write_only image2d_t pixels,\n"
" __constant SpatialFilterCL* spatialFilter,\n"
" __constant real_t* filterCoefs,\n"
" __constant real4reals* csa,\n"
" const uint doCurves,\n"
" const real_t alphaBase,\n"
" const real_t alphaScale\n"
"\t)\n"
"{\n"
"\n"
" if ((GLOBAL_ID_Y >= spatialFilter->m_FinalRasH) || (GLOBAL_ID_X >= spatialFilter->m_FinalRasW))\n"
" return;\n"
"\n"
" uint accumX = spatialFilter->m_DensityFilterOffset + (GLOBAL_ID_X * spatialFilter->m_Supersample);\n"
" uint accumY = spatialFilter->m_DensityFilterOffset + (GLOBAL_ID_Y * spatialFilter->m_Supersample);\n"
" int2 finalCoord;\n"
" finalCoord.x = GLOBAL_ID_X;\n"
" finalCoord.y = (int)((spatialFilter->m_YAxisUp == 1) ? ((spatialFilter->m_FinalRasH - GLOBAL_ID_Y) - 1) : GLOBAL_ID_Y);\n"
" float4floats finalColor;\n"
" int ii, jj;\n"
" uint filterKRowIndex;\n"
" const __global real4reals* accumBucket;\n"
" real4reals newBucket;\n"
" newBucket.m_Real4 = 0;\n"
"\n"
" for (jj = 0; jj < spatialFilter->m_FilterWidth; jj++)\n"
" {\n"
" filterKRowIndex = jj * spatialFilter->m_FilterWidth;\n"
"\n"
" for (ii = 0; ii < spatialFilter->m_FilterWidth; ii++)\n"
" {\n"
" real_t k = filterCoefs[ii + filterKRowIndex];\n"
"\n"
" accumBucket = accumulator + (accumX + ii) + ((accumY + jj) * spatialFilter->m_SuperRasW);\n"
" newBucket.m_Real4 += (k * accumBucket->m_Real4);\n"
" }\n"
" }\n"
"\n";
//Not supporting 2 bytes per channel on the GPU. If the user wants it, run on the CPU.
if (earlyClip)//If early clip, simply assign values directly to the temp float4 since they've been gamma corrected already, then write it straight to the output image below.
{
os <<
" finalColor.m_Float4.x = (float)newBucket.m_Real4.x;\n"//CPU side clamps, skip here because write_imagef() does the clamping for us.
" finalColor.m_Float4.y = (float)newBucket.m_Real4.y;\n"
" finalColor.m_Float4.z = (float)newBucket.m_Real4.z;\n";
if (alphaAccum)
{
if (alphaCalc)
os << " finalColor.m_Float4.w = (float)newBucket.m_Real4.w * 255.0f;\n";
else
os << " finalColor.m_Float4.w = 255;\n";
}
}
else
{
//Late clip, so must gamma correct from the temp new bucket to temp float4.
if (typeid(T) == typeid(double))
{
os <<
" real4reals realFinal;\n"
"\n"
" GammaCorrectionFloats(&newBucket, &(spatialFilter->m_Background[0]), spatialFilter->m_Gamma, spatialFilter->m_LinRange, spatialFilter->m_Vibrancy, spatialFilter->m_HighlightPower, alphaBase, alphaScale, &(realFinal.m_Reals[0]));\n"
" finalColor.m_Float4.x = (float)realFinal.m_Real4.x;\n"
" finalColor.m_Float4.y = (float)realFinal.m_Real4.y;\n"
" finalColor.m_Float4.z = (float)realFinal.m_Real4.z;\n"
" finalColor.m_Float4.w = (float)realFinal.m_Real4.w;\n"
;
}
else
{
os <<
" GammaCorrectionFloats(&newBucket, &(spatialFilter->m_Background[0]), spatialFilter->m_Gamma, spatialFilter->m_LinRange, spatialFilter->m_Vibrancy, spatialFilter->m_HighlightPower, alphaBase, alphaScale, &(finalColor.m_Floats[0]));\n";
}
}
os <<
"\n"
" if (doCurves)\n"
" {\n"
" CurveAdjust(csa, &(finalColor.m_Floats[0]), 1);\n"
" CurveAdjust(csa, &(finalColor.m_Floats[1]), 2);\n"
" CurveAdjust(csa, &(finalColor.m_Floats[2]), 3);\n"
" }\n"
"\n"
" finalColor.m_Float4 /= 255.0f;\n"
" write_imagef(pixels, finalCoord, finalColor.m_Float4);\n"//Use write_imagef instead of write_imageui because only the former works when sharing with an OpenGL texture.
" barrier(CLK_GLOBAL_MEM_FENCE);\n"//Required, or else page tearing will occur during interactive rendering.
"}\n"
;
return os.str();
}
/// <summary>
/// Creates the gamma correction function string.
/// This is not a full kernel, just a function that is used in the kernels.
/// </summary>
/// <param name="globalBucket">True if writing to a global buffer (early clip), else false (late clip).</param>
/// <param name="alphaCalc">True if channels equals 4 and transparency is desired, else false.</param>
/// <param name="alphaAccum">True if channels equals 4</param>
/// <param name="finalOut">True if writing to global buffer (late clip), else false (early clip).</param>
/// <returns>The gamma correction function string</returns>
template <typename T>
string FinalAccumOpenCLKernelCreator<T>::CreateGammaCorrectionFunctionString(bool globalBucket, bool alphaCalc, bool alphaAccum, bool finalOut)
{
ostringstream os;
string dataType;
string unionMember;
dataType = "real_t";
//Use real_t for all cases, early clip and final accum.
os << "void GammaCorrectionFloats(" << (globalBucket ? "__global " : "") << "real4reals* bucket, __constant real_t* background, real_t g, real_t linRange, real_t vibrancy, real_t highlightPower, real_t alphaBase, real_t alphaScale, " << (finalOut ? "" : "__global") << " real_t* correctedChannels)\n";
os
<< "{\n"
<< " real_t alpha, ls, tmp, a;\n"
<< " real4reals newRgb;\n"
<< "\n"
<< " if (bucket->m_Reals[3] <= 0)\n"
<< " {\n"
<< " alpha = 0;\n"
<< " ls = 0;\n"
<< " }\n"
<< " else\n"
<< " {\n"
<< " tmp = bucket->m_Reals[3];\n"
<< " alpha = CalcAlpha(tmp, g, linRange);\n"
<< " ls = vibrancy * 256.0 * alpha / tmp;\n"
<< " ClampRef(&alpha, 0.0, 1.0);\n"
<< " }\n"
<< "\n"
<< " CalcNewRgb(bucket, ls, highlightPower, &newRgb);\n"
<< "\n"
<< " for (uint rgbi = 0; rgbi < 3; rgbi++)\n"
<< " {\n"
<< " a = newRgb.m_Reals[rgbi] + ((1.0 - vibrancy) * 256.0 * pow(bucket->m_Reals[rgbi], g));\n"
<< "\n";
if (!alphaCalc)
{
os <<
" a += ((1.0 - alpha) * background[rgbi]);\n";
}
else
{
os
<< " if (alpha > 0)\n"
<< " a /= alpha;\n"
<< " else\n"
<< " a = 0;\n";
}
os <<
"\n"
" correctedChannels[rgbi] = (" << dataType << ")clamp(a, (real_t)0.0, (real_t)255.0);\n"
" }\n"
"\n";
//The CPU code has 3 cases for assigning alpha:
//[3] = alpha.//Early clip.
//[3] = alpha * 255.//Final Rgba with transparency.
//[3] = 255.//Final Rgba without transparency.
//Putting conditionals in GPU code is to be avoided. So do base + alpha * scale which will
//work for all 3 cases without using a conditional, which should be faster on a GPU. This gives:
//Base = 0, scale = 1. [3] = (0 + (alpha * 1)). [3] = alpha.
//Base = 0, scale = 255. [3] = (0 + (alpha * 255)). [3] = alpha * 255.
//Base = 255, scale = 0. [3] = (255 + (alpha * 0)). [3] = 255.
if (alphaAccum)
{
os
<< " correctedChannels[3] = (" << dataType << ")(alphaBase + (alpha * alphaScale));\n";
}
os <<
"}\n"
"\n";
return os.str();
}
/// <summary>
/// OpenCL equivalent of Palette::CalcNewRgb().
/// </summary>
/// <param name="globalBucket">True if writing the corrected value to a global buffer (early clip), else false (late clip).</param>
/// <returns>The CalcNewRgb function string</returns>
template <typename T>
string FinalAccumOpenCLKernelCreator<T>::CreateCalcNewRgbFunctionString(bool globalBucket)
{
ostringstream os;
os <<
"static void CalcNewRgb(" << (globalBucket ? "__global " : "") << "real4reals* oldRgb, real_t ls, real_t highPow, real4reals* newRgb)\n"
"{\n"
" int rgbi;\n"
" real_t newls, lsratio;\n"
" real4reals newHsv;\n"
" real_t maxa, maxc;\n"
" real_t adjhlp;\n"
"\n"
" if (ls == 0 || (oldRgb->m_Real4.x == 0 && oldRgb->m_Real4.y == 0 && oldRgb->m_Real4.z == 0))\n"//Can't do a vector compare to zero.
" {\n"
" newRgb->m_Real4 = 0;\n"
" return;\n"
" }\n"
"\n"
//Identify the most saturated channel.
" maxc = max(max(oldRgb->m_Reals[0], oldRgb->m_Reals[1]), oldRgb->m_Reals[2]);\n"
" maxa = ls * maxc;\n"
"\n"
//If a channel is saturated and highlight power is non-negative
//modify the color to prevent hue shift.
" if (maxa > 255 && highPow >= 0)\n"
" {\n"
" newls = 255.0 / maxc;\n"
" lsratio = pow(newls / ls, highPow);\n"
"\n"
//Calculate the max-value color (ranged 0 - 1).
" for (rgbi = 0; rgbi < 3; rgbi++)\n"
" newRgb->m_Reals[rgbi] = newls * oldRgb->m_Reals[rgbi] / 255.0;\n"
"\n"
//Reduce saturation by the lsratio.
" RgbToHsv(&(newRgb->m_Real4), &(newHsv.m_Real4));\n"
" newHsv.m_Real4.y *= lsratio;\n"
" HsvToRgb(&(newHsv.m_Real4), &(newRgb->m_Real4));\n"
"\n"
" for (rgbi = 0; rgbi < 3; rgbi++)\n"//Unrolling and vectorizing makes no difference.
" newRgb->m_Reals[rgbi] *= 255.0;\n"
" }\n"
" else\n"
" {\n"
" newls = 255.0 / maxc;\n"
" adjhlp = -highPow;\n"
"\n"
" if (adjhlp > 1)\n"
" adjhlp = 1;\n"
"\n"
" if (maxa <= 255)\n"
" adjhlp = 1;\n"
"\n"
//Calculate the max-value color (ranged 0 - 1) interpolated with the old behavior.
" for (rgbi = 0; rgbi < 3; rgbi++)\n"//Unrolling, caching and vectorizing makes no difference.
" newRgb->m_Reals[rgbi] = ((1.0 - adjhlp) * newls + adjhlp * ls) * oldRgb->m_Reals[rgbi];\n"
" }\n"
"}\n"
"\n";
return os.str();
}
/// <summary>
/// Create the gamma correction kernel string used for early clipping.
/// </summary>
/// <param name="alphaCalc">True if channels equals 4 and transparency is desired, else false.</param>
/// <returns>The gamma correction kernel string used for early clipping</returns>
template <typename T>
string FinalAccumOpenCLKernelCreator<T>::CreateGammaCorrectionKernelString(bool alphaCalc)
{
ostringstream os;
string dataType;
os <<
ConstantDefinesString(typeid(T) == typeid(double)) <<
ClampRealFunctionString <<
UnionCLStructString <<
RgbToHsvFunctionString <<
HsvToRgbFunctionString <<
CalcAlphaFunctionString <<
CreateCalcNewRgbFunctionString(true) <<
SpatialFilterCLStructString <<
CreateGammaCorrectionFunctionString(true, alphaCalc, true, false);//Will only be used with float in this case, early clip. Will always alpha accum.
os << "__kernel void " << (alphaCalc ? m_GammaCorrectionWithAlphaCalcEntryPoint : m_GammaCorrectionWithoutAlphaCalcEntryPoint) << "(\n" <<
" __global real4reals* accumulator,\n"
" __constant SpatialFilterCL* spatialFilter\n"
")\n"
"{\n"
" int testGutter = 0;\n"
"\n"
" if (GLOBAL_ID_Y >= (spatialFilter->m_SuperRasH - testGutter) || GLOBAL_ID_X >= (spatialFilter->m_SuperRasW - testGutter))\n"
" return;\n"
"\n"
" uint superIndex = (GLOBAL_ID_Y * spatialFilter->m_SuperRasW) + GLOBAL_ID_X;\n"
" __global real4reals* bucket = accumulator + superIndex;\n"
//Pass in an alphaBase and alphaScale of 0, 1 which means to just directly assign the computed alpha value.
" GammaCorrectionFloats(bucket, &(spatialFilter->m_Background[0]), spatialFilter->m_Gamma, spatialFilter->m_LinRange, spatialFilter->m_Vibrancy, spatialFilter->m_HighlightPower, 0.0, 1.0, &(bucket->m_Reals[0]));\n"
"}\n"
;
return os.str();
}
template EMBERCL_API class FinalAccumOpenCLKernelCreator<float>;
#ifdef DO_DOUBLE
template EMBERCL_API class FinalAccumOpenCLKernelCreator<double>;
#endif
}