fractorium/Source/EmberCL/FinalAccumOpenCLKernelCreator.cpp
mfeemster 07592c9d78 Remove ReadMe.txt from all project files.
Add Curves.h, and CurvesGraphicsView.h/cpp to support bezier color curves.
Add Curves member to Ember.
Add curves capability to EmberCL.
Remove some unused variables in the kernel created in RendererCL::CreateFinalAccumKernelString().
Use glm namespace for vec classes if GLM_VERSION >= 96, else use glm::detail.
As a result of using glm namespace, all instances of min and max had to be qualified with std::
Split ComputeCamera into that and ComputeQuality().
Reduce the amount of ComputeCamera() and MakeDmap() calls on each incremental iter that doesn't use temporal samples.
Fix clamping bug with DE filter widths.
Provide functions to return the kernels from RendererCL to assist with diagnostics and debugging.
Prevent extra newline in EmberRender when only rendering a single image.
Add the ability to delete an ember at a given index in EmberFile.
Allow deleting/focusing ember in library tab with delete and enter keys.
Reorder some code in Fractorium.h to match the tabs order.
Add and call ClearFinalImages() to clear buffers in controller to fix bug where previous CPU render would be shown for a split second when switching from OpenCL back to CPU.
Refactor ember library pointer syncing to a function SyncPointers().
Add the ability to save ember Xmls to an unique automatically generated name after the first time the user has specified a name.
2015-03-21 15:27:37 -07:00

531 lines
23 KiB
C++

#include "EmberCLPch.h"
#include "FinalAccumOpenCLKernelCreator.h"
namespace EmberCLns
{
/// <summary>
/// Constructor that creates all kernel strings.
/// The caller will access these strings through the accessor functions.
/// </summary>
template <typename T>
FinalAccumOpenCLKernelCreator<T>::FinalAccumOpenCLKernelCreator()
{
m_GammaCorrectionWithAlphaCalcEntryPoint = "GammaCorrectionWithAlphaCalcKernel";
m_GammaCorrectionWithoutAlphaCalcEntryPoint = "GammaCorrectionWithoutAlphaCalcKernel";
m_GammaCorrectionWithAlphaCalcKernel = CreateGammaCorrectionKernelString(true);
m_GammaCorrectionWithoutAlphaCalcKernel = CreateGammaCorrectionKernelString(false);
m_FinalAccumEarlyClipEntryPoint = "FinalAccumEarlyClipKernel";
m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint = "FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel";
m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint = "FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel";
m_FinalAccumEarlyClipKernel = CreateFinalAccumKernelString(true, false, false);
m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel = CreateFinalAccumKernelString(true, true, true);
m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel = CreateFinalAccumKernelString(true, false, true);
m_FinalAccumLateClipEntryPoint = "FinalAccumLateClipKernel";
m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint = "FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel";
m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint = "FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel";
m_FinalAccumLateClipKernel = CreateFinalAccumKernelString(false, false, false);
m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel = CreateFinalAccumKernelString(false, true, true);
m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel = CreateFinalAccumKernelString(false, false, true);
}
/// <summary>
/// Kernel source and entry point properties, getters only.
/// </summary>
template <typename T> string FinalAccumOpenCLKernelCreator<T>::GammaCorrectionWithAlphaCalcKernel() { return m_GammaCorrectionWithAlphaCalcKernel; }
template <typename T> string FinalAccumOpenCLKernelCreator<T>::GammaCorrectionWithAlphaCalcEntryPoint() { return m_GammaCorrectionWithAlphaCalcEntryPoint; }
template <typename T> string FinalAccumOpenCLKernelCreator<T>::GammaCorrectionWithoutAlphaCalcKernel() { return m_GammaCorrectionWithoutAlphaCalcKernel; }
template <typename T> string FinalAccumOpenCLKernelCreator<T>::GammaCorrectionWithoutAlphaCalcEntryPoint() { return m_GammaCorrectionWithoutAlphaCalcEntryPoint; }
template <typename T> string FinalAccumOpenCLKernelCreator<T>::FinalAccumEarlyClipKernel() { return m_FinalAccumEarlyClipKernel; }
template <typename T> string FinalAccumOpenCLKernelCreator<T>::FinalAccumEarlyClipEntryPoint() { return m_FinalAccumEarlyClipEntryPoint; }
template <typename T> string FinalAccumOpenCLKernelCreator<T>::FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel() { return m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel; }
template <typename T> string FinalAccumOpenCLKernelCreator<T>::FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint() { return m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint; }
template <typename T> string FinalAccumOpenCLKernelCreator<T>::FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel() { return m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel; }
template <typename T> string FinalAccumOpenCLKernelCreator<T>::FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint() { return m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint; }
template <typename T> string FinalAccumOpenCLKernelCreator<T>::FinalAccumLateClipKernel() { return m_FinalAccumLateClipKernel; }
template <typename T> string FinalAccumOpenCLKernelCreator<T>::FinalAccumLateClipEntryPoint() { return m_FinalAccumLateClipEntryPoint; }
template <typename T> string FinalAccumOpenCLKernelCreator<T>::FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel() { return m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel; }
template <typename T> string FinalAccumOpenCLKernelCreator<T>::FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint() { return m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint; }
template <typename T> string FinalAccumOpenCLKernelCreator<T>::FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel() { return m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel; }
template <typename T> string FinalAccumOpenCLKernelCreator<T>::FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint() { return m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint; }
/// <summary>
/// Get the gamma correction entry point.
/// </summary>
/// <param name="channels">The number of channels used, 3 or 4.</param>
/// <param name="transparency">True if channels equals 4 and using transparency, else false.</param>
/// <returns>The name of the gamma correction entry point kernel function</returns>
template <typename T>
string FinalAccumOpenCLKernelCreator<T>::GammaCorrectionEntryPoint(size_t channels, bool transparency)
{
bool alphaCalc = ((channels > 3) && transparency);
return alphaCalc ? m_GammaCorrectionWithAlphaCalcEntryPoint : m_GammaCorrectionWithoutAlphaCalcEntryPoint;
}
/// <summary>
/// Get the gamma correction kernel string.
/// </summary>
/// <param name="channels">The number of channels used, 3 or 4.</param>
/// <param name="transparency">True if channels equals 4 and using transparency, else false.</param>
/// <returns>The gamma correction kernel string</returns>
template <typename T>
string FinalAccumOpenCLKernelCreator<T>::GammaCorrectionKernel(size_t channels, bool transparency)
{
bool alphaCalc = ((channels > 3) && transparency);
return alphaCalc ? m_GammaCorrectionWithAlphaCalcKernel : m_GammaCorrectionWithoutAlphaCalcKernel;
}
/// <summary>
/// Get the final accumulation entry point.
/// </summary>
/// <param name="earlyClip">True if early clip is desired, else false.</param>
/// <param name="channels">The number of channels used, 3 or 4.</param>
/// <param name="transparency">True if channels equals 4 and using transparency, else false.</param>
/// <param name="alphaBase">Storage for the alpha base value used in the kernel. 0 if transparency is true, else 255.</param>
/// <param name="alphaScale">Storage for the alpha scale value used in the kernel. 255 if transparency is true, else 0.</param>
/// <returns>The name of the final accumulation entry point kernel function</returns>
template <typename T>
string FinalAccumOpenCLKernelCreator<T>::FinalAccumEntryPoint(bool earlyClip, size_t channels, bool transparency, T& alphaBase, T& alphaScale)
{
bool alphaCalc = ((channels > 3) && transparency);
bool alphaAccum = channels > 3;
if (alphaAccum)
{
alphaBase = transparency ? 0.0f : 255.0f;//See the table below.
alphaScale = transparency ? 255.0f : 0.0f;
}
if (earlyClip)
{
if (!alphaCalc && !alphaAccum)//Rgb output, the most common case.
return FinalAccumEarlyClipEntryPoint();
else if (alphaCalc && alphaAccum)//Rgba output and Transparency.
return FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint();
else if (!alphaCalc && alphaAccum)//Rgba output and !Transparency.
return FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint();
else
return "";//Cannot have alphaCalc and !alphaAccum, it makes no sense.
}
else
{
if (!alphaCalc && !alphaAccum)//Rgb output, the most common case.
return FinalAccumLateClipEntryPoint();
else if (alphaCalc && alphaAccum)//Rgba output and Transparency.
return FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint();
else if (!alphaCalc && alphaAccum)//Rgba output and !Transparency.
return FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint();
else
return "";//Cannot have alphaCalc and !alphaAccum, it makes no sense.
}
}
/// <summary>
/// Get the final accumulation kernel string.
/// </summary>
/// <param name="earlyClip">True if early clip is desired, else false.</param>
/// <param name="channels">The number of channels used, 3 or 4.</param>
/// <param name="transparency">True if channels equals 4 and using transparency, else false.</param>
/// <returns>The final accumulation kernel string</returns>
template <typename T>
string FinalAccumOpenCLKernelCreator<T>::FinalAccumKernel(bool earlyClip, size_t channels, bool transparency)
{
bool alphaCalc = (channels > 3 && transparency);
bool alphaAccum = channels > 3;
if (earlyClip)
{
if (!alphaCalc && !alphaAccum)//Rgb output, the most common case.
return FinalAccumEarlyClipKernel();
else if (alphaCalc && alphaAccum)//Rgba output and Transparency.
return FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel();
else if (!alphaCalc && alphaAccum)//Rgba output and !Transparency.
return FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel();
else
return "";//Cannot have alphaCalc and !alphaAccum, it makes no sense.
}
else
{
if (!alphaCalc && !alphaAccum)//Rgb output, the most common case.
return FinalAccumLateClipKernel();
else if (alphaCalc && alphaAccum)//Rgba output and Transparency.
return FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel();
else if (!alphaCalc && alphaAccum)//Rgba output and !Transparency.
return FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel();
else
return "";//Cannot have alphaCalc and !alphaAccum, it makes no sense.
}
}
/// <summary>
/// Wrapper around CreateFinalAccumKernelString().
/// </summary>
/// <param name="earlyClip">True if early clip is desired, else false.</param>
/// <param name="channels">The number of channels used, 3 or 4.</param>
/// <param name="transparency">True if channels equals 4 and using transparency, else false.</param>
/// <returns>The final accumulation kernel string</returns>
template <typename T>
string FinalAccumOpenCLKernelCreator<T>::CreateFinalAccumKernelString(bool earlyClip, size_t channels, bool transparency)
{
return CreateFinalAccumKernelString(earlyClip, (channels > 3 && transparency), channels > 3);
}
/// <summary>
/// Create the final accumulation kernel string
/// </summary>
/// <param name="earlyClip">True if early clip is desired, else false.</param>
/// <param name="alphaCalc">True if channels equals 4 and transparency is desired, else false.</param>
/// <param name="alphaAccum">True if channels equals 4</param>
/// <returns>The final accumulation kernel string</returns>
template <typename T>
string FinalAccumOpenCLKernelCreator<T>::CreateFinalAccumKernelString(bool earlyClip, bool alphaCalc, bool alphaAccum)
{
ostringstream os;
string channels = alphaAccum ? "4" : "3";
os <<
ConstantDefinesString(typeid(T) == typeid(double)) <<
ClampRealFunctionString <<
UnionCLStructString <<
RgbToHsvFunctionString <<
HsvToRgbFunctionString <<
CalcAlphaFunctionString <<
CurveAdjustFunctionString <<
SpatialFilterCLStructString;
if (earlyClip)
{
if (!alphaCalc && !alphaAccum)//Rgb output, the most common case.
os << "__kernel void " << m_FinalAccumEarlyClipEntryPoint << "(\n";
else if (alphaCalc && alphaAccum)//Rgba output and Transparency.
os << "__kernel void " << m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint << "(\n";
else if (!alphaCalc && alphaAccum)//Rgba output and !Transparency.
os << "__kernel void " << m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint << "(\n";
else
return "";//Cannot have alphaCalc and !alphaAccum, it makes no sense.
}
else
{
os <<
CreateCalcNewRgbFunctionString(false) <<
CreateGammaCorrectionFunctionString(false, alphaCalc, alphaAccum, true);
if (!alphaCalc && !alphaAccum)//Rgb output, the most common case.
os << "__kernel void " << m_FinalAccumLateClipEntryPoint << "(\n";
else if (alphaCalc && alphaAccum)//Rgba output and Transparency.
os << "__kernel void " << m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint << "(\n";
else if (!alphaCalc && alphaAccum)//Rgba output and !Transparency.
os << "__kernel void " << m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint << "(\n";
else
return "";//Cannot have alphaCalc and !alphaAccum, it makes no sense.
}
os <<
" const __global real4reals* accumulator,\n"
" __write_only image2d_t pixels,\n"
" __constant SpatialFilterCL* spatialFilter,\n"
" __constant real_t* filterCoefs,\n"
" __constant real4reals* csa,\n"
" const uint doCurves,\n"
" const real_t alphaBase,\n"
" const real_t alphaScale\n"
"\t)\n"
"{\n"
"\n"
" if ((GLOBAL_ID_Y >= spatialFilter->m_FinalRasH) || (GLOBAL_ID_X >= spatialFilter->m_FinalRasW))\n"
" return;\n"
"\n"
" uint accumX = spatialFilter->m_DensityFilterOffset + (GLOBAL_ID_X * spatialFilter->m_Supersample);\n"
" uint accumY = spatialFilter->m_DensityFilterOffset + (GLOBAL_ID_Y * spatialFilter->m_Supersample);\n"
" int2 finalCoord;\n"
" finalCoord.x = GLOBAL_ID_X;\n"
" finalCoord.y = (int)((spatialFilter->m_YAxisUp == 1) ? ((spatialFilter->m_FinalRasH - GLOBAL_ID_Y) - 1) : GLOBAL_ID_Y);\n"
" float4floats finalColor;\n"
" int ii, jj;\n"
" uint filterKRowIndex;\n"
" const __global real4reals* accumBucket;\n"
" real4reals newBucket;\n"
" newBucket.m_Real4 = 0;\n"
"\n"
" for (jj = 0; jj < spatialFilter->m_FilterWidth; jj++)\n"
" {\n"
" filterKRowIndex = jj * spatialFilter->m_FilterWidth;\n"
"\n"
" for (ii = 0; ii < spatialFilter->m_FilterWidth; ii++)\n"
" {\n"
" real_t k = filterCoefs[ii + filterKRowIndex];\n"
"\n"
" accumBucket = accumulator + (accumX + ii) + ((accumY + jj) * spatialFilter->m_SuperRasW);\n"
" newBucket.m_Real4 += (k * accumBucket->m_Real4);\n"
" }\n"
" }\n"
"\n";
//Not supporting 2 bytes per channel on the GPU. If the user wants it, run on the CPU.
if (earlyClip)//If early clip, simply assign values directly to the temp float4 since they've been gamma corrected already, then write it straight to the output image below.
{
os <<
" finalColor.m_Float4.x = (float)newBucket.m_Real4.x;\n"//CPU side clamps, skip here because write_imagef() does the clamping for us.
" finalColor.m_Float4.y = (float)newBucket.m_Real4.y;\n"
" finalColor.m_Float4.z = (float)newBucket.m_Real4.z;\n";
if (alphaAccum)
{
if (alphaCalc)
os << " finalColor.m_Float4.w = (float)newBucket.m_Real4.w * 255.0f;\n";
else
os << " finalColor.m_Float4.w = 255;\n";
}
}
else
{
//Late clip, so must gamma correct from the temp new bucket to temp float4.
if (typeid(T) == typeid(double))
{
os <<
" real4reals realFinal;\n"
"\n"
" GammaCorrectionFloats(&newBucket, &(spatialFilter->m_Background[0]), spatialFilter->m_Gamma, spatialFilter->m_LinRange, spatialFilter->m_Vibrancy, spatialFilter->m_HighlightPower, alphaBase, alphaScale, &(realFinal.m_Reals[0]));\n"
" finalColor.m_Float4.x = (float)realFinal.m_Real4.x;\n"
" finalColor.m_Float4.y = (float)realFinal.m_Real4.y;\n"
" finalColor.m_Float4.z = (float)realFinal.m_Real4.z;\n"
" finalColor.m_Float4.w = (float)realFinal.m_Real4.w;\n"
;
}
else
{
os <<
" GammaCorrectionFloats(&newBucket, &(spatialFilter->m_Background[0]), spatialFilter->m_Gamma, spatialFilter->m_LinRange, spatialFilter->m_Vibrancy, spatialFilter->m_HighlightPower, alphaBase, alphaScale, &(finalColor.m_Floats[0]));\n";
}
}
os <<
"\n"
" if (doCurves)\n"
" {\n"
" CurveAdjust(csa, &(finalColor.m_Floats[0]), 1);\n"
" CurveAdjust(csa, &(finalColor.m_Floats[1]), 2);\n"
" CurveAdjust(csa, &(finalColor.m_Floats[2]), 3);\n"
" }\n"
"\n"
" finalColor.m_Float4 /= 255.0f;\n"
" write_imagef(pixels, finalCoord, finalColor.m_Float4);\n"//Use write_imagef instead of write_imageui because only the former works when sharing with an OpenGL texture.
" barrier(CLK_GLOBAL_MEM_FENCE);\n"//Required, or else page tearing will occur during interactive rendering.
"}\n"
;
return os.str();
}
/// <summary>
/// Creates the gamma correction function string.
/// This is not a full kernel, just a function that is used in the kernels.
/// </summary>
/// <param name="globalBucket">True if writing to a global buffer (early clip), else false (late clip).</param>
/// <param name="alphaCalc">True if channels equals 4 and transparency is desired, else false.</param>
/// <param name="alphaAccum">True if channels equals 4</param>
/// <param name="finalOut">True if writing to global buffer (late clip), else false (early clip).</param>
/// <returns>The gamma correction function string</returns>
template <typename T>
string FinalAccumOpenCLKernelCreator<T>::CreateGammaCorrectionFunctionString(bool globalBucket, bool alphaCalc, bool alphaAccum, bool finalOut)
{
ostringstream os;
string dataType;
string unionMember;
dataType = "real_t";
//Use real_t for all cases, early clip and final accum.
os << "void GammaCorrectionFloats(" << (globalBucket ? "__global " : "") << "real4reals* bucket, __constant real_t* background, real_t g, real_t linRange, real_t vibrancy, real_t highlightPower, real_t alphaBase, real_t alphaScale, " << (finalOut ? "" : "__global") << " real_t* correctedChannels)\n";
os
<< "{\n"
<< " real_t alpha, ls, tmp, a;\n"
<< " real4reals newRgb;\n"
<< "\n"
<< " if (bucket->m_Reals[3] <= 0)\n"
<< " {\n"
<< " alpha = 0;\n"
<< " ls = 0;\n"
<< " }\n"
<< " else\n"
<< " {\n"
<< " tmp = bucket->m_Reals[3];\n"
<< " alpha = CalcAlpha(tmp, g, linRange);\n"
<< " ls = vibrancy * 256.0 * alpha / tmp;\n"
<< " ClampRef(&alpha, 0.0, 1.0);\n"
<< " }\n"
<< "\n"
<< " CalcNewRgb(bucket, ls, highlightPower, &newRgb);\n"
<< "\n"
<< " for (uint rgbi = 0; rgbi < 3; rgbi++)\n"
<< " {\n"
<< " a = newRgb.m_Reals[rgbi] + ((1.0 - vibrancy) * 256.0 * pow(bucket->m_Reals[rgbi], g));\n"
<< "\n";
if (!alphaCalc)
{
os <<
" a += ((1.0 - alpha) * background[rgbi]);\n";
}
else
{
os
<< " if (alpha > 0)\n"
<< " a /= alpha;\n"
<< " else\n"
<< " a = 0;\n";
}
os <<
"\n"
" correctedChannels[rgbi] = (" << dataType << ")clamp(a, 0.0, 255.0);\n"
" }\n"
"\n";
//The CPU code has 3 cases for assigning alpha:
//[3] = alpha.//Early clip.
//[3] = alpha * 255.//Final Rgba with transparency.
//[3] = 255.//Final Rgba without transparency.
//Putting conditionals in GPU code is to be avoided. So do base + alpha * scale which will
//work for all 3 cases without using a conditional, which should be faster on a GPU. This gives:
//Base = 0, scale = 1. [3] = (0 + (alpha * 1)). [3] = alpha.
//Base = 0, scale = 255. [3] = (0 + (alpha * 255)). [3] = alpha * 255.
//Base = 255, scale = 0. [3] = (255 + (alpha * 0)). [3] = 255.
if (alphaAccum)
{
os
<< " correctedChannels[3] = (" << dataType << ")(alphaBase + (alpha * alphaScale));\n";
}
os <<
"}\n"
"\n";
return os.str();
}
/// <summary>
/// OpenCL equivalent of Palette::CalcNewRgb().
/// </summary>
/// <param name="globalBucket">True if writing the corrected value to a global buffer (early clip), else false (late clip).</param>
/// <returns>The CalcNewRgb function string</returns>
template <typename T>
string FinalAccumOpenCLKernelCreator<T>::CreateCalcNewRgbFunctionString(bool globalBucket)
{
ostringstream os;
os <<
"static void CalcNewRgb(" << (globalBucket ? "__global " : "") << "real4reals* oldRgb, real_t ls, real_t highPow, real4reals* newRgb)\n"
"{\n"
" int rgbi;\n"
" real_t newls, lsratio;\n"
" real4reals newHsv;\n"
" real_t maxa, maxc;\n"
" real_t adjhlp;\n"
"\n"
" if (ls == 0 || (oldRgb->m_Real4.x == 0 && oldRgb->m_Real4.y == 0 && oldRgb->m_Real4.z == 0))\n"//Can't do a vector compare to zero.
" {\n"
" newRgb->m_Real4 = 0;\n"
" return;\n"
" }\n"
"\n"
//Identify the most saturated channel.
" maxc = max(max(oldRgb->m_Reals[0], oldRgb->m_Reals[1]), oldRgb->m_Reals[2]);\n"
" maxa = ls * maxc;\n"
"\n"
//If a channel is saturated and highlight power is non-negative
//modify the color to prevent hue shift.
" if (maxa > 255 && highPow >= 0)\n"
" {\n"
" newls = 255.0 / maxc;\n"
" lsratio = pow(newls / ls, highPow);\n"
"\n"
//Calculate the max-value color (ranged 0 - 1).
" for (rgbi = 0; rgbi < 3; rgbi++)\n"
" newRgb->m_Reals[rgbi] = newls * oldRgb->m_Reals[rgbi] / 255.0;\n"
"\n"
//Reduce saturation by the lsratio.
" RgbToHsv(&(newRgb->m_Real4), &(newHsv.m_Real4));\n"
" newHsv.m_Real4.y *= lsratio;\n"
" HsvToRgb(&(newHsv.m_Real4), &(newRgb->m_Real4));\n"
"\n"
" for (rgbi = 0; rgbi < 3; rgbi++)\n"//Unrolling and vectorizing makes no difference.
" newRgb->m_Reals[rgbi] *= 255.0;\n"
" }\n"
" else\n"
" {\n"
" newls = 255.0 / maxc;\n"
" adjhlp = -highPow;\n"
"\n"
" if (adjhlp > 1)\n"
" adjhlp = 1;\n"
"\n"
" if (maxa <= 255)\n"
" adjhlp = 1;\n"
"\n"
//Calculate the max-value color (ranged 0 - 1) interpolated with the old behavior.
" for (rgbi = 0; rgbi < 3; rgbi++)\n"//Unrolling, caching and vectorizing makes no difference.
" newRgb->m_Reals[rgbi] = ((1.0 - adjhlp) * newls + adjhlp * ls) * oldRgb->m_Reals[rgbi];\n"
" }\n"
"}\n"
"\n";
return os.str();
}
/// <summary>
/// Create the gamma correction kernel string used for early clipping.
/// </summary>
/// <param name="alphaCalc">True if channels equals 4 and transparency is desired, else false.</param>
/// <returns>The gamma correction kernel string used for early clipping</returns>
template <typename T>
string FinalAccumOpenCLKernelCreator<T>::CreateGammaCorrectionKernelString(bool alphaCalc)
{
ostringstream os;
string dataType;
os <<
ConstantDefinesString(typeid(T) == typeid(double)) <<
ClampRealFunctionString <<
UnionCLStructString <<
RgbToHsvFunctionString <<
HsvToRgbFunctionString <<
CalcAlphaFunctionString <<
CreateCalcNewRgbFunctionString(true) <<
SpatialFilterCLStructString <<
CreateGammaCorrectionFunctionString(true, alphaCalc, true, false);//Will only be used with float in this case, early clip. Will always alpha accum.
os << "__kernel void " << (alphaCalc ? m_GammaCorrectionWithAlphaCalcEntryPoint : m_GammaCorrectionWithoutAlphaCalcEntryPoint) << "(\n" <<
" __global real4reals* accumulator,\n"
" __constant SpatialFilterCL* spatialFilter\n"
")\n"
"{\n"
" int testGutter = 0;\n"
"\n"
" if (GLOBAL_ID_Y >= (spatialFilter->m_SuperRasH - testGutter) || GLOBAL_ID_X >= (spatialFilter->m_SuperRasW - testGutter))\n"
" return;\n"
"\n"
" uint superIndex = (GLOBAL_ID_Y * spatialFilter->m_SuperRasW) + GLOBAL_ID_X;\n"
" __global real4reals* bucket = accumulator + superIndex;\n"
//Pass in an alphaBase and alphaScale of 0, 1 which means to just directly assign the computed alpha value.
" GammaCorrectionFloats(bucket, &(spatialFilter->m_Background[0]), spatialFilter->m_Gamma, spatialFilter->m_LinRange, spatialFilter->m_Vibrancy, spatialFilter->m_HighlightPower, 0.0, 1.0, &(bucket->m_Reals[0]));\n"
"}\n"
;
return os.str();
}
template EMBERCL_API class FinalAccumOpenCLKernelCreator<float>;
#ifdef DO_DOUBLE
template EMBERCL_API class FinalAccumOpenCLKernelCreator<double>;
#endif
}