diff --git a/Source/Ember/Renderer.cpp b/Source/Ember/Renderer.cpp index 7ea4173..388781d 100644 --- a/Source/Ember/Renderer.cpp +++ b/Source/Ember/Renderer.cpp @@ -1629,7 +1629,7 @@ void Renderer::GammaCorrection(tvec4& bucket for (glm::length_t rgbi = 0; rgbi < 3; rgbi++) { - a = newRgb[rgbi] + ((1 - vibrancy) * 255 * std::pow(bucket[rgbi], g)); + a = newRgb[rgbi] + ((1 - vibrancy) * 255 * std::pow(std::abs(bucket[rgbi]), g));//Must use abs(), else it could be a negative value and return NAN. if (NumChannels() <= 3 || !Transparency()) { diff --git a/Source/Ember/Variations03.h b/Source/Ember/Variations03.h index cbafb9d..2b932e8 100644 --- a/Source/Ember/Variations03.h +++ b/Source/Ember/Variations03.h @@ -2393,9 +2393,10 @@ public: virtual void Precalc() override { - m_InvN = m_Dist / m_Power; - m_Inv2PiN = M_2PI / m_Power; - m_Cn = m_Dist / m_Power / 2; + auto zp = Zeps(m_Power); + m_InvN = m_Dist / zp; + m_Inv2PiN = M_2PI / zp; + m_Cn = m_Dist / zp / 2; } protected: @@ -2405,7 +2406,7 @@ protected: m_Params.clear(); m_Params.push_back(ParamWithName(&m_Power, prefix + "phoenix_julia_power", 2)); m_Params.push_back(ParamWithName(&m_Dist, prefix + "phoenix_julia_dist", 1)); - m_Params.push_back(ParamWithName(&m_XDistort, prefix + "phoenix_julia_x_distort", T(-T(0.5))));//Original omitted phoenix_ prefix. + m_Params.push_back(ParamWithName(&m_XDistort, prefix + "phoenix_julia_x_distort", T(-0.5)));//Original omitted phoenix_ prefix. m_Params.push_back(ParamWithName(&m_YDistort, prefix + "phoenix_julia_y_distort")); m_Params.push_back(ParamWithName(true, &m_Cn, prefix + "phoenix_julia_cn"));//Precalc. 
m_Params.push_back(ParamWithName(true, &m_InvN, prefix + "phoenix_julia_invn")); diff --git a/Source/Ember/Variations06.h b/Source/Ember/Variations06.h index c5696b4..a628682 100644 --- a/Source/Ember/Variations06.h +++ b/Source/Ember/Variations06.h @@ -36,7 +36,7 @@ public: static const T AYoXh = T(1.7320508075688772935 / 2.0); static const T AYoYh = T(1.7320508075688772935 / 2.0); static const v2T offset[4] { { 0, 0 }, { 0, 1 }, { 1, 0 }, { 1, 1 } }; - int i = 0; + int i, j; T di, dj; T XCh, YCh, XCo, YCo, DXo, DYo, L, L1, L2, R, s, trgL; v2T u, v; @@ -60,13 +60,12 @@ public: YCh = T(Floor((AYhXo * u.x + AYhYo * u.y) / s)); // Get a set of 4 hex center points, based around the one above - for (di = XCh; di < XCh + T(1.1); di += 1) + for (i = 0, di = XCh; i < 2; di += 1, i++)//Note that in SP mode, these numbers won't advance if they are on the boundary of what can be represented with an DP number... { - for (dj = YCh; dj < YCh + T(1.1); dj += 1) + for (j = 0, dj = YCh; j < 2; dj += 1, j++)//...which is why the check uses i and j. 
{ - P[i].x = (AXoXh * di + AXoYh * dj) * s; - P[i].y = (AYoXh * di + AYoYh * dj) * s; - i++; + P[(i * 2) + j].x = (AXoXh * di + AXoYh * dj) * s; + P[(i * 2) + j].y = (AYoXh * di + AYoYh * dj) * s; } } @@ -151,7 +150,7 @@ public: string rotsin = "parVars[" + ToUpper(m_Params[i++].Name()) + index; string rotcos = "parVars[" + ToUpper(m_Params[i++].Name()) + index; ss << "\t{\n" - << "\t\tint i = 0;\n" + << "\t\tint i, j;\n" << "\t\treal_t di, dj;\n" << "\t\treal_t XCh, YCh, XCo, YCo, DXo, DYo, L, L1, L2, R, s, trgL, Vx, Vy;\n" << "\t\treal2 U;\n" @@ -173,13 +172,12 @@ public: << "\t\tXCh = floor((AXhXo * U.x + AXhYo * U.y) / s);\n" << "\t\tYCh = floor((AYhXo * U.x + AYhYo * U.y) / s);\n" << "\n" - << "\t\tfor (di = XCh; di < XCh + 1.1; di += 1)\n" + << "\t\tfor (i = 0, di = XCh; i < 2; di += 1, i++)\n" << "\t\t{\n" - << "\t\t for (dj = YCh; dj < YCh + 1.1; dj += 1)\n" + << "\t\t for (j = 0, dj = YCh; j < 2; dj += 1, j++)\n" << "\t\t {\n" - << "\t\t P[i].x = (AXoXh * di + AXoYh * dj) * s;\n" - << "\t\t P[i].y = (AYoXh * di + AYoYh * dj) * s;\n" - << "\t\t i++;\n" + << "\t\t P[(i * 2) + j].x = (AXoXh * di + AXoYh * dj) * s;\n" + << "\t\t P[(i * 2) + j].y = (AYoXh * di + AYoYh * dj) * s;\n" << "\t\t }\n" << "\t\t}\n" << "\n" diff --git a/Source/EmberCL/FinalAccumOpenCLKernelCreator.cpp b/Source/EmberCL/FinalAccumOpenCLKernelCreator.cpp index ae322e1..7236a8d 100644 --- a/Source/EmberCL/FinalAccumOpenCLKernelCreator.cpp +++ b/Source/EmberCL/FinalAccumOpenCLKernelCreator.cpp @@ -12,22 +12,17 @@ FinalAccumOpenCLKernelCreator::FinalAccumOpenCLKernelCreator(bool doublePrecisio m_DoublePrecision = doublePrecision; m_GammaCorrectionWithAlphaCalcEntryPoint = "GammaCorrectionWithAlphaCalcKernel"; m_GammaCorrectionWithoutAlphaCalcEntryPoint = "GammaCorrectionWithoutAlphaCalcKernel"; - m_GammaCorrectionWithAlphaCalcKernel = CreateGammaCorrectionKernelString(true); m_GammaCorrectionWithoutAlphaCalcKernel = CreateGammaCorrectionKernelString(false); - m_FinalAccumEarlyClipEntryPoint 
= "FinalAccumEarlyClipKernel"; m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint = "FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel"; m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint = "FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel"; - m_FinalAccumEarlyClipKernel = CreateFinalAccumKernelString(true, false, false); m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel = CreateFinalAccumKernelString(true, true, true); m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel = CreateFinalAccumKernelString(true, false, true); - m_FinalAccumLateClipEntryPoint = "FinalAccumLateClipKernel"; m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint = "FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel"; m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint = "FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel"; - m_FinalAccumLateClipKernel = CreateFinalAccumKernelString(false, false, false); m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel = CreateFinalAccumKernelString(false, true, true); m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel = CreateFinalAccumKernelString(false, false, true); @@ -183,15 +178,14 @@ string FinalAccumOpenCLKernelCreator::CreateFinalAccumKernelString(bool earlyCli { ostringstream os; string channels = alphaAccum ? 
"4" : "3"; - os << - ConstantDefinesString(m_DoublePrecision) << - UnionCLStructString << - RgbToHsvFunctionString << - HsvToRgbFunctionString << - CalcAlphaFunctionString << - CurveAdjustFunctionString << - SpatialFilterCLStructString; + ConstantDefinesString(m_DoublePrecision) << + UnionCLStructString << + RgbToHsvFunctionString << + HsvToRgbFunctionString << + CalcAlphaFunctionString << + CurveAdjustFunctionString << + SpatialFilterCLStructString; if (earlyClip) { @@ -207,8 +201,8 @@ string FinalAccumOpenCLKernelCreator::CreateFinalAccumKernelString(bool earlyCli else { os << - CreateCalcNewRgbFunctionString(false) << - CreateGammaCorrectionFunctionString(false, alphaCalc, alphaAccum, true); + CreateCalcNewRgbFunctionString(false) << + CreateGammaCorrectionFunctionString(false, alphaCalc, alphaAccum, true); if (!alphaCalc && !alphaAccum)//Rgb output, the most common case. os << "__kernel void " << m_FinalAccumLateClipEntryPoint << "(\n"; @@ -221,53 +215,53 @@ string FinalAccumOpenCLKernelCreator::CreateFinalAccumKernelString(bool earlyCli } os << - " const __global real4reals_bucket* accumulator,\n" - " __write_only image2d_t pixels,\n" - " __constant SpatialFilterCL* spatialFilter,\n" - " __constant real_bucket_t* filterCoefs,\n" - " __constant real4reals_bucket* csa,\n" - " const uint doCurves,\n" - " const real_bucket_t alphaBase,\n" - " const real_bucket_t alphaScale\n" - "\t)\n" - "{\n" - "\n" - " if ((GLOBAL_ID_Y >= spatialFilter->m_FinalRasH) || (GLOBAL_ID_X >= spatialFilter->m_FinalRasW))\n" - " return;\n" - "\n" - " uint accumX = spatialFilter->m_DensityFilterOffset + (GLOBAL_ID_X * spatialFilter->m_Supersample);\n" - " uint accumY = spatialFilter->m_DensityFilterOffset + (GLOBAL_ID_Y * spatialFilter->m_Supersample);\n" - " int2 finalCoord;\n" - " finalCoord.x = GLOBAL_ID_X;\n" - " finalCoord.y = (int)((spatialFilter->m_YAxisUp == 1) ? 
((spatialFilter->m_FinalRasH - GLOBAL_ID_Y) - 1) : GLOBAL_ID_Y);\n" - " float4floats finalColor;\n" - " int ii, jj;\n" - " uint filterKRowIndex;\n" - " const __global real4reals_bucket* accumBucket;\n" - " real4reals_bucket newBucket;\n" - " newBucket.m_Real4 = 0;\n" - "\n" - " for (jj = 0; jj < spatialFilter->m_FilterWidth; jj++)\n" - " {\n" - " filterKRowIndex = jj * spatialFilter->m_FilterWidth;\n" - "\n" - " for (ii = 0; ii < spatialFilter->m_FilterWidth; ii++)\n" - " {\n" - " real_bucket_t k = filterCoefs[ii + filterKRowIndex];\n" - "\n" - " accumBucket = accumulator + (accumX + ii) + ((accumY + jj) * spatialFilter->m_SuperRasW);\n" - " newBucket.m_Real4 += (k * accumBucket->m_Real4);\n" - " }\n" - " }\n" - "\n"; + " const __global real4reals_bucket* accumulator,\n" + " __write_only image2d_t pixels,\n" + " __constant SpatialFilterCL* spatialFilter,\n" + " __constant real_bucket_t* filterCoefs,\n" + " __constant real4reals_bucket* csa,\n" + " const uint doCurves,\n" + " const real_bucket_t alphaBase,\n" + " const real_bucket_t alphaScale\n" + "\t)\n" + "{\n" + "\n" + " if ((GLOBAL_ID_Y >= spatialFilter->m_FinalRasH) || (GLOBAL_ID_X >= spatialFilter->m_FinalRasW))\n" + " return;\n" + "\n" + " uint accumX = spatialFilter->m_DensityFilterOffset + (GLOBAL_ID_X * spatialFilter->m_Supersample);\n" + " uint accumY = spatialFilter->m_DensityFilterOffset + (GLOBAL_ID_Y * spatialFilter->m_Supersample);\n" + " int2 finalCoord;\n" + " finalCoord.x = GLOBAL_ID_X;\n" + " finalCoord.y = (int)((spatialFilter->m_YAxisUp == 1) ? 
((spatialFilter->m_FinalRasH - GLOBAL_ID_Y) - 1) : GLOBAL_ID_Y);\n" + " float4floats finalColor;\n" + " int ii, jj;\n" + " uint filterKRowIndex;\n" + " const __global real4reals_bucket* accumBucket;\n" + " real4reals_bucket newBucket;\n" + " newBucket.m_Real4 = 0;\n" + "\n" + " for (jj = 0; jj < spatialFilter->m_FilterWidth; jj++)\n" + " {\n" + " filterKRowIndex = jj * spatialFilter->m_FilterWidth;\n" + "\n" + " for (ii = 0; ii < spatialFilter->m_FilterWidth; ii++)\n" + " {\n" + " real_bucket_t k = filterCoefs[ii + filterKRowIndex];\n" + "\n" + " accumBucket = accumulator + (accumX + ii) + ((accumY + jj) * spatialFilter->m_SuperRasW);\n" + " newBucket.m_Real4 += (k * accumBucket->m_Real4);\n" + " }\n" + " }\n" + "\n"; //Not supporting 2 bytes per channel on the GPU. If the user wants it, run on the CPU. if (earlyClip)//If early clip, simply assign values directly to the temp float4 since they've been gamma corrected already, then write it straight to the output image below. { os << - " finalColor.m_Float4.x = (float)newBucket.m_Real4.x;\n"//CPU side clamps, skip here because write_imagef() does the clamping for us. - " finalColor.m_Float4.y = (float)newBucket.m_Real4.y;\n" - " finalColor.m_Float4.z = (float)newBucket.m_Real4.z;\n"; + " finalColor.m_Float4.x = (float)newBucket.m_Real4.x;\n"//CPU side clamps, skip here because write_imagef() does the clamping for us. 
+ " finalColor.m_Float4.y = (float)newBucket.m_Real4.y;\n" + " finalColor.m_Float4.z = (float)newBucket.m_Real4.z;\n"; if (alphaAccum) { @@ -283,37 +277,36 @@ string FinalAccumOpenCLKernelCreator::CreateFinalAccumKernelString(bool earlyCli if (m_DoublePrecision) { os << - " real4reals_bucket realFinal;\n" - "\n" - " GammaCorrectionFloats(&newBucket, &(spatialFilter->m_Background[0]), spatialFilter->m_Gamma, spatialFilter->m_LinRange, spatialFilter->m_Vibrancy, spatialFilter->m_HighlightPower, alphaBase, alphaScale, &(realFinal.m_Reals[0]));\n" - " finalColor.m_Float4.x = (float)realFinal.m_Real4.x;\n" - " finalColor.m_Float4.y = (float)realFinal.m_Real4.y;\n" - " finalColor.m_Float4.z = (float)realFinal.m_Real4.z;\n" - " finalColor.m_Float4.w = (float)realFinal.m_Real4.w;\n" - ; + " real4reals_bucket realFinal;\n" + "\n" + " GammaCorrectionFloats(&newBucket, &(spatialFilter->m_Background[0]), spatialFilter->m_Gamma, spatialFilter->m_LinRange, spatialFilter->m_Vibrancy, spatialFilter->m_HighlightPower, alphaBase, alphaScale, &(realFinal.m_Reals[0]));\n" + " finalColor.m_Float4.x = (float)realFinal.m_Real4.x;\n" + " finalColor.m_Float4.y = (float)realFinal.m_Real4.y;\n" + " finalColor.m_Float4.z = (float)realFinal.m_Real4.z;\n" + " finalColor.m_Float4.w = (float)realFinal.m_Real4.w;\n" + ; } else { os << - " GammaCorrectionFloats(&newBucket, &(spatialFilter->m_Background[0]), spatialFilter->m_Gamma, spatialFilter->m_LinRange, spatialFilter->m_Vibrancy, spatialFilter->m_HighlightPower, alphaBase, alphaScale, &(finalColor.m_Floats[0]));\n"; + " GammaCorrectionFloats(&newBucket, &(spatialFilter->m_Background[0]), spatialFilter->m_Gamma, spatialFilter->m_LinRange, spatialFilter->m_Vibrancy, spatialFilter->m_HighlightPower, alphaBase, alphaScale, &(finalColor.m_Floats[0]));\n"; } } os << - "\n" - " if (doCurves)\n" - " {\n" - " CurveAdjust(csa, &(finalColor.m_Floats[0]), 1);\n" - " CurveAdjust(csa, &(finalColor.m_Floats[1]), 2);\n" - " CurveAdjust(csa, 
&(finalColor.m_Floats[2]), 3);\n" - " }\n" - "\n" - " finalColor.m_Float4 /= 255.0f;\n" - " write_imagef(pixels, finalCoord, finalColor.m_Float4);\n"//Use write_imagef instead of write_imageui because only the former works when sharing with an OpenGL texture. - " barrier(CLK_GLOBAL_MEM_FENCE);\n"//Required, or else page tearing will occur during interactive rendering. - "}\n" - ; - + "\n" + " if (doCurves)\n" + " {\n" + " CurveAdjust(csa, &(finalColor.m_Floats[0]), 1);\n" + " CurveAdjust(csa, &(finalColor.m_Floats[1]), 2);\n" + " CurveAdjust(csa, &(finalColor.m_Floats[2]), 3);\n" + " }\n" + "\n" + " finalColor.m_Float4 /= 255.0f;\n" + " write_imagef(pixels, finalCoord, finalColor.m_Float4);\n"//Use write_imagef instead of write_imageui because only the former works when sharing with an OpenGL texture. + " barrier(CLK_GLOBAL_MEM_FENCE);\n"//Required, or else page tearing will occur during interactive rendering. + "}\n" + ; return os.str(); } @@ -332,54 +325,52 @@ string FinalAccumOpenCLKernelCreator::CreateGammaCorrectionFunctionString(bool g string dataType; string unionMember; dataType = "real_bucket_t"; - //Use real_t for all cases, early clip and final accum. os << "void GammaCorrectionFloats(" << (globalBucket ? "__global " : "") << "real4reals_bucket* bucket, __constant real_bucket_t* background, real_bucket_t g, real_bucket_t linRange, real_bucket_t vibrancy, real_bucket_t highlightPower, real_bucket_t alphaBase, real_bucket_t alphaScale, " << (finalOut ? 
"" : "__global") << " real_bucket_t* correctedChannels)\n"; - os - << "{\n" - << " real_bucket_t alpha, ls, tmp, a;\n" - << " real4reals_bucket newRgb;\n" - << "\n" - << " if (bucket->m_Reals[3] <= 0)\n" - << " {\n" - << " alpha = 0;\n" - << " ls = 0;\n" - << " }\n" - << " else\n" - << " {\n" - << " tmp = bucket->m_Reals[3];\n" - << " alpha = CalcAlpha(tmp, g, linRange);\n" - << " ls = vibrancy * 256.0 * alpha / tmp;\n" - << " alpha = clamp(alpha, (real_bucket_t)0.0, (real_bucket_t)1.0);\n" - << " }\n" - << "\n" - << " CalcNewRgb(bucket, ls, highlightPower, &newRgb);\n" - << "\n" - << " for (uint rgbi = 0; rgbi < 3; rgbi++)\n" - << " {\n" - << " a = newRgb.m_Reals[rgbi] + ((1.0 - vibrancy) * 256.0 * pow(bucket->m_Reals[rgbi], g));\n" - << "\n"; + << "{\n" + << " real_bucket_t alpha, ls, tmp, a;\n" + << " real4reals_bucket newRgb;\n" + << "\n" + << " if (bucket->m_Reals[3] <= 0)\n" + << " {\n" + << " alpha = 0;\n" + << " ls = 0;\n" + << " }\n" + << " else\n" + << " {\n" + << " tmp = bucket->m_Reals[3];\n" + << " alpha = CalcAlpha(tmp, g, linRange);\n" + << " ls = vibrancy * 256.0 * alpha / tmp;\n" + << " alpha = clamp(alpha, (real_bucket_t)0.0, (real_bucket_t)1.0);\n" + << " }\n" + << "\n" + << " CalcNewRgb(bucket, ls, highlightPower, &newRgb);\n" + << "\n" + << " for (uint rgbi = 0; rgbi < 3; rgbi++)\n" + << " {\n" + << " a = newRgb.m_Reals[rgbi] + ((1.0 - vibrancy) * 256.0 * pow(fabs(bucket->m_Reals[rgbi]), g));\n" + << "\n"; if (!alphaCalc) { os << - " a += ((1.0 - alpha) * background[rgbi]);\n"; + " a += ((1.0 - alpha) * background[rgbi]);\n"; } else { os - << " if (alpha > 0)\n" - << " a /= alpha;\n" - << " else\n" - << " a = 0;\n"; + << " if (alpha > 0)\n" + << " a /= alpha;\n" + << " else\n" + << " a = 0;\n"; } os << - "\n" - " correctedChannels[rgbi] = (" << dataType << ")clamp(a, (real_bucket_t)0.0, (real_bucket_t)255.0);\n" - " }\n" - "\n"; + "\n" + " correctedChannels[rgbi] = (" << dataType << ")clamp(a, (real_bucket_t)0.0, (real_bucket_t)255.0);\n" + " 
}\n" + "\n"; //The CPU code has 3 cases for assigning alpha: //[3] = alpha.//Early clip. @@ -393,13 +384,12 @@ string FinalAccumOpenCLKernelCreator::CreateGammaCorrectionFunctionString(bool g if (alphaAccum) { os - << " correctedChannels[3] = (" << dataType << ")(alphaBase + (alpha * alphaScale));\n"; + << " correctedChannels[3] = (" << dataType << ")(alphaBase + (alpha * alphaScale));\n"; } os << - "}\n" - "\n"; - + "}\n" + "\n"; return os.str(); } @@ -411,63 +401,61 @@ string FinalAccumOpenCLKernelCreator::CreateGammaCorrectionFunctionString(bool g string FinalAccumOpenCLKernelCreator::CreateCalcNewRgbFunctionString(bool globalBucket) { ostringstream os; - os << - "static void CalcNewRgb(" << (globalBucket ? "__global " : "") << "real4reals_bucket* oldRgb, real_bucket_t ls, real_bucket_t highPow, real4reals_bucket* newRgb)\n" - "{\n" - " int rgbi;\n" - " real_bucket_t newls, lsratio;\n" - " real4reals_bucket newHsv;\n" - " real_bucket_t maxa, maxc;\n" - " real_bucket_t adjhlp;\n" - "\n" - " if (ls == 0 || (oldRgb->m_Real4.x == 0 && oldRgb->m_Real4.y == 0 && oldRgb->m_Real4.z == 0))\n"//Can't do a vector compare to zero. - " {\n" - " newRgb->m_Real4 = 0;\n" - " return;\n" - " }\n" - "\n" - //Identify the most saturated channel. - " maxc = max(max(oldRgb->m_Reals[0], oldRgb->m_Reals[1]), oldRgb->m_Reals[2]);\n" - " maxa = ls * maxc;\n" - "\n" - //If a channel is saturated and highlight power is non-negative - //modify the color to prevent hue shift. - " if (maxa > 255 && highPow >= 0)\n" - " {\n" - " newls = 255.0 / maxc;\n" - " lsratio = pow(newls / ls, highPow);\n" - "\n" - //Calculate the max-value color (ranged 0 - 1). - " for (rgbi = 0; rgbi < 3; rgbi++)\n" - " newRgb->m_Reals[rgbi] = newls * oldRgb->m_Reals[rgbi] / 255.0;\n" - "\n" - //Reduce saturation by the lsratio. 
- " RgbToHsv(&(newRgb->m_Real4), &(newHsv.m_Real4));\n" - " newHsv.m_Real4.y *= lsratio;\n" - " HsvToRgb(&(newHsv.m_Real4), &(newRgb->m_Real4));\n" - "\n" - " for (rgbi = 0; rgbi < 3; rgbi++)\n"//Unrolling and vectorizing makes no difference. - " newRgb->m_Reals[rgbi] *= 255.0;\n" - " }\n" - " else\n" - " {\n" - " newls = 255.0 / maxc;\n" - " adjhlp = -highPow;\n" - "\n" - " if (adjhlp > 1)\n" - " adjhlp = 1;\n" - "\n" - " if (maxa <= 255)\n" - " adjhlp = 1;\n" - "\n" - //Calculate the max-value color (ranged 0 - 1) interpolated with the old behavior. - " for (rgbi = 0; rgbi < 3; rgbi++)\n"//Unrolling, caching and vectorizing makes no difference. - " newRgb->m_Reals[rgbi] = ((1.0 - adjhlp) * newls + adjhlp * ls) * oldRgb->m_Reals[rgbi];\n" - " }\n" - "}\n" - "\n"; - + "static void CalcNewRgb(" << (globalBucket ? "__global " : "") << "real4reals_bucket* oldRgb, real_bucket_t ls, real_bucket_t highPow, real4reals_bucket* newRgb)\n" + "{\n" + " int rgbi;\n" + " real_bucket_t newls, lsratio;\n" + " real4reals_bucket newHsv;\n" + " real_bucket_t maxa, maxc;\n" + " real_bucket_t adjhlp;\n" + "\n" + " if (ls == 0 || (oldRgb->m_Real4.x == 0 && oldRgb->m_Real4.y == 0 && oldRgb->m_Real4.z == 0))\n"//Can't do a vector compare to zero. + " {\n" + " newRgb->m_Real4 = 0;\n" + " return;\n" + " }\n" + "\n" + //Identify the most saturated channel. + " maxc = max(max(oldRgb->m_Reals[0], oldRgb->m_Reals[1]), oldRgb->m_Reals[2]);\n" + " maxa = ls * maxc;\n" + "\n" + //If a channel is saturated and highlight power is non-negative + //modify the color to prevent hue shift. + " if (maxa > 255 && highPow >= 0)\n" + " {\n" + " newls = 255.0 / maxc;\n" + " lsratio = pow(newls / ls, highPow);\n" + "\n" + //Calculate the max-value color (ranged 0 - 1). + " for (rgbi = 0; rgbi < 3; rgbi++)\n" + " newRgb->m_Reals[rgbi] = newls * oldRgb->m_Reals[rgbi] / 255.0;\n" + "\n" + //Reduce saturation by the lsratio. 
+ " RgbToHsv(&(newRgb->m_Real4), &(newHsv.m_Real4));\n" + " newHsv.m_Real4.y *= lsratio;\n" + " HsvToRgb(&(newHsv.m_Real4), &(newRgb->m_Real4));\n" + "\n" + " for (rgbi = 0; rgbi < 3; rgbi++)\n"//Unrolling and vectorizing makes no difference. + " newRgb->m_Reals[rgbi] *= 255.0;\n" + " }\n" + " else\n" + " {\n" + " newls = 255.0 / maxc;\n" + " adjhlp = -highPow;\n" + "\n" + " if (adjhlp > 1)\n" + " adjhlp = 1;\n" + "\n" + " if (maxa <= 255)\n" + " adjhlp = 1;\n" + "\n" + //Calculate the max-value color (ranged 0 - 1) interpolated with the old behavior. + " for (rgbi = 0; rgbi < 3; rgbi++)\n"//Unrolling, caching and vectorizing makes no difference. + " newRgb->m_Reals[rgbi] = ((1.0 - adjhlp) * newls + adjhlp * ls) * oldRgb->m_Reals[rgbi];\n" + " }\n" + "}\n" + "\n"; return os.str(); } @@ -480,34 +468,31 @@ string FinalAccumOpenCLKernelCreator::CreateGammaCorrectionKernelString(bool alp { ostringstream os; string dataType; - os << - ConstantDefinesString(m_DoublePrecision) << - UnionCLStructString << - RgbToHsvFunctionString << - HsvToRgbFunctionString << - CalcAlphaFunctionString << - CreateCalcNewRgbFunctionString(true) << - SpatialFilterCLStructString << - CreateGammaCorrectionFunctionString(true, alphaCalc, true, false);//Will only be used with float in this case, early clip. Will always alpha accum. - - os << "__kernel void " << (alphaCalc ? 
m_GammaCorrectionWithAlphaCalcEntryPoint : m_GammaCorrectionWithoutAlphaCalcEntryPoint) << "(\n" << - " __global real4reals_bucket* accumulator,\n" - " __constant SpatialFilterCL* spatialFilter\n" - ")\n" - "{\n" - " int testGutter = 0;\n" - "\n" - " if (GLOBAL_ID_Y >= (spatialFilter->m_SuperRasH - testGutter) || GLOBAL_ID_X >= (spatialFilter->m_SuperRasW - testGutter))\n" - " return;\n" - "\n" - " uint superIndex = (GLOBAL_ID_Y * spatialFilter->m_SuperRasW) + GLOBAL_ID_X;\n" - " __global real4reals_bucket* bucket = accumulator + superIndex;\n" - //Pass in an alphaBase and alphaScale of 0, 1 which means to just directly assign the computed alpha value. - " GammaCorrectionFloats(bucket, &(spatialFilter->m_Background[0]), spatialFilter->m_Gamma, spatialFilter->m_LinRange, spatialFilter->m_Vibrancy, spatialFilter->m_HighlightPower, 0.0, 1.0, &(bucket->m_Reals[0]));\n" - "}\n" - ; - + ConstantDefinesString(m_DoublePrecision) << + UnionCLStructString << + RgbToHsvFunctionString << + HsvToRgbFunctionString << + CalcAlphaFunctionString << + CreateCalcNewRgbFunctionString(true) << + SpatialFilterCLStructString << + CreateGammaCorrectionFunctionString(true, alphaCalc, true, false);//Will only be used with float in this case, early clip. Will always alpha accum. + os << "__kernel void " << (alphaCalc ? m_GammaCorrectionWithAlphaCalcEntryPoint : m_GammaCorrectionWithoutAlphaCalcEntryPoint) << "(\n" << + " __global real4reals_bucket* accumulator,\n" + " __constant SpatialFilterCL* spatialFilter\n" + ")\n" + "{\n" + " int testGutter = 0;\n" + "\n" + " if (GLOBAL_ID_Y >= (spatialFilter->m_SuperRasH - testGutter) || GLOBAL_ID_X >= (spatialFilter->m_SuperRasW - testGutter))\n" + " return;\n" + "\n" + " uint superIndex = (GLOBAL_ID_Y * spatialFilter->m_SuperRasW) + GLOBAL_ID_X;\n" + " __global real4reals_bucket* bucket = accumulator + superIndex;\n" + //Pass in an alphaBase and alphaScale of 0, 1 which means to just directly assign the computed alpha value. 
+ " GammaCorrectionFloats(bucket, &(spatialFilter->m_Background[0]), spatialFilter->m_Gamma, spatialFilter->m_LinRange, spatialFilter->m_Vibrancy, spatialFilter->m_HighlightPower, 0.0, 1.0, &(bucket->m_Reals[0]));\n" + "}\n" + ; return os.str(); } }