mirror of
https://bitbucket.org/mfeemster/fractorium.git
synced 2025-07-01 13:56:06 -04:00
--Bug fixes
-Attempt to fix several OpenCL build errors that were occurring on Mac.
--Code changes
-Improve the coloring code during final accum by getting rid of the last remnants of unnecessary scaling to 255 from flam3.
-Begin work of supporting 16bpc on the GPU.
This commit is contained in:
@ -12,7 +12,7 @@ namespace EmberCLns
|
||||
/// <summary>
|
||||
/// OpenCL equivalent of Palette::RgbToHsv().
|
||||
/// </summary>
|
||||
static const char* RgbToHsvFunctionString =
|
||||
static const char* RgbToHsvFunctionString =
|
||||
//rgb 0 - 1,
|
||||
//h 0 - 6, s 0 - 1, v 0 - 1
|
||||
"static inline void RgbToHsv(real4_bucket* rgb, real4_bucket* hsv)\n"
|
||||
@ -82,9 +82,9 @@ static const char* RgbToHsvFunctionString =
|
||||
/// <summary>
|
||||
/// OpenCL equivalent of Palette::HsvToRgb().
|
||||
/// </summary>
|
||||
static const char* HsvToRgbFunctionString =
|
||||
static const char* HsvToRgbFunctionString =
|
||||
//h 0 - 6, s 0 - 1, v 0 - 1
|
||||
//rgb 0 - 1
|
||||
//rgb 0 - 1
|
||||
"static inline void HsvToRgb(real4_bucket* hsv, real4_bucket* rgb)\n"
|
||||
"{\n"
|
||||
" int j;\n"
|
||||
@ -118,11 +118,11 @@ static const char* HsvToRgbFunctionString =
|
||||
/// <summary>
|
||||
/// OpenCL equivalent of Palette::CalcAlpha().
|
||||
/// </summary>
|
||||
static const char* CalcAlphaFunctionString =
|
||||
static const char* CalcAlphaFunctionString =
|
||||
"static inline real_t CalcAlpha(real_bucket_t density, real_bucket_t gamma, real_bucket_t linrange)\n"//Not the slightest clue what this is doing.//DOC
|
||||
"{\n"
|
||||
" real_bucket_t frac, alpha, funcval = pow(linrange, gamma);\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" if (density > 0)\n"
|
||||
" {\n"
|
||||
" if (density < linrange)\n"
|
||||
@ -147,13 +147,13 @@ static const char* CalcAlphaFunctionString =
|
||||
/// during final accumulation, which only takes floats.
|
||||
/// </summary>
|
||||
static const char* CurveAdjustFunctionString =
|
||||
"static inline void CurveAdjust(__constant real4reals_bucket* csa, float* a, uint index)\n"
|
||||
"{\n"
|
||||
" uint tempIndex = (uint)clamp(*a, (float)0.0, (float)COLORMAP_LENGTH_MINUS_1);\n"
|
||||
" uint tempIndex2 = (uint)clamp((float)csa[tempIndex].m_Real4.x, (float)0.0, (float)COLORMAP_LENGTH_MINUS_1);\n"
|
||||
"\n"
|
||||
" *a = (float)round(csa[tempIndex2].m_Reals[index]);\n"
|
||||
"}\n";
|
||||
"static inline void CurveAdjust(__constant real4reals_bucket* csa, float* a, uint index)\n"
|
||||
"{\n"
|
||||
" uint tempIndex = (uint)clamp(*a * (float)COLORMAP_LENGTH_MINUS_1, (float)0.0, (float)COLORMAP_LENGTH_MINUS_1);\n"
|
||||
" uint tempIndex2 = (uint)clamp((float)csa[tempIndex].m_Real4.x * (float)COLORMAP_LENGTH_MINUS_1, (float)0.0, (float)COLORMAP_LENGTH_MINUS_1);\n"
|
||||
"\n"
|
||||
" *a = (float)csa[tempIndex2].m_Reals[index];\n"
|
||||
"}\n";
|
||||
|
||||
/// <summary>
|
||||
/// Use MWC 64 from David Thomas at the Imperial College of London for
|
||||
@ -197,7 +197,7 @@ static const char* RandFunctionString =
|
||||
/// <summary>
|
||||
/// OpenCL equivalent of Renderer::AddToAccum().
|
||||
/// </summary>
|
||||
static const char* AddToAccumWithCheckFunctionString =
|
||||
static const char* AddToAccumWithCheckFunctionString =
|
||||
"inline bool AccumCheck(int superRasW, int superRasH, int i, int ii, int j, int jj)\n"
|
||||
"{\n"
|
||||
" return (j + jj >= 0 && j + jj < superRasH && i + ii >= 0 && i + ii < superRasW);\n"
|
||||
@ -207,7 +207,7 @@ static const char* AddToAccumWithCheckFunctionString =
|
||||
/// <summary>
|
||||
/// OpenCL equivalent of various CarToRas member functions.
|
||||
/// </summary>
|
||||
static const char* CarToRasFunctionString =
|
||||
static const char* CarToRasFunctionString =
|
||||
"inline void CarToRasConvertPointToSingle(__constant CarToRasCL* carToRas, Point* point, uint* singleBufferIndex)\n"
|
||||
"{\n"
|
||||
" *singleBufferIndex = (uint)(carToRas->m_PixPerImageUnitW * point->m_X - carToRas->m_RasLlX) + (carToRas->m_RasWidth * (uint)(carToRas->m_PixPerImageUnitH * point->m_Y - carToRas->m_RasLlY));\n"
|
||||
@ -225,29 +225,27 @@ static const char* CarToRasFunctionString =
|
||||
static string AtomicString()
|
||||
{
|
||||
ostringstream os;
|
||||
|
||||
os <<
|
||||
"void AtomicAdd(volatile __global real_bucket_t* source, const real_bucket_t operand)\n"
|
||||
"{\n"
|
||||
" union\n"
|
||||
" {\n"
|
||||
" atomi intVal;\n"
|
||||
" real_bucket_t realVal;\n"
|
||||
" } newVal;\n"
|
||||
"\n"
|
||||
" union\n"
|
||||
" {\n"
|
||||
" atomi intVal;\n"
|
||||
" real_bucket_t realVal;\n"
|
||||
" } prevVal;\n"
|
||||
"\n"
|
||||
" do\n"
|
||||
" {\n"
|
||||
" prevVal.realVal = *source;\n"
|
||||
" newVal.realVal = prevVal.realVal + operand;\n"
|
||||
" } while (atomic_cmpxchg((volatile __global atomi*)source, prevVal.intVal, newVal.intVal) != prevVal.intVal);\n"
|
||||
"}\n";
|
||||
|
||||
"void AtomicAdd(volatile __global real_bucket_t* source, const real_bucket_t operand)\n"
|
||||
"{\n"
|
||||
" union\n"
|
||||
" {\n"
|
||||
" atomi intVal;\n"
|
||||
" real_bucket_t realVal;\n"
|
||||
" } newVal;\n"
|
||||
"\n"
|
||||
" union\n"
|
||||
" {\n"
|
||||
" atomi intVal;\n"
|
||||
" real_bucket_t realVal;\n"
|
||||
" } prevVal;\n"
|
||||
"\n"
|
||||
" do\n"
|
||||
" {\n"
|
||||
" prevVal.realVal = *source;\n"
|
||||
" newVal.realVal = prevVal.realVal + operand;\n"
|
||||
" } while (atomic_cmpxchg((volatile __global atomi*)source, prevVal.intVal, newVal.intVal) != prevVal.intVal);\n"
|
||||
"}\n";
|
||||
return os.str();
|
||||
}
|
||||
}
|
@ -70,16 +70,21 @@ static string ConstantDefinesString(bool doublePrecision)
|
||||
"#define THREADS_PER_WARP 32u\n"
|
||||
"#define NWARPS (NTHREADS / THREADS_PER_WARP)\n"
|
||||
"#define COLORMAP_LENGTH 256u\n"
|
||||
"#define COLORMAP_LENGTH_MINUS_1 255u\n"
|
||||
"#define COLORMAP_LENGTH_MINUS_1 255\n"
|
||||
"#define DE_THRESH 100u\n"
|
||||
"#define BadVal(x) (((x) != (x)) || ((x) > 1e10) || ((x) < -1e10))\n"
|
||||
"#define SQR(x) ((x) * (x))\n"
|
||||
"#define CUBE(x) ((x) * (x) * (x))\n"
|
||||
"#define M_2PI (M_PI * 2)\n"
|
||||
"#define M_3PI (M_PI * 3)\n"
|
||||
"#define MPI ((real_t)M_PI)\n"
|
||||
"#define MPI2 ((real_t)M_PI_2)\n"
|
||||
"#define MPI4 ((real_t)M_PI_4)\n"
|
||||
"#define M1PI ((real_t)M_1_PI)\n"
|
||||
"#define M2PI ((real_t)M_2_PI)\n"
|
||||
"#define M_2PI (MPI * 2)\n"
|
||||
"#define M_3PI (MPI * 3)\n"
|
||||
"#define SQRT5 2.2360679774997896964091736687313\n"
|
||||
"#define M_PHI 1.61803398874989484820458683436563\n"
|
||||
"#define DEG_2_RAD (M_PI / 180)\n"
|
||||
"#define DEG_2_RAD (MPI / 180)\n"
|
||||
"\n"
|
||||
"//Index in each dimension of a thread within a block.\n"
|
||||
"#define THREAD_ID_X (get_local_id(0))\n"
|
||||
|
@ -83,8 +83,8 @@ const string& FinalAccumOpenCLKernelCreator::FinalAccumEntryPoint(bool earlyClip
|
||||
|
||||
if (alphaAccum)
|
||||
{
|
||||
alphaBase = transparency ? 0 : 255;//See the table below.
|
||||
alphaScale = transparency ? 255 : 0;
|
||||
alphaBase = transparency ? 0 : 1;//See the table below.
|
||||
alphaScale = transparency ? 1 : 0;
|
||||
}
|
||||
|
||||
if (earlyClip)
|
||||
@ -260,9 +260,9 @@ string FinalAccumOpenCLKernelCreator::CreateFinalAccumKernelString(bool earlyCli
|
||||
if (alphaAccum)
|
||||
{
|
||||
if (alphaCalc)
|
||||
os << " finalColor.m_Float4.w = (float)newBucket.m_Real4.w * 255.0f;\n";
|
||||
os << " finalColor.m_Float4.w = (float)newBucket.m_Real4.w;\n";
|
||||
else
|
||||
os << " finalColor.m_Float4.w = 255.0f;\n";
|
||||
os << " finalColor.m_Float4.w = 1.0f;\n";
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -296,7 +296,6 @@ string FinalAccumOpenCLKernelCreator::CreateFinalAccumKernelString(bool earlyCli
|
||||
" CurveAdjust(csa, &(finalColor.m_Floats[2]), 3);\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" finalColor.m_Float4 /= 255.0f;\n"
|
||||
" write_imagef(pixels, finalCoord, finalColor.m_Float4);\n"//Use write_imagef instead of write_imageui because only the former works when sharing with an OpenGL texture.
|
||||
" barrier(CLK_GLOBAL_MEM_FENCE);\n"//Required, or else page tearing will occur during interactive rendering.
|
||||
"}\n"
|
||||
@ -335,7 +334,7 @@ string FinalAccumOpenCLKernelCreator::CreateGammaCorrectionFunctionString(bool g
|
||||
<< " {\n"
|
||||
<< " tmp = bucket->m_Reals[3];\n"
|
||||
<< " alpha = CalcAlpha(tmp, g, linRange);\n"
|
||||
<< " ls = vibrancy * 256.0 * alpha / tmp;\n"
|
||||
<< " ls = vibrancy * alpha / tmp;\n"
|
||||
<< " alpha = clamp(alpha, (real_bucket_t)0.0, (real_bucket_t)1.0);\n"
|
||||
<< " }\n"
|
||||
<< "\n"
|
||||
@ -343,7 +342,7 @@ string FinalAccumOpenCLKernelCreator::CreateGammaCorrectionFunctionString(bool g
|
||||
<< "\n"
|
||||
<< " for (uint rgbi = 0; rgbi < 3; rgbi++)\n"
|
||||
<< " {\n"
|
||||
<< " a = newRgb.m_Reals[rgbi] + ((1.0 - vibrancy) * 256.0 * pow(fabs(bucket->m_Reals[rgbi]), g));\n"
|
||||
<< " a = newRgb.m_Reals[rgbi] + ((1.0 - vibrancy) * pow(fabs(bucket->m_Reals[rgbi]), g));\n"
|
||||
<< "\n";
|
||||
|
||||
if (!alphaCalc)
|
||||
@ -362,7 +361,7 @@ string FinalAccumOpenCLKernelCreator::CreateGammaCorrectionFunctionString(bool g
|
||||
|
||||
os <<
|
||||
"\n"
|
||||
" correctedChannels[rgbi] = (" << dataType << ")clamp(a, (real_bucket_t)0.0, (real_bucket_t)255.0);\n"
|
||||
" correctedChannels[rgbi] = (" << dataType << ")clamp(a, (real_bucket_t)0.0, (real_bucket_t)1.0);\n"
|
||||
" }\n"
|
||||
"\n";
|
||||
|
||||
@ -399,9 +398,9 @@ string FinalAccumOpenCLKernelCreator::CreateCalcNewRgbFunctionString(bool global
|
||||
"static void CalcNewRgb(" << (globalBucket ? "__global " : "") << "real4reals_bucket* oldRgb, real_bucket_t ls, real_bucket_t highPow, real4reals_bucket* newRgb)\n"
|
||||
"{\n"
|
||||
" int rgbi;\n"
|
||||
" real_bucket_t newls, lsratio;\n"
|
||||
" real_bucket_t lsratio;\n"
|
||||
" real4reals_bucket newHsv;\n"
|
||||
" real_bucket_t maxa, maxc;\n"
|
||||
" real_bucket_t maxa, maxc, newls;\n"
|
||||
" real_bucket_t adjhlp;\n"
|
||||
"\n"
|
||||
" if (ls == 0 || (oldRgb->m_Real4.x == 0 && oldRgb->m_Real4.y == 0 && oldRgb->m_Real4.z == 0))\n"//Can't do a vector compare to zero.
|
||||
@ -413,35 +412,31 @@ string FinalAccumOpenCLKernelCreator::CreateCalcNewRgbFunctionString(bool global
|
||||
//Identify the most saturated channel.
|
||||
" maxc = max(max(oldRgb->m_Reals[0], oldRgb->m_Reals[1]), oldRgb->m_Reals[2]);\n"
|
||||
" maxa = ls * maxc;\n"
|
||||
" newls = 1 / maxc;\n"
|
||||
"\n"
|
||||
//If a channel is saturated and highlight power is non-negative
|
||||
//modify the color to prevent hue shift.
|
||||
" if (maxa > 255 && highPow >= 0)\n"
|
||||
" if (maxa > 1 && highPow >= 0)\n"
|
||||
" {\n"
|
||||
" newls = 255.0 / maxc;\n"
|
||||
" lsratio = pow(newls / ls, highPow);\n"
|
||||
"\n"
|
||||
//Calculate the max-value color (ranged 0 - 1).
|
||||
" for (rgbi = 0; rgbi < 3; rgbi++)\n"
|
||||
" newRgb->m_Reals[rgbi] = newls * oldRgb->m_Reals[rgbi] / 255.0;\n"
|
||||
" newRgb->m_Reals[rgbi] = newls * oldRgb->m_Reals[rgbi];\n"
|
||||
"\n"
|
||||
//Reduce saturation by the lsratio.
|
||||
" RgbToHsv(&(newRgb->m_Real4), &(newHsv.m_Real4));\n"
|
||||
" newHsv.m_Real4.y *= lsratio;\n"
|
||||
" HsvToRgb(&(newHsv.m_Real4), &(newRgb->m_Real4));\n"
|
||||
"\n"
|
||||
" for (rgbi = 0; rgbi < 3; rgbi++)\n"//Unrolling and vectorizing makes no difference.
|
||||
" newRgb->m_Reals[rgbi] *= 255.0;\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" newls = 255.0 / maxc;\n"
|
||||
" adjhlp = -highPow;\n"
|
||||
"\n"
|
||||
" if (adjhlp > 1)\n"
|
||||
" adjhlp = 1;\n"
|
||||
"\n"
|
||||
" if (maxa <= 255)\n"
|
||||
" if (maxa <= 1)\n"
|
||||
" adjhlp = 1;\n"
|
||||
"\n"
|
||||
//Calculate the max-value color (ranged 0 - 1) interpolated with the old behavior.
|
||||
|
@ -669,7 +669,7 @@ bool RendererCL<T, bucketT>::Alloc(bool histOnly)
|
||||
|
||||
if (b && !(b = wrapper.AddBuffer(m_SpatialFilterParamsBufferName, sizeof(m_SpatialFilterCL)))) { AddToReport(loc); }
|
||||
|
||||
if (b && !(b = wrapper.AddBuffer(m_CurvesCsaName, SizeOf(m_Csa.m_Entries)))) { AddToReport(loc); }
|
||||
if (b && !(b = wrapper.AddBuffer(m_CurvesCsaName, SizeOf(m_Csa)))) { AddToReport(loc); }
|
||||
|
||||
if (b && !(b = wrapper.AddBuffer(m_AccumBufferName, size))) { AddToReport(loc); }//Accum buffer.
|
||||
|
||||
@ -877,6 +877,17 @@ EmberStats RendererCL<T, bucketT>::Iterate(size_t iterCount, size_t temporalSamp
|
||||
return stats;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Override which just passes false to the base.
|
||||
/// This is because curves are scaled from 0-1 to 0-255 or 0-65535 on the CPU, but need to be kept as 0-1 for OpenCL because the texture expects normalized values.
|
||||
/// </summary>
|
||||
/// <param name="scale">Ignored</param>
|
||||
template <typename T, typename bucketT>
|
||||
void RendererCL<T, bucketT>::ComputeCurves(bool scale)
|
||||
{
|
||||
Renderer<T, bucketT>::ComputeCurves(false);//The scale argument is deliberately ignored; false is always forwarded to the base implementation.
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Private functions for making and running OpenCL programs.
|
||||
/// </summary>
|
||||
@ -1312,7 +1323,7 @@ eRenderStatus RendererCL<T, bucketT>::RunFinalAccum()
|
||||
|
||||
if (b && !(b = wrapper.AddAndWriteBuffer(m_SpatialFilterParamsBufferName, reinterpret_cast<void*>(&m_SpatialFilterCL), sizeof(m_SpatialFilterCL)))) { AddToReport(loc); }
|
||||
|
||||
if (b && !(b = wrapper.AddAndWriteBuffer(m_CurvesCsaName, m_Csa.m_Entries.data(), SizeOf(m_Csa.m_Entries)))) { AddToReport(loc); }
|
||||
if (b && !(b = wrapper.AddAndWriteBuffer(m_CurvesCsaName, m_Csa.data(), SizeOf(m_Csa)))) { AddToReport(loc); }
|
||||
|
||||
//Since early clip requires gamma correcting the entire accumulator first,
|
||||
//it can't be done inside of the normal final accumulation kernel, so
|
||||
|
@ -168,6 +168,7 @@ protected:
|
||||
virtual eRenderStatus GaussianDensityFilter() override;
|
||||
virtual eRenderStatus AccumulatorToFinalImage(byte* pixels, size_t finalOffset) override;
|
||||
virtual EmberStats Iterate(size_t iterCount, size_t temporalSample) override;
|
||||
virtual void ComputeCurves(bool scale) override;
|
||||
|
||||
#ifndef TEST_CL
|
||||
private:
|
||||
|
Reference in New Issue
Block a user