mirror of
https://bitbucket.org/mfeemster/fractorium.git
synced 2025-07-02 14:26:17 -04:00
Numerous fixes
0.4.0.5 Beta 07/18/2014 --User Changes Allow for vibrancy values > 1. Add flatten and unflatten menu items. Automatically flatten like Apophysis does. Add plugin and new_linear tags to Xml to be compatible with Apophysis. --Bug Fixes Fix blur, blur3d, bubble, cropn, cross, curl, curl3d, epispiral, ho, julia3d, julia3dz, loonie, mirror_x, mirror_y, mirror_z, rotate_x, sinusoidal, spherical, spherical3d, stripes. Fix unique filename generation on final render, which was completely broken. Fix two severe OpenCL bugs: random seeds were biased, and fusing was being reset too often, leading to results that differed from the CPU. Fix a subtle, but sometimes severe, bug in the setup of the xaos weights. Use a properly defined epsilon by getting the value from std::numeric_limits, rather than hard-coding 1e-6 or 1e-10. Remove incorrect usage of epsilon everywhere: it should not be automatically added to denominators; rather, it should only be used if the denominator is zero. Force final render progress bars to 100 on completion; sometimes they didn't seem to make it there. Make variation name and param comparisons case-insensitive. --Code Changes Add ForEach and FindIf as wrappers around std::for_each and std::find_if.
This commit is contained in:
@ -168,8 +168,8 @@ static const char* RandFunctionString =
|
||||
"\n"
|
||||
"inline real_t MwcNextNeg1Pos1(uint2* s)\n"
|
||||
"{\n"
|
||||
" real_t f = (real_t)MwcNext(s) / UINT_MAX;\n"
|
||||
" return -1.0 + (f * (1.0 - (-1.0)));\n"
|
||||
" real_t f = (real_t)MwcNext(s) / (real_t)UINT_MAX;\n"
|
||||
" return -1.0 + (f * 2.0);\n"
|
||||
"}\n"
|
||||
"\n";
|
||||
|
||||
@ -266,7 +266,7 @@ static const char* InlineMathFunctionsString =
|
||||
"\n"
|
||||
"inline real_t Zeps(real_t x)\n"
|
||||
"{\n"
|
||||
" return x == 0.0 ? EPS6 : x;\n"
|
||||
" return x == 0.0 ? EPS : x;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"inline real_t Lerp(real_t a, real_t b, real_t p)\n"
|
||||
|
@ -36,4 +36,5 @@
|
||||
|
||||
using namespace std;
|
||||
using namespace EmberNs;
|
||||
//#define TEST_CL 1
|
||||
//#define TEST_CL 1
|
||||
//#define TEST_CL_BUFFERS 1
|
@ -35,21 +35,24 @@ static string ConstantDefinesString(bool doublePrecision)
|
||||
<< "typedef long intPrec;\n"
|
||||
<< "typedef ulong atomi;\n"
|
||||
<< "typedef double real_t;\n"
|
||||
<< "typedef double4 real4;\n";
|
||||
<< "typedef double4 real4;\n"
|
||||
<< "#define EPS (DBL_EPSILON)\n"
|
||||
;
|
||||
}
|
||||
else
|
||||
{
|
||||
os << "typedef int intPrec;\n"
|
||||
"typedef unsigned int atomi;\n"
|
||||
"typedef float real_t;\n"
|
||||
"typedef float4 real4;\n";
|
||||
"typedef float4 real4;\n"
|
||||
"#define EPS (FLT_EPSILON)\n"
|
||||
;
|
||||
}
|
||||
|
||||
os <<
|
||||
"typedef long int int64;\n"
|
||||
"typedef unsigned long int uint64;\n"
|
||||
"\n"
|
||||
"#define EPS ((1e-10))\n"//May need to change this, it might not be enough in some cases. Maybe try 1e-9 if things look funny when close to zero.
|
||||
"#define EPS6 ((1e-6))\n"
|
||||
"\n"
|
||||
"//The number of threads per block used in the iteration function. Don't change\n"
|
||||
@ -122,7 +125,7 @@ struct ALIGN PointCL
|
||||
T m_Y;
|
||||
T m_Z;
|
||||
T m_ColorX;
|
||||
T m_LastXfUsed;
|
||||
unsigned int m_LastXfUsed;
|
||||
};
|
||||
|
||||
/// <summary>
|
||||
@ -188,7 +191,6 @@ static const char* XformCLStructString =
|
||||
template <typename T>
|
||||
struct ALIGN EmberCL
|
||||
{
|
||||
unsigned int m_FinalXformIndex;
|
||||
XformCL<T> m_Xforms[MAX_CL_XFORM];
|
||||
T m_CamZPos;
|
||||
T m_CamPerspective;
|
||||
@ -207,7 +209,6 @@ struct ALIGN EmberCL
|
||||
static const char* EmberCLStructString =
|
||||
"typedef struct __attribute__ " ALIGN_CL " _EmberCL\n"
|
||||
"{\n"
|
||||
" uint m_FinalXformIndex;\n"
|
||||
" XformCL m_Xforms[" MAX_CL_XFORM_STRING "];\n"
|
||||
" real_t m_CamZPos;\n"
|
||||
" real_t m_CamPerspective;\n"
|
||||
|
@ -52,7 +52,7 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
|
||||
|
||||
xformFuncs << "\n" << parVarDefines << endl;
|
||||
ember.GetPresentVariations(variations);
|
||||
std::for_each(variations.begin(), variations.end(), [&](Variation<T>* var) { if (var) xformFuncs << var->OpenCLFuncsString(); });
|
||||
ForEach(variations, [&](Variation<T>* var) { if (var) xformFuncs << var->OpenCLFuncsString(); });
|
||||
|
||||
for (i = 0; i < totalXformCount; i++)
|
||||
{
|
||||
@ -99,7 +99,7 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
|
||||
if (needPrecalcAtanYX)
|
||||
xformFuncs << "\treal_t precalcAtanyx;\n";
|
||||
|
||||
xformFuncs << "\treal_t tempColor = outPoint->m_ColorX = xform->m_ColorSpeedCache + (xform->m_OneMinusColorCache * inPoint->m_ColorX);\n";
|
||||
xformFuncs << "\treal_t tempColor = outPoint->m_ColorX = xform->m_ColorSpeedCache + (xform->m_OneMinusColorCache * inPoint->m_ColorX);\n\n";
|
||||
|
||||
if (xform->PreVariationCount() + xform->VariationCount() == 0)
|
||||
{
|
||||
@ -145,8 +145,8 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
|
||||
|
||||
if (xform->NeedPrecalcAngles())
|
||||
{
|
||||
xformFuncs << "\tprecalcSina = transX / precalcSqrtSumSquares;\n";
|
||||
xformFuncs << "\tprecalcCosa = transY / precalcSqrtSumSquares;\n";
|
||||
xformFuncs << "\tprecalcSina = transX / Zeps(precalcSqrtSumSquares);\n";
|
||||
xformFuncs << "\tprecalcCosa = transY / Zeps(precalcSqrtSumSquares);\n";
|
||||
}
|
||||
|
||||
if (xform->NeedPrecalcAtanXY())
|
||||
@ -268,9 +268,9 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
|
||||
" __local Point swap[NTHREADS];\n"
|
||||
" __local uint xfsel[NWARPS];\n"
|
||||
"\n"
|
||||
" unsigned int pointsIndex = INDEX_IN_GRID_2D;\n"
|
||||
" mwc.x = (pointsIndex + 1 * seed) & 0x7FFFFFFF;\n"
|
||||
" mwc.y = ((BLOCK_ID_X + 1) + (pointsIndex + 1) * seed) & 0x7FFFFFFF;\n"
|
||||
" uint pointsIndex = INDEX_IN_GRID_2D;\n"
|
||||
" mwc.x = (pointsIndex + 1 * seed);\n"
|
||||
" mwc.y = ((BLOCK_ID_X + 1) * (pointsIndex + 1) * seed);\n"
|
||||
" iPaletteCoord.y = 0;\n"
|
||||
"\n"
|
||||
" if (fuseCount > 0)\n"
|
||||
@ -308,14 +308,18 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
|
||||
"\n"
|
||||
" do\n"
|
||||
" {\n";
|
||||
|
||||
//If xaos is present, the cuburn method is effectively disabled: every thread picks a random xform.
|
||||
if (ember.XaosPresent())
|
||||
{
|
||||
os <<
|
||||
" secondPoint.m_LastXfUsed = xformDistributions[xfsel[THREAD_ID_Y] + (" << CHOOSE_XFORM_GRAIN << " * (firstPoint.m_LastXfUsed + 1u))];\n\n";
|
||||
" secondPoint.m_LastXfUsed = xformDistributions[MwcNext(&mwc) % " << CHOOSE_XFORM_GRAIN << " + (" << CHOOSE_XFORM_GRAIN << " * (firstPoint.m_LastXfUsed + 1u))];\n\n";
|
||||
//" secondPoint.m_LastXfUsed = xformDistributions[xfsel[THREAD_ID_Y] + (" << CHOOSE_XFORM_GRAIN << " * (firstPoint.m_LastXfUsed + 1u))];\n\n";//Partial cuburn hybrid.
|
||||
}
|
||||
else
|
||||
{
|
||||
os <<
|
||||
//" secondPoint.m_LastXfUsed = xformDistributions[MwcNext(&mwc) % " << CHOOSE_XFORM_GRAIN << "];\n\n";//For testing, using straight rand flam4/fractron style instead of cuburn.
|
||||
" secondPoint.m_LastXfUsed = xformDistributions[xfsel[THREAD_ID_Y]];\n\n";
|
||||
}
|
||||
|
||||
@ -391,12 +395,15 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
|
||||
|
||||
if (ember.UseFinalXform())
|
||||
{
|
||||
unsigned int finalIndex = ember.TotalXformCount() - 1;
|
||||
|
||||
//CPU takes an extra step here to preserve the opacity of the randomly selected xform, rather than the final xform's opacity.
|
||||
//The same thing takes place here automatically because secondPoint.m_LastXfUsed is used below to retrieve the opacity when accumulating.
|
||||
os <<
|
||||
" if ((ember->m_Xforms[ember->m_FinalXformIndex].m_Opacity == 1) || (MwcNext01(&mwc) < ember->m_Xforms[ember->m_FinalXformIndex].m_Opacity))\n"
|
||||
" if ((ember->m_Xforms[" << finalIndex << "].m_Opacity == 1) || (MwcNext01(&mwc) < ember->m_Xforms[" << finalIndex << "].m_Opacity))\n"
|
||||
" {\n"
|
||||
" Xform" << (ember.TotalXformCount() - 1) << "(&(ember->m_Xforms[ember->m_FinalXformIndex]), parVars, &secondPoint, &tempPoint, &mwc);\n"
|
||||
" tempPoint.m_LastXfUsed = secondPoint.m_LastXfUsed;\n"
|
||||
" Xform" << finalIndex << "(&(ember->m_Xforms[" << finalIndex << "]), parVars, &secondPoint, &tempPoint, &mwc);\n"
|
||||
" secondPoint = tempPoint;\n"
|
||||
" }\n"
|
||||
"\n";
|
||||
@ -511,7 +518,14 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
|
||||
//At this point, iterating for this round is done, so write the final points back out
|
||||
//to the global points buffer to be used as inputs for the next round. This preserves point trajectory
|
||||
//between kernel calls.
|
||||
#ifdef TEST_CL_BUFFERS//Use this to populate with test values and read back in EmberTester.
|
||||
" points[pointsIndex].m_X = MwcNextNeg1Pos1(&mwc);\n"
|
||||
" points[pointsIndex].m_Y = MwcNextNeg1Pos1(&mwc);\n"
|
||||
" points[pointsIndex].m_Z = MwcNextNeg1Pos1(&mwc);\n"
|
||||
" points[pointsIndex].m_ColorX = MwcNextNeg1Pos1(&mwc);\n"
|
||||
#else
|
||||
" points[pointsIndex] = firstPoint;\n"
|
||||
#endif
|
||||
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
|
||||
"}\n";
|
||||
|
||||
@ -562,7 +576,6 @@ void IterOpenCLKernelCreator<T>::ParVarIndexDefines(Ember<T>& ember, pair<string
|
||||
{
|
||||
unsigned int i, j, k, size = 0, xformCount = ember.TotalXformCount();
|
||||
Xform<T>* xform;
|
||||
ParametricVariation<T>* parVar;
|
||||
ostringstream os;
|
||||
|
||||
if (doVals)
|
||||
@ -576,7 +589,7 @@ void IterOpenCLKernelCreator<T>::ParVarIndexDefines(Ember<T>& ember, pair<string
|
||||
|
||||
for (j = 0; j < varCount; j++)
|
||||
{
|
||||
if (parVar = dynamic_cast<ParametricVariation<T>*>(xform->GetVariation(j)))
|
||||
if (ParametricVariation<T>* parVar = dynamic_cast<ParametricVariation<T>*>(xform->GetVariation(j)))
|
||||
{
|
||||
for (k = 0; k < parVar->ParamCount(); k++)
|
||||
{
|
||||
@ -711,7 +724,7 @@ string IterOpenCLKernelCreator<T>::CreateProjectionString(Ember<T>& ember)
|
||||
"\n"
|
||||
" z = ember->m_C02 * secondPoint.m_X + ember->m_C12 * secondPoint.m_Y + ember->m_C22 * z;\n"
|
||||
"\n"
|
||||
" real_t zr = 1 - ember->m_CamPerspective * z;\n"
|
||||
" real_t zr = Zeps(1 - ember->m_CamPerspective * z);\n"
|
||||
" real_t dr = MwcNext01(&mwc) * ember->m_BlurCoef * z;\n"
|
||||
"\n"
|
||||
" dsin = sin(t);\n"
|
||||
@ -731,7 +744,7 @@ string IterOpenCLKernelCreator<T>::CreateProjectionString(Ember<T>& ember)
|
||||
" z = secondPoint.m_Z - ember->m_CamZPos;\n"
|
||||
" y = ember->m_C11 * secondPoint.m_Y + ember->m_C21 * z;\n"
|
||||
" z = ember->m_C12 * secondPoint.m_Y + ember->m_C22 * z;\n"
|
||||
" zr = 1 - ember->m_CamPerspective * z;\n"
|
||||
" zr = Zeps(1 - ember->m_CamPerspective * z);\n"
|
||||
"\n"
|
||||
" dsin = sin(t);\n"
|
||||
" dcos = cos(t);\n"
|
||||
@ -751,7 +764,7 @@ string IterOpenCLKernelCreator<T>::CreateProjectionString(Ember<T>& ember)
|
||||
" real_t z = secondPoint.m_Z - ember->m_CamZPos;\n"
|
||||
" real_t x = ember->m_C00 * secondPoint.m_X + ember->m_C10 * secondPoint.m_Y;\n"
|
||||
" real_t y = ember->m_C01 * secondPoint.m_X + ember->m_C11 * secondPoint.m_Y + ember->m_C21 * z;\n"
|
||||
" real_t zr = 1 - ember->m_CamPerspective * (ember->m_C02 * secondPoint.m_X + ember->m_C12 * secondPoint.m_Y + ember->m_C22 * z);\n"
|
||||
" real_t zr = Zeps(1 - ember->m_CamPerspective * (ember->m_C02 * secondPoint.m_X + ember->m_C12 * secondPoint.m_Y + ember->m_C22 * z));\n"
|
||||
"\n"
|
||||
" secondPoint.m_X = x / zr;\n"
|
||||
" secondPoint.m_Y = y / zr;\n"
|
||||
@ -762,7 +775,7 @@ string IterOpenCLKernelCreator<T>::CreateProjectionString(Ember<T>& ember)
|
||||
os <<
|
||||
" real_t z = secondPoint.m_Z - ember->m_CamZPos;\n"
|
||||
" real_t y = ember->m_C11 * secondPoint.m_Y + ember->m_C21 * z;\n"
|
||||
" real_t zr = 1 - ember->m_CamPerspective * (ember->m_C12 * secondPoint.m_Y + ember->m_C22 * z);\n"
|
||||
" real_t zr = Zeps(1 - ember->m_CamPerspective * (ember->m_C12 * secondPoint.m_Y + ember->m_C22 * z));\n"
|
||||
"\n"
|
||||
" secondPoint.m_X /= zr;\n"
|
||||
" secondPoint.m_Y = y / zr;\n"
|
||||
@ -772,7 +785,7 @@ string IterOpenCLKernelCreator<T>::CreateProjectionString(Ember<T>& ember)
|
||||
else
|
||||
{
|
||||
os <<
|
||||
" real_t zr = 1 - ember->m_CamPerspective * (secondPoint.m_Z - ember->m_CamZPos);\n"
|
||||
" real_t zr = Zeps(1 - ember->m_CamPerspective * (secondPoint.m_Z - ember->m_CamZPos));\n"
|
||||
"\n"
|
||||
" secondPoint.m_X /= zr;\n"
|
||||
" secondPoint.m_Y /= zr;\n"
|
||||
|
@ -1262,6 +1262,8 @@ bool OpenCLWrapper::CreateSPK(std::string& name, std::string& program, std::stri
|
||||
err = spk.m_Program.build(m_DeviceVec, "-cl-mad-enable");//Tinker with other options later.
|
||||
else
|
||||
err = spk.m_Program.build(m_DeviceVec, "-cl-mad-enable -cl-no-signed-zeros -cl-single-precision-constant");
|
||||
//err = spk.m_Program.build(m_DeviceVec, "-cl-single-precision-constant");
|
||||
//err = spk.m_Program.build(m_DeviceVec, "-cl-mad-enable -cl-single-precision-constant");
|
||||
//err = spk.m_Program.build(m_DeviceVec, "-cl-mad-enable -cl-no-signed-zeros -cl-fast-relaxed-math -cl-single-precision-constant");//This can cause some rounding.
|
||||
//err = spk.m_Program.build(m_DeviceVec, "-cl-mad-enable -cl-single-precision-constant");
|
||||
|
||||
|
@ -40,6 +40,7 @@ RendererCL<T>::RendererCL(unsigned int platform, unsigned int device, bool share
|
||||
//based on the cuburn model of each kernel launch containing
|
||||
//256 threads. 32 wide by 8 high. Everything done in the OpenCL
|
||||
//iteration kernel depends on these dimensions.
|
||||
m_IterCountPerKernel = 256;
|
||||
m_IterBlockWidth = 32;
|
||||
m_IterBlockHeight = 8;
|
||||
m_IterBlocksWide = 64;
|
||||
@ -125,6 +126,7 @@ bool RendererCL<T>::Init(unsigned int platform, unsigned int device, bool shared
|
||||
/// OpenCL property accessors, getters only.
|
||||
/// </summary>
|
||||
|
||||
template <typename T> unsigned int RendererCL<T>::IterCountPerKernel() { return m_IterCountPerKernel; }
|
||||
template <typename T> unsigned int RendererCL<T>::IterBlocksWide() { return m_IterBlocksWide; }
|
||||
template <typename T> unsigned int RendererCL<T>::IterBlocksHigh() { return m_IterBlocksHigh; }
|
||||
template <typename T> unsigned int RendererCL<T>::IterBlockWidth() { return m_IterBlockWidth; }
|
||||
@ -322,7 +324,7 @@ void RendererCL<T>::ClearErrorReport()
|
||||
template <typename T>
|
||||
unsigned int RendererCL<T>::SubBatchSize() const
|
||||
{
|
||||
return m_IterBlocksWide * m_IterBlocksHigh * 256 * 256;
|
||||
return m_IterBlocksWide * m_IterBlocksHigh * SQR(m_IterCountPerKernel);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@ -614,7 +616,7 @@ EmberStats RendererCL<T>::Iterate(unsigned __int64 iterCount, unsigned int pass,
|
||||
|
||||
if (b)
|
||||
{
|
||||
if (m_ProcessState == ITER_STARTED)
|
||||
if (m_Stats.m_Iters == 0)//Only reset the call count on the beginning of a new render. Do not reset on KEEP_ITERATING.
|
||||
m_Calls = 0;
|
||||
|
||||
b = RunIter(iterCount, pass, temporalSample, stats.m_Iters);
|
||||
@ -684,9 +686,10 @@ bool RendererCL<T>::RunIter(unsigned __int64 iterCount, unsigned int pass, unsig
|
||||
Timing t;//, t2(4);
|
||||
bool b = false;
|
||||
unsigned int fuse, argIndex;
|
||||
unsigned int iterCountPerKernel = 256;
|
||||
unsigned int iterCountPerKernel = m_IterCountPerKernel;
|
||||
unsigned int iterCountPerBlock = iterCountPerKernel * m_IterBlockWidth * m_IterBlockHeight;
|
||||
unsigned int seed;
|
||||
unsigned int fuseFreq = m_SubBatchSize / m_IterCountPerKernel;
|
||||
unsigned __int64 itersRemaining, localIterCount = 0;
|
||||
int kernelIndex = m_Wrapper.FindKernelIndex(m_IterOpenCLKernelCreator.IterEntryPoint());
|
||||
double percent, etaMs;
|
||||
@ -707,7 +710,7 @@ bool RendererCL<T>::RunIter(unsigned __int64 iterCount, unsigned int pass, unsig
|
||||
if (!m_Wrapper.AddAndWriteBuffer(m_DistBufferName, (void*)XformDistributions(), XformDistributionsSize())) { m_ErrorReport.push_back(loc); return false; }//Will be resized for xaos.
|
||||
if (!m_Wrapper.WriteBuffer (m_CarToRasBufferName, (void*)&m_CarToRasCL, sizeof(m_CarToRasCL))) { m_ErrorReport.push_back(loc); return false; }
|
||||
|
||||
if (!m_Wrapper.AddAndWriteImage("Palette", CL_MEM_READ_ONLY, m_PaletteFormat, 256, 1, 0, m_Dmap.m_Entries.data())) { m_ErrorReport.push_back(loc); return false; }
|
||||
if (!m_Wrapper.AddAndWriteImage("Palette", CL_MEM_READ_ONLY, m_PaletteFormat, m_Dmap.m_Entries.size(), 1, 0, m_Dmap.m_Entries.data())) { m_ErrorReport.push_back(loc); return false; }
|
||||
|
||||
//If animating, treat each temporal sample as a newly started render for fusing purposes.
|
||||
if (temporalSample > 0)
|
||||
@ -718,11 +721,12 @@ bool RendererCL<T>::RunIter(unsigned __int64 iterCount, unsigned int pass, unsig
|
||||
argIndex = 0;
|
||||
seed = m_Rand[0].Rand();
|
||||
#ifdef TEST_CL
|
||||
fuse = false;
|
||||
fuse = 0;
|
||||
#else
|
||||
fuse = ((m_Calls % 4) == 0 ? 100 : 0);
|
||||
#endif
|
||||
//fuse = 100;
|
||||
fuse = ((m_Calls % fuseFreq) == 0 ? (EarlyClip() ? 100u : 15u) : 0u);
|
||||
//fuse = ((m_Calls % 4) == 0 ? 100u : 0u);
|
||||
#endif
|
||||
itersRemaining = iterCount - itersRan;
|
||||
unsigned int gridW = (unsigned int)min(ceil((double)itersRemaining / (double)iterCountPerBlock), (double)IterBlocksWide());
|
||||
unsigned int gridH = (unsigned int)min(ceil((double)itersRemaining / ((double)gridW * iterCountPerBlock)), (double)IterBlocksHigh());
|
||||
@ -736,17 +740,17 @@ bool RendererCL<T>::RunIter(unsigned __int64 iterCount, unsigned int pass, unsig
|
||||
iterCountThisLaunch = iterCountPerKernel * (gridW * gridH * m_IterBlockWidth * m_IterBlockHeight);
|
||||
}
|
||||
|
||||
if (!m_Wrapper.SetArg (kernelIndex, argIndex, iterCountPerKernel)) { m_ErrorReport.push_back(loc); return false; } argIndex++;//Number of iters for each thread to run.
|
||||
if (!m_Wrapper.SetArg (kernelIndex, argIndex, fuse)) { m_ErrorReport.push_back(loc); return false; } argIndex++;//Number of iters to fuse.
|
||||
if (!m_Wrapper.SetArg (kernelIndex, argIndex, seed)) { m_ErrorReport.push_back(loc); return false; } argIndex++;//Seed.
|
||||
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex, m_EmberBufferName)) { m_ErrorReport.push_back(loc); return false; } argIndex++;//Flame.
|
||||
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex, m_ParVarsBufferName)) { m_ErrorReport.push_back(loc); return false; } argIndex++;//Parametric variation parameters.
|
||||
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex, m_DistBufferName)) { m_ErrorReport.push_back(loc); return false; } argIndex++;//Xform distributions.
|
||||
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex, m_CarToRasBufferName)) { m_ErrorReport.push_back(loc); return false; } argIndex++;//Coordinate converter.
|
||||
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex, m_HistBufferName)) { m_ErrorReport.push_back(loc); return false; } argIndex++;//Histogram.
|
||||
if (!m_Wrapper.SetArg (kernelIndex, argIndex, SuperSize())) { m_ErrorReport.push_back(loc); return false; } argIndex++;//Histogram size.
|
||||
if (!m_Wrapper.SetImageArg (kernelIndex, argIndex, false, "Palette")) { m_ErrorReport.push_back(loc); return false; } argIndex++;//Palette.
|
||||
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex, m_PointsBufferName)) { m_ErrorReport.push_back(loc); return false; } argIndex++;//Random start points.
|
||||
if (!m_Wrapper.SetArg (kernelIndex, argIndex++, iterCountPerKernel)) { m_ErrorReport.push_back(loc); return false; }//Number of iters for each thread to run.
|
||||
if (!m_Wrapper.SetArg (kernelIndex, argIndex++, fuse)) { m_ErrorReport.push_back(loc); return false; }//Number of iters to fuse.
|
||||
if (!m_Wrapper.SetArg (kernelIndex, argIndex++, seed)) { m_ErrorReport.push_back(loc); return false; }//Seed.
|
||||
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex++, m_EmberBufferName)) { m_ErrorReport.push_back(loc); return false; }//Flame.
|
||||
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex++, m_ParVarsBufferName)) { m_ErrorReport.push_back(loc); return false; }//Parametric variation parameters.
|
||||
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex++, m_DistBufferName)) { m_ErrorReport.push_back(loc); return false; }//Xform distributions.
|
||||
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex++, m_CarToRasBufferName)) { m_ErrorReport.push_back(loc); return false; }//Coordinate converter.
|
||||
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex++, m_HistBufferName)) { m_ErrorReport.push_back(loc); return false; }//Histogram.
|
||||
if (!m_Wrapper.SetArg (kernelIndex, argIndex++, SuperSize())) { m_ErrorReport.push_back(loc); return false; }//Histogram size.
|
||||
if (!m_Wrapper.SetImageArg (kernelIndex, argIndex++, false, "Palette")) { m_ErrorReport.push_back(loc); return false; }//Palette.
|
||||
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex++, m_PointsBufferName)) { m_ErrorReport.push_back(loc); return false; }//Random start points.
|
||||
|
||||
if (!m_Wrapper.RunKernel(kernelIndex,
|
||||
gridW * IterBlockWidth(),//Total grid dims.
|
||||
@ -837,9 +841,9 @@ eRenderStatus RendererCL<T>::RunLogScaleFilter()
|
||||
|
||||
if (!m_Wrapper.AddAndWriteBuffer(m_DEFilterParamsBufferName, (void*)&m_DensityFilterCL, sizeof(m_DensityFilterCL))) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }
|
||||
|
||||
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex, m_HistBufferName)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; } argIndex++;//Histogram.
|
||||
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex, m_AccumBufferName)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; } argIndex++;//Accumulator.
|
||||
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex, m_DEFilterParamsBufferName)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; } argIndex++;//DensityFilterCL.
|
||||
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex++, m_HistBufferName)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }//Histogram.
|
||||
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex++, m_AccumBufferName)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }//Accumulator.
|
||||
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex++, m_DEFilterParamsBufferName)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }//DensityFilterCL.
|
||||
|
||||
//t.Tic();
|
||||
if (!m_Wrapper.RunKernel(kernelIndex, gridW, gridH, 1, blockW, blockH, 1)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }
|
||||
@ -981,8 +985,8 @@ eRenderStatus RendererCL<T>::RunFinalAccum()
|
||||
gridH = m_SpatialFilterCL.m_SuperRasH;
|
||||
OpenCLWrapper::MakeEvenGridDims(blockW, blockH, gridW, gridH);
|
||||
|
||||
if (!m_Wrapper.SetBufferArg(gammaCorrectKernelIndex, argIndex, m_AccumBufferName)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; } argIndex++;//Accumulator.
|
||||
if (!m_Wrapper.SetBufferArg(gammaCorrectKernelIndex, argIndex, m_SpatialFilterParamsBufferName)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; } argIndex++;//SpatialFilterCL.
|
||||
if (!m_Wrapper.SetBufferArg(gammaCorrectKernelIndex, argIndex++, m_AccumBufferName)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }//Accumulator.
|
||||
if (!m_Wrapper.SetBufferArg(gammaCorrectKernelIndex, argIndex++, m_SpatialFilterParamsBufferName)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }//SpatialFilterCL.
|
||||
|
||||
if (!m_Wrapper.RunKernel(gammaCorrectKernelIndex, gridW, gridH, 1, blockW, blockH, 1)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }
|
||||
}
|
||||
@ -1000,12 +1004,12 @@ eRenderStatus RendererCL<T>::RunFinalAccum()
|
||||
gridH = m_SpatialFilterCL.m_FinalRasH;
|
||||
OpenCLWrapper::MakeEvenGridDims(blockW, blockH, gridW, gridH);
|
||||
|
||||
if (!m_Wrapper.SetBufferArg(accumKernelIndex, argIndex, m_AccumBufferName)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; } argIndex++;//Accumulator.
|
||||
if (!m_Wrapper.SetImageArg(accumKernelIndex, argIndex, m_Wrapper.Shared(), m_FinalImageName)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; } argIndex++;//Final image.
|
||||
if (!m_Wrapper.SetBufferArg(accumKernelIndex, argIndex, m_SpatialFilterParamsBufferName)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; } argIndex++;//SpatialFilterCL.
|
||||
if (!m_Wrapper.SetBufferArg(accumKernelIndex, argIndex, m_SpatialFilterCoefsBufferName)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; } argIndex++;//Filter coefs.
|
||||
if (!m_Wrapper.SetArg(accumKernelIndex, argIndex, alphaBase)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; } argIndex++;//Alpha base.
|
||||
if (!m_Wrapper.SetArg(accumKernelIndex, argIndex, alphaScale)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; } argIndex++;//Alpha scale.
|
||||
if (!m_Wrapper.SetBufferArg(accumKernelIndex, argIndex++, m_AccumBufferName)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }//Accumulator.
|
||||
if (!m_Wrapper.SetImageArg(accumKernelIndex, argIndex++, m_Wrapper.Shared(), m_FinalImageName)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }//Final image.
|
||||
if (!m_Wrapper.SetBufferArg(accumKernelIndex, argIndex++, m_SpatialFilterParamsBufferName)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }//SpatialFilterCL.
|
||||
if (!m_Wrapper.SetBufferArg(accumKernelIndex, argIndex++, m_SpatialFilterCoefsBufferName)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }//Filter coefs.
|
||||
if (!m_Wrapper.SetArg (accumKernelIndex, argIndex++, alphaBase)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }//Alpha base.
|
||||
if (!m_Wrapper.SetArg (accumKernelIndex, argIndex++, alphaScale)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }//Alpha scale.
|
||||
|
||||
if (m_Wrapper.Shared())
|
||||
if (!m_Wrapper.EnqueueAcquireGLObjects(m_FinalImageName)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }
|
||||
@ -1050,9 +1054,9 @@ bool RendererCL<T>::ClearBuffer(string bufferName, unsigned int width, unsigned
|
||||
|
||||
OpenCLWrapper::MakeEvenGridDims(blockW, blockH, gridW, gridH);
|
||||
|
||||
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex, bufferName)) { m_ErrorReport.push_back(loc); return false; } argIndex++;//Buffer of unsigned char.
|
||||
if (!m_Wrapper.SetArg (kernelIndex, argIndex, width * elementSize)) { m_ErrorReport.push_back(loc); return false; } argIndex++;//Width.
|
||||
if (!m_Wrapper.SetArg (kernelIndex, argIndex, height)) { m_ErrorReport.push_back(loc); return false; } argIndex++;//Height.
|
||||
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex++, bufferName)) { m_ErrorReport.push_back(loc); return false; }//Buffer of unsigned char.
|
||||
if (!m_Wrapper.SetArg (kernelIndex, argIndex++, width * elementSize)) { m_ErrorReport.push_back(loc); return false; }//Width.
|
||||
if (!m_Wrapper.SetArg (kernelIndex, argIndex++, height)) { m_ErrorReport.push_back(loc); return false; }//Height.
|
||||
if (!m_Wrapper.RunKernel(kernelIndex, gridW, gridH, 1, blockW, blockH, 1)) { m_ErrorReport.push_back(loc); return false; }
|
||||
|
||||
return true;
|
||||
@ -1281,7 +1285,6 @@ EmberCL<T> RendererCL<T>::ConvertEmber(Ember<T>& ember)
|
||||
emberCL.m_CamPitch = ember.m_CamPitch;
|
||||
emberCL.m_CamDepthBlur = ember.m_CamDepthBlur;
|
||||
emberCL.m_BlurCoef = ember.BlurCoef();
|
||||
emberCL.m_FinalXformIndex = ember.UseFinalXform() ? ember.TotalXformCount() - 1 : -1;
|
||||
|
||||
for (unsigned int i = 0; i < ember.TotalXformCount() && i < MAX_CL_XFORM; i++)//Copy the relevant values for each xform, capped at the max.
|
||||
{
|
||||
|
@ -38,6 +38,7 @@ public:
|
||||
|
||||
//Ordinary member functions for OpenCL specific tasks.
|
||||
bool Init(unsigned int platform, unsigned int device, bool shared, GLuint outputTexID);
|
||||
inline unsigned int IterCountPerKernel();
|
||||
inline unsigned int IterBlocksWide();
|
||||
inline unsigned int IterBlocksHigh();
|
||||
inline unsigned int IterBlockWidth();
|
||||
@ -106,6 +107,7 @@ private:
|
||||
bool m_Init;
|
||||
bool m_NVidia;
|
||||
bool m_DoublePrecision;
|
||||
unsigned int m_IterCountPerKernel;
|
||||
unsigned int m_IterBlocksWide, m_IterBlockWidth;
|
||||
unsigned int m_IterBlocksHigh, m_IterBlockHeight;
|
||||
unsigned int m_MaxDEBlockSizeW;
|
||||
|
Reference in New Issue
Block a user