mirror of
https://bitbucket.org/mfeemster/fractorium.git
synced 2025-07-01 05:46:06 -04:00
--User changes
-Remove the option --intpalette to format the palette in the xml as ints. If they are not hex formatted, then they should always be float. This option was pointless. -Clean up some options text for the command line programs. -Allow for dragging around flames in the library tab. This is useful for setting up the order of an animation. -Make the opening of large files in Fractorium much more efficient when not-appending. -Make the opening of large files in all EmberRender and EmberAnimate more efficient. -Better error reporting when opening files. --Bug fixes -Get rid of leftover artifacts that would appear on preview thumbnails when either switching SP/DP or re-rendering previews. -Filename extension was not being appended on Linux when saving as Xml, thus making it impossible to drag that file back in because drop is filtered on extension. --Code changes -Move GCC compiler spec to C++14. Building with 5.3 now on Linux. -Use inline member data initializers. -Make a #define for static for use in Utils.h to make things a little cleaner. -Make various functions able to take arbitrary collections as their parameters rather than just vectors. -Make library collection a list rather than vector. This alleviates the need to re-sync pointers whenever the collection changes. -Subclass QTreeWidget for the library tree. Two new files added for this. -Remove all usage of #ifdef ROW_ONLY_DE in DEOpenCLKernelCreator; it was never used. -Add move constructor and assignment operator to EmberFile. -Add the ability to use a pointer to outside memory in the renderer for the vector of Ember<T>. -Make a lot more functions const where they should be.
This commit is contained in:
@ -16,14 +16,6 @@ DEOpenCLKernelCreator::DEOpenCLKernelCreator(bool doublePrecision, bool nVidia)
|
||||
{
|
||||
m_DoublePrecision = doublePrecision;
|
||||
m_NVidia = nVidia;
|
||||
#ifdef ROW_ONLY_DE
|
||||
m_LogScaleAssignDEEntryPoint = "LogScaleAssignDensityFilterKernel";
|
||||
m_GaussianDEWithoutSsEntryPoint = "GaussianDEWithoutSsKernel";
|
||||
m_GaussianDESsWithScfEntryPoint = "GaussianDESsWithScfKernel";
|
||||
m_GaussianDESsWithoutScfEntryPoint = "GaussianDESsWithoutScfKernel";
|
||||
m_GaussianDEWithoutSsNoCacheEntryPoint = "GaussianDEWithoutSsNoCacheKernel";
|
||||
m_GaussianDESsWithScfNoCacheEntryPoint = "GaussianDESsWithScfNoCacheKernel";
|
||||
m_GaussianDESsWithoutScfNoCacheEntryPoint = "GaussianDESsWithoutScfNoCacheKernel";
|
||||
m_LogScaleAssignDEKernel = CreateLogScaleAssignDEKernelString();
|
||||
m_GaussianDEWithoutSsKernel = CreateGaussianDEKernel(1);
|
||||
m_GaussianDESsWithScfKernel = CreateGaussianDEKernel(2);
|
||||
@ -31,22 +23,6 @@ DEOpenCLKernelCreator::DEOpenCLKernelCreator(bool doublePrecision, bool nVidia)
|
||||
m_GaussianDEWithoutSsNoCacheKernel = CreateGaussianDEKernelNoLocalCache(1);
|
||||
m_GaussianDESsWithScfNoCacheKernel = CreateGaussianDEKernelNoLocalCache(2);
|
||||
m_GaussianDESsWithoutScfNoCacheKernel = CreateGaussianDEKernelNoLocalCache(3);
|
||||
#else
|
||||
m_LogScaleAssignDEEntryPoint = "LogScaleAssignDensityFilterKernel";
|
||||
m_GaussianDEWithoutSsEntryPoint = "GaussianDEWithoutSsKernel";
|
||||
m_GaussianDESsWithScfEntryPoint = "GaussianDESsWithScfKernel";
|
||||
m_GaussianDESsWithoutScfEntryPoint = "GaussianDESsWithoutScfKernel";
|
||||
m_GaussianDEWithoutSsNoCacheEntryPoint = "GaussianDEWithoutSsNoCacheKernel";
|
||||
m_GaussianDESsWithScfNoCacheEntryPoint = "GaussianDESsWithScfNoCacheKernel";
|
||||
m_GaussianDESsWithoutScfNoCacheEntryPoint = "GaussianDESsWithoutScfNoCacheKernel";
|
||||
m_LogScaleAssignDEKernel = CreateLogScaleAssignDEKernelString();
|
||||
m_GaussianDEWithoutSsKernel = CreateGaussianDEKernel(1);
|
||||
m_GaussianDESsWithScfKernel = CreateGaussianDEKernel(2);
|
||||
m_GaussianDESsWithoutScfKernel = CreateGaussianDEKernel(3);
|
||||
m_GaussianDEWithoutSsNoCacheKernel = CreateGaussianDEKernelNoLocalCache(1);
|
||||
m_GaussianDESsWithScfNoCacheKernel = CreateGaussianDEKernelNoLocalCache(2);
|
||||
m_GaussianDESsWithoutScfNoCacheKernel = CreateGaussianDEKernelNoLocalCache(3);
|
||||
#endif
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@ -64,8 +40,6 @@ const string& DEOpenCLKernelCreator::LogScaleAssignDEEntryPoint() const { return
|
||||
/// <returns>The kernel source</returns>
|
||||
const string& DEOpenCLKernelCreator::GaussianDEKernel(size_t ss, uint filterWidth) const
|
||||
{
|
||||
#ifndef ROW_ONLY_DE
|
||||
|
||||
if (filterWidth > MaxDEFilterSize())
|
||||
{
|
||||
if (ss > 1)
|
||||
@ -79,7 +53,6 @@ const string& DEOpenCLKernelCreator::GaussianDEKernel(size_t ss, uint filterWidt
|
||||
return m_GaussianDEWithoutSsNoCacheKernel;//SS 1;
|
||||
}
|
||||
else//Use cache.
|
||||
#endif
|
||||
{
|
||||
if (ss > 1)
|
||||
{
|
||||
@ -101,8 +74,6 @@ const string& DEOpenCLKernelCreator::GaussianDEKernel(size_t ss, uint filterWidt
|
||||
/// <returns>The name of the density estimation filtering entry point kernel function</returns>
|
||||
const string& DEOpenCLKernelCreator::GaussianDEEntryPoint(size_t ss, uint filterWidth) const
|
||||
{
|
||||
#ifndef ROW_ONLY_DE
|
||||
|
||||
if (filterWidth > MaxDEFilterSize())
|
||||
{
|
||||
if (ss > 1)
|
||||
@ -116,7 +87,6 @@ const string& DEOpenCLKernelCreator::GaussianDEEntryPoint(size_t ss, uint filter
|
||||
return m_GaussianDEWithoutSsNoCacheEntryPoint;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
if (ss > 1)
|
||||
{
|
||||
@ -206,210 +176,6 @@ string DEOpenCLKernelCreator::CreateLogScaleAssignDEKernelString()
|
||||
return os.str();
|
||||
}
|
||||
|
||||
#ifdef ROW_ONLY_DE
|
||||
string DEOpenCLKernelCreator::CreateGaussianDEKernel(size_t ss)
|
||||
{
|
||||
bool doSS = ss > 1;
|
||||
bool doScf = !(ss & 1);
|
||||
ostringstream os;
|
||||
os <<
|
||||
ConstantDefinesString(m_DoublePrecision) <<
|
||||
DensityFilterCLStructString <<
|
||||
UnionCLStructString <<
|
||||
"__kernel void " << GaussianDEEntryPoint(ss, MaxDEFilterSize()) << "(\n" <<
|
||||
" const __global real4_bucket* histogram,\n"
|
||||
" __global real4reals_bucket* accumulator,\n"
|
||||
" __constant DensityFilterCL* densityFilter,\n"
|
||||
" const __global real_bucket_t* filterCoefs,\n"
|
||||
" const __global real_bucket_t* filterWidths,\n"
|
||||
" const __global uint* coefIndices,\n"
|
||||
" const uint chunkSizeW,\n"
|
||||
" const uint chunkSizeH,\n"
|
||||
" const uint colChunkPass,\n"
|
||||
" const uint rowChunkPass\n"
|
||||
"\t)\n"
|
||||
"{\n"
|
||||
" uint rowsToProcess = 32;\n"//Rows to process.
|
||||
"\n"
|
||||
" if (((((BLOCK_ID_X * chunkSizeW) + colChunkPass) * BLOCK_SIZE_X) + THREAD_ID_X >= densityFilter->m_SuperRasW) ||\n"
|
||||
" ((((BLOCK_ID_Y * chunkSizeH) + rowChunkPass) * rowsToProcess) + THREAD_ID_Y >= densityFilter->m_SuperRasH))\n"
|
||||
" return;\n"
|
||||
"\n";
|
||||
|
||||
if (doSS)
|
||||
{
|
||||
os <<
|
||||
" uint ss = (uint)floor((real_bucket_t)densityFilter->m_Supersample / 2.0);\n"
|
||||
" int densityBoxLeftX;\n"
|
||||
" int densityBoxRightX;\n"
|
||||
" int densityBoxTopY;\n"
|
||||
" int densityBoxBottomY;\n"
|
||||
"\n";
|
||||
|
||||
if (doScf)
|
||||
os <<
|
||||
" real_bucket_t scfact = pow(densityFilter->m_Supersample / (densityFilter->m_Supersample + (real_bucket_t)1.0), (real_bucket_t)2.0);\n";
|
||||
}
|
||||
|
||||
os <<
|
||||
" uint fullTempBoxWidth;\n"
|
||||
" uint leftBound, rightBound, topBound, botBound;\n"
|
||||
" uint blockHistStartRow, blockHistEndRow, histCol;\n"
|
||||
" uint blockHistStartCol, boxReadStartCol, boxReadEndCol;\n"
|
||||
" uint accumWriteStartCol, colsToWrite, colOffset, colsToWriteOffset;\n"
|
||||
" int histRow, filterRow, accumWriteOffset;\n"
|
||||
"\n"
|
||||
" fullTempBoxWidth = BLOCK_SIZE_X + (densityFilter->m_FilterWidth * 2);\n"
|
||||
//Compute the bounds of the area to be sampled, which is just the ends minus the super sample minus 1.
|
||||
" leftBound = densityFilter->m_Supersample - 1;\n"
|
||||
" rightBound = densityFilter->m_SuperRasW - (densityFilter->m_Supersample - 1);\n"
|
||||
" topBound = densityFilter->m_Supersample - 1;\n"
|
||||
" botBound = densityFilter->m_SuperRasH - (densityFilter->m_Supersample - 1);\n"
|
||||
"\n"
|
||||
//Start and end values are the indices in the histogram read from
|
||||
//and written to in the accumulator. They are not the indices for the local block of data.
|
||||
//Before computing local offsets, compute the global offsets first to determine if any rows or cols fall outside of the bounds.
|
||||
" blockHistStartRow = min(botBound, topBound + (((BLOCK_ID_Y * chunkSizeH) + rowChunkPass) * rowsToProcess));\n"//The first histogram row this block will process.
|
||||
" blockHistEndRow = min(botBound, blockHistStartRow + rowsToProcess);\n"//The last histogram row this block will process, clamped to the last row.
|
||||
" blockHistStartCol = min(rightBound, leftBound + (((BLOCK_ID_X * chunkSizeW) + colChunkPass) * BLOCK_SIZE_X));\n"//The first histogram column this block will process.
|
||||
" boxReadStartCol = densityFilter->m_FilterWidth - min(densityFilter->m_FilterWidth, blockHistStartCol);\n"//The first box col this block will read from when copying to the accumulator.
|
||||
" boxReadEndCol = densityFilter->m_FilterWidth + min(densityFilter->m_FilterWidth + BLOCK_SIZE_X, densityFilter->m_SuperRasW - blockHistStartCol);\n"//The last box col this block will read from when copying to the accumulator.
|
||||
"\n"
|
||||
//Last, the indices in the global accumulator that the local bounds will be writing to.
|
||||
" accumWriteStartCol = blockHistStartCol - min(densityFilter->m_FilterWidth, blockHistStartCol);\n"//The first column in the accumulator this block will write to.
|
||||
" colsToWrite = ceil((real_bucket_t)(boxReadEndCol - boxReadStartCol) / (real_bucket_t)BLOCK_SIZE_X);\n"//Elements per thread to be written to the accumulator.
|
||||
" histCol = blockHistStartCol + THREAD_ID_X;\n"//The histogram column this individual thread will be reading from.
|
||||
"\n"
|
||||
" if (histCol >= rightBound)\n"
|
||||
" return;\n"
|
||||
"\n"
|
||||
//Compute the col position in this local box to serve as the center position
|
||||
//from which filter application offsets are computed.
|
||||
//These are the local indices for the local data that are temporarily accumulated to before
|
||||
//writing out to the global accumulator.
|
||||
" uint boxCol = densityFilter->m_FilterWidth + THREAD_ID_X;\n"
|
||||
" uint colsToZeroOffset, colsToZero = ceil((real_bucket_t)fullTempBoxWidth / (real_bucket_t)(BLOCK_SIZE_X));\n"//Usually is 2.
|
||||
" int i, j, k, jmin, jmax;\n"
|
||||
" uint filterSelectInt, filterCoefIndex;\n"
|
||||
" real_bucket_t cacheLog;\n"
|
||||
" real_bucket_t filterSelect;\n"
|
||||
" real4_bucket bucket;\n"
|
||||
;
|
||||
os << " __local real4reals_bucket filterBox[192];\n";//Must be >= fullTempBoxWidth.
|
||||
os <<
|
||||
"\n"
|
||||
" colsToZeroOffset = colsToZero * THREAD_ID_X;\n"
|
||||
" colsToWriteOffset = colsToWrite * THREAD_ID_X;\n"
|
||||
" k = (int)densityFilter->m_FilterWidth;\n"//Need a signed int to use below, really is filter width, but reusing a variable to save space.
|
||||
"\n"
|
||||
" for (histRow = blockHistStartRow; histRow < blockHistEndRow; histRow++)\n"//Process pixels by row, for 32 rows.
|
||||
" {\n"
|
||||
" bucket = histogram[(histRow * densityFilter->m_SuperRasW) + histCol];\n"
|
||||
"\n"
|
||||
" if (bucket.w != 0)\n"
|
||||
" cacheLog = (densityFilter->m_K1 * log(1.0 + bucket.w * densityFilter->m_K2)) / bucket.w;\n"
|
||||
"\n";
|
||||
|
||||
if (doSS)
|
||||
{
|
||||
os <<
|
||||
" filterSelect = 0;\n"
|
||||
" densityBoxLeftX = histCol - min(histCol, ss);\n"
|
||||
" densityBoxRightX = histCol + min(ss, (densityFilter->m_SuperRasW - histCol) - 1);\n"
|
||||
" densityBoxTopY = histRow - min((uint)histRow, ss);\n"
|
||||
" densityBoxBottomY = histRow + min(ss, (densityFilter->m_SuperRasH - histRow) - 1);\n"
|
||||
"\n"
|
||||
" for (j = densityBoxTopY; j <= densityBoxBottomY; j++)\n"
|
||||
" {\n"
|
||||
" for (i = densityBoxLeftX; i <= densityBoxRightX; i++)\n"
|
||||
" {\n"
|
||||
" filterSelect += histogram[(j * densityFilter->m_SuperRasW) + i].w;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n";
|
||||
|
||||
if (doScf)
|
||||
os << " filterSelect *= scfact;\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
os
|
||||
<< " filterSelect = bucket.w;\n";
|
||||
}
|
||||
|
||||
os <<
|
||||
"\n"
|
||||
" if (filterSelect > densityFilter->m_MaxFilteredCounts)\n"
|
||||
" filterSelectInt = densityFilter->m_MaxFilterIndex;\n"
|
||||
" else if (filterSelect <= DE_THRESH)\n"
|
||||
" filterSelectInt = (int)ceil(filterSelect) - 1;\n"
|
||||
" else if (filterSelect != 0)\n"
|
||||
" filterSelectInt = (int)DE_THRESH + (int)floor(pow((real_bucket_t)(filterSelect - DE_THRESH), densityFilter->m_Curve));\n"
|
||||
" else\n"
|
||||
" filterSelectInt = 0;\n"
|
||||
"\n"
|
||||
" if (filterSelectInt > densityFilter->m_MaxFilterIndex)\n"
|
||||
" filterSelectInt = densityFilter->m_MaxFilterIndex;\n"
|
||||
"\n"
|
||||
" filterCoefIndex = filterSelectInt * densityFilter->m_KernelSize;\n"
|
||||
"\n"
|
||||
//With this new method, only accumulate to the temp local buffer first. Write to the final accumulator last.
|
||||
//For each loop through, note that there is a local memory barrier call inside of each call to AddToAccumNoCheck().
|
||||
//If this isn't done, pixel errors occurr and even an out of resources error occurrs because too many writes are done to the same place in memory at once.
|
||||
" jmin = min(k, histRow);\n"
|
||||
" jmax = (int)min((densityFilter->m_SuperRasH - 1) - histRow, densityFilter->m_FilterWidth);\n"
|
||||
"\n"
|
||||
" for (j = -jmin; j <= jmax; j++)\n"
|
||||
" {\n"
|
||||
" for (i = 0; i < colsToZero && (colsToZeroOffset + i) < fullTempBoxWidth; i++)\n"//Each thread zeroizes a few columns.
|
||||
" {\n"
|
||||
" filterBox[colsToZeroOffset + i].m_Real4 = 0;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" barrier(CLK_LOCAL_MEM_FENCE);\n"
|
||||
"\n"
|
||||
" if (bucket.w != 0)\n"
|
||||
" {\n"
|
||||
" filterRow = abs(j) * (densityFilter->m_FilterWidth + 1);\n"
|
||||
"\n"
|
||||
" for (i = -k; i <= k; i++)\n"
|
||||
" {\n"
|
||||
" filterSelectInt = filterCoefIndex + coefIndices[filterRow + abs(i)];\n"//Really is filterCoeffIndexPlusOffset, but reusing a variable to save space.
|
||||
" filterBox[i + boxCol].m_Real4 += (bucket * (filterCoefs[filterSelectInt] * cacheLog));\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" barrier(CLK_LOCAL_MEM_FENCE);\n"
|
||||
"\n"
|
||||
//At this point, all threads in this block have applied the filter to their surrounding pixels and stored the results in the temp local box.
|
||||
//Add the cells of it that are in bounds to the global accumulator.
|
||||
//Compute offsets in local box to read from, and offsets into global accumulator to write to.
|
||||
//Use a method here that is similar to the zeroization above: Each thread (column) in the first row iterates through all of the
|
||||
//rows and adds a few columns to the accumulator.
|
||||
//" if (THREAD_ID_X == 0)\n"
|
||||
//" {\n"
|
||||
//" for (int kk = boxReadStartCol, i = 0; kk < boxReadEndCol; kk++, i++)\n"//Each thread writes a few columns.//Could do away with kk//TODO//OPT
|
||||
//" {\n"
|
||||
//" accumulator[((histRow + j) * densityFilter->m_SuperRasW) + (accumWriteStartCol + i)].m_Real4 += filterBox[kk].m_Real4;\n"
|
||||
//" }\n"
|
||||
//" }\n"
|
||||
" accumWriteOffset = ((histRow + j) * densityFilter->m_SuperRasW) + accumWriteStartCol;\n"
|
||||
"\n"
|
||||
" for (i = 0; i < colsToWrite; i++)\n"//Each thread writes a few columns.
|
||||
" {\n"
|
||||
" colOffset = colsToWriteOffset + i;\n"
|
||||
"\n"
|
||||
" if (boxReadStartCol + colOffset < boxReadEndCol)\n"
|
||||
" accumulator[accumWriteOffset + colOffset].m_Real4 += filterBox[boxReadStartCol + colOffset].m_Real4;\n"
|
||||
" }\n"
|
||||
" }\n"//for() filter rows.
|
||||
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
|
||||
" }\n"//for() histogram rows.
|
||||
"}\n";
|
||||
return os.str();
|
||||
}
|
||||
|
||||
#else
|
||||
/// <summary>
|
||||
/// Create the gaussian density filtering kernel string.
|
||||
/// 6 different methods of processing were tried before settling on this final and fastest 7th one.
|
||||
@ -660,7 +426,6 @@ string DEOpenCLKernelCreator::CreateGaussianDEKernel(size_t ss)
|
||||
"}\n";
|
||||
return os.str();
|
||||
}
|
||||
#endif
|
||||
|
||||
/// <summary>
|
||||
/// Create the gaussian density filtering kernel string, but use no local cache and perform
|
||||
|
@ -8,8 +8,6 @@
|
||||
/// DEOpenCLKernelCreator class.
|
||||
/// </summary>
|
||||
|
||||
//#define ROW_ONLY_DE 1
|
||||
|
||||
namespace EmberCLns
|
||||
{
|
||||
/// <summary>
|
||||
@ -51,25 +49,25 @@ private:
|
||||
string CreateGaussianDEKernelNoLocalCache(size_t ss);
|
||||
|
||||
string m_LogScaleAssignDEKernel;
|
||||
string m_LogScaleAssignDEEntryPoint;
|
||||
string m_LogScaleAssignDEEntryPoint = "LogScaleAssignDensityFilterKernel";
|
||||
|
||||
string m_GaussianDEWithoutSsKernel;
|
||||
string m_GaussianDEWithoutSsEntryPoint;
|
||||
string m_GaussianDEWithoutSsEntryPoint = "GaussianDEWithoutSsKernel";
|
||||
|
||||
string m_GaussianDESsWithScfKernel;
|
||||
string m_GaussianDESsWithScfEntryPoint;
|
||||
string m_GaussianDESsWithScfEntryPoint = "GaussianDESsWithScfKernel";
|
||||
|
||||
string m_GaussianDESsWithoutScfKernel;
|
||||
string m_GaussianDESsWithoutScfEntryPoint;
|
||||
string m_GaussianDESsWithoutScfEntryPoint = "GaussianDESsWithoutScfKernel";
|
||||
|
||||
string m_GaussianDEWithoutSsNoCacheKernel;
|
||||
string m_GaussianDEWithoutSsNoCacheEntryPoint;
|
||||
string m_GaussianDEWithoutSsNoCacheEntryPoint = "GaussianDEWithoutSsNoCacheKernel";
|
||||
|
||||
string m_GaussianDESsWithScfNoCacheKernel;
|
||||
string m_GaussianDESsWithScfNoCacheEntryPoint;
|
||||
string m_GaussianDESsWithScfNoCacheEntryPoint = "GaussianDESsWithScfNoCacheKernel";
|
||||
|
||||
string m_GaussianDESsWithoutScfNoCacheKernel;
|
||||
string m_GaussianDESsWithoutScfNoCacheEntryPoint;
|
||||
string m_GaussianDESsWithoutScfNoCacheEntryPoint = "GaussianDESsWithoutScfNoCacheKernel";
|
||||
|
||||
bool m_DoublePrecision;
|
||||
bool m_NVidia;
|
||||
|
@ -10,22 +10,14 @@ namespace EmberCLns
|
||||
FinalAccumOpenCLKernelCreator::FinalAccumOpenCLKernelCreator(bool doublePrecision)
|
||||
{
|
||||
m_DoublePrecision = doublePrecision;
|
||||
m_GammaCorrectionWithAlphaCalcEntryPoint = "GammaCorrectionWithAlphaCalcKernel";
|
||||
m_GammaCorrectionWithoutAlphaCalcEntryPoint = "GammaCorrectionWithoutAlphaCalcKernel";
|
||||
m_GammaCorrectionWithAlphaCalcKernel = CreateGammaCorrectionKernelString(true);
|
||||
m_GammaCorrectionWithoutAlphaCalcKernel = CreateGammaCorrectionKernelString(false);
|
||||
m_FinalAccumEarlyClipEntryPoint = "FinalAccumEarlyClipKernel";
|
||||
m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint = "FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel";
|
||||
m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint = "FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel";
|
||||
m_GammaCorrectionWithAlphaCalcKernel = CreateGammaCorrectionKernelString(true);
|
||||
m_GammaCorrectionWithoutAlphaCalcKernel = CreateGammaCorrectionKernelString(false);
|
||||
m_FinalAccumEarlyClipKernel = CreateFinalAccumKernelString(true, false, false);
|
||||
m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel = CreateFinalAccumKernelString(true, true, true);
|
||||
m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel = CreateFinalAccumKernelString(true, false, true);
|
||||
m_FinalAccumLateClipEntryPoint = "FinalAccumLateClipKernel";
|
||||
m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint = "FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel";
|
||||
m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint = "FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel";
|
||||
m_FinalAccumLateClipKernel = CreateFinalAccumKernelString(false, false, false);
|
||||
m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel = CreateFinalAccumKernelString(false, true, true);
|
||||
m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel = CreateFinalAccumKernelString(false, false, true);
|
||||
m_FinalAccumLateClipKernel = CreateFinalAccumKernelString(false, false, false);
|
||||
m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel = CreateFinalAccumKernelString(false, true, true);
|
||||
m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel = CreateFinalAccumKernelString(false, false, true);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
@ -58,24 +58,24 @@ private:
|
||||
string CreateCalcNewRgbFunctionString(bool globalBucket);
|
||||
|
||||
string m_GammaCorrectionWithAlphaCalcKernel;
|
||||
string m_GammaCorrectionWithAlphaCalcEntryPoint;
|
||||
string m_GammaCorrectionWithAlphaCalcEntryPoint = "GammaCorrectionWithAlphaCalcKernel";
|
||||
|
||||
string m_GammaCorrectionWithoutAlphaCalcKernel;
|
||||
string m_GammaCorrectionWithoutAlphaCalcEntryPoint;
|
||||
string m_GammaCorrectionWithoutAlphaCalcEntryPoint = "GammaCorrectionWithoutAlphaCalcKernel";
|
||||
|
||||
string m_FinalAccumEarlyClipKernel;//False, false.
|
||||
string m_FinalAccumEarlyClipEntryPoint;
|
||||
string m_FinalAccumEarlyClipEntryPoint = "FinalAccumEarlyClipKernel";
|
||||
string m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel;//True, true.
|
||||
string m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint;
|
||||
string m_FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumEntryPoint = "FinalAccumEarlyClipWithAlphaCalcWithAlphaAccumKernel";
|
||||
string m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel;//False, true.
|
||||
string m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint;
|
||||
string m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint = "FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel";
|
||||
|
||||
string m_FinalAccumLateClipKernel;//False, false.
|
||||
string m_FinalAccumLateClipEntryPoint;
|
||||
string m_FinalAccumLateClipEntryPoint = "FinalAccumLateClipKernel";
|
||||
string m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel;//True, true.
|
||||
string m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint;
|
||||
string m_FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint = "FinalAccumLateClipWithAlphaCalcWithAlphaAccumKernel";
|
||||
string m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel;//False, true.
|
||||
string m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint;
|
||||
string m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint = "FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel";
|
||||
|
||||
string m_Empty;
|
||||
bool m_DoublePrecision;
|
||||
|
@ -12,9 +12,6 @@ namespace EmberCLns
|
||||
template <typename T>
|
||||
IterOpenCLKernelCreator<T>::IterOpenCLKernelCreator()
|
||||
{
|
||||
m_IterEntryPoint = "IterateKernel";
|
||||
m_ZeroizeEntryPoint = "ZeroizeKernel";
|
||||
m_SumHistEntryPoint = "SumHisteKernel";
|
||||
m_ZeroizeKernel = CreateZeroizeKernelString();
|
||||
m_SumHistKernel = CreateSumHistKernelString();
|
||||
}
|
||||
@ -23,11 +20,11 @@ IterOpenCLKernelCreator<T>::IterOpenCLKernelCreator()
|
||||
/// Accessors.
|
||||
/// </summary>
|
||||
|
||||
template <typename T> const string& IterOpenCLKernelCreator<T>::ZeroizeKernel() const { return m_ZeroizeKernel; }
|
||||
template <typename T> const string& IterOpenCLKernelCreator<T>::ZeroizeKernel() const { return m_ZeroizeKernel; }
|
||||
template <typename T> const string& IterOpenCLKernelCreator<T>::ZeroizeEntryPoint() const { return m_ZeroizeEntryPoint; }
|
||||
template <typename T> const string& IterOpenCLKernelCreator<T>::SumHistKernel() const { return m_SumHistKernel; }
|
||||
template <typename T> const string& IterOpenCLKernelCreator<T>::SumHistKernel() const { return m_SumHistKernel; }
|
||||
template <typename T> const string& IterOpenCLKernelCreator<T>::SumHistEntryPoint() const { return m_SumHistEntryPoint; }
|
||||
template <typename T> const string& IterOpenCLKernelCreator<T>::IterEntryPoint() const { return m_IterEntryPoint; }
|
||||
template <typename T> const string& IterOpenCLKernelCreator<T>::IterEntryPoint() const { return m_IterEntryPoint; }
|
||||
|
||||
/// <summary>
|
||||
/// Create the iteration kernel string using the Cuburn method.
|
||||
|
@ -43,11 +43,11 @@ private:
|
||||
string CreateSumHistKernelString() const;
|
||||
string CreateProjectionString(const Ember<T>& ember) const;
|
||||
|
||||
string m_IterEntryPoint;
|
||||
string m_IterEntryPoint = "IterateKernel";
|
||||
string m_ZeroizeKernel;
|
||||
string m_ZeroizeEntryPoint;
|
||||
string m_ZeroizeEntryPoint = "ZeroizeKernel";
|
||||
string m_SumHistKernel;
|
||||
string m_SumHistEntryPoint;
|
||||
string m_SumHistEntryPoint = "SumHisteKernel";
|
||||
FunctionMapper m_FunctionMapper;
|
||||
};
|
||||
|
||||
|
@ -10,17 +10,11 @@ namespace EmberCLns
|
||||
/// </summary>
|
||||
OpenCLWrapper::OpenCLWrapper()
|
||||
{
|
||||
m_Init = false;
|
||||
m_Shared = false;
|
||||
m_PlatformIndex = 0;
|
||||
m_DeviceIndex = 0;
|
||||
m_LocalMemSize = 0;
|
||||
//Pre-allocate some space to avoid temporary copying.
|
||||
m_Programs.reserve(4);
|
||||
m_Buffers.reserve(4);
|
||||
m_Images.reserve(4);
|
||||
m_GLImages.reserve(4);
|
||||
m_Info = OpenCLInfo::Instance();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
@ -184,18 +184,18 @@ public:
|
||||
private:
|
||||
bool CreateSPK(const string& name, const string& program, const string& entryPoint, Spk& spk, bool doublePrecision);
|
||||
|
||||
bool m_Init;
|
||||
bool m_Shared;
|
||||
size_t m_PlatformIndex;
|
||||
size_t m_DeviceIndex;
|
||||
size_t m_LocalMemSize;
|
||||
bool m_Init = false;
|
||||
bool m_Shared = false;
|
||||
size_t m_PlatformIndex = 0;
|
||||
size_t m_DeviceIndex = 0;
|
||||
size_t m_LocalMemSize = 0;
|
||||
size_t m_GlobalMemSize;
|
||||
size_t m_MaxAllocSize;
|
||||
cl::Platform m_Platform;
|
||||
cl::Context m_Context;
|
||||
cl::Device m_Device;
|
||||
cl::CommandQueue m_Queue;
|
||||
shared_ptr<OpenCLInfo> m_Info;
|
||||
shared_ptr<OpenCLInfo> m_Info = OpenCLInfo::Instance();
|
||||
std::vector<cl::Device> m_DeviceVec;
|
||||
std::vector<Spk> m_Programs;
|
||||
std::vector<NamedBuffer> m_Buffers;
|
||||
|
@ -76,14 +76,6 @@ void RendererCL<T, bucketT>::Init()
|
||||
m_FinalFormat.image_channel_data_type = CL_UNORM_INT8;//Change if this ever supports 2BPC outputs for PNG.
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Virtual destructor.
|
||||
/// </summary>
|
||||
template <typename T, typename bucketT>
|
||||
RendererCL<T, bucketT>::~RendererCL()
|
||||
{
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Non-virtual member functions for OpenCL specific tasks.
|
||||
/// </summary>
|
||||
|
@ -95,7 +95,9 @@ class EMBERCL_API RendererCL : public Renderer<T, bucketT>, public RendererCLBas
|
||||
|
||||
public:
|
||||
RendererCL(const vector<pair<size_t, size_t>>& devices, bool shared = false, GLuint outputTexID = 0);
|
||||
~RendererCL();
|
||||
RendererCL(const RendererCL<T, bucketT>& renderer) = delete;
|
||||
RendererCL<T, bucketT>& operator = (const RendererCL<T, bucketT>& renderer) = delete;
|
||||
virtual ~RendererCL() = default;
|
||||
|
||||
//Non-virtual member functions for OpenCL specific tasks.
|
||||
bool Init(const vector<pair<size_t, size_t>>& devices, bool shared, GLuint outputTexID);
|
||||
|
Reference in New Issue
Block a user