0.4.1.3 Beta 10/14/2014

--User Changes
 Size is no longer fixed to the window size.
 Size scaling is done differently in the final render dialog. This fixes several bugs.
 Remove Xml saving size from settings and options dialog, it no longer applies.
 Final render can be broken into strips.
 Set default save path to the desktop if none is found in the settings file.
 Set default output size to 1920x1080 if none is found in the settings file.

--Bug Fixes
 Better memory size reporting in final render dialog.

--Code Changes
 Migrate to C++11, Qt 5.3.1, and Visual Studio 2013.
 Change most instances of unsigned int to size_t, and int to intmax_t.
 Add m_OrigPixPerUnit and m_ScaleType to Ember for scaling purposes.
 Replace some sprintf_s() calls in XmlToEmber with ostringstream.
 Move more non-templated members into RendererBase.
 Add CopyVec() overload that takes a per element function pointer.
 Add vector Memset().
 Replace '&' with '+' instead of "&amp;" in XmlToEmber for much faster parsing.
 Break strips rendering out into EmberCommon and call from EmberRender and Fractorium.
 Make AddAndWriteBuffer() just call WriteBuffer().
 Make AddAndWriteImage() delete the existing image first before replacing it.
 Add SetOutputTexture() to RendererCL to support making new textures in response to resize events.
 Remove multiple return statements in RendererCL, and replace with a bool that tracks results.
 Add ToDouble(), MakeEnd(), ToString() and Exists() wrappers in Fractorium.
 Add Size() wrapper in EmberFile.
 Make QString function arguments const QString&, and string function arguments const string&.
 Make ShowCritical() wrapper for invoking a message box from another thread.
 Add combo box to TwoButtonWidget and rename.
This commit is contained in:
mfeemster
2014-10-14 08:53:15 -07:00
parent 44c90abb32
commit 9e94170a70
80 changed files with 4358 additions and 3661 deletions

View File

@ -85,7 +85,7 @@ template <typename T> string DEOpenCLKernelCreator<T>::LogScaleAssignDEEntryPoin
/// <param name="filterWidth">Filter width</param>
/// <returns>The kernel source</returns>
template <typename T>
string DEOpenCLKernelCreator<T>::GaussianDEKernel(unsigned int ss, unsigned int filterWidth)
string DEOpenCLKernelCreator<T>::GaussianDEKernel(size_t ss, unsigned int filterWidth)
{
if ((typeid(T) == typeid(double)) || (filterWidth > MaxDEFilterSize()))//Type double does not use cache.
{
@ -120,7 +120,7 @@ string DEOpenCLKernelCreator<T>::GaussianDEKernel(unsigned int ss, unsigned int
/// <param name="filterWidth">Filter width</param>
/// <returns>The name of the density estimation filtering entry point kernel function</returns>
template <typename T>
string DEOpenCLKernelCreator<T>::GaussianDEEntryPoint(unsigned int ss, unsigned int filterWidth)
string DEOpenCLKernelCreator<T>::GaussianDEEntryPoint(size_t ss, unsigned int filterWidth)
{
if ((typeid(T) == typeid(double)) || (filterWidth > MaxDEFilterSize()))//Type double does not use cache.
{
@ -292,7 +292,7 @@ string DEOpenCLKernelCreator<T>::CreateLogScaleAssignDEKernelString()
/// <param name="ss">The supersample being used</param>
/// <returns>The kernel string</returns>
template <typename T>
string DEOpenCLKernelCreator<T>::CreateGaussianDEKernel(unsigned int ss)
string DEOpenCLKernelCreator<T>::CreateGaussianDEKernel(size_t ss)
{
bool doSS = ss > 1;
bool doScf = !(ss & 1);
@ -552,7 +552,7 @@ string DEOpenCLKernelCreator<T>::CreateGaussianDEKernel(unsigned int ss)
/// <param name="ss">The supersample being used</param>
/// <returns>The kernel string</returns>
template <typename T>
string DEOpenCLKernelCreator<T>::CreateGaussianDEKernelNoLocalCache(unsigned int ss)
string DEOpenCLKernelCreator<T>::CreateGaussianDEKernelNoLocalCache(size_t ss)
{
bool doSS = ss > 1;
bool doScf = !(ss & 1);

View File

@ -39,8 +39,8 @@ public:
string LogScaleSumDEEntryPoint();
string LogScaleAssignDEKernel();
string LogScaleAssignDEEntryPoint();
string GaussianDEKernel(unsigned int ss, unsigned int filterWidth);
string GaussianDEEntryPoint(unsigned int ss, unsigned int filterWidth);
string GaussianDEKernel(size_t ss, unsigned int filterWidth);
string GaussianDEEntryPoint(size_t ss, unsigned int filterWidth);
//Miscellaneous static functions.
static unsigned int MaxDEFilterSize();
@ -51,8 +51,8 @@ private:
//Kernel creators.
string CreateLogScaleSumDEKernelString();
string CreateLogScaleAssignDEKernelString();
string CreateGaussianDEKernel(unsigned int ss);
string CreateGaussianDEKernelNoLocalCache(unsigned int ss);
string CreateGaussianDEKernel(size_t ss);
string CreateGaussianDEKernelNoLocalCache(size_t ss);
string m_LogScaleSumDEKernel;
string m_LogScaleSumDEEntryPoint;

View File

@ -63,7 +63,7 @@ template <typename T> string FinalAccumOpenCLKernelCreator<T>::FinalAccumLateCli
/// <param name="transparency">True if channels equals 4 and using transparency, else false.</param>
/// <returns>The name of the gamma correction entry point kernel function</returns>
template <typename T>
string FinalAccumOpenCLKernelCreator<T>::GammaCorrectionEntryPoint(unsigned int channels, bool transparency)
string FinalAccumOpenCLKernelCreator<T>::GammaCorrectionEntryPoint(size_t channels, bool transparency)
{
bool alphaCalc = ((channels > 3) && transparency);
return alphaCalc ? m_GammaCorrectionWithAlphaCalcEntryPoint : m_GammaCorrectionWithoutAlphaCalcEntryPoint;
@ -76,7 +76,7 @@ string FinalAccumOpenCLKernelCreator<T>::GammaCorrectionEntryPoint(unsigned int
/// <param name="transparency">True if channels equals 4 and using transparency, else false.</param>
/// <returns>The gamma correction kernel string</returns>
template <typename T>
string FinalAccumOpenCLKernelCreator<T>::GammaCorrectionKernel(unsigned int channels, bool transparency)
string FinalAccumOpenCLKernelCreator<T>::GammaCorrectionKernel(size_t channels, bool transparency)
{
bool alphaCalc = ((channels > 3) && transparency);
return alphaCalc ? m_GammaCorrectionWithAlphaCalcKernel : m_GammaCorrectionWithoutAlphaCalcKernel;
@ -92,7 +92,7 @@ string FinalAccumOpenCLKernelCreator<T>::GammaCorrectionKernel(unsigned int chan
/// <param name="alphaScale">Storage for the alpha scale value used in the kernel. 255 if transparency is true, else 0.</param>
/// <returns>The name of the final accumulation entry point kernel function</returns>
template <typename T>
string FinalAccumOpenCLKernelCreator<T>::FinalAccumEntryPoint(bool earlyClip, unsigned int channels, bool transparency, T& alphaBase, T& alphaScale)
string FinalAccumOpenCLKernelCreator<T>::FinalAccumEntryPoint(bool earlyClip, size_t channels, bool transparency, T& alphaBase, T& alphaScale)
{
bool alphaCalc = ((channels > 3) && transparency);
bool alphaAccum = channels > 3;
@ -135,7 +135,7 @@ string FinalAccumOpenCLKernelCreator<T>::FinalAccumEntryPoint(bool earlyClip, un
/// <param name="transparency">True if channels equals 4 and using transparency, else false.</param>
/// <returns>The final accumulation kernel string</returns>
template <typename T>
string FinalAccumOpenCLKernelCreator<T>::FinalAccumKernel(bool earlyClip, unsigned int channels, bool transparency)
string FinalAccumOpenCLKernelCreator<T>::FinalAccumKernel(bool earlyClip, size_t channels, bool transparency)
{
bool alphaCalc = (channels > 3 && transparency);
bool alphaAccum = channels > 3;
@ -172,7 +172,7 @@ string FinalAccumOpenCLKernelCreator<T>::FinalAccumKernel(bool earlyClip, unsign
/// <param name="transparency">True if channels equals 4 and using transparency, else false.</param>
/// <returns>The final accumulation kernel string</returns>
template <typename T>
string FinalAccumOpenCLKernelCreator<T>::CreateFinalAccumKernelString(bool earlyClip, unsigned int channels, bool transparency)
string FinalAccumOpenCLKernelCreator<T>::CreateFinalAccumKernelString(bool earlyClip, size_t channels, bool transparency)
{
return CreateFinalAccumKernelString(earlyClip, (channels > 3 && transparency), channels > 3);
}

View File

@ -46,13 +46,13 @@ public:
string FinalAccumLateClipWithAlphaCalcWithAlphaAccumEntryPoint();
string FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel();
string FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint();
string GammaCorrectionEntryPoint(unsigned int channels, bool transparency);
string GammaCorrectionKernel(unsigned int channels, bool transparency);
string FinalAccumEntryPoint(bool earlyClip, unsigned int channels, bool transparency, T& alphaBase, T& alphaScale);
string FinalAccumKernel(bool earlyClip, unsigned int channels, bool transparency);
string GammaCorrectionEntryPoint(size_t channels, bool transparency);
string GammaCorrectionKernel(size_t channels, bool transparency);
string FinalAccumEntryPoint(bool earlyClip, size_t channels, bool transparency, T& alphaBase, T& alphaScale);
string FinalAccumKernel(bool earlyClip, size_t channels, bool transparency);
private:
string CreateFinalAccumKernelString(bool earlyClip, unsigned int channels, bool transparency);
string CreateFinalAccumKernelString(bool earlyClip, size_t channels, bool transparency);
string CreateGammaCorrectionKernelString(bool alphaCalc);
string CreateFinalAccumKernelString(bool earlyClip, bool alphaCalc, bool alphaAccum);

View File

@ -46,7 +46,7 @@ template <typename T>
string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, string& parVarDefines, bool lockAccum, bool doAccum)
{
bool doublePrecision = typeid(T) == typeid(double);
unsigned int i, v, varIndex, varCount, totalXformCount = ember.TotalXformCount();
size_t i, v, varIndex, varCount, totalXformCount = ember.TotalXformCount();
ostringstream kernelIterBody, xformFuncs, os;
vector<Variation<T>*> variations;
@ -57,7 +57,7 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
for (i = 0; i < totalXformCount; i++)
{
Xform<T>* xform = ember.GetTotalXform(i);
unsigned int totalVarCount = xform->TotalVariationCount();
size_t totalVarCount = xform->TotalVariationCount();
bool needPrecalcSumSquares = false;
bool needPrecalcSqrtSumSquares = false;
bool needPrecalcAngles = false;
@ -395,7 +395,7 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
if (ember.UseFinalXform())
{
unsigned int finalIndex = ember.TotalXformCount() - 1;
size_t finalIndex = ember.TotalXformCount() - 1;
//CPU takes an extra step here to preserve the opacity of the randomly selected xform, rather than the final xform's opacity.
//The same thing takes place here automatically because secondPoint.m_LastXfUsed is used below to retrieve the opacity when accumulating.
@ -564,7 +564,7 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
///
/// The variations then use these #defines by first looking up the index of the
/// xform they belong to in the parent ember and generating the OpenCL string based on that
/// in their overriden OpenCLString() functions.
/// in their overridden OpenCLString() functions.
/// Template argument expected to be float or double.
/// </summary>
/// <param name="ember">The ember to create the values from</param>
@ -574,7 +574,7 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(Ember<T>& ember, strin
template <typename T>
void IterOpenCLKernelCreator<T>::ParVarIndexDefines(Ember<T>& ember, pair<string, vector<T>>& params, bool doVals, bool doString)
{
unsigned int i, j, k, size = 0, xformCount = ember.TotalXformCount();
size_t i, j, k, size = 0, xformCount = ember.TotalXformCount();
Xform<T>* xform;
ostringstream os;
@ -585,7 +585,7 @@ void IterOpenCLKernelCreator<T>::ParVarIndexDefines(Ember<T>& ember, pair<string
{
if (xform = ember.GetTotalXform(i))
{
unsigned int varCount = xform->TotalVariationCount();
size_t varCount = xform->TotalVariationCount();
for (j = 0; j < varCount; j++)
{
@ -632,7 +632,7 @@ void IterOpenCLKernelCreator<T>::ParVarIndexDefines(Ember<T>& ember, pair<string
template <typename T>
bool IterOpenCLKernelCreator<T>::IsBuildRequired(Ember<T>& ember1, Ember<T>& ember2)
{
unsigned int i, j, xformCount = ember1.TotalXformCount();
size_t i, j, xformCount = ember1.TotalXformCount();
if (xformCount != ember2.TotalXformCount())
return true;
@ -653,7 +653,7 @@ bool IterOpenCLKernelCreator<T>::IsBuildRequired(Ember<T>& ember1, Ember<T>& emb
{
Xform<T>* xform1 = ember1.GetTotalXform(i);
Xform<T>* xform2 = ember2.GetTotalXform(i);
unsigned int varCount = xform1->TotalVariationCount();
size_t varCount = xform1->TotalVariationCount();
if (xform1->HasPost() != xform2->HasPost())
return true;
@ -706,7 +706,7 @@ string IterOpenCLKernelCreator<T>::CreateZeroizeKernelString()
template <typename T>
string IterOpenCLKernelCreator<T>::CreateProjectionString(Ember<T>& ember)
{
unsigned int projBits = ember.ProjBits();
size_t projBits = ember.ProjBits();
ostringstream os;
if (projBits)

View File

@ -132,7 +132,7 @@ void OpenCLWrapper::ClearPrograms()
/// <param name="size">The size in bytes of the buffer</param>
/// <param name="flags">The buffer flags. Default: CL_MEM_READ_WRITE.</param>
/// <returns>True if success, else false.</returns>
bool OpenCLWrapper::AddBuffer(string name, size_t size, cl_mem_flags flags)
bool OpenCLWrapper::AddBuffer(const string& name, size_t size, cl_mem_flags flags)
{
cl_int err;
@ -153,16 +153,16 @@ bool OpenCLWrapper::AddBuffer(string name, size_t size, cl_mem_flags flags)
}
else if (GetBufferSize(bufferIndex) != size)//If it did exist, only create and add if the sizes were different.
{
m_Buffers[bufferIndex] = NamedBuffer(cl::Buffer(m_Context, flags, 0, NULL, &err), "emptybuffer");
m_Buffers[bufferIndex] = NamedBuffer(cl::Buffer(m_Context, flags, 0, NULL, &err), "emptybuffer");//First clear out the original so the two don't exist in memory at once.
cl::Buffer buff(m_Context, flags, size, NULL, &err);
cl::Buffer buff(m_Context, flags, size, NULL, &err);//Create the new buffer.
if (!CheckCL(err, "cl::Buffer()"))
return false;
NamedBuffer nb(buff, name);
NamedBuffer nb(buff, name);//Make a named buffer out of the new buffer.
m_Buffers[bufferIndex] = nb;
m_Buffers[bufferIndex] = nb;//Finally, assign.
}
//If the buffer existed and the sizes were the same, take no action.
@ -182,49 +182,14 @@ bool OpenCLWrapper::AddBuffer(string name, size_t size, cl_mem_flags flags)
/// <param name="name">The name of the buffer</param>
/// <param name="data">A pointer to the buffer</param>
/// <param name="size">The size in bytes of the buffer</param>
/// <param name="flags">The buffer flags. Default: CL_MEM_READ_WRITE.</param>
/// <returns>True if success, else false.</returns>
bool OpenCLWrapper::AddAndWriteBuffer(string name, void* data, size_t size)
bool OpenCLWrapper::AddAndWriteBuffer(const string& name, void* data, size_t size, cl_mem_flags flags)
{
cl_int err;
bool b = false;
if (m_Init)
{
int bufferIndex = FindBufferIndex(name);
//Easy case: totally new buffer, so just create and add.
if (bufferIndex == -1)
{
cl::Buffer buff(m_Context, CL_MEM_READ_WRITE, size, NULL, &err);
if (!CheckCL(err, "cl::Buffer()"))
return b;
NamedBuffer nb(buff, name);
m_Buffers.push_back(nb);
b = WriteBuffer((unsigned int)m_Buffers.size() - 1, data, size);
}
else//Harder case: the buffer already exists. Replace or overwrite?
{
if (GetBufferSize(bufferIndex) == size)//Size was equal, so just copy data without creating a new buffer.
{
b = WriteBuffer(bufferIndex, data, size);
}
else//Size was not equal, so create entirely new buffer, replace, and copy data.
{
cl::Buffer buff(m_Context, CL_MEM_READ_WRITE, size, NULL, &err);
if (!CheckCL(err, "cl::Buffer()"))
return b;
NamedBuffer nb(buff, name);
m_Buffers[bufferIndex] = nb;
b = WriteBuffer(bufferIndex, data, size);
}
}
}
if (AddBuffer(name, size, flags))
b = WriteBuffer(name, data, size);
return b;
}
@ -236,7 +201,7 @@ bool OpenCLWrapper::AddAndWriteBuffer(string name, void* data, size_t size)
/// <param name="data">A pointer to the buffer</param>
/// <param name="size">The size in bytes of the buffer</param>
/// <returns>True if success, else false.</returns>
bool OpenCLWrapper::WriteBuffer(string name, void* data, size_t size)
bool OpenCLWrapper::WriteBuffer(const string& name, void* data, size_t size)
{
int bufferIndex = FindBufferIndex(name);
@ -274,7 +239,7 @@ bool OpenCLWrapper::WriteBuffer(unsigned int bufferIndex, void* data, size_t siz
/// <param name="data">A pointer to a buffer to copy the data to</param>
/// <param name="size">The size in bytes of the buffer</param>
/// <returns>True if success, else false.</returns>
bool OpenCLWrapper::ReadBuffer(string name, void* data, size_t size)
bool OpenCLWrapper::ReadBuffer(const string& name, void* data, size_t size)
{
int bufferIndex = FindBufferIndex(name);
@ -310,7 +275,7 @@ bool OpenCLWrapper::ReadBuffer(unsigned int bufferIndex, void* data, size_t size
/// </summary>
/// <param name="name">The name of the buffer to search for</param>
/// <returns>The index if found, else -1.</returns>
int OpenCLWrapper::FindBufferIndex(string name)
int OpenCLWrapper::FindBufferIndex(const string& name)
{
for (unsigned int i = 0; i < m_Buffers.size(); i++)
if (m_Buffers[i].m_Name == name)
@ -324,7 +289,7 @@ int OpenCLWrapper::FindBufferIndex(string name)
/// </summary>
/// <param name="name">The name of the buffer to search for</param>
/// <returns>The size of the buffer if found, else 0.</returns>
unsigned int OpenCLWrapper::GetBufferSize(string name)
unsigned int OpenCLWrapper::GetBufferSize(const string& name)
{
unsigned int bufferIndex = FindBufferIndex(name);
@ -369,7 +334,7 @@ void OpenCLWrapper::ClearBuffers()
/// <param name="shared">True if shared with an OpenGL texture, else false. Default: false.</param>
/// <param name="texName">The texture ID of the shared OpenGL texture if shared. Default: 0.</param>
/// <returns>True if success, else false.</returns>
bool OpenCLWrapper::AddAndWriteImage(string name, cl_mem_flags flags, const cl::ImageFormat& format, ::size_t width, ::size_t height, ::size_t row_pitch, void* data, bool shared, GLuint texName)
bool OpenCLWrapper::AddAndWriteImage(const string& name, cl_mem_flags flags, const cl::ImageFormat& format, ::size_t width, ::size_t height, ::size_t row_pitch, void* data, bool shared, GLuint texName)
{
cl_int err;
@ -432,10 +397,10 @@ bool OpenCLWrapper::AddAndWriteImage(string name, cl_mem_flags flags, const cl::
}
else
{
NamedImage2D namedImage = m_Images[imageIndex];
if (!CompareImageParams(namedImage.m_Image, flags, format, width, height, row_pitch))
if (!CompareImageParams(m_Images[imageIndex].m_Image, flags, format, width, height, row_pitch))
{
m_Images[imageIndex] = NamedImage2D();//First clear out the original so the two don't exist in memory at once.
NamedImage2D namedImage(cl::Image2D(m_Context, flags, format, width, height, row_pitch, data, &err), name);
if (CheckCL(err, "cl::Image2D()"))
@ -517,7 +482,7 @@ bool OpenCLWrapper::WriteImage2D(unsigned int index, bool shared, ::size_t width
/// <param name="shared">True if shared with an OpenGL texture, else false.</param>
/// <param name="data">A pointer to a buffer to copy the data to</param>
/// <returns>True if success, else false.</returns>
bool OpenCLWrapper::ReadImage(string name, ::size_t width, ::size_t height, ::size_t row_pitch, bool shared, void* data)
bool OpenCLWrapper::ReadImage(const string& name, ::size_t width, ::size_t height, ::size_t row_pitch, bool shared, void* data)
{
if (m_Init)
{
@ -583,7 +548,7 @@ bool OpenCLWrapper::ReadImage(unsigned int imageIndex, ::size_t width, ::size_t
/// <param name="name">The name of the image to search for</param>
/// <param name="shared">True if shared with an OpenGL texture, else false.</param>
/// <returns>The index if found, else -1.</returns>
int OpenCLWrapper::FindImageIndex(string name, bool shared)
int OpenCLWrapper::FindImageIndex(const string& name, bool shared)
{
if (shared)
{
@ -607,7 +572,7 @@ int OpenCLWrapper::FindImageIndex(string name, bool shared)
/// <param name="name">The name of the image to search for</param>
/// <param name="shared">True if shared with an OpenGL texture, else false.</param>
/// <returns>The size of the 2D image if found, else 0.</returns>
unsigned int OpenCLWrapper::GetImageSize(string name, bool shared)
unsigned int OpenCLWrapper::GetImageSize(const string& name, bool shared)
{
int imageIndex = FindImageIndex(name, shared);
return GetImageSize(imageIndex, shared);
@ -745,7 +710,7 @@ bool OpenCLWrapper::CreateImage2DGL(IMAGEGL2D& image2DGL, cl_mem_flags flags, GL
/// </summary>
/// <param name="name">The name of the image to acquire</param>
/// <returns>True if success, else false.</returns>
bool OpenCLWrapper::EnqueueAcquireGLObjects(string name)
bool OpenCLWrapper::EnqueueAcquireGLObjects(const string& name)
{
int index = FindImageIndex(name, true);
@ -780,7 +745,7 @@ bool OpenCLWrapper::EnqueueAcquireGLObjects(IMAGEGL2D& image)
/// </summary>
/// <param name="name">The name of the image to release</param>
/// <returns>True if success, else false.</returns>
bool OpenCLWrapper::EnqueueReleaseGLObjects(string name)
bool OpenCLWrapper::EnqueueReleaseGLObjects(const string& name)
{
int index = FindImageIndex(name, true);
@ -953,7 +918,7 @@ bool OpenCLWrapper::SetImageArg(unsigned int kernelIndex, unsigned int argIndex,
/// </summary>
/// <param name="name">The name of the kernel to search for</param>
/// <returns>The index if found, else -1.</returns>
int OpenCLWrapper::FindKernelIndex(string name)
int OpenCLWrapper::FindKernelIndex(const string& name)
{
for (unsigned int i = 0; i < m_Programs.size(); i++)
if (m_Programs[i].m_Name == name)

View File

@ -111,32 +111,32 @@ public:
void ClearPrograms();
//Buffers.
bool AddBuffer(string name, size_t size, cl_mem_flags flags = CL_MEM_READ_WRITE);
bool AddAndWriteBuffer(string name, void* data, size_t size);
bool WriteBuffer(string name, void* data, size_t size);
bool AddBuffer(const string& name, size_t size, cl_mem_flags flags = CL_MEM_READ_WRITE);
bool AddAndWriteBuffer(const string& name, void* data, size_t size, cl_mem_flags flags = CL_MEM_READ_WRITE);
bool WriteBuffer(const string& name, void* data, size_t size);
bool WriteBuffer(unsigned int bufferIndex, void* data, size_t size);
bool ReadBuffer(string name, void* data, size_t size);
bool ReadBuffer(const string& name, void* data, size_t size);
bool ReadBuffer(unsigned int bufferIndex, void* data, size_t size);
int FindBufferIndex(string name);
unsigned int GetBufferSize(string name);
int FindBufferIndex(const string& name);
unsigned int GetBufferSize(const string& name);
unsigned int GetBufferSize(unsigned int bufferIndex);
void ClearBuffers();
//Images.
bool AddAndWriteImage(string name, cl_mem_flags flags, const cl::ImageFormat& format, ::size_t width, ::size_t height, ::size_t row_pitch, void* data = NULL, bool shared = false, GLuint texName = 0);
bool AddAndWriteImage(const string& name, cl_mem_flags flags, const cl::ImageFormat& format, ::size_t width, ::size_t height, ::size_t row_pitch, void* data = NULL, bool shared = false, GLuint texName = 0);
bool WriteImage2D(unsigned int index, bool shared, ::size_t width, ::size_t height, ::size_t row_pitch, void* data);
bool ReadImage(string name, ::size_t width, ::size_t height, ::size_t row_pitch, bool shared, void* data);
bool ReadImage(const string& name, ::size_t width, ::size_t height, ::size_t row_pitch, bool shared, void* data);
bool ReadImage(unsigned int imageIndex, ::size_t width, ::size_t height, ::size_t row_pitch, bool shared, void* data);
int FindImageIndex(string name, bool shared);
unsigned int GetImageSize(string name, bool shared);
int FindImageIndex(const string& name, bool shared);
unsigned int GetImageSize(const string& name, bool shared);
unsigned int GetImageSize(unsigned int imageIndex, bool shared);
bool CompareImageParams(cl::Image& image, cl_mem_flags flags, const cl::ImageFormat& format, ::size_t width, ::size_t height, ::size_t row_pitch);
void ClearImages(bool shared);
bool CreateImage2D(cl::Image2D& image2D, cl_mem_flags flags, cl::ImageFormat format, ::size_t width, ::size_t height, ::size_t row_pitch = 0, void* data = NULL);
bool CreateImage2DGL(IMAGEGL2D& image2DGL, cl_mem_flags flags, GLenum target, GLint miplevel, GLuint texobj);
bool EnqueueAcquireGLObjects(string name);
bool EnqueueAcquireGLObjects(const string& name);
bool EnqueueAcquireGLObjects(IMAGEGL2D& image);
bool EnqueueReleaseGLObjects(string name);
bool EnqueueReleaseGLObjects(const string& name);
bool EnqueueReleaseGLObjects(IMAGEGL2D& image);
bool EnqueueAcquireGLObjects(const VECTOR_CLASS<cl::Memory>* memObjects = NULL);
bool EnqueueReleaseGLObjects(const VECTOR_CLASS<cl::Memory>* memObjects = NULL);
@ -170,7 +170,7 @@ public:
}
//Kernels.
int FindKernelIndex(string name);
int FindKernelIndex(const string& name);
bool RunKernel(unsigned int kernelIndex, unsigned int totalGridWidth, unsigned int totalGridHeight, unsigned int totalGridDepth, unsigned int blockWidth, unsigned int blockHeight, unsigned int blockDepth);
//Info.

View File

@ -63,7 +63,7 @@ RendererCL<T>::~RendererCL()
}
/// <summary>
/// Ordinary member functions for OpenCL specific tasks.
/// Non-virtual member functions for OpenCL specific tasks.
/// </summary>
/// <summary>
@ -82,16 +82,17 @@ template <typename T>
bool RendererCL<T>::Init(unsigned int platform, unsigned int device, bool shared, GLuint outputTexID)
{
//Timing t;
bool b = true;
m_OutputTexID = outputTexID;
const char* loc = __FUNCTION__;
if (!m_Wrapper.Ok() || PlatformIndex() != platform || DeviceIndex() != device)
{
m_Init = false;
m_Wrapper.Init(platform, device, shared);
b = m_Wrapper.Init(platform, device, shared);
}
if (m_Wrapper.Ok() && !m_Init)
if (b && m_Wrapper.Ok() && !m_Init)
{
m_NVidia = ToLower(m_Wrapper.DeviceAndPlatformNames()).find_first_of("nvidia") != string::npos && m_Wrapper.LocalMemSize() > (32 * 1024);
m_WarpSize = m_NVidia ? 32 : 64;
@ -102,11 +103,11 @@ bool RendererCL<T>::Init(unsigned int platform, unsigned int device, bool shared
string logAssignProgram = m_DEOpenCLKernelCreator.LogScaleAssignDEKernel();
string logSumProgram = m_DEOpenCLKernelCreator.LogScaleSumDEKernel();//Build a couple of simple programs to ensure OpenCL is working right.
if (!m_Wrapper.AddProgram(m_IterOpenCLKernelCreator.ZeroizeEntryPoint(), zeroizeProgram, m_IterOpenCLKernelCreator.ZeroizeEntryPoint(), m_DoublePrecision)) { m_ErrorReport.push_back(loc); return false; }
if (!m_Wrapper.AddProgram(m_DEOpenCLKernelCreator.LogScaleAssignDEEntryPoint(), logAssignProgram, m_DEOpenCLKernelCreator.LogScaleAssignDEEntryPoint(), m_DoublePrecision)) { m_ErrorReport.push_back(loc); return false; }
if (!m_Wrapper.AddProgram(m_DEOpenCLKernelCreator.LogScaleSumDEEntryPoint(), logSumProgram, m_DEOpenCLKernelCreator.LogScaleSumDEEntryPoint(), m_DoublePrecision)) { m_ErrorReport.push_back(loc); return false; }
if (b && !(b = m_Wrapper.AddProgram(m_IterOpenCLKernelCreator.ZeroizeEntryPoint(), zeroizeProgram, m_IterOpenCLKernelCreator.ZeroizeEntryPoint(), m_DoublePrecision))) { m_ErrorReport.push_back(loc); }
if (b && !(b = m_Wrapper.AddProgram(m_DEOpenCLKernelCreator.LogScaleAssignDEEntryPoint(), logAssignProgram, m_DEOpenCLKernelCreator.LogScaleAssignDEEntryPoint(), m_DoublePrecision))) { m_ErrorReport.push_back(loc); }
if (b && !(b = m_Wrapper.AddProgram(m_DEOpenCLKernelCreator.LogScaleSumDEEntryPoint(), logSumProgram, m_DEOpenCLKernelCreator.LogScaleSumDEEntryPoint(), m_DoublePrecision))) { m_ErrorReport.push_back(loc); }
if (!m_Wrapper.AddAndWriteImage("Palette", CL_MEM_READ_ONLY, m_PaletteFormat, 256, 1, 0, NULL)) { m_ErrorReport.push_back(loc); return false; }
if (b && !(b = m_Wrapper.AddAndWriteImage("Palette", CL_MEM_READ_ONLY, m_PaletteFormat, 256, 1, 0, NULL))) { m_ErrorReport.push_back(loc); }
//This is the maximum box dimension for density filtering which consists of (blockSize * blockSize) + (2 * filterWidth).
//These blocks must be square, and ideally, 32x32.
@ -119,7 +120,29 @@ bool RendererCL<T>::Init(unsigned int platform, unsigned int device, bool shared
//t.Toc(loc);
}
return m_Init;
return b;
}
/// <summary>
/// Store a new output texture ID and re-create the final output image for it.
/// Nothing is changed if the OpenCL wrapper is not in a usable state.
/// The image is created at FinalRasW() x FinalRasH() between calls to
/// EnterResize() and LeaveResize().
/// </summary>
/// <param name="outputTexID">The texture ID to store and pass along when creating the final image</param>
/// <returns>True if success, else false.</returns>
template <typename T>
bool RendererCL<T>::SetOutputTexture(GLuint outputTexID)
{
	bool ok = m_Wrapper.Ok();

	if (ok)
	{
		m_OutputTexID = outputTexID;
		EnterResize();
		ok = m_Wrapper.AddAndWriteImage(m_FinalImageName, CL_MEM_WRITE_ONLY, m_FinalFormat, FinalRasW(), FinalRasH(), 0, NULL, m_Wrapper.Shared(), m_OutputTexID);

		if (!ok)
			m_ErrorReport.push_back(__FUNCTION__);//Record where the failure happened.

		LeaveResize();//Always paired with EnterResize(), even on failure.
	}

	return ok;
}
/// <summary>
@ -182,6 +205,49 @@ bool RendererCL<T>::ReadPoints(vector<PointCL<T>>& vec)
return false;
}
/// <summary>
/// Zero out the histogram buffer.
/// The buffer is treated as SuperRasW() x SuperRasH() elements of size v4T.
/// </summary>
/// <returns>True if success, else false.</returns>
template <typename T>
bool RendererCL<T>::ClearHist()
{
	const unsigned int width = static_cast<unsigned int>(SuperRasW());
	const unsigned int height = static_cast<unsigned int>(SuperRasH());

	return ClearBuffer(m_HistBufferName, width, height, sizeof(v4T));
}
/// <summary>
/// Zero out the density filtering (accumulator) buffer.
/// The buffer is treated as SuperRasW() x SuperRasH() elements of size v4T.
/// </summary>
/// <returns>True if success, else false.</returns>
template <typename T>
bool RendererCL<T>::ClearAccum()
{
	const unsigned int width = static_cast<unsigned int>(SuperRasW());
	const unsigned int height = static_cast<unsigned int>(SuperRasH());

	return ClearBuffer(m_AccumBufferName, width, height, sizeof(v4T));
}
/// <summary>
/// Copy the contents of a host side CPU vector into the temporary points buffer.
/// Used for debugging.
/// </summary>
/// <param name="vec">The host side vector whose elements are written</param>
/// <returns>True if success, else false.</returns>
template <typename T>
bool RendererCL<T>::WritePoints(vector<PointCL<T>>& vec)
{
	const size_t byteCount = vec.size() * sizeof(PointCL<T>);//Total size of all elements in bytes.
	void* hostData = static_cast<void*>(vec.data());

	return m_Wrapper.WriteBuffer(m_PointsBufferName, hostData, byteCount);
}
/// <summary>
/// Accessor for the stored iteration kernel source.
/// </summary>
/// <returns>A copy of the kernel string for the last built iter program.</returns>
template <typename T>
string RendererCL<T>::IterKernel()
{
	return m_IterKernel;
}
/// <summary>
/// Virtual functions overridden from RendererCLBase.
/// </summary>
/// <summary>
/// Read the final image buffer into the host side CPU buffer.
/// This must be called before saving the final output image to file.
@ -214,7 +280,7 @@ bool RendererCL<T>::ClearFinal()
if (!b)
m_ErrorReport.push_back(__FUNCTION__);
return b;
}
else
@ -222,46 +288,7 @@ bool RendererCL<T>::ClearFinal()
}
/// <summary>
/// Clear the histogram buffer with all zeroes.
/// </summary>
/// <returns>True if success, else false.</returns>
template <typename T>
bool RendererCL<T>::ClearHist()
{
return ClearBuffer(m_HistBufferName, SuperRasW(), SuperRasH(), sizeof(v4T));
}
/// <summary>
/// Clear the desnity filtering buffer with all zeroes.
/// </summary>
/// <returns>True if success, else false.</returns>
template <typename T>
bool RendererCL<T>::ClearAccum()
{
return ClearBuffer(m_AccumBufferName, SuperRasW(), SuperRasH(), sizeof(v4T));
}
/// <summary>
/// Write values from a host side CPU buffer into the temporary points buffer.
/// Used for debugging.
/// </summary>
/// <param name="vec">The host side buffer whose values to write</param>
/// <returns>True if success, else false.</returns>
template <typename T>
bool RendererCL<T>::WritePoints(vector<PointCL<T>>& vec)
{
return m_Wrapper.WriteBuffer(m_PointsBufferName, (void*)vec.data(), vec.size() * sizeof(vec[0]));
}
/// <summary>
/// Get the kernel string for the last built iter program.
/// </summary>
/// <returns>The string representation of the kernel for the last built iter program.</returns>
template <typename T>
string RendererCL<T>::IterKernel() { return m_IterKernel; }
/// <summary>
/// Public virtual functions overriden from Renderer.
/// Public virtual functions overridden from Renderer or RendererBase.
/// </summary>
/// <summary>
@ -269,7 +296,7 @@ string RendererCL<T>::IterKernel() { return m_IterKernel; }
/// </summary>
/// <returns>An unsigned 64-bit integer specifying how much video memory is available</returns>
template <typename T>
unsigned __int64 RendererCL<T>::MemoryAvailable()
size_t RendererCL<T>::MemoryAvailable()
{
return Ok() ? m_Wrapper.GetInfo<cl_ulong>(PlatformIndex(), DeviceIndex(), CL_DEVICE_GLOBAL_MEM_SIZE) : 0ULL;
}
@ -290,7 +317,7 @@ bool RendererCL<T>::Ok() const
/// </summary>
/// <param name="numChannels">The number of channels, ignored.</param>
template <typename T>
void RendererCL<T>::NumChannels(unsigned int numChannels)
void RendererCL<T>::NumChannels(size_t numChannels)
{
//The numChannels argument is never read: the member is always set to 4.
m_NumChannels = 4;
}
@ -322,7 +349,7 @@ void RendererCL<T>::ClearErrorReport()
/// </summary>
/// <returns>The number of iterations ran in a single kernel call</returns>
template <typename T>
unsigned int RendererCL<T>::SubBatchSize() const
size_t RendererCL<T>::SubBatchSize() const
{
//Computed on every call from members: blocks wide * blocks high * SQR(iterations per kernel).
return m_IterBlocksWide * m_IterBlocksHigh * SQR(m_IterCountPerKernel);
}
@ -333,24 +360,11 @@ unsigned int RendererCL<T>::SubBatchSize() const
/// </summary>
/// <returns>1</returns>
template <typename T>
unsigned int RendererCL<T>::ThreadCount() const
size_t RendererCL<T>::ThreadCount() const
{
//Constant result: no member state is consulted.
return 1;
}
/// <summary>
/// Thread count setter for the OpenCL renderer.
/// Passes both arguments through, unchanged, to the base class implementation.
/// NOTE(review): the previous comment here said the thread count is always set to 1
/// and that both arguments are ignored, but the code forwards them as given -- confirm which is intended.
/// </summary>
/// <param name="threads">The number of threads, forwarded to the base class</param>
/// <param name="seedString">The seed string, forwarded to the base class. Default: NULL.</param>
template <typename T>
void RendererCL<T>::ThreadCount(unsigned int threads, const char* seedString)
{
	//Explicitly qualified so the base class version is the one invoked.
	this->Renderer<T, T>::ThreadCount(threads, seedString);
}
/// <summary>
/// Create the density filter in the base class and copy the filter values
/// to the corresponding OpenCL buffers.
@ -360,22 +374,25 @@ void RendererCL<T>::ThreadCount(unsigned int threads, const char* seedString)
template <typename T>
bool RendererCL<T>::CreateDEFilter(bool& newAlloc)
{
//Tracks success; the `b &&` guarded steps below are skipped once it becomes false.
bool b = true;
//The base class creates the filter first; newAlloc is passed to it by reference and tested afterwards.
if (Renderer<T, T>::CreateDEFilter(newAlloc))
{
//Copy coefs and widths here. Convert and copy the other filter params right before calling the filtering kernel.
if (newAlloc)
{
DensityFilter<T>* filter = GetDensityFilter();
const char* loc = __FUNCTION__;
//NOTE(review): dynamic_cast yields a null pointer when the object is not a DensityFilter<T>; filter is dereferenced below without a check -- confirm this cannot fail.
DensityFilter<T>* filter = dynamic_cast<DensityFilter<T>*>(GetDensityFilter());
//Three writes: coefficients, widths and coefficient indices. A failed write records this function's name in m_ErrorReport.
if (!m_Wrapper.AddAndWriteBuffer(m_DECoefsBufferName, (void*)filter->Coefs(), filter->CoefsSizeBytes())) { m_ErrorReport.push_back(__FUNCTION__); return false; }
if (!m_Wrapper.AddAndWriteBuffer(m_DEWidthsBufferName, (void*)filter->Widths(), filter->WidthsSizeBytes())) { m_ErrorReport.push_back(__FUNCTION__); return false; }
if (!m_Wrapper.AddAndWriteBuffer(m_DECoefIndicesBufferName, (void*)filter->CoefIndices(), filter->CoefsIndicesSizeBytes())) { m_ErrorReport.push_back(__FUNCTION__); return false; }
if (b && !(b = m_Wrapper.AddAndWriteBuffer(m_DECoefsBufferName, (void*)filter->Coefs(), filter->CoefsSizeBytes()))) { m_ErrorReport.push_back(loc); }
if (b && !(b = m_Wrapper.AddAndWriteBuffer(m_DEWidthsBufferName, (void*)filter->Widths(), filter->WidthsSizeBytes()))) { m_ErrorReport.push_back(loc); }
if (b && !(b = m_Wrapper.AddAndWriteBuffer(m_DECoefIndicesBufferName, (void*)filter->CoefIndices(), filter->CoefsIndicesSizeBytes()))) { m_ErrorReport.push_back(loc); }
}
return true;
}
else
b = false;
return false;
return b;
}
/// <summary>
@ -387,15 +404,18 @@ bool RendererCL<T>::CreateDEFilter(bool& newAlloc)
template <typename T>
bool RendererCL<T>::CreateSpatialFilter(bool& newAlloc)
{
//Tracks success; the `b &&` guarded write below is skipped once it becomes false.
bool b = true;
//The base class creates the filter first; newAlloc is passed to it by reference and tested afterwards.
if (Renderer<T, T>::CreateSpatialFilter(newAlloc))
{
//Only when newAlloc is set are the coefficients from GetSpatialFilter() written to m_SpatialFilterCoefsBufferName; a failed write is recorded in m_ErrorReport.
if (newAlloc)
if (!m_Wrapper.AddAndWriteBuffer(m_SpatialFilterCoefsBufferName, (void*)GetSpatialFilter()->Filter(), GetSpatialFilter()->BufferSizeBytes())) { m_ErrorReport.push_back(__FUNCTION__); return false; }
if (b && !(b = m_Wrapper.AddAndWriteBuffer(m_SpatialFilterCoefsBufferName, (void*)GetSpatialFilter()->Filter(), GetSpatialFilter()->BufferSizeBytes()))) { m_ErrorReport.push_back(__FUNCTION__); }
return true;
}
else
b = false;
return false;
return b;
}
/// <summary>
@ -435,7 +455,7 @@ vector<string> RendererCL<T>::ErrorReport()
}
/// <summary>
/// Protected virtual functions overriden from Renderer.
/// Protected virtual functions overridden from Renderer.
/// </summary>
/// <summary>
@ -469,26 +489,21 @@ bool RendererCL<T>::Alloc()
size_t accumLength = SuperSize() * sizeof(v4T);
const char* loc = __FUNCTION__;
if (!m_Wrapper.AddBuffer(m_EmberBufferName, sizeof(m_EmberCL))) { m_ErrorReport.push_back(loc); return false; }
if (!m_Wrapper.AddBuffer(m_ParVarsBufferName, 128 * sizeof(T))) { m_ErrorReport.push_back(loc); return false; }
if (!m_Wrapper.AddBuffer(m_DistBufferName, CHOOSE_XFORM_GRAIN)) { m_ErrorReport.push_back(loc); return false; }//Will be resized for xaos.
if (!m_Wrapper.AddBuffer(m_CarToRasBufferName, sizeof(m_CarToRasCL))) { m_ErrorReport.push_back(loc); return false; }
if (!m_Wrapper.AddBuffer(m_DEFilterParamsBufferName, sizeof(m_DensityFilterCL))) { m_ErrorReport.push_back(loc); return false; }
if (!m_Wrapper.AddBuffer(m_SpatialFilterParamsBufferName, sizeof(m_SpatialFilterCL))) { m_ErrorReport.push_back(loc); return false; }
if (b && !(b = m_Wrapper.AddBuffer(m_EmberBufferName, sizeof(m_EmberCL)))) { m_ErrorReport.push_back(loc); }
if (b && !(b = m_Wrapper.AddBuffer(m_ParVarsBufferName, 128 * sizeof(T)))) { m_ErrorReport.push_back(loc); }
if (b && !(b = m_Wrapper.AddBuffer(m_DistBufferName, CHOOSE_XFORM_GRAIN))) { m_ErrorReport.push_back(loc); }//Will be resized for xaos.
if (b && !(b = m_Wrapper.AddBuffer(m_CarToRasBufferName, sizeof(m_CarToRasCL)))) { m_ErrorReport.push_back(loc); }
if (b && !(b = m_Wrapper.AddBuffer(m_DEFilterParamsBufferName, sizeof(m_DensityFilterCL)))) { m_ErrorReport.push_back(loc); }
if (b && !(b = m_Wrapper.AddBuffer(m_SpatialFilterParamsBufferName, sizeof(m_SpatialFilterCL)))) { m_ErrorReport.push_back(loc); }
if (!m_Wrapper.AddBuffer(m_HistBufferName, histLength)) { m_ErrorReport.push_back(loc); return false; }//Histogram. Will memset to zero later.
if (!m_Wrapper.AddBuffer(m_AccumBufferName, accumLength)) { m_ErrorReport.push_back(loc); return false; }//Accum buffer.
if (!m_Wrapper.AddBuffer(m_PointsBufferName, TotalIterKernelCount() * sizeof(PointCL<T>))) { m_ErrorReport.push_back(loc); return false; }//Points between iter calls.
if (!m_Wrapper.AddAndWriteImage(m_FinalImageName, CL_MEM_WRITE_ONLY, m_FinalFormat, FinalRasW(), FinalRasH(), 0, NULL, m_Wrapper.Shared(), m_OutputTexID))
{
m_ErrorReport.push_back(loc);
LeaveResize();
return false;
}
if (b && !(b = m_Wrapper.AddBuffer(m_HistBufferName, histLength))) { m_ErrorReport.push_back(loc); }//Histogram. Will memset to zero later.
if (b && !(b = m_Wrapper.AddBuffer(m_AccumBufferName, accumLength))) { m_ErrorReport.push_back(loc); }//Accum buffer.
if (b && !(b = m_Wrapper.AddBuffer(m_PointsBufferName, TotalIterKernelCount() * sizeof(PointCL<T>)))) { m_ErrorReport.push_back(loc); }//Points between iter calls.
if (b && !(b = SetOutputTexture(m_OutputTexID))) { m_ErrorReport.push_back(loc); }
LeaveResize();
return true;
return b;
}
/// <summary>
@ -590,7 +605,7 @@ eRenderStatus RendererCL<T>::AccumulatorToFinalImage(unsigned char* pixels, size
/// <param name="temporalSample">The temporal sample within the current pass this is running for</param>
/// <returns>Rendering statistics</returns>
template <typename T>
EmberStats RendererCL<T>::Iterate(unsigned __int64 iterCount, unsigned int pass, unsigned int temporalSample)
EmberStats RendererCL<T>::Iterate(size_t iterCount, size_t pass, size_t temporalSample)
{
bool b = true;
EmberStats stats;//Do not record bad vals with with GPU. If the user needs to investigate bad vals, use the CPU.
@ -685,17 +700,17 @@ bool RendererCL<T>::BuildIterProgramForEmber(bool doAccum)
/// <param name="itersRan">The storage for the number of iterations ran</param>
/// <returns>True if success, else false.</returns>
template <typename T>
bool RendererCL<T>::RunIter(unsigned __int64 iterCount, unsigned int pass, unsigned int temporalSample, unsigned __int64& itersRan)
bool RendererCL<T>::RunIter(size_t iterCount, size_t pass, size_t temporalSample, size_t& itersRan)
{
Timing t;//, t2(4);
bool b = false;
unsigned int fuse, argIndex;
bool b = true;
unsigned int seed, fuse, argIndex;
unsigned int iterCountPerKernel = m_IterCountPerKernel;
unsigned int iterCountPerBlock = iterCountPerKernel * m_IterBlockWidth * m_IterBlockHeight;
unsigned int seed;
unsigned int fuseFreq = m_SubBatchSize / m_IterCountPerKernel;
unsigned __int64 itersRemaining, localIterCount = 0;
unsigned int supersize = (unsigned int)SuperSize();
int kernelIndex = m_Wrapper.FindKernelIndex(m_IterOpenCLKernelCreator.IterEntryPoint());
size_t fuseFreq = m_SubBatchSize / m_IterCountPerKernel;
size_t itersRemaining, localIterCount = 0;
double percent, etaMs;
const char* loc = __FUNCTION__;
@ -706,21 +721,20 @@ bool RendererCL<T>::RunIter(unsigned __int64 iterCount, unsigned int pass, unsig
if (kernelIndex != -1)
{
b = true;
m_EmberCL = ConvertEmber(m_Ember);
m_CarToRasCL = ConvertCarToRas(*CoordMap());
if (!m_Wrapper.WriteBuffer (m_EmberBufferName, (void*)&m_EmberCL, sizeof(m_EmberCL))) { m_ErrorReport.push_back(loc); return false; }
if (!m_Wrapper.AddAndWriteBuffer(m_DistBufferName, (void*)XformDistributions(), XformDistributionsSize())) { m_ErrorReport.push_back(loc); return false; }//Will be resized for xaos.
if (!m_Wrapper.WriteBuffer (m_CarToRasBufferName, (void*)&m_CarToRasCL, sizeof(m_CarToRasCL))) { m_ErrorReport.push_back(loc); return false; }
if (b && !(b = m_Wrapper.WriteBuffer (m_EmberBufferName, (void*)&m_EmberCL, sizeof(m_EmberCL)))) { m_ErrorReport.push_back(loc); }
if (b && !(b = m_Wrapper.AddAndWriteBuffer(m_DistBufferName, (void*)XformDistributions(), XformDistributionsSize()))) { m_ErrorReport.push_back(loc); }//Will be resized for xaos.
if (b && !(b = m_Wrapper.WriteBuffer (m_CarToRasBufferName, (void*)&m_CarToRasCL, sizeof(m_CarToRasCL)))) { m_ErrorReport.push_back(loc); }
if (!m_Wrapper.AddAndWriteImage("Palette", CL_MEM_READ_ONLY, m_PaletteFormat, m_Dmap.m_Entries.size(), 1, 0, m_Dmap.m_Entries.data())) { m_ErrorReport.push_back(loc); return false; }
if (b && !(b = m_Wrapper.AddAndWriteImage("Palette", CL_MEM_READ_ONLY, m_PaletteFormat, m_Dmap.m_Entries.size(), 1, 0, m_Dmap.m_Entries.data()))) { m_ErrorReport.push_back(loc); }
//If animating, treat each temporal sample as a newly started render for fusing purposes.
if (temporalSample > 0)
m_Calls = 0;
while (itersRan < iterCount && !m_Abort)
while (b && itersRan < iterCount && !m_Abort)
{
argIndex = 0;
seed = m_Rand[0].Rand();
@ -744,27 +758,26 @@ bool RendererCL<T>::RunIter(unsigned __int64 iterCount, unsigned int pass, unsig
iterCountThisLaunch = iterCountPerKernel * (gridW * gridH * m_IterBlockWidth * m_IterBlockHeight);
}
if (!m_Wrapper.SetArg (kernelIndex, argIndex++, iterCountPerKernel)) { m_ErrorReport.push_back(loc); return false; }//Number of iters for each thread to run.
if (!m_Wrapper.SetArg (kernelIndex, argIndex++, fuse)) { m_ErrorReport.push_back(loc); return false; }//Number of iters to fuse.
if (!m_Wrapper.SetArg (kernelIndex, argIndex++, seed)) { m_ErrorReport.push_back(loc); return false; }//Seed.
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex++, m_EmberBufferName)) { m_ErrorReport.push_back(loc); return false; }//Flame.
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex++, m_ParVarsBufferName)) { m_ErrorReport.push_back(loc); return false; }//Parametric variation parameters.
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex++, m_DistBufferName)) { m_ErrorReport.push_back(loc); return false; }//Xform distributions.
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex++, m_CarToRasBufferName)) { m_ErrorReport.push_back(loc); return false; }//Coordinate converter.
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex++, m_HistBufferName)) { m_ErrorReport.push_back(loc); return false; }//Histogram.
if (!m_Wrapper.SetArg (kernelIndex, argIndex++, SuperSize())) { m_ErrorReport.push_back(loc); return false; }//Histogram size.
if (!m_Wrapper.SetImageArg (kernelIndex, argIndex++, false, "Palette")) { m_ErrorReport.push_back(loc); return false; }//Palette.
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex++, m_PointsBufferName)) { m_ErrorReport.push_back(loc); return false; }//Random start points.
if (b && !(b = m_Wrapper.SetArg (kernelIndex, argIndex++, iterCountPerKernel))) { m_ErrorReport.push_back(loc); }//Number of iters for each thread to run.
if (b && !(b = m_Wrapper.SetArg (kernelIndex, argIndex++, fuse))) { m_ErrorReport.push_back(loc); }//Number of iters to fuse.
if (b && !(b = m_Wrapper.SetArg (kernelIndex, argIndex++, seed))) { m_ErrorReport.push_back(loc); }//Seed.
if (b && !(b = m_Wrapper.SetBufferArg(kernelIndex, argIndex++, m_EmberBufferName))) { m_ErrorReport.push_back(loc); }//Flame.
if (b && !(b = m_Wrapper.SetBufferArg(kernelIndex, argIndex++, m_ParVarsBufferName))) { m_ErrorReport.push_back(loc); }//Parametric variation parameters.
if (b && !(b = m_Wrapper.SetBufferArg(kernelIndex, argIndex++, m_DistBufferName))) { m_ErrorReport.push_back(loc); }//Xform distributions.
if (b && !(b = m_Wrapper.SetBufferArg(kernelIndex, argIndex++, m_CarToRasBufferName))) { m_ErrorReport.push_back(loc); }//Coordinate converter.
if (b && !(b = m_Wrapper.SetBufferArg(kernelIndex, argIndex++, m_HistBufferName))) { m_ErrorReport.push_back(loc); }//Histogram.
if (b && !(b = m_Wrapper.SetArg (kernelIndex, argIndex++, supersize))) { m_ErrorReport.push_back(loc); }//Histogram size.
if (b && !(b = m_Wrapper.SetImageArg (kernelIndex, argIndex++, false, "Palette"))) { m_ErrorReport.push_back(loc); }//Palette.
if (b && !(b = m_Wrapper.SetBufferArg(kernelIndex, argIndex++, m_PointsBufferName))) { m_ErrorReport.push_back(loc); }//Random start points.
if (!m_Wrapper.RunKernel(kernelIndex,
if (b && !(b = m_Wrapper.RunKernel(kernelIndex,
gridW * IterBlockWidth(),//Total grid dims.
gridH * IterBlockHeight(),
1,
IterBlockWidth(),//Individual block dims.
IterBlockHeight(),
1))
1)))
{
b = false;
m_Abort = true;
m_ErrorReport.push_back(loc);
break;
@ -808,6 +821,7 @@ bool RendererCL<T>::RunIter(unsigned __int64 iterCount, unsigned int pass, unsig
}
else
{
b = false;
m_ErrorReport.push_back(loc);
}
@ -823,6 +837,7 @@ template <typename T>
eRenderStatus RendererCL<T>::RunLogScaleFilter()
{
//Timing t(4);
bool b = true;
int kernelIndex;
const char* loc = __FUNCTION__;
eRenderStatus status = RENDER_OK;
@ -843,23 +858,23 @@ eRenderStatus RendererCL<T>::RunLogScaleFilter()
OpenCLWrapper::MakeEvenGridDims(blockW, blockH, gridW, gridH);
if (!m_Wrapper.AddAndWriteBuffer(m_DEFilterParamsBufferName, (void*)&m_DensityFilterCL, sizeof(m_DensityFilterCL))) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }
if (b && !(b = m_Wrapper.AddAndWriteBuffer(m_DEFilterParamsBufferName, (void*)&m_DensityFilterCL, sizeof(m_DensityFilterCL)))) { m_ErrorReport.push_back(loc); }
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex++, m_HistBufferName)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }//Histogram.
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex++, m_AccumBufferName)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }//Accumulator.
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex++, m_DEFilterParamsBufferName)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }//DensityFilterCL.
if (b && !(b = m_Wrapper.SetBufferArg(kernelIndex, argIndex++, m_HistBufferName))) { m_ErrorReport.push_back(loc); }//Histogram.
if (b && !(b = m_Wrapper.SetBufferArg(kernelIndex, argIndex++, m_AccumBufferName))) { m_ErrorReport.push_back(loc); }//Accumulator.
if (b && !(b = m_Wrapper.SetBufferArg(kernelIndex, argIndex++, m_DEFilterParamsBufferName))) { m_ErrorReport.push_back(loc); }//DensityFilterCL.
//t.Tic();
if (!m_Wrapper.RunKernel(kernelIndex, gridW, gridH, 1, blockW, blockH, 1)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }
if (b && !(b = m_Wrapper.RunKernel(kernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { m_ErrorReport.push_back(loc); }
//t.Toc(loc);
}
else
{
status = RENDER_ERROR;
b = false;
m_ErrorReport.push_back(loc);
}
return status;
return b ? RENDER_OK : RENDER_ERROR;
}
/// <summary>
@ -870,11 +885,11 @@ eRenderStatus RendererCL<T>::RunLogScaleFilter()
template <typename T>
eRenderStatus RendererCL<T>::RunDensityFilter()
{
bool b = true;
Timing t(4);//, t2(4);
m_DensityFilterCL = ConvertDensityFilter();
int kernelIndex = MakeAndGetDensityFilterProgram(Supersample(), m_DensityFilterCL.m_FilterWidth);
const char* loc = __FUNCTION__;
eRenderStatus status = RENDER_OK;
if (kernelIndex != -1)
{
@ -909,17 +924,17 @@ eRenderStatus RendererCL<T>::RunDensityFilter()
double totalChunks = chunkSizeW * chunkSizeH;
if (!m_Wrapper.AddAndWriteBuffer(m_DEFilterParamsBufferName, (void*)&m_DensityFilterCL, sizeof(m_DensityFilterCL))) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }
if (b && !(b = m_Wrapper.AddAndWriteBuffer(m_DEFilterParamsBufferName, (void*)&m_DensityFilterCL, sizeof(m_DensityFilterCL)))) { m_ErrorReport.push_back(loc); }
for (unsigned int row = 0; row < chunkSizeH; row++)
for (unsigned int row = 0; b && !m_Abort && row < chunkSizeH; row++)
{
for (unsigned int col = 0; col < chunkSizeW; col++)
for (unsigned int col = 0; b && !m_Abort && col < chunkSizeW; col++)
{
//t2.Tic();
if (!RunDensityFilterPrivate(kernelIndex, gridW, gridH, blockSizeW, blockSizeH, chunkSizeW, chunkSizeH, row, col)) { m_Abort = true; m_ErrorReport.push_back(loc); return RENDER_ERROR; }
if (b && !(b = RunDensityFilterPrivate(kernelIndex, gridW, gridH, blockSizeW, blockSizeH, chunkSizeW, chunkSizeH, row, col))) { m_Abort = true; m_ErrorReport.push_back(loc); }
//t2.Toc(loc);
if (m_Callback)
if (b && m_Callback)
{
double percent = (double((row * chunkSizeW) + (col + 1)) / totalChunks) * 100.0;
double etaMs = ((100.0 - percent) / percent) * t.Toc();
@ -927,24 +942,21 @@ eRenderStatus RendererCL<T>::RunDensityFilter()
if (!m_Callback->ProgressFunc(m_Ember, m_ProgressParameter, percent, 1, etaMs))
Abort();
}
if (m_Abort)
return RENDER_ABORT;
}
}
if (m_Callback)
if (b && m_Callback)
m_Callback->ProgressFunc(m_Ember, m_ProgressParameter, 100.0, 1, 0.0);
//t2.Toc(__FUNCTION__ " all passes");
}
else
{
status = RENDER_ERROR;
b = false;
m_ErrorReport.push_back(loc);
}
return status;
return m_Abort ? RENDER_ABORT : (b ? RENDER_OK : RENDER_ERROR);
}
/// <summary>
@ -955,6 +967,7 @@ template <typename T>
eRenderStatus RendererCL<T>::RunFinalAccum()
{
//Timing t(4);
bool b = true;
T alphaBase;
T alphaScale;
int accumKernelIndex = MakeAndGetFinalAccumProgram(alphaBase, alphaScale);
@ -964,19 +977,18 @@ eRenderStatus RendererCL<T>::RunFinalAccum()
unsigned int blockW;
unsigned int blockH;
const char* loc = __FUNCTION__;
eRenderStatus status = RENDER_OK;
if (!m_Abort && accumKernelIndex != -1)
{
//This is needed with or without early clip.
m_SpatialFilterCL = ConvertSpatialFilter();
if (!m_Wrapper.AddAndWriteBuffer(m_SpatialFilterParamsBufferName, (void*)&m_SpatialFilterCL, sizeof(m_SpatialFilterCL))) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }
if (b && !(b = m_Wrapper.AddAndWriteBuffer(m_SpatialFilterParamsBufferName, (void*)&m_SpatialFilterCL, sizeof(m_SpatialFilterCL)))) { m_ErrorReport.push_back(loc); }
//Since early clip requires gamma correcting the entire accumulator first,
//it can't be done inside of the normal final accumulation kernel, so
//an additional kernel must be launched first.
if (EarlyClip())
if (b && EarlyClip())
{
int gammaCorrectKernelIndex = MakeAndGetGammaCorrectionProgram();
@ -989,15 +1001,15 @@ eRenderStatus RendererCL<T>::RunFinalAccum()
gridH = m_SpatialFilterCL.m_SuperRasH;
OpenCLWrapper::MakeEvenGridDims(blockW, blockH, gridW, gridH);
if (!m_Wrapper.SetBufferArg(gammaCorrectKernelIndex, argIndex++, m_AccumBufferName)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }//Accumulator.
if (!m_Wrapper.SetBufferArg(gammaCorrectKernelIndex, argIndex++, m_SpatialFilterParamsBufferName)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }//SpatialFilterCL.
if (b && !(b = m_Wrapper.SetBufferArg(gammaCorrectKernelIndex, argIndex++, m_AccumBufferName))) { m_ErrorReport.push_back(loc); }//Accumulator.
if (b && !(b = m_Wrapper.SetBufferArg(gammaCorrectKernelIndex, argIndex++, m_SpatialFilterParamsBufferName))) { m_ErrorReport.push_back(loc); }//SpatialFilterCL.
if (!m_Wrapper.RunKernel(gammaCorrectKernelIndex, gridW, gridH, 1, blockW, blockH, 1)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }
if (b && !(b = m_Wrapper.RunKernel(gammaCorrectKernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { m_ErrorReport.push_back(loc); }
}
else
{
b = false;
m_ErrorReport.push_back(loc);
return RENDER_ERROR;
}
}
@ -1008,30 +1020,30 @@ eRenderStatus RendererCL<T>::RunFinalAccum()
gridH = m_SpatialFilterCL.m_FinalRasH;
OpenCLWrapper::MakeEvenGridDims(blockW, blockH, gridW, gridH);
if (!m_Wrapper.SetBufferArg(accumKernelIndex, argIndex++, m_AccumBufferName)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }//Accumulator.
if (!m_Wrapper.SetImageArg(accumKernelIndex, argIndex++, m_Wrapper.Shared(), m_FinalImageName)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }//Final image.
if (!m_Wrapper.SetBufferArg(accumKernelIndex, argIndex++, m_SpatialFilterParamsBufferName)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }//SpatialFilterCL.
if (!m_Wrapper.SetBufferArg(accumKernelIndex, argIndex++, m_SpatialFilterCoefsBufferName)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }//Filter coefs.
if (!m_Wrapper.SetArg (accumKernelIndex, argIndex++, alphaBase)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }//Alpha base.
if (!m_Wrapper.SetArg (accumKernelIndex, argIndex++, alphaScale)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }//Alpha scale.
if (b && !(b = m_Wrapper.SetBufferArg(accumKernelIndex, argIndex++, m_AccumBufferName))) { m_ErrorReport.push_back(loc); }//Accumulator.
if (b && !(b = m_Wrapper.SetImageArg (accumKernelIndex, argIndex++, m_Wrapper.Shared(), m_FinalImageName))) { m_ErrorReport.push_back(loc); }//Final image.
if (b && !(b = m_Wrapper.SetBufferArg(accumKernelIndex, argIndex++, m_SpatialFilterParamsBufferName))) { m_ErrorReport.push_back(loc); }//SpatialFilterCL.
if (b && !(b = m_Wrapper.SetBufferArg(accumKernelIndex, argIndex++, m_SpatialFilterCoefsBufferName))) { m_ErrorReport.push_back(loc); }//Filter coefs.
if (b && !(b = m_Wrapper.SetArg (accumKernelIndex, argIndex++, alphaBase))) { m_ErrorReport.push_back(loc); }//Alpha base.
if (b && !(b = m_Wrapper.SetArg (accumKernelIndex, argIndex++, alphaScale))) { m_ErrorReport.push_back(loc); }//Alpha scale.
if (m_Wrapper.Shared())
if (!m_Wrapper.EnqueueAcquireGLObjects(m_FinalImageName)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }
if (b && m_Wrapper.Shared())
if (b && !(b = m_Wrapper.EnqueueAcquireGLObjects(m_FinalImageName))) { m_ErrorReport.push_back(loc); }
if (!m_Wrapper.RunKernel(accumKernelIndex, gridW, gridH, 1, blockW, blockH, 1)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }
if (b && !(b = m_Wrapper.RunKernel(accumKernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { m_ErrorReport.push_back(loc); }
if (m_Wrapper.Shared())
if (!m_Wrapper.EnqueueReleaseGLObjects(m_FinalImageName)) { m_ErrorReport.push_back(loc); return RENDER_ERROR; }
if (b && m_Wrapper.Shared())
if (b && !(b = m_Wrapper.EnqueueReleaseGLObjects(m_FinalImageName))) { m_ErrorReport.push_back(loc); }
//t.Toc((char*)loc);
}
else
{
status = RENDER_ERROR;
b = false;
m_ErrorReport.push_back(loc);
}
return status;
return b ? RENDER_OK : RENDER_ERROR;
}
/// <summary>
@ -1043,8 +1055,9 @@ eRenderStatus RendererCL<T>::RunFinalAccum()
/// <param name="elementSize">Size of each element</param>
/// <returns>True if success, else false.</returns>
template <typename T>
bool RendererCL<T>::ClearBuffer(string bufferName, unsigned int width, unsigned int height, unsigned int elementSize)
bool RendererCL<T>::ClearBuffer(const string& bufferName, unsigned int width, unsigned int height, unsigned int elementSize)
{
bool b = true;
int kernelIndex = m_Wrapper.FindKernelIndex(m_IterOpenCLKernelCreator.ZeroizeEntryPoint());
unsigned int argIndex = 0;
const char* loc = __FUNCTION__;
@ -1058,17 +1071,18 @@ bool RendererCL<T>::ClearBuffer(string bufferName, unsigned int width, unsigned
OpenCLWrapper::MakeEvenGridDims(blockW, blockH, gridW, gridH);
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex++, bufferName)) { m_ErrorReport.push_back(loc); return false; }//Buffer of unsigned char.
if (!m_Wrapper.SetArg (kernelIndex, argIndex++, width * elementSize)) { m_ErrorReport.push_back(loc); return false; }//Width.
if (!m_Wrapper.SetArg (kernelIndex, argIndex++, height)) { m_ErrorReport.push_back(loc); return false; }//Height.
if (!m_Wrapper.RunKernel(kernelIndex, gridW, gridH, 1, blockW, blockH, 1)) { m_ErrorReport.push_back(loc); return false; }
return true;
if (b && !(b = m_Wrapper.SetBufferArg(kernelIndex, argIndex++, bufferName))) { m_ErrorReport.push_back(loc); }//Buffer of unsigned char.
if (b && !(b = m_Wrapper.SetArg (kernelIndex, argIndex++, width * elementSize))) { m_ErrorReport.push_back(loc); }//Width.
if (b && !(b = m_Wrapper.SetArg (kernelIndex, argIndex++, height))) { m_ErrorReport.push_back(loc); }//Height.
if (b && !(b = m_Wrapper.RunKernel(kernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { m_ErrorReport.push_back(loc); }
}
else
{
b = false;
m_ErrorReport.push_back(loc);
}
return false;
return b;
}
/// <summary>
@ -1092,23 +1106,23 @@ bool RendererCL<T>::RunDensityFilterPrivate(unsigned int kernelIndex, unsigned i
unsigned int argIndex = 0;
const char* loc = __FUNCTION__;
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex, m_HistBufferName)) { m_ErrorReport.push_back(loc); return false; } argIndex++;//Histogram.
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex, m_AccumBufferName)) { m_ErrorReport.push_back(loc); return false; } argIndex++;//Accumulator.
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex, m_DEFilterParamsBufferName)) { m_ErrorReport.push_back(loc); return false; } argIndex++;//FlameDensityFilterCL.
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex, m_DECoefsBufferName)) { m_ErrorReport.push_back(loc); return false; } argIndex++;//Coefs.
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex, m_DEWidthsBufferName)) { m_ErrorReport.push_back(loc); return false; } argIndex++;//Widths.
if (!m_Wrapper.SetBufferArg(kernelIndex, argIndex, m_DECoefIndicesBufferName)) { m_ErrorReport.push_back(loc); return false; } argIndex++;//Coef indices.
if (!m_Wrapper.SetArg( kernelIndex, argIndex, chunkSizeW)) { m_ErrorReport.push_back(loc); return false; } argIndex++;//Chunk size width (gapW + 1).
if (!m_Wrapper.SetArg( kernelIndex, argIndex, chunkSizeH)) { m_ErrorReport.push_back(loc); return false; } argIndex++;//Chunk size height (gapH + 1).
if (!m_Wrapper.SetArg( kernelIndex, argIndex, rowParity)) { m_ErrorReport.push_back(loc); return false; } argIndex++;//Row parity.
if (!m_Wrapper.SetArg( kernelIndex, argIndex, colParity)) { m_ErrorReport.push_back(loc); return false; } argIndex++;//Col parity.
if (b && !(b = m_Wrapper.SetBufferArg(kernelIndex, argIndex, m_HistBufferName))) { m_ErrorReport.push_back(loc); } argIndex++;//Histogram.
if (b && !(b = m_Wrapper.SetBufferArg(kernelIndex, argIndex, m_AccumBufferName))) { m_ErrorReport.push_back(loc); } argIndex++;//Accumulator.
if (b && !(b = m_Wrapper.SetBufferArg(kernelIndex, argIndex, m_DEFilterParamsBufferName))) { m_ErrorReport.push_back(loc); } argIndex++;//FlameDensityFilterCL.
if (b && !(b = m_Wrapper.SetBufferArg(kernelIndex, argIndex, m_DECoefsBufferName))) { m_ErrorReport.push_back(loc); } argIndex++;//Coefs.
if (b && !(b = m_Wrapper.SetBufferArg(kernelIndex, argIndex, m_DEWidthsBufferName))) { m_ErrorReport.push_back(loc); } argIndex++;//Widths.
if (b && !(b = m_Wrapper.SetBufferArg(kernelIndex, argIndex, m_DECoefIndicesBufferName))) { m_ErrorReport.push_back(loc); } argIndex++;//Coef indices.
if (b && !(b = m_Wrapper.SetArg( kernelIndex, argIndex, chunkSizeW))) { m_ErrorReport.push_back(loc); } argIndex++;//Chunk size width (gapW + 1).
if (b && !(b = m_Wrapper.SetArg( kernelIndex, argIndex, chunkSizeH))) { m_ErrorReport.push_back(loc); } argIndex++;//Chunk size height (gapH + 1).
if (b && !(b = m_Wrapper.SetArg( kernelIndex, argIndex, rowParity))) { m_ErrorReport.push_back(loc); } argIndex++;//Row parity.
if (b && !(b = m_Wrapper.SetArg( kernelIndex, argIndex, colParity))) { m_ErrorReport.push_back(loc); } argIndex++;//Col parity.
//t.Toc(__FUNCTION__ " set args");
//t.Tic();
if (!m_Wrapper.RunKernel(kernelIndex, gridW, gridH, 1, blockW, blockH, 1)) { m_ErrorReport.push_back(loc); return false; }//Method 7, accumulating to temp box area.
if (b && !(b = m_Wrapper.RunKernel(kernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { m_ErrorReport.push_back(loc); }//Method 7, accumulating to temp box area.
//t.Toc(__FUNCTION__ " RunKernel()");
return true;
return b;
}
/// <summary>
@ -1118,7 +1132,7 @@ bool RendererCL<T>::RunDensityFilterPrivate(unsigned int kernelIndex, unsigned i
/// <param name="filterWidth">Width of the gaussian filter</param>
/// <returns>The kernel index if successful, else -1.</returns>
template <typename T>
int RendererCL<T>::MakeAndGetDensityFilterProgram(unsigned int ss, unsigned int filterWidth)
int RendererCL<T>::MakeAndGetDensityFilterProgram(size_t ss, unsigned int filterWidth)
{
string deEntryPoint = m_DEOpenCLKernelCreator.GaussianDEEntryPoint(ss, filterWidth);
int kernelIndex = m_Wrapper.FindKernelIndex(deEntryPoint);
@ -1210,21 +1224,21 @@ template <typename T>
DensityFilterCL<T> RendererCL<T>::ConvertDensityFilter()
{
DensityFilterCL<T> filterCL;
DensityFilter<T>* densityFilter = GetDensityFilter();
DensityFilter<T>* densityFilter = dynamic_cast<DensityFilter<T>*>(GetDensityFilter());
filterCL.m_Supersample = Supersample();
filterCL.m_SuperRasW = SuperRasW();
filterCL.m_SuperRasH = SuperRasH();
filterCL.m_Supersample = (unsigned int)Supersample();
filterCL.m_SuperRasW = (unsigned int)SuperRasW();
filterCL.m_SuperRasH = (unsigned int)SuperRasH();
filterCL.m_K1 = K1();
filterCL.m_K2 = K2();
if (densityFilter)
{
filterCL.m_Curve = densityFilter->Curve();
filterCL.m_KernelSize = densityFilter->KernelSize();
filterCL.m_MaxFilterIndex = densityFilter->MaxFilterIndex();
filterCL.m_MaxFilteredCounts = densityFilter->MaxFilteredCounts();
filterCL.m_FilterWidth = densityFilter->FilterWidth();
filterCL.m_KernelSize = (unsigned int)densityFilter->KernelSize();
filterCL.m_MaxFilterIndex = (unsigned int)densityFilter->MaxFilterIndex();
filterCL.m_MaxFilteredCounts = (unsigned int)densityFilter->MaxFilteredCounts();
filterCL.m_FilterWidth = (unsigned int)densityFilter->FilterWidth();
}
return filterCL;
@ -1244,15 +1258,15 @@ SpatialFilterCL<T> RendererCL<T>::ConvertSpatialFilter()
PrepFinalAccumVals(background, g, linRange, vibrancy);
filterCL.m_SuperRasW = SuperRasW();
filterCL.m_SuperRasH = SuperRasH();
filterCL.m_FinalRasW = FinalRasW();
filterCL.m_FinalRasH = FinalRasH();
filterCL.m_Supersample = Supersample();
filterCL.m_FilterWidth = GetSpatialFilter()->FinalFilterWidth();
filterCL.m_NumChannels = Renderer<T, T>::NumChannels();
filterCL.m_BytesPerChannel = BytesPerChannel();
filterCL.m_DensityFilterOffset = DensityFilterOffset();
filterCL.m_SuperRasW = (unsigned int)SuperRasW();
filterCL.m_SuperRasH = (unsigned int)SuperRasH();
filterCL.m_FinalRasW = (unsigned int)FinalRasW();
filterCL.m_FinalRasH = (unsigned int)FinalRasH();
filterCL.m_Supersample = (unsigned int)Supersample();
filterCL.m_FilterWidth = (unsigned int)GetSpatialFilter()->FinalFilterWidth();
filterCL.m_NumChannels = (unsigned int)Renderer<T, T>::NumChannels();
filterCL.m_BytesPerChannel = (unsigned int)BytesPerChannel();
filterCL.m_DensityFilterOffset = (unsigned int)DensityFilterOffset();
filterCL.m_Transparency = Transparency();
filterCL.m_YAxisUp = (unsigned int)m_YAxisUp;
filterCL.m_Vibrancy = vibrancy;
@ -1333,7 +1347,7 @@ CarToRasCL<T> RendererCL<T>::ConvertCarToRas(const CarToRas<T>& carToRas)
{
CarToRasCL<T> carToRasCL;
carToRasCL.m_RasWidth = carToRas.RasWidth();
carToRasCL.m_RasWidth = (unsigned int)carToRas.RasWidth();
carToRasCL.m_PixPerImageUnitW = carToRas.PixPerImageUnitW();
carToRasCL.m_RasLlX = carToRas.RasLlX();
carToRasCL.m_PixPerImageUnitH = carToRas.PixPerImageUnitH();

View File

@ -15,8 +15,8 @@ namespace EmberCLns
/// <summary>
/// Base class declaring the two final-image operations, ReadFinal() and ClearFinal(),
/// as virtual member functions.
/// NOTE(review): this listing carries two declarations of each member, one with an
/// inline body that returns false and one pure virtual (= 0). A class cannot declare
/// the same member function twice, so presumably these are the before/after lines of
/// a single change; confirm which pair is current before relying on either.
/// </summary>
class EMBERCL_API RendererCLBase
{
public:
//Variant with a default body: always returns false and ignores the pixels argument.
virtual bool ReadFinal(unsigned char* pixels) { return false; }
//Variant with a default body: always returns false.
virtual bool ClearFinal() { return false; }
//Pure virtual variant: derived classes must supply ReadFinal() to be instantiable.
virtual bool ReadFinal(unsigned char* pixels) = 0;
//Pure virtual variant: derived classes must supply ClearFinal() to be instantiable.
virtual bool ClearFinal() = 0;
};
/// <summary>
@ -36,8 +36,9 @@ public:
RendererCL(unsigned int platform = 0, unsigned int device = 0, bool shared = false, GLuint outputTexID = 0);
~RendererCL();
//Ordinary member functions for OpenCL specific tasks.
//Non-virtual member functions for OpenCL specific tasks.
bool Init(unsigned int platform, unsigned int device, bool shared, GLuint outputTexID);
bool SetOutputTexture(GLuint outputTexID);
inline unsigned int IterCountPerKernel();
inline unsigned int IterBlocksWide();
inline unsigned int IterBlocksHigh();
@ -51,50 +52,51 @@ public:
bool ReadHist();
bool ReadAccum();
bool ReadPoints(vector<PointCL<T>>& vec);
virtual bool ReadFinal(unsigned char* pixels);
virtual bool ClearFinal();
bool ClearHist();
bool ClearAccum();
bool WritePoints(vector<PointCL<T>>& vec);
string IterKernel();
//Public virtual functions overriden from Renderer.
virtual unsigned __int64 MemoryAvailable();
virtual bool Ok() const;
virtual void NumChannels(unsigned int numChannels);
virtual void DumpErrorReport();
virtual void ClearErrorReport();
virtual unsigned int SubBatchSize() const;
virtual unsigned int ThreadCount() const;
virtual void ThreadCount(unsigned int threads, const char* seedString = NULL);
virtual bool CreateDEFilter(bool& newAlloc);
virtual bool CreateSpatialFilter(bool& newAlloc);
virtual eRendererType RendererType() const;
virtual string ErrorReportString();
virtual vector<string> ErrorReport();
//Virtual functions overridden from RendererCLBase.
virtual bool ReadFinal(unsigned char* pixels);
virtual bool ClearFinal();
//Public virtual functions overridden from Renderer or RendererBase.
virtual size_t MemoryAvailable() override;
virtual bool Ok() const override;
virtual void NumChannels(size_t numChannels) override;
virtual void DumpErrorReport() override;
virtual void ClearErrorReport() override;
virtual size_t SubBatchSize() const override;
virtual size_t ThreadCount() const override;
virtual bool CreateDEFilter(bool& newAlloc) override;
virtual bool CreateSpatialFilter(bool& newAlloc) override;
virtual eRendererType RendererType() const override;
virtual string ErrorReportString() override;
virtual vector<string> ErrorReport() override;
#ifndef TEST_CL
protected:
#endif
//Protected virtual functions overriden from Renderer.
virtual void MakeDmap(T colorScalar);
virtual bool Alloc();
virtual bool ResetBuckets(bool resetHist = true, bool resetAccum = true);
virtual eRenderStatus LogScaleDensityFilter();
virtual eRenderStatus GaussianDensityFilter();
virtual eRenderStatus AccumulatorToFinalImage(unsigned char* pixels, size_t finalOffset);
virtual EmberStats Iterate(unsigned __int64 iterCount, unsigned int pass, unsigned int temporalSample);
//Protected virtual functions overridden from Renderer.
virtual void MakeDmap(T colorScalar) override;
virtual bool Alloc() override;
virtual bool ResetBuckets(bool resetHist = true, bool resetAccum = true) override;
virtual eRenderStatus LogScaleDensityFilter() override;
virtual eRenderStatus GaussianDensityFilter() override;
virtual eRenderStatus AccumulatorToFinalImage(unsigned char* pixels, size_t finalOffset) override;
virtual EmberStats Iterate(size_t iterCount, size_t pass, size_t temporalSample) override;
private:
//Private functions for making and running OpenCL programs.
bool BuildIterProgramForEmber(bool doAccum = true);
bool RunIter(unsigned __int64 iterCount, unsigned int pass, unsigned int temporalSample, unsigned __int64& itersRan);
bool RunIter(size_t iterCount, size_t pass, size_t temporalSample, size_t& itersRan);
eRenderStatus RunLogScaleFilter();
eRenderStatus RunDensityFilter();
eRenderStatus RunFinalAccum();
bool ClearBuffer(string bufferName, unsigned int width, unsigned int height, unsigned int elementSize);
bool ClearBuffer(const string& bufferName, unsigned int width, unsigned int height, unsigned int elementSize);
bool RunDensityFilterPrivate(unsigned int kernelIndex, unsigned int gridW, unsigned int gridH, unsigned int blockW, unsigned int blockH, unsigned int chunkSizeW, unsigned int chunkSizeH, unsigned int rowParity, unsigned int colParity);
int MakeAndGetDensityFilterProgram(unsigned int ss, unsigned int filterWidth);
int MakeAndGetDensityFilterProgram(size_t ss, unsigned int filterWidth);
int MakeAndGetFinalAccumProgram(T& alphaBase, T& alphaScale);
int MakeAndGetGammaCorrectionProgram();
@ -113,8 +115,9 @@ private:
unsigned int m_MaxDEBlockSizeW;
unsigned int m_MaxDEBlockSizeH;
unsigned int m_WarpSize;
unsigned int m_Calls;
size_t m_Calls;
//Buffer names.
string m_EmberBufferName;
string m_ParVarsBufferName;
string m_DistBufferName;
@ -130,6 +133,7 @@ private:
string m_FinalImageName;
string m_PointsBufferName;
//Kernels.
string m_IterKernel;
OpenCLWrapper m_Wrapper;