mirror of
https://bitbucket.org/mfeemster/fractorium.git
synced 2025-01-21 21:20:07 -05:00
--Code changes
-Make summing histograms from one or more secondary devices to the primary a little more elegant by using HOST_PTR. This requires one less copy.
This commit is contained in:
parent
7e1d41dfc7
commit
124f807772
@ -118,7 +118,7 @@ void OpenCLWrapper::ClearPrograms()
|
|||||||
/// Add a buffer with the specified size and name.
|
/// Add a buffer with the specified size and name.
|
||||||
/// Three possible actions to take:
|
/// Three possible actions to take:
|
||||||
/// Buffer didn't exist, so create and add.
|
/// Buffer didn't exist, so create and add.
|
||||||
/// Buffer existed, but was a different size. Replace.
|
/// Buffer existed, but was a different size, replace.
|
||||||
/// Buffer existed with the same size, do nothing.
|
/// Buffer existed with the same size, do nothing.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="name">The name of the buffer</param>
|
/// <param name="name">The name of the buffer</param>
|
||||||
@ -145,7 +145,7 @@ bool OpenCLWrapper::AddBuffer(const string& name, size_t size, cl_mem_flags flag
|
|||||||
}
|
}
|
||||||
else if (GetBufferSize(bufferIndex) != size)//If it did exist, only create and add if the sizes were different.
|
else if (GetBufferSize(bufferIndex) != size)//If it did exist, only create and add if the sizes were different.
|
||||||
{
|
{
|
||||||
m_Buffers[bufferIndex] = NamedBuffer(cl::Buffer(m_Context, flags, 0, nullptr, &err), "emptybuffer");//First clear out the original so the two don't exist in memory at once.
|
m_Buffers[bufferIndex] = NamedBuffer(cl::Buffer(m_Context, flags, size_t(0), nullptr, &err), "emptybuffer");//First clear out the original so the two don't exist in memory at once.
|
||||||
cl::Buffer buff(m_Context, flags, size, nullptr, &err);//Create the new buffer.
|
cl::Buffer buff(m_Context, flags, size, nullptr, &err);//Create the new buffer.
|
||||||
|
|
||||||
if (!m_Info->CheckCL(err, "cl::Buffer()"))
|
if (!m_Info->CheckCL(err, "cl::Buffer()"))
|
||||||
@ -156,7 +156,59 @@ bool OpenCLWrapper::AddBuffer(const string& name, size_t size, cl_mem_flags flag
|
|||||||
}
|
}
|
||||||
|
|
||||||
//If the buffer existed and the sizes were the same, take no action.
|
//If the buffer existed and the sizes were the same, take no action.
|
||||||
return true;
|
return true;//Either operation succeeded.
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Add a host side buffer with the specified name, size and host data pointer.
|
||||||
|
/// Three possible actions to take:
|
||||||
|
/// Buffer didn't exist, so create and add.
|
||||||
|
/// Buffer existed, but was a different size or pointer, replace.
|
||||||
|
/// Buffer existed with the same size and pointer, do nothing.
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="name">The name of the buffer</param>
|
||||||
|
/// <param name="size">The size in bytes of the buffer</param>
|
||||||
|
/// <param name="data">The pointer to the beginning of the host side data.</param>
|
||||||
|
/// <returns>True if success, else false.</returns>
|
||||||
|
bool OpenCLWrapper::AddHostBuffer(const string& name, size_t size, void* data)
|
||||||
|
{
|
||||||
|
cl_int err;
|
||||||
|
|
||||||
|
if (m_Init)
|
||||||
|
{
|
||||||
|
int bufferIndex = FindBufferIndex(name);
|
||||||
|
|
||||||
|
if (bufferIndex == -1)//If the buffer didn't exist, create and add.
|
||||||
|
{
|
||||||
|
cl::Buffer buff(m_Context, CL_MEM_USE_HOST_PTR, size, data, &err);
|
||||||
|
|
||||||
|
if (!m_Info->CheckCL(err, "cl::Buffer()"))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
NamedBuffer nb(buff, name);
|
||||||
|
m_Buffers.push_back(nb);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (GetBufferSize(bufferIndex) != size ||//If it did exist, only create and add if the sizes...
|
||||||
|
data != m_Buffers[bufferIndex].m_Buffer.getInfo<CL_MEM_HOST_PTR>(nullptr))//...or addresses were different.
|
||||||
|
{
|
||||||
|
m_Buffers[bufferIndex] = NamedBuffer(cl::Buffer(m_Context, CL_MEM_USE_HOST_PTR, size_t(0), data, &err), "emptybuffer");//First clear out the original so the two don't exist in memory at once.
|
||||||
|
cl::Buffer buff(m_Context, CL_MEM_USE_HOST_PTR, size, data, &err);//Create the new buffer.
|
||||||
|
|
||||||
|
if (!m_Info->CheckCL(err, "cl::Buffer()"))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
NamedBuffer nb(buff, name);//Make a named buffer out of the new buffer.
|
||||||
|
m_Buffers[bufferIndex] = nb;//Finally, assign.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//If the buffer existed and the sizes and pointers were the same, take no action.
|
||||||
|
return true;//Either operation succeeded.
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
|
@ -106,6 +106,7 @@ public:
|
|||||||
|
|
||||||
//Buffers.
|
//Buffers.
|
||||||
bool AddBuffer(const string& name, size_t size, cl_mem_flags flags = CL_MEM_READ_WRITE);
|
bool AddBuffer(const string& name, size_t size, cl_mem_flags flags = CL_MEM_READ_WRITE);
|
||||||
|
bool AddHostBuffer(const string& name, size_t size, void* data);
|
||||||
bool AddAndWriteBuffer(const string& name, void* data, size_t size, cl_mem_flags flags = CL_MEM_READ_WRITE);
|
bool AddAndWriteBuffer(const string& name, void* data, size_t size, cl_mem_flags flags = CL_MEM_READ_WRITE);
|
||||||
bool WriteBuffer(const string& name, void* data, size_t size);
|
bool WriteBuffer(const string& name, void* data, size_t size);
|
||||||
bool WriteBuffer(size_t bufferIndex, void* data, size_t size);
|
bool WriteBuffer(size_t bufferIndex, void* data, size_t size);
|
||||||
|
@ -56,6 +56,7 @@ void RendererCL<T, bucketT>::Init()
|
|||||||
m_DECoefIndicesBufferName = "DECoefIndices";
|
m_DECoefIndicesBufferName = "DECoefIndices";
|
||||||
m_SpatialFilterCoefsBufferName = "SpatialFilterCoefs";
|
m_SpatialFilterCoefsBufferName = "SpatialFilterCoefs";
|
||||||
m_CurvesCsaName = "CurvesCsa";
|
m_CurvesCsaName = "CurvesCsa";
|
||||||
|
m_HostBufferName = "Host";
|
||||||
m_HistBufferName = "Hist";
|
m_HistBufferName = "Hist";
|
||||||
m_AccumBufferName = "Accum";
|
m_AccumBufferName = "Accum";
|
||||||
m_FinalImageName = "Final";
|
m_FinalImageName = "Final";
|
||||||
@ -256,7 +257,7 @@ bool RendererCL<T, bucketT>::ReadHist(size_t device)
|
|||||||
{
|
{
|
||||||
if (device < m_Devices.size())
|
if (device < m_Devices.size())
|
||||||
if (Renderer<T, bucketT>::Alloc(true))//Allocate the histogram memory to read into, other buffers not needed.
|
if (Renderer<T, bucketT>::Alloc(true))//Allocate the histogram memory to read into, other buffers not needed.
|
||||||
return m_Devices[device]->m_Wrapper.ReadBuffer(m_HistBufferName, reinterpret_cast<void*>(HistBuckets()), SuperSize() * sizeof(v4bT));
|
return m_Devices[device]->m_Wrapper.ReadBuffer(m_HistBufferName, reinterpret_cast<void*>(HistBuckets()), SuperSize() * sizeof(v4bT));//HistBuckets should have been created as a ClBuffer with HOST_PTR if more than one device is used.
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -668,8 +669,7 @@ bool RendererCL<T, bucketT>::Alloc(bool histOnly)
|
|||||||
EnterResize();
|
EnterResize();
|
||||||
m_XformsCL.resize(m_Ember.TotalXformCount());
|
m_XformsCL.resize(m_Ember.TotalXformCount());
|
||||||
bool b = true;
|
bool b = true;
|
||||||
size_t histLength = SuperSize() * sizeof(v4bT);
|
size_t size = SuperSize() * sizeof(v4bT);//Size of histogram and density filter buffer.
|
||||||
size_t accumLength = SuperSize() * sizeof(v4bT);
|
|
||||||
const char* loc = __FUNCTION__;
|
const char* loc = __FUNCTION__;
|
||||||
auto& wrapper = m_Devices[0]->m_Wrapper;
|
auto& wrapper = m_Devices[0]->m_Wrapper;
|
||||||
|
|
||||||
@ -679,7 +679,7 @@ bool RendererCL<T, bucketT>::Alloc(bool histOnly)
|
|||||||
|
|
||||||
if (b && !(b = wrapper.AddBuffer(m_CurvesCsaName, SizeOf(m_Csa.m_Entries)))) { AddToReport(loc); }
|
if (b && !(b = wrapper.AddBuffer(m_CurvesCsaName, SizeOf(m_Csa.m_Entries)))) { AddToReport(loc); }
|
||||||
|
|
||||||
if (b && !(b = wrapper.AddBuffer(m_AccumBufferName, accumLength))) { AddToReport(loc); }//Accum buffer.
|
if (b && !(b = wrapper.AddBuffer(m_AccumBufferName, size))) { AddToReport(loc); }//Accum buffer.
|
||||||
|
|
||||||
for (auto& device : m_Devices)
|
for (auto& device : m_Devices)
|
||||||
{
|
{
|
||||||
@ -693,13 +693,16 @@ bool RendererCL<T, bucketT>::Alloc(bool histOnly)
|
|||||||
|
|
||||||
if (b && !(b = device->m_Wrapper.AddBuffer(m_CarToRasBufferName, sizeof(m_CarToRasCL)))) { AddToReport(loc); break; }
|
if (b && !(b = device->m_Wrapper.AddBuffer(m_CarToRasBufferName, sizeof(m_CarToRasCL)))) { AddToReport(loc); break; }
|
||||||
|
|
||||||
if (b && !(b = device->m_Wrapper.AddBuffer(m_HistBufferName, histLength))) { AddToReport(loc); break; }//Histogram. Will memset to zero later.
|
if (b && !(b = device->m_Wrapper.AddBuffer(m_HistBufferName, size))) { AddToReport(loc); break; }//Histogram. Will memset to zero later.
|
||||||
|
|
||||||
if (b && !(b = device->m_Wrapper.AddBuffer(m_PointsBufferName, IterGridKernelCount() * sizeof(PointCL<T>)))) { AddToReport(loc); break; }//Points between iter calls.
|
if (b && !(b = device->m_Wrapper.AddBuffer(m_PointsBufferName, IterGridKernelCount() * sizeof(PointCL<T>)))) { AddToReport(loc); break; }//Points between iter calls.
|
||||||
|
|
||||||
//Global shared is allocated once and written when building the kernel.
|
//Global shared is allocated once and written when building the kernel.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (m_Devices.size() > 1)
|
||||||
|
b = CreateHostBuffer();
|
||||||
|
|
||||||
LeaveResize();
|
LeaveResize();
|
||||||
|
|
||||||
if (b && !(b = SetOutputTexture(m_OutputTexID))) { AddToReport(loc); }
|
if (b && !(b = SetOutputTexture(m_OutputTexID))) { AddToReport(loc); }
|
||||||
@ -1595,8 +1598,34 @@ int RendererCL<T, bucketT>::MakeAndGetGammaCorrectionProgram()
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Create the ClBuffer HOST_PTR wrapper around the CPU histogram buffer.
|
||||||
|
/// This is only used with multiple devices, and therefore should only be called in such cases.
|
||||||
|
/// </summary>
|
||||||
|
/// <returns>True if success, else false.</returns>
|
||||||
|
template <typename T, typename bucketT>
|
||||||
|
bool RendererCL<T, bucketT>::CreateHostBuffer()
|
||||||
|
{
|
||||||
|
bool b = true;
|
||||||
|
size_t size = SuperSize() * sizeof(v4bT);//Size of histogram and density filter buffer.
|
||||||
|
const char* loc = __FUNCTION__;
|
||||||
|
|
||||||
|
if (b = Renderer<T, bucketT>::Alloc(true))//Allocate the histogram memory to point this HOST_PTR buffer to, other buffers not needed.
|
||||||
|
{
|
||||||
|
if (b && !(b = m_Devices[0]->m_Wrapper.AddHostBuffer(m_HostBufferName, size, reinterpret_cast<void*>(HistBuckets()))))
|
||||||
|
AddToReport(string(loc) + ": creating OpenCL HOST_PTR buffer to point to host side histogram failed.");//Host side histogram for temporary use with multiple devices.
|
||||||
|
}
|
||||||
|
else
|
||||||
|
AddToReport(string(loc) + ": allocating host side histogram failed.");//Allocating histogram failed, something is seriously wrong.
|
||||||
|
|
||||||
|
return b;
|
||||||
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Sum all histograms from the secondary devices with the histogram on the primary device.
|
/// Sum all histograms from the secondary devices with the histogram on the primary device.
|
||||||
|
/// This works by reading the histogram from those devices one at a time into the host side buffer, which
|
||||||
|
/// is just an OpenCL HOST_PTR buffer wrapping the CPU histogram, used as a temp space.
|
||||||
|
/// Then pass that buffer to a kernel that sums it with the histogram on the primary device.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
/// <returns>True if success, else false.</returns>
|
/// <returns>True if success, else false.</returns>
|
||||||
template <typename T, typename bucketT>
|
template <typename T, typename bucketT>
|
||||||
@ -1617,30 +1646,25 @@ bool RendererCL<T, bucketT>::SumDeviceHist()
|
|||||||
|
|
||||||
if ((b = (kernelIndex != -1)))
|
if ((b = (kernelIndex != -1)))
|
||||||
{
|
{
|
||||||
for (size_t device = 1; device < m_Devices.size(); device++)
|
for (size_t device = 1; device < m_Devices.size(); device++)//All secondary devices.
|
||||||
{
|
{
|
||||||
|
//m_HostBufferName will have been created as a ClBuffer to wrap the CPU histogram buffer as a temp space.
|
||||||
|
//So read into it, then pass to the kernel below to sum to the primary device's histogram.
|
||||||
if ((b = (ReadHist(device) && ClearHist(device))))//Must clear hist on secondary devices after reading and summing because they'll be reused on a quality increase (KEEP_ITERATING).
|
if ((b = (ReadHist(device) && ClearHist(device))))//Must clear hist on secondary devices after reading and summing because they'll be reused on a quality increase (KEEP_ITERATING).
|
||||||
{
|
{
|
||||||
if ((b = wrapper.WriteBuffer(m_AccumBufferName, reinterpret_cast<void*>(HistBuckets()), SuperSize() * sizeof(v4bT))))
|
cl_uint argIndex = 0;
|
||||||
{
|
|
||||||
cl_uint argIndex = 0;
|
|
||||||
|
|
||||||
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_AccumBufferName))) { break; }//Source buffer of v4bT.
|
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_HostBufferName))) { break; }//Source buffer of v4bT.
|
||||||
|
|
||||||
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_HistBufferName))) { break; }//Dest buffer of v4bT.
|
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_HistBufferName))) { break; }//Dest buffer of v4bT.
|
||||||
|
|
||||||
if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, uint(SuperRasW())))) { break; }//Width in pixels.
|
if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, uint(SuperRasW())))) { break; }//Width in pixels.
|
||||||
|
|
||||||
if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, uint(SuperRasH())))) { break; }//Height in pixels.
|
if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, uint(SuperRasH())))) { break; }//Height in pixels.
|
||||||
|
|
||||||
if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, (device == m_Devices.size() - 1) ? 1 : 0))) { break; }//Clear the source buffer on the last device.
|
if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, (device == m_Devices.size() - 1) ? 1 : 0))) { break; }//Clear the source buffer on the last device.
|
||||||
|
|
||||||
if (b && !(b = wrapper.RunKernel (kernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { break; }
|
if (b && !(b = wrapper.RunKernel (kernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { break; }
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -182,6 +182,7 @@ private:
|
|||||||
int MakeAndGetDensityFilterProgram(size_t ss, uint filterWidth);
|
int MakeAndGetDensityFilterProgram(size_t ss, uint filterWidth);
|
||||||
int MakeAndGetFinalAccumProgram(double& alphaBase, double& alphaScale);
|
int MakeAndGetFinalAccumProgram(double& alphaBase, double& alphaScale);
|
||||||
int MakeAndGetGammaCorrectionProgram();
|
int MakeAndGetGammaCorrectionProgram();
|
||||||
|
bool CreateHostBuffer();
|
||||||
bool SumDeviceHist();
|
bool SumDeviceHist();
|
||||||
void FillSeeds();
|
void FillSeeds();
|
||||||
|
|
||||||
@ -214,6 +215,7 @@ private:
|
|||||||
string m_DEWidthsBufferName;
|
string m_DEWidthsBufferName;
|
||||||
string m_DECoefIndicesBufferName;
|
string m_DECoefIndicesBufferName;
|
||||||
string m_SpatialFilterCoefsBufferName;
|
string m_SpatialFilterCoefsBufferName;
|
||||||
|
string m_HostBufferName;
|
||||||
string m_HistBufferName;
|
string m_HistBufferName;
|
||||||
string m_AccumBufferName;
|
string m_AccumBufferName;
|
||||||
string m_FinalImageName;
|
string m_FinalImageName;
|
||||||
|
Loading…
Reference in New Issue
Block a user