diff --git a/Source/EmberCL/OpenCLWrapper.cpp b/Source/EmberCL/OpenCLWrapper.cpp index c53cf12..fffae8e 100644 --- a/Source/EmberCL/OpenCLWrapper.cpp +++ b/Source/EmberCL/OpenCLWrapper.cpp @@ -118,7 +118,7 @@ void OpenCLWrapper::ClearPrograms() /// Add a buffer with the specified size and name. /// Three possible actions to take: /// Buffer didn't exist, so create and add. -/// Buffer existed, but was a different size. Replace. +/// Buffer existed, but was a different size, replace. /// Buffer existed with the same size, do nothing. /// /// The name of the buffer @@ -145,7 +145,7 @@ bool OpenCLWrapper::AddBuffer(const string& name, size_t size, cl_mem_flags flag } else if (GetBufferSize(bufferIndex) != size)//If it did exist, only create and add if the sizes were different. { - m_Buffers[bufferIndex] = NamedBuffer(cl::Buffer(m_Context, flags, 0, nullptr, &err), "emptybuffer");//First clear out the original so the two don't exist in memory at once. + m_Buffers[bufferIndex] = NamedBuffer(cl::Buffer(m_Context, flags, size_t(0), nullptr, &err), "emptybuffer");//First clear out the original so the two don't exist in memory at once. cl::Buffer buff(m_Context, flags, size, nullptr, &err);//Create the new buffer. if (!m_Info->CheckCL(err, "cl::Buffer()")) @@ -156,7 +156,59 @@ bool OpenCLWrapper::AddBuffer(const string& name, size_t size, cl_mem_flags flag } //If the buffer existed and the sizes were the same, take no action. - return true; + return true;//Either operation succeeded. + } + + return false; +} + +/// +/// Add a buffer with the specified name and size, created with CL_MEM_USE_HOST_PTR on the given host data pointer. +/// Three possible actions to take: +/// Buffer didn't exist, so create and add. +/// Buffer existed, but was a different size or pointer, replace. +/// Buffer existed with the same size and pointer, do nothing. +/// +/// The name of the buffer +/// The size in bytes of the buffer +/// The pointer to the beginning of the host side data. +/// True if the buffer was created, replaced or already matched, false if m_Init is not set or cl::Buffer creation reported an error. 
+bool OpenCLWrapper::AddHostBuffer(const string& name, size_t size, void* data) +{ + cl_int err;//Receives the status of each cl::Buffer construction below. + + if (m_Init) + { + int bufferIndex = FindBufferIndex(name); + + if (bufferIndex == -1)//No buffer with this name was found, so create one and append it. + { + cl::Buffer buff(m_Context, CL_MEM_USE_HOST_PTR, size, data, &err); + + if (!m_Info->CheckCL(err, "cl::Buffer()")) + return false; + + NamedBuffer nb(buff, name); + m_Buffers.push_back(nb); + } + else + { + if (GetBufferSize(bufferIndex) != size ||//If it did exist, only create and add if the sizes... + data != m_Buffers[bufferIndex].m_Buffer.getInfo(nullptr))//...or addresses were different. + { + m_Buffers[bufferIndex] = NamedBuffer(cl::Buffer(m_Context, CL_MEM_USE_HOST_PTR, size_t(0), data, &err), "emptybuffer");//First clear out the original so the two don't exist in memory at once. NOTE(review): err from this zero-size placeholder is overwritten by the next cl::Buffer call before CheckCL runs, so it is never inspected. + cl::Buffer buff(m_Context, CL_MEM_USE_HOST_PTR, size, data, &err);//Create the new buffer. + + if (!m_Info->CheckCL(err, "cl::Buffer()")) + return false; + + NamedBuffer nb(buff, name);//Make a named buffer out of the new buffer. + m_Buffers[bufferIndex] = nb;//Finally, assign. + } + } + + //If the buffer existed and the sizes and pointers were the same, take no action. + return true;//Either operation succeeded. } return false; diff --git a/Source/EmberCL/OpenCLWrapper.h b/Source/EmberCL/OpenCLWrapper.h index e7ead10..9d1888a 100644 --- a/Source/EmberCL/OpenCLWrapper.h +++ b/Source/EmberCL/OpenCLWrapper.h @@ -106,6 +106,7 @@ public: //Buffers. 
bool AddBuffer(const string& name, size_t size, cl_mem_flags flags = CL_MEM_READ_WRITE); + bool AddHostBuffer(const string& name, size_t size, void* data); bool AddAndWriteBuffer(const string& name, void* data, size_t size, cl_mem_flags flags = CL_MEM_READ_WRITE); bool WriteBuffer(const string& name, void* data, size_t size); bool WriteBuffer(size_t bufferIndex, void* data, size_t size); diff --git a/Source/EmberCL/RendererCL.cpp b/Source/EmberCL/RendererCL.cpp index 8fc059b..22fd9f9 100644 --- a/Source/EmberCL/RendererCL.cpp +++ b/Source/EmberCL/RendererCL.cpp @@ -56,6 +56,7 @@ void RendererCL::Init() m_DECoefIndicesBufferName = "DECoefIndices"; m_SpatialFilterCoefsBufferName = "SpatialFilterCoefs"; m_CurvesCsaName = "CurvesCsa"; + m_HostBufferName = "Host"; m_HistBufferName = "Hist"; m_AccumBufferName = "Accum"; m_FinalImageName = "Final"; @@ -256,7 +257,7 @@ bool RendererCL::ReadHist(size_t device) { if (device < m_Devices.size()) if (Renderer::Alloc(true))//Allocate the histogram memory to read into, other buffers not needed. - return m_Devices[device]->m_Wrapper.ReadBuffer(m_HistBufferName, reinterpret_cast(HistBuckets()), SuperSize() * sizeof(v4bT)); + return m_Devices[device]->m_Wrapper.ReadBuffer(m_HistBufferName, reinterpret_cast(HistBuckets()), SuperSize() * sizeof(v4bT));//HistBuckets should have been created as a ClBuffer with HOST_PTR if more than one device is used. return false; } @@ -668,8 +669,7 @@ bool RendererCL::Alloc(bool histOnly) EnterResize(); m_XformsCL.resize(m_Ember.TotalXformCount()); bool b = true; - size_t histLength = SuperSize() * sizeof(v4bT); - size_t accumLength = SuperSize() * sizeof(v4bT); + size_t size = SuperSize() * sizeof(v4bT);//Size of histogram and density filter buffer. 
const char* loc = __FUNCTION__; auto& wrapper = m_Devices[0]->m_Wrapper; @@ -679,7 +679,7 @@ bool RendererCL::Alloc(bool histOnly) if (b && !(b = wrapper.AddBuffer(m_CurvesCsaName, SizeOf(m_Csa.m_Entries)))) { AddToReport(loc); } - if (b && !(b = wrapper.AddBuffer(m_AccumBufferName, accumLength))) { AddToReport(loc); }//Accum buffer. + if (b && !(b = wrapper.AddBuffer(m_AccumBufferName, size))) { AddToReport(loc); }//Accum buffer. for (auto& device : m_Devices) { @@ -693,13 +693,16 @@ bool RendererCL::Alloc(bool histOnly) if (b && !(b = device->m_Wrapper.AddBuffer(m_CarToRasBufferName, sizeof(m_CarToRasCL)))) { AddToReport(loc); break; } - if (b && !(b = device->m_Wrapper.AddBuffer(m_HistBufferName, histLength))) { AddToReport(loc); break; }//Histogram. Will memset to zero later. + if (b && !(b = device->m_Wrapper.AddBuffer(m_HistBufferName, size))) { AddToReport(loc); break; }//Histogram. Will memset to zero later. if (b && !(b = device->m_Wrapper.AddBuffer(m_PointsBufferName, IterGridKernelCount() * sizeof(PointCL)))) { AddToReport(loc); break; }//Points between iter calls. //Global shared is allocated once and written when building the kernel. } + if (m_Devices.size() > 1) + b = CreateHostBuffer();//NOTE(review): assigns b unconditionally, so a false b left by the AddBuffer() calls above is replaced when this succeeds; confirm whether this should be guarded by b. + LeaveResize(); if (b && !(b = SetOutputTexture(m_OutputTexID))) { AddToReport(loc); } @@ -1595,8 +1598,34 @@ int RendererCL::MakeAndGetGammaCorrectionProgram() return -1; } +/// +/// Create the ClBuffer HOST_PTR wrapper around the CPU histogram buffer. +/// This is only used with multiple devices, and therefore should only be called in such cases. +/// +/// True if success, else false. +template +bool RendererCL::CreateHostBuffer() +{ + bool b = true; + size_t size = SuperSize() * sizeof(v4bT);//Size in bytes of the host side histogram that the host buffer wraps. + const char* loc = __FUNCTION__; + + if (b = Renderer::Alloc(true))//Allocate the histogram memory to point this HOST_PTR buffer to, other buffers not needed. 
+ { + if (b && !(b = m_Devices[0]->m_Wrapper.AddHostBuffer(m_HostBufferName, size, reinterpret_cast(HistBuckets())))) + AddToReport(string(loc) + ": creating OpenCL HOST_PTR buffer to point to host side histogram failed.");//Host side histogram for temporary use with multiple devices. + } + else + AddToReport(string(loc) + ": allocating host side histogram failed.");//Allocating histogram failed, something is seriously wrong. + + return b; +} + /// /// Sum all histograms from the secondary devices with the histogram on the primary device. +/// This works by reading the histogram from those devices one at a time into the host side buffer, which +/// is just an OpenCL pointer to the CPU histogram to use it as a temp space. +/// Then pass that buffer to a kernel that sums it with the histogram on the primary device. /// /// True if success, else false. template @@ -1617,30 +1646,25 @@ bool RendererCL::SumDeviceHist() if ((b = (kernelIndex != -1))) { - for (size_t device = 1; device < m_Devices.size(); device++) + for (size_t device = 1; device < m_Devices.size(); device++)//All secondary devices. { + //m_HostBufferName will have been created as a ClBuffer to wrap the CPU histogram buffer as a temp space. + //So read into it, then pass to the kernel below to sum to the primary device's histogram. if ((b = (ReadHist(device) && ClearHist(device))))//Must clear hist on secondary devices after reading and summing because they'll be reused on a quality increase (KEEP_ITERATING). { - if ((b = wrapper.WriteBuffer(m_AccumBufferName, reinterpret_cast(HistBuckets()), SuperSize() * sizeof(v4bT)))) - { - cl_uint argIndex = 0; + cl_uint argIndex = 0; - if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_AccumBufferName))) { break; }//Source buffer of v4bT. + if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_HostBufferName))) { break; }//Source buffer of v4bT. 
- if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_HistBufferName))) { break; }//Dest buffer of v4bT. + if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_HistBufferName))) { break; }//Dest buffer of v4bT. - if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, uint(SuperRasW())))) { break; }//Width in pixels. + if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, uint(SuperRasW())))) { break; }//Width in pixels. - if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, uint(SuperRasH())))) { break; }//Height in pixels. + if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, uint(SuperRasH())))) { break; }//Height in pixels. - if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, (device == m_Devices.size() - 1) ? 1 : 0))) { break; }//Clear the source buffer on the last device. + if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, (device == m_Devices.size() - 1) ? 1 : 0))) { break; }//Clear the source buffer on the last device. - if (b && !(b = wrapper.RunKernel (kernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { break; } - } - else - { - break; - } + if (b && !(b = wrapper.RunKernel (kernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { break; } } else { diff --git a/Source/EmberCL/RendererCL.h b/Source/EmberCL/RendererCL.h index 4f5093b..d6a95eb 100644 --- a/Source/EmberCL/RendererCL.h +++ b/Source/EmberCL/RendererCL.h @@ -182,6 +182,7 @@ private: int MakeAndGetDensityFilterProgram(size_t ss, uint filterWidth); int MakeAndGetFinalAccumProgram(double& alphaBase, double& alphaScale); int MakeAndGetGammaCorrectionProgram(); + bool CreateHostBuffer(); bool SumDeviceHist(); void FillSeeds(); @@ -214,6 +215,7 @@ private: string m_DEWidthsBufferName; string m_DECoefIndicesBufferName; string m_SpatialFilterCoefsBufferName; + string m_HostBufferName; string m_HistBufferName; string m_AccumBufferName; string m_FinalImageName;