mirror of
				https://bitbucket.org/mfeemster/fractorium.git
				synced 2025-10-31 17:30:24 -04:00 
			
		
		
		
	--Code changes
-Make summing histograms from one or more secondary devices to the primary a little more elegant by using HOST_PTR. This requires one less copy.
This commit is contained in:
		| @ -118,7 +118,7 @@ void OpenCLWrapper::ClearPrograms() | ||||
| /// Add a buffer with the specified size and name. | ||||
| /// Three possible actions to take: | ||||
| ///		Buffer didn't exist, so create and add. | ||||
| ///		Buffer existed, but was a different size. Replace. | ||||
| ///		Buffer existed, but was a different size, replace. | ||||
| ///		Buffer existed with the same size, do nothing. | ||||
| /// </summary> | ||||
| /// <param name="name">The name of the buffer</param> | ||||
| @ -145,7 +145,7 @@ bool OpenCLWrapper::AddBuffer(const string& name, size_t size, cl_mem_flags flag | ||||
| 		} | ||||
| 		else if (GetBufferSize(bufferIndex) != size)//If it did exist, only create and add if the sizes were different. | ||||
| 		{ | ||||
| 			m_Buffers[bufferIndex] = NamedBuffer(cl::Buffer(m_Context, flags, 0, nullptr, &err), "emptybuffer");//First clear out the original so the two don't exist in memory at once. | ||||
| 			m_Buffers[bufferIndex] = NamedBuffer(cl::Buffer(m_Context, flags, size_t(0), nullptr, &err), "emptybuffer");//First clear out the original so the two don't exist in memory at once. | ||||
| 			cl::Buffer buff(m_Context, flags, size, nullptr, &err);//Create the new buffer. | ||||
|  | ||||
| 			if (!m_Info->CheckCL(err, "cl::Buffer()")) | ||||
| @ -156,7 +156,59 @@ bool OpenCLWrapper::AddBuffer(const string& name, size_t size, cl_mem_flags flag | ||||
| 		} | ||||
|  | ||||
| 		//If the buffer existed and the sizes were the same, take no action. | ||||
| 		return true; | ||||
| 		return true;//Either operation succeeded. | ||||
| 	} | ||||
|  | ||||
| 	return false; | ||||
| } | ||||
|  | ||||
| /// <summary> | ||||
| /// Add a host side buffer with the specified name, size and host data pointer. | ||||
| /// Three possible actions to take: | ||||
| ///		Buffer didn't exist, so create and add. | ||||
| ///		Buffer existed, but was a different size or pointer, replace. | ||||
| ///		Buffer existed with the same size and pointer, do nothing. | ||||
| /// </summary> | ||||
| /// <param name="name">The name of the buffer</param> | ||||
| /// <param name="size">The size in bytes of the buffer</param> | ||||
| /// <param name="data">The pointer to the beginning of the host side data.</param> | ||||
| /// <returns>True if success, else false.</returns> | ||||
| bool OpenCLWrapper::AddHostBuffer(const string& name, size_t size, void* data) | ||||
| { | ||||
| 	cl_int err; | ||||
|  | ||||
| 	if (m_Init) | ||||
| 	{ | ||||
| 		int bufferIndex = FindBufferIndex(name); | ||||
|  | ||||
| 		if (bufferIndex == -1)//If the buffer didn't exist, create and add. | ||||
| 		{ | ||||
| 			cl::Buffer buff(m_Context, CL_MEM_USE_HOST_PTR, size, data, &err); | ||||
|  | ||||
| 			if (!m_Info->CheckCL(err, "cl::Buffer()")) | ||||
| 				return false; | ||||
|  | ||||
| 			NamedBuffer nb(buff, name); | ||||
| 			m_Buffers.push_back(nb); | ||||
| 		} | ||||
| 		else | ||||
| 		{ | ||||
| 			if (GetBufferSize(bufferIndex) != size ||//If it did exist, only create and add if the sizes... | ||||
| 					data != m_Buffers[bufferIndex].m_Buffer.getInfo<CL_MEM_HOST_PTR>(nullptr))//...or addresses were different. | ||||
| 			{ | ||||
| 				m_Buffers[bufferIndex] = NamedBuffer(cl::Buffer(m_Context, CL_MEM_USE_HOST_PTR, size_t(0), data, &err), "emptybuffer");//First clear out the original so the two don't exist in memory at once. | ||||
| 				cl::Buffer buff(m_Context, CL_MEM_USE_HOST_PTR, size, data, &err);//Create the new buffer. | ||||
|  | ||||
| 				if (!m_Info->CheckCL(err, "cl::Buffer()")) | ||||
| 					return false; | ||||
|  | ||||
| 				NamedBuffer nb(buff, name);//Make a named buffer out of the new buffer. | ||||
| 				m_Buffers[bufferIndex] = nb;//Finally, assign. | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		//If the buffer existed and the sizes and pointers were the same, take no action. | ||||
| 		return true;//Either operation succeeded. | ||||
| 	} | ||||
|  | ||||
| 	return false; | ||||
|  | ||||
| @ -106,6 +106,7 @@ public: | ||||
|  | ||||
| 	//Buffers. | ||||
| 	bool AddBuffer(const string& name, size_t size, cl_mem_flags flags = CL_MEM_READ_WRITE); | ||||
| 	bool AddHostBuffer(const string& name, size_t size, void* data); | ||||
| 	bool AddAndWriteBuffer(const string& name, void* data, size_t size, cl_mem_flags flags = CL_MEM_READ_WRITE); | ||||
| 	bool WriteBuffer(const string& name, void* data, size_t size); | ||||
| 	bool WriteBuffer(size_t bufferIndex, void* data, size_t size); | ||||
|  | ||||
| @ -56,6 +56,7 @@ void RendererCL<T, bucketT>::Init() | ||||
| 	m_DECoefIndicesBufferName = "DECoefIndices"; | ||||
| 	m_SpatialFilterCoefsBufferName = "SpatialFilterCoefs"; | ||||
| 	m_CurvesCsaName = "CurvesCsa"; | ||||
| 	m_HostBufferName = "Host"; | ||||
| 	m_HistBufferName = "Hist"; | ||||
| 	m_AccumBufferName = "Accum"; | ||||
| 	m_FinalImageName = "Final"; | ||||
| @ -256,7 +257,7 @@ bool RendererCL<T, bucketT>::ReadHist(size_t device) | ||||
| { | ||||
| 	if (device < m_Devices.size()) | ||||
| 		if (Renderer<T, bucketT>::Alloc(true))//Allocate the histogram memory to read into, other buffers not needed. | ||||
| 			return m_Devices[device]->m_Wrapper.ReadBuffer(m_HistBufferName, reinterpret_cast<void*>(HistBuckets()), SuperSize() * sizeof(v4bT)); | ||||
| 			return m_Devices[device]->m_Wrapper.ReadBuffer(m_HistBufferName, reinterpret_cast<void*>(HistBuckets()), SuperSize() * sizeof(v4bT));//HistBuckets should have been created as a ClBuffer with HOST_PTR if more than one device is used. | ||||
|  | ||||
| 	return false; | ||||
| } | ||||
| @ -668,8 +669,7 @@ bool RendererCL<T, bucketT>::Alloc(bool histOnly) | ||||
| 	EnterResize(); | ||||
| 	m_XformsCL.resize(m_Ember.TotalXformCount()); | ||||
| 	bool b = true; | ||||
| 	size_t histLength = SuperSize() * sizeof(v4bT); | ||||
| 	size_t accumLength = SuperSize() * sizeof(v4bT); | ||||
| 	size_t size = SuperSize() * sizeof(v4bT);//Size of histogram and density filter buffer. | ||||
| 	const char* loc = __FUNCTION__; | ||||
| 	auto& wrapper = m_Devices[0]->m_Wrapper; | ||||
|  | ||||
| @ -679,7 +679,7 @@ bool RendererCL<T, bucketT>::Alloc(bool histOnly) | ||||
|  | ||||
| 	if (b && !(b = wrapper.AddBuffer(m_CurvesCsaName, SizeOf(m_Csa.m_Entries))))					 { AddToReport(loc); } | ||||
|  | ||||
| 	if (b && !(b = wrapper.AddBuffer(m_AccumBufferName, accumLength)))								 { AddToReport(loc); }//Accum buffer. | ||||
| 	if (b && !(b = wrapper.AddBuffer(m_AccumBufferName, size)))										 { AddToReport(loc); }//Accum buffer. | ||||
|  | ||||
| 	for (auto& device : m_Devices) | ||||
| 	{ | ||||
| @ -693,13 +693,16 @@ bool RendererCL<T, bucketT>::Alloc(bool histOnly) | ||||
|  | ||||
| 		if (b && !(b = device->m_Wrapper.AddBuffer(m_CarToRasBufferName, sizeof(m_CarToRasCL))))					 { AddToReport(loc); break; } | ||||
|  | ||||
| 		if (b && !(b = device->m_Wrapper.AddBuffer(m_HistBufferName, histLength)))									 { AddToReport(loc); break; }//Histogram. Will memset to zero later. | ||||
| 		if (b && !(b = device->m_Wrapper.AddBuffer(m_HistBufferName, size)))										 { AddToReport(loc); break; }//Histogram. Will memset to zero later. | ||||
|  | ||||
| 		if (b && !(b = device->m_Wrapper.AddBuffer(m_PointsBufferName, IterGridKernelCount() * sizeof(PointCL<T>)))) { AddToReport(loc); break; }//Points between iter calls. | ||||
|  | ||||
| 		//Global shared is allocated once and written when building the kernel. | ||||
| 	} | ||||
|  | ||||
| 	if (m_Devices.size() > 1) | ||||
| 		b = CreateHostBuffer(); | ||||
|  | ||||
| 	LeaveResize(); | ||||
|  | ||||
| 	if (b && !(b = SetOutputTexture(m_OutputTexID))) { AddToReport(loc); } | ||||
| @ -1595,8 +1598,34 @@ int RendererCL<T, bucketT>::MakeAndGetGammaCorrectionProgram() | ||||
| 	return -1; | ||||
| } | ||||
|  | ||||
| /// <summary> | ||||
| /// Create the ClBuffer HOST_PTR wrapper around the CPU histogram buffer on the primary device (index 0). | ||||
| /// This is only used with multiple devices, and therefore should only be called in such cases. | ||||
| /// </summary> | ||||
| /// <returns>True if success, else false. On failure a message is added to the error report.</returns> | ||||
| template <typename T, typename bucketT> | ||||
| bool RendererCL<T, bucketT>::CreateHostBuffer() | ||||
| { | ||||
| 	bool b = true; | ||||
| 	size_t size = SuperSize() * sizeof(v4bT);//Size in bytes of the histogram and density filter buffer. | ||||
| 	const char* loc = __FUNCTION__; | ||||
|  | ||||
| 	if (b = Renderer<T, bucketT>::Alloc(true))//Assignment is intentional: allocate the histogram memory to point this HOST_PTR buffer to, other buffers not needed. | ||||
| 	{ | ||||
| 		if (b && !(b = m_Devices[0]->m_Wrapper.AddHostBuffer(m_HostBufferName, size, reinterpret_cast<void*>(HistBuckets())))) | ||||
| 			AddToReport(string(loc) + ": creating OpenCL HOST_PTR buffer to point to host side histogram failed.");//Host side histogram for temporary use with multiple devices. Note b is already true when the check above is reached. | ||||
| 	} | ||||
| 	else | ||||
| 		AddToReport(string(loc) + ": allocating host side histogram failed.");//Allocating histogram failed, something is seriously wrong. | ||||
|  | ||||
| 	return b; | ||||
| } | ||||
|  | ||||
| /// <summary> | ||||
| /// Sum all histograms from the secondary devices with the histogram on the primary device. | ||||
| /// This works by reading the histogram from those devices one at a time into the host side buffer, which | ||||
| /// is just an OpenCL pointer to the CPU histogram to use it as a temp space. | ||||
| /// Then pass that buffer to a kernel that sums it with the histogram on the primary device. | ||||
| /// </summary> | ||||
| /// <returns>True if success, else false.</returns> | ||||
| template <typename T, typename bucketT> | ||||
| @ -1617,30 +1646,25 @@ bool RendererCL<T, bucketT>::SumDeviceHist() | ||||
|  | ||||
| 		if ((b = (kernelIndex != -1))) | ||||
| 		{ | ||||
| 			for (size_t device = 1; device < m_Devices.size(); device++) | ||||
| 			for (size_t device = 1; device < m_Devices.size(); device++)//All secondary devices. | ||||
| 			{ | ||||
| 				//m_HostBufferName will have been created as a ClBuffer to wrap the CPU histogram buffer as a temp space. | ||||
| 				//So read into it, then pass to the kernel below to sum to the primary device's histogram. | ||||
| 				if ((b = (ReadHist(device) && ClearHist(device))))//Must clear hist on secondary devices after reading and summing because they'll be reused on a quality increase (KEEP_ITERATING). | ||||
| 				{ | ||||
| 					if ((b = wrapper.WriteBuffer(m_AccumBufferName, reinterpret_cast<void*>(HistBuckets()), SuperSize() * sizeof(v4bT)))) | ||||
| 					{ | ||||
| 						cl_uint argIndex = 0; | ||||
| 					cl_uint argIndex = 0; | ||||
|  | ||||
| 						if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_AccumBufferName)))						 { break; }//Source buffer of v4bT. | ||||
| 					if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_HostBufferName)))						 { break; }//Source buffer of v4bT. | ||||
|  | ||||
| 						if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_HistBufferName)))						 { break; }//Dest buffer of v4bT. | ||||
| 					if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_HistBufferName)))						 { break; }//Dest buffer of v4bT. | ||||
|  | ||||
| 						if (b && !(b = wrapper.SetArg	   (kernelIndex, argIndex++, uint(SuperRasW()))))						 { break; }//Width in pixels. | ||||
| 					if (b && !(b = wrapper.SetArg	   (kernelIndex, argIndex++, uint(SuperRasW()))))						 { break; }//Width in pixels. | ||||
|  | ||||
| 						if (b && !(b = wrapper.SetArg	   (kernelIndex, argIndex++, uint(SuperRasH()))))						 { break; }//Height in pixels. | ||||
| 					if (b && !(b = wrapper.SetArg	   (kernelIndex, argIndex++, uint(SuperRasH()))))						 { break; }//Height in pixels. | ||||
|  | ||||
| 						if (b && !(b = wrapper.SetArg	   (kernelIndex, argIndex++, (device == m_Devices.size() - 1) ? 1 : 0))) { break; }//Clear the source buffer on the last device. | ||||
| 					if (b && !(b = wrapper.SetArg	   (kernelIndex, argIndex++, (device == m_Devices.size() - 1) ? 1 : 0))) { break; }//Clear the source buffer on the last device. | ||||
|  | ||||
| 						if (b && !(b = wrapper.RunKernel   (kernelIndex, gridW, gridH, 1, blockW, blockH, 1)))					 { break; } | ||||
| 					} | ||||
| 					else | ||||
| 					{ | ||||
| 						break; | ||||
| 					} | ||||
| 					if (b && !(b = wrapper.RunKernel   (kernelIndex, gridW, gridH, 1, blockW, blockH, 1)))					 { break; } | ||||
| 				} | ||||
| 				else | ||||
| 				{ | ||||
|  | ||||
| @ -182,6 +182,7 @@ private: | ||||
| 	int MakeAndGetDensityFilterProgram(size_t ss, uint filterWidth); | ||||
| 	int MakeAndGetFinalAccumProgram(double& alphaBase, double& alphaScale); | ||||
| 	int MakeAndGetGammaCorrectionProgram(); | ||||
| 	bool CreateHostBuffer(); | ||||
| 	bool SumDeviceHist(); | ||||
| 	void FillSeeds(); | ||||
|  | ||||
| @ -214,6 +215,7 @@ private: | ||||
| 	string m_DEWidthsBufferName; | ||||
| 	string m_DECoefIndicesBufferName; | ||||
| 	string m_SpatialFilterCoefsBufferName; | ||||
| 	string m_HostBufferName; | ||||
| 	string m_HistBufferName; | ||||
| 	string m_AccumBufferName; | ||||
| 	string m_FinalImageName; | ||||
|  | ||||
		Reference in New Issue
	
	Block a user
	 mfeemster
					mfeemster