diff --git a/Source/EmberCL/OpenCLWrapper.cpp b/Source/EmberCL/OpenCLWrapper.cpp
index c53cf12..fffae8e 100644
--- a/Source/EmberCL/OpenCLWrapper.cpp
+++ b/Source/EmberCL/OpenCLWrapper.cpp
@@ -118,7 +118,7 @@ void OpenCLWrapper::ClearPrograms()
 /// Add a buffer with the specified size and name.
 /// Three possible actions to take:
 ///		Buffer didn't exist, so create and add.
-///		Buffer existed, but was a different size. Replace.
+///		Buffer existed, but was a different size, replace.
 ///		Buffer existed with the same size, do nothing.
 /// 
 /// The name of the buffer
@@ -145,7 +145,7 @@ bool OpenCLWrapper::AddBuffer(const string& name, size_t size, cl_mem_flags flag
 		}
 		else if (GetBufferSize(bufferIndex) != size)//If it did exist, only create and add if the sizes were different.
 		{
-			m_Buffers[bufferIndex] = NamedBuffer(cl::Buffer(m_Context, flags, 0, nullptr, &err), "emptybuffer");//First clear out the original so the two don't exist in memory at once.
+			m_Buffers[bufferIndex] = NamedBuffer(cl::Buffer(m_Context, flags, size_t(0), nullptr, &err), "emptybuffer");//First clear out the original so the two don't exist in memory at once.
 			cl::Buffer buff(m_Context, flags, size, nullptr, &err);//Create the new buffer.
 
 			if (!m_Info->CheckCL(err, "cl::Buffer()"))
@@ -156,7 +156,59 @@ bool OpenCLWrapper::AddBuffer(const string& name, size_t size, cl_mem_flags flag
 		}
 
 		//If the buffer existed and the sizes were the same, take no action.
-		return true;
+		return true;//Either operation succeeded.
+	}
+
+	return false;
+}
+
+/// 
+/// Add a host side buffer with the specified name, size and host data pointer.
+/// Three possible actions to take:
+///		Buffer didn't exist, so create and add.
+///		Buffer existed, but was a different size or pointer, replace.
+///		Buffer existed with the same size and pointer, do nothing.
+/// 
+/// The name of the buffer
+/// The size in bytes of the buffer
+/// The pointer to the beginning of the host side data.
+/// True if success, else false.
+bool OpenCLWrapper::AddHostBuffer(const string& name, size_t size, void* data)
+{
+	cl_int err;
+
+	if (m_Init)
+	{
+		int bufferIndex = FindBufferIndex(name);
+
+		if (bufferIndex == -1)//If the buffer didn't exist, create and add.
+		{
+			cl::Buffer buff(m_Context, CL_MEM_USE_HOST_PTR, size, data, &err);
+
+			if (!m_Info->CheckCL(err, "cl::Buffer()"))
+				return false;
+
+			NamedBuffer nb(buff, name);
+			m_Buffers.push_back(nb);
+		}
+		else
+		{
+			if (GetBufferSize(bufferIndex) != size ||//If it did exist, only create and add if the sizes...
+					data != m_Buffers[bufferIndex].m_Buffer.getInfo(nullptr))//...or addresses were different.
+			{
+				m_Buffers[bufferIndex] = NamedBuffer(cl::Buffer(m_Context, CL_MEM_USE_HOST_PTR, size_t(0), data, &err), "emptybuffer");//First clear out the original so the two don't exist in memory at once.
+				cl::Buffer buff(m_Context, CL_MEM_USE_HOST_PTR, size, data, &err);//Create the new buffer.
+
+				if (!m_Info->CheckCL(err, "cl::Buffer()"))
+					return false;
+
+				NamedBuffer nb(buff, name);//Make a named buffer out of the new buffer.
+				m_Buffers[bufferIndex] = nb;//Finally, assign.
+			}
+		}
+
+		//If the buffer existed and the sizes and pointers were the same, take no action.
+		return true;//Either operation succeeded.
 	}
 
 	return false;
diff --git a/Source/EmberCL/OpenCLWrapper.h b/Source/EmberCL/OpenCLWrapper.h
index e7ead10..9d1888a 100644
--- a/Source/EmberCL/OpenCLWrapper.h
+++ b/Source/EmberCL/OpenCLWrapper.h
@@ -106,6 +106,7 @@ public:
 
 	//Buffers.
 	bool AddBuffer(const string& name, size_t size, cl_mem_flags flags = CL_MEM_READ_WRITE);
+	bool AddHostBuffer(const string& name, size_t size, void* data);
 	bool AddAndWriteBuffer(const string& name, void* data, size_t size, cl_mem_flags flags = CL_MEM_READ_WRITE);
 	bool WriteBuffer(const string& name, void* data, size_t size);
 	bool WriteBuffer(size_t bufferIndex, void* data, size_t size);
diff --git a/Source/EmberCL/RendererCL.cpp b/Source/EmberCL/RendererCL.cpp
index 8fc059b..22fd9f9 100644
--- a/Source/EmberCL/RendererCL.cpp
+++ b/Source/EmberCL/RendererCL.cpp
@@ -56,6 +56,7 @@ void RendererCL::Init()
 	m_DECoefIndicesBufferName = "DECoefIndices";
 	m_SpatialFilterCoefsBufferName = "SpatialFilterCoefs";
 	m_CurvesCsaName = "CurvesCsa";
+	m_HostBufferName = "Host";
 	m_HistBufferName = "Hist";
 	m_AccumBufferName = "Accum";
 	m_FinalImageName = "Final";
@@ -256,7 +257,7 @@ bool RendererCL::ReadHist(size_t device)
 {
 	if (device < m_Devices.size())
 		if (Renderer::Alloc(true))//Allocate the histogram memory to read into, other buffers not needed.
-			return m_Devices[device]->m_Wrapper.ReadBuffer(m_HistBufferName, reinterpret_cast(HistBuckets()), SuperSize() * sizeof(v4bT));
+			return m_Devices[device]->m_Wrapper.ReadBuffer(m_HistBufferName, reinterpret_cast(HistBuckets()), SuperSize() * sizeof(v4bT));//HistBuckets should have been created as a ClBuffer with HOST_PTR if more than one device is used.
 
 	return false;
 }
@@ -668,8 +669,7 @@ bool RendererCL::Alloc(bool histOnly)
 	EnterResize();
 	m_XformsCL.resize(m_Ember.TotalXformCount());
 	bool b = true;
-	size_t histLength = SuperSize() * sizeof(v4bT);
-	size_t accumLength = SuperSize() * sizeof(v4bT);
+	size_t size = SuperSize() * sizeof(v4bT);//Size of histogram and density filter buffer.
 	const char* loc = __FUNCTION__;
 	auto& wrapper = m_Devices[0]->m_Wrapper;
 
@@ -679,7 +679,7 @@ bool RendererCL::Alloc(bool histOnly)
 
 	if (b && !(b = wrapper.AddBuffer(m_CurvesCsaName, SizeOf(m_Csa.m_Entries))))					 { AddToReport(loc); }
 
-	if (b && !(b = wrapper.AddBuffer(m_AccumBufferName, accumLength)))								 { AddToReport(loc); }//Accum buffer.
+	if (b && !(b = wrapper.AddBuffer(m_AccumBufferName, size)))										 { AddToReport(loc); }//Accum buffer.
 
 	for (auto& device : m_Devices)
 	{
@@ -693,13 +693,16 @@ bool RendererCL::Alloc(bool histOnly)
 
 		if (b && !(b = device->m_Wrapper.AddBuffer(m_CarToRasBufferName, sizeof(m_CarToRasCL))))					 { AddToReport(loc); break; }
 
-		if (b && !(b = device->m_Wrapper.AddBuffer(m_HistBufferName, histLength)))									 { AddToReport(loc); break; }//Histogram. Will memset to zero later.
+		if (b && !(b = device->m_Wrapper.AddBuffer(m_HistBufferName, size)))										 { AddToReport(loc); break; }//Histogram. Will memset to zero later.
 
 		if (b && !(b = device->m_Wrapper.AddBuffer(m_PointsBufferName, IterGridKernelCount() * sizeof(PointCL)))) { AddToReport(loc); break; }//Points between iter calls.
 
 		//Global shared is allocated once and written when building the kernel.
 	}
 
+	if (m_Devices.size() > 1)
+		b = CreateHostBuffer();
+
 	LeaveResize();
 
 	if (b && !(b = SetOutputTexture(m_OutputTexID))) { AddToReport(loc); }
@@ -1595,8 +1598,34 @@ int RendererCL::MakeAndGetGammaCorrectionProgram()
 	return -1;
 }
 
+/// 
+/// Create the ClBuffer HOST_PTR wrapper around the CPU histogram buffer.
+/// This is only used with multiple devices, and therefore should only be called in such cases.
+/// 
+/// True if success, else false.
+template 
+bool RendererCL::CreateHostBuffer()
+{
+	bool b = true;
+	size_t size = SuperSize() * sizeof(v4bT);//Size in bytes of the host side histogram being wrapped.
+	const char* loc = __FUNCTION__;
+
+	if ((b = Renderer::Alloc(true)))//Allocate the histogram memory to point this HOST_PTR buffer to, other buffers not needed.
+	{
+		if (!(b = m_Devices[0]->m_Wrapper.AddHostBuffer(m_HostBufferName, size, reinterpret_cast(HistBuckets()))))//Always wrapped on the primary device (index 0), which is not bounds checked here.
+			AddToReport(string(loc) + ": creating OpenCL HOST_PTR buffer to point to host side histogram failed.");//Host side histogram for temporary use with multiple devices.
+	}
+	else
+		AddToReport(string(loc) + ": allocating host side histogram failed.");//Allocating histogram failed, something is seriously wrong.
+
+	return b;//NOTE(review): Alloc() assigns this result to its status flag without first checking it, so an earlier failure there can be overwritten by success here.
+}
+
 /// 
 /// Sum all histograms from the secondary devices with the histogram on the primary device.
+/// This works by reading the histogram from each secondary device, one at a time, into the host side buffer,
+/// which is an OpenCL buffer wrapping the CPU histogram memory so that it can be used as temporary space.
+/// That buffer is then passed to a kernel which sums it with the histogram on the primary device.
 /// 
 /// True if success, else false.
 template 
@@ -1617,30 +1646,25 @@ bool RendererCL::SumDeviceHist()
 
 		if ((b = (kernelIndex != -1)))
 		{
-			for (size_t device = 1; device < m_Devices.size(); device++)
+			for (size_t device = 1; device < m_Devices.size(); device++)//All secondary devices.
 			{
+				//m_HostBufferName will have been created as a ClBuffer to wrap the CPU histogram buffer as a temp space.
+				//So read into it, then pass to the kernel below to sum to the primary device's histogram.
 				if ((b = (ReadHist(device) && ClearHist(device))))//Must clear hist on secondary devices after reading and summing because they'll be reused on a quality increase (KEEP_ITERATING).
 				{
-					if ((b = wrapper.WriteBuffer(m_AccumBufferName, reinterpret_cast(HistBuckets()), SuperSize() * sizeof(v4bT))))
-					{
-						cl_uint argIndex = 0;
+					cl_uint argIndex = 0;
 
-						if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_AccumBufferName)))						 { break; }//Source buffer of v4bT.
+					if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_HostBufferName)))						 { break; }//Source buffer of v4bT.
 
-						if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_HistBufferName)))						 { break; }//Dest buffer of v4bT.
+					if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_HistBufferName)))						 { break; }//Dest buffer of v4bT.
 
-						if (b && !(b = wrapper.SetArg	   (kernelIndex, argIndex++, uint(SuperRasW()))))						 { break; }//Width in pixels.
+					if (b && !(b = wrapper.SetArg	   (kernelIndex, argIndex++, uint(SuperRasW()))))						 { break; }//Width in pixels.
 
-						if (b && !(b = wrapper.SetArg	   (kernelIndex, argIndex++, uint(SuperRasH()))))						 { break; }//Height in pixels.
+					if (b && !(b = wrapper.SetArg	   (kernelIndex, argIndex++, uint(SuperRasH()))))						 { break; }//Height in pixels.
 
-						if (b && !(b = wrapper.SetArg	   (kernelIndex, argIndex++, (device == m_Devices.size() - 1) ? 1 : 0))) { break; }//Clear the source buffer on the last device.
+					if (b && !(b = wrapper.SetArg	   (kernelIndex, argIndex++, (device == m_Devices.size() - 1) ? 1 : 0))) { break; }//Clear the source buffer on the last device.
 
-						if (b && !(b = wrapper.RunKernel   (kernelIndex, gridW, gridH, 1, blockW, blockH, 1)))					 { break; }
-					}
-					else
-					{
-						break;
-					}
+					if (b && !(b = wrapper.RunKernel   (kernelIndex, gridW, gridH, 1, blockW, blockH, 1)))					 { break; }
 				}
 				else
 				{
diff --git a/Source/EmberCL/RendererCL.h b/Source/EmberCL/RendererCL.h
index 4f5093b..d6a95eb 100644
--- a/Source/EmberCL/RendererCL.h
+++ b/Source/EmberCL/RendererCL.h
@@ -182,6 +182,7 @@ private:
 	int MakeAndGetDensityFilterProgram(size_t ss, uint filterWidth);
 	int MakeAndGetFinalAccumProgram(double& alphaBase, double& alphaScale);
 	int MakeAndGetGammaCorrectionProgram();
+	bool CreateHostBuffer();
 	bool SumDeviceHist();
 	void FillSeeds();
 
@@ -214,6 +215,7 @@ private:
 	string m_DEWidthsBufferName;
 	string m_DECoefIndicesBufferName;
 	string m_SpatialFilterCoefsBufferName;
+	string m_HostBufferName;
 	string m_HistBufferName;
 	string m_AccumBufferName;
 	string m_FinalImageName;