Features:

--Add support for Exr files which use 32-bit floats for each RGBA channel. Seems to come out too washed out. --Allow for clearing an individual color curve. --Allow for saving multiple image types in EmberRender and EmberAnimate. All writes are threaded. --Remove --bpc command line argument. Add format png16 as a replacement. --Remove --enable_jpg_comments and --enable_png_comments command line arguments, and replace them with --enable_comments which applies to jpg, png and exr. --Add menu items to variations and affine spinners which allow for easy entry of specific numeric values like pi. --Make final render dialog be wider rather than so tall. Bug fixes: --Fix some OpenCL compile errors on Mac. --Remove ability to save bitmap files on all platforms but Windows. Code changes: --New dependency on OpenEXR. --Allow Curves class to interact with objects of a different template type. --Make m_Curves member of Ember always use float as template type. --Set the length of the curves array to always be 2^17 which should offer enough precision with new 32-bit float pixel types. --Set pixel types to always be 32-bit float. This results in a major reduction of code in the final accumulation part of Renderer.h/cpp. --Remove corresponding code from RendererCL and FinalAccumOpenCLKernelCreator. --Remove Transparency, NumChannels and BytesPerPixel setters from Renderer.h/cpp. --Add new global functions to format final image buffers and place all alpha calculation and scaling code in them. --Blending is no longer needed in OpenGLWidget because of the new pixel type. --Make new class, AffineDoubleSpinBox. --Attempt to make file save dialog code work the same on all OSes. --Remove some unused functions.
2025-06-30 21:36:33 -04:00 · 2017-07-22 13:43:35 -07:00
parent d5760e451a
commit de613404de
68 changed files with 1755 additions and 1276 deletions
--- a/Source/Ember/Curves.h
+++ b/Source/Ember/Curves.h
@ -85,7 +85,8 @@ public:
 	/// </summary>
 	/// <param name="curves">The Curves object to add</param>
 	/// <returns>Reference to updated self</returns>
-	Curves<T>& operator += (const Curves<T>& curves)
+	template <typename U>
+	Curves<T>& operator += (const Curves<U>& curves)
 	{
 		for (size_t i = 0; i < 4; i++)
 		{
@ -93,8 +94,7 @@ public:
 			m_Points[i][1] += curves.m_Points[i][1];
 			m_Points[i][2] += curves.m_Points[i][2];
 			m_Points[i][3] += curves.m_Points[i][3];
-
-			m_Weights[i] += curves.m_Weights[i];
+			m_Weights[i]   += curves.m_Weights[i];
 		}

 		return *this;
@ -105,7 +105,8 @@ public:
 	/// </summary>
 	/// <param name="curves">The Curves object to multiply this one by</param>
 	/// <returns>Reference to updated self</returns>
-	Curves<T>& operator *= (const Curves<T>& curves)
+	template <typename U>
+	Curves<T>& operator *= (const Curves<U>& curves)
 	{
 		for (size_t i = 0; i < 4; i++)
 		{
@ -113,8 +114,7 @@ public:
 			m_Points[i][1] *= curves.m_Points[i][1];
 			m_Points[i][2] *= curves.m_Points[i][2];
 			m_Points[i][3] *= curves.m_Points[i][3];
-
-			m_Weights[i] *= curves.m_Weights[i];
+			m_Weights[i]   *= curves.m_Weights[i];
 		}

 		return *this;
@ -125,16 +125,16 @@ public:
 	/// </summary>
 	/// <param name="t">The scalar to multiply this object by</param>
 	/// <returns>Reference to updated self</returns>
-	Curves<T>& operator *= (const T& t)
+	template <typename U>
+	Curves<T>& operator *= (const U& t)
 	{
 		for (size_t i = 0; i < 4; i++)
 		{
-			m_Points[i][0] *= t;
-			m_Points[i][1] *= t;
-			m_Points[i][2] *= t;
-			m_Points[i][3] *= t;
-
-			m_Weights[i] *= t;
+			m_Points[i][0] *= T(t);
+			m_Points[i][1] *= T(t);
+			m_Points[i][2] *= T(t);
+			m_Points[i][3] *= T(t);
+			m_Weights[i]   *= T(t);
 		}

 		return *this;
@ -151,7 +151,21 @@ public:
 			m_Points[i][1] = v2T(0);
 			m_Points[i][2] = v2T(1);
 			m_Points[i][3] = v2T(1);
+			m_Weights[i] = v4T(1);
+		}
+	}

+	/// <summary>
+	/// Set a specific curve and its weight value to their default state.
+	/// </summary>
+	void Init(size_t i)
+	{
+		if (i < 4)
+		{
+			m_Points[i][0] = v2T(0);//0,0 -> 0,0 -> 1,1 -> 1,1.
+			m_Points[i][1] = v2T(0);
+			m_Points[i][2] = v2T(1);
+			m_Points[i][3] = v2T(1);
 			m_Weights[i] = v4T(1);
 		}
 	}
@ -176,9 +190,9 @@ public:
 		for (size_t i = 0; i < 4; i++)
 		{
 			if ((m_Points[i][0] != v2T(0)) ||
-				(m_Points[i][1] != v2T(0)) ||
-				(m_Points[i][2] != v2T(1)) ||
-				(m_Points[i][3] != v2T(1)))
+					(m_Points[i][1] != v2T(0)) ||
+					(m_Points[i][2] != v2T(1)) ||
+					(m_Points[i][3] != v2T(1)))
 			{
 				set = true;
 				break;
@ -197,12 +211,10 @@ public:
 	{
 		v4T result;
 		v2T solution(0, 0);
-
 		BezierSolve(t, m_Points[0], &m_Weights[0], solution); result.x = solution.y;
 		BezierSolve(t, m_Points[1], &m_Weights[1], solution); result.y = solution.y;
 		BezierSolve(t, m_Points[2], &m_Weights[2], solution); result.z = solution.y;
 		BezierSolve(t, m_Points[3], &m_Weights[3], solution); result.w = solution.y;
-
 		return result;
 	}

@ -217,20 +229,14 @@ private:
 	/// <param name="solution">The vec2 to store the solution in</param>
 	void BezierSolve(const T& t, v2T* src, v4T* w, v2T& solution)
 	{
-		T s, s2, s3, t2, t3, nom_x, nom_y, denom;
-
-		s = 1 - t;
-		s2 = s * s;
-		s3 = s * s * s;
-		t2 = t * t;
-		t3 = t * t * t;
-
-		nom_x = (w->x * s3 * src->x) + (w->y * s2 * 3 * t * src[1].x) + (w->z * s * 3 * t2 * src[2].x) + (w->w * t3 * src[3].x);
-
-		nom_y = (w->x * s3 * src->y) + (w->y * s2 * 3 * t * src[1].y) + (w->z * s * 3 * t2 * src[2].y) + (w->w * t3 * src[3].y);
-
-		denom = (w->x * s3) + (w->y * s2 * 3 * t) + (w->z * s * 3 * t2) + (w->w * t3);
-
+		T s = 1 - t;
+		T s2 = s * s;
+		T s3 = s * s * s;
+		T t2 = t * t;
+		T t3 = t * t * t;
+		T nom_x = (w->x * s3 * src->x) + (w->y * s2 * 3 * t * src[1].x) + (w->z * s * 3 * t2 * src[2].x) + (w->w * t3 * src[3].x);
+		T nom_y = (w->x * s3 * src->y) + (w->y * s2 * 3 * t * src[1].y) + (w->z * s * 3 * t2 * src[2].y) + (w->w * t3 * src[3].y);
+		T denom = (w->x * s3) + (w->y * s2 * 3 * t) + (w->z * s * 3 * t2) + (w->w * t3);

 		if (std::isnan(nom_x) || std::isnan(nom_y) || std::isnan(denom) || denom == 0)
 			return;
@ -247,24 +253,24 @@ public:
 //Must declare this outside of the class to provide for both orders of parameters.

 /// <summary>
-/// Multiplication operator to multiply a Curves<T> object by a scalar of type T.
+/// Multiplication operator to multiply a Curves<T> object by a scalar of type U.
 /// </summary>
 /// <param name="curves">The curves object to multiply</param>
 /// <param name="t">The scalar to multiply curves by</param>
 /// <returns>Copy of new Curves<T></returns>
-template<typename T>
-Curves<T> operator * (const Curves<T>& curves, const T& t)
+template <typename T, typename U>
+Curves<T> operator * (const Curves<T>& curves, const U& t)
 {
+	T tt = T(t);
 	Curves<T> c(curves);

 	for (size_t i = 0; i < 4; i++)
 	{
-		c.m_Points[i][0] *= t;
-		c.m_Points[i][1] *= t;
-		c.m_Points[i][2] *= t;
-		c.m_Points[i][3] *= t;
-
-		c.m_Weights[i] *= t;
+		c.m_Points[i][0] *= tt;
+		c.m_Points[i][1] *= tt;
+		c.m_Points[i][2] *= tt;
+		c.m_Points[i][3] *= tt;
+		c.m_Weights[i] *= tt;
 	}

 	return c;
@ -276,8 +282,8 @@ Curves<T> operator * (const Curves<T>& curves, const T& t)
 /// <param name="t">The scalar to multiply curves by</param>
 /// <param name="curves">The curves object to multiply</param>
 /// <returns>Copy of new Curves<T></returns>
-template<typename T>
-Curves<T> operator * (const T& t, const Curves<T>& curves)
+template <typename T, typename U>
+Curves<T> operator * (const U& t, const Curves<T>& curves)
 {
 	return curves * t;
 }
--- a/Source/Ember/Ember.h
+++ b/Source/Ember/Ember.h
@ -727,7 +727,7 @@ public:
 		InterpT<&Ember<T>::m_MinRadDE>(embers, coefs, size);
 		InterpT<&Ember<T>::m_CurveDE>(embers, coefs, size);
 		InterpT<&Ember<T>::m_SpatialFilterRadius>(embers, coefs, size);
-		InterpX<Curves<T>, &Ember<T>::m_Curves>(embers, coefs, size);
+		InterpX<Curves<float>, &Ember<T>::m_Curves>(embers, coefs, size);
 		//Normally done in assignment, must manually do here.
 		SetProjFunc();
 		//An extra step needed here due to the OOD that was not needed in the original.
@ -1653,7 +1653,7 @@ public:
 	Palette<float> m_Palette;//Final palette that is actually used is a copy of this inside of render, which will be of type bucketT (float).

 	//Curves used to adjust the color during final accumulation.
-	Curves<T> m_Curves;
+	Curves<float> m_Curves;

 	//Strings.

--- a/Source/Ember/EmberDefines.h
+++ b/Source/Ember/EmberDefines.h
@ -37,7 +37,7 @@ static void sincos(float x, float* s, float* c)

 namespace EmberNs
 {
-#define EMBER_VERSION "1.0.0.4"
+#define EMBER_VERSION "1.0.0.5"
 #define EPS6 T(1e-6)
 #define EPS std::numeric_limits<T>::epsilon()//Apoplugin.h uses -20, but it's more mathematically correct to do it this way.
 #define ISAAC_SIZE 4
@ -68,6 +68,9 @@ namespace EmberNs
 #define TMAX std::numeric_limits<T>::max()
 #define FLOAT_MAX_TAN 8388607.0f
 #define FLOAT_MIN_TAN -FLOAT_MAX_TAN
+#define CURVES_LENGTH 131072
+#define CURVES_LENGTH_M1 131071.0f
+#define ONE_OVER_CURVES_LENGTH_M1 7.62945273935e-6f
 #define EMPTYFIELD -9999
 typedef std::chrono::high_resolution_clock Clock;
 typedef std::chrono::duration<double, std::ratio<1, 1000>> DoubleMs;
@ -110,6 +113,7 @@ typedef std::lock_guard <std::recursive_mutex> rlg;
 	#define m3T  glm::tmat3x3<T, glm::defaultp>
 	#define m4T  glm::tmat4x4<T, glm::defaultp>
 	#define m23T glm::tmat2x3<T, glm::defaultp>
+	typedef vector<glm::tvec4<float, glm::defaultp>> vv4F;
 #else
 	#define v2T  glm::detail::tvec2<T, glm::defaultp>
 	#define v3T  glm::detail::tvec3<T, glm::defaultp>
@ -121,6 +125,7 @@ typedef std::lock_guard <std::recursive_mutex> rlg;
 	#define m3T  glm::detail::tmat3x3<T, glm::defaultp>
 	#define m4T  glm::detail::tmat4x4<T, glm::defaultp>
 	#define m23T glm::detail::tmat2x3<T, glm::defaultp>
+	typedef vector<glm::detail::tvec4<float, glm::defaultp>> vv4F;
 #endif

 enum class eInterp : et { EMBER_INTERP_LINEAR = 0, EMBER_INTERP_SMOOTH = 1 };
--- a/Source/Ember/Renderer.cpp
+++ b/Source/Ember/Renderer.cpp
@ -9,7 +9,9 @@ namespace EmberNs
 template <typename T, typename bucketT>
 Renderer<T, bucketT>::Renderer()
 {
-	m_Csa.resize(size_t(std::pow(size_t(256), BytesPerChannel())));//Need to at least have something here so the derived RendererCL can do the initial buffer allocation.
+	//Use a very large number regardless of the size of the output pixels. This should be sufficient granularity, even though
+	//it's technically less than the number of distinct values representable by a 32-bit float.
+	m_Csa.resize(size_t(CURVES_LENGTH));
 }

 /// <summary>
@ -344,7 +346,7 @@ bool Renderer<T, bucketT>::CreateTemporalFilter(bool& newAlloc)
 /// <param name="finalOffset">Offset in finalImage to store the pixels to. Default: 0.</param>
 /// <returns>True if nothing went wrong, else false.</returns>
 template <typename T, typename bucketT>
-eRenderStatus Renderer<T, bucketT>::Run(vector<byte>& finalImage, double time, size_t subBatchCountOverride, bool forceOutput, size_t finalOffset)
+eRenderStatus Renderer<T, bucketT>::Run(vector<v4F>& finalImage, double time, size_t subBatchCountOverride, bool forceOutput, size_t finalOffset)
 {
 	m_InRender = true;
 	EnterRender();
@ -645,7 +647,7 @@ AccumOnly:
 		CreateSpatialFilter(newFilterAlloc);
 		m_DensityFilterOffset = m_GutterWidth - size_t(Clamp<T>((T(m_SpatialFilter->FinalFilterWidth()) - T(Supersample())) / 2, 0, T(m_GutterWidth)));
 		m_CurvesSet = m_Ember.m_Curves.CurvesSet();
-		ComputeCurves(true);//Color curves must be re-calculated as well.
+		ComputeCurves();//Color curves must be re-calculated as well.

 		if (AccumulatorToFinalImage(finalImage, finalOffset) == eRenderStatus::RENDER_OK)
 		{
@ -822,6 +824,7 @@ bool Renderer<T, bucketT>::ResetBuckets(bool resetHist, bool resetAccum)
 }

 /// <summary>
+/// THIS IS UNUSED.
 /// Log scales a single row with a specially structured loop that will be vectorized by the compiler.
 /// Note this adds an epsilon to the denominator used to compute the logScale
 /// value because the conditional check for zero would have prevented the loop from
@ -882,7 +885,7 @@ eRenderStatus Renderer<T, bucketT>::LogScaleDensityFilter(bool forceOutput)
 					bucketT* __restrict hist = glm::value_ptr(m_HistBuckets[i]);//Vectorizer can't tell these point to different locations.
 					bucketT* __restrict acc = glm::value_ptr(m_AccumulatorBuckets[i]);

-					for (size_t v = 0; v < 4; v++)
+					for (size_t v = 0; v < 4; v++)//Vectorized by compiler.
 						acc[v] = hist[v] * logScale;
 				}
 			}
@ -1063,7 +1066,7 @@ eRenderStatus Renderer<T, bucketT>::GaussianDensityFilter()
 /// <param name="finalOffset">Offset in the buffer to store the pixels to</param>
 /// <returns>True if not prematurely aborted, else false.</returns>
 template <typename T, typename bucketT>
-eRenderStatus Renderer<T, bucketT>::AccumulatorToFinalImage(vector<byte>& pixels, size_t finalOffset)
+eRenderStatus Renderer<T, bucketT>::AccumulatorToFinalImage(vector<v4F>& pixels, size_t finalOffset)
 {
 	if (PrepFinalAccumVector(pixels))
 		return AccumulatorToFinalImage(pixels.data(), finalOffset);
@ -1079,14 +1082,13 @@ eRenderStatus Renderer<T, bucketT>::AccumulatorToFinalImage(vector<byte>& pixels
 /// <param name="finalOffset">Offset in the buffer to store the pixels to. Default: 0.</param>
 /// <returns>True if not prematurely aborted, else false.</returns>
 template <typename T, typename bucketT>
-eRenderStatus Renderer<T, bucketT>::AccumulatorToFinalImage(byte* pixels, size_t finalOffset)
+eRenderStatus Renderer<T, bucketT>::AccumulatorToFinalImage(v4F* pixels, size_t finalOffset)
 {
 	if (!pixels)
 		return eRenderStatus::RENDER_ERROR;

 	EnterFinalAccum();
 	//Timing t(4);
-	bool doAlpha = NumChannels() > 3;
 	size_t filterWidth = m_SpatialFilter->FinalFilterWidth();
 	bucketT g, linRange, vibrancy;
 	Color<bucketT> background;
@ -1104,7 +1106,7 @@ eRenderStatus Renderer<T, bucketT>::AccumulatorToFinalImage(byte* pixels, size_t

 			while (rowStart < rowEnd && !m_Abort)//Use the pointer itself as the offset to save an extra addition per iter.
 			{
-				GammaCorrection(*rowStart, background, g, linRange, vibrancy, true, false, glm::value_ptr(*rowStart));//Write back in place.
+				GammaCorrection(*rowStart, background, g, linRange, vibrancy, false, glm::value_ptr(*rowStart));//Write back in place.
 				rowStart++;
 			}
 		});
@ -1123,11 +1125,12 @@ eRenderStatus Renderer<T, bucketT>::AccumulatorToFinalImage(byte* pixels, size_t
 	parallel_for(size_t(0), FinalRasH(), size_t(1), [&](size_t j)
 	{
 		Color<bucketT> newBucket;
-		size_t pixelsRowStart = (m_YAxisUp ? ((FinalRasH() - j) - 1) : j) * FinalRowSize();//Pull out of inner loop for optimization.
+		size_t pixelsRowStart = (m_YAxisUp ? ((FinalRasH() - j) - 1) : j) * FinalRasW();//Pull out of inner loop for optimization.
 		size_t y = m_DensityFilterOffset + (j * Supersample());//Start at the beginning row of each super sample block.
 		size_t clampedFilterH = std::min(filterWidth, m_SuperRasH - y);//Make sure the filter doesn't go past the bottom of the gutter.
+		auto pv4T = pixels + pixelsRowStart;

-		for (size_t i = 0; i < FinalRasW(); i++, pixelsRowStart += PixelSize())
+		for (size_t i = 0; i < FinalRasW(); i++, pv4T++)
 		{
 			size_t ii, jj;
 			size_t x = m_DensityFilterOffset + (i * Supersample());//Start at the beginning column of each super sample block.
@ -1149,68 +1152,8 @@ eRenderStatus Renderer<T, bucketT>::AccumulatorToFinalImage(byte* pixels, size_t
 				}
 			}

-			if (BytesPerChannel() == 2)
-			{
-				auto p16 = reinterpret_cast<glm::uint16*>(pixels + pixelsRowStart);
-
-				if (EarlyClip())
-				{
-					newBucket *= bucketT(65535);
-
-					if (m_CurvesSet)
-					{
-						CurveAdjust(newBucket.r, 1);
-						CurveAdjust(newBucket.g, 2);
-						CurveAdjust(newBucket.b, 3);
-					}
-
-					p16[0] = glm::uint16(Clamp<bucketT>(newBucket.r, 0, 65535));
-					p16[1] = glm::uint16(Clamp<bucketT>(newBucket.g, 0, 65535));
-					p16[2] = glm::uint16(Clamp<bucketT>(newBucket.b, 0, 65535));
-
-					if (doAlpha)
-					{
-						if (Transparency())
-							p16[3] = byte(Clamp<bucketT>(newBucket.a, 0, 65535));
-						else
-							p16[3] = 65535;
-					}
-				}
-				else
-				{
-					GammaCorrection(*(reinterpret_cast<tvec4<bucketT, glm::defaultp>*>(&newBucket)), background, g, linRange, vibrancy, doAlpha, true, p16);
-				}
-			}
-			else
-			{
-				if (EarlyClip())
-				{
-					newBucket *= bucketT(255);
-
-					if (m_CurvesSet)
-					{
-						CurveAdjust(newBucket.r, 1);
-						CurveAdjust(newBucket.g, 2);
-						CurveAdjust(newBucket.b, 3);
-					}
-
-					pixels[pixelsRowStart] = byte(Clamp<bucketT>(newBucket.r, 0, 255));
-					pixels[pixelsRowStart + 1] = byte(Clamp<bucketT>(newBucket.g, 0, 255));
-					pixels[pixelsRowStart + 2] = byte(Clamp<bucketT>(newBucket.b, 0, 255));
-
-					if (doAlpha)
-					{
-						if (Transparency())
-							pixels[pixelsRowStart + 3] = byte(Clamp<bucketT>(newBucket.a, 0, 255));
-						else
-							pixels[pixelsRowStart + 3] = 255;
-					}
-				}
-				else
-				{
-					GammaCorrection(*(reinterpret_cast<tvec4<bucketT, glm::defaultp>*>(&newBucket)), background, g, linRange, vibrancy, doAlpha, true, pixels + pixelsRowStart);
-				}
-			}
+			auto pf = reinterpret_cast<float*>(pv4T);
+			GammaCorrection(*(reinterpret_cast<tvec4<bucketT, glm::defaultp>*>(&newBucket)), background, g, linRange, vibrancy, true, pf);
 		}
 	});

@ -1222,30 +1165,15 @@ eRenderStatus Renderer<T, bucketT>::AccumulatorToFinalImage(byte* pixels, size_t
 		if (ph >= FinalRasH())
 			ph = FinalRasH();

-		if (BytesPerChannel() == 1)
+		for (j = 0; j < ph; j++)
 		{
-			for (j = 0; j < ph; j++)
+			for (i = 0; i < FinalRasW(); i++)
 			{
-				for (i = 0; i < FinalRasW(); i++)
-				{
-					auto p = pixels + (NumChannels() * (i + j * FinalRasW()));
-					p[0] = byte(m_TempEmber.m_Palette[i * 256 / FinalRasW()][0] * WHITE);//The palette is [0..1], output image is [0..255].
-					p[1] = byte(m_TempEmber.m_Palette[i * 256 / FinalRasW()][1] * WHITE);
-					p[2] = byte(m_TempEmber.m_Palette[i * 256 / FinalRasW()][2] * WHITE);
-				}
-			}
-		}
-		else//BPC == 2.
-		{
-			for (j = 0; j < ph; j++)
-			{
-				for (i = 0; i < FinalRasW(); i++)
-				{
-					auto p16 = reinterpret_cast<glm::uint16*>(pixels + (PixelSize() * (i + j * FinalRasW())));
-					p16[0] = glm::uint16(m_TempEmber.m_Palette[i * 256 / FinalRasW()][0] * WHITE * bucketT(256)); //The palette is [0..1], output image is [0..65535].
-					p16[1] = glm::uint16(m_TempEmber.m_Palette[i * 256 / FinalRasW()][1] * WHITE * bucketT(256));
-					p16[2] = glm::uint16(m_TempEmber.m_Palette[i * 256 / FinalRasW()][2] * WHITE * bucketT(256));
-				}
+				auto p = pixels + (i + j * FinalRasW());
+				p->r = m_TempEmber.m_Palette[i * 256 / FinalRasW()][0];
+				p->g = m_TempEmber.m_Palette[i * 256 / FinalRasW()][1];
+				p->b = m_TempEmber.m_Palette[i * 256 / FinalRasW()][2];
+				p->a = 1;
 			}
 		}
 	}
@ -1322,7 +1250,6 @@ EmberStats Renderer<T, bucketT>::Iterate(size_t iterCount, size_t temporalSample
 			//t.Tic();
 			//Iterating, loop 3.
 			m_BadVals[threadIndex] += m_Iterator->Iterate(m_ThreadEmbers[threadIndex], params, m_Samples[threadIndex].data(), m_Rand[threadIndex]);
-			//m_BadVals[threadIndex] += m_Iterator->Iterate(m_Ember, params, m_Samples[threadIndex].data(), m_Rand[threadIndex]);
 			//iterationTime += t.Toc();

 			if (m_LockAccum)
@ -1677,112 +1604,97 @@ void Renderer<T, bucketT>::AddToAccum(const tvec4<bucketT, glm::defaultp>& bucke
 /// Because this code is used in both early and late clipping, a few extra arguments are passed
 /// to specify what actions to take. Coupled with an additional template argument, this allows
 /// using one function to perform all color clipping, gamma correction and final accumulation.
-/// Template argument accumT is expected to match bucketT for the case of early clipping, byte for late clip for
-/// images with one byte per channel and unsigned short for images with two bytes per channel.
+/// Template argument accumT is the scalar type of one output channel (float for the final image); correctedChannels must point to four of them (RGBA).
 /// </summary>
 /// <param name="bucket">The pixel to correct</param>
 /// <param name="background">The background color</param>
 /// <param name="g">The gamma to use</param>
 /// <param name="linRange">The linear range to use</param>
 /// <param name="vibrancy">The vibrancy to use</param>
-/// <param name="doAlpha">True if either early clip, or late clip with 4 channel output, else false.</param>
 /// <param name="scale">True if late clip, else false.</param>
 /// <param name="scale">True when producing the final output pixels (enables curve adjustment), false when correcting buckets in place for early clip.</param>
 template <typename T, typename bucketT>
 template <typename accumT>
-void Renderer<T, bucketT>::GammaCorrection(tvec4<bucketT, glm::defaultp>& bucket, Color<bucketT>& background, bucketT g, bucketT linRange, bucketT vibrancy, bool doAlpha, bool scale, accumT* correctedChannels)
+void Renderer<T, bucketT>::GammaCorrection(tvec4<bucketT, glm::defaultp>& bucket, Color<bucketT>& background, bucketT g, bucketT linRange, bucketT vibrancy, bool scale, accumT* correctedChannels)
 {
-	bucketT alpha, ls, a, newRgb[3];//Would normally use a Color<bucketT>, but don't want to call a needless constructor every time this function is called, which is once per pixel.
-	static bucketT scaleVal = numeric_limits<accumT>::max();
+	auto bt1 = bucketT(1);

-	if (bucket.a <= 0)
+	if (scale && EarlyClip())
 	{
-		alpha = 0;
-		ls = 0;
+		if (m_CurvesSet)
+		{
+			CurveAdjust(bucket.r, 1);
+			CurveAdjust(bucket.g, 2);
+			CurveAdjust(bucket.b, 3);
+		}
+
+		correctedChannels[0] = accumT(Clamp<bucketT>(bucket.r, 0, bt1));
+		correctedChannels[1] = accumT(Clamp<bucketT>(bucket.g, 0, bt1));
+		correctedChannels[2] = accumT(Clamp<bucketT>(bucket.b, 0, bt1));
+		correctedChannels[3] = accumT(Clamp<bucketT>(bucket.a, 0, bt1));
 	}
 	else
 	{
-		alpha = Palette<bucketT>::CalcAlpha(bucket.a, g, linRange);
-		ls = vibrancy * alpha / bucket.a;
-		ClampRef<bucketT>(alpha, 0, 1);
-	}
+		bucketT alpha, ls, a, newRgb[3];//Would normally use a Color<bucketT>, but don't want to call a needless constructor every time this function is called, which is once per pixel.

-	Palette<bucketT>::template CalcNewRgb<bucketT>(glm::value_ptr(bucket), ls, HighlightPower(), newRgb);
-
-	for (glm::length_t rgbi = 0; rgbi < 3; rgbi++)
-	{
-		a = newRgb[rgbi] + ((1 - vibrancy) * std::pow(std::abs(bucket[rgbi]), g));//Must use abs(), else it it could be a negative value and return NAN.
-
-		if (NumChannels() <= 3 || !Transparency())
+		if (bucket.a <= 0)
 		{
+			alpha = 0;
+			ls = 0;
+		}
+		else
+		{
+			alpha = Palette<bucketT>::CalcAlpha(bucket.a, g, linRange);
+			ls = vibrancy * alpha / bucket.a;
+			ClampRef<bucketT>(alpha, 0, 1);
+		}
+
+		Palette<bucketT>::template CalcNewRgb<bucketT>(glm::value_ptr(bucket), ls, HighlightPower(), newRgb);
+
+		for (glm::length_t rgbi = 0; rgbi < 3; rgbi++)
+		{
+			a = newRgb[rgbi] + ((1 - vibrancy) * std::pow(std::abs(bucket[rgbi]), g));//Must use abs(), else it it could be a negative value and return NAN.
 			a += (1 - alpha) * background[rgbi];
-		}
-		else
-		{
-			if (alpha > 0)
-				a /= alpha;
-			else
-				a = 0;
-		}

-		if (!scale)
-		{
-			correctedChannels[rgbi] = accumT(Clamp<bucketT>(a, 0, 1.0));//Early clip, just assign directly.
-		}
-		else
-		{
-			a *= scaleVal;
-
-			if (m_CurvesSet)
+			if (scale && m_CurvesSet)
 				CurveAdjust(a, rgbi + 1);

-			correctedChannels[rgbi] = accumT(Clamp<bucketT>(a, 0, scaleVal));//Final accum, multiply by 255 for 8 bpc (0-255), or 65535 for 16 bpc (0-65535).
+			correctedChannels[rgbi] = accumT(Clamp<bucketT>(a, 0, bt1));//Early clip, just assign directly.
 		}
-	}

-	if (doAlpha)
-	{
-		if (!scale)
-			correctedChannels[3] = accumT(alpha);//Early clip, just assign alpha directly.
-		else if (Transparency())
-			correctedChannels[3] = accumT(alpha * scaleVal);//Final accum, 4 channels, using transparency. Scale alpha from 0-1 to 0-255 for 8 bpc or 0-65535 for 16 bpc.
-		else
-			correctedChannels[3] = accumT(scaleVal);//Final accum, 4 channels, but not using transparency. 255 for 8 bpc, 65535 for 16 bpc.
+		correctedChannels[3] = accumT(alpha);
 	}
 }

 /// <summary>
 /// Setup the curve values when they are being used.
-/// This will be either 255 values for bpc=8, or 65535 values for bpc=16.
 /// </summary>
-/// <param name="scale">Whether to scale from 0-1 to 0-255 or 0-65535</param>
 template <typename T, typename bucketT>
-void Renderer<T, bucketT>::ComputeCurves(bool scale)
+void Renderer<T, bucketT>::ComputeCurves()
 {
 	if (m_CurvesSet)
 	{
-		m_Csa.resize(size_t(std::pow(size_t(256), BytesPerChannel())));
+		Timing t;
 		auto st = m_Csa.size();
-		auto stm1 = st - 1;
-		T tscale = scale ? T(stm1) : T(1);

 		for (size_t i = 0; i < st; i++)
-			m_Csa[i] = m_Ember.m_Curves.BezierFunc(i / T(stm1)) * tscale;
+			m_Csa[i] = m_Ember.m_Curves.BezierFunc(i * ONE_OVER_CURVES_LENGTH_M1);
+
+		t.Toc("ComputeCurves");
 	}
 }

 /// <summary>
 /// Apply the curve adjustment to a single channel.
 /// </summary>
-/// <param name="aScaled">The value of the channel to apply curve adjustment to, scaled to either 255 or 65535, depending on bpc.</param>
+/// <param name="a">The value of the channel to apply curve adjustment to, nominally in [0, 1]; it is scaled by CURVES_LENGTH_M1 and clamped to index the curve table.</param>
 /// <param name="index">The index of the channel to apply curve adjustment to</param>
 template <typename T, typename bucketT>
-void Renderer<T, bucketT>::CurveAdjust(bucketT& aScaled, const glm::length_t& index)
+void Renderer<T, bucketT>::CurveAdjust(bucketT& a, const glm::length_t& index)
 {
-	bucketT stm1 = bucketT(m_Csa.size() - 1);
-	size_t tempIndex = size_t(Clamp<bucketT>(aScaled, 0, stm1));
-	size_t tempIndex2 = size_t(Clamp<bucketT>(m_Csa[tempIndex].x, 0, stm1));
-	aScaled = m_Csa[tempIndex2][index];
+	size_t tempIndex = size_t(Clamp<bucketT>(a * CURVES_LENGTH_M1, 0, CURVES_LENGTH_M1));
+	size_t tempIndex2 = size_t(Clamp<bucketT>(m_Csa[tempIndex].x * CURVES_LENGTH_M1, 0, CURVES_LENGTH_M1));
+	a = m_Csa[tempIndex2][index];
 }

 //This class had to be implemented in a cpp file because the compiler was breaking.
--- a/Source/Ember/Renderer.h
+++ b/Source/Ember/Renderer.h
@ -66,7 +66,7 @@ public:
 	virtual bool CreateSpatialFilter(bool& newAlloc) override;
 	virtual bool CreateTemporalFilter(bool& newAlloc) override;
 	virtual size_t HistBucketSize() const override { return sizeof(tvec4<bucketT, glm::defaultp>); }
-	virtual eRenderStatus Run(vector<byte>& finalImage, double time = 0, size_t subBatchCountOverride = 0, bool forceOutput = false, size_t finalOffset = 0) override;
+	virtual eRenderStatus Run(vector<v4F>& finalImage, double time = 0, size_t subBatchCountOverride = 0, bool forceOutput = false, size_t finalOffset = 0) override;
 	virtual EmberImageComments ImageComments(const EmberStats& stats, size_t printEditDepth = 0, bool hexPalette = true) override;

 protected:
@ -76,10 +76,10 @@ protected:
 	virtual bool ResetBuckets(bool resetHist = true, bool resetAccum = true);
 	virtual eRenderStatus LogScaleDensityFilter(bool forceOutput = false);
 	virtual eRenderStatus GaussianDensityFilter();
-	virtual eRenderStatus AccumulatorToFinalImage(vector<byte>& pixels, size_t finalOffset);
-	virtual eRenderStatus AccumulatorToFinalImage(byte* pixels, size_t finalOffset);
+	virtual eRenderStatus AccumulatorToFinalImage(vector<v4F>& pixels, size_t finalOffset);
+	virtual eRenderStatus AccumulatorToFinalImage(v4F* pixels, size_t finalOffset);
 	virtual EmberStats Iterate(size_t iterCount, size_t temporalSample);
-	virtual void ComputeCurves(bool scale);
+	virtual void ComputeCurves();

 public:
 	//Non-virtual render properties, getters and setters.
@ -118,9 +118,7 @@ public:
 	inline T                     CenterX()             const;
 	inline T                     CenterY()             const;
 	inline T                     Rotate()              const;
-	inline T                     Hue()                 const;
 	inline bucketT               Brightness()          const;
-	inline bucketT               Contrast()            const;
 	inline bucketT               Gamma()               const;
 	inline bucketT               Vibrancy()            const;
 	inline bucketT               GammaThresh()         const;
@ -154,8 +152,8 @@ protected:
 private:
 	//Miscellaneous non-virtual functions used only in this class.
 	void Accumulate(QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand, Point<T>* samples, size_t sampleCount, const Palette<bucketT>* palette);
-	/*inline*/ void AddToAccum(const tvec4<bucketT, glm::defaultp>& bucket, intmax_t i, intmax_t ii, intmax_t j, intmax_t jj);
-	template <typename accumT> void GammaCorrection(tvec4<bucketT, glm::defaultp>& bucket, Color<bucketT>& background, bucketT g, bucketT linRange, bucketT vibrancy, bool doAlpha, bool scale, accumT* correctedChannels);
+	void AddToAccum(const tvec4<bucketT, glm::defaultp>& bucket, intmax_t i, intmax_t ii, intmax_t j, intmax_t jj);
+	template <typename accumT> void GammaCorrection(tvec4<bucketT, glm::defaultp>& bucket, Color<bucketT>& background, bucketT g, bucketT linRange, bucketT vibrancy, bool scale, accumT* correctedChannels);
 	void CurveAdjust(bucketT& a, const glm::length_t& index);
 	void VectorizedLogScale(size_t row, size_t rowEnd);

--- a/Source/Ember/RendererBase.cpp
+++ b/Source/Ember/RendererBase.cpp
@ -171,10 +171,10 @@ bool RendererBase::RandVec(vector<QTIsaac<ISAAC_SIZE, ISAAC_INT>>& randVec)
 /// </summary>
 /// <param name="pixels">The vector to allocate</param>
 /// <returns>True if the vector contains enough space to hold the output image</returns>
-bool RendererBase::PrepFinalAccumVector(vector<byte>& pixels)
+bool RendererBase::PrepFinalAccumVector(vector<v4F>& pixels)
 {
 	EnterResize();
-	size_t size = FinalBufferSize();
+	size_t size = FinalDimensions();

 	if (m_ReclaimOnResize)
 	{
@ -374,27 +374,6 @@ void RendererBase::ReclaimOnResize(bool reclaimOnResize)
 	ChangeVal([&] { m_ReclaimOnResize = reclaimOnResize; }, eProcessAction::FULL_RENDER);
 }

-/// <summary>
-/// Get whether to use transparency in the alpha channel.
-/// This only applies when the number of channels is 4 and the output
-/// image is Png.
-/// Default: false.
-/// </summary>
-/// <returns>True if using transparency, else false.</returns>
-bool RendererBase::Transparency() const { return m_Transparency; }
-
-/// <summary>
-/// Set whether to use transparency in the alpha channel.
-/// This only applies when the number of channels is 4 and the output
-/// image is Png.
-/// Set the render state to ACCUM_ONLY.
-/// </summary>
-/// <param name="transparency">True if using transparency, else false.</param>
-void RendererBase::Transparency(bool transparency)
-{
-	ChangeVal([&] { m_Transparency = transparency; }, eProcessAction::ACCUM_ONLY);
-}
-
 /// <summary>
 /// Set the callback object.
 /// </summary>
@ -474,40 +453,18 @@ void RendererBase::ThreadCount(size_t threads, const char* seedString)

 /// <summary>
 /// Get the bytes per channel of the output image.
-/// The only acceptable values are 1 and 2, and 2 is only
-/// used when the output is Png.
-/// Default: 1.
+/// This will always be 4 since each channel is a 32-bit float.
 /// </summary>
-/// <returns></returns>
+/// <returns>The number of bytes per channel</returns>
 size_t RendererBase::BytesPerChannel() const { return m_BytesPerChannel; }

 /// <summary>
-/// Set the bytes per channel of the output image.
-/// The only acceptable values are 1 and 2, and 2 is only
-/// used when the output is Png.
-/// Set the render state to ACCUM_ONLY.
-/// </summary>
-/// <param name="bytesPerChannel">The bytes per channel.</param>
-void RendererBase::BytesPerChannel(size_t bytesPerChannel)
-{
-	ChangeVal([&]
-	{
-		if (bytesPerChannel == 0 || bytesPerChannel > 2)
-			m_BytesPerChannel = 1;
-		else
-			m_BytesPerChannel = bytesPerChannel;
-	}, eProcessAction::ACCUM_ONLY);
-}
-
-/// <summary>
-/// Get the number of channels per pixel in the output image. 3 for RGB images
-/// like Bitmap and Jpeg, 4 for Png.
-/// Default is 3.
+/// Get the number of channels per pixel in the output image.
+/// This will always be 4 since each pixel is always RGBA.
 /// </summary>
 /// <returns>The number of channels per pixel in the output image</returns>
 size_t RendererBase::NumChannels() const { return m_NumChannels; }

-
 /// <summary>
 /// Get/set the priority used for the CPU rendering threads.
 /// This does not affect OpenCL rendering.
@ -543,18 +500,6 @@ void RendererBase::InteractiveFilter(eInteractiveFilter filter)
 /// Virtual render properties, getters and setters.
 /// </summary>

-/// <summary>
-/// Set the number of channels per pixel in the output image. 3 for RGB images
-/// like Bitmap and Jpeg, 4 for Png.
-/// Default is 3.
-/// Set the render state to ACCUM_ONLY.
-/// </summary>
-/// <param name="numChannels">The number of channels per pixel in the output image</param>
-void RendererBase::NumChannels(size_t numChannels)
-{
-	ChangeVal([&] { m_NumChannels = numChannels; }, eProcessAction::ACCUM_ONLY);
-}
-
 /// <summary>
 /// Get the number of threads used when rendering.
 /// Default: use all avaliable cores.
--- a/Source/Ember/RendererBase.h
+++ b/Source/Ember/RendererBase.h
@ -105,7 +105,7 @@ public:
 	size_t HistMemoryRequired(size_t strips);
 	pair<size_t, size_t> MemoryRequired(size_t strips, bool includeFinal, bool threadedWrite);
 	vector<QTIsaac<ISAAC_SIZE, ISAAC_INT>> RandVec();
-	bool PrepFinalAccumVector(vector<byte>& pixels);
+	bool PrepFinalAccumVector(vector<v4F>& pixels);

 	//Virtual processing functions.
 	virtual bool Ok() const;
@ -121,7 +121,7 @@ public:
 	virtual void ComputeBounds() = 0;
 	virtual void ComputeQuality() = 0;
 	virtual void ComputeCamera() = 0;
-	virtual eRenderStatus Run(vector<byte>& finalImage, double time = 0, size_t subBatchCountOverride = 0, bool forceOutput = false, size_t finalOffset = 0) = 0;
+	virtual eRenderStatus Run(vector<v4F>& finalImage, double time = 0, size_t subBatchCountOverride = 0, bool forceOutput = false, size_t finalOffset = 0) = 0;
 	virtual EmberImageComments ImageComments(const EmberStats& stats, size_t printEditDepth = 0, bool hexPalette = true) = 0;
 	virtual DensityFilterBase* GetDensityFilter() = 0;

@ -152,12 +152,9 @@ public:
 	void InsertPalette(bool insertPalette);
 	bool ReclaimOnResize() const;
 	void ReclaimOnResize(bool reclaimOnResize);
-	bool Transparency() const;
-	void Transparency(bool transparency);
 	void Callback(RenderCallback* callback);
 	void ThreadCount(size_t threads, const char* seedString = nullptr);
 	size_t BytesPerChannel() const;
-	void BytesPerChannel(size_t bytesPerChannel);
 	size_t NumChannels() const;
 	eThreadPriority Priority() const;
 	void Priority(eThreadPriority priority);
@ -165,7 +162,6 @@ public:
 	void InteractiveFilter(eInteractiveFilter filter);

 	//Virtual render properties, getters and setters.
-	virtual void NumChannels(size_t numChannels);
 	virtual size_t ThreadCount()   const;
 	virtual eRendererType RendererType() const;

@ -200,7 +196,6 @@ public:
 protected:
 	bool m_EarlyClip = false;
 	bool m_YAxisUp = false;
-	bool m_Transparency = false;
 	bool m_LockAccum = false;
 	bool m_InRender = false;
 	bool m_InFinalAccum = false;
@ -213,8 +208,8 @@ protected:
 	size_t m_SuperSize = 0;
 	size_t m_GutterWidth;
 	size_t m_DensityFilterOffset;
-	size_t m_NumChannels = 3;
-	size_t m_BytesPerChannel = 1;
+	size_t m_NumChannels = 4;
+	size_t m_BytesPerChannel = 4;
 	size_t m_ThreadsToUse;
 	size_t m_VibGamCount;
 	size_t m_LastTemporalSample = 0;
--- a/Source/Ember/SheepTools.h
+++ b/Source/Ember/SheepTools.h
@ -836,7 +836,6 @@ public:
 	/// <returns>The percentage possible color values that were present in the final output image</returns>
 	T TryColors(Ember<T>& ember, size_t colorResolution)
 	{
-		byte* p;
 		size_t i, hits = 0, res = colorResolution;
 		size_t pixTotal, res3 = res * res * res;
 		T scalar;
@ -862,14 +861,12 @@ public:

 		m_Hist.resize(res3);
 		Memset(m_Hist);
-		p = m_FinalImage.data();
+		auto p = m_FinalImage.data();

 		for (i = 0; i < m_Renderer->FinalDimensions(); i++)
 		{
-			m_Hist[(p[0] * res / 256) +
-									  (p[1] * res / 256) * res +
-									  (p[2] * res / 256) * res * res]++;//A specific histogram index representing the sum of R,G,B values.
-			p += m_Renderer->PixelSize();//Advance the pointer by 1 pixel.
+			const auto bin = [res](float c) { const auto b = size_t((c > 0 ? (c < 1 ? c : 1.0f) : 0.0f) * res); return b < res ? b : res - 1; };//Quantize one channel to an integer bin in [0, res - 1] like the old 8-bit code did; the clamp keeps NaN, negative and >= 1 values from indexing outside m_Hist.
+			m_Hist[bin(p[i].r) + bin(p[i].g) * res + bin(p[i].b) * res * res]++;//A specific histogram index built from the per-channel R,G,B bins.
 		}

 		for (i = 0; i < res3; i++)
@ -1352,7 +1349,7 @@ private:
 	string m_Comment;

 	vector<Point<T>> m_Samples;
-	vector<byte> m_FinalImage;
+	vector<v4F> m_FinalImage;
 	vector<uint> m_Hist;
 	EmberToXml<T> m_EmberToXml;
 	Iterator<T>* m_Iterator;