--User changes

-Add new preset dimensions to the right click menu of the width and height fields in the editor. -Change QSS stylesheets to properly handle tabs. -Make tabs rectangular by default. For some reason, they had always been triangular. --Bug fixes -Incremental rendering times in the editor were wrong. --Code changes -Migrate to Qt6. There is probably more work to be done here. -Migrate to VS2022. -Migrate to Wix 4 installer. -Change installer to install to program files for all users. -Fix many VS2022 code analysis warnings. -No longer use byte typedef, because std::byte is now a type. Revert all back to unsigned char. -Upgrade OpenCL headers to version 3.0 and keep locally now rather than trying to look for system files. -No longer link to Nvidia or AMD specific OpenCL libraries. Use the generic installer located at OCL_ROOT too. -Add the ability to change OpenCL grid dimensions. This was attempted for investigating possible performance improvements, but made no difference. This has not been verified on Linux or Mac yet.
2026-07-27 22:23:25 -04:00 · 2023-04-25 17:59:54 -06:00
parent 64d4470b12
commit 1dfbd4eff2
306 changed files with 514515 additions and 491207 deletions
@@ -1,452 +1,452 @@
-#include "EmberPch.h"
-#include "Affine2D.h"
-
-namespace EmberNs
-{
-/// <summary>
-/// Default constructor which sets the matrix to the identity.
-/// </summary>
-template <typename T>
-Affine2D<T>::Affine2D()
-{
-	MakeID();
-}
-
-/// <summary>
-/// Default copy constructor.
-/// </summary>
-/// <param name="affine">The Affine2D object to copy</param>
-template <typename T>
-Affine2D<T>::Affine2D(const Affine2D<T>& affine)
-{
-	Affine2D<T>::operator=<T>(affine);
-}
-
-/// <summary>
-/// Constructor which takes each column of the affine as a separate parameter.
-/// </summary>
-/// <param name="x">A and D</param>
-/// <param name="y">B and E</param>
-/// <param name="t">C and F</param>
-template <typename T>
-Affine2D<T>::Affine2D(v2T& x, v2T& y, v2T& t)
-{
-	X(x);
-	Y(y);
-	O(t);
-}
-
-/// <summary>
-/// Constructor which takes all six of the affine values as parameters.
-/// </summary>
-/// <param name="xx">A</param>
-/// <param name="xy">D</param>
-/// <param name="yx">B</param>
-/// <param name="yy">E</param>
-/// <param name="tx">C</param>
-/// <param name="ty">F</param>
-template <typename T>
-Affine2D<T>::Affine2D(T xx, T xy, T yx, T yy, T tx, T ty)
-{
-	A(xx);
-	D(xy);
-	B(yx);
-	E(yy);
-	C(tx);
-	F(ty);
-}
-
-/// <summary>
-/// Constructor which takes a 4x4 matrix and assigns the
-/// corresponding values in the 2x3 affine matrix.
-/// </summary>
-/// <param name="mat">The 4x4 affine matrix to read from</param>
-template <typename T>
-Affine2D<T>::Affine2D(m4T& mat)
-{
-	A(mat[0][0]);
-	B(mat[0][1]);
-	C(mat[0][3]);
-	D(mat[1][0]);
-	E(mat[1][1]);
-	F(mat[1][3]);
-}
-
-/// <summary>
-/// Default assignment operator.
-/// </summary>
-/// <param name="affine">The Affine2D object to copy</param>
-template <typename T>
-Affine2D<T>& Affine2D<T>::operator = (const Affine2D<T>& affine)
-{
-	if (this != &affine)
-		Affine2D<T>::operator=<T>(affine);
-
-	return *this;
-}
-
-/// <summary>
-/// == operator which tests if all fields are equal with another Affine2D.
-/// </summary>
-/// <param name="affine">The Affine2D to compare to</param>
-/// <returns>True if all fields are equal, else false</returns>
-template <typename T>
-bool Affine2D<T>::operator == (const Affine2D<T>& affine) const
-{
-	return IsClose(A(), affine.A()) &&
-		   IsClose(B(), affine.B()) &&
-		   IsClose(C(), affine.C()) &&
-		   IsClose(D(), affine.D()) &&
-		   IsClose(E(), affine.E()) &&
-		   IsClose(F(), affine.F());
-}
-
-/// <summary>
-/// * operator to multiply this affine transform by a vec2 and return the result as a vec2.
-/// </summary>
-/// <param name="v">The vec2 to multiply by</param>
-/// <returns>A new vec2 which is the product of the multiplication</returns>
-template <typename T>
-typename v2T Affine2D<T>::operator * (const v2T& v) const
-{
-	return TransformVector(v);
-}
-
-/// <summary>
-/// Return a copy of the object with all values A-F scaled by the specified amount.
-/// </summary>
-/// <param name="amount">The amount to scale by</param>
-/// <returns>A new Affine2D which a scaled copy of this instance</returns>
-template <typename T>
-Affine2D<T> Affine2D<T>:: operator * (T t) const
-{
-	return Affine2D<T>(A() * t,
-					   D() * t,
-					   B() * t,
-					   E() * t,
-					   C() * t,
-					   F() * t);
-}
-
-/// <summary>
-/// Make this affine transform the identity matrix.
-/// A and E = 1, all else 0.
-/// </summary>
-template <typename T>
-void Affine2D<T>::MakeID()
-{
-	A(1);
-	B(0);
-	C(0);
-	D(0);
-	E(1);
-	F(0);
-}
-
-/// <summary>
-/// Determine whether this affine transform is the identity matrix.
-/// </summary>
-/// <returns>True if A and E are equal to 1 and all others are 0, else false.</returns>
-template <typename T>
-bool Affine2D<T>::IsID() const
-{
-	return (A() == 1) &&
-		   (B() == 0) &&
-		   (C() == 0) &&
-		   (D() == 0) &&
-		   (E() == 1) &&
-		   (F() == 0);
-}
-
-/// <summary>
-/// Determine whether this affine transform is all zeroes.
-/// </summary>
-/// <returns>True if all 6 elements equal zero, else false.</returns>
-template <typename T>
-bool Affine2D<T>::IsZero() const
-{
-	return (A() == 0) &&
-		   (B() == 0) &&
-		   (C() == 0) &&
-		   (D() == 0) &&
-		   (E() == 0) &&
-		   (F() == 0);
-}
-
-/// <summary>
-/// Determine whether this affine transform was deliberately set to all empty values.
-/// </summary>
-/// <returns>True if all 6 elements equal zero, else false.</returns>
-template <typename T>
-bool Affine2D<T>::IsEmpty() const
-{
-	return (IsClose<T>(A(), EMPTYFIELD)) &&
-		   (IsClose<T>(B(), EMPTYFIELD)) &&
-		   (IsClose<T>(C(), EMPTYFIELD)) &&
-		   (IsClose<T>(D(), EMPTYFIELD)) &&
-		   (IsClose<T>(E(), EMPTYFIELD)) &&
-		   (IsClose<T>(F(), EMPTYFIELD));
-}
-
-/// <summary>
-/// Scales all values A-F by the specified amount.
-/// </summary>
-/// <param name="amount">The amount to scale by</param>
-template <typename T>
-void Affine2D<T>::Scale(T amount)
-{
-	A(A() * amount);
-	B(B() * amount);
-	C(C() * amount);
-	D(D() * amount);
-	E(E() * amount);
-	F(F() * amount);
-}
-
-/// <summary>
-/// Scales all values A,B,D,E by the specified amount.
-/// </summary>
-/// <param name="amount">The amount to scale by</param>
-template <typename T>
-void Affine2D<T>::ScaleXY(T amount)
-{
-	A(A() * amount);
-	B(B() * amount);
-	D(D() * amount);
-	E(E() * amount);
-}
-
-/// <summary>
-/// Rotate this affine transform around its origin by the specified angle in degrees.
-/// </summary>
-/// <param name="angle">The angle to rotate by</param>
-template <typename T>
-void Affine2D<T>::Rotate(T rad)
-{
-	const m4T origMat4 = ToMat4ColMajor(true);//Must center and use column major for glm to work.
-	const m4T newMat4 = glm::rotate(origMat4, rad, v3T(0, 0, 1));//Assuming only rotating around z.
-	A(newMat4[0][0]);//Use direct assignments instead of constructor to skip assigning C and F.
-	B(newMat4[0][1]);
-	D(newMat4[1][0]);
-	E(newMat4[1][1]);
-}
-
-template <typename T>
-void Affine2D<T>::RotateTrans(T rad)
-{
-	const m4T origMat4 = TransToMat4ColMajor();//Only put translation in this matrix.
-	const m4T newMat4 = glm::rotate(origMat4, rad, v3T(0, 0, 1));//Assuming only rotating around z.
-	C(newMat4[0][3]);//Use direct assignments instead of constructor to skip assigning A, B, D, E.
-	F(newMat4[1][3]);
-}
-
-/// <summary>
-/// Move by v.
-/// </summary>
-/// <param name="v">The vec2 describing how far to move in the x and y directions</param>
-template <typename T>
-void Affine2D<T>::Translate(const v2T& v)
-{
-	O(O() + v);
-}
-
-/// <summary>
-/// Rotate and scale the X and Y components by a certain amount based on X.
-/// </summary>
-/// <param name="v">The vec2 describing how much to rotate and scale the X and Y components</param>
-template <typename T>
-void Affine2D<T>::RotateScaleXTo(const v2T& v)
-{
-	const Affine2D<T> rs = CalcRotateScale(X(), v);
-	X(rs.TransformNormal(X()));
-	Y(rs.TransformNormal(Y()));
-}
-
-/// <summary>
-/// Rotate and scale the X and Y components by a certain amount based on Y.
-/// </summary>
-/// <param name="v">The vec2 describing how much to rotate and scale the X and Y components</param>
-template <typename T>
-void Affine2D<T>::RotateScaleYTo(const v2T& v)
-{
-	const Affine2D<T> rs = CalcRotateScale(Y(), v);
-	X(rs.TransformNormal(X()));
-	Y(rs.TransformNormal(Y()));
-}
-
-/// <summary>
-/// Return the inverse of the 2x3 affine matrix.
-/// </summary>
-/// <returns>The inverse of this affine transform</returns>
-template <typename T>
-Affine2D<T> Affine2D<T>::Inverse() const
-{
-	const T det = A() * E() - D() * B();
-	return Affine2D<T>(E() / det, -D() / det,
-					   -B() / det,  A() / det,
-					   (F() * B() - C() * E()) / det, (C() * D() - F() * A()) / det);
-}
-
-/// <summary>
-/// Return a vec2 gotten from transforming this affine transform
-/// by the vec2 passed in, but with a T component of 0, 0.
-/// </summary>
-/// <param name="v">The vec2 describing how much to transform by</param>
-/// <returns>The centered, transformed vec2</returns>
-template <typename T>
-typename v2T Affine2D<T>::TransformNormal(const v2T& v) const
-{
-	return v2T(A() * v.x + B() * v.y, D() * v.x + E() * v.y);
-}
-
-/// <summary>
-/// Return a vec2 gotten from transforming this affine transform
-/// by the vec2 passed in, and applying T translation.
-/// </summary>
-/// <param name="v">The vec2 describing how much to transform by</param>
-/// <returns>The translated, transformed vec2</returns>
-template <typename T>
-typename v2T Affine2D<T>::TransformVector(const v2T& v) const
-{
-	return v2T(A() * v.x + B() * v.y + C(), D() * v.x + E() * v.y + F());
-}
-
-/// <summary>
-/// Return the X and Y components as a 2x2 matrix in column major order.
-/// </summary>
-/// <returns>The 2x2 matrix</returns>
-template <typename T>
-typename m2T Affine2D<T>::ToMat2ColMajor() const
-{
-	return m2T(A(), B(),//Col0...
-			   D(), E());//1
-}
-
-/// <summary>
-/// Return the X and Y components as a 2x2 matrix in row major order.
-/// </summary>
-/// <returns>The 2x2 matrix</returns>
-template <typename T>
-typename m2T Affine2D<T>::ToMat2RowMajor() const
-{
-	return m2T(A(), D(),//Col0...
-			   B(), E());//1
-}
-
-/// <summary>
-/// Return the 2x3 affine transform matrix as a 4x4 matrix in column major order.
-/// </summary>
-/// <param name="center">Whether to use T translation value or just 0 for center</param>
-/// <returns>The 4x4 matrix</returns>
-template <typename T>
-typename m4T Affine2D<T>::ToMat4ColMajor(bool center) const
-{
-	const m4T mat(A(), B(), 0, center ? 0 : C(), //Col0...
-				  D(), E(), 0, center ? 0 : F(), //1
-				  0,   0, 1,			    0, //2
-				  0,   0, 0,			    1);//3
-	return mat;
-}
-
-/// <summary>
-/// Return the 2x3 affine transform matrix as a 4x4 matrix in row major order.
-/// </summary>
-/// <param name="center">Whether to use T translation value or just 0 for center</param>
-/// <returns>The 4x4 matrix</returns>
-template <typename T>
-typename m4T Affine2D<T>::ToMat4RowMajor(bool center) const
-{
-	const m4T mat(A(), D(), 0, 0,
-				  B(), E(), 0, 0,
-				  0,   0, 1, 0,
-				  center ? 0 : C(), center ? 0 : F(), 0, 1);
-	return mat;
-}
-
-template <typename T>
-typename m4T Affine2D<T>::TransToMat4ColMajor() const
-{
-	const m4T mat(1, 0, 0, C(), //Col0...
-				  0, 1, 0, F(), //1
-				  0, 0, 1, 0, //2
-				  0, 0, 0, 1);//3
-	return mat;
-}
-
-/// <summary>
-/// Accessors.
-/// </summary>
-template <typename T> T Affine2D<T>::A() const { return m_Mat[0][0]; }//[0][0]//flam3
-template <typename T> T Affine2D<T>::B() const { return m_Mat[0][1]; }//[1][0]
-template <typename T> T Affine2D<T>::C() const { return m_Mat[0][2]; }//[2][0]
-template <typename T> T Affine2D<T>::D() const { return m_Mat[1][0]; }//[0][1]
-template <typename T> T Affine2D<T>::E() const { return m_Mat[1][1]; }//[1][1]
-template <typename T> T Affine2D<T>::F() const { return m_Mat[1][2]; }//[2][1]
-
-template <typename T> void Affine2D<T>::A(T a) { m_Mat[0][0] = a; }
-template <typename T> void Affine2D<T>::B(T b) { m_Mat[0][1] = b; }
-template <typename T> void Affine2D<T>::C(T c) { m_Mat[0][2] = c; }
-template <typename T> void Affine2D<T>::D(T d) { m_Mat[1][0] = d; }
-template <typename T> void Affine2D<T>::E(T e) { m_Mat[1][1] = e; }
-template <typename T> void Affine2D<T>::F(T f) { m_Mat[1][2] = f; }
-
-template <typename T> typename v2T Affine2D<T>::X() const { return v2T(A(), D()); }//X Axis.
-template <typename T> typename v2T Affine2D<T>::Y() const { return v2T(B(), E()); }//Y Axis.
-template <typename T> typename v2T Affine2D<T>::O() const { return v2T(C(), F()); }//Translation.
-
-template <typename T> void Affine2D<T>::X(const v2T& x) { A(x.x); D(x.y); }//X Axis.
-template <typename T> void Affine2D<T>::Y(const v2T& y) { B(y.x); E(y.y); }//Y Axis.
-template <typename T> void Affine2D<T>::O(const v2T& t) { C(t.x); F(t.y); }//Translation.
-
-template <typename T>
-string Affine2D<T>::ToString() const
-{
-	ostringstream ss;
-	ss << "A: " <<  A() << " "
-	   << "B: " << B() << " "
-	   << "C: " << C()
-	   << "\nD: " << D() << " "
-	   << "E: " << E() << " "
-	   << "F: " << F();
-	return ss.str();
-}
-
-/// <summary>
-/// Rotate and scale this affine transform and return as a copy. Orginal is unchanged.
-/// </summary>
-/// <param name="from">The starting point to rotate and scale from</param>
-/// <param name="to">The ending point to rotate and scale to</param>
-/// <returns>The newly rotated and scalled Affine2D</returns>
-template <typename T>
-Affine2D<T> Affine2D<T>::CalcRotateScale(const v2T& from, const v2T& to)
-{
-	T a, c;
-	CalcRSAC(from, to, a, c);
-	return Affine2D<T>(a, c, -c, a, 0, 0);
-}
-
-/// <summary>
-/// Never fully understood what this did or why it's named what it is.
-/// But it seems to handle some rotating and scaling.
-/// </summary>
-/// <param name="from">The starting point to rotate and scale from</param>
-/// <param name="to">The ending point to rotate and scale to</param>
-/// <param name="a">a</param>
-/// <param name="c">c</param>
-template <typename T>
-void Affine2D<T>::CalcRSAC(const v2T& from, const v2T& to, T& a, T& c)
-{
-	const T lsq = from.x * from.x + from.y * from.y;
-	a = (from.y * to.y + from.x * to.x) / lsq;
-	c = (from.x * to.y - from.y * to.x) / lsq;
-}
-
-//This class had to be implemented in a cpp file because the compiler was breaking.
-//So the explicit instantiation must be declared here rather than in Ember.cpp where
-//all of the other classes are done.
-template EMBER_API class Affine2D<float>;
-
-#ifdef DO_DOUBLE
-	template EMBER_API class Affine2D<double>;
-#endif
-}
+#include "EmberPch.h"
+#include "Affine2D.h"
+
+namespace EmberNs
+{
+/// <summary>
+/// Default constructor which sets the matrix to the identity by calling MakeID().
+/// </summary>
+template <typename T>
+Affine2D<T>::Affine2D()
+{
+	MakeID();
+}
+
+/// <summary>
+/// Copy constructor. Forwards to the templated assignment operator with U = T.
+/// </summary>
+/// <param name="affine">The Affine2D object to copy</param>
+template <typename T>
+Affine2D<T>::Affine2D(const Affine2D<T>& affine)
+{
+	Affine2D<T>::operator=<T>(affine);
+}
+
+/// <summary>
+/// Constructor which takes each column of the affine (X axis, Y axis, translation) as a separate parameter.
+/// </summary>
+/// <param name="x">A and D</param>
+/// <param name="y">B and E</param>
+/// <param name="t">C and F</param>
+template <typename T>
+Affine2D<T>::Affine2D(v2T& x, v2T& y, v2T& t)
+{
+	X(x);//The arguments are only read; the X(), Y() and O() setters take them as const references.
+	Y(y);
+	O(t);
+}
+
+/// <summary>
+/// Constructor which takes all six of the affine values as parameters, in the order A, D, B, E, C, F.
+/// </summary>
+/// <param name="xx">A</param>
+/// <param name="xy">D</param>
+/// <param name="yx">B</param>
+/// <param name="yy">E</param>
+/// <param name="tx">C</param>
+/// <param name="ty">F</param>
+template <typename T>
+Affine2D<T>::Affine2D(T xx, T xy, T yx, T yy, T tx, T ty)
+{
+	A(xx);
+	D(xy);
+	B(yx);
+	E(yy);
+	C(tx);
+	F(ty);
+}
+
+/// <summary>
+/// Constructor which takes a 4x4 matrix and assigns the
+/// corresponding values in the 2x3 affine matrix. Only six elements of mat are read.
+/// </summary>
+/// <param name="mat">The 4x4 affine matrix to read from</param>
+template <typename T>
+Affine2D<T>::Affine2D(m4T& mat)
+{
+	A(mat[0][0]);
+	B(mat[0][1]);
+	C(mat[0][3]);//Translation is read from index 3, the same slot ToMat4ColMajor() writes it to.
+	D(mat[1][0]);
+	E(mat[1][1]);
+	F(mat[1][3]);
+}
+
+/// <summary>
+/// Assignment operator. Does nothing on self-assignment, otherwise forwards to the templated assignment operator with U = T.
+/// </summary>
+/// <param name="affine">The Affine2D object to copy</param>
+template <typename T>
+Affine2D<T>& Affine2D<T>::operator = (const Affine2D<T>& affine)
+{
+	if (this != &affine)
+		Affine2D<T>::operator=<T>(affine);
+
+	return *this;
+}
+
+/// <summary>
+/// == operator which tests if all fields are close to those of another Affine2D, as judged by IsClose().
+/// </summary>
+/// <param name="affine">The Affine2D to compare to</param>
+/// <returns>True if IsClose() holds for all six fields A-F, else false</returns>
+template <typename T>
+bool Affine2D<T>::operator == (const Affine2D<T>& affine) const
+{
+	return IsClose(A(), affine.A()) &&
+		   IsClose(B(), affine.B()) &&
+		   IsClose(C(), affine.C()) &&
+		   IsClose(D(), affine.D()) &&
+		   IsClose(E(), affine.E()) &&
+		   IsClose(F(), affine.F());
+}
+
+/// <summary>
+/// * operator to transform a vec2 by this affine, translation included. Forwards to TransformVector().
+/// </summary>
+/// <param name="v">The vec2 to transform</param>
+/// <returns>A new vec2 which is the result of TransformVector(v)</returns>
+template <typename T>
+typename v2T Affine2D<T>::operator * (const v2T& v) const
+{
+	return TransformVector(v);
+}
+
+/// <summary>
+/// Return a copy of the object with all values A-F scaled by the specified amount.
+/// </summary>
+/// <param name="t">The amount to scale by</param>
+/// <returns>A new Affine2D which is a scaled copy of this instance</returns>
+template <typename T>
+Affine2D<T> Affine2D<T>:: operator * (T t) const
+{
+	return Affine2D<T>(A() * t,//Arguments follow the six value constructor's order: A, D, B, E, C, F.
+					   D() * t,
+					   B() * t,
+					   E() * t,
+					   C() * t,
+					   F() * t);
+}
+
+/// <summary>
+/// Make this affine transform the identity matrix.
+/// A and E = 1; B, C, D and F = 0.
+/// </summary>
+template <typename T>
+void Affine2D<T>::MakeID()
+{
+	A(1);
+	B(0);
+	C(0);
+	D(0);
+	E(1);
+	F(0);
+}
+
+/// <summary>
+/// Determine whether this affine transform is the identity matrix, using exact comparison rather than IsClose().
+/// </summary>
+/// <returns>True if A and E are equal to 1 and all others are 0, else false.</returns>
+template <typename T>
+bool Affine2D<T>::IsID() const
+{
+	return (A() == 1) &&
+		   (B() == 0) &&
+		   (C() == 0) &&
+		   (D() == 0) &&
+		   (E() == 1) &&
+		   (F() == 0);
+}
+
+/// <summary>
+/// Determine whether this affine transform is all zeroes, using exact comparison rather than IsClose().
+/// </summary>
+/// <returns>True if all 6 elements equal zero, else false.</returns>
+template <typename T>
+bool Affine2D<T>::IsZero() const
+{
+	return (A() == 0) &&
+		   (B() == 0) &&
+		   (C() == 0) &&
+		   (D() == 0) &&
+		   (E() == 0) &&
+		   (F() == 0);
+}
+
+/// <summary>
+/// Determine whether this affine transform was deliberately set to all empty values.
+/// </summary>
+/// <returns>True if all 6 elements are close to EMPTYFIELD as judged by IsClose(), else false.</returns>
+template <typename T>
+bool Affine2D<T>::IsEmpty() const
+{
+	return (IsClose<T>(A(), EMPTYFIELD)) &&
+		   (IsClose<T>(B(), EMPTYFIELD)) &&
+		   (IsClose<T>(C(), EMPTYFIELD)) &&
+		   (IsClose<T>(D(), EMPTYFIELD)) &&
+		   (IsClose<T>(E(), EMPTYFIELD)) &&
+		   (IsClose<T>(F(), EMPTYFIELD));
+}
+
+/// <summary>
+/// Scales all values A-F, including the translation C and F, by the specified amount in place.
+/// </summary>
+/// <param name="amount">The amount to scale by</param>
+template <typename T>
+void Affine2D<T>::Scale(T amount)
+{
+	A(A() * amount);
+	B(B() * amount);
+	C(C() * amount);
+	D(D() * amount);
+	E(E() * amount);
+	F(F() * amount);
+}
+
+/// <summary>
+/// Scales A, B, D and E by the specified amount in place, leaving the translation C and F unchanged.
+/// </summary>
+/// <param name="amount">The amount to scale by</param>
+template <typename T>
+void Affine2D<T>::ScaleXY(T amount)
+{
+	A(A() * amount);
+	B(B() * amount);
+	D(D() * amount);
+	E(E() * amount);
+}
+
+/// <summary>
+/// Rotate the A, B, D, E part of this affine transform about the z axis by the specified angle. C and F are not changed.
+/// </summary>
+/// <param name="rad">The angle to rotate by, passed unchanged to glm::rotate(). Presumably radians, going by the name; confirm against the glm configuration in use.</param>
+template <typename T>
+void Affine2D<T>::Rotate(T rad)
+{
+	const m4T origMat4 = ToMat4ColMajor(true);//Must center and use column major for glm to work.
+	const m4T newMat4 = glm::rotate(origMat4, rad, v3T(0, 0, 1));//Assuming only rotating around z.
+	A(newMat4[0][0]);//Use direct assignments instead of constructor to skip assigning C and F.
+	B(newMat4[0][1]);
+	D(newMat4[1][0]);
+	E(newMat4[1][1]);
+}
+
+template <typename T>
+void Affine2D<T>::RotateTrans(T rad)//Rotates only the translation C and F about the z axis; rad is passed unchanged to glm::rotate().
+{
+	const m4T origMat4 = TransToMat4ColMajor();//Only put translation in this matrix.
+	const m4T newMat4 = glm::rotate(origMat4, rad, v3T(0, 0, 1));//Assuming only rotating around z.
+	C(newMat4[0][3]);//Use direct assignments instead of constructor to skip assigning A, B, D, E.
+	F(newMat4[1][3]);
+}
+
+/// <summary>
+/// Move by v: adds v to the translation O, which is C and F.
+/// </summary>
+/// <param name="v">The vec2 describing how far to move in the x and y directions</param>
+template <typename T>
+void Affine2D<T>::Translate(const v2T& v)
+{
+	O(O() + v);
+}
+
+/// <summary>
+/// Rotate and scale both the X and Y axes by the transform from CalcRotateScale() that maps the current X axis onto v.
+/// </summary>
+/// <param name="v">The vec2 the X axis is rotated and scaled onto</param>
+template <typename T>
+void Affine2D<T>::RotateScaleXTo(const v2T& v)
+{
+	const Affine2D<T> rs = CalcRotateScale(X(), v);//Computed once from the old X axis, then applied to both axes.
+	X(rs.TransformNormal(X()));
+	Y(rs.TransformNormal(Y()));
+}
+
+/// <summary>
+/// Rotate and scale both the X and Y axes by the transform from CalcRotateScale() that maps the current Y axis onto v.
+/// </summary>
+/// <param name="v">The vec2 the Y axis is rotated and scaled onto</param>
+template <typename T>
+void Affine2D<T>::RotateScaleYTo(const v2T& v)
+{
+	const Affine2D<T> rs = CalcRotateScale(Y(), v);//Computed once from the old Y axis, then applied to both axes.
+	X(rs.TransformNormal(X()));
+	Y(rs.TransformNormal(Y()));
+}
+
+/// <summary>
+/// Return the inverse of the 2x3 affine matrix. This instance is not changed.
+/// </summary>
+/// <returns>The inverse of this affine transform. The determinant is not checked for zero before dividing.</returns>
+template <typename T>
+Affine2D<T> Affine2D<T>::Inverse() const
+{
+	const T det = A() * E() - D() * B();
+	return Affine2D<T>(E() / det, -D() / det,
+					   -B() / det,  A() / det,
+					   (F() * B() - C() * E()) / det, (C() * D() - F() * A()) / det);
+}
+
+/// <summary>
+/// Return the vec2 gotten from transforming the vec2 passed in by the
+/// A, B, D, E part of this affine transform, ignoring the translation C and F.
+/// </summary>
+/// <param name="v">The vec2 to transform</param>
+/// <returns>The transformed vec2, with no translation applied</returns>
+template <typename T>
+typename v2T Affine2D<T>::TransformNormal(const v2T& v) const
+{
+	return v2T(A() * v.x + B() * v.y, D() * v.x + E() * v.y);
+}
+
+/// <summary>
+/// Return the vec2 gotten from transforming the vec2 passed in by this
+/// affine transform, including the translation C and F.
+/// </summary>
+/// <param name="v">The vec2 to transform</param>
+/// <returns>The translated, transformed vec2</returns>
+template <typename T>
+typename v2T Affine2D<T>::TransformVector(const v2T& v) const
+{
+	return v2T(A() * v.x + B() * v.y + C(), D() * v.x + E() * v.y + F());
+}
+
+/// <summary>
+/// Return the X and Y components as a 2x2 matrix in column major order. The translation is not included.
+/// </summary>
+/// <returns>The 2x2 matrix, constructed as m2T(A, B, D, E)</returns>
+template <typename T>
+typename m2T Affine2D<T>::ToMat2ColMajor() const
+{
+	return m2T(A(), B(),//Col0...
+			   D(), E());//1
+}
+
+/// <summary>
+/// Return the X and Y components as a 2x2 matrix in row major order. The translation is not included.
+/// </summary>
+/// <returns>The 2x2 matrix, constructed as m2T(A, D, B, E)</returns>
+template <typename T>
+typename m2T Affine2D<T>::ToMat2RowMajor() const
+{
+	return m2T(A(), D(),//Col0...
+			   B(), E());//1
+}
+
+/// <summary>
+/// Return the 2x3 affine transform matrix as a 4x4 matrix in column major order.
+/// </summary>
+/// <param name="center">If true, 0 is written in place of the translation C and F. If false, C and F are included.</param>
+/// <returns>The 4x4 matrix</returns>
+template <typename T>
+typename m4T Affine2D<T>::ToMat4ColMajor(bool center) const
+{
+	const m4T mat(A(), B(), 0, center ? 0 : C(), //Col0...
+				  D(), E(), 0, center ? 0 : F(), //1
+				  0,   0, 1,			    0, //2
+				  0,   0, 0,			    1);//3
+	return mat;
+}
+
+/// <summary>
+/// Return the 2x3 affine transform matrix as a 4x4 matrix in row major order.
+/// </summary>
+/// <param name="center">If true, 0 is written in place of the translation C and F. If false, C and F are included.</param>
+/// <returns>The 4x4 matrix</returns>
+template <typename T>
+typename m4T Affine2D<T>::ToMat4RowMajor(bool center) const
+{
+	const m4T mat(A(), D(), 0, 0,
+				  B(), E(), 0, 0,
+				  0,   0, 1, 0,
+				  center ? 0 : C(), center ? 0 : F(), 0, 1);
+	return mat;
+}
+
+template <typename T>
+typename m4T Affine2D<T>::TransToMat4ColMajor() const//Returns a 4x4 matrix holding only the translation: same layout as ToMat4ColMajor(), but with A and E as 1 and B and D as 0.
+{
+	const m4T mat(1, 0, 0, C(), //Col0...
+				  0, 1, 0, F(), //1
+				  0, 0, 1, 0, //2
+				  0, 0, 0, 1);//3
+	return mat;
+}
+
+/// <summary>
+/// Accessors. The index comments after the getters give the equivalent flam3 index, which differs from the m_Mat index used here.
+/// </summary>
+template <typename T> T Affine2D<T>::A() const { return m_Mat[0][0]; }//[0][0]//flam3
+template <typename T> T Affine2D<T>::B() const { return m_Mat[0][1]; }//[1][0]
+template <typename T> T Affine2D<T>::C() const { return m_Mat[0][2]; }//[2][0]
+template <typename T> T Affine2D<T>::D() const { return m_Mat[1][0]; }//[0][1]
+template <typename T> T Affine2D<T>::E() const { return m_Mat[1][1]; }//[1][1]
+template <typename T> T Affine2D<T>::F() const { return m_Mat[1][2]; }//[2][1]
+
+template <typename T> void Affine2D<T>::A(T a) { m_Mat[0][0] = a; }//Setters write the same m_Mat elements the getters read.
+template <typename T> void Affine2D<T>::B(T b) { m_Mat[0][1] = b; }
+template <typename T> void Affine2D<T>::C(T c) { m_Mat[0][2] = c; }
+template <typename T> void Affine2D<T>::D(T d) { m_Mat[1][0] = d; }
+template <typename T> void Affine2D<T>::E(T e) { m_Mat[1][1] = e; }
+template <typename T> void Affine2D<T>::F(T f) { m_Mat[1][2] = f; }
+
+template <typename T> typename v2T Affine2D<T>::X() const { return v2T(A(), D()); }//X Axis, returned by value as (A, D).
+template <typename T> typename v2T Affine2D<T>::Y() const { return v2T(B(), E()); }//Y Axis, returned by value as (B, E).
+template <typename T> typename v2T Affine2D<T>::O() const { return v2T(C(), F()); }//Translation, returned by value as (C, F).
+
+template <typename T> void Affine2D<T>::X(const v2T& x) { A(x.x); D(x.y); }//X Axis.
+template <typename T> void Affine2D<T>::Y(const v2T& y) { B(y.x); E(y.y); }//Y Axis.
+template <typename T> void Affine2D<T>::O(const v2T& t) { C(t.x); F(t.y); }//Translation.
+
+template <typename T>
+string Affine2D<T>::ToString() const//Returns the six labeled values as text on two lines: A, B, C on the first and D, E, F on the second.
+{
+	ostringstream ss;
+	ss << "A: " <<  A() << " "
+	   << "B: " << B() << " "
+	   << "C: " << C()
+	   << "\nD: " << D() << " "
+	   << "E: " << E() << " "
+	   << "F: " << F();
+	return ss.str();
+}
+
+/// <summary>
+/// Build the rotate and scale transform that maps one vector onto another. Static, so no instance is changed.
+/// </summary>
+/// <param name="from">The starting point to rotate and scale from</param>
+/// <param name="to">The ending point to rotate and scale to</param>
+/// <returns>A new Affine2D with A = a, D = c, B = -c, E = a as computed by CalcRSAC(), and zero translation</returns>
+template <typename T>
+Affine2D<T> Affine2D<T>::CalcRotateScale(const v2T& from, const v2T& to)
+{
+	T a, c;
+	CalcRSAC(from, to, a, c);
+	return Affine2D<T>(a, c, -c, a, 0, 0);
+}
+
+/// <summary>
+/// Compute the two coefficients CalcRotateScale() uses to build the transform that maps from onto to:
+/// a = dot(from, to) / |from|^2 and c = cross(from, to) / |from|^2. No check is made for a zero length from.
+/// </summary>
+/// <param name="from">The starting point to rotate and scale from</param>
+/// <param name="to">The ending point to rotate and scale to</param>
+/// <param name="a">Receives the dot product term</param>
+/// <param name="c">Receives the cross product term</param>
+template <typename T>
+void Affine2D<T>::CalcRSAC(const v2T& from, const v2T& to, T& a, T& c)
+{
+	const T lsq = from.x * from.x + from.y * from.y;
+	a = (from.y * to.y + from.x * to.x) / lsq;
+	c = (from.x * to.y - from.y * to.x) / lsq;
+}
+
+//This class had to be implemented in a cpp file because the compiler was breaking.
+//So the explicit instantiation must be declared here rather than in Ember.cpp where
+//all of the other classes are done. The double version is only instantiated when DO_DOUBLE is defined.
+template EMBER_API class Affine2D<float>;
+
+#ifdef DO_DOUBLE
+	template EMBER_API class Affine2D<double>;
+#endif
+}
@@ -1,125 +1,125 @@
-#pragma once
-
-#include "Utils.h"
-
-/// <summary>
-/// Affine2D class.
-/// </summary>
-
-namespace EmberNs
-{
-/// <summary>
-/// Uses matrix composition to handle the
-/// affine matrix. Taken almost entirely from
-/// Fractron, but using glm, and in C++.
-/// Note that the matrix layout differs from flam3 so it's best to use
-/// the A, B, C, D, E, F wrappers around the underlying matrix indices. But if the matrix must
-/// be accessed directly, the two are laid out as such:
-/// flam3: 3 columns of 2 rows each. Accessed col, row.
-/// [a(0,0)][b(1,0)][c(2,0)]
-/// [d(0,1)][e(1,1)][f(2,1)]
-/// Ember: 2 columns of 3 rows each. Accessed col, row.
-/// [a(0,0)][d(1,0)]
-/// [b(0,1)][e(1,1)]
-/// [c(0,2)][f(1,2)]
-/// Template argument expected to be float or double.
-/// </summary>
-template <typename T>
-class EMBER_API Affine2D
-{
-public:
-	Affine2D();
-	Affine2D(const Affine2D<T>& affine);
-
-	/// <summary>
-	/// Copy constructor to copy an Affine2D object of type U.
-	/// Special case that must be here in the header because it has
-	/// a second template parameter.
-	/// </summary>
-	/// <param name="affine">The Affine2D object to copy</param>
-	template <typename U>
-	Affine2D(const Affine2D<U>& affine)
-	{
-		Affine2D<T>::operator=<U>(affine);
-	}
-
-	Affine2D(v2T& x, v2T& y, v2T& t);
-	Affine2D(T xx, T xy, T yx, T yy, T tx, T ty);
-	Affine2D(m4T& mat);
-	Affine2D<T>& operator = (const Affine2D<T>& affine);
-
-	/// <summary>
-	/// Assignment operator to assign an Affine2D object of type U.
-	/// Special case that must be here in the header because it has
-	/// a second template parameter.
-	/// </summary>
-	/// <param name="affine">The Affine2D object to copy.</param>
-	/// <returns>Reference to updated self</returns>
-	template <typename U>
-	Affine2D<T>& operator = (const Affine2D<U>& affine)
-	{
-		A(static_cast<T>(affine.A()));
-		B(static_cast<T>(affine.B()));
-		C(static_cast<T>(affine.C()));
-		D(static_cast<T>(affine.D()));
-		E(static_cast<T>(affine.E()));
-		F(static_cast<T>(affine.F()));
-		return *this;
-	}
-
-	bool operator == (const Affine2D<T>& affine) const;
-	v2T operator * (const v2T& v) const;
-	Affine2D<T> operator * (T t) const;
-
-	void MakeID();
-	bool IsID() const;
-	bool IsZero() const;
-	bool IsEmpty() const;
-	void Scale(T amount);
-	void ScaleXY(T amount);
-	Affine2D<T> ScaleCopy(T amount);
-	void Rotate(T rad);
-	void RotateTrans(T rad);
-	void Translate(const v2T& v);
-	void RotateScaleXTo(const v2T& v);
-	void RotateScaleYTo(const v2T& v);
-	Affine2D<T> Inverse() const;
-	v2T TransformNormal(const v2T& v) const;
-	v2T TransformVector(const v2T& v) const;
-	m2T ToMat2ColMajor() const;
-	m2T ToMat2RowMajor() const;
-	m4T ToMat4ColMajor(bool center = false) const;
-	m4T ToMat4RowMajor(bool center = false) const;
-	m4T TransToMat4ColMajor() const;
-
-	//Note that returning a copy is actually faster than a const ref&.
-	T A() const;
-	T B() const;
-	T C() const;
-	T D() const;
-	T E() const;
-	T F() const;
-
-	void A(T a);
-	void B(T b);
-	void C(T c);
-	void D(T d);
-	void E(T e);
-	void F(T f);
-
-	v2T X() const;
-	v2T Y() const;
-	v2T O() const;
-
-	void X(const v2T& x);
-	void Y(const v2T& y);
-	void O(const v2T& t);
-
-	string ToString() const;
-
-	static Affine2D CalcRotateScale(const v2T& from, const v2T& to);
-	static void CalcRSAC(const v2T& from, const v2T& to, T& a, T& c);
-
-	m23T m_Mat;
-};
-}
+#pragma once
+
+#include "Utils.h"
+
+/// <summary>
+/// Affine2D class.
+/// </summary>
+
+namespace EmberNs
+{
+/// <summary>
+/// Uses matrix composition to handle the
+/// affine matrix. Taken almost entirely from
+/// Fractron, but using glm, and in C++.
+/// Note that the matrix layout differs from flam3 so it's best to use
+/// the A, B, C, D, E, F wrappers around the underlying matrix indices. But if the matrix must
+/// be accessed directly, the two are laid out as such:
+/// flam3: 3 columns of 2 rows each. Accessed col, row.
+/// [a(0,0)][b(1,0)][c(2,0)]
+/// [d(0,1)][e(1,1)][f(2,1)]
+/// Ember: 2 columns of 3 rows each. Accessed col, row.
+/// [a(0,0)][d(1,0)]
+/// [b(0,1)][e(1,1)]
+/// [c(0,2)][f(1,2)]
+/// Template argument expected to be float or double.
+/// </summary>
+template <typename T>
+class EMBER_API Affine2D
+{
+public:
+	Affine2D();
+	Affine2D(const Affine2D<T>& affine);
+
+	/// <summary>
+	/// Converting constructor to copy an Affine2D object of type U.
+	/// Special case that must be here in the header because it has
+	/// a second template parameter.
+	/// </summary>
+	/// <param name="affine">The Affine2D object to copy</param>
+	template <typename U>
+	Affine2D(const Affine2D<U>& affine)
+	{
+		Affine2D<T>::operator=<U>(affine);
+	}
+
+	Affine2D(v2T& x, v2T& y, v2T& t);
+	Affine2D(T xx, T xy, T yx, T yy, T tx, T ty);//Argument order is A, D, B, E, C, F.
+	Affine2D(m4T& mat);
+	Affine2D<T>& operator = (const Affine2D<T>& affine);
+
+	/// <summary>
+	/// Assignment operator to assign an Affine2D object of type U.
+	/// Special case that must be here in the header because it has
+	/// a second template parameter. Each of A-F is converted with static_cast.
+	/// </summary>
+	/// <param name="affine">The Affine2D object to copy.</param>
+	/// <returns>Reference to updated self</returns>
+	template <typename U>
+	Affine2D<T>& operator = (const Affine2D<U>& affine)
+	{
+		A(static_cast<T>(affine.A()));
+		B(static_cast<T>(affine.B()));
+		C(static_cast<T>(affine.C()));
+		D(static_cast<T>(affine.D()));
+		E(static_cast<T>(affine.E()));
+		F(static_cast<T>(affine.F()));
+		return *this;
+	}
+
+	bool operator == (const Affine2D<T>& affine) const;
+	v2T operator * (const v2T& v) const;
+	Affine2D<T> operator * (T t) const;
+
+	void MakeID();
+	bool IsID() const;
+	bool IsZero() const;
+	bool IsEmpty() const;
+	void Scale(T amount);
+	void ScaleXY(T amount);
+	Affine2D<T> ScaleCopy(T amount);//NOTE(review): no definition of this member is visible in the implementation shown alongside this header; confirm one exists before calling it.
+	void Rotate(T rad);
+	void RotateTrans(T rad);
+	void Translate(const v2T& v);
+	void RotateScaleXTo(const v2T& v);
+	void RotateScaleYTo(const v2T& v);
+	Affine2D<T> Inverse() const;
+	v2T TransformNormal(const v2T& v) const;
+	v2T TransformVector(const v2T& v) const;
+	m2T ToMat2ColMajor() const;
+	m2T ToMat2RowMajor() const;
+	m4T ToMat4ColMajor(bool center = false) const;
+	m4T ToMat4RowMajor(bool center = false) const;
+	m4T TransToMat4ColMajor() const;
+
+	//Note that returning a copy is actually faster than a const ref&.
+	T A() const;
+	T B() const;
+	T C() const;
+	T D() const;
+	T E() const;
+	T F() const;
+
+	void A(T a);
+	void B(T b);
+	void C(T c);
+	void D(T d);
+	void E(T e);
+	void F(T f);
+
+	v2T X() const;//X axis: A and D.
+	v2T Y() const;//Y axis: B and E.
+	v2T O() const;//Translation: C and F.
+
+	void X(const v2T& x);
+	void Y(const v2T& y);
+	void O(const v2T& t);
+
+	string ToString() const;
+
+	static Affine2D CalcRotateScale(const v2T& from, const v2T& to);
+	static void CalcRSAC(const v2T& from, const v2T& to, T& a, T& c);
+
+	m23T m_Mat;//Public storage, accessed col, row as laid out in the class comment above.
+};
+}
@@ -1,273 +1,273 @@
-#pragma once
-
-#include "Point.h"
-
-/// <summary>
-/// CarToRas class.
-/// </summary>
-
-namespace EmberNs
-{
-/// <summary>
-/// When iterating, everything is positioned in terms of a carteseian plane with 0,0 in the center like so:
-/// [-1,1]			[1,1]
-/// [-1,-1]			[1,-1]
-/// However, when accumulating to the histogram, the data is stored in the traditional raster coordinate system
-/// of 0,0 at the top left and x,y at the bottom right. This class provides functionality to convert from one
-/// to the other and is used when accumulating a sub batch of iteration results to the histogram.
-/// Note the functions use reference arguments for the converted values because they are slightly faster than returning a value.
-/// Template argument expected to be float or double.
-/// </summary>
-template <typename T>
-class EMBER_API CarToRas
-{
-public:
-	/// <summary>
-	/// Empty constructor. This class should never be used unless it's been properly constructed with the constructor that takes arguments.
-	/// </summary>
-	CarToRas() = default;
-
-	/// <summary>
-	/// Constructor that takes arguments to set up the bounds and passes them to Init().
-	/// </summary>
-	/// <param name="carLlX">The lower left x of the cartesian plane</param>
-	/// <param name="carLlY">The lower left y of the cartesian plane</param>
-	/// <param name="carUrX">The upper right x of the cartesian plane</param>
-	/// <param name="carUrY">The upper right y of the cartesian plane</param>
-	/// <param name="rasW">The width in pixels of the raster image/histogram</param>
-	/// <param name="rasH">The height in pixels of the raster image/histogram</param>
-	/// <param name="aspectRatio">The aspect ratio, generally 1</param>
-	CarToRas(T carLlX, T carLlY, T carUrX, T carUrY, size_t rasW, size_t rasH, T aspectRatio)
-	{
-		Init(carLlX, carLlY, carUrX, carUrY, rasW, rasH, aspectRatio);
-	}
-
-	/// <summary>
-	/// Default copy constructor.
-	/// </summary>
-	/// <param name="carToRas">The CarToRas object to copy</param>
-	CarToRas(const CarToRas<T>& carToRas)
-	{
-		CarToRas<T>::operator=<T>(carToRas);
-	}
-
-	/// <summary>
-	/// Copy constructor to copy a CarToRas object of type U.
-	/// </summary>
-	/// <param name="carToRas">The CarToRas object to copy</param>
-	template <typename U>
-	CarToRas(const CarToRas<U>& carToRas)
-	{
-		CarToRas<T>::operator=<U>(carToRas);
-	}
-
-	/// <summary>
-	/// Default assignment operator.
-	/// </summary>
-	/// <param name="carToRas">The CarToRas object to copy</param>
-	CarToRas<T>& operator = (const CarToRas<T>& carToRas)
-	{
-		if (this != &carToRas)
-			CarToRas<T>::operator=<T>(carToRas);
-
-		return *this;
-	}
-
-	/// <summary>
-	/// Assignment operator to assign a CarToRas object of type U.
-	/// </summary>
-	/// <param name="carToRas">The CarToRas object to copy.</param>
-	/// <returns>Reference to updated self</returns>
-	template <typename U>
-	CarToRas<T>& operator = (const CarToRas<U>& carToRas)
-	{
-		m_RasWidth = carToRas.RasWidth();
-		m_RasHeight = carToRas.RasHeight();
-		m_OneRow = T(carToRas.OneRow());
-		m_OneCol = T(carToRas.OneCol());
-		m_PixPerImageUnitW = T(carToRas.PixPerImageUnitW());
-		m_RasLlX = T(carToRas.RasLlX());
-		m_PixPerImageUnitH = T(carToRas.PixPerImageUnitH());
-		m_RasLlY = T(carToRas.RasLlY());
-		m_CarLlX = T(carToRas.CarLlX());
-		m_CarLlY = T(carToRas.CarLlY());
-		m_CarUrX = T(carToRas.CarUrX());
-		m_CarUrY = T(carToRas.CarUrY());
-		m_PadCarLlX = T(carToRas.PadCarLlX());
-		m_PadCarLlY = T(carToRas.PadCarLlY());
-		m_PadCarUrX = T(carToRas.PadCarUrX());
-		m_PadCarUrY = T(carToRas.PadCarUrY());
-		m_CarHalfX = T(carToRas.CarHalfX());
-		m_CarHalfY = T(carToRas.CarHalfY());
-		m_CarCenterX = T(carToRas.CarCenterX());
-		m_CarCenterY = T(carToRas.CarCenterY());
-		return *this;
-	}
-
-	/// <summary>
-	/// Initialize the dimensions with the specified bounds.
-	/// </summary>
-	/// <param name="carLlX">The lower left x of the cartesian plane</param>
-	/// <param name="carLlY">The lower left y of the cartesian plane</param>
-	/// <param name="carUrX">The upper right x of the cartesian plane</param>
-	/// <param name="carUrY">The upper right y of the cartesian plane</param>
-	/// <param name="rasW">The width in pixels of the raster image/histogram</param>
-	/// <param name="rasH">The height in pixels of the raster image/histogram</param>
-	/// <param name="aspectRatio">The aspect ratio, generally 1</param>
-	void Init(T carLlX, T carLlY, T carUrX, T carUrY, size_t rasW, size_t rasH, T aspectRatio)
-	{
-		m_RasWidth = rasW;
-		m_RasHeight = rasH;
-		m_CarLlX = carLlX;
-		m_CarLlY = carLlY;
-		m_CarUrX = carUrX;
-		m_CarUrY = carUrY;
-		T carW = m_CarUrX - m_CarLlX;//Right minus left.
-		T carH = m_CarUrY - m_CarLlY;//Top minus bottom.
-		T invSizeW = T(1.0) / carW;
-		T invSizeH = T(1.0) / carH;
-		m_PixPerImageUnitW = static_cast<T>(rasW) * invSizeW;
-		m_RasLlX = m_PixPerImageUnitW * carLlX;
-		m_PixPerImageUnitH = static_cast<T>(rasH) * invSizeH;
-		m_RasLlY = m_PixPerImageUnitH * carLlY;
-		m_OneRow = std::abs(m_CarUrY - m_CarLlY) / m_RasHeight;
-		m_OneCol = std::abs(m_CarUrX - m_CarLlX) / m_RasWidth;
-		m_PadCarLlX = m_CarLlX + m_OneCol;
-		m_PadCarUrX = m_CarUrX - m_OneCol;
-		m_PadCarLlY = m_CarLlY + m_OneRow;
-		m_PadCarUrY = m_CarUrY - m_OneRow;
-		m_CarHalfX = (m_CarUrX - m_CarLlX) / 2;
-		m_CarHalfY = (m_CarUrY - m_CarLlY) / 2;
-		m_CarCenterX = m_CarLlX + m_CarHalfX;
-		m_CarCenterY = m_CarLlY + m_CarHalfY;
-	}
-
-	/// <summary>
-	/// Assign values to the cached half width/height fields.
-	/// This is only done manually here and is used when rendering strips
-	/// because a cached copy of these is required because the real values
-	/// change with the assignment of each temporary strip ember object.
-	/// </summary>
-	/// <param name="x">The cached value equal to half of the cartesian width of the x plane</param>
-	/// <param name="y">The cached value equal to half of the cartesian width of the y plane</param>
-	void UpdateCachedHalf(T x, T y)
-	{
-		m_CachedCarHalfX = x;
-		m_CachedCarHalfY = y;
-	}
-
-	/// <summary>
-	/// Convert a cartesian x, y coordinate to a raster x, y coordinate.
-	/// This will flip the Y coordinate, so points that hit the bottom of the cartesian plane will
-	/// be mapped to the top of the histogram and vice versa.
-	/// There is a very slim chance that a point will be right on the border and will technically be in bounds, passing the InBounds() test,
-	/// but ends up being mapped to a histogram bucket that is out of bounds due to roundoff error. Perform an additional check after this call to make sure the
-	/// mapped point is in bounds.
-	/// </summary>
-	/// <param name="cartX">The cartesian x</param>
-	/// <param name="cartY">The cartesian y</param>
-	/// <param name="rasX">The converted raster x</param>
-	/// <param name="rasY">The converted raster y</param>
-	inline void Convert(T cartX, T cartY, size_t& rasX, size_t& rasY)
-	{
-		rasX = static_cast<size_t>(m_PixPerImageUnitW * cartX - m_RasLlX);
-		rasY = static_cast<size_t>(m_RasLlY - (m_PixPerImageUnitH * cartY));
-	}
-
-	/// <summary>
-	/// Convert a cartesian x, y coordinate to a single raster buffer index.
-	/// This will flip the Y coordinate, so points that hit the bottom of the cartesian plane will
-	/// be mapped to the top of the histogram and vice versa.
-	/// There is a very slim chance that a point will be right on the border and will technically be in bounds, passing the InBounds() test,
-	/// but ends up being mapped to a histogram bucket that is out of bounds due to roundoff error. Perform an additional check after this call to make sure the
-	/// mapped point is in bounds.
-	/// </summary>
-	/// <param name="cartX">The cartesian x</param>
-	/// <param name="cartY">The cartesian y</param>
-	/// <param name="singleBufferIndex">The converted single raster buffer index</param>
-	inline void Convert(T cartX, T cartY, size_t& singleBufferIndex)
-	{
-		singleBufferIndex = static_cast<size_t>(m_PixPerImageUnitW * cartX - m_RasLlX) + (m_RasWidth * static_cast<size_t>(m_PixPerImageUnitH * cartY - m_RasLlY));
-	}
-
-	/// <summary>
-	/// Convert a cartesian x, y point to a single raster buffer index.
-	/// This will flip the Y coordinate, so points that hit the bottom of the cartesian plane will
-	/// be mapped to the top of the histogram and vice versa.
-	/// This is the most efficient possible way of converting, consisting of only
-	/// a multiply and subtract per coordinate element.
-	/// There is a very slim chance that a point will be right on the border and will technically be in bounds, passing the InBounds() test,
-	/// but ends up being mapped to a histogram bucket that is out of bounds due to roundoff error. Perform an additional check after this call to make sure the
-	/// mapped point is in bounds.
-	/// </summary>
-	/// <param name="point">The cartesian y</param>
-	/// <param name="singleBufferIndex">The converted single raster buffer index</param>
-	inline void Convert(Point<T>& point, size_t& singleBufferIndex)
-	{
-		//singleBufferIndex = static_cast<size_t>(Round(m_PixPerImageUnitW * point.m_X - m_RasLlX) + (m_RasWidth * Round(m_PixPerImageUnitH * point.m_Y - m_RasLlY)));
-		singleBufferIndex = static_cast<size_t>(m_PixPerImageUnitW * point.m_X - m_RasLlX) + (m_RasWidth * static_cast<size_t>(m_PixPerImageUnitH * point.m_Y - m_RasLlY));
-	}
-
-	/// <summary>
-	/// Determine if a point in the cartesian plane can be converted to a point within the raster plane.
-	/// There is a very slim chance that a point will be right on the border and will technically be in bounds, passing the InBounds() test,
-	/// but ends up being mapped to a histogram bucket that is out of bounds due to roundoff error. Perform an additional check after this call to make sure the
-	/// mapped point is in bounds.
-	/// </summary>
-	/// <param name="point">The point to test</param>
-	/// <returns>True if within bounds, else false</returns>
-	inline bool InBounds(Point<T>& point)
-	{
-		//Debug check for hitting the very first pixel in the image.
-		//if (point.m_Y > m_CarLlY && point.m_Y <= m_PadCarLlY && //Mapped to top row...
-		//	point.m_X > m_CarLlX && point.m_X <= m_PadCarLlX)//...first col.
-		//{
-		//	cout << "First pixel hit.\n";
-		//}
-		return point.m_X >= m_CarLlX &&
-			   point.m_X < m_CarUrX &&
-			   point.m_Y < m_CarUrY &&
-			   point.m_Y >= m_CarLlY;
-	}
-
-	/// <summary>
-	/// Accessors.
-	/// </summary>
-	inline size_t RasWidth() const { return m_RasWidth; }
-	inline size_t RasHeight() const { return m_RasHeight; }
-	inline T OneRow() const { return m_OneRow; }
-	inline T OneCol() const { return m_OneCol; }
-	inline T PixPerImageUnitW() const { return m_PixPerImageUnitW; }
-	inline T RasLlX() const { return m_RasLlX; }
-	inline T PixPerImageUnitH() const { return m_PixPerImageUnitH; }
-	inline T RasLlY() const { return m_RasLlY; }
-	inline T CarLlX() const { return m_CarLlX; }
-	inline T CarLlY() const { return m_CarLlY; }
-	inline T CarUrX() const { return m_CarUrX; }
-	inline T CarUrY() const { return m_CarUrY; }
-	inline T PadCarLlX() const { return m_PadCarLlX; }
-	inline T PadCarLlY() const { return m_PadCarLlY; }
-	inline T PadCarUrX() const { return m_PadCarUrX; }
-	inline T PadCarUrY() const { return m_PadCarUrY; }
-	inline T CarHalfX() const { return m_CarHalfX; }
-	inline T CarHalfY() const { return m_CarHalfY; }
-	inline T CachedCarHalfX() const { return m_CachedCarHalfX; }
-	inline T CachedCarHalfY() const { return m_CachedCarHalfY; }
-	inline T CarCenterX() const { return m_CarCenterX; }
-	inline T CarCenterY() const { return m_CarCenterY; }
-
-private:
-	size_t m_RasWidth, m_RasHeight;//The width and height of the raster image.
-	T m_OneRow;//The distance that one raster row represents in the cartesian plane.
-	T m_OneCol;//The distance that one raster column represents in the cartesian plane.
-	T m_PixPerImageUnitW;//The number of columns in the raster plane that a horizontal distance of 1 in the cartesian plane represents. The higher the number, the more zoomed in.
-	T m_RasLlX;//The lower left x of the raster image plane.
-	T m_PixPerImageUnitH;//The number of rows in the raster plane that a vertical distance of 1 in the cartesian plane represents. The higher the number, the more zoomed in.
-	T m_RasLlY;//The lower left y of the raster image plane.
-	T m_CarLlX, m_CarLlY, m_CarUrX, m_CarUrY;//The bounds of the cartesian plane.
-	T m_PadCarLlX, m_PadCarLlY, m_PadCarUrX, m_PadCarUrY;//The bounds of the cartesian plane padded by one raster row and column on each side.
-	T m_CarHalfX, m_CarHalfY;//The distance from the center of the of the cartesian plane to the edges.
-	T m_CachedCarHalfX, m_CachedCarHalfY;//The cahced distance from the center of the of the cartesian plane to the edges, needed when rendering strips.
-	T m_CarCenterX, m_CarCenterY;//The center of the cartesian plane.
-};
-}
+#pragma once
+
+#include "Point.h"
+
+/// <summary>
+/// CarToRas class.
+/// </summary>
+
+namespace EmberNs
+{
+/// <summary>
+/// When iterating, everything is positioned in terms of a cartesian plane with 0,0 in the center like so:
+/// [-1,1]			[1,1]
+/// [-1,-1]			[1,-1]
+/// However, when accumulating to the histogram, the data is stored in the traditional raster coordinate system
+/// of 0,0 at the top left and x,y at the bottom right. This class provides functionality to convert from one
+/// to the other and is used when accumulating a sub batch of iteration results to the histogram.
+/// Note the functions use reference arguments for the converted values because they are slightly faster than returning a value.
+/// Template argument expected to be float or double.
+/// </summary>
+template <typename T>
+class EMBER_API CarToRas
+{
+public:
+	/// <summary>
+	/// Empty constructor which leaves all members uninitialized. This class should never be used unless it's been properly constructed with the constructor that takes arguments.
+	/// </summary>
+	CarToRas() = default;
+
+	/// <summary>
+	/// Constructor that takes arguments to set up the bounds and passes them to Init().
+	/// </summary>
+	/// <param name="carLlX">The lower left x of the cartesian plane</param>
+	/// <param name="carLlY">The lower left y of the cartesian plane</param>
+	/// <param name="carUrX">The upper right x of the cartesian plane</param>
+	/// <param name="carUrY">The upper right y of the cartesian plane</param>
+	/// <param name="rasW">The width in pixels of the raster image/histogram</param>
+	/// <param name="rasH">The height in pixels of the raster image/histogram</param>
+	/// <param name="aspectRatio">The aspect ratio, generally 1</param>
+	CarToRas(T carLlX, T carLlY, T carUrX, T carUrY, size_t rasW, size_t rasH, T aspectRatio)
+	{
+		Init(carLlX, carLlY, carUrX, carUrY, rasW, rasH, aspectRatio);
+	}
+
+	/// <summary>
+	/// Copy constructor which delegates to the templated assignment operator.
+	/// </summary>
+	/// <param name="carToRas">The CarToRas object to copy</param>
+	CarToRas(const CarToRas<T>& carToRas)
+	{
+		CarToRas<T>::operator=<T>(carToRas);
+	}
+
+	/// <summary>
+	/// Copy constructor to copy a CarToRas object of type U, which delegates to the templated assignment operator.
+	/// </summary>
+	/// <param name="carToRas">The CarToRas object to copy</param>
+	template <typename U>
+	CarToRas(const CarToRas<U>& carToRas)
+	{
+		CarToRas<T>::operator=<U>(carToRas);
+	}
+
+	/// <summary>
+	/// Assignment operator for the same type, which skips the copy on self assignment.
+	/// </summary>
+	/// <param name="carToRas">The CarToRas object to copy</param>
+	CarToRas<T>& operator = (const CarToRas<T>& carToRas)
+	{
+		if (this != &carToRas)
+			CarToRas<T>::operator=<T>(carToRas);
+
+		return *this;
+	}
+
+	/// <summary>
+	/// Assignment operator to assign a CarToRas object of type U. Note the cached half fields are not copied.
+	/// </summary>
+	/// <param name="carToRas">The CarToRas object to copy.</param>
+	/// <returns>Reference to updated self</returns>
+	template <typename U>
+	CarToRas<T>& operator = (const CarToRas<U>& carToRas)
+	{
+		m_RasWidth = carToRas.RasWidth();
+		m_RasHeight = carToRas.RasHeight();
+		m_OneRow = T(carToRas.OneRow());
+		m_OneCol = T(carToRas.OneCol());
+		m_PixPerImageUnitW = T(carToRas.PixPerImageUnitW());
+		m_RasLlX = T(carToRas.RasLlX());
+		m_PixPerImageUnitH = T(carToRas.PixPerImageUnitH());
+		m_RasLlY = T(carToRas.RasLlY());
+		m_CarLlX = T(carToRas.CarLlX());
+		m_CarLlY = T(carToRas.CarLlY());
+		m_CarUrX = T(carToRas.CarUrX());
+		m_CarUrY = T(carToRas.CarUrY());
+		m_PadCarLlX = T(carToRas.PadCarLlX());
+		m_PadCarLlY = T(carToRas.PadCarLlY());
+		m_PadCarUrX = T(carToRas.PadCarUrX());
+		m_PadCarUrY = T(carToRas.PadCarUrY());
+		m_CarHalfX = T(carToRas.CarHalfX());
+		m_CarHalfY = T(carToRas.CarHalfY());
+		m_CarCenterX = T(carToRas.CarCenterX());
+		m_CarCenterY = T(carToRas.CarCenterY());
+		return *this;
+	}
+
+	/// <summary>
+	/// Initialize the dimensions with the specified bounds and derive the scale, padding, half size and center fields from them.
+	/// </summary>
+	/// <param name="carLlX">The lower left x of the cartesian plane</param>
+	/// <param name="carLlY">The lower left y of the cartesian plane</param>
+	/// <param name="carUrX">The upper right x of the cartesian plane</param>
+	/// <param name="carUrY">The upper right y of the cartesian plane</param>
+	/// <param name="rasW">The width in pixels of the raster image/histogram</param>
+	/// <param name="rasH">The height in pixels of the raster image/histogram</param>
+	/// <param name="aspectRatio">The aspect ratio, generally 1. Currently not used by this function.</param>
+	void Init(T carLlX, T carLlY, T carUrX, T carUrY, size_t rasW, size_t rasH, T aspectRatio)
+	{
+		m_RasWidth = rasW;
+		m_RasHeight = rasH;
+		m_CarLlX = carLlX;
+		m_CarLlY = carLlY;
+		m_CarUrX = carUrX;
+		m_CarUrY = carUrY;
+		T carW = m_CarUrX - m_CarLlX;//Right minus left.
+		T carH = m_CarUrY - m_CarLlY;//Top minus bottom.
+		T invSizeW = T(1.0) / carW;//No check is made for a zero width.
+		T invSizeH = T(1.0) / carH;//No check is made for a zero height.
+		m_PixPerImageUnitW = static_cast<T>(rasW) * invSizeW;
+		m_RasLlX = m_PixPerImageUnitW * carLlX;
+		m_PixPerImageUnitH = static_cast<T>(rasH) * invSizeH;
+		m_RasLlY = m_PixPerImageUnitH * carLlY;
+		m_OneRow = std::abs(m_CarUrY - m_CarLlY) / m_RasHeight;
+		m_OneCol = std::abs(m_CarUrX - m_CarLlX) / m_RasWidth;
+		m_PadCarLlX = m_CarLlX + m_OneCol;
+		m_PadCarUrX = m_CarUrX - m_OneCol;
+		m_PadCarLlY = m_CarLlY + m_OneRow;
+		m_PadCarUrY = m_CarUrY - m_OneRow;
+		m_CarHalfX = (m_CarUrX - m_CarLlX) / 2;
+		m_CarHalfY = (m_CarUrY - m_CarLlY) / 2;
+		m_CarCenterX = m_CarLlX + m_CarHalfX;
+		m_CarCenterY = m_CarLlY + m_CarHalfY;
+	}
+
+	/// <summary>
+	/// Assign values to the cached half width/height fields.
+	/// This is only done manually here and is used when rendering strips
+	/// because a cached copy of these is required because the real values
+	/// change with the assignment of each temporary strip ember object.
+	/// </summary>
+	/// <param name="x">The cached value equal to half of the cartesian width of the x plane</param>
+	/// <param name="y">The cached value equal to half of the cartesian width of the y plane</param>
+	void UpdateCachedHalf(T x, T y)
+	{
+		m_CachedCarHalfX = x;
+		m_CachedCarHalfY = y;
+	}
+
+	/// <summary>
+	/// Convert a cartesian x, y coordinate to a raster x, y coordinate.
+	/// This will flip the Y coordinate, so points that hit the bottom of the cartesian plane will
+	/// be mapped to the top of the histogram and vice versa.
+	/// There is a very slim chance that a point will be right on the border and will technically be in bounds, passing the InBounds() test,
+	/// but ends up being mapped to a histogram bucket that is out of bounds due to roundoff error. Perform an additional check after this call to make sure the
+	/// mapped point is in bounds.
+	/// </summary>
+	/// <param name="cartX">The cartesian x</param>
+	/// <param name="cartY">The cartesian y</param>
+	/// <param name="rasX">The converted raster x</param>
+	/// <param name="rasY">The converted raster y</param>
+	inline void Convert(T cartX, T cartY, size_t& rasX, size_t& rasY)
+	{
+		rasX = static_cast<size_t>(m_PixPerImageUnitW * cartX - m_RasLlX);
+		rasY = static_cast<size_t>(m_RasLlY - (m_PixPerImageUnitH * cartY));//NOTE(review): Sign is the opposite of the single index overloads below, confirm this is intended.
+	}
+
+	/// <summary>
+	/// Convert a cartesian x, y coordinate to a single raster buffer index.
+	/// This will flip the Y coordinate, so points that hit the bottom of the cartesian plane will
+	/// be mapped to the top of the histogram and vice versa.
+	/// There is a very slim chance that a point will be right on the border and will technically be in bounds, passing the InBounds() test,
+	/// but ends up being mapped to a histogram bucket that is out of bounds due to roundoff error. Perform an additional check after this call to make sure the
+	/// mapped point is in bounds.
+	/// </summary>
+	/// <param name="cartX">The cartesian x</param>
+	/// <param name="cartY">The cartesian y</param>
+	/// <param name="singleBufferIndex">The converted single raster buffer index, computed as column + (raster width * row)</param>
+	inline void Convert(T cartX, T cartY, size_t& singleBufferIndex)
+	{
+		singleBufferIndex = static_cast<size_t>(m_PixPerImageUnitW * cartX - m_RasLlX) + (m_RasWidth * static_cast<size_t>(m_PixPerImageUnitH * cartY - m_RasLlY));
+	}
+
+	/// <summary>
+	/// Convert a cartesian x, y point to a single raster buffer index.
+	/// This will flip the Y coordinate, so points that hit the bottom of the cartesian plane will
+	/// be mapped to the top of the histogram and vice versa.
+	/// This is the most efficient possible way of converting, consisting of only
+	/// a multiply and subtract per coordinate element.
+	/// There is a very slim chance that a point will be right on the border and will technically be in bounds, passing the InBounds() test,
+	/// but ends up being mapped to a histogram bucket that is out of bounds due to roundoff error. Perform an additional check after this call to make sure the
+	/// mapped point is in bounds.
+	/// </summary>
+	/// <param name="point">The cartesian point whose m_X and m_Y are converted</param>
+	/// <param name="singleBufferIndex">The converted single raster buffer index, computed as column + (raster width * row)</param>
+	inline void Convert(Point<T>& point, size_t& singleBufferIndex)
+	{
+		//singleBufferIndex = static_cast<size_t>(Round(m_PixPerImageUnitW * point.m_X - m_RasLlX) + (m_RasWidth * Round(m_PixPerImageUnitH * point.m_Y - m_RasLlY)));
+		singleBufferIndex = static_cast<size_t>(m_PixPerImageUnitW * point.m_X - m_RasLlX) + (m_RasWidth * static_cast<size_t>(m_PixPerImageUnitH * point.m_Y - m_RasLlY));
+	}
+
+	/// <summary>
+	/// Determine if a point in the cartesian plane can be converted to a point within the raster plane.
+	/// The lower left bounds are inclusive and the upper right bounds are exclusive.
+	/// There is a very slim chance that a point will be right on the border and will technically be in bounds, passing the InBounds() test,
+	/// but ends up being mapped to a histogram bucket that is out of bounds due to roundoff error. Perform an additional check after this call to make sure the
+	/// mapped point is in bounds.
+	/// </summary>
+	/// <param name="point">The point to test</param>
+	/// <returns>True if within bounds, else false</returns>
+	inline bool InBounds(Point<T>& point)
+	{
+		//Debug check for hitting the very first pixel in the image.
+		//if (point.m_Y > m_CarLlY && point.m_Y <= m_PadCarLlY && //Mapped to top row...
+		//	point.m_X > m_CarLlX && point.m_X <= m_PadCarLlX)//...first col.
+		//{
+		//	cout << "First pixel hit.\n";
+		//}
+		return point.m_X >= m_CarLlX &&
+			   point.m_X < m_CarUrX &&
+			   point.m_Y < m_CarUrY &&
+			   point.m_Y >= m_CarLlY;
+	}
+
+	/// <summary>
+	/// Accessors which return copies of the member values.
+	/// </summary>
+	inline size_t RasWidth() const { return m_RasWidth; }
+	inline size_t RasHeight() const { return m_RasHeight; }
+	inline T OneRow() const { return m_OneRow; }
+	inline T OneCol() const { return m_OneCol; }
+	inline T PixPerImageUnitW() const { return m_PixPerImageUnitW; }
+	inline T RasLlX() const { return m_RasLlX; }
+	inline T PixPerImageUnitH() const { return m_PixPerImageUnitH; }
+	inline T RasLlY() const { return m_RasLlY; }
+	inline T CarLlX() const { return m_CarLlX; }
+	inline T CarLlY() const { return m_CarLlY; }
+	inline T CarUrX() const { return m_CarUrX; }
+	inline T CarUrY() const { return m_CarUrY; }
+	inline T PadCarLlX() const { return m_PadCarLlX; }
+	inline T PadCarLlY() const { return m_PadCarLlY; }
+	inline T PadCarUrX() const { return m_PadCarUrX; }
+	inline T PadCarUrY() const { return m_PadCarUrY; }
+	inline T CarHalfX() const { return m_CarHalfX; }
+	inline T CarHalfY() const { return m_CarHalfY; }
+	inline T CachedCarHalfX() const { return m_CachedCarHalfX; }
+	inline T CachedCarHalfY() const { return m_CachedCarHalfY; }
+	inline T CarCenterX() const { return m_CarCenterX; }
+	inline T CarCenterY() const { return m_CarCenterY; }
+
+private:
+	size_t m_RasWidth, m_RasHeight;//The width and height of the raster image.
+	T m_OneRow;//The distance that one raster row represents in the cartesian plane.
+	T m_OneCol;//The distance that one raster column represents in the cartesian plane.
+	T m_PixPerImageUnitW;//The number of columns in the raster plane that a horizontal distance of 1 in the cartesian plane represents. The higher the number, the more zoomed in.
+	T m_RasLlX;//The lower left x of the raster image plane.
+	T m_PixPerImageUnitH;//The number of rows in the raster plane that a vertical distance of 1 in the cartesian plane represents. The higher the number, the more zoomed in.
+	T m_RasLlY;//The lower left y of the raster image plane.
+	T m_CarLlX, m_CarLlY, m_CarUrX, m_CarUrY;//The bounds of the cartesian plane.
+	T m_PadCarLlX, m_PadCarLlY, m_PadCarUrX, m_PadCarUrY;//The bounds of the cartesian plane padded by one raster row and column on each side.
+	T m_CarHalfX, m_CarHalfY;//The distance from the center of the cartesian plane to the edges.
+	T m_CachedCarHalfX, m_CachedCarHalfY;//The cached distance from the center of the cartesian plane to the edges, needed when rendering strips. Only set by UpdateCachedHalf().
+	T m_CarCenterX, m_CarCenterY;//The center of the cartesian plane.
+};
+}
@@ -1,273 +1,273 @@
-#pragma once
-
-#include "Utils.h"
-#include "Isaac.h"
-#include "Curves.h"
-
-#define CURVE_POINTS 5
-
-/// <summary>
-/// Curves class.
-/// </summary>
-
-namespace EmberNs
-{
-/// <summary>
-/// The b-spline curves used to adjust the colors during final accumulation.
-/// This functionality was gotten inferred from Chaotica.
-/// Note this is now incompatible with Apophysis, which uses Bezier curves instead.
-/// </summary>
-template <typename T>
-class EMBER_API Curves
-{
-public:
-	/// <summary>
-	/// Constructor which sets the curve and weight values to their defaults.
-	/// </summary>
-	Curves(bool init = false)
-	{
-		if (init)
-			Init();
-		else
-			Clear();
-	}
-
-	/// <summary>
-	/// Default copy constructor.
-	/// </summary>
-	/// <param name="curves">The Curves object to copy</param>
-	Curves(const Curves<T>& curves)
-	{
-		Curves<T>::operator=<T>(curves);
-	}
-
-	/// <summary>
-	/// Copy constructor to copy a Curves object of type U.
-	/// Special case that must be here in the header because it has
-	/// a second template parameter.
-	/// </summary>
-	/// <param name="curves">The Curves object to copy</param>
-	template <typename U>
-	Curves(const Curves<U>& curves)
-	{
-		Curves<T>::operator=<U>(curves);
-	}
-
-	/// <summary>
-	/// Default assignment operator.
-	/// </summary>
-	/// <param name="curves">The Curves object to copy</param>
-	Curves<T>& operator = (const Curves<T>& curves)
-	{
-		if (this != &curves)
-			Curves<T>::operator=<T>(curves);
-
-		return *this;
-	}
-
-	/// <summary>
-	/// Assignment operator to assign a Curves object of type U.
-	/// </summary>
-	/// <param name="curves">The Curves object to copy</param>
-	/// <returns>Reference to updated self</returns>
-	template <typename U>
-	Curves<T>& operator = (const Curves<U>& curves)
-	{
-		int i = 0;
-
-		for (auto& pp : curves.m_Points)
-		{
-			int j = 0;
-			m_Points[i].clear();
-
-			for (auto& p : pp)
-			{
-				m_Points[i].push_back(p);
-				j++;
-			}
-
-			i++;
-		}
-
-		i = 0;
-		return *this;
-	}
-
-	/// <summary>
-	/// Unary addition operator to add a Curves<T> object to this one.
-	/// </summary>
-	/// <param name="curves">The Curves object to add</param>
-	/// <returns>Reference to updated self</returns>
-	template <typename U>
-	Curves<T>& operator += (const Curves<U>& curves)
-	{
-		int i = 0;
-
-		for (auto& pp : m_Points)
-		{
-			int j = 0;
-
-			for (auto& p : pp)
-			{
-				if (j < curves.m_Points[i].size())
-					p += curves.m_Points[i][j];
-				else
-					break;
-
-				j++;
-			}
-
-			i++;
-		}
-
-		return *this;
-	}
-
-	/// <summary>
-	/// Unary multiplication operator to multiply this object by another Curves<T> object.
-	/// </summary>
-	/// <param name="curves">The Curves object to multiply this one by</param>
-	/// <returns>Reference to updated self</returns>
-	template <typename U>
-	Curves<T>& operator *= (const Curves<U>& curves)
-	{
-		int i = 0;
-
-		for (auto& pp : m_Points)
-		{
-			int j = 0;
-
-			for (auto& p : pp)
-			{
-				if (j < curves.m_Points[i].size())
-					p *= curves.m_Points[i][j];
-				else
-					break;
-
-				j++;
-			}
-
-			i++;
-		}
-
-		return *this;
-	}
-
-	/// <summary>
-	/// Unary multiplication operator to multiply this object by a scalar of type T.
-	/// </summary>
-	/// <param name="t">The scalar to multiply this object by</param>
-	/// <returns>Reference to updated self</returns>
-	template <typename U>
-	Curves<T>& operator *= (const U& t)
-	{
-		for (auto& pp : m_Points)
-			for (auto& p : pp)
-				p *= T(t);
-
-		return *this;
-	}
-
-	/// <summary>
-	/// Set the curve and weight values to their default state.
-	/// </summary>
-	void Init()
-	{
-		for (size_t i = 0; i < 4; i++)
-			Init(i);
-	}
-
-	/// <summary>
-	/// Set a specific curve and its weight value to their default state.
-	/// </summary>
-	void Init(size_t i)
-	{
-		if (i < 4)
-		{
-			m_Points[i].resize(5);
-			m_Points[i][0] = v2T{ 0 };
-			m_Points[i][1] = v2T{ static_cast<T>(0.25) };
-			m_Points[i][2] = v2T{ static_cast<T>(0.50) };
-			m_Points[i][3] = v2T{ static_cast<T>(0.75) };
-			m_Points[i][4] = v2T{ 1 };
-		}
-	}
-
-	/// <summary>
-	/// Set the curve and weight values to an empty state.
-	/// </summary>
-	void Clear()
-	{
-		for (auto& p : m_Points)
-			p.clear();
-	}
-
-	/// <summary>
-	/// Whether any points are not the default.
-	/// </summary>
-	/// <returns>True if any point has been set to a value other than the default, else false.</returns>
-	bool CurvesSet()
-	{
-		bool set = false;
-
-		for (size_t i = 0; i < 4; i++)
-		{
-			if (m_Points[i].size() != CURVE_POINTS)
-			{
-				set = true;
-				break;
-			}
-
-			if ((m_Points[i][0] != v2T(0)) ||
-					(m_Points[i][1] != v2T(static_cast<T>(0.25))) ||
-					(m_Points[i][2] != v2T(static_cast<T>(0.50))) ||
-					(m_Points[i][3] != v2T(static_cast<T>(0.75))) ||
-					(m_Points[i][4] != v2T(1))
-			   )
-			{
-				set = true;
-				break;
-			}
-		}
-
-		return set;
-	}
-
-
-public:
-	std::array<std::vector<v2T>, 4> m_Points;
-};
-
-//Must declare this outside of the class to provide for both orders of parameters.
-
-/// <summary>
-/// Multiplication operator to multiply a Curves<T> object by a scalar of type U.
-/// </summary>
-/// <param name="curves">The curves object to multiply</param>
-/// <param name="t">The scalar to multiply curves by by</param>
-/// <returns>Copy of new Curves<T></returns>
-template <typename T, typename U>
-Curves<T> operator * (const Curves<T>& curves, const U& t)
-{
-	T tt = T(t);
-	Curves<T> c(curves);
-
-	for (auto& pp : c.m_Points)
-		for (auto& p : pp)
-			p *= tt;
-
-	return c;
-}
-
-/// <summary>
-/// Multiplication operator for reverse order.
-/// </summary>
-/// <param name="t">The scalar to multiply curves by by</param>
-/// <param name="curves">The curves object to multiply</param>
-/// <returns>Copy of new Curves<T></returns>
-template <typename T, typename U>
-Curves<T> operator * (const U& t, const Curves<T>& curves)
-{
-	return curves * t;
-}
-}
+#pragma once
+
+#include "Utils.h"
+#include "Isaac.h"
+#include "Curves.h"
+
+#define CURVE_POINTS 5
+
+/// <summary>
+/// Curves class.
+/// </summary>
+
+namespace EmberNs
+{
+/// <summary>
+/// The b-spline curves used to adjust the colors during final accumulation.
+/// This functionality was inferred from Chaotica.
+/// Note this is now incompatible with Apophysis, which uses Bezier curves instead.
+/// </summary>
+template <typename T>
+class EMBER_API Curves
+{
+public:
+	/// <summary>
+	/// Constructor which sets the curve and weight values to their defaults.
+	/// </summary>
+	Curves(bool init = false)
+	{
+		//Without init the curves are left empty rather than populated with the default points.
+		if (!init)
+		{
+			Clear();
+			return;
+		}
+
+		Init();
+	}
+
+	/// <summary>
+	/// Default copy constructor.
+	/// </summary>
+	/// <param name="curves">The Curves object to copy</param>
+	Curves(const Curves<T>& curves)
+	{
+		//Route through the templated assignment (with U = T) so the copying logic lives in one place.
+		this->template operator=<T>(curves);
+	}
+
+	/// <summary>
+	/// Copy constructor to copy a Curves object of type U.
+	/// Special case that must be here in the header because it has
+	/// a second template parameter.
+	/// </summary>
+	/// <param name="curves">The Curves object to copy</param>
+	template <typename U>
+	Curves(const Curves<U>& curves)
+	{
+		//The templated assignment performs the per-point conversion from the source type.
+		this->template operator=<U>(curves);
+	}
+
+	/// <summary>
+	/// Default assignment operator.
+	/// </summary>
+	/// <param name="curves">The Curves object to copy</param>
+	Curves<T>& operator = (const Curves<T>& curves)
+	{
+		//Self-assignment must be skipped: the templated overload clears each destination
+		//vector before reading from the source, which would wipe the data being copied.
+		const bool isSelf = (this == &curves);
+
+		if (!isSelf)
+			this->template operator=<T>(curves);
+
+		return *this;
+	}
+
+	/// <summary>
+	/// Assignment operator to assign a Curves object of type U.
+	/// </summary>
+	/// <param name="curves">The Curves object to copy</param>
+	/// <returns>Reference to updated self</returns>
+	template <typename U>
+	Curves<T>& operator = (const Curves<U>& curves)
+	{
+		//Copy curve by curve. Both sides hold the same fixed number of curves, so the indices always line up.
+		for (size_t i = 0; i < m_Points.size(); i++)
+		{
+			const auto& src = curves.m_Points[i];
+			auto& dst = m_Points[i];
+			dst.clear();
+			dst.reserve(src.size());//One allocation instead of growing point by point.
+
+			for (const auto& p : src)
+				dst.push_back(p);//Each point is converted from the source point type to v2T here.
+		}
+
+		return *this;
+	}
+
+	/// <summary>
+	/// Compound addition operator to add a Curves<U> object to this one.
+	/// </summary>
+	/// <param name="curves">The Curves object to add</param>
+	/// <returns>Reference to updated self</returns>
+	template <typename U>
+	Curves<T>& operator += (const Curves<U>& curves)
+	{
+		for (size_t i = 0; i < m_Points.size(); i++)
+		{
+			auto& dst = m_Points[i];
+			const auto& src = curves.m_Points[i];
+			//Only points present in both curves are combined; any extra points in this one are left untouched.
+			//A size_t count avoids the signed/unsigned comparison the int counter used to cause.
+			const size_t count = dst.size() < src.size() ? dst.size() : src.size();
+
+			for (size_t j = 0; j < count; j++)
+				dst[j] += src[j];
+		}
+
+		return *this;
+	}
+
+	/// <summary>
+	/// Compound multiplication operator to multiply this object by another Curves<U> object.
+	/// </summary>
+	/// <param name="curves">The Curves object to multiply this one by</param>
+	/// <returns>Reference to updated self</returns>
+	template <typename U>
+	Curves<T>& operator *= (const Curves<U>& curves)
+	{
+		for (size_t i = 0; i < m_Points.size(); i++)
+		{
+			auto& dst = m_Points[i];
+			const auto& src = curves.m_Points[i];
+			//Only points present in both curves are combined; any extra points in this one are left untouched.
+			//A size_t count avoids the signed/unsigned comparison the int counter used to cause.
+			const size_t count = dst.size() < src.size() ? dst.size() : src.size();
+
+			for (size_t j = 0; j < count; j++)
+				dst[j] *= src[j];
+		}
+
+		return *this;
+	}
+
+	/// <summary>
+	/// Compound multiplication operator to multiply this object by a scalar of type U.
+	/// </summary>
+	/// <param name="t">The scalar to multiply this object by</param>
+	/// <returns>Reference to updated self</returns>
+	template <typename U>
+	Curves<T>& operator *= (const U& t)
+	{
+		const T tt = T(t);//Convert once, rather than once per point.
+
+		for (auto& pp : m_Points)
+			for (auto& p : pp)
+				p *= tt;
+
+		return *this;
+	}
+
+	/// <summary>
+	/// Set the curve and weight values to their default state.
+	/// </summary>
+	void Init()
+	{
+		//Reset each of the four curves in turn.
+		size_t i = 0;
+
+		while (i < 4)
+			Init(i++);
+	}
+
+	/// <summary>
+	/// Set a specific curve and its weight value to their default state.
+	/// </summary>
+	void Init(size_t i)
+	{
+		if (i >= 4)
+			return;//Indices that don't name one of the four curves are silently ignored.
+
+		//Five evenly spaced points from 0 to 1, each built from a single scalar.
+		auto& pts = m_Points[i];
+		pts.resize(5);
+		pts[0] = v2T{ 0 };
+		pts[1] = v2T{ static_cast<T>(0.25) };
+		pts[2] = v2T{ static_cast<T>(0.50) };
+		pts[3] = v2T{ static_cast<T>(0.75) };
+		pts[4] = v2T{ 1 };
+	}
+
+	/// <summary>
+	/// Set the curve and weight values to an empty state.
+	/// </summary>
+	void Clear()
+	{
+		//Empty every curve's point list; the array of curves itself keeps its fixed size.
+		for (size_t i = 0; i < m_Points.size(); i++)
+			m_Points[i].clear();
+	}
+
+	/// <summary>
+	/// Whether any points are not the default.
+	/// </summary>
+	/// <returns>True if any point has been set to a value other than the default, else false.</returns>
+	bool CurvesSet()
+	{
+		for (auto& pts : m_Points)
+		{
+			//A curve without exactly the default number of points cannot be the default curve.
+			//This must be checked first because it guards the fixed indexing below.
+			if (pts.size() != CURVE_POINTS)
+				return true;
+
+			const bool differs = (pts[0] != v2T(0)) ||
+								 (pts[1] != v2T(static_cast<T>(0.25))) ||
+								 (pts[2] != v2T(static_cast<T>(0.50))) ||
+								 (pts[3] != v2T(static_cast<T>(0.75))) ||
+								 (pts[4] != v2T(1));
+
+			if (differs)
+				return true;
+		}
+
+		return false;//Every curve matched the default points.
+	}
+
+
+public:
+	std::array<std::vector<v2T>, 4> m_Points;
+};
+
+//Must declare this outside of the class to provide for both orders of parameters.
+
+/// <summary>
+/// Multiplication operator to multiply a Curves<T> object by a scalar of type U.
+/// </summary>
+/// <param name="curves">The curves object to multiply</param>
+/// <param name="t">The scalar to multiply curves by</param>
+/// <returns>Copy of new Curves<T></returns>
+template <typename T, typename U>
+Curves<T> operator * (const Curves<T>& curves, const U& t)
+{
+	const T factor = T(t);
+	Curves<T> scaled(curves);//Work on a copy so the operand is left unmodified.
+
+	for (size_t i = 0; i < scaled.m_Points.size(); i++)
+	{
+		auto& pts = scaled.m_Points[i];
+
+		for (size_t j = 0; j < pts.size(); j++)
+			pts[j] *= factor;
+	}
+
+	return scaled;
+}
+
+/// <summary>
+/// Multiplication operator for reverse order.
+/// </summary>
+/// <param name="t">The scalar to multiply curves by</param>
+/// <param name="curves">The curves object to multiply</param>
+/// <returns>Copy of new Curves<T></returns>
+template <typename T, typename U>
+Curves<T> operator * (const U& t, const Curves<T>& curves)
+{
+	//Defer to the (curves, scalar) overload with the operands swapped.
+	Curves<T> product = curves * t;
+	return product;
+}
+}
@@ -1,341 +1,341 @@
-#pragma once
-
-#include "SpatialFilter.h"
-
-/// <summary>
-/// DensityFilter class.
-/// </summary>
-
-namespace EmberNs
-{
-/// <summary>
-/// A base class with virtual functions to allow both templating and polymorphism to work together.
-/// Derived classes will implement all of these functions.
-/// </summary>
-class EMBER_API DensityFilterBase
-{
-public:
-	DensityFilterBase() { }
-	virtual ~DensityFilterBase() { }
-
-	virtual intmax_t FilterWidth() const { return 0; }
-};
-
-/// <summary>
-/// The density estimation filter is used after iterating, but before final accumulation.
-/// It's a variable width Gaussian filter, whose width is inversely proportional
-/// to the number of hits a given histogram cell has received.
-/// That means the fewer hits in a cell, the more blur is applied. The greater the hits,
-/// the less blur.
-/// Template argument expected to be float or double.
-/// </summary>
-template <typename T>
-class EMBER_API DensityFilter : public DensityFilterBase
-{
-public:
-	/// <summary>
-	/// Constructor that assigns various fields but does not create the actual filter vector.
-	/// This is done because filter creation could fail, so the user must manually call it
-	/// after construction.
-	/// </summary>
-	/// <param name="minRad">The minimum filter radius</param>
-	/// <param name="maxRad">The maximum filter radius</param>
-	/// <param name="curve">The curve of the filter</param>
-	/// <param name="supersample">The supersample of the ember this filter will be used with</param>
-	DensityFilter(T minRad, T maxRad, T curve, size_t supersample)
-	{
-		m_MinRad = minRad;
-		m_MaxRad = maxRad;
-		m_Curve = curve;
-		m_Supersample = supersample;
-		m_MaxFilterIndex = 0;
-
-		//Make sure the values make sense.
-		if (m_Curve <= 0.0)
-			m_Curve = static_cast<T>(0.5);
-
-		if (m_MaxRad < m_MinRad)
-			m_MaxRad = m_MinRad + 1;
-
-		//Ensure it's valid.
-		while (!Valid())
-		{
-			m_Curve += static_cast<T>(0.1);
-		}
-	}
-
-	/// <summary>
-	/// Copy constructor.
-	/// </summary>
-	/// <param name="filter">The DensityFilter object to copy</param>
-	DensityFilter(const DensityFilter<T>& filter)
-	{
-		*this = filter;
-	}
-
-	/// <summary>
-	/// Assignment operator.
-	/// </summary>
-	/// <param name="filter">The DensityFilter object to copy.</param>
-	/// <returns>Reference to updated self</returns>
-	DensityFilter<T>& operator = (const DensityFilter<T>& filter)
-	{
-		if (this != &filter)
-		{
-			m_MinRad            = filter.m_MinRad;
-			m_MaxRad			= filter.m_MaxRad;
-			m_Curve			    = filter.m_Curve;
-			m_Supersample		= filter.m_Supersample;
-			m_KernelSize		= filter.m_KernelSize;
-			m_MaxFilterIndex	= filter.m_MaxFilterIndex;
-			m_MaxFilteredCounts = filter.m_MaxFilteredCounts;
-			m_FilterWidth		= filter.m_FilterWidth;
-			m_Coefs			    = filter.m_Coefs;
-			m_Widths			= filter.m_Widths;
-		}
-
-		return *this;
-	}
-
-	/// <summary>
-	/// Create the filter vector of up to 10M entries.
-	/// If more than that are requested, it isn't created and
-	/// false is returned.
-	/// </summary>
-	/// <returns>True if success, else false.</returns>
-	bool Create()
-	{
-		size_t w;
-		int intFilterCount, maxIndex;
-		int rowSize;
-		size_t filterLoop;
-		int keepThresh = 100;
-		uint filterCoefIndex = 0;
-		T decFilterCount;
-		T finalMinRad = m_MinRad * m_Supersample + 1;//Should scale the filter width by the oversample.
-		T finalMaxRad = m_MaxRad * m_Supersample + 1;//The '+1' comes from the assumed distance to the first pixel.
-		GaussianFilter<T> gaussianFilter(m_MaxRad, m_Supersample);
-		m_KernelSize = 0;
-		m_MaxFilterIndex = 0;
-		//Calculate how many filter kernels are needed based on the decay function
-		//
-		//    num filters = (de_max_width / de_min_width)^(1 / estimator_curve)
-		//
-		decFilterCount = std::pow(finalMaxRad / finalMinRad, static_cast<T>(1) / m_Curve);
-
-		if (decFilterCount > 1e7)//Too many filters.
-			return false;
-
-		intFilterCount = static_cast<int>(ceil(decFilterCount));
-
-		//Condense the smaller kernels to save space.
-		if (intFilterCount > keepThresh)
-		{
-			maxIndex = static_cast<int>(ceil(DE_THRESH + std::pow(static_cast<T>(intFilterCount - DE_THRESH), m_Curve))) + 1;
-			m_MaxFilteredCounts = static_cast<int>(std::pow(static_cast<T>(maxIndex - DE_THRESH), static_cast<T>(1) / m_Curve)) + DE_THRESH;
-		}
-		else
-		{
-			maxIndex = intFilterCount;
-			m_MaxFilteredCounts = maxIndex;
-		}
-
-		//Allocate the memory for these filters and the hit/width lookup array.
-		rowSize = static_cast<int>(2 * std::ceil(finalMaxRad) - 1);
-		m_FilterWidth = (rowSize - 1) / 2;
-		m_KernelSize = (m_FilterWidth + 1) * (2 + m_FilterWidth) / 2;
-		m_Coefs.resize(maxIndex * m_KernelSize);
-		m_Widths.resize(maxIndex);
-
-		//Generate the filter coefficients.
-		for (filterLoop = 0; filterLoop < maxIndex; filterLoop++)
-		{
-			intmax_t dej, dek;
-			size_t coefIndex;
-			T filterSum = 0.0;
-			T filterVal;
-			T filterHeight;
-			T loopAdjust;
-
-			//Calculate the filter width for this number of hits in a bin.
-			if (filterLoop < keepThresh)
-			{
-				filterHeight = (finalMaxRad / std::pow(static_cast<T>(filterLoop + 1), m_Curve));
-			}
-			else
-			{
-				loopAdjust = std::pow(static_cast<T>(filterLoop - keepThresh), (static_cast<T>(1) / m_Curve)) + keepThresh;
-				filterHeight = (finalMaxRad / std::pow(loopAdjust + 1, m_Curve));
-			}
-
-			//Once we've reached the min radius, don't populate any more.
-			if (filterHeight <= finalMinRad)
-			{
-				filterHeight = finalMinRad;
-				m_MaxFilterIndex = filterLoop;
-			}
-
-			m_Widths[filterLoop] = filterHeight;
-
-			//Calculate norm of kernel separately (easier).
-			for (dej = -m_FilterWidth; dej <= m_FilterWidth; dej++)
-			{
-				for (dek = -m_FilterWidth; dek <= m_FilterWidth; dek++)
-				{
-					filterVal = std::sqrt(static_cast<T>(dej * dej + dek * dek)) / filterHeight;
-
-					//Only populate the coefs within this radius.
-					if (filterVal <= 1.0)
-						filterSum += gaussianFilter.Filter(gaussianFilter.Support() * filterVal);
-				}
-			}
-
-			coefIndex = filterLoop * m_KernelSize;
-
-			//Calculate the unique entries of the kernel.
-			for (dej = 0; dej <= m_FilterWidth; dej++)
-			{
-				for (dek = 0; dek <= dej; dek++)
-				{
-					filterVal = std::sqrt(static_cast<T>(dej * dej + dek * dek)) / filterHeight;
-
-					//Only populate the coefs within this radius.
-					if (filterVal > 1.0)
-						m_Coefs[coefIndex] = 0.0;
-					else
-						m_Coefs[coefIndex] = gaussianFilter.Filter(gaussianFilter.Support() * filterVal) / filterSum;
-
-					coefIndex++;
-				}
-			}
-
-			if (m_MaxFilterIndex > 0)
-				break;
-		}
-
-		if (m_MaxFilterIndex == 0)
-			m_MaxFilterIndex = maxIndex - 1;
-
-		w = m_FilterWidth + 1;
-		m_CoefIndices.resize(w * w);
-
-		//This will populate one quadrant of filter indices.
-		//Really only need 1/8th, but that would require a sparse matrix.
-		for (intmax_t j = 0; j <= m_FilterWidth; j++)
-		{
-			for (intmax_t i = 0; i <= j; i++, filterCoefIndex++)
-			{
-				if (j == 0 && i == 0)
-				{
-					m_CoefIndices[(j * w) + i] = filterCoefIndex;
-				}
-				else if (i == 0)
-				{
-					m_CoefIndices[(0 * w) + j] = filterCoefIndex;
-					m_CoefIndices[(j * w) + 0] = filterCoefIndex;
-				}
-				else if (j == i)
-				{
-					m_CoefIndices[(j * w) + i] = filterCoefIndex;
-				}
-				else
-				{
-					m_CoefIndices[(i * w) + j] = filterCoefIndex;
-					m_CoefIndices[(j * w) + i] = filterCoefIndex;
-				}
-			}
-		}
-
-		return true;
-	}
-
-	/// <summary>
-	/// Return whether the requested dimensions are valid.
-	/// Meaning, is the requested filter size less than or equal to 10M?
-	/// </summary>
-	/// <returns>True if requested filter size is less than or equal to 10M, else false.</returns>
-	inline bool Valid() const
-	{
-		T finalMaxRad = m_MaxRad * m_Supersample + 1;
-		T finalMinRad = m_MinRad * m_Supersample + 1;
-		return std::pow(finalMaxRad / finalMinRad, static_cast<T>(1) / m_Curve) <= 1e7;
-	}
-
-	/// <summary>
-	/// Return a string representation of this density estimation filter.
-	/// </summary>
-	/// <returns>The string representation of this density estimation filter</returns>
-	string ToString() const
-	{
-		size_t i, j, coefIndex = 0, w = m_FilterWidth + 1;
-		stringstream ss;
-		ss
-				<< "Density Filter:"
-				<< "\n	     Min radius: " << MinRad()
-				<< "\n	     Max radius: " << MaxRad()
-				<< "\n		      Curve: " << Curve()
-				<< "\n        Kernel size: " << KernelSize()
-				<< "\n   Max filter index: " << MaxFilterIndex()
-				<< "\nMax Filtered counts: " << MaxFilteredCounts()
-				<< "\n       Filter width: " << FilterWidth();
-		ss << "\nCoefficients: \n";
-
-		for (i = 0; i < m_Widths.size(); i++)
-		{
-			for (coefIndex = 0; coefIndex < m_KernelSize; coefIndex++)
-				ss << "Kernel[" << i << "].Coefs[" << coefIndex << "]: " << m_Coefs[(i * m_KernelSize) + coefIndex] << "\n";
-		}
-
-		ss << "\nWidths: \n";
-
-		for (i = 0; i < m_Widths.size(); i++)
-		{
-			ss << "Widths[" << i << "]: " << m_Widths[i] << "\n";
-		}
-
-		for (i = 0; i < w; i++)
-		{
-			for (j = 0; j < w; j++)
-			{
-				cout << std::setw(2) << std::setfill('0') << m_CoefIndices[i * w + j] << "\t";
-			}
-
-			cout << "\n";
-		}
-
-		return ss.str();
-	}
-
-	/// <summary>
-	/// Accessors.
-	/// </summary>
-	inline T MinRad() const { return m_MinRad; }
-	inline T MaxRad() const { return m_MaxRad; }
-	inline T Curve() const { return m_Curve; }
-	inline size_t Supersample() const { return m_Supersample; }
-	inline size_t KernelSize() const { return m_KernelSize; }
-	inline size_t MaxFilterIndex() const { return m_MaxFilterIndex; }
-	inline size_t MaxFilteredCounts() const { return m_MaxFilteredCounts; }
-	virtual intmax_t FilterWidth() const override { return m_FilterWidth; }
-	inline size_t BufferSize() const { return m_Widths.size(); }
-	inline size_t CoefsSizeBytes() const { return BufferSize() * m_KernelSize * sizeof(T); }
-	inline size_t WidthsSizeBytes() const { return SizeOf(m_Widths); }
-	inline size_t CoefsIndicesSizeBytes() const { return SizeOf(m_CoefIndices); }
-	inline const T* Coefs() const { return m_Coefs.data(); }
-	inline const T* Widths() const { return m_Widths.data(); }
-	inline const uint* CoefIndices() const { return m_CoefIndices.data(); }
-
-private:
-	T m_MinRad;
-	T m_MaxRad;//The original specified filter radius.
-	T m_Curve;
-	size_t m_Supersample;
-	size_t m_KernelSize;
-	size_t m_MaxFilterIndex;
-	size_t m_MaxFilteredCounts;
-	intmax_t m_FilterWidth;//The new radius after scaling for super sample and rounding. This is what's actually used.
-	vector<T> m_Coefs;
-	vector<T> m_Widths;
-	vector<uint> m_CoefIndices;
-};
-}
+#pragma once
+
+#include "SpatialFilter.h"
+
+/// <summary>
+/// DensityFilter class.
+/// </summary>
+
+namespace EmberNs
+{
+/// <summary>
+/// A base class with virtual functions to allow both templating and polymorphism to work together.
+/// Derived classes will implement all of these functions.
+/// </summary>
+class EMBER_API DensityFilterBase
+{
+public:
+	DensityFilterBase() = default;
+	virtual ~DensityFilterBase() = default;//Virtual so derived filters can be destroyed through a base pointer.
+
+	//The base holds no kernel, so it reports a width of zero; DensityFilter<T> overrides this.
+	virtual intmax_t FilterWidth() const
+	{
+		return 0;
+	}
+};
+
+/// <summary>
+/// The density estimation filter is used after iterating, but before final accumulation.
+/// It's a variable width Gaussian filter, whose width is inversely proportional
+/// to the number of hits a given histogram cell has received.
+/// That means the fewer hits in a cell, the more blur is applied. The greater the hits,
+/// the less blur.
+/// Template argument expected to be float or double.
+/// </summary>
+template <typename T>
+class EMBER_API DensityFilter : public DensityFilterBase
+{
+public:
+	/// <summary>
+	/// Constructor that assigns various fields but does not create the actual filter vector.
+	/// This is done because filter creation could fail, so the user must manually call it
+	/// after construction.
+	/// </summary>
+	/// <param name="minRad">The minimum filter radius</param>
+	/// <param name="maxRad">The maximum filter radius</param>
+	/// <param name="curve">The curve of the filter</param>
+	/// <param name="supersample">The supersample of the ember this filter will be used with</param>
+	DensityFilter(T minRad, T maxRad, T curve, size_t supersample)
+	{
+		m_MinRad = minRad;
+		m_MaxRad = maxRad;
+		m_Curve = curve;
+		m_Supersample = supersample;
+		//These only receive real values in Create(), but they are readable through the accessors,
+		//ToString() and the copy operations before then, so start them at zero instead of leaving them indeterminate.
+		m_KernelSize = 0;
+		m_MaxFilterIndex = 0;
+		m_MaxFilteredCounts = 0;
+		m_FilterWidth = 0;
+
+		//Make sure the values make sense.
+		if (m_Curve <= 0.0)
+			m_Curve = static_cast<T>(0.5);
+
+		if (m_MaxRad < m_MinRad)
+			m_MaxRad = m_MinRad + 1;
+
+		//Ensure it's valid: keep raising the curve until the number of kernels Valid() computes is within its limit.
+		//NOTE(review): if the radius ratio inside Valid() is not a finite positive number (e.g. from a negative minRad),
+		//raising the curve may never make Valid() return true. Confirm callers only pass non-negative radii.
+		while (!Valid())
+		{
+			m_Curve += static_cast<T>(0.1);
+		}
+	}
+
+	/// <summary>
+	/// Copy constructor.
+	/// </summary>
+	/// <param name="filter">The DensityFilter object to copy</param>
+	DensityFilter(const DensityFilter<T>& filter)
+	{
+		//All member-wise copying is delegated to the assignment operator.
+		this->operator=(filter);
+	}
+
+	/// <summary>
+	/// Assignment operator.
+	/// </summary>
+	/// <param name="filter">The DensityFilter object to copy.</param>
+	/// <returns>Reference to updated self</returns>
+	DensityFilter<T>& operator = (const DensityFilter<T>& filter)
+	{
+		if (this != &filter)
+		{
+			m_MinRad = filter.m_MinRad;
+			m_MaxRad = filter.m_MaxRad;
+			m_Curve = filter.m_Curve;
+			m_Supersample = filter.m_Supersample;
+			m_KernelSize = filter.m_KernelSize;
+			m_MaxFilterIndex = filter.m_MaxFilterIndex;
+			m_MaxFilteredCounts = filter.m_MaxFilteredCounts;
+			m_FilterWidth = filter.m_FilterWidth;
+			m_Coefs = filter.m_Coefs;
+			m_Widths = filter.m_Widths;
+			//The index table must travel with the coefficients. Without it a copy exposes an empty
+			//table through CoefIndices(), and ToString() would index past the end of it.
+			m_CoefIndices = filter.m_CoefIndices;
+		}
+
+		return *this;
+	}
+
+	/// <summary>
+	/// Create the filter vector of up to 10M entries.
+	/// If more than that are requested, it isn't created and
+	/// false is returned.
+	/// </summary>
+	/// <returns>True if success, else false.</returns>
+	bool Create()
+	{
+		//Builds three tables:
+		//  m_Widths:      one filter radius per kernel.
+		//  m_Coefs:       the coefficients of every kernel, stored back to back, m_KernelSize entries each.
+		//  m_CoefIndices: a (m_FilterWidth + 1)^2 lookup from a row/column offset to a position within a kernel.
+		size_t w;
+		int intFilterCount, maxIndex;
+		int rowSize;
+		size_t filterLoop;
+		int keepThresh = 100;
+		uint filterCoefIndex = 0;
+		T decFilterCount;
+		T finalMinRad = m_MinRad * m_Supersample + 1;//Should scale the filter width by the oversample.
+		T finalMaxRad = m_MaxRad * m_Supersample + 1;//The '+1' comes from the assumed distance to the first pixel.
+		GaussianFilter<T> gaussianFilter(m_MaxRad, m_Supersample);
+		m_KernelSize = 0;
+		m_MaxFilterIndex = 0;
+		//Calculate how many filter kernels are needed based on the decay function
+		//
+		//    num filters = (de_max_width / de_min_width)^(1 / estimator_curve)
+		//
+		decFilterCount = std::pow(finalMaxRad / finalMinRad, static_cast<T>(1) / m_Curve);
+
+		//This is the same quantity and limit that Valid() tests.
+		if (decFilterCount > 1e7)//Too many filters.
+			return false;
+
+		intFilterCount = static_cast<int>(ceil(decFilterCount));
+
+		//Condense the smaller kernels to save space.
+		if (intFilterCount > keepThresh)
+		{
+			maxIndex = static_cast<int>(ceil(DE_THRESH + std::pow(static_cast<T>(intFilterCount - DE_THRESH), m_Curve))) + 1;
+			m_MaxFilteredCounts = static_cast<int>(std::pow(static_cast<T>(maxIndex - DE_THRESH), static_cast<T>(1) / m_Curve)) + DE_THRESH;
+		}
+		else
+		{
+			maxIndex = intFilterCount;
+			m_MaxFilteredCounts = maxIndex;
+		}
+
+		//Allocate the memory for these filters and the hit/width lookup array.
+		//m_KernelSize is the number of (dej, dek) pairs with 0 <= dek <= dej <= m_FilterWidth,
+		//which is exactly how many entries the "unique entries" loop below writes per kernel.
+		rowSize = static_cast<int>(2 * std::ceil(finalMaxRad) - 1);
+		m_FilterWidth = (rowSize - 1) / 2;
+		m_KernelSize = (m_FilterWidth + 1) * (2 + m_FilterWidth) / 2;
+		m_Coefs.resize(maxIndex * m_KernelSize);
+		m_Widths.resize(maxIndex);
+
+		//Generate the filter coefficients.
+		for (filterLoop = 0; filterLoop < maxIndex; filterLoop++)
+		{
+			intmax_t dej, dek;
+			size_t coefIndex;
+			T filterSum = 0.0;
+			T filterVal;
+			T filterHeight;
+			T loopAdjust;
+
+			//Calculate the filter width for this number of hits in a bin.
+			//Below keepThresh each kernel index is used directly; above it the index is first
+			//expanded by the inverse of the curve before the same decay formula is applied.
+			if (filterLoop < keepThresh)
+			{
+				filterHeight = (finalMaxRad / std::pow(static_cast<T>(filterLoop + 1), m_Curve));
+			}
+			else
+			{
+				loopAdjust = std::pow(static_cast<T>(filterLoop - keepThresh), (static_cast<T>(1) / m_Curve)) + keepThresh;
+				filterHeight = (finalMaxRad / std::pow(loopAdjust + 1, m_Curve));
+			}
+
+			//Once we've reached the min radius, don't populate any more.
+			//Recording a non-zero index here ends the loop after this kernel has been written (see the break below).
+			//NOTE: if this triggers on the very first kernel, the stored index is 0, which the break test
+			//cannot tell apart from "not reached yet", so one more kernel is generated before stopping.
+			if (filterHeight <= finalMinRad)
+			{
+				filterHeight = finalMinRad;
+				m_MaxFilterIndex = filterLoop;
+			}
+
+			m_Widths[filterLoop] = filterHeight;
+
+			//Calculate norm of kernel separately (easier).
+			//The sum runs over the full square of offsets, so each stored coefficient below
+			//ends up as a fraction of the whole kernel's total, not just of the stored wedge.
+			for (dej = -m_FilterWidth; dej <= m_FilterWidth; dej++)
+			{
+				for (dek = -m_FilterWidth; dek <= m_FilterWidth; dek++)
+				{
+					filterVal = std::sqrt(static_cast<T>(dej * dej + dek * dek)) / filterHeight;
+
+					//Only populate the coefs within this radius.
+					if (filterVal <= 1.0)
+						filterSum += gaussianFilter.Filter(gaussianFilter.Support() * filterVal);
+				}
+			}
+
+			//Each kernel occupies its own contiguous run of m_KernelSize entries in m_Coefs.
+			coefIndex = filterLoop * m_KernelSize;
+
+			//Calculate the unique entries of the kernel.
+			for (dej = 0; dej <= m_FilterWidth; dej++)
+			{
+				for (dek = 0; dek <= dej; dek++)
+				{
+					filterVal = std::sqrt(static_cast<T>(dej * dej + dek * dek)) / filterHeight;
+
+					//Only populate the coefs within this radius.
+					if (filterVal > 1.0)
+						m_Coefs[coefIndex] = 0.0;
+					else
+						m_Coefs[coefIndex] = gaussianFilter.Filter(gaussianFilter.Support() * filterVal) / filterSum;
+
+					coefIndex++;
+				}
+			}
+
+			//The min radius was reached on this pass, so no further kernels are generated.
+			if (m_MaxFilterIndex > 0)
+				break;
+		}
+
+		//The min radius was never recorded with a non-zero index, so the last kernel is the limit.
+		if (m_MaxFilterIndex == 0)
+			m_MaxFilterIndex = maxIndex - 1;
+
+		w = m_FilterWidth + 1;
+		m_CoefIndices.resize(w * w);
+
+		//This will populate one quadrant of filter indices.
+		//Really only need 1/8th, but that would require a sparse matrix.
+		//filterCoefIndex advances in the same (j, i <= j) order in which the coefficients were written above,
+		//and each index is mirrored across the diagonal so that (i, j) and (j, i) refer to the same coefficient.
+		for (intmax_t j = 0; j <= m_FilterWidth; j++)
+		{
+			for (intmax_t i = 0; i <= j; i++, filterCoefIndex++)
+			{
+				if (j == 0 && i == 0)
+				{
+					m_CoefIndices[(j * w) + i] = filterCoefIndex;
+				}
+				else if (i == 0)
+				{
+					m_CoefIndices[(0 * w) + j] = filterCoefIndex;
+					m_CoefIndices[(j * w) + 0] = filterCoefIndex;
+				}
+				else if (j == i)
+				{
+					m_CoefIndices[(j * w) + i] = filterCoefIndex;
+				}
+				else
+				{
+					m_CoefIndices[(i * w) + j] = filterCoefIndex;
+					m_CoefIndices[(j * w) + i] = filterCoefIndex;
+				}
+			}
+		}
+
+		return true;
+	}
+
+	/// <summary>
+	/// Return whether the requested dimensions are valid.
+	/// Meaning, is the requested filter size less than or equal to 10M?
+	/// </summary>
+	/// <returns>True if requested filter size is less than or equal to 10M, else false.</returns>
+	inline bool Valid() const
+	{
+		//Same scaling and kernel count formula that Create() uses before it allocates anything.
+		const T scaledMin = m_MinRad * m_Supersample + 1;
+		const T scaledMax = m_MaxRad * m_Supersample + 1;
+		const T kernelCount = std::pow(scaledMax / scaledMin, static_cast<T>(1) / m_Curve);
+		return kernelCount <= 1e7;
+	}
+
+	/// <summary>
+	/// Return a string representation of this density estimation filter.
+	/// </summary>
+	/// <returns>The string representation of this density estimation filter</returns>
+	string ToString() const
+	{
+		const size_t w = m_FilterWidth + 1;
+		stringstream ss;
+		ss
+				<< "Density Filter:"
+				<< "\n	     Min radius: " << MinRad()
+				<< "\n	     Max radius: " << MaxRad()
+				<< "\n		      Curve: " << Curve()
+				<< "\n        Kernel size: " << KernelSize()
+				<< "\n   Max filter index: " << MaxFilterIndex()
+				<< "\nMax Filtered counts: " << MaxFilteredCounts()
+				<< "\n       Filter width: " << FilterWidth();
+		ss << "\nCoefficients: \n";
+
+		for (size_t i = 0; i < m_Widths.size(); i++)
+		{
+			for (size_t coefIndex = 0; coefIndex < m_KernelSize; coefIndex++)
+				ss << "Kernel[" << i << "].Coefs[" << coefIndex << "]: " << m_Coefs[(i * m_KernelSize) + coefIndex] << "\n";
+		}
+
+		ss << "\nWidths: \n";
+
+		for (size_t i = 0; i < m_Widths.size(); i++)
+		{
+			ss << "Widths[" << i << "]: " << m_Widths[i] << "\n";
+		}
+
+		//The index table goes into the returned string like everything else. Writing it to cout
+		//made this const query print as a side effect and left cout's fill character changed.
+		//m_CoefIndices is sized by Create(), so skip the table rather than read past the end when it is too small.
+		if (m_CoefIndices.size() >= w * w)
+		{
+			for (size_t i = 0; i < w; i++)
+			{
+				for (size_t j = 0; j < w; j++)
+				{
+					ss << std::setw(2) << std::setfill('0') << m_CoefIndices[i * w + j] << "\t";
+				}
+
+				ss << "\n";
+			}
+		}
+
+		return ss.str();
+	}
+
+	/// <summary>
+	/// Accessors.
+	/// </summary>
+	//Parameters as stored by the constructor.
+	T MinRad() const { return m_MinRad; }
+	T MaxRad() const { return m_MaxRad; }
+	T Curve() const { return m_Curve; }
+	size_t Supersample() const { return m_Supersample; }
+	//Values computed by Create().
+	size_t KernelSize() const { return m_KernelSize; }
+	size_t MaxFilterIndex() const { return m_MaxFilterIndex; }
+	size_t MaxFilteredCounts() const { return m_MaxFilteredCounts; }
+	intmax_t FilterWidth() const override { return m_FilterWidth; }
+	//Number of kernels, which is the length of the widths table.
+	size_t BufferSize() const { return m_Widths.size(); }
+	//Sizes in bytes of the three tables.
+	size_t CoefsSizeBytes() const { return BufferSize() * m_KernelSize * sizeof(T); }
+	size_t WidthsSizeBytes() const { return SizeOf(m_Widths); }
+	size_t CoefsIndicesSizeBytes() const { return SizeOf(m_CoefIndices); }
+	//Read-only pointers to the start of each table.
+	const T* Coefs() const { return m_Coefs.data(); }
+	const T* Widths() const { return m_Widths.data(); }
+	const uint* CoefIndices() const { return m_CoefIndices.data(); }
+
+private:
+	T m_MinRad;
+	T m_MaxRad;//The original specified filter radius.
+	T m_Curve;
+	size_t m_Supersample;
+	size_t m_KernelSize;
+	size_t m_MaxFilterIndex;
+	size_t m_MaxFilteredCounts;
+	intmax_t m_FilterWidth;//The new radius after scaling for super sample and rounding. This is what's actually used.
+	vector<T> m_Coefs;
+	vector<T> m_Widths;
+	vector<uint> m_CoefIndices;
+};
+}
@@ -1,22 +1,22 @@
-#include "EmberPch.h"
-
-#ifdef _WIN32
-/// <summary>
-/// Generated by Visual Studio to make the DLL run properly.
-/// </summary>
-BOOL APIENTRY DllMain( HMODULE hModule,
-					   DWORD  ul_reason_for_call,
-					   LPVOID lpReserved
-					 )
-{
-	switch (ul_reason_for_call)
-	{
-	case DLL_PROCESS_ATTACH:
-	case DLL_THREAD_ATTACH:
-	case DLL_THREAD_DETACH:
-	case DLL_PROCESS_DETACH:
-		break;
-	}
-	return TRUE;
-}
-#endif
+#include "EmberPch.h"
+
+#ifdef _WIN32
+/// <summary>
+/// Generated by Visual Studio to make the DLL run properly.
+/// </summary>
+BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved)
+{
+	//No setup or teardown is performed for any notification,
+	//so every reason code takes the same do-nothing path.
+	switch (ul_reason_for_call)
+	{
+		case DLL_PROCESS_ATTACH:
+		case DLL_THREAD_ATTACH:
+		case DLL_THREAD_DETACH:
+		case DLL_PROCESS_DETACH:
+		default:
+			break;
+	}
+
+	return TRUE;
+}
+#endif
@@ -1,350 +1,350 @@
-#pragma once
-
-#include "EmberPch.h"
-
-/// <summary>
-/// Basic #defines used throughout the library.
-/// </summary>
-
-#ifdef _WIN32
-	#if defined(BUILDING_EMBER)
-		#define EMBER_API __declspec(dllexport)
-	#else
-		#define EMBER_API __declspec(dllimport)
-	#endif
-#else
-	#define EMBER_API
-	#define fopen_s(pFile,filename,mode) ((*(pFile)=fopen((filename),(mode)))==nullptr)
-	#define _stat stat
-	#define _fstat fstat
-	#define _stricmp strcasecmp
-	typedef int errno_t;
-#endif
-
-#define RESTRICT __restrict//This might make things faster, unsure if it really does though.
-//#define RESTRICT
-
-//Wrap the sincos function for Macs and PC.
-#if defined(__APPLE__) || defined(_MSC_VER)
-#define sincos(x, s, c) *(s)=std::sin(x); *(c)=std::cos(x);
-#else
-static void sincos(float x, float* s, float* c)
-{
-	*s = std::sin(x);
-	*c = std::cos(x);
-}
-#endif
-
-namespace EmberNs
-{
-#define EMBER_VERSION "22.21.4.2"
-//#define FLAM3_COMPAT 1//Uncomment this if you want full compatibility with flam3 regarding some of the trig-based variations in Variations01.h
-#define EPS6 T(1e-6)
-#define EPS std::numeric_limits<T>::epsilon()//Apoplugin.h uses -20, but it's more mathematically correct to do it this way.
-#define ISAAC_SIZE 4
-#define MEMALIGN 32
-#define DE_THRESH 100
-#define DEG_2_RAD (M_PI / 180)
-#define RAD_2_DEG (180 / M_PI)
-#define DEG_2_RAD_T (T{M_PI} / T{180})
-#define RAD_2_DEG_T (T{180} / T{M_PI})
-#define M_2PI (T{M_PI * 2})
-#define M_3PI (T{M_PI * 3})
-#define M_PI2 (T{M_PI_2})
-#define M_PI4 (T{M_PI_4})
-#define M_SQRT3 T(1.7320508075688772935274463415059)
-#define M_SQRT3_2 T(0.86602540378443864676372317075294)
-#define M_SQRT3_3 T(0.57735026918962576450914878050196)
-#define M_SQRT5 T(2.2360679774997896964091736687313)
-#define M_PHI T(1.61803398874989484820458683436563)
-#define M_1_2PI T(0.15915494309189533576888376337251)
-#define M_PI3 T(1.0471975511965977461542144610932)
-#define M_PI6 T(0.52359877559829887307710723054658)
-#define COLORMAP_LENGTH 256//These will need to change if 2D palette support is ever added, or variable sized palettes.
-#define WHITE 255
-#define DEFAULT_SBS (1024 * 10)
-//#define XC(c) ((const xmlChar*)(c))
-#define XC(c) (reinterpret_cast<const xmlChar*>(c))
-#define CX(c) (reinterpret_cast<char*>(c))
-#define CCX(c) (reinterpret_cast<const char*>(c))
-#define BadVal(x) (std::isnan(x))
-#define Vlen(x) (sizeof(x) / sizeof(*x))
-#define SQR(x) ((x) * (x))
-#define CUBE(x) ((x) * (x) * (x))
-#define TLOW std::numeric_limits<T>::lowest()
-#define TMAX std::numeric_limits<T>::max()
-#define FLOAT_MAX_TAN 8388607.0f
-#define FLOAT_MIN_TAN -FLOAT_MAX_TAN
-#define CURVES_LENGTH 65536
-#define CURVES_LENGTH_M1 65535.0f
-#define ONE_OVER_CURVES_LENGTH_M1 1.525902189669e-5f
-#define EMPTYFIELD -9999
-typedef unsigned char et;
-typedef std::lock_guard <std::recursive_mutex> rlg;
-
-/// <summary>
-/// Thin wrapper around getting the current time in milliseconds.
-/// </summary>
-typedef std::chrono::high_resolution_clock Clock;
-typedef std::chrono::duration<double, std::ratio<1, 1000>> DoubleMs;
-typedef std::chrono::time_point<Clock, DoubleMs> DoubleMsTimePoint;
-static inline DoubleMsTimePoint NowMsD() { return time_point_cast<DoubleMs>(Clock::now()); }
-static inline size_t NowMs() { return duration_cast<milliseconds>(Clock::now().time_since_epoch()).count(); }
-
-#ifndef byte
-	typedef unsigned char byte;
-#endif
-
-#define DO_DOUBLE 1//Comment this out for shorter build times during development. Always uncomment for release.
-//#define ISAAC_FLAM3_DEBUG 1//This is almost never needed, but is very useful when troubleshooting difficult bugs. Enable it to do a side by side comparison with flam3.
-
-//These two must always match.
-#ifdef _WIN32
-	#define ALIGN __declspec(align(16))
-	#define STATIC static
-#else
-	#define ALIGN __attribute__ ((aligned (16)))
-	#define STATIC
-#endif
-
-#define ALIGN_CL "((aligned (16)))"//The extra parens are necessary.
-
-#if GLM_VERSION >= 96
-	#define v2T  glm::tvec2<T, glm::defaultp>
-	#define v3T  glm::tvec3<T, glm::defaultp>
-	#define v4T  glm::tvec4<T, glm::defaultp>
-	#define v2F  glm::tvec2<float, glm::defaultp>
-	#define v4F  glm::tvec4<float, glm::defaultp>
-	#define v4D  glm::tvec4<double, glm::defaultp>
-	#define v4bT glm::tvec4<bucketT, glm::defaultp>
-	#define m2T  glm::tmat2x2<T, glm::defaultp>
-	#define m3T  glm::tmat3x3<T, glm::defaultp>
-	#define m4T  glm::tmat4x4<T, glm::defaultp>
-	#define m23T glm::tmat2x3<T, glm::defaultp>
-	typedef vector<glm::tvec4<float, glm::defaultp>> vv4F;
-#else
-	#define v2T  glm::detail::tvec2<T, glm::defaultp>
-	#define v3T  glm::detail::tvec3<T, glm::defaultp>
-	#define v4T  glm::detail::tvec4<T, glm::defaultp>
-	#define v2F  glm::detail::tvec2<float, glm::defaultp>
-	#define v4F  glm::detail::tvec4<float, glm::defaultp>
-	#define v4D  glm::detail::tvec4<double, glm::defaultp>
-	#define v4bT glm::detail::tvec4<bucketT, glm::defaultp>
-	#define m2T  glm::detail::tmat2x2<T, glm::defaultp>
-	#define m3T  glm::detail::tmat3x3<T, glm::defaultp>
-	#define m4T  glm::detail::tmat4x4<T, glm::defaultp>
-	#define m23T glm::detail::tmat2x3<T, glm::defaultp>
-	typedef vector<glm::detail::tvec4<float, glm::defaultp>> vv4F;
-#endif
-
-enum class eInterp : et { EMBER_INTERP_LINEAR = 0, EMBER_INTERP_SMOOTH = 1 };
-enum class eAffineInterp : et { AFFINE_INTERP_LINEAR = 0, AFFINE_INTERP_LOG = 1, AFFINE_INTERP_COMPAT = 2, AFFINE_INTERP_OLDER = 3 };
-enum class ePaletteMode : et { PALETTE_STEP = 0, PALETTE_LINEAR = 1 };
-enum class ePaletteInterp : et { INTERP_HSV = 0, INTERP_SWEEP = 1 };
-enum class eMotion : et { MOTION_SIN = 1, MOTION_TRIANGLE = 2, MOTION_HILL = 3, MOTION_SAW = 4 };
-enum class eProcessAction : et { NOTHING = 0, ACCUM_ONLY = 1, FILTER_AND_ACCUM = 2, KEEP_ITERATING = 3, FULL_RENDER = 4 };
-enum class eProcessState : et { NONE = 0, ITER_STARTED = 1, ITER_DONE = 2, FILTER_DONE = 3, ACCUM_DONE = 4 };
-enum class eInteractiveFilter : et { FILTER_LOG = 0, FILTER_DE = 1 };
-enum class eScaleType : et { SCALE_NONE = 0, SCALE_WIDTH = 1, SCALE_HEIGHT = 2 };
-enum class eRenderStatus : et { RENDER_OK = 0, RENDER_ERROR = 1, RENDER_ABORT = 2 };
-enum class eEmberMotionParam : et//These must remain in this order forever.
-{
-	FLAME_MOTION_NONE,
-	FLAME_MOTION_ZOOM,
-	FLAME_MOTION_ZPOS,
-	FLAME_MOTION_PERSPECTIVE,
-	FLAME_MOTION_YAW,
-	FLAME_MOTION_PITCH,
-	FLAME_MOTION_DEPTH_BLUR,
-	FLAME_MOTION_CENTER_X,
-	FLAME_MOTION_CENTER_Y,
-	FLAME_MOTION_ROTATE,
-	FLAME_MOTION_BRIGHTNESS,
-	FLAME_MOTION_GAMMA,
-	FLAME_MOTION_GAMMA_THRESH,
-	FLAME_MOTION_HIGHLIGHT_POWER,
-	FLAME_MOTION_K2,
-	FLAME_MOTION_RAND_RANGE,
-	FLAME_MOTION_BACKGROUND_R,
-	FLAME_MOTION_BACKGROUND_G,
-	FLAME_MOTION_BACKGROUND_B,
-	FLAME_MOTION_VIBRANCY,
-	FLAME_MOTION_BLUR_CURVE
-};
-
-/// <summary>
-/// Thin wrapper to allow << operator on interp type.
-/// </summary>
-/// <param name="stream">The stream to insert into</param>
-/// <param name="t">The type whose string representation will be inserted into the stream</param>
-/// <returns></returns>
-static std::ostream& operator<<(std::ostream& stream, const eInterp& t)
-{
-	switch (t)
-	{
-		case EmberNs::eInterp::EMBER_INTERP_LINEAR:
-			stream << "linear";
-			break;
-
-		case EmberNs::eInterp::EMBER_INTERP_SMOOTH:
-			stream << "smooth";
-			break;
-
-		default:
-			stream << "error";
-			break;
-	}
-
-	return stream;
-}
-
-/// <summary>
-/// Thin wrapper to allow << operator on affine interp type.
-/// </summary>
-/// <param name="stream">The stream to insert into</param>
-/// <param name="t">The type whose string representation will be inserted into the stream</param>
-/// <returns></returns>
-static std::ostream& operator<<(std::ostream& stream, const eAffineInterp& t)
-{
-	switch (t)
-	{
-		case EmberNs::eAffineInterp::AFFINE_INTERP_LINEAR:
-			stream << "linear";
-			break;
-
-		case EmberNs::eAffineInterp::AFFINE_INTERP_LOG:
-			stream << "log";
-			break;
-
-		case EmberNs::eAffineInterp::AFFINE_INTERP_COMPAT:
-			stream << "compat";
-			break;
-
-		case EmberNs::eAffineInterp::AFFINE_INTERP_OLDER:
-			stream << "older";
-			break;
-
-		default:
-			stream << "error";
-			break;
-	}
-
-	return stream;
-}
-
-/// <summary>
-/// Thin wrapper to allow << operator on palette mode type.
-/// </summary>
-/// <param name="stream">The stream to insert into</param>
-/// <param name="t">The type whose string representation will be inserted into the stream</param>
-/// <returns></returns>
-static std::ostream& operator<<(std::ostream& stream, const ePaletteMode& t)
-{
-	switch (t)
-	{
-		case EmberNs::ePaletteMode::PALETTE_STEP:
-			stream << "step";
-			break;
-
-		case EmberNs::ePaletteMode::PALETTE_LINEAR:
-			stream << "linear";
-			break;
-
-		default:
-			stream << "error";
-			break;
-	}
-
-	return stream;
-}
-
-/// <summary>
-/// Thin wrapper to allow << operator on palette interp type.
-/// </summary>
-/// <param name="stream">The stream to insert into</param>
-/// <param name="t">The type whose string representation will be inserted into the stream</param>
-/// <returns></returns>
-static std::ostream& operator<<(std::ostream& stream, const ePaletteInterp& t)
-{
-	switch (t)
-	{
-		case EmberNs::ePaletteInterp::INTERP_HSV:
-			stream << "hsv";
-			break;
-
-		case EmberNs::ePaletteInterp::INTERP_SWEEP:
-			stream << "sweep";
-			break;
-
-		default:
-			stream << "error";
-			break;
-	}
-
-	return stream;
-}
-
-/// <summary>
-/// Thin wrapper to allow << operator on scale type.
-/// </summary>
-/// <param name="stream">The stream to insert into</param>
-/// <param name="t">The type whose string representation will be inserted into the stream</param>
-/// <returns></returns>
-static std::ostream& operator<<(std::ostream& stream, const eScaleType& t)
-{
-	switch (t)
-	{
-		case EmberNs::eScaleType::SCALE_NONE:
-			stream << "none";
-			break;
-
-		case EmberNs::eScaleType::SCALE_WIDTH:
-			stream << "width";
-			break;
-
-		case EmberNs::eScaleType::SCALE_HEIGHT:
-			stream << "height";
-			break;
-
-		default:
-			stream << "error";
-			break;
-	}
-
-	return stream;
-}
-
-/// <summary>
-/// Thin wrapper to allow << operator on motion type.
-/// </summary>
-/// <param name="stream">The stream to insert into</param>
-/// <param name="t">The type whose string representation will be inserted into the stream</param>
-/// <returns></returns>
-static std::ostream& operator<<(std::ostream& stream, const eMotion& t)
-{
-	switch (t)
-	{
-		case EmberNs::eMotion::MOTION_SIN:
-			stream << "sin";
-			break;
-
-		case EmberNs::eMotion::MOTION_TRIANGLE:
-			stream << "triangle";
-			break;
-
-		case EmberNs::eMotion::MOTION_HILL:
-			stream << "hill";
-			break;
-
-		case EmberNs::eMotion::MOTION_SAW:
-			stream << "saw";
-			break;
-
-		default:
-			stream << "error";
-			break;
-	}
-
-	return stream;
-}
-}
+#pragma once
+
+#include "EmberPch.h"
+
+/// <summary>
+/// Basic #defines used throughout the library.
+/// </summary>
+
+#ifdef _WIN32
+	#if defined(BUILDING_EMBER)
+		#define EMBER_API __declspec(dllexport)//BUILDING_EMBER is defined: mark the symbols as exported.
+	#else
+		#define EMBER_API __declspec(dllimport)//BUILDING_EMBER is not defined: mark the symbols as imported.
+	#endif
+#else
+	#define EMBER_API//Expands to nothing when _WIN32 is not defined.
+	#define fopen_s(pFile,filename,mode) ((*(pFile)=fopen((filename),(mode)))==nullptr)//Stores the FILE* in *pFile; evaluates to false (0) when fopen succeeded and true when it returned nullptr.
+	#define _stat stat//The next three lines map underscore-prefixed names onto stat, fstat and strcasecmp.
+	#define _fstat fstat
+	#define _stricmp strcasecmp
+	typedef int errno_t;//Supplies the errno_t name on this branch as a plain int.
+#endif
+
+#define RESTRICT __restrict//This might make things faster, unsure if it really does though.
+//#define RESTRICT
+
+//Wrap the sincos function for Macs and PC. NOTE(review): the macro form expands to two statements and evaluates x twice, so it misbehaves inside an unbraced if/else or with a side-effecting argument. The non-macro branch only adds a float overload.
+#if defined(__APPLE__) || defined(_MSC_VER)
+#define sincos(x, s, c) *(s)=std::sin(x); *(c)=std::cos(x);
+#else
+static void sincos(float x, float* s, float* c)
+{
+	*s = std::sin(x);
+	*c = std::cos(x);
+}
+#endif
+
+namespace EmberNs
+{
+#define EMBER_VERSION "22.21.4.2"
+//#define FLAM3_COMPAT 1//Uncomment this if you want full compatibility with flam3 regarding some of the trig-based variations in Variations01.h
+#define EPS6 T(1e-6)//Like every macro below that mentions T, this only compiles where a type named T is in scope.
+#define EPS std::numeric_limits<T>::epsilon()//Apoplugin.h uses -20, but it's more mathematically correct to do it this way.
+#define ISAAC_SIZE 4
+#define MEMALIGN 32
+#define DE_THRESH 100
+#define DEG_2_RAD (M_PI / 180)//Multiply degrees by this to get radians.
+#define RAD_2_DEG (180 / M_PI)//Multiply radians by this to get degrees.
+#define DEG_2_RAD_T (T{M_PI} / T{180})//The same two conversion factors, computed in type T.
+#define RAD_2_DEG_T (T{180} / T{M_PI})
+#define M_2PI (T{M_PI * 2})//2 * pi.
+#define M_3PI (T{M_PI * 3})//3 * pi.
+#define M_PI2 (T{M_PI_2})//Pi / 2, not pi squared.
+#define M_PI4 (T{M_PI_4})//Pi / 4.
+#define M_SQRT3 T(1.7320508075688772935274463415059)//sqrt(3).
+#define M_SQRT3_2 T(0.86602540378443864676372317075294)//sqrt(3) / 2.
+#define M_SQRT3_3 T(0.57735026918962576450914878050196)//sqrt(3) / 3.
+#define M_SQRT5 T(2.2360679774997896964091736687313)//sqrt(5).
+#define M_PHI T(1.61803398874989484820458683436563)//The golden ratio, (1 + sqrt(5)) / 2.
+#define M_1_2PI T(0.15915494309189533576888376337251)//1 / (2 * pi).
+#define M_PI3 T(1.0471975511965977461542144610932)//Pi / 3.
+#define M_PI6 T(0.52359877559829887307710723054658)//Pi / 6.
+#define COLORMAP_LENGTH 256//These will need to change if 2D palette support is ever added, or variable sized palettes.
+#define WHITE 255
+#define DEFAULT_SBS (1024 * 10)
+//#define XC(c) ((const xmlChar*)(c))
+#define XC(c) (reinterpret_cast<const xmlChar*>(c))//XC, CX and CCX are shorthand casts to const xmlChar*, char* and const char*.
+#define CX(c) (reinterpret_cast<char*>(c))
+#define CCX(c) (reinterpret_cast<const char*>(c))
+#define BadVal(x) (std::isnan(x))//True only for NaN; infinities are not flagged.
+#define Vlen(x) (sizeof(x) / sizeof(*x))//Element count of a real array; the result is meaningless if x is a pointer.
+#define SQR(x) ((x) * (x))//SQR and CUBE evaluate their argument more than once, so avoid arguments with side effects.
+#define CUBE(x) ((x) * (x) * (x))
+#define TLOW std::numeric_limits<T>::lowest()//Most negative finite value of T.
+#define TMAX std::numeric_limits<T>::max()//Largest finite value of T.
+#define FLOAT_MAX_TAN 8388607.0f//2^23 - 1.
+#define FLOAT_MIN_TAN -FLOAT_MAX_TAN
+#define CURVES_LENGTH 65536//2^16.
+#define CURVES_LENGTH_M1 65535.0f//CURVES_LENGTH - 1, as a float.
+#define ONE_OVER_CURVES_LENGTH_M1 1.525902189669e-5f//1 / 65535.
+#define EMPTYFIELD -9999
+typedef unsigned char et;//Underlying type of the enum classes declared in this file.
+typedef std::lock_guard <std::recursive_mutex> rlg;//Scoped lock over a std::recursive_mutex.
+
+/// <summary>
+/// Clock aliases plus two thin wrappers that read the current time in milliseconds from Clock.
+/// </summary>
+typedef std::chrono::high_resolution_clock Clock;
+typedef std::chrono::duration<double, std::ratio<1, 1000>> DoubleMs;//Milliseconds held in a double, so fractions of a millisecond are kept.
+typedef std::chrono::time_point<Clock, DoubleMs> DoubleMsTimePoint;
+static inline DoubleMsTimePoint NowMsD() noexcept { return time_point_cast<DoubleMs>(Clock::now()); }//Current Clock time as a time point measured in DoubleMs.
+static inline size_t NowMs() noexcept { return duration_cast<milliseconds>(Clock::now().time_since_epoch()).count(); }//Whole milliseconds since Clock's epoch, converted to size_t.
+
+//#ifndef byte//Disabled: std::byte is now a type, so the code spells out unsigned char instead of using a byte typedef.
+//	typedef unsigned char byte;
+//#endif
+
+#define DO_DOUBLE 1//Comment this out for shorter build times during development. Always uncomment for release.
+//#define ISAAC_FLAM3_DEBUG 1//This is almost never needed, but is very useful when troubleshooting difficult bugs. Enable it to do a side by side comparison with flam3.
+
+//These two must always match. NOTE(review): presumably the 16 byte alignment in ALIGN and in ALIGN_CL below - confirm.
+#ifdef _WIN32
+	#define ALIGN __declspec(align(16))
+	#define STATIC static//STATIC expands to static here and to nothing in the other branch.
+#else
+	#define ALIGN __attribute__ ((aligned (16)))
+	#define STATIC
+#endif
+
+#define ALIGN_CL "((aligned (16)))"//The extra parens are necessary.
+
+#if GLM_VERSION >= 96//This branch names the glm vector/matrix templates directly in glm::, the other branch names them in glm::detail::.
+	#define v2T  glm::tvec2<T, glm::defaultp>//The *T macros only compile where a type named T is in scope.
+	#define v3T  glm::tvec3<T, glm::defaultp>
+	#define v4T  glm::tvec4<T, glm::defaultp>
+	#define v2F  glm::tvec2<float, glm::defaultp>
+	#define v4F  glm::tvec4<float, glm::defaultp>
+	#define v4D  glm::tvec4<double, glm::defaultp>
+	#define v4bT glm::tvec4<bucketT, glm::defaultp>//Needs a type named bucketT in scope.
+	#define m2T  glm::tmat2x2<T, glm::defaultp>
+	#define m3T  glm::tmat3x3<T, glm::defaultp>
+	#define m4T  glm::tmat4x4<T, glm::defaultp>
+	#define m23T glm::tmat2x3<T, glm::defaultp>
+	typedef vector<glm::tvec4<float, glm::defaultp>> vv4F;//Vector of four-component float vectors.
+#else
+	#define v2T  glm::detail::tvec2<T, glm::defaultp>
+	#define v3T  glm::detail::tvec3<T, glm::defaultp>
+	#define v4T  glm::detail::tvec4<T, glm::defaultp>
+	#define v2F  glm::detail::tvec2<float, glm::defaultp>
+	#define v4F  glm::detail::tvec4<float, glm::defaultp>
+	#define v4D  glm::detail::tvec4<double, glm::defaultp>
+	#define v4bT glm::detail::tvec4<bucketT, glm::defaultp>
+	#define m2T  glm::detail::tmat2x2<T, glm::defaultp>
+	#define m3T  glm::detail::tmat3x3<T, glm::defaultp>
+	#define m4T  glm::detail::tmat4x4<T, glm::defaultp>
+	#define m23T glm::detail::tmat2x3<T, glm::defaultp>
+	typedef vector<glm::detail::tvec4<float, glm::defaultp>> vv4F;
+#endif
+
+enum class eInterp : et { EMBER_INTERP_LINEAR = 0, EMBER_INTERP_SMOOTH = 1 };
+enum class eAffineInterp : et { AFFINE_INTERP_LINEAR = 0, AFFINE_INTERP_LOG = 1, AFFINE_INTERP_COMPAT = 2, AFFINE_INTERP_OLDER = 3 };
+enum class ePaletteMode : et { PALETTE_STEP = 0, PALETTE_LINEAR = 1 };
+enum class ePaletteInterp : et { INTERP_HSV = 0, INTERP_SWEEP = 1 };
+enum class eMotion : et { MOTION_SIN = 1, MOTION_TRIANGLE = 2, MOTION_HILL = 3, MOTION_SAW = 4 };//Values start at 1; there is no enumerator for 0.
+enum class eProcessAction : et { NOTHING = 0, ACCUM_ONLY = 1, FILTER_AND_ACCUM = 2, KEEP_ITERATING = 3, FULL_RENDER = 4 };
+enum class eProcessState : et { NONE = 0, ITER_STARTED = 1, ITER_DONE = 2, FILTER_DONE = 3, ACCUM_DONE = 4 };
+enum class eInteractiveFilter : et { FILTER_LOG = 0, FILTER_DE = 1 };
+enum class eScaleType : et { SCALE_NONE = 0, SCALE_WIDTH = 1, SCALE_HEIGHT = 2 };
+enum class eRenderStatus : et { RENDER_OK = 0, RENDER_ERROR = 1, RENDER_ABORT = 2 };
+enum class eEmberMotionParam : et//These must remain in this order forever. No explicit values are given, so each enumerator's value is its position in this list, starting at 0.
+{
+	FLAME_MOTION_NONE,
+	FLAME_MOTION_ZOOM,
+	FLAME_MOTION_ZPOS,
+	FLAME_MOTION_PERSPECTIVE,
+	FLAME_MOTION_YAW,
+	FLAME_MOTION_PITCH,
+	FLAME_MOTION_DEPTH_BLUR,
+	FLAME_MOTION_CENTER_X,
+	FLAME_MOTION_CENTER_Y,
+	FLAME_MOTION_ROTATE,
+	FLAME_MOTION_BRIGHTNESS,
+	FLAME_MOTION_GAMMA,
+	FLAME_MOTION_GAMMA_THRESH,
+	FLAME_MOTION_HIGHLIGHT_POWER,
+	FLAME_MOTION_K2,
+	FLAME_MOTION_RAND_RANGE,
+	FLAME_MOTION_BACKGROUND_R,
+	FLAME_MOTION_BACKGROUND_G,
+	FLAME_MOTION_BACKGROUND_B,
+	FLAME_MOTION_VIBRANCY,
+	FLAME_MOTION_BLUR_CURVE
+};
+
+/// <summary>
+/// Stream insertion for eInterp: writes "linear" or "smooth", or "error" for any other value.
+/// </summary>
+/// <param name="stream">The stream to insert into</param>
+/// <param name="t">The value whose string representation will be inserted into the stream</param>
+/// <returns>The same stream that was passed in</returns>
+static std::ostream& operator<<(std::ostream& stream, const eInterp& t)
+{
+	switch (t)
+	{
+		case EmberNs::eInterp::EMBER_INTERP_LINEAR:
+			stream << "linear";
+			break;
+
+		case EmberNs::eInterp::EMBER_INTERP_SMOOTH:
+			stream << "smooth";
+			break;
+
+		default:
+			stream << "error";
+			break;
+	}
+
+	return stream;
+}
+
+/// <summary>
+/// Stream insertion for eAffineInterp: writes "linear", "log", "compat" or "older", or "error" for any other value.
+/// </summary>
+/// <param name="stream">The stream to insert into</param>
+/// <param name="t">The value whose string representation will be inserted into the stream</param>
+/// <returns>The same stream that was passed in</returns>
+static std::ostream& operator<<(std::ostream& stream, const eAffineInterp& t)
+{
+	switch (t)
+	{
+		case EmberNs::eAffineInterp::AFFINE_INTERP_LINEAR:
+			stream << "linear";
+			break;
+
+		case EmberNs::eAffineInterp::AFFINE_INTERP_LOG:
+			stream << "log";
+			break;
+
+		case EmberNs::eAffineInterp::AFFINE_INTERP_COMPAT:
+			stream << "compat";
+			break;
+
+		case EmberNs::eAffineInterp::AFFINE_INTERP_OLDER:
+			stream << "older";
+			break;
+
+		default:
+			stream << "error";
+			break;
+	}
+
+	return stream;
+}
+
+/// <summary>
+/// Stream insertion for ePaletteMode: writes "step" or "linear", or "error" for any other value.
+/// </summary>
+/// <param name="stream">The stream to insert into</param>
+/// <param name="t">The value whose string representation will be inserted into the stream</param>
+/// <returns>The same stream that was passed in</returns>
+static std::ostream& operator<<(std::ostream& stream, const ePaletteMode& t)
+{
+	switch (t)
+	{
+		case EmberNs::ePaletteMode::PALETTE_STEP:
+			stream << "step";
+			break;
+
+		case EmberNs::ePaletteMode::PALETTE_LINEAR:
+			stream << "linear";
+			break;
+
+		default:
+			stream << "error";
+			break;
+	}
+
+	return stream;
+}
+
+/// <summary>
+/// Stream insertion for ePaletteInterp: writes "hsv" or "sweep", or "error" for any other value.
+/// </summary>
+/// <param name="stream">The stream to insert into</param>
+/// <param name="t">The value whose string representation will be inserted into the stream</param>
+/// <returns>The same stream that was passed in</returns>
+static std::ostream& operator<<(std::ostream& stream, const ePaletteInterp& t)
+{
+	switch (t)
+	{
+		case EmberNs::ePaletteInterp::INTERP_HSV:
+			stream << "hsv";
+			break;
+
+		case EmberNs::ePaletteInterp::INTERP_SWEEP:
+			stream << "sweep";
+			break;
+
+		default:
+			stream << "error";
+			break;
+	}
+
+	return stream;
+}
+
+/// <summary>
+/// Stream insertion for eScaleType: writes "none", "width" or "height", or "error" for any other value.
+/// </summary>
+/// <param name="stream">The stream to insert into</param>
+/// <param name="t">The value whose string representation will be inserted into the stream</param>
+/// <returns>The same stream that was passed in</returns>
+static std::ostream& operator<<(std::ostream& stream, const eScaleType& t)
+{
+	switch (t)
+	{
+		case EmberNs::eScaleType::SCALE_NONE:
+			stream << "none";
+			break;
+
+		case EmberNs::eScaleType::SCALE_WIDTH:
+			stream << "width";
+			break;
+
+		case EmberNs::eScaleType::SCALE_HEIGHT:
+			stream << "height";
+			break;
+
+		default:
+			stream << "error";
+			break;
+	}
+
+	return stream;
+}
+
+/// <summary>
+/// Stream insertion for eMotion: writes "sin", "triangle", "hill" or "saw", or "error" for any other value.
+/// </summary>
+/// <param name="stream">The stream to insert into</param>
+/// <param name="t">The value whose string representation will be inserted into the stream</param>
+/// <returns>The same stream that was passed in</returns>
+static std::ostream& operator<<(std::ostream& stream, const eMotion& t)
+{
+	switch (t)
+	{
+		case EmberNs::eMotion::MOTION_SIN:
+			stream << "sin";
+			break;
+
+		case EmberNs::eMotion::MOTION_TRIANGLE:
+			stream << "triangle";
+			break;
+
+		case EmberNs::eMotion::MOTION_HILL:
+			stream << "hill";
+			break;
+
+		case EmberNs::eMotion::MOTION_SAW:
+			stream << "saw";
+			break;
+
+		default:
+			stream << "error";
+			break;
+	}
+
+	return stream;
+}
+}
@@ -1,145 +1,145 @@
-#pragma once
-
-#include "EmberDefines.h"
-
-namespace EmberNs
-{
-/// <summary>
-/// Thin derivation of pair<eEmberMotionParam, T> for the element type
-/// of EmberMotion<T>::m_MotionParams to allow for copying vectors
-/// of different types of T.
-/// Template argument expected to be float or double.
-/// </summary>
-template <typename T>
-class EMBER_API MotionParam : public pair <eEmberMotionParam, T>
-{
-public:
-	/// <summary>
-	/// Default constructor, which calls the base, which sets first and second to their defaults.
-	/// </summary>
-	MotionParam() = default;
-
-	/// <summary>
-	/// Member-wise constructor.
-	/// </summary>
-	/// <param name="e">The eEmberMotionParam value to assign to first</param>
-	/// <param name="t">The T value to assign to second</param>
-	MotionParam(eEmberMotionParam e, T t)
-		: pair<eEmberMotionParam, T>(e, t)
-	{
-	}
-
-	/// <summary>
-	/// Default copy constructor.
-	/// </summary>
-	/// <param name="other">The MotionParam object to copy</param>
-	MotionParam(const MotionParam<T>& other)
-		: pair <eEmberMotionParam, T>()
-	{
-		operator=<T>(other);
-	}
-
-	/// <summary>
-	/// Copy constructor to copy a MotionParam object of type U.
-	/// </summary>
-	/// <param name="other">The MotionParam object to copy</param>
-	template <typename U>
-	MotionParam(const MotionParam<U>& other)
-	{
-		operator=<U>(other);
-	}
-
-	/// <summary>
-	/// Default assignment operator.
-	/// </summary>
-	/// <param name="other">The MotionParam object to copy</param>
-	MotionParam<T>& operator = (const MotionParam<T>& other)
-	{
-		if (this != &other)
-			MotionParam<T>::operator=<T>(other);
-
-		return *this;
-	}
-
-	/// <summary>
-	/// Assignment operator to assign a MotionParam object of type U.
-	/// </summary>
-	/// <param name="other">The MotionParam object to copy.</param>
-	/// <returns>Reference to updated self</returns>
-	template <typename U>
-	MotionParam& operator = (const MotionParam<U>& other)
-	{
-		this->first = other.first;
-		this->second = static_cast<T>(other.second);
-		return *this;
-	}
-};
-
-/// <summary>
-/// EmberMotion elements allow for motion of the flame parameters such as zoom, yaw, pitch and friends
-/// The values in these elements can be used to modify flame parameters during rotation in much the same
-/// way as motion elements on xforms do.
-/// Template argument expected to be float or double.
-/// </summary>
-template <typename T>
-class EMBER_API EmberMotion
-{
-public:
-	/// <summary>
-	/// Default constructor to initialize motion freq and offset to 0 and the motion func to SIN.
-	/// </summary>
-	EmberMotion() = default;
-	~EmberMotion() = default;
-
-	/// <summary>
-	/// Default copy constructor.
-	/// </summary>
-	/// <param name="other">The EmberMotion object to copy</param>
-	EmberMotion(const EmberMotion<T>& other)
-	{
-		operator=<T>(other);
-	}
-
-	/// <summary>
-	/// Copy constructor to copy a EmberMotion object of type U.
-	/// </summary>
-	/// <param name="other">The EmberMotion object to copy</param>
-	template <typename U>
-	EmberMotion(const EmberMotion<U>& other)
-	{
-		operator=<U>(other);
-	}
-
-	/// <summary>
-	/// Default assignment operator.
-	/// </summary>
-	/// <param name="other">The EmberMotion object to copy</param>
-	EmberMotion<T>& operator = (const EmberMotion<T>& other)
-	{
-		if (this != &other)
-			EmberMotion<T>::operator=<T>(other);
-
-		return *this;
-	}
-
-	/// <summary>
-	/// Assignment operator to assign a EmberMotion object of type U.
-	/// </summary>
-	/// <param name="other">The EmberMotion object to copy.</param>
-	/// <returns>Reference to updated self</returns>
-	template <typename U>
-	EmberMotion& operator = (const EmberMotion<U>& other)
-	{
-		CopyCont(m_MotionParams, other.m_MotionParams);
-		m_MotionFunc = other.m_MotionFunc;
-		m_MotionFreq = static_cast<T>(other.m_MotionFreq);
-		m_MotionOffset = static_cast<T>(other.m_MotionOffset);
-		return *this;
-	}
-
-	T m_MotionFreq = 0;
-	T m_MotionOffset = 0;
-	eMotion m_MotionFunc = eMotion::MOTION_SIN;
-	vector<MotionParam<T>> m_MotionParams;
-};
-}
+#pragma once
+
+#include "EmberDefines.h"
+
+namespace EmberNs
+{
+/// <summary>
+/// Thin derivation of pair<eEmberMotionParam, T> for the element type
+/// of EmberMotion<T>::m_MotionParams to allow for copying vectors
+/// of different types of T.
+/// Template argument expected to be float or double.
+/// </summary>
+template <typename T>
+class EMBER_API MotionParam : public pair <eEmberMotionParam, T>
+{
+public:
+	/// <summary>
+	/// Default constructor, which calls the base, which sets first and second to their defaults.
+	/// </summary>
+	MotionParam() = default;
+
+	/// <summary>
+	/// Member-wise constructor which forwards both values to the base pair.
+	/// </summary>
+	/// <param name="e">The eEmberMotionParam value to assign to first</param>
+	/// <param name="t">The T value to assign to second</param>
+	MotionParam(eEmberMotionParam e, T t)
+		: pair<eEmberMotionParam, T>(e, t)
+	{
+	}
+
+	/// <summary>
+	/// Copy constructor. Default-constructs the base pair, then copies through the templated assignment operator.
+	/// </summary>
+	/// <param name="other">The MotionParam object to copy</param>
+	MotionParam(const MotionParam<T>& other)
+		: pair <eEmberMotionParam, T>()
+	{
+		operator=<T>(other);
+	}
+
+	/// <summary>
+	/// Copy constructor to copy a MotionParam object of type U, also through the templated assignment operator.
+	/// </summary>
+	/// <param name="other">The MotionParam object to copy</param>
+	template <typename U>
+	MotionParam(const MotionParam<U>& other)
+	{
+		operator=<U>(other);
+	}
+
+	/// <summary>
+	/// Copy assignment operator. Does nothing on self-assignment, otherwise forwards to the templated assignment operator. Returns a reference to self.
+	/// </summary>
+	/// <param name="other">The MotionParam object to copy</param>
+	MotionParam<T>& operator = (const MotionParam<T>& other)
+	{
+		if (this != &other)
+			MotionParam<T>::operator=<T>(other);
+
+		return *this;
+	}
+
+	/// <summary>
+	/// Assignment operator to assign a MotionParam object of type U. Copies first as-is and casts second to T.
+	/// </summary>
+	/// <param name="other">The MotionParam object to copy.</param>
+	/// <returns>Reference to updated self</returns>
+	template <typename U>
+	MotionParam& operator = (const MotionParam<U>& other)
+	{
+		this->first = other.first;
+		this->second = static_cast<T>(other.second);
+		return *this;
+	}
+};
+
+/// <summary>
+/// EmberMotion elements allow for motion of the flame parameters such as zoom, yaw, pitch and friends
+/// The values in these elements can be used to modify flame parameters during rotation in much the same
+/// way as motion elements on xforms do.
+/// Template argument expected to be float or double.
+/// </summary>
+template <typename T>
+class EMBER_API EmberMotion
+{
+public:
+	/// <summary>
+	/// Default constructor; the in-class member initializers set motion freq and offset to 0 and the motion func to SIN.
+	/// </summary>
+	EmberMotion() = default;
+	~EmberMotion() = default;
+
+	/// <summary>
+	/// Copy constructor. Members first take their in-class defaults, then the templated assignment operator copies from other.
+	/// </summary>
+	/// <param name="other">The EmberMotion object to copy</param>
+	EmberMotion(const EmberMotion<T>& other)
+	{
+		operator=<T>(other);
+	}
+
+	/// <summary>
+	/// Copy constructor to copy a EmberMotion object of type U, also through the templated assignment operator.
+	/// </summary>
+	/// <param name="other">The EmberMotion object to copy</param>
+	template <typename U>
+	EmberMotion(const EmberMotion<U>& other)
+	{
+		operator=<U>(other);
+	}
+
+	/// <summary>
+	/// Copy assignment operator. Does nothing on self-assignment, otherwise forwards to the templated assignment operator. Returns a reference to self.
+	/// </summary>
+	/// <param name="other">The EmberMotion object to copy</param>
+	EmberMotion<T>& operator = (const EmberMotion<T>& other)
+	{
+		if (this != &other)
+			EmberMotion<T>::operator=<T>(other);
+
+		return *this;
+	}
+
+	/// <summary>
+	/// Assignment operator to assign a EmberMotion object of type U. Copies the params with CopyCont(), copies the motion func, and casts freq and offset to T.
+	/// </summary>
+	/// <param name="other">The EmberMotion object to copy.</param>
+	/// <returns>Reference to updated self</returns>
+	template <typename U>
+	EmberMotion& operator = (const EmberMotion<U>& other)
+	{
+		CopyCont(m_MotionParams, other.m_MotionParams);
+		m_MotionFunc = other.m_MotionFunc;
+		m_MotionFreq = static_cast<T>(other.m_MotionFreq);
+		m_MotionOffset = static_cast<T>(other.m_MotionOffset);
+		return *this;
+	}
+
+	T m_MotionFreq = 0;
+	T m_MotionOffset = 0;
+	eMotion m_MotionFunc = eMotion::MOTION_SIN;
+	vector<MotionParam<T>> m_MotionParams;//One entry per flame parameter this motion element affects, paired with its T value.
+};
+}
@@ -1 +1 @@
-#include "EmberPch.h"
+#include "EmberPch.h"
@@ -1,90 +1,90 @@
-#ifdef _WIN32
-	#pragma once
-#endif
-
-/// <summary>
-/// Precompiled header file. Place all system includes here with appropriate #defines for different operating systems and compilers.
-/// </summary>
-
-#define NOMINMAX
-#define _USE_MATH_DEFINES
-#define __TBB_NO_IMPLICIT_LINKAGE 1//Prevent tbb from automatically looking for tbb_debug.lib. We only care about the release tbb.lib/dll.
-
-#ifdef _WIN32
-	#pragma warning(disable : 4251; disable : 4661; disable : 4100)
-	#define basename(x) _strdup(x)
-	#define WIN32_LEAN_AND_MEAN
-	#define EMBER_OS "WIN"
-
-	#include <SDKDDKVer.h>
-	#include <windows.h>
-#elif __APPLE__
-	#define EMBER_OS "OSX"
-#else
-	#include <libgen.h>
-	#include <unistd.h>
-	#define EMBER_OS "LNX"
-#endif
-
-//Standard headers.
-#include <algorithm>
-#include <array>
-#include <chrono>
-#include <complex>
-#include <cstdint>
-#include <fstream>
-#include <functional>
-#include <inttypes.h>
-#include <iostream>
-#include <iomanip>
-#include <limits>
-#include <list>
-#ifdef __APPLE__
-	#include <malloc/malloc.h>
-#else
-	#include <malloc.h>
-#endif
-#include <map>
-#include <math.h>
-#include <memory>
-#include <mutex>
-#include <numeric>
-#include <ostream>
-#include <sstream>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <thread>
-#include <time.h>
-#include <type_traits>
-#include <vector>
-#include <unordered_map>
-
-//Third party headers.
-#ifdef _WIN32
-	#include "libxml/parser.h"
-#else
-	#include "libxml2/libxml/parser.h"
-#endif
-
-#define GLM_FORCE_RADIANS 1
-#define GLM_ENABLE_EXPERIMENTAL 1
-
-#ifndef __APPLE__
-	#define GLM_FORCE_INLINE 1
-#endif
-
-//glm is what's used for matrix math.
-#include <glm/glm.hpp>
-#if GLM_VERSION <= 990
-	#include <glm/detail/type_int.hpp>
-#endif
-#include <glm/gtc/matrix_transform.hpp>
-#include <glm/gtc/type_ptr.hpp>
-#include <glm/gtx/string_cast.hpp>
-
-using namespace std;
-using namespace std::chrono;
-using namespace glm;
-using namespace glm::detail;
-using glm::uint;
-using glm::uint16;
+#ifdef _WIN32
+	#pragma once
+#endif
+
+/// <summary>
+/// Precompiled header file. Place all system includes here with appropriate #defines for different operating systems and compilers.
+/// </summary>
+
+#define NOMINMAX//Keeps windows.h from defining min and max as macros.
+#define _USE_MATH_DEFINES//Needed on MSVC for math.h to provide M_PI and the related constants.
+#define __TBB_NO_IMPLICIT_LINKAGE 1//Prevent tbb from automatically looking for tbb_debug.lib. We only care about the release tbb.lib/dll.
+
+#ifdef _WIN32
+	#pragma warning(disable : 4251; disable : 4661; disable : 4100)
+	#define basename(x) _strdup(x)//NOTE(review): this only duplicates the string, it does not strip any directory part, and the copy is heap allocated - confirm call sites expect this.
+	#define WIN32_LEAN_AND_MEAN
+	#define EMBER_OS "WIN"//EMBER_OS is a three letter platform tag: "WIN", "OSX" or "LNX".
+
+	#include <SDKDDKVer.h>
+	#include <windows.h>
+#elif __APPLE__
+	#define EMBER_OS "OSX"
+#else
+	#include <libgen.h>
+	#include <unistd.h>
+	#define EMBER_OS "LNX"
+#endif
+
+//Standard headers.
+#include <algorithm>
+#include <array>
+#include <chrono>
+#include <complex>
+#include <cstdint>
+#include <fstream>
+#include <functional>
+#include <inttypes.h>
+#include <iostream>
+#include <iomanip>
+#include <limits>
+#include <list>
+#ifdef __APPLE__
+	#include <malloc/malloc.h>
+#else
+	#include <malloc.h>
+#endif
+#include <map>
+#include <math.h>
+#include <memory>
+#include <mutex>
+#include <numeric>
+#include <ostream>
+#include <sstream>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <thread>
+#include <time.h>
+#include <type_traits>
+#include <vector>
+#include <unordered_map>
+
+//Third party headers.
+#ifdef _WIN32
+	#include "libxml/parser.h"
+#else
+	#include "libxml2/libxml/parser.h"
+#endif
+
+#define GLM_FORCE_RADIANS 1
+#define GLM_ENABLE_EXPERIMENTAL 1
+
+#ifndef __APPLE__
+	#define GLM_FORCE_INLINE 1
+#endif
+
+//glm is what's used for matrix math.
+#include <glm/glm.hpp>
+#if GLM_VERSION <= 990
+	#include <glm/detail/type_int.hpp>
+#endif
+#include <glm/gtc/matrix_transform.hpp>
+#include <glm/gtc/type_ptr.hpp>
+#include <glm/gtx/string_cast.hpp>
+
+using namespace std;//NOTE(review): these using-directives sit at header scope, so they apply to every file that includes this header.
+using namespace std::chrono;
+using namespace glm;
+using namespace glm::detail;
+using glm::uint;//Explicitly pull in glm's uint and uint16 names.
+using glm::uint16;
@@ -1,42 +1,42 @@
-#pragma once
-
-#include "Utils.h"
-#include "PaletteList.h"
-#include "VariationList.h"
-#include "Ember.h"
-
-/// <summary>
-/// EmberToXml class.
-/// </summary>
-
-namespace EmberNs
-{
-/// <summary>
-/// Class for converting ember objects to Xml documents.
-/// Support for saving one or more to a single file.
-/// Template argument expected to be float or double.
-/// </summary>
-template <typename T>
-class EMBER_API EmberToXml : public EmberReport
-{
-public:
-	/// <summary>
-	/// Empty constructor.
-	/// </summary>
-	EmberToXml() = default;
-	~EmberToXml() = default;
-	EmberToXml(const EmberToXml<T>& e) = delete;
-
-	bool Save(const string& filename, Ember<T>& ember, size_t printEditDepth, bool doEdits, bool hexPalette, bool append = false, bool start = false, bool finish = false);
-	template <typename Alloc, template <typename, typename> class C>
-	bool Save(const string& filename, C<Ember<T>, Alloc>& embers, size_t printEditDepth, bool doEdits, bool hexPalette, bool append, bool start, bool finish);
-	string ToString(Ember<T>& ember, const string& extraAttributes, size_t printEditDepth, bool doEdits, bool hexPalette = true);
-	xmlDocPtr CreateNewEditdoc(Ember<T>* parent0, Ember<T>* parent1, const string& action, const string& nick, const string& url, const string& id, const string& comment, intmax_t sheepGen = 0, intmax_t sheepId = 0);
-
-private:
-	string ToString(Xform<T>& xform, size_t xformCount, bool isFinal, bool doMotion);
-	string ToString(xmlNodePtr editNode, size_t tabs, bool formatting, size_t printEditDepth);
-	string ToString(const EmberMotion<T>& motion);
-	void AddFilenameWithoutAmpersand(xmlNodePtr node, string& filename);
-};
-}
+#pragma once
+
+#include "Utils.h"
+#include "PaletteList.h"
+#include "VariationList.h"
+#include "Ember.h"
+
+/// <summary>
+/// EmberToXml class.
+/// </summary>
+
+namespace EmberNs
+{
+/// <summary>
+/// Class for converting ember objects to Xml documents.
+/// Support for saving one or more to a single file.
+/// Template argument expected to be float or double.
+/// </summary>
+template <typename T>
+class EMBER_API EmberToXml : public EmberReport
+{
+public:
+	/// <summary>
+	/// Defaulted constructor and destructor. Copy construction is deleted, so instances cannot be copy constructed.
+	/// </summary>
+	EmberToXml() = default;
+	~EmberToXml() = default;
+	EmberToXml(const EmberToXml<T>& e) = delete;
+
+	bool Save(const string& filename, Ember<T>& ember, size_t printEditDepth, bool doEdits, bool hexPalette, bool append = false, bool start = false, bool finish = false);
+	template <typename Alloc, template <typename, typename> class C>
+	bool Save(const string& filename, C<Ember<T>, Alloc>& embers, size_t printEditDepth, bool doEdits, bool hexPalette, bool append, bool start, bool finish);
+	string ToString(Ember<T>& ember, const string& extraAttributes, size_t printEditDepth, bool doEdits, bool hexPalette = true);
+	xmlDocPtr CreateNewEditdoc(Ember<T>* parent0, Ember<T>* parent1, const string& action, const string& nick, const string& url, const string& id, const string& comment, intmax_t sheepGen = 0, intmax_t sheepId = 0);
+
+private:
+	string ToString(Xform<T>& xform, size_t xformCount, bool isFinal, bool doMotion);
+	string ToString(xmlNodePtr editNode, size_t tabs, bool formatting, size_t printEditDepth);
+	string ToString(const EmberMotion<T>& motion);
+	void AddFilenameWithoutAmpersand(xmlNodePtr node, string& filename);
+};
+}
@@ -1,60 +1,60 @@
-#pragma once
-
-#include "Palette.h"
-#include "Point.h"
-
-/// <summary>
-/// PaletteList class.
-/// </summary>
-
-namespace EmberNs
-{
-/// <summary>
-/// Holds a list of palettes read from an Xml file. Since the default list from flam3-palettes.xml is fairly large at 700 palettes,
-/// the list member is kept as a static. This class derives from EmberReport in order to report any errors that occurred while reading the Xml.
-/// Note that although the Xml color values are expected to be 0-255, they are converted and stored as normalized colors, with values from 0-1.
-/// This can hold read only palettes, as well as user created and modifiable ones.
-/// The key in the map is the fully qualified path and filename to each palette file.
-/// Despite the keys being full paths, the same filename cannot be inserted twice, even if they reside
-/// in different folders. Functions are provided to retrieve palette files via filename only, or full path.
-/// Template argument should always be float (which makes the templating of this class pointless).
-/// </summary>
-template <typename T>
-class EMBER_API PaletteList : public EmberReport, public Singleton<PaletteList<T>>
-{
-public:
-	const char* m_DefaultFilename = "flam3-palettes.xml";
-
-	bool AddPaletteFile(const string& filename, const vector<Palette<T>>& palettes);
-	bool AddEmptyPaletteFile(const string& filename);
-	bool AddPaletteToFile(const string& filename, const Palette<T>& palette);
-	bool Replace(const string& filename, const Palette<T>& palette);
-	bool Replace(const string& filename, const Palette<T>& palette, int index);
-	bool Delete(const string& filename, int index);
-	bool Add(const string& filename, bool force = false);
-	Palette<T>* GetRandomPalette();
-	Palette<T>* GetPaletteByFilename(const string& filename, size_t i);
-	Palette<T>* GetPaletteByFullPath(const string& filename, size_t i);
-	Palette<T>* GetPaletteByName(const string& filename, const string& name);
-	vector<Palette<T>>* GetPaletteListByFilename(const string& filename);
-	vector<Palette<T>>* GetPaletteListByFullPath(const string& filename);
-	vector<Palette<T>>* GetPaletteListByFullPathOrFilename(const string& filename);
-	string GetFullPathFromFilename(const string& filename);
-	bool GetHueAdjustedPalette(const string& filename, size_t i, T hue, Palette<T>& palette);
-	void Clear();
-	size_t Size();
-	size_t Size(size_t index);
-	size_t Size(const string& s);
-	const string& Name(size_t index);
-	bool IsModifiable(const string& filename);
-	const map<string, vector<Palette<T>>>& Palettes() const;
-
-	SINGLETON_DERIVED_DECL(PaletteList<T>);
-private:
-	PaletteList();
-	bool Save(const string& filename);
-	void ParsePalettes(xmlNode* node, const shared_ptr<string>& filename, vector<Palette<T>>& palettes);
-	bool ParsePalettes(const string& buf, const shared_ptr<string>& filename, vector<Palette<T>>& palettes);
-	map<string, vector<Palette<T>>> s_Palettes;//The map of filenames to vectors that store the palettes.
-};
-}
+#pragma once
+
+#include "Palette.h"
+#include "Point.h"
+
+/// <summary>
+/// PaletteList class.
+/// </summary>
+
+namespace EmberNs
+{
+/// <summary>
+/// Holds lists of palettes read from Xml files. Since the default list from flam3-palettes.xml is fairly large at 700 palettes,
+/// the map is a non-static member and this class derives from Singleton (presumably so only one copy is kept; confirm in Singleton). This class also derives from EmberReport in order to report any errors that occurred while reading the Xml.
+/// Note that although the Xml color values are expected to be 0-255, they are converted and stored as normalized colors, with values from 0-1.
+/// This can hold read only palettes, as well as user created and modifiable ones.
+/// The key in the map is the fully qualified path and filename to each palette file.
+/// Despite the keys being full paths, the same filename cannot be inserted twice, even if they reside
+/// in different folders. Functions are provided to retrieve palette files via filename only, or full path.
+/// Template argument should always be float (which makes the templating of this class pointless).
+/// </summary>
+template <typename T>
+class EMBER_API PaletteList : public EmberReport, public Singleton<PaletteList<T>>
+{
+public:
+	const char* m_DefaultFilename = "flam3-palettes.xml";
+
+	bool AddPaletteFile(const string& filename, const vector<Palette<T>>& palettes);
+	bool AddEmptyPaletteFile(const string& filename);
+	bool AddPaletteToFile(const string& filename, const Palette<T>& palette);
+	bool Replace(const string& filename, const Palette<T>& palette);
+	bool Replace(const string& filename, const Palette<T>& palette, int index);
+	bool Delete(const string& filename, int index);
+	bool Add(const string& filename, bool force = false);
+	Palette<T>* GetRandomPalette();
+	Palette<T>* GetPaletteByFilename(const string& filename, size_t i);
+	Palette<T>* GetPaletteByFullPath(const string& filename, size_t i);
+	Palette<T>* GetPaletteByName(const string& filename, const string& name);
+	vector<Palette<T>>* GetPaletteListByFilename(const string& filename);
+	vector<Palette<T>>* GetPaletteListByFullPath(const string& filename);
+	vector<Palette<T>>* GetPaletteListByFullPathOrFilename(const string& filename);
+	string GetFullPathFromFilename(const string& filename);
+	bool GetHueAdjustedPalette(const string& filename, size_t i, T hue, Palette<T>& palette);
+	void Clear();
+	size_t Size();
+	size_t Size(size_t index);
+	size_t Size(const string& s);
+	const string& Name(size_t index);
+	bool IsModifiable(const string& filename);
+	const map<string, vector<Palette<T>>>& Palettes() const;
+
+	SINGLETON_DERIVED_DECL(PaletteList<T>);
+private:
+	PaletteList();
+	bool Save(const string& filename);
+	void ParsePalettes(xmlNode* node, const shared_ptr<string>& filename, vector<Palette<T>>& palettes);
+	bool ParsePalettes(const string& buf, const shared_ptr<string>& filename, vector<Palette<T>>& palettes);
+	map<string, vector<Palette<T>>> s_Palettes;//The map of filenames to vectors that store the palettes. Non-static, despite the s_ prefix.
+};
+}
@@ -1,217 +1,215 @@
-#pragma once
-
-#include "EmberDefines.h"
-#include "Affine2D.h"
-#include "Timing.h"
-
-/// <summary>
-/// Basic point and color structures used in iteration.
-/// </summary>
-
-namespace EmberNs
-{
-/// <summary>
-/// The point used to store the result of each iteration, which is
-/// a spatial coordinate, a color index/coordinate and a visibility value.
-/// Note that a Y color coordinate is not used at the moment because
-/// only 1D palettes are supported like the original. However, in the future
-/// 2D palettes may be supported like Fractron does.
-/// Template argument expected to be float or double.
-/// </summary>
-template <typename T>
-class EMBER_API Point
-{
-public:
-	/// <summary>
-	/// Constructor to initialize spatial and color coordinates to zero, with full visibility.
-	/// </summary>
-	Point() = default;
-	~Point() = default;
-
-	/// <summary>
-	/// Default copy constructor.
-	/// </summary>
-	/// <param name="point">The Point object to copy</param>
-	Point(const Point<T>& point)
-	{
-		Point<T>::operator=<T>(point);
-	}
-
-	/// <summary>
-	/// Copy constructor to copy a Point object of type U.
-	/// </summary>
-	/// <param name="point">The Point object to copy</param>
-	template <typename U>
-	Point(const Point<U>& point)
-	{
-		Point<T>::operator=<U>(point);
-	}
-
-	/// <summary>
-	/// Default assignment operator.
-	/// </summary>
-	/// <param name="point">The Point object to copy</param>
-	Point<T>& operator = (const Point<T>& point)
-	{
-		if (this != &point)
-			Point<T>::operator=<T>(point);
-
-		return *this;
-	}
-
-	/// <summary>
-	/// Assignment operator to assign a Point object of type U.
-	/// </summary>
-	/// <param name="point">The Point object to copy.</param>
-	/// <returns>Reference to updated self</returns>
-	template <typename U>
-	Point<T>& operator = (const Point<U>& point)
-	{
-		m_X = point.m_X;
-		m_Y = point.m_Y;
-		m_Z = point.m_Z;
-		m_ColorX = point.m_ColorX;
-		//m_ColorY = point.m_ColorY;
-		m_Opacity = point.m_Opacity;
-		return *this;
-	}
-
-	//Set spatial and color coordinates to zero, with full visibility.
-	T m_X = 0;
-	T m_Y = 0;
-	T m_Z = 0;
-	T m_ColorX = 0;
-	//T m_ColorY;
-	T m_Opacity = 1;
-};
-
-/// <summary>
-/// Comparer used for sorting the results of iteration by their spatial x coordinates.
-/// </summary>
-/// <param name="a">The first point to compare</param>
-/// <param name="b">The second point to compare</param>
-/// <returns>1 if the first point had an x coordinate less than the second point, else 0</returns>
-template <typename T>
-static int SortPointByX(const Point<T>& a, const Point<T>& b)
-{
-	return a.m_X < b.m_X;
-}
-
-/// <summary>
-/// Comparer used for sorting the results of iteration by their spatial y coordinates.
-/// </summary>
-/// <param name="a">The first point to compare</param>
-/// <param name="b">The second point to compare</param>
-/// <returns>1 if the first point had an y coordinate less than the second point, else 0</returns>
-template <typename T>
-static int SortPointByY(const Point<T>& a, const Point<T>& b)
-{
-	return a.m_Y < b.m_Y;
-}
-
-/// <summary>
-/// Thin override of a glm::vec4 which adds a couple of functions
-/// specific to color handling.
-/// </summary>
-template <typename T>
-struct EMBER_API Color : public v4T
-{
-#ifndef _WIN32
-	using v4T::r;
-	using v4T::g;
-	using v4T::b;
-	using v4T::a;
-#endif
-public:
-	/// <summary>
-	/// Constructor to set color values to zero, with full visibility.
-	/// </summary>
-	Color()
-	{
-		Reset();
-	}
-
-	/// <summary>
-	/// Default copy constructor.
-	/// </summary>
-	/// <param name="color">The Color object to copy</param>
-	Color(const Color<T>& color)
-		: v4T()
-	{
-		Color<T>::operator=<T>(color);
-	}
-
-	/// <summary>
-	/// Copy constructor to copy a Color object of type U.
-	/// </summary>
-	/// <param name="color">The Color object to copy</param>
-	template <typename U>
-	Color(const Color<U>& color)
-	{
-		Color<T>::operator=<U>(color);
-	}
-
-	/// <summary>
-	/// Default assignment operator.
-	/// </summary>
-	/// <param name="color">The Color object to copy</param>
-	Color<T>& operator = (const Color<T>& color)
-	{
-		if (this != &color)
-			Color<T>::operator=<T>(color);
-
-		return *this;
-	}
-
-	/// <summary>
-	/// Assignment operator to assign a Color object of type U.
-	/// </summary>
-	/// <param name="color">The Color object to copy.</param>
-	/// <returns>Reference to updated self</returns>
-	template <typename U>
-	Color<T>& operator = (const Color<U>& color)
-	{
-#ifdef _WIN32
-		v4T::operator=<U>(color);
-#else
-		v4T::template operator=<U>(color);
-#endif
-		return *this;
-	}
-
-	/// <summary>
-	/// Member-wise constructor.
-	/// </summary>
-	/// <param name="rr">The red value, either 0-1 or 0-255.</param>
-	/// <param name="gg">The green value, either 0-1 or 0-255.</param>
-	/// <param name="bb">The blue value, either 0-1 or 0-255.</param>
-	/// <param name="aa">The alpha value, either 0-1 or 0-255.</param>
-	Color(T rr, T gg, T bb, T aa)
-		: v4T(rr, gg, bb, aa)
-	{
-	}
-
-	/// <summary>
-	/// Set color values and visibility to zero.
-	/// </summary>
-	inline void Clear()
-	{
-		r = 0;
-		g = 0;
-		b = 0;
-		a = 0;
-	}
-
-	/// <summary>
-	/// Set color values to zero, with full visibility.
-	/// </summary>
-	/// <param name="norm">If norm is true, the color fields are expected to have a range of 0-1, else 0-255</param>
-	inline void Reset(bool norm = true)
-	{
-		r = 0;
-		g = 0;
-		b = 0;
-		a = norm ? T{ 1 } : T{ 255 };
-	}
-};
-}
+#pragma once
+
+#include "EmberDefines.h"
+#include "Affine2D.h"
+#include "Timing.h"
+
+/// <summary>
+/// Basic point and color structures used in iteration.
+/// </summary>
+
+namespace EmberNs
+{
+/// <summary>
+/// The point used to store the result of each iteration, which is
+/// a spatial coordinate, a color index/coordinate and a visibility value.
+/// Note that a Y color coordinate is not used at the moment because
+/// only 1D palettes are supported like the original. However, in the future
+/// 2D palettes may be supported like Fractron does.
+/// Template argument expected to be float or double.
+/// </summary>
+template <typename T>
+class EMBER_API Point
+{
+public:
+	/// <summary>
+	/// Defaulted constructor. The in-class member initializers set the spatial and color coordinates to zero, with full visibility.
+	/// </summary>
+	Point() = default;
+	~Point() = default;
+
+	/// <summary>
+	/// Copy constructor. Delegates to the templated assignment operator with U = T.
+	/// </summary>
+	/// <param name="point">The Point object to copy</param>
+	Point(const Point<T>& point)
+	{
+		Point<T>::operator=<T>(point);
+	}
+
+	/// <summary>
+	/// Converting copy constructor to copy a Point object of type U. Delegates to the templated assignment operator.
+	/// </summary>
+	/// <param name="point">The Point object to copy</param>
+	template <typename U>
+	Point(const Point<U>& point)
+	{
+		Point<T>::operator=<U>(point);
+	}
+
+	/// <summary>
+	/// Copy assignment operator. Skips self-assignment, otherwise delegates to the templated assignment operator with U = T.
+	/// </summary>
+	/// <param name="point">The Point object to copy</param>
+	Point<T>& operator = (const Point<T>& point)
+	{
+		if (this != &point)
+			Point<T>::operator=<T>(point);
+
+		return *this;
+	}
+
+	/// <summary>
+	/// Assignment operator to assign a Point object of type U. Each member is assigned individually, implicitly converting from U to T.
+	/// </summary>
+	/// <param name="point">The Point object to copy.</param>
+	/// <returns>Reference to updated self</returns>
+	template <typename U>
+	Point<T>& operator = (const Point<U>& point)
+	{
+		m_X = point.m_X;
+		m_Y = point.m_Y;
+		m_Z = point.m_Z;
+		m_ColorX = point.m_ColorX;
+		//m_ColorY = point.m_ColorY;
+		m_Opacity = point.m_Opacity;
+		return *this;
+	}
+
+	//Spatial and color coordinates default to zero, with full visibility (opacity of 1).
+	T m_X = 0;
+	T m_Y = 0;
+	T m_Z = 0;
+	T m_ColorX = 0;
+	//T m_ColorY;
+	T m_Opacity = 1;
+};
+
+/// <summary>
+/// Comparer used for sorting the results of iteration by their spatial x coordinates.
+/// </summary>
+/// <param name="a">The first point to compare</param>
+/// <param name="b">The second point to compare</param>
+/// <returns>1 if the first point has an x coordinate less than the second point, else 0 (the result of the less-than comparison converted to int)</returns>
+template <typename T>
+static int SortPointByX(const Point<T>& a, const Point<T>& b)
+{
+	return a.m_X < b.m_X;
+}
+
+/// <summary>
+/// Comparer used for sorting the results of iteration by their spatial y coordinates.
+/// </summary>
+/// <param name="a">The first point to compare</param>
+/// <param name="b">The second point to compare</param>
+/// <returns>1 if the first point has a y coordinate less than the second point, else 0 (the result of the less-than comparison converted to int)</returns>
+template <typename T>
+static int SortPointByY(const Point<T>& a, const Point<T>& b)
+{
+	return a.m_Y < b.m_Y;
+}
+
+/// <summary>
+/// Thin override of a glm::vec4 which adds a couple of functions
+/// specific to color handling.
+/// </summary>
+template <typename T>
+struct EMBER_API Color : public v4T
+{
+	using v4T::r;
+	using v4T::g;
+	using v4T::b;
+	using v4T::a;
+public:
+	/// <summary>
+	/// Constructor to set color values to zero, with full visibility, by calling Reset().
+	/// </summary>
+	Color()
+	{
+		Reset();
+	}
+
+	/// <summary>
+	/// Copy constructor. Default constructs the base, then delegates to the templated assignment operator with U = T.
+	/// </summary>
+	/// <param name="color">The Color object to copy</param>
+	Color(const Color<T>& color)
+		: v4T()
+	{
+		Color<T>::operator=<T>(color);
+	}
+
+	/// <summary>
+	/// Converting copy constructor to copy a Color object of type U. Delegates to the templated assignment operator.
+	/// </summary>
+	/// <param name="color">The Color object to copy</param>
+	template <typename U>
+	Color(const Color<U>& color)
+	{
+		Color<T>::operator=<U>(color);
+	}
+
+	/// <summary>
+	/// Copy assignment operator. Skips self-assignment, otherwise delegates to the templated assignment operator with U = T.
+	/// </summary>
+	/// <param name="color">The Color object to copy</param>
+	Color<T>& operator = (const Color<T>& color)
+	{
+		if (this != &color)
+			Color<T>::operator=<T>(color);
+
+		return *this;
+	}
+
+	/// <summary>
+	/// Assignment operator to assign a Color object of type U by forwarding to the base vector's templated assignment operator.
+	/// </summary>
+	/// <param name="color">The Color object to copy.</param>
+	/// <returns>Reference to updated self</returns>
+	template <typename U>
+	Color<T>& operator = (const Color<U>& color)
+	{
+#ifdef _WIN32
+		v4T::operator=<U>(color);
+#else
+		v4T::template operator=<U>(color);
+#endif
+		return *this;
+	}
+
+	/// <summary>
+	/// Member-wise constructor. The values are passed straight to the base vector constructor.
+	/// </summary>
+	/// <param name="rr">The red value, either 0-1 or 0-255.</param>
+	/// <param name="gg">The green value, either 0-1 or 0-255.</param>
+	/// <param name="bb">The blue value, either 0-1 or 0-255.</param>
+	/// <param name="aa">The alpha value, either 0-1 or 0-255.</param>
+	Color(T rr, T gg, T bb, T aa)
+		: v4T(rr, gg, bb, aa)
+	{
+	}
+
+	/// <summary>
+	/// Set color values and visibility (alpha) to zero.
+	/// </summary>
+	inline void Clear()
+	{
+		r = 0;
+		g = 0;
+		b = 0;
+		a = 0;
+	}
+
+	/// <summary>
+	/// Set color values to zero, with full visibility (alpha of 1 if normalized, else 255).
+	/// </summary>
+	/// <param name="norm">If norm is true, the color fields are expected to have a range of 0-1, else 0-255</param>
+	inline void Reset(bool norm = true)
+	{
+		r = 0;
+		g = 0;
+		b = 0;
+		a = norm ? T{ 1 } : T{ 255 };
+	}
+};
+}
@@ -1,206 +1,206 @@
-#pragma once
-
-#include "RendererBase.h"
-#include "Iterator.h"
-#include "SpatialFilter.h"
-#include "TemporalFilter.h"
-#include "Interpolate.h"
-#include "CarToRas.h"
-#include "EmberToXml.h"
-#include "Spline.h"
-
-/// <summary>
-/// Renderer.
-/// </summary>
-
-namespace EmberNs
-{
-/// <summary>
-/// Renderer is the main class where all of the execution takes place.
-/// It is intended that the program have one instance of it that it
-/// keeps around for its duration. After a user sets up an ember, it's passed
-/// in to be rendered.
-/// This class derives from EmberReport, so the caller is able
-/// to retrieve a text dump of error information if any errors occur.
-/// The final image output vector is also passed in because the calling code has more
-/// use for it than this class does.
-/// Several functions are made virtual and have a default CPU-based implementation
-/// that roughly matches what flam3 did. However they can be overridden in derived classes
-/// to provide alternative rendering implementations, such as using the GPU.
-/// Since this is a templated class, it's supposed to be entirely implemented in this .h file.
-/// However, VC++ 2010 has very crippled support for lambdas, which Renderer makes use of.
-/// If too many lambdas are used in a .h file, it will crash the compiler when another library
-/// tries to link to it. To work around the bug, only declarations are here and all implementations
-/// are in the .cpp file. It's unclear at what point it starts/stops working. But it seems that once
-/// enough code is placed in the .h file, the compiler crashes. So for the sake of consistency, everything
-/// is moved to the .cpp, even simple getters. One drawback however, is that the class must be
-/// explicitly exported at the bottom of the file.
-/// Also, despite explicitly doing this, the compiler throws a C4661 warning
-/// for every single function in this class, saying it can't find the implementation. This warning
-/// can be safely ignored.
-/// Template argument T expected to be float or double.
-/// Template argument bucketT must always be float.
-/// </summary>
-template <typename T, typename bucketT>
-class EMBER_API Renderer : public RendererBase
-{
-public:
-	Renderer();
-	Renderer(const Renderer<T, bucketT>& renderer) = delete;
-	Renderer<T, bucketT>& operator = (const Renderer<T, bucketT>& renderer) = delete;
-	virtual ~Renderer() = default;
-
-	//Non-virtual processing functions.
-	void AddEmber(Ember<T>& ember);
-	bool AssignIterator();
-
-	//Virtual processing functions overriden from RendererBase.
-	void Prepare() override;
-	void ComputeBounds() override;
-	void ComputeQuality() override;
-	void ComputeCamera() override;
-	void SetEmber(const Ember<T>& ember, eProcessAction action = eProcessAction::FULL_RENDER, bool prep = false) override;
-	template <typename C>
-	void SetEmber(const C& embers);
-	void MoveEmbers(vector<Ember<T>>& embers);
-	void SetExternalEmbersPointer(vector<Ember<T>>* embers);
-	bool CreateDEFilter(bool& newAlloc) override;
-	bool CreateSpatialFilter(bool& newAlloc) override;
-	bool CreateTemporalFilter(bool& newAlloc) override;
-	size_t HistBucketSize() const override { return sizeof(tvec4<bucketT, glm::defaultp>); }
-	eRenderStatus Run(vector<v4F>& finalImage, double time = 0, size_t subBatchCountOverride = 0, bool forceOutput = false, size_t finalOffset = 0) override;
-	EmberImageComments ImageComments(const EmberStats& stats, size_t printEditDepth = 0, bool hexPalette = true) override;
-
-protected:
-	//New virtual functions to be overridden in derived renderers that use the GPU, but not accessed outside.
-	virtual void MakeDmap(T colorScalar);
-	virtual bool Alloc(bool histOnly = false);
-	virtual bool ResetBuckets(bool resetHist = true, bool resetAccum = true);
-	virtual eRenderStatus LogScaleDensityFilter(bool forceOutput = false);
-	virtual eRenderStatus GaussianDensityFilter();
-	virtual eRenderStatus AccumulatorToFinalImage(vector<v4F>& pixels, size_t finalOffset);
-	virtual EmberStats Iterate(size_t iterCount, size_t temporalSample);
-	virtual void ComputeCurves();
-
-public:
-	//Non-virtual render properties, getters and setters.
-	inline T PixelAspectRatio() const;
-	void PixelAspectRatio(T pixelAspectRatio);
-
-	//Non-virtual renderer properties, getters only.
-	inline T                              Scale()               const;
-	inline T                              PixelsPerUnitX()      const;
-	inline T                              PixelsPerUnitY()      const;
-	inline bucketT                        K1()                  const;
-	inline bucketT                        K2()                  const;
-	inline const CarToRas<T>&             CoordMap()            const;
-	inline tvec4<bucketT, glm::defaultp>* HistBuckets();
-	inline tvec4<bucketT, glm::defaultp>* AccumulatorBuckets();
-	inline SpatialFilter<bucketT>*        GetSpatialFilter();
-	inline TemporalFilter<T>*             GetTemporalFilter();
-
-	//Virtual renderer properties overridden from RendererBase, getters only.
-	double ScaledQuality()				   const override;
-	double LowerLeftX(bool  gutter = true) const override;
-	double LowerLeftY(bool  gutter = true) const override;
-	double UpperRightX(bool gutter = true) const override;
-	double UpperRightY(bool gutter = true) const override;
-	DensityFilterBase* GetDensityFilter()        override;
-
-	//Non-virtual ember wrappers, getters only.
-	inline bool                  XaosPresent()		   const;
-	inline size_t			     Supersample()         const;
-	inline size_t			     PaletteIndex()        const;
-	inline T                     Time()                const;
-	inline T                     Quality()             const;
-	inline T                     SpatialFilterRadius() const;
-	inline T                     PixelsPerUnit()       const;
-	inline T                     Zoom()                const;
-	inline T                     CenterX()             const;
-	inline T                     CenterY()             const;
-	inline T                     Rotate()              const;
-	inline bucketT               Brightness()          const;
-	inline bucketT               Gamma()               const;
-	inline bucketT               Vibrancy()            const;
-	inline bucketT               GammaThresh()         const;
-	inline bucketT               HighlightPower()      const;
-	inline Color<T>			     Background()          const;
-	inline const Xform<T>*       Xforms()              const;
-	inline Xform<T>*             NonConstXforms();
-	inline size_t			     XformCount()          const;
-	inline const Xform<T>*       FinalXform()          const;
-	inline Xform<T>*             NonConstFinalXform();
-	inline bool                  UseFinalXform()       const;
-	inline const Palette<float>* GetPalette()          const;
-	inline ePaletteMode          PaletteMode()         const;
-
-	//Virtual ember wrappers overridden from RendererBase, getters only.
-	size_t TemporalSamples() const override;
-	size_t FinalRasW()       const override;
-	size_t FinalRasH()       const override;
-	size_t SubBatchSize()    const override;
-	size_t FuseCount()		 const override;
-
-	//Non-virtual iterator wrappers.
-	const byte* XformDistributions()		const;
-	size_t 		XformDistributionsSize()    const;
-	Point<T>*	Samples(size_t threadIndex) const;
-
-protected:
-	//Non-virtual functions that might be needed by a derived class.
-	void PrepFinalAccumVals(Color<bucketT>& background, bucketT& g, bucketT& linRange, bucketT& vibrancy);
-
-private:
-	//Miscellaneous non-virtual functions used only in this class.
-	void Accumulate(QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand, Point<T>* samples, size_t sampleCount, const Palette<bucketT>* palette);
-	void AddToAccum(const tvec4<bucketT, glm::defaultp>& bucket, intmax_t i, intmax_t ii, intmax_t j, intmax_t jj);
-	template <typename accumT> void GammaCorrection(tvec4<bucketT, glm::defaultp>& bucket, Color<bucketT>& background, bucketT g, bucketT linRange, bucketT vibrancy, bool scale, accumT* correctedChannels);
-	void CurveAdjust(bucketT& a, const glm::length_t& index);
-	void VectorizedLogScale(size_t row, size_t rowEnd);
-
-protected:
-//public:
-	T m_Scale;
-	T m_PixelsPerUnitX;
-	T m_PixelsPerUnitY;
-	T m_PixelAspectRatio = 1;
-	T m_LowerLeftX;
-	T m_LowerLeftY;
-	T m_UpperRightX;
-	T m_UpperRightY;
-	bucketT m_K1;
-	bucketT m_K2;
-	bucketT m_Vibrancy;//Accumulate these after each temporal sample.
-	bucketT m_Gamma;
-	T m_ScaledQuality;
-	Color<bucketT> m_Background;//This is a scaled copy of the m_Background member of m_Ember, but with a type of bucketT.
-	Affine2D<T> m_RotMat;
-	Ember<T> m_Ember;
-	Ember<T> m_TempEmber;
-	Ember<T> m_LastEmber;
-private:
-	vector<Ember<T>> m_Embers;
-
-protected:
-	vector<Ember<T>>* m_EmbersP = &m_Embers;
-	vector<Ember<T>> m_ThreadEmbers;
-	Interpolater<T> m_Interpolater;
-	CarToRas<T> m_CarToRas;
-	unique_ptr<StandardIterator<T>> m_StandardIterator = make_unique<StandardIterator<T>>();
-	unique_ptr<XaosIterator<T>> m_XaosIterator = make_unique<XaosIterator<T>>();
-	Iterator<T>* m_Iterator = m_StandardIterator.get();
-	Palette<bucketT> m_Dmap;
-	vector<tvec4<bucketT, glm::defaultp>> m_Csa;
-	vector<tvec4<bucketT, glm::defaultp>> m_HistBuckets;
-	vector<tvec4<bucketT, glm::defaultp>> m_AccumulatorBuckets;
-	unique_ptr<SpatialFilter<bucketT>> m_SpatialFilter;
-	unique_ptr<TemporalFilter<T>> m_TemporalFilter;
-	unique_ptr<DensityFilter<bucketT>> m_DensityFilter;
-	vector<vector<Point<T>>> m_Samples;
-	EmberToXml<T> m_EmberToXml;
-};
-
-//This class had to be implemented in a cpp file because the compiler was breaking.
-//So the explicit instantiation must be declared here rather than in Ember.cpp where
-//all of the other classes are done.
-}
+#pragma once
+
+#include "RendererBase.h"
+#include "Iterator.h"
+#include "SpatialFilter.h"
+#include "TemporalFilter.h"
+#include "Interpolate.h"
+#include "CarToRas.h"
+#include "EmberToXml.h"
+#include "Spline.h"
+
+/// <summary>
+/// Renderer.
+/// </summary>
+
+namespace EmberNs
+{
+/// <summary>
+/// Renderer is the main class where all of the execution takes place.
+/// It is intended that the program have one instance of it that it
+/// keeps around for its duration. After a user sets up an ember, it's passed
+/// in to be rendered.
+/// This class derives from EmberReport, so the caller is able
+/// to retrieve a text dump of error information if any errors occur.
+/// The final image output vector is also passed in because the calling code has more
+/// use for it than this class does.
+/// Several functions are made virtual and have a default CPU-based implementation
+/// that roughly matches what flam3 did. However they can be overridden in derived classes
+/// to provide alternative rendering implementations, such as using the GPU.
+/// Since this is a templated class, it's supposed to be entirely implemented in this .h file.
+/// However, VC++ 2010 has very crippled support for lambdas, which Renderer makes use of.
+/// If too many lambdas are used in a .h file, it will crash the compiler when another library
+/// tries to link to it. To work around the bug, only declarations are here and all implementations
+/// are in the .cpp file. It's unclear at what point it starts/stops working. But it seems that once
+/// enough code is placed in the .h file, the compiler crashes. So for the sake of consistency, everything
+/// is moved to the .cpp, even simple getters. One drawback however, is that the class must be
+/// explicitly exported at the bottom of the file.
+/// Also, despite explicitly doing this, the compiler throws a C4661 warning
+/// for every single function in this class, saying it can't find the implementation. This warning
+/// can be safely ignored.
+/// Template argument T expected to be float or double.
+/// Template argument bucketT must always be float.
+/// </summary>
+template <typename T, typename bucketT>
+class EMBER_API Renderer : public RendererBase
+{
+public:
+	Renderer();
+	//Copying and assigning a renderer are both disabled.
+	Renderer(const Renderer<T, bucketT>& renderer) = delete;
+	Renderer<T, bucketT>& operator = (const Renderer<T, bucketT>& renderer) = delete;
+	virtual ~Renderer() = default;
+
+	//Non-virtual processing functions.
+	void AddEmber(Ember<T>& ember);
+	bool AssignIterator();
+
+	//Virtual processing functions overridden from RendererBase.
+	void Prepare() override;
+	void ComputeBounds() override;
+	void ComputeQuality() override;
+	void ComputeCamera() override;
+	void SetEmber(const Ember<T>& ember, eProcessAction action = eProcessAction::FULL_RENDER, bool prep = false) override;
+	template <typename C>
+	void SetEmber(const C& embers);
+	void MoveEmbers(vector<Ember<T>>& embers);
+	void SetExternalEmbersPointer(vector<Ember<T>>* embers);
+	bool CreateDEFilter(bool& newAlloc) override;
+	bool CreateSpatialFilter(bool& newAlloc) override;
+	bool CreateTemporalFilter(bool& newAlloc) override;
+	//The size in bytes of one histogram bucket, which is a 4-component vector of bucketT.
+	size_t HistBucketSize() const override { return sizeof(tvec4<bucketT, glm::defaultp>); }
+	eRenderStatus Run(vector<v4F>& finalImage, double time = 0, size_t subBatchCountOverride = 0, bool forceOutput = false, size_t finalOffset = 0) override;
+	EmberImageComments ImageComments(const EmberStats& stats, size_t printEditDepth = 0, bool hexPalette = true) override;
+
+protected:
+	//New virtual functions to be overridden in derived renderers that use the GPU, but not accessed outside.
+	virtual void MakeDmap(T colorScalar);
+	virtual bool Alloc(bool histOnly = false);
+	virtual bool ResetBuckets(bool resetHist = true, bool resetAccum = true);
+	virtual eRenderStatus LogScaleDensityFilter(bool forceOutput = false);
+	virtual eRenderStatus GaussianDensityFilter();
+	virtual eRenderStatus AccumulatorToFinalImage(vector<v4F>& pixels, size_t finalOffset);
+	virtual EmberStats Iterate(size_t iterCount, size_t temporalSample);
+	virtual void ComputeCurves();
+
+public:
+	//Non-virtual render properties, getters and setters.
+	inline T PixelAspectRatio() const;
+	void PixelAspectRatio(T pixelAspectRatio);
+
+	//Non-virtual renderer properties, getters only.
+	inline T                              Scale()               const;
+	inline T                              PixelsPerUnitX()      const;
+	inline T                              PixelsPerUnitY()      const;
+	inline bucketT                        K1()                  const;
+	inline bucketT                        K2()                  const;
+	inline const CarToRas<T>&             CoordMap()            const;
+	inline tvec4<bucketT, glm::defaultp>* HistBuckets();
+	inline tvec4<bucketT, glm::defaultp>* AccumulatorBuckets();
+	inline SpatialFilter<bucketT>*        GetSpatialFilter();
+	inline TemporalFilter<T>*             GetTemporalFilter();
+
+	//Virtual renderer properties overridden from RendererBase, getters only.
+	double ScaledQuality()				   const override;
+	double LowerLeftX(bool  gutter = true) const override;
+	double LowerLeftY(bool  gutter = true) const override;
+	double UpperRightX(bool gutter = true) const override;
+	double UpperRightY(bool gutter = true) const override;
+	DensityFilterBase* GetDensityFilter()        override;
+
+	//Non-virtual ember wrappers, getters only.
+	inline bool                  XaosPresent()		   const;
+	inline size_t			     Supersample()         const;
+	inline size_t			     PaletteIndex()        const;
+	inline T                     Time()                const;
+	inline T                     Quality()             const;
+	inline T                     SpatialFilterRadius() const;
+	inline T                     PixelsPerUnit()       const;
+	inline T                     Zoom()                const;
+	inline T                     CenterX()             const;
+	inline T                     CenterY()             const;
+	inline T                     Rotate()              const;
+	inline bucketT               Brightness()          const;
+	inline bucketT               Gamma()               const;
+	inline bucketT               Vibrancy()            const;
+	inline bucketT               GammaThresh()         const;
+	inline bucketT               HighlightPower()      const;
+	inline Color<T>			     Background()          const;
+	inline const Xform<T>*       Xforms()              const;
+	inline Xform<T>*             NonConstXforms();
+	inline size_t			     XformCount()          const;
+	inline const Xform<T>*       FinalXform()          const;
+	inline Xform<T>*             NonConstFinalXform();
+	inline bool                  UseFinalXform()       const;
+	inline const Palette<float>* GetPalette()          const;
+	inline ePaletteMode          PaletteMode()         const;
+
+	//Virtual ember wrappers overridden from RendererBase, getters only.
+	size_t TemporalSamples() const override;
+	size_t FinalRasW()       const override;
+	size_t FinalRasH()       const override;
+	size_t SubBatchSize()    const override;
+	size_t FuseCount()		 const override;
+
+	//Non-virtual iterator wrappers.
+	//The distribution buffer is spelled as unsigned char rather than byte,
+	//because the name byte can now be confused with std::byte.
+	const unsigned char* XformDistributions()        const;
+	size_t               XformDistributionsSize()    const;
+	Point<T>*            Samples(size_t threadIndex) const;
+
+protected:
+	//Non-virtual functions that might be needed by a derived class.
+	void PrepFinalAccumVals(Color<bucketT>& background, bucketT& g, bucketT& linRange, bucketT& vibrancy);
+
+private:
+	//Miscellaneous non-virtual functions used only in this class.
+	void Accumulate(QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand, Point<T>* samples, size_t sampleCount, const Palette<bucketT>* palette);
+	void AddToAccum(const tvec4<bucketT, glm::defaultp>& bucket, intmax_t i, intmax_t ii, intmax_t j, intmax_t jj);
+	template <typename accumT> void GammaCorrection(tvec4<bucketT, glm::defaultp>& bucket, Color<bucketT>& background, bucketT g, bucketT linRange, bucketT vibrancy, bool scale, accumT* correctedChannels);
+	void CurveAdjust(bucketT& a, const glm::length_t& index);
+	void VectorizedLogScale(size_t row, size_t rowEnd);
+
+protected:
+//public:
+	T m_Scale;
+	T m_PixelsPerUnitX;
+	T m_PixelsPerUnitY;
+	T m_PixelAspectRatio = 1;
+	T m_LowerLeftX;
+	T m_LowerLeftY;
+	T m_UpperRightX;
+	T m_UpperRightY;
+	bucketT m_K1;
+	bucketT m_K2;
+	bucketT m_Vibrancy;//Accumulate these after each temporal sample.
+	bucketT m_Gamma;
+	T m_ScaledQuality;
+	Color<bucketT> m_Background;//This is a scaled copy of the m_Background member of m_Ember, but with a type of bucketT.
+	Affine2D<T> m_RotMat;
+	Ember<T> m_Ember;
+	Ember<T> m_TempEmber;
+	Ember<T> m_LastEmber;
+private:
+	vector<Ember<T>> m_Embers;
+
+protected:
+	//m_EmbersP initially points at the private m_Embers vector.
+	//m_Embers must stay declared before it, because members are initialized in declaration order.
+	vector<Ember<T>>* m_EmbersP = &m_Embers;
+	vector<Ember<T>> m_ThreadEmbers;
+	Interpolater<T> m_Interpolater;
+	CarToRas<T> m_CarToRas;
+	//Both iterators are created up front. m_Iterator initially points at the standard one,
+	//so m_StandardIterator must stay declared before m_Iterator.
+	unique_ptr<StandardIterator<T>> m_StandardIterator = make_unique<StandardIterator<T>>();
+	unique_ptr<XaosIterator<T>> m_XaosIterator = make_unique<XaosIterator<T>>();
+	Iterator<T>* m_Iterator = m_StandardIterator.get();
+	Palette<bucketT> m_Dmap;
+	vector<tvec4<bucketT, glm::defaultp>> m_Csa;
+	vector<tvec4<bucketT, glm::defaultp>> m_HistBuckets;
+	vector<tvec4<bucketT, glm::defaultp>> m_AccumulatorBuckets;
+	unique_ptr<SpatialFilter<bucketT>> m_SpatialFilter;
+	unique_ptr<TemporalFilter<T>> m_TemporalFilter;
+	unique_ptr<DensityFilter<bucketT>> m_DensityFilter;
+	vector<vector<Point<T>>> m_Samples;
+	EmberToXml<T> m_EmberToXml;
+};
+
+//This class had to be implemented in a cpp file because the compiler was breaking.
+//So the explicit instantiation must be declared here rather than in Ember.cpp where
+//all of the other classes are done.
+}
@@ -1,237 +1,237 @@
-#pragma once
-
-#include "Utils.h"
-#include "Ember.h"
-#include "DensityFilter.h"
-
-/// <summary>
-/// RendererBase, RenderCallback and EmberStats classes.
-/// </summary>
-
-namespace EmberNs
-{
-/// <summary>
-/// Function pointers present a major restriction when dealing
-/// with member functions, and that is they can only point to
-/// static ones. So instead of a straight function pointer, use
-/// a callback class with a single virtual callback
-/// member function.
-/// Template argument expected to be float or double.
-/// </summary>
-class EMBER_API RenderCallback
-{
-public:
-	RenderCallback() = default;
-	RenderCallback(RenderCallback& callback) = delete;
-
-	/// <summary>
-	/// Virtual destructor to ensure anything declared in derived classes gets cleaned up.
-	/// </summary>
-	virtual ~RenderCallback() = default;
-
-	/// <summary>
-	/// Empty progress function to be implemented in derived classes to take action on progress updates.
-	/// </summary>
-	/// <param name="ember">The ember currently being rendered</param>
-	/// <param name="foo">An extra dummy parameter</param>
-	/// <param name="fraction">The progress fraction from 0-100</param>
-	/// <param name="stage">The stage of iteration. 1 is iterating, 2 is density filtering, 2 is final accumulation.</param>
-	/// <param name="etaMs">The estimated milliseconds to completion of the current stage</param>
-	/// <returns>Override should return 0 if an abort is requested, else 1 to continue rendering</returns>
-	virtual int ProgressFunc(Ember<float>& ember, void* foo, double fraction, int stage, double etaMs) { return 0; }
-	virtual int ProgressFunc(Ember<double>& ember, void* foo, double fraction, int stage, double etaMs) { return 0; }
-};
-
-/// <summary>
-/// Render statistics for the number of iterations ran,
-/// number of bad values calculated during iteration, and
-/// the total time for the entire render from the start of
-/// iteration to the end of final accumulation.
-/// </summary>
-class EMBER_API EmberStats
-{
-public:
-	/// <summary>
-	/// Constructor which sets all values to 0.
-	/// </summary>
-	EmberStats()
-	{
-		Clear();
-	}
-
-	void Clear()
-	{
-		m_Success = true;
-		m_Iters = 0;
-		m_Badvals = 0;
-		m_IterMs = 0;
-		m_RenderMs = 0;
-	}
-
-	EmberStats& operator += (const EmberStats& stats)
-	{
-		m_Success &= stats.m_Success;
-		m_Iters += stats.m_Iters;
-		m_Badvals += stats.m_Badvals;
-		m_IterMs += stats.m_IterMs;
-		m_RenderMs += stats.m_RenderMs;
-		return *this;
-	}
-
-	bool m_Success = true;
-	size_t m_Iters, m_Badvals;
-	double m_IterMs, m_RenderMs;
-};
-
-/// <summary>
-/// The types of available renderers.
-/// Add more in the future as different rendering methods are experimented with.
-/// Possible values might be: CPU+OpenGL, Particle, Inverse.
-/// </summary>
-enum class eRendererType : et { CPU_RENDERER, OPENCL_RENDERER };
-
-/// <summary>
-/// A base class with virtual functions to allow both templating and polymorphism to work together.
-/// Derived classes will implement all of these functions.
-/// Note that functions which return a decimal number use the most precise type, double.
-/// </summary>
-class EMBER_API RendererBase : public EmberReport
-{
-public:
-	RendererBase();
-	RendererBase(const RendererBase& renderer) = delete;
-	RendererBase& operator = (const RendererBase& renderer) = delete;
-	virtual ~RendererBase() = default;
-
-	//Non-virtual processing functions.
-	void ChangeVal(std::function<void(void)> func, eProcessAction action);
-	size_t HistMemoryRequired(size_t strips);
-	pair<size_t, size_t> MemoryRequired(size_t strips, bool includeFinal, bool threadedWrite);
-	vector<QTIsaac<ISAAC_SIZE, ISAAC_INT>> RandVec();
-	bool PrepFinalAccumVector(vector<v4F>& pixels);
-
-	//Virtual processing functions.
-	virtual bool Ok() const;
-	virtual size_t MemoryAvailable();
-	virtual void SetEmber(const Ember<float>& ember, eProcessAction action = eProcessAction::FULL_RENDER, bool prep = false) { }
-	virtual void SetEmber(const Ember<double>& ember, eProcessAction action = eProcessAction::FULL_RENDER, bool prep = false) { }
-	virtual bool RandVec(vector<QTIsaac<ISAAC_SIZE, ISAAC_INT>>& randVec);
-
-	//Abstract processing functions.
-	virtual bool CreateDEFilter(bool& newAlloc) = 0;
-	virtual bool CreateSpatialFilter(bool& newAlloc) = 0;
-	virtual bool CreateTemporalFilter(bool& newAlloc) = 0;
-	virtual void Prepare() = 0;
-	virtual void ComputeBounds() = 0;
-	virtual void ComputeQuality() = 0;
-	virtual void ComputeCamera() = 0;
-	virtual eRenderStatus Run(vector<v4F>& finalImage, double time = 0, size_t subBatchCountOverride = 0, bool forceOutput = false, size_t finalOffset = 0) = 0;
-	virtual EmberImageComments ImageComments(const EmberStats& stats, size_t printEditDepth = 0, bool hexPalette = true) = 0;
-	virtual DensityFilterBase* GetDensityFilter() = 0;
-
-	//Non-virtual renderer properties, getters only.
-	size_t		   SuperRasW()					 const;
-	size_t		   SuperRasH()					 const;
-	size_t		   SuperSize()					 const;
-	size_t		   FinalRowSize()				 const;
-	size_t		   FinalDimensions()			 const;
-	size_t		   FinalBufferSize()			 const;
-	size_t		   PixelSize()					 const;
-	size_t		   GutterWidth()				 const;
-	size_t		   DensityFilterOffset()		 const;
-	size_t		   TotalIterCount(size_t strips) const;
-	size_t		   ItersPerTemporalSample()		 const;
-	eProcessState  ProcessState()				 const;
-	eProcessAction ProcessAction()				 const;
-	EmberStats     Stats() 						 const;
-
-	//Non-virtual render getters and setters.
-	bool LockAccum() const;
-	void LockAccum(bool lockAccum);
-	bool EarlyClip() const;
-	void EarlyClip(bool earlyClip);
-	bool YAxisUp() const;
-	void YAxisUp(bool yup);
-	bool InsertPalette() const;
-	void InsertPalette(bool insertPalette);
-	bool ReclaimOnResize() const;
-	void ReclaimOnResize(bool reclaimOnResize);
-	void Callback(RenderCallback* callback);
-	void ThreadCount(size_t threads, const char* seedString = nullptr);
-	size_t BytesPerChannel() const;
-	size_t NumChannels() const;
-	eThreadPriority Priority() const;
-	void Priority(eThreadPriority priority);
-	eInteractiveFilter InteractiveFilter() const;
-	void InteractiveFilter(eInteractiveFilter filter);
-
-	//Virtual render properties, getters and setters.
-	virtual size_t ThreadCount()   const;
-	virtual eRendererType RendererType() const;
-	virtual bool Shared() const;
-
-	//Abstract render properties, getters only.
-	virtual size_t TemporalSamples()			   const = 0;
-	virtual size_t HistBucketSize()				   const = 0;
-	virtual size_t FinalRasW()		               const = 0;
-	virtual size_t FinalRasH()					   const = 0;
-	virtual size_t SubBatchSize()				   const = 0;
-	virtual size_t FuseCount()					   const = 0;
-	virtual double ScaledQuality()                 const = 0;
-	virtual double LowerLeftX(bool  gutter = true) const = 0;
-	virtual double LowerLeftY(bool  gutter = true) const = 0;
-	virtual double UpperRightX(bool gutter = true) const = 0;
-	virtual double UpperRightY(bool gutter = true) const = 0;
-
-	//Non-virtual threading control.
-	void Reset();
-	void EnterRender();
-	void LeaveRender();
-	void EnterFinalAccum();
-	void LeaveFinalAccum();
-	void EnterResize();
-	void LeaveResize();
-	void Abort();
-	bool Aborted();
-	void Pause(bool pause);
-	bool Paused();
-	bool InRender();
-	bool InFinalAccum();
-
-	void* m_ProgressParameter = nullptr;
-protected:
-	bool m_EarlyClip = false;
-	bool m_YAxisUp = false;
-	bool m_LockAccum = false;
-	bool m_InRender = false;
-	bool m_InFinalAccum = false;
-	bool m_InsertPalette = false;
-	bool m_ReclaimOnResize = false;
-	bool m_CurvesSet = false;
-	volatile bool m_Abort = false;
-	volatile bool m_Pause = false;
-	size_t m_SuperRasW;
-	size_t m_SuperRasH;
-	size_t m_SuperSize = 0;
-	size_t m_GutterWidth;
-	size_t m_DensityFilterOffset;
-	size_t m_NumChannels = 4;
-	size_t m_BytesPerChannel = 4;
-	size_t m_ThreadsToUse;
-	size_t m_VibGamCount;
-	size_t m_LastTemporalSample = 0;
-	size_t m_LastIter = 0;
-	double m_LastIterPercent = 0;
-	eThreadPriority m_Priority = eThreadPriority::NORMAL;
-	eProcessAction m_ProcessAction = eProcessAction::FULL_RENDER;
-	eProcessState m_ProcessState = eProcessState::NONE;
-	eInteractiveFilter m_InteractiveFilter = eInteractiveFilter::FILTER_LOG;
-	EmberStats m_Stats;
-	RenderCallback* m_Callback = nullptr;
-	vector<size_t> m_SubBatch;
-	vector<size_t> m_BadVals;
-	vector<QTIsaac<ISAAC_SIZE, ISAAC_INT>> m_Rand;
-	std::recursive_mutex m_RenderingCs, m_AccumCs, m_FinalAccumCs, m_ResizeCs;
-	Timing m_RenderTimer, m_IterTimer, m_ProgressTimer;
-};
-}
+#pragma once
+
+#include "Utils.h"
+#include "Ember.h"
+#include "DensityFilter.h"
+
+/// <summary>
+/// RendererBase, RenderCallback and EmberStats classes.
+/// </summary>
+
+namespace EmberNs
+{
+/// <summary>
+/// Function pointers present a major restriction when dealing
+/// with member functions, and that is they can only point to
+/// static ones. So instead of a straight function pointer, use
+/// a callback class with virtual callback member functions.
+/// This class is not a template. It provides one overload of the
+/// callback for Ember<float> and one for Ember<double>.
+/// </summary>
+class EMBER_API RenderCallback
+{
+public:
+	RenderCallback() = default;
+
+	/// <summary>
+	/// Copy construction is disabled. The parameter is a const reference so that
+	/// this is the canonical copy constructor signature.
+	/// </summary>
+	RenderCallback(const RenderCallback& callback) = delete;
+
+	/// <summary>
+	/// Virtual destructor to ensure anything declared in derived classes gets cleaned up.
+	/// </summary>
+	virtual ~RenderCallback() = default;
+
+	/// <summary>
+	/// Progress function to be overridden in derived classes to take action on progress updates.
+	/// The default implementations do nothing and return 0.
+	/// </summary>
+	/// <param name="ember">The ember currently being rendered</param>
+	/// <param name="foo">An extra dummy parameter</param>
+	/// <param name="fraction">The progress fraction from 0-100</param>
+	/// <param name="stage">The stage being reported: iterating, density filtering or final accumulation.
+	/// NOTE(review): the previous text gave the value 2 for both density filtering and final accumulation; confirm the numbering against the callers.</param>
+	/// <param name="etaMs">The estimated milliseconds to completion of the current stage</param>
+	/// <returns>Override should return 0 if an abort is requested, else 1 to continue rendering</returns>
+	virtual int ProgressFunc(Ember<float>& ember, void* foo, double fraction, int stage, double etaMs) { return 0; }
+	virtual int ProgressFunc(Ember<double>& ember, void* foo, double fraction, int stage, double etaMs) { return 0; }
+};
+
+/// <summary>
+/// Statistics gathered for a render: how many iterations ran,
+/// how many bad values were calculated while iterating, the time
+/// spent iterating and the time for the entire render, from the start
+/// of iteration to the end of final accumulation.
+/// </summary>
+class EMBER_API EmberStats
+{
+public:
+	/// <summary>
+	/// Constructor which resets every counter and timer to 0 and marks the stats as successful.
+	/// </summary>
+	EmberStats() noexcept
+	{
+		Clear();
+	}
+
+	/// <summary>
+	/// Reset all counters and timers to 0 and set the success flag to true.
+	/// </summary>
+	void Clear() noexcept
+	{
+		m_RenderMs = 0;
+		m_IterMs = 0;
+		m_Badvals = 0;
+		m_Iters = 0;
+		m_Success = true;
+	}
+
+	/// <summary>
+	/// Add the counters and timers of another stats object to this one.
+	/// The result is only successful if both operands were.
+	/// </summary>
+	/// <param name="stats">The stats to add to this object</param>
+	/// <returns>Reference to this object</returns>
+	EmberStats& operator += (const EmberStats& stats) noexcept
+	{
+		m_Success = m_Success && stats.m_Success;
+		m_Iters = m_Iters + stats.m_Iters;
+		m_Badvals = m_Badvals + stats.m_Badvals;
+		m_IterMs = m_IterMs + stats.m_IterMs;
+		m_RenderMs = m_RenderMs + stats.m_RenderMs;
+		return *this;
+	}
+
+	bool m_Success = true;
+	size_t m_Iters;
+	size_t m_Badvals;
+	double m_IterMs;
+	double m_RenderMs;
+};
+
+/// <summary>
+/// The kinds of renderer that are available.
+/// More may be added in the future as different rendering methods are experimented with.
+/// Possible values might be: CPU+OpenGL, Particle, Inverse.
+/// </summary>
+enum class eRendererType : et
+{
+	CPU_RENDERER,
+	OPENCL_RENDERER
+};
+
+/// <summary>
+/// A base class with virtual functions to allow both templating and polymorphism to work together.
+/// Derived classes must implement every function declared pure virtual (= 0) here.
+/// The SetEmber() overloads and several others have default implementations and may be overridden as needed.
+/// Note that functions which return a decimal number use the most precise type, double.
+/// Copying and assigning are both disabled.
+/// </summary>
+class EMBER_API RendererBase : public EmberReport
+{
+public:
+	RendererBase();
+	RendererBase(const RendererBase& renderer) = delete;
+	RendererBase& operator = (const RendererBase& renderer) = delete;
+	virtual ~RendererBase() = default;
+
+	//Non-virtual processing functions.
+	void ChangeVal(std::function<void(void)> func, eProcessAction action);
+	size_t HistMemoryRequired(size_t strips);
+	pair<size_t, size_t> MemoryRequired(size_t strips, bool includeFinal, bool threadedWrite);
+	vector<QTIsaac<ISAAC_SIZE, ISAAC_INT>> RandVec();
+	bool PrepFinalAccumVector(vector<v4F>& pixels);
+
+	//Virtual processing functions.
+	virtual bool Ok() const;
+	virtual size_t MemoryAvailable();
+	//The default SetEmber() implementations are empty. One overload exists per supported ember precision.
+	virtual void SetEmber(const Ember<float>& ember, eProcessAction action = eProcessAction::FULL_RENDER, bool prep = false) { }
+	virtual void SetEmber(const Ember<double>& ember, eProcessAction action = eProcessAction::FULL_RENDER, bool prep = false) { }
+	virtual bool RandVec(vector<QTIsaac<ISAAC_SIZE, ISAAC_INT>>& randVec);
+
+	//Abstract processing functions.
+	virtual bool CreateDEFilter(bool& newAlloc) = 0;
+	virtual bool CreateSpatialFilter(bool& newAlloc) = 0;
+	virtual bool CreateTemporalFilter(bool& newAlloc) = 0;
+	virtual void Prepare() = 0;
+	virtual void ComputeBounds() = 0;
+	virtual void ComputeQuality() = 0;
+	virtual void ComputeCamera() = 0;
+	virtual eRenderStatus Run(vector<v4F>& finalImage, double time = 0, size_t subBatchCountOverride = 0, bool forceOutput = false, size_t finalOffset = 0) = 0;
+	virtual EmberImageComments ImageComments(const EmberStats& stats, size_t printEditDepth = 0, bool hexPalette = true) = 0;
+	virtual DensityFilterBase* GetDensityFilter() = 0;
+
+	//Non-virtual renderer properties, getters only.
+	size_t		   SuperRasW()					 const;
+	size_t		   SuperRasH()					 const;
+	size_t		   SuperSize()					 const;
+	size_t		   FinalRowSize()				 const;
+	size_t		   FinalDimensions()			 const;
+	size_t		   FinalBufferSize()			 const;
+	size_t		   PixelSize()					 const;
+	size_t		   GutterWidth()				 const;
+	size_t		   DensityFilterOffset()		 const;
+	size_t		   TotalIterCount(size_t strips) const;
+	size_t		   ItersPerTemporalSample()		 const;
+	eProcessState  ProcessState()				 const;
+	eProcessAction ProcessAction()				 const;
+	EmberStats     Stats() 						 const;
+
+	//Non-virtual render getters and setters.
+	bool LockAccum() const;
+	void LockAccum(bool lockAccum);
+	bool EarlyClip() const;
+	void EarlyClip(bool earlyClip);
+	bool YAxisUp() const;
+	void YAxisUp(bool yup);
+	bool InsertPalette() const;
+	void InsertPalette(bool insertPalette);
+	bool ReclaimOnResize() const;
+	void ReclaimOnResize(bool reclaimOnResize);
+	void Callback(RenderCallback* callback);
+	void ThreadCount(size_t threads, const char* seedString = nullptr);
+	size_t BytesPerChannel() const;
+	size_t NumChannels() const;
+	eThreadPriority Priority() const;
+	void Priority(eThreadPriority priority);
+	eInteractiveFilter InteractiveFilter() const;
+	void InteractiveFilter(eInteractiveFilter filter);
+
+	//Virtual render properties, getters and setters.
+	virtual size_t ThreadCount()   const;
+	virtual eRendererType RendererType() const;
+	virtual bool Shared() const;
+
+	//Abstract render properties, getters only.
+	virtual size_t TemporalSamples()			   const = 0;
+	virtual size_t HistBucketSize()				   const = 0;
+	virtual size_t FinalRasW()		               const = 0;
+	virtual size_t FinalRasH()					   const = 0;
+	virtual size_t SubBatchSize()				   const = 0;
+	virtual size_t FuseCount()					   const = 0;
+	virtual double ScaledQuality()                 const = 0;
+	virtual double LowerLeftX(bool  gutter = true) const = 0;
+	virtual double LowerLeftY(bool  gutter = true) const = 0;
+	virtual double UpperRightX(bool gutter = true) const = 0;
+	virtual double UpperRightY(bool gutter = true) const = 0;
+
+	//Non-virtual threading control.
+	void Reset();
+	void EnterRender();
+	void LeaveRender();
+	void EnterFinalAccum();
+	void LeaveFinalAccum();
+	void EnterResize();
+	void LeaveResize();
+	void Abort();
+	bool Aborted();
+	void Pause(bool pause);
+	bool Paused();
+	bool InRender();
+	bool InFinalAccum();
+
+	//Public, caller-owned pointer. Presumably forwarded to RenderCallback::ProgressFunc() as its void* argument -- TODO confirm in the implementation.
+	void* m_ProgressParameter = nullptr;
+protected:
+	bool m_EarlyClip = false;
+	bool m_YAxisUp = false;
+	bool m_LockAccum = false;
+	bool m_InRender = false;
+	bool m_InFinalAccum = false;
+	bool m_InsertPalette = false;
+	bool m_ReclaimOnResize = false;
+	bool m_CurvesSet = false;
+	//NOTE(review): if these two flags are written by one thread and read by another, volatile does not
+	//provide synchronization in standard C++; std::atomic<bool> would. Confirm how Abort()/Pause() are used.
+	volatile bool m_Abort = false;
+	volatile bool m_Pause = false;
+	//NOTE(review): the members below without an in-class initializer are presumably set by the
+	//constructor or before first use -- verify in the .cpp.
+	size_t m_SuperRasW;
+	size_t m_SuperRasH;
+	size_t m_SuperSize = 0;
+	size_t m_GutterWidth;
+	size_t m_DensityFilterOffset;
+	size_t m_NumChannels = 4;
+	size_t m_BytesPerChannel = 4;
+	size_t m_ThreadsToUse;
+	size_t m_VibGamCount;
+	size_t m_LastTemporalSample = 0;
+	size_t m_LastIter = 0;
+	double m_LastIterPercent = 0;
+	eThreadPriority m_Priority = eThreadPriority::NORMAL;
+	eProcessAction m_ProcessAction = eProcessAction::FULL_RENDER;
+	eProcessState m_ProcessState = eProcessState::NONE;
+	eInteractiveFilter m_InteractiveFilter = eInteractiveFilter::FILTER_LOG;
+	EmberStats m_Stats;
+	//Non-owning as far as this header shows: a raw pointer assigned via Callback(), null by default.
+	RenderCallback* m_Callback = nullptr;
+	vector<size_t> m_SubBatch;
+	vector<size_t> m_BadVals;
+	vector<QTIsaac<ISAAC_SIZE, ISAAC_INT>> m_Rand;
+	std::recursive_mutex m_RenderingCs, m_AccumCs, m_FinalAccumCs, m_ResizeCs;
+	Timing m_RenderTimer, m_IterTimer, m_ProgressTimer;
+};
+}
@@ -1,127 +1,127 @@
-// This is a combination of this:
-// https://stackoverflow.com/questions/25379422/b-spline-curves/25379851#25379851
-// and this, but modified to operate on a spline with any number of points intead of just >= 4:
-//
-// Spline.cc
-// CubicSplineLib/
-//
-// Source file for the "CubicSpline" class. This object facilitates natural
-// cubic spline interpolation. Once instantiated the
-// constructor builds the spline polynomials on the intervals of the (x, y)
-// data provided and retains them for later invocation. Parallelized using
-// OpenMP.
-//
-// Copyright (C) Geoffrey Lentner 2015. All rights reserved.
-// See LICENCE file. (GPL v2.0)
-//
-// contact: Geoffrey Lentner, B.S.
-//          Graduate Student / Researcher
-//          102 Natural Science Building
-//          Department of Physics & Astronomy
-//          University of Louisville
-//          Louisville, KY 40292 USA
-//
-// email:   geoffrey.lentner@louisville.edu
-//
-// updated: 2015-1-19 13:10:30 EST
-//
-#include "EmberPch.h"
-#include "Spline.h"
-
-namespace EmberNs
-{
-/// <summary>
-/// Constructor that takes a vector of x,y points, optionally sorts them
-/// and builds the spline values.
-/// </summary>
-/// <param name="_vals">The vector of x,y points</param>
-/// <param name="sorted">True to skip sorting, false to sort.</param>
-template<class T>
-Spline<T>::Spline(const std::vector<v2T>& _vals, bool sorted)
-{
-	n = int(_vals.size() - 1);
-	vals = _vals;
-
-	// if not suppressed, ensure 'x' elements are in ascending order
-	if (!sorted)
-		std::sort(vals.begin(), vals.end(), [&](const v2T & lhs, const v2T & rhs) { return lhs.x < rhs.x; });
-	BuildSplines();
-}
-
-/// <summary>
-/// Compute spline values for the passed in points.
-/// This only needs to be done once.
-/// </summary>
-template<class T>
-void Spline<T>::BuildSplines()
-{
-	a.resize(n + 1);
-	b.resize(n + 1);
-	c.resize(n + 1);
-	d.resize(n + 1);
-	std::vector<T> w(n);
-	std::vector<T> h(n);
-	std::vector<T> ftt(n + 1);
-
-	for (int i = 0; i < n; i++)
-	{
-		w[i] = (vals[i + 1].x - vals[i].x);
-		h[i] = (vals[i + 1].y - vals[i].y) / w[i];
-	}
-
-	ftt[0] = 0;
-
-	for (int i = 0; i < n - 1; i++)
-		ftt[i + 1] = 3 * (h[i + 1] - h[i]) / (w[i + 1] + w[i]);
-
-	ftt[n] = 0;
-
-	for (int i = 0; i < n; i++)
-	{
-		a[i] = (ftt[i + 1] - ftt[i]) / (6 * w[i]);
-		b[i] = ftt[i] / 2;
-		c[i] = h[i] - w[i] * (ftt[i + 1] + 2 * ftt[i]) / 6;
-		d[i] = vals[i].y;
-	}
-}
-
-/// <summary>
-/// Wrapper to generate y points on the spline for a vector of passed in points.
-/// </summary>
-/// <param name="newX">The vector of x points to generate spline points for</param>
-/// <returns>The vector of computed spline y points.</returns>
-template<class T>
-std::vector<T> Spline<T>::Interpolate(const std::vector<T>& newX)
-{
-	std::vector<T> output; output.resize(newX.size());
-
-	for (size_t i = 0; i < newX.size(); i++)
-		output[i] = Interpolate(newX[i]);
-
-	return output;
-}
-
-/// <summary>
-/// Compute a y point on the spline for a the passed in value of x.
-/// </summary>
-/// <param name="newX">The x points to compute the spline point for</param>
-/// <returns>The computed spline y points.</returns>
-template<class T>
-T Spline<T>::Interpolate(T newX)
-{
-	ClampRef(newX, vals[0].x, vals[n].x);
-	int j = 0;
-
-	while (j < n && newX > vals[j + 1].x)
-		j++;
-
-	const auto xmxj = newX - vals[j].x;
-	const auto output = a[j] * (xmxj * xmxj * xmxj) +
-						b[j] * (xmxj * xmxj) +
-						c[j] * xmxj +
-						d[j];
-	return output;
-}
-
-template EMBER_API class Spline<float>;
-}
+// This is a combination of this:
+// https://stackoverflow.com/questions/25379422/b-spline-curves/25379851#25379851
+// and this, but modified to operate on a spline with any number of points instead of just >= 4:
+//
+// Spline.cc
+// CubicSplineLib/
+//
+// Source file for the "CubicSpline" class. This object facilitates natural
+// cubic spline interpolation. Once instantiated the
+// constructor builds the spline polynomials on the intervals of the (x, y)
+// data provided and retains them for later invocation. Parallelized using
+// OpenMP.
+//
+// Copyright (C) Geoffrey Lentner 2015. All rights reserved.
+// See LICENCE file. (GPL v2.0)
+//
+// contact: Geoffrey Lentner, B.S.
+//          Graduate Student / Researcher
+//          102 Natural Science Building
+//          Department of Physics & Astronomy
+//          University of Louisville
+//          Louisville, KY 40292 USA
+//
+// email:   geoffrey.lentner@louisville.edu
+//
+// updated: 2015-1-19 13:10:30 EST
+//
+#include "EmberPch.h"
+#include "Spline.h"
+
+namespace EmberNs
+{
+/// <summary>
+/// Constructor which copies the passed in x,y points, sorts the copy by x
+/// unless the caller states it is already sorted, then builds the spline coefficients.
+/// </summary>
+/// <param name="_vals">The vector of x,y points</param>
+/// <param name="sorted">True to skip sorting, false to sort.</param>
+template<class T>
+Spline<T>::Spline(const std::vector<v2T>& _vals, bool sorted)
+{
+	vals = _vals;
+	n = int(_vals.size() - 1);//Index of the last point, which is also the number of intervals.
+
+	//Unless the caller vouches for the ordering, put the points in ascending x order.
+	if (!sorted)
+	{
+		const auto ascendingX = [&](const v2T& first, const v2T& second) { return first.x < second.x; };
+		std::sort(vals.begin(), vals.end(), ascendingX);
+	}
+
+	BuildSplines();
+}
+
+/// <summary>
+/// Compute the per-interval cubic coefficients a, b, c and d for the stored points.
+/// Interval i starts at vals[i].x and is evaluated by Interpolate() as
+/// a[i] * dx^3 + b[i] * dx^2 + c[i] * dx + d[i], where dx = x - vals[i].x.
+/// The ftt values at the first and last point are fixed at zero.
+/// This only needs to be done once.
+/// </summary>
+template<class T>
+void Spline<T>::BuildSplines()
+{
+	a.resize(n + 1);
+	b.resize(n + 1);
+	c.resize(n + 1);
+	d.resize(n + 1);
+	std::vector<T> w(n);//Width of each interval in x.
+	std::vector<T> h(n);//Slope of the straight line across each interval.
+	std::vector<T> ftt(n + 1);
+
+	for (int i = 0; i < n; i++)
+	{
+		w[i] = (vals[i + 1].x - vals[i].x);
+		h[i] = (vals[i + 1].y - vals[i].y) / w[i];
+	}
+
+	ftt[0] = 0;
+
+	for (int i = 0; i < n - 1; i++)
+		ftt[i + 1] = 3 * (h[i + 1] - h[i]) / (w[i + 1] + w[i]);
+
+	ftt[n] = 0;
+
+	for (int i = 0; i < n; i++)
+	{
+		a[i] = (ftt[i + 1] - ftt[i]) / (6 * w[i]);
+		b[i] = ftt[i] / 2;
+		c[i] = h[i] - w[i] * (ftt[i + 1] + 2 * ftt[i]) / 6;
+		d[i] = vals[i].y;
+	}
+
+	//The loop above only fills intervals [0, n). Store the y of the last point too, so that
+	//a spline built from a single point (n == 0) evaluates to that point's y instead of zero.
+	//For n >= 1 this entry is never read, because Interpolate() stops at interval n - 1.
+	if (n >= 0)
+		d[n] = vals[n].y;
+}
+
+/// <summary>
+/// Convenience overload which evaluates the spline at every x in the passed in vector
+/// by calling the single value overload once per element.
+/// </summary>
+/// <param name="newX">The vector of x points to generate spline points for</param>
+/// <returns>The vector of computed spline y points, in the same order as the input.</returns>
+template<class T>
+std::vector<T> Spline<T>::Interpolate(const std::vector<T>& newX)
+{
+	std::vector<T> ys;
+	ys.reserve(newX.size());
+
+	for (const auto& x : newX)
+		ys.push_back(Interpolate(x));
+
+	return ys;
+}
+
+/// <summary>
+/// Compute a y point on the spline for the passed in value of x.
+/// The value of x is first passed through ClampRef() with the first and last stored x as bounds.
+/// </summary>
+/// <param name="newX">The x point to compute the spline point for</param>
+/// <returns>The computed spline y point.</returns>
+template<class T>
+T Spline<T>::Interpolate(T newX)
+{
+	ClampRef(newX, vals[0].x, vals[n].x);
+	int seg = 0;
+
+	//Walk forward until the end of the current interval is not below newX.
+	for (; seg < n; seg++)
+	{
+		if (!(newX > vals[seg + 1].x))
+			break;
+	}
+
+	//Evaluate the cubic for this interval at the offset from the interval's starting x.
+	const auto dx = newX - vals[seg].x;
+	return a[seg] * (dx * dx * dx) +
+		   b[seg] * (dx * dx) +
+		   c[seg] * dx +
+		   d[seg];
+}
+
+//Explicit instantiation of Spline for float, the only element type instantiated in this file.
+template EMBER_API class Spline<float>;
+}
@@ -1,55 +1,55 @@
-// This is a combination of this:
-// https://stackoverflow.com/questions/25379422/b-spline-curves/25379851#25379851
-// and this, but modified to operate on a spline with any number of points intead of just >= 4:
-//
-// Spline.h
-// CubicSplineLib/
-//
-// Header file for the "CubicSpline" class. This object facilitates natural
-// cubic spline interpolation. Once instantiated the
-// constructor builds the spline polynomials on the intervals of the (x, y)
-// data provided and retains them for later invocation. Parallelized using
-// OpenMP.
-//
-// Copyright (C) Geoffrey Lentner 2015. All rights reserved.
-// See LICENCE file. (GPL v2.0)
-//
-// contact: Geoffrey Lentner, B.S.
-//          Graduate Student / Researcher
-//          102 Natural Science Building
-//          Department of Physics & Astronomy
-//          University of Louisville
-//          Louisville, KY 40292 USA
-//
-// email:   geoffrey.lentner@louisville.edu
-//
-// updated: 2015-1-19 13:10:30 EST
-//
-#pragma once
-#include "Utils.h"
-
-namespace EmberNs
-{
-/// <summary>
-/// Class taking passed in x,y points, sorting them, and providing a function
-/// to compute and return an interpolated spline curve for any value between the
-/// first and last x.
-/// Template argument expected to be float.
-/// </summary>
-template<class T = float>
-class EMBER_API Spline
-{
-public:
-	Spline(const std::vector<v2T>& _vals, bool sorted = false);
-	std::vector<T> Interpolate(const std::vector<T>& newX);
-	T Interpolate(T newX);
-
-private:
-	void BuildSplines();
-	std::vector<v2T> vals;
-	std::vector<T> a, b, c, d;
-	std::vector<T> c_prime, d_prime;
-	std::vector<T> k;
-	int n;
-};
-}
+// This is a combination of this:
+// https://stackoverflow.com/questions/25379422/b-spline-curves/25379851#25379851
+// and this, but modified to operate on a spline with any number of points instead of just >= 4:
+//
+// Spline.h
+// CubicSplineLib/
+//
+// Header file for the "CubicSpline" class. This object facilitates natural
+// cubic spline interpolation. Once instantiated the
+// constructor builds the spline polynomials on the intervals of the (x, y)
+// data provided and retains them for later invocation. Parallelized using
+// OpenMP.
+//
+// Copyright (C) Geoffrey Lentner 2015. All rights reserved.
+// See LICENCE file. (GPL v2.0)
+//
+// contact: Geoffrey Lentner, B.S.
+//          Graduate Student / Researcher
+//          102 Natural Science Building
+//          Department of Physics & Astronomy
+//          University of Louisville
+//          Louisville, KY 40292 USA
+//
+// email:   geoffrey.lentner@louisville.edu
+//
+// updated: 2015-1-19 13:10:30 EST
+//
+#pragma once
+#include "Utils.h"
+
+namespace EmberNs
+{
+/// <summary>
+/// Class taking passed in x,y points, sorting them, and providing a function
+/// to compute and return an interpolated spline curve for any value between the
+/// first and last x.
+/// Template argument expected to be float.
+/// </summary>
+template<class T = float>
+class EMBER_API Spline
+{
+public:
+	/// <summary>
+	/// Constructor which takes the points and builds the spline coefficients.
+	/// </summary>
+	/// <param name="_vals">The x,y points the spline passes through</param>
+	/// <param name="sorted">Presumably true if _vals is already ordered by ascending x, in which case sorting is skipped. Default: false. NOTE(review): confirm against the constructor body.</param>
+	Spline(const std::vector<v2T>& _vals, bool sorted = false);
+
+	/// <summary>
+	/// Compute one spline y value for every x in the passed in vector.
+	/// </summary>
+	/// <param name="newX">The vector of x points to generate spline points for</param>
+	/// <returns>The vector of computed spline y points, one per element of newX.</returns>
+	std::vector<T> Interpolate(const std::vector<T>& newX);
+
+	/// <summary>
+	/// Compute the spline y value for a single x.
+	/// </summary>
+	/// <param name="newX">The x point to compute the spline point for</param>
+	/// <returns>The computed spline y point.</returns>
+	T Interpolate(T newX);
+
+private:
+	void BuildSplines();//Fills a, b, c and d from vals.
+	std::vector<v2T> vals;//The points being interpolated.
+	std::vector<T> a, b, c, d;//Per-interval cubic, quadratic, linear and constant coefficients.
+	std::vector<T> c_prime, d_prime;//NOTE(review): not referenced by BuildSplines() or Interpolate(), possibly unused; confirm before removing.
+	std::vector<T> k;//NOTE(review): not referenced by BuildSplines() or Interpolate(), possibly unused; confirm before removing.
+	int n;//Index of the last point in vals, which is also the number of intervals. Presumably set by the constructor; confirm there.
+};
+}
+}
@@ -1,399 +1,399 @@
-#pragma once
-
-#include "EmberDefines.h"
-
-/// <summary>
-/// TemporalFilter base, derived and factory classes.
-/// </summary>
-
-namespace EmberNs
-{
-/// <summary>
-/// The types of temporal filters available.
-/// </summary>
-enum class eTemporalFilterType : et
-{
-	BOX_TEMPORAL_FILTER,
-	GAUSSIAN_TEMPORAL_FILTER,
-	EXP_TEMPORAL_FILTER
-};
-
-/// <summary>
-/// g++ needs a forward declaration here.
-/// </summary>
-template <typename T> class TemporalFilterCreator;
-
-#define TEMPORALFILTERUSINGS \
-	using TemporalFilter<T>::m_Filter; \
-	using TemporalFilter<T>::m_FilterExp; \
-	using TemporalFilter<T>::Size; \
-	using TemporalFilter<T>::FinishFilter;
-
-/// <summary>
-/// Temporal filter is for doing motion blur while rendering a series of frames for animation.
-/// The filter created is used as a vector of scalar values to multiply the time value by in between embers.
-/// There are three possible types: Gaussian, Box and Exp.
-/// Template argument expected to be float or double.
-/// </summary>
-template <typename T>
-class EMBER_API TemporalFilter
-{
-public:
-	/// <summary>
-	/// Constructor to set up basic filtering parameters, allocate buffers and calculate deltas.
-	/// Derived class constructors will complete the final part of filter setup.
-	/// </summary>
-	/// <param name="filterType">Type of the filter.</param>
-	/// <param name="temporalSamples">The number of temporal samples in the ember being rendered</param>
-	/// <param name="filterWidth">The width of the filter.</param>
-	/// <param name="filterExp">The filter exponent. Unused except with ExpTemporalFilter, but needed to prevent equality tests from failing.</param>
-	TemporalFilter(eTemporalFilterType filterType, size_t temporalSamples, T filterWidth, T filterExp)
-	{
-		size_t i, steps = temporalSamples;
-		m_TemporalSamples = temporalSamples;
-		m_FilterWidth = filterWidth;
-		m_Deltas.resize(steps);
-		m_Filter.resize(steps);
-		m_FilterType = filterType;
-		m_FilterExp = filterExp;//Always assign this to prevent excessive recreates in Renderer::CreateTemporalFilter().
-
-		if (steps == 1)
-		{
-			m_SumFilt = 1;
-			m_Deltas[0] = 0;
-			m_Filter[0] = 1;
-		}
-		else
-		{
-			//Define the temporal deltas.
-			for (i = 0; i < steps; i++)
-				m_Deltas[i] = (static_cast<T>(i) / static_cast<T>(steps - 1) - static_cast<T>(0.5)) * filterWidth;
-		}
-	}
-
-	/// <summary>
-	/// Copy constructor.
-	/// </summary>
-	/// <param name="filter">The TemporalFilter object to copy</param>
-	TemporalFilter(const TemporalFilter<T>& filter)
-	{
-		*this = filter;
-	}
-
-	/// <summary>
-	/// Virtual destructor so derived class destructors get called.
-	/// </summary>
-	virtual ~TemporalFilter()
-	{
-	}
-
-	/// <summary>
-	/// Assignment operator.
-	/// </summary>
-	/// <param name="filter">The TemporalFilter object to copy.</param>
-	/// <returns>Reference to updated self</returns>
-	TemporalFilter<T>& operator = (const TemporalFilter<T>& filter)
-	{
-		if (this != &filter)
-		{
-			m_TemporalSamples = filter.m_TemporalSamples;
-			m_FilterWidth = filter.m_FilterWidth;
-			m_FilterExp = filter.m_FilterExp;
-			m_SumFilt = filter.m_SumFilt;
-			m_Deltas = filter.m_Deltas;
-			m_Filter = filter.m_Filter;
-			m_FilterType = filter.m_FilterType;
-		}
-
-		return *this;
-	}
-
-	/// <summary>
-	/// Return a string representation of this filter.
-	/// </summary>
-	/// <returns>The string representation of this filter</returns>
-	string ToString() const
-	{
-		size_t i;
-		stringstream ss;
-		ss  << "Temporal Filter:\n"
-			<< "\n	       Size: " << Size()
-			<< "\n           Type: " << TemporalFilterCreator<T>::ToString(m_FilterType)
-			<< "\n       Sum Filt: " << SumFilt();
-		ss << "\nDeltas: \n";
-
-		for (i = 0; i < m_Deltas.size(); i++)
-		{
-			ss << "Deltas[" << i << "]: " << m_Deltas[i] << "\n";
-		}
-
-		ss << "Filter: \n";
-
-		for (i = 0; i < m_Filter.size(); i++)
-		{
-			ss << "Filter[" << i << "]: " << m_Filter[i] << "\n";
-			//ss << m_Filter[i] << "\n";
-		}
-
-		return ss.str();
-	}
-
-	/// <summary>
-	/// Accessors.
-	/// </summary>
-	size_t Size() const { return m_Filter.size(); }
-	size_t TemporalSamples() const { return m_TemporalSamples; }
-	T FilterWidth() const { return m_FilterWidth; }
-	T FilterExp() const { return m_FilterExp; }
-	T SumFilt() const { return m_SumFilt; }
-	T* Deltas() { return m_Deltas.data(); }
-	T* Filter() { return m_Filter.data(); }
-	eTemporalFilterType FilterType() const { return m_FilterType; }
-
-protected:
-	/// <summary>
-	/// Normalize the filter and the sum filt.
-	/// </summary>
-	/// <param name="maxFilt">The maximum filter value contained in the filter vector after it was created</param>
-	void FinishFilter(T maxFilt)
-	{
-		m_SumFilt = 0;
-
-		for (size_t i = 0; i < Size(); i++)
-		{
-			m_Filter[i] /= maxFilt;
-			m_SumFilt += m_Filter[i];
-		}
-
-		m_SumFilt /= Size();
-	}
-
-	T m_SumFilt = 1;//The sum of all filter values.
-	T m_FilterWidth;
-	T m_FilterExp;
-	size_t m_TemporalSamples;
-	vector<T> m_Deltas;//Delta vector.
-	vector<T> m_Filter;//Filter vector.
-	eTemporalFilterType m_FilterType;//The type of filter this is.
-};
-
-/// <summary>
-/// Derivation which implements the Exp filter.
-/// </summary>
-template <typename T>
-class EMBER_API ExpTemporalFilter : public TemporalFilter<T>
-{
-	TEMPORALFILTERUSINGS
-public:
-	/// <summary>
-	/// Constructor to create an Exp filter.
-	/// </summary>
-	/// <param name="temporalSamples">The number of temporal samples in the ember being rendered</param>
-	/// <param name="filterWidth">The width of the filter.</param>
-	/// <param name="filterExp">The filter exponent.</param>
-	ExpTemporalFilter(size_t temporalSamples, T filterWidth, T filterExp = 1)
-		: TemporalFilter<T>(eTemporalFilterType::EXP_TEMPORAL_FILTER, temporalSamples, filterWidth, filterExp)
-	{
-		if (Size() > 1)
-		{
-			T slpx, maxFilt = 0;
-
-			for (size_t i = 0; i < Size(); i++)
-			{
-				if (filterExp >= 0)
-					slpx = (static_cast<T>(i) + 1) / Size();
-				else
-					slpx = static_cast<T>(Size() - i) / Size();
-
-				//Scale the color based on these values.
-				m_Filter[i] = std::pow(slpx, fabs(filterExp));
-
-				//Keep the max.
-				if (m_Filter[i] > maxFilt)
-					maxFilt = m_Filter[i];
-			}
-
-			FinishFilter(maxFilt);
-		}
-	}
-};
-
-/// <summary>
-/// Derivation which implements the Gaussian filter.
-/// </summary>
-template <typename T>
-class EMBER_API GaussianTemporalFilter : public TemporalFilter<T>
-{
-	TEMPORALFILTERUSINGS
-public:
-	/// <summary>
-	/// Constructor to create a Gaussian filter.
-	/// </summary>
-	/// <param name="temporalSamples">The number of temporal samples in the ember being rendered</param>
-	/// <param name="filterWidth">The width of the filter.</param>
-	/// <param name="filterExp">Unused, but needed to prevent equality tests from failing.</param>
-	GaussianTemporalFilter(size_t temporalSamples, T filterWidth, T filterExp)
-		: TemporalFilter<T>(eTemporalFilterType::GAUSSIAN_TEMPORAL_FILTER, temporalSamples, filterWidth, filterExp)
-	{
-		if (Size() > 1)
-		{
-			T maxFilt = 0, halfSteps = static_cast<T>(Size()) / static_cast<T>(2);
-			GaussianFilter<T> gaussian(1, 1);//Just pass dummy values, they are unused in this case.
-
-			for (size_t i = 0; i < Size(); i++)
-			{
-				m_Filter[i] = gaussian.Filter(gaussian.Support() * fabs(i - halfSteps) / halfSteps);
-
-				//Keep the max.
-				if (m_Filter[i] > maxFilt)
-					maxFilt = m_Filter[i];
-			}
-
-			FinishFilter(maxFilt);
-		}
-	}
-};
-
-/// <summary>
-/// Derivation which implements the Box filter.
-/// </summary>
-template <typename T>
-class EMBER_API BoxTemporalFilter : public TemporalFilter<T>
-{
-	TEMPORALFILTERUSINGS
-public:
-	/// <summary>
-	/// Constructor to create a Box filter.
-	/// </summary>
-	/// <param name="temporalSamples">The number of temporal samples in the ember being rendered</param>
-	/// <param name="filterWidth">The width of the filter.</param>
-	/// <param name="filterExp">Unused, but needed to prevent equality tests from failing.</param>
-	BoxTemporalFilter(size_t temporalSamples, T filterWidth, T filterExp)
-		: TemporalFilter<T>(eTemporalFilterType::BOX_TEMPORAL_FILTER, temporalSamples, filterWidth, filterExp)
-	{
-		if (Size() > 1)
-		{
-			for (size_t i = 0; i < Size(); i++)
-				m_Filter[i] = 1;
-
-			FinishFilter(1);
-		}
-	}
-};
-
-/// <summary>
-/// Convenience class to assist in converting between filter names and the filter objects themselves.
-/// </summary>
-template <typename T>
-class EMBER_API TemporalFilterCreator
-{
-public:
-	/// <summary>
-	/// Creates the specified filter type based on the filterType enum parameter.
-	/// </summary>
-	/// <param name="filterType">Type of the filter</param>
-	/// <param name="temporalSamples">The number of temporal samples in the ember being rendered</param>
-	/// <param name="filterWidth">The width of the filter</param>
-	/// <param name="filterExp">The filter exp, only used with Exp filter, otherwise unused but needed to prevent equality tests from failing.</param>
-	/// <returns>A pointer to the newly created filter object</returns>
-	static TemporalFilter<T>* Create(eTemporalFilterType filterType, size_t temporalSamples, T filterWidth, T filterExp)
-	{
-		TemporalFilter<T>* filter = nullptr;
-
-		switch (filterType)
-		{
-			case EmberNs::eTemporalFilterType::BOX_TEMPORAL_FILTER:
-				filter = new BoxTemporalFilter<T>(temporalSamples, filterWidth, filterExp);
-				break;
-
-			case EmberNs::eTemporalFilterType::GAUSSIAN_TEMPORAL_FILTER:
-				filter = new GaussianTemporalFilter<T>(temporalSamples, filterWidth, filterExp);
-				break;
-
-			case EmberNs::eTemporalFilterType::EXP_TEMPORAL_FILTER:
-				filter = new ExpTemporalFilter<T>(temporalSamples, filterWidth, filterExp);
-				break;
-
-			default:
-				filter = new BoxTemporalFilter<T>(temporalSamples, filterWidth, filterExp);//Default to box if bad enum passed in.
-				break;
-		}
-
-		return filter;
-	}
-
-	/// <summary>
-	/// Return a string vector of the available filter types.
-	/// </summary>
-	/// <returns>A vector of strings populated with the available filter types</returns>
-	static vector<string> FilterTypes()
-	{
-		vector<string> v;
-		v.reserve(3);
-		v.push_back("Box");
-		v.push_back("Gaussian");
-		v.push_back("Exp");
-		return v;
-	}
-
-	/// <summary>
-	/// Convert between the filter name string and its type enum.
-	/// </summary>
-	/// <param name="filterType">The string name of the filter</param>
-	/// <returns>The filter type enum</returns>
-	static eTemporalFilterType FromString(const string& filterType)
-	{
-		if (!_stricmp(filterType.c_str(), "box"))
-			return eTemporalFilterType::BOX_TEMPORAL_FILTER;
-		else if (!_stricmp(filterType.c_str(), "gaussian"))
-			return eTemporalFilterType::GAUSSIAN_TEMPORAL_FILTER;
-		else if (!_stricmp(filterType.c_str(), "exp"))
-			return eTemporalFilterType::EXP_TEMPORAL_FILTER;
-		else
-			return eTemporalFilterType::BOX_TEMPORAL_FILTER;
-	}
-
-	/// <summary>
-	/// Convert between the filter type enum and its name string.
-	/// </summary>
-	/// <param name="eTemporalFilterType">The filter type enum</param>
-	/// <returns>The string name of the filter</returns>
-	static string ToString(eTemporalFilterType filterType)
-	{
-		string filter;
-
-		switch (filterType)
-		{
-			case EmberNs::eTemporalFilterType::BOX_TEMPORAL_FILTER:
-				filter = "Box";
-				break;
-
-			case EmberNs::eTemporalFilterType::GAUSSIAN_TEMPORAL_FILTER:
-				filter = "Gaussian";
-				break;
-
-			case EmberNs::eTemporalFilterType::EXP_TEMPORAL_FILTER:
-				filter = "Exp";
-				break;
-
-			default:
-				filter = "Box";
-				break;
-		}
-
-		return filter;
-	}
-};
-
-/// <summary>
-/// Thin wrapper around TemporalFilterCreator::ToString() to allow << operator on temporal filter type.
-/// </summary>
-/// <param name="stream">The stream to insert into</param>
-/// <param name="t">The type whose string representation will be inserted into the stream</param>
-/// <returns></returns>
-static std::ostream& operator<<(std::ostream& stream, const eTemporalFilterType& t)
-{
-	stream << TemporalFilterCreator<float>::ToString(t);
-	return stream;
-}
-}
+#pragma once
+
+#include "EmberDefines.h"
+
+/// <summary>
+/// TemporalFilter base, derived and factory classes.
+/// </summary>
+
+namespace EmberNs
+{
+/// <summary>
+/// The types of temporal filters available.
+/// BOX_TEMPORAL_FILTER is also what the creator class falls back to when given an unrecognized name or value.
+/// </summary>
+enum class eTemporalFilterType : et
+{
+	BOX_TEMPORAL_FILTER,//Every filter value is 1, see BoxTemporalFilter.
+	GAUSSIAN_TEMPORAL_FILTER,//Filter values sampled from a GaussianFilter, see GaussianTemporalFilter.
+	EXP_TEMPORAL_FILTER//Filter values follow a power curve set by the filter exponent, see ExpTemporalFilter.
+};
+
+/// <summary>
+/// g++ needs a forward declaration here.
+/// </summary>
+template <typename T> class TemporalFilterCreator;
+
+//Placed at the top of each class template derived from TemporalFilter<T>.
+//Members of a base class that depends on T are not found by unqualified lookup inside a template,
+//so these using declarations let the derived constructors refer to m_Filter, m_FilterExp, Size() and FinishFilter() directly.
+#define TEMPORALFILTERUSINGS \
+	using TemporalFilter<T>::m_Filter; \
+	using TemporalFilter<T>::m_FilterExp; \
+	using TemporalFilter<T>::Size; \
+	using TemporalFilter<T>::FinishFilter;
+
+/// <summary>
+/// Temporal filter is for doing motion blur while rendering a series of frames for animation.
+/// The filter created is used as a vector of scalar values to multiply the time value by in between embers.
+/// There are three possible types: Gaussian, Box and Exp.
+/// This base class stores the parameters and computes the deltas; the derived classes fill in the filter values.
+/// Template argument expected to be float or double.
+/// </summary>
+template <typename T>
+class EMBER_API TemporalFilter
+{
+public:
+	/// <summary>
+	/// Constructor to store the filtering parameters, size both vectors to the sample count and calculate deltas.
+	/// With exactly one sample the filter is complete here: delta 0, filter value 1.
+	/// Otherwise the deltas are spread evenly from -filterWidth / 2 to +filterWidth / 2 and
+	/// derived class constructors will complete the final part of filter setup.
+	/// </summary>
+	/// <param name="filterType">Type of the filter.</param>
+	/// <param name="temporalSamples">The number of temporal samples in the ember being rendered</param>
+	/// <param name="filterWidth">The width of the filter.</param>
+	/// <param name="filterExp">The filter exponent. Unused except with ExpTemporalFilter, but needed to prevent equality tests from failing.</param>
+	TemporalFilter(eTemporalFilterType filterType, size_t temporalSamples, T filterWidth, T filterExp)
+	{
+		m_FilterType = filterType;
+		m_TemporalSamples = temporalSamples;
+		m_FilterWidth = filterWidth;
+		m_FilterExp = filterExp;//Always assign this to prevent excessive recreates in Renderer::CreateTemporalFilter().
+		m_Deltas.resize(temporalSamples);
+		m_Filter.resize(temporalSamples);
+
+		if (temporalSamples == 1)
+		{
+			m_Deltas[0] = 0;
+			m_Filter[0] = 1;
+			m_SumFilt = 1;
+			return;
+		}
+
+		//Define the temporal deltas.
+		const auto lastIndex = static_cast<T>(temporalSamples - 1);
+
+		for (size_t step = 0; step < temporalSamples; step++)
+			m_Deltas[step] = (static_cast<T>(step) / lastIndex - static_cast<T>(0.5)) * filterWidth;
+	}
+
+	/// <summary>
+	/// Copy constructor, implemented in terms of the assignment operator.
+	/// </summary>
+	/// <param name="filter">The TemporalFilter object to copy</param>
+	TemporalFilter(const TemporalFilter<T>& filter)
+	{
+		operator=(filter);
+	}
+
+	/// <summary>
+	/// Virtual destructor so derived class destructors get called.
+	/// </summary>
+	virtual ~TemporalFilter() = default;
+
+	/// <summary>
+	/// Assignment operator. Copies every member; self assignment is a no-op.
+	/// </summary>
+	/// <param name="filter">The TemporalFilter object to copy.</param>
+	/// <returns>Reference to updated self</returns>
+	TemporalFilter<T>& operator = (const TemporalFilter<T>& filter)
+	{
+		if (this == &filter)
+			return *this;
+
+		m_FilterType = filter.m_FilterType;
+		m_TemporalSamples = filter.m_TemporalSamples;
+		m_FilterWidth = filter.m_FilterWidth;
+		m_FilterExp = filter.m_FilterExp;
+		m_SumFilt = filter.m_SumFilt;
+		m_Deltas = filter.m_Deltas;
+		m_Filter = filter.m_Filter;
+		return *this;
+	}
+
+	/// <summary>
+	/// Return a string representation of this filter: a header with size, type and sum filt,
+	/// followed by every delta and every filter value on its own line.
+	/// </summary>
+	/// <returns>The string representation of this filter</returns>
+	string ToString() const
+	{
+		stringstream os;
+		os << "Temporal Filter:\n";
+		os << "\n	       Size: " << Size();
+		os << "\n           Type: " << TemporalFilterCreator<T>::ToString(m_FilterType);
+		os << "\n       Sum Filt: " << SumFilt();
+		os << "\nDeltas: \n";
+
+		for (size_t d = 0; d < m_Deltas.size(); d++)
+			os << "Deltas[" << d << "]: " << m_Deltas[d] << "\n";
+
+		os << "Filter: \n";
+
+		for (size_t f = 0; f < m_Filter.size(); f++)
+			os << "Filter[" << f << "]: " << m_Filter[f] << "\n";
+
+		return os.str();
+	}
+
+	/// <summary>
+	/// Accessors.
+	/// Deltas() and Filter() return pointers into the internal vectors.
+	/// </summary>
+	size_t Size() const { return m_Filter.size(); }
+	size_t TemporalSamples() const { return m_TemporalSamples; }
+	T FilterWidth() const { return m_FilterWidth; }
+	T FilterExp() const { return m_FilterExp; }
+	T SumFilt() const { return m_SumFilt; }
+	T* Deltas() { return m_Deltas.data(); }
+	T* Filter() { return m_Filter.data(); }
+	eTemporalFilterType FilterType() const { return m_FilterType; }
+
+protected:
+	/// <summary>
+	/// Normalize the filter by dividing every value by maxFilt, then
+	/// set the sum filt to the normalized total divided by the number of values.
+	/// </summary>
+	/// <param name="maxFilt">The maximum filter value contained in the filter vector after it was created</param>
+	void FinishFilter(T maxFilt)
+	{
+		m_SumFilt = 0;
+
+		for (auto& weight : m_Filter)
+		{
+			weight /= maxFilt;
+			m_SumFilt += weight;
+		}
+
+		m_SumFilt /= Size();
+	}
+
+	T m_SumFilt = 1;//The total of the normalized filter values divided by their count.
+	T m_FilterWidth;//The width the deltas are spread across.
+	T m_FilterExp;//The exponent, only meaningful for the Exp filter.
+	size_t m_TemporalSamples;//The number of samples requested at construction.
+	vector<T> m_Deltas;//Delta vector.
+	vector<T> m_Filter;//Filter vector.
+	eTemporalFilterType m_FilterType;//The type of filter this is.
+};
+
+/// <summary>
+/// Derivation which implements the Exp filter.
+/// Filter values follow a power curve: rising across the samples when the exponent is
+/// non-negative, falling when it is negative. The magnitude of the exponent sets the steepness.
+/// </summary>
+template <typename T>
+class EMBER_API ExpTemporalFilter : public TemporalFilter<T>
+{
+	TEMPORALFILTERUSINGS
+public:
+	/// <summary>
+	/// Constructor to create an Exp filter.
+	/// Nothing beyond the base class setup is done when there is one sample or fewer.
+	/// </summary>
+	/// <param name="temporalSamples">The number of temporal samples in the ember being rendered</param>
+	/// <param name="filterWidth">The width of the filter.</param>
+	/// <param name="filterExp">The filter exponent. Default: 1.</param>
+	ExpTemporalFilter(size_t temporalSamples, T filterWidth, T filterExp = 1)
+		: TemporalFilter<T>(eTemporalFilterType::EXP_TEMPORAL_FILTER, temporalSamples, filterWidth, filterExp)
+	{
+		const size_t count = Size();
+
+		if (count <= 1)
+			return;
+
+		const bool rising = filterExp >= 0;
+		const auto power = fabs(filterExp);
+		T peak = 0;
+
+		for (size_t idx = 0; idx < count; idx++)
+		{
+			//Fraction of the way through the samples, counted from the front or the back.
+			const T frac = rising ? (static_cast<T>(idx) + 1) / count
+								  : static_cast<T>(count - idx) / count;
+			//Scale the color based on these values.
+			m_Filter[idx] = std::pow(frac, power);
+
+			//Keep the max.
+			if (m_Filter[idx] > peak)
+				peak = m_Filter[idx];
+		}
+
+		FinishFilter(peak);
+	}
+};
+
+/// <summary>
+/// Derivation which implements the Gaussian filter.
+/// Each filter value is a GaussianFilter evaluated at the sample's distance from the middle of the samples.
+/// </summary>
+template <typename T>
+class EMBER_API GaussianTemporalFilter : public TemporalFilter<T>
+{
+	TEMPORALFILTERUSINGS
+public:
+	/// <summary>
+	/// Constructor to create a Gaussian filter.
+	/// Nothing beyond the base class setup is done when there is one sample or fewer.
+	/// </summary>
+	/// <param name="temporalSamples">The number of temporal samples in the ember being rendered</param>
+	/// <param name="filterWidth">The width of the filter.</param>
+	/// <param name="filterExp">Unused, but needed to prevent equality tests from failing.</param>
+	GaussianTemporalFilter(size_t temporalSamples, T filterWidth, T filterExp)
+		: TemporalFilter<T>(eTemporalFilterType::GAUSSIAN_TEMPORAL_FILTER, temporalSamples, filterWidth, filterExp)
+	{
+		if (Size() <= 1)
+			return;
+
+		const T middle = static_cast<T>(Size()) / static_cast<T>(2);
+		T peak = 0;
+		GaussianFilter<T> gaussian(1, 1);//Just pass dummy values, they are unused in this case.
+
+		for (size_t idx = 0; idx < Size(); idx++)
+		{
+			//Map the distance from the middle onto the support range of the Gaussian.
+			m_Filter[idx] = gaussian.Filter(gaussian.Support() * fabs(idx - middle) / middle);
+
+			//Keep the max.
+			if (m_Filter[idx] > peak)
+				peak = m_Filter[idx];
+		}
+
+		FinishFilter(peak);
+	}
+};
+
+/// <summary>
+/// Derivation which implements the Box filter, where every filter value is 1.
+/// </summary>
+template <typename T>
+class EMBER_API BoxTemporalFilter : public TemporalFilter<T>
+{
+	TEMPORALFILTERUSINGS
+public:
+	/// <summary>
+	/// Constructor to create a Box filter.
+	/// Nothing beyond the base class setup is done when there is one sample or fewer.
+	/// </summary>
+	/// <param name="temporalSamples">The number of temporal samples in the ember being rendered</param>
+	/// <param name="filterWidth">The width of the filter.</param>
+	/// <param name="filterExp">Unused, but needed to prevent equality tests from failing.</param>
+	BoxTemporalFilter(size_t temporalSamples, T filterWidth, T filterExp)
+		: TemporalFilter<T>(eTemporalFilterType::BOX_TEMPORAL_FILTER, temporalSamples, filterWidth, filterExp)
+	{
+		if (Size() <= 1)
+			return;
+
+		for (auto& weight : m_Filter)
+			weight = 1;
+
+		FinishFilter(1);
+	}
+};
+
+/// <summary>
+/// Convenience class to assist in converting between filter names and the filter objects themselves.
+/// All members are static. Box is the fallback wherever a name or enum value is not recognized.
+/// </summary>
+template <typename T>
+class EMBER_API TemporalFilterCreator
+{
+public:
+	/// <summary>
+	/// Creates the specified filter type based on the filterType enum parameter.
+	/// The object is allocated with new and handed back as a raw pointer.
+	/// </summary>
+	/// <param name="filterType">Type of the filter</param>
+	/// <param name="temporalSamples">The number of temporal samples in the ember being rendered</param>
+	/// <param name="filterWidth">The width of the filter</param>
+	/// <param name="filterExp">The filter exp, only used with Exp filter, otherwise unused but needed to prevent equality tests from failing.</param>
+	/// <returns>A pointer to the newly created filter object</returns>
+	static TemporalFilter<T>* Create(eTemporalFilterType filterType, size_t temporalSamples, T filterWidth, T filterExp)
+	{
+		switch (filterType)
+		{
+			case EmberNs::eTemporalFilterType::GAUSSIAN_TEMPORAL_FILTER:
+				return new GaussianTemporalFilter<T>(temporalSamples, filterWidth, filterExp);
+
+			case EmberNs::eTemporalFilterType::EXP_TEMPORAL_FILTER:
+				return new ExpTemporalFilter<T>(temporalSamples, filterWidth, filterExp);
+
+			case EmberNs::eTemporalFilterType::BOX_TEMPORAL_FILTER:
+			default://Default to box if bad enum passed in.
+				return new BoxTemporalFilter<T>(temporalSamples, filterWidth, filterExp);
+		}
+	}
+
+	/// <summary>
+	/// Return a string vector of the available filter types, in enum order.
+	/// </summary>
+	/// <returns>A vector of strings populated with the available filter types</returns>
+	static vector<string> FilterTypes()
+	{
+		return vector<string> { "Box", "Gaussian", "Exp" };
+	}
+
+	/// <summary>
+	/// Convert between the filter name string and its type enum.
+	/// The comparison ignores case, and any unrecognized name maps to Box.
+	/// </summary>
+	/// <param name="filterType">The string name of the filter</param>
+	/// <returns>The filter type enum</returns>
+	static eTemporalFilterType FromString(const string& filterType)
+	{
+		const char* name = filterType.c_str();
+
+		if (!_stricmp(name, "gaussian"))
+			return eTemporalFilterType::GAUSSIAN_TEMPORAL_FILTER;
+
+		if (!_stricmp(name, "exp"))
+			return eTemporalFilterType::EXP_TEMPORAL_FILTER;
+
+		//Either "box" or something unrecognized.
+		return eTemporalFilterType::BOX_TEMPORAL_FILTER;
+	}
+
+	/// <summary>
+	/// Convert between the filter type enum and its name string.
+	/// Any unrecognized value maps to "Box".
+	/// </summary>
+	/// <param name="filterType">The filter type enum</param>
+	/// <returns>The string name of the filter</returns>
+	static string ToString(eTemporalFilterType filterType)
+	{
+		switch (filterType)
+		{
+			case EmberNs::eTemporalFilterType::GAUSSIAN_TEMPORAL_FILTER:
+				return "Gaussian";
+
+			case EmberNs::eTemporalFilterType::EXP_TEMPORAL_FILTER:
+				return "Exp";
+
+			case EmberNs::eTemporalFilterType::BOX_TEMPORAL_FILTER:
+			default:
+				return "Box";
+		}
+	}
+};
+
+/// <summary>
+/// Thin wrapper around TemporalFilterCreator::ToString() to allow << operator on temporal filter type.
+/// The float instantiation of the creator is always the one used to look up the name.
+/// </summary>
+/// <param name="stream">The stream to insert into</param>
+/// <param name="t">The type whose string representation will be inserted into the stream</param>
+/// <returns>The stream that was passed in</returns>
+static std::ostream& operator<<(std::ostream& stream, const eTemporalFilterType& t)
+{
+	return stream << TemporalFilterCreator<float>::ToString(t);
+}
+}
@@ -1,146 +1,146 @@
-#pragma once
-
-#include "EmberDefines.h"
-
-/// <summary>
-/// Timing and CriticalSection classes.
-/// </summary>
-
-namespace EmberNs
-{
-/// <summary>
-/// Since the algorithm is so computationally intensive, timing and benchmarking are an integral portion
-/// of both the development process and the execution results. This class provides an easy way to time
-/// things by simply calling its Tic() and Toc() member functions. It also assists with formatting the
-/// elapsed time as a string.
-/// </summary>
-class EMBER_API Timing
-{
-public:
-	/// <summary>
-	/// Constructor that takes an optional precision argument which specifies how many digits after the decimal place should be printed for seconds.
-	/// As a convenience, the Tic() function is called automatically.
-	/// </summary>
-	/// <param name="precision">The precision of the seconds field of the elapsed time. Default: 2.</param>
-	Timing(int precision = 2)
-	{
-		m_Precision = precision;
-		Init();
-		Tic();
-	}
-
-	/// <summary>
-	/// Set the begin time.
-	/// </summary>
-	/// <returns>The begin time cast to a double</returns>
-	double Tic()
-	{
-		m_BeginTime = NowMsD();
-		return BeginTime();
-	}
-
-	/// <summary>
-	/// Set the end time and optionally output a string showing the elapsed time.
-	/// </summary>
-	/// <param name="str">The string to output. Default: nullptr.</param>
-	/// <param name="fullString">If true, output the string verbatim, else output the text " processing time: " in between str and the formatted time.</param>
-	/// <returns>The elapsed time in milliseconds as a double</returns>
-	double Toc(const char* str = nullptr, bool fullString = false)
-	{
-		m_EndTime = NowMsD();
-		const auto ms = ElapsedTime();
-
-		if (str)
-		{
-			cout << string(str) << (fullString ? "" : " processing time: ") << Format(ms) << "\n";
-		}
-
-		return ms;
-	}
-
-	/// <summary>
-	/// Return the begin time as a double.
-	/// </summary>
-	/// <returns></returns>
-	double BeginTime() const { return static_cast<double>(m_BeginTime.time_since_epoch().count()); }
-
-	/// <summary>
-	/// Return the end time as a double.
-	/// </summary>
-	/// <returns></returns>
-	double EndTime() const { return static_cast<double>(m_EndTime.time_since_epoch().count()); }
-
-	/// <summary>
-	/// Return the elapsed time in milliseconds.
-	/// </summary>
-	/// <returns>The elapsed time in milliseconds as a double</returns>
-	double ElapsedTime() const
-	{
-		return (m_EndTime - m_BeginTime).count();
-	}
-
-	/// <summary>
-	/// Formats a specified milliseconds value as a string.
-	/// This uses some intelligence to determine what to return depending on how much time has elapsed.
-	/// Days, hours and minutes are only included if 1 or more of them has elapsed. Seconds are always
-	/// included as a decimal value with the precision the user specified in the constructor.
-	/// </summary>
-	/// <param name="ms">The time in milliseconds to format</param>
-	/// <returns>The formatted string</returns>
-	string Format(double ms) const
-	{
-		stringstream ss;
-		double x = ms / 1000;
-		const auto secs = fmod(x, 60);
-		x /= 60;
-		const auto mins = fmod(x, 60);
-		x /= 60;
-		const auto hours = fmod(x, 24);
-		x /= 24;
-		const auto days = x;
-
-		if (days >= 1)
-			ss << static_cast<int>(days) << "d ";
-
-		if (hours >= 1)
-			ss << static_cast<int>(hours) << "h ";
-
-		if (mins >= 1)
-			ss << static_cast<int>(mins) << "m ";
-
-		ss << std::fixed << std::setprecision(m_Precision) << secs << "s";
-		return ss.str();
-	}
-
-	/// <summary>
-	/// Return the number of cores in the system.
-	/// </summary>
-	/// <returns>The number of cores in the system</returns>
-	static uint ProcessorCount()
-	{
-		Init();
-		return m_ProcessorCount;
-	}
-
-private:
-	/// <summary>
-	/// Query and store the performance info of the system.
-	/// Since it will never change it only needs to be queried once.
-	/// This is achieved by keeping static state and performance variables.
-	/// </summary>
-	static void Init()
-	{
-		if (!m_TimingInit)
-		{
-			m_ProcessorCount = thread::hardware_concurrency();
-			m_TimingInit = true;
-		}
-	}
-
-	int m_Precision;//How many digits after the decimal place to print for seconds.
-	DoubleMsTimePoint m_BeginTime;//The start of the timing, set with Tic().
-	DoubleMsTimePoint m_EndTime;//The end of the timing, set with Toc().
-	static bool m_TimingInit;//Whether the performance info has bee queried.
-	static uint m_ProcessorCount;//The number of cores on the system, set in Init().
-};
-}
+#pragma once
+
+#include "EmberDefines.h"
+
+/// <summary>
+/// Timing class.
+/// </summary>
+
+namespace EmberNs
+{
+/// <summary>
+/// Since the algorithm is so computationally intensive, timing and benchmarking are an integral portion
+/// of both the development process and the execution results. This class provides an easy way to time
+/// things by simply calling its Tic() and Toc() member functions. It also assists with formatting the
+/// elapsed time as a string.
+/// </summary>
+class EMBER_API Timing
+{
+public:
+	/// <summary>
+	/// Constructor that takes an optional precision argument which specifies how many digits after the decimal place should be printed for seconds.
+	/// As a convenience, the Tic() function is called automatically.
+	/// </summary>
+	/// <param name="precision">The precision of the seconds field of the elapsed time. Default: 2.</param>
+	Timing(int precision = 2) noexcept
+	{
+		m_Precision = precision;
+		Init();//Queries the processor count if it has not been queried yet.
+		Tic();
+	}
+
+	/// <summary>
+	/// Set the begin time.
+	/// </summary>
+	/// <returns>The begin time cast to a double</returns>
+	double Tic() noexcept
+	{
+		m_BeginTime = NowMsD();
+		return BeginTime();
+	}
+
+	/// <summary>
+	/// Set the end time and optionally output a string showing the elapsed time.
+	/// </summary>
+	/// <param name="str">The string to output. Default: nullptr.</param>
+	/// <param name="fullString">If true, output the string verbatim, else output the text " processing time: " in between str and the formatted time.</param>
+	/// <returns>The elapsed time in milliseconds as a double</returns>
+	double Toc(const char* str = nullptr, bool fullString = false)
+	{
+		m_EndTime = NowMsD();
+		const auto ms = ElapsedTime();
+
+		if (str)//Nothing is printed when no string is supplied.
+		{
+			cout << string(str) << (fullString ? "" : " processing time: ") << Format(ms) << "\n";
+		}
+
+		return ms;
+	}
+
+	/// <summary>
+	/// Return the begin time as a double.
+	/// </summary>
+	/// <returns>The count of m_BeginTime's time since epoch, presumably milliseconds going by the DoubleMsTimePoint type name</returns>
+	double BeginTime() const noexcept { return static_cast<double>(m_BeginTime.time_since_epoch().count()); }
+
+	/// <summary>
+	/// Return the end time as a double.
+	/// </summary>
+	/// <returns>The count of m_EndTime's time since epoch, presumably milliseconds going by the DoubleMsTimePoint type name</returns>
+	double EndTime() const noexcept { return static_cast<double>(m_EndTime.time_since_epoch().count()); }
+
+	/// <summary>
+	/// Return the elapsed time in milliseconds.
+	/// </summary>
+	/// <returns>The elapsed time in milliseconds as a double</returns>
+	double ElapsedTime() const noexcept
+	{
+		return (m_EndTime - m_BeginTime).count();//m_EndTime is only assigned in Toc(), so call that first.
+	}
+
+	/// <summary>
+	/// Formats a specified milliseconds value as a string.
+	/// This uses some intelligence to determine what to return depending on how much time has elapsed.
+	/// Days, hours and minutes are only included if 1 or more of them has elapsed. Seconds are always
+	/// included as a decimal value with the precision the user specified in the constructor.
+	/// </summary>
+	/// <param name="ms">The time in milliseconds to format</param>
+	/// <returns>The formatted string</returns>
+	string Format(double ms) const
+	{
+		stringstream ss;
+		double x = ms / 1000;//Total elapsed seconds.
+		const auto secs = fmod(x, 60);//Seconds left over after the whole minutes.
+		x /= 60;//Total elapsed minutes.
+		const auto mins = fmod(x, 60);//Minutes left over after the whole hours.
+		x /= 60;//Total elapsed hours.
+		const auto hours = fmod(x, 24);//Hours left over after the whole days.
+		x /= 24;//Total elapsed days.
+		const auto days = x;
+
+		if (days >= 1)
+			ss << static_cast<int>(days) << "d ";//The cast drops the fractional part.
+
+		if (hours >= 1)
+			ss << static_cast<int>(hours) << "h ";
+
+		if (mins >= 1)
+			ss << static_cast<int>(mins) << "m ";
+
+		ss << std::fixed << std::setprecision(m_Precision) << secs << "s";
+		return ss.str();
+	}
+
+	/// <summary>
+	/// Return the number of cores in the system.
+	/// </summary>
+	/// <returns>The number of cores in the system</returns>
+	static uint ProcessorCount()
+	{
+		Init();//Makes sure the count has been queried before it is returned.
+		return m_ProcessorCount;
+	}
+
+private:
+	/// <summary>
+	/// Query and store the performance info of the system.
+	/// Since it will never change it only needs to be queried once.
+	/// This is achieved by keeping static state and performance variables.
+	/// </summary>
+	static void Init() noexcept
+	{
+		if (!m_TimingInit)//NOTE(review): plain static bool, read and written here without any synchronization.
+		{
+			m_ProcessorCount = thread::hardware_concurrency();
+			m_TimingInit = true;
+		}
+	}
+
+	int m_Precision;//How many digits after the decimal place to print for seconds.
+	DoubleMsTimePoint m_BeginTime;//The start of the timing, set with Tic().
+	DoubleMsTimePoint m_EndTime;//The end of the timing, set with Toc().
+	static bool m_TimingInit;//Whether the performance info has been queried.
+	static uint m_ProcessorCount;//The number of cores on the system, set in Init().
+};
+}
@@ -1,67 +1,67 @@
-#pragma once
-
-#include "Variation.h"
-
-/// <summary>
-/// VariationList class.
-/// </summary>
-
-namespace EmberNs
-{
-/// <summary>
-/// Since the list of variations is numerous, it's convenient to be able to make copies
-/// of specific ones. This class holds a list of pointers to variation objects for every
-/// variation available. Similar to the PaletteList class, a caller can look up a variation
-/// by name or ID and retrieve a copy of it.
-/// This class follows the singleton pattern.
-/// All variations are deleted upon destruction.
-/// Template argument expected to be float or double.
-/// </summary>
-template <typename T>
-class EMBER_API VariationList: public Singleton<VariationList<T>>
-{
-public:
-	const Variation<T>* GetVariation(size_t index) const;
-	const Variation<T>* GetVariation(size_t index, eVariationType varType) const;
-	Variation<T>* GetVariationCopy(size_t index, T weight = 1) const;
-	Variation<T>* GetVariationCopy(size_t index, eVariationType varType, T weight = 1) const;
-	const Variation<T>* GetVariation(eVariationId id) const;
-	Variation<T>* GetVariationCopy(eVariationId id, T weight = 1) const;
-	const Variation<T>* GetVariation(const string& name) const;
-	Variation<T>* GetVariationCopy(const string& name, T weight = 1) const;
-	const ParametricVariation<T>* GetParametricVariation(size_t index) const;
-	const ParametricVariation<T>* GetParametricVariation(const string& name) const;
-	ParametricVariation<T>* GetParametricVariationCopy(eVariationId id, T weight = 1) const;
-	const Variation<T>* GetPreVariation(const string& name) const;
-	const Variation<T>* GetPostVariation(const string& name) const;
-	int GetVariationIndex(const string& name) const;
-	size_t Size() const;
-	size_t RegSize() const;
-	size_t PreSize() const;
-	size_t PostSize() const;
-	size_t ParametricSize() const;
-	size_t NonParametricSize() const;
-
-	const vector<const Variation<T>*>& AllVars()  const;
-	const vector<const Variation<T>*>& RegVars()  const;
-	const vector<const Variation<T>*>& PreVars()  const;
-	const vector<const Variation<T>*>& PostVars() const;
-	const vector<const Variation<T>*>& NonParametricVariations() const;
-	const vector<const ParametricVariation<T>*>& ParametricVariations() const;
-
-	SINGLETON_DERIVED_DECL(VariationList<T>);
-
-private:
-	VariationList();
-	Variation<T>* MakeCopyWithWeight(const Variation<T>* var, T weight) const;
-	template <template <typename> class U>
-	const U<T>* SearchVarName(const vector<const U<T>*>& vars, const string& name) const;
-
-	vector<const Variation<T>*> m_Variations;//A list of pointers to dynamically allocated variation objects.
-	vector<const Variation<T>*> m_RegVariations;
-	vector<const Variation<T>*> m_PreVariations;
-	vector<const Variation<T>*> m_PostVariations;
-	vector<const Variation<T>*> m_NonParametricVariations;
-	vector<const ParametricVariation<T>*> m_ParametricVariations;//A list of pointers to elements in m_Variations which are derived from ParametricVariation.
-};
-}
+#pragma once
+
+#include "Variation.h"
+
+/// <summary>
+/// VariationList class.
+/// </summary>
+
+namespace EmberNs
+{
+/// <summary>
+/// Since the list of variations is numerous, it's convenient to be able to make copies
+/// of specific ones. This class holds a list of pointers to variation objects for every
+/// variation available. Similar to the PaletteList class, a caller can look up a variation
+/// by name or ID and retrieve a copy of it.
+/// This class follows the singleton pattern.
+/// All variations are deleted upon destruction.
+/// Template argument expected to be float or double.
+/// </summary>
+template <typename T>
+class EMBER_API VariationList: public Singleton<VariationList<T>>
+{
+public:
+	const Variation<T>* GetVariation(size_t index) const;
+	const Variation<T>* GetVariation(size_t index, eVariationType varType) const;
+	Variation<T>* GetVariationCopy(size_t index, T weight = 1) const;//NOTE(review): the *Copy() functions return non-const raw pointers; presumably the caller owns the copy - confirm in the implementation.
+	Variation<T>* GetVariationCopy(size_t index, eVariationType varType, T weight = 1) const;
+	const Variation<T>* GetVariation(eVariationId id) const;
+	Variation<T>* GetVariationCopy(eVariationId id, T weight = 1) const;
+	const Variation<T>* GetVariation(const string& name) const;
+	Variation<T>* GetVariationCopy(const string& name, T weight = 1) const;
+	const ParametricVariation<T>* GetParametricVariation(size_t index) const;
+	const ParametricVariation<T>* GetParametricVariation(const string& name) const;
+	ParametricVariation<T>* GetParametricVariationCopy(eVariationId id, T weight = 1) const;
+	const Variation<T>* GetPreVariation(const string& name) const;
+	const Variation<T>* GetPostVariation(const string& name) const;
+	int GetVariationIndex(const string& name) const;//NOTE(review): returns int rather than size_t, presumably so a negative value can mean not found - confirm.
+	size_t Size() const;
+	size_t RegSize() const;
+	size_t PreSize() const;
+	size_t PostSize() const;
+	size_t ParametricSize() const;
+	size_t NonParametricSize() const;
+
+	const vector<const Variation<T>*>& AllVars()  const;
+	const vector<const Variation<T>*>& RegVars()  const;
+	const vector<const Variation<T>*>& PreVars()  const;
+	const vector<const Variation<T>*>& PostVars() const;
+	const vector<const Variation<T>*>& NonParametricVariations() const;
+	const vector<const ParametricVariation<T>*>& ParametricVariations() const;
+
+	SINGLETON_DERIVED_DECL(VariationList<T>);
+
+private:
+	VariationList();//Declared private, so outside code cannot construct an instance directly.
+	Variation<T>* MakeCopyWithWeight(const Variation<T>* var, T weight) const;
+	template <template <typename> class U>
+	const U<T>* SearchVarName(const vector<const U<T>*>& vars, const string& name) const;
+
+	vector<const Variation<T>*> m_Variations;//A list of pointers to dynamically allocated variation objects.
+	vector<const Variation<T>*> m_RegVariations;
+	vector<const Variation<T>*> m_PreVariations;
+	vector<const Variation<T>*> m_PostVariations;
+	vector<const Variation<T>*> m_NonParametricVariations;
+	vector<const ParametricVariation<T>*> m_ParametricVariations;//A list of pointers to elements in m_Variations which are derived from ParametricVariation.
+};
+}
@@ -1,317 +1,317 @@
-#pragma once
-
-#include "Variation.h"
-
-namespace EmberNs
-{
-/// <summary>
-/// Gnarly.
-/// </summary>
-template <typename T>
-class GnarlyVariation : public ParametricVariation<T>
-{
-public:
-	GnarlyVariation(T weight = 1.0) : ParametricVariation<T>("gnarly", eVariationId::VAR_GNARLY, weight)
-	{
-		Init();
-	}
-
-	PARVARCOPY(GnarlyVariation)
-
-	virtual void Func(IteratorHelper<T>& helper, Point<T>& outPoint, QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand) override
-	{
-		T Vx, Vy;
-		T Cx, Cy;
-		T Lx, Ly;
-		T r, theta, s, c;
-		Vx = helper.In.x;
-		Vy = helper.In.y;
-
-		if (m_GnarlyCellSize != T(0))
-		{
-			Cx = (Floor<T>(Vx / m_GnarlyCellSize) + T(0.5)) * m_GnarlyCellSize;
-			Cy = (Floor<T>(Vy / m_GnarlyCellSize) + T(0.5)) * m_GnarlyCellSize;
-			Lx = Vx - Cx;
-			Ly = Vy - Cy;
-
-			if ((Lx * Lx + Ly * Ly) <= m_R2)
-			{
-				r = (Lx * Lx + Ly * Ly) / m_R2;
-				theta = m_GnarlyTwist * std::log(r);
-				sincos(theta, &s, &c);
-				Vx = Cx + c * Lx + s * Ly;
-				Vy = Cy - s * Lx + c * Ly;
-			}
-		}
-
-		helper.Out.x = m_Weight * Vx;
-		helper.Out.y = m_Weight * Vy;
-		helper.Out.z = DefaultZ(helper);
-	}
-
-	virtual string OpenCLString() const override
-	{
-		ostringstream ss, ss2;
-		intmax_t i = 0;
-		ss2 << "_" << XformIndexInEmber() << "]";
-		string index = ss2.str();
-		string weight = WeightDefineString();
-		string cellsize = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
-		string twist = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
-		string r2 = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
-		ss << "\t{\n"
-		   << "\t\treal_t Vx, Vy, Cx, Cy, Lx, Ly, Lxy;\n"
-		   << "\t\treal_t r, theta, s, c;\n"
-		   << "\n"
-		   << "\t\tVx = vIn.x;\n"
-		   << "\t\tVy = vIn.y;\n"
-		   << "\n"
-		   << "\t\tif (" << cellsize << " != (real_t)(0))\n"
-		   << "\t\t{\n"
-		   << "\t\t\tCx = (floor(Vx / " << cellsize << ") + (real_t)(0.5)) * " << cellsize << ";\n"
-		   << "\t\t\tCy = (floor(Vy / " << cellsize << ") + (real_t)(0.5)) * " << cellsize << ";\n"
-		   << "\n"
-		   << "\t\t\tLx = Vx - Cx;\n"
-		   << "\t\t\tLy = Vy - Cy;\n"
-		   << "\t\t\tLxy = fma(Lx, Lx, Ly * Ly);\n"
-		   << "\n"
-		   << "\t\t\tif (Lxy <= " << r2 << ")\n"
-		   << "\t\t\t{\n"
-		   << "\t\t\t\tr = Lxy / " << r2 << ";\n"
-		   << "\t\t\t\ttheta = " << twist << " * log(r);\n"
-		   << "\t\t\t\ts = sin(theta);\n"
-		   << "\t\t\t\tc = cos(theta);\n"
-		   << "\t\t\t\tVx = Cx + c * Lx + s * Ly;\n"
-		   << "\t\t\t\tVy = Cy - s * Lx + c * Ly;\n"
-		   << "\t\t\t}\n"
-		   << "\t\t}\n"
-		   << "\n"
-		   << "\t\tvOut.x = " << weight << " * Vx;\n"
-		   << "\t\tvOut.y = " << weight << " * Vy;\n"
-		   << "\t\tvOut.z = " << DefaultZCl()
-		   << "\t}\n";
-		return ss.str();
-	}
-
-	virtual void Precalc() override
-	{
-		T radius = T(0.5) * m_GnarlyCellSize;
-		m_R2 = Zeps(SQR(radius));
-	}
-
-protected:
-	void Init()
-	{
-		string prefix = Prefix();
-		m_Params.clear();
-		m_Params.push_back(ParamWithName<T>(&m_GnarlyCellSize, prefix + "gnarly_cellsize", T(1)));
-		m_Params.push_back(ParamWithName<T>(&m_GnarlyTwist, prefix + "gnarly_twist", T(1)));
-		m_Params.push_back(ParamWithName<T>(true, &m_R2, prefix + "gnarly_r2"));//Precalc.
-	}
-
-private:
-	T m_GnarlyCellSize;
-	T m_GnarlyTwist;
-	T m_R2;//Precalc.
-};
-
-/// <summary>
-/// inkdrop by Jess.
-/// </summary>
-template <typename T>
-class InkdropVariation : public ParametricVariation<T>
-{
-public:
-	InkdropVariation(T weight = 1.0) : ParametricVariation<T>("inkdrop", eVariationId::VAR_INKDROP, weight)
-	{
-		Init();
-	}
-
-	PARVARCOPY(InkdropVariation)
-
-	virtual void Func(IteratorHelper<T>& helper, Point<T>& outPoint, QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand) override
-	{
-		T distx = helper.In.x - m_X;
-		T disty = helper.In.y - m_Y;
-		T dist2 = SQR(distx) + SQR(disty);
-		T adjust = std::sqrt(dist2 + m_Rad2) - std::sqrt(dist2);
-		T bearing = std::atan2(disty, distx);
-		T x = helper.In.x + (std::cos(bearing) * adjust);
-		T y = helper.In.y + (std::sin(bearing) * adjust);
-		helper.Out.x = m_Weight * x;
-		helper.Out.y = m_Weight * y;
-		helper.Out.z = DefaultZ(helper);
-	}
-
-	virtual string OpenCLString() const override
-	{
-		ostringstream ss, ss2;
-		intmax_t i = 0;
-		ss2 << "_" << XformIndexInEmber() << "]";
-		string index = ss2.str();
-		string weight = WeightDefineString();
-		string r = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
-		string x = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
-		string y = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
-		string rad2 = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
-		ss << "\t{\n"
-		   << "\t\treal_t distx = vIn.x - " << x << ";\n"
-		   << "\t\treal_t disty = vIn.y - " << y << ";\n"
-		   << "\t\treal_t dist2 = SQR(distx) + SQR(disty);\n"
-		   << "\t\treal_t adjust = sqrt(dist2 + " << rad2 << ") - sqrt(dist2);\n"
-		   << "\n"
-		   << "\t\treal_t bearing = atan2(disty, distx);\n"
-		   << "\t\treal_t x = fma(cos(bearing), adjust, vIn.x);\n"
-		   << "\t\treal_t y = fma(sin(bearing), adjust, vIn.y);\n"
-		   << "\n"
-		   << "\t\tvOut.x = " << weight << " * x;\n"
-		   << "\t\tvOut.y = " << weight << " * y;\n"
-		   << "\t\tvOut.z = " << DefaultZCl()
-		   << "\t}\n";
-		return ss.str();
-	}
-
-	virtual void Precalc() override
-	{
-		m_Rad2 = SQR(m_R);
-	}
-
-protected:
-	void Init()
-	{
-		string prefix = Prefix();
-		m_Params.clear();
-		m_Params.push_back(ParamWithName<T>(&m_R, prefix + "inkdrop_r", T(0.5), eParamType::REAL, 0));
-		m_Params.push_back(ParamWithName<T>(&m_X, prefix + "inkdrop_x"));
-		m_Params.push_back(ParamWithName<T>(&m_Y, prefix + "inkdrop_y"));
-		m_Params.push_back(ParamWithName<T>(true, &m_Rad2, prefix + "inkdrop_rad2"));//Precalc.
-	}
-
-private:
-	T m_R;
-	T m_X;
-	T m_Y;
-	T m_Rad2;//Precalc.
-};
-
-/// <summary>
-/// hex_modulus.
-/// By tatasz.
-/// </summary>
-template <typename T>
-class HexModulusVariation : public ParametricVariation<T>
-{
-public:
-	HexModulusVariation(T weight = 1.0) : ParametricVariation<T>("hex_modulus", eVariationId::VAR_HEX_MODULUS, weight)
-	{
-		Init();
-	}
-
-	PARVARCOPY(HexModulusVariation)
-
-	virtual void Func(IteratorHelper<T>& helper, Point<T>& outPoint, QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand) override
-	{
-		//get hex
-		T X = helper.In.x * m_HsizePrecalc;
-		T Y = helper.In.y * m_HsizePrecalc;
-		T yover3 = Y / 3;
-		T x = M_SQRT3_3 * X - yover3;
-		T z = T(2.0) * yover3;
-		T y = -x - z;
-		//round
-		T rx = std::round(x);
-		T ry = std::round(y);
-		T rz = std::round(z);
-		T x_diff = std::abs(rx - x);
-		T y_diff = std::abs(ry - y);
-		T z_diff = std::abs(rz - z);
-
-		if ((x_diff > y_diff) & (x_diff > z_diff))
-			rx = -ry - rz;
-		else if (y_diff > z_diff)
-			ry = -rx - rz;
-		else
-			rz = -rx - ry;
-
-		T FX_h = M_SQRT3 * rx + M_SQRT3_2 * rz;
-		T FY_h = T(1.5) * rz;
-		T FX = X - FX_h;
-		T FY = Y - FY_h;
-		helper.Out.x = FX * m_WeightPrecalc;
-		helper.Out.y = FY * m_WeightPrecalc;
-		helper.Out.z = DefaultZ(helper);
-	}
-
-	virtual string OpenCLString() const override
-	{
-		ostringstream ss, ss2;
-		intmax_t i = 0;
-		ss2 << "_" << XformIndexInEmber() << "]";
-		string index = ss2.str();
-		string weight = WeightDefineString();
-		string size          = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
-		string hsizeprecalc  = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
-		string weightprecalc = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
-		ss << "\t{\n"
-		   << "\t\t//get hex\n"
-		   << "\t\treal_t X = vIn.x * " << hsizeprecalc << ";\n"
-		   << "\t\treal_t Y = vIn.y * " << hsizeprecalc << ";\n"
-		   << "\t\treal_t yover3 = Y / (real_t)(3.0);\n"
-		   << "\t\treal_t x = fma(M_SQRT3_3, X, -yover3);\n"
-		   << "\t\treal_t z = (real_t)(2.0) * yover3;\n"
-		   << "\t\treal_t y = -x - z;\n"
-		   << "\t\t//round\n"
-		   << "\t\treal_t rx = round(x);\n"
-		   << "\t\treal_t ry = round(y);\n"
-		   << "\t\treal_t rz = round(z);\n"
-		   << "\n"
-		   << "\t\treal_t x_diff = fabs(rx - x);\n"
-		   << "\t\treal_t y_diff = fabs(ry - y);\n"
-		   << "\t\treal_t z_diff = fabs(rz - z);\n"
-		   << "\n"
-		   << "\t\tif ((x_diff > y_diff) & (x_diff > z_diff))\n"
-		   << "\t\trx = -ry - rz;\n"
-		   << "\t\telse if (y_diff > z_diff)\n"
-		   << "\t\try = -rx - rz;\n"
-		   << "\t\telse\n"
-		   << "\t\trz = -rx - ry;\n"
-		   << "\n"
-		   << "\t\treal_t FX_h = fma(M_SQRT3, rx, M_SQRT3_2 * rz);\n"
-		   << "\t\treal_t FY_h = (real_t)(1.5) * rz;\n"
-		   << "\n"
-		   << "\t\treal_t FX = X - FX_h;\n"
-		   << "\t\treal_t FY = Y - FY_h;\n"
-		   << "\n"
-		   << "\t\tvOut.x = FX * " << weightprecalc << ";\n"
-		   << "\t\tvOut.y = FY * " << weightprecalc << ";\n"
-		   << "\t\tvOut.z = " << DefaultZCl()
-		   << "\t}\n";
-		return ss.str();
-	}
-
-	virtual void Precalc() override
-	{
-		m_HsizePrecalc = M_SQRT3_2 / Zeps(m_Size);
-		m_WeightPrecalc = m_Weight / M_SQRT3_2;
-	}
-
-protected:
-	void Init()
-	{
-		string prefix = Prefix();
-		m_Params.clear();
-		m_Params.push_back(ParamWithName<T>(&m_Size,                prefix + "hex_modulus_size", T(1.0)));
-		m_Params.push_back(ParamWithName<T>(true, &m_HsizePrecalc,  prefix + "hex_modulus_hsize_precalc"));//Precalc.
-		m_Params.push_back(ParamWithName<T>(true, &m_WeightPrecalc, prefix + "hex_modulus_weight_precalc"));
-	}
-
-private:
-	T m_Size;
-	T m_HsizePrecalc;//Precalc.
-	T m_WeightPrecalc;
-};
-
-MAKEPREPOSTPARVAR(Gnarly, gnarly, GNARLY)
-MAKEPREPOSTPARVAR(Inkdrop, inkdrop, INKDROP)
-MAKEPREPOSTPARVAR(HexModulus, hex_modulus, HEX_MODULUS)
+#pragma once
+
+#include "Variation.h"
+
+namespace EmberNs
+{
+/// <summary>
+/// Gnarly: rotates points lying within a circle centered in each square grid cell by an angle of twist * log(squared distance from the cell center / squared radius).
+/// </summary>
+template <typename T>
+class GnarlyVariation : public ParametricVariation<T>
+{
+public:
+	GnarlyVariation(T weight = 1.0) : ParametricVariation<T>("gnarly", eVariationId::VAR_GNARLY, weight)
+	{
+		Init();
+	}
+
+	PARVARCOPY(GnarlyVariation)
+
+	virtual void Func(IteratorHelper<T>& helper, Point<T>& outPoint, QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand) override
+	{
+		T Vx, Vy;
+		T Cx, Cy;
+		T Lx, Ly;
+		T r, theta, s, c;
+		Vx = helper.In.x;
+		Vy = helper.In.y;
+
+		if (m_GnarlyCellSize != T(0))//A zero cell size would divide by zero below, so the point is only scaled by the weight.
+		{
+			Cx = (Floor<T>(Vx / m_GnarlyCellSize) + T(0.5)) * m_GnarlyCellSize;//Center of the grid cell containing the point.
+			Cy = (Floor<T>(Vy / m_GnarlyCellSize) + T(0.5)) * m_GnarlyCellSize;
+			Lx = Vx - Cx;//Offset of the point from the cell center.
+			Ly = Vy - Cy;
+
+			if ((Lx * Lx + Ly * Ly) <= m_R2)//Only points within the circle of radius cellsize / 2 are rotated.
+			{
+				r = (Lx * Lx + Ly * Ly) / m_R2;//Squared distance as a fraction of the squared radius.
+				theta = m_GnarlyTwist * std::log(r);//NOTE(review): r is 0 for a point exactly at the cell center, which makes log(r) negative infinity - confirm this is acceptable.
+				sincos(theta, &s, &c);
+				Vx = Cx + c * Lx + s * Ly;//Rotate the offset and add it back to the cell center.
+				Vy = Cy - s * Lx + c * Ly;
+			}
+		}
+
+		helper.Out.x = m_Weight * Vx;
+		helper.Out.y = m_Weight * Vy;
+		helper.Out.z = DefaultZ(helper);
+	}
+
+	virtual string OpenCLString() const override
+	{
+		ostringstream ss, ss2;
+		intmax_t i = 0;//Index into m_Params, read below in the same order Init() pushes them.
+		ss2 << "_" << XformIndexInEmber() << "]";
+		string index = ss2.str();
+		string weight = WeightDefineString();
+		string cellsize = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
+		string twist = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
+		string r2 = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
+		ss << "\t{\n"
+		   << "\t\treal_t Vx, Vy, Cx, Cy, Lx, Ly, Lxy;\n"
+		   << "\t\treal_t r, theta, s, c;\n"
+		   << "\n"
+		   << "\t\tVx = vIn.x;\n"
+		   << "\t\tVy = vIn.y;\n"
+		   << "\n"
+		   << "\t\tif (" << cellsize << " != (real_t)(0))\n"
+		   << "\t\t{\n"
+		   << "\t\t\tCx = (floor(Vx / " << cellsize << ") + (real_t)(0.5)) * " << cellsize << ";\n"
+		   << "\t\t\tCy = (floor(Vy / " << cellsize << ") + (real_t)(0.5)) * " << cellsize << ";\n"
+		   << "\n"
+		   << "\t\t\tLx = Vx - Cx;\n"
+		   << "\t\t\tLy = Vy - Cy;\n"
+		   << "\t\t\tLxy = fma(Lx, Lx, Ly * Ly);\n"
+		   << "\n"
+		   << "\t\t\tif (Lxy <= " << r2 << ")\n"
+		   << "\t\t\t{\n"
+		   << "\t\t\t\tr = Lxy / " << r2 << ";\n"
+		   << "\t\t\t\ttheta = " << twist << " * log(r);\n"
+		   << "\t\t\t\ts = sin(theta);\n"
+		   << "\t\t\t\tc = cos(theta);\n"
+		   << "\t\t\t\tVx = Cx + c * Lx + s * Ly;\n"
+		   << "\t\t\t\tVy = Cy - s * Lx + c * Ly;\n"
+		   << "\t\t\t}\n"
+		   << "\t\t}\n"
+		   << "\n"
+		   << "\t\tvOut.x = " << weight << " * Vx;\n"
+		   << "\t\tvOut.y = " << weight << " * Vy;\n"
+		   << "\t\tvOut.z = " << DefaultZCl()
+		   << "\t}\n";
+		return ss.str();
+	}
+
+	virtual void Precalc() override
+	{
+		T radius = T(0.5) * m_GnarlyCellSize;//Half the cell size.
+		m_R2 = Zeps(SQR(radius));//Squared radius; Zeps() presumably keeps it from being exactly zero - confirm.
+	}
+
+protected:
+	void Init()
+	{
+		string prefix = Prefix();
+		m_Params.clear();
+		m_Params.push_back(ParamWithName<T>(&m_GnarlyCellSize, prefix + "gnarly_cellsize", T(1)));
+		m_Params.push_back(ParamWithName<T>(&m_GnarlyTwist, prefix + "gnarly_twist", T(1)));
+		m_Params.push_back(ParamWithName<T>(true, &m_R2, prefix + "gnarly_r2"));//Precalc.
+	}
+
+private:
+	T m_GnarlyCellSize;
+	T m_GnarlyTwist;
+	T m_R2;//Precalc.
+};
+
+/// <summary>
+/// inkdrop by Jess. Moves each point away from (x, y) by sqrt(dist^2 + r^2) - dist, where dist is the point's distance from (x, y).
+/// </summary>
+template <typename T>
+class InkdropVariation : public ParametricVariation<T>
+{
+public:
+	InkdropVariation(T weight = 1.0) : ParametricVariation<T>("inkdrop", eVariationId::VAR_INKDROP, weight)
+	{
+		Init();
+	}
+
+	PARVARCOPY(InkdropVariation)
+
+	virtual void Func(IteratorHelper<T>& helper, Point<T>& outPoint, QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand) override
+	{
+		T distx = helper.In.x - m_X;//Offset of the input point from (m_X, m_Y).
+		T disty = helper.In.y - m_Y;
+		T dist2 = SQR(distx) + SQR(disty);//Squared distance from (m_X, m_Y).
+		T adjust = std::sqrt(dist2 + m_Rad2) - std::sqrt(dist2);//How far to move the point.
+		T bearing = std::atan2(disty, distx);//Direction from (m_X, m_Y) to the point.
+		T x = helper.In.x + (std::cos(bearing) * adjust);
+		T y = helper.In.y + (std::sin(bearing) * adjust);
+		helper.Out.x = m_Weight * x;
+		helper.Out.y = m_Weight * y;
+		helper.Out.z = DefaultZ(helper);
+	}
+
+	virtual string OpenCLString() const override
+	{
+		ostringstream ss, ss2;
+		intmax_t i = 0;//Index into m_Params, read below in the same order Init() pushes them.
+		ss2 << "_" << XformIndexInEmber() << "]";
+		string index = ss2.str();
+		string weight = WeightDefineString();
+		string r = "parVars[" + ToUpper(m_Params[i++].Name()) + index;//Not used in the kernel text, but reading it advances i past inkdrop_r.
+		string x = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
+		string y = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
+		string rad2 = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
+		ss << "\t{\n"
+		   << "\t\treal_t distx = vIn.x - " << x << ";\n"
+		   << "\t\treal_t disty = vIn.y - " << y << ";\n"
+		   << "\t\treal_t dist2 = SQR(distx) + SQR(disty);\n"
+		   << "\t\treal_t adjust = sqrt(dist2 + " << rad2 << ") - sqrt(dist2);\n"
+		   << "\n"
+		   << "\t\treal_t bearing = atan2(disty, distx);\n"
+		   << "\t\treal_t x = fma(cos(bearing), adjust, vIn.x);\n"
+		   << "\t\treal_t y = fma(sin(bearing), adjust, vIn.y);\n"
+		   << "\n"
+		   << "\t\tvOut.x = " << weight << " * x;\n"
+		   << "\t\tvOut.y = " << weight << " * y;\n"
+		   << "\t\tvOut.z = " << DefaultZCl()
+		   << "\t}\n";
+		return ss.str();
+	}
+
+	virtual void Precalc() override
+	{
+		m_Rad2 = SQR(m_R);//Squared once here so Func() does not have to square m_R per point.
+	}
+
+protected:
+	void Init()
+	{
+		string prefix = Prefix();
+		m_Params.clear();
+		m_Params.push_back(ParamWithName<T>(&m_R, prefix + "inkdrop_r", T(0.5), eParamType::REAL, 0));
+		m_Params.push_back(ParamWithName<T>(&m_X, prefix + "inkdrop_x"));
+		m_Params.push_back(ParamWithName<T>(&m_Y, prefix + "inkdrop_y"));
+		m_Params.push_back(ParamWithName<T>(true, &m_Rad2, prefix + "inkdrop_rad2"));//Precalc.
+	}
+
+private:
+	T m_R;
+	T m_X;
+	T m_Y;
+	T m_Rad2;//Precalc.
+};
+
+/// <summary>
+/// hex_modulus: converts each point to three hex coordinates, rounds them, and outputs the point minus the position computed from the rounded values.
+/// By tatasz.
+/// </summary>
+template <typename T>
+class HexModulusVariation : public ParametricVariation<T>
+{
+public:
+	HexModulusVariation(T weight = 1.0) : ParametricVariation<T>("hex_modulus", eVariationId::VAR_HEX_MODULUS, weight)
+	{
+		Init();
+	}
+
+	PARVARCOPY(HexModulusVariation)
+
+	virtual void Func(IteratorHelper<T>& helper, Point<T>& outPoint, QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand) override
+	{
+		//get hex
+		T X = helper.In.x * m_HsizePrecalc;
+		T Y = helper.In.y * m_HsizePrecalc;
+		T yover3 = Y / 3;
+		T x = M_SQRT3_3 * X - yover3;
+		T z = T(2.0) * yover3;
+		T y = -x - z;//Chosen so that x + y + z == 0.
+		//round
+		T rx = std::round(x);
+		T ry = std::round(y);
+		T rz = std::round(z);
+		T x_diff = std::abs(rx - x);//How far each component moved when it was rounded.
+		T y_diff = std::abs(ry - y);
+		T z_diff = std::abs(rz - z);
+
+		if ((x_diff > y_diff) & (x_diff > z_diff))//Bitwise & on two bools, so both comparisons are always evaluated.
+			rx = -ry - rz;//Recompute the component with the largest rounding error so the three sum to zero again.
+		else if (y_diff > z_diff)
+			ry = -rx - rz;
+		else
+			rz = -rx - ry;
+
+		T FX_h = M_SQRT3 * rx + M_SQRT3_2 * rz;//Position computed from the rounded hex coordinates.
+		T FY_h = T(1.5) * rz;
+		T FX = X - FX_h;//Offset of the scaled point from that position.
+		T FY = Y - FY_h;
+		helper.Out.x = FX * m_WeightPrecalc;
+		helper.Out.y = FY * m_WeightPrecalc;
+		helper.Out.z = DefaultZ(helper);
+	}
+
+	virtual string OpenCLString() const override
+	{
+		ostringstream ss, ss2;
+		intmax_t i = 0;//Index into m_Params, read below in the same order Init() pushes them.
+		ss2 << "_" << XformIndexInEmber() << "]";
+		string index = ss2.str();
+		string weight = WeightDefineString();//Not referenced below, the kernel text uses weightprecalc instead.
+		string size          = "parVars[" + ToUpper(m_Params[i++].Name()) + index;//Not used in the kernel text, but reading it advances i past hex_modulus_size.
+		string hsizeprecalc  = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
+		string weightprecalc = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
+		ss << "\t{\n"
+		   << "\t\t//get hex\n"
+		   << "\t\treal_t X = vIn.x * " << hsizeprecalc << ";\n"
+		   << "\t\treal_t Y = vIn.y * " << hsizeprecalc << ";\n"
+		   << "\t\treal_t yover3 = Y / (real_t)(3.0);\n"
+		   << "\t\treal_t x = fma(M_SQRT3_3, X, -yover3);\n"
+		   << "\t\treal_t z = (real_t)(2.0) * yover3;\n"
+		   << "\t\treal_t y = -x - z;\n"
+		   << "\t\t//round\n"
+		   << "\t\treal_t rx = round(x);\n"
+		   << "\t\treal_t ry = round(y);\n"
+		   << "\t\treal_t rz = round(z);\n"
+		   << "\n"
+		   << "\t\treal_t x_diff = fabs(rx - x);\n"
+		   << "\t\treal_t y_diff = fabs(ry - y);\n"
+		   << "\t\treal_t z_diff = fabs(rz - z);\n"
+		   << "\n"
+		   << "\t\tif ((x_diff > y_diff) & (x_diff > z_diff))\n"
+		   << "\t\trx = -ry - rz;\n"
+		   << "\t\telse if (y_diff > z_diff)\n"
+		   << "\t\try = -rx - rz;\n"
+		   << "\t\telse\n"
+		   << "\t\trz = -rx - ry;\n"
+		   << "\n"
+		   << "\t\treal_t FX_h = fma(M_SQRT3, rx, M_SQRT3_2 * rz);\n"
+		   << "\t\treal_t FY_h = (real_t)(1.5) * rz;\n"
+		   << "\n"
+		   << "\t\treal_t FX = X - FX_h;\n"
+		   << "\t\treal_t FY = Y - FY_h;\n"
+		   << "\n"
+		   << "\t\tvOut.x = FX * " << weightprecalc << ";\n"
+		   << "\t\tvOut.y = FY * " << weightprecalc << ";\n"
+		   << "\t\tvOut.z = " << DefaultZCl()
+		   << "\t}\n";
+		return ss.str();
+	}
+
+	virtual void Precalc() override
+	{
+		m_HsizePrecalc = M_SQRT3_2 / Zeps(m_Size);//Input scale applied in Func(); Zeps() presumably guards against a zero size - confirm.
+		m_WeightPrecalc = m_Weight / M_SQRT3_2;//Output scale, the weight divided by the same constant used in the input scale.
+	}
+
+protected:
+	void Init()
+	{
+		string prefix = Prefix();
+		m_Params.clear();
+		m_Params.push_back(ParamWithName<T>(&m_Size,                prefix + "hex_modulus_size", T(1.0)));
+		m_Params.push_back(ParamWithName<T>(true, &m_HsizePrecalc,  prefix + "hex_modulus_hsize_precalc"));//Precalc.
+		m_Params.push_back(ParamWithName<T>(true, &m_WeightPrecalc, prefix + "hex_modulus_weight_precalc"));//Precalc.
+	}
+
+private:
+	T m_Size;
+	T m_HsizePrecalc;//Precalc.
+	T m_WeightPrecalc;//Precalc.
+};
+
+MAKEPREPOSTPARVAR(Gnarly, gnarly, GNARLY)//NOTE(review): macro is defined elsewhere; presumably it declares the pre and post versions of each class above - confirm.
+MAKEPREPOSTPARVAR(Inkdrop, inkdrop, INKDROP)
+MAKEPREPOSTPARVAR(HexModulus, hex_modulus, HEX_MODULUS)
 }
@@ -1,84 +1,84 @@
-#pragma once
-
-#include "Utils.h"
-#include "PaletteList.h"
-#include "VariationList.h"
-#include "Ember.h"
-#include "Spline.h"
-
-#ifdef __APPLE__
-	#include <libgen.h>
-#endif
-
-/// <summary>
-/// XmlToEmber and Locale classes.
-/// </summary>
-
-namespace EmberNs
-{
-/// <summary>
-/// Convenience class for setting and resetting the locale.
-/// It's set up in the constructor and restored in the destructor.
-/// This relieves the caller of having to manually do it everywhere.
-/// </summary>
-class EMBER_API Locale
-{
-public:
-	Locale(int category = LC_NUMERIC, const char* loc = "C");
-	~Locale();
-
-private:
-	int m_Category;
-	string m_NewLocale;
-	string m_OriginalLocale;
-};
-
-/// <summary>
-/// Class for reading standard Xml flame files as well as Chaotica .chaos files into ember objects.
-/// This class derives from EmberReport, so the caller is able
-/// to retrieve a text dump of error information if any errors occur.
-/// Since this class contains a VariationList object, it's important to declare one
-/// instance and reuse it for the duration of the program instead of creating and deleting
-/// them as local variables.
-/// Template argument expected to be float or double.
-/// </summary>
-template <typename T>
-class EMBER_API XmlToEmber : public EmberReport
-{
-public:
-	XmlToEmber();
-	template <typename Alloc, template <typename, typename> class C>
-	bool Parse(byte* buf, const char* filename, C<Ember<T>, Alloc>& embers, bool useDefaults);
-	template <typename Alloc, template <typename, typename> class C>
-	bool Parse(const char* filename, C<Ember<T>, Alloc>& embers, bool useDefaults);
-	template <typename valT>
-	bool Aton(const char* str, valT& val);
-	static vector<string> m_FlattenNames;
-
-private:
-	template <typename Alloc, template <typename, typename> class C>
-	void ScanForEmberNodes(xmlNode* curNode, const char* parentFile, C<Ember<T>, Alloc>& embers, bool useDefaults);
-	template <typename Alloc, template <typename, typename> class C>
-	void ScanForChaosNodes(xmlNode* curNode, const char* parentFile, C<Ember<T>, Alloc>& embers, bool useDefaults);
-	bool ParseEmberElement(xmlNode* emberNode, Ember<T>& currentEmber);
-	bool ParseEmberElementFromChaos(xmlNode* emberNode, Ember<T>& currentEmber);
-	bool AttToEmberMotionFloat(xmlAttrPtr att, const char* attStr, eEmberMotionParam param, EmberMotion<T>& motion);
-	bool ParseXform(xmlNode* childNode, Xform<T>& xform, bool motion, bool fromEmber);
-	static string GetCorrectedParamName(const unordered_map<string, string>& names, const char* name);
-	static string GetCorrectedVariationName(vector<pair<pair<string, string>, vector<string>>>& vec, xmlAttrPtr att);
-	static string GetCorrectedVariationName(vector<pair<pair<string, string>, vector<string>>>& vec, const string& varname);
-	static bool XmlContainsTag(xmlAttrPtr att, const char* name);
-	bool ParseHexColors(const char* colstr, Ember<T>& ember, size_t numColors, intmax_t chan);
-	template <typename valT>
-	bool ParseAndAssign(const xmlChar* name, const char* attStr, const char* str, valT& val, bool& b);
-	template <typename valT>
-	bool ParseAndAssignContent(xmlNode* node, const char* fieldname, const char* fieldnameval, valT& val);
-	bool ParseAndAssignContent(xmlNode* node, const char* fieldname, const char* fieldnameval, std::string& val);
-
-	static bool m_Init;
-	static unordered_map<string, string> m_BadParamNames;
-	static vector<pair<pair<string, string>, vector<string>>> m_BadVariationNames;
-	shared_ptr<VariationList<T>> m_VariationList;//The variation list used to make copies of variations to populate the embers with.
-	shared_ptr<PaletteList<float>> m_PaletteList;
-};
-}
+#pragma once
+
+#include "Utils.h"
+#include "PaletteList.h"
+#include "VariationList.h"
+#include "Ember.h"
+#include "Spline.h"
+
+#ifdef __APPLE__
+	#include <libgen.h>
+#endif
+
+/// <summary>
+/// XmlToEmber and Locale classes.
+/// </summary>
+
+namespace EmberNs
+{
+/// <summary>
+/// Convenience class for setting and resetting the locale.
+/// It's set up in the constructor and restored in the destructor.
+/// This relieves the caller of having to manually do it everywhere.
+/// </summary>
+class EMBER_API Locale
+{
+public:
+	Locale(int category = LC_NUMERIC, const char* loc = "C");//Defaults select the numeric category and the "C" locale.
+	~Locale();//Per the class summary, restores what was in effect before construction.
+
+private:
+	int m_Category;//Presumably the category passed to the constructor -- confirm in the implementation file.
+	string m_NewLocale;//Presumably the locale name requested via loc -- confirm in the implementation file.
+	string m_OriginalLocale;//Presumably the locale saved for the destructor to restore -- confirm in the implementation file.
+};
+
+/// <summary>
+/// Class for reading standard Xml flame files as well as Chaotica .chaos files into ember objects.
+/// This class derives from EmberReport, so the caller is able
+/// to retrieve a text dump of error information if any errors occur.
+/// Since this class contains a VariationList object, it's important to declare one
+/// instance and reuse it for the duration of the program instead of creating and deleting
+/// them as local variables.
+/// Template argument expected to be float or double.
+/// </summary>
+template <typename T>
+class EMBER_API XmlToEmber : public EmberReport
+{
+public:
+	XmlToEmber();
+	template <typename Alloc, template <typename, typename> class C>
+	bool Parse(byte* buf, const char* filename, C<Ember<T>, Alloc>& embers, bool useDefaults);//Overload taking a caller supplied buffer. NOTE(review): still spelled byte* although the commit description says the byte typedef was replaced with unsigned char -- confirm this cannot resolve to std::byte.
+	template <typename Alloc, template <typename, typename> class C>
+	bool Parse(const char* filename, C<Ember<T>, Alloc>& embers, bool useDefaults);//Overload without a buffer; presumably loads filename itself -- confirm in the implementation file.
+	template <typename valT>
+	bool Aton(const char* str, valT& val);//Public, unlike the other parsing helpers below.
+	static vector<string> m_FlattenNames;//Public static, so one copy per instantiation of T rather than per object.
+
+private:
+	template <typename Alloc, template <typename, typename> class C>
+	void ScanForEmberNodes(xmlNode* curNode, const char* parentFile, C<Ember<T>, Alloc>& embers, bool useDefaults);//Same signature as ScanForChaosNodes(); the summary above names the two input formats handled.
+	template <typename Alloc, template <typename, typename> class C>
+	void ScanForChaosNodes(xmlNode* curNode, const char* parentFile, C<Ember<T>, Alloc>& embers, bool useDefaults);
+	bool ParseEmberElement(xmlNode* emberNode, Ember<T>& currentEmber);
+	bool ParseEmberElementFromChaos(xmlNode* emberNode, Ember<T>& currentEmber);
+	bool AttToEmberMotionFloat(xmlAttrPtr att, const char* attStr, eEmberMotionParam param, EmberMotion<T>& motion);
+	bool ParseXform(xmlNode* childNode, Xform<T>& xform, bool motion, bool fromEmber);
+	static string GetCorrectedParamName(const unordered_map<string, string>& names, const char* name);//Takes the same map type as m_BadParamNames.
+	static string GetCorrectedVariationName(vector<pair<pair<string, string>, vector<string>>>& vec, xmlAttrPtr att);//Takes the same container type as m_BadVariationNames.
+	static string GetCorrectedVariationName(vector<pair<pair<string, string>, vector<string>>>& vec, const string& varname);//Overload keyed by name instead of by attribute.
+	static bool XmlContainsTag(xmlAttrPtr att, const char* name);
+	bool ParseHexColors(const char* colstr, Ember<T>& ember, size_t numColors, intmax_t chan);
+	template <typename valT>
+	bool ParseAndAssign(const xmlChar* name, const char* attStr, const char* str, valT& val, bool& b);
+	template <typename valT>
+	bool ParseAndAssignContent(xmlNode* node, const char* fieldname, const char* fieldnameval, valT& val);
+	bool ParseAndAssignContent(xmlNode* node, const char* fieldname, const char* fieldnameval, std::string& val);//Non-template overload for string values.
+
+	static bool m_Init;//Presumably guards one time setup of the static tables below -- confirm in the implementation file.
+	static unordered_map<string, string> m_BadParamNames;
+	static vector<pair<pair<string, string>, vector<string>>> m_BadVariationNames;
+	shared_ptr<VariationList<T>> m_VariationList;//The variation list used to make copies of variations to populate the embers with.
+	shared_ptr<PaletteList<float>> m_PaletteList;//Always PaletteList<float>, independent of T.
+};
+}
@@ -1,16 +1,16 @@
-#pragma once
-
-#include "EmberOptions.h"
-
-/// <summary>
-/// Declaration for the EmberAnimate() function.
-/// </summary>
-
-/// <summary>
-/// The core of the EmberAnimate.exe program.
-/// Template argument expected to be float or double.
-/// </summary>
-/// <param name="opt">A populated EmberOptions object which specifies all program options to be used</param>
-/// <returns>True if success, else false.</returns>
-template <typename T, typename bucketT>
+#pragma once
+
+#include "EmberOptions.h"
+
+/// <summary>
+/// Declaration for the EmberAnimate() function.
+/// </summary>
+
+/// <summary>
+/// The core of the EmberAnimate.exe program.
+/// Template argument expected to be float or double.
+/// </summary>
+/// <param name="opt">A populated EmberOptions object which specifies all program options to be used</param>
+/// <returns>True if success, else false.</returns>
+template <typename T, typename bucketT>
 static bool EmberAnimate(EmberOptions& opt);
@@ -1,15 +1,15 @@
-//{{NO_DEPENDENCIES}}
-// Microsoft Visual C++ generated include file.
-// Used by EmberAnimate.rc
-//
-
-// Next default values for new objects
-// 
-#ifdef APSTUDIO_INVOKED
-#ifndef APSTUDIO_READONLY_SYMBOLS
-#define _APS_NEXT_RESOURCE_VALUE        101
-#define _APS_NEXT_COMMAND_VALUE         40001
-#define _APS_NEXT_CONTROL_VALUE         1000
-#define _APS_NEXT_SYMED_VALUE           101
-#endif
-#endif
+//{{NO_DEPENDENCIES}}
+// Microsoft Visual C++ generated include file.
+// Used by EmberAnimate.rc
+//
+
+// Next default values for new objects
+// 
+#ifdef APSTUDIO_INVOKED
+#ifndef APSTUDIO_READONLY_SYMBOLS
+#define _APS_NEXT_RESOURCE_VALUE        101
+#define _APS_NEXT_COMMAND_VALUE         40001
+#define _APS_NEXT_CONTROL_VALUE         1000
+#define _APS_NEXT_SYMED_VALUE           101
+#endif
+#endif
@@ -0,0 +1,241 @@
+/*******************************************************************************
+ * Copyright (c) 2008-2023 The Khronos Group Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+
+#ifndef OPENCL_CL_D3D10_H_
+#define OPENCL_CL_D3D10_H_
+
+/*
+** This header is generated from the Khronos OpenCL XML API Registry.
+*/
+
+#if defined(_MSC_VER)
+#if _MSC_VER >=1500
+#pragma warning( push )
+#pragma warning( disable : 4201 )
+#pragma warning( disable : 5105 )
+#endif
+#endif
+#include <d3d10.h>
+#if defined(_MSC_VER)
+#if _MSC_VER >=1500
+#pragma warning( pop )
+#endif
+#endif
+
+#include <CL/cl.h>
+
+/* CL_NO_PROTOTYPES implies CL_NO_EXTENSION_PROTOTYPES: */
+#if defined(CL_NO_PROTOTYPES) && !defined(CL_NO_EXTENSION_PROTOTYPES)
+#define CL_NO_EXTENSION_PROTOTYPES
+#endif
+
+/* CL_NO_EXTENSION_PROTOTYPES implies
+   CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES and
+   CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES: */
+#if defined(CL_NO_EXTENSION_PROTOTYPES) && \
+    !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+#define CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES
+#endif
+#if defined(CL_NO_EXTENSION_PROTOTYPES) && \
+    !defined(CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+#define CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/***************************************************************
+* cl_khr_d3d10_sharing
+***************************************************************/
+#define cl_khr_d3d10_sharing 1
+#define CL_KHR_D3D10_SHARING_EXTENSION_NAME \
+    "cl_khr_d3d10_sharing"
+
+typedef cl_uint             cl_d3d10_device_source_khr;
+typedef cl_uint             cl_d3d10_device_set_khr;
+
+/* Error codes */
+#define CL_INVALID_D3D10_DEVICE_KHR                         -1002
+#define CL_INVALID_D3D10_RESOURCE_KHR                       -1003
+#define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR              -1004
+#define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR                  -1005
+
+/* cl_d3d10_device_source_khr */
+#define CL_D3D10_DEVICE_KHR                                 0x4010
+#define CL_D3D10_DXGI_ADAPTER_KHR                           0x4011
+
+/* cl_d3d10_device_set_khr */
+#define CL_PREFERRED_DEVICES_FOR_D3D10_KHR                  0x4012
+#define CL_ALL_DEVICES_FOR_D3D10_KHR                        0x4013
+
+/* cl_context_info */
+#define CL_CONTEXT_D3D10_DEVICE_KHR                         0x4014
+#define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR        0x402C
+
+/* cl_mem_info */
+#define CL_MEM_D3D10_RESOURCE_KHR                           0x4015
+
+/* cl_image_info */
+#define CL_IMAGE_D3D10_SUBRESOURCE_KHR                      0x4016
+
+/* cl_command_type */
+#define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR                0x4017
+#define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR                0x4018
+
+
+typedef cl_int (CL_API_CALL *
+clGetDeviceIDsFromD3D10KHR_fn)(
+    cl_platform_id platform,
+    cl_d3d10_device_source_khr d3d_device_source,
+    void* d3d_object,
+    cl_d3d10_device_set_khr d3d_device_set,
+    cl_uint num_entries,
+    cl_device_id* devices,
+    cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_0;
+
+typedef cl_mem (CL_API_CALL *
+clCreateFromD3D10BufferKHR_fn)(
+    cl_context context,
+    cl_mem_flags flags,
+    ID3D10Buffer* resource,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0;
+
+typedef cl_mem (CL_API_CALL *
+clCreateFromD3D10Texture2DKHR_fn)(
+    cl_context context,
+    cl_mem_flags flags,
+    ID3D10Texture2D* resource,
+    UINT subresource,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0;
+
+typedef cl_mem (CL_API_CALL *
+clCreateFromD3D10Texture3DKHR_fn)(
+    cl_context context,
+    cl_mem_flags flags,
+    ID3D10Texture3D* resource,
+    UINT subresource,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0;
+
+typedef cl_int (CL_API_CALL *
+clEnqueueAcquireD3D10ObjectsKHR_fn)(
+    cl_command_queue command_queue,
+    cl_uint num_objects,
+    const cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event) CL_API_SUFFIX__VERSION_1_0;
+
+typedef cl_int (CL_API_CALL *
+clEnqueueReleaseD3D10ObjectsKHR_fn)(
+    cl_command_queue command_queue,
+    cl_uint num_objects,
+    const cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event) CL_API_SUFFIX__VERSION_1_0;
+
+#if !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetDeviceIDsFromD3D10KHR(
+    cl_platform_id platform,
+    cl_d3d10_device_source_khr d3d_device_source,
+    void* d3d_object,
+    cl_d3d10_device_set_khr d3d_device_set,
+    cl_uint num_entries,
+    cl_device_id* devices,
+    cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromD3D10BufferKHR(
+    cl_context context,
+    cl_mem_flags flags,
+    ID3D10Buffer* resource,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromD3D10Texture2DKHR(
+    cl_context context,
+    cl_mem_flags flags,
+    ID3D10Texture2D* resource,
+    UINT subresource,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromD3D10Texture3DKHR(
+    cl_context context,
+    cl_mem_flags flags,
+    ID3D10Texture3D* resource,
+    UINT subresource,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueAcquireD3D10ObjectsKHR(
+    cl_command_queue command_queue,
+    cl_uint num_objects,
+    const cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReleaseD3D10ObjectsKHR(
+    cl_command_queue command_queue,
+    cl_uint num_objects,
+    const cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event) CL_API_SUFFIX__VERSION_1_0;
+
+#endif /* !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES) */
+
+/***************************************************************
+* cl_intel_sharing_format_query_d3d10
+***************************************************************/
+#define cl_intel_sharing_format_query_d3d10 1
+#define CL_INTEL_SHARING_FORMAT_QUERY_D3D10_EXTENSION_NAME \
+    "cl_intel_sharing_format_query_d3d10"
+
+/* when cl_khr_d3d10_sharing is supported */
+
+typedef cl_int (CL_API_CALL *
+clGetSupportedD3D10TextureFormatsINTEL_fn)(
+    cl_context context,
+    cl_mem_flags flags,
+    cl_mem_object_type image_type,
+    cl_uint num_entries,
+    DXGI_FORMAT* d3d10_formats,
+    cl_uint* num_texture_formats) ;
+
+#if !defined(CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetSupportedD3D10TextureFormatsINTEL(
+    cl_context context,
+    cl_mem_flags flags,
+    cl_mem_object_type image_type,
+    cl_uint num_entries,
+    DXGI_FORMAT* d3d10_formats,
+    cl_uint* num_texture_formats) ;
+
+#endif /* !defined(CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES) */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* OPENCL_CL_D3D10_H_ */
@@ -0,0 +1,243 @@
+/*******************************************************************************
+ * Copyright (c) 2008-2023 The Khronos Group Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+
+#ifndef OPENCL_CL_D3D11_H_
+#define OPENCL_CL_D3D11_H_
+
+/*
+** This header is generated from the Khronos OpenCL XML API Registry.
+*/
+
+#if defined(_MSC_VER)
+#if _MSC_VER >=1500
+#pragma warning( push )
+#pragma warning( disable : 4201 )
+#pragma warning( disable : 5105 )
+#endif
+#endif
+#include <d3d11.h>
+#if defined(_MSC_VER)
+#if _MSC_VER >=1500
+#pragma warning( pop )
+#endif
+#endif
+
+#include <CL/cl.h>
+
+/* CL_NO_PROTOTYPES implies CL_NO_EXTENSION_PROTOTYPES: */
+#if defined(CL_NO_PROTOTYPES) && !defined(CL_NO_EXTENSION_PROTOTYPES)
+#define CL_NO_EXTENSION_PROTOTYPES
+#endif
+
+/* CL_NO_EXTENSION_PROTOTYPES implies
+   CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES and
+   CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES: */
+#if defined(CL_NO_EXTENSION_PROTOTYPES) && \
+    !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+#define CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES
+#endif
+#if defined(CL_NO_EXTENSION_PROTOTYPES) && \
+    !defined(CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+#define CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/***************************************************************
+* cl_khr_d3d11_sharing
+***************************************************************/
+#define cl_khr_d3d11_sharing 1
+#define CL_KHR_D3D11_SHARING_EXTENSION_NAME \
+    "cl_khr_d3d11_sharing"
+
+typedef cl_uint             cl_d3d11_device_source_khr;
+typedef cl_uint             cl_d3d11_device_set_khr;
+
+/* Error codes */
+#define CL_INVALID_D3D11_DEVICE_KHR                         -1006
+#define CL_INVALID_D3D11_RESOURCE_KHR                       -1007
+#define CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR              -1008
+#define CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR                  -1009
+
+/* cl_d3d11_device_source_khr */
+#define CL_D3D11_DEVICE_KHR                                 0x4019
+#define CL_D3D11_DXGI_ADAPTER_KHR                           0x401A
+
+/* cl_d3d11_device_set_khr */
+#define CL_PREFERRED_DEVICES_FOR_D3D11_KHR                  0x401B
+#define CL_ALL_DEVICES_FOR_D3D11_KHR                        0x401C
+
+/* cl_context_info */
+#define CL_CONTEXT_D3D11_DEVICE_KHR                         0x401D
+#define CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR        0x402D
+
+/* cl_mem_info */
+#define CL_MEM_D3D11_RESOURCE_KHR                           0x401E
+
+/* cl_image_info */
+#define CL_IMAGE_D3D11_SUBRESOURCE_KHR                      0x401F
+
+/* cl_command_type */
+#define CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR                0x4020
+#define CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR                0x4021
+
+
+typedef cl_int (CL_API_CALL *
+clGetDeviceIDsFromD3D11KHR_fn)(
+    cl_platform_id platform,
+    cl_d3d11_device_source_khr d3d_device_source,
+    void* d3d_object,
+    cl_d3d11_device_set_khr d3d_device_set,
+    cl_uint num_entries,
+    cl_device_id* devices,
+    cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_2;
+
+typedef cl_mem (CL_API_CALL *
+clCreateFromD3D11BufferKHR_fn)(
+    cl_context context,
+    cl_mem_flags flags,
+    ID3D11Buffer* resource,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2;
+
+typedef cl_mem (CL_API_CALL *
+clCreateFromD3D11Texture2DKHR_fn)(
+    cl_context context,
+    cl_mem_flags flags,
+    ID3D11Texture2D* resource,
+    UINT subresource,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2;
+
+typedef cl_mem (CL_API_CALL *
+clCreateFromD3D11Texture3DKHR_fn)(
+    cl_context context,
+    cl_mem_flags flags,
+    ID3D11Texture3D* resource,
+    UINT subresource,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2;
+
+typedef cl_int (CL_API_CALL *
+clEnqueueAcquireD3D11ObjectsKHR_fn)(
+    cl_command_queue command_queue,
+    cl_uint num_objects,
+    const cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event) CL_API_SUFFIX__VERSION_1_2;
+
+typedef cl_int (CL_API_CALL *
+clEnqueueReleaseD3D11ObjectsKHR_fn)(
+    cl_command_queue command_queue,
+    cl_uint num_objects,
+    const cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event) CL_API_SUFFIX__VERSION_1_2;
+
+#if !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetDeviceIDsFromD3D11KHR(
+    cl_platform_id platform,
+    cl_d3d11_device_source_khr d3d_device_source,
+    void* d3d_object,
+    cl_d3d11_device_set_khr d3d_device_set,
+    cl_uint num_entries,
+    cl_device_id* devices,
+    cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromD3D11BufferKHR(
+    cl_context context,
+    cl_mem_flags flags,
+    ID3D11Buffer* resource,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromD3D11Texture2DKHR(
+    cl_context context,
+    cl_mem_flags flags,
+    ID3D11Texture2D* resource,
+    UINT subresource,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromD3D11Texture3DKHR(
+    cl_context context,
+    cl_mem_flags flags,
+    ID3D11Texture3D* resource,
+    UINT subresource,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueAcquireD3D11ObjectsKHR(
+    cl_command_queue command_queue,
+    cl_uint num_objects,
+    const cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReleaseD3D11ObjectsKHR(
+    cl_command_queue command_queue,
+    cl_uint num_objects,
+    const cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event) CL_API_SUFFIX__VERSION_1_2;
+
+#endif /* !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES) */
+
+/***************************************************************
+* cl_intel_sharing_format_query_d3d11
+***************************************************************/
+#define cl_intel_sharing_format_query_d3d11 1
+#define CL_INTEL_SHARING_FORMAT_QUERY_D3D11_EXTENSION_NAME \
+    "cl_intel_sharing_format_query_d3d11"
+
+/* when cl_khr_d3d11_sharing is supported */
+
+typedef cl_int (CL_API_CALL *
+clGetSupportedD3D11TextureFormatsINTEL_fn)(
+    cl_context context,
+    cl_mem_flags flags,
+    cl_mem_object_type image_type,
+    cl_uint plane,
+    cl_uint num_entries,
+    DXGI_FORMAT* d3d11_formats,
+    cl_uint* num_texture_formats) ;
+
+#if !defined(CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetSupportedD3D11TextureFormatsINTEL(
+    cl_context context,
+    cl_mem_flags flags,
+    cl_mem_object_type image_type,
+    cl_uint plane,
+    cl_uint num_entries,
+    DXGI_FORMAT* d3d11_formats,
+    cl_uint* num_texture_formats) ;
+
+#endif /* !defined(CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES) */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* OPENCL_CL_D3D11_H_ */
@@ -0,0 +1,350 @@
+/*******************************************************************************
+ * Copyright (c) 2008-2023 The Khronos Group Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+
+#ifndef OPENCL_CL_DX9_MEDIA_SHARING_H_
+#define OPENCL_CL_DX9_MEDIA_SHARING_H_
+
+/*
+** This header is generated from the Khronos OpenCL XML API Registry.
+*/
+
+#if defined(_WIN32)
+#if defined(_MSC_VER)
+#if _MSC_VER >=1500
+#pragma warning( push )
+#pragma warning( disable : 4201 )
+#pragma warning( disable : 5105 )
+#endif
+#endif
+#include <d3d9.h>
+#if defined(_MSC_VER)
+#if _MSC_VER >=1500
+#pragma warning( pop )
+#endif
+#endif
+#endif
+
+#include <CL/cl.h>
+
+/* CL_NO_PROTOTYPES implies CL_NO_EXTENSION_PROTOTYPES: */
+#if defined(CL_NO_PROTOTYPES) && !defined(CL_NO_EXTENSION_PROTOTYPES)
+#define CL_NO_EXTENSION_PROTOTYPES
+#endif
+
+/* CL_NO_EXTENSION_PROTOTYPES implies
+   CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES and
+   CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES: */
+#if defined(CL_NO_EXTENSION_PROTOTYPES) && \
+    !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+#define CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES
+#endif
+#if defined(CL_NO_EXTENSION_PROTOTYPES) && \
+    !defined(CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+#define CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/***************************************************************
+* cl_khr_dx9_media_sharing
+***************************************************************/
+#define cl_khr_dx9_media_sharing 1
+#define CL_KHR_DX9_MEDIA_SHARING_EXTENSION_NAME \
+    "cl_khr_dx9_media_sharing"
+
+typedef cl_uint             cl_dx9_media_adapter_type_khr;
+typedef cl_uint             cl_dx9_media_adapter_set_khr;
+
+#if defined(_WIN32)
+typedef struct _cl_dx9_surface_info_khr { /* Only declared when _WIN32 is defined; presumably what clCreateFromDX9MediaSurfaceKHR receives through its void* surface_info -- confirm against the extension spec. */
+    IDirect3DSurface9* resource; /* The Direct3D 9 surface being described. */
+    HANDLE shared_handle; /* Handle paired with resource; presumably the surface's shared handle -- confirm against the extension spec. */
+} cl_dx9_surface_info_khr;
+
+#endif /* defined(_WIN32) */
+
+/* Error codes */
+#define CL_INVALID_DX9_MEDIA_ADAPTER_KHR                    -1010
+#define CL_INVALID_DX9_MEDIA_SURFACE_KHR                    -1011
+#define CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR           -1012
+#define CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR               -1013
+
+/* cl_media_adapter_type_khr */
+#define CL_ADAPTER_D3D9_KHR                                 0x2020
+#define CL_ADAPTER_D3D9EX_KHR                               0x2021
+#define CL_ADAPTER_DXVA_KHR                                 0x2022
+
+/* cl_media_adapter_set_khr */
+#define CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR      0x2023
+#define CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR            0x2024
+
+/* cl_context_info */
+#define CL_CONTEXT_ADAPTER_D3D9_KHR                         0x2025
+#define CL_CONTEXT_ADAPTER_D3D9EX_KHR                       0x2026
+#define CL_CONTEXT_ADAPTER_DXVA_KHR                         0x2027
+
+/* cl_mem_info */
+#define CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR                   0x2028
+#define CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR                   0x2029
+
+/* cl_image_info */
+#define CL_IMAGE_DX9_MEDIA_PLANE_KHR                        0x202A
+
+/* cl_command_type */
+#define CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR           0x202B
+#define CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR           0x202C
+
+
+typedef cl_int (CL_API_CALL *
+clGetDeviceIDsFromDX9MediaAdapterKHR_fn)(
+    cl_platform_id platform,
+    cl_uint num_media_adapters,
+    cl_dx9_media_adapter_type_khr* media_adapter_type,
+    void* media_adapters,
+    cl_dx9_media_adapter_set_khr media_adapter_set,
+    cl_uint num_entries,
+    cl_device_id* devices,
+    cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_2;
+
+typedef cl_mem (CL_API_CALL *
+clCreateFromDX9MediaSurfaceKHR_fn)(
+    cl_context context,
+    cl_mem_flags flags,
+    cl_dx9_media_adapter_type_khr adapter_type,
+    void* surface_info,
+    cl_uint plane,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2;
+
+typedef cl_int (CL_API_CALL *
+clEnqueueAcquireDX9MediaSurfacesKHR_fn)(
+    cl_command_queue command_queue,
+    cl_uint num_objects,
+    const cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event) CL_API_SUFFIX__VERSION_1_2;
+
+typedef cl_int (CL_API_CALL *
+clEnqueueReleaseDX9MediaSurfacesKHR_fn)(
+    cl_command_queue command_queue,
+    cl_uint num_objects,
+    const cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event) CL_API_SUFFIX__VERSION_1_2;
+
+#if !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetDeviceIDsFromDX9MediaAdapterKHR(
+    cl_platform_id platform,
+    cl_uint num_media_adapters,
+    cl_dx9_media_adapter_type_khr* media_adapter_type,
+    void* media_adapters,
+    cl_dx9_media_adapter_set_khr media_adapter_set,
+    cl_uint num_entries,
+    cl_device_id* devices,
+    cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromDX9MediaSurfaceKHR(
+    cl_context context,
+    cl_mem_flags flags,
+    cl_dx9_media_adapter_type_khr adapter_type,
+    void* surface_info,
+    cl_uint plane,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueAcquireDX9MediaSurfacesKHR(
+    cl_command_queue command_queue,
+    cl_uint num_objects,
+    const cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReleaseDX9MediaSurfacesKHR(
+    cl_command_queue command_queue,
+    cl_uint num_objects,
+    const cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event) CL_API_SUFFIX__VERSION_1_2;
+
+#endif /* !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES) */
+
+/***************************************************************
+* cl_intel_dx9_media_sharing
+***************************************************************/
+#define cl_intel_dx9_media_sharing 1
+#define CL_INTEL_DX9_MEDIA_SHARING_EXTENSION_NAME \
+    "cl_intel_dx9_media_sharing"
+
+typedef cl_uint             cl_dx9_device_source_intel;
+typedef cl_uint             cl_dx9_device_set_intel;
+
+/* Error codes */
+#define CL_INVALID_DX9_DEVICE_INTEL                         -1010
+#define CL_INVALID_DX9_RESOURCE_INTEL                       -1011
+#define CL_DX9_RESOURCE_ALREADY_ACQUIRED_INTEL              -1012
+#define CL_DX9_RESOURCE_NOT_ACQUIRED_INTEL                  -1013
+
+/* cl_dx9_device_source_intel */
+#define CL_D3D9_DEVICE_INTEL                                0x4022
+#define CL_D3D9EX_DEVICE_INTEL                              0x4070
+#define CL_DXVA_DEVICE_INTEL                                0x4071
+
+/* cl_dx9_device_set_intel */
+#define CL_PREFERRED_DEVICES_FOR_DX9_INTEL                  0x4024
+#define CL_ALL_DEVICES_FOR_DX9_INTEL                        0x4025
+
+/* cl_context_info */
+#define CL_CONTEXT_D3D9_DEVICE_INTEL                        0x4026
+#define CL_CONTEXT_D3D9EX_DEVICE_INTEL                      0x4072
+#define CL_CONTEXT_DXVA_DEVICE_INTEL                        0x4073
+
+/* cl_mem_info */
+#define CL_MEM_DX9_RESOURCE_INTEL                           0x4027
+#define CL_MEM_DX9_SHARED_HANDLE_INTEL                      0x4074
+
+/* cl_image_info */
+#define CL_IMAGE_DX9_PLANE_INTEL                            0x4075
+
+/* cl_command_type */
+#define CL_COMMAND_ACQUIRE_DX9_OBJECTS_INTEL                0x402A
+#define CL_COMMAND_RELEASE_DX9_OBJECTS_INTEL                0x402B
+
+
+typedef cl_int (CL_API_CALL *
+clGetDeviceIDsFromDX9INTEL_fn)(
+    cl_platform_id platform,
+    cl_dx9_device_source_intel dx9_device_source,
+    void* dx9_object,
+    cl_dx9_device_set_intel dx9_device_set,
+    cl_uint num_entries,
+    cl_device_id* devices,
+    cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_1;
+
+typedef cl_mem (CL_API_CALL *
+clCreateFromDX9MediaSurfaceINTEL_fn)(
+    cl_context context,
+    cl_mem_flags flags,
+    IDirect3DSurface9* resource,
+    HANDLE sharedHandle,
+    UINT plane,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_1;
+
+typedef cl_int (CL_API_CALL *
+clEnqueueAcquireDX9ObjectsINTEL_fn)(
+    cl_command_queue command_queue,
+    cl_uint num_objects,
+    const cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event) CL_API_SUFFIX__VERSION_1_1;
+
+typedef cl_int (CL_API_CALL *
+clEnqueueReleaseDX9ObjectsINTEL_fn)(
+    cl_command_queue command_queue,
+    cl_uint num_objects,
+    cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event) CL_API_SUFFIX__VERSION_1_1;
+
+#if !defined(CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetDeviceIDsFromDX9INTEL(
+    cl_platform_id platform,
+    cl_dx9_device_source_intel dx9_device_source,
+    void* dx9_object,
+    cl_dx9_device_set_intel dx9_device_set,
+    cl_uint num_entries,
+    cl_device_id* devices,
+    cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromDX9MediaSurfaceINTEL(
+    cl_context context,
+    cl_mem_flags flags,
+    IDirect3DSurface9* resource,
+    HANDLE sharedHandle,
+    UINT plane,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueAcquireDX9ObjectsINTEL(
+    cl_command_queue command_queue,
+    cl_uint num_objects,
+    const cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReleaseDX9ObjectsINTEL(
+    cl_command_queue command_queue,
+    cl_uint num_objects,
+    cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event) CL_API_SUFFIX__VERSION_1_1;
+
+#endif /* !defined(CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES) */
+
+/***************************************************************
+* cl_intel_sharing_format_query_dx9
+***************************************************************/
+#define cl_intel_sharing_format_query_dx9 1
+#define CL_INTEL_SHARING_FORMAT_QUERY_DX9_EXTENSION_NAME \
+    "cl_intel_sharing_format_query_dx9"
+
+/* when cl_khr_dx9_media_sharing or cl_intel_dx9_media_sharing is supported */
+
+typedef cl_int (CL_API_CALL *
+clGetSupportedDX9MediaSurfaceFormatsINTEL_fn)(
+    cl_context context,
+    cl_mem_flags flags,
+    cl_mem_object_type image_type,
+    cl_uint plane,
+    cl_uint num_entries,
+    D3DFORMAT* dx9_formats,
+    cl_uint* num_surface_formats) ;
+
+#if !defined(CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetSupportedDX9MediaSurfaceFormatsINTEL(
+    cl_context context,
+    cl_mem_flags flags,
+    cl_mem_object_type image_type,
+    cl_uint plane,
+    cl_uint num_entries,
+    D3DFORMAT* dx9_formats,
+    cl_uint* num_surface_formats) ;
+
+#endif /* !defined(CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES) */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* OPENCL_CL_DX9_MEDIA_SHARING_H_ */
@@ -0,0 +1,18 @@
+/*******************************************************************************
+ * Copyright (c) 2008-2020 The Khronos Group Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+
+#include <CL/cl_dx9_media_sharing.h>
+#pragma message("The Intel DX9 media sharing extensions have been moved into cl_dx9_media_sharing.h.  Please include cl_dx9_media_sharing.h directly.")
@@ -0,0 +1,167 @@
+/*******************************************************************************
+ * Copyright (c) 2008-2023 The Khronos Group Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+
+#ifndef OPENCL_CL_EGL_H_
+#define OPENCL_CL_EGL_H_
+
+/*
+** This header is generated from the Khronos OpenCL XML API Registry.
+*/
+
+#include <CL/cl.h>
+
+/* CL_NO_PROTOTYPES implies CL_NO_EXTENSION_PROTOTYPES: */
+#if defined(CL_NO_PROTOTYPES) && !defined(CL_NO_EXTENSION_PROTOTYPES)
+#define CL_NO_EXTENSION_PROTOTYPES
+#endif
+
+/* CL_NO_EXTENSION_PROTOTYPES implies
+   CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES and
+   CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES: */
+#if defined(CL_NO_EXTENSION_PROTOTYPES) && \
+    !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+#define CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES
+#endif
+#if defined(CL_NO_EXTENSION_PROTOTYPES) && \
+    !defined(CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+#define CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/***************************************************************
+* cl_khr_egl_image
+***************************************************************/
+#define cl_khr_egl_image 1
+#define CL_KHR_EGL_IMAGE_EXTENSION_NAME \
+    "cl_khr_egl_image"
+
+/* Command type for events created with clEnqueueAcquireEGLObjectsKHR */
+#define CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR                0x202F
+#define CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR                  0x202D
+#define CL_COMMAND_RELEASE_EGL_OBJECTS_KHR                  0x202E
+
+/* Error type for clCreateFromEGLImageKHR */
+#define CL_INVALID_EGL_OBJECT_KHR                           -1093
+#define CL_EGL_RESOURCE_NOT_ACQUIRED_KHR                    -1092
+
+/* CLeglImageKHR is an opaque handle to an EGLImage */
+typedef void*               CLeglImageKHR;
+
+/* CLeglDisplayKHR is an opaque handle to an EGLDisplay */
+typedef void*               CLeglDisplayKHR;
+
+/* properties passed to clCreateFromEGLImageKHR */
+typedef intptr_t            cl_egl_image_properties_khr;
+
+
+typedef cl_mem (CL_API_CALL *
+clCreateFromEGLImageKHR_fn)(
+    cl_context context,
+    CLeglDisplayKHR egldisplay,
+    CLeglImageKHR eglimage,
+    cl_mem_flags flags,
+    const cl_egl_image_properties_khr* properties,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0;
+
+typedef cl_int (CL_API_CALL *
+clEnqueueAcquireEGLObjectsKHR_fn)(
+    cl_command_queue command_queue,
+    cl_uint num_objects,
+    const cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event) CL_API_SUFFIX__VERSION_1_0;
+
+typedef cl_int (CL_API_CALL *
+clEnqueueReleaseEGLObjectsKHR_fn)(
+    cl_command_queue command_queue,
+    cl_uint num_objects,
+    const cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event) CL_API_SUFFIX__VERSION_1_0;
+
+#if !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromEGLImageKHR(
+    cl_context context,
+    CLeglDisplayKHR egldisplay,
+    CLeglImageKHR eglimage,
+    cl_mem_flags flags,
+    const cl_egl_image_properties_khr* properties,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueAcquireEGLObjectsKHR(
+    cl_command_queue command_queue,
+    cl_uint num_objects,
+    const cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReleaseEGLObjectsKHR(
+    cl_command_queue command_queue,
+    cl_uint num_objects,
+    const cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event) CL_API_SUFFIX__VERSION_1_0;
+
+#endif /* !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES) */
+
+/***************************************************************
+* cl_khr_egl_event
+***************************************************************/
+#define cl_khr_egl_event 1
+#define CL_KHR_EGL_EVENT_EXTENSION_NAME \
+    "cl_khr_egl_event"
+
+/* CLeglDisplayKHR is an opaque handle to an EGLDisplay */
+/* type CLeglDisplayKHR */
+
+/* CLeglSyncKHR is an opaque handle to an EGLSync object */
+typedef void*               CLeglSyncKHR;
+
+
+typedef cl_event (CL_API_CALL *
+clCreateEventFromEGLSyncKHR_fn)(
+    cl_context context,
+    CLeglSyncKHR sync,
+    CLeglDisplayKHR display,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0;
+
+#if !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+
+extern CL_API_ENTRY cl_event CL_API_CALL
+clCreateEventFromEGLSyncKHR(
+    cl_context context,
+    CLeglSyncKHR sync,
+    CLeglDisplayKHR display,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0;
+
+#endif /* !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES) */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* OPENCL_CL_EGL_H_ */
@@ -0,0 +1,19 @@
+/*******************************************************************************
+ * Copyright (c) 2008-2020 The Khronos Group Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ ******************************************************************************/
+
+#include <CL/cl_ext.h>
+#pragma message("The Intel extensions have been moved into cl_ext.h.  Please include cl_ext.h directly.")
@@ -0,0 +1,372 @@
+/*******************************************************************************
+ * Copyright (c) 2008-2023 The Khronos Group Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+
+#ifndef OPENCL_CL_GL_H_
+#define OPENCL_CL_GL_H_
+
+/*
+** This header is generated from the Khronos OpenCL XML API Registry.
+*/
+
+#include <CL/cl.h>
+
+/* CL_NO_PROTOTYPES implies CL_NO_EXTENSION_PROTOTYPES: */
+#if defined(CL_NO_PROTOTYPES) && !defined(CL_NO_EXTENSION_PROTOTYPES)
+#define CL_NO_EXTENSION_PROTOTYPES
+#endif
+
+/* CL_NO_EXTENSION_PROTOTYPES implies
+   CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES and
+   CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES: */
+#if defined(CL_NO_EXTENSION_PROTOTYPES) && \
+    !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+#define CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES
+#endif
+#if defined(CL_NO_EXTENSION_PROTOTYPES) && \
+    !defined(CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+#define CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/***************************************************************
+* cl_khr_gl_sharing
+***************************************************************/
+#define cl_khr_gl_sharing 1
+#define CL_KHR_GL_SHARING_EXTENSION_NAME \
+    "cl_khr_gl_sharing"
+
+typedef cl_uint             cl_gl_context_info;
+
+/* Error codes */
+#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR              -1000
+
+/* cl_gl_context_info */
+#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR                0x2006
+#define CL_DEVICES_FOR_GL_CONTEXT_KHR                       0x2007
+
+/* Additional cl_context_properties */
+#define CL_GL_CONTEXT_KHR                                   0x2008
+#define CL_EGL_DISPLAY_KHR                                  0x2009
+#define CL_GLX_DISPLAY_KHR                                  0x200A
+#define CL_WGL_HDC_KHR                                      0x200B
+#define CL_CGL_SHAREGROUP_KHR                               0x200C
+
+typedef cl_uint             cl_gl_object_type;
+typedef cl_uint             cl_gl_texture_info;
+typedef cl_uint             cl_gl_platform_info;
+
+/* cl_gl_object_type */
+#define CL_GL_OBJECT_BUFFER                                 0x2000
+#define CL_GL_OBJECT_TEXTURE2D                              0x2001
+#define CL_GL_OBJECT_TEXTURE3D                              0x2002
+#define CL_GL_OBJECT_RENDERBUFFER                           0x2003
+
+#if defined(CL_VERSION_1_2)
+/* cl_gl_object_type */
+#define CL_GL_OBJECT_TEXTURE2D_ARRAY                        0x200E
+#define CL_GL_OBJECT_TEXTURE1D                              0x200F
+#define CL_GL_OBJECT_TEXTURE1D_ARRAY                        0x2010
+#define CL_GL_OBJECT_TEXTURE_BUFFER                         0x2011
+
+#endif /* defined(CL_VERSION_1_2) */
+
+/* cl_gl_texture_info */
+#define CL_GL_TEXTURE_TARGET                                0x2004
+#define CL_GL_MIPMAP_LEVEL                                  0x2005
+
+
+typedef cl_int (CL_API_CALL *
+clGetGLContextInfoKHR_fn)(
+    const cl_context_properties* properties,
+    cl_gl_context_info param_name,
+    size_t param_value_size,
+    void* param_value,
+    size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
+
+typedef cl_mem (CL_API_CALL *
+clCreateFromGLBuffer_fn)(
+    cl_context context,
+    cl_mem_flags flags,
+    cl_GLuint bufobj,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0;
+
+#if !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetGLContextInfoKHR(
+    const cl_context_properties* properties,
+    cl_gl_context_info param_name,
+    size_t param_value_size,
+    void* param_value,
+    size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromGLBuffer(
+    cl_context context,
+    cl_mem_flags flags,
+    cl_GLuint bufobj,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0;
+
+#endif /* !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES) */
+
+#if defined(CL_VERSION_1_2)
+
+typedef cl_mem (CL_API_CALL *
+clCreateFromGLTexture_fn)(
+    cl_context context,
+    cl_mem_flags flags,
+    cl_GLenum target,
+    cl_GLint miplevel,
+    cl_GLuint texture,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2;
+
+#if !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromGLTexture(
+    cl_context context,
+    cl_mem_flags flags,
+    cl_GLenum target,
+    cl_GLint miplevel,
+    cl_GLuint texture,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2;
+
+#endif /* !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES) */
+
+#endif /* defined(CL_VERSION_1_2) */
+
+
+typedef cl_mem (CL_API_CALL *
+clCreateFromGLRenderbuffer_fn)(
+    cl_context context,
+    cl_mem_flags flags,
+    cl_GLuint renderbuffer,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0;
+
+typedef cl_int (CL_API_CALL *
+clGetGLObjectInfo_fn)(
+    cl_mem memobj,
+    cl_gl_object_type* gl_object_type,
+    cl_GLuint* gl_object_name) CL_API_SUFFIX__VERSION_1_0;
+
+typedef cl_int (CL_API_CALL *
+clGetGLTextureInfo_fn)(
+    cl_mem memobj,
+    cl_gl_texture_info param_name,
+    size_t param_value_size,
+    void* param_value,
+    size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
+
+typedef cl_int (CL_API_CALL *
+clEnqueueAcquireGLObjects_fn)(
+    cl_command_queue command_queue,
+    cl_uint num_objects,
+    const cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event) CL_API_SUFFIX__VERSION_1_0;
+
+typedef cl_int (CL_API_CALL *
+clEnqueueReleaseGLObjects_fn)(
+    cl_command_queue command_queue,
+    cl_uint num_objects,
+    const cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event) CL_API_SUFFIX__VERSION_1_0;
+
+#if !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromGLRenderbuffer(
+    cl_context context,
+    cl_mem_flags flags,
+    cl_GLuint renderbuffer,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetGLObjectInfo(
+    cl_mem memobj,
+    cl_gl_object_type* gl_object_type,
+    cl_GLuint* gl_object_name) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetGLTextureInfo(
+    cl_mem memobj,
+    cl_gl_texture_info param_name,
+    size_t param_value_size,
+    void* param_value,
+    size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueAcquireGLObjects(
+    cl_command_queue command_queue,
+    cl_uint num_objects,
+    const cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReleaseGLObjects(
+    cl_command_queue command_queue,
+    cl_uint num_objects,
+    const cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event) CL_API_SUFFIX__VERSION_1_0;
+
+#endif /* !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES) */
+
+/* OpenCL 1.0 APIs that were deprecated in OpenCL 1.2 */
+
+typedef cl_mem (CL_API_CALL *
+clCreateFromGLTexture2D_fn)(
+    cl_context context,
+    cl_mem_flags flags,
+    cl_GLenum target,
+    cl_GLint miplevel,
+    cl_GLuint texture,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED;
+
+typedef cl_mem (CL_API_CALL *
+clCreateFromGLTexture3D_fn)(
+    cl_context context,
+    cl_mem_flags flags,
+    cl_GLenum target,
+    cl_GLint miplevel,
+    cl_GLuint texture,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED;
+
+#if !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromGLTexture2D(
+    cl_context context,
+    cl_mem_flags flags,
+    cl_GLenum target,
+    cl_GLint miplevel,
+    cl_GLuint texture,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromGLTexture3D(
+    cl_context context,
+    cl_mem_flags flags,
+    cl_GLenum target,
+    cl_GLint miplevel,
+    cl_GLuint texture,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED;
+
+#endif /* !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES) */
+
+/***************************************************************
+* cl_khr_gl_event
+***************************************************************/
+#define cl_khr_gl_event 1
+#define CL_KHR_GL_EVENT_EXTENSION_NAME \
+    "cl_khr_gl_event"
+
+typedef struct __GLsync *   cl_GLsync;
+
+/* cl_command_type */
+#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR                 0x200D
+
+
+typedef cl_event (CL_API_CALL *
+clCreateEventFromGLsyncKHR_fn)(
+    cl_context context,
+    cl_GLsync sync,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_1;
+
+#if !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+
+extern CL_API_ENTRY cl_event CL_API_CALL
+clCreateEventFromGLsyncKHR(
+    cl_context context,
+    cl_GLsync sync,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_1;
+
+#endif /* !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES) */
+
+/***************************************************************
+* cl_khr_gl_depth_images
+***************************************************************/
+#define cl_khr_gl_depth_images 1
+#define CL_KHR_GL_DEPTH_IMAGES_EXTENSION_NAME \
+    "cl_khr_gl_depth_images"
+
+#if !defined(CL_VERSION_1_2)
+/* cl_channel_order - defined in CL/cl.h for OpenCL 1.2 and newer */
+#define CL_DEPTH_STENCIL                                    0x10BE
+
+#endif /* !defined(CL_VERSION_1_2) */
+
+#if !defined(CL_VERSION_1_2)
+/* cl_channel_type - defined in CL/cl.h for OpenCL 1.2 and newer */
+#define CL_UNORM_INT24                                      0x10DF
+
+#endif /* !defined(CL_VERSION_1_2) */
+
+/***************************************************************
+* cl_khr_gl_msaa_sharing
+***************************************************************/
+#define cl_khr_gl_msaa_sharing 1
+#define CL_KHR_GL_MSAA_SHARING_EXTENSION_NAME \
+    "cl_khr_gl_msaa_sharing"
+
+/* cl_gl_texture_info */
+#define CL_GL_NUM_SAMPLES                                   0x2012
+
+/***************************************************************
+* cl_intel_sharing_format_query_gl
+***************************************************************/
+#define cl_intel_sharing_format_query_gl 1
+#define CL_INTEL_SHARING_FORMAT_QUERY_GL_EXTENSION_NAME \
+    "cl_intel_sharing_format_query_gl"
+
+/* when cl_khr_gl_sharing is supported */
+
+typedef cl_int (CL_API_CALL *
+clGetSupportedGLTextureFormatsINTEL_fn)(
+    cl_context context,
+    cl_mem_flags flags,
+    cl_mem_object_type image_type,
+    cl_uint num_entries,
+    cl_GLenum* gl_formats,
+    cl_uint* num_texture_formats) ;
+
+#if !defined(CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetSupportedGLTextureFormatsINTEL(
+    cl_context context,
+    cl_mem_flags flags,
+    cl_mem_object_type image_type,
+    cl_uint num_entries,
+    cl_GLenum* gl_formats,
+    cl_uint* num_texture_formats) ;
+
+#endif /* !defined(CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES) */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* OPENCL_CL_GL_H_ */
@@ -0,0 +1,18 @@
+/*******************************************************************************
+ * Copyright (c) 2008-2021 The Khronos Group Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+
+#include <CL/cl_gl.h>
+#pragma message("The extensions in cl_gl_ext.h have been moved into cl_gl.h.  Please include cl_gl.h directly.")
@@ -0,0 +1,440 @@
+/*******************************************************************************
+ * Copyright (c) 2019-2020 The Khronos Group Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+
+/**
+ * This is a header-only utility library that provides OpenCL host code with
+ * routines for converting to/from cl_half values.
+ *
+ * Example usage:
+ *
+ *    #include <CL/cl_half.h>
+ *    ...
+ *    cl_half h = cl_half_from_float(0.5f, CL_HALF_RTE);
+ *    cl_float f = cl_half_to_float(h);
+ */
+
+#ifndef OPENCL_CL_HALF_H
+#define OPENCL_CL_HALF_H
+
+#include <CL/cl_platform.h>
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/**
+ * Rounding mode used when converting to cl_half.
+ *
+ * Passed to cl_half_from_float() and cl_half_from_double(). It selects how a
+ * value that is not exactly representable in half precision is rounded, and
+ * is forwarded to the overflow / underflow helpers to choose between a
+ * finite result, an infinity, the smallest denormal, or zero.
+ */
+typedef enum
+{
+  CL_HALF_RTE, // round to nearest, ties go to the even mantissa
+  CL_HALF_RTZ, // round towards zero (truncate)
+  CL_HALF_RTP, // round towards positive infinity
+  CL_HALF_RTN, // round towards negative infinity
+} cl_half_rounding_mode;
+
+
+/* Private utility macros. */
+#define CL_HALF_EXP_MASK 0x7C00
+#define CL_HALF_MAX_FINITE_MAG 0x7BFF
+
+
+/*
+ * Choose the half-precision bit pattern for a value whose magnitude is too
+ * large to be represented as a finite cl_half.
+ *
+ * rounding_mode decides whether the result saturates at the largest finite
+ * magnitude or becomes an infinity; sign is non-zero for negative inputs.
+ */
+static inline cl_half cl_half_handle_overflow(cl_half_rounding_mode rounding_mode,
+                                              uint16_t sign)
+{
+  // Both candidate results with the caller's sign placed in bit 15
+  const cl_half signed_max_finite = (cl_half)((sign << 15) | CL_HALF_MAX_FINITE_MAG);
+  const cl_half signed_infinity = (cl_half)((sign << 15) | CL_HALF_EXP_MASK);
+
+  switch (rounding_mode)
+  {
+    case CL_HALF_RTZ:
+      // Towards zero never reaches infinity -> saturate, keeping the sign
+      return signed_max_finite;
+    case CL_HALF_RTP:
+      // Only a negative overflow is pulled back to the most negative finite value
+      return sign ? (cl_half)((1 << 15) | CL_HALF_MAX_FINITE_MAG)
+                  : signed_infinity;
+    case CL_HALF_RTN:
+      // Only a positive overflow is pulled back to the largest finite value
+      return sign ? signed_infinity
+                  : (cl_half)CL_HALF_MAX_FINITE_MAG;
+    default:
+      // Every remaining mode overflows to infinity
+      return signed_infinity;
+  }
+}
+
+/*
+ * Choose the half-precision bit pattern for a value whose magnitude is too
+ * small to be represented even as a half-precision denormal.
+ *
+ * The result is a signed zero, unless the rounding direction points away
+ * from zero for this sign, in which case it is the signed value with only
+ * the lowest mantissa bit set.
+ */
+static inline cl_half cl_half_handle_underflow(cl_half_rounding_mode rounding_mode,
+                                               uint16_t sign)
+{
+  // RTP pushes positive values up, RTN pushes negative values down;
+  // every other combination flushes to zero.
+  const int rounds_away_from_zero =
+      (rounding_mode == CL_HALF_RTP && !sign) ||
+      (rounding_mode == CL_HALF_RTN && sign);
+
+  return (cl_half)((sign << 15) | (rounds_away_from_zero ? 1 : 0));
+}
+
+
+/**
+ * Convert a cl_float to a cl_half.
+ *
+ * The float's bit pattern is split into sign, exponent and mantissa; the
+ * exponent is re-biased for half precision and the mantissa is shifted down
+ * and rounded according to rounding_mode.
+ *
+ * Cases handled before the general path, in this order:
+ *   - NaN: returns a NaN carrying the upper mantissa bits with bit 0x200 set.
+ *   - Infinity, then zero: returned with the sign of the input.
+ *   - Unbiased exponent >= CL_HALF_MAX_EXP: result chosen by
+ *     cl_half_handle_overflow().
+ *   - Unbiased exponent < CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1: result
+ *     chosen by cl_half_handle_underflow().
+ *   - Unbiased exponent < -14: encoded with a zero half exponent (denormal),
+ *     using an exponent-dependent mantissa shift.
+ *
+ * f is the value to convert; rounding_mode is the rounding direction used
+ * when f is not exactly representable. Returns the cl_half bit pattern.
+ */
+static inline cl_half cl_half_from_float(cl_float f, cl_half_rounding_mode rounding_mode)
+{
+  // Type-punning through a union to get direct access to the underlying bits
+  union
+  {
+    cl_float f;
+    uint32_t i;
+  } f32;
+  f32.f = f;
+
+  // Extract sign bit (0 or 1)
+  uint16_t sign = f32.i >> 31;
+
+  // Extract FP32 exponent (still biased) and mantissa (without implicit 1)
+  uint32_t f_exp = (f32.i >> (CL_FLT_MANT_DIG - 1)) & 0xFF;
+  uint32_t f_mant = f32.i & ((1 << (CL_FLT_MANT_DIG - 1)) - 1);
+
+  // Remove FP32 exponent bias
+  int32_t exp = f_exp - CL_FLT_MAX_EXP + 1;
+
+  // Add FP16 exponent bias (only meaningful once the range checks below pass)
+  uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1);
+
+  // Position of the bit that will become the FP16 mantissa LSB
+  uint32_t lsb_pos = CL_FLT_MANT_DIG - CL_HALF_MANT_DIG;
+
+  // Check for NaN / infinity (all exponent bits set)
+  if (f_exp == 0xFF)
+  {
+    if (f_mant)
+    {
+      // NaN -> propagate mantissa and silence it
+      uint16_t h_mant = (uint16_t)(f_mant >> lsb_pos);
+      h_mant |= 0x200;
+      return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
+    }
+    else
+    {
+      // Infinity -> zero mantissa
+      return (sign << 15) | CL_HALF_EXP_MASK;
+    }
+  }
+
+  // Check for zero (sign is preserved)
+  if (!f_exp && !f_mant)
+  {
+    return (sign << 15);
+  }
+
+  // Check for overflow: magnitude too large for a finite half
+  if (exp >= CL_HALF_MAX_EXP)
+  {
+    return cl_half_handle_overflow(rounding_mode, sign);
+  }
+
+  // Check for underflow: magnitude too small even for a half denormal
+  if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))
+  {
+    return cl_half_handle_underflow(rounding_mode, sign);
+  }
+
+  // Check for value that will become denormal
+  if (exp < -14)
+  {
+    // Denormal -> include the implicit 1 from the FP32 mantissa
+    h_exp = 0;
+    f_mant |= 1 << (CL_FLT_MANT_DIG - 1);
+
+    // Mantissa shift amount depends on exponent (smaller value -> larger shift)
+    lsb_pos = -exp + (CL_FLT_MANT_DIG - 25);
+  }
+
+  // Generate FP16 mantissa by shifting FP32 mantissa (truncates discarded bits)
+  uint16_t h_mant = (uint16_t)(f_mant >> lsb_pos);
+
+  // Check whether we need to round: mask selects the discarded bits,
+  // halfway is the value of the highest discarded bit
+  uint32_t halfway = 1 << (lsb_pos - 1);
+  uint32_t mask = (halfway << 1) - 1;
+  switch (rounding_mode)
+  {
+    case CL_HALF_RTE:
+      if ((f_mant & mask) > halfway)
+      {
+        // More than halfway -> round up
+        h_mant += 1;
+      }
+      else if ((f_mant & mask) == halfway)
+      {
+        // Exactly halfway -> round to nearest even
+        if (h_mant & 0x1)
+          h_mant += 1;
+      }
+      break;
+    case CL_HALF_RTZ:
+      // Mantissa has already been truncated -> do nothing
+      break;
+    case CL_HALF_RTP:
+      if ((f_mant & mask) && !sign)
+      {
+        // Round positive numbers up
+        h_mant += 1;
+      }
+      break;
+    case CL_HALF_RTN:
+      if ((f_mant & mask) && sign)
+      {
+        // Round negative numbers down (magnitude grows)
+        h_mant += 1;
+      }
+      break;
+  }
+
+  // Check for mantissa overflow: rounding carried into bit 10, so bump the exponent
+  if (h_mant & 0x400)
+  {
+    h_exp += 1;
+    h_mant = 0;
+  }
+
+  return (sign << 15) | (h_exp << 10) | h_mant;
+}
+
+
+/**
+ * Convert a cl_double to a cl_half.
+ *
+ * Follows the same steps as cl_half_from_float(), operating on the 64-bit
+ * pattern: split into sign, exponent and mantissa, re-bias the exponent for
+ * half precision, then shift the mantissa down and round it according to
+ * rounding_mode.
+ *
+ * Cases handled before the general path, in this order:
+ *   - NaN: returns a NaN carrying the upper mantissa bits with bit 0x200 set.
+ *   - Infinity, then zero: returned with the sign of the input.
+ *   - Unbiased exponent >= CL_HALF_MAX_EXP: result chosen by
+ *     cl_half_handle_overflow().
+ *   - Unbiased exponent < CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1: result
+ *     chosen by cl_half_handle_underflow().
+ *   - Unbiased exponent < -14: encoded with a zero half exponent (denormal),
+ *     using an exponent-dependent mantissa shift.
+ *
+ * d is the value to convert; rounding_mode is the rounding direction used
+ * when d is not exactly representable. Returns the cl_half bit pattern.
+ */
+static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rounding_mode)
+{
+  // Type-punning through a union to get direct access to the underlying bits
+  union
+  {
+    cl_double d;
+    uint64_t i;
+  } f64;
+  f64.d = d;
+
+  // Extract sign bit (0 or 1)
+  uint16_t sign = f64.i >> 63;
+
+  // Extract FP64 exponent (still biased) and mantissa (without implicit 1)
+  uint64_t d_exp = (f64.i >> (CL_DBL_MANT_DIG - 1)) & 0x7FF;
+  uint64_t d_mant = f64.i & (((uint64_t)1 << (CL_DBL_MANT_DIG - 1)) - 1);
+
+  // Remove FP64 exponent bias
+  int64_t exp = d_exp - CL_DBL_MAX_EXP + 1;
+
+  // Add FP16 exponent bias (only meaningful once the range checks below pass)
+  uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1);
+
+  // Position of the bit that will become the FP16 mantissa LSB
+  uint32_t lsb_pos = CL_DBL_MANT_DIG - CL_HALF_MANT_DIG;
+
+  // Check for NaN / infinity (all exponent bits set)
+  if (d_exp == 0x7FF)
+  {
+    if (d_mant)
+    {
+      // NaN -> propagate mantissa and silence it
+      uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);
+      h_mant |= 0x200;
+      return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
+    }
+    else
+    {
+      // Infinity -> zero mantissa
+      return (sign << 15) | CL_HALF_EXP_MASK;
+    }
+  }
+
+  // Check for zero (sign is preserved)
+  if (!d_exp && !d_mant)
+  {
+    return (sign << 15);
+  }
+
+  // Check for overflow: magnitude too large for a finite half
+  if (exp >= CL_HALF_MAX_EXP)
+  {
+    return cl_half_handle_overflow(rounding_mode, sign);
+  }
+
+  // Check for underflow: magnitude too small even for a half denormal
+  if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))
+  {
+    return cl_half_handle_underflow(rounding_mode, sign);
+  }
+
+  // Check for value that will become denormal
+  if (exp < -14)
+  {
+    // Include the implicit 1 from the FP64 mantissa
+    h_exp = 0;
+    d_mant |= (uint64_t)1 << (CL_DBL_MANT_DIG - 1);
+
+    // Mantissa shift amount depends on exponent (smaller value -> larger shift)
+    lsb_pos = (uint32_t)(-exp + (CL_DBL_MANT_DIG - 25));
+  }
+
+  // Generate FP16 mantissa by shifting FP64 mantissa (truncates discarded bits)
+  uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);
+
+  // Check whether we need to round: mask selects the discarded bits,
+  // halfway is the value of the highest discarded bit
+  uint64_t halfway = (uint64_t)1 << (lsb_pos - 1);
+  uint64_t mask = (halfway << 1) - 1;
+  switch (rounding_mode)
+  {
+    case CL_HALF_RTE:
+      if ((d_mant & mask) > halfway)
+      {
+        // More than halfway -> round up
+        h_mant += 1;
+      }
+      else if ((d_mant & mask) == halfway)
+      {
+        // Exactly halfway -> round to nearest even
+        if (h_mant & 0x1)
+          h_mant += 1;
+      }
+      break;
+    case CL_HALF_RTZ:
+      // Mantissa has already been truncated -> do nothing
+      break;
+    case CL_HALF_RTP:
+      if ((d_mant & mask) && !sign)
+      {
+        // Round positive numbers up
+        h_mant += 1;
+      }
+      break;
+    case CL_HALF_RTN:
+      if ((d_mant & mask) && sign)
+      {
+        // Round negative numbers down (magnitude grows)
+        h_mant += 1;
+      }
+      break;
+  }
+
+  // Check for mantissa overflow: rounding carried into bit 10, so bump the exponent
+  if (h_mant & 0x400)
+  {
+    h_exp += 1;
+    h_mant = 0;
+  }
+
+  return (sign << 15) | (h_exp << 10) | h_mant;
+}
+
+
+/**
+ * Convert a cl_half to a cl_float.
+ *
+ * The half's bit pattern is split into sign, exponent and mantissa; the
+ * exponent is re-biased for single precision and the mantissa is moved into
+ * the top of the 23-bit float mantissa field. No rounding is performed.
+ *
+ * Cases handled explicitly:
+ *   - NaN: returns a NaN carrying the half mantissa with bit 0x400000 set.
+ *   - Infinity and zero: returned with the sign of the input.
+ *   - Half denormal: normalized by shifting the mantissa left until its
+ *     leading 1 becomes implicit, lowering the exponent to match.
+ *
+ * h is the half-precision bit pattern. Returns the equivalent cl_float.
+ */
+static inline cl_float cl_half_to_float(cl_half h)
+{
+  // Type-punning through a union to get direct access to the underlying bits
+  union
+  {
+    cl_float f;
+    uint32_t i;
+  } f32;
+
+  // Extract sign bit. It is held in a 32-bit unsigned so that shifting it
+  // into bit 31 below is an unsigned shift; with a 16-bit type the operand
+  // would be promoted to int and the shift would overflow it.
+  uint32_t sign = (uint32_t)(h >> 15);
+
+  // Extract FP16 exponent (still biased) and mantissa (without implicit 1)
+  uint16_t h_exp = (h >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
+  uint16_t h_mant = h & 0x3FF;
+
+  // Remove FP16 exponent bias
+  int32_t exp = h_exp - CL_HALF_MAX_EXP + 1;
+
+  // Add FP32 exponent bias
+  uint32_t f_exp = exp + CL_FLT_MAX_EXP - 1;
+
+  // Check for NaN / infinity (all exponent bits set)
+  if (h_exp == 0x1F)
+  {
+    if (h_mant)
+    {
+      // NaN -> propagate mantissa and silence it
+      uint32_t f_mant = h_mant << (CL_FLT_MANT_DIG - CL_HALF_MANT_DIG);
+      f_mant |= 0x400000;
+      f32.i = (sign << 31) | 0x7F800000 | f_mant;
+      return f32.f;
+    }
+    else
+    {
+      // Infinity -> zero mantissa
+      f32.i = (sign << 31) | 0x7F800000;
+      return f32.f;
+    }
+  }
+
+  // Check for zero / denormal
+  if (h_exp == 0)
+  {
+    if (h_mant == 0)
+    {
+      // Zero -> zero exponent
+      f_exp = 0;
+    }
+    else
+    {
+      // Denormal -> normalize it
+      // - Shift mantissa to make most-significant 1 implicit
+      // - Adjust exponent accordingly
+      // The loop runs at least once because h_mant was masked to 10 bits.
+      uint32_t shift = 0;
+      while ((h_mant & 0x400) == 0)
+      {
+        h_mant <<= 1;
+        shift++;
+      }
+      h_mant &= 0x3FF;
+      f_exp -= shift - 1;
+    }
+  }
+
+  f32.i = (sign << 31) | (f_exp << 23) | (h_mant << 13);
+  return f32.f;
+}
+
+
+#undef CL_HALF_EXP_MASK
+#undef CL_HALF_MAX_FINITE_MAG
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif  /* OPENCL_CL_HALF_H */
@@ -0,0 +1,124 @@
+/*******************************************************************************
+ * Copyright (c) 2008-2023 The Khronos Group Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+
+#ifndef OPENCL_CL_LAYER_H_
+#define OPENCL_CL_LAYER_H_
+
+/*
+** This header is generated from the Khronos OpenCL XML API Registry.
+*/
+
+#include <CL/cl_icd.h>
+
+#include <CL/cl.h>
+
+/* CL_NO_PROTOTYPES implies CL_NO_EXTENSION_PROTOTYPES: */
+#if defined(CL_NO_PROTOTYPES) && !defined(CL_NO_EXTENSION_PROTOTYPES)
+#define CL_NO_EXTENSION_PROTOTYPES
+#endif
+
+/* CL_NO_EXTENSION_PROTOTYPES implies
+   CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES and
+   CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES: */
+#if defined(CL_NO_EXTENSION_PROTOTYPES) && \
+    !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+#define CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES
+#endif
+#if defined(CL_NO_EXTENSION_PROTOTYPES) && \
+    !defined(CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+#define CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/***************************************************************
+* cl_loader_layers
+*
+* Types, enum values and entry points for the cl_loader_layers
+* extension. Each of the two entry points (clGetLayerInfo and
+* clInitLayer) has a "_fn" function-pointer typedef and a legacy
+* "pfn_" typedef with the same signature as its prototype.
+***************************************************************/
+#define cl_loader_layers 1
+#define CL_LOADER_LAYERS_EXTENSION_NAME \
+    "cl_loader_layers"
+
+typedef cl_uint             cl_layer_info;
+typedef cl_uint             cl_layer_api_version;
+
+/* cl_layer_info values (cl_layer_info is the type of clGetLayerInfo's param_name) */
+#define CL_LAYER_API_VERSION                                0x4240
+#define CL_LAYER_NAME                                       0x4241
+
+/* Misc API enums */
+#define CL_LAYER_API_VERSION_100                            100
+
+
+typedef cl_int (CL_API_CALL *
+clGetLayerInfo_fn)(
+    cl_layer_info param_name,
+    size_t param_value_size,
+    void* param_value,
+    size_t* param_value_size_ret) ; /* pointer type matching clGetLayerInfo */
+
+typedef cl_int (CL_API_CALL *
+clInitLayer_fn)(
+    cl_uint num_entries,
+    const cl_icd_dispatch* target_dispatch,
+    cl_uint* num_entries_ret,
+    const cl_icd_dispatch** layer_dispatch_ret) ; /* pointer type matching clInitLayer */
+
+/*
+** The function pointer typedefs prefixed with "pfn_" are provided for
+** compatibility with earlier versions of the headers.  New code is
+** encouraged to use the function pointer typedefs that are suffixed with
+** "_fn" instead, for consistency.
+*/
+
+typedef cl_int (CL_API_CALL *
+pfn_clGetLayerInfo)(
+    cl_layer_info param_name,
+    size_t param_value_size,
+    void* param_value,
+    size_t* param_value_size_ret) ; /* same signature as clGetLayerInfo_fn */
+
+typedef cl_int (CL_API_CALL *
+pfn_clInitLayer)(
+    cl_uint num_entries,
+    const cl_icd_dispatch* target_dispatch,
+    cl_uint* num_entries_ret,
+    const cl_icd_dispatch** layer_dispatch_ret) ; /* same signature as clInitLayer_fn */
+
+#if !defined(CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetLayerInfo(
+    cl_layer_info param_name,
+    size_t param_value_size,
+    void* param_value,
+    size_t* param_value_size_ret) ; /* declared only when prototypes are not suppressed */
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clInitLayer(
+    cl_uint num_entries,
+    const cl_icd_dispatch* target_dispatch,
+    cl_uint* num_entries_ret,
+    const cl_icd_dispatch** layer_dispatch_ret) ; /* declared only when prototypes are not suppressed */
+
+#endif /* !defined(CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES) */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* OPENCL_CL_LAYER_H_ */
@@ -0,0 +1,199 @@
+/*******************************************************************************
+ * Copyright (c) 2008-2023 The Khronos Group Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+
+#ifndef OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H_
+#define OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H_
+
+/*
+** This header is generated from the Khronos OpenCL XML API Registry.
+*/
+
+#include <va/va.h>
+
+#include <CL/cl.h>
+
+/* CL_NO_PROTOTYPES implies CL_NO_EXTENSION_PROTOTYPES: */
+#if defined(CL_NO_PROTOTYPES) && !defined(CL_NO_EXTENSION_PROTOTYPES)
+#define CL_NO_EXTENSION_PROTOTYPES
+#endif
+
+/* CL_NO_EXTENSION_PROTOTYPES implies
+   CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES and
+   CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES: */
+#if defined(CL_NO_EXTENSION_PROTOTYPES) && \
+    !defined(CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+#define CL_NO_ICD_DISPATCH_EXTENSION_PROTOTYPES
+#endif
+#if defined(CL_NO_EXTENSION_PROTOTYPES) && \
+    !defined(CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+#define CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/***************************************************************
+* cl_intel_sharing_format_query_va_api
+*
+* Declares one entry point,
+* clGetSupportedVA_APIMediaSurfaceFormatsINTEL, together with its
+* "_fn" function-pointer typedef. The format list is exchanged as
+* VAImageFormat values, a type supplied by <va/va.h>.
+***************************************************************/
+#define cl_intel_sharing_format_query_va_api 1
+#define CL_INTEL_SHARING_FORMAT_QUERY_VA_API_EXTENSION_NAME \
+    "cl_intel_sharing_format_query_va_api"
+
+/* when cl_intel_va_api_media_sharing is supported */
+
+typedef cl_int (CL_API_CALL *
+clGetSupportedVA_APIMediaSurfaceFormatsINTEL_fn)(
+    cl_context context,
+    cl_mem_flags flags,
+    cl_mem_object_type image_type,
+    cl_uint plane,
+    cl_uint num_entries,
+    VAImageFormat* va_api_formats,
+    cl_uint* num_surface_formats) ; /* pointer type matching the prototype below */
+
+#if !defined(CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetSupportedVA_APIMediaSurfaceFormatsINTEL(
+    cl_context context,
+    cl_mem_flags flags,
+    cl_mem_object_type image_type,
+    cl_uint plane,
+    cl_uint num_entries,
+    VAImageFormat* va_api_formats,
+    cl_uint* num_surface_formats) ; /* declared only when prototypes are not suppressed */
+
+#endif /* !defined(CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES) */
+
+/***************************************************************
+* cl_intel_va_api_media_sharing
+*
+* Declares the error codes, enum values and four entry points of
+* the extension. Every entry point has a "_fn" function-pointer
+* typedef with the same signature as its prototype; the prototypes
+* are declared only when
+* CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES is not defined.
+***************************************************************/
+#define cl_intel_va_api_media_sharing 1
+#define CL_INTEL_VA_API_MEDIA_SHARING_EXTENSION_NAME \
+    "cl_intel_va_api_media_sharing"
+
+typedef cl_uint             cl_va_api_device_source_intel;
+typedef cl_uint             cl_va_api_device_set_intel;
+
+/* Error codes (all negative) */
+#define CL_INVALID_VA_API_MEDIA_ADAPTER_INTEL               -1098
+#define CL_INVALID_VA_API_MEDIA_SURFACE_INTEL               -1099
+#define CL_VA_API_MEDIA_SURFACE_ALREADY_ACQUIRED_INTEL      -1100
+#define CL_VA_API_MEDIA_SURFACE_NOT_ACQUIRED_INTEL          -1101
+
+/* cl_va_api_device_source_intel (type of the media_adapter_type parameter) */
+#define CL_VA_API_DISPLAY_INTEL                             0x4094
+
+/* cl_va_api_device_set_intel (type of the media_adapter_set parameter) */
+#define CL_PREFERRED_DEVICES_FOR_VA_API_INTEL               0x4095
+#define CL_ALL_DEVICES_FOR_VA_API_INTEL                     0x4096
+
+/* cl_context_info */
+#define CL_CONTEXT_VA_API_DISPLAY_INTEL                     0x4097
+
+/* cl_mem_info */
+#define CL_MEM_VA_API_MEDIA_SURFACE_INTEL                   0x4098
+
+/* cl_image_info */
+#define CL_IMAGE_VA_API_PLANE_INTEL                         0x4099
+
+/* cl_command_type */
+#define CL_COMMAND_ACQUIRE_VA_API_MEDIA_SURFACES_INTEL      0x409A
+#define CL_COMMAND_RELEASE_VA_API_MEDIA_SURFACES_INTEL      0x409B
+
+
+typedef cl_int (CL_API_CALL *
+clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)(
+    cl_platform_id platform,
+    cl_va_api_device_source_intel media_adapter_type,
+    void* media_adapter,
+    cl_va_api_device_set_intel media_adapter_set,
+    cl_uint num_entries,
+    cl_device_id* devices,
+    cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_2; /* pointer type for clGetDeviceIDsFromVA_APIMediaAdapterINTEL */
+
+typedef cl_mem (CL_API_CALL *
+clCreateFromVA_APIMediaSurfaceINTEL_fn)(
+    cl_context context,
+    cl_mem_flags flags,
+    VASurfaceID* surface,
+    cl_uint plane,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2; /* pointer type for clCreateFromVA_APIMediaSurfaceINTEL; returns a cl_mem */
+
+typedef cl_int (CL_API_CALL *
+clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)(
+    cl_command_queue command_queue,
+    cl_uint num_objects,
+    const cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event) CL_API_SUFFIX__VERSION_1_2; /* pointer type for clEnqueueAcquireVA_APIMediaSurfacesINTEL */
+
+typedef cl_int (CL_API_CALL *
+clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)(
+    cl_command_queue command_queue,
+    cl_uint num_objects,
+    const cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event) CL_API_SUFFIX__VERSION_1_2; /* pointer type for clEnqueueReleaseVA_APIMediaSurfacesINTEL */
+
+#if !defined(CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES)
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetDeviceIDsFromVA_APIMediaAdapterINTEL(
+    cl_platform_id platform,
+    cl_va_api_device_source_intel media_adapter_type,
+    void* media_adapter,
+    cl_va_api_device_set_intel media_adapter_set,
+    cl_uint num_entries,
+    cl_device_id* devices,
+    cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromVA_APIMediaSurfaceINTEL(
+    cl_context context,
+    cl_mem_flags flags,
+    VASurfaceID* surface,
+    cl_uint plane,
+    cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueAcquireVA_APIMediaSurfacesINTEL(
+    cl_command_queue command_queue,
+    cl_uint num_objects,
+    const cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event) CL_API_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReleaseVA_APIMediaSurfacesINTEL(
+    cl_command_queue command_queue,
+    cl_uint num_objects,
+    const cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event) CL_API_SUFFIX__VERSION_1_2;
+
+#endif /* !defined(CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES) */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H_ */
@@ -0,0 +1,81 @@
+/*******************************************************************************
+ * Copyright (c) 2018-2020 The Khronos Group Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+
+#ifndef __CL_VERSION_H
+#define __CL_VERSION_H
+
+/* Detect which version to target.
+ * - If CL_TARGET_OPENCL_VERSION is not defined, a build message is emitted
+ *   and it is defined as 300.
+ * - If it is defined but is not one of 100, 110, 120, 200, 210, 220 or 300,
+ *   a build message is emitted and it is redefined as 300.
+ */
+#if !defined(CL_TARGET_OPENCL_VERSION)
+#pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not defined. Defaulting to 300 (OpenCL 3.0)")
+#define CL_TARGET_OPENCL_VERSION 300
+#endif
+#if CL_TARGET_OPENCL_VERSION != 100 && \
+    CL_TARGET_OPENCL_VERSION != 110 && \
+    CL_TARGET_OPENCL_VERSION != 120 && \
+    CL_TARGET_OPENCL_VERSION != 200 && \
+    CL_TARGET_OPENCL_VERSION != 210 && \
+    CL_TARGET_OPENCL_VERSION != 220 && \
+    CL_TARGET_OPENCL_VERSION != 300
+#pragma message("cl_version: CL_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220, 300). Defaulting to 300 (OpenCL 3.0)")
+#undef CL_TARGET_OPENCL_VERSION
+#define CL_TARGET_OPENCL_VERSION 300
+#endif
+
+
+/* OpenCL Version.
+ * Defines CL_VERSION_x_y as 1 for every version at or below the target,
+ * unless the macro is already defined.
+ */
+#if CL_TARGET_OPENCL_VERSION >= 300 && !defined(CL_VERSION_3_0)
+#define CL_VERSION_3_0  1
+#endif
+#if CL_TARGET_OPENCL_VERSION >= 220 && !defined(CL_VERSION_2_2)
+#define CL_VERSION_2_2  1
+#endif
+#if CL_TARGET_OPENCL_VERSION >= 210 && !defined(CL_VERSION_2_1)
+#define CL_VERSION_2_1  1
+#endif
+#if CL_TARGET_OPENCL_VERSION >= 200 && !defined(CL_VERSION_2_0)
+#define CL_VERSION_2_0  1
+#endif
+#if CL_TARGET_OPENCL_VERSION >= 120 && !defined(CL_VERSION_1_2)
+#define CL_VERSION_1_2  1
+#endif
+#if CL_TARGET_OPENCL_VERSION >= 110 && !defined(CL_VERSION_1_1)
+#define CL_VERSION_1_1  1
+#endif
+#if CL_TARGET_OPENCL_VERSION >= 100 && !defined(CL_VERSION_1_0)
+#define CL_VERSION_1_0  1
+#endif
+
+/* Allow deprecated APIs for older OpenCL versions.
+ * Defines CL_USE_DEPRECATED_OPENCL_x_y_APIS for every version from the
+ * target up to 2.2, unless the macro is already defined. With a target of
+ * 300 none of these macros is defined here.
+ */
+#if CL_TARGET_OPENCL_VERSION <= 220 && !defined(CL_USE_DEPRECATED_OPENCL_2_2_APIS)
+#define CL_USE_DEPRECATED_OPENCL_2_2_APIS
+#endif
+#if CL_TARGET_OPENCL_VERSION <= 210 && !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS)
+#define CL_USE_DEPRECATED_OPENCL_2_1_APIS
+#endif
+#if CL_TARGET_OPENCL_VERSION <= 200 && !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS)
+#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
+#endif
+#if CL_TARGET_OPENCL_VERSION <= 120 && !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS)
+#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
+#endif
+#if CL_TARGET_OPENCL_VERSION <= 110 && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
+#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
+#endif
+#if CL_TARGET_OPENCL_VERSION <= 100 && !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS)
+#define CL_USE_DEPRECATED_OPENCL_1_0_APIS
+#endif
+
+#endif  /* __CL_VERSION_H */
@@ -0,0 +1,32 @@
+/*******************************************************************************
+ * Copyright (c) 2008-2021 The Khronos Group Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+
+#ifndef __OPENCL_H
+#define __OPENCL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <CL/cl.h>
+#include <CL/cl_gl.h>
+#include <CL/cl_ext.h>
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* __OPENCL_H   */
@@ -1,76 +1,76 @@
-#pragma once
-
-#include "EmberCLPch.h"
-#include "EmberCLStructs.h"
-#include "EmberCLFunctions.h"
-
-/// <summary>
-/// DEOpenCLKernelCreator class.
-/// </summary>
-
-namespace EmberCLns
-{
-/// <summary>
-/// Kernel creator for density filtering.
-/// This implements both basic log scale filtering
-/// as well as the full flam3 density estimation filtering
-/// in OpenCL.
-/// Several conditionals are present in the CPU version. They
-/// are stripped out of the kernels and instead a separate kernel
-/// is created for every possible case.
-/// If the filter width is 9 or less, then the entire process can be
-/// done in shared memory which is very fast.
-/// However, if the filter width is greater than 9, shared memory is not
-/// used and all filtering is done directly with main global VRAM. This
-/// ends up being not much faster than doing it on the CPU.
-/// String members are kept for the program source and entry points
-/// for each version of the program.
-/// </summary>
-class EMBERCL_API DEOpenCLKernelCreator
-{
-public:
-	DEOpenCLKernelCreator();
-	DEOpenCLKernelCreator(bool doublePrecision, bool nVidia);
-
-	//Accessors.
-	const string& LogScaleAssignDEKernel() const;
-	const string& LogScaleAssignDEEntryPoint() const;
-	const string& GaussianDEKernel(size_t ss, uint filterWidth) const;
-	const string& GaussianDEEntryPoint(size_t ss, uint filterWidth) const;
-
-	//Miscellaneous static functions.
-	static uint MaxDEFilterSize();
-	static double SolveMaxDERad(double desiredFilterSize, double ss);
-	static uint SolveMaxBoxSize(uint localMem);
-
-private:
-	//Kernel creators.
-	string CreateLogScaleAssignDEKernelString();
-	string CreateGaussianDEKernel(size_t ss);
-	string CreateGaussianDEKernelNoLocalCache(size_t ss);
-
-	string m_LogScaleAssignDEKernel;
-	string m_LogScaleAssignDEEntryPoint = "LogScaleAssignDensityFilterKernel";
-
-	string m_GaussianDEWithoutSsKernel;
-	string m_GaussianDEWithoutSsEntryPoint = "GaussianDEWithoutSsKernel";
-
-	string m_GaussianDESsWithScfKernel;
-	string m_GaussianDESsWithScfEntryPoint = "GaussianDESsWithScfKernel";
-
-	string m_GaussianDESsWithoutScfKernel;
-	string m_GaussianDESsWithoutScfEntryPoint = "GaussianDESsWithoutScfKernel";
-
-	string m_GaussianDEWithoutSsNoCacheKernel;
-	string m_GaussianDEWithoutSsNoCacheEntryPoint = "GaussianDEWithoutSsNoCacheKernel";
-
-	string m_GaussianDESsWithScfNoCacheKernel;
-	string m_GaussianDESsWithScfNoCacheEntryPoint = "GaussianDESsWithScfNoCacheKernel";
-
-	string m_GaussianDESsWithoutScfNoCacheKernel;
-	string m_GaussianDESsWithoutScfNoCacheEntryPoint = "GaussianDESsWithoutScfNoCacheKernel";
-
-	bool m_DoublePrecision;
-	bool m_NVidia;
-};
-}
+#pragma once
+
+#include "EmberCLPch.h"
+#include "EmberCLStructs.h"
+#include "EmberCLFunctions.h"
+
+/// <summary>
+/// DEOpenCLKernelCreator class.
+/// </summary>
+
+namespace EmberCLns
+{
+/// <summary>
+/// Kernel creator for density filtering.
+/// This implements both basic log scale filtering
+/// as well as the full flam3 density estimation filtering
+/// in OpenCL.
+/// Several conditionals are present in the CPU version. They
+/// are stripped out of the kernels and instead a separate kernel
+/// is created for every possible case.
+/// If the filter width is 9 or less, then the entire process can be
+/// done in shared memory which is very fast.
+/// However, if the filter width is greater than 9, shared memory is not
+/// used and all filtering is done directly with main global VRAM. This
+/// ends up being not much faster than doing it on the CPU.
+/// String members are kept for the program source and entry points
+/// for each version of the program: one log scale kernel plus the Gaussian variants WithoutSs, SsWithScf and SsWithoutScf, each also in a NoCache form.
+/// </summary>
+class EMBERCL_API DEOpenCLKernelCreator
+{
+public:
+	DEOpenCLKernelCreator();
+	DEOpenCLKernelCreator(bool doublePrecision, bool nVidia);//Presumably stores the flags in m_DoublePrecision and m_NVidia; the definition is not in this header.
+
+	//Accessors. Each returns a const string reference; the Gaussian pair takes ss (presumably the supersample value) and the filter width, and the choice of variant is made in the implementation file.
+	const string& LogScaleAssignDEKernel() const;
+	const string& LogScaleAssignDEEntryPoint() const;
+	const string& GaussianDEKernel(size_t ss, uint filterWidth) const;
+	const string& GaussianDEEntryPoint(size_t ss, uint filterWidth) const;
+
+	//Miscellaneous static functions. They need no instance; their formulas are defined in the implementation file.
+	static uint MaxDEFilterSize();
+	static double SolveMaxDERad(double desiredFilterSize, double ss);
+	static uint SolveMaxBoxSize(uint localMem);
+
+private:
+	//Kernel creators. Each returns the generated source as a string.
+	string CreateLogScaleAssignDEKernelString();
+	string CreateGaussianDEKernel(size_t ss);
+	string CreateGaussianDEKernelNoLocalCache(size_t ss);
+
+	string m_LogScaleAssignDEKernel;//Each *Kernel member below is paired with an *EntryPoint member holding the kernel function name.
+	string m_LogScaleAssignDEEntryPoint = "LogScaleAssignDensityFilterKernel";
+
+	string m_GaussianDEWithoutSsKernel;
+	string m_GaussianDEWithoutSsEntryPoint = "GaussianDEWithoutSsKernel";
+
+	string m_GaussianDESsWithScfKernel;
+	string m_GaussianDESsWithScfEntryPoint = "GaussianDESsWithScfKernel";
+
+	string m_GaussianDESsWithoutScfKernel;
+	string m_GaussianDESsWithoutScfEntryPoint = "GaussianDESsWithoutScfKernel";
+
+	string m_GaussianDEWithoutSsNoCacheKernel;
+	string m_GaussianDEWithoutSsNoCacheEntryPoint = "GaussianDEWithoutSsNoCacheKernel";
+
+	string m_GaussianDESsWithScfNoCacheKernel;
+	string m_GaussianDESsWithScfNoCacheEntryPoint = "GaussianDESsWithScfNoCacheKernel";
+
+	string m_GaussianDESsWithoutScfNoCacheKernel;
+	string m_GaussianDESsWithoutScfNoCacheEntryPoint = "GaussianDESsWithoutScfNoCacheKernel";
+
+	bool m_DoublePrecision = false;//In-class default so the flag has a defined value even if a constructor does not set it.
+	bool m_NVidia = false;//In-class default, same reason as m_DoublePrecision.
+};
+}
@@ -1,22 +1,22 @@
-#include "EmberCLPch.h"
-
-#ifdef _WIN32
-/// <summary>
-/// Generated by Visual Studio to make the DLL run properly.
-/// </summary>
-BOOL APIENTRY DllMain( HMODULE hModule,
-					   DWORD  ul_reason_for_call,
-					   LPVOID lpReserved
-					 )
-{
-	switch (ul_reason_for_call)
-	{
-	case DLL_PROCESS_ATTACH:
-	case DLL_THREAD_ATTACH:
-	case DLL_THREAD_DETACH:
-	case DLL_PROCESS_DETACH:
-		break;
-	}
-	return TRUE;
-}
-#endif
+#include "EmberCLPch.h"
+
+#ifdef _WIN32
+/// <summary>
+/// Generated by Visual Studio to make the DLL run properly. Compiled only when _WIN32 is defined.
+/// Does no work: all four notification reasons fall through to the same break, none of the parameters is used, and TRUE is always returned.
+/// </summary>
+BOOL APIENTRY DllMain( HMODULE hModule,
+					   DWORD  ul_reason_for_call,
+					   LPVOID lpReserved
+					 )
+{
+	switch (ul_reason_for_call)
+	{
+	case DLL_PROCESS_ATTACH:
+	case DLL_THREAD_ATTACH:
+	case DLL_THREAD_DETACH:
+	case DLL_PROCESS_DETACH:
+		break;//No per-process or per-thread setup or teardown is performed.
+	}
+	return TRUE;//Same result for every reason, including values not listed in the switch.
+}
+#endif
@@ -1,274 +1,274 @@
-#pragma once
-
-#include "EmberCLPch.h"
-#include "EmberCLStructs.h"
-
-#define USEFMA 1
-
-/// <summary>
-/// OpenCL global function strings.
-/// </summary>
-
-namespace EmberCLns
-{
-/// <summary>
-/// OpenCL equivalent of Palette::RgbToHsv().
-/// </summary>
-static constexpr char RgbToHsvFunctionString[] =
-	//rgb 0 - 1,
-	//h 0 - 6, s 0 - 1, v 0 - 1
-	"static inline void RgbToHsv(real4_bucket* rgb, real4_bucket* hsv)\n"
-	"{\n"
-	"	real_bucket_t max, min, del, rc, gc, bc;\n"
-	"\n"
-	//Compute maximum of r, g, b.
-	"	if ((*rgb).x >= (*rgb).y)\n"
-	"	{\n"
-	"		if ((*rgb).x >= (*rgb).z)\n"
-	"			max = (*rgb).x;\n"
-	"		else\n"
-	"			max = (*rgb).z;\n"
-	"	}\n"
-	"	else\n"
-	"	{\n"
-	"		if ((*rgb).y >= (*rgb).z)\n"
-	"			max = (*rgb).y;\n"
-	"		else\n"
-	"			max = (*rgb).z;\n"
-	"	}\n"
-	"\n"
-	//Compute minimum of r, g, b.
-	"	if ((*rgb).x <= (*rgb).y)\n"
-	"	{\n"
-	"		if ((*rgb).x <= (*rgb).z)\n"
-	"			min = (*rgb).x;\n"
-	"		else\n"
-	"			min = (*rgb).z;\n"
-	"	}\n"
-	"	else\n"
-	"	{\n"
-	"		if ((*rgb).y <= (*rgb).z)\n"
-	"			min = (*rgb).y;\n"
-	"		else\n"
-	"			min = (*rgb).z;\n"
-	"	}\n"
-	"\n"
-	"	del = max - min;\n"
-	"	(*hsv).z = max;\n"
-	"\n"
-	"	if (max != 0)\n"
-	"		(*hsv).y = del / max;\n"
-	"	else\n"
-	"		(*hsv).y = 0;\n"
-	"\n"
-	"	(*hsv).x = 0;\n"
-	"	if ((*hsv).y != 0)\n"
-	"	{\n"
-	"		rc = (max - (*rgb).x) / del;\n"
-	"		gc = (max - (*rgb).y) / del;\n"
-	"		bc = (max - (*rgb).z) / del;\n"
-	"\n"
-	"		if ((*rgb).x == max)\n"
-	"			(*hsv).x = bc - gc;\n"
-	"		else if ((*rgb).y == max)\n"
-	"			(*hsv).x = 2 + rc - bc;\n"
-	"		else if ((*rgb).z == max)\n"
-	"			(*hsv).x = 4 + gc - rc;\n"
-	"\n"
-	"		if ((*hsv).x < 0)\n"
-	"			(*hsv).x += 6;\n"
-	"	}\n"
-	"}\n"
-	"\n";
-
-/// <summary>
-/// OpenCL equivalent of Palette::HsvToRgb().
-/// </summary>
-static constexpr char HsvToRgbFunctionString[] =
-	//h 0 - 6, s 0 - 1, v 0 - 1
-	//rgb 0 - 1
-	"static inline void HsvToRgb(real4_bucket* hsv, real4_bucket* rgb)\n"
-	"{\n"
-	"	int j;\n"
-	"	real_bucket_t f, p, q, t;\n"
-	"\n"
-	"	while ((*hsv).x >= 6)\n"
-	"		(*hsv).x = (*hsv).x - 6;\n"
-	"\n"
-	"	while ((*hsv).x <  0)\n"
-	"		(*hsv).x = (*hsv).x + 6;\n"
-	"\n"
-	"	j = (int)floor((*hsv).x);\n"
-	"	f = (*hsv).x - j;\n"
-	"	p = (*hsv).z * (1 - (*hsv).y);\n"
-	"	q = (*hsv).z * (1 - ((*hsv).y * f));\n"
-	"	t = (*hsv).z * (1 - ((*hsv).y * (1 - f)));\n"
-	"\n"
-	"	switch (j)\n"
-	"	{\n"
-	"		case 0:  (*rgb).x = (*hsv).z; (*rgb).y = t;		   (*rgb).z = p;	    break;\n"
-	"		case 1:  (*rgb).x = q;		  (*rgb).y = (*hsv).z; (*rgb).z = p;	    break;\n"
-	"		case 2:  (*rgb).x = p;		  (*rgb).y = (*hsv).z; (*rgb).z = t;	    break;\n"
-	"		case 3:  (*rgb).x = p;		  (*rgb).y = q;		   (*rgb).z = (*hsv).z; break;\n"
-	"		case 4:  (*rgb).x = t;		  (*rgb).y = p;		   (*rgb).z = (*hsv).z; break;\n"
-	"		case 5:  (*rgb).x = (*hsv).z; (*rgb).y = p;		   (*rgb).z = q;	    break;\n"
-	"		default: (*rgb).x = (*hsv).z; (*rgb).y = t;		   (*rgb).z = p;	    break;\n"
-	"	}\n"
-	"}\n"
-	"\n";
-
-/// <summary>
-/// OpenCL equivalent of Palette::CalcAlpha().
-/// </summary>
-static constexpr char CalcAlphaFunctionString[] =
-	"static inline real_t CalcAlpha(real_bucket_t density, real_bucket_t gamma, real_bucket_t linrange)\n"//Not the slightest clue what this is doing.//DOC
-	"{\n"
-	"	real_bucket_t frac, alpha, funcval = pow(linrange, gamma);\n"
-	"\n"
-	"	if (density > 0)\n"
-	"	{\n"
-	"		if (density < linrange)\n"
-	"		{\n"
-	"			frac = density / linrange;\n"
-	"			alpha = (1.0 - frac) * density * (funcval / linrange) + frac * pow(density, gamma);\n"
-	"		}\n"
-	"		else\n"
-	"			alpha = pow(density, gamma);\n"
-	"	}\n"
-	"	else\n"
-	"		alpha = 0;\n"
-	"\n"
-	"	return alpha;\n"
-	"}\n"
-	"\n";
-
-
-/// <summary>
-/// OpenCL equivalent of Renderer::CurveAdjust().
-/// Only use float here instead of real_t because the output will be passed to write_imagef()
-/// during final accumulation, which only takes floats.
-/// </summary>
-static constexpr char CurveAdjustFunctionString[] =
-	"static inline void CurveAdjust(__global real4reals_bucket* csa, float* a, uint index)\n"
-	"{\n"
-	"	uint tempIndex = (uint)clamp(*a * CURVES_LENGTH_M1, 0.0f, CURVES_LENGTH_M1);\n"
-	"	uint tempIndex2 = (uint)clamp(csa[tempIndex].m_Real4.x * CURVES_LENGTH_M1, 0.0f, CURVES_LENGTH_M1);\n"
-	"\n"
-	"	*a = (float)csa[tempIndex2].m_Reals[index];\n"
-	"}\n"
-	"\n";
-
-/// <summary>
-/// Use MWC 64 from David Thomas at the Imperial College of London for
-/// random numbers in OpenCL, instead of ISAAC which was used
-/// for CPU rendering.
-/// </summary>
-static constexpr char RandFunctionString[] =
-	"enum { MWC64X_A = 4294883355u };\n\n"
-	"inline uint MwcNext(uint2* s)\n"
-	"{\n"
-	"	uint res = (*s).x ^ (*s).y;			\n"//Calculate the result.
-	"	uint hi = mul_hi((*s).x, MWC64X_A); \n"//Step the RNG.
-	"	(*s).x = (*s).x * MWC64X_A + (*s).y;\n"//Pack the state back up.
-	"	(*s).y = hi + ((*s).x < (*s).y);	\n"
-	"	return res;							\n"//Return the next result.
-	"}\n"
-	"\n"
-	"inline uint MwcNextRange(uint2* s, uint val)\n"
-	"{\n"
-	"	return (val == 0) ? MwcNext(s) : (uint)(((ulong)MwcNext(s) * (ulong)val) >> 32);\n"
-	"}\n"
-	"\n"
-	"inline real_t MwcNext01(uint2* s)\n"
-	"{\n"
-	"	return MwcNext(s) * (real_t)(1.0 / 4294967296.0);\n"
-	"}\n"
-	"\n"
-	"inline uint MwcNextCrand(uint2* s)\n"
-	"{\n"
-	"	return MwcNextRange(s, 32767u);\n"
-	"}\n"
-	"\n"
-	"inline real_t MwcNextFRange(uint2* s, real_t lower, real_t upper)\n"
-	"{\n"
-	"	real_t f = (real_t)MwcNext(s) / (real_t)UINT_MAX;\n"
-#ifdef USEFMA
-	"	return fma(f, upper - lower, lower);\n"
-#else
-	"	return (f * (upper - lower) + lower);\n"
-#endif
-	"}\n"
-	"\n"
-	"inline real_t MwcNextNeg1Pos1(uint2* s)\n"
-	"{\n"
-	"	real_t f = (real_t)MwcNext(s) / (real_t)UINT_MAX;\n"
-#ifdef USEFMA
-	"	return fma(f, (real_t)2.0, (real_t)-1.0);\n"
-#else
-	"	return (f * (real_t)2.0 + (real_t)-1.0);\n"
-#endif
-	"}\n"
-	"\n"
-	"inline real_t MwcNext0505(uint2* s)\n"
-	"{\n"
-	"	real_t f = (real_t)MwcNext(s) / (real_t)UINT_MAX;\n"
-	"	return -0.5 + f;\n"
-	"}\n"
-	"\n";
-
-/// <summary>
-/// OpenCL equivalent Renderer::AddToAccum().
-/// </summary>
-static constexpr char AddToAccumWithCheckFunctionString[] =
-	"inline bool AccumCheck(int superRasW, int superRasH, int i, int ii, int j, int jj)\n"
-	"{\n"
-	"	return (j + jj >= 0 && j + jj < superRasH && i + ii >= 0 && i + ii < superRasW);\n"
-	"}\n"
-	"\n";
-
-/// <summary>
-/// OpenCL equivalent various CarToRas member functions.
-/// Normaly would subtract m_RasLlX and m_RasLlY, but they were negated in RendererCL before being passed in, so they could be used with fma().
-/// </summary>
-static constexpr char CarToRasFunctionString[] =
-	"inline void CarToRasConvertPointToSingle(__constant CarToRasCL* carToRas, Point* point, uint* singleBufferIndex)\n"
-	"{\n"
-#ifdef USEFMA
-	"	*singleBufferIndex = (uint)fma(carToRas->m_PixPerImageUnitW, point->m_X, carToRas->m_RasLlX) + (carToRas->m_RasWidth * (uint)fma(carToRas->m_PixPerImageUnitH, point->m_Y, carToRas->m_RasLlY));\n"
-#else
-	"	*singleBufferIndex = (uint)(carToRas->m_PixPerImageUnitW * point->m_X + carToRas->m_RasLlX) + (carToRas->m_RasWidth * (uint)(carToRas->m_PixPerImageUnitH * point->m_Y + carToRas->m_RasLlY));\n"
-#endif
-	"}\n"
-	"\n"
-	"inline bool CarToRasInBounds(__constant CarToRasCL* carToRas, Point* point)\n"
-	"{\n"
-	"	return point->m_X >= carToRas->m_CarLlX &&\n"
-	"		point->m_X < carToRas->m_CarUrX &&\n"
-	"		point->m_Y < carToRas->m_CarUrY &&\n"
-	"		point->m_Y >= carToRas->m_CarLlY;\n"
-	"}\n"
-	"\n";
-
-static constexpr char AtomicString[] =
-   "void AtomicAdd(volatile __global real_bucket_t* source, const real_bucket_t operand)\n"
-   "{\n"
-   "	union\n"
-   "	{\n"
-   "		atomi intVal;\n"
-   "		real_bucket_t realVal;\n"
-   "	} newVal;\n"
-   "\n"
-   "	union\n"
-   "	{\n"
-   "		atomi intVal;\n"
-   "		real_bucket_t realVal;\n"
-   "	} prevVal;\n"
-   "\n"
-   "	do\n"
-   "	{\n"
-   "		prevVal.realVal = *source;\n"
-   "		newVal.realVal = prevVal.realVal + operand;\n"
-   "	} while (atomic_cmpxchg((volatile __global atomi*)source, prevVal.intVal, newVal.intVal) != prevVal.intVal);\n"
-   "}\n";
-
-}
+#pragma once
+
+#include "EmberCLPch.h"
+#include "EmberCLStructs.h"
+
+#define USEFMA 1
+
+/// <summary>
+/// OpenCL global function strings.
+/// </summary>
+
+namespace EmberCLns
+{
+/// <summary>
+/// OpenCL source text for RgbToHsv(), the OpenCL equivalent of Palette::RgbToHsv(). The generated function reads *rgb and writes hue, saturation and value to the x, y and z fields of *hsv.
+/// </summary>
+static constexpr char RgbToHsvFunctionString[] =
+	//Input rgb components are expected in the range 0 - 1,
+	//output is h 0 - 6, s 0 - 1, v 0 - 1
+	"static inline void RgbToHsv(real4_bucket* rgb, real4_bucket* hsv)\n"
+	"{\n"
+	"	real_bucket_t max, min, del, rc, gc, bc;\n"
+	"\n"
+	//Compute maximum of r, g, b.
+	"	if ((*rgb).x >= (*rgb).y)\n"
+	"	{\n"
+	"		if ((*rgb).x >= (*rgb).z)\n"
+	"			max = (*rgb).x;\n"
+	"		else\n"
+	"			max = (*rgb).z;\n"
+	"	}\n"
+	"	else\n"
+	"	{\n"
+	"		if ((*rgb).y >= (*rgb).z)\n"
+	"			max = (*rgb).y;\n"
+	"		else\n"
+	"			max = (*rgb).z;\n"
+	"	}\n"
+	"\n"
+	//Compute minimum of r, g, b.
+	"	if ((*rgb).x <= (*rgb).y)\n"
+	"	{\n"
+	"		if ((*rgb).x <= (*rgb).z)\n"
+	"			min = (*rgb).x;\n"
+	"		else\n"
+	"			min = (*rgb).z;\n"
+	"	}\n"
+	"	else\n"
+	"	{\n"
+	"		if ((*rgb).y <= (*rgb).z)\n"
+	"			min = (*rgb).y;\n"
+	"		else\n"
+	"			min = (*rgb).z;\n"
+	"	}\n"
+	"\n"
+	"	del = max - min;\n"//Spread between the largest and smallest component.
+	"	(*hsv).z = max;\n"//Value is the largest component.
+	"\n"
+	"	if (max != 0)\n"
+	"		(*hsv).y = del / max;\n"//Saturation is the spread relative to max.
+	"	else\n"
+	"		(*hsv).y = 0;\n"//Avoids dividing by zero when all components are 0.
+	"\n"
+	"	(*hsv).x = 0;\n"//Hue stays 0 when saturation is 0.
+	"	if ((*hsv).y != 0)\n"
+	"	{\n"
+	"		rc = (max - (*rgb).x) / del;\n"
+	"		gc = (max - (*rgb).y) / del;\n"
+	"		bc = (max - (*rgb).z) / del;\n"
+	"\n"
+	"		if ((*rgb).x == max)\n"//Hue offset is 0, 2 or 4 depending on which component is the maximum.
+	"			(*hsv).x = bc - gc;\n"
+	"		else if ((*rgb).y == max)\n"
+	"			(*hsv).x = 2 + rc - bc;\n"
+	"		else if ((*rgb).z == max)\n"
+	"			(*hsv).x = 4 + gc - rc;\n"
+	"\n"
+	"		if ((*hsv).x < 0)\n"//Wrap a negative hue back into the 0 - 6 range.
+	"			(*hsv).x += 6;\n"
+	"	}\n"
+	"}\n"
+	"\n";
+
+/// <summary>
+/// OpenCL source text for HsvToRgb(), the OpenCL equivalent of Palette::HsvToRgb(). The generated function writes the x, y and z fields of *rgb and also rewrites (*hsv).x in place while wrapping the hue.
+/// </summary>
+static constexpr char HsvToRgbFunctionString[] =
+	//Input is h 0 - 6, s 0 - 1, v 0 - 1,
+	//output rgb components are 0 - 1
+	"static inline void HsvToRgb(real4_bucket* hsv, real4_bucket* rgb)\n"
+	"{\n"
+	"	int j;\n"
+	"	real_bucket_t f, p, q, t;\n"
+	"\n"
+	"	while ((*hsv).x >= 6)\n"//Wrap the hue down until it is below 6.
+	"		(*hsv).x = (*hsv).x - 6;\n"
+	"\n"
+	"	while ((*hsv).x <  0)\n"//Wrap the hue up until it is not negative.
+	"		(*hsv).x = (*hsv).x + 6;\n"
+	"\n"
+	"	j = (int)floor((*hsv).x);\n"//Integer part of the hue selects the switch case below.
+	"	f = (*hsv).x - j;\n"//Fractional part of the hue.
+	"	p = (*hsv).z * (1 - (*hsv).y);\n"
+	"	q = (*hsv).z * (1 - ((*hsv).y * f));\n"
+	"	t = (*hsv).z * (1 - ((*hsv).y * (1 - f)));\n"
+	"\n"
+	"	switch (j)\n"
+	"	{\n"
+	"		case 0:  (*rgb).x = (*hsv).z; (*rgb).y = t;		   (*rgb).z = p;	    break;\n"
+	"		case 1:  (*rgb).x = q;		  (*rgb).y = (*hsv).z; (*rgb).z = p;	    break;\n"
+	"		case 2:  (*rgb).x = p;		  (*rgb).y = (*hsv).z; (*rgb).z = t;	    break;\n"
+	"		case 3:  (*rgb).x = p;		  (*rgb).y = q;		   (*rgb).z = (*hsv).z; break;\n"
+	"		case 4:  (*rgb).x = t;		  (*rgb).y = p;		   (*rgb).z = (*hsv).z; break;\n"
+	"		case 5:  (*rgb).x = (*hsv).z; (*rgb).y = p;		   (*rgb).z = q;	    break;\n"
+	"		default: (*rgb).x = (*hsv).z; (*rgb).y = t;		   (*rgb).z = p;	    break;\n"//Same assignments as case 0.
+	"	}\n"
+	"}\n"
+	"\n";
+
+/// <summary>
+/// OpenCL equivalent of Palette::CalcAlpha(). Returns pow(density, gamma), blended with a straight line through the origin while density is below linrange.
+/// </summary>
+static constexpr char CalcAlphaFunctionString[] =
+	"static inline real_t CalcAlpha(real_bucket_t density, real_bucket_t gamma, real_bucket_t linrange)\n"//Gamma curve whose low end is blended with a line; the blend weight is density / linrange.
+	"{\n"
+	"	real_bucket_t frac, alpha, funcval = pow(linrange, gamma);\n"//funcval is the gamma curve evaluated at linrange.
+	"\n"
+	"	if (density > 0)\n"
+	"	{\n"
+	"		if (density < linrange)\n"
+	"		{\n"
+	"			frac = density / linrange;\n"
+	"			alpha = (1.0 - frac) * density * (funcval / linrange) + frac * pow(density, gamma);\n"//(1 - frac) weights the line with slope funcval / linrange, frac weights the power curve.
+	"		}\n"
+	"		else\n"
+	"			alpha = pow(density, gamma);\n"
+	"	}\n"
+	"	else\n"
+	"		alpha = 0;\n"//Zero or negative density gives zero alpha.
+	"\n"
+	"	return alpha;\n"
+	"}\n"
+	"\n";
+
+
+/// <summary>
+/// OpenCL equivalent of Renderer::CurveAdjust().
+/// Only use float here instead of real_t because the output will be passed to write_imagef()
+/// during final accumulation, which only takes floats.
+/// </summary>
+static constexpr char CurveAdjustFunctionString[] =
+	"static inline void CurveAdjust(__global real4reals_bucket* csa, float* a, uint index)\n"//Replaces *a with a value looked up from csa in two steps.
+	"{\n"
+	"	uint tempIndex = (uint)clamp(*a * CURVES_LENGTH_M1, 0.0f, CURVES_LENGTH_M1);\n"//Scale *a by CURVES_LENGTH_M1 and clamp it to get the first index.
+	"	uint tempIndex2 = (uint)clamp(csa[tempIndex].m_Real4.x * CURVES_LENGTH_M1, 0.0f, CURVES_LENGTH_M1);\n"//The x component of that entry is scaled and clamped the same way to get the second index.
+	"\n"
+	"	*a = (float)csa[tempIndex2].m_Reals[index];\n"//The result is component index of the second entry.
+	"}\n"
+	"\n";
+
+/// <summary>
+/// Random number helpers for OpenCL, built on MWC 64 from David Thomas at the Imperial College of London,
+/// instead of ISAAC which was used for CPU rendering.
+/// Every function advances the uint2 state passed in as s. Floating point constants are cast to real_t so the arithmetic stays in real_t.
+/// </summary>
+static constexpr char RandFunctionString[] =
+	"enum { MWC64X_A = 4294883355u };\n\n"
+	"inline uint MwcNext(uint2* s)\n"
+	"{\n"
+	"	uint res = (*s).x ^ (*s).y;			\n"//Calculate the result.
+	"	uint hi = mul_hi((*s).x, MWC64X_A); \n"//Step the RNG.
+	"	(*s).x = (*s).x * MWC64X_A + (*s).y;\n"//Pack the state back up.
+	"	(*s).y = hi + ((*s).x < (*s).y);	\n"//Add the carry from the line above into the high word.
+	"	return res;							\n"//Return the next result.
+	"}\n"
+	"\n"
+	"inline uint MwcNextRange(uint2* s, uint val)\n"
+	"{\n"
+	"	return (val == 0) ? MwcNext(s) : (uint)(((ulong)MwcNext(s) * (ulong)val) >> 32);\n"//val == 0 returns the full 32-bit value, otherwise the result is in 0 to val - 1.
+	"}\n"
+	"\n"
+	"inline real_t MwcNext01(uint2* s)\n"
+	"{\n"
+	"	return MwcNext(s) * (real_t)(1.0 / 4294967296.0);\n"//Scale the 32-bit value by 2^-32.
+	"}\n"
+	"\n"
+	"inline uint MwcNextCrand(uint2* s)\n"
+	"{\n"
+	"	return MwcNextRange(s, 32767u);\n"
+	"}\n"
+	"\n"
+	"inline real_t MwcNextFRange(uint2* s, real_t lower, real_t upper)\n"
+	"{\n"
+	"	real_t f = (real_t)MwcNext(s) / (real_t)UINT_MAX;\n"//f is in 0 - 1, which maps to lower - upper below.
+#ifdef USEFMA
+	"	return fma(f, upper - lower, lower);\n"
+#else
+	"	return (f * (upper - lower) + lower);\n"
+#endif
+	"}\n"
+	"\n"
+	"inline real_t MwcNextNeg1Pos1(uint2* s)\n"
+	"{\n"
+	"	real_t f = (real_t)MwcNext(s) / (real_t)UINT_MAX;\n"//f is in 0 - 1, which maps to -1 - 1 below.
+#ifdef USEFMA
+	"	return fma(f, (real_t)2.0, (real_t)-1.0);\n"
+#else
+	"	return (f * (real_t)2.0 + (real_t)-1.0);\n"
+#endif
+	"}\n"
+	"\n"
+	"inline real_t MwcNext0505(uint2* s)\n"
+	"{\n"
+	"	real_t f = (real_t)MwcNext(s) / (real_t)UINT_MAX;\n"//f is in 0 - 1, which maps to -0.5 - 0.5 below.
+	"	return (real_t)-0.5 + f;\n"//Cast the constant like MwcNextNeg1Pos1() does, so a float build does not add in double.
+	"}\n"
+	"\n";
+
+/// <summary>
+/// OpenCL equivalent of Renderer::AddToAccum(). Only the bounds check, AccumCheck(), is defined here.
+/// </summary>
+static constexpr char AddToAccumWithCheckFunctionString[] =
+	"inline bool AccumCheck(int superRasW, int superRasH, int i, int ii, int j, int jj)\n"
+	"{\n"
+	"	return (j + jj >= 0 && j + jj < superRasH && i + ii >= 0 && i + ii < superRasW);\n"
+	"}\n"
+	"\n";
+
+/// <summary>
+/// OpenCL equivalent of various CarToRas member functions.
+/// Normally m_RasLlX and m_RasLlY would be subtracted, but they were negated in RendererCL before being passed in, so they could be used with fma().
+/// </summary>
+static constexpr char CarToRasFunctionString[] =
+	"inline void CarToRasConvertPointToSingle(__constant CarToRasCL* carToRas, Point* point, uint* singleBufferIndex)\n"
+	"{\n"
+#ifdef USEFMA
+	"	*singleBufferIndex = (uint)fma(carToRas->m_PixPerImageUnitW, point->m_X, carToRas->m_RasLlX) + (carToRas->m_RasWidth * (uint)fma(carToRas->m_PixPerImageUnitH, point->m_Y, carToRas->m_RasLlY));\n"
+#else
+	"	*singleBufferIndex = (uint)(carToRas->m_PixPerImageUnitW * point->m_X + carToRas->m_RasLlX) + (carToRas->m_RasWidth * (uint)(carToRas->m_PixPerImageUnitH * point->m_Y + carToRas->m_RasLlY));\n"
+#endif
+	"}\n"
+	"\n"
+	"inline bool CarToRasInBounds(__constant CarToRasCL* carToRas, Point* point)\n"
+	"{\n"
+	"	return point->m_X >= carToRas->m_CarLlX &&\n"
+	"		point->m_X < carToRas->m_CarUrX &&\n"
+	"		point->m_Y < carToRas->m_CarUrY &&\n"
+	"		point->m_Y >= carToRas->m_CarLlY;\n"
+	"}\n"
+	"\n";
+
+static constexpr char AtomicString[] =//OpenCL source for AtomicAdd(), which adds operand to *source using a compare-and-swap retry loop.
+   "void AtomicAdd(volatile __global real_bucket_t* source, const real_bucket_t operand)\n"
+   "{\n"
+   "	union\n"//Lets the same storage be read as atomi for atomic_cmpxchg() or as real_bucket_t for the addition.
+   "	{\n"
+   "		atomi intVal;\n"
+   "		real_bucket_t realVal;\n"
+   "	} newVal;\n"
+   "\n"
+   "	union\n"
+   "	{\n"
+   "		atomi intVal;\n"
+   "		real_bucket_t realVal;\n"
+   "	} prevVal;\n"
+   "\n"
+   "	do\n"
+   "	{\n"
+   "		prevVal.realVal = *source;\n"//Snapshot the current value.
+   "		newVal.realVal = prevVal.realVal + operand;\n"//Compute the sum to store.
+   "	} while (atomic_cmpxchg((volatile __global atomi*)source, prevVal.intVal, newVal.intVal) != prevVal.intVal);\n"//Store only if *source still holds the snapshot, otherwise try again.
+   "}\n";
+
+}
@@ -1,66 +1,73 @@
-#ifdef _WIN32
-	#pragma once
-#endif
-
-/// <summary>
-/// Precompiled header file. Place all system includes here with appropriate #defines for different operating systems and compilers.
-/// </summary>
-
-#define NOMINMAX
-#define WIN32_LEAN_AND_MEAN//Exclude rarely-used stuff from Windows headers.
-#define _USE_MATH_DEFINES
-//#define CL_USE_DEPRECATED_OPENCL_1_2_APIS 1
-
-#include "Timing.h"
-#include "Renderer.h"
-
-#if defined(_WIN32)
-	#pragma warning(disable : 4251; disable : 4661; disable : 4100)
-	#include <windows.h>
-	#include <SDKDDKVer.h>
-	#include "GL/gl.h"
-#elif defined(__APPLE__)
-	#include <OpenGL/gl.h>
-#else
-	#include "GL/glx.h"
-#endif
-
-#include <utility>
-#include <CL/cl.hpp>
-#include <algorithm>
-#include <atomic>
-#include <cstdio>
-#include <cstdlib>
-#include <fstream>
-#include <iostream>
-#include <set>
-#include <string>
-#include <iterator>
-#include <time.h>
-#include <unordered_map>
-
-#ifdef _WIN32
-	#if defined(BUILDING_EMBERCL)
-		#define EMBERCL_API __declspec(dllexport)
-	#else
-		#define EMBERCL_API __declspec(dllimport)
-	#endif
-#else
-	#define EMBERCL_API
-#endif
-
-using namespace std;
-using namespace EmberNs;
-//#define TEST_CL 1
-//#define TEST_CL_BUFFERS 1
-
-//This special define is made to fix buggy OpenCL compilers on Mac.
-//Rendering is much slower there for unknown reasons. Michel traced it down
-//to the consec variable which keeps track of how many tries are needed to compute
-//a point which is not a bad value. Strangely, keeping this as a local variable
-//is slower than keeping it as an element in a global array.
-//This is counterintuitive, and lends further weight to the idea that OpenCL on Mac
-//is horribly broken.
-#ifdef __APPLE__
-	#define KNL_USE_GLOBAL_CONSEC
-#endif
+#ifdef _WIN32
+	#pragma once
+#endif
+
+/// <summary>
+/// Precompiled header file. Place all system includes here with appropriate #defines for different operating systems and compilers.
+/// </summary>
+
+#define NOMINMAX
+#define WIN32_LEAN_AND_MEAN//Exclude rarely-used stuff from Windows headers.
+#define _USE_MATH_DEFINES
+//#define CL_USE_DEPRECATED_OPENCL_1_2_APIS 1
+//#define CL_USE_DEPRECATED_OPENCL_2_0_APIS 1
+//For reasons unknown, QtCreator cannot use any value higher than 120 with these, because
+//it causes errors when compiling opencl.hpp. This happens even though it's using MSVC under the hood
+//and it compiles in MSVC when using Visual Studio. NOTE(review): the values defined below are 300, not 120 - confirm which is intended.
+#define CL_TARGET_OPENCL_VERSION 300
+#define CL_HPP_TARGET_OPENCL_VERSION 300
+#define CL_HPP_MINIMUM_OPENCL_VERSION 300
+
+#include "Timing.h"
+#include "Renderer.h"
+
+#if defined(_WIN32)
+	#pragma warning(disable : 4251; disable : 4661; disable : 4100)
+	#include <windows.h>
+	#include <SDKDDKVer.h>
+	#include "GL/gl.h"
+#elif defined(__APPLE__)
+	#include <OpenGL/gl.h>
+#else
+	#include "GL/glx.h"
+#endif
+
+#include <utility>
+#include <CL/opencl.hpp>
+#include <algorithm>
+#include <atomic>
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <set>
+#include <string>
+#include <iterator>
+#include <time.h>
+#include <unordered_map>
+
+#ifdef _WIN32
+	#if defined(BUILDING_EMBERCL)
+		#define EMBERCL_API __declspec(dllexport)
+	#else
+		#define EMBERCL_API __declspec(dllimport)
+	#endif
+#else
+	#define EMBERCL_API
+#endif
+
+using namespace std;
+using namespace EmberNs;
+//#define TEST_CL 1
+//#define TEST_CL_BUFFERS 1
+
+//This special define is made to fix buggy OpenCL compilers on Mac.
+//Rendering is much slower there for unknown reasons. Michel traced it down
+//to the consec variable which keeps track of how many tries are needed to compute
+//a point which is not a bad value. Strangely, keeping this as a local variable
+//is slower than keeping it as an element in a global array.
+//This is counterintuitive, and lends further weight to the idea that OpenCL on Mac
+//is horribly broken.
+#ifdef __APPLE__
+	#define KNL_USE_GLOBAL_CONSEC
+#endif
@@ -1,405 +1,405 @@
-#pragma once
-
-#include "EmberCLPch.h"
-
-/// <summary>
-/// Various data structures defined for the CPU and OpenCL.
-/// These are stripped down versions of THE classes in Ember, for use with OpenCL.
-/// Their sole purpose is to pass values from the host to the device.
-/// They retain most of the member variables, but do not contain the functions.
-/// Visual Studio defaults to alighment of 16, but it's made explicit in case another compiler is used.
-/// This must match the alignment specified in the kernel.
-/// </summary>
-
-namespace EmberCLns
-{
-/// <summary>
-/// Various constants needed for rendering.
-/// </summary>
-static string ConstantDefinesString(bool doublePrecision)
-{
-	ostringstream os;
-	os << "#if defined(cl_amd_fp64)\n"//AMD extension available?
-	   "	#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n"
-	   "#endif\n"
-	   "#if defined(cl_khr_fp64)\n"//Khronos extension available?
-	   "	#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-	   "#endif\n"
-	   "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n";//Only supported on nVidia.
-
-	if (doublePrecision)
-	{
-		os <<
-		   "typedef long intPrec;\n"
-		   "typedef uint atomi;\n"//Same size as real_bucket_t, always 4 bytes.
-		   "typedef double real_t;\n"
-		   "typedef float real_bucket_t;\n"//Assume buckets are always float, even though iter calcs are in double.
-		   "typedef double2 real2;\n"
-		   "typedef double3 real3;\n"
-		   "typedef double4 real4;\n"
-		   "typedef float4 real4_bucket;\n"//And here too.
-		   "#define EPS (DBL_EPSILON)\n"
-		   "#define TLOW (DBL_MIN)\n"
-		   "#define TMAX (DBL_MAX)\n"
-		   ;
-	}
-	else
-	{
-		os << "typedef int intPrec;\n"
-		   "typedef uint atomi;\n"
-		   "typedef float real_t;\n"
-		   "typedef float real_bucket_t;\n"
-		   "typedef float2 real2;\n"
-		   "typedef float3 real3;\n"
-		   "typedef float4 real4;\n"
-		   "typedef float4 real4_bucket;\n"
-		   "#define EPS (FLT_EPSILON)\n"
-		   "#define TLOW (FLT_MIN)\n"
-		   "#define TMAX (FLT_MAX)\n"
-		   ;
-	}
-
-	os <<
-	   "typedef          long int int64;\n"
-	   "typedef unsigned long int uint64;\n"
-	   "\n"
-	   "#define EPS6 ((1e-6))\n"
-	   "\n"
-	   "//The number of threads per block used in the iteration function. Don't change\n"
-	   "//it lightly; the block size is hard coded to be exactly 32 x 8.\n"
-	   "#define NTHREADS 256u\n"
-	   "#define THREADS_PER_WARP 32u\n"
-	   "#define NWARPS (NTHREADS / THREADS_PER_WARP)\n"
-	   "#define DE_THRESH 100u\n"
-	   "#define BadVal(x) (isnan(x))\n"
-	   "#define SQR(x) ((x) * (x))\n"
-	   "#define CUBE(x) ((x) * (x) * (x))\n"
-	   "#define MPI ((real_t)M_PI)\n"
-	   "#define MPI2 ((real_t)M_PI_2)\n"
-	   "#define MPI4 ((real_t)M_PI_4)\n"
-	   "#define M1PI ((real_t)M_1_PI)\n"
-	   "#define M2PI ((real_t)M_2_PI)\n"
-	   "#define M_2PI (MPI * 2)\n"
-	   "#define M_3PI (MPI * 3)\n"
-	   "#define M_SQRT3 ((real_t)(1.7320508075688772935274463415059))\n"
-	   "#define M_SQRT3_2 ((real_t)(0.86602540378443864676372317075294))\n"
-	   "#define M_SQRT3_3 ((real_t)(0.57735026918962576450914878050196))\n"
-	   "#define M_SQRT5 ((real_t)(2.2360679774997896964091736687313))\n"
-	   "#define M_PHI ((real_t)(1.61803398874989484820458683436563))\n"
-	   "#define M_1_2PI ((real_t)(0.15915494309189533576888376337251))\n"
-	   "#define M_PI3 ((real_t)(1.0471975511965977461542144610932))\n"
-	   "#define M_PI6 ((real_t)(0.52359877559829887307710723054658))\n"
-	   "#define DEG_2_RAD (MPI / 180)\n"
-	   "#define CURVES_LENGTH_M1 ((real_bucket_t)" << CURVES_LENGTH_M1 << ")\n" <<
-	   "#define ONE_OVER_CURVES_LENGTH_M1 ((real_bucket_t)" << ONE_OVER_CURVES_LENGTH_M1 << ")\n" <<
-	   "\n"
-	   "//Index in each dimension of a thread within a block.\n"
-	   "#define THREAD_ID_X   (get_local_id(0))\n"
-	   "#define THREAD_ID_Y   (get_local_id(1))\n"
-	   "#define THREAD_ID_Z   (get_local_id(2))\n"
-	   "\n"
-	   "//Index in each dimension of a block within a grid.\n"
-	   "#define BLOCK_ID_X    (get_group_id(0))\n"
-	   "#define BLOCK_ID_Y    (get_group_id(1))\n"
-	   "#define BLOCK_ID_Z    (get_group_id(2))\n"
-	   "\n"
-	   "//Absolute index in each dimension of a thread within a grid.\n"
-	   "#define GLOBAL_ID_X   (get_global_id(0))\n"
-	   "#define GLOBAL_ID_Y   (get_global_id(1))\n"
-	   "#define GLOBAL_ID_Z   (get_global_id(2))\n"
-	   "\n"
-	   "//Dimensions of a block.\n"
-	   "#define BLOCK_SIZE_X  (get_local_size(0))\n"
-	   "#define BLOCK_SIZE_Y  (get_local_size(1))\n"
-	   "#define BLOCK_SIZE_Z  (get_local_size(2))\n"
-	   "\n"
-	   "//Dimensions of a grid, in terms of blocks.\n"
-	   "#define GRID_SIZE_X   (get_num_groups(0))\n"
-	   "#define GRID_SIZE_Y   (get_num_groups(1))\n"
-	   "#define GRID_SIZE_Z   (get_num_groups(2))\n"
-	   "\n"
-	   "//Dimensions of a grid, in terms of threads.\n"
-	   "#define GLOBAL_SIZE_X (get_global_size(0))\n"
-	   "#define GLOBAL_SIZE_Y (get_global_size(1))\n"
-	   "#define GLOBAL_SIZE_Z (get_global_size(2))\n"
-	   "\n"
-	   "#define INDEX_IN_BLOCK_2D (THREAD_ID_Y * BLOCK_SIZE_X + THREAD_ID_X)\n"
-	   "#define INDEX_IN_BLOCK_3D ((BLOCK_SIZE_X * BLOCK_SIZE_Y * THREAD_ID_Z) + INDEX_IN_BLOCK_2D)\n"
-	   "\n"
-	   "#define INDEX_IN_GRID_2D (GLOBAL_ID_Y * GLOBAL_SIZE_X + GLOBAL_ID_X)\n"
-	   "#define INDEX_IN_GRID_3D ((GLOBAL_SIZE_X * GLOBAL_SIZE_Y * GLOBAL_ID_Z) + INDEX_IN_GRID_2D)\n"
-	   "\n"
-	   "#define BLOCK_START_INDEX_IN_GRID_2D ((BLOCK_ID_Y * GRID_SIZE_X * BLOCK_SIZE_Y * BLOCK_SIZE_X) + (BLOCK_ID_X * BLOCK_SIZE_X * BLOCK_SIZE_Y))\n"
-	   "\n";
-	return os.str();
-}
-
-/// <summary>
-/// A point structure on the host that maps to the one used on the device to iterate in OpenCL.
-/// It might seem better to use vec4, however 2D palettes and even 3D coordinates may eventually
-/// be supported, which will make it more than 4 members.
-/// </summary>
-template <typename T>
-struct ALIGN PointCL
-{
-	T m_X;
-	T m_Y;
-	T m_Z;
-	T m_ColorX;
-	uint m_LastXfUsed;
-};
-
-/// <summary>
-/// The point structure used to iterate in OpenCL.
-/// It might seem better to use float4, however 2D palettes and even 3D coordinates may eventually
-/// be supported, which will make it more than 4 members.
-/// </summary>
-static constexpr char PointCLStructString[] =
-	"typedef struct __attribute__ " ALIGN_CL " _Point\n"
-	"{\n"
-	"	real_t m_X;\n"
-	"	real_t m_Y;\n"
-	"	real_t m_Z;\n"
-	"	real_t m_ColorX;\n"
-	"	uint m_LastXfUsed;\n"
-	"} Point;\n"
-	"\n";
-
-/// <summary>
-/// A structure on the host used to hold all of the needed information for an xform used on the device to iterate in OpenCL.
-/// Template argument expected to be float or double.
-/// </summary>
-template <typename T>
-struct ALIGN XformCL
-{
-	T m_A, m_B, m_C, m_D, m_E, m_F;//24 (48)
-	T m_PostA, m_PostB, m_PostC, m_PostD, m_PostE, m_PostF;//48 (96)
-	T m_DirectColor;//52 (104)
-	T m_ColorSpeedCache;//56 (112)
-	T m_OneMinusColorCache;//60 (120)
-	T m_Opacity;//64 (128)
-};
-
-/// <summary>
-/// The xform structure used to iterate in OpenCL.
-/// </summary>
-static constexpr char XformCLStructString[] =
-	"typedef struct __attribute__ " ALIGN_CL " _XformCL\n"
-	"{\n"
-	"	real_t m_A, m_B, m_C, m_D, m_E, m_F;\n"
-	"	real_t m_PostA, m_PostB, m_PostC, m_PostD, m_PostE, m_PostF;\n"
-	"	real_t m_DirectColor;\n"
-	"	real_t m_ColorSpeedCache;\n"
-	"	real_t m_OneMinusColorCache;\n"
-	"	real_t m_Opacity;\n"
-	"} XformCL;\n"
-	"\n";
-
-/// <summary>
-/// A structure on the host used to hold all of the needed information for an ember used on the device to iterate in OpenCL.
-/// Template argument expected to be float or double.
-/// </summary>
-template <typename T>
-struct ALIGN EmberCL
-{
-	T m_RandPointRange;
-	T m_CamZPos;
-	T m_CamPerspective;
-	T m_CamYaw;
-	T m_CamPitch;
-	T m_BlurCurve;
-	T m_CamDepthBlur;
-	T m_BlurCoef;
-	m3T m_CamMat;
-	T m_CenterX, m_CenterY;
-	T m_RotA, m_RotB, m_RotD, m_RotE;
-	T m_Psm1;
-	T m_Psm2;
-};
-
-/// <summary>
-/// The ember structure used to iterate in OpenCL.
-/// </summary>
-static constexpr char EmberCLStructString[] =
-	"typedef struct __attribute__ " ALIGN_CL " _EmberCL\n"
-	"{\n"
-	"	real_t m_RandPointRange;\n"
-	"	real_t m_CamZPos;\n"
-	"	real_t m_CamPerspective;\n"
-	"	real_t m_CamYaw;\n"
-	"	real_t m_CamPitch;\n"
-	"	real_t m_BlurCurve;\n"
-	"	real_t m_CamDepthBlur;\n"
-	"	real_t m_BlurCoef;\n"
-	"	real_t m_C00;\n"
-	"	real_t m_C01;\n"
-	"	real_t m_C02;\n"
-	"	real_t m_C10;\n"
-	"	real_t m_C11;\n"
-	"	real_t m_C12;\n"
-	"	real_t m_C20;\n"
-	"	real_t m_C21;\n"
-	"	real_t m_C22;\n"
-	"	real_t m_CenterX, m_CenterY;\n"
-	"	real_t m_RotA, m_RotB, m_RotD, m_RotE;\n"
-	"	real_t m_Psm1;\n"
-	"	real_t m_Psm2;\n"
-	"} EmberCL;\n"
-	"\n";
-
-/// <summary>
-/// A structure on the host used to hold all of the needed information for cartesian to raster mapping used on the device to iterate in OpenCL.
-/// Template argument expected to be float or double.
-/// </summary>
-template <typename T>
-struct ALIGN CarToRasCL
-{
-	T m_PixPerImageUnitW, m_RasLlX;
-	uint m_RasWidth;
-	T m_PixPerImageUnitH, m_RasLlY;
-	T m_CarLlX, m_CarUrX, m_CarUrY, m_CarLlY;
-	T m_CarHalfX, m_CarHalfY, m_CarCenterX, m_CarCenterY;
-};
-
-/// <summary>
-/// The cartesian to raster structure used to iterate in OpenCL.
-/// </summary>
-static constexpr char CarToRasCLStructString[] =
-	"typedef struct __attribute__ " ALIGN_CL " _CarToRasCL\n"
-	"{\n"
-	"	real_t m_PixPerImageUnitW, m_RasLlX;\n"
-	"	uint m_RasWidth;\n"
-	"	real_t m_PixPerImageUnitH, m_RasLlY;\n"
-	"	real_t m_CarLlX, m_CarUrX, m_CarUrY, m_CarLlY;\n"
-	"	real_t m_CarHalfX, m_CarHalfY, m_CarCenterX, m_CarCenterY;\n"
-	"} CarToRasCL;\n"
-	"\n";
-
-/// <summary>
-/// A structure on the host used to hold all of the needed information for density filtering used on the device to iterate in OpenCL.
-/// Note that the actual filter buffer is held elsewhere.
-/// Template argument expected to be float or double.
-/// </summary>
-template <typename T>
-struct ALIGN DensityFilterCL
-{
-	T m_Curve;
-	T m_K1;
-	T m_K2;
-	uint m_Supersample;
-	uint m_SuperRasW;
-	uint m_SuperRasH;
-	uint m_KernelSize;
-	uint m_MaxFilterIndex;
-	uint m_MaxFilteredCounts;
-	uint m_FilterWidth;
-};
-
-/// <summary>
-/// The density filtering structure used to iterate in OpenCL.
-/// Note that the actual filter buffer is held elsewhere.
-/// </summary>
-static constexpr char DensityFilterCLStructString[] =
-	"typedef struct __attribute__ " ALIGN_CL " _DensityFilterCL\n"
-	"{\n"
-	"	real_bucket_t m_Curve;\n"
-	"	real_bucket_t m_K1;\n"
-	"	real_bucket_t m_K2;\n"
-	"	uint m_Supersample;\n"
-	"	uint m_SuperRasW;\n"
-	"	uint m_SuperRasH;\n"
-	"	uint m_KernelSize;\n"
-	"	uint m_MaxFilterIndex;\n"
-	"	uint m_MaxFilteredCounts;\n"
-	"	uint m_FilterWidth;\n"
-	"} DensityFilterCL;\n"
-	"\n";
-
-/// <summary>
-/// A structure on the host used to hold all of the needed information for spatial filtering used on the device to iterate in OpenCL.
-/// Note that the actual filter buffer is held elsewhere.
-/// </summary>
-template <typename T>
-struct ALIGN SpatialFilterCL
-{
-	uint m_SuperRasW;
-	uint m_SuperRasH;
-	uint m_FinalRasW;
-	uint m_FinalRasH;
-	uint m_Supersample;
-	uint m_FilterWidth;
-	uint m_DensityFilterOffset;
-	uint m_YAxisUp;
-	T m_Vibrancy;
-	T m_HighlightPower;
-	T m_Gamma;
-	T m_LinRange;
-	Color<T> m_Background;
-};
-
-/// <summary>
-/// The spatial filtering structure used to iterate in OpenCL.
-/// Note that the actual filter buffer is held elsewhere.
-/// </summary>
-static constexpr char SpatialFilterCLStructString[] =
-	"typedef struct __attribute__ ((aligned (16))) _SpatialFilterCL\n"
-	"{\n"
-	"	uint m_SuperRasW;\n"
-	"	uint m_SuperRasH;\n"
-	"	uint m_FinalRasW;\n"
-	"	uint m_FinalRasH;\n"
-	"	uint m_Supersample;\n"
-	"	uint m_FilterWidth;\n"
-	"	uint m_DensityFilterOffset;\n"
-	"	uint m_YAxisUp;\n"
-	"	real_bucket_t m_Vibrancy;\n"
-	"	real_bucket_t m_HighlightPower;\n"
-	"	real_bucket_t m_Gamma;\n"
-	"	real_bucket_t m_LinRange;\n"
-	"	real_bucket_t m_Background[4];\n"//For some reason, using float4/double4 here does not align no matter what. So just use an array of 4.
-	"} SpatialFilterCL;\n"
-	"\n";
-
-/// <summary>
-/// EmberCL makes extensive use of the build in vector types, however accessing
-/// their members as a buffer is not natively supported.
-/// Declaring them in a union with a buffer resolves this problem.
-/// </summary>
-static constexpr char UnionCLStructString[] =
-	"typedef union\n"
-	"{\n"
-	"	uchar3 m_Uchar3;\n"
-	"	uchar m_Uchars[3];\n"
-	"} uchar3uchars;\n"
-	"\n"
-	"typedef union\n"
-	"{\n"
-	"	uchar4 m_Uchar4;\n"
-	"	uchar m_Uchars[4];\n"
-	"} uchar4uchars;\n"
-	"\n"
-	"typedef union\n"
-	"{\n"
-	"	uint4 m_Uint4;\n"
-	"	uint m_Uints[4];\n"
-	"} uint4uints;\n"
-	"\n"
-	"typedef union\n"//Use in places where float is required.
-	"{\n"
-	"	float4 m_Float4;\n"
-	"	float m_Floats[4];\n"
-	"} float4floats;\n"
-	"\n"
-	"typedef union\n"//Use in places where float or double can be used depending on the template type.
-	"{\n"
-	"	real4 m_Real4;\n"
-	"	real_t m_Reals[4];\n"
-	"} real4reals;\n"
-	"\n"
-	"typedef union\n"//Used to match the bucket template type.
-	"{\n"
-	"	real4_bucket m_Real4;\n"
-	"	real_bucket_t m_Reals[4];\n"
-	"} real4reals_bucket;\n"
-	"\n";
-}
+#pragma once
+
+#include "EmberCLPch.h"
+
+/// <summary>
+/// Various data structures defined for the CPU and OpenCL.
+/// These are stripped down versions of the classes in Ember, for use with OpenCL.
+/// Their sole purpose is to pass values from the host to the device.
+/// They retain most of the member variables, but do not contain the functions.
+/// Visual Studio defaults to alignment of 16, but it's made explicit in case another compiler is used.
+/// This must match the alignment specified in the kernel.
+/// </summary>
+
+namespace EmberCLns
+{
+/// <summary>
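+/// Builds the OpenCL source preamble: fp64 and atomics extension pragmas, typedefs chosen by doublePrecision, and the constant and indexing macros needed for rendering.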
+/// </summary>
+static string ConstantDefinesString(bool doublePrecision)
+{
+	ostringstream os;
+	os << "#if defined(cl_amd_fp64)\n"//AMD extension available?
+	   "	#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n"
+	   "#endif\n"
+	   "#if defined(cl_khr_fp64)\n"//Khronos extension available?
+	   "	#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
+	   "#endif\n"
+	   "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n";//Only supported on nVidia.
+
+	if (doublePrecision)
+	{
+		os <<
+		   "typedef long intPrec;\n"
+		   "typedef uint atomi;\n"//Same size as real_bucket_t, always 4 bytes.
+		   "typedef double real_t;\n"
+		   "typedef float real_bucket_t;\n"//Assume buckets are always float, even though iter calcs are in double.
+		   "typedef double2 real2;\n"
+		   "typedef double3 real3;\n"
+		   "typedef double4 real4;\n"
+		   "typedef float4 real4_bucket;\n"//And here too.
+		   "#define EPS (DBL_EPSILON)\n"
+		   "#define TLOW (DBL_MIN)\n"
+		   "#define TMAX (DBL_MAX)\n"
+		   ;
+	}
+	else
+	{
+		os << "typedef int intPrec;\n"
+		   "typedef uint atomi;\n"
+		   "typedef float real_t;\n"
+		   "typedef float real_bucket_t;\n"
+		   "typedef float2 real2;\n"
+		   "typedef float3 real3;\n"
+		   "typedef float4 real4;\n"
+		   "typedef float4 real4_bucket;\n"
+		   "#define EPS (FLT_EPSILON)\n"
+		   "#define TLOW (FLT_MIN)\n"
+		   "#define TMAX (FLT_MAX)\n"
+		   ;
+	}
+
+	os <<
+	   "typedef          long int int64;\n"
+	   "typedef unsigned long int uint64;\n"
+	   "\n"
+	   "#define EPS6 ((1e-6))\n"
+	   "\n"
+	   "//The number of threads per block used in the iteration function. Don't change\n"
+	   "//it lightly; the block size is hard coded to be exactly 32 x 8.\n"
+	   "#define NTHREADS 256u\n"
+	   "#define THREADS_PER_WARP 32u\n"
+	   "#define NWARPS (NTHREADS / THREADS_PER_WARP)\n"
+	   "#define DE_THRESH 100u\n"
+	   "#define BadVal(x) (isnan(x))\n"
+	   "#define SQR(x) ((x) * (x))\n"
+	   "#define CUBE(x) ((x) * (x) * (x))\n"
+	   "#define MPI ((real_t)M_PI)\n"
+	   "#define MPI2 ((real_t)M_PI_2)\n"
+	   "#define MPI4 ((real_t)M_PI_4)\n"
+	   "#define M1PI ((real_t)M_1_PI)\n"
+	   "#define M2PI ((real_t)M_2_PI)\n"
+	   "#define M_2PI (MPI * 2)\n"
+	   "#define M_3PI (MPI * 3)\n"
+	   "#define M_SQRT3 ((real_t)(1.7320508075688772935274463415059))\n"
+	   "#define M_SQRT3_2 ((real_t)(0.86602540378443864676372317075294))\n"
+	   "#define M_SQRT3_3 ((real_t)(0.57735026918962576450914878050196))\n"
+	   "#define M_SQRT5 ((real_t)(2.2360679774997896964091736687313))\n"
+	   "#define M_PHI ((real_t)(1.61803398874989484820458683436563))\n"
+	   "#define M_1_2PI ((real_t)(0.15915494309189533576888376337251))\n"
+	   "#define M_PI3 ((real_t)(1.0471975511965977461542144610932))\n"
+	   "#define M_PI6 ((real_t)(0.52359877559829887307710723054658))\n"
+	   "#define DEG_2_RAD (MPI / 180)\n"
+	   "#define CURVES_LENGTH_M1 ((real_bucket_t)" << CURVES_LENGTH_M1 << ")\n" <<
+	   "#define ONE_OVER_CURVES_LENGTH_M1 ((real_bucket_t)" << ONE_OVER_CURVES_LENGTH_M1 << ")\n" <<
+	   "\n"
+	   "//Index in each dimension of a thread within a block.\n"
+	   "#define THREAD_ID_X   (get_local_id(0))\n"
+	   "#define THREAD_ID_Y   (get_local_id(1))\n"
+	   "#define THREAD_ID_Z   (get_local_id(2))\n"
+	   "\n"
+	   "//Index in each dimension of a block within a grid.\n"
+	   "#define BLOCK_ID_X    (get_group_id(0))\n"
+	   "#define BLOCK_ID_Y    (get_group_id(1))\n"
+	   "#define BLOCK_ID_Z    (get_group_id(2))\n"
+	   "\n"
+	   "//Absolute index in each dimension of a thread within a grid.\n"
+	   "#define GLOBAL_ID_X   (get_global_id(0))\n"
+	   "#define GLOBAL_ID_Y   (get_global_id(1))\n"
+	   "#define GLOBAL_ID_Z   (get_global_id(2))\n"
+	   "\n"
+	   "//Dimensions of a block.\n"
+	   "#define BLOCK_SIZE_X  (get_local_size(0))\n"
+	   "#define BLOCK_SIZE_Y  (get_local_size(1))\n"
+	   "#define BLOCK_SIZE_Z  (get_local_size(2))\n"
+	   "\n"
+	   "//Dimensions of a grid, in terms of blocks.\n"
+	   "#define GRID_SIZE_X   (get_num_groups(0))\n"
+	   "#define GRID_SIZE_Y   (get_num_groups(1))\n"
+	   "#define GRID_SIZE_Z   (get_num_groups(2))\n"
+	   "\n"
+	   "//Dimensions of a grid, in terms of threads.\n"
+	   "#define GLOBAL_SIZE_X (get_global_size(0))\n"
+	   "#define GLOBAL_SIZE_Y (get_global_size(1))\n"
+	   "#define GLOBAL_SIZE_Z (get_global_size(2))\n"
+	   "\n"
+	   "#define INDEX_IN_BLOCK_2D (THREAD_ID_Y * BLOCK_SIZE_X + THREAD_ID_X)\n"
+	   "#define INDEX_IN_BLOCK_3D ((BLOCK_SIZE_X * BLOCK_SIZE_Y * THREAD_ID_Z) + INDEX_IN_BLOCK_2D)\n"
+	   "\n"
+	   "#define INDEX_IN_GRID_2D (GLOBAL_ID_Y * GLOBAL_SIZE_X + GLOBAL_ID_X)\n"
+	   "#define INDEX_IN_GRID_3D ((GLOBAL_SIZE_X * GLOBAL_SIZE_Y * GLOBAL_ID_Z) + INDEX_IN_GRID_2D)\n"
+	   "\n"
+	   "#define BLOCK_START_INDEX_IN_GRID_2D ((BLOCK_ID_Y * GRID_SIZE_X * BLOCK_SIZE_Y * BLOCK_SIZE_X) + (BLOCK_ID_X * BLOCK_SIZE_X * BLOCK_SIZE_Y))\n"
+	   "\n";
+	return os.str();
+}
+
+/// <summary>
+/// A point structure on the host that maps to the one used on the device to iterate in OpenCL.
+/// It might seem better to use vec4, however 2D palettes and even 3D coordinates may eventually
+/// be supported, which will make it more than 4 members.
+/// </summary>
+template <typename T>
+struct ALIGN PointCL
+{
+	T m_X;
+	T m_Y;
+	T m_Z;
+	T m_ColorX;
+	uint m_LastXfUsed;
+};
+
+/// <summary>
+/// The point structure used to iterate in OpenCL.
+/// It might seem better to use float4, however 2D palettes and even 3D coordinates may eventually
+/// be supported, which will make it more than 4 members.
+/// </summary>
+static constexpr char PointCLStructString[] =
+	"typedef struct __attribute__ " ALIGN_CL " _Point\n"
+	"{\n"
+	"	real_t m_X;\n"
+	"	real_t m_Y;\n"
+	"	real_t m_Z;\n"
+	"	real_t m_ColorX;\n"
+	"	uint m_LastXfUsed;\n"
+	"} Point;\n"
+	"\n";
+
+/// <summary>
+/// A structure on the host used to hold all of the needed information for an xform used on the device to iterate in OpenCL.
+/// Template argument expected to be float or double.
+/// </summary>
+template <typename T>
+struct ALIGN XformCL
+{
+	T m_A, m_B, m_C, m_D, m_E, m_F;//24 (48)
+	T m_PostA, m_PostB, m_PostC, m_PostD, m_PostE, m_PostF;//48 (96)
+	T m_DirectColor;//52 (104)
+	T m_ColorSpeedCache;//56 (112)
+	T m_OneMinusColorCache;//60 (120)
+	T m_Opacity;//64 (128)
+};
+
+/// <summary>
+/// The xform structure used to iterate in OpenCL.
+/// </summary>
+static constexpr char XformCLStructString[] =
+	"typedef struct __attribute__ " ALIGN_CL " _XformCL\n"
+	"{\n"
+	"	real_t m_A, m_B, m_C, m_D, m_E, m_F;\n"
+	"	real_t m_PostA, m_PostB, m_PostC, m_PostD, m_PostE, m_PostF;\n"
+	"	real_t m_DirectColor;\n"
+	"	real_t m_ColorSpeedCache;\n"
+	"	real_t m_OneMinusColorCache;\n"
+	"	real_t m_Opacity;\n"
+	"} XformCL;\n"
+	"\n";
+
+/// <summary>
+/// A structure on the host used to hold all of the needed information for an ember used on the device to iterate in OpenCL.
+/// Template argument expected to be float or double.
+/// </summary>
+template <typename T>
+struct ALIGN EmberCL
+{
+	T m_RandPointRange;
+	T m_CamZPos;
+	T m_CamPerspective;
+	T m_CamYaw;
+	T m_CamPitch;
+	T m_BlurCurve;
+	T m_CamDepthBlur;
+	T m_BlurCoef;
+	m3T m_CamMat;
+	T m_CenterX, m_CenterY;
+	T m_RotA, m_RotB, m_RotD, m_RotE;
+	T m_Psm1;
+	T m_Psm2;
+};
+
+/// <summary>
+/// The ember structure used to iterate in OpenCL.
+/// </summary>
+static constexpr char EmberCLStructString[] =
+	"typedef struct __attribute__ " ALIGN_CL " _EmberCL\n"
+	"{\n"
+	"	real_t m_RandPointRange;\n"
+	"	real_t m_CamZPos;\n"
+	"	real_t m_CamPerspective;\n"
+	"	real_t m_CamYaw;\n"
+	"	real_t m_CamPitch;\n"
+	"	real_t m_BlurCurve;\n"
+	"	real_t m_CamDepthBlur;\n"
+	"	real_t m_BlurCoef;\n"
+	"	real_t m_C00;\n"
+	"	real_t m_C01;\n"
+	"	real_t m_C02;\n"
+	"	real_t m_C10;\n"
+	"	real_t m_C11;\n"
+	"	real_t m_C12;\n"
+	"	real_t m_C20;\n"
+	"	real_t m_C21;\n"
+	"	real_t m_C22;\n"
+	"	real_t m_CenterX, m_CenterY;\n"
+	"	real_t m_RotA, m_RotB, m_RotD, m_RotE;\n"
+	"	real_t m_Psm1;\n"
+	"	real_t m_Psm2;\n"
+	"} EmberCL;\n"
+	"\n";
+
+/// <summary>
+/// A structure on the host used to hold all of the needed information for cartesian to raster mapping used on the device to iterate in OpenCL.
+/// Template argument expected to be float or double.
+/// </summary>
+template <typename T>
+struct ALIGN CarToRasCL
+{
+	T m_PixPerImageUnitW, m_RasLlX;
+	uint m_RasWidth;
+	T m_PixPerImageUnitH, m_RasLlY;
+	T m_CarLlX, m_CarUrX, m_CarUrY, m_CarLlY;
+	T m_CarHalfX, m_CarHalfY, m_CarCenterX, m_CarCenterY;
+};
+
+/// <summary>
+/// The cartesian to raster structure used to iterate in OpenCL.
+/// </summary>
+static constexpr char CarToRasCLStructString[] =
+	"typedef struct __attribute__ " ALIGN_CL " _CarToRasCL\n"
+	"{\n"
+	"	real_t m_PixPerImageUnitW, m_RasLlX;\n"
+	"	uint m_RasWidth;\n"
+	"	real_t m_PixPerImageUnitH, m_RasLlY;\n"
+	"	real_t m_CarLlX, m_CarUrX, m_CarUrY, m_CarLlY;\n"
+	"	real_t m_CarHalfX, m_CarHalfY, m_CarCenterX, m_CarCenterY;\n"
+	"} CarToRasCL;\n"
+	"\n";
+
+/// <summary>
+/// A structure on the host used to hold all of the needed information for density filtering used on the device to iterate in OpenCL.
+/// Note that the actual filter buffer is held elsewhere.
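+/// Template argument expected to be float or double. NOTE(review): the device struct declares these members as real_bucket_t, which is typedef'd to float in both precision modes, so T presumably has to be float - confirm.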
+/// </summary>
+template <typename T>
+struct ALIGN DensityFilterCL
+{
+	T m_Curve;
+	T m_K1;
+	T m_K2;
+	uint m_Supersample;
+	uint m_SuperRasW;
+	uint m_SuperRasH;
+	uint m_KernelSize;
+	uint m_MaxFilterIndex;
+	uint m_MaxFilteredCounts;
+	uint m_FilterWidth;
+};
+
+/// <summary>
+/// The density filtering structure used to iterate in OpenCL.
+/// Note that the actual filter buffer is held elsewhere.
+/// </summary>
+static constexpr char DensityFilterCLStructString[] =
+	"typedef struct __attribute__ " ALIGN_CL " _DensityFilterCL\n"
+	"{\n"
+	"	real_bucket_t m_Curve;\n"
+	"	real_bucket_t m_K1;\n"
+	"	real_bucket_t m_K2;\n"
+	"	uint m_Supersample;\n"
+	"	uint m_SuperRasW;\n"
+	"	uint m_SuperRasH;\n"
+	"	uint m_KernelSize;\n"
+	"	uint m_MaxFilterIndex;\n"
+	"	uint m_MaxFilteredCounts;\n"
+	"	uint m_FilterWidth;\n"
+	"} DensityFilterCL;\n"
+	"\n";
+
+/// <summary>
+/// A structure on the host used to hold all of the needed information for spatial filtering used on the device to iterate in OpenCL.
+/// Note that the actual filter buffer is held elsewhere.
+/// </summary>
+template <typename T>
+struct ALIGN SpatialFilterCL
+{
+	uint m_SuperRasW = 0;
+	uint m_SuperRasH = 0;
+	uint m_FinalRasW = 0;
+	uint m_FinalRasH = 0;
+	uint m_Supersample = 0;
+	uint m_FilterWidth = 0;
+	uint m_DensityFilterOffset = 0;
+	uint m_YAxisUp = 0;
+	T m_Vibrancy = 0;
+	T m_HighlightPower = 0;
+	T m_Gamma = 0;
+	T m_LinRange = 0;
+	Color<T> m_Background;
+};
+
+/// <summary>
+/// The spatial filtering structure used to iterate in OpenCL.
+/// Note that the actual filter buffer is held elsewhere.
+/// </summary>
+static constexpr char SpatialFilterCLStructString[] =
+	"typedef struct __attribute__ ((aligned (16))) _SpatialFilterCL\n"
+	"{\n"
+	"	uint m_SuperRasW;\n"
+	"	uint m_SuperRasH;\n"
+	"	uint m_FinalRasW;\n"
+	"	uint m_FinalRasH;\n"
+	"	uint m_Supersample;\n"
+	"	uint m_FilterWidth;\n"
+	"	uint m_DensityFilterOffset;\n"
+	"	uint m_YAxisUp;\n"
+	"	real_bucket_t m_Vibrancy;\n"
+	"	real_bucket_t m_HighlightPower;\n"
+	"	real_bucket_t m_Gamma;\n"
+	"	real_bucket_t m_LinRange;\n"
+	"	real_bucket_t m_Background[4];\n"//For some reason, using float4/double4 here does not align no matter what. So just use an array of 4.
+	"} SpatialFilterCL;\n"
+	"\n";
+
+/// <summary>
+/// EmberCL makes extensive use of the built-in vector types, however accessing
+/// their members as a buffer is not natively supported.
+/// Declaring them in a union with a buffer resolves this problem.
+/// </summary>
+static constexpr char UnionCLStructString[] =
+	"typedef union\n"
+	"{\n"
+	"	uchar3 m_Uchar3;\n"
+	"	uchar m_Uchars[3];\n"
+	"} uchar3uchars;\n"
+	"\n"
+	"typedef union\n"
+	"{\n"
+	"	uchar4 m_Uchar4;\n"
+	"	uchar m_Uchars[4];\n"
+	"} uchar4uchars;\n"
+	"\n"
+	"typedef union\n"
+	"{\n"
+	"	uint4 m_Uint4;\n"
+	"	uint m_Uints[4];\n"
+	"} uint4uints;\n"
+	"\n"
+	"typedef union\n"//Use in places where float is required.
+	"{\n"
+	"	float4 m_Float4;\n"
+	"	float m_Floats[4];\n"
+	"} float4floats;\n"
+	"\n"
+	"typedef union\n"//Use in places where float or double can be used depending on the template type.
+	"{\n"
+	"	real4 m_Real4;\n"
+	"	real_t m_Reals[4];\n"
+	"} real4reals;\n"
+	"\n"
+	"typedef union\n"//Used to match the bucket template type.
+	"{\n"
+	"	real4_bucket m_Real4;\n"
+	"	real_bucket_t m_Reals[4];\n"
+	"} real4reals_bucket;\n"
+	"\n";
+}
@@ -1,316 +1,316 @@
-#include "EmberCLPch.h"
-#include "FinalAccumOpenCLKernelCreator.h"
-
-namespace EmberCLns
-{
-/// <summary>
-/// Constructor that creates all kernel strings.
-/// The caller will access these strings through the accessor functions.
-/// </summary>
-FinalAccumOpenCLKernelCreator::FinalAccumOpenCLKernelCreator(bool doublePrecision)
-{
-	m_DoublePrecision = doublePrecision;
-	m_GammaCorrectionWithoutAlphaCalcKernel                   = CreateGammaCorrectionKernelString();
-	m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel = CreateFinalAccumKernelString(true);
-	m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel  = CreateFinalAccumKernelString(false);
-}
-
-/// <summary>
-/// Kernel source and entry point properties, getters only.
-/// </summary>
-
-const string& FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel()     const { return m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel;     }
-const string& FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint() const { return m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint; }
-
-const string& FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel()     const { return m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel;     }
-const string& FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint() const { return m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint; }
-
-const string& FinalAccumOpenCLKernelCreator::GammaCorrectionEntryPoint() const { return m_GammaCorrectionWithoutAlphaCalcEntryPoint; }
-const string& FinalAccumOpenCLKernelCreator::GammaCorrectionKernel() const { return m_GammaCorrectionWithoutAlphaCalcKernel; }
-
-/// <summary>
-/// Get the final accumulation entry point.
-/// </summary>
-/// <param name="earlyClip">True if early clip is desired, else false.</param>
-/// <returns>The name of the final accumulation entry point kernel function</returns>
-const string& FinalAccumOpenCLKernelCreator::FinalAccumEntryPoint(bool earlyClip) const
-{
-	if (earlyClip)
-		return FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint();
-	else
-		return FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint();
-}
-
-/// <summary>
-/// Get the final accumulation kernel string.
-/// </summary>
-/// <param name="earlyClip">True if early clip is desired, else false.</param>
-/// <returns>The final accumulation kernel string</returns>
-const string& FinalAccumOpenCLKernelCreator::FinalAccumKernel(bool earlyClip) const
-{
-	if (earlyClip)
-		return FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel();
-	else
-		return FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel();
-}
-
-/// <summary>
-/// Create the final accumulation kernel string
-/// </summary>
-/// <param name="earlyClip">True if early clip is desired, else false.</param>
-/// <returns>The final accumulation kernel string</returns>
-string FinalAccumOpenCLKernelCreator::CreateFinalAccumKernelString(bool earlyClip)
-{
-	ostringstream os;
-	os <<
-	   ConstantDefinesString(m_DoublePrecision) <<
-	   UnionCLStructString <<
-	   RgbToHsvFunctionString <<
-	   HsvToRgbFunctionString <<
-	   CalcAlphaFunctionString <<
-	   CurveAdjustFunctionString <<
-	   SpatialFilterCLStructString;
-
-	if (earlyClip)
-	{
-		os << "__kernel void " << m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint << "(\n";
-	}
-	else
-	{
-		os <<
-		   CreateCalcNewRgbFunctionString(false) <<
-		   CreateGammaCorrectionFunctionString(false, true) <<
-		   "__kernel void " << m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint << "(\n";
-	}
-
-	os <<
-	   "	const __global real4reals_bucket* accumulator,\n"
-	   "	__write_only image2d_t pixels,\n"
-	   "	__constant SpatialFilterCL* spatialFilter,\n"
-	   "	__constant real_bucket_t* filterCoefs,\n"
-	   "	__global real4reals_bucket* csa,\n"
-	   "	const uint doCurves\n"
-	   "\t)\n"
-	   "{\n"
-	   "\n"
-	   "	if ((GLOBAL_ID_Y >= spatialFilter->m_FinalRasH) || (GLOBAL_ID_X >= spatialFilter->m_FinalRasW))\n"
-	   "		return;\n"
-	   "\n"
-	   "	uint accumX = spatialFilter->m_DensityFilterOffset + (GLOBAL_ID_X * spatialFilter->m_Supersample);\n"
-	   "	uint accumY = spatialFilter->m_DensityFilterOffset + (GLOBAL_ID_Y * spatialFilter->m_Supersample);\n"
-	   "    uint clampedFilterH = min((uint)spatialFilter->m_FilterWidth, spatialFilter->m_SuperRasH - accumY);"
-	   "    uint clampedFilterW = min((uint)spatialFilter->m_FilterWidth, spatialFilter->m_SuperRasW - accumX);"
-	   "	int2 finalCoord;\n"
-	   "	finalCoord.x = GLOBAL_ID_X;\n"
-	   "	finalCoord.y = (int)((spatialFilter->m_YAxisUp == 1) ? ((spatialFilter->m_FinalRasH - GLOBAL_ID_Y) - 1) : GLOBAL_ID_Y);\n"
-	   "	float4floats finalColor;\n"
-	   "	int ii, jj;\n"
-	   "	uint filterKRowIndex;\n"
-	   "	const __global real4reals_bucket* accumBucket;\n"
-	   "	real4reals_bucket newBucket;\n"
-	   "	newBucket.m_Real4 = 0;\n"
-	   "\n"
-	   "	for (jj = 0; jj < clampedFilterH; jj++)\n"
-	   "	{\n"
-	   "		filterKRowIndex = jj * spatialFilter->m_FilterWidth;\n"//Use the full, non-clamped width to get the filter value.
-	   "\n"
-	   "		for (ii = 0; ii < clampedFilterW; ii++)\n"
-	   "		{\n"
-	   "			real_bucket_t k = filterCoefs[filterKRowIndex + ii];\n"
-	   "\n"
-	   "			accumBucket = accumulator + ((accumY + jj) * spatialFilter->m_SuperRasW) + (accumX + ii);\n"
-	   "			newBucket.m_Real4 += (k * accumBucket->m_Real4);\n"
-	   "		}\n"
-	   "	}\n"
-	   "\n";
-
-	if (earlyClip)//If early clip, simply assign values directly to the temp float4 since they've been gamma corrected already, then write it straight to the output image below.
-	{
-		os <<
-		   "	finalColor.m_Float4.x = (float)newBucket.m_Real4.x;\n"//CPU side clamps, skip here because write_imagef() does the clamping for us.
-		   "	finalColor.m_Float4.y = (float)newBucket.m_Real4.y;\n"
-		   "	finalColor.m_Float4.z = (float)newBucket.m_Real4.z;\n"
-		   "	finalColor.m_Float4.w = (float)newBucket.m_Real4.w;\n";
-	}
-	else
-	{
-		//Late clip, so must gamma correct from the temp newBucket to temp finalColor float4.
-		if (m_DoublePrecision)
-		{
-			os <<
-			   "	real4reals_bucket realFinal;\n"
-			   "\n"
-			   "	GammaCorrectionFloats(&newBucket, &(spatialFilter->m_Background[0]), spatialFilter->m_Gamma, spatialFilter->m_LinRange, spatialFilter->m_Vibrancy, spatialFilter->m_HighlightPower, &(realFinal.m_Reals[0]));\n"
-			   "	finalColor.m_Float4.x = (float)realFinal.m_Real4.x;\n"
-			   "	finalColor.m_Float4.y = (float)realFinal.m_Real4.y;\n"
-			   "	finalColor.m_Float4.z = (float)realFinal.m_Real4.z;\n"
-			   "	finalColor.m_Float4.w = (float)realFinal.m_Real4.w;\n"
-			   ;
-		}
-		else
-		{
-			os <<
-			   "	GammaCorrectionFloats(&newBucket, &(spatialFilter->m_Background[0]), spatialFilter->m_Gamma, spatialFilter->m_LinRange, spatialFilter->m_Vibrancy, spatialFilter->m_HighlightPower, &(finalColor.m_Floats[0]));\n";
-		}
-	}
-
-	os <<
-	   "\n"
-	   "	if (doCurves)\n"
-	   "	{\n"
-	   "		CurveAdjust(csa, &(finalColor.m_Floats[0]), 1);\n"
-	   "		CurveAdjust(csa, &(finalColor.m_Floats[1]), 2);\n"
-	   "		CurveAdjust(csa, &(finalColor.m_Floats[2]), 3);\n"
-	   "	}\n"
-	   "\n"
-	   "	write_imagef(pixels, finalCoord, finalColor.m_Float4);\n"//Use write_imagef instead of write_imageui because only the former works when sharing with an OpenGL texture.
-	   "	barrier(CLK_GLOBAL_MEM_FENCE);\n"//Required, or else page tearing will occur during interactive rendering.
-	   "}\n"
-	   ;
-	return os.str();
-}
-
-/// <summary>
-/// Creates the gamma correction function string.
-/// This is not a full kernel, just a function that is used in the kernels.
-/// </summary>
-/// <param name="globalBucket">True if writing to a global buffer (early clip), else false (late clip).</param>
-/// <param name="finalOut">True if writing to global buffer (late clip), else false (early clip).</param>
-/// <returns>The gamma correction function string</returns>
-string FinalAccumOpenCLKernelCreator::CreateGammaCorrectionFunctionString(bool globalBucket, bool finalOut)
-{
-	ostringstream os;
-	string dataType;
-	string unionMember;
-	dataType = "real_bucket_t";
-	//Use real_t for all cases, early clip and final accum.
-	os << "void GammaCorrectionFloats(" << (globalBucket ? "__global " : "") << "real4reals_bucket* bucket, __constant real_bucket_t* background, real_bucket_t g, real_bucket_t linRange, real_bucket_t vibrancy, real_bucket_t highlightPower, " << (finalOut ? "" : "__global") << " real_bucket_t* correctedChannels)\n";
-	os << "{\n"
-	   << "	real_bucket_t alpha, ls, tmp, a;\n"
-	   << "	real4reals_bucket newRgb;\n"
-	   << "\n"
-	   << "	if (bucket->m_Reals[3] <= 0)\n"
-	   << "	{\n"
-	   << "		alpha = 0;\n"
-	   << "		ls = 0;\n"
-	   << "	}\n"
-	   << "	else\n"
-	   << "	{\n"
-	   << "		tmp = bucket->m_Reals[3];\n"
-	   << "		alpha = CalcAlpha(tmp, g, linRange);\n"
-	   << "		ls = vibrancy * alpha / tmp;\n"
-	   << "		alpha = clamp(alpha, (real_bucket_t)0.0, (real_bucket_t)1.0);\n"
-	   << "	}\n"
-	   << "\n"
-	   << "	CalcNewRgb(bucket, ls, highlightPower, &newRgb);\n"
-	   << "\n"
-	   << "	for (uint rgbi = 0; rgbi < 3; rgbi++)\n"
-	   << "	{\n"
-	   << "		a = newRgb.m_Reals[rgbi] + ((1.0 - vibrancy) * pow(fabs(bucket->m_Reals[rgbi]), g));\n"
-	   << "		a += ((1.0 - alpha) * background[rgbi]);\n"
-	   << "		correctedChannels[rgbi] = (" << dataType << ")clamp(a, (real_bucket_t)0.0, (real_bucket_t)1.0);\n"
-	   << "	}\n"
-	   << "\n"
-	   << "	correctedChannels[3] = (" << dataType << ")alpha;\n"
-	   << "}\n"
-	   << "\n";
-	return os.str();
-}
-
-/// <summary>
-/// OpenCL equivalent of Palette::CalcNewRgb().
-/// </summary>
-/// <param name="globalBucket">True if writing the corrected value to a global buffer (early clip), else false (late clip).</param>
-/// <returns>The CalcNewRgb function string</returns>
-string FinalAccumOpenCLKernelCreator::CreateCalcNewRgbFunctionString(bool globalBucket)
-{
-	ostringstream os;
-	os <<
-	   "static void CalcNewRgb(" << (globalBucket ? "__global " : "") << "real4reals_bucket* oldRgb, real_bucket_t ls, real_bucket_t highPow, real4reals_bucket* newRgb)\n"
-	   "{\n"
-	   "	int rgbi;\n"
-	   "	real_bucket_t lsratio;\n"
-	   "	real4reals_bucket newHsv;\n"
-	   "	real_bucket_t maxa, maxc, newls;\n"
-	   "	real_bucket_t adjhlp;\n"
-	   "\n"
-	   "	if (ls == 0 || (oldRgb->m_Real4.x == 0 && oldRgb->m_Real4.y == 0 && oldRgb->m_Real4.z == 0))\n"//Can't do a vector compare to zero.
-	   "	{\n"
-	   "		newRgb->m_Real4 = 0;\n"
-	   "		return;\n"
-	   "	}\n"
-	   "\n"
-	   //Identify the most saturated channel.
-	   "	maxc = max(max(oldRgb->m_Reals[0], oldRgb->m_Reals[1]), oldRgb->m_Reals[2]);\n"
-	   "	maxa = ls * maxc;\n"
-	   "	newls = 1 / maxc;\n"
-	   "\n"
-	   //If a channel is saturated and highlight power is non-negative
-	   //modify the color to prevent hue shift.
-	   "	if (maxa > 1 && highPow >= 0)\n"
-	   "	{\n"
-	   "		lsratio = pow(newls / ls, highPow);\n"
-	   "\n"
-	   //Calculate the max-value color (ranged 0 - 1).
-	   "		for (rgbi = 0; rgbi < 3; rgbi++)\n"
-	   "			newRgb->m_Reals[rgbi] = newls * oldRgb->m_Reals[rgbi];\n"
-	   "\n"
-	   //Reduce saturation by the lsratio.
-	   "		RgbToHsv(&(newRgb->m_Real4), &(newHsv.m_Real4));\n"
-	   "		newHsv.m_Real4.y *= lsratio;\n"
-	   "		HsvToRgb(&(newHsv.m_Real4), &(newRgb->m_Real4));\n"
-	   "	}\n"
-	   "	else\n"
-	   "	{\n"
-	   "		adjhlp = -highPow;\n"
-	   "\n"
-	   "		if (adjhlp > 1)\n"
-	   "			adjhlp = 1;\n"
-	   "\n"
-	   "		if (maxa <= 1)\n"
-	   "			adjhlp = 1;\n"
-	   "\n"
-	   //Calculate the max-value color (ranged 0 - 1) interpolated with the old behavior.
-	   "		for (rgbi = 0; rgbi < 3; rgbi++)\n"//Unrolling, caching and vectorizing makes no difference.
-	   "			newRgb->m_Reals[rgbi] = ((1.0 - adjhlp) * newls + adjhlp * ls) * oldRgb->m_Reals[rgbi];\n"
-	   "	}\n"
-	   "}\n"
-	   "\n";
-	return os.str();
-}
-
-/// <summary>
-/// Create the gamma correction kernel string used for early clipping.
-/// </summary>
-/// <returns>The gamma correction kernel string used for early clipping</returns>
-string FinalAccumOpenCLKernelCreator::CreateGammaCorrectionKernelString()
-{
-	ostringstream os;
-	string dataType;
-	os <<
-	   ConstantDefinesString(m_DoublePrecision) <<
-	   UnionCLStructString <<
-	   RgbToHsvFunctionString <<
-	   HsvToRgbFunctionString <<
-	   CalcAlphaFunctionString <<
-	   CreateCalcNewRgbFunctionString(true) <<
-	   SpatialFilterCLStructString <<
-	   CreateGammaCorrectionFunctionString(true, false);//Will only be used with float in this case, early clip. Will always alpha accum.
-	os << "__kernel void " << m_GammaCorrectionWithoutAlphaCalcEntryPoint << "(\n" <<
-	   "	__global real4reals_bucket* accumulator,\n"
-	   "	__constant SpatialFilterCL* spatialFilter\n"
-	   ")\n"
-	   "{\n"
-	   "	int testGutter = 0;\n"
-	   "\n"
-	   "	if (GLOBAL_ID_Y >= (spatialFilter->m_SuperRasH - testGutter) || GLOBAL_ID_X >= (spatialFilter->m_SuperRasW - testGutter))\n"
-	   "		return;\n"
-	   "\n"
-	   "	uint superIndex = (GLOBAL_ID_Y * spatialFilter->m_SuperRasW) + GLOBAL_ID_X;\n"
-	   "	__global real4reals_bucket* bucket = accumulator + superIndex;\n"
-	   "	GammaCorrectionFloats(bucket, &(spatialFilter->m_Background[0]), spatialFilter->m_Gamma, spatialFilter->m_LinRange, spatialFilter->m_Vibrancy, spatialFilter->m_HighlightPower, &(bucket->m_Reals[0]));\n"
-	   "}\n"
-	   ;
-	return os.str();
-}
-}
+#include "EmberCLPch.h"
+#include "FinalAccumOpenCLKernelCreator.h"
+
+namespace EmberCLns
+{
+/// <summary>
+/// Constructor that creates all three kernel strings: gamma correction, early clip final accumulation and late clip final accumulation.
+/// The caller will access these strings through the accessor functions.
+/// </summary>
+FinalAccumOpenCLKernelCreator::FinalAccumOpenCLKernelCreator(bool doublePrecision)
+{
+	m_DoublePrecision = doublePrecision;//Must be assigned first because the Create*() calls below read it.
+	m_GammaCorrectionWithoutAlphaCalcKernel                   = CreateGammaCorrectionKernelString();
+	m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel = CreateFinalAccumKernelString(true);
+	m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel  = CreateFinalAccumKernelString(false);
+}
+
+/// <summary>
+/// Kernel source and entry point properties, getters only. Each returns a const reference to a member string of this object.
+/// </summary>
+
+const string& FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel()     const { return m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel;     }
+const string& FinalAccumOpenCLKernelCreator::FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint() const { return m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint; }
+
+const string& FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel()     const { return m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel;     }
+const string& FinalAccumOpenCLKernelCreator::FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint() const { return m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint; }
+
+const string& FinalAccumOpenCLKernelCreator::GammaCorrectionEntryPoint() const { return m_GammaCorrectionWithoutAlphaCalcEntryPoint; }
+const string& FinalAccumOpenCLKernelCreator::GammaCorrectionKernel() const { return m_GammaCorrectionWithoutAlphaCalcKernel; }
+
+/// <summary>
+/// Get the final accumulation entry point name for the requested clip mode.
+/// </summary>
+/// <param name="earlyClip">True if early clip is desired, else false.</param>
+/// <returns>The name of the final accumulation entry point kernel function</returns>
+const string& FinalAccumOpenCLKernelCreator::FinalAccumEntryPoint(bool earlyClip) const
+{
+	if (earlyClip)
+		return FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint();
+	else
+		return FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint();
+}
+
+/// <summary>
+/// Get the final accumulation kernel source string for the requested clip mode.
+/// </summary>
+/// <param name="earlyClip">True if early clip is desired, else false.</param>
+/// <returns>The final accumulation kernel string</returns>
+const string& FinalAccumOpenCLKernelCreator::FinalAccumKernel(bool earlyClip) const
+{
+	if (earlyClip)
+		return FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel();
+	else
+		return FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel();
+}
+
+/// <summary>
+/// Create the complete final accumulation kernel source string: the shared defines, unions and helper functions followed by the kernel itself.
+/// </summary>
+/// <param name="earlyClip">True if early clip is desired, else false.</param>
+/// <returns>The final accumulation kernel string</returns>
+string FinalAccumOpenCLKernelCreator::CreateFinalAccumKernelString(bool earlyClip)
+{
+	ostringstream os;
+	os <<
+	   ConstantDefinesString(m_DoublePrecision) <<
+	   UnionCLStructString <<
+	   RgbToHsvFunctionString <<
+	   HsvToRgbFunctionString <<
+	   CalcAlphaFunctionString <<
+	   CurveAdjustFunctionString <<
+	   SpatialFilterCLStructString;
+
+	if (earlyClip)
+	{
+		os << "__kernel void " << m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint << "(\n";
+	}
+	else
+	{
+		os <<
+		   CreateCalcNewRgbFunctionString(false) <<
+		   CreateGammaCorrectionFunctionString(false, true) <<
+		   "__kernel void " << m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint << "(\n";
+	}
+
+	os <<
+	   "	const __global real4reals_bucket* accumulator,\n"
+	   "	__write_only image2d_t pixels,\n"
+	   "	__constant SpatialFilterCL* spatialFilter,\n"
+	   "	__constant real_bucket_t* filterCoefs,\n"
+	   "	__global real4reals_bucket* csa,\n"
+	   "	const uint doCurves\n"
+	   "\t)\n"
+	   "{\n"
+	   "\n"
+	   "	if ((GLOBAL_ID_Y >= spatialFilter->m_FinalRasH) || (GLOBAL_ID_X >= spatialFilter->m_FinalRasW))\n"
+	   "		return;\n"
+	   "\n"
+	   "	uint accumX = spatialFilter->m_DensityFilterOffset + (GLOBAL_ID_X * spatialFilter->m_Supersample);\n"
+	   "	uint accumY = spatialFilter->m_DensityFilterOffset + (GLOBAL_ID_Y * spatialFilter->m_Supersample);\n"
+	   "    uint clampedFilterH = min((uint)spatialFilter->m_FilterWidth, spatialFilter->m_SuperRasH - accumY);"//NOTE(review): no trailing \n, so this statement shares a line with the next one in the generated kernel source. Only affects readability of the emitted text.
+	   "    uint clampedFilterW = min((uint)spatialFilter->m_FilterWidth, spatialFilter->m_SuperRasW - accumX);"//NOTE(review): no trailing \n here either.
+	   "	int2 finalCoord;\n"
+	   "	finalCoord.x = GLOBAL_ID_X;\n"
+	   "	finalCoord.y = (int)((spatialFilter->m_YAxisUp == 1) ? ((spatialFilter->m_FinalRasH - GLOBAL_ID_Y) - 1) : GLOBAL_ID_Y);\n"
+	   "	float4floats finalColor;\n"
+	   "	int ii, jj;\n"//NOTE(review): signed, but compared against the uint clampedFilterH/clampedFilterW in the loops below.
+	   "	uint filterKRowIndex;\n"
+	   "	const __global real4reals_bucket* accumBucket;\n"
+	   "	real4reals_bucket newBucket;\n"
+	   "	newBucket.m_Real4 = 0;\n"
+	   "\n"
+	   "	for (jj = 0; jj < clampedFilterH; jj++)\n"
+	   "	{\n"
+	   "		filterKRowIndex = jj * spatialFilter->m_FilterWidth;\n"//Use the full, non-clamped width to get the filter value.
+	   "\n"
+	   "		for (ii = 0; ii < clampedFilterW; ii++)\n"
+	   "		{\n"
+	   "			real_bucket_t k = filterCoefs[filterKRowIndex + ii];\n"
+	   "\n"
+	   "			accumBucket = accumulator + ((accumY + jj) * spatialFilter->m_SuperRasW) + (accumX + ii);\n"
+	   "			newBucket.m_Real4 += (k * accumBucket->m_Real4);\n"
+	   "		}\n"
+	   "	}\n"
+	   "\n";
+
+	if (earlyClip)//If early clip, simply assign values directly to the temp float4 since they've been gamma corrected already, then write it straight to the output image below.
+	{
+		os <<
+		   "	finalColor.m_Float4.x = (float)newBucket.m_Real4.x;\n"//CPU side clamps, skip here because write_imagef() does the clamping for us.
+		   "	finalColor.m_Float4.y = (float)newBucket.m_Real4.y;\n"
+		   "	finalColor.m_Float4.z = (float)newBucket.m_Real4.z;\n"
+		   "	finalColor.m_Float4.w = (float)newBucket.m_Real4.w;\n";
+	}
+	else
+	{
+		//Late clip, so must gamma correct from the temp newBucket to temp finalColor float4.
+		if (m_DoublePrecision)
+		{
+			os <<
+			   "	real4reals_bucket realFinal;\n"
+			   "\n"
+			   "	GammaCorrectionFloats(&newBucket, &(spatialFilter->m_Background[0]), spatialFilter->m_Gamma, spatialFilter->m_LinRange, spatialFilter->m_Vibrancy, spatialFilter->m_HighlightPower, &(realFinal.m_Reals[0]));\n"
+			   "	finalColor.m_Float4.x = (float)realFinal.m_Real4.x;\n"
+			   "	finalColor.m_Float4.y = (float)realFinal.m_Real4.y;\n"
+			   "	finalColor.m_Float4.z = (float)realFinal.m_Real4.z;\n"
+			   "	finalColor.m_Float4.w = (float)realFinal.m_Real4.w;\n"
+			   ;
+		}
+		else
+		{
+			os <<
+			   "	GammaCorrectionFloats(&newBucket, &(spatialFilter->m_Background[0]), spatialFilter->m_Gamma, spatialFilter->m_LinRange, spatialFilter->m_Vibrancy, spatialFilter->m_HighlightPower, &(finalColor.m_Floats[0]));\n";
+		}
+	}
+
+	os <<
+	   "\n"
+	   "	if (doCurves)\n"
+	   "	{\n"
+	   "		CurveAdjust(csa, &(finalColor.m_Floats[0]), 1);\n"
+	   "		CurveAdjust(csa, &(finalColor.m_Floats[1]), 2);\n"
+	   "		CurveAdjust(csa, &(finalColor.m_Floats[2]), 3);\n"
+	   "	}\n"
+	   "\n"
+	   "	write_imagef(pixels, finalCoord, finalColor.m_Float4);\n"//Use write_imagef instead of write_imageui because only the former works when sharing with an OpenGL texture.
+	   "	barrier(CLK_GLOBAL_MEM_FENCE);\n"//Required, or else page tearing will occur during interactive rendering. NOTE(review): work-items that return early at the bounds check above never reach this barrier; the OpenCL spec expects every work-item in a work-group to reach it - confirm this is safe on the target devices.
+	   "}\n"
+	   ;
+	return os.str();
+}
+
+/// <summary>
+/// Creates the gamma correction function string.
+/// This is not a full kernel, just a function that is used in the kernels.
+/// </summary>
+/// <param name="globalBucket">True if the bucket being read is in __global memory (early clip), else false (late clip).</param>
+/// <param name="finalOut">True if writing to an output pointer with no address space qualifier (late clip), else false to write to a __global pointer (early clip).</param>
+/// <returns>The gamma correction function string</returns>
+string FinalAccumOpenCLKernelCreator::CreateGammaCorrectionFunctionString(bool globalBucket, bool finalOut)
+{
+	ostringstream os;
+	string dataType;
+	string unionMember;//NOTE(review): never used in this function.
+	dataType = "real_bucket_t";
+	//Use real_bucket_t for all cases, early clip and final accum.
+	os << "void GammaCorrectionFloats(" << (globalBucket ? "__global " : "") << "real4reals_bucket* bucket, __constant real_bucket_t* background, real_bucket_t g, real_bucket_t linRange, real_bucket_t vibrancy, real_bucket_t highlightPower, " << (finalOut ? "" : "__global") << " real_bucket_t* correctedChannels)\n";
+	os << "{\n"
+	   << "	real_bucket_t alpha, ls, tmp, a;\n"
+	   << "	real4reals_bucket newRgb;\n"
+	   << "\n"
+	   << "	if (bucket->m_Reals[3] <= 0)\n"
+	   << "	{\n"
+	   << "		alpha = 0;\n"
+	   << "		ls = 0;\n"
+	   << "	}\n"
+	   << "	else\n"
+	   << "	{\n"
+	   << "		tmp = bucket->m_Reals[3];\n"
+	   << "		alpha = CalcAlpha(tmp, g, linRange);\n"
+	   << "		ls = vibrancy * alpha / tmp;\n"
+	   << "		alpha = clamp(alpha, (real_bucket_t)0.0, (real_bucket_t)1.0);\n"
+	   << "	}\n"
+	   << "\n"
+	   << "	CalcNewRgb(bucket, ls, highlightPower, &newRgb);\n"
+	   << "\n"
+	   << "	for (uint rgbi = 0; rgbi < 3; rgbi++)\n"
+	   << "	{\n"
+	   << "		a = newRgb.m_Reals[rgbi] + ((1.0 - vibrancy) * pow(fabs(bucket->m_Reals[rgbi]), g));\n"
+	   << "		a += ((1.0 - alpha) * background[rgbi]);\n"
+	   << "		correctedChannels[rgbi] = (" << dataType << ")clamp(a, (real_bucket_t)0.0, (real_bucket_t)1.0);\n"
+	   << "	}\n"
+	   << "\n"
+	   << "	correctedChannels[3] = (" << dataType << ")alpha;\n"
+	   << "}\n"
+	   << "\n";
+	return os.str();
+}
+
+/// <summary>
+/// Creates the function string for the OpenCL equivalent of Palette::CalcNewRgb().
+/// </summary>
+/// <param name="globalBucket">True if the oldRgb bucket being read is in __global memory (early clip), else false (late clip).</param>
+/// <returns>The CalcNewRgb function string</returns>
+string FinalAccumOpenCLKernelCreator::CreateCalcNewRgbFunctionString(bool globalBucket)
+{
+	ostringstream os;
+	os <<
+	   "static void CalcNewRgb(" << (globalBucket ? "__global " : "") << "real4reals_bucket* oldRgb, real_bucket_t ls, real_bucket_t highPow, real4reals_bucket* newRgb)\n"
+	   "{\n"
+	   "	int rgbi;\n"
+	   "	real_bucket_t lsratio;\n"
+	   "	real4reals_bucket newHsv;\n"
+	   "	real_bucket_t maxa, maxc, newls;\n"
+	   "	real_bucket_t adjhlp;\n"
+	   "\n"
+	   "	if (ls == 0 || (oldRgb->m_Real4.x == 0 && oldRgb->m_Real4.y == 0 && oldRgb->m_Real4.z == 0))\n"//Can't do a vector compare to zero.
+	   "	{\n"
+	   "		newRgb->m_Real4 = 0;\n"
+	   "		return;\n"
+	   "	}\n"
+	   "\n"
+	   //Identify the most saturated channel.
+	   "	maxc = max(max(oldRgb->m_Reals[0], oldRgb->m_Reals[1]), oldRgb->m_Reals[2]);\n"
+	   "	maxa = ls * maxc;\n"
+	   "	newls = 1 / maxc;\n"
+	   "\n"
+	   //If a channel is saturated and highlight power is non-negative
+	   //modify the color to prevent hue shift.
+	   "	if (maxa > 1 && highPow >= 0)\n"
+	   "	{\n"
+	   "		lsratio = pow(newls / ls, highPow);\n"
+	   "\n"
+	   //Calculate the max-value color (ranged 0 - 1).
+	   "		for (rgbi = 0; rgbi < 3; rgbi++)\n"
+	   "			newRgb->m_Reals[rgbi] = newls * oldRgb->m_Reals[rgbi];\n"
+	   "\n"
+	   //Reduce saturation by the lsratio.
+	   "		RgbToHsv(&(newRgb->m_Real4), &(newHsv.m_Real4));\n"
+	   "		newHsv.m_Real4.y *= lsratio;\n"
+	   "		HsvToRgb(&(newHsv.m_Real4), &(newRgb->m_Real4));\n"
+	   "	}\n"
+	   "	else\n"
+	   "	{\n"
+	   "		adjhlp = -highPow;\n"
+	   "\n"
+	   "		if (adjhlp > 1)\n"
+	   "			adjhlp = 1;\n"
+	   "\n"
+	   "		if (maxa <= 1)\n"
+	   "			adjhlp = 1;\n"
+	   "\n"
+	   //Calculate the max-value color (ranged 0 - 1) interpolated with the old behavior.
+	   "		for (rgbi = 0; rgbi < 3; rgbi++)\n"//Unrolling, caching and vectorizing makes no difference.
+	   "			newRgb->m_Reals[rgbi] = ((1.0 - adjhlp) * newls + adjhlp * ls) * oldRgb->m_Reals[rgbi];\n"
+	   "	}\n"
+	   "}\n"
+	   "\n";
+	return os.str();
+}
+
+/// <summary>
+/// Create the gamma correction kernel string used for early clipping.
+/// </summary>
+/// <returns>The gamma correction kernel string used for early clipping</returns>
+string FinalAccumOpenCLKernelCreator::CreateGammaCorrectionKernelString()
+{
+	ostringstream os;
+	string dataType;//NOTE(review): never used in this function.
+	os <<
+	   ConstantDefinesString(m_DoublePrecision) <<
+	   UnionCLStructString <<
+	   RgbToHsvFunctionString <<
+	   HsvToRgbFunctionString <<
+	   CalcAlphaFunctionString <<
+	   CreateCalcNewRgbFunctionString(true) <<
+	   SpatialFilterCLStructString <<
+	   CreateGammaCorrectionFunctionString(true, false);//Will only be used with float in this case, early clip. Will always alpha accum.
+	os << "__kernel void " << m_GammaCorrectionWithoutAlphaCalcEntryPoint << "(\n" <<
+	   "	__global real4reals_bucket* accumulator,\n"
+	   "	__constant SpatialFilterCL* spatialFilter\n"
+	   ")\n"
+	   "{\n"
+	   "	int testGutter = 0;\n"//Always zero here, so the bounds check below spans the full m_SuperRasW x m_SuperRasH range.
+	   "\n"
+	   "	if (GLOBAL_ID_Y >= (spatialFilter->m_SuperRasH - testGutter) || GLOBAL_ID_X >= (spatialFilter->m_SuperRasW - testGutter))\n"
+	   "		return;\n"
+	   "\n"
+	   "	uint superIndex = (GLOBAL_ID_Y * spatialFilter->m_SuperRasW) + GLOBAL_ID_X;\n"
+	   "	__global real4reals_bucket* bucket = accumulator + superIndex;\n"
+	   "	GammaCorrectionFloats(bucket, &(spatialFilter->m_Background[0]), spatialFilter->m_Gamma, spatialFilter->m_LinRange, spatialFilter->m_Vibrancy, spatialFilter->m_HighlightPower, &(bucket->m_Reals[0]));\n"//Corrects in place: the same bucket is passed as both input and output.
+	   "}\n"
+	   ;
+	return os.str();
+}
+}
@@ -1,54 +1,54 @@
-#pragma once
-
-#include "EmberCLPch.h"
-#include "EmberCLStructs.h"
-#include "EmberCLFunctions.h"
-
-/// <summary>
-/// FinalAccumOpenCLKernelCreator class.
-/// </summary>
-
-namespace EmberCLns
-{
-/// <summary>
-/// Class for creating the final accumulation code in OpenCL.
-/// There are many conditionals in the CPU code to create the
-/// final output image. This class creates many different kernels
-/// with all conditionals and unnecessary calculations stripped out.
-/// The conditionals are:
-/// Early clip/late clip
-/// </summary>
-class EMBERCL_API FinalAccumOpenCLKernelCreator
-{
-public:
-	FinalAccumOpenCLKernelCreator(bool doublePrecision);
-
-	const string& FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel() const;
-	const string& FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint() const;
-	const string& FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel() const;
-	const string& FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint() const;
-	const string& GammaCorrectionEntryPoint() const;
-	const string& GammaCorrectionKernel() const;
-	const string& FinalAccumEntryPoint(bool earlyClip) const;
-	const string& FinalAccumKernel(bool earlyClip) const;
-
-private:
-	string CreateFinalAccumKernelString(bool earlyClip);
-	string CreateGammaCorrectionKernelString();
-
-	string CreateGammaCorrectionFunctionString(bool globalBucket, bool finalOut);
-	string CreateCalcNewRgbFunctionString(bool globalBucket);
-
-	string m_GammaCorrectionWithoutAlphaCalcKernel;
-	string m_GammaCorrectionWithoutAlphaCalcEntryPoint = "GammaCorrectionWithoutAlphaCalcKernel";
-
-	string m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel;
-	string m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint = "FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel";
-
-	string m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel;
-	string m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint = "FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel";
-
-	string m_Empty;
-	bool m_DoublePrecision;
-};
-}
+#pragma once
+
+#include "EmberCLPch.h"
+#include "EmberCLStructs.h"
+#include "EmberCLFunctions.h"
+
+/// <summary>
+/// FinalAccumOpenCLKernelCreator class.
+/// </summary>
+
+namespace EmberCLns
+{
+/// <summary>
+/// Class for creating the final accumulation code in OpenCL.
+/// There are many conditionals in the CPU code to create the
+/// final output image. This class creates three kernel strings (gamma correction, early clip and late clip final accumulation)
+/// with all conditionals and unnecessary calculations stripped out.
+/// The conditionals are:
+/// Early clip/late clip
+/// </summary>
+class EMBERCL_API FinalAccumOpenCLKernelCreator
+{
+public:
+	FinalAccumOpenCLKernelCreator(bool doublePrecision);
+
+	const string& FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel() const;
+	const string& FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint() const;
+	const string& FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel() const;
+	const string& FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint() const;
+	const string& GammaCorrectionEntryPoint() const;
+	const string& GammaCorrectionKernel() const;
+	const string& FinalAccumEntryPoint(bool earlyClip) const;
+	const string& FinalAccumKernel(bool earlyClip) const;
+
+private:
+	string CreateFinalAccumKernelString(bool earlyClip);
+	string CreateGammaCorrectionKernelString();
+
+	string CreateGammaCorrectionFunctionString(bool globalBucket, bool finalOut);
+	string CreateCalcNewRgbFunctionString(bool globalBucket);
+
+	string m_GammaCorrectionWithoutAlphaCalcKernel;
+	string m_GammaCorrectionWithoutAlphaCalcEntryPoint = "GammaCorrectionWithoutAlphaCalcKernel";
+
+	string m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel;
+	string m_FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumEntryPoint = "FinalAccumEarlyClipWithoutAlphaCalcWithAlphaAccumKernel";
+
+	string m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel;
+	string m_FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumEntryPoint = "FinalAccumLateClipWithoutAlphaCalcWithAlphaAccumKernel";
+
+	string m_Empty;//NOTE(review): not referenced by any of this class's member function definitions - possibly removable, confirm.
+	bool m_DoublePrecision;//No in-class default; assigned in the constructor body.
+};
+}
@@ -1,22 +1,22 @@
-#pragma once
-
-#include "EmberCLPch.h"
-
-namespace EmberCLns
-{
-/// <summary>
-/// Functionality to map OpenCL function names to their full function body program strings.
-/// This is used to ensure only the functions that are needed by a program are included once
-/// in the program string.
-/// </summary>
-class EMBERCL_API FunctionMapper
-{
-public:
-	FunctionMapper();
-	static const string* GetGlobalFunc(const string& func);
-	static const std::unordered_map<string, string> GetGlobalMapCopy();
-
-private:
-	static std::unordered_map<string, string> s_GlobalMap;
-};
+#pragma once
+
+#include "EmberCLPch.h"
+
+namespace EmberCLns
+{
+/// <summary>
+/// Functionality to map OpenCL function names to their full function body program strings.
+/// This is used to ensure only the functions that are needed by a program are included once
+/// in the program string.
+/// </summary>
+class EMBERCL_API FunctionMapper
+{
+public:
+	FunctionMapper();
+	static const string* GetGlobalFunc(const string& func);//NOTE(review): presumably returns nullptr when func is not in the map - confirm against the definition.
+	static const std::unordered_map<string, string> GetGlobalMapCopy();//Returns by value, so each caller receives its own copy of the map.
+
+private:
+	static std::unordered_map<string, string> s_GlobalMap;
+};
 }
@@ -1,83 +1,83 @@
-#pragma once
-
-#include "EmberCLPch.h"
-#include "EmberCLStructs.h"
-#include "EmberCLFunctions.h"
-#include "FunctionMapper.h"
-
-/// <summary>
-/// IterOpenCLKernelCreator class.
-/// </summary>
-
-namespace EmberCLns
-{
-/// <summary>
-/// Class for creating the main iteration code in OpenCL.
-/// It uses the Cuburn method of iterating where all conditionals
-/// are stripped out and a specific kernel is compiled at run-time.
-/// It uses a very sophisticated method for randomization that avoids
-/// the problem of warp/wavefront divergence that would occur if every
-/// thread selected a random xform to apply.
-/// This only works with embers of type float, double is not supported.
-/// </summary>
-template <typename T>
-class EMBERCL_API IterOpenCLKernelCreator
-{
-public:
-	IterOpenCLKernelCreator();
-	const string& ZeroizeKernel() const;
-	const string& ZeroizeEntryPoint() const;
-	const string& SumHistKernel() const;
-	const string& SumHistEntryPoint() const;
-	const string& IterEntryPoint() const;
-	string CreateIterKernelString(const Ember<T>& ember, const string& parVarDefines, const string& globalSharedDefines, bool optAffine, bool lockAccum = false, bool doAccum = true);
-	string GlobalFunctionsString(const Ember<T>& ember);
-	static void ParVarIndexDefines(const Ember<T>& ember, pair<string, vector<T>>& params, bool doVals = true, bool doString = true);
-	static void SharedDataIndexDefines(const Ember<T>& ember, pair<string, vector<T>>& params, bool doVals = true, bool doString = true);
-	static string VariationStateString(const Ember<T>& ember);
-	static string VariationStateInitString(const Ember<T>& ember);
-	static bool AnyZeroOpacity(const Ember<T>& ember);
-	static bool IsBuildRequired(const Ember<T>& ember1, const Ember<T>& ember2, bool optAffine);
-
-private:
-	string CreateZeroizeKernelString() const;
-	string CreateSumHistKernelString() const;
-	string CreateProjectionString(const Ember<T>& ember) const;
-
-	string m_IterEntryPoint = "IterateKernel";
-	string m_ZeroizeKernel;
-	string m_ZeroizeEntryPoint = "ZeroizeKernel";
-	string m_SumHistKernel;
-	string m_SumHistEntryPoint = "SumHisteKernel";
-	FunctionMapper m_FunctionMapper;
-};
-
-#ifdef OPEN_CL_TEST_AREA
-typedef void (*KernelFuncPointer) (size_t gridWidth, size_t gridHeight, size_t blockWidth, size_t blockHeight,
-								   size_t BLOCK_ID_X, size_t BLOCK_ID_Y, size_t THREAD_ID_X, size_t THREAD_ID_Y);
-
-static void OpenCLSim(size_t gridWidth, size_t gridHeight, size_t blockWidth, size_t blockHeight, KernelFuncPointer func)
-{
-	cout << "OpenCLSim(): ";
-	cout << "\n	Params: ";
-	cout << "\n		gridW: " << gridWidth;
-	cout << "\n		gridH: " << gridHeight;
-	cout << "\n		blockW: " << blockWidth;
-	cout << "\n		blockH: " << blockHeight;
-
-	for (size_t i = 0; i < gridHeight; i += blockHeight)
-	{
-		for (size_t j = 0; j < gridWidth; j += blockWidth)
-		{
-			for (size_t k = 0; k < blockHeight; k++)
-			{
-				for (size_t l = 0; l < blockWidth; l++)
-				{
-					func(gridWidth, gridHeight, blockWidth, blockHeight, j / blockWidth, i / blockHeight, l, k);
-				}
-			}
-		}
-	}
-}
-#endif
-}
+#pragma once
+
+#include "EmberCLPch.h"
+#include "EmberCLStructs.h"
+#include "EmberCLFunctions.h"
+#include "FunctionMapper.h"
+
+/// <summary>
+/// IterOpenCLKernelCreator class.
+/// </summary>
+
+namespace EmberCLns
+{
+/// <summary>
+/// Class for creating the main iteration code in OpenCL.
+/// It uses the Cuburn method of iterating where all conditionals
+/// are stripped out and a specific kernel is compiled at run-time.
+/// It uses a very sophisticated method for randomization that avoids
+/// the problem of warp/wavefront divergence that would occur if every
+/// thread selected a random xform to apply.
+/// This only works with embers of type float, double is not supported.
+/// </summary>
+template <typename T>
+class EMBERCL_API IterOpenCLKernelCreator
+{
+public:
+	IterOpenCLKernelCreator();
+	const string& ZeroizeKernel() const;
+	const string& ZeroizeEntryPoint() const;
+	const string& SumHistKernel() const;
+	const string& SumHistEntryPoint() const;
+	const string& IterEntryPoint() const;//Only the entry point name is stored for iteration; the kernel text itself comes from CreateIterKernelString().
+	string CreateIterKernelString(const Ember<T>& ember, const string& parVarDefines, const string& globalSharedDefines, bool optAffine, bool lockAccum = false, bool doAccum = true);
+	string GlobalFunctionsString(const Ember<T>& ember);
+	static void ParVarIndexDefines(const Ember<T>& ember, pair<string, vector<T>>& params, bool doVals = true, bool doString = true);//Output goes into params; both parts are requested by default.
+	static void SharedDataIndexDefines(const Ember<T>& ember, pair<string, vector<T>>& params, bool doVals = true, bool doString = true);//Same parameter layout as ParVarIndexDefines().
+	static string VariationStateString(const Ember<T>& ember);
+	static string VariationStateInitString(const Ember<T>& ember);
+	static bool AnyZeroOpacity(const Ember<T>& ember);
+	static bool IsBuildRequired(const Ember<T>& ember1, const Ember<T>& ember2, bool optAffine);
+
+private:
+	string CreateZeroizeKernelString() const;
+	string CreateSumHistKernelString() const;
+	string CreateProjectionString(const Ember<T>& ember) const;
+
+	string m_IterEntryPoint = "IterateKernel";
+	string m_ZeroizeKernel;//NOTE(review): presumably filled from CreateZeroizeKernelString(); confirm in the .cpp.
+	string m_ZeroizeEntryPoint = "ZeroizeKernel";
+	string m_SumHistKernel;//NOTE(review): presumably filled from CreateSumHistKernelString(); confirm in the .cpp.
+	string m_SumHistEntryPoint = "SumHisteKernel";//NOTE(review): "SumHiste" looks like a typo of "SumHist", but this is a runtime string; confirm it matches the kernel source before changing it.
+	FunctionMapper m_FunctionMapper;
+};
+
+#ifdef OPEN_CL_TEST_AREA
+typedef void (*KernelFuncPointer) (size_t gridWidth, size_t gridHeight, size_t blockWidth, size_t blockHeight,//Signature of a host function standing in for a kernel: grid size, block size, block ids, thread ids.
+								   size_t BLOCK_ID_X, size_t BLOCK_ID_Y, size_t THREAD_ID_X, size_t THREAD_ID_Y);
+
+static void OpenCLSim(size_t gridWidth, size_t gridHeight, size_t blockWidth, size_t blockHeight, KernelFuncPointer func)//Prints the dimensions, then calls func serially once per block/thread combination.
+{
+	cout << "OpenCLSim(): ";
+	cout << "\n	Params: ";
+	cout << "\n		gridW: " << gridWidth;
+	cout << "\n		gridH: " << gridHeight;
+	cout << "\n		blockW: " << blockWidth;
+	cout << "\n		blockH: " << blockHeight;
+
+	for (size_t i = 0; i < gridHeight; i += blockHeight)//NOTE(review): a blockHeight or blockWidth of 0 never advances these outer loops; callers must pass non-zero block sizes.
+	{
+		for (size_t j = 0; j < gridWidth; j += blockWidth)//i and j are the grid offsets of the current block's first row and column.
+		{
+			for (size_t k = 0; k < blockHeight; k++)//k is the thread row within the block.
+			{
+				for (size_t l = 0; l < blockWidth; l++)//l is the thread column within the block.
+				{
+					func(gridWidth, gridHeight, blockWidth, blockHeight, j / blockWidth, i / blockHeight, l, k);//Block ids are the grid offsets divided by the block dimensions.
+				}
+			}
+		}
+	}
+}
+#endif
+}
@@ -1,460 +1,460 @@
-#include "EmberCLPch.h"
-#include "OpenCLInfo.h"
-
-namespace EmberCLns
-{
-/// <summary>
-/// Initialize the all platforms and devices and keep information about them in lists.
-/// </summary>
-OpenCLInfo::OpenCLInfo()
-{
-	cl_int err;
-	vector<cl::Platform> platforms;
-	vector<vector<cl::Device>> devices;
-	intmax_t workingPlatformIndex = -1;
-	m_Init = false;
-	cl::Platform::get(&platforms);
-	devices.resize(platforms.size());
-	m_Platforms.reserve(platforms.size());
-	m_Devices.reserve(platforms.size());
-	m_DeviceNames.reserve(platforms.size());
-	m_AllDeviceNames.reserve(platforms.size());
-	m_DeviceIndices.reserve(platforms.size());
-
-	for (size_t i = 0; i < platforms.size(); i++)
-		platforms[i].getDevices(CL_DEVICE_TYPE_ALL, &devices[i]);
-
-	for (size_t platform = 0; platform < platforms.size(); platform++)
-	{
-		bool platformOk = false;
-		bool deviceOk = false;
-		cl::Context context;
-
-		if (CreateContext(platforms[platform], context, false))//Platform is ok, now do context. Unshared by default.
-		{
-			size_t workingDeviceIndex = 0;
-
-			for (size_t device = 0; device < devices[platform].size(); device++)//Context is ok, now do devices.
-			{
-				auto q = cl::CommandQueue(context, devices[platform][device], 0, &err);//At least one GPU device is present, so create a command queue.
-
-				if (CheckCL(err, "cl::CommandQueue()"))
-				{
-					if (!platformOk)
-					{
-						m_Platforms.push_back(platforms[platform]);
-						m_PlatformNames.push_back(platforms[platform].getInfo<CL_PLATFORM_VENDOR>(nullptr).c_str() + " "s + platforms[platform].getInfo<CL_PLATFORM_NAME>(nullptr).c_str() + " "s + platforms[platform].getInfo<CL_PLATFORM_VERSION>(nullptr).c_str());
-						workingPlatformIndex++;
-						platformOk = true;
-					}
-
-					if (!deviceOk)
-					{
-						m_Devices.push_back(vector<cl::Device>());
-						m_DeviceNames.push_back(vector<string>());
-						m_Devices.back().reserve(devices[platform].size());
-						m_DeviceNames.back().reserve(devices[platform].size());
-						deviceOk = true;
-					}
-
-					m_Devices.back().push_back(devices[platform][device]);
-					m_DeviceNames.back().push_back(devices[platform][device].getInfo<CL_DEVICE_VENDOR>(nullptr).c_str() + " "s + devices[platform][device].getInfo<CL_DEVICE_NAME>(nullptr).c_str());// + " " + devices[platform][device].getInfo<CL_DEVICE_VERSION>().c_str());
-					m_AllDeviceNames.push_back(m_DeviceNames.back().back());
-					m_DeviceIndices.push_back(pair<size_t, size_t>(workingPlatformIndex, workingDeviceIndex++));
-					m_Init = true;//If at least one platform and device succeeded, OpenCL is ok. It's now ok to begin building and running programs.
-				}
-			}
-		}
-	}
-}
-
-/// <summary>
-/// Get a const reference to the vector of available platforms.
-/// </summary>
-/// <returns>A const reference to the vector of available platforms</returns>
-const vector<cl::Platform>& OpenCLInfo::Platforms() const
-{
-	return m_Platforms;
-}
-
-/// <summary>
-/// Get a const reference to the platform name at the specified index.
-/// </summary>
-/// <param name="i">The platform index to get the name of</param>
-/// <returns>The platform name if found, else empty string</returns>
-const string& OpenCLInfo::PlatformName(size_t platform) const
-{
-	static string s;
-	return platform < m_PlatformNames.size() ? m_PlatformNames[platform] : s;
-}
-
-/// <summary>
-/// Get a const reference to a vector of all available platform names on the system as a vector of strings.
-/// </summary>
-/// <returns>All available platform names on the system as a vector of strings</returns>
-const vector<string>& OpenCLInfo::PlatformNames() const
-{
-	return m_PlatformNames;
-}
-
-/// <summary>
-/// Get a const reference to a vector of vectors of all available devices on the system.
-/// Each outer vector is a different platform.
-/// </summary>
-/// <returns>All available devices on the system, grouped by platform.</returns>
-const vector<vector<cl::Device>>& OpenCLInfo::Devices() const
-{
-	return m_Devices;
-}
-
-/// <summary>
-/// Get a const reference to the device name at the specified index on the platform
-/// at the specified index.
-/// </summary>
-/// <param name="platform">The platform index of the device</param>
-/// <param name="device">The device index</param>
-/// <returns>The name of the device if found, else empty string</returns>
-const string& OpenCLInfo::DeviceName(size_t platform, size_t device) const
-{
-	static string s;
-
-	if (platform < m_Platforms.size() && platform < m_Devices.size())
-		if (device < m_Devices[platform].size())
-			return m_DeviceNames[platform][device];
-
-	return s;
-}
-
-/// <summary>
-/// Get a const reference to a vector of pairs of uints which contain the platform,device
-/// indices of all available devices on the system.
-/// </summary>
-/// <returns>All available devices on the system as platform,device index pairs</returns>
-const vector<pair<size_t, size_t>>& OpenCLInfo::DeviceIndices() const
-{
-	return m_DeviceIndices;
-}
-
-/// <summary>
-/// Get a const reference to a vector of all available device names on the system as a vector of strings.
-/// </summary>
-/// <returns>All available device names on the system as a vector of strings</returns>
-const vector<string>& OpenCLInfo::AllDeviceNames() const
-{
-	return m_AllDeviceNames;
-}
-
-/// <summary>
-/// Get a const reference to a vector of all available device names on the platform
-/// at the specified index as a vector of strings.
-/// </summary>
-/// <param name="platform">The platform index whose devices names will be returned</param>
-/// <returns>All available device names on the platform at the specified index as a vector of strings if within range, else empty vector.</returns>
-const vector<string>& OpenCLInfo::DeviceNames(size_t platform) const
-{
-	static vector<string> v;
-
-	if (platform < m_DeviceNames.size())
-		return m_DeviceNames[platform];
-
-	return v;
-}
-
-/// <summary>
-/// Get the total device index at the specified platform and device index.
-/// </summary>
-/// <param name="platform">The platform index of the device</param>
-/// <param name="device">The device index within the platform</param>
-/// <returns>The total device index if found, else 0</returns>
-size_t OpenCLInfo::TotalDeviceIndex(size_t platform, size_t device) const
-{
-	size_t index = 0;
-	pair<size_t, size_t> p{ platform, device };
-
-	for (size_t i = 0; i < m_DeviceIndices.size(); i++)
-	{
-		if (p == m_DeviceIndices[i])
-		{
-			index = i;
-			break;
-		}
-	}
-
-	return index;
-}
-
-/// <summary>
-/// Get a pointer to a device based on its ID.
-/// </summary>
-/// <param name="id">The device ID</param>
-/// <param name="platform">Stores the platform index of the device if found.</param>
-/// <param name="device">Stores the device index of the device if found.</param>
-/// <returns>A pointer to the device if found, else nullptr.</returns>
-const cl::Device* OpenCLInfo::DeviceFromId(cl_device_id id, size_t& platform, size_t& device) const
-{
-	for (auto& p : m_DeviceIndices)
-	{
-		if (m_Devices[p.first][p.second]() == id)
-		{
-			platform = p.first;
-			device = p.second;
-			return &(m_Devices[p.first][p.second]);
-		}
-	}
-
-	platform = device = 0;
-	return nullptr;
-}
-
-/// <summary>
-/// Create a context that is optionally shared with OpenGL and place it in the
-/// passed in context ref parameter.
-/// </summary>
-/// <param name="platform">The platform object to create the context on</param>
-/// <param name="context">The context object to store the result in</param>
-/// <param name="shared">True if shared with OpenGL, else not shared.</param>
-/// <returns>True if success, else false.</returns>
-bool OpenCLInfo::CreateContext(const cl::Platform& platform, cl::Context& context, bool shared)
-{
-	cl_int err;
-
-	if (shared)
-	{
-		//Define OS-specific context properties and create the OpenCL context.
-#if defined (__APPLE__) || defined(MACOSX)
-		CGLContextObj kCGLContext = CGLGetCurrentContext();
-		CGLShareGroupObj kCGLShareGroup = CGLGetShareGroup(kCGLContext);
-		cl_context_properties props[] =
-		{
-			CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, (cl_context_properties)kCGLShareGroup,
-			0
-		};
-		context = cl::Context(CL_DEVICE_TYPE_GPU, props, nullptr, nullptr, &err);//May need to tinker with this on Mac.
-#else
-#if defined WIN32
-		//::wglMakeCurrent(wglGetCurrentDC(), wglGetCurrentContext());
-		cl_context_properties props[] =
-		{
-			CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(),
-			CL_WGL_HDC_KHR, (cl_context_properties)wglGetCurrentDC(),
-			CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>((platform)()),
-			0
-		};
-		context = cl::Context(CL_DEVICE_TYPE_GPU, props, nullptr, nullptr, &err);
-#else
-		cl_context_properties props[] =
-		{
-			CL_GL_CONTEXT_KHR, cl_context_properties(glXGetCurrentContext()),
-			CL_GLX_DISPLAY_KHR, cl_context_properties(glXGetCurrentDisplay()),
-			CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>((platform)()),
-			0
-		};
-		context = cl::Context(CL_DEVICE_TYPE_GPU, props, nullptr, nullptr, &err);
-#endif
-#endif
-	}
-	else
-	{
-		cl_context_properties props[3] =
-		{
-			CL_CONTEXT_PLATFORM,
-			reinterpret_cast<cl_context_properties>((platform)()),
-			0
-		};
-		context = cl::Context(CL_DEVICE_TYPE_ALL, props, nullptr, nullptr, &err);
-	}
-
-	return CheckCL(err, "cl::Context()");
-}
-
-/// <summary>
-/// Return whether at least one device has been found and properly initialized.
-/// </summary>
-/// <returns>True if success, else false.</returns>
-bool OpenCLInfo::Ok() const
-{
-	return m_Init;
-}
-
-/// <summary>
-/// Get all information about all platforms and devices.
-/// </summary>
-/// <returns>A string with all information about all platforms and devices</returns>
-string OpenCLInfo::DumpInfo() const
-{
-	ostringstream os;
-	vector<size_t> sizes;
-	os.imbue(locale(""));
-
-	for (size_t platform = 0; platform < m_Platforms.size(); platform++)
-	{
-		os << "Platform " << platform << ": " << PlatformName(platform) << "\n";
-
-		for (size_t device = 0; device < m_Devices[platform].size(); device++)
-		{
-			os << "Device " << device << ": " << DeviceName(platform, device);
-			os << "\nCL_DEVICE_OPENCL_C_VERSION: " << GetInfo<string>(platform, device, CL_DEVICE_OPENCL_C_VERSION).c_str();
-			os << "\nCL_DEVICE_LOCAL_MEM_SIZE: " << GetInfo<cl_ulong>(platform, device, CL_DEVICE_LOCAL_MEM_SIZE);
-			os << "\nCL_DEVICE_LOCAL_MEM_TYPE: " << GetInfo<cl_uint>(platform, device, CL_DEVICE_LOCAL_MEM_TYPE);
-			os << "\nCL_DEVICE_MAX_COMPUTE_UNITS: " << GetInfo<cl_uint>(platform, device, CL_DEVICE_MAX_COMPUTE_UNITS);
-			os << "\nCL_DEVICE_MAX_READ_IMAGE_ARGS: " << GetInfo<cl_uint>(platform, device, CL_DEVICE_MAX_READ_IMAGE_ARGS);
-			os << "\nCL_DEVICE_MAX_WRITE_IMAGE_ARGS: " << GetInfo<cl_uint>(platform, device, CL_DEVICE_MAX_WRITE_IMAGE_ARGS);
-			os << "\nCL_DEVICE_MAX_MEM_ALLOC_SIZE: " << GetInfo<cl_ulong>(platform, device, CL_DEVICE_MAX_MEM_ALLOC_SIZE);
-			os << "\nCL_DEVICE_ADDRESS_BITS: " << GetInfo<cl_uint>(platform, device, CL_DEVICE_ADDRESS_BITS);
-			os << "\nCL_DEVICE_GLOBAL_MEM_CACHE_TYPE: " << GetInfo<cl_uint>(platform, device, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE);
-			os << "\nCL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE: " << GetInfo<cl_uint>(platform, device, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE);
-			os << "\nCL_DEVICE_GLOBAL_MEM_CACHE_SIZE: " << GetInfo<cl_ulong>(platform, device, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE);
-			os << "\nCL_DEVICE_GLOBAL_MEM_SIZE: " << GetInfo<cl_ulong>(platform, device, CL_DEVICE_GLOBAL_MEM_SIZE);
-			os << "\nCL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: " << GetInfo<cl_ulong>(platform, device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE);
-			os << "\nCL_DEVICE_MAX_CONSTANT_ARGS: " << GetInfo<cl_uint>(platform, device, CL_DEVICE_MAX_CONSTANT_ARGS);
-			os << "\nCL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: " << GetInfo<cl_uint>(platform, device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS);
-			os << "\nCL_DEVICE_MAX_WORK_GROUP_SIZE: " << GetInfo<size_t>(platform, device, CL_DEVICE_MAX_WORK_GROUP_SIZE);
-			sizes = GetInfo<vector<size_t>>(platform, device, CL_DEVICE_MAX_WORK_ITEM_SIZES);
-			os << "\nCL_DEVICE_MAX_WORK_ITEM_SIZES: " << sizes[0] << ", " << sizes[1] << ", " << sizes[2] << "\n" << "\n";
-
-			if (device != m_Devices[platform].size() - 1 && platform != m_Platforms.size() - 1)
-				os << "\n";
-		}
-
-		os << "\n";
-	}
-
-	return os.str();
-}
-
-/// <summary>
-/// Check an OpenCL return value for errors.
-/// </summary>
-/// <param name="err">The error code to inspect</param>
-/// <param name="name">A description of where the value was gotten from</param>
-/// <returns>True if success, else false.</returns>
-bool OpenCLInfo::CheckCL(cl_int err, const char* name)
-{
-	if (err != CL_SUCCESS)
-	{
-		ostringstream ss;
-		ss << "ERROR: " << ErrorToStringCL(err) << " in " << name << ".\n";
-		AddToReport(ss.str());
-	}
-
-	return err == CL_SUCCESS;
-}
-
-/// <summary>
-/// Translate an OpenCL error code into a human readable string.
-/// </summary>
-/// <param name="err">The error code to translate</param>
-/// <returns>A human readable description of the error passed in</returns>
-string OpenCLInfo::ErrorToStringCL(cl_int err)
-{
-	switch (err)
-	{
-		case CL_SUCCESS:								   return "Success";
-
-		case CL_DEVICE_NOT_FOUND:						   return "Device not found";
-
-		case CL_DEVICE_NOT_AVAILABLE:					   return "Device not available";
-
-		case CL_COMPILER_NOT_AVAILABLE:					   return "Compiler not available";
-
-		case CL_MEM_OBJECT_ALLOCATION_FAILURE:			   return "Memory object allocation failure";
-
-		case CL_OUT_OF_RESOURCES:						   return "Out of resources";
-
-		case CL_OUT_OF_HOST_MEMORY:						   return "Out of host memory";
-
-		case CL_PROFILING_INFO_NOT_AVAILABLE:			   return "Profiling information not available";
-
-		case CL_MEM_COPY_OVERLAP:						   return "Memory copy overlap";
-
-		case CL_IMAGE_FORMAT_MISMATCH:					   return "Image format mismatch";
-
-		case CL_IMAGE_FORMAT_NOT_SUPPORTED:				   return "Image format not supported";
-
-		case CL_BUILD_PROGRAM_FAILURE:					   return "Program build failure";
-
-		case CL_MAP_FAILURE:							   return "Map failure";
-
-		case CL_MISALIGNED_SUB_BUFFER_OFFSET:			   return "Misaligned sub buffer offset";
-
-		case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return "Exec status error for events in wait list";
-
-		case CL_INVALID_VALUE:							   return "Invalid value";
-
-		case CL_INVALID_DEVICE_TYPE:					   return "Invalid device type";
-
-		case CL_INVALID_PLATFORM:						   return "Invalid platform";
-
-		case CL_INVALID_DEVICE:							   return "Invalid device";
-
-		case CL_INVALID_CONTEXT:						   return "Invalid context";
-
-		case CL_INVALID_QUEUE_PROPERTIES:				   return "Invalid queue properties";
-
-		case CL_INVALID_COMMAND_QUEUE:					   return "Invalid command queue";
-
-		case CL_INVALID_HOST_PTR:						   return "Invalid host pointer";
-
-		case CL_INVALID_MEM_OBJECT:						   return "Invalid memory object";
-
-		case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:		   return "Invalid image format descriptor";
-
-		case CL_INVALID_IMAGE_SIZE:						   return "Invalid image size";
-
-		case CL_INVALID_SAMPLER:						   return "Invalid sampler";
-
-		case CL_INVALID_BINARY:							   return "Invalid binary";
-
-		case CL_INVALID_BUILD_OPTIONS:					   return "Invalid build options";
-
-		case CL_INVALID_PROGRAM:						   return "Invalid program";
-
-		case CL_INVALID_PROGRAM_EXECUTABLE:				   return "Invalid program executable";
-
-		case CL_INVALID_KERNEL_NAME:					   return "Invalid kernel name";
-
-		case CL_INVALID_KERNEL_DEFINITION:				   return "Invalid kernel definition";
-
-		case CL_INVALID_KERNEL:							   return "Invalid kernel";
-
-		case CL_INVALID_ARG_INDEX:						   return "Invalid argument index";
-
-		case CL_INVALID_ARG_VALUE:						   return "Invalid argument value";
-
-		case CL_INVALID_ARG_SIZE:						   return "Invalid argument size";
-
-		case CL_INVALID_KERNEL_ARGS:					   return "Invalid kernel arguments";
-
-		case CL_INVALID_WORK_DIMENSION:					   return "Invalid work dimension";
-
-		case CL_INVALID_WORK_GROUP_SIZE:				   return "Invalid work group size";
-
-		case CL_INVALID_WORK_ITEM_SIZE:					   return "Invalid work item size";
-
-		case CL_INVALID_GLOBAL_OFFSET:					   return "Invalid global offset";
-
-		case CL_INVALID_EVENT_WAIT_LIST:				   return "Invalid event wait list";
-
-		case CL_INVALID_EVENT:							   return "Invalid event";
-
-		case CL_INVALID_OPERATION:						   return "Invalid operation";
-
-		case CL_INVALID_GL_OBJECT:						   return "Invalid OpenGL object";
-
-		case CL_INVALID_BUFFER_SIZE:					   return "Invalid buffer size";
-
-		case CL_INVALID_MIP_LEVEL:						   return "Invalid mip-map level";
-
-		case CL_INVALID_GLOBAL_WORK_SIZE:				   return "Invalid global work size";
-
-		case CL_INVALID_PROPERTY:						   return "Invalid property";
-
-		default:
-		{
-			ostringstream ss;
-			ss << "<Unknown error code> " << err;
-			return ss.str();
-		}
-	}
-}
+#include "EmberCLPch.h"
+#include "OpenCLInfo.h"
+
+namespace EmberCLns
+{
+/// <summary>
+/// Initialize all platforms and devices and keep information about the usable ones in lists.
+/// </summary>
+OpenCLInfo::OpenCLInfo()
+{
+	cl_int err;
+	vector<cl::Platform> platforms;
+	vector<vector<cl::Device>> devices;
+	intmax_t workingPlatformIndex = -1;//Becomes 0 when the first usable platform is stored, so it always indexes the last entry of m_Platforms.
+	m_Init = false;
+	cl::Platform::get(&platforms);
+	devices.resize(platforms.size());
+	m_Platforms.reserve(platforms.size());
+	m_Devices.reserve(platforms.size());
+	m_DeviceNames.reserve(platforms.size());
+	m_AllDeviceNames.reserve(platforms.size());
+	m_DeviceIndices.reserve(platforms.size());
+
+	for (size_t i = 0; i < platforms.size(); i++)
+		platforms[i].getDevices(CL_DEVICE_TYPE_ALL, &devices[i]);//Every device type is queried, not just GPUs.
+
+	for (size_t platform = 0; platform < platforms.size(); platform++)
+	{
+		bool platformOk = false;//Set once this platform has been appended to m_Platforms.
+		bool deviceOk = false;//Set once this platform's inner vectors have been appended to m_Devices and m_DeviceNames.
+		cl::Context context;
+
+		if (CreateContext(platforms[platform], context, false))//Platform is ok, now do context. Unshared by default.
+		{
+			size_t workingDeviceIndex = 0;
+
+			for (size_t device = 0; device < devices[platform].size(); device++)//Context is ok, now do devices.
+			{
+				auto q = cl::CommandQueue(context, devices[platform][device], 0, &err);//Try to create a command queue on this device; only the resulting error code is used.
+
+				if (CheckCL(err, "cl::CommandQueue()"))
+				{
+					if (!platformOk)
+					{
+						m_Platforms.push_back(platforms[platform]);
+						m_PlatformNames.push_back(platforms[platform].getInfo<CL_PLATFORM_VENDOR>(nullptr).c_str() + " "s + platforms[platform].getInfo<CL_PLATFORM_NAME>(nullptr).c_str() + " "s + platforms[platform].getInfo<CL_PLATFORM_VERSION>(nullptr).c_str());
+						workingPlatformIndex++;
+						platformOk = true;
+					}
+
+					if (!deviceOk)
+					{
+						m_Devices.push_back(vector<cl::Device>());
+						m_DeviceNames.push_back(vector<string>());
+						m_Devices.back().reserve(devices[platform].size());
+						m_DeviceNames.back().reserve(devices[platform].size());
+						deviceOk = true;
+					}
+
+					m_Devices.back().push_back(devices[platform][device]);
+					m_DeviceNames.back().push_back(devices[platform][device].getInfo<CL_DEVICE_VENDOR>(nullptr).c_str() + " "s + devices[platform][device].getInfo<CL_DEVICE_NAME>(nullptr).c_str());// + " " + devices[platform][device].getInfo<CL_DEVICE_VERSION>().c_str());
+					m_AllDeviceNames.push_back(m_DeviceNames.back().back());
+					m_DeviceIndices.push_back(pair<size_t, size_t>(workingPlatformIndex, workingDeviceIndex++));
+					m_Init = true;//If at least one platform and device succeeded, OpenCL is ok. It's now ok to begin building and running programs.
+				}
+			}
+		}
+	}
+}
+
+/// <summary>
+/// Get a const reference to the vector of available platforms.
+/// </summary>
+/// <returns>A const reference to the vector of platforms the constructor stored, which are those with at least one usable device</returns>
+const vector<cl::Platform>& OpenCLInfo::Platforms() const
+{
+	return m_Platforms;
+}
+
+/// <summary>
+/// Get a const reference to the platform name at the specified index.
+/// </summary>
+/// <param name="platform">The platform index to get the name of</param>
+/// <returns>The platform name if found, else empty string</returns>
+const string& OpenCLInfo::PlatformName(size_t platform) const
+{
+	static string s;//Empty string returned by reference when the index is out of range.
+	return platform < m_PlatformNames.size() ? m_PlatformNames[platform] : s;
+}
+
+/// <summary>
+/// Get a const reference to the vector of all available platform names, each built by the constructor as "vendor name version".
+/// </summary>
+/// <returns>All available platform names on the system as a vector of strings</returns>
+const vector<string>& OpenCLInfo::PlatformNames() const
+{
+	return m_PlatformNames;
+}
+
+/// <summary>
+/// Get a const reference to a vector of vectors of all available devices on the system.
+/// Each outer vector is a different platform, stored at the same index as that platform in Platforms().
+/// </summary>
+/// <returns>All available devices on the system, grouped by platform.</returns>
+const vector<vector<cl::Device>>& OpenCLInfo::Devices() const
+{
+	return m_Devices;
+}
+
+/// <summary>
+/// Get a const reference to the device name at the specified index on the platform
+/// at the specified index.
+/// </summary>
+/// <param name="platform">The platform index of the device</param>
+/// <param name="device">The device index</param>
+/// <returns>The name of the device if found, else empty string</returns>
+const string& OpenCLInfo::DeviceName(size_t platform, size_t device) const
+{
+	static string s;//Empty string returned by reference when either index is out of range.
+
+	if (platform < m_Platforms.size() && platform < m_Devices.size())
+		if (device < m_Devices[platform].size())//Bounds are checked against m_Devices; the constructor appends to m_DeviceNames in lockstep with it.
+			return m_DeviceNames[platform][device];
+
+	return s;
+}
+
+/// <summary>
+/// Get a const reference to a vector of pairs of size_t values which contain the platform,device
+/// indices of all available devices on the system.
+/// </summary>
+/// <returns>All available devices on the system as platform,device index pairs</returns>
+const vector<pair<size_t, size_t>>& OpenCLInfo::DeviceIndices() const
+{
+	return m_DeviceIndices;
+}
+
+/// <summary>
+/// Get a const reference to the flat vector of all available device names across every platform, in the same order as DeviceIndices().
+/// </summary>
+/// <returns>All available device names on the system as a vector of strings</returns>
+const vector<string>& OpenCLInfo::AllDeviceNames() const
+{
+	return m_AllDeviceNames;
+}
+
+/// <summary>
+/// Get a const reference to a vector of all available device names on the platform
+/// at the specified index as a vector of strings.
+/// </summary>
+/// <param name="platform">The platform index whose devices names will be returned</param>
+/// <returns>All available device names on the platform at the specified index as a vector of strings if within range, else empty vector.</returns>
+const vector<string>& OpenCLInfo::DeviceNames(size_t platform) const
+{
+	static vector<string> v;//Empty vector returned by reference when platform is out of range.
+
+	if (platform < m_DeviceNames.size())
+		return m_DeviceNames[platform];
+
+	return v;
+}
+
+/// <summary>
+/// Get the total device index at the specified platform and device index.
+/// </summary>
+/// <param name="platform">The platform index of the device</param>
+/// <param name="device">The device index within the platform</param>
+/// <returns>The total device index if found, else 0</returns>
+size_t OpenCLInfo::TotalDeviceIndex(size_t platform, size_t device) const
+{
+	size_t index = 0;//Stays 0 when no match is found, which callers cannot tell apart from a real match at position 0.
+	pair<size_t, size_t> p{ platform, device };
+
+	for (size_t i = 0; i < m_DeviceIndices.size(); i++)//Linear search for the matching platform,device pair.
+	{
+		if (p == m_DeviceIndices[i])
+		{
+			index = i;//The position in m_DeviceIndices is the total device index.
+			break;
+		}
+	}
+
+	return index;
+}
+
+/// <summary>
+/// Get a pointer to a device based on its ID.
+/// </summary>
+/// <param name="id">The device ID</param>
+/// <param name="platform">Stores the platform index of the device if found.</param>
+/// <param name="device">Stores the device index of the device if found.</param>
+/// <returns>A pointer to the device if found, else nullptr.</returns>
+const cl::Device* OpenCLInfo::DeviceFromId(cl_device_id id, size_t& platform, size_t& device) const
+{
+	for (auto& p : m_DeviceIndices)//Each entry is a platform,device index pair into m_Devices.
+	{
+		if (m_Devices[p.first][p.second]() == id)//Calling the wrapper object yields the value that is compared against the raw id.
+		{
+			platform = p.first;
+			device = p.second;
+			return &(m_Devices[p.first][p.second]);//Points into m_Devices, so it is only valid while this object is alive and unchanged.
+		}
+	}
+
+	platform = device = 0;//Not found: both outputs are reset to 0 and nullptr is returned.
+	return nullptr;
+}
+
+/// <summary>
+/// Create a context that is optionally shared with OpenGL and place it in the
+/// passed in context ref parameter.
+/// </summary>
+/// <param name="platform">The platform object to create the context on</param>
+/// <param name="context">The context object to store the result in</param>
+/// <param name="shared">True if shared with OpenGL, else not shared.</param>
+/// <returns>True if success, else false.</returns>
+bool OpenCLInfo::CreateContext(const cl::Platform& platform, cl::Context& context, bool shared)
+{
+	cl_int err;//Assigned by whichever cl::Context constructor call below is compiled in and reached.
+
+	if (shared)//Shared contexts are created for GPU devices only, using whatever OpenGL context is current at call time.
+	{
+		//Define OS-specific context properties and create the OpenCL context.
+#if defined (__APPLE__) || defined(MACOSX)
+		CGLContextObj kCGLContext = CGLGetCurrentContext();
+		CGLShareGroupObj kCGLShareGroup = CGLGetShareGroup(kCGLContext);//The share group is the only property passed on Apple; the platform parameter is not used in this branch.
+		cl_context_properties props[] =
+		{
+			CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, (cl_context_properties)kCGLShareGroup,
+			0
+		};
+		context = cl::Context(CL_DEVICE_TYPE_GPU, props, nullptr, nullptr, &err);//May need to tinker with this on Mac.
+#else
+#if defined WIN32
+		//::wglMakeCurrent(wglGetCurrentDC(), wglGetCurrentContext());
+		cl_context_properties props[] =
+		{
+			CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(),
+			CL_WGL_HDC_KHR, (cl_context_properties)wglGetCurrentDC(),
+			CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>((platform)()),
+			0
+		};
+		context = cl::Context(CL_DEVICE_TYPE_GPU, props, nullptr, nullptr, &err);
+#else
+		cl_context_properties props[] =
+		{
+			CL_GL_CONTEXT_KHR, cl_context_properties(glXGetCurrentContext()),
+			CL_GLX_DISPLAY_KHR, cl_context_properties(glXGetCurrentDisplay()),
+			CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>((platform)()),
+			0
+		};
+		context = cl::Context(CL_DEVICE_TYPE_GPU, props, nullptr, nullptr, &err);
+#endif
+#endif
+	}
+	else
+	{
+		cl_context_properties props[3] =
+		{
+			CL_CONTEXT_PLATFORM,
+			reinterpret_cast<cl_context_properties>((platform)()),
+			0//Zero terminates the property list.
+		};
+		context = cl::Context(CL_DEVICE_TYPE_ALL, props, nullptr, nullptr, &err);//Unshared contexts cover every device type on the platform, not just GPUs.
+	}
+
+	return CheckCL(err, "cl::Context()");//The error code is passed to CheckCL() along with the name of the failing call.
+}
+
+/// <summary>
+/// Return whether at least one device has been found and properly initialized.
+/// This only reports the stored m_Init flag; no OpenCL calls are made here.
+/// </summary>
+/// <returns>True if success, else false.</returns>
+bool OpenCLInfo::Ok() const
+{
+	return m_Init;
+}
+
+/// <summary>
+/// Get all information about all platforms and devices.
+/// Numbers are formatted using the user's default locale.
+/// Every entry returned for CL_DEVICE_MAX_WORK_ITEM_SIZES is listed, separated by commas,
+/// and nothing is listed for that field if the query returned no entries.
+/// </summary>
+/// <returns>A string with all information about all platforms and devices</returns>
+string OpenCLInfo::DumpInfo() const
+{
+	ostringstream os;
+	vector<size_t> sizes;
+	os.imbue(locale(""));//Format numbers according to the user's default locale.
+
+	for (size_t platform = 0; platform < m_Platforms.size(); platform++)
+	{
+		//Never index m_Devices past its end, even if it holds fewer entries than m_Platforms.
+		const size_t deviceCount = platform < m_Devices.size() ? m_Devices[platform].size() : 0;
+		os << "Platform " << platform << ": " << PlatformName(platform) << "\n";
+
+		for (size_t device = 0; device < deviceCount; device++)
+		{
+			os << "Device " << device << ": " << DeviceName(platform, device);
+			os << "\nCL_DEVICE_OPENCL_C_VERSION: " << GetInfo<string>(platform, device, CL_DEVICE_OPENCL_C_VERSION).c_str();
+			os << "\nCL_DEVICE_LOCAL_MEM_SIZE: " << GetInfo<cl_ulong>(platform, device, CL_DEVICE_LOCAL_MEM_SIZE);
+			os << "\nCL_DEVICE_LOCAL_MEM_TYPE: " << GetInfo<cl_uint>(platform, device, CL_DEVICE_LOCAL_MEM_TYPE);
+			os << "\nCL_DEVICE_MAX_COMPUTE_UNITS: " << GetInfo<cl_uint>(platform, device, CL_DEVICE_MAX_COMPUTE_UNITS);
+			os << "\nCL_DEVICE_MAX_READ_IMAGE_ARGS: " << GetInfo<cl_uint>(platform, device, CL_DEVICE_MAX_READ_IMAGE_ARGS);
+			os << "\nCL_DEVICE_MAX_WRITE_IMAGE_ARGS: " << GetInfo<cl_uint>(platform, device, CL_DEVICE_MAX_WRITE_IMAGE_ARGS);
+			os << "\nCL_DEVICE_MAX_MEM_ALLOC_SIZE: " << GetInfo<cl_ulong>(platform, device, CL_DEVICE_MAX_MEM_ALLOC_SIZE);
+			os << "\nCL_DEVICE_ADDRESS_BITS: " << GetInfo<cl_uint>(platform, device, CL_DEVICE_ADDRESS_BITS);
+			os << "\nCL_DEVICE_GLOBAL_MEM_CACHE_TYPE: " << GetInfo<cl_uint>(platform, device, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE);
+			os << "\nCL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE: " << GetInfo<cl_uint>(platform, device, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE);
+			os << "\nCL_DEVICE_GLOBAL_MEM_CACHE_SIZE: " << GetInfo<cl_ulong>(platform, device, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE);
+			os << "\nCL_DEVICE_GLOBAL_MEM_SIZE: " << GetInfo<cl_ulong>(platform, device, CL_DEVICE_GLOBAL_MEM_SIZE);
+			os << "\nCL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: " << GetInfo<cl_ulong>(platform, device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE);
+			os << "\nCL_DEVICE_MAX_CONSTANT_ARGS: " << GetInfo<cl_uint>(platform, device, CL_DEVICE_MAX_CONSTANT_ARGS);
+			os << "\nCL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: " << GetInfo<cl_uint>(platform, device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS);
+			os << "\nCL_DEVICE_MAX_WORK_GROUP_SIZE: " << GetInfo<size_t>(platform, device, CL_DEVICE_MAX_WORK_GROUP_SIZE);
+			sizes = GetInfo<vector<size_t>>(platform, device, CL_DEVICE_MAX_WORK_ITEM_SIZES);
+			os << "\nCL_DEVICE_MAX_WORK_ITEM_SIZES: ";
+
+			//Print however many entries were actually returned rather than assuming exactly three,
+			//because indexing an empty or short vector would read out of bounds.
+			for (size_t i = 0; i < sizes.size(); i++)
+				os << (i ? ", " : "") << sizes[i];
+
+			os << "\n" << "\n";
+
+			if (device != deviceCount - 1 && platform != m_Platforms.size() - 1)
+				os << "\n";
+		}
+
+		os << "\n";
+	}
+
+	return os.str();
+}
+
+/// <summary>
+/// Inspect the result code of an OpenCL call.
+/// Anything other than CL_SUCCESS is translated to text and added to the error report.
+/// </summary>
+/// <param name="err">The error code to inspect</param>
+/// <param name="name">A description of where the value was gotten from</param>
+/// <returns>True if err is CL_SUCCESS, else false.</returns>
+bool OpenCLInfo::CheckCL(cl_int err, const char* name)
+{
+	if (err == CL_SUCCESS)
+		return true;
+
+	//Record a readable description of the failure along with where it came from.
+	ostringstream report;
+	report << "ERROR: " << ErrorToStringCL(err) << " in " << name << ".\n";
+	AddToReport(report.str());
+	return false;
+}
+
+/// <summary>
+/// Translate an OpenCL error code into a human readable string.
+/// Codes introduced by OpenCL 1.2, 2.0 and 2.2 are only handled when the
+/// OpenCL headers being compiled against define the corresponding version macro.
+/// Any code that is not recognized is returned as "<Unknown error code> " followed by its numeric value.
+/// </summary>
+/// <param name="err">The error code to translate</param>
+/// <returns>A human readable description of the error passed in</returns>
+string OpenCLInfo::ErrorToStringCL(cl_int err)
+{
+	switch (err)
+	{
+		case CL_SUCCESS:								   return "Success";
+
+		case CL_DEVICE_NOT_FOUND:						   return "Device not found";
+
+		case CL_DEVICE_NOT_AVAILABLE:					   return "Device not available";
+
+		case CL_COMPILER_NOT_AVAILABLE:					   return "Compiler not available";
+
+		case CL_MEM_OBJECT_ALLOCATION_FAILURE:			   return "Memory object allocation failure";
+
+		case CL_OUT_OF_RESOURCES:						   return "Out of resources";
+
+		case CL_OUT_OF_HOST_MEMORY:						   return "Out of host memory";
+
+		case CL_PROFILING_INFO_NOT_AVAILABLE:			   return "Profiling information not available";
+
+		case CL_MEM_COPY_OVERLAP:						   return "Memory copy overlap";
+
+		case CL_IMAGE_FORMAT_MISMATCH:					   return "Image format mismatch";
+
+		case CL_IMAGE_FORMAT_NOT_SUPPORTED:				   return "Image format not supported";
+
+		case CL_BUILD_PROGRAM_FAILURE:					   return "Program build failure";
+
+		case CL_MAP_FAILURE:							   return "Map failure";
+
+		case CL_MISALIGNED_SUB_BUFFER_OFFSET:			   return "Misaligned sub buffer offset";
+
+		case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return "Exec status error for events in wait list";
+
+		case CL_INVALID_VALUE:							   return "Invalid value";
+
+		case CL_INVALID_DEVICE_TYPE:					   return "Invalid device type";
+
+		case CL_INVALID_PLATFORM:						   return "Invalid platform";
+
+		case CL_INVALID_DEVICE:							   return "Invalid device";
+
+		case CL_INVALID_CONTEXT:						   return "Invalid context";
+
+		case CL_INVALID_QUEUE_PROPERTIES:				   return "Invalid queue properties";
+
+		case CL_INVALID_COMMAND_QUEUE:					   return "Invalid command queue";
+
+		case CL_INVALID_HOST_PTR:						   return "Invalid host pointer";
+
+		case CL_INVALID_MEM_OBJECT:						   return "Invalid memory object";
+
+		case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:		   return "Invalid image format descriptor";
+
+		case CL_INVALID_IMAGE_SIZE:						   return "Invalid image size";
+
+		case CL_INVALID_SAMPLER:						   return "Invalid sampler";
+
+		case CL_INVALID_BINARY:							   return "Invalid binary";
+
+		case CL_INVALID_BUILD_OPTIONS:					   return "Invalid build options";
+
+		case CL_INVALID_PROGRAM:						   return "Invalid program";
+
+		case CL_INVALID_PROGRAM_EXECUTABLE:				   return "Invalid program executable";
+
+		case CL_INVALID_KERNEL_NAME:					   return "Invalid kernel name";
+
+		case CL_INVALID_KERNEL_DEFINITION:				   return "Invalid kernel definition";
+
+		case CL_INVALID_KERNEL:							   return "Invalid kernel";
+
+		case CL_INVALID_ARG_INDEX:						   return "Invalid argument index";
+
+		case CL_INVALID_ARG_VALUE:						   return "Invalid argument value";
+
+		case CL_INVALID_ARG_SIZE:						   return "Invalid argument size";
+
+		case CL_INVALID_KERNEL_ARGS:					   return "Invalid kernel arguments";
+
+		case CL_INVALID_WORK_DIMENSION:					   return "Invalid work dimension";
+
+		case CL_INVALID_WORK_GROUP_SIZE:				   return "Invalid work group size";
+
+		case CL_INVALID_WORK_ITEM_SIZE:					   return "Invalid work item size";
+
+		case CL_INVALID_GLOBAL_OFFSET:					   return "Invalid global offset";
+
+		case CL_INVALID_EVENT_WAIT_LIST:				   return "Invalid event wait list";
+
+		case CL_INVALID_EVENT:							   return "Invalid event";
+
+		case CL_INVALID_OPERATION:						   return "Invalid operation";
+
+		case CL_INVALID_GL_OBJECT:						   return "Invalid OpenGL object";
+
+		case CL_INVALID_BUFFER_SIZE:					   return "Invalid buffer size";
+
+		case CL_INVALID_MIP_LEVEL:						   return "Invalid mip-map level";
+
+		case CL_INVALID_GLOBAL_WORK_SIZE:				   return "Invalid global work size";
+
+		case CL_INVALID_PROPERTY:						   return "Invalid property";
+
+		//Error codes added by later OpenCL versions. Each group is guarded so that
+		//building against older headers, which lack these macros, still compiles.
+#ifdef CL_VERSION_1_2
+
+		case CL_COMPILE_PROGRAM_FAILURE:				   return "Program compile failure";
+
+		case CL_LINKER_NOT_AVAILABLE:					   return "Linker not available";
+
+		case CL_LINK_PROGRAM_FAILURE:					   return "Program link failure";
+
+		case CL_DEVICE_PARTITION_FAILED:				   return "Device partition failed";
+
+		case CL_KERNEL_ARG_INFO_NOT_AVAILABLE:			   return "Kernel argument information not available";
+
+		case CL_INVALID_IMAGE_DESCRIPTOR:				   return "Invalid image descriptor";
+
+		case CL_INVALID_COMPILER_OPTIONS:				   return "Invalid compiler options";
+
+		case CL_INVALID_LINKER_OPTIONS:					   return "Invalid linker options";
+
+		case CL_INVALID_DEVICE_PARTITION_COUNT:			   return "Invalid device partition count";
+#endif
+#ifdef CL_VERSION_2_0
+
+		case CL_INVALID_PIPE_SIZE:						   return "Invalid pipe size";
+
+		case CL_INVALID_DEVICE_QUEUE:					   return "Invalid device queue";
+#endif
+#ifdef CL_VERSION_2_2
+
+		case CL_INVALID_SPEC_ID:						   return "Invalid specialization constant id";
+
+		case CL_MAX_SIZE_RESTRICTION_EXCEEDED:			   return "Max size restriction exceeded";
+#endif
+
+		default:
+		{
+			//Not a code this function knows about, so report the raw number.
+			ostringstream ss;
+			ss << "<Unknown error code> " << err;
+			return ss.str();
+		}
+	}
+}
 }
@@ -1,72 +1,72 @@
-#pragma once
-
-#include "EmberCLPch.h"
-
-/// <summary>
-/// OpenCLInfo class.
-/// </summary>
-
-namespace EmberCLns
-{
-/// <summary>
-/// Keeps information about all valid OpenCL devices on this system.
-/// Devices which do not successfully create a test command queue are not
-/// added to the list.
-/// The pattern is singleton, so there is only one instance per program,
-/// retreivable by reference via the Instance() function.
-/// This class derives from EmberReport, so the caller is able
-/// to retrieve a text dump of error information if any errors occur.
-/// </summary>
-class EMBERCL_API OpenCLInfo : public EmberReport, public Singleton<OpenCLInfo>
-{
-public:
-	const vector<cl::Platform>& Platforms() const;
-	const string& PlatformName(size_t platform) const;
-	const vector<string>& PlatformNames() const;
-	const vector<vector<cl::Device>>& Devices() const;
-	const string& DeviceName(size_t platform, size_t device) const;
-	const vector<pair<size_t, size_t>>& DeviceIndices() const;
-	const vector<string>& AllDeviceNames() const;
-	const vector<string>& DeviceNames(size_t platform) const;
-	size_t TotalDeviceIndex(size_t platform, size_t device) const;
-	const cl::Device* DeviceFromId(cl_device_id id, size_t& platform, size_t& device) const;
-	string DumpInfo() const;
-	bool Ok() const;
-	bool CreateContext(const cl::Platform& platform, cl::Context& context, bool shared);
-	bool CheckCL(cl_int err, const char* name);
-	string ErrorToStringCL(cl_int err);
-
-	/// <summary>
-	/// Get device information for the specified field.
-	/// Template argument expected to be cl_ulong, cl_uint or cl_int;
-	/// </summary>
-	/// <param name="platform">The index platform of the platform to use</param>
-	/// <param name="device">The index device of the device to use</param>
-	/// <param name="name">The device field/feature to query</param>
-	/// <returns>The value of the field</returns>
-	template<typename T>
-	T GetInfo(size_t platform, size_t device, cl_device_info name) const
-	{
-		T val = T();
-
-		if (platform < m_Devices.size() && device < m_Devices[platform].size())
-			m_Devices[platform][device].getInfo(name, &val);
-
-		return val;
-	}
-
-	SINGLETON_DERIVED_IMPL(OpenCLInfo);
-
-private:
-
-	OpenCLInfo();
-
-	bool m_Init;
-	vector<cl::Platform> m_Platforms;
-	vector<vector<cl::Device>> m_Devices;
-	vector<string> m_PlatformNames;
-	vector<vector<string>> m_DeviceNames;
-	vector<pair<size_t, size_t>> m_DeviceIndices;
-	vector<string> m_AllDeviceNames;
-};
-}
+#pragma once
+
+#include "EmberCLPch.h"
+
+/// <summary>
+/// OpenCLInfo class.
+/// </summary>
+
+namespace EmberCLns
+{
+/// <summary>
+/// Keeps information about all valid OpenCL devices on this system.
+/// Devices which do not successfully create a test command queue are not
+/// added to the list.
+/// The pattern is singleton, so there is only one instance per program,
+/// retrievable by reference via the Instance() function.
+/// This class derives from EmberReport, so the caller is able
+/// to retrieve a text dump of error information if any errors occur.
+/// </summary>
+class EMBERCL_API OpenCLInfo : public EmberReport, public Singleton<OpenCLInfo>
+{
+public:
+	const vector<cl::Platform>& Platforms() const;
+	const string& PlatformName(size_t platform) const;
+	const vector<string>& PlatformNames() const;
+	const vector<vector<cl::Device>>& Devices() const;
+	const string& DeviceName(size_t platform, size_t device) const;
+	const vector<pair<size_t, size_t>>& DeviceIndices() const;
+	const vector<string>& AllDeviceNames() const;
+	const vector<string>& DeviceNames(size_t platform) const;
+	size_t TotalDeviceIndex(size_t platform, size_t device) const;
+	const cl::Device* DeviceFromId(cl_device_id id, size_t& platform, size_t& device) const;
+	string DumpInfo() const;
+	bool Ok() const;
+	bool CreateContext(const cl::Platform& platform, cl::Context& context, bool shared);
+	bool CheckCL(cl_int err, const char* name);
+	string ErrorToStringCL(cl_int err);
+
+	/// <summary>
+	/// Get device information for the specified field.
+	/// Template argument is the type of the queried field, e.g. cl_ulong, cl_uint, size_t, string or vector<size_t>.
+	/// </summary>
+	/// <param name="platform">The index platform of the platform to use</param>
+	/// <param name="device">The index device of the device to use</param>
+	/// <param name="name">The device field/feature to query</param>
+	/// <returns>The value of the field, or a value-initialized T if platform or device is out of range</returns>
+	template<typename T>
+	T GetInfo(size_t platform, size_t device, cl_device_info name) const
+	{
+		T val = T();
+
+		if (platform < m_Devices.size() && device < m_Devices[platform].size())
+			m_Devices[platform][device].getInfo(name, &val);//Note the error code returned by getInfo() is not checked.
+
+		return val;
+	}
+
+	SINGLETON_DERIVED_IMPL(OpenCLInfo);
+
+private:
+
+	OpenCLInfo();
+
+	bool m_Init;
+	vector<cl::Platform> m_Platforms;
+	vector<vector<cl::Device>> m_Devices;
+	vector<string> m_PlatformNames;
+	vector<vector<string>> m_DeviceNames;
+	vector<pair<size_t, size_t>> m_DeviceIndices;
+	vector<string> m_AllDeviceNames;
+};
+}
@@ -1,210 +1,210 @@
-#pragma once
-
-#include "EmberCLPch.h"
-#include "OpenCLInfo.h"
-
-/// <summary>
-/// OpenCLWrapper, Spk, NamedBuffer, NamedImage2D, NamedImage2DGL classes.
-/// </summary>
-
-namespace EmberCLns
-{
-/// <summary>
-/// Class to contain all of the things needed to store an OpenCL program.
-/// The name of it, the source, the compiled program object and the kernel.
-/// </summary>
-class EMBERCL_API Spk
-{
-public:
-	string m_Name;
-	cl::Program::Sources m_Source;
-	cl::Program m_Program;
-	cl::Kernel m_Kernel;
-};
-
-/// <summary>
-/// Class to hold an OpenCL buffer with a name to identify it by.
-/// </summary>
-class EMBERCL_API NamedBuffer
-{
-public:
-	NamedBuffer()
-	{
-	}
-
-	NamedBuffer(const cl::Buffer& buff, const string& name)
-	{
-		m_Buffer = buff;
-		m_Name = name;
-	}
-
-	cl::Buffer m_Buffer;
-	string m_Name;
-};
-
-/// <summary>
-/// Class to hold a 2D image with a name to identify it by.
-/// </summary>
-class EMBERCL_API NamedImage2D
-{
-public:
-	NamedImage2D()
-	{
-	}
-
-	NamedImage2D(const cl::Image2D& image, const string& name)
-	{
-		m_Image = image;
-		m_Name = name;
-	}
-
-	cl::Image2D m_Image;
-	string m_Name;
-};
-
-/// <summary>
-/// Class to hold a 2D image that is mapped to an OpenGL texture
-/// and a name to identify it by.
-/// </summary>
-class EMBERCL_API NamedImage2DGL
-{
-public:
-	NamedImage2DGL()
-	{
-	}
-
-	NamedImage2DGL(const cl::ImageGL& image, const string& name)
-	{
-		m_Image = image;
-		m_Name = name;
-	}
-
-	cl::ImageGL m_Image;
-	string m_Name;
-};
-
-/// <summary>
-/// Running kernels in OpenCL can require quite a bit of setup, tear down and
-/// general housekeeping. This class helps shield the user from such hassles.
-/// Its main utility is in holding collections of programs, buffers and images
-/// all identified by names. That way, a user can access them as needed without
-/// having to pollute their code.
-/// In addition, writing to an existing object by name determines if the object
-/// can be overwritten, or if it needs to be deleted and replaced by the new one.
-/// This class derives from EmberReport, so the caller is able
-/// to retrieve a text dump of error information if any errors occur.
-/// </summary>
-class EMBERCL_API OpenCLWrapper : public EmberReport
-{
-public:
-	OpenCLWrapper();
-	bool Init(size_t platformIndex, size_t deviceIndex, bool shared = false);
-
-	//Programs.
-	bool AddProgram(const string& name, const string& program, const string& entryPoint, bool doublePrecision);
-	void ClearPrograms();
-
-	//Buffers.
-	bool AddBuffer(const string& name, size_t size, cl_mem_flags flags = CL_MEM_READ_WRITE);
-	bool AddHostBuffer(const string& name, size_t size, void* data);
-	bool AddAndWriteBuffer(const string& name, void* data, size_t size, cl_mem_flags flags = CL_MEM_READ_WRITE);
-	bool WriteBuffer(const string& name, void* data, size_t size);
-	bool WriteBuffer(size_t bufferIndex, void* data, size_t size);
-	bool ReadBuffer(const string& name, void* data, size_t size);
-	bool ReadBuffer(size_t bufferIndex, void* data, size_t size);
-	int FindBufferIndex(const string& name);
-	size_t GetBufferSize(const string& name);
-	size_t GetBufferSize(size_t bufferIndex);
-	void ClearBuffers();
-
-	//Images.
-	bool AddAndWriteImage(const string& name, cl_mem_flags flags, const cl::ImageFormat& format, ::size_t width, ::size_t height, ::size_t row_pitch, void* data = nullptr, bool shared = false, GLuint texName = 0);
-	bool WriteImage2D(size_t index, bool shared, size_t width, size_t height, size_t row_pitch, void* data);
-	bool ReadImage(const string& name, ::size_t width, ::size_t height, ::size_t row_pitch, bool shared, void* data);
-	bool ReadImage(size_t imageIndex, ::size_t width, ::size_t height, ::size_t row_pitch, bool shared, void* data);
-	int FindImageIndex(const string& name, bool shared);
-	size_t GetImageSize(const string& name, bool shared);
-	size_t GetImageSize(size_t imageIndex, bool shared);
-	bool CompareImageParams(cl::Image& image, cl_mem_flags flags, const cl::ImageFormat& format, ::size_t width, ::size_t height, ::size_t row_pitch);
-	void ClearImages(bool shared);
-	bool CreateImage2D(cl::Image2D& image2D, cl_mem_flags flags, cl::ImageFormat format, ::size_t width, ::size_t height, ::size_t row_pitch = 0, void* data = nullptr);
-	bool CreateImage2DGL(cl::ImageGL& image2DGL, cl_mem_flags flags, GLenum target, GLint miplevel, GLuint texobj);
-	bool EnqueueAcquireGLObjects(const string& name);
-	bool EnqueueAcquireGLObjects(cl::ImageGL& image);
-	bool EnqueueReleaseGLObjects(const string& name);
-	bool EnqueueReleaseGLObjects(cl::ImageGL& image);
-	bool EnqueueAcquireGLObjects(const VECTOR_CLASS<cl::Memory>* memObjects = nullptr);
-	bool EnqueueReleaseGLObjects(const VECTOR_CLASS<cl::Memory>* memObjects = nullptr);
-	bool CreateSampler(cl::Sampler& sampler, cl_bool normalizedCoords, cl_addressing_mode addressingMode, cl_filter_mode filterMode);
-
-	//Arguments.
-	bool SetBufferArg(size_t kernelIndex, cl_uint argIndex, const string& name);
-	bool SetBufferArg(size_t kernelIndex, cl_uint argIndex, size_t bufferIndex);
-	bool SetImageArg(size_t kernelIndex, cl_uint argIndex, bool shared, const string& name);
-	bool SetImageArg(size_t kernelIndex, cl_uint argIndex, bool shared, size_t imageIndex);
-
-	/// <summary>
-	/// Set an argument in the specified kernel, at the specified argument index.
-	/// Must keep this here in the .h because it's templated.
-	/// </summary>
-	/// <param name="kernelIndex">Index of the kernel whose argument will be set</param>
-	/// <param name="argIndex">Index of the argument to set</param>
-	/// <param name="arg">The argument value to set</param>
-	/// <returns>True if success, else false</returns>
-	template <typename T>
-	bool SetArg(size_t kernelIndex, cl_uint argIndex, T arg)
-	{
-		if (m_Init && kernelIndex < m_Programs.size())
-		{
-			cl_int err = m_Programs[kernelIndex].m_Kernel.setArg(argIndex, arg);
-			return m_Info->CheckCL(err, "cl::Kernel::setArg()");
-		}
-
-		return false;
-	}
-
-	//Kernels.
-	int FindKernelIndex(const string& name);
-	bool RunKernel(size_t kernelIndex, size_t totalGridWidth, size_t totalGridHeight, size_t totalGridDepth, size_t blockWidth, size_t blockHeight, size_t blockDepth);
-
-	//Accessors.
-	bool Ok() const;
-	bool Shared() const;
-	const cl::Context& Context() const;
-	size_t PlatformIndex() const;
-	size_t DeviceIndex() const;
-	size_t TotalDeviceIndex() const;
-	const string& DeviceName() const;
-	size_t LocalMemSize() const;
-	size_t GlobalMemSize() const;
-	size_t MaxAllocSize() const;
-
-	//Public virtual functions overridden from base classes.
-	virtual void ClearErrorReport() override;
-	virtual string ErrorReportString() override;
-	virtual vector<string> ErrorReport() override;
-
-	static void MakeEvenGridDims(size_t blockW, size_t blockH, size_t& gridW, size_t& gridH);
-
-private:
-	bool CreateSPK(const string& name, const string& program, const string& entryPoint, Spk& spk, bool doublePrecision);
-
-	bool m_Init = false;
-	bool m_Shared = false;
-	size_t m_PlatformIndex = 0;
-	size_t m_DeviceIndex = 0;
-	size_t m_LocalMemSize = 0;
-	size_t m_GlobalMemSize;
-	size_t m_MaxAllocSize;
-	cl::Platform m_Platform;
-	cl::Context m_Context;
-	cl::Device m_Device;
-	cl::CommandQueue m_Queue;
-	shared_ptr<OpenCLInfo> m_Info = OpenCLInfo::Instance();
-	std::vector<cl::Device> m_DeviceVec;
-	std::vector<Spk> m_Programs;
-	std::vector<NamedBuffer> m_Buffers;
-	std::vector<NamedImage2D> m_Images;
-	std::vector<NamedImage2DGL> m_GLImages;
-};
-}
+#pragma once
+
+#include "EmberCLPch.h"
+#include "OpenCLInfo.h"
+
+/// <summary>
+/// OpenCLWrapper, Spk, NamedBuffer, NamedImage2D, NamedImage2DGL classes.
+/// </summary>
+
+namespace EmberCLns
+{
+/// <summary>
+/// Class to contain all of the things needed to store an OpenCL program.
+/// The name of it, the source, the compiled program object and the kernel.
+/// </summary>
+class EMBERCL_API Spk
+{
+public:
+	string m_Name;//The name of the program.
+	cl::Program::Sources m_Source;//The source of the program.
+	cl::Program m_Program;//The compiled program object.
+	cl::Kernel m_Kernel;//The kernel, whose arguments are set via OpenCLWrapper::SetArg().
+};
+
+/// <summary>
+/// Pairs an OpenCL buffer object with the name used to identify it.
+/// </summary>
+class EMBERCL_API NamedBuffer
+{
+public:
+	NamedBuffer() noexcept
+	{
+	}
+
+	NamedBuffer(const cl::Buffer& buff, const string& name)
+		: m_Buffer(buff),
+		  m_Name(name)
+	{
+	}
+
+	cl::Buffer m_Buffer;//The buffer object.
+	string m_Name;//The name identifying the buffer.
+};
+
+/// <summary>
+/// Pairs an OpenCL 2D image object with the name used to identify it.
+/// </summary>
+class EMBERCL_API NamedImage2D
+{
+public:
+	NamedImage2D() noexcept
+	{
+	}
+
+	NamedImage2D(const cl::Image2D& image, const string& name)
+		: m_Image(image),
+		  m_Name(name)
+	{
+	}
+
+	cl::Image2D m_Image;//The 2D image object.
+	string m_Name;//The name identifying the image.
+};
+
+/// <summary>
+/// Pairs a 2D image that is mapped to an OpenGL texture
+/// with the name used to identify it.
+/// </summary>
+class EMBERCL_API NamedImage2DGL
+{
+public:
+	NamedImage2DGL() noexcept
+	{
+	}
+
+	NamedImage2DGL(const cl::ImageGL& image, const string& name)
+		: m_Image(image),
+		  m_Name(name)
+	{
+	}
+
+	cl::ImageGL m_Image;//The OpenGL-mapped image object.
+	string m_Name;//The name identifying the image.
+};
+
+/// <summary>
+/// Running kernels in OpenCL can require quite a bit of setup, tear down and
+/// general housekeeping. This class helps shield the user from such hassles.
+/// Its main utility is in holding collections of programs, buffers and images
+/// all identified by names. That way, a user can access them as needed without
+/// having to pollute their code.
+/// In addition, writing to an existing object by name determines if the object
+/// can be overwritten, or if it needs to be deleted and replaced by the new one.
+/// This class derives from EmberReport, so the caller is able
+/// to retrieve a text dump of error information if any errors occur.
+/// </summary>
+class EMBERCL_API OpenCLWrapper : public EmberReport
+{
+public:
+	OpenCLWrapper();
+	bool Init(size_t platformIndex, size_t deviceIndex, bool shared = false);
+
+	//Programs.
+	bool AddProgram(const string& name, const string& program, const string& entryPoint, bool doublePrecision);
+	void ClearPrograms();
+
+	//Buffers.
+	bool AddBuffer(const string& name, size_t size, cl_mem_flags flags = CL_MEM_READ_WRITE);
+	bool AddHostBuffer(const string& name, size_t size, void* data);
+	bool AddAndWriteBuffer(const string& name, void* data, size_t size, cl_mem_flags flags = CL_MEM_READ_WRITE);
+	bool WriteBuffer(const string& name, void* data, size_t size);
+	bool WriteBuffer(size_t bufferIndex, void* data, size_t size);
+	bool ReadBuffer(const string& name, void* data, size_t size);
+	bool ReadBuffer(size_t bufferIndex, void* data, size_t size);
+	int FindBufferIndex(const string& name);
+	size_t GetBufferSize(const string& name);
+	size_t GetBufferSize(size_t bufferIndex);
+	void ClearBuffers();
+
+	//Images.
+	bool AddAndWriteImage(const string& name, cl_mem_flags flags, const cl::ImageFormat& format, ::size_t width, ::size_t height, ::size_t row_pitch, void* data = nullptr, bool shared = false, GLuint texName = 0);
+	bool WriteImage2D(size_t index, bool shared, size_t width, size_t height, size_t row_pitch, void* data);
+	bool ReadImage(const string& name, ::size_t width, ::size_t height, ::size_t row_pitch, bool shared, void* data);
+	bool ReadImage(size_t imageIndex, ::size_t width, ::size_t height, ::size_t row_pitch, bool shared, void* data);
+	int FindImageIndex(const string& name, bool shared);
+	size_t GetImageSize(const string& name, bool shared);
+	size_t GetImageSize(size_t imageIndex, bool shared);
+	bool CompareImageParams(cl::Image& image, cl_mem_flags flags, const cl::ImageFormat& format, ::size_t width, ::size_t height, ::size_t row_pitch);
+	void ClearImages(bool shared);
+	bool CreateImage2D(cl::Image2D& image2D, cl_mem_flags flags, cl::ImageFormat format, ::size_t width, ::size_t height, ::size_t row_pitch = 0, void* data = nullptr);
+	bool CreateImage2DGL(cl::ImageGL& image2DGL, cl_mem_flags flags, GLenum target, GLint miplevel, GLuint texobj);
+	bool EnqueueAcquireGLObjects(const string& name);
+	bool EnqueueAcquireGLObjects(cl::ImageGL& image);
+	bool EnqueueReleaseGLObjects(const string& name);
+	bool EnqueueReleaseGLObjects(cl::ImageGL& image);
+	bool EnqueueAcquireGLObjects(const cl::vector<cl::Memory>* memObjects = nullptr);
+	bool EnqueueReleaseGLObjects(const cl::vector<cl::Memory>* memObjects = nullptr);
+	bool CreateSampler(cl::Sampler& sampler, cl_bool normalizedCoords, cl_addressing_mode addressingMode, cl_filter_mode filterMode);
+
+	//Arguments.
+	bool SetBufferArg(size_t kernelIndex, cl_uint argIndex, const string& name);
+	bool SetBufferArg(size_t kernelIndex, cl_uint argIndex, size_t bufferIndex);
+	bool SetImageArg(size_t kernelIndex, cl_uint argIndex, bool shared, const string& name);
+	bool SetImageArg(size_t kernelIndex, cl_uint argIndex, bool shared, size_t imageIndex);
+
+	/// <summary>
+	/// Set an argument in the specified kernel, at the specified argument index.
+	/// Must keep this here in the .h because it's templated.
+	/// </summary>
+	/// <param name="kernelIndex">Index of the kernel whose argument will be set</param>
+	/// <param name="argIndex">Index of the argument to set</param>
+	/// <param name="arg">The argument value to set</param>
+	/// <returns>True if success, else false. Also false if m_Init is false or kernelIndex is out of range</returns>
+	template <typename T>
+	bool SetArg(size_t kernelIndex, cl_uint argIndex, T arg)
+	{
+		if (m_Init && kernelIndex < m_Programs.size())
+		{
+			cl_int err = m_Programs[kernelIndex].m_Kernel.setArg(argIndex, arg);
+			return m_Info->CheckCL(err, "cl::Kernel::setArg()");//Any error is added to the report of the OpenCLInfo object.
+		}
+
+		return false;
+	}
+
+	//Kernels.
+	int FindKernelIndex(const string& name);
+	bool RunKernel(size_t kernelIndex, size_t totalGridWidth, size_t totalGridHeight, size_t totalGridDepth, size_t blockWidth, size_t blockHeight, size_t blockDepth);
+
+	//Accessors.
+	bool Ok() const;
+	bool Shared() const;
+	const cl::Context& Context() const;
+	size_t PlatformIndex() const;
+	size_t DeviceIndex() const;
+	size_t TotalDeviceIndex() const;
+	const string& DeviceName() const;
+	size_t LocalMemSize() const;
+	size_t GlobalMemSize() const;
+	size_t MaxAllocSize() const;
+
+	//Public virtual functions overridden from base classes.
+	void ClearErrorReport() noexcept override;
+	string ErrorReportString() override;
+	vector<string> ErrorReport() override;
+
+	static void MakeEvenGridDims(size_t blockW, size_t blockH, size_t& gridW, size_t& gridH);
+
+private:
+	bool CreateSPK(const string& name, const string& program, const string& entryPoint, Spk& spk, bool doublePrecision);
+
+	bool m_Init = false;
+	bool m_Shared = false;
+	size_t m_PlatformIndex = 0;
+	size_t m_DeviceIndex = 0;
+	size_t m_LocalMemSize = 0;
+	size_t m_GlobalMemSize = 0;
+	size_t m_MaxAllocSize = 0;
+	cl::Platform m_Platform;
+	cl::Context m_Context;
+	cl::Device m_Device;
+	cl::CommandQueue m_Queue;
+	shared_ptr<OpenCLInfo> m_Info = OpenCLInfo::Instance();//Handle to the OpenCLInfo singleton, used for checking OpenCL error codes.
+	std::vector<cl::Device> m_DeviceVec;
+	std::vector<Spk> m_Programs;
+	std::vector<NamedBuffer> m_Buffers;
+	std::vector<NamedImage2D> m_Images;
+	std::vector<NamedImage2DGL> m_GLImages;
+};
+}
@@ -33,7 +33,7 @@ RendererCL<T, bucketT>::RendererCL(const vector<pair<size_t, size_t>>& devices,
 	m_FinalFormat.image_channel_order = CL_RGBA;
 	m_FinalFormat.image_channel_data_type = CL_FLOAT;
 	m_CompileBegun = [&]() { };
-	m_IterCountPerKernel = size_t(m_SubBatchPercentPerThread * m_Ember.m_SubBatchSize);
+	m_IterCountPerKernel = size_t(double(m_SubBatchPercentPerThread) * m_Ember.m_SubBatchSize);
 	Init(devices, shared, outputTexID);
 }

@@ -183,24 +183,29 @@ bool RendererCL<T, bucketT>::SetOutputTexture(GLuint outputTexID)
 /// </summary>

 //Iters per kernel/block/grid.
-template <typename T, typename bucketT> size_t RendererCL<T, bucketT>::IterCountPerKernel() const { return m_IterCountPerKernel;						  }
-template <typename T, typename bucketT> size_t RendererCL<T, bucketT>::IterCountPerBlock()  const { return IterCountPerKernel() * IterBlockKernelCount(); }
-template <typename T, typename bucketT> size_t RendererCL<T, bucketT>::IterCountPerGrid()   const { return IterCountPerKernel() * IterGridKernelCount();  }
+template <typename T, typename bucketT> size_t RendererCL<T, bucketT>::IterCountPerKernel() const noexcept { return m_IterCountPerKernel;						   }
+template <typename T, typename bucketT> size_t RendererCL<T, bucketT>::IterCountPerBlock()  const noexcept { return IterCountPerKernel() * IterBlockKernelCount(); }
+template <typename T, typename bucketT> size_t RendererCL<T, bucketT>::IterCountPerGrid()   const noexcept { return IterCountPerKernel() * IterGridKernelCount();  }

 //Kernels per block.
-template <typename T, typename bucketT> size_t RendererCL<T, bucketT>::IterBlockKernelWidth()  const { return m_IterBlockWidth;								    }
-template <typename T, typename bucketT> size_t RendererCL<T, bucketT>::IterBlockKernelHeight() const { return m_IterBlockHeight;								}
-template <typename T, typename bucketT> size_t RendererCL<T, bucketT>::IterBlockKernelCount()  const { return IterBlockKernelWidth() * IterBlockKernelHeight(); }
+template <typename T, typename bucketT> size_t RendererCL<T, bucketT>::IterBlockKernelWidth()  const noexcept { return m_IterBlockWidth;								 }
+template <typename T, typename bucketT> size_t RendererCL<T, bucketT>::IterBlockKernelHeight() const noexcept { return m_IterBlockHeight;								 }
+template <typename T, typename bucketT> size_t RendererCL<T, bucketT>::IterBlockKernelCount()  const noexcept { return IterBlockKernelWidth() * IterBlockKernelHeight(); }

 //Kernels per grid.
-template <typename T, typename bucketT> size_t RendererCL<T, bucketT>::IterGridKernelWidth()  const { return IterGridBlockWidth() * IterBlockKernelWidth();   }
-template <typename T, typename bucketT> size_t RendererCL<T, bucketT>::IterGridKernelHeight() const { return IterGridBlockHeight() * IterBlockKernelHeight(); }
-template <typename T, typename bucketT> size_t RendererCL<T, bucketT>::IterGridKernelCount()  const { return IterGridKernelWidth() * IterGridKernelHeight();  }
+template <typename T, typename bucketT> size_t RendererCL<T, bucketT>::IterGridKernelWidth()  const noexcept { return IterGridBlockWidth() * IterBlockKernelWidth();   }
+template <typename T, typename bucketT> size_t RendererCL<T, bucketT>::IterGridKernelHeight() const noexcept { return IterGridBlockHeight() * IterBlockKernelHeight(); }
+template <typename T, typename bucketT> size_t RendererCL<T, bucketT>::IterGridKernelCount()  const noexcept { return IterGridKernelWidth() * IterGridKernelHeight();  }

 //Blocks per grid.
-template <typename T, typename bucketT> size_t RendererCL<T, bucketT>::IterGridBlockWidth()  const { return m_IterBlocksWide;							    }
-template <typename T, typename bucketT> size_t RendererCL<T, bucketT>::IterGridBlockHeight() const { return m_IterBlocksHigh;							    }
-template <typename T, typename bucketT> size_t RendererCL<T, bucketT>::IterGridBlockCount()  const { return IterGridBlockWidth() * IterGridBlockHeight();   }
+template <typename T, typename bucketT> size_t RendererCL<T, bucketT>::IterGridBlockWidth()  const noexcept { return m_IterBlocksWide;							   }
+template <typename T, typename bucketT> size_t RendererCL<T, bucketT>::IterGridBlockHeight() const noexcept { return m_IterBlocksHigh;							   }
+template <typename T, typename bucketT> size_t RendererCL<T, bucketT>::IterGridBlockCount()  const noexcept { return IterGridBlockWidth() * IterGridBlockHeight(); }
+
+//Allow for setting the number of blocks in each grid dimension.
+//These should only be called before a run starts.
+template <typename T, typename bucketT> void RendererCL<T, bucketT>::IterBlocksWide(size_t w) noexcept { m_IterBlocksWide = w; }
+template <typename T, typename bucketT> void RendererCL<T, bucketT>::IterBlocksHigh(size_t h) noexcept { m_IterBlocksHigh = h; }

 /// <summary>
 /// Read the histogram of the specified into the host side CPU buffer.
@@ -590,7 +595,7 @@ bool RendererCL<T, bucketT>::Shared() const { return m_Shared; }
 /// Clear the error report for this class as well as the OpenCLWrapper members of each device.
 /// </summary>
 template <typename T, typename bucketT>
-void RendererCL<T, bucketT>::ClearErrorReport()
+void RendererCL<T, bucketT>::ClearErrorReport() noexcept
 {
 	EmberReport::ClearErrorReport();

@@ -669,7 +674,7 @@ bool RendererCL<T, bucketT>::RandVec(vector<QTIsaac<ISAAC_SIZE, ISAAC_INT>>& ran
 /// </summary>
 /// <returns>True if an devices are from Nvidia, else false.</returns>
 template <typename T, typename bucketT>
-bool RendererCL<T, bucketT>::AnyNvidia() const
+bool RendererCL<T, bucketT>::AnyNvidia() const noexcept
 {
 	for (auto& dev : m_Devices)
 		if (dev->Nvidia())
@@ -701,7 +706,7 @@ bool RendererCL<T, bucketT>::Alloc(bool histOnly)
 	static std::string loc = __FUNCTION__;
 	auto& wrapper = m_Devices[0]->m_Wrapper;
 	InitStateVec();
-	m_IterCountPerKernel = size_t(m_SubBatchPercentPerThread * m_Ember.m_SubBatchSize);//This isn't the greatest place to put this, but it must be computed before the number of iters to do is computed in the base.
+	m_IterCountPerKernel = size_t(double(m_SubBatchPercentPerThread) * m_Ember.m_SubBatchSize);//This isn't the greatest place to put this, but it must be computed before the number of iters to do is computed in the base.

 	if (b && !(b = wrapper.AddBuffer(m_DEFilterParamsBufferName, sizeof(m_DensityFilterCL))))      { ErrorStr(loc, "Failed to set DE filter parameters buffer", m_Devices[0].get()); }

@@ -1305,11 +1310,11 @@ eRenderStatus RendererCL<T, bucketT>::RunDensityFilter()
 		//that are far enough apart such that their filters do not overlap.
 		//Do the latter.
 		//Gap is in terms of blocks and specifies how many blocks must separate two blocks running at the same time.
-		const auto gapW = static_cast<uint>(ceil(fw2 / blockSizeW));
+		const auto gapW = static_cast<size_t>(ceil(fw2 / blockSizeW));
 		const auto chunkSizeW = gapW + 1;//Chunk size is also in terms of blocks and is one block (the one running) plus the gap to the right of it.
-		const auto gapH = static_cast<uint>(ceil(fw2 / blockSizeH));
+		const auto gapH = static_cast<size_t>(ceil(fw2 / blockSizeH));
 		const auto chunkSizeH = gapH + 1;//Chunk size is also in terms of blocks and is one block (the one running) plus the gap below it.
-		double totalChunks = chunkSizeW * chunkSizeH;
+		double totalChunks = double(chunkSizeW * chunkSizeH);

 		if (b && !(b = wrapper.AddAndWriteBuffer(m_DEFilterParamsBufferName, reinterpret_cast<void*>(&m_DensityFilterCL), sizeof(m_DensityFilterCL)))) { ErrorStr(loc, "Writing DE filter parameters buffer failed", m_Devices[0].get()); }

@@ -1350,12 +1355,12 @@ eRenderStatus RendererCL<T, bucketT>::RunDensityFilter()
 		gridH /= chunkSizeH;
 		OpenCLWrapper::MakeEvenGridDims(blockSizeW, blockSizeH, gridW, gridH);

-		for (uint rowChunkPass = 0; b && !m_Abort && rowChunkPass < chunkSizeH; rowChunkPass++)//Number of vertical passes.
+		for (size_t rowChunkPass = 0; b && !m_Abort && rowChunkPass < chunkSizeH; rowChunkPass++)//Number of vertical passes.
 		{
-			for (uint colChunkPass = 0; b && !m_Abort && colChunkPass < chunkSizeW; colChunkPass++)//Number of horizontal passes.
+			for (size_t colChunkPass = 0; b && !m_Abort && colChunkPass < chunkSizeW; colChunkPass++)//Number of horizontal passes.
 			{
 				//t2.Tic();
-				if (b && !(b = RunDensityFilterPrivate(kernelIndex, gridW, gridH, blockSizeW, blockSizeH, chunkSizeW, chunkSizeH, colChunkPass, rowChunkPass)))
+				if (b && !(b = RunDensityFilterPrivate(kernelIndex, gridW, gridH, blockSizeW, blockSizeH, uint(chunkSizeW), uint(chunkSizeH), uint(colChunkPass), uint(rowChunkPass))))
 				{
 					ErrorStr(loc, "Running DE filter program for row chunk "s + std::to_string(rowChunkPass) + ", col chunk "s + std::to_string(colChunkPass) + " failed", m_Devices[0].get());
 				}
@@ -1510,7 +1515,7 @@ bool RendererCL<T, bucketT>::ClearBuffer(size_t device, const string& bufferName
 		{
 			size_t blockW = m_Devices[device]->Nvidia() ? 32 : 16;//Max work group size is 256 on AMD, which means 16x16.
 			size_t blockH = m_Devices[device]->Nvidia() ? 32 : 16;
-			size_t gridW = width * elementSize;
+			size_t gridW = size_t(width) * elementSize;
 			size_t gridH = height;
 			b = true;
 			OpenCLWrapper::MakeEvenGridDims(blockW, blockH, gridW, gridH);
@@ -1,270 +1,274 @@
-#pragma once
-
-#include "EmberCLPch.h"
-#include "OpenCLWrapper.h"
-#include "DEOpenCLKernelCreator.h"
-#include "FinalAccumOpenCLKernelCreator.h"
-#include "RendererClDevice.h"
-
-/// <summary>
-/// RendererCLBase and RendererCL classes.
-/// </summary>
-
-namespace EmberCLns
-{
-/// <summary>
-/// Serves only as an interface for OpenCL specific rendering functions.
-/// </summary>
-class EMBERCL_API RendererCLBase
-{
-public:
-	virtual ~RendererCLBase() { }
-	virtual bool ReadFinal(v4F* pixels) { return false; }
-	virtual bool ClearFinal() { return false; }
-	virtual bool AnyNvidia() const { return false; }
-	bool OptAffine() const { return m_OptAffine; }
-	void OptAffine(bool optAffine) { m_OptAffine = optAffine; }
-
-	std::function<void(void)> m_CompileBegun;
-
-protected:
-	bool m_OptAffine = false;
-};
-
-/// <summary>
-/// RendererCL is a derivation of the basic CPU renderer which
-/// overrides various functions to render on the GPU using OpenCL.
-/// This supports multi-GPU rendering and is done in the following manner:
-///		-When rendering a single image, the iterations will be split between devices in sub batches.
-///		-When animating, a renderer for each device will be created by the calling code,
-///			and the frames will each be rendered by a single device as available.
-/// The synchronization across devices is done through a single atomic counter.
-/// Since this class derives from EmberReport and also contains an
-/// OpenCLWrapper member which also derives from EmberReport, the
-/// reporting functions are overridden to aggregate the errors from
-/// both sources.
-/// Template argument T expected to be float or double.
-/// Template argument bucketT must always be float.
-/// </summary>
-template <typename T, typename bucketT>
-class EMBERCL_API RendererCL : public Renderer<T, bucketT>, public RendererCLBase
-{
-	using EmberNs::Renderer<T, bucketT>::RendererBase::Abort;
-	using EmberNs::Renderer<T, bucketT>::RendererBase::EarlyClip;
-	using EmberNs::Renderer<T, bucketT>::RendererBase::EnterResize;
-	using EmberNs::Renderer<T, bucketT>::RendererBase::LeaveResize;
-	using EmberNs::Renderer<T, bucketT>::RendererBase::FinalRasW;
-	using EmberNs::Renderer<T, bucketT>::RendererBase::FinalRasH;
-	using EmberNs::Renderer<T, bucketT>::RendererBase::SuperRasW;
-	using EmberNs::Renderer<T, bucketT>::RendererBase::SuperRasH;
-	using EmberNs::Renderer<T, bucketT>::RendererBase::SuperSize;
-	using EmberNs::Renderer<T, bucketT>::RendererBase::BytesPerChannel;
-	using EmberNs::Renderer<T, bucketT>::RendererBase::TemporalSamples;
-	using EmberNs::Renderer<T, bucketT>::RendererBase::ItersPerTemporalSample;
-	using EmberNs::Renderer<T, bucketT>::RendererBase::FuseCount;
-	using EmberNs::Renderer<T, bucketT>::RendererBase::DensityFilterOffset;
-	using EmberNs::Renderer<T, bucketT>::RendererBase::PrepFinalAccumVector;
-	using EmberNs::Renderer<T, bucketT>::RendererBase::Paused;
-	using EmberNs::Renderer<T, bucketT>::RendererBase::m_ProgressParameter;
-	using EmberNs::Renderer<T, bucketT>::RendererBase::m_YAxisUp;
-	using EmberNs::Renderer<T, bucketT>::RendererBase::m_LockAccum;
-	using EmberNs::Renderer<T, bucketT>::RendererBase::m_Abort;
-	using EmberNs::Renderer<T, bucketT>::RendererBase::m_LastIter;
-	using EmberNs::Renderer<T, bucketT>::RendererBase::m_LastIterPercent;
-	using EmberNs::Renderer<T, bucketT>::RendererBase::m_Stats;
-	using EmberNs::Renderer<T, bucketT>::RendererBase::m_Callback;
-	using EmberNs::Renderer<T, bucketT>::RendererBase::m_Rand;
-	using EmberNs::Renderer<T, bucketT>::RendererBase::m_RenderTimer;
-	using EmberNs::Renderer<T, bucketT>::RendererBase::m_IterTimer;
-	using EmberNs::Renderer<T, bucketT>::RendererBase::m_ProgressTimer;
-	using EmberNs::Renderer<T, bucketT>::RendererBase::EmberReport::AddToReport;
-	using EmberNs::Renderer<T, bucketT>::RendererBase::m_ResizeCs;
-	using EmberNs::Renderer<T, bucketT>::RendererBase::m_ProcessAction;
-	using EmberNs::Renderer<T, bucketT>::m_RotMat;
-	using EmberNs::Renderer<T, bucketT>::m_Ember;
-	using EmberNs::Renderer<T, bucketT>::m_Csa;
-	using EmberNs::Renderer<T, bucketT>::m_CurvesSet;
-	using EmberNs::Renderer<T, bucketT>::CenterX;
-	using EmberNs::Renderer<T, bucketT>::CenterY;
-	using EmberNs::Renderer<T, bucketT>::K1;
-	using EmberNs::Renderer<T, bucketT>::K2;
-	using EmberNs::Renderer<T, bucketT>::Supersample;
-	using EmberNs::Renderer<T, bucketT>::HighlightPower;
-	using EmberNs::Renderer<T, bucketT>::HistBuckets;
-	using EmberNs::Renderer<T, bucketT>::AccumulatorBuckets;
-	using EmberNs::Renderer<T, bucketT>::GetDensityFilter;
-	using EmberNs::Renderer<T, bucketT>::GetSpatialFilter;
-	using EmberNs::Renderer<T, bucketT>::CoordMap;
-	using EmberNs::Renderer<T, bucketT>::XformDistributions;
-	using EmberNs::Renderer<T, bucketT>::XformDistributionsSize;
-	using EmberNs::Renderer<T, bucketT>::m_Dmap;
-	using EmberNs::Renderer<T, bucketT>::m_DensityFilter;
-	using EmberNs::Renderer<T, bucketT>::m_SpatialFilter;
-
-public:
-	RendererCL(const vector<pair<size_t, size_t>>& devices, bool shared = false, GLuint outputTexID = 0);
-	RendererCL(const RendererCL<T, bucketT>& renderer) = delete;
-	RendererCL<T, bucketT>& operator = (const RendererCL<T, bucketT>& renderer) = delete;
-	virtual ~RendererCL() = default;
-
-	//Non-virtual member functions for OpenCL specific tasks.
-	bool Init(const vector<pair<size_t, size_t>>& devices, bool shared, GLuint outputTexID);
-	bool SetOutputTexture(GLuint outputTexID);
-
-	//Iters per kernel/block/grid.
-	inline size_t IterCountPerKernel() const;
-	inline size_t IterCountPerBlock() const;
-	inline size_t IterCountPerGrid() const;
-
-	//Kernels per block.
-	inline size_t IterBlockKernelWidth() const;
-	inline size_t IterBlockKernelHeight() const;
-	inline size_t IterBlockKernelCount() const;
-
-	//Kernels per grid.
-	inline size_t IterGridKernelWidth() const;
-	inline size_t IterGridKernelHeight() const;
-	inline size_t IterGridKernelCount() const;
-
-	//Blocks per grid.
-	inline size_t IterGridBlockWidth() const;
-	inline size_t IterGridBlockHeight() const;
-	inline size_t IterGridBlockCount() const;
-
-	bool ReadHist(size_t device);
-	bool ReadAccum();
-	bool ReadPoints(size_t device, vector<PointCL<T>>& vec);
-	bool ClearHist();
-	bool ClearHist(size_t device);
-	bool ClearAccum();
-	bool WritePoints(size_t device, vector<PointCL<T>>& vec);
-#ifdef TEST_CL
-	bool WriteRandomPoints(size_t device);
-#endif
-	void InitStateVec();
-	void SubBatchPercentPerThread(float f);
-	float SubBatchPercentPerThread() const;
-	const string& IterKernel() const;
-	const string& DEKernel() const;
-	const string& FinalAccumKernel() const;
-
-	//Access to underlying OpenCL structures. Use cautiously.
-	const vector<unique_ptr<RendererClDevice>>& Devices() const;
-
-	//Virtual functions overridden from RendererCLBase.
-	virtual bool ReadFinal(v4F* pixels);
-	virtual bool ClearFinal();
-
-	//Public virtual functions overridden from Renderer or RendererBase.
-	size_t MemoryAvailable() override;
-	bool Ok() const override;
-	size_t SubBatchSize() const override;
-	size_t ThreadCount() const override;
-	bool CreateDEFilter(bool& newAlloc) override;
-	bool CreateSpatialFilter(bool& newAlloc) override;
-	eRendererType RendererType() const override;
-	bool Shared() const override;
-	void ClearErrorReport() override;
-	string ErrorReportString() override;
-	vector<string> ErrorReport() override;
-	bool RandVec(vector<QTIsaac<ISAAC_SIZE, ISAAC_INT>>& randVec) override;
-	bool AnyNvidia() const override;
-
-#ifndef TEST_CL
-protected:
-#endif
-	//Protected virtual functions overridden from Renderer.
-	bool Alloc(bool histOnly = false) override;
-	bool ResetBuckets(bool resetHist = true, bool resetAccum = true) override;
-	eRenderStatus LogScaleDensityFilter(bool forceOutput = false) override;
-	eRenderStatus GaussianDensityFilter() override;
-	eRenderStatus AccumulatorToFinalImage(vector<v4F>& pixels, size_t finalOffset) override;
-	EmberStats Iterate(size_t iterCount, size_t temporalSample) override;
-
-#ifndef TEST_CL
-private:
-#endif
-	//Private functions for making and running OpenCL programs.
-	bool BuildIterProgramForEmber(bool doAccum = true);
-	bool RunIter(size_t iterCount, size_t temporalSample, size_t& itersRan);
-	eRenderStatus RunLogScaleFilter();
-	eRenderStatus RunDensityFilter();
-	eRenderStatus RunFinalAccum();
-	bool ClearBuffer(size_t device, const string& bufferName, uint width, uint height, uint elementSize);
-	bool RunDensityFilterPrivate(size_t kernelIndex, size_t gridW, size_t gridH, size_t blockW, size_t blockH, uint chunkSizeW, uint chunkSizeH, uint colChunkPass, uint rowChunkPass);
-	int MakeAndGetDensityFilterProgram(size_t ss, uint filterWidth);
-	int MakeAndGetFinalAccumProgram();
-	int MakeAndGetGammaCorrectionProgram();
-	bool CreateHostBuffer();
-	bool SumDeviceHist();
-	void FillSeeds();
-
-	//Private functions passing data to OpenCL programs.
-	void ConvertDensityFilter();
-	void ConvertSpatialFilter();
-	void ConvertEmber(Ember<T>& ember, EmberCL<T>& emberCL, vector<XformCL<T>>& xformsCL);
-	void ConvertCarToRas(const CarToRas<T>& carToRas);
-	std::string ErrorStr(const std::string& loc, const std::string& error, RendererClDevice* dev);
-	bool m_Init = false;
-	bool m_Shared = false;
-	bool m_DoublePrecision = typeid(T) == typeid(double);
-	float m_SubBatchPercentPerThread = 0.025f;//0.025 * 10,240 gives a default value of 256 iters per thread for the default sub batch size of 10,240 which almost all flames will use.
-	//It's critical that these numbers never change. They are
-	//based on the cuburn model of each kernel launch containing
-	//256 threads. 32 wide by 8 high. Everything done in the OpenCL
-	//iteraion kernel depends on these dimensions.
-	size_t m_IterCountPerKernel = 256;
-	size_t m_IterBlocksWide = 64, m_IterBlockWidth = 32;
-	size_t m_IterBlocksHigh = 2, m_IterBlockHeight = 8;
-	size_t m_MaxDEBlockSizeW;
-	size_t m_MaxDEBlockSizeH;
-
-	//Buffer names.
-	string m_EmberBufferName = "Ember";
-	string m_XformsBufferName = "Xforms";
-	string m_ParVarsBufferName = "ParVars";
-	string m_GlobalSharedBufferName = "GlobalShared";
-	string m_SeedsBufferName = "Seeds";
-	string m_DistBufferName = "Dist";
-	string m_CarToRasBufferName = "CarToRas";
-	string m_DEFilterParamsBufferName = "DEFilterParams";
-	string m_SpatialFilterParamsBufferName = "SpatialFilterParams";
-	string m_DECoefsBufferName = "DECoefs";
-	string m_DEWidthsBufferName = "DEWidths";
-	string m_DECoefIndicesBufferName = "DECoefIndices";
-	string m_SpatialFilterCoefsBufferName = "SpatialFilterCoefs";
-	string m_CurvesCsaName = "CurvesCsa";
-	string m_HostBufferName = "Host";
-	string m_HistBufferName = "Hist";
-	string m_AccumBufferName = "Accum";
-	string m_FinalImageName = "Final";
-	string m_PointsBufferName = "Points";
-#ifdef KNL_USE_GLOBAL_CONSEC
-	string m_ConsecBufferName = "Consec";
-#endif
-	string m_VarStateBufferName = "VarState";
-
-	//Kernels.
-	string m_IterKernel;
-
-	cl::ImageFormat m_PaletteFormat;
-	cl::ImageFormat m_FinalFormat;
-	cl::Image2D m_Palette;
-	cl::ImageGL m_AccumImage;
-	GLuint m_OutputTexID;
-	EmberCL<T> m_EmberCL;
-	vector<XformCL<T>> m_XformsCL;
-	vector<vector<glm::highp_uvec2>> m_Seeds;
-	CarToRasCL<T> m_CarToRasCL;
-	DensityFilterCL<bucketT> m_DensityFilterCL;
-	SpatialFilterCL<bucketT> m_SpatialFilterCL;
-	IterOpenCLKernelCreator<T> m_IterOpenCLKernelCreator;
-	DEOpenCLKernelCreator m_DEOpenCLKernelCreator;
-	FinalAccumOpenCLKernelCreator m_FinalAccumOpenCLKernelCreator;
-	pair<string, vector<T>> m_Params;
-	pair<string, vector<T>> m_GlobalShared;
-	vector<T> m_VarStates;
-	vector<unique_ptr<RendererClDevice>> m_Devices;
-	Ember<T> m_LastBuiltEmber;
-};
-}
+#pragma once
+
+#include "EmberCLPch.h"
+#include "OpenCLWrapper.h"
+#include "DEOpenCLKernelCreator.h"
+#include "FinalAccumOpenCLKernelCreator.h"
+#include "RendererClDevice.h"
+
+/// <summary>
+/// RendererCLBase and RendererCL classes.
+/// </summary>
+
+namespace EmberCLns
+{
+/// <summary>
+/// Serves only as an interface for OpenCL specific rendering functions.
+/// </summary>
+class EMBERCL_API RendererCLBase
+{
+public:
+	virtual ~RendererCLBase() { }
+	virtual bool ReadFinal(v4F* pixels) { return false; }
+	virtual bool ClearFinal() { return false; }
+	virtual bool AnyNvidia() const noexcept { return false; }
+	bool OptAffine() const noexcept { return m_OptAffine; }
+	void OptAffine(bool optAffine) noexcept { m_OptAffine = optAffine; }
+
+	std::function<void(void)> m_CompileBegun;
+
+protected:
+	bool m_OptAffine = false;
+};
+
+/// <summary>
+/// RendererCL is a derivation of the basic CPU renderer which
+/// overrides various functions to render on the GPU using OpenCL.
+/// This supports multi-GPU rendering and is done in the following manner:
+///		-When rendering a single image, the iterations will be split between devices in sub batches.
+///		-When animating, a renderer for each device will be created by the calling code,
+///			and the frames will each be rendered by a single device as available.
+/// The synchronization across devices is done through a single atomic counter.
+/// Since this class derives from EmberReport and also contains an
+/// OpenCLWrapper member which also derives from EmberReport, the
+/// reporting functions are overridden to aggregate the errors from
+/// both sources.
+/// Template argument T expected to be float or double.
+/// Template argument bucketT must always be float.
+/// </summary>
+template <typename T, typename bucketT>
+class EMBERCL_API RendererCL : public Renderer<T, bucketT>, public RendererCLBase
+{
+	using EmberNs::Renderer<T, bucketT>::RendererBase::Abort;
+	using EmberNs::Renderer<T, bucketT>::RendererBase::EarlyClip;
+	using EmberNs::Renderer<T, bucketT>::RendererBase::EnterResize;
+	using EmberNs::Renderer<T, bucketT>::RendererBase::LeaveResize;
+	using EmberNs::Renderer<T, bucketT>::RendererBase::FinalRasW;
+	using EmberNs::Renderer<T, bucketT>::RendererBase::FinalRasH;
+	using EmberNs::Renderer<T, bucketT>::RendererBase::SuperRasW;
+	using EmberNs::Renderer<T, bucketT>::RendererBase::SuperRasH;
+	using EmberNs::Renderer<T, bucketT>::RendererBase::SuperSize;
+	using EmberNs::Renderer<T, bucketT>::RendererBase::BytesPerChannel;
+	using EmberNs::Renderer<T, bucketT>::RendererBase::TemporalSamples;
+	using EmberNs::Renderer<T, bucketT>::RendererBase::ItersPerTemporalSample;
+	using EmberNs::Renderer<T, bucketT>::RendererBase::FuseCount;
+	using EmberNs::Renderer<T, bucketT>::RendererBase::DensityFilterOffset;
+	using EmberNs::Renderer<T, bucketT>::RendererBase::PrepFinalAccumVector;
+	using EmberNs::Renderer<T, bucketT>::RendererBase::Paused;
+	using EmberNs::Renderer<T, bucketT>::RendererBase::m_ProgressParameter;
+	using EmberNs::Renderer<T, bucketT>::RendererBase::m_YAxisUp;
+	using EmberNs::Renderer<T, bucketT>::RendererBase::m_LockAccum;
+	using EmberNs::Renderer<T, bucketT>::RendererBase::m_Abort;
+	using EmberNs::Renderer<T, bucketT>::RendererBase::m_LastIter;
+	using EmberNs::Renderer<T, bucketT>::RendererBase::m_LastIterPercent;
+	using EmberNs::Renderer<T, bucketT>::RendererBase::m_Stats;
+	using EmberNs::Renderer<T, bucketT>::RendererBase::m_Callback;
+	using EmberNs::Renderer<T, bucketT>::RendererBase::m_Rand;
+	using EmberNs::Renderer<T, bucketT>::RendererBase::m_RenderTimer;
+	using EmberNs::Renderer<T, bucketT>::RendererBase::m_IterTimer;
+	using EmberNs::Renderer<T, bucketT>::RendererBase::m_ProgressTimer;
+	using EmberNs::Renderer<T, bucketT>::RendererBase::EmberReport::AddToReport;
+	using EmberNs::Renderer<T, bucketT>::RendererBase::m_ResizeCs;
+	using EmberNs::Renderer<T, bucketT>::RendererBase::m_ProcessAction;
+	using EmberNs::Renderer<T, bucketT>::m_RotMat;
+	using EmberNs::Renderer<T, bucketT>::m_Ember;
+	using EmberNs::Renderer<T, bucketT>::m_Csa;
+	using EmberNs::Renderer<T, bucketT>::m_CurvesSet;
+	using EmberNs::Renderer<T, bucketT>::CenterX;
+	using EmberNs::Renderer<T, bucketT>::CenterY;
+	using EmberNs::Renderer<T, bucketT>::K1;
+	using EmberNs::Renderer<T, bucketT>::K2;
+	using EmberNs::Renderer<T, bucketT>::Supersample;
+	using EmberNs::Renderer<T, bucketT>::HighlightPower;
+	using EmberNs::Renderer<T, bucketT>::HistBuckets;
+	using EmberNs::Renderer<T, bucketT>::AccumulatorBuckets;
+	using EmberNs::Renderer<T, bucketT>::GetDensityFilter;
+	using EmberNs::Renderer<T, bucketT>::GetSpatialFilter;
+	using EmberNs::Renderer<T, bucketT>::CoordMap;
+	using EmberNs::Renderer<T, bucketT>::XformDistributions;
+	using EmberNs::Renderer<T, bucketT>::XformDistributionsSize;
+	using EmberNs::Renderer<T, bucketT>::m_Dmap;
+	using EmberNs::Renderer<T, bucketT>::m_DensityFilter;
+	using EmberNs::Renderer<T, bucketT>::m_SpatialFilter;
+
+public:
+	RendererCL(const vector<pair<size_t, size_t>>& devices, bool shared = false, GLuint outputTexID = 0);
+	RendererCL(const RendererCL<T, bucketT>& renderer) = delete;
+	RendererCL<T, bucketT>& operator = (const RendererCL<T, bucketT>& renderer) = delete;
+	virtual ~RendererCL() = default;
+
+	//Non-virtual member functions for OpenCL specific tasks.
+	bool Init(const vector<pair<size_t, size_t>>& devices, bool shared, GLuint outputTexID);
+	bool SetOutputTexture(GLuint outputTexID);
+
+	//Iters per kernel/block/grid.
+	inline size_t IterCountPerKernel() const noexcept;
+	inline size_t IterCountPerBlock() const noexcept;
+	inline size_t IterCountPerGrid() const noexcept;
+
+	//Kernels per block.
+	inline size_t IterBlockKernelWidth() const noexcept;
+	inline size_t IterBlockKernelHeight() const noexcept;
+	inline size_t IterBlockKernelCount() const noexcept;
+
+	//Kernels per grid.
+	inline size_t IterGridKernelWidth() const noexcept;
+	inline size_t IterGridKernelHeight() const noexcept;
+	inline size_t IterGridKernelCount() const noexcept;
+
+	//Blocks per grid.
+	inline size_t IterGridBlockWidth() const noexcept;
+	inline size_t IterGridBlockHeight() const noexcept;
+	inline size_t IterGridBlockCount() const noexcept;
+
+	//Allow for changing the number of blocks in each dimension of the grid.
+	void IterBlocksWide(size_t w) noexcept;
+	void IterBlocksHigh(size_t h) noexcept;
+
+	bool ReadHist(size_t device);
+	bool ReadAccum();
+	bool ReadPoints(size_t device, vector<PointCL<T>>& vec);
+	bool ClearHist();
+	bool ClearHist(size_t device);
+	bool ClearAccum();
+	bool WritePoints(size_t device, vector<PointCL<T>>& vec);
+#ifdef TEST_CL
+	bool WriteRandomPoints(size_t device);
+#endif
+	void InitStateVec();
+	void SubBatchPercentPerThread(float f);
+	float SubBatchPercentPerThread() const;
+	const string& IterKernel() const;
+	const string& DEKernel() const;
+	const string& FinalAccumKernel() const;
+
+	//Access to underlying OpenCL structures. Use cautiously.
+	const vector<unique_ptr<RendererClDevice>>& Devices() const;
+
+	//Virtual functions overridden from RendererCLBase.
+	virtual bool ReadFinal(v4F* pixels);
+	virtual bool ClearFinal();
+
+	//Public virtual functions overridden from Renderer or RendererBase.
+	size_t MemoryAvailable() override;
+	bool Ok() const override;
+	size_t SubBatchSize() const override;
+	size_t ThreadCount() const override;
+	bool CreateDEFilter(bool& newAlloc) override;
+	bool CreateSpatialFilter(bool& newAlloc) override;
+	eRendererType RendererType() const override;
+	bool Shared() const override;
+	void ClearErrorReport() noexcept override;
+	string ErrorReportString() override;
+	vector<string> ErrorReport() override;
+	bool RandVec(vector<QTIsaac<ISAAC_SIZE, ISAAC_INT>>& randVec) override;
+	bool AnyNvidia() const noexcept override;
+
+#ifndef TEST_CL
+protected:
+#endif
+	//Protected virtual functions overridden from Renderer.
+	bool Alloc(bool histOnly = false) override;
+	bool ResetBuckets(bool resetHist = true, bool resetAccum = true) override;
+	eRenderStatus LogScaleDensityFilter(bool forceOutput = false) override;
+	eRenderStatus GaussianDensityFilter() override;
+	eRenderStatus AccumulatorToFinalImage(vector<v4F>& pixels, size_t finalOffset) override;
+	EmberStats Iterate(size_t iterCount, size_t temporalSample) override;
+
+#ifndef TEST_CL
+private:
+#endif
+	//Private functions for making and running OpenCL programs.
+	bool BuildIterProgramForEmber(bool doAccum = true);
+	bool RunIter(size_t iterCount, size_t temporalSample, size_t& itersRan);
+	eRenderStatus RunLogScaleFilter();
+	eRenderStatus RunDensityFilter();
+	eRenderStatus RunFinalAccum();
+	bool ClearBuffer(size_t device, const string& bufferName, uint width, uint height, uint elementSize);
+	bool RunDensityFilterPrivate(size_t kernelIndex, size_t gridW, size_t gridH, size_t blockW, size_t blockH, uint chunkSizeW, uint chunkSizeH, uint colChunkPass, uint rowChunkPass);
+	int MakeAndGetDensityFilterProgram(size_t ss, uint filterWidth);
+	int MakeAndGetFinalAccumProgram();
+	int MakeAndGetGammaCorrectionProgram();
+	bool CreateHostBuffer();
+	bool SumDeviceHist();
+	void FillSeeds();
+
+	//Private functions passing data to OpenCL programs.
+	void ConvertDensityFilter();
+	void ConvertSpatialFilter();
+	void ConvertEmber(Ember<T>& ember, EmberCL<T>& emberCL, vector<XformCL<T>>& xformsCL);
+	void ConvertCarToRas(const CarToRas<T>& carToRas);
+	std::string ErrorStr(const std::string& loc, const std::string& error, RendererClDevice* dev);
+	bool m_Init = false;
+	bool m_Shared = false;
+	bool m_DoublePrecision = typeid(T) == typeid(double);
+	float m_SubBatchPercentPerThread = 0.025f;//0.025 * 10,240 gives a default value of 256 iters per thread for the default sub batch size of 10,240 which almost all flames will use.
+	//It's critical that the block dimensions never change. They are
+	//based on the cuburn model of each block containing 256 threads,
+	//32 wide by 8 high. Everything done in the OpenCL iteration kernel depends on them.
+	//The number of blocks per grid, however, can be changed with IterBlocksWide() and IterBlocksHigh().
+	size_t m_IterCountPerKernel = 256;
+	size_t m_IterBlocksWide = 64, m_IterBlockWidth = 32;
+	size_t m_IterBlocksHigh = 2, m_IterBlockHeight = 8;
+	size_t m_MaxDEBlockSizeW;
+	size_t m_MaxDEBlockSizeH;
+
+	//Buffer names.
+	string m_EmberBufferName = "Ember";
+	string m_XformsBufferName = "Xforms";
+	string m_ParVarsBufferName = "ParVars";
+	string m_GlobalSharedBufferName = "GlobalShared";
+	string m_SeedsBufferName = "Seeds";
+	string m_DistBufferName = "Dist";
+	string m_CarToRasBufferName = "CarToRas";
+	string m_DEFilterParamsBufferName = "DEFilterParams";
+	string m_SpatialFilterParamsBufferName = "SpatialFilterParams";
+	string m_DECoefsBufferName = "DECoefs";
+	string m_DEWidthsBufferName = "DEWidths";
+	string m_DECoefIndicesBufferName = "DECoefIndices";
+	string m_SpatialFilterCoefsBufferName = "SpatialFilterCoefs";
+	string m_CurvesCsaName = "CurvesCsa";
+	string m_HostBufferName = "Host";
+	string m_HistBufferName = "Hist";
+	string m_AccumBufferName = "Accum";
+	string m_FinalImageName = "Final";
+	string m_PointsBufferName = "Points";
+#ifdef KNL_USE_GLOBAL_CONSEC
+	string m_ConsecBufferName = "Consec";
+#endif
+	string m_VarStateBufferName = "VarState";
+
+	//Kernels.
+	string m_IterKernel;
+
+	cl::ImageFormat m_PaletteFormat;
+	cl::ImageFormat m_FinalFormat;
+	cl::Image2D m_Palette;
+	cl::ImageGL m_AccumImage;
+	GLuint m_OutputTexID;
+	EmberCL<T> m_EmberCL;
+	vector<XformCL<T>> m_XformsCL;
+	vector<vector<glm::highp_uvec2>> m_Seeds;
+	CarToRasCL<T> m_CarToRasCL;
+	DensityFilterCL<bucketT> m_DensityFilterCL;
+	SpatialFilterCL<bucketT> m_SpatialFilterCL;
+	IterOpenCLKernelCreator<T> m_IterOpenCLKernelCreator;
+	DEOpenCLKernelCreator m_DEOpenCLKernelCreator;
+	FinalAccumOpenCLKernelCreator m_FinalAccumOpenCLKernelCreator;
+	pair<string, vector<T>> m_Params;
+	pair<string, vector<T>> m_GlobalShared;
+	vector<T> m_VarStates;
+	vector<unique_ptr<RendererClDevice>> m_Devices;
+	Ember<T> m_LastBuiltEmber;
+};
+}
@@ -1,93 +1,93 @@
-#include "EmberCLPch.h"
-#include "RendererClDevice.h"
-
-namespace EmberCLns
-{
-/// <summary>
-/// Constructor that assigns members.
-/// The object is not fully initialized at this point, the caller
-/// must manually call Init().
-/// </summary>
-/// <param name="platform">The index of the platform to use</param>
-/// <param name="device">The index device of the device to use</param>
-/// <param name="shared">True if shared with OpenGL, else false.</param>
-/// <returns>True if success, else false.</returns>
-RendererClDevice::RendererClDevice(size_t platform, size_t device, bool shared)
-{
-	m_Init = false;
-	m_Shared = shared;
-	m_NVidia = false;
-	m_WarpSize = 0;
-	m_Calls = 0;
-	m_PlatformIndex = platform;
-	m_DeviceIndex = device;
-	m_Info = OpenCLInfo::Instance();
-}
-
-/// <summary>
-/// Initialization of the OpenCLWrapper member.
-/// </summary>
-/// <returns>True if success, else false.</returns>
-bool RendererClDevice::Init()
-{
-	bool b = true;
-
-	if (!m_Wrapper.Ok())
-	{
-		m_Init = false;
-		b = m_Wrapper.Init(m_PlatformIndex, m_DeviceIndex, m_Shared);
-	}
-
-	if (b && m_Wrapper.Ok() && !m_Init)
-	{
-		m_NVidia = Find(ToLower(m_Info->PlatformName(m_PlatformIndex)), "nvidia") && m_Wrapper.LocalMemSize() > (32 * 1024);
-		m_WarpSize = m_NVidia ? 32 : 64;
-		m_Init = true;
-	}
-
-	return b;
-}
-
-/// <summary>
-/// OpenCL property accessors, getters only.
-/// </summary>
-bool RendererClDevice::Ok() const { return m_Init; }
-bool RendererClDevice::Shared() const { return m_Shared; }
-bool RendererClDevice::Nvidia() const { return m_NVidia; }
-size_t RendererClDevice::WarpSize() const { return m_WarpSize; }
-size_t RendererClDevice::PlatformIndex() const { return m_PlatformIndex; }
-size_t RendererClDevice::DeviceIndex() const { return m_DeviceIndex; }
-
-/// <summary>
-/// Clear the error report for this class as well as the wrapper.
-/// </summary>
-void RendererClDevice::ClearErrorReport()
-{
-	EmberReport::ClearErrorReport();
-	m_Wrapper.ClearErrorReport();
-}
-
-/// <summary>
-/// Concatenate and return the error report for this class and the
-/// wrapper as a single string.
-/// </summary>
-/// <returns>The concatenated error report string</returns>
-string RendererClDevice::ErrorReportString()
-{
-	const auto s = EmberReport::ErrorReportString();
-	return s + m_Wrapper.ErrorReportString();
-}
-
-/// <summary>
-/// Concatenate and return the error report for this class and the
-/// wrapper as a vector of strings.
-/// </summary>
-/// <returns>The concatenated error report vector of strings</returns>
-vector<string> RendererClDevice::ErrorReport()
-{
-	auto ours = EmberReport::ErrorReport();
-	const auto s = m_Wrapper.ErrorReport();
-	ours.insert(ours.end(), s.begin(), s.end());
-	return ours;
-}
-}
+#include "EmberCLPch.h"
+#include "RendererClDevice.h"
+
+namespace EmberCLns
+{
+/// <summary>
+/// Constructor that assigns members from the arguments and resets the rest.
+/// The object is not fully initialized at this point, the caller
+/// must manually call Init() before Ok() will report true.
+/// </summary>
+/// <param name="platform">The index of the platform to use</param>
+/// <param name="device">The index of the device to use</param>
+/// <param name="shared">True if shared with OpenGL, else false.</param>
+/// <remarks>Being a constructor, nothing is returned. Use Init() and Ok() to check for success.</remarks>
+RendererClDevice::RendererClDevice(size_t platform, size_t device, bool shared)
+{
+	m_Init = false;//Set to true in Init() once the wrapper is ok.
+	m_Shared = shared;
+	m_NVidia = false;//Vendor and warp size are detected in Init().
+	m_WarpSize = 0;
+	m_Calls = 0;
+	m_PlatformIndex = platform;
+	m_DeviceIndex = device;
+	m_Info = OpenCLInfo::Instance();//Used to look up the platform name in Init().
+}
+
+/// <summary>
+/// Initialization of the OpenCLWrapper member, followed by detection of the vendor and warp size.
+/// </summary>
+/// <returns>True if the wrapper was already ok or its Init() succeeded, else false.</returns>
+bool RendererClDevice::Init()
+{
+	bool b = true;
+
+	if (!m_Wrapper.Ok())//Only initialize the wrapper if it is not already ok.
+	{
+		m_Init = false;
+		b = m_Wrapper.Init(m_PlatformIndex, m_DeviceIndex, m_Shared);
+	}
+
+	if (b && m_Wrapper.Ok() && !m_Init)//Skip detection if a previous call already did it.
+	{
+		m_NVidia = Find(ToLower(m_Info->PlatformName(m_PlatformIndex)), "nvidia") && m_Wrapper.LocalMemSize() > (32 * 1024);//Needs "nvidia" in the lowercased platform name (Find() presumably does a substring search) and more than 32KB of local memory.
+		m_WarpSize = m_NVidia ? 32 : 64;//32 when detected as Nvidia, 64 for everything else.
+		m_Init = true;
+	}
+
+	return b;
+}
+
+/// <summary>
+/// OpenCL property accessors, getters only.
+/// </summary>
+bool RendererClDevice::Ok() const noexcept { return m_Init; }
+bool RendererClDevice::Shared() const noexcept { return m_Shared; }
+bool RendererClDevice::Nvidia() const noexcept { return m_NVidia; }
+size_t RendererClDevice::WarpSize() const noexcept { return m_WarpSize; }
+size_t RendererClDevice::PlatformIndex() const noexcept { return m_PlatformIndex; }
+size_t RendererClDevice::DeviceIndex() const noexcept { return m_DeviceIndex; }
+
+/// <summary>
+/// Clear the error report for this class as well as the wrapper.
+/// </summary>
+void RendererClDevice::ClearErrorReport() noexcept
+{
+	EmberReport::ClearErrorReport();
+	m_Wrapper.ClearErrorReport();
+}
+
+/// <summary>
+/// Concatenate and return the error report for this class and the
+/// wrapper as a single string.
+/// </summary>
+/// <returns>The concatenated error report string</returns>
+string RendererClDevice::ErrorReportString()
+{
+	const auto s = EmberReport::ErrorReportString();
+	return s + m_Wrapper.ErrorReportString();
+}
+
+/// <summary>
+/// Concatenate and return the error report for this class and the
+/// wrapper as a vector of strings.
+/// </summary>
+/// <returns>The concatenated error report vector of strings</returns>
+vector<string> RendererClDevice::ErrorReport()
+{
+	auto ours = EmberReport::ErrorReport();
+	const auto s = m_Wrapper.ErrorReport();
+	ours.insert(ours.end(), s.begin(), s.end());
+	return ours;
+}
+}
@@ -1,47 +1,47 @@
-#pragma once
-
-#include "EmberCLPch.h"
-#include "OpenCLWrapper.h"
-#include "IterOpenCLKernelCreator.h"
-
-/// <summary>
-/// RendererClDevice class.
-/// </summary>
-
-namespace EmberCLns
-{
-/// <summary>
-/// Class to manage a device that does the iteration portion of
-/// the rendering process. Having a separate class for this purpose
-/// enables multi-GPU support.
-/// </summary>
-class EMBERCL_API RendererClDevice : public EmberReport
-{
-public:
-	RendererClDevice(size_t platform, size_t device, bool shared);
-	bool Init();
-	bool Ok() const;
-	bool Shared() const;
-	bool Nvidia() const;
-	size_t WarpSize() const;
-	size_t PlatformIndex() const;
-	size_t DeviceIndex() const;
-
-	//Public virtual functions overridden from base classes.
-	void ClearErrorReport() override;
-	string ErrorReportString() override;
-	vector<string> ErrorReport() override;
-
-	size_t m_Calls;
-	OpenCLWrapper m_Wrapper;
-
-private:
-	bool m_Init;
-	bool m_Shared;
-	bool m_NVidia;
-	size_t m_WarpSize;
-	size_t m_PlatformIndex;
-	size_t m_DeviceIndex;
-	shared_ptr<OpenCLInfo> m_Info;
-};
-}
+#pragma once
+
+#include "EmberCLPch.h"
+#include "OpenCLWrapper.h"
+#include "IterOpenCLKernelCreator.h"
+
+/// <summary>
+/// RendererClDevice class.
+/// </summary>
+
+namespace EmberCLns
+{
+/// <summary>
+/// Class to manage a device that does the iteration portion of
+/// the rendering process. Having a separate class for this purpose
+/// enables multi-GPU support.
+/// </summary>
+class EMBERCL_API RendererClDevice : public EmberReport
+{
+public:
+	RendererClDevice(size_t platform, size_t device, bool shared);//Definition not visible here; presumably stores the arguments in m_PlatformIndex, m_DeviceIndex and m_Shared, confirm.
+	bool Init();//Initializes m_Wrapper when it is not already Ok(); returns that call's result, or true if it was not needed.
+	bool Ok() const noexcept;//Returns m_Init.
+	bool Shared() const noexcept;//Returns m_Shared.
+	bool Nvidia() const noexcept;//Returns m_NVidia.
+	size_t WarpSize() const noexcept;//Returns m_WarpSize.
+	size_t PlatformIndex() const noexcept;//Returns m_PlatformIndex.
+	size_t DeviceIndex() const noexcept;//Returns m_DeviceIndex.
+
+	//Public virtual functions overridden from base classes.
+	void ClearErrorReport() noexcept override;//Clears both the base class report and m_Wrapper's report.
+	string ErrorReportString() override;//Base class report string followed by m_Wrapper's.
+	vector<string> ErrorReport() override;//Base class report entries followed by m_Wrapper's.
+
+	size_t m_Calls;//NOTE(review): public and not touched by Init() or the accessors; purpose is not visible here, confirm against callers.
+	OpenCLWrapper m_Wrapper;//Public wrapper object that Init() initializes.
+
+private:
+	bool m_Init;//Set to true by Init() after the device properties are computed; cleared there when the wrapper is not Ok().
+	bool m_Shared;//Forwarded to m_Wrapper.Init().
+	bool m_NVidia;//Computed in Init() from the platform name and m_Wrapper.LocalMemSize().
+	size_t m_WarpSize;//Init() assigns 32 when m_NVidia is true, else 64.
+	size_t m_PlatformIndex;//Forwarded to m_Wrapper.Init() and used to look up the platform name.
+	size_t m_DeviceIndex;//Forwarded to m_Wrapper.Init().
+	shared_ptr<OpenCLInfo> m_Info;//Dereferenced in Init() to get the platform name; where it is assigned is not visible here.
+};
+}
@@ -1 +1 @@
-#include "EmberCommonPch.h"
+#include "EmberCommonPch.h"
@@ -1,98 +1,98 @@
-#ifdef _WIN32
-	#pragma once
-#endif
-
-/// <summary>
-/// Precompiled header file. Place all system includes here with appropriate #defines for different operating systems and compilers.
-/// </summary>
-
-#define NOMINMAX
-#define WIN32_LEAN_AND_MEAN//Exclude rarely-used stuff from Windows headers.
-#define _USE_MATH_DEFINES
-
-#ifdef _WIN32
-	#pragma warning(disable : 4251; disable : 4661; disable : 4100)
-	#include <SDKDDKVer.h>
-	#include <windows.h>
-	#include <winsock.h>//For htons().
-	#include <BaseTsd.h>
-	#include <crtdbg.h>
-	#include <tchar.h>
-#else
-	#include <arpa/inet.h>
-	#include <unistd.h>
-	#define _TCHAR char
-	#define _tmain main
-	#define _T
-#endif
-
-#include <iostream>
-#include <iomanip>
-#include <ostream>
-#include <random>
-#include <sstream>
-#include <setjmp.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "jconfig.h"
-#include "jpeglib.h"
-
-#define PNG_SKIP_SETJMP_CHECK 1
-
-#include "png.h"
-
-//Ember.
-#include "Ember.h"
-#include "Variation.h"
-#include "EmberToXml.h"
-#include "XmlToEmber.h"
-#include "PaletteList.h"
-#include "Iterator.h"
-#include "Renderer.h"
-#include "RendererCL.h"
-#include "SheepTools.h"
-
-//Options.
-#include "SimpleGlob.h"
-#include "SimpleOpt.h"
-
-//Exr
-#ifdef _WIN32
-	#define OPENEXR_DLL 1
-#endif
-
-#ifdef __APPLE__
-#include <OpenEXR/ImfRgbaFile.h>
-#include <OpenEXR/ImfStringAttribute.h>
-#include <OpenEXR/half.h>
-//May need these instead for openexr3
-//#include <Imath/half.h>
-//#include <Imath/ImathInt64.h>
-#include <OpenEXR/ImfChannelList.h>
-#include <OpenEXR/ImfOutputFile.h>
-#define ENUM_DYLD_BOOL
-#include <mach-o/dyld.h>
-
-#define _MM_DENORMALS_ZERO_MASK   0x0040
-#define _MM_DENORMALS_ZERO_ON     0x0040
-#define _MM_DENORMALS_ZERO_OFF    0x0000
-
-#define _MM_SET_DENORMALS_ZERO_MODE(mode)                                   \
-	_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (mode))
-#define _MM_GET_DENORMALS_ZERO_MODE()                                       \
-	(_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
-#else
-#include <ImfRgbaFile.h>
-#include <ImfStringAttribute.h>
-#include <ImfChannelList.h>
-#include <ImfOutputFile.h>
-#include <half.h>
-#endif
-
-using namespace Imf;
-using namespace Imath;
-
-using namespace EmberNs;
-using namespace EmberCLns;
+#ifdef _WIN32
+	#pragma once
+#endif
+
+/// <summary>
+/// Precompiled header file. Place all system includes here with appropriate #defines for different operating systems and compilers.
+/// </summary>
+
+#define NOMINMAX
+#define WIN32_LEAN_AND_MEAN//Exclude rarely-used stuff from Windows headers.
+#define _USE_MATH_DEFINES
+
+#ifdef _WIN32
+	#pragma warning(disable : 4251; disable : 4661; disable : 4100)
+	#include <SDKDDKVer.h>
+	#include <windows.h>
+	#include <winsock.h>//For htons().
+	#include <BaseTsd.h>
+	#include <crtdbg.h>
+	#include <tchar.h>
+#else
+	#include <arpa/inet.h>
+	#include <unistd.h>
+	#define _TCHAR char//<tchar.h> is only included in the _WIN32 branch above, so _TCHAR is defined as plain char here.
+	#define _tmain main//Lets an entry point written as _tmain compile as main.
+	#define _T//Defined with an empty replacement, so any use of the name _T is simply removed.
+#endif
+
+#include <iostream>
+#include <iomanip>
+#include <ostream>
+#include <random>
+#include <sstream>
+#include <setjmp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "jconfig.h"
+#include "jpeglib.h"
+
+#define PNG_SKIP_SETJMP_CHECK 1
+
+#include "png.h"
+
+//Ember.
+#include "Ember.h"
+#include "Variation.h"
+#include "EmberToXml.h"
+#include "XmlToEmber.h"
+#include "PaletteList.h"
+#include "Iterator.h"
+#include "Renderer.h"
+#include "RendererCL.h"
+#include "SheepTools.h"
+
+//Options.
+#include "SimpleGlob.h"
+#include "SimpleOpt.h"
+
+//Exr
+#ifdef _WIN32
+	#define OPENEXR_DLL 1
+#endif
+
+#ifdef __APPLE__
+#include <OpenEXR/ImfRgbaFile.h>
+#include <OpenEXR/ImfStringAttribute.h>
+#include <OpenEXR/half.h>
+//May need these instead for openexr3
+//#include <Imath/half.h>
+//#include <Imath/ImathInt64.h>
+#include <OpenEXR/ImfChannelList.h>
+#include <OpenEXR/ImfOutputFile.h>
+#define ENUM_DYLD_BOOL
+#include <mach-o/dyld.h>
+
+#define _MM_DENORMALS_ZERO_MASK   0x0040//Bit of the _mm_getcsr() value that the two function-like macros below read and write.
+#define _MM_DENORMALS_ZERO_ON     0x0040//Mode value with that bit set.
+#define _MM_DENORMALS_ZERO_OFF    0x0000//Mode value with that bit clear.
+
+#define _MM_SET_DENORMALS_ZERO_MODE(mode)                                   \
+	_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (mode))//Clears the masked bit of the current value, ORs in mode and writes the result back.
+#define _MM_GET_DENORMALS_ZERO_MODE()                                       \
+	(_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)//Yields only the masked bit of the current value.
+#else
+#include <ImfRgbaFile.h>
+#include <ImfStringAttribute.h>
+#include <ImfChannelList.h>
+#include <ImfOutputFile.h>
+#include <half.h>
+#endif
+
+using namespace Imf;
+using namespace Imath;
+
+using namespace EmberNs;
+using namespace EmberCLns;
@@ -1,23 +1,23 @@
-#pragma once
-
-#include "EmberOptions.h"
-
-/// <summary>
-/// Declaration for the EmberGenome() and SetDefaultTestValues() functions.
-/// </summary>
-
-/// <summary>
-/// Set various default test values on the passed in ember.
-/// </summary>
-/// <param name="ember">The ember to test</param>
-template <typename T>
-static void SetDefaultTestValues(Ember<T>& ember);
-
-/// <summary>
-/// The core of the EmberGenome.exe program.
-/// Template argument expected to be float or double.
-/// </summary>
-/// <param name="opt">A populated EmberOptions object which specifies all program options to be used</param>
-/// <returns>True if success, else false.</returns>
-template <typename T, typename bucketT>
+#pragma once
+
+#include "EmberOptions.h"
+
+/// <summary>
+/// Declaration for the EmberGenome() and SetDefaultTestValues() functions.
+/// </summary>
+
+/// <summary>
+/// Set various default test values on the passed in ember. Declaration only; the definition is not in this header.
+/// </summary>
+/// <param name="ember">The ember to set the test values on, taken by non-const reference</param>
+template <typename T>
+static void SetDefaultTestValues(Ember<T>& ember);
+
+/// <summary>
+/// The core of the EmberGenome.exe program. Neither T nor bucketT appears in the parameter list, so both must be given explicitly by the caller.
+/// Template argument expected to be float or double. NOTE(review): this does not say whether it covers both T and bucketT, confirm.
+/// </summary>
+/// <param name="opt">A populated EmberOptions object which specifies all program options to be used</param>
+/// <returns>True if success, else false.</returns>
+template <typename T, typename bucketT>
 static bool EmberGenome(EmberOptions& opt);
@@ -1,15 +1,15 @@
-//{{NO_DEPENDENCIES}}
-// Microsoft Visual C++ generated include file.
-// Used by EmberGenome.rc
-//
-
-// Next default values for new objects
-// 
-#ifdef APSTUDIO_INVOKED
-#ifndef APSTUDIO_READONLY_SYMBOLS
-#define _APS_NEXT_RESOURCE_VALUE        101
-#define _APS_NEXT_COMMAND_VALUE         40001
-#define _APS_NEXT_CONTROL_VALUE         1000
-#define _APS_NEXT_SYMED_VALUE           101
-#endif
-#endif
+//{{NO_DEPENDENCIES}}
+// Microsoft Visual C++ generated include file.
+// Used by EmberGenome.rc
+//
+
+// Next default values for new objects
+// 
+#ifdef APSTUDIO_INVOKED
+#ifndef APSTUDIO_READONLY_SYMBOLS
+#define _APS_NEXT_RESOURCE_VALUE        101
+#define _APS_NEXT_COMMAND_VALUE         40001
+#define _APS_NEXT_CONTROL_VALUE         1000
+#define _APS_NEXT_SYMED_VALUE           101
+#endif
+#endif
--- a/Show More
+++ b/Show More