0.4.1.5 Beta 11/28/2014

--User Changes Remove limit on the number of xforms allowable on the GPU. This was previously 21. Show actual strips count to be used in parens outside of user specified strips count on final render dialog. Allow for adjustment of iteration depth and fuse count per ember and save/read these values with the xml. Iteration optimizations on both CPU and GPU. Automatically adjust default quality spinner value when using CPU/GPU to 10/30, respectively. --Bug Fixes Fix severe randomization bug with OpenCL. Fix undo list off by one error when doing a new edit anywhere but the end of the undo list. Make integer variation parameters use 4 decimal places in the variations list like all the others. New build of the latest Qt to fix scroll bar drawing bug. Prevent grid from showing as much when pressing control to increase a spinner's increment speed. Still shows sometimes, but better than before. --Code Changes Pass count and fuse to iterator as a structure now to allow for passing more params in the future. Slightly different grid/block logic when running DE filtering on the GPU. Attempt a different way of doing DE, but #define out because it ended up not being faster. Restructure some things to allow for a variable length xforms buffer to be passed to the GPU. Add sub batch size and fuse count as ember members, and remove them from the renderer classes. Remove m_LastPass from Renderer. It should have been removed with passes. Pass seeds as a buffer to the OpenCL iteration kernel, rather than a single seed that gets modified. Slight optimization on CPU accum. Use case statement instead of if/else for xform choosing in OpenCL for a 2% speedup on params with large numbers of xforms. Add SizeOf() wrapper around sizeof(vec[0]) * vec.size(). Remove LogScaleSum() functions from the CPU and GPU because they're no longer used since passes were removed. Make some OpenCLWrapper getters const. Better organize RendererCL methods that return grid dimensions.
2025-06-30 21:36:33 -04:00 · 2014-11-28 01:37:51 -08:00
parent 3f29025f99
commit b29bedec38
39 changed files with 905 additions and 392 deletions
--- a/Source/Ember/Ember.h
+++ b/Source/Ember/Ember.h
@ -92,6 +92,8 @@ public:
 		m_OrigFinalRasW		  = ember.m_OrigFinalRasW;
 		m_OrigFinalRasH		  = ember.m_OrigFinalRasH;
 		m_OrigPixPerUnit	  = ember.m_OrigPixPerUnit;
+		m_SubBatchSize		  = ember.m_SubBatchSize;
+		m_FuseCount			  = ember.m_FuseCount;
 		m_Supersample		  = ember.m_Supersample;
 		m_TemporalSamples	  = ember.m_TemporalSamples;
 		m_Symmetry			  = ember.m_Symmetry;
@ -184,6 +186,8 @@ public:
 		m_OrigFinalRasW = 1920;
 		m_OrigFinalRasH = 1080;
 		m_OrigPixPerUnit = 240;
+		m_SubBatchSize = DEFAULT_SBS;
+		m_FuseCount = 15;
 		m_Supersample = 1;
 		m_TemporalSamples = 1000;
 		m_Symmetry = 0;
@ -750,24 +754,15 @@ public:
 		m_PaletteMode = embers[0].m_PaletteMode;
 		m_AffineInterp = embers[0].m_AffineInterp;

-		//Interpolate ember parameters.
-		InterpT<&Ember<T>::m_Brightness>(embers, coefs, size);
-		InterpT<&Ember<T>::m_HighlightPower>(embers, coefs, size);
-		InterpT<&Ember<T>::m_Gamma>(embers, coefs, size);
-		InterpT<&Ember<T>::m_Vibrancy>(embers, coefs, size);
-		InterpT<&Ember<T>::m_Hue>(embers, coefs, size);
+		//Interpolate ember parameters, these should be in the same order the members are declared.
 		InterpI<&Ember<T>::m_FinalRasW>(embers, coefs, size);
 		InterpI<&Ember<T>::m_FinalRasH>(embers, coefs, size);
+		InterpI<&Ember<T>::m_SubBatchSize>(embers, coefs, size);
+		InterpI<&Ember<T>::m_FuseCount>(embers, coefs, size);
 		InterpI<&Ember<T>::m_Supersample>(embers, coefs, size);
-		InterpT<&Ember<T>::m_CenterX>(embers, coefs, size);
-		InterpT<&Ember<T>::m_CenterY>(embers, coefs, size);
-		InterpT<&Ember<T>::m_RotCenterY>(embers, coefs, size);
-		InterpX<Color<T>, &Ember<T>::m_Background>(embers, coefs, size); m_Background.a = bgAlphaSave;//Don't interp alpha.
-		InterpT<&Ember<T>::m_PixelsPerUnit>(embers, coefs, size);
-		InterpT<&Ember<T>::m_SpatialFilterRadius>(embers, coefs, size);
-		InterpT<&Ember<T>::m_TemporalFilterExp>(embers, coefs, size);
-		InterpT<&Ember<T>::m_TemporalFilterWidth>(embers, coefs, size);
+		InterpI<&Ember<T>::m_TemporalSamples>(embers, coefs, size);
 		InterpT<&Ember<T>::m_Quality>(embers, coefs, size);
+		InterpT<&Ember<T>::m_PixelsPerUnit>(embers, coefs, size);
 		InterpT<&Ember<T>::m_Zoom>(embers, coefs, size);
 		InterpT<&Ember<T>::m_CamZPos>(embers, coefs, size);
 		InterpT<&Ember<T>::m_CamPerspective>(embers, coefs, size);
@ -775,12 +770,23 @@ public:
 		InterpT<&Ember<T>::m_CamPitch>(embers, coefs, size);
 		InterpT<&Ember<T>::m_CamDepthBlur>(embers, coefs, size);
 		InterpX<m3T, &Ember<T>::m_CamMat>(embers, coefs, size);
+		InterpT<&Ember<T>::m_CenterX>(embers, coefs, size);
+		InterpT<&Ember<T>::m_CenterY>(embers, coefs, size);
+		InterpT<&Ember<T>::m_RotCenterY>(embers, coefs, size);
 		InterpT<&Ember<T>::m_Rotate>(embers, coefs, size);
-		InterpI<&Ember<T>::m_TemporalSamples>(embers, coefs, size);
+		InterpT<&Ember<T>::m_Hue>(embers, coefs, size);
+		InterpT<&Ember<T>::m_Brightness>(embers, coefs, size);
+		InterpT<&Ember<T>::m_Gamma>(embers, coefs, size);
+		InterpT<&Ember<T>::m_Vibrancy>(embers, coefs, size);
+		InterpT<&Ember<T>::m_GammaThresh>(embers, coefs, size);
+		InterpT<&Ember<T>::m_HighlightPower>(embers, coefs, size);
+		InterpX<Color<T>, &Ember<T>::m_Background>(embers, coefs, size); m_Background.a = bgAlphaSave;//Don't interp alpha.
+		InterpT<&Ember<T>::m_TemporalFilterExp>(embers, coefs, size);
+		InterpT<&Ember<T>::m_TemporalFilterWidth>(embers, coefs, size);
 		InterpT<&Ember<T>::m_MaxRadDE>(embers, coefs, size);
 		InterpT<&Ember<T>::m_MinRadDE>(embers, coefs, size);
 		InterpT<&Ember<T>::m_CurveDE>(embers, coefs, size);
-		InterpT<&Ember<T>::m_GammaThresh>(embers, coefs, size);
+		InterpT<&Ember<T>::m_SpatialFilterRadius>(embers, coefs, size);

 		//An extra step needed here due to the OOD that was not needed in the original.
 		//A small price to pay for the conveniences it affords us elsewhere.
@ -1382,6 +1388,8 @@ public:
 		   << "Quality: " << m_Quality << endl
 		   << "Pixels Per Unit: " << m_PixelsPerUnit << endl
 		   << "Original Pixels Per Unit: " << m_OrigPixPerUnit << endl
+		   << "Sub Batch Size: " << m_SubBatchSize << endl
+		   << "Fuse Count: " << m_FuseCount << endl
 		   << "Zoom: " << m_Zoom << endl
 		   << "ZPos: " << m_CamZPos << endl
 		   << "Perspective: " << m_CamPerspective << endl
@ -1459,6 +1467,14 @@ public:
 	size_t m_OrigFinalRasH;//the dimension may change in an editor and the originals are needed for the aspect ratio.
 	T m_OrigPixPerUnit;

+	//The iteration depth. This was a rendering parameter in flam3 but has been made a member here
+	//so that it can be adjusted more easily.
+	size_t m_SubBatchSize;
+
+	//The number of iterations to disregard for each sub batch. This was a rendering parameter in flam3 but has been made a member here
+	//so that it can be adjusted more easily.
+	size_t m_FuseCount;
+
 	//The multiplier in size of the histogram and DE filtering buffers. Must be at least one, preferably never larger than 4, only useful at 2.
 	//Xml field: "supersample" or "oversample (deprecated)".
 	size_t m_Supersample;
--- a/Source/Ember/EmberDefines.h
+++ b/Source/Ember/EmberDefines.h
@ -36,7 +36,7 @@ namespace EmberNs
 	extern void sincos(float x, float *s, float *c);
 #endif

-#define EMBER_VERSION "0.4.1.4"
+#define EMBER_VERSION "0.4.1.5"
 #define EPS6 T(1e-6)
 #define EPS std::numeric_limits<T>::epsilon()//Apoplugin.h uses -20, but it's more mathematically correct to do it this way.
 #define ISAAC_SIZE 4
@ -54,6 +54,7 @@ namespace EmberNs
 #define COLORMAP_LENGTH 256//These will need to change if 2D palette support is ever added, or variable sized palettes.
 #define COLORMAP_LENGTH_MINUS_1 255
 #define WHITE 255
+#define DEFAULT_SBS (1024 * 10)
 #define XC (const xmlChar*)
 #define BadVal(x) (((x) != (x)) || ((x) > 1e10) || ((x) < -1e10))
 #define Rint(A) floor((A) + (((A) < 0) ? T(-0.5) : T(0.5)))
--- a/Source/Ember/EmberToXml.h
+++ b/Source/Ember/EmberToXml.h
@ -156,6 +156,8 @@ public:
 		os << " temporal_filter_width=\"" << ember.m_TemporalFilterWidth << "\"";
 		os << " quality=\"" << ember.m_Quality << "\"";
 		os << " temporal_samples=\"" << ember.m_TemporalSamples << "\"";
+		os << " sub_batch_size=\"" << ember.m_SubBatchSize << "\"";
+		os << " fuse=\"" << ember.m_FuseCount << "\"";
 		os << " background=\"" << ember.m_Background.r << " " << ember.m_Background.g << " " << ember.m_Background.b << "\"";
 		os << " brightness=\"" << ember.m_Brightness << "\"";
 		os << " gamma=\"" << ember.m_Gamma << "\"";
--- a/Source/Ember/Iterator.h
+++ b/Source/Ember/Iterator.h
@ -16,6 +16,17 @@ namespace EmberNs
 	using Iterator<T>::DoFinalXform; \
 	using Iterator<T>::DoBadVals;

+template <typename T, typename bucketT> class Renderer;
+
+template <typename T>
+struct IterParams
+{
+	size_t m_Count;
+	size_t m_Skip;
+	//T m_OneColDiv2;
+	//T m_OneRowDiv2;
+};
+
 /// <summary>
 /// Iterator base class.
 /// Iterating is one loop level outside of the inner xform application loop so it's still very important
@ -69,7 +80,7 @@ public:
 	/// <param name="samples">The buffer to store the output points</param>
 	/// <param name="rand">The random context to use</param>
 	/// <returns>The number of bad values</returns>
-	virtual size_t Iterate(Ember<T>& ember, size_t count, size_t skip, Point<T>* samples, QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand) { return 0; }
+	virtual size_t Iterate(Ember<T>& ember, IterParams<T>& params, Point<T>* samples, QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand) { return 0; }

 	/// <summary>
 	/// Initialize the xform selection vector by normalizing the weights of all xforms and
@ -278,7 +289,7 @@ public:
 	/// <param name="samples">The buffer to store the output points</param>
 	/// <param name="rand">The random context to use</param>
 	/// <returns>The number of bad values</returns>
-	virtual size_t Iterate(Ember<T>& ember, size_t count, size_t skip, Point<T>* samples, QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand)
+	virtual size_t Iterate(Ember<T>& ember, IterParams<T>& params, Point<T>* samples, QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand) override
 	{
 		size_t i, badVals = 0;
 		Point<T> tempPoint, p1;
@ -290,7 +301,7 @@ public:
 			{
 				p1 = samples[0];

-				for (i = 0; i < skip; i++)//Fuse.
+				for (i = 0; i < params.m_Skip; i++)//Fuse.
 				{
 					if (xforms[NextXformFromIndex(rand.Rand())].Apply(&p1, &p1, rand))
 						DoBadVals(xforms, badVals, &p1, rand);
@ -299,7 +310,7 @@ public:
 				DoFinalXform(ember, p1, samples, rand);//Apply to last fuse point and store as the first element in samples.
 				ember.Proj(samples[0], rand);

-				for (i = 1; i < count; i++)//Real loop.
+				for (i = 1; i < params.m_Count; i++)//Real loop.
 				{
 					if (xforms[NextXformFromIndex(rand.Rand())].Apply(&p1, &p1, rand))
 						DoBadVals(xforms, badVals, &p1, rand);
@ -312,7 +323,7 @@ public:
 			{
 				p1 = samples[0];

-				for (i = 0; i < skip; i++)//Fuse.
+				for (i = 0; i < params.m_Skip; i++)//Fuse.
 				{
 					if (xforms[NextXformFromIndex(rand.Rand())].Apply(&p1, &p1, rand))
 						DoBadVals(xforms, badVals, &p1, rand);
@ -321,7 +332,7 @@ public:
 				samples[0] = p1;
 				ember.Proj(samples[0], rand);

-				for (i = 1; i < count; i++)//Real loop.
+				for (i = 1; i < params.m_Count; i++)//Real loop.
 				{
 					if (xforms[NextXformFromIndex(rand.Rand())].Apply(&p1, &samples[i], rand))
 						DoBadVals(xforms, badVals, samples + i, rand);
@ -337,7 +348,7 @@ public:
 			{
 				p1 = samples[0];

-				for (i = 0; i < skip; i++)//Fuse.
+				for (i = 0; i < params.m_Skip; i++)//Fuse.
 				{
 					if (xforms[NextXformFromIndex(rand.Rand())].Apply(&p1, &p1, rand))
 						DoBadVals(xforms, badVals, &p1, rand);
@ -345,7 +356,7 @@ public:

 				DoFinalXform(ember, p1, samples, rand);//Apply to last fuse point and store as the first element in samples.

-				for (i = 1; i < count; i++)//Real loop.
+				for (i = 1; i < params.m_Count; i++)//Real loop.
 				{
 					if (xforms[NextXformFromIndex(rand.Rand())].Apply(&p1, &p1, rand))//Feed the resulting value of applying the randomly selected xform back into the next iter, and not the result of applying the final xform.
 						DoBadVals(xforms, badVals, &p1, rand);
@ -357,7 +368,7 @@ public:
 			{
 				p1 = samples[0];

-				for (i = 0; i < skip; i++)//Fuse.
+				for (i = 0; i < params.m_Skip; i++)//Fuse.
 				{
 					if (xforms[NextXformFromIndex(rand.Rand())].Apply(&p1, &p1, rand))
 						DoBadVals(xforms, badVals, &p1, rand);
@ -365,9 +376,11 @@ public:

 				samples[0] = p1;

-				for (i = 0; i < count - 1; i++)//Real loop.
+				for (i = 0; i < params.m_Count - 1; i++)//Real loop.
+				{
 					if (xforms[NextXformFromIndex(rand.Rand())].Apply(samples + i, samples + i + 1, rand))
 						DoBadVals(xforms, badVals, samples + i + 1, rand);
+				}
 			}
 		}

@ -442,7 +455,7 @@ public:
 	/// <param name="samples">The buffer to store the output points</param>
 	/// <param name="rand">The random context to use</param>
 	/// <returns>The number of bad values</returns>
-	virtual size_t Iterate(Ember<T>& ember, size_t count, size_t skip, Point<T>* samples, QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand)
+	virtual size_t Iterate(Ember<T>& ember, IterParams<T>& params, Point<T>* samples, QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand) override
 	{
 		size_t i, xformIndex;
 		size_t lastXformUsed = 0;
@ -456,7 +469,7 @@ public:
 			{
 				p1 = samples[0];

-				for (i = 0; i < skip; i++)//Fuse.
+				for (i = 0; i < params.m_Skip; i++)//Fuse.
 				{
 					xformIndex = NextXformFromIndex(rand.Rand(), lastXformUsed);

@ -469,7 +482,7 @@ public:
 				DoFinalXform(ember, p1, samples, rand);//Apply to last fuse point and store as the first element in samples.
 				ember.Proj(samples[0], rand);

-				for (i = 1; i < count; i++)//Real loop.
+				for (i = 1; i < params.m_Count; i++)//Real loop.
 				{
 					xformIndex = NextXformFromIndex(rand.Rand(), lastXformUsed);

@ -485,7 +498,7 @@ public:
 			{
 				p1 = samples[0];

-				for (i = 0; i < skip; i++)//Fuse.
+				for (i = 0; i < params.m_Skip; i++)//Fuse.
 				{
 					xformIndex = NextXformFromIndex(rand.Rand(), lastXformUsed);

@ -498,7 +511,7 @@ public:
 				samples[0] = p1;
 				ember.Proj(samples[0], rand);

-				for (i = 1; i < count; i++)//Real loop.
+				for (i = 1; i < params.m_Count; i++)//Real loop.
 				{
 					xformIndex = NextXformFromIndex(rand.Rand(), lastXformUsed);

@ -517,7 +530,7 @@ public:
 			{
 				p1 = samples[0];

-				for (i = 0; i < skip; i++)//Fuse.
+				for (i = 0; i < params.m_Skip; i++)//Fuse.
 				{
 					xformIndex = NextXformFromIndex(rand.Rand(), lastXformUsed);

@ -529,7 +542,7 @@ public:

 				DoFinalXform(ember, p1, samples, rand);//Apply to last fuse point and store as the first element in samples.

-				for (i = 1; i < count; i++)//Real loop.
+				for (i = 1; i < params.m_Count; i++)//Real loop.
 				{
 					xformIndex = NextXformFromIndex(rand.Rand(), lastXformUsed);

@ -544,7 +557,7 @@ public:
 			{
 				p1 = samples[0];

-				for (i = 0; i < skip; i++)//Fuse.
+				for (i = 0; i < params.m_Skip; i++)//Fuse.
 				{
 					xformIndex = NextXformFromIndex(rand.Rand(), lastXformUsed);

@ -556,7 +569,7 @@ public:

 				samples[0] = p1;

-				for (i = 0; i < count - 1; i++)//Real loop.
+				for (i = 0; i < params.m_Count - 1; i++)//Real loop.
 				{
 					xformIndex = NextXformFromIndex(rand.Rand(), lastXformUsed);

--- a/Source/Ember/Renderer.cpp
+++ b/Source/Ember/Renderer.cpp
@ -691,7 +691,7 @@ bool Renderer<T, bucketT>::Alloc()
 		(m_SuperSize         != m_HistBuckets.size())        ||
 		(m_SuperSize         != m_AccumulatorBuckets.size()) ||
 		(m_ThreadsToUse      != m_Samples.size())            ||
-		(m_Samples[0].size() != m_SubBatchSize);
+		(m_Samples[0].size() != SubBatchSize());

 	if (lock)
 		EnterResize();
@ -728,14 +728,14 @@ bool Renderer<T, bucketT>::Alloc()

 	for (size_t i = 0; i < m_Samples.size(); i++)
 	{
-		if (m_Samples[i].size() != m_SubBatchSize)
+		if (m_Samples[i].size() != SubBatchSize())
 		{
-			m_Samples[i].resize(m_SubBatchSize);
+			m_Samples[i].resize(SubBatchSize());

 			if (m_ReclaimOnResize)
 				m_Samples[i].shrink_to_fit();

-			b &= (m_Samples[i].size() == m_SubBatchSize);
+			b &= (m_Samples[i].size() == SubBatchSize());
 		}
 	}

@ -1154,7 +1154,7 @@ eRenderStatus Renderer<T, bucketT>::AccumulatorToFinalImage(unsigned char* pixel
 /// This function will be called multiple times for an interactive rendering, and
 /// once for a straight through render.
 /// The iteration is reset and fused in each thread after each sub batch is done
-/// which by default is 10,000 iterations.
+/// which by default is 10,240 iterations.
 /// </summary>
 /// <param name="iterCount">The number of iterations to run</param>
 /// <param name="temporalSample">The temporal sample this is running for</param>
@ -1164,7 +1164,6 @@ EmberStats Renderer<T, bucketT>::Iterate(size_t iterCount, size_t temporalSample
 {
 	//Timing t2(4);
 	m_IterTimer.Tic();
-	size_t fuse = EarlyClip() ? 100 : 15;//EarlyClip was one way of detecting a later version of flam3, so it used 100 which is a better value.
 	size_t totalItersPerThread = (size_t)ceil((double)iterCount / (double)m_ThreadsToUse);
 	double percent, etaMs;
 	EmberStats stats;
@ -1180,17 +1179,21 @@ EmberStats Renderer<T, bucketT>::Iterate(size_t iterCount, size_t temporalSample
 	parallel_for(size_t(0), m_ThreadsToUse, [&] (size_t threadIndex)
 	{
 #endif
-		Timing t;
-		size_t subBatchSize = (size_t)min(totalItersPerThread, (size_t)m_SubBatchSize);
+		//Timing t;
+		IterParams<T> params;

 		m_BadVals[threadIndex] = 0;
+		params.m_Count = min(totalItersPerThread, SubBatchSize());
+		params.m_Skip = FuseCount();
+		//params.m_OneColDiv2 = m_CarToRas.OneCol() / 2;
+		//params.m_OneRowDiv2 = m_CarToRas.OneRow() / 2;

 		//Sub batch iterations, loop 2.
-		for (m_SubBatch[threadIndex] = 0; (m_SubBatch[threadIndex] < totalItersPerThread) && !m_Abort; m_SubBatch[threadIndex] += subBatchSize)
+		for (m_SubBatch[threadIndex] = 0; (m_SubBatch[threadIndex] < totalItersPerThread) && !m_Abort; m_SubBatch[threadIndex] += params.m_Count)
 		{
-			//Must recalculate the number of iters to run on each sub batch because the last batch will most likely have less than m_SubBatchSize iters.
+			//Must recalculate the number of iters to run on each sub batch because the last batch will most likely have less than SubBatchSize iters.
 			//For example, if 51,000 are requested, and the sbs is 10,000, it should run 5 sub batches of 10,000 iters, and one final sub batch of 1,000 iters.
-			subBatchSize = min(subBatchSize, totalItersPerThread - m_SubBatch[threadIndex]);
+			params.m_Count = min(params.m_Count, totalItersPerThread - m_SubBatch[threadIndex]);

 			//Use first as random point, the rest are iterated points.
 			//Note that this gets reset with a new random point for each subBatchSize iterations.
@ -1203,14 +1206,14 @@ EmberStats Renderer<T, bucketT>::Iterate(size_t iterCount, size_t temporalSample
 			//Finally, iterate.
 			//t.Tic();
 			//Iterating, loop 3.
-			m_BadVals[threadIndex] += m_Iterator->Iterate(m_Ember, subBatchSize, fuse, m_Samples[threadIndex].data(), m_Rand[threadIndex]);
+			m_BadVals[threadIndex] += m_Iterator->Iterate(m_Ember, params, m_Samples[threadIndex].data(), m_Rand[threadIndex]);
 			//iterationTime += t.Toc();

 			if (m_LockAccum)
 				m_AccumCs.Enter();
 			//t.Tic();
 			//Map temp buffer samples into the histogram using the palette for color.
-			Accumulate(m_Samples[threadIndex].data(), subBatchSize, &m_Dmap);
+			Accumulate(m_Rand[threadIndex], m_Samples[threadIndex].data(), params.m_Count, &m_Dmap);
 			//accumulationTime += t.Toc();
 			if (m_LockAccum)
 				m_AccumCs.Leave();
@ -1347,6 +1350,8 @@ template <typename T, typename bucketT> ePaletteMode      Renderer<T, bucketT>::
 template <typename T, typename bucketT> size_t Renderer<T, bucketT>::TemporalSamples() const { return m_Ember.m_TemporalSamples; }
 template <typename T, typename bucketT> size_t Renderer<T, bucketT>::FinalRasW()       const { return m_Ember.m_FinalRasW; }
 template <typename T, typename bucketT> size_t Renderer<T, bucketT>::FinalRasH()       const { return m_Ember.m_FinalRasH; }
+template <typename T, typename bucketT> size_t Renderer<T, bucketT>::SubBatchSize()    const { return m_Ember.m_SubBatchSize; }
+template <typename T, typename bucketT> size_t Renderer<T, bucketT>::FuseCount()	   const { return m_Ember.m_FuseCount; }

 /// <summary>
 /// Non-virtual iterator wrappers.
@ -1396,11 +1401,13 @@ void Renderer<T, bucketT>::PrepFinalAccumVals(Color<T>& background, T& g, T& lin
 /// <param name="sampleCount">The number of samples</param>
 /// <param name="palette">The palette to use</param>
 template <typename T, typename bucketT>
-void Renderer<T, bucketT>::Accumulate(Point<T>* samples, size_t sampleCount, const Palette<bucketT>* palette)
+void Renderer<T, bucketT>::Accumulate(QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand, Point<T>* samples, size_t sampleCount, const Palette<bucketT>* palette)
 {
 	size_t histIndex, intColorIndex, histSize = m_HistBuckets.size();
 	bucketT colorIndex, colorIndexFrac;
 	const glm::detail::tvec4<bucketT, glm::defaultp>* dmap = &(palette->m_Entries[0]);
+	//T oneColDiv2 = m_CarToRas.OneCol() / 2;
+	//T oneRowDiv2 = m_CarToRas.OneRow() / 2;

 	//It's critical to understand what's going on here as it's one of the most important parts of the algorithm.
 	//A color value gets retrieved from the palette and
@ -1413,24 +1420,37 @@ void Renderer<T, bucketT>::Accumulate(Point<T>* samples, size_t sampleCount, con
 	//Splitting these conditionals into separate loops makes no speed difference.
 	for (size_t i = 0; i < sampleCount && !m_Abort; i++)
 	{
+		Point<T> p(samples[i]);//Slightly faster to cache this.
+
 		if (Rotate() != 0)
 		{
-			T p00 = samples[i].m_X - CenterX();
-			T p11 = samples[i].m_Y - m_Ember.m_RotCenterY;
+			T p00 = p.m_X - CenterX();
+			T p11 = p.m_Y - m_Ember.m_RotCenterY;

-			samples[i].m_X = (p00 * m_RotMat.A()) + (p11 * m_RotMat.B()) + CenterX();
-			samples[i].m_Y = (p00 * m_RotMat.D()) + (p11 * m_RotMat.E()) + m_Ember.m_RotCenterY;
+			p.m_X = (p00 * m_RotMat.A()) + (p11 * m_RotMat.B()) + CenterX();
+			p.m_Y = (p00 * m_RotMat.D()) + (p11 * m_RotMat.E()) + m_Ember.m_RotCenterY;
 		}

+		//T angle = rand.Frand01<T>() * M_2PI;
+		//T r = exp(T(0.5) * sqrt(-log(rand.Frand01<T>()))) - 1;
+
+		//T r = (rand.Frand01<T>() + rand.Frand01<T>() - 1);
+		//T r = (rand.Frand01<T>() + rand.Frand01<T>() + rand.Frand01<T>() + rand.Frand01<T>() - 2);
+
+		//p.m_X += (r * oneColDiv2) * cos(angle);
+		//p.m_Y += (r * oneRowDiv2) * sin(angle);
+		//p.m_X += r * cos(angle);
+		//p.m_Y += r * sin(angle);
+
 		//Checking this first before converting gives better performance than converting and checking a single value, which the original did.
 		//Second, an interesting optimization observation is that when keeping the bounds vars within m_CarToRas and calling its InBounds() member function,
 		//rather than here as members, about a 7% speedup is achieved. This is possibly due to the fact that data from m_CarToRas is accessed
 		//right after the call to Convert(), so some caching efficiencies get realized.
-		if (m_CarToRas.InBounds(samples[i]))
+		if (m_CarToRas.InBounds(p))
 		{
-			if (samples[i].m_VizAdjusted != 0)
+			if (p.m_VizAdjusted != 0)
 			{
-				m_CarToRas.Convert(samples[i], histIndex);
+				m_CarToRas.Convert(p, histIndex);

 				//There is a very slim chance that a point will be right on the border and will technically be in bounds, passing the InBounds() test,
 				//but ends up being mapped to a histogram bucket that is out of bounds due to roundoff error. Perform one final check before proceeding.
@ -1445,7 +1465,7 @@ void Renderer<T, bucketT>::Accumulate(Point<T>* samples, size_t sampleCount, con
 					//Use overloaded addition and multiplication operators in vec4 to perform the accumulation.
 					if (PaletteMode() == PALETTE_LINEAR)
 					{
-						colorIndex = (bucketT)samples[i].m_ColorX * COLORMAP_LENGTH;
+						colorIndex = (bucketT)p.m_ColorX * COLORMAP_LENGTH;
 						intColorIndex = (size_t)colorIndex;

 						if (intColorIndex < 0)
@ -1463,19 +1483,19 @@ void Renderer<T, bucketT>::Accumulate(Point<T>* samples, size_t sampleCount, con
 							colorIndexFrac = colorIndex - (bucketT)intColorIndex;//Interpolate between intColorIndex and intColorIndex + 1.
 						}

-						if (samples[i].m_VizAdjusted == 1)
+						if (p.m_VizAdjusted == 1)
 							m_HistBuckets[histIndex] += ((dmap[intColorIndex] * (1 - colorIndexFrac)) + (dmap[intColorIndex + 1] * colorIndexFrac));
 						else
-							m_HistBuckets[histIndex] += (((dmap[intColorIndex] * (1 - colorIndexFrac)) + (dmap[intColorIndex + 1] * colorIndexFrac)) * (bucketT)samples[i].m_VizAdjusted);
+							m_HistBuckets[histIndex] += (((dmap[intColorIndex] * (1 - colorIndexFrac)) + (dmap[intColorIndex + 1] * colorIndexFrac)) * (bucketT)p.m_VizAdjusted);
 					}
 					else if (PaletteMode() == PALETTE_STEP)
 					{
-						intColorIndex = Clamp<size_t>((size_t)(samples[i].m_ColorX * COLORMAP_LENGTH), 0, COLORMAP_LENGTH_MINUS_1);
+						intColorIndex = Clamp<size_t>((size_t)(p.m_ColorX * COLORMAP_LENGTH), 0, COLORMAP_LENGTH_MINUS_1);

-						if (samples[i].m_VizAdjusted == 1)
+						if (p.m_VizAdjusted == 1)
 							m_HistBuckets[histIndex] += dmap[intColorIndex];
 						else
-							m_HistBuckets[histIndex] += (dmap[intColorIndex] * (bucketT)samples[i].m_VizAdjusted);
+							m_HistBuckets[histIndex] += (dmap[intColorIndex] * (bucketT)p.m_VizAdjusted);
 					}
 				}
 			}
--- a/Source/Ember/Renderer.h
+++ b/Source/Ember/Renderer.h
@ -134,6 +134,8 @@ public:
 	virtual size_t TemporalSamples() const override;
 	virtual size_t FinalRasW()       const override;
 	virtual size_t FinalRasH()       const override;
+	virtual size_t SubBatchSize()    const override;
+	virtual size_t FuseCount()		 const override;

 	//Non-virtual iterator wrappers.
 	const unsigned char* XformDistributions()		 const;
@ -144,9 +146,9 @@ protected:
 	//Non-virtual functions that might be needed by a derived class.
 	void PrepFinalAccumVals(Color<T>& background, T& g, T& linRange, T& vibrancy);

-private:
+	private:
 	//Miscellaneous non-virtual functions used only in this class.
-	void Accumulate(Point<T>* samples, size_t sampleCount, const Palette<bucketT>* palette);
+	void Accumulate(QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand, Point<T>* samples, size_t sampleCount, const Palette<bucketT>* palette);
 	/*inline*/ void AddToAccum(const glm::detail::tvec4<bucketT, glm::defaultp>& bucket, intmax_t i, intmax_t ii, intmax_t j, intmax_t jj);
 	template <typename accumT> void GammaCorrection(glm::detail::tvec4<bucketT, glm::defaultp>& bucket, Color<T>& background, T g, T linRange, T vibrancy, bool doAlpha, bool scale, accumT* correctedChannels);

--- a/Source/Ember/RendererBase.cpp
+++ b/Source/Ember/RendererBase.cpp
@ -15,7 +15,6 @@ RendererBase::RendererBase()
 	m_YAxisUp = false;
 	m_InsertPalette = false;
 	m_ReclaimOnResize = false;
-	m_SubBatchSize = 1024 * 10;
 	m_NumChannels = 3;
 	m_BytesPerChannel = 1;
 	m_SuperSize = 0;
@ -412,17 +411,6 @@ void RendererBase::Transparency(bool transparency)
 	ChangeVal([&] { m_Transparency = transparency; }, ACCUM_ONLY);
 }

-/// <summary>
-/// Set the sub batch size. This is the size of of the chunks that the iteration
-/// trajectory will be broken up into.
-/// Reset the rendering process.
-/// </summary>
-/// <param name="sbs">The sub batch size to set</param>
-void RendererBase::SubBatchSize(size_t sbs)
-{
-	ChangeVal([&] { m_SubBatchSize = sbs; }, FULL_RENDER);
-}
-
 /// <summary>
 /// Set the callback object.
 /// </summary>
@ -583,14 +571,6 @@ void RendererBase::NumChannels(size_t numChannels)
 /// <returns>The number of threads used when rendering</returns>
 size_t RendererBase::ThreadCount() const { return m_ThreadsToUse; }

-/// <summary>
-/// Get the sub batch size. This is the size of of the chunks that the iteration
-/// trajectory will be broken up into.
-/// Default: 10k.
-/// </summary>
-/// <returns>The sub batch size</returns>
-size_t RendererBase::SubBatchSize() const { return m_SubBatchSize; }
-
 /// <summary>
 /// Get the renderer type enum.
 /// CPU_RENDERER for this class, other values for derived classes.
--- a/Source/Ember/RendererBase.h
+++ b/Source/Ember/RendererBase.h
@ -149,7 +149,6 @@ public:
 	void ReclaimOnResize(bool reclaimOnResize);
 	bool Transparency() const;
 	void Transparency(bool transparency);
-	void SubBatchSize(size_t subBatchSize);
 	void Callback(RenderCallback* callback);
 	void ThreadCount(size_t threads, const char* seedString = nullptr);
 	size_t BytesPerChannel() const;
@ -161,7 +160,6 @@ public:
 	//Virtual render properties, getters and setters.
 	virtual void NumChannels(size_t numChannels);
 	virtual size_t ThreadCount()   const;
-	virtual size_t SubBatchSize()  const;
 	virtual eRendererType RendererType() const;

 	//Abstract render properties, getters only.
@ -169,6 +167,8 @@ public:
 	virtual size_t HistBucketSize()				   const = 0;
 	virtual size_t FinalRasW()		               const = 0;
 	virtual size_t FinalRasH()					   const = 0;
+	virtual size_t SubBatchSize()				   const = 0;
+	virtual size_t FuseCount()					   const = 0;
 	virtual double ScaledQuality()                 const = 0;
 	virtual double LowerLeftX(bool  gutter = true) const = 0;
 	virtual double LowerLeftY(bool  gutter = true) const = 0;
@ -207,10 +207,8 @@ protected:
 	size_t m_DensityFilterOffset;
 	size_t m_NumChannels;
 	size_t m_BytesPerChannel;
-	size_t m_SubBatchSize;
 	size_t m_ThreadsToUse;
 	size_t m_VibGamCount;
-	size_t m_LastPass;
 	size_t m_LastTemporalSample;
 	double m_LastIterPercent;
 	size_t m_LastIter;
--- a/Source/Ember/SheepTools.h
+++ b/Source/Ember/SheepTools.h
@ -879,7 +879,6 @@ public:
 		m_Renderer->EarlyClip(true);
 		m_Renderer->PixelAspectRatio(1);
 		m_Renderer->ThreadCount(Timing::ProcessorCount());
-		m_Renderer->SubBatchSize(10000);
 		m_Renderer->Callback(nullptr);

 		if (m_Renderer->Run(m_FinalImage) != RENDER_OK)
@ -1280,8 +1279,16 @@ public:
 	/// <returns>The number of iterations ran</returns>
 	size_t EstimateBoundingBox(Ember<T>& ember, T eps, size_t samples, T* bmin, T* bmax)
 	{
+		bool newAlloc = false;
 		size_t i, lowTarget, highTarget;
 		T min[2], max[2];
+		IterParams<T> params;
+
+		m_Renderer->SetEmber(ember);
+		m_Renderer->CreateSpatialFilter(newAlloc);
+		m_Renderer->CreateDEFilter(newAlloc);
+		m_Renderer->ComputeBounds();
+		m_Renderer->ComputeCamera();

 		if (ember.XaosPresent())
 			m_Iterator = m_XaosIterator.get();
@ -1290,8 +1297,12 @@ public:

 		m_Iterator->InitDistributions(ember);
 		m_Samples.resize(samples);
+		params.m_Count = samples;
+		params.m_Skip = 20;
+		//params.m_OneColDiv2 = m_Renderer->CoordMap()->OneCol() / 2;
+		//params.m_OneRowDiv2 = m_Renderer->CoordMap()->OneRow() / 2;

-		size_t bv = m_Iterator->Iterate(ember, samples, 20, m_Samples.data(), m_Rand);//Use a special fuse of 20, all other calls to this will use 15, or 100.
+		size_t bv = m_Iterator->Iterate(ember, params, m_Samples.data(), m_Rand);//Use a special fuse of 20, all other calls to this will use 15, or 100.

 		if (bv / T(samples) > eps)
 			eps = 3 * bv / T(samples);
--- a/Source/Ember/Utils.h
+++ b/Source/Ember/Utils.h
@ -33,6 +33,17 @@ static inline void ForEach(c& container, fn func)
 	std::for_each(container.begin(), container.end(), func);
 }

+/// <summary>
+/// Thin wrapper around computing the total size of a vector.
+/// </summary>
+/// <param name="vec">The vector to compute the size of</param>
+/// <returns>The size of one element times the length.</returns>
+template<typename T>
+static inline size_t SizeOf(vector<T>& vec)
+{
+	return sizeof(vec[0]) * vec.size();
+}
+
 /// <summary>
 /// After a run completes, information about what was run can be saved as strings to the comments
 /// section of a jpg or png file. This class is just a container for those values.
@ -276,7 +287,7 @@ static void ClearVec(vector<T*>& vec, bool arrayDelete = false)
 template<typename T>
 static inline void Memset(vector<T>& vec, int val = 0)
 {
-	memset((void*)vec.data(), val, vec.size() * sizeof(vec[0]));
+	memset((void*)vec.data(), val, SizeOf(vec));
 }

 /// <summary>
--- a/Source/Ember/XmlToEmber.h
+++ b/Source/Ember/XmlToEmber.h
@ -593,6 +593,8 @@ private:
 			else if (ParseAndAssignInt(curAtt->name, attStr, "oversample",       currentEmber.m_Supersample    , ret)) { }
 			else if (ParseAndAssignInt(curAtt->name, attStr, "supersample",      currentEmber.m_Supersample    , ret)) { }
 			else if (ParseAndAssignInt(curAtt->name, attStr, "temporal_samples", currentEmber.m_TemporalSamples, ret)) { }
+			else if (ParseAndAssignInt(curAtt->name, attStr, "sub_batch_size",	 currentEmber.m_SubBatchSize   , ret)) { }
+			else if (ParseAndAssignInt(curAtt->name, attStr, "fuse",			 currentEmber.m_FuseCount	   , ret)) { }
 			else if (ParseAndAssignInt(curAtt->name, attStr, "soloxform",		 soloXform                     , ret)) { }
 			else if (ParseAndAssignInt(curAtt->name, attStr, "new_linear",		 newLinear					   , ret)) { }