2014-07-08 03:11:14 -04:00
# include "EmberPch.h"
# include "Renderer.h"
namespace EmberNs
{
/// <summary>
/// Constructor that sets default values and allocates iterators.
/// The thread count is set to the number of cores detected on the system.
/// </summary>
template < typename T , typename bucketT >
Renderer < T , bucketT > : : Renderer ( )
{
m_Abort = false ;
m_LockAccum = false ;
m_EarlyClip = false ;
2014-07-26 15:03:51 -04:00
m_YAxisUp = false ;
2014-07-08 03:11:14 -04:00
m_InsertPalette = false ;
m_ReclaimOnResize = false ;
m_SubBatchSize = 1024 * 10 ;
m_NumChannels = 3 ;
m_BytesPerChannel = 1 ;
m_SuperSize = 0 ;
m_PixelAspectRatio = 1 ;
m_Transparency = false ;
ThreadCount ( Timing : : ProcessorCount ( ) ) ;
m_StandardIterator = auto_ptr < StandardIterator < T > > ( new StandardIterator < T > ( ) ) ;
m_XaosIterator = auto_ptr < XaosIterator < T > > ( new XaosIterator < T > ( ) ) ;
m_Iterator = m_StandardIterator . get ( ) ;
m_Callback = NULL ;
m_ProgressParameter = NULL ;
m_LastPass = 0 ;
m_LastTemporalSample = 0 ;
m_LastIter = 0 ;
m_LastIterPercent = 0 ;
m_InteractiveFilter = FILTER_LOG ;
m_ProcessState = NONE ;
m_ProcessAction = FULL_RENDER ;
m_InRender = false ;
m_InFinalAccum = false ;
}
/// <summary>
/// Virtual destructor so derived class destructors get called.
/// </summary>
template < typename T , typename bucketT >
Renderer < T , bucketT > : : ~ Renderer ( )
{
}
/// <summary>
/// Compute the bounds of the histogram and density filtering buffers.
/// These are affected by the final requested dimensions, spatial and density
/// filter sizes and supersampling.
/// </summary>
template < typename T , typename bucketT >
void Renderer < T , bucketT > : : ComputeBounds ( )
{
unsigned int maxDEFilterWidth = 0 ;
m_GutterWidth = ClampGte ( ( m_SpatialFilter - > FinalFilterWidth ( ) - Supersample ( ) ) / 2 , 0u ) ;
//Check the size of the density estimation filter.
//If the radius of the density estimation filter is greater than the
//gutter width, have to pad with more. Otherwise, use the same value.
for ( unsigned int i = 0 ; i < m_Embers . size ( ) ; i + + )
maxDEFilterWidth = max ( ( unsigned int ) ( ceil ( m_Embers [ i ] . m_MaxRadDE ) * m_Ember . m_Supersample ) , maxDEFilterWidth ) ;
//Need an extra ss = (int)floor(m_Supersample / 2.0) of pixels so that a local iteration count for DE can be determined.//SMOULDER
if ( maxDEFilterWidth > 0 )
maxDEFilterWidth + = ( unsigned int ) Floor < T > ( m_Ember . m_Supersample / T ( 2 ) ) ;
//To have a fully present set of pixels for the spatial filter, must
//add the DE filter width to the spatial filter width.//SMOULDER
m_DensityFilterOffset = maxDEFilterWidth ;
m_GutterWidth + = m_DensityFilterOffset ;
m_SuperRasW = ( Supersample ( ) * FinalRasW ( ) ) + ( 2 * m_GutterWidth ) ;
m_SuperRasH = ( Supersample ( ) * FinalRasH ( ) ) + ( 2 * m_GutterWidth ) ;
m_SuperSize = m_SuperRasW * m_SuperRasH ;
}
/// <summary>
/// Compute the camera.
/// This sets up the bounds of the cartesian plane that the raster bounds correspond to.
/// This must be called after ComputeBounds() which sets up the raster bounds.
/// </summary>
template < typename T , typename bucketT >
void Renderer < T , bucketT > : : ComputeCamera ( )
{
m_Scale = pow ( T ( 2.0 ) , Zoom ( ) ) ;
m_ScaledQuality = Quality ( ) * m_Scale * m_Scale ;
m_PixelsPerUnitX = PixelsPerUnit ( ) * m_Scale ;
m_PixelsPerUnitY = m_PixelsPerUnitX ;
m_PixelsPerUnitX / = PixelAspectRatio ( ) ;
T shift = 0 ;
T t0 = T ( m_GutterWidth ) / ( Supersample ( ) * m_PixelsPerUnitX ) ;
T t1 = T ( m_GutterWidth ) / ( Supersample ( ) * m_PixelsPerUnitY ) ;
//These go from ll to ur, moving from negative to positive.
m_LowerLeftX = CenterX ( ) - FinalRasW ( ) / m_PixelsPerUnitX / T ( 2.0 ) ;
m_LowerLeftY = CenterY ( ) - FinalRasH ( ) / m_PixelsPerUnitY / T ( 2.0 ) ;
m_UpperRightX = m_LowerLeftX + FinalRasW ( ) / m_PixelsPerUnitX ;
m_UpperRightY = m_LowerLeftY + FinalRasH ( ) / m_PixelsPerUnitY ;
T carLlX = m_LowerLeftX - t0 ;
T carLlY = m_LowerLeftY - t1 + shift ;
T carUrX = m_UpperRightX + t0 ;
T carUrY = m_UpperRightY + t1 + shift ;
m_RotMat . MakeID ( ) ;
m_RotMat . Rotate ( - Rotate ( ) ) ;
m_CarToRas . Init ( carLlX , carLlY , carUrX , carUrY , m_SuperRasW , m_SuperRasH , PixelAspectRatio ( ) ) ;
}
/// <summary>
/// Abort the render and call a function to do something, most likely change a value.
/// Then update the current process action to the one specified.
/// The current process action will only be set if it makes sense based
/// on the current process state. If the value specified doesn't make sense
/// the next best choice will be made. If nothing makes sense, a complete
/// re-render will be triggered on the next call to Run().
/// </summary>
/// <param name="func">The function to execute</param>
/// <param name="action">The desired process action</param>
template < typename T , typename bucketT >
void Renderer < T , bucketT > : : ChangeVal ( std : : function < void ( void ) > func , eProcessAction action )
{
Abort ( ) ;
EnterRender ( ) ;
func ( ) ;
//If they want a full render, don't bother inspecting process state, just start over.
if ( action = = FULL_RENDER )
{
m_ProcessState = NONE ;
m_ProcessAction = FULL_RENDER ;
}
//Keep iterating is when rendering has completed and the user increases the quality.
//Rendering can be started where it left off by adding just the difference between the
//new and old quality values.
else if ( action = = KEEP_ITERATING )
{
if ( m_ProcessState = = ACCUM_DONE & & TemporalSamples ( ) = = 1 & & Passes ( ) = = 1 )
{
m_ProcessState = ITER_STARTED ;
m_ProcessAction = KEEP_ITERATING ;
}
else //Invaid process state to handle KEEP_ITERATING, so just start over.
{
m_ProcessState = NONE ;
m_ProcessAction = FULL_RENDER ;
}
}
else if ( action = = FILTER_AND_ACCUM )
{
//If in the middle of a render, cannot skip to filtering or accum, so just start over.
if ( m_ProcessState = = NONE | | m_ProcessState = = ITER_STARTED )
{
m_ProcessState = NONE ;
m_ProcessAction = FULL_RENDER ;
}
//If passes == 1, set the state to ITER_DONE and the next process action to FILTER_AND_ACCUM.
else
{
m_ProcessState = Passes ( ) = = 1 ? ITER_DONE : NONE ;
m_ProcessAction = Passes ( ) = = 1 ? FILTER_AND_ACCUM : FULL_RENDER ; //Cannot just filter if passes > 1 because filtering is done with each pass.
}
}
//Run accum only.
else if ( action = = ACCUM_ONLY )
{
//Doesn't make sense if in the middle of iterating, so just start over.
if ( m_ProcessState = = NONE | | m_ProcessState = = ITER_STARTED )
{
m_ProcessAction = FULL_RENDER ;
}
else if ( m_ProcessState = = ITER_DONE ) //If iterating is done, can start at density filtering and proceed.
{
m_ProcessAction = FILTER_AND_ACCUM ;
}
else if ( m_ProcessState = = FILTER_DONE ) //Density filtering is done, so the process action is assigned as desired.
{
m_ProcessAction = ACCUM_ONLY ;
}
else if ( m_ProcessState = = ACCUM_DONE ) //Final accum is done, so back up and run final accum again.
{
m_ProcessState = FILTER_DONE ;
m_ProcessAction = ACCUM_ONLY ;
}
}
LeaveRender ( ) ;
}
/// <summary>
/// Set the current ember.
/// This will also populate the vector of embers with a single element copy
/// of the ember passed in.
/// Temporal samples will be set to 1 since there's only a single ember.
/// </summary>
/// <param name="ember">The ember to assign</param>
/// <param name="action">The requested process action. Note that it's critical the user supply the proper value here.
/// For example: Changing dimensions without setting action to FULL_RENDER will crash the program.
/// However, changing only the brightness and setting action to ACCUM_ONLY is perfectly fine.
/// </param>
template < typename T , typename bucketT >
void Renderer < T , bucketT > : : SetEmber ( Ember < T > & ember , eProcessAction action )
{
ChangeVal ( [ & ]
{
m_Embers . clear ( ) ;
m_Embers . push_back ( ember ) ;
m_Embers [ 0 ] . m_TemporalSamples = 1 ; //Set temporal samples here to 1 because using the real value only makes sense when using a vector of Embers for animation.
m_Ember = m_Embers [ 0 ] ;
} , action ) ;
}
/// <summary>
/// Set the vector of embers and set the m_Ember member to a copy of the first element.
/// Reset the rendering process.
/// </summary>
/// <param name="embers">The vector of embers</param>
template < typename T , typename bucketT >
void Renderer < T , bucketT > : : SetEmber ( vector < Ember < T > > & embers )
{
ChangeVal ( [ & ]
{
m_Embers = embers ;
if ( ! m_Embers . empty ( ) )
m_Ember = m_Embers [ 0 ] ;
} , FULL_RENDER ) ;
}
/// <summary>
/// Add an ember to the end of the embers vector and reset the rendering process.
/// Reset the rendering process.
/// </summary>
/// <param name="ember">The ember to add</param>
template < typename T , typename bucketT >
void Renderer < T , bucketT > : : AddEmber ( Ember < T > & ember )
{
ChangeVal ( [ & ]
{
m_Embers . push_back ( ember ) ;
if ( m_Embers . size ( ) = = 1 )
m_Ember = m_Embers [ 0 ] ;
} , FULL_RENDER ) ;
}
/// <summary>
/// Create the temporal filter if the current filter parameters differ
/// from the last temporal filter created.
/// </summary>
/// <param name="newAlloc">True if a new filter instance was created, else false.</param>
/// <returns>True if the filter is not NULL (whether a new one was created or not), else false.</returns>
template < typename T , typename bucketT >
bool Renderer < T , bucketT > : : CreateTemporalFilter ( bool & newAlloc )
{
newAlloc = false ;
//Use intelligent testing so it isn't created every time a new ember is passed in.
2014-08-03 19:16:10 -04:00
if ( ( ! m_TemporalFilter . get ( ) ) | |
( m_Ember . m_Passes ! = m_TemporalFilter - > Passes ( ) ) | |
( m_Ember . m_TemporalSamples ! = m_TemporalFilter - > TemporalSamples ( ) ) | |
2014-07-08 03:11:14 -04:00
( m_Ember . m_TemporalFilterType ! = m_TemporalFilter - > FilterType ( ) ) | |
2014-08-03 19:16:10 -04:00
( m_Ember . m_TemporalFilterWidth ! = m_TemporalFilter - > FilterWidth ( ) ) | |
( m_Ember . m_TemporalFilterExp ! = m_TemporalFilter - > FilterExp ( ) ) )
2014-07-08 03:11:14 -04:00
{
m_TemporalFilter = auto_ptr < TemporalFilter < T > > (
TemporalFilterCreator < T > : : Create ( m_Ember . m_TemporalFilterType , m_Ember . m_Passes , m_Ember . m_TemporalSamples , m_Ember . m_TemporalFilterWidth , m_Ember . m_TemporalFilterExp ) ) ;
newAlloc = true ;
}
return m_TemporalFilter . get ( ) ! = NULL ;
}
/// <summary>
/// Resize the passed in vector to be large enough to handle the output image.
/// If m_ReclaimOnResize is true, and the vector is already larger than needed,
/// it will be shrunk to the needed size. However if m_ReclaimOnResize is false,
/// it will be left alone if already large enough.
/// ComputeBounds() must be called before calling this function.
/// </summary>
/// <param name="pixels">The vector to allocate</param>
/// <returns>True if the vector contains enough space to hold the output image</returns>
template < typename T , typename bucketT >
bool Renderer < T , bucketT > : : PrepFinalAccumVector ( vector < unsigned char > & pixels )
{
EnterResize ( ) ;
size_t size = FinalBufferSize ( ) ;
if ( m_ReclaimOnResize )
{
if ( pixels . size ( ) ! = size )
2014-08-03 19:16:10 -04:00
{
2014-07-08 03:11:14 -04:00
pixels . resize ( size ) ;
pixels . shrink_to_fit ( ) ;
2014-08-03 19:16:10 -04:00
}
2014-07-08 03:11:14 -04:00
}
else
{
if ( pixels . size ( ) < size )
pixels . resize ( size ) ;
}
LeaveResize ( ) ;
return pixels . size ( ) > = size ; //Ensure allocation went ok.
}
/// <summary>
/// The main render loop. This is the core of the algorithm.
/// The processing steps are: Iterating, density filtering, final accumulation.
/// Various functions in it are virtual so they will resolve
/// to whatever overrides are provided in derived classes. This
/// future-proofs the algorithm for GPU-based renderers.
/// If the caller calls Abort() at any time, or the progress function returns 0,
/// the entire rendering process will exit as soon as it can.
/// The loop structure is:
/// {
/// Passes (Default 1)
/// {
/// Temporal Samples (Default 1 for single image)
/// {
/// Iterate (Either to completion or to a specified number of iterations)
/// {
/// }
/// }
/// }
///
/// Density filtering (Basic log, or full density estimation)
/// Final accumulation (Color correction and spatial filtering)
/// }
/// This loop structure has admittedly been severely butchered from what
/// flam3 did. The reason is that it was made to support interactive rendering
/// that can exit the process and pick up where it left off in response to the
/// user changing values in a fractal flame GUI editor.
/// To achieve this, each step in the rendering process is given an enumeration state
/// as well as a goto label. This allows the renderer to pick up in the state it left
/// off in if no changes prohibiting that have been made.
/// It also allows for the bare minimum amount of processing needed to complete the requested
/// action. For example, if the process has completed and the user only adjusts the brightness
/// of the last rendered ember then there is no need to perform the entire iteration process
/// over again. Rather, only final accumulation is needed.
/// </summary>
/// <param name="finalImage">Storage for the final image. It will be allocated if needed.</param>
/// <param name="time">The time if animating, else ignored.</param>
/// <param name="subBatchCountOverride">Run a specified number of sub batches. Default: 0, meaning run to completion.</param>
/// <param name="forceOutput">True to force rendering a complete image even if iterating is not complete, else don't. Default: false.</param>
/// <param name="finalOffset">Offset in finalImage to store the pixels to. Default: 0.</param>
/// <returns>True if nothing went wrong, else false.</returns>
template < typename T , typename bucketT >
eRenderStatus Renderer < T , bucketT > : : Run ( vector < unsigned char > & finalImage , double time , unsigned int subBatchCountOverride , bool forceOutput , size_t finalOffset )
{
m_InRender = true ;
EnterRender ( ) ;
m_Abort = false ;
bool filterAndAccumOnly = ( m_ProcessAction = = FILTER_AND_ACCUM & & Passes ( ) = = 1 ) ;
bool accumOnly = m_ProcessAction = = ACCUM_ONLY ;
bool resume = m_ProcessState ! = NONE ;
bool newFilterAlloc ;
unsigned int temporalSample , pass ;
2014-09-01 00:25:15 -04:00
T deTime ;
2014-07-08 03:11:14 -04:00
eRenderStatus success = RENDER_OK ;
2014-09-01 00:25:15 -04:00
//double iterationTime = 0;
//double accumulationTime = 0;
2014-07-08 03:11:14 -04:00
//Timing it;
//Reset timers and progress percent if: Beginning anew or only filtering and/or accumulating.
if ( ! resume | | accumOnly | | filterAndAccumOnly )
{
if ( ! resume ) //Only set this if it's the first run through.
m_ProcessState = ITER_STARTED ;
m_RenderTimer . Tic ( ) ;
m_ProgressTimer . Tic ( ) ;
}
if ( ! resume ) //Beginning, reset everything.
{
m_LastPass = 0 ;
m_LastTemporalSample = 0 ;
m_LastIter = 0 ;
m_LastIterPercent = 0 ;
2014-08-06 00:50:52 -04:00
m_Stats . Clear ( ) ;
2014-07-08 03:11:14 -04:00
m_Gamma = 0 ;
m_Vibrancy = 0 ; //Accumulate these after each temporal sample.
m_VibGamCount = 0 ;
m_Background . Clear ( ) ;
}
//User requested an increase in quality after finishing.
else if ( m_ProcessState = = ITER_STARTED & & m_ProcessAction = = KEEP_ITERATING & & TemporalSamples ( ) = = 1 & & Passes ( ) = = 1 )
{
m_LastPass = 0 ;
m_LastTemporalSample = 0 ;
m_LastIter = m_Stats . m_Iters ;
m_LastIterPercent = 0 ; //Might skip a progress update, but shouldn't matter.
m_Gamma = 0 ;
m_Vibrancy = 0 ;
m_VibGamCount = 0 ;
m_Background . Clear ( ) ;
}
pass = ( resume ? m_LastPass : 0 ) ;
//Make sure values are within valid range.
ClampGteRef ( m_Ember . m_Passes , 1u ) ;
ClampGteRef ( m_Ember . m_Supersample , 1u ) ;
//Make sure to get most recent update since loop won't be entered to call Interp().
//Vib, gam and background are normally summed for each temporal sample. However if iteration is skipped, make sure to get the latest.
if ( ( filterAndAccumOnly | | accumOnly ) & & TemporalSamples ( ) = = 1 ) //Disallow jumping when temporal samples > 1.
{
m_Ember = m_Embers [ 0 ] ;
m_Vibrancy = m_Ember . m_Vibrancy ;
m_Gamma = m_Ember . m_Gamma ;
m_Background = m_Ember . m_Background ;
if ( filterAndAccumOnly )
goto FilterAndAccum ;
if ( accumOnly )
goto AccumOnly ;
}
//it.Tic();
//Interpolate.
if ( m_Embers . size ( ) > 1 )
Interpolater < T > : : Interpolate ( m_Embers , T ( time ) , 0 , m_Ember ) ;
//it.Toc("Interp 1");
//Save only for palette insertion.
if ( m_InsertPalette & & BytesPerChannel ( ) = = 1 )
m_TempEmber = m_Ember ;
//Field would go here, however Ember omits it. Would need temps for width and height if ever implemented.
CreateSpatialFilter ( newFilterAlloc ) ;
CreateTemporalFilter ( newFilterAlloc ) ;
ComputeBounds ( ) ;
if ( m_SpatialFilter . get ( ) = = NULL | | m_TemporalFilter . get ( ) = = NULL )
{
m_ErrorReport . push_back ( " Spatial and temporal filter allocations failed, aborting. \n " ) ;
success = RENDER_ERROR ;
goto Finish ;
}
if ( ! resume & & ! Alloc ( ) )
{
m_ErrorReport . push_back ( " Histogram, accumulator and samples buffer allocations failed, aborting. \n " ) ;
success = RENDER_ERROR ;
goto Finish ;
}
if ( ! resume )
ResetBuckets ( true , false ) ; //Only reset hist here and do accum when needed later on.
//Passes, outermost loop 1.
for ( ; ( pass < Passes ( ) ) & & ! m_Abort ; )
{
2014-09-01 00:25:15 -04:00
deTime = T ( time ) + m_TemporalFilter - > Deltas ( ) [ pass * m_Ember . m_TemporalSamples ] ;
2014-07-08 03:11:14 -04:00
//Interpolate and get an ember for DE purposes.
//Additional interpolation will be done in the temporal samples loop.
//it.Tic();
if ( m_Embers . size ( ) > 1 )
Interpolater < T > : : Interpolate ( m_Embers , deTime , 0 , m_Ember ) ;
//it.Toc("Interp 2");
ClampGte < T > ( m_Ember . m_MinRadDE , 0 ) ;
ClampGte < T > ( m_Ember . m_MaxRadDE , 0 ) ;
if ( ! CreateDEFilter ( newFilterAlloc ) )
{
m_ErrorReport . push_back ( " Density filter creation failed, aborting. \n " ) ;
success = RENDER_ERROR ;
goto Finish ;
}
//Temporal samples, loop 2.
temporalSample = resume ? m_LastTemporalSample : 0 ;
for ( ; ( temporalSample < TemporalSamples ( ) ) & & ! m_Abort ; )
{
T colorScalar = m_TemporalFilter - > Filter ( ) [ pass * TemporalSamples ( ) + temporalSample ] ;
T temporalTime = T ( time ) + m_TemporalFilter - > Deltas ( ) [ pass * TemporalSamples ( ) + temporalSample ] ;
//Interpolate again.
//it.Tic();
if ( m_Embers . size ( ) > 1 )
Interpolater < T > : : Interpolate ( m_Embers , temporalTime , 0 , m_Ember ) ; //This will perform all necessary precalcs via the ember/xform/variation assignment operators.
//it.Toc("Interp 3");
if ( ! resume & & ! AssignIterator ( ) )
{
m_ErrorReport . push_back ( " Iterator assignment failed, aborting. \n " ) ;
success = RENDER_ERROR ;
goto Finish ;
}
ComputeCamera ( ) ;
//For each temporal sample, the palette m_Dmap needs to be re-created with color scalar. 1 if no temporal samples.
MakeDmap ( colorScalar ) ;
//The actual number of times to iterate. Each thread will get (totalIters / ThreadCount) iters to do.
//This is based on zoom and scale calculated in ComputeCamera().
//Note that the iter count is based on the final image dimensions, and not the super sampled dimensions.
2014-09-01 00:25:15 -04:00
uint64_t totalIterCount = TotalIterCount ( ) ;
uint64_t itersPerTemporalSample = ItersPerTemporalSample ( ) ; //The total number of iterations for this temporal sample in this pass without overrides.
uint64_t sampleItersToDo ; //The number of iterations to actually do in this sample in this pass, considering overrides.
2014-07-08 03:11:14 -04:00
if ( subBatchCountOverride > 0 )
sampleItersToDo = subBatchCountOverride * SubBatchSize ( ) * ThreadCount ( ) ; //Run a specific number of sub batches.
else
sampleItersToDo = itersPerTemporalSample ; //Run as many iters as specified to complete this temporal sample.
sampleItersToDo = min ( sampleItersToDo , itersPerTemporalSample - m_LastIter ) ;
EmberStats stats = Iterate ( sampleItersToDo , pass , temporalSample ) ; //The heavy work is done here.
//If no iters were executed, something went catastrophically wrong.
if ( stats . m_Iters = = 0 )
{
m_ErrorReport . push_back ( " Zero iterations ran, rendering failed, aborting. \n " ) ;
success = RENDER_ERROR ;
Abort ( ) ;
goto Finish ;
}
if ( m_Abort )
{
success = RENDER_ABORT ;
goto Finish ;
}
//Accumulate stats whether this batch ran to completion or exited prematurely.
m_LastIter + = stats . m_Iters ; //Sum of iter count of all threads, reset each temporal sample.
m_Stats . m_Iters + = stats . m_Iters ; //Sum of iter count of all threads, cumulative from beginning to end.
m_Stats . m_Badvals + = stats . m_Badvals ;
2014-08-06 00:50:52 -04:00
m_Stats . m_IterMs + = stats . m_IterMs ;
2014-07-08 03:11:14 -04:00
//After each temporal sample, accumulate these.
//Allow for incremental rendering by only taking action if the iter loop for this temporal sample is completely done.
if ( m_LastIter > = itersPerTemporalSample )
{
m_Vibrancy + = m_Ember . m_Vibrancy ;
m_Gamma + = m_Ember . m_Gamma ;
m_Background . r + = m_Ember . m_Background . r ;
m_Background . g + = m_Ember . m_Background . g ;
m_Background . b + = m_Ember . m_Background . b ;
m_VibGamCount + + ;
m_LastIter = 0 ;
temporalSample + + ;
}
m_LastTemporalSample = temporalSample ;
if ( subBatchCountOverride > 0 ) //Don't keep going through this loop if only doing an incremental render.
break ;
} //Temporal samples.
//If we've completed all temporal samples and all passes, then it was a complete render, so report progress.
if ( ( Passes ( ) = = 1 | | pass = = Passes ( ) - 1 ) & & ( temporalSample > = TemporalSamples ( ) ) )
{
m_ProcessState = ITER_DONE ;
if ( m_Callback & & ! m_Callback - > ProgressFunc ( m_Ember , m_ProgressParameter , 100.0 , 0 , 0 ) )
{
Abort ( ) ;
success = RENDER_ABORT ;
goto Finish ;
}
}
FilterAndAccum :
if ( filterAndAccumOnly | | temporalSample > = TemporalSamples ( ) | | forceOutput )
{
//t.Toc("Iterating and accumulating");
//Compute k1 and k2.
eRenderStatus fullRun = RENDER_OK ; //Whether density filtering was run to completion without aborting prematurely or triggering an error.
T passFilter = T ( 1 ) / T ( Passes ( ) ) ; //Original used an array, but every element in the array had the same value, so just use a single value here.
T area = FinalRasW ( ) * FinalRasH ( ) / ( m_PixelsPerUnitX * m_PixelsPerUnitY ) ; //Need to use temps from field if ever implemented.
m_K1 = ( Brightness ( ) * T ( 268.0 ) * passFilter ) / 256 ;
//When doing an interactive render, force output early on in the render process, before all iterations are done.
//This presents a problem with the normal calculation of K2 since it relies on the quality value; it will scale the colors
//to be very dark. Correct it by pretending the number of iters done is the exact quality desired and then scale according to that.
if ( forceOutput )
{
T quality = ( ( T ) m_Stats . m_Iters / ( T ) FinalDimensions ( ) ) * ( m_Scale * m_Scale ) ;
m_K2 = ( Supersample ( ) * Supersample ( ) * Passes ( ) ) / ( area * quality * m_TemporalFilter - > SumFilt ( ) ) ;
}
else
m_K2 = ( Supersample ( ) * Supersample ( ) * Passes ( ) ) / ( area * m_ScaledQuality * m_TemporalFilter - > SumFilt ( ) ) ;
if ( filterAndAccumOnly | | pass = = 0 )
ResetBuckets ( false , true ) ; //Only the histogram was reset above, now reset the density filtering buffer.
//t.Tic();
//Apply appropriate filter if iterating is complete.
if ( filterAndAccumOnly | | temporalSample > = TemporalSamples ( ) )
{
fullRun = m_DensityFilter . get ( ) ? GaussianDensityFilter ( ) : LogScaleDensityFilter ( ) ;
}
else
{
//Apply requested filter for a forced output during interactive rendering.
if ( m_DensityFilter . get ( ) & & m_InteractiveFilter = = FILTER_DE )
fullRun = GaussianDensityFilter ( ) ;
else if ( ! m_DensityFilter . get ( ) | | m_InteractiveFilter = = FILTER_LOG )
fullRun = LogScaleDensityFilter ( ) ;
}
//Only update state if iterating and filtering finished completely (didn't arrive here via forceOutput).
if ( fullRun = = RENDER_OK & & m_ProcessState = = ITER_DONE & & ( Passes ( ) = = 1 | | pass = = Passes ( ) - 1 ) )
m_ProcessState = FILTER_DONE ;
//Take special action if filtering exited prematurely.
if ( fullRun ! = RENDER_OK )
{
if ( Passes ( ) > 1 ) //Since all filtering is cummulative with passes > 1, must restart the entire process.
{
m_ProcessState = NONE ;
m_ProcessAction = FULL_RENDER ;
}
ResetBuckets ( false , true ) ; //Reset the accumulator, come back and try again on the next call.
success = fullRun ;
goto Finish ;
}
if ( m_Abort )
{
success = RENDER_ABORT ;
goto Finish ;
}
//t.Toc("Density estimation filtering time: ", true);
}
//Only increment pass if the temporal samples loop has been completed, which could have been done incrementally.
//Also skip if rendering jumped straight here after completely finishing beforehand.
if ( ! filterAndAccumOnly & & temporalSample > = TemporalSamples ( ) ) //This may not work if filtering was prematurely exited.
pass + + ;
if ( ! filterAndAccumOnly )
m_LastPass = pass ;
if ( subBatchCountOverride > 0 ) //Don't keep going through this loop if only doing an incremental render.
break ;
} //Passes.
AccumOnly :
if ( m_ProcessState = = FILTER_DONE | | forceOutput )
{
if ( m_Callback & & ! m_Callback - > ProgressFunc ( m_Ember , m_ProgressParameter , 0 , 2 , 0 ) ) //Original only allowed stages 0 and 1. Add 2 to mean final accum.
{
Abort ( ) ;
success = RENDER_ABORT ;
goto Finish ;
}
//Make sure a filter has been created.
CreateSpatialFilter ( newFilterAlloc ) ;
if ( AccumulatorToFinalImage ( finalImage , finalOffset ) = = RENDER_OK )
{
2014-08-06 00:50:52 -04:00
m_Stats . m_RenderMs = m_RenderTimer . Toc ( ) ; //Record total time from the very beginning to the very end, including all intermediate calls.
2014-07-08 03:11:14 -04:00
//Even though the ember changes throughought the inner loops because of interpolation, it's probably ok to assign here.
//This will hold the last interpolated value (even though spatial and temporal filters were created based off of one of the first interpolated values).
m_LastEmber = m_Ember ;
if ( m_ProcessState = = FILTER_DONE ) //Only update state if gotten here legitimately, and not via forceOutput.
{
m_ProcessState = ACCUM_DONE ;
if ( m_Callback & & ! m_Callback - > ProgressFunc ( m_Ember , m_ProgressParameter , 100.0 , 2 , 0 ) ) //Finished.
{
Abort ( ) ;
success = RENDER_ABORT ;
goto Finish ;
}
}
}
else
{
success = RENDER_ERROR ;
}
}
Finish :
if ( success = = RENDER_OK & & m_Abort ) //If everything ran ok, but they've aborted, record abort as the status.
success = RENDER_ABORT ;
else if ( success ! = RENDER_OK ) //Regardless of abort status, if there was an error, leave that as the return status.
Abort ( ) ;
LeaveRender ( ) ;
m_InRender = false ;
return success ;
}
/// <summary>
/// Return EmberImageComments object with image comments filled out.
/// Run() should have completed before calling this.
/// </summary>
/// <param name="printEditDepth">The depth of the edit tags</param>
/// <param name="intPalette">If true use integers instead of floating point numbers when embedding a non-hex formatted palette, else use floating point numbers.</param>
/// <param name="hexPalette">If true, embed a hexadecimal palette instead of Xml Color tags, else use Xml color tags.</param>
/// <returns>The EmberImageComments object with image comments filled out</returns>
template < typename T , typename bucketT >
EmberImageComments Renderer < T , bucketT > : : ImageComments ( unsigned int printEditDepth , bool intPalette , bool hexPalette )
{
ostringstream ss ;
EmberImageComments comments ;
ss . imbue ( std : : locale ( " " ) ) ;
comments . m_Genome = m_EmberToXml . ToString ( m_Ember , " " , printEditDepth , false , intPalette , hexPalette ) ;
ss < < ( ( double ) m_Stats . m_Badvals / ( double ) m_Stats . m_Iters ) ; //Percentage of bad values to iters.
comments . m_Badvals = ss . str ( ) ; ss . str ( " " ) ;
ss < < m_Stats . m_Iters ;
comments . m_NumIters = ss . str ( ) ; ss . str ( " " ) ; //Total iters.
2014-08-06 00:50:52 -04:00
ss < < ( m_Stats . m_RenderMs / 1000.0 ) ;
2014-07-08 03:11:14 -04:00
comments . m_Runtime = ss . str ( ) ; //Number of seconds for iterating, accumulating and filtering.
return comments ;
}
/// <summary>
/// Return the amount of memory needed to render the current ember.
/// Optionally include the memory needed for the final output image.
/// </summary>
/// <param name="includeFinal">If true include the memory needed for the final output image, else don't.</param>
/// <returns>The memory required to render the current ember</returns>
template < typename T , typename bucketT >
2014-09-01 00:25:15 -04:00
uint64_t Renderer < T , bucketT > : : MemoryRequired ( bool includeFinal )
2014-07-08 03:11:14 -04:00
{
bool newFilterAlloc = false ;
CreateSpatialFilter ( newFilterAlloc ) ;
CreateTemporalFilter ( newFilterAlloc ) ;
ComputeBounds ( ) ;
//Because ComputeBounds() was called, this includes gutter.
2014-09-01 00:25:15 -04:00
uint64_t histSize = SuperSize ( ) * sizeof ( glm : : detail : : tvec4 < bucketT , glm : : defaultp > ) ;
2014-07-08 03:11:14 -04:00
return ( histSize * 2 ) + ( includeFinal ? FinalBufferSize ( ) : 0 ) ; //Multiply hist by 2 to account for the density filtering buffer which is the same size as the histogram.
}
/// <summary>
/// Virtual functions to be overriden in derived renderers that use the GPU.
/// </summary>
/// <summary>
/// The amount of RAM available to render with.
/// </summary>
/// <returns>An unsigned 64-bit integer specifying how much memory is available</returns>
template < typename T , typename bucketT >
2014-09-01 00:25:15 -04:00
uint64_t Renderer < T , bucketT > : : MemoryAvailable ( )
2014-07-08 03:11:14 -04:00
{
2014-09-01 00:25:15 -04:00
uint64_t memAvailable = 0 ;
2014-07-08 03:11:14 -04:00
# ifdef WIN32
MEMORYSTATUSEX stat ;
stat . dwLength = sizeof ( stat ) ;
GlobalMemoryStatusEx ( & stat ) ;
memAvailable = stat . ullTotalPhys ;
# elif defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
memAvailable = sysconf ( _SC_PHYS_PAGES ) * sysconf ( _SC_PAGESIZE ) ;
# elif defined __APPLE__
# ifdef __LP64__
long physmem ;
size_t len = sizeof ( physmem ) ;
static int mib [ 2 ] = { CTL_HW , HW_MEMSIZE } ;
# else
unsigned int physmem ;
size_t len = sizeof ( physmem ) ;
static int mib [ 2 ] = { CTL_HW , HW_PHYSMEM } ;
# endif
if ( sysctl ( mib , 2 , & physmem , & len , NULL , 0 ) = = 0 & & len = = sizeof ( physmem ) )
{
memAvailable = physmem ;
}
else
{
cout < < " Warning: unable to determine physical memory. " < < endl ;
memAvailable = 4e9 ;
}
# else
cout < < " Warning: unable to determine physical memory. " < < endl ;
memAvailable = 4e9 ;
# endif
return memAvailable ;
}
/// <summary>
/// Stop rendering, ensure all locks are exited and reset the rendering state.
/// </summary>
template < typename T , typename bucketT >
void Renderer < T , bucketT > : : Reset ( )
{
Abort ( ) ;
EnterRender ( ) ;
EnterFinalAccum ( ) ;
LeaveFinalAccum ( ) ;
LeaveRender ( ) ;
m_ProcessState = NONE ;
m_ProcessAction = FULL_RENDER ;
}
/// <summary>
/// Get a status indicating whether this renderer is ok.
/// Return true for this class, derived classes will inspect GPU hardware
/// to determine if they are ok.
/// </summary>
/// <returns>Always true for this class</returns>
template < typename T , typename bucketT >
bool Renderer < T , bucketT > : : Ok ( ) const
{
return true ;
}
/// <summary>
/// Create the density filter if the current filter parameters differ
/// from the last density filter created.
/// The filter will be deleted if the max DE radius is 0, in which case regular
/// log scale filtering will be used.
/// </summary>
/// <param name="newAlloc">True if a new filter instance was created, else false.</param>
/// <returns>True if the filter is not NULL (whether a new one was created or not) or if max rad is 0, else false.</returns>
template < typename T , typename bucketT >
bool Renderer < T , bucketT > : : CreateDEFilter ( bool & newAlloc )
{
//If they wanted DE, create it if needed, else clear the last DE filter which means we'll do regular log filtering after iters are done.
newAlloc = false ;
if ( m_Ember . m_MaxRadDE > 0 )
{
//Use intelligent testing so it isn't created every time a new ember is passed in.
2014-08-03 19:16:10 -04:00
if ( ( ! m_DensityFilter . get ( ) ) | |
2014-07-08 03:11:14 -04:00
( m_Ember . m_MinRadDE ! = m_DensityFilter - > MinRad ( ) ) | |
( m_Ember . m_MaxRadDE ! = m_DensityFilter - > MaxRad ( ) ) | |
( m_Ember . m_CurveDE ! = m_DensityFilter - > Curve ( ) ) | |
2014-08-03 19:16:10 -04:00
( m_Ember . m_Supersample ! = m_DensityFilter - > Supersample ( ) ) )
2014-07-08 03:11:14 -04:00
{
m_DensityFilter = auto_ptr < DensityFilter < T > > ( new DensityFilter < T > ( m_Ember . m_MinRadDE , m_Ember . m_MaxRadDE , m_Ember . m_CurveDE , m_Ember . m_Supersample ) ) ;
newAlloc = true ;
}
if ( newAlloc )
{
if ( ! m_DensityFilter . get ( ) ) { return false ; } //Did object creation succeed?
if ( ! m_DensityFilter - > Create ( ) ) { return false ; } //Object creation succeeded, did filter creation succeed?
//cout << m_DensityFilter->ToString() << endl;
}
else
if ( ! m_DensityFilter - > Valid ( ) ) { return false ; } //Previously created, are values ok?
}
else
{
m_DensityFilter . reset ( ) ; //They want to do log filtering. Return true because even though the filter is being deleted, nothing went wrong.
}
return true ;
}
/// <summary>
/// Create the spatial filter if the current filter parameters differ
/// from the last spatial filter created.
/// </summary>
/// <param name="newAlloc">True if a new filter instance was created, else false.</param>
/// <returns>True if the filter is not NULL (whether a new one was created or not), else false.</returns>
template < typename T , typename bucketT >
bool Renderer < T , bucketT > : : CreateSpatialFilter ( bool & newAlloc )
{
newAlloc = false ;
//Use intelligent testing so it isn't created every time a new ember is passed in.
2014-08-03 19:16:10 -04:00
if ( ( ! m_SpatialFilter . get ( ) ) | |
2014-07-08 03:11:14 -04:00
( m_Ember . m_SpatialFilterType ! = m_SpatialFilter - > FilterType ( ) ) | |
( m_Ember . m_SpatialFilterRadius ! = m_SpatialFilter - > FilterRadius ( ) ) | |
2014-08-03 19:16:10 -04:00
( m_Ember . m_Supersample ! = m_SpatialFilter - > Supersample ( ) ) | |
2014-07-08 03:11:14 -04:00
( m_PixelAspectRatio ! = m_SpatialFilter - > PixelAspectRatio ( ) ) )
{
m_SpatialFilter = auto_ptr < SpatialFilter < T > > (
SpatialFilterCreator < T > : : Create ( m_Ember . m_SpatialFilterType , m_Ember . m_SpatialFilterRadius , m_Ember . m_Supersample , m_PixelAspectRatio ) ) ;
newAlloc = true ;
}
return m_SpatialFilter . get ( ) ! = NULL ;
}
/// <summary>
/// Get the sub batch size. This is the size of of the chunks that the iteration
/// trajectory will be broken up into.
/// Default: 10k.
/// </summary>
/// <returns>The sub batch size</returns>
template < typename T , typename bucketT >
unsigned int Renderer < T , bucketT > : : SubBatchSize ( ) const { return m_SubBatchSize ; }
/// <summary>
/// Set the sub batch size. This is the size of of the chunks that the iteration
/// trajectory will be broken up into.
/// Reset the rendering process.
/// </summary>
/// <param name="sbs">The sub batch size to set</param>
template < typename T , typename bucketT >
void Renderer < T , bucketT > : : SubBatchSize ( unsigned int sbs )
{
ChangeVal ( [ & ] { m_SubBatchSize = sbs ; } , FULL_RENDER ) ;
}
/// <summary>
/// Get the number of channels per pixel in the output image. 3 for RGB images
/// like Bitmap and Jpeg, 4 for Png.
/// Default is 3.
/// </summary>
/// <returns>The number of channels per pixel in the output image</returns>
template < typename T , typename bucketT > unsigned int Renderer < T , bucketT > : : NumChannels ( ) const { return m_NumChannels ; }
/// <summary>
/// Set the number of channels per pixel in the output image. 3 for RGB images
/// like Bitmap and Jpeg, 4 for Png.
/// Default is 3.
/// Set the render state to ACCUM_ONLY.
/// </summary>
/// <param name="numChannels">The number of channels per pixel in the output image</param>
template < typename T , typename bucketT >
void Renderer < T , bucketT > : : NumChannels ( unsigned int numChannels )
{
ChangeVal ( [ & ] { m_NumChannels = numChannels ; } , ACCUM_ONLY ) ;
}
/// <summary>
/// Get the renderer type enum.
/// CPU_RENDERER for this class, other values for derived classes.
/// </summary>
/// <returns>CPU_RENDERER</returns>
template < typename T , typename bucketT >
eRendererType Renderer < T , bucketT > : : RendererType ( ) const { return CPU_RENDERER ; }
/// <summary>
/// Get the number of threads used when rendering.
/// Default: use all avaliable cores.
/// </summary>
/// <returns>The number of threads used when rendering</returns>
template < typename T , typename bucketT >
unsigned int Renderer < T , bucketT > : : ThreadCount ( ) const { return m_ThreadsToUse ; }
/// <summary>
/// Set the number of threads to use when rendering.
/// This will also reset the vector of random contexts to be the same size
/// as the number of specified threads.
/// Since this is where they get set up, the caller can optionally pass in
/// a seed string, however it's only used if threads is 1.
/// This is useful for debugging since it will run the same point trajectory
/// every time.
/// Reset the rendering process.
/// </summary>
/// <param name="threads">The number of threads to use</param>
/// <param name="seedString">The seed string to use if threads is 1. Default: NULL.</param>
template < typename T , typename bucketT >
void Renderer < T , bucketT > : : ThreadCount ( unsigned int threads , const char * seedString )
{
ChangeVal ( [ & ]
{
Timing t ;
unsigned int i , size ;
const unsigned int isaacSize = 1 < < ISAAC_SIZE ;
ISAAC_INT seeds [ isaacSize ] ;
m_ThreadsToUse = threads > 0 ? threads : 1 ;
m_Rand . clear ( ) ;
m_SubBatch . clear ( ) ;
m_SubBatch . resize ( m_ThreadsToUse ) ;
m_BadVals . resize ( m_ThreadsToUse ) ;
if ( seedString )
{
memset ( seeds , 0 , isaacSize * sizeof ( ISAAC_INT ) ) ;
memcpy ( ( char * ) seeds , seedString , min ( strlen ( seedString ) , isaacSize * sizeof ( ISAAC_INT ) ) ) ;
}
//This is critical for multithreading, otherwise the threads all happen
//too close to each other in time, resulting in bad randomization.
while ( m_Rand . size ( ) < m_ThreadsToUse )
{
size = ( unsigned int ) m_Rand . size ( ) ;
if ( seedString )
{
Numerous fixes
0.4.0.5 Beta 07/18/2014
--User Changes
Allow for vibrancy values > 1.
Add flatten and unflatten menu items.
Automatically flatten like Apophysis does.
Add plugin and new_linear tags to Xml to be compatible with Apophysis.
--Bug Fixes
Fix blur, blur3d, bubble, cropn, cross, curl, curl3d, epispiral, ho,
julia3d, julia3dz, loonie, mirror_x, mirror_y, mirror_z, rotate_x,
sinusoidal, spherical, spherical3d, stripes.
Unique filename on final render was completely broken.
Two severe OpenCL bugs. Random seeds were biased and fusing was being
reset too often leading to results that differ from the CPU.
Subtle, but sometimes severe bug in the setup of the xaos weights.
Use properly defined epsilon by getting the value from
std::numeric_limits, rather than hard coding 1e-6 or 1e-10.
Omit incorrect usage of epsilon everywhere. It should not be
automatically added to denominators. Rather, it should only be used if
the denominator is zero.
Force final render progress bars to 100 on completion. Sometimes they
didn't seem to make it there.
Make variation name and params comparisons be case insensitive.
--Code Changes
Make ForEach and FindIf wrappers around std::for_each and std::find_if.
2014-07-19 02:33:18 -04:00
unsigned int newSize = size + 5 + ( unsigned int ) ( t . Toc ( ) + t . EndTime ( ) ) ;
2014-07-08 03:11:14 -04:00
Numerous fixes
0.4.0.5 Beta 07/18/2014
--User Changes
Allow for vibrancy values > 1.
Add flatten and unflatten menu items.
Automatically flatten like Apophysis does.
Add plugin and new_linear tags to Xml to be compatible with Apophysis.
--Bug Fixes
Fix blur, blur3d, bubble, cropn, cross, curl, curl3d, epispiral, ho,
julia3d, julia3dz, loonie, mirror_x, mirror_y, mirror_z, rotate_x,
sinusoidal, spherical, spherical3d, stripes.
Unique filename on final render was completely broken.
Two severe OpenCL bugs. Random seeds were biased and fusing was being
reset too often leading to results that differ from the CPU.
Subtle, but sometimes severe bug in the setup of the xaos weights.
Use properly defined epsilon by getting the value from
std::numeric_limits, rather than hard coding 1e-6 or 1e-10.
Omit incorrect usage of epsilon everywhere. It should not be
automatically added to denominators. Rather, it should only be used if
the denominator is zero.
Force final render progress bars to 100 on completion. Sometimes they
didn't seem to make it there.
Make variation name and params comparisons be case insensitive.
--Code Changes
Make ForEach and FindIf wrappers around std::for_each and std::find_if.
2014-07-19 02:33:18 -04:00
# ifdef ISAAC_FLAM3_DEBUG
QTIsaac < ISAAC_SIZE , ISAAC_INT > isaac ( 0 , 0 , 0 , seeds ) ;
# else
2014-08-03 19:16:10 -04:00
QTIsaac < ISAAC_SIZE , ISAAC_INT > isaac ( newSize , newSize * 2 , newSize * 3 , seeds ) ;
Numerous fixes
0.4.0.5 Beta 07/18/2014
--User Changes
Allow for vibrancy values > 1.
Add flatten and unflatten menu items.
Automatically flatten like Apophysis does.
Add plugin and new_linear tags to Xml to be compatible with Apophysis.
--Bug Fixes
Fix blur, blur3d, bubble, cropn, cross, curl, curl3d, epispiral, ho,
julia3d, julia3dz, loonie, mirror_x, mirror_y, mirror_z, rotate_x,
sinusoidal, spherical, spherical3d, stripes.
Unique filename on final render was completely broken.
Two severe OpenCL bugs. Random seeds were biased and fusing was being
reset too often leading to results that differ from the CPU.
Subtle, but sometimes severe bug in the setup of the xaos weights.
Use properly defined epsilon by getting the value from
std::numeric_limits, rather than hard coding 1e-6 or 1e-10.
Omit incorrect usage of epsilon everywhere. It should not be
automatically added to denominators. Rather, it should only be used if
the denominator is zero.
Force final render progress bars to 100 on completion. Sometimes they
didn't seem to make it there.
Make variation name and params comparisons be case insensitive.
--Code Changes
Make ForEach and FindIf wrappers around std::for_each and std::find_if.
2014-07-19 02:33:18 -04:00
# endif
2014-07-08 03:11:14 -04:00
m_Rand . push_back ( isaac ) ;
for ( i = 0 ; i < ( isaacSize * sizeof ( ISAAC_INT ) ) ; i + + )
( ( unsigned char * ) seeds ) [ i ] + + ;
}
else
{
for ( i = 0 ; i < isaacSize ; i + + )
{
t . Toc ( ) ;
seeds [ i ] = ( ISAAC_INT ) ( t . EndTime ( ) * i ) + ( size + 1 ) ;
}
t . Toc ( ) ;
ISAAC_INT r = ( size * i ) + i + ( ISAAC_INT ) t . EndTime ( ) ;
QTIsaac < ISAAC_SIZE , ISAAC_INT > isaac ( r , r * 2 , r * 3 , seeds ) ;
m_Rand . push_back ( isaac ) ;
}
}
} , FULL_RENDER ) ;
}
/// <summary>
/// Set the callback object.
/// </summary>
/// <param name="callback">The callback object to set</param>
template < typename T , typename bucketT >
void Renderer < T , bucketT > : : Callback ( RenderCallback * callback )
{
m_Callback = callback ;
}
/// <summary>
/// Virtual functions to be overriden in derived renderers that use the GPU, but not accessed outside.
/// </summary>
/// <summary>
/// Make the final palette used for iteration.
/// </summary>
/// <param name="colorScalar">The color scalar to multiply the ember's palette by</param>
template < typename T , typename bucketT >
void Renderer < T , bucketT > : : MakeDmap ( T colorScalar )
{
2014-09-01 00:25:15 -04:00
m_Ember . m_Palette . template MakeDmap < bucketT > ( m_Dmap , colorScalar ) ;
2014-07-08 03:11:14 -04:00
}
/// <summary>
/// Allocate various buffers if the image dimensions, thread count, or sub batch size
/// has changed.
/// </summary>
/// <returns>True if success, else false</returns>
template < typename T , typename bucketT >
bool Renderer < T , bucketT > : : Alloc ( )
{
bool b = true ;
bool lock =
( m_SuperSize ! = m_HistBuckets . size ( ) ) | |
( m_SuperSize ! = m_AccumulatorBuckets . size ( ) ) | |
( m_ThreadsToUse ! = m_Samples . size ( ) ) | |
( m_Samples [ 0 ] . size ( ) ! = m_SubBatchSize ) ;
if ( lock )
EnterResize ( ) ;
if ( m_SuperSize ! = m_HistBuckets . size ( ) )
{
m_HistBuckets . resize ( m_SuperSize ) ;
if ( m_ReclaimOnResize )
m_HistBuckets . shrink_to_fit ( ) ;
b & = ( m_HistBuckets . size ( ) = = m_SuperSize ) ;
}
if ( m_SuperSize ! = m_AccumulatorBuckets . size ( ) )
{
m_AccumulatorBuckets . resize ( m_SuperSize ) ;
if ( m_ReclaimOnResize )
m_AccumulatorBuckets . shrink_to_fit ( ) ;
b & = ( m_AccumulatorBuckets . size ( ) = = m_SuperSize ) ;
}
if ( m_ThreadsToUse ! = m_Samples . size ( ) )
{
m_Samples . resize ( m_ThreadsToUse ) ;
if ( m_ReclaimOnResize )
m_Samples . shrink_to_fit ( ) ;
b & = ( m_Samples . size ( ) = = m_ThreadsToUse ) ;
}
for ( unsigned int i = 0 ; i < m_Samples . size ( ) ; i + + )
{
if ( m_Samples [ i ] . size ( ) ! = m_SubBatchSize )
{
m_Samples [ i ] . resize ( m_SubBatchSize ) ;
if ( m_ReclaimOnResize )
m_Samples [ i ] . shrink_to_fit ( ) ;
b & = ( m_Samples [ i ] . size ( ) = = m_SubBatchSize ) ;
}
}
if ( lock )
LeaveResize ( ) ;
return b ;
}
/// <summary>
/// Clear histogram and/or density filtering buffers to all zeroes.
/// </summary>
/// <param name="resetHist">Clear histogram if true, else don't.</param>
/// <param name="resetAccum">Clear density filtering buffer if true, else don't.</param>
/// <returns>True if anything was cleared, else false.</returns>
template < typename T , typename bucketT >
bool Renderer < T , bucketT > : : ResetBuckets ( bool resetHist , bool resetAccum )
{
//parallel_invoke(
//[&]
//{
if ( resetHist & & ! m_HistBuckets . empty ( ) )
memset ( ( void * ) m_HistBuckets . data ( ) , 0 , m_HistBuckets . size ( ) * sizeof ( m_HistBuckets [ 0 ] ) ) ;
//},
//[&]
//{
if ( resetAccum & & ! m_AccumulatorBuckets . empty ( ) )
memset ( m_AccumulatorBuckets . data ( ) , 0 , m_AccumulatorBuckets . size ( ) * sizeof ( m_AccumulatorBuckets [ 0 ] ) ) ;
//});
return resetHist | | resetAccum ;
}
/// <summary>
/// Perform log scale density filtering.
/// Base case for simple log scale density estimation as discussed (mostly) in the paper
/// in section 4, p. 6-9.
/// </summary>
/// <returns>True if not prematurely aborted, else false.</returns>
template < typename T , typename bucketT >
eRenderStatus Renderer < T , bucketT > : : LogScaleDensityFilter ( )
{
unsigned int startRow = 0 ;
unsigned int endRow = m_SuperRasH ;
unsigned int startCol = 0 ;
unsigned int endCol = m_SuperRasW ;
//Timing t(4);
//Original didn't parallelize this, doing so gives a 50-75% speedup.
//If there is only one pass, the value can be directly assigned, which is quicker than summing.
if ( Passes ( ) = = 1 )
{
parallel_for ( startRow , endRow , [ & ] ( unsigned int j )
{
unsigned int row = j * m_SuperRasW ;
//__m128 logm128;//Figure out SSE at some point.
//__m128 bucketm128;
//__m128 scaledBucket128;
for ( unsigned int i = startCol ; ( i < endCol ) & & ! m_Abort ; i + + )
{
unsigned int index = row + i ;
//Check for visibility first before doing anything else to avoid all possible unnecessary calculations.
if ( m_HistBuckets [ index ] . a ! = 0 )
{
T logScale = ( m_K1 * log ( 1 + m_HistBuckets [ index ] . a * m_K2 ) ) / m_HistBuckets [ index ] . a ;
//Original did a temporary assignment, then *= logScale, then passed the result to bump_no_overflow().
//Combine here into one operation for a slight speedup.
m_AccumulatorBuckets [ index ] = ( m_HistBuckets [ index ] * ( bucketT ) logScale ) ;
}
}
} ) ;
}
else //Passes > 1, so sum.
{
parallel_for ( startRow , endRow , [ & ] ( unsigned int j )
{
unsigned int row = j * m_SuperRasW ;
for ( unsigned int i = startCol ; ( i < endCol ) & & ! m_Abort ; i + + )
{
unsigned int index = row + i ;
//Check for visibility first before doing anything else to avoid all possible unnecessary calculations.
if ( m_HistBuckets [ index ] . a ! = 0 )
{
//Figure out SSE at some point.
//__declspec(align(16))
T logScale = ( m_K1 * log ( 1 + m_HistBuckets [ index ] . a * m_K2 ) ) / m_HistBuckets [ index ] . a ;
//logm128 = _mm_load1_ps(&logScale);
//bucketm128 = _mm_load_ps(m_HistBuckets[index].Channels);
//scaledBucket128 = _mm_mul_ps(logm128, bucketm128);
m_AccumulatorBuckets [ index ] + = ( m_HistBuckets [ index ] * bucketT ( logScale ) ) ;
}
}
} ) ;
}
//t.Toc(__FUNCTION__);
return m_Abort ? RENDER_ABORT : RENDER_OK ;
}
/// <summary>
/// Perform the more advanced Gaussian density filter.
/// More advanced density estimation filtering given less mention in the paper, but used
/// much more in practice as it gives the best results.
/// Section 8, p. 11-13.
/// </summary>
/// <returns>True if not prematurely aborted, else false.</returns>
template < typename T , typename bucketT >
eRenderStatus Renderer < T , bucketT > : : GaussianDensityFilter ( )
{
Timing totalTime , localTime ;
int scf = ! ( Supersample ( ) & 1 ) ;
unsigned int ss = Floor < T > ( Supersample ( ) / T ( 2 ) ) ;
T scfact = pow ( Supersample ( ) / ( Supersample ( ) + T ( 1.0 ) ) , T ( 2.0 ) ) ;
unsigned int threads = m_ThreadsToUse ;
unsigned int startRow = Supersample ( ) - 1 ;
unsigned int endRow = m_SuperRasH - ( Supersample ( ) - 1 ) ; //Original did + which is most likely wrong.
unsigned int startCol = Supersample ( ) - 1 ;
unsigned int endCol = m_SuperRasW - ( Supersample ( ) - 1 ) ;
unsigned int chunkSize = ( unsigned int ) ceil ( double ( endRow - startRow ) / double ( threads ) ) ;
//parallel_for scales very well, dividing the work almost perfectly among all processors.
parallel_for ( ( unsigned int ) 0 , threads , [ & ] ( unsigned int threadIndex )
{
unsigned int pixelNumber = 0 ;
unsigned int localStartRow = min ( startRow + ( threadIndex * chunkSize ) , endRow - 1 ) ;
unsigned int localEndRow = min ( localStartRow + chunkSize , endRow ) ;
unsigned int pixelsThisThread = ( localEndRow - localStartRow ) * m_SuperRasW ;
double lastPercent = 0 ;
glm : : detail : : tvec4 < bucketT , glm : : defaultp > logScaleBucket ;
for ( unsigned int j = localStartRow ; ( j < localEndRow ) & & ! m_Abort ; j + + )
{
unsigned int bucketRowStart = j * m_SuperRasW ; //Pull out of inner loop for optimization.
const glm : : detail : : tvec4 < bucketT , glm : : defaultp > * bucket ;
const glm : : detail : : tvec4 < bucketT , glm : : defaultp > * buckets = m_HistBuckets . data ( ) ;
const T * filterCoefs = m_DensityFilter - > Coefs ( ) ;
const T * filterWidths = m_DensityFilter - > Widths ( ) ;
for ( unsigned int i = startCol ; i < endCol ; i + + )
{
int ii , jj , arrFilterWidth ;
unsigned int filterSelectInt , filterCoefIndex ;
T filterSelect = 0 ;
bucket = buckets + bucketRowStart + i ;
//Don't do anything if there's no hits here. Must also put this first to avoid dividing by zero below.
if ( bucket - > a = = 0 )
continue ;
T cacheLog = ( m_K1 * log ( T ( 1.0 ) + bucket - > a * m_K2 ) ) / bucket - > a ; //Caching this calculation gives a 30% speedup.
if ( ss = = 0 )
{
filterSelect = bucket - > a ;
}
else
{
//The original contained a glaring flaw as it would run past the boundaries of the buffers
//when calculating the density for a box centered on the last row or column.
//Clamp here to not run over the edge.
int densityBoxLeftX = i - min ( i , ss ) ;
int densityBoxRightX = i + min ( ss , m_SuperRasW - i - 1 ) ;
int densityBoxTopY = j - min ( j , ss ) ;
int densityBoxBottomY = j + min ( ss , m_SuperRasH - j - 1 ) ;
//Count density in ssxss area.
//Original went one col at a time, which is cache inefficient. Go one row at at time here for a slight speedup.
for ( jj = densityBoxTopY ; jj < = densityBoxBottomY ; jj + + )
for ( ii = densityBoxLeftX ; ii < = densityBoxRightX ; ii + + )
filterSelect + = buckets [ ii + ( jj * m_SuperRasW ) ] . a ; //Original divided by 255 in every iteration. Omit here because colors are already in the range of [0..1].
}
//Scale if supersample > 1 for equal iters.
if ( scf )
filterSelect * = scfact ;
if ( filterSelect > m_DensityFilter - > MaxFilteredCounts ( ) )
filterSelectInt = m_DensityFilter - > MaxFilterIndex ( ) ;
else if ( filterSelect < = DE_THRESH )
filterSelectInt = ( int ) ceil ( filterSelect ) - 1 ;
else
filterSelectInt = ( int ) DE_THRESH + Floor < T > ( pow ( filterSelect - DE_THRESH , m_DensityFilter - > Curve ( ) ) ) ;
//If the filter selected below the min specified clamp it to the min.
if ( filterSelectInt > m_DensityFilter - > MaxFilterIndex ( ) )
filterSelectInt = m_DensityFilter - > MaxFilterIndex ( ) ;
//Only have to calculate the values for ~1/8 of the square.
filterCoefIndex = filterSelectInt * m_DensityFilter - > KernelSize ( ) ;
arrFilterWidth = ( int ) ceil ( filterWidths [ filterSelectInt ] ) - 1 ;
for ( jj = 0 ; jj < = arrFilterWidth ; jj + + )
{
for ( ii = 0 ; ii < = jj ; ii + + , filterCoefIndex + + )
{
//Skip if coef is 0.
if ( filterCoefs [ filterCoefIndex ] = = 0 )
continue ;
T logScale = filterCoefs [ filterCoefIndex ] * cacheLog ;
//Original first assigned the fields, then scaled them. Combine into a single step for a 1% optimization.
logScaleBucket = ( * bucket * bucketT ( logScale ) ) ;
if ( jj = = 0 & & ii = = 0 )
{
AddToAccum ( logScaleBucket , i , ii , j , jj ) ;
}
else if ( ii = = 0 )
{
AddToAccum ( logScaleBucket , i , 0 , j , - jj ) ;
AddToAccum ( logScaleBucket , i , - jj , j , 0 ) ;
AddToAccum ( logScaleBucket , i , jj , j , 0 ) ;
AddToAccum ( logScaleBucket , i , 0 , j , jj ) ;
}
else if ( jj = = ii )
{
AddToAccum ( logScaleBucket , i , - ii , j , - jj ) ;
AddToAccum ( logScaleBucket , i , ii , j , - jj ) ;
AddToAccum ( logScaleBucket , i , - ii , j , jj ) ;
AddToAccum ( logScaleBucket , i , ii , j , jj ) ;
}
else
{
//Attempting to optimize cache access by putting these in order makes no difference, even on large images, but do it anyway.
AddToAccum ( logScaleBucket , i , - ii , j , - jj ) ;
AddToAccum ( logScaleBucket , i , ii , j , - jj ) ;
AddToAccum ( logScaleBucket , i , - jj , j , - ii ) ;
AddToAccum ( logScaleBucket , i , jj , j , - ii ) ;
AddToAccum ( logScaleBucket , i , - jj , j , ii ) ;
AddToAccum ( logScaleBucket , i , jj , j , ii ) ;
AddToAccum ( logScaleBucket , i , - ii , j , jj ) ;
AddToAccum ( logScaleBucket , i , ii , j , jj ) ;
}
}
}
}
if ( m_Callback & & threadIndex = = 0 )
{
pixelNumber + = m_SuperRasW ;
double percent = ( double ( pixelNumber ) / double ( pixelsThisThread ) ) * 100.0 ;
double percentDiff = percent - lastPercent ;
double toc = localTime . Toc ( ) ;
if ( percentDiff > = 10 | | ( toc > 1000 & & percentDiff > = 1 ) )
{
double etaMs = ( ( 100.0 - percent ) / percent ) * totalTime . Toc ( ) ;
if ( ! m_Callback - > ProgressFunc ( m_Ember , m_ProgressParameter , percent , 1 , etaMs ) )
Abort ( ) ;
lastPercent = percent ;
localTime . Tic ( ) ;
}
}
}
} ) ;
if ( m_Callback & & ! m_Abort )
m_Callback - > ProgressFunc ( m_Ember , m_ProgressParameter , 100.0 , 1 , 0 ) ;
//totalTime.Toc(__FUNCTION__);
return m_Abort ? RENDER_ABORT : RENDER_OK ;
}
/// <summary>
/// Thin wrapper around AccumulatorToFinalImage().
/// </summary>
/// <param name="pixels">The pixel vector to allocate and store the final image in</param>
/// <param name="finalOffset">Offset in the buffer to store the pixels to</param>
/// <returns>True if not prematurely aborted, else false.</returns>
template < typename T , typename bucketT >
eRenderStatus Renderer < T , bucketT > : : AccumulatorToFinalImage ( vector < unsigned char > & pixels , size_t finalOffset )
{
if ( PrepFinalAccumVector ( pixels ) )
return AccumulatorToFinalImage ( pixels . data ( ) , finalOffset ) ;
return RENDER_ERROR ;
}
/// <summary>
/// Produce a final, visible image by clipping, gamma correcting and spatial filtering the color values
/// in the density filtering buffer and save to the passed in buffer.
/// </summary>
/// <param name="pixels">The pre-allocated pixel buffer to store the final image in</param>
/// <param name="finalOffset">Offset in the buffer to store the pixels to. Default: 0.</param>
/// <returns>True if not prematurely aborted, else false.</returns>
template < typename T , typename bucketT >
eRenderStatus Renderer < T , bucketT > : : AccumulatorToFinalImage ( unsigned char * pixels , size_t finalOffset )
{
if ( ! pixels )
return RENDER_ERROR ;
EnterFinalAccum ( ) ;
//Timing t(4);
unsigned int filterWidth = m_SpatialFilter - > FinalFilterWidth ( ) ;
T g , linRange , vibrancy ;
Color < T > background ;
pixels + = finalOffset ;
PrepFinalAccumVals ( background , g , linRange , vibrancy ) ;
//If early clip, go through the entire accumulator and perform gamma correction first.
//The original does it this way as well and it's roughly 11 times faster to do it this way than inline below with each pixel.
if ( EarlyClip ( ) )
{
parallel_for ( ( unsigned int ) 0 , m_SuperRasH , [ & ] ( unsigned int j )
{
unsigned int rowStart = j * m_SuperRasW ; //Pull out of inner loop for optimization.
for ( unsigned int i = 0 ; i < m_SuperRasW & & ! m_Abort ; i + + )
{
GammaCorrection ( m_AccumulatorBuckets [ i + rowStart ] , background , g , linRange , vibrancy , true , false , & ( m_AccumulatorBuckets [ i + rowStart ] [ 0 ] ) ) ; //Write back in place.
}
} ) ;
}
if ( m_Abort )
{
LeaveFinalAccum ( ) ;
return RENDER_ABORT ;
}
//Note that abort is not checked here. The final accumulation must run to completion
//otherwise artifacts that resemble page tearing will occur in an interactive run. It's
//critical to never exit this loop prematurely.
//for (unsigned int j = 0; j < FinalRasH(); j++)//Keep around for debugging.
parallel_for ( ( unsigned int ) 0 , FinalRasH ( ) , [ & ] ( unsigned int j )
{
Color < bucketT > newBucket ;
2014-07-26 15:03:51 -04:00
int pixelsRowStart = ( m_YAxisUp ? ( ( FinalRasH ( ) - j ) - 1 ) : j ) * FinalRowSize ( ) ; //Pull out of inner loop for optimization.
2014-07-08 03:11:14 -04:00
unsigned int y = m_DensityFilterOffset + ( j * Supersample ( ) ) ; //Start at the beginning row of each super sample block.
unsigned short * p16 ;
for ( unsigned int i = 0 ; i < FinalRasW ( ) ; i + + , pixelsRowStart + = PixelSize ( ) )
{
unsigned int ii , jj ;
unsigned int x = m_DensityFilterOffset + ( i * Supersample ( ) ) ; //Start at the beginning column of each super sample block.
newBucket . Clear ( ) ;
//Original was iterating column-wise, which is slow.
//Here, iterate one row at a time, giving a 10% speed increase.
for ( jj = 0 ; jj < filterWidth ; jj + + )
{
unsigned int filterKRowIndex = jj * filterWidth ;
unsigned int accumRowIndex = ( y + jj ) * m_SuperRasW ; //Pull out of inner loop for optimization.
for ( ii = 0 ; ii < filterWidth ; ii + + )
{
//Need to dereference the spatial filter pointer object to use the [] operator. Makes no speed difference.
bucketT k = bucketT ( ( * m_SpatialFilter ) [ ii + filterKRowIndex ] ) ;
newBucket + = ( m_AccumulatorBuckets [ ( x + ii ) + accumRowIndex ] * k ) ;
}
}
if ( BytesPerChannel ( ) = = 2 )
{
p16 = ( unsigned short * ) ( pixels + pixelsRowStart ) ;
if ( EarlyClip ( ) )
{
p16 [ 0 ] = ( unsigned short ) ( Clamp < bucketT > ( newBucket . r , 0 , 255 ) * bucketT ( 256 ) ) ;
p16 [ 1 ] = ( unsigned short ) ( Clamp < bucketT > ( newBucket . g , 0 , 255 ) * bucketT ( 256 ) ) ;
p16 [ 2 ] = ( unsigned short ) ( Clamp < bucketT > ( newBucket . b , 0 , 255 ) * bucketT ( 256 ) ) ;
2014-09-01 00:25:15 -04:00
2014-07-08 03:11:14 -04:00
if ( NumChannels ( ) > 3 )
{
if ( Transparency ( ) )
p16 [ 3 ] = ( unsigned char ) ( Clamp < bucketT > ( newBucket . a , 0 , 1 ) * bucketT ( 65535.0 ) ) ;
else
p16 [ 3 ] = 65535 ;
}
}
else
{
GammaCorrection ( * ( glm : : detail : : tvec4 < bucketT , glm : : defaultp > * ) ( & newBucket ) , background , g , linRange , vibrancy , NumChannels ( ) > 3 , true , p16 ) ;
}
}
else
{
if ( EarlyClip ( ) )
{
pixels [ pixelsRowStart ] = ( unsigned char ) Clamp < bucketT > ( newBucket . r , 0 , 255 ) ;
pixels [ pixelsRowStart + 1 ] = ( unsigned char ) Clamp < bucketT > ( newBucket . g , 0 , 255 ) ;
pixels [ pixelsRowStart + 2 ] = ( unsigned char ) Clamp < bucketT > ( newBucket . b , 0 , 255 ) ;
2014-09-01 00:25:15 -04:00
2014-07-08 03:11:14 -04:00
if ( NumChannels ( ) > 3 )
{
if ( Transparency ( ) )
pixels [ pixelsRowStart + 3 ] = ( unsigned char ) ( Clamp < bucketT > ( newBucket . a , 0 , 1 ) * bucketT ( 255.0 ) ) ;
else
pixels [ pixelsRowStart + 3 ] = 255 ;
}
}
else
{
GammaCorrection ( * ( glm : : detail : : tvec4 < bucketT , glm : : defaultp > * ) ( & newBucket ) , background , g , linRange , vibrancy , NumChannels ( ) > 3 , true , pixels + pixelsRowStart ) ;
}
}
}
} ) ;
//Insert the palette into the image for debugging purposes. Only works with 8bpc.
if ( m_InsertPalette & & BytesPerChannel ( ) = = 1 )
{
unsigned int i , j , ph = 100 ;
if ( ph > = FinalRasH ( ) )
ph = FinalRasH ( ) ;
for ( j = 0 ; j < ph ; j + + )
{
for ( i = 0 ; i < FinalRasW ( ) ; i + + )
{
unsigned char * p = pixels + ( NumChannels ( ) * ( i + j * FinalRasW ( ) ) ) ;
p [ 0 ] = ( unsigned char ) ( m_TempEmber . m_Palette [ i * 256 / FinalRasW ( ) ] [ 0 ] * WHITE ) ; //The palette is [0..1], output image is [0..255].
p [ 1 ] = ( unsigned char ) ( m_TempEmber . m_Palette [ i * 256 / FinalRasW ( ) ] [ 1 ] * WHITE ) ;
p [ 2 ] = ( unsigned char ) ( m_TempEmber . m_Palette [ i * 256 / FinalRasW ( ) ] [ 2 ] * WHITE ) ;
}
}
}
//t.Toc(__FUNCTION__);
LeaveFinalAccum ( ) ;
return m_Abort ? RENDER_ABORT : RENDER_OK ;
}
//#define TG 1
//#define NEWSUBBATCH 1
/// <summary>
/// Run the iteration algorithm for the specified number of iterations.
/// This is only called after all other setup has been done.
/// This function will be called multiple times for an interactive rendering, and
/// once for a straight through render.
/// The iteration is reset and fused in each thread after each sub batch is done
/// which by default is 10,000 iterations.
/// </summary>
/// <param name="iterCount">The number of iterations to run</param>
/// <param name="pass">The pass this is running for</param>
/// <param name="temporalSample">The temporal sample within the current pass this is running for</param>
/// <returns>Rendering statistics</returns>
template < typename T , typename bucketT >
2014-09-01 00:25:15 -04:00
EmberStats Renderer < T , bucketT > : : Iterate ( uint64_t iterCount , unsigned int pass , unsigned int temporalSample )
2014-07-08 03:11:14 -04:00
{
//Timing t2(4);
2014-08-06 00:50:52 -04:00
m_IterTimer . Tic ( ) ;
2014-07-08 03:11:14 -04:00
unsigned int fuse = EarlyClip ( ) ? 100 : 15 ; //EarlyClip was one way of detecting a later version of flam3, so it used 100 which is a better value.
2014-09-01 00:25:15 -04:00
uint64_t totalItersPerThread = ( uint64_t ) ceil ( ( double ) iterCount / ( double ) m_ThreadsToUse ) ;
2014-07-08 03:11:14 -04:00
double percent , etaMs ;
EmberStats stats ;
2014-09-01 00:25:15 -04:00
# ifdef TG
2014-07-08 03:11:14 -04:00
unsigned int threadIndex ;
for ( unsigned int i = 0 ; i < m_ThreadsToUse ; i + + )
{
threadIndex = i ;
m_TaskGroup . run ( [ & , threadIndex ] ( ) {
# else
parallel_for ( ( unsigned int ) 0 , m_ThreadsToUse , [ & ] ( unsigned int threadIndex )
{
# endif
Timing t ;
2014-09-01 00:25:15 -04:00
uint64_t subBatchSize = ( unsigned int ) min ( totalItersPerThread , ( uint64_t ) m_SubBatchSize ) ;
2014-07-08 03:11:14 -04:00
m_BadVals [ threadIndex ] = 0 ;
//Sub batch iterations, loop 3.
for ( m_SubBatch [ threadIndex ] = 0 ; ( m_SubBatch [ threadIndex ] < totalItersPerThread ) & & ! m_Abort ; m_SubBatch [ threadIndex ] + = subBatchSize )
{
//Must recalculate the number of iters to run on each sub batch because the last batch will most likely have less than m_SubBatchSize iters.
//For example, if 51,000 are requested, and the sbs is 10,000, it should run 5 sub batches of 10,000 iters, and one final sub batch of 1,000 iters.
subBatchSize = min ( subBatchSize , totalItersPerThread - m_SubBatch [ threadIndex ] ) ;
//Use first as random point, the rest are iterated points.
//Note that this gets reset with a new random point for each subBatchSize iterations.
//This helps correct if iteration happens to be on a bad trajectory.
Numerous fixes
0.4.0.5 Beta 07/18/2014
--User Changes
Allow for vibrancy values > 1.
Add flatten and unflatten menu items.
Automatically flatten like Apophysis does.
Add plugin and new_linear tags to Xml to be compatible with Apophysis.
--Bug Fixes
Fix blur, blur3d, bubble, cropn, cross, curl, curl3d, epispiral, ho,
julia3d, julia3dz, loonie, mirror_x, mirror_y, mirror_z, rotate_x,
sinusoidal, spherical, spherical3d, stripes.
Unique filename on final render was completely broken.
Two severe OpenCL bugs. Random seeds were biased and fusing was being
reset too often leading to results that differ from the CPU.
Subtle, but sometimes severe bug in the setup of the xaos weights.
Use properly defined epsilon by getting the value from
std::numeric_limits, rather than hard coding 1e-6 or 1e-10.
Omit incorrect usage of epsilon everywhere. It should not be
automatically added to denominators. Rather, it should only be used if
the denominator is zero.
Force final render progress bars to 100 on completion. Sometimes they
didn't seem to make it there.
Make variation name and params comparisons be case insensitive.
--Code Changes
Make ForEach and FindIf wrappers around std::for_each and std::find_if.
2014-07-19 02:33:18 -04:00
m_Samples [ threadIndex ] [ 0 ] . m_X = m_Rand [ threadIndex ] . Frand11 < T > ( ) ;
m_Samples [ threadIndex ] [ 0 ] . m_Y = m_Rand [ threadIndex ] . Frand11 < T > ( ) ;
2014-07-08 03:11:14 -04:00
m_Samples [ threadIndex ] [ 0 ] . m_Z = 0 ; //m_Ember.m_CamZPos;//Apo set this to 0, then made the user use special variations to kick it. It seems easier to just set it to zpos.
Numerous fixes
0.4.0.5 Beta 07/18/2014
--User Changes
Allow for vibrancy values > 1.
Add flatten and unflatten menu items.
Automatically flatten like Apophysis does.
Add plugin and new_linear tags to Xml to be compatible with Apophysis.
--Bug Fixes
Fix blur, blur3d, bubble, cropn, cross, curl, curl3d, epispiral, ho,
julia3d, julia3dz, loonie, mirror_x, mirror_y, mirror_z, rotate_x,
sinusoidal, spherical, spherical3d, stripes.
Unique filename on final render was completely broken.
Two severe OpenCL bugs. Random seeds were biased and fusing was being
reset too often leading to results that differ from the CPU.
Subtle, but sometimes severe bug in the setup of the xaos weights.
Use properly defined epsilon by getting the value from
std::numeric_limits, rather than hard coding 1e-6 or 1e-10.
Omit incorrect usage of epsilon everywhere. It should not be
automatically added to denominators. Rather, it should only be used if
the denominator is zero.
Force final render progress bars to 100 on completion. Sometimes they
didn't seem to make it there.
Make variation name and params comparisons be case insensitive.
--Code Changes
Make ForEach and FindIf wrappers around std::for_each and std::find_if.
2014-07-19 02:33:18 -04:00
m_Samples [ threadIndex ] [ 0 ] . m_ColorX = m_Rand [ threadIndex ] . Frand01 < T > ( ) ;
2014-07-08 03:11:14 -04:00
//Finally, iterate.
//t.Tic();
//Iterating, loop 4.
2014-09-01 00:25:15 -04:00
m_BadVals [ threadIndex ] + = ( uint64_t ) m_Iterator - > Iterate ( m_Ember , ( uint32_t ) subBatchSize , fuse , m_Samples [ threadIndex ] . data ( ) , m_Rand [ threadIndex ] ) ;
2014-07-08 03:11:14 -04:00
//iterationTime += t.Toc();
if ( m_LockAccum )
m_AccumCs . Enter ( ) ;
//t.Tic();
//Map temp buffer samples into the histogram using the palette for color.
Accumulate ( m_Samples [ threadIndex ] . data ( ) , ( unsigned int ) subBatchSize , & m_Dmap ) ;
//accumulationTime += t.Toc();
if ( m_LockAccum )
m_AccumCs . Leave ( ) ;
if ( m_Callback & & threadIndex = = 0 )
{
percent = 100.0 *
double
(
double
(
double
(
double
(
//Takes progress of current thread and multiplies by thread count.
//This assumes the threads progress at roughly the same speed.
double ( m_LastIter + ( m_SubBatch [ threadIndex ] * m_ThreadsToUse ) ) / double ( ItersPerTemporalSample ( ) )
) + temporalSample
) / ( double ) TemporalSamples ( )
) + ( double ) pass
) / ( double ) Passes ( ) ;
double percentDiff = percent - m_LastIterPercent ;
double toc = m_ProgressTimer . Toc ( ) ;
if ( percentDiff > = 10 | | ( toc > 1000 & & percentDiff > = 1 ) ) //Call callback function if either 10% has passed, or one second (and 1%).
{
etaMs = ( ( 100.0 - percent ) / percent ) * m_RenderTimer . Toc ( ) ;
if ( ! m_Callback - > ProgressFunc ( m_Ember , m_ProgressParameter , percent , 0 , etaMs ) )
Abort ( ) ;
m_LastIterPercent = percent ;
m_ProgressTimer . Tic ( ) ;
}
}
}
} ) ;
# ifdef TG
}
m_TaskGroup . wait ( ) ;
# endif
stats . m_Iters = std : : accumulate ( m_SubBatch . begin ( ) , m_SubBatch . end ( ) , 0ULL ) ; //Sum of iter count of all threads.
2014-08-06 00:50:52 -04:00
stats . m_Badvals = std : : accumulate ( m_BadVals . begin ( ) , m_BadVals . end ( ) , 0ULL ) ;
stats . m_IterMs = m_IterTimer . Toc ( ) ;
2014-07-08 03:11:14 -04:00
//t2.Toc(__FUNCTION__);
return stats ;
}
/// <summary>
/// Accessors for render properties.
/// </summary>
/// <summary>
/// Get a copy of the vector of random contexts.
/// Useful for debugging because the returned vector can be used for future renders to
/// produce the exact same output.
/// </summary>
/// <returns>The vector of random contexts to assign</returns>
template < typename T , typename bucketT >
vector < QTIsaac < ISAAC_SIZE , ISAAC_INT > > Renderer < T , bucketT > : : RandVec ( ) { return m_Rand ; } ;
/// <summary>
/// Set the vector of random contexts.
/// Assignment will only take place if the size of the vector matches
/// the number of threads used for rendering.
/// Reset the rendering process.
/// </summary>
/// <param name="randVec">The vector of random contexts to assign</param>
/// <returns>True if the size of the vector matched the number of threads used for rendering, else false.</returns>
template < typename T , typename bucketT >
bool Renderer < T , bucketT > : : RandVec ( vector < QTIsaac < ISAAC_SIZE , ISAAC_INT > > & randVec )
{
bool b = false ;
if ( randVec . size ( ) = = ThreadCount ( ) )
{
ChangeVal ( [ & ]
{
m_Rand = randVec ;
b = true ;
} , FULL_RENDER ) ;
}
return b ;
} ;
/// <summary>
/// Get whether the histogram is locked during accumulation.
/// This is to prevent two threads from writing to the same histogram
/// bucket at once.
/// The current implementation matches flam3 and is very innefficient
/// to the point of negating any gains gotten from multi-threading.
/// Future workarounds may be tried in the future.
/// Default: false.
/// </summary>
/// <returns>True if the histogram is locked during accumulation, else false.</returns>
template < typename T , typename bucketT >
bool Renderer < T , bucketT > : : LockAccum ( ) const { return m_LockAccum ; }
/// <summary>
/// Set whether the histogram is locked during accumulation.
/// This is to prevent two threads from writing to the same histogram
/// bucket at once.
/// The current implementation matches flam3 and is very innefficient
/// to the point of negating any gains gotten from multi-threading.
/// Different workarounds may be tried in the future.
/// Reset the rendering process.
/// </summary>
/// <param name="lockAccum">True if the histogram should be locked when accumulating, else false</param>
template < typename T , typename bucketT >
void Renderer < T , bucketT > : : LockAccum ( bool lockAccum )
{
ChangeVal ( [ & ] { m_LockAccum = lockAccum ; } , FULL_RENDER ) ;
}
/// <summary>
/// Get whether color clipping and gamma correction is done before
/// or after spatial filtering.
/// Default: false.
/// </summary>
/// <returns>True if early clip, else false.</returns>
template < typename T , typename bucketT >
bool Renderer < T , bucketT > : : EarlyClip ( ) const { return m_EarlyClip ; }
/// <summary>
/// Set whether color clipping and gamma correction is done before
/// or after spatial filtering.
/// Set the render state to FILTER_AND_ACCUM.
/// </summary>
/// <param name="earlyClip">True if early clip, else false.</param>
template < typename T , typename bucketT >
void Renderer < T , bucketT > : : EarlyClip ( bool earlyClip )
{
ChangeVal ( [ & ] { m_EarlyClip = earlyClip ; } , FILTER_AND_ACCUM ) ;
}
2014-07-26 15:03:51 -04:00
/// <summary>
/// Get whether the positive Y coordinate of the final output image is up.
/// Default: false.
/// </summary>
/// <returns>True if up, else false.</returns>
template < typename T , typename bucketT >
bool Renderer < T , bucketT > : : YAxisUp ( ) const { return m_YAxisUp ; }
/// <summary>
/// Set whether the positive Y axis of the final output image is up.
/// </summary>
/// <param name="yup">True if the positive y axis is up, else false.</param>
template < typename T , typename bucketT >
void Renderer < T , bucketT > : : YAxisUp ( bool yup )
{
ChangeVal ( [ & ] { m_YAxisUp = yup ; } , ACCUM_ONLY ) ;
}
2014-07-08 03:11:14 -04:00
/// <summary>
/// Get whether to insert the palette as a block of colors in the final output image.
/// This is useful for debugging palette issues.
/// Default: 1.
/// </summary>
/// <returns>True if inserting the palette, else false.</returns>
template < typename T , typename bucketT >
bool Renderer < T , bucketT > : : InsertPalette ( ) const { return m_InsertPalette ; }
/// <summary>
/// Set whether to insert the palette as a block of colors in the final output image.
/// This is useful for debugging palette issues.
/// Set the render state to ACCUM_ONLY.
/// </summary>
/// <param name="insertPalette">True if inserting the palette, else false.</param>
template < typename T , typename bucketT >
void Renderer < T , bucketT > : : InsertPalette ( bool insertPalette )
{
ChangeVal ( [ & ] { m_InsertPalette = insertPalette ; } , ACCUM_ONLY ) ;
}
/// <summary>
/// Get whether to reclaim unused memory in the final output buffer
/// when a smaller size is requested than has been previously allocated.
/// Default: false.
/// </summary>
/// <returns>True if reclaim, else false.</returns>
template < typename T , typename bucketT >
bool Renderer < T , bucketT > : : ReclaimOnResize ( ) const { return m_ReclaimOnResize ; }
/// <summary>
/// Set whether to reclaim unused memory in the final output buffer
/// when a smaller size is requested than has been previously allocated.
/// Reset the rendering process.
/// </summary>
/// <param name="reclaimOnResize">True if reclaim, else false.</param>
template < typename T , typename bucketT >
void Renderer < T , bucketT > : : ReclaimOnResize ( bool reclaimOnResize )
{
ChangeVal ( [ & ] { m_ReclaimOnResize = reclaimOnResize ; } , FULL_RENDER ) ;
}
/// <summary>
/// Get whether to use transparency in the alpha channel.
/// This only applies when the number of channels is 4 and the output
/// image is Png.
/// Default: false.
/// </summary>
/// <returns>True if using transparency, else false.</returns>
template < typename T , typename bucketT > bool Renderer < T , bucketT > : : Transparency ( ) const { return m_Transparency ; }
/// <summary>
/// Set whether to use transparency in the alpha channel.
/// This only applies when the number of channels is 4 and the output
/// image is Png.
/// Set the render state to ACCUM_ONLY.
/// </summary>
/// <param name="transparency">True if using transparency, else false.</param>
template < typename T , typename bucketT >
void Renderer < T , bucketT > : : Transparency ( bool transparency )
{
ChangeVal ( [ & ] { m_Transparency = transparency ; } , ACCUM_ONLY ) ;
}
/// <summary>
/// Get the bytes per channel of the output image.
/// The only acceptable values are 1 and 2, and 2 is only
/// used when the output is Png.
/// Default: 1.
/// </summary>
/// <returns></returns>
template < typename T , typename bucketT > unsigned int Renderer < T , bucketT > : : BytesPerChannel ( ) const { return m_BytesPerChannel ; }
/// <summary>
/// Set the bytes per channel of the output image.
/// The only acceptable values are 1 and 2, and 2 is only
/// used when the output is Png.
/// Set the render state to ACCUM_ONLY.
/// </summary>
/// <param name="bytesPerChannel">The bytes per channel.</param>
template < typename T , typename bucketT >
void Renderer < T , bucketT > : : BytesPerChannel ( unsigned int bytesPerChannel )
{
ChangeVal ( [ & ]
{
if ( bytesPerChannel = = 0 | | bytesPerChannel > 2 )
m_BytesPerChannel = 1 ;
else
m_BytesPerChannel = bytesPerChannel ;
} , ACCUM_ONLY ) ;
}
/// <summary>
/// Get the pixel aspect ratio of the output image.
/// Default: 1.
/// </summary>
/// <returns>The pixel aspect ratio.</returns>
template < typename T , typename bucketT > T Renderer < T , bucketT > : : PixelAspectRatio ( ) const { return m_PixelAspectRatio ; }
/// <summary>
/// Set the pixel aspect ratio of the output image.
/// Reset the rendering process.
/// </summary>
/// <param name="pixelAspectRatio">The pixel aspect ratio.</param>
template < typename T , typename bucketT >
void Renderer < T , bucketT > : : PixelAspectRatio ( T pixelAspectRatio )
{
ChangeVal ( [ & ] { m_PixelAspectRatio = pixelAspectRatio ; } , FULL_RENDER ) ;
}
/// <summary>
/// Get the type of filter to use for preview renders during interactive rendering.
/// Using basic log scaling is quicker, but doesn't provide any bluring.
/// Full DE is much slower, but provides a more realistic preview of what the final image
/// will look like.
/// Default: FILTER_LOG.
/// </summary>
/// <returns>The type of filter to use</returns>
template < typename T , typename bucketT > eInteractiveFilter Renderer < T , bucketT > : : InteractiveFilter ( ) const { return m_InteractiveFilter ; }
/// <summary>
/// Set the type of filter to use for preview renders during interactive rendering.
/// Using basic log scaling is quicker, but doesn't provide any bluring.
/// Full DE is much slower, but provides a more realistic preview of what the final image
/// will look like.
/// Reset the rendering process.
/// </summary>
/// <param name="filter">The filter.</param>
template < typename T , typename bucketT >
void Renderer < T , bucketT > : : InteractiveFilter ( eInteractiveFilter filter )
{
ChangeVal ( [ & ] { m_InteractiveFilter = filter ; } , FULL_RENDER ) ;
}
/// <summary>
/// Non-virtual functions that might be needed by a derived class.
/// </summary>
/// <summary>
/// Prepare various values needed for producing a final output image.
/// </summary>
/// <param name="background">The computed background value, which may differ from the background member</param>
/// <param name="g">The computed gamma</param>
/// <param name="linRange">The computed linear range</param>
/// <param name="vibrancy">The computed vibrancy</param>
template < typename T , typename bucketT >
void Renderer < T , bucketT > : : PrepFinalAccumVals ( Color < T > & background , T & g , T & linRange , T & vibrancy )
{
//If they are doing incremental rendering, they can get here without doing a full temporal
//sample, which means the values will be zero.
vibrancy = m_Vibrancy = = 0 ? m_Ember . m_Vibrancy : m_Vibrancy ;
unsigned int vibGamCount = m_VibGamCount = = 0 ? 1 : m_VibGamCount ;
T gamma = m_Gamma = = 0 ? m_Ember . m_Gamma : m_Gamma ;
g = T ( 1.0 ) / ClampGte < T > ( gamma / vibGamCount , T ( 0.01 ) ) ; //Ensure a divide by zero doesn't occur.
linRange = GammaThresh ( ) ;
vibrancy / = vibGamCount ;
2014-09-01 00:25:15 -04:00
background . x = ( IsNearZero ( m_Background . r ) ? m_Ember . m_Background . r : m_Background . r ) / ( vibGamCount / T ( 256.0 ) ) ; //Background is [0, 1].
background . y = ( IsNearZero ( m_Background . g ) ? m_Ember . m_Background . g : m_Background . g ) / ( vibGamCount / T ( 256.0 ) ) ;
background . z = ( IsNearZero ( m_Background . b ) ? m_Ember . m_Background . b : m_Background . b ) / ( vibGamCount / T ( 256.0 ) ) ;
2014-07-08 03:11:14 -04:00
}
/// <summary>
/// Miscellaneous functions used only in this class.
/// </summary>
/// <summary>
/// Accumulate the samples to the histogram.
/// To be called after a sub batch is finished iterating.
/// </summary>
/// <param name="samples">The samples to accumulate</param>
/// <param name="sampleCount">The number of samples</param>
/// <param name="palette">The palette to use</param>
template < typename T , typename bucketT >
void Renderer < T , bucketT > : : Accumulate ( Point < T > * samples , unsigned int sampleCount , const Palette < bucketT > * palette )
{
unsigned int histIndex , intColorIndex , histSize = ( unsigned int ) m_HistBuckets . size ( ) ;
bucketT colorIndex , colorIndexFrac ;
const glm : : detail : : tvec4 < bucketT , glm : : defaultp > * dmap = & ( palette - > m_Entries [ 0 ] ) ;
//It's critical to understand what's going on here as it's one of the most important parts of the algorithm.
//A color value gets retrieved from the palette and
//its RGB values are added to the existing RGB values in the histogram bucket.
//Alpha is always 1 in the palettes, so that serves as the hit count.
//This differs from the original since redundantly adding both an alpha component and a hit count is omitted.
//This will eventually leave us with large values for pixels with many hits, which will be log scaled down later.
//Original used a function called bump_no_overflow(). Just do a straight add because the type will always be float or double.
//Doing so gives a 25% speed increase.
//Splitting these conditionals into separate loops makes no speed difference.
for ( unsigned int i = 0 ; i < sampleCount & & ! m_Abort ; i + + )
{
if ( Rotate ( ) ! = 0 )
{
T p00 = samples [ i ] . m_X - CenterX ( ) ;
T p11 = samples [ i ] . m_Y - CenterY ( ) ;
samples [ i ] . m_X = ( p00 * m_RotMat . A ( ) ) + ( p11 * m_RotMat . B ( ) ) + CenterX ( ) ;
samples [ i ] . m_Y = ( p00 * m_RotMat . D ( ) ) + ( p11 * m_RotMat . E ( ) ) + CenterY ( ) ;
}
//Checking this first before converting gives better performance than converting and checking a single value, which the original did.
//Second, an interesting optimization observation is that when keeping the bounds vars within m_CarToRas and calling its InBounds() member function,
//rather than here as members, about a 7% speedup is achieved. This is possibly due to the fact that data from m_CarToRas is accessed
//right after the call to Convert(), so some caching efficiencies get realized.
if ( m_CarToRas . InBounds ( samples [ i ] ) )
{
if ( samples [ i ] . m_VizAdjusted ! = 0 )
{
m_CarToRas . Convert ( samples [ i ] , histIndex ) ;
//There is a very slim chance that a point will be right on the border and will technically be in bounds, passing the InBounds() test,
//but ends up being mapped to a histogram bucket that is out of bounds due to roundoff error. Perform one final check before proceeding.
//This will result in a few points at the very edges getting discarded, but prevents a crash and doesn't seem to make a speed difference.
if ( histIndex < histSize )
{
//Linear is a linear scale for when the color index is not a whole number, which is most of the time.
//It uses a portion of the value of the index, and the remainder of the next index.
//Example: index = 25.7
//Fraction = 0.7
//Color = (dmap[25] * 0.3) + (dmap[26] * 0.7)
//Use overloaded addition and multiplication operators in vec4 to perform the accumulation.
if ( PaletteMode ( ) = = PALETTE_LINEAR )
{
colorIndex = ( bucketT ) samples [ i ] . m_ColorX * COLORMAP_LENGTH ;
intColorIndex = ( unsigned int ) colorIndex ;
if ( intColorIndex < 0 )
{
intColorIndex = 0 ;
colorIndexFrac = 0 ;
}
else if ( intColorIndex > = COLORMAP_LENGTH_MINUS_1 )
{
intColorIndex = COLORMAP_LENGTH_MINUS_1 - 1 ;
colorIndexFrac = 1 ;
}
else
{
colorIndexFrac = colorIndex - ( bucketT ) intColorIndex ; //Interpolate between intColorIndex and intColorIndex + 1.
}
if ( samples [ i ] . m_VizAdjusted = = 1 )
m_HistBuckets [ histIndex ] + = ( ( dmap [ intColorIndex ] * ( 1 - colorIndexFrac ) ) + ( dmap [ intColorIndex + 1 ] * colorIndexFrac ) ) ;
else
m_HistBuckets [ histIndex ] + = ( ( ( dmap [ intColorIndex ] * ( 1 - colorIndexFrac ) ) + ( dmap [ intColorIndex + 1 ] * colorIndexFrac ) ) * ( bucketT ) samples [ i ] . m_VizAdjusted ) ;
}
else if ( PaletteMode ( ) = = PALETTE_STEP )
{
intColorIndex = Clamp < unsigned int > ( ( unsigned int ) ( samples [ i ] . m_ColorX * COLORMAP_LENGTH ) , 0 , COLORMAP_LENGTH_MINUS_1 ) ;
if ( samples [ i ] . m_VizAdjusted = = 1 )
m_HistBuckets [ histIndex ] + = dmap [ intColorIndex ] ;
else
m_HistBuckets [ histIndex ] + = ( dmap [ intColorIndex ] * ( bucketT ) samples [ i ] . m_VizAdjusted ) ;
}
}
}
}
}
}
/// <summary>
/// Add a value to the density filtering buffer with a bounds check.
/// </summary>
/// <param name="bucket">The bucket being filtered</param>
/// <param name="i">The column of the bucket</param>
/// <param name="ii">The offset to add to the column</param>
/// <param name="j">The row of the bucket</param>
/// <param name="jj">The offset to add to the row</param>
template < typename T , typename bucketT >
void Renderer < T , bucketT > : : AddToAccum ( const glm : : detail : : tvec4 < bucketT , glm : : defaultp > & bucket , int i , int ii , int j , int jj )
{
if ( j + jj > = 0 & & j + jj < ( int ) m_SuperRasH & & i + ii > = 0 & & i + ii < ( int ) m_SuperRasW )
m_AccumulatorBuckets [ ( i + ii ) + ( ( j + jj ) * m_SuperRasW ) ] + = bucket ;
}
/// <summary>
/// Clip and gamma correct a pixel.
/// Because this code is used in both early and late clipping, a few extra arguments are passed
/// to specify what actions to take. Coupled with an additional template argument, this allows
/// using one function to perform all color clipping, gamma correction and final accumulation.
/// Template argument accumT is expected to match T for the case of early clipping, unsigned char for late clip for
/// images with one byte per channel and unsigned short for images with two bytes per channel.
/// </summary>
/// <param name="bucket">The pixel to correct</param>
/// <param name="background">The background color</param>
/// <param name="g">The gamma to use</param>
/// <param name="linRange">The linear range to use</param>
/// <param name="vibrancy">The vibrancy to use</param>
/// <param name="doAlpha">True if either early clip, or late clip with 4 channel output, else false.</param>
/// <param name="scale">True if late clip, else false.</param>
/// <param name="correctedChannels">The storage space for the corrected values to be written to</param>
template < typename T , typename bucketT >
template < typename accumT >
void Renderer < T , bucketT > : : GammaCorrection ( glm : : detail : : tvec4 < bucketT , glm : : defaultp > & bucket , Color < T > & background , T g , T linRange , T vibrancy , bool doAlpha , bool scale , accumT * correctedChannels )
{
T alpha , ls , a ;
bucketT newRgb [ 3 ] ; //Would normally use a Color<bucketT>, but don't want to call a needless constructor every time this function is called, which is once per pixel.
static T scaleVal = ( numeric_limits < accumT > : : max ( ) + 1 ) / T ( 256.0 ) ;
if ( bucket . a < = 0 )
{
alpha = 0 ;
ls = 0 ;
}
else
{
alpha = Palette < T > : : CalcAlpha ( bucket . a , g , linRange ) ;
ls = vibrancy * T ( 255 ) * alpha / bucket . a ;
ClampRef < T > ( alpha , 0 , 1 ) ;
}
2014-09-01 00:25:15 -04:00
Palette < T > : : template CalcNewRgb < bucketT > ( & bucket [ 0 ] , ls , HighlightPower ( ) , newRgb ) ;
2014-07-08 03:11:14 -04:00
for ( unsigned int rgbi = 0 ; rgbi < 3 ; rgbi + + )
{
a = newRgb [ rgbi ] + ( ( T ( 1.0 ) - vibrancy ) * T ( 255 ) * pow ( T ( bucket [ rgbi ] ) , g ) ) ;
if ( NumChannels ( ) < = 3 | | ! Transparency ( ) )
{
a + = ( ( T ( 1.0 ) - alpha ) * background [ rgbi ] ) ;
}
else
{
if ( alpha > 0 )
a / = alpha ;
else
a = 0 ;
}
if ( ! scale )
correctedChannels [ rgbi ] = ( accumT ) Clamp < T > ( a , 0 , 255 ) ; //Early clip, just assign directly.
else
correctedChannels [ rgbi ] = ( accumT ) ( Clamp < T > ( a , 0 , 255 ) * scaleVal ) ; //Final accum, multiply by 1 for 8 bpc, or 256 for 16 bpc.
}
if ( doAlpha )
{
if ( ! scale )
correctedChannels [ 3 ] = ( accumT ) alpha ; //Early clip, just assign alpha directly.
else if ( Transparency ( ) )
correctedChannels [ 3 ] = ( accumT ) ( alpha * numeric_limits < accumT > : : max ( ) ) ; //Final accum, 4 channels, using transparency. Scale alpha from 0-1 to 0-255 for 8 bpc or 0-65535 for 16 bpc.
else
correctedChannels [ 3 ] = numeric_limits < accumT > : : max ( ) ; //Final accum, 4 channels, but not using transparency. 255 for 8 bpc, 65535 for 16 bpc.
}
}
/// <summary>
/// Set the m_Iterator member to point to the appropriate
/// iterator based on whether the ember currently being rendered
/// contains xaos.
/// After assigning, initialize the xform selection buffer.
/// </summary>
/// <returns>True if assignment and distribution initialization succeeded, else false.</returns>
template < typename T , typename bucketT >
bool Renderer < T , bucketT > : : AssignIterator ( )
{
//Setup iterator and distributions.
//Both iterator types were setup in the constructor (add more in the future if needed).
//So simply assign the pointer to the correct type and re-initialize its distributions
//based on the current ember.
if ( XaosPresent ( ) )
m_Iterator = m_XaosIterator . get ( ) ;
else
m_Iterator = m_StandardIterator . get ( ) ;
//Timing t;
return m_Iterator - > InitDistributions ( m_Ember ) ;
//t.Toc("Distrib creation");
}
/// <summary>
/// Threading control.
/// </summary>
template < typename T , typename bucketT > void Renderer < T , bucketT > : : EnterRender ( ) { m_RenderingCs . Enter ( ) ; }
template < typename T , typename bucketT > void Renderer < T , bucketT > : : LeaveRender ( ) { m_RenderingCs . Leave ( ) ; }
template < typename T , typename bucketT > void Renderer < T , bucketT > : : EnterFinalAccum ( ) { m_FinalAccumCs . Enter ( ) ; m_InFinalAccum = true ; }
template < typename T , typename bucketT > void Renderer < T , bucketT > : : LeaveFinalAccum ( ) { m_FinalAccumCs . Leave ( ) ; m_InFinalAccum = false ; }
template < typename T , typename bucketT > void Renderer < T , bucketT > : : EnterResize ( ) { m_ResizeCs . Enter ( ) ; }
template < typename T , typename bucketT > void Renderer < T , bucketT > : : LeaveResize ( ) { m_ResizeCs . Leave ( ) ; }
template < typename T , typename bucketT > void Renderer < T , bucketT > : : Abort ( ) { m_Abort = true ; }
template < typename T , typename bucketT > bool Renderer < T , bucketT > : : Aborted ( ) { return m_Abort ; }
template < typename T , typename bucketT > bool Renderer < T , bucketT > : : InRender ( ) { return m_InRender ; }
template < typename T , typename bucketT > bool Renderer < T , bucketT > : : InFinalAccum ( ) { return m_InFinalAccum ; }
/// <summary>
/// Renderer properties, getters only.
/// </summary>
template < typename T , typename bucketT > unsigned int Renderer < T , bucketT > : : SuperRasW ( ) const { return m_SuperRasW ; }
template < typename T , typename bucketT > unsigned int Renderer < T , bucketT > : : SuperRasH ( ) const { return m_SuperRasH ; }
template < typename T , typename bucketT > unsigned int Renderer < T , bucketT > : : SuperSize ( ) const { return m_SuperSize ; }
template < typename T , typename bucketT > unsigned int Renderer < T , bucketT > : : FinalBufferSize ( ) const { return FinalRowSize ( ) * FinalRasH ( ) ; }
template < typename T , typename bucketT > unsigned int Renderer < T , bucketT > : : FinalRowSize ( ) const { return FinalRasW ( ) * PixelSize ( ) ; }
template < typename T , typename bucketT > unsigned int Renderer < T , bucketT > : : FinalDimensions ( ) const { return FinalRasW ( ) * FinalRasH ( ) ; }
template < typename T , typename bucketT > unsigned int Renderer < T , bucketT > : : PixelSize ( ) const { return NumChannels ( ) * BytesPerChannel ( ) ; }
template < typename T , typename bucketT > unsigned int Renderer < T , bucketT > : : GutterWidth ( ) const { return m_GutterWidth ; }
template < typename T , typename bucketT > unsigned int Renderer < T , bucketT > : : DensityFilterOffset ( ) const { return m_DensityFilterOffset ; }
template < typename T , typename bucketT > double Renderer < T , bucketT > : : ScaledQuality ( ) const { return m_ScaledQuality ; }
template < typename T , typename bucketT > T Renderer < T , bucketT > : : Scale ( ) const { return m_Scale ; }
template < typename T , typename bucketT > T Renderer < T , bucketT > : : PixelsPerUnitX ( ) const { return m_PixelsPerUnitX ; }
template < typename T , typename bucketT > T Renderer < T , bucketT > : : PixelsPerUnitY ( ) const { return m_PixelsPerUnitY ; }
template < typename T , typename bucketT > double Renderer < T , bucketT > : : LowerLeftX ( bool gutter ) const { return gutter ? m_CarToRas . CarLlX ( ) : m_LowerLeftX ; }
template < typename T , typename bucketT > double Renderer < T , bucketT > : : LowerLeftY ( bool gutter ) const { return gutter ? m_CarToRas . CarLlY ( ) : m_LowerLeftY ; }
template < typename T , typename bucketT > double Renderer < T , bucketT > : : UpperRightX ( bool gutter ) const { return gutter ? m_CarToRas . CarUrX ( ) : m_UpperRightX ; }
template < typename T , typename bucketT > double Renderer < T , bucketT > : : UpperRightY ( bool gutter ) const { return gutter ? m_CarToRas . CarUrY ( ) : m_UpperRightY ; }
template < typename T , typename bucketT > T Renderer < T , bucketT > : : K1 ( ) const { return m_K1 ; }
template < typename T , typename bucketT > T Renderer < T , bucketT > : : K2 ( ) const { return m_K2 ; }
2014-09-01 00:25:15 -04:00
template < typename T , typename bucketT > uint64_t Renderer < T , bucketT > : : TotalIterCount ( ) const { return ( uint64_t ) ( ( uint64_t ) Round ( m_ScaledQuality ) * ( uint64_t ) FinalRasW ( ) * ( uint64_t ) FinalRasH ( ) ) ; } //Use Round() because there can be some roundoff error when interpolating.
template < typename T , typename bucketT > uint64_t Renderer < T , bucketT > : : ItersPerTemporalSample ( ) const { return ( uint64_t ) ceil ( double ( TotalIterCount ( ) ) / double ( Passes ( ) * TemporalSamples ( ) ) ) ; }
2014-07-08 03:11:14 -04:00
template < typename T , typename bucketT > eProcessState Renderer < T , bucketT > : : ProcessState ( ) const { return m_ProcessState ; }
template < typename T , typename bucketT > eProcessAction Renderer < T , bucketT > : : ProcessAction ( ) const { return m_ProcessAction ; }
template < typename T , typename bucketT > EmberStats Renderer < T , bucketT > : : Stats ( ) const { return m_Stats ; }
template < typename T , typename bucketT > const CarToRas < T > * Renderer < T , bucketT > : : CoordMap ( ) const { return & m_CarToRas ; }
template < typename T , typename bucketT > glm : : detail : : tvec4 < bucketT , glm : : defaultp > * Renderer < T , bucketT > : : HistBuckets ( ) { return m_HistBuckets . data ( ) ; }
template < typename T , typename bucketT > glm : : detail : : tvec4 < bucketT , glm : : defaultp > * Renderer < T , bucketT > : : AccumulatorBuckets ( ) { return m_AccumulatorBuckets . data ( ) ; }
template < typename T , typename bucketT > SpatialFilter < T > * Renderer < T , bucketT > : : GetSpatialFilter ( ) { return m_SpatialFilter . get ( ) ; }
template < typename T , typename bucketT > TemporalFilter < T > * Renderer < T , bucketT > : : GetTemporalFilter ( ) { return m_TemporalFilter . get ( ) ; }
template < typename T , typename bucketT > DensityFilter < T > * Renderer < T , bucketT > : : GetDensityFilter ( ) { return m_DensityFilter . get ( ) ; }
/// <summary>
/// Ember wrappers, getters only.
/// </summary>
template < typename T , typename bucketT > bool Renderer < T , bucketT > : : XaosPresent ( ) { return m_Ember . XaosPresent ( ) ; }
template < typename T , typename bucketT > unsigned int Renderer < T , bucketT > : : FinalRasW ( ) const { return m_Ember . m_FinalRasW ; }
template < typename T , typename bucketT > unsigned int Renderer < T , bucketT > : : FinalRasH ( ) const { return m_Ember . m_FinalRasH ; }
template < typename T , typename bucketT > unsigned int Renderer < T , bucketT > : : Supersample ( ) const { return m_Ember . m_Supersample ; }
template < typename T , typename bucketT > unsigned int Renderer < T , bucketT > : : Passes ( ) const { return m_Ember . m_Passes ; }
template < typename T , typename bucketT > unsigned int Renderer < T , bucketT > : : TemporalSamples ( ) const { return m_Ember . m_TemporalSamples ; }
template < typename T , typename bucketT > unsigned int Renderer < T , bucketT > : : PaletteIndex ( ) const { return m_Ember . PaletteIndex ( ) ; }
template < typename T , typename bucketT > T Renderer < T , bucketT > : : Time ( ) const { return m_Ember . m_Time ; }
template < typename T , typename bucketT > T Renderer < T , bucketT > : : Quality ( ) const { return m_Ember . m_Quality ; }
template < typename T , typename bucketT > T Renderer < T , bucketT > : : SpatialFilterRadius ( ) const { return m_Ember . m_SpatialFilterRadius ; }
template < typename T , typename bucketT > T Renderer < T , bucketT > : : PixelsPerUnit ( ) const { return m_Ember . m_PixelsPerUnit ; }
template < typename T , typename bucketT > T Renderer < T , bucketT > : : Zoom ( ) const { return m_Ember . m_Zoom ; }
template < typename T , typename bucketT > T Renderer < T , bucketT > : : CenterX ( ) const { return m_Ember . m_CenterX ; }
template < typename T , typename bucketT > T Renderer < T , bucketT > : : CenterY ( ) const { return m_Ember . m_CenterY ; }
template < typename T , typename bucketT > T Renderer < T , bucketT > : : Rotate ( ) const { return m_Ember . m_Rotate ; }
template < typename T , typename bucketT > T Renderer < T , bucketT > : : Hue ( ) const { return m_Ember . m_Hue ; }
template < typename T , typename bucketT > T Renderer < T , bucketT > : : Brightness ( ) const { return m_Ember . m_Brightness ; }
template < typename T , typename bucketT > T Renderer < T , bucketT > : : Gamma ( ) const { return m_Ember . m_Gamma ; }
template < typename T , typename bucketT > T Renderer < T , bucketT > : : Vibrancy ( ) const { return m_Ember . m_Vibrancy ; }
template < typename T , typename bucketT > T Renderer < T , bucketT > : : GammaThresh ( ) const { return m_Ember . m_GammaThresh ; }
template < typename T , typename bucketT > T Renderer < T , bucketT > : : HighlightPower ( ) const { return m_Ember . m_HighlightPower ; }
template < typename T , typename bucketT > Color < T > Renderer < T , bucketT > : : Background ( ) const { return m_Ember . m_Background ; }
template < typename T , typename bucketT > const Xform < T > * Renderer < T , bucketT > : : Xforms ( ) const { return m_Ember . Xforms ( ) ; }
template < typename T , typename bucketT > Xform < T > * Renderer < T , bucketT > : : NonConstXforms ( ) { return m_Ember . NonConstXforms ( ) ; }
template < typename T , typename bucketT > unsigned int Renderer < T , bucketT > : : XformCount ( ) const { return m_Ember . XformCount ( ) ; }
template < typename T , typename bucketT > const Xform < T > * Renderer < T , bucketT > : : FinalXform ( ) const { return m_Ember . FinalXform ( ) ; }
template < typename T , typename bucketT > Xform < T > * Renderer < T , bucketT > : : NonConstFinalXform ( ) { return m_Ember . NonConstFinalXform ( ) ; }
template < typename T , typename bucketT > bool Renderer < T , bucketT > : : UseFinalXform ( ) const { return m_Ember . UseFinalXform ( ) ; }
template < typename T , typename bucketT > const Palette < T > * Renderer < T , bucketT > : : GetPalette ( ) const { return & m_Ember . m_Palette ; }
template < typename T , typename bucketT > ePaletteMode Renderer < T , bucketT > : : PaletteMode ( ) const { return m_Ember . m_PaletteMode ; }
/// <summary>
/// Iterator wrappers.
/// </summary>
template < typename T , typename bucketT > const unsigned char * Renderer < T , bucketT > : : XformDistributions ( ) const { return m_Iterator ! = NULL ? m_Iterator - > XformDistributions ( ) : NULL ; }
template < typename T , typename bucketT > const unsigned int Renderer < T , bucketT > : : XformDistributionsSize ( ) const { return m_Iterator ! = NULL ? m_Iterator - > XformDistributionsSize ( ) : 0 ; }
template < typename T , typename bucketT > Point < T > * Renderer < T , bucketT > : : Samples ( unsigned int threadIndex ) const { return threadIndex < m_Samples . size ( ) ? ( Point < T > * ) m_Samples [ threadIndex ] . data ( ) : NULL ; }
}