#include "EmberPch.h" #include "Renderer.h" namespace EmberNs { ///

/// Constructor that sets default values and allocates iterators.
/// The thread count is set to the number of cores detected on the system.
///
template Renderer::Renderer()
{
	//Behavior flags, all off until a caller turns them on.
	m_Abort = false;
	m_LockAccum = false;
	m_EarlyClip = false;
	m_YAxisUp = false;
	m_InsertPalette = false;
	m_ReclaimOnResize = false;
	m_Transparency = false;

	//Output format and sizing defaults.
	m_NumChannels = 3;
	m_BytesPerChannel = 1;
	m_PixelAspectRatio = 1;
	m_SuperSize = 0;
	m_SubBatchSize = 10 * 1024;

	//This goes through ChangeVal(), so it stays between the defaults above and the state below.
	ThreadCount(Timing::ProcessorCount());

	//Own one iterator of each kind and start out pointing at the standard one.
	m_StandardIterator.reset(new StandardIterator());
	m_XaosIterator.reset(new XaosIterator());
	m_Iterator = m_StandardIterator.get();

	//No progress callback until one is assigned.
	m_Callback = NULL;
	m_ProgressParameter = NULL;

	//Nothing has been rendered yet.
	m_LastPass = 0;
	m_LastTemporalSample = 0;
	m_LastIter = 0;
	m_LastIterPercent = 0;
	m_InRender = false;
	m_InFinalAccum = false;

	//The first call to Run() will be a full render.
	m_InteractiveFilter = FILTER_LOG;
	m_ProcessState = NONE;
	m_ProcessAction = FULL_RENDER;
}

///

/// Virtual destructor so derived class destructors get called.
///
template Renderer::~Renderer()
{
	//Nothing to release by hand here.
}

///

/// Compute the bounds of the histogram and density filtering buffers.
/// These are affected by the final requested dimensions, spatial and density
/// filter sizes and supersampling.
/// Writes m_GutterWidth, m_DensityFilterOffset, m_SuperRasW, m_SuperRasH and m_SuperSize.
/// If no spatial filter exists, its contribution to the gutter is taken as 0
/// instead of dereferencing an empty pointer.
///
template void Renderer::ComputeBounds()
{
	unsigned int maxDEFilterWidth = 0;

	//Spatial filter part of the gutter: (final filter width - supersample) / 2, never below 0.
	//The comparison is done before subtracting so a filter narrower than the supersample
	//gives 0 rather than a wrapped-around value, and a missing filter is skipped entirely.
	if (m_SpatialFilter.get() && m_SpatialFilter->FinalFilterWidth() > Supersample())
		m_GutterWidth = (m_SpatialFilter->FinalFilterWidth() - Supersample()) / 2;
	else
		m_GutterWidth = 0;

	//Check the size of the density estimation filter.
	//If the radius of the density estimation filter is greater than the
	//gutter width, have to pad with more. Otherwise, use the same value.
	for (unsigned int i = 0; i < m_Embers.size(); i++)
		maxDEFilterWidth = max((unsigned int)(ceil(m_Embers[i].m_MaxRadDE) * m_Ember.m_Supersample), maxDEFilterWidth);

	//Need an extra ss = (int)floor(m_Supersample / 2.0) of pixels so that a local iteration count for DE can be determined.//SMOULDER
	if (maxDEFilterWidth > 0)
		maxDEFilterWidth += (unsigned int)Floor(m_Ember.m_Supersample / T(2));

	//To have a fully present set of pixels for the spatial filter, must
	//add the DE filter width to the spatial filter width.//SMOULDER
	m_DensityFilterOffset = maxDEFilterWidth;
	m_GutterWidth += m_DensityFilterOffset;

	//The gutter is added on both sides of each supersampled dimension.
	m_SuperRasW = (Supersample() * FinalRasW()) + (2 * m_GutterWidth);
	m_SuperRasH = (Supersample() * FinalRasH()) + (2 * m_GutterWidth);
	m_SuperSize = m_SuperRasW * m_SuperRasH;
}

///

/// Compute the camera.
/// This sets up the bounds of the cartesian plane that the raster bounds correspond to.
/// This must be called after ComputeBounds() which sets up the raster bounds.
///
template void Renderer::ComputeCamera()
{
	//Rotation is independent of the bounds, so set it up first.
	m_RotMat.MakeID();
	m_RotMat.Rotate(-Rotate());

	//Zoom is a power of two scale, and quality scales with its square.
	m_Scale = pow(T(2.0), Zoom());
	m_ScaledQuality = Quality() * m_Scale * m_Scale;

	//Y keeps the plain scaled value, only X is corrected for pixel aspect ratio.
	m_PixelsPerUnitX = PixelsPerUnit() * m_Scale;
	m_PixelsPerUnitY = m_PixelsPerUnitX;
	m_PixelsPerUnitX /= PixelAspectRatio();

	//Gutter size expressed in cartesian units along each axis.
	T verticalShift = 0;
	T gutterX = T(m_GutterWidth) / (Supersample() * m_PixelsPerUnitX);
	T gutterY = T(m_GutterWidth) / (Supersample() * m_PixelsPerUnitY);

	//These go from ll to ur, moving from negative to positive.
	m_LowerLeftX = CenterX() - FinalRasW() / m_PixelsPerUnitX / T(2.0);
	m_LowerLeftY = CenterY() - FinalRasH() / m_PixelsPerUnitY / T(2.0);
	m_UpperRightX = m_LowerLeftX + FinalRasW() / m_PixelsPerUnitX;
	m_UpperRightY = m_LowerLeftY + FinalRasH() / m_PixelsPerUnitY;

	//Grow the visible bounds by the gutter to get the bounds of the supersampled raster.
	T llX = m_LowerLeftX - gutterX;
	T llY = m_LowerLeftY - gutterY + verticalShift;
	T urX = m_UpperRightX + gutterX;
	T urY = m_UpperRightY + gutterY + verticalShift;

	m_CarToRas.Init(llX, llY, urX, urY, m_SuperRasW, m_SuperRasH, PixelAspectRatio());
}

///

/// Abort the render and call a function to do something, most likely change a value. /// Then update the current process action to the one specified. /// The current process action will only be set if it makes sense based /// on the current process state. If the value specified doesn't make sense /// the next best choice will be made. If nothing makes sense, a complete /// re-render will be triggered on the next call to Run(). ///

/// The function to execute /// The desired process action template void Renderer::ChangeVal(std::function func, eProcessAction action) { Abort(); EnterRender(); func(); //If they want a full render, don't bother inspecting process state, just start over. if (action == FULL_RENDER) { m_ProcessState = NONE; m_ProcessAction = FULL_RENDER; } //Keep iterating is when rendering has completed and the user increases the quality. //Rendering can be started where it left off by adding just the difference between the //new and old quality values. else if (action == KEEP_ITERATING) { if (m_ProcessState == ACCUM_DONE && TemporalSamples() == 1 && Passes() == 1) { m_ProcessState = ITER_STARTED; m_ProcessAction = KEEP_ITERATING; } else//Invaid process state to handle KEEP_ITERATING, so just start over. { m_ProcessState = NONE; m_ProcessAction = FULL_RENDER; } } else if (action == FILTER_AND_ACCUM) { //If in the middle of a render, cannot skip to filtering or accum, so just start over. if (m_ProcessState == NONE || m_ProcessState == ITER_STARTED) { m_ProcessState = NONE; m_ProcessAction = FULL_RENDER; } //If passes == 1, set the state to ITER_DONE and the next process action to FILTER_AND_ACCUM. else { m_ProcessState = Passes() == 1 ? ITER_DONE : NONE; m_ProcessAction = Passes() == 1 ? FILTER_AND_ACCUM : FULL_RENDER;//Cannot just filter if passes > 1 because filtering is done with each pass. } } //Run accum only. else if (action == ACCUM_ONLY) { //Doesn't make sense if in the middle of iterating, so just start over. if (m_ProcessState == NONE || m_ProcessState == ITER_STARTED) { m_ProcessAction = FULL_RENDER; } else if (m_ProcessState == ITER_DONE)//If iterating is done, can start at density filtering and proceed. { m_ProcessAction = FILTER_AND_ACCUM; } else if (m_ProcessState == FILTER_DONE)//Density filtering is done, so the process action is assigned as desired. 
{ m_ProcessAction = ACCUM_ONLY; } else if (m_ProcessState == ACCUM_DONE)//Final accum is done, so back up and run final accum again. { m_ProcessState = FILTER_DONE; m_ProcessAction = ACCUM_ONLY; } } LeaveRender(); } ///

/// Set the current ember. /// This will also populate the vector of embers with a single element copy /// of the ember passed in. /// Temporal samples will be set to 1 since there's only a single ember. ///

/// The ember to assign /// The requested process action. Note that it's critical the user supply the proper value here. /// For example: Changing dimensions without setting action to FULL_RENDER will crash the program. /// However, changing only the brightness and setting action to ACCUM_ONLY is perfectly fine. /// template void Renderer::SetEmber(Ember& ember, eProcessAction action) { ChangeVal([&] { m_Embers.clear(); m_Embers.push_back(ember); m_Embers[0].m_TemporalSamples = 1;//Set temporal samples here to 1 because using the real value only makes sense when using a vector of Embers for animation. m_Ember = m_Embers[0]; }, action); } ///

/// Set the vector of embers and set the m_Ember member to a copy of the first element. /// Reset the rendering process. ///

/// The vector of embers template void Renderer::SetEmber(vector>& embers) { ChangeVal([&] { m_Embers = embers; if (!m_Embers.empty()) m_Ember = m_Embers[0]; }, FULL_RENDER); } ///

/// Add an ember to the end of the embers vector and reset the rendering process. /// Reset the rendering process. ///

/// The ember to add template void Renderer::AddEmber(Ember& ember) { ChangeVal([&] { m_Embers.push_back(ember); if (m_Embers.size() == 1) m_Ember = m_Embers[0]; }, FULL_RENDER); } ///

/// Create the temporal filter if the current filter parameters differ /// from the last temporal filter created. ///

/// True if a new filter instance was created, else false. /// True if the filter is not NULL (whether a new one was created or not), else false. template bool Renderer::CreateTemporalFilter(bool& newAlloc) { newAlloc = false; //Use intelligent testing so it isn't created every time a new ember is passed in. if ((!m_TemporalFilter.get()) || (m_Ember.m_Passes != m_TemporalFilter->Passes()) || (m_Ember.m_TemporalSamples != m_TemporalFilter->TemporalSamples()) || (m_Ember.m_TemporalFilterType != m_TemporalFilter->FilterType()) || (m_Ember.m_TemporalFilterWidth != m_TemporalFilter->FilterWidth()) || (m_Ember.m_TemporalFilterExp != m_TemporalFilter->FilterExp())) { m_TemporalFilter = auto_ptr>( TemporalFilterCreator::Create(m_Ember.m_TemporalFilterType, m_Ember.m_Passes, m_Ember.m_TemporalSamples, m_Ember.m_TemporalFilterWidth, m_Ember.m_TemporalFilterExp)); newAlloc = true; } return m_TemporalFilter.get() != NULL; } ///

/// Resize the passed in vector to be large enough to handle the output image. /// If m_ReclaimOnResize is true, and the vector is already larger than needed, /// it will be shrunk to the needed size. However if m_ReclaimOnResize is false, /// it will be left alone if already large enough. /// ComputeBounds() must be called before calling this function. ///

/// The vector to allocate /// True if the vector contains enough space to hold the output image template bool Renderer::PrepFinalAccumVector(vector& pixels) { EnterResize(); size_t size = FinalBufferSize(); if (m_ReclaimOnResize) { if (pixels.size() != size) { pixels.resize(size); pixels.shrink_to_fit(); } } else { if (pixels.size() < size) pixels.resize(size); } LeaveResize(); return pixels.size() >= size;//Ensure allocation went ok. } ///

/// The main render loop. This is the core of the algorithm. /// The processing steps are: Iterating, density filtering, final accumulation. /// Various functions in it are virtual so they will resolve /// to whatever overrides are provided in derived classes. This /// future-proofs the algorithm for GPU-based renderers. /// If the caller calls Abort() at any time, or the progress function returns 0, /// the entire rendering process will exit as soon as it can. /// The loop structure is: /// { /// Passes (Default 1) /// { /// Temporal Samples (Default 1 for single image) /// { /// Iterate (Either to completion or to a specified number of iterations) /// { /// } /// } /// } /// /// Density filtering (Basic log, or full density estimation) /// Final accumulation (Color correction and spatial filtering) /// } /// This loop structure has admittedly been severely butchered from what /// flam3 did. The reason is that it was made to support interactive rendering /// that can exit the process and pick up where it left off in response to the /// user changing values in a fractal flame GUI editor. /// To achieve this, each step in the rendering process is given an enumeration state /// as well as a goto label. This allows the renderer to pick up in the state it left /// off in if no changes prohibiting that have been made. /// It also allows for the bare minimum amount of processing needed to complete the requested /// action. For example, if the process has completed and the user only adjusts the brightness /// of the last rendered ember then there is no need to perform the entire iteration process /// over again. Rather, only final accumulation is needed. ///

/// Storage for the final image. It will be allocated if needed. /// The time if animating, else ignored. /// Run a specified number of sub batches. Default: 0, meaning run to completion. /// True to force rendering a complete image even if iterating is not complete, else don't. Default: false. /// Offset in finalImage to store the pixels to. Default: 0. /// True if nothing went wrong, else false. template eRenderStatus Renderer::Run(vector& finalImage, double time, unsigned int subBatchCountOverride, bool forceOutput, size_t finalOffset) { m_InRender = true; EnterRender(); m_Abort = false; bool filterAndAccumOnly = (m_ProcessAction == FILTER_AND_ACCUM && Passes() == 1); bool accumOnly = m_ProcessAction == ACCUM_ONLY; bool resume = m_ProcessState != NONE; bool newFilterAlloc; unsigned int temporalSample, pass; T deTime; eRenderStatus success = RENDER_OK; //double iterationTime = 0; //double accumulationTime = 0; //Timing it; //Reset timers and progress percent if: Beginning anew or only filtering and/or accumulating. if (!resume || accumOnly || filterAndAccumOnly) { if (!resume)//Only set this if it's the first run through. m_ProcessState = ITER_STARTED; m_RenderTimer.Tic(); m_ProgressTimer.Tic(); } if (!resume)//Beginning, reset everything. { m_LastPass = 0; m_LastTemporalSample = 0; m_LastIter = 0; m_LastIterPercent = 0; m_Stats.Clear(); m_Gamma = 0; m_Vibrancy = 0;//Accumulate these after each temporal sample. m_VibGamCount = 0; m_Background.Clear(); } //User requested an increase in quality after finishing. else if (m_ProcessState == ITER_STARTED && m_ProcessAction == KEEP_ITERATING && TemporalSamples() == 1 && Passes() == 1) { m_LastPass = 0; m_LastTemporalSample = 0; m_LastIter = m_Stats.m_Iters; m_LastIterPercent = 0;//Might skip a progress update, but shouldn't matter. m_Gamma = 0; m_Vibrancy = 0; m_VibGamCount = 0; m_Background.Clear(); } pass = (resume ? m_LastPass : 0); //Make sure values are within valid range. 
ClampGteRef(m_Ember.m_Passes, 1u); ClampGteRef(m_Ember.m_Supersample, 1u); //Make sure to get most recent update since loop won't be entered to call Interp(). //Vib, gam and background are normally summed for each temporal sample. However if iteration is skipped, make sure to get the latest. if ((filterAndAccumOnly || accumOnly) && TemporalSamples() == 1)//Disallow jumping when temporal samples > 1. { m_Ember = m_Embers[0]; m_Vibrancy = m_Ember.m_Vibrancy; m_Gamma = m_Ember.m_Gamma; m_Background = m_Ember.m_Background; if (filterAndAccumOnly) goto FilterAndAccum; if (accumOnly) goto AccumOnly; } //it.Tic(); //Interpolate. if (m_Embers.size() > 1) Interpolater::Interpolate(m_Embers, T(time), 0, m_Ember); //it.Toc("Interp 1"); //Save only for palette insertion. if (m_InsertPalette && BytesPerChannel() == 1) m_TempEmber = m_Ember; //Field would go here, however Ember omits it. Would need temps for width and height if ever implemented. CreateSpatialFilter(newFilterAlloc); CreateTemporalFilter(newFilterAlloc); ComputeBounds(); if (m_SpatialFilter.get() == NULL || m_TemporalFilter.get() == NULL) { m_ErrorReport.push_back("Spatial and temporal filter allocations failed, aborting.\n"); success = RENDER_ERROR; goto Finish; } if (!resume && !Alloc()) { m_ErrorReport.push_back("Histogram, accumulator and samples buffer allocations failed, aborting.\n"); success = RENDER_ERROR; goto Finish; } if (!resume) ResetBuckets(true, false);//Only reset hist here and do accum when needed later on. //Passes, outermost loop 1. for (; (pass < Passes()) && !m_Abort;) { deTime = T(time) + m_TemporalFilter->Deltas()[pass * m_Ember.m_TemporalSamples]; //Interpolate and get an ember for DE purposes. //Additional interpolation will be done in the temporal samples loop. 
//it.Tic(); if (m_Embers.size() > 1) Interpolater::Interpolate(m_Embers, deTime, 0, m_Ember); //it.Toc("Interp 2"); ClampGte(m_Ember.m_MinRadDE, 0); ClampGte(m_Ember.m_MaxRadDE, 0); if (!CreateDEFilter(newFilterAlloc)) { m_ErrorReport.push_back("Density filter creation failed, aborting.\n"); success = RENDER_ERROR; goto Finish; } //Temporal samples, loop 2. temporalSample = resume ? m_LastTemporalSample : 0; for (; (temporalSample < TemporalSamples()) && !m_Abort;) { T colorScalar = m_TemporalFilter->Filter()[pass * TemporalSamples() + temporalSample]; T temporalTime = T(time) + m_TemporalFilter->Deltas()[pass * TemporalSamples() + temporalSample]; //Interpolate again. //it.Tic(); if (m_Embers.size() > 1) Interpolater::Interpolate(m_Embers, temporalTime, 0, m_Ember);//This will perform all necessary precalcs via the ember/xform/variation assignment operators. //it.Toc("Interp 3"); if (!resume && !AssignIterator()) { m_ErrorReport.push_back("Iterator assignment failed, aborting.\n"); success = RENDER_ERROR; goto Finish; } ComputeCamera(); //For each temporal sample, the palette m_Dmap needs to be re-created with color scalar. 1 if no temporal samples. MakeDmap(colorScalar); //The actual number of times to iterate. Each thread will get (totalIters / ThreadCount) iters to do. //This is based on zoom and scale calculated in ComputeCamera(). //Note that the iter count is based on the final image dimensions, and not the super sampled dimensions. uint64_t totalIterCount = TotalIterCount(); uint64_t itersPerTemporalSample = ItersPerTemporalSample();//The total number of iterations for this temporal sample in this pass without overrides. uint64_t sampleItersToDo;//The number of iterations to actually do in this sample in this pass, considering overrides. if (subBatchCountOverride > 0) sampleItersToDo = subBatchCountOverride * SubBatchSize() * ThreadCount();//Run a specific number of sub batches. 
else sampleItersToDo = itersPerTemporalSample;//Run as many iters as specified to complete this temporal sample. sampleItersToDo = min(sampleItersToDo, itersPerTemporalSample - m_LastIter); EmberStats stats = Iterate(sampleItersToDo, pass, temporalSample);//The heavy work is done here. //If no iters were executed, something went catastrophically wrong. if (stats.m_Iters == 0) { m_ErrorReport.push_back("Zero iterations ran, rendering failed, aborting.\n"); success = RENDER_ERROR; Abort(); goto Finish; } if (m_Abort) { success = RENDER_ABORT; goto Finish; } //Accumulate stats whether this batch ran to completion or exited prematurely. m_LastIter += stats.m_Iters;//Sum of iter count of all threads, reset each temporal sample. m_Stats.m_Iters += stats.m_Iters;//Sum of iter count of all threads, cumulative from beginning to end. m_Stats.m_Badvals += stats.m_Badvals; m_Stats.m_IterMs += stats.m_IterMs; //After each temporal sample, accumulate these. //Allow for incremental rendering by only taking action if the iter loop for this temporal sample is completely done. if (m_LastIter >= itersPerTemporalSample) { m_Vibrancy += m_Ember.m_Vibrancy; m_Gamma += m_Ember.m_Gamma; m_Background.r += m_Ember.m_Background.r; m_Background.g += m_Ember.m_Background.g; m_Background.b += m_Ember.m_Background.b; m_VibGamCount++; m_LastIter = 0; temporalSample++; } m_LastTemporalSample = temporalSample; if (subBatchCountOverride > 0)//Don't keep going through this loop if only doing an incremental render. break; }//Temporal samples. //If we've completed all temporal samples and all passes, then it was a complete render, so report progress. 
if ((Passes() == 1 || pass == Passes() - 1) && (temporalSample >= TemporalSamples())) { m_ProcessState = ITER_DONE; if (m_Callback && !m_Callback->ProgressFunc(m_Ember, m_ProgressParameter, 100.0, 0, 0)) { Abort(); success = RENDER_ABORT; goto Finish; } } FilterAndAccum: if (filterAndAccumOnly || temporalSample >= TemporalSamples() || forceOutput) { //t.Toc("Iterating and accumulating"); //Compute k1 and k2. eRenderStatus fullRun = RENDER_OK;//Whether density filtering was run to completion without aborting prematurely or triggering an error. T passFilter = T(1) / T(Passes());//Original used an array, but every element in the array had the same value, so just use a single value here. T area = FinalRasW() * FinalRasH() / (m_PixelsPerUnitX * m_PixelsPerUnitY);//Need to use temps from field if ever implemented. m_K1 = (Brightness() * T(268.0) * passFilter) / 256; //When doing an interactive render, force output early on in the render process, before all iterations are done. //This presents a problem with the normal calculation of K2 since it relies on the quality value; it will scale the colors //to be very dark. Correct it by pretending the number of iters done is the exact quality desired and then scale according to that. if (forceOutput) { T quality = ((T)m_Stats.m_Iters / (T)FinalDimensions()) * (m_Scale * m_Scale); m_K2 = (Supersample() * Supersample() * Passes()) / (area * quality * m_TemporalFilter->SumFilt()); } else m_K2 = (Supersample() * Supersample() * Passes()) / (area * m_ScaledQuality * m_TemporalFilter->SumFilt()); if (filterAndAccumOnly || pass == 0) ResetBuckets(false, true);//Only the histogram was reset above, now reset the density filtering buffer. //t.Tic(); //Apply appropriate filter if iterating is complete. if (filterAndAccumOnly || temporalSample >= TemporalSamples()) { fullRun = m_DensityFilter.get() ? GaussianDensityFilter() : LogScaleDensityFilter(); } else { //Apply requested filter for a forced output during interactive rendering. 
if (m_DensityFilter.get() && m_InteractiveFilter == FILTER_DE) fullRun = GaussianDensityFilter(); else if (!m_DensityFilter.get() || m_InteractiveFilter == FILTER_LOG) fullRun = LogScaleDensityFilter(); } //Only update state if iterating and filtering finished completely (didn't arrive here via forceOutput). if (fullRun == RENDER_OK && m_ProcessState == ITER_DONE && (Passes() == 1 || pass == Passes() - 1)) m_ProcessState = FILTER_DONE; //Take special action if filtering exited prematurely. if (fullRun != RENDER_OK) { if (Passes() > 1)//Since all filtering is cummulative with passes > 1, must restart the entire process. { m_ProcessState = NONE; m_ProcessAction = FULL_RENDER; } ResetBuckets(false, true);//Reset the accumulator, come back and try again on the next call. success = fullRun; goto Finish; } if (m_Abort) { success = RENDER_ABORT; goto Finish; } //t.Toc("Density estimation filtering time: ", true); } //Only increment pass if the temporal samples loop has been completed, which could have been done incrementally. //Also skip if rendering jumped straight here after completely finishing beforehand. if (!filterAndAccumOnly && temporalSample >= TemporalSamples())//This may not work if filtering was prematurely exited. pass++; if (!filterAndAccumOnly) m_LastPass = pass; if (subBatchCountOverride > 0)//Don't keep going through this loop if only doing an incremental render. break; }//Passes. AccumOnly: if (m_ProcessState == FILTER_DONE || forceOutput) { if (m_Callback && !m_Callback->ProgressFunc(m_Ember, m_ProgressParameter, 0, 2, 0))//Original only allowed stages 0 and 1. Add 2 to mean final accum. { Abort(); success = RENDER_ABORT; goto Finish; } //Make sure a filter has been created. CreateSpatialFilter(newFilterAlloc); if (AccumulatorToFinalImage(finalImage, finalOffset) == RENDER_OK) { m_Stats.m_RenderMs = m_RenderTimer.Toc();//Record total time from the very beginning to the very end, including all intermediate calls. 
//Even though the ember changes throughought the inner loops because of interpolation, it's probably ok to assign here. //This will hold the last interpolated value (even though spatial and temporal filters were created based off of one of the first interpolated values). m_LastEmber = m_Ember; if (m_ProcessState == FILTER_DONE)//Only update state if gotten here legitimately, and not via forceOutput. { m_ProcessState = ACCUM_DONE; if (m_Callback && !m_Callback->ProgressFunc(m_Ember, m_ProgressParameter, 100.0, 2, 0))//Finished. { Abort(); success = RENDER_ABORT; goto Finish; } } } else { success = RENDER_ERROR; } } Finish: if (success == RENDER_OK && m_Abort)//If everything ran ok, but they've aborted, record abort as the status. success = RENDER_ABORT; else if (success != RENDER_OK)//Regardless of abort status, if there was an error, leave that as the return status. Abort(); LeaveRender(); m_InRender = false; return success; } ///

/// Return EmberImageComments object with image comments filled out. /// Run() should have completed before calling this. ///

/// The depth of the edit tags /// If true use integers instead of floating point numbers when embedding a non-hex formatted palette, else use floating point numbers. /// If true, embed a hexadecimal palette instead of Xml Color tags, else use Xml color tags. /// The EmberImageComments object with image comments filled out template EmberImageComments Renderer::ImageComments(unsigned int printEditDepth, bool intPalette, bool hexPalette) { ostringstream ss; EmberImageComments comments; ss.imbue(std::locale("")); comments.m_Genome = m_EmberToXml.ToString(m_Ember, "", printEditDepth, false, intPalette, hexPalette); ss << ((double)m_Stats.m_Badvals / (double)m_Stats.m_Iters);//Percentage of bad values to iters. comments.m_Badvals = ss.str(); ss.str(""); ss << m_Stats.m_Iters; comments.m_NumIters = ss.str(); ss.str("");//Total iters. ss << (m_Stats.m_RenderMs / 1000.0); comments.m_Runtime = ss.str();//Number of seconds for iterating, accumulating and filtering. return comments; } ///

/// Return the amount of memory needed to render the current ember. /// Optionally include the memory needed for the final output image. ///

/// If true include the memory needed for the final output image, else don't. /// The memory required to render the current ember template uint64_t Renderer::MemoryRequired(bool includeFinal) { bool newFilterAlloc = false; CreateSpatialFilter(newFilterAlloc); CreateTemporalFilter(newFilterAlloc); ComputeBounds(); //Because ComputeBounds() was called, this includes gutter. uint64_t histSize = SuperSize() * sizeof(glm::detail::tvec4); return (histSize * 2) + (includeFinal ? FinalBufferSize() : 0);//Multiply hist by 2 to account for the density filtering buffer which is the same size as the histogram. } ///

/// Virtual functions to be overridden in derived renderers that use the GPU. ///

///

/// The amount of RAM available to render with. ///

/// An unsigned 64-bit integer specifying how much memory is available template uint64_t Renderer::MemoryAvailable() { uint64_t memAvailable = 0; #ifdef WIN32 MEMORYSTATUSEX stat; stat.dwLength = sizeof(stat); GlobalMemoryStatusEx(&stat); memAvailable = stat.ullTotalPhys; #elif defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE) memAvailable = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE); #elif defined __APPLE__ #ifdef __LP64__ long physmem; size_t len = sizeof(physmem); static int mib[2] = { CTL_HW, HW_MEMSIZE }; #else unsigned int physmem; size_t len = sizeof(physmem); static int mib[2] = { CTL_HW, HW_PHYSMEM }; #endif if (sysctl(mib, 2, &physmem, &len, NULL, 0) == 0 && len == sizeof(physmem)) { memAvailable = physmem; } else { cout << "Warning: unable to determine physical memory." << endl; memAvailable = 4e9; } #else cout << "Warning: unable to determine physical memory." << endl; memAvailable = 4e9; #endif return memAvailable; } ///

/// Stop rendering, ensure all locks are exited and reset the rendering state.
///
template void Renderer::Reset()
{
	Abort();

	//Pass through the render and final accum sections once, in this order.
	EnterRender();
	EnterFinalAccum();
	LeaveFinalAccum();
	LeaveRender();

	//The next call to Run() starts from scratch.
	m_ProcessState = NONE;
	m_ProcessAction = FULL_RENDER;
}

///

/// Get a status indicating whether this renderer is ok. /// Return true for this class, derived classes will inspect GPU hardware /// to determine if they are ok. ///

/// Always true for this class template bool Renderer::Ok() const { return true; } ///

/// Create the density filter if the current filter parameters differ /// from the last density filter created. /// The filter will be deleted if the max DE radius is 0, in which case regular /// log scale filtering will be used. ///

/// True if a new filter instance was created, else false. /// True if the filter is not NULL (whether a new one was created or not) or if max rad is 0, else false. template bool Renderer::CreateDEFilter(bool& newAlloc) { //If they wanted DE, create it if needed, else clear the last DE filter which means we'll do regular log filtering after iters are done. newAlloc = false; if (m_Ember.m_MaxRadDE > 0) { //Use intelligent testing so it isn't created every time a new ember is passed in. if ((!m_DensityFilter.get()) || (m_Ember.m_MinRadDE != m_DensityFilter->MinRad()) || (m_Ember.m_MaxRadDE != m_DensityFilter->MaxRad()) || (m_Ember.m_CurveDE != m_DensityFilter->Curve()) || (m_Ember.m_Supersample != m_DensityFilter->Supersample())) { m_DensityFilter = auto_ptr>(new DensityFilter(m_Ember.m_MinRadDE, m_Ember.m_MaxRadDE, m_Ember.m_CurveDE, m_Ember.m_Supersample)); newAlloc = true; } if (newAlloc) { if (!m_DensityFilter.get()) { return false; }//Did object creation succeed? if (!m_DensityFilter->Create()) { return false; }//Object creation succeeded, did filter creation succeed? //cout << m_DensityFilter->ToString() << endl; } else if (!m_DensityFilter->Valid()) { return false; }//Previously created, are values ok? } else { m_DensityFilter.reset();//They want to do log filtering. Return true because even though the filter is being deleted, nothing went wrong. } return true; } ///

/// Create the spatial filter if the current filter parameters differ /// from the last spatial filter created. ///

/// True if a new filter instance was created, else false. /// True if the filter is not NULL (whether a new one was created or not), else false. template bool Renderer::CreateSpatialFilter(bool& newAlloc) { newAlloc = false; //Use intelligent testing so it isn't created every time a new ember is passed in. if ((!m_SpatialFilter.get()) || (m_Ember.m_SpatialFilterType != m_SpatialFilter->FilterType()) || (m_Ember.m_SpatialFilterRadius != m_SpatialFilter->FilterRadius()) || (m_Ember.m_Supersample != m_SpatialFilter->Supersample()) || (m_PixelAspectRatio != m_SpatialFilter->PixelAspectRatio())) { m_SpatialFilter = auto_ptr>( SpatialFilterCreator::Create(m_Ember.m_SpatialFilterType, m_Ember.m_SpatialFilterRadius, m_Ember.m_Supersample, m_PixelAspectRatio)); newAlloc = true; } return m_SpatialFilter.get() != NULL; } ///

/// Get the sub batch size. This is the size of of the chunks that the iteration /// trajectory will be broken up into. /// Default: 10k. ///

/// The sub batch size template unsigned int Renderer::SubBatchSize() const { return m_SubBatchSize; } ///

/// Set the sub batch size. This is the size of of the chunks that the iteration /// trajectory will be broken up into. /// Reset the rendering process. ///

/// The sub batch size to set template void Renderer::SubBatchSize(unsigned int sbs) { ChangeVal([&] { m_SubBatchSize = sbs; }, FULL_RENDER); } ///

/// Get the number of channels per pixel in the output image. 3 for RGB images /// like Bitmap and Jpeg, 4 for Png. /// Default is 3. ///

/// The number of channels per pixel in the output image template unsigned int Renderer::NumChannels() const { return m_NumChannels; } ///

/// Set the number of channels per pixel in the output image. 3 for RGB images /// like Bitmap and Jpeg, 4 for Png. /// Default is 3. /// Set the render state to ACCUM_ONLY. ///

/// The number of channels per pixel in the output image template void Renderer::NumChannels(unsigned int numChannels) { ChangeVal([&] { m_NumChannels = numChannels; }, ACCUM_ONLY); } ///

/// Get the renderer type enum. /// CPU_RENDERER for this class, other values for derived classes. ///

/// CPU_RENDERER template eRendererType Renderer::RendererType() const { return CPU_RENDERER; } ///

/// Get the number of threads used when rendering. /// Default: use all avaliable cores. ///

/// The number of threads used when rendering template unsigned int Renderer::ThreadCount() const { return m_ThreadsToUse; } ///

/// Set the number of threads to use when rendering. /// This will also reset the vector of random contexts to be the same size /// as the number of specified threads. /// Since this is where they get set up, the caller can optionally pass in /// a seed string, however it's only used if threads is 1. /// This is useful for debugging since it will run the same point trajectory /// every time. /// Reset the rendering process. ///

/// The number of threads to use /// The seed string to use if threads is 1. Default: NULL. template void Renderer::ThreadCount(unsigned int threads, const char* seedString) { ChangeVal([&] { Timing t; unsigned int i, size; const unsigned int isaacSize = 1 << ISAAC_SIZE; ISAAC_INT seeds[isaacSize]; m_ThreadsToUse = threads > 0 ? threads : 1; m_Rand.clear(); m_SubBatch.clear(); m_SubBatch.resize(m_ThreadsToUse); m_BadVals.resize(m_ThreadsToUse); if (seedString) { memset(seeds, 0, isaacSize * sizeof(ISAAC_INT)); memcpy((char*)seeds, seedString, min(strlen(seedString), isaacSize * sizeof(ISAAC_INT))); } //This is critical for multithreading, otherwise the threads all happen //too close to each other in time, resulting in bad randomization. while (m_Rand.size() < m_ThreadsToUse) { size = (unsigned int)m_Rand.size(); if (seedString) { unsigned int newSize = size + 5 + (unsigned int)(t.Toc() + t.EndTime()); #ifdef ISAAC_FLAM3_DEBUG QTIsaac isaac(0, 0, 0, seeds); #else QTIsaac isaac(newSize, newSize * 2, newSize * 3, seeds); #endif m_Rand.push_back(isaac); for (i = 0; i < (isaacSize * sizeof(ISAAC_INT)); i++) ((unsigned char*)seeds)[i]++; } else { for (i = 0; i < isaacSize; i++) { t.Toc(); seeds[i] = (ISAAC_INT)(t.EndTime() * i) + (size + 1); } t.Toc(); ISAAC_INT r = (size * i) + i + (ISAAC_INT)t.EndTime(); QTIsaac isaac(r, r * 2, r * 3, seeds); m_Rand.push_back(isaac); } } }, FULL_RENDER); } ///

/// Set the callback object. ///

/// The callback object to set template void Renderer::Callback(RenderCallback* callback) { m_Callback = callback; } ///

/// Virtual functions to be overridden in derived renderers that use the GPU, but not accessed outside. ///

///

/// Make the final palette used for iteration. ///

/// The color scalar to multiply the ember's palette by template void Renderer::MakeDmap(T colorScalar) { m_Ember.m_Palette.template MakeDmap(m_Dmap, colorScalar); } ///

/// Allocate various buffers if the image dimensions, thread count, or sub batch size /// has changed. ///

/// True if success, else false template bool Renderer::Alloc() { bool b = true; bool lock = (m_SuperSize != m_HistBuckets.size()) || (m_SuperSize != m_AccumulatorBuckets.size()) || (m_ThreadsToUse != m_Samples.size()) || (m_Samples[0].size() != m_SubBatchSize); if (lock) EnterResize(); if (m_SuperSize != m_HistBuckets.size()) { m_HistBuckets.resize(m_SuperSize); if (m_ReclaimOnResize) m_HistBuckets.shrink_to_fit(); b &= (m_HistBuckets.size() == m_SuperSize); } if (m_SuperSize != m_AccumulatorBuckets.size()) { m_AccumulatorBuckets.resize(m_SuperSize); if (m_ReclaimOnResize) m_AccumulatorBuckets.shrink_to_fit(); b &= (m_AccumulatorBuckets.size() == m_SuperSize); } if (m_ThreadsToUse != m_Samples.size()) { m_Samples.resize(m_ThreadsToUse); if (m_ReclaimOnResize) m_Samples.shrink_to_fit(); b &= (m_Samples.size() == m_ThreadsToUse); } for (unsigned int i = 0; i < m_Samples.size(); i++) { if (m_Samples[i].size() != m_SubBatchSize) { m_Samples[i].resize(m_SubBatchSize); if (m_ReclaimOnResize) m_Samples[i].shrink_to_fit(); b &= (m_Samples[i].size() == m_SubBatchSize); } } if (lock) LeaveResize(); return b; } ///

/// Clear histogram and/or density filtering buffers to all zeroes. ///

/// Clear histogram if true, else don't. /// Clear density filtering buffer if true, else don't. /// True if anything was cleared, else false. template bool Renderer::ResetBuckets(bool resetHist, bool resetAccum) { //parallel_invoke( //[&] //{ if (resetHist && !m_HistBuckets.empty()) memset((void*)m_HistBuckets.data(), 0, m_HistBuckets.size() * sizeof(m_HistBuckets[0])); //}, //[&] //{ if (resetAccum && !m_AccumulatorBuckets.empty()) memset(m_AccumulatorBuckets.data(), 0, m_AccumulatorBuckets.size() * sizeof(m_AccumulatorBuckets[0])); //}); return resetHist || resetAccum; } ///

/// Perform log scale density filtering. /// Base case for simple log scale density estimation as discussed (mostly) in the paper /// in section 4, p. 6-9. ///

/// True if not prematurely aborted, else false. template eRenderStatus Renderer::LogScaleDensityFilter() { unsigned int startRow = 0; unsigned int endRow = m_SuperRasH; unsigned int startCol = 0; unsigned int endCol = m_SuperRasW; //Timing t(4); //Original didn't parallelize this, doing so gives a 50-75% speedup. //If there is only one pass, the value can be directly assigned, which is quicker than summing. if (Passes() == 1) { parallel_for(startRow, endRow, [&] (unsigned int j) { unsigned int row = j * m_SuperRasW; //__m128 logm128;//Figure out SSE at some point. //__m128 bucketm128; //__m128 scaledBucket128; for (unsigned int i = startCol; (i < endCol) && !m_Abort; i++) { unsigned int index = row + i; //Check for visibility first before doing anything else to avoid all possible unnecessary calculations. if (m_HistBuckets[index].a != 0) { T logScale = (m_K1 * log(1 + m_HistBuckets[index].a * m_K2)) / m_HistBuckets[index].a; //Original did a temporary assignment, then *= logScale, then passed the result to bump_no_overflow(). //Combine here into one operation for a slight speedup. m_AccumulatorBuckets[index] = (m_HistBuckets[index] * (bucketT)logScale); } } }); } else//Passes > 1, so sum. { parallel_for(startRow, endRow, [&] (unsigned int j) { unsigned int row = j * m_SuperRasW; for (unsigned int i = startCol; (i < endCol) && !m_Abort; i++) { unsigned int index = row + i; //Check for visibility first before doing anything else to avoid all possible unnecessary calculations. if (m_HistBuckets[index].a != 0) { //Figure out SSE at some point. //__declspec(align(16)) T logScale = (m_K1 * log(1 + m_HistBuckets[index].a * m_K2)) / m_HistBuckets[index].a; //logm128 = _mm_load1_ps(&logScale); //bucketm128 = _mm_load_ps(m_HistBuckets[index].Channels); //scaledBucket128 = _mm_mul_ps(logm128, bucketm128); m_AccumulatorBuckets[index] += (m_HistBuckets[index] * bucketT(logScale)); } } }); } //t.Toc(__FUNCTION__); return m_Abort ? RENDER_ABORT : RENDER_OK; } ///

/// Perform the more advanced Gaussian density filter. /// More advanced density estimation filtering given less mention in the paper, but used /// much more in practice as it gives the best results. /// Section 8, p. 11-13. ///

/// True if not prematurely aborted, else false. template eRenderStatus Renderer::GaussianDensityFilter() { Timing totalTime, localTime; int scf = !(Supersample() & 1); unsigned int ss = Floor(Supersample() / T(2)); T scfact = pow(Supersample() / (Supersample() + T(1.0)), T(2.0)); unsigned int threads = m_ThreadsToUse; unsigned int startRow = Supersample() - 1; unsigned int endRow = m_SuperRasH - (Supersample() - 1);//Original did + which is most likely wrong. unsigned int startCol = Supersample() - 1; unsigned int endCol = m_SuperRasW - (Supersample() - 1); unsigned int chunkSize = (unsigned int)ceil(double(endRow - startRow) / double(threads)); //parallel_for scales very well, dividing the work almost perfectly among all processors. parallel_for((unsigned int)0, threads, [&] (unsigned int threadIndex) { unsigned int pixelNumber = 0; unsigned int localStartRow = min(startRow + (threadIndex * chunkSize), endRow - 1); unsigned int localEndRow = min(localStartRow + chunkSize, endRow); unsigned int pixelsThisThread = (localEndRow - localStartRow) * m_SuperRasW; double lastPercent = 0; glm::detail::tvec4 logScaleBucket; for (unsigned int j = localStartRow; (j < localEndRow) && !m_Abort; j++) { unsigned int bucketRowStart = j * m_SuperRasW;//Pull out of inner loop for optimization. const glm::detail::tvec4* bucket; const glm::detail::tvec4* buckets = m_HistBuckets.data(); const T* filterCoefs = m_DensityFilter->Coefs(); const T* filterWidths = m_DensityFilter->Widths(); for (unsigned int i = startCol; i < endCol; i++) { int ii, jj, arrFilterWidth; unsigned int filterSelectInt, filterCoefIndex; T filterSelect = 0; bucket = buckets + bucketRowStart + i; //Don't do anything if there's no hits here. Must also put this first to avoid dividing by zero below. if (bucket->a == 0) continue; T cacheLog = (m_K1 * log(T(1.0) + bucket->a * m_K2)) / bucket->a;//Caching this calculation gives a 30% speedup. 
if (ss == 0) { filterSelect = bucket->a; } else { //The original contained a glaring flaw as it would run past the boundaries of the buffers //when calculating the density for a box centered on the last row or column. //Clamp here to not run over the edge. int densityBoxLeftX = i - min(i, ss); int densityBoxRightX = i + min(ss, m_SuperRasW - i - 1); int densityBoxTopY = j - min(j, ss); int densityBoxBottomY = j + min(ss, m_SuperRasH - j - 1); //Count density in ssxss area. //Original went one col at a time, which is cache inefficient. Go one row at at time here for a slight speedup. for (jj = densityBoxTopY; jj <= densityBoxBottomY; jj++) for (ii = densityBoxLeftX; ii <= densityBoxRightX; ii++) filterSelect += buckets[ii + (jj * m_SuperRasW)].a;//Original divided by 255 in every iteration. Omit here because colors are already in the range of [0..1]. } //Scale if supersample > 1 for equal iters. if (scf) filterSelect *= scfact; if (filterSelect > m_DensityFilter->MaxFilteredCounts()) filterSelectInt = m_DensityFilter->MaxFilterIndex(); else if (filterSelect <= DE_THRESH) filterSelectInt = (int)ceil(filterSelect) - 1; else filterSelectInt = (int)DE_THRESH + Floor(pow(filterSelect - DE_THRESH, m_DensityFilter->Curve())); //If the filter selected below the min specified clamp it to the min. if (filterSelectInt > m_DensityFilter->MaxFilterIndex()) filterSelectInt = m_DensityFilter->MaxFilterIndex(); //Only have to calculate the values for ~1/8 of the square. filterCoefIndex = filterSelectInt * m_DensityFilter->KernelSize(); arrFilterWidth = (int)ceil(filterWidths[filterSelectInt]) - 1; for (jj = 0; jj <= arrFilterWidth; jj++) { for (ii = 0; ii <= jj; ii++, filterCoefIndex++) { //Skip if coef is 0. if (filterCoefs[filterCoefIndex] == 0) continue; T logScale = filterCoefs[filterCoefIndex] * cacheLog; //Original first assigned the fields, then scaled them. Combine into a single step for a 1% optimization. 
logScaleBucket = (*bucket * bucketT(logScale)); if (jj == 0 && ii == 0) { AddToAccum(logScaleBucket, i, ii, j, jj); } else if (ii == 0) { AddToAccum(logScaleBucket, i, 0, j, -jj); AddToAccum(logScaleBucket, i, -jj, j, 0); AddToAccum(logScaleBucket, i, jj, j, 0); AddToAccum(logScaleBucket, i, 0, j, jj); } else if (jj == ii) { AddToAccum(logScaleBucket, i, -ii, j, -jj); AddToAccum(logScaleBucket, i, ii, j, -jj); AddToAccum(logScaleBucket, i, -ii, j, jj); AddToAccum(logScaleBucket, i, ii, j, jj); } else { //Attempting to optimize cache access by putting these in order makes no difference, even on large images, but do it anyway. AddToAccum(logScaleBucket, i, -ii, j, -jj); AddToAccum(logScaleBucket, i, ii, j, -jj); AddToAccum(logScaleBucket, i, -jj, j, -ii); AddToAccum(logScaleBucket, i, jj, j, -ii); AddToAccum(logScaleBucket, i, -jj, j, ii); AddToAccum(logScaleBucket, i, jj, j, ii); AddToAccum(logScaleBucket, i, -ii, j, jj); AddToAccum(logScaleBucket, i, ii, j, jj); } } } } if (m_Callback && threadIndex == 0) { pixelNumber += m_SuperRasW; double percent = (double(pixelNumber) / double(pixelsThisThread)) * 100.0; double percentDiff = percent - lastPercent; double toc = localTime.Toc(); if (percentDiff >= 10 || (toc > 1000 && percentDiff >= 1)) { double etaMs = ((100.0 - percent) / percent) * totalTime.Toc(); if (!m_Callback->ProgressFunc(m_Ember, m_ProgressParameter, percent, 1, etaMs)) Abort(); lastPercent = percent; localTime.Tic(); } } } }); if (m_Callback && !m_Abort) m_Callback->ProgressFunc(m_Ember, m_ProgressParameter, 100.0, 1, 0); //totalTime.Toc(__FUNCTION__); return m_Abort ? RENDER_ABORT : RENDER_OK; } ///

/// Thin wrapper around AccumulatorToFinalImage(). ///

/// The pixel vector to allocate and store the final image in /// Offset in the buffer to store the pixels to /// True if not prematurely aborted, else false. template eRenderStatus Renderer::AccumulatorToFinalImage(vector& pixels, size_t finalOffset) { if (PrepFinalAccumVector(pixels)) return AccumulatorToFinalImage(pixels.data(), finalOffset); return RENDER_ERROR; } ///

/// Produce a final, visible image by clipping, gamma correcting and spatial filtering the color values /// in the density filtering buffer and save to the passed in buffer. ///

/// The pre-allocated pixel buffer to store the final image in /// Offset in the buffer to store the pixels to. Default: 0. /// True if not prematurely aborted, else false. template eRenderStatus Renderer::AccumulatorToFinalImage(unsigned char* pixels, size_t finalOffset) { if (!pixels) return RENDER_ERROR; EnterFinalAccum(); //Timing t(4); unsigned int filterWidth = m_SpatialFilter->FinalFilterWidth(); T g, linRange, vibrancy; Color background; pixels += finalOffset; PrepFinalAccumVals(background, g, linRange, vibrancy); //If early clip, go through the entire accumulator and perform gamma correction first. //The original does it this way as well and it's roughly 11 times faster to do it this way than inline below with each pixel. if (EarlyClip()) { parallel_for((unsigned int)0, m_SuperRasH, [&] (unsigned int j) { unsigned int rowStart = j * m_SuperRasW;//Pull out of inner loop for optimization. for (unsigned int i = 0; i < m_SuperRasW && !m_Abort; i++) { GammaCorrection(m_AccumulatorBuckets[i + rowStart], background, g, linRange, vibrancy, true, false, &(m_AccumulatorBuckets[i + rowStart][0]));//Write back in place. } }); } if (m_Abort) { LeaveFinalAccum(); return RENDER_ABORT; } //Note that abort is not checked here. The final accumulation must run to completion //otherwise artifacts that resemble page tearing will occur in an interactive run. It's //critical to never exit this loop prematurely. //for (unsigned int j = 0; j < FinalRasH(); j++)//Keep around for debugging. parallel_for((unsigned int)0, FinalRasH(), [&] (unsigned int j) { Color newBucket; int pixelsRowStart = (m_YAxisUp ? ((FinalRasH() - j) - 1) : j) * FinalRowSize();//Pull out of inner loop for optimization. unsigned int y = m_DensityFilterOffset + (j * Supersample());//Start at the beginning row of each super sample block. 
unsigned short* p16; for (unsigned int i = 0; i < FinalRasW(); i++, pixelsRowStart += PixelSize()) { unsigned int ii, jj; unsigned int x = m_DensityFilterOffset + (i * Supersample());//Start at the beginning column of each super sample block. newBucket.Clear(); //Original was iterating column-wise, which is slow. //Here, iterate one row at a time, giving a 10% speed increase. for (jj = 0; jj < filterWidth; jj++) { unsigned int filterKRowIndex = jj * filterWidth; unsigned int accumRowIndex = (y + jj) * m_SuperRasW;//Pull out of inner loop for optimization. for (ii = 0; ii < filterWidth; ii++) { //Need to dereference the spatial filter pointer object to use the [] operator. Makes no speed difference. bucketT k = bucketT((*m_SpatialFilter)[ii + filterKRowIndex]); newBucket += (m_AccumulatorBuckets[(x + ii) + accumRowIndex] * k); } } if (BytesPerChannel() == 2) { p16 = (unsigned short*)(pixels + pixelsRowStart); if (EarlyClip()) { p16[0] = (unsigned short)(Clamp(newBucket.r, 0, 255) * bucketT(256)); p16[1] = (unsigned short)(Clamp(newBucket.g, 0, 255) * bucketT(256)); p16[2] = (unsigned short)(Clamp(newBucket.b, 0, 255) * bucketT(256)); if (NumChannels() > 3) { if (Transparency()) p16[3] = (unsigned char)(Clamp(newBucket.a, 0, 1) * bucketT(65535.0)); else p16[3] = 65535; } } else { GammaCorrection(*(glm::detail::tvec4*)(&newBucket), background, g, linRange, vibrancy, NumChannels() > 3, true, p16); } } else { if (EarlyClip()) { pixels[pixelsRowStart] = (unsigned char)Clamp(newBucket.r, 0, 255); pixels[pixelsRowStart + 1] = (unsigned char)Clamp(newBucket.g, 0, 255); pixels[pixelsRowStart + 2] = (unsigned char)Clamp(newBucket.b, 0, 255); if (NumChannels() > 3) { if (Transparency()) pixels[pixelsRowStart + 3] = (unsigned char)(Clamp(newBucket.a, 0, 1) * bucketT(255.0)); else pixels[pixelsRowStart + 3] = 255; } } else { GammaCorrection(*(glm::detail::tvec4*)(&newBucket), background, g, linRange, vibrancy, NumChannels() > 3, true, pixels + pixelsRowStart); } } } }); //Insert 
the palette into the image for debugging purposes. Only works with 8bpc. if (m_InsertPalette && BytesPerChannel() == 1) { unsigned int i, j, ph = 100; if (ph >= FinalRasH()) ph = FinalRasH(); for (j = 0; j < ph; j++) { for (i = 0; i < FinalRasW(); i++) { unsigned char* p = pixels + (NumChannels() * (i + j * FinalRasW())); p[0] = (unsigned char)(m_TempEmber.m_Palette[i * 256 / FinalRasW()][0] * WHITE);//The palette is [0..1], output image is [0..255]. p[1] = (unsigned char)(m_TempEmber.m_Palette[i * 256 / FinalRasW()][1] * WHITE); p[2] = (unsigned char)(m_TempEmber.m_Palette[i * 256 / FinalRasW()][2] * WHITE); } } } //t.Toc(__FUNCTION__); LeaveFinalAccum(); return m_Abort ? RENDER_ABORT : RENDER_OK; } //#define TG 1 //#define NEWSUBBATCH 1 ///

/// Run the iteration algorithm for the specified number of iterations. /// This is only called after all other setup has been done. /// This function will be called multiple times for an interactive rendering, and /// once for a straight through render. /// The iteration is reset and fused in each thread after each sub batch is done /// which by default is 10,000 iterations. ///

/// The number of iterations to run /// The pass this is running for /// The temporal sample within the current pass this is running for /// Rendering statistics template EmberStats Renderer::Iterate(uint64_t iterCount, unsigned int pass, unsigned int temporalSample) { //Timing t2(4); m_IterTimer.Tic(); unsigned int fuse = EarlyClip() ? 100 : 15;//EarlyClip was one way of detecting a later version of flam3, so it used 100 which is a better value. uint64_t totalItersPerThread = (uint64_t)ceil((double)iterCount / (double)m_ThreadsToUse); double percent, etaMs; EmberStats stats; #ifdef TG unsigned int threadIndex; for (unsigned int i = 0; i < m_ThreadsToUse; i++) { threadIndex = i; m_TaskGroup.run([&, threadIndex] () { #else parallel_for((unsigned int)0, m_ThreadsToUse, [&] (unsigned int threadIndex) { #endif Timing t; uint64_t subBatchSize = (unsigned int)min(totalItersPerThread, (uint64_t)m_SubBatchSize); m_BadVals[threadIndex] = 0; //Sub batch iterations, loop 3. for (m_SubBatch[threadIndex] = 0; (m_SubBatch[threadIndex] < totalItersPerThread) && !m_Abort; m_SubBatch[threadIndex] += subBatchSize) { //Must recalculate the number of iters to run on each sub batch because the last batch will most likely have less than m_SubBatchSize iters. //For example, if 51,000 are requested, and the sbs is 10,000, it should run 5 sub batches of 10,000 iters, and one final sub batch of 1,000 iters. subBatchSize = min(subBatchSize, totalItersPerThread - m_SubBatch[threadIndex]); //Use first as random point, the rest are iterated points. //Note that this gets reset with a new random point for each subBatchSize iterations. //This helps correct if iteration happens to be on a bad trajectory. m_Samples[threadIndex][0].m_X = m_Rand[threadIndex].Frand11(); m_Samples[threadIndex][0].m_Y = m_Rand[threadIndex].Frand11(); m_Samples[threadIndex][0].m_Z = 0;//m_Ember.m_CamZPos;//Apo set this to 0, then made the user use special variations to kick it. It seems easier to just set it to zpos. 
m_Samples[threadIndex][0].m_ColorX = m_Rand[threadIndex].Frand01(); //Finally, iterate. //t.Tic(); //Iterating, loop 4. m_BadVals[threadIndex] += (uint64_t)m_Iterator->Iterate(m_Ember, (uint32_t)subBatchSize, fuse, m_Samples[threadIndex].data(), m_Rand[threadIndex]); //iterationTime += t.Toc(); if (m_LockAccum) m_AccumCs.Enter(); //t.Tic(); //Map temp buffer samples into the histogram using the palette for color. Accumulate(m_Samples[threadIndex].data(), (unsigned int)subBatchSize, &m_Dmap); //accumulationTime += t.Toc(); if (m_LockAccum) m_AccumCs.Leave(); if (m_Callback && threadIndex == 0) { percent = 100.0 * double ( double ( double ( double ( //Takes progress of current thread and multiplies by thread count. //This assumes the threads progress at roughly the same speed. double(m_LastIter + (m_SubBatch[threadIndex] * m_ThreadsToUse)) / double(ItersPerTemporalSample()) ) + temporalSample ) / (double)TemporalSamples() ) + (double)pass ) / (double)Passes(); double percentDiff = percent - m_LastIterPercent; double toc = m_ProgressTimer.Toc(); if (percentDiff >= 10 || (toc > 1000 && percentDiff >= 1))//Call callback function if either 10% has passed, or one second (and 1%). { etaMs = ((100.0 - percent) / percent) * m_RenderTimer.Toc(); if (!m_Callback->ProgressFunc(m_Ember, m_ProgressParameter, percent, 0, etaMs)) Abort(); m_LastIterPercent = percent; m_ProgressTimer.Tic(); } } } }); #ifdef TG } m_TaskGroup.wait(); #endif stats.m_Iters = std::accumulate(m_SubBatch.begin(), m_SubBatch.end(), 0ULL);//Sum of iter count of all threads. stats.m_Badvals = std::accumulate(m_BadVals.begin(), m_BadVals.end(), 0ULL); stats.m_IterMs = m_IterTimer.Toc(); //t2.Toc(__FUNCTION__); return stats; } ///

/// Accessors for render properties. ///

///

/// Get a copy of the vector of random contexts. /// Useful for debugging because the returned vector can be used for future renders to /// produce the exact same output. ///

/// The vector of random contexts to assign template vector> Renderer::RandVec() { return m_Rand; }; ///

/// Set the vector of random contexts. /// Assignment will only take place if the size of the vector matches /// the number of threads used for rendering. /// Reset the rendering process. ///

/// The vector of random contexts to assign /// True if the size of the vector matched the number of threads used for rendering, else false. template bool Renderer::RandVec(vector>& randVec) { bool b = false; if (randVec.size() == ThreadCount()) { ChangeVal([&] { m_Rand = randVec; b = true; }, FULL_RENDER); } return b; }; ///

/// Get whether the histogram is locked during accumulation. /// This is to prevent two threads from writing to the same histogram /// bucket at once. /// The current implementation matches flam3 and is very innefficient /// to the point of negating any gains gotten from multi-threading. /// Future workarounds may be tried in the future. /// Default: false. ///

/// True if the histogram is locked during accumulation, else false. template bool Renderer::LockAccum() const { return m_LockAccum; } ///

/// Set whether the histogram is locked during accumulation. /// This is to prevent two threads from writing to the same histogram /// bucket at once. /// The current implementation matches flam3 and is very innefficient /// to the point of negating any gains gotten from multi-threading. /// Different workarounds may be tried in the future. /// Reset the rendering process. ///

/// True if the histogram should be locked when accumulating, else false template void Renderer::LockAccum(bool lockAccum) { ChangeVal([&] { m_LockAccum = lockAccum; }, FULL_RENDER); } ///

/// Get whether color clipping and gamma correction is done before /// or after spatial filtering. /// Default: false. ///

/// True if early clip, else false. template bool Renderer::EarlyClip() const { return m_EarlyClip; } ///

/// Set whether color clipping and gamma correction is done before /// or after spatial filtering. /// Set the render state to FILTER_AND_ACCUM. ///

/// True if early clip, else false. template void Renderer::EarlyClip(bool earlyClip) { ChangeVal([&] { m_EarlyClip = earlyClip; }, FILTER_AND_ACCUM); } ///

/// Get whether the positive Y coordinate of the final output image is up. /// Default: false. ///

/// True if up, else false. template bool Renderer::YAxisUp() const { return m_YAxisUp; } ///

/// Set whether the positive Y axis of the final output image is up. ///

/// True if the positive y axis is up, else false. template void Renderer::YAxisUp(bool yup) { ChangeVal([&] { m_YAxisUp = yup; }, ACCUM_ONLY); } ///

/// Get whether to insert the palette as a block of colors in the final output image. /// This is useful for debugging palette issues. /// Default: 1. ///

/// True if inserting the palette, else false. template bool Renderer::InsertPalette() const { return m_InsertPalette; } ///

/// Set whether to insert the palette as a block of colors in the final output image. /// This is useful for debugging palette issues. /// Set the render state to ACCUM_ONLY. ///

/// True if inserting the palette, else false. template void Renderer::InsertPalette(bool insertPalette) { ChangeVal([&] { m_InsertPalette = insertPalette; }, ACCUM_ONLY); } ///

/// Get whether to reclaim unused memory in the final output buffer /// when a smaller size is requested than has been previously allocated. /// Default: false. ///

/// True if reclaim, else false. template bool Renderer::ReclaimOnResize() const { return m_ReclaimOnResize; } ///

/// Set whether to reclaim unused memory in the final output buffer /// when a smaller size is requested than has been previously allocated. /// Reset the rendering process. ///

/// True if reclaim, else false. template void Renderer::ReclaimOnResize(bool reclaimOnResize) { ChangeVal([&] { m_ReclaimOnResize = reclaimOnResize; }, FULL_RENDER); } ///

/// Get whether to use transparency in the alpha channel. /// This only applies when the number of channels is 4 and the output /// image is Png. /// Default: false. ///

/// True if using transparency, else false. template bool Renderer::Transparency() const { return m_Transparency; } ///

/// Set whether to use transparency in the alpha channel. /// This only applies when the number of channels is 4 and the output /// image is Png. /// Set the render state to ACCUM_ONLY. ///

/// True if using transparency, else false. template void Renderer::Transparency(bool transparency) { ChangeVal([&] { m_Transparency = transparency; }, ACCUM_ONLY); } ///

/// Get the bytes per channel of the output image. /// The only acceptable values are 1 and 2, and 2 is only /// used when the output is Png. /// Default: 1. ///

/// template unsigned int Renderer::BytesPerChannel() const { return m_BytesPerChannel; } ///

/// Set the bytes per channel of the output image. /// The only acceptable values are 1 and 2, and 2 is only /// used when the output is Png. /// Set the render state to ACCUM_ONLY. ///

/// The bytes per channel. template void Renderer::BytesPerChannel(unsigned int bytesPerChannel) { ChangeVal([&] { if (bytesPerChannel == 0 || bytesPerChannel > 2) m_BytesPerChannel = 1; else m_BytesPerChannel = bytesPerChannel; }, ACCUM_ONLY); } ///

/// Get the pixel aspect ratio of the output image. /// Default: 1. ///

/// The pixel aspect ratio. template T Renderer::PixelAspectRatio() const { return m_PixelAspectRatio; } ///

/// Set the pixel aspect ratio of the output image. /// Reset the rendering process. ///

/// The pixel aspect ratio. template void Renderer::PixelAspectRatio(T pixelAspectRatio) { ChangeVal([&] { m_PixelAspectRatio = pixelAspectRatio; }, FULL_RENDER); } ///

/// Get the type of filter to use for preview renders during interactive rendering. /// Using basic log scaling is quicker, but doesn't provide any bluring. /// Full DE is much slower, but provides a more realistic preview of what the final image /// will look like. /// Default: FILTER_LOG. ///

/// The type of filter to use template eInteractiveFilter Renderer::InteractiveFilter() const { return m_InteractiveFilter; } ///

/// Set the type of filter to use for preview renders during interactive rendering. /// Using basic log scaling is quicker, but doesn't provide any bluring. /// Full DE is much slower, but provides a more realistic preview of what the final image /// will look like. /// Reset the rendering process. ///

/// The filter. template void Renderer::InteractiveFilter(eInteractiveFilter filter) { ChangeVal([&] { m_InteractiveFilter = filter; }, FULL_RENDER); } ///

/// Non-virtual functions that might be needed by a derived class. ///

///

/// Prepare various values needed for producing a final output image. ///

/// The computed background value, which may differ from the background member /// The computed gamma /// The computed linear range /// The computed vibrancy template void Renderer::PrepFinalAccumVals(Color& background, T& g, T& linRange, T& vibrancy) { //If they are doing incremental rendering, they can get here without doing a full temporal //sample, which means the values will be zero. vibrancy = m_Vibrancy == 0 ? m_Ember.m_Vibrancy : m_Vibrancy; unsigned int vibGamCount = m_VibGamCount == 0 ? 1 : m_VibGamCount; T gamma = m_Gamma == 0 ? m_Ember.m_Gamma : m_Gamma; g = T(1.0) / ClampGte(gamma / vibGamCount, T(0.01));//Ensure a divide by zero doesn't occur. linRange = GammaThresh(); vibrancy /= vibGamCount; background.x = (IsNearZero(m_Background.r) ? m_Ember.m_Background.r : m_Background.r) / (vibGamCount / T(256.0));//Background is [0, 1]. background.y = (IsNearZero(m_Background.g) ? m_Ember.m_Background.g : m_Background.g) / (vibGamCount / T(256.0)); background.z = (IsNearZero(m_Background.b) ? m_Ember.m_Background.b : m_Background.b) / (vibGamCount / T(256.0)); } ///

/// Miscellaneous functions used only in this class. ///

///

/// Accumulate the samples to the histogram. /// To be called after a sub batch is finished iterating. ///

/// The samples to accumulate /// The number of samples /// The palette to use template void Renderer::Accumulate(Point* samples, unsigned int sampleCount, const Palette* palette) { unsigned int histIndex, intColorIndex, histSize = (unsigned int)m_HistBuckets.size(); bucketT colorIndex, colorIndexFrac; const glm::detail::tvec4* dmap = &(palette->m_Entries[0]); //It's critical to understand what's going on here as it's one of the most important parts of the algorithm. //A color value gets retrieved from the palette and //its RGB values are added to the existing RGB values in the histogram bucket. //Alpha is always 1 in the palettes, so that serves as the hit count. //This differs from the original since redundantly adding both an alpha component and a hit count is omitted. //This will eventually leave us with large values for pixels with many hits, which will be log scaled down later. //Original used a function called bump_no_overflow(). Just do a straight add because the type will always be float or double. //Doing so gives a 25% speed increase. //Splitting these conditionals into separate loops makes no speed difference. for (unsigned int i = 0; i < sampleCount && !m_Abort; i++) { if (Rotate() != 0) { T p00 = samples[i].m_X - CenterX(); T p11 = samples[i].m_Y - CenterY(); samples[i].m_X = (p00 * m_RotMat.A()) + (p11 * m_RotMat.B()) + CenterX(); samples[i].m_Y = (p00 * m_RotMat.D()) + (p11 * m_RotMat.E()) + CenterY(); } //Checking this first before converting gives better performance than converting and checking a single value, which the original did. //Second, an interesting optimization observation is that when keeping the bounds vars within m_CarToRas and calling its InBounds() member function, //rather than here as members, about a 7% speedup is achieved. This is possibly due to the fact that data from m_CarToRas is accessed //right after the call to Convert(), so some caching efficiencies get realized. 
if (m_CarToRas.InBounds(samples[i])) { if (samples[i].m_VizAdjusted != 0) { m_CarToRas.Convert(samples[i], histIndex); //There is a very slim chance that a point will be right on the border and will technically be in bounds, passing the InBounds() test, //but ends up being mapped to a histogram bucket that is out of bounds due to roundoff error. Perform one final check before proceeding. //This will result in a few points at the very edges getting discarded, but prevents a crash and doesn't seem to make a speed difference. if (histIndex < histSize) { //Linear is a linear scale for when the color index is not a whole number, which is most of the time. //It uses a portion of the value of the index, and the remainder of the next index. //Example: index = 25.7 //Fraction = 0.7 //Color = (dmap[25] * 0.3) + (dmap[26] * 0.7) //Use overloaded addition and multiplication operators in vec4 to perform the accumulation. if (PaletteMode() == PALETTE_LINEAR) { colorIndex = (bucketT)samples[i].m_ColorX * COLORMAP_LENGTH; intColorIndex = (unsigned int)colorIndex; if (intColorIndex < 0) { intColorIndex = 0; colorIndexFrac = 0; } else if (intColorIndex >= COLORMAP_LENGTH_MINUS_1) { intColorIndex = COLORMAP_LENGTH_MINUS_1 - 1; colorIndexFrac = 1; } else { colorIndexFrac = colorIndex - (bucketT)intColorIndex;//Interpolate between intColorIndex and intColorIndex + 1. 
} if (samples[i].m_VizAdjusted == 1) m_HistBuckets[histIndex] += ((dmap[intColorIndex] * (1 - colorIndexFrac)) + (dmap[intColorIndex + 1] * colorIndexFrac)); else m_HistBuckets[histIndex] += (((dmap[intColorIndex] * (1 - colorIndexFrac)) + (dmap[intColorIndex + 1] * colorIndexFrac)) * (bucketT)samples[i].m_VizAdjusted); } else if (PaletteMode() == PALETTE_STEP) { intColorIndex = Clamp((unsigned int)(samples[i].m_ColorX * COLORMAP_LENGTH), 0, COLORMAP_LENGTH_MINUS_1); if (samples[i].m_VizAdjusted == 1) m_HistBuckets[histIndex] += dmap[intColorIndex]; else m_HistBuckets[histIndex] += (dmap[intColorIndex] * (bucketT)samples[i].m_VizAdjusted); } } } } } } ///

/// Add a value to the density filtering buffer with a bounds check. ///

/// The bucket being filtered /// The column of the bucket /// The offset to add to the column /// The row of the bucket /// The offset to add to the row template void Renderer::AddToAccum(const glm::detail::tvec4& bucket, int i, int ii, int j, int jj) { if (j + jj >= 0 && j + jj < (int)m_SuperRasH && i + ii >= 0 && i + ii < (int)m_SuperRasW) m_AccumulatorBuckets[(i + ii) + ((j + jj) * m_SuperRasW)] += bucket; } ///

/// Clip and gamma correct a pixel. /// Because this code is used in both early and late clipping, a few extra arguments are passed /// to specify what actions to take. Coupled with an additional template argument, this allows /// using one function to perform all color clipping, gamma correction and final accumulation. /// Template argument accumT is expected to match T for the case of early clipping, unsigned char for late clip for /// images with one byte per channel and unsigned short for images with two bytes per channel. ///

/// The pixel to correct /// The background color /// The gamma to use /// The linear range to use /// The vibrancy to use /// True if either early clip, or late clip with 4 channel output, else false. /// True if late clip, else false. /// The storage space for the corrected values to be written to template template void Renderer::GammaCorrection(glm::detail::tvec4& bucket, Color& background, T g, T linRange, T vibrancy, bool doAlpha, bool scale, accumT* correctedChannels) { T alpha, ls, a; bucketT newRgb[3];//Would normally use a Color, but don't want to call a needless constructor every time this function is called, which is once per pixel. static T scaleVal = (numeric_limits::max() + 1) / T(256.0); if (bucket.a <= 0) { alpha = 0; ls = 0; } else { alpha = Palette::CalcAlpha(bucket.a, g, linRange); ls = vibrancy * T(255) * alpha / bucket.a; ClampRef(alpha, 0, 1); } Palette::template CalcNewRgb(&bucket[0], ls, HighlightPower(), newRgb); for (unsigned int rgbi = 0; rgbi < 3; rgbi++) { a = newRgb[rgbi] + ((T(1.0) - vibrancy) * T(255) * pow(T(bucket[rgbi]), g)); if (NumChannels() <= 3 || !Transparency()) { a += ((T(1.0) - alpha) * background[rgbi]); } else { if (alpha > 0) a /= alpha; else a = 0; } if (!scale) correctedChannels[rgbi] = (accumT)Clamp(a, 0, 255);//Early clip, just assign directly. else correctedChannels[rgbi] = (accumT)(Clamp(a, 0, 255) * scaleVal);//Final accum, multiply by 1 for 8 bpc, or 256 for 16 bpc. } if (doAlpha) { if (!scale) correctedChannels[3] = (accumT)alpha;//Early clip, just assign alpha directly. else if (Transparency()) correctedChannels[3] = (accumT)(alpha * numeric_limits::max());//Final accum, 4 channels, using transparency. Scale alpha from 0-1 to 0-255 for 8 bpc or 0-65535 for 16 bpc. else correctedChannels[3] = numeric_limits::max();//Final accum, 4 channels, but not using transparency. 255 for 8 bpc, 65535 for 16 bpc. } } ///

/// Set the m_Iterator member to point to the appropriate /// iterator based on whether the ember currently being rendered /// contains xaos. /// After assigning, initialize the xform selection buffer. ///

/// True if assignment and distribution initialization succeeded, else false. template bool Renderer::AssignIterator() { //Setup iterator and distributions. //Both iterator types were setup in the constructor (add more in the future if needed). //So simply assign the pointer to the correct type and re-initialize its distributions //based on the current ember. if (XaosPresent()) m_Iterator = m_XaosIterator.get(); else m_Iterator = m_StandardIterator.get(); //Timing t; return m_Iterator->InitDistributions(m_Ember); //t.Toc("Distrib creation"); } ///

/// Threading control. ///

template void Renderer::EnterRender() { m_RenderingCs.Enter(); } template void Renderer::LeaveRender() { m_RenderingCs.Leave(); } template void Renderer::EnterFinalAccum() { m_FinalAccumCs.Enter(); m_InFinalAccum = true; } template void Renderer::LeaveFinalAccum() { m_FinalAccumCs.Leave(); m_InFinalAccum = false; } template void Renderer::EnterResize() { m_ResizeCs.Enter(); } template void Renderer::LeaveResize() { m_ResizeCs.Leave(); } template void Renderer::Abort() { m_Abort = true; } template bool Renderer::Aborted() { return m_Abort; } template bool Renderer::InRender() { return m_InRender; } template bool Renderer::InFinalAccum() { return m_InFinalAccum; } ///

/// Renderer properties, getters only. ///

template unsigned int Renderer::SuperRasW() const { return m_SuperRasW; } template unsigned int Renderer::SuperRasH() const { return m_SuperRasH; } template unsigned int Renderer::SuperSize() const { return m_SuperSize; } template unsigned int Renderer::FinalBufferSize() const { return FinalRowSize() * FinalRasH(); } template unsigned int Renderer::FinalRowSize() const { return FinalRasW() * PixelSize(); } template unsigned int Renderer::FinalDimensions() const { return FinalRasW() * FinalRasH(); } template unsigned int Renderer::PixelSize() const { return NumChannels() * BytesPerChannel(); } template unsigned int Renderer::GutterWidth() const { return m_GutterWidth; } template unsigned int Renderer::DensityFilterOffset() const { return m_DensityFilterOffset; } template double Renderer::ScaledQuality() const { return m_ScaledQuality; } template T Renderer::Scale() const { return m_Scale; } template T Renderer::PixelsPerUnitX() const { return m_PixelsPerUnitX; } template T Renderer::PixelsPerUnitY() const { return m_PixelsPerUnitY; } template double Renderer::LowerLeftX(bool gutter) const { return gutter ? m_CarToRas.CarLlX() : m_LowerLeftX; } template double Renderer::LowerLeftY(bool gutter) const { return gutter ? m_CarToRas.CarLlY() : m_LowerLeftY; } template double Renderer::UpperRightX(bool gutter) const { return gutter ? m_CarToRas.CarUrX() : m_UpperRightX; } template double Renderer::UpperRightY(bool gutter) const { return gutter ? m_CarToRas.CarUrY() : m_UpperRightY; } template T Renderer::K1() const { return m_K1; } template T Renderer::K2() const { return m_K2; } template uint64_t Renderer::TotalIterCount() const { return (uint64_t)((uint64_t)Round(m_ScaledQuality) * (uint64_t)FinalRasW() * (uint64_t)FinalRasH()); }//Use Round() because there can be some roundoff error when interpolating. 
template uint64_t Renderer::ItersPerTemporalSample() const { return (uint64_t)ceil(double(TotalIterCount()) / double(Passes() * TemporalSamples())); } template eProcessState Renderer::ProcessState() const { return m_ProcessState; } template eProcessAction Renderer::ProcessAction() const { return m_ProcessAction; } template EmberStats Renderer::Stats() const { return m_Stats; } template const CarToRas* Renderer::CoordMap() const { return &m_CarToRas; } template glm::detail::tvec4* Renderer::HistBuckets() { return m_HistBuckets.data(); } template glm::detail::tvec4* Renderer::AccumulatorBuckets() { return m_AccumulatorBuckets.data(); } template SpatialFilter* Renderer::GetSpatialFilter() { return m_SpatialFilter.get(); } template TemporalFilter* Renderer::GetTemporalFilter() { return m_TemporalFilter.get(); } template DensityFilter* Renderer::GetDensityFilter() { return m_DensityFilter.get(); } ///

/// Ember wrappers, getters only. ///

template bool Renderer::XaosPresent() { return m_Ember.XaosPresent(); } template unsigned int Renderer::FinalRasW() const { return m_Ember.m_FinalRasW; } template unsigned int Renderer::FinalRasH() const { return m_Ember.m_FinalRasH; } template unsigned int Renderer::Supersample() const { return m_Ember.m_Supersample; } template unsigned int Renderer::Passes() const { return m_Ember.m_Passes; } template unsigned int Renderer::TemporalSamples() const { return m_Ember.m_TemporalSamples; } template unsigned int Renderer::PaletteIndex() const { return m_Ember.PaletteIndex(); } template T Renderer::Time() const { return m_Ember.m_Time; } template T Renderer::Quality() const { return m_Ember.m_Quality; } template T Renderer::SpatialFilterRadius() const { return m_Ember.m_SpatialFilterRadius; } template T Renderer::PixelsPerUnit() const { return m_Ember.m_PixelsPerUnit; } template T Renderer::Zoom() const { return m_Ember.m_Zoom; } template T Renderer::CenterX() const { return m_Ember.m_CenterX; } template T Renderer::CenterY() const { return m_Ember.m_CenterY; } template T Renderer::Rotate() const { return m_Ember.m_Rotate; } template T Renderer::Hue() const { return m_Ember.m_Hue; } template T Renderer::Brightness() const { return m_Ember.m_Brightness; } template T Renderer::Gamma() const { return m_Ember.m_Gamma; } template T Renderer::Vibrancy() const { return m_Ember.m_Vibrancy; } template T Renderer::GammaThresh() const { return m_Ember.m_GammaThresh; } template T Renderer::HighlightPower() const { return m_Ember.m_HighlightPower; } template Color Renderer::Background() const { return m_Ember.m_Background; } template const Xform* Renderer::Xforms() const { return m_Ember.Xforms(); } template Xform* Renderer::NonConstXforms() { return m_Ember.NonConstXforms(); } template unsigned int Renderer::XformCount() const { return m_Ember.XformCount(); } template const Xform* Renderer::FinalXform() const { return m_Ember.FinalXform(); } template Xform* 
Renderer::NonConstFinalXform() { return m_Ember.NonConstFinalXform(); } template bool Renderer::UseFinalXform() const { return m_Ember.UseFinalXform(); } template const Palette* Renderer::GetPalette() const { return &m_Ember.m_Palette; } template ePaletteMode Renderer::PaletteMode() const { return m_Ember.m_PaletteMode; } ///

/// Iterator wrappers. ///

//Returns the iterator's xform distributions, or NULL when no iterator is assigned.
template const unsigned char* Renderer::XformDistributions() const
{
    if (m_Iterator == NULL)
        return NULL;

    return m_Iterator->XformDistributions();
}

//Returns the size reported by the iterator for its xform distributions, or 0 when no iterator is assigned.
template const unsigned int Renderer::XformDistributionsSize() const
{
    if (m_Iterator == NULL)
        return 0;

    return m_Iterator->XformDistributionsSize();
}

//Returns a pointer to the storage of the sample buffer at threadIndex,
//or NULL when threadIndex is not a valid index into m_Samples.
template Point* Renderer::Samples(unsigned int threadIndex) const
{
    if (threadIndex >= m_Samples.size())
        return NULL;

    return (Point*)m_Samples[threadIndex].data();
}
}