#include "EmberPch.h" #include "RendererBase.h" namespace EmberNs { ///

/// <summary>
/// Default constructor.
/// Initializes the thread count from Timing::ProcessorCount(), which per the
/// original comment is the number of cores detected on the system.
/// </summary>
RendererBase::RendererBase()
{
	const auto detectedCores = Timing::ProcessorCount();
	ThreadCount(detectedCores);
}

/// Non-virtual processing functions. ///

///

/// Abort the render and call a function to do something, most likely change a value. /// Then update the current process action to the one specified. /// The current process action will only be set if it makes sense based /// on the current process state. If the value specified doesn't make sense /// the next best choice will be made. If nothing makes sense, a complete /// re-render will be triggered on the next call to Run(). ///

/// The function to execute /// The desired process action void RendererBase::ChangeVal(std::function func, eProcessAction action) { Abort(); EnterRender(); func(); //If they want a full render, don't bother inspecting process state, just start over. if (action == eProcessAction::FULL_RENDER) { m_ProcessState = eProcessState::NONE; m_ProcessAction = eProcessAction::FULL_RENDER; } //Keep iterating is when rendering has completed and the user increases the quality. //Rendering can be started where it left off by adding just the difference between the //new and old quality values. else if (action == eProcessAction::KEEP_ITERATING) { if ((m_ProcessState == eProcessState::ACCUM_DONE || m_ProcessState == eProcessState::ITER_STARTED) && TemporalSamples() == 1) { m_ProcessState = eProcessState::ITER_STARTED; m_ProcessAction = eProcessAction::KEEP_ITERATING; } else//Invaid process state to handle KEEP_ITERATING, so just start over. { m_ProcessState = eProcessState::NONE; m_ProcessAction = eProcessAction::FULL_RENDER; } } else if (action == eProcessAction::FILTER_AND_ACCUM) { //If in the middle of a render, cannot skip to filtering or accum, so just start over. if (m_ProcessState == eProcessState::NONE || m_ProcessState == eProcessState::ITER_STARTED) { m_ProcessState = eProcessState::NONE; m_ProcessAction = eProcessAction::FULL_RENDER; } //Set the state to ITER_DONE and the next process action to FILTER_AND_ACCUM. else { m_ProcessState = eProcessState::ITER_DONE; m_ProcessAction = eProcessAction::FILTER_AND_ACCUM; } } //Run accum only. else if (action == eProcessAction::ACCUM_ONLY) { //Doesn't make sense if in the middle of iterating, so just start over. if (m_ProcessState == eProcessState::NONE || m_ProcessState == eProcessState::ITER_STARTED) { m_ProcessAction = eProcessAction::FULL_RENDER; } else if (m_ProcessState == eProcessState::ITER_DONE)//If iterating is done, can start at density filtering and proceed. 
{ m_ProcessAction = eProcessAction::FILTER_AND_ACCUM; } else if (m_ProcessState == eProcessState::FILTER_DONE)//Density filtering is done, so the process action is assigned as desired. { m_ProcessAction = eProcessAction::ACCUM_ONLY; } else if (m_ProcessState == eProcessState::ACCUM_DONE)//Final accum is done, so back up and run final accum again. { m_ProcessState = eProcessState::FILTER_DONE; m_ProcessAction = eProcessAction::ACCUM_ONLY; } } LeaveRender(); } ///

/// Return the amount of memory needed for the histogram. ///

/// The memory required for the histogram to render the current ember size_t RendererBase::HistMemoryRequired(size_t strips) { bool newFilterAlloc = false; CreateSpatialFilter(newFilterAlloc); CreateTemporalFilter(newFilterAlloc); ComputeBounds(); //Because ComputeBounds() was called, this includes gutter. return (SuperSize() * HistBucketSize()) / strips; } ///

/// Return a pair whose first member contains the amount of memory needed for the histogram, /// and whose second member contains the total the amount of memory needed to render the current ember. /// Optionally include the memory needed for the final output image in pair.second. /// Note that the memory required for the final output image will be doubled if threaded writes /// are used because a copy of the final output is passed to a thread. ///

/// The number of strips being used /// If true include the memory needed for the final output image, else don't. /// Whether the caller will be writing the output in a thread, which doubles the memory required for the final output buffer. /// The histogram memory required in first, and the total memory required in second pair RendererBase::MemoryRequired(size_t strips, bool includeFinal, bool threadedWrite) { pair p; size_t outSize = includeFinal ? FinalBufferSize() : 0; outSize *= (threadedWrite ? 2 : 1); p.first = HistMemoryRequired(strips); p.second = (p.first * 2) + outSize;//Multiply hist by 2 to account for the density filtering buffer which is the same size as the histogram. return p; } ///

/// Get a copy of the vector of random contexts. /// Useful for debugging because the returned vector can be used for future renders to /// produce the exact same output. ///

/// The vector of random contexts to assign vector> RendererBase::RandVec() { return m_Rand; } ///

/// Set the vector of random contexts. /// Assignment will only take place if the size of the vector matches /// the number of threads used for rendering. /// Reset the rendering process. ///

/// The vector of random contexts to assign /// True if the size of the vector matched the number of threads used for rendering, else false. bool RendererBase::RandVec(vector>& randVec) { bool b = false; if (randVec.size() == ThreadCount()) { ChangeVal([&] { m_Rand = randVec; b = true; }, eProcessAction::FULL_RENDER); } return b; } ///

/// Resize the passed in vector to be large enough to handle the output image. /// If m_ReclaimOnResize is true, and the vector is already larger than needed, /// it will be shrunk to the needed size. However if m_ReclaimOnResize is false, /// it will be left alone if already large enough. /// ComputeBounds() must be called before calling this function. ///

/// The vector to allocate /// True if the vector contains enough space to hold the output image bool RendererBase::PrepFinalAccumVector(vector& pixels) { EnterResize(); size_t size = FinalDimensions(); if (m_ReclaimOnResize) { if (pixels.size() != size) { pixels.resize(size); pixels.shrink_to_fit(); } } else { if (pixels.size() < size) pixels.resize(size); } LeaveResize(); return pixels.size() >= size;//Ensure allocation went ok. } ///

/// Virtual processing functions. ///

///

/// Get a status indicating whether this renderer is ok. /// Return true for this class, derived classes will inspect GPU hardware /// to determine if they are ok. ///

/// Always true for this class bool RendererBase::Ok() const { return true; } ///

/// The amount of RAM available to render with. ///

/// An unsigned 64-bit integer specifying how much memory is available size_t RendererBase::MemoryAvailable() { size_t memAvailable = 0; #ifdef _WIN32 MEMORYSTATUSEX stat; stat.dwLength = sizeof(stat); GlobalMemoryStatusEx(&stat); memAvailable = stat.ullTotalPhys; #elif defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE) memAvailable = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE); #elif defined __APPLE__ #ifdef __LP64__ long physmem; size_t len = sizeof(physmem); static int mib[2] = { CTL_HW, HW_MEMSIZE }; #else size_t physmem; size_t len = sizeof(physmem); static int mib[2] = { CTL_HW, HW_PHYSMEM }; #endif if (sysctl(mib, 2, &physmem, &len, nullptr, 0) == 0 && len == sizeof(physmem)) { memAvailable = physmem; } else { cout << "Warning: unable to determine physical memory.\n"; memAvailable = 4e9; } #else cout << "Warning: unable to determine physical memory.\n"; memAvailable = 4e9; #endif return memAvailable; } ///

/// Non-virtual renderer properties, getters only. ///

size_t RendererBase::SuperRasW() const { return m_SuperRasW; } size_t RendererBase::SuperRasH() const { return m_SuperRasH; } size_t RendererBase::SuperSize() const { return m_SuperSize; } size_t RendererBase::FinalRowSize() const { return FinalRasW() * PixelSize(); } size_t RendererBase::FinalDimensions() const { return FinalRasW() * FinalRasH(); } size_t RendererBase::FinalBufferSize() const { return FinalRowSize() * FinalRasH(); } size_t RendererBase::PixelSize() const { return NumChannels() * BytesPerChannel(); } size_t RendererBase::GutterWidth() const { return m_GutterWidth; } size_t RendererBase::DensityFilterOffset() const { return m_DensityFilterOffset; } size_t RendererBase::TotalIterCount(size_t strips) const { return size_t(size_t(Round(ScaledQuality())) * FinalRasW() * FinalRasH() * strips); }//Use Round() because there can be some roundoff error when interpolating. size_t RendererBase::ItersPerTemporalSample() const { return size_t(ceil(double(TotalIterCount(1)) / double(TemporalSamples()))); }//Temporal samples is used with animation, which doesn't support strips, so pass 1. eProcessState RendererBase::ProcessState() const { return m_ProcessState; } eProcessAction RendererBase::ProcessAction() const { return m_ProcessAction; } EmberStats RendererBase::Stats() const { return m_Stats; } ///

/// Non-virtual render properties, getters and setters. ///

///

/// Get whether the histogram is locked during accumulation. /// This is to prevent two threads from writing to the same histogram /// bucket at once. /// The current implementation matches flam3 and is very innefficient /// to the point of negating any gains gotten from multi-threading. /// Future workarounds may be tried in the future. /// Default: false. ///

/// True if the histogram is locked during accumulation, else false. bool RendererBase::LockAccum() const { return m_LockAccum; } ///

/// Set whether the histogram is locked during accumulation. /// This is to prevent two threads from writing to the same histogram /// bucket at once. /// The current implementation matches flam3 and is very innefficient /// to the point of negating any gains gotten from multi-threading. /// Different workarounds may be tried in the future. /// Reset the rendering process. ///

/// True if the histogram should be locked when accumulating, else false void RendererBase::LockAccum(bool lockAccum) { ChangeVal([&] { m_LockAccum = lockAccum; }, eProcessAction::FULL_RENDER); } ///

/// Get whether color clipping and gamma correction is done before /// or after spatial filtering. /// Default: false. ///

/// True if early clip, else false. bool RendererBase::EarlyClip() const { return m_EarlyClip; } ///

/// Set whether color clipping and gamma correction is done before /// or after spatial filtering. /// Set the render state to FILTER_AND_ACCUM. ///

/// True if early clip, else false. void RendererBase::EarlyClip(bool earlyClip) { ChangeVal([&] { m_EarlyClip = earlyClip; }, eProcessAction::FILTER_AND_ACCUM); } ///

/// Get whether the positive Y coordinate of the final output image is up. /// Default: false. ///

/// True if up, else false. bool RendererBase::YAxisUp() const { return m_YAxisUp; } ///

/// Set whether the positive Y axis of the final output image is up. ///

/// True if the positive y axis is up, else false. void RendererBase::YAxisUp(bool yup) { ChangeVal([&] { m_YAxisUp = yup; }, eProcessAction::ACCUM_ONLY); } ///

/// Get whether to insert the palette as a block of colors in the final output image. /// This is useful for debugging palette issues. /// Default: 1. ///

/// True if inserting the palette, else false. bool RendererBase::InsertPalette() const { return m_InsertPalette; } ///

/// Set whether to insert the palette as a block of colors in the final output image. /// This is useful for debugging palette issues. /// Set the render state to ACCUM_ONLY. ///

/// True if inserting the palette, else false. void RendererBase::InsertPalette(bool insertPalette) { ChangeVal([&] { m_InsertPalette = insertPalette; }, eProcessAction::ACCUM_ONLY); } ///

/// Get whether to reclaim unused memory in the final output buffer /// when a smaller size is requested than has been previously allocated. /// Default: false. ///

/// True if reclaim, else false. bool RendererBase::ReclaimOnResize() const { return m_ReclaimOnResize; } ///

/// Set whether to reclaim unused memory in the final output buffer /// when a smaller size is requested than has been previously allocated. /// Reset the rendering process. ///

/// True if reclaim, else false. void RendererBase::ReclaimOnResize(bool reclaimOnResize) { ChangeVal([&] { m_ReclaimOnResize = reclaimOnResize; }, eProcessAction::FULL_RENDER); } ///

/// Set the callback object. ///

/// The callback object to set void RendererBase::Callback(RenderCallback* callback) { m_Callback = callback; } ///

/// Set the number of threads to use when rendering. /// This will also reset the vector of random contexts to be the same size /// as the number of specified threads. /// Since this is where they get set up, the caller can optionally pass in /// a seed string, however it's only used if threads is 1. /// This is useful for debugging since it will run the same point trajectory /// every time. /// Reset the rendering process. ///

/// The number of threads to use /// The seed string to use if threads is 1. Default: nullptr. void RendererBase::ThreadCount(size_t threads, const char* seedString) { ChangeVal([&] { Timing t; size_t i, size; const size_t isaacSize = 1 << ISAAC_SIZE; ISAAC_INT seeds[isaacSize]; m_ThreadsToUse = threads > 0 ? threads : 1; m_Rand.clear(); m_SubBatch.clear(); m_SubBatch.resize(m_ThreadsToUse); m_BadVals.resize(m_ThreadsToUse); if (seedString) { memset(seeds, 0, isaacSize * sizeof(ISAAC_INT)); memcpy(reinterpret_cast(seeds), seedString, std::min(strlen(seedString), isaacSize * sizeof(ISAAC_INT))); } //This is critical for multithreading, otherwise the threads all happen //too close to each other in time, resulting in bad randomization. while (m_Rand.size() < m_ThreadsToUse) { size = m_Rand.size(); if (seedString) { ISAAC_INT newSize = ISAAC_INT(size + 5 + (t.Toc() + t.EndTime())); #ifdef ISAAC_FLAM3_DEBUG QTIsaac isaac(0, 0, 0, seeds); #else QTIsaac isaac(newSize, newSize * 2, newSize * 3, seeds); #endif m_Rand.push_back(isaac); for (i = 0; i < (isaacSize * sizeof(ISAAC_INT)); i++) reinterpret_cast(seeds)[i]++; } else { for (i = 0; i < isaacSize; i++) { t.Toc(); seeds[i] = ISAAC_INT((t.EndTime() * i) + (size + 1)); } t.Toc(); ISAAC_INT r = ISAAC_INT((size * i) + i + t.EndTime()); QTIsaac isaac(r, r * 2, r * 3, seeds); m_Rand.push_back(isaac); } } }, eProcessAction::FULL_RENDER); } ///

/// Get the bytes per channel of the output image. /// This will always be 4 since each channel is a 32-bit float. ///

/// The number of bytes per channel size_t RendererBase::BytesPerChannel() const { return m_BytesPerChannel; } ///

/// Get the number of channels per pixel in the output image. /// This will always be 4 since each pixel is always RGBA. ///

/// The number of channels per pixel in the output image size_t RendererBase::NumChannels() const { return m_NumChannels; } ///

/// Get/set the priority used for the CPU rendering threads. /// This does not affect OpenCL rendering. ///

/// The priority to use for the CPU rendering threads eThreadPriority RendererBase::Priority() const { return m_Priority; } void RendererBase::Priority(eThreadPriority priority) { m_Priority = priority; } ///

/// Get the type of filter to use for preview renders during interactive rendering. /// Using basic log scaling is quicker, but doesn't provide any bluring. /// Full DE is much slower, but provides a more realistic preview of what the final image /// will look like. /// Default: FILTER_LOG. ///

/// The type of filter to use eInteractiveFilter RendererBase::InteractiveFilter() const { return m_InteractiveFilter; } ///

/// Set the type of filter to use for preview renders during interactive rendering. /// Using basic log scaling is quicker, but doesn't provide any bluring. /// Full DE is much slower, but provides a more realistic preview of what the final image /// will look like. /// Reset the rendering process. ///

/// The filter. void RendererBase::InteractiveFilter(eInteractiveFilter filter) { ChangeVal([&] { m_InteractiveFilter = filter; }, eProcessAction::FULL_RENDER); } ///

/// Virtual render properties, getters and setters. ///

///

/// Get the number of threads used when rendering. /// Default: use all avaliable cores. ///

/// The number of threads used when rendering size_t RendererBase::ThreadCount() const { return m_ThreadsToUse; } ///

/// Get the renderer type enum. /// eRendererType::CPU_RENDERER for this class, other values for derived classes. ///

/// eRendererType::CPU_RENDERER eRendererType RendererBase::RendererType() const { return eRendererType::CPU_RENDERER; } ///

/// Get whether the renderer uses a shared texture with OpenGL. /// This only applies to the OpenCL renderer (which can be shared or unshared), so it's always false in the base. ///

/// True if shared, else false. Always false in the base. bool RendererBase::Shared() const { return false; } ///

/// Non-virtual threading control. ///

///

/// Stop rendering, ensure all locks are exited and reset the rendering state. ///

void RendererBase::Reset() { Abort(); EnterRender(); EnterFinalAccum(); LeaveFinalAccum(); LeaveRender(); m_ProcessState = eProcessState::NONE; m_ProcessAction = eProcessAction::FULL_RENDER; } void RendererBase::EnterRender() { m_RenderingCs.lock(); } void RendererBase::LeaveRender() { m_RenderingCs.unlock(); } void RendererBase::EnterFinalAccum() { m_FinalAccumCs.lock(); m_InFinalAccum = true; } void RendererBase::LeaveFinalAccum() { m_FinalAccumCs.unlock(); m_InFinalAccum = false; } void RendererBase::EnterResize() { m_ResizeCs.lock(); } void RendererBase::LeaveResize() { m_ResizeCs.unlock(); } void RendererBase::Abort() { m_Abort = true; Pause(false); } bool RendererBase::Aborted() { return m_Abort; } void RendererBase::Pause(bool pause) { m_Pause = pause; } bool RendererBase::Paused() { return m_Pause; } bool RendererBase::InRender() { return m_InRender; } bool RendererBase::InFinalAccum() { return m_InFinalAccum; } }