#pragma once #include "EmberCLPch.h" #include "OpenCLWrapper.h" #include "DEOpenCLKernelCreator.h" #include "FinalAccumOpenCLKernelCreator.h" #include "RendererClDevice.h" /// /// RendererCLBase and RendererCL classes. /// namespace EmberCLns { /// /// Serves only as an interface for OpenCL specific rendering functions. /// class EMBERCL_API RendererCLBase { public: virtual ~RendererCLBase() { } virtual bool ReadFinal(v4F* pixels) = 0; virtual bool ClearFinal() = 0; virtual bool AnyNvidia() const = 0; }; /// /// RendererCL is a derivation of the basic CPU renderer which /// overrides various functions to render on the GPU using OpenCL. /// This supports multi-GPU rendering and is done in the following manner: /// -When rendering a single image, the iterations will be split between devices in sub batches. /// -When animating, a renderer for each device will be created by the calling code, /// and the frames will each be rendered by a single device as available. /// The synchronization across devices is done through a single atomic counter. /// Since this class derives from EmberReport and also contains an /// OpenCLWrapper member which also derives from EmberReport, the /// reporting functions are overridden to aggregate the errors from /// both sources. /// Template argument T expected to be float or double. /// Template argument bucketT must always be float. /// template class EMBERCL_API RendererCL : public Renderer, public RendererCLBase { using EmberNs::Renderer::RendererBase::Abort; using EmberNs::Renderer::RendererBase::EarlyClip; using EmberNs::Renderer::RendererBase::EnterResize; using EmberNs::Renderer::RendererBase::LeaveResize; using EmberNs::Renderer::RendererBase::FinalRasW; using EmberNs::Renderer::RendererBase::FinalRasH; using EmberNs::Renderer::RendererBase::SuperRasW; using EmberNs::Renderer::RendererBase::SuperRasH; using EmberNs::Renderer::RendererBase::SuperSize; using EmberNs::Renderer::RendererBase::BytesPerChannel; using EmberNs::Renderer::RendererBase::TemporalSamples; using EmberNs::Renderer::RendererBase::ItersPerTemporalSample; using EmberNs::Renderer::RendererBase::FuseCount; using EmberNs::Renderer::RendererBase::DensityFilterOffset; using EmberNs::Renderer::RendererBase::m_ProgressParameter; using EmberNs::Renderer::RendererBase::m_YAxisUp; using EmberNs::Renderer::RendererBase::m_LockAccum; using EmberNs::Renderer::RendererBase::m_Abort; using EmberNs::Renderer::RendererBase::m_LastIter; using EmberNs::Renderer::RendererBase::m_LastIterPercent; using EmberNs::Renderer::RendererBase::m_Stats; using EmberNs::Renderer::RendererBase::m_Callback; using EmberNs::Renderer::RendererBase::m_Rand; using EmberNs::Renderer::RendererBase::m_RenderTimer; using EmberNs::Renderer::RendererBase::m_IterTimer; using EmberNs::Renderer::RendererBase::m_ProgressTimer; using EmberNs::Renderer::RendererBase::EmberReport::AddToReport; using EmberNs::Renderer::RendererBase::m_ResizeCs; using EmberNs::Renderer::RendererBase::m_ProcessAction; using EmberNs::Renderer::m_RotMat; using EmberNs::Renderer::m_Ember; using EmberNs::Renderer::m_Csa; using EmberNs::Renderer::m_CurvesSet; using EmberNs::Renderer::CenterX; using EmberNs::Renderer::CenterY; using EmberNs::Renderer::K1; using EmberNs::Renderer::K2; using EmberNs::Renderer::Supersample; using EmberNs::Renderer::HighlightPower; using EmberNs::Renderer::HistBuckets; using EmberNs::Renderer::AccumulatorBuckets; using EmberNs::Renderer::GetDensityFilter; using EmberNs::Renderer::GetSpatialFilter; using EmberNs::Renderer::CoordMap; using EmberNs::Renderer::XformDistributions; using EmberNs::Renderer::XformDistributionsSize; using EmberNs::Renderer::m_Dmap; using EmberNs::Renderer::m_DensityFilter; using EmberNs::Renderer::m_SpatialFilter; public: RendererCL(const vector>& devices, bool shared = false, GLuint outputTexID = 0); RendererCL(const RendererCL& renderer) = delete; RendererCL& operator = (const RendererCL& renderer) = delete; virtual ~RendererCL() = default; //Non-virtual member functions for OpenCL specific tasks. bool Init(const vector>& devices, bool shared, GLuint outputTexID); bool SetOutputTexture(GLuint outputTexID); //Iters per kernel/block/grid. inline size_t IterCountPerKernel() const; inline size_t IterCountPerBlock() const; inline size_t IterCountPerGrid() const; //Kernels per block. inline size_t IterBlockKernelWidth() const; inline size_t IterBlockKernelHeight() const; inline size_t IterBlockKernelCount() const; //Kernels per grid. inline size_t IterGridKernelWidth() const; inline size_t IterGridKernelHeight() const; inline size_t IterGridKernelCount() const; //Blocks per grid. inline size_t IterGridBlockWidth() const; inline size_t IterGridBlockHeight() const; inline size_t IterGridBlockCount() const; bool ReadHist(size_t device); bool ReadAccum(); bool ReadPoints(size_t device, vector>& vec); bool ClearHist(); bool ClearHist(size_t device); bool ClearAccum(); bool WritePoints(size_t device, vector>& vec); #ifdef TEST_CL bool WriteRandomPoints(size_t device); #endif const string& IterKernel() const; const string& DEKernel() const; const string& FinalAccumKernel() const; //Access to underlying OpenCL structures. Use cautiously. const vector>& Devices() const; //Virtual functions overridden from RendererCLBase. virtual bool ReadFinal(v4F* pixels); virtual bool ClearFinal(); //Public virtual functions overridden from Renderer or RendererBase. virtual size_t MemoryAvailable() override; virtual bool Ok() const override; virtual void ClearErrorReport() override; virtual size_t SubBatchSize() const override; virtual size_t ThreadCount() const override; virtual bool CreateDEFilter(bool& newAlloc) override; virtual bool CreateSpatialFilter(bool& newAlloc) override; virtual eRendererType RendererType() const override; virtual string ErrorReportString() override; virtual vector ErrorReport() override; virtual bool RandVec(vector>& randVec) override; virtual bool AnyNvidia() const override; #ifndef TEST_CL protected: #endif //Protected virtual functions overridden from Renderer. virtual bool Alloc(bool histOnly = false) override; virtual bool ResetBuckets(bool resetHist = true, bool resetAccum = true) override; virtual eRenderStatus LogScaleDensityFilter(bool forceOutput = false) override; virtual eRenderStatus GaussianDensityFilter() override; virtual eRenderStatus AccumulatorToFinalImage(v4F* pixels, size_t finalOffset) override; virtual EmberStats Iterate(size_t iterCount, size_t temporalSample) override; #ifndef TEST_CL private: #endif void Init(); //Private functions for making and running OpenCL programs. bool BuildIterProgramForEmber(bool doAccum = true); bool RunIter(size_t iterCount, size_t temporalSample, size_t& itersRan); eRenderStatus RunLogScaleFilter(); eRenderStatus RunDensityFilter(); eRenderStatus RunFinalAccum(); bool ClearBuffer(size_t device, const string& bufferName, uint width, uint height, uint elementSize); bool RunDensityFilterPrivate(size_t kernelIndex, size_t gridW, size_t gridH, size_t blockW, size_t blockH, uint chunkSizeW, uint chunkSizeH, uint colChunkPass, uint rowChunkPass); int MakeAndGetDensityFilterProgram(size_t ss, uint filterWidth); int MakeAndGetFinalAccumProgram(); int MakeAndGetGammaCorrectionProgram(); bool CreateHostBuffer(); bool SumDeviceHist(); void FillSeeds(); //Private functions passing data to OpenCL programs. void ConvertDensityFilter(); void ConvertSpatialFilter(); void ConvertEmber(Ember& ember, EmberCL& emberCL, vector>& xformsCL); void ConvertCarToRas(const CarToRas& carToRas); bool m_Init; bool m_DoublePrecision; size_t m_IterCountPerKernel; size_t m_IterBlocksWide, m_IterBlockWidth; size_t m_IterBlocksHigh, m_IterBlockHeight; size_t m_MaxDEBlockSizeW; size_t m_MaxDEBlockSizeH; //Buffer names. string m_EmberBufferName; string m_XformsBufferName; string m_ParVarsBufferName; string m_GlobalSharedBufferName; string m_SeedsBufferName; string m_DistBufferName; string m_CarToRasBufferName; string m_DEFilterParamsBufferName; string m_SpatialFilterParamsBufferName; string m_CurvesCsaName; string m_DECoefsBufferName; string m_DEWidthsBufferName; string m_DECoefIndicesBufferName; string m_SpatialFilterCoefsBufferName; string m_HostBufferName; string m_HistBufferName; string m_AccumBufferName; string m_FinalImageName; string m_PointsBufferName; //Kernels. string m_IterKernel; cl::ImageFormat m_PaletteFormat; cl::ImageFormat m_FinalFormat; cl::Image2D m_Palette; cl::ImageGL m_AccumImage; GLuint m_OutputTexID; EmberCL m_EmberCL; vector> m_XformsCL; vector> m_Seeds; CarToRasCL m_CarToRasCL; DensityFilterCL m_DensityFilterCL; SpatialFilterCL m_SpatialFilterCL; IterOpenCLKernelCreator m_IterOpenCLKernelCreator; DEOpenCLKernelCreator m_DEOpenCLKernelCreator; FinalAccumOpenCLKernelCreator m_FinalAccumOpenCLKernelCreator; pair> m_Params; pair> m_GlobalShared; vector> m_Devices; Ember m_LastBuiltEmber; }; }