fractorium/Source/EmberCL/DEOpenCLKernelCreator.h
mfeemster a4aae06b02 --User changes
-Add Simon Detheridge's name to the About Box.

--Bug fixes
 -Fix bug in OpenCL atomic string, which is never used.
 -Wrong hist and accum allocation size in RendererCL when using float-only buffers now.
 -Move some kernel initialization to a place where it's done once per render, rather than on every interactive iter chunk.

--Code changes
 -Make ConvertCarToRas() just assign to the member rather than return a struct.
 -Make kernel string accessor functions in IterOpenCLKernelCreator, FinalAccumOpenCLKernelCreator and DEOpenCLKernelCreator be const and return a const string reference.
 -Don't include atomic string unless locking on the GPU, which is never.
2015-08-12 18:51:07 -07:00

78 lines
2.3 KiB
C++

#pragma once
#include "EmberCLPch.h"
#include "EmberCLStructs.h"
#include "EmberCLFunctions.h"
/// <summary>
/// Declaration of the DEOpenCLKernelCreator class, the kernel creator for density filtering.
/// </summary>
//#define ROW_ONLY_DE 1
namespace EmberCLns
{
/// <summary>
/// Kernel creator for density filtering.
/// This implements both basic log scale filtering
/// as well as the full flam3 density estimation filtering
/// in OpenCL.
/// Several conditionals are present in the CPU version. They
/// are stripped out of the kernels, and a separate kernel
/// is created for every possible case instead.
/// If the filter width is 9 or less, then the entire process can be
/// done in shared memory, which is very fast.
/// However, if the filter width is greater than 9, shared memory is not
/// used and all filtering is done directly with main global VRAM. This
/// ends up being not much faster than doing it on the CPU.
/// String members are kept for the program source and entry points
/// for each version of the program.
/// The accessors are const and return const string references, so callers
/// receive no copy of the program text.
/// NOTE(review): the accessors presumably return references to the string
/// members of this object, in which case the references are only valid
/// while the object is alive - confirm in the .cpp.
/// </summary>
class EMBERCL_API DEOpenCLKernelCreator
{
public:
// Constructor taking two flags: whether double precision is used and whether
// the device is an nVidia one.
// NOTE(review): presumably these are stored in m_DoublePrecision and m_NVidia
// and the kernel strings are built here - confirm in the .cpp.
DEOpenCLKernelCreator(bool doublePrecision, bool nVidia);
//Accessors.
// Program source and entry point name for the log scale filtering kernel.
const string& LogScaleAssignDEKernel() const;
const string& LogScaleAssignDEEntryPoint() const;
// Program source and entry point name for a Gaussian density estimation kernel.
// NOTE(review): ss is presumably the supersample value and filterWidth the
// filter width that decides between the shared memory and global memory
// versions described in the class summary - confirm the selection logic in the .cpp.
const string& GaussianDEKernel(size_t ss, uint filterWidth) const;
const string& GaussianDEEntryPoint(size_t ss, uint filterWidth) const;
//Miscellaneous static functions.
// These are static, so they can be called without an instance of this class.
// NOTE(review): judging by the names alone, these compute the limits on the
// density filter size, its radius and the box size that fits in the given
// amount of local memory; units and formulas are not visible here - confirm in the .cpp.
static uint MaxDEFilterSize();
static double SolveMaxDERad(uint maxBoxSize, double desiredFilterSize, double ss);
static uint SolveMaxBoxSize(uint localMem);
private:
//Kernel creators.
// Each creator returns the generated program text by value.
// NOTE(review): the NoLocalCache variant presumably produces the version that
// does not use shared memory, as described in the class summary - confirm in the .cpp.
string CreateLogScaleAssignDEKernelString();
string CreateGaussianDEKernel(size_t ss);
string CreateGaussianDEKernelNoLocalCache(size_t ss);
// Program source and entry point for the log scale filtering kernel.
string m_LogScaleAssignDEKernel;
string m_LogScaleAssignDEEntryPoint;
// Program source/entry point pairs for the Gaussian density estimation kernels,
// one pair per version of the program.
// NOTE(review): "Ss" presumably stands for supersampling and "Scf" for the
// scf flag of the flam3 density estimation code - confirm in the .cpp.
string m_GaussianDEWithoutSsKernel;
string m_GaussianDEWithoutSsEntryPoint;
string m_GaussianDESsWithScfKernel;
string m_GaussianDESsWithScfEntryPoint;
string m_GaussianDESsWithoutScfKernel;
string m_GaussianDESsWithoutScfEntryPoint;
// The same three versions again, with "NoCache" in the name.
// NOTE(review): presumably these are the versions that do not use shared memory - confirm in the .cpp.
string m_GaussianDEWithoutSsNoCacheKernel;
string m_GaussianDEWithoutSsNoCacheEntryPoint;
string m_GaussianDESsWithScfNoCacheKernel;
string m_GaussianDESsWithScfNoCacheEntryPoint;
string m_GaussianDESsWithoutScfNoCacheKernel;
string m_GaussianDESsWithoutScfNoCacheEntryPoint;
// Flags matching the two constructor parameters.
bool m_DoublePrecision;
bool m_NVidia;
};
}