--User changes

-Add two new variations, hyperbolic and hypershift2.
 -Allow for animating final xforms.
 -More detailed diagnostics when any action in the OpenCL renderer fails.
 -Allow for creating an OpenCL renderer which does not share a texture with the main window, and instead manually copies its final output image from GPU to CPU then back to GPU.

--Bug fixes
 -Text was not properly being copied out of the Info | Bounds text box.

--Code changes
 -Remove Renderer::AccumulatorToFinalImage(v4F* pixels, size_t finalOffset), it's no longer needed or makes sense.
 -Controllers no longer keep track of shared status, it's kept inside the renderers.
 -Make getter functions in FractoriumOptionsDialog be public.
This commit is contained in:
Person
2018-04-28 22:28:05 -07:00
parent 0c67c52720
commit 92e9836151
39 changed files with 852 additions and 405 deletions

View File

@ -43,7 +43,7 @@ OpenCLInfo::OpenCLInfo()
if (!platformOk)
{
m_Platforms.push_back(platforms[platform]);
m_PlatformNames.push_back(platforms[platform].getInfo<CL_PLATFORM_VENDOR>(nullptr) + " " + platforms[platform].getInfo<CL_PLATFORM_NAME>(nullptr) + " " + platforms[platform].getInfo<CL_PLATFORM_VERSION>(nullptr));
m_PlatformNames.push_back(platforms[platform].getInfo<CL_PLATFORM_VENDOR>(nullptr).c_str() + " "s + platforms[platform].getInfo<CL_PLATFORM_NAME>(nullptr).c_str() + " "s + platforms[platform].getInfo<CL_PLATFORM_VERSION>(nullptr).c_str());
workingPlatformIndex++;
platformOk = true;
}
@ -58,7 +58,7 @@ OpenCLInfo::OpenCLInfo()
}
m_Devices.back().push_back(devices[platform][device]);
m_DeviceNames.back().push_back(devices[platform][device].getInfo<CL_DEVICE_VENDOR>(nullptr) + " " + devices[platform][device].getInfo<CL_DEVICE_NAME>(nullptr));// + " " + devices[platform][device].getInfo<CL_DEVICE_VERSION>());
m_DeviceNames.back().push_back(devices[platform][device].getInfo<CL_DEVICE_VENDOR>(nullptr).c_str() + " "s + devices[platform][device].getInfo<CL_DEVICE_NAME>(nullptr).c_str());// + " " + devices[platform][device].getInfo<CL_DEVICE_VERSION>().c_str());
m_AllDeviceNames.push_back(m_DeviceNames.back().back());
m_DeviceIndices.push_back(pair<size_t, size_t>(workingPlatformIndex, workingDeviceIndex++));
m_Init = true;//If at least one platform and device succeeded, OpenCL is ok. It's now ok to begin building and running programs.
@ -183,6 +183,29 @@ size_t OpenCLInfo::TotalDeviceIndex(size_t platform, size_t device) const
return index;
}
/// <summary>
/// Get a pointer to a device based on its ID.
/// </summary>
/// <param name="id">The device ID</param>
/// <param name="platform">Stores the platform index of the device if found.</param>
/// <param name="device">Stores the device index of the device if found.</param>
/// <returns>A pointer to the device if found, else nullptr.</returns>
const cl::Device* OpenCLInfo::DeviceFromId(cl_device_id id, size_t& platform, size_t& device) const
{
for (auto& p : m_DeviceIndices)
{
if (m_Devices[p.first][p.second]() == id)
{
platform = p.first;
device = p.second;
return &(m_Devices[p.first][p.second]);
}
}
platform = device = 0;
return nullptr;
}
/// <summary>
/// Create a context that is optionally shared with OpenGL and place it in the
/// passed in context ref parameter.
@ -209,6 +232,7 @@ bool OpenCLInfo::CreateContext(const cl::Platform& platform, cl::Context& contex
context = cl::Context(CL_DEVICE_TYPE_GPU, props, nullptr, nullptr, &err);//May need to tinker with this on Mac.
#else
#if defined WIN32
//::wglMakeCurrent(wglGetCurrentDC(), wglGetCurrentContext());
cl_context_properties props[] =
{
CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(),
@ -269,7 +293,7 @@ string OpenCLInfo::DumpInfo() const
for (size_t device = 0; device < m_Devices[platform].size(); device++)
{
os << "Device " << device << ": " << DeviceName(platform, device);
os << "\nCL_DEVICE_OPENCL_C_VERSION: " << GetInfo<string>(platform, device, CL_DEVICE_OPENCL_C_VERSION);
os << "\nCL_DEVICE_OPENCL_C_VERSION: " << GetInfo<string>(platform, device, CL_DEVICE_OPENCL_C_VERSION).c_str();
os << "\nCL_DEVICE_LOCAL_MEM_SIZE: " << GetInfo<cl_ulong>(platform, device, CL_DEVICE_LOCAL_MEM_SIZE);
os << "\nCL_DEVICE_LOCAL_MEM_TYPE: " << GetInfo<cl_uint>(platform, device, CL_DEVICE_LOCAL_MEM_TYPE);
os << "\nCL_DEVICE_MAX_COMPUTE_UNITS: " << GetInfo<cl_uint>(platform, device, CL_DEVICE_MAX_COMPUTE_UNITS);

View File

@ -29,6 +29,7 @@ public:
const vector<string>& AllDeviceNames() const;
const vector<string>& DeviceNames(size_t platform) const;
size_t TotalDeviceIndex(size_t platform, size_t device) const;
const cl::Device* DeviceFromId(cl_device_id id, size_t& platform, size_t& device) const;
string DumpInfo() const;
bool Ok() const;
bool CreateContext(const cl::Platform& platform, cl::Context& context, bool shared);

View File

@ -387,7 +387,6 @@ bool OpenCLWrapper::AddAndWriteImage(const string& name, cl_mem_flags flags, con
{
if (shared)
{
//::wglMakeCurrent(wglGetCurrentDC(), wglGetCurrentContext());
cl::ImageGL imageGL(m_Context, flags, GL_TEXTURE_2D, 0, texName, &err);
NamedImage2DGL namedImageGL(imageGL, name);

View File

@ -29,50 +29,11 @@ RendererCL<T, bucketT>::RendererCL(const vector<pair<size_t, size_t>>& devices,
m_DEOpenCLKernelCreator(typeid(T) == typeid(double), false),
m_FinalAccumOpenCLKernelCreator(typeid(T) == typeid(double))
{
Init();
Init(devices, shared, outputTexID);
}
/// <summary>
/// Initialization of fields, no OpenCL initialization is done here.
template <typename T, typename bucketT>
void RendererCL<T, bucketT>::Init()
{
m_Init = false;
m_DoublePrecision = typeid(T) == typeid(double);
//Buffer names.
m_EmberBufferName = "Ember";
m_XformsBufferName = "Xforms";
m_ParVarsBufferName = "ParVars";
m_GlobalSharedBufferName = "GlobalShared";
m_SeedsBufferName = "Seeds";
m_DistBufferName = "Dist";
m_CarToRasBufferName = "CarToRas";
m_DEFilterParamsBufferName = "DEFilterParams";
m_SpatialFilterParamsBufferName = "SpatialFilterParams";
m_DECoefsBufferName = "DECoefs";
m_DEWidthsBufferName = "DEWidths";
m_DECoefIndicesBufferName = "DECoefIndices";
m_SpatialFilterCoefsBufferName = "SpatialFilterCoefs";
m_CurvesCsaName = "CurvesCsa";
m_HostBufferName = "Host";
m_HistBufferName = "Hist";
m_AccumBufferName = "Accum";
m_FinalImageName = "Final";
m_PointsBufferName = "Points";
//It's critical that these numbers never change. They are
//based on the cuburn model of each kernel launch containing
//256 threads. 32 wide by 8 high. Everything done in the OpenCL
//iteraion kernel depends on these dimensions.
m_IterCountPerKernel = 256;
m_IterBlockWidth = 32;
m_IterBlockHeight = 8;
m_IterBlocksWide = 64;
m_IterBlocksHigh = 2;
m_PaletteFormat.image_channel_order = CL_RGBA;
m_PaletteFormat.image_channel_data_type = CL_FLOAT;
m_FinalFormat.image_channel_order = CL_RGBA;
m_FinalFormat.image_channel_data_type = CL_FLOAT;
Init(devices, shared, outputTexID);
}
/// <summary>
@ -97,11 +58,12 @@ bool RendererCL<T, bucketT>::Init(const vector<pair<size_t, size_t>>& devices, b
return false;
bool b = false;
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
auto& zeroizeProgram = m_IterOpenCLKernelCreator.ZeroizeKernel();
auto& sumHistProgram = m_IterOpenCLKernelCreator.SumHistKernel();
ostringstream os;
m_Init = false;
m_Shared = false;
m_Devices.clear();
m_Devices.reserve(devices.size());
m_OutputTexID = outputTexID;
@ -115,11 +77,11 @@ bool RendererCL<T, bucketT>::Init(const vector<pair<size_t, size_t>>& devices, b
if ((b = cld->Init()))//Build a simple program to ensure OpenCL is working right.
{
if (b && !(b = cld->m_Wrapper.AddProgram(m_IterOpenCLKernelCreator.ZeroizeEntryPoint(), zeroizeProgram, m_IterOpenCLKernelCreator.ZeroizeEntryPoint(), m_DoublePrecision))) { AddToReport(loc); }
if (b && !(b = cld->m_Wrapper.AddProgram(m_IterOpenCLKernelCreator.ZeroizeEntryPoint(), zeroizeProgram, m_IterOpenCLKernelCreator.ZeroizeEntryPoint(), m_DoublePrecision))) { ErrorStr(loc, "Failed to init zeroize program: "s + cld->ErrorReportString(), cld.get()); }
if (b && !(b = cld->m_Wrapper.AddAndWriteImage("Palette", CL_MEM_READ_ONLY, m_PaletteFormat, 256, 1, 0, nullptr))) { AddToReport(loc); }
if (b && !(b = cld->m_Wrapper.AddAndWriteImage("Palette", CL_MEM_READ_ONLY, m_PaletteFormat, 256, 1, 0, nullptr))) { ErrorStr(loc, "Failed to init palette buffer: "s + cld->ErrorReportString(), cld.get()); }
if (b && !(b = cld->m_Wrapper.AddAndWriteBuffer(m_GlobalSharedBufferName, m_GlobalShared.second.data(), m_GlobalShared.second.size() * sizeof(m_GlobalShared.second[0])))) { AddToReport(loc); }//Empty at start, will be filled in later if needed.
if (b && !(b = cld->m_Wrapper.AddAndWriteBuffer(m_GlobalSharedBufferName, m_GlobalShared.second.data(), m_GlobalShared.second.size() * sizeof(m_GlobalShared.second[0])))) { ErrorStr(loc, "Failed to init global shared buffer: "s + cld->ErrorReportString(), cld.get()); }//Empty at start, will be filled in later if needed.
if (b)
{
@ -127,33 +89,35 @@ bool RendererCL<T, bucketT>::Init(const vector<pair<size_t, size_t>>& devices, b
}
else
{
os << loc << ": failed to init platform " << devices[i].first << ", device " << devices[i].second;
AddToReport(loc);
ErrorStr(loc, "Failed to init programs for platform", cld.get());
break;
}
}
else
{
ErrorStr(loc, "Failed to init device, "s + cld->ErrorReportString(), cld.get());
break;
}
}
catch (const std::exception& e)
{
os << loc << ": failed to init platform " << devices[i].first << ", device " << devices[i].second << ": " << e.what();
AddToReport(os.str());
ErrorStr(loc, "Failed to init platform: "s + e.what(), nullptr);
}
catch (...)
{
os << loc << ": failed to init platform " << devices[i].first << ", device " << devices[i].second;
AddToReport(os.str());
ErrorStr(loc, "Failed to init platform with unknown exception", nullptr);
}
}
if (b && m_Devices.size() == devices.size())
if (b && (m_Devices.size() == devices.size()))
{
auto& firstWrapper = m_Devices[0]->m_Wrapper;
m_DEOpenCLKernelCreator = DEOpenCLKernelCreator(m_DoublePrecision, m_Devices[0]->Nvidia());
//Build a simple program to ensure OpenCL is working right.
if (b && !(b = firstWrapper.AddProgram(m_DEOpenCLKernelCreator.LogScaleAssignDEEntryPoint(), m_DEOpenCLKernelCreator.LogScaleAssignDEKernel(), m_DEOpenCLKernelCreator.LogScaleAssignDEEntryPoint(), m_DoublePrecision))) { AddToReport(loc); }
if (b && !(b = firstWrapper.AddProgram(m_DEOpenCLKernelCreator.LogScaleAssignDEEntryPoint(), m_DEOpenCLKernelCreator.LogScaleAssignDEKernel(), m_DEOpenCLKernelCreator.LogScaleAssignDEEntryPoint(), m_DoublePrecision))) { ErrorStr(loc, "failed to init log scale program", m_Devices[0].get()); }
if (b && !(b = firstWrapper.AddProgram(m_IterOpenCLKernelCreator.SumHistEntryPoint(), sumHistProgram, m_IterOpenCLKernelCreator.SumHistEntryPoint(), m_DoublePrecision))) { AddToReport(loc); }
if (b && !(b = firstWrapper.AddProgram(m_IterOpenCLKernelCreator.SumHistEntryPoint(), sumHistProgram, m_IterOpenCLKernelCreator.SumHistEntryPoint(), m_DoublePrecision))) { ErrorStr(loc, "Failed to init sum histogram program", m_Devices[0].get()); }
if (b)
{
@ -168,16 +132,15 @@ bool RendererCL<T, bucketT>::Init(const vector<pair<size_t, size_t>>& devices, b
FillSeeds();
for (size_t device = 0; device < m_Devices.size(); device++)
if (b && !(b = m_Devices[device]->m_Wrapper.AddAndWriteBuffer(m_SeedsBufferName, reinterpret_cast<void*>(m_Seeds[device].data()), SizeOf(m_Seeds[device])))) { AddToReport(loc); break; }
if (b && !(b = m_Devices[device]->m_Wrapper.AddAndWriteBuffer(m_SeedsBufferName, reinterpret_cast<void*>(m_Seeds[device].data()), SizeOf(m_Seeds[device])))) { ErrorStr(loc, "Failed to init seeds buffer", m_Devices[device].get()); break; }
}
m_Shared = shared;
m_Init = b;
}
else
{
m_Devices.clear();
os << loc << ": failed to init all devices and platforms.";
AddToReport(os.str());
ErrorStr(loc, "Failed to init all devices and platforms", nullptr);
}
return m_Init;
@ -192,7 +155,7 @@ template <typename T, typename bucketT>
bool RendererCL<T, bucketT>::SetOutputTexture(GLuint outputTexID)
{
bool success = true;
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
if (!m_Devices.empty())
{
@ -202,7 +165,7 @@ bool RendererCL<T, bucketT>::SetOutputTexture(GLuint outputTexID)
if (!firstWrapper.AddAndWriteImage(m_FinalImageName, CL_MEM_WRITE_ONLY, m_FinalFormat, FinalRasW(), FinalRasH(), 0, nullptr, firstWrapper.Shared(), m_OutputTexID))
{
AddToReport(loc);
ErrorStr(loc, "Failed to init set output texture", m_Devices[0].get());
success = false;
}
@ -293,10 +256,10 @@ template <typename T, typename bucketT>
bool RendererCL<T, bucketT>::ClearHist()
{
bool b = !m_Devices.empty();
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
for (size_t i = 0; i < m_Devices.size(); i++)
if (b && !(b = ClearBuffer(i, m_HistBufferName, uint(SuperRasW()), uint(SuperRasH()), sizeof(v4bT)))) { AddToReport(loc); break; }
if (b && !(b = ClearBuffer(i, m_HistBufferName, uint(SuperRasW()), uint(SuperRasH()), sizeof(v4bT)))) { ErrorStr(loc, "Failed to clear histogram", m_Devices[i].get()); break; }
return b;
}
@ -310,9 +273,9 @@ template <typename T, typename bucketT>
bool RendererCL<T, bucketT>::ClearHist(size_t device)
{
bool b = device < m_Devices.size();
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
if (b && !(b = ClearBuffer(device, m_HistBufferName, uint(SuperRasW()), uint(SuperRasH()), sizeof(v4bT)))) { AddToReport(loc); }
if (b && !(b = ClearBuffer(device, m_HistBufferName, uint(SuperRasW()), uint(SuperRasH()), sizeof(v4bT)))) { ErrorStr(loc, "Failed to clear histogram", m_Devices[device].get()); }
return b;
}
@ -338,10 +301,10 @@ template <typename T, typename bucketT>
bool RendererCL<T, bucketT>::WritePoints(size_t device, vector<PointCL<T>>& vec)
{
bool b = false;
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
if (device < m_Devices.size())
if (!(b = m_Devices[device]->m_Wrapper.WriteBuffer(m_PointsBufferName, reinterpret_cast<void*>(vec.data()), SizeOf(vec)))) { AddToReport(loc); }
if (!(b = m_Devices[device]->m_Wrapper.WriteBuffer(m_PointsBufferName, reinterpret_cast<void*>(vec.data()), SizeOf(vec)))) { ErrorStr(loc, "Failed to write points buffer", m_Devices[device].get()); }
return b;
}
@ -423,6 +386,7 @@ template <typename T, typename bucketT>
bool RendererCL<T, bucketT>::ClearFinal()
{
vector<v4F> v;
static std::string loc = __FUNCTION__;
if (!m_Devices.empty())
{
@ -431,18 +395,16 @@ bool RendererCL<T, bucketT>::ClearFinal()
if (this->PrepFinalAccumVector(v))
{
bool b = wrapper.WriteImage2D(index, wrapper.Shared(), FinalRasW(), FinalRasH(), 0, v.data());
if (!b)
AddToReport(__FUNCTION__);
return b;
if (!wrapper.WriteImage2D(index, wrapper.Shared(), FinalRasW(), FinalRasH(), 0, v.data()))
ErrorStr(loc, "Failed to clear final buffer", m_Devices[0].get());
else
return false;
}
else
return false;
}
else
return false;
return false;
}
/// <summary>
@ -469,18 +431,6 @@ bool RendererCL<T, bucketT>::Ok() const
return !m_Devices.empty() && m_Init;
}
/// <summary>
/// Clear the error report for this class as well as the OpenCLWrapper members of each device.
/// </summary>
template <typename T, typename bucketT>
void RendererCL<T, bucketT>::ClearErrorReport()
{
EmberReport::ClearErrorReport();
for (auto& device : m_Devices)
device->m_Wrapper.ClearErrorReport();
}
/// <summary>
/// The sub batch size for OpenCL will always be how many
/// iterations are ran per kernel call. The caller can't
@ -514,20 +464,23 @@ template <typename T, typename bucketT>
bool RendererCL<T, bucketT>::CreateDEFilter(bool& newAlloc)
{
bool b = true;
static std::string loc = __FUNCTION__;
if (!m_Devices.empty() && Renderer<T, bucketT>::CreateDEFilter(newAlloc))
{
//Copy coefs and widths here. Convert and copy the other filter params right before calling the filtering kernel.
if (newAlloc)
{
const char* loc = __FUNCTION__;
auto& wrapper = m_Devices[0]->m_Wrapper;
if (b && !(b = wrapper.AddAndWriteBuffer(m_DECoefsBufferName, reinterpret_cast<void*>(const_cast<bucketT*>(m_DensityFilter->Coefs())), m_DensityFilter->CoefsSizeBytes()))) { AddToReport(loc); }
if (b && !(b = wrapper.AddAndWriteBuffer(m_DECoefsBufferName, reinterpret_cast<void*>(const_cast<bucketT*>(m_DensityFilter->Coefs())), m_DensityFilter->CoefsSizeBytes())))
ErrorStr(loc, "Failed to set DE coefficients buffer", m_Devices[0].get());
if (b && !(b = wrapper.AddAndWriteBuffer(m_DEWidthsBufferName, reinterpret_cast<void*>(const_cast<bucketT*>(m_DensityFilter->Widths())), m_DensityFilter->WidthsSizeBytes()))) { AddToReport(loc); }
if (b && !(b = wrapper.AddAndWriteBuffer(m_DEWidthsBufferName, reinterpret_cast<void*>(const_cast<bucketT*>(m_DensityFilter->Widths())), m_DensityFilter->WidthsSizeBytes())))
ErrorStr(loc, "Failed to set DE widths buffer", m_Devices[0].get());
if (b && !(b = wrapper.AddAndWriteBuffer(m_DECoefIndicesBufferName, reinterpret_cast<void*>(const_cast<uint*>(m_DensityFilter->CoefIndices())), m_DensityFilter->CoefsIndicesSizeBytes()))) { AddToReport(loc); }
if (b && !(b = wrapper.AddAndWriteBuffer(m_DECoefIndicesBufferName, reinterpret_cast<void*>(const_cast<uint*>(m_DensityFilter->CoefIndices())), m_DensityFilter->CoefsIndicesSizeBytes())))
ErrorStr(loc, "Failed to set DE coefficient indices buffer", m_Devices[0].get());
}
}
else
@ -546,11 +499,13 @@ template <typename T, typename bucketT>
bool RendererCL<T, bucketT>::CreateSpatialFilter(bool& newAlloc)
{
bool b = true;
static std::string loc = __FUNCTION__;
if (!m_Devices.empty() && Renderer<T, bucketT>::CreateSpatialFilter(newAlloc))
{
if (newAlloc)
if (!(b = m_Devices[0]->m_Wrapper.AddAndWriteBuffer(m_SpatialFilterCoefsBufferName, reinterpret_cast<void*>(m_SpatialFilter->Filter()), m_SpatialFilter->BufferSizeBytes()))) { AddToReport(__FUNCTION__); }
if (!(b = m_Devices[0]->m_Wrapper.AddAndWriteBuffer(m_SpatialFilterCoefsBufferName, reinterpret_cast<void*>(m_SpatialFilter->Filter()), m_SpatialFilter->BufferSizeBytes())))
ErrorStr(loc, "Failed to set patial filter coefficients buffer", m_Devices[0].get());
}
else
b = false;
@ -568,6 +523,25 @@ eRendererType RendererCL<T, bucketT>::RendererType() const
return eRendererType::OPENCL_RENDERER;
}
/// <summary>
/// Get whether the renderer uses a shared texture with OpenGL.
/// </summary>
/// <returns>True if shared, else false.</returns>
template <typename T, typename bucketT>
bool RendererCL<T, bucketT>::Shared() const { return m_Shared; }
/// <summary>
/// Clear the error report for this class as well as the OpenCLWrapper members of each device.
/// </summary>
template <typename T, typename bucketT>
void RendererCL<T, bucketT>::ClearErrorReport()
{
EmberReport::ClearErrorReport();
for (auto& device : m_Devices)
device->m_Wrapper.ClearErrorReport();
}
/// <summary>
/// Concatenate and return the error report for this class and the
/// OpenCLWrapper member of each device as a single string.
@ -579,7 +553,7 @@ string RendererCL<T, bucketT>::ErrorReportString()
auto s = EmberReport::ErrorReportString();
for (auto& device : m_Devices)
s += device->m_Wrapper.ErrorReportString();
s += device->ErrorReportString();
return s;
}
@ -596,7 +570,7 @@ vector<string> RendererCL<T, bucketT>::ErrorReport()
for (auto& device : m_Devices)
{
auto s = device->m_Wrapper.ErrorReport();
auto s = device->ErrorReport();
ours.insert(ours.end(), s.begin(), s.end());
}
@ -615,14 +589,18 @@ template <typename T, typename bucketT>
bool RendererCL<T, bucketT>::RandVec(vector<QTIsaac<ISAAC_SIZE, ISAAC_INT>>& randVec)
{
bool b = Renderer<T, bucketT>::RandVec(randVec);
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
if (!m_Devices.empty())
{
FillSeeds();
for (size_t device = 0; device < m_Devices.size(); device++)
if (b && !(b = m_Devices[device]->m_Wrapper.AddAndWriteBuffer(m_SeedsBufferName, reinterpret_cast<void*>(m_Seeds[device].data()), SizeOf(m_Seeds[device])))) { AddToReport(loc); break; }
if (b && !(b = m_Devices[device]->m_Wrapper.AddAndWriteBuffer(m_SeedsBufferName, reinterpret_cast<void*>(m_Seeds[device].data()), SizeOf(m_Seeds[device]))))
{
ErrorStr(loc, "Failed to set randoms buffer", m_Devices[device].get());
break;
}
}
else
b = false;
@ -664,32 +642,32 @@ bool RendererCL<T, bucketT>::Alloc(bool histOnly)
m_XformsCL.resize(m_Ember.TotalXformCount());
bool b = true;
size_t size = SuperSize() * sizeof(v4bT);//Size of histogram and density filter buffer.
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
auto& wrapper = m_Devices[0]->m_Wrapper;
if (b && !(b = wrapper.AddBuffer(m_DEFilterParamsBufferName, sizeof(m_DensityFilterCL)))) { AddToReport(loc); }
if (b && !(b = wrapper.AddBuffer(m_DEFilterParamsBufferName, sizeof(m_DensityFilterCL)))) { ErrorStr(loc, "Failed to set DE filter parameters buffer", m_Devices[0].get()); }
if (b && !(b = wrapper.AddBuffer(m_SpatialFilterParamsBufferName, sizeof(m_SpatialFilterCL)))) { AddToReport(loc); }
if (b && !(b = wrapper.AddBuffer(m_SpatialFilterParamsBufferName, sizeof(m_SpatialFilterCL)))) { ErrorStr(loc, "Failed to set spatial filter parameters buffer", m_Devices[0].get()); }
if (b && !(b = wrapper.AddBuffer(m_CurvesCsaName, SizeOf(m_Csa)))) { AddToReport(loc); }
if (b && !(b = wrapper.AddBuffer(m_CurvesCsaName, SizeOf(m_Csa)))) { ErrorStr(loc, "Failed to set curves buffer", m_Devices[0].get()); }
if (b && !(b = wrapper.AddBuffer(m_AccumBufferName, size))) { AddToReport(loc); }//Accum buffer.
if (b && !(b = wrapper.AddBuffer(m_AccumBufferName, size))) { ErrorStr(loc, "Failed to set accum buffer", m_Devices[0].get()); }
for (auto& device : m_Devices)
{
if (b && !(b = device->m_Wrapper.AddBuffer(m_EmberBufferName, sizeof(m_EmberCL)))) { AddToReport(loc); break; }
if (b && !(b = device->m_Wrapper.AddBuffer(m_EmberBufferName, sizeof(m_EmberCL)))) { ErrorStr(loc, "Failed to set ember buffer", device.get()); break; }
if (b && !(b = device->m_Wrapper.AddBuffer(m_XformsBufferName, SizeOf(m_XformsCL)))) { AddToReport(loc); break; }
if (b && !(b = device->m_Wrapper.AddBuffer(m_XformsBufferName, SizeOf(m_XformsCL)))) { ErrorStr(loc, "Failed to set xforms buffer", device.get()); break; }
if (b && !(b = device->m_Wrapper.AddBuffer(m_ParVarsBufferName, 128 * sizeof(T)))) { AddToReport(loc); break; }
if (b && !(b = device->m_Wrapper.AddBuffer(m_ParVarsBufferName, 128 * sizeof(T)))) { ErrorStr(loc, "Failed to set parametric variations buffer", device.get()); break; }
if (b && !(b = device->m_Wrapper.AddBuffer(m_DistBufferName, CHOOSE_XFORM_GRAIN))) { AddToReport(loc); break; }//Will be resized for xaos.
if (b && !(b = device->m_Wrapper.AddBuffer(m_DistBufferName, CHOOSE_XFORM_GRAIN))) { ErrorStr(loc, "Failed to set xforms distribution buffer", device.get()); break; }//Will be resized for xaos.
if (b && !(b = device->m_Wrapper.AddBuffer(m_CarToRasBufferName, sizeof(m_CarToRasCL)))) { AddToReport(loc); break; }
if (b && !(b = device->m_Wrapper.AddBuffer(m_CarToRasBufferName, sizeof(m_CarToRasCL)))) { ErrorStr(loc, "Failed to set cartesian to raster buffer", device.get()); break; }
if (b && !(b = device->m_Wrapper.AddBuffer(m_HistBufferName, size))) { AddToReport(loc); break; }//Histogram. Will memset to zero later.
if (b && !(b = device->m_Wrapper.AddBuffer(m_HistBufferName, size))) { ErrorStr(loc, "Failed to set histogram buffer", device.get()); break; }//Histogram. Will memset to zero later.
if (b && !(b = device->m_Wrapper.AddBuffer(m_PointsBufferName, IterGridKernelCount() * sizeof(PointCL<T>)))) { AddToReport(loc); break; }//Points between iter calls.
if (b && !(b = device->m_Wrapper.AddBuffer(m_PointsBufferName, IterGridKernelCount() * sizeof(PointCL<T>)))) { ErrorStr(loc, "Failed to set points buffer", device.get()); break; }//Points between iter calls.
//Global shared is allocated once and written when building the kernel.
}
@ -699,7 +677,7 @@ bool RendererCL<T, bucketT>::Alloc(bool histOnly)
LeaveResize();
if (b && !(b = SetOutputTexture(m_OutputTexID))) { AddToReport(loc); }
if (b && !(b = SetOutputTexture(m_OutputTexID))) { ErrorStr(loc, "Failed to set output texture", m_Devices[0].get()); }
return b;
}
@ -765,23 +743,28 @@ eRenderStatus RendererCL<T, bucketT>::GaussianDensityFilter()
/// <summary>
/// Run final accumulation on the primary device.
/// If pixels is nullptr, the output will remain in the OpenCL 2D image.
/// However, if pixels is not nullptr, the output will be copied. This is
/// useful when rendering in OpenCL, but saving the output to a file.
/// If the first device is not shared, the output will remain in the OpenCL 2D image and no copying will take place.
/// If it is shared, then the image will be copied into the pixels vector.
/// </summary>
/// <param name="pixels">The pixels to copy the final image to if not nullptr</param>
/// <param name="pixels">The pixel vector to allocate and store the final image in</param>
/// <param name="finalOffset">Offset in the buffer to store the pixels to</param>
/// <returns>True if success and not aborted, else false.</returns>
/// <returns>True if not prematurely aborted, else false.</returns>
template <typename T, typename bucketT>
eRenderStatus RendererCL<T, bucketT>::AccumulatorToFinalImage(v4F* pixels, size_t finalOffset)
eRenderStatus RendererCL<T, bucketT>::AccumulatorToFinalImage(vector<v4F>& pixels, size_t finalOffset)
{
auto status = RunFinalAccum();
if (status == eRenderStatus::RENDER_OK && pixels && !m_Devices.empty() && !m_Devices[0]->m_Wrapper.Shared())
if (status == eRenderStatus::RENDER_OK && !m_Devices.empty() && !m_Devices[0]->m_Wrapper.Shared())
{
pixels += finalOffset;
if (PrepFinalAccumVector(pixels))
{
auto p = pixels.data();
p += finalOffset;
if (!ReadFinal(pixels))
if (!ReadFinal(p))
status = eRenderStatus::RENDER_ERROR;
}
else
status = eRenderStatus::RENDER_ERROR;
}
@ -805,7 +788,7 @@ EmberStats RendererCL<T, bucketT>::Iterate(size_t iterCount, size_t temporalSamp
{
bool b = true;
EmberStats stats;//Do not record bad vals with with GPU. If the user needs to investigate bad vals, use the CPU.
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
//Only need to do this once on the beginning of a new render. Last iter will always be 0 at the beginning of a full render or temporal sample.
if (m_LastIter == 0)
@ -825,19 +808,34 @@ EmberStats RendererCL<T, bucketT>::Iterate(size_t iterCount, size_t temporalSamp
auto& wrapper = device->m_Wrapper;
if (b && !(b = wrapper.WriteBuffer(m_EmberBufferName, reinterpret_cast<void*>(&m_EmberCL), sizeof(m_EmberCL))))
{
ErrorStr(loc, "Write ember buffer failed", device.get());
break;
}
if (b && !(b = wrapper.WriteBuffer(m_XformsBufferName, reinterpret_cast<void*>(m_XformsCL.data()), sizeof(m_XformsCL[0]) * m_XformsCL.size())))
{
ErrorStr(loc, "Write xforms buffer failed", device.get());
break;
}
if (b && !(b = wrapper.AddAndWriteBuffer(m_DistBufferName, reinterpret_cast<void*>(const_cast<byte*>(XformDistributions())), XformDistributionsSize())))//Will be resized for xaos.
{
ErrorStr(loc, "Write xforms distribution buffer failed", device.get());
break;
}
if (b && !(b = wrapper.WriteBuffer(m_CarToRasBufferName, reinterpret_cast<void*>(&m_CarToRasCL), sizeof(m_CarToRasCL))))
{
ErrorStr(loc, "Write cartesian to raster buffer failed", device.get());
break;
}
if (b && !(b = wrapper.AddAndWriteImage("Palette", CL_MEM_READ_ONLY, m_PaletteFormat, m_Dmap.m_Entries.size(), 1, 0, m_Dmap.m_Entries.data())))
{
ErrorStr(loc, "Write palette buffer failed", device.get());
break;
}
if (b)
{
@ -846,8 +844,13 @@ EmberStats RendererCL<T, bucketT>::Iterate(size_t iterCount, size_t temporalSamp
//Don't know the size of the parametric varations parameters buffer until the ember is examined.
//So set it up right before the run.
if (!m_Params.second.empty())
{
if (!wrapper.AddAndWriteBuffer(m_ParVarsBufferName, m_Params.second.data(), m_Params.second.size() * sizeof(m_Params.second[0])))
{
ErrorStr(loc, "Write parametric variations buffer failed", device.get());
break;
}
}
}
else
break;
@ -873,7 +876,7 @@ EmberStats RendererCL<T, bucketT>::Iterate(size_t iterCount, size_t temporalSamp
else
{
m_Abort = true;
AddToReport(loc);
ErrorStr(loc, "Iiteration failed", nullptr);
}
return stats;
@ -894,7 +897,7 @@ bool RendererCL<T, bucketT>::BuildIterProgramForEmber(bool doAccum)
{
//Timing t;
bool b = !m_Devices.empty();
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
IterOpenCLKernelCreator<T>::ParVarIndexDefines(m_Ember, m_Params, false, true);//Do with string and no vals.
IterOpenCLKernelCreator<T>::SharedDataIndexDefines(m_Ember, m_GlobalShared, true, true);//Do with string and vals only once on build since it won't change until another build occurs.
@ -909,7 +912,7 @@ bool RendererCL<T, bucketT>::BuildIterProgramForEmber(bool doAccum)
{
rlg l(m_ResizeCs);//Just use the resize CS for lack of a better one.
b = false;
AddToReport(string(loc) + "()\n" + dev->m_Wrapper.DeviceName() + ":\nBuilding the following program failed: \n" + m_IterKernel + "\n");
ErrorStr(loc, "Building the following program failed\n"s + m_IterKernel, dev);
}
else if (!m_GlobalShared.second.empty())
{
@ -917,7 +920,7 @@ bool RendererCL<T, bucketT>::BuildIterProgramForEmber(bool doAccum)
{
rlg l(m_ResizeCs);//Just use the resize CS for lack of a better one.
b = false;
AddToReport(string(loc) + "()\n" + dev->m_Wrapper.DeviceName() + ":\nAdding global shared buffer failed.\n");
ErrorStr(loc, "Adding global shared buffer failed", dev);
}
}
};
@ -960,7 +963,7 @@ bool RendererCL<T, bucketT>::RunIter(size_t iterCount, size_t temporalSample, si
bool success = !m_Devices.empty();
uint histSuperSize = uint(SuperSize());
size_t launches = size_t(ceil(double(iterCount) / IterCountPerGrid()));
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
vector<std::thread> threadVec;
std::atomic<size_t> atomLaunchesRan;
std::atomic<intmax_t> atomItersRan, atomItersRemaining;
@ -1006,31 +1009,31 @@ bool RendererCL<T, bucketT>::RunIter(size_t iterCount, size_t temporalSample, si
size_t iterCountThisLaunch = iterCountPerKernel * IterGridKernelWidth() * IterGridKernelHeight();
//cout << "itersRemaining " << itersRemaining << ", iterCountPerKernel " << iterCountPerKernel << ", iterCountThisLaunch " << iterCountThisLaunch << "\n";
if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, iterCountPerKernel))) { AddToReport(loc); }//Number of iters for each thread to run.
if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, iterCountPerKernel))) { ErrorStr(loc, "Setting iter count argument failed", m_Devices[dev].get()); }//Number of iters for each thread to run.
if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, fuse))) { AddToReport(loc); }//Number of iters to fuse.
if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, fuse))) { ErrorStr(loc, "Setting fuse count argument failed", m_Devices[dev].get()); }//Number of iters to fuse.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_SeedsBufferName))) { AddToReport(loc); }//Seeds.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_SeedsBufferName))) { ErrorStr(loc, "Setting seeds buffer argument failed", m_Devices[dev].get()); }//Seeds.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_EmberBufferName))) { AddToReport(loc); }//Ember.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_EmberBufferName))) { ErrorStr(loc, "Setting ember buffer argument failed", m_Devices[dev].get()); }//Ember.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_XformsBufferName))) { AddToReport(loc); }//Xforms.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_XformsBufferName))) { ErrorStr(loc, "Setting xforms buffer argument failed", m_Devices[dev].get()); }//Xforms.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_ParVarsBufferName))) { AddToReport(loc); }//Parametric variation parameters.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_ParVarsBufferName))) { ErrorStr(loc, "Setting parametric variations buffer argument failed", m_Devices[dev].get()); }//Parametric variation parameters.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_GlobalSharedBufferName))) { AddToReport(loc); }//Global shared data.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_GlobalSharedBufferName))) { ErrorStr(loc, "Setting global shared buffer argument failed", m_Devices[dev].get()); }//Global shared data.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_DistBufferName))) { AddToReport(loc); }//Xform distributions.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_DistBufferName))) { ErrorStr(loc, "Setting xforms distribution buffer argument failed", m_Devices[dev].get()); }//Xform distributions.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_CarToRasBufferName))) { AddToReport(loc); }//Coordinate converter.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_CarToRasBufferName))) { ErrorStr(loc, "Setting cartesian to raster buffer argument failed", m_Devices[dev].get()); }//Coordinate converter.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_HistBufferName))) { AddToReport(loc); }//Histogram.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_HistBufferName))) { ErrorStr(loc, "Setting histogram buffer argument failed", m_Devices[dev].get()); }//Histogram.
if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, histSuperSize))) { AddToReport(loc); }//Histogram size.
if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, histSuperSize))) { ErrorStr(loc, "Setting histogram size argument failed", m_Devices[dev].get()); }//Histogram size.
if (b && !(b = wrapper.SetImageArg (kernelIndex, argIndex++, false, "Palette"))) { AddToReport(loc); }//Palette.
if (b && !(b = wrapper.SetImageArg (kernelIndex, argIndex++, false, "Palette"))) { ErrorStr(loc, "Setting palette argument failed", m_Devices[dev].get()); }//Palette.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_PointsBufferName))) { AddToReport(loc); }//Random start points.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_PointsBufferName))) { ErrorStr(loc, "Setting points buffer argument failed", m_Devices[dev].get()); }//Random start points.
if (b && !(b = wrapper.RunKernel(kernelIndex,
IterGridKernelWidth(),//Total grid dims.
@ -1042,7 +1045,7 @@ bool RendererCL<T, bucketT>::RunIter(size_t iterCount, size_t temporalSample, si
{
success = false;
m_Abort = true;
AddToReport(loc);
ErrorStr(loc, "Error running iteration program", m_Devices[dev].get());
atomLaunchesRan.fetch_sub(1);
break;
}
@ -1107,9 +1110,9 @@ bool RendererCL<T, bucketT>::RunIter(size_t iterCount, size_t temporalSample, si
if (m_Devices.size() > 1)//Determine whether/when to sum histograms of secondary devices with the primary.
{
if (((TemporalSamples() == 1) || (temporalSample == TemporalSamples() - 1)) &&//If there are no temporal samples (not animating), or the current one is the last...
if (((TemporalSamples() == 1) || (temporalSample == TemporalSamples() - 1)) &&//If there are no temporal samples (not animating), or the current one is the last... (probably doesn't matter anymore since we never use multiple renders for a single frame when animating, instead each frame gets its own renderer).
((m_LastIter + itersRan) >= ItersPerTemporalSample()))//...and the required number of iters for that sample have completed...
if (success && !(success = SumDeviceHist())) { AddToReport(loc); }//...read the histogram from the secondary devices and sum them to the primary.
if (success && !(success = SumDeviceHist())) { ErrorStr(loc, "Summing histograms failed", nullptr); }//...read the histogram from the secondary devices and sum them to the primary.
}
//t2.Toc(__FUNCTION__);
@ -1125,12 +1128,12 @@ eRenderStatus RendererCL<T, bucketT>::RunLogScaleFilter()
{
//Timing t(4);
bool b = !m_Devices.empty();
static std::string loc = __FUNCTION__;
if (b)
{
auto& wrapper = m_Devices[0]->m_Wrapper;
int kernelIndex = wrapper.FindKernelIndex(m_DEOpenCLKernelCreator.LogScaleAssignDEEntryPoint());
const char* loc = __FUNCTION__;
if (kernelIndex != -1)
{
@ -1142,16 +1145,16 @@ eRenderStatus RendererCL<T, bucketT>::RunLogScaleFilter()
size_t gridH = m_DensityFilterCL.m_SuperRasH;
OpenCLWrapper::MakeEvenGridDims(blockW, blockH, gridW, gridH);
if (b && !(b = wrapper.AddAndWriteBuffer(m_DEFilterParamsBufferName, reinterpret_cast<void*>(&m_DensityFilterCL), sizeof(m_DensityFilterCL)))) { AddToReport(loc); }
if (b && !(b = wrapper.AddAndWriteBuffer(m_DEFilterParamsBufferName, reinterpret_cast<void*>(&m_DensityFilterCL), sizeof(m_DensityFilterCL)))) { ErrorStr(loc, "Adding DE filter parameters buffer failed", m_Devices[0].get()); }
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_HistBufferName))) { AddToReport(loc); }//Histogram.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_HistBufferName))) { ErrorStr(loc, "Setting histogram buffer argument failed", m_Devices[0].get()); }//Histogram.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_AccumBufferName))) { AddToReport(loc); }//Accumulator.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_AccumBufferName))) { ErrorStr(loc, "Setting accumulator buffer argument failed", m_Devices[0].get()); }//Accumulator.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_DEFilterParamsBufferName))) { AddToReport(loc); }//DensityFilterCL.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_DEFilterParamsBufferName))) { ErrorStr(loc, "Setting DE filter parameters buffer argument failed", m_Devices[0].get()); }//DensityFilterCL.
//t.Tic();
if (b && !(b = wrapper.RunKernel(kernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { AddToReport(loc); }
if (b && !(b = wrapper.RunKernel(kernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { ErrorStr(loc, "Running log scale program failed", m_Devices[0].get()); }
//t.Toc(loc);
@ -1162,7 +1165,7 @@ eRenderStatus RendererCL<T, bucketT>::RunLogScaleFilter()
else
{
b = false;
AddToReport(loc);
ErrorStr(loc, "Invalid kernel index for log scale program", m_Devices[0].get());
}
}
@ -1212,7 +1215,7 @@ eRenderStatus RendererCL<T, bucketT>::RunDensityFilter()
uint chunkSizeH = gapH + 1;//Chunk size is also in terms of blocks and is one block (the one running) plus the gap below it.
double totalChunks = chunkSizeW * chunkSizeH;
if (b && !(b = wrapper.AddAndWriteBuffer(m_DEFilterParamsBufferName, reinterpret_cast<void*>(&m_DensityFilterCL), sizeof(m_DensityFilterCL)))) { AddToReport(loc); }
if (b && !(b = wrapper.AddAndWriteBuffer(m_DEFilterParamsBufferName, reinterpret_cast<void*>(&m_DensityFilterCL), sizeof(m_DensityFilterCL)))) { ErrorStr(loc, "Writing DE filter parameters buffer failed", m_Devices[0].get()); }
#ifdef ROW_ONLY_DE
blockSizeW = 64;//These *must* both be divisible by 16 or else pixels will go missing.
@ -1256,7 +1259,11 @@ eRenderStatus RendererCL<T, bucketT>::RunDensityFilter()
for (uint colChunkPass = 0; b && !m_Abort && colChunkPass < chunkSizeW; colChunkPass++)//Number of horizontal passes.
{
//t2.Tic();
if (b && !(b = RunDensityFilterPrivate(kernelIndex, gridW, gridH, blockSizeW, blockSizeH, chunkSizeW, chunkSizeH, colChunkPass, rowChunkPass))) { m_Abort = true; AddToReport(loc); }
if (b && !(b = RunDensityFilterPrivate(kernelIndex, gridW, gridH, blockSizeW, blockSizeH, chunkSizeW, chunkSizeH, colChunkPass, rowChunkPass)))
{
m_Abort = true;
ErrorStr(loc, "Running DE filter program for row chunk "s + std::to_string(rowChunkPass) + ", col chunk "s + std::to_string(colChunkPass) + " failed", m_Devices[0].get());
}
//t2.Toc(loc);
@ -1282,7 +1289,7 @@ eRenderStatus RendererCL<T, bucketT>::RunDensityFilter()
else
{
b = false;
AddToReport(loc);
ErrorStr(loc, "Invalid kernel index for DE filter program", m_Devices[0].get());
}
return m_Abort ? eRenderStatus::RENDER_ABORT : (b ? eRenderStatus::RENDER_OK : eRenderStatus::RENDER_ERROR);
@ -1304,7 +1311,7 @@ eRenderStatus RendererCL<T, bucketT>::RunFinalAccum()
size_t blockW;
size_t blockH;
uint curvesSet = m_CurvesSet ? 1 : 0;
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
if (!m_Abort && accumKernelIndex != -1)
{
@ -1312,9 +1319,9 @@ eRenderStatus RendererCL<T, bucketT>::RunFinalAccum()
//This is needed with or without early clip.
ConvertSpatialFilter();
if (b && !(b = wrapper.AddAndWriteBuffer(m_SpatialFilterParamsBufferName, reinterpret_cast<void*>(&m_SpatialFilterCL), sizeof(m_SpatialFilterCL)))) { AddToReport(loc); }
if (b && !(b = wrapper.AddAndWriteBuffer(m_SpatialFilterParamsBufferName, reinterpret_cast<void*>(&m_SpatialFilterCL), sizeof(m_SpatialFilterCL)))) { ErrorStr(loc, "Adding spatial filter parameters buffer", m_Devices[0].get()); }
if (b && !(b = wrapper.AddAndWriteBuffer(m_CurvesCsaName, m_Csa.data(), SizeOf(m_Csa)))) { AddToReport(loc); }
if (b && !(b = wrapper.AddAndWriteBuffer(m_CurvesCsaName, m_Csa.data(), SizeOf(m_Csa)))) { ErrorStr(loc, "Adding curves buffer", m_Devices[0].get()); }
//Since early clip requires gamma correcting the entire accumulator first,
//it can't be done inside of the normal final accumulation kernel, so
@ -1332,16 +1339,16 @@ eRenderStatus RendererCL<T, bucketT>::RunFinalAccum()
gridH = m_SpatialFilterCL.m_SuperRasH;
OpenCLWrapper::MakeEvenGridDims(blockW, blockH, gridW, gridH);
if (b && !(b = wrapper.SetBufferArg(gammaCorrectKernelIndex, argIndex++, m_AccumBufferName))) { AddToReport(loc); }//Accumulator.
if (b && !(b = wrapper.SetBufferArg(gammaCorrectKernelIndex, argIndex++, m_AccumBufferName))) { ErrorStr(loc, "Setting early clip accumulator buffer argument failed", m_Devices[0].get()); }//Accumulator.
if (b && !(b = wrapper.SetBufferArg(gammaCorrectKernelIndex, argIndex++, m_SpatialFilterParamsBufferName))) { AddToReport(loc); }//SpatialFilterCL.
if (b && !(b = wrapper.SetBufferArg(gammaCorrectKernelIndex, argIndex++, m_SpatialFilterParamsBufferName))) { ErrorStr(loc, "Setting early clip spatial filter parameters buffer argument failed", m_Devices[0].get()); }//SpatialFilterCL.
if (b && !(b = wrapper.RunKernel(gammaCorrectKernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { AddToReport(loc); }
if (b && !(b = wrapper.RunKernel(gammaCorrectKernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { ErrorStr(loc, "Running early clip gamma correction program failed", m_Devices[0].get()); }
}
else
{
b = false;
AddToReport(loc);
ErrorStr(loc, "Invalid kernel index for early clip gamma correction program", m_Devices[0].get());
}
}
@ -1352,32 +1359,32 @@ eRenderStatus RendererCL<T, bucketT>::RunFinalAccum()
gridH = m_SpatialFilterCL.m_FinalRasH;
OpenCLWrapper::MakeEvenGridDims(blockW, blockH, gridW, gridH);
if (b && !(b = wrapper.SetBufferArg(accumKernelIndex, argIndex++, m_AccumBufferName))) { AddToReport(loc); }//Accumulator.
if (b && !(b = wrapper.SetBufferArg(accumKernelIndex, argIndex++, m_AccumBufferName))) { ErrorStr(loc, "Setting accumulator buffer argument failed", m_Devices[0].get()); }//Accumulator.
if (b && !(b = wrapper.SetImageArg(accumKernelIndex, argIndex++, wrapper.Shared(), m_FinalImageName))) { AddToReport(loc); }//Final image.
if (b && !(b = wrapper.SetImageArg(accumKernelIndex, argIndex++, wrapper.Shared(), m_FinalImageName))) { ErrorStr(loc, "Setting accumulator final image buffer argument failed", m_Devices[0].get()); }//Final image.
if (b && !(b = wrapper.SetBufferArg(accumKernelIndex, argIndex++, m_SpatialFilterParamsBufferName))) { AddToReport(loc); }//SpatialFilterCL.
if (b && !(b = wrapper.SetBufferArg(accumKernelIndex, argIndex++, m_SpatialFilterParamsBufferName))) { ErrorStr(loc, "Setting spatial filter parameters buffer argument failed", m_Devices[0].get()); }//SpatialFilterCL.
if (b && !(b = wrapper.SetBufferArg(accumKernelIndex, argIndex++, m_SpatialFilterCoefsBufferName))) { AddToReport(loc); }//Filter coefs.
if (b && !(b = wrapper.SetBufferArg(accumKernelIndex, argIndex++, m_SpatialFilterCoefsBufferName))) { ErrorStr(loc, "Setting spatial filter coefficients buffer argument failed", m_Devices[0].get()); }//Filter coefs.
if (b && !(b = wrapper.SetBufferArg(accumKernelIndex, argIndex++, m_CurvesCsaName))) { AddToReport(loc); }//Curve points.
if (b && !(b = wrapper.SetBufferArg(accumKernelIndex, argIndex++, m_CurvesCsaName))) { ErrorStr(loc, "Setting curves buffer argument failed", m_Devices[0].get()); }//Curve points.
if (b && !(b = wrapper.SetArg (accumKernelIndex, argIndex++, curvesSet))) { AddToReport(loc); }//Do curves.
if (b && !(b = wrapper.SetArg (accumKernelIndex, argIndex++, curvesSet))) { ErrorStr(loc, "Setting curves boolean argument failed", m_Devices[0].get()); }//Do curves.
if (b && wrapper.Shared())
if (b && !(b = wrapper.EnqueueAcquireGLObjects(m_FinalImageName))) { AddToReport(loc); }
if (b && !(b = wrapper.EnqueueAcquireGLObjects(m_FinalImageName))) { ErrorStr(loc, "Acquiring OpenGL texture failed", m_Devices[0].get()); }
if (b && !(b = wrapper.RunKernel(accumKernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { AddToReport(loc); }
if (b && !(b = wrapper.RunKernel(accumKernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { ErrorStr(loc, "Running final accumulation program failed", m_Devices[0].get()); }
if (b && wrapper.Shared())
if (b && !(b = wrapper.EnqueueReleaseGLObjects(m_FinalImageName))) { AddToReport(loc); }
if (b && !(b = wrapper.EnqueueReleaseGLObjects(m_FinalImageName))) { ErrorStr(loc, "Releasing OpenGL texture failed", m_Devices[0].get()); }
//t.Toc((char*)loc);
}
else
{
b = false;
AddToReport(loc);
ErrorStr(loc, "Invalid kernel index for final accumulation program", m_Devices[0].get());
}
return b ? eRenderStatus::RENDER_OK : eRenderStatus::RENDER_ERROR;
@ -1402,7 +1409,7 @@ bool RendererCL<T, bucketT>::ClearBuffer(size_t device, const string& bufferName
auto& wrapper = m_Devices[device]->m_Wrapper;
int kernelIndex = wrapper.FindKernelIndex(m_IterOpenCLKernelCreator.ZeroizeEntryPoint());
cl_uint argIndex = 0;
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
if (kernelIndex != -1)
{
@ -1413,17 +1420,17 @@ bool RendererCL<T, bucketT>::ClearBuffer(size_t device, const string& bufferName
b = true;
OpenCLWrapper::MakeEvenGridDims(blockW, blockH, gridW, gridH);
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, bufferName))) { AddToReport(loc); }//Buffer of byte.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, bufferName))) { ErrorStr(loc, "Setting clear buffer argument failed", m_Devices[device].get()); }//Buffer of byte.
if (b && !(b = wrapper.SetArg(kernelIndex, argIndex++, width * elementSize))) { AddToReport(loc); }//Width.
if (b && !(b = wrapper.SetArg(kernelIndex, argIndex++, width * elementSize))) { ErrorStr(loc, "Setting clear buffer width argument failed", m_Devices[device].get()); }//Width.
if (b && !(b = wrapper.SetArg(kernelIndex, argIndex++, height))) { AddToReport(loc); }//Height.
if (b && !(b = wrapper.SetArg(kernelIndex, argIndex++, height))) { ErrorStr(loc, "Setting clear buffer height argument failed", m_Devices[device].get()); }//Height.
if (b && !(b = wrapper.RunKernel(kernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { AddToReport(loc); }
if (b && !(b = wrapper.RunKernel(kernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { ErrorStr(loc, "Running clear buffer program failed", m_Devices[device].get()); }
}
else
{
AddToReport(loc);
ErrorStr(loc, "Invalid kernel index for clear buffer program", m_Devices[device].get());
}
}
@ -1450,36 +1457,36 @@ bool RendererCL<T, bucketT>::RunDensityFilterPrivate(size_t kernelIndex, size_t
//Timing t(4);
bool b = true;
cl_uint argIndex = 0;
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
if (!m_Devices.empty())
{
auto& wrapper = m_Devices[0]->m_Wrapper;
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex, m_HistBufferName))) { AddToReport(loc); } argIndex++;//Histogram.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex, m_HistBufferName))) { ErrorStr(loc, "Setting histogram buffer argument failed", m_Devices[0].get()); } argIndex++;//Histogram.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex, m_AccumBufferName))) { AddToReport(loc); } argIndex++;//Accumulator.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex, m_AccumBufferName))) { ErrorStr(loc, "Setting accumulator buffer argument failed", m_Devices[0].get()); } argIndex++;//Accumulator.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex, m_DEFilterParamsBufferName))) { AddToReport(loc); } argIndex++;//FlameDensityFilterCL.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex, m_DEFilterParamsBufferName))) { ErrorStr(loc, "Setting DE filter parameters buffer argument failed", m_Devices[0].get()); } argIndex++;//FlameDensityFilterCL.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex, m_DECoefsBufferName))) { AddToReport(loc); } argIndex++;//Coefs.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex, m_DECoefsBufferName))) { ErrorStr(loc, "Setting DE coefficients buffer argument failed", m_Devices[0].get()); } argIndex++;//Coefs.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex, m_DEWidthsBufferName))) { AddToReport(loc); } argIndex++;//Widths.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex, m_DEWidthsBufferName))) { ErrorStr(loc, "Setting DE widths buffer argument failed", m_Devices[0].get()); } argIndex++;//Widths.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex, m_DECoefIndicesBufferName))) { AddToReport(loc); } argIndex++;//Coef indices.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex, m_DECoefIndicesBufferName))) { ErrorStr(loc, "Setting DE coefficient indices buffer argument failed", m_Devices[0].get()); } argIndex++;//Coef indices.
if (b && !(b = wrapper.SetArg(kernelIndex, argIndex, chunkSizeW))) { AddToReport(loc); } argIndex++;//Chunk size width (gapW + 1).
if (b && !(b = wrapper.SetArg(kernelIndex, argIndex, chunkSizeW))) { ErrorStr(loc, "Setting chunk size width argument failed", m_Devices[0].get()); } argIndex++;//Chunk size width (gapW + 1).
if (b && !(b = wrapper.SetArg(kernelIndex, argIndex, chunkSizeH))) { AddToReport(loc); } argIndex++;//Chunk size height (gapH + 1).
if (b && !(b = wrapper.SetArg(kernelIndex, argIndex, chunkSizeH))) { ErrorStr(loc, "Setting chunk size height argument failed", m_Devices[0].get()); } argIndex++;//Chunk size height (gapH + 1).
if (b && !(b = wrapper.SetArg(kernelIndex, argIndex, colChunkPass))) { AddToReport(loc); } argIndex++;//Column chunk, horizontal pass.
if (b && !(b = wrapper.SetArg(kernelIndex, argIndex, colChunkPass))) { ErrorStr(loc, "Setting col chunk pass argument failed", m_Devices[0].get()); } argIndex++;//Column chunk, horizontal pass.
if (b && !(b = wrapper.SetArg(kernelIndex, argIndex, rowChunkPass))) { AddToReport(loc); } argIndex++;//Row chunk, vertical pass.
if (b && !(b = wrapper.SetArg(kernelIndex, argIndex, rowChunkPass))) { ErrorStr(loc, "Setting row chunk pass argument failed", m_Devices[0].get()); } argIndex++;//Row chunk, vertical pass.
//t.Toc(__FUNCTION__ " set args");
//t.Tic();
if (b && !(b = wrapper.RunKernel(kernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { AddToReport(loc); }//Method 7, accumulating to temp box area.
if (b && !(b = wrapper.RunKernel(kernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { ErrorStr(loc, "Running DE filter program failed", m_Devices[0].get()); }//Method 7, accumulating to temp box area.
//t.Toc(__FUNCTION__ " RunKernel()");
return b;
@ -1512,7 +1519,7 @@ int RendererCL<T, bucketT>::MakeAndGetDensityFilterProgram(size_t ss, uint filte
if (wrapper.AddProgram(deEntryPoint, kernel, deEntryPoint, m_DoublePrecision))
kernelIndex = wrapper.FindKernelIndex(deEntryPoint);//Try to find it again, it will be present if successfully built.
else
AddToReport(string(loc) + "():\nBuilding the following program failed: \n" + kernel + "\n");
ErrorStr(loc, "Adding the DE filter program at "s + deEntryPoint + " failed to build:\n"s + kernel, m_Devices[0].get());
}
}
@ -1541,7 +1548,7 @@ int RendererCL<T, bucketT>::MakeAndGetFinalAccumProgram()
if (wrapper.AddProgram(finalAccumEntryPoint, kernel, finalAccumEntryPoint, m_DoublePrecision))
kernelIndex = wrapper.FindKernelIndex(finalAccumEntryPoint);//Try to find it again, it will be present if successfully built.
else
AddToReport(loc);
ErrorStr(loc, "Adding final accumulation program "s + finalAccumEntryPoint + " failed"s, m_Devices[0].get());
}
}
@ -1560,7 +1567,7 @@ int RendererCL<T, bucketT>::MakeAndGetGammaCorrectionProgram()
auto& wrapper = m_Devices[0]->m_Wrapper;
auto& gammaEntryPoint = m_FinalAccumOpenCLKernelCreator.GammaCorrectionEntryPoint();
int kernelIndex = wrapper.FindKernelIndex(gammaEntryPoint);
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
if (kernelIndex == -1)//Has not been built yet.
{
@ -1570,7 +1577,7 @@ int RendererCL<T, bucketT>::MakeAndGetGammaCorrectionProgram()
if (b)
kernelIndex = wrapper.FindKernelIndex(gammaEntryPoint);//Try to find it again, it will be present if successfully built.
else
AddToReport(loc);
ErrorStr(loc, "Adding gamma correction program "s + gammaEntryPoint + " failed"s, m_Devices[0].get());
}
return kernelIndex;
@ -1589,15 +1596,15 @@ bool RendererCL<T, bucketT>::CreateHostBuffer()
{
bool b = true;
size_t size = SuperSize() * sizeof(v4bT);//Size of histogram and density filter buffer.
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
if (b = Renderer<T, bucketT>::Alloc(true))//Allocate the histogram memory to point this HOST_PTR buffer to, other buffers not needed.
{
if (b && !(b = m_Devices[0]->m_Wrapper.AddHostBuffer(m_HostBufferName, size, reinterpret_cast<void*>(HistBuckets()))))
AddToReport(string(loc) + ": creating OpenCL HOST_PTR buffer to point to host side histogram failed.");//Host side histogram for temporary use with multiple devices.
if (b && !(b = m_Devices[0]->m_Wrapper.AddHostBuffer(m_HostBufferName, size, reinterpret_cast<void*>(HistBuckets()))))//Host side histogram for temporary use with multiple devices.
ErrorStr(loc, "Creating OpenCL HOST_PTR buffer to point to host side histogram failed", m_Devices[0].get());
}
else
AddToReport(string(loc) + ": allocating host side histogram failed.");//Allocating histogram failed, something is seriously wrong.
ErrorStr(loc, "Allocating host side histogram failed", m_Devices[0].get());//Allocating histogram failed, something is seriously wrong.
return b;
}
@ -1617,7 +1624,7 @@ bool RendererCL<T, bucketT>::SumDeviceHist()
//Timing t;
bool b = true;
auto& wrapper = m_Devices[0]->m_Wrapper;
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
size_t blockW = m_Devices[0]->Nvidia() ? 32 : 16;//Max work group size is 256 on AMD, which means 16x16.
size_t blockH = m_Devices[0]->Nvidia() ? 32 : 16;
size_t gridW = SuperRasW();
@ -1635,20 +1642,21 @@ bool RendererCL<T, bucketT>::SumDeviceHist()
{
cl_uint argIndex = 0;
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_HostBufferName))) { break; }//Source buffer of v4bT.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_HostBufferName))) { ErrorStr(loc, "Setting host buffer argument failed", m_Devices[device].get()); break; }//Source buffer of v4bT.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_HistBufferName))) { break; }//Dest buffer of v4bT.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_HistBufferName))) { ErrorStr(loc, "Setting histogram buffer argument failed", m_Devices[device].get()); break; }//Dest buffer of v4bT.
if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, uint(SuperRasW())))) { break; }//Width in pixels.
if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, uint(SuperRasW())))) { ErrorStr(loc, "Setting width argument failed", m_Devices[device].get()); break; }//Width in pixels.
if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, uint(SuperRasH())))) { break; }//Height in pixels.
if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, uint(SuperRasH())))) { ErrorStr(loc, "Setting height argument failed", m_Devices[device].get()); break; }//Height in pixels.
if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, (device == m_Devices.size() - 1) ? 1 : 0))) { break; }//Clear the source buffer on the last device.
if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, (device == m_Devices.size() - 1) ? 1 : 0))) { ErrorStr(loc, "Setting clear argument failed", m_Devices[device].get()); break; }//Clear the source buffer on the last device.
if (b && !(b = wrapper.RunKernel (kernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { break; }
if (b && !(b = wrapper.RunKernel (kernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { ErrorStr(loc, "Running histogram sum program failed", m_Devices[device].get()); break; }
}
else
{
ErrorStr(loc, "Running histogram reading and clearing programs failed", m_Devices[device].get());
break;
}
}
@ -1656,9 +1664,7 @@ bool RendererCL<T, bucketT>::SumDeviceHist()
if (!b)
{
ostringstream os;
os << loc << ": failed to sum histograms from the secondary device(s) to the primary device.";
AddToReport(os.str());
ErrorStr(loc, "Summing histograms from the secondary device(s) to the primary device failed", nullptr);
}
//t.Toc(loc);
@ -1825,6 +1831,25 @@ void RendererCL<T, bucketT>::FillSeeds()
}
}
/// <summary>
/// Compose an error string based on the strings and device passed in, add it to the error report and return the string.
/// </summary>
/// <param name="loc">The location where the error occurred</param>
/// <param name="error">The text of the error</param>
/// <param name="dev">The device the error occurred on</param>
/// <returns>The new error string</returns>
template <typename T, typename bucketT>
std::string RendererCL<T, bucketT>::ErrorStr(const std::string& loc, const std::string& error, RendererClDevice* dev)
{
std::string str = loc + "()"s + (dev ?
"\n"s +
dev->m_Wrapper.DeviceName() + "\nPlatform: " +
std::to_string(dev->PlatformIndex()) + ", device: " + std::to_string(dev->DeviceIndex()) : "") + ", error:\n" +
error + "\n";
AddToReport(str);
return str;
}
template EMBERCL_API class RendererCL<float, float>;
#ifdef DO_DOUBLE

View File

@ -146,12 +146,13 @@ public:
//Public virtual functions overridden from Renderer or RendererBase.
virtual size_t MemoryAvailable() override;
virtual bool Ok() const override;
virtual void ClearErrorReport() override;
virtual size_t SubBatchSize() const override;
virtual size_t ThreadCount() const override;
virtual bool CreateDEFilter(bool& newAlloc) override;
virtual bool CreateSpatialFilter(bool& newAlloc) override;
virtual eRendererType RendererType() const override;
virtual bool Shared() const override;
virtual void ClearErrorReport() override;
virtual string ErrorReportString() override;
virtual vector<string> ErrorReport() override;
virtual bool RandVec(vector<QTIsaac<ISAAC_SIZE, ISAAC_INT>>& randVec) override;
@ -165,13 +166,12 @@ protected:
virtual bool ResetBuckets(bool resetHist = true, bool resetAccum = true) override;
virtual eRenderStatus LogScaleDensityFilter(bool forceOutput = false) override;
virtual eRenderStatus GaussianDensityFilter() override;
virtual eRenderStatus AccumulatorToFinalImage(v4F* pixels, size_t finalOffset) override;
virtual eRenderStatus AccumulatorToFinalImage(vector<v4F>& pixels, size_t finalOffset) override;
virtual EmberStats Iterate(size_t iterCount, size_t temporalSample) override;
#ifndef TEST_CL
private:
#endif
void Init();
//Private functions for making and running OpenCL programs.
bool BuildIterProgramForEmber(bool doAccum = true);
bool RunIter(size_t iterCount, size_t temporalSample, size_t& itersRan);
@ -192,35 +192,40 @@ private:
void ConvertSpatialFilter();
void ConvertEmber(Ember<T>& ember, EmberCL<T>& emberCL, vector<XformCL<T>>& xformsCL);
void ConvertCarToRas(const CarToRas<T>& carToRas);
bool m_Init;
bool m_DoublePrecision;
size_t m_IterCountPerKernel;
size_t m_IterBlocksWide, m_IterBlockWidth;
size_t m_IterBlocksHigh, m_IterBlockHeight;
std::string ErrorStr(const std::string& loc, const std::string& error, RendererClDevice* dev);
bool m_Init = false;
bool m_Shared = false;
bool m_DoublePrecision = typeid(T) == typeid(double);
//It's critical that these numbers never change. They are
//based on the cuburn model of each kernel launch containing
//256 threads. 32 wide by 8 high. Everything done in the OpenCL
//iteraion kernel depends on these dimensions.
size_t m_IterCountPerKernel = 256;
size_t m_IterBlocksWide = 64, m_IterBlockWidth = 32;
size_t m_IterBlocksHigh = 2, m_IterBlockHeight = 8;
size_t m_MaxDEBlockSizeW;
size_t m_MaxDEBlockSizeH;
//Buffer names.
string m_EmberBufferName;
string m_XformsBufferName;
string m_ParVarsBufferName;
string m_GlobalSharedBufferName;
string m_SeedsBufferName;
string m_DistBufferName;
string m_CarToRasBufferName;
string m_DEFilterParamsBufferName;
string m_SpatialFilterParamsBufferName;
string m_CurvesCsaName;
string m_DECoefsBufferName;
string m_DEWidthsBufferName;
string m_DECoefIndicesBufferName;
string m_SpatialFilterCoefsBufferName;
string m_HostBufferName;
string m_HistBufferName;
string m_AccumBufferName;
string m_FinalImageName;
string m_PointsBufferName;
string m_EmberBufferName = "Ember";
string m_XformsBufferName = "Xforms";
string m_ParVarsBufferName = "ParVars";
string m_GlobalSharedBufferName = "GlobalShared";
string m_SeedsBufferName = "Seeds";
string m_DistBufferName = "Dist";
string m_CarToRasBufferName = "CarToRas";
string m_DEFilterParamsBufferName = "DEFilterParams";
string m_SpatialFilterParamsBufferName = "SpatialFilterParams";
string m_DECoefsBufferName = "DECoefs";
string m_DEWidthsBufferName = "DEWidths";
string m_DECoefIndicesBufferName = "DECoefIndices";
string m_SpatialFilterCoefsBufferName = "SpatialFilterCoefs";
string m_CurvesCsaName = "CurvesCsa";
string m_HostBufferName = "Host";
string m_HistBufferName = "Hist";
string m_AccumBufferName = "Accum";
string m_FinalImageName = "Final";
string m_PointsBufferName = "Points";
//Kernels.
string m_IterKernel;

View File

@ -55,4 +55,39 @@ bool RendererClDevice::Ok() const { return m_Init; }
bool RendererClDevice::Shared() const { return m_Shared; }
bool RendererClDevice::Nvidia() const { return m_NVidia; }
size_t RendererClDevice::WarpSize() const { return m_WarpSize; }
size_t RendererClDevice::PlatformIndex() const { return m_PlatformIndex; }
size_t RendererClDevice::DeviceIndex() const { return m_DeviceIndex; }
/// <summary>
/// Clear the error report for this class as well as the wrapper.
/// </summary>
void RendererClDevice::ClearErrorReport()
{
EmberReport::ClearErrorReport();
m_Wrapper.ClearErrorReport();
}
/// <summary>
/// Concatenate and return the error report for this class and the
/// wrapper as a single string.
/// </summary>
/// <returns>The concatenated error report string</returns>
string RendererClDevice::ErrorReportString()
{
auto s = EmberReport::ErrorReportString();
return s + m_Wrapper.ErrorReportString();
}
/// <summary>
/// Concatenate and return the error report for this class and the
/// wrapper as a vector of strings.
/// </summary>
/// <returns>The concatenated error report vector of strings</returns>
vector<string> RendererClDevice::ErrorReport()
{
auto ours = EmberReport::ErrorReport();
auto s = m_Wrapper.ErrorReport();
ours.insert(ours.end(), s.begin(), s.end());
return ours;
}
}

View File

@ -24,6 +24,13 @@ public:
bool Shared() const;
bool Nvidia() const;
size_t WarpSize() const;
size_t PlatformIndex() const;
size_t DeviceIndex() const;
//Public virtual functions overridden from base classes.
virtual void ClearErrorReport() override;
virtual string ErrorReportString() override;
virtual vector<string> ErrorReport() override;
size_t m_Calls;
OpenCLWrapper m_Wrapper;