--User changes

-Add two new variations, hypercrop and hypershift2.
 -Allow for animating final xforms.
 -More detailed diagnostics when any action in the OpenCL renderer fails.
 -Allow for creating an OpenCL renderer which does not share a texture with the main window, and instead manually copies its final output image from GPU to CPU then back to GPU.

--Bug fixes
 -Text was not properly being copied out of the Info | Bounds text box.

--Code changes
 -Remove Renderer::AccumulatorToFinalImage(v4F* pixels, size_t finalOffset); it is no longer needed and no longer makes sense.
 -Controllers no longer keep track of shared status, it's kept inside the renderers.
 -Make getter functions in FractoriumOptionsDialog be public.
This commit is contained in:
Person 2018-04-28 22:28:05 -07:00
parent 0c67c52720
commit 92e9836151
39 changed files with 852 additions and 405 deletions

View File

@ -6,7 +6,7 @@
<ProductVersion>3.7</ProductVersion>
<ProjectGuid>{c8096c47-e358-438c-a520-146d46b0637d}</ProjectGuid>
<SchemaVersion>2.0</SchemaVersion>
<OutputName>Fractorium_1.0.0.7</OutputName>
<OutputName>Fractorium_1.0.0.8</OutputName>
<OutputType>Package</OutputType>
<WixTargetsPath Condition=" '$(WixTargetsPath)' == '' AND '$(MSBuildExtensionsPath32)' != '' ">$(MSBuildExtensionsPath32)\Microsoft\WiX\v3.x\Wix.targets</WixTargetsPath>
<WixTargetsPath Condition=" '$(WixTargetsPath)' == '' ">$(MSBuildExtensionsPath)\Microsoft\WiX\v3.x\Wix.targets</WixTargetsPath>

View File

@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<Wix xmlns="http://schemas.microsoft.com/wix/2006/wi">
<?define ProductVersion="1.0.0.7" ?>
<?define ProductVersion="1.0.0.8" ?>
<?define ProductName="Fractorium $(var.ProductVersion) ($(var.GpuType))" ?>
<?define UpgradeCode="{4714cd15-bfba-44f6-8059-9e1466ebfa6e}"?>
<?define Manufacturer="Fractorium"?>
@ -13,7 +13,7 @@
<!--
Change this for every release.
-->
<?define ProductCode="{4E5EFBC9-8572-4B82-A99E-A275DEA7A609}"?>
<?define ProductCode="{91870DB0-6CA7-4F14-9DC4-3DE0CA02E9E9}"?>
<Product Id="$(var.ProductCode)" Name="$(var.ProductName)" Language="1033" Version="$(var.ProductVersion)" Manufacturer="$(var.Manufacturer)" UpgradeCode="$(var.UpgradeCode)">
<Package

Binary file not shown.

View File

@ -49,8 +49,8 @@
//
VS_VERSION_INFO VERSIONINFO
FILEVERSION 1, 0, 0, 7
PRODUCTVERSION 1, 0, 0, 7
FILEVERSION 1, 0, 0, 8
PRODUCTVERSION 1, 0, 0, 8
FILEFLAGSMASK 0x3fL
#ifdef _DEBUG
FILEFLAGS 0x1L
@ -67,12 +67,12 @@
BEGIN
VALUE "CompanyName", "Open Source"
VALUE "FileDescription", "Renders fractal flames as animations with motion blur"
VALUE "FileVersion", "1.0.0.7"
VALUE "FileVersion", "1.0.0.8"
VALUE "InternalName", "EmberAnimate.exe"
VALUE "LegalCopyright", "Copyright (C) Matt Feemster 2017, GPL v3"
VALUE "OriginalFilename", "EmberAnimate.exe"
VALUE "ProductName", "Ember Animate"
VALUE "ProductVersion", "1.0.0.7"
VALUE "ProductVersion", "1.0.0.8"
END
END
BLOCK "VarFileInfo"

Binary file not shown.

View File

@ -49,8 +49,8 @@
//
VS_VERSION_INFO VERSIONINFO
FILEVERSION 1, 0, 0, 7
PRODUCTVERSION 1, 0, 0, 7
FILEVERSION 1, 0, 0, 8
PRODUCTVERSION 1, 0, 0, 8
FILEFLAGSMASK 0x3fL
#ifdef _DEBUG
FILEFLAGS 0x1L
@ -67,12 +67,12 @@
BEGIN
VALUE "CompanyName", "Open Source"
VALUE "FileDescription", "Manipulates fractal flames parameter files"
VALUE "FileVersion", "1.0.0.7"
VALUE "FileVersion", "1.0.0.8"
VALUE "InternalName", "EmberGenome.exe"
VALUE "LegalCopyright", "Copyright (C) Matt Feemster 2017, GPL v3"
VALUE "OriginalFilename", "EmberGenome.exe"
VALUE "ProductName", "Ember Genome"
VALUE "ProductVersion", "1.0.0.7"
VALUE "ProductVersion", "1.0.0.8"
END
END
BLOCK "VarFileInfo"

View File

@ -49,8 +49,8 @@
//
VS_VERSION_INFO VERSIONINFO
FILEVERSION 1, 0, 0, 7
PRODUCTVERSION 1, 0, 0, 7
FILEVERSION 1, 0, 0, 8
PRODUCTVERSION 1, 0, 0, 8
FILEFLAGSMASK 0x3fL
#ifdef _DEBUG
FILEFLAGS 0x1L
@ -67,12 +67,12 @@
BEGIN
VALUE "CompanyName", "Open Source"
VALUE "FileDescription", "Renders fractal flames as single images"
VALUE "FileVersion", "1.0.0.7"
VALUE "FileVersion", "1.0.0.8"
VALUE "InternalName", "EmberRender.exe"
VALUE "LegalCopyright", "Copyright (C) Matt Feemster 2017, GPL v3"
VALUE "OriginalFilename", "EmberRender.exe"
VALUE "ProductName", "Ember Render"
VALUE "ProductVersion", "1.0.0.7"
VALUE "ProductVersion", "1.0.0.8"
END
END
BLOCK "VarFileInfo"

Binary file not shown.

View File

@ -1,4 +1,4 @@
1.0.0.7 12/22/2017
1.0.0.8 12/22/2017
--User changes
-Support 4k monitors, and in general, properly scale any monitor that is not HD.
-Allow for a spatial filter of radius zero, which means do not use a spatial filter.

View File

@ -407,6 +407,8 @@ uint Timing::m_ProcessorCount;
EXPORTPREPOSTREGVAR(Sphereblur, T) \
EXPORTPREPOSTREGVAR(Cpow3, T) \
EXPORTPREPOSTREGVAR(Concentric, T) \
EXPORTPREPOSTREGVAR(Hypercrop, T) \
EXPORTPREPOSTREGVAR(Hypershift2, T) \
template EMBER_API class PostSmartcropVariation<T>; /*Only implemented as post.*/ \
EXPORTPREPOSTREGVAR(DCBubble, T) \
EXPORTPREPOSTREGVAR(DCCarpet, T) \

View File

@ -998,17 +998,19 @@ public:
/// <param name="angle">The angle to rotate by</param>
void RotateAffines(T angle)
{
for (size_t i = 0; i < XformCount(); i++)//Only look at normal xforms, exclude final.
size_t i = 0;
while (auto xform = GetTotalXform(i++))//Flam3 only allowed animation with normal xforms. This has been changed to allow animations of final xforms.
{
//Don't rotate xforms with animate set to 0.
if (m_Xforms[i].m_Animate == 0)
if (xform->m_Animate == 0)
continue;
//Assume that if there are no variations, then it's a padding xform.
if (m_Xforms[i].Empty() && m_AffineInterp != eAffineInterp::AFFINE_INTERP_LOG)
if (xform->Empty() && m_AffineInterp != eAffineInterp::AFFINE_INTERP_LOG)
continue;
m_Xforms[i].m_Affine.Rotate(angle * DEG_2_RAD_T);
xform->m_Affine.Rotate(angle * DEG_2_RAD_T);
//Don't rotate post.
}
}

View File

@ -37,7 +37,7 @@ static void sincos(float x, float* s, float* c)
namespace EmberNs
{
#define EMBER_VERSION "1.0.0.7"
#define EMBER_VERSION "1.0.0.8"
#define EPS6 T(1e-6)
#define EPS std::numeric_limits<T>::epsilon()//Apoplugin.h uses -20, but it's more mathematically correct to do it this way.
#define ISAAC_SIZE 4

View File

@ -1093,7 +1093,8 @@ eRenderStatus Renderer<T, bucketT>::GaussianDensityFilter()
}
/// <summary>
/// Thin wrapper around AccumulatorToFinalImage().
/// Produce a final, visible image by clipping, gamma correcting and spatial filtering the color values
/// in the density filtering buffer and save to the passed in buffer.
/// </summary>
/// <param name="pixels">The pixel vector to allocate and store the final image in</param>
/// <param name="finalOffset">Offset in the buffer to store the pixels to</param>
@ -1101,31 +1102,20 @@ eRenderStatus Renderer<T, bucketT>::GaussianDensityFilter()
template <typename T, typename bucketT>
eRenderStatus Renderer<T, bucketT>::AccumulatorToFinalImage(vector<v4F>& pixels, size_t finalOffset)
{
if (PrepFinalAccumVector(pixels))
return AccumulatorToFinalImage(pixels.data(), finalOffset);
return eRenderStatus::RENDER_ERROR;
}
/// <summary>
/// Produce a final, visible image by clipping, gamma correcting and spatial filtering the color values
/// in the density filtering buffer and save to the passed in buffer.
/// </summary>
/// <param name="pixels">The pre-allocated pixel buffer to store the final image in</param>
/// <param name="finalOffset">Offset in the buffer to store the pixels to. Default: 0.</param>
/// <returns>True if not prematurely aborted, else false.</returns>
template <typename T, typename bucketT>
eRenderStatus Renderer<T, bucketT>::AccumulatorToFinalImage(v4F* pixels, size_t finalOffset)
{
if (!pixels)
return eRenderStatus::RENDER_ERROR;
EnterFinalAccum();
if (!PrepFinalAccumVector(pixels))
{
LeaveFinalAccum();
return eRenderStatus::RENDER_ERROR;
}
//Timing t(4);
size_t filterWidth = m_SpatialFilter->FinalFilterWidth();
bucketT g, linRange, vibrancy;
Color<bucketT> background;
pixels += finalOffset;
auto p = pixels.data();
p += finalOffset;
PrepFinalAccumVals(background, g, linRange, vibrancy);//After this, background has been scaled from 0-1 to 0-255.
//If early clip, go through the entire accumulator and perform gamma correction first.
@ -1165,7 +1155,7 @@ eRenderStatus Renderer<T, bucketT>::AccumulatorToFinalImage(v4F* pixels, size_t
size_t pixelsRowStart = (m_YAxisUp ? ((FinalRasH() - j) - 1) : j) * FinalRasW();//Pull out of inner loop for optimization.
size_t y = m_DensityFilterOffset + (j * Supersample());//Start at the beginning row of each super sample block.
size_t clampedFilterH = std::min(filterWidth, m_SuperRasH - y);//Make sure the filter doesn't go past the bottom of the gutter.
auto pv4T = pixels + pixelsRowStart;
auto pv4T = p + pixelsRowStart;
for (size_t i = 0; i < FinalRasW(); i++, pv4T++)
{
@ -1210,11 +1200,11 @@ eRenderStatus Renderer<T, bucketT>::AccumulatorToFinalImage(v4F* pixels, size_t
{
for (i = 0; i < FinalRasW(); i++)
{
auto p = pixels + (i + j * FinalRasW());
p->r = m_TempEmber.m_Palette[i * 256 / FinalRasW()][0];
p->g = m_TempEmber.m_Palette[i * 256 / FinalRasW()][1];
p->b = m_TempEmber.m_Palette[i * 256 / FinalRasW()][2];
p->a = 1;
auto pp = p + (i + j * FinalRasW());
pp->r = m_TempEmber.m_Palette[i * 256 / FinalRasW()][0];
pp->g = m_TempEmber.m_Palette[i * 256 / FinalRasW()][1];
pp->b = m_TempEmber.m_Palette[i * 256 / FinalRasW()][2];
pp->a = 1;
}
}
}

View File

@ -77,7 +77,6 @@ protected:
virtual eRenderStatus LogScaleDensityFilter(bool forceOutput = false);
virtual eRenderStatus GaussianDensityFilter();
virtual eRenderStatus AccumulatorToFinalImage(vector<v4F>& pixels, size_t finalOffset);
virtual eRenderStatus AccumulatorToFinalImage(v4F* pixels, size_t finalOffset);
virtual EmberStats Iterate(size_t iterCount, size_t temporalSample);
virtual void ComputeCurves();

View File

@ -514,6 +514,13 @@ size_t RendererBase::ThreadCount() const { return m_ThreadsToUse; }
/// <returns>eRendererType::CPU_RENDERER</returns>
eRendererType RendererBase::RendererType() const { return eRendererType::CPU_RENDERER; }
/// <summary>
/// Get whether the renderer uses a shared texture with OpenGL.
/// This only applies to the OpenCL renderer (which can be shared or unshared), so it's always false in the base.
/// The function is declared virtual, so a derived renderer can report its own shared status.
/// </summary>
/// <returns>True if shared, else false. Always false in the base.</returns>
bool RendererBase::Shared() const { return false; }
/// <summary>
/// //Non-virtual threading control.
/// </summary>

View File

@ -164,6 +164,7 @@ public:
//Virtual render properties, getters and setters.
virtual size_t ThreadCount() const;
virtual eRendererType RendererType() const;
virtual bool Shared() const;
//Abstract render properties, getters only.
virtual size_t TemporalSamples() const = 0;

View File

@ -197,6 +197,8 @@ enum class eVariationId : et
VAR_HOLE ,
VAR_HORSESHOE ,
VAR_HYPERBOLIC ,
VAR_HYPERCROP ,
VAR_HYPERSHIFT2 ,
VAR_HYPERTILE ,
VAR_HYPERTILE1 ,
VAR_HYPERTILE2 ,
@ -540,6 +542,8 @@ enum class eVariationId : et
VAR_PRE_HOLE,
VAR_PRE_HORSESHOE,
VAR_PRE_HYPERBOLIC,
VAR_PRE_HYPERCROP,
VAR_PRE_HYPERSHIFT2,
VAR_PRE_HYPERTILE,
VAR_PRE_HYPERTILE1,
VAR_PRE_HYPERTILE2,
@ -883,6 +887,8 @@ enum class eVariationId : et
VAR_POST_HOLE,
VAR_POST_HORSESHOE,
VAR_POST_HYPERBOLIC,
VAR_POST_HYPERCROP,
VAR_POST_HYPERSHIFT2,
VAR_POST_HYPERTILE,
VAR_POST_HYPERTILE1,
VAR_POST_HYPERTILE2,

View File

@ -359,6 +359,8 @@ VariationList<T>::VariationList()
ADDPREPOSTREGVAR(Sphereblur)
ADDPREPOSTREGVAR(Cpow3)
ADDPREPOSTREGVAR(Concentric)
ADDPREPOSTREGVAR(Hypercrop)
ADDPREPOSTREGVAR(Hypershift2)
//ADDPREPOSTREGVAR(LinearXZ)
//ADDPREPOSTREGVAR(LinearYZ)
//DC are special.

View File

@ -1487,12 +1487,6 @@ public:
PARVARCOPY(CrobVariation)
/// <summary>
/// Functions the specified helper.
/// </summary>
/// <param name="helper">The helper.</param>
/// <param name="outPoint">The out point.</param>
/// <param name="rand">The rand.</param>
virtual void Func(IteratorHelper<T>& helper, Point<T>& outPoint, QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand) override
{
T gradTmp, secTmp, xTmp = 0, yTmp = 0;

View File

@ -2013,6 +2013,256 @@ private:
T m_Zblur;
};
/// <summary>
/// hypercrop.
/// Builds a circle of radius d centered at (x0, y0), where the center's angle is the
/// input point's angle snapped to a sector derived from n. Input points that fall
/// outside that circle pass through unchanged. Points inside it are replaced according to zero:
///   zero > 1.5 : moved to the circle center.
///   zero > 0.5 : moved to the origin.
///   otherwise  : projected onto the circle's edge.
/// The result is scaled by the variation weight.
/// </summary>
template <typename T>
class HypercropVariation : public ParametricVariation<T>
{
public:
	//NOTE(review): the trailing flags presumably request the atan2(y, x) precalc, since Func() reads helper.m_PrecalcAtanyx. Confirm against the ParametricVariation constructor.
	HypercropVariation(T weight = 1.0) : ParametricVariation<T>("hypercrop", eVariationId::VAR_HYPERCROP, weight, false, false, false, false, true)
	{
		Init();
	}

	PARVARCOPY(HypercropVariation)

	/// <summary>
	/// CPU implementation. Writes the cropped point, scaled by weight, to helper.Out.
	/// </summary>
	/// <param name="helper">Supplies the input point and precalc values, receives the output</param>
	/// <param name="outPoint">Unused by this variation</param>
	/// <param name="rand">Unused by this variation</param>
	virtual void Func(IteratorHelper<T>& helper, Point<T>& outPoint, QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand) override
	{
		//Start from the untouched input, it is only overwritten when the point lies inside the crop circle.
		T resultX = helper.In.x;
		T resultY = helper.In.y;
		T resultZ = helper.In.z;
		T a0 = T(M_PI) / m_N;
		T len = 1 / Zeps(std::cos(a0));
		T d = m_Rad * std::sin(a0) * len;//Radius of the crop circle.
		T angle = Floor<T>(helper.m_PrecalcAtanyx * m_Coeff) / m_Coeff + T(M_PI) / m_N;//Snap the input angle to its sector.
		T x0 = std::cos(angle) * len;//Center of the crop circle.
		T y0 = std::sin(angle) * len;
		T dx = helper.In.x - x0;
		T dy = helper.In.y - y0;

		if (std::sqrt(Sqr(dx) + Sqr(dy)) < d)
		{
			resultZ = 0;//Every replacement mode flattens z.

			if (m_Zero > 1.5)
			{
				resultX = x0;
				resultY = y0;
			}
			else if (m_Zero > 0.5)
			{
				resultX = 0;
				resultY = 0;
			}
			else
			{
				//Push the point outward onto the edge of the circle, keeping its direction from the center.
				T rangle = std::atan2(dy, dx);
				resultX = x0 + std::cos(rangle) * d;
				resultY = y0 + std::sin(rangle) * d;
			}
		}

		helper.Out.x = resultX * m_Weight;
		helper.Out.y = resultY * m_Weight;
		helper.Out.z = resultZ * m_Weight;
	}

	/// <summary>
	/// Build the OpenCL source for this variation, which mirrors Func().
	/// </summary>
	/// <returns>The OpenCL kernel text for this variation</returns>
	virtual string OpenCLString() const override
	{
		ostringstream kernel, suffix;
		intmax_t i = 0, varIndex = IndexInXform();
		suffix << "_" << XformIndexInEmber() << "]";
		string index = suffix.str();
		string weight = WeightDefineString();
		//The order here must match the order the params are added in Init().
		string nName = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
		string radName = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
		string zeroName = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
		string coeffName = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
		kernel << "\t{\n"
			   << "\t\treal_t fx = vIn.x;\n"
			   << "\t\treal_t fy = vIn.y;\n"
			   << "\t\treal_t fz = vIn.z;\n"
			   << "\t\treal_t a0 = M_PI / " << nName << ";\n"
			   << "\t\treal_t len = 1 / Zeps(cos(a0));\n"
			   << "\t\treal_t d = " << radName << " * sin(a0) * len;\n"
			   << "\t\treal_t angle = floor(precalcAtanyx * " << coeffName << ") / " << coeffName << " + M_PI / " << nName << ";\n"
			   << "\t\treal_t x0 = cos(angle) * len;\n"
			   << "\t\treal_t y0 = sin(angle) * len;\n"
			   << "\n"
			   << "\t\tif (sqrt(Sqr(vIn.x - x0) + Sqr(vIn.y - y0)) < d)\n"
			   << "\t\t{\n"
			   << "\t\t if (" << zeroName << " > 1.5)\n"
			   << "\t\t {\n"
			   << "\t\t fx = x0;\n"
			   << "\t\t fy = y0;\n"
			   << "\t\t fz = 0;\n"
			   << "\t\t }\n"
			   << "\t\t else\n"
			   << "\t\t {\n"
			   << "\t\t if (" << zeroName << " > 0.5)\n"
			   << "\t\t {\n"
			   << "\t\t fx = 0;\n"
			   << "\t\t fy = 0;\n"
			   << "\t\t fz = 0;\n"
			   << "\t\t }\n"
			   << "\t\t else\n"
			   << "\t\t {\n"
			   << "\t\t real_t rangle = atan2(vIn.y - y0, vIn.x - x0);\n"
			   << "\t\t fx = x0 + cos(rangle) * d;\n"
			   << "\t\t fy = y0 + sin(rangle) * d;\n"
			   << "\t\t fz = 0;\n"
			   << "\t\t }\n"
			   << "\t\t }\n"
			   << "\t\t}\n"
			   << "\n"
			   << "\t\tvOut.x = fx * " << weight << ";\n"
			   << "\t\tvOut.y = fy * " << weight << ";\n"
			   << "\t\tvOut.z = fz * " << weight << ";\n"
			   << "\t}\n";
		return kernel.str();
	}

	/// <summary>
	/// Guard n against zero, then derive the angle snapping coefficient from it.
	/// </summary>
	virtual void Precalc() override
	{
		m_N = Zeps(m_N);
		m_Coeff = Zeps<T>(m_N * T(0.5) / T(M_PI));
	}

	/// <summary>
	/// Names of the global OpenCL helper functions the kernel text calls.
	/// </summary>
	/// <returns>Vector of function names</returns>
	virtual vector<string> OpenCLGlobalFuncNames() const override
	{
		vector<string> names { "Zeps", "Sqr" };
		return names;
	}

protected:
	/// <summary>
	/// Register the parameters. OpenCLString() reads them back by position, so keep the order in sync.
	/// </summary>
	void Init()
	{
		string prefix = Prefix();
		m_Params.clear();
		m_Params.push_back(ParamWithName<T>(&m_N, prefix + "hypercrop_n", 4));
		m_Params.push_back(ParamWithName<T>(&m_Rad, prefix + "hypercrop_rad", 1));
		m_Params.push_back(ParamWithName<T>(&m_Zero, prefix + "hypercrop_zero"));
		m_Params.push_back(ParamWithName<T>(true, &m_Coeff, prefix + "hypercrop_coeff"));//Precalc.
	}

private:
	T m_N;//Divides pi to get the sector half angle, forced non-zero in Precalc().
	T m_Rad;//Scales the radius of the crop circle.
	T m_Zero;//Selects what happens to points inside the circle, see Func().
	T m_Coeff;//Precalc.
};
/// <summary>
/// hypershift2.
/// Scales the input by a precalculated factor, applies two successive inversions
/// (each followed by a shift along x), then rotates the result by a randomly chosen
/// odd multiple of pi / p.
/// When used as a regular variation, it assigns to the output point rather than
/// summing into it, so its position within the xform matters.
/// </summary>
template <typename T>
class Hypershift2Variation : public ParametricVariation<T>
{
public:
	Hypershift2Variation(T weight = 1.0) : ParametricVariation<T>("hypershift2", eVariationId::VAR_HYPERSHIFT2, weight)
	{
		Init();
	}

	PARVARCOPY(Hypershift2Variation)

	/// <summary>
	/// CPU implementation. OpenCLString() must produce the same math.
	/// </summary>
	/// <param name="helper">Supplies the input point, receives the output</param>
	/// <param name="outPoint">Zeroed when this is a regular variation, because the result is assigned rather than summed</param>
	/// <param name="rand">Random source used to pick which of the p rotations to apply</param>
	virtual void Func(IteratorHelper<T>& helper, Point<T>& outPoint, QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand) override
	{
		T fx = helper.In.x * m_Scale2;
		T fy = helper.In.y * m_Scale2;
		T rad = 1 / Zeps(fx * fx + fy * fy);//First inversion.
		T x = rad * fx + m_Shift;
		T y = rad * fy;
		rad = m_Weight * m_Scale / Zeps(x * x + y * y);//Second inversion, the weight is folded in here.
		T angle = ((rand.Rand() % int(m_P)) * 2 + 1) * T(M_PI) / m_P;//Random odd multiple of pi / p.
		T X = rad * x + m_Shift;
		T Y = rad * y;
		T cosa = std::cos(angle);
		T sina = std::sin(angle);

		if (m_VarType == eVariationType::VARTYPE_REG)
			outPoint.m_X = outPoint.m_Y = outPoint.m_Z = 0;//This variation assigns, instead of summing, so order will matter.

		helper.Out.x = cosa * X - sina * Y;
		helper.Out.y = sina * X + cosa * Y;
		helper.Out.z = helper.In.z * rad;
	}

	/// <summary>
	/// Build the OpenCL source for this variation, which mirrors Func().
	/// </summary>
	/// <returns>The OpenCL kernel text for this variation</returns>
	virtual string OpenCLString() const override
	{
		ostringstream ss, ss2;
		intmax_t i = 0, varIndex = IndexInXform();
		ss2 << "_" << XformIndexInEmber() << "]";
		string index = ss2.str();
		string weight = WeightDefineString();
		//The order here must match the order the params are added in Init().
		string p = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
		string q = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
		string shift = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
		string scale = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
		string scale2 = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
		ss << "\t{\n"
		   << "\t\treal_t fx = vIn.x * " << scale2 << ";\n"
		   << "\t\treal_t fy = vIn.y * " << scale2 << ";\n"
		   << "\t\treal_t rad = 1 / Zeps(fx * fx + fy * fy);\n"
		   << "\t\treal_t x = rad * fx + " << shift << ";\n"
		   << "\t\treal_t y = rad * fy;\n"
		   //The second inversion is scaled by weight * scale, exactly as m_Weight * m_Scale in Func().
		   << "\t\trad = " << weight << " * " << scale << " / Zeps(x * x + y * y);\n"
		   << "\t\treal_t angle = ((MwcNext(mwc) % (int)" << p << ") * 2 + 1) * M_PI / " << p << ";\n"
		   << "\t\treal_t X = rad * x + " << shift << ";\n"
		   << "\t\treal_t Y = rad * y;\n"
		   << "\t\treal_t cosa = cos(angle);\n"
		   << "\t\treal_t sina = sin(angle);\n";

		if (m_VarType == eVariationType::VARTYPE_REG)
			ss << "\t\toutPoint->m_X = outPoint->m_Y = outPoint->m_Z = 0;\n";

		ss << "\t\tvOut.x = cosa * X - sina * Y;\n"
		   << "\t\tvOut.y = sina * X + cosa * Y;\n"
		   << "\t\tvOut.z = vIn.z * rad;\n"
		   << "\t}\n";
		return ss.str();
	}

	/// <summary>
	/// Derive shift, scale and scale2 from p and q.
	/// </summary>
	virtual void Precalc() override
	{
		T pq = T(M_PI) / m_Q;
		T pp = T(M_PI) / m_P;
		T spq = std::sin(pq);
		T spp = std::sin(pp);
		T shiftedSin = std::sin(T(M_PI) / 2 + pp);//Used twice below, std:: qualified like the other trig calls so the T overload is always picked.
		m_Shift = std::sin(T(M_PI) * T(0.5) - pq - pp);
		m_Shift = m_Shift / std::sqrt(1 - Sqr(spq) - Sqr(spp));
		m_Scale2 = 1 / std::sqrt(Sqr(shiftedSin) / Sqr(spq) - 1);
		m_Scale2 = m_Scale2 * (shiftedSin / spq - 1);
		m_Scale = 1 - m_Shift * m_Shift;
	}

	/// <summary>
	/// Names of the global OpenCL helper functions the kernel text calls.
	/// </summary>
	/// <returns>Vector of function names</returns>
	virtual vector<string> OpenCLGlobalFuncNames() const override
	{
		return vector<string> { "Zeps" };
	}

protected:
	/// <summary>
	/// Register the parameters. OpenCLString() reads them back by position, so keep the order in sync.
	/// </summary>
	void Init()
	{
		string prefix = Prefix();
		m_Params.clear();
		m_Params.push_back(ParamWithName<T>(&m_P, prefix + "hypershift2_p", 3, eParamType::INTEGER_NONZERO));
		m_Params.push_back(ParamWithName<T>(&m_Q, prefix + "hypershift2_q", 7, eParamType::INTEGER_NONZERO));
		m_Params.push_back(ParamWithName<T>(true, &m_Shift, prefix + "hypershift2_shift"));//Precalc.
		m_Params.push_back(ParamWithName<T>(true, &m_Scale, prefix + "hypershift2_scale"));
		m_Params.push_back(ParamWithName<T>(true, &m_Scale2, prefix + "hypershift2_scale2"));
	}

private:
	T m_P;//Number of rotations to choose from, also used as the modulus for the random pick.
	T m_Q;
	T m_Shift;//Precalc.
	T m_Scale;//Precalc.
	T m_Scale2;//Precalc.
};
MAKEPREPOSTPARVAR(Splits3D, splits3D, SPLITS3D)
MAKEPREPOSTPARVAR(Waves2B, waves2b, WAVES2B)
MAKEPREPOSTPARVAR(JacCn, jac_cn, JAC_CN)
@ -2035,4 +2285,6 @@ MAKEPREPOSTPARVAR(Helix, helix, HELIX)
MAKEPREPOSTPARVAR(Sphereblur, sphereblur, SPHEREBLUR)
MAKEPREPOSTPARVAR(Cpow3, cpow3, CPOW3)
MAKEPREPOSTPARVAR(Concentric, concentric, CONCENTRIC)
MAKEPREPOSTPARVAR(Hypercrop, hypercrop, HYPERCROP)
MAKEPREPOSTPARVAR(Hypershift2, hypershift2, HYPERSHIFT2)
}

View File

@ -43,7 +43,7 @@ OpenCLInfo::OpenCLInfo()
if (!platformOk)
{
m_Platforms.push_back(platforms[platform]);
m_PlatformNames.push_back(platforms[platform].getInfo<CL_PLATFORM_VENDOR>(nullptr) + " " + platforms[platform].getInfo<CL_PLATFORM_NAME>(nullptr) + " " + platforms[platform].getInfo<CL_PLATFORM_VERSION>(nullptr));
m_PlatformNames.push_back(platforms[platform].getInfo<CL_PLATFORM_VENDOR>(nullptr).c_str() + " "s + platforms[platform].getInfo<CL_PLATFORM_NAME>(nullptr).c_str() + " "s + platforms[platform].getInfo<CL_PLATFORM_VERSION>(nullptr).c_str());
workingPlatformIndex++;
platformOk = true;
}
@ -58,7 +58,7 @@ OpenCLInfo::OpenCLInfo()
}
m_Devices.back().push_back(devices[platform][device]);
m_DeviceNames.back().push_back(devices[platform][device].getInfo<CL_DEVICE_VENDOR>(nullptr) + " " + devices[platform][device].getInfo<CL_DEVICE_NAME>(nullptr));// + " " + devices[platform][device].getInfo<CL_DEVICE_VERSION>());
m_DeviceNames.back().push_back(devices[platform][device].getInfo<CL_DEVICE_VENDOR>(nullptr).c_str() + " "s + devices[platform][device].getInfo<CL_DEVICE_NAME>(nullptr).c_str());// + " " + devices[platform][device].getInfo<CL_DEVICE_VERSION>().c_str());
m_AllDeviceNames.push_back(m_DeviceNames.back().back());
m_DeviceIndices.push_back(pair<size_t, size_t>(workingPlatformIndex, workingDeviceIndex++));
m_Init = true;//If at least one platform and device succeeded, OpenCL is ok. It's now ok to begin building and running programs.
@ -183,6 +183,29 @@ size_t OpenCLInfo::TotalDeviceIndex(size_t platform, size_t device) const
return index;
}
/// <summary>
/// Search the known devices for the one whose underlying OpenCL ID matches the one passed in.
/// </summary>
/// <param name="id">The device ID to search for</param>
/// <param name="platform">Receives the platform index of the matching device, or 0 if there was no match</param>
/// <param name="device">Receives the device index of the matching device, or 0 if there was no match</param>
/// <returns>A pointer to the matching device, or nullptr if there was no match</returns>
const cl::Device* OpenCLInfo::DeviceFromId(cl_device_id id, size_t& platform, size_t& device) const
{
	const cl::Device* match = nullptr;

	for (auto& indices : m_DeviceIndices)
	{
		auto& candidate = m_Devices[indices.first][indices.second];

		if (candidate() == id)//operator() yields the raw cl_device_id.
		{
			platform = indices.first;
			device = indices.second;
			match = &candidate;
			break;
		}
	}

	if (!match)
		platform = device = 0;

	return match;
}
/// <summary>
/// Create a context that is optionally shared with OpenGL and place it in the
/// passed in context ref parameter.
@ -209,6 +232,7 @@ bool OpenCLInfo::CreateContext(const cl::Platform& platform, cl::Context& contex
context = cl::Context(CL_DEVICE_TYPE_GPU, props, nullptr, nullptr, &err);//May need to tinker with this on Mac.
#else
#if defined WIN32
//::wglMakeCurrent(wglGetCurrentDC(), wglGetCurrentContext());
cl_context_properties props[] =
{
CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(),
@ -269,7 +293,7 @@ string OpenCLInfo::DumpInfo() const
for (size_t device = 0; device < m_Devices[platform].size(); device++)
{
os << "Device " << device << ": " << DeviceName(platform, device);
os << "\nCL_DEVICE_OPENCL_C_VERSION: " << GetInfo<string>(platform, device, CL_DEVICE_OPENCL_C_VERSION);
os << "\nCL_DEVICE_OPENCL_C_VERSION: " << GetInfo<string>(platform, device, CL_DEVICE_OPENCL_C_VERSION).c_str();
os << "\nCL_DEVICE_LOCAL_MEM_SIZE: " << GetInfo<cl_ulong>(platform, device, CL_DEVICE_LOCAL_MEM_SIZE);
os << "\nCL_DEVICE_LOCAL_MEM_TYPE: " << GetInfo<cl_uint>(platform, device, CL_DEVICE_LOCAL_MEM_TYPE);
os << "\nCL_DEVICE_MAX_COMPUTE_UNITS: " << GetInfo<cl_uint>(platform, device, CL_DEVICE_MAX_COMPUTE_UNITS);

View File

@ -29,6 +29,7 @@ public:
const vector<string>& AllDeviceNames() const;
const vector<string>& DeviceNames(size_t platform) const;
size_t TotalDeviceIndex(size_t platform, size_t device) const;
const cl::Device* DeviceFromId(cl_device_id id, size_t& platform, size_t& device) const;
string DumpInfo() const;
bool Ok() const;
bool CreateContext(const cl::Platform& platform, cl::Context& context, bool shared);

View File

@ -387,7 +387,6 @@ bool OpenCLWrapper::AddAndWriteImage(const string& name, cl_mem_flags flags, con
{
if (shared)
{
//::wglMakeCurrent(wglGetCurrentDC(), wglGetCurrentContext());
cl::ImageGL imageGL(m_Context, flags, GL_TEXTURE_2D, 0, texName, &err);
NamedImage2DGL namedImageGL(imageGL, name);

View File

@ -29,50 +29,11 @@ RendererCL<T, bucketT>::RendererCL(const vector<pair<size_t, size_t>>& devices,
m_DEOpenCLKernelCreator(typeid(T) == typeid(double), false),
m_FinalAccumOpenCLKernelCreator(typeid(T) == typeid(double))
{
Init();
Init(devices, shared, outputTexID);
}
/// <summary>
/// Initialization of fields, no OpenCL initialization is done here.
template <typename T, typename bucketT>
void RendererCL<T, bucketT>::Init()
{
m_Init = false;
m_DoublePrecision = typeid(T) == typeid(double);
//Buffer names.
m_EmberBufferName = "Ember";
m_XformsBufferName = "Xforms";
m_ParVarsBufferName = "ParVars";
m_GlobalSharedBufferName = "GlobalShared";
m_SeedsBufferName = "Seeds";
m_DistBufferName = "Dist";
m_CarToRasBufferName = "CarToRas";
m_DEFilterParamsBufferName = "DEFilterParams";
m_SpatialFilterParamsBufferName = "SpatialFilterParams";
m_DECoefsBufferName = "DECoefs";
m_DEWidthsBufferName = "DEWidths";
m_DECoefIndicesBufferName = "DECoefIndices";
m_SpatialFilterCoefsBufferName = "SpatialFilterCoefs";
m_CurvesCsaName = "CurvesCsa";
m_HostBufferName = "Host";
m_HistBufferName = "Hist";
m_AccumBufferName = "Accum";
m_FinalImageName = "Final";
m_PointsBufferName = "Points";
//It's critical that these numbers never change. They are
//based on the cuburn model of each kernel launch containing
//256 threads. 32 wide by 8 high. Everything done in the OpenCL
//iteraion kernel depends on these dimensions.
m_IterCountPerKernel = 256;
m_IterBlockWidth = 32;
m_IterBlockHeight = 8;
m_IterBlocksWide = 64;
m_IterBlocksHigh = 2;
m_PaletteFormat.image_channel_order = CL_RGBA;
m_PaletteFormat.image_channel_data_type = CL_FLOAT;
m_FinalFormat.image_channel_order = CL_RGBA;
m_FinalFormat.image_channel_data_type = CL_FLOAT;
Init(devices, shared, outputTexID);
}
/// <summary>
@ -97,11 +58,12 @@ bool RendererCL<T, bucketT>::Init(const vector<pair<size_t, size_t>>& devices, b
return false;
bool b = false;
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
auto& zeroizeProgram = m_IterOpenCLKernelCreator.ZeroizeKernel();
auto& sumHistProgram = m_IterOpenCLKernelCreator.SumHistKernel();
ostringstream os;
m_Init = false;
m_Shared = false;
m_Devices.clear();
m_Devices.reserve(devices.size());
m_OutputTexID = outputTexID;
@ -115,11 +77,11 @@ bool RendererCL<T, bucketT>::Init(const vector<pair<size_t, size_t>>& devices, b
if ((b = cld->Init()))//Build a simple program to ensure OpenCL is working right.
{
if (b && !(b = cld->m_Wrapper.AddProgram(m_IterOpenCLKernelCreator.ZeroizeEntryPoint(), zeroizeProgram, m_IterOpenCLKernelCreator.ZeroizeEntryPoint(), m_DoublePrecision))) { AddToReport(loc); }
if (b && !(b = cld->m_Wrapper.AddProgram(m_IterOpenCLKernelCreator.ZeroizeEntryPoint(), zeroizeProgram, m_IterOpenCLKernelCreator.ZeroizeEntryPoint(), m_DoublePrecision))) { ErrorStr(loc, "Failed to init zeroize program: "s + cld->ErrorReportString(), cld.get()); }
if (b && !(b = cld->m_Wrapper.AddAndWriteImage("Palette", CL_MEM_READ_ONLY, m_PaletteFormat, 256, 1, 0, nullptr))) { AddToReport(loc); }
if (b && !(b = cld->m_Wrapper.AddAndWriteImage("Palette", CL_MEM_READ_ONLY, m_PaletteFormat, 256, 1, 0, nullptr))) { ErrorStr(loc, "Failed to init palette buffer: "s + cld->ErrorReportString(), cld.get()); }
if (b && !(b = cld->m_Wrapper.AddAndWriteBuffer(m_GlobalSharedBufferName, m_GlobalShared.second.data(), m_GlobalShared.second.size() * sizeof(m_GlobalShared.second[0])))) { AddToReport(loc); }//Empty at start, will be filled in later if needed.
if (b && !(b = cld->m_Wrapper.AddAndWriteBuffer(m_GlobalSharedBufferName, m_GlobalShared.second.data(), m_GlobalShared.second.size() * sizeof(m_GlobalShared.second[0])))) { ErrorStr(loc, "Failed to init global shared buffer: "s + cld->ErrorReportString(), cld.get()); }//Empty at start, will be filled in later if needed.
if (b)
{
@ -127,33 +89,35 @@ bool RendererCL<T, bucketT>::Init(const vector<pair<size_t, size_t>>& devices, b
}
else
{
os << loc << ": failed to init platform " << devices[i].first << ", device " << devices[i].second;
AddToReport(loc);
ErrorStr(loc, "Failed to init programs for platform", cld.get());
break;
}
}
else
{
ErrorStr(loc, "Failed to init device, "s + cld->ErrorReportString(), cld.get());
break;
}
}
catch (const std::exception& e)
{
os << loc << ": failed to init platform " << devices[i].first << ", device " << devices[i].second << ": " << e.what();
AddToReport(os.str());
ErrorStr(loc, "Failed to init platform: "s + e.what(), nullptr);
}
catch (...)
{
os << loc << ": failed to init platform " << devices[i].first << ", device " << devices[i].second;
AddToReport(os.str());
ErrorStr(loc, "Failed to init platform with unknown exception", nullptr);
}
}
if (b && m_Devices.size() == devices.size())
if (b && (m_Devices.size() == devices.size()))
{
auto& firstWrapper = m_Devices[0]->m_Wrapper;
m_DEOpenCLKernelCreator = DEOpenCLKernelCreator(m_DoublePrecision, m_Devices[0]->Nvidia());
//Build a simple program to ensure OpenCL is working right.
if (b && !(b = firstWrapper.AddProgram(m_DEOpenCLKernelCreator.LogScaleAssignDEEntryPoint(), m_DEOpenCLKernelCreator.LogScaleAssignDEKernel(), m_DEOpenCLKernelCreator.LogScaleAssignDEEntryPoint(), m_DoublePrecision))) { AddToReport(loc); }
if (b && !(b = firstWrapper.AddProgram(m_DEOpenCLKernelCreator.LogScaleAssignDEEntryPoint(), m_DEOpenCLKernelCreator.LogScaleAssignDEKernel(), m_DEOpenCLKernelCreator.LogScaleAssignDEEntryPoint(), m_DoublePrecision))) { ErrorStr(loc, "failed to init log scale program", m_Devices[0].get()); }
if (b && !(b = firstWrapper.AddProgram(m_IterOpenCLKernelCreator.SumHistEntryPoint(), sumHistProgram, m_IterOpenCLKernelCreator.SumHistEntryPoint(), m_DoublePrecision))) { AddToReport(loc); }
if (b && !(b = firstWrapper.AddProgram(m_IterOpenCLKernelCreator.SumHistEntryPoint(), sumHistProgram, m_IterOpenCLKernelCreator.SumHistEntryPoint(), m_DoublePrecision))) { ErrorStr(loc, "Failed to init sum histogram program", m_Devices[0].get()); }
if (b)
{
@ -168,16 +132,15 @@ bool RendererCL<T, bucketT>::Init(const vector<pair<size_t, size_t>>& devices, b
FillSeeds();
for (size_t device = 0; device < m_Devices.size(); device++)
if (b && !(b = m_Devices[device]->m_Wrapper.AddAndWriteBuffer(m_SeedsBufferName, reinterpret_cast<void*>(m_Seeds[device].data()), SizeOf(m_Seeds[device])))) { AddToReport(loc); break; }
if (b && !(b = m_Devices[device]->m_Wrapper.AddAndWriteBuffer(m_SeedsBufferName, reinterpret_cast<void*>(m_Seeds[device].data()), SizeOf(m_Seeds[device])))) { ErrorStr(loc, "Failed to init seeds buffer", m_Devices[device].get()); break; }
}
m_Shared = shared;
m_Init = b;
}
else
{
m_Devices.clear();
os << loc << ": failed to init all devices and platforms.";
AddToReport(os.str());
ErrorStr(loc, "Failed to init all devices and platforms", nullptr);
}
return m_Init;
@ -192,7 +155,7 @@ template <typename T, typename bucketT>
bool RendererCL<T, bucketT>::SetOutputTexture(GLuint outputTexID)
{
bool success = true;
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
if (!m_Devices.empty())
{
@ -202,7 +165,7 @@ bool RendererCL<T, bucketT>::SetOutputTexture(GLuint outputTexID)
if (!firstWrapper.AddAndWriteImage(m_FinalImageName, CL_MEM_WRITE_ONLY, m_FinalFormat, FinalRasW(), FinalRasH(), 0, nullptr, firstWrapper.Shared(), m_OutputTexID))
{
AddToReport(loc);
ErrorStr(loc, "Failed to init set output texture", m_Devices[0].get());
success = false;
}
@ -293,10 +256,10 @@ template <typename T, typename bucketT>
bool RendererCL<T, bucketT>::ClearHist()
{
bool b = !m_Devices.empty();
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
for (size_t i = 0; i < m_Devices.size(); i++)
if (b && !(b = ClearBuffer(i, m_HistBufferName, uint(SuperRasW()), uint(SuperRasH()), sizeof(v4bT)))) { AddToReport(loc); break; }
if (b && !(b = ClearBuffer(i, m_HistBufferName, uint(SuperRasW()), uint(SuperRasH()), sizeof(v4bT)))) { ErrorStr(loc, "Failed to clear histogram", m_Devices[i].get()); break; }
return b;
}
@ -310,9 +273,9 @@ template <typename T, typename bucketT>
bool RendererCL<T, bucketT>::ClearHist(size_t device)
{
bool b = device < m_Devices.size();
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
if (b && !(b = ClearBuffer(device, m_HistBufferName, uint(SuperRasW()), uint(SuperRasH()), sizeof(v4bT)))) { AddToReport(loc); }
if (b && !(b = ClearBuffer(device, m_HistBufferName, uint(SuperRasW()), uint(SuperRasH()), sizeof(v4bT)))) { ErrorStr(loc, "Failed to clear histogram", m_Devices[device].get()); }
return b;
}
@ -338,10 +301,10 @@ template <typename T, typename bucketT>
bool RendererCL<T, bucketT>::WritePoints(size_t device, vector<PointCL<T>>& vec)
{
bool b = false;
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
if (device < m_Devices.size())
if (!(b = m_Devices[device]->m_Wrapper.WriteBuffer(m_PointsBufferName, reinterpret_cast<void*>(vec.data()), SizeOf(vec)))) { AddToReport(loc); }
if (!(b = m_Devices[device]->m_Wrapper.WriteBuffer(m_PointsBufferName, reinterpret_cast<void*>(vec.data()), SizeOf(vec)))) { ErrorStr(loc, "Failed to write points buffer", m_Devices[device].get()); }
return b;
}
@ -423,6 +386,7 @@ template <typename T, typename bucketT>
bool RendererCL<T, bucketT>::ClearFinal()
{
vector<v4F> v;
static std::string loc = __FUNCTION__;
if (!m_Devices.empty())
{
@ -431,18 +395,16 @@ bool RendererCL<T, bucketT>::ClearFinal()
if (this->PrepFinalAccumVector(v))
{
bool b = wrapper.WriteImage2D(index, wrapper.Shared(), FinalRasW(), FinalRasH(), 0, v.data());
if (!b)
AddToReport(__FUNCTION__);
return b;
if (!wrapper.WriteImage2D(index, wrapper.Shared(), FinalRasW(), FinalRasH(), 0, v.data()))
ErrorStr(loc, "Failed to clear final buffer", m_Devices[0].get());
else
return false;
}
else
return false;
}
else
return false;
return false;
}
/// <summary>
@ -469,18 +431,6 @@ bool RendererCL<T, bucketT>::Ok() const
return !m_Devices.empty() && m_Init;
}
/// <summary>
/// Clear the error report for this class as well as the OpenCLWrapper members of each device.
/// </summary>
template <typename T, typename bucketT>
void RendererCL<T, bucketT>::ClearErrorReport()
{
EmberReport::ClearErrorReport();
for (auto& device : m_Devices)
device->m_Wrapper.ClearErrorReport();
}
/// <summary>
/// The sub batch size for OpenCL will always be how many
/// iterations are ran per kernel call. The caller can't
@ -514,20 +464,23 @@ template <typename T, typename bucketT>
bool RendererCL<T, bucketT>::CreateDEFilter(bool& newAlloc)
{
bool b = true;
static std::string loc = __FUNCTION__;
if (!m_Devices.empty() && Renderer<T, bucketT>::CreateDEFilter(newAlloc))
{
//Copy coefs and widths here. Convert and copy the other filter params right before calling the filtering kernel.
if (newAlloc)
{
const char* loc = __FUNCTION__;
auto& wrapper = m_Devices[0]->m_Wrapper;
if (b && !(b = wrapper.AddAndWriteBuffer(m_DECoefsBufferName, reinterpret_cast<void*>(const_cast<bucketT*>(m_DensityFilter->Coefs())), m_DensityFilter->CoefsSizeBytes()))) { AddToReport(loc); }
if (b && !(b = wrapper.AddAndWriteBuffer(m_DECoefsBufferName, reinterpret_cast<void*>(const_cast<bucketT*>(m_DensityFilter->Coefs())), m_DensityFilter->CoefsSizeBytes())))
ErrorStr(loc, "Failed to set DE coefficients buffer", m_Devices[0].get());
if (b && !(b = wrapper.AddAndWriteBuffer(m_DEWidthsBufferName, reinterpret_cast<void*>(const_cast<bucketT*>(m_DensityFilter->Widths())), m_DensityFilter->WidthsSizeBytes()))) { AddToReport(loc); }
if (b && !(b = wrapper.AddAndWriteBuffer(m_DEWidthsBufferName, reinterpret_cast<void*>(const_cast<bucketT*>(m_DensityFilter->Widths())), m_DensityFilter->WidthsSizeBytes())))
ErrorStr(loc, "Failed to set DE widths buffer", m_Devices[0].get());
if (b && !(b = wrapper.AddAndWriteBuffer(m_DECoefIndicesBufferName, reinterpret_cast<void*>(const_cast<uint*>(m_DensityFilter->CoefIndices())), m_DensityFilter->CoefsIndicesSizeBytes()))) { AddToReport(loc); }
if (b && !(b = wrapper.AddAndWriteBuffer(m_DECoefIndicesBufferName, reinterpret_cast<void*>(const_cast<uint*>(m_DensityFilter->CoefIndices())), m_DensityFilter->CoefsIndicesSizeBytes())))
ErrorStr(loc, "Failed to set DE coefficient indices buffer", m_Devices[0].get());
}
}
else
@ -546,11 +499,13 @@ template <typename T, typename bucketT>
bool RendererCL<T, bucketT>::CreateSpatialFilter(bool& newAlloc)
{
bool b = true;
static std::string loc = __FUNCTION__;
if (!m_Devices.empty() && Renderer<T, bucketT>::CreateSpatialFilter(newAlloc))
{
if (newAlloc)
if (!(b = m_Devices[0]->m_Wrapper.AddAndWriteBuffer(m_SpatialFilterCoefsBufferName, reinterpret_cast<void*>(m_SpatialFilter->Filter()), m_SpatialFilter->BufferSizeBytes()))) { AddToReport(__FUNCTION__); }
if (!(b = m_Devices[0]->m_Wrapper.AddAndWriteBuffer(m_SpatialFilterCoefsBufferName, reinterpret_cast<void*>(m_SpatialFilter->Filter()), m_SpatialFilter->BufferSizeBytes())))
ErrorStr(loc, "Failed to set patial filter coefficients buffer", m_Devices[0].get());
}
else
b = false;
@ -568,6 +523,25 @@ eRendererType RendererCL<T, bucketT>::RendererType() const
return eRendererType::OPENCL_RENDERER;
}
/// <summary>
/// Get whether the renderer uses a shared texture with OpenGL.
/// </summary>
/// <returns>True if shared, else false.</returns>
template <typename T, typename bucketT>
bool RendererCL<T, bucketT>::Shared() const { return m_Shared; }
/// <summary>
/// Clear the error report for this class as well as the OpenCLWrapper members of each device.
/// </summary>
template <typename T, typename bucketT>
void RendererCL<T, bucketT>::ClearErrorReport()
{
EmberReport::ClearErrorReport();
for (auto& device : m_Devices)
device->m_Wrapper.ClearErrorReport();
}
/// <summary>
/// Concatenate and return the error report for this class and the
/// OpenCLWrapper member of each device as a single string.
@ -579,7 +553,7 @@ string RendererCL<T, bucketT>::ErrorReportString()
auto s = EmberReport::ErrorReportString();
for (auto& device : m_Devices)
s += device->m_Wrapper.ErrorReportString();
s += device->ErrorReportString();
return s;
}
@ -596,7 +570,7 @@ vector<string> RendererCL<T, bucketT>::ErrorReport()
for (auto& device : m_Devices)
{
auto s = device->m_Wrapper.ErrorReport();
auto s = device->ErrorReport();
ours.insert(ours.end(), s.begin(), s.end());
}
@ -615,14 +589,18 @@ template <typename T, typename bucketT>
bool RendererCL<T, bucketT>::RandVec(vector<QTIsaac<ISAAC_SIZE, ISAAC_INT>>& randVec)
{
bool b = Renderer<T, bucketT>::RandVec(randVec);
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
if (!m_Devices.empty())
{
FillSeeds();
for (size_t device = 0; device < m_Devices.size(); device++)
if (b && !(b = m_Devices[device]->m_Wrapper.AddAndWriteBuffer(m_SeedsBufferName, reinterpret_cast<void*>(m_Seeds[device].data()), SizeOf(m_Seeds[device])))) { AddToReport(loc); break; }
if (b && !(b = m_Devices[device]->m_Wrapper.AddAndWriteBuffer(m_SeedsBufferName, reinterpret_cast<void*>(m_Seeds[device].data()), SizeOf(m_Seeds[device]))))
{
ErrorStr(loc, "Failed to set randoms buffer", m_Devices[device].get());
break;
}
}
else
b = false;
@ -664,32 +642,32 @@ bool RendererCL<T, bucketT>::Alloc(bool histOnly)
m_XformsCL.resize(m_Ember.TotalXformCount());
bool b = true;
size_t size = SuperSize() * sizeof(v4bT);//Size of histogram and density filter buffer.
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
auto& wrapper = m_Devices[0]->m_Wrapper;
if (b && !(b = wrapper.AddBuffer(m_DEFilterParamsBufferName, sizeof(m_DensityFilterCL)))) { AddToReport(loc); }
if (b && !(b = wrapper.AddBuffer(m_DEFilterParamsBufferName, sizeof(m_DensityFilterCL)))) { ErrorStr(loc, "Failed to set DE filter parameters buffer", m_Devices[0].get()); }
if (b && !(b = wrapper.AddBuffer(m_SpatialFilterParamsBufferName, sizeof(m_SpatialFilterCL)))) { AddToReport(loc); }
if (b && !(b = wrapper.AddBuffer(m_SpatialFilterParamsBufferName, sizeof(m_SpatialFilterCL)))) { ErrorStr(loc, "Failed to set spatial filter parameters buffer", m_Devices[0].get()); }
if (b && !(b = wrapper.AddBuffer(m_CurvesCsaName, SizeOf(m_Csa)))) { AddToReport(loc); }
if (b && !(b = wrapper.AddBuffer(m_CurvesCsaName, SizeOf(m_Csa)))) { ErrorStr(loc, "Failed to set curves buffer", m_Devices[0].get()); }
if (b && !(b = wrapper.AddBuffer(m_AccumBufferName, size))) { AddToReport(loc); }//Accum buffer.
if (b && !(b = wrapper.AddBuffer(m_AccumBufferName, size))) { ErrorStr(loc, "Failed to set accum buffer", m_Devices[0].get()); }
for (auto& device : m_Devices)
{
if (b && !(b = device->m_Wrapper.AddBuffer(m_EmberBufferName, sizeof(m_EmberCL)))) { AddToReport(loc); break; }
if (b && !(b = device->m_Wrapper.AddBuffer(m_EmberBufferName, sizeof(m_EmberCL)))) { ErrorStr(loc, "Failed to set ember buffer", device.get()); break; }
if (b && !(b = device->m_Wrapper.AddBuffer(m_XformsBufferName, SizeOf(m_XformsCL)))) { AddToReport(loc); break; }
if (b && !(b = device->m_Wrapper.AddBuffer(m_XformsBufferName, SizeOf(m_XformsCL)))) { ErrorStr(loc, "Failed to set xforms buffer", device.get()); break; }
if (b && !(b = device->m_Wrapper.AddBuffer(m_ParVarsBufferName, 128 * sizeof(T)))) { AddToReport(loc); break; }
if (b && !(b = device->m_Wrapper.AddBuffer(m_ParVarsBufferName, 128 * sizeof(T)))) { ErrorStr(loc, "Failed to set parametric variations buffer", device.get()); break; }
if (b && !(b = device->m_Wrapper.AddBuffer(m_DistBufferName, CHOOSE_XFORM_GRAIN))) { AddToReport(loc); break; }//Will be resized for xaos.
if (b && !(b = device->m_Wrapper.AddBuffer(m_DistBufferName, CHOOSE_XFORM_GRAIN))) { ErrorStr(loc, "Failed to set xforms distribution buffer", device.get()); break; }//Will be resized for xaos.
if (b && !(b = device->m_Wrapper.AddBuffer(m_CarToRasBufferName, sizeof(m_CarToRasCL)))) { AddToReport(loc); break; }
if (b && !(b = device->m_Wrapper.AddBuffer(m_CarToRasBufferName, sizeof(m_CarToRasCL)))) { ErrorStr(loc, "Failed to set cartesian to raster buffer", device.get()); break; }
if (b && !(b = device->m_Wrapper.AddBuffer(m_HistBufferName, size))) { AddToReport(loc); break; }//Histogram. Will memset to zero later.
if (b && !(b = device->m_Wrapper.AddBuffer(m_HistBufferName, size))) { ErrorStr(loc, "Failed to set histogram buffer", device.get()); break; }//Histogram. Will memset to zero later.
if (b && !(b = device->m_Wrapper.AddBuffer(m_PointsBufferName, IterGridKernelCount() * sizeof(PointCL<T>)))) { AddToReport(loc); break; }//Points between iter calls.
if (b && !(b = device->m_Wrapper.AddBuffer(m_PointsBufferName, IterGridKernelCount() * sizeof(PointCL<T>)))) { ErrorStr(loc, "Failed to set points buffer", device.get()); break; }//Points between iter calls.
//Global shared is allocated once and written when building the kernel.
}
@ -699,7 +677,7 @@ bool RendererCL<T, bucketT>::Alloc(bool histOnly)
LeaveResize();
if (b && !(b = SetOutputTexture(m_OutputTexID))) { AddToReport(loc); }
if (b && !(b = SetOutputTexture(m_OutputTexID))) { ErrorStr(loc, "Failed to set output texture", m_Devices[0].get()); }
return b;
}
@ -765,23 +743,28 @@ eRenderStatus RendererCL<T, bucketT>::GaussianDensityFilter()
/// <summary>
/// Run final accumulation on the primary device.
/// If pixels is nullptr, the output will remain in the OpenCL 2D image.
/// However, if pixels is not nullptr, the output will be copied. This is
/// useful when rendering in OpenCL, but saving the output to a file.
/// If the first device is shared, the output will remain in the OpenCL 2D image and no copying will take place.
/// If it is not shared, then the image will be copied into the pixels vector.
/// </summary>
/// <param name="pixels">The pixels to copy the final image to if not nullptr</param>
/// <param name="pixels">The pixel vector to allocate and store the final image in</param>
/// <param name="finalOffset">Offset in the buffer to store the pixels to</param>
/// <returns>True if success and not aborted, else false.</returns>
/// <returns>True if not prematurely aborted, else false.</returns>
template <typename T, typename bucketT>
eRenderStatus RendererCL<T, bucketT>::AccumulatorToFinalImage(v4F* pixels, size_t finalOffset)
eRenderStatus RendererCL<T, bucketT>::AccumulatorToFinalImage(vector<v4F>& pixels, size_t finalOffset)
{
auto status = RunFinalAccum();
if (status == eRenderStatus::RENDER_OK && pixels && !m_Devices.empty() && !m_Devices[0]->m_Wrapper.Shared())
if (status == eRenderStatus::RENDER_OK && !m_Devices.empty() && !m_Devices[0]->m_Wrapper.Shared())
{
pixels += finalOffset;
if (PrepFinalAccumVector(pixels))
{
auto p = pixels.data();
p += finalOffset;
if (!ReadFinal(pixels))
if (!ReadFinal(p))
status = eRenderStatus::RENDER_ERROR;
}
else
status = eRenderStatus::RENDER_ERROR;
}
@ -805,7 +788,7 @@ EmberStats RendererCL<T, bucketT>::Iterate(size_t iterCount, size_t temporalSamp
{
bool b = true;
EmberStats stats;//Do not record bad vals with the GPU. If the user needs to investigate bad vals, use the CPU.
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
//Only need to do this once on the beginning of a new render. Last iter will always be 0 at the beginning of a full render or temporal sample.
if (m_LastIter == 0)
@ -825,19 +808,34 @@ EmberStats RendererCL<T, bucketT>::Iterate(size_t iterCount, size_t temporalSamp
auto& wrapper = device->m_Wrapper;
if (b && !(b = wrapper.WriteBuffer(m_EmberBufferName, reinterpret_cast<void*>(&m_EmberCL), sizeof(m_EmberCL))))
{
ErrorStr(loc, "Write ember buffer failed", device.get());
break;
}
if (b && !(b = wrapper.WriteBuffer(m_XformsBufferName, reinterpret_cast<void*>(m_XformsCL.data()), sizeof(m_XformsCL[0]) * m_XformsCL.size())))
{
ErrorStr(loc, "Write xforms buffer failed", device.get());
break;
}
if (b && !(b = wrapper.AddAndWriteBuffer(m_DistBufferName, reinterpret_cast<void*>(const_cast<byte*>(XformDistributions())), XformDistributionsSize())))//Will be resized for xaos.
{
ErrorStr(loc, "Write xforms distribution buffer failed", device.get());
break;
}
if (b && !(b = wrapper.WriteBuffer(m_CarToRasBufferName, reinterpret_cast<void*>(&m_CarToRasCL), sizeof(m_CarToRasCL))))
{
ErrorStr(loc, "Write cartesian to raster buffer failed", device.get());
break;
}
if (b && !(b = wrapper.AddAndWriteImage("Palette", CL_MEM_READ_ONLY, m_PaletteFormat, m_Dmap.m_Entries.size(), 1, 0, m_Dmap.m_Entries.data())))
{
ErrorStr(loc, "Write palette buffer failed", device.get());
break;
}
if (b)
{
@ -846,8 +844,13 @@ EmberStats RendererCL<T, bucketT>::Iterate(size_t iterCount, size_t temporalSamp
//Don't know the size of the parametric variations parameters buffer until the ember is examined.
//So set it up right before the run.
if (!m_Params.second.empty())
{
if (!wrapper.AddAndWriteBuffer(m_ParVarsBufferName, m_Params.second.data(), m_Params.second.size() * sizeof(m_Params.second[0])))
{
ErrorStr(loc, "Write parametric variations buffer failed", device.get());
break;
}
}
}
else
break;
@ -873,7 +876,7 @@ EmberStats RendererCL<T, bucketT>::Iterate(size_t iterCount, size_t temporalSamp
else
{
m_Abort = true;
AddToReport(loc);
ErrorStr(loc, "Iiteration failed", nullptr);
}
return stats;
@ -894,7 +897,7 @@ bool RendererCL<T, bucketT>::BuildIterProgramForEmber(bool doAccum)
{
//Timing t;
bool b = !m_Devices.empty();
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
IterOpenCLKernelCreator<T>::ParVarIndexDefines(m_Ember, m_Params, false, true);//Do with string and no vals.
IterOpenCLKernelCreator<T>::SharedDataIndexDefines(m_Ember, m_GlobalShared, true, true);//Do with string and vals only once on build since it won't change until another build occurs.
@ -909,7 +912,7 @@ bool RendererCL<T, bucketT>::BuildIterProgramForEmber(bool doAccum)
{
rlg l(m_ResizeCs);//Just use the resize CS for lack of a better one.
b = false;
AddToReport(string(loc) + "()\n" + dev->m_Wrapper.DeviceName() + ":\nBuilding the following program failed: \n" + m_IterKernel + "\n");
ErrorStr(loc, "Building the following program failed\n"s + m_IterKernel, dev);
}
else if (!m_GlobalShared.second.empty())
{
@ -917,7 +920,7 @@ bool RendererCL<T, bucketT>::BuildIterProgramForEmber(bool doAccum)
{
rlg l(m_ResizeCs);//Just use the resize CS for lack of a better one.
b = false;
AddToReport(string(loc) + "()\n" + dev->m_Wrapper.DeviceName() + ":\nAdding global shared buffer failed.\n");
ErrorStr(loc, "Adding global shared buffer failed", dev);
}
}
};
@ -960,7 +963,7 @@ bool RendererCL<T, bucketT>::RunIter(size_t iterCount, size_t temporalSample, si
bool success = !m_Devices.empty();
uint histSuperSize = uint(SuperSize());
size_t launches = size_t(ceil(double(iterCount) / IterCountPerGrid()));
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
vector<std::thread> threadVec;
std::atomic<size_t> atomLaunchesRan;
std::atomic<intmax_t> atomItersRan, atomItersRemaining;
@ -1006,31 +1009,31 @@ bool RendererCL<T, bucketT>::RunIter(size_t iterCount, size_t temporalSample, si
size_t iterCountThisLaunch = iterCountPerKernel * IterGridKernelWidth() * IterGridKernelHeight();
//cout << "itersRemaining " << itersRemaining << ", iterCountPerKernel " << iterCountPerKernel << ", iterCountThisLaunch " << iterCountThisLaunch << "\n";
if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, iterCountPerKernel))) { AddToReport(loc); }//Number of iters for each thread to run.
if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, iterCountPerKernel))) { ErrorStr(loc, "Setting iter count argument failed", m_Devices[dev].get()); }//Number of iters for each thread to run.
if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, fuse))) { AddToReport(loc); }//Number of iters to fuse.
if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, fuse))) { ErrorStr(loc, "Setting fuse count argument failed", m_Devices[dev].get()); }//Number of iters to fuse.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_SeedsBufferName))) { AddToReport(loc); }//Seeds.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_SeedsBufferName))) { ErrorStr(loc, "Setting seeds buffer argument failed", m_Devices[dev].get()); }//Seeds.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_EmberBufferName))) { AddToReport(loc); }//Ember.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_EmberBufferName))) { ErrorStr(loc, "Setting ember buffer argument failed", m_Devices[dev].get()); }//Ember.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_XformsBufferName))) { AddToReport(loc); }//Xforms.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_XformsBufferName))) { ErrorStr(loc, "Setting xforms buffer argument failed", m_Devices[dev].get()); }//Xforms.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_ParVarsBufferName))) { AddToReport(loc); }//Parametric variation parameters.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_ParVarsBufferName))) { ErrorStr(loc, "Setting parametric variations buffer argument failed", m_Devices[dev].get()); }//Parametric variation parameters.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_GlobalSharedBufferName))) { AddToReport(loc); }//Global shared data.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_GlobalSharedBufferName))) { ErrorStr(loc, "Setting global shared buffer argument failed", m_Devices[dev].get()); }//Global shared data.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_DistBufferName))) { AddToReport(loc); }//Xform distributions.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_DistBufferName))) { ErrorStr(loc, "Setting xforms distribution buffer argument failed", m_Devices[dev].get()); }//Xform distributions.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_CarToRasBufferName))) { AddToReport(loc); }//Coordinate converter.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_CarToRasBufferName))) { ErrorStr(loc, "Setting cartesian to raster buffer argument failed", m_Devices[dev].get()); }//Coordinate converter.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_HistBufferName))) { AddToReport(loc); }//Histogram.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_HistBufferName))) { ErrorStr(loc, "Setting histogram buffer argument failed", m_Devices[dev].get()); }//Histogram.
if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, histSuperSize))) { AddToReport(loc); }//Histogram size.
if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, histSuperSize))) { ErrorStr(loc, "Setting histogram size argument failed", m_Devices[dev].get()); }//Histogram size.
if (b && !(b = wrapper.SetImageArg (kernelIndex, argIndex++, false, "Palette"))) { AddToReport(loc); }//Palette.
if (b && !(b = wrapper.SetImageArg (kernelIndex, argIndex++, false, "Palette"))) { ErrorStr(loc, "Setting palette argument failed", m_Devices[dev].get()); }//Palette.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_PointsBufferName))) { AddToReport(loc); }//Random start points.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_PointsBufferName))) { ErrorStr(loc, "Setting points buffer argument failed", m_Devices[dev].get()); }//Random start points.
if (b && !(b = wrapper.RunKernel(kernelIndex,
IterGridKernelWidth(),//Total grid dims.
@ -1042,7 +1045,7 @@ bool RendererCL<T, bucketT>::RunIter(size_t iterCount, size_t temporalSample, si
{
success = false;
m_Abort = true;
AddToReport(loc);
ErrorStr(loc, "Error running iteration program", m_Devices[dev].get());
atomLaunchesRan.fetch_sub(1);
break;
}
@ -1107,9 +1110,9 @@ bool RendererCL<T, bucketT>::RunIter(size_t iterCount, size_t temporalSample, si
if (m_Devices.size() > 1)//Determine whether/when to sum histograms of secondary devices with the primary.
{
if (((TemporalSamples() == 1) || (temporalSample == TemporalSamples() - 1)) &&//If there are no temporal samples (not animating), or the current one is the last...
if (((TemporalSamples() == 1) || (temporalSample == TemporalSamples() - 1)) &&//If there are no temporal samples (not animating), or the current one is the last... (probably doesn't matter anymore since we never use multiple renders for a single frame when animating, instead each frame gets its own renderer).
((m_LastIter + itersRan) >= ItersPerTemporalSample()))//...and the required number of iters for that sample have completed...
if (success && !(success = SumDeviceHist())) { AddToReport(loc); }//...read the histogram from the secondary devices and sum them to the primary.
if (success && !(success = SumDeviceHist())) { ErrorStr(loc, "Summing histograms failed", nullptr); }//...read the histogram from the secondary devices and sum them to the primary.
}
//t2.Toc(__FUNCTION__);
@ -1125,12 +1128,12 @@ eRenderStatus RendererCL<T, bucketT>::RunLogScaleFilter()
{
//Timing t(4);
bool b = !m_Devices.empty();
static std::string loc = __FUNCTION__;
if (b)
{
auto& wrapper = m_Devices[0]->m_Wrapper;
int kernelIndex = wrapper.FindKernelIndex(m_DEOpenCLKernelCreator.LogScaleAssignDEEntryPoint());
const char* loc = __FUNCTION__;
if (kernelIndex != -1)
{
@ -1142,16 +1145,16 @@ eRenderStatus RendererCL<T, bucketT>::RunLogScaleFilter()
size_t gridH = m_DensityFilterCL.m_SuperRasH;
OpenCLWrapper::MakeEvenGridDims(blockW, blockH, gridW, gridH);
if (b && !(b = wrapper.AddAndWriteBuffer(m_DEFilterParamsBufferName, reinterpret_cast<void*>(&m_DensityFilterCL), sizeof(m_DensityFilterCL)))) { AddToReport(loc); }
if (b && !(b = wrapper.AddAndWriteBuffer(m_DEFilterParamsBufferName, reinterpret_cast<void*>(&m_DensityFilterCL), sizeof(m_DensityFilterCL)))) { ErrorStr(loc, "Adding DE filter parameters buffer failed", m_Devices[0].get()); }
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_HistBufferName))) { AddToReport(loc); }//Histogram.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_HistBufferName))) { ErrorStr(loc, "Setting histogram buffer argument failed", m_Devices[0].get()); }//Histogram.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_AccumBufferName))) { AddToReport(loc); }//Accumulator.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_AccumBufferName))) { ErrorStr(loc, "Setting accumulator buffer argument failed", m_Devices[0].get()); }//Accumulator.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_DEFilterParamsBufferName))) { AddToReport(loc); }//DensityFilterCL.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_DEFilterParamsBufferName))) { ErrorStr(loc, "Setting DE filter parameters buffer argument failed", m_Devices[0].get()); }//DensityFilterCL.
//t.Tic();
if (b && !(b = wrapper.RunKernel(kernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { AddToReport(loc); }
if (b && !(b = wrapper.RunKernel(kernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { ErrorStr(loc, "Running log scale program failed", m_Devices[0].get()); }
//t.Toc(loc);
@ -1162,7 +1165,7 @@ eRenderStatus RendererCL<T, bucketT>::RunLogScaleFilter()
else
{
b = false;
AddToReport(loc);
ErrorStr(loc, "Invalid kernel index for log scale program", m_Devices[0].get());
}
}
@ -1212,7 +1215,7 @@ eRenderStatus RendererCL<T, bucketT>::RunDensityFilter()
uint chunkSizeH = gapH + 1;//Chunk size is also in terms of blocks and is one block (the one running) plus the gap below it.
double totalChunks = chunkSizeW * chunkSizeH;
if (b && !(b = wrapper.AddAndWriteBuffer(m_DEFilterParamsBufferName, reinterpret_cast<void*>(&m_DensityFilterCL), sizeof(m_DensityFilterCL)))) { AddToReport(loc); }
if (b && !(b = wrapper.AddAndWriteBuffer(m_DEFilterParamsBufferName, reinterpret_cast<void*>(&m_DensityFilterCL), sizeof(m_DensityFilterCL)))) { ErrorStr(loc, "Writing DE filter parameters buffer failed", m_Devices[0].get()); }
#ifdef ROW_ONLY_DE
blockSizeW = 64;//These *must* both be divisible by 16 or else pixels will go missing.
@ -1256,7 +1259,11 @@ eRenderStatus RendererCL<T, bucketT>::RunDensityFilter()
for (uint colChunkPass = 0; b && !m_Abort && colChunkPass < chunkSizeW; colChunkPass++)//Number of horizontal passes.
{
//t2.Tic();
if (b && !(b = RunDensityFilterPrivate(kernelIndex, gridW, gridH, blockSizeW, blockSizeH, chunkSizeW, chunkSizeH, colChunkPass, rowChunkPass))) { m_Abort = true; AddToReport(loc); }
if (b && !(b = RunDensityFilterPrivate(kernelIndex, gridW, gridH, blockSizeW, blockSizeH, chunkSizeW, chunkSizeH, colChunkPass, rowChunkPass)))
{
m_Abort = true;
ErrorStr(loc, "Running DE filter program for row chunk "s + std::to_string(rowChunkPass) + ", col chunk "s + std::to_string(colChunkPass) + " failed", m_Devices[0].get());
}
//t2.Toc(loc);
@ -1282,7 +1289,7 @@ eRenderStatus RendererCL<T, bucketT>::RunDensityFilter()
else
{
b = false;
AddToReport(loc);
ErrorStr(loc, "Invalid kernel index for DE filter program", m_Devices[0].get());
}
return m_Abort ? eRenderStatus::RENDER_ABORT : (b ? eRenderStatus::RENDER_OK : eRenderStatus::RENDER_ERROR);
@ -1304,7 +1311,7 @@ eRenderStatus RendererCL<T, bucketT>::RunFinalAccum()
size_t blockW;
size_t blockH;
uint curvesSet = m_CurvesSet ? 1 : 0;
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
if (!m_Abort && accumKernelIndex != -1)
{
@ -1312,9 +1319,9 @@ eRenderStatus RendererCL<T, bucketT>::RunFinalAccum()
//This is needed with or without early clip.
ConvertSpatialFilter();
if (b && !(b = wrapper.AddAndWriteBuffer(m_SpatialFilterParamsBufferName, reinterpret_cast<void*>(&m_SpatialFilterCL), sizeof(m_SpatialFilterCL)))) { AddToReport(loc); }
if (b && !(b = wrapper.AddAndWriteBuffer(m_SpatialFilterParamsBufferName, reinterpret_cast<void*>(&m_SpatialFilterCL), sizeof(m_SpatialFilterCL)))) { ErrorStr(loc, "Adding spatial filter parameters buffer", m_Devices[0].get()); }
if (b && !(b = wrapper.AddAndWriteBuffer(m_CurvesCsaName, m_Csa.data(), SizeOf(m_Csa)))) { AddToReport(loc); }
if (b && !(b = wrapper.AddAndWriteBuffer(m_CurvesCsaName, m_Csa.data(), SizeOf(m_Csa)))) { ErrorStr(loc, "Adding curves buffer", m_Devices[0].get()); }
//Since early clip requires gamma correcting the entire accumulator first,
//it can't be done inside of the normal final accumulation kernel, so
@ -1332,16 +1339,16 @@ eRenderStatus RendererCL<T, bucketT>::RunFinalAccum()
gridH = m_SpatialFilterCL.m_SuperRasH;
OpenCLWrapper::MakeEvenGridDims(blockW, blockH, gridW, gridH);
if (b && !(b = wrapper.SetBufferArg(gammaCorrectKernelIndex, argIndex++, m_AccumBufferName))) { AddToReport(loc); }//Accumulator.
if (b && !(b = wrapper.SetBufferArg(gammaCorrectKernelIndex, argIndex++, m_AccumBufferName))) { ErrorStr(loc, "Setting early clip accumulator buffer argument failed", m_Devices[0].get()); }//Accumulator.
if (b && !(b = wrapper.SetBufferArg(gammaCorrectKernelIndex, argIndex++, m_SpatialFilterParamsBufferName))) { AddToReport(loc); }//SpatialFilterCL.
if (b && !(b = wrapper.SetBufferArg(gammaCorrectKernelIndex, argIndex++, m_SpatialFilterParamsBufferName))) { ErrorStr(loc, "Setting early clip spatial filter parameters buffer argument failed", m_Devices[0].get()); }//SpatialFilterCL.
if (b && !(b = wrapper.RunKernel(gammaCorrectKernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { AddToReport(loc); }
if (b && !(b = wrapper.RunKernel(gammaCorrectKernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { ErrorStr(loc, "Running early clip gamma correction program failed", m_Devices[0].get()); }
}
else
{
b = false;
AddToReport(loc);
ErrorStr(loc, "Invalid kernel index for early clip gamma correction program", m_Devices[0].get());
}
}
@ -1352,32 +1359,32 @@ eRenderStatus RendererCL<T, bucketT>::RunFinalAccum()
gridH = m_SpatialFilterCL.m_FinalRasH;
OpenCLWrapper::MakeEvenGridDims(blockW, blockH, gridW, gridH);
if (b && !(b = wrapper.SetBufferArg(accumKernelIndex, argIndex++, m_AccumBufferName))) { AddToReport(loc); }//Accumulator.
if (b && !(b = wrapper.SetBufferArg(accumKernelIndex, argIndex++, m_AccumBufferName))) { ErrorStr(loc, "Setting accumulator buffer argument failed", m_Devices[0].get()); }//Accumulator.
if (b && !(b = wrapper.SetImageArg(accumKernelIndex, argIndex++, wrapper.Shared(), m_FinalImageName))) { AddToReport(loc); }//Final image.
if (b && !(b = wrapper.SetImageArg(accumKernelIndex, argIndex++, wrapper.Shared(), m_FinalImageName))) { ErrorStr(loc, "Setting accumulator final image buffer argument failed", m_Devices[0].get()); }//Final image.
if (b && !(b = wrapper.SetBufferArg(accumKernelIndex, argIndex++, m_SpatialFilterParamsBufferName))) { AddToReport(loc); }//SpatialFilterCL.
if (b && !(b = wrapper.SetBufferArg(accumKernelIndex, argIndex++, m_SpatialFilterParamsBufferName))) { ErrorStr(loc, "Setting spatial filter parameters buffer argument failed", m_Devices[0].get()); }//SpatialFilterCL.
if (b && !(b = wrapper.SetBufferArg(accumKernelIndex, argIndex++, m_SpatialFilterCoefsBufferName))) { AddToReport(loc); }//Filter coefs.
if (b && !(b = wrapper.SetBufferArg(accumKernelIndex, argIndex++, m_SpatialFilterCoefsBufferName))) { ErrorStr(loc, "Setting spatial filter coefficients buffer argument failed", m_Devices[0].get()); }//Filter coefs.
if (b && !(b = wrapper.SetBufferArg(accumKernelIndex, argIndex++, m_CurvesCsaName))) { AddToReport(loc); }//Curve points.
if (b && !(b = wrapper.SetBufferArg(accumKernelIndex, argIndex++, m_CurvesCsaName))) { ErrorStr(loc, "Setting curves buffer argument failed", m_Devices[0].get()); }//Curve points.
if (b && !(b = wrapper.SetArg (accumKernelIndex, argIndex++, curvesSet))) { AddToReport(loc); }//Do curves.
if (b && !(b = wrapper.SetArg (accumKernelIndex, argIndex++, curvesSet))) { ErrorStr(loc, "Setting curves boolean argument failed", m_Devices[0].get()); }//Do curves.
if (b && wrapper.Shared())
if (b && !(b = wrapper.EnqueueAcquireGLObjects(m_FinalImageName))) { AddToReport(loc); }
if (b && !(b = wrapper.EnqueueAcquireGLObjects(m_FinalImageName))) { ErrorStr(loc, "Acquiring OpenGL texture failed", m_Devices[0].get()); }
if (b && !(b = wrapper.RunKernel(accumKernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { AddToReport(loc); }
if (b && !(b = wrapper.RunKernel(accumKernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { ErrorStr(loc, "Running final accumulation program failed", m_Devices[0].get()); }
if (b && wrapper.Shared())
if (b && !(b = wrapper.EnqueueReleaseGLObjects(m_FinalImageName))) { AddToReport(loc); }
if (b && !(b = wrapper.EnqueueReleaseGLObjects(m_FinalImageName))) { ErrorStr(loc, "Releasing OpenGL texture failed", m_Devices[0].get()); }
//t.Toc((char*)loc);
}
else
{
b = false;
AddToReport(loc);
ErrorStr(loc, "Invalid kernel index for final accumulation program", m_Devices[0].get());
}
return b ? eRenderStatus::RENDER_OK : eRenderStatus::RENDER_ERROR;
@ -1402,7 +1409,7 @@ bool RendererCL<T, bucketT>::ClearBuffer(size_t device, const string& bufferName
auto& wrapper = m_Devices[device]->m_Wrapper;
int kernelIndex = wrapper.FindKernelIndex(m_IterOpenCLKernelCreator.ZeroizeEntryPoint());
cl_uint argIndex = 0;
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
if (kernelIndex != -1)
{
@ -1413,17 +1420,17 @@ bool RendererCL<T, bucketT>::ClearBuffer(size_t device, const string& bufferName
b = true;
OpenCLWrapper::MakeEvenGridDims(blockW, blockH, gridW, gridH);
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, bufferName))) { AddToReport(loc); }//Buffer of byte.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, bufferName))) { ErrorStr(loc, "Setting clear buffer argument failed", m_Devices[device].get()); }//Buffer of byte.
if (b && !(b = wrapper.SetArg(kernelIndex, argIndex++, width * elementSize))) { AddToReport(loc); }//Width.
if (b && !(b = wrapper.SetArg(kernelIndex, argIndex++, width * elementSize))) { ErrorStr(loc, "Setting clear buffer width argument failed", m_Devices[device].get()); }//Width.
if (b && !(b = wrapper.SetArg(kernelIndex, argIndex++, height))) { AddToReport(loc); }//Height.
if (b && !(b = wrapper.SetArg(kernelIndex, argIndex++, height))) { ErrorStr(loc, "Setting clear buffer height argument failed", m_Devices[device].get()); }//Height.
if (b && !(b = wrapper.RunKernel(kernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { AddToReport(loc); }
if (b && !(b = wrapper.RunKernel(kernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { ErrorStr(loc, "Running clear buffer program failed", m_Devices[device].get()); }
}
else
{
AddToReport(loc);
ErrorStr(loc, "Invalid kernel index for clear buffer program", m_Devices[device].get());
}
}
@ -1450,36 +1457,36 @@ bool RendererCL<T, bucketT>::RunDensityFilterPrivate(size_t kernelIndex, size_t
//Timing t(4);
bool b = true;
cl_uint argIndex = 0;
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
if (!m_Devices.empty())
{
auto& wrapper = m_Devices[0]->m_Wrapper;
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex, m_HistBufferName))) { AddToReport(loc); } argIndex++;//Histogram.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex, m_HistBufferName))) { ErrorStr(loc, "Setting histogram buffer argument failed", m_Devices[0].get()); } argIndex++;//Histogram.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex, m_AccumBufferName))) { AddToReport(loc); } argIndex++;//Accumulator.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex, m_AccumBufferName))) { ErrorStr(loc, "Setting accumulator buffer argument failed", m_Devices[0].get()); } argIndex++;//Accumulator.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex, m_DEFilterParamsBufferName))) { AddToReport(loc); } argIndex++;//FlameDensityFilterCL.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex, m_DEFilterParamsBufferName))) { ErrorStr(loc, "Setting DE filter parameters buffer argument failed", m_Devices[0].get()); } argIndex++;//FlameDensityFilterCL.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex, m_DECoefsBufferName))) { AddToReport(loc); } argIndex++;//Coefs.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex, m_DECoefsBufferName))) { ErrorStr(loc, "Setting DE coefficients buffer argument failed", m_Devices[0].get()); } argIndex++;//Coefs.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex, m_DEWidthsBufferName))) { AddToReport(loc); } argIndex++;//Widths.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex, m_DEWidthsBufferName))) { ErrorStr(loc, "Setting DE widths buffer argument failed", m_Devices[0].get()); } argIndex++;//Widths.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex, m_DECoefIndicesBufferName))) { AddToReport(loc); } argIndex++;//Coef indices.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex, m_DECoefIndicesBufferName))) { ErrorStr(loc, "Setting DE coefficient indices buffer argument failed", m_Devices[0].get()); } argIndex++;//Coef indices.
if (b && !(b = wrapper.SetArg(kernelIndex, argIndex, chunkSizeW))) { AddToReport(loc); } argIndex++;//Chunk size width (gapW + 1).
if (b && !(b = wrapper.SetArg(kernelIndex, argIndex, chunkSizeW))) { ErrorStr(loc, "Setting chunk size width argument failed", m_Devices[0].get()); } argIndex++;//Chunk size width (gapW + 1).
if (b && !(b = wrapper.SetArg(kernelIndex, argIndex, chunkSizeH))) { AddToReport(loc); } argIndex++;//Chunk size height (gapH + 1).
if (b && !(b = wrapper.SetArg(kernelIndex, argIndex, chunkSizeH))) { ErrorStr(loc, "Setting chunk size height argument failed", m_Devices[0].get()); } argIndex++;//Chunk size height (gapH + 1).
if (b && !(b = wrapper.SetArg(kernelIndex, argIndex, colChunkPass))) { AddToReport(loc); } argIndex++;//Column chunk, horizontal pass.
if (b && !(b = wrapper.SetArg(kernelIndex, argIndex, colChunkPass))) { ErrorStr(loc, "Setting col chunk pass argument failed", m_Devices[0].get()); } argIndex++;//Column chunk, horizontal pass.
if (b && !(b = wrapper.SetArg(kernelIndex, argIndex, rowChunkPass))) { AddToReport(loc); } argIndex++;//Row chunk, vertical pass.
if (b && !(b = wrapper.SetArg(kernelIndex, argIndex, rowChunkPass))) { ErrorStr(loc, "Setting row chunk pass argument failed", m_Devices[0].get()); } argIndex++;//Row chunk, vertical pass.
//t.Toc(__FUNCTION__ " set args");
//t.Tic();
if (b && !(b = wrapper.RunKernel(kernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { AddToReport(loc); }//Method 7, accumulating to temp box area.
if (b && !(b = wrapper.RunKernel(kernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { ErrorStr(loc, "Running DE filter program failed", m_Devices[0].get()); }//Method 7, accumulating to temp box area.
//t.Toc(__FUNCTION__ " RunKernel()");
return b;
@ -1512,7 +1519,7 @@ int RendererCL<T, bucketT>::MakeAndGetDensityFilterProgram(size_t ss, uint filte
if (wrapper.AddProgram(deEntryPoint, kernel, deEntryPoint, m_DoublePrecision))
kernelIndex = wrapper.FindKernelIndex(deEntryPoint);//Try to find it again, it will be present if successfully built.
else
AddToReport(string(loc) + "():\nBuilding the following program failed: \n" + kernel + "\n");
ErrorStr(loc, "Adding the DE filter program at "s + deEntryPoint + " failed to build:\n"s + kernel, m_Devices[0].get());
}
}
@ -1541,7 +1548,7 @@ int RendererCL<T, bucketT>::MakeAndGetFinalAccumProgram()
if (wrapper.AddProgram(finalAccumEntryPoint, kernel, finalAccumEntryPoint, m_DoublePrecision))
kernelIndex = wrapper.FindKernelIndex(finalAccumEntryPoint);//Try to find it again, it will be present if successfully built.
else
AddToReport(loc);
ErrorStr(loc, "Adding final accumulation program "s + finalAccumEntryPoint + " failed"s, m_Devices[0].get());
}
}
@ -1560,7 +1567,7 @@ int RendererCL<T, bucketT>::MakeAndGetGammaCorrectionProgram()
auto& wrapper = m_Devices[0]->m_Wrapper;
auto& gammaEntryPoint = m_FinalAccumOpenCLKernelCreator.GammaCorrectionEntryPoint();
int kernelIndex = wrapper.FindKernelIndex(gammaEntryPoint);
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
if (kernelIndex == -1)//Has not been built yet.
{
@ -1570,7 +1577,7 @@ int RendererCL<T, bucketT>::MakeAndGetGammaCorrectionProgram()
if (b)
kernelIndex = wrapper.FindKernelIndex(gammaEntryPoint);//Try to find it again, it will be present if successfully built.
else
AddToReport(loc);
ErrorStr(loc, "Adding gamma correction program "s + gammaEntryPoint + " failed"s, m_Devices[0].get());
}
return kernelIndex;
@ -1589,15 +1596,15 @@ bool RendererCL<T, bucketT>::CreateHostBuffer()
{
bool b = true;
size_t size = SuperSize() * sizeof(v4bT);//Size of histogram and density filter buffer.
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
if (b = Renderer<T, bucketT>::Alloc(true))//Allocate the histogram memory to point this HOST_PTR buffer to, other buffers not needed.
{
if (b && !(b = m_Devices[0]->m_Wrapper.AddHostBuffer(m_HostBufferName, size, reinterpret_cast<void*>(HistBuckets()))))
AddToReport(string(loc) + ": creating OpenCL HOST_PTR buffer to point to host side histogram failed.");//Host side histogram for temporary use with multiple devices.
if (b && !(b = m_Devices[0]->m_Wrapper.AddHostBuffer(m_HostBufferName, size, reinterpret_cast<void*>(HistBuckets()))))//Host side histogram for temporary use with multiple devices.
ErrorStr(loc, "Creating OpenCL HOST_PTR buffer to point to host side histogram failed", m_Devices[0].get());
}
else
AddToReport(string(loc) + ": allocating host side histogram failed.");//Allocating histogram failed, something is seriously wrong.
ErrorStr(loc, "Allocating host side histogram failed", m_Devices[0].get());//Allocating histogram failed, something is seriously wrong.
return b;
}
@ -1617,7 +1624,7 @@ bool RendererCL<T, bucketT>::SumDeviceHist()
//Timing t;
bool b = true;
auto& wrapper = m_Devices[0]->m_Wrapper;
const char* loc = __FUNCTION__;
static std::string loc = __FUNCTION__;
size_t blockW = m_Devices[0]->Nvidia() ? 32 : 16;//Max work group size is 256 on AMD, which means 16x16.
size_t blockH = m_Devices[0]->Nvidia() ? 32 : 16;
size_t gridW = SuperRasW();
@ -1635,20 +1642,21 @@ bool RendererCL<T, bucketT>::SumDeviceHist()
{
cl_uint argIndex = 0;
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_HostBufferName))) { break; }//Source buffer of v4bT.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_HostBufferName))) { ErrorStr(loc, "Setting host buffer argument failed", m_Devices[device].get()); break; }//Source buffer of v4bT.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_HistBufferName))) { break; }//Dest buffer of v4bT.
if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_HistBufferName))) { ErrorStr(loc, "Setting histogram buffer argument failed", m_Devices[device].get()); break; }//Dest buffer of v4bT.
if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, uint(SuperRasW())))) { break; }//Width in pixels.
if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, uint(SuperRasW())))) { ErrorStr(loc, "Setting width argument failed", m_Devices[device].get()); break; }//Width in pixels.
if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, uint(SuperRasH())))) { break; }//Height in pixels.
if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, uint(SuperRasH())))) { ErrorStr(loc, "Setting height argument failed", m_Devices[device].get()); break; }//Height in pixels.
if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, (device == m_Devices.size() - 1) ? 1 : 0))) { break; }//Clear the source buffer on the last device.
if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, (device == m_Devices.size() - 1) ? 1 : 0))) { ErrorStr(loc, "Setting clear argument failed", m_Devices[device].get()); break; }//Clear the source buffer on the last device.
if (b && !(b = wrapper.RunKernel (kernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { break; }
if (b && !(b = wrapper.RunKernel (kernelIndex, gridW, gridH, 1, blockW, blockH, 1))) { ErrorStr(loc, "Running histogram sum program failed", m_Devices[device].get()); break; }
}
else
{
ErrorStr(loc, "Running histogram reading and clearing programs failed", m_Devices[device].get());
break;
}
}
@ -1656,9 +1664,7 @@ bool RendererCL<T, bucketT>::SumDeviceHist()
if (!b)
{
ostringstream os;
os << loc << ": failed to sum histograms from the secondary device(s) to the primary device.";
AddToReport(os.str());
ErrorStr(loc, "Summing histograms from the secondary device(s) to the primary device failed", nullptr);
}
//t.Toc(loc);
@ -1825,6 +1831,25 @@ void RendererCL<T, bucketT>::FillSeeds()
}
}
/// <summary>
/// Assemble a human readable error message, record it in the error report and hand it back to the caller.
/// The message always starts with the location followed by "()" and ends with the error text.
/// When a device is supplied, its name plus its platform and device indices are placed in between.
/// </summary>
/// <param name="loc">Name of the place (usually the function) in which the failure happened</param>
/// <param name="error">Description of what went wrong</param>
/// <param name="dev">Device on which the failure happened, or nullptr to omit the device details</param>
/// <returns>The fully composed message that was added to the report</returns>
template <typename T, typename bucketT>
std::string RendererCL<T, bucketT>::ErrorStr(const std::string& loc, const std::string& error, RendererClDevice* dev)
{
	std::string message = loc;
	message += "()";

	if (dev)//Device details are optional, callers pass nullptr when no single device is to blame.
	{
		message += "\n";
		message += dev->m_Wrapper.DeviceName();
		message += "\nPlatform: ";
		message += std::to_string(dev->PlatformIndex());
		message += ", device: ";
		message += std::to_string(dev->DeviceIndex());
	}

	message += ", error:\n";
	message += error;
	message += "\n";
	AddToReport(message);
	return message;
}
template EMBERCL_API class RendererCL<float, float>;
#ifdef DO_DOUBLE

View File

@ -146,12 +146,13 @@ public:
//Public virtual functions overridden from Renderer or RendererBase.
virtual size_t MemoryAvailable() override;
virtual bool Ok() const override;
virtual void ClearErrorReport() override;
virtual size_t SubBatchSize() const override;
virtual size_t ThreadCount() const override;
virtual bool CreateDEFilter(bool& newAlloc) override;
virtual bool CreateSpatialFilter(bool& newAlloc) override;
virtual eRendererType RendererType() const override;
virtual bool Shared() const override;
virtual void ClearErrorReport() override;
virtual string ErrorReportString() override;
virtual vector<string> ErrorReport() override;
virtual bool RandVec(vector<QTIsaac<ISAAC_SIZE, ISAAC_INT>>& randVec) override;
@ -165,13 +166,12 @@ protected:
virtual bool ResetBuckets(bool resetHist = true, bool resetAccum = true) override;
virtual eRenderStatus LogScaleDensityFilter(bool forceOutput = false) override;
virtual eRenderStatus GaussianDensityFilter() override;
virtual eRenderStatus AccumulatorToFinalImage(v4F* pixels, size_t finalOffset) override;
virtual eRenderStatus AccumulatorToFinalImage(vector<v4F>& pixels, size_t finalOffset) override;
virtual EmberStats Iterate(size_t iterCount, size_t temporalSample) override;
#ifndef TEST_CL
private:
#endif
void Init();
//Private functions for making and running OpenCL programs.
bool BuildIterProgramForEmber(bool doAccum = true);
bool RunIter(size_t iterCount, size_t temporalSample, size_t& itersRan);
@ -192,35 +192,40 @@ private:
void ConvertSpatialFilter();
void ConvertEmber(Ember<T>& ember, EmberCL<T>& emberCL, vector<XformCL<T>>& xformsCL);
void ConvertCarToRas(const CarToRas<T>& carToRas);
bool m_Init;
bool m_DoublePrecision;
size_t m_IterCountPerKernel;
size_t m_IterBlocksWide, m_IterBlockWidth;
size_t m_IterBlocksHigh, m_IterBlockHeight;
std::string ErrorStr(const std::string& loc, const std::string& error, RendererClDevice* dev);
bool m_Init = false;
bool m_Shared = false;
bool m_DoublePrecision = typeid(T) == typeid(double);
//It's critical that these numbers never change. They are
//based on the cuburn model of each kernel launch containing
//256 threads. 32 wide by 8 high. Everything done in the OpenCL
//iteration kernel depends on these dimensions.
size_t m_IterCountPerKernel = 256;
size_t m_IterBlocksWide = 64, m_IterBlockWidth = 32;
size_t m_IterBlocksHigh = 2, m_IterBlockHeight = 8;
size_t m_MaxDEBlockSizeW;
size_t m_MaxDEBlockSizeH;
//Buffer names.
string m_EmberBufferName;
string m_XformsBufferName;
string m_ParVarsBufferName;
string m_GlobalSharedBufferName;
string m_SeedsBufferName;
string m_DistBufferName;
string m_CarToRasBufferName;
string m_DEFilterParamsBufferName;
string m_SpatialFilterParamsBufferName;
string m_CurvesCsaName;
string m_DECoefsBufferName;
string m_DEWidthsBufferName;
string m_DECoefIndicesBufferName;
string m_SpatialFilterCoefsBufferName;
string m_HostBufferName;
string m_HistBufferName;
string m_AccumBufferName;
string m_FinalImageName;
string m_PointsBufferName;
string m_EmberBufferName = "Ember";
string m_XformsBufferName = "Xforms";
string m_ParVarsBufferName = "ParVars";
string m_GlobalSharedBufferName = "GlobalShared";
string m_SeedsBufferName = "Seeds";
string m_DistBufferName = "Dist";
string m_CarToRasBufferName = "CarToRas";
string m_DEFilterParamsBufferName = "DEFilterParams";
string m_SpatialFilterParamsBufferName = "SpatialFilterParams";
string m_DECoefsBufferName = "DECoefs";
string m_DEWidthsBufferName = "DEWidths";
string m_DECoefIndicesBufferName = "DECoefIndices";
string m_SpatialFilterCoefsBufferName = "SpatialFilterCoefs";
string m_CurvesCsaName = "CurvesCsa";
string m_HostBufferName = "Host";
string m_HistBufferName = "Hist";
string m_AccumBufferName = "Accum";
string m_FinalImageName = "Final";
string m_PointsBufferName = "Points";
//Kernels.
string m_IterKernel;

View File

@ -55,4 +55,39 @@ bool RendererClDevice::Ok() const { return m_Init; }
bool RendererClDevice::Shared() const { return m_Shared; }
bool RendererClDevice::Nvidia() const { return m_NVidia; }
size_t RendererClDevice::WarpSize() const { return m_WarpSize; }
size_t RendererClDevice::PlatformIndex() const { return m_PlatformIndex; }
size_t RendererClDevice::DeviceIndex() const { return m_DeviceIndex; }
/// <summary>
/// Clear the error report for this class as well as the wrapper.
/// </summary>
void RendererClDevice::ClearErrorReport()
{
EmberReport::ClearErrorReport();
m_Wrapper.ClearErrorReport();
}
/// <summary>
/// Return one string holding the EmberReport error report text
/// immediately followed by the wrapper's error report text.
/// </summary>
/// <returns>The EmberReport report string with the wrapper's report string appended</returns>
string RendererClDevice::ErrorReportString()
{
	string combined = EmberReport::ErrorReportString();
	combined += m_Wrapper.ErrorReportString();
	return combined;
}
/// <summary>
/// Return a single vector holding every entry of the EmberReport error report
/// followed, in order, by every entry of the wrapper's error report.
/// </summary>
/// <returns>The EmberReport entries with the wrapper's entries appended after them</returns>
vector<string> RendererClDevice::ErrorReport()
{
	vector<string> combined = EmberReport::ErrorReport();
	const vector<string> wrapperEntries = m_Wrapper.ErrorReport();

	for (const string& entry : wrapperEntries)//Append in the wrapper's original order.
		combined.push_back(entry);

	return combined;
}
}

View File

@ -24,6 +24,13 @@ public:
bool Shared() const;
bool Nvidia() const;
size_t WarpSize() const;
size_t PlatformIndex() const;
size_t DeviceIndex() const;
//Public virtual functions overridden from base classes.
virtual void ClearErrorReport() override;
virtual string ErrorReportString() override;
virtual vector<string> ErrorReport() override;
size_t m_Calls;
OpenCLWrapper m_Wrapper;

View File

@ -58,7 +58,7 @@
<enum>QFrame::NoFrame</enum>
</property>
<property name="text">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p align=&quot;center&quot;&gt;Fractorium 1.0.0.7&lt;/p&gt;&lt;p align=&quot;center&quot;&gt;&lt;span style=&quot; font-size:10pt;&quot;&gt;A Qt-based fractal flame editor which uses a C++ re-write of the flam3 algorithm named Ember and a GPU capable version named EmberCL which implements a portion of the cuburn algorithm in OpenCL.&lt;/span&gt;&lt;/p&gt;&lt;p align=&quot;center&quot;&gt;&lt;a href=&quot;http://fractorium.com&quot;&gt;&lt;span style=&quot; text-decoration: underline; color:#0000ff;&quot;&gt;fractorium.com&lt;/span&gt;&lt;/a&gt;&lt;span style=&quot; font-size:10pt;&quot;&gt;&lt;br/&gt;Lead: Matt Feemster&lt;br/&gt;Contributors: Simon Detheridge, Michel Mastriani&lt;/span&gt;&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p align=&quot;center&quot;&gt;Fractorium 1.0.0.8&lt;/p&gt;&lt;p align=&quot;center&quot;&gt;&lt;span style=&quot; font-size:10pt;&quot;&gt;A Qt-based fractal flame editor which uses a C++ re-write of the flam3 algorithm named Ember and a GPU capable version named EmberCL which implements a portion of the cuburn algorithm in OpenCL.&lt;/span&gt;&lt;/p&gt;&lt;p align=&quot;center&quot;&gt;&lt;a href=&quot;http://fractorium.com&quot;&gt;&lt;span style=&quot; text-decoration: underline; color:#0000ff;&quot;&gt;fractorium.com&lt;/span&gt;&lt;/a&gt;&lt;span style=&quot; font-size:10pt;&quot;&gt;&lt;br/&gt;Lead: Matt Feemster&lt;br/&gt;Contributors: Simon Detheridge, Michel Mastriani&lt;/span&gt;&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
<property name="textFormat">
<enum>Qt::RichText</enum>

View File

@ -439,7 +439,6 @@ bool FinalRenderEmberController<T>::CreateRenderer(eRendererType renderType, con
vector<string> errorReport;
m_Devices = devices;//Store values for re-creation later on.
m_OutputTexID = 0;//Don't care about tex ID when doing final render.
m_Shared = shared;//So shared is of course false.
if (m_FinalRenderDialog->DoSequence())
{

View File

@ -213,6 +213,11 @@ Fractorium::Fractorium(QWidget* p)
//this constructor exits, GLWidget::InitGL() will create the initial flock and start the rendering timer
//which executes whenever the program is idle. Upon starting the timer, the renderer
//will be initialized.
//auto cdc = wglGetCurrentDC();
//auto cc = wglGetCurrentContext();
//qDebug() << "Fractorium::Fractorium():";
//qDebug() << "Current DC: " << cdc;
//qDebug() << "Current Context: " << cc;
QTimer::singleShot(1000, [&]() { ui.GLDisplay->InitGL(); });
}

View File

@ -268,7 +268,6 @@ protected:
//Non-templated members.
bool m_Rendering = false;
bool m_Shared = true;
bool m_LastEditWasUndoRedo;
vector<pair<size_t, size_t>> m_Devices;
size_t m_SubBatchCount = 1;//Will be ovewritten by the options on first render.

View File

@ -476,8 +476,7 @@ bool FractoriumEmberController<T>::Render()
//Update it on finish because the rendering process is completely done.
if (update || ProcessState() == eProcessState::ACCUM_DONE)
{
if (m_FinalImage.size() == m_Renderer->FinalDimensions())//Make absolutely sure the correct amount of data is passed.
gl->update();
gl->update();//Queue update.
if (ProcessState() == eProcessState::ACCUM_DONE)
SaveCurrentToOpenedFile();//Will not save if the previews are still rendering.
@ -543,26 +542,34 @@ bool FractoriumEmberController<T>::CreateRenderer(eRendererType renderType, cons
auto s = m_Fractorium->m_Settings;
auto gl = m_Fractorium->ui.GLDisplay;
if (!m_Renderer.get() || (m_Renderer->RendererType() != renderType) || !Equal(m_Devices, devices))
if (!m_Renderer.get() || (m_Renderer->RendererType() != renderType) || !Equal(m_Devices, devices) || m_Renderer->Shared() != shared)
{
EmberReport emberReport;
vector<string> errorReport;
DeleteRenderer();//Delete the renderer and refresh the textures.
//Before starting, must take care of allocations.
gl->Allocate(true);//Forcing a realloc of the texture is necessary on AMD, but not on nVidia.
m_Renderer = unique_ptr<EmberNs::RendererBase>(::CreateRenderer<T>(renderType, devices, shared, gl->OutputTexID(), emberReport));//Always make bucket type float.
errorReport = emberReport.ErrorReport();
if (errorReport.empty())
//Before starting, must take care of allocations.
if (gl->Allocate(true))//Forcing a realloc of the texture is necessary on AMD, but not on nVidia.
{
m_Devices = devices;
m_OutputTexID = gl->OutputTexID();
m_Shared = shared;
m_Renderer = unique_ptr<EmberNs::RendererBase>(::CreateRenderer<T>(renderType, devices, shared, gl->OutputTexID(), emberReport));//Always make bucket type float.
errorReport = emberReport.ErrorReport();
if (errorReport.empty())
{
m_Devices = devices;
m_OutputTexID = gl->OutputTexID();
}
else
{
ok = false;
m_Fractorium->ShowCritical("Renderer Creation Error", "Could not create requested renderer, fallback CPU renderer created. See info tab for details.");
m_Fractorium->ErrorReportToQTextEdit(errorReport, m_Fractorium->ui.InfoRenderingTextEdit);
}
}
else
{
ok = false;
m_Fractorium->ShowCritical("Renderer Creation Error", "Could not create requested renderer, fallback CPU renderer created. See info tab for details.");
m_Fractorium->ShowCritical("Renderer Creation Error", "Could not create OpenGL texture, interactive rendering will be disabled.");
m_Fractorium->ErrorReportToQTextEdit(errorReport, m_Fractorium->ui.InfoRenderingTextEdit);
}
}
@ -662,14 +669,16 @@ bool Fractorium::CreateRendererFromOptions(bool updatePreviews)
auto v = Devices(m_Settings->Devices());
//The most important option to process is what kind of renderer is desired, so do it first.
if (!m_Controller->CreateRenderer((useOpenCL && !v.empty()) ? eRendererType::OPENCL_RENDERER : eRendererType::CPU_RENDERER, v, updatePreviews))
if (!m_Controller->CreateRenderer((useOpenCL && !v.empty()) ? eRendererType::OPENCL_RENDERER : eRendererType::CPU_RENDERER, v, updatePreviews, useOpenCL && m_Settings->SharedTexture()))
{
//If using OpenCL, will only get here if creating RendererCL failed, but creating a backup CPU Renderer succeeded.
ShowCritical("Renderer Creation Error", "Error creating renderer, most likely a GPU problem. Using CPU instead.");
m_Settings->OpenCL(false);
m_Settings->SharedTexture(false);
ui.ActionCpu->setChecked(true);
ui.ActionCL->setChecked(false);
m_OptionsDialog->ui.OpenCLCheckBox->setChecked(false);
m_OptionsDialog->ui.SharedTextureCheckBox->setChecked(false);
m_FinalRenderDialog->ui.FinalRenderOpenCLCheckBox->setChecked(false);
ok = false;
}

View File

@ -98,6 +98,9 @@ void FractoriumSettings::EnsureDefaults()
if (OpenPaletteImageFolder() == "")
OpenPaletteImageFolder(QCoreApplication::applicationDirPath());
if (value(SHAREDTEXTURE).toString() == "")//Set this to true if the setting is missing because it only needs to be false for the rare system that has problems with shared textures.
SharedTexture(true);
}
/// <summary>
@ -116,6 +119,9 @@ void FractoriumSettings::Transparency(bool b) { setValue(TRANSPARENCY, b);
bool FractoriumSettings::OpenCL() { return value(OPENCL).toBool(); }
void FractoriumSettings::OpenCL(bool b) { setValue(OPENCL, b); }
//Getter/setter for the boolean setting stored under the SHAREDTEXTURE key.
//The getter converts the stored value with toBool(), the setter writes b under the same key.
bool FractoriumSettings::SharedTexture() { return value(SHAREDTEXTURE).toBool(); }
void FractoriumSettings::SharedTexture(bool b) { setValue(SHAREDTEXTURE, b); }
bool FractoriumSettings::Double() { return value(DOUBLEPRECISION).toBool(); }
void FractoriumSettings::Double(bool b) { setValue(DOUBLEPRECISION, b); }
@ -158,14 +164,14 @@ void FractoriumSettings::OpenCLSubBatch(uint i) { setValue(OPENCLSUBBATCH,
uint FractoriumSettings::RandomCount() { return value(RANDOMCOUNT).toUInt(); }
void FractoriumSettings::RandomCount(uint i) { setValue(RANDOMCOUNT, i); }
uint FractoriumSettings::CpuQuality() { return value(CPU_QUALITY).toUInt(); }
void FractoriumSettings::CpuQuality(uint i) { setValue(CPU_QUALITY, i); }
uint FractoriumSettings::CpuQuality() { return value(CPUQUALITY).toUInt(); }
void FractoriumSettings::CpuQuality(uint i) { setValue(CPUQUALITY, i); }
uint FractoriumSettings::OpenClQuality() { return value(OPENCL_QUALITY).toUInt(); }
void FractoriumSettings::OpenClQuality(uint i) { setValue(OPENCL_QUALITY, i); }
uint FractoriumSettings::OpenClQuality() { return value(OPENCLQUALITY).toUInt(); }
void FractoriumSettings::OpenClQuality(uint i) { setValue(OPENCLQUALITY, i); }
bool FractoriumSettings::LoadLast() { return value(LOAD_LAST).toBool(); }
void FractoriumSettings::LoadLast(bool b) { setValue(LOAD_LAST, b); }
bool FractoriumSettings::LoadLast() { return value(LOADLAST).toBool(); }
void FractoriumSettings::LoadLast(bool b) { setValue(LOADLAST, b); }
/// <summary>
/// Sequence generation settings.

View File

@ -10,6 +10,7 @@
#define YAXISUP "render/yaxisup"
#define TRANSPARENCY "render/transparency"
#define OPENCL "render/opencl"
#define SHAREDTEXTURE "render/sharedtexture"
#define DOUBLEPRECISION "render/dp64"
#define CONTUPDATE "render/continuousupdate"
#define SHOWALLXFORMS "render/dragshowallxforms"
@ -24,9 +25,9 @@
#define CPUSUBBATCH "render/cpusubbatch"
#define OPENCLSUBBATCH "render/openclsubbatch"
#define RANDOMCOUNT "render/randomcount"
#define CPU_QUALITY "render/cpuquality"
#define OPENCL_QUALITY "render/openclquality"
#define LOAD_LAST "render/loadlastonstart"
#define CPUQUALITY "render/cpuquality"
#define OPENCLQUALITY "render/openclquality"
#define LOADLAST "render/loadlastonstart"
#define STAGGER "sequence/stagger"
#define STAGGERMAX "sequence/staggermax"
@ -114,6 +115,9 @@ public:
bool OpenCL();
void OpenCL(bool b);
bool SharedTexture();
void SharedTexture(bool b);
bool Double();
void Double(bool b);

View File

@ -332,6 +332,7 @@ void FractoriumEmberController<T>::AddFinalXform()
Update([&]()
{
Xform<T> final;
final.m_Animate = 0;
final.AddVariation(m_VariationList->GetVariationCopy(eVariationId::VAR_LINEAR));//Just a placeholder so other parts of the code don't see it as being empty.
m_Ember.SetFinalXform(final);
int index = int(m_Ember.TotalXformCount() - 1);//Set index to the last item.

View File

@ -121,12 +121,12 @@ GLWidget::GLWidget(QWidget* p)
setFormat(fmt);
*/
auto qsf = this->format();
qDebug() << "Constructor*****************\nVersion: " << qsf.majorVersion() << ',' << qsf.minorVersion();
qDebug() << "Profile: " << qsf.profile();
qDebug() << "Depth buffer size: " << qsf.depthBufferSize();
qDebug() << "Swap behavior: " << qsf.swapBehavior();
qDebug() << "Swap interval: " << qsf.swapInterval();
//auto qsf = this->format();
//qDebug() << "Constructor*****************\nVersion: " << qsf.majorVersion() << ',' << qsf.minorVersion();
//qDebug() << "Profile: " << qsf.profile();
//qDebug() << "Depth buffer size: " << qsf.depthBufferSize();
//qDebug() << "Swap behavior: " << qsf.swapBehavior();
//qDebug() << "Swap interval: " << qsf.swapInterval();
}
/// <summary>
@ -144,12 +144,12 @@ void GLWidget::InitGL()
{
if (!m_Init)
{
auto qsf = this->format();
qDebug() << "InitGL*****************\nVersion: " << qsf.majorVersion() << ',' << qsf.minorVersion();
qDebug() << "Profile: " << qsf.profile();
qDebug() << "Depth buffer size: " << qsf.depthBufferSize();
qDebug() << "Swap behavior: " << qsf.swapBehavior();
qDebug() << "Swap interval: " << qsf.swapInterval();
//auto qsf = this->format();
//qDebug() << "InitGL*****************\nVersion: " << qsf.majorVersion() << ',' << qsf.minorVersion();
//qDebug() << "Profile: " << qsf.profile();
//qDebug() << "Depth buffer size: " << qsf.depthBufferSize();
//qDebug() << "Swap behavior: " << qsf.swapBehavior();
//qDebug() << "Swap interval: " << qsf.swapInterval();
int w = std::ceil(m_Fractorium->ui.GLParentScrollArea->width() * devicePixelRatioF());
int h = std::ceil(m_Fractorium->ui.GLParentScrollArea->height() * devicePixelRatioF());
SetDimensions(w, h);
@ -182,6 +182,65 @@ void GLWidget::InitGL()
m_Fractorium->m_Controller->DelayedStartRenderTimer();
m_Init = true;
/*
auto clinfo = OpenCLInfo::DefInstance();
auto& platforms = clinfo->Platforms();
auto& alldevices = clinfo->Devices();
std::vector<std::string> strs;
auto cdc = wglGetCurrentDC();
auto cc = wglGetCurrentContext();
ostringstream os;
strs.push_back(os.str()); os.str(""); os << "GLWidget::InitGL():";
strs.push_back(os.str()); os.str(""); os << "\nCurrent DC: " << cdc;
strs.push_back(os.str()); os.str(""); os << "\nCurrent Context: " << cc;
for (int platform = 0; platform < platforms.size(); platform++)
{
cl_context_properties props[] =
{
CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(),
CL_WGL_HDC_KHR, (cl_context_properties)wglGetCurrentDC(),
CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>((platforms[platform])()),
0
};
// Find CL capable devices in the current GL context
//wglMakeCurrent(wglGetCurrentDC(), wglGetCurrentContext());
::wglMakeCurrent(wglGetCurrentDC(), wglGetCurrentContext());
size_t sizedev;
cl_device_id devices[32];
clGetGLContextInfoKHR_fn clGetGLContextInfo = (clGetGLContextInfoKHR_fn)clGetExtensionFunctionAddressForPlatform(platforms[platform](), "clGetGLContextInfoKHR");
clGetGLContextInfo(props, CL_DEVICES_FOR_GL_CONTEXT_KHR, 32 * sizeof(cl_device_id), devices, &sizedev);
sizedev = (cl_uint)(sizedev / sizeof(cl_device_id));
for (int i = 0; i < sizedev; i++)
{
std::string s;
size_t pi, di;
auto dd = clinfo->DeviceFromId(devices[i], pi, di);
if (dd)
{
auto& dev = *dd;
auto& plat = platforms[pi];
strs.push_back(os.str()); os.str(""); os << "\nPlatform[" << pi << "], device[" << di << "] is GL capable.";
strs.push_back(os.str()); os.str(""); os << "\nPlatform profile: " << plat.getInfo<CL_PLATFORM_PROFILE>(nullptr).c_str() << endl;
strs.push_back(os.str()); os.str(""); os << "\nPlatform version: " << plat.getInfo<CL_PLATFORM_VERSION>(nullptr).c_str() << endl;
strs.push_back(os.str()); os.str(""); os << "\nPlatform name: " << plat.getInfo<CL_PLATFORM_NAME>(nullptr).c_str() << endl;
strs.push_back(os.str()); os.str(""); os << "\nPlatform vendor: " << plat.getInfo<CL_PLATFORM_VENDOR>(nullptr).c_str() << endl;
strs.push_back(os.str()); os.str(""); os << "\nPlatform extensions: " << plat.getInfo<CL_PLATFORM_EXTENSIONS>(nullptr).c_str() << endl;
strs.push_back(os.str()); os.str(""); os << "\nVendor: " << dev.getInfo<CL_DEVICE_VENDOR>(nullptr).c_str() << endl;
strs.push_back(os.str()); os.str(""); os << "\nDevice: " << dev.getInfo<CL_DEVICE_NAME>(nullptr).c_str() << endl;
strs.push_back(os.str()); os.str(""); os << "\nDriver version: " << dev.getInfo<CL_DRIVER_VERSION>(nullptr).c_str() << endl;
strs.push_back(os.str()); os.str(""); os << "\nDevice profile: " << dev.getInfo<CL_DEVICE_PROFILE>(nullptr).c_str() << endl;
strs.push_back(os.str()); os.str(""); os << "\nDevice version: " << dev.getInfo<CL_DEVICE_VERSION>(nullptr).c_str() << endl;
strs.push_back(os.str()); os.str(""); os << "\nDevice extensions: " << dev.getInfo<CL_DEVICE_EXTENSIONS>(nullptr).c_str() << endl;
strs.push_back(os.str()); os.str(""); os << "\nDevice OpenCL C version: " << dev.getInfo<CL_DEVICE_OPENCL_C_VERSION>(nullptr).c_str() << endl;
}
}
}
m_Fractorium->ErrorReportToQTextEdit(strs, m_Fractorium->ui.InfoRenderingTextEdit);
*/
}
}
@ -243,23 +302,28 @@ void GLWidget::DrawQuad()
this->glEnable(GL_TEXTURE_2D);
this->glActiveTexture(GL_TEXTURE0);
auto renderer = m_Fractorium->m_Controller->Renderer();
auto finalImage = m_Fractorium->m_Controller->FinalImage();
//Ensure all allocation has taken place first.
if (m_OutputTexID != 0 && finalImage && !finalImage->empty())
if (m_OutputTexID != 0)
{
glBindTexture(GL_TEXTURE_2D, m_OutputTexID);//The texture to draw to.
auto scaledW = std::ceil(width() * devicePixelRatioF());
auto scaledH = std::ceil(height() * devicePixelRatioF());
//Only draw if the dimensions match exactly.
if (m_TexWidth == m_Fractorium->m_Controller->FinalRasW() &&
m_TexHeight == m_Fractorium->m_Controller->FinalRasH() &&
((m_TexWidth * m_TexHeight) == GLint(finalImage->size())))
if (m_TexWidth == m_Fractorium->m_Controller->FinalRasW() && m_TexHeight == m_Fractorium->m_Controller->FinalRasH())
{
//Copy data from CPU to OpenGL if using a CPU renderer, or an OpenCL renderer that does not share its output texture. This is not needed when OpenCL shares the texture.
if (renderer->RendererType() == eRendererType::CPU_RENDERER)
this->glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_TexWidth, m_TexHeight, GL_RGBA, GL_FLOAT, finalImage->data());
if (renderer->RendererType() == eRendererType::CPU_RENDERER || !renderer->Shared())
{
auto finalImage = m_Fractorium->m_Controller->FinalImage();
if (finalImage &&//Make absolutely sure all image dimensions match when copying host side buffer to GL texture.
!finalImage->empty() &&
((m_TexWidth * m_TexHeight) == GLint(finalImage->size())) &&
(finalImage->size() == renderer->FinalDimensions()))
this->glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_TexWidth, m_TexHeight, GL_RGBA, GL_FLOAT, finalImage->data());
}
m_QuadProgram->bind();
this->glVertexAttribPointer(m_TexturePosAttr, 2, GL_FLOAT, GL_FALSE, 0, m_TexVerts.data());
@ -363,12 +427,12 @@ GLuint GLWidget::OutputTexID() { return m_OutputTexID; }
void GLWidget::initializeGL()
{
#ifdef USE_GLSL
auto qsf = this->format();
qDebug() << "initializeGL*****************\nVersion: " << qsf.majorVersion() << ',' << qsf.minorVersion();
qDebug() << "Profile: " << qsf.profile();
qDebug() << "Depth buffer size: " << qsf.depthBufferSize();
qDebug() << "Swap behavior: " << qsf.swapBehavior();
qDebug() << "Swap interval: " << qsf.swapInterval();
//auto qsf = this->format();
//qDebug() << "initializeGL*****************\nVersion: " << qsf.majorVersion() << ',' << qsf.minorVersion();
//qDebug() << "Profile: " << qsf.profile();
//qDebug() << "Depth buffer size: " << qsf.depthBufferSize();
//qDebug() << "Swap behavior: " << qsf.swapBehavior();
//qDebug() << "Swap interval: " << qsf.swapInterval();
if (!m_Init && m_Fractorium)
{
@ -541,11 +605,7 @@ void GLEmberController<T>::DrawImage()
if (SizesMatch())//Ensure all sizes are correct. If not, do nothing.
{
auto finalImage = m_FractoriumEmberController->FinalImage();
if ((renderer->RendererType() == eRendererType::OPENCL_RENDERER) || finalImage)//Final image only matters for CPU renderer.
if ((renderer->RendererType() == eRendererType::OPENCL_RENDERER) || finalImage->size() == renderer->FinalDimensions())
m_GL->DrawQuad();//Output image is drawn here.
m_GL->DrawQuad();//Output image is drawn here.
}
renderer->LeaveResize();//Unlock, may not be necessary.
@ -1149,6 +1209,7 @@ bool GLWidget::Allocate(bool force)
}
#endif
this->glFinish();
return m_OutputTexID != 0;
}

View File

@ -47,6 +47,8 @@ FractoriumOptionsDialog::FractoriumOptionsDialog(QWidget* p, Qt::WindowFlags f)
ui.DeviceTable->setEnabled(false);
ui.OpenCLCheckBox->setChecked(false);
ui.OpenCLCheckBox->setEnabled(false);
ui.SharedTextureCheckBox->setChecked(false);
ui.SharedTextureCheckBox->setEnabled(false);
ui.OpenCLSubBatchSpin->setEnabled(false);
ui.OpenCLQualitySpin->setEnabled(false);
ui.OpenCLFilteringDERadioButton->setEnabled(false);
@ -67,6 +69,7 @@ bool FractoriumOptionsDialog::YAxisUp() { return ui.YAxisUpCheckBox->isChecked()
//Return the current checked state of the TransparencyCheckBox widget on the options dialog.
bool FractoriumOptionsDialog::Transparency()
{
	const bool isOn = ui.TransparencyCheckBox->isChecked();
	return isOn;
}
//Return the current checked state of the ContinuousUpdateCheckBox widget on the options dialog.
bool FractoriumOptionsDialog::ContinuousUpdate()
{
	const bool isOn = ui.ContinuousUpdateCheckBox->isChecked();
	return isOn;
}
//Return the current checked state of the OpenCLCheckBox widget on the options dialog.
bool FractoriumOptionsDialog::OpenCL()
{
	const bool isOn = ui.OpenCLCheckBox->isChecked();
	return isOn;
}
//Return the current checked state of the SharedTextureCheckBox widget on the options dialog.
bool FractoriumOptionsDialog::SharedTexture()
{
	const bool isOn = ui.SharedTextureCheckBox->isChecked();
	return isOn;
}
//Return the current checked state of the DoublePrecisionCheckBox widget on the options dialog.
bool FractoriumOptionsDialog::Double()
{
	const bool isOn = ui.DoublePrecisionCheckBox->isChecked();
	return isOn;
}
//Return the current checked state of the ShowAllXformsCheckBox widget on the options dialog.
bool FractoriumOptionsDialog::ShowAllXforms()
{
	const bool isOn = ui.ShowAllXformsCheckBox->isChecked();
	return isOn;
}
//Return the current checked state of the ToggleTypeCheckBox widget on the options dialog.
bool FractoriumOptionsDialog::ToggleType()
{
	const bool isOn = ui.ToggleTypeCheckBox->isChecked();
	return isOn;
}
@ -127,6 +130,7 @@ void FractoriumOptionsDialog::OnOpenCLCheckBoxStateChanged(int state)
ui.DeviceTable->setEnabled(checked);
ui.ThreadCountSpin->setEnabled(!checked);
ui.CpuSubBatchSpin->setEnabled(!checked);
ui.SharedTextureCheckBox->setEnabled(checked);
ui.OpenCLSubBatchSpin->setEnabled(checked);
ui.OpenCLQualitySpin->setEnabled(checked);
ui.CpuQualitySpin->setEnabled(!checked);
@ -180,6 +184,7 @@ void FractoriumOptionsDialog::GuiToData()
m_Settings->Transparency(Transparency());
m_Settings->ContinuousUpdate(ContinuousUpdate());
m_Settings->OpenCL(OpenCL());
m_Settings->SharedTexture(SharedTexture());
m_Settings->Double(Double());
m_Settings->ShowAllXforms(ShowAllXforms());
m_Settings->ToggleType(ToggleType());
@ -217,6 +222,7 @@ void FractoriumOptionsDialog::DataToGui()
ui.TransparencyCheckBox->setChecked(m_Settings->Transparency());
ui.ContinuousUpdateCheckBox->setChecked(m_Settings->ContinuousUpdate());
ui.OpenCLCheckBox->setChecked(m_Settings->OpenCL());
ui.SharedTextureCheckBox->setChecked(m_Settings->SharedTexture());
ui.DoublePrecisionCheckBox->setChecked(m_Settings->Double());
ui.ShowAllXformsCheckBox->setChecked(m_Settings->ShowAllXforms());
ui.ToggleTypeCheckBox->setChecked(m_Settings->ToggleType());

View File

@ -25,23 +25,12 @@ class FractoriumOptionsDialog : public QDialog
public:
FractoriumOptionsDialog(QWidget* p = nullptr, Qt::WindowFlags f = 0);
public slots:
void OnOpenCLCheckBoxStateChanged(int state);
void OnDeviceTableCellChanged(int row, int col);
void OnDeviceTableRadioToggled(bool checked);
virtual void accept() override;
virtual void reject() override;
protected:
virtual void showEvent(QShowEvent* e) override;
private:
bool EarlyClip();
bool YAxisUp();
bool Transparency();
bool ContinuousUpdate();
bool OpenCL();
bool SharedTexture();
bool Double();
bool ShowAllXforms();
bool ToggleType();
@ -55,6 +44,17 @@ private:
void DataToGui();
void GuiToData();
public slots:
void OnOpenCLCheckBoxStateChanged(int state);
void OnDeviceTableCellChanged(int row, int col);
void OnDeviceTableRadioToggled(bool checked);
virtual void accept() override;
virtual void reject() override;
protected:
virtual void showEvent(QShowEvent* e) override;
private:
Ui::OptionsDialog ui;
shared_ptr<OpenCLInfo> m_Info;
SpinBox* m_XmlTemporalSamplesSpin;

View File

@ -119,17 +119,7 @@
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QCheckBox" name="YAxisUpCheckBox">
<property name="toolTip">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;Checked: Positive Y direction is up.&lt;/p&gt;&lt;p&gt;Unchecked: Positive Y direction is down.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
<property name="text">
<string>Positive Y Up</string>
</property>
</widget>
</item>
<item row="1" column="1">
<item row="2" column="1">
<widget class="QCheckBox" name="DoublePrecisionCheckBox">
<property name="toolTip">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;Checked: use 64-bit double precision numbers (slower, but better image quality).&lt;/p&gt;&lt;p&gt;Unchecked: use 32-bit single precision numbers (faster, but worse image quality).&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
@ -139,17 +129,7 @@
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QCheckBox" name="TransparencyCheckBox">
<property name="toolTip">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;Use transparency in the final image.&lt;/p&gt;&lt;p&gt;This will not make a difference in the editor, but will when saving as .png and opening in other programs.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
<property name="text">
<string>Transparency</string>
</property>
</widget>
</item>
<item row="2" column="1">
<item row="3" column="1">
<widget class="QCheckBox" name="ShowAllXformsCheckBox">
<property name="toolTip">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;Checked: show all xforms while dragging.&lt;/p&gt;&lt;p&gt;Unchecked: only show current xform while dragging.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
@ -159,17 +139,7 @@
</property>
</widget>
</item>
<item row="3" column="0">
<widget class="QCheckBox" name="ContinuousUpdateCheckBox">
<property name="toolTip">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;Continually update output image during interactive rendering.&lt;/p&gt;&lt;p&gt;This will slow down performance, but will give continuous updates on how the final render will look. Note that only log scale filtering is applied on each update. Full DE is not applied until iteration is complete.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
<property name="text">
<string>Continuous Update</string>
</property>
</widget>
</item>
<item row="3" column="1">
<item row="4" column="1">
<widget class="QCheckBox" name="ToggleTypeCheckBox">
<property name="toolTip">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;Checked: right clicking toggles spin boxes, right button dragging disabled.&lt;/p&gt;&lt;p&gt;Unchecked: double clicking toggles spin boxes, right button dragging enabled.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
@ -179,17 +149,7 @@
</property>
</widget>
</item>
<item row="4" column="0">
<widget class="QCheckBox" name="Png16BitCheckBox">
<property name="toolTip">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;Save each RGBA component as 16-bits when saving Png files.&lt;/p&gt;&lt;p&gt;This leads to greater color precision for use in high end rendering and display on HDR monitors, however it makes the file size larger.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
<property name="text">
<string>Save 16-bit Png</string>
</property>
</widget>
</item>
<item row="6" column="0" colspan="2">
<item row="7" column="0" colspan="2">
<widget class="QTableWidget" name="DeviceTable">
<property name="sizePolicy">
<sizepolicy hsizetype="MinimumExpanding" vsizetype="MinimumExpanding">
@ -277,7 +237,7 @@
</column>
</widget>
</item>
<item row="15" column="0" colspan="2">
<item row="16" column="0" colspan="2">
<layout class="QGridLayout" name="InteractiveRenderingTabGridLayout">
<item row="3" column="1">
<widget class="QGroupBox" name="InteraciveGpuFilteringGroupBox">
@ -476,7 +436,7 @@ in interactive mode for each mouse movement</string>
</item>
</layout>
</item>
<item row="4" column="1">
<item row="5" column="1">
<widget class="QCheckBox" name="LoadLastOnStartCheckBox">
<property name="toolTip">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;Checked: load the flame from the previous run on startup.&lt;/p&gt;&lt;p&gt;Unchecked: create randoms on startup.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
@ -486,6 +446,56 @@ in interactive mode for each mouse movement</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QCheckBox" name="SharedTextureCheckBox">
<property name="toolTip">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;Share the memory between the final image output and the OpenGL texture used to display the image result on the interactive renderer.&lt;/p&gt;&lt;p&gt;This is a highly recommended performance optimization for interactive editing when using OpenCL if your card supports it.&lt;/p&gt;&lt;p&gt;If creating the OpenCL renderer fails, uncheck this option. You will see a slight performance decrease in interactive rendering.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
<property name="text">
<string>Shared Texture</string>
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QCheckBox" name="YAxisUpCheckBox">
<property name="toolTip">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;Checked: Positive Y direction is up.&lt;/p&gt;&lt;p&gt;Unchecked: Positive Y direction is down.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
<property name="text">
<string>Positive Y Up</string>
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QCheckBox" name="TransparencyCheckBox">
<property name="toolTip">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;Use transparency in the final image.&lt;/p&gt;&lt;p&gt;This will not make a difference in the editor, but will when saving as .png and opening in other programs.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
<property name="text">
<string>Transparency</string>
</property>
</widget>
</item>
<item row="3" column="0">
<widget class="QCheckBox" name="ContinuousUpdateCheckBox">
<property name="toolTip">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;Continually update output image during interactive rendering.&lt;/p&gt;&lt;p&gt;This will slow down performance, but will give continuous updates on how the final render will look. Note that only log scale filtering is applied on each update. Full DE is not applied until iteration is complete.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
<property name="text">
<string>Continuous Update</string>
</property>
</widget>
</item>
<item row="4" column="0">
<widget class="QCheckBox" name="Png16BitCheckBox">
<property name="toolTip">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;Save each RGBA component as 16-bits when saving Png files.&lt;/p&gt;&lt;p&gt;This leads to greater color precision for use in high end rendering and display on HDR monitors, however it makes the file size larger.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
<property name="text">
<string>Save 16-bit Png</string>
</property>
</widget>
</item>
</layout>
</widget>
<widget class="QWidget" name="OptionsXmlSavingTab">
@ -903,13 +913,9 @@ in interactive mode for each mouse movement</string>
<tabstops>
<tabstop>EarlyClipCheckBox</tabstop>
<tabstop>OpenCLCheckBox</tabstop>
<tabstop>YAxisUpCheckBox</tabstop>
<tabstop>DoublePrecisionCheckBox</tabstop>
<tabstop>TransparencyCheckBox</tabstop>
<tabstop>ShowAllXformsCheckBox</tabstop>
<tabstop>ContinuousUpdateCheckBox</tabstop>
<tabstop>ToggleTypeCheckBox</tabstop>
<tabstop>Png16BitCheckBox</tabstop>
<tabstop>LoadLastOnStartCheckBox</tabstop>
<tabstop>ThreadCountSpin</tabstop>
<tabstop>CpuSubBatchSpin</tabstop>