--Bug fixes

-Fix improper usage of rand() in cpow2, cpow3, hypertile1, hypertile3D1, hypertile3D2, juliac, juliaq.
 -Fix program crashing during density filtering on some Nvidia cards.
 -hypertile3D1 was wrong.
 -Parsing phoenix_julia when coming from Apophysis was wrong.
 -Density filtering was freezing on certain Nvidia cards.

--Code changes
 -Optimize juliac, npolar.
 -Add a new function, Crand(), which behaves like the legacy C rand() and returns an integer between 0 and 32766, inclusive.
 -Use RandBit() in some places.
 -Remove Zeps() from vignette, it's not needed.
 -Restructure the OpenCL code for density filtering so that it does not hang after being compiled on some Nvidia cards, such as the GTX 1660. Remove barriers from conditionals where possible.
This commit is contained in:
Person 2020-12-28 21:46:55 -08:00
parent d68deb1255
commit 47b6614c8a
18 changed files with 224 additions and 153 deletions

View File

@ -1089,14 +1089,14 @@ public:
static intmax_t symDistrib[] =
{
-4, -3,
-2, -2, -2,
-1, -1, -1,
2, 2, 2,
3, 3,
4, 4,
};
-2, -2, -2,
-1, -1, -1,
2, 2, 2,
3, 3,
4, 4,
};
if (rand.Rand() & 1)
if (rand.RandBit())
sym = symDistrib[rand.Rand(Vlen(symDistrib))];
else if (rand.Rand() & 31)
sym = intmax_t(rand.Rand(13)) - 6;

View File

@ -143,6 +143,26 @@ public:
return t;
}
/// <summary>
/// Legacy rand() in the C language returns an integer in the range [0, RAND_MAX].
/// This function is used to simulate that behavior with a fixed upper limit:
/// it forwards to Rand(32767), so the largest value returned is 32766.
/// </summary>
/// <returns>The next random integer in the range of 0-32766 inclusive</returns>
inline T Crand()
{
return Rand(32767);
}
/// <summary>
/// Locked version of Crand().
/// Constructs a guard from s_CS and, while it is alive, forwards to GlobalRand->Crand().
/// NOTE(review): rlg is presumably a scoped lock guard type declared elsewhere - confirm.
/// </summary>
/// <returns>The value produced by GlobalRand->Crand()</returns>
inline T LockedCrand()
{
	rlg guard(*s_CS.get());//Held until the function returns.
	return GlobalRand->Crand();
}
/// <summary>
/// Return the next random integer between 0 and the value passed in minus 1.
/// </summary>

View File

@ -906,13 +906,13 @@ public:
auto xform0 = RandomXform(ember, -1);
auto xform1 = RandomXform(ember, ember.GetXformIndex(xform0));
if (xform0 && (m_Rand.Rand() & 1))
if (xform0 && (m_Rand.RandBit()))
{
xform0->m_ColorX = 0;
xform0->m_ColorY = 0;
}
if (xform1 && (m_Rand.Rand() & 1))
if (xform1 && (m_Rand.RandBit()))
{
xform1->m_ColorX = 1;
xform1->m_ColorY = 1;

View File

@ -1819,7 +1819,7 @@ public:
virtual void Func(IteratorHelper<T>& helper, Point<T>& outPoint, QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand) override
{
T tempr = (helper.m_PrecalcAtanyx + M_2PI * rand.Rand(ISAAC_INT(m_Rn))) / m_Power;
T tempr = (helper.m_PrecalcAtanyx + M_2PI * rand.Rand(size_t(m_Rn))) / m_Power;
T r = m_Weight * std::pow(helper.m_PrecalcSumSquares, m_Cn);
helper.Out.x = r * std::cos(tempr);
helper.Out.y = r * std::sin(tempr);
@ -2199,7 +2199,7 @@ public:
virtual void Func(IteratorHelper<T>& helper, Point<T>& outPoint, QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand) override
{
int sl = int(rand.Frand01<T>() * m_Slices + T(0.5));
auto sl = rand.Rand(size_t(m_Slices));
T a = m_Rotation + m_Pi2Slices * (sl + m_Thickness * rand.Frand01<T>());
T r = m_Weight * rand.Frand01<T>();
helper.Out.x = r * std::cos(a);
@ -2219,7 +2219,7 @@ public:
string thickness = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
string pi2Slices = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
ss << "\t{\n"
<< "\t\tint sl = (int)(fma(MwcNext01(mwc), " << slices << ", (real_t)(0.5)));\n"
<< "\t\tuint sl = MwcNextRange(mwc, (uint)" << slices << ");\n"
<< "\t\treal_t a = fma(" << pi2Slices << ", fma(" << thickness << ", MwcNext01(mwc), sl), " << rotation << ");\n"
<< "\t\treal_t r = " << weight << " * MwcNext01(mwc);\n"
<< "\n"
@ -4003,7 +4003,7 @@ public:
{
T a = helper.m_PrecalcAtanyx;
T lnr = T(0.5) * std::log(helper.m_PrecalcSumSquares);
T angle = m_C * a + m_D * lnr + m_Ang * Floor<T>(m_Power * rand.Frand01<T>());
T angle = m_C * a + m_D * lnr + m_Ang * rand.Rand(size_t(m_Power));
T m = m_Weight * std::exp(m_C * lnr - m_D * a);
helper.Out.x = m * std::cos(angle);
helper.Out.y = m * std::sin(angle);
@ -4026,7 +4026,7 @@ public:
ss << "\t{\n"
<< "\t\treal_t a = precalcAtanyx;\n"
<< "\t\treal_t lnr = (real_t)(0.5) * log(precalcSumSquares);\n"
<< "\t\treal_t angle = fma(" << c << ", a, fma(" << d << ", lnr, " << ang << " * floor(" << power << " * MwcNext01(mwc))));\n"
<< "\t\treal_t angle = fma(" << c << ", a, fma(" << d << ", lnr, " << ang << " * (real_t)MwcNextRange(mwc, (uint)" << power << ")));\n"
<< "\t\treal_t m = " << weight << " * exp(fma(" << c << ", lnr, -(" << d << " * a)));\n"
<< "\n"
<< "\t\tvOut.x = m * cos(angle);\n"
@ -5776,7 +5776,7 @@ public:
virtual void Func(IteratorHelper<T>& helper, Point<T>& outPoint, QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand) override
{
T r = m_Weight * std::pow(helper.m_PrecalcSumSquares, m_Cn);
int tRand = int(m_Rn * rand.Frand01<T>());
auto tRand = rand.Rand(size_t(m_Rn));
T a = (helper.m_PrecalcAtanyx + M_2PI * tRand) / m_Power;
T c = T(Floor<T>((m_Count * a + T(M_PI)) * T(M_1_PI) * T(0.5)));
a = a * m_Cf + c * m_Angle;
@ -5801,7 +5801,7 @@ public:
string cf = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
ss << "\t{\n"
<< "\t\treal_t r = " << weight << " * pow(precalcSumSquares, " << cn << ");\n"
<< "\t\tint tRand = (int)(" << rn << " * MwcNext01(mwc));\n"
<< "\t\tuint tRand = MwcNextRange(mwc, (uint)" << rn << ");\n"
<< "\t\treal_t a = fma(M_2PI, (real_t)tRand, precalcAtanyx) / " << power << ";\n"
<< "\t\treal_t c = floor(fma(" << count << ", a, MPI) * M1PI * (real_t)(0.5));\n"
<< "\n"

View File

@ -3141,7 +3141,7 @@ public:
virtual void Func(IteratorHelper<T>& helper, Point<T>& outPoint, QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand) override
{
T temp = Floor<T>(rand.Frand01<T>() * m_IP) * m_Pa;
T temp = rand.Crand() * m_Pa;
T sina = std::sin(temp);
T cosa = std::cos(temp);
T re = m_R * cosa;
@ -3169,7 +3169,7 @@ public:
string r = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
string ip = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
ss << "\t{\n"
<< "\t\treal_t temp = floor(MwcNext01(mwc) * " << ip << ") * " << pa << ";\n"
<< "\t\treal_t temp = MwcNextCrand(mwc) * " << pa << ";\n"
<< "\t\treal_t sina = sin(temp);\n"
<< "\t\treal_t cosa = cos(temp);\n"
<< "\t\treal_t re = " << r << " * cosa;\n"
@ -3190,9 +3190,9 @@ public:
virtual void Precalc() override
{
m_Pa = M_2PI / Zeps(m_P);
T cs = cos(m_Pa);
T r2 = T(1) - (cs - 1) / (cs + cos(M_2PI / Zeps(m_Q)));
m_R = (r2 > 0) ? T(1) / sqrt(r2) : T(1);
T cs = std::cos(m_Pa);
T r2 = T(1) - (cs - 1) / (cs + std::cos(M_2PI / Zeps(m_Q)));
m_R = (r2 > 0) ? T(1) / std::sqrt(r2) : T(1);
m_IP = T((int)m_P);
}
@ -3239,7 +3239,7 @@ public:
T x = (a * c + b * d);
T y = (b * c - a * d);
T vr = m_Weight / (SQR(c) + SQR(d));
T temp = Floor<T>(rand.Frand01<T>() * 32767) * m_Pa;
T temp = rand.Crand() * m_Pa;
T sina = std::sin(temp);
T cosa = std::cos(temp);
helper.Out.x = vr * (x * cosa + y * sina);
@ -3266,7 +3266,7 @@ public:
<< "\t\treal_t x = fma(a, c, b * d);\n"
<< "\t\treal_t y = fma(b, c, -(a * d));\n"
<< "\t\treal_t vr = " << weight << " / fma(c, c, SQR(d));\n"
<< "\t\treal_t temp = floor(MwcNext01(mwc) * 32767) * " << pa << ";\n"
<< "\t\treal_t temp = MwcNextCrand(mwc) * " << pa << ";\n"
<< "\t\treal_t sina = sin(temp);\n"
<< "\t\treal_t cosa = cos(temp);\n"
<< "\n"
@ -3280,8 +3280,8 @@ public:
virtual void Precalc() override
{
m_Pa = T(M_2PI) / Zeps(m_P);
T cs = cos(m_Pa);
T r2 = T(1) - (cs - T(1)) / (cs + cos(T(M_2PI) / Zeps(m_Q)));
T cs = std::cos(m_Pa);
T r2 = T(1) - (cs - T(1)) / (cs + std::cos(T(M_2PI) / Zeps(m_Q)));
m_R = (r2 > 0) ? T(1) / sqrt(r2) : T(1);
}
@ -3440,14 +3440,14 @@ public:
virtual void Func(IteratorHelper<T>& helper, Point<T>& outPoint, QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand) override
{
T temp = rand.Rand() * m_Pa;
T temp = rand.Crand() * m_Pa;
T cx = m_R * std::cos(temp);
T cy = m_R * std::sin(temp);
T s2x = 1 + SQR(cx) - SQR(cy);
T s2y = 1 + SQR(cy) - SQR(cx);
T r2 = helper.m_PrecalcSumSquares + SQR(helper.In.z);
T x2cx = 2 * cx * helper.In.x;
T y2cy = 2 * cy * helper.In.x;
T y2cy = 2 * cy * helper.In.y;
T d = m_Weight / Zeps(m_C2 * r2 + x2cx - y2cy + 1);
helper.Out.x = d * (helper.In.x * s2x - cx * (y2cy - r2 - 1));
helper.Out.y = d * (helper.In.y * s2y + cy * (-x2cx - r2 - 1));
@ -3468,14 +3468,14 @@ public:
string c2 = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
string s2z = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
ss << "\t{\n"
<< "\t\treal_t temp = MwcNext(mwc) * " << pa << ";\n"
<< "\t\treal_t temp = MwcNextCrand(mwc) * " << pa << ";\n"
<< "\t\treal_t cx = " << r << " * cos(temp);\n"
<< "\t\treal_t cy = " << r << " * sin(temp);\n"
<< "\t\treal_t s2x = fma(cx, cx, (real_t)(1.0)) - SQR(cy);\n"
<< "\t\treal_t s2y = fma(cy, cy, (real_t)(1.0)) - SQR(cx);\n"
<< "\t\treal_t r2 = precalcSumSquares + SQR(vIn.z);\n"
<< "\t\treal_t x2cx = 2 * cx * vIn.x;\n"
<< "\t\treal_t y2cy = 2 * cy * vIn.x;\n"
<< "\t\treal_t y2cy = 2 * cy * vIn.y;\n"
<< "\t\treal_t d = " << weight << " / Zeps(fma(" << c2 << ", r2, (x2cx - y2cy) + 1)); \n"
<< "\n"
<< "\t\tvOut.x = d * fma(vIn.x, s2x, -(cx * (y2cy - r2 - 1)));\n"
@ -3550,7 +3550,7 @@ public:
T x = helper.In.x * m_S2x - m_Cx * (-r2 - 1);
T y = helper.In.y * m_S2y;
T vr = m_Weight / (m_C2 * r2 + x2cx + 1);
T temp = rand.Rand() * m_Pa;
T temp = rand.Crand() * m_Pa;
T sina = std::sin(temp);
T cosa = std::cos(temp);
helper.Out.x = vr * (x * cosa + y * sina);
@ -3580,7 +3580,7 @@ public:
<< "\t\treal_t x = fma(vIn.x, " << s2x << ", -(" << cx << " * (-r2 - (real_t)(1.0))));\n"
<< "\t\treal_t y = vIn.y * " << s2y << ";\n"
<< "\t\treal_t vr = " << weight << " / fma(" << c2 << ", r2, x2cx + (real_t)(1.0));\n"
<< "\t\treal_t temp = MwcNext(mwc) * " << pa << ";\n"
<< "\t\treal_t temp = MwcNextCrand(mwc) * " << pa << ";\n"
<< "\t\treal_t sina = sin(temp);\n"
<< "\t\treal_t cosa = cos(temp);\n"
<< "\n"
@ -3720,7 +3720,7 @@ public:
{
T x = m_A * helper.In.x + m_B * helper.In.y + m_E;
T y = m_C * helper.In.x + m_D * helper.In.y + m_F;
T angle = (std::atan2(y, x) + M_2PI * rand.Rand(int(m_AbsN))) / m_Power;
T angle = (std::atan2(y, x) + M_2PI * rand.Rand(size_t(m_AbsN))) / m_Power;
T sina = std::sin(angle);
T cosa = std::cos(angle);
T r = m_Weight * std::pow(SQR(x) + SQR(y), m_Cn);
@ -3816,7 +3816,7 @@ public:
virtual void Func(IteratorHelper<T>& helper, Point<T>& outPoint, QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand) override
{
T a = helper.m_PrecalcAtanyx * m_InvPower + rand.Rand() * m_InvPower2pi;
T a = helper.m_PrecalcAtanyx * m_InvPower + rand.Crand() * m_InvPower2pi;
T sina = std::sin(a);
T cosa = std::cos(a);
T r = m_Weight * std::pow(helper.m_PrecalcSumSquares, m_HalfInvPower);
@ -3838,7 +3838,7 @@ public:
string invPower = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
string invPower2Pi = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
ss << "\t{\n"
<< "\t\treal_t a = fma(precalcAtanyx, " << invPower << ", MwcNext(mwc) * " << invPower2Pi << ");\n"
<< "\t\treal_t a = fma(precalcAtanyx, " << invPower << ", MwcNextCrand(mwc) * " << invPower2Pi << ");\n"
<< "\t\treal_t sina = sin(a);\n"
<< "\t\treal_t cosa = cos(a);\n"
<< "\t\treal_t r = " << weight << " * pow(precalcSumSquares, " << halfInvPower << ");\n"
@ -4085,14 +4085,27 @@ public:
virtual void Func(IteratorHelper<T>& helper, Point<T>& outPoint, QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand) override
{
T x = (m_IsOdd != 0) ? helper.In.x : m_Vvar * helper.m_PrecalcAtanxy;
T y = (m_IsOdd != 0) ? helper.In.y : m_Vvar2 * std::log(helper.m_PrecalcSumSquares);
T angle = (std::atan2(y, x) + M_2PI * rand.Rand(int(m_AbsN))) / m_Nnz;
T r = m_Weight * std::pow(SQR(x) + SQR(y), m_Cn) * ((m_IsOdd == 0) ? 1 : m_Parity);
T sina = std::sin(angle) * r;
T cosa = std::cos(angle) * r;
x = (m_IsOdd != 0) ? cosa : (m_Vvar2 * std::log(SQR(cosa) + SQR(sina)));
y = (m_IsOdd != 0) ? sina : (m_Vvar * std::atan2(cosa, sina));
T x, y;
if (m_IsOdd != 0)
{
T angle = (std::atan2(helper.In.y, helper.In.x) + M_2PI * rand.Rand(size_t(m_AbsN))) * m_Nnz;
T r = m_Weight * std::pow(SQR(helper.In.x) + SQR(helper.In.y), m_Cn) * m_Parity;
x = std::cos(angle) * r;
y = std::sin(angle) * r;
}
else
{
x = m_Vvar * helper.m_PrecalcAtanxy;
y = m_Vvar2 * std::log(helper.m_PrecalcSumSquares);
T angle = (std::atan2(y, x) + M_2PI * rand.Rand(size_t(m_AbsN))) * m_Nnz;
T r = m_Weight * std::pow(SQR(x) + SQR(y), m_Cn);
T sina = std::sin(angle) * r;
T cosa = std::cos(angle) * r;
x = m_Vvar2 * std::log(SQR(cosa) + SQR(sina));
y = m_Vvar * std::atan2(cosa, sina);
}
helper.Out.x = x;
helper.Out.y = y;
helper.Out.z = DefaultZ(helper);
@ -4114,15 +4127,27 @@ public:
string cn = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
string isOdd = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
ss << "\t{\n"
<< "\t\treal_t x = (" << isOdd << " != 0) ? vIn.x : " << vvar << " * precalcAtanxy;\n"
<< "\t\treal_t y = (" << isOdd << " != 0) ? vIn.y : " << vvar2 << " * log(precalcSumSquares);\n"
<< "\t\treal_t angle = fma(M_2PI, MwcNextRange(mwc, (uint)" << absn << "), atan2(y, x)) / " << nnz << ";\n"
<< "\t\treal_t r = " << weight << " * pow(fma(x, x, SQR(y)), " << cn << ") * ((" << isOdd << " == 0) ? 1 : " << parity << ");\n"
<< "\t\treal_t sina = sin(angle) * r;\n"
<< "\t\treal_t cosa = cos(angle) * r;\n"
<< "\t\treal_t x, y;\n"
<< "\n"
<< "\t\tif (" << isOdd << " != 0)\n"
<< "\t\t{\n"
<< "\t\t real_t angle = (atan2(vIn.y, vIn.x) + M_2PI * MwcNextRange(mwc, (uint)" << absn << ")) * " << nnz << ";\n"
<< "\t\t real_t r = " << weight << " * pow(SQR(vIn.x) + SQR(vIn.y), " << cn << ") * " << parity << ";\n"
<< "\t\t x = cos(angle) * r;\n"
<< "\t\t y = sin(angle) * r;\n"
<< "\t\t}\n"
<< "\t\telse\n"
<< "\t\t{\n"
<< "\t\t x = " << vvar << " * precalcAtanxy;\n"
<< "\t\t y = " << vvar2 << " * log(precalcSumSquares);\n"
<< "\t\t real_t angle = (atan2(y, x) + M_2PI * MwcNextRange(mwc, (uint)" << absn << ")) * " << nnz << ";\n"
<< "\t\t real_t r = " << weight << " * pow(SQR(x) + SQR(y), " << cn << ");\n"
<< "\t\t real_t sina = sin(angle) * r;\n"
<< "\t\t real_t cosa = cos(angle) * r;\n"
<< "\t\t x = " << vvar2 << " * log(SQR(cosa) + SQR(sina));\n"
<< "\t\t y = " << vvar << " * atan2(cosa, sina);\n"
<< "\t\t}\n"
<< "\n"
<< "\t\tx = (" << isOdd << " != 0) ? cosa : (" << vvar2 << " * log(fma(cosa, cosa, SQR(sina))));\n"
<< "\t\ty = (" << isOdd << " != 0) ? sina : (" << vvar << " * atan2(cosa, sina));\n"
<< "\t\tvOut.x = x;\n"
<< "\t\tvOut.y = y;\n"
<< "\t\tvOut.z = " << DefaultZCl()
@ -4132,7 +4157,7 @@ public:
virtual void Precalc() override
{
m_Nnz = (m_N == 0) ? 1 : m_N;
m_Nnz = 1 / ((m_N == 0) ? 1 : m_N);
m_Vvar = m_Weight / T(M_PI);
m_Vvar2 = m_Vvar * T(0.5);
m_AbsN = abs(m_Nnz);
@ -5508,7 +5533,7 @@ public:
virtual void Func(IteratorHelper<T>& helper, Point<T>& outPoint, QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand) override
{
T angle = (helper.m_PrecalcAtanyx + M_2PI * rand.Rand(int(m_AbsN))) / m_Power;
T angle = (helper.m_PrecalcAtanyx + M_2PI * rand.Rand(size_t(m_AbsN))) / m_Power;
T r = m_Weight * std::pow(helper.m_PrecalcSumSquares, m_Cn);
T sina = std::sin(angle);
T cosa = std::cos(angle);
@ -5959,12 +5984,12 @@ public:
a += M_2PI * n;
if (std::cos(a * m_InvSpread) < rand.Rand() * T(2) / 0xFFFFFFFF - T(1))//Rand max.
if (std::cos(a * m_InvSpread) < rand.Frand11<T>())
a -= m_FullSpread;
T lnr2 = std::log(helper.m_PrecalcSumSquares);
T r = m_Weight * std::exp(m_HalfC * lnr2 - m_D * a);
T temp = m_C * a + m_HalfD * lnr2 + m_Ang * rand.Rand();
T temp = m_C * a + m_HalfD * lnr2 + m_Ang * rand.Crand();
helper.Out.x = r * std::cos(temp);
helper.Out.y = r * std::sin(temp);
helper.Out.z = DefaultZ(helper);
@ -5997,12 +6022,12 @@ public:
<< "\n"
<< "\t\ta += M_2PI * n;\n"
<< "\n"
<< "\t\tif (cos(a * " << invSpread << ") < MwcNext(mwc) * (real_t)2.0 / 0xFFFFFFFF - (real_t)1.0)\n"
<< "\t\tif (cos(a * " << invSpread << ") < MwcNextNeg1Pos1(mwc))\n"
<< "\t\t a -= " << fullSpread << ";\n"
<< "\n"
<< "\t\treal_t lnr2 = log(precalcSumSquares);\n"
<< "\t\treal_t r = " << weight << " * exp(fma(" << halfC << ", lnr2, -(" << d << " * a)));\n"
<< "\t\treal_t temp = fma(" << c << ", a, fma(" << halfD << ", lnr2, " << ang << " * MwcNext(mwc)));\n"
<< "\t\treal_t temp = fma(" << c << ", a, fma(" << halfD << ", lnr2, " << ang << " * MwcNextCrand(mwc)));\n"
<< "\n"
<< "\t\tvOut.x = r * cos(temp);\n"
<< "\t\tvOut.y = r * sin(temp);\n"

View File

@ -2697,7 +2697,7 @@ public:
{
T preX = helper.In.x * (m_XDistort + 1);
T preY = helper.In.y * (m_YDistort + 1);
T temp = std::atan2(preY, preX) * m_InvN + rand.Rand() * m_Inv2PiN;
T temp = std::atan2(preY, preX) * m_InvN + rand.Crand() * m_Inv2PiN;
T r = m_Weight * std::pow(helper.m_PrecalcSumSquares, m_Cn);
helper.Out.x = r * std::cos(temp);
helper.Out.y = r * std::sin(temp);
@ -2721,7 +2721,7 @@ public:
ss << "\t{\n"
<< "\t\treal_t preX = vIn.x * (" << xDistort << " + 1);\n"
<< "\t\treal_t preY = vIn.y * (" << yDistort << " + 1);\n"
<< "\t\treal_t temp = fma(atan2(preY, preX), " << invN << ", MwcNext(mwc) * " << inv2PiN << ");\n"
<< "\t\treal_t temp = fma(atan2(preY, preX), " << invN << ", MwcNextCrand(mwc) * " << inv2PiN << ");\n"
<< "\t\treal_t r = " << weight << " * pow(precalcSumSquares, " << cN << ");\n"
<< "\n"
<< "\t\tvOut.x = r * cos(temp);\n"
@ -2744,7 +2744,7 @@ protected:
{
string prefix = Prefix();
m_Params.clear();
m_Params.push_back(ParamWithName<T>(&m_Power, prefix + "phoenix_julia_power", 2));
m_Params.push_back(ParamWithName<T>(&m_Power, prefix + "phoenix_julia_power", 2));//Original omitted _julia.
m_Params.push_back(ParamWithName<T>(&m_Dist, prefix + "phoenix_julia_dist", 1));
m_Params.push_back(ParamWithName<T>(&m_XDistort, prefix + "phoenix_julia_x_distort", T(-0.5)));//Original omitted phoenix_ prefix.
m_Params.push_back(ParamWithName<T>(&m_YDistort, prefix + "phoenix_julia_y_distort"));

View File

@ -1712,28 +1712,28 @@ public:
switch (rand.Rand(5))
{
case 0:
a = (rand.Rand(ISAAC_INT(m_Slices)) + rand.Frand01<T>() * m_XThickness) / m_Slices;
r = (rand.Rand(ISAAC_INT(m_Slices)) + rand.Frand01<T>() * m_YThickness) / m_Slices;
a = (rand.Rand(size_t(m_Slices)) + rand.Frand01<T>() * m_XThickness) / m_Slices;
r = (rand.Rand(size_t(m_Slices)) + rand.Frand01<T>() * m_YThickness) / m_Slices;
break;
case 1:
a = (rand.Rand(ISAAC_INT(m_Slices)) + rand.Frand01<T>()) / m_Slices;
r = (rand.Rand(ISAAC_INT(m_Slices)) + m_YThickness) / m_Slices;
a = (rand.Rand(size_t(m_Slices)) + rand.Frand01<T>()) / m_Slices;
r = (rand.Rand(size_t(m_Slices)) + m_YThickness) / m_Slices;
break;
case 2:
a = (rand.Rand(ISAAC_INT(m_Slices)) + m_XThickness) / m_Slices;
r = (rand.Rand(ISAAC_INT(m_Slices)) + rand.Frand01<T>()) / m_Slices;
a = (rand.Rand(size_t(m_Slices)) + m_XThickness) / m_Slices;
r = (rand.Rand(size_t(m_Slices)) + rand.Frand01<T>()) / m_Slices;
break;
case 3:
a = rand.Frand01<T>();
r = (rand.Rand(ISAAC_INT(m_Slices)) + m_YThickness + rand.Frand01<T>() * (1 - m_YThickness)) / m_Slices;
r = (rand.Rand(size_t(m_Slices)) + m_YThickness + rand.Frand01<T>() * (1 - m_YThickness)) / m_Slices;
break;
case 4:
default:
a = (rand.Rand(ISAAC_INT(m_Slices)) + m_XThickness + rand.Frand01<T>() * (1 - m_XThickness)) / m_Slices;
a = (rand.Rand(size_t(m_Slices)) + m_XThickness + rand.Frand01<T>() * (1 - m_XThickness)) / m_Slices;
r = rand.Frand01<T>();
break;
}
@ -1759,27 +1759,27 @@ public:
ss << "\t{\n"
<< "\t\treal_t a = 0, r = 0;\n"
<< "\n"
<< "\t\tswitch (MwcNextRange(mwc, 5))\n"
<< "\t\tswitch (MwcNextRange(mwc, 5u))\n"
<< "\t\t{\n"
<< "\t\t case 0:\n"
<< "\t\t a = (MwcNextRange(mwc, (int)" << slices << ") + MwcNext01(mwc) * " << xThickness << ") / " << slices << ";\n"
<< "\t\t r = (MwcNextRange(mwc, (int)" << slices << ") + MwcNext01(mwc) * " << yThickness << ") / " << slices << ";\n"
<< "\t\t a = (MwcNextRange(mwc, (uint)" << slices << ") + MwcNext01(mwc) * " << xThickness << ") / " << slices << ";\n"
<< "\t\t r = (MwcNextRange(mwc, (uint)" << slices << ") + MwcNext01(mwc) * " << yThickness << ") / " << slices << ";\n"
<< "\t\t break;\n"
<< "\t\t case 1:\n"
<< "\t\t a = (MwcNextRange(mwc, (int)" << slices << ") + MwcNext01(mwc)) / " << slices << ";\n"
<< "\t\t r = (MwcNextRange(mwc, (int)" << slices << ") + " << yThickness << ") / " << slices << ";\n"
<< "\t\t a = (MwcNextRange(mwc, (uint)" << slices << ") + MwcNext01(mwc)) / " << slices << ";\n"
<< "\t\t r = (MwcNextRange(mwc, (uint)" << slices << ") + " << yThickness << ") / " << slices << ";\n"
<< "\t\t break;\n"
<< "\t\t case 2:\n"
<< "\t\t a = (MwcNextRange(mwc, (int)" << slices << ") + " << xThickness << ") / " << slices << ";\n"
<< "\t\t r = (MwcNextRange(mwc, (int)" << slices << ") + MwcNext01(mwc)) / " << slices << ";\n"
<< "\t\t a = (MwcNextRange(mwc, (uint)" << slices << ") + " << xThickness << ") / " << slices << ";\n"
<< "\t\t r = (MwcNextRange(mwc, (uint)" << slices << ") + MwcNext01(mwc)) / " << slices << ";\n"
<< "\t\t break;\n"
<< "\t\t case 3:\n"
<< "\t\t a = MwcNext01(mwc);\n"
<< "\t\t r = fma(MwcNext01(mwc), 1 - " << yThickness << ", MwcNextRange(mwc, (int)" << slices << ") + " << yThickness << ") / " << slices << ";\n"
<< "\t\t r = fma(MwcNext01(mwc), 1 - " << yThickness << ", MwcNextRange(mwc, (uint)" << slices << ") + " << yThickness << ") / " << slices << ";\n"
<< "\t\t break;\n"
<< "\t\t case 4:\n"
<< "\t\t default:\n"
<< "\t\t a = fma(MwcNext01(mwc), (1 - " << xThickness << "), MwcNextRange(mwc, (int)" << slices << ") + " << xThickness << ") / " << slices << ";\n"
<< "\t\t a = fma(MwcNext01(mwc), (1 - " << xThickness << "), MwcNextRange(mwc, (uint)" << slices << ") + " << xThickness << ") / " << slices << ";\n"
<< "\t\t r = MwcNext01(mwc);\n"
<< "\t\t break;\n"
<< "\t\t}\n"
@ -5292,8 +5292,8 @@ public:
virtual void Func(IteratorHelper<T>& helper, Point<T>& outPoint, QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand) override
{
T arg = helper.m_PrecalcAtanyx + fmod(T(rand.Rand()), T(1 / m_ReInv)) * M_2PI;
T lnmod = m_Dist * T(0.5) * std::log(helper.m_PrecalcSumSquares);
T arg = helper.m_PrecalcAtanyx + fmod(T(rand.Rand()), m_OneOverReInv) * M_2PI;
T lnmod = m_HalfDist * std::log(helper.m_PrecalcSumSquares);
T temp = arg * m_ReInv + lnmod * m_Im100;
T mod2 = std::exp(lnmod * m_ReInv - arg * m_Im100);
helper.Out.x = m_Weight * mod2 * std::cos(temp);
@ -5308,14 +5308,16 @@ public:
ss2 << "_" << XformIndexInEmber() << "]";
string index = ss2.str();
string weight = WeightDefineString();
string re = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
string im = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
string dist = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
string reInv = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
string im100 = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
string re = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
string im = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
string dist = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
string halfDist = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
string reInv = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
string oneOverReInv = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
string im100 = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
ss << "\t{\n"
<< "\t\treal_t arg = fma(fmod((real_t)MwcNext(mwc), (real_t)((real_t)(1.0) / " << reInv << ")), M_2PI, precalcAtanyx);\n"
<< "\t\treal_t lnmod = " << dist << " * (real_t)(0.5) * log(precalcSumSquares);\n"
<< "\t\treal_t arg = fma(fmod((real_t)MwcNext(mwc), (real_t)(" << oneOverReInv << ")), M_2PI, precalcAtanyx);\n"
<< "\t\treal_t lnmod = " << halfDist << " * log(precalcSumSquares);\n"
<< "\t\treal_t temp = fma(arg, " << reInv << ", lnmod * " << im100 << ");\n"
<< "\t\treal_t mod2 = exp(fma(lnmod, " << reInv << ", -(arg * " << im100 << ")));\n"
<< "\n"
@ -5328,7 +5330,9 @@ public:
/// <summary>
/// Precompute the values derived from the re, im and dist params
/// so that Func() does not have to recompute them on every iteration.
/// </summary>
virtual void Precalc() override
{
	//Reciprocal chain first: m_OneOverReInv depends on m_ReInv, so this order must be kept.
	m_ReInv = 1 / Zeps(m_Re);//NOTE(review): Zeps() presumably keeps the divisor away from zero - confirm.
	m_OneOverReInv = 1 / m_ReInv;//Used as the fmod() modulus in Func().
	//The remaining values are independent of the ones above.
	m_HalfDist = m_Dist * T(0.5);
	m_Im100 = m_Im * T(0.01);
}
@ -5340,15 +5344,19 @@ protected:
m_Params.push_back(ParamWithName<T>(&m_Re, prefix + "Juliac_re", 2));
m_Params.push_back(ParamWithName<T>(&m_Im, prefix + "Juliac_im", 1));
m_Params.push_back(ParamWithName<T>(&m_Dist, prefix + "Juliac_dist", 1));
m_Params.push_back(ParamWithName<T>(true, &m_ReInv, prefix + "Juliac_re_inv"));
m_Params.push_back(ParamWithName<T>(true, &m_Im100, prefix + "Juliac_im100"));
m_Params.push_back(ParamWithName<T>(true, &m_HalfDist, prefix + "Juliac_half_dist"));
m_Params.push_back(ParamWithName<T>(true, &m_ReInv, prefix + "Juliac_re_inv"));
m_Params.push_back(ParamWithName<T>(true, &m_OneOverReInv, prefix + "Juliac_one_over_re_inv"));
m_Params.push_back(ParamWithName<T>(true, &m_Im100, prefix + "Juliac_im100"));
}
private:
T m_Re;
T m_Im;
T m_Dist;
T m_HalfDist;
T m_ReInv;
T m_OneOverReInv;
T m_Im100;
};

View File

@ -1275,7 +1275,7 @@ public:
virtual void Func(IteratorHelper<T>& helper, Point<T>& outPoint, QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand) override
{
T temp = helper.m_PrecalcAtanyx * m_InvPower + rand.Rand() * m_InvPower2pi;
T temp = helper.m_PrecalcAtanyx * m_InvPower + rand.Crand() * m_InvPower2pi;
T sina = std::sin(temp);
T cosa = std::cos(temp);
T z = helper.In.z * m_AbsInvPower;
@ -1301,7 +1301,7 @@ public:
string halfInvPower = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
string invPower2pi = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
ss << "\t{\n"
<< "\t\treal_t temp = fma(precalcAtanyx, " << invPower << ", MwcNext(mwc) * " << invPower2pi << ");\n"
<< "\t\treal_t temp = fma(precalcAtanyx, " << invPower << ", MwcNextCrand(mwc) * " << invPower2pi << ");\n"
<< "\t\treal_t sina = sin(temp);\n"
<< "\t\treal_t cosa = cos(temp);\n"
<< "\t\treal_t z = vIn.z * " << absInvPower << ";\n"
@ -3198,7 +3198,7 @@ public:
<< "\t\t{\n"//InverseTrilinear function extracted out here.
<< "\t\t real_t inx = fma(alpha - " << radius << ", " << cosC << ", beta - " << radius << ") / " << sinC << ";\n"
<< "\t\t real_t iny = alpha - " << radius << ";\n"
<< "\t\t real_t angle = fma(M_2PI, (real_t)MwcNextRange(mwc, (int)" << absN << "), atan2(iny, inx)) / " << power << ";\n"
<< "\t\t real_t angle = fma(M_2PI, (real_t)MwcNextRange(mwc, (uint)" << absN << "), atan2(iny, inx)) / " << power << ";\n"
<< "\t\t real_t r = " << weight << " * pow(fma(inx, inx, SQR(iny)), " << cn << ");\n"
<< "\n"
<< "\t\t x = r * cos(angle);\n"
@ -3434,7 +3434,7 @@ private:
{
T inx = (be - m_Radius + (al - m_Radius) * m_CosC) / m_SinC;
T iny = al - m_Radius;
T angle = (std::atan2(iny, inx) + M_2PI * (rand.Rand(int(m_AbsN)))) / m_Power;
T angle = (std::atan2(iny, inx) + M_2PI * (rand.Rand(size_t(m_AbsN)))) / m_Power;
T r = m_Weight * std::pow(SQR(inx) + SQR(iny), m_Cn);
x = r * std::cos(angle);
y = r * std::sin(angle);
@ -4037,7 +4037,7 @@ public:
<< "\n"
<< "\t\tif (" << rswtch << ")\n"
<< "\t\t{\n"
<< "\t\t uint loc = MwcNextRange(mwc, 6);\n"
<< "\t\t uint loc = MwcNextRange(mwc, 6u);\n"
<< "\t\t tempx = parVars[" << seg60xStartIndex << " + loc];\n"
<< "\t\t tempy = parVars[" << seg60yStartIndex << " + loc];\n"
<< "\t\t scale3 = 1;\n"
@ -4045,7 +4045,7 @@ public:
<< "\t\t}\n"
<< "\t\telse\n"
<< "\t\t{\n"
<< "\t\t uint loc = MwcNextRange(mwc, 3);\n"
<< "\t\t uint loc = MwcNextRange(mwc, 3u);\n"
<< "\t\t tempx = parVars[" << seg120xStartIndex << " + loc];\n"
<< "\t\t tempy = parVars[" << seg120yStartIndex << " + loc];\n"
<< "\t\t scale3 = " << side3 << ";\n"

View File

@ -859,7 +859,7 @@ private:
if (params.ExactCalc == 1)
angXY = rand.Frand01<T>() * M_2PI;
else
angXY = (std::atan(params.ArcTan1 * (rand.Frand01<T>() - T(0.5))) / params.ArcTan2 + T(0.5) + T(rand.Rand(glm::uint(params.NumEdges)))) * params.MidAngle;
angXY = (std::atan(params.ArcTan1 * (rand.Frand01<T>() - T(0.5))) / params.ArcTan2 + T(0.5) + T(rand.Rand(size_t(params.NumEdges)))) * params.MidAngle;
sincos(angXY, &params.X, &params.Y);
angMem = angXY;

View File

@ -919,7 +919,7 @@ public:
<< "\n"
<< "\t\tfor (i = 0; i < 7; i++)\n"
<< "\t\t{\n"
<< "\t\t adp = MwcNextRange(mwc, 10) - 5;\n"
<< "\t\t adp = MwcNextRange(mwc, 10u) - 5;\n"
<< "\n"
<< "\t\t if (abs(adp) >= 3)\n"
<< "\t\t adp = 0;\n"
@ -1108,7 +1108,7 @@ public:
T temp, x = helper.In.x / m_Width;
bool pos = x > 0;
if (std::cos((pos ? x - (int)x : x + (int)x) * T(M_PI)) < rand.Frand01<T>() * 2 - 1)
if (std::cos((pos ? x - (int)x : x + (int)x) * T(M_PI)) < rand.Frand11<T>())
temp = pos ? -m_Vwidth : m_Vwidth;
else
temp = 0;
@ -1131,7 +1131,7 @@ public:
<< "\t\treal_t temp, x = vIn.x / Zeps(" << width << ");\n"
<< "\t\tbool pos = x > 0;\n"
<< "\n"
<< "\t\tif (cos((pos ? x - (int)x : x + (int)x) * MPI) < MwcNext01(mwc) * 2 - 1)\n"
<< "\t\tif (cos((pos ? x - (int)x : x + (int)x) * MPI) < MwcNextNeg1Pos1(mwc))\n"
<< "\t\t temp = pos ? -" << vwidth << " : " << vwidth << ";\n"
<< "\t\telse\n"
<< "\t\t temp = 0;\n"
@ -1183,7 +1183,7 @@ public:
virtual void Func(IteratorHelper<T>& helper, Point<T>& outPoint, QTIsaac<ISAAC_SIZE, ISAAC_INT>& rand) override
{
T temp = Round(std::log(rand.Frand01<T>()) * (rand.Rand() & 1 ? m_Spread : -m_Spread));
T temp = Round(std::log(rand.Frand01<T>()) * (rand.RandBit() ? m_Spread : -m_Spread));
helper.Out.x = m_Weight * (helper.In.x + temp);
helper.Out.y = m_Weight * helper.In.y;
helper.Out.z = DefaultZ(helper);
@ -2349,7 +2349,7 @@ public:
a += (rand.RandBit() ? M_2PI : -M_2PI) * std::round(std::log(rand.Frand01<T>()) * m_Coeff);
T lnr2 = std::log(helper.m_PrecalcSumSquares);
T r = m_Weight * std::exp(m_HalfC * lnr2 - m_PrecalcD * a);
T temp = m_PrecalcC * a + m_HalfD * lnr2 + m_Ang * rand.Rand();
T temp = m_PrecalcC * a + m_HalfD * lnr2 + m_Ang * rand.Crand();
helper.Out.x = r * std::cos(temp);
helper.Out.y = r * std::sin(temp);
helper.Out.z = DefaultZ(helper);
@ -2383,7 +2383,7 @@ public:
<< "\t\ta += ((MwcNext(mwc) & 1) ? M_2PI : -M_2PI) * round(log(MwcNext01(mwc)) * " << coeff << ");\n"
<< "\t\treal_t lnr2 = log(precalcSumSquares);\n"
<< "\t\treal_t r = " << weight << " * exp(fma(" << halfc << ", lnr2, -(" << precalcd << " * a)));\n"
<< "\t\treal_t temp = fma(" << precalcc << ", a, fma(" << halfd << ", lnr2, " << ang << " * MwcNext(mwc)));\n"
<< "\t\treal_t temp = fma(" << precalcc << ", a, fma(" << halfd << ", lnr2, " << ang << " * MwcNextCrand(mwc)));\n"
<< "\t\tvOut.x = r * cos(temp);\n"
<< "\t\tvOut.y = r * sin(temp);\n"
<< "\t\tvOut.z = " << DefaultZCl()
@ -7724,11 +7724,6 @@ public:
m_PowerhelperPrecalc = T(1.0) / std::abs(m_Power);
}
virtual vector<string> OpenCLGlobalFuncNames() const override
{
return vector<string> { "Zeps" };
}
protected:
void Init()
{

View File

@ -282,8 +282,8 @@ public:
<< "\t\treal_t x, y, z;\n"
<< "\t\treal_t p = 2 * MwcNext01(mwc) - 1;\n"
<< "\t\treal_t q = 2 * MwcNext01(mwc) - 1;\n"
<< "\t\tuint i = MwcNextRange(mwc, 3);\n"
<< "\t\tuint j = MwcNext(mwc) & 1;\n"
<< "\t\tuint i = MwcNextRange(mwc, 3u);\n"
<< "\t\tuint j = MwcNext(mwc) & 1u;\n"
<< "\n"
<< "\t\tswitch (i)\n"
<< "\t\t{\n"

View File

@ -250,7 +250,11 @@ XmlToEmber<T>::XmlToEmber()
{ "mode", "unicorngaloshen_mode" },
{ "d_spher_weight", "d_spherical_weight" },
{ "poincare_p", "poincare2_p" },
{ "poincare_q", "poincare2_q" }
{ "poincare_q", "poincare2_q" },
{ "phoenix_power", "phoenix_julia_power"},
{ "phoenix_dist", "phoenix_julia_dist" },
{ "x_distort", "phoenix_julia_x_distort"},
{ "y_distort", "phoenix_julia_y_distort" }
};
m_FlattenNames =
{

View File

@ -3,6 +3,16 @@
namespace EmberCLns
{
/// <summary>
/// Default constructor which performs no explicit initialization.
/// An empty instance must be constructible because RendererCL holds this class
/// before it is initialized there with specific arguments.
/// </summary>
DEOpenCLKernelCreator::DEOpenCLKernelCreator() = default;
/// <summary>
/// Constructor that sets all kernel entry points as well as composes
/// all kernel source strings.
@ -330,77 +340,81 @@ string DEOpenCLKernelCreator::CreateGaussianDEKernel(size_t ss)
"\n"
" barrier(CLK_LOCAL_MEM_FENCE);\n"
"\n"
" if (threadHistRow < botBound && threadHistCol < rightBound)\n"
" {\n"
" if (threadHistRow < botBound && threadHistCol < rightBound)\n"//This is done to avoid putting barriers inside of conidtionals.
" bucket = histogram[(threadHistRow * densityFilter->m_SuperRasW) + threadHistCol];\n"
" else\n"
" bucket = 0.0;\n"
"\n"
" if (bucket.w != 0)\n"
" {\n"
" cacheLog = (densityFilter->m_K1 * log((real_bucket_t)fma(bucket.w, densityFilter->m_K2, (real_bucket_t)1.0))) / bucket.w;\n";
" if (bucket.w != 0)\n"//This is done to avoid putting barriers inside of conidtionals.
" {\n"
" cacheLog = (densityFilter->m_K1 * log((real_bucket_t)fma(bucket.w, densityFilter->m_K2, (real_bucket_t)1.0))) / bucket.w;\n";
if (doSS)
{
os <<
" filterSelect = 0;\n"
" densityBoxLeftX = threadHistCol - min(threadHistCol, ss);\n"
" densityBoxRightX = threadHistCol + min(ss, (densityFilter->m_SuperRasW - threadHistCol) - 1);\n"
" densityBoxTopY = threadHistRow - min(threadHistRow, ss);\n"
" densityBoxBottomY = threadHistRow + min(ss, (densityFilter->m_SuperRasH - threadHistRow) - 1);\n"
" filterSelect = 0;\n"
" densityBoxLeftX = threadHistCol - min(threadHistCol, ss);\n"
" densityBoxRightX = threadHistCol + min(ss, (densityFilter->m_SuperRasW - threadHistCol) - 1);\n"
" densityBoxTopY = threadHistRow - min(threadHistRow, ss);\n"
" densityBoxBottomY = threadHistRow + min(ss, (densityFilter->m_SuperRasH - threadHistRow) - 1);\n"
"\n"
" for (j = densityBoxTopY; j <= densityBoxBottomY; j++)\n"
" for (j = densityBoxTopY; j <= densityBoxBottomY; j++)\n"
" {\n"
" for (i = densityBoxLeftX; i <= densityBoxRightX; i++)\n"
" {\n"
" for (i = densityBoxLeftX; i <= densityBoxRightX; i++)\n"
" {\n"
" filterSelect += histogram[i + (j * densityFilter->m_SuperRasW)].w;\n"
" }\n"
" filterSelect += histogram[i + (j * densityFilter->m_SuperRasW)].w;\n"
" }\n"
" }\n"
"\n";
if (doScf)
os <<
" filterSelect *= scfact;\n";
" filterSelect *= scfact;\n";
}
else
{
os <<
" filterSelect = bucket.w;\n";
" filterSelect = bucket.w;\n";
}
os <<
" }\n"
" else\n"
" {\n"
" cacheLog = 0.0;\n"
" filterSelect = 1.0;\n"//Will subtract 1 to be 0 below.
" }\n"
"\n"
" if (filterSelect > densityFilter->m_MaxFilteredCounts)\n"
" filterSelectInt = densityFilter->m_MaxFilterIndex;\n"
" else if (filterSelect <= DE_THRESH)\n"
" filterSelectInt = (int)ceil(filterSelect) - 1;\n"
" else\n"
" filterSelectInt = (int)DE_THRESH + (int)floor(pow((real_bucket_t)(filterSelect - DE_THRESH), densityFilter->m_Curve));\n"
" if (filterSelect > densityFilter->m_MaxFilteredCounts)\n"
" filterSelectInt = densityFilter->m_MaxFilterIndex;\n"
" else if (filterSelect <= DE_THRESH)\n"
" filterSelectInt = (int)ceil(filterSelect) - 1;\n"
" else\n"
" filterSelectInt = (int)DE_THRESH + (int)floor(pow((real_bucket_t)(filterSelect - DE_THRESH), densityFilter->m_Curve));\n"
"\n"
" if (filterSelectInt > densityFilter->m_MaxFilterIndex)\n"
" filterSelectInt = densityFilter->m_MaxFilterIndex;\n"
" if (filterSelectInt > densityFilter->m_MaxFilterIndex)\n"
" filterSelectInt = densityFilter->m_MaxFilterIndex;\n"
"\n"
" filterCoefIndex = filterSelectInt * densityFilter->m_KernelSize;\n"
" filterCoefIndex = filterSelectInt * densityFilter->m_KernelSize;\n"
"\n"
//With this new method, only accumulate to the temp local buffer first. Write to the final accumulator last.
//For each loop through, note that there is a local memory barrier call inside of each call to AddToAccumNoCheck().
//If this isn't done, pixel errors occur and even an out of resources error occurs because too many writes are done to the same place in memory at once.
" k = (int)densityFilter->m_FilterWidth;\n"//Need a signed int to use below, really is filter width, but reusing a variable to save space.
" k = (int)densityFilter->m_FilterWidth;\n"//Need a signed int to use below, really is filter width, but reusing a variable to save space.
"\n"
" for (j = -k; j <= k; j++)\n"
" for (j = -k; j <= k; j++)\n"
" {\n"
" for (i = -k; i <= k; i++)\n"
" {\n"
" filterSelectInt = filterCoefIndex + coefIndices[(abs(j) * (densityFilter->m_FilterWidth + 1)) + abs(i)];\n"//Really is filterCoeffIndexPlusOffset, but reusing a variable to save space.
"\n"
" if (filterCoefs[filterSelectInt] != 0)\n"//This conditional actually improves speed, despite SIMT being bad at conditionals.
" {\n"
" for (i = -k; i <= k; i++)\n"
" {\n"
" filterSelectInt = filterCoefIndex + coefIndices[(abs(j) * (densityFilter->m_FilterWidth + 1)) + abs(i)];\n"//Really is filterCoeffIndexPlusOffset, but reusing a variable to save space.
"\n"
" if (filterCoefs[filterSelectInt] != 0)\n"//This conditional actually improves speed, despite SIMT being bad at conditionals.
" {\n"
" filterBox[(i + boxCol) + ((j + boxRow) * fullTempBoxWidth)].m_Real4 += (bucket * (filterCoefs[filterSelectInt] * cacheLog));\n"
" }\n"
" }\n"
" barrier(CLK_LOCAL_MEM_FENCE);\n"//If this is the only barrier and the block size is exactly 16, it works perfectly. Otherwise, no chunks occur, but a many streaks.
" filterBox[(i + boxCol) + ((j + boxRow) * fullTempBoxWidth)].m_Real4 += (bucket * (filterCoefs[filterSelectInt] * cacheLog));\n"
" }\n"
" }\n"//bucket.w != 0.
" }\n"//In bounds.
"\n"
" }\n"
" barrier(CLK_LOCAL_MEM_FENCE);\n"//If this is the only barrier and the block size is exactly 16, it works perfectly. Otherwise, no chunks occur, but a many streaks.
" }\n"
"\n"
" barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);\n"
"\n"

View File

@ -29,6 +29,7 @@ namespace EmberCLns
class EMBERCL_API DEOpenCLKernelCreator
{
public:
DEOpenCLKernelCreator();
DEOpenCLKernelCreator(bool doublePrecision, bool nVidia);
//Accessors.

View File

@ -184,6 +184,11 @@ static const char* RandFunctionString =
" return MwcNext(s) * (real_t)(1.0 / 4294967296.0);\n"
"}\n"
"\n"
"inline uint MwcNextCrand(uint2* s)\n"
"{\n"
" return MwcNextRange(s, 32767u);\n"
"}\n"
"\n"
"inline real_t MwcNextFRange(uint2* s, real_t lower, real_t upper)\n"
"{\n"
" real_t f = (real_t)MwcNext(s) / (real_t)UINT_MAX;\n"

View File

@ -56,7 +56,7 @@ using namespace EmberNs;
//This special define is made to fix buggy OpenCL compilers on Mac.
//Rendering is much slower there for unknown reasons. Michel traced it down
//to the consec variable which keeps track of how many tries are needed to computer
//to the consec variable which keeps track of how many tries are needed to compute
//a point which is not a bad value. Strangely, keeping this as a local variable
//is slower than keeping it as an element in a global array.
//This is counterintuitive, and lends further weight to the idea that OpenCL on Mac

View File

@ -26,7 +26,6 @@ template <typename T, typename bucketT>
RendererCL<T, bucketT>::RendererCL(const vector<pair<size_t, size_t>>& devices, bool shared, GLuint outputTexID)
:
m_IterOpenCLKernelCreator(),
m_DEOpenCLKernelCreator(typeid(T) == typeid(double), false),
m_FinalAccumOpenCLKernelCreator(typeid(T) == typeid(double))
{
m_PaletteFormat.image_channel_order = CL_RGBA;
@ -114,7 +113,7 @@ bool RendererCL<T, bucketT>::Init(const vector<pair<size_t, size_t>>& devices, b
if (b && (m_Devices.size() == devices.size()))
{
auto& firstWrapper = m_Devices[0]->m_Wrapper;
m_DEOpenCLKernelCreator = DEOpenCLKernelCreator(m_DoublePrecision, m_Devices[0]->Nvidia());
m_DEOpenCLKernelCreator = DEOpenCLKernelCreator(m_DoublePrecision, m_Devices[0]->Nvidia());//This will cause it to be created a second time, because it was already done once in the constructor.
//Build a simple program to ensure OpenCL is working right.
if (b && !(b = firstWrapper.AddProgram(m_DEOpenCLKernelCreator.LogScaleAssignDEEntryPoint(), m_DEOpenCLKernelCreator.LogScaleAssignDEKernel(), m_DEOpenCLKernelCreator.LogScaleAssignDEEntryPoint(), m_DoublePrecision))) { ErrorStr(loc, "failed to init log scale program", m_Devices[0].get()); }

View File

@ -240,7 +240,7 @@ private:
string m_FinalImageName = "Final";
string m_PointsBufferName = "Points";
#ifdef KNL_USE_GLOBAL_CONSEC
string m_ConsecBufferName = "Consec";
string m_ConsecBufferName = "Consec";
#endif
string m_VarStateBufferName = "VarState";