diff --git a/Source/Ember/Affine2D.cpp b/Source/Ember/Affine2D.cpp index 3941926..c9f8f61 100644 --- a/Source/Ember/Affine2D.cpp +++ b/Source/Ember/Affine2D.cpp @@ -91,7 +91,7 @@ Affine2D& Affine2D::operator = (const Affine2D& affine) /// The Affine2D to compare to /// True if all fields are equal, else false template -bool Affine2D::operator == (const Affine2D& affine) +bool Affine2D::operator == (const Affine2D& affine) const { return IsClose(A(), affine.A()) && IsClose(B(), affine.B()) && @@ -107,7 +107,7 @@ bool Affine2D::operator == (const Affine2D& affine) /// The vec2 to multiply by /// A new vec2 which is the product of the multiplication template -typename v2T Affine2D::operator * (const v2T& v) +typename v2T Affine2D::operator * (const v2T& v) const { return TransformVector(v); } @@ -118,7 +118,7 @@ typename v2T Affine2D::operator * (const v2T& v) /// The amount to scale by /// A new Affine2D which a scaled copy of this instance template -Affine2D Affine2D:: operator * (const T& t) +Affine2D Affine2D:: operator * (T t) const { return Affine2D(A() * t, D() * t, diff --git a/Source/Ember/Affine2D.h b/Source/Ember/Affine2D.h index b8be124..9b1e937 100644 --- a/Source/Ember/Affine2D.h +++ b/Source/Ember/Affine2D.h @@ -67,9 +67,9 @@ public: return *this; } - bool operator == (const Affine2D& affine); - v2T operator * (const v2T& v); - Affine2D operator * (const T& t); + bool operator == (const Affine2D& affine) const; + v2T operator * (const v2T& v) const; + Affine2D operator * (T t) const; void MakeID(); bool IsID() const; diff --git a/Source/Ember/Ember.h b/Source/Ember/Ember.h index 1ae42a4..08ab7ac 100644 --- a/Source/Ember/Ember.h +++ b/Source/Ember/Ember.h @@ -375,7 +375,7 @@ public: /// /// A pointer to the xform to find /// The index of the matched xform if found, else -1. - intmax_t GetXformIndex(Xform* xform) const + intmax_t GetXformIndex(const Xform* xform) const { intmax_t index = -1; @@ -392,7 +392,7 @@ public: /// A pointer to the xform to find /// If true, return the index of the final xform when its pointer is passed, even if a final is not present. Default: false. /// The index of the matched xform if found, else -1. - intmax_t GetTotalXformIndex(Xform* xform, bool forceFinal = false) const + intmax_t GetTotalXformIndex(const Xform* xform, bool forceFinal = false) const { size_t totalXformCount = TotalXformCount(forceFinal); @@ -427,7 +427,7 @@ public: /// /// A pointer to the xform to test /// True if matched, else false. - bool IsFinalXform(Xform* xform) const + bool IsFinalXform(const Xform* xform) const { return &m_FinalXform == xform; } @@ -640,6 +640,22 @@ public: } } + /// + /// Compute the total number of state fields within all variations of all xforms. + /// + /// The number of state fields + size_t GetVariationStateParamCount() const + { + size_t count = 0, i = 0, j = 0; + + while (auto xform = GetTotalXform(i++)) + for (j = 0; j < xform->TotalVariationCount(); j++) + if (auto var = xform->GetVariation(j)) + count += var->StateParamCount(); + + return count; + } + /// /// Flatten all xforms by adding a flatten variation if none is present, and if none of the /// variations or parameters in the vector are not present. diff --git a/Source/Ember/Variation.h b/Source/Ember/Variation.h index 0bc522b..5c1e4c0 100644 --- a/Source/Ember/Variation.h +++ b/Source/Ember/Variation.h @@ -1544,6 +1544,15 @@ public: return ""; } + /// + /// Initialize the state variables contained in the passed in array. + /// + /// The pointer to the state variables. + /// The offset in the pointer where the data begins. + virtual void InitStateVars(T* t, size_t& index) + { + } + /// /// Returns an OpenCL string for the initialization of the fields in this variation /// that change during iterations. @@ -1693,6 +1702,7 @@ public: void ParentXform(Xform* xform) { m_Xform = xform; } intmax_t IndexInXform() const { return m_Xform ? m_Xform->GetVariationIndex(const_cast*>(this)) : -1; } intmax_t XformIndexInEmber() const { return m_Xform ? m_Xform->IndexInParentEmber() : -1; } + virtual size_t StateParamCount() const { return 0; } T m_Weight;//The weight of the variation. @@ -2058,6 +2068,7 @@ private: using Variation::Prefix; \ using Variation::Precalc; \ using Variation::StateOpenCLString; \ + using Variation::InitStateVars; \ using Variation::WeightDefineString; \ using Variation::DefaultZ; \ using Variation::DefaultZCl; @@ -2283,7 +2294,7 @@ public: /// Note these are different than regular variation parameters, /// and thus require a completely different solution. /// - /// + /// The OpenCL string for the state variables virtual string StateOpenCLString() const override { ostringstream os, os2; @@ -2301,6 +2312,43 @@ public: return os.str(); } + /// + /// Returns the number of state variables present for this variation. + /// + /// The number of state variables + virtual size_t StateParamCount() const override + { + size_t count = 0; + + for (auto& param : m_Params) + { + if (param.IsState()) + { + count++; + } + } + + return count; + } + + /// + /// Initialize the state variables contained in the passed in array. + /// This is meant to be used only with OpenCL to initialize a state struct for every thread before + /// starting iteration. + /// + /// The pointer to the state variables. + /// The offset in the pointer where the data begins. + virtual void InitStateVars(T* t, size_t& index) override + { + for (auto& param : m_Params) + { + if (param.IsState()) + { + t[index++] = param.ParamVal(); + } + } + } + /// /// Return the name, weight and parameters of the variation as a string. /// diff --git a/Source/Ember/Variations05.h b/Source/Ember/Variations05.h index 3b8b37b..8a0e6c3 100644 --- a/Source/Ember/Variations05.h +++ b/Source/Ember/Variations05.h @@ -311,13 +311,13 @@ public: ostringstream ss, ss2; intmax_t i = 0, varIndex = IndexInXform(); ss2 << "_" << XformIndexInEmber() << "]"; - string index = ss2.str(); + string index = ss2.str(); string weight = WeightDefineString(); - string sc = "parVars[" + ToUpper(m_Params[i++].Name()) + index; - string dens = "parVars[" + ToUpper(m_Params[i++].Name()) + index; - string x = "parVars[" + ToUpper(m_Params[i++].Name()) + index; - string y = "parVars[" + ToUpper(m_Params[i++].Name()) + index; - string seed = "parVars[" + ToUpper(m_Params[i++].Name()) + index; + string sc = "parVars[" + ToUpper(m_Params[i++].Name()) + index; + string dens = "parVars[" + ToUpper(m_Params[i++].Name()) + index; + string x = "parVars[" + ToUpper(m_Params[i++].Name()) + index; + string y = "parVars[" + ToUpper(m_Params[i++].Name()) + index; + string seed = "parVars[" + ToUpper(m_Params[i++].Name()) + index; ss << "\t{\n" << "\t\tint m, n, iters = 0;\n" << "\t\treal_t x, y, u;\n" @@ -369,10 +369,10 @@ protected: { string prefix = Prefix(); m_Params.clear(); - m_Params.push_back(ParamWithName(&m_Sc, prefix + "CircleRand_Sc", 1, eParamType::REAL_NONZERO)); + m_Params.push_back(ParamWithName(&m_Sc, prefix + "CircleRand_Sc", 1, eParamType::REAL_NONZERO)); m_Params.push_back(ParamWithName(&m_Dens, prefix + "CircleRand_Dens", T(0.5))); - m_Params.push_back(ParamWithName(&m_X, prefix + "CircleRand_X", 10)); - m_Params.push_back(ParamWithName(&m_Y, prefix + "CircleRand_Y", 10)); + m_Params.push_back(ParamWithName(&m_X, prefix + "CircleRand_X", 10)); + m_Params.push_back(ParamWithName(&m_Y, prefix + "CircleRand_Y", 10)); m_Params.push_back(ParamWithName(&m_Seed, prefix + "CircleRand_Seed", 0, eParamType::INTEGER)); } @@ -1070,7 +1070,7 @@ public: virtual void Func(IteratorHelper& helper, Point& outPoint, QTIsaac& rand) override { T expx = std::exp(helper.In.x) * T(0.5); - T expnx = T(0.25) / expx; + T expnx = T(0.25) / Zeps(expx); T boot = helper.In.z == 0 ? helper.m_PrecalcAtanyx : helper.In.z; T tmp = m_Weight / Zeps(expx + expnx - (std::cos(helper.In.y) * std::cos(boot))); helper.Out.x = (expx - expnx) * tmp; @@ -1085,7 +1085,7 @@ public: string weight = WeightDefineString(); ss << "\t{\n" << "\t\treal_t expx = exp(vIn.x) * (real_t)(0.5);\n" - << "\t\treal_t expnx = (real_t)(0.25) / expx;\n" + << "\t\treal_t expnx = (real_t)(0.25) / Zeps(expx);\n" << "\t\treal_t boot = vIn.z == 0 ? precalcAtanyx : vIn.z;\n" << "\t\treal_t tmp = " << weight << " / Zeps(expx + expnx - (cos(vIn.y) * cos(boot)));\n" << "\n" @@ -3593,19 +3593,16 @@ public: if (m_FCycle > 5) { m_FCycle = 0; - m_RSwtch = std::trunc(rand.Frand01() * 3);//Chooses 6 or 3 nodes. + m_RSwtch = T(rand.RandBit());//Chooses 6 or 3 nodes. } if (m_BCycle > 2) { m_BCycle = 0; - m_RSwtch = std::trunc(rand.Frand01() * 3);//Chooses 6 or 3 nodes. + m_RSwtch = T(rand.RandBit());//Chooses 6 or 3 nodes. } - int posNeg = 1; - int loc; T tempx, tempy; - T lrmaj = m_Weight;//Sets hexagon length radius - major plane. T boost = 1;//Boost is the separation distance between the two planes. T sumX, sumY; @@ -3622,48 +3619,33 @@ public: sumY = helper.In.y; } - if (rand.Frand01() < T(0.5)) - posNeg = -1; - - //Determine whether one or two major planes. - int majplane = 1; - T abmajp = std::abs(m_MajP); - - if (abmajp <= 1) - { - majplane = 1;//Want either 1 or 2. - } - else - { - majplane = 2; - boost = (abmajp - 1) * T(0.5);//Distance above and below XY plane. - } + int posNeg = rand.RandBit() ? -1 : 1; //Creating Z factors relative to the planes. These will be added, whereas x and y will be assigned. //Original does += z *, so using z on the right side of = is intentional. - if (majplane == 2) - helper.Out.z = helper.In.z * T(0.5) * m_ZLift + (posNeg * boost); + if (m_MajPlane == 2) + helper.Out.z = helper.In.z * T(0.5) * m_ZLift + (posNeg * m_Boost); else helper.Out.z = helper.In.z * T(0.5) * m_ZLift; //Work out the segments and hexagonal nodes. - if (m_RSwtch <= 1)//Occasion to build using 60 degree segments. + if (m_RSwtch)//Occasion to build using 60 degree segments. { - loc = int(m_FCycle);//Sequential nodes selection. + int loc = int(m_FCycle);//Sequential nodes selection. tempx = m_Seg60[loc].x; tempy = m_Seg60[loc].y; m_FCycle++; } else//Occasion to build on 120 degree segments. { - loc = int(m_BCycle);//Sequential nodes selection. + int loc = int(m_BCycle);//Sequential nodes selection. tempx = m_Seg120[loc].x; tempy = m_Seg120[loc].y; m_BCycle++; } - helper.Out.x = ((sumX + helper.In.x) * m_HalfScale) + (lrmaj * tempx); - helper.Out.y = ((sumY + helper.In.y) * m_HalfScale) + (lrmaj * tempy); + helper.Out.x = ((sumX + helper.In.x) * m_HalfScale) + (m_Weight * tempx); + helper.Out.y = ((sumY + helper.In.y) * m_HalfScale) + (m_Weight * tempy); } virtual string OpenCLString() const override @@ -3677,6 +3659,8 @@ public: string majp = "parVars[" + ToUpper(m_Params[i++].Name()) + index; string scale = "parVars[" + ToUpper(m_Params[i++].Name()) + index; string zlift = "parVars[" + ToUpper(m_Params[i++].Name()) + index; + string majplane = "parVars[" + ToUpper(m_Params[i++].Name()) + index; + string boost = "parVars[" + ToUpper(m_Params[i++].Name()) + index; string seg60xStartIndex = ToUpper(m_Params[i].Name()) + stateIndex; i += 6;//Precalc. string seg60yStartIndex = ToUpper(m_Params[i].Name()) + stateIndex; i += 6; string seg120xStartIndex = ToUpper(m_Params[i].Name()) + stateIndex; i += 3; @@ -3689,20 +3673,16 @@ public: << "\t\tif (" << fcycle << " > 5)\n" << "\t\t{\n" << "\t\t " << fcycle << " = 0;\n" - << "\t\t " << rswtch << " = trunc(MwcNext01(mwc) * 3.0);\n" + << "\t\t " << rswtch << " = (real_t)(MwcNext(mwc) & 1);\n" << "\t\t}\n" << "\n" << "\t\tif (" << bcycle << " > 2)\n" << "\t\t{\n" << "\t\t " << bcycle << " = 0;\n" - << "\t\t " << rswtch << " = trunc(MwcNext01(mwc) * 3.0);\n" + << "\t\t " << rswtch << " = (real_t)(MwcNext(mwc) & 1);\n" << "\t\t}\n" << "\t\t\n" - << "\t\tint posNeg = 1;\n" - << "\t\tint loc;\n" << "\t\treal_t tempx, tempy;\n" - << "\t\treal_t lrmaj = " << weight << ";\n" - << "\t\treal_t boost = 1;\n" << "\t\treal_t sumX, sumY;\n\n"; if (m_VarType == eVariationType::VARTYPE_REG) @@ -3721,61 +3701,40 @@ public: } ss - << "\t\t\n" - << "\t\tif (MwcNext01(mwc) < 0.5)\n" - << "\t\t posNeg = -1;\n" << "\n" - << "\t\tint majplane = 1;\n" - << "\t\treal_t abmajp = fabs(" << majp << ");\n" + << "\t\tint posNeg = (MwcNext(mwc) & 1) ? -1 : 1;\n" << "\n" - << "\t\tif (abmajp <= 1)\n" - << "\t\t{\n" - << "\t\t majplane = 1;\n" - << "\t\t}\n" - << "\t\telse\n" - << "\t\t{\n" - << "\t\t majplane = 2;\n" - << "\t\t boost = (abmajp - 1) * 0.5;\n" - << "\t\t}\n" - << "\n" - << "\t\tif (majplane == 2)\n" - << "\t\t vOut.z = fma(vIn.z * (real_t)(0.5), " << zlift << ", (posNeg * boost));\n" + << "\t\tif (" << majplane << " == 2)\n" + << "\t\t vOut.z = fma(vIn.z * (real_t)(0.5), " << zlift << ", (posNeg * " << boost << "));\n" << "\t\telse\n" << "\t\t vOut.z = vIn.z * 0.5 * " << zlift << ";\n" << "\n" - << "\t\tif (" << rswtch << " <= 1)\n" + << "\t\tif (" << rswtch << ")\n" << "\t\t{\n" - << "\t\t loc = (int)" << fcycle << ";\n" + << "\t\t int loc = (int)" << fcycle << ";\n" << "\t\t tempx = parVars[" << seg60xStartIndex << " + loc];\n" << "\t\t tempy = parVars[" << seg60yStartIndex << " + loc];\n" << "\t\t " << fcycle << " = " << fcycle << " + 1;\n" << "\t\t}\n" << "\t\telse\n" << "\t\t{\n" - << "\t\t loc = (int)" << bcycle << ";\n" + << "\t\t int loc = (int)" << bcycle << ";\n" << "\t\t tempx = parVars[" << seg120xStartIndex << " + loc];\n" << "\t\t tempy = parVars[" << seg120yStartIndex << " + loc];\n" << "\t\t " << bcycle << " = " << bcycle << " + 1;\n" << "\t\t}\n" << "\n" - << "\t\tvOut.x = fma((sumX + vIn.x), " << halfScale << ", (lrmaj * tempx));\n" - << "\t\tvOut.y = fma((sumY + vIn.y), " << halfScale << ", (lrmaj * tempy));\n" + << "\t\tvOut.x = fma(sumX + vIn.x, " << halfScale << ", " << weight << " * tempx);\n" + << "\t\tvOut.y = fma(sumY + vIn.y, " << halfScale << ", " << weight << " * tempy);\n" << "\t}\n"; return ss.str(); } - virtual string StateInitOpenCLString() const override + virtual void InitStateVars(T* t, size_t& index) override { - ostringstream ss, ss2; - ss2 << "_" << XformIndexInEmber(); - string stateIndex = ss2.str(); - string prefix = Prefix(); - //CPU sets fycle and bcycle to 0 at the beginning in Precalc(). - //Set to random in OpenCL since a value can't be set once and kept between kernel launches without writing it back to an OpenCL buffer. - ss << "\n\tvarState." << prefix << "hexaplay3D_rswtch" << stateIndex << " = trunc(MwcNext01(&mwc) * 3.0);"; - ss << "\n\tvarState." << prefix << "hexaplay3D_fcycle" << stateIndex << " = trunc(MwcNext01(&mwc) * 5.0);"; - ss << "\n\tvarState." << prefix << "hexaplay3D_bcycle" << stateIndex << " = trunc(MwcNext01(&mwc) * 2.0);"; - return ss.str(); + t[index++] = T(QTIsaac::LockedRandBit()); + t[index++] = 0; + t[index++] = 0; } virtual void Precalc() override @@ -3784,6 +3743,18 @@ public: m_RSwtch = std::trunc(QTIsaac::LockedFrand01() * 3);//Chooses 6 or 3 nodes. m_FCycle = 0; m_BCycle = 0; + T absmajp = std::abs(m_MajP); + + if (absmajp <= 1) + { + m_MajPlane = 1;//Want either 1 or 2. + } + else + { + m_MajPlane = 2; + m_Boost = (absmajp - 1) * T(0.5);//Distance above and below XY plane. + } + m_Seg60[0].x = 1; m_Seg60[1].x = T(0.5); m_Seg60[2].x = T(-0.5); @@ -3814,7 +3785,9 @@ protected: m_Params.push_back(ParamWithName(&m_MajP, prefix + "hexaplay3D_majp", 1, eParamType::REAL)); m_Params.push_back(ParamWithName(&m_Scale, prefix + "hexaplay3D_scale", T(0.25), eParamType::REAL)); m_Params.push_back(ParamWithName(&m_ZLift, prefix + "hexaplay3D_zlift", T(0.25), eParamType::REAL)); - m_Params.push_back(ParamWithName(true, &m_Seg60[0].x, prefix + "hexaplay3D_seg60x0"));//Precalc. + m_Params.push_back(ParamWithName(true, &m_MajPlane, prefix + "hexaplay3D_majplane"));//Precalc. + m_Params.push_back(ParamWithName(true, &m_Boost, prefix + "hexaplay3D_boost")); + m_Params.push_back(ParamWithName(true, &m_Seg60[0].x, prefix + "hexaplay3D_seg60x0")); m_Params.push_back(ParamWithName(true, &m_Seg60[1].x, prefix + "hexaplay3D_seg60x1")); m_Params.push_back(ParamWithName(true, &m_Seg60[2].x, prefix + "hexaplay3D_seg60x2")); m_Params.push_back(ParamWithName(true, &m_Seg60[3].x, prefix + "hexaplay3D_seg60x3")); @@ -3842,7 +3815,9 @@ private: T m_MajP; T m_Scale; T m_ZLift; - v2T m_Seg60[6];//Precalc. + T m_MajPlane;//Precalc. + T m_Boost; + v2T m_Seg60[6]; v2T m_Seg120[3]; T m_HalfScale; T m_RSwtch;//State. @@ -3873,22 +3848,19 @@ public: if (m_FCycle > 5) { m_FCycle = 0; - m_RSwtch = std::trunc(rand.Frand01() * 3);//Chooses 6 or 3 nodes. + m_RSwtch = T(rand.RandBit());//Chooses 6 or 3 nodes. } if (m_BCycle > 2) { m_BCycle = 0; - m_RSwtch = std::trunc(rand.Frand01() * 3);//Chooses 6 or 3 nodes. + m_RSwtch = T(rand.RandBit());//Chooses 6 or 3 nodes. } - T lrmaj = m_Weight; - T smooth = 1; T smRotxFP = 0; T smRotyFP = 0; T smRotxFT = 0; T smRotyFT = 0; - T gentleZ = 0; T sumX, sumY, sumZ; if (m_VarType == eVariationType::VARTYPE_REG) @@ -3906,75 +3878,39 @@ public: sumZ = helper.In.z; } - if (std::abs(m_Weight) <= 0.5) - smooth = m_Weight * 2; - else - smooth = 1; - - int posNeg = 1; - int loc; - T boost = 0; - T scale = m_Scale; + int posNeg = rand.RandBit() ? -1 : 1; T scale3; T tempx, tempy; - if (rand.Frand01() < T(0.5)) - posNeg = -1; - - int majplane = 0; - T abmajp = std::abs(m_MajP); - - if (abmajp <= 1) + if (m_MajPlane == 0) { - majplane = 0; - boost = 0; + helper.Out.z = m_Smooth * helper.In.z * m_Scale * m_ZLift; } - else if (abmajp > 1 && abmajp < 2) + else if (m_MajPlane == 1 && m_MajP < 0) { - majplane = 1; - boost = 0; - } - else - { - majplane = 2; - boost = (abmajp - 2) * T(0.5); - } - - if (majplane == 0) - { - helper.Out.z = smooth * helper.In.z * scale * m_ZLift; - } - else if (majplane == 1 && m_MajP < 0) - { - if (m_MajP < -1 && m_MajP >= -2) - gentleZ = (abmajp - 1); - else - gentleZ = 1; - if (posNeg < 0) - helper.Out.z = -2 * (sumZ * gentleZ); + helper.Out.z = -2 * (sumZ * m_GentleZ); } - - if (majplane == 2 && m_MajP < 0) + else if (m_MajPlane == 2 && m_MajP < 0) { if (posNeg > 0) { - helper.Out.z = (smooth * (helper.In.z * scale * m_ZLift + boost)); + helper.Out.z = (m_Smooth * (helper.In.z * m_Scale * m_ZLift + m_Boost)); } else//For this case when reg, assign and zero out. For all others, sum as usual. { - helper.Out.z = (sumZ - (2 * smooth * sumZ)) + (smooth * posNeg * (helper.In.z * scale * m_ZLift + boost)); + helper.Out.z = (sumZ - (2 * m_Smooth * sumZ)) + (m_Smooth * posNeg * (helper.In.z * m_Scale * m_ZLift + m_Boost)); if (m_VarType == eVariationType::VARTYPE_REG) outPoint.m_Z = 0; } } else - helper.Out.z = smooth * (helper.In.z * scale * m_ZLift + (posNeg * boost)); + helper.Out.z = m_Smooth * (helper.In.z * m_Scale * m_ZLift + (posNeg * m_Boost)); - if (m_RSwtch <= 1) + if (m_RSwtch) { - loc = int(rand.Frand01() * 6); + auto loc = rand.Rand(6); tempx = m_Seg60[loc].x; tempy = m_Seg60[loc].y; scale3 = 1; @@ -3982,19 +3918,19 @@ public: } else { - loc = int(rand.Frand01() * 3); + auto loc = rand.Rand(3); tempx = m_Seg120[loc].x; tempy = m_Seg120[loc].y; scale3 = m_3side; m_BCycle++; } - smRotxFP = (smooth * scale * sumX * tempx) - (smooth * scale * sumY * tempy); - smRotyFP = (smooth * scale * sumY * tempx) + (smooth * scale * sumX * tempy); - smRotxFT = (helper.In.x * smooth * scale * tempx) - (helper.In.y * smooth * scale * tempy); - smRotyFT = (helper.In.y * smooth * scale * tempx) + (helper.In.x * smooth * scale * tempy); - helper.Out.x = sumX * (1 - smooth) + smRotxFP + smRotxFT + smooth * lrmaj * scale3 * tempx; - helper.Out.y = sumY * (1 - smooth) + smRotyFP + smRotyFT + smooth * lrmaj * scale3 * tempy; + smRotxFP = (m_Smooth * m_Scale * sumX * tempx) - (m_Smooth * m_Scale * sumY * tempy); + smRotyFP = (m_Smooth * m_Scale * sumY * tempx) + (m_Smooth * m_Scale * sumX * tempy); + smRotxFT = (helper.In.x * m_Smooth * m_Scale * tempx) - (helper.In.y * m_Smooth * m_Scale * tempy); + smRotyFT = (helper.In.y * m_Smooth * m_Scale * tempx) + (helper.In.x * m_Smooth * m_Scale * tempy); + helper.Out.x = sumX * (1 - m_Smooth) + smRotxFP + smRotxFT + m_Smooth * m_Weight * scale3 * tempx; + helper.Out.y = sumY * (1 - m_Smooth) + smRotyFP + smRotyFT + m_Smooth * m_Weight * scale3 * tempy; } virtual string OpenCLString() const override @@ -4009,7 +3945,11 @@ public: string scale = "parVars[" + ToUpper(m_Params[i++].Name()) + index; string zlift = "parVars[" + ToUpper(m_Params[i++].Name()) + index; string side3 = "parVars[" + ToUpper(m_Params[i++].Name()) + index; - string seg60xStartIndex = ToUpper(m_Params[i].Name()) + stateIndex; i += 6;//Precalc. + string smooth = "parVars[" + ToUpper(m_Params[i++].Name()) + index;//Precalc. + string majplane = "parVars[" + ToUpper(m_Params[i++].Name()) + index; + string boost = "parVars[" + ToUpper(m_Params[i++].Name()) + index; + string gentlez = "parVars[" + ToUpper(m_Params[i++].Name()) + index; + string seg60xStartIndex = ToUpper(m_Params[i].Name()) + stateIndex; i += 6; string seg60yStartIndex = ToUpper(m_Params[i].Name()) + stateIndex; i += 6; string seg120xStartIndex = ToUpper(m_Params[i].Name()) + stateIndex; i += 3; string seg120yStartIndex = ToUpper(m_Params[i].Name()) + stateIndex; i += 3; @@ -4020,22 +3960,21 @@ public: << "\t\tif (" << fcycle << " > 5)\n" << "\t\t{\n" << "\t\t " << fcycle << " = 0;\n" - << "\t\t " << rswtch << " = trunc(MwcNext01(mwc) * 3.0);\n" + << "\t\t " << rswtch << " = (real_t)(MwcNext(mwc) & 1);\n" << "\t\t}\n" << "\n" << "\t\tif (" << bcycle << " > 2)\n" << "\t\t{\n" << "\t\t " << bcycle << " = 0;\n" - << "\t\t " << rswtch << " = trunc(MwcNext01(mwc) * 3.0);\n" + << "\t\t " << rswtch << " = (real_t)(MwcNext(mwc) & 1);\n" << "\t\t}\n" << "\n" - << "\t\treal_t lrmaj = " << weight << ";\n" - << "\t\treal_t smooth = 1;\n" + << "\t\treal_t scale = " << scale << ";\n"//This is an optimal blend of memory accesses vs. caching to local variables which seems to work best. + << "\t\treal_t smooth = " << smooth << ";\n" << "\t\treal_t smRotxFP = 0;\n" << "\t\treal_t smRotyFP = 0;\n" << "\t\treal_t smRotxFT = 0;\n" << "\t\treal_t smRotyFT = 0;\n" - << "\t\treal_t gentleZ = 0;\n" << "\t\treal_t sumX, sumY, sumZ;\n\n"; if (m_VarType == eVariationType::VARTYPE_REG) @@ -4057,64 +3996,28 @@ public: ss << "\n" - << "\t\tif (fabs(lrmaj) <= 0.5)\n" - << "\t\t smooth = lrmaj * 2;\n" - << "\t\telse\n" - << "\t\t smooth = 1;\n" - << "\n" - << "\t\tint posNeg = 1;\n" - << "\t\tint loc;\n" - << "\t\treal_t boost = 0;\n" - << "\t\treal_t scale = " << scale << ";\n"//Temp will be used from here on. + << "\t\tint posNeg = (MwcNext(mwc) & 1) ? -1 : 1;\n" << "\t\treal_t scale3;\n" << "\t\treal_t tempx, tempy;\n" << "\n" - << "\t\tif (MwcNext01(mwc) < 0.5)\n" - << "\t\t posNeg = -1;\n" - << "\n" - << "\t\tint majplane = 0;\n" - << "\t\treal_t abmajp = fabs(" << majp << ");\n" - << "\n" - << "\t\tif (abmajp <= 1)\n" - << "\t\t{\n" - << "\t\t majplane = 0;\n" - << "\t\t boost = 0;\n" - << "\t\t}\n" - << "\t\telse if (abmajp > 1 && abmajp < 2)\n" - << "\t\t{\n" - << "\t\t majplane = 1;\n" - << "\t\t boost = 0;\n" - << "\t\t}\n" - << "\t\telse\n" - << "\t\t{\n" - << "\t\t majplane = 2;\n" - << "\t\t boost = (abmajp - 2) * 0.5;\n" - << "\t\t}\n" - << "\n" - << "\t\tif (majplane == 0)\n" + << "\t\tif (" << majplane << " == 0)\n" << "\t\t{\n" << "\t\t vOut.z = smooth * vIn.z * scale * " << zlift << ";\n" << "\t\t}\n" - << "\t\telse if (majplane == 1 && " << majp << " < 0)\n" + << "\t\telse if (" << majplane << " == 1 && " << majp << " < 0)\n" << "\t\t{\n" - << "\t\t if (" << majp << " < -1 && " << majp << " >= -2)\n" - << "\t\t gentleZ = (abmajp - 1);\n" - << "\t\t else\n" - << "\t\t gentleZ = 1;\n" - << "\n" << "\t\t if (posNeg < 0)\n" - << "\t\t vOut.z = -2 * (sumZ * gentleZ);\n" + << "\t\t vOut.z = -2 * (sumZ * " << gentlez << ");\n" << "\t\t}\n" - << "\n" - << "\t\tif (majplane == 2 && " << majp << " < 0)\n" + << "\t\telse if (" << majplane << " == 2 && " << majp << " < 0)\n" << "\t\t{\n" << "\t\t if (posNeg > 0)\n" << "\t\t {\n" - << "\t\t vOut.z = (smooth * fma(vIn.z * scale, " << zlift << ", boost));\n" + << "\t\t vOut.z = (smooth * fma(vIn.z * scale, " << zlift << ", " << boost << "));\n" << "\t\t }\n" << "\t\t else\n" << "\t\t {\n" - << "\t\t vOut.z = fma(smooth * posNeg, fma(vIn.z * scale, " << zlift << ", boost), sumZ - ((real_t)(2.0) * smooth * sumZ));\n"; + << "\t\t vOut.z = fma(smooth * posNeg, fma(vIn.z * scale, " << zlift << ", " << boost << "), sumZ - ((real_t)(2.0) * smooth * sumZ));\n"; if (m_VarType == eVariationType::VARTYPE_REG) ss << "\t\t outPoint->m_Z = 0;\n"; @@ -4124,12 +4027,12 @@ public: << "\t\t}\n" << "\t\telse\n" << "\t\t{\n" - << "\t\t vOut.z = smooth * fma(vIn.z * scale, " << zlift << ", (posNeg * boost));\n" + << "\t\t vOut.z = smooth * fma(vIn.z * scale, " << zlift << ", (posNeg * " << boost << "));\n" << "\t\t}\n" << "\n" - << "\t\tif (" << rswtch << " <= 1)\n" + << "\t\tif (" << rswtch << ")\n" << "\t\t{\n" - << "\t\t loc = (int)(MwcNext01(mwc) * 6);\n" + << "\t\t uint loc = MwcNextRange(mwc, 6);\n" << "\t\t tempx = parVars[" << seg60xStartIndex << " + loc];\n" << "\t\t tempy = parVars[" << seg60yStartIndex << " + loc];\n" << "\t\t scale3 = 1;\n" @@ -4137,7 +4040,7 @@ public: << "\t\t}\n" << "\t\telse\n" << "\t\t{\n" - << "\t\t loc = (int)(MwcNext01(mwc) * 3);\n" + << "\t\t uint loc = MwcNextRange(mwc, 3);\n" << "\t\t tempx = parVars[" << seg120xStartIndex << " + loc];\n" << "\t\t tempy = parVars[" << seg120yStartIndex << " + loc];\n" << "\t\t scale3 = " << side3 << ";\n" @@ -4148,33 +4051,58 @@ public: << "\t\tsmRotyFP = fma(smooth * scale, sumY * tempx, (smooth * scale * sumX * tempy));\n" << "\t\tsmRotxFT = fma(vIn.x * smooth, scale * tempx, -(vIn.y * smooth * scale * tempy));\n" << "\t\tsmRotyFT = fma(vIn.y * smooth, scale * tempx, (vIn.x * smooth * scale * tempy));\n" - << "\t\tvOut.x = fma(sumX, (1 - smooth), fma(smooth * lrmaj, scale3 * tempx, smRotxFP + smRotxFT));\n" - << "\t\tvOut.y = fma(sumY, (1 - smooth), fma(smooth * lrmaj, scale3 * tempy, smRotyFP + smRotyFT));\n" + << "\t\tvOut.x = fma(sumX, (1 - smooth), fma(smooth * " << weight << ", scale3 * tempx, smRotxFP + smRotxFT));\n" + << "\t\tvOut.y = fma(sumY, (1 - smooth), fma(smooth * " << weight << ", scale3 * tempy, smRotyFP + smRotyFT));\n" << "\t}\n"; return ss.str(); } - virtual string StateInitOpenCLString() const override + virtual void InitStateVars(T* t, size_t& index) override { - ostringstream ss, ss2; - ss2 << "_" << XformIndexInEmber(); - string stateIndex = ss2.str(); - string prefix = Prefix(); - //CPU sets fycle and bcycle to 0 at the beginning in Precalc(). - //Set to random in OpenCL since a value can't be set once and kept between kernel launches without writing it back to an OpenCL buffer. - //This doesn't seem to make a difference from setting them to 0, but do it anyway because it seems more correct. - ss << "\n\tvarState." << prefix << "hexnix3D_rswtch" << stateIndex << " = trunc(MwcNext01(&mwc) * (real_t)(3.0));"; - ss << "\n\tvarState." << prefix << "hexnix3D_fcycle" << stateIndex << " = trunc(MwcNext01(&mwc) * (real_t)(5.0));"; - ss << "\n\tvarState." << prefix << "hexnix3D_bcycle" << stateIndex << " = trunc(MwcNext01(&mwc) * (real_t)(2.0));"; - return ss.str(); + t[index++] = T(QTIsaac::LockedRandBit()); + t[index++] = 0; + t[index++] = 0; } virtual void Precalc() override { T hlift = std::sin(T(M_PI) / 3); - m_RSwtch = std::trunc(QTIsaac::LockedFrand01() * 3);//Chooses 6 or 3 nodes. + m_RSwtch = T(QTIsaac::LockedRandBit());// QTIsaac::LockedRand(4);// //std::trunc(QTIsaac::LockedFrand01() * 3);//Chooses 6 or 3 nodes. m_FCycle = 0; m_BCycle = 0; + auto absmajp = std::abs(m_MajP); + + if (absmajp <= 1) + { + m_MajPlane = 0; + m_Boost = 0; + } + else if (absmajp > 1 && absmajp < 2) + { + m_MajPlane = 1; + m_Boost = 0; + } + else + { + m_MajPlane = 2; + m_Boost = (absmajp - 2) * T(0.5); + } + + if (m_MajPlane == 1 && m_MajP < 0) + { + if (m_MajP < -1 && m_MajP >= -2) + m_GentleZ = absmajp - 1; + else + m_GentleZ = 1; + } + else + m_GentleZ = 0; + + if (std::abs(m_Weight) <= T(0.5)) + m_Smooth = m_Weight * 2; + else + m_Smooth = 1; + m_Seg60[0].x = 1; m_Seg60[1].x = T(0.5); m_Seg60[2].x = T(-0.5); @@ -4205,7 +4133,11 @@ protected: m_Params.push_back(ParamWithName(&m_Scale, prefix + "hexnix3D_scale", T(0.25), eParamType::REAL)); m_Params.push_back(ParamWithName(&m_ZLift, prefix + "hexnix3D_zlift")); m_Params.push_back(ParamWithName(&m_3side, prefix + "hexnix3D_3side", T(0.667), eParamType::REAL)); - m_Params.push_back(ParamWithName(true, &m_Seg60[0].x, prefix + "hexnix3D_seg60x0"));//Precalc. + m_Params.push_back(ParamWithName(true, &m_Smooth, prefix + "hexnix3D_smooth"));//Precalc. + m_Params.push_back(ParamWithName(true, &m_MajPlane, prefix + "hexnix3D_majplane")); + m_Params.push_back(ParamWithName(true, &m_Boost, prefix + "hexnix3D_boost")); + m_Params.push_back(ParamWithName(true, &m_GentleZ, prefix + "hexnix3D_gentlez")); + m_Params.push_back(ParamWithName(true, &m_Seg60[0].x, prefix + "hexnix3D_seg60x0")); m_Params.push_back(ParamWithName(true, &m_Seg60[1].x, prefix + "hexnix3D_seg60x1")); m_Params.push_back(ParamWithName(true, &m_Seg60[2].x, prefix + "hexnix3D_seg60x2")); m_Params.push_back(ParamWithName(true, &m_Seg60[3].x, prefix + "hexnix3D_seg60x3")); @@ -4233,7 +4165,11 @@ private: T m_Scale; T m_ZLift; T m_3side; - v2T m_Seg60[6];//Precalc. + T m_Smooth;//Precalc. + T m_MajPlane; + T m_Boost; + T m_GentleZ; + v2T m_Seg60[6]; v2T m_Seg120[3]; T m_RSwtch;//State. T m_FCycle; diff --git a/Source/Ember/Variations06.h b/Source/Ember/Variations06.h index 793db23..fac0b7d 100644 --- a/Source/Ember/Variations06.h +++ b/Source/Ember/Variations06.h @@ -4244,31 +4244,31 @@ public: ostringstream ss, ss2; intmax_t i = 0, varIndex = IndexInXform(); ss2 << "_" << XformIndexInEmber(); - string weight = WeightDefineString(); - string index = ss2.str() + "]"; - string stateIndex = ss2.str(); - string power = "parVars[" + ToUpper(m_Params[i++].Name()) + index; - string radius = "parVars[" + ToUpper(m_Params[i++].Name()) + index; - string roundstr = "parVars[" + ToUpper(m_Params[i++].Name()) + index; - string roundwidth = "parVars[" + ToUpper(m_Params[i++].Name()) + index; - string distortion = "parVars[" + ToUpper(m_Params[i++].Name()) + index; - string edge = "parVars[" + ToUpper(m_Params[i++].Name()) + index; - string scatter = "parVars[" + ToUpper(m_Params[i++].Name()) + index; - string offset = "parVars[" + ToUpper(m_Params[i++].Name()) + index; - string rotation = "parVars[" + ToUpper(m_Params[i++].Name()) + index; - string cropmode = "parVars[" + ToUpper(m_Params[i++].Name()) + index; - string staticc = "parVars[" + ToUpper(m_Params[i++].Name()) + index; - string mode = "parVars[" + ToUpper(m_Params[i++].Name()) + index; - string radial = "parVars[" + ToUpper(m_Params[i++].Name()) + index; - string workradius = "parVars[" + ToUpper(m_Params[i++].Name()) + index; - string workpower = "parVars[" + ToUpper(m_Params[i++].Name()) + index; - string alpha = "parVars[" + ToUpper(m_Params[i++].Name()) + index; - string roundcoeff = "parVars[" + ToUpper(m_Params[i++].Name()) + index; - string workrotation = "parVars[" + ToUpper(m_Params[i++].Name()) + index; - string x = "varState->" + m_Params[i++].Name() + stateIndex;//State. - string y = "varState->" + m_Params[i++].Name() + stateIndex; - string z = "varState->" + m_Params[i++].Name() + stateIndex; - string c = "varState->" + m_Params[i++].Name() + stateIndex; + string weight = WeightDefineString(); + string index = ss2.str() + "]"; + string stateIndex = ss2.str(); + string power = "parVars[" + ToUpper(m_Params[i++].Name()) + index; + string radius = "parVars[" + ToUpper(m_Params[i++].Name()) + index; + string roundstr = "parVars[" + ToUpper(m_Params[i++].Name()) + index; + string roundwidth = "parVars[" + ToUpper(m_Params[i++].Name()) + index; + string distortion = "parVars[" + ToUpper(m_Params[i++].Name()) + index; + string edge = "parVars[" + ToUpper(m_Params[i++].Name()) + index; + string scatter = "parVars[" + ToUpper(m_Params[i++].Name()) + index; + string offset = "parVars[" + ToUpper(m_Params[i++].Name()) + index; + string rotation = "parVars[" + ToUpper(m_Params[i++].Name()) + index; + string cropmode = "parVars[" + ToUpper(m_Params[i++].Name()) + index; + string staticc = "parVars[" + ToUpper(m_Params[i++].Name()) + index; + string mode = "parVars[" + ToUpper(m_Params[i++].Name()) + index; + string radial = "parVars[" + ToUpper(m_Params[i++].Name()) + index; + string workradius = "parVars[" + ToUpper(m_Params[i++].Name()) + index; + string workpower = "parVars[" + ToUpper(m_Params[i++].Name()) + index; + string alpha = "parVars[" + ToUpper(m_Params[i++].Name()) + index; + string roundcoeff = "parVars[" + ToUpper(m_Params[i++].Name()) + index; + string workrotation = "parVars[" + ToUpper(m_Params[i++].Name()) + index; + string x = "varState->" + m_Params[i++].Name() + stateIndex;//State. + string y = "varState->" + m_Params[i++].Name() + stateIndex; + string z = "varState->" + m_Params[i++].Name() + stateIndex; + string c = "varState->" + m_Params[i++].Name() + stateIndex; ss << "\t{\n" << "\t\tint lastPart = 1;\n" << "\t\treal_t xi, yi, zi;\n" @@ -4456,19 +4456,6 @@ public: return ss.str(); } - virtual string StateInitOpenCLString() const override - { - ostringstream ss, ss2; - ss2 << "_" << XformIndexInEmber(); - string stateIndex = ss2.str(); - string prefix = Prefix(); - ss << "\n\tvarState." << prefix << "smartcrop_x" << stateIndex << " = 0;"; - ss << "\n\tvarState." << prefix << "smartcrop_y" << stateIndex << " = 0;"; - ss << "\n\tvarState." << prefix << "smartcrop_z" << stateIndex << " = 0;"; - ss << "\n\tvarState." << prefix << "smartcrop_c" << stateIndex << " = 0;"; - return ss.str(); - } - virtual void Precalc() override { m_Mode = T(((m_Power > 0) == (m_Radius > 0)) ? 1 : 0); @@ -4496,29 +4483,30 @@ protected: { string prefix = Prefix(); m_Params.clear(); - m_Params.push_back(ParamWithName(&m_Power, prefix + "smartcrop_power", 4)); //Original used a prefix of scrop_, which is incompatible with Ember's design. - m_Params.push_back(ParamWithName(&m_Radius, prefix + "smartcrop_radius", 1)); - m_Params.push_back(ParamWithName(&m_Roundstr, prefix + "smartcrop_roundstr")); - m_Params.push_back(ParamWithName(&m_Roundwidth, prefix + "smartcrop_roundwidth", 1)); - m_Params.push_back(ParamWithName(&m_Distortion, prefix + "smartcrop_distortion", 1)); - m_Params.push_back(ParamWithName(&m_Edge, prefix + "smartcrop_edge")); - m_Params.push_back(ParamWithName(&m_Scatter, prefix + "smartcrop_scatter")); - m_Params.push_back(ParamWithName(&m_Offset, prefix + "smartcrop_offset")); - m_Params.push_back(ParamWithName(&m_Rotation, prefix + "smartcrop_rotation")); - m_Params.push_back(ParamWithName(&m_Cropmode, prefix + "smartcrop_cropmode", 1, eParamType::INTEGER, -1, 2)); - m_Params.push_back(ParamWithName(&m_Static, prefix + "smartcrop_static", 1, eParamType::INTEGER, -1, 3)); - m_Params.push_back(ParamWithName(true, &m_Mode, prefix + "smartcrop_mode"));//Precalc. - m_Params.push_back(ParamWithName(true, &m_Radial, prefix + "smartcrop_radial")); - m_Params.push_back(ParamWithName(true, &m_WorkRadius, prefix + "smartcrop_work_radius")); - m_Params.push_back(ParamWithName(true, &m_WorkPower, prefix + "smartcrop_work_power")); - m_Params.push_back(ParamWithName(true, &m_Alpha, prefix + "smartcrop_alpha")); - m_Params.push_back(ParamWithName(true, &m_RoundCoeff, prefix + "smartcrop_round_coeff")); + m_Params.push_back(ParamWithName(&m_Power, prefix + "smartcrop_power", 4)); //Original used a prefix of scrop_, which is incompatible with Ember's design. + m_Params.push_back(ParamWithName(&m_Radius, prefix + "smartcrop_radius", 1)); + m_Params.push_back(ParamWithName(&m_Roundstr, prefix + "smartcrop_roundstr")); + m_Params.push_back(ParamWithName(&m_Roundwidth, prefix + "smartcrop_roundwidth", 1)); + m_Params.push_back(ParamWithName(&m_Distortion, prefix + "smartcrop_distortion", 1)); + m_Params.push_back(ParamWithName(&m_Edge, prefix + "smartcrop_edge")); + m_Params.push_back(ParamWithName(&m_Scatter, prefix + "smartcrop_scatter")); + m_Params.push_back(ParamWithName(&m_Offset, prefix + "smartcrop_offset")); + m_Params.push_back(ParamWithName(&m_Rotation, prefix + "smartcrop_rotation")); + m_Params.push_back(ParamWithName(&m_Cropmode, prefix + "smartcrop_cropmode", 1, eParamType::INTEGER, -1, 2)); + m_Params.push_back(ParamWithName(&m_Static, prefix + "smartcrop_static", 1, eParamType::INTEGER, -1, 3)); + m_Params.push_back(ParamWithName(true, &m_Mode, prefix + "smartcrop_mode"));//Precalc. + m_Params.push_back(ParamWithName(true, &m_Radial, prefix + "smartcrop_radial")); + m_Params.push_back(ParamWithName(true, &m_WorkRadius, prefix + "smartcrop_work_radius")); + m_Params.push_back(ParamWithName(true, &m_WorkPower, prefix + "smartcrop_work_power")); + m_Params.push_back(ParamWithName(true, &m_Alpha, prefix + "smartcrop_alpha")); + m_Params.push_back(ParamWithName(true, &m_RoundCoeff, prefix + "smartcrop_round_coeff")); m_Params.push_back(ParamWithName(true, &m_WorkRotation, prefix + "smartcrop_work_rotation")); - m_Params.push_back(ParamWithName(true, true, &m_X, prefix + "smartcrop_x"));//State. - m_Params.push_back(ParamWithName(true, true, &m_Y, prefix + "smartcrop_y")); - m_Params.push_back(ParamWithName(true, true, &m_Z, prefix + "smartcrop_z")); - m_Params.push_back(ParamWithName(true, true, &m_C, prefix + "smartcrop_c")); + m_Params.push_back(ParamWithName(true, true, &m_X, prefix + "smartcrop_x"));//State. + m_Params.push_back(ParamWithName(true, true, &m_Y, prefix + "smartcrop_y")); + m_Params.push_back(ParamWithName(true, true, &m_Z, prefix + "smartcrop_z")); + m_Params.push_back(ParamWithName(true, true, &m_C, prefix + "smartcrop_c")); } + private: T m_Power; T m_Radius; diff --git a/Source/EmberCL/EmberCLStructs.h b/Source/EmberCL/EmberCLStructs.h index 5c1f7c3..df8cc1e 100644 --- a/Source/EmberCL/EmberCLStructs.h +++ b/Source/EmberCL/EmberCLStructs.h @@ -128,6 +128,8 @@ static string ConstantDefinesString(bool doublePrecision) "\n" "#define INDEX_IN_GRID_2D (GLOBAL_ID_Y * GLOBAL_SIZE_X + GLOBAL_ID_X)\n" "#define INDEX_IN_GRID_3D ((GLOBAL_SIZE_X * GLOBAL_SIZE_Y * GLOBAL_ID_Z) + INDEX_IN_GRID_2D)\n" + "\n" + "#define BLOCK_START_INDEX_IN_GRID_2D ((BLOCK_ID_Y * GRID_SIZE_X * BLOCK_SIZE_Y * BLOCK_SIZE_X) + (BLOCK_ID_X * BLOCK_SIZE_X * BLOCK_SIZE_Y))\n" "\n"; return os.str(); } diff --git a/Source/EmberCL/IterOpenCLKernelCreator.cpp b/Source/EmberCL/IterOpenCLKernelCreator.cpp index 9c7059f..dceb59f 100644 --- a/Source/EmberCL/IterOpenCLKernelCreator.cpp +++ b/Source/EmberCL/IterOpenCLKernelCreator.cpp @@ -46,6 +46,7 @@ string IterOpenCLKernelCreator::CreateIterKernelString(const Ember& ember, xformFuncs << VariationStateString(ember); xformFuncs << parVarDefines << globalSharedDefines; ember.GetPresentVariations(variations); + bool hasVarState = ember.GetVariationStateParamCount(); for (auto var : variations) if (var) @@ -224,6 +225,7 @@ string IterOpenCLKernelCreator::CreateIterKernelString(const Ember& ember, i++; } + auto varStateString = VariationStateInitString(ember); os << ConstantDefinesString(doublePrecision) << GlobalFunctionsString(ember) << @@ -250,7 +252,15 @@ string IterOpenCLKernelCreator::CreateIterKernelString(const Ember& ember, " __global real_t* globalShared,\n" " __global uchar* xformDistributions,\n"//Using uchar is quicker than uint. Can't be constant because the size can be too large to fit when using xaos. " __constant CarToRasCL* carToRas,\n" - " __global real4reals_bucket* histogram,\n" + " __global real4reals_bucket* histogram,\n"; + + if (hasVarState) + { + os << + " __global VariationState* varStates,\n"; + } + + os << " uint histSize,\n" " __read_only image2d_t palette,\n" " __global Point* points\n" @@ -259,6 +269,8 @@ string IterOpenCLKernelCreator::CreateIterKernelString(const Ember& ember, " bool fuse, ok;\n" " uint threadIndex = INDEX_IN_BLOCK_2D;\n" " uint pointsIndex = INDEX_IN_GRID_2D;\n" + " uint blockStartIndex = BLOCK_START_INDEX_IN_GRID_2D;\n" + " uint blockStartThreadIndex = blockStartIndex + threadIndex;\n" " uint i, itersToDo;\n" " uint consec = 0;\n" //" int badvals = 0;\n" @@ -275,14 +287,18 @@ string IterOpenCLKernelCreator::CreateIterKernelString(const Ember& ember, " uint threadXDivRows = (THREAD_ID_X / NWARPS);\n" " uint threadsMinus1 = NTHREADS - 1;\n" " VariationState varState;\n" - ; - os << - "\n" + "\n"; + + if (ember.XformCount() > 1) + { + os << #ifndef STRAIGHT_RAND - " __local Point swap[NTHREADS];\n" - " __local uint xfsel[NWARPS];\n" + " __local Point swap[NTHREADS];\n" + " __local uint xfsel[NWARPS];\n"; #endif - "\n" + } + + os << " iPaletteCoord.y = 0;\n" "\n" " if (fuseCount > 0)\n" @@ -296,82 +312,107 @@ string IterOpenCLKernelCreator::CreateIterKernelString(const Ember& ember, " firstPoint.m_Y = MwcNextFRange(&mwc, -ember->m_RandPointRange, ember->m_RandPointRange);\n" " firstPoint.m_Z = 0.0;\n" " firstPoint.m_ColorX = MwcNext01(&mwc);\n" - " firstPoint.m_LastXfUsed = 0 - 1;\n"//This ensures the first iteration chooses from the unweighted distribution array, all subsequent will choose from the weighted ones. + " firstPoint.m_LastXfUsed = 0 - 1;\n";//This ensures the first iteration chooses from the unweighted distribution array, all subsequent iterations will choose from the weighted ones. + //os << + // varStateString << '\n'; + os << " }\n" " else\n" " {\n" " fuse = false;\n" " itersToDo = iterCount;\n" - " firstPoint = points[pointsIndex];\n" + " firstPoint = points[blockStartThreadIndex];\n" " }\n" "\n" ; - auto varStateString = VariationStateInitString(ember); - if (!varStateString.empty()) - os << varStateString << "\n\n"; + if (hasVarState) + { + os << + " varState = varStates[blockStartThreadIndex];\n"; + } //This is done once initially here and then again after each swap-sync in the main loop. //This along with the randomness that the point shuffle provides gives sufficient randomness //to produce results identical to those produced on the CPU. - os << -#ifndef STRAIGHT_RAND - " if (THREAD_ID_Y == 0 && THREAD_ID_X < NWARPS)\n" - " xfsel[THREAD_ID_X] = MwcNext(&mwc) & " << CHOOSE_XFORM_GRAIN_M1 << ";\n"//It's faster to do the & here ahead of time than every time an xform is looked up to use inside the loop. - "\n" -#endif - " barrier(CLK_LOCAL_MEM_FENCE);\n" - "\n" - " for (i = 0; i < itersToDo; i++)\n" - " {\n"; - os << - " consec = 0;\n" - "\n" - " do\n" - " {\n"; - - //If xaos is present, the a hybrid of the cuburn method is used. - //This makes each thread in a row pick the same offset into a distribution, using xfsel. - //However, the distribution the offset is in, is determined by firstPoint.m_LastXfUsed. - if (ember.XaosPresent()) + if (ember.XformCount() > 1) { +#ifndef STRAIGHT_RAND os << -#ifdef STRAIGHT_RAND - " secondPoint.m_LastXfUsed = xformDistributions[MwcNext(&mwc) & " << CHOOSE_XFORM_GRAIN_M1 << " + (" << CHOOSE_XFORM_GRAIN << " * (firstPoint.m_LastXfUsed + 1u))];\n\n"; -#else - " secondPoint.m_LastXfUsed = xformDistributions[xfsel[THREAD_ID_Y] + (" << CHOOSE_XFORM_GRAIN << " * (firstPoint.m_LastXfUsed + 1u))];\n\n";//Partial cuburn hybrid. + "\n" + " if (THREAD_ID_Y == 0 && THREAD_ID_X < NWARPS)\n" + " xfsel[THREAD_ID_X] = MwcNext(&mwc) & " << CHOOSE_XFORM_GRAIN_M1 << ";\n"//It's faster to do the & here ahead of time than every time an xform is looked up to use inside the loop. + "\n"; #endif } else { os << + " secondPoint.m_LastXfUsed = 0;\n"; + } + + os << + " barrier(CLK_LOCAL_MEM_FENCE);\n" + "\n" + " for (i = 0; i < itersToDo; i++)\n" + " {\n" + " consec = 0;\n" + "\n" + " do\n" + " {\n"; + + if (ember.XformCount() > 1) + { + //If xaos is present, the a hybrid of the cuburn method is used. + //This makes each thread in a row pick the same offset into a distribution, using xfsel. + //However, the distribution the offset is in, is determined by firstPoint.m_LastXfUsed. + if (ember.XaosPresent()) + { + os << #ifdef STRAIGHT_RAND - " secondPoint.m_LastXfUsed = xformDistributions[MwcNext(&mwc) & " << CHOOSE_XFORM_GRAIN_M1 << "];\n\n";//For testing, using straight rand flam4/fractron style instead of cuburn. + " secondPoint.m_LastXfUsed = xformDistributions[(MwcNext(&mwc) & " << CHOOSE_XFORM_GRAIN_M1 << ") + (" << CHOOSE_XFORM_GRAIN << " * (firstPoint.m_LastXfUsed + 1u))];\n\n"; #else - " secondPoint.m_LastXfUsed = xformDistributions[xfsel[THREAD_ID_Y]];\n\n"; + " secondPoint.m_LastXfUsed = xformDistributions[xfsel[THREAD_ID_Y] + (" << CHOOSE_XFORM_GRAIN << " * (firstPoint.m_LastXfUsed + 1u))];\n\n";//Partial cuburn hybrid. #endif + } + else + { + os << +#ifdef STRAIGHT_RAND + " secondPoint.m_LastXfUsed = xformDistributions[MwcNext(&mwc) & " << CHOOSE_XFORM_GRAIN_M1 << "];\n\n";//For testing, using straight rand flam4/fractron style instead of cuburn. +#else + " secondPoint.m_LastXfUsed = xformDistributions[xfsel[THREAD_ID_Y]];\n\n"; +#endif + } } for (i = 0; i < ember.XformCount(); i++) { - if (i == 0) + if (ember.XformCount() > 1) { + if (i == 0) + { + os << + " switch (secondPoint.m_LastXfUsed)\n" + " {\n"; + } + os << - " switch (secondPoint.m_LastXfUsed)\n" - " {\n"; + " case " << i << ":\n" + " {\n" << + " Xform" << i << "(&(xforms[" << i << "]), parVars, globalShared, &firstPoint, &secondPoint, &mwc, &varState);\n" << + " break;\n" + " }\n"; + + if (i == ember.XformCount() - 1) + { + os << + " }\n"; + } } - - os << - " case " << i << ":\n" - " {\n" << - " Xform" << i << "(&(xforms[" << i << "]), parVars, globalShared, &firstPoint, &secondPoint, &mwc, &varState);\n" << - " break;\n" - " }\n"; - - if (i == ember.XformCount() - 1) + else { - os << - " }\n"; + os << " Xform0(&(xforms[0]), parVars, globalShared, &firstPoint, &secondPoint, &mwc, &varState);"; } } @@ -397,25 +438,54 @@ string IterOpenCLKernelCreator::CreateIterKernelString(const Ember& ember, " secondPoint.m_X = MwcNextFRange(&mwc, -ember->m_RandPointRange, ember->m_RandPointRange);\n" " secondPoint.m_Y = MwcNextFRange(&mwc, -ember->m_RandPointRange, ember->m_RandPointRange);\n" " secondPoint.m_Z = 0.0;\n" - " }\n" + " }\n"; #ifndef STRAIGHT_RAND - "\n"//Rotate points between threads. This is how randomization is achieved. - " uint swr = threadXY + ((i & 1u) * threadXDivRows);\n" - " uint sw = (swr * THREADS_PER_WARP + THREAD_ID_X) & threadsMinus1;\n" - "\n" - //Write to another thread's location. - " swap[sw] = secondPoint;\n" - "\n" - //Populate randomized xform index buffer with new random values. - " if (THREAD_ID_Y == 0 && THREAD_ID_X < NWARPS)\n" - " xfsel[THREAD_ID_X] = MwcNext(&mwc) & " << CHOOSE_XFORM_GRAIN_M1 << ";\n" - "\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - //Another thread will have written to this thread's location, so read the new value and use it for accumulation below. - " firstPoint = swap[threadIndex];\n" + + if (ember.XformCount() > 1) + { + os << + "\n"//Rotate points between threads. This is how randomization is achieved. + " uint swr = threadXY + ((i & 1u) * threadXDivRows);\n" + " uint sw = (swr * THREADS_PER_WARP + THREAD_ID_X) & threadsMinus1;\n" + "\n" + //Write to another thread's location. + " swap[sw] = secondPoint;\n"; + + if (hasVarState) + { + os << + " varStates[blockStartIndex + sw] = varState;\n"; + } + + os << + "\n" + //Populate randomized xform index buffer with new random values. + " if (THREAD_ID_Y == 0 && THREAD_ID_X < NWARPS)\n" + " xfsel[THREAD_ID_X] = MwcNext(&mwc) & " << CHOOSE_XFORM_GRAIN_M1 << ";\n" + "\n" + " barrier(CLK_LOCAL_MEM_FENCE);\n" + //Another thread will have written to this thread's location, so read the new value and use it for accumulation below. + " firstPoint = swap[threadIndex];\n"; + + if (hasVarState) + { + os << + " varState = varStates[blockStartThreadIndex];\n" + ; + } + } + else + { + os << + "\n" + " firstPoint = secondPoint;\n"; + } + #else - " firstPoint = secondPoint;\n"//For testing, using straight rand flam4/fractron style instead of cuburn. + os << + " firstPoint = secondPoint;\n";//For testing, using straight rand flam4/fractron style instead of cuburn. #endif + os << "\n" " if (fuse)\n" " {\n" @@ -537,9 +607,17 @@ string IterOpenCLKernelCreator::CreateIterKernelString(const Ember& ember, " points[pointsIndex].m_Z = MwcNextNeg1Pos1(&mwc);\n" " points[pointsIndex].m_ColorX = MwcNextNeg1Pos1(&mwc);\n" #else - " points[pointsIndex] = firstPoint;\n" " seeds[pointsIndex] = mwc;\n" + " points[blockStartThreadIndex] = firstPoint;\n"; + + if (hasVarState) + { + os << + " varStates[blockStartThreadIndex] = varState;\n"; + } + #endif + os << " barrier(CLK_GLOBAL_MEM_FENCE);\n" "}\n"; return os.str(); @@ -593,7 +671,6 @@ string IterOpenCLKernelCreator::GlobalFunctionsString(const Ember& ember) return os.str(); } - /// /// Create an OpenCL string of #defines and a corresponding host side vector for variation weights and parametric variation values. /// Parametric variations present a special problem in the iteration code. @@ -689,7 +766,6 @@ void IterOpenCLKernelCreator::ParVarIndexDefines(const Ember& ember, pair< params.first = os.str(); } } - /// /// Create an OpenCL string of #defines and a corresponding host side vector for globally shared data. /// Certain variations, such as crackle and dc_perlin use static, read-only buffers of data. @@ -750,7 +826,6 @@ void IterOpenCLKernelCreator::SharedDataIndexDefines(const Ember& ember, p params.first = os.str(); } } - /// /// Create the string needed for the struct whose values will change between each iteration. /// This is only needed for variations whose state changes. @@ -773,7 +848,6 @@ string IterOpenCLKernelCreator::VariationStateString(const Ember& ember) os << "\n} VariationState;\n\n"; return os.str(); } - /// /// Create the string needed for the initial state of the struct whose values will change between each iteration. /// This is only needed for variations whose state changes. @@ -795,7 +869,6 @@ string IterOpenCLKernelCreator::VariationStateInitString(const Ember& embe return os.str(); } - /// /// Determine whether the two embers passed in differ enough /// to require a rebuild of the iteration code. @@ -855,7 +928,6 @@ bool IterOpenCLKernelCreator::IsBuildRequired(const Ember& ember1, const E return false; } - /// /// Create the zeroize kernel string. /// OpenCL comes with no way to zeroize a buffer like memset() @@ -880,7 +952,6 @@ string IterOpenCLKernelCreator::CreateZeroizeKernelString() const "\n"; return os.str(); } - /// /// Create the histogram summing kernel string. /// This is used when running with multiple GPUs. It takes @@ -910,7 +981,6 @@ string IterOpenCLKernelCreator::CreateSumHistKernelString() const "\n"; return os.str(); } - /// /// Create the string for 3D projection based on the 3D values of the ember. /// Projection is done on the second point. @@ -1018,9 +1088,7 @@ string IterOpenCLKernelCreator::CreateProjectionString(const Ember& ember) return os.str(); } - template EMBERCL_API class IterOpenCLKernelCreator; - #ifdef DO_DOUBLE template EMBERCL_API class IterOpenCLKernelCreator; #endif diff --git a/Source/EmberCL/RendererCL.cpp b/Source/EmberCL/RendererCL.cpp index a5b8ad4..16f194a 100644 --- a/Source/EmberCL/RendererCL.cpp +++ b/Source/EmberCL/RendererCL.cpp @@ -331,6 +331,41 @@ bool RendererCL::WriteRandomPoints(size_t device) } #endif +/// +/// Resize the variation state vector to hold all of the variation state variables across all variations +/// in the ember, aligned to 16, for each thread that will be launched on a device. +/// +template +void RendererCL::InitStateVec() +{ + size_t count = 0, i = 0, j = 0, k = 0; + + while (auto xform = m_Ember.GetTotalXform(i++)) + for (j = 0; j < xform->TotalVariationCount(); j++) + if (auto var = xform->GetVariation(j)) + count += var->StateParamCount() * sizeof(T); + + //Round to 16 and resize the buffer to be copied to OpenCL buffer here. + auto igkc = IterGridKernelCount(); + size_t index = 0, count16 = ((count / 16) * 16) + (count % 16 > 0 ? 16 : 0); + auto elcount = count16 / sizeof(T); + m_VarStates.resize(igkc * elcount); + + if (count16) + { + for (k = 0; k < igkc; k++) + { + i = 0; + index = k * elcount; + + while (auto xform = m_Ember.GetTotalXform(i++)) + for (j = 0; j < xform->TotalVariationCount(); j++) + if (auto var = xform->GetVariation(j)) + var->InitStateVars(m_VarStates.data(), index); + } + } +} + /// /// Set the percentage of a sub batch that should be executed in each thread per kernel call. /// @@ -650,7 +685,7 @@ bool RendererCL::AnyNvidia() const /// /// Allocate all buffers required for running as well as the final -/// 2D image. +/// 2D image and perform some other initialization. /// Note that only iteration-related buffers are allocated on secondary devices. /// /// True if success, else false. @@ -666,6 +701,8 @@ bool RendererCL::Alloc(bool histOnly) size_t size = SuperSize() * sizeof(v4bT);//Size of histogram and density filter buffer. static std::string loc = __FUNCTION__; auto& wrapper = m_Devices[0]->m_Wrapper; + InitStateVec(); + m_IterCountPerKernel = size_t(m_SubBatchPercentPerThread * m_Ember.m_SubBatchSize);//This isn't the greatest place to put this, but it must be computed before the number of iters to do is computed in the base. if (b && !(b = wrapper.AddBuffer(m_DEFilterParamsBufferName, sizeof(m_DensityFilterCL)))) { ErrorStr(loc, "Failed to set DE filter parameters buffer", m_Devices[0].get()); } @@ -677,19 +714,22 @@ bool RendererCL::Alloc(bool histOnly) for (auto& device : m_Devices) { - if (b && !(b = device->m_Wrapper.AddBuffer(m_EmberBufferName, sizeof(m_EmberCL)))) { ErrorStr(loc, "Failed to set ember buffer", device.get()); break; } + if (b && !(b = device->m_Wrapper.AddBuffer(m_EmberBufferName, sizeof(m_EmberCL)))) { ErrorStr(loc, "Failed to set ember buffer", device.get()); break; } - if (b && !(b = device->m_Wrapper.AddBuffer(m_XformsBufferName, SizeOf(m_XformsCL)))) { ErrorStr(loc, "Failed to set xforms buffer", device.get()); break; } + if (b && !(b = device->m_Wrapper.AddBuffer(m_XformsBufferName, SizeOf(m_XformsCL)))) { ErrorStr(loc, "Failed to set xforms buffer", device.get()); break; } - if (b && !(b = device->m_Wrapper.AddBuffer(m_ParVarsBufferName, 128 * sizeof(T)))) { ErrorStr(loc, "Failed to set parametric variations buffer", device.get()); break; }//Will be resized with the needed amount later. + if (b && !(b = device->m_Wrapper.AddBuffer(m_ParVarsBufferName, 128 * sizeof(T)))) { ErrorStr(loc, "Failed to set parametric variations buffer", device.get()); break; }//Will be resized with the needed amount later. - if (b && !(b = device->m_Wrapper.AddBuffer(m_DistBufferName, CHOOSE_XFORM_GRAIN))) { ErrorStr(loc, "Failed to set xforms distribution buffer", device.get()); break; }//Will be resized for xaos. + if (b && !(b = device->m_Wrapper.AddBuffer(m_DistBufferName, CHOOSE_XFORM_GRAIN))) { ErrorStr(loc, "Failed to set xforms distribution buffer", device.get()); break; }//Will be resized for xaos. - if (b && !(b = device->m_Wrapper.AddBuffer(m_CarToRasBufferName, sizeof(m_CarToRasCL)))) { ErrorStr(loc, "Failed to set cartesian to raster buffer", device.get()); break; } + if (b && !(b = device->m_Wrapper.AddBuffer(m_CarToRasBufferName, sizeof(m_CarToRasCL)))) { ErrorStr(loc, "Failed to set cartesian to raster buffer", device.get()); break; } - if (b && !(b = device->m_Wrapper.AddBuffer(m_HistBufferName, size))) { ErrorStr(loc, "Failed to set histogram buffer", device.get()); break; }//Histogram. Will memset to zero later. + if (b && !(b = device->m_Wrapper.AddBuffer(m_HistBufferName, size))) { ErrorStr(loc, "Failed to set histogram buffer", device.get()); break; }//Histogram. Will memset to zero later. - if (b && !(b = device->m_Wrapper.AddBuffer(m_PointsBufferName, IterGridKernelCount() * sizeof(PointCL)))) { ErrorStr(loc, "Failed to set points buffer", device.get()); break; }//Points between iter calls. + if (b && !(b = device->m_Wrapper.AddBuffer(m_PointsBufferName, IterGridKernelCount() * sizeof(PointCL)))) { ErrorStr(loc, "Failed to set points buffer", device.get()); break; }//Points between iter calls. + + if (m_VarStates.size()) + if (b && !(b = device->m_Wrapper.AddBuffer(m_VarStateBufferName, SizeOf(m_VarStates)))) { ErrorStr(loc, "Failed to set variation state buffer", device.get()); break; }//Points between iter calls. //Global shared is allocated once and written when building the kernel. } @@ -835,7 +875,7 @@ EmberStats RendererCL::Iterate(size_t iterCount, size_t temporalSamp break; } - if (b && !(b = wrapper.WriteBuffer(m_XformsBufferName, reinterpret_cast(m_XformsCL.data()), sizeof(m_XformsCL[0]) * m_XformsCL.size()))) + if (b && !(b = wrapper.WriteBuffer(m_XformsBufferName, reinterpret_cast(m_XformsCL.data()), SizeOf(m_XformsCL)))) { ErrorStr(loc, "Write xforms buffer failed", device.get()); break; @@ -853,6 +893,15 @@ EmberStats RendererCL::Iterate(size_t iterCount, size_t temporalSamp break; } + if (m_VarStates.size()) + { + if (b && !(b = wrapper.AddAndWriteBuffer(m_VarStateBufferName, reinterpret_cast(m_VarStates.data()), SizeOf(m_VarStates)))) + { + ErrorStr(loc, "Write variation state buffer failed", device.get()); + break; + } + } + if (b && !(b = wrapper.AddAndWriteImage("Palette", CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, m_PaletteFormat, m_Dmap.Size(), 1, 0, m_Dmap.m_Entries.data()))) { ErrorStr(loc, "Write palette buffer failed", device.get()); @@ -867,7 +916,7 @@ EmberStats RendererCL::Iterate(size_t iterCount, size_t temporalSamp //So set it up right before the run. if (!m_Params.second.empty()) { - if (!wrapper.AddAndWriteBuffer(m_ParVarsBufferName, m_Params.second.data(), m_Params.second.size() * sizeof(m_Params.second[0]))) + if (!wrapper.AddAndWriteBuffer(m_ParVarsBufferName, m_Params.second.data(), SizeOf(m_Params.second))) { ErrorStr(loc, "Write parametric variations buffer failed", device.get()); break; @@ -988,7 +1037,6 @@ bool RendererCL::RunIter(size_t iterCount, size_t temporalSample, si vector threadVec; std::atomic atomLaunchesRan; std::atomic atomItersRan, atomItersRemaining; - m_IterCountPerKernel = size_t(m_SubBatchPercentPerThread * m_Ember.m_SubBatchSize); size_t adjustedIterCountPerKernel = m_IterCountPerKernel; itersRan = 0; atomItersRan.store(0); @@ -999,6 +1047,9 @@ bool RendererCL::RunIter(size_t iterCount, size_t temporalSample, si //If a very small number of iters is requested, and multiple devices //are present, then try to spread the launches over the devices. //Otherwise, only one device would get used. + //This also applies to when running a single device, and the requested iters per thread based on the + //sub batch size, is greater than is required to run all requested iters. This will reduce the iters + //per thread to the appropriate value. //Note that this can lead to doing a few more iterations than requested //due to rounding up to ~32k kernel threads per launch. if (m_Devices.size() >= launches) @@ -1057,6 +1108,9 @@ bool RendererCL::RunIter(size_t iterCount, size_t temporalSample, si if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_HistBufferName))) { ErrorStr(loc, "Setting histogram buffer argument failed", m_Devices[dev].get()); }//Histogram. + if (!m_VarStates.empty()) + if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_VarStateBufferName))) { ErrorStr(loc, "Setting variation state buffer argument failed", m_Devices[dev].get()); }//Variation state. + if (b && !(b = wrapper.SetArg (kernelIndex, argIndex++, histSuperSize))) { ErrorStr(loc, "Setting histogram size argument failed", m_Devices[dev].get()); }//Histogram size. if (b && !(b = wrapper.SetImageArg (kernelIndex, argIndex++, false, "Palette"))) { ErrorStr(loc, "Setting palette argument failed", m_Devices[dev].get()); }//Palette. diff --git a/Source/EmberCL/RendererCL.h b/Source/EmberCL/RendererCL.h index 1a8c3bc..e54cdd2 100644 --- a/Source/EmberCL/RendererCL.h +++ b/Source/EmberCL/RendererCL.h @@ -141,6 +141,7 @@ public: #ifdef TEST_CL bool WriteRandomPoints(size_t device); #endif + void InitStateVec(); void SubBatchPercentPerThread(float f); float SubBatchPercentPerThread() const; const string& IterKernel() const; @@ -238,6 +239,7 @@ private: string m_AccumBufferName = "Accum"; string m_FinalImageName = "Final"; string m_PointsBufferName = "Points"; + string m_VarStateBufferName = "VarState"; //Kernels. string m_IterKernel; @@ -258,6 +260,7 @@ private: FinalAccumOpenCLKernelCreator m_FinalAccumOpenCLKernelCreator; pair> m_Params; pair> m_GlobalShared; + vector m_VarStates; vector> m_Devices; Ember m_LastBuiltEmber; }; diff --git a/Source/EmberCommon/EmberCommon.h b/Source/EmberCommon/EmberCommon.h index 6cd7e9e..9bcbf01 100644 --- a/Source/EmberCommon/EmberCommon.h +++ b/Source/EmberCommon/EmberCommon.h @@ -877,12 +877,12 @@ static vector*> FindVarsWithout(const vectorThe vector of xforms to add /// True to preserve xaos else false. template -static void AddXformsWithXaos(Ember& ember, std::vector>& xforms, bool preserveXaos) +static void AddXformsWithXaos(Ember& ember, std::vector, size_t>>& xforms, bool preserveXaos) { auto origXformCount = ember.XformCount(); for (auto& it : xforms) - ember.AddXform(it); + ember.AddXform(it.first); for (auto i = 0; i < ember.XformCount(); i++) { @@ -901,8 +901,9 @@ static void AddXformsWithXaos(Ember& ember, std::vector>& xforms, bo xf->SetXaos(j, 0); else if (!preserveXaos) xf->SetXaos(j, 1); - else if (i - origXformCount < xforms.size())//Should never be out of bounds, but just to be safe. - xf->SetXaos(j, xforms[i - origXformCount].Xaos(j - origXformCount)); + + //else if (i - origXformCount < xforms.size())//Should never be out of bounds, but just to be safe. + // xf->SetXaos(j, xforms[i - origXformCount].first.Xaos(j - origXformCount)); } } } diff --git a/Source/Fractorium/Fractorium.ui b/Source/Fractorium/Fractorium.ui index cf2c070..5f85d85 100644 --- a/Source/Fractorium/Fractorium.ui +++ b/Source/Fractorium/Fractorium.ui @@ -3212,7 +3212,7 @@ - <html><head/><body><p>Duplicate selected xforms.</p><p>If xaos is present in the flame, the duplicated xforms will be added with existing xaos preserved, else they'll just be added normally.</p></body></html> + <html><head/><body><p>Duplicate selected xforms.</p><p>If xaos is present in the flame, the duplicated xforms will be added with existing xaos preserved, or as a layer if Ctrl is pressed, else they'll just be added normally.</p></body></html> @@ -8571,7 +8571,7 @@ Paste selected x&forms - Paste copied xforms into the current flame + <html><head/><body><p>Paste copied xforms as a layer into the current flame</p></body></html> Ctrl+X, Ctrl+V diff --git a/Source/Fractorium/FractoriumEmberController.h b/Source/Fractorium/FractoriumEmberController.h index 591cda8..a67f816 100644 --- a/Source/Fractorium/FractoriumEmberController.h +++ b/Source/Fractorium/FractoriumEmberController.h @@ -596,7 +596,7 @@ private: EmberFile m_EmberFile; EmberFile m_SequenceFile; deque> m_UndoList; - vector> m_CopiedXforms; + vector, size_t>> m_CopiedXforms; Xform m_CopiedFinalXform; Affine2D m_CopiedAffine; shared_ptr> m_VariationList; diff --git a/Source/Fractorium/FractoriumMenus.cpp b/Source/Fractorium/FractoriumMenus.cpp index 836cdc6..0cedf7a 100644 --- a/Source/Fractorium/FractoriumMenus.cpp +++ b/Source/Fractorium/FractoriumMenus.cpp @@ -686,7 +686,7 @@ void FractoriumEmberController::CopySelectedXforms() if (m_Ember.IsFinalXform(xform)) m_CopiedFinalXform = *xform; else - m_CopiedXforms.push_back(*xform); + m_CopiedXforms.emplace_back(*xform, xfindex); }, eXformUpdate::UPDATE_SELECTED, false); m_Fractorium->ui.ActionPasteSelectedXforms->setEnabled(true); } diff --git a/Source/Fractorium/FractoriumXaos.cpp b/Source/Fractorium/FractoriumXaos.cpp index 004e640..10d0df8 100644 --- a/Source/Fractorium/FractoriumXaos.cpp +++ b/Source/Fractorium/FractoriumXaos.cpp @@ -264,7 +264,7 @@ void FractoriumEmberController::AddLayer(int xforms) { Update([&] { - std::vector> vec(xforms); + std::vector, size_t>> vec(xforms); AddXformsWithXaos(m_Ember, vec, false); }); diff --git a/Source/Fractorium/FractoriumXforms.cpp b/Source/Fractorium/FractoriumXforms.cpp index dd47b9c..7477162 100644 --- a/Source/Fractorium/FractoriumXforms.cpp +++ b/Source/Fractorium/FractoriumXforms.cpp @@ -209,6 +209,7 @@ void Fractorium::OnAddLinkedXformButtonClicked(bool checked) { m_Controller->Add /// /// Duplicate the specified xforms in the current ember, and set the last one as the current xform. /// If xaos is present in the ember, the duplicated xforms will be added with xaos preserved, else they'll just be added normally. +/// The manner in which xaos is preserved is altered when ctrl is pressed. /// Called when the duplicate xform button is clicked. /// Resets the rendering process. /// @@ -217,19 +218,45 @@ template void FractoriumEmberController::DuplicateXform() { bool forceFinal = m_Fractorium->HaveFinal(); - vector> vec; + bool ctrl = QGuiApplication::keyboardModifiers().testFlag(Qt::ControlModifier); + vector, size_t>> vec; vec.reserve(m_Ember.XformCount()); UpdateXform([&](Xform* xform, size_t xfindex, size_t selIndex) { - vec.push_back(*xform); + vec.emplace_back(*xform, xfindex); }, eXformUpdate::UPDATE_SELECTED_EXCEPT_FINAL, false); Update([&]() { if (m_Ember.XaosPresent()) - AddXformsWithXaos(m_Ember, vec, true); + { + if (!ctrl) + { + auto oldxfcount = m_Ember.XformCount(); + + for (auto& it : vec) + { + m_Ember.AddXform(it.first); + auto newxfcount = m_Ember.XformCount() - 1; + auto* newxform = m_Ember.GetXform(newxfcount); + + for (size_t i = 0; i < oldxfcount; i++) + { + if (auto xform = m_Ember.GetXform(i)) + { + newxform->SetXaos(i, it.first.Xaos(i)); + xform->SetXaos(newxfcount, xform->Xaos(it.second)); + } + } + } + } + else + { + AddXformsWithXaos(m_Ember, vec, true); + } + } else for (auto& it : vec) - m_Ember.AddXform(it); + m_Ember.AddXform(it.first); int index = int(m_Ember.TotalXformCount(forceFinal) - (forceFinal ? 2 : 1));//Set index to the last item before final. FillXforms(index);//Handles xaos. diff --git a/Source/Fractorium/GLEmberController.h b/Source/Fractorium/GLEmberController.h index 1b2be5b..4ca9e6f 100644 --- a/Source/Fractorium/GLEmberController.h +++ b/Source/Fractorium/GLEmberController.h @@ -119,9 +119,9 @@ public: void CalcDragTranslation(); void SetSelectedXform(Xform* xform); void DrawGrid(); - void DrawAffine(Xform* xform, bool pre, bool selected, bool hovered); - int UpdateHover(v3T& glCoords); - bool CheckXformHover(Xform* xform, v3T& glCoords, T& bestDist, bool pre, bool post); + void DrawAffine(const Xform* xform, bool pre, bool selected, bool hovered); + int UpdateHover(const v3T& glCoords); + bool CheckXformHover(const Xform* xform, const v3T& glCoords, T& bestDist, bool pre, bool post); private: v2T SnapToGrid(v2T& vec); diff --git a/Source/Fractorium/GLWidget.cpp b/Source/Fractorium/GLWidget.cpp index 34a2d22..cc0a79f 100644 --- a/Source/Fractorium/GLWidget.cpp +++ b/Source/Fractorium/GLWidget.cpp @@ -1519,7 +1519,7 @@ void GLEmberController::DrawGrid() /// True if selected (draw enclosing circle), else false (only draw axes). /// True if the xform is being hovered over (draw tansparent disc), else false (no disc). template -void GLEmberController::DrawAffine(Xform* xform, bool pre, bool selected, bool hovered) +void GLEmberController::DrawAffine(const Xform* xform, bool pre, bool selected, bool hovered) { auto ember = m_FractoriumEmberController->CurrentEmber(); auto final = ember->IsFinalXform(xform); @@ -1755,7 +1755,7 @@ void GLWidget::DrawAffineHelper(int index, float circleWidth, float lineWidth, b /// The mouse raster coordinates to check /// The index of the xform being hovered over, else -1 if no hover. template -int GLEmberController::UpdateHover(v3T& glCoords) +int GLEmberController::UpdateHover(const v3T& glCoords) { bool pre = m_Fractorium->ui.PreAffineGroupBox->isChecked(); bool post = m_Fractorium->ui.PostAffineGroupBox->isChecked(); @@ -1838,7 +1838,7 @@ int GLEmberController::UpdateHover(v3T& glCoords) /// True to check post affine, else don't. /// True if hovering and the distance is smaller than the bestDist parameter template -bool GLEmberController::CheckXformHover(Xform* xform, v3T& glCoords, T& bestDist, bool pre, bool post) +bool GLEmberController::CheckXformHover(const Xform* xform, const v3T& glCoords, T& bestDist, bool pre, bool post) { bool preFound = false, postFound = false; T dist = 0, scale = m_FractoriumEmberController->AffineScaleCurrentToLocked(); diff --git a/package-linux.sh b/package-linux.sh index 7c704e3..6730985 100755 --- a/package-linux.sh +++ b/package-linux.sh @@ -122,6 +122,7 @@ tar --exclude='package-linux.sh' \ ./Data/Quartz_Varieties.ugr \ ./Data/dark_linux.qss \ ./Data/lightdark.qss \ + ./Data/uranium.qss \ . [ $? -ne 0 ] && echo "Tar command failed." && exit 2