mirror of
https://bitbucket.org/mfeemster/fractorium.git
synced 2025-02-01 18:40:12 -05:00
--User changes
-crackle is now 5-10 times faster as a result of using cached values, as is done on the CPU. These were previously omitted due to a bug elsewhere in the code that was erroneously attributed to the crackle caching mechanism.
-Report precision in command line programs.
This commit is contained in:
parent
0fbea60026
commit
d639921d68
@ -3768,24 +3768,29 @@ public:
|
|||||||
|
|
||||||
virtual string OpenCLFuncsString() const override
|
virtual string OpenCLFuncsString() const override
|
||||||
{
|
{
|
||||||
//CPU version uses a cache of points if the abs() values are <= 10. However, this crashes on Nvidia GPUs.
|
ostringstream os;
|
||||||
//The problem was traced to the usage of the cache array.
|
os <<
|
||||||
//No possible solution was found, so it is unused here.
|
"static void Position(__constant real2* cache, __global real_t* p, __global real_t* grad, int x, int y, real_t z, real_t s, real_t d, real2* v)\n"
|
||||||
//The full calculation is recomputed for every point.
|
"{\n"
|
||||||
return
|
" if (abs(x) <= " << CACHE_NUM << " && abs(y) <= " << CACHE_NUM << ")\n"
|
||||||
"static void Position(__global real_t* p, __global real_t* grad, int x, int y, real_t z, real_t s, real_t d, real2* v)\n"
|
" {\n"
|
||||||
"{\n"
|
" *v = cache[((x + " << CACHE_NUM << ") * " << CACHE_WIDTH << ") + (y + " << CACHE_NUM << ")];\n"
|
||||||
" real3 e, f;\n"
|
" }\n"
|
||||||
" e.x = x * 2.5;\n"
|
" else\n"
|
||||||
" e.y = y * 2.5;\n"
|
" {\n"
|
||||||
" e.z = z * 2.5;\n"
|
" real3 e, f;\n"
|
||||||
" f.x = y * 2.5 + 30.2;\n"
|
" e.x = x * 2.5;\n"
|
||||||
" f.y = x * 2.5 - 12.1;\n"
|
" e.y = y * 2.5;\n"
|
||||||
" f.z = z * 2.5 + 19.8;\n"
|
" e.z = z * 2.5;\n"
|
||||||
" (*v).x = (x + d * SimplexNoise3D(&e, p, grad)) * s;\n"
|
" f.x = y * 2.5 + 30.2;\n"
|
||||||
" (*v).y = (y + d * SimplexNoise3D(&f, p, grad)) * s;\n"
|
" f.y = x * 2.5 - 12.1;\n"
|
||||||
"}\n"
|
" f.z = z * 2.5 + 19.8;\n"
|
||||||
"\n";
|
" (*v).x = (x + d * SimplexNoise3D(&e, p, grad)) * s;\n"
|
||||||
|
" (*v).y = (y + d * SimplexNoise3D(&f, p, grad)) * s;\n"
|
||||||
|
" }\n"
|
||||||
|
"}\n"
|
||||||
|
"\n";
|
||||||
|
return os.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual string OpenCLString() const override
|
virtual string OpenCLString() const override
|
||||||
@ -3800,6 +3805,7 @@ public:
|
|||||||
string scale = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
|
string scale = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
|
||||||
string z = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
|
string z = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
|
||||||
string halfCellSize = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
|
string halfCellSize = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
|
||||||
|
string cache = "parVars[" + ToUpper(m_Params[i++].Name()) + index;
|
||||||
ss << "\t{\n"
|
ss << "\t{\n"
|
||||||
<< "\t\tint di = -1, dj = -1;\n"
|
<< "\t\tint di = -1, dj = -1;\n"
|
||||||
<< "\t\tint i = 0;\n"
|
<< "\t\tint i = 0;\n"
|
||||||
@ -3822,7 +3828,7 @@ public:
|
|||||||
<< "\t\t{\n"
|
<< "\t\t{\n"
|
||||||
<< "\t\t for (dj = -1; dj < 2; dj++)\n"
|
<< "\t\t for (dj = -1; dj < 2; dj++)\n"
|
||||||
<< "\t\t {\n"
|
<< "\t\t {\n"
|
||||||
<< "\t\t Position(globalShared + NOISE_INDEX, globalShared + NOISE_POINTS, cv.x + di, cv.y + dj, " << z << ", " << halfCellSize << ", " << distort << ", &p[i]);\n"
|
<< "\t\t Position((__constant real2*)(&" << cache << "), globalShared + NOISE_INDEX, globalShared + NOISE_POINTS, cv.x + di, cv.y + dj, " << z << ", " << halfCellSize << ", " << distort << ", &p[i]); \n"
|
||||||
<< "\t\t i++;\n"
|
<< "\t\t i++;\n"
|
||||||
<< "\t\t }\n"
|
<< "\t\t }\n"
|
||||||
<< "\t\t}\n"
|
<< "\t\t}\n"
|
||||||
@ -3838,7 +3844,7 @@ public:
|
|||||||
<< "\t\t{\n"
|
<< "\t\t{\n"
|
||||||
<< "\t\t for (dj = -1; dj < 2; dj++)\n"
|
<< "\t\t for (dj = -1; dj < 2; dj++)\n"
|
||||||
<< "\t\t {\n"
|
<< "\t\t {\n"
|
||||||
<< "\t\t Position(globalShared + NOISE_INDEX, globalShared + NOISE_POINTS, cv.x + di, cv.y + dj, " << z << ", " << halfCellSize << ", " << distort << ", &p[i]);\n"
|
<< "\t\t Position((__constant real2*)(&" << cache << "), globalShared + NOISE_INDEX, globalShared + NOISE_POINTS, cv.x + di, cv.y + dj, " << z << ", " << halfCellSize << ", " << distort << ", &p[i]);\n"
|
||||||
<< "\t\t i++;\n"
|
<< "\t\t i++;\n"
|
||||||
<< "\t\t }\n"
|
<< "\t\t }\n"
|
||||||
<< "\t\t}\n"
|
<< "\t\t}\n"
|
||||||
@ -3877,6 +3883,7 @@ protected:
|
|||||||
m_Params.push_back(ParamWithName<T>(&m_Scale, prefix + "crackle_scale", 1));
|
m_Params.push_back(ParamWithName<T>(&m_Scale, prefix + "crackle_scale", 1));
|
||||||
m_Params.push_back(ParamWithName<T>(&m_Z, prefix + "crackle_z"));
|
m_Params.push_back(ParamWithName<T>(&m_Z, prefix + "crackle_z"));
|
||||||
m_Params.push_back(ParamWithName<T>(true, &m_HalfCellSize, prefix + "crackle_half_cellsize"));
|
m_Params.push_back(ParamWithName<T>(true, &m_HalfCellSize, prefix + "crackle_half_cellsize"));
|
||||||
|
m_Params.push_back(ParamWithName<T>(true, &(m_C[0][0].x), prefix + "crackle_cache", sizeof(m_C)));
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -24,6 +24,7 @@ bool EmberAnimate(EmberOptions& opt)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VerbosePrint("Using " << (sizeof(T) == sizeof(float) ? "single" : "double") << " precision.");
|
||||||
//Regular variables.
|
//Regular variables.
|
||||||
Timing t;
|
Timing t;
|
||||||
bool unsorted = false;
|
bool unsorted = false;
|
||||||
|
@ -97,6 +97,7 @@ bool EmberGenome(EmberOptions& opt)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VerbosePrint("Using " << (sizeof(T) == sizeof(float) ? "single" : "double") << " precision.");
|
||||||
//Regular variables.
|
//Regular variables.
|
||||||
Timing t;
|
Timing t;
|
||||||
bool exactTimeMatch, randomMode, didColor, seqFlag;
|
bool exactTimeMatch, randomMode, didColor, seqFlag;
|
||||||
|
@ -26,6 +26,7 @@ bool EmberRender(EmberOptions& opt)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VerbosePrint("Using " << (sizeof(T) == sizeof(float) ? "single" : "double") << " precision.");
|
||||||
Timing t;
|
Timing t;
|
||||||
bool writeSuccess = false;
|
bool writeSuccess = false;
|
||||||
byte* finalImagep;
|
byte* finalImagep;
|
||||||
|
Loading…
Reference in New Issue
Block a user