Features:

--Added panorama1 and panorama2 variations.

Bug fixes:
--Fixed a bug in crackle that occurred on Nvidia GPUs.

Code changes:
--crackle now uses real_t* for its cache rather than real2. Using real2 for the cache is what was causing the bug.
--Make the local offsets array used in crackle a precalc since it's the same for all. This reduces register pressure.
--Get rid of all usages of real3, just to be safe since Nvidia doesn't like them.
--Add a #define for TOTAL_GLOBAL_SIZE_END to the OpenCL iteration kernel, purely for debugging purposes, to show how large the parvars buffer is.
This commit is contained in:
Person
2017-08-16 17:33:11 -07:00
parent d6d121ac95
commit 59f5bffc3c
8 changed files with 142 additions and 28 deletions

View File

@ -176,17 +176,16 @@ FunctionMapper::FunctionMapper()
" return ratiomax;\n"
"}\n";
s_GlobalMap["SimplexNoise3D"] =
"inline real_t SimplexNoise3D(real3* v, __global real_t* p, __global real_t* grad)\n"
"inline real_t SimplexNoise3D(real4* v, __global real_t* p, __global real_t* grad)\n"
"{\n"
" real3 c[4];\n"
" real4 c[4];\n"
" real_t n = 0;\n"
" int gi[4];\n"
" real_t t;\n"
" real_t skewIn = ((*v).x + (*v).y + (*v).z) * 0.333333;\n"
" int i = (int)floor((*v).x + skewIn);\n"
" int j = (int)floor((*v).y + skewIn);\n"
" int k = (int)floor((*v).z + skewIn);\n"
" t = (i + j + k) * 0.1666666;\n"
" real_t t = (i + j + k) * 0.1666666;\n"
" real_t x0 = i - t;\n"
" real_t y0 = j - t;\n"
" real_t z0 = k - t;\n"
@ -195,7 +194,7 @@ FunctionMapper::FunctionMapper()
" c[0].z = (*v).z - z0;\n"
" int i1, j1, k1;\n"
" int i2, j2, k2;\n"
" real3 u;\n"
" real4 u;\n"
"\n"
" if (c[0].x >= c[0].y)\n"
" {\n"
@ -257,9 +256,10 @@ FunctionMapper::FunctionMapper()
"\n"
" if (t > 0)\n"
" {\n"
" u.x = grad[(gi[corner] * 3)];\n"
" u.y = grad[(gi[corner] * 3) + 1];\n"
" u.z = grad[(gi[corner] * 3) + 2];\n"
" int index = gi[corner] * 3;\n"
" u.x = grad[index];\n"
" u.y = grad[index + 1];\n"
" u.z = grad[index + 2];\n"
" t *= t;\n"
" n += t * t * (u.x * c[corner].x + u.y * c[corner].y + u.z * c[corner].z);\n"
" }\n"
@ -268,11 +268,11 @@ FunctionMapper::FunctionMapper()
" return 32.0 * n;\n"
"}\n";
s_GlobalMap["PerlinNoise3D"] =
"inline real_t PerlinNoise3D(real3* v, __global real_t* p, __global real_t* grad, real_t aScale, real_t fScale, int octaves)\n"
"inline real_t PerlinNoise3D(real4* v, __global real_t* p, __global real_t* grad, real_t aScale, real_t fScale, int octaves)\n"
"{\n"
" int i;\n"
" real_t n = 0.0, a = 1.0;\n"
" real3 u = *v;\n"
" real4 u = *v;\n"
"\n"
" for (i = 0; i < octaves; i++)\n"
" {\n"

View File

@ -703,7 +703,7 @@ void IterOpenCLKernelCreator<T>::SharedDataIndexDefines(const Ember<T>& ember, p
if (auto dataInfo = varFuncs->GetSharedData(s))///Will contain a name, pointer to data, and size of the data in units of sizeof(T).
{
if (doString)
os << "#define " << ToUpper(name) << " " << offset << "\n";
os << "#define " << ToUpper(name) << " " << offset << '\n';
if (doVals)
params.second.insert(params.second.end(), dataInfo->first, dataInfo->first + dataInfo->second);
@ -719,7 +719,7 @@ void IterOpenCLKernelCreator<T>::SharedDataIndexDefines(const Ember<T>& ember, p
if (doString)
{
os << "\n";
os << "#define TOTAL_GLOBAL_SIZE_END " << offset << "\n\n";
params.first = os.str();
}
}