Features:

--Added panorama1 and panorama2 variations.

Bug fixes:
--crackle had a bug with Nvidia GPUs.

Code changes:
--crackle now uses real_t* for its cache rather than real2. The use of real2 is what was causing the bug.
--Make the local offsets array used in crackle a precalc since it's the same for all. This reduces register pressure.
--Get rid of all usages of real3, just to be safe since Nvidia doesn't like them.
--Add a #define for TOTAL_GLOBAL_SIZE_END in the OpenCL iteration kernel, purely for debugging purposes, to see how large the parvars buffer is.
This commit is contained in:
Person
2017-08-16 17:33:11 -07:00
parent d6d121ac95
commit 59f5bffc3c
8 changed files with 142 additions and 28 deletions

View File

@ -703,7 +703,7 @@ void IterOpenCLKernelCreator<T>::SharedDataIndexDefines(const Ember<T>& ember, p
if (auto dataInfo = varFuncs->GetSharedData(s))///Will contain a name, pointer to data, and size of the data in units of sizeof(T).
{
if (doString)
os << "#define " << ToUpper(name) << " " << offset << "\n";
os << "#define " << ToUpper(name) << " " << offset << '\n';
if (doVals)
params.second.insert(params.second.end(), dataInfo->first, dataInfo->first + dataInfo->second);
@ -719,7 +719,7 @@ void IterOpenCLKernelCreator<T>::SharedDataIndexDefines(const Ember<T>& ember, p
if (doString)
{
os << "\n";
os << "#define TOTAL_GLOBAL_SIZE_END " << offset << "\n\n";
params.first = os.str();
}
}