mirror of
				https://bitbucket.org/mfeemster/fractorium.git
				synced 2025-11-03 17:50:27 -05:00 
			
		
		
		
	--Code changes
-Clean up Michel's PR.
This commit is contained in:
		@ -54,6 +54,13 @@ using namespace EmberNs;
 | 
				
			|||||||
//#define TEST_CL 1
 | 
					//#define TEST_CL 1
 | 
				
			||||||
//#define TEST_CL_BUFFERS 1
 | 
					//#define TEST_CL_BUFFERS 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					//This special define is made to fix buggy OpenCL compilers on Mac.
 | 
				
			||||||
 | 
					//Rendering is much slower there for unknown reasons. Michel traced it down
 | 
				
			||||||
 | 
					//to the consec variable which keeps track of how many tries are needed to compute
 | 
				
			||||||
 | 
					//a point which is not a bad value. Strangely, keeping this as a local variable
 | 
				
			||||||
 | 
					//is slower than keeping it as an element in a global array.
 | 
				
			||||||
 | 
					//This is counterintuitive, and lends further weight to the idea that OpenCL on Mac
 | 
				
			||||||
 | 
					//is horribly broken.
 | 
				
			||||||
#ifdef __APPLE__
 | 
					#ifdef __APPLE__
 | 
				
			||||||
    #define KNL_USE_GLOBAL_CONSEC
 | 
						#define KNL_USE_GLOBAL_CONSEC
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
				
			|||||||
@ -359,12 +359,12 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(const Ember<T>& ember,
 | 
				
			|||||||
	os <<
 | 
						os <<
 | 
				
			||||||
	   "	uint histSize,\n"
 | 
						   "	uint histSize,\n"
 | 
				
			||||||
	   "	__read_only image2d_t palette,\n"
 | 
						   "	__read_only image2d_t palette,\n"
 | 
				
			||||||
       "	__global Point* points"
 | 
						   "	__global Point* points"
 | 
				
			||||||
#ifndef KNL_USE_GLOBAL_CONSEC
 | 
					#ifndef KNL_USE_GLOBAL_CONSEC
 | 
				
			||||||
       "\n"
 | 
						   "\n"
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
       ",\n"
 | 
						   ",\n"
 | 
				
			||||||
       "	__global uchar* consec\n"
 | 
						   "	__global uchar* consec\n"
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
	   "\t)\n"
 | 
						   "\t)\n"
 | 
				
			||||||
	   "{\n"
 | 
						   "{\n"
 | 
				
			||||||
@ -463,38 +463,38 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(const Ember<T>& ember,
 | 
				
			|||||||
	   "	for (i = 0; i < itersToDo; i++)\n"
 | 
						   "	for (i = 0; i < itersToDo; i++)\n"
 | 
				
			||||||
	   "	{\n"
 | 
						   "	{\n"
 | 
				
			||||||
#ifndef KNL_USE_GLOBAL_CONSEC
 | 
					#ifndef KNL_USE_GLOBAL_CONSEC
 | 
				
			||||||
       "		consec = 0;\n"
 | 
						   "		consec = 0;\n"
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
       "		consec[blockStartThreadIndex] = 0;\n"
 | 
						   "		consec[blockStartThreadIndex] = 0;\n"
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
       "\n";
 | 
						   ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (ember.XformCount() > 1)
 | 
						if (ember.XformCount() > 1)
 | 
				
			||||||
    {
 | 
						{
 | 
				
			||||||
        //If xaos is present, then a hybrid of the cuburn method is used.
 | 
							//If xaos is present, then a hybrid of the cuburn method is used.
 | 
				
			||||||
        //This makes each thread in a row pick the same offset into a distribution, using xfsel.
 | 
							//This makes each thread in a row pick the same offset into a distribution, using xfsel.
 | 
				
			||||||
        //However, the distribution the offset is in, is determined by firstPoint.m_LastXfUsed.
 | 
							//However, the distribution the offset is in, is determined by firstPoint.m_LastXfUsed.
 | 
				
			||||||
        if (ember.XaosPresent())
 | 
							if (ember.XaosPresent())
 | 
				
			||||||
        {
 | 
							{
 | 
				
			||||||
            os <<
 | 
								os <<
 | 
				
			||||||
#ifdef STRAIGHT_RAND
 | 
					#ifdef STRAIGHT_RAND
 | 
				
			||||||
               "		secondPoint.m_LastXfUsed = xformDistributions[(MwcNext(&mwc) & " << CHOOSE_XFORM_GRAIN_M1 << "u) + (" << CHOOSE_XFORM_GRAIN << "u * (firstPoint.m_LastXfUsed + 1u))];\n\n";
 | 
								   "		secondPoint.m_LastXfUsed = xformDistributions[(MwcNext(&mwc) & " << CHOOSE_XFORM_GRAIN_M1 << "u) + (" << CHOOSE_XFORM_GRAIN << "u * (firstPoint.m_LastXfUsed + 1u))];\n\n";
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
               "		secondPoint.m_LastXfUsed = xformDistributions[xfsel[THREAD_ID_Y] + (" << CHOOSE_XFORM_GRAIN << "u * (firstPoint.m_LastXfUsed + 1u))];\n\n";//Partial cuburn hybrid.
 | 
								   "		secondPoint.m_LastXfUsed = xformDistributions[xfsel[THREAD_ID_Y] + (" << CHOOSE_XFORM_GRAIN << "u * (firstPoint.m_LastXfUsed + 1u))];\n\n";//Partial cuburn hybrid.
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
        }
 | 
							}
 | 
				
			||||||
        else
 | 
							else
 | 
				
			||||||
        {
 | 
							{
 | 
				
			||||||
            os <<
 | 
								os <<
 | 
				
			||||||
#ifdef STRAIGHT_RAND
 | 
					#ifdef STRAIGHT_RAND
 | 
				
			||||||
               "		secondPoint.m_LastXfUsed = xformDistributions[MwcNext(&mwc) & " << CHOOSE_XFORM_GRAIN_M1 << "u];\n\n";//For testing, using straight rand flam4/fractron style instead of cuburn.
 | 
								   "		secondPoint.m_LastXfUsed = xformDistributions[MwcNext(&mwc) & " << CHOOSE_XFORM_GRAIN_M1 << "u];\n\n";//For testing, using straight rand flam4/fractron style instead of cuburn.
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
               "		secondPoint.m_LastXfUsed = xformDistributions[xfsel[THREAD_ID_Y]];\n\n";
 | 
								   "		secondPoint.m_LastXfUsed = xformDistributions[xfsel[THREAD_ID_Y]];\n\n";
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
        }
 | 
							}
 | 
				
			||||||
    }
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    os <<
 | 
						os <<
 | 
				
			||||||
	   "		do\n"
 | 
						   "		do\n"
 | 
				
			||||||
	   "		{\n";
 | 
						   "		{\n";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -542,7 +542,7 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(const Ember<T>& ember,
 | 
				
			|||||||
#ifndef KNL_USE_GLOBAL_CONSEC
 | 
					#ifndef KNL_USE_GLOBAL_CONSEC
 | 
				
			||||||
	   "				consec++;\n"
 | 
						   "				consec++;\n"
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
       "				consec[blockStartThreadIndex]++;\n"
 | 
						   "				consec[blockStartThreadIndex]++;\n"
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
	   //"				badvals++;\n"
 | 
						   //"				badvals++;\n"
 | 
				
			||||||
	   "			}\n"
 | 
						   "			}\n"
 | 
				
			||||||
@ -550,7 +550,7 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(const Ember<T>& ember,
 | 
				
			|||||||
#ifndef KNL_USE_GLOBAL_CONSEC
 | 
					#ifndef KNL_USE_GLOBAL_CONSEC
 | 
				
			||||||
	   "		while (!ok && consec < 5);\n"
 | 
						   "		while (!ok && consec < 5);\n"
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
       "		while (!ok && consec[blockStartThreadIndex] < 5);\n"
 | 
						   "		while (!ok && consec[blockStartThreadIndex] < 5);\n"
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
	   "\n"
 | 
						   "\n"
 | 
				
			||||||
	   "		if (!ok)\n"
 | 
						   "		if (!ok)\n"
 | 
				
			||||||
@ -793,8 +793,8 @@ string IterOpenCLKernelCreator<T>::CreateIterKernelString(const Ember<T>& ember,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
	   os <<
 | 
						   os <<
 | 
				
			||||||
       "	barrier(CLK_GLOBAL_MEM_FENCE);\n"
 | 
						   "	barrier(CLK_GLOBAL_MEM_FENCE);\n"
 | 
				
			||||||
       //"	printf(\"Global ID0: %d Global ID1: %d WorkDim: %d ThreadIndex: %d\\n\", get_global_id(0), get_global_id(1), get_work_dim(), blockStartThreadIndex);\n"
 | 
						   //"	printf(\"Global ID0: %d Global ID1: %d WorkDim: %d ThreadIndex: %d\\n\", get_global_id(0), get_global_id(1), get_work_dim(), blockStartThreadIndex);\n"
 | 
				
			||||||
	   "}\n";
 | 
						   "}\n";
 | 
				
			||||||
	return os.str();
 | 
						return os.str();
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
				
			|||||||
@ -727,9 +727,13 @@ bool RendererCL<T, bucketT>::Alloc(bool histOnly)
 | 
				
			|||||||
		if (b && !(b = device->m_Wrapper.AddBuffer(m_HistBufferName, size)))                                           { ErrorStr(loc, "Failed to set histogram buffer", device.get()); break; }//Histogram. Will memset to zero later.
 | 
							if (b && !(b = device->m_Wrapper.AddBuffer(m_HistBufferName, size)))                                           { ErrorStr(loc, "Failed to set histogram buffer", device.get()); break; }//Histogram. Will memset to zero later.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (b && !(b = device->m_Wrapper.AddBuffer(m_PointsBufferName, IterGridKernelCount() * sizeof(PointCL<T>))))   { ErrorStr(loc, "Failed to set points buffer", device.get()); break; }//Points between iter calls.
 | 
							if (b && !(b = device->m_Wrapper.AddBuffer(m_PointsBufferName, IterGridKernelCount() * sizeof(PointCL<T>))))   { ErrorStr(loc, "Failed to set points buffer", device.get()); break; }//Points between iter calls.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef KNL_USE_GLOBAL_CONSEC
 | 
					#ifdef KNL_USE_GLOBAL_CONSEC
 | 
				
			||||||
        if (b && !(b = device->m_Wrapper.AddBuffer(m_ConsecBufferName, IterGridKernelCount() * sizeof(cl_uchar))))                     { ErrorStr(loc, "Failed to set consec buffer", device.get()); break; }//Global sequence.
 | 
					
 | 
				
			||||||
 | 
							if (b && !(b = device->m_Wrapper.AddBuffer(m_ConsecBufferName, IterGridKernelCount() * sizeof(cl_uchar))))                     { ErrorStr(loc, "Failed to set consec buffer", device.get()); break; }//Global sequence.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (m_VarStates.size())
 | 
							if (m_VarStates.size())
 | 
				
			||||||
			if (b && !(b = device->m_Wrapper.AddBuffer(m_VarStateBufferName, SizeOf(m_VarStates))))                        { ErrorStr(loc, "Failed to set variation state buffer", device.get()); break; }//Points between iter calls.
 | 
								if (b && !(b = device->m_Wrapper.AddBuffer(m_VarStateBufferName, SizeOf(m_VarStates))))                        { ErrorStr(loc, "Failed to set variation state buffer", device.get()); break; }//Points between iter calls.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -1087,8 +1091,8 @@ bool RendererCL<T, bucketT>::RunIter(size_t iterCount, size_t temporalSample, si
 | 
				
			|||||||
			//Similar to what's done in the base class.
 | 
								//Similar to what's done in the base class.
 | 
				
			||||||
			//The number of iters per thread must be adjusted if they've requested fewer iters than are normally run in a grid (256 * 256 * 64 * 2 = 32,768).
 | 
								//The number of iters per thread must be adjusted if they've requested fewer iters than are normally run in a grid (256 * 256 * 64 * 2 = 32,768).
 | 
				
			||||||
			uint iterCountPerKernel = std::min<uint>(uint(adjustedIterCountPerKernel), uint(ceil(double(itersRemaining) / IterGridKernelCount())));
 | 
								uint iterCountPerKernel = std::min<uint>(uint(adjustedIterCountPerKernel), uint(ceil(double(itersRemaining) / IterGridKernelCount())));
 | 
				
			||||||
            size_t iterCountThisLaunch = iterCountPerKernel * IterGridKernelWidth() * IterGridKernelHeight();
 | 
								size_t iterCountThisLaunch = iterCountPerKernel * IterGridKernelWidth() * IterGridKernelHeight();
 | 
				
			||||||
            //cout << "itersRemaining " << itersRemaining << ", iterCountPerKernel " << iterCountPerKernel << ", iterCountThisLaunch " << iterCountThisLaunch << "\n";
 | 
								//cout << "itersRemaining " << itersRemaining << ", iterCountPerKernel " << iterCountPerKernel << ", iterCountThisLaunch " << iterCountThisLaunch << "\n";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			if (b && !(b = wrapper.SetArg	   (kernelIndex, argIndex++, iterCountPerKernel)))        { ErrorStr(loc, "Setting iter count argument failed", m_Devices[dev].get()); }//Number of iters for each thread to run.
 | 
								if (b && !(b = wrapper.SetArg	   (kernelIndex, argIndex++, iterCountPerKernel)))        { ErrorStr(loc, "Setting iter count argument failed", m_Devices[dev].get()); }//Number of iters for each thread to run.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -1118,9 +1122,13 @@ bool RendererCL<T, bucketT>::RunIter(size_t iterCount, size_t temporalSample, si
 | 
				
			|||||||
			if (b && !(b = wrapper.SetImageArg (kernelIndex, argIndex++, false, "Palette")))          { ErrorStr(loc, "Setting palette argument failed", m_Devices[dev].get()); }//Palette.
 | 
								if (b && !(b = wrapper.SetImageArg (kernelIndex, argIndex++, false, "Palette")))          { ErrorStr(loc, "Setting palette argument failed", m_Devices[dev].get()); }//Palette.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_PointsBufferName)))        { ErrorStr(loc, "Setting points buffer argument failed", m_Devices[dev].get()); }//Random start points.
 | 
								if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_PointsBufferName)))        { ErrorStr(loc, "Setting points buffer argument failed", m_Devices[dev].get()); }//Random start points.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef KNL_USE_GLOBAL_CONSEC
 | 
					#ifdef KNL_USE_GLOBAL_CONSEC
 | 
				
			||||||
            if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_ConsecBufferName)))        { ErrorStr(loc, "Setting consec buffer argument failed", m_Devices[dev].get()); }//Global sequence.
 | 
					
 | 
				
			||||||
 | 
								if (b && !(b = wrapper.SetBufferArg(kernelIndex, argIndex++, m_ConsecBufferName)))        { ErrorStr(loc, "Setting consec buffer argument failed", m_Devices[dev].get()); }//Global sequence.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			if (b && !(b = wrapper.RunKernel(kernelIndex,
 | 
								if (b && !(b = wrapper.RunKernel(kernelIndex,
 | 
				
			||||||
											 IterGridKernelWidth(),//Total grid dims.
 | 
																 IterGridKernelWidth(),//Total grid dims.
 | 
				
			||||||
											 IterGridKernelHeight(),
 | 
																 IterGridKernelHeight(),
 | 
				
			||||||
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user