mirror of
				https://github.com/stevenrobertson/cuburn.git
				synced 2025-11-03 18:00:55 -05:00 
			
		
		
		
	'fill_dptr' instead of 'zero_dptr'
This commit is contained in:
		@ -90,10 +90,10 @@ uint32_t trunca(float f) {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
__global__
 | 
					__global__
 | 
				
			||||||
void zero_dptr(float* dptr, int size) {
 | 
					void fill_dptr(uint32_t* dptr, int size, uint32_t value) {
 | 
				
			||||||
    int i = (gridDim.x * blockIdx.y + blockIdx.x) * blockDim.x + threadIdx.x;
 | 
					    int i = (gridDim.x * blockIdx.y + blockIdx.x) * blockDim.x + threadIdx.x;
 | 
				
			||||||
    if (i < size) {
 | 
					    if (i < size) {
 | 
				
			||||||
        dptr[i] = 0.0f;
 | 
					        dptr[i] = value;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -185,14 +185,21 @@ float3 hsv2rgb(float3 hsv) {
 | 
				
			|||||||
"""
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @staticmethod
 | 
					    @staticmethod
 | 
				
			||||||
    def zero_dptr(mod, dptr, size, stream=None):
 | 
					    def fill_dptr(mod, dptr, size, stream=None, value=np.uint32(0)):
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        A memory zeroer which can be embedded in a stream. Size is the
 | 
					        A memory zeroer which can be embedded in a stream, unlike the various
 | 
				
			||||||
        number of 4-byte words in the pointer.
 | 
					        memset routines. Size is the number of 4-byte words in the pointer;
 | 
				
			||||||
 | 
					        value is the word to fill it with. If value is not an np.uint32, it
 | 
				
			||||||
 | 
					        will be coerced to a buffer and the first four bytes taken.
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        zero = mod.get_function("zero_dptr")
 | 
					        fill = mod.get_function("fill_dptr")
 | 
				
			||||||
 | 
					        if not isinstance(value, np.uint32):
 | 
				
			||||||
 | 
					            if isinstance(value, int):
 | 
				
			||||||
 | 
					                value = np.uint32(value)
 | 
				
			||||||
 | 
					            else:
 | 
				
			||||||
 | 
					                value = np.frombuffer(buffer(value), np.uint32)[0]
 | 
				
			||||||
        blocks = int(np.ceil(np.sqrt(size / 1024 + 1)))
 | 
					        blocks = int(np.ceil(np.sqrt(size / 1024 + 1)))
 | 
				
			||||||
        zero(dptr, np.int32(size), stream=stream,
 | 
					        fill(dptr, np.int32(size), value, stream=stream,
 | 
				
			||||||
             block=(1024, 1, 1), grid=(blocks, blocks, 1))
 | 
					             block=(1024, 1, 1), grid=(blocks, blocks, 1))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -166,7 +166,7 @@ class Renderer(object):
 | 
				
			|||||||
        iter_fun = self.mod.get_function("iter")
 | 
					        iter_fun = self.mod.get_function("iter")
 | 
				
			||||||
        #iter_fun.set_cache_config(cuda.func_cache.PREFER_L1)
 | 
					        #iter_fun.set_cache_config(cuda.func_cache.PREFER_L1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        util.BaseCode.zero_dptr(self.mod, d_accum, 4 * nbins, filt_stream)
 | 
					        util.BaseCode.fill_dptr(self.mod, d_accum, 4 * nbins, filt_stream)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        last_time = times[0][0]
 | 
					        last_time = times[0][0]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -221,9 +221,9 @@ class Renderer(object):
 | 
				
			|||||||
                yield last_time, self._trim(h_out)
 | 
					                yield last_time, self._trim(h_out)
 | 
				
			||||||
                last_time = start
 | 
					                last_time = start
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            util.BaseCode.zero_dptr(self.mod, d_out, 4 * nbins, filt_stream)
 | 
					            util.BaseCode.fill_dptr(self.mod, d_out, 4 * nbins, filt_stream)
 | 
				
			||||||
            self._de.invoke(self.mod, cen_cp, d_accum, d_out, filt_stream)
 | 
					            self._de.invoke(self.mod, cen_cp, d_accum, d_out, filt_stream)
 | 
				
			||||||
            util.BaseCode.zero_dptr(self.mod, d_accum, 4 * nbins, filt_stream)
 | 
					            util.BaseCode.fill_dptr(self.mod, d_accum, 4 * nbins, filt_stream)
 | 
				
			||||||
            filter_done_event = cuda.Event().record(filt_stream)
 | 
					            filter_done_event = cuda.Event().record(filt_stream)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            f32 = np.float32
 | 
					            f32 = np.float32
 | 
				
			||||||
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user