mirror of
https://github.com/stevenrobertson/cuburn.git
synced 2025-02-05 11:40:04 -05:00
'fill_dptr' instead of 'zero_dptr'
This commit is contained in:
parent
3c1dac530b
commit
1faffa1d14
@ -90,10 +90,10 @@ uint32_t trunca(float f) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
__global__
|
__global__
|
||||||
void zero_dptr(float* dptr, int size) {
|
void fill_dptr(uint32_t* dptr, int size, uint32_t value) {
|
||||||
int i = (gridDim.x * blockIdx.y + blockIdx.x) * blockDim.x + threadIdx.x;
|
int i = (gridDim.x * blockIdx.y + blockIdx.x) * blockDim.x + threadIdx.x;
|
||||||
if (i < size) {
|
if (i < size) {
|
||||||
dptr[i] = 0.0f;
|
dptr[i] = value;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -185,14 +185,21 @@ float3 hsv2rgb(float3 hsv) {
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def zero_dptr(mod, dptr, size, stream=None):
|
def fill_dptr(mod, dptr, size, stream=None, value=np.uint32(0)):
|
||||||
"""
|
"""
|
||||||
A memory zeroer which can be embedded in a stream. Size is the
|
A memory filler which can be embedded in a stream, unlike the various
|
||||||
number of 4-byte words in the pointer.
|
memset routines. Size is the number of 4-byte words in the pointer;
|
||||||
|
value is the word to fill it with. A Python int is converted with
|
||||||
|
np.uint32(); anything else is coerced to a buffer and the first four bytes taken.
|
||||||
"""
|
"""
|
||||||
zero = mod.get_function("zero_dptr")
|
fill = mod.get_function("fill_dptr")
|
||||||
|
if not isinstance(value, np.uint32):
|
||||||
|
if isinstance(value, int):
|
||||||
|
value = np.uint32(value)
|
||||||
|
else:
|
||||||
|
value = np.frombuffer(buffer(value), np.uint32)[0]
|
||||||
blocks = int(np.ceil(np.sqrt(size / 1024 + 1)))
|
blocks = int(np.ceil(np.sqrt(size / 1024 + 1)))
|
||||||
zero(dptr, np.int32(size), stream=stream,
|
fill(dptr, np.int32(size), value, stream=stream,
|
||||||
block=(1024, 1, 1), grid=(blocks, blocks, 1))
|
block=(1024, 1, 1), grid=(blocks, blocks, 1))
|
||||||
|
|
||||||
|
|
||||||
|
@ -166,7 +166,7 @@ class Renderer(object):
|
|||||||
iter_fun = self.mod.get_function("iter")
|
iter_fun = self.mod.get_function("iter")
|
||||||
#iter_fun.set_cache_config(cuda.func_cache.PREFER_L1)
|
#iter_fun.set_cache_config(cuda.func_cache.PREFER_L1)
|
||||||
|
|
||||||
util.BaseCode.zero_dptr(self.mod, d_accum, 4 * nbins, filt_stream)
|
util.BaseCode.fill_dptr(self.mod, d_accum, 4 * nbins, filt_stream)
|
||||||
|
|
||||||
last_time = times[0][0]
|
last_time = times[0][0]
|
||||||
|
|
||||||
@ -221,9 +221,9 @@ class Renderer(object):
|
|||||||
yield last_time, self._trim(h_out)
|
yield last_time, self._trim(h_out)
|
||||||
last_time = start
|
last_time = start
|
||||||
|
|
||||||
util.BaseCode.zero_dptr(self.mod, d_out, 4 * nbins, filt_stream)
|
util.BaseCode.fill_dptr(self.mod, d_out, 4 * nbins, filt_stream)
|
||||||
self._de.invoke(self.mod, cen_cp, d_accum, d_out, filt_stream)
|
self._de.invoke(self.mod, cen_cp, d_accum, d_out, filt_stream)
|
||||||
util.BaseCode.zero_dptr(self.mod, d_accum, 4 * nbins, filt_stream)
|
util.BaseCode.fill_dptr(self.mod, d_accum, 4 * nbins, filt_stream)
|
||||||
filter_done_event = cuda.Event().record(filt_stream)
|
filter_done_event = cuda.Event().record(filt_stream)
|
||||||
|
|
||||||
f32 = np.float32
|
f32 = np.float32
|
||||||
|
Loading…
Reference in New Issue
Block a user