mirror of
https://github.com/stevenrobertson/cuburn.git
synced 2025-02-05 11:40:04 -05:00
Use consts for image size instead of immediates.
This saves us from having to recompile if the frame size changes.
This commit is contained in:
parent
084a65c615
commit
b73461132c
@ -129,6 +129,13 @@ texture<uchar4, cudaTextureType2D, cudaReadModeNormalizedFloat> palTex;
|
||||
__shared__ iter_params params;
|
||||
__device__ int rb_head, rb_tail, rb_size;
|
||||
|
||||
// Dimensions of the accumulation buffer. iter() reads these at run time for
// its bounds check and flat-index computation, in place of values that were
// previously substituted into the kernel source as immediates.
// The host side fills this struct from an array of three uint32 values in
// the order width, height, stride, so the field order here must match it.
typedef struct {
|
||||
uint32_t width;
|
||||
uint32_t height;
|
||||
// Multiplied by the row index when iter() computes a flat accumulator index.
uint32_t stride;
|
||||
} acc_size_t;
|
||||
// Looked up on the host by the global name 'acc_size' and written with a
// host-to-device copy before use.
__constant__ acc_size_t acc_size;
|
||||
|
||||
"""
|
||||
|
||||
def _xfbody(self, xfid, xform):
|
||||
@ -330,14 +337,14 @@ void iter(
|
||||
|
||||
uint32_t ix = trunca(cx), iy = trunca(cy);
|
||||
|
||||
if (ix >= {{info.acc_width}} || iy >= {{info.acc_height}}) {
|
||||
if (ix >= acc_size.width || iy >= acc_size.height) {
|
||||
{{if info.acc_mode == 'deferred'}}
|
||||
*log = 0xffffffff;
|
||||
{{endif}}
|
||||
continue;
|
||||
}
|
||||
|
||||
uint32_t i = iy * {{info.acc_stride}} + ix;
|
||||
uint32_t i = iy * acc_size.stride + ix;
|
||||
|
||||
{{if info.acc_mode == 'atomic'}}
|
||||
float4 outcol = tex2D(palTex, cc, time_frac);
|
||||
|
@ -132,6 +132,10 @@ class Renderer(object):
|
||||
d_accum = cuda.mem_alloc(16 * nbins)
|
||||
d_out = cuda.mem_alloc(16 * nbins)
|
||||
|
||||
acc_size = np.array([info.acc_width, info.acc_height, info.acc_stride])
|
||||
d_acc_size = self.mod.get_global('acc_size')[0]
|
||||
cuda.memcpy_htod_async(d_acc_size, np.uint32(acc_size), write_stream)
|
||||
|
||||
if info.acc_mode == 'deferred':
|
||||
# Having a fixed, power-of-two log size makes things much easier
|
||||
log_size = 64 << 20
|
||||
|
Loading…
Reference in New Issue
Block a user