mirror of
				https://github.com/stevenrobertson/cuburn.git
				synced 2025-11-03 18:00:55 -05:00 
			
		
		
		
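	Use constant memory for the image size instead of immediates.
Previously the accumulator width, height, and stride were substituted into the kernel source as literal values; they are now read from a `__constant__` struct that the host uploads. This saves us from having to recompile the kernel if the frame size changes.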
This commit is contained in:
		@ -129,6 +129,13 @@ texture<uchar4, cudaTextureType2D, cudaReadModeNormalizedFloat> palTex;
 | 
				
			|||||||
__shared__ iter_params params;
 | 
					__shared__ iter_params params;
 | 
				
			||||||
__device__ int rb_head, rb_tail, rb_size;
 | 
					__device__ int rb_head, rb_tail, rb_size;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef struct {
 | 
				
			||||||
 | 
					    uint32_t width;
 | 
				
			||||||
 | 
					    uint32_t height;
 | 
				
			||||||
 | 
					    uint32_t stride;
 | 
				
			||||||
 | 
					} acc_size_t;
 | 
				
			||||||
 | 
					__constant__ acc_size_t acc_size;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
"""
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _xfbody(self, xfid, xform):
 | 
					    def _xfbody(self, xfid, xform):
 | 
				
			||||||
@ -330,14 +337,14 @@ void iter(
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        uint32_t ix = trunca(cx), iy = trunca(cy);
 | 
					        uint32_t ix = trunca(cx), iy = trunca(cy);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if (ix >= {{info.acc_width}} || iy >= {{info.acc_height}}) {
 | 
					        if (ix >= acc_size.width || iy >= acc_size.height) {
 | 
				
			||||||
{{if info.acc_mode == 'deferred'}}
 | 
					{{if info.acc_mode == 'deferred'}}
 | 
				
			||||||
            *log = 0xffffffff;
 | 
					            *log = 0xffffffff;
 | 
				
			||||||
{{endif}}
 | 
					{{endif}}
 | 
				
			||||||
            continue;
 | 
					            continue;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        uint32_t i = iy * {{info.acc_stride}} + ix;
 | 
					        uint32_t i = iy * acc_size.stride + ix;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{{if info.acc_mode == 'atomic'}}
 | 
					{{if info.acc_mode == 'atomic'}}
 | 
				
			||||||
        float4 outcol = tex2D(palTex, cc, time_frac);
 | 
					        float4 outcol = tex2D(palTex, cc, time_frac);
 | 
				
			||||||
 | 
				
			|||||||
@ -132,6 +132,10 @@ class Renderer(object):
 | 
				
			|||||||
        d_accum = cuda.mem_alloc(16 * nbins)
 | 
					        d_accum = cuda.mem_alloc(16 * nbins)
 | 
				
			||||||
        d_out = cuda.mem_alloc(16 * nbins)
 | 
					        d_out = cuda.mem_alloc(16 * nbins)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        acc_size = np.array([info.acc_width, info.acc_height, info.acc_stride])
 | 
				
			||||||
 | 
					        d_acc_size = self.mod.get_global('acc_size')[0]
 | 
				
			||||||
 | 
					        cuda.memcpy_htod_async(d_acc_size, np.uint32(acc_size), write_stream)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if info.acc_mode == 'deferred':
 | 
					        if info.acc_mode == 'deferred':
 | 
				
			||||||
            # Having a fixed, power-of-two log size makes things much easier
 | 
					            # Having a fixed, power-of-two log size makes things much easier
 | 
				
			||||||
            log_size = 64 << 20
 | 
					            log_size = 64 << 20
 | 
				
			||||||
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user