mirror of
				https://github.com/stevenrobertson/cuburn.git
				synced 2025-11-02 09:20:43 -05:00 
			
		
		
		
	Merge memory transaction for slightly less smashing
This commit is contained in:
		@ -61,9 +61,9 @@ void apply_xf{{xfid}}(float *ix, float *iy, float *icolor, mwc_st *rctx) {
 | 
				
			|||||||
        return tmpl.substitute(g)
 | 
					        return tmpl.substitute(g)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _iterbody(self):
 | 
					    def _iterbody(self):
 | 
				
			||||||
        tmpl = Template("""
 | 
					        tmpl = Template(r"""
 | 
				
			||||||
__global__
 | 
					__global__
 | 
				
			||||||
void iter(mwc_st *msts, iter_info *infos, float *accbuf, float *denbuf) {
 | 
					void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
 | 
				
			||||||
    mwc_st rctx = msts[gtid()];
 | 
					    mwc_st rctx = msts[gtid()];
 | 
				
			||||||
    iter_info *info_glob = &(infos[blockIdx.x]);
 | 
					    iter_info *info_glob = &(infos[blockIdx.x]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -139,10 +139,12 @@ void iter(mwc_st *msts, iter_info *infos, float *accbuf, float *denbuf) {
 | 
				
			|||||||
        int i = iy * {{features.width}} + ix;
 | 
					        int i = iy * {{features.width}} + ix;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        float4 outcol = tex2D(palTex, color, {{packer.get('cp_step_frac')}});
 | 
					        float4 outcol = tex2D(palTex, color, {{packer.get('cp_step_frac')}});
 | 
				
			||||||
        accbuf[i*4]     += outcol.x;
 | 
					        float4 pix = accbuf[i];
 | 
				
			||||||
        accbuf[i*4+1]   += outcol.y;
 | 
					        pix.x += outcol.x;
 | 
				
			||||||
        accbuf[i*4+2]   += outcol.z;
 | 
					        pix.y += outcol.y;
 | 
				
			||||||
        accbuf[i*4+3]   += outcol.w;
 | 
					        pix.z += outcol.z;
 | 
				
			||||||
 | 
					        pix.w += outcol.w;
 | 
				
			||||||
 | 
					        accbuf[i] = pix;    // TODO: atomic operations (or better)
 | 
				
			||||||
        denbuf[i] += 1.0f;
 | 
					        denbuf[i] += 1.0f;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user