mirror of
				https://github.com/stevenrobertson/cuburn.git
				synced 2025-11-03 18:00:55 -05:00 
			
		
		
		
	Use one dither offset per block.
This commit is contained in:
		@ -110,6 +110,16 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
 | 
				
			|||||||
        cosel[threadIdx.x] = mwc_next_01(rctx);
 | 
					        cosel[threadIdx.x] = mwc_next_01(rctx);
 | 
				
			||||||
    {{endif}}
 | 
					    {{endif}}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (threadIdx.y == 1 && threadIdx.x == 0) {
 | 
				
			||||||
 | 
					        float ditherwidth = {{packer.get("0.33 * cp.spatial_filter_radius")}};
 | 
				
			||||||
 | 
					        float u0 = mwc_next_01(rctx);
 | 
				
			||||||
 | 
					        float r = ditherwidth * sqrt(-2.0f * log2f(u0) / M_LOG2E);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        float u1 = 2.0f * M_PI * mwc_next_01(rctx);
 | 
				
			||||||
 | 
					        info.cam_xo += r * cos(u1);
 | 
				
			||||||
 | 
					        info.cam_yo += r * sin(u1);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    __syncthreads();
 | 
					    __syncthreads();
 | 
				
			||||||
    int consec_bad = -{{features.fuse}};
 | 
					    int consec_bad = -{{features.fuse}};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -197,21 +207,9 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
 | 
				
			|||||||
        {{apply_affine('x', 'y', 'cx', 'cy', packer,
 | 
					        {{apply_affine('x', 'y', 'cx', 'cy', packer,
 | 
				
			||||||
                       'cp.camera_transform', 'cam')}}
 | 
					                       'cp.camera_transform', 'cam')}}
 | 
				
			||||||
        {{endif}}
 | 
					        {{endif}}
 | 
				
			||||||
 | 
					        uint32_t ix = trunca(cx), iy = trunca(cy);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // TODO: verify that constants get premultiplied
 | 
					        if (ix >= {{features.acc_width}} || iy >= {{features.acc_height}} ) {
 | 
				
			||||||
        float ditherwidth = {{packer.get("0.33 * cp.spatial_filter_radius")}};
 | 
					 | 
				
			||||||
        float u0 = mwc_next_01(rctx);
 | 
					 | 
				
			||||||
        float r = ditherwidth * sqrt(-2.0f * log2f(u0) / M_LOG2E);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        // TODO: provide mwc_next_0_2pi()
 | 
					 | 
				
			||||||
        float u1 = 2.0f * M_PI * mwc_next_01(rctx);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        float ditherx = r * cos(u1);
 | 
					 | 
				
			||||||
        float dithery = r * sin(u1);
 | 
					 | 
				
			||||||
        int ix = trunca(cx+ditherx), iy = trunca(cy+dithery);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if (ix < 0 || ix >= {{features.acc_width}} ||
 | 
					 | 
				
			||||||
            iy < 0 || iy >= {{features.acc_height}} ) {
 | 
					 | 
				
			||||||
            consec_bad++;
 | 
					            consec_bad++;
 | 
				
			||||||
            if (consec_bad > {{features.max_oob}}) {
 | 
					            if (consec_bad > {{features.max_oob}}) {
 | 
				
			||||||
                x = mwc_next_11(rctx);
 | 
					                x = mwc_next_11(rctx);
 | 
				
			||||||
@ -222,7 +220,7 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
 | 
				
			|||||||
            continue;
 | 
					            continue;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        int i = iy * {{features.acc_stride}} + ix;
 | 
					        uint32_t i = iy * {{features.acc_stride}} + ix;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        float4 outcol = tex2D(palTex, color, {{packer.get("cp_step_frac")}});
 | 
					        float4 outcol = tex2D(palTex, color, {{packer.get("cp_step_frac")}});
 | 
				
			||||||
        float4 pix = accbuf[i];
 | 
					        float4 pix = accbuf[i];
 | 
				
			||||||
 | 
				
			|||||||
@ -89,9 +89,10 @@ uint32_t gtid() {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
__device__
 | 
					__device__
 | 
				
			||||||
int trunca(float f) {
 | 
					uint32_t trunca(float f) {
 | 
				
			||||||
    // truncate as used in address calculations
 | 
					    // truncate as used in address calculations. note the use of a signed
 | 
				
			||||||
    int ret;
 | 
					    // conversion is intentional here (simplifies image bounds checking).
 | 
				
			||||||
 | 
					    uint32_t ret;
 | 
				
			||||||
    asm("cvt.rni.s32.f32    %0,     %1;" : "=r"(ret) : "f"(f));
 | 
					    asm("cvt.rni.s32.f32    %0,     %1;" : "=r"(ret) : "f"(f));
 | 
				
			||||||
    return ret;
 | 
					    return ret;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user