mirror of
				https://github.com/stevenrobertson/cuburn.git
				synced 2025-11-03 18:00:55 -05:00 
			
		
		
		
	Correct dither fail.
This commit is contained in:
		@ -221,14 +221,13 @@ class GenomePacker(HunkOCode):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
__global__
 | 
					__global__
 | 
				
			||||||
void interp_{{tname}}(
 | 
					void interp_{{tname}}(
 | 
				
			||||||
        {{tname}}* out, mwc_st *rctxes,
 | 
					        {{tname}}* out,
 | 
				
			||||||
        const float *times, const float *knots,
 | 
					        const float *times, const float *knots,
 | 
				
			||||||
        float tstart, float tstep, int maxid
 | 
					        float tstart, float tstep, int maxid
 | 
				
			||||||
) {
 | 
					) {
 | 
				
			||||||
    int id = gtid();
 | 
					    int id = gtid();
 | 
				
			||||||
    if (id >= maxid) return;
 | 
					    if (id >= maxid) return;
 | 
				
			||||||
    out = &out[id];
 | 
					    out = &out[id];
 | 
				
			||||||
    mwc_st rctx = rctxes[id];
 | 
					 | 
				
			||||||
    float time = tstart + id * tstep;
 | 
					    float time = tstart + id * tstep;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    float *outf = reinterpret_cast<float*>(out);
 | 
					    float *outf = reinterpret_cast<float*>(out);
 | 
				
			||||||
 | 
				
			|||||||
@ -70,23 +70,15 @@ def precalc_camera(pcam):
 | 
				
			|||||||
        float cenx = {{pre_cam.center.x}}, ceny = {{pre_cam.center.y}};
 | 
					        float cenx = {{pre_cam.center.x}}, ceny = {{pre_cam.center.y}};
 | 
				
			||||||
        float scale = {{pre_cam.scale}} * acc_size.width;
 | 
					        float scale = {{pre_cam.scale}} * acc_size.width;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        float ditherwidth = {{pre_cam.dither_width}} * 0.33f;
 | 
					 | 
				
			||||||
        float u0 = mwc_next_01(rctx);
 | 
					 | 
				
			||||||
        float r = ditherwidth * sqrtf(-2.0f * log2f(u0) / M_LOG2E);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        float u1 = 2.0f * M_PI * mwc_next_01(rctx);
 | 
					 | 
				
			||||||
        float ditherx = r * cos(u1);
 | 
					 | 
				
			||||||
        float dithery = r * sin(u1);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        {{pre_cam._set('xx')}} = scale * rotcos;
 | 
					        {{pre_cam._set('xx')}} = scale * rotcos;
 | 
				
			||||||
        {{pre_cam._set('xy')}} = scale * -rotsin;
 | 
					        {{pre_cam._set('xy')}} = scale * -rotsin;
 | 
				
			||||||
        {{pre_cam._set('xo')}} = scale * (rotsin * ceny - rotcos * cenx)
 | 
					        {{pre_cam._set('xo')}} = scale * (rotsin * ceny - rotcos * cenx)
 | 
				
			||||||
                               + 0.5f * acc_size.awidth + ditherx;
 | 
					                               + 0.5f * acc_size.awidth;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        {{pre_cam._set('yx')}} = scale * rotsin;
 | 
					        {{pre_cam._set('yx')}} = scale * rotsin;
 | 
				
			||||||
        {{pre_cam._set('yy')}} = scale * rotcos;
 | 
					        {{pre_cam._set('yy')}} = scale * rotcos;
 | 
				
			||||||
        {{pre_cam._set('yo')}} = scale * -(rotsin * cenx + rotcos * ceny)
 | 
					        {{pre_cam._set('yo')}} = scale * -(rotsin * cenx + rotcos * ceny)
 | 
				
			||||||
                               + 0.5f * acc_size.aheight + dithery;
 | 
					                               + 0.5f * acc_size.aheight;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    """).substitute(locals()))
 | 
					    """).substitute(locals()))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -203,13 +195,24 @@ void iter(
 | 
				
			|||||||
            reinterpret_cast<const float*>(global_params)[i];
 | 
					            reinterpret_cast<const float*>(global_params)[i];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    __shared__ int rb_idx;
 | 
					    __shared__ int rb_idx;
 | 
				
			||||||
    if (threadIdx.x == 1 && threadIdx.y == 1)
 | 
					    if (threadIdx.y == 1 && threadIdx.x == 1)
 | 
				
			||||||
        rb_idx = 32 * blockDim.y * (atomicAdd(&rb_head, 1) % rb_size);
 | 
					        rb_idx = 32 * blockDim.y * (atomicAdd(&rb_head, 1) % rb_size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    __syncthreads();
 | 
					    __syncthreads();
 | 
				
			||||||
    int this_rb_idx = rb_idx + threadIdx.x + 32 * threadIdx.y;
 | 
					    int this_rb_idx = rb_idx + threadIdx.x + 32 * threadIdx.y;
 | 
				
			||||||
    mwc_st rctx = msts[this_rb_idx];
 | 
					    mwc_st rctx = msts[this_rb_idx];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    {{precalc_camera(pcp.camera)}}
 | 
				
			||||||
 | 
					    if (threadIdx.y == 5 && threadIdx.x == 4) {
 | 
				
			||||||
 | 
					        float ditherwidth = {{pcp.camera.dither_width}} * 0.5f;
 | 
				
			||||||
 | 
					        float u0 = mwc_next_01(rctx);
 | 
				
			||||||
 | 
					        float r = ditherwidth * sqrtf(-2.0f * log2f(u0) / M_LOG2E);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        float u1 = 2.0f * M_PI * mwc_next_01(rctx);
 | 
				
			||||||
 | 
					        {{pcp.camera.xo}} += r * cos(u1);
 | 
				
			||||||
 | 
					        {{pcp.camera.yo}} += r * sin(u1);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{{if info.acc_mode == 'global'}}
 | 
					{{if info.acc_mode == 'global'}}
 | 
				
			||||||
    __shared__ float time_frac;
 | 
					    __shared__ float time_frac;
 | 
				
			||||||
    time_frac = blockIdx.x / (float) gridDim.x;
 | 
					    time_frac = blockIdx.x / (float) gridDim.x;
 | 
				
			||||||
@ -332,8 +335,6 @@ void iter(
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        float cx, cy, cc;
 | 
					        float cx, cy, cc;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        {{precalc_camera(pcp.camera)}}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
{{if 'final' in cp.xforms}}
 | 
					{{if 'final' in cp.xforms}}
 | 
				
			||||||
        {{apply_affine('fx', 'fy', 'cx', 'cy', pcp.camera)}}
 | 
					        {{apply_affine('fx', 'fy', 'cx', 'cy', pcp.camera)}}
 | 
				
			||||||
        cc = fcolor;
 | 
					        cc = fcolor;
 | 
				
			||||||
 | 
				
			|||||||
@ -293,7 +293,7 @@ class Renderer(object):
 | 
				
			|||||||
                        block=(256,1,1), grid=(self.palette_height,1),
 | 
					                        block=(256,1,1), grid=(self.palette_height,1),
 | 
				
			||||||
                        stream=write_stream)
 | 
					                        stream=write_stream)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            packer_fun(d_infos, d_seeds, d_genome_times, d_genome_knots,
 | 
					            packer_fun(d_infos, d_genome_times, d_genome_knots,
 | 
				
			||||||
                       f32(ts), f32(td / ntemporal_samples),
 | 
					                       f32(ts), f32(td / ntemporal_samples),
 | 
				
			||||||
                       i32(ntemporal_samples), block=(256,1,1),
 | 
					                       i32(ntemporal_samples), block=(256,1,1),
 | 
				
			||||||
                       grid=(int(np.ceil(ntemporal_samples/256.)),1),
 | 
					                       grid=(int(np.ceil(ntemporal_samples/256.)),1),
 | 
				
			||||||
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user