mirror of
https://github.com/stevenrobertson/cuburn.git
synced 2025-02-05 19:50:04 -05:00
Remove a sync from iter.
A small but consistent improvement.
This commit is contained in:
parent
095936666e
commit
b081bc9378
@ -100,6 +100,16 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
|
|||||||
if (threadIdx.y == 0 && threadIdx.x == 0)
|
if (threadIdx.y == 0 && threadIdx.x == 0)
|
||||||
nsamps = {{packer.get("cp.width * cp.height / cp.ntemporal_samples * cp.adj_density")}};
|
nsamps = {{packer.get("cp.width * cp.height / cp.ntemporal_samples * cp.adj_density")}};
|
||||||
|
|
||||||
|
{{if features.chaos_used}}
|
||||||
|
int last_xf_used = 0;
|
||||||
|
{{else}}
|
||||||
|
// Size can be reduced by a factor of four using a slower 4-stage reduce
|
||||||
|
__shared__ float swap[2048];
|
||||||
|
__shared__ float cosel[16];
|
||||||
|
if (threadIdx.y == 0 && threadIdx.x < 16)
|
||||||
|
cosel[threadIdx.x] = mwc_next_01(rctx);
|
||||||
|
{{endif}}
|
||||||
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
int consec_bad = -{{features.fuse}};
|
int consec_bad = -{{features.fuse}};
|
||||||
|
|
||||||
@ -108,32 +118,14 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
|
|||||||
y = mwc_next_11(rctx);
|
y = mwc_next_11(rctx);
|
||||||
color = mwc_next_01(rctx);
|
color = mwc_next_01(rctx);
|
||||||
|
|
||||||
{{if features.chaos_used}}
|
|
||||||
int last_xf_used = 0;
|
|
||||||
{{else}}
|
|
||||||
// Size can be reduced by a factor of four using a slower 4-stage reduce
|
|
||||||
__shared__ float swap[2048];
|
|
||||||
__shared__ float cosel[16];
|
|
||||||
{{endif}}
|
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
{{if features.chaos_used}}
|
{{if features.chaos_used}}
|
||||||
// For now, we can't use the swap buffer with chaos enabled
|
// For now, we can't use the swap buffer with chaos enabled
|
||||||
float xfsel = mwc_next_01(rctx);
|
float xfsel = mwc_next_01(rctx);
|
||||||
// Needed to match the behavior of the loop with swapping
|
|
||||||
__syncthreads();
|
|
||||||
{{else}}
|
{{else}}
|
||||||
if (threadIdx.y == 0 && threadIdx.x < 16) {
|
|
||||||
cosel[threadIdx.x] = mwc_next_01(rctx);
|
|
||||||
}
|
|
||||||
__syncthreads();
|
|
||||||
float xfsel = cosel[threadIdx.y];
|
float xfsel = cosel[threadIdx.y];
|
||||||
{{endif}}
|
{{endif}}
|
||||||
|
|
||||||
// This is moved from outside the conditional to avoid needing an extra
|
|
||||||
// __syncthreads on every loop
|
|
||||||
if (nsamps < 0) break;
|
|
||||||
|
|
||||||
{{if features.chaos_used}}
|
{{if features.chaos_used}}
|
||||||
{{for density_row_idx, prior_xform_idx in enumerate(features.std_xforms)}}
|
{{for density_row_idx, prior_xform_idx in enumerate(features.std_xforms)}}
|
||||||
{{for density_col_idx, this_xform_idx in enumerate(features.std_xforms)}}
|
{{for density_col_idx, this_xform_idx in enumerate(features.std_xforms)}}
|
||||||
@ -167,6 +159,16 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
|
|||||||
swap[sw+1024] = y;
|
swap[sw+1024] = y;
|
||||||
swap[sw+1536] = color;
|
swap[sw+1536] = color;
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
// This is in the middle of the function so that only one sync is
|
||||||
|
// required per loop.
|
||||||
|
if (nsamps < 0) break;
|
||||||
|
|
||||||
|
{{if not features.chaos_used}}
|
||||||
|
// Similarly, we select the next xforms here.
|
||||||
|
if (threadIdx.y == 0 && threadIdx.x < 16)
|
||||||
|
cosel[threadIdx.x] = mwc_next_01(rctx);
|
||||||
|
{{endif}}
|
||||||
|
|
||||||
consec_bad = swap[sr];
|
consec_bad = swap[sr];
|
||||||
x = swap[sr+512];
|
x = swap[sr+512];
|
||||||
y = swap[sr+1024];
|
y = swap[sr+1024];
|
||||||
|
Loading…
Reference in New Issue
Block a user