mirror of
https://github.com/stevenrobertson/cuburn.git
synced 2025-02-05 11:40:04 -05:00
Use C++ pass-by-reference to explicitly share.
This commit is contained in:
parent
72dbae1ebe
commit
46c6074b92
@ -29,8 +29,8 @@ __shared__ iter_info info;
|
|||||||
|
|
||||||
tmpl = Template("""
|
tmpl = Template("""
|
||||||
__device__
|
__device__
|
||||||
void apply_xf{{xfid}}(float *ix, float *iy, float *icolor, mwc_st *rctx) {
|
void apply_xf{{xfid}}(float &ox, float &oy, float &color, mwc_st &rctx) {
|
||||||
float tx, ty, ox = *ix, oy = *iy;
|
float tx, ty;
|
||||||
|
|
||||||
{{apply_affine_flam3('ox', 'oy', 'tx', 'ty', px, 'xf.c', 'pre')}}
|
{{apply_affine_flam3('ox', 'oy', 'tx', 'ty', px, 'xf.c', 'pre')}}
|
||||||
|
|
||||||
@ -55,11 +55,8 @@ void apply_xf{{xfid}}(float *ix, float *iy, float *icolor, mwc_st *rctx) {
|
|||||||
{{endif}}
|
{{endif}}
|
||||||
{{endfor}}
|
{{endfor}}
|
||||||
|
|
||||||
*ix = ox;
|
|
||||||
*iy = oy;
|
|
||||||
|
|
||||||
float csp = {{px.get('xf.color_speed')}};
|
float csp = {{px.get('xf.color_speed')}};
|
||||||
*icolor = *icolor * (1.0f - csp) + {{px.get('xf.color')}} * csp;
|
color = color * (1.0f - csp) + {{px.get('xf.color')}} * csp;
|
||||||
};
|
};
|
||||||
""")
|
""")
|
||||||
g = dict(globals())
|
g = dict(globals())
|
||||||
@ -87,9 +84,9 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
|
|||||||
int consec_bad = -{{features.fuse}};
|
int consec_bad = -{{features.fuse}};
|
||||||
|
|
||||||
float x, y, color;
|
float x, y, color;
|
||||||
x = mwc_next_11(&rctx);
|
x = mwc_next_11(rctx);
|
||||||
y = mwc_next_11(&rctx);
|
y = mwc_next_11(rctx);
|
||||||
color = mwc_next_01(&rctx);
|
color = mwc_next_01(rctx);
|
||||||
|
|
||||||
{{if features.chaos_used}}
|
{{if features.chaos_used}}
|
||||||
int last_xf_used = 0;
|
int last_xf_used = 0;
|
||||||
@ -102,12 +99,12 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
|
|||||||
while (1) {
|
while (1) {
|
||||||
{{if features.chaos_used}}
|
{{if features.chaos_used}}
|
||||||
// For now, we can't use the swap buffer with chaos enabled
|
// For now, we can't use the swap buffer with chaos enabled
|
||||||
float xfsel = mwc_next_01(&rctx);
|
float xfsel = mwc_next_01(rctx);
|
||||||
// Needed to match the behavior of the loop with swapping
|
// Needed to match the behavior of the loop with swapping
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
{{else}}
|
{{else}}
|
||||||
if (threadIdx.y == 0 && threadIdx.x < 16) {
|
if (threadIdx.y == 0 && threadIdx.x < 16) {
|
||||||
cosel[threadIdx.x] = mwc_next_01(&rctx);
|
cosel[threadIdx.x] = mwc_next_01(rctx);
|
||||||
}
|
}
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
float xfsel = cosel[threadIdx.y];
|
float xfsel = cosel[threadIdx.y];
|
||||||
@ -122,7 +119,7 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
|
|||||||
{{for density_col_idx, this_xform_idx in enumerate(features.std_xforms)}}
|
{{for density_col_idx, this_xform_idx in enumerate(features.std_xforms)}}
|
||||||
if (last_xf_used == {{prior_xform_idx}} &&
|
if (last_xf_used == {{prior_xform_idx}} &&
|
||||||
xfsel <= {{packer.get("cp.chaos_densities[%d][%d]" % (density_row_idx, density_col_idx))}}) {
|
xfsel <= {{packer.get("cp.chaos_densities[%d][%d]" % (density_row_idx, density_col_idx))}}) {
|
||||||
apply_xf{{this_xform_idx}}(&x, &y, &color, &rctx);
|
apply_xf{{this_xform_idx}}(x, y, color, rctx);
|
||||||
last_xf_used = {{this_xform_idx}};
|
last_xf_used = {{this_xform_idx}};
|
||||||
} else
|
} else
|
||||||
{{endfor}}
|
{{endfor}}
|
||||||
@ -130,7 +127,7 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
|
|||||||
{{else}}
|
{{else}}
|
||||||
{{for density_col_idx, this_xform_idx in enumerate(features.std_xforms)}}
|
{{for density_col_idx, this_xform_idx in enumerate(features.std_xforms)}}
|
||||||
if (xfsel <= {{packer.get("cp.norm_density[%d]" % (density_col_idx))}}) {
|
if (xfsel <= {{packer.get("cp.norm_density[%d]" % (density_col_idx))}}) {
|
||||||
apply_xf{{this_xform_idx}}(&x, &y, &color, &rctx);
|
apply_xf{{this_xform_idx}}(x, y, color, rctx);
|
||||||
} else
|
} else
|
||||||
{{endfor}}
|
{{endfor}}
|
||||||
{{endif}}
|
{{endif}}
|
||||||
@ -166,7 +163,7 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
|
|||||||
|
|
||||||
{{if features.final_xform_index}}
|
{{if features.final_xform_index}}
|
||||||
float fx = x, fy = y, fcolor;
|
float fx = x, fy = y, fcolor;
|
||||||
apply_xf{{features.final_xform_index}}(&fx, &fy, &fcolor, &rctx);
|
apply_xf{{features.final_xform_index}}(fx, fy, fcolor, rctx);
|
||||||
{{endif}}
|
{{endif}}
|
||||||
|
|
||||||
// TODO: this may not optimize well, verify.
|
// TODO: this may not optimize well, verify.
|
||||||
@ -181,11 +178,11 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
|
|||||||
|
|
||||||
// TODO: verify that constants get premultiplied
|
// TODO: verify that constants get premultiplied
|
||||||
float ditherwidth = {{packer.get("0.33 * cp.spatial_filter_radius")}};
|
float ditherwidth = {{packer.get("0.33 * cp.spatial_filter_radius")}};
|
||||||
float u0 = mwc_next_01(&rctx);
|
float u0 = mwc_next_01(rctx);
|
||||||
float r = ditherwidth * sqrt(-2.0f * log2f(u0) / M_LOG2E);
|
float r = ditherwidth * sqrt(-2.0f * log2f(u0) / M_LOG2E);
|
||||||
|
|
||||||
// TODO: provide mwc_next_0_2pi()
|
// TODO: provide mwc_next_0_2pi()
|
||||||
float u1 = 2.0f * M_PI * mwc_next_01(&rctx);
|
float u1 = 2.0f * M_PI * mwc_next_01(rctx);
|
||||||
|
|
||||||
float ditherx = r * cos(u1);
|
float ditherx = r * cos(u1);
|
||||||
float dithery = r * sin(u1);
|
float dithery = r * sin(u1);
|
||||||
@ -195,9 +192,9 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
|
|||||||
iy < 0 || iy >= {{features.acc_height}} ) {
|
iy < 0 || iy >= {{features.acc_height}} ) {
|
||||||
consec_bad++;
|
consec_bad++;
|
||||||
if (consec_bad > {{features.max_oob}}) {
|
if (consec_bad > {{features.max_oob}}) {
|
||||||
x = mwc_next_11(&rctx);
|
x = mwc_next_11(rctx);
|
||||||
y = mwc_next_11(&rctx);
|
y = mwc_next_11(rctx);
|
||||||
color = mwc_next_01(&rctx);
|
color = mwc_next_01(rctx);
|
||||||
consec_bad = -{{features.fuse}};
|
consec_bad = -{{features.fuse}};
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
@ -214,7 +211,6 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
|
|||||||
accbuf[i] = pix; // TODO: atomic operations (or better)
|
accbuf[i] = pix; // TODO: atomic operations (or better)
|
||||||
denbuf[i] += 1.0f;
|
denbuf[i] += 1.0f;
|
||||||
}
|
}
|
||||||
asm volatile ("membar.cta;");
|
|
||||||
}
|
}
|
||||||
''')
|
''')
|
||||||
return tmpl.substitute(
|
return tmpl.substitute(
|
||||||
|
@ -16,20 +16,20 @@ typedef struct {
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
defs = r"""
|
defs = r"""
|
||||||
__device__ uint32_t mwc_next(mwc_st *st) {
|
__device__ uint32_t mwc_next(mwc_st &st) {
|
||||||
asm("{\n\t.reg .u64 val;\n\t"
|
asm("{\n\t.reg .u64 val;\n\t"
|
||||||
"cvt.u64.u32 val, %0;\n\t"
|
"cvt.u64.u32 val, %0;\n\t"
|
||||||
"mad.wide.u32 val, %1, %2, val;\n\t"
|
"mad.wide.u32 val, %1, %2, val;\n\t"
|
||||||
"mov.b64 {%1, %0}, val;\n\t}\n\t"
|
"mov.b64 {%1, %0}, val;\n\t}\n\t"
|
||||||
: "+r"(st->carry), "+r"(st->state) : "r"(st->mul));
|
: "+r"(st.carry), "+r"(st.state) : "r"(st.mul));
|
||||||
return st->state;
|
return st.state;
|
||||||
}
|
}
|
||||||
|
|
||||||
__device__ float mwc_next_01(mwc_st *st) {
|
__device__ float mwc_next_01(mwc_st &st) {
|
||||||
return mwc_next(st) * (1.0f / 4294967296.0f);
|
return mwc_next(st) * (1.0f / 4294967296.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
__device__ float mwc_next_11(mwc_st *st) {
|
__device__ float mwc_next_11(mwc_st &st) {
|
||||||
uint32_t val = mwc_next(st);
|
uint32_t val = mwc_next(st);
|
||||||
float ret;
|
float ret;
|
||||||
asm("cvt.rn.f32.s32 %0, %1;\n\t"
|
asm("cvt.rn.f32.s32 %0, %1;\n\t"
|
||||||
|
Loading…
Reference in New Issue
Block a user