mirror of
https://github.com/stevenrobertson/cuburn.git
synced 2025-02-05 11:40:04 -05:00
Use one dither offset per block.
This commit is contained in:
parent
83670df2c7
commit
dd645bcbf6
@ -110,6 +110,16 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
|
||||
cosel[threadIdx.x] = mwc_next_01(rctx);
|
||||
{{endif}}
|
||||
|
||||
if (threadIdx.y == 1 && threadIdx.x == 0) {
|
||||
float ditherwidth = {{packer.get("0.33 * cp.spatial_filter_radius")}};
|
||||
float u0 = mwc_next_01(rctx);
|
||||
float r = ditherwidth * sqrt(-2.0f * log2f(u0) / M_LOG2E);
|
||||
|
||||
float u1 = 2.0f * M_PI * mwc_next_01(rctx);
|
||||
info.cam_xo += r * cos(u1);
|
||||
info.cam_yo += r * sin(u1);
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
int consec_bad = -{{features.fuse}};
|
||||
|
||||
@ -197,21 +207,9 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
|
||||
{{apply_affine('x', 'y', 'cx', 'cy', packer,
|
||||
'cp.camera_transform', 'cam')}}
|
||||
{{endif}}
|
||||
uint32_t ix = trunca(cx), iy = trunca(cy);
|
||||
|
||||
// TODO: verify that constants get premultiplied
|
||||
float ditherwidth = {{packer.get("0.33 * cp.spatial_filter_radius")}};
|
||||
float u0 = mwc_next_01(rctx);
|
||||
float r = ditherwidth * sqrt(-2.0f * log2f(u0) / M_LOG2E);
|
||||
|
||||
// TODO: provide mwc_next_0_2pi()
|
||||
float u1 = 2.0f * M_PI * mwc_next_01(rctx);
|
||||
|
||||
float ditherx = r * cos(u1);
|
||||
float dithery = r * sin(u1);
|
||||
int ix = trunca(cx+ditherx), iy = trunca(cy+dithery);
|
||||
|
||||
if (ix < 0 || ix >= {{features.acc_width}} ||
|
||||
iy < 0 || iy >= {{features.acc_height}} ) {
|
||||
if (ix >= {{features.acc_width}} || iy >= {{features.acc_height}} ) {
|
||||
consec_bad++;
|
||||
if (consec_bad > {{features.max_oob}}) {
|
||||
x = mwc_next_11(rctx);
|
||||
@ -222,7 +220,7 @@ void iter(mwc_st *msts, iter_info *infos, float4 *accbuf, float *denbuf) {
|
||||
continue;
|
||||
}
|
||||
|
||||
int i = iy * {{features.acc_stride}} + ix;
|
||||
uint32_t i = iy * {{features.acc_stride}} + ix;
|
||||
|
||||
float4 outcol = tex2D(palTex, color, {{packer.get("cp_step_frac")}});
|
||||
float4 pix = accbuf[i];
|
||||
|
@ -89,9 +89,10 @@ uint32_t gtid() {
|
||||
}
|
||||
|
||||
__device__
|
||||
int trunca(float f) {
|
||||
// truncate as used in address calculations
|
||||
int ret;
|
||||
uint32_t trunca(float f) {
|
||||
// round to the nearest integer (cvt.rni; not a true truncation toward zero), as used in address calculations. note the use of a signed
|
||||
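// conversion is intentional here: a negative result stored in the unsigned return value becomes a very large number, so a single upper-bound comparison also rejects negative coordinates (simplifies image bounds checking).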
|
||||
uint32_t ret;
|
||||
asm("cvt.rni.s32.f32 %0, %1;" : "=r"(ret) : "f"(f));
|
||||
return ret;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user