From 40a5ceafde4c2ed13adbe620fae2206ab8f978c9 Mon Sep 17 00:00:00 2001 From: Steven Robertson Date: Sun, 12 Sep 2010 00:16:35 -0400 Subject: [PATCH] Use a somewhat better writeback mechanism for now --- cuburn/device_code.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/cuburn/device_code.py b/cuburn/device_code.py index efeae39..375e8a7 100644 --- a/cuburn/device_code.py +++ b/cuburn/device_code.py @@ -474,13 +474,14 @@ class HistScatter(PTXFragment): palette.look_up(r, g, b, a, color, norm_time) # TODO: look up, scale by xform visibility # TODO: Make this more performant - reg.f32('gval') - for i, val in enumerate([r, g, b, a]): - #op.red.add.f32(addr(hist_bin_addr,4*i), val) - op.ld.f32(gval,addr(hist_bin_addr,4*i)) - op.add.f32(gval, gval, val) - op.st.f32(addr(hist_bin_addr,4*i),gval) - + reg.f32('gr gg gb ga') + op.ld.v4.f32(vec(gr, gg, gb, ga), addr(hist_bin_addr)) + op.add.f32(gr, gr, r) + op.add.f32(gg, gg, g) + op.add.f32(gb, gb, b) + op.add.f32(ga, ga, a) + op.st.v4.f32(addr(hist_bin_addr), vec(gr, gg, gb, ga)) + #op.red.add.f32(addr(hist_bin_addr,4*i), val) def call_setup(self, ctx): hist_bins_dp, hist_bins_l = ctx.mod.get_global('g_hist_bins')