Allow swapping out histogram store methods to make performance testing easier

This commit is contained in:
Steven Robertson 2010-09-12 01:09:04 -04:00
parent f368a99a16
commit 802ca1d585
2 changed files with 31 additions and 15 deletions

View File

@ -120,10 +120,7 @@ class IterThread(PTXEntryPoint):
label('iter_loop_start') label('iter_loop_start')
timeout.check_time(10) #timeout.check_time(10)
# TODO: diagram and fix syncing (can this be automated?)
#op.bar.sync(1)
with block(): with block():
reg.u32('num_rounds') reg.u32('num_rounds')
@ -453,13 +450,26 @@ class HistScatter(PTXFragment):
def module_setup(self): def module_setup(self):
mem.global_.f32('g_hist_bins', mem.global_.f32('g_hist_bins',
features.hist_height * features.hist_stride * 4) features.hist_height * features.hist_stride * 4)
comment("Target to ensure fake local values get written")
mem.global_.f32('g_hist_dummy')
@ptx_func @ptx_func
def entry_setup(self): def entry_setup(self):
comment("For now, assume histogram bins have been cleared by host") comment("Fake bins for fake scatter")
mem.local.f32('l_scatter_fake_adr')
mem.local.f32('l_scatter_fake_alpha')
@ptx_func @ptx_func
def scatter(self, x, y, color, xf_idx, p_valid=None): def entry_teardown(self):
with block("Store fake histogram bins to dummy global"):
reg.b32('hist_dummy')
op.ld.local.b32(hist_dummy, addr(l_scatter_fake_adr))
op.st.volatile.b32(addr(g_hist_dummy), hist_dummy)
op.ld.local.b32(hist_dummy, addr(l_scatter_fake_alpha))
op.st.volatile.b32(addr(g_hist_dummy), hist_dummy)
@ptx_func
def scatter(self, x, y, color, xf_idx, p_valid=None, type='ldst'):
""" """
Scatter the given point directly to the histogram bins. I think this Scatter the given point directly to the histogram bins. I think this
technique has the worst performance of all of 'em. Accesses ``cpA`` technique has the worst performance of all of 'em. Accesses ``cpA``
@ -479,6 +489,7 @@ class HistScatter(PTXFragment):
palette.look_up(r, g, b, a, color, norm_time) palette.look_up(r, g, b, a, color, norm_time)
# TODO: look up, scale by xform visibility # TODO: look up, scale by xform visibility
# TODO: Make this more performant # TODO: Make this more performant
if type == 'ldst':
reg.f32('gr gg gb ga') reg.f32('gr gg gb ga')
op.ld.v4.f32(vec(gr, gg, gb, ga), addr(hist_bin_addr)) op.ld.v4.f32(vec(gr, gg, gb, ga), addr(hist_bin_addr))
op.add.f32(gr, gr, r) op.add.f32(gr, gr, r)
@ -486,7 +497,12 @@ class HistScatter(PTXFragment):
op.add.f32(gb, gb, b) op.add.f32(gb, gb, b)
op.add.f32(ga, ga, a) op.add.f32(ga, ga, a)
op.st.v4.f32(addr(hist_bin_addr), vec(gr, gg, gb, ga)) op.st.v4.f32(addr(hist_bin_addr), vec(gr, gg, gb, ga))
#op.red.add.f32(addr(hist_bin_addr,4*i), val) elif type == 'red':
for i, val in enumerate([r, g, b, a]):
op.red.add.f32(addr(hist_bin_addr,4*i), val)
elif type == 'fake':
op.st.local.u32(addr(l_scatter_fake_adr), hist_bin_addr)
op.st.local.f32(addr(l_scatter_fake_alpha), a)
def call_setup(self, ctx): def call_setup(self, ctx):
hist_bins_dp, hist_bins_l = ctx.mod.get_global('g_hist_bins') hist_bins_dp, hist_bins_l = ctx.mod.get_global('g_hist_bins')

View File

@ -154,7 +154,7 @@ class Animation(object):
the active device. the active device.
""" """
# TODO: user-configurable test control # TODO: user-configurable test control
self.ctx = LaunchContext([IterThread], block=(256,1,1), grid=(54,1), self.ctx = LaunchContext([IterThread], block=(256,1,1), grid=(28,1),
tests=True) tests=True)
# TODO: user-configurable verbosity control # TODO: user-configurable verbosity control
self.ctx.compile(verbose=3, anim=self, features=self.features) self.ctx.compile(verbose=3, anim=self, features=self.features)