mirror of
https://github.com/stevenrobertson/cuburn.git
synced 2025-02-05 11:40:04 -05:00
Time to go have nightmares about this code again (no really)
This commit is contained in:
parent
2c26ff9ab6
commit
27e7fd82a3
6
TODO
6
TODO
@ -41,4 +41,8 @@ Things to do (rather severely incomplete):
|
|||||||
- Implement
|
- Implement
|
||||||
- Test effects on quality by masking off writes on all but one lane and
|
- Test effects on quality by masking off writes on all but one lane and
|
||||||
boosting the sample density to compensate (muuuuuch later on)
|
boosting the sample density to compensate (muuuuuch later on)
|
||||||
|
- MWC RNG output types
|
||||||
|
- float in range [0, 1]
|
||||||
|
- Debug statements
|
||||||
|
- Some code can't be tested separately (notably IterThread). Make a debug
|
||||||
|
flag which embeds extra tests into the kernel
|
||||||
|
@ -107,6 +107,124 @@ SHUFFLE:
|
|||||||
stop being "opaque" and become simply "dynamic".
|
stop being "opaque" and become simply "dynamic".
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
class IterThread(PTXTest):
    """Test entry point that exercises the per-thread CP iteration loop.

    The emitted kernel walks the control points (CPs) stored in
    ``g_cp_array``. It first runs the FUSE phase (rounds whose results are
    not stored) and then, for each CP, the number of rounds read from
    ``cp.samples_per_thread``. No real iteration is performed yet: the
    kernel only counts rounds and would-be writes, and stores both counters
    per thread in ``g_num_rounds`` / ``g_num_writes``.
    """
    entry_name = 'iter_thread'
    entry_params = []

    def deps(self):
        """Return the PTX fragments this entry point depends on."""
        return [MWCRNG, CPDataStream]

    @ptx_func
    def module_setup(self):
        """Declare the module-level global buffers used by the kernel."""
        mem.global_.u32('g_cp_array',
                        [features.max_ntemporal_samples,'*',cp_stream_size])
        mem.global_.u32('g_num_cps')
        # TODO move into debug statement
        mem.global_.u32('g_num_rounds', ctx.threads)
        mem.global_.u32('g_num_writes', ctx.threads)

    @ptx_func
    def entry(self):
        """Emit the kernel body: the CP loop, the FUSE loop and the
        per-sample loop, followed by the per-thread counter stores."""
        reg.f32('x_coord y_coord color_coord alpha_coord')

        # TODO: temporary, for testing
        reg.u32('num_rounds num_writes')
        op.mov.u32(num_rounds, 0)
        op.mov.u32(num_writes, 0)

        # TODO: MWC float output types
        #mwc_next_f32_01(x_coord)
        #mwc_next_f32_01(y_coord)
        #mwc_next_f32_01(color_coord)
        #mwc_next_f32_01(alpha_coord)

        # Registers are hard to come by. To avoid having to track both the
        # count of samples processed and the number of samples to generate,
        # 'num_samples' counts *down* from the CP's desired sample count.
        # When it hits 0, we move on to the next CP.
        #
        # FUSE complicates things. To track it, we store the *negative*
        # number of points we have left to fuse before we start to store the
        # results. When it hits -1, we're done fusing, and can move on to the
        # real thread. The execution flow between 'cp_loop_start',
        # 'fuse_loop_start', and 'iter_loop_start' is therefore tricky, and
        # bears close inspection.
        #
        # In summary:
        #   num_samples == 0: Load next CP, set num_samples from that
        #   num_samples >  0: Iterate, store the result, decrement num_samples
        #   num_samples < -1: Iterate, don't store, increment num_samples
        #   num_samples == -1: Done fusing, enter normal flow
        # TODO: move this to qlocal storage
        reg.s32('num_samples')
        op.mov.s32(num_samples, -(features.num_fuse_samples+1))

        # TODO: Move cp_num to qlocal storage (or spill it, rarely accessed)
        reg.u32('cp_num cpA')
        op.mov.u32(cp_num, 0)

        label('cp_loop_start')
        op.bar.sync(0)

        with block('Check to see if this is the last CP'):
            reg.u32('num_cps')
            reg.pred('p_last_cp')
            op.ldu.u32(num_cps, addr(g_num_cps))
            # Leave only once cp_num has run past the final CP; 'lt' here
            # would exit immediately while CPs are still pending.
            op.setp.ge.u32(p_last_cp, cp_num, num_cps)
            # Forward reference, so the label is named by string.
            op.bra.uni('all_cps_done', ifp=p_last_cp)

        with block('Load CP address'):
            op.mov.u32(cpA, g_cp_array)
            op.mad.lo.u32(cpA, cp_num, cp_stream_size, cpA)

        with block('Increment CP number, load num_samples (unless in fuse)'):
            # The predicate is true when we are NOT fusing (num_samples >= 0),
            # so the FUSE countdown held in num_samples is never overwritten
            # and cp_num only advances once a CP is actually started.
            reg.pred('p_not_in_fuse')
            op.setp.ge.s32(p_not_in_fuse, num_samples, 0)
            op.add.u32(cp_num, cp_num, 1, ifp=p_not_in_fuse)
            cp_stream_get(cpA, num_samples, 'cp.samples_per_thread',
                          ifp=p_not_in_fuse)

        label('fuse_loop_start')
        with block('FUSE-specific stuff'):
            reg.pred('p_fuse')
            comment('If num_samples == -1, set it to 0 and jump back up')
            comment('This will start the normal CP loading machinery')
            op.setp.eq.s32(p_fuse, num_samples, -1)
            # Reset the counter itself (not the predicate) before re-entering
            # the CP loop.
            op.mov.s32(num_samples, 0, ifp=p_fuse)
            op.bra.uni(cp_loop_start, ifp=p_fuse)

            comment('If num_samples < -1, still fusing, so increment')
            op.setp.lt.s32(p_fuse, num_samples, -1)
            op.add.s32(num_samples, num_samples, 1, ifp=p_fuse)

        label('iter_loop_start')

        comment('Do... well, most of everything')

        op.add.u32(num_rounds, num_rounds, 1)

        with block("Test if we're still in FUSE"):
            reg.pred('p_in_fuse')
            op.setp.lt.s32(p_in_fuse, num_samples, 0)
            op.bra.uni(fuse_loop_start, ifp=p_in_fuse)

        with block("Ordinarily, we'd write the result here"):
            op.add.u32(num_writes, num_writes, 1)

        with block("Check to see if we're done with this CP"):
            reg.pred('p_cp_done')
            # Count this sample off; without the decrement num_samples would
            # never reach 0 and the loop could not terminate.
            op.add.s32(num_samples, num_samples, -1)
            op.setp.eq.s32(p_cp_done, num_samples, 0)
            op.bra.uni(cp_loop_start, ifp=p_cp_done)

        op.bra.uni(iter_loop_start)


        # Target of the early exit taken once every CP has been processed.
        label('all_cps_done')
        # TODO this is for testing, move it to a debug statement
        store_per_thread(g_num_rounds, num_rounds)
        store_per_thread(g_num_writes, num_writes)

    def call(self, ctx):
        """Host-side launch hook; not implemented yet, so it always raises."""
        raise HorribleDeathError("Okay I'm going to bed now")
|
||||||
|
|
||||||
|
|
||||||
class MWCRNG(PTXFragment):
|
class MWCRNG(PTXFragment):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.threads_ready = 0
|
self.threads_ready = 0
|
||||||
@ -256,10 +374,9 @@ class MWCRNGTest(PTXTest):
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
class CameraCoordTransform(PTXFragment):
|
class CameraCoordTransform(PTXFragment):
|
||||||
# TODO finish
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class CPDataStream(PTXFragment):
|
class CPDataStream(PTXFragment):
|
||||||
"""
|
"""DataStream which stores the control points."""
|
||||||
DataStream which stores
|
prefix = 'cp'
|
||||||
|
|
||||||
|
4
main.py
4
main.py
@ -15,7 +15,7 @@ from ctypes import *
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from cuburnlib.device_code import MWCRNGTest
|
from cuburnlib.device_code import IterThread
|
||||||
from cuburnlib.cuda import LaunchContext
|
from cuburnlib.cuda import LaunchContext
|
||||||
from fr0stlib.pyflam3 import *
|
from fr0stlib.pyflam3 import *
|
||||||
from fr0stlib.pyflam3._flam3 import *
|
from fr0stlib.pyflam3._flam3 import *
|
||||||
@ -25,7 +25,7 @@ def main(args):
|
|||||||
verbose = 1
|
verbose = 1
|
||||||
if '-d' in args:
|
if '-d' in args:
|
||||||
verbose = 3
|
verbose = 3
|
||||||
ctx = LaunchContext([MWCRNGTest], block=(256,1,1), grid=(64,1), tests=True)
|
ctx = LaunchContext([IterThread], block=(256,1,1), grid=(64,1), tests=True)
|
||||||
ctx.compile(verbose=verbose)
|
ctx.compile(verbose=verbose)
|
||||||
ctx.run_tests()
|
ctx.run_tests()
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user