mirror of
https://github.com/stevenrobertson/cuburn.git
synced 2025-02-05 11:40:04 -05:00
Add --sync option.
This commit is contained in:
parent
9a348f0517
commit
e6e2c4a8d7
@ -234,6 +234,9 @@ class _AnimRenderer(object):
|
|||||||
# used, no matter the number of time steps.
|
# used, no matter the number of time steps.
|
||||||
PAL_HEIGHT = 16
|
PAL_HEIGHT = 16
|
||||||
|
|
||||||
|
# Use synchronous launches
|
||||||
|
sync = False
|
||||||
|
|
||||||
def __init__(self, anim):
|
def __init__(self, anim):
|
||||||
self.anim = anim
|
self.anim = anim
|
||||||
self.pending = False
|
self.pending = False
|
||||||
@ -268,6 +271,9 @@ class _AnimRenderer(object):
|
|||||||
# It's less than ideal, but we lock some memory ahead of time
|
# It's less than ideal, but we lock some memory ahead of time
|
||||||
self.h_infos_locked = cuda.pagelocked_empty((info_size/4,), np.float32)
|
self.h_infos_locked = cuda.pagelocked_empty((info_size/4,), np.float32)
|
||||||
|
|
||||||
|
if self.sync:
|
||||||
|
self.stream = self.alt_stream = None
|
||||||
|
|
||||||
def render(self, cen_time):
|
def render(self, cen_time):
|
||||||
assert not self.pending, "Tried to render with results pending!"
|
assert not self.pending, "Tried to render with results pending!"
|
||||||
self.pending = True
|
self.pending = True
|
||||||
@ -281,6 +287,7 @@ class _AnimRenderer(object):
|
|||||||
util.BaseCode.zero_dptr(a.mod, self.d_accum, 4 * self.nbins,
|
util.BaseCode.zero_dptr(a.mod, self.d_accum, 4 * self.nbins,
|
||||||
self.stream)
|
self.stream)
|
||||||
# Ensure all main stream tasks are done before starting alt stream
|
# Ensure all main stream tasks are done before starting alt stream
|
||||||
|
if not self.sync:
|
||||||
self.alt_stream.wait_for_event(cuda.Event().record(self.stream))
|
self.alt_stream.wait_for_event(cuda.Event().record(self.stream))
|
||||||
|
|
||||||
dpal = cuda.make_multichannel_2d_array(palette, 'C')
|
dpal = cuda.make_multichannel_2d_array(palette, 'C')
|
||||||
@ -311,10 +318,13 @@ class _AnimRenderer(object):
|
|||||||
if not d_seeds:
|
if not d_seeds:
|
||||||
seeds = mwc.MWC.make_seeds(iter.IterCode.NTHREADS *
|
seeds = mwc.MWC.make_seeds(iter.IterCode.NTHREADS *
|
||||||
self.cps_per_block)
|
self.cps_per_block)
|
||||||
h_seeds = cuda.pagelocked_empty(seeds.shape, seeds.dtype)
|
if self.sync:
|
||||||
h_seeds[:] = seeds
|
d_seeds = cuda.to_device(seeds)
|
||||||
|
else:
|
||||||
size = seeds.dtype.itemsize * seeds.size
|
size = seeds.dtype.itemsize * seeds.size
|
||||||
d_seeds = cuda.mem_alloc(size)
|
d_seeds = cuda.mem_alloc(size)
|
||||||
|
h_seeds = cuda.pagelocked_empty(seeds.shape, seeds.dtype)
|
||||||
|
h_seeds[:] = seeds
|
||||||
cuda.memcpy_htod_async(d_seeds, h_seeds, stream)
|
cuda.memcpy_htod_async(d_seeds, h_seeds, stream)
|
||||||
if on_main:
|
if on_main:
|
||||||
self.d_seeds = d_seeds
|
self.d_seeds = d_seeds
|
||||||
@ -341,10 +351,12 @@ class _AnimRenderer(object):
|
|||||||
|
|
||||||
infos = np.concatenate(infos)
|
infos = np.concatenate(infos)
|
||||||
offset = b * packer.align * self.cps_per_block
|
offset = b * packer.align * self.cps_per_block
|
||||||
|
d_info_off = int(self.d_infos) + offset
|
||||||
|
if self.sync:
|
||||||
|
cuda.memcpy_htod(d_info_off, infos)
|
||||||
|
else:
|
||||||
h_infos = self.h_infos_locked[offset/4:offset/4+len(infos)]
|
h_infos = self.h_infos_locked[offset/4:offset/4+len(infos)]
|
||||||
h_infos[:] = infos
|
h_infos[:] = infos
|
||||||
# TODO: portable across 32/64-bit arches?
|
|
||||||
d_info_off = int(self.d_infos) + offset
|
|
||||||
cuda.memcpy_htod_async(d_info_off, h_infos, stream)
|
cuda.memcpy_htod_async(d_info_off, h_infos, stream)
|
||||||
|
|
||||||
# TODO: get block config from IterCode
|
# TODO: get block config from IterCode
|
||||||
@ -353,6 +365,7 @@ class _AnimRenderer(object):
|
|||||||
texrefs=[tref], stream=stream)
|
texrefs=[tref], stream=stream)
|
||||||
|
|
||||||
# Now ensure all alt stream tasks are done before continuing main
|
# Now ensure all alt stream tasks are done before continuing main
|
||||||
|
if not self.sync:
|
||||||
self.stream.wait_for_event(cuda.Event().record(self.alt_stream))
|
self.stream.wait_for_event(cuda.Event().record(self.alt_stream))
|
||||||
|
|
||||||
util.BaseCode.zero_dptr(a.mod, self.d_out, 4 * self.nbins,
|
util.BaseCode.zero_dptr(a.mod, self.d_out, 4 * self.nbins,
|
||||||
@ -408,9 +421,12 @@ class _AnimRenderer(object):
|
|||||||
return pal
|
return pal
|
||||||
|
|
||||||
def done(self):
|
def done(self):
|
||||||
|
if self.sync:
|
||||||
|
return True
|
||||||
return self.stream.is_done()
|
return self.stream.is_done()
|
||||||
|
|
||||||
def get_result(self):
|
def get_result(self):
|
||||||
|
if not self.sync:
|
||||||
self.stream.synchronize()
|
self.stream.synchronize()
|
||||||
self.pending = False
|
self.pending = False
|
||||||
a = self.anim
|
a = self.anim
|
||||||
|
7
main.py
7
main.py
@ -23,7 +23,7 @@ import pycuda.autoinit
|
|||||||
|
|
||||||
import cuburn._pyflam3_hacks
|
import cuburn._pyflam3_hacks
|
||||||
from fr0stlib import pyflam3
|
from fr0stlib import pyflam3
|
||||||
from cuburn.render import *
|
from cuburn import render
|
||||||
from cuburn.code.mwc import MWCTest
|
from cuburn.code.mwc import MWCTest
|
||||||
|
|
||||||
np.set_printoptions(precision=5, edgeitems=20)
|
np.set_printoptions(precision=5, edgeitems=20)
|
||||||
@ -110,7 +110,8 @@ def main(args):
|
|||||||
if (args.start is None or t >= args.start)
|
if (args.start is None or t >= args.start)
|
||||||
and (args.end is None or t < args.end)]
|
and (args.end is None or t < args.end)]
|
||||||
|
|
||||||
anim = Animation(genomes)
|
render._AnimRenderer.sync = args.sync
|
||||||
|
anim = render.Animation(genomes)
|
||||||
if args.debug:
|
if args.debug:
|
||||||
anim.cmp_options.append('-G')
|
anim.cmp_options.append('-G')
|
||||||
anim.keep = args.keep or args.debug
|
anim.keep = args.keep or args.debug
|
||||||
@ -216,6 +217,8 @@ if __name__ == "__main__":
|
|||||||
help='Keep compilation directory (disables kernel caching)')
|
help='Keep compilation directory (disables kernel caching)')
|
||||||
debug.add_argument('--debug', action='store_true', dest='debug',
|
debug.add_argument('--debug', action='store_true', dest='debug',
|
||||||
help='Compile kernel with debugging enabled (implies --keep)')
|
help='Compile kernel with debugging enabled (implies --keep)')
|
||||||
|
debug.add_argument('--sync', action='store_true', dest='sync',
|
||||||
|
help='Use synchronous launches whenever possible')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user