From df8100d1f4a327366787cf0e1cede415bb95d98c Mon Sep 17 00:00:00 2001 From: Steven Robertson Date: Thu, 8 Dec 2011 11:49:31 -0500 Subject: [PATCH] Use new mad.cc instruction in MWC --- cuburn/code/mwc.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/cuburn/code/mwc.py b/cuburn/code/mwc.py index b5bdb94..206f290 100644 --- a/cuburn/code/mwc.py +++ b/cuburn/code/mwc.py @@ -4,7 +4,7 @@ The multiply-with-carry random number generator. import numpy as np -from cuburn.code.util import * +from util import * class MWC(HunkOCode): decls = """ @@ -17,11 +17,12 @@ typedef struct { defs = r""" __device__ uint32_t mwc_next(mwc_st &st) { - asm("{\n\t.reg .u64 val;\n\t" - "cvt.u64.u32 val, %0;\n\t" - "mad.wide.u32 val, %1, %2, val;\n\t" - "mov.b64 {%1, %0}, val;\n\t}\n\t" - : "+r"(st.carry), "+r"(st.state) : "r"(st.mul)); + asm("{\n\t" + ".reg .u32 tmp;\n\t" + "mad.lo.cc.u32 tmp, %2, %1, %0;\n\t" + "madc.hi.u32 %0, %2, %1, 0;\n\t" + "mov.u32 %1, tmp;\n\t" + "}" : "+r"(st.carry), "+r"(st.state) : "r"(st.mul)); return st.state; } @@ -114,3 +115,7 @@ __global__ void test_mwc(mwc_st *msts, uint64_t *sums, float nrounds) { print sums print dsums +if __name__ == "__main__": + import pycuda.autoinit + MWCTest.test_mwc() +