From: lkcl Date: Thu, 18 May 2023 14:11:55 +0000 (+0100) Subject: (no commit message) X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=c3882020d2f0533ae20d20a638324a913d4ba547;p=libreriscv.git --- diff --git a/simple_v_extension/daxpy_example.mdwn b/simple_v_extension/daxpy_example.mdwn index 4e3163242..e9cacf006 100644 --- a/simple_v_extension/daxpy_example.mdwn +++ b/simple_v_extension/daxpy_example.mdwn @@ -11,22 +11,41 @@ # SVP64 Power ISA version -``` +``` + # r5: n count + # r6: x ptr + # r7: y ptr + # fp1: a mul-scalar + 1 mtctr 5 # move n to CTR + 2 addi r10,r7,0 # copy y-ptr (r7) into r10 (y') + 3 .L2 + 4 setvl MAXVL=32,VL=CTR # actually VL=MIN(MAXVL,CTR) + 5 sv.lfdup *32,8(6) # load x into fp32-63, inc x + 6 sv.lfdup *64,8(7) # load y into fp64-95, inc y + 7 sv.fmadd *64,*32,1,*64 # (*y) = (*x) * fp1 + (*y) + 8 sv.stfdup *64,8(10) # store at y-copy, inc y' + 9 sv.bc/ctr .L2 # decrement CTR by VL + 10 blr # return +``` +A refinement that saves one instruction and reduces register port usage. +It relies on post-increment, on x and y not overlapping in memory, +and critically on y being overwritten in place. + +``` # r5: n count # r6: x ptr # r7: y ptr # fp1: a mul-scalar - mtctr 5 # move n to CTR - addi r10,r6,0 # copy y-ptr into r10 -.L2 - setvl MAXVL=32,VL=CTR # could do more - sv.lfdup/els *32,8(6) # load from x into fp32-63 - sv.lfdup/els *64,8(7) # load from y into fp64-95 - sv.fmadd *64,*64,1,*32 # (*y) = (*y) * (*x) + fp1 - stfdup/els *64,8(10) # store y-copy - sv.bc/ctr .L2 # decrement VL by CTR - blr # return + 1 mtctr 5 # move n to CTR + 2 .L2 + 3 setvl MAXVL=32,VL=CTR # actually VL=MIN(MAXVL,CTR) + 4 sv.lfdup *32,8(6) # load x into fp32-63, inc x + 5 sv.lfd/els *64,8(7) # load y[i] from r7+i*8 into fp64-95, NO INC + 6 sv.fmadd *64,*32,1,*64 # (*y) = (*x) * fp1 + (*y) + 7 sv.stfdup *64,8(7) # store at y, inc y + 8 sv.bc/ctr .L2 # decrement CTR by VL + 9 blr # return ``` # RVV version