1 #*****************************************************************************
2 # cmplxmult function (assembly version)
3 #-----------------------------------------------------------------------------
6 #--------------------------------------------------------------------------
8 #--------------------------------------------------------------------------
10 # Here are some defines that make writing assembly code easier.
12 # I'm using the knowledge that rN will be placed in register a0, rA will be
13 # placed into register a1, etc., based on the calling convention for functions.
28 # WARNING: do not write to the s0,...,s9 registers without first saving them to the stack (they are callee-saved).
31 #--------------------------------------------------------------------------
32 # void scalar_cmplxmult_asm( int n, float a[], float b[], float c[] )
# Scalar version of the complex multiply. rN is the register alias for the
# first argument (n), following the calling convention for functions.
# NOTE(review): a, b and c presumably hold interleaved (real, imaginary)
# float pairs -- confirm against the C caller.
33 #--------------------------------------------------------------------------
37 .globl scalar_cmplxmult_asm # make the symbol visible to the linker
38 .type scalar_cmplxmult_asm,@function # mark the symbol as a function
42 # ***** Scalar Example *****
44 blez rN, done # exit early if n <= 0 (blez also branches when n == 0)
47 # The following code is a naive implementation.
48 # Re-ordering instructions may increase performance. Also,
49 # RISC-V supports fused multiply-add/subtract instructions such as
# "fmadd.s" and "fmsub.s". For example, the instruction below computes
# fa2 = fa4*fa3 - ft1 in one step:
50 # fmsub.s fa2,fa4,fa3,ft1
51 # Finally, unrolling and other fun transformations can also provide
# further performance improvements.
75 #--------------------------------------------------------------------------
76 # void vt_cmplxmult_asm( int n, float a[], float b[], float c[] )
# Vector-thread version of the complex multiply. The work is strip-mined:
# each pass handles rVlen elements and rN counts the elements still to do.
77 #--------------------------------------------------------------------------
80 # ***** Vector-Thread Example *****
82 .globl vt_cmplxmult_asm # make the symbol visible to the linker
83 .type vt_cmplxmult_asm,@function # mark the symbol as a function
85 # HINT: because you are dealing with an array of structures, a regular,
86 # vanilla vector-load/vector-store won't work here!
# The loads and stores below therefore take an explicit stride (rStride).
# Configure the vector unit before the loop.
# NOTE(review): the immediates 1 and 7 presumably select how many integer
# and floating-point registers each element gets -- confirm against the
# vector ISA documentation used by this lab.
94 vvcfgivl rVlen, rN, 1, 7
98 # ADD YOUR CODE HERE....
# NOTE(review): the stripmineloop label targeted by the bne below is not
# visible in this excerpt; presumably it sits just above vsetvl -- confirm.
99 vsetvl rVlen, rN # Step 0: set the vector length
100 # rN is the desired (application) vector length
101 # rVlen is the vector length we were actually given
# Step 1: strided loads. vf2/vf4 receive the real parts of A and B,
# vf3/vf5 receive the imaginary parts.
# NOTE(review): rAI/rBI/rCI presumably point one float past rA/rB/rC, and
# rStride is presumably the size of one (real, imaginary) pair -- their
# setup is not visible here; confirm.
103 vflstw vf2, rA, rStride # strided vector load: real parts of A
105 vflstw vf4, rB, rStride # strided vector load: real parts of B
107 vflstw vf3, rAI, rStride # strided vector load: imaginary parts of A
108 vflstw vf5, rBI, rStride # strided vector load: imaginary parts of B
# Step 2: run the vector-element code on every element.
# NOTE(review): a4 must already hold the address of the vector-element
# code; the instruction that sets it is not visible here.
110 vf 0(a4) # vector-fetch the element code at address a4 + 0
# Step 3: strided stores of the results produced by the element code.
112 vfsstw vf0, rC, rStride # strided vector store: real parts of C
114 vfsstw vf1, rCI, rStride # strided vector store: imaginary parts of C
# Step 4: book keeping.
# NOTE(review): the data pointers (rA, rAI, rB, rBI, rC, rCI) must also be
# advanced past the rVlen elements just processed; those updates are not
# visible in this excerpt.
117 sub rN, rN, rVlen # rN = number of elements still to process
# Loop until no elements remain.
# NOTE(review): this exits only when rN is exactly zero, so it relies on
# rVlen never exceeding rN; no n <= 0 guard is visible in this function
# (the scalar version has one) -- confirm it exists above.
121 bne rN, zero, stripmineloop
# Outline of the strip-mine loop implemented above:
122 # Step 0: set the vector length
123 # Step 1: perform your vector loads
124 # Step 2: jump to the vector-fetch code to perform the calculation
125 # Step 3: perform the vector store
126 # Step 4: book keeping, update the pointers, etc.
133 # ADD YOUR VECTOR-ELEMENT CODE HERE ...
# This code is reached through the "vf" vector-fetch in vt_cmplxmult_asm.
# NOTE(review): f0..f5 are presumably the per-element views of vf0..vf5, i.e.
# f2/f3 = real/imaginary part of a, f4/f5 = real/imaginary part of b, and
# f0/f1 = real/imaginary part of the result -- confirm.
#
# fmsub.s rd, rs1, rs2, rs3 computes rd = rs1*rs2 - rs3, so the next
# instruction yields f0 = f3*f5 - f0.
# NOTE(review): the real part of a complex product is ar*br - ai*bi. If f0
# was set to ar*br (f2*f4) by an instruction not visible here, this result
# has the opposite sign; fnmsub.s f0, f3, f5, f0 would give f0 - f3*f5.
# Confirm against the surrounding code and the test output.
135 fmsub.s f0, f3, f5, f0
# fmadd.s rd, rs1, rs2, rs3 computes rd = rs1*rs2 + rs3, so the next
# instruction yields f1 = f3*f4 + f1 (ai*br plus whatever f1 already holds,
# presumably ar*br's counterpart ar*bi from an instruction not visible here).
138 fmadd.s f1, f3, f4, f1
141 # The C code uses a jalr instruction to call this function,
142 # so we can use a jr to return to where the function
143 # was called. This is also known as "ret", for "return".