6715c45297d92f7ee68d03d67c9a8120c65c0d9d
1 //**************************************************************************
2 // Vector-Thread Complex Multiply benchmark
3 //--------------------------------------------------------------------------
5 // This benchmark multiplies two arrays of complex numbers element-wise. The
6 // input data (and reference data) should be generated using the
7 // cmplxmult_gendata.pl perl script and dumped to a file named dataset.h. The
8 // riscv-gcc toolchain does not support system calls, so printf can only be
9 // used on a host system, not on the riscv-v processor simulator itself.
11 // HOWEVER: printstr() and printhex() are provided, for a primitive form of
12 // printing strings and hexadecimal values to stdout.
15 // Choose which implementation you wish to test... but leave only one on!
16 // (only the first one will be executed).
21 //--------------------------------------------------------------------------
24 // Set HOST_DEBUG to 1 if you are going to compile this for a host
25 // machine (i.e. Athena/Linux) for debug purposes and set HOST_DEBUG
26 // to 0 if you are compiling with the riscv-gcc toolchain.
32 // Set PREALLOCATE to 1 if you want to preallocate the benchmark
33 // function before starting stats. If you have instruction/data
34 // caches and you don't want to count the overhead of misses, then
35 // you will need to use preallocation.
41 // Set SET_STATS to 1 if you want to carve out the piece that actually
42 // does the computation.
48 //--------------------------------------------------------------------------
49 // Host Platform Includes
55 void printstr(const char*);
61 //--------------------------------------------------------------------------
62 // Complex Value Structs
71 //--------------------------------------------------------------------------
72 // Input/Reference Data
74 //#include "dataset_test.h"
78 //--------------------------------------------------------------------------
81 float absolute( float in
)
90 // are two floating point numbers "close enough"?
91 // this is pretty loose, because Perl is giving me pretty terrible answers
92 int close_enough(float a
, float b
)
96 if ( absolute(a
) > 1.10*absolute(b
)
97 || absolute(a
) < 0.90*absolute(b
)
98 || absolute(a
) > 1.10*absolute(b
)
99 || absolute(a
) < 0.90*absolute(b
))
101 if (absolute(absolute(a
) - absolute(b
)) > 0.1)
110 int verify( int n
, struct Complex test
[], struct Complex correct
[] )
113 for ( i
= 0; i
< n
; i
++ ) {
114 if ( !close_enough(test
[i
].real
, correct
[i
].real
)
115 || !close_enough(test
[i
].imag
, correct
[i
].imag
))
118 printf(" test[%d] : {%3.2f, %3.2f}\n", i
, test
[i
].real
, test
[i
].imag
);
119 printf(" corr[%d] : {%3.2f, %3.2f}\n", i
, correct
[i
].real
, correct
[i
].imag
);
121 // report the failing index + 10
122 // (so that a failure at i==0 or i==1 is not
123 // mistaken for 'not-finished yet' or a pass)
132 void printComplexArray( char name
[], int n
, struct Complex arr
[] )
136 printf( " %10s :", name
);
137 for ( i
= 0; i
< n
; i
++ )
138 printf( " {%03.2f,%03.2f} ", arr
[i
].real
, arr
[i
].imag
);
143 for ( i
= 0; i
< n
; i
++ )
146 printhex((int) arr
[i
].real
);
148 printhex((int) arr
[i
].imag
);
158 void finishTest( int correct
, long long num_cycles
, long long num_retired
)
160 int toHostValue
= correct
;
162 if ( toHostValue
== 1 )
163 printf( "*** PASSED ***\n" );
165 printf( "*** FAILED *** (tohost = %d)\n", toHostValue
);
168 // we no longer run in -testrun mode, which means we can't use
169 // the tohost register to communicate "test is done" and "test results"
170 // so instead we will communicate through print* functions!
173 printstr( "*** PASSED *** (num_cycles = 0x" );
174 printhex(num_cycles
);
175 printstr( ", num_inst_retired = 0x");
176 printhex(num_retired
);
181 printstr( "*** FAILED *** (num_cycles = 0x");
182 printhex(num_cycles
);
183 printstr( ", num_inst_retired = 0x");
184 printhex(num_retired
);
194 // deprecated - cr10/stats-enable register no longer exists
195 void setStats( int enable
)
197 #if ( !HOST_DEBUG && SET_STATS )
198 asm( "mtpcr %0, cr10" : : "r" (enable
) );
202 long long getCycles()
204 long long cycles
= 1337;
205 #if ( !HOST_DEBUG && SET_STATS )
206 __asm__
__volatile__( "rdcycle %0" : "=r" (cycles
) );
211 long long getInstRetired()
213 long long inst_retired
= 1338;
214 #if ( !HOST_DEBUG && SET_STATS )
215 __asm__
__volatile__( "rdinstret %0" : "=r" (inst_retired
) );
220 //--------------------------------------------------------------------------
221 // complex multiply function
223 // scalar C implementation
224 void cmplxmult( int n
, struct Complex a
[], struct Complex b
[], struct Complex c
[] )
227 for ( i
= 0; i
< n
; i
++ )
229 c
[i
].real
= (a
[i
].real
* b
[i
].real
) - (a
[i
].imag
* b
[i
].imag
);
230 c
[i
].imag
= (a
[i
].imag
* b
[i
].real
) + (a
[i
].real
* b
[i
].imag
);
234 // assembly implementations can be found in *_asm.S
236 //--------------------------------------------------------------------------
239 int main( int argc
, char* argv
[] )
241 struct Complex results_data
[DATA_SIZE
];
242 long long start_cycles
= 0;
243 long long stop_cycles
= 0;
244 long long num_cycles
;
245 long long start_retired
= 0;
246 long long stop_retired
= 0;
247 long long num_retired
;
249 // Output the input and reference arrays
252 printComplexArray( "input1", DATA_SIZE
, input1_data
);
253 printComplexArray( "input2", DATA_SIZE
, input2_data
);
254 printComplexArray( "verify", DATA_SIZE
, verify_data
);
257 // --------------------------------------------------
258 // If needed we preallocate everything in the caches
263 cmplxmult( DATA_SIZE
, input1_data
, input2_data
, results_data
);
266 scalar_cmplxmult_asm( DATA_SIZE
, input1_data
, input2_data
, results_data
);
269 vt_cmplxmult_asm( DATA_SIZE
, input1_data
, input2_data
, results_data
);
276 // --------------------------------------------------
279 start_cycles
= getCycles();
280 start_retired
= getInstRetired();
283 cmplxmult( DATA_SIZE
, input1_data
, input2_data
, results_data
);
287 scalar_cmplxmult_asm( DATA_SIZE
, input1_data
, input2_data
, results_data
);
292 vt_cmplxmult_asm( DATA_SIZE
, input1_data
, input2_data
, results_data
);
298 stop_cycles
= getCycles();
299 stop_retired
= getInstRetired();
300 num_cycles
= stop_cycles
- start_cycles
;
301 num_retired
= stop_retired
- start_retired
;
303 // --------------------------------------------------
304 // Print out the results
307 printComplexArray( "results", DATA_SIZE
, results_data
);
308 printComplexArray( "verify ", DATA_SIZE
, verify_data
);
312 // --------------------------------------------------
314 int correct
= verify( DATA_SIZE
, results_data
, verify_data
);
315 finishTest(correct
, num_cycles
, num_retired
);