X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=mt%2Fas_matmul%2Fmatmul_mi.c;h=88534b8803e3c50f34331cf2b82ece06ed07cc30;hb=9f7ed92e1aeb91593ceb8baf26ff4fe0a1a9efa6;hp=130fdb7232489de0d4eea7afdb2d42def4510000;hpb=dbde501592ce20c536cbc97e99d03f54f3e30294;p=riscv-tests.git diff --git a/mt/as_matmul/matmul_mi.c b/mt/as_matmul/matmul_mi.c index 130fdb7..88534b8 100644 --- a/mt/as_matmul/matmul_mi.c +++ b/mt/as_matmul/matmul_mi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -118,8 +118,8 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da int i, j, k, n, m, c1, c2; - //matmul_naive(32, input1_data, input2_data, results_data); barrier(): 952596 cycles, 29.0 cycles/iter, 3.6 CPI - //matmul(32, input1_data, input2_data, results_data); barrier(): 570135 cycles, 17.3 cycles/iter, 3.4 CPI + //matmul_naive(32, input1_data, input2_data, results_data); barrier(nc): 952596 cycles, 29.0 cycles/iter, 3.6 CPI + //matmul(32, input1_data, input2_data, results_data); barrier(nc): 570135 cycles, 17.3 cycles/iter, 3.4 CPI for ( j = coreid; j < lda; j += 2*ncores ) { for ( i = 0; i < lda; i += 1 ){ @@ -129,14 +129,14 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da c1 += A[j * lda + k] * B[k*lda + i]; c2 += A[(j+2) * lda + k] * B[k*lda + i]; - //barrier(); + //barrier(nc); } C[i + j * lda] = c1; C[i + (j+2) * lda] = c2; - barrier(); + barrier(nc); } - //barrier(); + //barrier(nc); } } @@ -157,33 +157,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); }