3 //--------------------------------------------------------------------------
4 // single-thread, naive version
6 void __attribute__((noinline
)) matmul(const int coreid
, const int ncores
, const int lda
, const data_t A
[], const data_t B
[], data_t C
[] )
10 for ( i
= 0; i
< lda
; i
++ )
12 for ( j
= 0; j
< lda
; j
++ )
14 for ( k
= coreid
; k
< lda
; k
+=ncores
)
16 C
[i
+ j
*lda
] += A
[j
*lda
+ k
] * B
[k
*lda
+ i
];