6 void thread_entry(int cid
, int nc
)
25 for (size_t i
= 0; i
< m
; i
++)
26 for (size_t j
= 0; j
< p
; j
++)
28 for (size_t i
= 0; i
< p
; i
++)
29 for (size_t j
= 0; j
< n
; j
++)
31 memset(c
, 0, m
*n
*sizeof(c
[0]));
33 size_t instret
, cycles
;
35 for (int i
= 0; i
< R
; i
++)
37 instret
= -rdinstret();
39 mm_rb_hwacha(m
, n
, p
, a
, p
, b
, n
, c
, n
);
40 instret
+= rdinstret();
44 for (int i
= 0; i
< R
; i
++)
46 instret
= -rdinstret();
48 mm(m
, n
, p
, a
, p
, b
, n
, c
, n
);
49 instret
+= rdinstret();
54 printf("C%d: reg block %dx%dx%d, cache block %dx%dx%d\n",
55 cid
, RBM
, RBN
, RBK
, CBM
, CBN
, CBK
);
56 printf("C%d: %d instructions\n", cid
, (int)(instret
));
57 printf("C%d: %d cycles\n", cid
, (int)(cycles
));
58 printf("C%d: %d flops\n", cid
, 2*m
*n
*p
);
59 printf("C%d: %d Mflops @ 1 GHz\n", cid
, 2000*m
*n
*p
/(cycles
));
62 for (size_t i
= 0; i
< m
; i
++)
64 for (size_t j
= 0; j
< n
; j
++)
67 for (size_t aik
= i
, bkj
= -j
; aik
< i
+p
; aik
++, bkj
++)
69 if (fabs(c
[i
*n
+j
]-s
*R
) > 1e-6*s
)
71 printf("C%d: c[%lu][%lu] %f != %f\n", cid
, i
, j
, c
[i
*n
+j
], s
);