e11923515b653c84b05809dbdfb18d539415e13f
7 void thread_entry(int cid
, int nc
)
11 uint64_t s
= 0xdeadbeefU
;
27 for (size_t i
= 0; i
< m
; i
++)
28 for (size_t j
= 0; j
< p
; j
++)
29 a
[i
*p
+j
] = (t
)(s
= lfsr(s
));
30 for (size_t i
= 0; i
< p
; i
++)
31 for (size_t j
= 0; j
< n
; j
++)
32 b
[i
*n
+j
] = (t
)(s
= lfsr(s
));
33 memset(c
, 0, m
*n
*sizeof(c
[0]));
35 size_t instret
, cycles
;
37 for (int i
= 0; i
< R
; i
++)
39 instret
= -rdinstret();
41 mm_rb_hwacha(m
, n
, p
, a
, p
, b
, n
, c
, n
);
42 instret
+= rdinstret();
46 for (int i
= 0; i
< R
; i
++)
48 instret
= -rdinstret();
50 mm(m
, n
, p
, a
, p
, b
, n
, c
, n
);
51 instret
+= rdinstret();
56 printf("C%d: reg block %dx%dx%d, cache block %dx%dx%d\n",
57 cid
, RBM
, RBN
, RBK
, CBM
, CBN
, CBK
);
58 printf("C%d: %d instructions\n", cid
, (int)(instret
));
59 printf("C%d: %d cycles\n", cid
, (int)(cycles
));
60 printf("C%d: %d flops\n", cid
, 2*m
*n
*p
);
61 printf("C%d: %d Mflops @ 1 GHz\n", cid
, 2000*m
*n
*p
/(cycles
));
64 for (size_t i
= 0; i
< m
; i
++)
66 for (size_t j
= 0; j
< n
; j
++)
69 for (size_t k
= 0; k
< p
; k
++)
70 s
+= a
[i
*p
+k
] * b
[k
*n
+j
];
72 if (fabs(c
[i
*n
+j
]-s
) > fabs(1e-6*s
))
74 printf("C%d: c[%lu][%lu] %f != %f\n", cid
, i
, j
, c
[i
*n
+j
], s
);