cavatools: initialize repository
[cavatools.git] / pipesim / pipesim.c
1 /*
2 Copyright (c) 2020 Peter Hsu. All Rights Reserved. See LICENCE file for details.
3 */
4
5 #include <stdint.h>
6 #include <stdlib.h>
7 #include <stdio.h>
8 #include <assert.h>
9 #include <string.h>
10 #include <sys/time.h>
11
12 #include "caveat.h"
13 #include "opcodes.h"
14 #include "insn.h"
15 #include "shmfifo.h"
16 #include "cache.h"
17 #include "perfctr.h"
18 #include "pipesim.h"
19
20 #include "lru_fsm_1way.h"
21 #include "lru_fsm_2way.h"
22 #include "lru_fsm_3way.h"
23 #include "lru_fsm_4way.h"
24
25 unsigned char fu_latency[Number_of_units] =
26 { [Unit_a] = 4, /* FP Adder */
27 [Unit_b] = 1, /* Branch unit */
28 [Unit_f] = 4, /* FP fused Multiply-Add */
29 [Unit_i] = 1, /* Scalar Integer ALU */
30 [Unit_j] = 1, /* Media Integer ALU */
31 [Unit_m] = 4, /* FP Multipler*/
32 [Unit_n] = 8, /* Scalar Integer Multipler */
33 [Unit_r] = 2, /* Load unit */
34 [Unit_s] = 1, /* Scalar Shift unit */
35 [Unit_t] = 1, /* Media Shift unit */
36 [Unit_w] = 1, /* Store unit */
37 [Unit_x] = 5, /* Special unit */
38 };
39
40 static const char *in_path, *out_path, *perf_path, *wflag;
41
42 const struct options_t opt[] =
43 {
44 { "--in=s", .s=&in_path, .ds=0, .h="Trace file from caveat =name" },
45 { "--perf=s", .s=&perf_path, .ds=0, .h="Performance counters in shared memory =name" },
46
47 { "--bdelay=i", .i=&ib.delay, .di=2, .h="Taken branch delay is =number cycles" },
48 { "--bmiss=i", .i=&ib.penalty, .di=5, .h="L0 instruction buffer refill latency is =number cycles" },
49 { "--bufsz=i", .i=&ib.bufsz, .di=7, .h="L0 instruction buffer capacity is 2*2^ =n bytes" },
50 { "--blocksz=i", .i=&ib.blksize, .di=4, .h="L0 instruction buffer block size is 2^ =n bytes" },
51
52 { "--imiss=i", .i=&ic.penalty, .di=25, .h="L1 instruction cache miss latency is =number cycles" },
53 { "--iline=i", .i=&ic.lg_line, .di=6, .h="L1 instrucdtion cache line size is 2^ =n bytes" },
54 { "--iways=i", .i=&ic.ways, .di=4, .h="L1 instrucdtion cache is =n ways set associativity" },
55 { "--isets=i", .i=&ic.lg_rows, .di=6, .h="L1 instrucdtion cache has 2^ =n sets per way" },
56
57 { "--dmiss=i", .i=&dc.penalty, .di=25, .h="L1 data cache miss latency is =number cycles" },
58 { "--write=s", .s=&wflag, .ds="b", .h="L1 data cache is write=[back|thru]" },
59 { "--dline=i", .i=&dc.lg_line, .di=6, .h="L1 data cache line size is 2^ =n bytes" },
60 { "--dways=i", .i=&dc.ways, .di=4, .h="L1 data cache is =w ways set associativity" },
61 { "--dsets=i", .i=&dc.lg_rows, .di=6, .h="L1 data cache has 2^ =n sets per way" },
62
63 { "--out=s", .s=&out_path, .ds=0, .h="Create output trace file =name" },
64 { "--report=i", .i=&report, .di=100, .h="Progress report every =number million instructions" },
65 { "--quiet", .b=&quiet, .bv=1, .h="Don't report progress to stderr" },
66 { "-q", .b=&quiet, .bv=1, .h="short for --quiet" },
67 { 0 }
68 };
69 const char* usage = "pipesim --in=trace --perf=counters [pipesim-options] target-program";
70
71 long quiet, report;
72 struct timeval start_time;
73 long instructions_executed, cycles_simulated;
74
75 struct ibuf_t ib;
76 struct cache_t ic, dc;
77 struct fifo_t* in;
78 struct fifo_t* out;
79 int hart;
80 uint64_t mem_queue[tr_memq_len];
81
82
83
84
85 int main(int argc, const char** argv)
86 {
87 assert(sizeof(struct insn_t) == 8);
88 gettimeofday(&start_time, 0);
89 for (int i=0; i<Number_of_opcodes; i++)
90 insnAttr[i].latency = fu_latency[insnAttr[i].unit];
91
92 int numopts = parse_options(argv+1);
93 if (argc == numopts+1 || !in_path)
94 help_exit();
95
96 long entry = load_elf_binary(argv[1+numopts], 0);
97 insnSpace_init();
98
99 report *= 1000000;
100 in = fifo_open(in_path);
101 if (out_path)
102 out = fifo_create(out_path, 0);
103 if (perf_path) {
104 perf_create(perf_path);
105 perf.start = start_time;
106 }
107
108 /* initialize instruction buffer */
109 ib.tag_mask = ~( (1L << (ib.bufsz-1)) - 1 );
110 ib.numblks = (1<<ib.bufsz)/(1<<ib.blksize) - 1;
111 ib.blk_mask = ib.numblks - 1;
112 for (int i=0; i<2; i++) {
113 ib.ready[i] = (long*)malloc(ib.numblks*sizeof(long));
114 memset((char*)ib.ready[i], 0, ib.numblks*sizeof(long));
115 }
116
117 /* initialize instruction cache */
118 struct lru_fsm_t* fsm;
119 switch (ic.ways) {
120 case 1: fsm = cache_fsm_1way; break;
121 case 2: fsm = cache_fsm_2way; break;
122 case 3: fsm = cache_fsm_3way; break;
123 case 4: fsm = cache_fsm_4way; break;
124 default: fprintf(stderr, "--iways=1..4 only\n"); exit(-1);
125 }
126 init_cache(&ic, fsm, 0);
127
128 /* initialize data cache */
129 switch (dc.ways) {
130 case 1: fsm = cache_fsm_1way; break;
131 case 2: fsm = cache_fsm_2way; break;
132 case 3: fsm = cache_fsm_3way; break;
133 case 4: fsm = cache_fsm_4way; break;
134 default: fprintf(stderr, "--dways=1..4 only\n"); exit(-1);
135 }
136 init_cache(&dc,fsm, !(wflag && wflag[0]=='t'));
137
138 long (*model_dcache)(long tr, const struct insn_t* p, long available) = &dcache_writeback;;
139 if (wflag) {
140 if (strcmp(wflag, "thru") == 0)
141 model_dcache = &dcache_writethru;
142 else if (strcmp(wflag, "back") == 0)
143 help_exit();
144 }
145 if (out_path) {
146 if (perf_path)
147 trace_count_pipe(report, model_dcache);
148 else
149 trace_pipe(report, model_dcache);
150 fifo_put(out, trM(tr_eof, 0));
151 fifo_finish(out);
152 }
153 else {
154 if (perf_path)
155 count_pipe(report, model_dcache);
156 else
157 fast_pipe(report, 0);
158 }
159 if (perf_path)
160 perf_close(&perf);
161 fifo_close(in);
162
163 fprintf(stderr, "\n\n");
164 fprintf(stdout, "%12ld instructions executed\n", instructions_executed);
165 fprintf(stdout, "%12ld cycles simulated\n", cycles_simulated);
166 fprintf(stdout, "%12.3f IPC\n", (double)instructions_executed/cycles_simulated);
167 fprintf(stdout, "Ibuffer %ldB capacity %ldB blocksize\n", 1L<<ib.bufsz, 1L<<ib.blksize);
168 fprintf(stdout, "%12ld instruction buffer misses (%3.1f%%)\n",
169 ib.misses, 100.0*ib.misses/instructions_executed);
170
171 fprintf(stdout, "Icache %ldB linesize %ldKB capacity %ld way\n", ic.line,
172 (ic.line*ic.rows*ic.ways)/1024, ic.ways);
173 long reads = ic.refs-ic.updates;
174 fprintf(stdout, "%12ld L1 Icache reads (%3.1f%%)\n", reads, 100.0*reads/instructions_executed);
175
176 fprintf(stdout, "Dcache %ldB linesize %ldKB capacity %ld way\n", dc.line,
177 (dc.line*dc.rows*dc.ways)/1024, dc.ways);
178 reads = dc.refs-dc.updates;
179 fprintf(stdout, "%12ld L1 Dcache reads (%3.1f%%)\n", reads, 100.0*reads/instructions_executed);
180 fprintf(stdout, "%12ld L1 Dcache writes (%3.1f%%)\n", dc.updates, 100.0*dc.updates/instructions_executed);
181 fprintf(stdout, "%12ld L1 Dcache misses (%5.3f%%)\n", dc.misses, 100.0*dc.misses/instructions_executed);
182 fprintf(stdout, "%12ld L1 Dcache evictions (%5.3f%%)\n", dc.evictions, 100.0*dc.evictions/instructions_executed);
183
184 return 0;
185 }
186
187
188
189 void status_report(long now, long icount)
190 {
191 instructions_executed = icount;
192 cycles_simulated = now;
193 if (quiet)
194 return;
195 struct timeval this_time;
196 gettimeofday(&this_time, 0);
197 double msec = (this_time.tv_sec - start_time.tv_sec)*1000;
198 msec += (this_time.tv_usec - start_time.tv_usec)/1000.0;
199 fprintf(stderr, "\r%3.1fBi %3.1fBc IPC=%5.3f CPS=%5.3f in %lds",
200 icount/1e9, now/1e9, (double)icount/now, now/(1e3*msec), (long)(msec/1e3));
201 if (perf_path) {
202 perf.h->insns = icount;
203 perf.h->cycles = now;
204 perf.h->ib_misses = ib.misses;
205 perf.h->ic_misses = ic.misses;
206 perf.h->dc_misses = dc.misses;
207 double kinsns = icount/1e3;
208 fprintf(stderr, " IB=%3.0f I$=%5.3f D$=%4.2f m/Ki",
209 ib.misses/kinsns, ic.misses/kinsns, dc.misses/kinsns);
210 }
211 }
212
213
214 long dcache_writethru(long tr, const struct insn_t* p, long available)
215 {
216 long addr = tr_value(tr);
217 long tag = addr >> dc.lg_line;
218 long when = 0;
219 if (writeOp(p->op_code)) {
220 long sz = tr_size(tr);
221 if (sz < 8) { /* < 8B need L1 for ECC, 8B do not allocate */
222 when = lookup_cache(&dc, addr, 0, available);
223 if (when == available)
224 fifo_put(out, trM(tr_d1get, addr));
225 }
226 fifo_put(out, tr);
227 }
228 else
229 when = lookup_cache(&dc, addr, 0, available);
230 if (when == available) { /* cache miss */
231 fifo_put(out, trM(tr_d1get, addr));
232 }
233 return when;
234 }
235
236
237 long dcache_writeback(long tr, const struct insn_t* p, long available)
238 {
239 long addr = tr_value(tr);
240 long tag = addr >> dc.lg_line;
241 long when = lookup_cache(&dc, addr, writeOp(p->op_code), available);
242 if (when == available) { /* cache miss */
243 if (*dc.evicted)
244 fifo_put(out, trM(tr_d1put, *dc.evicted<<dc.lg_line));
245 fifo_put(out, trM(tr_d1get, addr));
246 }
247 return when;
248 }