2 Copyright (c) 2020 Peter Hsu. All Rights Reserved. See LICENCE file for details.
20 #include "lru_fsm_1way.h"
21 #include "lru_fsm_2way.h"
22 #include "lru_fsm_3way.h"
23 #include "lru_fsm_4way.h"
/*
 * Latency assigned to each functional unit, indexed by the Unit_* constants.
 * main() copies these values into insnAttr[].latency for every opcode.
 * NOTE(review): the values are presumably in cycles -- confirm.
 */
25 unsigned char fu_latency
[Number_of_units
] =
26 { [Unit_a
] = 4, /* FP Adder */
27 [Unit_b
] = 1, /* Branch unit */
28 [Unit_f
] = 4, /* FP fused Multiply-Add */
29 [Unit_i
] = 1, /* Scalar Integer ALU */
30 [Unit_j
] = 1, /* Media Integer ALU */
31 [Unit_m
] = 4, /* FP Multiplier */
32 [Unit_n
] = 8, /* Scalar Integer Multiplier */
33 [Unit_r
] = 2, /* Load unit */
34 [Unit_s
] = 1, /* Scalar Shift unit */
35 [Unit_t
] = 1, /* Media Shift unit */
36 [Unit_w
] = 1, /* Store unit */
37 [Unit_x
] = 5, /* Special unit */
/*
 * String-valued command-line settings.  Each pointer is bound to an entry of
 * the opt[] table below and starts out null (static storage).
 */
static const char *in_path;    /* bound to --in=    */
static const char *out_path;   /* bound to --out=   */
static const char *perf_path;  /* bound to --perf=  */
static const char *wflag;      /* bound to --write= */
/*
 * Command-line option table.
 *
 * Each entry names an option and the variable it writes to:
 *   "--name=s" entries bind a string pointer through .s,
 *   "--name=i" entries bind an integer through .i,
 *   bare flags bind through .b together with a .bv value.
 * .h is the help text for the option.
 * NOTE(review): .ds/.di look like default values and .bv the value stored
 * when the flag is present -- confirm against struct options_t.
 * NOTE(review): the help strings spell "instrucdtion" in the --iline,
 * --iways and --isets entries, and --dways uses "=w" where the sibling
 * entries use "=n"; these are user-visible strings worth correcting.
 */
42 const struct options_t opt
[] =
/* trace input and performance-counter segment */
44 { "--in=s", .s
=&in_path
, .ds
=0, .h
="Trace file from caveat =name" },
45 { "--perf=s", .s
=&perf_path
, .ds
=0, .h
="Performance counters in shared memory =name" },
/* L0 instruction buffer parameters (fields of ib) */
47 { "--bdelay=i", .i
=&ib
.delay
, .di
=2, .h
="Taken branch delay is =number cycles" },
48 { "--bmiss=i", .i
=&ib
.penalty
, .di
=5, .h
="L0 instruction buffer refill latency is =number cycles" },
49 { "--bufsz=i", .i
=&ib
.bufsz
, .di
=7, .h
="L0 instruction buffer capacity is 2*2^ =n bytes" },
50 { "--blocksz=i", .i
=&ib
.blksize
, .di
=4, .h
="L0 instruction buffer block size is 2^ =n bytes" },
/* L1 instruction cache parameters (fields of ic) */
52 { "--imiss=i", .i
=&ic
.penalty
, .di
=25, .h
="L1 instruction cache miss latency is =number cycles" },
53 { "--iline=i", .i
=&ic
.lg_line
, .di
=6, .h
="L1 instrucdtion cache line size is 2^ =n bytes" },
54 { "--iways=i", .i
=&ic
.ways
, .di
=4, .h
="L1 instrucdtion cache is =n ways set associativity" },
55 { "--isets=i", .i
=&ic
.lg_rows
, .di
=6, .h
="L1 instrucdtion cache has 2^ =n sets per way" },
/* L1 data cache parameters (fields of dc, plus the write-policy string) */
57 { "--dmiss=i", .i
=&dc
.penalty
, .di
=25, .h
="L1 data cache miss latency is =number cycles" },
58 { "--write=s", .s
=&wflag
, .ds
="b", .h
="L1 data cache is write=[back|thru]" },
59 { "--dline=i", .i
=&dc
.lg_line
, .di
=6, .h
="L1 data cache line size is 2^ =n bytes" },
60 { "--dways=i", .i
=&dc
.ways
, .di
=4, .h
="L1 data cache is =w ways set associativity" },
61 { "--dsets=i", .i
=&dc
.lg_rows
, .di
=6, .h
="L1 data cache has 2^ =n sets per way" },
/* output trace and progress reporting */
63 { "--out=s", .s
=&out_path
, .ds
=0, .h
="Create output trace file =name" },
64 { "--report=i", .i
=&report
, .di
=100, .h
="Progress report every =number million instructions" },
65 { "--quiet", .b
=&quiet
, .bv
=1, .h
="Don't report progress to stderr" },
66 { "-q", .b
=&quiet
, .bv
=1, .h
="short for --quiet" },
/* One-line synopsis of the pipesim command line. */
const char *usage =
    "pipesim --in=trace --perf=counters [pipesim-options] target-program";
72 struct timeval start_time
;
73 long instructions_executed
, cycles_simulated
;
76 struct cache_t ic
, dc
;
80 uint64_t mem_queue
[tr_memq_len
];
/*
 * Simulator entry point.
 *
 * Visible steps, in order:
 *   - check that struct insn_t is 8 bytes and record the start time;
 *   - give every opcode the latency of its functional unit (fu_latency);
 *   - parse options from argv+1, then test for a missing target program
 *     (argc == numopts+1) or a missing --in path;
 *   - load the target ELF binary named by argv[1+numopts];
 *   - open the input fifo, create the output fifo and the perf segment;
 *   - set up the instruction buffer masks and its two ready[] arrays;
 *   - select an LRU state machine for 1..4 ways and initialise the
 *     instruction cache, then the data cache the same way;
 *   - choose the data-cache model function from wflag;
 *   - run one of trace_count_pipe / trace_pipe / count_pipe / fast_pipe;
 *   - print the final statistics to stdout.
 * The conditions that choose between the pipe variants are not visible
 * in this view.
 */
85 int main(int argc
, const char** argv
)
87 assert(sizeof(struct insn_t
) == 8);
88 gettimeofday(&start_time
, 0);
/* copy each unit's latency into the per-opcode attribute table */
89 for (int i
=0; i
<Number_of_opcodes
; i
++)
90 insnAttr
[i
].latency
= fu_latency
[insnAttr
[i
].unit
];
92 int numopts
= parse_options(argv
+1);
93 if (argc
== numopts
+1 || !in_path
)
96 long entry
= load_elf_binary(argv
[1+numopts
], 0);
100 in
= fifo_open(in_path
);
102 out
= fifo_create(out_path
, 0);
104 perf_create(perf_path
);
105 perf
.start
= start_time
;
108 /* initialize instruction buffer */
/* NOTE(review): numblks is (2^bufsz / 2^blksize) - 1 and blk_mask is
 * numblks - 1; with bufsz=7 and blksize=4 that yields 7 and 6, so the
 * mask is not all ones -- confirm this is intended. */
109 ib
.tag_mask
= ~( (1L << (ib
.bufsz
-1)) - 1 );
110 ib
.numblks
= (1<<ib
.bufsz
)/(1<<ib
.blksize
) - 1;
111 ib
.blk_mask
= ib
.numblks
- 1;
/* NOTE(review): the malloc result is handed to memset without a null check */
112 for (int i
=0; i
<2; i
++) {
113 ib
.ready
[i
] = (long*)malloc(ib
.numblks
*sizeof(long));
114 memset((char*)ib
.ready
[i
], 0, ib
.numblks
*sizeof(long));
117 /* initialize instruction cache */
118 struct lru_fsm_t
* fsm
;
/* pick the LRU state machine matching the way count; anything outside
 * 1..4 prints a message and exits with status -1 */
120 case 1: fsm
= cache_fsm_1way
; break;
121 case 2: fsm
= cache_fsm_2way
; break;
122 case 3: fsm
= cache_fsm_3way
; break;
123 case 4: fsm
= cache_fsm_4way
; break;
124 default: fprintf(stderr
, "--iways=1..4 only\n"); exit(-1);
126 init_cache(&ic
, fsm
, 0);
128 /* initialize data cache */
130 case 1: fsm
= cache_fsm_1way
; break;
131 case 2: fsm
= cache_fsm_2way
; break;
132 case 3: fsm
= cache_fsm_3way
; break;
133 case 4: fsm
= cache_fsm_4way
; break;
134 default: fprintf(stderr
, "--dways=1..4 only\n"); exit(-1);
/* third argument is 0 only when wflag starts with 't', 1 otherwise */
136 init_cache(&dc
,fsm
, !(wflag
&& wflag
[0]=='t'));
/* NOTE(review): the call above guards wflag against null and looks only at
 * its first character, while the strcmp calls below dereference it directly
 * and compare whole words; the table's "b" value for --write equals neither
 * "thru" nor "back" -- confirm how such values are handled. */
138 long (*model_dcache
)(long tr
, const struct insn_t
* p
, long available
) = &dcache_writeback
;;
140 if (strcmp(wflag
, "thru") == 0)
141 model_dcache
= &dcache_writethru
;
142 else if (strcmp(wflag
, "back") == 0)
147 trace_count_pipe(report
, model_dcache
);
149 trace_pipe(report
, model_dcache
);
150 fifo_put(out
, trM(tr_eof
, 0));
155 count_pipe(report
, model_dcache
);
157 fast_pipe(report
, 0);
/* final statistics: progress text went to stderr, the summary goes to stdout */
163 fprintf(stderr
, "\n\n");
164 fprintf(stdout
, "%12ld instructions executed\n", instructions_executed
);
165 fprintf(stdout
, "%12ld cycles simulated\n", cycles_simulated
);
166 fprintf(stdout
, "%12.3f IPC\n", (double)instructions_executed
/cycles_simulated
);
167 fprintf(stdout
, "Ibuffer %ldB capacity %ldB blocksize\n", 1L<<ib
.bufsz
, 1L<<ib
.blksize
);
168 fprintf(stdout
, "%12ld instruction buffer misses (%3.1f%%)\n",
169 ib
.misses
, 100.0*ib
.misses
/instructions_executed
);
171 fprintf(stdout
, "Icache %ldB linesize %ldKB capacity %ld way\n", ic
.line
,
172 (ic
.line
*ic
.rows
*ic
.ways
)/1024, ic
.ways
);
/* reads are all references minus the ones counted as updates */
173 long reads
= ic
.refs
-ic
.updates
;
174 fprintf(stdout
, "%12ld L1 Icache reads (%3.1f%%)\n", reads
, 100.0*reads
/instructions_executed
);
176 fprintf(stdout
, "Dcache %ldB linesize %ldKB capacity %ld way\n", dc
.line
,
177 (dc
.line
*dc
.rows
*dc
.ways
)/1024, dc
.ways
);
178 reads
= dc
.refs
-dc
.updates
;
179 fprintf(stdout
, "%12ld L1 Dcache reads (%3.1f%%)\n", reads
, 100.0*reads
/instructions_executed
);
180 fprintf(stdout
, "%12ld L1 Dcache writes (%3.1f%%)\n", dc
.updates
, 100.0*dc
.updates
/instructions_executed
);
181 fprintf(stdout
, "%12ld L1 Dcache misses (%5.3f%%)\n", dc
.misses
, 100.0*dc
.misses
/instructions_executed
);
182 fprintf(stdout
, "%12ld L1 Dcache evictions (%5.3f%%)\n", dc
.evictions
, 100.0*dc
.evictions
/instructions_executed
);
/*
 * Record simulation progress and print a status line.
 *
 *   now    - stored into cycles_simulated and perf.h->cycles
 *   icount - stored into instructions_executed and perf.h->insns
 *
 * Computes the milliseconds elapsed since start_time, writes a line
 * beginning with '\r' to stderr (billions of instructions and cycles, IPC,
 * a cycles-per-time rate, elapsed seconds), copies the ib/ic/dc miss
 * counters into perf.h, and appends the three miss rates per thousand
 * instructions.
 * NOTE(review): perf.h is dereferenced in the visible lines without a
 * check -- confirm it is valid when --perf is not given.
 */
189 void status_report(long now
, long icount
)
191 instructions_executed
= icount
;
192 cycles_simulated
= now
;
195 struct timeval this_time
;
196 gettimeofday(&this_time
, 0);
/* whole seconds converted to ms first, then the microsecond remainder added */
197 double msec
= (this_time
.tv_sec
- start_time
.tv_sec
)*1000;
198 msec
+= (this_time
.tv_usec
- start_time
.tv_usec
)/1000.0;
199 fprintf(stderr
, "\r%3.1fBi %3.1fBc IPC=%5.3f CPS=%5.3f in %lds",
200 icount
/1e9
, now
/1e9
, (double)icount
/now
, now
/(1e3
*msec
), (long)(msec
/1e3
));
/* publish the same counters through the perf header */
202 perf
.h
->insns
= icount
;
203 perf
.h
->cycles
= now
;
204 perf
.h
->ib_misses
= ib
.misses
;
205 perf
.h
->ic_misses
= ic
.misses
;
206 perf
.h
->dc_misses
= dc
.misses
;
/* miss counts are reported per thousand instructions */
207 double kinsns
= icount
/1e3
;
208 fprintf(stderr
, " IB=%3.0f I$=%5.3f D$=%4.2f m/Ki",
209 ib
.misses
/kinsns
, ic
.misses
/kinsns
, dc
.misses
/kinsns
);
/*
 * Data-cache model selected by main() when wflag is "thru".
 *
 *   tr        - trace record; tr_value(tr) gives the address and
 *               tr_size(tr) the access size
 *   p         - instruction; p->op_code is tested with writeOp()
 *   available - time passed on to lookup_cache(); a result equal to it is
 *               treated as a cache miss
 *
 * For a write op smaller than 8 bytes the cache is looked up (third
 * argument 0) and a tr_d1get record for addr is put on `out` on a miss.
 * A second lookup with the same arguments follows, again emitting
 * tr_d1get on a miss.
 * NOTE(review): the declaration of `when`, the branch structure between
 * the two lookups and the return value are not visible here; `tag` is
 * computed but not used in the visible lines.
 */
214 long dcache_writethru(long tr
, const struct insn_t
* p
, long available
)
216 long addr
= tr_value(tr
);
217 long tag
= addr
>> dc
.lg_line
;
219 if (writeOp(p
->op_code
)) {
220 long sz
= tr_size(tr
);
221 if (sz
< 8) { /* < 8B need L1 for ECC, 8B do not allocate */
222 when
= lookup_cache(&dc
, addr
, 0, available
);
223 if (when
== available
)
224 fifo_put(out
, trM(tr_d1get
, addr
));
229 when
= lookup_cache(&dc
, addr
, 0, available
);
230 if (when
== available
) { /* cache miss */
231 fifo_put(out
, trM(tr_d1get
, addr
));
/*
 * Data-cache model used by default (model_dcache is initialised to it in
 * main()).
 *
 *   tr        - trace record; tr_value(tr) gives the address
 *   p         - instruction; writeOp(p->op_code) is passed to lookup_cache()
 *               as its third argument
 *   available - time passed on to lookup_cache(); a result equal to it is
 *               treated as a cache miss
 *
 * On a miss a tr_d1put record for the evicted line (*dc.evicted shifted
 * left by dc.lg_line) and a tr_d1get record for addr are put on `out`.
 * NOTE(review): any guard in front of the tr_d1put and the return value
 * are not visible here; `tag` is computed but not used in the visible
 * lines.
 */
237 long dcache_writeback(long tr
, const struct insn_t
* p
, long available
)
239 long addr
= tr_value(tr
);
240 long tag
= addr
>> dc
.lg_line
;
241 long when
= lookup_cache(&dc
, addr
, writeOp(p
->op_code
), available
);
242 if (when
== available
) { /* cache miss */
244 fifo_put(out
, trM(tr_d1put
, *dc
.evicted
<<dc
.lg_line
));
245 fifo_put(out
, trM(tr_d1get
, addr
));