Fix performance bug when CSR accesses are common
[riscv-isa-sim.git] / riscv / processor.cc
1 // See LICENSE for license details.
2
3 #include "processor.h"
4 #include "extension.h"
5 #include "common.h"
6 #include "config.h"
7 #include "sim.h"
8 #include "htif.h"
9 #include "disasm.h"
10 #include <cinttypes>
11 #include <cmath>
12 #include <cstdlib>
13 #include <iostream>
14 #include <assert.h>
15 #include <limits.h>
16 #include <stdexcept>
17 #include <algorithm>
18
19 #undef STATE
20 #define STATE state
21
22 processor_t::processor_t(const char* isa, sim_t* sim, uint32_t id)
23 : sim(sim), ext(NULL), disassembler(new disassembler_t),
24 id(id), run(false), debug(false)
25 {
26 parse_isa_string(isa);
27
28 mmu = new mmu_t(sim->mem, sim->memsz);
29 mmu->set_processor(this);
30
31 reset(true);
32
33 #define DECLARE_INSN(name, match, mask) REGISTER_INSN(this, name, match, mask)
34 #include "encoding.h"
35 #undef DECLARE_INSN
36 build_opcode_map();
37 }
38
39 processor_t::~processor_t()
40 {
41 #ifdef RISCV_ENABLE_HISTOGRAM
42 if (histogram_enabled)
43 {
44 fprintf(stderr, "PC Histogram size:%lu\n", pc_histogram.size());
45 for(auto iterator = pc_histogram.begin(); iterator != pc_histogram.end(); ++iterator) {
46 fprintf(stderr, "%0lx %lu\n", (iterator->first << 2), iterator->second);
47 }
48 }
49 #endif
50
51 delete mmu;
52 delete disassembler;
53 }
54
/// Report an unusable --isa argument on stderr and abort the process.
/// Never returns.
static void bad_isa_string(const char* isa)
{
  fprintf(stderr,
          "error: bad --isa option %s\n",
          isa);
  abort();
}
60
61 void processor_t::parse_isa_string(const char* isa)
62 {
63 const char* p = isa;
64 const char* all_subsets = "IMAFDC";
65
66 max_xlen = 64;
67 cpuid = reg_t(2) << 62;
68
69 if (strncmp(p, "RV32", 4) == 0)
70 max_xlen = 32, cpuid = 0, p += 4;
71 else if (strncmp(p, "RV64", 4) == 0)
72 p += 4;
73 else if (strncmp(p, "RV", 2) == 0)
74 p += 2;
75
76 cpuid |= 1L << ('S' - 'A'); // advertise support for supervisor mode
77
78 if (!*p)
79 p = all_subsets;
80 else if (*p != 'I')
81 bad_isa_string(isa);
82
83 while (*p) {
84 cpuid |= 1L << (*p - 'A');
85
86 if (auto next = strchr(all_subsets, *p)) {
87 all_subsets = next + 1;
88 p++;
89 } else if (*p == 'X') {
90 const char* ext = p+1, *end = ext;
91 while (islower(*end))
92 end++;
93 register_extension(find_extension(std::string(ext, end - ext).c_str())());
94 p = end;
95 } else {
96 bad_isa_string(isa);
97 }
98 }
99
100 if (supports_extension('D') && !supports_extension('F'))
101 bad_isa_string(isa);
102 }
103
104 void state_t::reset()
105 {
106 memset(this, 0, sizeof(*this));
107 mstatus = set_field(mstatus, MSTATUS_PRV, PRV_M);
108 mstatus = set_field(mstatus, MSTATUS_PRV1, PRV_S);
109 mstatus = set_field(mstatus, MSTATUS_PRV2, PRV_S);
110 pc = DEFAULT_MTVEC + 0x100;
111 load_reservation = -1;
112 }
113
114 void processor_t::set_debug(bool value)
115 {
116 debug = value;
117 if (ext)
118 ext->set_debug(value);
119 }
120
121 void processor_t::set_histogram(bool value)
122 {
123 histogram_enabled = value;
124 }
125
126 void processor_t::reset(bool value)
127 {
128 if (run == !value)
129 return;
130 run = !value;
131
132 state.reset();
133 set_csr(CSR_MSTATUS, state.mstatus);
134
135 if (ext)
136 ext->reset(); // reset the extension
137 }
138
139 void processor_t::raise_interrupt(reg_t which)
140 {
141 throw trap_t(((reg_t)1 << (max_xlen-1)) | which);
142 }
143
144 void processor_t::take_interrupt()
145 {
146 int priv = get_field(state.mstatus, MSTATUS_PRV);
147 int ie = get_field(state.mstatus, MSTATUS_IE);
148 reg_t interrupts = state.mie & state.mip;
149
150 if (priv < PRV_M || (priv == PRV_M && ie)) {
151 if (interrupts & MIP_MSIP)
152 raise_interrupt(IRQ_SOFT);
153
154 if (state.fromhost != 0)
155 raise_interrupt(IRQ_HOST);
156 }
157
158 if (priv < PRV_S || (priv == PRV_S && ie)) {
159 if (interrupts & MIP_SSIP)
160 raise_interrupt(IRQ_SOFT);
161
162 if (interrupts & MIP_STIP)
163 raise_interrupt(IRQ_TIMER);
164 }
165 }
166
167 static void commit_log(state_t* state, reg_t pc, insn_t insn)
168 {
169 #ifdef RISCV_ENABLE_COMMITLOG
170 if (get_field(state->mstatus, MSTATUS_IE)) {
171 uint64_t mask = (insn.length() == 8 ? uint64_t(0) : (uint64_t(1) << (insn.length() * 8))) - 1;
172 if (state->log_reg_write.addr) {
173 fprintf(stderr, "0x%016" PRIx64 " (0x%08" PRIx64 ") %c%2" PRIu64 " 0x%016" PRIx64 "\n",
174 pc,
175 insn.bits() & mask,
176 state->log_reg_write.addr & 1 ? 'f' : 'x',
177 state->log_reg_write.addr >> 1,
178 state->log_reg_write.data);
179 } else {
180 fprintf(stderr, "0x%016" PRIx64 " (0x%08" PRIx64 ")\n", pc, insn.bits() & mask);
181 }
182 }
183 state->log_reg_write.addr = 0;
184 #endif
185 }
186
187 inline void processor_t::update_histogram(size_t pc)
188 {
189 #ifdef RISCV_ENABLE_HISTOGRAM
190 size_t idx = pc >> 2;
191 pc_histogram[idx]++;
192 #endif
193 }
194
195 static reg_t execute_insn(processor_t* p, reg_t pc, insn_fetch_t fetch)
196 {
197 reg_t npc = fetch.func(p, fetch.insn, pc);
198 if (npc != PC_SERIALIZE) {
199 commit_log(p->get_state(), pc, fetch.insn);
200 p->update_histogram(pc);
201 }
202 return npc;
203 }
204
205 static void update_timer(state_t* state, size_t instret)
206 {
207 uint64_t count0 = (uint64_t)(uint32_t)state->mtime;
208 state->mtime += instret;
209 uint64_t before = count0 - state->stimecmp;
210 if (int64_t(before ^ (before + instret)) < 0)
211 state->mip |= MIP_STIP;
212 }
213
214 static size_t next_timer(state_t* state)
215 {
216 return state->stimecmp - (uint32_t)state->mtime;
217 }
218
219 void processor_t::step(size_t n)
220 {
221 size_t instret = 0;
222 reg_t pc = state.pc;
223 mmu_t* _mmu = mmu;
224
225 if (unlikely(!run || !n))
226 return;
227 n = std::min(n, next_timer(&state) | 1U);
228
229 #define maybe_serialize() \
230 if (unlikely(pc == PC_SERIALIZE)) { \
231 pc = state.pc; \
232 state.serialized = true; \
233 break; \
234 }
235
236 try
237 {
238 take_interrupt();
239
240 if (unlikely(debug))
241 {
242 while (instret < n)
243 {
244 insn_fetch_t fetch = mmu->load_insn(pc);
245 if (!state.serialized)
246 disasm(fetch.insn);
247 pc = execute_insn(this, pc, fetch);
248 maybe_serialize();
249 instret++;
250 state.pc = pc;
251 }
252 }
253 else while (instret < n)
254 {
255 size_t idx = _mmu->icache_index(pc);
256 auto ic_entry = _mmu->access_icache(pc);
257
258 #define ICACHE_ACCESS(idx) { \
259 insn_fetch_t fetch = ic_entry->data; \
260 ic_entry++; \
261 pc = execute_insn(this, pc, fetch); \
262 if (idx == mmu_t::ICACHE_ENTRIES-1) break; \
263 if (unlikely(ic_entry->tag != pc)) break; \
264 instret++; \
265 state.pc = pc; \
266 }
267
268 switch (idx) {
269 #include "icache.h"
270 }
271
272 maybe_serialize();
273 instret++;
274 state.pc = pc;
275 }
276 }
277 catch(trap_t& t)
278 {
279 take_trap(t, pc);
280 }
281
282 update_timer(&state, instret);
283
284 // tail-recurse if we didn't execute as many instructions as we'd hoped
285 if (instret < n)
286 step(n - instret);
287 }
288
289 void processor_t::push_privilege_stack()
290 {
291 reg_t s = state.mstatus;
292 s = set_field(s, MSTATUS_PRV2, get_field(state.mstatus, MSTATUS_PRV1));
293 s = set_field(s, MSTATUS_IE2, get_field(state.mstatus, MSTATUS_IE1));
294 s = set_field(s, MSTATUS_PRV1, get_field(state.mstatus, MSTATUS_PRV));
295 s = set_field(s, MSTATUS_IE1, get_field(state.mstatus, MSTATUS_IE));
296 s = set_field(s, MSTATUS_PRV, PRV_M);
297 s = set_field(s, MSTATUS_MPRV, 0);
298 s = set_field(s, MSTATUS_IE, 0);
299 set_csr(CSR_MSTATUS, s);
300 }
301
302 void processor_t::pop_privilege_stack()
303 {
304 reg_t s = state.mstatus;
305 s = set_field(s, MSTATUS_PRV, get_field(state.mstatus, MSTATUS_PRV1));
306 s = set_field(s, MSTATUS_IE, get_field(state.mstatus, MSTATUS_IE1));
307 s = set_field(s, MSTATUS_PRV1, get_field(state.mstatus, MSTATUS_PRV2));
308 s = set_field(s, MSTATUS_IE1, get_field(state.mstatus, MSTATUS_IE2));
309 s = set_field(s, MSTATUS_PRV2, PRV_U);
310 s = set_field(s, MSTATUS_IE2, 1);
311 set_csr(CSR_MSTATUS, s);
312 }
313
314 void processor_t::take_trap(trap_t& t, reg_t epc)
315 {
316 if (debug)
317 fprintf(stderr, "core %3d: exception %s, epc 0x%016" PRIx64 "\n",
318 id, t.name(), epc);
319
320 state.pc = DEFAULT_MTVEC + 0x40 * get_field(state.mstatus, MSTATUS_PRV);
321 push_privilege_stack();
322 yield_load_reservation();
323 state.mcause = t.cause();
324 state.mepc = epc;
325 t.side_effects(&state); // might set badvaddr etc.
326 }
327
328 void processor_t::deliver_ipi()
329 {
330 state.mip |= MIP_MSIP;
331 }
332
333 void processor_t::disasm(insn_t insn)
334 {
335 uint64_t bits = insn.bits() & ((1ULL << (8 * insn_length(insn.bits()))) - 1);
336 fprintf(stderr, "core %3d: 0x%016" PRIx64 " (0x%08" PRIx64 ") %s\n",
337 id, state.pc, bits, disassembler->disassemble(insn).c_str());
338 }
339
340 static bool validate_priv(reg_t priv)
341 {
342 return priv == PRV_U || priv == PRV_S || priv == PRV_M;
343 }
344
345 static bool validate_vm(int max_xlen, reg_t vm)
346 {
347 if (max_xlen == 64 && (vm == VM_SV39 || vm == VM_SV48))
348 return true;
349 if (max_xlen == 32 && vm == VM_SV32)
350 return true;
351 return vm == VM_MBARE;
352 }
353
354 void processor_t::set_csr(int which, reg_t val)
355 {
356 switch (which)
357 {
358 case CSR_FFLAGS:
359 dirty_fp_state;
360 state.fflags = val & (FSR_AEXC >> FSR_AEXC_SHIFT);
361 break;
362 case CSR_FRM:
363 dirty_fp_state;
364 state.frm = val & (FSR_RD >> FSR_RD_SHIFT);
365 break;
366 case CSR_FCSR:
367 dirty_fp_state;
368 state.fflags = (val & FSR_AEXC) >> FSR_AEXC_SHIFT;
369 state.frm = (val & FSR_RD) >> FSR_RD_SHIFT;
370 break;
371 case CSR_MTIME:
372 case CSR_STIMEW:
373 state.mtime = val;
374 break;
375 case CSR_MTIMEH:
376 case CSR_STIMEHW:
377 if (xlen == 32)
378 state.mtime = (uint32_t)val | (state.mtime >> 32 << 32);
379 else
380 state.mtime = val;
381 break;
382 case CSR_CYCLEW:
383 case CSR_TIMEW:
384 case CSR_INSTRETW:
385 val -= state.mtime;
386 if (xlen == 32)
387 state.sutime_delta = (uint32_t)val | (state.sutime_delta >> 32 << 32);
388 else
389 state.sutime_delta = val;
390 break;
391 case CSR_CYCLEHW:
392 case CSR_TIMEHW:
393 case CSR_INSTRETHW:
394 val -= state.mtime;
395 state.sutime_delta = (val << 32) | (uint32_t)state.sutime_delta;
396 break;
397 case CSR_MSTATUS: {
398 if ((val ^ state.mstatus) & (MSTATUS_VM | MSTATUS_PRV | MSTATUS_PRV1 | MSTATUS_MPRV))
399 mmu->flush_tlb();
400
401 reg_t mask = MSTATUS_IE | MSTATUS_IE1 | MSTATUS_IE2 | MSTATUS_MPRV
402 | MSTATUS_FS | (ext ? MSTATUS_XS : 0);
403
404 if (validate_vm(max_xlen, get_field(val, MSTATUS_VM)))
405 mask |= MSTATUS_VM;
406 if (validate_priv(get_field(val, MSTATUS_PRV)))
407 mask |= MSTATUS_PRV;
408 if (validate_priv(get_field(val, MSTATUS_PRV1)))
409 mask |= MSTATUS_PRV1;
410 if (validate_priv(get_field(val, MSTATUS_PRV2)))
411 mask |= MSTATUS_PRV2;
412
413 state.mstatus = (state.mstatus & ~mask) | (val & mask);
414
415 bool dirty = (state.mstatus & MSTATUS_FS) == MSTATUS_FS;
416 dirty |= (state.mstatus & MSTATUS_XS) == MSTATUS_XS;
417 if (max_xlen == 32)
418 state.mstatus = set_field(state.mstatus, MSTATUS32_SD, dirty);
419 else
420 state.mstatus = set_field(state.mstatus, MSTATUS64_SD, dirty);
421
422 // spike supports the notion of xlen < max_xlen, but current priv spec
423 // doesn't provide a mechanism to run RV32 software on an RV64 machine
424 xlen = max_xlen;
425 break;
426 }
427 case CSR_MIP: {
428 reg_t mask = MIP_SSIP | MIP_MSIP;
429 state.mip = (state.mip & ~mask) | (val & mask);
430 break;
431 }
432 case CSR_MIE: {
433 reg_t mask = MIP_SSIP | MIP_MSIP | MIP_STIP;
434 state.mie = (state.mie & ~mask) | (val & mask);
435 break;
436 }
437 case CSR_SSTATUS: {
438 reg_t ms = state.mstatus;
439 ms = set_field(ms, MSTATUS_IE, get_field(val, SSTATUS_IE));
440 ms = set_field(ms, MSTATUS_IE1, get_field(val, SSTATUS_PIE));
441 ms = set_field(ms, MSTATUS_PRV1, get_field(val, SSTATUS_PS));
442 ms = set_field(ms, MSTATUS_FS, get_field(val, SSTATUS_FS));
443 ms = set_field(ms, MSTATUS_XS, get_field(val, SSTATUS_XS));
444 ms = set_field(ms, MSTATUS_MPRV, get_field(val, SSTATUS_MPRV));
445 return set_csr(CSR_MSTATUS, ms);
446 }
447 case CSR_SIP: {
448 reg_t mask = MIP_SSIP;
449 state.mip = (state.mip & ~mask) | (val & mask);
450 break;
451 }
452 case CSR_SIE: {
453 reg_t mask = MIP_SSIP | MIP_STIP;
454 state.mie = (state.mie & ~mask) | (val & mask);
455 break;
456 }
457 case CSR_SEPC: state.sepc = val; break;
458 case CSR_STVEC: state.stvec = val & ~3; break;
459 case CSR_STIMECMP:
460 state.mip &= ~MIP_STIP;
461 state.stimecmp = val;
462 break;
463 case CSR_SPTBR: state.sptbr = zext_xlen(val & -PGSIZE); break;
464 case CSR_SSCRATCH: state.sscratch = val; break;
465 case CSR_MEPC: state.mepc = val; break;
466 case CSR_MSCRATCH: state.mscratch = val; break;
467 case CSR_MCAUSE: state.mcause = val; break;
468 case CSR_MBADADDR: state.mbadaddr = val; break;
469 case CSR_SEND_IPI: sim->send_ipi(val); break;
470 case CSR_MTOHOST:
471 if (state.tohost == 0)
472 state.tohost = val;
473 break;
474 case CSR_MFROMHOST: state.fromhost = val; break;
475 }
476 }
477
478 reg_t processor_t::get_csr(int which)
479 {
480 switch (which)
481 {
482 case CSR_FFLAGS:
483 require_fp;
484 if (!supports_extension('F'))
485 break;
486 return state.fflags;
487 case CSR_FRM:
488 require_fp;
489 if (!supports_extension('F'))
490 break;
491 return state.frm;
492 case CSR_FCSR:
493 require_fp;
494 if (!supports_extension('F'))
495 break;
496 return (state.fflags << FSR_AEXC_SHIFT) | (state.frm << FSR_RD_SHIFT);
497 case CSR_MTIME:
498 case CSR_STIMEW:
499 return state.mtime;
500 case CSR_MTIMEH:
501 case CSR_STIMEHW:
502 return state.mtime >> 32;
503 case CSR_CYCLE:
504 case CSR_TIME:
505 case CSR_INSTRET:
506 case CSR_STIME:
507 case CSR_CYCLEW:
508 case CSR_TIMEW:
509 case CSR_INSTRETW:
510 return state.mtime + state.sutime_delta;
511 case CSR_CYCLEH:
512 case CSR_TIMEH:
513 case CSR_INSTRETH:
514 case CSR_STIMEH:
515 case CSR_CYCLEHW:
516 case CSR_TIMEHW:
517 case CSR_INSTRETHW:
518 if (xlen == 64)
519 break;
520 return (state.mtime + state.sutime_delta) >> 32;
521 case CSR_SSTATUS: {
522 reg_t ss = 0;
523 ss = set_field(ss, SSTATUS_IE, get_field(state.mstatus, MSTATUS_IE));
524 ss = set_field(ss, SSTATUS_PIE, get_field(state.mstatus, MSTATUS_IE1));
525 ss = set_field(ss, SSTATUS_PS, get_field(state.mstatus, MSTATUS_PRV1));
526 ss = set_field(ss, SSTATUS_FS, get_field(state.mstatus, MSTATUS_FS));
527 ss = set_field(ss, SSTATUS_XS, get_field(state.mstatus, MSTATUS_XS));
528 ss = set_field(ss, SSTATUS_MPRV, get_field(state.mstatus, MSTATUS_MPRV));
529 if (get_field(state.mstatus, MSTATUS64_SD))
530 ss = set_field(ss, (xlen == 32 ? SSTATUS32_SD : SSTATUS64_SD), 1);
531 return ss;
532 }
533 case CSR_SIP: return state.mip & (MIP_SSIP | MIP_STIP);
534 case CSR_SIE: return state.mie & (MIP_SSIP | MIP_STIP);
535 case CSR_SEPC: return state.sepc;
536 case CSR_SBADADDR: return state.sbadaddr;
537 case CSR_STVEC: return state.stvec;
538 case CSR_STIMECMP: return state.stimecmp;
539 case CSR_SCAUSE:
540 if (max_xlen > xlen)
541 return state.scause | ((state.scause >> (max_xlen-1)) << (xlen-1));
542 return state.scause;
543 case CSR_SPTBR: return state.sptbr;
544 case CSR_SASID: return 0;
545 case CSR_SSCRATCH: return state.sscratch;
546 case CSR_MSTATUS: return state.mstatus;
547 case CSR_MIP: return state.mip;
548 case CSR_MIE: return state.mie;
549 case CSR_MEPC: return state.mepc;
550 case CSR_MSCRATCH: return state.mscratch;
551 case CSR_MCAUSE: return state.mcause;
552 case CSR_MBADADDR: return state.mbadaddr;
553 case CSR_MCPUID: return cpuid;
554 case CSR_MIMPID: return IMPL_ROCKET;
555 case CSR_MHARTID: return id;
556 case CSR_MTVEC: return DEFAULT_MTVEC;
557 case CSR_MTDELEG: return 0;
558 case CSR_MTOHOST:
559 sim->get_htif()->tick(); // not necessary, but faster
560 return state.tohost;
561 case CSR_MFROMHOST:
562 sim->get_htif()->tick(); // not necessary, but faster
563 return state.fromhost;
564 case CSR_SEND_IPI: return 0;
565 case CSR_UARCH0:
566 case CSR_UARCH1:
567 case CSR_UARCH2:
568 case CSR_UARCH3:
569 case CSR_UARCH4:
570 case CSR_UARCH5:
571 case CSR_UARCH6:
572 case CSR_UARCH7:
573 case CSR_UARCH8:
574 case CSR_UARCH9:
575 case CSR_UARCH10:
576 case CSR_UARCH11:
577 case CSR_UARCH12:
578 case CSR_UARCH13:
579 case CSR_UARCH14:
580 case CSR_UARCH15:
581 return 0;
582 }
583 throw trap_illegal_instruction();
584 }
585
586 reg_t illegal_instruction(processor_t* p, insn_t insn, reg_t pc)
587 {
588 throw trap_illegal_instruction();
589 }
590
591 insn_func_t processor_t::decode_insn(insn_t insn)
592 {
593 size_t mask = opcode_map.size()-1;
594 insn_desc_t* desc = opcode_map[insn.bits() & mask];
595
596 while ((insn.bits() & desc->mask) != desc->match)
597 desc++;
598
599 return xlen == 64 ? desc->rv64 : desc->rv32;
600 }
601
602 void processor_t::register_insn(insn_desc_t desc)
603 {
604 assert(desc.mask & 1);
605 instructions.push_back(desc);
606 }
607
608 void processor_t::build_opcode_map()
609 {
610 size_t buckets = -1;
611 for (auto& inst : instructions)
612 while ((inst.mask & buckets) != buckets)
613 buckets /= 2;
614 buckets++;
615
616 struct cmp {
617 decltype(insn_desc_t::match) mask;
618 cmp(decltype(mask) mask) : mask(mask) {}
619 bool operator()(const insn_desc_t& lhs, const insn_desc_t& rhs) {
620 if ((lhs.match & mask) != (rhs.match & mask))
621 return (lhs.match & mask) < (rhs.match & mask);
622 return lhs.match < rhs.match;
623 }
624 };
625 std::sort(instructions.begin(), instructions.end(), cmp(buckets-1));
626
627 opcode_map.resize(buckets);
628 opcode_store.resize(instructions.size() + 1);
629
630 size_t j = 0;
631 for (size_t b = 0, i = 0; b < buckets; b++)
632 {
633 opcode_map[b] = &opcode_store[j];
634 while (i < instructions.size() && b == (instructions[i].match & (buckets-1)))
635 opcode_store[j++] = instructions[i++];
636 }
637
638 assert(j == opcode_store.size()-1);
639 opcode_store[j].match = opcode_store[j].mask = 0;
640 opcode_store[j].rv32 = &illegal_instruction;
641 opcode_store[j].rv64 = &illegal_instruction;
642 }
643
644 void processor_t::register_extension(extension_t* x)
645 {
646 for (auto insn : x->get_instructions())
647 register_insn(insn);
648 build_opcode_map();
649 for (auto disasm_insn : x->get_disasms())
650 disassembler->add_insn(disasm_insn);
651 if (ext != NULL)
652 throw std::logic_error("only one extension may be registered");
653 ext = x;
654 x->set_processor(this);
655 }