Improve performance for branchy code
[riscv-isa-sim.git] / riscv / processor.cc
1 // See LICENSE for license details.
2
3 #include "processor.h"
4 #include "extension.h"
5 #include "common.h"
6 #include "config.h"
7 #include "sim.h"
8 #include "htif.h"
9 #include "disasm.h"
10 #include "icache.h"
11 #include <cinttypes>
12 #include <cmath>
13 #include <cstdlib>
14 #include <iostream>
15 #include <assert.h>
16 #include <limits.h>
17 #include <stdexcept>
18 #include <algorithm>
19
20 processor_t::processor_t(sim_t* _sim, mmu_t* _mmu, uint32_t _id)
21 : sim(_sim), mmu(_mmu), ext(NULL), disassembler(new disassembler_t),
22 id(_id), run(false), debug(false)
23 {
24 reset(true);
25 mmu->set_processor(this);
26
27 #define DECLARE_INSN(name, match, mask) REGISTER_INSN(this, name, match, mask)
28 #include "encoding.h"
29 #undef DECLARE_INSN
30 build_opcode_map();
31 }
32
33 processor_t::~processor_t()
34 {
35 }
36
37 void state_t::reset()
38 {
39 // the ISA guarantees on boot that the PC is 0x2000 and the the processor
40 // is in supervisor mode, and in 64-bit mode, if supported, with traps
41 // and virtual memory disabled.
42 sr = SR_S | SR_S64;
43 pc = 0x2000;
44
45 // the following state is undefined upon boot-up,
46 // but we zero it for determinism
47 XPR.reset();
48 FPR.reset();
49
50 evec = 0;
51 epc = 0;
52 badvaddr = 0;
53 cause = 0;
54 pcr_k0 = 0;
55 pcr_k1 = 0;
56 count = 0;
57 compare = 0;
58 fflags = 0;
59 frm = 0;
60
61 load_reservation = -1;
62 }
63
64 void processor_t::set_debug(bool value)
65 {
66 debug = value;
67 if (ext)
68 ext->set_debug(value);
69 }
70
71 void processor_t::reset(bool value)
72 {
73 if (run == !value)
74 return;
75 run = !value;
76
77 state.reset(); // reset the core
78 set_pcr(CSR_STATUS, state.sr);
79
80 if (ext)
81 ext->reset(); // reset the extension
82 }
83
84 void processor_t::take_interrupt()
85 {
86 uint32_t interrupts = (state.sr & SR_IP) >> SR_IP_SHIFT;
87 interrupts &= (state.sr & SR_IM) >> SR_IM_SHIFT;
88
89 if (interrupts && (state.sr & SR_EI))
90 for (int i = 0; ; i++, interrupts >>= 1)
91 if (interrupts & 1)
92 throw trap_t((1ULL << ((state.sr & SR_S64) ? 63 : 31)) + i);
93 }
94
95 void processor_t::step(size_t n)
96 {
97 if(!run)
98 return;
99
100 mmu_t* _mmu = mmu;
101 auto count32 = decltype(state.compare)(state.count);
102 bool count_le_compare = count32 <= state.compare;
103 n = std::min(n, size_t(state.compare - count32) | 1);
104
105 try
106 {
107 take_interrupt();
108
109 // execute_insn fetches and executes one instruction
110 #define execute_insn(noisy) \
111 do { \
112 insn_fetch_t fetch = mmu->load_insn(state.pc); \
113 if(noisy) disasm(fetch.insn.insn); \
114 state.pc = fetch.func(this, fetch.insn.insn, state.pc); \
115 } while(0)
116
117
118 // special execute_insn for commit log dumping
119 #ifdef RISCV_ENABLE_COMMITLOG
120 //static disassembler disasmblr;
121 #undef execute_insn
122 #define execute_insn(noisy) \
123 do { \
124 insn_fetch_t fetch = _mmu->load_insn(state.pc); \
125 if(noisy) disasm(fetch.insn.insn); \
126 bool in_spvr = state.sr & SR_S; \
127 if (!in_spvr) fprintf(stderr, "\n0x%016" PRIx64 " (0x%08" PRIx32 ") ", state.pc, fetch.insn.insn.bits()); \
128 /*if (!in_spvr) fprintf(stderr, "\n0x%016" PRIx64 " (0x%08" PRIx32 ") %s ", state.pc, fetch.insn.insn.bits(), disasmblr.disassemble(fetch.insn.insn).c_str());*/ \
129 state.pc = fetch.func(this, fetch.insn.insn, state.pc); \
130 } while(0)
131 #endif
132
133 if (debug) // print out instructions as we go
134 {
135 for (size_t i = 0; i < n; state.count++, i++)
136 execute_insn(true);
137 }
138 else while (n > 0)
139 {
140 size_t idx = (state.pc / sizeof(insn_t)) % ICACHE_SIZE;
141 auto ic_entry_init = &_mmu->icache[idx], ic_entry = ic_entry_init;
142
143 #define update_count() { \
144 size_t i = ic_entry - ic_entry_init; \
145 state.count += i; \
146 if (i >= n) break; \
147 n -= i; }
148
149 #define ICACHE_ACCESS(idx) { \
150 insn_t insn = ic_entry->data.insn.insn; \
151 insn_func_t func = ic_entry->data.func; \
152 if (unlikely(ic_entry->tag != state.pc)) break; \
153 ic_entry++; \
154 state.pc = func(this, insn, state.pc); }
155
156 switch (idx) while (true)
157 {
158 ICACHE_SWITCH;
159 update_count();
160 ic_entry_init = ic_entry = &_mmu->icache[0];
161 }
162
163 _mmu->access_icache(state.pc);
164 update_count();
165 }
166 }
167 catch(trap_t& t)
168 {
169 take_trap(t);
170 }
171
172 bool count_ge_compare =
173 uint64_t(n) + decltype(state.compare)(state.count) >= state.compare;
174 if (count_le_compare && count_ge_compare)
175 set_interrupt(IRQ_TIMER, true);
176 }
177
178 void processor_t::take_trap(trap_t& t)
179 {
180 if (debug)
181 fprintf(stderr, "core %3d: exception %s, epc 0x%016" PRIx64 "\n",
182 id, t.name(), state.pc);
183
184 // switch to supervisor, set previous supervisor bit, disable interrupts
185 set_pcr(CSR_STATUS, (((state.sr & ~SR_EI) | SR_S) & ~SR_PS & ~SR_PEI) |
186 ((state.sr & SR_S) ? SR_PS : 0) |
187 ((state.sr & SR_EI) ? SR_PEI : 0));
188
189 yield_load_reservation();
190 state.cause = t.cause();
191 state.epc = state.pc;
192 state.pc = state.evec;
193
194 t.side_effects(&state); // might set badvaddr etc.
195 }
196
197 void processor_t::deliver_ipi()
198 {
199 if (run)
200 set_pcr(CSR_CLEAR_IPI, 1);
201 }
202
203 void processor_t::disasm(insn_t insn)
204 {
205 // the disassembler is stateless, so we share it
206 fprintf(stderr, "core %3d: 0x%016" PRIx64 " (0x%08" PRIx32 ") %s\n",
207 id, state.pc, insn.bits(), disassembler->disassemble(insn).c_str());
208 }
209
210 reg_t processor_t::set_pcr(int which, reg_t val)
211 {
212 reg_t old_pcr = get_pcr(which);
213
214 switch (which)
215 {
216 case CSR_FFLAGS:
217 state.fflags = val & (FSR_AEXC >> FSR_AEXC_SHIFT);
218 break;
219 case CSR_FRM:
220 state.frm = val & (FSR_RD >> FSR_RD_SHIFT);
221 break;
222 case CSR_FCSR:
223 state.fflags = (val & FSR_AEXC) >> FSR_AEXC_SHIFT;
224 state.frm = (val & FSR_RD) >> FSR_RD_SHIFT;
225 break;
226 case CSR_STATUS:
227 state.sr = (val & ~SR_IP) | (state.sr & SR_IP);
228 #ifndef RISCV_ENABLE_64BIT
229 state.sr &= ~(SR_S64 | SR_U64);
230 #endif
231 #ifndef RISCV_ENABLE_FPU
232 state.sr &= ~SR_EF;
233 #endif
234 if (!ext)
235 state.sr &= ~SR_EA;
236 state.sr &= ~SR_ZERO;
237 rv64 = (state.sr & SR_S) ? (state.sr & SR_S64) : (state.sr & SR_U64);
238 mmu->flush_tlb();
239 break;
240 case CSR_EPC:
241 state.epc = val;
242 break;
243 case CSR_EVEC:
244 state.evec = val;
245 break;
246 case CSR_CYCLE:
247 case CSR_TIME:
248 case CSR_INSTRET:
249 case CSR_COUNT:
250 state.count = val;
251 break;
252 case CSR_COMPARE:
253 set_interrupt(IRQ_TIMER, false);
254 state.compare = val;
255 break;
256 case CSR_PTBR:
257 state.ptbr = val & ~(PGSIZE-1);
258 break;
259 case CSR_SEND_IPI:
260 sim->send_ipi(val);
261 break;
262 case CSR_CLEAR_IPI:
263 set_interrupt(IRQ_IPI, val & 1);
264 break;
265 case CSR_SUP0:
266 state.pcr_k0 = val;
267 break;
268 case CSR_SUP1:
269 state.pcr_k1 = val;
270 break;
271 case CSR_TOHOST:
272 if (state.tohost == 0)
273 state.tohost = val;
274 break;
275 case CSR_FROMHOST:
276 set_fromhost(val);
277 break;
278 }
279
280 return old_pcr;
281 }
282
283 void processor_t::set_fromhost(reg_t val)
284 {
285 set_interrupt(IRQ_HOST, val != 0);
286 state.fromhost = val;
287 }
288
289 reg_t processor_t::get_pcr(int which)
290 {
291 switch (which)
292 {
293 case CSR_FFLAGS:
294 return state.fflags;
295 case CSR_FRM:
296 return state.frm;
297 case CSR_FCSR:
298 return (state.fflags << FSR_AEXC_SHIFT) | (state.frm << FSR_RD_SHIFT);
299 case CSR_STATUS:
300 return state.sr;
301 case CSR_EPC:
302 return state.epc;
303 case CSR_BADVADDR:
304 return state.badvaddr;
305 case CSR_EVEC:
306 return state.evec;
307 case CSR_CYCLE:
308 case CSR_TIME:
309 case CSR_INSTRET:
310 case CSR_COUNT:
311 return state.count;
312 case CSR_COMPARE:
313 return state.compare;
314 case CSR_CAUSE:
315 return state.cause;
316 case CSR_PTBR:
317 return state.ptbr;
318 case CSR_ASID:
319 return 0;
320 case CSR_FATC:
321 mmu->flush_tlb();
322 return 0;
323 case CSR_HARTID:
324 return id;
325 case CSR_IMPL:
326 return 1;
327 case CSR_SUP0:
328 return state.pcr_k0;
329 case CSR_SUP1:
330 return state.pcr_k1;
331 case CSR_TOHOST:
332 sim->get_htif()->tick(); // not necessary, but faster
333 return state.tohost;
334 case CSR_FROMHOST:
335 sim->get_htif()->tick(); // not necessary, but faster
336 return state.fromhost;
337 default:
338 return -1;
339 }
340 }
341
342 void processor_t::set_interrupt(int which, bool on)
343 {
344 uint32_t mask = (1 << (which + SR_IP_SHIFT)) & SR_IP;
345 if (on)
346 state.sr |= mask;
347 else
348 state.sr &= ~mask;
349 }
350
351 reg_t illegal_instruction(processor_t* p, insn_t insn, reg_t pc)
352 {
353 throw trap_illegal_instruction();
354 }
355
356 insn_func_t processor_t::decode_insn(insn_t insn)
357 {
358 size_t mask = opcode_map.size()-1;
359 insn_desc_t* desc = opcode_map[insn.bits() & mask];
360
361 while ((insn.bits() & desc->mask) != desc->match)
362 desc++;
363
364 return rv64 ? desc->rv64 : desc->rv32;
365 }
366
367 void processor_t::register_insn(insn_desc_t desc)
368 {
369 assert(desc.mask & 1);
370 instructions.push_back(desc);
371 }
372
373 void processor_t::build_opcode_map()
374 {
375 size_t buckets = -1;
376 for (auto& inst : instructions)
377 while ((inst.mask & buckets) != buckets)
378 buckets /= 2;
379 buckets++;
380
381 struct cmp {
382 decltype(insn_desc_t::match) mask;
383 cmp(decltype(mask) mask) : mask(mask) {}
384 bool operator()(const insn_desc_t& lhs, const insn_desc_t& rhs) {
385 if ((lhs.match & mask) != (rhs.match & mask))
386 return (lhs.match & mask) < (rhs.match & mask);
387 return lhs.match < rhs.match;
388 }
389 };
390 std::sort(instructions.begin(), instructions.end(), cmp(buckets-1));
391
392 opcode_map.resize(buckets);
393 opcode_store.resize(instructions.size() + 1);
394
395 size_t j = 0;
396 for (size_t b = 0, i = 0; b < buckets; b++)
397 {
398 opcode_map[b] = &opcode_store[j];
399 while (i < instructions.size() && b == (instructions[i].match & (buckets-1)))
400 opcode_store[j++] = instructions[i++];
401 }
402
403 assert(j == opcode_store.size()-1);
404 opcode_store[j].match = opcode_store[j].mask = 0;
405 opcode_store[j].rv32 = &illegal_instruction;
406 opcode_store[j].rv64 = &illegal_instruction;
407 }
408
409 void processor_t::register_extension(extension_t* x)
410 {
411 for (auto insn : x->get_instructions())
412 register_insn(insn);
413 build_opcode_map();
414 for (auto disasm_insn : x->get_disasms())
415 disassembler->add_insn(disasm_insn);
416 if (ext != NULL)
417 throw std::logic_error("only one extension may be registered");
418 ext = x;
419 x->set_processor(this);
420 }