// riscv-isa-sim: riscv/execute.cc
// (as of the commit "Don't waste time spinning in place in debug mode")
// See LICENSE for license details.

#include "processor.h"
#include "mmu.h"
#include "sim.h"
#include <cassert>


static void commit_log_stash_privilege(state_t* state)
{
#ifdef RISCV_ENABLE_COMMITLOG
  state->last_inst_priv = state->prv;
#endif
}

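// With RISCV_ENABLE_COMMITLOG, print one line per retired instruction:
// privilege level, pc, and the raw instruction bits, plus the destination
// register ('x' or 'f') and the value written when a register write was logged.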
static void commit_log_print_insn(state_t* state, reg_t pc, insn_t insn)
{
#ifdef RISCV_ENABLE_COMMITLOG
  int32_t priv = state->last_inst_priv;
  uint64_t mask = (insn.length() == 8 ? uint64_t(0) : (uint64_t(1) << (insn.length() * 8))) - 1;
  if (state->log_reg_write.addr) {
    fprintf(stderr, "%1d 0x%016" PRIx64 " (0x%08" PRIx64 ") %c%2" PRIu64 " 0x%016" PRIx64 "\n",
            priv,
            pc,
            insn.bits() & mask,
            state->log_reg_write.addr & 1 ? 'f' : 'x',
            state->log_reg_write.addr >> 1,
            state->log_reg_write.data);
  } else {
    fprintf(stderr, "%1d 0x%016" PRIx64 " (0x%08" PRIx64 ")\n", priv, pc, insn.bits() & mask);
  }
  state->log_reg_write.addr = 0;
#endif
}

inline void processor_t::update_histogram(reg_t pc)
{
#ifdef RISCV_ENABLE_HISTOGRAM
  pc_histogram[pc]++;
#endif
}

// This is expected to be inlined by the compiler so each use of execute_insn
// includes a duplicated body of the function to get separate fetch.func
// function calls.
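// (A separate call site per caller gives the host a distinct indirect-branch
// predictor entry for each fetch.func dispatch, which is what the icache
// switch in step() exploits; see the comments there.)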
static reg_t execute_insn(processor_t* p, reg_t pc, insn_fetch_t fetch)
{
  commit_log_stash_privilege(p->get_state());
  reg_t npc = fetch.func(p, fetch.insn, pc);
  if (!invalid_pc(npc)) {
    commit_log_print_insn(p->get_state(), pc, fetch.insn);
    p->update_histogram(pc);
  }
  return npc;
}

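// The slow path is taken while the simulator is in interactive debug mode,
// while single stepping, or while the hart is in debug mode (dcsr.cause is
// non-zero); step() then fetches and retires one instruction at a time
// instead of running out of the instruction cache.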
bool processor_t::slow_path()
{
  return debug || state.single_step != state.STEP_NONE || state.dcsr.cause;
}

// fetch/decode/execute loop
void processor_t::step(size_t n)
{
  if (state.dcsr.cause == DCSR_CAUSE_NONE) {
    // TODO: get_interrupt() isn't super fast. Does that matter?
    if (sim->debug_module.get_interrupt(id)) {
      enter_debug_mode(DCSR_CAUSE_DEBUGINT);
    } else if (state.dcsr.halt) {
      enter_debug_mode(DCSR_CAUSE_HALT);
    }
  }

  while (n > 0) {
    size_t instret = 0;
    reg_t pc = state.pc;
    mmu_t* _mmu = mmu;

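    // advance_pc() commits the pc produced by execute_insn: a real pc is
    // stored to state.pc and counted as one retired instruction. The
    // PC_SERIALIZE_BEFORE/AFTER sentinels instead mark the hart serialized
    // (BEFORE) or count the retired instruction (AFTER), then reload pc from
    // state.pc and break out of the enclosing loop.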
    #define advance_pc() \
     if (unlikely(invalid_pc(pc))) { \
       switch (pc) { \
         case PC_SERIALIZE_BEFORE: state.serialized = true; break; \
         case PC_SERIALIZE_AFTER: instret++; break; \
         default: abort(); \
       } \
       pc = state.pc; \
       break; \
     } else { \
       state.pc = pc; \
       instret++; \
     }

    try
    {
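      // take_interrupt() raises any pending, enabled interrupt; like other
      // traps it is handled by the catch (trap_t&) block below.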
      take_interrupt();

      if (unlikely(slow_path()))
      {
        while (instret < n)
        {
          if (unlikely(state.single_step == state.STEP_STEPPING)) {
            state.single_step = state.STEP_STEPPED;
          }

          insn_fetch_t fetch = mmu->load_insn(pc);
          if (debug && !state.serialized)
            disasm(fetch.insn);
          pc = execute_insn(this, pc, fetch);
          bool serialize_before = (pc == PC_SERIALIZE_BEFORE);

          advance_pc();

          if (unlikely(state.single_step == state.STEP_STEPPED) && !serialize_before) {
            state.single_step = state.STEP_NONE;
            enter_debug_mode(DCSR_CAUSE_STEP);
            // enter_debug_mode changed state.pc, so we can't just continue.
            break;
          }

          if (unlikely(state.pc >= DEBUG_ROM_ENTRY &&
                       state.pc < DEBUG_ROM_ENTRY + DEBUG_ROM_ENTRY_SIZE)) {
            // We're spinning waiting for the debugger to tell us something.
            // Let's go talk to the debugger.
            return;
          }
        }
      }
      else while (instret < n)
      {
        // This code uses a modified Duff's Device to improve the performance
        // of executing instructions. While typical Duff's Devices are used
        // for software pipelining, the switch statement below primarily
        // benefits from separate call points for the fetch.func function call
        // found in each execute_insn. This function call is an indirect jump
        // that depends on the current instruction. By having an indirect jump
        // dedicated to each icache entry, you improve the performance of the
        // host's next-address predictor. Each case in the switch statement
        // allows program flow to continue to the next case if it corresponds
        // to the next instruction in the program and instret is still less
        // than n.
        //
        // According to Andrew Waterman's recollection, this optimization
        // resulted in approximately a 2x performance increase.
        //
        // If there is support for compressed instructions, the mmu and the
        // switch statement get more complicated. Each branch target is stored
        // at the index given by mmu->icache_index(), but consecutive
        // non-branching instructions are stored in consecutive indices even if
        // mmu->icache_index() would give a different index (which is the case
        // for 32-bit instructions in the presence of compressed instructions).

        // This figures out where to jump to in the switch statement
        size_t idx = _mmu->icache_index(pc);

        // This gets the cached, decoded instruction from the MMU. If the MMU
        // does not have the current pc cached, it will refill the icache
        // entry and return it. ic_entry->data.func is the C++ function
        // corresponding to the instruction.
        auto ic_entry = _mmu->access_icache(pc);

        // This macro is used by "icache.h", which is #included inside the
        // switch statement below. The indirect jump corresponding to the
        // instruction is made inside the execute_insn() call.
        #define ICACHE_ACCESS(i) { \
          insn_fetch_t fetch = ic_entry->data; \
          ic_entry++; \
          pc = execute_insn(this, pc, fetch); \
          if (i == mmu_t::ICACHE_ENTRIES-1) break; \
          if (unlikely(ic_entry->tag != pc)) goto miss; \
          if (unlikely(instret+1 == n)) break; \
          instret++; \
          state.pc = pc; \
        }
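        // On the common path ICACHE_ACCESS retires the instruction
        // (instret++, state.pc = pc) and falls through to the next case. It
        // leaves the switch early when the last icache slot was just used
        // (break, so idx is recomputed), when the next entry's tag doesn't
        // match pc (icache miss or taken branch: goto miss), or when the
        // instruction just executed was the last of this batch (break). In
        // the break/goto cases the instruction is retired by advance_pc()
        // below instead.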

        // This switch statement implements the modified Duff's device as
        // explained above.
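        // Schematically, the generated "icache.h" that is #included below
        // expands to one fall-through case per icache slot, roughly:
        //   case 0: ICACHE_ACCESS(0);
        //   case 1: ICACHE_ACCESS(1);
        //   ...
        //   case mmu_t::ICACHE_ENTRIES-1: ICACHE_ACCESS(mmu_t::ICACHE_ENTRIES-1);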
        switch (idx) {
          // "icache.h" is generated by the gen_icache script
          #include "icache.h"
        }

        advance_pc();
        continue;

miss:
        advance_pc();
        // refill I$ if it looks like there wasn't a taken branch
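        // (A pc that lies just past the previous entry's tag, within one
        // MAX_INSN_LENGTH, means execution fell through sequentially rather
        // than taking a branch, so refilling this slot is likely to pay off.)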
        if (pc > (ic_entry-1)->tag && pc <= (ic_entry-1)->tag + MAX_INSN_LENGTH)
          _mmu->refill_icache(pc, ic_entry);
      }
    }
    catch(trap_t& t)
    {
      take_trap(t, pc);
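      // Clamping n to the instructions already retired makes the outer loop
      // exit after the accounting below; the next call to step() resumes at
      // the trap handler set up by take_trap().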
      n = instret;

      if (unlikely(state.single_step == state.STEP_STEPPED)) {
        state.single_step = state.STEP_NONE;
        enter_debug_mode(DCSR_CAUSE_STEP);
      }
    }
    catch (trigger_matched_t& t)
    {
      if (mmu->matched_trigger) {
        // This exception came from the MMU. That means the instruction hasn't
        // fully executed yet. We start it again, but this time it won't throw
        // an exception because matched_trigger is already set. (All memory
        // instructions are idempotent, so restarting is safe.)

        insn_fetch_t fetch = mmu->load_insn(pc);
        pc = execute_insn(this, pc, fetch);
        advance_pc();

        delete mmu->matched_trigger;
        mmu->matched_trigger = NULL;
      }
      switch (state.mcontrol[t.index].action) {
        case ACTION_DEBUG_MODE:
          enter_debug_mode(DCSR_CAUSE_HWBP);
          break;
        case ACTION_DEBUG_EXCEPTION: {
          mem_trap_t trap(CAUSE_BREAKPOINT, t.address);
          take_trap(trap, pc);
          break;
        }
        default:
          abort();
      }
    }

    state.minstret += instret;
    n -= instret;
  }
}