// See LICENSE for license details.

#include "processor.h"
#include "mmu.h"
#include "sim.h"
#include <cassert>

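// Stash the current privilege level and register widths into the hart's
// state, so the commit-log line printed after the instruction retires can
// report the mode and widths it actually executed with.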
static void commit_log_stash_privilege(processor_t* p)
{
#ifdef RISCV_ENABLE_COMMITLOG
  state_t* state = p->get_state();
  state->last_inst_priv = state->prv;
  state->last_inst_xlen = p->get_xlen();
  state->last_inst_flen = p->get_flen();
#endif
}

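// Print a value as zero-padded hex at the given register width. `hi` is
// only meaningful for 128-bit values, which are passed as two 64-bit halves.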
static void commit_log_print_value(int width, uint64_t hi, uint64_t lo)
{
  switch (width) {
    case 16:
      fprintf(stderr, "0x%04" PRIx16, (uint16_t)lo);
      break;
    case 32:
      fprintf(stderr, "0x%08" PRIx32, (uint32_t)lo);
      break;
    case 64:
      fprintf(stderr, "0x%016" PRIx64, lo);
      break;
    case 128:
      fprintf(stderr, "0x%016" PRIx64 "%016" PRIx64, hi, lo);
      break;
    default:
      abort();
  }
}

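// Print one commit-log line for a retired instruction that wrote a register:
// privilege level, pc, raw instruction bits, destination register, and the
// value written. As an illustrative example (not taken from a real run), an
// M-mode `li a0, 0` retiring at 0x80000000 on RV64 would produce:
//   3 0x0000000080000000 (0x00000513) x10 0x0000000000000000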
static void commit_log_print_insn(state_t* state, reg_t pc, insn_t insn)
{
#ifdef RISCV_ENABLE_COMMITLOG
  auto& reg = state->log_reg_write;
  int priv = state->last_inst_priv;
  int xlen = state->last_inst_xlen;
  int flen = state->last_inst_flen;
  if (reg.addr) {
    bool fp = reg.addr & 1;
    int rd = reg.addr >> 1;
    int size = fp ? flen : xlen;

    fprintf(stderr, "%1d ", priv);
    commit_log_print_value(xlen, 0, pc);
    fprintf(stderr, " (");
    commit_log_print_value(insn.length() * 8, 0, insn.bits());
    fprintf(stderr, ") %c%2d ", fp ? 'f' : 'x', rd);
    commit_log_print_value(size, reg.data.v[1], reg.data.v[0]);
    fprintf(stderr, "\n");
  }
  reg.addr = 0;
#endif
}

inline void processor_t::update_histogram(reg_t pc)
{
#ifdef RISCV_ENABLE_HISTOGRAM
  pc_histogram[pc]++;
#endif
}

// This is expected to be inlined by the compiler so each use of execute_insn
// includes a duplicated body of the function to get separate fetch.func
// function calls.
static reg_t execute_insn(processor_t* p, reg_t pc, insn_fetch_t fetch)
{
  commit_log_stash_privilege(p);
  reg_t npc = fetch.func(p, fetch.insn, pc);
  if (!invalid_pc(npc)) {
    commit_log_print_insn(p->get_state(), pc, fetch.insn);
    p->update_histogram(pc);
  }
  return npc;
}

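// Use the slow, one-instruction-at-a-time dispatch loop whenever interactive
// debug is on, a single step is pending, or the hart is in debug mode.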
bool processor_t::slow_path()
{
  return debug || state.single_step != state.STEP_NONE || state.dcsr.cause;
}

// fetch/decode/execute loop
void processor_t::step(size_t n)
{
  if (state.dcsr.cause == DCSR_CAUSE_NONE) {
    if (halt_request) {
      enter_debug_mode(DCSR_CAUSE_DEBUGINT);
    } else if (state.dcsr.halt) {
      // Note: the halt bit in DCSR is deprecated.
      enter_debug_mode(DCSR_CAUSE_HALT);
    }
  }

  while (n > 0) {
    size_t instret = 0;
    reg_t pc = state.pc;
    mmu_t* _mmu = mmu;

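    // advance_pc() commits the pc produced by execute_insn(). The sentinel
    // values PC_SERIALIZE_BEFORE and PC_SERIALIZE_AFTER are not addresses:
    // they request serialization of the simulator loop, so the macro restores
    // pc from state.pc and breaks out of the dispatch loop rather than
    // retiring in-line.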
    #define advance_pc() \
      if (unlikely(invalid_pc(pc))) { \
        switch (pc) { \
          case PC_SERIALIZE_BEFORE: state.serialized = true; break; \
          case PC_SERIALIZE_AFTER: n = ++instret; break; \
          default: abort(); \
        } \
        pc = state.pc; \
        break; \
      } else { \
        state.pc = pc; \
        instret++; \
      }

    try
    {
      take_pending_interrupt();

      if (unlikely(slow_path()))
      {
        while (instret < n)
        {
          if (unlikely(state.single_step == state.STEP_STEPPING)) {
            state.single_step = state.STEP_STEPPED;
          }

          insn_fetch_t fetch = mmu->load_insn(pc);
          if (debug && !state.serialized)
            disasm(fetch.insn);
          pc = execute_insn(this, pc, fetch);
          bool serialize_before = (pc == PC_SERIALIZE_BEFORE);

          advance_pc();

          if (unlikely(state.single_step == state.STEP_STEPPED) && !serialize_before) {
            state.single_step = state.STEP_NONE;
            enter_debug_mode(DCSR_CAUSE_STEP);
            // enter_debug_mode changed state.pc, so we can't just continue.
            break;
          }

          if (unlikely(state.pc >= DEBUG_START &&
                       state.pc < DEBUG_END)) {
            // We're waiting for the debugger to tell us something.
            return;
          }
        }
      }
      else while (instret < n)
      {
        // This code uses a modified Duff's device to improve the performance
        // of executing instructions. While typical Duff's devices are used
        // for software pipelining, the switch statement below primarily
        // benefits from separate call points for the fetch.func function call
        // found in each execute_insn. This function call is an indirect jump
        // that depends on the current instruction. By having an indirect jump
        // dedicated to each icache entry, you improve the performance of the
        // host's next-address predictor. Each case in the switch statement
        // allows the program flow to continue to the next case if it
        // corresponds to the next instruction in the program and instret is
        // still less than n.
        //
        // According to Andrew Waterman's recollection, this optimization
        // resulted in approximately a 2x performance increase.
        //
        // If there is support for compressed instructions, the mmu and the
        // switch statement get more complicated. Each branch target is stored
        // at the index corresponding to mmu->icache_index(), but consecutive
        // non-branching instructions are stored in consecutive indices even if
        // mmu->icache_index() specifies a different index (which is the case
        // for 32-bit instructions in the presence of compressed instructions).
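        //
        // As an illustrative sketch (assuming gen_icache emits one
        // fall-through case per icache entry), the switch below expands to
        // roughly:
        //
        //   switch (idx) {
        //     case 0: ICACHE_ACCESS(0);
        //     case 1: ICACHE_ACCESS(1);
        //     ...
        //     case mmu_t::ICACHE_ENTRIES-1: ICACHE_ACCESS(mmu_t::ICACHE_ENTRIES-1);
        //   }
        //
        // with no break between cases, so control enters at `idx` and falls
        // through one entry per retired instruction.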

        // This figures out where to jump to in the switch statement
        size_t idx = _mmu->icache_index(pc);

        // This gets the cached, decoded instruction from the MMU. If the MMU
        // does not have the current pc cached, it will refill the icache and
        // return the correct entry. ic_entry->data.func is the C++ function
        // corresponding to the instruction.
        auto ic_entry = _mmu->access_icache(pc);

        // This macro is used by "icache.h", which is included within the
        // switch statement below. The indirect jump corresponding to the
        // instruction is located within the execute_insn() function call.
        #define ICACHE_ACCESS(i) { \
          insn_fetch_t fetch = ic_entry->data; \
          ic_entry++; \
          pc = execute_insn(this, pc, fetch); \
          if (i == mmu_t::ICACHE_ENTRIES-1) break; \
          if (unlikely(ic_entry->tag != pc)) goto miss; \
          if (unlikely(instret+1 == n)) break; \
          instret++; \
          state.pc = pc; \
        }
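        // Control leaves ICACHE_ACCESS() in one of three ways: a `break` out
        // of the switch after the last icache entry, a `goto miss` when the
        // next entry's tag does not match the next pc (e.g. a taken branch or
        // a cold entry), or a `break` once instret is about to reach n.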

        // This switch statement implements the modified Duff's device as
        // explained above.
        switch (idx) {
          // "icache.h" is generated by the gen_icache script
          #include "icache.h"
        }

        advance_pc();
        continue;

      miss:
        advance_pc();
        // Refill the icache if it looks like there wasn't a taken branch:
        // the new pc lies within one instruction's length after the tag of
        // the entry that missed, so execution is still sequential.
        if (pc > (ic_entry-1)->tag && pc <= (ic_entry-1)->tag + MAX_INSN_LENGTH)
          _mmu->refill_icache(pc, ic_entry);
      }
    }
    catch(trap_t& t)
    {
      take_trap(t, pc);
      n = instret;

      if (unlikely(state.single_step == state.STEP_STEPPED)) {
        state.single_step = state.STEP_NONE;
        enter_debug_mode(DCSR_CAUSE_STEP);
      }
    }
    catch (trigger_matched_t& t)
    {
      if (mmu->matched_trigger) {
        // This exception came from the MMU. That means the instruction hasn't
        // fully executed yet. We start it again, but this time it won't throw
        // an exception because matched_trigger is already set. (All memory
        // instructions are idempotent so restarting is safe.)

        insn_fetch_t fetch = mmu->load_insn(pc);
        pc = execute_insn(this, pc, fetch);
        advance_pc();

        delete mmu->matched_trigger;
        mmu->matched_trigger = NULL;
      }
      switch (state.mcontrol[t.index].action) {
        case ACTION_DEBUG_MODE:
          enter_debug_mode(DCSR_CAUSE_HWBP);
          break;
        case ACTION_DEBUG_EXCEPTION: {
          mem_trap_t trap(CAUSE_BREAKPOINT, t.address);
          take_trap(trap, pc);
          break;
        }
        default:
          abort();
      }
    }

    state.minstret += instret;
    n -= instret;
  }
}