X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=riscv%2Fexecute.cc;h=c5cafc2ee5ead5df9b35f2156540f288167a3ec5;hb=4299874ad4b07ef457776513a64e5b2397a6a75e;hp=a1c22292a410774a9ae06a11f66c0e16dff9bcd6;hpb=7baa7795ba6b878f1dc859f4d4ee239bb569c750;p=riscv-isa-sim.git diff --git a/riscv/execute.cc b/riscv/execute.cc index a1c2229..c5cafc2 100644 --- a/riscv/execute.cc +++ b/riscv/execute.cc @@ -6,30 +6,60 @@ #include -static void commit_log_stash_privilege(state_t* state) +static void commit_log_stash_privilege(processor_t* p) { #ifdef RISCV_ENABLE_COMMITLOG + state_t* state = p->get_state(); state->last_inst_priv = state->prv; + state->last_inst_xlen = p->get_xlen(); + state->last_inst_flen = p->get_flen(); #endif } +static void commit_log_print_value(int width, uint64_t hi, uint64_t lo) +{ + switch (width) { + case 16: + fprintf(stderr, "0x%04" PRIx16, (uint16_t)lo); + break; + case 32: + fprintf(stderr, "0x%08" PRIx32, (uint32_t)lo); + break; + case 64: + fprintf(stderr, "0x%016" PRIx64, lo); + break; + case 128: + fprintf(stderr, "0x%016" PRIx64 "%016" PRIx64, hi, lo); + break; + default: + abort(); + } +} + static void commit_log_print_insn(state_t* state, reg_t pc, insn_t insn) { #ifdef RISCV_ENABLE_COMMITLOG - int32_t priv = state->last_inst_priv; - uint64_t mask = (insn.length() == 8 ? uint64_t(0) : (uint64_t(1) << (insn.length() * 8))) - 1; - if (state->log_reg_write.addr) { - fprintf(stderr, "%1d 0x%016" PRIx64 " (0x%08" PRIx64 ") %c%2" PRIu64 " 0x%016" PRIx64 "\n", - priv, - pc, - insn.bits() & mask, - state->log_reg_write.addr & 1 ? 
'f' : 'x', - state->log_reg_write.addr >> 1, - state->log_reg_write.data); + auto& reg = state->log_reg_write; + int priv = state->last_inst_priv; + int xlen = state->last_inst_xlen; + int flen = state->last_inst_flen; + + fprintf(stderr, "%1d ", priv); + commit_log_print_value(xlen, 0, pc); + fprintf(stderr, " ("); + commit_log_print_value(insn.length() * 8, 0, insn.bits()); + + if (reg.addr) { + bool fp = reg.addr & 1; + int rd = reg.addr >> 1; + int size = fp ? flen : xlen; + fprintf(stderr, ") %c%2d ", fp ? 'f' : 'x', rd); + commit_log_print_value(size, reg.data.v[1], reg.data.v[0]); + fprintf(stderr, "\n"); } else { - fprintf(stderr, "%1d 0x%016" PRIx64 " (0x%08" PRIx64 ")\n", priv, pc, insn.bits() & mask); + fprintf(stderr, ")\n"); } - state->log_reg_write.addr = 0; + reg.addr = 0; #endif } @@ -40,9 +70,12 @@ inline void processor_t::update_histogram(reg_t pc) #endif } +// This is expected to be inlined by the compiler so each use of execute_insn +// includes a duplicated body of the function to get separate fetch.func +// function calls. static reg_t execute_insn(processor_t* p, reg_t pc, insn_fetch_t fetch) { - commit_log_stash_privilege(p->get_state()); + commit_log_stash_privilege(p); reg_t npc = fetch.func(p, fetch.insn, pc); if (!invalid_pc(npc)) { commit_log_print_insn(p->get_state(), pc, fetch.insn); @@ -51,19 +84,21 @@ static reg_t execute_insn(processor_t* p, reg_t pc, insn_fetch_t fetch) return npc; } +bool processor_t::slow_path() +{ + return debug || state.single_step != state.STEP_NONE || state.dcsr.cause; +} + // fetch/decode/execute loop void processor_t::step(size_t n) { - // TODO: get_interrupt() isn't super fast. Does that matter? - if (state.dcsr.cause == DCSR_CAUSE_NONE && - sim->debug_module.get_interrupt(id)) { - enter_debug_mode(DCSR_CAUSE_DEBUGINT); - } - - if (state.dcsr.cause != DCSR_CAUSE_NONE) { - // In Debug Mode, just do 10 steps at a time. Otherwise we're going to be - // spinning the rest of the time anyway. 
- n = std::min(n, (size_t) 10); + if (state.dcsr.cause == DCSR_CAUSE_NONE) { + if (halt_request) { + enter_debug_mode(DCSR_CAUSE_DEBUGINT); + } // !!!The halt bit in DCSR is deprecated. + else if (state.dcsr.halt) { + enter_debug_mode(DCSR_CAUSE_HALT); + } } while (n > 0) { @@ -75,10 +110,11 @@ void processor_t::step(size_t n) if (unlikely(invalid_pc(pc))) { \ switch (pc) { \ case PC_SERIALIZE_BEFORE: state.serialized = true; break; \ - case PC_SERIALIZE_AFTER: instret++; break; \ + case PC_SERIALIZE_AFTER: n = ++instret; break; \ default: abort(); \ } \ pc = state.pc; \ + check_pc_alignment(pc); \ break; \ } else { \ state.pc = pc; \ @@ -87,53 +123,126 @@ void processor_t::step(size_t n) try { - take_interrupt(); + take_pending_interrupt(); - if (unlikely(debug)) + if (unlikely(slow_path())) { while (instret < n) { + if (unlikely(state.single_step == state.STEP_STEPPING)) { + state.single_step = state.STEP_STEPPED; + } + insn_fetch_t fetch = mmu->load_insn(pc); - if (!state.serialized) + if (debug && !state.serialized) disasm(fetch.insn); pc = execute_insn(this, pc, fetch); + bool serialize_before = (pc == PC_SERIALIZE_BEFORE); + advance_pc(); + + if (unlikely(state.single_step == state.STEP_STEPPED) && !serialize_before) { + state.single_step = state.STEP_NONE; + enter_debug_mode(DCSR_CAUSE_STEP); + // enter_debug_mode changed state.pc, so we can't just continue. + break; + } + + if (unlikely(state.pc >= DEBUG_ROM_ENTRY && + state.pc < DEBUG_END)) { + // We're waiting for the debugger to tell us something. + return; + } + } } else while (instret < n) { + // This code uses a modified Duff's Device to improve the performance + // of executing instructions. While typical Duff's Devices are used + // for software pipelining, the switch statement below primarily + // benefits from separate call points for the fetch.func function call + // found in each execute_insn. This function call is an indirect jump + // that depends on the current instruction. 
By having an indirect jump + // dedicated for each icache entry, you improve the performance of the + // host's next address predictor. Each case in the switch statement + // allows for the program flow to continue to the next case if it + // corresponds to the next instruction in the program and instret is + // still less than n. + // + // According to Andrew Waterman's recollection, this optimization + // resulted in approximately a 2x performance increase. + + // This figures out where to jump to in the switch statement size_t idx = _mmu->icache_index(pc); + + // This gets the cached decoded instruction from the MMU. If the MMU + // does not have the current pc cached, it will refill the MMU and + // return the correct entry. ic_entry->data.func is the C++ function + // corresponding to the instruction. auto ic_entry = _mmu->access_icache(pc); + // This macro is included in "icache.h" included within the switch + // statement below. The indirect jump corresponding to the instruction + // is located within the execute_insn() function call. #define ICACHE_ACCESS(i) { \ insn_fetch_t fetch = ic_entry->data; \ - ic_entry++; \ pc = execute_insn(this, pc, fetch); \ + ic_entry = ic_entry->next; \ if (i == mmu_t::ICACHE_ENTRIES-1) break; \ - if (unlikely(ic_entry->tag != pc)) goto miss; \ + if (unlikely(ic_entry->tag != pc)) break; \ if (unlikely(instret+1 == n)) break; \ instret++; \ state.pc = pc; \ } + // This switch statement implements the modified Duff's device as + // explained above. 
switch (idx) { + // "icache.h" is generated by the gen_icache script #include "icache.h" } advance_pc(); - continue; - -miss: - advance_pc(); - // refill I$ if it looks like there wasn't a taken branch - if (pc > (ic_entry-1)->tag && pc <= (ic_entry-1)->tag + MAX_INSN_LENGTH) - _mmu->refill_icache(pc, ic_entry); } } catch(trap_t& t) { take_trap(t, pc); n = instret; + + if (unlikely(state.single_step == state.STEP_STEPPED)) { + state.single_step = state.STEP_NONE; + enter_debug_mode(DCSR_CAUSE_STEP); + } + } + catch (trigger_matched_t& t) + { + if (mmu->matched_trigger) { + // This exception came from the MMU. That means the instruction hasn't + // fully executed yet. We start it again, but this time it won't throw + // an exception because matched_trigger is already set. (All memory + // instructions are idempotent so restarting is safe.) + + insn_fetch_t fetch = mmu->load_insn(pc); + pc = execute_insn(this, pc, fetch); + advance_pc(); + + delete mmu->matched_trigger; + mmu->matched_trigger = NULL; + } + switch (state.mcontrol[t.index].action) { + case ACTION_DEBUG_MODE: + enter_debug_mode(DCSR_CAUSE_HWBP); + break; + case ACTION_DEBUG_EXCEPTION: { + mem_trap_t trap(CAUSE_BREAKPOINT, t.address); + take_trap(trap, pc); + break; + } + default: + abort(); + } } state.minstret += instret;