// See LICENSE for license details.

#include "processor.h"
#include "mmu.h"
#include "sim.h"
#include <cassert>

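// Stash the current privilege level and register widths into the hart's
// state, so the commit-log line printed after the instruction retires can
// report the mode and widths it actually executed with.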
static void commit_log_stash_privilege(processor_t* p)
{
#ifdef RISCV_ENABLE_COMMITLOG
  state_t* state = p->get_state();
  state->last_inst_priv = state->prv;
  state->last_inst_xlen = p->get_xlen();
  state->last_inst_flen = p->get_flen();
#endif
}

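// Print a value as zero-padded hex at the given register width. `hi` is
// only meaningful for 128-bit values, which are passed as two 64-bit halves.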
static void commit_log_print_value(int width, uint64_t hi, uint64_t lo)
{
  switch (width) {
    case 16:
      fprintf(stderr, "0x%04" PRIx16, (uint16_t)lo);
      break;
    case 32:
      fprintf(stderr, "0x%08" PRIx32, (uint32_t)lo);
      break;
    case 64:
      fprintf(stderr, "0x%016" PRIx64, lo);
      break;
    case 128:
      fprintf(stderr, "0x%016" PRIx64 "%016" PRIx64, hi, lo);
      break;
    default:
      abort();
  }
}

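// Print one commit-log line for a retired instruction that wrote a register:
// privilege level, pc, raw instruction bits, destination register, and the
// value written. As an illustrative example (not taken from a real run), an
// M-mode `li a0, 0` retiring at 0x80000000 on RV64 would produce:
//   3 0x0000000080000000 (0x00000513) x10 0x0000000000000000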
static void commit_log_print_insn(state_t* state, reg_t pc, insn_t insn)
{
#ifdef RISCV_ENABLE_COMMITLOG
  auto& reg = state->log_reg_write;
  int priv = state->last_inst_priv;
  int xlen = state->last_inst_xlen;
  int flen = state->last_inst_flen;
  if (reg.addr) {
    bool fp = reg.addr & 1;
    int rd = reg.addr >> 1;
    int size = fp ? flen : xlen;

    fprintf(stderr, "%1d ", priv);
    commit_log_print_value(xlen, 0, pc);
    fprintf(stderr, " (");
    commit_log_print_value(insn.length() * 8, 0, insn.bits());
    fprintf(stderr, ") %c%2d ", fp ? 'f' : 'x', rd);
    commit_log_print_value(size, reg.data.v[1], reg.data.v[0]);
    fprintf(stderr, "\n");
  }
  reg.addr = 0;
#endif
}

inline void processor_t::update_histogram(reg_t pc)
{
#ifdef RISCV_ENABLE_HISTOGRAM
  pc_histogram[pc]++;
#endif
}

// This is expected to be inlined by the compiler so each use of execute_insn
// includes a duplicated body of the function to get separate fetch.func
// function calls.
static reg_t execute_insn(processor_t* p, reg_t pc, insn_fetch_t fetch)
{
  commit_log_stash_privilege(p);
  reg_t npc = fetch.func(p, fetch.insn, pc);
  if (!invalid_pc(npc)) {
    commit_log_print_insn(p->get_state(), pc, fetch.insn);
    p->update_histogram(pc);
  }
  return npc;
}

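// Use the slow, one-instruction-at-a-time dispatch loop whenever interactive
// debug is on, a single step is pending, or the hart is in debug mode.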
bool processor_t::slow_path()
{
  return debug || state.single_step != state.STEP_NONE || state.dcsr.cause;
}

// fetch/decode/execute loop
void processor_t::step(size_t n)
{
  if (state.dcsr.cause == DCSR_CAUSE_NONE) {
    if (halt_request) {
      enter_debug_mode(DCSR_CAUSE_DEBUGINT);
    } else if (state.dcsr.halt) {
      // Note: the halt bit in DCSR is deprecated.
      enter_debug_mode(DCSR_CAUSE_HALT);
    }
  }

  while (n > 0) {
    size_t instret = 0;
    reg_t pc = state.pc;
    mmu_t* _mmu = mmu;

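    // advance_pc() commits the pc produced by execute_insn(). The sentinel
    // values PC_SERIALIZE_BEFORE and PC_SERIALIZE_AFTER are not addresses:
    // they request serialization of the simulator loop, so the macro restores
    // pc from state.pc and breaks out of the dispatch loop rather than
    // retiring in-line.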
    #define advance_pc() \
      if (unlikely(invalid_pc(pc))) { \
        switch (pc) { \
          case PC_SERIALIZE_BEFORE: state.serialized = true; break; \
          case PC_SERIALIZE_AFTER: n = ++instret; break; \
          default: abort(); \
        } \
        pc = state.pc; \
        break; \
      } else { \
        state.pc = pc; \
        instret++; \
      }

    try
    {
      take_pending_interrupt();

      if (unlikely(slow_path()))
      {
        while (instret < n)
        {
          if (unlikely(state.single_step == state.STEP_STEPPING)) {
            state.single_step = state.STEP_STEPPED;
          }

          insn_fetch_t fetch = mmu->load_insn(pc);
          if (debug && !state.serialized)
            disasm(fetch.insn);
          pc = execute_insn(this, pc, fetch);
          bool serialize_before = (pc == PC_SERIALIZE_BEFORE);

          advance_pc();

          if (unlikely(state.single_step == state.STEP_STEPPED) && !serialize_before) {
            state.single_step = state.STEP_NONE;
            enter_debug_mode(DCSR_CAUSE_STEP);
            // enter_debug_mode changed state.pc, so we can't just continue.
            break;
          }

          if (unlikely(state.pc >= DEBUG_START &&
                       state.pc < DEBUG_END)) {
            // We're waiting for the debugger to tell us something.
            return;
          }
        }
      }
      else while (instret < n)
      {
        // This code uses a modified Duff's device to improve the performance
        // of executing instructions. While typical Duff's devices are used
        // for software pipelining, the switch statement below primarily
        // benefits from separate call points for the fetch.func function call
        // found in each execute_insn. This function call is an indirect jump
        // that depends on the current instruction. By having an indirect jump
        // dedicated to each icache entry, you improve the performance of the
        // host's next-address predictor. Each case in the switch statement
        // allows the program flow to continue to the next case if it
        // corresponds to the next instruction in the program and instret is
        // still less than n.
        //
        // According to Andrew Waterman's recollection, this optimization
        // resulted in approximately a 2x performance increase.
        //
        // If there is support for compressed instructions, the mmu and the
        // switch statement get more complicated. Each branch target is stored
        // at the index corresponding to mmu->icache_index(), but consecutive
        // non-branching instructions are stored in consecutive indices even if
        // mmu->icache_index() specifies a different index (which is the case
        // for 32-bit instructions in the presence of compressed instructions).
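        //
        // As an illustrative sketch (assuming gen_icache emits one
        // fall-through case per icache entry), the switch below expands to
        // roughly:
        //
        //   switch (idx) {
        //     case 0: ICACHE_ACCESS(0);
        //     case 1: ICACHE_ACCESS(1);
        //     ...
        //     case mmu_t::ICACHE_ENTRIES-1: ICACHE_ACCESS(mmu_t::ICACHE_ENTRIES-1);
        //   }
        //
        // with no break between cases, so control enters at `idx` and falls
        // through one entry per retired instruction.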

        // This figures out where to jump to in the switch statement
        size_t idx = _mmu->icache_index(pc);

        // This gets the cached, decoded instruction from the MMU. If the MMU
        // does not have the current pc cached, it will refill the icache and
        // return the correct entry. ic_entry->data.func is the C++ function
        // corresponding to the instruction.
        auto ic_entry = _mmu->access_icache(pc);

        // This macro is used by "icache.h", which is included within the
        // switch statement below. The indirect jump corresponding to the
        // instruction is located within the execute_insn() function call.
        #define ICACHE_ACCESS(i) { \
          insn_fetch_t fetch = ic_entry->data; \
          ic_entry++; \
          pc = execute_insn(this, pc, fetch); \
          if (i == mmu_t::ICACHE_ENTRIES-1) break; \
          if (unlikely(ic_entry->tag != pc)) goto miss; \
          if (unlikely(instret+1 == n)) break; \
          instret++; \
          state.pc = pc; \
        }
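        // Control leaves ICACHE_ACCESS() in one of three ways: a `break` out
        // of the switch after the last icache entry, a `goto miss` when the
        // next entry's tag does not match the next pc (e.g. a taken branch or
        // a cold entry), or a `break` once instret is about to reach n.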

        // This switch statement implements the modified Duff's device as
        // explained above.
        switch (idx) {
          // "icache.h" is generated by the gen_icache script
          #include "icache.h"
        }

        advance_pc();
        continue;

      miss:
        advance_pc();
        // Refill the icache if it looks like there wasn't a taken branch:
        // the new pc lies within one instruction's length after the tag of
        // the entry that missed, so execution is still sequential.
        if (pc > (ic_entry-1)->tag && pc <= (ic_entry-1)->tag + MAX_INSN_LENGTH)
          _mmu->refill_icache(pc, ic_entry);
      }
    }
    catch(trap_t& t)
    {
      take_trap(t, pc);
      n = instret;

      if (unlikely(state.single_step == state.STEP_STEPPED)) {
        state.single_step = state.STEP_NONE;
        enter_debug_mode(DCSR_CAUSE_STEP);
      }
    }
    catch (trigger_matched_t& t)
    {
      if (mmu->matched_trigger) {
        // This exception came from the MMU. That means the instruction hasn't
        // fully executed yet. We start it again, but this time it won't throw
        // an exception because matched_trigger is already set. (All memory
        // instructions are idempotent so restarting is safe.)

        insn_fetch_t fetch = mmu->load_insn(pc);
        pc = execute_insn(this, pc, fetch);
        advance_pc();

        delete mmu->matched_trigger;
        mmu->matched_trigger = NULL;
      }
      switch (state.mcontrol[t.index].action) {
        case ACTION_DEBUG_MODE:
          enter_debug_mode(DCSR_CAUSE_HWBP);
          break;
        case ACTION_DEBUG_EXCEPTION: {
          mem_trap_t trap(CAUSE_BREAKPOINT, t.address);
          take_trap(trap, pc);
          break;
        }
        default:
          abort();
      }
    }

    state.minstret += instret;
    n -= instret;
  }
}