[xcc] minor performance tweaks

[riscv-isa-sim.git] / riscv / processor.cc
diff --git a/riscv/processor.cc b/riscv/processor.cc

index 360b7553689607d4ac7788e3e410469f35453088..e86536e4b4d05192bc5b678968c8103b272f856c 100644 (file)
--- a/riscv/processor.cc
+++ b/riscv/processor.cc
@@ -7,15 +7,81 @@
  #include "common.h"
  #include "config.h"
  #include "sim.h"
-#include "softfloat.h"
-#include "platform.h" // softfloat isNaNF32UI, etc.
-#include "internals.h" // ditto
+#include "icsim.h"
  
-processor_t::processor_t(sim_t* _sim, char* _mem, size_t _memsz)
-  : sim(_sim), mmu(_mem,_memsz)
+processor_t::processor_t(sim_t* _sim, mmu_t* _mmu)
+  : sim(_sim), mmu(*_mmu)
  {
+  // a few assumptions about endianness, including freg_t union
+  static_assert(BYTE_ORDER == LITTLE_ENDIAN);
+  static_assert(sizeof(freg_t) == 8);
+  static_assert(sizeof(reg_t) == 8);
+
+  static_assert(sizeof(insn_t) == 4);
+  static_assert(sizeof(uint128_t) == 16 && sizeof(int128_t) == 16);
+
+  icsim = NULL;
+  dcsim = NULL;
+  itlbsim = NULL;
+  dtlbsim = NULL;
+
+  reset();
+}
+
+processor_t::~processor_t()
+{
+  if(icsim)
+    icsim->print_stats();
+  delete icsim;
+
+  if(itlbsim)
+    itlbsim->print_stats();
+  delete itlbsim;
+
+  if(dcsim)
+    dcsim->print_stats();
+  delete dcsim;
+
+  if(dtlbsim)
+    dtlbsim->print_stats();
+  delete dtlbsim;
+}
+
+void processor_t::init(uint32_t _id, icsim_t* default_icache,
+                       icsim_t* default_dcache)
+{
+  id = _id;
+
+  for (int i=0; i<MAX_UTS; i++)
+  {
+    uts[i] = new processor_t(sim, &mmu);
+    uts[i]->id = id;
+    uts[i]->set_sr(uts[i]->sr | SR_EF);
+    uts[i]->set_sr(uts[i]->sr | SR_EV);
+    uts[i]->utidx = i;
+  }
+
+  #ifdef RISCV_ENABLE_ICSIM
+  icsim = new icsim_t(*default_icache);
+  mmu.set_icsim(icsim);
+  itlbsim = new icsim_t(1, 8, 4096, "ITLB");
+  mmu.set_itlbsim(itlbsim);
+  #endif
+  #ifdef RISCV_ENABLE_ICSIM
+  dcsim = new icsim_t(*default_dcache);
+  mmu.set_dcsim(dcsim);
+  dtlbsim = new icsim_t(1, 8, 4096, "DTLB");
+  mmu.set_dtlbsim(dtlbsim);
+  #endif
+}
+
+void processor_t::reset()
+{
+  run = false;
+
    memset(XPR,0,sizeof(XPR));
    memset(FPR,0,sizeof(FPR));
+
    pc = 0;
    evec = 0;
    epc = 0;
@@ -27,42 +93,21 @@ processor_t::processor_t(sim_t* _sim, char* _mem, size_t _memsz)
    fromhost = 0;
    count = 0;
    compare = 0;
+  cycle = 0;
    set_sr(SR_S | SR_SX);  // SX ignored if 64b mode not supported
    set_fsr(0);
  
-  memset(counters,0,sizeof(counters));
-
    // vector stuff
+  vecbanks = 0xff;
+  vecbanks_count = 8;
    utidx = -1;
-  vlmax = 8;
+  vlmax = 32;
    vl = 0;
-  nxpr_all = 256;
-  nfpr_all = 256;
-  nxpr_use = 0;
-  nfpr_use = 0;
+  nxfpr_bank = 256;
+  nxpr_use = 32;
+  nfpr_use = 32;
    for (int i=0; i<MAX_UTS; i++)
      uts[i] = NULL;
-
-  // a few assumptions about endianness, including freg_t union
-  static_assert(BYTE_ORDER == LITTLE_ENDIAN);
-  static_assert(sizeof(freg_t) == 8);
-  static_assert(sizeof(reg_t) == 8);
-
-  static_assert(sizeof(insn_t) == 4);
-  static_assert(sizeof(uint128_t) == 16 && sizeof(int128_t) == 16);
-}
-
-void processor_t::init(uint32_t _id, char* _mem, size_t _memsz)
-{
-  id = _id;
-
-  for (int i=0; i<MAX_UTS; i++)
-  {
-    uts[i] = new processor_t(sim, _mem, _memsz);
-    uts[i]->set_sr(uts[i]->sr | SR_EF);
-    uts[i]->set_sr(uts[i]->sr | SR_EV);
-    uts[i]->utidx = i;
-  }
  }
  
  void processor_t::set_sr(uint32_t val)
@@ -81,6 +126,10 @@ void processor_t::set_sr(uint32_t val)
    sr &= ~SR_EV;
  #endif
  
+  mmu.set_vm_enabled(sr & SR_VM);
+  mmu.set_supervisor(sr & SR_S);
+  mmu.flush_tlb();
+
    xprlen = ((sr & SR_S) ? (sr & SR_SX) : (sr & SR_UX)) ? 64 : 32;
  }
  
@@ -91,14 +140,10 @@ void processor_t::set_fsr(uint32_t val)
  
  void processor_t::vcfg()
  {
-  if (nxpr_use == 0 && nfpr_use == 0)
-    vlmax = 8;
-  else if (nfpr_use == 0)
-    vlmax = (nxpr_all-1) / (nxpr_use-1);
-  else if (nxpr_use == 0)
-    vlmax = (nfpr_all-1) / (nfpr_use-1);
+  if (nxpr_use + nfpr_use < 2)
+    vlmax = nxfpr_bank * vecbanks_count;
    else
-    vlmax = std::min((nxpr_all-1) / (nxpr_use-1), (nfpr_all-1) / (nfpr_use-1));
+    vlmax = (nxfpr_bank / (nxpr_use + nfpr_use - 1)) * vecbanks_count;
  
    vlmax = std::min(vlmax, MAX_UTS);
  }
@@ -108,34 +153,53 @@ void processor_t::setvl(int vlapp)
    vl = std::min(vlmax, vlapp);
  }
  
+void processor_t::take_interrupt()
+{
+  uint32_t interrupts = (cause & CAUSE_IP) >> CAUSE_IP_SHIFT;
+  interrupts &= (sr & SR_IM) >> SR_IM_SHIFT;
+
+  if(interrupts && (sr & SR_ET))
+    throw trap_interrupt;
+}
+
  void processor_t::step(size_t n, bool noisy)
  {
+  if(!run)
+    return;
+
    size_t i = 0;
    while(1) try
    {
-    for( ; i < n; i++)
-    {
-      uint32_t interrupts = (cause & CAUSE_IP) >> CAUSE_IP_SHIFT;
-      interrupts &= (sr & SR_IM) >> SR_IM_SHIFT;
-      if(interrupts && (sr & SR_ET))
-        take_trap(trap_interrupt,noisy);
+    take_interrupt();
  
-      insn_t insn = mmu.load_insn(pc, sr & SR_EC);
-  
-      reg_t npc = pc + insn_length(insn);
+    mmu_t& _mmu = mmu;
+    insn_t insn;
+    insn_func_t func;
+    reg_t npc = pc;
+    #define execute_insn(noisy) \
+      do { \
+        insn = _mmu.load_insn(npc, sr & SR_EC, &func); \
+        if(noisy) disasm(insn,pc); \
+        npc = func(this, insn, npc); \
+        pc = npc; \
+      } while(0)
  
-      if(noisy)
-        disasm(insn,pc);
-
-      #include "execute.h"
-  
-      pc = npc;
-      XPR[0] = 0;
-
-      if(count++ == compare)
-        cause |= 1 << (TIMER_IRQ+CAUSE_IP_SHIFT);
+    if(noisy) for( ; i < n; i++)
+      execute_insn(true);
+    else 
+    {
+      for( ; n > 3 && i < n-3; i+=4)
+      {
+        execute_insn(false);
+        execute_insn(false);
+        execute_insn(false);
+        execute_insn(false);
+      }
+      for( ; i < n; i++)
+        execute_insn(false);
      }
-    return;
+
+    break;
    }
    catch(trap_t t)
    {
@@ -144,9 +208,23 @@ void processor_t::step(size_t n, bool noisy)
    }
    catch(vt_command_t cmd)
    {
+    i++;
      if (cmd == vt_command_stop)
-      return;
+      break;
    }
+  catch(halt_t t)
+  {
+    reset();
+    return;
+  }
+
+  cycle += i;
+
+  typeof(count) old_count = count;
+  typeof(count) max_count = -1;
+  count += i;
+  if(old_count < compare && (count >= compare || old_count > max_count-i))
+    cause |= 1 << (TIMER_IRQ+CAUSE_IP_SHIFT);
  }
  
  void processor_t::take_trap(trap_t t, bool noisy)
@@ -165,6 +243,12 @@ void processor_t::take_trap(trap_t t, bool noisy)
    badvaddr = mmu.get_badvaddr();
  }
  
+void processor_t::deliver_ipi()
+{
+  cause |= 1 << (IPI_IRQ+CAUSE_IP_SHIFT);
+  run = true;
+}
+
  void processor_t::disasm(insn_t insn, reg_t pc)
  {
    printf("core %3d: 0x%016llx (0x%08x) ",id,(unsigned long long)pc,insn.bits);
@@ -181,7 +265,7 @@ void processor_t::disasm(insn_t insn, reg_t pc)
    info.buffer_vma = pc;
  
    int ret = print_insn_little_mips(pc, &info);
-  demand(ret == (INSN_IS_RVC(insn.bits) ? 2 : 4), "disasm bug!");
+  demand(ret == insn_length(insn.bits), "disasm bug!");
    #else
    printf("unknown");
    #endif