[xcc] minor performance tweaks

[riscv-isa-sim.git] / riscv / decode.h
diff --git a/riscv/decode.h b/riscv/decode.h

index 424ad7fdfe85fbe6ec6f9c063e0f74238f7634ae..2078e58ae2cc028e1b5959a9f04ca0ce2abf4a62 100644 (file)
--- a/riscv/decode.h
+++ b/riscv/decode.h
@@ -3,55 +3,72 @@
  
  #define __STDC_LIMIT_MACROS
  #include <stdint.h>
+
+#include "config.h"
+
  typedef int int128_t __attribute__((mode(TI)));
  typedef unsigned int uint128_t __attribute__((mode(TI)));
  
-#define support_64bit 1
  typedef int64_t sreg_t;
  typedef uint64_t reg_t;
  typedef uint64_t freg_t;
  
  const int OPCODE_BITS = 7;
-const int JTYPE_OPCODE_BITS = 5;
  
-const int GPR_BITS = 8*sizeof(reg_t);
-const int GPRID_BITS = 5;
-const int NGPR = 1 << GPRID_BITS;
+const int XPRID_BITS = 5;          // width of an integer-register index field (rs1/rs2/rd in the format structs)
+const int NXPR = 1 << XPRID_BITS;  // 1 << 5 = 32
  
  const int FPR_BITS = 64;
  const int FPRID_BITS = 5;
  const int NFPR = 1 << FPRID_BITS;
  
  const int IMM_BITS = 12;
-const int TARGET_BITS = 27;
-const int SHAMT_BITS = 6;
+const int IMMLO_BITS = 7;          // width of btype_t::immlo; btype_t::immhi holds the other IMM_BITS-IMMLO_BITS bits
+const int TARGET_BITS = 25;        // width of jtype_t::target (7 opcode bits + 25 = 32)
  const int FUNCT_BITS = 3;
-const int FFUNCT_BITS = 5;
+const int FUNCTR_BITS = 7;         // width of rtype_t::functr
+const int FFUNCT_BITS = 2;         // width of ftype_t::ffunct
+const int RM_BITS = 3;             // width of ftype_t::rm
  const int BIGIMM_BITS = 20;
+const int BRANCH_ALIGN_BITS = 1;   // BRANCH_TARGET/CBRANCH_TARGET shift their offset left by this much
+const int JUMP_ALIGN_BITS = 1;     // JUMP_TARGET/CJUMP_TARGET shift their offset left by this much
  
  #define SR_ET    0x0000000000000001ULL
-#define SR_PS    0x0000000000000004ULL
-#define SR_S     0x0000000000000008ULL
-#define SR_EF    0x0000000000000010ULL
-#define SR_UX    0x0000000000000020ULL
-#define SR_KX    0x0000000000000040ULL
+#define SR_EF    0x0000000000000002ULL // tested by require_fp
+#define SR_EV    0x0000000000000004ULL // tested by require_vector
+#define SR_EC    0x0000000000000008ULL // tested by require_rvc and by the RVC build of set_pc
+#define SR_PS    0x0000000000000010ULL
+#define SR_S     0x0000000000000020ULL // tested by require_supervisor
+#define SR_UX    0x0000000000000040ULL
+#define SR_SX    0x0000000000000080ULL
  #define SR_IM    0x000000000000FF00ULL
-#define SR_ZERO  ~(SR_ET | SR_PS | SR_S | SR_EF | SR_UX | SR_KX | SR_IM)
+#define SR_VM    0x0000000000010000ULL
+#define SR_ZERO  ~(SR_ET|SR_EF|SR_EV|SR_EC|SR_PS|SR_S|SR_UX|SR_SX|SR_IM|SR_VM) // every bit not covered by one of the SR_* masks
+#define SR_IM_SHIFT 8 // bit position of the lowest SR_IM bit
+#define IPI_IRQ 5 // NOTE(review): presumably a bit index within the 8-bit SR_IM / CAUSE_IP fields - confirm
+#define TIMER_IRQ 7 // NOTE(review): same assumption as IPI_IRQ - confirm
  
-#define FP_RD_NE 0
-#define FP_RD_0  1
-#define FP_RD_UP 2
-#define FP_RD_DN 3
-#define FSR_RD_SHIFT 10
-#define FSR_RD   (0x3 << FSR_RD_SHIFT)
+#define CAUSE_EXCCODE 0x000000FF // mask: bits 0-7
+#define CAUSE_IP      0x0000FF00 // mask: bits 8-15
+#define CAUSE_EXCCODE_SHIFT 0 // lowest bit of CAUSE_EXCCODE
+#define CAUSE_IP_SHIFT      8 // lowest bit of CAUSE_IP
+
+#define FP_RD_NE  0 // NOTE(review): the FP_RD_* names read like rounding-mode selectors - confirm against the FP code
+#define FP_RD_0   1
+#define FP_RD_DN  2 // DN and UP swapped values relative to the removed definitions (UP was 2, DN was 3)
+#define FP_RD_UP  3
+#define FP_RD_NMM 4 // largest value the RM macro lets through (it rejects rm > 4)
+
+#define FSR_RD_SHIFT 5 // lowest bit of the FSR_RD field
+#define FSR_RD   (0x7 << FSR_RD_SHIFT) // 3-bit mask: bits 5-7
  
-#define FPEXC_NV 0x10
-#define FPEXC_OF 0x08
-#define FPEXC_UF 0x04
-#define FPEXC_DZ 0x02
  #define FPEXC_NX 0x01
+#define FPEXC_UF 0x02 // the four flags after FPEXC_NX now ascend; the removed definitions had them in the opposite order
+#define FPEXC_OF 0x04
+#define FPEXC_DZ 0x08
+#define FPEXC_NV 0x10 // NOTE(review): set_fp_exceptions ORs softfloat_exceptionFlags in with a shift of 0, so these presumably mirror that layout - confirm
  
-#define FSR_AEXC_SHIFT 5
+#define FSR_AEXC_SHIFT 0 // the FSR_*A masks are the FPEXC_* bits shifted by this, so they now occupy bits 0-4
  #define FSR_NVA  (FPEXC_NV << FSR_AEXC_SHIFT)
  #define FSR_OFA  (FPEXC_OF << FSR_AEXC_SHIFT)
  #define FSR_UFA  (FPEXC_UF << FSR_AEXC_SHIFT)
@@ -59,58 +76,60 @@ const int BIGIMM_BITS = 20;
  #define FSR_NXA  (FPEXC_NX << FSR_AEXC_SHIFT)
  #define FSR_AEXC (FSR_NVA | FSR_OFA | FSR_UFA | FSR_DZA | FSR_NXA)
  
-#define FSR_CEXC_SHIFT 0
-#define FSR_NVC  (FPEXC_NV << FSR_AEXC_SHIFT)
-#define FSR_OFC  (FPEXC_OF << FSR_AEXC_SHIFT)
-#define FSR_UFC  (FPEXC_UF << FSR_AEXC_SHIFT)
-#define FSR_DZC  (FPEXC_DZ << FSR_AEXC_SHIFT)
-#define FSR_NXC  (FPEXC_NX << FSR_AEXC_SHIFT)
-#define FSR_CEXC (FSR_NVC | FSR_OFC | FSR_UFC | FSR_DZC | FSR_NXC)
-
-#define FSR_ZERO ~(FSR_RD | FSR_AEXC | FSR_CEXC)
+#define FSR_ZERO ~(FSR_RD | FSR_AEXC) // every bit outside the FSR_RD and FSR_AEXC fields
  
  // note: bit fields are in little-endian order
  struct itype_t
  {
-  unsigned imm : IMM_BITS;
+  unsigned opcode : OPCODE_BITS; // field widths: 7 + 3 + 12 + 5 + 5 = 32 bits
    unsigned funct : FUNCT_BITS;
-  unsigned rb : GPRID_BITS;
-  unsigned ra : GPRID_BITS;
+  signed imm12 : IMM_BITS; // signed bit-field; read through SIMM, SHAMT and SHAMTW
+  unsigned rs1 : XPRID_BITS; // preceded by 7 + 3 + 12 = 22 bits, like rs1 in btype_t and rtype_t
+  unsigned rd : XPRID_BITS;
+};
+
+struct btype_t // field widths: 7 + 3 + 7 + 5 + 5 + 5 = 32 bits
+{
    unsigned opcode : OPCODE_BITS;
+  unsigned funct : FUNCT_BITS;
+  unsigned immlo : IMMLO_BITS; // low 7 bits of the immediate; BIMM ORs it with immhi << IMMLO_BITS
+  unsigned rs2 : XPRID_BITS;
+  unsigned rs1 : XPRID_BITS;
+  signed immhi : IMM_BITS-IMMLO_BITS; // signed 5-bit high part; supplies the sign of BIMM
  };
  
  struct jtype_t
  {
-  unsigned target : TARGET_BITS;
-  unsigned jump_opcode : JTYPE_OPCODE_BITS;
+  unsigned jump_opcode : OPCODE_BITS; // field widths: 7 + 25 = 32 bits
+  signed target : TARGET_BITS; // signed; JUMP_TARGET adds it (shifted by JUMP_ALIGN_BITS) to pc
  };
  
  struct rtype_t
  {
-  unsigned rc : GPRID_BITS;
-  unsigned shamt : SHAMT_BITS;
-  unsigned unused : 1;
-  unsigned funct : FUNCT_BITS;
-  unsigned rb : GPRID_BITS;
-  unsigned ra : GPRID_BITS;
    unsigned opcode : OPCODE_BITS;
+  unsigned funct : FUNCT_BITS; // field widths: 7 + 3 + 7 + 5 + 5 + 5 = 32 bits
+  unsigned functr : FUNCTR_BITS;
+  unsigned rs2 : XPRID_BITS; // RS1, RS2 and RD all read their register index through this struct
+  unsigned rs1 : XPRID_BITS;
+  unsigned rd : XPRID_BITS;
  };
  
-struct btype_t
+struct ltype_t // takes over the bigimm layout that the removed btype_t used to describe
  {
-  unsigned bigimm : BIGIMM_BITS;
-  unsigned rt : GPRID_BITS;
    unsigned opcode : OPCODE_BITS;
+  unsigned bigimm : BIGIMM_BITS; // read through the BIGIMM macro; field widths: 7 + 20 + 5 = 32 bits
+  unsigned rd : XPRID_BITS;
  };
  
  struct ftype_t
  {
-  unsigned rc : FPRID_BITS;
-  unsigned rd : FPRID_BITS;
-  unsigned ffunct : FFUNCT_BITS;
-  unsigned rb : FPRID_BITS;
-  unsigned ra : FPRID_BITS;
    unsigned opcode : OPCODE_BITS;
+  unsigned ffunct : FFUNCT_BITS; // field widths: 7 + 2 + 3 + 5 + 5 + 5 + 5 = 32 bits
+  unsigned rm : RM_BITS; // the value 7 makes RM / UT_RM take the mode from fsr instead
+  unsigned rs3 : FPRID_BITS;
+  unsigned rs2 : FPRID_BITS;
+  unsigned rs1 : FPRID_BITS;
+  unsigned rd  : FPRID_BITS;
  };
  
  union insn_t
@@ -119,38 +138,164 @@ union insn_t
    jtype_t jtype;
    rtype_t rtype;
    btype_t btype;
+  ltype_t ltype;
    ftype_t ftype;
    uint32_t bits;
  };
  
+#include <stdio.h>
+class do_writeback // proxy for one register slot: assigning to it writes rf[rd], converting it to reg_t reads rf[rd]
+{
+public:
+  do_writeback(reg_t* _rf, int _rd) : rf(_rf), rd(_rd) {} // only stores the pointer and index; nothing is read or written yet
+
+  const do_writeback& operator = (reg_t rhs)
+  {
+#if 0 // register-write trace, compiled out
+    printf("R[%x] <= %llx\n",rd,(long long)rhs);
+#endif
+    rf[rd] = rhs;
+    rf[0] = 0; // slot 0 is forced back to zero after every write, so a write with rd == 0 leaves no lasting value
+    return *this;
+  }
+
+  operator reg_t() { return rf[rd]; } // lets the proxy appear on the right-hand side of an expression
+
+private:
+  reg_t* rf; // base of the register array being written
+  int rd;    // index of the target slot within rf
+};
+
+#define throw_illegal_instruction /* throws the vector variant of the trap when utmode is set, the plain one otherwise */ \
+  ({ if (utmode) throw trap_vector_illegal_instruction; \
+     else throw trap_illegal_instruction; }) /* GNU statement-expression: the whole macro is usable as one expression statement */
+
  // helpful macros, etc
-#define RA R[insn.rtype.ra]
-#define RB R[insn.rtype.rb]
-#define RC R[insn.rtype.rc]
-#define FRA FR[insn.ftype.ra]
-#define FRB FR[insn.ftype.rb]
-#define FRC FR[insn.ftype.rc]
-#define FRD FR[insn.ftype.rd]
-#define BIGIMM insn.btype.bigimm
-#define IMM insn.itype.imm
-#define SIMM ((int32_t)((uint32_t)insn.itype.imm<<(32-IMM_BITS))>>(32-IMM_BITS))
-#define SHAMT insn.rtype.shamt
+#define RS1 XPR[insn.rtype.rs1] // direct array access, no proxy
+#define RS2 XPR[insn.rtype.rs2]
+#define RD do_writeback(XPR,insn.rtype.rd) // write proxy: "RD = v" stores v and then re-zeroes XPR[0]
+#define RA do_writeback(XPR,1) // write proxy fixed to register 1
+#define FRS1 FPR[insn.ftype.rs1]
+#define FRS2 FPR[insn.ftype.rs2]
+#define FRS3 FPR[insn.ftype.rs3]
+#define FRD FPR[insn.ftype.rd] // plain lvalue; FP destinations do not go through do_writeback
+#define BIGIMM insn.ltype.bigimm // unsigned 20-bit field
+#define SIMM insn.itype.imm12 // signed 12-bit field, so the value is already sign-extended when read
+#define BIMM ((signed)insn.btype.immlo | (insn.btype.immhi << IMMLO_BITS)) // 12-bit offset rebuilt from its two halves; the sign comes from immhi
+#define SHAMT (insn.itype.imm12 & 0x3F) // low 6 bits of imm12
+#define SHAMTW (insn.itype.imm12 & 0x1F) // low 5 bits of imm12
  #define TARGET insn.jtype.target
-#define BRANCH_TARGET (npc + (SIMM*sizeof(insn_t)))
-#define JUMP_TARGET ((npc & ~((1<<TARGET_BITS)*sizeof(insn_t)-1)) + TARGET*sizeof(insn_t))
-
-#define require_supervisor if(!(sr & SR_S)) throw trap_privileged_instruction
-#define require64 if(gprlen != 64) throw trap_illegal_instruction
-#define require_fp if(!(sr & SR_EF)) throw trap_fp_disabled
-#define cmp_trunc(reg) (reg_t(reg) << (64-gprlen))
-#define set_fp_exceptions ({ set_fsr((fsr & ~FSR_CEXC) | \
-                                (float_exception_flags << FSR_AEXC_SHIFT) | \
-                                (float_exception_flags << FSR_CEXC_SHIFT)); \
-                             float_exception_flags = 0; })
-
-static inline sreg_t sext32(int32_t arg)
+#define BRANCH_TARGET (pc + (BIMM << BRANCH_ALIGN_BITS)) // relative to pc; the removed definition was relative to npc
+#define JUMP_TARGET (pc + (TARGET << JUMP_ALIGN_BITS)) // pc-relative as well; the removed definition spliced TARGET into the upper bits of npc
+#define RM ({ int rm = insn.ftype.rm; /* start from the instruction's own rm field */ \
+              if(rm == 7) rm = (fsr & FSR_RD) >> FSR_RD_SHIFT; /* 7 selects the FSR_RD field of fsr instead */ \
+              if(rm > 4) throw_illegal_instruction; /* values above FP_RD_NMM (4) are rejected, whichever source they came from */ \
+              rm; })
+
+#define require_supervisor if(unlikely(!(sr & SR_S))) throw trap_privileged_instruction // throws unless SR_S is set in sr
+#define xpr64 (xprlen == 64)
+#define require_xpr64 if(unlikely(!xpr64)) throw_illegal_instruction // throws unless xprlen is 64
+#define require_xpr32 if(unlikely(xpr64)) throw_illegal_instruction // throws when xprlen is 64
+#define require_fp if(unlikely(!(sr & SR_EF))) throw trap_fp_disabled // throws unless SR_EF is set in sr
+#define require_vector /* throws unless SR_EV is set; outside utmode it also needs vecbanks_count >= 3 */ \
+  ({ if(unlikely(!(sr & SR_EV))) throw trap_vector_disabled; /* failure paths marked unlikely, like the other require_* guards */ \
+    else if (unlikely(!utmode && (vecbanks_count < 3))) throw trap_vector_bank; \
+  })
+#define cmp_trunc(reg) (reg_t(reg) << (64-xprlen)) // shifts out everything above the low xprlen bits; the result is left-aligned, not shifted back
+#define set_fp_exceptions ({ set_fsr(fsr | /* accumulate the pending flags into fsr; unlike the removed version, existing fsr bits are never cleared here */ \
+                               (softfloat_exceptionFlags << FSR_AEXC_SHIFT)); \
+                             softfloat_exceptionFlags = 0; }) /* then reset the pending flags */
+
+#define sext32(x) ((sreg_t)(int32_t)(x)) // truncate to 32 bits, then sign-extend to sreg_t (replaces the removed inline function)
+#define zext32(x) ((reg_t)(uint32_t)(x)) // truncate to 32 bits, then zero-extend to reg_t
+#define sext_xprlen(x) ((sreg_t(x) << (64-xprlen)) >> (64-xprlen)) // sign-extend from bit xprlen-1; relies on >> of a negative sreg_t being an arithmetic shift
+#define zext_xprlen(x) ((reg_t(x) << (64-xprlen)) >> (64-xprlen)) // clear every bit above the low xprlen bits
+
+#ifndef RISCV_ENABLE_RVC
+# define set_pc(x) \
+  do { if((x) & (sizeof(insn_t)-1)) \
+       { badvaddr = (x); throw trap_instruction_address_misaligned; } \
+       npc = (x); \
+     } while(0)
+#else
+# define set_pc(x) \
+  do { if((x) & ((sr & SR_EC) ? 1 : 3)) \
+       { badvaddr = (x); throw trap_instruction_address_misaligned; } \
+       npc = (x); \
+     } while(0)
+#endif
+
+// RVC stuff
+
+#define INSN_IS_RVC(x) (((x) & 0x3) < 0x3) // true unless the two low bits of x are both set
+#define insn_length(x) (INSN_IS_RVC(x) ? 2 : 4) // 2 for an encoding INSN_IS_RVC accepts, 4 otherwise
+#define require_rvc if(unlikely(!(sr & SR_EC))) throw_illegal_instruction // throws unless SR_EC is set; marked unlikely like the other require_* guards
+
+#define CRD_REGNUM ((insn.bits >> 5) & 0x1f) // bits 5-9 of the raw encoding
+#define CRD do_writeback(XPR, CRD_REGNUM) // write proxy, so XPR[0] is re-zeroed after the store
+#define CRS1 XPR[(insn.bits >> 10) & 0x1f] // register index from bits 10-14
+#define CRS2 XPR[(insn.bits >> 5) & 0x1f] // register index from bits 5-9, the same field CRD_REGNUM reads
+#define CIMM6 ((int32_t)((insn.bits >> 10) & 0x3f) << 26 >> 26) // bits 10-15, sign-extended from 6 bits by the shift pair
+#define CIMM5U ((insn.bits >> 5) & 0x1f) // bits 5-9, unsigned
+#define CIMM5 ((int32_t)CIMM5U << 27 >> 27) // the same 5 bits, sign-extended
+#define CIMM10 ((int32_t)((insn.bits >> 5) & 0x3ff) << 22 >> 22) // bits 5-14, sign-extended from 10 bits
+#define CBRANCH_TARGET (pc + (CIMM5 << BRANCH_ALIGN_BITS)) // same shape as BRANCH_TARGET, with CIMM5 as the offset
+#define CJUMP_TARGET (pc + (CIMM10 << JUMP_ALIGN_BITS)) // same shape as JUMP_TARGET, with CIMM10 as the offset
+
+static const int rvc_rs1_regmap[8] = { 20, 21, 2, 3, 4, 5, 6, 7 }; // translates a 3-bit register field into a full register number
+#define rvc_rd_regmap rvc_rs1_regmap // alias: the rd field uses the same table
+#define rvc_rs2b_regmap rvc_rs1_regmap // alias: the rs2b field uses the same table
+static const int rvc_rs2_regmap[8] = { 20, 21, 2, 3, 4, 5, 6, 0 }; // differs from the rs1 table only in the last entry (0 instead of 7)
+#define CRDS XPR[rvc_rd_regmap[(insn.bits >> 13) & 0x7]] // field in bits 13-15; plain lvalue, and the table never yields register 0
+#define FCRDS FPR[rvc_rd_regmap[(insn.bits >> 13) & 0x7]] // same field and table, indexing FPR
+#define CRS1S XPR[rvc_rs1_regmap[(insn.bits >> 10) & 0x7]] // field in bits 10-12
+#define CRS2S XPR[rvc_rs2_regmap[(insn.bits >> 13) & 0x7]] // field in bits 13-15; field value 7 selects XPR[0]
+#define CRS2BS XPR[rvc_rs2b_regmap[(insn.bits >> 5) & 0x7]] // field in bits 5-7
+#define FCRS2S FPR[rvc_rs2_regmap[(insn.bits >> 13) & 0x7]] // field value 7 selects FPR[0] here
+
+// vector stuff
+#define VL vl // upper bound of the UT_LOOP_START loop
+
+#define UT_RS1(idx) uts[idx]->XPR[insn.rtype.rs1] // the UT_* macros mirror RS1/RS2/RD/..., but index the register files of uts[idx]
+#define UT_RS2(idx) uts[idx]->XPR[insn.rtype.rs2]
+#define UT_RD(idx) do_writeback(uts[idx]->XPR,insn.rtype.rd) // write proxy on that element's XPR
+#define UT_RA(idx) do_writeback(uts[idx]->XPR,1)
+#define UT_FRS1(idx) uts[idx]->FPR[insn.ftype.rs1]
+#define UT_FRS2(idx) uts[idx]->FPR[insn.ftype.rs2]
+#define UT_FRS3(idx) uts[idx]->FPR[insn.ftype.rs3]
+#define UT_FRD(idx) uts[idx]->FPR[insn.ftype.rd]
+#define UT_RM(idx) ((insn.ftype.rm != 7) ? insn.ftype.rm : /* 7 falls back to the FSR_RD field of that element's fsr */ \
+              ((uts[idx]->fsr & FSR_RD) >> FSR_RD_SHIFT)) /* NOTE(review): unlike RM, values above 4 are not rejected here - confirm this is intended */
+
+#define UT_LOOP_START for (int i=0;i<VL; i++) { // opens a loop over i in [0, VL); must be closed with UT_LOOP_END
+#define UT_LOOP_END } // closes the brace opened by UT_LOOP_START
+#define UT_LOOP_RS1 UT_RS1(i) // the UT_LOOP_* accessors are the UT_* ones bound to the loop variable i
+#define UT_LOOP_RS2 UT_RS2(i)
+#define UT_LOOP_RD UT_RD(i)
+#define UT_LOOP_RA UT_RA(i)
+#define UT_LOOP_FRS1 UT_FRS1(i)
+#define UT_LOOP_FRS2 UT_FRS2(i)
+#define UT_LOOP_FRS3 UT_FRS3(i)
+#define UT_LOOP_FRD UT_FRD(i)
+#define UT_LOOP_RM UT_RM(i)
+
+#define VEC_LOAD(dst, func, inc) /* for i in [0, VL): UT_LOOP_<dst> = mmu.func(addr), then addr += inc */ \
+  reg_t addr = RS1; /* starts at RS1; declares addr in the expanding scope, so the macro can be used once per scope */ \
+  UT_LOOP_START \
+    UT_LOOP_##dst = mmu.func(addr); /* dst is pasted onto UT_LOOP_, e.g. RD or FRD */ \
+    addr += inc; \
+  UT_LOOP_END
+
+#define VEC_STORE(src, func, inc) /* for i in [0, VL): mmu.func(addr, UT_LOOP_<src>), then addr += inc */ \
+  reg_t addr = RS1; /* starts at RS1; declares addr in the expanding scope, as in VEC_LOAD */ \
+  UT_LOOP_START \
+    mmu.func(addr, UT_LOOP_##src); /* src is pasted onto UT_LOOP_, e.g. RS2 or FRS2 */ \
+    addr += inc; \
+  UT_LOOP_END
+
+enum vt_command_t // takes the place of the removed sext32 inline function at the end of the header
  {
-  return arg;
-}
+  vt_command_stop, // the only enumerator so far
+};
  
  #endif