save branch address and predication merged result, and test after branch
author: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Tue, 9 Oct 2018 16:01:12 +0000 (17:01 +0100)
committer: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Tue, 9 Oct 2018 16:01:12 +0000 (17:01 +0100)
if the predication was all good, go ahead with the branch

still not tested for predicated / vectorised branches yet; however,
at least scalar branches work

riscv/insn_template_sv.cc
riscv/sv.cc
riscv/sv_decode.h

index f96f117206ac225ebe2cffc936477b54453b02ea..7a67f6c1b3721678c5729831fe4450544c9dcccd 100644 (file)
@@ -27,7 +27,7 @@
    determined by id_regs.py.
 
    in case you're wondering: yes, really, id_regs.py actually parses
-   the actual riscv/insns/*.h implementations (all of them), looking
+   the actual riscv/insns/impl.h implementations (all of them), looking
    for uses of "RVC_RS1" and "WRITE_RD" and so on, as an indicator
    of the register usage for that specific opcode.  whilst it was
    hypothetically possible to use this repo, kindly written by michael clark:
@@ -238,6 +238,17 @@ reg_t FN(processor_t* p, insn_t s_insn, reg_t pc)
 #endif
       *dest_offs += 1;
   }
+#ifdef INSN_TYPE_BRANCH
+  // ok, at the end of the loop, if the predicates are equal,
+  // we're good to branch.  use the saved address (to avoid
+  // calculating the BRANCH_TARGET macro again)
+  uint64_t mask = (1<<vlen) - 1;
+  if (insn.get_if_one_reg_vectorised() &&
+      (insn.get_saved_branch_rd() & mask) == (target_pred & mask))
+  {
+    _set_pc(insn.get_saved_branch_addr());
+  }
+#endif
   // ok at the **END** of the looping we set the (ref'd) dest_offs and
   // src_offs to zero.  this allows any exceptions that are thrown
   // to leave dest_offs and src_offs as they are, such that when
index edb6f8ba47dbfd16ab89ab766501793f2dde7040..c06bdd5575751829352c396664c74778e8f24fa9 100644 (file)
@@ -207,11 +207,12 @@ uint64_t sv_insn_t::rd_bitset(uint64_t bit, bool set)
 void sv_insn_t::setpc(int xlen, int vlen, reg_t &npc, reg_t addr, uint64_t offs,
                       uint64_t predicate)
 {
-    if (vlen == 1 or not at_least_one_reg_vectorised)
+    save_branch_addr = addr;
+    if (not at_least_one_reg_vectorised)
     {
         _set_pc(addr);
         return;
     }
-    rd_bitset(offs, true);
+    save_branch_rd = rd_bitset(offs, true);
 }
 
index 92ed7956a9ffd5fe98071a7f5176621f7fbb01a5..42ea2e3334e3d6b5c5ca26548216ef65b72b7d7d 100644 (file)
@@ -28,7 +28,8 @@ public:
             at_least_one_reg_vectorised(false), fimap(f),
             offs_rd(o_rd), offs_rs1(o_rs1), offs_rs2(o_rs2), offs_rs3(o_rs3),
             offs_imm(o_imm),
-            prd(p_rd), prs1(p_rs1), prs2(p_rs2), prs3(p_rs3) {}
+            prd(p_rd), prs1(p_rs1), prs2(p_rs2), prs3(p_rs3),
+            save_branch_addr(0) {}
 
   uint64_t _rvc_spoffs_imm(uint64_t elwidth, uint64_t baseoffs);
   uint64_t rvc_lwsp_imm() { return _rvc_spoffs_imm(4, insn_t::rvc_lwsp_imm()); }
@@ -86,6 +87,10 @@ public:
     return remap(reg, isint, *offs);
   }
 
+  uint64_t get_saved_branch_addr() { return save_branch_addr; }
+  uint64_t get_saved_branch_rd() { return save_branch_rd; }
+  uint64_t get_if_one_reg_vectorised() { return at_least_one_reg_vectorised; }
+
 private:
   bool vloop_continue;
   bool at_least_one_reg_vectorised;
@@ -99,6 +104,8 @@ private:
   uint64_t &prs1;
   uint64_t &prs2;
   uint64_t &prs3;
+  uint64_t save_branch_addr;
+  uint64_t save_branch_rd;
 
   // remaps the register through the lookup table.
   // will need to take the current loop index/offset somehow