From 833752a4ba3a70eb50aa51193409295a7ecceb55 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Thu, 4 Oct 2018 14:11:02 +0100 Subject: [PATCH] reorganise twin-predication move the offset incrementing to outside of the sv_insn_t, and pass in the src_offs and dest_offs by reference, mirroring and matching the predication src and dest referencing --- id_regs.py | 4 ++ riscv/insn_template_sv.cc | 49 ++++++++++++++-------- riscv/sv.cc | 6 +-- riscv/sv_decode.h | 87 ++++++++++++--------------------------- 4 files changed, 64 insertions(+), 82 deletions(-) diff --git a/id_regs.py b/id_regs.py index 49f68b0..cd82b60 100644 --- a/id_regs.py +++ b/id_regs.py @@ -141,6 +141,10 @@ def find_registers(fname, twin_predication): res.append('#define SRC_REG %s' % found) res.append('#define PRED_ARGS %s' % ','.join(predargs)) + offsargs = [] + for i in range(4): + offsargs.append(predargs[i].replace('pred', 'offs')) + res.append('#define OFFS_ARGS %s' % ','.join(offsargs)) return '\n'.join(res) diff --git a/riscv/insn_template_sv.cc b/riscv/insn_template_sv.cc index 40d2a9b..cab74d7 100644 --- a/riscv/insn_template_sv.cc +++ b/riscv/insn_template_sv.cc @@ -19,12 +19,14 @@ reg_t FN(processor_t* p, insn_t s_insn, reg_t pc) // REGS_PATTERN is generated by id_regs.py (per opcode) unsigned int floatintmap = REGS_PATTERN; reg_t dest_pred = ~0x0; + int dest_offs = 0; bool zeroing = false; #ifdef INSN_CATEGORY_TWINPREDICATION reg_t src_pred = ~0x0; + int src_offs = 0; bool zeroingsrc = false; #endif - sv_insn_t insn(p, bits, floatintmap, PRED_ARGS); + sv_insn_t insn(p, bits, floatintmap, PRED_ARGS, OFFS_ARGS); if (vlen > 0) { fprintf(stderr, "pre-ex reg %s %x rd %ld rs1 %ld rs2 %ld vlen %d\n", @@ -57,45 +59,55 @@ reg_t FN(processor_t* p, insn_t s_insn, reg_t pc) { insn.reset_vloop_check(); #ifdef INSN_CATEGORY_TWINPREDICATION - int srcoffs = insn.rs_offs(); + if (src_offs >= vlen) { + break; + } + if (dest_offs >= vlen) { + break; + } +#ifdef INSN_C_MV + fprintf(stderr, "pre twin reg %s src %d dest %d pred %lx %lx\n", + xstr(INSN), src_offs, dest_offs, src_pred, dest_pred); +#endif if (!zeroingsrc) { - while ((src_pred & (1<= vlen) { break; } } } - int destoffs = insn.rd_offs(); if (!zeroing) { - while ((dest_pred & (1<= vlen) { break; } } } - if (srcoffs == vlen || destoffs == vlen) { + if (src_offs >= vlen || dest_offs >= vlen) { break; // end vector loop if either src or dest pred reaches end } if (vlen > 1) { fprintf(stderr, "twin reg %s src %d dest %d pred %lx %lx\n", - xstr(INSN), srcoffs, destoffs, src_pred, dest_pred); + xstr(INSN), src_offs, dest_offs, src_pred, dest_pred); } #endif #ifdef INSN_C_MV - fprintf(stderr, "pre loop reg %s %x vloop %d " \ - "vlen %d stop %d pred %lx rd%lx rvc2%d\n", - xstr(INSN), INSNCODE, voffs, vlen, insn.stop_vloop(), - dest_pred & (1< 1) { - insn.reset_caches(); // ready to increment offsets #if defined(USING_REG_RD) fprintf(stderr, "reg %s %x vloop %d vlen %d stop %d pred %lx rd%lx\n", xstr(INSN), INSNCODE, voffs, vlen, insn.stop_vloop(), @@ -120,6 +131,10 @@ reg_t FN(processor_t* p, insn_t s_insn, reg_t pc) { break; } +#ifdef INSN_CATEGORY_TWINPREDICATION + src_offs += 1; +#endif + dest_offs += 1; } #else insn_t insn(bits); diff --git a/riscv/sv.cc b/riscv/sv.cc index 7f7846a..9cc4650 100644 --- a/riscv/sv.cc +++ b/riscv/sv.cc @@ -66,7 +66,7 @@ bool sv_insn_t::sv_check_reg(bool intreg, uint64_t reg) * of SV. it's "supposed" to "just" be a vectorisation API. it isn't: * it's quite a bit more. */ -uint64_t sv_insn_t::remap(uint64_t reg, bool intreg, int &voffs, int &newoffs) +uint64_t sv_insn_t::remap(uint64_t reg, bool intreg, int &voffs) { // okaay so first determine which map to use. intreg is passed // in (ultimately) from id_regs.py's examination of the use of @@ -101,10 +101,6 @@ uint64_t sv_insn_t::remap(uint64_t reg, bool intreg, int &voffs, int &newoffs) // and, at last, we have "parallelism" a la contiguous registers. reg += voffs; // wheww :) - // however... before returning, we increment the loop-offset for - // this particular register, so that on the next loop the next - // contiguous register will be used. - newoffs = voffs + 1; return reg; } diff --git a/riscv/sv_decode.h b/riscv/sv_decode.h index aabbe79..7bffe1b 100644 --- a/riscv/sv_decode.h +++ b/riscv/sv_decode.h @@ -21,48 +21,32 @@ class sv_insn_t: public insn_t { public: sv_insn_t(processor_t *pr, insn_bits_t bits, unsigned int f, - uint64_t &p_rd, uint64_t &p_rs1, uint64_t &p_rs2, uint64_t &p_rs3) : + uint64_t &p_rd, uint64_t &p_rs1, uint64_t &p_rs2, uint64_t &p_rs3, + int &o_rd, int &o_rs1, int &o_rs2, int &o_rs3) : insn_t(bits), p(pr), vloop_continue(false), fimap(f), - cached_rd(0xff), cached_rs1(0xff), - cached_rs2(0xff), cached_rs3(0xff), - offs_rd(0), offs_rs(0), - new_offs_rd(0), new_offs_rs(0), + offs_rd(o_rd), offs_rs1(o_rs1), offs_rs2(o_rs2), offs_rs3(o_rs3), prd(p_rd), prs1(p_rs1), prs2(p_rs2), prs3(p_rs3) {} uint64_t rd () { return predicated(_rd (), offs_rd, prd); } - uint64_t rs1() { return predicated(_rs1(), offs_rs, prs1); } - uint64_t rs2() { return predicated(_rs2(), offs_rs, prs2); } - uint64_t rs3() { return predicated(_rs3(), offs_rs, prs3); } - uint64_t rvc_rs1 () { return predicated(_rvc_rs1 (), offs_rs, prs1); } - uint64_t rvc_rs1s() { return predicated(_rvc_rs1s(), offs_rs, prs1); } - uint64_t rvc_rs2 () { return predicated(_rvc_rs2 (), offs_rs, prs2); } - uint64_t rvc_rs2s() { return predicated(_rvc_rs2s(), offs_rs, prs2); } + uint64_t rs1() { return predicated(_rs1(), offs_rs1, prs1); } + uint64_t rs2() { return predicated(_rs2(), offs_rs2, prs2); } + uint64_t rs3() { return predicated(_rs3(), offs_rs3, prs3); } + uint64_t rvc_rs1 () { return predicated(_rvc_rs1 (), offs_rs1, prs1); } + uint64_t rvc_rs1s() { return predicated(_rvc_rs1s(), offs_rs1, prs1); } + uint64_t rvc_rs2 () { return predicated(_rvc_rs2 (), offs_rs2, prs2); } + uint64_t rvc_rs2s() { return predicated(_rvc_rs2s(), offs_rs2, prs2); } - uint64_t _rd () { return _remap(insn_t::rd (), fimap & REG_RD , - offs_rd , cached_rd, new_offs_rd); } - uint64_t _rs1() { return _remap(insn_t::rs1(), fimap & REG_RS1, - offs_rs, cached_rs1, new_offs_rs); } - uint64_t _rs2() { return _remap(insn_t::rs2(), fimap & REG_RS2, - offs_rs, cached_rs2, new_offs_rs); } - uint64_t _rs3() { return _remap(insn_t::rs3(), fimap & REG_RS3, - offs_rs, cached_rs3, new_offs_rs); } + uint64_t _rd () { return _remap(insn_t::rd (), fimap & REG_RD , offs_rd); } + uint64_t _rs1() { return _remap(insn_t::rs1(), fimap & REG_RS1, offs_rs1); } + uint64_t _rs2() { return _remap(insn_t::rs2(), fimap & REG_RS2, offs_rs2); } + uint64_t _rs3() { return _remap(insn_t::rs3(), fimap & REG_RS3, offs_rs3); } uint64_t _rvc_rs1 () { return _remap(insn_t::rvc_rs1(), fimap & REG_RVC_RS1, - offs_rs, cached_rs1, new_offs_rs); } + offs_rs1); } uint64_t _rvc_rs1s() { return _remap(insn_t::rvc_rs1s(), fimap & REG_RVC_RS1S, - offs_rs, cached_rs1, new_offs_rs); } + offs_rs1); } uint64_t _rvc_rs2 () { return _remap(insn_t::rvc_rs2(), fimap & REG_RVC_RS2, - offs_rs, cached_rs2, new_offs_rs); } + offs_rs2); } uint64_t _rvc_rs2s() { return _remap(insn_t::rvc_rs2s(), fimap & REG_RVC_RS2S, - offs_rs, cached_rs2, new_offs_rs); } - - void reset_caches(void) - { - cached_rd = 0xff; - cached_rs1 = 0xff; - cached_rs2 = 0xff; - cached_rs3 = 0xff; - offs_rd = new_offs_rd; - offs_rs = new_offs_rs; - } + offs_rs2); } bool sv_check_reg(bool intreg, uint64_t reg); sv_reg_entry* get_regentry(uint64_t reg, bool isint); @@ -72,23 +56,14 @@ public: void reset_vloop_check(void) { vloop_continue = false; } bool stop_vloop(void); - int rd_offs(void) { return offs_rd; } - int rs_offs(void) { return offs_rs; } - int rd_offs_inc(void) { offs_rd += 1; return offs_rd; } - int rs_offs_inc(void) { offs_rs += 1; return offs_rs; } - processor_t *p; private: bool vloop_continue; unsigned int fimap; - uint64_t cached_rd; - uint64_t cached_rs1; - uint64_t cached_rs2; - uint64_t cached_rs3; - int offs_rd; - int offs_rs; - int new_offs_rd; - int new_offs_rs; + int &offs_rd; + int &offs_rs1; + int &offs_rs2; + int &offs_rs3; uint64_t &prd; uint64_t &prs1; uint64_t &prs2; @@ -96,26 +71,18 @@ private: // remaps the register through the lookup table. // will need to take the current loop index/offset somehow - uint64_t remap(uint64_t reg, bool isint, int &offs, int &newoffs); + uint64_t remap(uint64_t reg, bool isint, int &offs); // cached version of remap: if remap is called multiple times // by an emulated instruction it would increment the loop offset // before it's supposed to. - uint64_t _remap(uint64_t reg, bool isint, int &offs, - uint64_t &cached, int &newoffs) + uint64_t _remap(uint64_t reg, bool isint, int &offs) { - if (cached == 0xff) - { - cached = remap(reg, isint, offs, newoffs); - } - else + if (sv_check_reg(isint, reg)) { - if (sv_check_reg(isint, reg)) - { - vloop_continue = true; - } + vloop_continue = true; } - return cached; + return remap(reg, isint, offs); } uint64_t predicated(uint64_t reg, int offs, uint64_t pred); -- 2.30.2