add comment
[riscv-isa-sim.git] / riscv / sv.cc
1 #include "sv.h"
2 #include "sv_decode.h"
3 #include "processor.h"
4
/* translates an SV "elwidth" CSR encoding into a bit-count.
 * encoding 0 means "default" (the hart's XLEN); 1 = 8-bit,
 * 2 = 16-bit, and anything else (3) = 32-bit.
 */
int get_bitwidth(uint8_t elwidth, int xlen)
{
  if (elwidth == 0) {
    return xlen; // "default": use the full register width
  }
  if (elwidth == 1) {
    return 8;
  }
  if (elwidth == 2) {
    return 16;
  }
  return 32;
}
14
/* combines two elwidth encodings, returning the one that denotes
 * the widest element.  encoding 0 ("default"/XLEN) is the widest
 * possible, so if either side is 0 the result is 0; otherwise the
 * numerically larger encoding wins.
 */
uint8_t maxelwidth(uint8_t wid1, uint8_t wid2)
{
  if (wid1 == 0 || wid2 == 0) {
    return 0; // "default" (XLEN) dominates everything
  }
  return wid1 > wid2 ? wid1 : wid2;
}
22
/* convenience routines to map from compact 8-bit to 16-bit format,
 * for use in new VBLOCK format
 */
void sv_regmap_8to16(union sv_reg_csr_entry8 const& r8,
                     union sv_reg_csr_entry &r16)
{
  // fields common to both the 8-bit and 16-bit formats: copy through
  r16.b.regkey = r8.b.regkey;
  r16.b.elwidth = r8.b.elwidth;
  r16.b.type = r8.b.type;
  // the compact format has no separate redirect-target field, so the
  // target register index is derived from the key itself.
  r16.b.regidx = r8.b.regkey << 2; // multiply by 4, no room for 6 bits
  // the compact format cannot express a scalar-only redirection:
  // every entry it produces is treated as vectorised.
  r16.b.isvec = 1; // has to be a vector
}
35
/* maps a compact 8-bit predication CSR entry to the full 16-bit
 * format.  table_idx is the entry's position in the 8-bit table;
 * the predicate register is derived from it rather than stored.
 */
void sv_predmap_8to16(union sv_pred_csr_entry8 const& r8,
                      union sv_pred_csr_entry &r16,
                      uint64_t table_idx)
{
  // fields common to both formats: copy through
  r16.b.regkey = r8.b.regkey;
  r16.b.zero = r8.b.zero;
  r16.b.inv = r8.b.inv;
  r16.b.type = r8.b.type;
  // compact entries use consecutive registers as predicates,
  // implicitly beginning at x9: entry N predicates via x(9+N)
  r16.b.regidx = table_idx + 9; // 8-bit format starts at x9
  // fail-first cannot be encoded in the compact format at all
  r16.b.ffirst = 0; // no room, whoops.
}
47
/* increments the sub-offset appropriately in a FSM-based
   version of a twin-nested for-loop:
   for (suboffs = 0; suboffs < subvl; suboffs++) {
   ... doooo stuuuuff (python would use "yield" here)
   }
   suboffs = 0; // reset to zero after "loop"

   returns true when the inner (SUBVL) loop has wrapped, i.e. when
   the outer (VL) loop should advance.  vlen is currently unused but
   kept for interface compatibility.
*/
bool inc_offs(int vlen, int subvl, int &suboffs)
{
  if (++suboffs < subvl) {
    return false; // still inside the inner loop: outer must not advance
  }
  suboffs = 0;  // inner loop wrapped: reset for the next outer iteration
  return true;  // indicates outer (VL) loop should increment
}
64
/* sv_insn_t constructor: wraps a decoded instruction (bits) with the
 * SV per-instruction context: pointers to the loop offsets for each
 * operand, the predicate values, and element-width information.
 *
 * f (stored as fimap) is a bitmask describing which source-register
 * fields the instruction actually uses -- note that this has to match
 * with id_regs.py patterns.  the constructor body scans that bitmask
 * and computes src_bitwidth: the widest element width of any active,
 * redirected source operand (0 if none are redirected).
 */
sv_insn_t::sv_insn_t(processor_t *pr, bool _sv_enabled,
  insn_bits_t bits, unsigned int f,
  int _xlen, int _src_flen, int _dest_flen,
  uint64_t &p_rd, uint64_t &p_rs1, uint64_t &p_rs2, uint64_t &p_rs3,
  uint64_t &p_sp, uint64_t *p_im,
  int *o_rd, int *o_rs1, int *o_rs2, int *o_rs3, int *o_sp,
  int *o_imm,
  int *s_offs,
  bool _sign) :
  insn_t(bits), p(pr), src_bitwidth(0),
  xlen(_xlen), src_flen(_src_flen), dest_flen(_dest_flen),
  sv_enabled(_sv_enabled), signextended(_sign),
  vloop_continue(false),
  at_least_one_reg_vectorised(false), fimap(f),
  offs_rd(o_rd), offs_rs1(o_rs1), offs_rs2(o_rs2), offs_rs3(o_rs3),
  offs_sp(o_sp),
  offs_imm(o_imm),
  suboffs(s_offs),
  prd(p_rd), prs1(p_rs1), prs2(p_rs2), prs3(p_rs3), psp(p_sp),
  save_branch_addr(0)
{
  // work out the source element width based on what is used
  // note that this has to match with id_regs.py patterns

  // walk each single-bit REG_* flag in fimap (bits 1..11, i.e. the
  // mask values 2, 4, 8, ... -- presumably matching the REG_RS1 etc.
  // constants; TODO confirm against sv_decode.h)
  unsigned int bm=2;
  for (int i = 1; i < 12; i++, bm<<=1)
  {
    sv_reg_entry* r = NULL;
    // map the flag bit to the corresponding operand field and look up
    // its redirection entry (true = integer regfile, false = FP)
    if (bm == (REG_RS1 & fimap)) {
      r = get_regentry(insn_t::rs1(), true);
    } else if (bm == (REG_RS2 & fimap)) {
      r = get_regentry(insn_t::rs2(), true);
    } else if (bm == (REG_RS3 & fimap)) {
      r = get_regentry(insn_t::rs3(), true);
    } else if (bm == (REG_RVC_RS1 & fimap)) {
      r = get_regentry(insn_t::rvc_rs1(), true);
    } else if (bm == (REG_RVC_RS2 & fimap)) {
      r = get_regentry(insn_t::rvc_rs2(), true);
    } else if (bm == (REG_RVC_RS1S & fimap)) {
      r = get_regentry(insn_t::rvc_rs1s(), true);
    } else if (bm == (REG_RVC_RS2S & fimap)) {
      r = get_regentry(insn_t::rvc_rs2s(), true);
    } else if (bm == (REG_FRS1 & fimap)) {
      r = get_regentry(insn_t::rs1(), false);
    } else if (bm == (REG_FRS2 & fimap)) {
      r = get_regentry(insn_t::rs2(), false);
    } else if (bm == (REG_FRS3 & fimap)) {
      r = get_regentry(insn_t::rs3(), false);
    }
    // operand not used by this instruction, or not redirected: skip
    if (r == NULL || !r->active) {
      continue;
    }
    // accumulate the widest source element width seen so far
    uint8_t elwidth = r->elwidth;
    uint8_t bitwidth = get_bitwidth(elwidth, _xlen);
    src_bitwidth = std::max(src_bitwidth, bitwidth);
  }
}
122
123 sv_pred_entry* sv_insn_t::get_predentry(uint64_t reg, bool intreg)
124 {
125 // okaay so first determine which map to use. intreg is passed
126 // in (ultimately) from id_regs.py's examination of the use of
127 // FRS1/RS1, WRITE_FRD/WRITE_RD, which in turn gets passed
128 // in from sv_insn_t::fimap...
129 sv_pred_entry *r;
130 if (intreg)
131 {
132 return &p->get_state()->sv().sv_pred_int_tb[reg];
133 }
134 else
135 {
136 return &p->get_state()->sv().sv_pred_fp_tb[reg];
137 }
138 }
139
140 sv_reg_entry* sv_insn_t::get_regentry(uint64_t reg, bool intreg)
141 {
142 // okaay so first determine which map to use. intreg is passed
143 // in (ultimately) from id_regs.py's examination of the use of
144 // FRS1/RS1, WRITE_FRD/WRITE_RD, which in turn gets passed
145 // in from sv_insn_t::fimap...
146 sv_reg_entry *r;
147 if (intreg)
148 {
149 return &p->get_state()->sv().sv_int_tb[reg];
150 }
151 else
152 {
153 return &p->get_state()->sv().sv_fp_tb[reg];
154 }
155 }
156
/* checks whether register "reg" is redirected AND vectorised by the
 * SV CSR tables.  returns true only for an active, vectorised entry.
 * side effect: sets at_least_one_reg_vectorised when such an entry is
 * seen (used later e.g. by setpc to decide scalar vs vector branch).
 */
bool sv_insn_t::sv_check_reg(bool intreg, uint64_t reg)
{
  sv_reg_entry *r = get_regentry(reg, intreg);
  if (r->elwidth != 0)
  {
    // XXX raise exception -- elwidth overrides are not handled on this
    // path yet; deliberately falls through for now
  }
  if (r->active && r->isvec)
  {
    fprintf(stderr, "checkreg: %ld active isvec\n", reg);
    at_least_one_reg_vectorised = true;
    return true;
  }
  if (r->active)
  {
    // redirected but scalar: log it, but this does not count as "vectorised"
    fprintf(stderr, "checkreg: %ld active !vec\n", reg);
  }
  return false;
}
176
177 /* this is the "remap" function. note that registers can STILL BE REDIRECTED
178 * yet NOT BE MARKED AS A VECTOR.
179 *
180 * reg 5 -> active=false, regidx=XX, isvec=XX -> returns 5
181 * reg 5 -> active=true , regidx=35, isvec=false -> returns 35
182 * reg 5 -> active=true , regidx=35, isvec=true -> returns 35 *PLUS LOOP*
183 *
184 * so it is possible for example to use the remap system for C instructions
185 * to get access to the *full* range of registers x0..x63 (yes 63 because
186 * SV doubles both the int and fp regfile sizes), by setting
187 * "active=true, isvec=false" for any of x8..x15
188 *
189 * where "active=true, isvec=true" this is the "expected" behaviour
190 * of SV. it's "supposed" to "just" be a vectorisation API. it isn't:
191 * it's quite a bit more.
192 */
193 reg_spec_t sv_insn_t::remap(uint64_t reg, bool intreg, int *voffs, int *subo)
194 {
195 reg_spec_t spec = {reg, NULL};
196 // okaay so first determine which map to use. intreg is passed
197 // in (ultimately) from id_regs.py's examination of the use of
198 // FRS1/RS1, WRITE_FRD/WRITE_RD, which in turn gets passed
199 // in from sv_insn_t::fimap...
200 sv_reg_entry *r = get_regentry(reg, intreg);
201
202 // next we check if this entry is active. if not, the register
203 // is not being "redirected", so just return the actual reg.
204 if (!r->active)
205 {
206 return spec; // not active: return as-is
207 }
208 vloop_continue = true;
209
210 // next we go through the lookup table. *THIS* is why the
211 // sv_reg_entry table is 32 entries (5-bit) *NOT* 6 bits
212 // the *KEY* (reg) is 5-bit, the *VALUE* (actual target reg) is 6-bit
213 // XXX TODO: must actually double NXPR and NXFR in processor.h to cope!!
214 reg = r->regidx;
215
216 // now we determine if this is a scalar/vector: if it's scalar
217 // we return the re-mapped register...
218 #if 0
219 if (!r->isvec) // scalar
220 {
221 return spec;
222 }
223 #endif
224 vloop_continue = true;
225
226 // aaand now, as it's a "vector", FINALLY we can pass the loop-offset
227 spec.reg = reg; //+ *voffs;
228 spec.offset = voffs;
229 spec.suboff = subo;
230 spec.isvec = r->isvec;
231 spec.signextend = signextended;
232 return spec;
233 }
234
235 /* gets the predication value (if active). returns all-1s if not active
236 * also returns whether zeroing is enabled/disabled for this register.
237 *
238 * uses the same sort of lookup logic as remap:
239 *
240 * - first thing to note is, there is one CSR table for FP and one for INT
241 * (so, FP regs can be predicated separately from INT ones)
242 * - redirection occurs if the CSR entry for the register is "active".
243 * - inversion of the predication can be set (so it's possible to have
244 * the same actual register value be unchanged yet be referred to by
245 * *TWO* redirections, one with inversion, one with not).
246 *
247 * note that this function *actually* returns the value of the (integer)
248 * register file, hence why processor_t has to be passed in
249 *
250 * note also that *even scalar* ops will be predicated (i.e. if a register
251 * has been set active=true and isvec=false in sv_int_tb or sv_fp_tb).
252 * the way to ensure that scalar ops are not predicated is: set VLEN=0,
253 * set active=false in sv_int_tb/sv_fp_tb for that register, or switch off
254 * the predication for that register (sv_pred_int_tb/sv_pred_fb_tb).
255 *
256 * note also that the hard limit on SV maximum vector length is actually
257 * down to the number of bits in the predication i.e. the bitwidth of integer
258 * registers (i.e. XLEN bits).
259 */
260 reg_t sv_insn_t::predicate(uint64_t reg, bool intreg, bool &zeroing)
261 {
262 sv_reg_entry *pr = get_regentry(reg, intreg);
263 if (!pr->active)
264 {
265 return ~0x0; // *REGISTER* not active: return all-1s (unconditional "on")
266 }
267 sv_pred_entry *r = get_predentry(reg, intreg);
268 if (!r->active)
269 {
270 return ~0x0; // *PREDICATION* not active: return all-1s (unconditional "on")
271 }
272 zeroing = r->zero;
273 fprintf(stderr, "predicate read %ld -> %ld\n", reg, r->regidx);
274 reg = r->regidx;
275 reg_spec_t rs = {reg, NULL};
276 reg_t pred = p->s.READ_REG(rs); // macros go through processor_t state
277 if (r->inv)
278 {
279 return ~pred;
280 }
281 return pred;
282 }
283
284 // XXX WARNING: this fn does NOT invert the predicate (if r->inv return ~pred)
285 reg_t sv_insn_t::predicate(uint64_t reg, bool intreg, bool &zeroing, bool &inv)
286 {
287 sv_reg_entry *pr = get_regentry(reg, intreg);
288 if (!pr->active)
289 {
290 return ~0x0; // *REGISTER* not active: return all-1s (unconditional "on")
291 }
292 sv_pred_entry *r = get_predentry(reg, intreg);
293 if (!r->active)
294 {
295 return ~0x0; // *PREDICATION* not active: return all-1s (unconditional "on")
296 }
297 zeroing = r->zero;
298 inv = r->inv;
299 fprintf(stderr, "predicate read %ld -> %ld\n", reg, r->regidx);
300 reg = r->regidx;
301 reg_spec_t rs = {reg, NULL};
302 reg_t predicate = p->s.READ_REG(rs); // macros go through processor_t state
303 return predicate;
304 }
305
306 reg_spec_t sv_insn_t::predicated(reg_spec_t const& spec, uint64_t pred)
307 {
308 reg_spec_t res = spec;
309 if (spec.offset == NULL)
310 {
311 return res;
312 }
313 if (pred & (1<<p->s.pred_remap(res.reg, *spec.offset)))
314 {
315 return res;
316 }
317 fprintf(stderr, "predication %ld %d %lx\n", spec.reg, (*spec.offset), pred);
318 res.reg = 0;
319 res.offset = 0;
320 res.suboff = 0;
321 res.isvec = spec.isvec;
322 return res;
323 }
324
325 bool sv_insn_t::stop_vloop(void)
326 {
327 return (p->get_state()->sv().vl == 0) || !vloop_continue;
328 }
329
330
331 /* c_lwsp's immediate offset is turned into a Vector "unit stride" if
332 * x2 (sp by convention) is marked as vectorised.
333 *
334 */
335 uint64_t sv_insn_t::_rvc_spoffs_imm(uint64_t elwidth, uint64_t offs)
336 {
337 sv_reg_entry *r = get_regentry(X_SP, 1);
338 if (!r->active)
339 {
340 return offs;
341 }
342 vloop_continue = true;
343 reg_t reg = r->regidx;
344 if (!r->isvec)
345 {
346 return offs;
347 }
348 offs += (*offs_imm) * elwidth;
349
350 return offs;
351 }
352
353 // for use in predicated branches. sets bit N if val=true; clears bit N if false
354 uint64_t sv_insn_t::rd_bitset(reg_t reg, int bit, bool set)
355 {
356 uint64_t val = STATE.XPR[reg];
357 if (set) {
358 val |= (1UL<<bit);
359 } else {
360 val &= ~(1UL<<bit);
361 }
362 STATE.XPR.write(reg, val);
363 return val;
364 }
365
366 /* called by the instruction: in scalar mode it performs the branch.
367 in SV mode, the fact that the bxx.h even tried to call setpc is
368 taken to mean that the compare succeeded, and save_branch_rd is
369 used instead to accumulate that information [or the target_reg
370 used instead, and copied into save_branch_rd]
371
372 at the **END** of the vector loop (back in insn_template_sv.cc)
373 the *accumulated* results in save_branch_rd are tested to see
374 if they *all* succeeded, and if so *then* the branch is taken.
375
376 TODO: the loop has to be modified to be aware of SUBVL, because
377 only if *all* subvector elements succeed is the save_branch_rd
378 bit allowed to be set.
379 */
380 void sv_insn_t::setpc(int xlen, int vlen, reg_t &npc, reg_t addr, uint64_t offs,
381 reg_t *target_reg, bool zeroing, bool inv)
382 {
383 save_branch_addr = addr;
384 if (not at_least_one_reg_vectorised) // scalar-scalar: make the branch
385 {
386 _set_pc(addr);
387 return;
388 }
389 if (target_reg != NULL) {
390 reg_spec_t rs = {*target_reg, NULL};
391 fprintf(stderr, "setpc pre rd %ld v %lx pred %lx\n",
392 *target_reg, p->s.READ_REG(rs), prs1);
393 }
394 if (target_reg) {
395 offs = p->s.pred_remap(*target_reg, offs);
396 }
397 if ((1<<offs) & prs1)
398 {
399 if (target_reg) {
400 save_branch_rd = rd_bitset(*target_reg, offs, !zeroing);
401 } else {
402 if (zeroing)
403 save_branch_rd &= ~(1UL<<offs);
404 else
405 save_branch_rd |= (1UL<<offs);
406 }
407 }
408 else if (inv) // target pred, meaning of inv bit is overloaded
409 {
410 vloop_continue = false;
411 }
412 fprintf(stderr, "setpc %lx offs %ld predicate %lx rs1 %ld rs2 %ld\n",
413 save_branch_rd, offs, prs1,
414 p->s.READ_REG(rs1()), p->s.READ_REG(rs2()));
415 }
416
417 uint8_t sv_insn_t::reg_elwidth(reg_t reg, bool intreg)
418 {
419 sv_reg_entry *r = get_regentry(reg, intreg);
420 if (r->active) {
421 return r->elwidth;
422 }
423 return 0;
424 }
425
426 uint64_t sv_csr_t::regpush(uint16_t csrval, int len, bool top)
427 {
428 // when csrval == 0 it means "pop".
429 // when reg != 0 and an existing entry exists, it means "change reg"
430 // when reg != 0 and an existing entry doesn't exist, it means "push"
431 // push/pop are to top when top is true, otherwise to bottom.
432 uint64_t reg = get_field(csrval, 0x1f);
433 int idx = 0;
434 int topstack = -1;
435 for (idx = 0; idx < len; idx++)
436 {
437 if (sv_csrs[idx].u == 0) {
438 topstack = idx; // used to count to end (for pop/push)
439 break;
440 } else if (reg != 0 && sv_csrs[idx].b.regkey == reg) {
441 sv_csrs[idx].u = csrval; // change reg
442 return 0; // no popping
443 }
444 }
445 // ok entry not found, is reg==0, means "pop"
446 if (csrval == 0) {
447 uint64_t popped = 0;
448 if (top) {
449 if (topstack == 0) {
450 return 0;
451 }
452 topstack -= 1;
453 uint64_t popped = sv_csrs[topstack].u;
454 sv_csrs[topstack].u = 0;
455 fprintf(stderr, "regcsr clr %d\n", topstack);
456 } else {
457 uint64_t popped = sv_csrs[0].u;
458 for (idx = 0; idx < topstack-1; idx++) {
459 sv_csrs[idx].u = sv_csrs[idx+1].u;
460 fprintf(stderr, "regcsr clr shuffle %d %d\n", idx, idx+1);
461 }
462 sv_csrs[topstack].u = 0;
463 fprintf(stderr, "regcsr clr %d\n", topstack);
464 }
465 return popped;
466 }
467 // ok entry not found, reg != 0, means "push", are we pushing to top?
468 if (top) {
469 if (topstack == len-1) { // not enough room
470 uint64_t popped = sv_csrs[0].u;
471 for (idx = 0; idx < topstack-1; idx++) {
472 sv_csrs[idx].u = sv_csrs[idx+1].u;
473 fprintf(stderr, "regcsr shuffle %d %d\n", idx, idx+1);
474 }
475 sv_csrs[topstack].u = csrval;
476 fprintf(stderr, "regcsr set %d %x\n", topstack, csrval);
477 return popped;
478 } else {
479 sv_csrs[topstack].u = csrval;
480 fprintf(stderr, "regcsr set %d %x\n", topstack, csrval);
481 return 0;
482 }
483 }
484 // no, we're pushing at bottom
485 uint64_t popped = 0;
486 if (topstack == len-1) { // not enough room
487 popped = sv_csrs[topstack].u; // top's going to get wiped
488 }
489 for (idx = topstack; idx > 0; idx--) {
490 sv_csrs[idx].u = sv_csrs[idx-1].u;
491 }
492 sv_csrs[0].u = csrval; // put in at bottom
493 return popped;
494 }
495