// Convert a compact SV element-width encoding into an actual bit width.
// NOTE(review): the function body (original lines 6-14) is not visible
// in this excerpt; presumably elwidth==0 selects the default width
// (xlen) -- confirm against the complete source before relying on this.
int get_bitwidth(uint8_t elwidth, int xlen)
// Return the "larger" of two compact element-width encodings.
// NOTE(review): the body of the wid==0 special case (original lines
// 18-19), the opening brace (16) and the closing braces (21-22) are not
// visible in this excerpt; an encoding of 0 appears to be treated
// specially (presumably "default width") -- confirm in the full source.
uint8_t maxelwidth(uint8_t wid1, uint8_t wid2)
    if (wid1 == 0 || wid2 == 0) {
        // (special-case body elided from this view)
    return std::max(wid1, wid2);
/* convenience routines to map from compact 8-bit to 16-bit format,
 * for use in new VBLOCK format
 */
// Expand an 8-bit register-redirection CSR entry (r8) into the full
// 16-bit form (r16).  Copies regkey/elwidth/type verbatim; regidx and
// isvec are synthesised because the compact format has no room for them.
// NOTE(review): the function braces (original lines 28, 34-35) are not
// visible in this excerpt.
void sv_regmap_8to16(union sv_reg_csr_entry8 const& r8,
                     union sv_reg_csr_entry &r16)
    r16.b.regkey = r8.b.regkey;
    r16.b.elwidth = r8.b.elwidth;
    r16.b.type = r8.b.type;
    // NOTE(review): regidx is derived from r8's *regkey* field -- the
    // trailing comment suggests this is deliberate (no room for a 6-bit
    // regidx in the compact format), but confirm it is not a typo.
    r16.b.regidx = r8.b.regkey << 2; // multiply by 4, no room for 6 bits
    r16.b.isvec = 1; // has to be a vector
// Expand an 8-bit predication CSR entry (r8) into the full 16-bit form.
// NOTE(review): the parameter list is truncated in this excerpt
// (original lines 38-39): a further parameter supplying table_idx, plus
// the function braces, are not visible.  Original line 42 (between the
// zero and type copies -- likely the 'inv' field copy) is also missing.
void sv_predmap_8to16(union sv_pred_csr_entry8 const& r8,
                      union sv_pred_csr_entry &r16,
    r16.b.regkey = r8.b.regkey;
    r16.b.zero = r8.b.zero;
    r16.b.type = r8.b.type;
    // regidx is positional: entry N of the compact table maps register
    // x(N+9), per the trailing comment
    r16.b.regidx = table_idx + 9; // 8-bit format starts at x9
    r16.b.ffirst = 0; // no room, whoops.
/* increments the sub-offset appropriately in a FSM-based
   version of a twin-nested for-loop:
   for (suboffs = 0; suboffs < subvl; suboffs++) {
       ... doooo stuuuuff (python would use "yield" here)
   }
   suboffs = 0; // reset to zero after "loop"
*/
// Returns true when the inner (SUBVL) loop has wrapped, i.e. the outer
// (VL) loop should advance; false while the inner loop is still running.
// NOTE(review): original lines 56-57 are not visible in this excerpt --
// presumably the opening brace and the increment of suboffs; confirm
// against the complete source.
bool inc_offs(int vlen, int subvl, int &suboffs)
    if (suboffs < subvl) {
        return false; // outer loop should not increment
    suboffs = 0; // reset the sub-offs
    return true; // indicates outer (VL) loop should increment
// sv_insn_t constructor: records the processor, instruction bits and the
// operand offset/cache pointers in the initialiser list, then scans the
// instruction's register-usage map (fimap, produced by id_regs.py) to
// compute the maximum source element width used by any active SV
// register operand (accumulated into src_bitwidth).
// NOTE(review): several original lines are missing from this excerpt:
// the tail of the parameter list (71-73, which must declare _sign among
// others), parts of the initialiser list (80-82, 84-85), the
// declaration/initialisation of the bitmask 'bm' (88-91), and the
// loop/branch brace structure (113, 115-116, 120-122).
sv_insn_t::sv_insn_t(processor_t *pr, bool _sv_enabled,
                     insn_bits_t bits, unsigned int f,
                     int _xlen, int _src_flen, int _dest_flen,
                     uint64_t &p_rd, uint64_t &p_rs1, uint64_t &p_rs2, uint64_t &p_rs3,
                     uint64_t &p_sp, uint64_t *p_im,
                     int *o_rd, int *o_rs1, int *o_rs2, int *o_rs3, int *o_sp,
  insn_t(bits), p(pr), src_bitwidth(0),
  xlen(_xlen), src_flen(_src_flen), dest_flen(_dest_flen),
  sv_enabled(_sv_enabled), signextended(_sign),
  vloop_continue(false),
  at_least_one_reg_vectorised(false), fimap(f),
  offs_rd(o_rd), offs_rs1(o_rs1), offs_rs2(o_rs2), offs_rs3(o_rs3),
  prd(p_rd), prs1(p_rs1), prs2(p_rs2), prs3(p_rs3), psp(p_sp),
    // work out the source element width based on what is used
    // note that this has to match with id_regs.py patterns
    // walk one bit of the usage map per iteration; each REG_* constant
    // is a single-bit flag, so each test identifies one operand kind
    for (int i = 1; i < 12; i++, bm <<=1)
        sv_reg_entry *r = NULL;
        if (bm == (REG_RS1 & fimap)) {
            r = get_regentry(insn_t::rs1(), true);
        } else if (bm == (REG_RS2 & fimap)) {
            r = get_regentry(insn_t::rs2(), true);
        } else if (bm == (REG_RS3 & fimap)) {
            r = get_regentry(insn_t::rs3(), true);
        } else if (bm == (REG_RVC_RS1 & fimap)) {
            r = get_regentry(insn_t::rvc_rs1(), true);
        } else if (bm == (REG_RVC_RS2 & fimap)) {
            r = get_regentry(insn_t::rvc_rs2(), true);
        } else if (bm == (REG_RVC_RS1S & fimap)) {
            r = get_regentry(insn_t::rvc_rs1s(), true);
        } else if (bm == (REG_RVC_RS2S & fimap)) {
            r = get_regentry(insn_t::rvc_rs2s(), true);
        } else if (bm == (REG_FRS1 & fimap)) {
            // FP operands: same encoded register field, FP table lookup
            r = get_regentry(insn_t::rs1(), false);
        } else if (bm == (REG_FRS2 & fimap)) {
            r = get_regentry(insn_t::rs2(), false);
        } else if (bm == (REG_FRS3 & fimap)) {
            r = get_regentry(insn_t::rs3(), false);
        // skip operands with no active redirection entry
        if (r == NULL || !r->active) {
        // accumulate the widest source element width seen so far
        uint8_t elwidth = r->elwidth;
        uint8_t bitwidth = get_bitwidth(elwidth, _xlen);
        src_bitwidth = std::max(src_bitwidth, bitwidth);
// Look up the predication CSR entry for 'reg' in either the integer or
// the FP predication table.
// NOTE(review): the if/else selecting between the two returns (original
// lines 124, 129-131, 133-135, 137-138) is not visible in this excerpt;
// presumably intreg==true selects sv_pred_int_tb -- confirm.
sv_pred_entry *sv_insn_t::get_predentry(uint64_t reg, bool intreg)
    // okaay so first determine which map to use. intreg is passed
    // in (ultimately) from id_regs.py's examination of the use of
    // FRS1/RS1, WRITE_FRD/WRITE_RD, which in turn gets passed
    // in from sv_insn_t::fimap...
    return &p->get_state()->sv().sv_pred_int_tb[reg];
    return &p->get_state()->sv().sv_pred_fp_tb[reg];
// Look up the register-redirection CSR entry for 'reg' in either the
// integer or the FP redirection table.
// NOTE(review): the if/else selecting between the two returns (original
// lines 141, 146-148, 150-152, 154-155) is not visible in this excerpt;
// presumably intreg==true selects sv_int_tb -- confirm.
sv_reg_entry *sv_insn_t::get_regentry(uint64_t reg, bool intreg)
    // okaay so first determine which map to use. intreg is passed
    // in (ultimately) from id_regs.py's examination of the use of
    // FRS1/RS1, WRITE_FRD/WRITE_RD, which in turn gets passed
    // in from sv_insn_t::fimap...
    return &p->get_state()->sv().sv_int_tb[reg];
    return &p->get_state()->sv().sv_fp_tb[reg];
// Check whether 'reg' has an active redirection entry and whether it is
// marked as a vector; records the latter in at_least_one_reg_vectorised.
// NOTE(review): the branch structure and return statements (original
// lines 158, 160-161, 163, 165, 168-171, 173-176) are not visible in
// this excerpt.
bool sv_insn_t::sv_check_reg(bool intreg, uint64_t reg)
    sv_reg_entry *r = get_regentry(reg, intreg);
    // XXX raise exception
    if (r->active && r->isvec)
        // NOTE(review): "%ld" with a uint64_t argument assumes an LP64
        // target; PRIu64 would be portable
        fprintf(stderr, "checkreg: %ld active isvec\n", reg);
        at_least_one_reg_vectorised = true;
        fprintf(stderr, "checkreg: %ld active !vec\n", reg);
/* this is the "remap" function. note that registers can STILL BE REDIRECTED
 * yet NOT BE MARKED AS A VECTOR.
 *
 * reg 5 -> active=false, regidx=XX, isvec=XX -> returns 5
 * reg 5 -> active=true , regidx=35, isvec=false -> returns 35
 * reg 5 -> active=true , regidx=35, isvec=true -> returns 35 *PLUS LOOP*
 *
 * so it is possible for example to use the remap system for C instructions
 * to get access to the *full* range of registers x0..x63 (yes 63 because
 * SV doubles both the int and fp regfile sizes), by setting
 * "active=true, isvec=false" for any of x8..x15
 *
 * where "active=true, isvec=true" this is the "expected" behaviour
 * of SV. it's "supposed" to "just" be a vectorisation API. it isn't:
 * it's quite a bit more.
 */
// NOTE(review): significant portions of this function (original lines
// 194, 201, 204-205, 207, 209, 214-215, 218, 220-223, 225, 228-229,
// 232-234) are missing from this excerpt -- in particular the "not
// active" test guarding the early return, the regidx table lookup, the
// scalar-path return, and the brace structure.
reg_spec_t sv_insn_t::remap(uint64_t reg, bool intreg, int *voffs, int *subo)
    reg_spec_t spec = {reg, NULL};
    // okaay so first determine which map to use. intreg is passed
    // in (ultimately) from id_regs.py's examination of the use of
    // FRS1/RS1, WRITE_FRD/WRITE_RD, which in turn gets passed
    // in from sv_insn_t::fimap...
    sv_reg_entry *r = get_regentry(reg, intreg);
    // next we check if this entry is active. if not, the register
    // is not being "redirected", so just return the actual reg.
    return spec; // not active: return as-is
    vloop_continue = true;
    // next we go through the lookup table. *THIS* is why the
    // sv_reg_entry table is 32 entries (5-bit) *NOT* 6 bits
    // the *KEY* (reg) is 5-bit, the *VALUE* (actual target reg) is 6-bit
    // XXX TODO: must actually double NXPR and NXFR in processor.h to cope!!
    // now we determine if this is a scalar/vector: if it's scalar
    // we return the re-mapped register...
    if (!r->isvec) // scalar
    vloop_continue = true;
    // aaand now, as it's a "vector", FINALLY we can pass the loop-offset
    spec.reg = reg; //+ *voffs;
    spec.isvec = r->isvec;
    spec.signextend = signextended;
/* gets the predication value (if active). returns all-1s if not active
 * also returns whether zeroing is enabled/disabled for this register.
 *
 * uses the same sort of lookup logic as remap:
 *
 * - first thing to note is, there is one CSR table for FP and one for INT
 *   (so, FP regs can be predicated separately from INT ones)
 * - redirection occurs if the CSR entry for the register is "active".
 * - inversion of the predication can be set (so it's possible to have
 *   the same actual register value be unchanged yet be referred to by
 *   *TWO* redirections, one with inversion, one with not).
 *
 * note that this function *actually* returns the value of the (integer)
 * register file, hence why processor_t has to be passed in
 *
 * note also that *even scalar* ops will be predicated (i.e. if a register
 * has been set active=true and isvec=false in sv_int_tb or sv_fp_tb).
 * the way to ensure that scalar ops are not predicated is: set VLEN=0,
 * set active=false in sv_int_tb/sv_fp_tb for that register, or switch off
 * the predication for that register (sv_pred_int_tb/sv_pred_fb_tb).
 *
 * note also that the hard limit on SV maximum vector length is actually
 * down to the number of bits in the predication i.e. the bitwidth of integer
 * registers (i.e. XLEN bits).
 */
// NOTE(review): the guards around each early return, the assignment of
// 'zeroing', any inversion handling and the final return (original lines
// 261, 263-264, 266, 268-269, 271-272, 274, 277-283) are not visible in
// this excerpt.
reg_t sv_insn_t::predicate(uint64_t reg, bool intreg, bool &zeroing)
    sv_reg_entry *pr = get_regentry(reg, intreg);
    return ~0x0; // *REGISTER* not active: return all-1s (unconditional "on")
    sv_pred_entry *r = get_predentry(reg, intreg);
    return ~0x0; // *PREDICATION* not active: return all-1s (unconditional "on")
    fprintf(stderr, "predicate read %ld -> %ld\n", reg, r->regidx);
    reg_spec_t rs = {reg, NULL};
    reg_t pred = p->s.READ_REG(rs); // macros go through processor_t state
// XXX WARNING: this fn does NOT invert the predicate (if r->inv return ~pred)
// Overload of predicate() that additionally reports the inversion flag
// to the caller via 'inv' instead of applying it.
// NOTE(review): the guards around each early return, the assignments of
// 'zeroing'/'inv' and the final return (original lines 286, 288-289,
// 291, 293-294, 296-298, 300, 303-305) are not visible in this excerpt.
reg_t sv_insn_t::predicate(uint64_t reg, bool intreg, bool &zeroing, bool &inv)
    sv_reg_entry *pr = get_regentry(reg, intreg);
    return ~0x0; // *REGISTER* not active: return all-1s (unconditional "on")
    sv_pred_entry *r = get_predentry(reg, intreg);
    return ~0x0; // *PREDICATION* not active: return all-1s (unconditional "on")
    fprintf(stderr, "predicate read %ld -> %ld\n", reg, r->regidx);
    reg_spec_t rs = {reg, NULL};
    reg_t predicate = p->s.READ_REG(rs); // macros go through processor_t state
// Apply an already-fetched predicate bitmask to a register spec: when the
// spec carries a loop offset, test the (remapped) predicate bit for this
// element.
// NOTE(review): the branch bodies and the final return (original lines
// 307, 310-312, 314-316, 318-320, 322-324) are not visible here.
// NOTE(review): the bit test uses a plain int-width shift (1<<...);
// other code in this file uses 1UL<< -- shift counts >= 32 would be
// undefined behaviour here; confirm and fix in the full source.
reg_spec_t sv_insn_t::predicated(reg_spec_t const& spec, uint64_t pred)
    reg_spec_t res = spec;
    if (spec.offset == NULL)
    if (pred & (1<<p->s.pred_remap(res.reg, *spec.offset)))
    fprintf(stderr, "predication %ld %d %lx\n", spec.reg, (*spec.offset), pred);
    res.isvec = spec.isvec;
325 bool sv_insn_t::stop_vloop(void)
327 return (p
->get_state()->sv().vl
== 0) || !vloop_continue
;
/* c_lwsp's immediate offset is turned into a Vector "unit stride" if
 * x2 (sp by convention) is marked as vectorised.
 */
// NOTE(review): the guard conditions, the use of 'reg', and most of the
// offset computation (original lines 333-334, 336, 338-341, 344-347,
// 349-352) are not visible in this excerpt; offs_imm is presumably a
// member holding the element counter -- confirm against the full source.
uint64_t sv_insn_t::_rvc_spoffs_imm(uint64_t elwidth, uint64_t offs)
    sv_reg_entry *r = get_regentry(X_SP, 1);
    vloop_continue = true;
    reg_t reg = r->regidx;
    // unit-stride: advance the immediate by one element width per element
    offs += (*offs_imm) * elwidth;
// for use in predicated branches. sets bit N if val=true; clears bit N if false
// Reads the integer register, modifies the requested bit, writes it back.
// NOTE(review): the actual set/clear logic and the return statement
// (original lines 355, 357-361, 363-365) are not visible in this
// excerpt; only the register read and the write-back remain.
uint64_t sv_insn_t::rd_bitset(reg_t reg, int bit, bool set)
    uint64_t val = STATE.XPR[reg];
    STATE.XPR.write(reg, val);
/* called by the instruction: in scalar mode it performs the branch.
   in SV mode, the fact that the bxx.h even tried to call setpc is
   taken to mean that the compare succeeded, and save_branch_rd is
   used instead to accumulate that information [or the target_reg
   used instead, and copied into save_branch_rd]

   at the **END** of the vector loop (back in insn_template_sv.cc)
   the *accumulated* results in save_branch_rd are tested to see
   if they *all* succeeded, and if so *then* the branch is taken.

   TODO: the loop has to be modified to be aware of SUBVL, because
   only if *all* subvector elements succeed is the save_branch_rd
   bit allowed to be set.
*/
// NOTE(review): substantial portions of this function (original lines
// 382, 385-388, 393-394, 396, 398-399, 401-402, 404, 406-407, 409, 411,
// 415-416) are not visible in this excerpt, including the scalar-path
// assignment to npc and the brace structure; the statement ordering
// below therefore cannot be read as complete control flow.
void sv_insn_t::setpc(int xlen, int vlen, reg_t &npc, reg_t addr, uint64_t offs,
                      reg_t *target_reg, bool zeroing, bool inv)
    save_branch_addr = addr;
    if (not at_least_one_reg_vectorised) // scalar-scalar: make the branch
    if (target_reg != NULL) {
        reg_spec_t rs = {*target_reg, NULL};
        fprintf(stderr, "setpc pre rd %ld v %lx pred %lx\n",
                *target_reg, p->s.READ_REG(rs), prs1);
        // remap the element offset to its predicate bit position
        offs = p->s.pred_remap(*target_reg, offs);
        // NOTE(review): (1<<offs) is an int-width shift while the two
        // accumulations below use 1UL<< -- inconsistent; offs >= 32
        // would be UB here. Confirm and fix in the full source.
        if ((1<<offs) & prs1)
            save_branch_rd = rd_bitset(*target_reg, offs, !zeroing);
        save_branch_rd &= ~(1UL<<offs);
        save_branch_rd |= (1UL<<offs);
    else if (inv) // target pred, meaning of inv bit is overloaded
        vloop_continue = false;
    fprintf(stderr, "setpc %lx offs %ld predicate %lx rs1 %ld rs2 %ld\n",
            save_branch_rd, offs, prs1,
            p->s.READ_REG(rs1()), p->s.READ_REG(rs2()));
// Return the element-width encoding configured for 'reg'.
// NOTE(review): the opening brace, any active/default handling and the
// return statement (original lines 418, 420-425) are not visible in
// this excerpt.
uint8_t sv_insn_t::reg_elwidth(reg_t reg, bool intreg)
    sv_reg_entry *r = get_regentry(reg, intreg);
// Treats the sv_csrs[] table as a small stack of register-CSR entries,
// supporting "pop" (csrval==0), "change" (matching regkey found) and
// "push" (to top or bottom, shuffling entries when the table is full).
// NOTE(review): this function is heavily elided in this excerpt (many
// original lines -- 427, 433-434, 436, 439, 443-444, 446-452, 456, 461,
// 464-466, 468, 474, 477-478, 481-483, 485, 488, 491 -- are missing,
// and it continues past the end of the visible chunk), so the brace
// structure below is incomplete; the apparent duplicate declarations of
// 'popped' presumably live in separate (elided) scopes.
uint64_t sv_csr_t::regpush(uint16_t csrval, int len, bool top)
    // when csrval == 0 it means "pop".
    // when reg != 0 and an existing entry exists, it means "change reg"
    // when reg != 0 and an existing entry doesn't exist, it means "push"
    // push/pop are to top when top is true, otherwise to bottom.
    uint64_t reg = get_field(csrval, 0x1f);
    // first pass: find the top of the stack and any matching entry
    for (idx = 0; idx < len; idx++)
        if (sv_csrs[idx].u == 0) {
            topstack = idx; // used to count to end (for pop/push)
        } else if (reg != 0 && sv_csrs[idx].b.regkey == reg) {
            sv_csrs[idx].u = csrval; // change reg
            return 0; // no popping
    // ok entry not found, is reg==0, means "pop"
    uint64_t popped = sv_csrs[topstack].u;
    sv_csrs[topstack].u = 0;
    fprintf(stderr, "regcsr clr %d\n", topstack);
    // pop from the bottom: shuffle everything down one slot
    uint64_t popped = sv_csrs[0].u;
    for (idx = 0; idx < topstack-1; idx++) {
        sv_csrs[idx].u = sv_csrs[idx+1].u;
        fprintf(stderr, "regcsr clr shuffle %d %d\n", idx, idx+1);
    sv_csrs[topstack].u = 0;
    fprintf(stderr, "regcsr clr %d\n", topstack);
    // ok entry not found, reg != 0, means "push", are we pushing to top?
    if (topstack == len-1) { // not enough room
        // full: drop the bottom entry, shuffle down, push on top
        uint64_t popped = sv_csrs[0].u;
        for (idx = 0; idx < topstack-1; idx++) {
            sv_csrs[idx].u = sv_csrs[idx+1].u;
            fprintf(stderr, "regcsr shuffle %d %d\n", idx, idx+1);
        sv_csrs[topstack].u = csrval;
        fprintf(stderr, "regcsr set %d %x\n", topstack, csrval);
    sv_csrs[topstack].u = csrval;
    fprintf(stderr, "regcsr set %d %x\n", topstack, csrval);
    // no, we're pushing at bottom
    if (topstack == len-1) { // not enough room
        popped = sv_csrs[topstack].u; // top's going to get wiped
    // shuffle everything up one slot to make room at the bottom
    for (idx = topstack; idx > 0; idx--) {
        sv_csrs[idx].u = sv_csrs[idx-1].u;
    sv_csrs[0].u = csrval; // put in at bottom