add comment
[riscv-isa-sim.git] / riscv / sv.cc
1 #include "sv.h"
2 #include "sv_decode.h"
3 #include "processor.h"
4
/* translates an SV "elwidth" CSR encoding into a bit-count.
 * encoding 0 means "default" (the hart's XLEN); 1 = 8-bit,
 * 2 = 16-bit, and anything else (3) = 32-bit.
 */
int get_bitwidth(uint8_t elwidth, int xlen)
{
  if (elwidth == 0) {
    return xlen; // "default": use the full register width
  }
  if (elwidth == 1) {
    return 8;
  }
  if (elwidth == 2) {
    return 16;
  }
  return 32;
}
14
/* combines two elwidth encodings, returning the one that denotes
 * the widest element.  encoding 0 ("default"/XLEN) is the widest
 * possible, so if either side is 0 the result is 0; otherwise the
 * numerically larger encoding wins.
 */
uint8_t maxelwidth(uint8_t wid1, uint8_t wid2)
{
  if (wid1 == 0 || wid2 == 0) {
    return 0; // "default" (XLEN) dominates everything
  }
  return wid1 > wid2 ? wid1 : wid2;
}
22
/* convenience routines to map from compact 8-bit to 16-bit format,
 * for use in new VBLOCK format
 */
void sv_regmap_8to16(union sv_reg_csr_entry8 const& r8,
                     union sv_reg_csr_entry &r16)
{
  // fields common to both the 8-bit and 16-bit formats: copy through
  r16.b.regkey = r8.b.regkey;
  r16.b.elwidth = r8.b.elwidth;
  r16.b.type = r8.b.type;
  // the compact format has no separate redirect-target field, so the
  // target register index is derived from the key itself.
  r16.b.regidx = r8.b.regkey << 2; // multiply by 4, no room for 6 bits
  // the compact format cannot express a scalar-only redirection:
  // every entry it produces is treated as vectorised.
  r16.b.isvec = 1; // has to be a vector
}
35
/* maps a compact 8-bit predication CSR entry to the full 16-bit
 * format.  table_idx is the entry's position in the 8-bit table;
 * the predicate register is derived from it rather than stored.
 */
void sv_predmap_8to16(union sv_pred_csr_entry8 const& r8,
                      union sv_pred_csr_entry &r16,
                      uint64_t table_idx)
{
  // fields common to both formats: copy through
  r16.b.regkey = r8.b.regkey;
  r16.b.zero = r8.b.zero;
  r16.b.inv = r8.b.inv;
  r16.b.type = r8.b.type;
  // compact entries use consecutive registers as predicates,
  // implicitly beginning at x9: entry N predicates via x(9+N)
  r16.b.regidx = table_idx + 9; // 8-bit format starts at x9
  // fail-first cannot be encoded in the compact format at all
  r16.b.ffirst = 0; // no room, whoops.
}
47
/* increments the sub-offset appropriately in a FSM-based
   version of a twin-nested for-loop:
   for (suboffs = 0; suboffs < subvl; suboffs++) {
   ... doooo stuuuuff (python would use "yield" here)
   }
   suboffs = 0; // reset to zero after "loop"

   returns true when the inner (SUBVL) loop has wrapped, i.e. when
   the outer (VL) loop should advance.  vlen is currently unused but
   kept for interface compatibility.
*/
bool inc_offs(int vlen, int subvl, int &suboffs)
{
  if (++suboffs < subvl) {
    return false; // still inside the inner loop: outer must not advance
  }
  suboffs = 0;  // inner loop wrapped: reset for the next outer iteration
  return true;  // indicates outer (VL) loop should increment
}
64
/* sv_insn_t constructor: wraps a decoded instruction (bits) with the
 * SV per-instruction context: pointers to the loop offsets for each
 * operand, the predicate values, and element-width information.
 *
 * f (stored as fimap) is a bitmask describing which source-register
 * fields the instruction actually uses -- note that this has to match
 * with id_regs.py patterns.  the constructor body scans that bitmask
 * and computes src_bitwidth: the widest element width of any active,
 * redirected source operand (0 if none are redirected).
 */
sv_insn_t::sv_insn_t(processor_t *pr, bool _sv_enabled,
  insn_bits_t bits, unsigned int f,
  int _xlen, int _src_flen, int _dest_flen,
  uint64_t &p_rd, uint64_t &p_rs1, uint64_t &p_rs2, uint64_t &p_rs3,
  uint64_t &p_sp, uint64_t *p_im,
  int *o_rd, int *o_rs1, int *o_rs2, int *o_rs3, int *o_sp,
  int *o_imm,
  int *s_offs,
  bool _sign) :
  insn_t(bits), p(pr), src_bitwidth(0),
  xlen(_xlen), src_flen(_src_flen), dest_flen(_dest_flen),
  sv_enabled(_sv_enabled), signextended(_sign),
  vloop_continue(false),
  at_least_one_reg_vectorised(false), fimap(f),
  offs_rd(o_rd), offs_rs1(o_rs1), offs_rs2(o_rs2), offs_rs3(o_rs3),
  offs_sp(o_sp),
  offs_imm(o_imm),
  suboffs(s_offs),
  prd(p_rd), prs1(p_rs1), prs2(p_rs2), prs3(p_rs3), psp(p_sp),
  save_branch_addr(0)
{
  // work out the source element width based on what is used
  // note that this has to match with id_regs.py patterns

  // walk each single-bit REG_* flag in fimap (bits 1..11, i.e. the
  // mask values 2, 4, 8, ... -- presumably matching the REG_RS1 etc.
  // constants; TODO confirm against sv_decode.h)
  unsigned int bm=2;
  for (int i = 1; i < 12; i++, bm<<=1)
  {
    sv_reg_entry* r = NULL;
    // map the flag bit to the corresponding operand field and look up
    // its redirection entry (true = integer regfile, false = FP)
    if (bm == (REG_RS1 & fimap)) {
      r = get_regentry(insn_t::rs1(), true);
    } else if (bm == (REG_RS2 & fimap)) {
      r = get_regentry(insn_t::rs2(), true);
    } else if (bm == (REG_RS3 & fimap)) {
      r = get_regentry(insn_t::rs3(), true);
    } else if (bm == (REG_RVC_RS1 & fimap)) {
      r = get_regentry(insn_t::rvc_rs1(), true);
    } else if (bm == (REG_RVC_RS2 & fimap)) {
      r = get_regentry(insn_t::rvc_rs2(), true);
    } else if (bm == (REG_RVC_RS1S & fimap)) {
      r = get_regentry(insn_t::rvc_rs1s(), true);
    } else if (bm == (REG_RVC_RS2S & fimap)) {
      r = get_regentry(insn_t::rvc_rs2s(), true);
    } else if (bm == (REG_FRS1 & fimap)) {
      r = get_regentry(insn_t::rs1(), false);
    } else if (bm == (REG_FRS2 & fimap)) {
      r = get_regentry(insn_t::rs2(), false);
    } else if (bm == (REG_FRS3 & fimap)) {
      r = get_regentry(insn_t::rs3(), false);
    }
    // operand not used by this instruction, or not redirected: skip
    if (r == NULL || !r->active) {
      continue;
    }
    // accumulate the widest source element width seen so far
    uint8_t elwidth = r->elwidth;
    uint8_t bitwidth = get_bitwidth(elwidth, _xlen);
    src_bitwidth = std::max(src_bitwidth, bitwidth);
  }
}
122
123 sv_pred_entry* sv_insn_t::get_predentry(uint64_t reg, bool intreg)
124 {
125 // okaay so first determine which map to use. intreg is passed
126 // in (ultimately) from id_regs.py's examination of the use of
127 // FRS1/RS1, WRITE_FRD/WRITE_RD, which in turn gets passed
128 // in from sv_insn_t::fimap...
129 sv_pred_entry *r;
130 if (intreg)
131 {
132 return &p->get_state()->sv().sv_pred_int_tb[reg];
133 }
134 else
135 {
136 return &p->get_state()->sv().sv_pred_fp_tb[reg];
137 }
138 }
139
140 sv_reg_entry* sv_insn_t::get_regentry(uint64_t reg, bool intreg)
141 {
142 // okaay so first determine which map to use. intreg is passed
143 // in (ultimately) from id_regs.py's examination of the use of
144 // FRS1/RS1, WRITE_FRD/WRITE_RD, which in turn gets passed
145 // in from sv_insn_t::fimap...
146 sv_reg_entry *r;
147 if (intreg)
148 {
149 return &p->get_state()->sv().sv_int_tb[reg];
150 }
151 else
152 {
153 return &p->get_state()->sv().sv_fp_tb[reg];
154 }
155 }
156
/* checks whether register "reg" is redirected AND vectorised by the
 * SV CSR tables.  returns true only for an active, vectorised entry.
 * side effect: sets at_least_one_reg_vectorised when such an entry is
 * seen (used later e.g. by setpc to decide scalar vs vector branch).
 */
bool sv_insn_t::sv_check_reg(bool intreg, uint64_t reg)
{
  sv_reg_entry *r = get_regentry(reg, intreg);
  if (r->elwidth != 0)
  {
    // XXX raise exception -- elwidth overrides are not handled on this
    // path yet; deliberately falls through for now
  }
  if (r->active && r->isvec)
  {
    fprintf(stderr, "checkreg: %ld active isvec\n", reg);
    at_least_one_reg_vectorised = true;
    return true;
  }
  if (r->active)
  {
    // redirected but scalar: log it, but this does not count as "vectorised"
    fprintf(stderr, "checkreg: %ld active !vec\n", reg);
  }
  return false;
}
176
177 /* this is the "remap" function. note that registers can STILL BE REDIRECTED
178 * yet NOT BE MARKED AS A VECTOR.
179 *
180 * reg 5 -> active=false, regidx=XX, isvec=XX -> returns 5
181 * reg 5 -> active=true , regidx=35, isvec=false -> returns 35
182 * reg 5 -> active=true , regidx=35, isvec=true -> returns 35 *PLUS LOOP*
183 *
184 * so it is possible for example to use the remap system for C instructions
185 * to get access to the *full* range of registers x0..x63 (yes 63 because
186 * SV doubles both the int and fp regfile sizes), by setting
187 * "active=true, isvec=false" for any of x8..x15
188 *
189 * where "active=true, isvec=true" this is the "expected" behaviour
190 * of SV. it's "supposed" to "just" be a vectorisation API. it isn't:
191 * it's quite a bit more.
192 */
193 reg_spec_t sv_insn_t::remap(uint64_t reg, bool intreg, int *voffs, int *subo)
194 {
195 reg_spec_t spec = {reg, NULL};
196 // okaay so first determine which map to use. intreg is passed
197 // in (ultimately) from id_regs.py's examination of the use of
198 // FRS1/RS1, WRITE_FRD/WRITE_RD, which in turn gets passed
199 // in from sv_insn_t::fimap...
200 sv_reg_entry *r = get_regentry(reg, intreg);
201
202 // next we check if this entry is active. if not, the register
203 // is not being "redirected", so just return the actual reg.
204 if (!r->active)
205 {
206 return spec; // not active: return as-is
207 }
208 vloop_continue = true;
209
210 // next we go through the lookup table. *THIS* is why the
211 // sv_reg_entry table is 32 entries (5-bit) *NOT* 6 bits
212 // the *KEY* (reg) is 5-bit, the *VALUE* (actual target reg) is 6-bit
213 // XXX TODO: must actually double NXPR and NXFR in processor.h to cope!!
214 reg = r->regidx;
215
216 // now we determine if this is a scalar/vector: if it's scalar
217 // we return the re-mapped register...
218 #if 0
219 if (!r->isvec) // scalar
220 {
221 return spec;
222 }
223 #endif
224 vloop_continue = true;
225
226 // aaand now, as it's a "vector", FINALLY we can pass the loop-offset
227 spec.reg = reg; //+ *voffs;
228 spec.offset = voffs;
229 spec.suboff = subo;
230 spec.isvec = r->isvec;
231 spec.signextend = signextended;
232 return spec;
233 }
234
235 /* gets the predication value (if active). returns all-1s if not active
236 * also returns whether zeroing is enabled/disabled for this register.
237 *
238 * uses the same sort of lookup logic as remap:
239 *
240 * - first thing to note is, there is one CSR table for FP and one for INT
241 * (so, FP regs can be predicated separately from INT ones)
242 * - redirection occurs if the CSR entry for the register is "active".
243 * - inversion of the predication can be set (so it's possible to have
244 * the same actual register value be unchanged yet be referred to by
245 * *TWO* redirections, one with inversion, one with not).
246 *
247 * note that this function *actually* returns the value of the (integer)
248 * register file, hence why processor_t has to be passed in
249 *
250 * note also that *even scalar* ops will be predicated (i.e. if a register
251 * has been set active=true and isvec=false in sv_int_tb or sv_fp_tb).
252 * the way to ensure that scalar ops are not predicated is: set VLEN=0,
253 * set active=false in sv_int_tb/sv_fp_tb for that register, or switch off
254 * the predication for that register (sv_pred_int_tb/sv_pred_fb_tb).
255 *
256 * note also that the hard limit on SV maximum vector length is actually
257 * down to the number of bits in the predication i.e. the bitwidth of integer
258 * registers (i.e. XLEN bits).
259 */
260 reg_t sv_insn_t::predicate(uint64_t reg, bool intreg, bool &zeroing)
261 {
262 sv_reg_entry *pr = get_regentry(reg, intreg);
263 if (!pr->active)
264 {
265 return ~0x0; // *REGISTER* not active: return all-1s (unconditional "on")
266 }
267 sv_pred_entry *r = get_predentry(reg, intreg);
268 if (!r->active)
269 {
270 return ~0x0; // *PREDICATION* not active: return all-1s (unconditional "on")
271 }
272 zeroing = r->zero;
273 fprintf(stderr, "predicate read %ld -> %ld\n", reg, r->regidx);
274 reg = r->regidx;
275 reg_spec_t rs = {reg, NULL};
276 reg_t pred = p->s.READ_REG(rs); // macros go through processor_t state
277 if (r->inv)
278 {
279 return ~pred;
280 }
281 return pred;
282 }
283
284 // XXX WARNING: this fn does NOT invert the predicate (if r->inv return ~pred)
285 reg_t sv_insn_t::predicate(uint64_t reg, bool intreg, bool &zeroing, bool &inv)
286 {
287 sv_reg_entry *pr = get_regentry(reg, intreg);
288 if (!pr->active)
289 {
290 return ~0x0; // *REGISTER* not active: return all-1s (unconditional "on")
291 }
292 sv_pred_entry *r = get_predentry(reg, intreg);
293 if (!r->active)
294 {
295 return ~0x0; // *PREDICATION* not active: return all-1s (unconditional "on")
296 }
297 zeroing = r->zero;
298 inv = r->inv;
299 fprintf(stderr, "predicate read %ld -> %ld\n", reg, r->regidx);
300 reg = r->regidx;
301 reg_spec_t rs = {reg, NULL};
302 reg_t predicate = p->s.READ_REG(rs); // macros go through processor_t state
303 return predicate;
304 }
305
306 reg_spec_t sv_insn_t::predicated(reg_spec_t const& spec, uint64_t pred)
307 {
308 reg_spec_t res = spec;
309 if (spec.offset == NULL)
310 {
311 return res;
312 }
313 if (pred & (1<<p->s.pred_remap(res.reg, *spec.offset)))
314 {
315 return res;
316 }
317 fprintf(stderr, "predication %ld %d %lx\n", spec.reg, (*spec.offset), pred);
318 res.reg = 0;
319 res.offset = 0;
320 res.suboff = 0;
321 res.isvec = spec.isvec;
322 return res;
323 }
324
325 bool sv_insn_t::stop_vloop(void)
326 {
327 return (p->get_state()->sv().vl == 0) || !vloop_continue;
328 }
329
330
331 /* c_lwsp's immediate offset is turned into a Vector "unit stride" if
332 * x2 (sp by convention) is marked as vectorised.
333 *
334 */
335 uint64_t sv_insn_t::_rvc_spoffs_imm(uint64_t elwidth, uint64_t offs)
336 {
337 sv_reg_entry *r = get_regentry(X_SP, 1);
338 if (!r->active)
339 {
340 return offs;
341 }
342 vloop_continue = true;
343 reg_t reg = r->regidx;
344 if (!r->isvec)
345 {
346 return offs;
347 }
348 offs += (*offs_imm) * elwidth;
349
350 return offs;
351 }
352
353 // for use in predicated branches. sets bit N if val=true; clears bit N if false
354 uint64_t sv_insn_t::rd_bitset(reg_t reg, int bit, bool set)
355 {
356 uint64_t val = STATE.XPR[reg];
357 if (set) {
358 val |= (1UL<<bit);
359 } else {
360 val &= ~(1UL<<bit);
361 }
362 STATE.XPR.write(reg, val);
363 return val;
364 }
365
366 /* called by the instruction: in scalar mode it performs the branch.
367 in SV mode, the fact that the bxx.h even tried to call setpc is
368 taken to mean that the compare succeeded, and save_branch_rd is
369 used instead to accumulate that information [or the target_reg
370 used instead, and copied into save_branch_rd]
371
372 at the **END** of the vector loop (back in insn_template_sv.cc)
373 the *accumulated* results in save_branch_rd are tested to see
374 if they *all* succeeded, and if so *then* the branch is taken.
375
376 TODO: the loop has to be modified to be aware of SUBVL, because
377 only if *all* subvector elements succeed is the save_branch_rd
378 bit allowed to be set.
379 */
380 void sv_insn_t::setpc(int xlen, int vlen, reg_t &npc, reg_t addr, uint64_t offs,
381 reg_t *target_reg, bool zeroing, bool inv)
382 {
383 save_branch_addr = addr;
384 if (not at_least_one_reg_vectorised) // scalar-scalar: make the branch
385 {
386 _set_pc(addr);
387 return;
388 }
389 if (target_reg != NULL) {
390 reg_spec_t rs = {*target_reg, NULL};
391 fprintf(stderr, "setpc pre rd %ld v %lx pred %lx\n",
392 *target_reg, p->s.READ_REG(rs), prs1);
393 }
394 if (target_reg) {
395 offs = p->s.pred_remap(*target_reg, offs);
396 }
397 if ((1<<offs) & prs1)
398 {
399 if (target_reg) {
400 save_branch_rd = rd_bitset(*target_reg, offs, !zeroing);
401 } else {
402 if (zeroing)
403 save_branch_rd &= ~(1UL<<offs);
404 else
405 save_branch_rd |= (1UL<<offs);
406 }
407 }
408 else if (inv) // target pred, meaning of inv bit is overloaded
409 {
410 vloop_continue = false;
411 }
412 fprintf(stderr, "setpc %lx offs %ld predicate %lx rs1 %ld rs2 %ld\n",
413 save_branch_rd, offs, prs1,
414 p->s.READ_REG(rs1()), p->s.READ_REG(rs2()));
415 }
416
417 uint8_t sv_insn_t::reg_elwidth(reg_t reg, bool intreg)
418 {
419 sv_reg_entry *r = get_regentry(reg, intreg);
420 if (r->active) {
421 return r->elwidth;
422 }
423 return 0;
424 }
425
426 uint64_t sv_csr_t::regpush(uint16_t csrval, int len, bool top)
427 {
428 // when csrval == 0 it means "pop".
429 // when reg != 0 and an existing entry exists, it means "change reg"
430 // when reg != 0 and an existing entry doesn't exist, it means "push"
431 // push/pop are to top when top is true, otherwise to bottom.
432 uint64_t reg = get_field(csrval, 0x1f);
433 int idx = 0;
434 int topstack = -1;
435 for (idx = 0; idx < len; idx++)
436 {
437 if (sv_csrs[idx].u == 0) {
438 topstack = idx; // used to count to end (for pop/push)
439 break;
440 } else if (reg != 0 && sv_csrs[idx].b.regkey == reg) {
441 sv_csrs[idx].u = csrval; // change reg
442 return 0; // no popping
443 }
444 }
445 // ok entry not found, is reg==0, means "pop"
446 if (csrval == 0) {
447 uint64_t popped = 0;
448 if (top) {
449 if (topstack == 0) {
450 return 0;
451 }
452 topstack -= 1;
453 uint64_t popped = sv_csrs[topstack].u;
454 sv_csrs[topstack].u = 0;
455 fprintf(stderr, "regcsr clr %d\n", topstack);
456 } else {
457 uint64_t popped = sv_csrs[0].u;
458 for (idx = 0; idx < topstack-1; idx++) {
459 sv_csrs[idx].u = sv_csrs[idx+1].u;
460 fprintf(stderr, "regcsr clr shuffle %d %d\n", idx, idx+1);
461 }
462 sv_csrs[topstack].u = 0;
463 fprintf(stderr, "regcsr clr %d\n", topstack);
464 }
465 return popped;
466 }
467 // ok entry not found, reg != 0, means "push", are we pushing to top?
468 if (top) {
469 if (topstack == len-1) { // not enough room
470 uint64_t popped = sv_csrs[0].u;
471 for (idx = 0; idx < topstack-1; idx++) {
472 sv_csrs[idx].u = sv_csrs[idx+1].u;
473 fprintf(stderr, "regcsr shuffle %d %d\n", idx, idx+1);
474 }
475 sv_csrs[topstack].u = csrval;
476 fprintf(stderr, "regcsr set %d %x\n", topstack, csrval);
477 return popped;
478 } else {
479 sv_csrs[topstack].u = csrval;
480 fprintf(stderr, "regcsr set %d %x\n", topstack, csrval);
481 return 0;
482 }
483 }
484 // no, we're pushing at bottom
485 uint64_t popped = 0;
486 if (topstack == len-1) { // not enough room
487 popped = sv_csrs[topstack].u; // top's going to get wiped
488 }
489 for (idx = topstack; idx > 0; idx--) {
490 sv_csrs[idx].u = sv_csrs[idx-1].u;
491 }
492 sv_csrs[0].u = csrval; // put in at bottom
493 return popped;
494 }
495