# blob f1061106e7966001285d833db1da57cc6d5822ce
# [soc.git] / src / experiment / score6600.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
4
5 from regfile.regfile import RegFileArray, treereduce
6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
7 from scoreboard.fu_reg_matrix import FURegDepMatrix
8 from scoreboard.global_pending import GlobalPending
9 from scoreboard.group_picker import GroupPicker
10 from scoreboard.issue_unit import IntFPIssueUnit, RegDecode
11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
12
13 from compalu import ComputationUnitNoDelay
14
15 from alu_hier import ALU, BranchALU
16 from nmutil.latch import SRLatch
17
18 from random import randint, seed
19 from copy import deepcopy
20
21
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Presents a sequence of Computation Units as one unit whose control
        signals are one bit per member.  Members may themselves be
        CompUnitsBase instances, in which case the width is the sum of the
        member widths, so the same code groups ALUs and *groups* of ALUs.
    """
    def __init__(self, rwid, units):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :units:  sequence of ALUs (or CompUnitsBase derivatives)
        """
        self.units = units
        self.rwid = rwid

        # only the first member is inspected to tell a group-of-groups
        # (sum the sub-widths) from a plain group (one bit per member)
        holds_groups = bool(units) and isinstance(units[0], CompUnitsBase)
        if holds_groups:
            self.n_units = sum(group.n_units for group in units)
        else:
            self.n_units = len(units)
        n_units = self.n_units

        # NOTE: signals are assigned directly to attributes on purpose;
        # nmigen takes the signal name from the assignment target.

        # inputs
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)

        # outputs
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        """ Registers every member as submodule "comp<N>" and wires the
            concatenated per-member signals to this group's own signals.
        """
        m = Module()
        comb = m.d.comb

        for idx, unit in enumerate(self.units):
            print ("elaborate comp%d" % idx, self, unit)
            setattr(m.submodules, "comp%d" % idx, unit)

        def bundle(attr):
            # concatenation of the named signal across all members,
            # member 0 in the least-significant position
            return Cat(*[getattr(unit, attr) for unit in self.units])

        # member outputs are gathered into the group outputs...
        comb += self.rd_rel_o.eq(bundle("rd_rel_o"))
        comb += self.req_rel_o.eq(bundle("req_rel_o"))
        comb += self.busy_o.eq(bundle("busy_o"))
        # ... and the group inputs are fanned out to the member inputs
        comb += bundle("go_die_i").eq(self.go_die_i)
        comb += bundle("shadown_i").eq(self.shadown_i)
        comb += bundle("go_wr_i").eq(self.go_wr_i)
        comb += bundle("go_rd_i").eq(self.go_rd_i)
        comb += bundle("issue_i").eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "data_o"
        if self.units:
            merged = treereduce(self.units)
            comb += self.data_o.eq(merged)

        # every member sees the same two source operands
        for unit in self.units:
            comb += [unit.src1_i.eq(self.src1_i),
                     unit.src2_i.eq(self.src2_i)]

        return m
111
112
class CompUnitALUs(CompUnitsBase):
    """ Group of four integer Computation Units, each wrapping its own ALU
        and each hard-wired to one operation (add, sub, mul, shf).
    """

    def __init__(self, rwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
        """

        # Int ALUs: one each for add, sub, mul and shf.  All ALUs are
        # created before any Computation Unit, as in the original ordering.
        alus = [ALU(rwid) for _ in range(4)]
        units = [ComputationUnitNoDelay(rwid, 2, alu) for alu in alus]

        print ("alu units", units)
        super().__init__(rwid, units)
        print ("alu base init done")

    def elaborate(self, platform):
        """ Performs the base-class wiring, then ties each unit's oper_i
            to its fixed 2-bit opcode.
        """
        print ("alu elaborate start")
        m = super().elaborate(platform)
        print ("alu elaborate done")
        comb = m.d.comb

        # unit N is driven with opcode N: 0=add, 1=sub, 2=mul, 3=shf
        for opcode in range(4):
            comb += self.units[opcode].oper_i.eq(Const(opcode, 2))

        return m
147
148
class CompUnitBR(CompUnitsBase):
    """ Group holding a single branch Computation Unit. """

    def __init__(self, rwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT

            Note: the branch ALU and its Computation Unit are kept as the
            attributes "bgt" and "br1" so that a shadow unit can be
            created for them
        """

        # Branch ALU and CU
        self.bgt = BranchALU(rwid)
        self.br1 = ComputationUnitNoDelay(rwid, 3, self.bgt)
        members = [self.br1]
        print ("br units", members)
        super().__init__(rwid, members)
        print ("br base init done")

    def elaborate(self, platform):
        """ Performs the base-class wiring, then fixes the branch unit's
            3-bit operation to 4 (bgt).
        """
        print ("br elaborate start")
        m = super().elaborate(platform)
        print ("br elaborate done")

        bgt_opcode = Const(4, 3) # op=bgt
        m.d.comb += self.br1.oper_i.eq(bgt_opcode)

        return m
177
178
class FunctionUnits(Elaboratable):
    """ Wraps a FU-FU dependency matrix and a FU-Register dependency matrix
        and exposes their issue / go / pending / select signals.
    """

    def __init__(self, n_regs, n_int_alus):
        """ Inputs:

            * :n_regs:     width of the (unary) register-select signals
            * :n_int_alus: number of Function Units tracked
        """
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        # NOTE: signals are assigned directly to attributes on purpose;
        # nmigen takes the signal name from the assignment target.

        self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in

        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)

        self.req_rel_i = Signal(n_int_alus, reset_less = True)
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.req_rel_o = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid

    def elaborate(self, platform):
        """ Instantiates both matrices as submodules ("intfudeps" and
            "intregdeps") and connects them to each other and to the ports.
        """
        m = Module()
        comb = m.d.comb

        n_fus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        fu_deps = FUFUDepMatrix(n_fus, n_fus)
        m.submodules.intfudeps = fu_deps
        # Integer FU-Reg Dep Matrix
        reg_deps = FURegDepMatrix(n_fus, self.n_regs)
        m.submodules.intregdeps = reg_deps

        # global pending vectors come straight from the register matrix,
        # which is also fed its own rsel outputs as its pending inputs
        comb += [self.g_int_rd_pend_o.eq(reg_deps.rd_rsel_o),
                 self.g_int_wr_pend_o.eq(reg_deps.wr_rsel_o),
                 reg_deps.rd_pend_i.eq(reg_deps.rd_rsel_o),
                 reg_deps.wr_pend_i.eq(reg_deps.wr_rsel_o),
                ]

        # per-FU pending from the register matrix drives the FU-FU matrix
        comb += [fu_deps.rd_pend_i.eq(reg_deps.rd_pend_o),
                 fu_deps.wr_pend_i.eq(reg_deps.wr_pend_o),
                ]
        self.wr_pend_o = reg_deps.wr_pend_o # also output for use in WaWGrid

        # FU-FU matrix control inputs and readable/writable outputs
        comb += [fu_deps.issue_i.eq(self.fn_issue_i),
                 fu_deps.go_rd_i.eq(self.go_rd_i),
                 fu_deps.go_wr_i.eq(self.go_wr_i),
                 fu_deps.go_die_i.eq(self.go_die_i),
                 self.readable_o.eq(fu_deps.readable_o),
                 self.writable_o.eq(fu_deps.writable_o),
                ]

        # Connect function issue / arrays, and dest/src1/src2
        comb += [reg_deps.dest_i.eq(self.dest_i),
                 reg_deps.src1_i.eq(self.src1_i),
                 reg_deps.src2_i.eq(self.src2_i),
                ]

        comb += [reg_deps.go_rd_i.eq(self.go_rd_i),
                 reg_deps.go_wr_i.eq(self.go_wr_i),
                 reg_deps.go_die_i.eq(self.go_die_i),
                 reg_deps.issue_i.eq(self.fn_issue_i),
                ]

        # register-select outputs
        comb += [self.dest_rsel_o.eq(reg_deps.dest_rsel_o),
                 self.src1_rsel_o.eq(reg_deps.src1_rsel_o),
                 self.src2_rsel_o.eq(reg_deps.src2_rsel_o),
                ]

        return m
254
255
class Scoreboard(Elaboratable):
    """ Scoreboard: wires together the register files, the Computation
        Units, the Function Unit dependency matrices, the group picker,
        the issue unit and the shadow / branch-speculation logic.

        Note: the attribute int_insn_i is only created by elaborate(), so
        it is not available until the design has been elaborated.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        # NOTE(review): elaborate() sets branch_direction_o to
        # br1.data_o + 1 and treats data_o == 1 as "branch occurs", which
        # makes 2 the taken case - the 1/2 wording above looks inverted;
        # confirm.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Builds the module: instantiates all submodules and connects
            them.  Also creates self.int_insn_i as a side effect.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested but not connected anywhere below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        # (5 = the four CompUnitALUs units plus the one CompUnitBR unit)
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid)
        cub = CompUnitBR(self.rwid)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_int_fus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: Adder + Subtractor
        # NOTE(review): the picker is n_int_fus (5) wide, so it covers the
        # branch unit as well as the four named in the comment below.
        intpick1 = GroupPicker(n_int_fus) # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IntFPIssueUnit(n_int_fus, n_fp_fus)
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_int_fus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        # NOTE(review): as written, the branch shadow is a *separate*
        # one-wide matrix (bshadow) rather than an extra column on
        # "shadows"; the "+1" wording looks stale.
        m.submodules.shadows = shadows = ShadowMatrix(n_int_fus, n_int_fus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_int_fus, 1, False)

        # record previous instruction to cast shadow on current instruction
        # (fn_issue_prev is written below but not read in this method)
        fn_issue_prev = Signal(n_int_fus)
        prev_shadow = Signal(n_int_fus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_int_fus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [ regdecode.dest_i.eq(self.int_dest_i),
                  regdecode.src1_i.eq(self.int_src1_i),
                  regdecode.src2_i.eq(self.int_src2_i),
                  regdecode.enable_i.eq(self.reg_enable_i),
                  self.issue_o.eq(issueunit.issue_o)
                ]
        self.int_insn_i = issueunit.i.insn_i # enabled by instruction decode

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.i.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.i.busy_i.eq(cu.busy_o)
        # busy_o is the OR-reduction of the per-unit busy bits
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_int_fus, reset_less=True)
        allshadown = Signal(n_int_fus, reset_less=True)
        shreset = Signal(n_int_fus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_int_fus].eq(go_rd_o[0:n_int_fus]) # rd
        comb += go_wr_i[0:n_int_fus].eq(go_wr_o[0:n_int_fus]) # wr
        comb += go_die_i[0:n_int_fus].eq(anydie[0:n_int_fus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_int_fus].eq(cu.rd_rel_o[0:n_int_fus])
        comb += intpick1.req_rel_i[0:n_int_fus].eq(cu.req_rel_o[0:n_int_fus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_int_fus].eq(int_rd_o[0:n_int_fus])
        comb += intpick1.writable_i[0:n_int_fus].eq(int_wr_o[0:n_int_fus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_int_fus].eq(bshadow.go_die_o[0:n_int_fus])
        comb += shadows.reset_i[0:n_int_fus].eq(bshadow.go_die_o[0:n_int_fus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_int_fus].eq(allshadown)
        comb += cu.go_die_i[0:n_int_fus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_int_fus):
            comb += shadows.s_good_i[i][0:n_int_fus].eq(go_wr_o[0:n_int_fus])

        # work out the current-activated busy unit (by recording the old one)
        with m.If(fn_issue_o): # only update prev bit if instruction issued
            sync += fn_issue_prev.eq(fn_issue_o)

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_int_fus):
            comb += shadows.shadow_i[i][0:n_int_fus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_int_fus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i
        # NOTE(review): the code below drives the separate "bshadow"
        # matrix (width 1), not an extra bit on "shadows".

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_int_fus].eq(shreset[0:n_int_fus])

        # a branch is "active" from its issue (or while the recorder says
        # so) until the branch unit's go_wr fires
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_int_fus):
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # 0x1f masks to five bits, i.e. the hard-coded n_int_alus above
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        # (this block comes after the br1.issue_i one, so its sync
        # assignment to bspec.active_i takes priority if both fire)
        with m.If(br1.go_wr_i):
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_int_fus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        print ("intregdeps wen len", len(intfus.dest_rsel_o))
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_int_fus].eq(go_rd_o[0:n_int_fus])
        comb += cu.go_wr_i[0:n_int_fus].eq(go_wr_o[0:n_int_fus])
        comb += cu.issue_i[0:n_int_fus].eq(fn_issue_o[0:n_int_fus])

        return m


    def __iter__(self):
        """ Yields the signals exported as ports (see ports()).

            NOTE(review): reg_enable_i, busy_o and int_insn_i are not
            yielded here; confirm whether that is intentional.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ Returns everything produced by __iter__ as a list. """
        return list(self)
522
# Operation numbers shared by the test driver and the RegSim model.
# Values 0-3 are arithmetic operations, 4-7 are branch comparisons.
IADD = 0
ISUB = 1
IMUL = 2
ISHF = 3
IBGT = 4
IBLT = 5
IBEQ = 6
IBNE = 7


class RegSim:
    """ Pure-python model of the integer register file, used to compute
        the values the hardware is expected to end up with.
    """
    def __init__(self, rwidth, nregs):
        """ Inputs:

            * :rwidth: register width in bits (results are masked to this)
            * :nregs:  number of registers, all initialised to zero
        """
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, src1, src2, dest):
        """ Performs operation *op* on registers src1 and src2, stores the
            result (masked to rwidth bits) in register dest and returns it.

            Comparison operations produce 1 or 0.  Raises ValueError for an
            operation number that is not one of the I* constants, instead
            of failing later on an unassigned local.
        """
        maxbits = (1 << self.rwidth) - 1
        src1 = self.regs[src1] & maxbits
        src2 = self.regs[src2] & maxbits
        if op == IADD:
            val = src1 + src2
        elif op == ISUB:
            val = src1 - src2
        elif op == IMUL:
            val = src1 * src2
        elif op == ISHF:
            val = src1 >> (src2 & maxbits)
        elif op == IBGT:
            val = int(src1 > src2)
        elif op == IBLT:
            val = int(src1 < src2)
        elif op == IBEQ:
            val = int(src1 == src2)
        elif op == IBNE:
            val = int(src1 != src2)
        else:
            raise ValueError("unknown operation %r" % (op,))
        # wrap-around: negative and oversized results are truncated
        val &= maxbits
        self.setval(dest, val)
        return val

    def setval(self, dest, val):
        """ Stores val in register dest (and logs it). """
        print ("sim setval", dest, hex(val))
        self.regs[dest] = val

    def dump(self, dut):
        """ Simulation sub-process: reads every register from the dut and
            prints it next to the expected value.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """ Simulation sub-process: compares every dut register against
            the model.  On the first mismatch prints it, dumps all
            registers and raises AssertionError.

            The exception is raised explicitly (rather than with an assert
            statement) so that the check still fails under "python -O".
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                raise AssertionError("reg %d expected %x received %x" %
                                     (i, val, reg))
578
def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
    """ Simulation sub-process: presents one integer instruction to the
        dut.  Clears every bit of int_insn_i, sets the register numbers,
        raises bit *op* of int_insn_i and enables register decode, then
        sets the two branch-shadow request flags.
    """
    insn = dut.int_insn_i

    # start from an all-zero instruction vector
    for bit in range(len(insn)):
        yield insn[bit].eq(0)

    settings = (
        (dut.int_dest_i, dest),
        (dut.int_src1_i, src1),
        (dut.int_src2_i, src2),
        (insn[op], 1),
        (dut.reg_enable_i, 1),
        # these indicate that the instruction is to be made
        # shadow-dependent on (either) branch success or branch fail
        (dut.branch_fail_i, branch_fail),
        (dut.branch_succ_i, branch_success),
    )
    for signal, value in settings:
        yield signal.eq(value)
592
593
def print_reg(dut, rnums):
    """ Simulation sub-process: reads the integer registers numbered in
        rnums from the dut and prints them, in hex, on a single line.
    """
    hexvals = []
    for regnum in rnums:
        current = yield dut.intregs.regs[regnum].reg
        hexvals.append("%x" % current)
    labels = ','.join(str(regnum) for regnum in rnums)
    print ("reg %s: %s" % (labels, ','.join(hexvals)))
601
602
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """ Returns a list of n_ops random instructions.

        Each one is a tuple (src1, src2, dest, op) with register numbers
        in 1..dut.n_regs-1 and op in 0..max_opnums.  With shadowing set, a
        fifth element (0, 0) - no branch shadow requested - is appended.
    """
    top_reg = dut.n_regs - 1
    ops = []
    for _ in range(n_ops):
        # draw order (src1, src2, dest, op) is part of the behaviour:
        # it fixes the sequence produced for a given random seed
        src1, src2, dest = (randint(1, top_reg) for _ in range(3))
        op = randint(0, max_opnums)

        entry = (src1, src2, dest, op)
        if shadowing:
            entry += ((0, 0),)
        ops.append(entry)
    return ops
616
617
def wait_for_busy_clear(dut):
    """ Simulation sub-process: samples dut.busy_o once per clock and
        returns as soon as it reads as zero.
    """
    while (yield dut.busy_o):
        print ("busy",)
        yield
625
626
def wait_for_issue(dut):
    """ Simulation sub-process: samples dut.issue_o once per clock until
        it reads as set, then clears every bit of int_insn_i and drops
        reg_enable_i.
    """
    # spin, one clock at a time, until the instruction has been accepted
    while not (yield dut.issue_o):
        yield

    # withdraw the instruction
    insn = dut.int_insn_i
    for bit in range(len(insn)):
        yield insn[bit].eq(0)
    yield dut.reg_enable_i.eq(0)
639
def scoreboard_branch_sim(dut, alusim):
    """ Simulation process exercising branch speculation.

        Issues one branch together with one instruction for each outcome
        (each shadowed by the branch), replays the same program on the
        alusim model following only the path the model's branch result
        selects, and finally checks the dut registers against the model.

        * :dut:    the Scoreboard under test (already elaborated, since
                   int_insn_i is used by the helpers)
        * :alusim: RegSim instance holding the expected register values
    """

    iseed = 3

    for i in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        # (this inner loop re-uses, and so overwrites, the outer "i")
        for i in range(1, dut.n_regs):
            val = 31+i*3 # immediately replaced by the random value below
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # disabled: randomly generated variant of the program below
        if False:
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        # fixed program: a branch instruction carries, in its "dest"
        # slot, the pair (instructions if ok, instructions if fail)
        if True:
            insts = []
            #insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
            #branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # independent copy for the model run: the issue loop below
        # consumes (and extends) insts
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts # same list object: appends below extend the loop
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            # NOTE(review): Scoreboard.elaborate sets branch_direction_o to
            # br1.data_o + 1 and treats data_o == 1 as "branch occurs", so
            # direction 1 would mean the branch failed and 2 that it was
            # taken; the wording of the two "skip" comments below looks
            # inverted relative to that - confirm.
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            # branch already resolved: issue without any shadow request
            if branch_direction != 0:
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            yield from int_instr(dut, op, src1, src2, dest,
                                 shadow_on, shadow_off)
            yield
            yield from wait_for_issue(dut)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay on the model, following only the path actually selected
        # by the model's own branch result
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            branch_res = alusim.op(op, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
754
755
def scoreboard_sim(dut, alusim):
    """ Simulation process: twenty rounds of random instructions.

        Each round loads random values into registers 1..n_regs-1 (in both
        the dut and the alusim model), issues ten random instructions,
        waits for the dut to go idle and checks its registers against the
        model.

        The "if False" sections are disabled regression programs kept for
        reference.  NOTE(review): several of them append 4-element tuples,
        which the issue loop (expecting a fifth (br_ok, br_fail) element)
        could not unpack if they were enabled as-is.

        * :dut:    the Scoreboard under test (already elaborated, since
                   int_insn_i is used by the helpers)
        * :alusim: RegSim instance holding the expected register values
    """

    seed(0)

    for i in range(20):

        # set random values in the registers
        # (this inner loop re-uses, and so overwrites, the outer "i")
        for i in range(1, dut.n_regs):
            val = 31+i*3 # immediately replaced by the random value below
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if True:
            instrs = create_random_ops(dut, 10, True, 4)

        if False:
            instrs.append((2, 3, 3, 0))
            instrs.append((5, 3, 3, 1))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, (0, 0)) )
            instrs.append( (5, 2, 3, 1, (0, 0)) )
            instrs.append( (7, 1, 5, 2, (0, 0)) )
            instrs.append( (5, 6, 6, 4, (0, 0)) )
            instrs.append( (7, 5, 2, 2, (1, 0)) )
            instrs.append( (1, 7, 5, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, (1, 0)) )
            instrs.append( (1, 6, 7, 3, (0, 0)) )
            instrs.append( (6, 7, 7, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, (src1, src2, dest, op, (br_ok, br_fail)) in enumerate(instrs):

            print ("instr %d: (%d, %d, %d, %d)" % (i, src1, src2, dest, op))
            # the model is updated in issue order, before the dut is driven
            alusim.op(op, src1, src2, dest)
            yield from int_instr(dut, op, src1, src2, dest, br_ok, br_fail)
            yield
            yield from wait_for_issue(dut)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
872
873
def test_scoreboard():
    """ Builds a 16-bit, 8-register Scoreboard, writes its RTLIL to
        "test_scoreboard6600.il" and runs scoreboard_sim against it,
        recording a VCD trace in "test_scoreboard6600.vcd".
    """
    rwid, n_regs = 16, 8
    dut = Scoreboard(rwid, n_regs)
    alusim = RegSim(rwid, n_regs)

    # conversion also elaborates the design
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    # the branch-speculation variant, scoreboard_branch_sim(dut, alusim),
    # can be run in place of scoreboard_sim with the same arguments
    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')
886
887
# run the scoreboard test when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()