add docstring
[soc.git] / src / experiment / score6600.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
4
5 from regfile.regfile import RegFileArray, treereduce
6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
7 from scoreboard.fu_reg_matrix import FURegDepMatrix
8 from scoreboard.global_pending import GlobalPending
9 from scoreboard.group_picker import GroupPicker
10 from scoreboard.issue_unit import IntFPIssueUnit, RegDecode
11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
12
13 from compalu import ComputationUnitNoDelay
14
15 from alu_hier import ALU, BranchALU
16 from nmutil.latch import SRLatch
17
18 from random import randint, seed
19 from copy import deepcopy
20
21
class CompUnitsBase(Elaboratable):
    """ Base class grouping several Computation Units behind one interface.

        The per-unit single-bit control signals (issue, go_rd, go_wr,
        shadown, go_die, busy, rd_rel, req_rel) of every unit in the group
        are concatenated, in unit order, into one multi-bit signal each.

        The class nests: when the first entry of ``units`` is itself a
        CompUnitsBase, the group's width is the sum of its members'
        ``n_units``, so a group of groups presents one flat, top-level
        concatenation of all the signals of the ALUs beneath it.

        At the lowest level the intent is that a group of (identical) ALUs
        may be passed the same operation and, beyond that, share the *same
        pipeline*, becoming a "Concurrent Computation Unit" as defined by
        Mitch Alsup (see section 11.4.9.3)
    """
    def __init__(self, rwid, units):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :units:  sequence of ALUs (or CompUnitsBase derivatives)
        """
        self.units = units
        self.rwid = rwid

        # a group of groups is as wide as all of its members put together;
        # only the first entry is inspected to decide which case applies
        if units and isinstance(units[0], CompUnitsBase):
            self.n_units = sum(group.n_units for group in units)
        else:
            self.n_units = len(units)

        width = self.n_units

        # inputs (one bit per unit)
        self.issue_i = Signal(width, reset_less=True)
        self.go_rd_i = Signal(width, reset_less=True)
        self.go_wr_i = Signal(width, reset_less=True)
        self.shadown_i = Signal(width, reset_less=True)
        self.go_die_i = Signal(width, reset_less=True)

        # outputs (one bit per unit)
        self.busy_o = Signal(width, reset_less=True)
        self.rd_rel_o = Signal(width, reset_less=True)
        self.req_rel_o = Signal(width, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        """ Register every unit as submodule "comp<N>" and wire it up. """
        m = Module()
        comb = m.d.comb
        units = self.units

        for idx, unit in enumerate(units):
            print("elaborate comp%d" % idx, self, unit)
            setattr(m.submodules, "comp%d" % idx, unit)

        def joined(attr):
            # concatenation of one named signal across all units, in order
            return Cat(*[getattr(unit, attr) for unit in units])

        # unit outputs are gathered into the group's outputs...
        comb += self.rd_rel_o.eq(joined("rd_rel_o"))
        comb += self.req_rel_o.eq(joined("req_rel_o"))
        comb += self.busy_o.eq(joined("busy_o"))
        # ...and the group's inputs are scattered out to the units
        comb += joined("go_die_i").eq(self.go_die_i)
        comb += joined("shadown_i").eq(self.shadown_i)
        comb += joined("go_wr_i").eq(self.go_wr_i)
        comb += joined("go_rd_i").eq(self.go_rd_i)
        comb += joined("issue_i").eq(self.issue_i)

        # connect data register input/output

        # merge all FU / ALU outputs to a single value.  treereduce is
        # handed the units themselves (original note: it needs items with
        # an attribute named "data_o"); skipped for an empty group.
        if units:
            merged = treereduce(units)
            comb += self.data_o.eq(merged)

        # every unit sees the same pair of source operands
        for unit in units:
            comb += unit.src1_i.eq(self.src1_i)
            comb += unit.src2_i.eq(self.src2_i)

        return m
128
129
class CompUnitALUs(CompUnitsBase):
    """ Group of four integer Computation Units, each wrapping its own ALU.

        Unit N is hard-wired (in elaborate) to operation N:
        0=add, 1=sub, 2=mul, 3=shf.
    """

    def __init__(self, rwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
        """

        # Int ALUs: one each for add, sub, mul and shf, created up-front
        alus = [ALU(rwid) for _ in range(4)]

        # each ALU gets its own Computation Unit with a 2-bit opcode
        units = [ComputationUnitNoDelay(rwid, 2, alu) for alu in alus]

        print("alu units", units)
        CompUnitsBase.__init__(self, rwid, units)
        print("alu base init done")

    def elaborate(self, platform):
        """ Wire the group, then tie each unit's opcode to a constant. """
        print("alu elaborate start")
        m = CompUnitsBase.elaborate(self, platform)
        print("alu elaborate done")
        comb = m.d.comb

        # opcode equals the unit's position: add, sub, mul, shf
        for opcode, unit in enumerate(self.units):
            comb += unit.oper_i.eq(Const(opcode, 2))

        return m
164
165
class CompUnitBR(CompUnitsBase):
    """ Group containing the single branch Computation Unit.

        The branch ALU and its Computation Unit are kept as attributes
        (``bgt`` and ``br1``) so that users of this class can reach them,
        e.g. to create a shadow unit for the branch.
    """

    def __init__(self, rwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
        """

        # Branch ALU and CU (3-bit opcode)
        self.bgt = BranchALU(rwid)
        self.br1 = ComputationUnitNoDelay(rwid, 3, self.bgt)

        units = [self.br1]
        print("br units", units)
        super().__init__(rwid, units)
        print("br base init done")

    def elaborate(self, platform):
        """ Wire the group, then tie the branch unit's opcode to 4. """
        print("br elaborate start")
        m = super().elaborate(platform)
        print("br elaborate done")

        m.d.comb += self.br1.oper_i.eq(Const(4, 3)) # op=bgt

        return m
194
195
class FunctionUnits(Elaboratable):
    """ Pairs an FU-FU dependency matrix with an FU-Register dependency
        matrix and exposes their combined interface.

        Besides the signals created in the constructor, elaborate() also
        publishes the FU-Regs matrix's ``wr_pend_o`` as ``self.wr_pend_o``
        (for use in WaWGrid), so that attribute only exists after
        elaboration.
    """

    def __init__(self, n_regs, n_int_alus):
        """ Inputs:

            * :n_regs:     number of registers (width of the R# vectors)
            * :n_int_alus: number of integer Function Units
        """
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in

        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)

        self.req_rel_i = Signal(n_int_alus, reset_less = True)
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.req_rel_o = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid

    def elaborate(self, platform):
        """ Instantiate both matrices and connect them to the interface. """
        m = Module()
        comb = m.d.comb

        n_fus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        m.submodules.intfudeps = intfudeps = FUFUDepMatrix(n_fus, n_fus)
        # Integer FU-Reg Dep Matrix
        m.submodules.intregdeps = intregdeps = FURegDepMatrix(n_fus,
                                                              self.n_regs)

        # global pending vectors out, and fed straight back into the
        # FU-Regs matrix's own pending inputs
        comb += [self.g_int_rd_pend_o.eq(intregdeps.rd_rsel_o),
                 self.g_int_wr_pend_o.eq(intregdeps.wr_rsel_o),
                 intregdeps.rd_pend_i.eq(intregdeps.rd_rsel_o),
                 intregdeps.wr_pend_i.eq(intregdeps.wr_rsel_o),
                ]

        # per-FU pending from the FU-Regs matrix drives the FU-FU matrix
        comb += [intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o),
                 intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o),
                ]
        self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid

        # FU-FU matrix control in, readable/writable out
        comb += [intfudeps.issue_i.eq(self.fn_issue_i),
                 intfudeps.go_rd_i.eq(self.go_rd_i),
                 intfudeps.go_wr_i.eq(self.go_wr_i),
                 intfudeps.go_die_i.eq(self.go_die_i),
                 self.readable_o.eq(intfudeps.readable_o),
                 self.writable_o.eq(intfudeps.writable_o),
                ]

        # Connect function issue / arrays, and dest/src1/src2
        comb += [intregdeps.dest_i.eq(self.dest_i),
                 intregdeps.src1_i.eq(self.src1_i),
                 intregdeps.src2_i.eq(self.src2_i),
                ]

        # the FU-Regs matrix receives the same control lines as FU-FU
        comb += [intregdeps.go_rd_i.eq(self.go_rd_i),
                 intregdeps.go_wr_i.eq(self.go_wr_i),
                 intregdeps.go_die_i.eq(self.go_die_i),
                 intregdeps.issue_i.eq(self.fn_issue_i),
                ]

        # register-select vectors out
        comb += [self.dest_rsel_o.eq(intregdeps.dest_rsel_o),
                 self.src1_rsel_o.eq(intregdeps.src1_rsel_o),
                 self.src2_rsel_o.eq(intregdeps.src2_rsel_o),
                ]

        return m
271
272
class Scoreboard(Elaboratable):
    """ Top level of the 6600-style scoreboard experiment.

        Instantiates integer and FP register files, four integer ALU
        Computation Units plus one branch Computation Unit, the
        FU-FU / FU-Regs dependency matrices (FunctionUnits), a group
        picker, the issue unit with its register decoder, two shadow
        matrices and a branch speculation recorder, and wires them
        together.

        Note: ``int_insn_i`` (the per-FU instruction-select input of the
        issue unit) is only assigned inside elaborate(), so it is not
        available on the object until the design has been elaborated.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction_o is 0 until
        # the branch unit has written its result; after that it holds the
        # branch ALU result plus one, i.e. 1 when the result was 0 (branch
        # failed) and 2 when the result was 1 (branch occurred).
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Build and connect the whole scoreboard; returns the Module. """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested but not connected to anything below
        # NOTE(review): presumably placeholders for the FP TODO - confirm
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        cua = CompUnitALUs(self.rwid)
        cub = CompUnitBR(self.rwid)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        br1 = cub.br1 # get at the branch computation unit

        # number of integer units is whatever the CU group adds up to
        # (four ALUs plus the branch unit), rather than a magic constant
        n_int_alus = cu.n_units

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_int_fus = n_int_alus
        n_fp_fus = 0 # for now

        # one bit per integer FU, all set
        fu_mask = (1 << n_int_fus) - 1

        # Integer Priority Picker 1: picks across all integer FUs
        intpick1 = GroupPicker(n_int_fus)
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IntFPIssueUnit(n_int_fus, n_fp_fus)
        m.submodules.issueunit = issueunit

        # Shadow Matrices.  "shadows" (n_int_fus wide) is used below for
        # preserving instruction order (originally noted as being for
        # write-after-write hazards); "bshadow" is a separate, one-wide
        # matrix dedicated to the branch unit.
        m.submodules.shadows = shadows = ShadowMatrix(n_int_fus, n_int_fus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_int_fus, 1, False)

        # record previous instruction to cast shadow on current instruction
        fn_issue_prev = Signal(n_int_fus)
        prev_shadow = Signal(n_int_fus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_int_fus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [ regdecode.dest_i.eq(self.int_dest_i),
                  regdecode.src1_i.eq(self.int_src1_i),
                  regdecode.src2_i.eq(self.int_src2_i),
                  regdecode.enable_i.eq(self.reg_enable_i),
                  self.issue_o.eq(issueunit.issue_o)
                ]
        self.int_insn_i = issueunit.i.insn_i # enabled by instruction decode

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.i.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.i.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_int_fus, reset_less=True)
        allshadown = Signal(n_int_fus, reset_less=True)
        shreset = Signal(n_int_fus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_int_fus].eq(go_rd_o[0:n_int_fus]) # rd
        comb += go_wr_i[0:n_int_fus].eq(go_wr_o[0:n_int_fus]) # wr
        comb += go_die_i[0:n_int_fus].eq(anydie[0:n_int_fus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_int_fus].eq(cu.rd_rel_o[0:n_int_fus])
        comb += intpick1.req_rel_i[0:n_int_fus].eq(cu.req_rel_o[0:n_int_fus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_int_fus].eq(int_rd_o[0:n_int_fus])
        comb += intpick1.writable_i[0:n_int_fus].eq(int_wr_o[0:n_int_fus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        # a "die" from the branch shadow resets the order-preserving shadows
        comb += shadows.reset_i[0:n_int_fus].eq(bshadow.go_die_o[0:n_int_fus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_int_fus].eq(allshadown)
        comb += cu.go_die_i[0:n_int_fus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_int_fus):
            comb += shadows.s_good_i[i][0:n_int_fus].eq(go_wr_o[0:n_int_fus])

        # work out the current-activated busy unit (by recording the old one)
        with m.If(fn_issue_o): # only update prev bit if instruction issued
            sync += fn_issue_prev.eq(fn_issue_o)

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_int_fus):
            comb += shadows.shadow_i[i][0:n_int_fus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the separate
        # one-wide bshadow matrix created above.
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_int_fus].eq(shreset[0:n_int_fus])

        # a branch is outstanding from the cycle it is issued until the
        # branch unit gets its go_wr
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_int_fus):
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & fu_mask)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & fu_mask)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            # recorded direction is the ALU result plus one (so never 0)
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_int_fus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        print("intregdeps wen len", len(intfus.dest_rsel_o))
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_int_fus].eq(go_rd_o[0:n_int_fus])
        comb += cu.go_wr_i[0:n_int_fus].eq(go_wr_o[0:n_int_fus])
        comb += cu.issue_i[0:n_int_fus].eq(fn_issue_o[0:n_int_fus])

        return m


    def __iter__(self):
        """ Yield the signals exported as ports: everything the two
            register files iterate over, then the scoreboard's own.

            NOTE(review): reg_enable_i, busy_o and int_insn_i are not
            listed here - confirm whether that is intentional.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ Return the port signals (see __iter__) as a list. """
        return list(self)
539
# Opcode numbers understood by RegSim.op (and used by the test-bench
# instruction tuples further down).
IADD = 0
ISUB = 1
IMUL = 2
ISHF = 3
IBGT = 4
IBLT = 5
IBEQ = 6
IBNE = 7


class RegSim:
    """ Pure-python reference model of the integer register file.

        Each operation is applied to a plain list of register values so
        that the hardware's registers can afterwards be compared against
        the expected contents.
    """
    def __init__(self, rwidth, nregs):
        """ Inputs:

            * :rwidth: register width in bits (results wrap to this width)
            * :nregs:  number of registers, all initialised to zero
        """
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, src1, src2, dest):
        """ Apply opcode `op` to registers `src1` and `src2`, store the
            result (truncated to rwidth bits) in register `dest` and
            return it.

            Comparison opcodes produce 1 or 0.  An opcode that is not one
            of the I* constants raises ValueError and leaves the register
            contents untouched.
        """
        maxbits = (1 << self.rwidth) - 1
        src1 = self.regs[src1] & maxbits
        src2 = self.regs[src2] & maxbits
        if op == IADD:
            val = src1 + src2
        elif op == ISUB:
            val = src1 - src2
        elif op == IMUL:
            val = src1 * src2
        elif op == ISHF:
            # src2 has already been masked to the register width above
            val = src1 >> src2
        elif op == IBGT:
            val = int(src1 > src2)
        elif op == IBLT:
            val = int(src1 < src2)
        elif op == IBEQ:
            val = int(src1 == src2)
        elif op == IBNE:
            val = int(src1 != src2)
        else:
            raise ValueError("unsupported opcode %r" % (op,))
        # masking also wraps negative results (ISUB) to rwidth bits
        val &= maxbits
        self.setval(dest, val)
        return val

    def setval(self, dest, val):
        """ Store `val` in register `dest` as-is (no masking), logging it. """
        print("sim setval", dest, hex(val))
        self.regs[dest] = val

    def dump(self, dut):
        """ Simulation generator: print every register, expected against
            the value read back from `dut.intregs`, marking mismatches.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """ Simulation generator: compare every register in `dut.intregs`
            with the model.  On the first mismatch the full register set
            is dumped and AssertionError is raised.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                # raised explicitly: a bare assert would vanish under -O
                raise AssertionError(
                    "reg %d expected %x received %x" % (i, val, reg))
595
def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
    """ Simulation generator: present one integer instruction to `dut`.

        Clears every bit of int_insn_i, sets the destination and source
        register numbers, raises the int_insn_i bit numbered `op`,
        enables register decode and finally sets the two branch-shadow
        request inputs.
    """
    insn = dut.int_insn_i

    # start from a clean slate: no function unit selected
    for bit in range(len(insn)):
        yield insn[bit].eq(0)

    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)
    yield dut.int_insn_i[op].eq(1)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)
609
610
def print_reg(dut, rnums):
    """ Simulation generator: read the integer registers numbered in
        `rnums` from `dut` and print them, in hex, on a single line.
    """
    values = []
    for rnum in rnums:
        # the simulator sends back the current value of the yielded signal
        values.append("%x" % (yield dut.intregs.regs[rnum].reg))
    labels = ','.join(map(str, rnums))
    print("reg %s: %s" % (labels, ','.join(values)))
618
619
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """ Return a list of `n_ops` random instructions.

        Each instruction is a tuple (src1, src2, dest, op): register
        numbers are drawn from 1..dut.n_regs-1 (register 0 is never
        picked) and the opcode from 0..max_opnums inclusive.  With
        `shadowing` set, a fifth element (0, 0) - the branch shadow
        flags - is appended to every tuple.
    """
    top_reg = dut.n_regs - 1
    insts = []
    for _ in range(n_ops):
        # tuple elements are evaluated left to right, so the random
        # numbers are consumed in the order src1, src2, dest, op
        inst = (randint(1, top_reg),
                randint(1, top_reg),
                randint(1, top_reg),
                randint(0, max_opnums))
        if shadowing:
            inst += ((0, 0),)
        insts.append(inst)
    return insts
633
634
def wait_for_busy_clear(dut):
    """ Simulation generator: keep stepping the clock until dut.busy_o
        reads as zero, printing "busy" for every cycle spent waiting.
    """
    busy = yield dut.busy_o
    while busy:
        print("busy")
        yield
        busy = yield dut.busy_o
642
643
def wait_for_issue(dut):
    """ Simulation generator: step the clock until dut.issue_o is set,
        then withdraw the instruction by clearing every int_insn_i bit
        and dropping reg_enable_i.
    """
    while not (yield dut.issue_o):
        # not accepted yet: wait one more cycle
        #print ("busy",)
        #yield from print_reg(dut, [1,2,3])
        yield
        #yield from print_reg(dut, [1,2,3])

    insn = dut.int_insn_i
    for bit in range(len(insn)):
        yield insn[bit].eq(0)
    yield dut.reg_enable_i.eq(0)
656
def scoreboard_branch_sim(dut, alusim):
    """ Simulation generator exercising branch speculation.

        Instructions are tuples (src1, src2, dest, op, (shadow_on,
        shadow_off)).  For a branch (op >= 4) the `dest` slot instead
        holds a pair of instruction lists (branch_ok, branch_fail); both
        lists are queued behind the branch, flagged (1, 0) and (0, 1)
        respectively, and src2 is used as the branch's destination.

        Once dut.branch_direction_o is non-zero, queued instructions
        belonging to the path that was not followed are skipped.  The
        same program is afterwards replayed on `alusim`, following only
        the path its own branch result selects, and the hardware
        registers are checked against the model.
    """

    iseed = 3

    for _ in range(1):

        print("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers (register 0 is left alone)
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        if False:
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            # fixed regression case: one branch, one instruction per path
            #insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = [(5, 7, 5, 1, (1, 0))]
            #branch_ok.append( None )
            branch_fail = [(1, 1, 2, 0, (0, 1))]
            #branch_fail.append( None )
            insts = [(6, 4, (branch_ok, branch_fail), 4, (0, 0))]

        # untouched copy for the reference model: the queue is consumed
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        queue = insts
        branch_direction = 0
        while queue:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = queue.pop(0)

            # direction 1 cancels instructions flagged shadow_on,
            # direction 2 cancels those flagged shadow_off
            cancelled = ((branch_direction == 1 and shadow_on) or
                         (branch_direction == 2 and shadow_off))
            if cancelled:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue

            # the branch has been resolved: nothing left to be shadowed by
            if branch_direction != 0:
                shadow_on = 0
                shadow_off = 0

            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        queue.append(tuple(ok[:4]) + ((1, 0),))
                    if fl:
                        queue.append(tuple(fl[:4]) + ((0, 1),))

            print("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            yield from int_instr(dut, op, src1, src2, dest,
                                 shadow_on, shadow_off)
            yield
            yield from wait_for_issue(dut)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay on the reference model, following only the path that
        # the model's own branch result selects
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            branch_res = alusim.op(op, src1, src2, dest)
            if is_branch:
                siminsts.extend(branch_ok if branch_res else branch_fail)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
771
772
def scoreboard_sim(dut, alusim):
    """ Simulation generator: twenty rounds of random instruction tests.

        Every round loads random values into registers 1..n_regs-1 of
        both `dut` and the reference model `alusim`, builds a list of
        instructions, issues them one at a time (applying each to the
        model just before it is presented to the hardware), waits for
        the hardware to go idle and then checks all registers.

        Instructions are (src1, src2, dest, op) tuples with an optional
        fifth element (branch_ok, branch_fail) holding the shadow flags;
        when it is absent, (0, 0) is used.  This lets the four-element
        regression cases below be enabled without further edits.
    """

    seed(0)

    for _ in range(20):

        # set random values in the registers (register 0 is left alone).
        # (alternative, deterministic values: 31+regnum*3)
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if True:
            instrs = create_random_ops(dut, 10, True, 4)

        if False:
            instrs.append((2, 3, 3, 0))
            instrs.append((5, 3, 3, 1))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, (0, 0)) )
            instrs.append( (5, 2, 3, 1, (0, 0)) )
            instrs.append( (7, 1, 5, 2, (0, 0)) )
            instrs.append( (5, 6, 6, 4, (0, 0)) )
            instrs.append( (7, 5, 2, 2, (1, 0)) )
            instrs.append( (1, 7, 5, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, (1, 0)) )
            instrs.append( (1, 6, 7, 3, (0, 0)) )
            instrs.append( (6, 7, 7, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for num, instr in enumerate(instrs):
            src1, src2, dest, op = instr[:4]
            # shadow flags are optional: default to "not shadowed"
            br_ok, br_fail = instr[4] if len(instr) > 4 else (0, 0)

            print("instr %d: (%d, %d, %d, %d)" % (num, src1, src2, dest, op))
            # the model is updated in program order, before the issue
            alusim.op(op, src1, src2, dest)
            yield from int_instr(dut, op, src1, src2, dest, br_ok, br_fail)
            yield
            yield from wait_for_issue(dut)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
889
890
def test_scoreboard():
    """ Build a 16-bit, 8-register Scoreboard, write its RTLIL to
        "test_scoreboard6600.il" and run scoreboard_sim against it,
        recording a VCD trace in "test_scoreboard6600.vcd".
    """
    rwid, n_regs = 16, 8
    dut = Scoreboard(rwid, n_regs)
    alusim = RegSim(rwid, n_regs)

    # converting first also elaborates the design
    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    testbench = scoreboard_sim(dut, alusim)
    run_simulation(dut, testbench, vcd_name='test_scoreboard6600.vcd')

    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')


if __name__ == '__main__':
    test_scoreboard()