# soc.git / src / experiment / score6600.py
# (blob 9724210e988c3f6f85f9826cd8860ccd8965f2db)
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
4
5 from regfile.regfile import RegFileArray, treereduce
6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
7 from scoreboard.fu_reg_matrix import FURegDepMatrix
8 from scoreboard.global_pending import GlobalPending
9 from scoreboard.group_picker import GroupPicker
10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
12
13 from compalu import ComputationUnitNoDelay
14
15 from alu_hier import ALU, BranchALU
16 from nmutil.latch import SRLatch
17
18 from random import randint, seed
19 from copy import deepcopy
20
21
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Amazingly, this class works recursively.  It's supposed to just
        look after some ALUs (that can handle the same operations),
        grouping them together, however it turns out that the same code
        can also group *groups* of Computation Units together as well.

        Basically it was intended just to concatenate the ALU's issue,
        go_rd etc. signals together, which start out as bits and become
        sequences.  Turns out that the same trick works just as well
        on Computation Units!

        So this class may be used recursively to present a top-level
        sequential concatenation of all the signals in and out of
        ALUs, whilst at the same time making it convenient to group
        ALUs together.

        At the lower level, the intent is that groups of (identical)
        ALUs may be passed the same operation.  Even beyond that,
        the intent is that that group of (identical) ALUs actually
        share the *same pipeline* and as such become a "Concurrent
        Computation Unit" as defined by Mitch Alsup (see section
        11.4.9.3)
    """
    def __init__(self, rwid, units):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :units:  sequence of ALUs (or CompUnitsBase derivatives)

            Attributes set up here:

            * :n_units: total number of leaf units.  When the first entry
                        of `units` is itself a CompUnitsBase, this is the
                        sum of each member's n_units, otherwise it is
                        simply len(units).
        """
        self.units = units
        self.rwid = rwid
        if units and isinstance(units[0], CompUnitsBase):
            # a group of groups: width is the sum of the sub-group widths
            self.n_units = sum(u.n_units for u in units)
        else:
            self.n_units = len(units)

        n_units = self.n_units

        # inputs: one bit per leaf unit
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)

        # outputs: one bit per leaf unit
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        """ Adds every unit as submodule "comp<N>" and wires it up:

            * per-unit outputs (rd_rel_o, req_rel_o, busy_o) are
              concatenated, in unit order, onto this object's outputs
            * this object's inputs (go_die_i, shadown_i, go_wr_i, go_rd_i,
              issue_i) are split, in unit order, across the units
            * all unit data_o values are merged through treereduce
            * src1_i / src2_i are broadcast to every unit
        """
        m = Module()
        comb = m.d.comb

        for i, alu in enumerate(self.units):
            setattr(m.submodules, "comp%d" % i, alu)

        units = self.units

        # unit outputs -> concatenated outputs
        comb += self.rd_rel_o.eq(Cat(*[alu.rd_rel_o for alu in units]))
        comb += self.req_rel_o.eq(Cat(*[alu.req_rel_o for alu in units]))
        comb += self.busy_o.eq(Cat(*[alu.busy_o for alu in units]))

        # concatenated inputs -> unit inputs (Cat used as assignment target)
        comb += Cat(*[alu.go_die_i for alu in units]).eq(self.go_die_i)
        comb += Cat(*[alu.shadown_i for alu in units]).eq(self.shadown_i)
        comb += Cat(*[alu.go_wr_i for alu in units]).eq(self.go_wr_i)
        comb += Cat(*[alu.go_rd_i for alu in units]).eq(self.go_rd_i)
        comb += Cat(*[alu.issue_i for alu in units]).eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "data_o"
        if units:
            data_o = treereduce(units)
            comb += self.data_o.eq(data_o)

        # every unit sees the same pair of source operands
        for alu in units:
            comb += alu.src1_i.eq(self.src1_i)
            comb += alu.src2_i.eq(self.src2_i)

        return m
129
130
class CompUnitALUs(CompUnitsBase):
    """ Group of four integer Computation Units sharing one opcode input.

        Each unit is a ComputationUnitNoDelay wrapped around its own ALU.
        The same oper_i value is presented to every unit.
    """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)

        # Int ALUs first (add, sub, mul, shf - in that order), then one
        # Computation Unit around each of them
        alus = [ALU(rwid) for _ in range(4)]
        comp_units = [ComputationUnitNoDelay(rwid, 2, alu) for alu in alus]

        CompUnitsBase.__init__(self, rwid, comp_units)

    def elaborate(self, platform):
        """ Base-class wiring plus the opcode broadcast. """
        m = CompUnitsBase.elaborate(self, platform)

        # hand the same operation to all units (an earlier experiment
        # instead hard-wired unit N to opcode N: 0=add 1=sub 2=mul 3=shf)
        m.d.comb += [unit.oper_i.eq(self.oper_i) for unit in self.units]

        return m
169
170
class CompUnitBR(CompUnitsBase):
    """ Group containing the single branch Computation Unit.

        The branch ALU and its Computation Unit are kept as the attributes
        `bgt` and `br1` so that other modules (e.g. the shadow logic) can
        get at them.
    """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width

            Note: the branch ALU is stored as self.bgt (and its CU as
            self.br1) so that a shadow unit can be created for it
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)

        # Branch ALU and CU
        self.bgt = BranchALU(rwid)
        self.br1 = ComputationUnitNoDelay(rwid, 3, self.bgt)
        branch_units = [self.br1]
        CompUnitsBase.__init__(self, rwid, branch_units)

    def elaborate(self, platform):
        """ Base-class wiring plus the opcode broadcast. """
        m = CompUnitsBase.elaborate(self, platform)

        # hand the same operation to all units (only br1 at present)
        m.d.comb += [unit.oper_i.eq(self.oper_i) for unit in self.units]

        return m
202
203
class FunctionUnits(Elaboratable):
    """ Dependency tracking for the integer Function Units.

        Wraps one FU-FU dependency matrix (FUFUDepMatrix) and one
        FU-Register dependency matrix (FURegDepMatrix) and connects them
        to each other and to this object's ports.
    """

    def __init__(self, n_regs, n_int_alus):
        """ Inputs:

            * :n_regs:     number of registers (width of the register
                           vectors dest_i / src1_i / src2_i)
            * :n_int_alus: number of integer Function Units (width of the
                           per-FU vectors)
        """
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in

        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)

        # NOTE(review): req_rel_i and req_rel_o are declared but not
        # connected anywhere in elaborate() - confirm whether still needed
        self.req_rel_i = Signal(n_int_alus, reset_less=True)
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.req_rel_o = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here, for use in
        # WaWGrid.  It only exists (as self.wr_pend_o) after elaborate()
        # has run.

    def elaborate(self, platform):
        """ Instantiates the two dependency matrices and wires them up. """
        m = Module()
        comb = m.d.comb

        n_intfus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
        m.submodules.intfudeps = intfudeps
        # Integer FU-Reg Dep Matrix
        intregdeps = FURegDepMatrix(n_intfus, self.n_regs)
        m.submodules.intregdeps = intregdeps

        # global read/write pending vectors are the FU-Reg matrix's
        # per-register select outputs...
        comb += self.g_int_rd_pend_o.eq(intregdeps.rd_rsel_o)
        comb += self.g_int_wr_pend_o.eq(intregdeps.wr_rsel_o)

        # ... which are also fed straight back in as its pending inputs
        comb += intregdeps.rd_pend_i.eq(intregdeps.rd_rsel_o)
        comb += intregdeps.wr_pend_i.eq(intregdeps.wr_rsel_o)

        # per-FU pending outputs of the FU-Reg matrix drive the FU-FU matrix
        comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
        comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
        self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid

        comb += intfudeps.issue_i.eq(self.fn_issue_i)
        comb += intfudeps.go_rd_i.eq(self.go_rd_i)
        comb += intfudeps.go_wr_i.eq(self.go_wr_i)
        comb += intfudeps.go_die_i.eq(self.go_die_i)
        comb += self.readable_o.eq(intfudeps.readable_o)
        comb += self.writable_o.eq(intfudeps.writable_o)

        # Connect function issue / arrays, and dest/src1/src2
        comb += intregdeps.dest_i.eq(self.dest_i)
        comb += intregdeps.src1_i.eq(self.src1_i)
        comb += intregdeps.src2_i.eq(self.src2_i)

        comb += intregdeps.go_rd_i.eq(self.go_rd_i)
        comb += intregdeps.go_wr_i.eq(self.go_wr_i)
        comb += intregdeps.go_die_i.eq(self.go_die_i)
        comb += intregdeps.issue_i.eq(self.fn_issue_i)

        comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
        comb += self.src1_rsel_o.eq(intregdeps.src1_rsel_o)
        comb += self.src2_rsel_o.eq(intregdeps.src2_rsel_o)

        return m
279
280
class Scoreboard(Elaboratable):
    """ Top level of the experimental 6600-style scoreboard.

        Ties together: integer (and, unused so far, FP) register files,
        the ALU and branch Computation Units, the Function Unit dependency
        matrices, a group picker, the issue unit, and two shadow matrices
        (one for instruction ordering, one for branch speculation).

        The following attributes exist only after elaborate() has run
        and are exported for the benefit of the test-bench:
        aluissue, brissue, alu_oper_i, br_oper_i.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction_o = 0 if
        # no branch has completed yet.  once the branch unit writes, it
        # is latched to (branch ALU result + 1): 1 when the result was 0,
        # 2 when the result was 1.
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Instantiates all submodules and wires them together. """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested but not connected yet (no FP FUs so far)
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        cua = CompUnitALUs(self.rwid, 2)
        cub = CompUnitBR(self.rwid, 2)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        br1 = cub.br1 # get at the branch computation unit

        # Count of number of FUs: taken from the computation units
        # themselves so that every per-FU vector below stays in step
        # with the units that actually exist (4 ALUs + 1 branch = 5)
        n_int_alus = cu.n_units
        n_intfus = n_int_alus
        fu_mask = (1 << n_intfus) - 1 # one bit per integer FU

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Integer Priority Picker: one request/grant bit per integer FU
        intpick1 = GroupPicker(n_intfus)
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit: one issue group per group of comp units
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        aluissue = IssueUnitGroup(cua.n_units)
        brissue = IssueUnitGroup(cub.n_units)
        issueunit = IssueUnitArray([aluissue, brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrices.  "shadows" has n_intfus shadows per FU, to be
        # used for write-after-write hazards / instruction ordering.
        # "bshadow" is a separate, 1-wide matrix used for branches.
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        # (fn_issue_prev is latched below but not read anywhere yet)
        fn_issue_prev = Signal(n_intfus)
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [ regdecode.dest_i.eq(self.int_dest_i),
                  regdecode.src1_i.eq(self.int_src1_i),
                  regdecode.src2_i.eq(self.int_src2_i),
                  regdecode.enable_i.eq(self.reg_enable_i),
                  self.issue_o.eq(issueunit.issue_o)
                ]

        # take these to outside (for testing)
        self.aluissue = aluissue
        self.brissue = brissue
        self.alu_oper_i = cua.oper_i
        self.br_oper_i = cub.oper_i

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        # the ordering shadows are reset whenever the branch shadow kills
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # work out the current-activated busy unit (by recording the old one)
        with m.If(fn_issue_o): # only update prev bit if instruction issued
            sync += fn_issue_prev.eq(fn_issue_o)

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the separate
        # 1-wide "bshadow" ShadowMatrix.
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & fu_mask)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & fu_mask)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            # NB: must stay after the active_i.eq(1) above so that the
            # clear takes priority when both conditions hold
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        """ Yields the externally visible signals of the scoreboard:
            everything the two register files yield, then the inputs and
            outputs declared in __init__.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        # reg_enable_i and busy_o are declared in __init__ as a module
        # input and output but used to be missing from the port list
        yield self.reg_enable_i
        yield self.issue_o
        yield self.busy_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ Returns the port list (see __iter__) as a list. """
        return list(self)
553
# opcodes understood by RegSim.op.  0-3 are ALU operations, 4-7 are
# branch comparisons (the test-bench treats "op >= 4" as a branch)
IADD = 0
ISUB = 1
IMUL = 2
ISHF = 3
IBGT = 4
IBLT = 5
IBEQ = 6
IBNE = 7


class RegSim:
    """ Software reference model of the integer register file.

        Instructions are applied to a plain python list of register
        values; the result can then be compared against the registers
        of the simulated hardware with check() / dump().
    """
    def __init__(self, rwidth, nregs):
        """ Inputs:

            * :rwidth: register width in bits (results are truncated to it)
            * :nregs:  number of registers (all start at zero)
        """
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, src1, src2, dest):
        """ Executes one instruction and stores the result in regs[dest].

            * :op:   one of IADD .. IBNE
            * :src1: register *number* of the first operand
            * :src2: register *number* of the second operand
            * :dest: register *number* receiving the result

            Returns the (width-truncated) result.  Raises ValueError for
            an opcode that is not one of IADD .. IBNE.
        """
        maxbits = (1 << self.rwidth) - 1
        src1 = self.regs[src1] & maxbits
        src2 = self.regs[src2] & maxbits
        if op == IADD:
            val = src1 + src2
        elif op == ISUB:
            val = src1 - src2
        elif op == IMUL:
            val = src1 * src2
        elif op == ISHF:
            val = src1 >> (src2 & maxbits)
        elif op == IBGT:
            val = int(src1 > src2)
        elif op == IBLT:
            val = int(src1 < src2)
        elif op == IBEQ:
            val = int(src1 == src2)
        elif op == IBNE:
            val = int(src1 != src2)
        else:
            # previously fell through to an UnboundLocalError on "val"
            raise ValueError("unsupported opcode %r" % (op,))
        val &= maxbits # wraps negative / oversized results to rwidth bits
        self.setval(dest, val)
        return val

    def setval(self, dest, val):
        """ Stores val in register number dest (and logs it). """
        print ("sim setval", dest, hex(val))
        self.regs[dest] = val

    def dump(self, dut):
        """ Simulation process: prints expected vs. actual for every
            register.  Each register value is obtained by yielding
            dut.intregs.regs[i].reg to the simulator.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """ Simulation process: compares every register against the
            hardware.  On the first mismatch the full register state is
            dumped and AssertionError is raised.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                # explicit raise: a bare "assert False" vanishes under -O
                raise AssertionError(
                    "reg %d expected %x received %x" % (i, val, reg))
609
def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
    """ Simulation process: presents one instruction to the scoreboard
        and waits until it has been issued.

        * :op:             opcode; a non-zero bit 2 or 3 selects the branch
                           issue group, otherwise the ALU group is used.
                           only the bottom two bits are handed to the unit
        * :src1, src2:     source register numbers
        * :dest:           destination register number
        * :branch_success: drives dut.branch_succ_i
        * :branch_fail:    drives dut.branch_fail_i
    """
    yield from disable_issue(dut)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    # pick the issue group (and its opcode input) for this instruction
    is_branch = bool(op & 0b1100)
    if is_branch:
        dut_issue, oper_sig = dut.brissue, dut.br_oper_i
    else:
        dut_issue, oper_sig = dut.aluissue, dut.alu_oper_i
    yield dut_issue.insn_i.eq(1)
    yield oper_sig.eq(Const(op & 0x3, 2))
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    # let a clock go by, then hold until the chosen group reports issue
    yield
    yield from wait_for_issue(dut, dut_issue)
632
633
def print_reg(dut, rnums):
    """ Simulation process: prints the current (hex) contents of the
        integer registers whose numbers are listed in rnums.
    """
    hexvals = []
    for rnum in rnums:
        value = yield dut.intregs.regs[rnum].reg
        hexvals.append(format(value, "x"))
    labels = ','.join(map(str, rnums))
    print ("reg %s: %s" % (labels, ','.join(hexvals)))
641
642
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """ Builds a list of n_ops random instructions.

        Register numbers are drawn from 1 .. dut.n_regs-1 (register 0 is
        never used) and the opcode from 0 .. max_opnums.  Each entry is
        (src1, src2, dest, op), with a trailing (0, 0) "no shadow" pair
        appended when shadowing is set.
    """
    ops = []
    for _ in range(n_ops):
        top_reg = dut.n_regs - 1
        # NB: draw order (src1, src2, dest, op) fixes the random sequence
        src1 = randint(1, top_reg)
        src2 = randint(1, top_reg)
        dest = randint(1, top_reg)
        op = randint(0, max_opnums)

        entry = (src1, src2, dest, op)
        if shadowing:
            entry += ((0, 0),)
        ops.append(entry)
    return ops
656
657
def wait_for_busy_clear(dut):
    """ Simulation process: steps the clock, printing "busy" each time,
        until dut.busy_o reads as zero.
    """
    while (yield dut.busy_o):
        print ("busy")
        yield
665
def disable_issue(dut):
    """ Simulation process: de-asserts the instruction-present input of
        both the ALU and the branch issue groups.
    """
    for group in (dut.aluissue, dut.brissue):
        yield group.insn_i.eq(0)
669
670
def wait_for_issue(dut, dut_issue):
    """ Simulation process: steps the clock (printing "busy") until
        dut_issue.fn_issue_o is non-zero, then withdraws the instruction
        and disables register decode.
    """
    while not (yield dut_issue.fn_issue_o):
        print ("busy")
        yield
    # issued: stop presenting the instruction
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
682
def scoreboard_branch_sim(dut, alusim):
    """ Simulation process exercising branch speculation.

        Issues a short program containing one branch whose "ok" and "fail"
        successor instructions are both queued (each marked with the
        matching shadow flag), lets the hardware cancel one side, then
        replays the program on the software model (alusim) following only
        the side the branch actually took, and compares the registers.

        Instruction tuples are (src1, src2, dest, op, (shadow_on,
        shadow_off)).  For a branch (op >= 4) the "dest" slot instead
        holds (branch_ok_list, branch_fail_list) and the result is
        written to register src2.
    """

    iseed = 3

    for _ in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        random_tree = False # flip to generate a random program instead
        if random_tree:
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))
        else:
            # fixed regression program
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # the software model replays an untouched copy afterwards
        siminsts = deepcopy(insts)

        # issue instruction(s)
        idx = -1
        while insts:
            yield
            yield
            idx += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            # direction 1 <=> branch ALU result was 0: drop the instruction
            # queued for the "ok" side
            if branch_direction == 1 and shadow_on:
                print ("skip", idx, src1, src2, dest, op,
                       shadow_on, shadow_off)
                continue
            # direction 2 <=> branch ALU result was 1: drop the instruction
            # queued for the "fail" side
            if branch_direction == 2 and shadow_off:
                print ("skip", idx, src1, src2, dest, op,
                       shadow_on, shadow_off)
                continue
            if branch_direction != 0:
                # branch already resolved: nothing left to be shadowed by
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        insts.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        insts.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (idx, src1, src2, dest, op, shadow_on, shadow_off))
            yield from int_instr(dut, op, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay on the software model, following only the taken side
        idx = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue # placeholder for "no instruction on this side"
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            idx += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (idx, src1, src2, dest, op, shadow_on, shadow_off))
            branch_res = alusim.op(op, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
795
796
def scoreboard_sim(dut, alusim):
    """ Simulation process: 20 rounds of random-instruction testing.

        Each round loads random values into registers 1 .. n_regs-1 of
        both the hardware and the software model (alusim), issues a batch
        of instructions to both, waits for the hardware to go idle and
        then compares the register files.

        Instruction tuples are (src1, src2, dest, op, (br_ok, br_fail)).
        Older regression vectors that are plain (src1, src2, dest, op)
        4-tuples are accepted too and treated as unshadowed.

        The "if False" sections are regression vectors kept for manual
        re-enabling.
    """

    seed(0)

    for _ in range(20):

        # set random values in the registers
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+regnum*3
            #val = regnum
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if True:
            instrs = create_random_ops(dut, 10, True, 4)

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )

        if False:
            instrs.append((2, 3, 3, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, (0, 0)))
            instrs.append((3, 5, 5, 2, (0, 0)))
            instrs.append((5, 3, 3, 3, (0, 0)))
            instrs.append((3, 5, 5, 0, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, (0, 0)) )
            instrs.append( (5, 2, 3, 1, (0, 0)) )
            instrs.append( (7, 1, 5, 2, (0, 0)) )
            instrs.append( (5, 6, 6, 4, (0, 0)) )
            instrs.append( (7, 5, 2, 2, (1, 0)) )
            instrs.append( (1, 7, 5, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, (1, 0)) )
            instrs.append( (1, 6, 7, 3, (0, 0)) )
            instrs.append( (6, 7, 7, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for idx, instr in enumerate(instrs):
            src1, src2, dest, op = instr[:4]
            # the older 4-tuple vectors above carry no shadow flags: run
            # them unshadowed rather than failing to unpack
            br_ok, br_fail = instr[4] if len(instr) > 4 else (0, 0)

            print ("instr %d: (%d, %d, %d, %d)" % (idx, src1, src2, dest, op))
            alusim.op(op, src1, src2, dest)
            yield from int_instr(dut, op, src1, src2, dest, br_ok, br_fail)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
921
922
def test_scoreboard():
    """ Builds a 16-bit, 8-register Scoreboard, writes its RTLIL to
        test_scoreboard6600.il and runs the random-instruction simulation
        (trace written to test_scoreboard6600.vcd).
    """
    rwid, n_regs = 16, 8
    dut = Scoreboard(rwid, n_regs)
    alusim = RegSim(rwid, n_regs)

    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    # swap in scoreboard_branch_sim(dut, alusim) here to run the branch
    # speculation test instead
    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')


if __name__ == '__main__':
    test_scoreboard()