whoops wrong mask for branch instruction decode
[soc.git] / src / experiment / score6600.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
4
5 from regfile.regfile import RegFileArray, treereduce
6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
7 from scoreboard.fu_reg_matrix import FURegDepMatrix
8 from scoreboard.global_pending import GlobalPending
9 from scoreboard.group_picker import GroupPicker
10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
12
13 from compalu import ComputationUnitNoDelay
14
15 from alu_hier import ALU, BranchALU
16 from nmutil.latch import SRLatch
17
18 from random import randint, seed
19 from copy import deepcopy
20
21
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Amazingly, this class works recursively.  It's supposed to just
        look after some ALUs (that can handle the same operations),
        grouping them together, however it turns out that the same code
        can also group *groups* of Computation Units together as well.

        Basically it was intended just to concatenate the ALU's issue,
        go_rd etc. signals together, which start out as bits and become
        sequences.  Turns out that the same trick works just as well
        on Computation Units!

        So this class may be used recursively to present a top-level
        sequential concatenation of all the signals in and out of
        ALUs, whilst at the same time making it convenient to group
        ALUs together.

        At the lower level, the intent is that groups of (identical)
        ALUs may be passed the same operation.  Even beyond that,
        the intent is that that group of (identical) ALUs actually
        share the *same pipeline* and as such become a "Concurrent
        Computation Unit" as defined by Mitch Alsup (see section
        11.4.9.3)
    """
    def __init__(self, rwid, units):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :units:  sequence of ALUs (or CompUnitsBase derivatives)

            The width of every per-unit signal (n_units) is len(units)
            for a flat group, or the sum of the nested groups' n_units
            when the first entry is itself a CompUnitsBase.
        """
        self.units = units
        self.rwid = rwid
        if units and isinstance(units[0], CompUnitsBase):
            # group-of-groups: total up the widths of the nested groups
            self.n_units = sum(u.n_units for u in units)
        else:
            self.n_units = len(units)

        n_units = self.n_units

        # inputs
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)

        # outputs
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)
        # input operand

    def elaborate(self, platform):
        """ Registers every unit as submodule "comp<N>", concatenates the
            per-unit control signals onto this group's wide signals, ORs
            (via treereduce) the units' data outputs into data_o, and
            broadcasts src1_i/src2_i to every unit.
        """
        m = Module()
        comb = m.d.comb

        units = self.units
        for i, alu in enumerate(units):
            setattr(m.submodules, "comp%d" % i, alu)

        # per-unit outputs are gathered into this group's wide outputs...
        comb += self.rd_rel_o.eq(Cat(*[alu.rd_rel_o for alu in units]))
        comb += self.req_rel_o.eq(Cat(*[alu.req_rel_o for alu in units]))
        comb += self.busy_o.eq(Cat(*[alu.busy_o for alu in units]))
        # ... and the wide inputs are scattered back out to each unit
        comb += Cat(*[alu.go_die_i for alu in units]).eq(self.go_die_i)
        comb += Cat(*[alu.shadown_i for alu in units]).eq(self.shadown_i)
        comb += Cat(*[alu.go_wr_i for alu in units]).eq(self.go_wr_i)
        comb += Cat(*[alu.go_rd_i for alu in units]).eq(self.go_rd_i)
        comb += Cat(*[alu.issue_i for alu in units]).eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "data_o"
        if units:
            comb += self.data_o.eq(treereduce(units))

        # every unit sees the same pair of source operands
        for alu in units:
            comb += alu.src1_i.eq(self.src1_i)
            comb += alu.src2_i.eq(self.src2_i)

        return m
129
130
class CompUnitALUs(CompUnitsBase):
    """ Group of four integer Computation Units (add, sub, mul, shf), each
        wrapping its own ALU, all fed the same operation (oper_i).
    """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)

        # Int ALUs: one each for add, sub, mul and shf (in that order).
        # all ALUs are created first, then wrapped in Computation Units
        int_alus = [ALU(rwid) for _ in range(4)]
        comp_units = [ComputationUnitNoDelay(rwid, 2, alu)
                      for alu in int_alus]

        super().__init__(rwid, comp_units)

    def elaborate(self, platform):
        """ Base-class wiring plus broadcast of oper_i to every unit. """
        m = super().elaborate(platform)

        # hand the same operation to all units
        m.d.comb += [cu.oper_i.eq(self.oper_i) for cu in self.units]
        #comb += self.units[0].oper_i.eq(Const(0, 2)) # op=add
        #comb += self.units[1].oper_i.eq(Const(1, 2)) # op=sub
        #comb += self.units[2].oper_i.eq(Const(2, 2)) # op=mul
        #comb += self.units[3].oper_i.eq(Const(3, 2)) # op=shf

        return m
169
170
class CompUnitBR(CompUnitsBase):
    """ Group holding the single branch Computation Unit (br1), which
        wraps a BranchALU (bgt).
    """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width

            Note: the branch ALU (bgt) and its Computation Unit (br1) are
            kept as attributes so that a shadow unit can be created for it
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)

        # Branch ALU and CU
        branch_alu = BranchALU(rwid)
        self.bgt = branch_alu
        self.br1 = ComputationUnitNoDelay(rwid, 3, branch_alu)
        super().__init__(rwid, [self.br1])

    def elaborate(self, platform):
        """ Base-class wiring plus broadcast of oper_i to every unit. """
        m = super().elaborate(platform)

        # hand the same operation to all units
        m.d.comb += [cu.oper_i.eq(self.oper_i) for cu in self.units]
        #comb += self.br1.oper_i.eq(Const(4, 3)) # op=bgt

        return m
202
203
class FunctionUnits(Elaboratable):
    """ Wraps the integer FU-FU and FU-Reg dependency matrices and wires
        them together, exposing issue/go/die inputs and the
        readable/writable and register-select outputs.
    """

    def __init__(self, n_regs, n_int_alus):
        """ Inputs:

            * :n_regs:      width of the (unary) register-select signals
            * :n_int_alus:  number of Function Units (width of per-FU signals)
        """
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in

        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)

        # NOTE(review): req_rel_i / req_rel_o are declared but not
        # connected anywhere in elaborate()
        self.req_rel_i = Signal(n_int_alus, reset_less=True)
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.req_rel_o = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here, for use in
        # WaWGrid.  (the attribute only exists once elaborate() has run)

    def elaborate(self, platform):
        """ Creates both dependency matrices and connects them up.
            Purely combinatorial wiring: nothing here is clocked.
        """
        m = Module()
        comb = m.d.comb

        n_intfus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        m.submodules.intfudeps = intfudeps = FUFUDepMatrix(n_intfus,
                                                           n_intfus)
        # Integer FU-Reg Dep Matrix
        m.submodules.intregdeps = intregdeps = FURegDepMatrix(n_intfus,
                                                              self.n_regs)

        # global pending vectors come straight from the FU-Reg matrix,
        # which also has them looped back in as its own pending inputs
        comb += [self.g_int_rd_pend_o.eq(intregdeps.rd_rsel_o),
                 self.g_int_wr_pend_o.eq(intregdeps.wr_rsel_o),
                 intregdeps.rd_pend_i.eq(intregdeps.rd_rsel_o),
                 intregdeps.wr_pend_i.eq(intregdeps.wr_rsel_o),
                ]

        # FU-Reg pending outputs feed the FU-FU matrix
        comb += [intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o),
                 intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o),
                ]
        self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid

        # FU-FU matrix control and readable/writable results
        comb += [intfudeps.issue_i.eq(self.fn_issue_i),
                 intfudeps.go_rd_i.eq(self.go_rd_i),
                 intfudeps.go_wr_i.eq(self.go_wr_i),
                 intfudeps.go_die_i.eq(self.go_die_i),
                 self.readable_o.eq(intfudeps.readable_o),
                 self.writable_o.eq(intfudeps.writable_o),
                ]

        # Connect function issue / arrays, and dest/src1/src2
        comb += [intregdeps.dest_i.eq(self.dest_i),
                 intregdeps.src1_i.eq(self.src1_i),
                 intregdeps.src2_i.eq(self.src2_i),
                ]

        comb += [intregdeps.go_rd_i.eq(self.go_rd_i),
                 intregdeps.go_wr_i.eq(self.go_wr_i),
                 intregdeps.go_die_i.eq(self.go_die_i),
                 intregdeps.issue_i.eq(self.fn_issue_i),
                ]

        comb += [self.dest_rsel_o.eq(intregdeps.dest_rsel_o),
                 self.src1_rsel_o.eq(intregdeps.src1_rsel_o),
                 self.src2_rsel_o.eq(intregdeps.src2_rsel_o),
                ]

        return m
279
280
class Scoreboard(Elaboratable):
    """ Top-level scoreboard experiment: register files, Computation Units
        (four integer ALUs plus one branch unit), Function Unit dependency
        matrices, a group picker, an issue unit, shadow matrices and a
        branch speculation recorder, all wired together in elaborate().
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        # NOTE(review): elaborate() sets branch_direction_o to
        # br1.data_o + 1 and comments "branch occurs if data == 1", which
        # would make 2 the taken case - confirm which encoding is intended.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Builds and connects all submodules.

            Also creates (as a side-effect) the attributes alu_insn_i,
            br_insn_i, alu_oper_i and br_oper_i, which the test harness
            drives: they therefore only exist after elaboration.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are created but not connected to anything below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        # (5 = the four ALUs in CompUnitALUs plus the one in CompUnitBR;
        #  the 0x1f masks further down rely on this being 5)
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 2)
        cub = CompUnitBR(self.rwid, 2)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(n_intfus) # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        aluissue = IssueUnitGroup(4)
        brissue = IssueUnitGroup(1)
        issueunit = IssueUnitArray([aluissue, brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        # NOTE(review): the comment above looks stale - the branch shadow
        # is a separate matrix (bshadow) constructed with a second
        # argument of 1, rather than an extra bit on "shadows".
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        # (fn_issue_prev is written below but not read in this method)
        fn_issue_prev = Signal(n_intfus)
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [    regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     self.issue_o.eq(issueunit.issue_o)
                ]

        # take these to outside (for testing)
        self.alu_insn_i = aluissue.insn_i # enabled by instruction decode
        self.br_insn_i = brissue.insn_i # enabled by instruction decode
        self.alu_oper_i = cua.oper_i
        self.br_oper_i = cub.oper_i

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        # top-level busy: set when any Computation Unit's busy bit is set
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # work out the current-activated busy unit (by recording the old one)
        with m.If(fn_issue_o): # only update prev bit if instruction issued
            sync += fn_issue_prev.eq(fn_issue_o)

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i
        # NOTE(review): as above, the code actually uses the separate
        # "bshadow" matrix here, not an extra bit on "shadows".

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # branch is considered active from issue of the branch unit (or
        # whilst the recorder's active_i is set) until its go_wr_i fires
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                # only the unit actually being issued gets the shadow bit
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            # recorded direction is the branch result plus one, so that
            # zero can mean "no branch result yet"
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m


    def __iter__(self):
        """ Yields the signals used as the port list (see ports()).

            NOTE(review): reg_enable_i and busy_o are not yielded here,
            nor are the alu/br insn and oper signals created in
            elaborate() - confirm whether that is intentional.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ Returns everything yielded by __iter__, as a list. """
        return list(self)
553
# Instruction opcodes understood by the simulator.  Opcodes 4 and above
# are branch comparisons (the test code treats "op >= 4" as a branch).
IADD = 0
ISUB = 1
IMUL = 2
ISHF = 3
IBGT = 4
IBLT = 5
IBEQ = 6
IBNE = 7

class RegSim:
    """ Software reference model of the integer register file.

        Instructions are applied with op(); the resulting register
        contents can then be compared against the hardware with check().
    """
    def __init__(self, rwidth, nregs):
        """ Inputs:

            * :rwidth: register bit width (results are truncated to this)
            * :nregs:  number of registers (all start at zero)
        """
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, src1, src2, dest):
        """ Performs opcode "op" on registers src1 and src2, stores the
            result (truncated to rwidth bits) in register dest and also
            returns it.  Branch opcodes produce 1 or 0.

            Raises ValueError on an unrecognised opcode (no register is
            modified in that case).
        """
        maxbits = (1 << self.rwidth) - 1
        src1 = self.regs[src1] & maxbits
        src2 = self.regs[src2] & maxbits
        if op == IADD:
            val = src1 + src2
        elif op == ISUB:
            val = src1 - src2
        elif op == IMUL:
            val = src1 * src2
        elif op == ISHF:
            val = src1 >> (src2 & maxbits)
        elif op == IBGT:
            val = int(src1 > src2)
        elif op == IBLT:
            val = int(src1 < src2)
        elif op == IBEQ:
            val = int(src1 == src2)
        elif op == IBNE:
            val = int(src1 != src2)
        else:
            # previously fell through to an UnboundLocalError on "val"
            raise ValueError("unknown opcode %r" % (op,))
        val &= maxbits
        self.setval(dest, val)
        return val

    def setval(self, dest, val):
        """ Stores val in register dest as-is (no truncation), logging it. """
        print ("sim setval", dest, hex(val))
        self.regs[dest] = val

    def dump(self, dut):
        """ Simulation generator: reads every hardware register from
            dut.intregs and prints it next to the expected value.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """ Simulation generator: compares every hardware register in
            dut.intregs against the model.  On the first mismatch the
            whole register file is dumped and AssertionError is raised.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                # explicit raise: a bare "assert False" vanishes under -O
                raise AssertionError("reg %d expected %x received %x" % \
                                     (i, val, reg))
609
def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
    """ Simulation generator: presents one instruction to the scoreboard.

        Issue is disabled first, then the register numbers are set up.
        If bit 2 or 3 of op is set the branch issue group is enabled,
        otherwise the ALU group; either way the low two bits of op become
        the operation.  Register decode is then enabled, and finally the
        branch_fail / branch_success shadow requests are set.
    """
    yield from disable_issue(dut)

    # register numbers: destination first, then the two sources
    operands = ((dut.int_dest_i, dest),
                (dut.int_src1_i, src1),
                (dut.int_src2_i, src2))
    for port, regnum in operands:
        yield port.eq(regnum)

    # choose which issue group (and operation input) gets this instruction
    if op & 0b1100:  # branch
        insn_en, oper = dut.br_insn_i, dut.br_oper_i
    else:
        insn_en, oper = dut.alu_insn_i, dut.alu_oper_i
    yield insn_en.eq(1)
    yield oper.eq(Const(op & 0x3, 2))
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)
627
628
def print_reg(dut, rnums):
    """ Simulation generator: reads the integer registers listed in rnums
        from dut.intregs and prints their numbers and (hex) values on a
        single line.
    """
    values = []
    for rnum in rnums:
        value = yield dut.intregs.regs[rnum].reg
        values.append("%x" % value)
    labels = ','.join(str(rnum) for rnum in rnums)
    print ("reg %s: %s" % (labels, ','.join(values)))
636
637
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """ Returns a list of n_ops random instructions.

        Each is a tuple (src1, src2, dest, op): register numbers are in
        the range 1..dut.n_regs-1 and op is in the range 0..max_opnums.
        With shadowing set, a fifth element (0, 0) - no branch shadow
        requested - is added to every tuple.
    """
    top_reg = dut.n_regs - 1

    def random_reg():
        return randint(1, top_reg)

    # optional trailing (shadow_on, shadow_off) element
    shadow_field = ((0, 0),) if shadowing else ()

    # tuple elements evaluate left to right: src1, src2, dest, then op
    return [(random_reg(), random_reg(), random_reg(),
             randint(0, max_opnums)) + shadow_field
            for _ in range(n_ops)]
651
652
def wait_for_busy_clear(dut):
    """ Simulation generator: samples dut.busy_o, printing "busy" and
        waiting one more cycle each time it is still set; returns as soon
        as it reads as clear.
    """
    while (yield dut.busy_o):
        print ("busy")
        yield
660
def disable_issue(dut):
    """ Simulation generator: clears the ALU and then the branch
        instruction-enable inputs, so that nothing further is issued.
    """
    for insn_enable in (dut.alu_insn_i, dut.br_insn_i):
        yield insn_enable.eq(0)
664
665
def wait_for_issue(dut):
    """ Simulation generator: waits (one cycle at a time) until
        dut.issue_o is set, then disables further issue and clears the
        register-decode enable.
    """
    while not (yield dut.issue_o):
        #print ("busy",)
        #yield from print_reg(dut, [1,2,3])
        yield
    # instruction accepted: stop presenting it
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
    #yield from print_reg(dut, [1,2,3])
677
def scoreboard_branch_sim(dut, alusim):
    """ Simulation generator: branch speculation test.

        Issues a branch followed by one instruction for each branch
        outcome (each marked as shadowed on "success" or "fail"), replays
        the same program on the software model (alusim) following only
        the outcome the model computes, then checks that the hardware
        registers match the model.

        Instruction format: (src1, src2, dest, op, (shadow_on, shadow_off)).
        For a branch (op >= 4) the "dest" field instead holds the pair
        (branch_ok, branch_fail) of instruction lists.
    """

    iseed = 3

    for i in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        # (note: this loop re-uses the outer loop's variable name "i")
        for i in range(1, dut.n_regs):
            val = 31+i*3
            # the fixed pattern above is immediately replaced by a random one
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # disabled: randomly-generated branch test
        if False:
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        # enabled: fixed test - one branch, one instruction per outcome
        if True:
            insts = []
            #insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            #branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # untouched copy for the software model: the hardware-issue loop
        # below consumes (and extends) the insts list itself
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        # instrs is the *same list object* as insts: pops from one and
        # appends to the other both affect the loop condition
        instrs = insts
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            # NOTE(review): the "success"/"fail" wording in the two skip
            # comments below depends on how branch_direction_o is encoded
            # (0 = not yet known) - confirm against Scoreboard.
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            # branch outcome already known: no shadow needs requesting
            if branch_direction != 0:
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                # for a branch, "dest" carries the two outcome lists; the
                # register number passed as destination is then src2
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            yield from int_instr(dut, op, src1, src2, dest,
                                 shadow_on, shadow_off)
            yield
            yield from wait_for_issue(dut)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay on the software model, following only the branch
        # outcome that the model itself computes
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # (for a branch the model also stores the 0/1 result in "dest")
            branch_res = alusim.op(op, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
792
793
def scoreboard_sim(dut, alusim):
    """ Simulation generator: random-instruction regression test.

        Twenty times over: loads the registers (hardware and software
        model alike), issues ten random instructions to both, waits for
        the hardware to go idle, then checks the hardware registers
        against the model.

        The "if False:" sections are disabled regression cases, kept for
        reference.  NOTE(review): several of them append 4-element tuples,
        which the issue loop (it unpacks five elements) would not accept
        if they were simply switched back on.
    """

    seed(0)

    for i in range(20):

        # set random values in the registers
        # (note: this loop re-uses the outer loop's variable name "i")
        for i in range(1, dut.n_regs):
            # the random value is discarded (val = i below) but the call
            # still advances the random number generator
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if True:
            # ops 0..4, i.e. the four ALU ops plus opcode 4
            instrs = create_random_ops(dut, 10, True, 4)

        if False:
            instrs.append((2, 3, 3, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, (0, 0)))
            instrs.append((3, 5, 5, 2, (0, 0)))
            instrs.append((5, 3, 3, 3, (0, 0)))
            instrs.append((3, 5, 5, 0, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, (0, 0)) )
            instrs.append( (5, 2, 3, 1, (0, 0)) )
            instrs.append( (7, 1, 5, 2, (0, 0)) )
            instrs.append( (5, 6, 6, 4, (0, 0)) )
            instrs.append( (7, 5, 2, 2, (1, 0)) )
            instrs.append( (1, 7, 5, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, (1, 0)) )
            instrs.append( (1, 6, 7, 3, (0, 0)) )
            instrs.append( (6, 7, 7, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, (src1, src2, dest, op, (br_ok, br_fail)) in enumerate(instrs):

            print ("instr %d: (%d, %d, %d, %d)" % (i, src1, src2, dest, op))
            # software model first, then the same instruction to the hardware
            alusim.op(op, src1, src2, dest)
            yield from int_instr(dut, op, src1, src2, dest, br_ok, br_fail)
            yield
            yield from wait_for_issue(dut)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
914
915
def test_scoreboard():
    """ Builds a 16-bit, 8-register Scoreboard plus the matching software
        register model, writes the design out as RTLIL to
        test_scoreboard6600.il, then runs scoreboard_sim against it
        (waveform saved to test_scoreboard6600.vcd).
    """
    rwid, n_regs = 16, 8
    dut = Scoreboard(rwid, n_regs)
    alusim = RegSim(rwid, n_regs)

    # conversion to RTLIL comes first, exactly as before
    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    testbench = scoreboard_sim(dut, alusim)
    run_simulation(dut, testbench, vcd_name='test_scoreboard6600.vcd')

    # alternative (disabled): the branch speculation test
    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
928
929
# run the scoreboard test when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()