# git object id: c7a36b26cf4e5e4e32771e7d1114bc0750cafe96
# [soc.git] / src / experiment / score6600.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
4
5 from regfile.regfile import RegFileArray, treereduce
6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
7 from scoreboard.fu_reg_matrix import FURegDepMatrix
8 from scoreboard.global_pending import GlobalPending
9 from scoreboard.group_picker import GroupPicker
10 from scoreboard.issue_unit import IntFPIssueUnit, RegDecode
11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
12
13 from compalu import ComputationUnitNoDelay
14
15 from alu_hier import ALU, BranchALU
16 from nmutil.latch import SRLatch
17
18 from random import randint, seed
19
20
class CompUnits(Elaboratable):
    """Bank of integer Computation Units sharing one pair of operand buses.

    Four ALU-backed units (add, sub, mul, shift) and one branch unit are
    instantiated.  Their per-unit control and status bits are gathered
    into vectors, and their results are merged onto a single dest_o.
    """

    def __init__(self, rwid, n_units):
        """ Inputs:

            * :rwid:    bit width of register file(s) - both FP and INT
            * :n_units: number of ALUs (width of the control/status vectors)

            Note: the branch ALU (bgt) and its Computation Unit (br1) are
            created here and kept as attributes, so that a shadow unit can
            be created for the branch.
        """
        self.n_units = n_units
        self.rwid = rwid

        # per-unit control inputs (one bit per Computation Unit)
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)

        # per-unit status outputs (one bit per Computation Unit)
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)

        # register *data* buses (values, not register numbers)
        self.dest_o = Signal(rwid, reset_less=True)
        self.src1_data_i = Signal(rwid, reset_less=True)
        self.src2_data_i = Signal(rwid, reset_less=True)

        # the branch ALU and the Computation Unit wrapped around it
        self.bgt = BranchALU(self.rwid)
        self.br1 = ComputationUnitNoDelay(self.rwid, 2, self.bgt)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        # the four integer ALUs, one per operation
        alu_add = ALU(self.rwid)
        alu_sub = ALU(self.rwid)
        alu_mul = ALU(self.rwid)
        alu_shf = ALU(self.rwid)

        # wrap each ALU in a Computation Unit; the branch CU already exists
        m.submodules.comp1 = comp1 = ComputationUnitNoDelay(self.rwid, 2,
                                                            alu_add)
        m.submodules.comp2 = comp2 = ComputationUnitNoDelay(self.rwid, 2,
                                                            alu_sub)
        m.submodules.comp3 = comp3 = ComputationUnitNoDelay(self.rwid, 2,
                                                            alu_mul)
        m.submodules.comp4 = comp4 = ComputationUnitNoDelay(self.rwid, 2,
                                                            alu_shf)
        m.submodules.br1 = br1 = self.br1
        int_alus = [comp1, comp2, comp3, comp4, br1]

        # hard-wire the operation selected in each unit:
        # add=0, sub=1, mul=2, shf=3, and bgt=0 on the branch unit
        opcodes = (0, 1, 2, 3, 0)
        for unit, opcode in zip(int_alus, opcodes):
            comb += unit.oper_i.eq(Const(opcode, 2))

        def gather(attr):
            # concatenate one named signal from every unit, unit 0 in bit 0
            return Cat(*[getattr(unit, attr) for unit in int_alus])

        # status bits flow out of the units into the vectors...
        comb += self.rd_rel_o.eq(gather("rd_rel_o"))
        comb += self.req_rel_o.eq(gather("req_rel_o"))
        comb += self.busy_o.eq(gather("busy_o"))
        # ... and control bits are fanned out from the vectors to the units
        comb += gather("go_die_i").eq(self.go_die_i)
        comb += gather("shadown_i").eq(self.shadown_i)
        comb += gather("go_wr_i").eq(self.go_wr_i)
        comb += gather("go_rd_i").eq(self.go_rd_i)
        comb += gather("issue_i").eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "dest_o"
        merged = treereduce(int_alus)
        comb += self.dest_o.eq(merged)

        # every unit sees the same two source operands
        for unit in int_alus:
            comb += [unit.src1_i.eq(self.src1_data_i),
                     unit.src2_i.eq(self.src2_data_i)]

        return m
120
121
class FunctionUnits(Elaboratable):
    """Integer dependency tracking: an FU-Reg and an FU-FU matrix pair.

    Wraps a FURegDepMatrix and a FUFUDepMatrix, cross-connects their
    pending vectors, and exposes the combined issue / go_rd / go_wr
    inputs together with the readable / writable and register-select
    outputs.
    """

    def __init__(self, n_regs, n_int_alus):
        """ Inputs:

            * :n_regs:     number of registers (width of the reg vectors)
            * :n_int_alus: number of integer Function Units
        """
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        # register-number vectors in, n_regs bits wide
        self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in

        # global read/write pending vectors out
        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        # register selects out
        self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)

        # per-Function-Unit vectors, n_int_alus bits wide
        self.req_rel_i = Signal(n_int_alus, reset_less = True)
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.req_rel_o = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here, for use in
        # WaWGrid (the attribute is created in elaborate())

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        n_fus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        fu_deps = FUFUDepMatrix(n_fus, n_fus)
        m.submodules.intfudeps = fu_deps
        # Integer FU-Reg Dep Matrix
        reg_deps = FURegDepMatrix(n_fus, self.n_regs)
        m.submodules.intregdeps = reg_deps

        # also output for use in WaWGrid
        self.wr_pend_o = reg_deps.wr_pend_o

        comb += [
            # publish the register-select vectors as the global pending ones
            self.g_int_rd_pend_o.eq(reg_deps.rd_rsel_o),
            self.g_int_wr_pend_o.eq(reg_deps.wr_rsel_o),
            # ... and loop them straight back into the FU-Reg matrix
            reg_deps.rd_pend_i.eq(reg_deps.rd_rsel_o),
            reg_deps.wr_pend_i.eq(reg_deps.wr_rsel_o),
            # FU-Reg pending outputs drive the FU-FU matrix
            fu_deps.rd_pend_i.eq(reg_deps.rd_pend_o),
            fu_deps.wr_pend_i.eq(reg_deps.wr_pend_o),
        ]

        comb += [
            # FU-FU matrix: issue and go signals in, readable/writable out
            fu_deps.issue_i.eq(self.fn_issue_i),
            fu_deps.go_rd_i.eq(self.go_rd_i),
            fu_deps.go_wr_i.eq(self.go_wr_i),
            self.readable_o.eq(fu_deps.readable_o),
            self.writable_o.eq(fu_deps.writable_o),
        ]

        comb += [
            # Connect function issue / arrays, and dest/src1/src2
            reg_deps.dest_i.eq(self.dest_i),
            reg_deps.src1_i.eq(self.src1_i),
            reg_deps.src2_i.eq(self.src2_i),
            # the FU-Reg matrix gets the same go and issue signals
            reg_deps.go_rd_i.eq(self.go_rd_i),
            reg_deps.go_wr_i.eq(self.go_wr_i),
            reg_deps.issue_i.eq(self.fn_issue_i),
            # register selects out
            self.dest_rsel_o.eq(reg_deps.dest_rsel_o),
            self.src1_rsel_o.eq(reg_deps.src1_rsel_o),
            self.src2_rsel_o.eq(reg_deps.src2_rsel_o),
        ]

        return m
194
195
class Scoreboard(Elaboratable):
    """Experimental top-level scoreboard.

    Wires together the register decoder, the issue unit, the dependency
    matrices (FunctionUnits), a group picker, two shadow matrices, a
    branch speculation recorder and the Computation Units (CompUnits),
    and connects them to the integer register file.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # inputs
        self.int_store_i = Signal(reset_less=True) # instruction is a store
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

        # NOTE: self.int_insn_i (the per-operation instruction-enable
        # input) is not created here: it is assigned inside elaborate().

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # NOTE: the FP ports are created but not connected to anything
        # else in this method.
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        # (5 matches the five units CompUnits.elaborate instantiates)
        n_int_alus = 5
        m.submodules.cu = cu = CompUnits(self.rwid, n_int_alus)
        # NOTE(review): go_die_i is driven again further down (from
        # anydie) over bits [0:n_int_fus]; this looks like a default that
        # the later assignment overrides - confirm.
        comb += cu.go_die_i.eq(0)
        bgt = cu.bgt # get at the branch computation unit

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_int_fus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: one picker covering all integer FUs
        intpick1 = GroupPicker(n_int_fus) # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IntFPIssueUnit(self.n_regs, n_int_fus, n_fp_fus)
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_int_fus shadows, to be used for
        # write-after-write hazards.
        # NOTE(review): the branch shadow is a *separate* one-bit-wide
        # ShadowMatrix (bshadow) rather than an extra column on "shadows".
        m.submodules.shadows = shadows = ShadowMatrix(n_int_fus, n_int_fus)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_int_fus, 1)

        # combined go_rd/wr + go_die (go_die used to reset latches)
        go_rd_rst = Signal(n_int_fus, reset_less=True)
        go_wr_rst = Signal(n_int_fus, reset_less=True)
        # record previous instruction to cast shadow on current instruction
        fn_issue_prev = Signal(n_int_fus)
        prev_shadow = Signal(n_int_fus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_int_fus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [issueunit.i.store_i.eq(self.int_store_i),
                 regdecode.dest_i.eq(self.int_dest_i),
                 regdecode.src1_i.eq(self.int_src1_i),
                 regdecode.src2_i.eq(self.int_src2_i),
                 regdecode.enable_i.eq(self.reg_enable_i),
                 issueunit.i.dest_i.eq(regdecode.dest_o),
                 self.issue_o.eq(issueunit.issue_o)
                ]
        # exposed as an attribute so the test harness can drive it; only
        # exists once elaborate() has run
        self.int_insn_i = issueunit.i.insn_i # enabled by instruction decode

        # connect global write-pending vector (for WaW detection).
        # this is a sync (registered) assignment, unlike the comb wiring
        # around it, and only the write-pending vector is connected.
        sync += issueunit.i.g_wr_pend_i.eq(intfus.g_int_wr_pend_o)
        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.i.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.i.busy_i.eq(cu.busy_o)
        # busy_o is the OR-reduction of the per-unit busy bits
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_int_fus].eq(go_rd_rst[0:n_int_fus]) # rd
        comb += go_wr_i[0:n_int_fus].eq(go_wr_rst[0:n_int_fus]) # wr

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_int_fus].eq(cu.rd_rel_o[0:n_int_fus])
        comb += intpick1.req_rel_i[0:n_int_fus].eq(cu.req_rel_o[0:n_int_fus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_int_fus].eq(int_rd_o[0:n_int_fus])
        comb += intpick1.writable_i[0:n_int_fus].eq(int_wr_o[0:n_int_fus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        # NOTE: do NOT connect these to the Computation Units.  The CUs need to
        # do something slightly different (due to the revolving-door SRLatches)
        anydie = Signal(n_int_fus, reset_less=True)
        allshadown = Signal(n_int_fus, reset_less=True)
        # a unit is un-shadowed only when *both* matrices say so, and must
        # die when *either* matrix says so
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += go_rd_rst.eq(go_rd_o | anydie)
        comb += go_wr_rst.eq(go_wr_o | anydie)

        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_int_fus].eq(allshadown)
        comb += cu.go_die_i[0:n_int_fus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_int_fus):
            comb += shadows.s_good_i[i][0:n_int_fus].eq(go_wr_o[0:n_int_fus])

        # work out the current-activated busy unit (by recording the old one)
        with m.If(fn_issue_o): # only update prev bit if instruction issued
            sync += fn_issue_prev.eq(fn_issue_o)

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & fn_issue_prev & cu.busy_o)
        for i in range(n_int_fus):
            comb += shadows.shadow_i[i][0:n_int_fus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the separate
        # one-bit-wide bshadow ShadowMatrix created above.
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.issue_i.eq(fn_issue_o)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        # NOTE(review): shadow_i is indexed by the fn_issue_o signal itself
        # (elsewhere it is indexed by a unit number) - confirm this selects
        # the intended row.
        with m.If(self.branch_succ_i | self.branch_fail_i):
            comb += bshadow.shadow_i[fn_issue_o][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(cu.br1.issue_i):
            sync += bspec.active_i.eq(1)
        # NOTE(review): the 0xf mask keeps only the low four bits of the
        # five-bit issue vector - presumably to exclude the branch unit
        # itself; confirm.
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0xf)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0xf)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(cu.br1.go_wr_i):
            # data_o + 1 gives the encoding documented in __init__:
            # 1 when data_o is 1, 2 when data_o is 0... NOTE(review): that
            # reads inverted against "1 = success, 2 = fail" - confirm.
            sync += self.branch_direction_o.eq(cu.br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(cu.br1.data_o == 1)
            for i in range(n_int_fus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        # debug output, printed every time the design is elaborated
        print ("intregdeps wen len", len(intfus.dest_rsel_o))
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.dest_o)
        comb += cu.src1_data_i.eq(int_src1.data_o)
        comb += cu.src2_data_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        # (these use the picker's raw go_rd/go_wr, not the *_rst versions
        # that go to the dependency matrices)
        comb += cu.go_rd_i[0:n_int_fus].eq(go_rd_o[0:n_int_fus])
        comb += cu.go_wr_i[0:n_int_fus].eq(go_wr_o[0:n_int_fus])
        comb += cu.issue_i[0:n_int_fus].eq(fn_issue_o[0:n_int_fus])

        return m


    def __iter__(self):
        # yields the signals used as the port list (see ports()).
        # NOTE: busy_o, reg_enable_i and int_insn_i are not yielded here.
        yield from self.intregs
        yield from self.fpregs
        yield self.int_store_i
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """Return the port signals (everything __iter__ yields) as a list."""
        return list(self)
457
# Operation numbers used by the test harness.  Values below 4 select one
# of the integer ALU operations; 4 and above are branch comparisons.
IADD = 0
ISUB = 1
IMUL = 2
ISHF = 3
IBGT = 4
IBLT = 5
IBEQ = 6
IBNE = 7


class RegSim:
    """Pure-Python reference model of the integer register file.

    Each operation issued to the hardware is mirrored here, so that the
    final register contents can be compared against the simulation.
    """

    def __init__(self, rwidth, nregs):
        """ Inputs:

            * :rwidth: bit width of each register
            * :nregs:  number of registers (all start at zero)
        """
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, src1, src2, dest):
        """Apply operation *op* to registers src1/src2, store into dest.

        * :op:         one of the I* operation numbers above
        * :src1/src2:  source register *numbers*
        * :dest:       destination register number, or None to compute
                       the result without writing it (branches have no
                       destination register)

        Returns the result, truncated to rwidth bits, so that callers can
        inspect the outcome of a branch comparison.  Raises ValueError for
        an unknown operation number.
        """
        maxbits = (1 << self.rwidth) - 1
        lhs = self.regs[src1] & maxbits
        rhs = self.regs[src2] & maxbits
        if op == IADD:
            val = lhs + rhs
        elif op == ISUB:
            val = lhs - rhs
        elif op == IMUL:
            val = lhs * rhs
        elif op == ISHF:
            # rhs is already truncated to rwidth bits
            val = lhs >> rhs
        elif op == IBGT:
            val = int(lhs > rhs)
        elif op == IBLT:
            val = int(lhs < rhs)
        elif op == IBEQ:
            val = int(lhs == rhs)
        elif op == IBNE:
            val = int(lhs != rhs)
        else:
            # previously fell through to an unbound local
            raise ValueError("unknown operation number %r" % (op,))
        val &= maxbits
        if dest is not None:
            self.regs[dest] = val
        return val

    def setval(self, dest, val):
        """Set register *dest* directly to *val* (stored as given)."""
        self.regs[dest] = val

    def dump(self, dut):
        """Simulation process: print every modelled register next to the
        value read back from dut, marked OK or !ok."""
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """Simulation process: compare every modelled register against dut.

        On the first mismatch the whole register file is dumped and an
        AssertionError is raised.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                # raised explicitly: a bare assert is stripped under -O
                raise AssertionError(
                    "reg %d expected %x received %x" % (i, val, reg))
511
def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
    """Simulation process: present one integer instruction to dut.

    Clears every bit of the instruction-enable vector, then sets the
    register numbers, raises the enable bit for *op* and turns register
    decode on.  branch_success / branch_fail mark the instruction as
    shadow-dependent on (either) branch success or branch fail.
    """
    insn = dut.int_insn_i

    # start from an all-zero instruction-enable vector
    for bit in range(len(insn)):
        yield insn[bit].eq(0)

    # then apply the new instruction, one assignment at a time
    settings = (
        (dut.int_dest_i, dest),
        (dut.int_src1_i, src1),
        (dut.int_src2_i, src2),
        (insn[op], 1),
        (dut.reg_enable_i, 1),
        (dut.branch_fail_i, branch_fail),
        (dut.branch_succ_i, branch_success),
    )
    for signal, value in settings:
        yield signal.eq(value)
525
526
def print_reg(dut, rnums):
    """Simulation process: print the hex contents of the integer
    registers numbered in *rnums* on a single line."""
    contents = []
    for regnum in rnums:
        value = yield dut.intregs.regs[regnum].reg
        contents.append("%x" % value)
    labels = ','.join(str(regnum) for regnum in rnums)
    print ("reg %s: %s" % (labels, ','.join(contents)))
534
535
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Return a list of *n_ops* random instruction tuples.

    Each tuple is (src1, src2, dest, op): register numbers are drawn
    from 1..dut.n_regs-1 and op from 0..max_opnums.  With *shadowing*
    set, a fifth element (0, 0) is appended to every tuple.
    """
    top_reg = dut.n_regs - 1
    ops = []
    for _ in range(n_ops):
        # draw order matters for reproducibility: src1, src2, dest, op
        src1 = randint(1, top_reg)
        src2 = randint(1, top_reg)
        dest = randint(1, top_reg)
        op = randint(0, max_opnums)

        inst = (src1, src2, dest, op)
        if shadowing:
            inst = inst + ((0, 0),)
        ops.append(inst)
    return ops
549
550
def wait_for_busy_clear(dut):
    """Simulation process: keep stepping the clock, printing "busy" each
    time, until dut.busy_o reads as zero."""
    still_busy = yield dut.busy_o
    while still_busy:
        print ("busy")
        yield
        still_busy = yield dut.busy_o
558
559
def wait_for_issue(dut):
    """Simulation process: step the clock until dut.issue_o is set, then
    clear the instruction-enable vector and turn register decode off."""
    issued = yield dut.issue_o
    while not issued:
        yield
        issued = yield dut.issue_o

    # instruction accepted: withdraw it so that it is not issued twice
    insn = dut.int_insn_i
    for bit in range(len(insn)):
        yield insn[bit].eq(0)
        yield dut.reg_enable_i.eq(0)
572
def scoreboard_branch_sim(dut, alusim):
    """Simulation process exercising branch speculation.

    Runs two rounds.  Each round loads random values into the registers
    (mirrored into alusim), builds one random instruction followed by a
    BGT branch carrying one "branch ok" and one "branch fail" instruction,
    issues them to dut, waits for the units to go idle and then compares
    dut's registers against alusim.
    """

    # fixed seed so that every run uses the same instruction stream
    seed(0)

    yield dut.int_store_i.eq(1)

    for i in range(2):

        # set random values in the registers
        # NOTE: this inner loop re-uses (shadows) the outer loop variable i
        for i in range(1, dut.n_regs):
            val = 31+i*3 # immediately overwritten by the random value below
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions: branches create a tree
        insts = create_random_ops(dut, 1, True)

        src1 = randint(1, dut.n_regs-1)
        src2 = randint(1, dut.n_regs-1)
        #op = randint(4, 7)
        op = 4 # only BGT at the moment

        branch_ok = create_random_ops(dut, 1, True)
        branch_fail = create_random_ops(dut, 1, True)

        # the branch's "dest" slot carries its two sub-lists of instructions
        insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        # issue instruction(s)
        i = -1
        # NOTE: instrs is the *same list object* as insts, not a copy, so
        # the pop() below drains both names at once
        instrs = insts
        branch_direction = 0
        while instrs:
            i += 1
            # pop() takes from the end: the branch is issued first
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop()
            if branch_direction == 1 and shadow_off:
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_on:
                continue # branch was "fail" and this is a "success"... skip
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                # NOTE(review): -1 is then passed to int_instr as the
                # destination register number - confirm that is intended
                dest = -1
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d)" % (i, src1, src2, dest, op))
            yield from int_instr(dut, op, src1, src2, dest,
                                 shadow_on, shadow_off)
            yield
            yield from wait_for_issue(dut)
            branch_direction = yield dut.branch_direction_o # way branch went

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # NOTE(review): insts is always empty here (the while loop above
        # only exits once the shared list is drained), so this loop body
        # never runs and alusim is never updated with the issued
        # instructions.  The body also appends to insts while iterating
        # over it.  Needs a rework before it can verify anything.
        for (src1, src2, dest, op, (shadow_on, shadow_off)) in insts:
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = None
            branch_res = alusim.op(op, src1, src2, dest)
            if is_branch:
                if branch_res:
                    insts.append(branch_ok)
                else:
                    insts.append(branch_fail)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
649
650
def scoreboard_sim(dut, alusim):
    """Simulation process: issue a list of integer instructions to dut and
    check the resulting register contents against alusim.

    The instruction list is built from the toggled sections below: one
    random batch (enabled) and a set of hand-written regression cases
    (disabled; flip the relevant "if False" to enable one).  Entries are
    (src1, src2, dest, op) tuples, optionally followed by a
    (branch_success, branch_fail) shadow pair; a missing pair is treated
    as (0, 0), so the four-element regression cases can be enabled
    without crashing the issue loop.
    """

    yield dut.int_store_i.eq(1)

    for _ in range(1):

        # set random values in the registers
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if True:
            instrs = create_random_ops(dut, 10, True, 4)

        if False:
            instrs.append((2, 3, 3, 0))
            instrs.append((5, 3, 3, 1))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            src1, src2, dest, op = instr[:4]
            # the shadow pair is optional: default to "not shadowed"
            br_ok, br_fail = instr[4] if len(instr) > 4 else (0, 0)

            print ("instr %d: (%d, %d, %d, %d)" % (i, src1, src2, dest, op))
            # mirror the instruction in the reference model first
            alusim.op(op, src1, src2, dest)
            yield from int_instr(dut, op, src1, src2, dest, br_ok, br_fail)
            yield
            yield from wait_for_issue(dut)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
756
757
def test_scoreboard():
    """Build a 16-bit, 8-register Scoreboard, write its RTLIL to
    test_scoreboard6600.il and run the branch simulation, recording a
    VCD trace in test_scoreboard6600.vcd."""
    rwid, n_regs = 16, 8
    dut = Scoreboard(rwid, n_regs)
    alusim = RegSim(rwid, n_regs)

    # converting also elaborates the design
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    # alternative (non-branch) test, currently disabled:
    #run_simulation(dut, scoreboard_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')

    process = scoreboard_branch_sim(dut, alusim)
    run_simulation(dut, process, vcd_name='test_scoreboard6600.vcd')


if __name__ == '__main__':
    test_scoreboard()