f1061106e7966001285d833db1da57cc6d5822ce
1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
7 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
8 from scoreboard
.global_pending
import GlobalPending
9 from scoreboard
.group_picker
import GroupPicker
10 from scoreboard
.issue_unit
import IntFPIssueUnit
, RegDecode
11 from scoreboard
.shadow
import ShadowMatrix
, BranchSpeculationRecord
13 from compalu
import ComputationUnitNoDelay
15 from alu_hier
import ALU
, BranchALU
16 from nmutil
.latch
import SRLatch
18 from random
import randint
, seed
19 from copy
import deepcopy
22 class CompUnitsBase(Elaboratable
):
23 """ Computation Unit Base class.
25 Amazingly, this class works recursively. It's supposed to just
26 look after some ALUs (that can handle the same operations),
27 grouping them together, however it turns out that the same code
28 can also group *groups* of Computation Units together as well.
30 def __init__(self
, rwid
, units
):
33 * :rwid: bit width of register file(s) - both FP and INT
34 * :units: sequence of ALUs (or CompUnitsBase derivatives)
38 if units
and isinstance(units
[0], CompUnitsBase
):
41 self
.n_units
+= u
.n_units
43 self
.n_units
= len(units
)
45 n_units
= self
.n_units
48 self
.issue_i
= Signal(n_units
, reset_less
=True)
49 self
.go_rd_i
= Signal(n_units
, reset_less
=True)
50 self
.go_wr_i
= Signal(n_units
, reset_less
=True)
51 self
.shadown_i
= Signal(n_units
, reset_less
=True)
52 self
.go_die_i
= Signal(n_units
, reset_less
=True)
55 self
.busy_o
= Signal(n_units
, reset_less
=True)
56 self
.rd_rel_o
= Signal(n_units
, reset_less
=True)
57 self
.req_rel_o
= Signal(n_units
, reset_less
=True)
59 # in/out register data (note: not register#, actual data)
60 self
.data_o
= Signal(rwid
, reset_less
=True)
61 self
.src1_i
= Signal(rwid
, reset_less
=True)
62 self
.src2_i
= Signal(rwid
, reset_less
=True)
64 def elaborate(self
, platform
):
68 for i
, alu
in enumerate(self
.units
):
69 print ("elaborate comp%d" % i
, self
, alu
)
70 setattr(m
.submodules
, "comp%d" % i
, alu
)
80 for alu
in self
.units
:
81 req_rel_l
.append(alu
.req_rel_o
)
82 rd_rel_l
.append(alu
.rd_rel_o
)
83 shadow_l
.append(alu
.shadown_i
)
84 godie_l
.append(alu
.go_die_i
)
85 go_wr_l
.append(alu
.go_wr_i
)
86 go_rd_l
.append(alu
.go_rd_i
)
87 issue_l
.append(alu
.issue_i
)
88 busy_l
.append(alu
.busy_o
)
89 comb
+= self
.rd_rel_o
.eq(Cat(*rd_rel_l
))
90 comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
91 comb
+= self
.busy_o
.eq(Cat(*busy_l
))
92 comb
+= Cat(*godie_l
).eq(self
.go_die_i
)
93 comb
+= Cat(*shadow_l
).eq(self
.shadown_i
)
94 comb
+= Cat(*go_wr_l
).eq(self
.go_wr_i
)
95 comb
+= Cat(*go_rd_l
).eq(self
.go_rd_i
)
96 comb
+= Cat(*issue_l
).eq(self
.issue_i
)
98 # connect data register input/output
100 # merge (OR) all integer FU / ALU outputs to a single value
101 # bit of a hack: treereduce needs a list with an item named "data_o"
103 data_o
= treereduce(self
.units
)
104 comb
+= self
.data_o
.eq(data_o
)
106 for i
, alu
in enumerate(self
.units
):
107 comb
+= alu
.src1_i
.eq(self
.src1_i
)
108 comb
+= alu
.src2_i
.eq(self
.src2_i
)
113 class CompUnitALUs(CompUnitsBase
):
115 def __init__(self
, rwid
):
118 * :rwid: bit width of register file(s) - both FP and INT
128 for alu
in [add
, sub
, mul
, shf
]:
129 units
.append(ComputationUnitNoDelay(rwid
, 2, alu
))
131 print ("alu units", units
)
132 CompUnitsBase
.__init
__(self
, rwid
, units
)
133 print ("alu base init done")
135 def elaborate(self
, platform
):
136 print ("alu elaborate start")
137 m
= CompUnitsBase
.elaborate(self
, platform
)
138 print ("alu elaborate done")
141 comb
+= self
.units
[0].oper_i
.eq(Const(0, 2)) # op=add
142 comb
+= self
.units
[1].oper_i
.eq(Const(1, 2)) # op=sub
143 comb
+= self
.units
[2].oper_i
.eq(Const(2, 2)) # op=mul
144 comb
+= self
.units
[3].oper_i
.eq(Const(3, 2)) # op=shf
149 class CompUnitBR(CompUnitsBase
):
151 def __init__(self
, rwid
):
154 * :rwid: bit width of register file(s) - both FP and INT
156 Note: bgt unit is returned so that a shadow unit can be created
162 self
.bgt
= BranchALU(rwid
)
163 self
.br1
= ComputationUnitNoDelay(rwid
, 3, self
.bgt
)
164 print ("br units", [self
.br1
])
165 CompUnitsBase
.__init
__(self
, rwid
, [self
.br1
])
166 print ("br base init done")
168 def elaborate(self
, platform
):
169 print ("br elaborate start")
170 m
= CompUnitsBase
.elaborate(self
, platform
)
171 print ("br elaborate done")
174 comb
+= self
.br1
.oper_i
.eq(Const(4, 3)) # op=bgt
179 class FunctionUnits(Elaboratable
):
181 def __init__(self
, n_regs
, n_int_alus
):
183 self
.n_int_alus
= n_int_alus
185 self
.dest_i
= Signal(n_regs
, reset_less
=True) # Dest R# in
186 self
.src1_i
= Signal(n_regs
, reset_less
=True) # oper1 R# in
187 self
.src2_i
= Signal(n_regs
, reset_less
=True) # oper2 R# in
189 self
.g_int_rd_pend_o
= Signal(n_regs
, reset_less
=True)
190 self
.g_int_wr_pend_o
= Signal(n_regs
, reset_less
=True)
192 self
.dest_rsel_o
= Signal(n_regs
, reset_less
=True) # dest reg (bot)
193 self
.src1_rsel_o
= Signal(n_regs
, reset_less
=True) # src1 reg (bot)
194 self
.src2_rsel_o
= Signal(n_regs
, reset_less
=True) # src2 reg (bot)
196 self
.req_rel_i
= Signal(n_int_alus
, reset_less
= True)
197 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
198 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
200 self
.go_rd_i
= Signal(n_int_alus
, reset_less
=True)
201 self
.go_wr_i
= Signal(n_int_alus
, reset_less
=True)
202 self
.go_die_i
= Signal(n_int_alus
, reset_less
=True)
203 self
.req_rel_o
= Signal(n_int_alus
, reset_less
=True)
204 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
206 # Note: FURegs wr_pend_o is also output from here, for use in WaWGrid
208 def elaborate(self
, platform
):
213 n_int_fus
= self
.n_int_alus
215 # Integer FU-FU Dep Matrix
216 intfudeps
= FUFUDepMatrix(n_int_fus
, n_int_fus
)
217 m
.submodules
.intfudeps
= intfudeps
218 # Integer FU-Reg Dep Matrix
219 intregdeps
= FURegDepMatrix(n_int_fus
, self
.n_regs
)
220 m
.submodules
.intregdeps
= intregdeps
222 comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.rd_rsel_o
)
223 comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.wr_rsel_o
)
225 comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.rd_rsel_o
)
226 comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.wr_rsel_o
)
228 comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
229 comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
230 self
.wr_pend_o
= intregdeps
.wr_pend_o
# also output for use in WaWGrid
232 comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
233 comb
+= intfudeps
.go_rd_i
.eq(self
.go_rd_i
)
234 comb
+= intfudeps
.go_wr_i
.eq(self
.go_wr_i
)
235 comb
+= intfudeps
.go_die_i
.eq(self
.go_die_i
)
236 comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
237 comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
239 # Connect function issue / arrays, and dest/src1/src2
240 comb
+= intregdeps
.dest_i
.eq(self
.dest_i
)
241 comb
+= intregdeps
.src1_i
.eq(self
.src1_i
)
242 comb
+= intregdeps
.src2_i
.eq(self
.src2_i
)
244 comb
+= intregdeps
.go_rd_i
.eq(self
.go_rd_i
)
245 comb
+= intregdeps
.go_wr_i
.eq(self
.go_wr_i
)
246 comb
+= intregdeps
.go_die_i
.eq(self
.go_die_i
)
247 comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
249 comb
+= self
.dest_rsel_o
.eq(intregdeps
.dest_rsel_o
)
250 comb
+= self
.src1_rsel_o
.eq(intregdeps
.src1_rsel_o
)
251 comb
+= self
.src2_rsel_o
.eq(intregdeps
.src2_rsel_o
)
256 class Scoreboard(Elaboratable
):
257 def __init__(self
, rwid
, n_regs
):
260 * :rwid: bit width of register file(s) - both FP and INT
261 * :n_regs: depth of register file(s) - number of FP and INT regs
267 self
.intregs
= RegFileArray(rwid
, n_regs
)
268 self
.fpregs
= RegFileArray(rwid
, n_regs
)
271 self
.int_dest_i
= Signal(max=n_regs
, reset_less
=True) # Dest R# in
272 self
.int_src1_i
= Signal(max=n_regs
, reset_less
=True) # oper1 R# in
273 self
.int_src2_i
= Signal(max=n_regs
, reset_less
=True) # oper2 R# in
274 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
277 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
278 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
280 # for branch speculation experiment. branch_direction = 0 if
281 # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
282 # branch_succ and branch_fail are requests to have the current
283 # instruction be dependent on the branch unit "shadow" capability.
284 self
.branch_succ_i
= Signal(reset_less
=True)
285 self
.branch_fail_i
= Signal(reset_less
=True)
286 self
.branch_direction_o
= Signal(2, reset_less
=True)
288 def elaborate(self
, platform
):
293 m
.submodules
.intregs
= self
.intregs
294 m
.submodules
.fpregs
= self
.fpregs
297 int_dest
= self
.intregs
.write_port("dest")
298 int_src1
= self
.intregs
.read_port("src1")
299 int_src2
= self
.intregs
.read_port("src2")
301 fp_dest
= self
.fpregs
.write_port("dest")
302 fp_src1
= self
.fpregs
.read_port("src1")
303 fp_src2
= self
.fpregs
.read_port("src2")
305 # Int ALUs and Comp Units
307 cua
= CompUnitALUs(self
.rwid
)
308 cub
= CompUnitBR(self
.rwid
)
309 m
.submodules
.cu
= cu
= CompUnitsBase(self
.rwid
, [cua
, cub
])
310 bgt
= cub
.bgt
# get at the branch computation unit
314 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
)
316 # Count of number of FUs
317 n_int_fus
= n_int_alus
318 n_fp_fus
= 0 # for now
320 # Integer Priority Picker 1: Adder + Subtractor
321 intpick1
= GroupPicker(n_int_fus
) # picks between add, sub, mul and shf
322 m
.submodules
.intpick1
= intpick1
325 regdecode
= RegDecode(self
.n_regs
)
326 m
.submodules
.regdecode
= regdecode
327 issueunit
= IntFPIssueUnit(n_int_fus
, n_fp_fus
)
328 m
.submodules
.issueunit
= issueunit
330 # Shadow Matrix. currently n_int_fus shadows, to be used for
331 # write-after-write hazards. NOTE: there is one extra for branches,
332 # so the shadow width is increased by 1
333 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_int_fus
, n_int_fus
, True)
334 m
.submodules
.bshadow
= bshadow
= ShadowMatrix(n_int_fus
, 1, False)
336 # record previous instruction to cast shadow on current instruction
337 fn_issue_prev
= Signal(n_int_fus
)
338 prev_shadow
= Signal(n_int_fus
)
340 # Branch Speculation recorder. tracks the success/fail state as
341 # each instruction is issued, so that when the branch occurs the
342 # allow/cancel can be issued as appropriate.
343 m
.submodules
.specrec
= bspec
= BranchSpeculationRecord(n_int_fus
)
346 # ok start wiring things together...
347 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
348 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
352 # Issue Unit is where it starts. set up some in/outs for this module
354 comb
+= [ regdecode
.dest_i
.eq(self
.int_dest_i
),
355 regdecode
.src1_i
.eq(self
.int_src1_i
),
356 regdecode
.src2_i
.eq(self
.int_src2_i
),
357 regdecode
.enable_i
.eq(self
.reg_enable_i
),
358 self
.issue_o
.eq(issueunit
.issue_o
)
360 self
.int_insn_i
= issueunit
.i
.insn_i
# enabled by instruction decode
362 # TODO: issueunit.f (FP)
364 # and int function issue / busy arrays, and dest/src1/src2
365 comb
+= intfus
.dest_i
.eq(regdecode
.dest_o
)
366 comb
+= intfus
.src1_i
.eq(regdecode
.src1_o
)
367 comb
+= intfus
.src2_i
.eq(regdecode
.src2_o
)
369 fn_issue_o
= issueunit
.i
.fn_issue_o
371 comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
372 comb
+= issueunit
.i
.busy_i
.eq(cu
.busy_o
)
373 comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
376 # merge shadow matrices outputs
379 # these are explained in ShadowMatrix docstring, and are to be
380 # connected to the FUReg and FUFU Matrices, to get them to reset
381 anydie
= Signal(n_int_fus
, reset_less
=True)
382 allshadown
= Signal(n_int_fus
, reset_less
=True)
383 shreset
= Signal(n_int_fus
, reset_less
=True)
384 comb
+= allshadown
.eq(shadows
.shadown_o
& bshadow
.shadown_o
)
385 comb
+= anydie
.eq(shadows
.go_die_o | bshadow
.go_die_o
)
386 comb
+= shreset
.eq(bspec
.match_g_o | bspec
.match_f_o
)
389 # connect fu-fu matrix
392 # Group Picker... done manually for now.
393 go_rd_o
= intpick1
.go_rd_o
394 go_wr_o
= intpick1
.go_wr_o
395 go_rd_i
= intfus
.go_rd_i
396 go_wr_i
= intfus
.go_wr_i
397 go_die_i
= intfus
.go_die_i
398 # NOTE: connect to the shadowed versions so that they can "die" (reset)
399 comb
+= go_rd_i
[0:n_int_fus
].eq(go_rd_o
[0:n_int_fus
]) # rd
400 comb
+= go_wr_i
[0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
]) # wr
401 comb
+= go_die_i
[0:n_int_fus
].eq(anydie
[0:n_int_fus
]) # die
405 comb
+= intpick1
.rd_rel_i
[0:n_int_fus
].eq(cu
.rd_rel_o
[0:n_int_fus
])
406 comb
+= intpick1
.req_rel_i
[0:n_int_fus
].eq(cu
.req_rel_o
[0:n_int_fus
])
407 int_rd_o
= intfus
.readable_o
408 int_wr_o
= intfus
.writable_o
409 comb
+= intpick1
.readable_i
[0:n_int_fus
].eq(int_rd_o
[0:n_int_fus
])
410 comb
+= intpick1
.writable_i
[0:n_int_fus
].eq(int_wr_o
[0:n_int_fus
])
416 comb
+= shadows
.issue_i
.eq(fn_issue_o
)
417 #comb += shadows.reset_i[0:n_int_fus].eq(bshadow.go_die_o[0:n_int_fus])
418 comb
+= shadows
.reset_i
[0:n_int_fus
].eq(bshadow
.go_die_o
[0:n_int_fus
])
420 # NOTE: this setup is for the instruction order preservation...
422 # connect shadows / go_dies to Computation Units
423 comb
+= cu
.shadown_i
[0:n_int_fus
].eq(allshadown
)
424 comb
+= cu
.go_die_i
[0:n_int_fus
].eq(anydie
)
426 # ok connect first n_int_fu shadows to busy lines, to create an
427 # instruction-order linked-list-like arrangement, using a bit-matrix
428 # (instead of e.g. a ring buffer).
431 # when written, the shadow can be cancelled (and was good)
432 for i
in range(n_int_fus
):
433 comb
+= shadows
.s_good_i
[i
][0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
])
435 # work out the current-activated busy unit (by recording the old one)
436 with m
.If(fn_issue_o
): # only update prev bit if instruction issued
437 sync
+= fn_issue_prev
.eq(fn_issue_o
)
439 # *previous* instruction shadows *current* instruction, and, obviously,
440 # if the previous is completed (!busy) don't cast the shadow!
441 comb
+= prev_shadow
.eq(~fn_issue_o
& cu
.busy_o
)
442 for i
in range(n_int_fus
):
443 comb
+= shadows
.shadow_i
[i
][0:n_int_fus
].eq(prev_shadow
)
446 # ... and this is for branch speculation. it uses the extra bit
447 # tacked onto the ShadowMatrix (hence shadow_wid=n_int_fus+1)
448 # only needs to set shadow_i, s_fail_i and s_good_i
450 # issue captures shadow_i (if enabled)
451 comb
+= bshadow
.reset_i
[0:n_int_fus
].eq(shreset
[0:n_int_fus
])
453 bactive
= Signal(reset_less
=True)
454 comb
+= bactive
.eq((bspec
.active_i | br1
.issue_i
) & ~br1
.go_wr_i
)
456 # instruction being issued (fn_issue_o) has a shadow cast by the branch
457 with m
.If(bactive
& (self
.branch_succ_i | self
.branch_fail_i
)):
458 comb
+= bshadow
.issue_i
.eq(fn_issue_o
)
459 for i
in range(n_int_fus
):
460 with m
.If(fn_issue_o
& (Const(1<<i
))):
461 comb
+= bshadow
.shadow_i
[i
][0].eq(1)
463 # finally, we need an indicator to the test infrastructure as to
464 # whether the branch succeeded or failed, plus, link up to the
465 # "recorder" of whether the instruction was under shadow or not
467 with m
.If(br1
.issue_i
):
468 sync
+= bspec
.active_i
.eq(1)
469 with m
.If(self
.branch_succ_i
):
470 comb
+= bspec
.good_i
.eq(fn_issue_o
& 0x1f)
471 with m
.If(self
.branch_fail_i
):
472 comb
+= bspec
.fail_i
.eq(fn_issue_o
& 0x1f)
474 # branch is active (TODO: a better signal: this is over-using the
475 # go_write signal - actually the branch should not be "writing")
476 with m
.If(br1
.go_wr_i
):
477 sync
+= self
.branch_direction_o
.eq(br1
.data_o
+Const(1, 2))
478 sync
+= bspec
.active_i
.eq(0)
479 comb
+= bspec
.br_i
.eq(1)
480 # branch occurs if data == 1, failed if data == 0
481 comb
+= bspec
.br_ok_i
.eq(br1
.data_o
== 1)
482 for i
in range(n_int_fus
):
483 # *expected* direction of the branch matched against *actual*
484 comb
+= bshadow
.s_good_i
[i
][0].eq(bspec
.match_g_o
[i
])
486 comb
+= bshadow
.s_fail_i
[i
][0].eq(bspec
.match_f_o
[i
])
489 # Connect Register File(s)
491 print ("intregdeps wen len", len(intfus
.dest_rsel_o
))
492 comb
+= int_dest
.wen
.eq(intfus
.dest_rsel_o
)
493 comb
+= int_src1
.ren
.eq(intfus
.src1_rsel_o
)
494 comb
+= int_src2
.ren
.eq(intfus
.src2_rsel_o
)
496 # connect ALUs to regfile
497 comb
+= int_dest
.data_i
.eq(cu
.data_o
)
498 comb
+= cu
.src1_i
.eq(int_src1
.data_o
)
499 comb
+= cu
.src2_i
.eq(int_src2
.data_o
)
501 # connect ALU Computation Units
502 comb
+= cu
.go_rd_i
[0:n_int_fus
].eq(go_rd_o
[0:n_int_fus
])
503 comb
+= cu
.go_wr_i
[0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
])
504 comb
+= cu
.issue_i
[0:n_int_fus
].eq(fn_issue_o
[0:n_int_fus
])
510 yield from self
.intregs
511 yield from self
.fpregs
512 yield self
.int_dest_i
513 yield self
.int_src1_i
514 yield self
.int_src2_i
516 yield self
.branch_succ_i
517 yield self
.branch_fail_i
518 yield self
.branch_direction_o
533 def __init__(self
, rwidth
, nregs
):
535 self
.regs
= [0] * nregs
537 def op(self
, op
, src1
, src2
, dest
):
538 maxbits
= (1 << self
.rwidth
) - 1
539 src1
= self
.regs
[src1
] & maxbits
540 src2
= self
.regs
[src2
] & maxbits
548 val
= src1
>> (src2
& maxbits
)
550 val
= int(src1
> src2
)
552 val
= int(src1
< src2
)
554 val
= int(src1
== src2
)
556 val
= int(src1
!= src2
)
558 self
.setval(dest
, val
)
def setval(self, dest, val):
    """Record ``val`` as the simulated contents of register ``dest``.

    The write is logged to stdout (value shown in hex) before the
    entry of ``self.regs`` at index ``dest`` is overwritten.  The value
    is stored as given; no masking is applied here.
    """
    shown = hex(val)
    print("sim setval", dest, shown)
    self.regs[dest] = val
566 for i
, val
in enumerate(self
.regs
):
567 reg
= yield dut
.intregs
.regs
[i
].reg
568 okstr
= "OK" if reg
== val
else "!ok"
569 print("reg %d expected %x received %x %s" % (i
, val
, reg
, okstr
))
571 def check(self
, dut
):
572 for i
, val
in enumerate(self
.regs
):
573 reg
= yield dut
.intregs
.regs
[i
].reg
575 print("reg %d expected %x received %x\n" % (i
, val
, reg
))
576 yield from self
.dump(dut
)
def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
    """Yield the assignments that present one integer instruction to ``dut``.

    This is a generator: each yielded item is the result of calling
    ``.eq(...)`` on one of the ``dut`` attributes, in this order:

    * every bit of ``dut.int_insn_i`` is set to 0
    * ``int_dest_i``, ``int_src1_i`` and ``int_src2_i`` receive ``dest``,
      ``src1`` and ``src2``
    * bit ``op`` of ``dut.int_insn_i`` is set to 1
    * ``reg_enable_i`` is set to 1
    * ``branch_fail_i`` and ``branch_succ_i`` receive ``branch_fail`` and
      ``branch_success``
    """
    insn_bits = dut.int_insn_i

    # start from a clean slate: de-select every instruction bit
    for bit in range(len(insn_bits)):
        yield insn_bits[bit].eq(0)

    # register numbers for destination and both source operands
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    # select the requested operation and enable register decoding
    yield dut.int_insn_i[op].eq(1)
    yield dut.reg_enable_i.eq(1)

    # these flags request that the instruction be made shadow-dependent
    # on (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)
594 def print_reg(dut
, rnums
):
597 reg
= yield dut
.intregs
.regs
[rnum
].reg
598 rs
.append("%x" % reg
)
599 rnums
= map(str, rnums
)
600 print ("reg %s: %s" % (','.join(rnums
), ','.join(rs
)))
603 def create_random_ops(dut
, n_ops
, shadowing
=False, max_opnums
=3):
605 for i
in range(n_ops
):
606 src1
= randint(1, dut
.n_regs
-1)
607 src2
= randint(1, dut
.n_regs
-1)
608 dest
= randint(1, dut
.n_regs
-1)
609 op
= randint(0, max_opnums
)
612 insts
.append((src1
, src2
, dest
, op
, (0, 0)))
614 insts
.append((src1
, src2
, dest
, op
))
618 def wait_for_busy_clear(dut
):
620 busy_o
= yield dut
.busy_o
627 def wait_for_issue(dut
):
629 issue_o
= yield dut
.issue_o
631 for i
in range(len(dut
.int_insn_i
)):
632 yield dut
.int_insn_i
[i
].eq(0)
633 yield dut
.reg_enable_i
.eq(0)
636 #yield from print_reg(dut, [1,2,3])
638 #yield from print_reg(dut, [1,2,3])
640 def scoreboard_branch_sim(dut
, alusim
):
646 print ("rseed", iseed
)
650 yield dut
.branch_direction_o
.eq(0)
652 # set random values in the registers
653 for i
in range(1, dut
.n_regs
):
655 val
= randint(0, (1<<alusim
.rwidth
)-1)
656 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
657 alusim
.setval(i
, val
)
660 # create some instructions: branches create a tree
661 insts
= create_random_ops(dut
, 1, True, 1)
662 #insts.append((6, 6, 1, 2, (0, 0)))
663 #insts.append((4, 3, 3, 0, (0, 0)))
665 src1
= randint(1, dut
.n_regs
-1)
666 src2
= randint(1, dut
.n_regs
-1)
668 op
= 4 # only BGT at the moment
670 branch_ok
= create_random_ops(dut
, 1, True, 1)
671 branch_fail
= create_random_ops(dut
, 1, True, 1)
673 insts
.append((src1
, src2
, (branch_ok
, branch_fail
), op
, (0, 0)))
677 #insts.append( (3, 5, 2, 0, (0, 0)) )
680 branch_ok
.append ( (5, 7, 5, 1, (1, 0)) )
681 #branch_ok.append( None )
682 branch_fail
.append( (1, 1, 2, 0, (0, 1)) )
683 #branch_fail.append( None )
684 insts
.append( (6, 4, (branch_ok
, branch_fail
), 4, (0, 0)) )
686 siminsts
= deepcopy(insts
)
688 # issue instruction(s)
696 branch_direction
= yield dut
.branch_direction_o
# way branch went
697 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = insts
.pop(0)
698 if branch_direction
== 1 and shadow_on
:
699 print ("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
700 continue # branch was "success" and this is a "failed"... skip
701 if branch_direction
== 2 and shadow_off
:
702 print ("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
703 continue # branch was "fail" and this is a "success"... skip
704 if branch_direction
!= 0:
709 branch_ok
, branch_fail
= dest
711 # ok zip up the branch success / fail instructions and
712 # drop them into the queue, one marked "to have branch success"
713 # the other to be marked shadow branch "fail".
714 # one out of each of these will be cancelled
715 for ok
, fl
in zip(branch_ok
, branch_fail
):
717 instrs
.append((ok
[0], ok
[1], ok
[2], ok
[3], (1, 0)))
719 instrs
.append((fl
[0], fl
[1], fl
[2], fl
[3], (0, 1)))
720 print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
721 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
722 yield from int_instr(dut
, op
, src1
, src2
, dest
,
723 shadow_on
, shadow_off
)
725 yield from wait_for_issue(dut
)
727 # wait for all instructions to stop before checking
729 yield from wait_for_busy_clear(dut
)
733 instr
= siminsts
.pop(0)
736 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = instr
740 branch_ok
, branch_fail
= dest
742 print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
743 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
744 branch_res
= alusim
.op(op
, src1
, src2
, dest
)
747 siminsts
+= branch_ok
749 siminsts
+= branch_fail
752 yield from alusim
.check(dut
)
753 yield from alusim
.dump(dut
)
756 def scoreboard_sim(dut
, alusim
):
762 # set random values in the registers
763 for i
in range(1, dut
.n_regs
):
765 val
= randint(0, (1<<alusim
.rwidth
)-1)
766 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
767 alusim
.setval(i
, val
)
769 # create some instructions (some random, some regression tests)
772 instrs
= create_random_ops(dut
, 10, True, 4)
775 instrs
.append((2, 3, 3, 0))
776 instrs
.append((5, 3, 3, 1))
779 instrs
.append((5, 6, 2, 1))
780 instrs
.append((2, 2, 4, 0))
781 #instrs.append((2, 2, 3, 1))
784 instrs
.append((2, 1, 2, 3))
787 instrs
.append((2, 6, 2, 1))
788 instrs
.append((2, 1, 2, 0))
791 instrs
.append((1, 2, 7, 2))
792 instrs
.append((7, 1, 5, 0))
793 instrs
.append((4, 4, 1, 1))
796 instrs
.append((5, 6, 2, 2))
797 instrs
.append((1, 1, 4, 1))
798 instrs
.append((6, 5, 3, 0))
801 # Write-after-Write Hazard
802 instrs
.append( (3, 6, 7, 2) )
803 instrs
.append( (4, 4, 7, 1) )
806 # self-read/write-after-write followed by Read-after-Write
807 instrs
.append((1, 1, 1, 1))
808 instrs
.append((1, 5, 3, 0))
811 # Read-after-Write followed by self-read-after-write
812 instrs
.append((5, 6, 1, 2))
813 instrs
.append((1, 1, 1, 1))
816 # self-read-write sandwich
817 instrs
.append((5, 6, 1, 2))
818 instrs
.append((1, 1, 1, 1))
819 instrs
.append((1, 5, 3, 0))
823 instrs
.append( (5, 2, 5, 2) )
824 instrs
.append( (2, 6, 3, 0) )
825 instrs
.append( (4, 2, 2, 1) )
829 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
831 yield dut
.intregs
.regs
[3].reg
.eq(5)
833 instrs
.append((5, 3, 3, 4, (0, 0)))
834 instrs
.append((4, 2, 1, 2, (0, 1)))
838 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
840 yield dut
.intregs
.regs
[3].reg
.eq(5)
842 instrs
.append((5, 3, 3, 4, (0, 0)))
843 instrs
.append((4, 2, 1, 2, (1, 0)))
846 instrs
.append( (4, 3, 5, 1, (0, 0)) )
847 instrs
.append( (5, 2, 3, 1, (0, 0)) )
848 instrs
.append( (7, 1, 5, 2, (0, 0)) )
849 instrs
.append( (5, 6, 6, 4, (0, 0)) )
850 instrs
.append( (7, 5, 2, 2, (1, 0)) )
851 instrs
.append( (1, 7, 5, 0, (0, 1)) )
852 instrs
.append( (1, 6, 1, 2, (1, 0)) )
853 instrs
.append( (1, 6, 7, 3, (0, 0)) )
854 instrs
.append( (6, 7, 7, 0, (0, 0)) )
856 # issue instruction(s), wait for issue to be free before proceeding
857 for i
, (src1
, src2
, dest
, op
, (br_ok
, br_fail
)) in enumerate(instrs
):
859 print ("instr %d: (%d, %d, %d, %d)" % (i
, src1
, src2
, dest
, op
))
860 alusim
.op(op
, src1
, src2
, dest
)
861 yield from int_instr(dut
, op
, src1
, src2
, dest
, br_ok
, br_fail
)
863 yield from wait_for_issue(dut
)
865 # wait for all instructions to stop before checking
867 yield from wait_for_busy_clear(dut
)
870 yield from alusim
.check(dut
)
871 yield from alusim
.dump(dut
)
874 def test_scoreboard():
875 dut
= Scoreboard(16, 8)
876 alusim
= RegSim(16, 8)
877 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
878 with
open("test_scoreboard6600.il", "w") as f
:
881 run_simulation(dut
, scoreboard_sim(dut
, alusim
),
882 vcd_name
='test_scoreboard6600.vcd')
884 #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
885 # vcd_name='test_scoreboard6600.vcd')
888 if __name__
== '__main__':