1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
7 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
8 from scoreboard
.global_pending
import GlobalPending
9 from scoreboard
.group_picker
import GroupPicker
10 from scoreboard
.issue_unit
import IssueUnitGroup
, IssueUnitArray
, RegDecode
11 from scoreboard
.shadow
import ShadowMatrix
, BranchSpeculationRecord
13 from compalu
import ComputationUnitNoDelay
15 from alu_hier
import ALU
, BranchALU
16 from nmutil
.latch
import SRLatch
18 from random
import randint
, seed
19 from copy
import deepcopy
22 class CompUnitsBase(Elaboratable
):
23 """ Computation Unit Base class.
25 Amazingly, this class works recursively. It's supposed to just
26 look after some ALUs (that can handle the same operations),
27 grouping them together, however it turns out that the same code
28 can also group *groups* of Computation Units together as well.
30 Basically it was intended just to concatenate the ALU's issue,
31 go_rd etc. signals together, which start out as bits and become
32 sequences. Turns out that the same trick works just as well
35 So this class may be used recursively to present a top-level
36 sequential concatenation of all the signals in and out of
37 ALUs, whilst at the same time making it convenient to group
40 At the lower level, the intent is that groups of (identical)
41 ALUs may be passed the same operation. Even beyond that,
42 the intent is that that group of (identical) ALUs actually
43 share the *same pipeline* and as such become a "Concurrent
44 Computation Unit" as defined by Mitch Alsup (see section
47 def __init__(self
, rwid
, units
):
50 * :rwid: bit width of register file(s) - both FP and INT
51 * :units: sequence of ALUs (or CompUnitsBase derivatives)
56 if units
and isinstance(units
[0], CompUnitsBase
):
59 self
.n_units
+= u
.n_units
61 self
.n_units
= len(units
)
63 n_units
= self
.n_units
66 self
.issue_i
= Signal(n_units
, reset_less
=True)
67 self
.go_rd_i
= Signal(n_units
, reset_less
=True)
68 self
.go_wr_i
= Signal(n_units
, reset_less
=True)
69 self
.shadown_i
= Signal(n_units
, reset_less
=True)
70 self
.go_die_i
= Signal(n_units
, reset_less
=True)
73 self
.busy_o
= Signal(n_units
, reset_less
=True)
74 self
.rd_rel_o
= Signal(n_units
, reset_less
=True)
75 self
.req_rel_o
= Signal(n_units
, reset_less
=True)
77 # in/out register data (note: not register#, actual data)
78 self
.data_o
= Signal(rwid
, reset_less
=True)
79 self
.src1_i
= Signal(rwid
, reset_less
=True)
80 self
.src2_i
= Signal(rwid
, reset_less
=True)
83 def elaborate(self
, platform
):
87 for i
, alu
in enumerate(self
.units
):
88 setattr(m
.submodules
, "comp%d" % i
, alu
)
98 for alu
in self
.units
:
99 req_rel_l
.append(alu
.req_rel_o
)
100 rd_rel_l
.append(alu
.rd_rel_o
)
101 shadow_l
.append(alu
.shadown_i
)
102 godie_l
.append(alu
.go_die_i
)
103 go_wr_l
.append(alu
.go_wr_i
)
104 go_rd_l
.append(alu
.go_rd_i
)
105 issue_l
.append(alu
.issue_i
)
106 busy_l
.append(alu
.busy_o
)
107 comb
+= self
.rd_rel_o
.eq(Cat(*rd_rel_l
))
108 comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
109 comb
+= self
.busy_o
.eq(Cat(*busy_l
))
110 comb
+= Cat(*godie_l
).eq(self
.go_die_i
)
111 comb
+= Cat(*shadow_l
).eq(self
.shadown_i
)
112 comb
+= Cat(*go_wr_l
).eq(self
.go_wr_i
)
113 comb
+= Cat(*go_rd_l
).eq(self
.go_rd_i
)
114 comb
+= Cat(*issue_l
).eq(self
.issue_i
)
116 # connect data register input/output
118 # merge (OR) all integer FU / ALU outputs to a single value
119 # bit of a hack: treereduce needs a list with an item named "data_o"
121 data_o
= treereduce(self
.units
)
122 comb
+= self
.data_o
.eq(data_o
)
124 for i
, alu
in enumerate(self
.units
):
125 comb
+= alu
.src1_i
.eq(self
.src1_i
)
126 comb
+= alu
.src2_i
.eq(self
.src2_i
)
131 class CompUnitALUs(CompUnitsBase
):
133 def __init__(self
, rwid
, opwid
):
136 * :rwid: bit width of register file(s) - both FP and INT
137 * :opwid: operand bit width
142 self
.oper_i
= Signal(opwid
, reset_less
=True)
151 for alu
in [add
, sub
, mul
, shf
]:
152 units
.append(ComputationUnitNoDelay(rwid
, 2, alu
))
154 CompUnitsBase
.__init
__(self
, rwid
, units
)
156 def elaborate(self
, platform
):
157 m
= CompUnitsBase
.elaborate(self
, platform
)
160 # hand the same operation to all units
161 for alu
in self
.units
:
162 comb
+= alu
.oper_i
.eq(self
.oper_i
)
163 #comb += self.units[0].oper_i.eq(Const(0, 2)) # op=add
164 #comb += self.units[1].oper_i.eq(Const(1, 2)) # op=sub
165 #comb += self.units[2].oper_i.eq(Const(2, 2)) # op=mul
166 #comb += self.units[3].oper_i.eq(Const(3, 2)) # op=shf
171 class CompUnitBR(CompUnitsBase
):
173 def __init__(self
, rwid
, opwid
):
176 * :rwid: bit width of register file(s) - both FP and INT
177 * :opwid: operand bit width
179 Note: bgt unit is returned so that a shadow unit can be created
185 self
.oper_i
= Signal(opwid
, reset_less
=True)
188 self
.bgt
= BranchALU(rwid
)
189 self
.br1
= ComputationUnitNoDelay(rwid
, 3, self
.bgt
)
190 CompUnitsBase
.__init
__(self
, rwid
, [self
.br1
])
192 def elaborate(self
, platform
):
193 m
= CompUnitsBase
.elaborate(self
, platform
)
196 # hand the same operation to all units
197 for alu
in self
.units
:
198 comb
+= alu
.oper_i
.eq(self
.oper_i
)
199 #comb += self.br1.oper_i.eq(Const(4, 3)) # op=bgt
204 class FunctionUnits(Elaboratable
):
206 def __init__(self
, n_regs
, n_int_alus
):
208 self
.n_int_alus
= n_int_alus
210 self
.dest_i
= Signal(n_regs
, reset_less
=True) # Dest R# in
211 self
.src1_i
= Signal(n_regs
, reset_less
=True) # oper1 R# in
212 self
.src2_i
= Signal(n_regs
, reset_less
=True) # oper2 R# in
214 self
.g_int_rd_pend_o
= Signal(n_regs
, reset_less
=True)
215 self
.g_int_wr_pend_o
= Signal(n_regs
, reset_less
=True)
217 self
.dest_rsel_o
= Signal(n_regs
, reset_less
=True) # dest reg (bot)
218 self
.src1_rsel_o
= Signal(n_regs
, reset_less
=True) # src1 reg (bot)
219 self
.src2_rsel_o
= Signal(n_regs
, reset_less
=True) # src2 reg (bot)
221 self
.req_rel_i
= Signal(n_int_alus
, reset_less
= True)
222 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
223 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
225 self
.go_rd_i
= Signal(n_int_alus
, reset_less
=True)
226 self
.go_wr_i
= Signal(n_int_alus
, reset_less
=True)
227 self
.go_die_i
= Signal(n_int_alus
, reset_less
=True)
228 self
.req_rel_o
= Signal(n_int_alus
, reset_less
=True)
229 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
231 # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
233 def elaborate(self
, platform
):
238 n_intfus
= self
.n_int_alus
240 # Integer FU-FU Dep Matrix
241 intfudeps
= FUFUDepMatrix(n_intfus
, n_intfus
)
242 m
.submodules
.intfudeps
= intfudeps
243 # Integer FU-Reg Dep Matrix
244 intregdeps
= FURegDepMatrix(n_intfus
, self
.n_regs
)
245 m
.submodules
.intregdeps
= intregdeps
247 comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.rd_rsel_o
)
248 comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.wr_rsel_o
)
250 comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.rd_rsel_o
)
251 comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.wr_rsel_o
)
253 comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
254 comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
255 self
.wr_pend_o
= intregdeps
.wr_pend_o
# also output for use in WaWGrid
257 comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
258 comb
+= intfudeps
.go_rd_i
.eq(self
.go_rd_i
)
259 comb
+= intfudeps
.go_wr_i
.eq(self
.go_wr_i
)
260 comb
+= intfudeps
.go_die_i
.eq(self
.go_die_i
)
261 comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
262 comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
264 # Connect function issue / arrays, and dest/src1/src2
265 comb
+= intregdeps
.dest_i
.eq(self
.dest_i
)
266 comb
+= intregdeps
.src1_i
.eq(self
.src1_i
)
267 comb
+= intregdeps
.src2_i
.eq(self
.src2_i
)
269 comb
+= intregdeps
.go_rd_i
.eq(self
.go_rd_i
)
270 comb
+= intregdeps
.go_wr_i
.eq(self
.go_wr_i
)
271 comb
+= intregdeps
.go_die_i
.eq(self
.go_die_i
)
272 comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
274 comb
+= self
.dest_rsel_o
.eq(intregdeps
.dest_rsel_o
)
275 comb
+= self
.src1_rsel_o
.eq(intregdeps
.src1_rsel_o
)
276 comb
+= self
.src2_rsel_o
.eq(intregdeps
.src2_rsel_o
)
281 class Scoreboard(Elaboratable
):
282 def __init__(self
, rwid
, n_regs
):
285 * :rwid: bit width of register file(s) - both FP and INT
286 * :n_regs: depth of register file(s) - number of FP and INT regs
292 self
.intregs
= RegFileArray(rwid
, n_regs
)
293 self
.fpregs
= RegFileArray(rwid
, n_regs
)
296 self
.int_dest_i
= Signal(max=n_regs
, reset_less
=True) # Dest R# in
297 self
.int_src1_i
= Signal(max=n_regs
, reset_less
=True) # oper1 R# in
298 self
.int_src2_i
= Signal(max=n_regs
, reset_less
=True) # oper2 R# in
299 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
302 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
303 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
305 # for branch speculation experiment. branch_direction = 0 if
306 # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
307 # branch_succ and branch_fail are requests to have the current
308 # instruction be dependent on the branch unit "shadow" capability.
309 self
.branch_succ_i
= Signal(reset_less
=True)
310 self
.branch_fail_i
= Signal(reset_less
=True)
311 self
.branch_direction_o
= Signal(2, reset_less
=True)
313 def elaborate(self
, platform
):
318 m
.submodules
.intregs
= self
.intregs
319 m
.submodules
.fpregs
= self
.fpregs
322 int_dest
= self
.intregs
.write_port("dest")
323 int_src1
= self
.intregs
.read_port("src1")
324 int_src2
= self
.intregs
.read_port("src2")
326 fp_dest
= self
.fpregs
.write_port("dest")
327 fp_src1
= self
.fpregs
.read_port("src1")
328 fp_src2
= self
.fpregs
.read_port("src2")
330 # Int ALUs and Comp Units
332 cua
= CompUnitALUs(self
.rwid
, 2)
333 cub
= CompUnitBR(self
.rwid
, 2)
334 m
.submodules
.cu
= cu
= CompUnitsBase(self
.rwid
, [cua
, cub
])
335 bgt
= cub
.bgt
# get at the branch computation unit
339 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
)
341 # Count of number of FUs
342 n_intfus
= n_int_alus
343 n_fp_fus
= 0 # for now
345 # Integer Priority Picker 1: Adder + Subtractor
346 intpick1
= GroupPicker(n_intfus
) # picks between add, sub, mul and shf
347 m
.submodules
.intpick1
= intpick1
350 regdecode
= RegDecode(self
.n_regs
)
351 m
.submodules
.regdecode
= regdecode
352 aluissue
= IssueUnitGroup(4)
353 brissue
= IssueUnitGroup(1)
354 issueunit
= IssueUnitArray([aluissue
, brissue
])
355 m
.submodules
.issueunit
= issueunit
357 # Shadow Matrix. currently n_intfus shadows, to be used for
358 # write-after-write hazards. NOTE: there is one extra for branches,
359 # so the shadow width is increased by 1
360 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_intfus
, n_intfus
, True)
361 m
.submodules
.bshadow
= bshadow
= ShadowMatrix(n_intfus
, 1, False)
363 # record previous instruction to cast shadow on current instruction
364 fn_issue_prev
= Signal(n_intfus
)
365 prev_shadow
= Signal(n_intfus
)
367 # Branch Speculation recorder. tracks the success/fail state as
368 # each instruction is issued, so that when the branch occurs the
369 # allow/cancel can be issued as appropriate.
370 m
.submodules
.specrec
= bspec
= BranchSpeculationRecord(n_intfus
)
373 # ok start wiring things together...
374 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
375 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
379 # Issue Unit is where it starts. set up some in/outs for this module
381 comb
+= [ regdecode
.dest_i
.eq(self
.int_dest_i
),
382 regdecode
.src1_i
.eq(self
.int_src1_i
),
383 regdecode
.src2_i
.eq(self
.int_src2_i
),
384 regdecode
.enable_i
.eq(self
.reg_enable_i
),
385 self
.issue_o
.eq(issueunit
.issue_o
)
388 # take these to outside (for testing)
389 self
.alu_insn_i
= aluissue
.insn_i
# enabled by instruction decode
390 self
.br_insn_i
= brissue
.insn_i
# enabled by instruction decode
391 self
.alu_oper_i
= cua
.oper_i
392 self
.br_oper_i
= cub
.oper_i
394 # TODO: issueunit.f (FP)
396 # and int function issue / busy arrays, and dest/src1/src2
397 comb
+= intfus
.dest_i
.eq(regdecode
.dest_o
)
398 comb
+= intfus
.src1_i
.eq(regdecode
.src1_o
)
399 comb
+= intfus
.src2_i
.eq(regdecode
.src2_o
)
401 fn_issue_o
= issueunit
.fn_issue_o
403 comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
404 comb
+= issueunit
.busy_i
.eq(cu
.busy_o
)
405 comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
408 # merge shadow matrices outputs
411 # these are explained in ShadowMatrix docstring, and are to be
412 # connected to the FUReg and FUFU Matrices, to get them to reset
413 anydie
= Signal(n_intfus
, reset_less
=True)
414 allshadown
= Signal(n_intfus
, reset_less
=True)
415 shreset
= Signal(n_intfus
, reset_less
=True)
416 comb
+= allshadown
.eq(shadows
.shadown_o
& bshadow
.shadown_o
)
417 comb
+= anydie
.eq(shadows
.go_die_o | bshadow
.go_die_o
)
418 comb
+= shreset
.eq(bspec
.match_g_o | bspec
.match_f_o
)
421 # connect fu-fu matrix
424 # Group Picker... done manually for now.
425 go_rd_o
= intpick1
.go_rd_o
426 go_wr_o
= intpick1
.go_wr_o
427 go_rd_i
= intfus
.go_rd_i
428 go_wr_i
= intfus
.go_wr_i
429 go_die_i
= intfus
.go_die_i
430 # NOTE: connect to the shadowed versions so that they can "die" (reset)
431 comb
+= go_rd_i
[0:n_intfus
].eq(go_rd_o
[0:n_intfus
]) # rd
432 comb
+= go_wr_i
[0:n_intfus
].eq(go_wr_o
[0:n_intfus
]) # wr
433 comb
+= go_die_i
[0:n_intfus
].eq(anydie
[0:n_intfus
]) # die
437 comb
+= intpick1
.rd_rel_i
[0:n_intfus
].eq(cu
.rd_rel_o
[0:n_intfus
])
438 comb
+= intpick1
.req_rel_i
[0:n_intfus
].eq(cu
.req_rel_o
[0:n_intfus
])
439 int_rd_o
= intfus
.readable_o
440 int_wr_o
= intfus
.writable_o
441 comb
+= intpick1
.readable_i
[0:n_intfus
].eq(int_rd_o
[0:n_intfus
])
442 comb
+= intpick1
.writable_i
[0:n_intfus
].eq(int_wr_o
[0:n_intfus
])
448 comb
+= shadows
.issue_i
.eq(fn_issue_o
)
449 #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
450 comb
+= shadows
.reset_i
[0:n_intfus
].eq(bshadow
.go_die_o
[0:n_intfus
])
452 # NOTE: this setup is for the instruction order preservation...
454 # connect shadows / go_dies to Computation Units
455 comb
+= cu
.shadown_i
[0:n_intfus
].eq(allshadown
)
456 comb
+= cu
.go_die_i
[0:n_intfus
].eq(anydie
)
458 # ok connect first n_int_fu shadows to busy lines, to create an
459 # instruction-order linked-list-like arrangement, using a bit-matrix
460 # (instead of e.g. a ring buffer).
463 # when written, the shadow can be cancelled (and was good)
464 for i
in range(n_intfus
):
465 comb
+= shadows
.s_good_i
[i
][0:n_intfus
].eq(go_wr_o
[0:n_intfus
])
467 # work out the current-activated busy unit (by recording the old one)
468 with m
.If(fn_issue_o
): # only update prev bit if instruction issued
469 sync
+= fn_issue_prev
.eq(fn_issue_o
)
471 # *previous* instruction shadows *current* instruction, and, obviously,
472 # if the previous is completed (!busy) don't cast the shadow!
473 comb
+= prev_shadow
.eq(~fn_issue_o
& cu
.busy_o
)
474 for i
in range(n_intfus
):
475 comb
+= shadows
.shadow_i
[i
][0:n_intfus
].eq(prev_shadow
)
478 # ... and this is for branch speculation. it uses the extra bit
479 # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
480 # only needs to set shadow_i, s_fail_i and s_good_i
482 # issue captures shadow_i (if enabled)
483 comb
+= bshadow
.reset_i
[0:n_intfus
].eq(shreset
[0:n_intfus
])
485 bactive
= Signal(reset_less
=True)
486 comb
+= bactive
.eq((bspec
.active_i | br1
.issue_i
) & ~br1
.go_wr_i
)
488 # instruction being issued (fn_issue_o) has a shadow cast by the branch
489 with m
.If(bactive
& (self
.branch_succ_i | self
.branch_fail_i
)):
490 comb
+= bshadow
.issue_i
.eq(fn_issue_o
)
491 for i
in range(n_intfus
):
492 with m
.If(fn_issue_o
& (Const(1<<i
))):
493 comb
+= bshadow
.shadow_i
[i
][0].eq(1)
495 # finally, we need an indicator to the test infrastructure as to
496 # whether the branch succeeded or failed, plus, link up to the
497 # "recorder" of whether the instruction was under shadow or not
499 with m
.If(br1
.issue_i
):
500 sync
+= bspec
.active_i
.eq(1)
501 with m
.If(self
.branch_succ_i
):
502 comb
+= bspec
.good_i
.eq(fn_issue_o
& 0x1f)
503 with m
.If(self
.branch_fail_i
):
504 comb
+= bspec
.fail_i
.eq(fn_issue_o
& 0x1f)
506 # branch is active (TODO: a better signal: this is over-using the
507 # go_write signal - actually the branch should not be "writing")
508 with m
.If(br1
.go_wr_i
):
509 sync
+= self
.branch_direction_o
.eq(br1
.data_o
+Const(1, 2))
510 sync
+= bspec
.active_i
.eq(0)
511 comb
+= bspec
.br_i
.eq(1)
512 # branch occurs if data == 1, failed if data == 0
513 comb
+= bspec
.br_ok_i
.eq(br1
.data_o
== 1)
514 for i
in range(n_intfus
):
515 # *expected* direction of the branch matched against *actual*
516 comb
+= bshadow
.s_good_i
[i
][0].eq(bspec
.match_g_o
[i
])
518 comb
+= bshadow
.s_fail_i
[i
][0].eq(bspec
.match_f_o
[i
])
521 # Connect Register File(s)
523 comb
+= int_dest
.wen
.eq(intfus
.dest_rsel_o
)
524 comb
+= int_src1
.ren
.eq(intfus
.src1_rsel_o
)
525 comb
+= int_src2
.ren
.eq(intfus
.src2_rsel_o
)
527 # connect ALUs to regfile
528 comb
+= int_dest
.data_i
.eq(cu
.data_o
)
529 comb
+= cu
.src1_i
.eq(int_src1
.data_o
)
530 comb
+= cu
.src2_i
.eq(int_src2
.data_o
)
532 # connect ALU Computation Units
533 comb
+= cu
.go_rd_i
[0:n_intfus
].eq(go_rd_o
[0:n_intfus
])
534 comb
+= cu
.go_wr_i
[0:n_intfus
].eq(go_wr_o
[0:n_intfus
])
535 comb
+= cu
.issue_i
[0:n_intfus
].eq(fn_issue_o
[0:n_intfus
])
541 yield from self
.intregs
542 yield from self
.fpregs
543 yield self
.int_dest_i
544 yield self
.int_src1_i
545 yield self
.int_src2_i
547 yield self
.branch_succ_i
548 yield self
.branch_fail_i
549 yield self
.branch_direction_o
564 def __init__(self
, rwidth
, nregs
):
566 self
.regs
= [0] * nregs
568 def op(self
, op
, src1
, src2
, dest
):
569 maxbits
= (1 << self
.rwidth
) - 1
570 src1
= self
.regs
[src1
] & maxbits
571 src2
= self
.regs
[src2
] & maxbits
579 val
= src1
>> (src2
& maxbits
)
581 val
= int(src1
> src2
)
583 val
= int(src1
< src2
)
585 val
= int(src1
== src2
)
587 val
= int(src1
!= src2
)
589 self
.setval(dest
, val
)
def setval(self, dest, val):
    """Store ``val`` as the simulated content of register ``dest``.

    A trace line with the register index and the value in hex is
    printed before ``self.regs`` is updated.

    * :dest: index into ``self.regs``
    * :val:  integer value to record
    """
    as_hex = hex(val)
    print("sim setval", dest, as_hex)
    self.regs[dest] = val
597 for i
, val
in enumerate(self
.regs
):
598 reg
= yield dut
.intregs
.regs
[i
].reg
599 okstr
= "OK" if reg
== val
else "!ok"
600 print("reg %d expected %x received %x %s" % (i
, val
, reg
, okstr
))
602 def check(self
, dut
):
603 for i
, val
in enumerate(self
.regs
):
604 reg
= yield dut
.intregs
.regs
[i
].reg
606 print("reg %d expected %x received %x\n" % (i
, val
, reg
))
607 yield from self
.dump(dut
)
def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
    """Simulation helper: present one integer instruction to the scoreboard.

    * :dut:            device under test (provides the ``*_i`` signals)
    * :op:             opcode; the low two bits are the operation handed
                       to the selected unit, any higher bit marks a branch
    * :src1:           source register number 1
    * :src2:           source register number 2
    * :dest:           destination register number
    * :branch_success: value driven onto ``dut.branch_succ_i``
    * :branch_fail:    value driven onto ``dut.branch_fail_i``

    This is a generator: it yields the signal assignments and is meant
    to be driven with ``yield from``.
    """
    # drop any previous instruction-issue request first
    yield from disable_issue(dut)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)
    # Only bits 0-1 carry the operation (see ``op & 0x3`` below), so any
    # bit above them selects the branch unit.  The previous mask (0x30)
    # never matched the branch opcodes used in this file (e.g. op=4, BGT),
    # which sent every branch to the ALU issue group instead.
    if (op & ~0x3) != 0:  # branch
        yield dut.br_insn_i.eq(1)
        yield dut.br_oper_i.eq(Const(op & 0x3, 2))
    else:
        yield dut.alu_insn_i.eq(1)
        yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)
629 def print_reg(dut
, rnums
):
632 reg
= yield dut
.intregs
.regs
[rnum
].reg
633 rs
.append("%x" % reg
)
634 rnums
= map(str, rnums
)
635 print ("reg %s: %s" % (','.join(rnums
), ','.join(rs
)))
638 def create_random_ops(dut
, n_ops
, shadowing
=False, max_opnums
=3):
640 for i
in range(n_ops
):
641 src1
= randint(1, dut
.n_regs
-1)
642 src2
= randint(1, dut
.n_regs
-1)
643 dest
= randint(1, dut
.n_regs
-1)
644 op
= randint(0, max_opnums
)
647 insts
.append((src1
, src2
, dest
, op
, (0, 0)))
649 insts
.append((src1
, src2
, dest
, op
))
653 def wait_for_busy_clear(dut
):
655 busy_o
= yield dut
.busy_o
def disable_issue(dut):
    """Simulation helper: clear both instruction-issue request inputs.

    Yields an assignment of 0 to ``dut.alu_insn_i`` and then to
    ``dut.br_insn_i``; intended to be driven with ``yield from``.
    """
    # attributes are looked up one at a time, in the original order
    for insn_name in ("alu_insn_i", "br_insn_i"):
        yield getattr(dut, insn_name).eq(0)
666 def wait_for_issue(dut
):
668 issue_o
= yield dut
.issue_o
670 yield from disable_issue(dut
)
671 yield dut
.reg_enable_i
.eq(0)
674 #yield from print_reg(dut, [1,2,3])
676 #yield from print_reg(dut, [1,2,3])
678 def scoreboard_branch_sim(dut
, alusim
):
684 print ("rseed", iseed
)
688 yield dut
.branch_direction_o
.eq(0)
690 # set random values in the registers
691 for i
in range(1, dut
.n_regs
):
693 val
= randint(0, (1<<alusim
.rwidth
)-1)
694 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
695 alusim
.setval(i
, val
)
698 # create some instructions: branches create a tree
699 insts
= create_random_ops(dut
, 1, True, 1)
700 #insts.append((6, 6, 1, 2, (0, 0)))
701 #insts.append((4, 3, 3, 0, (0, 0)))
703 src1
= randint(1, dut
.n_regs
-1)
704 src2
= randint(1, dut
.n_regs
-1)
706 op
= 4 # only BGT at the moment
708 branch_ok
= create_random_ops(dut
, 1, True, 1)
709 branch_fail
= create_random_ops(dut
, 1, True, 1)
711 insts
.append((src1
, src2
, (branch_ok
, branch_fail
), op
, (0, 0)))
715 #insts.append( (3, 5, 2, 0, (0, 0)) )
718 branch_ok
.append ( (5, 7, 5, 1, (1, 0)) )
719 #branch_ok.append( None )
720 branch_fail
.append( (1, 1, 2, 0, (0, 1)) )
721 #branch_fail.append( None )
722 insts
.append( (6, 4, (branch_ok
, branch_fail
), 4, (0, 0)) )
724 siminsts
= deepcopy(insts
)
726 # issue instruction(s)
734 branch_direction
= yield dut
.branch_direction_o
# way branch went
735 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = insts
.pop(0)
736 if branch_direction
== 1 and shadow_on
:
737 print ("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
738 continue # branch was "success" and this is a "failed"... skip
739 if branch_direction
== 2 and shadow_off
:
740 print ("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
741 continue # branch was "fail" and this is a "success"... skip
742 if branch_direction
!= 0:
747 branch_ok
, branch_fail
= dest
749 # ok zip up the branch success / fail instructions and
750 # drop them into the queue, one marked "to have branch success"
751 # the other to be marked shadow branch "fail".
752 # one out of each of these will be cancelled
753 for ok
, fl
in zip(branch_ok
, branch_fail
):
755 instrs
.append((ok
[0], ok
[1], ok
[2], ok
[3], (1, 0)))
757 instrs
.append((fl
[0], fl
[1], fl
[2], fl
[3], (0, 1)))
758 print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
759 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
760 yield from int_instr(dut
, op
, src1
, src2
, dest
,
761 shadow_on
, shadow_off
)
763 yield from wait_for_issue(dut
)
765 # wait for all instructions to stop before checking
767 yield from wait_for_busy_clear(dut
)
771 instr
= siminsts
.pop(0)
774 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = instr
778 branch_ok
, branch_fail
= dest
780 print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
781 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
782 branch_res
= alusim
.op(op
, src1
, src2
, dest
)
785 siminsts
+= branch_ok
787 siminsts
+= branch_fail
790 yield from alusim
.check(dut
)
791 yield from alusim
.dump(dut
)
794 def scoreboard_sim(dut
, alusim
):
800 # set random values in the registers
801 for i
in range(1, dut
.n_regs
):
802 val
= randint(0, (1<<alusim
.rwidth
)-1)
805 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
806 alusim
.setval(i
, val
)
808 # create some instructions (some random, some regression tests)
811 instrs
= create_random_ops(dut
, 10, True, 3)
814 instrs
.append( (4, 3, 5, 1, (0, 0)) )
815 instrs
.append( (5, 2, 3, 4, (0, 0)) )
818 instrs
.append((2, 3, 3, 0, (0, 0)))
819 instrs
.append((5, 3, 3, 1, (0, 0)))
820 instrs
.append((3, 5, 5, 2, (0, 0)))
821 instrs
.append((5, 3, 3, 3, (0, 0)))
822 instrs
.append((3, 5, 5, 0, (0, 0)))
825 instrs
.append((5, 6, 2, 1))
826 instrs
.append((2, 2, 4, 0))
827 #instrs.append((2, 2, 3, 1))
830 instrs
.append((2, 1, 2, 3))
833 instrs
.append((2, 6, 2, 1))
834 instrs
.append((2, 1, 2, 0))
837 instrs
.append((1, 2, 7, 2))
838 instrs
.append((7, 1, 5, 0))
839 instrs
.append((4, 4, 1, 1))
842 instrs
.append((5, 6, 2, 2))
843 instrs
.append((1, 1, 4, 1))
844 instrs
.append((6, 5, 3, 0))
847 # Write-after-Write Hazard
848 instrs
.append( (3, 6, 7, 2) )
849 instrs
.append( (4, 4, 7, 1) )
852 # self-read/write-after-write followed by Read-after-Write
853 instrs
.append((1, 1, 1, 1))
854 instrs
.append((1, 5, 3, 0))
857 # Read-after-Write followed by self-read-after-write
858 instrs
.append((5, 6, 1, 2))
859 instrs
.append((1, 1, 1, 1))
862 # self-read-write sandwich
863 instrs
.append((5, 6, 1, 2))
864 instrs
.append((1, 1, 1, 1))
865 instrs
.append((1, 5, 3, 0))
869 instrs
.append( (5, 2, 5, 2) )
870 instrs
.append( (2, 6, 3, 0) )
871 instrs
.append( (4, 2, 2, 1) )
875 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
877 yield dut
.intregs
.regs
[3].reg
.eq(5)
879 instrs
.append((5, 3, 3, 4, (0, 0)))
880 instrs
.append((4, 2, 1, 2, (0, 1)))
884 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
886 yield dut
.intregs
.regs
[3].reg
.eq(5)
888 instrs
.append((5, 3, 3, 4, (0, 0)))
889 instrs
.append((4, 2, 1, 2, (1, 0)))
892 instrs
.append( (4, 3, 5, 1, (0, 0)) )
893 instrs
.append( (5, 2, 3, 1, (0, 0)) )
894 instrs
.append( (7, 1, 5, 2, (0, 0)) )
895 instrs
.append( (5, 6, 6, 4, (0, 0)) )
896 instrs
.append( (7, 5, 2, 2, (1, 0)) )
897 instrs
.append( (1, 7, 5, 0, (0, 1)) )
898 instrs
.append( (1, 6, 1, 2, (1, 0)) )
899 instrs
.append( (1, 6, 7, 3, (0, 0)) )
900 instrs
.append( (6, 7, 7, 0, (0, 0)) )
902 # issue instruction(s), wait for issue to be free before proceeding
903 for i
, (src1
, src2
, dest
, op
, (br_ok
, br_fail
)) in enumerate(instrs
):
905 print ("instr %d: (%d, %d, %d, %d)" % (i
, src1
, src2
, dest
, op
))
906 alusim
.op(op
, src1
, src2
, dest
)
907 yield from int_instr(dut
, op
, src1
, src2
, dest
, br_ok
, br_fail
)
909 yield from wait_for_issue(dut
)
911 # wait for all instructions to stop before checking
913 yield from wait_for_busy_clear(dut
)
916 yield from alusim
.check(dut
)
917 yield from alusim
.dump(dut
)
920 def test_scoreboard():
921 dut
= Scoreboard(16, 8)
922 alusim
= RegSim(16, 8)
923 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
924 with
open("test_scoreboard6600.il", "w") as f
:
927 run_simulation(dut
, scoreboard_sim(dut
, alusim
),
928 vcd_name
='test_scoreboard6600.vcd')
930 #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
931 # vcd_name='test_scoreboard6600.vcd')
934 if __name__
== '__main__':