1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
7 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
8 from scoreboard
.global_pending
import GlobalPending
9 from scoreboard
.group_picker
import GroupPicker
10 from scoreboard
.issue_unit
import IntFPIssueUnit
, RegDecode
11 from scoreboard
.shadow
import ShadowMatrix
, BranchSpeculationRecord
13 from compalu
import ComputationUnitNoDelay
15 from alu_hier
import ALU
, BranchALU
16 from nmutil
.latch
import SRLatch
18 from random
import randint
, seed
19 from copy
import deepcopy
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Amazingly, this class works recursively.  It's supposed to just
        look after some ALUs (that can handle the same operations),
        grouping them together, however it turns out that the same code
        can also group *groups* of Computation Units together as well.

        Basically it was intended just to concatenate the ALU's issue,
        go_rd etc. signals together, which start out as bits and become
        sequences.  Turns out that the same trick works just as well
        when the things being grouped are themselves CompUnitsBase
        instances.

        So this class may be used recursively to present a top-level
        sequential concatenation of all the signals in and out of
        ALUs, whilst at the same time making it convenient to group
        ALUs together.

        At the lower level, the intent is that groups of (identical)
        ALUs may be passed the same operation.  Even beyond that,
        the intent is that that group of (identical) ALUs actually
        share the *same pipeline* and as such become a "Concurrent
        Computation Unit" as defined by Mitch Alsup.
    """
    def __init__(self, rwid, units):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :units:  sequence of ALUs (or CompUnitsBase derivatives)
        """
        self.units = units
        self.rwid = rwid
        if units and isinstance(units[0], CompUnitsBase):
            # grouping groups: total width is the sum of the sub-groups
            self.n_units = sum(u.n_units for u in units)
        else:
            self.n_units = len(units)

        n_units = self.n_units

        # inputs
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)

        # outputs
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        """ Adds every unit as submodule "comp<N>", concatenates the
            per-unit control signals (in unit order) onto this object's
            signals, ORs the unit data outputs together and fans the
            two source operands out to every unit.
        """
        m = Module()
        comb = m.d.comb

        for i, alu in enumerate(self.units):
            print ("elaborate comp%d" % i, self, alu)
            setattr(m.submodules, "comp%d" % i, alu)

        units = self.units
        go_rd_l = [alu.go_rd_i for alu in units]
        go_wr_l = [alu.go_wr_i for alu in units]
        issue_l = [alu.issue_i for alu in units]
        busy_l = [alu.busy_o for alu in units]
        req_rel_l = [alu.req_rel_o for alu in units]
        rd_rel_l = [alu.rd_rel_o for alu in units]
        shadow_l = [alu.shadown_i for alu in units]
        godie_l = [alu.go_die_i for alu in units]

        # outputs: gather the units' signals into this object's signals
        comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
        comb += self.req_rel_o.eq(Cat(*req_rel_l))
        comb += self.busy_o.eq(Cat(*busy_l))
        # inputs: scatter this object's signals out to the units
        comb += Cat(*godie_l).eq(self.go_die_i)
        comb += Cat(*shadow_l).eq(self.shadown_i)
        comb += Cat(*go_wr_l).eq(self.go_wr_i)
        comb += Cat(*go_rd_l).eq(self.go_rd_i)
        comb += Cat(*issue_l).eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "data_o"
        if self.units:
            data_o = treereduce(self.units)
            comb += self.data_o.eq(data_o)

        for alu in self.units:
            comb += alu.src1_i.eq(self.src1_i)
            comb += alu.src2_i.eq(self.src2_i)

        return m
class CompUnitALUs(CompUnitsBase):
    """ Group of four integer Computation Units (add, sub, mul, shf),
        each wrapping its own ALU and hard-wired to one opcode.
    """

    def __init__(self, rwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
        """

        # Int ALUs
        add = ALU(rwid)
        sub = ALU(rwid)
        mul = ALU(rwid)
        shf = ALU(rwid)

        # one 2-bit-opcode Computation Unit per ALU
        units = [ComputationUnitNoDelay(rwid, 2, alu)
                 for alu in [add, sub, mul, shf]]

        print ("alu units", units)
        CompUnitsBase.__init__(self, rwid, units)
        print ("alu base init done")

    def elaborate(self, platform):
        """ As CompUnitsBase.elaborate, plus a constant opcode per unit. """
        print ("alu elaborate start")
        m = CompUnitsBase.elaborate(self, platform)
        print ("alu elaborate done")
        comb = m.d.comb

        # unit index doubles as the opcode: 0=add 1=sub 2=mul 3=shf
        for opcode in range(4):
            comb += self.units[opcode].oper_i.eq(Const(opcode, 2))

        return m
class CompUnitBR(CompUnitsBase):
    """ Group containing the single branch Computation Unit. """

    def __init__(self, rwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT

            Note: the bgt ALU and its Computation Unit are kept as
            self.bgt and self.br1 so that a shadow unit can be created
            for the branch.
        """

        # Branch ALU and CU
        self.bgt = BranchALU(rwid)
        self.br1 = ComputationUnitNoDelay(rwid, 3, self.bgt)
        print ("br units", [self.br1])
        CompUnitsBase.__init__(self, rwid, [self.br1])
        print ("br base init done")

    def elaborate(self, platform):
        """ As CompUnitsBase.elaborate, plus the constant branch opcode. """
        print ("br elaborate start")
        m = CompUnitsBase.elaborate(self, platform)
        print ("br elaborate done")
        comb = m.d.comb

        comb += self.br1.oper_i.eq(Const(4, 3)) # op=bgt

        return m
class FunctionUnits(Elaboratable):
    """ Wraps the FU-FU and FU-Reg dependency matrices and wires them
        to each other and to this object's signals.
    """

    def __init__(self, n_regs, n_int_alus):
        """ Inputs:

            * :n_regs:      width of the register-select signals
            * :n_int_alus:  number of integer Function Units
        """
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in

        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)

        self.req_rel_i = Signal(n_int_alus, reset_less=True)
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.req_rel_o = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid

    def elaborate(self, platform):
        """ Instantiates both matrices and connects them.  As a side
            effect sets self.wr_pend_o (only available after elaboration).
        """
        m = Module()
        comb = m.d.comb

        n_int_fus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        intfudeps = FUFUDepMatrix(n_int_fus, n_int_fus)
        m.submodules.intfudeps = intfudeps
        # Integer FU-Reg Dep Matrix
        intregdeps = FURegDepMatrix(n_int_fus, self.n_regs)
        m.submodules.intregdeps = intregdeps

        comb += self.g_int_rd_pend_o.eq(intregdeps.rd_rsel_o)
        comb += self.g_int_wr_pend_o.eq(intregdeps.wr_rsel_o)

        # the FU-Reg matrix' own global pending vectors are fed back in
        comb += intregdeps.rd_pend_i.eq(intregdeps.rd_rsel_o)
        comb += intregdeps.wr_pend_i.eq(intregdeps.wr_rsel_o)

        comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
        comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
        self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid

        comb += intfudeps.issue_i.eq(self.fn_issue_i)
        comb += intfudeps.go_rd_i.eq(self.go_rd_i)
        comb += intfudeps.go_wr_i.eq(self.go_wr_i)
        comb += intfudeps.go_die_i.eq(self.go_die_i)
        comb += self.readable_o.eq(intfudeps.readable_o)
        comb += self.writable_o.eq(intfudeps.writable_o)

        # Connect function issue / arrays, and dest/src1/src2
        comb += intregdeps.dest_i.eq(self.dest_i)
        comb += intregdeps.src1_i.eq(self.src1_i)
        comb += intregdeps.src2_i.eq(self.src2_i)

        comb += intregdeps.go_rd_i.eq(self.go_rd_i)
        comb += intregdeps.go_wr_i.eq(self.go_wr_i)
        comb += intregdeps.go_die_i.eq(self.go_die_i)
        comb += intregdeps.issue_i.eq(self.fn_issue_i)

        comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
        comb += self.src1_rsel_o.eq(intregdeps.src1_rsel_o)
        comb += self.src2_rsel_o.eq(intregdeps.src2_rsel_o)

        return m
class Scoreboard(Elaboratable):
    """ Top-level scoreboard: register files, Computation Units,
        dependency matrices, group picker, issue unit, shadow matrices
        and branch-speculation recorder, all wired together.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Builds and wires the whole scoreboard.  As a side effect sets
            self.int_insn_i (the issue unit's instruction-select input),
            which therefore only exists after elaboration.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested but not yet connected to anything
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        cua = CompUnitALUs(self.rwid)
        cub = CompUnitBR(self.rwid)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1
        # derived from the groups (4 ALUs + 1 branch) instead of hard-coded
        n_int_alus = cu.n_units

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_int_fus = n_int_alus
        n_fp_fus = 0 # for now
        fu_mask = (1 << n_int_fus) - 1 # one bit per integer FU

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(n_int_fus) # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IntFPIssueUnit(n_int_fus, n_fp_fus)
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_int_fus shadows, to be used for
        # write-after-write hazards.  The branch gets a separate,
        # one-bit-wide shadow matrix (bshadow).
        m.submodules.shadows = shadows = ShadowMatrix(n_int_fus, n_int_fus,
                                                      True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_int_fus, 1, False)

        # record previous instruction to cast shadow on current instruction
        fn_issue_prev = Signal(n_int_fus)
        prev_shadow = Signal(n_int_fus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_int_fus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [regdecode.dest_i.eq(self.int_dest_i),
                 regdecode.src1_i.eq(self.int_src1_i),
                 regdecode.src2_i.eq(self.int_src2_i),
                 regdecode.enable_i.eq(self.reg_enable_i),
                 self.issue_o.eq(issueunit.issue_o)
                ]
        self.int_insn_i = issueunit.i.insn_i # enabled by instruction decode

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.i.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.i.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_int_fus, reset_less=True)
        allshadown = Signal(n_int_fus, reset_less=True)
        shreset = Signal(n_int_fus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_int_fus].eq(go_rd_o[0:n_int_fus]) # rd
        comb += go_wr_i[0:n_int_fus].eq(go_wr_o[0:n_int_fus]) # wr
        comb += go_die_i[0:n_int_fus].eq(anydie[0:n_int_fus]) # die

        # Connect Picker
        comb += intpick1.rd_rel_i[0:n_int_fus].eq(cu.rd_rel_o[0:n_int_fus])
        comb += intpick1.req_rel_i[0:n_int_fus].eq(cu.req_rel_o[0:n_int_fus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_int_fus].eq(int_rd_o[0:n_int_fus])
        comb += intpick1.writable_i[0:n_int_fus].eq(int_wr_o[0:n_int_fus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_int_fus].eq(bshadow.go_die_o[0:n_int_fus])
        comb += shadows.reset_i[0:n_int_fus].eq(bshadow.go_die_o[0:n_int_fus])

        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_int_fus].eq(allshadown)
        comb += cu.go_die_i[0:n_int_fus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_int_fus):
            comb += shadows.s_good_i[i][0:n_int_fus].eq(go_wr_o[0:n_int_fus])

        # work out the current-activated busy unit (by recording the old one)
        with m.If(fn_issue_o): # only update prev bit if instruction issued
            sync += fn_issue_prev.eq(fn_issue_o)

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_int_fus):
            comb += shadows.shadow_i[i][0:n_int_fus].eq(prev_shadow)

        # ... and this is for branch speculation.  it uses the separate
        # one-bit bshadow matrix.
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_int_fus].eq(shreset[0:n_int_fus])

        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        # NOTE(review): everything down to the per-FU loop is assumed to be
        # nested under this If -- confirm against the pristine file.
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_int_fus):
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & fu_mask)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & fu_mask)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_int_fus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        print ("intregdeps wen len", len(intfus.dest_rsel_o))
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_int_fus].eq(go_rd_o[0:n_int_fus])
        comb += cu.go_wr_i[0:n_int_fus].eq(go_wr_o[0:n_int_fus])
        comb += cu.issue_i[0:n_int_fus].eq(fn_issue_o[0:n_int_fus])

        return m

    def __iter__(self):
        """ Yields the signals that make up the module's port list. """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        # NOTE(review): one short yield was lost here in the damaged
        # source; issue_o is assumed -- confirm.
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ Port list (as a list) for rtlil/verilog conversion. """
        return list(self)
class RegSim:
    """ Pure-python reference model of the integer register file.

        Instructions are applied to a plain list of integers so that the
        hardware's final register contents can be compared against it.
    """

    # opcode -> operation on the two (already masked) source values.
    # 0-3 follow the CompUnitALUs wiring (add, sub, mul, shf), 4 is bgt.
    # NOTE(review): 5, 6, 7 are assumed to be blt, beq, bne from the order
    # of the comparisons in the original -- confirm.
    _OPS = {
        0: lambda a, b: a + b,
        1: lambda a, b: a - b,
        2: lambda a, b: a * b,
        3: lambda a, b: a >> b,
        4: lambda a, b: int(a > b),
        5: lambda a, b: int(a < b),
        6: lambda a, b: int(a == b),
        7: lambda a, b: int(a != b),
    }

    def __init__(self, rwidth, nregs):
        """ Inputs:

            * :rwidth: bit width of each register
            * :nregs:  number of registers (all start at zero)
        """
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, src1, src2, dest):
        """ Executes one instruction on the model.

            src1, src2 and dest are register *numbers*.  The result is
            truncated to rwidth bits, stored in regs[dest] and returned
            (callers use the return value as the branch outcome).

            Raises ValueError for an opcode outside 0-7.
        """
        maxbits = (1 << self.rwidth) - 1
        a = self.regs[src1] & maxbits
        b = self.regs[src2] & maxbits
        try:
            fn = self._OPS[op]
        except KeyError:
            raise ValueError("unknown opcode %r" % (op,)) from None
        val = fn(a, b) & maxbits # wrap to register width (sub may go negative)
        self.setval(dest, val)
        return val

    def setval(self, dest, val):
        """ Stores val in register number dest (logged to stdout). """
        print ("sim setval", dest, hex(val))
        self.regs[dest] = val

    def dump(self, dut):
        """ Simulation generator: prints expected against actual for every
            register, reading each dut.intregs.regs[i].reg via yield.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """ Simulation generator: compares every hardware register with
            the model.  On the first mismatch prints it, dumps all
            registers and raises AssertionError.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                # explicit raise: a bare assert vanishes under "python -O"
                raise AssertionError(
                    "reg %d expected %x received %x" % (i, val, reg))
def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
    """ Simulation generator: presents one integer instruction to dut.

        Clears every bit of dut.int_insn_i, sets the destination and
        source register numbers, raises instruction-select bit number
        "op" and the register-decode enable, then sets the two
        branch-shadow request inputs.
    """
    for i in range(len(dut.int_insn_i)):
        yield dut.int_insn_i[i].eq(0)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)
    yield dut.int_insn_i[op].eq(1)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)
def print_reg(dut, rnums):
    """ Simulation generator: prints the hex contents of the integer
        registers whose numbers are listed in rnums, on one line.
    """
    rs = []
    for rnum in rnums:
        reg = yield dut.intregs.regs[rnum].reg
        rs.append("%x" % reg)
    names = map(str, rnums)
    print ("reg %s: %s" % (','.join(names), ','.join(rs)))
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """ Returns a list of n_ops random instructions.

        Each is (src1, src2, dest, op) with register numbers drawn from
        1..dut.n_regs-1 and op from 0..max_opnums (inclusive).  With
        shadowing set, a fifth element (0, 0) - "not under any branch
        shadow" - is appended to every instruction.
    """
    insts = []
    for _ in range(n_ops):
        src1 = randint(1, dut.n_regs-1)
        src2 = randint(1, dut.n_regs-1)
        dest = randint(1, dut.n_regs-1)
        op = randint(0, max_opnums)

        if shadowing:
            insts.append((src1, src2, dest, op, (0, 0)))
        else:
            insts.append((src1, src2, dest, op))
    return insts
def wait_for_busy_clear(dut):
    """ Simulation generator: idles one clock at a time until dut.busy_o
        reads as zero.
    """
    while True:
        busy_o = yield dut.busy_o
        if not busy_o:
            break
        print ("busy",)
        yield
def wait_for_issue(dut):
    """ Simulation generator: idles one clock at a time until dut.issue_o
        reads non-zero, then clears every instruction-select bit and the
        register-decode enable so the instruction is not issued twice.
    """
    while True:
        issue_o = yield dut.issue_o
        if issue_o:
            for i in range(len(dut.int_insn_i)):
                yield dut.int_insn_i[i].eq(0)
            yield dut.reg_enable_i.eq(0)
            break
        #print ("busy",)
        #yield from print_reg(dut, [1,2,3])
        yield
        #yield from print_reg(dut, [1,2,3])
def scoreboard_branch_sim(dut, alusim):
    """ Simulation generator exercising branch speculation.

        Instructions are (src1, src2, dest, op, (shadow_on, shadow_off)).
        For a branch (op >= 4) "dest" is instead a pair of instruction
        lists (branch_ok, branch_fail): both are queued for issue under
        the branch's shadow, and whichever side turns out to be wrong is
        skipped once dut.branch_direction_o is known.  The same program is
        then replayed on the alusim reference model, following only the
        branch side actually taken, and the register files are compared.
    """
    # NOTE(review): the seed value is not visible in the damaged source;
    # 3 is assumed -- confirm.
    iseed = 3
    print ("rseed", iseed)
    seed(iseed)

    yield dut.branch_direction_o.eq(0)

    # set random values in the registers
    for i in range(1, dut.n_regs):
        val = randint(0, (1<<alusim.rwidth)-1)
        yield dut.intregs.regs[i].reg.eq(val)
        alusim.setval(i, val)

    random_tree = False # flip to generate a random branch tree instead
    if random_tree:
        # create some instructions: branches create a tree
        insts = create_random_ops(dut, 1, True, 1)
        #insts.append((6, 6, 1, 2, (0, 0)))
        #insts.append((4, 3, 3, 0, (0, 0)))

        src1 = randint(1, dut.n_regs-1)
        src2 = randint(1, dut.n_regs-1)
        op = 4 # only BGT at the moment

        branch_ok = create_random_ops(dut, 1, True, 1)
        branch_fail = create_random_ops(dut, 1, True, 1)

        insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))
    else:
        insts = []
        #insts.append( (3, 5, 2, 0, (0, 0)) )
        branch_ok = []
        branch_fail = []
        branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
        #branch_ok.append( None )
        branch_fail.append( (1, 1, 2, 0, (0, 1)) )
        #branch_fail.append( None )
        insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

    # the issue loop consumes insts, so keep a copy for the model replay
    siminsts = deepcopy(insts)

    # issue instruction(s)
    i = -1
    instrs = insts
    branch_direction = 0
    while instrs:
        yield
        yield
        i += 1
        branch_direction = yield dut.branch_direction_o # way branch went
        (src1, src2, dest, op, (shadow_on, shadow_off)) = instrs.pop(0)
        if branch_direction == 1 and shadow_on:
            print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
            continue # branch was "success" and this is a "failed"... skip
        if branch_direction == 2 and shadow_off:
            print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
            continue # branch was "fail" and this is a "success"... skip
        if branch_direction != 0:
            # branch already resolved: no shadow needed any more
            shadow_on = 0
            shadow_off = 0
        is_branch = op >= 4
        if is_branch:
            branch_ok, branch_fail = dest
            dest = src2
            # ok zip up the branch success / fail instructions and
            # drop them into the queue, one marked "to have branch success"
            # the other to be marked shadow branch "fail".
            # one out of each of these will be cancelled
            for ok, fl in zip(branch_ok, branch_fail):
                if ok:
                    instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                if fl:
                    instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
        print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                        (i, src1, src2, dest, op, shadow_on, shadow_off))
        yield from int_instr(dut, op, src1, src2, dest,
                             shadow_on, shadow_off)
        yield
        yield from wait_for_issue(dut)

    # wait for all instructions to stop before checking
    yield
    yield from wait_for_busy_clear(dut)

    # replay on the reference model, following only the taken side
    i = -1
    while siminsts:
        instr = siminsts.pop(0)
        if instr is None:
            continue
        (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
        i += 1
        is_branch = op >= 4
        if is_branch:
            branch_ok, branch_fail = dest
            dest = src2
        print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                        (i, src1, src2, dest, op, shadow_on, shadow_off))
        branch_res = alusim.op(op, src1, src2, dest)
        if is_branch:
            if branch_res:
                siminsts += branch_ok
            else:
                siminsts += branch_fail

    # check status
    yield from alusim.check(dut)
    yield from alusim.dump(dut)
def scoreboard_sim(dut, alusim):
    """ Simulation generator: loads random register values, issues a
        list of instructions to dut one at a time (applying each to the
        alusim reference model as well), waits for the scoreboard to go
        idle and compares the register files.

        Instructions are (src1, src2, dest, op) optionally followed by a
        (branch_ok, branch_fail) shadow pair; a missing pair means (0, 0).
        The "if False" groups are regression cases kept for reference.
    """
    # NOTE(review): the seed call is not visible in the damaged source;
    # seed(0) is assumed -- confirm.
    seed(0)

    # set random values in the registers
    for i in range(1, dut.n_regs):
        val = randint(0, (1<<alusim.rwidth)-1)
        yield dut.intregs.regs[i].reg.eq(val)
        alusim.setval(i, val)

    # create some instructions (some random, some regression tests)
    instrs = []
    if True:
        instrs = create_random_ops(dut, 10, True, 4)

    if False:
        instrs.append((2, 3, 3, 0))
        instrs.append((5, 3, 3, 1))

    if False:
        instrs.append((5, 6, 2, 1))
        instrs.append((2, 2, 4, 0))
        #instrs.append((2, 2, 3, 1))

    if False:
        instrs.append((2, 1, 2, 3))

    if False:
        instrs.append((2, 6, 2, 1))
        instrs.append((2, 1, 2, 0))

    if False:
        instrs.append((1, 2, 7, 2))
        instrs.append((7, 1, 5, 0))
        instrs.append((4, 4, 1, 1))

    if False:
        instrs.append((5, 6, 2, 2))
        instrs.append((1, 1, 4, 1))
        instrs.append((6, 5, 3, 0))

    if False:
        # Write-after-Write Hazard
        instrs.append( (3, 6, 7, 2) )
        instrs.append( (4, 4, 7, 1) )

    if False:
        # self-read/write-after-write followed by Read-after-Write
        instrs.append((1, 1, 1, 1))
        instrs.append((1, 5, 3, 0))

    if False:
        # Read-after-Write followed by self-read-after-write
        instrs.append((5, 6, 1, 2))
        instrs.append((1, 1, 1, 1))

    if False:
        # self-read-write sandwich
        instrs.append((5, 6, 1, 2))
        instrs.append((1, 1, 1, 1))
        instrs.append((1, 5, 3, 0))

    if False:
        instrs.append( (5, 2, 5, 2) )
        instrs.append( (2, 6, 3, 0) )
        instrs.append( (4, 2, 2, 1) )

    if False:
        # NOTE(review): v1 is not visible in the damaged source; 4 (so
        # that reg5 > reg3 is false) is assumed -- confirm.
        v1 = 4
        yield dut.intregs.regs[5].reg.eq(v1)
        alusim.regs[5] = v1
        yield dut.intregs.regs[3].reg.eq(5)
        alusim.regs[3] = 5
        instrs.append((5, 3, 3, 4, (0, 0)))
        instrs.append((4, 2, 1, 2, (0, 1)))

    if False:
        # NOTE(review): v1 is not visible in the damaged source; 6 (so
        # that reg5 > reg3 is true) is assumed -- confirm.
        v1 = 6
        yield dut.intregs.regs[5].reg.eq(v1)
        alusim.regs[5] = v1
        yield dut.intregs.regs[3].reg.eq(5)
        alusim.regs[3] = 5
        instrs.append((5, 3, 3, 4, (0, 0)))
        instrs.append((4, 2, 1, 2, (1, 0)))

    if False:
        instrs.append( (4, 3, 5, 1, (0, 0)) )
        instrs.append( (5, 2, 3, 1, (0, 0)) )
        instrs.append( (7, 1, 5, 2, (0, 0)) )
        instrs.append( (5, 6, 6, 4, (0, 0)) )
        instrs.append( (7, 5, 2, 2, (1, 0)) )
        instrs.append( (1, 7, 5, 0, (0, 1)) )
        instrs.append( (1, 6, 1, 2, (1, 0)) )
        instrs.append( (1, 6, 7, 3, (0, 0)) )
        instrs.append( (6, 7, 7, 0, (0, 0)) )

    # issue instruction(s), wait for issue to be free before proceeding
    for i, instr in enumerate(instrs):
        src1, src2, dest, op = instr[:4]
        # the older regression cases carry no shadow pair: not shadowed
        br_ok, br_fail = instr[4] if len(instr) > 4 else (0, 0)

        print ("instr %d: (%d, %d, %d, %d)" % (i, src1, src2, dest, op))
        alusim.op(op, src1, src2, dest)
        yield from int_instr(dut, op, src1, src2, dest, br_ok, br_fail)
        yield
        yield from wait_for_issue(dut)

    # wait for all instructions to stop before checking
    yield
    yield from wait_for_busy_clear(dut)

    # check status
    yield from alusim.check(dut)
    yield from alusim.dump(dut)
def test_scoreboard():
    """ Builds a 16-bit, 8-register Scoreboard, writes its RTLIL to
        test_scoreboard6600.il and runs scoreboard_sim against it,
        dumping waveforms to test_scoreboard6600.vcd.
    """
    dut = Scoreboard(16, 8)
    alusim = RegSim(16, 8)
    # conversion elaborates dut, which is also what creates dut.int_insn_i
    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as f:
        f.write(vl)

    run_simulation(dut, scoreboard_sim(dut, alusim),
                        vcd_name='test_scoreboard6600.vcd')

    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
905 if __name__
== '__main__':