from random import randint

from nmigen.compat.sim import run_simulation
from nmigen.cli import verilog, rtlil
from nmigen import Module, Const, Signal, Array, Cat, Elaboratable

from regfile.regfile import RegFileArray, treereduce
from scoreboard.fn_unit import IntFnUnit, FPFnUnit, LDFnUnit, STFnUnit
from scoreboard.fu_fu_matrix import FUFUDepMatrix
from scoreboard.fu_reg_matrix import FURegDepMatrix
from scoreboard.global_pending import GlobalPending
from scoreboard.group_picker import GroupPicker
from scoreboard.issue_unit import IntFPIssueUnit, RegDecode
from scoreboard.shadow import ShadowMatrix, WaWGrid

from compalu import ComputationUnitNoDelay

from alu_hier import ALU, BranchALU
from nmutil.latch import SRLatch
class CompUnits(Elaboratable):
    """Group of integer Computation Units behind shared data buses.

    Five units are instantiated in ``elaborate`` (add, sub, mul, shf and a
    branch unit); their per-unit control/status lines are concatenated into
    the vectors declared in ``__init__``, unit 0 in bit 0.
    """

    def __init__(self, rwid, n_units):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_units: number of ALUs

            Note: bgt unit is returned so that a shadow unit can be created
        """
        self.n_units = n_units
        # rwid is read below (BranchALU) and in elaborate(), so it has to be
        # stored on the instance.
        self.rwid = rwid

        # inputs
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)

        # outputs
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.dest_o = Signal(rwid, reset_less=True)
        self.src1_data_i = Signal(rwid, reset_less=True)
        self.src2_data_i = Signal(rwid, reset_less=True)

        # Branch ALU, exposed as an attribute so the parent can reach it
        self.bgt = BranchALU(self.rwid)

    def elaborate(self, platform):
        """Instantiate the five units and wire them to the shared vectors."""
        m = Module()

        # Int ALUs.
        # NOTE(review): these four constructions were not visible in the
        # text this block was rebuilt from; ALU(rwid) mirrors the
        # BranchALU(rwid) call in __init__ - confirm against alu_hier.
        add = ALU(self.rwid)
        sub = ALU(self.rwid)
        mul = ALU(self.rwid)
        shf = ALU(self.rwid)
        bgt = self.bgt

        m.submodules.comp1 = comp1 = ComputationUnitNoDelay(self.rwid, 2, add)
        m.submodules.comp2 = comp2 = ComputationUnitNoDelay(self.rwid, 2, sub)
        m.submodules.comp3 = comp3 = ComputationUnitNoDelay(self.rwid, 2, mul)
        m.submodules.comp4 = comp4 = ComputationUnitNoDelay(self.rwid, 2, shf)
        m.submodules.br1 = br1 = ComputationUnitNoDelay(self.rwid, 2, bgt)
        # The unit count is fixed at five here regardless of n_units.
        int_alus = [comp1, comp2, comp3, comp4, br1]

        m.d.comb += comp1.oper_i.eq(Const(0, 2))  # op=add
        m.d.comb += comp2.oper_i.eq(Const(1, 2))  # op=sub
        m.d.comb += comp3.oper_i.eq(Const(2, 2))  # op=mul
        m.d.comb += comp4.oper_i.eq(Const(3, 2))  # op=shf
        m.d.comb += br1.oper_i.eq(Const(0, 2))    # op=bgt

        # gather the per-unit lines, in unit order
        req_rel_l = [alu.req_rel_o for alu in int_alus]
        rd_rel_l = [alu.rd_rel_o for alu in int_alus]
        shadow_l = [alu.shadown_i for alu in int_alus]
        godie_l = [alu.go_die_i for alu in int_alus]
        go_wr_l = [alu.go_wr_i for alu in int_alus]
        go_rd_l = [alu.go_rd_i for alu in int_alus]
        issue_l = [alu.issue_i for alu in int_alus]
        busy_l = [alu.busy_o for alu in int_alus]

        # unit outputs -> module output vectors
        m.d.comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
        m.d.comb += self.req_rel_o.eq(Cat(*req_rel_l))
        m.d.comb += self.busy_o.eq(Cat(*busy_l))
        # module input vectors -> unit inputs
        m.d.comb += Cat(*godie_l).eq(self.go_die_i)
        m.d.comb += Cat(*shadow_l).eq(self.shadown_i)
        m.d.comb += Cat(*go_wr_l).eq(self.go_wr_i)
        m.d.comb += Cat(*go_rd_l).eq(self.go_rd_i)
        m.d.comb += Cat(*issue_l).eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "dest_o"
        dest_o = treereduce(int_alus)
        m.d.comb += self.dest_o.eq(dest_o)

        # every unit sees the same two source operands
        for alu in int_alus:
            m.d.comb += alu.src1_i.eq(self.src1_data_i)
            m.d.comb += alu.src2_i.eq(self.src2_data_i)

        return m
class FunctionUnits(Elaboratable):
    """Integer dependency tracking: an FU-FU matrix plus an FU-Reg matrix.

    The class only wires the two matrices together and re-exports their
    ports; all dependency logic lives in FUFUDepMatrix / FURegDepMatrix.
    """

    def __init__(self, n_regs, n_int_alus):
        """Declare the ports.

        * :n_regs: width of the register-select vectors
        * :n_int_alus: number of integer function units tracked
        """
        # n_regs is read in elaborate(), so it has to be stored.
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        self.dest_i = Signal(n_regs, reset_less=True)  # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True)  # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True)  # oper2 R# in

        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        self.dest_rsel_o = Signal(n_regs, reset_less=True)  # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True)  # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True)  # src2 reg (bot)

        self.req_rel_i = Signal(n_int_alus, reset_less=True)
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.req_rel_o = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid

    def elaborate(self, platform):
        """Instantiate both matrices and connect them to the ports."""
        m = Module()

        n_int_fus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        intfudeps = FUFUDepMatrix(n_int_fus, n_int_fus)
        m.submodules.intfudeps = intfudeps
        # Integer FU-Reg Dep Matrix
        intregdeps = FURegDepMatrix(n_int_fus, self.n_regs)
        m.submodules.intregdeps = intregdeps

        m.d.comb += self.g_int_rd_pend_o.eq(intregdeps.rd_rsel_o)
        m.d.comb += self.g_int_wr_pend_o.eq(intregdeps.wr_rsel_o)

        # the FU-Reg matrix's rsel outputs are fed back to its own pend inputs
        m.d.comb += intregdeps.rd_pend_i.eq(intregdeps.rd_rsel_o)
        m.d.comb += intregdeps.wr_pend_i.eq(intregdeps.wr_rsel_o)

        m.d.comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
        m.d.comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
        # Created here, so this attribute only exists after elaborate() ran.
        self.wr_pend_o = intregdeps.wr_pend_o  # also output for use in WaWGrid

        m.d.comb += intfudeps.issue_i.eq(self.fn_issue_i)
        m.d.comb += intfudeps.go_rd_i.eq(self.go_rd_i)
        m.d.comb += intfudeps.go_wr_i.eq(self.go_wr_i)
        m.d.comb += self.readable_o.eq(intfudeps.readable_o)
        m.d.comb += self.writable_o.eq(intfudeps.writable_o)

        # Connect function issue / arrays, and dest/src1/src2
        m.d.comb += intregdeps.dest_i.eq(self.dest_i)
        m.d.comb += intregdeps.src1_i.eq(self.src1_i)
        m.d.comb += intregdeps.src2_i.eq(self.src2_i)

        m.d.comb += intregdeps.go_rd_i.eq(self.go_rd_i)
        m.d.comb += intregdeps.go_wr_i.eq(self.go_wr_i)
        m.d.comb += intregdeps.issue_i.eq(self.fn_issue_i)

        m.d.comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
        m.d.comb += self.src1_rsel_o.eq(intregdeps.src1_rsel_o)
        m.d.comb += self.src2_rsel_o.eq(intregdeps.src2_rsel_o)

        return m
class Scoreboard(Elaboratable):
    """Top level: register files, issue unit, dependency matrices, group
    picker, shadow matrix and computation units wired together."""

    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        # Both are read in elaborate() and by the test-bench (dut.n_regs).
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # inputs
        self.int_store_i = Signal(reset_less=True)  # instruction is a store
        self.int_dest_i = Signal(max=n_regs, reset_less=True)  # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True)  # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True)  # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True)  # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True)  # instruction was accepted
        self.busy_o = Signal(reset_less=True)  # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """Build the scoreboard.  Also creates ``self.int_insn_i``."""
        m = Module()

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # the branch-speculation signals are only cleared each clock here
        m.d.sync += self.branch_succ_i.eq(Const(0))
        m.d.sync += self.branch_fail_i.eq(Const(0))
        m.d.sync += self.branch_direction_o.eq(Const(0))

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # the FP ports are requested but not connected to anything below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        # NOTE(review): this assignment was not visible in the text this
        # block was rebuilt from; 5 matches the five units CompUnits
        # instantiates (comp1..comp4 + br1) - confirm.
        n_int_alus = 5
        m.submodules.cu = cu = CompUnits(self.rwid, n_int_alus)
        # default; overridden by the shadow go_die connection further down
        m.d.comb += cu.go_die_i.eq(0)
        bgt = cu.bgt  # get at the branch computation unit

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_int_fus = n_int_alus
        n_fp_fus = 0  # for now

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(n_int_fus)  # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IntFPIssueUnit(self.n_regs, n_int_fus, n_fp_fus)
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_int_fus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        m.submodules.shadows = shadows = ShadowMatrix(n_int_fus, n_int_fus+1)
        # combined go_rd/wr + go_die (go_die used to reset latches)
        go_rd_rst = Signal(n_int_fus, reset_less=True)
        go_wr_rst = Signal(n_int_fus, reset_less=True)
        # record previous instruction to cast shadow on current instruction
        fn_issue_prev = Signal(n_int_fus)
        prev_shadow = Signal(n_int_fus)

        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA

        # Issue Unit is where it starts.  set up some in/outs for this module
        m.d.comb += [issueunit.i.store_i.eq(self.int_store_i),
                     regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     issueunit.i.dest_i.eq(regdecode.dest_o),
                     self.issue_o.eq(issueunit.issue_o),
                     ]
        self.int_insn_i = issueunit.i.insn_i  # enabled by instruction decode

        # connect global rd/wr pending vector (for WaW detection)
        m.d.sync += issueunit.i.g_wr_pend_i.eq(intfus.g_int_wr_pend_o)
        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        m.d.comb += intfus.dest_i.eq(regdecode.dest_o)
        m.d.comb += intfus.src1_i.eq(regdecode.src1_o)
        m.d.comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.i.fn_issue_o

        m.d.comb += intfus.fn_issue_i.eq(fn_issue_o)
        m.d.comb += issueunit.i.busy_i.eq(cu.busy_o)
        m.d.comb += self.busy_o.eq(cu.busy_o.bool())

        # connect fu-fu matrix

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        m.d.comb += go_rd_i[0:n_int_fus].eq(go_rd_rst[0:n_int_fus])  # rd
        m.d.comb += go_wr_i[0:n_int_fus].eq(go_wr_rst[0:n_int_fus])  # wr

        # Connect Picker
        m.d.comb += intpick1.rd_rel_i[0:n_int_fus].eq(cu.rd_rel_o[0:n_int_fus])
        m.d.comb += intpick1.req_rel_i[0:n_int_fus].eq(cu.req_rel_o[0:n_int_fus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        m.d.comb += intpick1.readable_i[0:n_int_fus].eq(int_rd_o[0:n_int_fus])
        m.d.comb += intpick1.writable_i[0:n_int_fus].eq(int_wr_o[0:n_int_fus])

        # Shadow Matrix
        m.d.comb += shadows.issue_i.eq(fn_issue_o)
        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        # NOTE: do NOT connect these to the Computation Units.  The CUs need to
        # do something slightly different (due to the revolving-door SRLatches)
        m.d.comb += go_rd_rst.eq(go_rd_o | shadows.go_die_o)
        m.d.comb += go_wr_rst.eq(go_wr_o | shadows.go_die_o)

        # connect shadows / go_dies to Computation Units
        m.d.comb += cu.shadown_i[0:n_int_fus].eq(shadows.shadown_o[0:n_int_fus])
        m.d.comb += cu.go_die_i[0:n_int_fus].eq(shadows.go_die_o[0:n_int_fus])

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).

        # when written, the shadow can be cancelled (and was good)
        m.d.comb += shadows.s_good_i[0:n_int_fus].eq(go_wr_o[0:n_int_fus])

        # work out the current-activated busy unit (by recording the old one)
        with m.If(fn_issue_o):  # only update prev bit if instruction issued
            m.d.sync += fn_issue_prev.eq(fn_issue_o)

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        m.d.comb += prev_shadow.eq(~fn_issue_o & fn_issue_prev & cu.busy_o)
        for i in range(n_int_fus):
            m.d.comb += shadows.shadow_i[i][0:n_int_fus].eq(prev_shadow)

        # Connect Register File(s)
        print ("intregdeps wen len", len(intfus.dest_rsel_o))
        m.d.comb += int_dest.wen.eq(intfus.dest_rsel_o)
        m.d.comb += int_src1.ren.eq(intfus.src1_rsel_o)
        m.d.comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        m.d.comb += int_dest.data_i.eq(cu.dest_o)
        m.d.comb += cu.src1_data_i.eq(int_src1.data_o)
        m.d.comb += cu.src2_data_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        m.d.comb += cu.go_rd_i[0:n_int_fus].eq(go_rd_o[0:n_int_fus])
        m.d.comb += cu.go_wr_i[0:n_int_fus].eq(go_wr_o[0:n_int_fus])
        m.d.comb += cu.issue_i[0:n_int_fus].eq(fn_issue_o[0:n_int_fus])

        return m

    def __iter__(self):
        """Yield the signals exported as ports."""
        yield from self.intregs
        yield from self.fpregs
        yield self.int_store_i
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        # NOTE(review): one more yield may have sat here in the original
        # (there is a one-line gap in the recovered text) - confirm.
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """Return the port list (used by ``rtlil.convert`` in the test)."""
        return list(self)
# Opcodes understood by RegSim.op.  0-3 follow the oper_i encoding used for
# the add/sub/mul/shf computation units.
# NOTE(review): the numbering of the four compare ops (4-7) was not visible
# in the text this block was rebuilt from - confirm.
IADD = 0
ISUB = 1
IMUL = 2
ISHF = 3
IBGT = 4
IBLT = 5
IBEQ = 6
IBNE = 7


class RegSim:
    """Pure-Python reference model of the integer register file."""

    def __init__(self, rwidth, nregs):
        """Create ``nregs`` registers of ``rwidth`` bits, all zero."""
        # rwidth is read by op() and by the test-bench (alusim.rwidth).
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, src1, src2, dest):
        """Apply opcode ``op`` to registers ``src1``/``src2``.

        The result is truncated to ``rwidth`` bits, stored in register
        ``dest`` and returned (callers use the return value as the branch
        outcome).  Raises ValueError for an unknown opcode.
        """
        maxbits = (1 << self.rwidth) - 1
        src1 = self.regs[src1] & maxbits
        src2 = self.regs[src2] & maxbits
        if op == IADD:
            val = src1 + src2
        elif op == ISUB:
            val = src1 - src2
        elif op == IMUL:
            val = src1 * src2
        elif op == ISHF:
            val = src1 >> (src2 & maxbits)
        elif op == IBGT:
            val = int(src1 > src2)
        elif op == IBLT:
            val = int(src1 < src2)
        elif op == IBEQ:
            val = int(src1 == src2)
        elif op == IBNE:
            val = int(src1 != src2)
        else:
            raise ValueError("unknown opcode %r" % (op,))
        # keep the model at register width (add/mul overflow, sub underflow)
        val &= maxbits
        self.regs[dest] = val
        return val

    def setval(self, dest, val):
        """Set register ``dest`` to ``val`` directly."""
        self.regs[dest] = val

    def dump(self, dut):
        """Simulation generator: print every model register next to the
        value read back from ``dut.intregs``."""
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """Simulation generator: compare every register with the dut.

        On the first mismatch the register is reported, the whole file is
        dumped and AssertionError is raised.
        NOTE(review): the mismatch condition and the failure were not
        visible in the recovered text; inferred from the message - confirm.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                raise AssertionError("register %d mismatch" % i)
def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
    """Simulation generator: present one integer instruction to ``dut``.

    Clears every bit of ``int_insn_i``, sets the three register numbers,
    raises instruction bit ``op`` and enables register decode.
    """
    for i in range(len(dut.int_insn_i)):
        yield dut.int_insn_i[i].eq(0)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)
    yield dut.int_insn_i[op].eq(1)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)
def print_reg(dut, rnums):
    """Simulation generator: print the integer registers listed in
    ``rnums`` (hex), read back from ``dut.intregs``."""
    rs = []
    for rnum in rnums:
        reg = yield dut.intregs.regs[rnum].reg
        rs.append("%x" % reg)
    rnums = map(str, rnums)
    print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
def create_random_ops(n_ops, shadowing=False, n_regs=8):
    """Return ``n_ops`` random instructions.

    Each entry is ``(src1, src2, dest, op)``; with ``shadowing`` a fifth
    element ``(False, False)`` (shadow-on, shadow-off) is added.

    ``n_regs`` bounds the register numbers (1 .. n_regs-1).  It replaces the
    original read of a ``dut`` name that is neither a parameter nor a
    visible global; the default matches the 8-register Scoreboard built in
    test_scoreboard.
    """
    insts = []
    for _ in range(n_ops):
        src1 = randint(1, n_regs - 1)
        src2 = randint(1, n_regs - 1)
        dest = randint(1, n_regs - 1)
        # NOTE(review): the opcode draw was not visible in the recovered
        # text; 0-3 are the add/sub/mul/shf units - confirm.
        op = randint(0, 3)

        if shadowing:
            insts.append((src1, src2, dest, op, (False, False)))
        else:
            insts.append((src1, src2, dest, op))
    return insts
def wait_for_busy_clear(dut):
    """Simulation generator: idle, one bare ``yield`` at a time, until
    ``dut.busy_o`` reads as zero.

    NOTE(review): only the ``busy_o`` read was visible in the recovered
    text; the polling loop is inferred from the function name - confirm.
    """
    while True:
        busy_o = yield dut.busy_o
        if not busy_o:
            break
        yield
def wait_for_issue(dut):
    """Simulation generator: idle until ``dut.issue_o`` reads non-zero,
    then clear every instruction bit and ``reg_enable_i``.

    NOTE(review): the loop and the ``issue_o`` test were not visible in the
    recovered text; inferred from the function name - confirm.
    """
    while True:
        issue_o = yield dut.issue_o
        if issue_o:
            # accepted: withdraw the instruction
            for i in range(len(dut.int_insn_i)):
                yield dut.int_insn_i[i].eq(0)
            yield dut.reg_enable_i.eq(0)
            break
        #yield from print_reg(dut, [1,2,3])
        yield
        #yield from print_reg(dut, [1,2,3])
def scoreboard_branch_sim(dut, alusim):
    """Simulation generator for the branch-speculation experiment.

    Issues some random operations, one branch, and the operations of both
    branch paths (tagged shadow-on / shadow-off), then replays the program
    on ``alusim`` and compares register files.

    NOTE(review): much of the control flow (loop headers, the branch test,
    the branch opcode draw, clock ticks) was not visible in the text this
    was rebuilt from and is inferred.  Treat this as a draft to verify
    against the hardware.
    """
    yield dut.int_store_i.eq(1)

    # set random values in the registers
    for i in range(1, dut.n_regs):
        val = randint(0, (1 << alusim.rwidth) - 1)
        yield dut.intregs.regs[i].reg.eq(val)
        alusim.setval(i, val)

    # create some instructions: branches create a tree
    # (shadowing=True so that every entry is the 5-tuple unpacked below)
    insts = create_random_ops(5, True)

    src1 = randint(1, dut.n_regs - 1)
    src2 = randint(1, dut.n_regs - 1)
    op = randint(4, 7)  # NOTE(review): assumed range for the compare ops

    branch_ok = create_random_ops(5, True)
    branch_fail = create_random_ops(5, True)

    # a branch carries its two paths in the "dest" slot
    insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

    # issue instruction(s).  Work on a copy, in program order, so that
    # `insts` is still intact for the model replay below.
    instrs = list(insts)
    branch_direction = 0
    i = -1
    while instrs:
        i += 1
        (src1, src2, dest, op, (shadow_on, shadow_off)) = instrs.pop(0)
        if branch_direction == 1 and shadow_off:
            continue  # branch was "success" and this is a "failed"... skip
        if branch_direction == 2 and shadow_on:
            continue  # branch was "fail" and this is a "success"... skip
        if isinstance(dest, tuple):
            branch_ok, branch_fail = dest
            # NOTE(review): register number presented for a branch is a
            # placeholder (a branch has no destination) - confirm.
            dest = 0
            # ok zip up the branch success / fail instructions and
            # drop them into the queue, one marked "to have branch success"
            # the other to be marked shadow branch "fail".
            # one out of each of these will be cancelled
            for ok, fl in zip(branch_ok, branch_fail):
                instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
        print ("instr %d: (%d, %d, %d, %d)" % (i, src1, src2, dest, op))
        yield from int_instr(dut, op, src1, src2, dest,
                             shadow_on, shadow_off)
        yield
        yield from wait_for_issue(dut)
        # read the *value* of the signal; comparing the Signal object
        # itself against 1/2 above would not give a Python bool
        branch_direction = yield dut.branch_direction_o  # which way branch went

    # wait for all instructions to stop before checking
    yield
    yield from wait_for_busy_clear(dut)

    # replay the program on the model, following the branch outcome
    pending = list(insts)
    while pending:
        (src1, src2, dest, op, (shadow_on, shadow_off)) = pending.pop(0)
        if isinstance(dest, tuple):
            branch_ok, branch_fail = dest
            # evaluate the compare without disturbing the model: register 0
            # is used as scratch and restored afterwards
            saved = alusim.regs[0]
            branch_res = alusim.op(op, src1, src2, 0)
            alusim.regs[0] = saved
            if branch_res:
                pending.extend(branch_ok)
            else:
                pending.extend(branch_fail)
        else:
            alusim.op(op, src1, src2, dest)

    # check status
    yield from alusim.check(dut)
    yield from alusim.dump(dut)
def scoreboard_sim(dut, alusim):
    """Simulation generator: run random plus fixed regression instruction
    sequences through ``dut`` and compare the register file with ``alusim``.

    NOTE(review): in the text this was rebuilt from, each regression group
    below was preceded by a line that is missing (probably an on/off
    switch).  All groups are enabled here - confirm which were intended.
    """
    yield dut.int_store_i.eq(1)

    # set random values in the registers
    for i in range(1, dut.n_regs):
        val = randint(0, (1 << alusim.rwidth) - 1)
        yield dut.intregs.regs[i].reg.eq(val)
        alusim.setval(i, val)

    # create some instructions (some random, some regression tests)
    instrs = []

    # NOTE(review): the number of random instructions was not visible.
    n_random = 10
    for _ in range(n_random):
        src1 = randint(1, dut.n_regs - 1)
        src2 = randint(1, dut.n_regs - 1)
        # pick a destination that is not also a source
        while True:
            dest = randint(1, dut.n_regs - 1)
            if dest not in [src1, src2]:
                break
        # NOTE(review): opcode draw not visible; 0-3 = add/sub/mul/shf
        op = randint(0, 3)
        instrs.append((src1, src2, dest, op))

    instrs.append((2, 3, 3, 0))
    instrs.append((5, 3, 3, 1))

    instrs.append((5, 6, 2, 1))
    instrs.append((2, 2, 4, 0))
    #instrs.append((2, 2, 3, 1))

    instrs.append((2, 1, 2, 3))

    instrs.append((2, 6, 2, 1))
    instrs.append((2, 1, 2, 0))

    instrs.append((1, 2, 7, 2))
    instrs.append((7, 1, 5, 0))
    instrs.append((4, 4, 1, 1))

    instrs.append((5, 6, 2, 2))
    instrs.append((1, 1, 4, 1))
    instrs.append((6, 5, 3, 0))

    # Write-after-Write Hazard
    instrs.append( (3, 6, 7, 2) )
    instrs.append( (4, 4, 7, 1) )

    # self-read/write-after-write followed by Read-after-Write
    instrs.append((1, 1, 1, 1))
    instrs.append((1, 5, 3, 0))

    # Read-after-Write followed by self-read-after-write
    instrs.append((5, 6, 1, 2))
    instrs.append((1, 1, 1, 1))

    # self-read-write sandwich
    instrs.append((5, 6, 1, 2))
    instrs.append((1, 1, 1, 1))
    instrs.append((1, 5, 3, 0))

    instrs.append( (5, 2, 5, 2) )
    instrs.append( (2, 6, 3, 0) )
    instrs.append( (4, 2, 2, 1) )

    # issue instruction(s), wait for issue to be free before proceeding
    for i, (src1, src2, dest, op) in enumerate(instrs):

        print ("instr %d: (%d, %d, %d, %d)" % (i, src1, src2, dest, op))
        alusim.op(op, src1, src2, dest)
        yield from int_instr(dut, op, src1, src2, dest, 0, 0)
        # NOTE(review): bare yields (clock ticks) were not visible in the
        # recovered text; placed where the line numbering leaves a gap.
        yield
        yield from wait_for_issue(dut)

    # wait for all instructions to stop before checking
    yield
    yield from wait_for_busy_clear(dut)

    # check status
    yield from alusim.check(dut)
    yield from alusim.dump(dut)
def test_scoreboard():
    """Build a 16-bit, 8-register Scoreboard, write its RTLIL to
    ``test_scoreboard6600.il`` and run ``scoreboard_sim`` against it,
    recording a VCD trace."""
    dut = Scoreboard(16, 8)
    alusim = RegSim(16, 8)
    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as f:
        f.write(vl)

    run_simulation(dut, scoreboard_sim(dut, alusim),
                   vcd_name='test_scoreboard6600.vcd')
# Script entry point: run the scoreboard test when executed directly.
if __name__ == '__main__':
    test_scoreboard()