1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
7 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
8 from scoreboard
.global_pending
import GlobalPending
9 from scoreboard
.group_picker
import GroupPicker
10 from scoreboard
.issue_unit
import IntFPIssueUnit
, RegDecode
11 from scoreboard
.shadow
import ShadowMatrix
, BranchSpeculationRecord
13 from compalu
import ComputationUnitNoDelay
15 from alu_hier
import ALU
, BranchALU
16 from nmutil
.latch
import SRLatch
18 from random
import randint
21 class CompUnits(Elaboratable
):
23 def __init__(self
, rwid
, n_units
):
26 * :rwid: bit width of register file(s) - both FP and INT
27 * :n_units: number of ALUs
29 Note: bgt unit is returned so that a shadow unit can be created
33 self
.n_units
= n_units
37 self
.issue_i
= Signal(n_units
, reset_less
=True)
38 self
.go_rd_i
= Signal(n_units
, reset_less
=True)
39 self
.go_wr_i
= Signal(n_units
, reset_less
=True)
40 self
.shadown_i
= Signal(n_units
, reset_less
=True)
41 self
.go_die_i
= Signal(n_units
, reset_less
=True)
44 self
.busy_o
= Signal(n_units
, reset_less
=True)
45 self
.rd_rel_o
= Signal(n_units
, reset_less
=True)
46 self
.req_rel_o
= Signal(n_units
, reset_less
=True)
48 # in/out register data (note: not register#, actual data)
49 self
.dest_o
= Signal(rwid
, reset_less
=True)
50 self
.src1_data_i
= Signal(rwid
, reset_less
=True)
51 self
.src2_data_i
= Signal(rwid
, reset_less
=True)
54 self
.bgt
= BranchALU(self
.rwid
)
55 self
.br1
= ComputationUnitNoDelay(self
.rwid
, 2, self
.bgt
)
57 def elaborate(self
, platform
):
67 m
.submodules
.comp1
= comp1
= ComputationUnitNoDelay(self
.rwid
, 2, add
)
68 m
.submodules
.comp2
= comp2
= ComputationUnitNoDelay(self
.rwid
, 2, sub
)
69 m
.submodules
.comp3
= comp3
= ComputationUnitNoDelay(self
.rwid
, 2, mul
)
70 m
.submodules
.comp4
= comp4
= ComputationUnitNoDelay(self
.rwid
, 2, shf
)
71 m
.submodules
.br1
= br1
= self
.br1
72 int_alus
= [comp1
, comp2
, comp3
, comp4
, br1
]
74 m
.d
.comb
+= comp1
.oper_i
.eq(Const(0, 2)) # op=add
75 m
.d
.comb
+= comp2
.oper_i
.eq(Const(1, 2)) # op=sub
76 m
.d
.comb
+= comp3
.oper_i
.eq(Const(2, 2)) # op=mul
77 m
.d
.comb
+= comp4
.oper_i
.eq(Const(3, 2)) # op=shf
78 m
.d
.comb
+= br1
.oper_i
.eq(Const(0, 2)) # op=bgt
89 req_rel_l
.append(alu
.req_rel_o
)
90 rd_rel_l
.append(alu
.rd_rel_o
)
91 shadow_l
.append(alu
.shadown_i
)
92 godie_l
.append(alu
.go_die_i
)
93 go_wr_l
.append(alu
.go_wr_i
)
94 go_rd_l
.append(alu
.go_rd_i
)
95 issue_l
.append(alu
.issue_i
)
96 busy_l
.append(alu
.busy_o
)
97 m
.d
.comb
+= self
.rd_rel_o
.eq(Cat(*rd_rel_l
))
98 m
.d
.comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
99 m
.d
.comb
+= self
.busy_o
.eq(Cat(*busy_l
))
100 m
.d
.comb
+= Cat(*godie_l
).eq(self
.go_die_i
)
101 m
.d
.comb
+= Cat(*shadow_l
).eq(self
.shadown_i
)
102 m
.d
.comb
+= Cat(*go_wr_l
).eq(self
.go_wr_i
)
103 m
.d
.comb
+= Cat(*go_rd_l
).eq(self
.go_rd_i
)
104 m
.d
.comb
+= Cat(*issue_l
).eq(self
.issue_i
)
106 # connect data register input/output
108 # merge (OR) all integer FU / ALU outputs to a single value
109 # bit of a hack: treereduce needs a list with an item named "dest_o"
110 dest_o
= treereduce(int_alus
)
111 m
.d
.comb
+= self
.dest_o
.eq(dest_o
)
113 for i
, alu
in enumerate(int_alus
):
114 m
.d
.comb
+= alu
.src1_i
.eq(self
.src1_data_i
)
115 m
.d
.comb
+= alu
.src2_i
.eq(self
.src2_data_i
)
120 class FunctionUnits(Elaboratable
):
122 def __init__(self
, n_regs
, n_int_alus
):
124 self
.n_int_alus
= n_int_alus
126 self
.dest_i
= Signal(n_regs
, reset_less
=True) # Dest R# in
127 self
.src1_i
= Signal(n_regs
, reset_less
=True) # oper1 R# in
128 self
.src2_i
= Signal(n_regs
, reset_less
=True) # oper2 R# in
130 self
.g_int_rd_pend_o
= Signal(n_regs
, reset_less
=True)
131 self
.g_int_wr_pend_o
= Signal(n_regs
, reset_less
=True)
133 self
.dest_rsel_o
= Signal(n_regs
, reset_less
=True) # dest reg (bot)
134 self
.src1_rsel_o
= Signal(n_regs
, reset_less
=True) # src1 reg (bot)
135 self
.src2_rsel_o
= Signal(n_regs
, reset_less
=True) # src2 reg (bot)
137 self
.req_rel_i
= Signal(n_int_alus
, reset_less
= True)
138 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
139 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
141 self
.go_rd_i
= Signal(n_int_alus
, reset_less
=True)
142 self
.go_wr_i
= Signal(n_int_alus
, reset_less
=True)
143 self
.req_rel_o
= Signal(n_int_alus
, reset_less
=True)
144 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
146 # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
148 def elaborate(self
, platform
):
151 n_int_fus
= self
.n_int_alus
153 # Integer FU-FU Dep Matrix
154 intfudeps
= FUFUDepMatrix(n_int_fus
, n_int_fus
)
155 m
.submodules
.intfudeps
= intfudeps
156 # Integer FU-Reg Dep Matrix
157 intregdeps
= FURegDepMatrix(n_int_fus
, self
.n_regs
)
158 m
.submodules
.intregdeps
= intregdeps
160 m
.d
.comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.rd_rsel_o
)
161 m
.d
.comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.wr_rsel_o
)
163 m
.d
.comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.rd_rsel_o
)
164 m
.d
.comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.wr_rsel_o
)
166 m
.d
.comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
167 m
.d
.comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
168 self
.wr_pend_o
= intregdeps
.wr_pend_o
# also output for use in WaWGrid
170 m
.d
.comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
171 m
.d
.comb
+= intfudeps
.go_rd_i
.eq(self
.go_rd_i
)
172 m
.d
.comb
+= intfudeps
.go_wr_i
.eq(self
.go_wr_i
)
173 m
.d
.comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
174 m
.d
.comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
176 # Connect function issue / arrays, and dest/src1/src2
177 m
.d
.comb
+= intregdeps
.dest_i
.eq(self
.dest_i
)
178 m
.d
.comb
+= intregdeps
.src1_i
.eq(self
.src1_i
)
179 m
.d
.comb
+= intregdeps
.src2_i
.eq(self
.src2_i
)
181 m
.d
.comb
+= intregdeps
.go_rd_i
.eq(self
.go_rd_i
)
182 m
.d
.comb
+= intregdeps
.go_wr_i
.eq(self
.go_wr_i
)
183 m
.d
.comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
185 m
.d
.comb
+= self
.dest_rsel_o
.eq(intregdeps
.dest_rsel_o
)
186 m
.d
.comb
+= self
.src1_rsel_o
.eq(intregdeps
.src1_rsel_o
)
187 m
.d
.comb
+= self
.src2_rsel_o
.eq(intregdeps
.src2_rsel_o
)
192 class Scoreboard(Elaboratable
):
193 def __init__(self
, rwid
, n_regs
):
196 * :rwid: bit width of register file(s) - both FP and INT
197 * :n_regs: depth of register file(s) - number of FP and INT regs
203 self
.intregs
= RegFileArray(rwid
, n_regs
)
204 self
.fpregs
= RegFileArray(rwid
, n_regs
)
207 self
.int_store_i
= Signal(reset_less
=True) # instruction is a store
208 self
.int_dest_i
= Signal(max=n_regs
, reset_less
=True) # Dest R# in
209 self
.int_src1_i
= Signal(max=n_regs
, reset_less
=True) # oper1 R# in
210 self
.int_src2_i
= Signal(max=n_regs
, reset_less
=True) # oper2 R# in
211 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
214 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
215 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
217 # for branch speculation experiment. branch_direction = 0 if
218 # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
219 # branch_succ and branch_fail are requests to have the current
220 # instruction be dependent on the branch unit "shadow" capability.
221 self
.branch_succ_i
= Signal(reset_less
=True)
222 self
.branch_fail_i
= Signal(reset_less
=True)
223 self
.branch_direction_o
= Signal(2, reset_less
=True)
225 def elaborate(self
, platform
):
228 m
.submodules
.intregs
= self
.intregs
229 m
.submodules
.fpregs
= self
.fpregs
232 m
.d
.sync
+= self
.branch_succ_i
.eq(Const(0))
233 m
.d
.sync
+= self
.branch_fail_i
.eq(Const(0))
234 m
.d
.sync
+= self
.branch_direction_o
.eq(Const(0))
237 int_dest
= self
.intregs
.write_port("dest")
238 int_src1
= self
.intregs
.read_port("src1")
239 int_src2
= self
.intregs
.read_port("src2")
241 fp_dest
= self
.fpregs
.write_port("dest")
242 fp_src1
= self
.fpregs
.read_port("src1")
243 fp_src2
= self
.fpregs
.read_port("src2")
245 # Int ALUs and Comp Units
247 m
.submodules
.cu
= cu
= CompUnits(self
.rwid
, n_int_alus
)
248 m
.d
.comb
+= cu
.go_die_i
.eq(0)
249 bgt
= cu
.bgt
# get at the branch computation unit
252 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
)
254 # Count of number of FUs
255 n_int_fus
= n_int_alus
256 n_fp_fus
= 0 # for now
258 # Integer Priority Picker 1: Adder + Subtractor
259 intpick1
= GroupPicker(n_int_fus
) # picks between add, sub, mul and shf
260 m
.submodules
.intpick1
= intpick1
263 regdecode
= RegDecode(self
.n_regs
)
264 m
.submodules
.regdecode
= regdecode
265 issueunit
= IntFPIssueUnit(self
.n_regs
, n_int_fus
, n_fp_fus
)
266 m
.submodules
.issueunit
= issueunit
268 # Shadow Matrix. currently n_int_fus shadows, to be used for
269 # write-after-write hazards. NOTE: there is one extra for branches,
270 # so the shadow width is increased by 1
271 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_int_fus
, n_int_fus
+1)
273 # combined go_rd/wr + go_die (go_die used to reset latches)
274 go_rd_rst
= Signal(n_int_fus
, reset_less
=True)
275 go_wr_rst
= Signal(n_int_fus
, reset_less
=True)
276 # record previous instruction to cast shadow on current instruction
277 fn_issue_prev
= Signal(n_int_fus
)
278 prev_shadow
= Signal(n_int_fus
)
280 # Branch Speculation recorder. tracks the success/fail state as
281 # each instruction is issued, so that when the branch occurs the
282 # allow/cancel can be issued as appropriate.
283 m
.submodules
.specrec
= bspec
= BranchSpeculationRecord(n_int_fus
)
286 # ok start wiring things together...
287 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
288 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
292 # Issue Unit is where it starts. set up some in/outs for this module
294 m
.d
.comb
+= [issueunit
.i
.store_i
.eq(self
.int_store_i
),
295 regdecode
.dest_i
.eq(self
.int_dest_i
),
296 regdecode
.src1_i
.eq(self
.int_src1_i
),
297 regdecode
.src2_i
.eq(self
.int_src2_i
),
298 regdecode
.enable_i
.eq(self
.reg_enable_i
),
299 issueunit
.i
.dest_i
.eq(regdecode
.dest_o
),
300 self
.issue_o
.eq(issueunit
.issue_o
)
302 self
.int_insn_i
= issueunit
.i
.insn_i
# enabled by instruction decode
304 # connect global rd/wr pending vector (for WaW detection)
305 m
.d
.sync
+= issueunit
.i
.g_wr_pend_i
.eq(intfus
.g_int_wr_pend_o
)
306 # TODO: issueunit.f (FP)
308 # and int function issue / busy arrays, and dest/src1/src2
309 m
.d
.comb
+= intfus
.dest_i
.eq(regdecode
.dest_o
)
310 m
.d
.comb
+= intfus
.src1_i
.eq(regdecode
.src1_o
)
311 m
.d
.comb
+= intfus
.src2_i
.eq(regdecode
.src2_o
)
313 fn_issue_o
= issueunit
.i
.fn_issue_o
315 m
.d
.comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
316 m
.d
.comb
+= issueunit
.i
.busy_i
.eq(cu
.busy_o
)
317 m
.d
.comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
320 # connect fu-fu matrix
323 # Group Picker... done manually for now.
324 go_rd_o
= intpick1
.go_rd_o
325 go_wr_o
= intpick1
.go_wr_o
326 go_rd_i
= intfus
.go_rd_i
327 go_wr_i
= intfus
.go_wr_i
328 # NOTE: connect to the shadowed versions so that they can "die" (reset)
329 m
.d
.comb
+= go_rd_i
[0:n_int_fus
].eq(go_rd_rst
[0:n_int_fus
]) # rd
330 m
.d
.comb
+= go_wr_i
[0:n_int_fus
].eq(go_wr_rst
[0:n_int_fus
]) # wr
334 m
.d
.comb
+= intpick1
.rd_rel_i
[0:n_int_fus
].eq(cu
.rd_rel_o
[0:n_int_fus
])
335 m
.d
.comb
+= intpick1
.req_rel_i
[0:n_int_fus
].eq(cu
.req_rel_o
[0:n_int_fus
])
336 int_rd_o
= intfus
.readable_o
337 int_wr_o
= intfus
.writable_o
338 m
.d
.comb
+= intpick1
.readable_i
[0:n_int_fus
].eq(int_rd_o
[0:n_int_fus
])
339 m
.d
.comb
+= intpick1
.writable_i
[0:n_int_fus
].eq(int_wr_o
[0:n_int_fus
])
345 m
.d
.comb
+= shadows
.issue_i
.eq(fn_issue_o
)
346 # these are explained in ShadowMatrix docstring, and are to be
347 # connected to the FUReg and FUFU Matrices, to get them to reset
348 # NOTE: do NOT connect these to the Computation Units. The CUs need to
349 # do something slightly different (due to the revolving-door SRLatches)
350 m
.d
.comb
+= go_rd_rst
.eq(go_rd_o | shadows
.go_die_o
)
351 m
.d
.comb
+= go_wr_rst
.eq(go_wr_o | shadows
.go_die_o
)
354 # NOTE: this setup is for the instruction order preservation...
356 # connect shadows / go_dies to Computation Units
357 m
.d
.comb
+= cu
.shadown_i
[0:n_int_fus
].eq(shadows
.shadown_o
[0:n_int_fus
])
358 m
.d
.comb
+= cu
.go_die_i
[0:n_int_fus
].eq(shadows
.go_die_o
[0:n_int_fus
])
360 # ok connect first n_int_fu shadows to busy lines, to create an
361 # instruction-order linked-list-like arrangement, using a bit-matrix
362 # (instead of e.g. a ring buffer).
365 # when written, the shadow can be cancelled (and was good)
366 m
.d
.comb
+= shadows
.s_good_i
[0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
])
368 # work out the current-activated busy unit (by recording the old one)
369 with m
.If(fn_issue_o
): # only update prev bit if instruction issued
370 m
.d
.sync
+= fn_issue_prev
.eq(fn_issue_o
)
372 # *previous* instruction shadows *current* instruction, and, obviously,
373 # if the previous is completed (!busy) don't cast the shadow!
374 m
.d
.comb
+= prev_shadow
.eq(~fn_issue_o
& fn_issue_prev
& cu
.busy_o
)
375 for i
in range(n_int_fus
):
376 m
.d
.comb
+= shadows
.shadow_i
[i
][0:n_int_fus
].eq(prev_shadow
)
379 # ... and this is for branch speculation. it uses the extra bit
380 # tacked onto the ShadowMatrix (hence shadow_wid=n_int_fus+1)
381 # only needs to set shadow_i, s_fail_i and s_good_i
383 m
.d
.comb
+= shadows
.s_good_i
[n_int_fus
].eq(bspec
.good_o
[i
])
384 m
.d
.comb
+= shadows
.s_fail_i
[n_int_fus
].eq(bspec
.fail_o
[i
])
386 with m
.If(self
.branch_succ_i | self
.branch_fail_i
):
387 for i
in range(n_int_fus
):
388 m
.d
.comb
+= shadows
.shadow_i
[i
][n_int_fus
].eq(1)
390 # finally, we need an indicator to the test infrastructure as to
391 # whether the branch succeeded or failed, plus, link up to the
392 # "recorder" of whether the instruction was under shadow or not
394 m
.d
.comb
+= bspec
.issue_i
.eq(fn_issue_o
)
395 m
.d
.comb
+= bspec
.good_i
.eq(self
.branch_succ_i
)
396 m
.d
.comb
+= bspec
.fail_i
.eq(self
.branch_fail_i
)
397 # branch is active (TODO: a better signal: this is over-using the
398 # go_write signal - actually the branch should not be "writing")
399 with m
.If(cu
.br1
.go_wr_i
):
400 m
.d
.sync
+= self
.branch_direction_o
.eq(cu
.br1
.data_o
+Const(1, 2))
401 m
.d
.comb
+= bspec
.branch_i
.eq(1)
404 # Connect Register File(s)
406 print ("intregdeps wen len", len(intfus
.dest_rsel_o
))
407 m
.d
.comb
+= int_dest
.wen
.eq(intfus
.dest_rsel_o
)
408 m
.d
.comb
+= int_src1
.ren
.eq(intfus
.src1_rsel_o
)
409 m
.d
.comb
+= int_src2
.ren
.eq(intfus
.src2_rsel_o
)
411 # connect ALUs to regfile
412 m
.d
.comb
+= int_dest
.data_i
.eq(cu
.dest_o
)
413 m
.d
.comb
+= cu
.src1_data_i
.eq(int_src1
.data_o
)
414 m
.d
.comb
+= cu
.src2_data_i
.eq(int_src2
.data_o
)
416 # connect ALU Computation Units
417 m
.d
.comb
+= cu
.go_rd_i
[0:n_int_fus
].eq(go_rd_o
[0:n_int_fus
])
418 m
.d
.comb
+= cu
.go_wr_i
[0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
])
419 m
.d
.comb
+= cu
.issue_i
[0:n_int_fus
].eq(fn_issue_o
[0:n_int_fus
])
425 yield from self
.intregs
426 yield from self
.fpregs
427 yield self
.int_store_i
428 yield self
.int_dest_i
429 yield self
.int_src1_i
430 yield self
.int_src2_i
432 yield self
.branch_succ_i
433 yield self
.branch_fail_i
434 yield self
.branch_direction_o
def __init__(self, rwidth, nregs):
    """Create a software model of the integer register file.

    * :rwidth: bit width of each register; kept on the instance because
               ``op`` derives its value mask from ``self.rwidth`` and the
               simulations read ``alusim.rwidth`` when picking random
               register contents
    * :nregs:  number of registers in the model; all start at zero
    """
    # must be stored: other methods and the test benches read it back
    self.rwidth = rwidth
    self.regs = [0] * nregs
453 def op(self
, op
, src1
, src2
, dest
):
454 maxbits
= (1 << self
.rwidth
) - 1
455 src1
= self
.regs
[src1
] & maxbits
456 src2
= self
.regs
[src2
] & maxbits
464 val
= src1
>> (src2
& maxbits
)
466 val
= int(src1
> src2
)
468 val
= int(src1
< src2
)
470 val
= int(src1
== src2
)
472 val
= int(src1
!= src2
)
474 self
.regs
[dest
] = val
def setval(self, dest, val):
    """Overwrite modelled register number *dest* with *val*.

    The value is stored exactly as given; no masking is applied here.
    """
    self.regs[dest] = val
480 for i
, val
in enumerate(self
.regs
):
481 reg
= yield dut
.intregs
.regs
[i
].reg
482 okstr
= "OK" if reg
== val
else "!ok"
483 print("reg %d expected %x received %x %s" % (i
, val
, reg
, okstr
))
485 def check(self
, dut
):
486 for i
, val
in enumerate(self
.regs
):
487 reg
= yield dut
.intregs
.regs
[i
].reg
489 print("reg %d expected %x received %x\n" % (i
, val
, reg
))
490 yield from self
.dump(dut
)
def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
    """Simulation process that presents one integer instruction to *dut*.

    Yields, in this order, the assignments that:

    * clear every bit of ``dut.int_insn_i``
    * load the dest, src1 and src2 register numbers
    * raise the single ``dut.int_insn_i[op]`` bit
    * enable register decode (``reg_enable_i``)
    * set ``branch_fail_i`` then ``branch_succ_i``, which mark the
      instruction as shadow-dependent on (either) branch fail or success
    """
    insn_bits = dut.int_insn_i

    # wipe the whole instruction vector first, one bit per yield
    for bitnum in range(len(insn_bits)):
        yield insn_bits[bitnum].eq(0)

    # remaining assignments, issued strictly in table order
    assignments = (
        (dut.int_dest_i, dest),
        (dut.int_src1_i, src1),
        (dut.int_src2_i, src2),
        (insn_bits[op], 1),
        (dut.reg_enable_i, 1),
        (dut.branch_fail_i, branch_fail),
        (dut.branch_succ_i, branch_success),
    )
    for target, value in assignments:
        yield target.eq(value)
def print_reg(dut, rnums):
    """Simulation helper: print the values of selected integer registers.

    For each register number in *rnums* the process yields
    ``dut.intregs.regs[n].reg`` and uses the value sent back as that
    register's contents. One line is printed at the end: the register
    numbers, then the values in hex, both comma-separated.
    """
    hexvals = []
    for regnum in rnums:
        current = yield dut.intregs.regs[regnum].reg
        hexvals.append("%x" % current)
    labels = ','.join(map(str, rnums))
    print ("reg %s: %s" % (labels, ','.join(hexvals)))
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Return a list of *n_ops* randomly generated instructions.

    Each instruction is a tuple ``(src1, src2, dest, op)``. Register
    numbers are drawn from ``1 .. dut.n_regs-1`` and *op* from
    ``0 .. max_opnums`` (both inclusive). When *shadowing* is true each
    tuple has a fifth element, ``(False, False)``.
    """
    def random_reg():
        return randint(1, dut.n_regs-1)

    def one_instruction():
        # draw order matters for reproducibility: src1, src2, dest, op
        fields = (random_reg(), random_reg(), random_reg(),
                  randint(0, max_opnums))
        if shadowing:
            return fields + ((False, False),)
        return fields

    return [one_instruction() for _ in range(n_ops)]
532 def wait_for_busy_clear(dut
):
534 busy_o
= yield dut
.busy_o
541 def wait_for_issue(dut
):
543 issue_o
= yield dut
.issue_o
545 for i
in range(len(dut
.int_insn_i
)):
546 yield dut
.int_insn_i
[i
].eq(0)
547 yield dut
.reg_enable_i
.eq(0)
550 #yield from print_reg(dut, [1,2,3])
552 #yield from print_reg(dut, [1,2,3])
554 def scoreboard_branch_sim(dut
, alusim
):
556 yield dut
.int_store_i
.eq(1)
560 # set random values in the registers
561 for i
in range(1, dut
.n_regs
):
563 val
= randint(0, (1<<alusim
.rwidth
)-1)
564 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
565 alusim
.setval(i
, val
)
567 # create some instructions: branches create a tree
568 insts
= create_random_ops(dut
, 5)
570 src1
= randint(1, dut
.n_regs
-1)
571 src2
= randint(1, dut
.n_regs
-1)
574 branch_ok
= create_random_ops(dut
, 5)
575 branch_fail
= create_random_ops(dut
, 5)
577 insts
.append((src1
, src2
, (branch_ok
, branch_fail
), op
, (0, 0)))
579 # issue instruction(s)
585 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = insts
.pop()
586 if branch_direction
== 1 and shadow_off
:
587 continue # branch was "success" and this is a "failed"... skip
588 if branch_direction
== 2 and shadow_on
:
589 continue # branch was "fail" and this is a "success"... skip
592 branch_ok
, branch_fail
= dest
594 # ok zip up the branch success / fail instructions and
595 # drop them into the queue, one marked "to have branch success"
596 # the other to be marked shadow branch "fail".
597 # one out of each of these will be cancelled
598 for ok
, fl
in zip(branch_ok
, branch_fail
):
599 instrs
.append((ok
[0], ok
[1], ok
[2], ok
[3], (1, 0)))
600 instrs
.append((fl
[0], fl
[1], fl
[2], fl
[3], (0, 1)))
601 print ("instr %d: (%d, %d, %d, %d)" % (i
, src1
, src2
, dest
, op
))
602 yield from int_instr(dut
, op
, src1
, src2
, dest
,
603 shadow_on
, shadow_off
)
605 yield from wait_for_issue(dut
)
606 branch_direction
= dut
.branch_direction_o
# which way branch went
608 # wait for all instructions to stop before checking
610 yield from wait_for_busy_clear(dut
)
612 for (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) in insts
:
615 branch_ok
, branch_fail
= dest
617 branch_res
= alusim
.op(op
, src1
, src2
, dest
)
620 insts
.append(branch_ok
)
622 insts
.append(branch_fail
)
625 yield from alusim
.check(dut
)
626 yield from alusim
.dump(dut
)
629 def scoreboard_sim(dut
, alusim
):
631 yield dut
.int_store_i
.eq(1)
635 # set random values in the registers
636 for i
in range(1, dut
.n_regs
):
638 val
= randint(0, (1<<alusim
.rwidth
)-1)
639 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
640 alusim
.setval(i
, val
)
642 # create some instructions (some random, some regression tests)
645 instrs
= create_random_ops(dut
, 10, False, 4)
648 instrs
.append((2, 3, 3, 0))
649 instrs
.append((5, 3, 3, 1))
652 instrs
.append((5, 6, 2, 1))
653 instrs
.append((2, 2, 4, 0))
654 #instrs.append((2, 2, 3, 1))
657 instrs
.append((2, 1, 2, 3))
660 instrs
.append((2, 6, 2, 1))
661 instrs
.append((2, 1, 2, 0))
664 instrs
.append((1, 2, 7, 2))
665 instrs
.append((7, 1, 5, 0))
666 instrs
.append((4, 4, 1, 1))
669 instrs
.append((5, 6, 2, 2))
670 instrs
.append((1, 1, 4, 1))
671 instrs
.append((6, 5, 3, 0))
674 # Write-after-Write Hazard
675 instrs
.append( (3, 6, 7, 2) )
676 instrs
.append( (4, 4, 7, 1) )
679 # self-read/write-after-write followed by Read-after-Write
680 instrs
.append((1, 1, 1, 1))
681 instrs
.append((1, 5, 3, 0))
684 # Read-after-Write followed by self-read-after-write
685 instrs
.append((5, 6, 1, 2))
686 instrs
.append((1, 1, 1, 1))
689 # self-read-write sandwich
690 instrs
.append((5, 6, 1, 2))
691 instrs
.append((1, 1, 1, 1))
692 instrs
.append((1, 5, 3, 0))
696 instrs
.append( (5, 2, 5, 2) )
697 instrs
.append( (2, 6, 3, 0) )
698 instrs
.append( (4, 2, 2, 1) )
700 # issue instruction(s), wait for issue to be free before proceeding
701 for i
, (src1
, src2
, dest
, op
) in enumerate(instrs
):
703 print ("instr %d: (%d, %d, %d, %d)" % (i
, src1
, src2
, dest
, op
))
704 alusim
.op(op
, src1
, src2
, dest
)
705 yield from int_instr(dut
, op
, src1
, src2
, dest
, 0, 0)
707 yield from wait_for_issue(dut
)
709 # wait for all instructions to stop before checking
711 yield from wait_for_busy_clear(dut
)
714 yield from alusim
.check(dut
)
715 yield from alusim
.dump(dut
)
def test_scoreboard():
    """Build a Scoreboard, dump it as RTLIL, then run the scoreboard sim.

    Uses a register width of 16 and 8 registers for both the hardware
    (``Scoreboard``) and its software model (``RegSim``). The RTLIL text
    goes to ``test_scoreboard6600.il`` and the simulation trace to
    ``test_scoreboard6600.vcd``.
    """
    rwid, n_regs = 16, 8
    dut = Scoreboard(rwid, n_regs)
    alusim = RegSim(rwid, n_regs)

    # NOTE(review): the statement inside the original ``with`` body is not
    # visible in this view; writing the converted text is assumed - confirm.
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process,
                   vcd_name='test_scoreboard6600.vcd')
729 if __name__
== '__main__':