1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fn_unit
import IntFnUnit
, FPFnUnit
, LDFnUnit
, STFnUnit
7 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
8 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
9 from scoreboard
.global_pending
import GlobalPending
10 from scoreboard
.group_picker
import GroupPicker
11 from scoreboard
.issue_unit
import IntFPIssueUnit
, RegDecode
12 from scoreboard
.shadow
import ShadowMatrix
, BranchSpeculationRecord
14 from compalu
import ComputationUnitNoDelay
16 from alu_hier
import ALU
, BranchALU
17 from nmutil
.latch
import SRLatch
19 from random
import randint
22 class CompUnits(Elaboratable
):
24 def __init__(self
, rwid
, n_units
):
27 * :rwid: bit width of register file(s) - both FP and INT
28 * :n_units: number of ALUs
30 Note: bgt unit is returned so that a shadow unit can be created
34 self
.n_units
= n_units
38 self
.issue_i
= Signal(n_units
, reset_less
=True)
39 self
.go_rd_i
= Signal(n_units
, reset_less
=True)
40 self
.go_wr_i
= Signal(n_units
, reset_less
=True)
41 self
.shadown_i
= Signal(n_units
, reset_less
=True)
42 self
.go_die_i
= Signal(n_units
, reset_less
=True)
45 self
.busy_o
= Signal(n_units
, reset_less
=True)
46 self
.rd_rel_o
= Signal(n_units
, reset_less
=True)
47 self
.req_rel_o
= Signal(n_units
, reset_less
=True)
49 # in/out register data (note: not register#, actual data)
50 self
.dest_o
= Signal(rwid
, reset_less
=True)
51 self
.src1_data_i
= Signal(rwid
, reset_less
=True)
52 self
.src2_data_i
= Signal(rwid
, reset_less
=True)
55 self
.bgt
= BranchALU(self
.rwid
)
56 self
.br1
= ComputationUnitNoDelay(self
.rwid
, 2, self
.bgt
)
58 def elaborate(self
, platform
):
68 m
.submodules
.comp1
= comp1
= ComputationUnitNoDelay(self
.rwid
, 2, add
)
69 m
.submodules
.comp2
= comp2
= ComputationUnitNoDelay(self
.rwid
, 2, sub
)
70 m
.submodules
.comp3
= comp3
= ComputationUnitNoDelay(self
.rwid
, 2, mul
)
71 m
.submodules
.comp4
= comp4
= ComputationUnitNoDelay(self
.rwid
, 2, shf
)
72 m
.submodules
.br1
= br1
= self
.br1
73 int_alus
= [comp1
, comp2
, comp3
, comp4
, br1
]
75 m
.d
.comb
+= comp1
.oper_i
.eq(Const(0, 2)) # op=add
76 m
.d
.comb
+= comp2
.oper_i
.eq(Const(1, 2)) # op=sub
77 m
.d
.comb
+= comp3
.oper_i
.eq(Const(2, 2)) # op=mul
78 m
.d
.comb
+= comp4
.oper_i
.eq(Const(3, 2)) # op=shf
79 m
.d
.comb
+= br1
.oper_i
.eq(Const(0, 2)) # op=bgt
90 req_rel_l
.append(alu
.req_rel_o
)
91 rd_rel_l
.append(alu
.rd_rel_o
)
92 shadow_l
.append(alu
.shadown_i
)
93 godie_l
.append(alu
.go_die_i
)
94 go_wr_l
.append(alu
.go_wr_i
)
95 go_rd_l
.append(alu
.go_rd_i
)
96 issue_l
.append(alu
.issue_i
)
97 busy_l
.append(alu
.busy_o
)
98 m
.d
.comb
+= self
.rd_rel_o
.eq(Cat(*rd_rel_l
))
99 m
.d
.comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
100 m
.d
.comb
+= self
.busy_o
.eq(Cat(*busy_l
))
101 m
.d
.comb
+= Cat(*godie_l
).eq(self
.go_die_i
)
102 m
.d
.comb
+= Cat(*shadow_l
).eq(self
.shadown_i
)
103 m
.d
.comb
+= Cat(*go_wr_l
).eq(self
.go_wr_i
)
104 m
.d
.comb
+= Cat(*go_rd_l
).eq(self
.go_rd_i
)
105 m
.d
.comb
+= Cat(*issue_l
).eq(self
.issue_i
)
107 # connect data register input/output
109 # merge (OR) all integer FU / ALU outputs to a single value
110 # bit of a hack: treereduce needs a list with an item named "dest_o"
111 dest_o
= treereduce(int_alus
)
112 m
.d
.comb
+= self
.dest_o
.eq(dest_o
)
114 for i
, alu
in enumerate(int_alus
):
115 m
.d
.comb
+= alu
.src1_i
.eq(self
.src1_data_i
)
116 m
.d
.comb
+= alu
.src2_i
.eq(self
.src2_data_i
)
121 class FunctionUnits(Elaboratable
):
123 def __init__(self
, n_regs
, n_int_alus
):
125 self
.n_int_alus
= n_int_alus
127 self
.dest_i
= Signal(n_regs
, reset_less
=True) # Dest R# in
128 self
.src1_i
= Signal(n_regs
, reset_less
=True) # oper1 R# in
129 self
.src2_i
= Signal(n_regs
, reset_less
=True) # oper2 R# in
131 self
.g_int_rd_pend_o
= Signal(n_regs
, reset_less
=True)
132 self
.g_int_wr_pend_o
= Signal(n_regs
, reset_less
=True)
134 self
.dest_rsel_o
= Signal(n_regs
, reset_less
=True) # dest reg (bot)
135 self
.src1_rsel_o
= Signal(n_regs
, reset_less
=True) # src1 reg (bot)
136 self
.src2_rsel_o
= Signal(n_regs
, reset_less
=True) # src2 reg (bot)
138 self
.req_rel_i
= Signal(n_int_alus
, reset_less
= True)
139 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
140 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
142 self
.go_rd_i
= Signal(n_int_alus
, reset_less
=True)
143 self
.go_wr_i
= Signal(n_int_alus
, reset_less
=True)
144 self
.req_rel_o
= Signal(n_int_alus
, reset_less
=True)
145 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
147 # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
149 def elaborate(self
, platform
):
152 n_int_fus
= self
.n_int_alus
154 # Integer FU-FU Dep Matrix
155 intfudeps
= FUFUDepMatrix(n_int_fus
, n_int_fus
)
156 m
.submodules
.intfudeps
= intfudeps
157 # Integer FU-Reg Dep Matrix
158 intregdeps
= FURegDepMatrix(n_int_fus
, self
.n_regs
)
159 m
.submodules
.intregdeps
= intregdeps
161 m
.d
.comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.rd_rsel_o
)
162 m
.d
.comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.wr_rsel_o
)
164 m
.d
.comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.rd_rsel_o
)
165 m
.d
.comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.wr_rsel_o
)
167 m
.d
.comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
168 m
.d
.comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
169 self
.wr_pend_o
= intregdeps
.wr_pend_o
# also output for use in WaWGrid
171 m
.d
.comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
172 m
.d
.comb
+= intfudeps
.go_rd_i
.eq(self
.go_rd_i
)
173 m
.d
.comb
+= intfudeps
.go_wr_i
.eq(self
.go_wr_i
)
174 m
.d
.comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
175 m
.d
.comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
177 # Connect function issue / arrays, and dest/src1/src2
178 m
.d
.comb
+= intregdeps
.dest_i
.eq(self
.dest_i
)
179 m
.d
.comb
+= intregdeps
.src1_i
.eq(self
.src1_i
)
180 m
.d
.comb
+= intregdeps
.src2_i
.eq(self
.src2_i
)
182 m
.d
.comb
+= intregdeps
.go_rd_i
.eq(self
.go_rd_i
)
183 m
.d
.comb
+= intregdeps
.go_wr_i
.eq(self
.go_wr_i
)
184 m
.d
.comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
186 m
.d
.comb
+= self
.dest_rsel_o
.eq(intregdeps
.dest_rsel_o
)
187 m
.d
.comb
+= self
.src1_rsel_o
.eq(intregdeps
.src1_rsel_o
)
188 m
.d
.comb
+= self
.src2_rsel_o
.eq(intregdeps
.src2_rsel_o
)
193 class Scoreboard(Elaboratable
):
194 def __init__(self
, rwid
, n_regs
):
197 * :rwid: bit width of register file(s) - both FP and INT
198 * :n_regs: depth of register file(s) - number of FP and INT regs
204 self
.intregs
= RegFileArray(rwid
, n_regs
)
205 self
.fpregs
= RegFileArray(rwid
, n_regs
)
208 self
.int_store_i
= Signal(reset_less
=True) # instruction is a store
209 self
.int_dest_i
= Signal(max=n_regs
, reset_less
=True) # Dest R# in
210 self
.int_src1_i
= Signal(max=n_regs
, reset_less
=True) # oper1 R# in
211 self
.int_src2_i
= Signal(max=n_regs
, reset_less
=True) # oper2 R# in
212 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
215 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
216 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
218 # for branch speculation experiment. branch_direction = 0 if
219 # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
220 # branch_succ and branch_fail are requests to have the current
221 # instruction be dependent on the branch unit "shadow" capability.
222 self
.branch_succ_i
= Signal(reset_less
=True)
223 self
.branch_fail_i
= Signal(reset_less
=True)
224 self
.branch_direction_o
= Signal(2, reset_less
=True)
226 def elaborate(self
, platform
):
229 m
.submodules
.intregs
= self
.intregs
230 m
.submodules
.fpregs
= self
.fpregs
233 m
.d
.sync
+= self
.branch_succ_i
.eq(Const(0))
234 m
.d
.sync
+= self
.branch_fail_i
.eq(Const(0))
235 m
.d
.sync
+= self
.branch_direction_o
.eq(Const(0))
238 int_dest
= self
.intregs
.write_port("dest")
239 int_src1
= self
.intregs
.read_port("src1")
240 int_src2
= self
.intregs
.read_port("src2")
242 fp_dest
= self
.fpregs
.write_port("dest")
243 fp_src1
= self
.fpregs
.read_port("src1")
244 fp_src2
= self
.fpregs
.read_port("src2")
246 # Int ALUs and Comp Units
248 m
.submodules
.cu
= cu
= CompUnits(self
.rwid
, n_int_alus
)
249 m
.d
.comb
+= cu
.go_die_i
.eq(0)
250 bgt
= cu
.bgt
# get at the branch computation unit
253 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
)
255 # Count of number of FUs
256 n_int_fus
= n_int_alus
257 n_fp_fus
= 0 # for now
259 # Integer Priority Picker 1: Adder + Subtractor
260 intpick1
= GroupPicker(n_int_fus
) # picks between add, sub, mul and shf
261 m
.submodules
.intpick1
= intpick1
264 regdecode
= RegDecode(self
.n_regs
)
265 m
.submodules
.regdecode
= regdecode
266 issueunit
= IntFPIssueUnit(self
.n_regs
, n_int_fus
, n_fp_fus
)
267 m
.submodules
.issueunit
= issueunit
269 # Shadow Matrix. currently n_int_fus shadows, to be used for
270 # write-after-write hazards. NOTE: there is one extra for branches,
271 # so the shadow width is increased by 1
272 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_int_fus
, n_int_fus
+1)
274 # combined go_rd/wr + go_die (go_die used to reset latches)
275 go_rd_rst
= Signal(n_int_fus
, reset_less
=True)
276 go_wr_rst
= Signal(n_int_fus
, reset_less
=True)
277 # record previous instruction to cast shadow on current instruction
278 fn_issue_prev
= Signal(n_int_fus
)
279 prev_shadow
= Signal(n_int_fus
)
281 # Branch Speculation recorder. tracks the success/fail state as
282 # each instruction is issued, so that when the branch occurs the
283 # allow/cancel can be issued as appropriate.
284 m
.submodules
.specrec
= bspec
= BranchSpeculationRecord(n_int_fus
)
287 # ok start wiring things together...
288 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
289 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
293 # Issue Unit is where it starts. set up some in/outs for this module
295 m
.d
.comb
+= [issueunit
.i
.store_i
.eq(self
.int_store_i
),
296 regdecode
.dest_i
.eq(self
.int_dest_i
),
297 regdecode
.src1_i
.eq(self
.int_src1_i
),
298 regdecode
.src2_i
.eq(self
.int_src2_i
),
299 regdecode
.enable_i
.eq(self
.reg_enable_i
),
300 issueunit
.i
.dest_i
.eq(regdecode
.dest_o
),
301 self
.issue_o
.eq(issueunit
.issue_o
)
303 self
.int_insn_i
= issueunit
.i
.insn_i
# enabled by instruction decode
305 # connect global rd/wr pending vector (for WaW detection)
306 m
.d
.sync
+= issueunit
.i
.g_wr_pend_i
.eq(intfus
.g_int_wr_pend_o
)
307 # TODO: issueunit.f (FP)
309 # and int function issue / busy arrays, and dest/src1/src2
310 m
.d
.comb
+= intfus
.dest_i
.eq(regdecode
.dest_o
)
311 m
.d
.comb
+= intfus
.src1_i
.eq(regdecode
.src1_o
)
312 m
.d
.comb
+= intfus
.src2_i
.eq(regdecode
.src2_o
)
314 fn_issue_o
= issueunit
.i
.fn_issue_o
316 m
.d
.comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
317 m
.d
.comb
+= issueunit
.i
.busy_i
.eq(cu
.busy_o
)
318 m
.d
.comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
321 # connect fu-fu matrix
324 # Group Picker... done manually for now.
325 go_rd_o
= intpick1
.go_rd_o
326 go_wr_o
= intpick1
.go_wr_o
327 go_rd_i
= intfus
.go_rd_i
328 go_wr_i
= intfus
.go_wr_i
329 # NOTE: connect to the shadowed versions so that they can "die" (reset)
330 m
.d
.comb
+= go_rd_i
[0:n_int_fus
].eq(go_rd_rst
[0:n_int_fus
]) # rd
331 m
.d
.comb
+= go_wr_i
[0:n_int_fus
].eq(go_wr_rst
[0:n_int_fus
]) # wr
335 m
.d
.comb
+= intpick1
.rd_rel_i
[0:n_int_fus
].eq(cu
.rd_rel_o
[0:n_int_fus
])
336 m
.d
.comb
+= intpick1
.req_rel_i
[0:n_int_fus
].eq(cu
.req_rel_o
[0:n_int_fus
])
337 int_rd_o
= intfus
.readable_o
338 int_wr_o
= intfus
.writable_o
339 m
.d
.comb
+= intpick1
.readable_i
[0:n_int_fus
].eq(int_rd_o
[0:n_int_fus
])
340 m
.d
.comb
+= intpick1
.writable_i
[0:n_int_fus
].eq(int_wr_o
[0:n_int_fus
])
346 m
.d
.comb
+= shadows
.issue_i
.eq(fn_issue_o
)
347 # these are explained in ShadowMatrix docstring, and are to be
348 # connected to the FUReg and FUFU Matrices, to get them to reset
349 # NOTE: do NOT connect these to the Computation Units. The CUs need to
350 # do something slightly different (due to the revolving-door SRLatches)
351 m
.d
.comb
+= go_rd_rst
.eq(go_rd_o | shadows
.go_die_o
)
352 m
.d
.comb
+= go_wr_rst
.eq(go_wr_o | shadows
.go_die_o
)
355 # NOTE: this setup is for the instruction order preservation...
357 # connect shadows / go_dies to Computation Units
358 m
.d
.comb
+= cu
.shadown_i
[0:n_int_fus
].eq(shadows
.shadown_o
[0:n_int_fus
])
359 m
.d
.comb
+= cu
.go_die_i
[0:n_int_fus
].eq(shadows
.go_die_o
[0:n_int_fus
])
361 # ok connect first n_int_fu shadows to busy lines, to create an
362 # instruction-order linked-list-like arrangement, using a bit-matrix
363 # (instead of e.g. a ring buffer).
366 # when written, the shadow can be cancelled (and was good)
367 m
.d
.comb
+= shadows
.s_good_i
[0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
])
369 # work out the current-activated busy unit (by recording the old one)
370 with m
.If(fn_issue_o
): # only update prev bit if instruction issued
371 m
.d
.sync
+= fn_issue_prev
.eq(fn_issue_o
)
373 # *previous* instruction shadows *current* instruction, and, obviously,
374 # if the previous is completed (!busy) don't cast the shadow!
375 m
.d
.comb
+= prev_shadow
.eq(~fn_issue_o
& fn_issue_prev
& cu
.busy_o
)
376 for i
in range(n_int_fus
):
377 m
.d
.comb
+= shadows
.shadow_i
[i
][0:n_int_fus
].eq(prev_shadow
)
380 # ... and this is for branch speculation. it uses the extra bit
381 # tacked onto the ShadowMatrix (hence shadow_wid=n_int_fus+1)
382 # only needs to set shadow_i, s_fail_i and s_good_i
384 m
.d
.comb
+= shadows
.s_good_i
[n_int_fus
].eq(bspec
.good_o
[i
])
385 m
.d
.comb
+= shadows
.s_fail_i
[n_int_fus
].eq(bspec
.fail_o
[i
])
387 with m
.If(self
.branch_succ_i | self
.branch_fail_i
):
388 for i
in range(n_int_fus
):
389 m
.d
.comb
+= shadows
.shadow_i
[i
][n_int_fus
].eq(1)
391 # finally, we need an indicator to the test infrastructure as to
392 # whether the branch succeeded or failed, plus, link up to the
393 # "recorder" of whether the instruction was under shadow or not
395 m
.d
.comb
+= bspec
.issue_i
.eq(fn_issue_o
)
396 m
.d
.comb
+= bspec
.good_i
.eq(self
.branch_succ_i
)
397 m
.d
.comb
+= bspec
.fail_i
.eq(self
.branch_fail_i
)
398 # branch is active (TODO: a better signal: this is over-using the
399 # go_write signal - actually the branch should not be "writing")
400 with m
.If(cu
.br1
.go_wr_i
):
401 m
.d
.sync
+= self
.branch_direction_o
.eq(cu
.br1
.data_o
+Const(1, 2))
402 m
.d
.comb
+= bspec
.branch_i
.eq(1)
405 # Connect Register File(s)
407 print ("intregdeps wen len", len(intfus
.dest_rsel_o
))
408 m
.d
.comb
+= int_dest
.wen
.eq(intfus
.dest_rsel_o
)
409 m
.d
.comb
+= int_src1
.ren
.eq(intfus
.src1_rsel_o
)
410 m
.d
.comb
+= int_src2
.ren
.eq(intfus
.src2_rsel_o
)
412 # connect ALUs to regfile
413 m
.d
.comb
+= int_dest
.data_i
.eq(cu
.dest_o
)
414 m
.d
.comb
+= cu
.src1_data_i
.eq(int_src1
.data_o
)
415 m
.d
.comb
+= cu
.src2_data_i
.eq(int_src2
.data_o
)
417 # connect ALU Computation Units
418 m
.d
.comb
+= cu
.go_rd_i
[0:n_int_fus
].eq(go_rd_o
[0:n_int_fus
])
419 m
.d
.comb
+= cu
.go_wr_i
[0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
])
420 m
.d
.comb
+= cu
.issue_i
[0:n_int_fus
].eq(fn_issue_o
[0:n_int_fus
])
426 yield from self
.intregs
427 yield from self
.fpregs
428 yield self
.int_store_i
429 yield self
.int_dest_i
430 yield self
.int_src1_i
431 yield self
.int_src2_i
433 yield self
.branch_succ_i
434 yield self
.branch_fail_i
435 yield self
.branch_direction_o
450 def __init__(self
, rwidth
, nregs
):
452 self
.regs
= [0] * nregs
454 def op(self
, op
, src1
, src2
, dest
):
455 maxbits
= (1 << self
.rwidth
) - 1
456 src1
= self
.regs
[src1
] & maxbits
457 src2
= self
.regs
[src2
] & maxbits
465 val
= src1
>> (src2
& maxbits
)
467 val
= int(src1
> src2
)
469 val
= int(src1
< src2
)
471 val
= int(src1
== src2
)
473 val
= int(src1
!= src2
)
475 self
.regs
[dest
] = val
def setval(self, dest, val):
    """Overwrite one entry of the register model.

    * :dest: index into ``self.regs`` of the entry to overwrite
    * :val: value stored at that index (stored as given, no masking)
    """
    model_regs = self.regs
    model_regs[dest] = val
481 for i
, val
in enumerate(self
.regs
):
482 reg
= yield dut
.intregs
.regs
[i
].reg
483 okstr
= "OK" if reg
== val
else "!ok"
484 print("reg %d expected %x received %x %s" % (i
, val
, reg
, okstr
))
486 def check(self
, dut
):
487 for i
, val
in enumerate(self
.regs
):
488 reg
= yield dut
.intregs
.regs
[i
].reg
490 print("reg %d expected %x received %x\n" % (i
, val
, reg
))
491 yield from self
.dump(dut
)
def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
    """Generator that presents one integer instruction to ``dut``.

    Yields, in order: an assignment of 0 to every bit of
    ``dut.int_insn_i``, the destination / source register numbers,
    an assignment of 1 to bit ``op`` of ``dut.int_insn_i``, the
    register-decode enable, and lastly the two branch-shadow requests.

    * :dut: object exposing int_insn_i, int_dest_i, int_src1_i,
      int_src2_i, reg_enable_i, branch_fail_i and branch_succ_i
    * :op: index of the int_insn_i bit to set
    * :src1: value assigned to int_src1_i
    * :src2: value assigned to int_src2_i
    * :dest: value assigned to int_dest_i
    * :branch_success: value assigned to branch_succ_i
    * :branch_fail: value assigned to branch_fail_i
    """
    insn = dut.int_insn_i

    # clear every instruction-select bit before choosing the new one
    for bit_idx in range(len(insn)):
        yield insn[bit_idx].eq(0)

    # register numbers: destination first, then the two sources
    reg_fields = (
        (dut.int_dest_i, dest),
        (dut.int_src1_i, src1),
        (dut.int_src2_i, src2),
    )
    for field, regnum in reg_fields:
        yield field.eq(regnum)

    yield insn[op].eq(1)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)
509 def print_reg(dut
, rnums
):
512 reg
= yield dut
.intregs
.regs
[rnum
].reg
513 rs
.append("%x" % reg
)
514 rnums
= map(str, rnums
)
515 print ("reg %s: %s" % (','.join(rnums
), ','.join(rs
)))
518 def create_random_ops(n_ops
, shadowing
=False):
520 for i
in range(n_ops
):
521 src1
= randint(1, dut
.n_regs
-1)
522 src2
= randint(1, dut
.n_regs
-1)
523 dest
= randint(1, dut
.n_regs
-1)
527 instrs
.append((src1
, src2
, dest
, op
, (False, False)))
529 instrs
.append((src1
, src2
, dest
, op
))
533 def wait_for_busy_clear(dut
):
535 busy_o
= yield dut
.busy_o
542 def wait_for_issue(dut
):
544 issue_o
= yield dut
.issue_o
546 for i
in range(len(dut
.int_insn_i
)):
547 yield dut
.int_insn_i
[i
].eq(0)
548 yield dut
.reg_enable_i
.eq(0)
551 #yield from print_reg(dut, [1,2,3])
553 #yield from print_reg(dut, [1,2,3])
555 def scoreboard_branch_sim(dut
, alusim
):
557 yield dut
.int_store_i
.eq(1)
561 # set random values in the registers
562 for i
in range(1, dut
.n_regs
):
564 val
= randint(0, (1<<alusim
.rwidth
)-1)
565 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
566 alusim
.setval(i
, val
)
568 # create some instructions: branches create a tree
569 insts
= create_random_ops(5)
571 src1
= randint(1, dut
.n_regs
-1)
572 src2
= randint(1, dut
.n_regs
-1)
575 branch_ok
= create_random_ops(5)
576 branch_fail
= create_random_ops(5)
578 insts
.append((src1
, src2
, (branch_ok
, branch_fail
), op
, (0, 0)))
580 # issue instruction(s)
586 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = insts
.pop()
587 if branch_direction
== 1 and shadow_off
:
588 continue # branch was "success" and this is a "failed"... skip
589 if branch_direction
== 2 and shadow_on
:
590 continue # branch was "fail" and this is a "success"... skip
593 branch_ok
, branch_fail
= dest
595 # ok zip up the branch success / fail instructions and
596 # drop them into the queue, one marked "to have branch success"
597 # the other to be marked shadow branch "fail".
598 # one out of each of these will be cancelled
599 for ok
, fl
in zip(branch_ok
, branch_fail
):
600 instrs
.append((ok
[0], ok
[1], ok
[2], ok
[3], (1, 0)))
601 instrs
.append((fl
[0], fl
[1], fl
[2], fl
[3], (0, 1)))
602 print ("instr %d: (%d, %d, %d, %d)" % (i
, src1
, src2
, dest
, op
))
603 yield from int_instr(dut
, op
, src1
, src2
, dest
,
604 shadow_on
, shadow_off
)
606 yield from wait_for_issue(dut
)
607 branch_direction
= dut
.branch_direction_o
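# which way branch went. NOTE(review): no `yield` here, unlike the other signal reads in this file (e.g. dut.busy_o) - presumably this binds the signal object rather than its simulated value; confirm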
609 # wait for all instructions to stop before checking
611 yield from wait_for_busy_clear(dut
)
613 for (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) in insts
:
616 branch_ok
, branch_fail
= dest
618 branch_res
= alusim
.op(op
, src1
, src2
, dest
)
621 insts
.append(branch_ok
)
623 insts
.append(branch_fail
)
626 yield from alusim
.check(dut
)
627 yield from alusim
.dump(dut
)
630 def scoreboard_sim(dut
, alusim
):
632 yield dut
.int_store_i
.eq(1)
636 # set random values in the registers
637 for i
in range(1, dut
.n_regs
):
639 val
= randint(0, (1<<alusim
.rwidth
)-1)
640 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
641 alusim
.setval(i
, val
)
643 # create some instructions (some random, some regression tests)
647 src1
= randint(1, dut
.n_regs
-1)
648 src2
= randint(1, dut
.n_regs
-1)
650 dest
= randint(1, dut
.n_regs
-1)
652 if dest
not in [src1
, src2
]:
662 instrs
.append((src1
, src2
, dest
, op
))
665 instrs
.append((2, 3, 3, 0))
666 instrs
.append((5, 3, 3, 1))
669 instrs
.append((5, 6, 2, 1))
670 instrs
.append((2, 2, 4, 0))
671 #instrs.append((2, 2, 3, 1))
674 instrs
.append((2, 1, 2, 3))
677 instrs
.append((2, 6, 2, 1))
678 instrs
.append((2, 1, 2, 0))
681 instrs
.append((1, 2, 7, 2))
682 instrs
.append((7, 1, 5, 0))
683 instrs
.append((4, 4, 1, 1))
686 instrs
.append((5, 6, 2, 2))
687 instrs
.append((1, 1, 4, 1))
688 instrs
.append((6, 5, 3, 0))
691 # Write-after-Write Hazard
692 instrs
.append( (3, 6, 7, 2) )
693 instrs
.append( (4, 4, 7, 1) )
696 # self-read/write-after-write followed by Read-after-Write
697 instrs
.append((1, 1, 1, 1))
698 instrs
.append((1, 5, 3, 0))
701 # Read-after-Write followed by self-read-after-write
702 instrs
.append((5, 6, 1, 2))
703 instrs
.append((1, 1, 1, 1))
706 # self-read-write sandwich
707 instrs
.append((5, 6, 1, 2))
708 instrs
.append((1, 1, 1, 1))
709 instrs
.append((1, 5, 3, 0))
713 instrs
.append( (5, 2, 5, 2) )
714 instrs
.append( (2, 6, 3, 0) )
715 instrs
.append( (4, 2, 2, 1) )
717 # issue instruction(s), wait for issue to be free before proceeding
718 for i
, (src1
, src2
, dest
, op
) in enumerate(instrs
):
720 print ("instr %d: (%d, %d, %d, %d)" % (i
, src1
, src2
, dest
, op
))
721 alusim
.op(op
, src1
, src2
, dest
)
722 yield from int_instr(dut
, op
, src1
, src2
, dest
, 0, 0)
724 yield from wait_for_issue(dut
)
726 # wait for all instructions to stop before checking
728 yield from wait_for_busy_clear(dut
)
731 yield from alusim
.check(dut
)
732 yield from alusim
.dump(dut
)
735 def test_scoreboard():
736 dut
= Scoreboard(16, 8)
737 alusim
= RegSim(16, 8)
738 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
739 with
open("test_scoreboard6600.il", "w") as f
:
742 run_simulation(dut
, scoreboard_sim(dut
, alusim
),
743 vcd_name
='test_scoreboard6600.vcd')
746 if __name__
== '__main__':