1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
7 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
8 from scoreboard
.global_pending
import GlobalPending
9 from scoreboard
.group_picker
import GroupPicker
10 from scoreboard
.issue_unit
import IntFPIssueUnit
, RegDecode
11 from scoreboard
.shadow
import ShadowMatrix
, BranchSpeculationRecord
13 from compalu
import ComputationUnitNoDelay
15 from alu_hier
import ALU
, BranchALU
16 from nmutil
.latch
import SRLatch
18 from random
import randint
, seed
19 from copy
import deepcopy
22 class CompUnits(Elaboratable
):
24 def __init__(self
, rwid
, n_units
):
27 * :rwid: bit width of register file(s) - both FP and INT
28 * :n_units: number of ALUs
30 Note: bgt unit is returned so that a shadow unit can be created
34 self
.n_units
= n_units
38 self
.issue_i
= Signal(n_units
, reset_less
=True)
39 self
.go_rd_i
= Signal(n_units
, reset_less
=True)
40 self
.go_wr_i
= Signal(n_units
, reset_less
=True)
41 self
.shadown_i
= Signal(n_units
, reset_less
=True)
42 self
.go_die_i
= Signal(n_units
, reset_less
=True)
45 self
.busy_o
= Signal(n_units
, reset_less
=True)
46 self
.rd_rel_o
= Signal(n_units
, reset_less
=True)
47 self
.req_rel_o
= Signal(n_units
, reset_less
=True)
49 # in/out register data (note: not register#, actual data)
50 self
.dest_o
= Signal(rwid
, reset_less
=True)
51 self
.src1_data_i
= Signal(rwid
, reset_less
=True)
52 self
.src2_data_i
= Signal(rwid
, reset_less
=True)
55 self
.bgt
= BranchALU(self
.rwid
)
56 self
.br1
= ComputationUnitNoDelay(self
.rwid
, 3, self
.bgt
)
58 def elaborate(self
, platform
):
70 m
.submodules
.comp1
= comp1
= ComputationUnitNoDelay(self
.rwid
, 2, add
)
71 m
.submodules
.comp2
= comp2
= ComputationUnitNoDelay(self
.rwid
, 2, sub
)
72 m
.submodules
.comp3
= comp3
= ComputationUnitNoDelay(self
.rwid
, 2, mul
)
73 m
.submodules
.comp4
= comp4
= ComputationUnitNoDelay(self
.rwid
, 2, shf
)
74 m
.submodules
.br1
= br1
= self
.br1
75 int_alus
= [comp1
, comp2
, comp3
, comp4
, br1
]
77 comb
+= comp1
.oper_i
.eq(Const(0, 2)) # op=add
78 comb
+= comp2
.oper_i
.eq(Const(1, 2)) # op=sub
79 comb
+= comp3
.oper_i
.eq(Const(2, 2)) # op=mul
80 comb
+= comp4
.oper_i
.eq(Const(3, 2)) # op=shf
81 comb
+= br1
.oper_i
.eq(Const(4, 3)) # op=bgt
92 req_rel_l
.append(alu
.req_rel_o
)
93 rd_rel_l
.append(alu
.rd_rel_o
)
94 shadow_l
.append(alu
.shadown_i
)
95 godie_l
.append(alu
.go_die_i
)
96 go_wr_l
.append(alu
.go_wr_i
)
97 go_rd_l
.append(alu
.go_rd_i
)
98 issue_l
.append(alu
.issue_i
)
99 busy_l
.append(alu
.busy_o
)
100 comb
+= self
.rd_rel_o
.eq(Cat(*rd_rel_l
))
101 comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
102 comb
+= self
.busy_o
.eq(Cat(*busy_l
))
103 comb
+= Cat(*godie_l
).eq(self
.go_die_i
)
104 comb
+= Cat(*shadow_l
).eq(self
.shadown_i
)
105 comb
+= Cat(*go_wr_l
).eq(self
.go_wr_i
)
106 comb
+= Cat(*go_rd_l
).eq(self
.go_rd_i
)
107 comb
+= Cat(*issue_l
).eq(self
.issue_i
)
109 # connect data register input/output
111 # merge (OR) all integer FU / ALU outputs to a single value
112 # bit of a hack: treereduce needs a list with an item named "dest_o"
113 dest_o
= treereduce(int_alus
)
114 comb
+= self
.dest_o
.eq(dest_o
)
116 for i
, alu
in enumerate(int_alus
):
117 comb
+= alu
.src1_i
.eq(self
.src1_data_i
)
118 comb
+= alu
.src2_i
.eq(self
.src2_data_i
)
123 class FunctionUnits(Elaboratable
):
125 def __init__(self
, n_regs
, n_int_alus
):
127 self
.n_int_alus
= n_int_alus
129 self
.dest_i
= Signal(n_regs
, reset_less
=True) # Dest R# in
130 self
.src1_i
= Signal(n_regs
, reset_less
=True) # oper1 R# in
131 self
.src2_i
= Signal(n_regs
, reset_less
=True) # oper2 R# in
133 self
.g_int_rd_pend_o
= Signal(n_regs
, reset_less
=True)
134 self
.g_int_wr_pend_o
= Signal(n_regs
, reset_less
=True)
136 self
.dest_rsel_o
= Signal(n_regs
, reset_less
=True) # dest reg (bot)
137 self
.src1_rsel_o
= Signal(n_regs
, reset_less
=True) # src1 reg (bot)
138 self
.src2_rsel_o
= Signal(n_regs
, reset_less
=True) # src2 reg (bot)
140 self
.req_rel_i
= Signal(n_int_alus
, reset_less
= True)
141 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
142 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
144 self
.go_rd_i
= Signal(n_int_alus
, reset_less
=True)
145 self
.go_wr_i
= Signal(n_int_alus
, reset_less
=True)
146 self
.go_die_i
= Signal(n_int_alus
, reset_less
=True)
147 self
.req_rel_o
= Signal(n_int_alus
, reset_less
=True)
148 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
150 # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
152 def elaborate(self
, platform
):
157 n_int_fus
= self
.n_int_alus
159 # Integer FU-FU Dep Matrix
160 intfudeps
= FUFUDepMatrix(n_int_fus
, n_int_fus
)
161 m
.submodules
.intfudeps
= intfudeps
162 # Integer FU-Reg Dep Matrix
163 intregdeps
= FURegDepMatrix(n_int_fus
, self
.n_regs
)
164 m
.submodules
.intregdeps
= intregdeps
166 comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.rd_rsel_o
)
167 comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.wr_rsel_o
)
169 comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.rd_rsel_o
)
170 comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.wr_rsel_o
)
172 comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
173 comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
174 self
.wr_pend_o
= intregdeps
.wr_pend_o
# also output for use in WaWGrid
176 comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
177 comb
+= intfudeps
.go_rd_i
.eq(self
.go_rd_i
)
178 comb
+= intfudeps
.go_wr_i
.eq(self
.go_wr_i
)
179 comb
+= intfudeps
.go_die_i
.eq(self
.go_die_i
)
180 comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
181 comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
183 # Connect function issue / arrays, and dest/src1/src2
184 comb
+= intregdeps
.dest_i
.eq(self
.dest_i
)
185 comb
+= intregdeps
.src1_i
.eq(self
.src1_i
)
186 comb
+= intregdeps
.src2_i
.eq(self
.src2_i
)
188 comb
+= intregdeps
.go_rd_i
.eq(self
.go_rd_i
)
189 comb
+= intregdeps
.go_wr_i
.eq(self
.go_wr_i
)
190 comb
+= intregdeps
.go_die_i
.eq(self
.go_die_i
)
191 comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
193 comb
+= self
.dest_rsel_o
.eq(intregdeps
.dest_rsel_o
)
194 comb
+= self
.src1_rsel_o
.eq(intregdeps
.src1_rsel_o
)
195 comb
+= self
.src2_rsel_o
.eq(intregdeps
.src2_rsel_o
)
200 class Scoreboard(Elaboratable
):
201 def __init__(self
, rwid
, n_regs
):
204 * :rwid: bit width of register file(s) - both FP and INT
205 * :n_regs: depth of register file(s) - number of FP and INT regs
211 self
.intregs
= RegFileArray(rwid
, n_regs
)
212 self
.fpregs
= RegFileArray(rwid
, n_regs
)
215 self
.int_store_i
= Signal(reset_less
=True) # instruction is a store
216 self
.int_dest_i
= Signal(max=n_regs
, reset_less
=True) # Dest R# in
217 self
.int_src1_i
= Signal(max=n_regs
, reset_less
=True) # oper1 R# in
218 self
.int_src2_i
= Signal(max=n_regs
, reset_less
=True) # oper2 R# in
219 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
222 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
223 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
225 # for branch speculation experiment. branch_direction = 0 if
226 # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
227 # branch_succ and branch_fail are requests to have the current
228 # instruction be dependent on the branch unit "shadow" capability.
229 self
.branch_succ_i
= Signal(reset_less
=True)
230 self
.branch_fail_i
= Signal(reset_less
=True)
231 self
.branch_direction_o
= Signal(2, reset_less
=True)
233 def elaborate(self
, platform
):
238 m
.submodules
.intregs
= self
.intregs
239 m
.submodules
.fpregs
= self
.fpregs
242 int_dest
= self
.intregs
.write_port("dest")
243 int_src1
= self
.intregs
.read_port("src1")
244 int_src2
= self
.intregs
.read_port("src2")
246 fp_dest
= self
.fpregs
.write_port("dest")
247 fp_src1
= self
.fpregs
.read_port("src1")
248 fp_src2
= self
.fpregs
.read_port("src2")
250 # Int ALUs and Comp Units
252 m
.submodules
.cu
= cu
= CompUnits(self
.rwid
, n_int_alus
)
253 comb
+= cu
.go_die_i
.eq(0)
254 bgt
= cu
.bgt
# get at the branch computation unit
257 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
)
259 # Count of number of FUs
260 n_int_fus
= n_int_alus
261 n_fp_fus
= 0 # for now
263 # Integer Priority Picker 1: Adder + Subtractor
264 intpick1
= GroupPicker(n_int_fus
) # picks between add, sub, mul and shf
265 m
.submodules
.intpick1
= intpick1
268 regdecode
= RegDecode(self
.n_regs
)
269 m
.submodules
.regdecode
= regdecode
270 issueunit
= IntFPIssueUnit(self
.n_regs
, n_int_fus
, n_fp_fus
)
271 m
.submodules
.issueunit
= issueunit
273 # Shadow Matrix. currently n_int_fus shadows, to be used for
274 # write-after-write hazards. NOTE: there is one extra for branches,
275 # so the shadow width is increased by 1
276 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_int_fus
, n_int_fus
)
277 m
.submodules
.bshadow
= bshadow
= ShadowMatrix(n_int_fus
, 1)
279 # record previous instruction to cast shadow on current instruction
280 fn_issue_prev
= Signal(n_int_fus
)
281 prev_shadow
= Signal(n_int_fus
)
283 # Branch Speculation recorder. tracks the success/fail state as
284 # each instruction is issued, so that when the branch occurs the
285 # allow/cancel can be issued as appropriate.
286 m
.submodules
.specrec
= bspec
= BranchSpeculationRecord(n_int_fus
)
289 # ok start wiring things together...
290 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
291 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
295 # Issue Unit is where it starts. set up some in/outs for this module
297 comb
+= [issueunit
.i
.store_i
.eq(self
.int_store_i
),
298 regdecode
.dest_i
.eq(self
.int_dest_i
),
299 regdecode
.src1_i
.eq(self
.int_src1_i
),
300 regdecode
.src2_i
.eq(self
.int_src2_i
),
301 regdecode
.enable_i
.eq(self
.reg_enable_i
),
302 issueunit
.i
.dest_i
.eq(regdecode
.dest_o
),
303 self
.issue_o
.eq(issueunit
.issue_o
)
305 self
.int_insn_i
= issueunit
.i
.insn_i
# enabled by instruction decode
307 # connect global rd/wr pending vector (for WaW detection)
308 sync
+= issueunit
.i
.g_wr_pend_i
.eq(intfus
.g_int_wr_pend_o
)
309 # TODO: issueunit.f (FP)
311 # and int function issue / busy arrays, and dest/src1/src2
312 comb
+= intfus
.dest_i
.eq(regdecode
.dest_o
)
313 comb
+= intfus
.src1_i
.eq(regdecode
.src1_o
)
314 comb
+= intfus
.src2_i
.eq(regdecode
.src2_o
)
316 fn_issue_o
= issueunit
.i
.fn_issue_o
318 comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
319 comb
+= issueunit
.i
.busy_i
.eq(cu
.busy_o
)
320 comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
323 # merge shadow matrices outputs
326 # these are explained in ShadowMatrix docstring, and are to be
327 # connected to the FUReg and FUFU Matrices, to get them to reset
328 anydie
= Signal(n_int_fus
, reset_less
=True)
329 allshadown
= Signal(n_int_fus
, reset_less
=True)
330 comb
+= allshadown
.eq(shadows
.shadown_o
& bshadow
.shadown_o
)
331 comb
+= anydie
.eq(shadows
.go_die_o | bshadow
.go_die_o
)
334 # connect fu-fu matrix
337 # Group Picker... done manually for now.
338 go_rd_o
= intpick1
.go_rd_o
339 go_wr_o
= intpick1
.go_wr_o
340 go_rd_i
= intfus
.go_rd_i
341 go_wr_i
= intfus
.go_wr_i
342 go_die_i
= intfus
.go_die_i
343 # NOTE: connect to the shadowed versions so that they can "die" (reset)
344 comb
+= go_rd_i
[0:n_int_fus
].eq(go_rd_o
[0:n_int_fus
]) # rd
345 comb
+= go_wr_i
[0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
]) # wr
346 comb
+= go_die_i
[0:n_int_fus
].eq(anydie
[0:n_int_fus
]) # die
350 comb
+= intpick1
.rd_rel_i
[0:n_int_fus
].eq(cu
.rd_rel_o
[0:n_int_fus
])
351 comb
+= intpick1
.req_rel_i
[0:n_int_fus
].eq(cu
.req_rel_o
[0:n_int_fus
])
352 int_rd_o
= intfus
.readable_o
353 int_wr_o
= intfus
.writable_o
354 comb
+= intpick1
.readable_i
[0:n_int_fus
].eq(int_rd_o
[0:n_int_fus
])
355 comb
+= intpick1
.writable_i
[0:n_int_fus
].eq(int_wr_o
[0:n_int_fus
])
361 comb
+= shadows
.issue_i
.eq(fn_issue_o
)
363 # NOTE: this setup is for the instruction order preservation...
365 # connect shadows / go_dies to Computation Units
366 comb
+= cu
.shadown_i
[0:n_int_fus
].eq(allshadown
)
367 comb
+= cu
.go_die_i
[0:n_int_fus
].eq(anydie
)
369 # ok connect first n_int_fu shadows to busy lines, to create an
370 # instruction-order linked-list-like arrangement, using a bit-matrix
371 # (instead of e.g. a ring buffer).
374 # when written, the shadow can be cancelled (and was good)
375 for i
in range(n_int_fus
):
376 comb
+= shadows
.s_good_i
[i
][0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
])
378 # work out the current-activated busy unit (by recording the old one)
379 with m
.If(fn_issue_o
): # only update prev bit if instruction issued
380 sync
+= fn_issue_prev
.eq(fn_issue_o
)
382 # *previous* instruction shadows *current* instruction, and, obviously,
383 # if the previous is completed (!busy) don't cast the shadow!
384 comb
+= prev_shadow
.eq(~fn_issue_o
& fn_issue_prev
& cu
.busy_o
)
385 for i
in range(n_int_fus
):
386 comb
+= shadows
.shadow_i
[i
][0:n_int_fus
].eq(prev_shadow
)
389 # ... and this is for branch speculation. it uses the extra bit
390 # tacked onto the ShadowMatrix (hence shadow_wid=n_int_fus+1)
391 # only needs to set shadow_i, s_fail_i and s_good_i
393 # issue captures shadow_i (if enabled)
394 comb
+= bshadow
.issue_i
.eq(fn_issue_o
)
396 # instruction being issued (fn_issue_o) has a shadow cast by the branch
397 with m
.If(self
.branch_succ_i | self
.branch_fail_i
):
398 for i
in range(n_int_fus
):
399 with m
.If(fn_issue_o
& (Const(1<<i
))):
400 comb
+= bshadow
.shadow_i
[i
][0].eq(1)
402 # finally, we need an indicator to the test infrastructure as to
403 # whether the branch succeeded or failed, plus, link up to the
404 # "recorder" of whether the instruction was under shadow or not
406 with m
.If(cu
.br1
.issue_i
):
407 sync
+= bspec
.active_i
.eq(1)
408 with m
.If(self
.branch_succ_i
):
409 comb
+= bspec
.good_i
.eq(fn_issue_o
& 0xf)
410 with m
.If(self
.branch_fail_i
):
411 comb
+= bspec
.fail_i
.eq(fn_issue_o
& 0xf)
413 # branch is active (TODO: a better signal: this is over-using the
414 # go_write signal - actually the branch should not be "writing")
415 with m
.If(cu
.br1
.go_wr_i
):
416 sync
+= self
.branch_direction_o
.eq(cu
.br1
.data_o
+Const(1, 2))
417 sync
+= bspec
.active_i
.eq(0)
418 comb
+= bspec
.br_i
.eq(1)
419 # branch occurs if data == 1, failed if data == 0
420 comb
+= bspec
.br_ok_i
.eq(cu
.br1
.data_o
== 1)
421 for i
in range(n_int_fus
):
422 # *expected* direction of the branch matched against *actual*
423 comb
+= bshadow
.s_good_i
[i
][0].eq(bspec
.match_g_o
[i
])
425 comb
+= bshadow
.s_fail_i
[i
][0].eq(bspec
.match_f_o
[i
])
428 # Connect Register File(s)
430 print ("intregdeps wen len", len(intfus
.dest_rsel_o
))
431 comb
+= int_dest
.wen
.eq(intfus
.dest_rsel_o
)
432 comb
+= int_src1
.ren
.eq(intfus
.src1_rsel_o
)
433 comb
+= int_src2
.ren
.eq(intfus
.src2_rsel_o
)
435 # connect ALUs to regfile
436 comb
+= int_dest
.data_i
.eq(cu
.dest_o
)
437 comb
+= cu
.src1_data_i
.eq(int_src1
.data_o
)
438 comb
+= cu
.src2_data_i
.eq(int_src2
.data_o
)
440 # connect ALU Computation Units
441 comb
+= cu
.go_rd_i
[0:n_int_fus
].eq(go_rd_o
[0:n_int_fus
])
442 comb
+= cu
.go_wr_i
[0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
])
443 comb
+= cu
.issue_i
[0:n_int_fus
].eq(fn_issue_o
[0:n_int_fus
])
449 yield from self
.intregs
450 yield from self
.fpregs
451 yield self
.int_store_i
452 yield self
.int_dest_i
453 yield self
.int_src1_i
454 yield self
.int_src2_i
456 yield self
.branch_succ_i
457 yield self
.branch_fail_i
458 yield self
.branch_direction_o
473 def __init__(self
, rwidth
, nregs
):
475 self
.regs
= [0] * nregs
477 def op(self
, op
, src1
, src2
, dest
):
478 maxbits
= (1 << self
.rwidth
) - 1
479 src1
= self
.regs
[src1
] & maxbits
480 src2
= self
.regs
[src2
] & maxbits
488 val
= src1
>> (src2
& maxbits
)
490 val
= int(src1
> src2
)
492 val
= int(src1
< src2
)
494 val
= int(src1
== src2
)
496 val
= int(src1
!= src2
)
498 self
.setval(dest
, val
)
def setval(self, dest, val):
    """Store *val* as the modelled contents of register number *dest*.

    The update is echoed to stdout (value shown in hexadecimal) before
    the store, so a simulation log lists every modelled register write
    in the order it was made.
    """
    shown = hex(val)
    print ("sim setval", dest, shown)
    self.regs[dest] = val
506 for i
, val
in enumerate(self
.regs
):
507 reg
= yield dut
.intregs
.regs
[i
].reg
508 okstr
= "OK" if reg
== val
else "!ok"
509 print("reg %d expected %x received %x %s" % (i
, val
, reg
, okstr
))
511 def check(self
, dut
):
512 for i
, val
in enumerate(self
.regs
):
513 reg
= yield dut
.intregs
.regs
[i
].reg
515 print("reg %d expected %x received %x\n" % (i
, val
, reg
))
516 yield from self
.dump(dut
)
def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
    """Present one integer instruction on the inputs of *dut*.

    This is a generator: it yields the result of each ``.eq(...)`` call,
    in order, for whatever is driving it (the test bench runs it via
    ``yield from``).

    * :dut:            object exposing ``int_insn_i``, ``int_dest_i``,
                       ``int_src1_i``, ``int_src2_i``, ``reg_enable_i``,
                       ``branch_fail_i`` and ``branch_succ_i``
    * :op:             index of the ``int_insn_i`` bit to raise
    * :src1:, :src2:   values driven onto the two source-register inputs
    * :dest:           value driven onto the destination-register input
    * :branch_success: value driven onto ``branch_succ_i``
    * :branch_fail:    value driven onto ``branch_fail_i``
    """
    insn_bits = dut.int_insn_i

    # clear every instruction-select bit first, so that once bit `op`
    # is raised further down it is the only one left set
    for bit in range(len(insn_bits)):
        yield insn_bits[bit].eq(0)

    # register numbers: destination, then the two sources.  The ports
    # are looked up by name one at a time so that each attribute access
    # happens immediately before its own yield.
    operands = (("int_dest_i", dest),
                ("int_src1_i", src1),
                ("int_src2_i", src2))
    for port_name, regnum in operands:
        yield getattr(dut, port_name).eq(regnum)

    # select the operation and enable register decode
    yield insn_bits[op].eq(1)
    yield dut.reg_enable_i.eq(1)

    # request that this instruction be made shadow-dependent on the
    # branch failing and/or on the branch succeeding
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)
534 def print_reg(dut
, rnums
):
537 reg
= yield dut
.intregs
.regs
[rnum
].reg
538 rs
.append("%x" % reg
)
539 rnums
= map(str, rnums
)
540 print ("reg %s: %s" % (','.join(rnums
), ','.join(rs
)))
543 def create_random_ops(dut
, n_ops
, shadowing
=False, max_opnums
=3):
545 for i
in range(n_ops
):
546 src1
= randint(1, dut
.n_regs
-1)
547 src2
= randint(1, dut
.n_regs
-1)
548 dest
= randint(1, dut
.n_regs
-1)
549 op
= randint(0, max_opnums
)
552 insts
.append((src1
, src2
, dest
, op
, (0, 0)))
554 insts
.append((src1
, src2
, dest
, op
))
558 def wait_for_busy_clear(dut
):
560 busy_o
= yield dut
.busy_o
567 def wait_for_issue(dut
):
569 issue_o
= yield dut
.issue_o
571 for i
in range(len(dut
.int_insn_i
)):
572 yield dut
.int_insn_i
[i
].eq(0)
573 yield dut
.reg_enable_i
.eq(0)
576 #yield from print_reg(dut, [1,2,3])
578 #yield from print_reg(dut, [1,2,3])
580 def scoreboard_branch_sim(dut
, alusim
):
584 yield dut
.int_store_i
.eq(1)
588 # set random values in the registers
589 for i
in range(1, dut
.n_regs
):
591 val
= randint(0, (1<<alusim
.rwidth
)-1)
592 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
593 alusim
.setval(i
, val
)
595 # create some instructions: branches create a tree
596 insts
= create_random_ops(dut
, 0, True)
598 src1
= randint(1, dut
.n_regs
-1)
599 src2
= randint(1, dut
.n_regs
-1)
601 op
= 4 # only BGT at the moment
603 branch_ok
= create_random_ops(dut
, 1, True)
604 branch_fail
= create_random_ops(dut
, 1, True)
606 insts
.append((src1
, src2
, (branch_ok
, branch_fail
), op
, (0, 0)))
608 siminsts
= deepcopy(insts
)
610 # issue instruction(s)
616 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = insts
.pop(0)
617 if branch_direction
== 1 and shadow_off
:
618 continue # branch was "success" and this is a "failed"... skip
619 if branch_direction
== 2 and shadow_on
:
620 continue # branch was "fail" and this is a "success"... skip
623 branch_ok
, branch_fail
= dest
625 # ok zip up the branch success / fail instructions and
626 # drop them into the queue, one marked "to have branch success"
627 # the other to be marked shadow branch "fail".
628 # one out of each of these will be cancelled
629 for ok
, fl
in zip(branch_ok
, branch_fail
):
630 instrs
.append((ok
[0], ok
[1], ok
[2], ok
[3], (1, 0)))
631 instrs
.append((fl
[0], fl
[1], fl
[2], fl
[3], (0, 1)))
632 print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
633 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
634 yield from int_instr(dut
, op
, src1
, src2
, dest
,
635 shadow_on
, shadow_off
)
637 yield from wait_for_issue(dut
)
638 branch_direction
= yield dut
.branch_direction_o
# way branch went
640 # wait for all instructions to stop before checking
642 yield from wait_for_busy_clear(dut
)
645 for (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) in siminsts
:
649 branch_ok
, branch_fail
= dest
651 print ("sim %d: (%d, %d, %d, %d)" % (i
, src1
, src2
, dest
, op
))
652 branch_res
= alusim
.op(op
, src1
, src2
, dest
)
655 siminsts
+= branch_ok
657 siminsts
+= branch_fail
660 yield from alusim
.check(dut
)
661 yield from alusim
.dump(dut
)
664 def scoreboard_sim(dut
, alusim
):
666 yield dut
.int_store_i
.eq(1)
670 # set random values in the registers
671 for i
in range(1, dut
.n_regs
):
673 val
= randint(0, (1<<alusim
.rwidth
)-1)
674 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
675 alusim
.setval(i
, val
)
677 # create some instructions (some random, some regression tests)
680 instrs
= create_random_ops(dut
, 10, True, 4)
683 instrs
.append((2, 3, 3, 0))
684 instrs
.append((5, 3, 3, 1))
687 instrs
.append((5, 6, 2, 1))
688 instrs
.append((2, 2, 4, 0))
689 #instrs.append((2, 2, 3, 1))
692 instrs
.append((2, 1, 2, 3))
695 instrs
.append((2, 6, 2, 1))
696 instrs
.append((2, 1, 2, 0))
699 instrs
.append((1, 2, 7, 2))
700 instrs
.append((7, 1, 5, 0))
701 instrs
.append((4, 4, 1, 1))
704 instrs
.append((5, 6, 2, 2))
705 instrs
.append((1, 1, 4, 1))
706 instrs
.append((6, 5, 3, 0))
709 # Write-after-Write Hazard
710 instrs
.append( (3, 6, 7, 2) )
711 instrs
.append( (4, 4, 7, 1) )
714 # self-read/write-after-write followed by Read-after-Write
715 instrs
.append((1, 1, 1, 1))
716 instrs
.append((1, 5, 3, 0))
719 # Read-after-Write followed by self-read-after-write
720 instrs
.append((5, 6, 1, 2))
721 instrs
.append((1, 1, 1, 1))
724 # self-read-write sandwich
725 instrs
.append((5, 6, 1, 2))
726 instrs
.append((1, 1, 1, 1))
727 instrs
.append((1, 5, 3, 0))
731 instrs
.append( (5, 2, 5, 2) )
732 instrs
.append( (2, 6, 3, 0) )
733 instrs
.append( (4, 2, 2, 1) )
737 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
739 yield dut
.intregs
.regs
[3].reg
.eq(5)
741 instrs
.append((5, 3, 3, 4, (0, 0)))
742 instrs
.append((4, 2, 1, 2, (0, 1)))
746 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
748 yield dut
.intregs
.regs
[3].reg
.eq(5)
750 instrs
.append((5, 3, 3, 4, (0, 0)))
751 instrs
.append((4, 2, 1, 2, (1, 0)))
753 # issue instruction(s), wait for issue to be free before proceeding
754 for i
, (src1
, src2
, dest
, op
, (br_ok
, br_fail
)) in enumerate(instrs
):
756 print ("instr %d: (%d, %d, %d, %d)" % (i
, src1
, src2
, dest
, op
))
757 alusim
.op(op
, src1
, src2
, dest
)
758 yield from int_instr(dut
, op
, src1
, src2
, dest
, br_ok
, br_fail
)
760 yield from wait_for_issue(dut
)
762 # wait for all instructions to stop before checking
764 yield from wait_for_busy_clear(dut
)
767 yield from alusim
.check(dut
)
768 yield from alusim
.dump(dut
)
771 def test_scoreboard():
772 dut
= Scoreboard(16, 8)
773 alusim
= RegSim(16, 8)
774 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
775 with
open("test_scoreboard6600.il", "w") as f
:
778 #run_simulation(dut, scoreboard_sim(dut, alusim),
779 # vcd_name='test_scoreboard6600.vcd')
781 run_simulation(dut
, scoreboard_branch_sim(dut
, alusim
),
782 vcd_name
='test_scoreboard6600.vcd')
785 if __name__
== '__main__':