1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
7 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
8 from scoreboard
.global_pending
import GlobalPending
9 from scoreboard
.group_picker
import GroupPicker
10 from scoreboard
.issue_unit
import IntFPIssueUnit
, RegDecode
11 from scoreboard
.shadow
import ShadowMatrix
, BranchSpeculationRecord
13 from compalu
import ComputationUnitNoDelay
15 from alu_hier
import ALU
, BranchALU
16 from nmutil
.latch
import SRLatch
18 from random
import randint
, seed
19 from copy
import deepcopy
22 class CompUnits(Elaboratable
):
24 def __init__(self
, rwid
, n_units
):
27 * :rwid: bit width of register file(s) - both FP and INT
28 * :n_units: number of ALUs
30 Note: bgt unit is returned so that a shadow unit can be created
34 self
.n_units
= n_units
38 self
.issue_i
= Signal(n_units
, reset_less
=True)
39 self
.go_rd_i
= Signal(n_units
, reset_less
=True)
40 self
.go_wr_i
= Signal(n_units
, reset_less
=True)
41 self
.shadown_i
= Signal(n_units
, reset_less
=True)
42 self
.go_die_i
= Signal(n_units
, reset_less
=True)
45 self
.busy_o
= Signal(n_units
, reset_less
=True)
46 self
.rd_rel_o
= Signal(n_units
, reset_less
=True)
47 self
.req_rel_o
= Signal(n_units
, reset_less
=True)
49 # in/out register data (note: not register#, actual data)
50 self
.dest_o
= Signal(rwid
, reset_less
=True)
51 self
.src1_data_i
= Signal(rwid
, reset_less
=True)
52 self
.src2_data_i
= Signal(rwid
, reset_less
=True)
55 self
.bgt
= BranchALU(self
.rwid
)
56 self
.br1
= ComputationUnitNoDelay(self
.rwid
, 3, self
.bgt
)
58 def elaborate(self
, platform
):
70 m
.submodules
.comp1
= comp1
= ComputationUnitNoDelay(self
.rwid
, 2, add
)
71 m
.submodules
.comp2
= comp2
= ComputationUnitNoDelay(self
.rwid
, 2, sub
)
72 m
.submodules
.comp3
= comp3
= ComputationUnitNoDelay(self
.rwid
, 2, mul
)
73 m
.submodules
.comp4
= comp4
= ComputationUnitNoDelay(self
.rwid
, 2, shf
)
74 m
.submodules
.br1
= br1
= self
.br1
75 int_alus
= [comp1
, comp2
, comp3
, comp4
, br1
]
77 comb
+= comp1
.oper_i
.eq(Const(0, 2)) # op=add
78 comb
+= comp2
.oper_i
.eq(Const(1, 2)) # op=sub
79 comb
+= comp3
.oper_i
.eq(Const(2, 2)) # op=mul
80 comb
+= comp4
.oper_i
.eq(Const(3, 2)) # op=shf
81 comb
+= br1
.oper_i
.eq(Const(4, 3)) # op=bgt
92 req_rel_l
.append(alu
.req_rel_o
)
93 rd_rel_l
.append(alu
.rd_rel_o
)
94 shadow_l
.append(alu
.shadown_i
)
95 godie_l
.append(alu
.go_die_i
)
96 go_wr_l
.append(alu
.go_wr_i
)
97 go_rd_l
.append(alu
.go_rd_i
)
98 issue_l
.append(alu
.issue_i
)
99 busy_l
.append(alu
.busy_o
)
100 comb
+= self
.rd_rel_o
.eq(Cat(*rd_rel_l
))
101 comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
102 comb
+= self
.busy_o
.eq(Cat(*busy_l
))
103 comb
+= Cat(*godie_l
).eq(self
.go_die_i
)
104 comb
+= Cat(*shadow_l
).eq(self
.shadown_i
)
105 comb
+= Cat(*go_wr_l
).eq(self
.go_wr_i
)
106 comb
+= Cat(*go_rd_l
).eq(self
.go_rd_i
)
107 comb
+= Cat(*issue_l
).eq(self
.issue_i
)
109 # connect data register input/output
111 # merge (OR) all integer FU / ALU outputs to a single value
112 # bit of a hack: treereduce needs a list with an item named "dest_o"
113 dest_o
= treereduce(int_alus
)
114 comb
+= self
.dest_o
.eq(dest_o
)
116 for i
, alu
in enumerate(int_alus
):
117 comb
+= alu
.src1_i
.eq(self
.src1_data_i
)
118 comb
+= alu
.src2_i
.eq(self
.src2_data_i
)
123 class FunctionUnits(Elaboratable
):
125 def __init__(self
, n_regs
, n_int_alus
):
127 self
.n_int_alus
= n_int_alus
129 self
.dest_i
= Signal(n_regs
, reset_less
=True) # Dest R# in
130 self
.src1_i
= Signal(n_regs
, reset_less
=True) # oper1 R# in
131 self
.src2_i
= Signal(n_regs
, reset_less
=True) # oper2 R# in
133 self
.g_int_rd_pend_o
= Signal(n_regs
, reset_less
=True)
134 self
.g_int_wr_pend_o
= Signal(n_regs
, reset_less
=True)
136 self
.dest_rsel_o
= Signal(n_regs
, reset_less
=True) # dest reg (bot)
137 self
.src1_rsel_o
= Signal(n_regs
, reset_less
=True) # src1 reg (bot)
138 self
.src2_rsel_o
= Signal(n_regs
, reset_less
=True) # src2 reg (bot)
140 self
.req_rel_i
= Signal(n_int_alus
, reset_less
= True)
141 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
142 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
144 self
.go_rd_i
= Signal(n_int_alus
, reset_less
=True)
145 self
.go_wr_i
= Signal(n_int_alus
, reset_less
=True)
146 self
.go_die_i
= Signal(n_int_alus
, reset_less
=True)
147 self
.req_rel_o
= Signal(n_int_alus
, reset_less
=True)
148 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
150 # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
152 def elaborate(self
, platform
):
157 n_int_fus
= self
.n_int_alus
159 # Integer FU-FU Dep Matrix
160 intfudeps
= FUFUDepMatrix(n_int_fus
, n_int_fus
)
161 m
.submodules
.intfudeps
= intfudeps
162 # Integer FU-Reg Dep Matrix
163 intregdeps
= FURegDepMatrix(n_int_fus
, self
.n_regs
)
164 m
.submodules
.intregdeps
= intregdeps
166 comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.rd_rsel_o
)
167 comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.wr_rsel_o
)
169 comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.rd_rsel_o
)
170 comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.wr_rsel_o
)
172 comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
173 comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
174 self
.wr_pend_o
= intregdeps
.wr_pend_o
# also output for use in WaWGrid
176 comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
177 comb
+= intfudeps
.go_rd_i
.eq(self
.go_rd_i
)
178 comb
+= intfudeps
.go_wr_i
.eq(self
.go_wr_i
)
179 comb
+= intfudeps
.go_die_i
.eq(self
.go_die_i
)
180 comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
181 comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
183 # Connect function issue / arrays, and dest/src1/src2
184 comb
+= intregdeps
.dest_i
.eq(self
.dest_i
)
185 comb
+= intregdeps
.src1_i
.eq(self
.src1_i
)
186 comb
+= intregdeps
.src2_i
.eq(self
.src2_i
)
188 comb
+= intregdeps
.go_rd_i
.eq(self
.go_rd_i
)
189 comb
+= intregdeps
.go_wr_i
.eq(self
.go_wr_i
)
190 comb
+= intregdeps
.go_die_i
.eq(self
.go_die_i
)
191 comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
193 comb
+= self
.dest_rsel_o
.eq(intregdeps
.dest_rsel_o
)
194 comb
+= self
.src1_rsel_o
.eq(intregdeps
.src1_rsel_o
)
195 comb
+= self
.src2_rsel_o
.eq(intregdeps
.src2_rsel_o
)
200 class Scoreboard(Elaboratable
):
201 def __init__(self
, rwid
, n_regs
):
204 * :rwid: bit width of register file(s) - both FP and INT
205 * :n_regs: depth of register file(s) - number of FP and INT regs
211 self
.intregs
= RegFileArray(rwid
, n_regs
)
212 self
.fpregs
= RegFileArray(rwid
, n_regs
)
215 self
.int_store_i
= Signal(reset_less
=True) # instruction is a store
216 self
.int_dest_i
= Signal(max=n_regs
, reset_less
=True) # Dest R# in
217 self
.int_src1_i
= Signal(max=n_regs
, reset_less
=True) # oper1 R# in
218 self
.int_src2_i
= Signal(max=n_regs
, reset_less
=True) # oper2 R# in
219 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
222 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
223 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
225 # for branch speculation experiment. branch_direction = 0 if
226 # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
227 # branch_succ and branch_fail are requests to have the current
228 # instruction be dependent on the branch unit "shadow" capability.
229 self
.branch_succ_i
= Signal(reset_less
=True)
230 self
.branch_fail_i
= Signal(reset_less
=True)
231 self
.branch_direction_o
= Signal(2, reset_less
=True)
233 def elaborate(self
, platform
):
238 m
.submodules
.intregs
= self
.intregs
239 m
.submodules
.fpregs
= self
.fpregs
242 int_dest
= self
.intregs
.write_port("dest")
243 int_src1
= self
.intregs
.read_port("src1")
244 int_src2
= self
.intregs
.read_port("src2")
246 fp_dest
= self
.fpregs
.write_port("dest")
247 fp_src1
= self
.fpregs
.read_port("src1")
248 fp_src2
= self
.fpregs
.read_port("src2")
250 # Int ALUs and Comp Units
252 m
.submodules
.cu
= cu
= CompUnits(self
.rwid
, n_int_alus
)
253 comb
+= cu
.go_die_i
.eq(0)
254 bgt
= cu
.bgt
# get at the branch computation unit
257 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
)
259 # Count of number of FUs
260 n_int_fus
= n_int_alus
261 n_fp_fus
= 0 # for now
263 # Integer Priority Picker 1: Adder + Subtractor
264 intpick1
= GroupPicker(n_int_fus
) # picks between add, sub, mul and shf
265 m
.submodules
.intpick1
= intpick1
268 regdecode
= RegDecode(self
.n_regs
)
269 m
.submodules
.regdecode
= regdecode
270 issueunit
= IntFPIssueUnit(self
.n_regs
, n_int_fus
, n_fp_fus
)
271 m
.submodules
.issueunit
= issueunit
273 # Shadow Matrix. currently n_int_fus shadows, to be used for
274 # write-after-write hazards. NOTE: there is one extra for branches,
275 # so the shadow width is increased by 1
276 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_int_fus
, n_int_fus
, True)
277 m
.submodules
.bshadow
= bshadow
= ShadowMatrix(n_int_fus
, 1, False)
279 # record previous instruction to cast shadow on current instruction
280 fn_issue_prev
= Signal(n_int_fus
)
281 prev_shadow
= Signal(n_int_fus
)
283 # Branch Speculation recorder. tracks the success/fail state as
284 # each instruction is issued, so that when the branch occurs the
285 # allow/cancel can be issued as appropriate.
286 m
.submodules
.specrec
= bspec
= BranchSpeculationRecord(n_int_fus
)
289 # ok start wiring things together...
290 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
291 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
295 # Issue Unit is where it starts. set up some in/outs for this module
297 comb
+= [issueunit
.i
.store_i
.eq(self
.int_store_i
),
298 regdecode
.dest_i
.eq(self
.int_dest_i
),
299 regdecode
.src1_i
.eq(self
.int_src1_i
),
300 regdecode
.src2_i
.eq(self
.int_src2_i
),
301 regdecode
.enable_i
.eq(self
.reg_enable_i
),
302 issueunit
.i
.dest_i
.eq(regdecode
.dest_o
),
303 self
.issue_o
.eq(issueunit
.issue_o
)
305 self
.int_insn_i
= issueunit
.i
.insn_i
# enabled by instruction decode
307 # connect global rd/wr pending vector (for WaW detection)
308 sync
+= issueunit
.i
.g_wr_pend_i
.eq(intfus
.g_int_wr_pend_o
)
309 # TODO: issueunit.f (FP)
311 # and int function issue / busy arrays, and dest/src1/src2
312 comb
+= intfus
.dest_i
.eq(regdecode
.dest_o
)
313 comb
+= intfus
.src1_i
.eq(regdecode
.src1_o
)
314 comb
+= intfus
.src2_i
.eq(regdecode
.src2_o
)
316 fn_issue_o
= issueunit
.i
.fn_issue_o
318 comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
319 comb
+= issueunit
.i
.busy_i
.eq(cu
.busy_o
)
320 comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
323 # merge shadow matrices outputs
326 # these are explained in ShadowMatrix docstring, and are to be
327 # connected to the FUReg and FUFU Matrices, to get them to reset
328 anydie
= Signal(n_int_fus
, reset_less
=True)
329 allshadown
= Signal(n_int_fus
, reset_less
=True)
330 shreset
= Signal(n_int_fus
, reset_less
=True)
331 comb
+= allshadown
.eq(shadows
.shadown_o
& bshadow
.shadown_o
)
332 comb
+= anydie
.eq(shadows
.go_die_o | bshadow
.go_die_o
)
333 comb
+= shreset
.eq(bspec
.match_g_o | bspec
.match_f_o
)
336 # connect fu-fu matrix
339 # Group Picker... done manually for now.
340 go_rd_o
= intpick1
.go_rd_o
341 go_wr_o
= intpick1
.go_wr_o
342 go_rd_i
= intfus
.go_rd_i
343 go_wr_i
= intfus
.go_wr_i
344 go_die_i
= intfus
.go_die_i
345 # NOTE: connect to the shadowed versions so that they can "die" (reset)
346 comb
+= go_rd_i
[0:n_int_fus
].eq(go_rd_o
[0:n_int_fus
]) # rd
347 comb
+= go_wr_i
[0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
]) # wr
348 comb
+= go_die_i
[0:n_int_fus
].eq(anydie
[0:n_int_fus
]) # die
352 comb
+= intpick1
.rd_rel_i
[0:n_int_fus
].eq(cu
.rd_rel_o
[0:n_int_fus
])
353 comb
+= intpick1
.req_rel_i
[0:n_int_fus
].eq(cu
.req_rel_o
[0:n_int_fus
])
354 int_rd_o
= intfus
.readable_o
355 int_wr_o
= intfus
.writable_o
356 comb
+= intpick1
.readable_i
[0:n_int_fus
].eq(int_rd_o
[0:n_int_fus
])
357 comb
+= intpick1
.writable_i
[0:n_int_fus
].eq(int_wr_o
[0:n_int_fus
])
363 comb
+= shadows
.issue_i
.eq(fn_issue_o
)
364 #comb += shadows.reset_i[0:n_int_fus].eq(bshadow.go_die_o[0:n_int_fus])
365 comb
+= shadows
.reset_i
[0:n_int_fus
].eq(bshadow
.go_die_o
[0:n_int_fus
])
# NOTE: this setup is for the instruction order preservation...
369 # connect shadows / go_dies to Computation Units
370 comb
+= cu
.shadown_i
[0:n_int_fus
].eq(allshadown
)
371 comb
+= cu
.go_die_i
[0:n_int_fus
].eq(anydie
)
373 # ok connect first n_int_fu shadows to busy lines, to create an
374 # instruction-order linked-list-like arrangement, using a bit-matrix
375 # (instead of e.g. a ring buffer).
378 # when written, the shadow can be cancelled (and was good)
379 for i
in range(n_int_fus
):
380 comb
+= shadows
.s_good_i
[i
][0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
])
382 # work out the current-activated busy unit (by recording the old one)
383 with m
.If(fn_issue_o
): # only update prev bit if instruction issued
384 sync
+= fn_issue_prev
.eq(fn_issue_o
)
386 # *previous* instruction shadows *current* instruction, and, obviously,
387 # if the previous is completed (!busy) don't cast the shadow!
388 comb
+= prev_shadow
.eq(~fn_issue_o
& cu
.busy_o
)
389 for i
in range(n_int_fus
):
390 comb
+= shadows
.shadow_i
[i
][0:n_int_fus
].eq(prev_shadow
)
393 # ... and this is for branch speculation. it uses the extra bit
394 # tacked onto the ShadowMatrix (hence shadow_wid=n_int_fus+1)
395 # only needs to set shadow_i, s_fail_i and s_good_i
397 # issue captures shadow_i (if enabled)
398 comb
+= bshadow
.reset_i
[0:n_int_fus
].eq(shreset
[0:n_int_fus
])
400 bactive
= Signal(reset_less
=True)
401 comb
+= bactive
.eq((bspec
.active_i | cu
.br1
.issue_i
) & ~cu
.br1
.go_wr_i
)
403 # instruction being issued (fn_issue_o) has a shadow cast by the branch
404 with m
.If(bactive
& (self
.branch_succ_i | self
.branch_fail_i
)):
405 comb
+= bshadow
.issue_i
.eq(fn_issue_o
)
406 for i
in range(n_int_fus
):
407 with m
.If(fn_issue_o
& (Const(1<<i
))):
408 comb
+= bshadow
.shadow_i
[i
][0].eq(1)
410 # finally, we need an indicator to the test infrastructure as to
411 # whether the branch succeeded or failed, plus, link up to the
412 # "recorder" of whether the instruction was under shadow or not
414 with m
.If(cu
.br1
.issue_i
):
415 sync
+= bspec
.active_i
.eq(1)
416 with m
.If(self
.branch_succ_i
):
417 comb
+= bspec
.good_i
.eq(fn_issue_o
& 0x1f)
418 with m
.If(self
.branch_fail_i
):
419 comb
+= bspec
.fail_i
.eq(fn_issue_o
& 0x1f)
421 # branch is active (TODO: a better signal: this is over-using the
422 # go_write signal - actually the branch should not be "writing")
423 with m
.If(cu
.br1
.go_wr_i
):
424 sync
+= self
.branch_direction_o
.eq(cu
.br1
.data_o
+Const(1, 2))
425 sync
+= bspec
.active_i
.eq(0)
426 comb
+= bspec
.br_i
.eq(1)
427 # branch occurs if data == 1, failed if data == 0
428 comb
+= bspec
.br_ok_i
.eq(cu
.br1
.data_o
== 1)
429 for i
in range(n_int_fus
):
430 # *expected* direction of the branch matched against *actual*
431 comb
+= bshadow
.s_good_i
[i
][0].eq(bspec
.match_g_o
[i
])
433 comb
+= bshadow
.s_fail_i
[i
][0].eq(bspec
.match_f_o
[i
])
436 # Connect Register File(s)
438 print ("intregdeps wen len", len(intfus
.dest_rsel_o
))
439 comb
+= int_dest
.wen
.eq(intfus
.dest_rsel_o
)
440 comb
+= int_src1
.ren
.eq(intfus
.src1_rsel_o
)
441 comb
+= int_src2
.ren
.eq(intfus
.src2_rsel_o
)
# connect ALUs to regfile
444 comb
+= int_dest
.data_i
.eq(cu
.dest_o
)
445 comb
+= cu
.src1_data_i
.eq(int_src1
.data_o
)
446 comb
+= cu
.src2_data_i
.eq(int_src2
.data_o
)
448 # connect ALU Computation Units
449 comb
+= cu
.go_rd_i
[0:n_int_fus
].eq(go_rd_o
[0:n_int_fus
])
450 comb
+= cu
.go_wr_i
[0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
])
451 comb
+= cu
.issue_i
[0:n_int_fus
].eq(fn_issue_o
[0:n_int_fus
])
457 yield from self
.intregs
458 yield from self
.fpregs
459 yield self
.int_store_i
460 yield self
.int_dest_i
461 yield self
.int_src1_i
462 yield self
.int_src2_i
464 yield self
.branch_succ_i
465 yield self
.branch_fail_i
466 yield self
.branch_direction_o
481 def __init__(self
, rwidth
, nregs
):
483 self
.regs
= [0] * nregs
485 def op(self
, op
, src1
, src2
, dest
):
486 maxbits
= (1 << self
.rwidth
) - 1
487 src1
= self
.regs
[src1
] & maxbits
488 src2
= self
.regs
[src2
] & maxbits
496 val
= src1
>> (src2
& maxbits
)
498 val
= int(src1
> src2
)
500 val
= int(src1
< src2
)
502 val
= int(src1
== src2
)
504 val
= int(src1
!= src2
)
506 self
.setval(dest
, val
)
def setval(self, dest, val):
    """Store ``val`` as the modelled content of register ``dest``.

    * :dest: index into ``self.regs``
    * :val:  integer value to record (printed in hex for the log)
    """
    as_hex = hex(val)
    print("sim setval", dest, as_hex)
    self.regs[dest] = val
514 for i
, val
in enumerate(self
.regs
):
515 reg
= yield dut
.intregs
.regs
[i
].reg
516 okstr
= "OK" if reg
== val
else "!ok"
517 print("reg %d expected %x received %x %s" % (i
, val
, reg
, okstr
))
519 def check(self
, dut
):
520 for i
, val
in enumerate(self
.regs
):
521 reg
= yield dut
.intregs
.regs
[i
].reg
523 print("reg %d expected %x received %x\n" % (i
, val
, reg
))
524 yield from self
.dump(dut
)
def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
    """Simulation generator: present one integer instruction to ``dut``.

    Every yielded item is the result of calling ``.eq()`` on one of the
    ``dut`` input attributes, in this order: all bits of ``int_insn_i``
    cleared, then dest / src1 / src2 register numbers, then the selected
    ``int_insn_i`` bit set, then ``reg_enable_i`` raised, and finally the
    two branch-shadow request flags.

    * :dut:            object exposing int_insn_i, int_dest_i, int_src1_i,
                       int_src2_i, reg_enable_i, branch_fail_i, branch_succ_i
    * :op:             index of the ``int_insn_i`` bit to set
    * :src1:, :src2:   source register numbers
    * :dest:           destination register number
    * :branch_success: value driven onto ``branch_succ_i``
    * :branch_fail:    value driven onto ``branch_fail_i``
    """
    insn_bits = dut.int_insn_i

    # deselect every operation bit before choosing the new one
    for bit_no in range(len(insn_bits)):
        yield insn_bits[bit_no].eq(0)

    # register numbers: destination first, then the two sources
    for port_name, regnum in (("int_dest_i", dest),
                              ("int_src1_i", src1),
                              ("int_src2_i", src2)):
        yield getattr(dut, port_name).eq(regnum)

    yield insn_bits[op].eq(1)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)
542 def print_reg(dut
, rnums
):
545 reg
= yield dut
.intregs
.regs
[rnum
].reg
546 rs
.append("%x" % reg
)
547 rnums
= map(str, rnums
)
548 print ("reg %s: %s" % (','.join(rnums
), ','.join(rs
)))
551 def create_random_ops(dut
, n_ops
, shadowing
=False, max_opnums
=3):
553 for i
in range(n_ops
):
554 src1
= randint(1, dut
.n_regs
-1)
555 src2
= randint(1, dut
.n_regs
-1)
556 dest
= randint(1, dut
.n_regs
-1)
557 op
= randint(0, max_opnums
)
560 insts
.append((src1
, src2
, dest
, op
, (0, 0)))
562 insts
.append((src1
, src2
, dest
, op
))
566 def wait_for_busy_clear(dut
):
568 busy_o
= yield dut
.busy_o
575 def wait_for_issue(dut
):
577 issue_o
= yield dut
.issue_o
579 for i
in range(len(dut
.int_insn_i
)):
580 yield dut
.int_insn_i
[i
].eq(0)
581 yield dut
.reg_enable_i
.eq(0)
584 #yield from print_reg(dut, [1,2,3])
586 #yield from print_reg(dut, [1,2,3])
588 def scoreboard_branch_sim(dut
, alusim
):
592 yield dut
.int_store_i
.eq(1)
596 print ("rseed", iseed
)
600 yield dut
.branch_direction_o
.eq(0)
602 # set random values in the registers
603 for i
in range(1, dut
.n_regs
):
605 val
= randint(0, (1<<alusim
.rwidth
)-1)
606 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
607 alusim
.setval(i
, val
)
610 # create some instructions: branches create a tree
611 insts
= create_random_ops(dut
, 1, True, 1)
612 #insts.append((6, 6, 1, 2, (0, 0)))
613 #insts.append((4, 3, 3, 0, (0, 0)))
615 src1
= randint(1, dut
.n_regs
-1)
616 src2
= randint(1, dut
.n_regs
-1)
618 op
= 4 # only BGT at the moment
620 branch_ok
= create_random_ops(dut
, 1, True, 1)
621 branch_fail
= create_random_ops(dut
, 1, True, 1)
623 insts
.append((src1
, src2
, (branch_ok
, branch_fail
), op
, (0, 0)))
627 #insts.append( (3, 5, 2, 0, (0, 0)) )
630 branch_ok
.append ( (5, 7, 5, 1, (1, 0)) )
631 #branch_ok.append( None )
632 branch_fail
.append( (1, 1, 2, 0, (0, 1)) )
633 #branch_fail.append( None )
634 insts
.append( (6, 4, (branch_ok
, branch_fail
), 4, (0, 0)) )
636 siminsts
= deepcopy(insts
)
638 # issue instruction(s)
646 branch_direction
= yield dut
.branch_direction_o
# way branch went
647 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = insts
.pop(0)
648 if branch_direction
== 1 and shadow_on
:
649 print ("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
650 continue # branch was "success" and this is a "failed"... skip
651 if branch_direction
== 2 and shadow_off
:
652 print ("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
653 continue # branch was "fail" and this is a "success"... skip
654 if branch_direction
!= 0:
659 branch_ok
, branch_fail
= dest
661 # ok zip up the branch success / fail instructions and
662 # drop them into the queue, one marked "to have branch success"
663 # the other to be marked shadow branch "fail".
664 # one out of each of these will be cancelled
665 for ok
, fl
in zip(branch_ok
, branch_fail
):
667 instrs
.append((ok
[0], ok
[1], ok
[2], ok
[3], (1, 0)))
669 instrs
.append((fl
[0], fl
[1], fl
[2], fl
[3], (0, 1)))
670 print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
671 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
672 yield from int_instr(dut
, op
, src1
, src2
, dest
,
673 shadow_on
, shadow_off
)
675 yield from wait_for_issue(dut
)
677 # wait for all instructions to stop before checking
679 yield from wait_for_busy_clear(dut
)
683 instr
= siminsts
.pop(0)
686 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = instr
690 branch_ok
, branch_fail
= dest
692 print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
693 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
694 branch_res
= alusim
.op(op
, src1
, src2
, dest
)
697 siminsts
+= branch_ok
699 siminsts
+= branch_fail
702 yield from alusim
.check(dut
)
703 yield from alusim
.dump(dut
)
706 def scoreboard_sim(dut
, alusim
):
710 yield dut
.int_store_i
.eq(1)
714 # set random values in the registers
715 for i
in range(1, dut
.n_regs
):
717 val
= randint(0, (1<<alusim
.rwidth
)-1)
718 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
719 alusim
.setval(i
, val
)
721 # create some instructions (some random, some regression tests)
724 instrs
= create_random_ops(dut
, 10, True, 4)
727 instrs
.append((2, 3, 3, 0))
728 instrs
.append((5, 3, 3, 1))
731 instrs
.append((5, 6, 2, 1))
732 instrs
.append((2, 2, 4, 0))
733 #instrs.append((2, 2, 3, 1))
736 instrs
.append((2, 1, 2, 3))
739 instrs
.append((2, 6, 2, 1))
740 instrs
.append((2, 1, 2, 0))
743 instrs
.append((1, 2, 7, 2))
744 instrs
.append((7, 1, 5, 0))
745 instrs
.append((4, 4, 1, 1))
748 instrs
.append((5, 6, 2, 2))
749 instrs
.append((1, 1, 4, 1))
750 instrs
.append((6, 5, 3, 0))
753 # Write-after-Write Hazard
754 instrs
.append( (3, 6, 7, 2) )
755 instrs
.append( (4, 4, 7, 1) )
758 # self-read/write-after-write followed by Read-after-Write
759 instrs
.append((1, 1, 1, 1))
760 instrs
.append((1, 5, 3, 0))
763 # Read-after-Write followed by self-read-after-write
764 instrs
.append((5, 6, 1, 2))
765 instrs
.append((1, 1, 1, 1))
768 # self-read-write sandwich
769 instrs
.append((5, 6, 1, 2))
770 instrs
.append((1, 1, 1, 1))
771 instrs
.append((1, 5, 3, 0))
775 instrs
.append( (5, 2, 5, 2) )
776 instrs
.append( (2, 6, 3, 0) )
777 instrs
.append( (4, 2, 2, 1) )
781 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
783 yield dut
.intregs
.regs
[3].reg
.eq(5)
785 instrs
.append((5, 3, 3, 4, (0, 0)))
786 instrs
.append((4, 2, 1, 2, (0, 1)))
790 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
792 yield dut
.intregs
.regs
[3].reg
.eq(5)
794 instrs
.append((5, 3, 3, 4, (0, 0)))
795 instrs
.append((4, 2, 1, 2, (1, 0)))
798 instrs
.append( (4, 3, 5, 1, (0, 0)) )
799 instrs
.append( (5, 2, 3, 1, (0, 0)) )
800 instrs
.append( (7, 1, 5, 2, (0, 0)) )
801 instrs
.append( (5, 6, 6, 4, (0, 0)) )
802 instrs
.append( (7, 5, 2, 2, (1, 0)) )
803 instrs
.append( (1, 7, 5, 0, (0, 1)) )
804 instrs
.append( (1, 6, 1, 2, (1, 0)) )
805 instrs
.append( (1, 6, 7, 3, (0, 0)) )
806 instrs
.append( (6, 7, 7, 0, (0, 0)) )
808 # issue instruction(s), wait for issue to be free before proceeding
809 for i
, (src1
, src2
, dest
, op
, (br_ok
, br_fail
)) in enumerate(instrs
):
811 print ("instr %d: (%d, %d, %d, %d)" % (i
, src1
, src2
, dest
, op
))
812 alusim
.op(op
, src1
, src2
, dest
)
813 yield from int_instr(dut
, op
, src1
, src2
, dest
, br_ok
, br_fail
)
815 yield from wait_for_issue(dut
)
817 # wait for all instructions to stop before checking
819 yield from wait_for_busy_clear(dut
)
822 yield from alusim
.check(dut
)
823 yield from alusim
.dump(dut
)
826 def test_scoreboard():
827 dut
= Scoreboard(16, 8)
828 alusim
= RegSim(16, 8)
829 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
830 with
open("test_scoreboard6600.il", "w") as f
:
833 run_simulation(dut
, scoreboard_sim(dut
, alusim
),
834 vcd_name
='test_scoreboard6600.vcd')
836 #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
837 # vcd_name='test_scoreboard6600.vcd')
840 if __name__
== '__main__':