1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
7 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
8 from scoreboard
.global_pending
import GlobalPending
9 from scoreboard
.group_picker
import GroupPicker
10 from scoreboard
.issue_unit
import IntFPIssueUnit
, RegDecode
11 from scoreboard
.shadow
import ShadowMatrix
, BranchSpeculationRecord
13 from compalu
import ComputationUnitNoDelay
15 from alu_hier
import ALU
, BranchALU
16 from nmutil
.latch
import SRLatch
18 from random
import randint
, seed
19 from copy
import deepcopy
22 class CompUnits(Elaboratable
):
24 def __init__(self
, rwid
, n_units
):
27 * :rwid: bit width of register file(s) - both FP and INT
28 * :n_units: number of ALUs
30 Note: bgt unit is returned so that a shadow unit can be created
34 self
.n_units
= n_units
38 self
.issue_i
= Signal(n_units
, reset_less
=True)
39 self
.go_rd_i
= Signal(n_units
, reset_less
=True)
40 self
.go_wr_i
= Signal(n_units
, reset_less
=True)
41 self
.shadown_i
= Signal(n_units
, reset_less
=True)
42 self
.go_die_i
= Signal(n_units
, reset_less
=True)
45 self
.busy_o
= Signal(n_units
, reset_less
=True)
46 self
.rd_rel_o
= Signal(n_units
, reset_less
=True)
47 self
.req_rel_o
= Signal(n_units
, reset_less
=True)
49 # in/out register data (note: not register#, actual data)
50 self
.dest_o
= Signal(rwid
, reset_less
=True)
51 self
.src1_data_i
= Signal(rwid
, reset_less
=True)
52 self
.src2_data_i
= Signal(rwid
, reset_less
=True)
55 self
.bgt
= BranchALU(self
.rwid
)
56 self
.br1
= ComputationUnitNoDelay(self
.rwid
, 3, self
.bgt
)
58 def elaborate(self
, platform
):
70 m
.submodules
.comp1
= comp1
= ComputationUnitNoDelay(self
.rwid
, 2, add
)
71 m
.submodules
.comp2
= comp2
= ComputationUnitNoDelay(self
.rwid
, 2, sub
)
72 m
.submodules
.comp3
= comp3
= ComputationUnitNoDelay(self
.rwid
, 2, mul
)
73 m
.submodules
.comp4
= comp4
= ComputationUnitNoDelay(self
.rwid
, 2, shf
)
74 m
.submodules
.br1
= br1
= self
.br1
75 int_alus
= [comp1
, comp2
, comp3
, comp4
, br1
]
77 comb
+= comp1
.oper_i
.eq(Const(0, 2)) # op=add
78 comb
+= comp2
.oper_i
.eq(Const(1, 2)) # op=sub
79 comb
+= comp3
.oper_i
.eq(Const(2, 2)) # op=mul
80 comb
+= comp4
.oper_i
.eq(Const(3, 2)) # op=shf
81 comb
+= br1
.oper_i
.eq(Const(4, 3)) # op=bgt
92 req_rel_l
.append(alu
.req_rel_o
)
93 rd_rel_l
.append(alu
.rd_rel_o
)
94 shadow_l
.append(alu
.shadown_i
)
95 godie_l
.append(alu
.go_die_i
)
96 go_wr_l
.append(alu
.go_wr_i
)
97 go_rd_l
.append(alu
.go_rd_i
)
98 issue_l
.append(alu
.issue_i
)
99 busy_l
.append(alu
.busy_o
)
100 comb
+= self
.rd_rel_o
.eq(Cat(*rd_rel_l
))
101 comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
102 comb
+= self
.busy_o
.eq(Cat(*busy_l
))
103 comb
+= Cat(*godie_l
).eq(self
.go_die_i
)
104 comb
+= Cat(*shadow_l
).eq(self
.shadown_i
)
105 comb
+= Cat(*go_wr_l
).eq(self
.go_wr_i
)
106 comb
+= Cat(*go_rd_l
).eq(self
.go_rd_i
)
107 comb
+= Cat(*issue_l
).eq(self
.issue_i
)
109 # connect data register input/output
111 # merge (OR) all integer FU / ALU outputs to a single value
112 # bit of a hack: treereduce needs a list with an item named "dest_o"
113 dest_o
= treereduce(int_alus
)
114 comb
+= self
.dest_o
.eq(dest_o
)
116 for i
, alu
in enumerate(int_alus
):
117 comb
+= alu
.src1_i
.eq(self
.src1_data_i
)
118 comb
+= alu
.src2_i
.eq(self
.src2_data_i
)
123 class FunctionUnits(Elaboratable
):
125 def __init__(self
, n_regs
, n_int_alus
):
127 self
.n_int_alus
= n_int_alus
129 self
.dest_i
= Signal(n_regs
, reset_less
=True) # Dest R# in
130 self
.src1_i
= Signal(n_regs
, reset_less
=True) # oper1 R# in
131 self
.src2_i
= Signal(n_regs
, reset_less
=True) # oper2 R# in
133 self
.g_int_rd_pend_o
= Signal(n_regs
, reset_less
=True)
134 self
.g_int_wr_pend_o
= Signal(n_regs
, reset_less
=True)
136 self
.dest_rsel_o
= Signal(n_regs
, reset_less
=True) # dest reg (bot)
137 self
.src1_rsel_o
= Signal(n_regs
, reset_less
=True) # src1 reg (bot)
138 self
.src2_rsel_o
= Signal(n_regs
, reset_less
=True) # src2 reg (bot)
140 self
.req_rel_i
= Signal(n_int_alus
, reset_less
= True)
141 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
142 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
144 self
.go_rd_i
= Signal(n_int_alus
, reset_less
=True)
145 self
.go_wr_i
= Signal(n_int_alus
, reset_less
=True)
146 self
.go_die_i
= Signal(n_int_alus
, reset_less
=True)
147 self
.req_rel_o
= Signal(n_int_alus
, reset_less
=True)
148 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
150 # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
152 def elaborate(self
, platform
):
157 n_int_fus
= self
.n_int_alus
159 # Integer FU-FU Dep Matrix
160 intfudeps
= FUFUDepMatrix(n_int_fus
, n_int_fus
)
161 m
.submodules
.intfudeps
= intfudeps
162 # Integer FU-Reg Dep Matrix
163 intregdeps
= FURegDepMatrix(n_int_fus
, self
.n_regs
)
164 m
.submodules
.intregdeps
= intregdeps
166 comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.rd_rsel_o
)
167 comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.wr_rsel_o
)
169 comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.rd_rsel_o
)
170 comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.wr_rsel_o
)
172 comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
173 comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
174 self
.wr_pend_o
= intregdeps
.wr_pend_o
# also output for use in WaWGrid
176 comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
177 comb
+= intfudeps
.go_rd_i
.eq(self
.go_rd_i
)
178 comb
+= intfudeps
.go_wr_i
.eq(self
.go_wr_i
)
179 comb
+= intfudeps
.go_die_i
.eq(self
.go_die_i
)
180 comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
181 comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
183 # Connect function issue / arrays, and dest/src1/src2
184 comb
+= intregdeps
.dest_i
.eq(self
.dest_i
)
185 comb
+= intregdeps
.src1_i
.eq(self
.src1_i
)
186 comb
+= intregdeps
.src2_i
.eq(self
.src2_i
)
188 comb
+= intregdeps
.go_rd_i
.eq(self
.go_rd_i
)
189 comb
+= intregdeps
.go_wr_i
.eq(self
.go_wr_i
)
190 comb
+= intregdeps
.go_die_i
.eq(self
.go_die_i
)
191 comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
193 comb
+= self
.dest_rsel_o
.eq(intregdeps
.dest_rsel_o
)
194 comb
+= self
.src1_rsel_o
.eq(intregdeps
.src1_rsel_o
)
195 comb
+= self
.src2_rsel_o
.eq(intregdeps
.src2_rsel_o
)
200 class Scoreboard(Elaboratable
):
201 def __init__(self
, rwid
, n_regs
):
204 * :rwid: bit width of register file(s) - both FP and INT
205 * :n_regs: depth of register file(s) - number of FP and INT regs
211 self
.intregs
= RegFileArray(rwid
, n_regs
)
212 self
.fpregs
= RegFileArray(rwid
, n_regs
)
215 self
.int_dest_i
= Signal(max=n_regs
, reset_less
=True) # Dest R# in
216 self
.int_src1_i
= Signal(max=n_regs
, reset_less
=True) # oper1 R# in
217 self
.int_src2_i
= Signal(max=n_regs
, reset_less
=True) # oper2 R# in
218 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
221 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
222 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
224 # for branch speculation experiment. branch_direction = 0 if
225 # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
226 # branch_succ and branch_fail are requests to have the current
227 # instruction be dependent on the branch unit "shadow" capability.
228 self
.branch_succ_i
= Signal(reset_less
=True)
229 self
.branch_fail_i
= Signal(reset_less
=True)
230 self
.branch_direction_o
= Signal(2, reset_less
=True)
232 def elaborate(self
, platform
):
237 m
.submodules
.intregs
= self
.intregs
238 m
.submodules
.fpregs
= self
.fpregs
241 int_dest
= self
.intregs
.write_port("dest")
242 int_src1
= self
.intregs
.read_port("src1")
243 int_src2
= self
.intregs
.read_port("src2")
245 fp_dest
= self
.fpregs
.write_port("dest")
246 fp_src1
= self
.fpregs
.read_port("src1")
247 fp_src2
= self
.fpregs
.read_port("src2")
249 # Int ALUs and Comp Units
251 m
.submodules
.cu
= cu
= CompUnits(self
.rwid
, n_int_alus
)
252 comb
+= cu
.go_die_i
.eq(0)
253 bgt
= cu
.bgt
# get at the branch computation unit
256 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
)
258 # Count of number of FUs
259 n_int_fus
= n_int_alus
260 n_fp_fus
= 0 # for now
262 # Integer Priority Picker 1: Adder + Subtractor
263 intpick1
= GroupPicker(n_int_fus
) # picks between add, sub, mul and shf
264 m
.submodules
.intpick1
= intpick1
267 regdecode
= RegDecode(self
.n_regs
)
268 m
.submodules
.regdecode
= regdecode
269 issueunit
= IntFPIssueUnit(n_int_fus
, n_fp_fus
)
270 m
.submodules
.issueunit
= issueunit
272 # Shadow Matrix. currently n_int_fus shadows, to be used for
273 # write-after-write hazards. NOTE: there is one extra for branches,
274 # so the shadow width is increased by 1
275 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_int_fus
, n_int_fus
, True)
276 m
.submodules
.bshadow
= bshadow
= ShadowMatrix(n_int_fus
, 1, False)
278 # record previous instruction to cast shadow on current instruction
279 fn_issue_prev
= Signal(n_int_fus
)
280 prev_shadow
= Signal(n_int_fus
)
282 # Branch Speculation recorder. tracks the success/fail state as
283 # each instruction is issued, so that when the branch occurs the
284 # allow/cancel can be issued as appropriate.
285 m
.submodules
.specrec
= bspec
= BranchSpeculationRecord(n_int_fus
)
288 # ok start wiring things together...
289 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
290 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
294 # Issue Unit is where it starts. set up some in/outs for this module
296 comb
+= [ regdecode
.dest_i
.eq(self
.int_dest_i
),
297 regdecode
.src1_i
.eq(self
.int_src1_i
),
298 regdecode
.src2_i
.eq(self
.int_src2_i
),
299 regdecode
.enable_i
.eq(self
.reg_enable_i
),
300 self
.issue_o
.eq(issueunit
.issue_o
)
302 self
.int_insn_i
= issueunit
.i
.insn_i
# enabled by instruction decode
304 # TODO: issueunit.f (FP)
306 # and int function issue / busy arrays, and dest/src1/src2
307 comb
+= intfus
.dest_i
.eq(regdecode
.dest_o
)
308 comb
+= intfus
.src1_i
.eq(regdecode
.src1_o
)
309 comb
+= intfus
.src2_i
.eq(regdecode
.src2_o
)
311 fn_issue_o
= issueunit
.i
.fn_issue_o
313 comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
314 comb
+= issueunit
.i
.busy_i
.eq(cu
.busy_o
)
315 comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
318 # merge shadow matrices outputs
321 # these are explained in ShadowMatrix docstring, and are to be
322 # connected to the FUReg and FUFU Matrices, to get them to reset
323 anydie
= Signal(n_int_fus
, reset_less
=True)
324 allshadown
= Signal(n_int_fus
, reset_less
=True)
325 shreset
= Signal(n_int_fus
, reset_less
=True)
326 comb
+= allshadown
.eq(shadows
.shadown_o
& bshadow
.shadown_o
)
327 comb
+= anydie
.eq(shadows
.go_die_o | bshadow
.go_die_o
)
328 comb
+= shreset
.eq(bspec
.match_g_o | bspec
.match_f_o
)
331 # connect fu-fu matrix
334 # Group Picker... done manually for now.
335 go_rd_o
= intpick1
.go_rd_o
336 go_wr_o
= intpick1
.go_wr_o
337 go_rd_i
= intfus
.go_rd_i
338 go_wr_i
= intfus
.go_wr_i
339 go_die_i
= intfus
.go_die_i
340 # NOTE: connect to the shadowed versions so that they can "die" (reset)
341 comb
+= go_rd_i
[0:n_int_fus
].eq(go_rd_o
[0:n_int_fus
]) # rd
342 comb
+= go_wr_i
[0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
]) # wr
343 comb
+= go_die_i
[0:n_int_fus
].eq(anydie
[0:n_int_fus
]) # die
347 comb
+= intpick1
.rd_rel_i
[0:n_int_fus
].eq(cu
.rd_rel_o
[0:n_int_fus
])
348 comb
+= intpick1
.req_rel_i
[0:n_int_fus
].eq(cu
.req_rel_o
[0:n_int_fus
])
349 int_rd_o
= intfus
.readable_o
350 int_wr_o
= intfus
.writable_o
351 comb
+= intpick1
.readable_i
[0:n_int_fus
].eq(int_rd_o
[0:n_int_fus
])
352 comb
+= intpick1
.writable_i
[0:n_int_fus
].eq(int_wr_o
[0:n_int_fus
])
358 comb
+= shadows
.issue_i
.eq(fn_issue_o
)
359 #comb += shadows.reset_i[0:n_int_fus].eq(bshadow.go_die_o[0:n_int_fus])
360 comb
+= shadows
.reset_i
[0:n_int_fus
].eq(bshadow
.go_die_o
[0:n_int_fus
])
362 # NOTE: this setup is for the instruction order preservation...
364 # connect shadows / go_dies to Computation Units
365 comb
+= cu
.shadown_i
[0:n_int_fus
].eq(allshadown
)
366 comb
+= cu
.go_die_i
[0:n_int_fus
].eq(anydie
)
368 # ok connect first n_int_fu shadows to busy lines, to create an
369 # instruction-order linked-list-like arrangement, using a bit-matrix
370 # (instead of e.g. a ring buffer).
373 # when written, the shadow can be cancelled (and was good)
374 for i
in range(n_int_fus
):
375 comb
+= shadows
.s_good_i
[i
][0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
])
377 # work out the current-activated busy unit (by recording the old one)
378 with m
.If(fn_issue_o
): # only update prev bit if instruction issued
379 sync
+= fn_issue_prev
.eq(fn_issue_o
)
381 # *previous* instruction shadows *current* instruction, and, obviously,
382 # if the previous is completed (!busy) don't cast the shadow!
383 comb
+= prev_shadow
.eq(~fn_issue_o
& cu
.busy_o
)
384 for i
in range(n_int_fus
):
385 comb
+= shadows
.shadow_i
[i
][0:n_int_fus
].eq(prev_shadow
)
388 # ... and this is for branch speculation. it uses the extra bit
389 # tacked onto the ShadowMatrix (hence shadow_wid=n_int_fus+1)
390 # only needs to set shadow_i, s_fail_i and s_good_i
392 # issue captures shadow_i (if enabled)
393 comb
+= bshadow
.reset_i
[0:n_int_fus
].eq(shreset
[0:n_int_fus
])
395 bactive
= Signal(reset_less
=True)
396 comb
+= bactive
.eq((bspec
.active_i | cu
.br1
.issue_i
) & ~cu
.br1
.go_wr_i
)
398 # instruction being issued (fn_issue_o) has a shadow cast by the branch
399 with m
.If(bactive
& (self
.branch_succ_i | self
.branch_fail_i
)):
400 comb
+= bshadow
.issue_i
.eq(fn_issue_o
)
401 for i
in range(n_int_fus
):
402 with m
.If(fn_issue_o
& (Const(1<<i
))):
403 comb
+= bshadow
.shadow_i
[i
][0].eq(1)
405 # finally, we need an indicator to the test infrastructure as to
406 # whether the branch succeeded or failed, plus, link up to the
407 # "recorder" of whether the instruction was under shadow or not
409 with m
.If(cu
.br1
.issue_i
):
410 sync
+= bspec
.active_i
.eq(1)
411 with m
.If(self
.branch_succ_i
):
412 comb
+= bspec
.good_i
.eq(fn_issue_o
& 0x1f)
413 with m
.If(self
.branch_fail_i
):
414 comb
+= bspec
.fail_i
.eq(fn_issue_o
& 0x1f)
416 # branch is active (TODO: a better signal: this is over-using the
417 # go_write signal - actually the branch should not be "writing")
418 with m
.If(cu
.br1
.go_wr_i
):
419 sync
+= self
.branch_direction_o
.eq(cu
.br1
.data_o
+Const(1, 2))
420 sync
+= bspec
.active_i
.eq(0)
421 comb
+= bspec
.br_i
.eq(1)
422 # branch occurs if data == 1, failed if data == 0
423 comb
+= bspec
.br_ok_i
.eq(cu
.br1
.data_o
== 1)
424 for i
in range(n_int_fus
):
425 # *expected* direction of the branch matched against *actual*
426 comb
+= bshadow
.s_good_i
[i
][0].eq(bspec
.match_g_o
[i
])
428 comb
+= bshadow
.s_fail_i
[i
][0].eq(bspec
.match_f_o
[i
])
431 # Connect Register File(s)
433 print ("intregdeps wen len", len(intfus
.dest_rsel_o
))
434 comb
+= int_dest
.wen
.eq(intfus
.dest_rsel_o
)
435 comb
+= int_src1
.ren
.eq(intfus
.src1_rsel_o
)
436 comb
+= int_src2
.ren
.eq(intfus
.src2_rsel_o
)
438 # connect ALUs to regfile
439 comb
+= int_dest
.data_i
.eq(cu
.dest_o
)
440 comb
+= cu
.src1_data_i
.eq(int_src1
.data_o
)
441 comb
+= cu
.src2_data_i
.eq(int_src2
.data_o
)
443 # connect ALU Computation Units
444 comb
+= cu
.go_rd_i
[0:n_int_fus
].eq(go_rd_o
[0:n_int_fus
])
445 comb
+= cu
.go_wr_i
[0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
])
446 comb
+= cu
.issue_i
[0:n_int_fus
].eq(fn_issue_o
[0:n_int_fus
])
452 yield from self
.intregs
453 yield from self
.fpregs
454 yield self
.int_dest_i
455 yield self
.int_src1_i
456 yield self
.int_src2_i
458 yield self
.branch_succ_i
459 yield self
.branch_fail_i
460 yield self
.branch_direction_o
475 def __init__(self
, rwidth
, nregs
):
477 self
.regs
= [0] * nregs
479 def op(self
, op
, src1
, src2
, dest
):
480 maxbits
= (1 << self
.rwidth
) - 1
481 src1
= self
.regs
[src1
] & maxbits
482 src2
= self
.regs
[src2
] & maxbits
490 val
= src1
>> (src2
& maxbits
)
492 val
= int(src1
> src2
)
494 val
= int(src1
< src2
)
496 val
= int(src1
== src2
)
498 val
= int(src1
!= src2
)
500 self
.setval(dest
, val
)
def setval(self, dest, val):
    """Store ``val`` in slot ``dest`` of ``self.regs``.

    A trace line with the slot index and the value rendered in hex is
    printed before the store takes place.
    """
    shown = hex(val)
    print("sim setval", dest, shown)
    self.regs[dest] = val
508 for i
, val
in enumerate(self
.regs
):
509 reg
= yield dut
.intregs
.regs
[i
].reg
510 okstr
= "OK" if reg
== val
else "!ok"
511 print("reg %d expected %x received %x %s" % (i
, val
, reg
, okstr
))
513 def check(self
, dut
):
514 for i
, val
in enumerate(self
.regs
):
515 reg
= yield dut
.intregs
.regs
[i
].reg
517 print("reg %d expected %x received %x\n" % (i
, val
, reg
))
518 yield from self
.dump(dut
)
def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
    """Generator that yields the assignments presenting one instruction.

    Every entry of ``dut.int_insn_i`` is first set to 0.  The destination
    and the two source register numbers are then applied, entry ``op`` of
    ``dut.int_insn_i`` is set to 1, and ``dut.reg_enable_i`` is set to 1.
    Finally ``branch_fail`` and ``branch_success`` are applied to
    ``dut.branch_fail_i`` and ``dut.branch_succ_i`` (in that order).
    """
    insn = dut.int_insn_i

    # start from a clean slate: deselect every operation
    for bit in range(len(insn)):
        yield insn[bit].eq(0)

    # register numbers: destination first, then the two sources
    operands = (
        (dut.int_dest_i, dest),
        (dut.int_src1_i, src1),
        (dut.int_src2_i, src2),
    )
    for port, regnum in operands:
        yield port.eq(regnum)

    # select the requested operation and enable register decode
    yield insn[op].eq(1)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    shadow_flags = (
        (dut.branch_fail_i, branch_fail),
        (dut.branch_succ_i, branch_success),
    )
    for flag, level in shadow_flags:
        yield flag.eq(level)
536 def print_reg(dut
, rnums
):
539 reg
= yield dut
.intregs
.regs
[rnum
].reg
540 rs
.append("%x" % reg
)
541 rnums
= map(str, rnums
)
542 print ("reg %s: %s" % (','.join(rnums
), ','.join(rs
)))
545 def create_random_ops(dut
, n_ops
, shadowing
=False, max_opnums
=3):
547 for i
in range(n_ops
):
548 src1
= randint(1, dut
.n_regs
-1)
549 src2
= randint(1, dut
.n_regs
-1)
550 dest
= randint(1, dut
.n_regs
-1)
551 op
= randint(0, max_opnums
)
554 insts
.append((src1
, src2
, dest
, op
, (0, 0)))
556 insts
.append((src1
, src2
, dest
, op
))
560 def wait_for_busy_clear(dut
):
562 busy_o
= yield dut
.busy_o
569 def wait_for_issue(dut
):
571 issue_o
= yield dut
.issue_o
573 for i
in range(len(dut
.int_insn_i
)):
574 yield dut
.int_insn_i
[i
].eq(0)
575 yield dut
.reg_enable_i
.eq(0)
578 #yield from print_reg(dut, [1,2,3])
580 #yield from print_reg(dut, [1,2,3])
582 def scoreboard_branch_sim(dut
, alusim
):
588 print ("rseed", iseed
)
592 yield dut
.branch_direction_o
.eq(0)
594 # set random values in the registers
595 for i
in range(1, dut
.n_regs
):
597 val
= randint(0, (1<<alusim
.rwidth
)-1)
598 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
599 alusim
.setval(i
, val
)
602 # create some instructions: branches create a tree
603 insts
= create_random_ops(dut
, 1, True, 1)
604 #insts.append((6, 6, 1, 2, (0, 0)))
605 #insts.append((4, 3, 3, 0, (0, 0)))
607 src1
= randint(1, dut
.n_regs
-1)
608 src2
= randint(1, dut
.n_regs
-1)
610 op
= 4 # only BGT at the moment
612 branch_ok
= create_random_ops(dut
, 1, True, 1)
613 branch_fail
= create_random_ops(dut
, 1, True, 1)
615 insts
.append((src1
, src2
, (branch_ok
, branch_fail
), op
, (0, 0)))
619 #insts.append( (3, 5, 2, 0, (0, 0)) )
622 branch_ok
.append ( (5, 7, 5, 1, (1, 0)) )
623 #branch_ok.append( None )
624 branch_fail
.append( (1, 1, 2, 0, (0, 1)) )
625 #branch_fail.append( None )
626 insts
.append( (6, 4, (branch_ok
, branch_fail
), 4, (0, 0)) )
628 siminsts
= deepcopy(insts
)
630 # issue instruction(s)
638 branch_direction
= yield dut
.branch_direction_o
# way branch went
639 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = insts
.pop(0)
640 if branch_direction
== 1 and shadow_on
:
641 print ("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
642 continue # branch was "success" and this is a "failed"... skip
643 if branch_direction
== 2 and shadow_off
:
644 print ("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
645 continue # branch was "fail" and this is a "success"... skip
646 if branch_direction
!= 0:
651 branch_ok
, branch_fail
= dest
653 # ok zip up the branch success / fail instructions and
654 # drop them into the queue, one marked "to have branch success"
655 # the other to be marked shadow branch "fail".
656 # one out of each of these will be cancelled
657 for ok
, fl
in zip(branch_ok
, branch_fail
):
659 instrs
.append((ok
[0], ok
[1], ok
[2], ok
[3], (1, 0)))
661 instrs
.append((fl
[0], fl
[1], fl
[2], fl
[3], (0, 1)))
662 print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
663 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
664 yield from int_instr(dut
, op
, src1
, src2
, dest
,
665 shadow_on
, shadow_off
)
667 yield from wait_for_issue(dut
)
669 # wait for all instructions to stop before checking
671 yield from wait_for_busy_clear(dut
)
675 instr
= siminsts
.pop(0)
678 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = instr
682 branch_ok
, branch_fail
= dest
684 print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
685 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
686 branch_res
= alusim
.op(op
, src1
, src2
, dest
)
689 siminsts
+= branch_ok
691 siminsts
+= branch_fail
694 yield from alusim
.check(dut
)
695 yield from alusim
.dump(dut
)
698 def scoreboard_sim(dut
, alusim
):
704 # set random values in the registers
705 for i
in range(1, dut
.n_regs
):
707 val
= randint(0, (1<<alusim
.rwidth
)-1)
708 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
709 alusim
.setval(i
, val
)
711 # create some instructions (some random, some regression tests)
714 instrs
= create_random_ops(dut
, 10, True, 4)
717 instrs
.append((2, 3, 3, 0))
718 instrs
.append((5, 3, 3, 1))
721 instrs
.append((5, 6, 2, 1))
722 instrs
.append((2, 2, 4, 0))
723 #instrs.append((2, 2, 3, 1))
726 instrs
.append((2, 1, 2, 3))
729 instrs
.append((2, 6, 2, 1))
730 instrs
.append((2, 1, 2, 0))
733 instrs
.append((1, 2, 7, 2))
734 instrs
.append((7, 1, 5, 0))
735 instrs
.append((4, 4, 1, 1))
738 instrs
.append((5, 6, 2, 2))
739 instrs
.append((1, 1, 4, 1))
740 instrs
.append((6, 5, 3, 0))
743 # Write-after-Write Hazard
744 instrs
.append( (3, 6, 7, 2) )
745 instrs
.append( (4, 4, 7, 1) )
748 # self-read/write-after-write followed by Read-after-Write
749 instrs
.append((1, 1, 1, 1))
750 instrs
.append((1, 5, 3, 0))
753 # Read-after-Write followed by self-read-after-write
754 instrs
.append((5, 6, 1, 2))
755 instrs
.append((1, 1, 1, 1))
758 # self-read-write sandwich
759 instrs
.append((5, 6, 1, 2))
760 instrs
.append((1, 1, 1, 1))
761 instrs
.append((1, 5, 3, 0))
765 instrs
.append( (5, 2, 5, 2) )
766 instrs
.append( (2, 6, 3, 0) )
767 instrs
.append( (4, 2, 2, 1) )
771 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
773 yield dut
.intregs
.regs
[3].reg
.eq(5)
775 instrs
.append((5, 3, 3, 4, (0, 0)))
776 instrs
.append((4, 2, 1, 2, (0, 1)))
780 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
782 yield dut
.intregs
.regs
[3].reg
.eq(5)
784 instrs
.append((5, 3, 3, 4, (0, 0)))
785 instrs
.append((4, 2, 1, 2, (1, 0)))
788 instrs
.append( (4, 3, 5, 1, (0, 0)) )
789 instrs
.append( (5, 2, 3, 1, (0, 0)) )
790 instrs
.append( (7, 1, 5, 2, (0, 0)) )
791 instrs
.append( (5, 6, 6, 4, (0, 0)) )
792 instrs
.append( (7, 5, 2, 2, (1, 0)) )
793 instrs
.append( (1, 7, 5, 0, (0, 1)) )
794 instrs
.append( (1, 6, 1, 2, (1, 0)) )
795 instrs
.append( (1, 6, 7, 3, (0, 0)) )
796 instrs
.append( (6, 7, 7, 0, (0, 0)) )
798 # issue instruction(s), wait for issue to be free before proceeding
799 for i
, (src1
, src2
, dest
, op
, (br_ok
, br_fail
)) in enumerate(instrs
):
801 print ("instr %d: (%d, %d, %d, %d)" % (i
, src1
, src2
, dest
, op
))
802 alusim
.op(op
, src1
, src2
, dest
)
803 yield from int_instr(dut
, op
, src1
, src2
, dest
, br_ok
, br_fail
)
805 yield from wait_for_issue(dut
)
807 # wait for all instructions to stop before checking
809 yield from wait_for_busy_clear(dut
)
812 yield from alusim
.check(dut
)
813 yield from alusim
.dump(dut
)
816 def test_scoreboard():
817 dut
= Scoreboard(16, 8)
818 alusim
= RegSim(16, 8)
819 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
820 with
open("test_scoreboard6600.il", "w") as f
:
823 run_simulation(dut
, scoreboard_sim(dut
, alusim
),
824 vcd_name
='test_scoreboard6600.vcd')
826 #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
827 # vcd_name='test_scoreboard6600.vcd')
830 if __name__
== '__main__':