1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
7 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
8 from scoreboard
.global_pending
import GlobalPending
9 from scoreboard
.group_picker
import GroupPicker
10 from scoreboard
.issue_unit
import IssueUnitGroup
, IssueUnitArray
, RegDecode
11 from scoreboard
.shadow
import ShadowMatrix
, BranchSpeculationRecord
12 from scoreboard
.instruction_q
import Instruction
, InstructionQ
14 from compalu
import ComputationUnitNoDelay
16 from alu_hier
import ALU
, BranchALU
17 from nmutil
.latch
import SRLatch
18 from nmutil
.nmoperator
import eq
20 from random
import randint
, seed
21 from copy
import deepcopy
25 class CompUnitsBase(Elaboratable
):
26 """ Computation Unit Base class.
28 Amazingly, this class works recursively. It's supposed to just
29 look after some ALUs (that can handle the same operations),
30 grouping them together, however it turns out that the same code
31 can also group *groups* of Computation Units together as well.
33 Basically it was intended just to concatenate the ALU's issue,
34 go_rd etc. signals together, which start out as bits and become
35 sequences. Turns out that the same trick works just as well
38 So this class may be used recursively to present a top-level
39 sequential concatenation of all the signals in and out of
40 ALUs, whilst at the same time making it convenient to group
43 At the lower level, the intent is that groups of (identical)
44 ALUs may be passed the same operation. Even beyond that,
45 the intent is that that group of (identical) ALUs actually
46 share the *same pipeline* and as such become a "Concurrent
47 Computation Unit" as defined by Mitch Alsup (see section
50 def __init__(self
, rwid
, units
):
53 * :rwid: bit width of register file(s) - both FP and INT
54 * :units: sequence of ALUs (or CompUnitsBase derivatives)
59 if units
and isinstance(units
[0], CompUnitsBase
):
62 self
.n_units
+= u
.n_units
64 self
.n_units
= len(units
)
66 n_units
= self
.n_units
69 self
.issue_i
= Signal(n_units
, reset_less
=True)
70 self
.go_rd_i
= Signal(n_units
, reset_less
=True)
71 self
.go_wr_i
= Signal(n_units
, reset_less
=True)
72 self
.shadown_i
= Signal(n_units
, reset_less
=True)
73 self
.go_die_i
= Signal(n_units
, reset_less
=True)
76 self
.busy_o
= Signal(n_units
, reset_less
=True)
77 self
.rd_rel_o
= Signal(n_units
, reset_less
=True)
78 self
.req_rel_o
= Signal(n_units
, reset_less
=True)
80 # in/out register data (note: not register#, actual data)
81 self
.data_o
= Signal(rwid
, reset_less
=True)
82 self
.src1_i
= Signal(rwid
, reset_less
=True)
83 self
.src2_i
= Signal(rwid
, reset_less
=True)
86 def elaborate(self
, platform
):
90 for i
, alu
in enumerate(self
.units
):
91 setattr(m
.submodules
, "comp%d" % i
, alu
)
101 for alu
in self
.units
:
102 req_rel_l
.append(alu
.req_rel_o
)
103 rd_rel_l
.append(alu
.rd_rel_o
)
104 shadow_l
.append(alu
.shadown_i
)
105 godie_l
.append(alu
.go_die_i
)
106 go_wr_l
.append(alu
.go_wr_i
)
107 go_rd_l
.append(alu
.go_rd_i
)
108 issue_l
.append(alu
.issue_i
)
109 busy_l
.append(alu
.busy_o
)
110 comb
+= self
.rd_rel_o
.eq(Cat(*rd_rel_l
))
111 comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
112 comb
+= self
.busy_o
.eq(Cat(*busy_l
))
113 comb
+= Cat(*godie_l
).eq(self
.go_die_i
)
114 comb
+= Cat(*shadow_l
).eq(self
.shadown_i
)
115 comb
+= Cat(*go_wr_l
).eq(self
.go_wr_i
)
116 comb
+= Cat(*go_rd_l
).eq(self
.go_rd_i
)
117 comb
+= Cat(*issue_l
).eq(self
.issue_i
)
119 # connect data register input/output
121 # merge (OR) all integer FU / ALU outputs to a single value
122 # bit of a hack: treereduce needs a list with an item named "data_o"
124 data_o
= treereduce(self
.units
)
125 comb
+= self
.data_o
.eq(data_o
)
127 for i
, alu
in enumerate(self
.units
):
128 comb
+= alu
.src1_i
.eq(self
.src1_i
)
129 comb
+= alu
.src2_i
.eq(self
.src2_i
)
134 class CompUnitALUs(CompUnitsBase
):
136 def __init__(self
, rwid
, opwid
):
139 * :rwid: bit width of register file(s) - both FP and INT
140 * :opwid: operand bit width
145 self
.oper_i
= Signal(opwid
, reset_less
=True)
154 for alu
in [add
, sub
, mul
, shf
]:
155 units
.append(ComputationUnitNoDelay(rwid
, 2, alu
))
157 CompUnitsBase
.__init
__(self
, rwid
, units
)
159 def elaborate(self
, platform
):
160 m
= CompUnitsBase
.elaborate(self
, platform
)
163 # hand the same operation to all units
164 for alu
in self
.units
:
165 comb
+= alu
.oper_i
.eq(self
.oper_i
)
166 #comb += self.units[0].oper_i.eq(Const(0, 2)) # op=add
167 #comb += self.units[1].oper_i.eq(Const(1, 2)) # op=sub
168 #comb += self.units[2].oper_i.eq(Const(2, 2)) # op=mul
169 #comb += self.units[3].oper_i.eq(Const(3, 2)) # op=shf
174 class CompUnitBR(CompUnitsBase
):
176 def __init__(self
, rwid
, opwid
):
179 * :rwid: bit width of register file(s) - both FP and INT
180 * :opwid: operand bit width
182 Note: bgt unit is returned so that a shadow unit can be created
188 self
.oper_i
= Signal(opwid
, reset_less
=True)
191 self
.bgt
= BranchALU(rwid
)
192 self
.br1
= ComputationUnitNoDelay(rwid
, 3, self
.bgt
)
193 CompUnitsBase
.__init
__(self
, rwid
, [self
.br1
])
195 def elaborate(self
, platform
):
196 m
= CompUnitsBase
.elaborate(self
, platform
)
199 # hand the same operation to all units
200 for alu
in self
.units
:
201 comb
+= alu
.oper_i
.eq(self
.oper_i
)
202 #comb += self.br1.oper_i.eq(Const(4, 3)) # op=bgt
207 class FunctionUnits(Elaboratable
):
209 def __init__(self
, n_regs
, n_int_alus
):
211 self
.n_int_alus
= n_int_alus
213 self
.dest_i
= Signal(n_regs
, reset_less
=True) # Dest R# in
214 self
.src1_i
= Signal(n_regs
, reset_less
=True) # oper1 R# in
215 self
.src2_i
= Signal(n_regs
, reset_less
=True) # oper2 R# in
217 self
.g_int_rd_pend_o
= Signal(n_regs
, reset_less
=True)
218 self
.g_int_wr_pend_o
= Signal(n_regs
, reset_less
=True)
220 self
.dest_rsel_o
= Signal(n_regs
, reset_less
=True) # dest reg (bot)
221 self
.src1_rsel_o
= Signal(n_regs
, reset_less
=True) # src1 reg (bot)
222 self
.src2_rsel_o
= Signal(n_regs
, reset_less
=True) # src2 reg (bot)
224 self
.req_rel_i
= Signal(n_int_alus
, reset_less
= True)
225 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
226 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
228 self
.go_rd_i
= Signal(n_int_alus
, reset_less
=True)
229 self
.go_wr_i
= Signal(n_int_alus
, reset_less
=True)
230 self
.go_die_i
= Signal(n_int_alus
, reset_less
=True)
231 self
.req_rel_o
= Signal(n_int_alus
, reset_less
=True)
232 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
234 # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
236 def elaborate(self
, platform
):
241 n_intfus
= self
.n_int_alus
243 # Integer FU-FU Dep Matrix
244 intfudeps
= FUFUDepMatrix(n_intfus
, n_intfus
)
245 m
.submodules
.intfudeps
= intfudeps
246 # Integer FU-Reg Dep Matrix
247 intregdeps
= FURegDepMatrix(n_intfus
, self
.n_regs
)
248 m
.submodules
.intregdeps
= intregdeps
250 comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.rd_rsel_o
)
251 comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.wr_rsel_o
)
253 comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.rd_rsel_o
)
254 comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.wr_rsel_o
)
256 comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
257 comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
258 self
.wr_pend_o
= intregdeps
.wr_pend_o
# also output for use in WaWGrid
260 comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
261 comb
+= intfudeps
.go_rd_i
.eq(self
.go_rd_i
)
262 comb
+= intfudeps
.go_wr_i
.eq(self
.go_wr_i
)
263 comb
+= intfudeps
.go_die_i
.eq(self
.go_die_i
)
264 comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
265 comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
267 # Connect function issue / arrays, and dest/src1/src2
268 comb
+= intregdeps
.dest_i
.eq(self
.dest_i
)
269 comb
+= intregdeps
.src1_i
.eq(self
.src1_i
)
270 comb
+= intregdeps
.src2_i
.eq(self
.src2_i
)
272 comb
+= intregdeps
.go_rd_i
.eq(self
.go_rd_i
)
273 comb
+= intregdeps
.go_wr_i
.eq(self
.go_wr_i
)
274 comb
+= intregdeps
.go_die_i
.eq(self
.go_die_i
)
275 comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
277 comb
+= self
.dest_rsel_o
.eq(intregdeps
.dest_rsel_o
)
278 comb
+= self
.src1_rsel_o
.eq(intregdeps
.src1_rsel_o
)
279 comb
+= self
.src2_rsel_o
.eq(intregdeps
.src2_rsel_o
)
284 class Scoreboard(Elaboratable
):
285 def __init__(self
, rwid
, n_regs
):
288 * :rwid: bit width of register file(s) - both FP and INT
289 * :n_regs: depth of register file(s) - number of FP and INT regs
295 self
.intregs
= RegFileArray(rwid
, n_regs
)
296 self
.fpregs
= RegFileArray(rwid
, n_regs
)
298 # issue q needs to get at these
299 self
.aluissue
= IssueUnitGroup(4)
300 self
.brissue
= IssueUnitGroup(1)
302 self
.alu_oper_i
= Signal(4, reset_less
=True)
303 self
.br_oper_i
= Signal(4, reset_less
=True)
306 self
.int_dest_i
= Signal(max=n_regs
, reset_less
=True) # Dest R# in
307 self
.int_src1_i
= Signal(max=n_regs
, reset_less
=True) # oper1 R# in
308 self
.int_src2_i
= Signal(max=n_regs
, reset_less
=True) # oper2 R# in
309 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
312 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
313 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
315 # for branch speculation experiment. branch_direction = 0 if
316 # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
317 # branch_succ and branch_fail are requests to have the current
318 # instruction be dependent on the branch unit "shadow" capability.
319 self
.branch_succ_i
= Signal(reset_less
=True)
320 self
.branch_fail_i
= Signal(reset_less
=True)
321 self
.branch_direction_o
= Signal(2, reset_less
=True)
323 def elaborate(self
, platform
):
328 m
.submodules
.intregs
= self
.intregs
329 m
.submodules
.fpregs
= self
.fpregs
332 int_dest
= self
.intregs
.write_port("dest")
333 int_src1
= self
.intregs
.read_port("src1")
334 int_src2
= self
.intregs
.read_port("src2")
336 fp_dest
= self
.fpregs
.write_port("dest")
337 fp_src1
= self
.fpregs
.read_port("src1")
338 fp_src2
= self
.fpregs
.read_port("src2")
340 # Int ALUs and Comp Units
342 cua
= CompUnitALUs(self
.rwid
, 2)
343 cub
= CompUnitBR(self
.rwid
, 2)
344 m
.submodules
.cu
= cu
= CompUnitsBase(self
.rwid
, [cua
, cub
])
345 bgt
= cub
.bgt
# get at the branch computation unit
349 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
)
351 # Count of number of FUs
352 n_intfus
= n_int_alus
353 n_fp_fus
= 0 # for now
355 # Integer Priority Picker 1: Adder + Subtractor
356 intpick1
= GroupPicker(n_intfus
) # picks between add, sub, mul and shf
357 m
.submodules
.intpick1
= intpick1
360 regdecode
= RegDecode(self
.n_regs
)
361 m
.submodules
.regdecode
= regdecode
362 issueunit
= IssueUnitArray([self
.aluissue
, self
.brissue
])
363 m
.submodules
.issueunit
= issueunit
365 # Shadow Matrix. currently n_intfus shadows, to be used for
366 # write-after-write hazards. NOTE: there is one extra for branches,
367 # so the shadow width is increased by 1
368 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_intfus
, n_intfus
, True)
369 m
.submodules
.bshadow
= bshadow
= ShadowMatrix(n_intfus
, 1, False)
371 # record previous instruction to cast shadow on current instruction
372 fn_issue_prev
= Signal(n_intfus
)
373 prev_shadow
= Signal(n_intfus
)
375 # Branch Speculation recorder. tracks the success/fail state as
376 # each instruction is issued, so that when the branch occurs the
377 # allow/cancel can be issued as appropriate.
378 m
.submodules
.specrec
= bspec
= BranchSpeculationRecord(n_intfus
)
381 # ok start wiring things together...
382 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
383 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
387 # Issue Unit is where it starts. set up some in/outs for this module
389 comb
+= [ regdecode
.dest_i
.eq(self
.int_dest_i
),
390 regdecode
.src1_i
.eq(self
.int_src1_i
),
391 regdecode
.src2_i
.eq(self
.int_src2_i
),
392 regdecode
.enable_i
.eq(self
.reg_enable_i
),
393 self
.issue_o
.eq(issueunit
.issue_o
)
396 # take these to outside (issue needs them)
397 comb
+= cua
.oper_i
.eq(self
.alu_oper_i
)
398 comb
+= cub
.oper_i
.eq(self
.br_oper_i
)
400 # TODO: issueunit.f (FP)
402 # and int function issue / busy arrays, and dest/src1/src2
403 comb
+= intfus
.dest_i
.eq(regdecode
.dest_o
)
404 comb
+= intfus
.src1_i
.eq(regdecode
.src1_o
)
405 comb
+= intfus
.src2_i
.eq(regdecode
.src2_o
)
407 fn_issue_o
= issueunit
.fn_issue_o
409 comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
410 comb
+= issueunit
.busy_i
.eq(cu
.busy_o
)
411 comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
414 # merge shadow matrices outputs
417 # these are explained in ShadowMatrix docstring, and are to be
418 # connected to the FUReg and FUFU Matrices, to get them to reset
419 anydie
= Signal(n_intfus
, reset_less
=True)
420 allshadown
= Signal(n_intfus
, reset_less
=True)
421 shreset
= Signal(n_intfus
, reset_less
=True)
422 comb
+= allshadown
.eq(shadows
.shadown_o
& bshadow
.shadown_o
)
423 comb
+= anydie
.eq(shadows
.go_die_o | bshadow
.go_die_o
)
424 comb
+= shreset
.eq(bspec
.match_g_o | bspec
.match_f_o
)
427 # connect fu-fu matrix
430 # Group Picker... done manually for now.
431 go_rd_o
= intpick1
.go_rd_o
432 go_wr_o
= intpick1
.go_wr_o
433 go_rd_i
= intfus
.go_rd_i
434 go_wr_i
= intfus
.go_wr_i
435 go_die_i
= intfus
.go_die_i
436 # NOTE: connect to the shadowed versions so that they can "die" (reset)
437 comb
+= go_rd_i
[0:n_intfus
].eq(go_rd_o
[0:n_intfus
]) # rd
438 comb
+= go_wr_i
[0:n_intfus
].eq(go_wr_o
[0:n_intfus
]) # wr
439 comb
+= go_die_i
[0:n_intfus
].eq(anydie
[0:n_intfus
]) # die
443 comb
+= intpick1
.rd_rel_i
[0:n_intfus
].eq(cu
.rd_rel_o
[0:n_intfus
])
444 comb
+= intpick1
.req_rel_i
[0:n_intfus
].eq(cu
.req_rel_o
[0:n_intfus
])
445 int_rd_o
= intfus
.readable_o
446 int_wr_o
= intfus
.writable_o
447 comb
+= intpick1
.readable_i
[0:n_intfus
].eq(int_rd_o
[0:n_intfus
])
448 comb
+= intpick1
.writable_i
[0:n_intfus
].eq(int_wr_o
[0:n_intfus
])
454 comb
+= shadows
.issue_i
.eq(fn_issue_o
)
455 #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
456 comb
+= shadows
.reset_i
[0:n_intfus
].eq(bshadow
.go_die_o
[0:n_intfus
])
458 # NOTE: this setup is for the instruction order preservation...
460 # connect shadows / go_dies to Computation Units
461 comb
+= cu
.shadown_i
[0:n_intfus
].eq(allshadown
)
462 comb
+= cu
.go_die_i
[0:n_intfus
].eq(anydie
)
464 # ok connect first n_int_fu shadows to busy lines, to create an
465 # instruction-order linked-list-like arrangement, using a bit-matrix
466 # (instead of e.g. a ring buffer).
469 # when written, the shadow can be cancelled (and was good)
470 for i
in range(n_intfus
):
471 comb
+= shadows
.s_good_i
[i
][0:n_intfus
].eq(go_wr_o
[0:n_intfus
])
473 # work out the current-activated busy unit (by recording the old one)
474 with m
.If(fn_issue_o
): # only update prev bit if instruction issued
475 sync
+= fn_issue_prev
.eq(fn_issue_o
)
477 # *previous* instruction shadows *current* instruction, and, obviously,
478 # if the previous is completed (!busy) don't cast the shadow!
479 comb
+= prev_shadow
.eq(~fn_issue_o
& cu
.busy_o
)
480 for i
in range(n_intfus
):
481 comb
+= shadows
.shadow_i
[i
][0:n_intfus
].eq(prev_shadow
)
484 # ... and this is for branch speculation. it uses the extra bit
485 # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
486 # only needs to set shadow_i, s_fail_i and s_good_i
488 # issue captures shadow_i (if enabled)
489 comb
+= bshadow
.reset_i
[0:n_intfus
].eq(shreset
[0:n_intfus
])
491 bactive
= Signal(reset_less
=True)
492 comb
+= bactive
.eq((bspec
.active_i | br1
.issue_i
) & ~br1
.go_wr_i
)
494 # instruction being issued (fn_issue_o) has a shadow cast by the branch
495 with m
.If(bactive
& (self
.branch_succ_i | self
.branch_fail_i
)):
496 comb
+= bshadow
.issue_i
.eq(fn_issue_o
)
497 for i
in range(n_intfus
):
498 with m
.If(fn_issue_o
& (Const(1<<i
))):
499 comb
+= bshadow
.shadow_i
[i
][0].eq(1)
501 # finally, we need an indicator to the test infrastructure as to
502 # whether the branch succeeded or failed, plus, link up to the
503 # "recorder" of whether the instruction was under shadow or not
505 with m
.If(br1
.issue_i
):
506 sync
+= bspec
.active_i
.eq(1)
507 with m
.If(self
.branch_succ_i
):
508 comb
+= bspec
.good_i
.eq(fn_issue_o
& 0x1f)
509 with m
.If(self
.branch_fail_i
):
510 comb
+= bspec
.fail_i
.eq(fn_issue_o
& 0x1f)
512 # branch is active (TODO: a better signal: this is over-using the
513 # go_write signal - actually the branch should not be "writing")
514 with m
.If(br1
.go_wr_i
):
515 sync
+= self
.branch_direction_o
.eq(br1
.data_o
+Const(1, 2))
516 sync
+= bspec
.active_i
.eq(0)
517 comb
+= bspec
.br_i
.eq(1)
518 # branch occurs if data == 1, failed if data == 0
519 comb
+= bspec
.br_ok_i
.eq(br1
.data_o
== 1)
520 for i
in range(n_intfus
):
521 # *expected* direction of the branch matched against *actual*
522 comb
+= bshadow
.s_good_i
[i
][0].eq(bspec
.match_g_o
[i
])
524 comb
+= bshadow
.s_fail_i
[i
][0].eq(bspec
.match_f_o
[i
])
527 # Connect Register File(s)
529 comb
+= int_dest
.wen
.eq(intfus
.dest_rsel_o
)
530 comb
+= int_src1
.ren
.eq(intfus
.src1_rsel_o
)
531 comb
+= int_src2
.ren
.eq(intfus
.src2_rsel_o
)
533 # connect ALUs to regfile
534 comb
+= int_dest
.data_i
.eq(cu
.data_o
)
535 comb
+= cu
.src1_i
.eq(int_src1
.data_o
)
536 comb
+= cu
.src2_i
.eq(int_src2
.data_o
)
538 # connect ALU Computation Units
539 comb
+= cu
.go_rd_i
[0:n_intfus
].eq(go_rd_o
[0:n_intfus
])
540 comb
+= cu
.go_wr_i
[0:n_intfus
].eq(go_wr_o
[0:n_intfus
])
541 comb
+= cu
.issue_i
[0:n_intfus
].eq(fn_issue_o
[0:n_intfus
])
546 yield from self
.intregs
547 yield from self
.fpregs
548 yield self
.int_dest_i
549 yield self
.int_src1_i
550 yield self
.int_src2_i
552 yield self
.branch_succ_i
553 yield self
.branch_fail_i
554 yield self
.branch_direction_o
559 class IssueToScoreboard(Elaboratable
):
561 def __init__(self
, qlen
, n_in
, n_out
, rwid
, opwid
, n_regs
):
569 mqbits
= (int(log(qlen
) / log(2))+2, False)
570 self
.p_add_i
= Signal(mqbits
) # instructions to add (from data_i)
571 self
.p_ready_o
= Signal() # instructions were added
572 self
.data_i
= Instruction
.nq(n_in
, "data_i", rwid
, opwid
)
574 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
575 self
.qlen_o
= Signal(mqbits
, reset_less
=True)
577 def elaborate(self
, platform
):
582 iq
= InstructionQ(self
.rwid
, self
.opw
, self
.qlen
, self
.n_in
, self
.n_out
)
583 sc
= Scoreboard(self
.rwid
, self
.n_regs
)
587 # get at the regfile for testing
588 self
.intregs
= sc
.intregs
590 # and the "busy" signal and instruction queue length
591 comb
+= self
.busy_o
.eq(sc
.busy_o
)
592 comb
+= self
.qlen_o
.eq(iq
.qlen_o
)
594 # link up instruction queue
595 comb
+= iq
.p_add_i
.eq(self
.p_add_i
)
596 comb
+= self
.p_ready_o
.eq(iq
.p_ready_o
)
597 for i
in range(self
.n_in
):
598 comb
+= eq(iq
.data_i
[i
], self
.data_i
[i
])
600 # take instruction and process it. note that it's possible to
601 # "inspect" the queue contents *without* actually removing the
602 # items. items are only removed when the
605 wait_issue_br
= Signal()
606 wait_issue_alu
= Signal()
608 with m
.If(wait_issue_br | wait_issue_alu
):
609 # set instruction pop length to 1 if the unit accepted
610 # also tell the unit-group to stop accepting the instruction
611 # and disable the regfile
612 with m
.If(wait_issue_br
& (sc
.brissue
.fn_issue_o
!= 0)):
613 with m
.If(iq
.qlen_o
!= 0):
614 comb
+= iq
.n_sub_i
.eq(1)
615 comb
+= wait_issue_br
.eq(0)
616 comb
+= sc
.brissue
.insn_i
.eq(0)
617 comb
+= sc
.int_dest_i
.eq(0)
618 comb
+= sc
.int_src1_i
.eq(0)
619 comb
+= sc
.int_src2_i
.eq(0)
620 comb
+= sc
.reg_enable_i
.eq(0)
621 with m
.If(wait_issue_alu
& (sc
.aluissue
.fn_issue_o
!= 0)):
622 with m
.If(iq
.qlen_o
!= 0):
623 comb
+= iq
.n_sub_i
.eq(1)
624 comb
+= wait_issue_alu
.eq(0)
625 comb
+= sc
.aluissue
.insn_i
.eq(0)
626 comb
+= sc
.int_dest_i
.eq(0)
627 comb
+= sc
.int_src1_i
.eq(0)
628 comb
+= sc
.int_src2_i
.eq(0)
629 comb
+= sc
.reg_enable_i
.eq(0)
631 # see if some instruction(s) are here. note that this is
632 # "inspecting" the in-place queue. note also that on the
633 # cycle following "waiting" for fn_issue_o to be set, the
634 # "resetting" done above (insn_i=0) could be re-ASSERTed.
635 with m
.If(iq
.qlen_o
!= 0):
636 # get the operands and operation
637 dest
= iq
.data_o
[0].dest_i
638 src1
= iq
.data_o
[0].src1_i
639 src2
= iq
.data_o
[0].src2_i
640 op
= iq
.data_o
[0].oper_i
642 # set the src/dest regs
643 comb
+= sc
.int_dest_i
.eq(dest
)
644 comb
+= sc
.int_src1_i
.eq(src1
)
645 comb
+= sc
.int_src2_i
.eq(src2
)
646 comb
+= sc
.reg_enable_i
.eq(1) # enable the regfile
648 # choose a Function-Unit-Group
649 with m
.If((op
& (0x3<<2)) != 0): # branch
650 comb
+= sc
.brissue
.insn_i
.eq(1)
651 comb
+= sc
.br_oper_i
.eq(op
& 0x3)
652 comb
+= wait_issue_br
.eq(1)
654 comb
+= sc
.aluissue
.insn_i
.eq(1)
655 comb
+= sc
.alu_oper_i
.eq(op
& 0x3)
656 comb
+= wait_issue_alu
.eq(1)
659 # these indicate that the instruction is to be made
660 # shadow-dependent on
661 # (either) branch success or branch fail
662 #yield sc.branch_fail_i.eq(branch_fail)
663 #yield sc.branch_succ_i.eq(branch_success)
669 for o
in self
.data_i
:
686 def __init__(self
, rwidth
, nregs
):
688 self
.regs
= [0] * nregs
690 def op(self
, op
, src1
, src2
, dest
):
691 maxbits
= (1 << self
.rwidth
) - 1
692 src1
= self
.regs
[src1
] & maxbits
693 src2
= self
.regs
[src2
] & maxbits
701 val
= src1
>> (src2
& maxbits
)
703 val
= int(src1
> src2
)
705 val
= int(src1
< src2
)
707 val
= int(src1
== src2
)
709 val
= int(src1
!= src2
)
711 self
.setval(dest
, val
)
def setval(self, dest, val):
    """Record ``val`` as the expected contents of simulated register ``dest``.

    * :dest: index into ``self.regs`` of the register being written
    * :val:  integer value to store (shown in hex on stdout)

    The write is reported on stdout *before* the store happens, so an
    out-of-range ``dest`` still produces the trace line and then raises.
    """
    # announce the write first, exactly as the trace output expects
    hex_text = hex(val)
    print("sim setval", dest, hex_text)
    # then update the software model of the register file
    self.regs[dest] = val
719 for i
, val
in enumerate(self
.regs
):
720 reg
= yield dut
.intregs
.regs
[i
].reg
721 okstr
= "OK" if reg
== val
else "!ok"
722 print("reg %d expected %x received %x %s" % (i
, val
, reg
, okstr
))
724 def check(self
, dut
):
725 for i
, val
in enumerate(self
.regs
):
726 reg
= yield dut
.intregs
.regs
[i
].reg
728 print("reg %d expected %x received %x\n" % (i
, val
, reg
))
729 yield from self
.dump(dut
)
732 def instr_q(dut
, op
, src1
, src2
, dest
, branch_success
, branch_fail
):
733 instrs
= [{'oper_i': op
, 'dest_i': dest
, 'src1_i': src1
, 'src2_i': src2
}]
736 for idx
in range(sendlen
):
737 yield from eq(dut
.data_i
[idx
], instrs
[idx
])
738 di
= yield dut
.data_i
[idx
]
739 print ("senddata %d %x" % (idx
, di
))
740 yield dut
.p_add_i
.eq(sendlen
)
742 o_p_ready
= yield dut
.p_ready_o
745 o_p_ready
= yield dut
.p_ready_o
747 yield dut
.p_add_i
.eq(0)
750 def int_instr(dut
, op
, src1
, src2
, dest
, branch_success
, branch_fail
):
751 yield from disable_issue(dut
)
752 yield dut
.int_dest_i
.eq(dest
)
753 yield dut
.int_src1_i
.eq(src1
)
754 yield dut
.int_src2_i
.eq(src2
)
755 if (op
& (0x3<<2)) != 0: # branch
756 yield dut
.brissue
.insn_i
.eq(1)
757 yield dut
.br_oper_i
.eq(Const(op
& 0x3, 2))
758 dut_issue
= dut
.brissue
760 yield dut
.aluissue
.insn_i
.eq(1)
761 yield dut
.alu_oper_i
.eq(Const(op
& 0x3, 2))
762 dut_issue
= dut
.aluissue
763 yield dut
.reg_enable_i
.eq(1)
765 # these indicate that the instruction is to be made shadow-dependent on
766 # (either) branch success or branch fail
767 yield dut
.branch_fail_i
.eq(branch_fail
)
768 yield dut
.branch_succ_i
.eq(branch_success
)
771 yield from wait_for_issue(dut
, dut_issue
)
774 def print_reg(dut
, rnums
):
777 reg
= yield dut
.intregs
.regs
[rnum
].reg
778 rs
.append("%x" % reg
)
779 rnums
= map(str, rnums
)
780 print ("reg %s: %s" % (','.join(rnums
), ','.join(rs
)))
783 def create_random_ops(dut
, n_ops
, shadowing
=False, max_opnums
=3):
785 for i
in range(n_ops
):
786 src1
= randint(1, dut
.n_regs
-1)
787 src2
= randint(1, dut
.n_regs
-1)
788 dest
= randint(1, dut
.n_regs
-1)
789 op
= randint(0, max_opnums
)
792 insts
.append((src1
, src2
, dest
, op
, (0, 0)))
794 insts
.append((src1
, src2
, dest
, op
))
798 def wait_for_busy_clear(dut
):
800 busy_o
= yield dut
.busy_o
def disable_issue(dut):
    """Simulation helper: de-assert the instruction-issue request lines.

    Yields, in order, an assignment of 0 to ``dut.aluissue.insn_i`` and
    then to ``dut.brissue.insn_i``.  Intended to be driven with
    ``yield from`` inside a simulation process.

    * :dut: device under test exposing ``aluissue`` and ``brissue``
    """
    # ALU group first, then the branch group - same order as issue set-up
    for issue_group in (dut.aluissue, dut.brissue):
        yield issue_group.insn_i.eq(0)
811 def wait_for_issue(dut
, dut_issue
):
813 issue_o
= yield dut_issue
.fn_issue_o
815 yield from disable_issue(dut
)
816 yield dut
.reg_enable_i
.eq(0)
819 #yield from print_reg(dut, [1,2,3])
821 #yield from print_reg(dut, [1,2,3])
823 def scoreboard_branch_sim(dut
, alusim
):
829 print ("rseed", iseed
)
833 yield dut
.branch_direction_o
.eq(0)
835 # set random values in the registers
836 for i
in range(1, dut
.n_regs
):
838 val
= randint(0, (1<<alusim
.rwidth
)-1)
839 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
840 alusim
.setval(i
, val
)
843 # create some instructions: branches create a tree
844 insts
= create_random_ops(dut
, 1, True, 1)
845 #insts.append((6, 6, 1, 2, (0, 0)))
846 #insts.append((4, 3, 3, 0, (0, 0)))
848 src1
= randint(1, dut
.n_regs
-1)
849 src2
= randint(1, dut
.n_regs
-1)
851 op
= 4 # only BGT at the moment
853 branch_ok
= create_random_ops(dut
, 1, True, 1)
854 branch_fail
= create_random_ops(dut
, 1, True, 1)
856 insts
.append((src1
, src2
, (branch_ok
, branch_fail
), op
, (0, 0)))
860 insts
.append( (3, 5, 2, 0, (0, 0)) )
863 #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
864 branch_ok
.append( None )
865 branch_fail
.append( (1, 1, 2, 0, (0, 1)) )
866 #branch_fail.append( None )
867 insts
.append( (6, 4, (branch_ok
, branch_fail
), 4, (0, 0)) )
869 siminsts
= deepcopy(insts
)
871 # issue instruction(s)
879 branch_direction
= yield dut
.branch_direction_o
# way branch went
880 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = insts
.pop(0)
881 if branch_direction
== 1 and shadow_on
:
882 print ("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
883 continue # branch was "success" and this is a "failed"... skip
884 if branch_direction
== 2 and shadow_off
:
885 print ("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
886 continue # branch was "fail" and this is a "success"... skip
887 if branch_direction
!= 0:
892 branch_ok
, branch_fail
= dest
894 # ok zip up the branch success / fail instructions and
895 # drop them into the queue, one marked "to have branch success"
896 # the other to be marked shadow branch "fail".
897 # one out of each of these will be cancelled
898 for ok
, fl
in zip(branch_ok
, branch_fail
):
900 instrs
.append((ok
[0], ok
[1], ok
[2], ok
[3], (1, 0)))
902 instrs
.append((fl
[0], fl
[1], fl
[2], fl
[3], (0, 1)))
903 print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
904 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
905 yield from int_instr(dut
, op
, src1
, src2
, dest
,
906 shadow_on
, shadow_off
)
908 # wait for all instructions to stop before checking
910 yield from wait_for_busy_clear(dut
)
914 instr
= siminsts
.pop(0)
917 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = instr
921 branch_ok
, branch_fail
= dest
923 print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
924 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
925 branch_res
= alusim
.op(op
, src1
, src2
, dest
)
928 siminsts
+= branch_ok
930 siminsts
+= branch_fail
933 yield from alusim
.check(dut
)
934 yield from alusim
.dump(dut
)
937 def scoreboard_sim(dut
, alusim
):
943 # set random values in the registers
944 for i
in range(1, dut
.n_regs
):
945 val
= randint(0, (1<<alusim
.rwidth
)-1)
948 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
949 alusim
.setval(i
, val
)
951 # create some instructions (some random, some regression tests)
954 instrs
= create_random_ops(dut
, 15, True, 3)
957 instrs
.append( (7, 3, 2, 4, (0, 0)) )
958 instrs
.append( (7, 6, 6, 2, (0, 0)) )
959 instrs
.append( (1, 7, 2, 2, (0, 0)) )
963 instrs
.append((2, 3, 3, 0, (0, 0)))
964 instrs
.append((5, 3, 3, 1, (0, 0)))
965 instrs
.append((3, 5, 5, 2, (0, 0)))
966 instrs
.append((5, 3, 3, 3, (0, 0)))
967 instrs
.append((3, 5, 5, 0, (0, 0)))
970 instrs
.append((5, 6, 2, 1))
971 instrs
.append((2, 2, 4, 0))
972 #instrs.append((2, 2, 3, 1))
975 instrs
.append((2, 1, 2, 3))
978 instrs
.append((2, 6, 2, 1))
979 instrs
.append((2, 1, 2, 0))
982 instrs
.append((1, 2, 7, 2))
983 instrs
.append((7, 1, 5, 0))
984 instrs
.append((4, 4, 1, 1))
987 instrs
.append((5, 6, 2, 2))
988 instrs
.append((1, 1, 4, 1))
989 instrs
.append((6, 5, 3, 0))
992 # Write-after-Write Hazard
993 instrs
.append( (3, 6, 7, 2) )
994 instrs
.append( (4, 4, 7, 1) )
997 # self-read/write-after-write followed by Read-after-Write
998 instrs
.append((1, 1, 1, 1))
999 instrs
.append((1, 5, 3, 0))
1002 # Read-after-Write followed by self-read-after-write
1003 instrs
.append((5, 6, 1, 2))
1004 instrs
.append((1, 1, 1, 1))
1007 # self-read-write sandwich
1008 instrs
.append((5, 6, 1, 2))
1009 instrs
.append((1, 1, 1, 1))
1010 instrs
.append((1, 5, 3, 0))
1013 # very weird failure
1014 instrs
.append( (5, 2, 5, 2) )
1015 instrs
.append( (2, 6, 3, 0) )
1016 instrs
.append( (4, 2, 2, 1) )
1020 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1021 alusim
.setval(5, v1
)
1022 yield dut
.intregs
.regs
[3].reg
.eq(5)
1024 instrs
.append((5, 3, 3, 4, (0, 0)))
1025 instrs
.append((4, 2, 1, 2, (0, 1)))
1029 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1030 alusim
.setval(5, v1
)
1031 yield dut
.intregs
.regs
[3].reg
.eq(5)
1033 instrs
.append((5, 3, 3, 4, (0, 0)))
1034 instrs
.append((4, 2, 1, 2, (1, 0)))
1037 instrs
.append( (4, 3, 5, 1, (0, 0)) )
1038 instrs
.append( (5, 2, 3, 1, (0, 0)) )
1039 instrs
.append( (7, 1, 5, 2, (0, 0)) )
1040 instrs
.append( (5, 6, 6, 4, (0, 0)) )
1041 instrs
.append( (7, 5, 2, 2, (1, 0)) )
1042 instrs
.append( (1, 7, 5, 0, (0, 1)) )
1043 instrs
.append( (1, 6, 1, 2, (1, 0)) )
1044 instrs
.append( (1, 6, 7, 3, (0, 0)) )
1045 instrs
.append( (6, 7, 7, 0, (0, 0)) )
1047 # issue instruction(s), wait for issue to be free before proceeding
1048 for i
, (src1
, src2
, dest
, op
, (br_ok
, br_fail
)) in enumerate(instrs
):
1050 print ("instr %d: (%d, %d, %d, %d)" % (i
, src1
, src2
, dest
, op
))
1051 alusim
.op(op
, src1
, src2
, dest
)
1052 yield from instr_q(dut
, op
, src1
, src2
, dest
, br_ok
, br_fail
)
1054 # wait for all instructions to stop before checking
1056 iqlen
= yield dut
.qlen_o
1064 yield from wait_for_busy_clear(dut
)
1067 yield from alusim
.check(dut
)
1068 yield from alusim
.dump(dut
)
1071 def test_scoreboard():
1072 dut
= IssueToScoreboard(2, 1, 1, 16, 8, 8)
1073 alusim
= RegSim(16, 8)
1074 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
1075 with
open("test_scoreboard6600.il", "w") as f
:
1078 run_simulation(dut
, scoreboard_sim(dut
, alusim
),
1079 vcd_name
='test_scoreboard6600.vcd')
1081 #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
1082 # vcd_name='test_scoreboard6600.vcd')
1085 if __name__
== '__main__':