1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
7 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
8 from scoreboard
.global_pending
import GlobalPending
9 from scoreboard
.group_picker
import GroupPicker
10 from scoreboard
.issue_unit
import IssueUnitGroup
, IssueUnitArray
, RegDecode
11 from scoreboard
.shadow
import ShadowMatrix
, BranchSpeculationRecord
12 from scoreboard
.instruction_q
import Instruction
, InstructionQ
14 from compalu
import ComputationUnitNoDelay
16 from alu_hier
import ALU
, BranchALU
17 from nmutil
.latch
import SRLatch
18 from nmutil
.nmoperator
import eq
20 from random
import randint
, seed
21 from copy
import deepcopy
class Memory(Elaboratable):
    """Simple memory block with one read port and one write port.

    * :regwid: bit width of each memory word
    * :addrw:  bit width of the address signal

    The depth calculation treats the address space as bytes
    (``1 << addrw``) and divides it by the number of bytes per word.
    """

    def __init__(self, regwid, addrw):
        # This class shadows the name of nmigen's own Memory primitive, so
        # import the primitive under an alias rather than calling ourselves.
        from nmigen import Memory as NMigenMemory

        # integer division: depth is used both as the memory depth and as
        # a range() bound, neither of which accepts a float.
        depth = (1 << addrw) // (regwid // 8)
        self.adr = Signal(addrw)
        self.dat_r = Signal(regwid)
        self.dat_w = Signal(regwid)
        self.we = Signal()  # write enable, wired to the write port below

        # each location is initialised to its own index
        self.mem = NMigenMemory(width=regwid, depth=depth,
                                init=range(0, depth))

    def elaborate(self, platform):
        m = Module()
        m.submodules.rdport = rdport = self.mem.read_port()
        m.submodules.wrport = wrport = self.mem.write_port()
        m.d.comb += [
            # NOTE(review): the read address drops the two low bits while
            # the write address below uses self.adr unshifted - confirm
            # whether the asymmetry is intended.
            rdport.addr.eq(self.adr[2:]),
            self.dat_r.eq(rdport.data),
            wrport.addr.eq(self.adr),
            wrport.data.eq(self.dat_w),
            wrport.en.eq(self.we),
        ]
        return m
48 class CompUnitsBase(Elaboratable
):
49 """ Computation Unit Base class.
51 Amazingly, this class works recursively. It's supposed to just
52 look after some ALUs (that can handle the same operations),
53 grouping them together, however it turns out that the same code
54 can also group *groups* of Computation Units together as well.
56 Basically it was intended just to concatenate the ALU's issue,
57 go_rd etc. signals together, which start out as bits and become
58 sequences. Turns out that the same trick works just as well
61 So this class may be used recursively to present a top-level
62 sequential concatenation of all the signals in and out of
63 ALUs, whilst at the same time making it convenient to group
66 At the lower level, the intent is that groups of (identical)
67 ALUs may be passed the same operation. Even beyond that,
68 the intent is that that group of (identical) ALUs actually
69 share the *same pipeline* and as such become a "Concurrent
70 Computation Unit" as defined by Mitch Alsup (see section
73 def __init__(self
, rwid
, units
):
76 * :rwid: bit width of register file(s) - both FP and INT
77 * :units: sequence of ALUs (or CompUnitsBase derivatives)
82 if units
and isinstance(units
[0], CompUnitsBase
):
85 self
.n_units
+= u
.n_units
87 self
.n_units
= len(units
)
89 n_units
= self
.n_units
92 self
.issue_i
= Signal(n_units
, reset_less
=True)
93 self
.go_rd_i
= Signal(n_units
, reset_less
=True)
94 self
.go_wr_i
= Signal(n_units
, reset_less
=True)
95 self
.shadown_i
= Signal(n_units
, reset_less
=True)
96 self
.go_die_i
= Signal(n_units
, reset_less
=True)
99 self
.busy_o
= Signal(n_units
, reset_less
=True)
100 self
.rd_rel_o
= Signal(n_units
, reset_less
=True)
101 self
.req_rel_o
= Signal(n_units
, reset_less
=True)
103 # in/out register data (note: not register#, actual data)
104 self
.data_o
= Signal(rwid
, reset_less
=True)
105 self
.src1_i
= Signal(rwid
, reset_less
=True)
106 self
.src2_i
= Signal(rwid
, reset_less
=True)
109 def elaborate(self
, platform
):
113 for i
, alu
in enumerate(self
.units
):
114 setattr(m
.submodules
, "comp%d" % i
, alu
)
124 for alu
in self
.units
:
125 req_rel_l
.append(alu
.req_rel_o
)
126 rd_rel_l
.append(alu
.rd_rel_o
)
127 shadow_l
.append(alu
.shadown_i
)
128 godie_l
.append(alu
.go_die_i
)
129 go_wr_l
.append(alu
.go_wr_i
)
130 go_rd_l
.append(alu
.go_rd_i
)
131 issue_l
.append(alu
.issue_i
)
132 busy_l
.append(alu
.busy_o
)
133 comb
+= self
.rd_rel_o
.eq(Cat(*rd_rel_l
))
134 comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
135 comb
+= self
.busy_o
.eq(Cat(*busy_l
))
136 comb
+= Cat(*godie_l
).eq(self
.go_die_i
)
137 comb
+= Cat(*shadow_l
).eq(self
.shadown_i
)
138 comb
+= Cat(*go_wr_l
).eq(self
.go_wr_i
)
139 comb
+= Cat(*go_rd_l
).eq(self
.go_rd_i
)
140 comb
+= Cat(*issue_l
).eq(self
.issue_i
)
142 # connect data register input/output
144 # merge (OR) all integer FU / ALU outputs to a single value
145 # bit of a hack: treereduce needs a list with an item named "data_o"
147 data_o
= treereduce(self
.units
)
148 comb
+= self
.data_o
.eq(data_o
)
150 for i
, alu
in enumerate(self
.units
):
151 comb
+= alu
.src1_i
.eq(self
.src1_i
)
152 comb
+= alu
.src2_i
.eq(self
.src2_i
)
157 class CompUnitALUs(CompUnitsBase
):
159 def __init__(self
, rwid
, opwid
):
162 * :rwid: bit width of register file(s) - both FP and INT
163 * :opwid: operand bit width
168 self
.oper_i
= Signal(opwid
, reset_less
=True)
177 for alu
in [add
, sub
, mul
, shf
]:
178 units
.append(ComputationUnitNoDelay(rwid
, 2, alu
))
180 CompUnitsBase
.__init
__(self
, rwid
, units
)
182 def elaborate(self
, platform
):
183 m
= CompUnitsBase
.elaborate(self
, platform
)
186 # hand the same operation to all units
187 for alu
in self
.units
:
188 comb
+= alu
.oper_i
.eq(self
.oper_i
)
189 #comb += self.units[0].oper_i.eq(Const(0, 2)) # op=add
190 #comb += self.units[1].oper_i.eq(Const(1, 2)) # op=sub
191 #comb += self.units[2].oper_i.eq(Const(2, 2)) # op=mul
192 #comb += self.units[3].oper_i.eq(Const(3, 2)) # op=shf
197 class CompUnitBR(CompUnitsBase
):
199 def __init__(self
, rwid
, opwid
):
202 * :rwid: bit width of register file(s) - both FP and INT
203 * :opwid: operand bit width
205 Note: bgt unit is returned so that a shadow unit can be created
211 self
.oper_i
= Signal(opwid
, reset_less
=True)
214 self
.bgt
= BranchALU(rwid
)
215 self
.br1
= ComputationUnitNoDelay(rwid
, 3, self
.bgt
)
216 CompUnitsBase
.__init
__(self
, rwid
, [self
.br1
])
218 def elaborate(self
, platform
):
219 m
= CompUnitsBase
.elaborate(self
, platform
)
222 # hand the same operation to all units
223 for alu
in self
.units
:
224 comb
+= alu
.oper_i
.eq(self
.oper_i
)
225 #comb += self.br1.oper_i.eq(Const(4, 3)) # op=bgt
230 class FunctionUnits(Elaboratable
):
232 def __init__(self
, n_regs
, n_int_alus
):
234 self
.n_int_alus
= n_int_alus
236 self
.dest_i
= Signal(n_regs
, reset_less
=True) # Dest R# in
237 self
.src1_i
= Signal(n_regs
, reset_less
=True) # oper1 R# in
238 self
.src2_i
= Signal(n_regs
, reset_less
=True) # oper2 R# in
240 self
.g_int_rd_pend_o
= Signal(n_regs
, reset_less
=True)
241 self
.g_int_wr_pend_o
= Signal(n_regs
, reset_less
=True)
243 self
.dest_rsel_o
= Signal(n_regs
, reset_less
=True) # dest reg (bot)
244 self
.src1_rsel_o
= Signal(n_regs
, reset_less
=True) # src1 reg (bot)
245 self
.src2_rsel_o
= Signal(n_regs
, reset_less
=True) # src2 reg (bot)
247 self
.req_rel_i
= Signal(n_int_alus
, reset_less
= True)
248 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
249 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
251 self
.go_rd_i
= Signal(n_int_alus
, reset_less
=True)
252 self
.go_wr_i
= Signal(n_int_alus
, reset_less
=True)
253 self
.go_die_i
= Signal(n_int_alus
, reset_less
=True)
254 self
.req_rel_o
= Signal(n_int_alus
, reset_less
=True)
255 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
257 # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
259 def elaborate(self
, platform
):
264 n_intfus
= self
.n_int_alus
266 # Integer FU-FU Dep Matrix
267 intfudeps
= FUFUDepMatrix(n_intfus
, n_intfus
)
268 m
.submodules
.intfudeps
= intfudeps
269 # Integer FU-Reg Dep Matrix
270 intregdeps
= FURegDepMatrix(n_intfus
, self
.n_regs
)
271 m
.submodules
.intregdeps
= intregdeps
273 comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.rd_rsel_o
)
274 comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.wr_rsel_o
)
276 comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.rd_rsel_o
)
277 comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.wr_rsel_o
)
279 comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
280 comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
281 self
.wr_pend_o
= intregdeps
.wr_pend_o
# also output for use in WaWGrid
283 comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
284 comb
+= intfudeps
.go_rd_i
.eq(self
.go_rd_i
)
285 comb
+= intfudeps
.go_wr_i
.eq(self
.go_wr_i
)
286 comb
+= intfudeps
.go_die_i
.eq(self
.go_die_i
)
287 comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
288 comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
290 # Connect function issue / arrays, and dest/src1/src2
291 comb
+= intregdeps
.dest_i
.eq(self
.dest_i
)
292 comb
+= intregdeps
.src1_i
.eq(self
.src1_i
)
293 comb
+= intregdeps
.src2_i
.eq(self
.src2_i
)
295 comb
+= intregdeps
.go_rd_i
.eq(self
.go_rd_i
)
296 comb
+= intregdeps
.go_wr_i
.eq(self
.go_wr_i
)
297 comb
+= intregdeps
.go_die_i
.eq(self
.go_die_i
)
298 comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
300 comb
+= self
.dest_rsel_o
.eq(intregdeps
.dest_rsel_o
)
301 comb
+= self
.src1_rsel_o
.eq(intregdeps
.src1_rsel_o
)
302 comb
+= self
.src2_rsel_o
.eq(intregdeps
.src2_rsel_o
)
307 class Scoreboard(Elaboratable
):
308 def __init__(self
, rwid
, n_regs
):
311 * :rwid: bit width of register file(s) - both FP and INT
312 * :n_regs: depth of register file(s) - number of FP and INT regs
318 self
.intregs
= RegFileArray(rwid
, n_regs
)
319 self
.fpregs
= RegFileArray(rwid
, n_regs
)
321 # issue q needs to get at these
322 self
.aluissue
= IssueUnitGroup(4)
323 self
.brissue
= IssueUnitGroup(1)
325 self
.alu_oper_i
= Signal(4, reset_less
=True)
326 self
.br_oper_i
= Signal(4, reset_less
=True)
329 self
.int_dest_i
= Signal(max=n_regs
, reset_less
=True) # Dest R# in
330 self
.int_src1_i
= Signal(max=n_regs
, reset_less
=True) # oper1 R# in
331 self
.int_src2_i
= Signal(max=n_regs
, reset_less
=True) # oper2 R# in
332 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
335 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
336 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
338 # for branch speculation experiment. branch_direction = 0 if
339 # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
340 # branch_succ and branch_fail are requests to have the current
341 # instruction be dependent on the branch unit "shadow" capability.
342 self
.branch_succ_i
= Signal(reset_less
=True)
343 self
.branch_fail_i
= Signal(reset_less
=True)
344 self
.branch_direction_o
= Signal(2, reset_less
=True)
346 def elaborate(self
, platform
):
351 m
.submodules
.intregs
= self
.intregs
352 m
.submodules
.fpregs
= self
.fpregs
355 int_dest
= self
.intregs
.write_port("dest")
356 int_src1
= self
.intregs
.read_port("src1")
357 int_src2
= self
.intregs
.read_port("src2")
359 fp_dest
= self
.fpregs
.write_port("dest")
360 fp_src1
= self
.fpregs
.read_port("src1")
361 fp_src2
= self
.fpregs
.read_port("src2")
363 # Int ALUs and Comp Units
365 cua
= CompUnitALUs(self
.rwid
, 2)
366 cub
= CompUnitBR(self
.rwid
, 2)
367 m
.submodules
.cu
= cu
= CompUnitsBase(self
.rwid
, [cua
, cub
])
368 bgt
= cub
.bgt
# get at the branch computation unit
372 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
)
374 # Count of number of FUs
375 n_intfus
= n_int_alus
376 n_fp_fus
= 0 # for now
378 # Integer Priority Picker 1: Adder + Subtractor
379 intpick1
= GroupPicker(n_intfus
) # picks between add, sub, mul and shf
380 m
.submodules
.intpick1
= intpick1
383 regdecode
= RegDecode(self
.n_regs
)
384 m
.submodules
.regdecode
= regdecode
385 issueunit
= IssueUnitArray([self
.aluissue
, self
.brissue
])
386 m
.submodules
.issueunit
= issueunit
388 # Shadow Matrix. currently n_intfus shadows, to be used for
389 # write-after-write hazards. NOTE: there is one extra for branches,
390 # so the shadow width is increased by 1
391 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_intfus
, n_intfus
, True)
392 m
.submodules
.bshadow
= bshadow
= ShadowMatrix(n_intfus
, 1, False)
394 # record previous instruction to cast shadow on current instruction
395 fn_issue_prev
= Signal(n_intfus
)
396 prev_shadow
= Signal(n_intfus
)
398 # Branch Speculation recorder. tracks the success/fail state as
399 # each instruction is issued, so that when the branch occurs the
400 # allow/cancel can be issued as appropriate.
401 m
.submodules
.specrec
= bspec
= BranchSpeculationRecord(n_intfus
)
404 # ok start wiring things together...
405 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
406 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
410 # Issue Unit is where it starts. set up some in/outs for this module
412 comb
+= [ regdecode
.dest_i
.eq(self
.int_dest_i
),
413 regdecode
.src1_i
.eq(self
.int_src1_i
),
414 regdecode
.src2_i
.eq(self
.int_src2_i
),
415 regdecode
.enable_i
.eq(self
.reg_enable_i
),
416 self
.issue_o
.eq(issueunit
.issue_o
)
419 # take these to outside (issue needs them)
420 comb
+= cua
.oper_i
.eq(self
.alu_oper_i
)
421 comb
+= cub
.oper_i
.eq(self
.br_oper_i
)
423 # TODO: issueunit.f (FP)
425 # and int function issue / busy arrays, and dest/src1/src2
426 comb
+= intfus
.dest_i
.eq(regdecode
.dest_o
)
427 comb
+= intfus
.src1_i
.eq(regdecode
.src1_o
)
428 comb
+= intfus
.src2_i
.eq(regdecode
.src2_o
)
430 fn_issue_o
= issueunit
.fn_issue_o
432 comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
433 comb
+= issueunit
.busy_i
.eq(cu
.busy_o
)
434 comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
437 # merge shadow matrices outputs
440 # these are explained in ShadowMatrix docstring, and are to be
441 # connected to the FUReg and FUFU Matrices, to get them to reset
442 anydie
= Signal(n_intfus
, reset_less
=True)
443 allshadown
= Signal(n_intfus
, reset_less
=True)
444 shreset
= Signal(n_intfus
, reset_less
=True)
445 comb
+= allshadown
.eq(shadows
.shadown_o
& bshadow
.shadown_o
)
446 comb
+= anydie
.eq(shadows
.go_die_o | bshadow
.go_die_o
)
447 comb
+= shreset
.eq(bspec
.match_g_o | bspec
.match_f_o
)
450 # connect fu-fu matrix
453 # Group Picker... done manually for now.
454 go_rd_o
= intpick1
.go_rd_o
455 go_wr_o
= intpick1
.go_wr_o
456 go_rd_i
= intfus
.go_rd_i
457 go_wr_i
= intfus
.go_wr_i
458 go_die_i
= intfus
.go_die_i
459 # NOTE: connect to the shadowed versions so that they can "die" (reset)
460 comb
+= go_rd_i
[0:n_intfus
].eq(go_rd_o
[0:n_intfus
]) # rd
461 comb
+= go_wr_i
[0:n_intfus
].eq(go_wr_o
[0:n_intfus
]) # wr
462 comb
+= go_die_i
[0:n_intfus
].eq(anydie
[0:n_intfus
]) # die
466 comb
+= intpick1
.rd_rel_i
[0:n_intfus
].eq(cu
.rd_rel_o
[0:n_intfus
])
467 comb
+= intpick1
.req_rel_i
[0:n_intfus
].eq(cu
.req_rel_o
[0:n_intfus
])
468 int_rd_o
= intfus
.readable_o
469 int_wr_o
= intfus
.writable_o
470 comb
+= intpick1
.readable_i
[0:n_intfus
].eq(int_rd_o
[0:n_intfus
])
471 comb
+= intpick1
.writable_i
[0:n_intfus
].eq(int_wr_o
[0:n_intfus
])
477 comb
+= shadows
.issue_i
.eq(fn_issue_o
)
478 #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
479 comb
+= shadows
.reset_i
[0:n_intfus
].eq(bshadow
.go_die_o
[0:n_intfus
])
481 # NOTE: this setup is for the instruction order preservation...
483 # connect shadows / go_dies to Computation Units
484 comb
+= cu
.shadown_i
[0:n_intfus
].eq(allshadown
)
485 comb
+= cu
.go_die_i
[0:n_intfus
].eq(anydie
)
487 # ok connect first n_int_fu shadows to busy lines, to create an
488 # instruction-order linked-list-like arrangement, using a bit-matrix
489 # (instead of e.g. a ring buffer).
492 # when written, the shadow can be cancelled (and was good)
493 for i
in range(n_intfus
):
494 comb
+= shadows
.s_good_i
[i
][0:n_intfus
].eq(go_wr_o
[0:n_intfus
])
496 # work out the current-activated busy unit (by recording the old one)
497 with m
.If(fn_issue_o
): # only update prev bit if instruction issued
498 sync
+= fn_issue_prev
.eq(fn_issue_o
)
500 # *previous* instruction shadows *current* instruction, and, obviously,
501 # if the previous is completed (!busy) don't cast the shadow!
502 comb
+= prev_shadow
.eq(~fn_issue_o
& cu
.busy_o
)
503 for i
in range(n_intfus
):
504 comb
+= shadows
.shadow_i
[i
][0:n_intfus
].eq(prev_shadow
)
507 # ... and this is for branch speculation. it uses the extra bit
508 # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
509 # only needs to set shadow_i, s_fail_i and s_good_i
511 # issue captures shadow_i (if enabled)
512 comb
+= bshadow
.reset_i
[0:n_intfus
].eq(shreset
[0:n_intfus
])
514 bactive
= Signal(reset_less
=True)
515 comb
+= bactive
.eq((bspec
.active_i | br1
.issue_i
) & ~br1
.go_wr_i
)
517 # instruction being issued (fn_issue_o) has a shadow cast by the branch
518 with m
.If(bactive
& (self
.branch_succ_i | self
.branch_fail_i
)):
519 comb
+= bshadow
.issue_i
.eq(fn_issue_o
)
520 for i
in range(n_intfus
):
521 with m
.If(fn_issue_o
& (Const(1<<i
))):
522 comb
+= bshadow
.shadow_i
[i
][0].eq(1)
524 # finally, we need an indicator to the test infrastructure as to
525 # whether the branch succeeded or failed, plus, link up to the
526 # "recorder" of whether the instruction was under shadow or not
528 with m
.If(br1
.issue_i
):
529 sync
+= bspec
.active_i
.eq(1)
530 with m
.If(self
.branch_succ_i
):
531 comb
+= bspec
.good_i
.eq(fn_issue_o
& 0x1f)
532 with m
.If(self
.branch_fail_i
):
533 comb
+= bspec
.fail_i
.eq(fn_issue_o
& 0x1f)
535 # branch is active (TODO: a better signal: this is over-using the
536 # go_write signal - actually the branch should not be "writing")
537 with m
.If(br1
.go_wr_i
):
538 sync
+= self
.branch_direction_o
.eq(br1
.data_o
+Const(1, 2))
539 sync
+= bspec
.active_i
.eq(0)
540 comb
+= bspec
.br_i
.eq(1)
541 # branch occurs if data == 1, failed if data == 0
542 comb
+= bspec
.br_ok_i
.eq(br1
.data_o
== 1)
543 for i
in range(n_intfus
):
544 # *expected* direction of the branch matched against *actual*
545 comb
+= bshadow
.s_good_i
[i
][0].eq(bspec
.match_g_o
[i
])
547 comb
+= bshadow
.s_fail_i
[i
][0].eq(bspec
.match_f_o
[i
])
550 # Connect Register File(s)
552 comb
+= int_dest
.wen
.eq(intfus
.dest_rsel_o
)
553 comb
+= int_src1
.ren
.eq(intfus
.src1_rsel_o
)
554 comb
+= int_src2
.ren
.eq(intfus
.src2_rsel_o
)
556 # connect ALUs to regfile
557 comb
+= int_dest
.data_i
.eq(cu
.data_o
)
558 comb
+= cu
.src1_i
.eq(int_src1
.data_o
)
559 comb
+= cu
.src2_i
.eq(int_src2
.data_o
)
561 # connect ALU Computation Units
562 comb
+= cu
.go_rd_i
[0:n_intfus
].eq(go_rd_o
[0:n_intfus
])
563 comb
+= cu
.go_wr_i
[0:n_intfus
].eq(go_wr_o
[0:n_intfus
])
564 comb
+= cu
.issue_i
[0:n_intfus
].eq(fn_issue_o
[0:n_intfus
])
569 yield from self
.intregs
570 yield from self
.fpregs
571 yield self
.int_dest_i
572 yield self
.int_src1_i
573 yield self
.int_src2_i
575 yield self
.branch_succ_i
576 yield self
.branch_fail_i
577 yield self
.branch_direction_o
582 class IssueToScoreboard(Elaboratable
):
584 def __init__(self
, qlen
, n_in
, n_out
, rwid
, opwid
, n_regs
):
592 mqbits
= (int(log(qlen
) / log(2))+2, False)
593 self
.p_add_i
= Signal(mqbits
) # instructions to add (from data_i)
594 self
.p_ready_o
= Signal() # instructions were added
595 self
.data_i
= Instruction
.nq(n_in
, "data_i", rwid
, opwid
)
597 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
598 self
.qlen_o
= Signal(mqbits
, reset_less
=True)
600 def elaborate(self
, platform
):
605 iq
= InstructionQ(self
.rwid
, self
.opw
, self
.qlen
, self
.n_in
, self
.n_out
)
606 sc
= Scoreboard(self
.rwid
, self
.n_regs
)
610 # get at the regfile for testing
611 self
.intregs
= sc
.intregs
613 # and the "busy" signal and instruction queue length
614 comb
+= self
.busy_o
.eq(sc
.busy_o
)
615 comb
+= self
.qlen_o
.eq(iq
.qlen_o
)
617 # link up instruction queue
618 comb
+= iq
.p_add_i
.eq(self
.p_add_i
)
619 comb
+= self
.p_ready_o
.eq(iq
.p_ready_o
)
620 for i
in range(self
.n_in
):
621 comb
+= eq(iq
.data_i
[i
], self
.data_i
[i
])
623 # take instruction and process it. note that it's possible to
624 # "inspect" the queue contents *without* actually removing the
625 # items. items are only removed when the
628 wait_issue_br
= Signal()
629 wait_issue_alu
= Signal()
631 with m
.If(wait_issue_br | wait_issue_alu
):
632 # set instruction pop length to 1 if the unit accepted
633 with m
.If(wait_issue_br
& (sc
.brissue
.fn_issue_o
!= 0)):
634 with m
.If(iq
.qlen_o
!= 0):
635 comb
+= iq
.n_sub_i
.eq(1)
636 with m
.If(wait_issue_alu
& (sc
.aluissue
.fn_issue_o
!= 0)):
637 with m
.If(iq
.qlen_o
!= 0):
638 comb
+= iq
.n_sub_i
.eq(1)
640 # see if some instruction(s) are here. note that this is
641 # "inspecting" the in-place queue. note also that on the
642 # cycle following "waiting" for fn_issue_o to be set, the
643 # "resetting" done above (insn_i=0) could be re-ASSERTed.
644 with m
.If(iq
.qlen_o
!= 0):
645 # get the operands and operation
646 dest
= iq
.data_o
[0].dest_i
647 src1
= iq
.data_o
[0].src1_i
648 src2
= iq
.data_o
[0].src2_i
649 op
= iq
.data_o
[0].oper_i
651 # set the src/dest regs
652 comb
+= sc
.int_dest_i
.eq(dest
)
653 comb
+= sc
.int_src1_i
.eq(src1
)
654 comb
+= sc
.int_src2_i
.eq(src2
)
655 comb
+= sc
.reg_enable_i
.eq(1) # enable the regfile
657 # choose a Function-Unit-Group
658 with m
.If((op
& (0x3<<2)) != 0): # branch
659 comb
+= sc
.brissue
.insn_i
.eq(1)
660 comb
+= sc
.br_oper_i
.eq(op
& 0x3)
661 comb
+= wait_issue_br
.eq(1)
663 comb
+= sc
.aluissue
.insn_i
.eq(1)
664 comb
+= sc
.alu_oper_i
.eq(op
& 0x3)
665 comb
+= wait_issue_alu
.eq(1)
668 # these indicate that the instruction is to be made
669 # shadow-dependent on
670 # (either) branch success or branch fail
671 #yield sc.branch_fail_i.eq(branch_fail)
672 #yield sc.branch_succ_i.eq(branch_success)
678 for o
in self
.data_i
:
def __init__(self, rwidth, nregs):
    """Set up the software model of the register file.

    * :rwidth: register width in bits (op() uses it to mask operands)
    * :nregs:  number of registers; every register starts at zero
    """
    # op() reads self.rwidth and the simulation drivers read
    # alusim.rwidth, so the width has to be kept on the instance.
    self.rwidth = rwidth
    self.regs = [0] * nregs
699 def op(self
, op
, src1
, src2
, dest
):
700 maxbits
= (1 << self
.rwidth
) - 1
701 src1
= self
.regs
[src1
] & maxbits
702 src2
= self
.regs
[src2
] & maxbits
710 val
= src1
>> (src2
& maxbits
)
712 val
= int(src1
> src2
)
714 val
= int(src1
< src2
)
716 val
= int(src1
== src2
)
718 val
= int(src1
!= src2
)
720 self
.setval(dest
, val
)
def setval(self, dest, val):
    """Log *val* (in hex) and store it as the contents of register *dest*."""
    print("sim setval", dest, hex(val))
    regfile = self.regs
    regfile[dest] = val
728 for i
, val
in enumerate(self
.regs
):
729 reg
= yield dut
.intregs
.regs
[i
].reg
730 okstr
= "OK" if reg
== val
else "!ok"
731 print("reg %d expected %x received %x %s" % (i
, val
, reg
, okstr
))
733 def check(self
, dut
):
734 for i
, val
in enumerate(self
.regs
):
735 reg
= yield dut
.intregs
.regs
[i
].reg
737 print("reg %d expected %x received %x\n" % (i
, val
, reg
))
738 yield from self
.dump(dut
)
741 def instr_q(dut
, op
, src1
, src2
, dest
, branch_success
, branch_fail
):
742 instrs
= [{'oper_i': op
, 'dest_i': dest
, 'src1_i': src1
, 'src2_i': src2
}]
745 for idx
in range(sendlen
):
746 yield from eq(dut
.data_i
[idx
], instrs
[idx
])
747 di
= yield dut
.data_i
[idx
]
748 print ("senddata %d %x" % (idx
, di
))
749 yield dut
.p_add_i
.eq(sendlen
)
751 o_p_ready
= yield dut
.p_ready_o
754 o_p_ready
= yield dut
.p_ready_o
756 yield dut
.p_add_i
.eq(0)
def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
    """Simulation helper: present one instruction and wait for its issue.

    * :dut:  design under test, driven through its issue/operand signals
    * :op:   opcode; bits 2-3 non-zero selects the branch issue group,
             otherwise the ALU issue group; bits 0-1 are the operation
    * :src1: source register number 1
    * :src2: source register number 2
    * :dest: destination register number
    * :branch_success: request shadowing on branch success
    * :branch_fail:    request shadowing on branch fail

    Generator: use with ``yield from`` inside a simulation process.
    """
    yield from disable_issue(dut)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)
    # exactly one issue group may be asked to take the instruction, and
    # that is the group whose fn_issue_o is then waited on.
    if (op & (0x3 << 2)) != 0:  # branch
        yield dut.brissue.insn_i.eq(1)
        yield dut.br_oper_i.eq(Const(op & 0x3, 2))
        dut_issue = dut.brissue
    else:
        yield dut.aluissue.insn_i.eq(1)
        yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
        dut_issue = dut.aluissue
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield from wait_for_issue(dut, dut_issue)
def print_reg(dut, rnums):
    """Simulation helper: print the values of the given integer registers.

    * :dut:   design under test; registers are read from
              ``dut.intregs.regs[n].reg``
    * :rnums: iterable of register numbers to display

    Generator: each register value is obtained by yielding the register
    signal to the simulator, so use with ``yield from``.
    """
    # rnums is walked twice (once to read, once to print), so take a copy
    # to keep one-shot iterables working.
    rnums = list(rnums)
    rs = []
    for rnum in rnums:
        reg = yield dut.intregs.regs[rnum].reg
        rs.append("%x" % reg)
    print("reg %s: %s" % (','.join(map(str, rnums)), ','.join(rs)))
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Build a list of random register-to-register instructions.

    * :dut:        object providing ``n_regs``; register numbers are drawn
                   from 1 to n_regs-1 inclusive (register 0 is never used)
    * :n_ops:      number of instructions to create
    * :shadowing:  when true each entry carries a trailing ``(0, 0)``
                   shadow pair, giving ``(src1, src2, dest, op, (0, 0))``;
                   otherwise entries are ``(src1, src2, dest, op)``
    * :max_opnums: highest opcode that may be chosen (inclusive)

    Returns the list of instruction tuples.
    """
    insts = []
    for _ in range(n_ops):
        src1 = randint(1, dut.n_regs - 1)
        src2 = randint(1, dut.n_regs - 1)
        dest = randint(1, dut.n_regs - 1)
        op = randint(0, max_opnums)

        if shadowing:
            insts.append((src1, src2, dest, op, (0, 0)))
        else:
            insts.append((src1, src2, dest, op))
    return insts
807 def wait_for_busy_clear(dut
):
809 busy_o
= yield dut
.busy_o
def disable_issue(dut):
    """Simulation helper: clear the issue request on both issue groups.

    Sets ``insn_i`` to 0 on the ALU group first, then on the branch
    group.  Generator: use with ``yield from``.
    """
    for group_name in ("aluissue", "brissue"):
        yield getattr(dut, group_name).insn_i.eq(0)
820 def wait_for_issue(dut
, dut_issue
):
822 issue_o
= yield dut_issue
.fn_issue_o
824 yield from disable_issue(dut
)
825 yield dut
.reg_enable_i
.eq(0)
828 #yield from print_reg(dut, [1,2,3])
830 #yield from print_reg(dut, [1,2,3])
832 def scoreboard_branch_sim(dut
, alusim
):
838 print ("rseed", iseed
)
842 yield dut
.branch_direction_o
.eq(0)
844 # set random values in the registers
845 for i
in range(1, dut
.n_regs
):
847 val
= randint(0, (1<<alusim
.rwidth
)-1)
848 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
849 alusim
.setval(i
, val
)
852 # create some instructions: branches create a tree
853 insts
= create_random_ops(dut
, 1, True, 1)
854 #insts.append((6, 6, 1, 2, (0, 0)))
855 #insts.append((4, 3, 3, 0, (0, 0)))
857 src1
= randint(1, dut
.n_regs
-1)
858 src2
= randint(1, dut
.n_regs
-1)
860 op
= 4 # only BGT at the moment
862 branch_ok
= create_random_ops(dut
, 1, True, 1)
863 branch_fail
= create_random_ops(dut
, 1, True, 1)
865 insts
.append((src1
, src2
, (branch_ok
, branch_fail
), op
, (0, 0)))
869 insts
.append( (3, 5, 2, 0, (0, 0)) )
872 #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
873 branch_ok
.append( None )
874 branch_fail
.append( (1, 1, 2, 0, (0, 1)) )
875 #branch_fail.append( None )
876 insts
.append( (6, 4, (branch_ok
, branch_fail
), 4, (0, 0)) )
878 siminsts
= deepcopy(insts
)
880 # issue instruction(s)
888 branch_direction
= yield dut
.branch_direction_o
# way branch went
889 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = insts
.pop(0)
890 if branch_direction
== 1 and shadow_on
:
891 print ("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
892 continue # branch was "success" and this is a "failed"... skip
893 if branch_direction
== 2 and shadow_off
:
894 print ("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
895 continue # branch was "fail" and this is a "success"... skip
896 if branch_direction
!= 0:
901 branch_ok
, branch_fail
= dest
903 # ok zip up the branch success / fail instructions and
904 # drop them into the queue, one marked "to have branch success"
905 # the other to be marked shadow branch "fail".
906 # one out of each of these will be cancelled
907 for ok
, fl
in zip(branch_ok
, branch_fail
):
909 instrs
.append((ok
[0], ok
[1], ok
[2], ok
[3], (1, 0)))
911 instrs
.append((fl
[0], fl
[1], fl
[2], fl
[3], (0, 1)))
912 print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
913 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
914 yield from int_instr(dut
, op
, src1
, src2
, dest
,
915 shadow_on
, shadow_off
)
917 # wait for all instructions to stop before checking
919 yield from wait_for_busy_clear(dut
)
923 instr
= siminsts
.pop(0)
926 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = instr
930 branch_ok
, branch_fail
= dest
932 print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
933 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
934 branch_res
= alusim
.op(op
, src1
, src2
, dest
)
937 siminsts
+= branch_ok
939 siminsts
+= branch_fail
942 yield from alusim
.check(dut
)
943 yield from alusim
.dump(dut
)
946 def scoreboard_sim(dut
, alusim
):
952 # set random values in the registers
953 for i
in range(1, dut
.n_regs
):
954 val
= randint(0, (1<<alusim
.rwidth
)-1)
957 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
958 alusim
.setval(i
, val
)
960 # create some instructions (some random, some regression tests)
963 instrs
= create_random_ops(dut
, 15, True, 3)
966 instrs
.append( (7, 3, 2, 4, (0, 0)) )
967 instrs
.append( (7, 6, 6, 2, (0, 0)) )
968 instrs
.append( (1, 7, 2, 2, (0, 0)) )
972 instrs
.append((2, 3, 3, 0, (0, 0)))
973 instrs
.append((5, 3, 3, 1, (0, 0)))
974 instrs
.append((3, 5, 5, 2, (0, 0)))
975 instrs
.append((5, 3, 3, 3, (0, 0)))
976 instrs
.append((3, 5, 5, 0, (0, 0)))
979 instrs
.append((5, 6, 2, 1))
980 instrs
.append((2, 2, 4, 0))
981 #instrs.append((2, 2, 3, 1))
984 instrs
.append((2, 1, 2, 3))
987 instrs
.append((2, 6, 2, 1))
988 instrs
.append((2, 1, 2, 0))
991 instrs
.append((1, 2, 7, 2))
992 instrs
.append((7, 1, 5, 0))
993 instrs
.append((4, 4, 1, 1))
996 instrs
.append((5, 6, 2, 2))
997 instrs
.append((1, 1, 4, 1))
998 instrs
.append((6, 5, 3, 0))
1001 # Write-after-Write Hazard
1002 instrs
.append( (3, 6, 7, 2) )
1003 instrs
.append( (4, 4, 7, 1) )
1006 # self-read/write-after-write followed by Read-after-Write
1007 instrs
.append((1, 1, 1, 1))
1008 instrs
.append((1, 5, 3, 0))
1011 # Read-after-Write followed by self-read-after-write
1012 instrs
.append((5, 6, 1, 2))
1013 instrs
.append((1, 1, 1, 1))
1016 # self-read-write sandwich
1017 instrs
.append((5, 6, 1, 2))
1018 instrs
.append((1, 1, 1, 1))
1019 instrs
.append((1, 5, 3, 0))
1022 # very weird failure
1023 instrs
.append( (5, 2, 5, 2) )
1024 instrs
.append( (2, 6, 3, 0) )
1025 instrs
.append( (4, 2, 2, 1) )
1029 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1030 alusim
.setval(5, v1
)
1031 yield dut
.intregs
.regs
[3].reg
.eq(5)
1033 instrs
.append((5, 3, 3, 4, (0, 0)))
1034 instrs
.append((4, 2, 1, 2, (0, 1)))
1038 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1039 alusim
.setval(5, v1
)
1040 yield dut
.intregs
.regs
[3].reg
.eq(5)
1042 instrs
.append((5, 3, 3, 4, (0, 0)))
1043 instrs
.append((4, 2, 1, 2, (1, 0)))
1046 instrs
.append( (4, 3, 5, 1, (0, 0)) )
1047 instrs
.append( (5, 2, 3, 1, (0, 0)) )
1048 instrs
.append( (7, 1, 5, 2, (0, 0)) )
1049 instrs
.append( (5, 6, 6, 4, (0, 0)) )
1050 instrs
.append( (7, 5, 2, 2, (1, 0)) )
1051 instrs
.append( (1, 7, 5, 0, (0, 1)) )
1052 instrs
.append( (1, 6, 1, 2, (1, 0)) )
1053 instrs
.append( (1, 6, 7, 3, (0, 0)) )
1054 instrs
.append( (6, 7, 7, 0, (0, 0)) )
1056 # issue instruction(s), wait for issue to be free before proceeding
1057 for i
, (src1
, src2
, dest
, op
, (br_ok
, br_fail
)) in enumerate(instrs
):
1059 print ("instr %d: (%d, %d, %d, %d)" % (i
, src1
, src2
, dest
, op
))
1060 alusim
.op(op
, src1
, src2
, dest
)
1061 yield from instr_q(dut
, op
, src1
, src2
, dest
, br_ok
, br_fail
)
1063 # wait for all instructions to stop before checking
1065 iqlen
= yield dut
.qlen_o
1073 yield from wait_for_busy_clear(dut
)
1076 yield from alusim
.check(dut
)
1077 yield from alusim
.dump(dut
)
1080 def test_scoreboard():
1081 dut
= IssueToScoreboard(2, 1, 1, 16, 8, 8)
1082 alusim
= RegSim(16, 8)
1083 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
1084 with
open("test_scoreboard6600.il", "w") as f
:
1087 run_simulation(dut
, scoreboard_sim(dut
, alusim
),
1088 vcd_name
='test_scoreboard6600.vcd')
1090 #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
1091 # vcd_name='test_scoreboard6600.vcd')
1094 if __name__
== '__main__':