1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fn_unit
import IntFnUnit
, FPFnUnit
, LDFnUnit
, STFnUnit
7 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
8 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
9 from scoreboard
.global_pending
import GlobalPending
10 from scoreboard
.group_picker
import GroupPicker
11 from scoreboard
.issue_unit
import IntFPIssueUnit
, RegDecode
12 from scoreboard
.shadow
import ShadowMatrix
, WaWGrid
14 from compalu
import ComputationUnitNoDelay
16 from alu_hier
import ALU
17 from nmutil
.latch
import SRLatch
19 from random
import randint
21 class CompUnits(Elaboratable
):
23 def __init__(self
, rwid
, n_units
):
26 * :rwid: bit width of register file(s) - both FP and INT
27 * :n_units: number of ALUs
29 self
.n_units
= n_units
32 self
.issue_i
= Signal(n_units
, reset_less
=True)
33 self
.go_rd_i
= Signal(n_units
, reset_less
=True)
34 self
.go_wr_i
= Signal(n_units
, reset_less
=True)
35 self
.shadown_i
= Signal(n_units
, reset_less
=True)
36 self
.go_die_i
= Signal(n_units
, reset_less
=True)
37 self
.busy_o
= Signal(n_units
, reset_less
=True)
38 self
.rd_rel_o
= Signal(n_units
, reset_less
=True)
39 self
.req_rel_o
= Signal(n_units
, reset_less
=True)
41 self
.dest_o
= Signal(rwid
, reset_less
=True)
42 self
.src1_data_i
= Signal(rwid
, reset_less
=True)
43 self
.src2_data_i
= Signal(rwid
, reset_less
=True)
45 def elaborate(self
, platform
):
53 m
.submodules
.comp1
= comp1
= ComputationUnitNoDelay(self
.rwid
, 2, add
)
54 m
.submodules
.comp2
= comp2
= ComputationUnitNoDelay(self
.rwid
, 2, sub
)
55 m
.submodules
.comp3
= comp3
= ComputationUnitNoDelay(self
.rwid
, 2, mul
)
56 m
.submodules
.comp4
= comp4
= ComputationUnitNoDelay(self
.rwid
, 2, shf
)
57 int_alus
= [comp1
, comp2
, comp3
, comp4
]
59 m
.d
.comb
+= comp1
.oper_i
.eq(Const(0, 2)) # op=add
60 m
.d
.comb
+= comp2
.oper_i
.eq(Const(1, 2)) # op=sub
61 m
.d
.comb
+= comp3
.oper_i
.eq(Const(2, 2)) # op=mul
62 m
.d
.comb
+= comp4
.oper_i
.eq(Const(3, 2)) # op=shf
73 req_rel_l
.append(alu
.req_rel_o
)
74 rd_rel_l
.append(alu
.rd_rel_o
)
75 shadow_l
.append(alu
.shadown_i
)
76 godie_l
.append(alu
.go_die_i
)
77 go_wr_l
.append(alu
.go_wr_i
)
78 go_rd_l
.append(alu
.go_rd_i
)
79 issue_l
.append(alu
.issue_i
)
80 busy_l
.append(alu
.busy_o
)
81 m
.d
.comb
+= self
.rd_rel_o
.eq(Cat(*rd_rel_l
))
82 m
.d
.comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
83 m
.d
.comb
+= self
.busy_o
.eq(Cat(*busy_l
))
84 m
.d
.comb
+= Cat(*godie_l
).eq(self
.go_die_i
)
85 m
.d
.comb
+= Cat(*shadow_l
).eq(self
.shadown_i
)
86 m
.d
.comb
+= Cat(*go_wr_l
).eq(self
.go_wr_i
)
87 m
.d
.comb
+= Cat(*go_rd_l
).eq(self
.go_rd_i
)
88 m
.d
.comb
+= Cat(*issue_l
).eq(self
.issue_i
)
90 # connect data register input/output
92 # merge (OR) all integer FU / ALU outputs to a single value
93 # bit of a hack: treereduce needs a list with an item named "dest_o"
94 dest_o
= treereduce(int_alus
)
95 m
.d
.comb
+= self
.dest_o
.eq(dest_o
)
97 for i
, alu
in enumerate(int_alus
):
98 m
.d
.comb
+= alu
.src1_i
.eq(self
.src1_data_i
)
99 m
.d
.comb
+= alu
.src2_i
.eq(self
.src2_data_i
)
104 class FunctionUnits(Elaboratable
):
106 def __init__(self
, n_regs
, n_int_alus
):
108 self
.n_int_alus
= n_int_alus
110 self
.dest_i
= Signal(n_regs
, reset_less
=True) # Dest R# in
111 self
.src1_i
= Signal(n_regs
, reset_less
=True) # oper1 R# in
112 self
.src2_i
= Signal(n_regs
, reset_less
=True) # oper2 R# in
114 self
.g_int_rd_pend_o
= Signal(n_regs
, reset_less
=True)
115 self
.g_int_wr_pend_o
= Signal(n_regs
, reset_less
=True)
117 self
.dest_rsel_o
= Signal(n_regs
, reset_less
=True) # dest reg (bot)
118 self
.src1_rsel_o
= Signal(n_regs
, reset_less
=True) # src1 reg (bot)
119 self
.src2_rsel_o
= Signal(n_regs
, reset_less
=True) # src2 reg (bot)
121 self
.req_rel_i
= Signal(n_int_alus
, reset_less
= True)
122 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
123 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
125 self
.go_rd_i
= Signal(n_int_alus
, reset_less
=True)
126 self
.go_wr_i
= Signal(n_int_alus
, reset_less
=True)
127 self
.req_rel_o
= Signal(n_int_alus
, reset_less
=True)
128 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
130 # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
132 def elaborate(self
, platform
):
135 n_int_fus
= self
.n_int_alus
137 # Integer FU-FU Dep Matrix
138 intfudeps
= FUFUDepMatrix(n_int_fus
, n_int_fus
)
139 m
.submodules
.intfudeps
= intfudeps
140 # Integer FU-Reg Dep Matrix
141 intregdeps
= FURegDepMatrix(n_int_fus
, self
.n_regs
)
142 m
.submodules
.intregdeps
= intregdeps
144 m
.d
.comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.rd_rsel_o
)
145 m
.d
.comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.wr_rsel_o
)
147 m
.d
.comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.rd_rsel_o
)
148 m
.d
.comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.wr_rsel_o
)
150 m
.d
.comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
151 m
.d
.comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
152 self
.wr_pend_o
= intregdeps
.wr_pend_o
# also output for use in WaWGrid
154 m
.d
.comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
155 m
.d
.comb
+= intfudeps
.go_rd_i
.eq(self
.go_rd_i
)
156 m
.d
.comb
+= intfudeps
.go_wr_i
.eq(self
.go_wr_i
)
157 m
.d
.comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
158 m
.d
.comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
160 # Connect function issue / arrays, and dest/src1/src2
161 m
.d
.comb
+= intregdeps
.dest_i
.eq(self
.dest_i
)
162 m
.d
.comb
+= intregdeps
.src1_i
.eq(self
.src1_i
)
163 m
.d
.comb
+= intregdeps
.src2_i
.eq(self
.src2_i
)
165 m
.d
.comb
+= intregdeps
.go_rd_i
.eq(self
.go_rd_i
)
166 m
.d
.comb
+= intregdeps
.go_wr_i
.eq(self
.go_wr_i
)
167 m
.d
.comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
169 m
.d
.comb
+= self
.dest_rsel_o
.eq(intregdeps
.dest_rsel_o
)
170 m
.d
.comb
+= self
.src1_rsel_o
.eq(intregdeps
.src1_rsel_o
)
171 m
.d
.comb
+= self
.src2_rsel_o
.eq(intregdeps
.src2_rsel_o
)
176 class Scoreboard(Elaboratable
):
177 def __init__(self
, rwid
, n_regs
):
180 * :rwid: bit width of register file(s) - both FP and INT
181 * :n_regs: depth of register file(s) - number of FP and INT regs
187 self
.intregs
= RegFileArray(rwid
, n_regs
)
188 self
.fpregs
= RegFileArray(rwid
, n_regs
)
191 self
.int_store_i
= Signal(reset_less
=True) # instruction is a store
192 self
.int_dest_i
= Signal(max=n_regs
, reset_less
=True) # Dest R# in
193 self
.int_src1_i
= Signal(max=n_regs
, reset_less
=True) # oper1 R# in
194 self
.int_src2_i
= Signal(max=n_regs
, reset_less
=True) # oper2 R# in
195 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
197 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
198 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
200 def elaborate(self
, platform
):
203 m
.submodules
.intregs
= self
.intregs
204 m
.submodules
.fpregs
= self
.fpregs
207 int_dest
= self
.intregs
.write_port("dest")
208 int_src1
= self
.intregs
.read_port("src1")
209 int_src2
= self
.intregs
.read_port("src2")
211 fp_dest
= self
.fpregs
.write_port("dest")
212 fp_src1
= self
.fpregs
.read_port("src1")
213 fp_src2
= self
.fpregs
.read_port("src2")
215 # Int ALUs and Comp Units
217 m
.submodules
.cu
= cu
= CompUnits(self
.rwid
, n_int_alus
)
218 m
.d
.comb
+= cu
.shadown_i
.eq(-1)
219 m
.d
.comb
+= cu
.go_die_i
.eq(0)
222 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
)
224 # Count of number of FUs
225 n_int_fus
= n_int_alus
226 n_fp_fus
= 0 # for now
228 # Integer Priority Picker 1: Adder, Subtractor, Multiplier and Shifter
229 intpick1
= GroupPicker(n_int_fus
) # picks between add, sub, mul and shf
230 m
.submodules
.intpick1
= intpick1
233 regdecode
= RegDecode(self
.n_regs
)
234 m
.submodules
.regdecode
= regdecode
235 issueunit
= IntFPIssueUnit(self
.n_regs
, n_int_fus
, n_fp_fus
)
236 m
.submodules
.issueunit
= issueunit
238 # Shadow Matrix. currently n_int_fus shadows, to be used for
239 # write-after-write hazards
240 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_int_fus
, n_int_fus
)
241 go_rd_rst
= Signal(n_int_fus
, reset_less
=True)
242 go_wr_rst
= Signal(n_int_fus
, reset_less
=True)
244 # Write-after-Write grid: selects one shadow to enable, based
245 # on which unit(s) have writes pending and the current instruction
246 # also needing to write
247 m
.submodules
.wawgrid
= wawgrid
= WaWGrid(n_int_fus
, n_int_fus
)
248 busy_prev
= Signal(n_int_fus
)
249 busy_curr
= Signal(n_int_fus
)
252 # ok start wiring things together...
253 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
254 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
258 # Issue Unit is where it starts. set up some in/outs for this module
260 m
.d
.comb
+= [issueunit
.i
.store_i
.eq(self
.int_store_i
),
261 regdecode
.dest_i
.eq(self
.int_dest_i
),
262 regdecode
.src1_i
.eq(self
.int_src1_i
),
263 regdecode
.src2_i
.eq(self
.int_src2_i
),
264 regdecode
.enable_i
.eq(self
.reg_enable_i
),
265 issueunit
.i
.dest_i
.eq(regdecode
.dest_o
),
266 self
.issue_o
.eq(issueunit
.issue_o
)
268 self
.int_insn_i
= issueunit
.i
.insn_i
# enabled by instruction decode
270 # connect global rd/wr pending vector (for WaW detection)
271 m
.d
.sync
+= issueunit
.i
.g_wr_pend_i
.eq(intfus
.g_int_wr_pend_o
)
272 # TODO: issueunit.f (FP)
274 # and int function issue / busy arrays, and dest/src1/src2
275 m
.d
.comb
+= intfus
.dest_i
.eq(regdecode
.dest_o
)
276 m
.d
.comb
+= intfus
.src1_i
.eq(regdecode
.src1_o
)
277 m
.d
.comb
+= intfus
.src2_i
.eq(regdecode
.src2_o
)
279 fn_issue_o
= issueunit
.i
.fn_issue_o
281 m
.d
.comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
282 m
.d
.comb
+= issueunit
.i
.busy_i
.eq(cu
.busy_o
)
283 m
.d
.comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
286 # connect fu-fu matrix
289 # Group Picker... done manually for now.
290 go_rd_o
= intpick1
.go_rd_o
291 go_wr_o
= intpick1
.go_wr_o
292 go_rd_i
= intfus
.go_rd_i
293 go_wr_i
= intfus
.go_wr_i
294 # NOTE: connect to the shadowed versions so that they can "die" (reset)
295 m
.d
.comb
+= go_rd_i
[0:n_int_fus
].eq(go_rd_rst
[0:n_int_fus
]) # rd
296 m
.d
.comb
+= go_wr_i
[0:n_int_fus
].eq(go_wr_rst
[0:n_int_fus
]) # wr
300 m
.d
.comb
+= intpick1
.rd_rel_i
[0:n_int_fus
].eq(cu
.rd_rel_o
[0:n_int_fus
])
301 m
.d
.comb
+= intpick1
.req_rel_i
[0:n_int_fus
].eq(cu
.req_rel_o
[0:n_int_fus
])
302 int_rd_o
= intfus
.readable_o
303 int_wr_o
= intfus
.writable_o
304 m
.d
.comb
+= intpick1
.readable_i
[0:n_int_fus
].eq(int_rd_o
[0:n_int_fus
])
305 m
.d
.comb
+= intpick1
.writable_i
[0:n_int_fus
].eq(int_wr_o
[0:n_int_fus
])
311 m
.d
.comb
+= shadows
.issue_i
.eq(fn_issue_o
)
312 # these are explained in ShadowMatrix docstring, and are to be
313 # connected to the FUReg and FUFU Matrices, to get them to reset
314 # NOTE: do NOT connect these to the Computation Units. The CUs need to
315 # do something slightly different (due to the revolving-door SRLatches)
316 m
.d
.comb
+= go_rd_rst
.eq(go_rd_o | shadows
.go_die_o
)
317 m
.d
.comb
+= go_wr_rst
.eq(go_wr_o | shadows
.go_die_o
)
319 # connect shadows / go_dies to Computation Units
320 m
.d
.comb
+= cu
.shadown_i
[0:n_int_fus
].eq(shadows
.shadown_o
[0:n_int_fus
])
321 m
.d
.comb
+= cu
.go_die_i
[0:n_int_fus
].eq(shadows
.go_die_o
[0:n_int_fus
])
323 # ok connect first n_int_fu shadows to busy lines, to create an
324 # instruction-order linked-list-like arrangement, using a bit-matrix
325 # (instead of e.g. a ring buffer).
328 # when written, the shadow can be cancelled (and was good)
329 m
.d
.comb
+= shadows
.s_good_i
[0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
])
331 # work out the current-activated busy unit (by recording the old one)
332 m
.d
.sync
+= busy_prev
.eq(cu
.busy_o
)
333 m
.d
.comb
+= busy_curr
.eq(~busy_prev
& cu
.busy_o
)
336 # Connect Register File(s)
338 print ("intregdeps wen len", len(intfus
.dest_rsel_o
))
339 m
.d
.comb
+= int_dest
.wen
.eq(intfus
.dest_rsel_o
)
340 m
.d
.comb
+= int_src1
.ren
.eq(intfus
.src1_rsel_o
)
341 m
.d
.comb
+= int_src2
.ren
.eq(intfus
.src2_rsel_o
)
343 # connect ALUs to regfile
344 m
.d
.comb
+= int_dest
.data_i
.eq(cu
.dest_o
)
345 m
.d
.comb
+= cu
.src1_data_i
.eq(int_src1
.data_o
)
346 m
.d
.comb
+= cu
.src2_data_i
.eq(int_src2
.data_o
)
348 # connect ALU Computation Units
349 m
.d
.comb
+= cu
.go_rd_i
[0:n_int_fus
].eq(go_rd_o
[0:n_int_fus
])
350 m
.d
.comb
+= cu
.go_wr_i
[0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
])
351 m
.d
.comb
+= cu
.issue_i
[0:n_int_fus
].eq(fn_issue_o
[0:n_int_fus
])
357 yield from self
.intregs
358 yield from self
.fpregs
359 yield self
.int_store_i
360 yield self
.int_dest_i
361 yield self
.int_src1_i
362 yield self
.int_src2_i
364 #yield from self.int_src1
365 #yield from self.int_dest
366 #yield from self.int_src1
367 #yield from self.int_src2
368 #yield from self.fp_dest
369 #yield from self.fp_src1
370 #yield from self.fp_src2
381 def __init__(self
, rwidth
, nregs
):
383 self
.regs
= [0] * nregs
385 def op(self
, op
, src1
, src2
, dest
):
386 maxbits
= (1 << self
.rwidth
) - 1
387 src1
= self
.regs
[src1
]
388 src2
= self
.regs
[src2
]
396 val
= src1
>> (src2
& maxbits
)
398 self
.regs
[dest
] = val
def setval(self, dest, val):
    """Overwrite one entry of the simulated register list.

    :dest: index into ``self.regs`` of the register to overwrite
    :val:  value to store at that index (stored as given)
    """
    registers = self.regs
    registers[dest] = val
404 for i
, val
in enumerate(self
.regs
):
405 reg
= yield dut
.intregs
.regs
[i
].reg
406 okstr
= "OK" if reg
== val
else "!ok"
407 print("reg %d expected %x received %x %s" % (i
, val
, reg
, okstr
))
409 def check(self
, dut
):
410 for i
, val
in enumerate(self
.regs
):
411 reg
= yield dut
.intregs
.regs
[i
].reg
413 print("reg %d expected %x received %x\n" % (i
, val
, reg
))
414 yield from self
.dump(dut
)
def int_instr(dut, alusim, op, src1, src2, dest):
    """Generator that drives one integer instruction into ``dut``.

    Yields, in order: an ``eq(0)`` for every bit of ``dut.int_insn_i``,
    the dest / src1 / src2 register-number assignments, an ``eq(1)`` on
    bit ``op`` of ``dut.int_insn_i``, and finally ``reg_enable_i.eq(1)``.
    Once the last yielded statement has been consumed, the same operation
    is applied to ``alusim`` via ``alusim.op(op, src1, src2, dest)``.

    :dut:    object exposing int_insn_i, int_dest_i, int_src1_i,
             int_src2_i and reg_enable_i
    :alusim: object exposing ``op(op, src1, src2, dest)``
    :op:     index of the int_insn_i bit to raise
    :src1:   first source register number
    :src2:   second source register number
    :dest:   destination register number
    """
    insn = dut.int_insn_i

    # clear every instruction-select bit before raising the requested one
    for bit_idx in range(len(insn)):
        yield insn[bit_idx].eq(0)

    # register numbers: dest first, then the two sources
    operand_ports = (
        (dut.int_dest_i, dest),
        (dut.int_src1_i, src1),
        (dut.int_src2_i, src2),
    )
    for port, regnum in operand_ports:
        yield port.eq(regnum)

    yield insn[op].eq(1)
    yield dut.reg_enable_i.eq(1)

    # update the software model only after all statements are issued
    alusim.op(op, src1, src2, dest)
428 def print_reg(dut
, rnums
):
431 reg
= yield dut
.intregs
.regs
[rnum
].reg
432 rs
.append("%x" % reg
)
433 rnums
= map(str, rnums
)
434 print ("reg %s: %s" % (','.join(rnums
), ','.join(rs
)))
437 def scoreboard_sim(dut
, alusim
):
439 yield dut
.int_store_i
.eq(0)
443 # set random values in the registers
444 for i
in range(1, dut
.n_regs
):
446 val
= randint(0, (1<<alusim
.rwidth
)-1)
447 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
448 alusim
.setval(i
, val
)
450 # create some instructions (some random, some regression tests)
454 src1
= randint(1, dut
.n_regs
-1)
455 src2
= randint(1, dut
.n_regs
-1)
457 dest
= randint(1, dut
.n_regs
-1)
459 if dest
not in [src1
, src2
]:
469 instrs
.append((src1
, src2
, dest
, op
))
472 instrs
.append((2, 3, 3, 0))
473 instrs
.append((5, 3, 3, 1))
476 instrs
.append((5, 6, 2, 1))
477 instrs
.append((2, 2, 4, 0))
478 #instrs.append((2, 2, 3, 1))
481 instrs
.append((2, 1, 2, 3))
484 instrs
.append((2, 6, 2, 1))
485 instrs
.append((2, 1, 2, 0))
488 instrs
.append((1, 2, 7, 2))
489 instrs
.append((7, 1, 5, 0))
490 instrs
.append((4, 4, 1, 1))
493 instrs
.append((5, 6, 2, 2))
494 instrs
.append((1, 1, 4, 1))
495 instrs
.append((6, 5, 3, 0))
498 # Write-after-Write Hazard
499 instrs
.append( (3, 6, 7, 2) )
500 instrs
.append( (4, 4, 7, 1) )
503 # self-read/write-after-write followed by Read-after-Write
504 instrs
.append((1, 1, 1, 1))
505 instrs
.append((1, 5, 3, 0))
508 # Read-after-Write followed by self-read-after-write
509 instrs
.append((5, 6, 1, 2))
510 instrs
.append((1, 1, 1, 1))
513 # self-read-write sandwich
514 instrs
.append((5, 6, 1, 2))
515 instrs
.append((1, 1, 1, 1))
516 instrs
.append((1, 5, 3, 0))
520 instrs
.append( (5, 2, 5, 2) )
521 instrs
.append( (2, 6, 3, 0) )
522 instrs
.append( (4, 2, 2, 1) )
524 # issue instruction(s), wait for issue to be free before proceeding
525 for i
, (src1
, src2
, dest
, op
) in enumerate(instrs
):
527 print ("instr %d: (%d, %d, %d, %d)" % (i
, src1
, src2
, dest
, op
))
528 yield from int_instr(dut
, alusim
, op
, src1
, src2
, dest
)
531 issue_o
= yield dut
.issue_o
533 for i
in range(len(dut
.int_insn_i
)):
534 yield dut
.int_insn_i
[i
].eq(0)
535 yield dut
.reg_enable_i
.eq(0)
538 #yield from print_reg(dut, [1,2,3])
540 #yield from print_reg(dut, [1,2,3])
542 # wait for all instructions to stop before checking
545 busy_o
= yield dut
.busy_o
552 yield from alusim
.check(dut
)
553 yield from alusim
.dump(dut
)
556 def explore_groups(dut
):
557 from nmigen
.hdl
.ir
import Fragment
558 from nmigen
.hdl
.xfrm
import LHSGroupAnalyzer
560 fragment
= dut
.elaborate(platform
=None)
561 fr
= Fragment
.get(fragment
, platform
=None)
563 groups
= LHSGroupAnalyzer()(fragment
._statements
)
568 def test_scoreboard():
569 dut
= Scoreboard(16, 8)
570 alusim
= RegSim(16, 8)
571 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
572 with
open("test_scoreboard6600.il", "w") as f
:
575 run_simulation(dut
, scoreboard_sim(dut
, alusim
),
576 vcd_name
='test_scoreboard6600.vcd')
579 if __name__
== '__main__':