b265bf976c1e6d7fc184df6865959000f7857c2d
1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fn_unit
import IntFnUnit
, FPFnUnit
, LDFnUnit
, STFnUnit
7 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
8 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
9 from scoreboard
.global_pending
import GlobalPending
10 from scoreboard
.group_picker
import GroupPicker
11 from scoreboard
.issue_unit
import IntFPIssueUnit
, RegDecode
12 from scoreboard
.shadow
import ShadowMatrix
, WaWGrid
14 from compalu
import ComputationUnitNoDelay
16 from alu_hier
import ALU
17 from nmutil
.latch
import SRLatch
19 from random
import randint
21 class CompUnits(Elaboratable
):
23 def __init__(self
, rwid
, n_units
):
26 * :rwid: bit width of register file(s) - both FP and INT
27 * :n_units: number of ALUs
29 self
.n_units
= n_units
32 self
.issue_i
= Signal(n_units
, reset_less
=True)
33 self
.go_rd_i
= Signal(n_units
, reset_less
=True)
34 self
.go_wr_i
= Signal(n_units
, reset_less
=True)
35 self
.shadown_i
= Signal(n_units
, reset_less
=True)
36 self
.go_die_i
= Signal(n_units
, reset_less
=True)
37 self
.busy_o
= Signal(n_units
, reset_less
=True)
38 self
.rd_rel_o
= Signal(n_units
, reset_less
=True)
39 self
.req_rel_o
= Signal(n_units
, reset_less
=True)
41 self
.dest_o
= Signal(rwid
, reset_less
=True)
42 self
.src1_data_i
= Signal(rwid
, reset_less
=True)
43 self
.src2_data_i
= Signal(rwid
, reset_less
=True)
45 def elaborate(self
, platform
):
53 m
.submodules
.comp1
= comp1
= ComputationUnitNoDelay(self
.rwid
, 2, add
)
54 m
.submodules
.comp2
= comp2
= ComputationUnitNoDelay(self
.rwid
, 2, sub
)
55 m
.submodules
.comp3
= comp3
= ComputationUnitNoDelay(self
.rwid
, 2, mul
)
56 m
.submodules
.comp4
= comp4
= ComputationUnitNoDelay(self
.rwid
, 2, shf
)
57 int_alus
= [comp1
, comp2
, comp3
, comp4
]
59 m
.d
.comb
+= comp1
.oper_i
.eq(Const(0, 2)) # op=add
60 m
.d
.comb
+= comp2
.oper_i
.eq(Const(1, 2)) # op=sub
61 m
.d
.comb
+= comp3
.oper_i
.eq(Const(2, 2)) # op=mul
62 m
.d
.comb
+= comp4
.oper_i
.eq(Const(3, 2)) # op=shf
73 req_rel_l
.append(alu
.req_rel_o
)
74 rd_rel_l
.append(alu
.rd_rel_o
)
75 shadow_l
.append(alu
.shadown_i
)
76 godie_l
.append(alu
.go_die_i
)
77 go_wr_l
.append(alu
.go_wr_i
)
78 go_rd_l
.append(alu
.go_rd_i
)
79 issue_l
.append(alu
.issue_i
)
80 busy_l
.append(alu
.busy_o
)
81 m
.d
.comb
+= self
.rd_rel_o
.eq(Cat(*rd_rel_l
))
82 m
.d
.comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
83 m
.d
.comb
+= self
.busy_o
.eq(Cat(*busy_l
))
84 m
.d
.comb
+= Cat(*godie_l
).eq(self
.go_die_i
)
85 m
.d
.comb
+= Cat(*shadow_l
).eq(self
.shadown_i
)
86 m
.d
.comb
+= Cat(*go_wr_l
).eq(self
.go_wr_i
)
87 m
.d
.comb
+= Cat(*go_rd_l
).eq(self
.go_rd_i
)
88 m
.d
.comb
+= Cat(*issue_l
).eq(self
.issue_i
)
90 # connect data register input/output
92 # merge (OR) all integer FU / ALU outputs to a single value
93 # bit of a hack: treereduce needs a list with an item named "dest_o"
94 dest_o
= treereduce(int_alus
)
95 m
.d
.comb
+= self
.dest_o
.eq(dest_o
)
97 for i
, alu
in enumerate(int_alus
):
98 m
.d
.comb
+= alu
.src1_i
.eq(self
.src1_data_i
)
99 m
.d
.comb
+= alu
.src2_i
.eq(self
.src2_data_i
)
104 class FunctionUnits(Elaboratable
):
106 def __init__(self
, n_regs
, n_int_alus
):
108 self
.n_int_alus
= n_int_alus
110 self
.dest_i
= Signal(n_regs
, reset_less
=True) # Dest R# in
111 self
.src1_i
= Signal(n_regs
, reset_less
=True) # oper1 R# in
112 self
.src2_i
= Signal(n_regs
, reset_less
=True) # oper2 R# in
114 self
.g_int_rd_pend_o
= Signal(n_regs
, reset_less
=True)
115 self
.g_int_wr_pend_o
= Signal(n_regs
, reset_less
=True)
117 self
.dest_rsel_o
= Signal(n_regs
, reset_less
=True) # dest reg (bot)
118 self
.src1_rsel_o
= Signal(n_regs
, reset_less
=True) # src1 reg (bot)
119 self
.src2_rsel_o
= Signal(n_regs
, reset_less
=True) # src2 reg (bot)
121 self
.req_rel_i
= Signal(n_int_alus
, reset_less
= True)
122 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
123 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
125 self
.go_rd_i
= Signal(n_int_alus
, reset_less
=True)
126 self
.go_wr_i
= Signal(n_int_alus
, reset_less
=True)
127 self
.req_rel_o
= Signal(n_int_alus
, reset_less
=True)
128 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
130 # Note: FURegs wr_pend_o is also output from here, for use in WaWGrid
132 def elaborate(self
, platform
):
135 n_int_fus
= self
.n_int_alus
137 # Integer FU-FU Dep Matrix
138 intfudeps
= FUFUDepMatrix(n_int_fus
, n_int_fus
)
139 m
.submodules
.intfudeps
= intfudeps
140 # Integer FU-Reg Dep Matrix
141 intregdeps
= FURegDepMatrix(n_int_fus
, self
.n_regs
)
142 m
.submodules
.intregdeps
= intregdeps
144 m
.d
.comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.rd_rsel_o
)
145 m
.d
.comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.wr_rsel_o
)
147 m
.d
.comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.rd_rsel_o
)
148 m
.d
.comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.wr_rsel_o
)
150 m
.d
.comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
151 m
.d
.comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
152 self
.wr_pend_o
= intregdeps
.wr_pend_o
# also output for use in WaWGrid
154 m
.d
.comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
155 m
.d
.comb
+= intfudeps
.go_rd_i
.eq(self
.go_rd_i
)
156 m
.d
.comb
+= intfudeps
.go_wr_i
.eq(self
.go_wr_i
)
157 m
.d
.comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
158 m
.d
.comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
160 # Connect function issue / arrays, and dest/src1/src2
161 m
.d
.comb
+= intregdeps
.dest_i
.eq(self
.dest_i
)
162 m
.d
.comb
+= intregdeps
.src1_i
.eq(self
.src1_i
)
163 m
.d
.comb
+= intregdeps
.src2_i
.eq(self
.src2_i
)
165 m
.d
.comb
+= intregdeps
.go_rd_i
.eq(self
.go_rd_i
)
166 m
.d
.comb
+= intregdeps
.go_wr_i
.eq(self
.go_wr_i
)
167 m
.d
.comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
169 m
.d
.comb
+= self
.dest_rsel_o
.eq(intregdeps
.dest_rsel_o
)
170 m
.d
.comb
+= self
.src1_rsel_o
.eq(intregdeps
.src1_rsel_o
)
171 m
.d
.comb
+= self
.src2_rsel_o
.eq(intregdeps
.src2_rsel_o
)
176 class Scoreboard(Elaboratable
):
177 def __init__(self
, rwid
, n_regs
):
180 * :rwid: bit width of register file(s) - both FP and INT
181 * :n_regs: depth of register file(s) - number of FP and INT regs
187 self
.intregs
= RegFileArray(rwid
, n_regs
)
188 self
.fpregs
= RegFileArray(rwid
, n_regs
)
191 self
.int_store_i
= Signal(reset_less
=True) # instruction is a store
192 self
.int_dest_i
= Signal(max=n_regs
, reset_less
=True) # Dest R# in
193 self
.int_src1_i
= Signal(max=n_regs
, reset_less
=True) # oper1 R# in
194 self
.int_src2_i
= Signal(max=n_regs
, reset_less
=True) # oper2 R# in
195 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
197 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
198 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
200 def elaborate(self
, platform
):
203 m
.submodules
.intregs
= self
.intregs
204 m
.submodules
.fpregs
= self
.fpregs
207 int_dest
= self
.intregs
.write_port("dest")
208 int_src1
= self
.intregs
.read_port("src1")
209 int_src2
= self
.intregs
.read_port("src2")
211 fp_dest
= self
.fpregs
.write_port("dest")
212 fp_src1
= self
.fpregs
.read_port("src1")
213 fp_src2
= self
.fpregs
.read_port("src2")
215 # Int ALUs and Comp Units
217 m
.submodules
.cu
= cu
= CompUnits(self
.rwid
, n_int_alus
)
218 m
.d
.comb
+= cu
.shadown_i
.eq(-1)
219 m
.d
.comb
+= cu
.go_die_i
.eq(0)
222 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
)
224 # Count of number of FUs
225 n_int_fus
= n_int_alus
226 n_fp_fus
= 0 # for now
228 # Integer Priority Picker 1: one picker shared by all integer FUs (add, sub, mul, shf)
229 intpick1
= GroupPicker(n_int_fus
) # picks between add, sub, mul and shf
230 m
.submodules
.intpick1
= intpick1
233 regdecode
= RegDecode(self
.n_regs
)
234 m
.submodules
.regdecode
= regdecode
235 issueunit
= IntFPIssueUnit(self
.n_regs
, n_int_fus
, n_fp_fus
)
236 m
.submodules
.issueunit
= issueunit
238 # Shadow Matrix. currently n_int_fus shadows, to be used for
239 # write-after-write hazards
240 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_int_fus
, n_int_fus
)
241 go_rd_rst
= Signal(n_int_fus
, reset_less
=True)
242 go_wr_rst
= Signal(n_int_fus
, reset_less
=True)
244 # Write-after-Write grid: selects one shadow to enable, based
245 # on which unit(s) have writes pending and the current instruction
246 # also needing to write
247 m
.submodules
.wawgrid
= wawgrid
= WaWGrid(n_int_fus
, n_int_fus
)
248 busy_prev
= Signal(n_int_fus
)
249 busy_curr
= Signal(n_int_fus
)
252 # ok start wiring things together...
253 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
254 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
258 # Issue Unit is where it starts. set up some in/outs for this module
260 m
.d
.comb
+= [issueunit
.i
.store_i
.eq(self
.int_store_i
),
261 regdecode
.dest_i
.eq(self
.int_dest_i
),
262 regdecode
.src1_i
.eq(self
.int_src1_i
),
263 regdecode
.src2_i
.eq(self
.int_src2_i
),
264 regdecode
.enable_i
.eq(self
.reg_enable_i
),
265 issueunit
.i
.dest_i
.eq(regdecode
.dest_o
),
266 self
.issue_o
.eq(issueunit
.issue_o
)
268 self
.int_insn_i
= issueunit
.i
.insn_i
# enabled by instruction decode
270 # connect global rd/wr pending vector (for WaW detection)
271 m
.d
.sync
+= issueunit
.i
.g_wr_pend_i
.eq(intfus
.g_int_wr_pend_o
)
272 # TODO: issueunit.f (FP)
274 # and int function issue / busy arrays, and dest/src1/src2
275 m
.d
.comb
+= intfus
.dest_i
.eq(regdecode
.dest_o
)
276 m
.d
.comb
+= intfus
.src1_i
.eq(regdecode
.src1_o
)
277 m
.d
.comb
+= intfus
.src2_i
.eq(regdecode
.src2_o
)
279 fn_issue_o
= issueunit
.i
.fn_issue_o
281 m
.d
.comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
282 m
.d
.comb
+= issueunit
.i
.busy_i
.eq(cu
.busy_o
)
283 m
.d
.comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
286 # connect fu-fu matrix
289 # Group Picker... done manually for now.
290 go_rd_o
= intpick1
.go_rd_o
291 go_wr_o
= intpick1
.go_wr_o
292 go_rd_i
= intfus
.go_rd_i
293 go_wr_i
= intfus
.go_wr_i
294 # NOTE: connect to the shadowed versions so that they can "die" (reset)
295 m
.d
.comb
+= go_rd_i
[0:n_int_fus
].eq(go_rd_rst
[0:n_int_fus
]) # rd
296 m
.d
.comb
+= go_wr_i
[0:n_int_fus
].eq(go_wr_rst
[0:n_int_fus
]) # wr
300 m
.d
.comb
+= intpick1
.rd_rel_i
[0:n_int_fus
].eq(cu
.rd_rel_o
[0:n_int_fus
])
301 m
.d
.comb
+= intpick1
.req_rel_i
[0:n_int_fus
].eq(cu
.req_rel_o
[0:n_int_fus
])
302 int_rd_o
= intfus
.readable_o
303 int_wr_o
= intfus
.writable_o
304 m
.d
.comb
+= intpick1
.readable_i
[0:n_int_fus
].eq(int_rd_o
[0:n_int_fus
])
305 m
.d
.comb
+= intpick1
.writable_i
[0:n_int_fus
].eq(int_wr_o
[0:n_int_fus
])
311 m
.d
.comb
+= shadows
.issue_i
.eq(fn_issue_o
)
312 # these are explained in ShadowMatrix docstring, and are to be
313 # connected to the FUReg and FUFU Matrices, to get them to reset
314 # NOTE: do NOT connect these to the Computation Units. The CUs need to
315 # do something slightly different (due to the revolving-door SRLatches)
316 m
.d
.comb
+= go_rd_rst
.eq(go_rd_o | shadows
.go_die_o
)
317 m
.d
.comb
+= go_wr_rst
.eq(go_wr_o | shadows
.go_die_o
)
319 # connect shadows / go_dies to Computation Units
320 m
.d
.comb
+= cu
.shadown_i
[0:n_int_fus
].eq(shadows
.shadown_o
[0:n_int_fus
])
321 m
.d
.comb
+= cu
.go_die_i
[0:n_int_fus
].eq(shadows
.go_die_o
[0:n_int_fus
])
323 # ok connect first n_int_fu shadows to busy lines, to create an
324 # instruction-order linked-list-like arrangement, using a bit-matrix
325 # (instead of e.g. a ring buffer).
328 # when written, the shadow can be cancelled (and was good)
329 m
.d
.comb
+= shadows
.s_good_i
[0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
])
331 # work out the current-activated busy unit (by recording the old one)
332 with m
.If(self
.issue_o
): # only update busy_prev if instruction issued
333 m
.d
.sync
+= busy_prev
.eq(cu
.busy_o
)
334 m
.d
.comb
+= busy_curr
.eq(~busy_prev
& cu
.busy_o
)
337 # Connect Register File(s)
339 print ("intregdeps wen len", len(intfus
.dest_rsel_o
))
340 m
.d
.comb
+= int_dest
.wen
.eq(intfus
.dest_rsel_o
)
341 m
.d
.comb
+= int_src1
.ren
.eq(intfus
.src1_rsel_o
)
342 m
.d
.comb
+= int_src2
.ren
.eq(intfus
.src2_rsel_o
)
344 # connect ALUs to regfile
345 m
.d
.comb
+= int_dest
.data_i
.eq(cu
.dest_o
)
346 m
.d
.comb
+= cu
.src1_data_i
.eq(int_src1
.data_o
)
347 m
.d
.comb
+= cu
.src2_data_i
.eq(int_src2
.data_o
)
349 # connect ALU Computation Units
350 m
.d
.comb
+= cu
.go_rd_i
[0:n_int_fus
].eq(go_rd_o
[0:n_int_fus
])
351 m
.d
.comb
+= cu
.go_wr_i
[0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
])
352 m
.d
.comb
+= cu
.issue_i
[0:n_int_fus
].eq(fn_issue_o
[0:n_int_fus
])
358 yield from self
.intregs
359 yield from self
.fpregs
360 yield self
.int_store_i
361 yield self
.int_dest_i
362 yield self
.int_src1_i
363 yield self
.int_src2_i
365 #yield from self.int_src1
366 #yield from self.int_dest
367 #yield from self.int_src1
368 #yield from self.int_src2
369 #yield from self.fp_dest
370 #yield from self.fp_src1
371 #yield from self.fp_src2
382 def __init__(self
, rwidth
, nregs
):
384 self
.regs
= [0] * nregs
386 def op(self
, op
, src1
, src2
, dest
):
387 maxbits
= (1 << self
.rwidth
) - 1
388 src1
= self
.regs
[src1
]
389 src2
= self
.regs
[src2
]
397 val
= src1
>> (src2
& maxbits
)
399 self
.regs
[dest
] = val
def setval(self, dest, val):
    """Store *val* as the content of simulated register number *dest*.

    Overwrites the entry of ``self.regs`` at index ``dest``; nothing is
    returned.
    """
    regfile = self.regs
    regfile[dest] = val
405 for i
, val
in enumerate(self
.regs
):
406 reg
= yield dut
.intregs
.regs
[i
].reg
407 okstr
= "OK" if reg
== val
else "!ok"
408 print("reg %d expected %x received %x %s" % (i
, val
, reg
, okstr
))
410 def check(self
, dut
):
411 for i
, val
in enumerate(self
.regs
):
412 reg
= yield dut
.intregs
.regs
[i
].reg
414 print("reg %d expected %x received %x\n" % (i
, val
, reg
))
415 yield from self
.dump(dut
)
def int_instr(dut, alusim, op, src1, src2, dest):
    """Present one integer instruction to the DUT and replay it on the model.

    Generator for use in a simulation process.  It yields, in this order:

    * one ``.eq(0)`` assignment per bit of ``dut.int_insn_i`` (clearing any
      previously selected operation),
    * the destination, source-1 and source-2 register numbers on
      ``dut.int_dest_i`` / ``dut.int_src1_i`` / ``dut.int_src2_i``,
    * ``.eq(1)`` on bit ``op`` of ``dut.int_insn_i``,
    * ``.eq(1)`` on ``dut.reg_enable_i``.

    After the last yield has been consumed, the same operation is applied to
    the software model through ``alusim.op(op, src1, src2, dest)``.

    :param dut: object exposing the ``int_*_i`` and ``reg_enable_i`` ports
    :param alusim: register model providing ``op(op, src1, src2, dest)``
    :param op: index of the bit of ``int_insn_i`` to set
    :param src1: first source register number
    :param src2: second source register number
    :param dest: destination register number
    """
    insn = dut.int_insn_i

    # deselect every operation before choosing the new one
    for bit in range(len(insn)):
        yield insn[bit].eq(0)

    # register numbers, in the same order as the original: dest, src1, src2
    operands = (
        (dut.int_dest_i, dest),
        (dut.int_src1_i, src1),
        (dut.int_src2_i, src2),
    )
    for port, regnum in operands:
        yield port.eq(regnum)

    # select the requested operation, then enable register decode
    yield insn[op].eq(1)
    yield dut.reg_enable_i.eq(1)

    # keep the software model in step with what was just issued
    alusim.op(op, src1, src2, dest)
429 def print_reg(dut
, rnums
):
432 reg
= yield dut
.intregs
.regs
[rnum
].reg
433 rs
.append("%x" % reg
)
434 rnums
= map(str, rnums
)
435 print ("reg %s: %s" % (','.join(rnums
), ','.join(rs
)))
438 def scoreboard_sim(dut
, alusim
):
440 yield dut
.int_store_i
.eq(0)
444 # set random values in the registers
445 for i
in range(1, dut
.n_regs
):
447 val
= randint(0, (1<<alusim
.rwidth
)-1)
448 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
449 alusim
.setval(i
, val
)
451 # create some instructions (some random, some regression tests)
455 src1
= randint(1, dut
.n_regs
-1)
456 src2
= randint(1, dut
.n_regs
-1)
458 dest
= randint(1, dut
.n_regs
-1)
460 if dest
not in [src1
, src2
]:
470 instrs
.append((src1
, src2
, dest
, op
))
473 instrs
.append((2, 3, 3, 0))
474 instrs
.append((5, 3, 3, 1))
477 instrs
.append((5, 6, 2, 1))
478 instrs
.append((2, 2, 4, 0))
479 #instrs.append((2, 2, 3, 1))
482 instrs
.append((2, 1, 2, 3))
485 instrs
.append((2, 6, 2, 1))
486 instrs
.append((2, 1, 2, 0))
489 instrs
.append((1, 2, 7, 2))
490 instrs
.append((7, 1, 5, 0))
491 instrs
.append((4, 4, 1, 1))
494 instrs
.append((5, 6, 2, 2))
495 instrs
.append((1, 1, 4, 1))
496 instrs
.append((6, 5, 3, 0))
499 # Write-after-Write Hazard
500 instrs
.append( (3, 6, 7, 2) )
501 instrs
.append( (4, 4, 7, 1) )
504 # self-read/write-after-write followed by Read-after-Write
505 instrs
.append((1, 1, 1, 1))
506 instrs
.append((1, 5, 3, 0))
509 # Read-after-Write followed by self-read-after-write
510 instrs
.append((5, 6, 1, 2))
511 instrs
.append((1, 1, 1, 1))
514 # self-read-write sandwich
515 instrs
.append((5, 6, 1, 2))
516 instrs
.append((1, 1, 1, 1))
517 instrs
.append((1, 5, 3, 0))
521 instrs
.append( (5, 2, 5, 2) )
522 instrs
.append( (2, 6, 3, 0) )
523 instrs
.append( (4, 2, 2, 1) )
525 # issue instruction(s), wait for issue to be free before proceeding
526 for i
, (src1
, src2
, dest
, op
) in enumerate(instrs
):
528 print ("instr %d: (%d, %d, %d, %d)" % (i
, src1
, src2
, dest
, op
))
529 yield from int_instr(dut
, alusim
, op
, src1
, src2
, dest
)
532 issue_o
= yield dut
.issue_o
534 for i
in range(len(dut
.int_insn_i
)):
535 yield dut
.int_insn_i
[i
].eq(0)
536 yield dut
.reg_enable_i
.eq(0)
539 #yield from print_reg(dut, [1,2,3])
541 #yield from print_reg(dut, [1,2,3])
543 # wait for all instructions to stop before checking
546 busy_o
= yield dut
.busy_o
553 yield from alusim
.check(dut
)
554 yield from alusim
.dump(dut
)
557 def explore_groups(dut
):
558 from nmigen
.hdl
.ir
import Fragment
559 from nmigen
.hdl
.xfrm
import LHSGroupAnalyzer
561 fragment
= dut
.elaborate(platform
=None)
562 fr
= Fragment
.get(fragment
, platform
=None)
564 groups
= LHSGroupAnalyzer()(fragment
._statements
)
569 def test_scoreboard():
570 dut
= Scoreboard(16, 8)
571 alusim
= RegSim(16, 8)
572 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
573 with
open("test_scoreboard6600.il", "w") as f
:
576 run_simulation(dut
, scoreboard_sim(dut
, alusim
),
577 vcd_name
='test_scoreboard6600.vcd')
580 if __name__
== '__main__':