1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fn_unit
import IntFnUnit
, FPFnUnit
, LDFnUnit
, STFnUnit
7 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
8 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
9 from scoreboard
.global_pending
import GlobalPending
10 from scoreboard
.group_picker
import GroupPicker
11 from scoreboard
.issue_unit
import IntFPIssueUnit
, RegDecode
12 from scoreboard
.shadow
import ShadowMatrix
14 from compalu
import ComputationUnitNoDelay
16 from alu_hier
import ALU
17 from nmutil
.latch
import SRLatch
19 from random
import randint
class CompUnits(Elaboratable):
    """Group of four integer Computation Units behind vector-style ports.

    Instantiates four ``ComputationUnitNoDelay`` submodules (comp1..comp4),
    hard-wires their ``oper_i`` to the constants 0..3, concatenates their
    single-unit control/status signals into the ``n_units``-wide signals
    declared in ``__init__``, and merges their results into ``dest_o``
    using ``treereduce``.
    """

    def __init__(self, rwid, n_units):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_units: number of ALUs
        """
        self.n_units = n_units
        # elaborate() needs the data width to build the Computation Units
        self.rwid = rwid

        # control/status vectors: one bit per Computation Unit
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)

        # data path: merged result out, shared operands in
        self.dest_o = Signal(rwid, reset_less=True)
        self.src1_data_i = Signal(rwid, reset_less=True)
        self.src2_data_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        """Build and return the Module wiring the four Computation Units."""
        m = Module()

        # Int ALUs, one per Computation Unit.
        # NOTE(review): assumes ALU takes the data width as its only
        # argument - confirm against alu_hier.ALU.
        add = ALU(self.rwid)
        sub = ALU(self.rwid)
        mul = ALU(self.rwid)
        shf = ALU(self.rwid)

        m.submodules.comp1 = comp1 = ComputationUnitNoDelay(self.rwid, 2, add)
        m.submodules.comp2 = comp2 = ComputationUnitNoDelay(self.rwid, 2, sub)
        m.submodules.comp3 = comp3 = ComputationUnitNoDelay(self.rwid, 2, mul)
        m.submodules.comp4 = comp4 = ComputationUnitNoDelay(self.rwid, 2, shf)
        int_alus = [comp1, comp2, comp3, comp4]

        # each unit is permanently assigned a single operation
        m.d.comb += comp1.oper_i.eq(Const(0, 2)) # op=add
        m.d.comb += comp2.oper_i.eq(Const(1, 2)) # op=sub
        m.d.comb += comp3.oper_i.eq(Const(2, 2)) # op=mul
        m.d.comb += comp4.oper_i.eq(Const(3, 2)) # op=shf

        # gather the per-unit signals, in unit order, for concatenation
        req_rel_l = [alu.req_rel_o for alu in int_alus]
        rd_rel_l = [alu.rd_rel_o for alu in int_alus]
        shadow_l = [alu.shadown_i for alu in int_alus]
        godie_l = [alu.go_die_i for alu in int_alus]
        go_wr_l = [alu.go_wr_i for alu in int_alus]
        go_rd_l = [alu.go_rd_i for alu in int_alus]
        issue_l = [alu.issue_i for alu in int_alus]
        busy_l = [alu.busy_o for alu in int_alus]

        # unit outputs -> this module's output vectors
        m.d.comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
        m.d.comb += self.req_rel_o.eq(Cat(*req_rel_l))
        m.d.comb += self.busy_o.eq(Cat(*busy_l))
        # this module's input vectors -> unit inputs
        m.d.comb += Cat(*godie_l).eq(self.go_die_i)
        m.d.comb += Cat(*shadow_l).eq(self.shadown_i)
        m.d.comb += Cat(*go_wr_l).eq(self.go_wr_i)
        m.d.comb += Cat(*go_rd_l).eq(self.go_rd_i)
        m.d.comb += Cat(*issue_l).eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "dest_o"
        dest_o = treereduce(int_alus)
        m.d.comb += self.dest_o.eq(dest_o)

        # every unit sees the same pair of source operands
        for alu in int_alus:
            m.d.comb += alu.src1_i.eq(self.src1_data_i)
            m.d.comb += alu.src2_i.eq(self.src2_data_i)

        return m
class FunctionUnits(Elaboratable):
    """Integer dependency tracking: FU-FU and FU-Register matrices.

    Instantiates one ``FUFUDepMatrix`` and one ``FURegDepMatrix`` and
    connects them to each other and to the ports declared in ``__init__``.
    """

    def __init__(self, n_regs, n_int_alus):
        """ Inputs:

            * :n_regs:     width of the register-select vectors
            * :n_int_alus: number of integer function units
        """
        # elaborate() sizes the FU-Reg dependency matrix from n_regs
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in

        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)

        self.req_rel_i = Signal(n_int_alus, reset_less = True)
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.req_rel_o = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

    def elaborate(self, platform):
        """Build and return the Module wiring the two dependency matrices."""
        m = Module()

        n_int_fus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        intfudeps = FUFUDepMatrix(n_int_fus, n_int_fus)
        m.submodules.intfudeps = intfudeps
        # Integer FU-Reg Dep Matrix
        intregdeps = FURegDepMatrix(n_int_fus, self.n_regs)
        m.submodules.intregdeps = intregdeps

        # export the register read/write select vectors as "global pending"
        m.d.comb += self.g_int_rd_pend_o.eq(intregdeps.rd_rsel_o)
        m.d.comb += self.g_int_wr_pend_o.eq(intregdeps.wr_rsel_o)

        # ... and feed the same vectors back into the FU-Reg matrix
        m.d.comb += intregdeps.rd_pend_i.eq(intregdeps.rd_rsel_o)
        m.d.comb += intregdeps.wr_pend_i.eq(intregdeps.wr_rsel_o)

        # FU-Reg pending outputs drive the FU-FU matrix pending inputs
        m.d.comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
        m.d.comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)

        m.d.comb += intfudeps.issue_i.eq(self.fn_issue_i)
        m.d.comb += intfudeps.go_rd_i.eq(self.go_rd_i)
        m.d.comb += intfudeps.go_wr_i.eq(self.go_wr_i)
        m.d.comb += self.readable_o.eq(intfudeps.readable_o)
        m.d.comb += self.writable_o.eq(intfudeps.writable_o)

        # Connect function issue / arrays, and dest/src1/src2
        m.d.comb += intregdeps.dest_i.eq(self.dest_i)
        m.d.comb += intregdeps.src1_i.eq(self.src1_i)
        m.d.comb += intregdeps.src2_i.eq(self.src2_i)

        m.d.comb += intregdeps.go_rd_i.eq(self.go_rd_i)
        m.d.comb += intregdeps.go_wr_i.eq(self.go_wr_i)
        m.d.comb += intregdeps.issue_i.eq(self.fn_issue_i)

        m.d.comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
        m.d.comb += self.src1_rsel_o.eq(intregdeps.src1_rsel_o)
        m.d.comb += self.src2_rsel_o.eq(intregdeps.src2_rsel_o)

        return m
class Scoreboard(Elaboratable):
    """Top-level scoreboard: register files, issue, dependencies, pickers.

    Connects the integer register file, a ``CompUnits`` group, the
    ``FunctionUnits`` dependency matrices, a ``GroupPicker``, the
    ``RegDecode`` / ``IntFPIssueUnit`` pair and a ``ShadowMatrix``.
    The FP register file is instantiated but not otherwise connected.
    """

    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        # both values are read again in elaborate() (and n_regs by the
        # simulation code through dut.n_regs)
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # inputs
        self.int_store_i = Signal(reset_less=True) # instruction is a store
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

    def elaborate(self, platform):
        """Build and return the fully wired scoreboard Module.

        Side effect: sets ``self.int_insn_i`` to the issue unit's
        instruction-enable input so that callers can drive it.
        """
        m = Module()

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested but not connected to anything below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        # CompUnits instantiates exactly four units (add, sub, mul, shf)
        n_int_alus = 4
        m.submodules.cu = cu = CompUnits(self.rwid, n_int_alus)
        m.d.comb += cu.shadown_i.eq(-1)
        m.d.comb += cu.go_die_i.eq(0)

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_int_fus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1
        intpick1 = GroupPicker(n_int_fus) # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IntFPIssueUnit(self.n_regs, n_int_fus, n_fp_fus)
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_int_fus shadows, to be used for
        # write-after-write hazards
        m.submodules.shadows = shadows = ShadowMatrix(n_int_fus, n_int_fus)
        go_rd_rst = Signal(n_int_fus, reset_less=True)
        go_wr_rst = Signal(n_int_fus, reset_less=True)

        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA

        # Issue Unit is where it starts.  set up some in/outs for this module
        m.d.comb += [issueunit.i.store_i.eq(self.int_store_i),
                     regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     issueunit.i.dest_i.eq(regdecode.dest_o),
                     self.issue_o.eq(issueunit.issue_o)
                    ]
        self.int_insn_i = issueunit.i.insn_i # enabled by instruction decode

        # connect global rd/wr pending vector (for WaW detection)
        m.d.sync += issueunit.i.g_wr_pend_i.eq(intfus.g_int_wr_pend_o)
        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        m.d.comb += intfus.dest_i.eq(regdecode.dest_o)
        m.d.comb += intfus.src1_i.eq(regdecode.src1_o)
        m.d.comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.i.fn_issue_o

        m.d.comb += intfus.fn_issue_i.eq(fn_issue_o)
        m.d.comb += issueunit.i.busy_i.eq(cu.busy_o)
        m.d.comb += self.busy_o.eq(cu.busy_o.bool())

        # connect fu-fu matrix

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        m.d.comb += go_rd_i[0:n_int_fus].eq(go_rd_rst[0:n_int_fus]) # rd
        m.d.comb += go_wr_i[0:n_int_fus].eq(go_wr_rst[0:n_int_fus]) # wr

        # Connect Picker
        m.d.comb += intpick1.rd_rel_i[0:n_int_fus].eq(cu.rd_rel_o[0:n_int_fus])
        m.d.comb += intpick1.req_rel_i[0:n_int_fus].eq(cu.req_rel_o[0:n_int_fus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        m.d.comb += intpick1.readable_i[0:n_int_fus].eq(int_rd_o[0:n_int_fus])
        m.d.comb += intpick1.writable_i[0:n_int_fus].eq(int_wr_o[0:n_int_fus])

        # Shadow Matrix
        m.d.comb += shadows.issue_i.eq(fn_issue_o)
        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        # NOTE: do NOT connect these to the Computation Units.  The CUs need to
        # do something slightly different (due to the revolving-door SRLatches)
        m.d.comb += go_rd_rst.eq(go_rd_o | shadows.go_die_o)
        m.d.comb += go_wr_rst.eq(go_wr_o | shadows.go_die_o)

        # connect shadows / go_dies to Computation Units
        m.d.comb += cu.shadown_i[0:n_int_fus].eq(shadows.shadown_o[0:n_int_fus])
        m.d.comb += cu.go_die_i[0:n_int_fus].eq(shadows.go_die_o[0:n_int_fus])

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).

        # Connect Register File(s)
        print ("intregdeps wen len", len(intfus.dest_rsel_o))
        m.d.comb += int_dest.wen.eq(intfus.dest_rsel_o)
        m.d.comb += int_src1.ren.eq(intfus.src1_rsel_o)
        m.d.comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        m.d.comb += int_dest.data_i.eq(cu.dest_o)
        m.d.comb += cu.src1_data_i.eq(int_src1.data_o)
        m.d.comb += cu.src2_data_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        m.d.comb += cu.go_rd_i[0:n_int_fus].eq(go_rd_o[0:n_int_fus])
        m.d.comb += cu.go_wr_i[0:n_int_fus].eq(go_wr_o[0:n_int_fus])
        m.d.comb += cu.issue_i[0:n_int_fus].eq(fn_issue_o[0:n_int_fus])

        return m

    def __iter__(self):
        """Yield the register-file contents and the instruction inputs."""
        yield from self.intregs
        yield from self.fpregs
        yield self.int_store_i
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        #yield from self.int_src1
        #yield from self.int_dest
        #yield from self.int_src1
        #yield from self.int_src2
        #yield from self.fp_dest
        #yield from self.fp_src1
        #yield from self.fp_src2

    def ports(self):
        """Return everything yielded by iterating this object, as a list."""
        return list(self)
class RegSim:
    """Pure-Python reference model of the integer register file.

    Every instruction given to the hardware is mirrored here through
    ``op`` so that ``check`` / ``dump`` can compare the expected register
    contents with the values read back from the simulated design.
    """

    def __init__(self, rwidth, nregs):
        """Create ``nregs`` registers of ``rwidth`` bits, all zero."""
        # op() derives its truncation mask from the register width
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, src1, src2, dest):
        """Apply operation ``op`` to registers ``src1``/``src2`` into ``dest``.

        ``src1``, ``src2`` and ``dest`` are register numbers.  Opcodes follow
        the Computation Unit assignment: 0=add, 1=sub, 2=mul, 3=shift-right.

        Raises ValueError for any other opcode.
        """
        maxbits = (1 << self.rwidth) - 1
        src1 = self.regs[src1]
        src2 = self.regs[src2]
        if op == 0:
            val = src1 + src2
        elif op == 1:
            val = src1 - src2
        elif op == 2:
            val = src1 * src2
        elif op == 3:
            val = src1 >> (src2 & maxbits)
        else:
            raise ValueError("unknown op %r" % (op,))
        # NOTE(review): truncating to the register width assumes the
        # hardware result wraps at rwidth bits - confirm against the ALU.
        val &= maxbits
        self.regs[dest] = val

    def setval(self, dest, val):
        """Overwrite register number ``dest`` with ``val``."""
        self.regs[dest] = val

    def dump(self, dut):
        """Simulation generator: print expected vs. actual for every register."""
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """Simulation generator: compare every register with the model.

        On the first mismatch the offending register is reported, the whole
        register file is dumped, and AssertionError is raised.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                raise AssertionError("register %d mismatch" % i)
def int_instr(dut, alusim, op, src1, src2, dest):
    """Simulation generator: present one integer instruction to ``dut``.

    Clears every instruction-enable bit, drives the destination and source
    register numbers, raises the enable bit selected by ``op`` together
    with the register-decode enable, and finally applies the same
    operation to the reference model ``alusim``.
    """
    insn = dut.int_insn_i
    # start from a clean slate: no operation selected
    for idx in range(len(insn)):
        yield insn[idx].eq(0)
    # register numbers, driven in dest / src1 / src2 order
    operands = (
        (dut.int_dest_i, dest),
        (dut.int_src1_i, src1),
        (dut.int_src2_i, src2),
    )
    for sig, regnum in operands:
        yield sig.eq(regnum)
    # select the requested operation and enable register decoding
    yield insn[op].eq(1)
    yield dut.reg_enable_i.eq(1)
    # keep the software model in step with the hardware
    alusim.op(op, src1, src2, dest)
def print_reg(dut, rnums):
    """Simulation generator: print the current value of selected registers.

    ``rnums`` is an iterable of integer register numbers.  One line of the
    form ``reg 1,2,3: a,b,c`` is printed, with the values in hexadecimal.
    """
    # materialise once: rnums is walked twice (to read, then to label)
    rnums = list(rnums)
    rs = []
    for rnum in rnums:
        reg = yield dut.intregs.regs[rnum].reg
        rs.append("%x" % reg)
    rnums = map(str, rnums)
    print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
420 def scoreboard_sim(dut
, alusim
):
422 yield dut
.int_store_i
.eq(0)
426 # set random values in the registers
427 for i
in range(1, dut
.n_regs
):
429 val
= randint(0, (1<<alusim
.rwidth
)-1)
430 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
431 alusim
.setval(i
, val
)
433 # create some instructions (some random, some regression tests)
437 src1
= randint(1, dut
.n_regs
-1)
438 src2
= randint(1, dut
.n_regs
-1)
440 dest
= randint(1, dut
.n_regs
-1)
442 if dest
not in [src1
, src2
]:
452 instrs
.append((src1
, src2
, dest
, op
))
455 instrs
.append((2, 3, 3, 0))
456 instrs
.append((5, 3, 3, 1))
459 instrs
.append((5, 6, 2, 1))
460 instrs
.append((2, 2, 4, 0))
461 #instrs.append((2, 2, 3, 1))
464 instrs
.append((2, 1, 2, 3))
467 instrs
.append((2, 6, 2, 1))
468 instrs
.append((2, 1, 2, 0))
471 instrs
.append((1, 2, 7, 2))
472 instrs
.append((7, 1, 5, 0))
473 instrs
.append((4, 4, 1, 1))
476 instrs
.append((5, 6, 2, 2))
477 instrs
.append((1, 1, 4, 1))
478 instrs
.append((6, 5, 3, 0))
481 # Write-after-Write Hazard
482 instrs
.append( (3, 6, 7, 2) )
483 instrs
.append( (4, 4, 7, 1) )
486 # self-read/write-after-write followed by Read-after-Write
487 instrs
.append((1, 1, 1, 1))
488 instrs
.append((1, 5, 3, 0))
491 # Read-after-Write followed by self-read-after-write
492 instrs
.append((5, 6, 1, 2))
493 instrs
.append((1, 1, 1, 1))
496 # self-read-write sandwich
497 instrs
.append((5, 6, 1, 2))
498 instrs
.append((1, 1, 1, 1))
499 instrs
.append((1, 5, 3, 0))
503 instrs
.append( (5, 2, 5, 2) )
504 instrs
.append( (2, 6, 3, 0) )
505 instrs
.append( (4, 2, 2, 1) )
507 # issue instruction(s), wait for issue to be free before proceeding
508 for i
, (src1
, src2
, dest
, op
) in enumerate(instrs
):
510 print ("instr %d: (%d, %d, %d, %d)" % (i
, src1
, src2
, dest
, op
))
511 yield from int_instr(dut
, alusim
, op
, src1
, src2
, dest
)
514 issue_o
= yield dut
.issue_o
516 for i
in range(len(dut
.int_insn_i
)):
517 yield dut
.int_insn_i
[i
].eq(0)
518 yield dut
.reg_enable_i
.eq(0)
521 #yield from print_reg(dut, [1,2,3])
523 #yield from print_reg(dut, [1,2,3])
525 # wait for all instructions to stop before checking
528 busy_o
= yield dut
.busy_o
535 yield from alusim
.check(dut
)
536 yield from alusim
.dump(dut
)
def explore_groups(dut):
    """Elaborate ``dut`` and run nmigen's LHSGroupAnalyzer on its statements.

    NOTE(review): in the code visible here neither ``fr`` nor ``groups`` is
    used afterwards and nothing is returned - confirm whether reporting of
    the result is missing.
    """
    from nmigen.hdl.ir import Fragment
    from nmigen.hdl.xfrm import LHSGroupAnalyzer

    fragment = dut.elaborate(platform=None)
    fr = Fragment.get(fragment, platform=None)

    # the analyzer is applied to the statements of the object returned by
    # elaborate(), not to those of the Fragment ``fr``
    groups = LHSGroupAnalyzer()(fragment._statements)
def test_scoreboard():
    """Convert the scoreboard to RTLIL, then run the simulation test.

    Writes ``test_scoreboard6600.il`` (RTLIL) and
    ``test_scoreboard6600.vcd`` (simulation trace) to the current directory.
    """
    dut = Scoreboard(16, 8)
    alusim = RegSim(16, 8)
    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as f:
        # save the converted design to the file opened for it
        f.write(vl)

    run_simulation(dut, scoreboard_sim(dut, alusim),
                   vcd_name='test_scoreboard6600.vcd')
562 if __name__
== '__main__':