1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fn_unit
import IntFnUnit
, FPFnUnit
, LDFnUnit
, STFnUnit
7 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
8 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
9 from scoreboard
.global_pending
import GlobalPending
10 from scoreboard
.group_picker
import GroupPicker
11 from scoreboard
.issue_unit
import IntFPIssueUnit
, RegDecode
13 from compalu
import ComputationUnitNoDelay
15 from alu_hier
import ALU
16 from nmutil
.latch
import SRLatch
18 from random
import randint
class CompUnits(Elaboratable):
    """Group of four integer Computation Units behind bit-vector ports.

    Each Computation Unit wraps one ALU and is hard-wired to a single
    operation (add, sub, mul, shf).  The per-unit control and status
    signals are concatenated, unit 0 in bit 0, into the ``n_units``-wide
    ports of this module; the two source operands are broadcast to every
    unit and the unit results are merged into ``dest_o``.
    """

    def __init__(self, rwid, n_units):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_units: number of ALUs
        """
        self.n_units = n_units
        self.rwid = rwid  # needed again by elaborate()

        # control / status: one bit per Computation Unit
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)

        # data buses shared by all units
        self.dest_o = Signal(rwid, reset_less=True)
        self.src1_data_i = Signal(rwid, reset_less=True)
        self.src2_data_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        """Build and return the Module containing the four units."""
        m = Module()

        # Int ALUs
        # NOTE(review): ALU is assumed to take the operand width as its only
        # constructor argument - confirm against alu_hier.
        add = ALU(self.rwid)
        sub = ALU(self.rwid)
        mul = ALU(self.rwid)
        shf = ALU(self.rwid)

        # NOTE(review): the literal 2 is presumably the opcode width (the
        # opcodes below are 2-bit constants) - confirm against compalu.
        m.submodules.comp1 = comp1 = ComputationUnitNoDelay(self.rwid, 2, add)
        m.submodules.comp2 = comp2 = ComputationUnitNoDelay(self.rwid, 2, sub)
        m.submodules.comp3 = comp3 = ComputationUnitNoDelay(self.rwid, 2, mul)
        m.submodules.comp4 = comp4 = ComputationUnitNoDelay(self.rwid, 2, shf)
        int_alus = [comp1, comp2, comp3, comp4]

        # each unit is permanently tied to one operation
        m.d.comb += comp1.oper_i.eq(Const(0, 2))  # op=add
        m.d.comb += comp2.oper_i.eq(Const(1, 2))  # op=sub
        m.d.comb += comp3.oper_i.eq(Const(2, 2))  # op=mul
        m.d.comb += comp4.oper_i.eq(Const(3, 2))  # op=shf

        # status outputs: gather one bit from every unit
        m.d.comb += self.rd_rel_o.eq(Cat(*(alu.rd_rel_o for alu in int_alus)))
        m.d.comb += self.req_rel_o.eq(
            Cat(*(alu.req_rel_o for alu in int_alus)))
        m.d.comb += self.busy_o.eq(Cat(*(alu.busy_o for alu in int_alus)))

        # control inputs: scatter one bit to every unit
        m.d.comb += Cat(*(alu.go_wr_i for alu in int_alus)).eq(self.go_wr_i)
        m.d.comb += Cat(*(alu.go_rd_i for alu in int_alus)).eq(self.go_rd_i)
        m.d.comb += Cat(*(alu.issue_i for alu in int_alus)).eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "dest_o"
        dest_o = treereduce(int_alus)
        m.d.comb += self.dest_o.eq(dest_o)

        # every unit sees the same pair of source operands
        for alu in int_alus:
            m.d.comb += alu.src1_i.eq(self.src1_data_i)
            m.d.comb += alu.src2_i.eq(self.src2_data_i)

        return m
class FunctionUnits(Elaboratable):
    """Integer dependency tracking: FU-Reg and FU-FU dependency matrices.

    Wraps one FURegDepMatrix and one FUFUDepMatrix, wires the former's
    pending outputs into the latter, and re-exports their register-select,
    pending, readable and writable vectors.
    """

    def __init__(self, n_regs, n_int_alus):
        """ Inputs:

            * :n_regs:     width of the register vectors
            * :n_int_alus: number of integer function units
        """
        self.n_regs = n_regs  # needed again by elaborate()
        self.n_int_alus = n_int_alus

        self.dest_i = Signal(n_regs, reset_less=True)  # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True)  # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True)  # oper2 R# in

        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        self.dest_rsel_o = Signal(n_regs, reset_less=True)  # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True)  # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True)  # src2 reg (bot)

        self.req_rel_i = Signal(n_int_alus, reset_less=True)
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.req_rel_o = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

    def elaborate(self, platform):
        """Build and return the Module holding both dependency matrices."""
        m = Module()

        n_int_fus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        intfudeps = FUFUDepMatrix(n_int_fus, n_int_fus)
        m.submodules.intfudeps = intfudeps
        # Integer FU-Reg Dep Matrix
        intregdeps = FURegDepMatrix(n_int_fus, self.n_regs)
        m.submodules.intregdeps = intregdeps

        # global pending vectors are the reg matrix's register-select outputs
        m.d.comb += self.g_int_rd_pend_o.eq(intregdeps.rd_rsel_o)
        m.d.comb += self.g_int_wr_pend_o.eq(intregdeps.wr_rsel_o)

        # ... which are also fed straight back in as its pending inputs
        m.d.comb += intregdeps.rd_pend_i.eq(intregdeps.rd_rsel_o)
        m.d.comb += intregdeps.wr_pend_i.eq(intregdeps.wr_rsel_o)

        # reg matrix pending outputs drive the FU-FU matrix
        m.d.comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
        m.d.comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)

        m.d.comb += intfudeps.issue_i.eq(self.fn_issue_i)
        m.d.comb += intfudeps.go_rd_i.eq(self.go_rd_i)
        m.d.comb += intfudeps.go_wr_i.eq(self.go_wr_i)
        m.d.comb += self.readable_o.eq(intfudeps.readable_o)
        m.d.comb += self.writable_o.eq(intfudeps.writable_o)

        # Connect function issue / arrays, and dest/src1/src2
        m.d.comb += intregdeps.dest_i.eq(self.dest_i)
        m.d.comb += intregdeps.src1_i.eq(self.src1_i)
        m.d.comb += intregdeps.src2_i.eq(self.src2_i)

        m.d.comb += intregdeps.go_rd_i.eq(self.go_rd_i)
        m.d.comb += intregdeps.go_wr_i.eq(self.go_wr_i)
        m.d.comb += intregdeps.issue_i.eq(self.fn_issue_i)

        m.d.comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
        m.d.comb += self.src1_rsel_o.eq(intregdeps.src1_rsel_o)
        m.d.comb += self.src2_rsel_o.eq(intregdeps.src2_rsel_o)

        return m
class Scoreboard(Elaboratable):
    """Top level: register files, issue unit, dependency matrices, picker
    and computation units wired together.
    """

    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid      # needed again by elaborate()
        self.n_regs = n_regs  # also read by the simulation as dut.n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # inputs
        self.int_store_i = Signal(reset_less=True)  # instruction is a store
        self.int_dest_i = Signal(max=n_regs, reset_less=True)  # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True)  # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True)  # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True)  # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True)  # instruction was accepted
        self.busy_o = Signal(reset_less=True)  # at least one CU is busy

    def elaborate(self, platform):
        """Build and return the fully wired scoreboard Module.

        Side effect: publishes the issue unit's instruction-select signal as
        ``self.int_insn_i``, so that attribute only exists once this method
        has run.
        """
        m = Module()

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested but not connected to anything yet
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        # CompUnits instantiates exactly four units: add, sub, mul, shf
        n_int_alus = 4
        m.submodules.cu = cu = CompUnits(self.rwid, n_int_alus)

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_int_fus = n_int_alus
        n_fp_fus = 0  # for now

        # Integer Priority Picker 1: picks between add, sub, mul and shf
        intpick1 = GroupPicker(n_int_fus)
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IntFPIssueUnit(self.n_regs, n_int_fus, n_fp_fus)
        m.submodules.issueunit = issueunit

        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA

        # Issue Unit is where it starts.  set up some in/outs for this module
        m.d.comb += [issueunit.i.store_i.eq(self.int_store_i),
                     regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     issueunit.i.dest_i.eq(regdecode.dest_o),
                     self.issue_o.eq(issueunit.issue_o)
                    ]
        self.int_insn_i = issueunit.i.insn_i  # enabled by instruction decode

        # connect global rd/wr pending vector (for WaW detection)
        m.d.sync += issueunit.i.g_wr_pend_i.eq(intfus.g_int_wr_pend_o)
        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        m.d.comb += intfus.dest_i.eq(regdecode.dest_o)
        m.d.comb += intfus.src1_i.eq(regdecode.src1_o)
        m.d.comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.i.fn_issue_o

        m.d.comb += intfus.fn_issue_i.eq(fn_issue_o)
        m.d.comb += issueunit.i.busy_i.eq(cu.busy_o)
        m.d.comb += self.busy_o.eq(cu.busy_o.bool())

        # connect fu-fu matrix

        # Group Picker... done manually for now.  TODO: cat array of pick sigs
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        m.d.comb += go_rd_i[0:n_int_fus].eq(go_rd_o[0:n_int_fus])  # rd
        m.d.comb += go_wr_i[0:n_int_fus].eq(go_wr_o[0:n_int_fus])  # wr

        # picker inputs: release requests from the CUs, hazard-free status
        # from the dependency matrices
        m.d.comb += intpick1.rd_rel_i[0:n_int_fus].eq(cu.rd_rel_o[0:n_int_fus])
        m.d.comb += intpick1.req_rel_i[0:n_int_fus].eq(
            cu.req_rel_o[0:n_int_fus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        m.d.comb += intpick1.readable_i[0:n_int_fus].eq(int_rd_o[0:n_int_fus])
        m.d.comb += intpick1.writable_i[0:n_int_fus].eq(int_wr_o[0:n_int_fus])

        # Connect Register File(s)
        print ("intregdeps wen len", len(intfus.dest_rsel_o))
        m.d.comb += int_dest.wen.eq(intfus.dest_rsel_o)
        m.d.comb += int_src1.ren.eq(intfus.src1_rsel_o)
        m.d.comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        m.d.comb += int_dest.data_i.eq(cu.dest_o)
        m.d.comb += cu.src1_data_i.eq(int_src1.data_o)
        m.d.comb += cu.src2_data_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        m.d.comb += cu.go_rd_i[0:n_int_fus].eq(go_rd_o[0:n_int_fus])
        m.d.comb += cu.go_wr_i[0:n_int_fus].eq(go_wr_o[0:n_int_fus])
        m.d.comb += cu.issue_i[0:n_int_fus].eq(fn_issue_o[0:n_int_fus])

        return m

    def __iter__(self):
        """Yield the signals exposed as module ports."""
        yield from self.intregs
        yield from self.fpregs
        yield self.int_store_i
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        # the register-file port records are locals of elaborate() and are
        # therefore not available to yield from here

    def ports(self):
        """Return the port signals as a list (see __iter__)."""
        return list(self)
class RegSim:
    """Pure-Python reference model of the integer register file.

    Mirrors every instruction issued to the hardware so that the simulated
    register contents can be compared against expected values.
    """

    def __init__(self, rwidth, nregs):
        """Create *nregs* registers of *rwidth* bits, all initialised to 0."""
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, src1, src2, dest):
        """Apply one ALU operation to the model.

        * :op:   opcode - 0=add, 1=sub, 2=mul, 3=shift right
        * :src1: register number of the first operand
        * :src2: register number of the second operand
        * :dest: register number receiving the result

        Raises ValueError for an unknown opcode.
        """
        maxbits = (1 << self.rwidth) - 1
        src1 = self.regs[src1]
        src2 = self.regs[src2]
        if op == 0:
            val = src1 + src2
        elif op == 1:
            val = src1 - src2
        elif op == 2:
            val = src1 * src2
        elif op == 3:
            val = src1 >> (src2 & maxbits)
        else:
            raise ValueError("unknown opcode %r" % (op,))
        # wrap to the register width (subtraction may have gone negative)
        val &= maxbits
        self.regs[dest] = val

    def setval(self, dest, val):
        """Force register *dest* of the model to *val*."""
        self.regs[dest] = val

    def dump(self, dut):
        """Simulation generator: print every register, expected vs. actual.

        Each ``yield`` hands a register signal of *dut* to the simulator and
        receives its current value back.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """Simulation generator: compare *dut* registers against the model.

        On the first mismatch the offending register is reported, the whole
        register file is dumped and AssertionError is raised.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                raise AssertionError(
                    "reg %d expected %x received %x" % (i, val, reg))
def int_instr(dut, alusim, op, src1, src2, dest):
    """Simulation generator: present one integer instruction to *dut*.

    Clears every instruction-select bit, drives the destination and source
    register numbers, raises the select bit for *op*, enables register
    decode, and finally applies the same operation to the reference model
    *alusim*.
    """
    insn = dut.int_insn_i

    # start from a clean one-hot instruction vector
    bit = 0
    while bit < len(insn):
        yield dut.int_insn_i[bit].eq(0)
        bit += 1

    # register numbers: destination first, then the two sources
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    # select the operation and let the register decoder run
    yield dut.int_insn_i[op].eq(1)
    yield dut.reg_enable_i.eq(1)

    # keep the software model in step with the hardware
    alusim.op(op, src1, src2, dest)
def print_reg(dut, rnums):
    """Simulation generator: print the integer registers listed in *rnums*.

    Output is a single line of the form ``reg 1,2,3: a,b,c`` where the
    values are in hex.  Each ``yield`` hands a register signal to the
    simulator and receives its current value back.
    """
    # materialise once so a one-shot iterable can be walked twice
    rnums = list(rnums)
    rs = []
    for rnum in rnums:
        reg = yield dut.intregs.regs[rnum].reg
        rs.append("%x" % reg)
    rnums = map(str, rnums)
    print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
381 def scoreboard_sim(dut
, alusim
):
383 yield dut
.int_store_i
.eq(0)
385 for i
in range(1000):
387 # set random values in the registers
388 for i
in range(1, dut
.n_regs
):
389 val
= randint(0, (1<<alusim
.rwidth
)-1) # 31+i*3
390 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
391 alusim
.setval(i
, val
)
393 # create some instructions (some random, some regression tests)
397 src1
= randint(1, dut
.n_regs
-1)
398 src2
= randint(1, dut
.n_regs
-1)
400 dest
= randint(1, dut
.n_regs
-1)
402 if dest
not in [src1
, src2
]:
412 instrs
.append((src1
, src2
, dest
, op
))
415 instrs
.append((2, 3, 3, 0))
416 instrs
.append((5, 3, 3, 1))
419 instrs
.append((5, 6, 2, 1))
420 instrs
.append((2, 2, 4, 0))
421 #instrs.append((2, 2, 3, 1))
424 instrs
.append((2, 1, 2, 3))
427 instrs
.append((2, 6, 2, 1))
428 instrs
.append((2, 1, 2, 0))
431 instrs
.append((1, 2, 7, 2))
432 instrs
.append((7, 1, 5, 0))
433 instrs
.append((4, 4, 1, 1))
436 instrs
.append((5, 6, 2, 2))
437 instrs
.append((1, 1, 4, 1))
438 instrs
.append((6, 5, 3, 0))
441 # Write-after-Write Hazard
442 instrs
.append( (3, 6, 7, 2) )
443 instrs
.append( (4, 4, 7, 1) )
446 # self-read/write-after-write followed by Read-after-Write
447 instrs
.append((1, 1, 1, 1))
448 instrs
.append((1, 5, 3, 0))
451 # Read-after-Write followed by self-read-after-write
452 instrs
.append((5, 6, 1, 2))
453 instrs
.append((1, 1, 1, 1))
456 # self-read-write sandwich
457 instrs
.append((5, 6, 1, 2))
458 instrs
.append((1, 1, 1, 1))
459 instrs
.append((1, 5, 3, 0))
463 instrs
.append( (5, 2, 5, 2) )
464 instrs
.append( (2, 6, 3, 0) )
465 instrs
.append( (4, 2, 2, 1) )
467 # issue instruction(s), wait for issue to be free before proceeding
468 for i
, (src1
, src2
, dest
, op
) in enumerate(instrs
):
470 print ("instr %d: (%d, %d, %d, %d)" % (i
, src1
, src2
, dest
, op
))
471 yield from int_instr(dut
, alusim
, op
, src1
, src2
, dest
)
474 issue_o
= yield dut
.issue_o
476 for i
in range(len(dut
.int_insn_i
)):
477 yield dut
.int_insn_i
[i
].eq(0)
478 yield dut
.reg_enable_i
.eq(0)
481 #yield from print_reg(dut, [1,2,3])
483 #yield from print_reg(dut, [1,2,3])
485 # wait for all instructions to stop before checking
488 busy_o
= yield dut
.busy_o
495 yield from alusim
.check(dut
)
496 yield from alusim
.dump(dut
)
def explore_groups(dut):
    """Elaborate *dut* and run nMigen's LHSGroupAnalyzer over its statements.

    NOTE(review): in the visible listing the resulting groups are neither
    returned nor printed - confirm whether a trailing statement was lost.
    """
    from nmigen.hdl.ir import Fragment
    from nmigen.hdl.xfrm import LHSGroupAnalyzer

    fragment = dut.elaborate(platform=None)
    # result unused; call kept as in the original
    fr = Fragment.get(fragment, platform=None)

    analyzer = LHSGroupAnalyzer()
    groups = analyzer(fragment._statements)
def test_scoreboard():
    """Build a 16-bit, 8-register scoreboard, dump it as RTLIL and simulate.

    Writes ``test_scoreboard6600.il`` (RTLIL) and ``test_scoreboard6600.vcd``
    (waveform) into the current directory.
    """
    dut = Scoreboard(16, 8)
    alusim = RegSim(16, 8)

    # conversion runs before the simulation, as in the original ordering;
    # scoreboard_sim relies on dut.int_insn_i, which elaborate() creates
    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as f:
        f.write(vl)

    run_simulation(dut, scoreboard_sim(dut, alusim),
                   vcd_name='test_scoreboard6600.vcd')
522 if __name__
== '__main__':