1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fn_unit
import IntFnUnit
, FPFnUnit
, LDFnUnit
, STFnUnit
7 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
8 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
9 from scoreboard
.global_pending
import GlobalPending
10 from scoreboard
.group_picker
import GroupPicker
11 from scoreboard
.issue_unit
import IntFPIssueUnit
, RegDecode
13 from compalu
import ComputationUnitNoDelay
15 from alu_hier
import ALU
16 from nmutil
.latch
import SRLatch
18 from random
import randint
class CompUnits(Elaboratable):
    """Bundle of integer Computation Units behind one set of vectored ports.

    Two ComputationUnitNoDelay instances are built (one around an "add"
    ALU, one around a "sub" ALU).  Their per-unit control and status lines
    are concatenated, comp1 first, into the n_units-wide signals declared
    in the constructor, and both units share the same source operands.
    """

    def __init__(self, rwid, n_units):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_units: number of ALUs
        """
        self.n_units = n_units
        # elaborate() needs the data width when it creates the units
        self.rwid = rwid

        # per-unit control (in) and status (out), one bit lane per unit
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)

        # shared data path: merged result out, two source operands in
        self.dest_o = Signal(rwid, reset_less=True)
        self.src1_data_i = Signal(rwid, reset_less=True)
        self.src2_data_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        """Build the two computation units and wire them to the ports.

        Returns the nmigen Module containing the units as submodules
        ``comp1`` and ``comp2``.
        """
        m = Module()

        # Int ALUs
        # NOTE(review): assumes ALU takes the data width as its only
        # constructor argument - confirm against alu_hier.
        add = ALU(self.rwid)
        sub = ALU(self.rwid)
        m.submodules.comp1 = comp1 = ComputationUnitNoDelay(self.rwid, 1, add)
        m.submodules.comp2 = comp2 = ComputationUnitNoDelay(self.rwid, 1, sub)
        int_alus = [comp1, comp2]

        m.d.comb += comp1.oper_i.eq(Const(0)) # temporary/experiment: op=add
        m.d.comb += comp2.oper_i.eq(Const(1)) # temporary/experiment: op=sub

        # collect each unit's control/status line, one entry per unit
        req_rel_l = [alu.req_rel_o for alu in int_alus]
        rd_rel_l = [alu.rd_rel_o for alu in int_alus]
        go_wr_l = [alu.go_wr_i for alu in int_alus]
        go_rd_l = [alu.go_rd_i for alu in int_alus]
        issue_l = [alu.issue_i for alu in int_alus]
        busy_l = [alu.busy_o for alu in int_alus]

        # status: units -> vectored outputs
        m.d.comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
        m.d.comb += self.req_rel_o.eq(Cat(*req_rel_l))
        m.d.comb += self.busy_o.eq(Cat(*busy_l))
        # control: vectored inputs -> units
        m.d.comb += Cat(*go_wr_l).eq(self.go_wr_i)
        m.d.comb += Cat(*go_rd_l).eq(self.go_rd_i)
        m.d.comb += Cat(*issue_l).eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "dest_o"
        dest_o = treereduce(int_alus)
        m.d.comb += self.dest_o.eq(dest_o)

        # every unit sees the same pair of source operands
        for alu in int_alus:
            m.d.comb += alu.src1_i.eq(self.src1_data_i)
            m.d.comb += alu.src2_i.eq(self.src2_data_i)

        return m
class FunctionUnits(Elaboratable):
    """Integer Function Unit dependency tracking.

    Wraps a FU-FU dependency matrix and a FU-Register dependency matrix,
    wires the two together and exposes their issue / go / pending /
    register-select signals as ports of this module.
    """

    def __init__(self, n_regs, n_int_alus):
        """ Inputs:

            * :n_regs:     width of the register-select vectors
            * :n_int_alus: number of integer ALUs (one Function Unit each)
        """
        # elaborate() sizes the FU-Reg matrix from n_regs
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in

        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)

        self.req_rel_i = Signal(n_int_alus, reset_less = True)
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.req_rel_o = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

    def elaborate(self, platform):
        """Instantiate both dependency matrices and connect them.

        Returns the nmigen Module with submodules ``intfudeps`` and
        ``intregdeps``.
        """
        m = Module()

        n_int_fus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        intfudeps = FUFUDepMatrix(n_int_fus, n_int_fus)
        m.submodules.intfudeps = intfudeps
        # Integer FU-Reg Dep Matrix
        intregdeps = FURegDepMatrix(n_int_fus, self.n_regs)
        m.submodules.intregdeps = intregdeps

        # global pending vectors are taken from the FU-Reg select outputs
        m.d.comb += self.g_int_rd_pend_o.eq(intregdeps.rd_rsel_o)
        m.d.comb += self.g_int_wr_pend_o.eq(intregdeps.wr_rsel_o)

        # ... which are also fed back in as the FU-Reg pending inputs
        m.d.comb += intregdeps.rd_pend_i.eq(intregdeps.rd_rsel_o)
        m.d.comb += intregdeps.wr_pend_i.eq(intregdeps.wr_rsel_o)

        # FU-Reg pending outputs drive the FU-FU matrix
        m.d.comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
        m.d.comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)

        m.d.comb += intfudeps.issue_i.eq(self.fn_issue_i)
        m.d.comb += intfudeps.go_rd_i.eq(self.go_rd_i)
        m.d.comb += intfudeps.go_wr_i.eq(self.go_wr_i)
        m.d.comb += self.readable_o.eq(intfudeps.readable_o)
        m.d.comb += self.writable_o.eq(intfudeps.writable_o)

        # Connect function issue / arrays, and dest/src1/src2
        m.d.comb += intregdeps.dest_i.eq(self.dest_i)
        m.d.comb += intregdeps.src1_i.eq(self.src1_i)
        m.d.comb += intregdeps.src2_i.eq(self.src2_i)

        m.d.comb += intregdeps.go_rd_i.eq(self.go_rd_i)
        m.d.comb += intregdeps.go_wr_i.eq(self.go_wr_i)
        m.d.comb += intregdeps.issue_i.eq(self.fn_issue_i)

        m.d.comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
        m.d.comb += self.src1_rsel_o.eq(intregdeps.src1_rsel_o)
        m.d.comb += self.src2_rsel_o.eq(intregdeps.src2_rsel_o)

        return m
class Scoreboard(Elaboratable):
    """Top level: register files, issue unit, dependency tracking and ALUs.

    Note that ``int_insn_i`` is only created by elaborate() (it aliases the
    issue unit's instruction input), so it does not exist on a freshly
    constructed instance.
    """

    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        # both are read again by elaborate() and by the simulation code
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # inputs
        self.int_store_i = Signal(reset_less=True) # instruction is a store
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted

    def elaborate(self, platform):
        """Instantiate every sub-unit and wire the scoreboard together.

        Returns the nmigen Module.  As a side effect, sets
        ``self.int_insn_i`` to the issue unit's instruction input.
        """
        m = Module()

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested but not connected to anything yet
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        # CompUnits builds exactly two units (add, sub) and every slice
        # below is [0:2], hence two integer ALUs.
        n_int_alus = 2
        m.submodules.cu = cu = CompUnits(self.rwid, n_int_alus)

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_int_fus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(2) # picks between add and sub
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IntFPIssueUnit(self.n_regs, n_int_fus, n_fp_fus)
        m.submodules.issueunit = issueunit

        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA

        # Issue Unit is where it starts.  set up some in/outs for this module
        m.d.comb += [issueunit.i.store_i.eq(self.int_store_i),
                     regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     issueunit.i.dest_i.eq(regdecode.dest_o),
                     self.issue_o.eq(issueunit.issue_o)
                    ]
        self.int_insn_i = issueunit.i.insn_i # enabled by instruction decode

        # connect global rd/wr pending vectors
        m.d.comb += issueunit.i.g_wr_pend_i.eq(intfus.g_int_wr_pend_o)
        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        m.d.comb += intfus.dest_i.eq(regdecode.dest_o)
        m.d.comb += intfus.src1_i.eq(regdecode.src1_o)
        m.d.comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.i.fn_issue_o

        m.d.comb += intfus.fn_issue_i.eq(fn_issue_o)
        # XXX sync, so as to stop a simulation infinite loop
        m.d.comb += issueunit.i.busy_i.eq(cu.busy_o)

        # connect fu-fu matrix

        # Group Picker... done manually for now.  TODO: cat array of pick sigs
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        m.d.comb += go_rd_i[0:2].eq(go_rd_o[0:2]) # add rd
        m.d.comb += go_wr_i[0:2].eq(go_wr_o[0:2]) # add wr

        #m.d.comb += intpick1.rd_rel_i[0:2].eq(~go_rd_i[0:2] & cu.busy_o[0:2])
        m.d.comb += intpick1.rd_rel_i[0:2].eq(cu.rd_rel_o[0:2])
        #m.d.comb += intpick1.go_rd_i[0:2].eq(cu.req_rel_o[0:2])
        m.d.comb += intpick1.req_rel_i[0:2].eq(cu.req_rel_o[0:2])
        int_readable_o = intfus.readable_o
        int_writable_o = intfus.writable_o
        m.d.comb += intpick1.readable_i[0:2].eq(int_readable_o[0:2])
        m.d.comb += intpick1.writable_i[0:2].eq(int_writable_o[0:2])

        # Connect Register File(s)
        print ("intregdeps wen len", len(intfus.dest_rsel_o))
        m.d.comb += int_dest.wen.eq(intfus.dest_rsel_o)
        m.d.comb += int_src1.ren.eq(intfus.src1_rsel_o)
        m.d.comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        m.d.comb += int_dest.data_i.eq(cu.dest_o)
        m.d.comb += cu.src1_data_i.eq(int_src1.data_o)
        m.d.comb += cu.src2_data_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        m.d.comb += cu.go_rd_i[0:2].eq(go_rd_o[0:2])
        m.d.comb += cu.go_wr_i[0:2].eq(go_wr_o[0:2])
        m.d.comb += cu.issue_i[0:2].eq(fn_issue_o[0:2])

        return m

    def __iter__(self):
        """Yield the signals that make up this module's port list."""
        yield from self.intregs
        yield from self.fpregs
        yield self.int_store_i
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i

    def ports(self):
        """Return the port signals as a list (used for RTLIL conversion)."""
        return list(self)
class RegSim:
    """Pure-Python reference model of the integer register file.

    Instructions applied to the hardware are mirrored here with op();
    dump() and check() then compare the model against the registers of a
    simulated device.  Both are generators intended to be driven by the
    simulator: each ``yield`` of a register signal is answered with that
    register's current value.
    """

    def __init__(self, rwidth, nregs):
        """Create a model of *nregs* registers, each *rwidth* bits wide."""
        # op() needs the width to wrap results like the hardware does
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, src1, src2, dest):
        """Apply one instruction to the model.

        * :op:   0 selects add, 1 selects subtract
        * :src1: register number of the first operand
        * :src2: register number of the second operand
        * :dest: register number receiving the result

        The result is truncated to rwidth bits.  Raises ValueError for
        any other op code.
        """
        lhs = self.regs[src1]
        rhs = self.regs[src2]
        mask = (1 << self.rwidth) - 1
        if op == 0:
            val = (lhs + rhs) & mask
        elif op == 1:
            val = (lhs - rhs) & mask
        else:
            raise ValueError("unknown op %r" % (op,))
        self.regs[dest] = val

    def setval(self, dest, val):
        """Set model register *dest* directly to *val*."""
        self.regs[dest] = val

    def dump(self, dut):
        """Print every register: expected (model) against received (dut)."""
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """Report registers whose dut value differs from the model.

        Each mismatch is printed; if there was at least one, the whole
        register file is dumped once afterwards for context.
        """
        mismatch = False
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                mismatch = True
        if mismatch:
            yield from self.dump(dut)
def int_instr(dut, alusim, op, src1, src2, dest):
    """Present one integer instruction to the scoreboard inputs.

    Generator for the simulator.  Clears every instruction-select line,
    sets the destination and source register numbers, raises the select
    line for *op*, enables register decode and finally mirrors the
    instruction into the reference model *alusim*.
    """
    insn_lines = dut.int_insn_i

    # start from a clean slate: no instruction selected
    for idx in range(len(insn_lines)):
        yield insn_lines[idx].eq(0)

    # register numbers for this instruction
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    # select the operation and let the register decoder run
    yield insn_lines[op].eq(1)
    yield dut.reg_enable_i.eq(1)

    # keep the software model in step with the hardware
    alusim.op(op, src1, src2, dest)
def print_reg(dut, rnums):
    """Print the current value of the integer registers listed in *rnums*.

    Generator for the simulator: each register signal is yielded and the
    value sent back is formatted as hex.  Output is a single line of the
    form ``reg 1,2,3: a,ff,3``.
    """
    # materialise once so the numbers can be both read and printed
    rnums = list(rnums)
    rs = []
    for rnum in rnums:
        reg = yield dut.intregs.regs[rnum].reg
        rs.append("%x" % reg)
    rnums = map(str, rnums)
    print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
368 def scoreboard_sim(dut
, alusim
):
369 yield dut
.int_store_i
.eq(0)
371 for i
in range(1, dut
.n_regs
):
372 yield dut
.intregs
.regs
[i
].reg
.eq(4+i
*2)
373 alusim
.setval(i
, 4+i
*2)
378 src1
= randint(1, dut
.n_regs
-1)
379 src2
= randint(1, dut
.n_regs
-1)
381 dest
= randint(1, dut
.n_regs
-1)
382 if dest
not in [src1
, src2
]:
392 instrs
.append((src1
, src2
, dest
, op
))
395 instrs
.append((2, 3, 3, 0))
396 instrs
.append((5, 3, 3, 1))
399 instrs
.append((5, 6, 2, 1))
400 instrs
.append((2, 2, 4, 0))
401 #instrs.append((2, 2, 3, 1))
404 instrs
.append((2, 1, 2, 0))
407 instrs
.append((2, 6, 2, 1))
408 instrs
.append((2, 1, 2, 0))
411 instrs
.append((1, 4, 7, 1))
412 instrs
.append((7, 1, 5, 0))
413 instrs
.append((4, 3, 1, 1))
414 instrs
.append((6, 5, 7, 1))
416 for i
, (src1
, src2
, dest
, op
) in enumerate(instrs
):
418 print ("instr %d: %d %d %d %d\n" % (i
, op
, src1
, src2
, dest
))
419 yield from int_instr(dut
, alusim
, op
, src1
, src2
, dest
)
422 issue_o
= yield dut
.issue_o
424 for i
in range(len(dut
.int_insn_i
)):
425 yield dut
.int_insn_i
[i
].eq(0)
426 yield dut
.reg_enable_i
.eq(0)
429 yield from print_reg(dut
, [1,2,3])
431 yield from print_reg(dut
, [1,2,3])
435 yield from print_reg(dut
, [1,2,3])
437 yield from print_reg(dut
, [1,2,3])
439 yield from print_reg(dut
, [1,2,3])
441 yield from print_reg(dut
, [1,2,3])
446 yield from alusim
.check(dut
)
447 yield from alusim
.dump(dut
)
def explore_groups(dut):
    """Elaborate *dut* and run LHSGroupAnalyzer over its statements.

    The analyzer result is bound to a local only; nothing is returned by
    the statements visible here.
    """
    from nmigen.hdl.ir import Fragment
    from nmigen.hdl.xfrm import LHSGroupAnalyzer

    fragment = dut.elaborate(platform=None)
    # result is not used below; the call is kept as in the original
    fr = Fragment.get(fragment, platform=None)

    analyzer = LHSGroupAnalyzer()
    groups = analyzer(fragment._statements)
def test_scoreboard():
    """Build a 16-bit, 8-register Scoreboard, emit RTLIL and simulate it.

    Writes ``test_scoreboard6600.il`` (the RTLIL text) and
    ``test_scoreboard6600.vcd`` (the simulation trace) to the current
    directory.
    """
    dut = Scoreboard(16, 8)
    alusim = RegSim(16, 8)
    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as f:
        # the converted design is what the .il file is opened for
        f.write(vl)

    run_simulation(dut, scoreboard_sim(dut, alusim),
                   vcd_name='test_scoreboard6600.vcd')
473 if __name__
== '__main__':