1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fn_unit
import IntFnUnit
, FPFnUnit
, LDFnUnit
, STFnUnit
7 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
8 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
9 from scoreboard
.global_pending
import GlobalPending
10 from scoreboard
.group_picker
import GroupPicker
11 from scoreboard
.issue_unit
import IntFPIssueUnit
, RegDecode
13 from compalu
import ComputationUnitNoDelay
15 from alu_hier
import ALU
16 from nmutil
.latch
import SRLatch
18 from random
import randint
class CompUnits(Elaboratable):
    """Group of four integer Computation Units behind one set of ports.

    Each unit is a ComputationUnitNoDelay hard-wired to a single opcode
    (0=add, 1=sub, 2=mul, 3=shf).  Per-unit control/status bits are
    presented as bit-vectors, and all units share the same two operand
    inputs and a single merged result output.
    """

    def __init__(self, rwid, n_units):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_units: number of ALUs
        """
        self.n_units = n_units
        self.rwid = rwid

        # control/status vectors: one bit per unit
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)

        # data path, shared by every unit
        self.dest_o = Signal(rwid, reset_less=True)
        self.src1_data_i = Signal(rwid, reset_less=True)
        self.src2_data_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        """Build the four units and wire them to this module's ports."""
        m = Module()

        # Int ALUs
        # NOTE(review): ALU is assumed to be constructed from the data
        # width alone - confirm against alu_hier.ALU.
        add = ALU(self.rwid)
        sub = ALU(self.rwid)
        mul = ALU(self.rwid)
        shf = ALU(self.rwid)

        m.submodules.comp1 = comp1 = ComputationUnitNoDelay(self.rwid, 2, add)
        m.submodules.comp2 = comp2 = ComputationUnitNoDelay(self.rwid, 2, sub)
        m.submodules.comp3 = comp3 = ComputationUnitNoDelay(self.rwid, 2, mul)
        m.submodules.comp4 = comp4 = ComputationUnitNoDelay(self.rwid, 2, shf)
        # NOTE(review): exactly four units are built whatever n_units is;
        # the n_units-wide vectors presumably expect n_units == 4.
        int_alus = [comp1, comp2, comp3, comp4]

        # each unit is permanently tied to one operation
        m.d.comb += comp1.oper_i.eq(Const(0, 2))  # op=add
        m.d.comb += comp2.oper_i.eq(Const(1, 2))  # op=sub
        m.d.comb += comp3.oper_i.eq(Const(2, 2))  # op=mul
        m.d.comb += comp4.oper_i.eq(Const(3, 2))  # op=shf

        # outputs: gather one bit from each unit into the vectors
        m.d.comb += self.rd_rel_o.eq(Cat(*[alu.rd_rel_o for alu in int_alus]))
        m.d.comb += self.req_rel_o.eq(
            Cat(*[alu.req_rel_o for alu in int_alus]))
        m.d.comb += self.busy_o.eq(Cat(*[alu.busy_o for alu in int_alus]))
        # inputs: scatter the vectors, one bit to each unit
        m.d.comb += Cat(*[alu.go_wr_i for alu in int_alus]).eq(self.go_wr_i)
        m.d.comb += Cat(*[alu.go_rd_i for alu in int_alus]).eq(self.go_rd_i)
        m.d.comb += Cat(*[alu.issue_i for alu in int_alus]).eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "dest_o"
        dest_o = treereduce(int_alus)
        m.d.comb += self.dest_o.eq(dest_o)

        # every unit sees the same pair of source operands
        for alu in int_alus:
            m.d.comb += alu.src1_i.eq(self.src1_data_i)
            m.d.comb += alu.src2_i.eq(self.src2_data_i)

        return m
class FunctionUnits(Elaboratable):
    """Integer dependency tracking: FU-FU and FU-Register matrices.

    Instantiates one FUFUDepMatrix and one FURegDepMatrix, connects the
    register matrix's pending outputs into the FU-FU matrix, and exposes
    the issue/go inputs, the readable/writable vectors and the register
    select outputs as ports of this module.
    """

    def __init__(self, n_regs, n_int_alus):
        """ Inputs:

            * :n_regs:     width of the register-indexed vectors
            * :n_int_alus: width of the function-unit-indexed vectors
        """
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        # register-indexed inputs
        self.dest_i = Signal(n_regs, reset_less=True)  # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True)  # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True)  # oper2 R# in

        # global read/write pending vectors
        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        # register select outputs
        self.dest_rsel_o = Signal(n_regs, reset_less=True)  # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True)  # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True)  # src2 reg (bot)

        # function-unit-indexed vectors
        # (req_rel_i / req_rel_o are declared but not wired in elaborate)
        self.req_rel_i = Signal(n_int_alus, reset_less=True)
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.req_rel_o = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

    def elaborate(self, platform):
        """Create both dependency matrices and connect them to the ports."""
        m = Module()

        n_int_fus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        intfudeps = FUFUDepMatrix(n_int_fus, n_int_fus)
        m.submodules.intfudeps = intfudeps
        # Integer FU-Reg Dep Matrix
        intregdeps = FURegDepMatrix(n_int_fus, self.n_regs)
        m.submodules.intregdeps = intregdeps

        # the register matrix's rd/wr select vectors double as the
        # global pending outputs ...
        m.d.comb += self.g_int_rd_pend_o.eq(intregdeps.rd_rsel_o)
        m.d.comb += self.g_int_wr_pend_o.eq(intregdeps.wr_rsel_o)

        # ... and are fed straight back in as its own pending inputs
        m.d.comb += intregdeps.rd_pend_i.eq(intregdeps.rd_rsel_o)
        m.d.comb += intregdeps.wr_pend_i.eq(intregdeps.wr_rsel_o)

        # per-FU pending vectors drive the FU-FU matrix
        m.d.comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
        m.d.comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)

        m.d.comb += intfudeps.issue_i.eq(self.fn_issue_i)
        m.d.comb += intfudeps.go_rd_i.eq(self.go_rd_i)
        m.d.comb += intfudeps.go_wr_i.eq(self.go_wr_i)
        m.d.comb += self.readable_o.eq(intfudeps.readable_o)
        m.d.comb += self.writable_o.eq(intfudeps.writable_o)

        # Connect function issue / arrays, and dest/src1/src2
        m.d.comb += intregdeps.dest_i.eq(self.dest_i)
        m.d.comb += intregdeps.src1_i.eq(self.src1_i)
        m.d.comb += intregdeps.src2_i.eq(self.src2_i)

        m.d.comb += intregdeps.go_rd_i.eq(self.go_rd_i)
        m.d.comb += intregdeps.go_wr_i.eq(self.go_wr_i)
        m.d.comb += intregdeps.issue_i.eq(self.fn_issue_i)

        m.d.comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
        m.d.comb += self.src1_rsel_o.eq(intregdeps.src1_rsel_o)
        m.d.comb += self.src2_rsel_o.eq(intregdeps.src2_rsel_o)

        return m
class Scoreboard(Elaboratable):
    """Top level: register files, issue unit, dependency matrices,
    group picker and the integer computation units, wired together.
    """

    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # inputs
        self.int_store_i = Signal(reset_less=True)  # instruction is a store
        self.int_dest_i = Signal(max=n_regs, reset_less=True)  # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True)  # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True)  # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True)  # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True)  # instruction was accepted

    def elaborate(self, platform):
        """Instantiate every sub-block and connect them combinatorially.

        Side effect: sets ``self.int_insn_i`` (the issue unit's
        instruction-select input), so that attribute only exists once the
        design has been elaborated.
        """
        m = Module()

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are created but not connected to anything yet
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        # CompUnits always builds four units (add, sub, mul, shf)
        n_int_alus = 4
        m.submodules.cu = cu = CompUnits(self.rwid, n_int_alus)

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_int_fus = n_int_alus
        n_fp_fus = 0  # for now

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(n_int_fus)  # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IntFPIssueUnit(self.n_regs, n_int_fus, n_fp_fus)
        m.submodules.issueunit = issueunit

        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA

        # Issue Unit is where it starts.  set up some in/outs for this module
        m.d.comb += [issueunit.i.store_i.eq(self.int_store_i),
                     regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     issueunit.i.dest_i.eq(regdecode.dest_o),
                     self.issue_o.eq(issueunit.issue_o)
                     ]
        self.int_insn_i = issueunit.i.insn_i  # enabled by instruction decode

        # connect global rd/wr pending vectors
        m.d.comb += issueunit.i.g_wr_pend_i.eq(intfus.g_int_wr_pend_o)
        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        m.d.comb += intfus.dest_i.eq(regdecode.dest_o)
        m.d.comb += intfus.src1_i.eq(regdecode.src1_o)
        m.d.comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.i.fn_issue_o

        m.d.comb += intfus.fn_issue_i.eq(fn_issue_o)
        # XXX sync, so as to stop a simulation infinite loop
        # NOTE(review): the comment above says sync but the assignment
        # is in the comb domain - confirm which is intended.
        m.d.comb += issueunit.i.busy_i.eq(cu.busy_o)

        # connect fu-fu matrix

        # Group Picker... done manually for now.  TODO: cat array of pick sigs
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        m.d.comb += go_rd_i[0:n_int_fus].eq(go_rd_o[0:n_int_fus])  # rd
        m.d.comb += go_wr_i[0:n_int_fus].eq(go_wr_o[0:n_int_fus])  # wr

        # picker inputs: release requests from the computation units,
        # readable/writable from the dependency matrices
        m.d.comb += intpick1.rd_rel_i[0:n_int_fus].eq(cu.rd_rel_o[0:n_int_fus])
        m.d.comb += intpick1.req_rel_i[0:n_int_fus].eq(
            cu.req_rel_o[0:n_int_fus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        m.d.comb += intpick1.readable_i[0:n_int_fus].eq(int_rd_o[0:n_int_fus])
        m.d.comb += intpick1.writable_i[0:n_int_fus].eq(int_wr_o[0:n_int_fus])

        # Connect Register File(s)
        print ("intregdeps wen len", len(intfus.dest_rsel_o))
        m.d.comb += int_dest.wen.eq(intfus.dest_rsel_o)
        m.d.comb += int_src1.ren.eq(intfus.src1_rsel_o)
        m.d.comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        m.d.comb += int_dest.data_i.eq(cu.dest_o)
        m.d.comb += cu.src1_data_i.eq(int_src1.data_o)
        m.d.comb += cu.src2_data_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        m.d.comb += cu.go_rd_i[0:n_int_fus].eq(go_rd_o[0:n_int_fus])
        m.d.comb += cu.go_wr_i[0:n_int_fus].eq(go_wr_o[0:n_int_fus])
        m.d.comb += cu.issue_i[0:n_int_fus].eq(fn_issue_o[0:n_int_fus])

        return m

    def __iter__(self):
        """Yield the signals exported as ports of the design."""
        yield from self.intregs
        yield from self.fpregs
        yield self.int_store_i
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        # the register-file port objects (int/fp dest, src1, src2) are
        # deliberately not exported yet

    def ports(self):
        """Return the exported signals as a list."""
        return list(self)
class RegSim:
    """Software reference model of the integer register file.

    Mirrors in plain Python integers what the hardware is expected to
    leave in its registers, so the two can be compared after a run.
    """

    def __init__(self, rwidth, nregs):
        """Start with *nregs* registers of *rwidth* bits, all zero."""
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, src1, src2, dest):
        """Apply operation *op* to registers src1/src2, store into dest.

        *src1*, *src2* and *dest* are register numbers.  Opcode numbering
        follows CompUnits: 0=add, 1=sub, 2=mul, 3=shf.  The result is
        truncated to the register width.

        Raises ValueError for any other opcode.
        """
        src1 = self.regs[src1]
        src2 = self.regs[src2]
        if op == 0:
            val = src1 + src2
        elif op == 1:
            val = src1 - src2
        elif op == 2:
            val = src1 * src2
        elif op == 3:
            # NOTE(review): the shift amount is masked with rwidth itself,
            # not rwidth-1 - confirm this matches the hardware shifter.
            val = (src1 << (src2 & self.rwidth))
        else:
            raise ValueError("unknown opcode %r" % (op,))
        val &= ((1 << (self.rwidth)) - 1)
        self.regs[dest] = val

    def setval(self, dest, val):
        """Force register *dest* to *val* (no truncation is applied)."""
        self.regs[dest] = val

    def dump(self, dut):
        """Simulation process: print every register, model vs hardware."""
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """Simulation process: report registers that differ from the model.

        For each mismatch the offending register is printed, followed by
        a full dump.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
def int_instr(dut, alusim, op, src1, src2, dest):
    """Simulation process: present one integer instruction to the DUT.

    Clears every instruction-select bit, drives the destination and the
    two source register numbers, raises the select bit for *op* and
    enables register decode.  The same operation is then applied to the
    software model *alusim*.
    """
    insn = dut.int_insn_i

    # deselect all operations first
    for bit in range(len(insn)):
        yield insn[bit].eq(0)

    # register numbers: dest, then src1, then src2
    operands = (
        (dut.int_dest_i, dest),
        (dut.int_src1_i, src1),
        (dut.int_src2_i, src2),
    )
    for port, regnum in operands:
        yield port.eq(regnum)

    # select the requested operation and enable decode
    yield insn[op].eq(1)
    yield dut.reg_enable_i.eq(1)

    # keep the reference model in step with the hardware
    alusim.op(op, src1, src2, dest)
def print_reg(dut, rnums):
    """Simulation process: print the current value of selected registers.

    *rnums* is a sequence of integer register numbers.  Each register is
    read from the DUT's integer register file and shown in hex on one
    line, after the comma-separated list of register numbers.
    """
    rs = []
    for rnum in rnums:
        reg = yield dut.intregs.regs[rnum].reg
        rs.append("%x" % reg)
    rnums = map(str, rnums)
    print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
379 def scoreboard_sim(dut
, alusim
):
380 yield dut
.int_store_i
.eq(0)
382 for i
in range(1, dut
.n_regs
):
383 yield dut
.intregs
.regs
[i
].reg
.eq(4+i
*2)
384 alusim
.setval(i
, 4+i
*2)
389 src1
= randint(1, dut
.n_regs
-1)
390 src2
= randint(1, dut
.n_regs
-1)
392 dest
= randint(1, dut
.n_regs
-1)
393 if dest
not in [src1
, src2
]:
403 instrs
.append((src1
, src2
, dest
, op
))
406 instrs
.append((2, 3, 3, 0))
407 instrs
.append((5, 3, 3, 1))
410 instrs
.append((5, 6, 2, 1))
411 instrs
.append((2, 2, 4, 0))
412 #instrs.append((2, 2, 3, 1))
415 instrs
.append((2, 1, 2, 3))
418 instrs
.append((2, 6, 2, 1))
419 instrs
.append((2, 1, 2, 0))
422 instrs
.append((1, 2, 7, 2))
423 instrs
.append((7, 1, 5, 0))
424 instrs
.append((4, 4, 1, 1))
427 instrs
.append((5, 6, 2, 2))
428 instrs
.append((1, 1, 4, 1))
429 instrs
.append((6, 5, 3, 0))
431 for i
, (src1
, src2
, dest
, op
) in enumerate(instrs
):
433 print ("instr %d: %d %d %d %d\n" % (i
, op
, src1
, src2
, dest
))
434 yield from int_instr(dut
, alusim
, op
, src1
, src2
, dest
)
437 issue_o
= yield dut
.issue_o
439 for i
in range(len(dut
.int_insn_i
)):
440 yield dut
.int_insn_i
[i
].eq(0)
441 yield dut
.reg_enable_i
.eq(0)
444 yield from print_reg(dut
, [1,2,3])
446 yield from print_reg(dut
, [1,2,3])
449 yield from print_reg(dut
, [1,2,3])
451 yield from print_reg(dut
, [1,2,3])
453 yield from print_reg(dut
, [1,2,3])
455 yield from print_reg(dut
, [1,2,3])
460 yield from alusim
.check(dut
)
461 yield from alusim
.dump(dut
)
def explore_groups(dut):
    """Elaborate *dut* and run nmigen's LHS group analysis on it.

    Debugging aid: the analysis is run over the statements of the
    object returned by ``dut.elaborate``.
    """
    from nmigen.hdl.ir import Fragment
    from nmigen.hdl.xfrm import LHSGroupAnalyzer

    elaborated = dut.elaborate(platform=None)
    # the returned fragment is not used further; the call is kept as-is
    fr = Fragment.get(elaborated, platform=None)

    analyzer = LHSGroupAnalyzer()
    groups = analyzer(elaborated._statements)
def test_scoreboard():
    """Build a 16-bit, 8-register scoreboard, export it and simulate it.

    Writes the RTLIL for the design to ``test_scoreboard6600.il`` and
    then runs ``scoreboard_sim`` against it, recording a VCD trace in
    ``test_scoreboard6600.vcd``.
    """
    dut = Scoreboard(16, 8)
    alusim = RegSim(16, 8)

    # converting to RTLIL elaborates the design
    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as f:
        f.write(vl)

    run_simulation(dut, scoreboard_sim(dut, alusim),
                   vcd_name='test_scoreboard6600.vcd')
487 if __name__
== '__main__':