1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fn_unit
import IntFnUnit
, FPFnUnit
, LDFnUnit
, STFnUnit
7 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
8 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
9 from scoreboard
.global_pending
import GlobalPending
10 from scoreboard
.group_picker
import GroupPicker
11 from scoreboard
.issue_unit
import IntFPIssueUnit
, RegDecode
13 from compalu
import ComputationUnitNoDelay
15 from alu_hier
import ALU
16 from nmutil
.latch
import SRLatch
18 from random
import randint
class CompUnits(Elaboratable):
    """Bundle of integer Computation Units behind vector-style ports.

    ``elaborate`` instantiates two ``ComputationUnitNoDelay`` submodules
    (``comp1`` and ``comp2``), concatenates their per-unit control/status
    signals into the ``n_units``-wide signals declared in ``__init__``
    (``comp1`` first), fans the two source operands out to both units and
    merges their results into ``dest_o`` with ``treereduce``.

    NOTE(review): exactly two units are always built, whatever ``n_units``
    says; callers are expected to pass ``n_units=2`` -- confirm.
    """

    def __init__(self, rwid, n_units):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_units: number of ALUs
        """
        self.n_units = n_units
        # NOTE(review): elaborate() reads self.rwid, but no assignment was
        # present in the reviewed text; restored here.
        self.rwid = rwid

        # per-unit control/status, one entry per computation unit
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)

        # data path, shared by all units
        self.dest_o = Signal(rwid, reset_less=True)
        self.src1_data_i = Signal(rwid, reset_less=True)
        self.src2_data_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        """Build the two computation units and wire them to the ports.

        Returns the populated ``Module``.
        """
        # NOTE(review): the Module construction, the two ALU instances, the
        # per-signal collection loop and the final ``return m`` were absent
        # from the reviewed text (the names were used but never bound).
        # They are reconstructed here; ``ALU(self.rwid)`` in particular is
        # an assumption about the ALU constructor -- confirm.
        m = Module()

        # Int ALUs
        add = ALU(self.rwid)
        sub = ALU(self.rwid)
        m.submodules.comp1 = comp1 = ComputationUnitNoDelay(self.rwid, 2, add)
        m.submodules.comp2 = comp2 = ComputationUnitNoDelay(self.rwid, 2, sub)
        int_alus = [comp1, comp2]

        m.d.comb += comp1.oper_i.eq(Const(0))  # temporary/experiment: op=add
        m.d.comb += comp2.oper_i.eq(Const(1))  # temporary/experiment: op=sub

        # gather the per-unit signals, in unit order
        req_rel_l = [alu.req_rel_o for alu in int_alus]
        rd_rel_l = [alu.rd_rel_o for alu in int_alus]
        go_wr_l = [alu.go_wr_i for alu in int_alus]
        go_rd_l = [alu.go_rd_i for alu in int_alus]
        issue_l = [alu.issue_i for alu in int_alus]
        busy_l = [alu.busy_o for alu in int_alus]

        # unit outputs -> vector outputs of this module
        m.d.comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
        m.d.comb += self.req_rel_o.eq(Cat(*req_rel_l))
        m.d.comb += self.busy_o.eq(Cat(*busy_l))
        # vector inputs of this module -> unit inputs
        m.d.comb += Cat(*go_wr_l).eq(self.go_wr_i)
        m.d.comb += Cat(*go_rd_l).eq(self.go_rd_i)
        m.d.comb += Cat(*issue_l).eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "dest_o"
        dest_o = treereduce(int_alus)
        m.d.comb += self.dest_o.eq(dest_o)

        # every unit sees the same pair of source operands
        for alu in int_alus:
            m.d.comb += alu.src1_i.eq(self.src1_data_i)
            m.d.comb += alu.src2_i.eq(self.src2_data_i)

        return m
class FunctionUnits(Elaboratable):
    """Wrapper around the integer FU-FU and FU-Reg dependency matrices.

    ``elaborate`` instantiates one ``FUFUDepMatrix`` and one
    ``FURegDepMatrix`` and connects them to each other and to the signals
    declared in ``__init__``.
    """

    def __init__(self, n_regs, n_int_alus):
        """Declare the module's signals.

        * :n_regs: width of the register-indexed signals
        * :n_int_alus: width of the function-unit-indexed signals
        """
        # NOTE(review): elaborate() reads self.n_regs, but no assignment was
        # present in the reviewed text; restored here.
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        self.dest_i = Signal(n_regs, reset_less=True)  # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True)  # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True)  # oper2 R# in

        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        self.dest_rsel_o = Signal(n_regs, reset_less=True)  # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True)  # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True)  # src2 reg (bot)

        # NOTE(review): req_rel_i / req_rel_o are declared but not connected
        # anywhere in elaborate() as visible here.
        self.req_rel_i = Signal(n_int_alus, reset_less=True)
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.req_rel_o = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

    def elaborate(self, platform):
        """Instantiate and interconnect the two dependency matrices.

        Returns the populated ``Module``.
        """
        # NOTE(review): ``m = Module()`` and ``return m`` were absent from
        # the reviewed text (``m`` was used but never bound); restored.
        m = Module()

        n_int_fus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        intfudeps = FUFUDepMatrix(n_int_fus, n_int_fus)
        m.submodules.intfudeps = intfudeps
        # Integer FU-Reg Dep Matrix
        intregdeps = FURegDepMatrix(n_int_fus, self.n_regs)
        m.submodules.intregdeps = intregdeps

        # the FU-Reg matrix's register-select outputs are exported as the
        # global pending vectors ...
        m.d.comb += self.g_int_rd_pend_o.eq(intregdeps.rd_rsel_o)
        m.d.comb += self.g_int_wr_pend_o.eq(intregdeps.wr_rsel_o)

        # ... and also fed back into the same matrix's pending inputs
        m.d.comb += intregdeps.rd_pend_i.eq(intregdeps.rd_rsel_o)
        m.d.comb += intregdeps.wr_pend_i.eq(intregdeps.wr_rsel_o)

        # FU-Reg pending outputs drive the FU-FU matrix
        m.d.comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
        m.d.comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)

        m.d.comb += intfudeps.issue_i.eq(self.fn_issue_i)
        m.d.comb += intfudeps.go_rd_i.eq(self.go_rd_i)
        m.d.comb += intfudeps.go_wr_i.eq(self.go_wr_i)
        m.d.comb += self.readable_o.eq(intfudeps.readable_o)
        m.d.comb += self.writable_o.eq(intfudeps.writable_o)

        # Connect function issue / arrays, and dest/src1/src2
        m.d.comb += intregdeps.dest_i.eq(self.dest_i)
        m.d.comb += intregdeps.src1_i.eq(self.src1_i)
        m.d.comb += intregdeps.src2_i.eq(self.src2_i)

        m.d.comb += intregdeps.go_rd_i.eq(self.go_rd_i)
        m.d.comb += intregdeps.go_wr_i.eq(self.go_wr_i)
        m.d.comb += intregdeps.issue_i.eq(self.fn_issue_i)

        m.d.comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
        m.d.comb += self.src1_rsel_o.eq(intregdeps.src1_rsel_o)
        m.d.comb += self.src2_rsel_o.eq(intregdeps.src2_rsel_o)

        return m
class Scoreboard(Elaboratable):
    """Top-level module: register files, issue unit, dependency tracking,
    group picker and computation units wired together.

    Only the integer path is connected; the FP register file and its ports
    are created but not wired to anything in the code visible here.
    """

    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        # NOTE(review): elaborate() reads self.rwid and self.n_regs (and the
        # simulation reads dut.n_regs), but neither assignment was present
        # in the reviewed text; restored here.
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # inputs
        self.int_store_i = Signal(reset_less=True)  # instruction is a store
        self.int_dest_i = Signal(max=n_regs, reset_less=True)  # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True)  # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True)  # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True)  # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True)  # instruction was accepted

    def elaborate(self, platform):
        """Instantiate all submodules and connect them.

        Side effect: sets ``self.int_insn_i`` to the issue unit's
        ``insn_i``, so that attribute only exists after elaboration.

        Returns the populated ``Module``.
        """
        # NOTE(review): ``m = Module()``, ``n_int_alus = 2``, the closing
        # bracket of the first statement list and ``return m`` were absent
        # from the reviewed text and are reconstructed.  The value 2 is
        # inferred from GroupPicker(2) and the [0:2] slices below -- confirm.
        m = Module()

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested but not connected yet
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        n_int_alus = 2
        m.submodules.cu = cu = CompUnits(self.rwid, n_int_alus)

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_int_fus = n_int_alus
        n_fp_fus = 0  # for now

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(2)  # picks between add and sub
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IntFPIssueUnit(self.n_regs, n_int_fus, n_fp_fus)
        m.submodules.issueunit = issueunit

        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA

        # Issue Unit is where it starts.  set up some in/outs for this module
        m.d.comb += [
            issueunit.i.store_i.eq(self.int_store_i),
            regdecode.dest_i.eq(self.int_dest_i),
            regdecode.src1_i.eq(self.int_src1_i),
            regdecode.src2_i.eq(self.int_src2_i),
            regdecode.enable_i.eq(self.reg_enable_i),
            issueunit.i.dest_i.eq(regdecode.dest_o),
            self.issue_o.eq(issueunit.issue_o),
        ]
        self.int_insn_i = issueunit.i.insn_i  # enabled by instruction decode

        # connect global rd/wr pending vectors
        m.d.comb += issueunit.i.g_wr_pend_i.eq(intfus.g_int_wr_pend_o)
        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        m.d.comb += intfus.dest_i.eq(regdecode.dest_o)
        m.d.comb += intfus.src1_i.eq(regdecode.src1_o)
        m.d.comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.i.fn_issue_o

        m.d.comb += intfus.fn_issue_i.eq(fn_issue_o)
        # XXX sync, so as to stop a simulation infinite loop
        # NOTE(review): the comment above says "sync" but the assignment is
        # made in the comb domain -- confirm which is intended.
        m.d.comb += issueunit.i.busy_i.eq(cu.busy_o)

        # connect fu-fu matrix

        # Group Picker... done manually for now.  TODO: cat array of pick sigs
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        m.d.comb += go_rd_i[0:2].eq(go_rd_o[0:2])  # add rd
        m.d.comb += go_wr_i[0:2].eq(go_wr_o[0:2])  # add wr

        #m.d.comb += intpick1.rd_rel_i[0:2].eq(~go_rd_i[0:2] & cu.busy_o[0:2])
        m.d.comb += intpick1.rd_rel_i[0:2].eq(cu.rd_rel_o[0:2])
        #m.d.comb += intpick1.go_rd_i[0:2].eq(cu.req_rel_o[0:2])
        m.d.comb += intpick1.req_rel_i[0:2].eq(cu.req_rel_o[0:2])
        int_readable_o = intfus.readable_o
        int_writable_o = intfus.writable_o
        m.d.comb += intpick1.readable_i[0:2].eq(int_readable_o[0:2])
        m.d.comb += intpick1.writable_i[0:2].eq(int_writable_o[0:2])

        # Connect Register File(s)
        print("intregdeps wen len", len(intfus.dest_rsel_o))
        m.d.comb += int_dest.wen.eq(intfus.dest_rsel_o)
        m.d.comb += int_src1.ren.eq(intfus.src1_rsel_o)
        m.d.comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        m.d.comb += int_dest.data_i.eq(cu.dest_o)
        m.d.comb += cu.src1_data_i.eq(int_src1.data_o)
        m.d.comb += cu.src2_data_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        m.d.comb += cu.go_rd_i[0:2].eq(go_rd_o[0:2])
        m.d.comb += cu.go_wr_i[0:2].eq(go_wr_o[0:2])
        m.d.comb += cu.issue_i[0:2].eq(fn_issue_o[0:2])

        return m

    # NOTE(review): the ``def`` lines of __iter__ and ports() were absent
    # from the reviewed text.  The yield statements below were present, and
    # ``dut.ports()`` is called by test_scoreboard, so both headers (and the
    # ``list(self)`` body of ports) are reconstructed -- confirm.
    def __iter__(self):
        """Yield the register files' contents followed by the input signals."""
        yield from self.intregs
        yield from self.fpregs
        yield self.int_store_i
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i

        #yield from self.int_src1
        #yield from self.int_dest
        #yield from self.int_src1
        #yield from self.int_src2
        #yield from self.fp_dest
        #yield from self.fp_src1
        #yield from self.fp_src2

    def ports(self):
        """Return everything ``__iter__`` yields, as a list."""
        return list(self)
# NOTE(review): the ``class`` line was absent from the reviewed text; the
# name is taken from the ``RegSim(16, 8)`` call in test_scoreboard.
class RegSim:
    """Pure-Python model of the integer register file.

    The simulation applies every issued instruction to this model as well
    as to the hardware, then compares the two register sets.
    """

    def __init__(self, rwidth, nregs):
        """
        * :rwidth: register width in bits (results are truncated to it)
        * :nregs:  number of registers; all start at zero
        """
        # NOTE(review): op() reads self.rwidth, but no assignment was
        # present in the reviewed text; restored here.
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, src1, src2, dest):
        """Apply operation ``op`` to registers ``src1``/``src2``, storing the
        result, truncated to ``rwidth`` bits, in register ``dest``.

        Raises ``ValueError`` for an unknown ``op``.
        """
        src1 = self.regs[src1]
        src2 = self.regs[src2]
        # NOTE(review): only the shift expression survived in the reviewed
        # text; the dispatch around it is reconstructed.  0=add and 1=sub
        # match the "op=add"/"op=sub" constants used in this file; the
        # numbering of multiply and shift is an assumption -- confirm.
        if op == 0:
            val = src1 + src2
        elif op == 1:
            val = src1 - src2
        elif op == 2:
            val = src1 * src2
        elif op == 3:
            # NOTE(review): this masks the shift amount with rwidth itself
            # rather than (rwidth - 1); kept as found -- verify against the
            # hardware shifter before changing.
            val = (src1 << (src2 & self.rwidth))
        else:
            # previously an unknown op would fail on an unbound local
            raise ValueError("unknown op %r" % (op,))
        val &= ((1 << (self.rwidth)) - 1)
        self.regs[dest] = val

    def setval(self, dest, val):
        """Set register ``dest`` of the model to ``val`` (no truncation)."""
        self.regs[dest] = val

    # NOTE(review): the ``def`` line of dump() was absent from the reviewed
    # text; its signature is taken from the ``dump(dut)`` call sites.
    def dump(self, dut):
        """Simulation generator: print expected vs. actual for every register."""
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """Simulation generator: report registers that differ from the model.

        NOTE(review): the mismatch guard and the nesting of the dump call
        are reconstructed (one line was missing between the read and the
        print) -- confirm.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
def int_instr(dut, alusim, op, src1, src2, dest):
    """Simulation generator: present one integer instruction to ``dut``.

    Clears every entry of ``dut.int_insn_i``, sets the destination and
    source register numbers, raises the entry selected by ``op`` and
    enables register decode.  The same operation is then applied to the
    software model ``alusim``.

    * :op:   index into ``dut.int_insn_i`` (also passed to ``alusim.op``)
    * :src1: first source register number
    * :src2: second source register number
    * :dest: destination register number
    """
    # make sure no stale instruction-select entry is still set
    for idx in range(len(dut.int_insn_i)):
        yield dut.int_insn_i[idx].eq(0)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)
    yield dut.int_insn_i[op].eq(1)
    yield dut.reg_enable_i.eq(1)
    # keep the reference model in step with what was just issued
    alusim.op(op, src1, src2, dest)
def print_reg(dut, rnums):
    """Simulation generator: print the current value of selected registers.

    Reads ``dut.intregs.regs[n].reg`` for every ``n`` in ``rnums`` and
    prints one line of the form ``reg 1,2,3: a,b,c`` (values in hex).
    """
    # NOTE(review): the accumulator initialisation and the loop header were
    # absent from the reviewed text (``rs`` and ``rnum`` were used but never
    # bound); restored here.
    rs = []
    for rnum in rnums:
        reg = yield dut.intregs.regs[rnum].reg
        rs.append("%x" % reg)
    rnums = map(str, rnums)
    print("reg %s: %s" % (','.join(rnums), ','.join(rs)))
# scoreboard_sim(dut, alusim): simulation generator that drives `dut` and
# mirrors each issued instruction into the software model `alusim`.
# Visible steps: clear int_store_i, preload the registers, build a list of
# instruction tuples, issue them one at a time via int_instr, print
# registers 1-3 several times, then compare hardware and model.
# NOTE(review): the integers that start many lines here are not contiguous
# (e.g. 380 -> 385, 389 -> 399, 425 -> 428), so statements appear to be
# missing from this text: `instrs` and `op` are used below but never bound
# in what is shown, and the `if` has no visible body.  Recover the complete
# function before changing any logic.
375 def scoreboard_sim(dut
, alusim
):
376 yield dut
.int_store_i
.eq(0)
# preload registers 1 .. n_regs-1 with 4 + 2*i, in both the hardware
# register file and the software model
378 for i
in range(1, dut
.n_regs
):
379 yield dut
.intregs
.regs
[i
].reg
.eq(4+i
*2)
380 alusim
.setval(i
, 4+i
*2)
# random source and destination register numbers in 1 .. n_regs-1
385 src1
= randint(1, dut
.n_regs
-1)
386 src2
= randint(1, dut
.n_regs
-1)
388 dest
= randint(1, dut
.n_regs
-1)
389 if dest
not in [src1
, src2
]:
# instruction tuples are ordered (src1, src2, dest, op), matching the
# unpacking in the issue loop further down
399 instrs
.append((src1
, src2
, dest
, op
))
# fixed instruction tuples follow
402 instrs
.append((2, 3, 3, 0))
403 instrs
.append((5, 3, 3, 1))
406 instrs
.append((5, 6, 2, 1))
407 instrs
.append((2, 2, 4, 0))
408 #instrs.append((2, 2, 3, 1))
411 instrs
.append((2, 1, 2, 0))
414 instrs
.append((2, 6, 2, 1))
415 instrs
.append((2, 1, 2, 0))
418 instrs
.append((1, 2, 7, 1))
419 instrs
.append((7, 1, 5, 0))
420 instrs
.append((4, 4, 1, 1))
# issue each instruction: print it, then present it to the hardware and
# the model through int_instr
422 for i
, (src1
, src2
, dest
, op
) in enumerate(instrs
):
424 print ("instr %d: %d %d %d %d\n" % (i
, op
, src1
, src2
, dest
))
425 yield from int_instr(dut
, alusim
, op
, src1
, src2
, dest
)
# read back the hardware's issue_o signal
428 issue_o
= yield dut
.issue_o
# clear every instruction-select entry and disable register decode
430 for i
in range(len(dut
.int_insn_i
)):
431 yield dut
.int_insn_i
[i
].eq(0)
432 yield dut
.reg_enable_i
.eq(0)
# print registers 1, 2 and 3 repeatedly
# NOTE(review): presumably separated by clock-advancing yields that are not
# visible here -- confirm.
435 yield from print_reg(dut
, [1,2,3])
437 yield from print_reg(dut
, [1,2,3])
440 yield from print_reg(dut
, [1,2,3])
442 yield from print_reg(dut
, [1,2,3])
444 yield from print_reg(dut
, [1,2,3])
446 yield from print_reg(dut
, [1,2,3])
# final comparison of the hardware registers against the software model,
# followed by a dump of every register
451 yield from alusim
.check(dut
)
452 yield from alusim
.dump(dut
)
def explore_groups(dut):
    """Elaborate ``dut`` and run nmigen's ``LHSGroupAnalyzer`` over the
    statements of the resulting fragment.

    NOTE(review): no ``return`` or other use of ``groups`` is visible in
    the reviewed text, so as written the result is discarded -- confirm
    whether trailing lines are missing.
    """
    # imported locally: these are nmigen internals used only by this helper
    from nmigen.hdl.ir import Fragment
    from nmigen.hdl.xfrm import LHSGroupAnalyzer

    fragment = dut.elaborate(platform=None)
    # result unused in the visible code; the call is kept as found
    fr = Fragment.get(fragment, platform=None)

    groups = LHSGroupAnalyzer()(fragment._statements)
def test_scoreboard():
    """Build a 16-bit, 8-register Scoreboard, write its RTLIL to
    ``test_scoreboard6600.il`` and run the simulation, producing
    ``test_scoreboard6600.vcd``.
    """
    dut = Scoreboard(16, 8)
    alusim = RegSim(16, 8)
    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as f:
        # NOTE(review): the body of this ``with`` was absent from the
        # reviewed text; writing the converted RTLIL is the evident intent.
        f.write(vl)

    run_simulation(dut, scoreboard_sim(dut, alusim),
                   vcd_name='test_scoreboard6600.vcd')
478 if __name__
== '__main__':