53eea0d52e4d36a900258c02acf8a5783498a2cc
1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fn_unit
import IntFnUnit
, FPFnUnit
, LDFnUnit
, STFnUnit
7 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
8 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
9 from scoreboard
.global_pending
import GlobalPending
10 from scoreboard
.group_picker
import GroupPicker
11 from scoreboard
.issue_unit
import IntFPIssueUnit
, RegDecode
12 from scoreboard
.shadow
import ShadowMatrix
14 from compalu
import ComputationUnitNoDelay
16 from alu_hier
import ALU
17 from nmutil
.latch
import SRLatch
19 from random
import randint
21 class CompUnits(Elaboratable
):
23 def __init__(self
, rwid
, n_units
):
26 * :rwid: bit width of register file(s) - both FP and INT
27 * :n_units: number of ALUs
29 self
.n_units
= n_units
32 self
.issue_i
= Signal(n_units
, reset_less
=True)
33 self
.go_rd_i
= Signal(n_units
, reset_less
=True)
34 self
.go_wr_i
= Signal(n_units
, reset_less
=True)
35 self
.shadown_i
= Signal(n_units
, reset_less
=True)
36 self
.go_die_i
= Signal(n_units
, reset_less
=True)
37 self
.busy_o
= Signal(n_units
, reset_less
=True)
38 self
.rd_rel_o
= Signal(n_units
, reset_less
=True)
39 self
.req_rel_o
= Signal(n_units
, reset_less
=True)
41 self
.dest_o
= Signal(rwid
, reset_less
=True)
42 self
.src1_data_i
= Signal(rwid
, reset_less
=True)
43 self
.src2_data_i
= Signal(rwid
, reset_less
=True)
45 def elaborate(self
, platform
):
53 m
.submodules
.comp1
= comp1
= ComputationUnitNoDelay(self
.rwid
, 2, add
)
54 m
.submodules
.comp2
= comp2
= ComputationUnitNoDelay(self
.rwid
, 2, sub
)
55 m
.submodules
.comp3
= comp3
= ComputationUnitNoDelay(self
.rwid
, 2, mul
)
56 m
.submodules
.comp4
= comp4
= ComputationUnitNoDelay(self
.rwid
, 2, shf
)
57 int_alus
= [comp1
, comp2
, comp3
, comp4
]
59 m
.d
.comb
+= comp1
.oper_i
.eq(Const(0, 2)) # op=add
60 m
.d
.comb
+= comp2
.oper_i
.eq(Const(1, 2)) # op=sub
61 m
.d
.comb
+= comp3
.oper_i
.eq(Const(2, 2)) # op=mul
62 m
.d
.comb
+= comp4
.oper_i
.eq(Const(3, 2)) # op=shf
73 req_rel_l
.append(alu
.req_rel_o
)
74 rd_rel_l
.append(alu
.rd_rel_o
)
75 shadow_l
.append(alu
.shadown_i
)
76 godie_l
.append(alu
.go_die_i
)
77 go_wr_l
.append(alu
.go_wr_i
)
78 go_rd_l
.append(alu
.go_rd_i
)
79 issue_l
.append(alu
.issue_i
)
80 busy_l
.append(alu
.busy_o
)
81 m
.d
.comb
+= self
.rd_rel_o
.eq(Cat(*rd_rel_l
))
82 m
.d
.comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
83 m
.d
.comb
+= self
.busy_o
.eq(Cat(*busy_l
))
84 #m.d.comb += Cat(*godie_l).eq(self.go_die_i)
85 #m.d.comb += Cat(*shadow_l).eq(self.shadown_i)
86 m
.d
.comb
+= Cat(*go_rd_l
).eq(self
.go_rd_i
)
87 m
.d
.comb
+= Cat(*issue_l
).eq(self
.issue_i
)
89 # connect data register input/output
91 # merge (OR) all integer FU / ALU outputs to a single value
92 # bit of a hack: treereduce needs a list with an item named "dest_o"
93 dest_o
= treereduce(int_alus
)
94 m
.d
.comb
+= self
.dest_o
.eq(dest_o
)
96 for i
, alu
in enumerate(int_alus
):
97 m
.d
.comb
+= alu
.src1_i
.eq(self
.src1_data_i
)
98 m
.d
.comb
+= alu
.src2_i
.eq(self
.src2_data_i
)
103 class FunctionUnits(Elaboratable
):
105 def __init__(self
, n_regs
, n_int_alus
):
107 self
.n_int_alus
= n_int_alus
109 self
.dest_i
= Signal(n_regs
, reset_less
=True) # Dest R# in
110 self
.src1_i
= Signal(n_regs
, reset_less
=True) # oper1 R# in
111 self
.src2_i
= Signal(n_regs
, reset_less
=True) # oper2 R# in
113 self
.g_int_rd_pend_o
= Signal(n_regs
, reset_less
=True)
114 self
.g_int_wr_pend_o
= Signal(n_regs
, reset_less
=True)
116 self
.dest_rsel_o
= Signal(n_regs
, reset_less
=True) # dest reg (bot)
117 self
.src1_rsel_o
= Signal(n_regs
, reset_less
=True) # src1 reg (bot)
118 self
.src2_rsel_o
= Signal(n_regs
, reset_less
=True) # src2 reg (bot)
120 self
.req_rel_i
= Signal(n_int_alus
, reset_less
= True)
121 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
122 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
124 self
.go_rd_i
= Signal(n_int_alus
, reset_less
=True)
125 self
.go_wr_i
= Signal(n_int_alus
, reset_less
=True)
126 self
.req_rel_o
= Signal(n_int_alus
, reset_less
=True)
127 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
129 def elaborate(self
, platform
):
132 n_int_fus
= self
.n_int_alus
134 # Integer FU-FU Dep Matrix
135 intfudeps
= FUFUDepMatrix(n_int_fus
, n_int_fus
)
136 m
.submodules
.intfudeps
= intfudeps
137 # Integer FU-Reg Dep Matrix
138 intregdeps
= FURegDepMatrix(n_int_fus
, self
.n_regs
)
139 m
.submodules
.intregdeps
= intregdeps
141 m
.d
.comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.rd_rsel_o
)
142 m
.d
.comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.wr_rsel_o
)
144 m
.d
.comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.rd_rsel_o
)
145 m
.d
.comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.wr_rsel_o
)
147 m
.d
.comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
148 m
.d
.comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
150 m
.d
.comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
151 m
.d
.comb
+= intfudeps
.go_rd_i
.eq(self
.go_rd_i
)
152 m
.d
.comb
+= intfudeps
.go_wr_i
.eq(self
.go_wr_i
)
153 m
.d
.comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
154 m
.d
.comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
156 # Connect function issue / arrays, and dest/src1/src2
157 m
.d
.comb
+= intregdeps
.dest_i
.eq(self
.dest_i
)
158 m
.d
.comb
+= intregdeps
.src1_i
.eq(self
.src1_i
)
159 m
.d
.comb
+= intregdeps
.src2_i
.eq(self
.src2_i
)
161 m
.d
.comb
+= intregdeps
.go_rd_i
.eq(self
.go_rd_i
)
162 m
.d
.comb
+= intregdeps
.go_wr_i
.eq(self
.go_wr_i
)
163 m
.d
.comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
165 m
.d
.comb
+= self
.dest_rsel_o
.eq(intregdeps
.dest_rsel_o
)
166 m
.d
.comb
+= self
.src1_rsel_o
.eq(intregdeps
.src1_rsel_o
)
167 m
.d
.comb
+= self
.src2_rsel_o
.eq(intregdeps
.src2_rsel_o
)
172 class Scoreboard(Elaboratable
):
173 def __init__(self
, rwid
, n_regs
):
176 * :rwid: bit width of register file(s) - both FP and INT
177 * :n_regs: depth of register file(s) - number of FP and INT regs
183 self
.intregs
= RegFileArray(rwid
, n_regs
)
184 self
.fpregs
= RegFileArray(rwid
, n_regs
)
187 self
.int_store_i
= Signal(reset_less
=True) # instruction is a store
188 self
.int_dest_i
= Signal(max=n_regs
, reset_less
=True) # Dest R# in
189 self
.int_src1_i
= Signal(max=n_regs
, reset_less
=True) # oper1 R# in
190 self
.int_src2_i
= Signal(max=n_regs
, reset_less
=True) # oper2 R# in
191 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
193 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
194 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
196 def elaborate(self
, platform
):
199 m
.submodules
.intregs
= self
.intregs
200 m
.submodules
.fpregs
= self
.fpregs
203 int_dest
= self
.intregs
.write_port("dest")
204 int_src1
= self
.intregs
.read_port("src1")
205 int_src2
= self
.intregs
.read_port("src2")
207 fp_dest
= self
.fpregs
.write_port("dest")
208 fp_src1
= self
.fpregs
.read_port("src1")
209 fp_src2
= self
.fpregs
.read_port("src2")
211 # Int ALUs and Comp Units
213 m
.submodules
.cu
= cu
= CompUnits(self
.rwid
, n_int_alus
)
214 m
.d
.comb
+= cu
.shadown_i
.eq(-1)
217 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
)
219 # Count of number of FUs
220 n_int_fus
= n_int_alus
221 n_fp_fus
= 0 # for now
223 # Integer Priority Picker 1: add, sub, mul and shf units
224 intpick1
= GroupPicker(n_int_fus
) # picks between add, sub, mul and shf
225 m
.submodules
.intpick1
= intpick1
228 regdecode
= RegDecode(self
.n_regs
)
229 m
.submodules
.regdecode
= regdecode
230 issueunit
= IntFPIssueUnit(self
.n_regs
, n_int_fus
, n_fp_fus
)
231 m
.submodules
.issueunit
= issueunit
233 # Shadow Matrix. currently only 1 branch
234 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_int_fus
, 1)
235 go_rd_rst
= Signal(n_int_fus
, reset_less
=True)
236 go_wr_rst
= Signal(n_int_fus
, reset_less
=True)
239 # ok start wiring things together...
240 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
241 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
245 # Issue Unit is where it starts. set up some in/outs for this module
247 m
.d
.comb
+= [issueunit
.i
.store_i
.eq(self
.int_store_i
),
248 regdecode
.dest_i
.eq(self
.int_dest_i
),
249 regdecode
.src1_i
.eq(self
.int_src1_i
),
250 regdecode
.src2_i
.eq(self
.int_src2_i
),
251 regdecode
.enable_i
.eq(self
.reg_enable_i
),
252 issueunit
.i
.dest_i
.eq(regdecode
.dest_o
),
253 self
.issue_o
.eq(issueunit
.issue_o
)
255 self
.int_insn_i
= issueunit
.i
.insn_i
# enabled by instruction decode
257 # connect global rd/wr pending vector (for WaW detection)
258 m
.d
.sync
+= issueunit
.i
.g_wr_pend_i
.eq(intfus
.g_int_wr_pend_o
)
259 # TODO: issueunit.f (FP)
261 # and int function issue / busy arrays, and dest/src1/src2
262 m
.d
.comb
+= intfus
.dest_i
.eq(regdecode
.dest_o
)
263 m
.d
.comb
+= intfus
.src1_i
.eq(regdecode
.src1_o
)
264 m
.d
.comb
+= intfus
.src2_i
.eq(regdecode
.src2_o
)
266 fn_issue_o
= issueunit
.i
.fn_issue_o
268 m
.d
.comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
269 m
.d
.comb
+= issueunit
.i
.busy_i
.eq(cu
.busy_o
)
270 m
.d
.comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
273 # connect fu-fu matrix
276 # Group Picker... done manually for now. TODO: cat array of pick sigs
277 go_rd_o
= intpick1
.go_rd_o
278 go_wr_o
= intpick1
.go_wr_o
279 go_rd_i
= intfus
.go_rd_i
280 go_wr_i
= intfus
.go_wr_i
281 m
.d
.comb
+= go_rd_i
[0:n_int_fus
].eq(go_rd_o
[0:n_int_fus
]) # rd
282 m
.d
.comb
+= go_wr_i
[0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
]) # wr
286 m
.d
.comb
+= intpick1
.rd_rel_i
[0:n_int_fus
].eq(cu
.rd_rel_o
[0:n_int_fus
])
287 m
.d
.comb
+= intpick1
.req_rel_i
[0:n_int_fus
].eq(cu
.req_rel_o
[0:n_int_fus
])
288 int_rd_o
= intfus
.readable_o
289 int_wr_o
= intfus
.writable_o
290 m
.d
.comb
+= intpick1
.readable_i
[0:n_int_fus
].eq(int_rd_o
[0:n_int_fus
])
291 m
.d
.comb
+= intpick1
.writable_i
[0:n_int_fus
].eq(int_wr_o
[0:n_int_fus
])
294 # Connect Register File(s)
296 print ("intregdeps wen len", len(intfus
.dest_rsel_o
))
297 m
.d
.comb
+= int_dest
.wen
.eq(intfus
.dest_rsel_o
)
298 m
.d
.comb
+= int_src1
.ren
.eq(intfus
.src1_rsel_o
)
299 m
.d
.comb
+= int_src2
.ren
.eq(intfus
.src2_rsel_o
)
301 # connect ALUs to regfile
302 m
.d
.comb
+= int_dest
.data_i
.eq(cu
.dest_o
)
303 m
.d
.comb
+= cu
.src1_data_i
.eq(int_src1
.data_o
)
304 m
.d
.comb
+= cu
.src2_data_i
.eq(int_src2
.data_o
)
306 # connect ALU Computation Units
307 m
.d
.comb
+= cu
.go_rd_i
[0:n_int_fus
].eq(go_rd_o
[0:n_int_fus
])
308 m
.d
.comb
+= cu
.go_wr_i
[0:n_int_fus
].eq(go_wr_o
[0:n_int_fus
])
309 m
.d
.comb
+= cu
.issue_i
[0:n_int_fus
].eq(fn_issue_o
[0:n_int_fus
])
315 yield from self
.intregs
316 yield from self
.fpregs
317 yield self
.int_store_i
318 yield self
.int_dest_i
319 yield self
.int_src1_i
320 yield self
.int_src2_i
322 #yield from self.int_src1
323 #yield from self.int_dest
324 #yield from self.int_src1
325 #yield from self.int_src2
326 #yield from self.fp_dest
327 #yield from self.fp_src1
328 #yield from self.fp_src2
339 def __init__(self
, rwidth
, nregs
):
341 self
.regs
= [0] * nregs
343 def op(self
, op
, src1
, src2
, dest
):
344 maxbits
= (1 << self
.rwidth
) - 1
345 src1
= self
.regs
[src1
]
346 src2
= self
.regs
[src2
]
354 val
= src1
>> (src2
& maxbits
)
356 self
.regs
[dest
] = val
def setval(self, dest, val):
    """Record *val* as the expected content of register number *dest*.

    Overwrites entry ``dest`` of ``self.regs`` in place; returns nothing.
    """
    expected = self.regs
    expected[dest] = val
362 for i
, val
in enumerate(self
.regs
):
363 reg
= yield dut
.intregs
.regs
[i
].reg
364 okstr
= "OK" if reg
== val
else "!ok"
365 print("reg %d expected %x received %x %s" % (i
, val
, reg
, okstr
))
367 def check(self
, dut
):
368 for i
, val
in enumerate(self
.regs
):
369 reg
= yield dut
.intregs
.regs
[i
].reg
371 print("reg %d expected %x received %x\n" % (i
, val
, reg
))
372 yield from self
.dump(dut
)
def int_instr(dut, alusim, op, src1, src2, dest):
    """Drive one integer instruction into *dut* and mirror it in *alusim*.

    This is a generator.  In order, it yields the ``.eq()`` results that:

    * set every entry of ``dut.int_insn_i`` to 0,
    * load ``dest``, ``src1`` and ``src2`` into ``dut.int_dest_i``,
      ``dut.int_src1_i`` and ``dut.int_src2_i``,
    * set entry ``op`` of ``dut.int_insn_i`` to 1,
    * set ``dut.reg_enable_i`` to 1.

    After the last yield it calls ``alusim.op(op, src1, src2, dest)``.

    NOTE(review): presumably the yielded values are consumed by the
    simulator as signal assignments -- confirm against the caller.
    """
    insn_sel = dut.int_insn_i

    # start from a clean slate: deselect every instruction entry
    for entry in range(len(insn_sel)):
        yield insn_sel[entry].eq(0)

    # register numbers for destination and both operands
    operand_ports = (
        (dut.int_dest_i, dest),
        (dut.int_src1_i, src1),
        (dut.int_src2_i, src2),
    )
    for port, regnum in operand_ports:
        yield port.eq(regnum)

    # select the requested operation, then enable register decode
    yield insn_sel[op].eq(1)
    yield dut.reg_enable_i.eq(1)

    # keep the software model in step with what was just issued
    alusim.op(op, src1, src2, dest)
386 def print_reg(dut
, rnums
):
389 reg
= yield dut
.intregs
.regs
[rnum
].reg
390 rs
.append("%x" % reg
)
391 rnums
= map(str, rnums
)
392 print ("reg %s: %s" % (','.join(rnums
), ','.join(rs
)))
395 def scoreboard_sim(dut
, alusim
):
397 yield dut
.int_store_i
.eq(0)
401 # set random values in the registers
402 for i
in range(1, dut
.n_regs
):
404 val
= randint(0, (1<<alusim
.rwidth
)-1)
405 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
406 alusim
.setval(i
, val
)
408 # create some instructions (some random, some regression tests)
412 src1
= randint(1, dut
.n_regs
-1)
413 src2
= randint(1, dut
.n_regs
-1)
415 dest
= randint(1, dut
.n_regs
-1)
417 if dest
not in [src1
, src2
]:
427 instrs
.append((src1
, src2
, dest
, op
))
430 instrs
.append((2, 3, 3, 0))
431 instrs
.append((5, 3, 3, 1))
434 instrs
.append((5, 6, 2, 1))
435 instrs
.append((2, 2, 4, 0))
436 #instrs.append((2, 2, 3, 1))
439 instrs
.append((2, 1, 2, 3))
442 instrs
.append((2, 6, 2, 1))
443 instrs
.append((2, 1, 2, 0))
446 instrs
.append((1, 2, 7, 2))
447 instrs
.append((7, 1, 5, 0))
448 instrs
.append((4, 4, 1, 1))
451 instrs
.append((5, 6, 2, 2))
452 instrs
.append((1, 1, 4, 1))
453 instrs
.append((6, 5, 3, 0))
456 # Write-after-Write Hazard
457 instrs
.append( (3, 6, 7, 2) )
458 instrs
.append( (4, 4, 7, 1) )
461 # self-read/write-after-write followed by Read-after-Write
462 instrs
.append((1, 1, 1, 1))
463 instrs
.append((1, 5, 3, 0))
466 # Read-after-Write followed by self-read-after-write
467 instrs
.append((5, 6, 1, 2))
468 instrs
.append((1, 1, 1, 1))
471 # self-read-write sandwich
472 instrs
.append((5, 6, 1, 2))
473 instrs
.append((1, 1, 1, 1))
474 instrs
.append((1, 5, 3, 0))
478 instrs
.append( (5, 2, 5, 2) )
479 instrs
.append( (2, 6, 3, 0) )
480 instrs
.append( (4, 2, 2, 1) )
482 # issue instruction(s), wait for issue to be free before proceeding
483 for i
, (src1
, src2
, dest
, op
) in enumerate(instrs
):
485 print ("instr %d: (%d, %d, %d, %d)" % (i
, src1
, src2
, dest
, op
))
486 yield from int_instr(dut
, alusim
, op
, src1
, src2
, dest
)
489 issue_o
= yield dut
.issue_o
491 for i
in range(len(dut
.int_insn_i
)):
492 yield dut
.int_insn_i
[i
].eq(0)
493 yield dut
.reg_enable_i
.eq(0)
496 #yield from print_reg(dut, [1,2,3])
498 #yield from print_reg(dut, [1,2,3])
500 # wait for all instructions to stop before checking
503 busy_o
= yield dut
.busy_o
510 yield from alusim
.check(dut
)
511 yield from alusim
.dump(dut
)
514 def explore_groups(dut
):
515 from nmigen
.hdl
.ir
import Fragment
516 from nmigen
.hdl
.xfrm
import LHSGroupAnalyzer
518 fragment
= dut
.elaborate(platform
=None)
519 fr
= Fragment
.get(fragment
, platform
=None)
521 groups
= LHSGroupAnalyzer()(fragment
._statements
)
526 def test_scoreboard():
527 dut
= Scoreboard(16, 8)
528 alusim
= RegSim(16, 8)
529 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
530 with
open("test_scoreboard6600.il", "w") as f
:
533 run_simulation(dut
, scoreboard_sim(dut
, alusim
),
534 vcd_name
='test_scoreboard6600.vcd')
537 if __name__
== '__main__':