bdbf8ace407ff2f5f7f5f123f8bb69dd846c23ac
[soc.git] / src / experiment / cscore.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
4
5 from regfile.regfile import RegFileArray, treereduce
6 from scoreboard.fn_unit import IntFnUnit, FPFnUnit, LDFnUnit, STFnUnit
7 from scoreboard.fu_fu_matrix import FUFUDepMatrix
8 from scoreboard.fu_reg_matrix import FURegDepMatrix
9 from scoreboard.global_pending import GlobalPending
10 from scoreboard.group_picker import GroupPicker
11 from scoreboard.issue_unit import IntFPIssueUnit, RegDecode
12
13 from compalu import ComputationUnitNoDelay
14
15 from alu_hier import ALU
16 from nmutil.latch import SRLatch
17
18 from random import randint
19
20
class Scoreboard(Elaboratable):
    """ Experimental scoreboard.

        Instantiates an INT and an FP register file, two ALU-backed
        computation units (hard-wired below to op=0 "add" and op=1 "sub"),
        one IntFnUnit per computation unit, a 2-way GroupPicker, four
        GlobalPending vectors, a RegDecode and an IntFPIssueUnit, and wires
        them together in elaborate().

        No FP function units are created yet (n_fp_fus is 0).

        NOTE: self.int_insn_i is only assigned inside elaborate(), so the
        attribute does not exist until elaborate() has been called.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # inputs
        self.int_store_i = Signal(reset_less=True) # instruction is a store
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted

    def elaborate(self, platform):
        """ Builds and returns the Module containing all submodules and
            the comb/sync wiring between them.  Also sets self.int_insn_i
            as a side effect (alias of issueunit.i.insn_i).
        """
        m = Module()

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested but not connected to anything below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs
        add = ALU(self.rwid)
        sub = ALU(self.rwid)
        m.submodules.comp1 = comp1 = ComputationUnitNoDelay(self.rwid, 1, add)
        m.submodules.comp2 = comp2 = ComputationUnitNoDelay(self.rwid, 1, sub)
        int_alus = [comp1, comp2]

        m.d.comb += comp1.oper_i.eq(Const(0)) # temporary/experiment: op=add
        m.d.comb += comp2.oper_i.eq(Const(1)) # temporary/experiment: op=sub

        # Int FUs
        if_l = []
        int_src1_pend_v = []
        int_src2_pend_v = []
        int_rd_pend_v = []
        int_wr_pend_v = []
        for i, a in enumerate(int_alus):
            # set up Integer Function Unit, add to module (and python list)
            fu = IntFnUnit(self.n_regs, shadow_wid=0)
            setattr(m.submodules, "intfu%d" % i, fu)
            if_l.append(fu)
            # collate the read/write pending vectors (to go into global pending)
            int_src1_pend_v.append(fu.src1_pend_o)
            int_src2_pend_v.append(fu.src2_pend_o)
            int_rd_pend_v.append(fu.int_rd_pend_o)
            int_wr_pend_v.append(fu.int_wr_pend_o)
        int_fus = Array(if_l) # not referenced again in this method

        # Count of number of FUs
        n_int_fus = len(if_l)
        n_fp_fus = 0 # for now

        n_fus = n_int_fus + n_fp_fus # plus FP FUs (not referenced again)

        # XXX replaced by array of FUs? *FnUnit
        # # Integer FU-FU Dep Matrix
        # m.submodules.intfudeps = FUFUDepMatrix(n_int_fus, n_int_fus)
        # Integer FU-Reg Dep Matrix
        # intregdeps = FURegDepMatrix(self.n_regs, n_int_fus)
        # m.submodules.intregdeps = intregdeps

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(2) # picks between add and sub
        m.submodules.intpick1 = intpick1

        # Global Pending Vectors (INT and FP)
        # NOTE: number of vectors is NOT same as number of FUs.
        g_int_src1_pend_v = GlobalPending(self.n_regs, int_src1_pend_v)
        g_int_src2_pend_v = GlobalPending(self.n_regs, int_src2_pend_v)
        g_int_rd_pend_v = GlobalPending(self.n_regs, int_rd_pend_v, True)
        g_int_wr_pend_v = GlobalPending(self.n_regs, int_wr_pend_v, True)
        m.submodules.g_int_src1_pend_v = g_int_src1_pend_v
        m.submodules.g_int_src2_pend_v = g_int_src2_pend_v
        m.submodules.g_int_rd_pend_v = g_int_rd_pend_v
        m.submodules.g_int_wr_pend_v = g_int_wr_pend_v

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IntFPIssueUnit(self.n_regs, n_int_fus, n_fp_fus)
        m.submodules.issueunit = issueunit

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        m.d.comb += [issueunit.i.store_i.eq(self.int_store_i),
                     regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(1),
                     self.issue_o.eq(issueunit.issue_o)
                    ]
        # NOTE(review): the trailing comma makes the right-hand side a
        # one-element tuple; presumably accepted by "m.d.sync +=" - confirm
        m.d.sync += issueunit.i.dest_i.eq(regdecode.dest_o),
        # exposed on self so that the testbench can drive it directly
        self.int_insn_i = issueunit.i.insn_i # enabled by instruction decode

        # connect global rd/wr pending vectors
        m.d.comb += issueunit.i.g_wr_pend_i.eq(g_int_wr_pend_v.g_pend_o)
        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        fn_issue_l = []
        fn_busy_l = [] # collected, but only used by commented-out code below
        for i, fu in enumerate(if_l):
            fn_issue_l.append(fu.issue_i)
            fn_busy_l.append(fu.busy_o)
            # issue and register numbers reach each FU through the sync
            # domain; busy goes back to the issue unit combinatorially
            m.d.sync += fu.issue_i.eq(issueunit.i.fn_issue_o[i])
            m.d.sync += fu.dest_i.eq(self.int_dest_i)
            m.d.sync += fu.src1_i.eq(self.int_src1_i)
            m.d.sync += fu.src2_i.eq(self.int_src2_i)
            # XXX sync, so as to stop a simulation infinite loop
            m.d.comb += issueunit.i.busy_i[i].eq(fu.busy_o)

        #---------
        # connect Function Units
        #---------

        # XXX sync, again to avoid an infinite loop.  is it the right thing???

        # Group Picker... done manually for now.  TODO: cat array of pick sigs
        m.d.sync += if_l[0].go_rd_i.eq(intpick1.go_rd_o[0]) # add rd
        m.d.sync += if_l[0].go_wr_i.eq(intpick1.go_wr_o[0]) # add wr

        m.d.sync += if_l[1].go_rd_i.eq(intpick1.go_rd_o[1]) # subtract rd
        m.d.sync += if_l[1].go_wr_i.eq(intpick1.go_wr_o[1]) # subtract wr

        # Connect INT Fn Unit global wr/rd pending
        for fu in if_l:
            m.d.comb += fu.g_int_wr_pend_i.eq(g_int_wr_pend_v.g_pend_o)
            m.d.comb += fu.g_int_rd_pend_i.eq(g_int_rd_pend_v.g_pend_o)

        # Connect Picker
        #---------
        m.d.comb += intpick1.req_rel_i[0].eq(int_alus[0].req_rel_o)
        m.d.comb += intpick1.req_rel_i[1].eq(int_alus[1].req_rel_o)
        m.d.comb += intpick1.readable_i[0].eq(if_l[0].int_readable_o) # add rd
        m.d.comb += intpick1.writable_i[0].eq(if_l[0].int_writable_o) # add wr
        m.d.comb += intpick1.readable_i[1].eq(if_l[1].int_readable_o) # sub rd
        m.d.comb += intpick1.writable_i[1].eq(if_l[1].int_writable_o) # sub wr

        #---------
        # Connect Register File(s)
        #---------
        # NOTE(review): the indentation of this region was lost in the
        # copy this was recovered from.  Only the write-enable assignment is
        # assumed to sit inside the If (the read-enable guards directly
        # below are commented out) - confirm against the repository.
        with m.If(if_l[0].go_wr_i | if_l[1].go_wr_i):
            m.d.comb += int_dest.wen.eq(g_int_wr_pend_v.g_pend_o)
        #with m.If(intpick1.go_rd_o):
        #with m.If(if_l[0].go_rd_i | if_l[1].go_rd_i):
        m.d.comb += int_src1.ren.eq(g_int_src1_pend_v.g_pend_o)
        m.d.comb += int_src2.ren.eq(g_int_src2_pend_v.g_pend_o)

        # merge (OR) all integer FU / ALU outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "dest_o"
        dest_o = treereduce(int_alus)
        m.d.comb += int_dest.data_i.eq(dest_o)

        # connect ALUs
        for i, alu in enumerate(int_alus):
            m.d.sync += alu.go_rd_i.eq(intpick1.go_rd_o[i])
            m.d.sync += alu.go_wr_i.eq(intpick1.go_wr_o[i])
            m.d.comb += alu.issue_i.eq(fn_issue_l[i])
            #m.d.comb += fn_busy_l[i].eq(alu.busy_o)  # XXX ignore, use fnissue
            # both computation units are fed from the same two read ports
            m.d.comb += alu.src1_i.eq(int_src1.data_o)
            m.d.comb += alu.src2_i.eq(int_src2.data_o)
            m.d.comb += if_l[i].req_rel_i.eq(alu.req_rel_o) # pipe out ready

        return m


    def __iter__(self):
        """ Yields whatever the two register files yield when iterated,
            followed by this module's own input/output signals.
            int_insn_i is not included.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_store_i
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        #yield from self.int_src1
        #yield from self.int_dest
        #yield from self.int_src1
        #yield from self.int_src2
        #yield from self.fp_dest
        #yield from self.fp_src1
        #yield from self.fp_src2

    def ports(self):
        """ Returns everything produced by __iter__ as a list. """
        return list(self)
229
# Operation selectors understood by RegSim.op().  int_instr() also uses
# them as an index into dut.int_insn_i.
IADD = 0
ISUB = 1


class RegSim:
    """ Software reference model of the integer register file.

        Keeps a plain python list of register values, applies the same
        operations the testbench issues to the hardware, and compares the
        two afterwards.

        * :rwidth: register width in bits (results are truncated to this)
        * :nregs:  number of registers, all initialised to zero
    """

    def __init__(self, rwidth, nregs):
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, src1, src2, dest):
        """ Apply IADD or ISUB to registers src1 and src2 and store the
            result, truncated to rwidth bits, in register dest.

            Raises ValueError for any other operation; the model is left
            unmodified in that case.
        """
        lhs = self.regs[src1]
        rhs = self.regs[src2]
        if op == IADD:
            result = lhs + rhs
        elif op == ISUB:
            result = lhs - rhs
        else:
            # previously fell through to an unbound local variable
            raise ValueError("unsupported operation %r" % (op,))
        # wrap around exactly like a fixed-width register would
        self.regs[dest] = result & ((1 << self.rwidth) - 1)

    def setval(self, dest, val):
        """ Overwrite register dest with val (stored as given). """
        self.regs[dest] = val

    def dump(self, dut):
        """ Simulation generator: print every register, expected value
            against the value read back from dut.intregs, with OK / !ok.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """ Simulation generator: compare every register in dut.intregs
            against the model.  On the first mismatch, print it, dump all
            registers and raise AssertionError.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                # explicit raise: a bare "assert False" vanishes under -O
                raise AssertionError("register %d mismatch" % i)
263
def int_instr(dut, alusim, op, src1, src2, dest):
    """ Simulation generator: present one integer instruction to the dut.

        Clears every bit of dut.int_insn_i, drives the destination and
        source register numbers, sets bit "op" of dut.int_insn_i, and
        finally applies the same operation to the software model alusim.
    """
    insn_bits = dut.int_insn_i
    # de-select everything first so that only one operation is enabled
    for idx in range(len(insn_bits)):
        yield insn_bits[idx].eq(0)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)
    yield insn_bits[op].eq(1)
    # keep the reference model in step with what was just issued
    alusim.op(op, src1, src2, dest)
272
273
def print_reg(dut, rnums):
    """ Simulation generator: read the registers listed in rnums from
        dut.intregs and print their numbers and hex values on one line.
    """
    hex_values = []
    for rnum in rnums:
        current = yield dut.intregs.regs[rnum].reg
        hex_values.append("%x" % current)
    labels = ','.join(str(rnum) for rnum in rnums)
    print("reg %s: %s" % (labels, ','.join(hex_values)))
281
282
def scoreboard_sim(dut, alusim):
    """ Simulation generator driving the scoreboard testbench.

        * preloads integer registers 1..n_regs-1 (dut and model) with their
          own index
        * issues two instructions through int_instr(), after each one
          clocking until dut.issue_o is seen and then de-selecting all
          instruction bits
        * clocks a further 13 cycles, then checks and dumps the registers
          against the model alusim

        The "if False:" / "if True:" sections are developer switches left
        in place: the first holds a disabled hand-written sequence, the
        last currently overrides the random operands with fixed ones.

        NOTE(review): nesting was reconstructed from a copy that had lost
        its indentation.  In particular the alusim.check() at the end of
        the first "if False:" section, and the "busy" report inside the
        wait loop, are assumed to belong where they are placed here -
        confirm against the repository.
    """
    watched = [3, 4, 5]  # registers reported by every print_reg() call

    yield dut.int_store_i.eq(0)

    for i in range(1, dut.n_regs):
        yield dut.intregs.regs[i].reg.eq(i)
        alusim.setval(i, i)

    if False:
        yield from int_instr(dut, alusim, IADD, 4, 3, 5)
        yield from print_reg(dut, watched)
        yield
        yield from int_instr(dut, alusim, IADD, 5, 2, 5)
        yield from print_reg(dut, watched)
        yield
        yield from int_instr(dut, alusim, ISUB, 5, 1, 3)
        yield from print_reg(dut, watched)
        yield
        for bit in range(len(dut.int_insn_i)):
            yield dut.int_insn_i[bit].eq(0)
        for _ in range(3):
            yield from print_reg(dut, watched)
            yield

        yield from alusim.check(dut)

    for i in range(2):
        src1 = randint(1, dut.n_regs-1)
        src2 = randint(1, dut.n_regs-1)
        # dest may coincide with a source register.  This used to be a
        # "while True" loop that broke out unconditionally before its
        # "dest not in [src1, src2]" test, so that test never ran.
        dest = randint(1, dut.n_regs-1)
        op = randint(0, 1)
        if False:
            if i % 2 == 0:
                src1 = 6
                src2 = 6
                dest = 1
            else:
                src1 = 1
                src2 = 7
                dest = 2
            #src1 = 2
            #src2 = 3
            #dest = 2

            op = i

        if True:
            if i == 0:
                src1 = 2
                src2 = 3
                dest = 3
            else:
                src1 = 5
                src2 = 3
                dest = 7

            #op = (i+1) % 2
            op = i

        print("random %d: %d %d %d %d\n" % (i, op, src1, src2, dest))
        yield from int_instr(dut, alusim, op, src1, src2, dest)
        yield from print_reg(dut, watched)
        # clock until the issue unit reports the instruction as accepted
        while True:
            yield
            issue_o = yield dut.issue_o
            if issue_o:
                yield from print_reg(dut, watched)
                # "bit", not "i": do not clobber the outer loop index
                for bit in range(len(dut.int_insn_i)):
                    yield dut.int_insn_i[bit].eq(0)
                break
            print("busy")
            yield from print_reg(dut, watched)

    # let the last instruction drain: 4 reported cycles, then 9 silent ones
    for _ in range(4):
        yield
        yield from print_reg(dut, watched)
    for _ in range(9):
        yield
    yield from alusim.check(dut)
    yield from alusim.dump(dut)
382
383
def explore_groups(dut):
    """ Debug helper: elaborate dut and print the result of running
        LHSGroupAnalyzer over the elaborated module's statements.
    """
    from nmigen.hdl.ir import Fragment
    from nmigen.hdl.xfrm import LHSGroupAnalyzer

    module = dut.elaborate(platform=None)
    # The returned Fragment is not used.  NOTE(review): the call is kept
    # because it presumably finalises "module" before _statements is
    # read - confirm.
    Fragment.get(module, platform=None)

    analyzer = LHSGroupAnalyzer()
    lhs_groups = analyzer(module._statements)

    print(lhs_groups)
394
395
def test_scoreboard():
    """ Build a 16-bit, 8-register Scoreboard, write its RTLIL to
        test_scoreboard.il, then run scoreboard_sim against it, recording
        a VCD trace in test_scoreboard.vcd.
    """
    rwid, n_regs = 16, 8
    dut = Scoreboard(rwid, n_regs)
    alusim = RegSim(rwid, n_regs)

    # conversion comes first, as in the original ordering
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard.il", "w") as il_file:
        il_file.write(il_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard.vcd')
405
406
# Running this file as a script executes the scoreboard test once.
if __name__ == '__main__':
    test_scoreboard()