# Source: soc.git, src/experiment/cscore.py
# (gitweb blob ff66b91e229010a9242602f37653b56a97e5d3f9)
from random import randint

from nmigen.compat.sim import run_simulation
from nmigen.cli import verilog, rtlil
from nmigen import Module, Const, Signal, Array, Cat, Elaboratable

from regfile.regfile import RegFileArray, treereduce
from scoreboard.fn_unit import IntFnUnit, FPFnUnit, LDFnUnit, STFnUnit
from scoreboard.fu_fu_matrix import FUFUDepMatrix
from scoreboard.fu_reg_matrix import FURegDepMatrix
from scoreboard.global_pending import GlobalPending
from scoreboard.group_picker import GroupPicker
from scoreboard.issue_unit import IntFPIssueUnit, RegDecode

from compalu import ComputationUnitNoDelay

from alu_hier import ALU
from nmutil.latch import SRLatch
19
20
class Scoreboard(Elaboratable):
    """Experimental scoreboard wiring two integer ALUs to a register file.

    ``elaborate`` instantiates two ``ComputationUnitNoDelay`` units (one
    hard-wired to operation 0 / add, one to operation 1 / subtract), one
    ``IntFnUnit`` per ALU, a ``GroupPicker``, a ``RegDecode`` plus
    ``IntFPIssueUnit``, an ``FUFUDepMatrix`` and four ``GlobalPending``
    vectors, and connects them to the integer register file.  A second
    (floating-point) register file is created and its ports are requested,
    but nothing is connected to them yet.

    NOTE: ``int_insn_i`` only becomes an attribute of the instance once
    ``elaborate`` has run (it aliases ``issueunit.i.insn_i``), and it is
    not part of ``ports()``.
    """

    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # inputs
        self.int_store_i = Signal(reset_less=True)  # instruction is a store
        self.int_dest_i = Signal(max=n_regs, reset_less=True)  # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True)  # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True)  # oper2 R# in

        self.issue_o = Signal(reset_less=True)  # instruction was accepted

    def elaborate(self, platform):
        """Build and return the nMigen ``Module`` for the scoreboard.

        Side effect: sets ``self.int_insn_i`` to the issue unit's
        ``insn_i`` so that a testbench can drive the instruction-select
        bits.
        """
        m = Module()

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested but not connected anywhere below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs
        add = ALU(self.rwid)
        sub = ALU(self.rwid)
        m.submodules.comp1 = comp1 = ComputationUnitNoDelay(self.rwid, 1, add)
        m.submodules.comp2 = comp2 = ComputationUnitNoDelay(self.rwid, 1, sub)
        int_alus = [comp1, comp2]

        m.d.comb += comp1.oper_i.eq(Const(0))  # temporary/experiment: op=add
        m.d.comb += comp2.oper_i.eq(Const(1))  # temporary/experiment: op=sub

        # Int FUs
        if_l = []
        int_src1_pend_v = []
        int_src2_pend_v = []
        int_rd_pend_v = []
        int_wr_pend_v = []
        for i, a in enumerate(int_alus):
            # set up Integer Function Unit, add to module (and python list)
            fu = IntFnUnit(self.n_regs, shadow_wid=0)
            setattr(m.submodules, "intfu%d" % i, fu)
            if_l.append(fu)
            # collate the read/write pending vectors (to go into global pending)
            int_src1_pend_v.append(fu.src1_pend_o)
            int_src2_pend_v.append(fu.src2_pend_o)
            int_rd_pend_v.append(fu.int_rd_pend_o)
            int_wr_pend_v.append(fu.int_wr_pend_o)
        int_fus = Array(if_l)  # currently unused

        # Count of number of FUs
        n_int_fus = len(if_l)
        n_fp_fus = 0  # for now

        n_fus = n_int_fus + n_fp_fus  # plus FP FUs (currently unused)

        # XXX replaced by array of FUs? *FnUnit
        # # Integer FU-FU Dep Matrix
        # m.submodules.intfudeps = FUFUDepMatrix(n_int_fus, n_int_fus)
        # Integer FU-Reg Dep Matrix
        # intregdeps = FURegDepMatrix(self.n_regs, n_int_fus)
        # m.submodules.intregdeps = intregdeps

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(2)  # picks between add and sub
        m.submodules.intpick1 = intpick1

        # Global Pending Vectors (INT and FP)
        # NOTE: number of vectors is NOT same as number of FUs.
        g_int_src1_pend_v = GlobalPending(self.n_regs, int_src1_pend_v)
        g_int_src2_pend_v = GlobalPending(self.n_regs, int_src2_pend_v)
        g_int_rd_pend_v = GlobalPending(self.n_regs, int_rd_pend_v, True)
        g_int_wr_pend_v = GlobalPending(self.n_regs, int_wr_pend_v, True)
        m.submodules.g_int_src1_pend_v = g_int_src1_pend_v
        m.submodules.g_int_src2_pend_v = g_int_src2_pend_v
        m.submodules.g_int_rd_pend_v = g_int_rd_pend_v
        m.submodules.g_int_wr_pend_v = g_int_wr_pend_v

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IntFPIssueUnit(self.n_regs, n_int_fus, n_fp_fus)
        m.submodules.issueunit = issueunit

        # FU-FU Dependency Matrices
        intfudeps = FUFUDepMatrix(n_int_fus, n_int_fus)
        m.submodules.intfudeps = intfudeps

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        m.d.comb += [issueunit.i.store_i.eq(self.int_store_i),
                     regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(1),
                     self.issue_o.eq(issueunit.issue_o),
                     issueunit.i.dest_i.eq(regdecode.dest_o),
                    ]
        self.int_insn_i = issueunit.i.insn_i  # enabled by instruction decode

        # connect global rd/wr pending vectors
        m.d.comb += issueunit.i.g_wr_pend_i.eq(g_int_wr_pend_v.g_pend_o)
        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        fn_issue_l = []
        fn_busy_l = []  # collected but not used further down
        for i, fu in enumerate(if_l):
            fn_issue_l.append(fu.issue_i)
            fn_busy_l.append(fu.busy_o)
            m.d.sync += fu.issue_i.eq(issueunit.i.fn_issue_o[i])
            m.d.sync += fu.dest_i.eq(self.int_dest_i)
            m.d.sync += fu.src1_i.eq(self.int_src1_i)
            m.d.sync += fu.src2_i.eq(self.int_src2_i)
            # XXX sync, so as to stop a simulation infinite loop
            m.d.comb += issueunit.i.busy_i[i].eq(fu.busy_o)

        #---------
        # connect Function Units
        #---------

        # XXX sync, again to avoid an infinite loop.  is it the right thing???

        # Group Picker... done manually for now.  TODO: cat array of pick sigs
        m.d.comb += if_l[0].go_rd_i.eq(intpick1.go_rd_o[0])  # add rd
        m.d.comb += if_l[0].go_wr_i.eq(intpick1.go_wr_o[0])  # add wr

        m.d.comb += if_l[1].go_rd_i.eq(intpick1.go_rd_o[1])  # subtract rd
        m.d.comb += if_l[1].go_wr_i.eq(intpick1.go_wr_o[1])  # subtract wr

        # create read-pending FU-FU vectors
        intfu_rd_pend_v = Signal(n_int_fus, reset_less=True)
        intfu_wr_pend_v = Signal(n_int_fus, reset_less=True)
        for i in range(n_int_fus):
            #m.d.comb += intfu_rd_pend_v[i].eq(if_l[i].int_rd_pend_o.bool())
            #m.d.comb += intfu_wr_pend_v[i].eq(if_l[i].int_wr_pend_o.bool())
            m.d.comb += intfu_rd_pend_v[i].eq(if_l[i].int_readable_o)
            m.d.comb += intfu_wr_pend_v[i].eq(if_l[i].int_writable_o)

        # Connect INT Fn Unit global wr/rd pending
        for fu in if_l:
            m.d.comb += fu.g_int_wr_pend_i.eq(g_int_wr_pend_v.g_pend_o)
            m.d.comb += fu.g_int_rd_pend_i.eq(g_int_rd_pend_v.g_pend_o)

        # Connect FU-FU Matrix, NOTE: FN Units readable/writable considered
        # to be unit "read-pending / write-pending"
        m.d.comb += intfudeps.rd_pend_i.eq(intfu_rd_pend_v)
        m.d.comb += intfudeps.wr_pend_i.eq(intfu_wr_pend_v)
        m.d.comb += intfudeps.issue_i.eq(issueunit.i.fn_issue_o)
        for i in range(n_int_fus):
            m.d.comb += intfudeps.go_rd_i[i].eq(intpick1.go_rd_o[i])
            m.d.comb += intfudeps.go_wr_i[i].eq(intpick1.go_wr_o[i])

        # Connect Picker (note connection to FU-FU)
        #---------
        readable_o = intfudeps.readable_o
        writable_o = intfudeps.writable_o
        m.d.comb += intpick1.rd_rel_i[0].eq(int_alus[0].rd_rel_o)
        m.d.comb += intpick1.rd_rel_i[1].eq(int_alus[1].rd_rel_o)
        m.d.comb += intpick1.req_rel_i[0].eq(int_alus[0].req_rel_o)
        m.d.comb += intpick1.req_rel_i[1].eq(int_alus[1].req_rel_o)
        m.d.comb += intpick1.readable_i[0].eq(readable_o[0])  # add rd
        m.d.comb += intpick1.writable_i[0].eq(writable_o[0])  # add wr
        m.d.comb += intpick1.readable_i[1].eq(readable_o[1])  # sub rd
        m.d.comb += intpick1.writable_i[1].eq(writable_o[1])  # sub wr

        #---------
        # Connect Register File(s)
        #---------
        #with m.If(if_l[0].go_wr_i | if_l[1].go_wr_i):
        m.d.sync += int_dest.wen.eq(g_int_wr_pend_v.g_pend_o)
        #with m.If(intpick1.go_rd_o):
        #with m.If(if_l[0].go_rd_i | if_l[1].go_rd_i):
        m.d.sync += int_src1.ren.eq(g_int_src1_pend_v.g_pend_o)
        m.d.sync += int_src2.ren.eq(g_int_src2_pend_v.g_pend_o)

        # merge (OR) all integer FU / ALU outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "dest_o"
        dest_o = treereduce(int_alus)
        m.d.sync += int_dest.data_i.eq(dest_o)

        # connect ALUs
        for i, alu in enumerate(int_alus):
            m.d.comb += alu.go_rd_i.eq(intpick1.go_rd_o[i])
            m.d.comb += alu.go_wr_i.eq(intpick1.go_wr_o[i])
            m.d.comb += alu.issue_i.eq(fn_issue_l[i])
            #m.d.comb += fn_busy_l[i].eq(alu.busy_o)  # XXX ignore, use fnissue
            m.d.comb += alu.src1_i.eq(int_src1.data_o)
            m.d.comb += alu.src2_i.eq(int_src2.data_o)
            m.d.comb += if_l[i].req_rel_i.eq(alu.req_rel_o)  # pipe out ready

        return m

    def __iter__(self):
        """Yield the contents of both register files, then the top-level
        input/output signals created in ``__init__``."""
        yield from self.intregs
        yield from self.fpregs
        yield self.int_store_i
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        #yield from self.int_src1
        #yield from self.int_dest
        #yield from self.int_src1
        #yield from self.int_src2
        #yield from self.fp_dest
        #yield from self.fp_src1
        #yield from self.fp_src2

    def ports(self):
        """Return everything produced by ``__iter__`` as a list."""
        return list(self)
255
# Opcodes understood by RegSim.op (also used as instruction-select indices
# by the testbench).
IADD = 0
ISUB = 1


class RegSim:
    """Pure-Python reference model of the integer register file.

    Holds ``nregs`` integer registers (all initially 0) and applies the
    same add/subtract operations the hardware is asked to perform, so that
    the simulated register contents can be compared against it.
    """

    def __init__(self, rwidth, nregs):
        """
        * :rwidth: register width in bits; results are truncated to it
        * :nregs:  number of registers in the model
        """
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, src1, src2, dest):
        """Apply ``op`` (IADD or ISUB) to registers ``src1``/``src2`` and
        store the result, truncated to ``rwidth`` bits, in register ``dest``.

        Raises ValueError for any other opcode; the register file is left
        untouched in that case.
        """
        mask = (1 << self.rwidth) - 1
        lhs = self.regs[src1]
        rhs = self.regs[src2]
        if op == IADD:
            val = (lhs + rhs) & mask
        elif op == ISUB:
            val = (lhs - rhs) & mask
        else:
            # previously fell through and failed with UnboundLocalError
            raise ValueError("unknown operation %r" % (op,))
        self.regs[dest] = val

    def setval(self, dest, val):
        """Directly set register ``dest`` of the model to ``val``."""
        self.regs[dest] = val

    def dump(self, dut):
        """Simulation generator: print every register, expected vs. read.

        Each ``yield`` hands ``dut.intregs.regs[i].reg`` to the simulator
        and receives the current value back.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """Simulation generator: compare every register with the model.

        On the first mismatch the offending register is printed, the whole
        register file is dumped, and AssertionError is raised.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                # explicit raise: a bare "assert False" vanishes under -O
                raise AssertionError(
                    "reg %d expected %x received %x" % (i, val, reg))
289
def int_instr(dut, alusim, op, src1, src2, dest):
    """Simulation generator: present one integer instruction to ``dut``.

    Clears every bit of ``dut.int_insn_i``, drives the destination and
    source register numbers, sets instruction-select bit ``op``, and
    finally applies the same operation to the reference model ``alusim``.
    The yielded values are the assignment statements for the simulator.
    """
    # deselect all instructions first so that only bit "op" ends up set
    for i in range(len(dut.int_insn_i)):
        yield dut.int_insn_i[i].eq(0)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)
    yield dut.int_insn_i[op].eq(1)
    # keep the software model in step with what was just issued
    alusim.op(op, src1, src2, dest)
298
299
def print_reg(dut, rnums):
    """Simulation generator: print the integer registers listed in ``rnums``.

    Each ``yield`` hands ``dut.intregs.regs[rnum].reg`` to the simulator
    and receives its current value; the values are printed in hex on one
    line as ``reg <numbers>: <values>``.
    """
    rs = []
    for rnum in rnums:
        reg = yield dut.intregs.regs[rnum].reg
        rs.append("%x" % reg)
    labels = map(str, rnums)
    print("reg %s: %s" % (','.join(labels), ','.join(rs)))
307
308
def scoreboard_sim(dut, alusim):
    """Simulation process driving ``dut`` and checking it against ``alusim``.

    Preloads integer register N with the value N (for N >= 1) in both the
    hardware and the model, issues two hard-coded instructions
    (op 0: r3 = r2 op r3, then op 1: r4 = r5 op r3), waits for each to be
    accepted (``dut.issue_o``), lets the clock run, and finally compares
    and dumps all registers.

    NOTE(review): the indentation of this function was reconstructed from
    flattened text; the two places where the nesting could not be derived
    from syntax alone are marked below - confirm against the repository.
    """
    yield dut.int_store_i.eq(0)

    for i in range(1, dut.n_regs):
        yield dut.intregs.regs[i].reg.eq(i)
        alusim.setval(i, i)

    # disabled hand-written sequence, kept as a toggle
    if False:
        yield from int_instr(dut, alusim, IADD, 4, 3, 5)
        yield from print_reg(dut, [3,4,5])
        yield
        yield from int_instr(dut, alusim, IADD, 5, 2, 5)
        yield from print_reg(dut, [3,4,5])
        yield
        yield from int_instr(dut, alusim, ISUB, 5, 1, 3)
        yield from print_reg(dut, [3,4,5])
        yield
        for i in range(len(dut.int_insn_i)):
            yield dut.int_insn_i[i].eq(0)
        yield from print_reg(dut, [3,4,5])
        yield
        yield from print_reg(dut, [3,4,5])
        yield
        yield from print_reg(dut, [3,4,5])
        yield

        # NOTE(review): assumed to belong to the disabled sequence above
        yield from alusim.check(dut)

    for i in range(2):
        src1 = randint(1, dut.n_regs-1)
        src2 = randint(1, dut.n_regs-1)
        # dest may coincide with src1/src2: the original retry loop broke
        # out unconditionally before its "dest not in [src1, src2]" test,
        # so that test never ran.
        dest = randint(1, dut.n_regs-1)
        op = randint(0, 1)

        # disabled alternative operand set, kept as a toggle
        if False:
            if i % 2 == 0:
                src1 = 6
                src2 = 6
                dest = 1
            else:
                src1 = 1
                src2 = 7
                dest = 2
            #src1 = 2
            #src2 = 3
            #dest = 2

            op = i

        # active operand set: overrides the random picks above
        if True:
            if i == 0:
                src1 = 2
                src2 = 3
                dest = 3
            else:
                src1 = 5
                src2 = 3
                dest = 4

            #op = (i+1) % 2
            op = i

        print ("random %d: %d %d %d %d\n" % (i, op, src1, src2, dest))
        yield from int_instr(dut, alusim, op, src1, src2, dest)
        yield from print_reg(dut, [3,4,5])
        # clock until the issue unit reports the instruction as accepted
        while True:
            yield
            issue_o = yield dut.issue_o
            if issue_o:
                yield from print_reg(dut, [3,4,5])
                # separate index name: do not clobber the outer loop's "i"
                for bit in range(len(dut.int_insn_i)):
                    yield dut.int_insn_i[bit].eq(0)
                break
            print ("busy",)
            yield from print_reg(dut, [3,4,5])
            # NOTE(review): assumed to be extra wait cycles on the busy
            # path; they may instead belong after the while loop.
            yield
            yield
            yield

    yield
    yield from print_reg(dut, [3,4,5])
    yield
    yield from print_reg(dut, [3,4,5])
    yield
    yield from print_reg(dut, [3,4,5])
    yield
    yield from print_reg(dut, [3,4,5])
    # let the remaining operations drain before the final comparison
    yield
    yield
    yield
    yield
    yield
    yield
    yield
    yield
    yield
    yield from alusim.check(dut)
    yield from alusim.dump(dut)
411
412
def explore_groups(dut):
    """Debug helper: elaborate ``dut`` and print the result of running
    nMigen's ``LHSGroupAnalyzer`` over the top-level module's statements.
    """
    from nmigen.hdl.ir import Fragment
    from nmigen.hdl.xfrm import LHSGroupAnalyzer

    fragment = dut.elaborate(platform=None)
    # result is not used below; the call is kept as in the original
    fr = Fragment.get(fragment, platform=None)

    groups = LHSGroupAnalyzer()(fragment._statements)

    print (groups)
423
424
def test_scoreboard():
    """Build a 16-bit, 8-register Scoreboard, write its RTLIL to
    ``test_scoreboard.il`` and run the simulation, producing
    ``test_scoreboard.vcd``.
    """
    dut = Scoreboard(16, 8)
    alusim = RegSim(16, 8)
    # converting first also elaborates the design, which is what creates
    # dut.int_insn_i for the simulation process
    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard.il", "w") as f:
        f.write(vl)

    run_simulation(dut, scoreboard_sim(dut, alusim),
                   vcd_name='test_scoreboard.vcd')
434
435
# run the scoreboard test when executed as a script
if __name__ == '__main__':
    test_scoreboard()