add in go_rd
[soc.git] / src / experiment / cscore.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
4
5 from regfile.regfile import RegFileArray, treereduce
6 from scoreboard.fn_unit import IntFnUnit, FPFnUnit, LDFnUnit, STFnUnit
7 from scoreboard.fu_fu_matrix import FUFUDepMatrix
8 from scoreboard.fu_reg_matrix import FURegDepMatrix
9 from scoreboard.global_pending import GlobalPending
10 from scoreboard.group_picker import GroupPicker
11 from scoreboard.issue_unit import IntFPIssueUnit, RegDecode
12
13 from compalu import ComputationUnitNoDelay
14
15 from alu_hier import ALU
16 from nmutil.latch import SRLatch
17
18 from random import randint
19
20
class Scoreboard(Elaboratable):
    """Experimental integer scoreboard built around two ALUs.

    Holds an INT and an FP register file together with the instruction
    issue inputs.  ``elaborate`` creates the two ALUs (add / subtract),
    one integer Function Unit per ALU, the group picker, the global
    pending vectors and the issue unit, and wires them together.

    NOTE: ``int_insn_i`` is only assigned inside ``elaborate``, so that
    attribute does not exist until ``elaborate`` has been called.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # inputs
        self.int_store_i = Signal(reset_less=True) # instruction is a store
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in

        # output: driven from issueunit.issue_o in elaborate()
        self.issue_o = Signal(reset_less=True) # instruction was accepted

    def elaborate(self, platform):
        """Create all sub-units, connect them and return the Module.

        Side effect: sets ``self.int_insn_i`` to the issue unit's
        ``insn_i`` input so that a test bench can drive it.
        """
        m = Module()

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested here but not connected anywhere below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs
        add = ALU(self.rwid)
        sub = ALU(self.rwid)
        m.submodules.comp1 = comp1 = ComputationUnitNoDelay(self.rwid, 1, add)
        m.submodules.comp2 = comp2 = ComputationUnitNoDelay(self.rwid, 1, sub)
        int_alus = [comp1, comp2]

        m.d.comb += comp1.oper_i.eq(Const(0)) # temporary/experiment: op=add
        m.d.comb += comp2.oper_i.eq(Const(1)) # temporary/experiment: op=sub

        # Int FUs
        if_l = []
        int_src1_pend_v = []
        int_src2_pend_v = []
        int_rd_pend_v = []
        int_wr_pend_v = []
        for i, a in enumerate(int_alus):
            # set up Integer Function Unit, add to module (and python list)
            fu = IntFnUnit(self.n_regs, shadow_wid=0)
            setattr(m.submodules, "intfu%d" % i, fu)
            if_l.append(fu)
            # collate the read/write pending vectors (to go into global pending)
            int_src1_pend_v.append(fu.src1_pend_o)
            int_src2_pend_v.append(fu.src2_pend_o)
            int_rd_pend_v.append(fu.int_rd_pend_o)
            int_wr_pend_v.append(fu.int_wr_pend_o)
        # NOTE(review): int_fus is not referenced again in this method
        int_fus = Array(if_l)

        # Count of number of FUs
        n_int_fus = len(if_l)
        n_fp_fus = 0 # for now

        # NOTE(review): n_fus is not referenced again in this method
        n_fus = n_int_fus + n_fp_fus # plus FP FUs

        # XXX replaced by array of FUs? *FnUnit
        # # Integer FU-FU Dep Matrix
        # m.submodules.intfudeps = FUFUDepMatrix(n_int_fus, n_int_fus)
        # Integer FU-Reg Dep Matrix
        # intregdeps = FURegDepMatrix(self.n_regs, n_int_fus)
        # m.submodules.intregdeps = intregdeps

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(2) # picks between add and sub
        m.submodules.intpick1 = intpick1

        # Global Pending Vectors (INT and FP)
        # NOTE: number of vectors is NOT same as number of FUs.
        g_int_src1_pend_v = GlobalPending(self.n_regs, int_src1_pend_v)
        g_int_src2_pend_v = GlobalPending(self.n_regs, int_src2_pend_v)
        g_int_rd_pend_v = GlobalPending(self.n_regs, int_rd_pend_v, True)
        g_int_wr_pend_v = GlobalPending(self.n_regs, int_wr_pend_v, True)
        m.submodules.g_int_src1_pend_v = g_int_src1_pend_v
        m.submodules.g_int_src2_pend_v = g_int_src2_pend_v
        m.submodules.g_int_rd_pend_v = g_int_rd_pend_v
        m.submodules.g_int_wr_pend_v = g_int_wr_pend_v

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IntFPIssueUnit(self.n_regs, n_int_fus, n_fp_fus)
        m.submodules.issueunit = issueunit

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        m.d.comb += [issueunit.i.store_i.eq(self.int_store_i),
                     regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(1),
                     self.issue_o.eq(issueunit.issue_o)
                    ]
        # NOTE(review): the trailing comma makes the right-hand side a
        # one-element tuple; it looks accidental - confirm and remove.
        m.d.sync += issueunit.i.dest_i.eq(regdecode.dest_o),
        # exposed as an attribute so the test bench can set instruction bits
        self.int_insn_i = issueunit.i.insn_i # enabled by instruction decode

        # connect global rd/wr pending vectors
        m.d.comb += issueunit.i.g_wr_pend_i.eq(g_int_wr_pend_v.g_pend_o)
        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        fn_issue_l = []
        fn_busy_l = []
        for i, fu in enumerate(if_l):
            fn_issue_l.append(fu.issue_i)
            fn_busy_l.append(fu.busy_o)
            m.d.sync += fu.issue_i.eq(issueunit.i.fn_issue_o[i])
            m.d.sync += fu.dest_i.eq(self.int_dest_i)
            m.d.sync += fu.src1_i.eq(self.int_src1_i)
            m.d.sync += fu.src2_i.eq(self.int_src2_i)
            # XXX sync, so as to stop a simulation infinite loop
            # NOTE(review): the busy wiring below is comb; the remark above
            # presumably refers to the sync assignments preceding it.
            m.d.comb += issueunit.i.busy_i[i].eq(fu.busy_o)

        #---------
        # connect Function Units
        #---------

        # XXX sync, again to avoid an infinite loop.  is it the right thing???

        # Group Picker... done manually for now.  TODO: cat array of pick sigs
        m.d.sync += if_l[0].go_rd_i.eq(intpick1.go_rd_o[0]) # add rd
        m.d.sync += if_l[0].go_wr_i.eq(intpick1.go_wr_o[0]) # add wr

        m.d.sync += if_l[1].go_rd_i.eq(intpick1.go_rd_o[1]) # subtract rd
        m.d.sync += if_l[1].go_wr_i.eq(intpick1.go_wr_o[1]) # subtract wr

        # Connect INT Fn Unit global wr/rd pending
        for fu in if_l:
            m.d.comb += fu.g_int_wr_pend_i.eq(g_int_wr_pend_v.g_pend_o)
            m.d.comb += fu.g_int_rd_pend_i.eq(g_int_rd_pend_v.g_pend_o)

        # Connect Picker
        #---------
        # picker go_rd_i inputs are the inverse of each FU's go_rd_i
        m.d.comb += intpick1.go_rd_i[0].eq(~if_l[0].go_rd_i)
        m.d.comb += intpick1.go_rd_i[1].eq(~if_l[1].go_rd_i)
        m.d.comb += intpick1.req_rel_i[0].eq(int_alus[0].req_rel_o)
        m.d.comb += intpick1.req_rel_i[1].eq(int_alus[1].req_rel_o)
        m.d.comb += intpick1.readable_i[0].eq(if_l[0].int_readable_o) # add rd
        m.d.comb += intpick1.writable_i[0].eq(if_l[0].int_writable_o) # add wr
        m.d.comb += intpick1.readable_i[1].eq(if_l[1].int_readable_o) # sub rd
        m.d.comb += intpick1.writable_i[1].eq(if_l[1].int_writable_o) # sub wr

        #---------
        # Connect Register File(s)
        #---------
        # write-enable only while either FU has go_wr_i set
        with m.If(if_l[0].go_wr_i | if_l[1].go_wr_i):
            m.d.comb += int_dest.wen.eq(g_int_wr_pend_v.g_pend_o)
        # NOTE(review): the read-enable guards below are commented out, so
        # the two ren assignments are taken to be unconditional - confirm.
        #with m.If(intpick1.go_rd_o):
        #with m.If(if_l[0].go_rd_i | if_l[1].go_rd_i):
        m.d.comb += int_src1.ren.eq(g_int_src1_pend_v.g_pend_o)
        m.d.comb += int_src2.ren.eq(g_int_src2_pend_v.g_pend_o)

        # merge (OR) all integer FU / ALU outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "dest_o"
        dest_o = treereduce(int_alus)
        m.d.comb += int_dest.data_i.eq(dest_o)

        # connect ALUs
        for i, alu in enumerate(int_alus):
            m.d.sync += alu.go_rd_i.eq(intpick1.go_rd_o[i])
            m.d.sync += alu.go_wr_i.eq(intpick1.go_wr_o[i])
            m.d.comb += alu.issue_i.eq(fn_issue_l[i])
            #m.d.comb += fn_busy_l[i].eq(alu.busy_o)  # XXX ignore, use fnissue
            # every ALU sees the same two register-file read ports
            m.d.comb += alu.src1_i.eq(int_src1.data_o)
            m.d.comb += alu.src2_i.eq(int_src2.data_o)
            m.d.comb += if_l[i].req_rel_i.eq(alu.req_rel_o) # pipe out ready

        return m


    def __iter__(self):
        """Yield the register files' items followed by this unit's signals."""
        yield from self.intregs
        yield from self.fpregs
        yield self.int_store_i
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        #yield from self.int_src1
        #yield from self.int_dest
        #yield from self.int_src1
        #yield from self.int_src2
        #yield from self.fp_dest
        #yield from self.fp_src1
        #yield from self.fp_src2

    def ports(self):
        """Return everything produced by ``__iter__`` as a list."""
        return list(self)
231
# opcodes understood by RegSim.op; also used as bit indices into int_insn_i
IADD = 0
ISUB = 1


class RegSim:
    """Software reference model of the integer register file.

    Mirrors the effect of each issued instruction on a plain Python list
    so that the simulated hardware registers can be compared against it.
    """

    def __init__(self, rwidth, nregs):
        """Create ``nregs`` registers of ``rwidth`` bits, all holding zero."""
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, src1, src2, dest):
        """Apply one instruction to the model.

        ``src1``, ``src2`` and ``dest`` are register numbers.  The result
        is truncated to ``rwidth`` bits before being stored in ``dest``.

        Raises ValueError for an opcode other than IADD / ISUB; the
        register state is left untouched in that case.
        """
        lhs = self.regs[src1]
        rhs = self.regs[src2]
        if op == IADD:
            result = lhs + rhs
        elif op == ISUB:
            result = lhs - rhs
        else:
            # previously fell through and failed on an unbound local
            raise ValueError("unknown opcode %r" % (op,))
        self.regs[dest] = result & ((1 << self.rwidth) - 1)

    def setval(self, dest, val):
        """Store ``val`` directly into register ``dest``."""
        self.regs[dest] = val

    def dump(self, dut):
        """Simulation generator: print expected vs. actual for every register.

        Each register signal of ``dut.intregs`` is yielded to the simulator,
        which sends back its current value.
        """
        for regnum, expected in enumerate(self.regs):
            actual = yield dut.intregs.regs[regnum].reg
            okstr = "OK" if actual == expected else "!ok"
            print("reg %d expected %x received %x %s" % (regnum, expected,
                                                         actual, okstr))

    def check(self, dut):
        """Simulation generator: verify every register against the model.

        On the first mismatch the difference is printed, the whole register
        file is dumped, and AssertionError is raised.  The exception is
        raised explicitly (not via ``assert``) so that the check is still
        effective when Python runs with -O.
        """
        for regnum, expected in enumerate(self.regs):
            actual = yield dut.intregs.regs[regnum].reg
            if actual != expected:
                print("reg %d expected %x received %x\n" % (regnum, expected,
                                                            actual))
                yield from self.dump(dut)
                raise AssertionError("reg %d expected %x received %x" %
                                     (regnum, expected, actual))
265
def int_instr(dut, alusim, op, src1, src2, dest):
    """Simulation generator: present one integer instruction to the DUT.

    Clears every bit of ``dut.int_insn_i``, drives the destination and
    source register numbers, sets the instruction bit selected by ``op``,
    and finally applies the same operation to the ``alusim`` model.
    """
    insn_bits = dut.int_insn_i
    for bit in range(len(insn_bits)):
        yield insn_bits[bit].eq(0)

    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    # one-hot select of the requested operation
    yield insn_bits[op].eq(1)

    alusim.op(op, src1, src2, dest)
274
275
def print_reg(dut, rnums):
    """Simulation generator: print the current value of selected registers.

    ``rnums`` is a sequence of register numbers.  Each register signal is
    yielded to the simulator to obtain its value; the numbers and the
    values (in hex) are then printed on one line as comma-separated lists.
    """
    hex_values = []
    for regnum in rnums:
        current = yield dut.intregs.regs[regnum].reg
        hex_values.append("%x" % current)

    labels = ','.join(str(regnum) for regnum in rnums)
    values = ','.join(hex_values)
    print("reg %s: %s" % (labels, values))
283
284
def scoreboard_sim(dut, alusim, randomise=False):
    """Simulation process driving two instructions through the scoreboard.

    * :dut:       the Scoreboard under test (must already be elaborated,
                  because ``dut.int_insn_i`` is used)
    * :alusim:    RegSim reference model, updated in step with the DUT
    * :randomise: when False (the default) the fixed two-instruction
                  program below is issued; when True the operation and
                  the register numbers are drawn with ``randint``

    Registers 1..n_regs-1 are preloaded with their own index in both the
    DUT and the model.  Each instruction is held until ``dut.issue_o`` is
    seen, then the instruction bits are cleared.  After a number of extra
    clock cycles the DUT register file is checked against the model and
    dumped.
    """
    yield dut.int_store_i.eq(0)

    # preload DUT and model identically; register 0 is left at zero
    for regnum in range(1, dut.n_regs):
        yield dut.intregs.regs[regnum].reg.eq(regnum)
        alusim.setval(regnum, regnum)

    # (op, src1, src2, dest) - the second entry reads the register (r3)
    # that the first one writes
    fixed_program = ((IADD, 2, 3, 3),
                     (ISUB, 5, 3, 7))

    for insn_num in range(2):
        if randomise:
            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            dest = randint(1, dut.n_regs-1)
            op = randint(0, 1)
        else:
            op, src1, src2, dest = fixed_program[insn_num]

        print("random %d: %d %d %d %d\n" % (insn_num, op, src1, src2, dest))
        yield from int_instr(dut, alusim, op, src1, src2, dest)
        yield from print_reg(dut, [3,4,5])

        # wait, one clock at a time, until the issue unit accepts it
        while True:
            yield
            issue_o = yield dut.issue_o
            if issue_o:
                yield from print_reg(dut, [3,4,5])
                # accepted: drop all instruction bits again.  a separate
                # index name is used so the outer counter is not clobbered
                for bit in range(len(dut.int_insn_i)):
                    yield dut.int_insn_i[bit].eq(0)
                break
            print("busy")
            yield from print_reg(dut, [3,4,5])

    # let the units drain: four traced cycles, then nine silent ones
    for _ in range(4):
        yield
        yield from print_reg(dut, [3,4,5])
    for _ in range(9):
        yield

    yield from alusim.check(dut)
    yield from alusim.dump(dut)
384
385
def explore_groups(dut):
    """Print the LHS signal groups of ``dut``'s top-level statements.

    Elaborates ``dut`` with no platform, runs ``LHSGroupAnalyzer`` over
    the statements of the object returned by ``elaborate`` and prints the
    result.
    """
    from nmigen.hdl.ir import Fragment
    from nmigen.hdl.xfrm import LHSGroupAnalyzer

    module = dut.elaborate(platform=None)
    # the converted fragment itself is not used afterwards; the call is
    # kept as in the original sequence
    Fragment.get(module, platform=None)

    analyzer = LHSGroupAnalyzer()
    print(analyzer(module._statements))
396
397
def test_scoreboard():
    """Convert the scoreboard to RTLIL, then run the simulation.

    Writes ``test_scoreboard.il`` and ``test_scoreboard.vcd`` into the
    current directory.
    """
    reg_width, reg_count = 16, 8
    dut = Scoreboard(reg_width, reg_count)
    alusim = RegSim(reg_width, reg_count)

    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard.il", "w") as il_file:
        il_file.write(il_text)

    stimulus = scoreboard_sim(dut, alusim)
    run_simulation(dut, stimulus, vcd_name='test_scoreboard.vcd')
407
408
# run the scoreboard test when this file is executed as a script
if __name__ == "__main__":
    test_scoreboard()