# start putting LDSTSplitter together
# [soc.git] / src / soc / scoreboard / test_mem2_fu_matrix.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
4
5 from regfile.regfile import RegFileArray, treereduce
6 from soc.scoreboard.global_pending import GlobalPending
7 from soc.scoreboard.group_picker import GroupPicker
8 from soc.scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
9 from soc.scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
10 from soc.scoreboard.memfu import MemFunctionUnits
11 from nmutil.latch import SRLatch
12 from nmutil.nmoperator import eq
13
14 from random import randint, seed
15 from copy import deepcopy
16 from math import log
17
18
class Memory(Elaboratable):
    """ Test memory: an nmigen Memory with one read and one write port.

        Inputs:

        * :regwid: bit width of each memory word
        * :addrw:  bit width of the address bus

        Attributes:

        * adr:   address (drives both the read and the write port)
        * dat_r: data read out of the memory
        * dat_w: data to be written
        * we:    write enable
    """
    def __init__(self, regwid, addrw):
        # this class shadows the nmigen primitive of the same name, so the
        # primitive is imported under an alias: a bare Memory(...) call here
        # would re-enter this constructor and fail on the keyword arguments
        from nmigen import Memory as NMigenMemory

        # floor division: range() and the slice in elaborate() need ints
        self.ddepth = regwid // 8
        depth = (1 << addrw) // self.ddepth
        self.adr = Signal(addrw)
        self.dat_r = Signal(regwid)
        self.dat_w = Signal(regwid)
        self.we = Signal()
        # each word is initialised to its own index
        self.mem = NMigenMemory(width=regwid, depth=depth,
                                init=range(0, depth))

    def elaborate(self, platform):
        """ Creates the read and write ports and wires them to the
            adr / dat_r / dat_w / we signals.
        """
        m = Module()
        m.submodules.rdport = rdport = self.mem.read_port()
        m.submodules.wrport = wrport = self.mem.write_port()
        m.d.comb += [
            # ignore low bits: the bottom "ddepth" address bits are dropped
            rdport.addr.eq(self.adr[self.ddepth:]),
            self.dat_r.eq(rdport.data),
            # NOTE(review): the write address keeps its low bits, unlike
            # the read address above - confirm this asymmetry is intended
            wrport.addr.eq(self.adr),
            wrport.data.eq(self.dat_w),
            wrport.en.eq(self.we),
        ]
        return m
41
42
class MemSim:
    """ Pure-python model of a memory, for checking simulation results.

        * :regwid: bit width of each stored word
        * :addrw:  bit width of addresses

        Every word starts out holding its own index.
    """
    def __init__(self, regwid, addrw):
        self.regwid = regwid
        self.ddepth = regwid // 8
        n_words = (1 << addrw) // self.ddepth
        self.mem = [*range(n_words)]

    def ld(self, addr):
        """ returns the word selected by addr
        """
        # NOTE(review): the address is shifted by ddepth itself (not by
        # log2 of it) - confirm against the memory being modelled
        word_idx = addr >> self.ddepth
        return self.mem[word_idx]

    def st(self, addr, data):
        """ stores data, truncated to regwid bits, at the word selected
            by addr
        """
        regmask = (1 << self.regwid) - 1
        word_idx = addr >> self.ddepth
        self.mem[word_idx] = data & regmask
55
56
class Scoreboard(Elaboratable):
    """ Wires together register files, issue units, ALU and branch
        computation units, the function-unit matrices, a group picker and
        the shadow (speculation) matrices.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # the issue queue needs access to the issue groups...
        self.aluissue = IssueUnitGroup(4)
        self.brissue = IssueUnitGroup(1)
        # ... and to the operation / immediate inputs
        self.alu_oper_i = Signal(4, reset_less=True)
        self.alu_imm_i = Signal(rwid, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True)  # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True)  # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True)  # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True)  # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True)  # instruction was accepted
        self.busy_o = Signal(reset_less=True)   # at least one CU is busy

        # branch speculation experiment.  branch_direction is 0 while the
        # branch has not been met yet, 1 indicates "success", 2 is "fail".
        # branch_succ and branch_fail request that the current instruction
        # be made dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb, sync = m.d.comb, m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested here but not connected to anything below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        n_int_alus = 5
        alu_cus = CompUnitALUs(self.rwid, 3)
        br_cus = CompUnitBR(self.rwid, 3)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [alu_cus, br_cus])
        bgt = br_cus.bgt  # get at the branch computation unit
        br1 = br_cus.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0  # for now
        fus = slice(0, n_intfus)  # bit-range covering every integer FU

        # Integer Priority Picker 1: picks between all integer FUs
        picker = GroupPicker(n_intfus)
        m.submodules.intpick1 = picker

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrices: an n_intfus-wide one between the FUs themselves,
        # and a separate one-bit-wide one for the branch
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [
            regdecode.dest_i.eq(self.int_dest_i),
            regdecode.src1_i.eq(self.int_src1_i),
            regdecode.src2_i.eq(self.int_src2_i),
            regdecode.enable_i.eq(self.reg_enable_i),
            self.issue_o.eq(issueunit.issue_o),
        ]

        # take these to outside (issue needs them)
        comb += [
            alu_cus.oper_i.eq(self.alu_oper_i),
            alu_cus.imm_i.eq(self.alu_imm_i),
            br_cus.oper_i.eq(self.br_oper_i),
            br_cus.imm_i.eq(self.br_imm_i),
        ]

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += [
            intfus.dest_i.eq(regdecode.dest_o),
            intfus.src1_i.eq(regdecode.src1_o),
            intfus.src2_i.eq(regdecode.src2_o),
        ]

        fn_issue_o = issueunit.fn_issue_o

        comb += [
            intfus.fn_issue_i.eq(fn_issue_o),
            issueunit.busy_i.eq(cu.busy_o),
            self.busy_o.eq(cu.busy_o.bool()),
        ]

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += [
            allshadown.eq(shadows.shadown_o & bshadow.shadown_o),
            anydie.eq(shadows.go_die_o | bshadow.go_die_o),
            shreset.eq(bspec.match_g_o | bspec.match_f_o),
        ]

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = picker.go_rd_o
        go_wr_o = picker.go_wr_o
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += [
            intfus.go_rd_i[fus].eq(go_rd_o[fus]),   # rd
            intfus.go_wr_i[fus].eq(go_wr_o[fus]),   # wr
            intfus.go_die_i[fus].eq(anydie[fus]),   # die
        ]

        # Connect Picker
        #---------
        comb += [
            picker.rd_rel_i[fus].eq(cu.rd_rel_o[fus]),
            picker.req_rel_i[fus].eq(cu.req_rel_o[fus]),
            picker.readable_i[fus].eq(intfus.readable_o[fus]),
            picker.writable_i[fus].eq(intfus.writable_o[fus]),
        ]

        #---------
        # Shadow Matrix
        #---------

        comb += [
            shadows.issue_i.eq(fn_issue_o),
            shadows.reset_i[fus].eq(bshadow.go_die_o[fus]),
        ]
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += [
            cu.shadown_i[fus].eq(allshadown),
            cu.go_die_i[fus].eq(anydie),
        ]

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for fu in range(n_intfus):
            comb += shadows.s_good_i[fu][fus].eq(go_wr_o[fus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for fu in range(n_intfus):
            comb += shadows.shadow_i[fu][fus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it drives the one-bit
        # bshadow matrix: only shadow_i, s_fail_i and s_good_i need setting

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[fus].eq(shreset[fus])

        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for fu in range(n_intfus):
                with m.If(fn_issue_o & (Const(1<<fu))):
                    comb += bshadow.shadow_i[fu][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for fu in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[fu][0].eq(bspec.match_g_o[fu])
                # ... or it didn't
                comb += bshadow.s_fail_i[fu][0].eq(bspec.match_f_o[fu])

        #---------
        # Connect Register File(s)
        #---------
        comb += [
            int_dest.wen.eq(intfus.dest_rsel_o),
            int_src1.ren.eq(intfus.src1_rsel_o),
            int_src2.ren.eq(intfus.src2_rsel_o),
        ]

        # connect ALUs to regfile
        comb += [
            int_dest.data_i.eq(cu.data_o),
            cu.src1_i.eq(int_src1.data_o),
            cu.src2_i.eq(int_src2.data_o),
        ]

        # connect ALU Computation Units
        comb += [
            cu.go_rd_i[fus].eq(go_rd_o[fus]),
            cu.go_wr_i[fus].eq(go_wr_o[fus]),
            cu.issue_i[fus].eq(fn_issue_o[fus]),
        ]

        return m

    def __iter__(self):
        """ yields the register files' contents followed by the
            top-level signals listed below
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ returns everything produced by __iter__, as a list
        """
        return list(self)
330
331
332
333
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """ simulation process: presents one instruction to the dut, then
        waits for it to be issued.

        bits 2-3 of op select the branch issue group (if either is set)
        or the ALU issue group; bits 0-1 are the operation passed on.
    """
    yield from disable_issue(dut)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    # pick the issue group together with its operation/immediate inputs
    is_branch = (op & (0x3<<2)) != 0
    if is_branch:
        dut_issue, oper_sig, imm_sig = dut.brissue, dut.br_oper_i, dut.br_imm_i
    else:
        dut_issue, oper_sig, imm_sig = (dut.aluissue, dut.alu_oper_i,
                                        dut.alu_imm_i)
    yield dut_issue.insn_i.eq(1)
    yield oper_sig.eq(Const(op & 0x3, 2))
    yield imm_sig.eq(imm)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
358
359
def print_reg(dut, rnums):
    """ simulation process: reads the listed integer registers and prints
        them on one line as "reg <numbers>: <hex values>".

        * :dut:   object whose intregs.regs[n].reg is yielded to be read
        * :rnums: register numbers to print; any iterable is accepted
    """
    # rnums is walked twice (once to read, once to label), so a one-shot
    # iterator has to be materialised first or the labels come out empty
    rnums = list(rnums)
    rs = []
    for rnum in rnums:
        reg = yield dut.intregs.regs[rnum].reg
        rs.append("%x" % reg)
    print("reg %s: %s" % (','.join(map(str, rnums)), ','.join(rs)))
367
368
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """ returns a list of n_ops random instruction tuples
        (src1, src2, dest, op, opi, imm), with a trailing (0, 0) pair
        appended to each tuple when shadowing is set.

        register numbers are in 1..dut.n_regs-1, imm in 1..2^dut.rwid-1
        and op in 0..max_opnums.
    """
    top_reg = dut.n_regs-1
    imm_max = (1<<dut.rwid)-1
    ops = []
    for _ in range(n_ops):
        # the draw order is kept fixed so a seed reproduces the same stream
        src1 = randint(1, top_reg)
        src2 = randint(1, top_reg)
        imm = randint(1, imm_max)
        dest = randint(1, top_reg)
        op = randint(0, max_opnums)
        # opi is 1 only when the 0..2 draw comes out as zero
        opi = int(randint(0, 2) == 0)

        entry = (src1, src2, dest, op, opi, imm)
        if shadowing:
            entry += ((0, 0),)
        ops.append(entry)
    return ops
384
385
386
def scoreboard_sim(dut, alusim):
    """ simulation process: 50 rounds of randomised instructions.

        each round loads random values into the integer registers (mirrored
        into alusim), queues a batch of instructions, waits for the dut to
        drain, then has alusim check and dump the register state.
    """
    seed(0)

    for _ in range(50):

        # set random values in the registers
        for rnum in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[rnum].reg.eq(val)
            alusim.setval(rnum, val)

        # create some instructions (some random, some regression tests)
        instrs = create_random_ops(dut, 15, True, 4)

        # ---- disabled regression cases, kept for reference.  several
        # ---- use older, shorter tuple layouts than the loop below unpacks
        if False:
            instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )

        if False:
            instrs.extend([(7, 3, 2, 4, (0, 0)),
                           (7, 6, 6, 2, (0, 0)),
                           (1, 7, 2, 2, (0, 0))])

        if False:
            instrs.extend([(2, 3, 3, 0, 0, 0, (0, 0)),
                           (5, 3, 3, 1, 0, 0, (0, 0)),
                           (3, 5, 5, 2, 0, 0, (0, 0)),
                           (5, 3, 3, 3, 0, 0, (0, 0)),
                           (3, 5, 5, 0, 0, 0, (0, 0))])

        if False:
            instrs.extend([(3, 3, 4, 0, 0, 13979, (0, 0)),
                           (6, 4, 1, 2, 0, 40976, (0, 0)),
                           (1, 4, 7, 4, 1, 23652, (0, 0))])

        if False:
            instrs.extend([(5, 6, 2, 1),
                           (2, 2, 4, 0)])
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.extend([(2, 6, 2, 1),
                           (2, 1, 2, 0)])

        if False:
            instrs.extend([(1, 2, 7, 2),
                           (7, 1, 5, 0),
                           (4, 4, 1, 1)])

        if False:
            instrs.extend([(5, 6, 2, 2),
                           (1, 1, 4, 1),
                           (6, 5, 3, 0)])

        if False:
            # Write-after-Write Hazard
            instrs.extend([(3, 6, 7, 2),
                           (4, 4, 7, 1)])

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.extend([(1, 1, 1, 1),
                           (1, 5, 3, 0)])

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.extend([(5, 6, 1, 2),
                           (1, 1, 1, 1)])

        if False:
            # self-read-write sandwich
            instrs.extend([(5, 6, 1, 2),
                           (1, 1, 1, 1),
                           (1, 5, 3, 0)])

        if False:
            # very weird failure
            instrs.extend([(5, 2, 5, 2),
                           (2, 6, 3, 0),
                           (4, 2, 2, 1)])

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.extend([(5, 3, 3, 4, (0, 0)),
                           (4, 2, 1, 2, (0, 1))])

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.extend([(5, 3, 3, 4, (0, 0)),
                           (4, 2, 1, 2, (1, 0))])

        if False:
            instrs.extend([(4, 3, 5, 1, 0, (0, 0)),
                           (5, 2, 3, 1, 0, (0, 0)),
                           (7, 1, 5, 2, 0, (0, 0)),
                           (5, 6, 6, 4, 0, (0, 0)),
                           (7, 5, 2, 2, 0, (1, 0)),
                           (1, 7, 5, 0, 0, (0, 1)),
                           (1, 6, 1, 2, 0, (1, 0)),
                           (1, 6, 7, 3, 0, (0, 0)),
                           (6, 7, 7, 0, 0, (0, 0))])

        # issue instruction(s), wait for issue to be free before proceeding
        for idx, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                    (idx, src1, src2, dest, op, opi, imm))
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking: poll the
        # queue length once per cycle until it reads zero
        while (yield dut.qlen_o) != 0:
            yield
        # a few extra cycles before waiting on the busy signal
        for _settle in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
529
530
def test_scoreboard():
    """ builds the issue-to-scoreboard dut, writes its RTLIL to
        test_scoreboard6600.il and runs scoreboard_sim against it,
        recording test_scoreboard6600.vcd
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 16)  # constructed, but not handed to the simulation

    # convert first, so a failed conversion leaves no truncated .il behind
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    # alternative (disabled) branch-speculation run:
    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
544
545
def mem_sim(dut):
    """ simulation process: drives a fixed stimulus sequence into the dut.

        issues a load on unit 0, then stores on units 0-1, presents three
        addresses, then pulses go_ld and go_st in turn.  each step below
        is a set of assignments followed by one clock cycle.
    """
    def step(*assignments):
        # apply every (signal, value) pair, then advance one cycle
        for sig, value in assignments:
            yield sig.eq(value)
        yield

    # issue: load first, then store
    yield from step((dut.ld_i, 0x1), (dut.fn_issue_i, 0x1))
    yield from step((dut.ld_i, 0x0), (dut.st_i, 0x3), (dut.fn_issue_i, 0x2))
    yield from step((dut.st_i, 0x0), (dut.fn_issue_i, 0x0))

    # addresses: present them with enable, then assert addr_we_i
    yield from step((dut.addrs_i[0], 0x012),
                    (dut.addrs_i[1], 0x012),
                    (dut.addrs_i[2], 0x010),
                    (dut.addr_en_i, 0x3))
    yield from step((dut.addr_we_i, 0x3))

    # pulse go_ld for one cycle, then go_st for one cycle
    yield from step((dut.go_ld_i, 0x1))
    yield from step((dut.go_ld_i, 0x0))
    yield from step((dut.go_st_i, 0x2))
    yield from step((dut.go_st_i, 0x0))
573
574
def test_mem_fus():
    """ builds MemFunctionUnits(3, 11), writes its RTLIL to
        test_mem_fus.il and runs mem_sim against it, recording
        test_mem_fus.vcd
    """
    dut = MemFunctionUnits(3, 11)

    # convert first, so a failed conversion leaves no truncated .il behind
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_mem_fus.il", "w") as il_file:
        il_file.write(il_text)

    sim_process = mem_sim(dut)
    run_simulation(dut, sim_process, vcd_name='test_mem_fus.vcd')
583
584
# only the memory function-unit test is run when executed as a script
if __name__ == "__main__":
    test_mem_fus()