add MemSim, remove redundant signal
[soc.git] / src / experiment / score6600.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
4
5 from regfile.regfile import RegFileArray, treereduce
6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
7 from scoreboard.fu_reg_matrix import FURegDepMatrix
8 from scoreboard.global_pending import GlobalPending
9 from scoreboard.group_picker import GroupPicker
10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
12 from scoreboard.instruction_q import Instruction, InstructionQ
13
14 from compalu import ComputationUnitNoDelay
15
16 from alu_hier import ALU, BranchALU
17 from nmutil.latch import SRLatch
18 from nmutil.nmoperator import eq
19
20 from random import randint, seed
21 from copy import deepcopy
22 from math import log
23
24
class Memory(Elaboratable):
    """ Small test memory with one read port and one write port.

    The storage is organised as words of ``regwid`` bits. ``adr`` is
    treated as a byte-style address: its low bits are dropped before it
    is used to index a word (see the note in ``elaborate``).

    Inputs:

    * :regwid: width of each memory word, in bits
    * :addrw:  width of the address signal, in bits

    Attributes:

    * :adr:   address in (shared by the read and the write port)
    * :dat_r: data read out
    * :dat_w: data to write
    * :we:    write enable
    """
    def __init__(self, regwid, addrw):
        # This class is itself named "Memory", so the nmigen storage
        # primitive has to be brought in under a different name: calling
        # "Memory(width=...)" here would call this class's own __init__.
        from nmigen import Memory as NMigenMemory

        # integer division: these values are used as a slice start and as
        # a range() bound, neither of which accepts a float
        self.ddepth = regwid // 8             # bytes per word
        depth = (1 << addrw) // self.ddepth   # number of words
        self.adr = Signal(addrw)
        self.dat_r = Signal(regwid)
        self.dat_w = Signal(regwid)
        self.we = Signal()
        # each word is initialised to its own index
        self.mem = NMigenMemory(width=regwid, depth=depth,
                                init=range(0, depth))

    def elaborate(self, platform):
        m = Module()
        m.submodules.rdport = rdport = self.mem.read_port()
        m.submodules.wrport = wrport = self.mem.write_port()

        # ignore low bits of the address.
        # NOTE(review): the slice starts at ddepth (bytes per word), the
        # same amount MemSim shifts by; log2(ddepth) may have been what
        # was intended - confirm before relying on the address mapping.
        word_adr = self.adr[self.ddepth:]

        m.d.comb += [
            rdport.addr.eq(word_adr),
            self.dat_r.eq(rdport.data),
            # use the same word address as the read port, so that a
            # store followed by a load of the same adr hits the same word
            wrport.addr.eq(word_adr),
            wrport.data.eq(self.dat_w),
            wrport.en.eq(self.we),
        ]
        return m
47
48
class MemSim:
    """ Pure-python model of a word-organised memory, for simulations.

    Holds ``(1<<addrw) // (regwid//8)`` words, each initialised to its
    own index.

    * :regwid: width of each word in bits (stored values are masked to it)
    * :addrw:  width of the address in bits
    """
    def __init__(self, regwid, addrw):
        bytes_per_word = regwid // 8
        n_words = (1 << addrw) // bytes_per_word
        self.regwid = regwid
        self.ddepth = bytes_per_word
        self.mem = list(range(n_words))

    def _index(self, addr):
        """ Convert an address into an index into self.mem.
        """
        # NOTE(review): the shift amount is ddepth (bytes per word), not
        # log2(ddepth) - confirm this is the intended address mapping.
        return addr >> self.ddepth

    def ld(self, addr):
        """ Return the word stored at addr.
        """
        return self.mem[self._index(addr)]

    def st(self, addr, data):
        """ Store data at addr, truncated to regwid bits.
        """
        mask = (1 << self.regwid) - 1
        self.mem[self._index(addr)] = data & mask
61
62
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Groups a set of ALUs (that can handle the same operations)
        together, by concatenating their single-bit issue, go_rd etc.
        signals into multi-bit ones.

        The class works recursively: because the only thing it does is
        concatenate signals, exactly the same code can be handed a
        sequence of *groups* of Computation Units instead of a sequence
        of ALUs. It can therefore present a top-level sequential
        concatenation of all signals in and out of the ALUs, whilst
        still making it convenient to group ALUs together.

        At the lower level, the intent is that groups of (identical)
        ALUs may be passed the same operation. Even beyond that, the
        intent is that such a group of (identical) ALUs actually shares
        the *same pipeline* and as such becomes a "Concurrent Computation
        Unit" as defined by Mitch Alsup (see section 11.4.9.3)
    """
    def __init__(self, rwid, units):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :units:  sequence of ALUs (or CompUnitsBase derivatives)
        """
        self.units = units
        self.rwid = rwid

        # a group-of-groups is as wide as all of its members put together;
        # a plain group has one bit per unit
        if units and isinstance(units[0], CompUnitsBase):
            self.n_units = sum(grp.n_units for grp in self.units)
        else:
            self.n_units = len(units)

        width = self.n_units

        # inputs (one bit per unit)
        self.issue_i = Signal(width, reset_less=True)
        self.go_rd_i = Signal(width, reset_less=True)
        self.go_wr_i = Signal(width, reset_less=True)
        self.shadown_i = Signal(width, reset_less=True)
        self.go_die_i = Signal(width, reset_less=True)

        # outputs (one bit per unit)
        self.busy_o = Signal(width, reset_less=True)
        self.rd_rel_o = Signal(width, reset_less=True)
        self.req_rel_o = Signal(width, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        units = self.units

        # every unit becomes a submodule named comp0, comp1, ...
        for idx, unit in enumerate(units):
            setattr(m.submodules, "comp%d" % idx, unit)

        # outputs: concatenation of the per-unit outputs
        comb += self.rd_rel_o.eq(Cat(*[u.rd_rel_o for u in units]))
        comb += self.req_rel_o.eq(Cat(*[u.req_rel_o for u in units]))
        comb += self.busy_o.eq(Cat(*[u.busy_o for u in units]))

        # inputs: split across the per-unit inputs
        comb += Cat(*[u.go_die_i for u in units]).eq(self.go_die_i)
        comb += Cat(*[u.shadown_i for u in units]).eq(self.shadown_i)
        comb += Cat(*[u.go_wr_i for u in units]).eq(self.go_wr_i)
        comb += Cat(*[u.go_rd_i for u in units]).eq(self.go_rd_i)
        comb += Cat(*[u.issue_i for u in units]).eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "data_o"
        if units:
            comb += self.data_o.eq(treereduce(units))

        # every unit sees the same pair of source operands
        for unit in units:
            comb += unit.src1_i.eq(self.src1_i)
            comb += unit.src2_i.eq(self.src2_i)

        return m
170
171
class CompUnitALUs(CompUnitsBase):
    """ Group of four integer ALU Computation Units, all of which are
        handed the same operation (oper_i).
    """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)

        # Int ALUs, in order: add, sub, mul, shf
        alus = [ALU(rwid) for _ in range(4)]

        # one Computation Unit wrapped around each ALU
        units = [ComputationUnitNoDelay(rwid, 2, alu) for alu in alus]

        CompUnitsBase.__init__(self, rwid, units)

    def elaborate(self, platform):
        m = CompUnitsBase.elaborate(self, platform)

        # hand the same operation to all units (rather than hard-wiring
        # one fixed operation - add/sub/mul/shf - per unit)
        m.d.comb += [unit.oper_i.eq(self.oper_i) for unit in self.units]

        return m
210
211
class CompUnitBR(CompUnitsBase):
    """ Group containing a single Branch Computation Unit.
    """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width

            Note: the branch ALU (bgt) and its Computation Unit (br1) are
            kept as attributes so that a shadow unit can be created for it
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)

        # Branch ALU and CU
        branch_alu = BranchALU(rwid)
        branch_cu = ComputationUnitNoDelay(rwid, 3, branch_alu)
        self.bgt = branch_alu
        self.br1 = branch_cu
        CompUnitsBase.__init__(self, rwid, [branch_cu])

    def elaborate(self, platform):
        m = CompUnitsBase.elaborate(self, platform)

        # hand the same operation to all units (rather than hard-wiring
        # the branch unit to op=bgt)
        m.d.comb += [unit.oper_i.eq(self.oper_i) for unit in self.units]

        return m
243
244
class FunctionUnits(Elaboratable):
    """ Integer Function Units: an FU-FU Dependency Matrix plus an FU-Reg
        Dependency Matrix, wired together.

        * :n_regs:     number of registers (width of the reg-select vectors)
        * :n_int_alus: number of Function Units (width of per-FU vectors)
    """

    def __init__(self, n_regs, n_int_alus):
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        # register-select inputs, n_regs wide
        self.dest_i = Signal(n_regs, reset_less=True)  # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True)  # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True)  # oper2 R# in

        # global pending vectors
        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        # register-select outputs
        self.dest_rsel_o = Signal(n_regs, reset_less=True)  # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True)  # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True)  # src2 reg (bot)

        # per-Function-Unit vectors
        self.req_rel_i = Signal(n_int_alus, reset_less=True)
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.req_rel_o = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here (set up in
        # elaborate), for use in WaWGrid

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        n_fus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        fudeps = FUFUDepMatrix(n_fus, n_fus)
        m.submodules.intfudeps = fudeps
        # Integer FU-Reg Dep Matrix
        regdeps = FURegDepMatrix(n_fus, self.n_regs)
        m.submodules.intregdeps = regdeps

        # global pending outputs come from the reg-select outputs, which
        # are also looped back into the FU-Reg matrix pending inputs
        comb += [
            self.g_int_rd_pend_o.eq(regdeps.rd_rsel_o),
            self.g_int_wr_pend_o.eq(regdeps.wr_rsel_o),
            regdeps.rd_pend_i.eq(regdeps.rd_rsel_o),
            regdeps.wr_pend_i.eq(regdeps.wr_rsel_o),
        ]

        # FU-Reg pending outputs feed the FU-FU matrix
        comb += [
            fudeps.rd_pend_i.eq(regdeps.rd_pend_o),
            fudeps.wr_pend_i.eq(regdeps.wr_pend_o),
        ]
        self.wr_pend_o = regdeps.wr_pend_o  # also output for use in WaWGrid

        # FU-FU matrix: issue / go / die in, readable / writable out
        comb += [
            fudeps.issue_i.eq(self.fn_issue_i),
            fudeps.go_rd_i.eq(self.go_rd_i),
            fudeps.go_wr_i.eq(self.go_wr_i),
            fudeps.go_die_i.eq(self.go_die_i),
            self.readable_o.eq(fudeps.readable_o),
            self.writable_o.eq(fudeps.writable_o),
        ]

        # Connect function issue / arrays, and dest/src1/src2
        comb += [
            regdeps.dest_i.eq(self.dest_i),
            regdeps.src1_i.eq(self.src1_i),
            regdeps.src2_i.eq(self.src2_i),
        ]

        comb += [
            regdeps.go_rd_i.eq(self.go_rd_i),
            regdeps.go_wr_i.eq(self.go_wr_i),
            regdeps.go_die_i.eq(self.go_die_i),
            regdeps.issue_i.eq(self.fn_issue_i),
        ]

        comb += [
            self.dest_rsel_o.eq(regdeps.dest_rsel_o),
            self.src1_rsel_o.eq(regdeps.src1_rsel_o),
            self.src2_rsel_o.eq(regdeps.src2_rsel_o),
        ]

        return m
320
321
class Scoreboard(Elaboratable):
    """ Top-level scoreboard: wires together the register files, the
        Computation Units (four ALUs plus one branch unit), the Function
        Unit dependency matrices, a Group Picker, the issue unit, the
        shadow matrices and the branch speculation recorder.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these
        # (group sizes match the 4 ALU units and 1 branch unit created
        # in elaborate)
        self.aluissue = IssueUnitGroup(4)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # NOTE: the FP ports are created but not connected to anything
        # further down in this method
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        # 5 = the four units in CompUnitALUs plus the one in CompUnitBR
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 2)
        cub = CompUnitBR(self.rwid, 2)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(n_intfus) # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.
        # NOTE(review): the branch shadow is a separate, 1-wide matrix
        # (bshadow) here, rather than an extra bit on "shadows".
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [    regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     self.issue_o.eq(issueunit.issue_o)
                ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cub.oper_i.eq(self.br_oper_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        # busy if any one of the Computation Units is busy
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        # the write-hazard shadows are reset when the branch shadow
        # signals go_die
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the separate
        # 1-wide branch ShadowMatrix (bshadow) created above.
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # branch is "active" from its issue until its go_wr
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                # only the FU whose issue bit is set gets the branch shadow
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # NOTE: the 0x1f mask is 5 bits wide, i.e. it matches n_int_alus = 5
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            # data_o+1 gives the 1="success" / 2="fail" encoding of
            # branch_direction_o (0 is reserved for "not met yet")
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        # yields the signals used as the port list (see ports())
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ Return the list of signals yielded by __iter__.
        """
        return list(self)
591
class IssueToScoreboard(Elaboratable):
    """ Connects an InstructionQ to a Scoreboard: the instruction at the
        head of the queue is presented to the scoreboard, and is popped
        from the queue once its Function-Unit-Group reports an issue.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs (all passed through to InstructionQ / Scoreboard):

            * :qlen:   instruction queue length
            * :n_in:   number of instruction inputs (entries in data_i)
            * :n_out:  passed to InstructionQ as its last argument
            * :rwid:   bit width of register file(s)
            * :opwid:  operand bit width
            * :n_regs: number of registers
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # presumably a (width, signed) shape: log2(qlen)+2 bits, unsigned
        mqbits = (int(log(qlen) / log(2))+2, False)
        self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
        self.p_ready_o = Signal() # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True) # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        # (NOTE: this attribute only exists once elaborate has been called)
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when iq.n_sub_i is set (below).

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()

        with m.If(wait_issue_br | wait_issue_alu):
            # set instruction pop length to 1 if the unit accepted
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            dest = iq.data_o[0].dest_i
            src1 = iq.data_o[0].src1_i
            src2 = iq.data_o[0].src2_i
            op = iq.data_o[0].oper_i

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1) # enable the regfile

            # choose a Function-Unit-Group: bits 2-3 of the operation
            # select the branch group, bits 0-1 are handed on as the
            # operation within the chosen group
            with m.If((op & (0x3<<2)) != 0): # branch
                comb += sc.brissue.insn_i.eq(1)
                comb += sc.br_oper_i.eq(op & 0x3)
                comb += wait_issue_br.eq(1)
            with m.Else(): # alu
                comb += sc.aluissue.insn_i.eq(1)
                comb += sc.alu_oper_i.eq(op & 0x3)
                comb += wait_issue_alu.eq(1)

            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            #yield sc.branch_fail_i.eq(branch_fail)
            #yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        # yields the signals used as the port list (see ports())
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        """ Return the list of signals yielded by __iter__.
        """
        return list(self)
694
695
# Operation codes used by the simulation.  Codes 0-3 are ALU operations;
# codes 4-7 (non-zero in bits 2-3) are branch operations.
(IADD, ISUB, IMUL, ISHF,
 IBGT, IBLT, IBEQ, IBNE) = range(8)
704
class RegSim:
    """ Pure-python reference model of the integer register file and ALU,
        used to check the results produced by the hardware simulation.

        * :rwidth: register width in bits (results are masked to it)
        * :nregs:  number of registers (all start at zero)
    """
    def __init__(self, rwidth, nregs):
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, src1, src2, dest):
        """ Perform operation op on registers src1 and src2, store the
            result (masked to rwidth bits) in register dest, return it.

            Raises ValueError if op is not one of the known operations.
        """
        maxbits = (1 << self.rwidth) - 1
        src1 = self.regs[src1] & maxbits
        src2 = self.regs[src2] & maxbits
        if op == IADD:
            val = src1 + src2
        elif op == ISUB:
            val = src1 - src2
        elif op == IMUL:
            val = src1 * src2
        elif op == ISHF:
            val = src1 >> (src2 & maxbits)
        elif op == IBGT:
            val = int(src1 > src2)
        elif op == IBLT:
            val = int(src1 < src2)
        elif op == IBEQ:
            val = int(src1 == src2)
        elif op == IBNE:
            val = int(src1 != src2)
        else:
            # without this, an unknown op reached "val &= maxbits" below
            # with val never having been assigned
            raise ValueError("unknown operation %r" % (op,))
        val &= maxbits
        self.setval(dest, val)
        return val

    def setval(self, dest, val):
        """ Set register dest to val (no masking is applied).
        """
        print ("sim setval", dest, hex(val))
        self.regs[dest] = val

    def dump(self, dut):
        """ Simulation generator: print expected against actual value for
            every register of dut.intregs.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """ Simulation generator: compare every register of dut.intregs
            with the model.  On the first mismatch, dump all registers
            and raise AssertionError.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                # raised explicitly: a bare "assert False" is removed when
                # python runs with -O, which would let mismatches pass
                raise AssertionError("reg %d expected %x received %x" %
                                     (i, val, reg))
751
def instr_q(dut, op, src1, src2, dest, branch_success, branch_fail):
    """ Simulation generator: place one instruction on dut.data_i, request
        that it be added to the queue, and wait until dut.p_ready_o is set.

        branch_success and branch_fail are accepted but not used here.
    """
    batch = [{'oper_i': op, 'dest_i': dest, 'src1_i': src1, 'src2_i': src2}]
    n_send = len(batch)

    # present each instruction on its data_i slot
    for idx, insn in enumerate(batch):
        yield from eq(dut.data_i[idx], insn)
        di = yield dut.data_i[idx]
        print ("senddata %d %x" % (idx, di))

    # request the add, then step a cycle at a time until it is accepted
    yield dut.p_add_i.eq(n_send)
    yield
    while not (yield dut.p_ready_o):
        yield

    yield dut.p_add_i.eq(0)
768
769
def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
    """ Simulation generator: issue one instruction directly to the
        scoreboard's issue groups, and wait for it to be accepted.
    """
    yield from disable_issue(dut)

    # register numbers
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    # bits 2-3 of op select the branch group, otherwise it is the alu
    # group; bits 0-1 are the operation within the chosen group
    if op & (0x3 << 2):
        dut_issue, oper_i = dut.brissue, dut.br_oper_i
    else:
        dut_issue, oper_i = dut.aluissue, dut.alu_oper_i
    yield dut_issue.insn_i.eq(1)
    yield oper_i.eq(Const(op & 0x3, 2))
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
792
793
def print_reg(dut, rnums):
    """ Simulation generator: read the registers numbered in rnums from
        dut.intregs and print them (in hex) on a single line.
    """
    values = []
    for rnum in rnums:
        values.append("%x" % (yield dut.intregs.regs[rnum].reg))
    labels = ','.join(str(rnum) for rnum in rnums)
    print ("reg %s: %s" % (labels, ','.join(values)))
801
802
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """ Return a list of n_ops random instructions.

        Each instruction is a tuple (src1, src2, dest, op), with register
        numbers between 1 and dut.n_regs-1 and op between 0 and
        max_opnums (all inclusive).  If shadowing is set, a (0, 0) pair
        of shadow flags is added as a fifth item.
    """
    insts = []
    for _ in range(n_ops):
        # drawn in the order src1, src2, dest, then op
        src1, src2, dest = (randint(1, dut.n_regs-1) for _ in range(3))
        inst = (src1, src2, dest, randint(0, max_opnums))
        if shadowing:
            inst += ((0, 0),)
        insts.append(inst)
    return insts
816
817
def wait_for_busy_clear(dut):
    """ Simulation generator: step one cycle at a time until dut.busy_o
        reads as zero.
    """
    while (yield dut.busy_o):
        print ("busy",)
        yield
825
def disable_issue(dut):
    """ Simulation generator: clear insn_i on the alu issue group and
        then on the branch issue group.
    """
    for group in (dut.aluissue, dut.brissue):
        yield group.insn_i.eq(0)
829
830
def wait_for_issue(dut, dut_issue):
    """ Simulation generator: step one cycle at a time until
        dut_issue.fn_issue_o is non-zero, then clear the issue requests
        and the register-decode enable.
    """
    while not (yield dut_issue.fn_issue_o):
        print ("busy",)
        yield
    # accepted: stop requesting issue, stop decoding registers
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
842
def scoreboard_branch_sim(dut, alusim):
    """ Simulation generator: branch-speculation test.

        Issues a short program containing one branch (with a "success"
        and a "fail" instruction stream attached) directly to the
        scoreboard through int_instr, replays the same program on the
        alusim register model, then checks the registers match.

        dut is expected to provide branch_direction_o, n_regs, intregs
        and busy_o, plus everything int_instr uses.
    """

    iseed = 3

    for i in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        # (the same value goes into the hardware and into the model)
        for i in range(1, dut.n_regs):
            val = 31+i*3
            # the fixed value above is immediately replaced by a random one
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # disabled: randomly-generated program
        if False:
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            # a branch carries its (success, fail) instruction lists in
            # the position where other instructions have "dest"
            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        # enabled: fixed program, one add followed by one branch
        if True:
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            # None = no instruction on this path
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # independent copy for the model: the issue loop below empties insts
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        # NOTE: instrs is the same list object as insts
        instrs = insts
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            # NOTE(review): each condition below skips the instruction
            # whose flag *matches* the branch direction, whereas the
            # trailing comments describe the opposite - confirm intent.
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            # once the branch direction is known, issue without shadowing
            if branch_direction != 0:
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            yield from int_instr(dut, op, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay the program on the model, following only the path that
        # the model's own branch result selects
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            branch_res = alusim.op(op, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
955
956
def scoreboard_sim(dut, alusim):
    """ Simulation generator: random-instruction test.

        Fills the registers with random values, pushes a list of
        instructions through the instruction queue (instr_q) whilst
        running each one on the alusim register model, waits for the
        queue to empty and the units to go idle, then checks that the
        registers match the model.

        The "if False" sections are disabled regression cases.
    """

    #seed(2)

    for i in range(1):

        # set random values in the registers
        # (the same value goes into the hardware and into the model)
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        # each is (src1, src2, dest, op, (branch_success, branch_fail))
        instrs = []
        if True:
            instrs = create_random_ops(dut, 15, True, 3)

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )


        if False:
            instrs.append((2, 3, 3, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, (0, 0)))
            instrs.append((3, 5, 5, 2, (0, 0)))
            instrs.append((5, 3, 3, 3, (0, 0)))
            instrs.append((3, 5, 5, 0, (0, 0)))

        # NOTE(review): the disabled cases below that append 4-item
        # tuples would not unpack in the issue loop further down, which
        # expects a fifth (br_ok, br_fail) item - add one before enabling.
        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            # branch (op 4) on fixed register values, followed by an
            # instruction flagged as dependent on branch fail
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            # as above, but followed by an instruction flagged as
            # dependent on branch success
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, (0, 0)) )
            instrs.append( (5, 2, 3, 1, (0, 0)) )
            instrs.append( (7, 1, 5, 2, (0, 0)) )
            instrs.append( (5, 6, 6, 4, (0, 0)) )
            instrs.append( (7, 5, 2, 2, (1, 0)) )
            instrs.append( (1, 7, 5, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, (1, 0)) )
            instrs.append( (1, 6, 7, 3, (0, 0)) )
            instrs.append( (6, 7, 7, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, (src1, src2, dest, op, (br_ok, br_fail)) in enumerate(instrs):

            print ("instr %d: (%d, %d, %d, %d)" % (i, src1, src2, dest, op))
            # run on the model first, then queue on the hardware
            alusim.op(op, src1, src2, dest)
            yield from instr_q(dut, op, src1, src2, dest, br_ok, br_fail)

        # wait for all instructions to stop before checking
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # a few more cycles after the queue has emptied, before
        # starting to look at the busy signal
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1089
1090
def test_scoreboard():
    """ Build the instruction-queue-plus-scoreboard design, write it out
        as RTLIL to test_scoreboard6600.il, then run the random
        instruction simulation with a VCD trace.
    """
    dut = IssueToScoreboard(qlen=2, n_in=1, n_out=1,
                            rwid=16, opwid=8, n_regs=8)
    alusim = RegSim(rwidth=16, nregs=8)
    memsim = MemSim(regwid=16, addrw=16)

    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    # to run the branch test instead, pass scoreboard_branch_sim(dut, alusim)
    # as the generator here
    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')
1104
1105
# run the scoreboard test when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()