remove unneeded signals
[soc.git] / src / experiment / score6600.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
4
5 from regfile.regfile import RegFileArray, treereduce
6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
7 from scoreboard.fu_reg_matrix import FURegDepMatrix
8 from scoreboard.global_pending import GlobalPending
9 from scoreboard.group_picker import GroupPicker
10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
12 from scoreboard.instruction_q import Instruction, InstructionQ
13
14 from compalu import ComputationUnitNoDelay
15
16 from alu_hier import ALU, BranchALU
17 from nmutil.latch import SRLatch
18 from nmutil.nmoperator import eq
19
20 from random import randint, seed
21 from copy import deepcopy
22 from math import log
23
24
class Memory(Elaboratable):
    """ Simple test memory: one read port and one write port around an
        nmigen memory primitive, initialised so that word N contains N.
    """
    def __init__(self, regwid, addrw):
        """ Inputs:

            * :regwid: bit width of each memory word
            * :addrw:  bit width of the address input
        """
        # this class is itself called Memory, so the bare name would
        # recurse into this constructor: fetch nmigen's primitive under
        # an alias instead.
        from nmigen import Memory as NMigenMemory

        # integer division: ddepth is used as a slice index and depth as
        # a range() bound, neither of which accepts a float
        self.ddepth = regwid // 8
        depth = (1 << addrw) // self.ddepth
        self.adr = Signal(addrw)
        self.dat_r = Signal(regwid)
        self.dat_w = Signal(regwid)
        self.we = Signal()
        self.mem = NMigenMemory(width=regwid, depth=depth,
                                init=range(0, depth))

    def elaborate(self, platform):
        """ Creates the read and write ports and connects them to the
            adr / dat_r / dat_w / we signals.
        """
        m = Module()
        m.submodules.rdport = rdport = self.mem.read_port()
        m.submodules.wrport = wrport = self.mem.write_port()
        # NOTE(review): the read address drops the low ddepth bits whereas
        # the write address uses adr unmodified - confirm this is intended.
        m.d.comb += [
            rdport.addr.eq(self.adr[self.ddepth:]),  # ignore low bits
            self.dat_r.eq(rdport.data),
            wrport.addr.eq(self.adr),
            wrport.data.eq(self.dat_w),
            wrport.en.eq(self.we),
        ]
        return m
47
48
class MemSim:
    """ Pure-python model of a word memory, preloaded so that word N
        holds the value N.
    """
    def __init__(self, regwid, addrw):
        """ Inputs:

            * :regwid: bit width of each stored word
            * :addrw:  bit width of the address space
        """
        self.regwid = regwid
        self.ddepth = regwid // 8
        n_words = (1 << addrw) // self.ddepth
        self.mem = list(range(n_words))

    def _index(self, addr):
        # low address bits are discarded by shifting right by ddepth
        return addr >> self.ddepth

    def ld(self, addr):
        """ returns the word stored at addr """
        return self.mem[self._index(addr)]

    def st(self, addr, data):
        """ stores data at addr, truncated to regwid bits """
        mask = (1 << self.regwid) - 1
        self.mem[self._index(addr)] = data & mask
61
62
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Amazingly, this class works recursively.  It's supposed to just
        look after some ALUs (that can handle the same operations),
        grouping them together, however it turns out that the same code
        can also group *groups* of Computation Units together as well.

        Basically it was intended just to concatenate the ALU's issue,
        go_rd etc. signals together, which start out as bits and become
        sequences.  Turns out that the same trick works just as well
        on Computation Units!

        So this class may be used recursively to present a top-level
        sequential concatenation of all the signals in and out of
        ALUs, whilst at the same time making it convenient to group
        ALUs together.

        At the lower level, the intent is that groups of (identical)
        ALUs may be passed the same operation.  Even beyond that,
        the intent is that that group of (identical) ALUs actually
        share the *same pipeline* and as such become a "Concurrent
        Computation Unit" as defined by Mitch Alsup (see section
        11.4.9.3)
    """
    def __init__(self, rwid, units):
        """ Inputs:

            * :rwid:  bit width of register file(s) - both FP and INT
            * :units: sequence of ALUs (or CompUnitsBase derivatives)
        """
        self.units = units
        self.rwid = rwid
        if units and isinstance(units[0], CompUnitsBase):
            # a group of groups: total width is the sum of the members
            self.n_units = sum(u.n_units for u in units)
        else:
            self.n_units = len(units)

        n_units = self.n_units

        # inputs
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)

        # outputs
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)
        # input operand

    def elaborate(self, platform):
        """ Adds every unit as submodule "compN", concatenates the per-unit
            status outputs into this group's outputs, fans the group's
            control inputs back out to the units, ORs the unit data outputs
            together and broadcasts src1/src2 to all units.
        """
        m = Module()
        comb = m.d.comb
        units = self.units

        for i, alu in enumerate(units):
            setattr(m.submodules, "comp%d" % i, alu)

        # unit outputs -> concatenated group outputs
        comb += self.rd_rel_o.eq(Cat(*[alu.rd_rel_o for alu in units]))
        comb += self.req_rel_o.eq(Cat(*[alu.req_rel_o for alu in units]))
        comb += self.busy_o.eq(Cat(*[alu.busy_o for alu in units]))

        # group inputs -> split back out across the units
        comb += Cat(*[alu.go_die_i for alu in units]).eq(self.go_die_i)
        comb += Cat(*[alu.shadown_i for alu in units]).eq(self.shadown_i)
        comb += Cat(*[alu.go_wr_i for alu in units]).eq(self.go_wr_i)
        comb += Cat(*[alu.go_rd_i for alu in units]).eq(self.go_rd_i)
        comb += Cat(*[alu.issue_i for alu in units]).eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "data_o"
        if units:
            data_o = treereduce(units)
            comb += self.data_o.eq(data_o)

        # every unit sees the same two source operands
        for alu in units:
            comb += alu.src1_i.eq(self.src1_i)
            comb += alu.src2_i.eq(self.src2_i)

        return m
170
171
class CompUnitALUs(CompUnitsBase):
    """ Group of four integer ALU Computation Units (add, sub, mul, shf)
        which all receive the same operation and immediate.
    """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:  bit width of register file(s) - both FP and INT
            * :opwid: operand bit width
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs, in order: add, sub, mul, shf.  all four ALUs are
        # created first, then each is wrapped in a Computation Unit.
        alus = [ALU(rwid) for _ in range(4)]
        aluopwid = 3  # extra bit for immediate mode
        units = [ComputationUnitNoDelay(rwid, aluopwid, alu) for alu in alus]

        super().__init__(rwid, units)

    def elaborate(self, platform):
        """ Base-class wiring plus distribution of oper_i and imm_i. """
        m = super().elaborate(platform)
        comb = m.d.comb

        # hand the same operation (bits 0-2 of each unit's oper_i) and
        # the same immediate to all units
        for alu in self.units:
            comb += alu.oper_i[0:3].eq(self.oper_i)
            comb += alu.imm_i.eq(self.imm_i)

        return m
209
210
class CompUnitBR(CompUnitsBase):
    """ Group containing a single branch Computation Unit. """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:  bit width of register file(s) - both FP and INT
            * :opwid: operand bit width

            Note: the branch ALU (bgt) and its Computation Unit (br1) are
            kept as attributes so that a shadow unit can be created for
            them.
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Branch ALU and CU
        self.bgt = BranchALU(rwid)
        aluopwid = 3  # extra bit for immediate mode
        self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)
        super().__init__(rwid, [self.br1])

    def elaborate(self, platform):
        """ Base-class wiring plus distribution of oper_i and imm_i. """
        m = super().elaborate(platform)
        comb = m.d.comb

        # hand the same operation and immediate to all units
        for unit in self.units:
            comb += unit.oper_i.eq(self.oper_i)
            comb += unit.imm_i.eq(self.imm_i)

        return m
244
245
class FunctionUnits(Elaboratable):
    """ Wraps the FU-FU and FU-Reg dependency matrices behind a single
        set of issue / go_rd / go_wr / go_die and register-select ports.
    """

    def __init__(self, n_regs, n_int_alus):
        """ Inputs:

            * :n_regs:     number of registers (width of the reg vectors)
            * :n_int_alus: number of Function Units (width of FU vectors)
        """
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        # register-width vectors
        self.dest_i = Signal(n_regs, reset_less=True)  # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True)  # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True)  # oper2 R# in

        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        self.dest_rsel_o = Signal(n_regs, reset_less=True)  # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True)  # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True)  # src2 reg (bot)

        # function-unit-width vectors
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here, for use in
        # WaWGrid (the attribute is only created by elaborate)

    def elaborate(self, platform):
        """ Instantiates both dependency matrices and cross-connects them.
            Also sets self.wr_pend_o as a side-effect.
        """
        m = Module()
        comb = m.d.comb

        n_fus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        fu_deps = FUFUDepMatrix(n_fus, n_fus)
        m.submodules.intfudeps = fu_deps
        # Integer FU-Reg Dep Matrix (2 source operands)
        reg_deps = FURegDepMatrix(n_fus, self.n_regs, 2)
        m.submodules.intregdeps = reg_deps

        # global pending vectors out, and fed straight back in
        comb += [
            self.g_int_rd_pend_o.eq(reg_deps.v_rd_rsel_o),
            self.g_int_wr_pend_o.eq(reg_deps.v_wr_rsel_o),
            reg_deps.rd_pend_i.eq(reg_deps.v_rd_rsel_o),
            reg_deps.wr_pend_i.eq(reg_deps.v_wr_rsel_o),
        ]

        # FU-Reg pending outputs drive the FU-FU matrix
        comb += [
            fu_deps.rd_pend_i.eq(reg_deps.rd_pend_o),
            fu_deps.wr_pend_i.eq(reg_deps.wr_pend_o),
        ]
        self.wr_pend_o = reg_deps.wr_pend_o  # also output for use in WaWGrid

        # FU-FU matrix control in, readable/writable out
        comb += [
            fu_deps.issue_i.eq(self.fn_issue_i),
            fu_deps.go_rd_i.eq(self.go_rd_i),
            fu_deps.go_wr_i.eq(self.go_wr_i),
            fu_deps.go_die_i.eq(self.go_die_i),
            self.readable_o.eq(fu_deps.readable_o),
            self.writable_o.eq(fu_deps.writable_o),
        ]

        # Connect function issue / arrays, and dest/src1/src2
        comb += [
            reg_deps.dest_i.eq(self.dest_i),
            reg_deps.src_i[0].eq(self.src1_i),
            reg_deps.src_i[1].eq(self.src2_i),
        ]

        comb += [
            reg_deps.go_rd_i.eq(self.go_rd_i),
            reg_deps.go_wr_i.eq(self.go_wr_i),
            reg_deps.go_die_i.eq(self.go_die_i),
            reg_deps.issue_i.eq(self.fn_issue_i),
        ]

        # register selection vectors out
        comb += [
            self.dest_rsel_o.eq(reg_deps.dest_rsel_o),
            self.src1_rsel_o.eq(reg_deps.src_rsel_o[0]),
            self.src2_rsel_o.eq(reg_deps.src_rsel_o[1]),
        ]

        return m
319
320
class Scoreboard(Elaboratable):
    """ Top-level scoreboard: instantiates the INT and FP register files,
        the ALU and branch Computation Units, the Function Unit dependency
        matrices, a group picker, the issue unit and two shadow matrices,
        and wires them all together in elaborate().
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(4)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.alu_imm_i = Signal(rwid, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        # NOTE(review): elaborate() sets branch_direction_o to
        # br1.data_o+1 and treats data_o == 1 as "branch occurs", which
        # looks like the opposite encoding to the one described above -
        # confirm which is intended.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Builds the submodules and connects them up.  Returns the
            Module.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # NOTE(review): the FP ports are created here but not connected
        # to anything further down in this method.
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        # 5 = the four units of CompUnitALUs plus the one of CompUnitBR
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 3)
        cub = CompUnitBR(self.rwid, 3)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1
        # NOTE(review): bgt is not referenced again in this method.

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now
        # NOTE(review): n_fp_fus is not referenced again in this method.

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(n_intfus) # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        # NOTE(review): as written the branch shadow is a second, separate
        # ShadowMatrix of width 1 (bshadow) rather than an extra column on
        # the first - the comment above may be out of date.
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [ regdecode.dest_i.eq(self.int_dest_i),
                  regdecode.src1_i.eq(self.int_src1_i),
                  regdecode.src2_i.eq(self.int_src2_i),
                  regdecode.enable_i.eq(self.reg_enable_i),
                  self.issue_o.eq(issueunit.issue_o)
                ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cua.imm_i.eq(self.alu_imm_i)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        # scoreboard is busy if any Computation Unit is busy
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # branch is "active" from the cycle it is issued (or while the
        # recorder's active_i is set) until the branch unit gets go_wr
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                # bit i of fn_issue_o selects row i of the branch shadow
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # NOTE(review): the 0x1f masks below are 5 bits wide, matching
        # n_intfus == 5; they would need changing if n_int_alus changes.
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        """ yields the register files' contents followed by this module's
            top-level register-number, issue and branch signals
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ returns everything yielded by __iter__ as a list """
        return list(self)
594
595
class IssueToScoreboard(Elaboratable):
    """ Connects an InstructionQ to a Scoreboard: the instruction at the
        head of the queue is presented to either the branch or the ALU
        issue group, and popped from the queue once that group has
        issued it.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   passed to InstructionQ as the queue length
            * :n_in:   number of instruction input slots (data_i entries)
            * :n_out:  passed to InstructionQ as its n_out argument
            * :rwid:   bit width of register file(s)
            * :opwid:  operand bit width
            * :n_regs: depth of register file(s)
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # (width, signed) shape: log2(qlen)+2 bits, unsigned
        mqbits = (int(log(qlen) / log(2))+2, False)
        self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
        self.p_ready_o = Signal() # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True) # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        """ Creates the InstructionQ and Scoreboard and links them.  Also
            sets self.intregs (the Scoreboard's integer regfile) as a
            side-effect, for use by the test code.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync
        # NOTE(review): sync is not used in this method.

        iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed (iq.n_sub_i set to 1, below) when
        # the issue group that was offered the instruction reports a
        # non-zero fn_issue_o.

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()

        with m.If(wait_issue_br | wait_issue_alu):
            # set instruction pop length to 1 if the unit accepted
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            imm = iq.data_o[0].imm_i
            dest = iq.data_o[0].dest_i
            src1 = iq.data_o[0].src1_i
            src2 = iq.data_o[0].src2_i
            op = iq.data_o[0].oper_i
            opi = iq.data_o[0].opim_i # immediate set

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1) # enable the regfile

            # choose a Function-Unit-Group: bits 2-3 of the opcode being
            # non-zero selects the branch group.  the unit is handed the
            # low 2 opcode bits with the immediate-mode flag on top.
            with m.If((op & (0x3<<2)) != 0): # branch
                comb += sc.brissue.insn_i.eq(1)
                comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.br_imm_i.eq(imm)
                comb += wait_issue_br.eq(1)
            with m.Else(): # alu
                comb += sc.aluissue.insn_i.eq(1)
                comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.alu_imm_i.eq(imm)
                comb += wait_issue_alu.eq(1)

            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            #yield sc.branch_fail_i.eq(branch_fail)
            #yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        """ yields p_ready_o, every field of every data_i entry, then
            p_add_i
        """
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        """ returns everything yielded by __iter__ as a list """
        return list(self)
702
703
# opcodes understood by RegSim.op.  0-3 are ALU operations; 4-7 (any of
# bits 2-3 set) are branch comparisons.
IADD = 0
ISUB = 1
IMUL = 2
ISHF = 3
IBGT = 4
IBLT = 5
IBEQ = 6
IBNE = 7


class RegSim:
    """ Pure-python model of the integer register file and ALU, used to
        compute the values the hardware is expected to produce.
    """
    def __init__(self, rwidth, nregs):
        """ Inputs:

            * :rwidth: bit width of each register
            * :nregs:  number of registers (all start at zero)
        """
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, op_imm, imm, src1, src2, dest):
        """ Performs one operation on the simulated registers.

            * :op:     opcode, one of IADD ... IBNE
            * :op_imm: if true, the second operand is imm, not a register
            * :imm:    immediate value (only used when op_imm is true)
            * :src1:   register number of the first operand
            * :src2:   register number of the second operand
            * :dest:   register number that receives the result

            Returns the result, truncated to rwidth bits.  Raises
            ValueError for an unrecognised opcode, in which case no
            register is modified.
        """
        maxbits = (1 << self.rwidth) - 1
        src1 = self.regs[src1] & maxbits
        if op_imm:
            src2 = imm
        else:
            src2 = self.regs[src2] & maxbits
        if op == IADD:
            val = src1 + src2
        elif op == ISUB:
            val = src1 - src2
        elif op == IMUL:
            val = src1 * src2
        elif op == ISHF:
            val = src1 >> (src2 & maxbits)
        elif op == IBGT:
            val = int(src1 > src2)
        elif op == IBLT:
            val = int(src1 < src2)
        elif op == IBEQ:
            val = int(src1 == src2)
        elif op == IBNE:
            val = int(src1 != src2)
        else:
            # previously fell through to an UnboundLocalError on "val"
            raise ValueError("unknown opcode %r" % (op,))
        val &= maxbits
        self.setval(dest, val)
        return val

    def setval(self, dest, val):
        """ sets simulated register dest to val (and logs it) """
        print ("sim setval", dest, hex(val))
        self.regs[dest] = val

    def dump(self, dut):
        """ simulation process: prints expected vs actual for every
            register, reading actual values from dut.intregs
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """ simulation process: compares every register in dut.intregs
            against the simulated value.  On the first mismatch, dumps
            all registers and raises AssertionError.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                # explicit raise: a bare "assert False" vanishes under -O
                raise AssertionError("reg %d expected %x received %x" %
                                     (i, val, reg))
761 assert False
762
def instr_q(dut, op, op_imm, imm, src1, src2, dest,
            branch_success, branch_fail):
    """ simulation process: places one instruction on dut.data_i, asserts
        p_add_i and waits until p_ready_o is seen before de-asserting it.

        branch_success and branch_fail are accepted but not used here.
    """
    queued = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
               'src1_i': src1, 'src2_i': src2}]
    sendlen = len(queued)

    for slot, fields in enumerate(queued):
        yield from eq(dut.data_i[slot], fields)
        sent = yield dut.data_i[slot]
        print ("senddata %d %x" % (slot, sent))

    # request the add, then poll (one clock per poll) until accepted
    yield dut.p_add_i.eq(sendlen)
    yield
    while not (yield dut.p_ready_o):
        yield

    yield dut.p_add_i.eq(0)
781
782
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """ simulation process: presents one instruction directly to the
        scoreboard's issue inputs and waits for it to be issued.

        Opcodes with either of bits 2-3 set go to the branch issue group,
        everything else to the ALU issue group; in both cases the unit is
        given the low 2 bits of the opcode.
    """
    yield from disable_issue(dut)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    # pick the issue group and its operation / immediate inputs
    is_branch = (op & (0x3 << 2)) != 0
    if is_branch:
        dut_issue, oper_sig, imm_sig = dut.brissue, dut.br_oper_i, dut.br_imm_i
    else:
        dut_issue, oper_sig, imm_sig = (dut.aluissue, dut.alu_oper_i,
                                        dut.alu_imm_i)
    yield dut_issue.insn_i.eq(1)
    yield oper_sig.eq(Const(op & 0x3, 2))
    yield imm_sig.eq(imm)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
807
808
def print_reg(dut, rnums):
    """ simulation process: reads the listed registers from dut.intregs
        and prints their numbers and (hex) values on one line.
    """
    values = []
    for rnum in rnums:
        current = yield dut.intregs.regs[rnum].reg
        values.append("%x" % current)
    labels = ','.join(str(rnum) for rnum in rnums)
    print ("reg %s: %s" % (labels, ','.join(values)))
816
817
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """ Returns a list of n_ops random instructions as tuples of
        (src1, src2, dest, op, opi, imm), with a trailing (0, 0) shadow
        pair appended to each when shadowing is set.

        Register numbers are drawn from 1..dut.n_regs-1, the immediate
        from 1..2**dut.rwid-1 and the opcode from 0..max_opnums.
    """
    top_reg = dut.n_regs - 1
    top_imm = (1 << dut.rwid) - 1

    insts = []
    for _ in range(n_ops):
        # NB: the order of the randint calls fixes the generated sequence
        src1 = randint(1, top_reg)
        src2 = randint(1, top_reg)
        imm = randint(1, top_imm)
        dest = randint(1, top_reg)
        op = randint(0, max_opnums)
        # immediate mode in one draw out of three (when the draw is zero)
        opi = int(randint(0, 2) == 0)

        inst = (src1, src2, dest, op, opi, imm)
        if shadowing:
            inst += ((0, 0),)
        insts.append(inst)
    return insts
833
834
def wait_for_busy_clear(dut):
    """ simulation process: polls dut.busy_o, advancing one clock per
        poll, and returns once it reads as zero.
    """
    while (yield dut.busy_o):
        print ("busy",)
        yield
842
def disable_issue(dut):
    """ simulation process: clears insn_i on the ALU issue group and then
        on the branch issue group.
    """
    for group in ("aluissue", "brissue"):
        yield getattr(dut, group).insn_i.eq(0)
846
847
def wait_for_issue(dut, dut_issue):
    """ simulation process: polls dut_issue.fn_issue_o, advancing one
        clock per poll.  Once it is non-zero, clears both issue requests
        and the register-decode enable, then returns.
    """
    while not (yield dut_issue.fn_issue_o):
        print ("busy",)
        # (debug aid: yield from print_reg(dut, [1,2,3]) either side)
        yield
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
859
def scoreboard_branch_sim(dut, alusim):
    """ simulation process for the branch-speculation experiment.

        Loads random values into the registers, issues a short hand-written
        program containing one branch (with instructions queued for both
        outcomes), then replays the program on alusim following only the
        path the branch takes there, and finally checks the hardware
        registers against alusim.

        dut is driven through int_instr and must provide
        branch_direction_o, so this presumably expects a Scoreboard rather
        than an IssueToScoreboard - TODO confirm.

        Instructions are (src1, src2, dest, op, (shadow_on, shadow_off));
        for a branch, dest is instead a (branch_ok, branch_fail) pair of
        instruction lists.  No immediates are used, so int_instr and
        alusim.op are called with imm=0 (and op_imm=0).
    """

    iseed = 3

    for _ in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        # hand-written program: one ALU op followed by one branch (BGT)
        # with nothing on the "ok" path and one ALU op on the "fail" path
        insts = []
        insts.append( (3, 5, 2, 0, (0, 0)) )
        branch_ok = []
        branch_fail = []
        #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
        branch_ok.append( None )
        branch_fail.append( (1, 1, 2, 0, (0, 1)) )
        #branch_fail.append( None )
        insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch already resolved: no shadow needed any more
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr takes (dut, op, imm, src1, src2, dest, ...): the
            # immediate was previously missing, shifting every argument
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # RegSim.op takes (op, op_imm, imm, src1, src2, dest)
            branch_res = alusim.op(op, 0, 0, src1, src2, dest)
            if is_branch:
                # follow only the path the (simulated) branch took
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
972
973
def scoreboard_sim(dut, alusim):
    """ simulation process: runs 50 rounds of random instructions through
        the instruction queue (dut) and checks the resulting register
        contents against the alusim software model after each round.

        Each instruction is a 7-tuple
        (src1, src2, dest, op, opi, imm, (br_ok, br_fail)).

        The "if False:" sections are disabled regression cases.  NOTE:
        several of them hold tuples with fewer than 7 fields, which would
        not unpack in the issue loop below if they were re-enabled as-is.
    """

    seed(0)

    for i in range(50):

        # set random values in the registers
        # (NB: this inner loop re-uses the outer loop's variable name i)
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if True:
            # 15 random instructions with opcodes 0..4
            instrs = create_random_ops(dut, 15, True, 4)

        if False:
            instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append( (1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                    (i, src1, src2, dest, op, opi, imm))
            # compute the expected result in the software model first,
            # then queue the same instruction into the hardware
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        # first until the instruction queue has drained...
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # ...then a few more clocks, then until no unit is busy
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1116
1117
def test_scoreboard():
    """ Builds the IssueToScoreboard design, writes its RTLIL to
        test_scoreboard6600.il and runs scoreboard_sim against it, with
        a VCD trace written to test_scoreboard6600.vcd.
    """
    dut = IssueToScoreboard(qlen=2, n_in=1, n_out=1,
                            rwid=16, opwid=8, n_regs=8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 16)  # created but not used by scoreboard_sim

    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    # the branch-speculation test is currently disabled:
    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #               vcd_name='test_scoreboard6600.vcd')
1131
1132
# run the scoreboard test when executed as a script
if __name__ == "__main__":
    test_scoreboard()