602be44ed86e316b5654ce55596c64d08081088b
[soc.git] / src / experiment / score6600.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
4
5 from regfile.regfile import RegFileArray, treereduce
6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
7 from scoreboard.fu_reg_matrix import FURegDepMatrix
8 from scoreboard.global_pending import GlobalPending
9 from scoreboard.group_picker import GroupPicker
10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
12 from scoreboard.instruction_q import Instruction, InstructionQ
13
14 from compalu import ComputationUnitNoDelay
15
16 from alu_hier import ALU, BranchALU
17 from nmutil.latch import SRLatch
18 from nmutil.nmoperator import eq
19
20 from random import randint, seed
21 from copy import deepcopy
22 from math import log
23
24
class Memory(Elaboratable):
    """ Small test memory with one read port and one write port.

        The storage is an nmigen Memory of ``regwid``-bit words, initialised
        so that word N contains the value N.  Both ports are addressed by
        ``adr`` with its low ``ddepth`` bits dropped, which mirrors the
        ``addr >> ddepth`` indexing used by MemSim.

        NOTE(review): ``ddepth`` is the number of bytes per word, not the
        log2 of it, so this drops more address bits than a byte-to-word
        conversion would.  It is kept as-is to stay in step with MemSim.
    """
    def __init__(self, regwid, addrw):
        """ Inputs:

            * :regwid: bit width of each memory word (and of dat_r / dat_w)
            * :addrw:  bit width of the address signal
        """
        # This class shadows the nmigen primitive of the same name, so the
        # primitive is imported locally under an alias.  Calling plain
        # "Memory(...)" here would re-enter this class.
        from nmigen import Memory as NMigenMemory

        # integer division: depth is passed to range() and used as a
        # memory depth, and ddepth is used as a slice index
        self.ddepth = regwid // 8
        depth = (1 << addrw) // self.ddepth
        self.adr = Signal(addrw)
        self.dat_r = Signal(regwid)
        self.dat_w = Signal(regwid)
        self.we = Signal()
        self.mem = NMigenMemory(width=regwid, depth=depth,
                                init=range(0, depth))

    def elaborate(self, platform):
        m = Module()
        m.submodules.rdport = rdport = self.mem.read_port()
        m.submodules.wrport = wrport = self.mem.write_port()
        # reads and writes must select the same word for a given address,
        # so both ports use the address with the low bits ignored
        word_adr = self.adr[self.ddepth:]
        m.d.comb += [
            rdport.addr.eq(word_adr),
            self.dat_r.eq(rdport.data),
            wrport.addr.eq(word_adr),
            wrport.data.eq(self.dat_w),
            wrport.en.eq(self.we),
        ]
        return m
47
48
class MemSim:
    """ Pure-Python model of a word-organised memory.

        Holds ``(1 << addrw) // (regwid // 8)`` words, each initialised to
        its own index.  Addresses are turned into word indices by shifting
        right by ``ddepth`` (= regwid // 8).
    """

    def __init__(self, regwid, addrw):
        self.regwid = regwid
        self.ddepth = regwid // 8
        n_words = (1 << addrw) // self.ddepth
        self.mem = list(range(n_words))

    def _index(self, addr):
        """ Word index selected by *addr*. """
        return addr >> self.ddepth

    def ld(self, addr):
        """ Return the word stored at *addr*. """
        return self.mem[self._index(addr)]

    def st(self, addr, data):
        """ Store *data*, truncated to ``regwid`` bits, at *addr*. """
        word_mask = (1 << self.regwid) - 1
        self.mem[self._index(addr)] = data & word_mask
61
62
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class: bundles several units behind one
        set of concatenated control signals.

        Each unit contributes one bit (or, for a nested group, several
        bits) to issue_i, go_rd_i, go_wr_i, shadown_i, go_die_i, busy_o,
        rd_rel_o and req_rel_o.  Because a CompUnitsBase exposes exactly
        the signals it expects from its members, the class also works
        recursively: a group may contain other groups, giving a top-level
        sequential concatenation of every ALU's signals while still
        allowing ALUs to be grouped conveniently.

        At the lower level the intent is that a group of (identical) ALUs
        is handed the same operation, and ultimately shares the *same
        pipeline*, becoming a "Concurrent Computation Unit" as defined by
        Mitch Alsup (see section 11.4.9.3).
    """
    def __init__(self, rwid, units):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :units:  sequence of ALUs (or CompUnitsBase derivatives)
        """
        self.units = units
        self.rwid = rwid
        if units and isinstance(units[0], CompUnitsBase):
            # group of groups: width is the total over all sub-groups
            self.n_units = sum(group.n_units for group in self.units)
        else:
            self.n_units = len(units)

        n_units = self.n_units

        # inputs (one bit per underlying unit)
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)

        # outputs (one bit per underlying unit)
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        members = self.units

        for idx, unit in enumerate(members):
            setattr(m.submodules, "comp%d" % idx, unit)

        def gather(attr):
            # concatenation of the same-named signal from every member,
            # in member order
            return Cat(*[getattr(unit, attr) for unit in members])

        # member outputs are collected into this group's outputs ...
        comb += [
            self.rd_rel_o.eq(gather("rd_rel_o")),
            self.req_rel_o.eq(gather("req_rel_o")),
            self.busy_o.eq(gather("busy_o")),
        ]
        # ... and this group's inputs are fanned out to the member inputs
        comb += [
            gather("go_die_i").eq(self.go_die_i),
            gather("shadown_i").eq(self.shadown_i),
            gather("go_wr_i").eq(self.go_wr_i),
            gather("go_rd_i").eq(self.go_rd_i),
            gather("issue_i").eq(self.issue_i),
        ]

        # merge all member data outputs into a single value.  bit of a
        # hack: treereduce needs a list with an item named "data_o"
        if members:
            comb += self.data_o.eq(treereduce(members))

        # every member sees the same two source operands
        for unit in members:
            comb += [
                unit.src1_i.eq(self.src1_i),
                unit.src2_i.eq(self.src2_i),
            ]

        return m
170
171
class CompUnitALUs(CompUnitsBase):
    """ Group of four integer Computation Units (add, sub, mul, shf),
        each wrapping its own ALU and all receiving the same operation.
    """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
        """
        self.opwid = opwid

        # operation input, shared by every unit in the group
        self.oper_i = Signal(opwid, reset_less=True)

        # Int ALUs
        add = ALU(rwid)
        sub = ALU(rwid)
        mul = ALU(rwid)
        shf = ALU(rwid)

        # one Computation Unit per ALU
        units = [ComputationUnitNoDelay(rwid, 2, alu)
                 for alu in (add, sub, mul, shf)]

        CompUnitsBase.__init__(self, rwid, units)

    def elaborate(self, platform):
        m = CompUnitsBase.elaborate(self, platform)

        # hand the same operation to all units (rather than hard-wiring
        # a fixed opcode per unit)
        m.d.comb += [unit.oper_i.eq(self.oper_i) for unit in self.units]

        return m
210
211
class CompUnitBR(CompUnitsBase):
    """ Group containing the single branch Computation Unit.

        The BranchALU and its Computation Unit are kept as ``bgt`` and
        ``br1`` attributes so that a shadow unit can be created for them.
    """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
        """
        self.opwid = opwid

        # operation input, shared by every unit in the group
        self.oper_i = Signal(opwid, reset_less=True)

        # Branch ALU and its Computation Unit
        self.bgt = BranchALU(rwid)
        self.br1 = ComputationUnitNoDelay(rwid, 3, self.bgt)
        CompUnitsBase.__init__(self, rwid, [self.br1])

    def elaborate(self, platform):
        m = CompUnitsBase.elaborate(self, platform)

        # hand the same operation to all units (rather than hard-wiring
        # the branch opcode)
        m.d.comb += [unit.oper_i.eq(self.oper_i) for unit in self.units]

        return m
243
244
class FunctionUnits(Elaboratable):
    """ Integer Function Unit dependency tracking.

        Instantiates a FU-FU dependency matrix and a FU-Register dependency
        matrix, links the two together and exposes their issue / go / pending
        / select signals.

        Note: the FU-Regs wr_pend_o is also made available (as the
        ``wr_pend_o`` attribute, set during elaborate) for use in WaWGrid.
    """

    def __init__(self, n_regs, n_int_alus):
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        # register selection inputs, n_regs bits wide
        self.dest_i = Signal(n_regs, reset_less=True)  # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True)  # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True)  # oper2 R# in

        # global read / write pending outputs
        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        # register select outputs
        self.dest_rsel_o = Signal(n_regs, reset_less=True)  # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True)  # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True)  # src2 reg (bot)

        # per-Function-Unit signals, n_int_alus bits wide
        self.req_rel_i = Signal(n_int_alus, reset_less = True)
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.req_rel_o = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        n_intfus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
        m.submodules.intfudeps = intfudeps
        # Integer FU-Reg Dep Matrix
        intregdeps = FURegDepMatrix(n_intfus, self.n_regs)
        m.submodules.intregdeps = intregdeps

        comb += [
            # register selects double as the global pending outputs ...
            self.g_int_rd_pend_o.eq(intregdeps.rd_rsel_o),
            self.g_int_wr_pend_o.eq(intregdeps.wr_rsel_o),
            # ... and are fed straight back in as the pending inputs
            intregdeps.rd_pend_i.eq(intregdeps.rd_rsel_o),
            intregdeps.wr_pend_i.eq(intregdeps.wr_rsel_o),
            # FU-Reg pending vectors drive the FU-FU matrix
            intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o),
            intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o),
        ]
        self.wr_pend_o = intregdeps.wr_pend_o  # also output for use in WaWGrid

        # FU-FU matrix: issue / go signals in, readable / writable out
        comb += [
            intfudeps.issue_i.eq(self.fn_issue_i),
            intfudeps.go_rd_i.eq(self.go_rd_i),
            intfudeps.go_wr_i.eq(self.go_wr_i),
            intfudeps.go_die_i.eq(self.go_die_i),
            self.readable_o.eq(intfudeps.readable_o),
            self.writable_o.eq(intfudeps.writable_o),
        ]

        # FU-Reg matrix: dest/src1/src2 plus the same issue / go signals
        comb += [
            intregdeps.dest_i.eq(self.dest_i),
            intregdeps.src1_i.eq(self.src1_i),
            intregdeps.src2_i.eq(self.src2_i),
            intregdeps.go_rd_i.eq(self.go_rd_i),
            intregdeps.go_wr_i.eq(self.go_wr_i),
            intregdeps.go_die_i.eq(self.go_die_i),
            intregdeps.issue_i.eq(self.fn_issue_i),
        ]

        # register select outputs
        comb += [
            self.dest_rsel_o.eq(intregdeps.dest_rsel_o),
            self.src1_rsel_o.eq(intregdeps.src1_rsel_o),
            self.src2_rsel_o.eq(intregdeps.src2_rsel_o),
        ]

        return m
320
321
class Scoreboard(Elaboratable):
    """ Top-level scoreboard: wires together the register files, the
        Computation Units (an ALU group plus a branch group), the Function
        Unit dependency matrices, a group picker, the register decoder and
        issue unit, two shadow matrices and a branch speculation recorder.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(4)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Build the scoreboard: create all submodules, then connect
            issue, pickers, shadows, branch speculation and register files.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are created but not connected to anything below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        # 5 = the four units of CompUnitALUs plus the one of CompUnitBR
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 2)
        cub = CompUnitBR(self.rwid, 2)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(n_intfus) # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        # NOTE(review): as written here the branch shadow is a separate
        # 1-wide ShadowMatrix (bshadow) rather than an extra column
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [    regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     self.issue_o.eq(issueunit.issue_o)
                ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cub.oper_i.eq(self.br_oper_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # branch is considered active from the cycle it is issued (or once
        # the recorder's active_i has been latched) until its go_wr_i
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # NOTE(review): 0x1f looks like the n_intfus(=5)-bit mask; it would
        # need updating if n_int_alus changes - confirm
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            # data_o + 1 maps onto the branch_direction_o encoding:
            # 1 when data_o is 0, 2 when data_o is 1
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        """ Yield the contents of both register files, then the register
            number inputs, issue_o and the branch speculation signals.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ Return everything produced by __iter__ as a list. """
        return list(self)
591
592
class IssueToScoreboard(Elaboratable):
    """ Instruction queue in front of a Scoreboard.

        Instructions are pushed into an InstructionQ through p_add_i /
        data_i.  The entry at the head of the queue is presented to the
        Scoreboard and is popped once the selected issue group (branch or
        alu) reports that it accepted the instruction.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # (width, signed) shape shared by the queue-length style signals
        mqbits = (int(log(qlen) / log(2))+2, False)
        self.p_add_i = Signal(mqbits)  # instructions to add (from data_i)
        self.p_ready_o = Signal()  # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True)  # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        self.intregs = sc.intregs

        comb += [
            # the "busy" signal and instruction queue length
            self.busy_o.eq(sc.busy_o),
            self.qlen_o.eq(iq.qlen_o),
            # link up instruction queue
            iq.p_add_i.eq(self.p_add_i),
            self.p_ready_o.eq(iq.p_ready_o),
        ]
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items: they are only popped (n_sub_i) below, once accepted.

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()

        with m.If(wait_issue_br | wait_issue_alu):
            # set instruction pop length to 1 if the unit accepted
            for waiting, group in ((wait_issue_br, sc.brissue),
                                   (wait_issue_alu, sc.aluissue)):
                with m.If(waiting & (group.fn_issue_o != 0)):
                    with m.If(iq.qlen_o != 0):
                        comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # head-of-queue instruction: operands and operation
            head = iq.data_o[0]
            op = head.oper_i

            comb += [
                # set the src/dest regs
                sc.int_dest_i.eq(head.dest_i),
                sc.int_src1_i.eq(head.src1_i),
                sc.int_src2_i.eq(head.src2_i),
                sc.reg_enable_i.eq(1),  # enable the regfile
            ]

            # choose a Function-Unit-Group: bits 2-3 of the operation
            # select branch, the low two bits are the per-group opcode
            with m.If((op & (0x3<<2)) != 0):  # branch
                comb += [
                    sc.brissue.insn_i.eq(1),
                    sc.br_oper_i.eq(op & 0x3),
                    wait_issue_br.eq(1),
                ]
            with m.Else():  # alu
                comb += [
                    sc.aluissue.insn_i.eq(1),
                    sc.alu_oper_i.eq(op & 0x3),
                    wait_issue_alu.eq(1),
                ]

            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            #yield sc.branch_fail_i.eq(branch_fail)
            #yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        yield self.p_ready_o
        for insn in self.data_i:
            yield from list(insn)
        yield self.p_add_i

    def ports(self):
        return list(iter(self))
695
696
# Opcode numbering used by the simulator and the test drivers.
# Values 0-3 are ALU operations, 4-7 are branch comparisons.
IADD = 0
ISUB = 1
IMUL = 2
ISHF = 3
IBGT = 4
IBLT = 5
IBEQ = 6
IBNE = 7


class RegSim:
    """ Pure-Python reference model of the integer register file.

        Applies the same operations as the hardware under test to a plain
        list of integers, so that the final register contents can be
        compared against the simulated design.
    """
    def __init__(self, rwidth, nregs):
        """ Inputs:

            * :rwidth: bit width of each register
            * :nregs:  number of registers (all start at zero)
        """
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, op_imm, src1, src2, dest):
        """ Execute one operation and store the result in register *dest*.

            * :op:     one of the I* opcode constants
            * :op_imm: if true, *src2* is used directly as an immediate
                       value instead of as a register number
            * :src1:   register number of the first operand
            * :src2:   register number (or immediate) of the second operand
            * :dest:   register number receiving the result

            Returns the result, truncated to rwidth bits.
            Raises ValueError on an unrecognised opcode; no register is
            modified in that case.
        """
        maxbits = (1 << self.rwidth) - 1
        src1 = self.regs[src1] & maxbits
        if not op_imm:  # src2 is a register number: fetch its value
            src2 = self.regs[src2] & maxbits
        if op == IADD:
            val = src1 + src2
        elif op == ISUB:
            val = src1 - src2
        elif op == IMUL:
            val = src1 * src2
        elif op == ISHF:
            val = src1 >> (src2 & maxbits)
        elif op == IBGT:
            val = int(src1 > src2)
        elif op == IBLT:
            val = int(src1 < src2)
        elif op == IBEQ:
            val = int(src1 == src2)
        elif op == IBNE:
            val = int(src1 != src2)
        else:
            # previously fell through and failed later with an
            # UnboundLocalError on "val"
            raise ValueError("unknown opcode %r" % (op,))
        val &= maxbits
        self.setval(dest, val)
        return val

    def setval(self, dest, val):
        """ Set register *dest* to *val* (and log the write). """
        print ("sim setval", dest, hex(val))
        self.regs[dest] = val

    def dump(self, dut):
        """ Simulation process: print expected vs. actual value of every
            register, reading the actual values from dut.intregs.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """ Simulation process: compare every register of dut.intregs
            against the model.  On the first mismatch, dump all registers
            and raise AssertionError.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                # explicit raise: a bare "assert False" disappears under -O
                raise AssertionError(
                    "reg %d expected %x received %x" % (i, val, reg))
753
def instr_q(dut, op, op_imm, src1, src2, dest, branch_success, branch_fail):
    """ Simulation process: push a single instruction into the dut's
        instruction queue and wait until the dut reports it was taken.

        branch_success and branch_fail are accepted but not used here.
    """
    instrs = [{'oper_i': op, 'dest_i': dest, 'opim_i': op_imm,
               'src1_i': src1, 'src2_i': src2}]

    # drive each instruction record onto its data_i slot, then read it back
    for idx, fields in enumerate(instrs):
        yield from eq(dut.data_i[idx], fields)
        di = yield dut.data_i[idx]
        print ("senddata %d %x" % (idx, di))

    # request that len(instrs) entries be added, then clock until accepted
    yield dut.p_add_i.eq(len(instrs))
    yield
    while not (yield dut.p_ready_o):
        yield

    yield dut.p_add_i.eq(0)
771
772
def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
    """ Simulation process: present one instruction directly to a
        Scoreboard-style dut and wait for it to be issued.

        Opcodes with bit 2 or 3 set go to the branch issue group, all
        others to the ALU issue group; the low two bits are the opcode
        handed to the selected group.
    """
    yield from disable_issue(dut)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    is_branch = (op & 0xc) != 0
    if is_branch:
        dut_issue, oper_port = dut.brissue, dut.br_oper_i
    else:
        dut_issue, oper_port = dut.aluissue, dut.alu_oper_i
    yield dut_issue.insn_i.eq(1)
    yield oper_port.eq(Const(op & 0x3, 2))
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
795
796
def print_reg(dut, rnums):
    """ Simulation process: read the listed integer registers from the
        dut and print their numbers and (hex) values on one line.
    """
    values = []
    for rnum in rnums:
        values.append("%x" % (yield dut.intregs.regs[rnum].reg))
    labels = ','.join(str(rnum) for rnum in rnums)
    print ("reg %s: %s" % (labels, ','.join(values)))
804
805
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """ Return a list of n_ops random instructions.

        Each instruction is a tuple (src1, src2, dest, op, opi) with
        register numbers in 1..dut.n_regs-1, op in 0..max_opnums and
        opi always 0.  With shadowing set, a (0, 0) shadow pair is
        appended as a sixth element.
    """
    shadow_tail = ((0, 0),) if shadowing else ()

    def one_op():
        # draw order matters for reproducibility: src1, src2, dest, op
        regs = tuple(randint(1, dut.n_regs-1) for _ in range(3))
        op = randint(0, max_opnums)
        opi = 0  # immediates are currently never generated
        return regs + (op, opi) + shadow_tail

    return [one_op() for _ in range(n_ops)]
820
821
def wait_for_busy_clear(dut):
    """ Simulation process: clock the simulation until dut.busy_o reads
        as zero, printing "busy" for every cycle spent waiting.
    """
    while (yield dut.busy_o):
        print ("busy",)
        yield
829
def disable_issue(dut):
    """ Simulation process: clear the instruction-present input of the
        ALU issue group, then of the branch issue group.
    """
    for group_name in ("aluissue", "brissue"):
        yield getattr(dut, group_name).insn_i.eq(0)
833
834
def wait_for_issue(dut, dut_issue):
    """ Simulation process: clock until dut_issue.fn_issue_o is non-zero,
        printing "busy" for every cycle spent waiting, then drop the
        issue requests and the register-decode enable.
    """
    while not (yield dut_issue.fn_issue_o):
        print ("busy",)
        #yield from print_reg(dut, [1,2,3])
        yield
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
    #yield from print_reg(dut, [1,2,3])
846
def scoreboard_branch_sim(dut, alusim):
    """ Simulation process exercising branch speculation on a Scoreboard.

        * :dut:    the design under test; must provide branch_direction_o,
                   n_regs, intregs, busy_o and the issue inputs used by
                   int_instr
        * :alusim: RegSim reference model, updated in step with the dut

        Instructions are tuples (src1, src2, dest, op, (shadow_on,
        shadow_off)).  For a branch (op >= 4) the "dest" slot instead holds
        a pair of lists (branch_ok, branch_fail) of follow-on instructions;
        both lists are queued for issue with the matching shadow request,
        and those on the path not taken are skipped or cancelled.  The same
        program is then replayed on alusim, following only the taken path,
        and the register files are compared.
    """

    iseed = 3

    for i in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        if False:
            # create some instructions: branches create a tree
            # NOTE(review): create_random_ops(..., shadowing=True) returns
            # 6-element tuples, whereas the issue loop below unpacks
            # 5-element ones; this disabled path needs adapting before
            # it is re-enabled
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # the issue loop below consumes insts, so keep a copy for the
        # reference-model replay
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch already resolved: no shadow needed any more
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            yield from int_instr(dut, op, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay on the reference model, following only the taken path
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # RegSim.op takes (op, op_imm, src1, src2, dest): these
            # instructions carry no immediate, so op_imm is 0
            branch_res = alusim.op(op, 0, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
959
960
def scoreboard_sim(dut, alusim):
    """ Simulation process: run a batch of instructions through the
        instruction queue of the dut and check the resulting registers.

        * :dut:    design under test; must provide n_regs, intregs, qlen_o,
                   busy_o and the queue inputs used by instr_q
        * :alusim: RegSim reference model

        Every instruction is applied to alusim and queued on the dut; once
        the queue has drained and the dut is no longer busy, the register
        files are compared.

        The "if False:" sections are regression fixtures kept for manual
        re-enabling.  NOTE(review): several of them append 4- or 5-element
        tuples, whereas the issue loop unpacks 6-element tuples
        (src1, src2, dest, op, opi, (br_ok, br_fail)); they would need
        updating before being switched on.
    """

    #seed(2)

    for i in range(1):

        # set random values in the registers
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if True:
            # 15 random ALU-only (op 0..3) instructions, with shadow pair
            instrs = create_random_ops(dut, 15, True, 3)

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )


        if False:
            instrs.append((2, 3, 3, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, (0, 0)))
            instrs.append((3, 5, 5, 2, (0, 0)))
            instrs.append((5, 3, 3, 3, (0, 0)))
            instrs.append((3, 5, 5, 0, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            # branch fixture: r5=4, r3=5, then op 4 (IBGT) on r5, r3
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            # branch fixture: r5=6, r3=5, then op 4 (IBGT) on r5, r3
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            src1, src2, dest, op, opi, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d)" % (i, src1, src2, dest, op))
            # update the reference model first, then queue on the dut
            alusim.op(op, opi, src1, src2, dest)
            yield from instr_q(dut, op, opi, src1, src2, dest, br_ok, br_fail)

        # wait for all instructions to stop before checking
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # a few extra clock cycles after the queue reports empty, before
        # the busy flag is polled
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1094
1095
def test_scoreboard():
    """ Build the instruction-queue + scoreboard design, write its RTLIL
        to test_scoreboard6600.il and run scoreboard_sim against it,
        producing test_scoreboard6600.vcd.
    """
    dut = IssueToScoreboard(qlen=2, n_in=1, n_out=1,
                            rwid=16, opwid=8, n_regs=8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 16)  # created but not used by the simulation yet

    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1109
1110
# run the scoreboard test when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()