4ed9acbd1aa215b05a61046979426ad7bd32bdb8
[soc.git] / src / experiment / score6600.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
4
5 from regfile.regfile import RegFileArray, treereduce
6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
7 from scoreboard.fu_reg_matrix import FURegDepMatrix
8 from scoreboard.global_pending import GlobalPending
9 from scoreboard.group_picker import GroupPicker
10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
12 from scoreboard.instruction_q import Instruction, InstructionQ
13
14 from compalu import ComputationUnitNoDelay
15
16 from alu_hier import ALU, BranchALU
17 from nmutil.latch import SRLatch
18 from nmutil.nmoperator import eq
19
20 from random import randint, seed
21 from copy import deepcopy
22 from math import log
23
24
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class: bundles several units behind one port set.

        Each per-unit 1-bit control signal (issue, go_rd, go_wr, shadown,
        go_die, busy, rd_rel, req_rel) is concatenated, in unit order, into
        a single multi-bit signal on this object.  The source operands are
        broadcast to every unit and the units' data outputs are merged by
        treereduce into one data_o.

        The class nests: if the first entry of "units" is itself a
        CompUnitsBase, the entries are treated as *groups* and the total
        width is the sum of the groups' widths.  That makes it possible to
        present one flat, top-level view of all ALUs while still keeping
        related ALUs grouped together.

        At the lowest level the intent is that a group of (identical) ALUs
        is handed the same operation and, beyond that, shares the *same
        pipeline*: a "Concurrent Computation Unit" as defined by Mitch
        Alsup (see section 11.4.9.3)
    """
    def __init__(self, rwid, units):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :units:  sequence of ALUs (or CompUnitsBase derivatives)
        """
        self.units = units
        self.rwid = rwid
        if units and isinstance(units[0], CompUnitsBase):
            # a group of groups: one bit per unit of every sub-group
            self.n_units = sum(group.n_units for group in self.units)
        else:
            self.n_units = len(units)

        width = self.n_units

        # inputs (one bit per unit)
        self.issue_i = Signal(width, reset_less=True)
        self.go_rd_i = Signal(width, reset_less=True)
        self.go_wr_i = Signal(width, reset_less=True)
        self.shadown_i = Signal(width, reset_less=True)
        self.go_die_i = Signal(width, reset_less=True)

        # outputs (one bit per unit)
        self.busy_o = Signal(width, reset_less=True)
        self.rd_rel_o = Signal(width, reset_less=True)
        self.req_rel_o = Signal(width, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        # register every unit as submodule comp0, comp1, ...
        for idx, unit in enumerate(self.units):
            setattr(m.submodules, "comp%d" % idx, unit)

        def gather(attr):
            # concatenation (unit 0 in the LSBs) of one named port per unit
            return Cat(*[getattr(unit, attr) for unit in self.units])

        # unit outputs -> grouped outputs
        comb += self.rd_rel_o.eq(gather("rd_rel_o"))
        comb += self.req_rel_o.eq(gather("req_rel_o"))
        comb += self.busy_o.eq(gather("busy_o"))
        # grouped inputs -> unit inputs
        comb += gather("go_die_i").eq(self.go_die_i)
        comb += gather("shadown_i").eq(self.shadown_i)
        comb += gather("go_wr_i").eq(self.go_wr_i)
        comb += gather("go_rd_i").eq(self.go_rd_i)
        comb += gather("issue_i").eq(self.issue_i)

        # merge (OR) all integer FU / ALU outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "data_o"
        if self.units:
            merged = treereduce(self.units)
            comb += self.data_o.eq(merged)

        # every unit sees the same pair of source operands
        for unit in self.units:
            comb += unit.src1_i.eq(self.src1_i)
            comb += unit.src2_i.eq(self.src2_i)

        return m
132
133
class CompUnitALUs(CompUnitsBase):
    """ Group of four integer ALU Computation Units sharing one oper_i.
    """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)

        # Int ALUs, in unit order: add, sub, mul, shf.  they are all the
        # same ALU type: the operation is selected at run-time by oper_i
        int_alus = [ALU(rwid) for _ in range(4)]
        cunits = [ComputationUnitNoDelay(rwid, 2, alu) for alu in int_alus]

        CompUnitsBase.__init__(self, rwid, cunits)

    def elaborate(self, platform):
        m = CompUnitsBase.elaborate(self, platform)

        # hand the same operation to all units (rather than hard-wiring
        # unit 0 to add, unit 1 to sub, unit 2 to mul and unit 3 to shf)
        for cunit in self.units:
            m.d.comb += cunit.oper_i.eq(self.oper_i)

        return m
172
173
class CompUnitBR(CompUnitsBase):
    """ Group containing the single Branch Computation Unit.
    """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width

            Note: the branch ALU and its Computation Unit are kept as
            self.bgt and self.br1 so that a shadow unit can be created
            for them
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)

        # Branch ALU and CU
        branch_alu = BranchALU(rwid)
        self.bgt = branch_alu
        self.br1 = ComputationUnitNoDelay(rwid, 3, branch_alu)
        CompUnitsBase.__init__(self, rwid, [self.br1])

    def elaborate(self, platform):
        m = CompUnitsBase.elaborate(self, platform)

        # hand the same operation to all units (instead of hard-wiring
        # the branch unit to a single "bgt" operation)
        for cunit in self.units:
            m.d.comb += cunit.oper_i.eq(self.oper_i)

        return m
205
206
class FunctionUnits(Elaboratable):
    """ Wires an FU-FU and an FU-Reg Dependency Matrix together.

        Inputs:

        * :n_regs:     number of registers (width of dest/src1/src2 vectors)
        * :n_int_alus: number of Function Units (width of go/issue vectors)
    """

    def __init__(self, n_regs, n_int_alus):
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        # register selection vectors, one bit per register
        self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in

        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)

        # per-Function-Unit vectors, one bit per FU
        self.req_rel_i = Signal(n_int_alus, reset_less = True)
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.req_rel_o = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here, for use in
        # WaWGrid (the attribute is created by elaborate)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        n_fus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        fufu = FUFUDepMatrix(n_fus, n_fus)
        m.submodules.intfudeps = fufu
        # Integer FU-Reg Dep Matrix
        fureg = FURegDepMatrix(n_fus, self.n_regs)
        m.submodules.intregdeps = fureg

        # global pending vectors come straight from the FU-Reg selectors,
        # which are also looped back into the FU-Reg pending inputs
        comb += [self.g_int_rd_pend_o.eq(fureg.rd_rsel_o),
                 self.g_int_wr_pend_o.eq(fureg.wr_rsel_o),
                 fureg.rd_pend_i.eq(fureg.rd_rsel_o),
                 fureg.wr_pend_i.eq(fureg.wr_rsel_o),
                ]

        # FU-Reg pending outputs feed the FU-FU matrix
        comb += [fufu.rd_pend_i.eq(fureg.rd_pend_o),
                 fufu.wr_pend_i.eq(fureg.wr_pend_o),
                ]
        self.wr_pend_o = fureg.wr_pend_o # also output for use in WaWGrid

        # FU-FU matrix: issue / go signals in, readable / writable out
        comb += [fufu.issue_i.eq(self.fn_issue_i),
                 fufu.go_rd_i.eq(self.go_rd_i),
                 fufu.go_wr_i.eq(self.go_wr_i),
                 fufu.go_die_i.eq(self.go_die_i),
                 self.readable_o.eq(fufu.readable_o),
                 self.writable_o.eq(fufu.writable_o),
                ]

        # Connect function issue / arrays, and dest/src1/src2
        comb += [fureg.dest_i.eq(self.dest_i),
                 fureg.src1_i.eq(self.src1_i),
                 fureg.src2_i.eq(self.src2_i),
                ]

        comb += [fureg.go_rd_i.eq(self.go_rd_i),
                 fureg.go_wr_i.eq(self.go_wr_i),
                 fureg.go_die_i.eq(self.go_die_i),
                 fureg.issue_i.eq(self.fn_issue_i),
                ]

        # register-select outputs (towards the register file ports)
        comb += [self.dest_rsel_o.eq(fureg.dest_rsel_o),
                 self.src1_rsel_o.eq(fureg.src1_rsel_o),
                 self.src2_rsel_o.eq(fureg.src2_rsel_o),
                ]

        return m
282
283
class Scoreboard(Elaboratable):
    """ Top-level scoreboard: register files, Computation Units, dependency
        matrices, group picker, issue units and shadow matrices wired
        together in elaborate().

        The number of integer Function Units is fixed at 5 inside
        elaborate(): the four units of CompUnitALUs plus the one unit of
        CompUnitBR, matching IssueUnitGroup(4) and IssueUnitGroup(1) below.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(4)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        # NOTE(review): elaborate() sets branch_direction_o to
        # br1.data_o + 1 and treats data_o == 1 as "branch occurs", i.e.
        # 2 <-> data_o == 1 and 1 <-> data_o == 0.  confirm that this
        # agrees with the "success" / "fail" wording above.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are created but not connected to anything below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units: 4 ALU units + 1 branch unit, grouped
        # (ALUs in bits 0-3, branch in bit 4) by a top-level CompUnitsBase
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 2)
        cub = CompUnitBR(self.rwid, 2)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: one picker across all n_intfus units
        intpick1 = GroupPicker(n_intfus) # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  Branch shadowing is handled by a
        # second, 1-wide matrix (bshadow) rather than by an extra column.
        # NOTE(review): the meaning of the third (True/False) argument is
        # defined in scoreboard.shadow and is not visible here.
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        fn_issue_prev = Signal(n_intfus)
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [ regdecode.dest_i.eq(self.int_dest_i),
                  regdecode.src1_i.eq(self.int_src1_i),
                  regdecode.src2_i.eq(self.int_src2_i),
                  regdecode.enable_i.eq(self.reg_enable_i),
                  self.issue_o.eq(issueunit.issue_o)
                ]

        # take these to outside (issue needs them)
        # (the CU groups' oper_i are 2 bits wide, the module inputs 4 bits)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cub.oper_i.eq(self.br_oper_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        # a branch-shadow "die" also resets the instruction-order shadows
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # work out the current-activated busy unit (by recording the old one)
        # (fn_issue_prev is recorded here but not read anywhere else in
        # this method)
        with m.If(fn_issue_o): # only update prev bit if instruction issued
            sync += fn_issue_prev.eq(fn_issue_o)

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the separate
        # 1-wide branch ShadowMatrix (bshadow) created above.
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # a branch is "active" from its issue until its go_wr
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        # NOTE(review): the for-loop is taken to be inside this If;
        # confirm against the upstream file.
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # 0x1f is the all-ones mask for the n_intfus == 5 function units
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        # NOTE(review): the for-loop is taken to be inside this If;
        # confirm against the upstream file.
        with m.If(br1.go_wr_i):
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        # yields both register files followed by the listed module ports.
        # reg_enable_i, alu_oper_i, br_oper_i and busy_o are not yielded.
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        # list form of __iter__
        return list(self)

558
class IssueToScoreboard(Elaboratable):
    """ Instruction Queue feeding a Scoreboard.

        Instructions are pushed into an InstructionQ through data_i /
        p_add_i.  elaborate() inspects the head of the queue, decodes it
        into either the branch or the ALU issue group of the Scoreboard,
        and pops it (n_sub_i = 1) once that group reports fn_issue_o.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   passed to InstructionQ as the queue length
            * :n_in:   number of instruction input slots (data_i entries)
            * :n_out:  passed to InstructionQ as the number of outputs
            * :rwid:   bit width of register file(s)
            * :opwid:  operation bit width
            * :n_regs: number of registers, passed to Scoreboard
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # shape tuple for the queue-length counters
        # NOTE(review): presumably (bits, signed) with bits = log2(qlen)+2
        mqbits = (int(log(qlen) / log(2))+2, False)
        self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
        self.p_ready_o = Signal() # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True) # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        # (self.intregs therefore only exists once elaborate has run)
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when iq.n_sub_i is set (below).

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()

        # NOTE(review): in each branch below only the n_sub_i assignment
        # is taken to be guarded by the inner "qlen_o != 0" test; confirm
        # against the upstream file.
        with m.If(wait_issue_br | wait_issue_alu):
            # set instruction pop length to 1 if the unit accepted
            # also tell the unit-group to stop accepting the instruction
            # and disable the regfile
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
                comb += wait_issue_br.eq(0)
                comb += sc.brissue.insn_i.eq(0)
                comb += sc.int_dest_i.eq(0)
                comb += sc.int_src1_i.eq(0)
                comb += sc.int_src2_i.eq(0)
                comb += sc.reg_enable_i.eq(0)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
                comb += wait_issue_alu.eq(0)
                comb += sc.aluissue.insn_i.eq(0)
                comb += sc.int_dest_i.eq(0)
                comb += sc.int_src1_i.eq(0)
                comb += sc.int_src2_i.eq(0)
                comb += sc.reg_enable_i.eq(0)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        # (these assignments come later than the ones above, so they take
        # priority whenever the queue is non-empty)
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            dest = iq.data_o[0].dest_i
            src1 = iq.data_o[0].src1_i
            src2 = iq.data_o[0].src2_i
            op = iq.data_o[0].oper_i

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1) # enable the regfile

            # choose a Function-Unit-Group: bits 2-3 of the operation
            # select branch (non-zero) or alu (zero), bits 0-1 are passed
            # on as the operation within the group
            with m.If((op & (0x3<<2)) != 0): # branch
                comb += sc.brissue.insn_i.eq(1)
                comb += sc.br_oper_i.eq(op & 0x3)
                comb += wait_issue_br.eq(1)
            with m.Else(): # alu
                comb += sc.aluissue.insn_i.eq(1)
                comb += sc.alu_oper_i.eq(op & 0x3)
                comb += wait_issue_alu.eq(1)

            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            #yield sc.branch_fail_i.eq(branch_fail)
            #yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        # p_ready_o, then every field of every data_i entry, then p_add_i
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        # list form of __iter__
        return list(self)

675
# operation numbers understood by RegSim.op.  0-3 are ALU operations,
# 4-7 are branch comparisons
IADD = 0
ISUB = 1
IMUL = 2
ISHF = 3
IBGT = 4
IBLT = 5
IBEQ = 6
IBNE = 7


class RegSim:
    """ Software model of the integer register file, used by the
        simulations to compute the register contents that are expected.
    """

    def __init__(self, rwidth, nregs):
        """ Inputs:

            * :rwidth: register width in bits (results are truncated to it)
            * :nregs:  number of registers; all of them start at zero
        """
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, src1, src2, dest):
        """ Performs one operation on the model.

            * :op:   one of IADD, ISUB, IMUL, ISHF, IBGT, IBLT, IBEQ, IBNE
            * :src1: register number of the first operand
            * :src2: register number of the second operand
            * :dest: register number that receives the result

            The result, truncated to rwidth bits, is stored in "dest"
            and also returned.  Comparisons produce 1 or 0.

            Raises ValueError for an unknown operation number (before
            any register is modified).
        """
        maxbits = (1 << self.rwidth) - 1
        src1 = self.regs[src1] & maxbits
        src2 = self.regs[src2] & maxbits
        if op == IADD:
            val = src1 + src2
        elif op == ISUB:
            val = src1 - src2
        elif op == IMUL:
            val = src1 * src2
        elif op == ISHF:
            val = src1 >> (src2 & maxbits)
        elif op == IBGT:
            val = int(src1 > src2)
        elif op == IBLT:
            val = int(src1 < src2)
        elif op == IBEQ:
            val = int(src1 == src2)
        elif op == IBNE:
            val = int(src1 != src2)
        else:
            # previously fell through and failed on the unbound "val"
            raise ValueError("unknown operation %r" % (op,))
        # wrap negative / over-wide results to the register width
        val &= maxbits
        self.setval(dest, val)
        return val

    def setval(self, dest, val):
        """ Stores "val" in register "dest" (and logs it).
        """
        print ("sim setval", dest, hex(val))
        self.regs[dest] = val

    def dump(self, dut):
        """ Simulation generator: prints every expected register value
            next to the value read back from dut.intregs.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """ Simulation generator: compares every register of dut.intregs
            against the model.  On the first mismatch the whole register
            file is dumped and AssertionError is raised.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                # explicit raise: a bare "assert False" vanishes under -O
                raise AssertionError("reg %d expected %x received %x" %
                                     (i, val, reg))

731
def instr_q(dut, op, src1, src2, dest, branch_success, branch_fail):
    """ Simulation generator: pushes one instruction into the dut's
        instruction queue and waits until it has been accepted.

        branch_success and branch_fail are accepted but not used.
    """
    batch = [dict(oper_i=op, dest_i=dest, src1_i=src1, src2_i=src2)]
    n_send = len(batch)

    # drive each input slot, then read it back for the log
    for slot, fields in enumerate(batch):
        yield from eq(dut.data_i[slot], fields)
        readback = yield dut.data_i[slot]
        print ("senddata %d %x" % (slot, readback))

    # request the add, then clock until the queue reports "ready"
    yield dut.p_add_i.eq(n_send)
    yield
    while True:
        accepted = yield dut.p_ready_o
        if accepted:
            break
        yield

    # stop adding
    yield dut.p_add_i.eq(0)

748
749
def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
    """ Simulation generator: presents one instruction directly on the
        dut's register / issue-group ports and waits for it to be issued.

        Operations with bit 2 or bit 3 set go to the branch issue group,
        all others to the ALU issue group; the low two bits of "op" are
        passed on as the operation.
    """
    yield from disable_issue(dut)

    # register numbers
    reg_ports = ((dut.int_dest_i, dest),
                 (dut.int_src1_i, src1),
                 (dut.int_src2_i, src2))
    for port, regnum in reg_ports:
        yield port.eq(regnum)

    # pick the issue group and its operation port
    is_branch = (op & (0x3<<2)) != 0
    if is_branch:
        dut_issue, oper_port = dut.brissue, dut.br_oper_i
    else:
        dut_issue, oper_port = dut.aluissue, dut.alu_oper_i
    yield dut_issue.insn_i.eq(1)
    yield oper_port.eq(Const(op & 0x3, 2))
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)

772
773
def print_reg(dut, rnums):
    """ Simulation generator: reads the registers numbered in "rnums"
        from dut.intregs and prints them, in hex, on a single line.
    """
    values = []
    for rnum in rnums:
        current = yield dut.intregs.regs[rnum].reg
        values.append("%x" % current)
    labels = ','.join(str(rnum) for rnum in rnums)
    print ("reg %s: %s" % (labels, ','.join(values)))

781
782
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """ Returns a list of n_ops random instructions.

        Each instruction is a tuple (src1, src2, dest, op): register
        numbers are drawn from 1 to dut.n_regs-1 and op from 0 to
        max_opnums (all inclusive).  With shadowing=True a fifth element,
        the shadow pair (0, 0), is appended to every tuple.
    """
    def rand_reg():
        return randint(1, dut.n_regs-1)

    # optional trailing (shadow_on, shadow_off) pair
    tail = ((0, 0),) if shadowing else ()

    # tuple elements are evaluated left to right: src1, src2, dest, op
    return [(rand_reg(), rand_reg(), rand_reg(), randint(0, max_opnums)) + tail
            for _ in range(n_ops)]

796
797
def wait_for_busy_clear(dut):
    """ Simulation generator: clocks the simulation until dut.busy_o
        reads as zero, printing "busy" for every cycle spent waiting.
    """
    while (yield dut.busy_o):
        print ("busy",)
        yield

805
def disable_issue(dut):
    """ Simulation generator: de-asserts insn_i on the ALU issue group
        and then on the branch issue group.
    """
    for group in (dut.aluissue, dut.brissue):
        yield group.insn_i.eq(0)
809
810
def wait_for_issue(dut, dut_issue):
    """ Simulation generator: clocks the simulation until the given issue
        group reports fn_issue_o, printing "busy" for every cycle spent
        waiting, then disables issue and the register decode.
    """
    while not (yield dut_issue.fn_issue_o):
        print ("busy",)
        yield
    # accepted: stop presenting the instruction
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)

822
def scoreboard_branch_sim(dut, alusim):
    """ Simulation generator for the branch-speculation experiment.

        Issues a short instruction list containing a branch (op >= 4)
        whose "dest" slot holds a (branch_ok, branch_fail) pair of
        instruction lists.  Both lists are issued under shadow; the
        software model (alusim) then executes only the list selected by
        the branch result, and the register files are compared.

        NOTE(review): instructions are sent with int_instr, which drives
        int_dest_i / aluissue / brissue directly, so "dut" presumably has
        to be a Scoreboard rather than an IssueToScoreboard.
    """

    iseed = 3

    for i in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        # (the first "val" is immediately overwritten by the random one)
        for i in range(1, dut.n_regs):
            val = 31+i*3
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # disabled: randomly generated variant of the test
        if False:
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        # enabled: fixed test, one add followed by one branch that has
        # no "ok" instruction (None) and one "fail" instruction
        if True:
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # independent copy for the software model: "insts" is consumed
        # (and appended to) by the issue loop below
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts # same list object, not a copy
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch already resolved: issue without any shadow
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                # a branch carries its follow-on instructions in "dest";
                # the register actually used as destination is src2
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            yield from int_instr(dut, op, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay on the software model, following only the branch
        # direction that the model itself computes
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            branch_res = alusim.op(op, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)

935
936
def scoreboard_sim(dut, alusim):
    """ Simulation generator: fills the registers with random values,
        queues a list of instructions through instr_q (executing each one
        on the software model "alusim" as well), waits for the queue to
        drain and the dut to go idle, then compares the register files.

        The "if False:" sections are disabled regression cases.  The issue
        loop unpacks 5-element tuples, (src1, src2, dest, op, (br_ok,
        br_fail)): the disabled cases that append 4-element tuples would
        need the shadow pair adding before they could be re-enabled.
    """

    #seed(2)

    for i in range(1):

        # set random values in the registers
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if True:
            # 15 random ALU operations (op 0-3), each with a (0, 0) pair
            instrs = create_random_ops(dut, 15, True, 3)

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )


        if False:
            instrs.append((2, 3, 3, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, (0, 0)))
            instrs.append((3, 5, 5, 2, (0, 0)))
            instrs.append((5, 3, 3, 3, (0, 0)))
            instrs.append((3, 5, 5, 0, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            # branch (op 4) with preset operands (reg5=4, reg3=5),
            # followed by one instruction with shadow pair (0, 1)
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            # as above with reg5=6 and shadow pair (1, 0)
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, (0, 0)) )
            instrs.append( (5, 2, 3, 1, (0, 0)) )
            instrs.append( (7, 1, 5, 2, (0, 0)) )
            instrs.append( (5, 6, 6, 4, (0, 0)) )
            instrs.append( (7, 5, 2, 2, (1, 0)) )
            instrs.append( (1, 7, 5, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, (1, 0)) )
            instrs.append( (1, 6, 7, 3, (0, 0)) )
            instrs.append( (6, 7, 7, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, (src1, src2, dest, op, (br_ok, br_fail)) in enumerate(instrs):

            print ("instr %d: (%d, %d, %d, %d)" % (i, src1, src2, dest, op))
            # the model executes every instruction, in program order
            alusim.op(op, src1, src2, dest)
            yield from instr_q(dut, op, src1, src2, dest, br_ok, br_fail)

        # wait for all instructions to stop before checking
        # (first the queue has to empty...)
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # (... then four more clocks, then every CU has to be idle)
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)

1069
1070
def test_scoreboard():
    """ Builds a 16-bit, 8-register IssueToScoreboard with a 2-entry
        queue, writes its RTLIL to test_scoreboard6600.il and runs
        scoreboard_sim on it, dumping test_scoreboard6600.vcd.
    """
    dut = IssueToScoreboard(qlen=2, n_in=1, n_out=1,
                            rwid=16, opwid=8, n_regs=8)
    alusim = RegSim(rwidth=16, nregs=8)

    # conversion comes first: it elaborates the design
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    # the branch variant (scoreboard_branch_sim) is not run at present
    stimulus = scoreboard_sim(dut, alusim)
    run_simulation(dut, stimulus, vcd_name='test_scoreboard6600.vcd')


if __name__ == '__main__':
    test_scoreboard()