issue from q is combinatorial, so it does not need to be set to zero
[soc.git] / src / experiment / score6600.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
4
5 from regfile.regfile import RegFileArray, treereduce
6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
7 from scoreboard.fu_reg_matrix import FURegDepMatrix
8 from scoreboard.global_pending import GlobalPending
9 from scoreboard.group_picker import GroupPicker
10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
12 from scoreboard.instruction_q import Instruction, InstructionQ
13
14 from compalu import ComputationUnitNoDelay
15
16 from alu_hier import ALU, BranchALU
17 from nmutil.latch import SRLatch
18 from nmutil.nmoperator import eq
19
20 from random import randint, seed
21 from copy import deepcopy
22 from math import log
23
24
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Amazingly, this class works recursively.  It's supposed to just
        look after some ALUs (that can handle the same operations),
        grouping them together, however it turns out that the same code
        can also group *groups* of Computation Units together as well.

        Basically it was intended just to concatenate the ALU's issue,
        go_rd etc. signals together, which start out as bits and become
        sequences.  Turns out that the same trick works just as well
        on Computation Units!

        So this class may be used recursively to present a top-level
        sequential concatenation of all the signals in and out of
        ALUs, whilst at the same time making it convenient to group
        ALUs together.

        At the lower level, the intent is that groups of (identical)
        ALUs may be passed the same operation.  Even beyond that,
        the intent is that that group of (identical) ALUs actually
        share the *same pipeline* and as such become a "Concurrent
        Computation Unit" as defined by Mitch Alsup (see section
        11.4.9.3)
    """
    def __init__(self, rwid, units):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :units: sequence of ALUs (or CompUnitsBase derivatives)
        """
        self.units = units
        self.rwid = rwid
        # a group of groups is as wide as the sum of its members; a plain
        # group of ALUs has one bit per ALU.  only the first entry is
        # inspected to decide which of the two cases applies.
        if units and isinstance(units[0], CompUnitsBase):
            self.n_units = sum(u.n_units for u in units)
        else:
            self.n_units = len(units)

        n_units = self.n_units

        # inputs (one bit per contained unit)
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)

        # outputs (one bit per contained unit)
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        """ Adds every unit as submodule "comp<N>" and wires the group's
            concatenated control signals and shared operands to them.
        """
        m = Module()
        comb = m.d.comb
        units = self.units

        for i, alu in enumerate(units):
            setattr(m.submodules, "comp%d" % i, alu)

        # per-unit outputs are concatenated (in unit order) into the
        # group-level outputs...
        comb += self.rd_rel_o.eq(Cat(*[alu.rd_rel_o for alu in units]))
        comb += self.req_rel_o.eq(Cat(*[alu.req_rel_o for alu in units]))
        comb += self.busy_o.eq(Cat(*[alu.busy_o for alu in units]))
        # ... and the group-level inputs are split back out (same order)
        # across the per-unit inputs.
        comb += Cat(*[alu.go_die_i for alu in units]).eq(self.go_die_i)
        comb += Cat(*[alu.shadown_i for alu in units]).eq(self.shadown_i)
        comb += Cat(*[alu.go_wr_i for alu in units]).eq(self.go_wr_i)
        comb += Cat(*[alu.go_rd_i for alu in units]).eq(self.go_rd_i)
        comb += Cat(*[alu.issue_i for alu in units]).eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "data_o"
        if units:
            data_o = treereduce(units)
            comb += self.data_o.eq(data_o)

        # every unit sees the same pair of source operands
        for alu in units:
            comb += alu.src1_i.eq(self.src1_i)
            comb += alu.src2_i.eq(self.src2_i)

        return m
132
133
class CompUnitALUs(CompUnitsBase):
    """ Group of four integer ALU Computation Units which are all handed
        the same operation (oper_i).
    """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)

        # Int ALUs: four identical instances, occupying (in order) the
        # add, sub, mul and shf slots.  all ALUs are created before any
        # Computation Unit is wrapped around them.
        alus = [ALU(rwid) for _ in range(4)]
        units = [ComputationUnitNoDelay(rwid, 2, alu) for alu in alus]

        CompUnitsBase.__init__(self, rwid, units)

    def elaborate(self, platform):
        """ Base-class wiring, plus the shared operation input. """
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the same operation to all units (rather than hard-wiring
        # a different constant operation into each slot)
        comb += [unit.oper_i.eq(self.oper_i) for unit in self.units]

        return m
172
173
class CompUnitBR(CompUnitsBase):
    """ Group containing a single branch Computation Unit. """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width

            Note: the branch ALU (bgt) and its Computation Unit (br1) are
            kept as attributes so that a shadow unit can be created for it
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)

        # Branch ALU and CU
        self.bgt = BranchALU(rwid)
        self.br1 = ComputationUnitNoDelay(rwid, 3, self.bgt)
        CompUnitsBase.__init__(self, rwid, [self.br1])

    def elaborate(self, platform):
        """ Base-class wiring, plus the shared operation input. """
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the same operation to all units (only br1, at present)
        comb += [unit.oper_i.eq(self.oper_i) for unit in self.units]

        return m
205
206
class FunctionUnits(Elaboratable):
    """ Wraps the integer FU-FU and FU-Reg dependency matrices, wiring
        them to each other and to this module's ports.

        Note: req_rel_i and req_rel_o are declared but are not connected
        to anything inside elaborate().
    """

    def __init__(self, n_regs, n_int_alus):
        """ Inputs:

            * :n_regs:     width of the register-select vectors
            * :n_int_alus: number of integer Function Units
        """
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in

        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)

        self.req_rel_i = Signal(n_int_alus, reset_less=True)
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.req_rel_o = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here, for use in
        # WaWGrid.  the attribute (self.wr_pend_o) is only created when
        # elaborate() runs.

    def elaborate(self, platform):
        """ Instantiates both dependency matrices and connects them. """
        m = Module()
        comb = m.d.comb

        n_intfus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
        m.submodules.intfudeps = intfudeps
        # Integer FU-Reg Dep Matrix
        intregdeps = FURegDepMatrix(n_intfus, self.n_regs)
        m.submodules.intregdeps = intregdeps

        # global pending vectors come straight from the FU-Reg matrix...
        comb += [self.g_int_rd_pend_o.eq(intregdeps.rd_rsel_o),
                 self.g_int_wr_pend_o.eq(intregdeps.wr_rsel_o),
                ]

        # ... which also has them looped back into its own pending inputs
        comb += [intregdeps.rd_pend_i.eq(intregdeps.rd_rsel_o),
                 intregdeps.wr_pend_i.eq(intregdeps.wr_rsel_o),
                ]

        # per-FU pending outputs of the FU-Reg matrix feed the FU-FU matrix
        comb += [intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o),
                 intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o),
                ]
        self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid

        # FU-FU matrix control and results
        comb += [intfudeps.issue_i.eq(self.fn_issue_i),
                 intfudeps.go_rd_i.eq(self.go_rd_i),
                 intfudeps.go_wr_i.eq(self.go_wr_i),
                 intfudeps.go_die_i.eq(self.go_die_i),
                 self.readable_o.eq(intfudeps.readable_o),
                 self.writable_o.eq(intfudeps.writable_o),
                ]

        # Connect function issue / arrays, and dest/src1/src2
        comb += [intregdeps.dest_i.eq(self.dest_i),
                 intregdeps.src1_i.eq(self.src1_i),
                 intregdeps.src2_i.eq(self.src2_i),
                ]

        # FU-Reg matrix receives the same control signals as the FU-FU one
        comb += [intregdeps.go_rd_i.eq(self.go_rd_i),
                 intregdeps.go_wr_i.eq(self.go_wr_i),
                 intregdeps.go_die_i.eq(self.go_die_i),
                 intregdeps.issue_i.eq(self.fn_issue_i),
                ]

        # register-select outputs
        comb += [self.dest_rsel_o.eq(intregdeps.dest_rsel_o),
                 self.src1_rsel_o.eq(intregdeps.src1_rsel_o),
                 self.src2_rsel_o.eq(intregdeps.src2_rsel_o),
                ]

        return m
282
283
class Scoreboard(Elaboratable):
    """ Top-level scoreboard experiment: register files, Computation Units
        (four ALUs plus one branch unit), dependency matrices, group
        picker, issue unit, shadow matrices and a branch speculation
        recorder, all wired together in elaborate().
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these
        # (group sizes 4 and 1 match the 4 ALUs + 1 branch unit created
        # in elaborate, where n_int_alus is hard-coded to 5)
        self.aluissue = IssueUnitGroup(4)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        # NOTE(review): elaborate() sets branch_direction_o to
        # br1.data_o + 1, with "branch occurs if data == 1" - i.e. the
        # value is 2 when the branch occurs.  confirm which of the two
        # descriptions is the intended one.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Creates all submodules and wires them together. """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested but not connected anywhere below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 2)
        cub = CompUnitBR(self.rwid, 2)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(n_intfus) # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        # NOTE(review): as written, the branch shadow appears to live in
        # the second matrix (bshadow) rather than in an extra column of
        # the first - confirm against ShadowMatrix's constructor.
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        # (fn_issue_prev is written below but not read within this class)
        fn_issue_prev = Signal(n_intfus)
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [    regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     self.issue_o.eq(issueunit.issue_o)
                ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cub.oper_i.eq(self.br_oper_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # work out the current-activated busy unit (by recording the old one)
        with m.If(fn_issue_o): # only update prev bit if instruction issued
            sync += fn_issue_prev.eq(fn_issue_o)

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # branch counts as "active" from its issue until its go_wr fires
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        # (the 0x1f mask below is five bits wide, matching n_intfus == 5)
        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        # yields whatever both register files yield, then the register
        # number inputs, issue_o and the branch speculation signals
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        # list form of __iter__
        return list(self)
558
class IssueToScoreboard(Elaboratable):
    """ Connects an InstructionQ to a Scoreboard: the instruction at the
        head of the queue is decoded into an ALU or branch issue request,
        and is popped from the queue once the chosen issue group reports
        that it was issued.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   instruction queue length (passed to InstructionQ)
            * :n_in:   number of data_i entries (passed to InstructionQ)
            * :n_out:  passed to InstructionQ
            * :rwid:   register width (InstructionQ, Scoreboard)
            * :opwid:  operation width (InstructionQ, Instruction.nq)
            * :n_regs: number of registers (passed to Scoreboard)
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # used as the shape of p_add_i and qlen_o: int(log2(qlen))+2 bits,
        # presumably as a (width, signed) pair - confirm against Signal
        mqbits = (int(log(qlen) / log(2))+2, False)
        self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
        self.p_ready_o = Signal() # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True) # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        """ Creates the queue and scoreboard and the issue logic between
            them.  Also sets self.intregs (only available afterwards).
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when n_sub_i is set (below).

        # in "waiting" state
        # (both are combinatorial: set further down, in the same cycle,
        # whenever the queue is non-empty)
        wait_issue_br = Signal()
        wait_issue_alu = Signal()

        with m.If(wait_issue_br | wait_issue_alu):
            # set instruction pop length to 1 if the unit accepted
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            dest = iq.data_o[0].dest_i
            src1 = iq.data_o[0].src1_i
            src2 = iq.data_o[0].src2_i
            op = iq.data_o[0].oper_i

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1) # enable the regfile

            # choose a Function-Unit-Group
            # (bits 2-3 of the operation select branch; bits 0-1 are
            # passed on as the unit's operation)
            with m.If((op & (0x3<<2)) != 0): # branch
                comb += sc.brissue.insn_i.eq(1)
                comb += sc.br_oper_i.eq(op & 0x3)
                comb += wait_issue_br.eq(1)
            with m.Else(): # alu
                comb += sc.aluissue.insn_i.eq(1)
                comb += sc.alu_oper_i.eq(op & 0x3)
                comb += wait_issue_alu.eq(1)

            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            #yield sc.branch_fail_i.eq(branch_fail)
            #yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        # p_ready_o, then every item of every data_i entry, then p_add_i
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        # list form of __iter__
        return list(self)
661
# operation numbers understood by RegSim.op.  values >= 4 are treated as
# branch comparisons by the test code in this file.
IADD = 0
ISUB = 1
IMUL = 2
ISHF = 3
IBGT = 4
IBLT = 5
IBEQ = 6
IBNE = 7


class RegSim:
    """ Software model of a register file, used to compute the values the
        hardware registers are expected to hold.
    """

    def __init__(self, rwidth, nregs):
        """ Inputs:

            * :rwidth: register bit width (results are truncated to this)
            * :nregs:  number of registers (all start at zero)
        """
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, src1, src2, dest):
        """ Performs operation *op* on registers src1 and src2, stores the
            result (truncated to rwidth bits) in register dest, returns it.

            src1, src2 and dest are register *numbers*.
            Raises ValueError if op is not one of the I* constants.
        """
        maxbits = (1 << self.rwidth) - 1
        src1 = self.regs[src1] & maxbits
        src2 = self.regs[src2] & maxbits
        if op == IADD:
            val = src1 + src2
        elif op == ISUB:
            val = src1 - src2
        elif op == IMUL:
            val = src1 * src2
        elif op == ISHF:
            val = src1 >> (src2 & maxbits)
        elif op == IBGT:
            val = int(src1 > src2)
        elif op == IBLT:
            val = int(src1 < src2)
        elif op == IBEQ:
            val = int(src1 == src2)
        elif op == IBNE:
            val = int(src1 != src2)
        else:
            # previously fell through, failing on the unassigned "val"
            raise ValueError("unknown operation %r" % (op,))
        val &= maxbits
        self.setval(dest, val)
        return val

    def setval(self, dest, val):
        """ Sets register dest to val (no truncation), logging the change.
        """
        print ("sim setval", dest, hex(val))
        self.regs[dest] = val

    def dump(self, dut):
        """ Simulation process (generator): prints every expected register
            value next to the one read back from dut.intregs.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """ Simulation process (generator): compares every register read
            back from dut.intregs with the expected value.  On the first
            mismatch, dumps all registers and raises AssertionError.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                # explicit raise: a bare "assert False" is removed
                # when python is run with -O
                raise AssertionError("reg %d expected %x received %x" %
                                     (i, val, reg))
717
def instr_q(dut, op, src1, src2, dest, branch_success, branch_fail):
    """ Simulation process: places one instruction on dut.data_i, requests
        that it be added to the queue, and waits until p_ready_o is seen.

        branch_success and branch_fail are accepted but not used.
    """
    batch = [dict(oper_i=op, dest_i=dest, src1_i=src1, src2_i=src2)]
    count = len(batch)

    for slot, fields in enumerate(batch):
        yield from eq(dut.data_i[slot], fields)
        readback = yield dut.data_i[slot]
        print ("senddata %d %x" % (slot, readback))
    yield dut.p_add_i.eq(count)

    # advance one cycle at a time until the queue reports acceptance
    while True:
        yield
        accepted = yield dut.p_ready_o
        if accepted:
            break

    # withdraw the add-request
    yield dut.p_add_i.eq(0)
734
735
def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
    """ Simulation process: presents one instruction directly to the
        scoreboard's issue inputs, then waits for it to be issued.

        Operations with either of bits 2-3 set go to the branch issue
        group, all others to the ALU group; bits 0-1 are the operation
        handed to the chosen group.
    """
    yield from disable_issue(dut)

    # register numbers
    for port, regnum in ((dut.int_dest_i, dest),
                         (dut.int_src1_i, src1),
                         (dut.int_src2_i, src2)):
        yield port.eq(regnum)

    # pick the issue group and its operation input
    if op & (0x3 << 2):
        issue_unit, oper_port = dut.brissue, dut.br_oper_i
    else:
        issue_unit, oper_port = dut.aluissue, dut.alu_oper_i
    yield issue_unit.insn_i.eq(1)
    yield oper_port.eq(Const(op & 0x3, 2))
    yield dut.reg_enable_i.eq(1)

    # request that the instruction be made shadow-dependent on (either)
    # branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, issue_unit)
758
759
def print_reg(dut, rnums):
    """ Simulation process: reads the listed registers from dut.intregs
        and prints their numbers and (hex) values on a single line.
    """
    values = []
    for rnum in rnums:
        value = yield dut.intregs.regs[rnum].reg
        values.append("%x" % value)
    labels = ','.join(str(rnum) for rnum in rnums)
    print ("reg %s: %s" % (labels, ','.join(values)))
767
768
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """ Returns a list of n_ops random instructions.

        Each is (src1, src2, dest, op), with register numbers between 1
        and dut.n_regs-1 and op between 0 and max_opnums (all inclusive).
        If shadowing is set, a fifth item (0, 0) is added to each.
    """
    def pick_reg():
        return randint(1, dut.n_regs-1)

    ops = []
    for _ in range(n_ops):
        # draw order matters for reproducibility: src1, src2, dest, op
        src1 = pick_reg()
        src2 = pick_reg()
        dest = pick_reg()
        op = randint(0, max_opnums)

        entry = (src1, src2, dest, op)
        if shadowing:
            entry = entry + ((0, 0),)
        ops.append(entry)
    return ops
782
783
def wait_for_busy_clear(dut):
    """ Simulation process: advances one cycle at a time (printing "busy")
        until dut.busy_o reads as zero.
    """
    still_busy = yield dut.busy_o
    while still_busy:
        print ("busy")
        yield
        still_busy = yield dut.busy_o
791
def disable_issue(dut):
    """ Simulation process: clears the instruction-request input of the
        ALU issue group, then that of the branch issue group.
    """
    for group in (dut.aluissue, dut.brissue):
        yield group.insn_i.eq(0)
795
796
def wait_for_issue(dut, dut_issue):
    """ Simulation process: advances one cycle at a time (printing "busy")
        until dut_issue.fn_issue_o is non-zero, then withdraws the issue
        requests and disables register decode.
    """
    issued = yield dut_issue.fn_issue_o
    while not issued:
        print ("busy")
        yield
        issued = yield dut_issue.fn_issue_o

    # instruction accepted: stop requesting issue
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
808
def scoreboard_branch_sim(dut, alusim):
    """ Simulation process for the branch-speculation experiment.

        Issues a short fixed program containing one branch (whose "dest"
        field carries the two lists of instructions to run on success /
        on fail) directly to the scoreboard via int_instr, runs the same
        program through alusim, then compares the register files.

        * :dut:    the device under test; the signals accessed here
                   (branch_direction_o, intregs, n_regs, busy_o) are those
                   of a Scoreboard
        * :alusim: RegSim instance holding the expected register values
    """

    iseed = 3

    for i in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        # (note: re-uses "i", and the first value of val is immediately
        # replaced by the random one)
        for i in range(1, dut.n_regs):
            val = 31+i*3
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # disabled: randomly-generated variant of the program
        if False:
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        # enabled: fixed program.  instruction tuples are
        # (src1, src2, dest, op, (shadow_on, shadow_off)); for a branch
        # (op >= 4) "dest" is the (branch_ok, branch_fail) list pair.
        if True:
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # the issue loop below consumes insts, so keep a copy for alusim
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts # same list object: pops and appends affect both
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch already resolved: issue without any shadow
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            yield from int_instr(dut, op, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # now run the copy of the program through the software model,
        # following only the side of each branch that alusim says is taken
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            branch_res = alusim.op(op, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
921
922
def scoreboard_sim(dut, alusim):
    """ Simulation process: fills the registers with random values, sends
        a list of instructions through the instruction queue (instr_q),
        applying each to alusim as well, waits for the queue to empty and
        the Computation Units to go idle, then compares register files.

        * :dut:    the device under test; the signals accessed here
                   (intregs, n_regs, qlen_o, busy_o, data_i, p_add_i,
                   p_ready_o) are those of an IssueToScoreboard
        * :alusim: RegSim instance holding the expected register values

        The "if False" sections are disabled regression tests.  NOTE: the
        ones that append 4-item tuples would, if enabled, fail at the
        issue loop, which unpacks a fifth (br_ok, br_fail) item.
    """

    #seed(2)

    for i in range(1):

        # set random values in the registers
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        # each is (src1, src2, dest, op, (br_ok, br_fail))
        instrs = []
        if True:
            instrs = create_random_ops(dut, 15, True, 3)

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )


        if False:
            instrs.append((2, 3, 3, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, (0, 0)))
            instrs.append((3, 5, 5, 2, (0, 0)))
            instrs.append((5, 3, 3, 3, (0, 0)))
            instrs.append((3, 5, 5, 0, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            # branch (op 4) on fixed register values, followed by an
            # instruction marked (0, 1)
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            # as above, with a different value in register 5 and the
            # following instruction marked (1, 0)
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, (0, 0)) )
            instrs.append( (5, 2, 3, 1, (0, 0)) )
            instrs.append( (7, 1, 5, 2, (0, 0)) )
            instrs.append( (5, 6, 6, 4, (0, 0)) )
            instrs.append( (7, 5, 2, 2, (1, 0)) )
            instrs.append( (1, 7, 5, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, (1, 0)) )
            instrs.append( (1, 6, 7, 3, (0, 0)) )
            instrs.append( (6, 7, 7, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, (src1, src2, dest, op, (br_ok, br_fail)) in enumerate(instrs):

            print ("instr %d: (%d, %d, %d, %d)" % (i, src1, src2, dest, op))
            # expected result is computed at queueing time, in program order
            alusim.op(op, src1, src2, dest)
            yield from instr_q(dut, op, src1, src2, dest, br_ok, br_fail)

        # wait for all instructions to stop before checking
        # (first for the queue to drain...)
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # (... then a few extra cycles, then for the CUs to go idle)
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1055
1056
def test_scoreboard():
    """ Builds an IssueToScoreboard, writes its RTLIL to
        test_scoreboard6600.il, then runs scoreboard_sim against it
        (waveform written to test_scoreboard6600.vcd).
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)

    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    # the branch-speculation variant is currently not run:
    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1069
1070
# run the scoreboard test when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()