Allow the formal engine to perform a same-cycle result in the ALU
[soc.git] / src / experiment / score6600.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
4
5 from regfile.regfile import RegFileArray, treereduce
6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
7 from scoreboard.fu_reg_matrix import FURegDepMatrix
8 from scoreboard.global_pending import GlobalPending
9 from scoreboard.group_picker import GroupPicker
10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
12 from scoreboard.instruction_q import Instruction, InstructionQ
13
14 from compalu import ComputationUnitNoDelay
15
16 from alu_hier import ALU, BranchALU
17 from nmutil.latch import SRLatch
18 from nmutil.nmoperator import eq
19
20 from random import randint, seed
21 from copy import deepcopy
22 from math import log
23
24
class Memory(Elaboratable):
    """ Simple byte-addressed memory wrapped around an nmigen Memory.

        One read port and one write port are created in elaborate();
        both are driven from the single address input ``adr``.
    """
    def __init__(self, regwid, addrw):
        """ Inputs:

            * :regwid: width of one memory word, in bits
            * :addrw:  width of the byte address, in bits
        """
        # Local, aliased import: at module level the name "Memory" is this
        # very class, so the nmigen primitive has to be fetched under a
        # different name (calling Memory(...) here would recurse into
        # this constructor and fail on the unknown keyword arguments).
        from nmigen import Memory as NMigenMemory

        bytes_per_word = regwid // 8
        # integer division: depth is used as the memory depth and as a
        # range() bound, neither of which accepts a float
        depth = (1 << addrw) // bytes_per_word
        # number of low address bits that select a byte within one word
        # (assumes bytes_per_word is a power of two; this is 2 for 32-bit)
        self.adr_lsb = bytes_per_word.bit_length() - 1

        self.adr = Signal(addrw)
        self.dat_r = Signal(regwid)
        self.dat_w = Signal(regwid)
        self.we = Signal()
        # every word is initialised to its own index
        self.mem = NMigenMemory(width=regwid, depth=depth,
                                init=range(0, depth))

    def elaborate(self, platform):
        m = Module()
        m.submodules.rdport = rdport = self.mem.read_port()
        m.submodules.wrport = wrport = self.mem.write_port()
        # both ports are word-addressed: drop the byte-select bits so that
        # a write and a later read of the same address hit the same word
        word_adr = self.adr[self.adr_lsb:]
        m.d.comb += [
            rdport.addr.eq(word_adr),
            self.dat_r.eq(rdport.data),
            wrport.addr.eq(word_adr),
            wrport.data.eq(self.dat_w),
            wrport.en.eq(self.we),
        ]
        return m
46
47
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Groups a sequence of units behind one set of ports.  Each unit's
        single-bit control/status signals (issue, go_rd, go_wr, shadown,
        go_die, busy, rd_rel, req_rel) are concatenated, in unit order,
        into one multi-bit signal on this class.

        The class works recursively: the "units" may themselves be
        CompUnitsBase instances, in which case their (already
        concatenated) signals are concatenated again and the number of
        units reported here is the sum over the sub-groups.

        Source operands are broadcast to every unit, and the units'
        data outputs are merged into a single value with treereduce.

        At the lower level, the intent is that groups of (identical)
        ALUs may be passed the same operation, and ultimately share the
        *same pipeline*, becoming a "Concurrent Computation Unit" as
        defined by Mitch Alsup (see section 11.4.9.3).
    """
    def __init__(self, rwid, units):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :units:  sequence of ALUs (or CompUnitsBase derivatives)
        """
        self.units = units
        self.rwid = rwid
        if units and isinstance(units[0], CompUnitsBase):
            # a group of groups: total is the sum of the sub-group sizes
            self.n_units = sum(grp.n_units for grp in self.units)
        else:
            self.n_units = len(units)

        n_units = self.n_units

        # inputs
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)

        # outputs
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        for idx, unit in enumerate(self.units):
            setattr(m.submodules, "comp%d" % idx, unit)

        def gather(attr):
            # concatenate the named signal of every unit, in unit order
            return Cat(*[getattr(unit, attr) for unit in self.units])

        # unit outputs -> concatenated outputs of this group
        comb += self.rd_rel_o.eq(gather("rd_rel_o"))
        comb += self.req_rel_o.eq(gather("req_rel_o"))
        comb += self.busy_o.eq(gather("busy_o"))
        # concatenated inputs of this group -> unit inputs
        comb += gather("go_die_i").eq(self.go_die_i)
        comb += gather("shadown_i").eq(self.shadown_i)
        comb += gather("go_wr_i").eq(self.go_wr_i)
        comb += gather("go_rd_i").eq(self.go_rd_i)
        comb += gather("issue_i").eq(self.issue_i)

        # merge all unit data outputs into a single value.  treereduce is
        # handed the units themselves (per the original note, it needs
        # items that have a "data_o" attribute)
        if self.units:
            merged = treereduce(self.units)
            comb += self.data_o.eq(merged)

        # every unit sees the same pair of source operands
        for unit in self.units:
            comb += [unit.src1_i.eq(self.src1_i),
                     unit.src2_i.eq(self.src2_i)]

        return m
155
156
class CompUnitALUs(CompUnitsBase):
    """ Group of four integer ALU Computation Units (add, sub, mul, shf)
        that are all handed the same operation input.
    """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)

        # Int ALUs, in order: add, sub, mul, shf.  All four ALUs are
        # created first and then each is wrapped in a Computation Unit.
        alus = [ALU(rwid) for _ in range(4)]
        units = [ComputationUnitNoDelay(rwid, 2, alu) for alu in alus]

        CompUnitsBase.__init__(self, rwid, units)

    def elaborate(self, platform):
        m = CompUnitsBase.elaborate(self, platform)

        # hand the same operation to all units (rather than tying each
        # unit to one fixed opcode: 0=add, 1=sub, 2=mul, 3=shf)
        m.d.comb += [unit.oper_i.eq(self.oper_i) for unit in self.units]

        return m
195
196
class CompUnitBR(CompUnitsBase):
    """ Group holding the single branch Computation Unit. """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width

            Note: the branch ALU (bgt) and its Computation Unit (br1) are
            kept as attributes so that a shadow unit can be created for
            the branch.
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)

        # Branch ALU and CU
        self.bgt = BranchALU(rwid)
        self.br1 = ComputationUnitNoDelay(rwid, 3, self.bgt)
        CompUnitsBase.__init__(self, rwid, [self.br1])

    def elaborate(self, platform):
        m = CompUnitsBase.elaborate(self, platform)

        # hand the same operation to all units (rather than fixing the
        # branch unit to a single opcode such as bgt)
        m.d.comb += [unit.oper_i.eq(self.oper_i) for unit in self.units]

        return m
228
229
class FunctionUnits(Elaboratable):
    """ Integer Function Unit dependency tracking.

        Instantiates an FU-FU dependency matrix and an FU-Register
        dependency matrix and wires them to each other and to the ports
        of this class.
    """

    def __init__(self, n_regs, n_int_alus):
        """ Inputs:

            * :n_regs:     number of registers (width of the reg vectors)
            * :n_int_alus: number of integer Function Units
        """
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in

        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)

        self.req_rel_i = Signal(n_int_alus, reset_less = True)
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.req_rel_o = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: the FU-Reg matrix's wr_pend_o is also made available as
        # self.wr_pend_o (set in elaborate), for use in a WaWGrid

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        n_intfus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
        m.submodules.intfudeps = intfudeps
        # Integer FU-Reg Dep Matrix
        intregdeps = FURegDepMatrix(n_intfus, self.n_regs)
        m.submodules.intregdeps = intregdeps

        # global pending vectors come straight from the FU-Reg matrix,
        # and are also looped back into its own pending inputs
        comb += [self.g_int_rd_pend_o.eq(intregdeps.rd_rsel_o),
                 self.g_int_wr_pend_o.eq(intregdeps.wr_rsel_o),
                 intregdeps.rd_pend_i.eq(intregdeps.rd_rsel_o),
                 intregdeps.wr_pend_i.eq(intregdeps.wr_rsel_o)]

        # per-FU pending from the FU-Reg matrix feeds the FU-FU matrix
        comb += [intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o),
                 intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)]
        self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid

        # FU-FU matrix control inputs and readable/writable outputs
        comb += [intfudeps.issue_i.eq(self.fn_issue_i),
                 intfudeps.go_rd_i.eq(self.go_rd_i),
                 intfudeps.go_wr_i.eq(self.go_wr_i),
                 intfudeps.go_die_i.eq(self.go_die_i),
                 self.readable_o.eq(intfudeps.readable_o),
                 self.writable_o.eq(intfudeps.writable_o)]

        # Connect function issue / arrays, and dest/src1/src2
        comb += [intregdeps.dest_i.eq(self.dest_i),
                 intregdeps.src1_i.eq(self.src1_i),
                 intregdeps.src2_i.eq(self.src2_i)]

        comb += [intregdeps.go_rd_i.eq(self.go_rd_i),
                 intregdeps.go_wr_i.eq(self.go_wr_i),
                 intregdeps.go_die_i.eq(self.go_die_i),
                 intregdeps.issue_i.eq(self.fn_issue_i)]

        comb += [self.dest_rsel_o.eq(intregdeps.dest_rsel_o),
                 self.src1_rsel_o.eq(intregdeps.src1_rsel_o),
                 self.src2_rsel_o.eq(intregdeps.src2_rsel_o)]

        return m
305
306
class Scoreboard(Elaboratable):
    """ Top-level scoreboard experiment.

        Wires together: the INT and FP register files, a group of four
        ALU Computation Units plus one branch Computation Unit, the
        FunctionUnits dependency matrices, a GroupPicker, the issue
        units, two ShadowMatrix instances (instruction-order shadows and
        branch shadows) and a BranchSpeculationRecord.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these: one issue group for the four
        # ALUs and one for the single branch unit
        self.aluissue = IssueUnitGroup(4)
        self.brissue = IssueUnitGroup(1)
        # and these (operation inputs handed to the ALU / branch groups)
        self.alu_oper_i = Signal(4, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are created but not connected anywhere in this method
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units: 4 ALUs (cua) + 1 branch unit (cub),
        # grouped recursively into one 5-wide Computation Unit group
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 2)
        cub = CompUnitBR(self.rwid, 2)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker: one picker across all n_intfus units
        intpick1 = GroupPicker(n_intfus) # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrices.  "shadows" has n_intfus shadows, to be used for
        # write-after-write hazards; "bshadow" is a separate matrix with a
        # single shadow, used for branches.
        # NOTE(review): the meaning of the third (boolean) argument is
        # defined by ShadowMatrix, which is not visible here.
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        fn_issue_prev = Signal(n_intfus)
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [    regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     self.issue_o.eq(issueunit.issue_o)
                ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cub.oper_i.eq(self.br_oper_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        # scoreboard is busy if any Computation Unit is busy
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        # shadown is AND-ed across both matrices, go_die is OR-ed
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        # the instruction-order shadows are reset by the branch go_die
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # work out the current-activated busy unit (by recording the old one)
        # NOTE: fn_issue_prev is only written here; it is not read
        # anywhere else in this method.
        with m.If(fn_issue_o): # only update prev bit if instruction issued
            sync += fn_issue_prev.eq(fn_issue_o)

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the separate
        # single-shadow "bshadow" matrix created above.
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # a branch is "active" from its issue until its go_wr
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                # select bit i of the issue vector
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # 0x1f masks the issue vector to 5 bits (n_int_alus is 5 above)
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            # data_o + 1 gives the 1 / 2 encoding described for
            # branch_direction_o in __init__
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        # yields both register files' contents followed by this module's
        # own signals; used by ports()
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        # list form of __iter__
        return list(self)
581
class IssueToScoreboard(Elaboratable):
    """ Instruction queue in front of a Scoreboard.

        Instructions are added to an InstructionQ through p_add_i /
        data_i.  The entry at the head of the queue is inspected in
        place, presented to the Scoreboard's ALU or branch issue group,
        and popped once that group reports that it has issued.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   instruction queue length
            * :n_in:   number of instructions that may be added at once
            * :n_out:  number of queue outputs
            * :rwid:   register bit width
            * :opwid:  operation bit width
            * :n_regs: number of registers
        """
        self.qlen, self.n_in, self.n_out = qlen, n_in, n_out
        self.rwid, self.opw, self.n_regs = rwid, opwid, n_regs

        # (width, signed) shape for the "count" style signals
        mqbits = (int(log(qlen) / log(2))+2, False)
        self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
        self.p_ready_o = Signal() # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True) # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        self.intregs = sc.intregs

        # the "busy" signal and instruction queue length, then the
        # instruction queue's add/ready handshake
        comb += [self.busy_o.eq(sc.busy_o),
                 self.qlen_o.eq(iq.qlen_o),
                 iq.p_add_i.eq(self.p_add_i),
                 self.p_ready_o.eq(iq.p_ready_o)]
        for idx in range(self.n_in):
            comb += eq(iq.data_i[idx], self.data_i[idx])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items: an item is only popped (n_sub_i) below, once the issue
        # group it was presented to has issued it.

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()

        with m.If(wait_issue_br | wait_issue_alu):
            # set instruction pop length to 1 if the unit accepted
            for waiting, group in ((wait_issue_br, sc.brissue),
                                   (wait_issue_alu, sc.aluissue)):
                with m.If(waiting & (group.fn_issue_o != 0)):
                    with m.If(iq.qlen_o != 0):
                        comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # head-of-queue instruction: operands and operation
            head = iq.data_o[0]
            opcode = head.oper_i

            # set the src/dest regs and enable the regfile
            comb += [sc.int_dest_i.eq(head.dest_i),
                     sc.int_src1_i.eq(head.src1_i),
                     sc.int_src2_i.eq(head.src2_i),
                     sc.reg_enable_i.eq(1)]

            # choose a Function-Unit-Group: bits 2-3 set means "branch"
            with m.If((opcode & (0x3<<2)) != 0): # branch
                comb += [sc.brissue.insn_i.eq(1),
                         sc.br_oper_i.eq(opcode & 0x3),
                         wait_issue_br.eq(1)]
            with m.Else(): # alu
                comb += [sc.aluissue.insn_i.eq(1),
                         sc.alu_oper_i.eq(opcode & 0x3),
                         wait_issue_alu.eq(1)]

            # XXX TODO
            # sc.branch_fail_i / sc.branch_succ_i, which indicate that
            # the instruction is to be made shadow-dependent on (either)
            # branch success or branch fail, are not driven here yet.

        return m

    def __iter__(self):
        yield self.p_ready_o
        for insn in self.data_i:
            yield from list(insn)
        yield self.p_add_i

    def ports(self):
        return list(self)
684
# Opcodes understood by RegSim.op: four integer operations followed by
# four branch comparisons.
IADD = 0
ISUB = 1
IMUL = 2
ISHF = 3
IBGT = 4
IBLT = 5
IBEQ = 6
IBNE = 7


class RegSim:
    """ Pure-python reference model of the integer register file.

        Holds one integer per register and applies the same operations
        that are sent to the hardware, so that the two register files
        can be compared at the end of a simulation.
    """
    def __init__(self, rwidth, nregs):
        """ Inputs:

            * :rwidth: register width in bits (results are truncated to it)
            * :nregs:  number of registers, all initialised to zero
        """
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, src1, src2, dest):
        """ Apply one operation and store the result.

            * :op:   one of the I* opcode constants
            * :src1: register number of the first operand
            * :src2: register number of the second operand
            * :dest: register number that receives the result

            Returns the (width-truncated) value written to dest.
            Raises ValueError for an opcode that is not recognised.
        """
        maxbits = (1 << self.rwidth) - 1
        src1 = self.regs[src1] & maxbits
        src2 = self.regs[src2] & maxbits
        if op == IADD:
            val = src1 + src2
        elif op == ISUB:
            val = src1 - src2
        elif op == IMUL:
            val = src1 * src2
        elif op == ISHF:
            val = src1 >> (src2 & maxbits)
        elif op == IBGT:
            val = int(src1 > src2)
        elif op == IBLT:
            val = int(src1 < src2)
        elif op == IBEQ:
            val = int(src1 == src2)
        elif op == IBNE:
            val = int(src1 != src2)
        else:
            # previously fell through and failed later with an
            # UnboundLocalError on "val"; report the real problem instead
            raise ValueError("unknown opcode %r" % (op,))
        # truncate to register width (also wraps negative ISUB results)
        val &= maxbits
        self.setval(dest, val)
        return val

    def setval(self, dest, val):
        """ Store val in register dest (and log it). """
        print ("sim setval", dest, hex(val))
        self.regs[dest] = val

    def dump(self, dut):
        """ Simulation generator: print every register of the model
            next to the value read back from dut.intregs.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """ Simulation generator: compare every register of the model
            with dut.intregs.  On the first mismatch, dump all registers
            and raise AssertionError.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                # explicit raise: "assert False" is removed under python -O,
                # which would let a failing comparison pass silently
                raise AssertionError(
                    "reg %d expected %x received %x" % (i, val, reg))
740
def instr_q(dut, op, src1, src2, dest, branch_success, branch_fail):
    """ Simulation generator: push one instruction into dut's queue.

        The instruction fields are placed on dut.data_i, p_add_i is set
        to the number of instructions being sent, and the generator then
        waits (one clock at a time) until dut.p_ready_o is seen before
        clearing p_add_i again.  branch_success / branch_fail are
        accepted but not used.
    """
    batch = [{'oper_i': op, 'dest_i': dest, 'src1_i': src1, 'src2_i': src2}]
    sendlen = 1

    for idx, fields in enumerate(batch[:sendlen]):
        yield from eq(dut.data_i[idx], fields)
        di = yield dut.data_i[idx]
        print ("senddata %d %x" % (idx, di))
    yield dut.p_add_i.eq(sendlen)

    # always wait at least one clock, then poll p_ready_o every clock
    while True:
        yield
        accepted = yield dut.p_ready_o
        if accepted:
            break

    yield dut.p_add_i.eq(0)
756 yield dut.p_add_i.eq(0)
757
758
def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
    """ Simulation generator: present one instruction directly to the
        scoreboard and wait until it has been issued.

        Opcodes with bit 2 or bit 3 set go to the branch issue group,
        all others to the ALU issue group; in both cases only the low
        two bits of op are passed on as the operation.
    """
    yield from disable_issue(dut)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    is_branch = (op & (0x3<<2)) != 0
    if is_branch:
        dut_issue, oper_sig = dut.brissue, dut.br_oper_i
    else:
        dut_issue, oper_sig = dut.aluissue, dut.alu_oper_i
    yield dut_issue.insn_i.eq(1)
    yield oper_sig.eq(Const(op & 0x3, 2))
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
781
782
def print_reg(dut, rnums):
    """ Simulation generator: read the listed integer registers from
        dut and print their numbers and (hex) values on one line.
    """
    values = []
    for rnum in rnums:
        current = yield dut.intregs.regs[rnum].reg
        values.append("%x" % current)
    labels = ','.join(str(rnum) for rnum in rnums)
    print ("reg %s: %s" % (labels, ','.join(values)))
790
791
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """ Build a list of n_ops random instructions.

        Each instruction is a tuple (src1, src2, dest, op): the register
        numbers are drawn from 1..dut.n_regs-1 and the opcode from
        0..max_opnums.  With shadowing set, a fifth element (0, 0) is
        added to every tuple.
    """
    top_reg = dut.n_regs - 1
    insts = []
    for _ in range(n_ops):
        # draw order (src1, src2, dest, op) is kept so that seeded runs
        # produce the same instruction stream
        src1 = randint(1, top_reg)
        src2 = randint(1, top_reg)
        dest = randint(1, top_reg)
        op = randint(0, max_opnums)

        entry = (src1, src2, dest, op)
        if shadowing:
            entry += ((0, 0),)
        insts.append(entry)
    return insts
805
806
def wait_for_busy_clear(dut):
    """ Simulation generator: wait, one clock at a time, until
        dut.busy_o reads as zero.
    """
    while (yield dut.busy_o):
        print ("busy",)
        yield
814
def disable_issue(dut):
    """ Simulation generator: clear insn_i on the ALU issue group and
        then on the branch issue group.
    """
    for group in (dut.aluissue, dut.brissue):
        yield group.insn_i.eq(0)
818
819
def wait_for_issue(dut, dut_issue):
    """ Simulation generator: wait, one clock at a time, until
        dut_issue.fn_issue_o is non-zero, then withdraw the instruction
        (clear both insn_i inputs and reg_enable_i).
    """
    while not (yield dut_issue.fn_issue_o):
        print ("busy",)
        yield
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
831
def scoreboard_branch_sim(dut, alusim):
    """ Simulation generator: branch speculation test.

        Issues an instruction list containing a branch to dut through
        int_instr.  The instructions of both branch outcomes are queued,
        marked as shadowed on branch-success or branch-fail; once
        dut.branch_direction_o is non-zero the instructions of the path
        not taken are skipped.  The same program is then replayed on the
        alusim reference model, following only the taken path, and the
        two register files are compared.

        * :dut:    scoreboard under test
        * :alusim: RegSim reference model
    """

    iseed = 3

    for i in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        # (NOTE: this inner loop re-uses the outer loop variable "i")
        for i in range(1, dut.n_regs):
            val = 31+i*3
            # the line above is immediately overridden by a random value
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # disabled: randomly generated program
        if False:
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            # a branch carries its two successor lists in the "dest" slot
            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        # enabled: fixed program, one add followed by one branch
        if True:
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # private copy for the reference-model replay further down: the
        # issue loop below consumes and extends "insts"
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts  # same list object as insts (an alias, not a copy)
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch already resolved: issue without any shadow
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                # unpack the successor lists; src2 is used as the dest
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            yield from int_instr(dut, op, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay the program on the reference model, following only the
        # successor list selected by each branch result
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                # placeholder for "no instruction on this path"
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            branch_res = alusim.op(op, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
944
945
def scoreboard_sim(dut, alusim):
    """ Simulation generator: random-instruction regression test.

        Loads random values into the registers of both dut and the
        alusim reference model, builds an instruction list, applies each
        instruction to the model and queues it on dut through instr_q,
        then waits for the queue to drain and the units to go idle
        before comparing the two register files.

        The "if False:" sections are disabled regression vectors kept
        for reference.  Several of them append 4-element tuples, which
        the issue loop below (expecting a 5th (br_ok, br_fail) element)
        would not be able to unpack if they were re-enabled as they are.

        * :dut:    IssueToScoreboard-style device under test
        * :alusim: RegSim reference model
    """

    #seed(2)

    for i in range(1):

        # set random values in the registers
        # (NOTE: this inner loop re-uses the outer loop variable "i")
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if True:
            instrs = create_random_ops(dut, 15, True, 3)

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )


        if False:
            instrs.append((2, 3, 3, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, (0, 0)))
            instrs.append((3, 5, 5, 2, (0, 0)))
            instrs.append((5, 3, 3, 3, (0, 0)))
            instrs.append((3, 5, 5, 0, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, (0, 0)) )
            instrs.append( (5, 2, 3, 1, (0, 0)) )
            instrs.append( (7, 1, 5, 2, (0, 0)) )
            instrs.append( (5, 6, 6, 4, (0, 0)) )
            instrs.append( (7, 5, 2, 2, (1, 0)) )
            instrs.append( (1, 7, 5, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, (1, 0)) )
            instrs.append( (1, 6, 7, 3, (0, 0)) )
            instrs.append( (6, 7, 7, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, (src1, src2, dest, op, (br_ok, br_fail)) in enumerate(instrs):

            print ("instr %d: (%d, %d, %d, %d)" % (i, src1, src2, dest, op))
            # apply to the reference model first, then queue on the dut
            alusim.op(op, src1, src2, dest)
            yield from instr_q(dut, op, src1, src2, dest, br_ok, br_fail)

        # wait for all instructions to stop before checking:
        # first until the instruction queue is empty ...
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # ... then four more clocks, then until no unit is busy
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1078
1079
def test_scoreboard():
    """ Build the instruction-queue + scoreboard design, write it out
        as RTLIL and run the random-instruction simulation on it.
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)

    # RTLIL dump of the design
    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    # the branch test (scoreboard_branch_sim) is run the same way,
    # by substituting it for scoreboard_sim below
    testbench = scoreboard_sim(dut, alusim)
    run_simulation(dut, testbench, vcd_name='test_scoreboard6600.vcd')


if __name__ == '__main__':
    test_scoreboard()