rename fu-regs rd/wr sel vector
[soc.git] / src / experiment / score6600.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
4
5 from regfile.regfile import RegFileArray, treereduce
6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
7 from scoreboard.fu_reg_matrix import FURegDepMatrix
8 from scoreboard.global_pending import GlobalPending
9 from scoreboard.group_picker import GroupPicker
10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
12 from scoreboard.instruction_q import Instruction, InstructionQ
13
14 from compalu import ComputationUnitNoDelay
15
16 from alu_hier import ALU, BranchALU
17 from nmutil.latch import SRLatch
18 from nmutil.nmoperator import eq
19
20 from random import randint, seed
21 from copy import deepcopy
22 from math import log
23
24
class Memory(Elaboratable):
    """ Small test memory with one read port and one write port.

        Wraps an nmigen Memory of ``regwid``-bit words, initialised so that
        each word holds its own index.

        * :regwid: width of each memory word, in bits
        * :addrw:  width of the (byte) address ``adr``, in bits
    """
    def __init__(self, regwid, addrw):
        # This class is itself called "Memory", so the nmigen primitive has
        # to be imported under a different name: calling Memory(...) here
        # would otherwise recurse into this very constructor.
        from nmigen import Memory as NMigenMemory

        # integer division: both values are used as slice index / depth,
        # which must be ints (true division would produce floats)
        self.ddepth = regwid // 8
        depth = (1 << addrw) // self.ddepth
        self.adr = Signal(addrw)
        self.dat_r = Signal(regwid)
        self.dat_w = Signal(regwid)
        self.we = Signal()
        self.mem = NMigenMemory(width=regwid, depth=depth,
                                init=range(0, depth))

    def elaborate(self, platform):
        m = Module()
        m.submodules.rdport = rdport = self.mem.read_port()
        m.submodules.wrport = wrport = self.mem.write_port()
        m.d.comb += [
            rdport.addr.eq(self.adr[self.ddepth:]),  # ignore low bits
            self.dat_r.eq(rdport.data),
            # NOTE(review): the write port is driven with the full address
            # while the read port drops the low bits - confirm this
            # asymmetry is intended.
            wrport.addr.eq(self.adr),
            wrport.data.eq(self.dat_w),
            wrport.en.eq(self.we),
        ]
        return m
47
48
class MemSim:
    """ Pure-python model of a word-organised memory.

        Every word starts out holding its own index.  Addresses passed to
        ld/st are shifted right by ``ddepth`` to select the word.
    """
    def __init__(self, regwid, addrw):
        self.regwid = regwid
        self.ddepth = regwid // 8
        n_words = (1 << addrw) // self.ddepth
        self.mem = list(range(n_words))

    def ld(self, addr):
        """ return the word selected by addr """
        index = addr >> self.ddepth
        return self.mem[index]

    def st(self, addr, data):
        """ store data, truncated to regwid bits, in the word at addr """
        mask = (1 << self.regwid) - 1
        index = addr >> self.ddepth
        self.mem[index] = data & mask
61
62
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Groups a sequence of units behind one set of signals: the per-unit
        single-bit control lines (issue, go_rd, go_wr, shadown, go_die,
        busy, rd_rel, req_rel) are concatenated into vectors, the source
        operands are broadcast to every unit, and the units' data outputs
        are merged into one.

        The class works recursively: "units" may themselves be
        CompUnitsBase instances, in which case the group presents the
        concatenation of all the signals of its sub-groups.

        At the lower level the intent is that groups of (identical) ALUs
        are handed the same operation and, beyond that, share the *same
        pipeline*, becoming a "Concurrent Computation Unit" as defined by
        Mitch Alsup (see section 11.4.9.3)
    """
    def __init__(self, rwid, units):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :units:  sequence of ALUs (or CompUnitsBase derivatives)
        """
        self.units = units
        self.rwid = rwid

        # a group of groups is as wide as all its members put together,
        # a group of plain units is as wide as the number of units
        if units and isinstance(units[0], CompUnitsBase):
            self.n_units = sum(group.n_units for group in units)
        else:
            self.n_units = len(units)

        n_units = self.n_units

        # inputs (one bit per unit)
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)

        # outputs (one bit per unit)
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        units = self.units

        # register every unit as submodule "comp<N>"
        for idx, unit in enumerate(units):
            setattr(m.submodules, "comp%d" % idx, unit)

        # per-unit outputs are gathered into this group's output vectors...
        comb += self.rd_rel_o.eq(Cat(*[u.rd_rel_o for u in units]))
        comb += self.req_rel_o.eq(Cat(*[u.req_rel_o for u in units]))
        comb += self.busy_o.eq(Cat(*[u.busy_o for u in units]))
        # ... and this group's input vectors are fanned out, bit by bit
        comb += Cat(*[u.go_die_i for u in units]).eq(self.go_die_i)
        comb += Cat(*[u.shadown_i for u in units]).eq(self.shadown_i)
        comb += Cat(*[u.go_wr_i for u in units]).eq(self.go_wr_i)
        comb += Cat(*[u.go_rd_i for u in units]).eq(self.go_rd_i)
        comb += Cat(*[u.issue_i for u in units]).eq(self.issue_i)

        # connect data register input/output

        # merge all unit outputs into a single value.  bit of a hack:
        # treereduce needs a list whose items have a "data_o" attribute
        if units:
            comb += self.data_o.eq(treereduce(units))

        # every unit is given the same pair of source operands
        for unit in units:
            comb += [unit.src1_i.eq(self.src1_i),
                     unit.src2_i.eq(self.src2_i)]

        return m
170
171
class CompUnitALUs(CompUnitsBase):
    """ Group of four integer ALU Computation Units (add, sub, mul, shf)
        that are all handed the same operation and immediate.
    """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs, in order: add, sub, mul, shf
        alus = [ALU(rwid) for _ in range(4)]

        # one Computation Unit wrapped around each ALU
        aluopwid = 3  # extra bit for immediate mode
        units = [ComputationUnitNoDelay(rwid, aluopwid, alu) for alu in alus]

        CompUnitsBase.__init__(self, rwid, units)

    def elaborate(self, platform):
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the same operation (lower 3 bits of each unit's oper_i)
        # and the same immediate to all units
        for unit in self.units:
            comb += [unit.oper_i[0:3].eq(self.oper_i),
                     unit.imm_i.eq(self.imm_i)]

        return m
209
210
class CompUnitBR(CompUnitsBase):
    """ Group holding the single Branch Computation Unit. """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width

            Note: the branch ALU (bgt) and its Computation Unit (br1) are
            kept as attributes so that a shadow unit can be created for it
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Branch ALU and CU
        aluopwid = 3  # extra bit for immediate mode
        self.bgt = BranchALU(rwid)
        self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)
        CompUnitsBase.__init__(self, rwid, [self.br1])

    def elaborate(self, platform):
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the same operation and immediate to all units
        for unit in self.units:
            comb += [unit.oper_i.eq(self.oper_i),
                     unit.imm_i.eq(self.imm_i)]

        return m
244
245
class FunctionUnits(Elaboratable):
    """ Wires an FU-FU Dependency Matrix and an FU-Register Dependency
        Matrix together and presents their combined inputs and outputs.

        * :n_regs:     width of the (unary) register-select vectors
        * :n_int_alus: number of Function Units tracked
    """

    def __init__(self, n_regs, n_int_alus):
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        self.dest_i = Signal(n_regs, reset_less=True)  # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True)  # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True)  # oper2 R# in

        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        self.dest_rsel_o = Signal(n_regs, reset_less=True)  # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True)  # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True)  # src2 reg (bot)

        self.req_rel_i = Signal(n_int_alus, reset_less=True)
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.req_rel_o = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here (set up in
        # elaborate), for use in WaWGrid

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        n_intfus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        m.submodules.intfudeps = intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
        # Integer FU-Reg Dep Matrix
        m.submodules.intregdeps = intregdeps = FURegDepMatrix(n_intfus,
                                                              self.n_regs)

        # global pending vectors come from the FU-Reg rd/wr select vectors
        comb += [self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o),
                 self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)]

        # ... which are also looped back into the FU-Reg matrix itself
        comb += [intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o),
                 intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)]

        # FU-Reg pending outputs feed the FU-FU matrix
        comb += [intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o),
                 intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)]
        self.wr_pend_o = intregdeps.wr_pend_o  # also output for use in WaWGrid

        # FU-FU matrix: issue / go / die in, readable / writable out
        comb += [intfudeps.issue_i.eq(self.fn_issue_i),
                 intfudeps.go_rd_i.eq(self.go_rd_i),
                 intfudeps.go_wr_i.eq(self.go_wr_i),
                 intfudeps.go_die_i.eq(self.go_die_i),
                 self.readable_o.eq(intfudeps.readable_o),
                 self.writable_o.eq(intfudeps.writable_o)]

        # Connect function issue / arrays, and dest/src1/src2
        comb += [intregdeps.dest_i.eq(self.dest_i),
                 intregdeps.src1_i.eq(self.src1_i),
                 intregdeps.src2_i.eq(self.src2_i)]

        comb += [intregdeps.go_rd_i.eq(self.go_rd_i),
                 intregdeps.go_wr_i.eq(self.go_wr_i),
                 intregdeps.go_die_i.eq(self.go_die_i),
                 intregdeps.issue_i.eq(self.fn_issue_i)]

        # register-select vectors out
        comb += [self.dest_rsel_o.eq(intregdeps.dest_rsel_o),
                 self.src1_rsel_o.eq(intregdeps.src1_rsel_o),
                 self.src2_rsel_o.eq(intregdeps.src2_rsel_o)]

        return m
321
322
class Scoreboard(Elaboratable):
    """ Top-level scoreboard: connects register files, Computation Units,
        Function Unit dependency matrices, group picker, issue units,
        shadow matrices and the branch speculation recorder.

        The ALU and branch issue groups, and the operation / immediate
        signals for both, are exposed as attributes so that whatever
        drives instruction issue can get at them.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(4)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.alu_imm_i = Signal(rwid, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are created but not connected to anything below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units: 4 ALU units (cua) plus 1 branch unit
        # (cub), grouped (recursively) into one 5-wide Computation Unit
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 3)
        cub = CompUnitBR(self.rwid, 3)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(n_intfus) # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.
        # NOTE(review): an earlier comment here said the shadow width is
        # increased by 1 for branches; the code instead instantiates a
        # second, 1-wide ShadowMatrix (bshadow) - presumably that is the
        # branch shadow.
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [ regdecode.dest_i.eq(self.int_dest_i),
                  regdecode.src1_i.eq(self.int_src1_i),
                  regdecode.src2_i.eq(self.int_src2_i),
                  regdecode.enable_i.eq(self.reg_enable_i),
                  self.issue_o.eq(issueunit.issue_o)
                ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cua.imm_i.eq(self.alu_imm_i)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        # scoreboard is busy if any Computation Unit is busy
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        # a go_die from the branch shadow resets the main shadow matrix
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation, using the separate
        # 1-wide bshadow matrix.
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # a branch is "active" from the cycle it is issued (or while the
        # recorder's active_i is set) until the branch unit's go_wr_i fires
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # NOTE(review): the 0x1f mask is 5 bits wide, which matches
        # n_intfus == 5 above - confirm it is meant to track n_intfus.
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            # direction out is data_o + 1: 1 for data_o == 0, 2 for data_o == 1
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        """ yields everything both register files yield, followed by the
            register-number inputs, issue_o and the branch signals
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ returns the items yielded by __iter__, as a list """
        return list(self)
596
597
class IssueToScoreboard(Elaboratable):
    """ Instruction Queue feeding a Scoreboard.

        Instructions are added to the queue through p_add_i / data_i.  The
        instruction at the head of the queue is presented to the
        Scoreboard's branch or ALU issue group, and is popped from the
        queue once that group reports a function-unit issue.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # shape (width, unsigned) of the queue-length style counters
        mqbits = (int(log(qlen) / log(2))+2, False)
        self.p_add_i = Signal(mqbits)  # instructions to add (from data_i)
        self.p_ready_o = Signal()  # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True)  # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += [self.busy_o.eq(sc.busy_o),
                 self.qlen_o.eq(iq.qlen_o)]

        # link up instruction queue
        comb += [iq.p_add_i.eq(self.p_add_i),
                 self.p_ready_o.eq(iq.p_ready_o)]
        for idx in range(self.n_in):
            comb += eq(iq.data_i[idx], self.data_i[idx])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed (n_sub_i set to 1) when the
        # selected issue group reports a function-unit issue.

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()

        with m.If(wait_issue_br | wait_issue_alu):
            # set instruction pop length to 1 if the unit accepted
            for waiting, group in ((wait_issue_br, sc.brissue),
                                   (wait_issue_alu, sc.aluissue)):
                with m.If(waiting & (group.fn_issue_o != 0)):
                    with m.If(iq.qlen_o != 0):
                        comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        head = iq.data_o[0]
        with m.If(iq.qlen_o != 0):
            # set the src/dest regs and enable the regfile
            comb += [sc.int_dest_i.eq(head.dest_i),
                     sc.int_src1_i.eq(head.src1_i),
                     sc.int_src2_i.eq(head.src2_i),
                     sc.reg_enable_i.eq(1)]

            # choose a Function-Unit-Group: operations with either of
            # bits 2 and 3 set are branches, everything else is ALU.
            # the operation passed on is the low 2 bits plus the
            # "immediate set" flag (opim_i)
            with m.If((head.oper_i & (0x3<<2)) != 0):  # branch
                comb += [sc.brissue.insn_i.eq(1),
                         sc.br_oper_i.eq(Cat(head.oper_i[0:2], head.opim_i)),
                         sc.br_imm_i.eq(head.imm_i),
                         wait_issue_br.eq(1)]
            with m.Else():  # alu
                comb += [sc.aluissue.insn_i.eq(1),
                         sc.alu_oper_i.eq(Cat(head.oper_i[0:2], head.opim_i)),
                         sc.alu_imm_i.eq(head.imm_i),
                         wait_issue_alu.eq(1)]

            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            #yield sc.branch_fail_i.eq(branch_fail)
            #yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        yield self.p_ready_o
        for instruction in self.data_i:
            yield from list(instruction)
        yield self.p_add_i

    def ports(self):
        return list(self)
704
705
# Operation numbers used by the tests and by RegSim.  0-3 are the ALU
# operations (add, sub, mul, shift), 4-7 are the branch comparisons
# (greater-than, less-than, equal, not-equal).
(IADD, ISUB, IMUL, ISHF,
 IBGT, IBLT, IBEQ, IBNE) = range(8)
714
class RegSim:
    """ Software reference model of the integer register file.

        Executes the same operations as the hardware under test on a plain
        python list of register values, so that the two can be compared.

        * :rwidth: register width in bits (results are truncated to this)
        * :nregs:  number of registers
    """
    def __init__(self, rwidth, nregs):
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, op_imm, imm, src1, src2, dest):
        """ Perform one operation, store the result in dest and return it.

            * :op:     operation number (IADD ... IBNE)
            * :op_imm: if true, imm is used as second operand instead of
                       the register numbered src2
            * :imm:    immediate value
            * :src1:   register number of first operand
            * :src2:   register number of second operand
            * :dest:   register number that receives the result

            Raises ValueError if op is not a known operation.
        """
        maxbits = (1 << self.rwidth) - 1
        src1 = self.regs[src1] & maxbits
        if op_imm:
            src2 = imm
        else:
            src2 = self.regs[src2] & maxbits
        if op == IADD:
            val = src1 + src2
        elif op == ISUB:
            val = src1 - src2
        elif op == IMUL:
            val = src1 * src2
        elif op == ISHF:
            val = src1 >> (src2 & maxbits)
        elif op == IBGT:
            val = int(src1 > src2)
        elif op == IBLT:
            val = int(src1 < src2)
        elif op == IBEQ:
            val = int(src1 == src2)
        elif op == IBNE:
            val = int(src1 != src2)
        else:
            # without this, "val" would simply be unbound further down
            raise ValueError("unknown operation %r" % (op,))
        # truncate to register width (also wraps negative ISUB results)
        val &= maxbits
        self.setval(dest, val)
        return val

    def setval(self, dest, val):
        """ set register dest to val (and log it) """
        print ("sim setval", dest, hex(val))
        self.regs[dest] = val

    def dump(self, dut):
        """ simulation generator: print expected against actual value of
            every register, reading the actual value from dut.intregs
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """ simulation generator: compare every register against dut.
            on the first mismatch, dump all registers and raise
            AssertionError.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                # explicit raise: a bare "assert False" is stripped by -O,
                # which would let a failing comparison pass silently
                raise AssertionError(
                    "reg %d expected %x received %x" % (i, val, reg))
762 yield from self.dump(dut)
763 assert False
764
def instr_q(dut, op, op_imm, imm, src1, src2, dest,
            branch_success, branch_fail):
    """ simulation generator: add one instruction to dut's instruction
        queue and wait until dut reports it as added (p_ready_o).

        branch_success and branch_fail are accepted but not used.
    """
    instrs = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
               'src1_i': src1, 'src2_i': src2}]
    sendlen = 1

    # drive each instruction onto its data_i slot, and read it back
    for idx, instr in enumerate(instrs[:sendlen]):
        yield from eq(dut.data_i[idx], instr)
        di = yield dut.data_i[idx]
        print ("senddata %d %x" % (idx, di))

    # ask for the instruction(s) to be added, then poll once per clock
    yield dut.p_add_i.eq(sendlen)
    while True:
        yield
        o_p_ready = yield dut.p_ready_o
        if o_p_ready:
            break

    yield dut.p_add_i.eq(0)
781
782 yield dut.p_add_i.eq(0)
783
784
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """ simulation generator: present one instruction directly to the
        scoreboard's issue inputs and wait for it to be issued.

        operations with bit 2 or 3 set go to the branch issue group,
        everything else to the ALU issue group.  only the low 2 bits of
        op are passed on as the operation.
    """
    yield from disable_issue(dut)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    # pick the issue group along with its operation / immediate inputs
    if op & (0x3<<2):  # branch
        dut_issue, oper_i, imm_i = dut.brissue, dut.br_oper_i, dut.br_imm_i
    else:
        dut_issue, oper_i, imm_i = dut.aluissue, dut.alu_oper_i, dut.alu_imm_i
    yield dut_issue.insn_i.eq(1)
    yield oper_i.eq(Const(op & 0x3, 2))
    yield imm_i.eq(imm)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
807 yield
808 yield from wait_for_issue(dut, dut_issue)
809
810
def print_reg(dut, rnums):
    """ simulation generator: read the registers numbered in rnums from
        dut.intregs and print them (in hex) on a single line
    """
    values = []
    for rnum in rnums:
        value = yield dut.intregs.regs[rnum].reg
        values.append("%x" % value)
    numbers = ','.join(str(rnum) for rnum in rnums)
    print ("reg %s: %s" % (numbers, ','.join(values)))
818
819
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """ create n_ops random instructions, as a list of tuples

        (src1, src2, dest, op, opi, imm)          or, with shadowing,
        (src1, src2, dest, op, opi, imm, (0, 0))

        register numbers are in the range 1..dut.n_regs-1, the immediate
        in 1..2**dut.rwid-1 and op in 0..max_opnums.
    """
    top_reg = dut.n_regs - 1
    top_imm = (1 << dut.rwid) - 1

    insts = []
    for _ in range(n_ops):
        # NOTE: the order of these calls fixes the random sequence
        src1 = randint(1, top_reg)
        src2 = randint(1, top_reg)
        imm = randint(1, top_imm)
        dest = randint(1, top_reg)
        op = randint(0, max_opnums)
        # immediate-mode flag: set only when the random value is zero
        opi = 1 if randint(0, 2) == 0 else 0

        inst = (src1, src2, dest, op, opi, imm)
        if shadowing:
            inst += ((0, 0),)
        insts.append(inst)
    return insts
835
836
def wait_for_busy_clear(dut):
    """ simulation generator: advance one clock at a time, printing
        "busy", until dut.busy_o reads as zero
    """
    still_busy = yield dut.busy_o
    while still_busy:
        print ("busy",)
        yield
        still_busy = yield dut.busy_o
844
def disable_issue(dut):
    """ simulation generator: clear the "instruction" request (insn_i) of
        the ALU issue group, then of the branch issue group
    """
    for group_name in ("aluissue", "brissue"):
        yield getattr(dut, group_name).insn_i.eq(0)
848
849
def wait_for_issue(dut, dut_issue):
    """ simulation generator: advance one clock at a time, printing
        "busy", until dut_issue.fn_issue_o is non-zero; then withdraw the
        issue requests and disable register decode
    """
    issued = yield dut_issue.fn_issue_o
    while not issued:
        print ("busy",)
        #yield from print_reg(dut, [1,2,3])
        yield
        issued = yield dut_issue.fn_issue_o
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
    #yield from print_reg(dut, [1,2,3])
860 #yield from print_reg(dut, [1,2,3])
861
def scoreboard_branch_sim(dut, alusim):
    """ simulation generator: branch-speculation test.

        issues a short program containing a branch, plus one instruction
        for each of the "branch succeeded" and "branch failed" paths (both
        issued under the branch shadow), then replays the program on the
        software model (alusim), following only the path the model's
        branch result selects, and compares register contents.

        * :dut:    the Scoreboard under test
        * :alusim: RegSim instance used as the reference model

        instructions here are 5-tuples
        (src1, src2, dest, op, (shadow_on, shadow_off)); for a branch,
        "dest" is instead a pair of lists (branch_ok, branch_fail).
        none of them use an immediate, so imm / op_imm are passed as 0.
    """

    iseed = 3

    for run in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        if False:
            # NOTE(review): disabled.  create_random_ops(..., True, ...)
            # returns 7-tuples, which the loops below cannot unpack.
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # the model replays an untouched copy: the issue loop below
        # consumes (and extends) the original list
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instrs.pop(0)
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch already resolved: nothing left to shadow against
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr takes an immediate as its third argument: leaving
            # it out shifted every argument and raised TypeError
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # RegSim.op takes (op, op_imm, imm, src1, src2, dest): pass
            # "no immediate" explicitly rather than omitting two arguments
            branch_res = alusim.op(op, 0, 0, src1, src2, dest)
            if is_branch:
                # follow only the path that the model's branch result selects
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
973 yield from alusim.dump(dut)
974
975
def _randomise_int_regs(dut, alusim):
    """Write one random value per register into both the DUT and the model.

    Indices start at 1, so register 0 is never written here.  Each value
    is drawn from 0 .. (1 << alusim.rwidth) - 1 and is given to the DUT
    register and to ``alusim.setval`` so that both start out identical.
    """
    max_val = (1 << alusim.rwidth) - 1
    for regnum in range(1, dut.n_regs):
        val = randint(0, max_val)
        #val = 31+regnum*3
        #val = regnum
        yield dut.intregs.regs[regnum].reg.eq(val)
        alusim.setval(regnum, val)


def _drain_instruction_queue(dut):
    """Poll ``dut.qlen_o`` until it reads zero, then yield four more times.

    A bare ``yield`` hands control back to the simulator (presumably
    advancing one clock - confirm against the simulator in use).
    """
    while (yield dut.qlen_o) != 0:
        yield
    # extra settling yields after the queue reports empty
    for _ in range(4):
        yield


def scoreboard_sim(dut, alusim):
    """Simulation process: random instruction batches checked against alusim.

    Runs 50 rounds.  Each round loads random register values into both
    the DUT and ``alusim``, obtains a batch of instructions from
    ``create_random_ops``, applies every instruction to ``alusim`` and
    queues it on the DUT through ``instr_q``, waits for the instruction
    queue and the busy signal to clear, then calls ``alusim.check(dut)``
    and ``alusim.dump(dut)``.

    The random module is seeded with 0 first, so the values drawn via
    ``randint`` are the same on every run.

    dut    -- the scoreboard device under test
    alusim -- software register/ALU model run alongside the DUT

    NOTE(review): block nesting was reconstructed from a listing that had
    lost its indentation - confirm that only the first wait-yield sits
    inside the queue-polling loop and that everything below ``seed(0)``
    belongs to the 50-round loop.
    """
    seed(0)

    # each of the loops below has its own index name: the original reused
    # "i" for the round counter, the register index and the instruction
    # index alike.
    for _round in range(50):

        # set random values in the registers
        yield from _randomise_int_regs(dut, alusim)

        # create some instructions (some random, some regression tests)
        instrs = []
        if True:
            instrs = create_random_ops(dut, 15, True, 4)

        # Disabled regression cases follow.  The issue loop further down
        # unpacks 7-element tuples
        #   (src1, src2, dest, op, opi, imm, (br_ok, br_fail))
        # so any case below with fewer elements fails to unpack if it is
        # simply switched on: it must first be extended to that shape.

        if False:
            instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append( (1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for idx, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                    (idx, src1, src2, dest, op, opi, imm))
            # apply to the software model first, then queue on the DUT
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        yield from _drain_instruction_queue(dut)
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1118
1119
def test_scoreboard():
    """Build the scoreboard DUT, dump its RTLIL and simulate it.

    Writes the RTLIL conversion of the DUT to "test_scoreboard6600.il",
    then runs ``scoreboard_sim`` under ``run_simulation`` with the VCD
    trace going to "test_scoreboard6600.vcd".  Both files are created in
    the current working directory.
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    # the MemSim instance that used to be created here was never passed
    # to anything, so it has been dropped.

    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as f:
        f.write(il_text)

    run_simulation(dut, scoreboard_sim(dut, alusim),
                   vcd_name='test_scoreboard6600.vcd')

    # alternative process, left disabled:
    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #               vcd_name='test_scoreboard6600.vcd')
1133
1134
if __name__ == "__main__":
    # executed as a script: run the scoreboard test directly
    test_scoreboard()