remove unneeded code
[soc.git] / src / experiment / score6600.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
4
5 from regfile.regfile import RegFileArray, treereduce
6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
7 from scoreboard.fu_reg_matrix import FURegDepMatrix
8 from scoreboard.global_pending import GlobalPending
9 from scoreboard.group_picker import GroupPicker
10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
12 from scoreboard.instruction_q import Instruction, InstructionQ
13
14 from compalu import ComputationUnitNoDelay
15
16 from alu_hier import ALU, BranchALU
17 from nmutil.latch import SRLatch
18 from nmutil.nmoperator import eq
19
20 from random import randint, seed
21 from copy import deepcopy
22 from math import log
23
24
class Memory(Elaboratable):
    """ Small test memory with one read port and one write port.

        Inputs:

        * :regwid: width in bits of each memory row (and of dat_r / dat_w)
        * :addrw:  width in bits of the address signal, adr

        The memory is created with (1<<addrw) // (regwid//8) rows, each
        initialised to its own row number.
    """
    def __init__(self, regwid, addrw):
        # this class is itself named "Memory", so the bare name would refer
        # back to this class rather than to the nmigen primitive: import
        # the primitive locally under an alias.
        from nmigen import Memory as NMemory

        # integer division: ddepth is used as a slice index and depth as a
        # range() bound, neither of which accepts a float
        self.ddepth = regwid//8
        depth = (1<<addrw) // self.ddepth
        self.adr = Signal(addrw)
        self.dat_r = Signal(regwid)
        self.dat_w = Signal(regwid)
        self.we = Signal()
        self.mem = NMemory(width=regwid, depth=depth, init=range(0, depth))

    def elaborate(self, platform):
        """ Connects adr / dat_r / dat_w / we to the memory's ports.
        """
        m = Module()
        m.submodules.rdport = rdport = self.mem.read_port()
        m.submodules.wrport = wrport = self.mem.write_port()
        m.d.comb += [
            rdport.addr.eq(self.adr[self.ddepth:]), # ignore low bits
            self.dat_r.eq(rdport.data),
            # NOTE(review): the write address keeps the low bits that the
            # read address drops - confirm whether that is intended
            wrport.addr.eq(self.adr),
            wrport.data.eq(self.dat_w),
            wrport.en.eq(self.we),
        ]
        return m
47
48
class MemSim:
    """ Plain-python memory model, held as a list of integers.

        * :regwid: width in bits of a stored value
        * :addrw:  width in bits of an address

        The list has (1<<addrw) // (regwid//8) entries, each initialised
        to its own index.
    """
    def __init__(self, regwid, addrw):
        self.regwid = regwid
        self.ddepth = regwid // 8
        n_rows = (1 << addrw) // self.ddepth
        self.mem = [*range(n_rows)]

    def _row(self, addr):
        # an address selects a row by shifting it right by ddepth bits
        return addr >> self.ddepth

    def ld(self, addr):
        """ returns the value stored in the row selected by addr
        """
        row = self._row(addr)
        return self.mem[row]

    def st(self, addr, data):
        """ stores data, truncated to regwid bits, in the row selected
            by addr
        """
        mask = (1 << self.regwid) - 1
        self.mem[self._row(addr)] = data & mask
61
62
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Groups a set of ALU Computation Units (ones able to handle the
        same operations) by concatenating their single-bit issue, go_rd,
        etc. signals into multi-bit signals.

        The class also works recursively: because the grouped signals
        are simply concatenated, a CompUnitsBase may itself be handed a
        sequence of CompUnitsBase instances, presenting one top-level
        concatenation of all signals in and out of every ALU whilst
        still allowing ALUs to be grouped conveniently.

        At the lower level, the intent is that a group of (identical)
        ALUs may be passed the same operation, and, beyond that, that
        such a group shares the *same pipeline*, becoming a "Concurrent
        Computation Unit" as defined by Mitch Alsup (see section
        11.4.9.3)
    """
    def __init__(self, rwid, units):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :units:  sequence of ALUs (or CompUnitsBase derivatives)
        """
        self.units = units
        self.rwid = rwid

        # a group of groups is as wide as all of its members together
        if units and isinstance(units[0], CompUnitsBase):
            self.n_units = sum(grp.n_units for grp in self.units)
        else:
            self.n_units = len(units)

        width = self.n_units

        # inputs (one bit per unit)
        self.issue_i = Signal(width, reset_less=True)
        self.go_rd_i = Signal(width, reset_less=True)
        self.go_wr_i = Signal(width, reset_less=True)
        self.shadown_i = Signal(width, reset_less=True)
        self.go_die_i = Signal(width, reset_less=True)

        # outputs (one bit per unit)
        self.busy_o = Signal(width, reset_less=True)
        self.rd_rel_o = Signal(width, reset_less=True)
        self.req_rel_o = Signal(width, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        units = self.units

        # every unit becomes a submodule named comp0, comp1, ...
        for idx, unit in enumerate(units):
            setattr(m.submodules, "comp%d" % idx, unit)

        def gather(attr):
            # concatenation of the same-named signal from every unit
            return Cat(*[getattr(unit, attr) for unit in units])

        # unit outputs are collected into this module's outputs...
        comb += self.rd_rel_o.eq(gather("rd_rel_o"))
        comb += self.req_rel_o.eq(gather("req_rel_o"))
        comb += self.busy_o.eq(gather("busy_o"))
        # ... and this module's inputs are spread across the unit inputs
        comb += gather("go_die_i").eq(self.go_die_i)
        comb += gather("shadown_i").eq(self.shadown_i)
        comb += gather("go_wr_i").eq(self.go_wr_i)
        comb += gather("go_rd_i").eq(self.go_rd_i)
        comb += gather("issue_i").eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "data_o"
        if units:
            merged = treereduce(units)
            comb += self.data_o.eq(merged)

        # every unit is given the same two source operands
        for unit in units:
            comb += [unit.src1_i.eq(self.src1_i),
                     unit.src2_i.eq(self.src2_i),
                    ]

        return m
170
171
class CompUnitALUs(CompUnitsBase):
    """ Group of four integer ALU Computation Units (add, sub, mul,
        shift) which are all handed the same operation.
    """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)

        # Int ALUs, in order: add, sub, mul, shf
        alus = [ALU(rwid) for _ in range(4)]

        # each ALU is wrapped in its own Computation Unit
        units = [ComputationUnitNoDelay(rwid, 2, alu) for alu in alus]

        super().__init__(rwid, units)

    def elaborate(self, platform):
        m = super().elaborate(platform)

        # hand the same operation to all units
        m.d.comb += [unit.oper_i.eq(self.oper_i) for unit in self.units]

        return m
206
207
class CompUnitBR(CompUnitsBase):
    """ Group containing the single Branch Computation Unit.
    """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width

            Note: the branch ALU (bgt) and its Computation Unit (br1) are
            kept as attributes so that a shadow unit can be created for
            the branch
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)

        # Branch ALU and CU
        branch_alu = BranchALU(rwid)
        branch_cu = ComputationUnitNoDelay(rwid, 3, branch_alu)
        self.bgt = branch_alu
        self.br1 = branch_cu
        super().__init__(rwid, [branch_cu])

    def elaborate(self, platform):
        m = super().elaborate(platform)

        # hand the same operation to all units
        m.d.comb += [unit.oper_i.eq(self.oper_i) for unit in self.units]

        return m
238
239
class FunctionUnits(Elaboratable):
    """ Pairs an FU-FU Dependency Matrix with an FU-Reg Dependency Matrix
        and exposes their combined inputs and outputs.

        * :n_regs:     number of registers (width of the reg-select signals)
        * :n_int_alus: number of Function Units (width of the per-FU signals)
    """

    def __init__(self, n_regs, n_int_alus):
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        # register selection inputs, n_regs bits wide
        self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in

        # read-pending / write-pending outputs, n_regs bits wide
        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        # register selection outputs, n_regs bits wide
        self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)

        # per-Function-Unit signals, n_int_alus bits wide
        self.req_rel_i = Signal(n_int_alus, reset_less=True)
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.req_rel_o = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        n_fus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        fufu = FUFUDepMatrix(n_fus, n_fus)
        m.submodules.intfudeps = fufu
        # Integer FU-Reg Dep Matrix
        fureg = FURegDepMatrix(n_fus, self.n_regs)
        m.submodules.intregdeps = fureg

        # pending vectors: output them, loop them back into the FU-Reg
        # matrix, and pass the per-FU pending results to the FU-FU matrix
        comb += [self.g_int_rd_pend_o.eq(fureg.rd_rsel_o),
                 self.g_int_wr_pend_o.eq(fureg.wr_rsel_o),
                 fureg.rd_pend_i.eq(fureg.rd_rsel_o),
                 fureg.wr_pend_i.eq(fureg.wr_rsel_o),
                 fufu.rd_pend_i.eq(fureg.rd_pend_o),
                 fufu.wr_pend_i.eq(fureg.wr_pend_o),
                ]
        self.wr_pend_o = fureg.wr_pend_o # also output for use in WaWGrid

        # FU-FU matrix: issue / go inputs, readable / writable outputs
        comb += [fufu.issue_i.eq(self.fn_issue_i),
                 fufu.go_rd_i.eq(self.go_rd_i),
                 fufu.go_wr_i.eq(self.go_wr_i),
                 fufu.go_die_i.eq(self.go_die_i),
                 self.readable_o.eq(fufu.readable_o),
                 self.writable_o.eq(fufu.writable_o),
                ]

        # Connect function issue / arrays, and dest/src1/src2
        comb += [fureg.dest_i.eq(self.dest_i),
                 fureg.src1_i.eq(self.src1_i),
                 fureg.src2_i.eq(self.src2_i),
                ]

        comb += [fureg.go_rd_i.eq(self.go_rd_i),
                 fureg.go_wr_i.eq(self.go_wr_i),
                 fureg.go_die_i.eq(self.go_die_i),
                 fureg.issue_i.eq(self.fn_issue_i),
                ]

        comb += [self.dest_rsel_o.eq(fureg.dest_rsel_o),
                 self.src1_rsel_o.eq(fureg.src1_rsel_o),
                 self.src2_rsel_o.eq(fureg.src2_rsel_o),
                ]

        return m
315
316
class Scoreboard(Elaboratable):
    """ Wires the register files, Computation Units, Function Unit
        dependency matrices, group picker, issue units, shadow matrices
        and branch speculation recorder together into one module.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(4)
        self.brissue = IssueUnitGroup(1)
        # and these
        # NOTE(review): these are 4 bits wide, whereas the Computation Unit
        # groups they drive are created in elaborate() with opwid=2
        self.alu_oper_i = Signal(4, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        # NOTE(review): elaborate() sets branch_direction_o to the branch
        # unit's data_o + 1 and treats data_o == 1 as "branch occurs", which
        # looks like the opposite encoding to the one stated here - confirm
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Creates the submodules and connects them together.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # (the FP ports are created but not connected to anything below)
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        # (5 = the four ALU units in CompUnitALUs plus the one in CompUnitBR)
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 2)
        cub = CompUnitBR(self.rwid, 2)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(n_intfus) # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        # NOTE(review): the code below uses a separate one-bit-wide matrix
        # (bshadow) for the branch rather than widening "shadows" - the
        # "increased by 1" remark may be out of date
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [ regdecode.dest_i.eq(self.int_dest_i),
                  regdecode.src1_i.eq(self.int_src1_i),
                  regdecode.src2_i.eq(self.int_src2_i),
                  regdecode.enable_i.eq(self.reg_enable_i),
                  self.issue_o.eq(issueunit.issue_o)
                ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cub.oper_i.eq(self.br_oper_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        # the main shadow matrix is reset by the branch shadow's go_die
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i
        # NOTE(review): as above, the branch shadow is the separate
        # "bshadow" matrix in this code, not an extra bit on "shadows"

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # true from the cycle the branch unit is issued (combinatorially,
        # via br1.issue_i) or recorded as active, until its go_wr_i is raised
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # NOTE(review): 0x1f is a 5-bit mask, matching n_intfus = 5 above;
        # it would need changing if the number of units changed
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        """ Yields the contents of both register files followed by this
            module's register-number inputs, issue_o and the branch signals.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ Returns everything yielded by __iter__, as a list.
        """
        return list(self)
586
587
class IssueToScoreboard(Elaboratable):
    """ Couples an InstructionQ to a Scoreboard: the instruction at the
        front of the queue is presented to the Scoreboard and is removed
        from the queue once an issue unit has accepted it.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   passed to InstructionQ as its queue length
            * :n_in:   passed to InstructionQ; also the number of
                       entries in data_i
            * :n_out:  passed to InstructionQ
            * :rwid:   register width, passed to both InstructionQ and
                       Scoreboard
            * :opwid:  operation width, passed to InstructionQ
            * :n_regs: number of registers, passed to Scoreboard
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # shape tuple handed to Signal() below: presumably (width, signed)
        mqbits = (int(log(qlen) / log(2))+2, False)
        self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
        self.p_ready_o = Signal() # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True) # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        """ Creates the queue and scoreboard and links them together.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        # (note: this attribute only exists once elaborate() has been called)
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when the relevant issue unit
        # reports (through its fn_issue_o) that it accepted the instruction.

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()

        with m.If(wait_issue_br | wait_issue_alu):
            # set instruction pop length to 1 if the unit accepted
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            dest = iq.data_o[0].dest_i
            src1 = iq.data_o[0].src1_i
            src2 = iq.data_o[0].src2_i
            op = iq.data_o[0].oper_i

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1) # enable the regfile

            # choose a Function-Unit-Group
            # (bits 2-3 of the operation select branch; bits 0-1 are
            #  passed on as the operation within the group)
            with m.If((op & (0x3<<2)) != 0): # branch
                comb += sc.brissue.insn_i.eq(1)
                comb += sc.br_oper_i.eq(op & 0x3)
                comb += wait_issue_br.eq(1)
            with m.Else(): # alu
                comb += sc.aluissue.insn_i.eq(1)
                comb += sc.alu_oper_i.eq(op & 0x3)
                comb += wait_issue_alu.eq(1)

            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            #yield sc.branch_fail_i.eq(branch_fail)
            #yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        """ Yields p_ready_o, then every item of each data_i entry,
            then p_add_i.
        """
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        """ Returns everything yielded by __iter__, as a list.
        """
        return list(self)
690
691
# operation numbers, as interpreted by RegSim.op.  the issue logic in this
# file treats any operation with bit 2 or bit 3 set as a branch, so 0-3
# are ALU operations and 4-7 are branch comparisons.
IADD = 0
ISUB = 1
IMUL = 2
ISHF = 3
IBGT = 4
IBLT = 5
IBEQ = 6
IBNE = 7
700
class RegSim:
    """ Plain-python model of a register file plus the operations
        performed on it, used to produce the values that the hardware
        registers are compared against.

        * :rwidth: register width in bits
        * :nregs:  number of registers (all start at zero)
    """
    def __init__(self, rwidth, nregs):
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, op_imm, src1, src2, dest):
        """ Performs one operation and stores the result in register dest.

            * :op:     operation number (IADD, ISUB ... IBNE)
            * :op_imm: if true, src2 is used directly as a value; if
                       false, src2 is a register number
            * :src1:   register number of the first operand
            * :src2:   second operand (see op_imm)
            * :dest:   register number that receives the result

            Returns the result, truncated to rwidth bits.  Raises
            ValueError if op is not a known operation number.
        """
        maxbits = (1 << self.rwidth) - 1
        src1 = self.regs[src1] & maxbits
        if not op_imm: # src2 is a register number: fetch its value
            src2 = self.regs[src2] & maxbits
        if op == IADD:
            val = src1 + src2
        elif op == ISUB:
            val = src1 - src2
        elif op == IMUL:
            val = src1 * src2
        elif op == ISHF:
            val = src1 >> (src2 & maxbits)
        elif op == IBGT:
            val = int(src1 > src2)
        elif op == IBLT:
            val = int(src1 < src2)
        elif op == IBEQ:
            val = int(src1 == src2)
        elif op == IBNE:
            val = int(src1 != src2)
        else:
            # previously fell through to an UnboundLocalError on "val"
            raise ValueError("unknown operation %r" % (op,))
        val &= maxbits
        self.setval(dest, val)
        return val

    def setval(self, dest, val):
        """ Stores val in register dest (and prints what was stored).
        """
        print ("sim setval", dest, hex(val))
        self.regs[dest] = val

    def dump(self, dut):
        """ Simulation generator: reads every register of dut.intregs and
            prints it next to the expected value, marked OK or !ok.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """ Simulation generator: reads every register of dut.intregs and,
            on the first one that differs from the expected value, prints
            it, dumps all registers and raises AssertionError.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                # explicit raise: a bare "assert False" is removed when
                # python is run with -O, silently passing the check
                raise AssertionError("reg %d expected %x received %x" % \
                                     (i, val, reg))
746 yield from self.dump(dut)
747 assert False
748
def instr_q(dut, op, op_imm, src1, src2, dest, branch_success, branch_fail):
    """ Simulation generator: places one instruction on dut.data_i, asks
        for it to be added to the queue, and waits until p_ready_o is
        seen set before withdrawing the request.

        branch_success and branch_fail are accepted but not used.
    """
    pending = [{'oper_i': op, 'dest_i': dest, 'opim_i': op_imm,
                'src1_i': src1, 'src2_i': src2}]
    n_send = 1

    # set up each instruction on the input, printing what was set
    for slot, fields in enumerate(pending[:n_send]):
        yield from eq(dut.data_i[slot], fields)
        readback = yield dut.data_i[slot]
        print ("senddata %d %x" % (slot, readback))

    # request the add, then poll (one cycle at a time) for acceptance
    yield dut.p_add_i.eq(n_send)
    yield
    while True:
        accepted = yield dut.p_ready_o
        if accepted:
            break
        yield

    yield dut.p_add_i.eq(0)
766
767
def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
    """ Simulation generator: presents one instruction directly to the
        scoreboard inputs of dut and waits for it to be issued.

        An op with bit 2 or bit 3 set goes to the branch issue unit,
        anything else to the ALU issue unit; the low two bits of op are
        given as the operation.
    """
    yield from disable_issue(dut)

    # register numbers
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    # pick the issue unit and its matching operation input
    is_branch = (op & (0x3<<2)) != 0
    if is_branch:
        dut_issue = dut.brissue
        yield dut_issue.insn_i.eq(1)
        yield dut.br_oper_i.eq(Const(op & 0x3, 2))
    else:
        dut_issue = dut.aluissue
        yield dut_issue.insn_i.eq(1)
        yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
790
791
def print_reg(dut, rnums):
    """ Simulation generator: reads the registers numbered in rnums from
        dut.intregs and prints the numbers and their (hex) values on
        one line.
    """
    values = []
    for rnum in rnums:
        current = yield dut.intregs.regs[rnum].reg
        values.append("%x" % current)
    labels = ','.join(str(rnum) for rnum in rnums)
    print ("reg %s: %s" % (labels, ','.join(values)))
799
800
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """ Returns a list of n_ops random instructions.

        Each instruction is a tuple (src1, src2, dest, op, opi) where the
        three register numbers are in the range 1 to dut.n_regs-1, op is
        in the range 0 to max_opnums, and opi is 1 for (on average) one
        instruction in four and 0 otherwise.  If shadowing is set, each
        tuple has a sixth element, (0, 0).
    """
    ops = []
    for _ in range(n_ops):
        # note: the order of the randint calls decides which value each
        # field gets for a given random seed
        src1 = randint(1, dut.n_regs-1)
        src2 = randint(1, dut.n_regs-1)
        dest = randint(1, dut.n_regs-1)
        opcode = randint(0, max_opnums)
        opi = int(randint(0, 3) == 0) # 1 only when the random value is zero

        entry = (src1, src2, dest, opcode, opi)
        if shadowing:
            entry += ((0, 0),)
        ops.append(entry)
    return ops
815
816
def wait_for_busy_clear(dut):
    """ Simulation generator: reads dut.busy_o once per cycle, printing
        "busy" each time it is set, and finishes when it is clear.
    """
    still_busy = yield dut.busy_o
    while still_busy:
        print ("busy",)
        yield
        still_busy = yield dut.busy_o
824
def disable_issue(dut):
    """ Simulation generator: clears insn_i on the ALU issue unit and
        then on the branch issue unit.
    """
    for unit_name in ("aluissue", "brissue"):
        yield getattr(dut, unit_name).insn_i.eq(0)
828
829
def wait_for_issue(dut, dut_issue):
    """ Simulation generator: reads dut_issue.fn_issue_o once per cycle,
        printing "busy" each time it is clear.  Once it is set, the issue
        units are disabled and reg_enable_i is cleared.
    """
    issued = yield dut_issue.fn_issue_o
    while not issued:
        print ("busy",)
        yield
        issued = yield dut_issue.fn_issue_o

    # instruction accepted: withdraw the request
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
841
def scoreboard_branch_sim(dut, alusim):
    """ Simulation generator for the branch-speculation experiment.

        * :dut:    the hardware under test.  instructions are issued with
                   int_instr, so dut must provide the inputs that
                   function drives, plus intregs, n_regs, busy_o and
                   branch_direction_o
        * :alusim: a RegSim, used to compute the expected register values

        Instructions here are 5-element tuples
        (src1, src2, dest, op, (shadow_on, shadow_off)).  For a branch
        (op >= 4) the "dest" field instead holds a pair of lists
        (branch_ok, branch_fail) of instructions to run on each outcome.
    """

    iseed = 3

    for _ in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        if False:
            # (disabled) create some instructions: branches create a tree
            # NOTE(review): create_random_ops returns 6-element tuples when
            # shadowing is set, which the loops below would not unpack
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            # fixed sequence: one add, then one branch with a single
            # instruction on its "fail" side and none on its "ok" side
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # the issue loop below consumes insts: keep a copy for the model
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            # once the direction is known, instructions shadowed on the
            # other outcome are not issued at all
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue
            if branch_direction != 0:
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            yield from int_instr(dut, op, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # run the same instructions through the model, following only the
        # side of each branch that the model says is taken
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # RegSim.op takes an op_imm argument after op: these tuples
            # carry no immediate flag, so src2 is always a register (0)
            branch_res = alusim.op(op, 0, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
954
955
def scoreboard_sim(dut, alusim):
    """ Simulation generator: loads the same random values into the
        registers of dut and of alusim, sends a list of instructions to
        both (to dut through instr_q), waits for the instruction queue
        to empty and for busy_o to clear, then compares the registers.

        * :dut:    the hardware under test.  uses n_regs, intregs, qlen_o,
                   busy_o and whatever instr_q drives
        * :alusim: a RegSim, used to compute the expected register values

        Only the blocks under "if True" are active.  NOTE(review): the
        issue loop unpacks six fields per instruction, so the disabled
        blocks holding 4- or 5-element tuples would need updating before
        being re-enabled.
    """

    #seed(2)

    for i in range(1):

        # set random values in the registers
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if True:
            instrs = create_random_ops(dut, 15, True, 3)

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )


        if False:
            instrs.append((2, 3, 3, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, (0, 0)))
            instrs.append((3, 5, 5, 2, (0, 0)))
            instrs.append((5, 3, 3, 3, (0, 0)))
            instrs.append((3, 5, 5, 0, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            src1, src2, dest, op, opi, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d)" % (i, src1, src2, dest, op))
            # the model is updated straight away, in program order; the
            # hardware is given the instruction through the queue
            alusim.op(op, opi, src1, src2, dest)
            yield from instr_q(dut, op, opi, src1, src2, dest, br_ok, br_fail)

        # wait for all instructions to stop before checking
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # queue is empty: allow four more cycles before polling busy_o
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1088 yield from alusim.dump(dut)
1089
1090
def test_scoreboard():
    """ Builds an IssueToScoreboard, writes its RTLIL to
        test_scoreboard6600.il, then runs scoreboard_sim against it,
        recording test_scoreboard6600.vcd.
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 16) # created, but not used by the simulation yet

    # write out the design
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    # run the (non-branch) instruction test
    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1104
1105
# run the scoreboard test when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()