9afdf0652bc351eaf2fd951a770d453f9f0ab0f0
[soc.git] / src / experiment / score6600.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
4
5 from regfile.regfile import RegFileArray, treereduce
6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
7 from scoreboard.fu_reg_matrix import FURegDepMatrix
8 from scoreboard.global_pending import GlobalPending
9 from scoreboard.group_picker import GroupPicker
10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
12 from scoreboard.instruction_q import Instruction, InstructionQ
13
14 from compalu import ComputationUnitNoDelay
15
16 from alu_hier import ALU, BranchALU
17 from nmutil.latch import SRLatch
18 from nmutil.nmoperator import eq
19
20 from random import randint, seed
21 from copy import deepcopy
22 from math import log
23
24
class Memory(Elaboratable):
    """ Simple test memory with one read port and one write port.

        Inputs:

        * :regwid: bit width of one memory word
        * :addrw:  bit width of the (byte-style) address bus

        Attributes:

        * :adr:   address (shared by the read and the write port)
        * :dat_r: data read from the memory
        * :dat_w: data to be written
        * :we:    write enable

        The memory is initialised so that word N contains the value N.
    """
    def __init__(self, regwid, addrw):
        # this class is itself named "Memory", so the bare name would
        # resolve to this class rather than the nmigen primitive: import
        # the primitive locally under an unambiguous alias.
        from nmigen import Memory as NMigenMemory

        # integer division: both values are used as a range() bound and
        # as a slice index, neither of which accepts a float
        self.ddepth = regwid // 8
        depth = (1 << addrw) // self.ddepth
        self.adr = Signal(addrw)
        self.dat_r = Signal(regwid)
        self.dat_w = Signal(regwid)
        self.we = Signal()
        self.mem = NMigenMemory(width=regwid, depth=depth,
                                init=range(0, depth))

    def elaborate(self, platform):
        m = Module()
        m.submodules.rdport = rdport = self.mem.read_port()
        m.submodules.wrport = wrport = self.mem.write_port()
        # ignore the low address bits.  the same word index is used for
        # reads and writes so that a write followed by a read of the same
        # address hits the same word (this mirrors MemSim.ld / MemSim.st)
        word_adr = self.adr[self.ddepth:]
        m.d.comb += [
            rdport.addr.eq(word_adr),
            self.dat_r.eq(rdport.data),
            wrport.addr.eq(word_adr),
            wrport.data.eq(self.dat_w),
            wrport.en.eq(self.we),
        ]
        return m
47
48
class MemSim:
    """ Pure-python model of a word memory, for checking simulations.

        * :regwid: bit width of one memory word
        * :addrw:  bit width of the address

        Word N initially holds the value N.  Addresses are converted to
        a word index by shifting right by ddepth (regwid//8).
    """
    def __init__(self, regwid, addrw):
        bytes_per_word = regwid // 8
        n_words = (1 << addrw) // bytes_per_word
        self.regwid = regwid
        self.ddepth = bytes_per_word
        self.mem = [*range(n_words)]

    def ld(self, addr):
        """ returns the word stored at (shifted) address addr
        """
        word_idx = addr >> self.ddepth
        return self.mem[word_idx]

    def st(self, addr, data):
        """ stores data (truncated to regwid bits) at (shifted) address addr
        """
        word_mask = (1 << self.regwid) - 1
        word_idx = addr >> self.ddepth
        self.mem[word_idx] = data & word_mask
61
62
class CompUnitsBase(Elaboratable):
    """ Base class for a group of Computation Units.

        Takes a sequence of units and presents their single-bit control
        signals (issue, go_rd, go_wr, shadown, go_die, busy, rd_rel,
        req_rel) as one concatenated multi-bit signal each.

        The class is deliberately recursive: the units may themselves be
        CompUnitsBase instances, in which case the group's width is the
        sum of the widths of its sub-groups.  This gives a top-level,
        sequentially-concatenated view of every ALU signal while still
        allowing ALUs to be grouped conveniently.

        The intent at the lowest level is that a group of (identical)
        ALUs is handed the same operation and ultimately shares the
        *same pipeline*, becoming a "Concurrent Computation Unit" as
        defined by Mitch Alsup (see section 11.4.9.3)
    """
    def __init__(self, rwid, units):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :units:  sequence of ALUs (or CompUnitsBase derivatives)
        """
        self.units = units
        self.rwid = rwid
        if units and isinstance(units[0], CompUnitsBase):
            # group of groups: width is the total of the sub-group widths
            self.n_units = sum(grp.n_units for grp in self.units)
        else:
            self.n_units = len(units)

        width = self.n_units

        # inputs (one bit per unit)
        self.issue_i = Signal(width, reset_less=True)
        self.go_rd_i = Signal(width, reset_less=True)
        self.go_wr_i = Signal(width, reset_less=True)
        self.shadown_i = Signal(width, reset_less=True)
        self.go_die_i = Signal(width, reset_less=True)

        # outputs (one bit per unit)
        self.busy_o = Signal(width, reset_less=True)
        self.rd_rel_o = Signal(width, reset_less=True)
        self.req_rel_o = Signal(width, reset_less=True)

        # register *data* (not register numbers) in and out
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        units = self.units

        for idx, unit in enumerate(units):
            setattr(m.submodules, "comp%d" % idx, unit)

        # per-unit outputs are gathered into the group's output signals
        comb += self.rd_rel_o.eq(Cat(*[u.rd_rel_o for u in units]))
        comb += self.req_rel_o.eq(Cat(*[u.req_rel_o for u in units]))
        comb += self.busy_o.eq(Cat(*[u.busy_o for u in units]))

        # the group's input signals are split out to the per-unit inputs
        comb += Cat(*[u.go_die_i for u in units]).eq(self.go_die_i)
        comb += Cat(*[u.shadown_i for u in units]).eq(self.shadown_i)
        comb += Cat(*[u.go_wr_i for u in units]).eq(self.go_wr_i)
        comb += Cat(*[u.go_rd_i for u in units]).eq(self.go_rd_i)
        comb += Cat(*[u.issue_i for u in units]).eq(self.issue_i)

        # data output: all unit outputs are merged into a single value.
        # treereduce is handed the units themselves: it needs items that
        # have an attribute named "data_o"
        if units:
            comb += self.data_o.eq(treereduce(units))

        # every unit sees the same two source operands
        for unit in units:
            comb += unit.src1_i.eq(self.src1_i)
            comb += unit.src2_i.eq(self.src2_i)

        return m
170
171
class CompUnitALUs(CompUnitsBase):
    """ Group of four integer ALU Computation Units (add, sub, mul, shf),
        all of which are handed the same operation.
    """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)

        # Int ALUs, in order: add, sub, mul, shf
        alus = [ALU(rwid) for _ in range(4)]

        aluopwid = 3 # extra bit for immediate mode
        units = [ComputationUnitNoDelay(rwid, aluopwid, alu) for alu in alus]

        CompUnitsBase.__init__(self, rwid, units)

    def elaborate(self, platform):
        m = super().elaborate(platform)

        # every unit receives the same operation, on its lower 2 bits only
        for unit in self.units:
            m.d.comb += unit.oper_i[0:2].eq(self.oper_i)

        return m
207
208
class CompUnitBR(CompUnitsBase):
    """ Group containing the single Branch Computation Unit.
    """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width

            Note: the branch ALU (bgt) and its Computation Unit (br1) are
            kept as attributes so that a shadow unit can be created for
            them
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)

        # Branch ALU, wrapped in its Computation Unit
        branch_alu = BranchALU(rwid)
        branch_cu = ComputationUnitNoDelay(rwid, 3, branch_alu)
        self.bgt = branch_alu
        self.br1 = branch_cu
        CompUnitsBase.__init__(self, rwid, [branch_cu])

    def elaborate(self, platform):
        m = super().elaborate(platform)

        # every unit receives the same operation
        for unit in self.units:
            m.d.comb += unit.oper_i.eq(self.oper_i)

        return m
239
240
class FunctionUnits(Elaboratable):
    """ Wires an FU-FU Dependency Matrix and an FU-Register Dependency
        Matrix together and exposes their combined inputs and outputs.

        * :n_regs:     number of registers (width of the reg-select signals)
        * :n_int_alus: number of Function Units (width of the FU signals)
    """

    def __init__(self, n_regs, n_int_alus):
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        # register-select inputs, one bit per register
        self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in

        # global read/write pending vectors
        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        # register-select outputs, one bit per register
        self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)

        # per-Function-Unit signals
        self.req_rel_i = Signal(n_int_alus, reset_less = True)
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.req_rel_o = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here (it is set
        # up during elaborate), for use in WaWGrid

    def elaborate(self, platform):
        m = Module()
        n_fus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        fufu = FUFUDepMatrix(n_fus, n_fus)
        m.submodules.intfudeps = fufu
        # Integer FU-Reg Dep Matrix
        fureg = FURegDepMatrix(n_fus, self.n_regs)
        m.submodules.intregdeps = fureg

        # also output for use in WaWGrid
        self.wr_pend_o = fureg.wr_pend_o

        m.d.comb += [
            # global pending vectors come from the FU-Reg matrix selects,
            # which are also fed straight back in as its pending inputs
            self.g_int_rd_pend_o.eq(fureg.rd_rsel_o),
            self.g_int_wr_pend_o.eq(fureg.wr_rsel_o),
            fureg.rd_pend_i.eq(fureg.rd_rsel_o),
            fureg.wr_pend_i.eq(fureg.wr_rsel_o),

            # FU-Reg pending outputs drive the FU-FU matrix
            fufu.rd_pend_i.eq(fureg.rd_pend_o),
            fufu.wr_pend_i.eq(fureg.wr_pend_o),

            # FU-FU matrix: issue / go / die in, readable / writable out
            fufu.issue_i.eq(self.fn_issue_i),
            fufu.go_rd_i.eq(self.go_rd_i),
            fufu.go_wr_i.eq(self.go_wr_i),
            fufu.go_die_i.eq(self.go_die_i),
            self.readable_o.eq(fufu.readable_o),
            self.writable_o.eq(fufu.writable_o),

            # FU-Reg matrix: dest/src1/src2 selects in
            fureg.dest_i.eq(self.dest_i),
            fureg.src1_i.eq(self.src1_i),
            fureg.src2_i.eq(self.src2_i),

            # FU-Reg matrix: the same go / die / issue signals
            fureg.go_rd_i.eq(self.go_rd_i),
            fureg.go_wr_i.eq(self.go_wr_i),
            fureg.go_die_i.eq(self.go_die_i),
            fureg.issue_i.eq(self.fn_issue_i),

            # FU-Reg matrix: register selects out
            self.dest_rsel_o.eq(fureg.dest_rsel_o),
            self.src1_rsel_o.eq(fureg.src1_rsel_o),
            self.src2_rsel_o.eq(fureg.src2_rsel_o),
        ]

        return m
316
317
class Scoreboard(Elaboratable):
    """ Top-level scoreboard: connects register files, Computation Units,
        dependency matrices (FunctionUnits), a group picker, an issue unit,
        two shadow matrices and a branch speculation recorder.

        The integer Computation Units are a group of four ALUs
        (CompUnitALUs) plus one branch unit (CompUnitBR), giving the five
        Function Units that n_int_alus is hard-coded to in elaborate.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these
        # (group sizes match the 4 ALUs and 1 branch unit created in
        # elaborate)
        self.aluissue = IssueUnitGroup(4)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)

        # inputs
        # NOTE(review): Signal(max=...) is presumably the older nmigen
        # API for "wide enough to hold 0..n_regs-1" - confirm against the
        # nmigen version in use
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are created but not connected to anything in this method
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        # (5 = the 4 units of CompUnitALUs plus the 1 unit of CompUnitBR)
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 3)
        cub = CompUnitBR(self.rwid, 2)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(n_intfus) # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        # NOTE(review): as written, the branch shadow is a *separate*
        # one-wide ShadowMatrix (bshadow) rather than an extra column on
        # the first matrix.
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [ regdecode.dest_i.eq(self.int_dest_i),
                  regdecode.src1_i.eq(self.int_src1_i),
                  regdecode.src2_i.eq(self.int_src2_i),
                  regdecode.enable_i.eq(self.reg_enable_i),
                  self.issue_o.eq(issueunit.issue_o)
                ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cub.oper_i.eq(self.br_oper_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # branch is "active" from the cycle it is issued (or while the
        # recorder says so) until the branch unit's go_wr fires
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                # only the unit(s) being issued to get the branch shadow
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # 0x1f is a 5-bit mask, i.e. one bit for each of the 5 Function Units
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            # data_o + 1 maps branch result 0/1 onto direction 1/2
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        # yields the register files' contents followed by this module's
        # own externally-visible signals
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        # list of everything __iter__ yields
        return list(self)
587
588
class IssueToScoreboard(Elaboratable):
    """ Connects an InstructionQ to a Scoreboard.

        The instruction at the head of the queue is inspected in-place,
        routed to either the branch or the ALU issue group, and popped
        from the queue once that group reports a function issue.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   instruction queue length
            * :n_in:   number of instructions that may be added at once
            * :n_out:  passed through to InstructionQ
            * :rwid:   bit width of register file(s)
            * :opwid:  bit width of the operation field
            * :n_regs: number of registers
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # (width, signed) shape: log2(qlen)+2 bits, unsigned
        mqbits = (int(log(qlen) / log(2))+2, False)
        self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
        self.p_ready_o = Signal() # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True) # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        # (note: this attribute only exists once elaborate has been called)
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when the
        # relevant issue group reports fn_issue_o (see n_sub_i below).

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()

        with m.If(wait_issue_br | wait_issue_alu):
            # set instruction pop length to 1 if the unit accepted
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            dest = iq.data_o[0].dest_i
            src1 = iq.data_o[0].src1_i
            src2 = iq.data_o[0].src2_i
            op = iq.data_o[0].oper_i
            opi = iq.data_o[0].opim_i # immediate set

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1) # enable the regfile

            # choose a Function-Unit-Group
            # (bits 2 and 3 of the operation select the branch group;
            # bits 0 and 1 are the operation within the group)
            with m.If((op & (0x3<<2)) != 0): # branch
                comb += sc.brissue.insn_i.eq(1)
                comb += sc.br_oper_i.eq(op & 0x3)
                comb += wait_issue_br.eq(1)
            with m.Else(): # alu
                comb += sc.aluissue.insn_i.eq(1)
                comb += sc.alu_oper_i.eq(Cat(op & 0x3, opi))
                comb += wait_issue_alu.eq(1)

            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            #yield sc.branch_fail_i.eq(branch_fail)
            #yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        # yields p_ready_o, every field of every data_i entry, then p_add_i
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        # list of everything __iter__ yields
        return list(self)
692
693
# operation codes.  0-3 are ALU operations; 4-7 are branch comparisons
IADD = 0
ISUB = 1
IMUL = 2
ISHF = 3
IBGT = 4
IBLT = 5
IBEQ = 6
IBNE = 7


class RegSim:
    """ Pure-python model of the integer register file plus ALU, used to
        compute the values that the hardware simulation is expected to
        produce.

        * :rwidth: register bit width (results are truncated to this)
        * :nregs:  number of registers (all start at zero)
    """

    # opcode -> function(src1, src2, maxbits) giving the untruncated result
    _OPS = {
        IADD: lambda a, b, maxbits: a + b,
        ISUB: lambda a, b, maxbits: a - b,
        IMUL: lambda a, b, maxbits: a * b,
        ISHF: lambda a, b, maxbits: a >> (b & maxbits),
        IBGT: lambda a, b, maxbits: int(a > b),
        IBLT: lambda a, b, maxbits: int(a < b),
        IBEQ: lambda a, b, maxbits: int(a == b),
        IBNE: lambda a, b, maxbits: int(a != b),
    }

    def __init__(self, rwidth, nregs):
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, op_imm, src1, src2, dest):
        """ performs one operation and stores the result in register dest

            * :op:     operation code (IADD ... IBNE)
            * :op_imm: if true, src2 is used directly as an immediate
                       value; otherwise src2 is a register number
            * :src1:   source register number
            * :src2:   source register number (or immediate, see op_imm)
            * :dest:   destination register number

            returns the result, truncated to rwidth bits.
            raises ValueError (without modifying any register) if op is
            not a known operation code.
        """
        try:
            fn = self._OPS[op]
        except KeyError:
            # previously an unknown op fell through the if/elif chain and
            # failed later with an obscure UnboundLocalError
            raise ValueError("unknown operation code %r" % (op,)) from None
        maxbits = (1 << self.rwidth) - 1
        src1 = self.regs[src1] & maxbits
        if not op_imm: # register mode: look up src2 as well
            src2 = self.regs[src2] & maxbits
        val = fn(src1, src2, maxbits) & maxbits
        self.setval(dest, val)
        return val

    def setval(self, dest, val):
        """ sets register dest to val (and logs it)
        """
        print ("sim setval", dest, hex(val))
        self.regs[dest] = val

    def dump(self, dut):
        """ simulation generator: prints expected vs actual for every
            register, reading the actual values from dut.intregs
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """ simulation generator: compares every register in dut.intregs
            against the expected value.  on the first mismatch, dumps all
            registers and raises AssertionError.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                # explicit raise: a bare "assert False" is stripped when
                # python runs with -O, silently passing a failed check
                raise AssertionError("reg %d expected %x received %x" %
                                     (i, val, reg))
750
def instr_q(dut, op, op_imm, src1, src2, dest, branch_success, branch_fail):
    """ simulation generator: places one instruction on dut.data_i, asks
        for it to be added to the queue (p_add_i) and waits until the
        dut reports p_ready_o, after which p_add_i is cleared again.

        branch_success and branch_fail are accepted but not used here.
    """
    queue = [{'oper_i': op, 'dest_i': dest, 'opim_i': op_imm,
              'src1_i': src1, 'src2_i': src2}]
    n_send = len(queue)

    # present the instruction(s) on the input
    for slot, fields in enumerate(queue):
        yield from eq(dut.data_i[slot], fields)
        di = yield dut.data_i[slot]
        print ("senddata %d %x" % (slot, di))

    # request the add, then step the clock until it has been accepted
    yield dut.p_add_i.eq(n_send)
    yield
    while not (yield dut.p_ready_o):
        yield

    yield dut.p_add_i.eq(0)
768
769
def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
    """ simulation generator: issues one instruction directly to a
        scoreboard dut (register numbers, issue group, operation and
        branch-shadow requests) and waits for it to be issued.
    """
    yield from disable_issue(dut)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    # bits 2-3 of op select the branch group, otherwise it is an ALU op
    if op & (0x3<<2):
        dut_issue, oper_sig = dut.brissue, dut.br_oper_i
    else:
        dut_issue, oper_sig = dut.aluissue, dut.alu_oper_i
    yield dut_issue.insn_i.eq(1)
    yield oper_sig.eq(Const(op & 0x3, 2))
    yield dut.reg_enable_i.eq(1)

    # request that the instruction be made shadow-dependent on (either)
    # branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
792
793
def print_reg(dut, rnums):
    """ simulation generator: reads the registers numbered in rnums from
        dut.intregs and prints them (in hex) on one line
    """
    values = []
    for rnum in rnums:
        values.append("%x" % (yield dut.intregs.regs[rnum].reg))
    labels = ','.join(str(rnum) for rnum in rnums)
    print ("reg %s: %s" % (labels, ','.join(values)))
801
802
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """ returns a list of n_ops random instructions.

        each instruction is a tuple (src1, src2, dest, op, opi), with
        an extra trailing (0, 0) shadow pair if shadowing is set.
        register numbers are in the range 1..dut.n_regs-1, op is in the
        range 0..max_opnums, and opi (immediate mode) is 1 on roughly
        one in four instructions.
    """
    def rand_reg():
        return randint(1, dut.n_regs-1)

    ops = []
    for _ in range(n_ops):
        # draw order matters for reproducibility under a fixed seed
        src1 = rand_reg()
        src2 = rand_reg()
        dest = rand_reg()
        op = randint(0, max_opnums)
        opi = int(randint(0, 3) == 0) # immediate only when the draw is zero
        entry = (src1, src2, dest, op, opi)
        if shadowing:
            entry += ((0, 0),)
        ops.append(entry)
    return ops
817
818
def wait_for_busy_clear(dut):
    """ simulation generator: steps the clock until dut.busy_o reads as
        zero, printing "busy" for every cycle spent waiting
    """
    while (yield dut.busy_o):
        print ("busy",)
        yield
826
def disable_issue(dut):
    """ simulation generator: de-asserts the instruction-present input
        of both the ALU and the branch issue groups
    """
    for group in (dut.aluissue, dut.brissue):
        yield group.insn_i.eq(0)
830
831
def wait_for_issue(dut, dut_issue):
    """ simulation generator: steps the clock (printing "busy") until
        dut_issue.fn_issue_o is non-zero, then de-asserts the issue
        requests and the register decode enable
    """
    while not (yield dut_issue.fn_issue_o):
        print ("busy",)
        yield
    # issued: stop requesting, and switch off the register decode
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
843
def scoreboard_branch_sim(dut, alusim):
    """ simulation generator: branch speculation test.

        issues an instruction list containing a branch, followed by the
        "branch success" and "branch fail" instructions (marked so that
        one set is cancelled by the shadow mechanism).  the same list is
        then replayed on alusim, following only the direction the branch
        actually takes there, and the two register files are compared.

        instructions are tuples (src1, src2, dest, op, (shadow_on,
        shadow_off)).  for a branch (op >= 4), dest is instead a pair of
        lists (branch_ok, branch_fail) of follow-on instructions, in
        which None may be used as a placeholder.

        * :dut:    scoreboard under test (driven through int_instr)
        * :alusim: RegSim-style reference model
    """

    iseed = 3

    for i in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for i in range(1, dut.n_regs):
            val = 31+i*3
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        if False:
            # create some instructions: branches create a tree
            # NOTE(review): with shadowing=True, create_random_ops returns
            # 6-element tuples (including an immediate flag), which the
            # 5-element unpacking below would not accept - this disabled
            # branch needs updating before it is re-enabled.
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # untouched copy, replayed on the reference model afterwards
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts # same list: entries appended below are popped too
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch already resolved: no shadow needed any more
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            yield from int_instr(dut, op, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # the reference model takes (op, op_imm, src1, src2, dest).
            # these instruction tuples carry no immediate flag (and
            # int_instr issues them without one), so op_imm is 0
            branch_res = alusim.op(op, 0, src1, src2, dest)
            if is_branch:
                # follow only the direction the reference model took
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
956
957
def scoreboard_sim(dut, alusim):
    """ simulation generator: random-instruction regression test.

        loads random values into the registers of both the dut and the
        reference model (alusim), pushes a list of instructions through
        the dut's instruction queue (running each on alusim at the same
        time), waits for the dut to go idle and then compares the
        register contents.

        the loop that issues instructions unpacks each entry as
        (src1, src2, dest, op, opi, (br_ok, br_fail)).  several of the
        disabled ("if False") regression fixtures below use older, shorter
        tuple layouts and would need converting before being re-enabled.
    """

    #seed(2)

    for i in range(1):

        # set random values in the registers
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if True:
            instrs = create_random_ops(dut, 15, True, 3)

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )


        if False:
            instrs.append((2, 3, 3, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, (0, 0)))
            instrs.append((3, 5, 5, 2, (0, 0)))
            instrs.append((5, 3, 3, 3, (0, 0)))
            instrs.append((3, 5, 5, 0, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            # branch (op 4) followed by an instruction marked "branch fail"
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            # branch (op 4) followed by an instruction marked "branch success"
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            # (this fixture uses the current 6-element layout)
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            src1, src2, dest, op, opi, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d)" % (i, src1, src2, dest, op))
            # run on the reference model first, then queue it on the dut
            alusim.op(op, opi, src1, src2, dest)
            yield from instr_q(dut, op, opi, src1, src2, dest, br_ok, br_fail)

        # wait for all instructions to stop before checking
        # (first for the instruction queue to drain...)
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # (... then a few extra cycles, then for the units to go idle)
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1091
1092
def test_scoreboard():
    """ builds the IssueToScoreboard design, writes its RTLIL out to
        test_scoreboard6600.il, then runs the random-instruction
        simulation with a VCD trace in test_scoreboard6600.vcd
    """
    rwid, n_regs = 16, 8
    dut = IssueToScoreboard(2, 1, 1, rwid, 8, n_regs)
    alusim = RegSim(rwid, n_regs)
    memsim = MemSim(16, 16)

    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    # alternative (currently disabled): the branch speculation test
    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1106
1107
# run the scoreboard test when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()