reasonably sure that the pipelined ALU will work...
[soc.git] / src / experiment / score6600.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
4
5 from regfile.regfile import RegFileArray, treereduce
6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
7 from scoreboard.fu_reg_matrix import FURegDepMatrix
8 from scoreboard.global_pending import GlobalPending
9 from scoreboard.group_picker import GroupPicker
10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
12 from scoreboard.instruction_q import Instruction, InstructionQ
13
14 from compalu import ComputationUnitNoDelay
15
16 from alu_hier import ALU, BranchALU
17 from nmutil.latch import SRLatch
18 from nmutil.nmoperator import eq
19
20 from random import randint, seed
21 from copy import deepcopy
22 from math import log
23
24
class Memory(Elaboratable):
    """ Simple test memory with one read port and one write port.

        Inputs (constructor):

        * :regwid: width of each memory word, in bits
        * :addrw:  width of the address signal, in bits

        Signals:

        * :adr:   address (shared by the read and the write port)
        * :dat_r: data read from the memory
        * :dat_w: data to be written to the memory
        * :we:    write enable
    """
    def __init__(self, regwid, addrw):
        # this class has the same name as nmigen's Memory primitive, so
        # the primitive is imported locally under a different name:
        # calling Memory(...) here would re-enter this constructor
        from nmigen import Memory as NMigenMemory

        # integer division is required: ddepth is used as a slice index
        # and depth as the number of memory words
        self.ddepth = regwid//8
        depth = (1<<addrw) // self.ddepth
        self.adr = Signal(addrw)
        self.dat_r = Signal(regwid)
        self.dat_w = Signal(regwid)
        self.we = Signal()
        # each word is initialised to its own index
        self.mem = NMigenMemory(width=regwid, depth=depth,
                                init=range(0, depth))

    def elaborate(self, platform):
        m = Module()
        m.submodules.rdport = rdport = self.mem.read_port()
        m.submodules.wrport = wrport = self.mem.write_port()
        m.d.comb += [
            rdport.addr.eq(self.adr[self.ddepth:]), # ignore low bits
            self.dat_r.eq(rdport.data),
            wrport.addr.eq(self.adr),
            wrport.data.eq(self.dat_w),
            wrport.en.eq(self.we),
        ]
        return m
47
48
class MemSim:
    """ Pure-python model of a word-organised memory.

        * :regwid: width of each stored word, in bits
        * :addrw:  width of the address, in bits

        Each word starts out holding its own index.
    """
    def __init__(self, regwid, addrw):
        self.regwid = regwid
        self.ddepth = regwid // 8
        n_words = (1 << addrw) // self.ddepth
        self.mem = [idx for idx in range(n_words)]

    def ld(self, addr):
        """ Return the word selected by addr (low ddepth bits dropped) """
        index = addr >> self.ddepth
        return self.mem[index]

    def st(self, addr, data):
        """ Store data, truncated to regwid bits, at the word selected
            by addr (low ddepth bits dropped)
        """
        word_mask = (1 << self.regwid) - 1
        truncated = data & word_mask
        self.mem[addr >> self.ddepth] = truncated
61
62
class CompUnitsBase(Elaboratable):
    """ Base class for a group of Computation Units.

        The class was written to look after a set of ALUs that handle
        the same operations: the per-ALU single-bit issue, go_rd etc.
        signals are concatenated into multi-bit signals.  The very same
        concatenation also works when the members are themselves
        CompUnitsBase groups, so the class can be nested to present one
        flat, sequential view of all signals in and out of every ALU
        whilst still allowing ALUs to be grouped conveniently.

        At the lower level the intent is that a group of (identical)
        ALUs is handed the same operation.  Beyond that, the intent is
        for such a group to share the *same pipeline* and so become a
        "Concurrent Computation Unit" as defined by Mitch Alsup (see
        section 11.4.9.3)
    """
    def __init__(self, rwid, units):
        """ Inputs:

            * :rwid:  bit width of register file(s) - both FP and INT
            * :units: sequence of ALUs (or CompUnitsBase derivatives)
        """
        self.units = units
        self.rwid = rwid

        # a group of groups is as wide as all of its members together
        if units and isinstance(units[0], CompUnitsBase):
            self.n_units = sum(group.n_units for group in self.units)
        else:
            self.n_units = len(units)

        n_units = self.n_units

        # inputs (one bit per unit)
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)

        # outputs (one bit per unit)
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        units = self.units

        for idx, unit in enumerate(units):
            setattr(m.submodules, "comp%d" % idx, unit)

        # outputs: gather the per-unit signals into the group signals
        comb += self.rd_rel_o.eq(Cat(*[u.rd_rel_o for u in units]))
        comb += self.req_rel_o.eq(Cat(*[u.req_rel_o for u in units]))
        comb += self.busy_o.eq(Cat(*[u.busy_o for u in units]))

        # inputs: spread the group signals out over the units
        comb += Cat(*[u.go_die_i for u in units]).eq(self.go_die_i)
        comb += Cat(*[u.shadown_i for u in units]).eq(self.shadown_i)
        comb += Cat(*[u.go_wr_i for u in units]).eq(self.go_wr_i)
        comb += Cat(*[u.go_rd_i for u in units]).eq(self.go_rd_i)
        comb += Cat(*[u.issue_i for u in units]).eq(self.issue_i)

        # register data out: merge (OR) all unit outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "data_o"
        if units:
            comb += self.data_o.eq(treereduce(units))

        # register data in: every unit sees the same two source operands
        for unit in units:
            comb += unit.src1_i.eq(self.src1_i)
            comb += unit.src2_i.eq(self.src2_i)

        return m
170
171
class CompUnitALUs(CompUnitsBase):
    """ Group of four integer ALU Computation Units (add, sub, mul, shf) """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  bit width of the operation input (oper_i)
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs, in order: add, sub, mul, shf
        alus = [ALU(rwid) for _ in range(4)]

        # wrap each ALU in a Computation Unit
        aluopwid = 3 # extra bit for immediate mode
        units = [ComputationUnitNoDelay(rwid, aluopwid, alu) for alu in alus]

        CompUnitsBase.__init__(self, rwid, units)

    def elaborate(self, platform):
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the same operation and immediate to all units: the lower
        # 3 bits of each unit's operation input are driven
        for unit in self.units:
            comb += [unit.oper_i[0:3].eq(self.oper_i),
                     unit.imm_i.eq(self.imm_i)]

        return m
209
210
class CompUnitBR(CompUnitsBase):
    """ Computation Unit group containing the single branch unit """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  bit width of the operation input (oper_i)

            Note: the branch ALU (bgt) and its Computation Unit (br1) are
            kept as attributes so that a shadow unit can be created for it
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)

        # Branch ALU, wrapped in its Computation Unit
        branch_alu = BranchALU(rwid)
        self.bgt = branch_alu
        self.br1 = ComputationUnitNoDelay(rwid, 3, branch_alu)
        CompUnitsBase.__init__(self, rwid, [self.br1])

    def elaborate(self, platform):
        m = CompUnitsBase.elaborate(self, platform)

        # every unit in the group receives the same operation
        m.d.comb += [unit.oper_i.eq(self.oper_i) for unit in self.units]

        return m
241
242
class FunctionUnits(Elaboratable):
    """ Integer Function Units: an FU-FU dependency matrix and an FU-Reg
        dependency matrix, wired to each other and to this module's ports.
    """

    def __init__(self, n_regs, n_int_alus):
        """ Inputs:

            * :n_regs:     width of the register-number signals
            * :n_int_alus: number of integer Function Units
        """
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        # register numbers in
        self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in

        # global read / write pending
        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        # register selection out
        self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)

        # per-Function-Unit signals
        self.req_rel_i = Signal(n_int_alus, reset_less = True)
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.req_rel_o = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here, for use in
        # WaWGrid (the attribute is created in elaborate)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        n_intfus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
        m.submodules.intfudeps = intfudeps
        # Integer FU-Reg Dep Matrix
        intregdeps = FURegDepMatrix(n_intfus, self.n_regs)
        m.submodules.intregdeps = intregdeps

        # global pending vectors come from the FU-Reg register selection,
        # which is also fed back into the FU-Reg matrix itself
        comb += [
            self.g_int_rd_pend_o.eq(intregdeps.rd_rsel_o),
            self.g_int_wr_pend_o.eq(intregdeps.wr_rsel_o),
            intregdeps.rd_pend_i.eq(intregdeps.rd_rsel_o),
            intregdeps.wr_pend_i.eq(intregdeps.wr_rsel_o),
        ]

        # FU-Reg pending outputs drive the FU-FU matrix
        comb += [
            intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o),
            intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o),
        ]
        self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid

        # FU-FU matrix: issue / go / die in, readable / writable out
        comb += [
            intfudeps.issue_i.eq(self.fn_issue_i),
            intfudeps.go_rd_i.eq(self.go_rd_i),
            intfudeps.go_wr_i.eq(self.go_wr_i),
            intfudeps.go_die_i.eq(self.go_die_i),
            self.readable_o.eq(intfudeps.readable_o),
            self.writable_o.eq(intfudeps.writable_o),
        ]

        # FU-Reg matrix: dest/src1/src2 register numbers in
        comb += [
            intregdeps.dest_i.eq(self.dest_i),
            intregdeps.src1_i.eq(self.src1_i),
            intregdeps.src2_i.eq(self.src2_i),
        ]

        # FU-Reg matrix: go / die / issue in
        comb += [
            intregdeps.go_rd_i.eq(self.go_rd_i),
            intregdeps.go_wr_i.eq(self.go_wr_i),
            intregdeps.go_die_i.eq(self.go_die_i),
            intregdeps.issue_i.eq(self.fn_issue_i),
        ]

        # FU-Reg matrix: register selection out
        comb += [
            self.dest_rsel_o.eq(intregdeps.dest_rsel_o),
            self.src1_rsel_o.eq(intregdeps.src1_rsel_o),
            self.src2_rsel_o.eq(intregdeps.src2_rsel_o),
        ]

        return m
318
319
class Scoreboard(Elaboratable):
    """ Scoreboard: wires together the register files, the issue units,
        the Computation Units, the Function Unit dependency matrices,
        the group picker and the shadow matrices.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(4)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.alu_imm_i = Signal(rwid, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested but not connected to anything below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        # (5 = the 4 units of CompUnitALUs plus the 1 unit of CompUnitBR)
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 3)
        cub = CompUnitBR(self.rwid, 2)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(n_intfus) # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        # NOTE(review): as written here the branch shadow is a separate
        # ShadowMatrix (bshadow) of width 1, not an extra bit on "shadows"
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [    regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     self.issue_o.eq(issueunit.issue_o)
                ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cua.imm_i.eq(self.alu_imm_i)
        comb += cub.oper_i.eq(self.br_oper_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        # the module is busy if any Computation Unit is busy
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        # a go_die from the branch shadow resets the order-preserving shadows
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # the branch is "active" from its issue until its go_wr
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # 0x1f masks the low 5 bits (n_int_alus is 5 above)
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            # branch unit result plus one: nonzero once the branch has run
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        # yields the signals that ports() returns
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ Return the list of signals produced by iterating this object """
        return list(self)
591
592
class IssueToScoreboard(Elaboratable):
    """ Instruction queue feeding a Scoreboard: the instruction at the
        head of the queue is presented to the scoreboard and is removed
        from the queue once a Function Unit group has accepted it.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   passed to InstructionQ; also sizes p_add_i / qlen_o
            * :n_in:   passed to InstructionQ; number of data_i entries
            * :n_out:  passed to InstructionQ
            * :rwid:   bit width passed to the queue and the Scoreboard
            * :opwid:  operation width passed to the instruction queue
            * :n_regs: number of registers passed to the Scoreboard
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # (width, signed) shape for the queue-length signals
        mqbits = (int(log(qlen) / log(2))+2, False)
        self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
        self.p_ready_o = Signal() # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True) # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        # (note: the attribute only exists once elaborate has been called)
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when n_sub_i is set (below: once
        # the selected issue unit group has accepted the instruction)

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()

        with m.If(wait_issue_br | wait_issue_alu):
            # set instruction pop length to 1 if the unit accepted
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            imm = iq.data_o[0].imm_i
            dest = iq.data_o[0].dest_i
            src1 = iq.data_o[0].src1_i
            src2 = iq.data_o[0].src2_i
            op = iq.data_o[0].oper_i
            opi = iq.data_o[0].opim_i # immediate set

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1) # enable the regfile

            # choose a Function-Unit-Group
            # (bits 2 and 3 of the operation select the branch group)
            with m.If((op & (0x3<<2)) != 0): # branch
                comb += sc.brissue.insn_i.eq(1)
                comb += sc.br_oper_i.eq(op & 0x3)
                comb += wait_issue_br.eq(1)
            with m.Else(): # alu
                comb += sc.aluissue.insn_i.eq(1)
                # low two operation bits, plus the immediate-mode bit on top
                comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.alu_imm_i.eq(imm)
                comb += wait_issue_alu.eq(1)

            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            #yield sc.branch_fail_i.eq(branch_fail)
            #yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        # yields the signals that ports() returns
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        """ Return the list of signals produced by iterating this object """
        return list(self)
698
699
# opcode numbers: 0-3 are ALU operations, 4-7 are branch comparisons
IADD = 0
ISUB = 1
IMUL = 2
ISHF = 3
IBGT = 4
IBLT = 5
IBEQ = 6
IBNE = 7

class RegSim:
    """ Pure-python reference model of the integer register file, used
        to predict what the hardware registers should contain.

        * :rwidth: width of each register, in bits
        * :nregs:  number of registers
    """
    def __init__(self, rwidth, nregs):
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, op_imm, imm, src1, src2, dest):
        """ Perform one operation on the simulated registers.

            * :op:     opcode (one of IADD ... IBNE)
            * :op_imm: if true, imm is the second operand instead of
                       register src2
            * :imm:    immediate value
            * :src1:   register number of the first operand
            * :src2:   register number of the second operand
            * :dest:   register number that receives the result

            Returns the result, truncated to rwidth bits.
            Raises ValueError if the opcode is not recognised.
        """
        maxbits = (1 << self.rwidth) - 1
        src1 = self.regs[src1] & maxbits
        if op_imm:
            src2 = imm
        else:
            src2 = self.regs[src2] & maxbits
        if op == IADD:
            val = src1 + src2
        elif op == ISUB:
            val = src1 - src2
        elif op == IMUL:
            val = src1 * src2
        elif op == ISHF:
            val = src1 >> (src2 & maxbits)
        elif op == IBGT:
            val = int(src1 > src2)
        elif op == IBLT:
            val = int(src1 < src2)
        elif op == IBEQ:
            val = int(src1 == src2)
        elif op == IBNE:
            val = int(src1 != src2)
        else:
            # previously fell through to an unbound "val"
            raise ValueError("unknown opcode %r" % (op,))
        val &= maxbits
        self.setval(dest, val)
        return val

    def setval(self, dest, val):
        """ Set simulated register dest to val (and log it) """
        print ("sim setval", dest, hex(val))
        self.regs[dest] = val

    def dump(self, dut):
        """ Simulation process: print every register, expected against
            the value read from dut.intregs
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """ Simulation process: compare every register against the value
            read from dut.intregs.  On the first mismatch, dump all
            registers and raise AssertionError.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                # explicit raise: a bare assert is stripped under python -O
                raise AssertionError("reg %d expected %x received %x" % \
                                     (i, val, reg))
758
def instr_q(dut, op, op_imm, imm, src1, src2, dest,
            branch_success, branch_fail):
    """ Simulation process: place one instruction on dut.data_i, request
        that it be added to the queue, and wait until p_ready_o is seen.

        branch_success and branch_fail are accepted but not used here.
    """
    instrs = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
               'src1_i': src1, 'src2_i': src2}]
    sendlen = 1

    # drive the instruction(s) onto the data inputs and read them back
    for idx, instr in enumerate(instrs[:sendlen]):
        yield from eq(dut.data_i[idx], instr)
        readback = yield dut.data_i[idx]
        print ("senddata %d %x" % (idx, readback))

    # request the add, then clock until the queue reports ready
    yield dut.p_add_i.eq(sendlen)
    yield
    while not (yield dut.p_ready_o):
        yield

    yield dut.p_add_i.eq(0)
777
778
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """ Simulation process: present one instruction directly to the
        scoreboard's issue unit groups and wait for it to be issued.

        Operations with bit 2 or bit 3 set go to the branch issue group,
        everything else to the ALU issue group.
    """
    is_branch = (op & (0x3<<2)) != 0
    oper = Const(op & 0x3, 2)

    yield from disable_issue(dut)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)
    if is_branch:
        dut_issue = dut.brissue
        yield dut_issue.insn_i.eq(1)
        yield dut.br_oper_i.eq(oper)
    else:
        dut_issue = dut.aluissue
        yield dut_issue.insn_i.eq(1)
        yield dut.alu_oper_i.eq(oper)
        yield dut.alu_imm_i.eq(imm)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
802
803
def print_reg(dut, rnums):
    """ Simulation process: read the integer registers numbered in rnums
        and print their numbers and (hex) values on one line.
    """
    values = []
    for rnum in rnums:
        value = yield dut.intregs.regs[rnum].reg
        values.append("%x" % value)
    labels = ','.join(str(rnum) for rnum in rnums)
    print ("reg %s: %s" % (labels, ','.join(values)))
811
812
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """ Create n_ops random instructions.

        Each instruction is a tuple (src1, src2, dest, op, opi, imm),
        with a trailing (0, 0) shadow pair appended when shadowing is
        set.  Register numbers are in the range 1 .. dut.n_regs-1, the
        immediate in 1 .. 2**dut.rwid - 1 and op in 0 .. max_opnums.
    """
    def random_reg():
        return randint(1, dut.n_regs-1)

    ops = []
    for _ in range(n_ops):
        src1 = random_reg()
        src2 = random_reg()
        imm = randint(1, (1<<dut.rwid)-1)
        dest = random_reg()
        op = randint(0, max_opnums)
        # immediate mode is selected when the random value is zero
        opi = 1 if randint(0, 2) == 0 else 0

        entry = (src1, src2, dest, op, opi, imm)
        if shadowing:
            entry = entry + ((0, 0),)
        ops.append(entry)
    return ops
828
829
def wait_for_busy_clear(dut):
    """ Simulation process: clock until dut.busy_o reads as zero,
        printing "busy" for every cycle spent waiting.
    """
    while (yield dut.busy_o):
        print ("busy",)
        yield
837
def disable_issue(dut):
    """ Simulation process: clear insn_i on the ALU issue group and
        then on the branch issue group.
    """
    for issue_group in (dut.aluissue, dut.brissue):
        yield issue_group.insn_i.eq(0)
841
842
def wait_for_issue(dut, dut_issue):
    """ Simulation process: clock until dut_issue.fn_issue_o is nonzero,
        printing "busy" for every cycle spent waiting, then withdraw the
        instruction (insn_i cleared, register decode disabled).
    """
    while not (yield dut_issue.fn_issue_o):
        print ("busy",)
        #yield from print_reg(dut, [1,2,3])
        yield

    # issued: stop presenting the instruction
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
    #yield from print_reg(dut, [1,2,3])
854
def scoreboard_branch_sim(dut, alusim):
    """ Simulation process for the branch speculation experiment.

        Issues a short instruction sequence containing a branch, with
        the instructions on both sides of the branch issued under
        "success" / "fail" shadows, then replays the sequence on the
        register simulator and compares the register contents.

        * :dut:    device under test.  must provide branch_direction_o,
                   n_regs and intregs, plus the signals driven by
                   int_instr
        * :alusim: RegSim instance used as the reference model

        Instructions are tuples (src1, src2, dest, op, (shadow_on,
        shadow_off)).  For a branch (op >= 4) the "dest" entry is instead
        a pair of lists (branch_ok, branch_fail) of instructions.
    """

    iseed = 3

    for i in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        if False:
            # NOTE(review): disabled.  create_random_ops with shadowing
            # returns 7-entry tuples whereas the loops below unpack
            # 5-entry tuples, so this needs updating before re-enabling.
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # the issue loop below consumes insts: keep a copy for the replay
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instrs.pop(0)
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch already resolved: nothing left to be shadowed by
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr takes an immediate as its third argument: these
            # instructions carry none, so pass zero (int_instr never sets
            # the immediate-mode bit of the operation)
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay the same instructions on the register simulator,
        # following only the side of the branch that was actually taken
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # RegSim.op takes (op, op_imm, imm, src1, src2, dest):
            # no immediate mode and no immediate here
            branch_res = alusim.op(op, 0, 0, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
967
968
def scoreboard_sim(dut, alusim):
    """ Simulation process: load random values into the registers, push
        a list of instructions through the instruction queue (running
        each one on the register simulator as well), wait for the design
        to go idle, then compare the hardware registers against the
        simulator.

        * :dut:    device under test (provides n_regs, intregs, qlen_o,
                   busy_o and the signals driven by instr_q)
        * :alusim: RegSim instance used as the reference model

        Only the blocks guarded by "if True" are active.  The "if False"
        blocks are disabled regression tests.
        NOTE(review): several disabled blocks append 4- or 6-entry
        tuples, whereas the issue loop unpacks 7 entries
        (src1, src2, dest, op, opi, imm, (br_ok, br_fail)); they need
        updating before being re-enabled.
    """

    seed(0)

    for i in range(1):

        # set random values in the registers
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if False:
            instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        # the currently-active test: add, mul, then a branch (op 4) in
        # immediate mode
        if True:
            instrs.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append( (1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                    (i, src1, src2, dest, op, opi, imm))
            # run on the reference model first, then queue for the hardware
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        # (first until the instruction queue is empty...)
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # (... then four more cycles, then until no unit is busy)
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1111
1112
def test_scoreboard():
    """ Build an IssueToScoreboard, write it out as RTLIL to
        test_scoreboard6600.il, then run scoreboard_sim against it
        (recording test_scoreboard6600.vcd).
    """
    rwid, n_regs = 16, 8
    dut = IssueToScoreboard(2, 1, 1, rwid, 8, n_regs)
    alusim = RegSim(rwid, n_regs)
    memsim = MemSim(16, 16)

    # convert first, so that no file is created if conversion fails
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1126
1127
# run the scoreboard test when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()