added in the LD/ST Comp Unit (not connected up yet) and the code didn't fall over
[soc.git] / src / experiment / score6600.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
4
5 from regfile.regfile import RegFileArray, treereduce
6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
7 from scoreboard.fu_reg_matrix import FURegDepMatrix
8 from scoreboard.global_pending import GlobalPending
9 from scoreboard.group_picker import GroupPicker
10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
12 from scoreboard.instruction_q import Instruction, InstructionQ
13 from scoreboard.memfu import MemFunctionUnits
14
15 from compalu import ComputationUnitNoDelay
16 from compldst import LDSTCompUnit
17
18 from alu_hier import ALU, BranchALU
19 from nmutil.latch import SRLatch
20 from nmutil.nmoperator import eq
21
22 from random import randint, seed
23 from copy import deepcopy
24 from math import log
25
26
class Memory(Elaboratable):
    """ Test memory with one read port and one write port.

        The memory is regwid bits wide and is pre-initialised so that
        each word contains its own index.
    """
    def __init__(self, regwid, addrw):
        """ Inputs:

            * :regwid: bit width of each memory word
            * :addrw:  bit width of the address signal
        """
        # imported locally under an alias: this class is itself called
        # "Memory", so the bare name would refer to this class and not
        # to the nmigen primitive.
        from nmigen import Memory as NMigenMemory

        # integer division: both values are used as a range bound and
        # as a slice index, neither of which accepts a float.
        self.ddepth = regwid // 8
        depth = (1 << addrw) // self.ddepth
        self.adr = Signal(addrw)
        self.dat_r = Signal(regwid)
        self.dat_w = Signal(regwid)
        self.we = Signal()
        self.mem = NMigenMemory(width=regwid, depth=depth,
                                init=range(0, depth))

    def elaborate(self, platform):
        m = Module()
        m.submodules.rdport = rdport = self.mem.read_port()
        m.submodules.wrport = wrport = self.mem.write_port()
        # NOTE(review): the read address drops the low "ddepth" bits
        # whereas the write address uses adr unmodified - confirm that
        # this asymmetry is intended.
        m.d.comb += [
            rdport.addr.eq(self.adr[self.ddepth:]),  # ignore low bits
            self.dat_r.eq(rdport.data),
            wrport.addr.eq(self.adr),
            wrport.data.eq(self.dat_w),
            wrport.en.eq(self.we),
        ]
        return m
49
50
class MemSim:
    """ Pure-python memory model.

        Holds a list of words, each initialised to its own index.
        Addresses are converted to a word index by shifting right by
        ddepth (regwid//8).
    """
    def __init__(self, regwid, addrw):
        """ Inputs:

            * :regwid: bit width of each stored word
            * :addrw:  bit width of an address
        """
        self.regwid = regwid
        self.ddepth = regwid // 8
        n_words = (1 << addrw) // self.ddepth
        self.mem = list(range(n_words))

    def _index(self, addr):
        """ converts an address into an index into self.mem
        """
        return addr >> self.ddepth

    def ld(self, addr):
        """ returns the word stored at addr
        """
        return self.mem[self._index(addr)]

    def st(self, addr, data):
        """ stores data at addr, truncated to regwid bits
        """
        word_mask = (1 << self.regwid) - 1
        self.mem[self._index(addr)] = data & word_mask
63
64
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Amazingly, this class works recursively.  It's supposed to just
        look after some ALUs (that can handle the same operations),
        grouping them together, however it turns out that the same code
        can also group *groups* of Computation Units together as well.

        Basically it was intended just to concatenate the ALU's issue,
        go_rd etc. signals together, which start out as bits and become
        sequences.  Turns out that the same trick works just as well
        on Computation Units!

        So this class may be used recursively to present a top-level
        sequential concatenation of all the signals in and out of
        ALUs, whilst at the same time making it convenient to group
        ALUs together.

        At the lower level, the intent is that groups of (identical)
        ALUs may be passed the same operation.  Even beyond that,
        the intent is that that group of (identical) ALUs actually
        share the *same pipeline* and as such become a "Concurrent
        Computation Unit" as defined by Mitch Alsup (see section
        11.4.9.3)
    """
    def __init__(self, rwid, units, ldstmode=False):
        """ Inputs:

            * :rwid:     bit width of register file(s) - both FP and INT
            * :units:    sequence of ALUs (or CompUnitsBase derivatives)
            * :ldstmode: when True, the extra LD/ST signals (go_ad_i,
                         adr_rel_o, sto_rel_o, load_mem_o, stwd_mem_o)
                         are created and wired up as well
        """
        self.units = units
        self.ldstmode = ldstmode
        self.rwid = rwid
        # a group of groups is as wide as the sum of its members,
        # a plain group is as wide as the number of units in it
        if units and isinstance(units[0], CompUnitsBase):
            self.n_units = sum(u.n_units for u in self.units)
        else:
            self.n_units = len(units)

        n_units = self.n_units

        # inputs
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)
        if ldstmode:
            self.go_ad_i = Signal(n_units, reset_less=True)

        # outputs
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)
        if ldstmode:
            self.adr_rel_o = Signal(n_units, reset_less=True)
            self.sto_rel_o = Signal(n_units, reset_less=True)
            self.load_mem_o = Signal(n_units, reset_less=True)
            self.stwd_mem_o = Signal(n_units, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        """ Adds every unit as a submodule ("comp0", "comp1", ...) and
            connects the per-unit signals to the concatenated
            group-level signals.
        """
        m = Module()
        comb = m.d.comb
        units = self.units

        for i, alu in enumerate(units):
            setattr(m.submodules, "comp%d" % i, alu)

        # outputs: concatenate the per-unit signals into the group signal
        comb += self.rd_rel_o.eq(Cat(*[alu.rd_rel_o for alu in units]))
        comb += self.req_rel_o.eq(Cat(*[alu.req_rel_o for alu in units]))
        comb += self.busy_o.eq(Cat(*[alu.busy_o for alu in units]))
        # inputs: split the group signal back out to the units
        comb += Cat(*[alu.go_die_i for alu in units]).eq(self.go_die_i)
        comb += Cat(*[alu.shadown_i for alu in units]).eq(self.shadown_i)
        comb += Cat(*[alu.go_wr_i for alu in units]).eq(self.go_wr_i)
        comb += Cat(*[alu.go_rd_i for alu in units]).eq(self.go_rd_i)
        comb += Cat(*[alu.issue_i for alu in units]).eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "data_o"
        if units:
            data_o = treereduce(units)
            comb += self.data_o.eq(data_o)

        # every unit receives the same source operands
        for alu in units:
            comb += alu.src1_i.eq(self.src1_i)
            comb += alu.src2_i.eq(self.src2_i)

        if not self.ldstmode:
            return m

        # LD/ST mode only: the additional address / store / memory signals
        comb += self.adr_rel_o.eq(Cat(*[alu.adr_rel_o for alu in units]))
        comb += self.sto_rel_o.eq(Cat(*[alu.sto_rel_o for alu in units]))
        comb += self.load_mem_o.eq(Cat(*[alu.load_mem_o for alu in units]))
        comb += self.stwd_mem_o.eq(Cat(*[alu.stwd_mem_o for alu in units]))
        comb += Cat(*[alu.go_ad_i for alu in units]).eq(self.go_ad_i)

        return m
201
202
class CompUnitLDSTs(CompUnitsBase):
    """ Group of two LD/ST Computation Units, each wrapping its own ALU.
    """

    def __init__(self, rwid, opwid, mem):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
            * :mem:    passed straight through to each LDSTCompUnit
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs: created first, then each is wrapped in a LD/ST unit
        adders = [ALU(rwid), ALU(rwid)]
        ldst_opwid = 4  # see compldst.py for "internal" opcode
        units = [LDSTCompUnit(rwid, ldst_opwid, adder, mem)
                 for adder in adders]

        CompUnitsBase.__init__(self, rwid, units, ldstmode=True)

    def elaborate(self, platform):
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the same operation to all units, 4 lower bits though
        for unit in self.units:
            comb += [unit.oper_i[0:4].eq(self.oper_i),
                     unit.imm_i.eq(self.imm_i),
                     unit.isalu_i.eq(0),
                    ]

        return m
239
240
class CompUnitALUs(CompUnitsBase):
    """ Group of n_alus integer ALUs, each wrapped in a
        ComputationUnitNoDelay.
    """

    def __init__(self, rwid, opwid, n_alus):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
            * :n_alus: number of ALUs (and Computation Units) to create
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs: all created first, then wrapped one by one
        alus = [ALU(rwid) for _ in range(n_alus)]
        cu_opwid = 3  # extra bit for immediate mode
        units = [ComputationUnitNoDelay(rwid, cu_opwid, alu)
                 for alu in alus]

        CompUnitsBase.__init__(self, rwid, units)

    def elaborate(self, platform):
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the same operation to all units, only lower 3 bits though
        for unit in self.units:
            comb += [unit.oper_i[0:3].eq(self.oper_i),
                     unit.imm_i.eq(self.imm_i),
                    ]

        return m
277
278
class CompUnitBR(CompUnitsBase):
    """ Group containing a single Branch Computation Unit.

        The BranchALU (self.bgt) and its Computation Unit (self.br1)
        are kept as attributes so that callers can get at them.
    """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width

            Note: bgt unit is returned so that a shadow unit can be created
            for it
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Branch ALU and CU
        br_opwid = 3  # extra bit for immediate mode
        self.bgt = BranchALU(rwid)
        self.br1 = ComputationUnitNoDelay(rwid, br_opwid, self.bgt)
        CompUnitsBase.__init__(self, rwid, [self.br1])

    def elaborate(self, platform):
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the same operation to all units
        for unit in self.units:
            comb += [unit.oper_i.eq(self.oper_i),
                     unit.imm_i.eq(self.imm_i),
                    ]

        return m
312
313
class FunctionUnits(Elaboratable):
    """ Integer Function Units: an FU-FU Dependency Matrix and an
        FU-Reg Dependency Matrix, wired together.
    """

    def __init__(self, n_regs, n_int_alus):
        """ Inputs:

            * :n_regs:     width of the register-select signals
            * :n_int_alus: number of Function Units
        """
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        self.dest_i = Signal(n_regs, reset_less=True)  # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True)  # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True)  # oper2 R# in

        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        self.dest_rsel_o = Signal(n_regs, reset_less=True)  # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True)  # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True)  # src2 reg (bot)

        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here, for use in
        # WaWGrid (the attribute is only created during elaborate)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        n_intfus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        m.submodules.intfudeps = intfudeps = FUFUDepMatrix(n_intfus,
                                                           n_intfus)
        # Integer FU-Reg Dep Matrix
        m.submodules.intregdeps = intregdeps = FURegDepMatrix(n_intfus,
                                                              self.n_regs, 2)

        # global pending vectors out, and looped back into the matrix
        comb += [self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o),
                 self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o),
                 intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o),
                 intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o),
                ]

        # FU-Reg pending outputs feed the FU-FU matrix
        comb += [intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o),
                 intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o),
                ]
        self.wr_pend_o = intregdeps.wr_pend_o  # also output for use in WaWGrid

        # FU-FU matrix: issue / go signals in, readable / writable out
        comb += [intfudeps.issue_i.eq(self.fn_issue_i),
                 intfudeps.go_rd_i.eq(self.go_rd_i),
                 intfudeps.go_wr_i.eq(self.go_wr_i),
                 intfudeps.go_die_i.eq(self.go_die_i),
                 self.readable_o.eq(intfudeps.readable_o),
                 self.writable_o.eq(intfudeps.writable_o),
                ]

        # Connect function issue / arrays, and dest/src1/src2
        comb += [intregdeps.dest_i.eq(self.dest_i),
                 intregdeps.src_i[0].eq(self.src1_i),
                 intregdeps.src_i[1].eq(self.src2_i),
                ]

        comb += [intregdeps.go_rd_i.eq(self.go_rd_i),
                 intregdeps.go_wr_i.eq(self.go_wr_i),
                 intregdeps.go_die_i.eq(self.go_die_i),
                 intregdeps.issue_i.eq(self.fn_issue_i),
                ]

        # register-select outputs
        comb += [self.dest_rsel_o.eq(intregdeps.dest_rsel_o),
                 self.src1_rsel_o.eq(intregdeps.src_rsel_o[0]),
                 self.src2_rsel_o.eq(intregdeps.src_rsel_o[1]),
                ]

        return m
387
388
class Scoreboard(Elaboratable):
    """ Top-level scoreboard experiment.

        Wires together register files, Computation Units (ALUs, branch
        and LD/ST), the Function Unit dependency matrices, a group
        picker, the issue unit, two shadow matrices and a branch
        speculation recorder.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(4)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.alu_imm_i = Signal(rwid, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Creates all the sub-units and connects them together.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # (the FP ports are created but not used anywhere below)
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and BR ALUs: 4 ALUs plus 1 branch unit = 5
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 3, n_alus=4)
        cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs

        # LDST Comp Units (created with no memory: mem is None)
        n_ldsts = 2
        cul = CompUnitLDSTs(self.rwid, 3, None)

        # Comp Units
        # NOTE(review): with cul included, cu is 4+1+2 = 7 units wide,
        # whereas everything below only connects bits [0:n_intfus] (5).
        # presumably the LD/ST units are not connected up yet - confirm.
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub, cul])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Memory FUs
        m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 11)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
        intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        # NOTE(review): the branch shadow is actually a *separate*
        # ShadowMatrix (bshadow, width 1), not an extra bit on "shadows".
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [ regdecode.dest_i.eq(self.int_dest_i),
                  regdecode.src1_i.eq(self.int_src1_i),
                  regdecode.src2_i.eq(self.int_src2_i),
                  regdecode.enable_i.eq(self.reg_enable_i),
                  self.issue_o.eq(issueunit.issue_o)
                ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cua.imm_i.eq(self.alu_imm_i)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        # module-level busy: set if any bit of cu.busy_o is set
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i
        # NOTE(review): in this code the "extra bit" is the separate
        # 1-wide bshadow matrix created above.

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # branch is "active" from issue of the branch unit until the
        # branch unit's go_wr_i is raised
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # 0x1f is a 5-bit mask (n_intfus is 5 above)
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            # data_o+1: gives the 1 ("success") / 2 ("fail") encoding
            # described in __init__ - NOTE(review): confirm which of
            # data_o 0/1 maps to which, against the br_ok_i line below
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        """ yields the register files' contents followed by the main
            input / output signals of this module
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ returns everything yielded by __iter__, as a list
        """
        return list(self)
671
672
class IssueToScoreboard(Elaboratable):
    """ Connects an InstructionQ to a Scoreboard.

        The instruction at the head of the queue is presented to the
        Scoreboard, and is popped from the queue when the selected
        issue group reports that it has been issued.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   passed to InstructionQ as the queue length
            * :n_in:   number of instructions in data_i
            * :n_out:  passed to InstructionQ
            * :rwid:   bit width of register file(s)
            * :opwid:  operation bit width
            * :n_regs: passed to Scoreboard as the register file depth
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # shape used for the queue-length signals: log2(qlen)+2 bits,
        # second item False (presumably "unsigned" - TODO confirm)
        mqbits = (int(log(qlen) / log(2))+2, False)
        self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
        self.p_ready_o = Signal() # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True) # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        """ Creates the InstructionQ and Scoreboard and links them up.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        # (note: this attribute only exists after elaborate has been called)
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when iq.n_sub_i is set (below).

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()

        with m.If(wait_issue_br | wait_issue_alu):
            # set instruction pop length to 1 if the unit accepted
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            imm = iq.data_o[0].imm_i
            dest = iq.data_o[0].dest_i
            src1 = iq.data_o[0].src1_i
            src2 = iq.data_o[0].src2_i
            op = iq.data_o[0].oper_i
            opi = iq.data_o[0].opim_i # immediate set

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1) # enable the regfile

            # choose a Function-Unit-Group: bits 2 or 3 of the opcode
            # being set selects the branch group, otherwise the ALU group.
            # the unit receives the low 2 opcode bits with the
            # immediate-mode bit concatenated above them.
            with m.If((op & (0x3<<2)) != 0): # branch
                comb += sc.brissue.insn_i.eq(1)
                comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.br_imm_i.eq(imm)
                comb += wait_issue_br.eq(1)
            with m.Else(): # alu
                comb += sc.aluissue.insn_i.eq(1)
                comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.alu_imm_i.eq(imm)
                comb += wait_issue_alu.eq(1)

            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            #yield sc.branch_fail_i.eq(branch_fail)
            #yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        """ yields p_ready_o, every field of every data_i entry,
            then p_add_i
        """
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        """ returns everything yielded by __iter__, as a list
        """
        return list(self)
779
780
# Opcode numbers used by the simulation helpers (see RegSim.op).
# Opcodes 0-3 are ALU operations; 4 and above are branch comparisons.
IADD = 0  # add
ISUB = 1  # subtract
IMUL = 2  # multiply
ISHF = 3  # shift right
IBGT = 4  # branch: greater than
IBLT = 5  # branch: less than
IBEQ = 6  # branch: equal
IBNE = 7  # branch: not equal
789
class RegSim:
    """ Pure-python model of the integer register file and ALU, used
        to compute the expected register contents for comparison
        against the hardware.
    """
    def __init__(self, rwidth, nregs):
        """ Inputs:

            * :rwidth: bit width of each register
            * :nregs:  number of registers (all start at zero)
        """
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, op_imm, imm, src1, src2, dest):
        """ Performs one operation and stores the result in regs[dest].

            * :op:     opcode (IADD, ISUB, ... IBNE)
            * :op_imm: if true, imm is used as the second operand
            * :imm:    immediate value
            * :src1:   register number of the first operand
            * :src2:   register number of the second operand
            * :dest:   register number that receives the result

            Returns the result, truncated to rwidth bits.
            Raises ValueError if op is not a known opcode (in which
            case no register is modified).
        """
        maxbits = (1 << self.rwidth) - 1
        src1 = self.regs[src1] & maxbits
        if op_imm:
            src2 = imm
        else:
            src2 = self.regs[src2] & maxbits
        if op == IADD:
            val = src1 + src2
        elif op == ISUB:
            val = src1 - src2
        elif op == IMUL:
            val = src1 * src2
        elif op == ISHF:
            val = src1 >> (src2 & maxbits)
        elif op == IBGT:
            val = int(src1 > src2)
        elif op == IBLT:
            val = int(src1 < src2)
        elif op == IBEQ:
            val = int(src1 == src2)
        elif op == IBNE:
            val = int(src1 != src2)
        else:
            # previously fell through with "val" never assigned
            raise ValueError("unknown opcode %r" % (op,))
        val &= maxbits
        self.setval(dest, val)
        return val

    def setval(self, dest, val):
        """ stores val (unmodified) in regs[dest], and prints it
        """
        print ("sim setval", dest, hex(val))
        self.regs[dest] = val

    def dump(self, dut):
        """ simulation generator: prints every expected register value
            alongside the value read from dut.intregs
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """ simulation generator: compares every expected register
            value against dut.intregs.  on the first mismatch the full
            register state is dumped and AssertionError is raised.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                # explicit raise: a bare "assert False" vanishes under -O
                raise AssertionError("reg %d expected %x received %x" %
                                     (i, val, reg))
837 yield from self.dump(dut)
838 assert False
839
def instr_q(dut, op, op_imm, imm, src1, src2, dest,
            branch_success, branch_fail):
    """ simulation generator: places one instruction on dut.data_i and
        holds p_add_i at 1 until dut.p_ready_o is seen set, then clears
        p_add_i.  (branch_success and branch_fail are not used here.)
    """
    batch = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
              'src1_i': src1, 'src2_i': src2}]

    # drive each instruction onto its input slot, and read it back
    for idx, instr in enumerate(batch):
        yield from eq(dut.data_i[idx], instr)
        di = yield dut.data_i[idx]
        print ("senddata %d %x" % (idx, di))

    # request that the instructions be added, then wait for acceptance
    yield dut.p_add_i.eq(len(batch))
    while True:
        yield
        accepted = yield dut.p_ready_o
        if accepted:
            break

    yield dut.p_add_i.eq(0)
856
857 yield dut.p_add_i.eq(0)
858
859
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """ simulation generator: presents one instruction directly to the
        scoreboard's issue inputs and waits until it has been issued.

        opcodes with bit 2 or bit 3 set go to the branch issue group,
        all others go to the ALU issue group.
    """
    yield from disable_issue(dut)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    # pick the issue group and its operation / immediate inputs
    is_branch = (op & (0x3<<2)) != 0
    if is_branch:
        dut_issue, oper_i, imm_i = dut.brissue, dut.br_oper_i, dut.br_imm_i
    else:
        dut_issue, oper_i, imm_i = dut.aluissue, dut.alu_oper_i, dut.alu_imm_i

    yield dut_issue.insn_i.eq(1)
    yield oper_i.eq(Const(op & 0x3, 2))
    yield imm_i.eq(imm)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
882 yield
883 yield from wait_for_issue(dut, dut_issue)
884
885
def print_reg(dut, rnums):
    """ simulation generator: reads the listed integer registers from
        the dut and prints their numbers and (hex) values on one line
    """
    values = []
    for rnum in rnums:
        value = yield dut.intregs.regs[rnum].reg
        values.append("%x" % value)
    names = ','.join(str(rnum) for rnum in rnums)
    print ("reg %s: %s" % (names, ','.join(values)))
891 rnums = map(str, rnums)
892 print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
893
894
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """ returns a list of n_ops random instructions.

        each instruction is a tuple (src1, src2, dest, op, opi, imm),
        with an extra trailing (0, 0) item when shadowing is set.
        register numbers are in the range 1..dut.n_regs-1, imm is in
        the range 1..(2^dut.rwid)-1 and op is in the range 0..max_opnums.
    """
    top_reg = dut.n_regs - 1
    top_imm = (1 << dut.rwid) - 1
    insts = []
    for _ in range(n_ops):
        # NOTE: the order of these randint calls determines the
        # instruction stream produced for a given seed
        src1 = randint(1, top_reg)
        src2 = randint(1, top_reg)
        imm = randint(1, top_imm)
        dest = randint(1, top_reg)
        op = randint(0, max_opnums)
        opi = int(randint(0, 2) == 0)  # immediate mode when random is zero

        inst = (src1, src2, dest, op, opi, imm)
        if shadowing:
            inst += ((0, 0),)
        insts.append(inst)
    return insts
908 insts.append((src1, src2, dest, op, opi, imm))
909 return insts
910
911
def wait_for_busy_clear(dut):
    """ simulation generator: advances one clock at a time until
        dut.busy_o reads as zero
    """
    while (yield dut.busy_o):
        print ("busy",)
        yield
917 print ("busy",)
918 yield
919
def disable_issue(dut):
    """ simulation generator: clears insn_i on the ALU issue group and
        then on the branch issue group
    """
    for issue_group in (dut.aluissue, dut.brissue):
        yield issue_group.insn_i.eq(0)
922 yield dut.brissue.insn_i.eq(0)
923
924
def wait_for_issue(dut, dut_issue):
    """ simulation generator: advances one clock at a time until
        dut_issue.fn_issue_o reads as non-zero, then clears the issue
        requests and the register-decode enable
    """
    while not (yield dut_issue.fn_issue_o):
        print ("busy",)
        yield
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
934 yield
935 #yield from print_reg(dut, [1,2,3])
936
def scoreboard_branch_sim(dut, alusim):
    """ simulation generator: branch-speculation test.

        Loads random values into the registers, issues an instruction
        followed by a branch whose "success" and "fail" successor
        instructions are both issued under shadow, then replays the
        same program on alusim and checks the dut registers against it.

        * :dut:    the scoreboard under test
        * :alusim: register/ALU model (see RegSim)
    """

    iseed = 3

    for run in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        if False:
            # (disabled) create some instructions: branches create a tree.
            # NOTE: create_random_ops with shadowing returns 7-item
            # tuples, which do not match the 5-item unpacking done by
            # the issue loop below, so this cannot simply be re-enabled.
            insts = create_random_ops(dut, 1, True, 1)

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            # fixed program: one add, then a BGT.  for a branch the
            # "dest" slot holds the (success, fail) successor lists.
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instrs.pop(0)
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch already resolved: no shadow needed any more
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr takes an immediate as its third argument: these
            # instructions have none, so zero is passed
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay the same program on the model, following only the
        # successor list that matches the computed branch result
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # op takes (op, op_imm, imm, src1, src2, dest): no immediate
            branch_res = alusim.op(op, 0, 0, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1049
1050
def scoreboard_sim(dut, alusim):
    """Simulation process: 50 rounds of instruction-stream testing.

    The RNG is seeded once with 0.  Each round:

    * loads registers 1 .. ``dut.n_regs``-1 with the same random value in
      both ``dut.intregs`` and ``alusim``
    * obtains a program from ``create_random_ops(dut, 15, True, 4)``
      (the ``if False:`` sections are disabled hand-written cases)
    * for every instruction, applies it to the software model with
      ``alusim.op`` and queues it into the dut with ``instr_q``
    * waits until ``dut.qlen_o`` reads 0, yields a few more times, then
      waits on ``wait_for_busy_clear(dut)``
    * calls ``alusim.check(dut)`` and ``alusim.dump(dut)``

    The issue loop expects 7-field instruction tuples:
    ``(src1, src2, dest, op, opi, imm, (br_ok, br_fail))``.

    dut    -- the scoreboard under test
    alusim -- software register/ALU model kept in step with the dut
    """

    seed(0)

    for i in range(50):

        # set random values in the registers
        # (this inner loop reuses the name "i" of the enclosing loop)
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if True:
            instrs = create_random_ops(dut, 15, True, 4)

        # NOTE(review): several of the disabled cases below use 4-, 5- or
        # 6-element tuples.  The issue loop further down unpacks exactly
        # 7 fields, so those cases need converting before being re-enabled.

        if False:
            instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append( (1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            # fixed register values (r5=4, r3=5) ahead of an op-4 instruction
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            # as above but with r5=6 and the opposite flag pair on the
            # second instruction
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                   (i, src1, src2, dest, op, opi, imm))
            # the software model is updated at issue time, before the
            # instruction is handed to the dut's queue
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # queue reads empty: yield a few more times before polling busy
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1193
1194
def test_scoreboard():
    """Build the scoreboard, write out its RTLIL and run ``scoreboard_sim``.

    Outputs two files in the current directory:
    ``test_scoreboard6600.il`` (RTLIL text of the design) and
    ``test_scoreboard6600.vcd`` (waveform trace of the simulation).
    """
    scoreboard = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    regsim = RegSim(16, 8)
    # memory model: constructed here but not passed to any simulation yet
    memsim = MemSim(16, 16)

    il_text = rtlil.convert(scoreboard, ports=scoreboard.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    # alternative process (currently unused):
    #   scoreboard_branch_sim(scoreboard, regsim)
    sim_process = scoreboard_sim(scoreboard, regsim)
    run_simulation(scoreboard, sim_process,
                   vcd_name='test_scoreboard6600.vcd')
1208
1209
# script entry point: run the scoreboard test when executed directly
if __name__ == "__main__":
    test_scoreboard()