add in a TestMemory class
[soc.git] / src / experiment / score6600.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
4
5 from regfile.regfile import RegFileArray, treereduce
6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
7 from scoreboard.fu_reg_matrix import FURegDepMatrix
8 from scoreboard.global_pending import GlobalPending
9 from scoreboard.group_picker import GroupPicker
10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
12 from scoreboard.instruction_q import Instruction, InstructionQ
13 from scoreboard.memfu import MemFunctionUnits
14
15 from compalu import ComputationUnitNoDelay
16 from compldst import LDSTCompUnit
17
18 from alu_hier import ALU, BranchALU
19 from nmutil.latch import SRLatch
20 from nmutil.nmoperator import eq
21
22 from random import randint, seed
23 from copy import deepcopy
24 from math import log
25
26
class TestMemory(Elaboratable):
    """ Test memory with a single shared address, one read and one write port.

        Inputs:

        * :regwid: width (in bits) of each memory word
        * :addrw:  width (in bits) of the address signal

        Attributes:

        * adr   - address, shared by the read and the write port
        * dat_r - data read from the memory
        * dat_w - data to be written
        * we    - write enable

        Every location is initialised to its own index (init=range(depth)).
    """
    def __init__(self, regwid, addrw):
        self.ddepth = 1 # regwid //8
        depth = (1<<addrw) // self.ddepth
        self.adr = Signal(addrw)
        self.dat_r = Signal(regwid)
        self.dat_w = Signal(regwid)
        self.we = Signal()
        self.mem = Memory(width=regwid, depth=depth, init=range(0, depth))

    def elaborate(self, platform):
        m = Module()
        m.submodules.rdport = rdport = self.mem.read_port()
        m.submodules.wrport = wrport = self.mem.write_port()
        # ignore the low address bits.  the *same* truncated address must be
        # used for both ports, otherwise a value written at address A is not
        # the one read back from address A (MemSim shifts for both ld and st)
        word_adr = self.adr[self.ddepth:]
        m.d.comb += [
            rdport.addr.eq(word_adr),
            self.dat_r.eq(rdport.data),
            wrport.addr.eq(word_adr),
            wrport.data.eq(self.dat_w),
            wrport.en.eq(self.we),
        ]
        return m
49
50
class MemSim:
    """ Pure-python model of a word-addressed memory.

        * :regwid: width (in bits) of each word; stored data is masked to it
        * :addrw:  address width; the memory holds (1<<addrw)//ddepth words,
                   each initialised to its own index
    """
    def __init__(self, regwid, addrw):
        self.regwid = regwid
        self.ddepth = 1 # regwid//8
        n_words = (1 << addrw) // self.ddepth
        self.mem = [idx for idx in range(n_words)]

    def _index(self, addr):
        # the low "ddepth" bits of the address are dropped
        return addr >> self.ddepth

    def ld(self, addr):
        """ returns the word stored at (addr >> ddepth)
        """
        return self.mem[self._index(addr)]

    def st(self, addr, data):
        """ stores data, truncated to regwid bits, at (addr >> ddepth)
        """
        mask = (1 << self.regwid) - 1
        self.mem[self._index(addr)] = data & mask
63
64
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Amazingly, this class works recursively.  It's supposed to just
        look after some ALUs (that can handle the same operations),
        grouping them together, however it turns out that the same code
        can also group *groups* of Computation Units together as well.

        Basically it was intended just to concatenate the ALU's issue,
        go_rd etc. signals together, which start out as bits and become
        sequences.  Turns out that the same trick works just as well
        on Computation Units!

        So this class may be used recursively to present a top-level
        sequential concatenation of all the signals in and out of
        ALUs, whilst at the same time making it convenient to group
        ALUs together.

        At the lower level, the intent is that groups of (identical)
        ALUs may be passed the same operation.  Even beyond that,
        the intent is that that group of (identical) ALUs actually
        share the *same pipeline* and as such become a "Concurrent
        Computation Unit" as defined by Mitch Alsup (see section
        11.4.9.3)
    """
    def __init__(self, rwid, units, ldstmode=False):
        """ Inputs:

            * :rwid:     bit width of register file(s) - both FP and INT
            * :units:    sequence of ALUs (or CompUnitsBase derivatives)
            * :ldstmode: when True, the extra LD/ST signals (go_ad_i,
                         adr_rel_o, sto_rel_o, load_mem_o, stwd_mem_o) are
                         created and, in elaborate, wired to every unit
        """
        self.units = units
        self.ldstmode = ldstmode
        self.rwid = rwid
        if units and isinstance(units[0], CompUnitsBase):
            # a group of groups: total width is the sum of the sub-groups
            self.n_units = sum(u.n_units for u in self.units)
        else:
            self.n_units = len(units)

        n_units = self.n_units

        # inputs
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)
        if ldstmode:
            self.go_ad_i = Signal(n_units, reset_less=True)

        # outputs
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)
        if ldstmode:
            self.adr_rel_o = Signal(n_units, reset_less=True)
            self.sto_rel_o = Signal(n_units, reset_less=True)
            self.load_mem_o = Signal(n_units, reset_less=True)
            self.stwd_mem_o = Signal(n_units, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)
        # input operand

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        for i, alu in enumerate(self.units):
            setattr(m.submodules, "comp%d" % i, alu)

        def gather(name):
            # concatenate the same-named signal of every unit, unit 0 first
            return Cat(*[getattr(alu, name) for alu in self.units])

        # outputs: per-unit signals concatenated into the group-wide signal
        comb += self.rd_rel_o.eq(gather("rd_rel_o"))
        comb += self.req_rel_o.eq(gather("req_rel_o"))
        comb += self.busy_o.eq(gather("busy_o"))
        # inputs: the group-wide signal is split out across the units
        comb += gather("go_die_i").eq(self.go_die_i)
        comb += gather("shadown_i").eq(self.shadown_i)
        comb += gather("go_wr_i").eq(self.go_wr_i)
        comb += gather("go_rd_i").eq(self.go_rd_i)
        comb += gather("issue_i").eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "data_o"
        if self.units:
            data_o = treereduce(self.units)
            comb += self.data_o.eq(data_o)

        # every unit receives the same two source operands
        for alu in self.units:
            comb += alu.src1_i.eq(self.src1_i)
            comb += alu.src2_i.eq(self.src2_i)

        if not self.ldstmode:
            return m

        # LD/ST-only signals, concatenated / split in the same fashion
        comb += self.adr_rel_o.eq(gather("adr_rel_o"))
        comb += self.sto_rel_o.eq(gather("sto_rel_o"))
        comb += self.load_mem_o.eq(gather("load_mem_o"))
        comb += self.stwd_mem_o.eq(gather("stwd_mem_o"))
        comb += gather("go_ad_i").eq(self.go_ad_i)

        return m
201
202
class CompUnitLDSTs(CompUnitsBase):
    """ Group of two LD/ST Computation Units, each wrapping its own ALU.
    """

    def __init__(self, rwid, opwid, mem):
        """ Inputs:

            * :rwid:  bit width of register file(s) - both FP and INT
            * :opwid: operand bit width
            * :mem:   passed unmodified to each LDSTCompUnit
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs: one (adder) per LD/ST unit
        n_ldst = 2
        adders = [ALU(rwid) for _ in range(n_ldst)]

        aluopwid = 4 # see compldst.py for "internal" opcode
        ldst_units = [LDSTCompUnit(rwid, aluopwid, adder, mem)
                      for adder in adders]

        CompUnitsBase.__init__(self, rwid, ldst_units, ldstmode=True)

    def elaborate(self, platform):
        m = CompUnitsBase.elaborate(self, platform)

        # hand the same operation to all units, 4 lower bits though
        for unit in self.units:
            m.d.comb += [
                unit.oper_i[0:4].eq(self.oper_i),
                unit.imm_i.eq(self.imm_i),
                unit.isalu_i.eq(0),
            ]

        return m
239
240
class CompUnitALUs(CompUnitsBase):
    """ Group of n_alus integer Computation Units, each wrapping its own ALU.
    """

    def __init__(self, rwid, opwid, n_alus):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
            * :n_alus: number of ALUs (and Computation Units) to create
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs (all created first, then wrapped)
        int_alus = [ALU(rwid) for _ in range(n_alus)]

        aluopwid = 3 # extra bit for immediate mode
        comp_units = [ComputationUnitNoDelay(rwid, aluopwid, int_alu)
                      for int_alu in int_alus]

        CompUnitsBase.__init__(self, rwid, comp_units)

    def elaborate(self, platform):
        m = CompUnitsBase.elaborate(self, platform)

        # hand the same operation to all units, only lower 3 bits though
        for unit in self.units:
            m.d.comb += [
                unit.oper_i[0:3].eq(self.oper_i),
                unit.imm_i.eq(self.imm_i),
            ]

        return m
277
278
class CompUnitBR(CompUnitsBase):
    """ Group holding a single Branch Computation Unit.
    """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:  bit width of register file(s) - both FP and INT
            * :opwid: operand bit width

            Note: the branch ALU (self.bgt) and its Computation Unit
            (self.br1) are kept as attributes so that a shadow unit can
            be created for the branch
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Branch ALU and CU
        aluopwid = 3 # extra bit for immediate mode
        self.bgt = BranchALU(rwid)
        self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)
        CompUnitsBase.__init__(self, rwid, [self.br1])

    def elaborate(self, platform):
        m = CompUnitsBase.elaborate(self, platform)

        # hand the same operation to all units
        for unit in self.units:
            m.d.comb += [
                unit.oper_i.eq(self.oper_i),
                unit.imm_i.eq(self.imm_i),
            ]

        return m
312
313
class FunctionUnits(Elaboratable):
    """ Wires an FU-FU Dependency Matrix and an FU-Reg Dependency Matrix
        together and presents their signals as a single unit.

        * :n_regs:     width of the register-select signals
        * :n_int_alus: width of the per-Function-Unit signals
    """

    def __init__(self, n_regs, n_int_alus):
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        def reg_sig():
            # one bit per register
            return Signal(n_regs, reset_less=True)

        def fu_sig():
            # one bit per Function Unit
            return Signal(n_int_alus, reset_less=True)

        self.dest_i = reg_sig() # Dest R# in
        self.src1_i = reg_sig() # oper1 R# in
        self.src2_i = reg_sig() # oper2 R# in

        self.g_int_rd_pend_o = reg_sig()
        self.g_int_wr_pend_o = reg_sig()

        self.dest_rsel_o = reg_sig() # dest reg (bot)
        self.src1_rsel_o = reg_sig() # src1 reg (bot)
        self.src2_rsel_o = reg_sig() # src2 reg (bot)

        self.readable_o = fu_sig()
        self.writable_o = fu_sig()

        self.go_rd_i = fu_sig()
        self.go_wr_i = fu_sig()
        self.go_die_i = fu_sig()
        self.fn_issue_i = fu_sig()

        # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        n_fus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        fufu = FUFUDepMatrix(n_fus, n_fus)
        m.submodules.intfudeps = fufu
        # Integer FU-Reg Dep Matrix
        fureg = FURegDepMatrix(n_fus, self.n_regs, 2)
        m.submodules.intregdeps = fureg

        # global pending vectors, also fed back into the FU-Reg matrix
        comb += [
            self.g_int_rd_pend_o.eq(fureg.v_rd_rsel_o),
            self.g_int_wr_pend_o.eq(fureg.v_wr_rsel_o),
            fureg.rd_pend_i.eq(fureg.v_rd_rsel_o),
            fureg.wr_pend_i.eq(fureg.v_wr_rsel_o),
        ]

        # FU-Reg pending outputs drive the FU-FU matrix
        comb += [
            fufu.rd_pend_i.eq(fureg.rd_pend_o),
            fufu.wr_pend_i.eq(fureg.wr_pend_o),
        ]
        self.wr_pend_o = fureg.wr_pend_o # also output for use in WaWGrid

        # FU-FU matrix: issue / go signals in, readable / writable out
        comb += [
            fufu.issue_i.eq(self.fn_issue_i),
            fufu.go_rd_i.eq(self.go_rd_i),
            fufu.go_wr_i.eq(self.go_wr_i),
            fufu.go_die_i.eq(self.go_die_i),
            self.readable_o.eq(fufu.readable_o),
            self.writable_o.eq(fufu.writable_o),
        ]

        # Connect function issue / arrays, and dest/src1/src2
        comb += [
            fureg.dest_i.eq(self.dest_i),
            fureg.src_i[0].eq(self.src1_i),
            fureg.src_i[1].eq(self.src2_i),
        ]

        comb += [
            fureg.go_rd_i.eq(self.go_rd_i),
            fureg.go_wr_i.eq(self.go_wr_i),
            fureg.go_die_i.eq(self.go_die_i),
            fureg.issue_i.eq(self.fn_issue_i),
        ]

        # register-select outputs
        comb += [
            self.dest_rsel_o.eq(fureg.dest_rsel_o),
            self.src1_rsel_o.eq(fureg.src_rsel_o[0]),
            self.src2_rsel_o.eq(fureg.src_rsel_o[1]),
        ]

        return m
387
388
class Scoreboard(Elaboratable):
    """ Top-level scoreboard: register files, Computation Units, Function
        Unit dependency matrices, group picker, issue unit, shadow matrices
        and branch speculation recorder, all wired together in elaborate.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(4)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.alu_imm_i = Signal(rwid, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        # NOTE(review): elaborate loads branch_direction_o with br1.data_o+1
        # and treats data_o == 1 as "branch occurs", so 1 corresponds to
        # data_o == 0 and 2 to data_o == 1 - confirm against the wording above
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ creates all sub-units and wires them together; returns the Module
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # (the FP ports are created but not connected in this method)
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and BR ALUs
        n_int_alus = 5 # 4 ALUs (cua) plus 1 branch unit (cub)
        cua = CompUnitALUs(self.rwid, 3, n_alus=4)
        cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs

        # LDST Comp Units
        n_ldsts = 2
        cul = CompUnitLDSTs(self.rwid, 3, None) # no memory is passed in here

        # Comp Units
        # NOTE(review): cu groups 4+1+2 units, whereas everything below is
        # sliced to [0:n_intfus] (5 bits): the two LD/ST units look like
        # they are not yet issued to / picked from - confirm
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub, cul])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Memory FUs
        m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
        intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        # NOTE(review): as written the branch shadow is a *separate*
        # 1-wide ShadowMatrix (bshadow), not an extra column on "shadows"
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [ regdecode.dest_i.eq(self.int_dest_i),
                  regdecode.src1_i.eq(self.int_src1_i),
                  regdecode.src2_i.eq(self.int_src2_i),
                  regdecode.enable_i.eq(self.reg_enable_i),
                  self.issue_o.eq(issueunit.issue_o)
                ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cua.imm_i.eq(self.alu_imm_i)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # branch is "active" from its issue until its go_wr
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # 0x1f == (1<<5)-1: mask matches n_intfus == 5.
        # NOTE(review): needs changing if n_int_alus ever changes
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        # everything yielded by both register files, then the top-level
        # register-number inputs, issue output and branch signals
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ returns everything __iter__ yields, as a list
        """
        return list(self)
671
672
class IssueToScoreboard(Elaboratable):
    """ Connects an InstructionQ to a Scoreboard: the instruction at the
        head of the queue is presented to the ALU or Branch issue group,
        and popped from the queue once that group reports it was issued.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   passed to InstructionQ as its length
            * :n_in:   number of instruction input slots (data_i entries)
            * :n_out:  passed to InstructionQ
            * :rwid:   bit width of register file(s)
            * :opwid:  operand bit width
            * :n_regs: number of registers (passed to Scoreboard)
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # log2(qlen)+2 bits; presumably an nmigen (width, signed) shape
        # tuple - TODO confirm
        mqbits = (int(log(qlen) / log(2))+2, False)
        self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
        self.p_ready_o = Signal() # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True) # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        """ creates the queue, scoreboard and test memory and wires the
            queue head into the scoreboard's issue groups
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        # NOTE(review): mem is added as a submodule but nothing in this
        # method connects its signals to sc - confirm this is intentional
        mem = TestMemory(self.rwid, 8) # not too big, takes too long
        m.submodules.iq = iq
        m.submodules.sc = sc
        m.submodules.mem = mem

        # get at the regfile for testing
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when iq.n_sub_i is set (below)

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()

        with m.If(wait_issue_br | wait_issue_alu):
            # set instruction pop length to 1 if the unit accepted
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            imm = iq.data_o[0].imm_i
            dest = iq.data_o[0].dest_i
            src1 = iq.data_o[0].src1_i
            src2 = iq.data_o[0].src2_i
            op = iq.data_o[0].oper_i
            opi = iq.data_o[0].opim_i # immediate set

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1) # enable the regfile

            # choose a Function-Unit-Group: bits 2-3 of the opcode nonzero
            # selects the branch group, otherwise the ALU group.  the
            # operation handed over is the low 2 opcode bits with the
            # immediate-mode flag as bit 2
            with m.If((op & (0x3<<2)) != 0): # branch
                comb += sc.brissue.insn_i.eq(1)
                comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.br_imm_i.eq(imm)
                comb += wait_issue_br.eq(1)
            with m.Else(): # alu
                comb += sc.aluissue.insn_i.eq(1)
                comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.alu_imm_i.eq(imm)
                comb += wait_issue_alu.eq(1)

            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            #yield sc.branch_fail_i.eq(branch_fail)
            #yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        # p_ready_o, then every field of every data_i entry, then p_add_i
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        """ returns everything __iter__ yields, as a list
        """
        return list(self)
781
782
# opcodes understood by RegSim.op.  values of 4 and above (bits 2-3 nonzero)
# are treated as branch operations by the issue code in this file
IADD = 0
ISUB = 1
IMUL = 2
ISHF = 3
IBGT = 4
IBLT = 5
IBEQ = 6
IBNE = 7


class RegSim:
    """ Pure-python model of the integer register file and ALU operations,
        used to compute the values the hardware registers are expected
        to contain.

        * :rwidth: register width in bits; results are truncated to it
        * :nregs:  number of registers, all initially zero
    """
    def __init__(self, rwidth, nregs):
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, op_imm, imm, src1, src2, dest):
        """ performs one operation and stores the result in register dest

            * :op:     one of IADD, ISUB, IMUL, ISHF, IBGT, IBLT, IBEQ, IBNE
            * :op_imm: if true, imm is used as the second operand in place
                       of register src2
            * :imm:    immediate value (used as-is, not masked)
            * :src1:   register *number* of the first operand
            * :src2:   register *number* of the second operand
            * :dest:   register *number* receiving the result

            returns the result, truncated to rwidth bits.
            raises ValueError if op is not a known opcode (previously this
            surfaced as an UnboundLocalError on "val")
        """
        maxbits = (1 << self.rwidth) - 1
        src1 = self.regs[src1] & maxbits
        if op_imm:
            src2 = imm
        else:
            src2 = self.regs[src2] & maxbits
        if op == IADD:
            val = src1 + src2
        elif op == ISUB:
            val = src1 - src2
        elif op == IMUL:
            val = src1 * src2
        elif op == ISHF:
            val = src1 >> (src2 & maxbits)
        elif op == IBGT:
            val = int(src1 > src2)
        elif op == IBLT:
            val = int(src1 < src2)
        elif op == IBEQ:
            val = int(src1 == src2)
        elif op == IBNE:
            val = int(src1 != src2)
        else:
            raise ValueError("unknown opcode %r" % (op,))
        val &= maxbits
        self.setval(dest, val)
        return val

    def setval(self, dest, val):
        """ stores val in register dest (and logs it)
        """
        print ("sim setval", dest, hex(val))
        self.regs[dest] = val

    def dump(self, dut):
        """ simulation generator: prints expected against actual value
            for every register
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """ simulation generator: compares every register against the
            expected value.  on the first mismatch, dumps all registers
            and raises AssertionError
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                # explicit raise: a bare "assert False" vanishes under -O
                raise AssertionError("reg %d expected %x received %x" % \
                                     (i, val, reg))
841
def instr_q(dut, op, op_imm, imm, src1, src2, dest,
            branch_success, branch_fail):
    """ simulation generator: places one instruction on dut.data_i, asks
        for it to be added (p_add_i) and waits until p_ready_o is raised.

        branch_success and branch_fail are accepted but not used here
    """
    insn = {'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
            'src1_i': src1, 'src2_i': src2}
    batch = [insn]
    n_send = 1

    # drive each instruction slot, and read it back for logging
    for slot in range(n_send):
        yield from eq(dut.data_i[slot], batch[slot])
        readback = yield dut.data_i[slot]
        print ("senddata %d %x" % (slot, readback))

    # request the add, then wait (at least one cycle) for acceptance
    yield dut.p_add_i.eq(n_send)
    yield
    while not (yield dut.p_ready_o):
        yield

    yield dut.p_add_i.eq(0)
860
861
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """ simulation generator: presents one instruction directly to the
        scoreboard (dut) and waits for the selected issue group to accept it.

        opcodes with bits 2-3 nonzero go to the branch issue group, all
        others to the ALU issue group; only the low two opcode bits are
        handed on as the operation
    """
    yield from disable_issue(dut)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    # select the issue group together with its operation/immediate inputs
    is_branch = (op & (0x3<<2)) != 0
    if is_branch:
        dut_issue, oper_sig, imm_sig = dut.brissue, dut.br_oper_i, dut.br_imm_i
    else:
        dut_issue, oper_sig, imm_sig = dut.aluissue, dut.alu_oper_i, dut.alu_imm_i

    yield dut_issue.insn_i.eq(1)
    yield oper_sig.eq(Const(op & 0x3, 2))
    yield imm_sig.eq(imm)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
886
887
def print_reg(dut, rnums):
    """ simulation generator: reads the integer registers numbered in
        rnums and prints them (in hex) on a single line
    """
    values = []
    for rnum in rnums:
        value = yield dut.intregs.regs[rnum].reg
        values.append("%x" % value)
    labels = ','.join(str(rnum) for rnum in rnums)
    print ("reg %s: %s" % (labels, ','.join(values)))
895
896
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """ returns a list of n_ops random instructions.

        each is a tuple (src1, src2, dest, op, opi, imm); with shadowing
        set, a trailing (0, 0) shadow pair is added as a seventh item.

        * register numbers are in the range 1 .. dut.n_regs-1
        * imm is in the range 1 .. (1<<dut.rwid)-1
        * op is in the range 0 .. max_opnums
        * opi is 1 when a random draw from 0..2 comes out as zero, else 0
    """
    top_reg = dut.n_regs - 1
    top_imm = (1 << dut.rwid) - 1
    insts = []
    for _ in range(n_ops):
        # NB: the order of the randint calls fixes the generated sequence
        src1 = randint(1, top_reg)
        src2 = randint(1, top_reg)
        imm = randint(1, top_imm)
        dest = randint(1, top_reg)
        op = randint(0, max_opnums)
        opi = 1 if randint(0, 2) == 0 else 0

        inst = (src1, src2, dest, op, opi, imm)
        if shadowing:
            inst = inst + ((0, 0),)
        insts.append(inst)
    return insts
912
913
def wait_for_busy_clear(dut):
    """ simulation generator: waits, one cycle at a time, until dut.busy_o
        reads as zero
    """
    while (yield dut.busy_o):
        print ("busy",)
        yield
921
def disable_issue(dut):
    """ simulation generator: clears insn_i on the ALU issue group and
        then on the Branch issue group
    """
    for group in (dut.aluissue, dut.brissue):
        yield group.insn_i.eq(0)
925
926
def wait_for_issue(dut, dut_issue):
    """ simulation generator: waits, one cycle at a time, until the given
        issue group's fn_issue_o is nonzero, then de-asserts both issue
        groups and the register-decode enable
    """
    while not (yield dut_issue.fn_issue_o):
        print ("busy",)
        #yield from print_reg(dut, [1,2,3])
        yield
        #yield from print_reg(dut, [1,2,3])
    # accepted: stop presenting the instruction
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
938
def scoreboard_branch_sim(dut, alusim):
    """ simulation generator: branch-speculation test.

        loads the registers with random values, issues a short instruction
        list containing one branch (whose "ok" and "fail" follow-on
        instructions are issued under shadow), replays the same list
        through alusim, then checks the hardware registers against it.

        * :dut:    the scoreboard under test
        * :alusim: a RegSim, giving the expected register contents

        instructions here are 5-tuples (src1, src2, dest, op, (shadow_on,
        shadow_off)) with no immediate: int_instr and alusim.op are
        therefore both called with a zero immediate (and, for alusim.op,
        immediate-mode off).  previously the immediate arguments were
        missing altogether, which raised TypeError.
    """

    iseed = 3

    for i in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for rnum in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[rnum].reg.eq(val)
            alusim.setval(rnum, val)

        if False:
            # NOTE: disabled.  create_random_ops returns 7-tuples when
            # shadowing is set, which do not match the 5-tuples unpacked
            # below, so this needs updating before it can be re-enabled
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # the simulator replays its own copy: insts is consumed below
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch already resolved: no shadow needed any more
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                # for a branch, "dest" carries the follow-on instructions
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                   (i, src1, src2, dest, op, shadow_on, shadow_off))
            # no immediate in these instructions: pass imm=0
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                   (i, src1, src2, dest, op, shadow_on, shadow_off))
            # register-register operation: op_imm=0, imm=0
            branch_res = alusim.op(op, 0, 0, src1, src2, dest)
            if is_branch:
                # follow whichever path the (simulated) branch took
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1051
1052
def scoreboard_sim(dut, alusim):
    """Generator test process: queue instructions and compare with the model.

    Steps performed (a single pass, since the outer loop is ``range(1)``):

    * seed the random generator with 0 and load a random value into every
      register from 1 to ``dut.n_regs-1``, mirroring each into ``alusim``
    * obtain the instruction list from ``create_random_ops(dut, 15, True, 4)``
      (the ``if False:`` sections are disabled hand-written cases)
    * for each instruction, apply it to ``alusim`` and then queue it on the
      dut through ``instr_q``
    * wait until ``dut.qlen_o`` reads 0, yield four more times, wait via
      ``wait_for_busy_clear``, then run ``alusim.check`` and ``alusim.dump``

    Each instruction must unpack as
    ``(src1, src2, dest, op, opi, imm, (br_ok, br_fail))``.

    dut: object under test; the attributes used here are ``n_regs``,
         ``intregs.regs[i].reg`` and ``qlen_o``.
    alusim: software model; the members used here are ``rwidth``,
            ``setval``, ``op``, ``check`` and ``dump``.
    """

    seed(0)

    for i in range(1):

        # set random values in the registers
        # (this inner loop reuses the outer loop's variable name "i")
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if True:
            instrs = create_random_ops(dut, 15, True, 4)

        # The "if False:" sections below are disabled test cases.
        # NOTE(review): several of them hold tuples of 4, 5 or 6 fields,
        # which would not unpack in the 7-field issue loop further down;
        # they would need updating before being re-enabled.

        if False:
            instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append( (1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            # presets registers 5 and 3 in both dut and model, then appends
            # an op-4 instruction followed by one flagged (0, 1)
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            # same as the previous case but with register 5 set to 6 and the
            # second instruction flagged (1, 0)
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                    (i, src1, src2, dest, op, opi, imm))
            # the model is updated first, then the same instruction is
            # handed to the dut
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # four further bare yields after the queue length reads 0
        # (presumably extra clock cycles to let things settle - confirm)
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1195
1196
def test_scoreboard():
    """Build the scoreboard, write out its RTLIL and run ``scoreboard_sim``.

    The RTLIL text is written to "test_scoreboard6600.il" and the simulation
    trace to "test_scoreboard6600.vcd", both relative to the current
    directory.
    """
    il_path = "test_scoreboard6600.il"
    vcd_path = 'test_scoreboard6600.vcd'

    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 16)  # constructed but not used by anything below

    # dump the design so it can be inspected outside the simulator
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open(il_path, "w") as il_file:
        il_file.write(il_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name=vcd_path)

    # to run the branch test instead, pass
    # scoreboard_branch_sim(dut, alusim) as the process above
1210
1211
# run the scoreboard test only when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()