c81c4273b1f886c010e8a62f981d38c3af7fa420
[soc.git] / src / experiment / score6600.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
4
5 from regfile.regfile import RegFileArray, treereduce
6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
7 from scoreboard.fu_reg_matrix import FURegDepMatrix
8 from scoreboard.global_pending import GlobalPending
9 from scoreboard.group_picker import GroupPicker
10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
12 from scoreboard.instruction_q import Instruction, InstructionQ
13 from scoreboard.memfu import MemFunctionUnits
14
15 from compalu import ComputationUnitNoDelay
16 from compldst import LDSTCompUnit
17
18 from alu_hier import ALU, BranchALU
19 from nmutil.latch import SRLatch
20 from nmutil.nmoperator import eq
21
22 from random import randint, seed
23 from copy import deepcopy
24 from math import log
25
26
class TestMemory(Elaboratable):
    """Test memory: an nmigen ``Memory`` exposing one read and one write port.

    The memory is ``regwid`` bits wide and is initialised so that every
    word holds its own index.
    """

    def __init__(self, regwid, addrw):
        """ Inputs:

            * :regwid: width (in bits) of each memory word
            * :addrw:  address width; depth is (1<<addrw) // ddepth
        """
        self.ddepth = 1  # regwid // 8
        n_words = (1 << addrw) // self.ddepth
        self.mem = Memory(width=regwid, depth=n_words, init=range(n_words))

    def elaborate(self, platform):
        m = Module()

        # the ports are created here (not in __init__) and also kept on
        # the instance as self.rdport / self.wrport
        rdport = self.mem.read_port()
        self.rdport = rdport
        m.submodules.rdport = rdport

        wrport = self.mem.write_port()
        self.wrport = wrport
        m.submodules.wrport = wrport

        return m
38
39
class MemSim:
    """Pure-python memory model: a list of words, each initialised to its index.

    NOTE(review): ``ddepth`` is used as a divisor when sizing the memory
    but as a shift amount when indexing it - confirm that this is the
    addressing the hardware side is expected to match.
    """

    def __init__(self, regwid, addrw):
        self.regwid = regwid
        self.ddepth = 1  # regwid//8
        self.mem = list(range((1 << addrw) // self.ddepth))

    def _index(self, addr):
        # address -> list index (addr shifted right by ddepth bits)
        return addr >> self.ddepth

    def ld(self, addr):
        """Return the word stored at address ``addr``."""
        return self.mem[self._index(addr)]

    def st(self, addr, data):
        """Store ``data``, truncated to ``regwid`` bits, at address ``addr``."""
        mask = (1 << self.regwid) - 1
        self.mem[self._index(addr)] = data & mask
52
53
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Amazingly, this class works recursively.  It's supposed to just
        look after some ALUs (that can handle the same operations),
        grouping them together, however it turns out that the same code
        can also group *groups* of Computation Units together as well.

        Basically it was intended just to concatenate the ALU's issue,
        go_rd etc. signals together, which start out as bits and become
        sequences.  Turns out that the same trick works just as well
        on Computation Units!

        So this class may be used recursively to present a top-level
        sequential concatenation of all the signals in and out of
        ALUs, whilst at the same time making it convenient to group
        ALUs together.

        At the lower level, the intent is that groups of (identical)
        ALUs may be passed the same operation.  Even beyond that,
        the intent is that that group of (identical) ALUs actually
        share the *same pipeline* and as such become a "Concurrent
        Computation Unit" as defined by Mitch Alsup (see section
        11.4.9.3)
    """
    def __init__(self, rwid, units, ldstmode=False):
        """ Inputs:

            * :rwid:     bit width of register file(s) - both FP and INT
            * :units:    sequence of ALUs (or CompUnitsBase derivatives)
            * :ldstmode: when True, the LD/ST-specific signals (go_ad_i,
                         go_st_i, ld_o, st_o, adr_rel_o, sto_rel_o,
                         load_mem_o, stwd_mem_o, addr_o) are also created
                         and wired up to the units
        """
        self.units = units
        self.ldstmode = ldstmode
        self.rwid = rwid

        # a group of groups is as wide as the sum of its members; a group
        # of plain units has one bit per unit
        if units and isinstance(units[0], CompUnitsBase):
            self.n_units = sum(u.n_units for u in units)
        else:
            self.n_units = len(units)

        n_units = self.n_units

        # inputs
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)
        if ldstmode:
            self.go_ad_i = Signal(n_units, reset_less=True)
            self.go_st_i = Signal(n_units, reset_less=True)

        # outputs
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)
        if ldstmode:
            self.ld_o = Signal(n_units, reset_less=True) # op is LD
            self.st_o = Signal(n_units, reset_less=True) # op is ST
            self.adr_rel_o = Signal(n_units, reset_less=True)
            self.sto_rel_o = Signal(n_units, reset_less=True)
            self.load_mem_o = Signal(n_units, reset_less=True)
            self.stwd_mem_o = Signal(n_units, reset_less=True)
            self.addr_o = Signal(rwid, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        for i, alu in enumerate(self.units):
            setattr(m.submodules, "comp%d" % i, alu)

        def cat(attr):
            # concatenate the same-named signal of every unit, unit 0 first
            return Cat(*[getattr(unit, attr) for unit in self.units])

        # per-unit outputs are gathered into this group's outputs...
        comb += self.rd_rel_o.eq(cat("rd_rel_o"))
        comb += self.req_rel_o.eq(cat("req_rel_o"))
        comb += self.busy_o.eq(cat("busy_o"))
        # ... and this group's inputs are fanned out across the units
        comb += cat("go_die_i").eq(self.go_die_i)
        comb += cat("shadown_i").eq(self.shadown_i)
        comb += cat("go_wr_i").eq(self.go_wr_i)
        comb += cat("go_rd_i").eq(self.go_rd_i)
        comb += cat("issue_i").eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # NOTE(review): the ldstmode test is taken to be nested under the
        # "if self.units" guard (source indentation was not available)
        if self.units:
            data_o = treereduce(self.units, "data_o")
            comb += self.data_o.eq(data_o)
            if self.ldstmode:
                addr_o = treereduce(self.units, "addr_o")
                comb += self.addr_o.eq(addr_o)

        # every unit sees the same source operands
        for alu in self.units:
            comb += alu.src1_i.eq(self.src1_i)
            comb += alu.src2_i.eq(self.src2_i)

        if not self.ldstmode:
            return m

        # LD/ST-only signals: same gather / fan-out arrangement
        comb += self.ld_o.eq(cat("ld_o"))
        comb += self.st_o.eq(cat("st_o"))
        comb += self.adr_rel_o.eq(cat("adr_rel_o"))
        comb += self.sto_rel_o.eq(cat("sto_rel_o"))
        comb += self.load_mem_o.eq(cat("load_mem_o"))
        comb += self.stwd_mem_o.eq(cat("stwd_mem_o"))
        comb += cat("go_ad_i").eq(self.go_ad_i)
        comb += cat("go_st_i").eq(self.go_st_i)

        return m
205
206
class CompUnitLDSTs(CompUnitsBase):
    """Group of LD/ST Computation Units, each wrapping its own ALU."""

    def __init__(self, rwid, opwid, n_ldsts, mem):
        """ Inputs:

            * :rwid:    bit width of register file(s) - both FP and INT
            * :opwid:   operand bit width
            * :n_ldsts: number of LD/ST units to create
            * :mem:     passed straight through to each LDSTCompUnit
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs, one per LD/ST unit
        self.alus = [ALU(rwid) for _ in range(n_ldsts)]

        aluopwid = 4 # see compldst.py for "internal" opcode
        ldst_units = [LDSTCompUnit(rwid, aluopwid, alu, mem)
                      for alu in self.alus]

        super().__init__(rwid, ldst_units, ldstmode=True)

    def elaborate(self, platform):
        m = super().elaborate(platform)

        # hand the same operation to all units, 4 lower bits though
        for unit in self.units:
            m.d.comb += [unit.oper_i[0:4].eq(self.oper_i),
                         unit.imm_i.eq(self.imm_i),
                         unit.isalu_i.eq(0)]

        return m
244
245
class CompUnitALUs(CompUnitsBase):
    """Group of integer ALU Computation Units that all get the same operation."""

    def __init__(self, rwid, opwid, n_alus):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
            * :n_alus: number of ALU Computation Units to create
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs first, then one Computation Unit around each
        int_alus = [ALU(rwid) for _ in range(n_alus)]

        aluopwid = 3 # extra bit for immediate mode
        comp_units = [ComputationUnitNoDelay(rwid, aluopwid, alu)
                      for alu in int_alus]

        super().__init__(rwid, comp_units)

    def elaborate(self, platform):
        m = super().elaborate(platform)

        # hand the same operation to all units, only lower 3 bits though
        for unit in self.units:
            m.d.comb += [unit.oper_i[0:3].eq(self.oper_i),
                         unit.imm_i.eq(self.imm_i)]

        return m
282
283
class CompUnitBR(CompUnitsBase):
    """Group holding the single branch Computation Unit."""

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:  bit width of register file(s) - both FP and INT
            * :opwid: operand bit width

            Note: the branch ALU (self.bgt) and its Computation Unit
            (self.br1) are kept as attributes so that a shadow unit
            can be created for the branch
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Branch ALU and CU
        aluopwid = 3 # extra bit for immediate mode
        self.bgt = BranchALU(rwid)
        self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)

        super().__init__(rwid, [self.br1])

    def elaborate(self, platform):
        m = super().elaborate(platform)

        # hand the same operation to all units
        for unit in self.units:
            m.d.comb += [unit.oper_i.eq(self.oper_i),
                         unit.imm_i.eq(self.imm_i)]

        return m
317
318
class FunctionUnits(Elaboratable):
    """Integer Function Unit dependency tracking.

    Instantiates an FU-FU dependency matrix and an FU-Register dependency
    matrix, cross-connects them, and exposes their combined inputs and
    outputs.
    """

    def __init__(self, n_regs, n_int_alus):
        """ Inputs:

            * :n_regs:     width of the register-select vectors
            * :n_int_alus: width of the per-Function-Unit vectors
        """
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        # register-select inputs
        self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in

        # global read/write pending vectors
        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        # register-select outputs
        self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)

        # per-FU outputs
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        # per-FU inputs
        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here (the attribute
        # is set during elaborate), for use in WaWGrid

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        n_fus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        fufu = FUFUDepMatrix(n_fus, n_fus)
        m.submodules.intfudeps = fufu
        # Integer FU-Reg Dep Matrix
        fureg = FURegDepMatrix(n_fus, self.n_regs, 2)
        m.submodules.intregdeps = fureg

        # global pending vectors come from the FU-Reg matrix, and are also
        # fed straight back into its pending inputs
        comb += [self.g_int_rd_pend_o.eq(fureg.v_rd_rsel_o),
                 self.g_int_wr_pend_o.eq(fureg.v_wr_rsel_o),
                 fureg.rd_pend_i.eq(fureg.v_rd_rsel_o),
                 fureg.wr_pend_i.eq(fureg.v_wr_rsel_o)]

        # FU-Reg pending outputs drive the FU-FU matrix
        comb += [fufu.rd_pend_i.eq(fureg.rd_pend_o),
                 fufu.wr_pend_i.eq(fureg.wr_pend_o)]
        self.wr_pend_o = fureg.wr_pend_o # also output for use in WaWGrid

        # FU-FU matrix control and results
        comb += [fufu.issue_i.eq(self.fn_issue_i),
                 fufu.go_rd_i.eq(self.go_rd_i),
                 fufu.go_wr_i.eq(self.go_wr_i),
                 fufu.go_die_i.eq(self.go_die_i),
                 self.readable_o.eq(fufu.readable_o),
                 self.writable_o.eq(fufu.writable_o)]

        # Connect function issue / arrays, and dest/src1/src2
        comb += [fureg.dest_i.eq(self.dest_i),
                 fureg.src_i[0].eq(self.src1_i),
                 fureg.src_i[1].eq(self.src2_i)]

        comb += [fureg.go_rd_i.eq(self.go_rd_i),
                 fureg.go_wr_i.eq(self.go_wr_i),
                 fureg.go_die_i.eq(self.go_die_i),
                 fureg.issue_i.eq(self.fn_issue_i)]

        comb += [self.dest_rsel_o.eq(fureg.dest_rsel_o),
                 self.src1_rsel_o.eq(fureg.src_rsel_o[0]),
                 self.src2_rsel_o.eq(fureg.src_rsel_o[1])]

        return m
392
393
class Scoreboard(Elaboratable):
    """Top-level scoreboard.

    Wires together the register files, the ALU / LDST / branch Computation
    Unit groups, the integer and memory Function Unit dependency matrices,
    the group pickers, the issue unit and the shadow matrices.

    All per-Function-Unit widths are derived from the Computation Unit
    groups that are actually instantiated, rather than being hard-coded.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # Memory (test for now)
        self.mem = TestMemory(self.rwid, 8) # not too big, takes too long

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(2)
        self.lsissue = IssueUnitGroup(2)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.alu_imm_i = Signal(rwid, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)
        self.ls_oper_i = Signal(4, reset_less=True)
        self.ls_imm_i = Signal(rwid, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs
        m.submodules.mem = self.mem

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested but not connected to anything yet
        self.fpregs.write_port("dest")
        self.fpregs.read_port("src1")
        self.fpregs.read_port("src2")

        # Int ALUs and BR ALUs
        cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
        cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs

        # LDST Comp Units: one per LD/ST issue slot
        n_ldsts = self.lsissue.n_insns
        cul = CompUnitLDSTs(self.rwid, 4, n_ldsts, None)

        # Comp Units
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
        br1 = cub.br1 # get at the branch computation unit

        # Count of number of FUs: one per Computation Unit in the group
        n_intfus = cu.n_units
        # one bit set per Function Unit
        fu_mask = (1 << n_intfus) - 1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_intfus)

        # Memory FUs
        m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)

        # Memory Priority Picker 1: one gateway per memory port
        mempick1 = GroupPicker(n_ldsts) # picks 1 reader and 1 writer to intreg
        m.submodules.mempick1 = mempick1

        # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
        intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: the branch shadow is held in
        # a separate, one-wide matrix (bshadow)
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [regdecode.dest_i.eq(self.int_dest_i),
                 regdecode.src1_i.eq(self.int_src1_i),
                 regdecode.src2_i.eq(self.int_src2_i),
                 regdecode.enable_i.eq(self.reg_enable_i),
                 self.issue_o.eq(issueunit.issue_o)
                ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cua.imm_i.eq(self.alu_imm_i)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)
        comb += cul.oper_i.eq(self.ls_oper_i)
        comb += cul.imm_i.eq(self.ls_imm_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # Memory Function Unit
        #---------
        reset_b = Signal(cul.n_units, reset_less=True)
        sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)

        comb += memfus.fn_issue_i.eq(cul.issue_i) # Comp Unit Issue -> Mem FUs
        comb += memfus.addr_en_i.eq(cul.adr_rel_o) # Match enable on adr rel
        comb += memfus.addr_rs_i.eq(reset_b) # reset same as LDSTCompUnit

        # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
        # in a transitive fashion).  This cycle activates based on LDSTCompUnit
        # issue_i.  multi-issue gets a bit more complex but not a lot.
        prior_ldsts = Signal(cul.n_units, reset_less=True)
        sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
        with m.If(self.ls_oper_i[2]): # LD bit of operand
            comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
        with m.If(self.ls_oper_i[3]): # ST bit of operand
            comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)

        # TODO: adr_rel_o needs to go into L1 Cache.  for now,
        # just immediately activate go_adr
        comb += cul.go_ad_i.eq(cul.adr_rel_o)

        # connect up address data: one address per LD/ST unit
        for i, ldst_unit in enumerate(cul.units):
            comb += memfus.addrs_i[i].eq(ldst_unit.addr_o)

        # connect loadable / storable to go_ld/go_st.
        # XXX should only be done when the memory ld/st has actually happened!
        go_st_i = Signal(cul.n_units, reset_less=True)
        go_ld_i = Signal(cul.n_units, reset_less=True)
        comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &
                           cul.req_rel_o & cul.ld_o)
        comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &
                           cul.sto_rel_o & cul.st_o)
        comb += memfus.go_ld_i.eq(go_ld_i)
        comb += memfus.go_st_i.eq(go_st_i)
        # go_wr_i of the LD/ST units is driven (with all other units)
        # from the integer picker, at the end of this function
        #comb += cul.go_wr_i.eq(go_ld_i)
        comb += cul.go_st_i.eq(go_st_i)

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the separate
        # one-wide bshadow ShadowMatrix
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        # NOTE(review): the per-FU loop is taken to be nested under this If
        # (source indentation was not available) - confirm
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        # NOTE(review): these three Ifs are taken to be siblings, not
        # nested (source indentation was not available) - confirm
        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & fu_mask)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & fu_mask)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """Return every signal yielded by iterating this object, as a list."""
        return list(self)
733
734
class IssueToScoreboard(Elaboratable):
    """Instruction queue feeding a Scoreboard.

    The head of the queue is inspected in place, steered to the branch,
    LD/ST or ALU issue group according to its opcode, and only popped
    from the queue once the chosen group reports that it issued.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   instruction queue length (must be >= 1)
            * :n_in:   number of instruction slots in data_i
            * :n_out:  passed through to InstructionQ
            * :rwid:   bit width of register file(s)
            * :opwid:  operand (opcode) bit width
            * :n_regs: number of registers

            Raises ValueError if qlen is less than 1.
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # width of the queue counters is floor(log2(qlen)) + 2.  computed
        # with integer arithmetic so that it does not depend on
        # floating-point rounding of log(qlen)/log(2)
        if qlen < 1:
            raise ValueError("qlen must be at least 1")
        mqbits = (int(qlen).bit_length() + 1, False)
        self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
        self.p_ready_o = Signal() # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True) # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when the issue group that was
        # chosen for them reports fn_issue_o (n_sub_i is then set to 1)

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()
        wait_issue_ls = Signal()

        with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
            # set instruction pop length to 1 if the unit accepted
            with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            imm = iq.data_o[0].imm_i
            dest = iq.data_o[0].dest_i
            src1 = iq.data_o[0].src1_i
            src2 = iq.data_o[0].src2_i
            op = iq.data_o[0].oper_i
            opi = iq.data_o[0].opim_i # immediate set

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1) # enable the regfile

            # choose a Function-Unit-Group
            with m.If((op & (0x3<<2)) != 0): # branch
                comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.br_imm_i.eq(imm)
                comb += sc.brissue.insn_i.eq(1)
                comb += wait_issue_br.eq(1)
            with m.Elif((op & (0x3<<4)) != 0): # ld/st
                # see compldst.py
                # bit 0: ADD/SUB
                # bit 1: immed
                # bit 4: LD
                # bit 5: ST
                comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
                comb += sc.ls_imm_i.eq(imm)
                comb += sc.lsissue.insn_i.eq(1)
                comb += wait_issue_ls.eq(1)
            with m.Else(): # alu
                comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.alu_imm_i.eq(imm)
                comb += sc.aluissue.insn_i.eq(1)
                comb += wait_issue_alu.eq(1)

        # XXX TODO
        # these indicate that the instruction is to be made
        # shadow-dependent on
        # (either) branch success or branch fail
        #yield sc.branch_fail_i.eq(branch_fail)
        #yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        yield self.p_ready_o
        for o in self.data_i:
            yield from o
        yield self.p_add_i

    def ports(self):
        """Return every signal yielded by iterating this object, as a list."""
        return list(self)
855
856
# Simulator opcodes, numbered consecutively from zero: four arithmetic
# operations (0-3) followed by four compare operations (4-7).
(IADD, ISUB, IMUL, ISHF,
 IBGT, IBLT, IBEQ, IBNE) = range(8)
865
866
class RegSim:
    """Software model of the integer register file.

    Executes the same operations as the hardware on a plain python list,
    so that the hardware registers can afterwards be compared against it.
    """

    def __init__(self, rwidth, nregs):
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, op_imm, imm, src1, src2, dest):
        """Perform one operation, store the result in ``dest``, return it.

        ``src1``, ``src2`` and ``dest`` are register numbers.  When
        ``op_imm`` is true the second operand is ``imm`` (used as given,
        not masked) instead of register ``src2``.  Unrecognised opcodes
        return 0 and leave the registers untouched.
        """
        maxbits = (1 << self.rwidth) - 1
        lhs = self.regs[src1] & maxbits
        rhs = imm if op_imm else self.regs[src2] & maxbits

        compute = {
            IADD: lambda: lhs + rhs,
            ISUB: lambda: lhs - rhs,
            IMUL: lambda: lhs * rhs,
            ISHF: lambda: lhs >> (rhs & maxbits),
            IBGT: lambda: int(lhs > rhs),
            IBLT: lambda: int(lhs < rhs),
            IBEQ: lambda: int(lhs == rhs),
            IBNE: lambda: int(lhs != rhs),
        }.get(op)
        if compute is None:
            return 0 # LD/ST TODO

        # truncate to the register width before storing
        val = compute() & maxbits
        self.setval(dest, val)
        return val

    def setval(self, dest, val):
        """Set register ``dest`` to ``val`` (and log the write)."""
        print("sim setval", dest, hex(val))
        self.regs[dest] = val

    def dump(self, dut):
        """Simulation process: print expected vs actual for every register."""
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg == val:
                okstr = "OK"
            else:
                okstr = "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """Simulation process: on the first mismatching register, dump
        every register and fail an assertion."""
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg == val:
                continue
            print("reg %d expected %x received %x\n" % (i, val, reg))
            yield from self.dump(dut)
            assert False
918
def instr_q(dut, op, op_imm, imm, src1, src2, dest,
            branch_success, branch_fail):
    """Simulation process: push one instruction into the dut's queue.

    The instruction is placed on data_i[0], p_add_i is set to 1, and
    the process then waits until p_ready_o is seen before clearing p_add_i
    again.  branch_success and branch_fail are accepted but not used here.
    """
    instruction = {'oper_i': op, 'dest_i': dest, 'imm_i': imm,
                   'opim_i': op_imm, 'src1_i': src1, 'src2_i': src2}

    # exactly one instruction is sent, always in slot zero
    sendlen = 1
    slot = 0

    yield from eq(dut.data_i[slot], instruction)
    di = yield dut.data_i[slot]
    print("senddata %d %x" % (slot, di))

    yield dut.p_add_i.eq(sendlen)
    yield
    # keep clocking until the queue reports the instruction was added
    while not (yield dut.p_ready_o):
        yield

    yield dut.p_add_i.eq(0)
937
938
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """Simulation process: issue one instruction directly to the scoreboard.

    Opcodes with bit 2 or bit 3 set go to the branch issue group, all
    others to the ALU issue group; only the low two opcode bits are
    passed on.  The process returns once the chosen group has issued.
    """
    yield from disable_issue(dut)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    is_branch = (op & (0x3 << 2)) != 0
    if is_branch:
        dut_issue, oper_i, imm_i = dut.brissue, dut.br_oper_i, dut.br_imm_i
    else:
        dut_issue, oper_i, imm_i = dut.aluissue, dut.alu_oper_i, dut.alu_imm_i

    yield dut_issue.insn_i.eq(1)
    yield oper_i.eq(Const(op & 0x3, 2))
    yield imm_i.eq(imm)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
963
964
def print_reg(dut, rnums):
    """Print the current contents of the ``dut`` registers listed in ``rnums``.

    Simulation generator: yields ``dut.intregs.regs[n].reg`` for each
    requested register number and expects its value to be sent back.  A
    single line with the register numbers and their hex values is printed
    once all of them have been read.
    """
    hex_values = []
    for rnum in rnums:
        content = yield dut.intregs.regs[rnum].reg
        hex_values.append("%x" % content)
    labels = ','.join(str(rnum) for rnum in rnums)
    print ("reg %s: %s" % (labels, ','.join(hex_values)))
972
973
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Build a list of ``n_ops`` randomly generated instruction tuples.

    Register numbers are drawn from 1 .. ``dut.n_regs``-1, the immediate
    from 1 .. 2**``dut.rwid``-1 and the opcode from 0 .. ``max_opnums``.
    Each entry is ``(src1, src2, dest, op, opi, imm)``; with ``shadowing``
    set, a trailing ``(0, 0)`` shadow pair is appended to every entry.
    """
    def draw_instruction():
        top_reg = dut.n_regs - 1
        # the order of the draws matters for seeded reproducibility
        src1 = randint(1, top_reg)
        src2 = randint(1, top_reg)
        imm = randint(1, (1 << dut.rwid) - 1)
        dest = randint(1, top_reg)
        op = randint(0, max_opnums)
        # opi is 1 only when the 0..2 draw comes up zero
        opi = int(randint(0, 2) == 0)
        fields = (src1, src2, dest, op, opi, imm)
        if shadowing:
            return fields + ((0, 0),)
        return fields

    return [draw_instruction() for _ in range(n_ops)]
989
990
def wait_for_busy_clear(dut):
    """Clock the simulation until ``dut.busy_o`` reads false.

    Simulation generator: samples ``dut.busy_o`` and, while it is still
    set, prints "busy" and advances one clock before sampling again.
    """
    while (yield dut.busy_o):
        print ("busy",)
        yield
998
def disable_issue(dut):
    """Clear the ``insn_i`` request on the ALU, branch and LD/ST issue units.

    Simulation generator: yields one ``insn_i.eq(0)`` assignment per unit,
    in the order ``aluissue``, ``brissue``, ``lsissue``.
    """
    for unit_name in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, unit_name).insn_i.eq(0)
1003
1004
def wait_for_issue(dut, dut_issue):
    """Clock until ``dut_issue.fn_issue_o`` is set, then withdraw the request.

    Simulation generator.  While the issue unit has not yet issued, "busy"
    is printed and one clock elapses.  Once it has, all issue requests are
    cleared with ``disable_issue`` and ``dut.reg_enable_i`` is dropped.
    """
    while not (yield dut_issue.fn_issue_o):
        print ("busy",)
        # debugging aid: yield from print_reg(dut, [1,2,3])
        yield
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
1016
def scoreboard_branch_sim(dut, alusim):
    """Testbench generator exercising branch shadowing on the scoreboard.

    Registers of ``dut`` and of the software model ``alusim`` are loaded
    with the same seeded random values.  A short instruction list is issued
    to ``dut`` through ``int_instr``; the same list (a deep copy) is then
    replayed on ``alusim`` and the two register files are compared.

    Instructions here are 5-field tuples
    ``(src1, src2, dest, op, (shadow_on, shadow_off))``.  An ``op`` of 4 or
    more is a branch, in which case ``dest`` holds a pair of lists
    ``(branch_ok, branch_fail)`` of follow-on instructions (entries may be
    ``None`` to mean "nothing on this path").
    """
    iseed = 3

    for _run in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for regno in range(1, dut.n_regs):
            val = randint(0, (1 << alusim.rwidth) - 1)
            yield dut.intregs.regs[regno].reg.eq(val)
            alusim.setval(regno, val)

        if False:
            # create some instructions: branches create a tree
            # NOTE(review): create_random_ops(..., True, ...) returns 7-field
            # tuples, which do not match the 5-field unpack used below;
            # this disabled path needs adapting before being re-enabled.
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # untouched copy for the software model: the issue loop below
        # consumes (and extends) insts
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        branch_direction = 0
        while insts:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            # NOTE(review): the meaning of branch_direction 1 vs 2 is taken
            # from the original inline remarks - confirm against the dut.
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch already resolved: issue without any shadow
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        insts.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        insts.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr takes an immediate after the opcode; these 5-field
            # instructions carry none, so 0 is supplied.
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # NOTE(review): argument layout (op, op_imm, imm, src1, src2,
            # dest) mirrors the alusim.op call made by scoreboard_sim;
            # no immediate is in use here, hence the two zeros.
            branch_res = alusim.op(op, 0, 0, src1, src2, dest)
            if is_branch:
                # follow only the path the software model actually took
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1129
1130
def _normalise_instr(instr):
    """Pad a regression-test tuple out to the current 7-field layout.

    The current layout is ``(src1, src2, dest, op, opi, imm, (ok, fail))``.
    Older test vectors kept in ``scoreboard_sim`` omit the trailing shadow
    pair and/or the ``opi`` / ``imm`` fields; missing fields are filled
    with zeros so those vectors can be enabled without a rewrite.

    Raises:
        ValueError: if the tuple has fewer than four or more than six
            fields ahead of the optional shadow pair.
    """
    fields = tuple(instr)
    if fields and isinstance(fields[-1], tuple):
        shadow = fields[-1]
        fields = fields[:-1]
    else:
        shadow = (0, 0)
    if not 4 <= len(fields) <= 6:
        raise ValueError("unrecognised instruction tuple %r" % (instr,))
    fields += (0,) * (6 - len(fields))
    return fields + (shadow,)


def scoreboard_sim(dut, alusim):
    """Testbench generator feeding instructions through the instruction queue.

    Registers of ``dut`` and of the software model ``alusim`` are loaded
    with the same seeded random values.  Each selected instruction is
    applied to ``alusim`` and queued on ``dut`` with ``instr_q``; once the
    queue has drained and ``dut`` is no longer busy, the two register files
    are compared with ``alusim.check``.

    The ``if False`` / ``if True`` sections are hand-toggled regression
    vectors; flip the condition to include one in the run.
    """
    seed(0)

    for _run in range(1):

        # set random values in the registers
        for regno in range(1, dut.n_regs):
            val = randint(0, (1 << alusim.rwidth) - 1)
            #val = 31+regno*3
            #val = regno
            yield dut.intregs.regs[regno].reg.eq(val)
            alusim.setval(regno, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if True: # LD/ST test (with immediate)
            instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )
            #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )

        if False:
            instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append( (1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            # older vectors above use shorter tuples: pad them first so the
            # 7-field unpack cannot fail when one of them is enabled
            (src1, src2, dest, op, opi, imm,
             (br_ok, br_fail)) = _normalise_instr(instr)

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                    (i, src1, src2, dest, op, opi, imm))
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # a few extra clocks after the queue empties, then wait for the
        # dut to report idle
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1277
1278
def test_scoreboard():
    """Build the scoreboard, export its RTLIL and run the queue testbench.

    Writes ``test_scoreboard6600.il`` (RTLIL netlist) and
    ``test_scoreboard6600.vcd`` (simulation trace) to the current
    directory.
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 16)  # constructed but not handed to the testbench

    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    testbench = scoreboard_sim(dut, alusim)
    run_simulation(dut, testbench, vcd_name='test_scoreboard6600.vcd')

    # alternative: branch-shadowing testbench
    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #               vcd_name='test_scoreboard6600.vcd')
1292
1293
# Run the scoreboard testbench when this file is executed as a script.
if __name__ == "__main__":
    test_scoreboard()