move mem into scoreboard (really should be outside, as should regfile)
[soc.git] / src / experiment / score6600.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
4
5 from regfile.regfile import RegFileArray, treereduce
6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
7 from scoreboard.fu_reg_matrix import FURegDepMatrix
8 from scoreboard.global_pending import GlobalPending
9 from scoreboard.group_picker import GroupPicker
10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
12 from scoreboard.instruction_q import Instruction, InstructionQ
13 from scoreboard.memfu import MemFunctionUnits
14
15 from compalu import ComputationUnitNoDelay
16 from compldst import LDSTCompUnit
17
18 from alu_hier import ALU, BranchALU
19 from nmutil.latch import SRLatch
20 from nmutil.nmoperator import eq
21
22 from random import randint, seed
23 from copy import deepcopy
24 from math import log
25
26
class TestMemory(Elaboratable):
    """ Test memory: an nmigen Memory with one read and one write port.

        Each cell is initialised to its own index (cell 0 holds 0,
        cell 1 holds 1, and so on).
    """
    def __init__(self, regwid, addrw):
        """ Inputs:

            * :regwid: width of each memory cell, in bits
            * :addrw:  address width; the memory gets
                       (1<<addrw) // ddepth cells
        """
        self.ddepth = 1 # regwid //8
        n_cells = (1 << addrw) // self.ddepth
        self.mem = Memory(width=regwid, depth=n_cells, init=range(n_cells))

    def elaborate(self, platform):
        m = Module()
        # the ports are only created at elaboration time; they are kept
        # as attributes as well as being registered as submodules
        self.rdport = self.mem.read_port()
        m.submodules.rdport = self.rdport
        self.wrport = self.mem.write_port()
        m.submodules.wrport = self.wrport
        return m
38
39
class MemSim:
    """ Pure-python model of a small memory, backed by a list of ints.

        Every cell starts out holding its own index.
    """
    def __init__(self, regwid, addrw):
        """ Inputs:

            * :regwid: width of a stored value in bits (stores are masked
                       to this width)
            * :addrw:  address width; the list gets
                       (1<<addrw) // ddepth entries
        """
        self.regwid = regwid
        self.ddepth = 1 # regwid//8
        n_cells = (1 << addrw) // self.ddepth
        self.mem = list(range(n_cells))

    def ld(self, addr):
        """ returns the cell selected by addr (addr shifted right by ddepth)
        """
        # NOTE(review): ddepth is used as a *divisor* in __init__ but as a
        # *shift count* here and in st() - confirm which one is intended
        cell = addr >> self.ddepth
        return self.mem[cell]

    def st(self, addr, data):
        """ stores data, truncated to regwid bits, in the cell selected
            by addr (same addr-to-cell mapping as ld)
        """
        mask = (1 << self.regwid) - 1
        cell = addr >> self.ddepth
        self.mem[cell] = data & mask
52
53
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Amazingly, this class works recursively.  It's supposed to just
        look after some ALUs (that can handle the same operations),
        grouping them together, however it turns out that the same code
        can also group *groups* of Computation Units together as well.

        Basically it was intended just to concatenate the ALU's issue,
        go_rd etc. signals together, which start out as bits and become
        sequences.  Turns out that the same trick works just as well
        on Computation Units!

        So this class may be used recursively to present a top-level
        sequential concatenation of all the signals in and out of
        ALUs, whilst at the same time making it convenient to group
        ALUs together.

        At the lower level, the intent is that groups of (identical)
        ALUs may be passed the same operation.  Even beyond that,
        the intent is that that group of (identical) ALUs actually
        share the *same pipeline* and as such become a "Concurrent
        Computation Unit" as defined by Mitch Alsup (see section
        11.4.9.3)
    """
    def __init__(self, rwid, units, ldstmode=False):
        """ Inputs:

            * :rwid:     bit width of register file(s) - both FP and INT
            * :units:    sequence of ALUs (or CompUnitsBase derivatives)
            * :ldstmode: when True the additional LD/ST control signals
                         (go_ad_i, go_st_i, ld_o, st_o, adr_rel_o,
                         sto_rel_o, load_mem_o, stwd_mem_o, addr_o) are
                         created, and elaborate() wires them to the units
        """
        self.units = units
        self.ldstmode = ldstmode
        self.rwid = rwid
        # a group of groups is as wide as the sum of its members;
        # a group of plain units has one bit per unit
        if units and isinstance(units[0], CompUnitsBase):
            self.n_units = sum(u.n_units for u in units)
        else:
            self.n_units = len(units)

        n_units = self.n_units

        # inputs
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)
        if ldstmode:
            self.go_ad_i = Signal(n_units, reset_less=True)
            self.go_st_i = Signal(n_units, reset_less=True)

        # outputs
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)
        if ldstmode:
            self.ld_o = Signal(n_units, reset_less=True) # op is LD
            self.st_o = Signal(n_units, reset_less=True) # op is ST
            self.adr_rel_o = Signal(n_units, reset_less=True)
            self.sto_rel_o = Signal(n_units, reset_less=True)
            self.load_mem_o = Signal(n_units, reset_less=True)
            self.stwd_mem_o = Signal(n_units, reset_less=True)
            self.addr_o = Signal(rwid, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        units = self.units

        for i, unit in enumerate(units):
            setattr(m.submodules, "comp%d" % i, unit)

        def gather(name):
            # concatenation of the signal called <name> from every unit,
            # in the order the units were given
            return Cat(*[getattr(unit, name) for unit in units])

        # per-unit outputs are concatenated into the group-wide outputs
        for name in ("rd_rel_o", "req_rel_o", "busy_o"):
            comb += getattr(self, name).eq(gather(name))

        # group-wide inputs are split back out across the units
        for name in ("go_die_i", "shadown_i", "go_wr_i",
                     "go_rd_i", "issue_i"):
            comb += gather(name).eq(getattr(self, name))

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        if units:
            comb += self.data_o.eq(treereduce(units, "data_o"))
            if self.ldstmode:
                comb += self.addr_o.eq(treereduce(units, "addr_o"))

        # every unit is handed the same source operands
        for unit in units:
            comb += unit.src1_i.eq(self.src1_i)
            comb += unit.src2_i.eq(self.src2_i)

        if not self.ldstmode:
            return m

        # LD/ST-only signals.  ld_o and st_o are included here: they used
        # to be declared (and the per-unit signals collected) but were
        # never assigned, so readers of the group's ld_o / st_o only ever
        # saw zero.
        for name in ("ld_o", "st_o", "adr_rel_o", "sto_rel_o",
                     "load_mem_o", "stwd_mem_o"):
            comb += getattr(self, name).eq(gather(name))
        for name in ("go_ad_i", "go_st_i"):
            comb += gather(name).eq(getattr(self, name))

        return m
203
204
class CompUnitLDSTs(CompUnitsBase):
    """ Group of LD/ST Computation Units, each wrapping its own ALU.
    """

    def __init__(self, rwid, opwid, n_ldsts, mem):
        """ Inputs:

            * :rwid:    bit width of register file(s) - both FP and INT
            * :opwid:   width of the oper_i input, in bits
            * :n_ldsts: number of LD/ST Computation Units to create
            * :mem:     passed straight through to each LDSTCompUnit
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs: one per LD/ST unit
        self.alus = [ALU(rwid) for _ in range(n_ldsts)]

        aluopwid = 4 # see compldst.py for "internal" opcode
        units = [LDSTCompUnit(rwid, aluopwid, alu, mem) for alu in self.alus]

        super().__init__(rwid, units, ldstmode=True)

    def elaborate(self, platform):
        m = super().elaborate(platform)

        # hand the same operation to all units, 4 lower bits though
        for unit in self.units:
            m.d.comb += [unit.oper_i[0:4].eq(self.oper_i),
                         unit.imm_i.eq(self.imm_i),
                         unit.isalu_i.eq(0),
                        ]

        return m
242
243
class CompUnitALUs(CompUnitsBase):
    """ Group of integer ALU Computation Units, all given the same operation.
    """

    def __init__(self, rwid, opwid, n_alus):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  width of the oper_i input, in bits
            * :n_alus: number of ALU Computation Units to create
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs, then one Computation Unit around each of them
        alus = [ALU(rwid) for _ in range(n_alus)]
        aluopwid = 3 # extra bit for immediate mode
        units = [ComputationUnitNoDelay(rwid, aluopwid, alu) for alu in alus]

        super().__init__(rwid, units)

    def elaborate(self, platform):
        m = super().elaborate(platform)

        # hand the same operation to all units, only lower 3 bits though
        for unit in self.units:
            m.d.comb += [unit.oper_i[0:3].eq(self.oper_i),
                         unit.imm_i.eq(self.imm_i),
                        ]

        return m
280
281
class CompUnitBR(CompUnitsBase):
    """ Group holding a single Branch Computation Unit.
    """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:  bit width of register file(s) - both FP and INT
            * :opwid: width of the oper_i input, in bits

            Note: the branch ALU (bgt) and its Computation Unit (br1) are
            kept as attributes so that a shadow unit can be created
            for the branch
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Branch ALU and CU
        self.bgt = BranchALU(rwid)
        aluopwid = 3 # extra bit for immediate mode
        self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)
        super().__init__(rwid, [self.br1])

    def elaborate(self, platform):
        m = super().elaborate(platform)

        # hand the same operation to all units
        for unit in self.units:
            m.d.comb += [unit.oper_i.eq(self.oper_i),
                         unit.imm_i.eq(self.imm_i),
                        ]

        return m
315
316
class FunctionUnits(Elaboratable):
    """ Wires an FU-FU Dependency Matrix and an FU-Reg Dependency Matrix
        together and exposes their combined inputs and outputs.
    """

    def __init__(self, n_regs, n_int_alus):
        """ Inputs:

            * :n_regs:     width of the register-select signals
            * :n_int_alus: number of Function Units (width of the
                           per-FU signals)
        """
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        # register selection, n_regs bits each
        self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in

        # global pending vectors
        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        # register selects back out
        self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)

        # per-FU status out
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        # per-FU control in
        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here, for use in
        # WaWGrid (the attribute only exists once elaborate() has run)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        n_fus = self.n_int_alus

        # Integer FU-FU Dep Matrix and Integer FU-Reg Dep Matrix
        fudeps = FUFUDepMatrix(n_fus, n_fus)
        regdeps = FURegDepMatrix(n_fus, self.n_regs, 2)
        m.submodules.intfudeps = fudeps
        m.submodules.intregdeps = regdeps

        # also output for use in WaWGrid
        self.wr_pend_o = regdeps.wr_pend_o

        # FU-Reg matrix: vector outputs go out, and loop back into itself
        comb += [self.g_int_rd_pend_o.eq(regdeps.v_rd_rsel_o),
                 self.g_int_wr_pend_o.eq(regdeps.v_wr_rsel_o),
                 regdeps.rd_pend_i.eq(regdeps.v_rd_rsel_o),
                 regdeps.wr_pend_i.eq(regdeps.v_wr_rsel_o),
                ]

        # FU-Reg pending outputs feed the FU-FU matrix
        comb += [fudeps.rd_pend_i.eq(regdeps.rd_pend_o),
                 fudeps.wr_pend_i.eq(regdeps.wr_pend_o),
                ]

        # issue / go_rd / go_wr / go_die go to *both* matrices
        for deps in (fudeps, regdeps):
            comb += [deps.issue_i.eq(self.fn_issue_i),
                     deps.go_rd_i.eq(self.go_rd_i),
                     deps.go_wr_i.eq(self.go_wr_i),
                     deps.go_die_i.eq(self.go_die_i),
                    ]

        # readable / writable come from the FU-FU matrix
        comb += [self.readable_o.eq(fudeps.readable_o),
                 self.writable_o.eq(fudeps.writable_o),
                ]

        # dest/src1/src2 in, and the register selects back out
        comb += [regdeps.dest_i.eq(self.dest_i),
                 regdeps.src_i[0].eq(self.src1_i),
                 regdeps.src_i[1].eq(self.src2_i),
                 self.dest_rsel_o.eq(regdeps.dest_rsel_o),
                 self.src1_rsel_o.eq(regdeps.src_rsel_o[0]),
                 self.src2_rsel_o.eq(regdeps.src_rsel_o[1]),
                ]

        return m
390
391
class Scoreboard(Elaboratable):
    """ Top-level scoreboard: register files, test memory, Computation
        Units (ALU, LD/ST, Branch), dependency matrices, pickers, issue
        units and shadow matrices, all wired together in elaborate().
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # Memory (test for now)
        self.mem = TestMemory(self.rwid, 8) # not too big, takes too long

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(2)
        self.lsissue = IssueUnitGroup(2)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.alu_imm_i = Signal(rwid, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)
        self.ls_oper_i = Signal(4, reset_less=True)
        self.ls_imm_i = Signal(rwid, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs
        # the local "mem" is not referenced again in this method
        m.submodules.mem = mem = self.mem

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # the FP ports are created but not connected anywhere below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and BR ALUs
        # NOTE(review): 5 is hard-coded; it equals the sum of the three
        # issue groups created in __init__ (2 ALU + 2 LD/ST + 1 BR) and
        # must be kept in step with them by hand
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
        cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs

        # LDST Comp Units
        # NOTE(review): n_ldsts is hard-coded whereas the number of LD/ST
        # Comp Units comes from self.lsissue.n_insns.  Also, None is passed
        # as the "mem" argument even though self.mem exists - confirm
        # whether self.mem was intended here.
        n_ldsts = 2
        cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, None)

        # Comp Units: one flat group, in the order ALUs, LD/STs, Branch
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Memory FUs
        m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)

        # Memory Priority Picker 1: one gateway per memory port
        # (created and registered, but none of its ports are connected
        # in this method)
        mempick1 = GroupPicker(n_ldsts) # picks 1 reader and 1 writer to intreg
        m.submodules.mempick1 = mempick1

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
        intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        # NOTE(review): the code below creates a *separate* matrix
        # (bshadow) for the branch rather than widening "shadows" by one;
        # the sentence above looks stale - confirm.
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [ regdecode.dest_i.eq(self.int_dest_i),
                  regdecode.src1_i.eq(self.int_src1_i),
                  regdecode.src2_i.eq(self.int_src2_i),
                  regdecode.enable_i.eq(self.reg_enable_i),
                  self.issue_o.eq(issueunit.issue_o)
                ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cua.imm_i.eq(self.alu_imm_i)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)
        comb += cul.oper_i.eq(self.ls_oper_i)
        comb += cul.imm_i.eq(self.ls_imm_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        # busy_o is the OR of every Comp Unit's busy bit
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # Memory Function Unit
        #---------
        # reset_b is registered (sync), so it is seen one clock after
        # go_st / go_wr / go_die
        reset_b = Signal(cul.n_units, reset_less=True)
        sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)

        comb += memfus.fn_issue_i.eq(cul.issue_i) # Comp Unit Issue -> Mem FUs
        comb += memfus.addr_en_i.eq(cul.adr_rel_o) # Match enable on adr rel
        comb += memfus.addr_rs_i.eq(reset_b) # reset same as LDSTCompUnit

        # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
        # in a transitive fashion).  This cycle activates based on LDSTCompUnit
        # issue_i.  multi-issue gets a bit more complex but not a lot.
        prior_ldsts = Signal(cul.n_units, reset_less=True)
        sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
        with m.If(self.ls_oper_i[2]): # LD bit of operand
            comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
        with m.If(self.ls_oper_i[3]): # ST bit of operand
            comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)

        # TODO: adr_rel_o needs to go into L1 Cache.  for now,
        # just immediately activate go_adr
        comb += cul.go_ad_i.eq(cul.adr_rel_o)

        # connect up address data
        # NOTE(review): indices 0 and 1 are written out by hand, which
        # assumes exactly two LD/ST units
        comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
        comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)

        # connect loadable / storable to go_ld/go_st.
        # XXX should only be done when the memory ld/st has actually happened!
        # NOTE(review): go_ld_i is gated on memfus.storable_o, exactly like
        # go_st_i - confirm that a "loadable" signal was not intended
        go_st_i = Signal(cul.n_units, reset_less=True)
        go_ld_i = Signal(cul.n_units, reset_less=True)
        comb += go_ld_i.eq(memfus.storable_o & memfus.addr_nomatch_o &\
                           cul.req_rel_o & cul.ld_o)
        comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &\
                           cul.sto_rel_o & cul.st_o)
        comb += memfus.go_ld_i.eq(go_ld_i)
        comb += memfus.go_st_i.eq(go_st_i)
        #comb += cul.go_wr_i.eq(go_ld_i)
        comb += cul.go_st_i.eq(go_st_i)

        #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        # the order-preservation shadows are reset by the branch shadow's
        # go_die output
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i
        # NOTE(review): as above, the branch shadow is actually the
        # separate "bshadow" matrix; "n_intfus+1" does not appear in the code

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # a branch is "active" from its issue until its go_wr
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # 0x1f masks five bits, matching the hard-coded n_int_alus = 5 above
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            # data_o + 1: gives 1 or 2 for a branch result of 0 or 1
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        # everything the two register files yield, then this module's
        # own top-level signals
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        # list form of __iter__
        return list(self)
731
732
class IssueToScoreboard(Elaboratable):
    """ Instruction Queue in front of a Scoreboard.

        The instruction at the head of the queue is decoded into one of
        three groups (branch, ld/st, alu), presented to the matching issue
        unit, and popped from the queue once that unit reports an issue.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   instruction queue length (passed to InstructionQ)
            * :n_in:   number of instruction inputs (entries in data_i)
            * :n_out:  passed to InstructionQ
            * :rwid:   bit width of register file(s)
            * :opwid:  bit width of the instruction operation field
            * :n_regs: number of registers (passed to Scoreboard)
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # (width, signed) shape tuple: unsigned, int(log2(qlen))+2 bits wide
        mqbits = (int(log(qlen) / log(2))+2, False)
        self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
        self.p_ready_o = Signal() # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True) # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        # (note: this attribute only exists after elaborate() has been called)
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when iq.n_sub_i is set, which
        # is done below once the chosen issue unit has accepted

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()
        wait_issue_ls = Signal()

        with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
            # set instruction pop length to 1 if the unit accepted
            with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            imm = iq.data_o[0].imm_i
            dest = iq.data_o[0].dest_i
            src1 = iq.data_o[0].src1_i
            src2 = iq.data_o[0].src2_i
            op = iq.data_o[0].oper_i
            opi = iq.data_o[0].opim_i # immediate set

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1) # enable the regfile

            # choose a Function-Unit-Group
            # (branch if op bit 2 or 3 is set; otherwise ld/st if op bit 4
            # or 5 is set; otherwise alu)
            with m.If((op & (0x3<<2)) != 0): # branch
                comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.br_imm_i.eq(imm)
                comb += sc.brissue.insn_i.eq(1)
                comb += wait_issue_br.eq(1)
            with m.Elif((op & (0x3<<4)) != 0): # ld/st
                # see compldst.py
                # the bit numbers below are bits of "op"; after the Cat
                # they land in ls_oper_i bits 0, 1, 2 and 3 respectively
                # bit 0: ADD/SUB
                # bit 1: immed
                # bit 4: LD
                # bit 5: ST
                comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
                comb += sc.ls_imm_i.eq(imm)
                comb += sc.lsissue.insn_i.eq(1)
                comb += wait_issue_ls.eq(1)
            with m.Else(): # alu
                comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.alu_imm_i.eq(imm)
                comb += sc.aluissue.insn_i.eq(1)
                comb += wait_issue_alu.eq(1)

            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            #yield sc.branch_fail_i.eq(branch_fail)
            #yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        # p_ready_o, then every field of every input instruction,
        # then p_add_i
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        # list form of __iter__
        return list(self)
853
854
# Opcode numbers used by the simulation helpers.  0-3 are arithmetic
# (add, subtract, multiply, shift right); 4-7 are the compare/branch
# operations (greater-than, less-than, equal, not-equal).
(IADD, ISUB, IMUL, ISHF,
 IBGT, IBLT, IBEQ, IBNE) = range(8)
863
864
class RegSim:
    """ Pure-python model of the integer register file, used to compute
        the values the hardware is expected to end up with.
    """
    def __init__(self, rwidth, nregs):
        """ Inputs:

            * :rwidth: register width in bits
            * :nregs:  number of registers (all start at zero)
        """
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, op_imm, imm, src1, src2, dest):
        """ performs one operation, stores the result (truncated to rwidth
            bits) in register dest, and returns it.

            src1, src2 and dest are register *numbers*.  when op_imm is
            set, imm is used (unmasked) in place of register src2.
            an op outside IADD..IBNE returns 0 and stores nothing.
        """
        mask = (1 << self.rwidth) - 1
        lhs = self.regs[src1] & mask
        rhs = imm if op_imm else (self.regs[src2] & mask)
        actions = {
            IADD: lambda: lhs + rhs,
            ISUB: lambda: lhs - rhs,
            IMUL: lambda: lhs * rhs,
            ISHF: lambda: lhs >> (rhs & mask),
            IBGT: lambda: int(lhs > rhs),
            IBLT: lambda: int(lhs < rhs),
            IBEQ: lambda: int(lhs == rhs),
            IBNE: lambda: int(lhs != rhs),
        }
        if op not in actions:
            return 0 # LD/ST TODO
        result = actions[op]() & mask
        self.setval(dest, result)
        return result

    def setval(self, dest, val):
        """ writes val to register number dest (and logs it)
        """
        print ("sim setval", dest, hex(val))
        self.regs[dest] = val

    def dump(self, dut):
        """ simulation process: prints expected against actual for
            every register of dut
        """
        for i, expected in enumerate(self.regs):
            actual = yield dut.intregs.regs[i].reg
            okstr = "OK" if actual == expected else "!ok"
            print("reg %d expected %x received %x %s" % \
                            (i, expected, actual, okstr))

    def check(self, dut):
        """ simulation process: compares every register of dut against
            the model.  on the first mismatch the full register set is
            dumped and an assertion fails.
        """
        for i, expected in enumerate(self.regs):
            actual = yield dut.intregs.regs[i].reg
            if actual == expected:
                continue
            print("reg %d expected %x received %x\n" % (i, expected, actual))
            yield from self.dump(dut)
            assert False
916
def instr_q(dut, op, op_imm, imm, src1, src2, dest,
            branch_success, branch_fail):
    """ simulation process: pushes one instruction into dut's instruction
        queue and waits until dut reports that it was taken.

        branch_success and branch_fail are accepted but not used here.
    """
    instrs = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
               'src1_i': src1, 'src2_i': src2}]
    sendlen = 1

    # place the instruction(s) on the data inputs
    for idx, instr in enumerate(instrs[:sendlen]):
        yield from eq(dut.data_i[idx], instr)
        di = yield dut.data_i[idx]
        print ("senddata %d %x" % (idx, di))

    # request the add, then clock until it has been accepted
    yield dut.p_add_i.eq(sendlen)
    while True:
        yield
        accepted = yield dut.p_ready_o
        if accepted:
            break

    yield dut.p_add_i.eq(0)
935
936
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """ simulation process: presents one instruction directly to the
        scoreboard's issue unit (branch or alu group) and waits for it
        to be issued.
    """
    yield from disable_issue(dut)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    # ops with bit 2 or 3 set go to the branch group, the rest to the alus
    if (op & (0x3<<2)) != 0:
        dut_issue, oper_sig, imm_sig = dut.brissue, dut.br_oper_i, dut.br_imm_i
    else:
        dut_issue, oper_sig, imm_sig = dut.aluissue, dut.alu_oper_i, dut.alu_imm_i
    yield dut_issue.insn_i.eq(1)
    yield oper_sig.eq(Const(op & 0x3, 2))
    yield imm_sig.eq(imm)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
961
962
def print_reg(dut, rnums):
    """Print the current contents (in hex) of the DUT registers `rnums`.

    Simulation generator: yields `dut.intregs.regs[n].reg` for each
    requested register number and formats the value sent back.
    """
    values = []
    for rnum in rnums:
        current = yield dut.intregs.regs[rnum].reg
        values.append("%x" % current)
    labels = ','.join(str(rnum) for rnum in rnums)
    print("reg %s: %s" % (labels, ','.join(values)))
970
971
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Build a list of `n_ops` randomly generated instruction tuples.

    Each tuple is `(src1, src2, dest, op, opi, imm)`; when `shadowing` is
    true a trailing `(0, 0)` pair is appended as a seventh field.

    * register numbers are drawn from 1 .. dut.n_regs-1
    * `imm` is drawn from 1 .. (1<<dut.rwid)-1
    * `op` is drawn from 0 .. max_opnums (inclusive)
    * `opi` is 1 when a draw from 0..2 comes up zero, otherwise 0
    """
    def random_instr():
        # NOTE: the order of the randint calls determines the generated
        # sequence for a given seed, so it must not be changed.
        src1 = randint(1, dut.n_regs - 1)
        src2 = randint(1, dut.n_regs - 1)
        imm = randint(1, (1 << dut.rwid) - 1)
        dest = randint(1, dut.n_regs - 1)
        op = randint(0, max_opnums)
        opi = 1 if randint(0, 2) == 0 else 0
        fields = (src1, src2, dest, op, opi, imm)
        if shadowing:
            fields += ((0, 0),)
        return fields

    return [random_instr() for _ in range(n_ops)]
987
988
def wait_for_busy_clear(dut):
    """Step the simulation until `dut.busy_o` reads as zero.

    Simulation generator: prints "busy" and advances one clock for every
    sample in which `dut.busy_o` is still non-zero.
    """
    still_busy = yield dut.busy_o
    while still_busy:
        print("busy")
        yield
        still_busy = yield dut.busy_o
996
def disable_issue(dut):
    """Clear the `insn_i` input of the ALU, branch and LD/ST issue units.

    Simulation generator: yields one assignment per issue unit, in the
    order aluissue, brissue, lsissue.
    """
    for unit_name in ("aluissue", "brissue", "lsissue"):
        issue_unit = getattr(dut, unit_name)
        yield issue_unit.insn_i.eq(0)
1001
1002
def wait_for_issue(dut, dut_issue):
    """Wait until `dut_issue.fn_issue_o` is asserted, then stop issuing.

    Simulation generator: prints "busy" and advances one clock for every
    sample in which the issue unit has not yet issued.  Once it has, all
    issue-unit `insn_i` inputs are cleared (via `disable_issue`) and
    `dut.reg_enable_i` is set back to zero.
    """
    while not (yield dut_issue.fn_issue_o):
        print("busy")
        # debugging aid: yield from print_reg(dut, [1,2,3])
        yield
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
1014
def scoreboard_branch_sim(dut, alusim):
    """Branch-shadowing testbench: issue a branch plus shadowed instructions.

    Simulation generator.  Instructions are 5-field tuples
    `(src1, src2, dest, op, (shadow_on, shadow_off))`.  For a branch
    (`op >= 4`) the `dest` slot instead holds a `(branch_ok, branch_fail)`
    pair of instruction lists; entries in those lists may be None to mean
    "no instruction on this path".

    The instructions are first issued to the DUT with `int_instr`, then
    replayed on the register model `alusim`, and finally the DUT register
    file is compared against the model.
    """
    iseed = 3

    for i in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for i in range(1, dut.n_regs):
            #val = 31+i*3
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        if False:
            # NOTE(review): disabled.  create_random_ops returns tuples with
            # opi/imm fields, which the 5-field unpacking below does not
            # accept - needs updating before this can be re-enabled.
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # the issue loop below consumes insts, so keep a copy for the model
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts  # alias: appending to instrs extends insts as well
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # these tuples carry no immediate, so pass imm=0: int_instr
            # takes (dut, op, imm, src1, src2, dest, succ, fail) and the
            # previous 7-argument call raised TypeError.
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay the same program on the register model
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # same argument layout as the call in scoreboard_sim:
            # op(op, op_imm, imm, src1, src2, dest); no immediate is used.
            branch_res = alusim.op(op, 0, 0, src1, src2, dest)
            if is_branch:
                # follow only the path the model says the branch took
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1127
1128
def scoreboard_sim(dut, alusim):
    """Instruction-queue testbench: run a program and compare registers.

    Simulation generator.  Registers 1..dut.n_regs-1 are loaded with the
    same random values in the DUT and in the register model `alusim`.
    Each instruction - a 7-field tuple
    `(src1, src2, dest, op, opi, imm, (br_ok, br_fail))` - is applied to
    the model and queued on the DUT with `instr_q`.  Once the queue has
    drained and the DUT is no longer busy, the DUT register file is
    checked against the model.

    The `if False:` sections are disabled regression vectors; only the
    `if True:` section contributes instructions.
    """
    seed(0)

    for _ in range(1):

        # set random values in the registers
        for regnum in range(1, dut.n_regs):
            initial = randint(0, (1<<alusim.rwidth)-1)
            #initial = 31+regnum*3
            #initial = regnum
            yield dut.intregs.regs[regnum].reg.eq(initial)
            alusim.setval(regnum, initial)

        # create some instructions (some random, some regression tests)
        program = []
        if False:
            program = create_random_ops(dut, 15, True, 4)

        if True: # LD/ST test (with immediate)
            program.append((1, 2, 2, 0x30, 1, 1, (0, 0)))
            #program.append((1, 2, 7, 0x10, 1, 1, (0, 0)))

        if False:
            program.append((1, 2, 2, 1, 1, 20, (0, 0)))

        if False:
            program.extend([
                (7, 3, 2, 4, (0, 0)),
                (7, 6, 6, 2, (0, 0)),
                (1, 7, 2, 2, (0, 0)),
            ])

        if False:
            program.extend([
                (2, 3, 3, 0, 0, 0, (0, 0)),
                (5, 3, 3, 1, 0, 0, (0, 0)),
                (3, 5, 5, 2, 0, 0, (0, 0)),
                (5, 3, 3, 3, 0, 0, (0, 0)),
                (3, 5, 5, 0, 0, 0, (0, 0)),
            ])

        if False:
            program.extend([
                (3, 3, 4, 0, 0, 13979, (0, 0)),
                (6, 4, 1, 2, 0, 40976, (0, 0)),
                (1, 4, 7, 4, 1, 23652, (0, 0)),
            ])

        if False:
            program.extend([
                (5, 6, 2, 1),
                (2, 2, 4, 0),
                #(2, 2, 3, 1),
            ])

        if False:
            program.append((2, 1, 2, 3))

        if False:
            program.extend([
                (2, 6, 2, 1),
                (2, 1, 2, 0),
            ])

        if False:
            program.extend([
                (1, 2, 7, 2),
                (7, 1, 5, 0),
                (4, 4, 1, 1),
            ])

        if False:
            program.extend([
                (5, 6, 2, 2),
                (1, 1, 4, 1),
                (6, 5, 3, 0),
            ])

        if False:
            # Write-after-Write Hazard
            program.extend([
                (3, 6, 7, 2),
                (4, 4, 7, 1),
            ])

        if False:
            # self-read/write-after-write followed by Read-after-Write
            program.extend([
                (1, 1, 1, 1),
                (1, 5, 3, 0),
            ])

        if False:
            # Read-after-Write followed by self-read-after-write
            program.extend([
                (5, 6, 1, 2),
                (1, 1, 1, 1),
            ])

        if False:
            # self-read-write sandwich
            program.extend([
                (5, 6, 1, 2),
                (1, 1, 1, 1),
                (1, 5, 3, 0),
            ])

        if False:
            # very weird failure
            program.extend([
                (5, 2, 5, 2),
                (2, 6, 3, 0),
                (4, 2, 2, 1),
            ])

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            program.extend([
                (5, 3, 3, 4, (0, 0)),
                (4, 2, 1, 2, (0, 1)),
            ])

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            program.extend([
                (5, 3, 3, 4, (0, 0)),
                (4, 2, 1, 2, (1, 0)),
            ])

        if False:
            program.extend([
                (4, 3, 5, 1, 0, (0, 0)),
                (5, 2, 3, 1, 0, (0, 0)),
                (7, 1, 5, 2, 0, (0, 0)),
                (5, 6, 6, 4, 0, (0, 0)),
                (7, 5, 2, 2, 0, (1, 0)),
                (1, 7, 5, 0, 0, (0, 1)),
                (1, 6, 1, 2, 0, (1, 0)),
                (1, 6, 7, 3, 0, (0, 0)),
                (6, 7, 7, 0, 0, (0, 0)),
            ])

        # issue instruction(s), wait for issue to be free before proceeding
        for index, instruction in enumerate(program):
            src1, src2, dest, op, opi, imm, shadow = instruction
            br_ok, br_fail = shadow

            summary = (index, src1, src2, dest, op, opi, imm)
            print("instr %d: (%d, %d, %d, %d, %d, %d)" % summary)
            # update the model first, then hand the instruction to the DUT
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking:
        # first until the queue reports empty ...
        while (yield dut.qlen_o) != 0:
            yield
        # ... then five more clocks, and finally until busy drops
        for _ in range(5):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1275
1276
def test_scoreboard():
    """Build the scoreboard DUT, dump its RTLIL and run `scoreboard_sim`.

    Writes "test_scoreboard6600.il" (RTLIL netlist) and
    "test_scoreboard6600.vcd" (simulation trace) to the current directory.
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 16)  # constructed but not used by the active test

    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    testbench = scoreboard_sim(dut, alusim)
    run_simulation(dut, testbench, vcd_name='test_scoreboard6600.vcd')

    # alternative testbench (branch shadowing), currently disabled:
    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1290
1291
# Run the scoreboard testbench when this file is executed as a script.
if __name__ == '__main__':
    test_scoreboard()