Allow the formal engine to perform a same-cycle result in the ALU
[soc.git] / src / soc / experiment / score6600.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen.hdl.ast import unsigned
4 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
5
6 from soc.regfile.regfile import RegFileArray, treereduce
7 from soc.scoreboard.fu_fu_matrix import FUFUDepMatrix
8 from soc.scoreboard.fu_reg_matrix import FURegDepMatrix
9 from soc.scoreboard.global_pending import GlobalPending
10 from soc.scoreboard.group_picker import GroupPicker
11 from soc.scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
12 from soc.scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
13 from soc.scoreboard.instruction_q import Instruction, InstructionQ
14 from soc.scoreboard.memfu import MemFunctionUnits
15
16 from compalu import ComputationUnitNoDelay
17 from compldst import LDSTCompUnit
18 from testmem import TestMemory
19
20 from alu_hier import ALU, BranchALU
21 from nmutil.latch import SRLatch
22 from nmutil.nmoperator import eq
23
24 from random import randint, seed
25 from copy import deepcopy
26 from math import log
27
28
class MemSim:
    """ Plain-Python memory model used alongside the hardware simulation.

        The backing store is a list whose slots are initialised to their
        own index.  Addresses are right-shifted by ``ddepth`` before being
        used as a list index.
    """

    def __init__(self, regwid, addrw):
        """ Inputs:

            * :regwid: data width in bits (stored values are masked to this)
            * :addrw:  address width in bits (sets the number of slots)
        """
        self.regwid = regwid
        self.ddepth = 1  # regwid//8
        n_slots = (1 << addrw) // self.ddepth
        self.mem = [slot for slot in range(n_slots)]

    def _index(self, addr):
        # every access goes through the same address-to-slot conversion
        return addr >> self.ddepth

    def ld(self, addr):
        """ Return the value held in the slot selected by *addr*. """
        return self.mem[self._index(addr)]

    def st(self, addr, data):
        """ Store *data*, truncated to ``regwid`` bits, at *addr*. """
        width_mask = (1 << self.regwid) - 1
        self.mem[self._index(addr)] = data & width_mask
41
42
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Amazingly, this class works recursively.  It's supposed to just
        look after some ALUs (that can handle the same operations),
        grouping them together, however it turns out that the same code
        can also group *groups* of Computation Units together as well.

        Basically it was intended just to concatenate the ALU's issue,
        go_rd etc. signals together, which start out as bits and become
        sequences.  Turns out that the same trick works just as well
        on Computation Units!

        So this class may be used recursively to present a top-level
        sequential concatenation of all the signals in and out of
        ALUs, whilst at the same time making it convenient to group
        ALUs together.

        At the lower level, the intent is that groups of (identical)
        ALUs may be passed the same operation.  Even beyond that,
        the intent is that that group of (identical) ALUs actually
        share the *same pipeline* and as such become a "Concurrent
        Computation Unit" as defined by Mitch Alsup (see section
        11.4.9.3)
    """
    def __init__(self, rwid, units, ldstmode=False):
        """ Inputs:

            * :rwid:     bit width of register file(s) - both FP and INT
            * :units:    sequence of ALUs (or CompUnitsBase derivatives)
            * :ldstmode: when True, the LD/ST-specific signals (go_ad_i,
                         go_st_i, ld_o, st_o, adr_rel_o, ...) are created
                         and wired up as well
        """
        self.units = units
        self.ldstmode = ldstmode
        self.rwid = rwid
        # a group of groups is as wide as all of its sub-groups combined;
        # only the first entry is inspected to decide which case applies
        if units and isinstance(units[0], CompUnitsBase):
            self.n_units = sum(u.n_units for u in self.units)
        else:
            self.n_units = len(units)

        n_units = self.n_units

        # inputs
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)
        if ldstmode:
            self.go_ad_i = Signal(n_units, reset_less=True)
            self.go_st_i = Signal(n_units, reset_less=True)

        # outputs
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)
        self.done_o = Signal(n_units, reset_less=True)
        if ldstmode:
            self.ld_o = Signal(n_units, reset_less=True)  # op is LD
            self.st_o = Signal(n_units, reset_less=True)  # op is ST
            self.adr_rel_o = Signal(n_units, reset_less=True)
            self.sto_rel_o = Signal(n_units, reset_less=True)
            self.load_mem_o = Signal(n_units, reset_less=True)
            self.stwd_mem_o = Signal(n_units, reset_less=True)
            self.addr_o = Signal(rwid, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)
        # input operand

    def elaborate(self, platform):
        """ Add every unit as submodule "comp<N>" and wire the per-unit
            signals to the concatenated group-level signals.
        """
        m = Module()
        comb = m.d.comb

        for i, alu in enumerate(self.units):
            setattr(m.submodules, "comp%d" % i, alu)

        def gather(name):
            # concatenation of the same-named signal of every unit,
            # in unit order
            return Cat(*[getattr(alu, name) for alu in self.units])

        # per-unit outputs -> group outputs
        comb += self.rd_rel_o.eq(gather("rd_rel_o"))
        comb += self.req_rel_o.eq(gather("req_rel_o"))
        comb += self.done_o.eq(gather("done_o"))
        comb += self.busy_o.eq(gather("busy_o"))
        # group inputs -> per-unit inputs
        comb += gather("go_die_i").eq(self.go_die_i)
        comb += gather("shadown_i").eq(self.shadown_i)
        comb += gather("go_wr_i").eq(self.go_wr_i)
        comb += gather("go_rd_i").eq(self.go_rd_i)
        comb += gather("issue_i").eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        if self.units:
            data_o = treereduce(self.units, "data_o")
            comb += self.data_o.eq(data_o)
            if self.ldstmode:
                addr_o = treereduce(self.units, "addr_o")
                comb += self.addr_o.eq(addr_o)

        # every unit sees the same two source operands
        for alu in self.units:
            comb += alu.src1_i.eq(self.src1_i)
            comb += alu.src2_i.eq(self.src2_i)

        if not self.ldstmode:
            return m

        # LD/ST-only signals: outputs first, then inputs
        comb += self.ld_o.eq(gather("ld_o"))
        comb += self.st_o.eq(gather("st_o"))
        comb += self.adr_rel_o.eq(gather("adr_rel_o"))
        comb += self.sto_rel_o.eq(gather("sto_rel_o"))
        comb += self.load_mem_o.eq(gather("load_mem_o"))
        comb += self.stwd_mem_o.eq(gather("stwd_mem_o"))
        comb += gather("go_ad_i").eq(self.go_ad_i)
        comb += gather("go_st_i").eq(self.go_st_i)

        return m
197
198
class CompUnitLDSTs(CompUnitsBase):
    """ Group of LD/ST Computation Units, all handed the same operation
        and immediate.
    """

    def __init__(self, rwid, opwid, n_ldsts, mem):
        """ Inputs:

            * :rwid:    bit width of register file(s) - both FP and INT
            * :opwid:   operand bit width
            * :n_ldsts: number of LD/ST Computation Units to create
            * :mem:     memory object passed to every LDSTCompUnit
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs: one per LD/ST unit
        self.alus = [ALU(rwid) for _ in range(n_ldsts)]

        # see compldst.py for "internal" opcode
        aluopwid = 4
        ldst_units = [LDSTCompUnit(rwid, aluopwid, alu, mem)
                      for alu in self.alus]

        CompUnitsBase.__init__(self, rwid, ldst_units, ldstmode=True)

    def elaborate(self, platform):
        """ Base-class wiring plus the shared operation / immediate. """
        m = CompUnitsBase.elaborate(self, platform)

        # hand the same operation to all units, 4 lower bits though
        for unit in self.units:
            m.d.comb += [unit.oper_i[0:4].eq(self.oper_i),
                         unit.imm_i.eq(self.imm_i),
                         unit.isalu_i.eq(0),
                        ]

        return m
236
237
class CompUnitALUs(CompUnitsBase):
    """ Group of integer ALU Computation Units, all handed the same
        operation and immediate.
    """

    def __init__(self, rwid, opwid, n_alus):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
            * :n_alus: number of ALU Computation Units to create
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs
        int_alus = [ALU(rwid) for _ in range(n_alus)]

        aluopwid = 3  # extra bit for immediate mode
        comp_units = [ComputationUnitNoDelay(rwid, aluopwid, alu)
                      for alu in int_alus]

        CompUnitsBase.__init__(self, rwid, comp_units)

    def elaborate(self, platform):
        """ Base-class wiring plus the shared operation / immediate. """
        m = CompUnitsBase.elaborate(self, platform)

        # hand the same operation to all units, only lower 3 bits though
        for unit in self.units:
            m.d.comb += [unit.oper_i[0:3].eq(self.oper_i),
                         unit.imm_i.eq(self.imm_i),
                        ]

        return m
274
275
class CompUnitBR(CompUnitsBase):
    """ Group holding a single branch Computation Unit. """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:  bit width of register file(s) - both FP and INT
            * :opwid: operand bit width

            Note: the BranchALU is kept as self.bgt (and its Computation
            Unit as self.br1) so that a shadow unit can be created for it
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Branch ALU and CU
        aluopwid = 3  # extra bit for immediate mode
        self.bgt = BranchALU(rwid)
        self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)
        CompUnitsBase.__init__(self, rwid, [self.br1])

    def elaborate(self, platform):
        """ Base-class wiring plus the shared operation / immediate. """
        m = CompUnitsBase.elaborate(self, platform)

        # hand the same operation to all units
        for unit in self.units:
            m.d.comb += [unit.oper_i.eq(self.oper_i),
                         unit.imm_i.eq(self.imm_i),
                        ]

        return m
309
310
class FunctionUnits(Elaboratable):
    """ Wraps an FU-FU dependency matrix and an FU-Reg dependency matrix
        behind one set of ports.

        After elaborate() has run, ``wr_pend_o`` is also available as an
        attribute (it is the FU-Reg matrix's wr_pend_o).
    """

    def __init__(self, n_regs, n_int_alus):
        """ Inputs:

            * :n_regs:     width of the per-register vectors
            * :n_int_alus: width of the per-Function-Unit vectors
        """
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        self.dest_i = Signal(n_regs, reset_less=True)  # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True)  # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True)  # oper2 R# in

        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        self.dest_rsel_o = Signal(n_regs, reset_less=True)  # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True)  # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True)  # src2 reg (bot)

        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid

    def elaborate(self, platform):
        """ Instantiate both matrices and connect them to the ports. """
        m = Module()
        comb = m.d.comb

        n_intfus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
        m.submodules.intfudeps = intfudeps
        # Integer FU-Reg Dep Matrix
        intregdeps = FURegDepMatrix(n_intfus, self.n_regs, 2)
        m.submodules.intregdeps = intregdeps

        # global pending vectors: exported, and fed back into the
        # FU-Reg matrix's own pending inputs
        comb += [self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o),
                 self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o),
                 intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o),
                 intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o),
                ]

        # FU-Reg pending outputs drive the FU-FU matrix
        comb += [intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o),
                 intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o),
                ]
        self.wr_pend_o = intregdeps.wr_pend_o  # also output for use in WaWGrid

        # FU-FU matrix control and results
        comb += [intfudeps.issue_i.eq(self.fn_issue_i),
                 intfudeps.go_rd_i.eq(self.go_rd_i),
                 intfudeps.go_wr_i.eq(self.go_wr_i),
                 intfudeps.go_die_i.eq(self.go_die_i),
                 self.readable_o.eq(intfudeps.readable_o),
                 self.writable_o.eq(intfudeps.writable_o),
                ]

        # Connect function issue / arrays, and dest/src1/src2
        comb += [intregdeps.dest_i.eq(self.dest_i),
                 intregdeps.src_i[0].eq(self.src1_i),
                 intregdeps.src_i[1].eq(self.src2_i),
                ]

        # FU-Reg matrix control
        comb += [intregdeps.go_rd_i.eq(self.go_rd_i),
                 intregdeps.go_wr_i.eq(self.go_wr_i),
                 intregdeps.go_die_i.eq(self.go_die_i),
                 intregdeps.issue_i.eq(self.fn_issue_i),
                ]

        # register-select outputs
        comb += [self.dest_rsel_o.eq(intregdeps.dest_rsel_o),
                 self.src1_rsel_o.eq(intregdeps.src_rsel_o[0]),
                 self.src2_rsel_o.eq(intregdeps.src_rsel_o[1]),
                ]

        return m
384
385
class Scoreboard(Elaboratable):
    """ Experimental scoreboard: ties together register files, a test
        memory, ALU / LDST / Branch Computation Units, the FU-FU and
        FU-Reg dependency matrices, group pickers, issue units, shadow
        matrices and a branch-speculation recorder.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # Memory (test for now)
        self.mem = TestMemory(self.rwid, 8)  # not too big, takes too long

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(2)
        self.lsissue = IssueUnitGroup(2)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.alu_imm_i = Signal(rwid, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)
        self.ls_oper_i = Signal(4, reset_less=True)
        self.ls_imm_i = Signal(rwid, reset_less=True)

        # inputs
        self.int_dest_i = Signal(range(n_regs), reset_less=True)  # Dest R# in
        self.int_src1_i = Signal(range(n_regs), reset_less=True)  # oper1 R# in
        self.int_src2_i = Signal(range(n_regs), reset_less=True)  # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True)  # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True)  # instruction was accepted
        self.busy_o = Signal(reset_less=True)  # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Build and wire up the whole scoreboard.

            The Function Unit count and the LD/ST unit count are derived
            from the three issue groups created in __init__, so the
            Computation Unit vectors and the issue vector stay the same
            width if a group size is changed.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs
        m.submodules.mem = mem = self.mem

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and BR ALUs.  total FU count is the sum of the three
        # issue groups (ALU + LD/ST + Branch): 2 + 2 + 1 = 5 as configured
        n_int_alus = (self.aluissue.n_insns + self.lsissue.n_insns +
                      self.brissue.n_insns)
        cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
        cub = CompUnitBR(self.rwid, 3)  # 1 BR ALUs

        # LDST Comp Units
        n_ldsts = self.lsissue.n_insns
        cul = CompUnitLDSTs(self.rwid, 4, n_ldsts, self.mem)

        # Comp Units
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
        bgt = cub.bgt  # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Memory FUs
        # NOTE(review): the meaning of the second argument (5) is not
        # visible from here - confirm before deriving it from anything
        m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)

        # Memory Priority Picker 1: one gateway per memory port
        mempick1 = GroupPicker(n_ldsts)  # picks 1 reader and 1 writer to intreg
        m.submodules.mempick1 = mempick1

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0  # for now

        # all-ones mask, one bit per integer Function Unit
        fu_mask = (1 << n_intfus) - 1

        # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
        intpick1 = GroupPicker(n_intfus)  # picks 1 reader and 1 writer to intreg
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [    regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     self.issue_o.eq(issueunit.issue_o)
                ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cua.imm_i.eq(self.alu_imm_i)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)
        comb += cul.oper_i.eq(self.ls_oper_i)
        comb += cul.imm_i.eq(self.ls_imm_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # Memory Function Unit
        #---------
        reset_b = Signal(cul.n_units, reset_less=True)
        sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)

        comb += memfus.fn_issue_i.eq(cul.issue_i)  # Comp Unit Issue -> Mem FUs
        comb += memfus.addr_en_i.eq(cul.adr_rel_o)  # Match enable on adr rel
        comb += memfus.addr_rs_i.eq(reset_b)  # reset same as LDSTCompUnit

        # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
        # in a transitive fashion).  This cycle activates based on LDSTCompUnit
        # issue_i.  multi-issue gets a bit more complex but not a lot.
        prior_ldsts = Signal(cul.n_units, reset_less=True)
        sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
        # NOTE(review): bit 3 is treated as LD and bit 2 as ST here, but
        # IssueToScoreboard builds ls_oper_i as Cat(op[0], opi[0], op[4:6])
        # and its comment calls op bit 4 "LD" and bit 5 "ST", which would
        # land on ls_oper_i bits 2 and 3 respectively - confirm which
        # comment is right against compldst.py
        with m.If(self.ls_oper_i[3]):  # LD bit of operand
            comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
        with m.If(self.ls_oper_i[2]):  # ST bit of operand
            comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)

        # TODO: adr_rel_o needs to go into L1 Cache.  for now,
        # just immediately activate go_adr
        comb += cul.go_ad_i.eq(cul.adr_rel_o)

        # connect up address data: one address per LD/ST unit
        for i, ldst in enumerate(cul.units):
            comb += memfus.addrs_i[i].eq(ldst.addr_o)

        # connect loadable / storable to go_ld/go_st.
        # XXX should only be done when the memory ld/st has actually happened!
        go_st_i = Signal(cul.n_units, reset_less=True)
        go_ld_i = Signal(cul.n_units, reset_less=True)
        comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &\
                                  cul.adr_rel_o & cul.ld_o)
        comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &\
                                  cul.sto_rel_o & cul.st_o)
        comb += memfus.go_ld_i.eq(go_ld_i)
        comb += memfus.go_st_i.eq(go_st_i)
        #comb += cul.go_wr_i.eq(go_ld_i)
        comb += cul.go_st_i.eq(go_st_i)

        #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])  # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])  # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus])  # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.done_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        # NOTE(review): the per-FU loop is taken to be inside this If -
        # confirm the nesting against the upstream file
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # the mask covers exactly the n_intfus issue bits
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & fu_mask)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & fu_mask)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        # NOTE(review): everything down to the s_fail_i hookup is taken to
        # be inside this If - confirm the nesting against the upstream file
        with m.If(br1.go_wr_i):
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        """ Yield the register files' contents followed by the top-level
            ports listed below.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ Return everything __iter__ yields, as a list. """
        return list(self)
725
726
class IssueToScoreboard(Elaboratable):
    """ Couples an InstructionQ to a Scoreboard: the instruction at the
        head of the queue is decoded into one of three Function-Unit
        groups (branch, ld/st, alu) and presented to the Scoreboard.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   passed to InstructionQ; also sizes p_add_i / qlen_o
            * :n_in:   number of instruction inputs (entries of data_i)
            * :n_out:  passed to InstructionQ
            * :rwid:   register bit width
            * :opwid:  operation bit width
            * :n_regs: number of registers, passed to Scoreboard
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        mqbits = unsigned(int(log(qlen) / log(2))+2)
        self.p_add_i = Signal(mqbits)  # instructions to add (from data_i)
        self.p_ready_o = Signal()  # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True)  # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        """ Create the queue and scoreboard and connect them up. """
        m = Module()
        comb = m.d.comb

        iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += [self.busy_o.eq(sc.busy_o),
                 self.qlen_o.eq(iq.qlen_o),
                ]

        # link up instruction queue
        comb += [iq.p_add_i.eq(self.p_add_i),
                 self.p_ready_o.eq(iq.p_ready_o),
                ]
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  here an item is popped (n_sub_i set to 1) only when the
        # group being waited on reports a non-zero fn_issue_o and the
        # queue is not empty.

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()
        wait_issue_ls = Signal()

        with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
            # set instruction pop length to 1 if the unit accepted
            for waiting, group in ((wait_issue_ls, sc.lsissue),
                                   (wait_issue_br, sc.brissue),
                                   (wait_issue_alu, sc.aluissue)):
                with m.If(waiting & (group.fn_issue_o != 0)):
                    with m.If(iq.qlen_o != 0):
                        comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation from the head of the queue
            head = iq.data_o[0]
            imm = head.imm_i
            dest = head.dest_i
            src1 = head.src1_i
            src2 = head.src2_i
            op = head.oper_i
            opi = head.opim_i  # immediate set

            # set the src/dest regs, and enable the regfile
            comb += [sc.int_dest_i.eq(dest),
                     sc.int_src1_i.eq(src1),
                     sc.int_src2_i.eq(src2),
                     sc.reg_enable_i.eq(1),
                    ]

            # choose a Function-Unit-Group
            with m.If((op & (0x3<<2)) != 0):  # branch
                comb += [sc.br_oper_i.eq(Cat(op[0:2], opi)),
                         sc.br_imm_i.eq(imm),
                         sc.brissue.insn_i.eq(1),
                         wait_issue_br.eq(1),
                        ]
            with m.Elif((op & (0x3<<4)) != 0):  # ld/st
                # see compldst.py
                # bit 0: ADD/SUB
                # bit 1: immed
                # bit 4: LD
                # bit 5: ST
                # NOTE(review): op bits 4 and 5 end up as ls_oper_i bits
                # 2 and 3, yet Scoreboard.elaborate comments ls_oper_i[3]
                # as "LD" and ls_oper_i[2] as "ST" - confirm which
                # labelling is correct
                comb += [sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6])),
                         sc.ls_imm_i.eq(imm),
                         sc.lsissue.insn_i.eq(1),
                         wait_issue_ls.eq(1),
                        ]
            with m.Else():  # alu
                comb += [sc.alu_oper_i.eq(Cat(op[0:2], opi)),
                         sc.alu_imm_i.eq(imm),
                         sc.aluissue.insn_i.eq(1),
                         wait_issue_alu.eq(1),
                        ]

        # XXX TODO
        # these indicate that the instruction is to be made
        # shadow-dependent on
        # (either) branch success or branch fail
        #yield sc.branch_fail_i.eq(branch_fail)
        #yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        """ Yield p_ready_o, every member of each data_i entry, p_add_i. """
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        """ Return everything __iter__ yields, as a list. """
        return list(self)
847
848
# operation numbers understood by RegSim.op: four arithmetic operations
# (0-3) followed by four comparisons (4-7)
(IADD, ISUB, IMUL, ISHF,
 IBGT, IBLT, IBEQ, IBNE) = range(8)
857
858
class RegSim:
    """ Software model of the integer register file, used to compute the
        values the hardware registers are expected to hold.
    """

    def __init__(self, rwidth, nregs):
        """ Inputs:

            * :rwidth: register width in bits
            * :nregs:  number of registers (all start at zero)
        """
        self.rwidth = rwidth
        self.regs = [0 for _ in range(nregs)]

    def op(self, op, op_imm, imm, src1, src2, dest):
        """ Perform one operation on the model and return its result.

            * :op:     operation number (IADD ... IBNE)
            * :op_imm: when true, *imm* replaces the second source register
            * :imm:    immediate value
            * :src1:   first source register number
            * :src2:   second source register number
            * :dest:   destination register number

            The result is masked to rwidth bits and stored in *dest*.
            Any other operation number returns 0 and stores nothing.
        """
        maxbits = (1 << self.rwidth) - 1
        lhs = self.regs[src1] & maxbits
        rhs = imm if op_imm else self.regs[src2] & maxbits

        # one thunk per operation, so only the selected one is evaluated
        handlers = {
            IADD: lambda: lhs + rhs,
            ISUB: lambda: lhs - rhs,
            IMUL: lambda: lhs * rhs,
            ISHF: lambda: lhs >> (rhs & maxbits),
            IBGT: lambda: int(lhs > rhs),
            IBLT: lambda: int(lhs < rhs),
            IBEQ: lambda: int(lhs == rhs),
            IBNE: lambda: int(lhs != rhs),
        }
        handler = handlers.get(op)
        if handler is None:
            return 0  # LD/ST TODO

        result = handler() & maxbits
        self.setval(dest, result)
        return result

    def setval(self, dest, val):
        """ Store *val* in register *dest*, logging the write. """
        print ("sim setval", dest, hex(val))
        self.regs[dest] = val

    def dump(self, dut):
        """ Simulation process: print expected and actual value of every
            register, marking each as OK or !ok.
        """
        for i, expected in enumerate(self.regs):
            actual = yield dut.intregs.regs[i].reg
            okstr = "!ok" if actual != expected else "OK"
            print("reg %d expected %x received %x %s" % \
                  (i, expected, actual, okstr))

    def check(self, dut):
        """ Simulation process: compare every register against the model.

            On the first mismatch the offending register is printed, all
            registers are dumped, and an AssertionError is raised.
        """
        for i, expected in enumerate(self.regs):
            actual = yield dut.intregs.regs[i].reg
            if actual == expected:
                continue
            print("reg %d expected %x received %x\n" % (i, expected, actual))
            yield from self.dump(dut)
            assert False
910
def instr_q(dut, op, op_imm, imm, src1, src2, dest,
            branch_success, branch_fail):
    """ Simulation process: push one instruction into dut's queue.

        The instruction fields are written to dut.data_i[0], p_add_i is
        set to the number of instructions, and the process then advances
        a cycle at a time until p_ready_o reads true, after which p_add_i
        is cleared.  branch_success and branch_fail are accepted but not
        used here.
    """
    instrs = [dict(oper_i=op, dest_i=dest, imm_i=imm, opim_i=op_imm,
                   src1_i=src1, src2_i=src2)]

    sendlen = len(instrs)
    for idx, instr in enumerate(instrs):
        yield from eq(dut.data_i[idx], instr)
        di = yield dut.data_i[idx]
        print ("senddata %d %x" % (idx, di))
    yield dut.p_add_i.eq(sendlen)

    # advance at least one cycle, then keep going until accepted
    while True:
        yield
        if (yield dut.p_ready_o):
            break

    yield dut.p_add_i.eq(0)
929
930
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """ Simulation process: present one instruction directly to the
        scoreboard and wait for it to be issued.

        If either of bits 2 and 3 of *op* is set, the branch issue group
        is used; otherwise the ALU group.  In both cases only the lower
        two bits of *op* are passed on as the operation.
    """
    yield from disable_issue(dut)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    # pick the issue group together with its operation / immediate inputs
    if (op & (0x3<<2)) != 0:  # branch
        dut_issue, oper_i, imm_i = dut.brissue, dut.br_oper_i, dut.br_imm_i
    else:
        dut_issue, oper_i, imm_i = dut.aluissue, dut.alu_oper_i, dut.alu_imm_i
    yield dut_issue.insn_i.eq(1)
    yield oper_i.eq(Const(op & 0x3, 2))
    yield imm_i.eq(imm)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
955
956
def print_reg(dut, rnums):
    """Print the current value of the listed registers (generator).

    Each `dut.intregs.regs[n].reg` for `n` in `rnums` is yielded so the
    simulator returns its value; a single line with the register
    numbers and their hex values is printed at the end.
    """
    hexvals = []
    for regnum in rnums:
        current = yield dut.intregs.regs[regnum].reg
        hexvals.append("%x" % current)
    names = ','.join(map(str, rnums))
    values = ','.join(hexvals)
    print ("reg %s: %s" % (names, values))
964
965
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Return a list of `n_ops` randomly generated instruction tuples.

    Each tuple is `(src1, src2, dest, op, opi, imm)`, with a trailing
    `(0, 0)` shadow pair appended when `shadowing` is true.

    * register numbers are drawn from 1 .. `dut.n_regs`-1
    * `imm` is drawn from 1 .. 2**`dut.rwid` - 1
    * `op` is drawn from 0 .. `max_opnums`
    * `opi` is 1 only when a draw from 0..2 comes up zero, else 0

    Random numbers are drawn in the order src1, src2, imm, dest, op,
    opi, so results are reproducible for a given seed.
    """
    top_reg = dut.n_regs-1
    top_imm = (1<<dut.rwid)-1

    insts = []
    for _ in range(n_ops):
        src1 = randint(1, top_reg)
        src2 = randint(1, top_reg)
        imm = randint(1, top_imm)
        dest = randint(1, top_reg)
        op = randint(0, max_opnums)
        opi = 1 if randint(0, 2) == 0 else 0

        record = (src1, src2, dest, op, opi, imm)
        if shadowing:
            record += ((0, 0),)
        insts.append(record)
    return insts
981
982
def wait_for_busy_clear(dut):
    """Step the clock until `dut.busy_o` reads false (generator).

    "busy" is printed for every cycle in which the DUT is still busy.
    """
    still_busy = yield dut.busy_o
    while still_busy:
        print ("busy",)
        yield
        still_busy = yield dut.busy_o
990
def disable_issue(dut):
    """Drop the issue request on the ALU, branch and LD/ST issue units.

    Generator: yields one `insn_i.eq(0)` assignment per unit, in the
    order aluissue, brissue, lsissue.
    """
    for unit_name in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, unit_name).insn_i.eq(0)
995
996
def wait_for_issue(dut, dut_issue):
    """Step the clock until `dut_issue.fn_issue_o` reads true (generator).

    "busy" is printed for each cycle spent waiting.  Once issue is seen,
    all issue requests are dropped via `disable_issue` and
    `dut.reg_enable_i` is cleared.
    """
    issued = yield dut_issue.fn_issue_o
    while not issued:
        print ("busy",)
        yield
        issued = yield dut_issue.fn_issue_o

    # instruction accepted: withdraw the request
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
1008
def scoreboard_branch_sim(dut, alusim):
    """Branch-shadowing simulation (generator for run_simulation).

    Seeds the RNG, loads the same random values into the DUT registers
    and into `alusim`, then issues a small instruction list containing
    a branch.  Instruction tuples are

        (src1, src2, dest, op, (shadow_on, shadow_off))

    and for a branch (`op >= 4`) the `dest` slot instead carries a pair
    of lists `(branch_ok, branch_fail)` holding the instructions for
    each outcome.  Those are appended to the issue queue with shadow
    flags (1, 0) and (0, 1) respectively; entries are skipped once
    `dut.branch_direction_o` shows they are no longer wanted.  After the
    DUT goes idle, the same program is replayed on `alusim` (following
    only the path selected by the value `alusim.op` returns) and the
    register files are compared.

    The instruction tuples carry no immediate, so an immediate of 0 is
    passed to `int_instr`, whose signature requires one.
    """

    iseed = 3

    for i in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        if False:
            # create some instructions: branches create a tree
            # NOTE(review): create_random_ops(..., True, ...) returns
            # 7-field tuples, which do not match the 5-field unpacking
            # used below - this disabled path needs updating before use.
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # keep an untouched copy for the software model: the issue loop
        # below consumes (and extends) `insts` in place
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch already resolved: issue without shadowing
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr takes (dut, op, imm, src1, src2, dest, succ, fail):
            # the immediate was previously omitted, which made this call
            # fail with a TypeError.  These tuples have no immediate: use 0.
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # NOTE(review): argument order (op, opi, imm, src1, src2, dest)
            # mirrors the alusim.op call in scoreboard_sim; opi=0 and imm=0
            # are assumed here - confirm against RegSim.op.
            branch_res = alusim.op(op, 0, 0, src1, src2, dest)
            if is_branch:
                # follow only the path the model says the branch took
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1121
1122
def scoreboard_sim(dut, alusim):
    """Main scoreboard simulation (generator for run_simulation).

    Loads register i with the value i in both the DUT and `alusim`,
    builds an instruction list from the enabled (`if True`) groups
    below, then for each instruction runs it on `alusim` and queues it
    into the DUT with `instr_q`.  Once `dut.qlen_o` reads zero and the
    DUT is no longer busy, the DUT registers are checked against the
    model and dumped.

    Instruction tuples consumed by the issue loop have seven fields:

        (src1, src2, dest, op, opi, imm, (br_ok, br_fail))

    Several disabled (`if False`) groups still use older 4-, 5- or
    6-field layouts and would fail to unpack if re-enabled as-is.
    """

    seed(0)

    for i in range(1):

        # set random values in the registers
        # (currently deterministic: register i simply holds the value i)
        for i in range(1, dut.n_regs):
            #val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if True: # LD/ST test (with immediate)
            instrs.append( (1, 2, 0, 0x20, 1, 1, (0, 0)) ) # LD
            #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )

        if True:
            instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )

        if True:
            instrs.append( (7, 3, 2, 4, 0, 0, (0, 0)) )
            instrs.append( (7, 6, 6, 2, 0, 0, (0, 0)) )
            instrs.append( (1, 7, 2, 2, 0, 0, (0, 0)) )

        if True:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append( (1, 4, 7, 4, 1, 23652, (0, 0)))

        # NOTE: the disabled groups from here down to the "very weird
        # failure" case use 4-field tuples, which do not match the
        # 7-field unpacking in the issue loop further down.
        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        # NOTE: the two disabled groups below override registers 5 and 3
        # in both DUT and model, and use a 5-field tuple layout.
        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        # NOTE: 6-field tuples - also incompatible with the issue loop.
        if False:
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                            (i, src1, src2, dest, op, opi, imm))
            # update the software model first, then queue into the DUT
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # queue is empty: step four more clocks before polling busy
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1269
1270
def test_scoreboard():
    """Build the scoreboard DUT, emit its RTLIL and run the simulation.

    The RTLIL is written to test_scoreboard6600.il and the waveform to
    test_scoreboard6600.vcd.  Only `scoreboard_sim` is run; the branch
    simulation (`scoreboard_branch_sim`) is not currently enabled.
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 8)  # created but not yet passed to the simulation

    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process,
                   vcd_name='test_scoreboard6600.vcd')
1284
1285
# run the scoreboard test when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()