209bc99c28e8dae05a8e4003c697aac9fbb385a1
[soc.git] / src / experiment / score6600.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen.hdl.ast import unsigned
4 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
5
6 from regfile.regfile import RegFileArray, treereduce
7 from scoreboard.fu_fu_matrix import FUFUDepMatrix
8 from scoreboard.fu_reg_matrix import FURegDepMatrix
9 from scoreboard.global_pending import GlobalPending
10 from scoreboard.group_picker import GroupPicker
11 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
12 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
13 from scoreboard.instruction_q import Instruction, InstructionQ
14 from scoreboard.memfu import MemFunctionUnits
15
16 from compalu import ComputationUnitNoDelay
17 from compldst import LDSTCompUnit
18
19 from alu_hier import ALU, BranchALU
20 from nmutil.latch import SRLatch
21 from nmutil.nmoperator import eq
22
23 from random import randint, seed
24 from copy import deepcopy
25 from math import log
26
27
class TestMemory(Elaboratable):
    """Small test memory built around an nmigen ``Memory``.

    The memory is ``regwid`` bits wide and has ``(1 << addrw) // ddepth``
    entries; every entry is initialised to its own index.
    """

    def __init__(self, regwid, addrw):
        self.ddepth = 1  # regwid // 8
        n_entries = (1 << addrw) // self.ddepth
        self.mem = Memory(width=regwid, depth=n_entries,
                          init=range(n_entries))

    def elaborate(self, platform):
        m = Module()

        # the ports are created here, so self.rdport and self.wrport only
        # exist after elaborate() has been called
        self.rdport = self.mem.read_port()
        m.submodules.rdport = self.rdport

        self.wrport = self.mem.write_port()
        m.submodules.wrport = self.wrport

        return m
39
40
class MemSim:
    """Plain-python memory model used by the simulation helpers.

    Holds ``(1 << addrw) // ddepth`` integers, each initialised to its own
    index.  Addresses are shifted right by ``ddepth`` before indexing and
    stored data is truncated to ``regwid`` bits.
    """

    def __init__(self, regwid, addrw):
        self.regwid = regwid
        self.ddepth = 1  # regwid // 8
        n_entries = (1 << addrw) // self.ddepth
        self.mem = list(range(n_entries))

    def _slot(self, addr):
        """Translate an address into an index into self.mem."""
        return addr >> self.ddepth

    def ld(self, addr):
        """Return the value stored at *addr*."""
        return self.mem[self._slot(addr)]

    def st(self, addr, data):
        """Store *data* (masked to regwid bits) at *addr*."""
        width_mask = (1 << self.regwid) - 1
        self.mem[self._slot(addr)] = data & width_mask
53
54
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Amazingly, this class works recursively.  It's supposed to just
        look after some ALUs (that can handle the same operations),
        grouping them together, however it turns out that the same code
        can also group *groups* of Computation Units together as well.

        Basically it was intended just to concatenate the ALU's issue,
        go_rd etc. signals together, which start out as bits and become
        sequences.  Turns out that the same trick works just as well
        on Computation Units!

        So this class may be used recursively to present a top-level
        sequential concatenation of all the signals in and out of
        ALUs, whilst at the same time making it convenient to group
        ALUs together.

        At the lower level, the intent is that groups of (identical)
        ALUs may be passed the same operation.  Even beyond that,
        the intent is that the group of (identical) ALUs actually
        share the *same pipeline* and as such become a "Concurrent
        Computation Unit" as defined by Mitch Alsup (see section
        11.4.9.3)
    """

    # per-unit signals concatenated into this group's outputs
    _OUTPUTS = ("rd_rel_o", "req_rel_o", "busy_o")
    # group inputs split back out to the per-unit signals
    _INPUTS = ("go_die_i", "shadown_i", "go_wr_i", "go_rd_i", "issue_i")
    # the same two sets, for the extra signals present in LD/ST mode
    _LDST_OUTPUTS = ("ld_o", "st_o", "adr_rel_o", "sto_rel_o",
                     "load_mem_o", "stwd_mem_o")
    _LDST_INPUTS = ("go_ad_i", "go_st_i")

    def __init__(self, rwid, units, ldstmode=False):
        """ Inputs:

            * :rwid:     bit width of register file(s) - both FP and INT
            * :units:    sequence of ALUs (or CompUnitsBase derivatives)
            * :ldstmode: when True, the additional LD/ST signals
                         (go_ad_i, go_st_i, ld_o, st_o, adr_rel_o,
                         sto_rel_o, load_mem_o, stwd_mem_o, addr_o)
                         are created and wired up in elaborate()
        """
        self.units = units
        self.ldstmode = ldstmode
        self.rwid = rwid

        # a group of groups is as wide as the sum of its members;
        # a group of plain units has one bit per unit
        if units and isinstance(units[0], CompUnitsBase):
            self.n_units = sum(u.n_units for u in units)
        else:
            self.n_units = len(units)

        n_units = self.n_units

        # inputs
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)
        if ldstmode:
            self.go_ad_i = Signal(n_units, reset_less=True)
            self.go_st_i = Signal(n_units, reset_less=True)

        # outputs
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)
        if ldstmode:
            self.ld_o = Signal(n_units, reset_less=True)  # op is LD
            self.st_o = Signal(n_units, reset_less=True)  # op is ST
            self.adr_rel_o = Signal(n_units, reset_less=True)
            self.sto_rel_o = Signal(n_units, reset_less=True)
            self.load_mem_o = Signal(n_units, reset_less=True)
            self.stwd_mem_o = Signal(n_units, reset_less=True)
            self.addr_o = Signal(rwid, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)

    def _cat(self, name):
        """Concatenate attribute *name* of every unit, in unit order."""
        return Cat(*[getattr(unit, name) for unit in self.units])

    def _collect(self, comb, names):
        """Drive each group output in *names* from the units' outputs."""
        for name in names:
            comb += getattr(self, name).eq(self._cat(name))

    def _distribute(self, comb, names):
        """Drive the units' inputs in *names* from the group input."""
        for name in names:
            comb += self._cat(name).eq(getattr(self, name))

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        for i, alu in enumerate(self.units):
            setattr(m.submodules, "comp%d" % i, alu)

        self._collect(comb, self._OUTPUTS)
        self._distribute(comb, self._INPUTS)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        if self.units:
            data_o = treereduce(self.units, "data_o")
            comb += self.data_o.eq(data_o)
            if self.ldstmode:
                addr_o = treereduce(self.units, "addr_o")
                comb += self.addr_o.eq(addr_o)

        # every unit sees the same two source operands
        for alu in self.units:
            comb += alu.src1_i.eq(self.src1_i)
            comb += alu.src2_i.eq(self.src2_i)

        if self.ldstmode:
            self._collect(comb, self._LDST_OUTPUTS)
            self._distribute(comb, self._LDST_INPUTS)

        return m
206
207
class CompUnitLDSTs(CompUnitsBase):
    """Group of LD/ST Computation Units, one ALU per unit."""

    def __init__(self, rwid, opwid, n_ldsts, mem):
        """ Inputs:

            * :rwid:    bit width of register file(s) - both FP and INT
            * :opwid:   bit width of oper_i
            * :n_ldsts: number of LD/ST units (and ALUs) to create
            * :mem:     handed unchanged to every LDSTCompUnit
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs: all of them are built before any LD/ST unit is
        self.alus = [ALU(rwid) for _ in range(n_ldsts)]

        aluopwid = 4  # see compldst.py for "internal" opcode
        ldst_units = [LDSTCompUnit(rwid, aluopwid, alu, mem)
                      for alu in self.alus]

        CompUnitsBase.__init__(self, rwid, ldst_units, ldstmode=True)

    def elaborate(self, platform):
        m = CompUnitsBase.elaborate(self, platform)

        # hand the same operation to all units, 4 lower bits though;
        # the immediate is shared as well and isalu_i is tied to zero
        for unit in self.units:
            m.d.comb += [unit.oper_i[0:4].eq(self.oper_i),
                         unit.imm_i.eq(self.imm_i),
                         unit.isalu_i.eq(0)]

        return m
245
246
class CompUnitALUs(CompUnitsBase):
    """Group of integer ALU Computation Units, one ALU per unit."""

    def __init__(self, rwid, opwid, n_alus):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  bit width of oper_i
            * :n_alus: number of ALUs (and Computation Units) to create
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs: all of them are built before any Computation Unit is
        int_alus = [ALU(rwid) for _ in range(n_alus)]

        aluopwid = 3  # extra bit for immediate mode
        comp_units = [ComputationUnitNoDelay(rwid, aluopwid, alu)
                      for alu in int_alus]

        CompUnitsBase.__init__(self, rwid, comp_units)

    def elaborate(self, platform):
        m = CompUnitsBase.elaborate(self, platform)

        # hand the same operation to all units, only lower 3 bits though
        for unit in self.units:
            m.d.comb += [unit.oper_i[0:3].eq(self.oper_i),
                         unit.imm_i.eq(self.imm_i)]

        return m
283
284
class CompUnitBR(CompUnitsBase):
    """Single-unit group holding the branch ALU and its Computation Unit."""

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:  bit width of register file(s) - both FP and INT
            * :opwid: bit width of oper_i

            Note: the branch ALU and its Computation Unit are kept as
            self.bgt and self.br1 so that a shadow unit can be created
            for the branch
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Branch ALU and CU
        aluopwid = 3  # extra bit for immediate mode
        self.bgt = BranchALU(rwid)
        self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)

        CompUnitsBase.__init__(self, rwid, [self.br1])

    def elaborate(self, platform):
        m = CompUnitsBase.elaborate(self, platform)

        # hand the same operation to all units
        for unit in self.units:
            m.d.comb += [unit.oper_i.eq(self.oper_i),
                         unit.imm_i.eq(self.imm_i)]

        return m
318
319
class FunctionUnits(Elaboratable):
    """Integer Function Units: an FU-FU and an FU-Reg dependency matrix.

    Both matrices are created in elaborate(); this class only forwards
    its own inputs to them and their outputs back out.
    """

    def __init__(self, n_regs, n_int_alus):
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        self.dest_i = Signal(n_regs, reset_less=True)  # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True)  # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True)  # oper2 R# in

        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        self.dest_rsel_o = Signal(n_regs, reset_less=True)  # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True)  # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True)  # src2 reg (bot)

        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here, for use in
        # WaWGrid.  It is attached as self.wr_pend_o during elaborate().

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        n_fus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        fufu = FUFUDepMatrix(n_fus, n_fus)
        m.submodules.intfudeps = fufu
        # Integer FU-Reg Dep Matrix
        fureg = FURegDepMatrix(n_fus, self.n_regs, 2)
        m.submodules.intregdeps = fureg

        # global pending vectors: exported, and fed straight back into
        # the FU-Reg matrix
        comb += [self.g_int_rd_pend_o.eq(fureg.v_rd_rsel_o),
                 self.g_int_wr_pend_o.eq(fureg.v_wr_rsel_o),
                 fureg.rd_pend_i.eq(fureg.v_rd_rsel_o),
                 fureg.wr_pend_i.eq(fureg.v_wr_rsel_o)]

        # FU-Reg pending outputs feed the FU-FU matrix
        comb += [fufu.rd_pend_i.eq(fureg.rd_pend_o),
                 fufu.wr_pend_i.eq(fureg.wr_pend_o)]
        self.wr_pend_o = fureg.wr_pend_o  # also output for use in WaWGrid

        # FU-FU matrix control in, readable/writable out
        comb += [fufu.issue_i.eq(self.fn_issue_i),
                 fufu.go_rd_i.eq(self.go_rd_i),
                 fufu.go_wr_i.eq(self.go_wr_i),
                 fufu.go_die_i.eq(self.go_die_i),
                 self.readable_o.eq(fufu.readable_o),
                 self.writable_o.eq(fufu.writable_o)]

        # Connect function issue / arrays, and dest/src1/src2
        comb += [fureg.dest_i.eq(self.dest_i),
                 fureg.src_i[0].eq(self.src1_i),
                 fureg.src_i[1].eq(self.src2_i)]

        comb += [fureg.go_rd_i.eq(self.go_rd_i),
                 fureg.go_wr_i.eq(self.go_wr_i),
                 fureg.go_die_i.eq(self.go_die_i),
                 fureg.issue_i.eq(self.fn_issue_i)]

        comb += [self.dest_rsel_o.eq(fureg.dest_rsel_o),
                 self.src1_rsel_o.eq(fureg.src_rsel_o[0]),
                 self.src2_rsel_o.eq(fureg.src_rsel_o[1])]

        return m
393
394
class Scoreboard(Elaboratable):
    """Top-level scoreboard wiring.

    Instantiates the register files, the ALU / LD-ST / branch Computation
    Units, the integer and memory Function Unit matrices, the group
    pickers, the issue unit, the shadow matrices and the branch
    speculation recorder, and connects them all together in elaborate().
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # Memory (test for now)
        self.mem = TestMemory(self.rwid, 8) # not too big, takes too long

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(2)
        self.lsissue = IssueUnitGroup(2)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.alu_imm_i = Signal(rwid, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)
        self.ls_oper_i = Signal(4, reset_less=True)
        self.ls_imm_i = Signal(rwid, reset_less=True)

        # inputs
        self.int_dest_i = Signal(range(n_regs), reset_less=True) # Dest R# in
        self.int_src1_i = Signal(range(n_regs), reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(range(n_regs), reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        # NOTE(review): elaborate() sets branch_direction_o to
        # br1.data_o + 1 and treats data_o == 1 as "branch occurs", so the
        # 1/2 meaning given above may be inverted - confirm.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs
        m.submodules.mem = mem = self.mem

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # the FP ports are requested but not connected to anything below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and BR ALUs
        # NOTE(review): 5 is hard-coded; presumably it must equal the
        # total number of units in cua + cul + cub (2 + 2 + 1) - confirm.
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
        cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs

        # LDST Comp Units
        # NOTE(review): n_ldsts is hard-coded separately from
        # self.lsissue.n_insns, and the LD/ST units are given None as
        # their memory (self.mem is only added as a submodule above).
        n_ldsts = 2
        cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, None)

        # Comp Units
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Memory FUs
        m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)

        # Memory Priority Picker 1: one gateway per memory port
        mempick1 = GroupPicker(n_ldsts) # picks 1 reader and 1 writer to intreg
        m.submodules.mempick1 = mempick1

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
        intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        # NOTE(review): as written the branch shadow is a separate
        # ShadowMatrix (bshadow, width 1), not an extra bit on "shadows".
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [    regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     self.issue_o.eq(issueunit.issue_o)
                ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cua.imm_i.eq(self.alu_imm_i)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)
        comb += cul.oper_i.eq(self.ls_oper_i)
        comb += cul.imm_i.eq(self.ls_imm_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        # busy_o is the OR of every Computation Unit's busy bit
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # Memory Function Unit
        #---------
        # reset_b is registered (sync), so it follows go_st/go_wr/go_die
        # of the LD/ST units one clock later
        reset_b = Signal(cul.n_units, reset_less=True)
        sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)

        comb += memfus.fn_issue_i.eq(cul.issue_i) # Comp Unit Issue -> Mem FUs
        comb += memfus.addr_en_i.eq(cul.adr_rel_o) # Match enable on adr rel
        comb += memfus.addr_rs_i.eq(reset_b) # reset same as LDSTCompUnit

        # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
        # in a transitive fashion).  This cycle activates based on LDSTCompUnit
        # issue_i.  multi-issue gets a bit more complex but not a lot.
        prior_ldsts = Signal(cul.n_units, reset_less=True)
        sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
        with m.If(self.ls_oper_i[2]): # LD bit of operand
            comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
        with m.If(self.ls_oper_i[3]): # ST bit of operand
            comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)

        # TODO: adr_rel_o needs to go into L1 Cache.  for now,
        # just immediately activate go_adr
        comb += cul.go_ad_i.eq(cul.adr_rel_o)

        # connect up address data
        # NOTE(review): indices 0 and 1 are hard-coded, so this only
        # works while there are exactly two LD/ST units.
        comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
        comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)

        # connect loadable / storable to go_ld/go_st.
        # XXX should only be done when the memory ld/st has actually happened!
        go_st_i = Signal(cul.n_units, reset_less=True)
        go_ld_i = Signal(cul.n_units, reset_less=True)
        comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &\
                                  cul.req_rel_o & cul.ld_o)
        comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &\
                                  cul.sto_rel_o & cul.st_o)
        comb += memfus.go_ld_i.eq(go_ld_i)
        comb += memfus.go_st_i.eq(go_st_i)
        #comb += cul.go_wr_i.eq(go_ld_i)
        comb += cul.go_st_i.eq(go_st_i)

        #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i
        # NOTE(review): the code below drives the separate "bshadow"
        # matrix rather than an extra bit on "shadows".

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # asserted from branch issue (or while bspec.active_i is set)
        # and deasserted when the branch unit's go_wr_i is raised
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # NOTE(review): 0x1f is presumably the all-ones mask for
        # n_intfus == 5 function units - confirm if n_int_alus changes.
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        # NOTE(review): the source indentation was lost; the loop below
        # is assumed to sit inside this If - confirm.
        with m.If(br1.go_wr_i):
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        # yields both register files' contents followed by this module's
        # register-number inputs, issue_o and the branch signals
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        # everything __iter__ yields, as a list
        return list(self)
734
735
class IssueToScoreboard(Elaboratable):
    """Instruction queue feeding a Scoreboard.

    The head of the queue is inspected in place, steered to the branch,
    LD/ST or ALU issue group according to its opcode bits, and popped
    once that group reports that it issued.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        mqbits = unsigned(int(log(qlen) / log(2))+2)
        self.p_add_i = Signal(mqbits)  # instructions to add (from data_i)
        self.p_ready_o = Signal()  # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True)  # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        iq = InstructionQ(self.rwid, self.opw, self.qlen,
                          self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        self.intregs = sc.intregs

        # the "busy" signal and instruction queue length, then the
        # instruction queue's add/ready handshake
        comb += [self.busy_o.eq(sc.busy_o),
                 self.qlen_o.eq(iq.qlen_o),
                 iq.p_add_i.eq(self.p_add_i),
                 self.p_ready_o.eq(iq.p_ready_o)]
        for slot in range(self.n_in):
            comb += eq(iq.data_i[slot], self.data_i[slot])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items: an item is only popped (n_sub_i) below, once the issue
        # group it is waiting on reports fn_issue_o.

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()
        wait_issue_ls = Signal()

        with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
            # set instruction pop length to 1 if the unit accepted
            for waiting, group in ((wait_issue_ls, sc.lsissue),
                                   (wait_issue_br, sc.brissue),
                                   (wait_issue_alu, sc.aluissue)):
                with m.If(waiting & (group.fn_issue_o != 0)):
                    with m.If(iq.qlen_o != 0):
                        comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            head = iq.data_o[0]
            op = head.oper_i
            opi = head.opim_i  # immediate set

            # set the src/dest regs and enable the regfile
            comb += [sc.int_dest_i.eq(head.dest_i),
                     sc.int_src1_i.eq(head.src1_i),
                     sc.int_src2_i.eq(head.src2_i),
                     sc.reg_enable_i.eq(1)]

            # choose a Function-Unit-Group
            with m.If((op & (0x3<<2)) != 0):  # branch
                comb += [sc.br_oper_i.eq(Cat(op[0:2], opi)),
                         sc.br_imm_i.eq(head.imm_i),
                         sc.brissue.insn_i.eq(1),
                         wait_issue_br.eq(1)]
            with m.Elif((op & (0x3<<4)) != 0):  # ld/st
                # see compldst.py
                # bit 0: ADD/SUB
                # bit 1: immed
                # bit 4: LD
                # bit 5: ST
                comb += [sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6])),
                         sc.ls_imm_i.eq(head.imm_i),
                         sc.lsissue.insn_i.eq(1),
                         wait_issue_ls.eq(1)]
            with m.Else():  # alu
                comb += [sc.alu_oper_i.eq(Cat(op[0:2], opi)),
                         sc.alu_imm_i.eq(head.imm_i),
                         sc.aluissue.insn_i.eq(1),
                         wait_issue_alu.eq(1)]

        # XXX TODO
        # these indicate that the instruction is to be made
        # shadow-dependent on
        # (either) branch success or branch fail
        #yield sc.branch_fail_i.eq(branch_fail)
        #yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        yield self.p_ready_o
        for insn in self.data_i:
            yield from list(insn)
        yield self.p_add_i

    def ports(self):
        return list(self)
856
857
# opcode numbers understood by RegSim.op
IADD = 0
ISUB = 1
IMUL = 2
ISHF = 3
IBGT = 4
IBLT = 5
IBEQ = 6
IBNE = 7


class RegSim:
    """Plain-python model of the integer register file.

    Executes the same operations as the hardware on a list of integers
    so that the simulated register contents can be compared against it.
    """

    # opcode -> function of (operand1, operand2, width mask)
    _OPS = {
        IADD: lambda a, b, mask: a + b,
        ISUB: lambda a, b, mask: a - b,
        IMUL: lambda a, b, mask: a * b,
        ISHF: lambda a, b, mask: a >> (b & mask),
        IBGT: lambda a, b, mask: int(a > b),
        IBLT: lambda a, b, mask: int(a < b),
        IBEQ: lambda a, b, mask: int(a == b),
        IBNE: lambda a, b, mask: int(a != b),
    }

    def __init__(self, rwidth, nregs):
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, op_imm, imm, src1, src2, dest):
        """Execute one operation and store the result in register *dest*.

        src1 and src2 are register numbers.  When op_imm is true the
        second operand is *imm* rather than register src2.  The result is
        truncated to rwidth bits, written to *dest* and returned.  Any
        unrecognised opcode returns 0 and leaves the registers untouched.
        """
        mask = (1 << self.rwidth) - 1
        lhs = self.regs[src1] & mask
        rhs = imm if op_imm else (self.regs[src2] & mask)

        compute = self._OPS.get(op)
        if compute is None:
            return 0  # LD/ST TODO

        result = compute(lhs, rhs, mask) & mask
        self.setval(dest, result)
        return result

    def setval(self, dest, val):
        """Write *val* into register *dest*, logging the write."""
        print ("sim setval", dest, hex(val))
        self.regs[dest] = val

    def dump(self, dut):
        """Simulation generator: print every register, expected vs actual."""
        for regnum, expected in enumerate(self.regs):
            actual = yield dut.intregs.regs[regnum].reg
            verdict = "OK" if actual == expected else "!ok"
            print("reg %d expected %x received %x %s" %
                  (regnum, expected, actual, verdict))

    def check(self, dut):
        """Simulation generator: assert the dut registers match the model.

        On the first mismatch the offending register is reported, all
        registers are dumped and an AssertionError is raised.
        """
        for regnum, expected in enumerate(self.regs):
            actual = yield dut.intregs.regs[regnum].reg
            if actual == expected:
                continue
            print("reg %d expected %x received %x\n" %
                  (regnum, expected, actual))
            yield from self.dump(dut)
            assert False
919
def instr_q(dut, op, op_imm, imm, src1, src2, dest,
            branch_success, branch_fail):
    """Simulation generator: push one instruction into the dut's queue.

    The instruction is placed on dut.data_i[0], p_add_i is set to 1 and
    the generator then waits until p_ready_o is seen high before clearing
    p_add_i again.  branch_success and branch_fail are accepted but not
    used here.
    """
    batch = [dict(oper_i=op, dest_i=dest, imm_i=imm, opim_i=op_imm,
                  src1_i=src1, src2_i=src2)]
    n_send = 1

    for slot in range(n_send):
        yield from eq(dut.data_i[slot], batch[slot])
        raw = yield dut.data_i[slot]
        print ("senddata %d %x" % (slot, raw))

    yield dut.p_add_i.eq(n_send)
    yield

    # keep clocking until the queue reports that it took the instruction
    while not (yield dut.p_ready_o):
        yield

    yield dut.p_add_i.eq(0)
938
939
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """Simulation generator: present one instruction directly to the dut.

    Opcodes with bit 2 or bit 3 set go to the branch issue group, all
    others to the ALU issue group.  The generator returns once the chosen
    group has reported fn_issue_o.
    """
    yield from disable_issue(dut)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    is_branch = (op & (0x3<<2)) != 0
    if is_branch:
        dut_issue, oper_sig, imm_sig = dut.brissue, dut.br_oper_i, dut.br_imm_i
    else:
        dut_issue, oper_sig, imm_sig = (dut.aluissue, dut.alu_oper_i,
                                        dut.alu_imm_i)

    yield dut_issue.insn_i.eq(1)
    yield oper_sig.eq(Const(op & 0x3, 2))
    yield imm_sig.eq(imm)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
964
965
def print_reg(dut, rnums):
    """Print the current contents of the registers listed in `rnums`.

    Simulation generator: each selected register signal of `dut.intregs`
    is yielded to read its value; one line is then printed holding the
    register numbers and their values in hex, both comma-separated.
    """
    hex_values = []
    for rnum in rnums:
        value = yield dut.intregs.regs[rnum].reg
        hex_values.append("%x" % value)
    labels = ','.join(str(rnum) for rnum in rnums)
    print("reg %s: %s" % (labels, ','.join(hex_values)))
973
974
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Build a list of `n_ops` randomly generated instruction tuples.

    Register numbers are drawn from 1..dut.n_regs-1, the immediate from
    1..2**dut.rwid-1 and the operation from 0..max_opnums.  The
    immediate-select flag `opi` is 1 when a draw from 0..2 comes up zero
    and 0 otherwise.

    Each entry is ``(src1, src2, dest, op, opi, imm)``; with `shadowing`
    set, a trailing ``(0, 0)`` shadow pair is appended to every entry.
    """
    top_reg = dut.n_regs - 1
    imm_max = (1 << dut.rwid) - 1
    # un-shadowed entries simply omit the trailing shadow pair
    tail = ((0, 0),) if shadowing else ()

    insts = []
    for _ in range(n_ops):
        # the order of the draws is part of the reproducible sequence
        src1 = randint(1, top_reg)
        src2 = randint(1, top_reg)
        imm = randint(1, imm_max)
        dest = randint(1, top_reg)
        op = randint(0, max_opnums)
        opi = int(randint(0, 2) == 0)
        insts.append((src1, src2, dest, op, opi, imm) + tail)
    return insts
990
991
def wait_for_busy_clear(dut):
    """Clock the simulation until `dut.busy_o` reads false.

    Simulation generator: `busy_o` is sampled, and for as long as it is
    set "busy" is printed and one more clock is taken before re-sampling.
    """
    still_busy = yield dut.busy_o
    while still_busy:
        print("busy")
        yield
        still_busy = yield dut.busy_o
999
def disable_issue(dut):
    """Clear `insn_i` on the ALU, branch and LD/ST issue units of `dut`.

    Simulation generator yielding one assignment per issue unit, in the
    order aluissue, brissue, lsissue.
    """
    for unit_name in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, unit_name).insn_i.eq(0)
1004
1005
def wait_for_issue(dut, dut_issue):
    """Clock until `dut_issue.fn_issue_o` is set, then withdraw the request.

    Simulation generator: while the issue flag reads false, "busy" is
    printed and one clock is taken.  Once it reads true, all issue units
    are disabled via `disable_issue` and `reg_enable_i` is cleared.
    """
    issued = yield dut_issue.fn_issue_o
    while not issued:
        print("busy")
        yield
        issued = yield dut_issue.fn_issue_o

    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
1017
def scoreboard_branch_sim(dut, alusim):
    """Simulation generator exercising branch shadowing on `dut`.

    Seeds the random generator, loads random values into registers
    1..n_regs-1 of both `dut` and the `alusim` reference model, issues a
    short hand-written program (a non-branch instruction followed by a
    branch carrying "success" and "fail" instruction lists), waits for
    `dut` to report not busy, replays the program on `alusim`, and finally
    compares the register files with `alusim.check` / `alusim.dump`.

    Instruction tuples in this function are laid out as
    ``(src1, src2, dest, op, (shadow_on, shadow_off))``.  An `op` of 4 or
    more marks a branch, whose `dest` slot instead holds the pair
    ``(branch_ok, branch_fail)`` of instruction lists.
    """

    iseed = 3

    for i in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        # (note: the loop variable below reuses the name `i` of the outer loop)
        for i in range(1, dut.n_regs):
            val = 31+i*3 # immediately overwritten by the random value below
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # disabled: randomly generated variant of the test.
        # NOTE(review): create_random_ops(..., True, ...) returns 7-element
        # tuples, whereas the loops below unpack 5-element tuples -- this
        # block looks stale; confirm before enabling.
        if False:
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        # active: fixed two-instruction program; the branch (op 4) carries an
        # empty "success" side (None) and one instruction on its "fail" side
        if True:
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # independent copy for the reference-model replay further down; the
        # issue loop below consumes `insts` destructively
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        # `instrs` and `insts` name the same list, so instructions appended
        # to `instrs` inside the loop are later popped from `insts`
        instrs = insts
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            # NOTE(review): the two conditions below skip the shadow_on
            # instruction when the direction reads 1 and the shadow_off one
            # when it reads 2, which looks inverted relative to the wording
            # of the trailing comments -- confirm the encoding of
            # branch_direction_o.
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            # once a direction has been read back, the instruction is issued
            # without any shadow flags
            if branch_direction != 0:
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                # for a branch the `dest` slot carries the two instruction
                # lists; src2 is then used as the destination value
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # NOTE(review): int_instr is declared in this file as
            # (dut, op, imm, src1, src2, dest, branch_success, branch_fail);
            # this call supplies no `imm` and is therefore one positional
            # argument short.
            yield from int_instr(dut, op, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay the program on the reference model, following only the side
        # of each branch selected by the model's own result
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                # placeholder for an empty branch side
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # NOTE(review): four arguments are passed here, whereas
            # scoreboard_sim calls alusim.op with six (op, opi, imm, src1,
            # src2, dest) -- confirm against the definition of op.
            branch_res = alusim.op(op, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1130
1131
def scoreboard_sim(dut, alusim):
    """Simulation generator running a regression program through `dut`.

    With the random generator seeded to 0, registers 1..n_regs-1 of both
    `dut` and the `alusim` reference model are loaded with random values.
    Each enabled instruction is applied to `alusim` and pushed into the
    instruction queue of `dut` via `instr_q`.  Once the queue length reads
    zero and `dut` reports not busy, the register files are compared with
    `alusim.check` and printed with `alusim.dump`.

    Instructions consumed by the issue loop are 7-element tuples
    ``(src1, src2, dest, op, opi, imm, (br_ok, br_fail))``.
    """

    seed(0)

    for i in range(1):

        # set random values in the registers
        # (note: the loop variable below reuses the name `i` of the outer loop)
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        # Only the `if True:` groups contribute to the program.  Several of
        # the disabled groups further down use shorter tuples (4, 5 or 6
        # elements) that would not unpack in the issue loop if enabled as-is.
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if False: # LD/ST test (with immediate)
            instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )
            #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )

        if True:
            instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )

        if True:
            instrs.append( (7, 3, 2, 4, 0, 0, (0, 0)) )
            instrs.append( (7, 6, 6, 2, 0, 0, (0, 0)) )
            instrs.append( (1, 7, 2, 2, 0, 0, (0, 0)) )

        if True:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append( (1, 4, 7, 4, 1, 23652, (0, 0)))

        # --- disabled cases below use 4-element tuples ---
        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        # --- disabled cases below use 5-element tuples and also preset
        # registers 5 and 3 in both dut and reference model ---
        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        # --- disabled case below uses 6-element tuples ---
        if False:
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                    (i, src1, src2, dest, op, opi, imm))
            # the reference model is updated first, in program order, and
            # only then is the instruction queued on the dut
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # a few extra clocks after the queue reads empty, before polling
        # busy_o
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1278
1279
def test_scoreboard():
    """Build the scoreboard, write its RTLIL, and run `scoreboard_sim`.

    The RTLIL is written to "test_scoreboard6600.il" and the simulation
    trace to "test_scoreboard6600.vcd", both in the current directory.
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 16)  # constructed but not used by this test

    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    testbench = scoreboard_sim(dut, alusim)
    run_simulation(dut, testbench, vcd_name='test_scoreboard6600.vcd')

    # the branch-shadowing testbench is currently not run:
    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1293
1294
# run the scoreboard simulation when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()