cc85ac500bc61963e8ce4207b56a0ce77d4aff4c
[soc.git] / src / soc / experiment / score6600.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen.hdl.ast import unsigned
4 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
5
6 from soc.regfile.regfile import RegFileArray, treereduce
7 from soc.scoreboard.fu_fu_matrix import FUFUDepMatrix
8 from soc.scoreboard.fu_reg_matrix import FURegDepMatrix
9 from soc.scoreboard.global_pending import GlobalPending
10 from soc.scoreboard.group_picker import GroupPicker
11 from soc.scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
12 from soc.scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
13 from soc.scoreboard.instruction_q import Instruction, InstructionQ
14 from soc.scoreboard.memfu import MemFunctionUnits
15
16 from compalu import ComputationUnitNoDelay
17 from compldst import LDSTCompUnit
18 from testmem import TestMemory
19
20 from alu_hier import ALU, BranchALU
21 from nmutil.latch import SRLatch
22 from nmutil.nmoperator import eq
23
24 from random import randint, seed
25 from copy import deepcopy
26 from math import log
27
28
class MemSim:
    """Pure-Python model of a small memory, for use by the simulation.

    The memory is a flat list, pre-filled so that every location initially
    holds its own index.  Addresses are converted to list indices by
    shifting right by ``ddepth`` bits.
    """

    def __init__(self, regwid, addrw):
        """ Inputs:

            * :regwid: data width in bits (stored values are truncated to it)
            * :addrw:  address width in bits
        """
        self.regwid = regwid
        self.ddepth = 1  # regwid//8
        n_entries = (1 << addrw) // self.ddepth
        self.mem = list(range(n_entries))

    def _index(self, addr):
        """Convert an address into an index into ``self.mem``."""
        return addr >> self.ddepth

    def ld(self, addr):
        """Return the value stored at *addr*."""
        return self.mem[self._index(addr)]

    def st(self, addr, data):
        """Store *data* (truncated to ``regwid`` bits) at *addr*."""
        mask = (1 << self.regwid) - 1
        self.mem[self._index(addr)] = data & mask
41
42
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Amazingly, this class works recursively.  It's supposed to just
        look after some ALUs (that can handle the same operations),
        grouping them together, however it turns out that the same code
        can also group *groups* of Computation Units together as well.

        Basically it was intended just to concatenate the ALU's issue,
        go_rd etc. signals together, which start out as bits and become
        sequences.  Turns out that the same trick works just as well
        on Computation Units!

        So this class may be used recursively to present a top-level
        sequential concatenation of all the signals in and out of
        ALUs, whilst at the same time making it convenient to group
        ALUs together.

        At the lower level, the intent is that groups of (identical)
        ALUs may be passed the same operation.  Even beyond that,
        the intent is that that group of (identical) ALUs actually
        share the *same pipeline* and as such become a "Concurrent
        Computation Unit" as defined by Mitch Alsup (see section
        11.4.9.3)
    """
    def __init__(self, rwid, units, ldstmode=False):
        """ Inputs:

            * :rwid:     bit width of register file(s) - both FP and INT
            * :units:    sequence of ALUs (or CompUnitsBase derivatives)
            * :ldstmode: when True, the extra LD/ST control signals
                         (go_ad_i, go_st_i, ld_o, st_o, ...) are created
                         here and wired up in elaborate()
        """
        self.units = units
        self.ldstmode = ldstmode
        self.rwid = rwid

        # when grouping *groups*, the width is the total of the sub-groups
        if units and isinstance(units[0], CompUnitsBase):
            self.n_units = sum(u.n_units for u in units)
        else:
            self.n_units = len(units)

        n_units = self.n_units

        # NOTE: Signals are deliberately created by direct attribute
        # assignment (one per line) rather than in a loop or helper.

        # inputs
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)
        if ldstmode:
            self.go_ad_i = Signal(n_units, reset_less=True)
            self.go_st_i = Signal(n_units, reset_less=True)

        # outputs
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)
        if ldstmode:
            self.ld_o = Signal(n_units, reset_less=True)  # op is LD
            self.st_o = Signal(n_units, reset_less=True)  # op is ST
            self.adr_rel_o = Signal(n_units, reset_less=True)
            self.sto_rel_o = Signal(n_units, reset_less=True)
            self.load_mem_o = Signal(n_units, reset_less=True)
            self.stwd_mem_o = Signal(n_units, reset_less=True)
            self.addr_o = Signal(rwid, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        """Add every unit as a submodule and connect the group signals.

        Per-unit outputs are concatenated (unit 0 lowest) into this
        group's outputs; this group's inputs are spread back out over the
        per-unit inputs in the same order.
        """
        m = Module()
        comb = m.d.comb
        units = self.units

        for i, alu in enumerate(units):
            setattr(m.submodules, "comp%d" % i, alu)

        def cat_of(attr):
            # concatenation of the same-named signal from every unit
            return Cat(*[getattr(unit, attr) for unit in units])

        # outputs: gathered from the units
        comb += self.rd_rel_o.eq(cat_of("rd_rel_o"))
        comb += self.req_rel_o.eq(cat_of("req_rel_o"))
        comb += self.busy_o.eq(cat_of("busy_o"))
        # inputs: distributed to the units
        comb += cat_of("go_die_i").eq(self.go_die_i)
        comb += cat_of("shadown_i").eq(self.shadown_i)
        comb += cat_of("go_wr_i").eq(self.go_wr_i)
        comb += cat_of("go_rd_i").eq(self.go_rd_i)
        comb += cat_of("issue_i").eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        if units:
            comb += self.data_o.eq(treereduce(units, "data_o"))
            if self.ldstmode:
                comb += self.addr_o.eq(treereduce(units, "addr_o"))

        # every unit sees the same pair of source operands
        for alu in units:
            comb += alu.src1_i.eq(self.src1_i)
            comb += alu.src2_i.eq(self.src2_i)

        if not self.ldstmode:
            return m

        # LD/ST-only signals, handled exactly as above
        comb += self.ld_o.eq(cat_of("ld_o"))
        comb += self.st_o.eq(cat_of("st_o"))
        comb += self.adr_rel_o.eq(cat_of("adr_rel_o"))
        comb += self.sto_rel_o.eq(cat_of("sto_rel_o"))
        comb += self.load_mem_o.eq(cat_of("load_mem_o"))
        comb += self.stwd_mem_o.eq(cat_of("stwd_mem_o"))
        comb += cat_of("go_ad_i").eq(self.go_ad_i)
        comb += cat_of("go_st_i").eq(self.go_st_i)

        return m
194
195
class CompUnitLDSTs(CompUnitsBase):
    """Group of LD/ST Computation Units which are all given the same
    operation and immediate.
    """

    def __init__(self, rwid, opwid, n_ldsts, mem):
        """ Inputs:

            * :rwid:    bit width of register file(s) - both FP and INT
            * :opwid:   operand bit width
            * :n_ldsts: number of LD/ST units to create
            * :mem:     memory object handed to every LDSTCompUnit
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs: one per LD/ST unit
        self.alus = [ALU(rwid) for _ in range(n_ldsts)]

        # see compldst.py for "internal" opcode
        aluopwid = 4
        ldst_units = [LDSTCompUnit(rwid, aluopwid, alu, mem)
                      for alu in self.alus]

        CompUnitsBase.__init__(self, rwid, ldst_units, ldstmode=True)

    def elaborate(self, platform):
        """Extend the base wiring with the shared operation/immediate."""
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the same operation to all units, 4 lower bits though
        for unit in self.units:
            comb += [unit.oper_i[0:4].eq(self.oper_i),
                     unit.imm_i.eq(self.imm_i),
                     unit.isalu_i.eq(0),
                    ]

        return m
233
234
class CompUnitALUs(CompUnitsBase):
    """Group of integer ALU Computation Units which are all given the
    same operation and immediate.
    """

    def __init__(self, rwid, opwid, n_alus):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
            * :n_alus: number of ALU units to create
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs
        int_alus = [ALU(rwid) for _ in range(n_alus)]

        # extra bit for immediate mode
        aluopwid = 3
        comp_units = [ComputationUnitNoDelay(rwid, aluopwid, alu)
                      for alu in int_alus]

        CompUnitsBase.__init__(self, rwid, comp_units)

    def elaborate(self, platform):
        """Extend the base wiring with the shared operation/immediate."""
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the same operation to all units, only lower 3 bits though
        for unit in self.units:
            comb += [unit.oper_i[0:3].eq(self.oper_i),
                     unit.imm_i.eq(self.imm_i),
                    ]

        return m
271
272
class CompUnitBR(CompUnitsBase):
    """Group containing the single Branch Computation Unit."""

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width

            Note: the branch ALU and its Computation Unit are kept as
            self.bgt and self.br1 so that a shadow unit can be created
            for them
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Branch ALU and CU
        self.bgt = BranchALU(rwid)
        aluopwid = 3  # extra bit for immediate mode
        self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)

        CompUnitsBase.__init__(self, rwid, [self.br1])

    def elaborate(self, platform):
        """Extend the base wiring with the shared operation/immediate."""
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the same operation to all units
        for unit in self.units:
            comb += [unit.oper_i.eq(self.oper_i),
                     unit.imm_i.eq(self.imm_i),
                    ]

        return m
306
307
class FunctionUnits(Elaboratable):
    """Pairs an FU-FU Dependency Matrix with an FU-Reg Dependency Matrix
    and exposes their combined inputs and outputs.
    """

    def __init__(self, n_regs, n_int_alus):
        """ Inputs:

            * :n_regs:     width of the register-select vectors
            * :n_int_alus: number of Function Units (width of the per-FU
                           vectors)
        """
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        # register-select inputs
        self.dest_i = Signal(n_regs, reset_less=True)  # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True)  # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True)  # oper2 R# in

        # global read/write pending vectors
        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        # register-select outputs
        self.dest_rsel_o = Signal(n_regs, reset_less=True)  # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True)  # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True)  # src2 reg (bot)

        # per-FU outputs
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        # per-FU inputs
        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here, for use in
        # WaWGrid (the attribute is only set during elaborate())

    def elaborate(self, platform):
        """Create both dependency matrices and connect them up."""
        m = Module()
        comb = m.d.comb

        n_fus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        fufu = FUFUDepMatrix(n_fus, n_fus)
        m.submodules.intfudeps = fufu
        # Integer FU-Reg Dep Matrix
        fureg = FURegDepMatrix(n_fus, self.n_regs, 2)
        m.submodules.intregdeps = fureg

        # pending vectors: exported, and also fed back into the FU-Reg matrix
        comb += [self.g_int_rd_pend_o.eq(fureg.v_rd_rsel_o),
                 self.g_int_wr_pend_o.eq(fureg.v_wr_rsel_o),
                 fureg.rd_pend_i.eq(fureg.v_rd_rsel_o),
                 fureg.wr_pend_i.eq(fureg.v_wr_rsel_o),
                ]

        # FU-Reg pending outputs drive the FU-FU matrix
        comb += [fufu.rd_pend_i.eq(fureg.rd_pend_o),
                 fufu.wr_pend_i.eq(fureg.wr_pend_o),
                ]
        self.wr_pend_o = fureg.wr_pend_o  # also output for use in WaWGrid

        # FU-FU matrix control in, readable/writable out
        comb += [fufu.issue_i.eq(self.fn_issue_i),
                 fufu.go_rd_i.eq(self.go_rd_i),
                 fufu.go_wr_i.eq(self.go_wr_i),
                 fufu.go_die_i.eq(self.go_die_i),
                 self.readable_o.eq(fufu.readable_o),
                 self.writable_o.eq(fufu.writable_o),
                ]

        # Connect function issue / arrays, and dest/src1/src2
        comb += [fureg.dest_i.eq(self.dest_i),
                 fureg.src_i[0].eq(self.src1_i),
                 fureg.src_i[1].eq(self.src2_i),
                ]

        # FU-Reg matrix receives the same control signals as the FU-FU one
        comb += [fureg.go_rd_i.eq(self.go_rd_i),
                 fureg.go_wr_i.eq(self.go_wr_i),
                 fureg.go_die_i.eq(self.go_die_i),
                 fureg.issue_i.eq(self.fn_issue_i),
                ]

        # register-select outputs
        comb += [self.dest_rsel_o.eq(fureg.dest_rsel_o),
                 self.src1_rsel_o.eq(fureg.src_rsel_o[0]),
                 self.src2_rsel_o.eq(fureg.src_rsel_o[1]),
                ]

        return m
381
382
class Scoreboard(Elaboratable):
    """Top-level scoreboard.

    Owns the register files, the test memory and the three issue unit
    groups (ALU, LD/ST, branch).  elaborate() creates the Computation
    Units, dependency matrices, priority pickers, shadow matrices and the
    branch speculation recorder, and wires them all together.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # Memory (test for now)
        self.mem = TestMemory(self.rwid, 8) # not too big, takes too long

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(2)
        self.lsissue = IssueUnitGroup(2)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.alu_imm_i = Signal(rwid, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)
        self.ls_oper_i = Signal(4, reset_less=True)
        self.ls_imm_i = Signal(rwid, reset_less=True)

        # inputs
        self.int_dest_i = Signal(range(n_regs), reset_less=True) # Dest R# in
        self.int_src1_i = Signal(range(n_regs), reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(range(n_regs), reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """Instantiate all sub-blocks and connect them together.

        The Computation Units are grouped in the order [ALUs, LD/STs,
        branch]; several hard-coded indices below (req_rel_i[0..4],
        addrs_i[0..1]) depend on that order.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs
        m.submodules.mem = mem = self.mem

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are created but not connected to anything below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and BR ALUs
        # NOTE(review): hard-coded; presumably 2 ALUs + 2 LD/STs + 1 BR to
        # match the IssueUnitGroup sizes given in __init__ - confirm
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
        cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs

        # LDST Comp Units
        # NOTE(review): n_ldsts is hard-coded whereas the number of LD/ST
        # units actually created comes from self.lsissue.n_insns
        n_ldsts = 2
        cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, self.mem)

        # Comp Units
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Memory FUs
        m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)

        # Memory Priority Picker 1: one gateway per memory port
        mempick1 = GroupPicker(n_ldsts) # picks 1 reader and 1 writer to intreg
        m.submodules.mempick1 = mempick1

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
        intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        # NOTE(review): as written here the branch shadow is a separate
        # ShadowMatrix (bshadow) of width 1, not an extra column
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [ regdecode.dest_i.eq(self.int_dest_i),
                  regdecode.src1_i.eq(self.int_src1_i),
                  regdecode.src2_i.eq(self.int_src2_i),
                  regdecode.enable_i.eq(self.reg_enable_i),
                  self.issue_o.eq(issueunit.issue_o)
                ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cua.imm_i.eq(self.alu_imm_i)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)
        comb += cul.oper_i.eq(self.ls_oper_i)
        comb += cul.imm_i.eq(self.ls_imm_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # Memory Function Unit
        #---------
        # registered (one clock later) OR of the LD/ST go_st/go_wr/go_die
        reset_b = Signal(cul.n_units, reset_less=True)
        sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)

        comb += memfus.fn_issue_i.eq(cul.issue_i) # Comp Unit Issue -> Mem FUs
        comb += memfus.addr_en_i.eq(cul.adr_rel_o) # Match enable on adr rel
        comb += memfus.addr_rs_i.eq(reset_b) # reset same as LDSTCompUnit

        # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
        # in a transitive fashion).  This cycle activates based on LDSTCompUnit
        # issue_i.  multi-issue gets a bit more complex but not a lot.
        prior_ldsts = Signal(cul.n_units, reset_less=True)
        sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
        with m.If(self.ls_oper_i[3]): # LD bit of operand
            comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
        with m.If(self.ls_oper_i[2]): # ST bit of operand
            comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)

        # TODO: adr_rel_o needs to go into L1 Cache.  for now,
        # just immediately activate go_adr
        comb += cul.go_ad_i.eq(cul.adr_rel_o)

        # connect up address data (indices hard-coded for two LD/ST units)
        comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
        comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)

        # connect loadable / storable to go_ld/go_st.
        # XXX should only be done when the memory ld/st has actually happened!
        go_st_i = Signal(cul.n_units, reset_less=True)
        go_ld_i = Signal(cul.n_units, reset_less=True)
        comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &\
                                  cul.adr_rel_o & cul.ld_o)
        comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &\
                                  cul.sto_rel_o & cul.st_o)
        comb += memfus.go_ld_i.eq(go_ld_i)
        comb += memfus.go_st_i.eq(go_st_i)
        #comb += cul.go_wr_i.eq(go_ld_i)
        comb += cul.go_st_i.eq(go_st_i)

        #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        #comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        # HACK for now: connect LD/ST request release to *address* release
        comb += intpick1.req_rel_i[0].eq(cu.req_rel_o[0]) # ALU 0
        comb += intpick1.req_rel_i[1].eq(cu.req_rel_o[1]) # ALU 1
        comb += intpick1.req_rel_i[2].eq(cul.adr_rel_o[0]) # LD/ST 0
        comb += intpick1.req_rel_i[3].eq(cul.adr_rel_o[1]) # LD/ST 1
        comb += intpick1.req_rel_i[4].eq(cu.req_rel_o[4]) # BR 0
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # a branch is "active" from its issue until its go_wr
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # 0x1f masks the low 5 bits, i.e. it matches n_intfus == 5 above
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            # data_o+1: 0 stays reserved for "branch not met yet"
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        """Yield the contents of both register files, then the main
        input/output signals of this module.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """Return everything yielded by __iter__ as a list."""
        return list(self)
728
729
class IssueToScoreboard(Elaboratable):
    """Connects an InstructionQ to a Scoreboard.

    The instruction at the head of the queue is inspected, its operation
    is used to select one of the three issue groups (branch, LD/ST or ALU)
    and it is popped from the queue once that group reports it as issued.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   passed to InstructionQ as the queue length
            * :n_in:   number of instructions that can be presented on
                       data_i at once
            * :n_out:  passed to InstructionQ
            * :rwid:   bit width of the register file(s)
            * :opwid:  operation bit width
            * :n_regs: number of registers, passed to the Scoreboard
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # width of the add-count / queue-length signals: log2(qlen) + 2 bits
        mqbits = unsigned(int(log(qlen) / log(2))+2)
        self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
        self.p_ready_o = Signal() # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True) # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        """Create the queue and scoreboard and link them together."""
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        # (this attribute only exists once elaborate() has run)
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when the selected issue group
        # reports a non-zero fn_issue_o (n_sub_i is then set to 1, below)

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()
        wait_issue_ls = Signal()

        with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
            # set instruction pop length to 1 if the unit accepted
            with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            imm = iq.data_o[0].imm_i
            dest = iq.data_o[0].dest_i
            src1 = iq.data_o[0].src1_i
            src2 = iq.data_o[0].src2_i
            op = iq.data_o[0].oper_i
            opi = iq.data_o[0].opim_i # immediate set

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1) # enable the regfile

            # choose a Function-Unit-Group
            # (bits 2-3 of op set: branch.  bits 4-5 set: LD/ST.  else: ALU)
            with m.If((op & (0x3<<2)) != 0): # branch
                comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.br_imm_i.eq(imm)
                comb += sc.brissue.insn_i.eq(1)
                comb += wait_issue_br.eq(1)
            with m.Elif((op & (0x3<<4)) != 0): # ld/st
                # see compldst.py
                # bit 0: ADD/SUB
                # bit 1: immed
                # bit 4: LD
                # bit 5: ST
                comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
                comb += sc.ls_imm_i.eq(imm)
                comb += sc.lsissue.insn_i.eq(1)
                comb += wait_issue_ls.eq(1)
            with m.Else(): # alu
                comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.alu_imm_i.eq(imm)
                comb += sc.aluissue.insn_i.eq(1)
                comb += wait_issue_alu.eq(1)

            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            #yield sc.branch_fail_i.eq(branch_fail)
            #yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        """Yield p_ready_o, every field of every data_i entry, then
        p_add_i.
        """
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        """Return everything yielded by __iter__ as a list."""
        return list(self)
850
851
# Operation numbers understood by the simulator (see RegSim.op):
# four arithmetic operations followed by four branch comparisons.
(IADD, ISUB, IMUL, ISHF,
 IBGT, IBLT, IBEQ, IBNE) = range(8)
860
861
class RegSim:
    """Pure-Python reference model of the integer register file.

    Operations are applied here as well as to the hardware; afterwards
    check() compares every hardware register against the model.
    """

    def __init__(self, rwidth, nregs):
        """ Inputs:

            * :rwidth: register width in bits
            * :nregs:  number of registers (all start at zero)
        """
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, op_imm, imm, src1, src2, dest):
        """Apply one operation to the model.

        * :op:     operation number (IADD, ISUB, ... IBNE)
        * :op_imm: when true, *imm* replaces the second register operand
        * :imm:    immediate value
        * :src1:   register number of the first operand
        * :src2:   register number of the second operand
        * :dest:   register number receiving the result

        Returns the value written to *dest*.  Unrecognised operations
        (LD/ST: TODO) write nothing and return 0.
        """
        maxbits = (1 << self.rwidth) - 1
        a = self.regs[src1] & maxbits
        # note: the immediate is used as-is (it is not masked)
        b = imm if op_imm else self.regs[src2] & maxbits
        if op == IADD:
            val = a + b
        elif op == ISUB:
            val = a - b
        elif op == IMUL:
            val = a * b
        elif op == ISHF:
            val = a >> (b & maxbits)
        elif op == IBGT:
            val = int(a > b)
        elif op == IBLT:
            val = int(a < b)
        elif op == IBEQ:
            val = int(a == b)
        elif op == IBNE:
            val = int(a != b)
        else:
            return 0  # LD/ST TODO
        # wrap the result to the register width
        val &= maxbits
        self.setval(dest, val)
        return val

    def setval(self, dest, val):
        """Write *val* into register *dest* of the model (and log it)."""
        print("sim setval", dest, hex(val))
        self.regs[dest] = val

    def dump(self, dut):
        """Simulation process: print every register, expected vs actual.

        Yields each ``dut.intregs.regs[i].reg`` and expects to be sent
        its current value back.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """Simulation process: verify every register against the model.

        On the first mismatch the whole register file is dumped and
        AssertionError is raised.  An explicit raise is used instead of a
        bare ``assert`` so that the check still fires under ``python -O``.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                raise AssertionError(
                    "register %d: expected %x, received %x" % (i, val, reg))
913
def instr_q(dut, op, op_imm, imm, src1, src2, dest,
            branch_success, branch_fail):
    """Simulation process: push one instruction into the dut's queue.

    The instruction fields are placed on ``dut.data_i[0]``, ``p_add_i``
    is set to 1 and, after at least one clock, held until ``p_ready_o``
    is seen set; ``p_add_i`` is then cleared.  *branch_success* and
    *branch_fail* are accepted but not used here.
    """
    pending = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
                'src1_i': src1, 'src2_i': src2}]
    sendlen = len(pending)

    # present the instruction(s) on the input port
    for idx, fields in enumerate(pending):
        yield from eq(dut.data_i[idx], fields)
        di = yield dut.data_i[idx]
        print("senddata %d %x" % (idx, di))

    # request the add, then clock until the queue has taken it
    yield dut.p_add_i.eq(sendlen)
    yield
    while not (yield dut.p_ready_o):
        yield

    yield dut.p_add_i.eq(0)
932
933
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """Simulation process: issue one instruction directly to a Scoreboard.

    Operations with bit 2 or bit 3 set go to the branch issue group,
    everything else goes to the ALU issue group.  Returns once
    wait_for_issue() has seen the instruction accepted.
    """
    yield from disable_issue(dut)

    # register numbers
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    # pick the issue group and its operation/immediate inputs
    if (op & (0x3 << 2)) != 0:  # branch
        dut_issue = dut.brissue
        oper_sig, imm_sig = dut.br_oper_i, dut.br_imm_i
    else:
        dut_issue = dut.aluissue
        oper_sig, imm_sig = dut.alu_oper_i, dut.alu_imm_i

    yield dut_issue.insn_i.eq(1)
    yield oper_sig.eq(Const(op & 0x3, 2))
    yield imm_sig.eq(imm)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
958
959
def print_reg(dut, rnums):
    """Print the current DUT values of the registers listed in ``rnums``.

    Simulation generator: yields ``dut.intregs.regs[rnum].reg`` for each
    requested register, then prints a single line of the form
    ``reg <n,n,...>: <hex,hex,...>``.
    """
    hex_values = []
    for rnum in rnums:
        current = yield dut.intregs.regs[rnum].reg
        hex_values.append("%x" % current)
    labels = ','.join(str(rnum) for rnum in rnums)
    values = ','.join(hex_values)
    print("reg %s: %s" % (labels, values))
967
968
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Build a list of ``n_ops`` randomly generated instruction tuples.

    Each tuple is ``(src1, src2, dest, op, opi, imm)``; when
    ``shadowing`` is true a trailing ``(0, 0)`` pair is appended.

    * ``src1``, ``src2`` and ``dest`` are drawn from ``1..dut.n_regs-1``
    * ``imm`` is drawn from ``1..(1 << dut.rwid) - 1``
    * ``op`` is drawn from ``0..max_opnums``
    * ``opi`` is 1 when a draw from ``0..2`` comes up 0, otherwise 0

    Values come from the module-level ``random`` generator, so callers
    wanting reproducible sequences must seed it beforehand.
    """
    def one_op():
        # draw order matters for reproducibility under a fixed seed
        src1 = randint(1, dut.n_regs - 1)
        src2 = randint(1, dut.n_regs - 1)
        imm = randint(1, (1 << dut.rwid) - 1)
        dest = randint(1, dut.n_regs - 1)
        op = randint(0, max_opnums)
        opi = 1 if randint(0, 2) == 0 else 0
        fields = (src1, src2, dest, op, opi, imm)
        if shadowing:
            return fields + ((0, 0),)
        return fields

    return [one_op() for _ in range(n_ops)]
984
985
def wait_for_busy_clear(dut):
    """Hold until ``dut.busy_o`` reads false.

    Simulation generator: samples ``dut.busy_o``; while it is true,
    prints "busy", issues one bare ``yield`` and samples again.
    """
    while (yield dut.busy_o):
        print("busy")
        yield
993
def disable_issue(dut):
    """Drop the ``insn_i`` strobe on the ALU, branch and LD/ST issue units.

    Simulation generator yielding three assignments, in that order.
    """
    for issue_unit in (dut.aluissue, dut.brissue, dut.lsissue):
        yield issue_unit.insn_i.eq(0)
998
999
def wait_for_issue(dut, dut_issue):
    """Hold until ``dut_issue.fn_issue_o`` reads true, then stop issuing.

    Simulation generator: while the issue output is false it prints
    "busy" and issues one bare ``yield`` before sampling again.  Once
    the output is seen true, all issue strobes are dropped through
    ``disable_issue`` and ``dut.reg_enable_i`` is cleared.
    """
    while not (yield dut_issue.fn_issue_o):
        print("busy")
        # debugging aid: yield from print_reg(dut, [1,2,3])
        yield
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
1011
def scoreboard_branch_sim(dut, alusim):
    """Test-bench process exercising branch shadowing on the scoreboard.

    Simulation generator.  Registers are loaded with seeded random
    values (mirrored into ``alusim``), a short program containing one
    branch is issued through ``int_instr``, and once the DUT is idle the
    same program is replayed on the software model so that
    ``alusim.check`` can compare register files.

    Instruction tuples here have five fields:
    ``(src1, src2, dest, op, (shadow_on, shadow_off))``.  For a branch
    (``op >= 4``) the ``dest`` field instead carries the pair
    ``(branch_ok, branch_fail)``: two lists of instructions (entries may
    be ``None``) for the taken / not-taken paths.
    """

    iseed = 3

    for i in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for i in range(1, dut.n_regs):
            val = 31+i*3    # deterministic alternative, overridden below
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        if False:
            # create some instructions: branches create a tree
            # NOTE(review): create_random_ops(..., shadowing=True) returns
            # 7-field tuples, which do not fit the 5-field unpacking used
            # by the issue loop below; this path needs updating before it
            # is re-enabled.
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # the issue loop consumes `insts`, so keep an untouched copy for
        # the software-model replay further down
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instrs.pop(0)
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch already resolved: issue without any shadow
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr takes (dut, op, imm, src1, src2, dest, succ, fail);
            # these tuples carry no immediate, so pass 0 for it.
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay the program on the software model, following whichever
        # branch path the model itself computes
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # argument order mirrors the call made in scoreboard_sim:
            # (op, op_imm, imm, src1, src2, dest); no immediate is used here.
            branch_res = alusim.op(op, 0, 0, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1124
1125
def scoreboard_sim(dut, alusim):
    """Test-bench process feeding instructions through the instruction queue.

    Simulation generator.  Every register 1..n_regs-1 is preset (in both
    the DUT and ``alusim``), a list of instruction tuples is assembled
    from the enabled ``if True`` groups, and each instruction is applied
    to the software model and then queued into the DUT with ``instr_q``.
    When the queue has drained and the DUT is no longer busy, the DUT
    registers are compared against the model.

    Instruction tuples consumed by the issue loop have seven fields:
    ``(src1, src2, dest, op, opi, imm, (br_ok, br_fail))``.

    NOTE(review): several disabled (``if False``) groups below still use
    older 4-, 5- or 6-field layouts and would fail to unpack in the issue
    loop if switched on without being converted.
    """

    seed(0)

    for i in range(1):

        # preset each register to its own index (the random and
        # 31+i*3 alternatives are left commented out)
        for i in range(1, dut.n_regs):
            #val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if True: # LD/ST test (with immediate)
            instrs.append( (1, 2, 0, 0x20, 1, 1, (0, 0)) ) # LD
            #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )

        if True:
            instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )

        if True:
            instrs.append( (7, 3, 2, 4, 0, 0, (0, 0)) )
            instrs.append( (7, 6, 6, 2, 0, 0, (0, 0)) )
            instrs.append( (1, 7, 2, 2, 0, 0, (0, 0)) )

        if True:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append( (1, 4, 7, 4, 1, 23652, (0, 0)))

        # --- disabled groups from here on use 4-field tuples (stale layout)
        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        # --- disabled groups using 5-field tuples (stale layout); these
        # also override registers 5 and 3 before issuing
        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        # --- disabled group using 6-field tuples (stale layout)
        if False:
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                    (i, src1, src2, dest, op, opi, imm))
            # the software model is updated first, then the instruction
            # is queued into the DUT
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        # (poll until the queue length reads zero)
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # four further bare yields before polling busy_o
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1272
1273
def test_scoreboard():
    """Build the scoreboard DUT, dump its RTLIL, and run the simulation.

    Writes the RTLIL text to ``test_scoreboard6600.il`` and runs
    ``scoreboard_sim`` against the DUT, recording the waveform to
    ``test_scoreboard6600.vcd``.
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    reg_model = RegSim(16, 8)
    mem_model = MemSim(16, 8)   # constructed, but not passed to anything yet

    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    process = scoreboard_sim(dut, reg_model)
    run_simulation(dut, process, vcd_name='test_scoreboard6600.vcd')

    # alternative test-bench, currently disabled:
    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1287
1288
# Run the scoreboard test-bench when this file is executed as a script.
if __name__ == '__main__':
    test_scoreboard()