add transitive accumulation of LD/STs into MDM
[soc.git] / src / experiment / score6600.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
4
5 from regfile.regfile import RegFileArray, treereduce
6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
7 from scoreboard.fu_reg_matrix import FURegDepMatrix
8 from scoreboard.global_pending import GlobalPending
9 from scoreboard.group_picker import GroupPicker
10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
12 from scoreboard.instruction_q import Instruction, InstructionQ
13 from scoreboard.memfu import MemFunctionUnits
14
15 from compalu import ComputationUnitNoDelay
16 from compldst import LDSTCompUnit
17
18 from alu_hier import ALU, BranchALU
19 from nmutil.latch import SRLatch
20 from nmutil.nmoperator import eq
21
22 from random import randint, seed
23 from copy import deepcopy
24 from math import log
25
26
class TestMemory(Elaboratable):
    """ Small test memory with one read port and one write port that
        share a single address input.

        The memory is initialised so that each location holds its own
        index (init=range(0, depth)), matching the MemSim reference model.
    """
    def __init__(self, regwid, addrw):
        """ Inputs:

            * :regwid: bit width of each memory word
            * :addrw:  bit width of the address input
        """
        self.ddepth = 1 # regwid //8
        depth = (1<<addrw) // self.ddepth
        self.adr = Signal(addrw)      # address (shared by read and write)
        self.dat_r = Signal(regwid)   # read data out
        self.dat_w = Signal(regwid)   # write data in
        self.we = Signal()            # write enable
        self.mem = Memory(width=regwid, depth=depth, init=range(0, depth))

    def elaborate(self, platform):
        m = Module()
        m.submodules.rdport = rdport = self.mem.read_port()
        m.submodules.wrport = wrport = self.mem.write_port()

        # Both ports must see the *same* location for a given address:
        # previously the write port used the full address whilst the read
        # port dropped the low bits, so a store followed by a load of the
        # same address did not hit the same cell.  MemSim drops the low
        # bits for both ld and st, so do likewise here.
        word_adr = self.adr[self.ddepth:] # ignore low bits
        m.d.comb += [
            rdport.addr.eq(word_adr),
            self.dat_r.eq(rdport.data),
            wrport.addr.eq(word_adr),
            wrport.data.eq(self.dat_w),
            wrport.en.eq(self.we),
        ]
        return m
49
50
class MemSim:
    """ Pure-python memory model.

        Every cell starts out holding its own index.  Addresses have their
        low `ddepth` bits shifted away before being used as a list index,
        and stored data is truncated to `regwid` bits.
    """
    def __init__(self, regwid, addrw):
        self.regwid = regwid
        self.ddepth = 1 # regwid//8
        n_cells = (1 << addrw) // self.ddepth
        self.mem = [cell for cell in range(n_cells)]

    def _index(self, addr):
        # drop the low address bits to obtain the cell number
        return addr >> self.ddepth

    def ld(self, addr):
        """ return the value held at (shifted) address `addr` """
        cell = self._index(addr)
        return self.mem[cell]

    def st(self, addr, data):
        """ store `data`, masked to regwid bits, at (shifted) `addr` """
        word_mask = (1 << self.regwid) - 1
        cell = self._index(addr)
        self.mem[cell] = data & word_mask
63
64
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Amazingly, this class works recursively.  It's supposed to just
        look after some ALUs (that can handle the same operations),
        grouping them together, however it turns out that the same code
        can also group *groups* of Computation Units together as well.

        Basically it was intended just to concatenate the ALU's issue,
        go_rd etc. signals together, which start out as bits and become
        sequences.  Turns out that the same trick works just as well
        on Computation Units!

        So this class may be used recursively to present a top-level
        sequential concatenation of all the signals in and out of
        ALUs, whilst at the same time making it convenient to group
        ALUs together.

        At the lower level, the intent is that groups of (identical)
        ALUs may be passed the same operation.  Even beyond that,
        the intent is that that group of (identical) ALUs actually
        share the *same pipeline* and as such become a "Concurrent
        Computation Unit" as defined by Mitch Alsup (see section
        11.4.9.3)
    """
    def __init__(self, rwid, units, ldstmode=False):
        """ Inputs:

            * :rwid:     bit width of register file(s) - both FP and INT
            * :units:    sequence of ALUs (or CompUnitsBase derivatives)
            * :ldstmode: when True, the LD/ST-specific signals (go_ad_i,
                         go_st_i, adr_rel_o, sto_rel_o, load_mem_o,
                         stwd_mem_o) are also created and wired to the units
        """
        self.units = units
        self.ldstmode = ldstmode
        self.rwid = rwid
        if units and isinstance(units[0], CompUnitsBase):
            # group of groups: total width is the sum of the sub-groups
            self.n_units = sum(u.n_units for u in units)
        else:
            self.n_units = len(units)

        n_units = self.n_units

        # inputs
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)
        if ldstmode:
            self.go_ad_i = Signal(n_units, reset_less=True)
            self.go_st_i = Signal(n_units, reset_less=True)

        # outputs
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)
        if ldstmode:
            self.adr_rel_o = Signal(n_units, reset_less=True)
            self.sto_rel_o = Signal(n_units, reset_less=True)
            self.load_mem_o = Signal(n_units, reset_less=True)
            self.stwd_mem_o = Signal(n_units, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        units = self.units

        for i, alu in enumerate(units):
            setattr(m.submodules, "comp%d" % i, alu)

        def gather(name):
            # concatenate the same-named signal from every unit, unit 0 first
            return Cat(*[getattr(alu, name) for alu in units])

        # per-unit outputs are concatenated into this group's outputs
        comb += self.rd_rel_o.eq(gather("rd_rel_o"))
        comb += self.req_rel_o.eq(gather("req_rel_o"))
        comb += self.busy_o.eq(gather("busy_o"))
        # this group's inputs are split out across the per-unit inputs
        comb += gather("go_die_i").eq(self.go_die_i)
        comb += gather("shadown_i").eq(self.shadown_i)
        comb += gather("go_wr_i").eq(self.go_wr_i)
        comb += gather("go_rd_i").eq(self.go_rd_i)
        comb += gather("issue_i").eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "data_o"
        if units:
            data_o = treereduce(units)
            comb += self.data_o.eq(data_o)

        # every unit receives the same two source operands
        for alu in units:
            comb += alu.src1_i.eq(self.src1_i)
            comb += alu.src2_i.eq(self.src2_i)

        if not self.ldstmode:
            return m

        # LD/ST-only signals: same concatenate / split-out scheme as above
        comb += self.adr_rel_o.eq(gather("adr_rel_o"))
        comb += self.sto_rel_o.eq(gather("sto_rel_o"))
        comb += self.load_mem_o.eq(gather("load_mem_o"))
        comb += self.stwd_mem_o.eq(gather("stwd_mem_o"))
        comb += gather("go_ad_i").eq(self.go_ad_i)
        comb += gather("go_st_i").eq(self.go_st_i)

        return m
205
206
class CompUnitLDSTs(CompUnitsBase):
    """ Group of LD/ST Computation Units, each wrapping its own ALU.
        All units are handed the same operation and immediate.
    """

    def __init__(self, rwid, opwid, n_ldsts, mem):
        """ Inputs:

            * :rwid:    bit width of register file(s) - both FP and INT
            * :opwid:   operand bit width
            * :n_ldsts: number of LD/ST units to create
            * :mem:     passed unchanged to each LDSTCompUnit
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs, one per LD/ST unit
        self.alus = [ALU(rwid) for _ in range(n_ldsts)]

        ldst_opwid = 4 # see compldst.py for "internal" opcode
        ldst_units = [LDSTCompUnit(rwid, ldst_opwid, alu, mem)
                      for alu in self.alus]

        CompUnitsBase.__init__(self, rwid, ldst_units, ldstmode=True)

    def elaborate(self, platform):
        m = CompUnitsBase.elaborate(self, platform)

        # hand the same operation to all units, 4 lower bits though
        for unit in self.units:
            m.d.comb += [
                unit.oper_i[0:4].eq(self.oper_i),
                unit.imm_i.eq(self.imm_i),
                unit.isalu_i.eq(0),
            ]

        return m
244
245
class CompUnitALUs(CompUnitsBase):
    """ Group of integer ALU Computation Units.
        All units are handed the same operation and immediate.
    """

    def __init__(self, rwid, opwid, n_alus):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
            * :n_alus: number of ALU Computation Units to create
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs
        int_alus = [ALU(rwid) for _ in range(n_alus)]

        cu_opwid = 3 # extra bit for immediate mode
        comp_units = [ComputationUnitNoDelay(rwid, cu_opwid, alu)
                      for alu in int_alus]

        CompUnitsBase.__init__(self, rwid, comp_units)

    def elaborate(self, platform):
        m = CompUnitsBase.elaborate(self, platform)

        # hand the same operation to all units, only lower 3 bits though
        for unit in self.units:
            m.d.comb += [
                unit.oper_i[0:3].eq(self.oper_i),
                unit.imm_i.eq(self.imm_i),
            ]

        return m
282
283
class CompUnitBR(CompUnitsBase):
    """ Group containing a single Branch Computation Unit. """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:  bit width of register file(s) - both FP and INT
            * :opwid: operand bit width

            Note: the branch ALU (bgt) and its Computation Unit (br1) are
            kept as attributes so that a shadow unit can be created for
            the branch
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Branch ALU and CU
        br_opwid = 3 # extra bit for immediate mode
        self.bgt = BranchALU(rwid)
        self.br1 = ComputationUnitNoDelay(rwid, br_opwid, self.bgt)
        CompUnitsBase.__init__(self, rwid, [self.br1])

    def elaborate(self, platform):
        m = CompUnitsBase.elaborate(self, platform)

        # hand the same operation to all units
        for unit in self.units:
            m.d.comb += [
                unit.oper_i.eq(self.oper_i),
                unit.imm_i.eq(self.imm_i),
            ]

        return m
317
318
class FunctionUnits(Elaboratable):
    """ Joins an FU-FU Dependency Matrix to an FU-Register Dependency
        Matrix and exposes their combined inputs and outputs.

        Note: wr_pend_o (taken from the FU-Reg matrix) only becomes an
        attribute of this object once elaborate() has been called.
    """

    def __init__(self, n_regs, n_int_alus):
        """ Inputs:

            * :n_regs:     number of registers (width of register vectors)
            * :n_int_alus: number of Function Units (width of FU vectors)
        """
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        # register selection inputs
        self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in

        # global read / write pending vectors
        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        # register selection outputs
        self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)

        # per-FU readable / writable indicators
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        # per-FU control inputs
        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        n_fus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        fu_fu = FUFUDepMatrix(n_fus, n_fus)
        m.submodules.intfudeps = fu_fu
        # Integer FU-Reg Dep Matrix
        fu_reg = FURegDepMatrix(n_fus, self.n_regs, 2)
        m.submodules.intregdeps = fu_reg

        # also output for use in WaWGrid
        self.wr_pend_o = fu_reg.wr_pend_o

        # global pending vectors; also fed back into the FU-Reg matrix
        comb += [
            self.g_int_rd_pend_o.eq(fu_reg.v_rd_rsel_o),
            self.g_int_wr_pend_o.eq(fu_reg.v_wr_rsel_o),
            fu_reg.rd_pend_i.eq(fu_reg.v_rd_rsel_o),
            fu_reg.wr_pend_i.eq(fu_reg.v_wr_rsel_o),
        ]

        # FU-FU matrix: pending vectors come from the FU-Reg matrix
        comb += [
            fu_fu.rd_pend_i.eq(fu_reg.rd_pend_o),
            fu_fu.wr_pend_i.eq(fu_reg.wr_pend_o),
            fu_fu.issue_i.eq(self.fn_issue_i),
            fu_fu.go_rd_i.eq(self.go_rd_i),
            fu_fu.go_wr_i.eq(self.go_wr_i),
            fu_fu.go_die_i.eq(self.go_die_i),
            self.readable_o.eq(fu_fu.readable_o),
            self.writable_o.eq(fu_fu.writable_o),
        ]

        # FU-Reg matrix: function issue / arrays, and dest/src1/src2
        comb += [
            fu_reg.dest_i.eq(self.dest_i),
            fu_reg.src_i[0].eq(self.src1_i),
            fu_reg.src_i[1].eq(self.src2_i),
            fu_reg.go_rd_i.eq(self.go_rd_i),
            fu_reg.go_wr_i.eq(self.go_wr_i),
            fu_reg.go_die_i.eq(self.go_die_i),
            fu_reg.issue_i.eq(self.fn_issue_i),
        ]

        # register selection outputs
        comb += [
            self.dest_rsel_o.eq(fu_reg.dest_rsel_o),
            self.src1_rsel_o.eq(fu_reg.src_rsel_o[0]),
            self.src2_rsel_o.eq(fu_reg.src_rsel_o[1]),
        ]

        return m
392
393
class Scoreboard(Elaboratable):
    """ Top-level scoreboard experiment.

        Wires together the register files, the Computation Units (ALUs,
        LD/STs and one Branch unit), the integer and memory Function Unit
        dependency matrices, the group picker, the issue units and the
        shadow matrices (instruction-order and branch-speculation).
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(2)
        self.lsissue = IssueUnitGroup(2)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.alu_imm_i = Signal(rwid, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)
        self.ls_oper_i = Signal(4, reset_less=True)
        self.ls_imm_i = Signal(rwid, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # NOTE(review): the FP ports are created but not connected to
        # anything further down in this method
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and BR ALUs
        # NOTE(review): 5 presumably equals 2 ALUs + 2 LD/STs + 1 BR as set
        # up by the IssueUnitGroups in __init__; the 0x1f masks further
        # down assume the same count - keep all of them in step
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
        cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs

        # LDST Comp Units
        # NOTE(review): n_ldsts is used for MemFunctionUnits and must agree
        # with self.lsissue.n_insns (the number of LD/ST units created);
        # no memory is passed in (None)
        n_ldsts = 2
        cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, None)

        # Comp Units
        # order here (ALUs, LD/STs, Branch) sets the bit positions of the
        # concatenated per-unit signals, and matches the order given to
        # IssueUnitArray below
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Memory FUs
        m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
        intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        # NOTE(review): as coded, the branch shadow is a *separate*
        # one-wide matrix (bshadow) rather than an extra column in shadows
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [ regdecode.dest_i.eq(self.int_dest_i),
                  regdecode.src1_i.eq(self.int_src1_i),
                  regdecode.src2_i.eq(self.int_src2_i),
                  regdecode.enable_i.eq(self.reg_enable_i),
                  self.issue_o.eq(issueunit.issue_o)
                ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cua.imm_i.eq(self.alu_imm_i)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)
        comb += cul.oper_i.eq(self.ls_oper_i)
        comb += cul.imm_i.eq(self.ls_imm_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # Memory Function Unit
        #---------
        # reset is registered: it takes effect the cycle after any of
        # go_st / go_wr / go_die is raised on the LD/ST units
        reset_b = Signal(cul.n_units, reset_less=True)
        sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)


        comb += memfus.fn_issue_i.eq(cul.issue_i) # Comp Unit Issue -> Mem FUs
        comb += memfus.addr_en_i.eq(cul.adr_rel_o) # Match enable on adr rel
        comb += memfus.addr_rs_i.eq(reset_b) # reset same as LDSTCompUnit

        # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
        # in a transitive fashion).  This cycle activates based on LDSTCompUnit
        # issue_i.  multi-issue gets a bit more complex but not a lot.
        # prior_ldsts is a registered copy of "any LD or ST pending", which
        # is ORed into the ld_i / st_i vectors of a newly-issued LD or ST
        prior_ldsts = Signal(cul.n_units, reset_less=True)
        sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
        with m.If(self.ls_oper_i[2]): # LD bit of operand
            comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
        with m.If(self.ls_oper_i[3]): # ST bit of operand
            comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)

        # connect up address data
        # NOTE(review): hard-codes exactly two LD/ST units
        comb += memfus.addrs_i[0].eq(cul.units[0].data_o)
        comb += memfus.addrs_i[1].eq(cul.units[1].data_o)

        # connect loadable / storable to go_ld/go_st.
        # XXX should only be done when the memory ld/st has actually happened!

        comb += memfus.go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o)
        comb += memfus.go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o)
        #comb += cul.go_wr_i.eq(memfus.loadable_o & memfus.addr_nomatch_o)
        comb += cul.go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o)

        #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # branch is "active" from the cycle it is issued (br1.issue_i)
        # through the registered bspec.active_i, until its go_wr_i is raised
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            # direction is recorded as data+1, so that 0 can mean "not yet"
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            # this comes after the active_i.eq(1) above, so takes priority
            # if both conditions hold in the same cycle
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        # yields the register file contents followed by the main ports
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        # list form of __iter__
        return list(self)
717
718
class IssueToScoreboard(Elaboratable):
    """ Connects an InstructionQ to a Scoreboard: the instruction at the
        head of the queue is decoded into a branch, LD/ST or ALU request,
        presented to the matching issue unit, and popped from the queue
        once that issue unit reports that it was accepted.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   instruction queue length
            * :n_in:   number of instructions that may be added at once
            * :n_out:  passed to InstructionQ as its n_out argument
            * :rwid:   bit width of register file(s)
            * :opwid:  operand bit width
            * :n_regs: number of registers
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # (width, signed) tuple: log2(qlen)+2 bits, unsigned
        mqbits = (int(log(qlen) / log(2))+2, False)
        self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
        self.p_ready_o = Signal() # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True) # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        # NOTE(review): mem is added as a submodule but none of its ports
        # are connected in this method
        mem = TestMemory(self.rwid, 8) # not too big, takes too long
        m.submodules.iq = iq
        m.submodules.sc = sc
        m.submodules.mem = mem

        # get at the regfile for testing
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when n_sub_i is set (below), which
        # happens once the issue unit being waited on raises fn_issue_o.

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()
        wait_issue_ls = Signal()

        with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
            # set instruction pop length to 1 if the unit accepted
            with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            imm = iq.data_o[0].imm_i
            dest = iq.data_o[0].dest_i
            src1 = iq.data_o[0].src1_i
            src2 = iq.data_o[0].src2_i
            op = iq.data_o[0].oper_i
            opi = iq.data_o[0].opim_i # immediate set

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1) # enable the regfile

            # choose a Function-Unit-Group
            # (op bits 2-3 set: branch; else op bits 4-5 set: LD/ST;
            #  otherwise: ALU)
            with m.If((op & (0x3<<2)) != 0): # branch
                comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.br_imm_i.eq(imm)
                comb += sc.brissue.insn_i.eq(1)
                comb += wait_issue_br.eq(1)
            with m.Elif((op & (0x3<<4)) != 0): # ld/st
                # see compldst.py
                # bit 0: ADD/SUB
                # bit 1: immed
                # bit 4: LD
                # bit 5: ST
                # (the above are bits of "op"; after the Cat below they
                #  land in ls_oper_i bits 0, 1, 2 and 3 respectively)
                comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
                comb += sc.ls_imm_i.eq(imm)
                comb += sc.lsissue.insn_i.eq(1)
                comb += wait_issue_ls.eq(1)
            with m.Else(): # alu
                comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.alu_imm_i.eq(imm)
                comb += sc.aluissue.insn_i.eq(1)
                comb += wait_issue_alu.eq(1)

        # XXX TODO
        # these indicate that the instruction is to be made
        # shadow-dependent on
        # (either) branch success or branch fail
        #yield sc.branch_fail_i.eq(branch_fail)
        #yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        # yields p_ready_o, every field of every data_i entry, then p_add_i
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        # list form of __iter__
        return list(self)
841
842
# instruction opcodes understood by RegSim.op: four arithmetic
# operations (0-3) followed by four branch comparisons (4-7)
IADD, ISUB, IMUL, ISHF = 0, 1, 2, 3
IBGT, IBLT, IBEQ, IBNE = 4, 5, 6, 7
851
852
class RegSim:
    """ Pure-python reference model of the integer register file, used
        to compute expected results and compare them against the
        hardware registers.
    """
    def __init__(self, rwidth, nregs):
        self.rwidth = rwidth
        self.regs = [0 for _ in range(nregs)]

    def op(self, op, op_imm, imm, src1, src2, dest):
        """ perform operation `op` on register src1 and either `imm`
            (if op_imm is set) or register src2.  the result, truncated
            to rwidth bits, is stored in register dest and returned.
            unrecognised operations return 0 without storing anything.
        """
        mask = (1 << self.rwidth) - 1
        lhs = self.regs[src1] & mask
        rhs = imm if op_imm else self.regs[src2] & mask
        compute = {
            IADD: lambda a, b: a + b,
            ISUB: lambda a, b: a - b,
            IMUL: lambda a, b: a * b,
            ISHF: lambda a, b: a >> (b & mask),
            IBGT: lambda a, b: int(a > b),
            IBLT: lambda a, b: int(a < b),
            IBEQ: lambda a, b: int(a == b),
            IBNE: lambda a, b: int(a != b),
        }.get(op)
        if compute is None:
            return 0 # LD/ST TODO
        result = compute(lhs, rhs) & mask
        self.setval(dest, result)
        return result

    def setval(self, dest, val):
        """ store val in register dest (and log it) """
        print ("sim setval", dest, hex(val))
        self.regs[dest] = val

    def dump(self, dut):
        """ simulation generator: print every register's expected and
            actual value, marking each as OK or !ok
        """
        for idx, expected in enumerate(self.regs):
            actual = yield dut.intregs.regs[idx].reg
            status = "!ok" if actual != expected else "OK"
            print("reg %d expected %x received %x %s" % \
                  (idx, expected, actual, status))

    def check(self, dut):
        """ simulation generator: compare every register against the
            model.  on the first mismatch, report it, dump all registers
            and fail with an assertion.
        """
        for idx, expected in enumerate(self.regs):
            actual = yield dut.intregs.regs[idx].reg
            if actual == expected:
                continue
            print("reg %d expected %x received %x\n" % \
                  (idx, expected, actual))
            yield from self.dump(dut)
            assert False
904
def instr_q(dut, op, op_imm, imm, src1, src2, dest,
            branch_success, branch_fail):
    """ simulation generator: place one instruction on dut.data_i, ask
        for it to be added (p_add_i) and wait until p_ready_o is raised,
        after which p_add_i is cleared again.

        branch_success and branch_fail are accepted but not used here.
    """
    pending = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
                'src1_i': src1, 'src2_i': src2}]
    n_send = 1

    for slot in range(n_send):
        yield from eq(dut.data_i[slot], pending[slot])
        sent = yield dut.data_i[slot]
        print ("senddata %d %x" % (slot, sent))

    # request the add, then poll (one clock at a time) for acceptance
    yield dut.p_add_i.eq(n_send)
    yield
    while not (yield dut.p_ready_o):
        yield

    yield dut.p_add_i.eq(0)
923
924
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """ simulation generator: present one instruction directly to the
        scoreboard's branch or ALU issue unit, then wait for it to issue.

        ops with either of bits 2-3 set go to the branch unit, all others
        to the ALU; only the low two bits of op are passed on.
    """
    yield from disable_issue(dut)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    is_branch = (op & (0x3<<2)) != 0
    if is_branch:
        dut_issue = dut.brissue
        oper_sig, imm_sig = dut.br_oper_i, dut.br_imm_i
    else:
        dut_issue = dut.aluissue
        oper_sig, imm_sig = dut.alu_oper_i, dut.alu_imm_i
    yield dut_issue.insn_i.eq(1)
    yield oper_sig.eq(Const(op & 0x3, 2))
    yield imm_sig.eq(imm)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
949
950
def print_reg(dut, rnums):
    """ simulation generator: read the listed integer registers from the
        dut and print their numbers and (hex) values on one line
    """
    values = []
    for rnum in rnums:
        contents = yield dut.intregs.regs[rnum].reg
        values.append("%x" % contents)
    labels = ','.join(str(rnum) for rnum in rnums)
    print ("reg %s: %s" % (labels, ','.join(values)))
958
959
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Return a list of ``n_ops`` randomly generated instruction tuples.

    Each tuple is ``(src1, src2, dest, op, opi, imm)``; when ``shadowing``
    is true a trailing ``(0, 0)`` shadow pair is added as a seventh field.
    Register numbers are drawn from 1 to ``dut.n_regs - 1``, the immediate
    from 1 to ``(1 << dut.rwid) - 1`` and ``op`` from 0 to ``max_opnums``.
    """
    shadow_suffix = ((0, 0),) if shadowing else ()
    insts = []
    for _ in range(n_ops):
        # the order of these draws fixes the sequence for a given seed
        src1 = randint(1, dut.n_regs - 1)
        src2 = randint(1, dut.n_regs - 1)
        imm = randint(1, (1 << dut.rwid) - 1)
        dest = randint(1, dut.n_regs - 1)
        op = randint(0, max_opnums)
        # opi is 1 only when the draw is zero (one time in three)
        opi = int(randint(0, 2) == 0)
        insts.append((src1, src2, dest, op, opi, imm) + shadow_suffix)
    return insts
975
976
def wait_for_busy_clear(dut):
    """Clock the simulation until ``dut.busy_o`` reads false.

    Simulation generator: prints "busy" and advances one clock for every
    sample of ``dut.busy_o`` that is still true.
    """
    while (yield dut.busy_o):
        print("busy")
        yield
984
def disable_issue(dut):
    """Clear ``insn_i`` on the ALU, branch and LD/ST issue units, in that order."""
    for unit_name in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, unit_name).insn_i.eq(0)
989
990
def wait_for_issue(dut, dut_issue):
    """Clock until ``dut_issue.fn_issue_o`` is seen, then stop issuing.

    Simulation generator: while the issue unit has not issued, prints
    "busy" and advances one clock.  Once it has, every issue unit's
    ``insn_i`` is cleared via ``disable_issue`` and ``dut.reg_enable_i``
    is set back to 0.
    """
    while not (yield dut_issue.fn_issue_o):
        print("busy")
        #yield from print_reg(dut, [1,2,3])
        yield
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
    #yield from print_reg(dut, [1,2,3])
1002
def scoreboard_branch_sim(dut, alusim):
    """Simulation process exercising branch shadowing on the scoreboard.

    Seeds the RNG, loads registers 1 .. ``dut.n_regs - 1`` with random
    values (mirrored into ``alusim``), then issues a short program through
    ``int_instr``.  Each instruction is a 5-field tuple
    ``(src1, src2, dest, op, (shadow_on, shadow_off))``; for a branch
    (``op >= 4``) the ``dest`` field instead holds a pair of lists
    ``(branch_ok, branch_fail)`` of follow-on instructions, which are
    queued with the matching shadow flag set.  After the DUT goes idle the
    same program is replayed on ``alusim`` and the two register files are
    compared with ``alusim.check`` and printed with ``alusim.dump``.
    """
    iseed = 3

    for _round in range(1):

        print("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1 << alusim.rwidth) - 1)
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        if False:
            # create some instructions: branches create a tree
            # NOTE(review): create_random_ops returns 7-field tuples when
            # shadowing is requested, which does not match the 5-field
            # unpack used by the issue loop below - update before enabling.
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs - 1)
            src2 = randint(1, dut.n_regs - 1)
            #op = randint(4, 7)
            op = 4  # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append((3, 5, 2, 0, (0, 0)))
            branch_ok = []
            branch_fail = []
            #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append(None)
            branch_fail.append((1, 1, 2, 0, (0, 1)))
            #branch_fail.append( None )
            insts.append((6, 4, (branch_ok, branch_fail), 4, (0, 0)))

        # the software model replays an untouched copy of the program
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o  # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instrs.pop(0)
            if branch_direction == 1 and shadow_on:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch already resolved: no shadow needed any more
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print("instr %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr takes an immediate as its third argument; these
            # 5-field instructions carry none, so 0 is supplied.  (The call
            # previously omitted it, leaving int_instr one argument short.)
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print("sim %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            # same argument layout as the call in scoreboard_sim:
            # (op, opi, imm, src1, src2, dest), with no immediate operand.
            # NOTE(review): RegSim.op is defined elsewhere - confirm.
            branch_res = alusim.op(op, 0, 0, src1, src2, dest)
            if is_branch:
                # follow only the path the model says the branch took
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1115
1116
def scoreboard_sim(dut, alusim):
    """Simulation process: queue instructions and check the register file.

    Seeds the RNG with 0, loads registers 1 .. ``dut.n_regs - 1`` with
    random values (mirrored into ``alusim``), builds a list of
    instructions, runs each one through the software model ``alusim.op``
    and pushes it into the DUT with ``instr_q``.  Once the DUT reports an
    empty queue and is no longer busy, the hardware registers are compared
    against the model with ``alusim.check`` and printed with
    ``alusim.dump``.

    Enabled instructions are 7-field tuples
    ``(src1, src2, dest, op, opi, imm, (br_ok, br_fail))``.  The blocks
    guarded by ``if False:`` are disabled regression cases; several of
    them still use shorter tuple layouts and would need extending to the
    7-field form before being switched on.
    """
    seed(0)

    for _round in range(1):

        # set random values in the registers
        for regnum in range(1, dut.n_regs):
            value = randint(0, (1 << alusim.rwidth) - 1)
            #value = 31+regnum*3
            #value = regnum
            yield dut.intregs.regs[regnum].reg.eq(value)
            alusim.setval(regnum, value)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if True:  # LD/ST test (with immediate)
            instrs.append((1, 2, 2, 0x10, 1, 1, (0, 0)))
            #instrs.append( (1, 2, 7, 0x10, 1, 1, (0, 0)) )

        if False:
            instrs.append((1, 2, 2, 1, 1, 20, (0, 0)))

        if False:
            instrs.append((7, 3, 2, 4, (0, 0)))
            instrs.append((7, 6, 6, 2, (0, 0)))
            instrs.append((1, 7, 2, 2, (0, 0)))

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append((3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append((6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append((1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append((3, 6, 7, 2))
            instrs.append((4, 4, 7, 1))

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append((5, 2, 5, 2))
            instrs.append((2, 6, 3, 0))
            instrs.append((4, 2, 2, 1))

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append((4, 3, 5, 1, 0, (0, 0)))
            instrs.append((5, 2, 3, 1, 0, (0, 0)))
            instrs.append((7, 1, 5, 2, 0, (0, 0)))
            instrs.append((5, 6, 6, 4, 0, (0, 0)))
            instrs.append((7, 5, 2, 2, 0, (1, 0)))
            instrs.append((1, 7, 5, 0, 0, (0, 1)))
            instrs.append((1, 6, 1, 2, 0, (1, 0)))
            instrs.append((1, 6, 7, 3, 0, (0, 0)))
            instrs.append((6, 7, 7, 0, 0, (0, 0)))

        # issue instruction(s), wait for issue to be free before proceeding
        for seq, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print("instr %d: (%d, %d, %d, %d, %d, %d)" %
                  (seq, src1, src2, dest, op, opi, imm))
            # update the software model first, then hand over to the DUT
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        while (yield dut.qlen_o) != 0:
            yield
        for _ in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1263
1264
def test_scoreboard():
    """Build the scoreboard DUT, write out its RTLIL and run the simulation.

    The RTLIL text is written to ``test_scoreboard6600.il`` and the
    simulation waveform to ``test_scoreboard6600.vcd``.
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 16)  # created but not yet used by any sim process

    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1278
1279
# Run the scoreboard simulation when this file is executed as a script.
if __name__ == "__main__":
    test_scoreboard()