add address and output mode from LDSTCUs
[soc.git] / src / experiment / score6600.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
4
5 from regfile.regfile import RegFileArray, treereduce
6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
7 from scoreboard.fu_reg_matrix import FURegDepMatrix
8 from scoreboard.global_pending import GlobalPending
9 from scoreboard.group_picker import GroupPicker
10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
12 from scoreboard.instruction_q import Instruction, InstructionQ
13 from scoreboard.memfu import MemFunctionUnits
14
15 from compalu import ComputationUnitNoDelay
16 from compldst import LDSTCompUnit
17
18 from alu_hier import ALU, BranchALU
19 from nmutil.latch import SRLatch
20 from nmutil.nmoperator import eq
21
22 from random import randint, seed
23 from copy import deepcopy
24 from math import log
25
26
class TestMemory(Elaboratable):
    """Small test memory with one read port and one write port.

    Inputs:

    * :regwid: bit width of each memory word (and of dat_r / dat_w)
    * :addrw:  bit width of the address input (adr)

    The memory is initialised so that every cell holds its own index.
    Both ports drop the low ``ddepth`` bits of ``adr``, the same
    addressing used by MemSim (``addr >> ddepth``).
    """
    def __init__(self, regwid, addrw):
        self.ddepth = 1 # regwid //8
        depth = (1<<addrw) // self.ddepth
        self.adr = Signal(addrw)
        self.dat_r = Signal(regwid)
        self.dat_w = Signal(regwid)
        self.we = Signal()
        self.mem = Memory(width=regwid, depth=depth, init=range(0, depth))

    def elaborate(self, platform):
        m = Module()
        m.submodules.rdport = rdport = self.mem.read_port()
        m.submodules.wrport = wrport = self.mem.write_port()
        # ignore low bits.  the read and write ports must agree on the
        # cell selected by a given adr, otherwise a store followed by a
        # load of the same address would hit two different cells.
        word_adr = self.adr[self.ddepth:]
        m.d.comb += [
            rdport.addr.eq(word_adr),
            self.dat_r.eq(rdport.data),
            wrport.addr.eq(word_adr),
            wrport.data.eq(self.dat_w),
            wrport.en.eq(self.we),
        ]
        return m
49
50
class MemSim:
    """Plain-Python memory model, used as a reference during simulation.

    * :regwid: bit width of a stored word (stores are truncated to it)
    * :addrw:  address width; sets the number of cells

    Every cell starts out holding its own index.
    """
    def __init__(self, regwid, addrw):
        self.regwid = regwid
        self.ddepth = 1 # regwid//8
        n_cells = (1 << addrw) // self.ddepth
        self.mem = list(range(n_cells))

    def _cell(self, addr):
        # low ddepth bits of the address are ignored
        return addr >> self.ddepth

    def ld(self, addr):
        """Return the word held in the cell selected by addr."""
        idx = self._cell(addr)
        return self.mem[idx]

    def st(self, addr, data):
        """Store data (truncated to regwid bits) in the cell selected by addr."""
        word_mask = (1 << self.regwid) - 1
        idx = self._cell(addr)
        self.mem[idx] = data & word_mask
63
64
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Amazingly, this class works recursively.  It's supposed to just
        look after some ALUs (that can handle the same operations),
        grouping them together, however it turns out that the same code
        can also group *groups* of Computation Units together as well.

        Basically it was intended just to concatenate the ALU's issue,
        go_rd etc. signals together, which start out as bits and become
        sequences.  Turns out that the same trick works just as well
        on Computation Units!

        So this class may be used recursively to present a top-level
        sequential concatenation of all the signals in and out of
        ALUs, whilst at the same time making it convenient to group
        ALUs together.

        At the lower level, the intent is that groups of (identical)
        ALUs may be passed the same operation.  Even beyond that,
        the intent is that that group of (identical) ALUs actually
        share the *same pipeline* and as such become a "Concurrent
        Computation Unit" as defined by Mitch Alsup (see section
        11.4.9.3)
    """
    def __init__(self, rwid, units, ldstmode=False):
        """ Inputs:

            * :rwid:     bit width of register file(s) - both FP and INT
            * :units:    sequence of ALUs (or CompUnitsBase derivatives)
            * :ldstmode: when True, the extra LD/ST control signals
                         (go_ad_i, go_st_i, ld_o, st_o, adr_rel_o,
                         sto_rel_o, load_mem_o, stwd_mem_o, addr_o)
                         are created and wired up as well
        """
        self.units = units
        self.ldstmode = ldstmode
        self.rwid = rwid
        # a group of groups is as wide as the sum of its members.  only
        # the first unit is inspected to decide which case applies.
        if units and isinstance(units[0], CompUnitsBase):
            self.n_units = 0
            for u in self.units:
                self.n_units += u.n_units
        else:
            self.n_units = len(units)

        n_units = self.n_units

        # inputs
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)
        if ldstmode:
            self.go_ad_i = Signal(n_units, reset_less=True)
            self.go_st_i = Signal(n_units, reset_less=True)

        # outputs
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)
        if ldstmode:
            self.ld_o = Signal(n_units, reset_less=True) # op is LD
            self.st_o = Signal(n_units, reset_less=True) # op is ST
            self.adr_rel_o = Signal(n_units, reset_less=True)
            self.sto_rel_o = Signal(n_units, reset_less=True)
            self.load_mem_o = Signal(n_units, reset_less=True)
            self.stwd_mem_o = Signal(n_units, reset_less=True)
            self.addr_o = Signal(rwid, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)
        # input operand

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        for i, alu in enumerate(self.units):
            setattr(m.submodules, "comp%d" % i, alu)

        def cat(name):
            # concatenation of the named signal taken from every unit,
            # in unit order
            return Cat(*[getattr(alu, name) for alu in self.units])

        # unit outputs are gathered into this group's outputs...
        comb += self.rd_rel_o.eq(cat("rd_rel_o"))
        comb += self.req_rel_o.eq(cat("req_rel_o"))
        comb += self.busy_o.eq(cat("busy_o"))
        # ... and this group's inputs are fanned back out to the units
        comb += cat("go_die_i").eq(self.go_die_i)
        comb += cat("shadown_i").eq(self.shadown_i)
        comb += cat("go_wr_i").eq(self.go_wr_i)
        comb += cat("go_rd_i").eq(self.go_rd_i)
        comb += cat("issue_i").eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        if self.units:
            data_o = treereduce(self.units, "data_o")
            comb += self.data_o.eq(data_o)
            if self.ldstmode:
                addr_o = treereduce(self.units, "addr_o")
                comb += self.addr_o.eq(addr_o)

        # every unit is handed the same pair of source operands
        for alu in self.units:
            comb += alu.src1_i.eq(self.src1_i)
            comb += alu.src2_i.eq(self.src2_i)

        if not self.ldstmode:
            return m

        # LD/ST-only signals.  ld_o and st_o were previously collected
        # from the units but never driven onto this group's outputs,
        # leaving them permanently zero for anything reading them.
        comb += self.ld_o.eq(cat("ld_o"))
        comb += self.st_o.eq(cat("st_o"))
        comb += self.adr_rel_o.eq(cat("adr_rel_o"))
        comb += self.sto_rel_o.eq(cat("sto_rel_o"))
        comb += self.load_mem_o.eq(cat("load_mem_o"))
        comb += self.stwd_mem_o.eq(cat("stwd_mem_o"))
        comb += cat("go_ad_i").eq(self.go_ad_i)
        comb += cat("go_st_i").eq(self.go_st_i)

        return m
214
215
class CompUnitLDSTs(CompUnitsBase):
    """Group of LD/ST Computation Units, all handed the same operation."""

    def __init__(self, rwid, opwid, n_ldsts, mem):
        """ Inputs:

            * :rwid:    bit width of register file(s) - both FP and INT
            * :opwid:   bit width of the operation input (oper_i)
            * :n_ldsts: number of LD/ST Computation Units to create
            * :mem:     passed through unchanged to each LDSTCompUnit
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs: one per LD/ST unit
        self.alus = [ALU(rwid) for _ in range(n_ldsts)]

        aluopwid = 4 # see compldst.py for "internal" opcode
        ldst_units = [LDSTCompUnit(rwid, aluopwid, alu, mem)
                      for alu in self.alus]

        CompUnitsBase.__init__(self, rwid, ldst_units, ldstmode=True)

    def elaborate(self, platform):
        m = CompUnitsBase.elaborate(self, platform)

        # hand the same operation to all units, 4 lower bits though
        for unit in self.units:
            m.d.comb += [
                unit.oper_i[0:4].eq(self.oper_i),
                unit.imm_i.eq(self.imm_i),
                unit.isalu_i.eq(0),
            ]

        return m
253
254
class CompUnitALUs(CompUnitsBase):
    """Group of integer ALU Computation Units, all handed the same operation."""

    def __init__(self, rwid, opwid, n_alus):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  bit width of the operation input (oper_i)
            * :n_alus: number of ALU Computation Units to create
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs, each wrapped in its own Computation Unit
        int_alus = [ALU(rwid) for _ in range(n_alus)]

        aluopwid = 3 # extra bit for immediate mode
        comp_units = [ComputationUnitNoDelay(rwid, aluopwid, alu)
                      for alu in int_alus]

        CompUnitsBase.__init__(self, rwid, comp_units)

    def elaborate(self, platform):
        m = CompUnitsBase.elaborate(self, platform)

        # hand the same operation to all units, only lower 3 bits though
        for unit in self.units:
            m.d.comb += [
                unit.oper_i[0:3].eq(self.oper_i),
                unit.imm_i.eq(self.imm_i),
            ]

        return m
291
292
class CompUnitBR(CompUnitsBase):
    """Group holding the single Branch Computation Unit."""

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:  bit width of register file(s) - both FP and INT
            * :opwid: bit width of the operation input (oper_i)

            Note: the branch ALU (bgt) and its Computation Unit (br1)
            are kept as attributes so that a shadow unit can be created
            for the branch
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Branch ALU and CU
        aluopwid = 3 # extra bit for immediate mode
        self.bgt = BranchALU(rwid)
        self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)
        CompUnitsBase.__init__(self, rwid, [self.br1])

    def elaborate(self, platform):
        m = CompUnitsBase.elaborate(self, platform)

        # hand the same operation to all units
        for unit in self.units:
            m.d.comb += [
                unit.oper_i.eq(self.oper_i),
                unit.imm_i.eq(self.imm_i),
            ]

        return m
326
327
class FunctionUnits(Elaboratable):
    """Integer Function Unit dependency tracking.

    Instantiates an FU-FU Dependency Matrix and an FU-Register
    Dependency Matrix, connects them to one another, and exposes
    their inputs and outputs as this module's own signals.
    """

    def __init__(self, n_regs, n_int_alus):
        """ Inputs:

            * :n_regs:     width of the register-select signals
            * :n_int_alus: number of Function Units being tracked
        """
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in

        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)

        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here, for use in
        # WaWGrid.  (the attribute is only set during elaborate)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        n_intfus = self.n_int_alus

        # Integer FU-FU Dep Matrix and Integer FU-Reg Dep Matrix
        m.submodules.intfudeps = intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
        m.submodules.intregdeps = intregdeps = FURegDepMatrix(n_intfus,
                                                              self.n_regs, 2)

        # global pending vectors: exported, and fed back into the
        # FU-Reg matrix's own pending inputs
        comb += [
            self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o),
            self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o),
            intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o),
            intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o),
        ]

        # FU-Reg pending outputs drive the FU-FU matrix
        comb += [
            intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o),
            intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o),
        ]
        self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid

        # FU-FU matrix: issue / go controls in, readable / writable out
        comb += [
            intfudeps.issue_i.eq(self.fn_issue_i),
            intfudeps.go_rd_i.eq(self.go_rd_i),
            intfudeps.go_wr_i.eq(self.go_wr_i),
            intfudeps.go_die_i.eq(self.go_die_i),
            self.readable_o.eq(intfudeps.readable_o),
            self.writable_o.eq(intfudeps.writable_o),
        ]

        # Connect function issue / arrays, and dest/src1/src2
        comb += [
            intregdeps.dest_i.eq(self.dest_i),
            intregdeps.src_i[0].eq(self.src1_i),
            intregdeps.src_i[1].eq(self.src2_i),
        ]

        comb += [
            intregdeps.go_rd_i.eq(self.go_rd_i),
            intregdeps.go_wr_i.eq(self.go_wr_i),
            intregdeps.go_die_i.eq(self.go_die_i),
            intregdeps.issue_i.eq(self.fn_issue_i),
        ]

        # register-select outputs
        comb += [
            self.dest_rsel_o.eq(intregdeps.dest_rsel_o),
            self.src1_rsel_o.eq(intregdeps.src_rsel_o[0]),
            self.src2_rsel_o.eq(intregdeps.src_rsel_o[1]),
        ]

        return m
401
402
class Scoreboard(Elaboratable):
    """Scoreboard top level.

    Wires together the register files, the ALU / LD-ST / Branch
    Computation Units, the Function Unit dependency matrices, the
    Memory Function Units, the group picker, the issue unit and the
    shadow matrices used for instruction ordering and branch
    speculation.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(2)
        self.lsissue = IssueUnitGroup(2)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.alu_imm_i = Signal(rwid, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)
        self.ls_oper_i = Signal(4, reset_less=True)
        self.ls_imm_i = Signal(rwid, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested but not connected to anything yet
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and BR ALUs
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
        cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs

        # LDST Comp Units.  the Memory FUs below are sized from the
        # number of LD/ST units actually created, so the two cannot
        # drift apart.
        cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, None)
        n_ldsts = cul.n_units

        # Comp Units
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Memory FUs
        m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # one bit per integer Function Unit
        fu_mask = (1 << n_intfus) - 1

        # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
        intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [ regdecode.dest_i.eq(self.int_dest_i),
                  regdecode.src1_i.eq(self.int_src1_i),
                  regdecode.src2_i.eq(self.int_src2_i),
                  regdecode.enable_i.eq(self.reg_enable_i),
                  self.issue_o.eq(issueunit.issue_o)
                ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cua.imm_i.eq(self.alu_imm_i)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)
        comb += cul.oper_i.eq(self.ls_oper_i)
        comb += cul.imm_i.eq(self.ls_imm_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # Memory Function Unit
        #---------
        reset_b = Signal(cul.n_units, reset_less=True)
        sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)

        comb += memfus.fn_issue_i.eq(cul.issue_i) # Comp Unit Issue -> Mem FUs
        comb += memfus.addr_en_i.eq(cul.adr_rel_o) # Match enable on adr rel
        comb += memfus.addr_rs_i.eq(reset_b) # reset same as LDSTCompUnit

        # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
        # in a transitive fashion).  This cycle activates based on LDSTCompUnit
        # issue_i.  multi-issue gets a bit more complex but not a lot.
        prior_ldsts = Signal(cul.n_units, reset_less=True)
        sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
        with m.If(self.ls_oper_i[2]): # LD bit of operand
            comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
        with m.If(self.ls_oper_i[3]): # ST bit of operand
            comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)

        # TODO: adr_rel_o needs to go into L1 Cache.  for now,
        # just immediately activate go_adr
        comb += cul.go_ad_i.eq(cul.adr_rel_o)

        # connect up address data: one address per LD/ST unit
        for i, ldst in enumerate(cul.units):
            comb += memfus.addrs_i[i].eq(ldst.addr_o)

        # connect loadable / storable to go_ld/go_st.
        # XXX should only be done when the memory ld/st has actually happened!
        go_st_i = Signal(cul.n_units, reset_less=True)
        go_ld_i = Signal(cul.n_units, reset_less=True)
        # NOTE(review): go_ld is gated on loadable_o (it was previously
        # gated on storable_o, identical to the go_st line below).  this
        # matches the commented-out go_wr_i hook further down; confirm
        # against MemFunctionUnits.
        comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &
                           cul.req_rel_o & cul.ld_o)
        comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &
                           cul.sto_rel_o & cul.st_o)
        comb += memfus.go_ld_i.eq(go_ld_i)
        comb += memfus.go_st_i.eq(go_st_i)
        #comb += cul.go_wr_i.eq(memfus.loadable_o & memfus.addr_nomatch_o)
        comb += cul.go_st_i.eq(go_st_i)

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # fu_mask restricts the issue vector to the n_intfus integer FUs
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & fu_mask)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & fu_mask)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        return list(self)
735
736
class IssueToScoreboard(Elaboratable):
    """Instruction queue feeding a Scoreboard.

    Instructions are added to an InstructionQ through data_i / p_add_i.
    The instruction at the head of the queue is decoded into a branch,
    LD/ST or ALU request to the Scoreboard, and is popped from the
    queue once the matching issue group reports that it was accepted.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   passed to InstructionQ; also sizes p_add_i / qlen_o
            * :n_in:   number of instruction inputs (entries in data_i)
            * :n_out:  passed to InstructionQ
            * :rwid:   bit width of register file(s)
            * :opwid:  bit width of the instruction operation field
            * :n_regs: depth of register file(s)
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # (width, signed) shape of the queue-length counters
        qbits = int(log(qlen) / log(2)) + 2
        mqbits = (qbits, False)
        self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
        self.p_ready_o = Signal() # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True) # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        mem = TestMemory(self.rwid, 8) # not too big, takes too long
        m.submodules.iq = iq
        m.submodules.sc = sc
        m.submodules.mem = mem

        # get at the regfile for testing
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += [
            self.busy_o.eq(sc.busy_o),
            self.qlen_o.eq(iq.qlen_o),
        ]

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for idx in range(self.n_in):
            comb += eq(iq.data_i[idx], self.data_i[idx])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when n_sub_i is set (below).

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()
        wait_issue_ls = Signal()

        # (waiting flag, issue group) pairs, checked in this order
        pending = (
            (wait_issue_ls, sc.lsissue),
            (wait_issue_br, sc.brissue),
            (wait_issue_alu, sc.aluissue),
        )

        with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
            # set instruction pop length to 1 if the unit accepted
            for waiting, group in pending:
                with m.If(waiting & (group.fn_issue_o != 0)):
                    with m.If(iq.qlen_o != 0):
                        comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation from the head of the queue
            head = iq.data_o[0]
            imm = head.imm_i
            dest = head.dest_i
            src1 = head.src1_i
            src2 = head.src2_i
            op = head.oper_i
            opi = head.opim_i # immediate set

            # set the src/dest regs
            comb += [
                sc.int_dest_i.eq(dest),
                sc.int_src1_i.eq(src1),
                sc.int_src2_i.eq(src2),
                sc.reg_enable_i.eq(1), # enable the regfile
            ]

            # choose a Function-Unit-Group
            with m.If((op & (0x3<<2)) != 0): # branch
                comb += [
                    sc.br_oper_i.eq(Cat(op[0:2], opi)),
                    sc.br_imm_i.eq(imm),
                    sc.brissue.insn_i.eq(1),
                    wait_issue_br.eq(1),
                ]
            with m.Elif((op & (0x3<<4)) != 0): # ld/st
                # see compldst.py
                # bit 0: ADD/SUB
                # bit 1: immed
                # bit 4: LD
                # bit 5: ST
                comb += [
                    sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6])),
                    sc.ls_imm_i.eq(imm),
                    sc.lsissue.insn_i.eq(1),
                    wait_issue_ls.eq(1),
                ]
            with m.Else(): # alu
                comb += [
                    sc.alu_oper_i.eq(Cat(op[0:2], opi)),
                    sc.alu_imm_i.eq(imm),
                    sc.aluissue.insn_i.eq(1),
                    wait_issue_alu.eq(1),
                ]

        # XXX TODO
        # these indicate that the instruction is to be made
        # shadow-dependent on
        # (either) branch success or branch fail
        #yield sc.branch_fail_i.eq(branch_fail)
        #yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        yield self.p_ready_o
        for insn in self.data_i:
            for field in list(insn):
                yield field
        yield self.p_add_i

    def ports(self):
        return [port for port in self]
859
860
# operation numbers used by the simulation helpers: four arithmetic
# operations (0-3) followed by four branch comparisons (4-7)
IADD, ISUB, IMUL, ISHF, IBGT, IBLT, IBEQ, IBNE = range(8)
869
870
class RegSim:
    """Software model of the integer register file and ALU operations.

    * :rwidth: register bit width (results are truncated to it)
    * :nregs:  number of registers, all initially zero
    """
    def __init__(self, rwidth, nregs):
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, op_imm, imm, src1, src2, dest):
        """Perform one operation and write the result to register dest.

        src1, src2 and dest are register numbers.  When op_imm is set,
        imm is used as the second operand in place of register src2.
        Returns the (truncated) result, or 0 without touching any
        register when op is not one of the known operations.
        """
        mask = (1 << self.rwidth) - 1
        lhs = self.regs[src1] & mask
        rhs = imm if op_imm else self.regs[src2] & mask

        # tried in order, compared against op with ==
        handlers = (
            (IADD, lambda a, b: a + b),
            (ISUB, lambda a, b: a - b),
            (IMUL, lambda a, b: a * b),
            (ISHF, lambda a, b: a >> (b & mask)),
            (IBGT, lambda a, b: int(a > b)),
            (IBLT, lambda a, b: int(a < b)),
            (IBEQ, lambda a, b: int(a == b)),
            (IBNE, lambda a, b: int(a != b)),
        )
        for code, compute in handlers:
            if op == code:
                val = compute(lhs, rhs)
                break
        else:
            return 0 # LD/ST TODO

        val &= mask
        self.setval(dest, val)
        return val

    def setval(self, dest, val):
        """Set register dest to val (logged to stdout)."""
        print ("sim setval", dest, hex(val))
        self.regs[dest] = val

    def dump(self, dut):
        """Simulation process: print expected vs. actual for every register."""
        for regnum, expected in enumerate(self.regs):
            actual = yield dut.intregs.regs[regnum].reg
            if actual == expected:
                okstr = "OK"
            else:
                okstr = "!ok"
            print("reg %d expected %x received %x %s" % (regnum, expected,
                                                         actual, okstr))

    def check(self, dut):
        """Simulation process: assert every register matches this model.

        On the first mismatch the mismatch is printed, followed by a
        full dump, and an assertion failure is raised.
        """
        for regnum, expected in enumerate(self.regs):
            actual = yield dut.intregs.regs[regnum].reg
            if actual == expected:
                continue
            print("reg %d expected %x received %x\n" % (regnum, expected,
                                                        actual))
            yield from self.dump(dut)
            assert False
922
def instr_q(dut, op, op_imm, imm, src1, src2, dest,
            branch_success, branch_fail):
    """Simulation process: push one instruction into dut's input queue.

    The instruction fields are driven onto dut.data_i[0], p_add_i is
    set to the number of instructions sent, and the process then waits
    until p_ready_o is seen before clearing p_add_i again.
    branch_success and branch_fail are accepted but not used here.
    """
    instrs = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
               'src1_i': src1, 'src2_i': src2}]

    n_send = 1
    for idx, insn in enumerate(instrs[:n_send]):
        port = dut.data_i[idx]
        yield from eq(port, insn)
        di = yield dut.data_i[idx]
        print ("senddata %d %x" % (idx, di))

    yield dut.p_add_i.eq(n_send)
    yield
    # keep clocking until the queue reports the instructions were added
    while not (yield dut.p_ready_o):
        yield

    yield dut.p_add_i.eq(0)
941
942
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """Simulation process: issue one instruction directly to the scoreboard.

    Operations with either of bits 2-3 set go to the branch issue
    group; everything else goes to the ALU issue group.  Only the low
    two bits of op are passed on as the operation.  The process then
    waits for the chosen issue group to accept the instruction.
    """
    yield from disable_issue(dut)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    is_branch = (op & (0x3<<2)) != 0
    if is_branch:
        dut_issue = dut.brissue
        oper_sig, imm_sig = dut.br_oper_i, dut.br_imm_i
    else:
        dut_issue = dut.aluissue
        oper_sig, imm_sig = dut.alu_oper_i, dut.alu_imm_i

    yield dut_issue.insn_i.eq(1)
    yield oper_sig.eq(Const(op & 0x3, 2))
    yield imm_sig.eq(imm)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
967
968
def print_reg(dut, rnums):
    """Simulation sub-process: print the DUT registers listed in *rnums*.

    Each ``dut.intregs.regs[rnum].reg`` is read back (via ``yield``) and a
    single line ``reg <numbers>: <hex values>`` is printed, both lists
    comma-separated.
    """
    hexvals = []
    for rnum in rnums:
        current = yield dut.intregs.regs[rnum].reg
        hexvals.append("%x" % current)
    labels = ','.join(str(rnum) for rnum in rnums)
    print ("reg %s: %s" % (labels, ','.join(hexvals)))
976
977
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Return a list of *n_ops* randomly generated instruction tuples.

    Each tuple is ``(src1, src2, dest, op, opi, imm)``; when *shadowing* is
    true a trailing ``(0, 0)`` pair is appended as a seventh element.
    Register numbers are drawn from ``1 .. dut.n_regs-1``, the immediate from
    ``1 .. 2**dut.rwid - 1`` and the opcode from ``0 .. max_opnums``.
    """
    def rand_reg():
        return randint(1, dut.n_regs-1)

    insts = []
    for _ in range(n_ops):
        # NOTE: the order of the draws below fixes the random sequence
        src1 = rand_reg()
        src2 = rand_reg()
        imm = randint(1, (1<<dut.rwid)-1)
        dest = rand_reg()
        op = randint(0, max_opnums)
        # opi is 1 only when the draw is 0 (one outcome in three), else 0
        opi = int(randint(0, 2) == 0)

        entry = (src1, src2, dest, op, opi, imm)
        if shadowing:
            entry += ((0, 0),)
        insts.append(entry)
    return insts
993
994
def wait_for_busy_clear(dut):
    """Simulation sub-process: step until ``dut.busy_o`` reads false.

    ``busy_o`` is sampled; while it is true, "busy" is printed and the
    simulation is stepped once before sampling again.
    """
    while (yield dut.busy_o):
        print ("busy",)
        yield
1002
def disable_issue(dut):
    """Simulation sub-process: clear ``insn_i`` on all three issue units.

    The ALU, branch and LD/ST issue units (``aluissue``, ``brissue``,
    ``lsissue``) are written in that order.
    """
    for unit in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, unit).insn_i.eq(0)
1007
1008
def wait_for_issue(dut, dut_issue):
    """Simulation sub-process: step until *dut_issue* reports an issue.

    ``dut_issue.fn_issue_o`` is sampled; while it is false, "busy" is printed
    and the simulation is stepped once.  As soon as it reads true, all issue
    requests are withdrawn with ``disable_issue`` and ``dut.reg_enable_i`` is
    cleared.
    """
    while not (yield dut_issue.fn_issue_o):
        print ("busy",)
        # debugging aid: yield from print_reg(dut, [1,2,3])
        yield
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
1020
def scoreboard_branch_sim(dut, alusim):
    """Simulation process: issue a small branch test and check the registers.

    Instructions are 5-tuples ``(src1, src2, dest, op, (shadow_on,
    shadow_off))``.  For a branch (``op >= 4``) the ``dest`` slot instead
    holds a ``(branch_ok, branch_fail)`` pair of instruction lists; those are
    queued behind the branch, marked as shadowed on branch success / failure
    respectively, and whichever side the DUT's ``branch_direction_o`` rules
    out is skipped.

    After the DUT goes idle the same program is replayed on *alusim*
    (following only the branch side the model itself takes) and the DUT
    registers are compared against the model.
    """
    iseed = 3

    for i in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for i in range(1, dut.n_regs):
            #val = 31+i*3
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        if False:
            # create some instructions: branches create a tree
            # NOTE(review): create_random_ops with shadowing returns 7-element
            # tuples, which the 5-element unpack below would reject - this
            # disabled block needs updating before it is re-enabled.
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # independent copy for the model replay: the issue loop below
        # consumes (and extends) the original list
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts  # same list object: pops and appends share one queue
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch already resolved: issue without shadowing
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr takes (dut, op, imm, src1, src2, dest, succ, fail);
            # these test tuples carry no immediate, so 0 is passed for it
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay the program on the software model
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # argument order mirrors the call in scoreboard_sim
            # (op, opi, imm, src1, src2, dest); no immediate is used here.
            # NOTE(review): confirm against the RegSim.op signature.
            branch_res = alusim.op(op, 0, 0, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1133
1134
def scoreboard_sim(dut, alusim):
    """Simulation process: queue a test program and check the registers.

    Registers 1 .. ``dut.n_regs-1`` are loaded with random values (seed 0) in
    both the DUT and *alusim*.  Each instruction is a 7-tuple
    ``(src1, src2, dest, op, opi, imm, (br_ok, br_fail))``; it is applied to
    the model with ``alusim.op`` and then handed to the DUT with ``instr_q``.
    Once ``dut.qlen_o`` reads 0 and the DUT is no longer busy, the DUT
    registers are compared against the model.

    The ``if False:`` sections are an archive of earlier regression cases.
    Several of them still use older 4-, 5- or 6-element tuples and would
    fail the 7-element unpack in the issue loop if simply re-enabled.
    """
    seed(0)

    for _ in range(1):

        # set random values in the registers
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+regnum*3
            #val = regnum
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if True: # LD/ST test (with immediate)
            instrs.append( (1, 2, 2, 0x10, 1, 1, (0, 0)) )
            instrs.append( (1, 2, 7, 0x12, 1, 1, (0, 0)) )

        if False:
            instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append( (1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for idx, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                            (idx, src1, src2, dest, op, opi, imm))
            # the model is updated in program order, before the DUT sees it
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking:
        # first let the queue drain, stepping once per poll...
        while (yield dut.qlen_o) != 0:
            yield
        # ...then allow four further steps before polling busy
        for _ in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1281
1282
def test_scoreboard():
    """Build the scoreboard DUT, export its RTLIL and run ``scoreboard_sim``.

    The RTLIL text is written to ``test_scoreboard6600.il`` and the
    simulation waveform to ``test_scoreboard6600.vcd``.
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 16)  # constructed but not used by the simulation yet

    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    # alternative (currently disabled) branch test:
    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1296
1297
# run the scoreboard test when this file is executed as a script
if __name__ == "__main__":
    test_scoreboard()