1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
, Memory
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
7 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
8 from scoreboard
.global_pending
import GlobalPending
9 from scoreboard
.group_picker
import GroupPicker
10 from scoreboard
.issue_unit
import IssueUnitGroup
, IssueUnitArray
, RegDecode
11 from scoreboard
.shadow
import ShadowMatrix
, BranchSpeculationRecord
12 from scoreboard
.instruction_q
import Instruction
, InstructionQ
13 from scoreboard
.memfu
import MemFunctionUnits
15 from compalu
import ComputationUnitNoDelay
16 from compldst
import LDSTCompUnit
18 from alu_hier
import ALU
, BranchALU
19 from nmutil
.latch
import SRLatch
20 from nmutil
.nmoperator
import eq
22 from random
import randint
, seed
23 from copy
import deepcopy
27 class TestMemory(Elaboratable
):
def __init__(self, regwid, addrw):
    """Create the backing nMigen Memory for the test memory.

    * :regwid: passed to Memory as the word width
    * :addrw:  the depth is (1 << addrw) divided by self.ddepth

    The memory is initialised so that each location holds its own index.
    """
    self.ddepth = 1  # regwid //8
    n_words = (1 << addrw) // self.ddepth
    initial_contents = range(n_words)
    self.mem = Memory(width=regwid, depth=n_words, init=initial_contents)
33 def elaborate(self
, platform
):
35 m
.submodules
.rdport
= self
.rdport
= self
.mem
.read_port()
36 m
.submodules
.wrport
= self
.wrport
= self
.mem
.write_port()
41 def __init__(self
, regwid
, addrw
):
43 self
.ddepth
= 1 # regwid//8
44 depth
= (1<<addrw
) // self
.ddepth
45 self
.mem
= list(range(0, depth
))
48 return self
.mem
[addr
>>self
.ddepth
]
def st(self, addr, data):
    """Store ``data`` into the simulated memory list.

    The slot index is ``addr`` shifted right by ``self.ddepth``; the value
    is truncated to ``self.regwid`` bits before it is written.
    """
    slot = addr >> self.ddepth
    width_mask = (1 << self.regwid) - 1
    self.mem[slot] = data & width_mask
54 class CompUnitsBase(Elaboratable
):
55 """ Computation Unit Base class.
57 Amazingly, this class works recursively. It's supposed to just
58 look after some ALUs (that can handle the same operations),
59 grouping them together, however it turns out that the same code
60 can also group *groups* of Computation Units together as well.
62 Basically it was intended just to concatenate the ALU's issue,
63 go_rd etc. signals together, which start out as bits and become
64 sequences. Turns out that the same trick works just as well
67 So this class may be used recursively to present a top-level
68 sequential concatenation of all the signals in and out of
69 ALUs, whilst at the same time making it convenient to group
72 At the lower level, the intent is that groups of (identical)
73 ALUs may be passed the same operation. Even beyond that,
74 the intent is that that group of (identical) ALUs actually
75 share the *same pipeline* and as such become a "Concurrent
76 Computation Unit" as defined by Mitch Alsup (see section
79 def __init__(self
, rwid
, units
, ldstmode
=False):
82 * :rwid: bit width of register file(s) - both FP and INT
83 * :units: sequence of ALUs (or CompUnitsBase derivatives)
86 self
.ldstmode
= ldstmode
89 if units
and isinstance(units
[0], CompUnitsBase
):
92 self
.n_units
+= u
.n_units
94 self
.n_units
= len(units
)
96 n_units
= self
.n_units
99 self
.issue_i
= Signal(n_units
, reset_less
=True)
100 self
.go_rd_i
= Signal(n_units
, reset_less
=True)
101 self
.go_wr_i
= Signal(n_units
, reset_less
=True)
102 self
.shadown_i
= Signal(n_units
, reset_less
=True)
103 self
.go_die_i
= Signal(n_units
, reset_less
=True)
105 self
.go_ad_i
= Signal(n_units
, reset_less
=True)
106 self
.go_st_i
= Signal(n_units
, reset_less
=True)
109 self
.busy_o
= Signal(n_units
, reset_less
=True)
110 self
.rd_rel_o
= Signal(n_units
, reset_less
=True)
111 self
.req_rel_o
= Signal(n_units
, reset_less
=True)
113 self
.ld_o
= Signal(n_units
, reset_less
=True) # op is LD
114 self
.st_o
= Signal(n_units
, reset_less
=True) # op is ST
115 self
.adr_rel_o
= Signal(n_units
, reset_less
=True)
116 self
.sto_rel_o
= Signal(n_units
, reset_less
=True)
117 self
.req_rel_o
= Signal(n_units
, reset_less
=True)
118 self
.load_mem_o
= Signal(n_units
, reset_less
=True)
119 self
.stwd_mem_o
= Signal(n_units
, reset_less
=True)
120 self
.addr_o
= Signal(rwid
, reset_less
=True)
122 # in/out register data (note: not register#, actual data)
123 self
.data_o
= Signal(rwid
, reset_less
=True)
124 self
.src1_i
= Signal(rwid
, reset_less
=True)
125 self
.src2_i
= Signal(rwid
, reset_less
=True)
128 def elaborate(self
, platform
):
132 for i
, alu
in enumerate(self
.units
):
133 setattr(m
.submodules
, "comp%d" % i
, alu
)
143 for alu
in self
.units
:
144 req_rel_l
.append(alu
.req_rel_o
)
145 rd_rel_l
.append(alu
.rd_rel_o
)
146 shadow_l
.append(alu
.shadown_i
)
147 godie_l
.append(alu
.go_die_i
)
148 go_wr_l
.append(alu
.go_wr_i
)
149 go_rd_l
.append(alu
.go_rd_i
)
150 issue_l
.append(alu
.issue_i
)
151 busy_l
.append(alu
.busy_o
)
152 comb
+= self
.rd_rel_o
.eq(Cat(*rd_rel_l
))
153 comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
154 comb
+= self
.busy_o
.eq(Cat(*busy_l
))
155 comb
+= Cat(*godie_l
).eq(self
.go_die_i
)
156 comb
+= Cat(*shadow_l
).eq(self
.shadown_i
)
157 comb
+= Cat(*go_wr_l
).eq(self
.go_wr_i
)
158 comb
+= Cat(*go_rd_l
).eq(self
.go_rd_i
)
159 comb
+= Cat(*issue_l
).eq(self
.issue_i
)
161 # connect data register input/output
163 # merge (OR) all integer FU / ALU outputs to a single value
165 data_o
= treereduce(self
.units
, "data_o")
166 comb
+= self
.data_o
.eq(data_o
)
168 addr_o
= treereduce(self
.units
, "addr_o")
169 comb
+= self
.addr_o
.eq(addr_o
)
171 for i
, alu
in enumerate(self
.units
):
172 comb
+= alu
.src1_i
.eq(self
.src1_i
)
173 comb
+= alu
.src2_i
.eq(self
.src2_i
)
175 if not self
.ldstmode
:
186 for alu
in self
.units
:
187 ld_l
.append(alu
.ld_o
)
188 st_l
.append(alu
.st_o
)
189 adr_rel_l
.append(alu
.adr_rel_o
)
190 sto_rel_l
.append(alu
.sto_rel_o
)
191 ldmem_l
.append(alu
.load_mem_o
)
192 stmem_l
.append(alu
.stwd_mem_o
)
193 go_ad_l
.append(alu
.go_ad_i
)
194 go_st_l
.append(alu
.go_st_i
)
195 comb
+= self
.ld_o
.eq(Cat(*ld_l
))
196 comb
+= self
.st_o
.eq(Cat(*st_l
))
197 comb
+= self
.adr_rel_o
.eq(Cat(*adr_rel_l
))
198 comb
+= self
.sto_rel_o
.eq(Cat(*sto_rel_l
))
199 comb
+= self
.load_mem_o
.eq(Cat(*ldmem_l
))
200 comb
+= self
.stwd_mem_o
.eq(Cat(*stmem_l
))
201 comb
+= Cat(*go_ad_l
).eq(self
.go_ad_i
)
202 comb
+= Cat(*go_st_l
).eq(self
.go_st_i
)
207 class CompUnitLDSTs(CompUnitsBase
):
209 def __init__(self
, rwid
, opwid
, n_ldsts
, mem
):
212 * :rwid: bit width of register file(s) - both FP and INT
213 * :opwid: operand bit width
218 self
.oper_i
= Signal(opwid
, reset_less
=True)
219 self
.imm_i
= Signal(rwid
, reset_less
=True)
223 for i
in range(n_ldsts
):
224 self
.alus
.append(ALU(rwid
))
227 for alu
in self
.alus
:
228 aluopwid
= 4 # see compldst.py for "internal" opcode
229 units
.append(LDSTCompUnit(rwid
, aluopwid
, alu
, mem
))
231 CompUnitsBase
.__init
__(self
, rwid
, units
, ldstmode
=True)
233 def elaborate(self
, platform
):
234 m
= CompUnitsBase
.elaborate(self
, platform
)
237 # hand the same operation to all units, 4 lower bits though
238 for alu
in self
.units
:
239 comb
+= alu
.oper_i
[0:4].eq(self
.oper_i
)
240 comb
+= alu
.imm_i
.eq(self
.imm_i
)
241 comb
+= alu
.isalu_i
.eq(0)
246 class CompUnitALUs(CompUnitsBase
):
248 def __init__(self
, rwid
, opwid
, n_alus
):
251 * :rwid: bit width of register file(s) - both FP and INT
252 * :opwid: operand bit width
257 self
.oper_i
= Signal(opwid
, reset_less
=True)
258 self
.imm_i
= Signal(rwid
, reset_less
=True)
262 for i
in range(n_alus
):
263 alus
.append(ALU(rwid
))
267 aluopwid
= 3 # extra bit for immediate mode
268 units
.append(ComputationUnitNoDelay(rwid
, aluopwid
, alu
))
270 CompUnitsBase
.__init
__(self
, rwid
, units
)
272 def elaborate(self
, platform
):
273 m
= CompUnitsBase
.elaborate(self
, platform
)
276 # hand the same operation to all units, only lower 3 bits though
277 for alu
in self
.units
:
278 comb
+= alu
.oper_i
[0:3].eq(self
.oper_i
)
279 comb
+= alu
.imm_i
.eq(self
.imm_i
)
284 class CompUnitBR(CompUnitsBase
):
286 def __init__(self
, rwid
, opwid
):
289 * :rwid: bit width of register file(s) - both FP and INT
290 * :opwid: operand bit width
292 Note: bgt unit is returned so that a shadow unit can be created
298 self
.oper_i
= Signal(opwid
, reset_less
=True)
299 self
.imm_i
= Signal(rwid
, reset_less
=True)
302 self
.bgt
= BranchALU(rwid
)
303 aluopwid
= 3 # extra bit for immediate mode
304 self
.br1
= ComputationUnitNoDelay(rwid
, aluopwid
, self
.bgt
)
305 CompUnitsBase
.__init
__(self
, rwid
, [self
.br1
])
307 def elaborate(self
, platform
):
308 m
= CompUnitsBase
.elaborate(self
, platform
)
311 # hand the same operation to all units
312 for alu
in self
.units
:
313 comb
+= alu
.oper_i
.eq(self
.oper_i
)
314 comb
+= alu
.imm_i
.eq(self
.imm_i
)
319 class FunctionUnits(Elaboratable
):
321 def __init__(self
, n_regs
, n_int_alus
):
323 self
.n_int_alus
= n_int_alus
325 self
.dest_i
= Signal(n_regs
, reset_less
=True) # Dest R# in
326 self
.src1_i
= Signal(n_regs
, reset_less
=True) # oper1 R# in
327 self
.src2_i
= Signal(n_regs
, reset_less
=True) # oper2 R# in
329 self
.g_int_rd_pend_o
= Signal(n_regs
, reset_less
=True)
330 self
.g_int_wr_pend_o
= Signal(n_regs
, reset_less
=True)
332 self
.dest_rsel_o
= Signal(n_regs
, reset_less
=True) # dest reg (bot)
333 self
.src1_rsel_o
= Signal(n_regs
, reset_less
=True) # src1 reg (bot)
334 self
.src2_rsel_o
= Signal(n_regs
, reset_less
=True) # src2 reg (bot)
336 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
337 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
339 self
.go_rd_i
= Signal(n_int_alus
, reset_less
=True)
340 self
.go_wr_i
= Signal(n_int_alus
, reset_less
=True)
341 self
.go_die_i
= Signal(n_int_alus
, reset_less
=True)
342 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
344 # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
346 def elaborate(self
, platform
):
351 n_intfus
= self
.n_int_alus
353 # Integer FU-FU Dep Matrix
354 intfudeps
= FUFUDepMatrix(n_intfus
, n_intfus
)
355 m
.submodules
.intfudeps
= intfudeps
356 # Integer FU-Reg Dep Matrix
357 intregdeps
= FURegDepMatrix(n_intfus
, self
.n_regs
, 2)
358 m
.submodules
.intregdeps
= intregdeps
360 comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.v_rd_rsel_o
)
361 comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.v_wr_rsel_o
)
363 comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.v_rd_rsel_o
)
364 comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.v_wr_rsel_o
)
366 comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
367 comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
368 self
.wr_pend_o
= intregdeps
.wr_pend_o
# also output for use in WaWGrid
370 comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
371 comb
+= intfudeps
.go_rd_i
.eq(self
.go_rd_i
)
372 comb
+= intfudeps
.go_wr_i
.eq(self
.go_wr_i
)
373 comb
+= intfudeps
.go_die_i
.eq(self
.go_die_i
)
374 comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
375 comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
377 # Connect function issue / arrays, and dest/src1/src2
378 comb
+= intregdeps
.dest_i
.eq(self
.dest_i
)
379 comb
+= intregdeps
.src_i
[0].eq(self
.src1_i
)
380 comb
+= intregdeps
.src_i
[1].eq(self
.src2_i
)
382 comb
+= intregdeps
.go_rd_i
.eq(self
.go_rd_i
)
383 comb
+= intregdeps
.go_wr_i
.eq(self
.go_wr_i
)
384 comb
+= intregdeps
.go_die_i
.eq(self
.go_die_i
)
385 comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
387 comb
+= self
.dest_rsel_o
.eq(intregdeps
.dest_rsel_o
)
388 comb
+= self
.src1_rsel_o
.eq(intregdeps
.src_rsel_o
[0])
389 comb
+= self
.src2_rsel_o
.eq(intregdeps
.src_rsel_o
[1])
394 class Scoreboard(Elaboratable
):
395 def __init__(self
, rwid
, n_regs
):
398 * :rwid: bit width of register file(s) - both FP and INT
399 * :n_regs: depth of register file(s) - number of FP and INT regs
405 self
.intregs
= RegFileArray(rwid
, n_regs
)
406 self
.fpregs
= RegFileArray(rwid
, n_regs
)
408 # Memory (test for now)
409 self
.mem
= TestMemory(self
.rwid
, 8) # not too big, takes too long
411 # issue q needs to get at these
412 self
.aluissue
= IssueUnitGroup(2)
413 self
.lsissue
= IssueUnitGroup(2)
414 self
.brissue
= IssueUnitGroup(1)
416 self
.alu_oper_i
= Signal(4, reset_less
=True)
417 self
.alu_imm_i
= Signal(rwid
, reset_less
=True)
418 self
.br_oper_i
= Signal(4, reset_less
=True)
419 self
.br_imm_i
= Signal(rwid
, reset_less
=True)
420 self
.ls_oper_i
= Signal(4, reset_less
=True)
421 self
.ls_imm_i
= Signal(rwid
, reset_less
=True)
424 self
.int_dest_i
= Signal(max=n_regs
, reset_less
=True) # Dest R# in
425 self
.int_src1_i
= Signal(max=n_regs
, reset_less
=True) # oper1 R# in
426 self
.int_src2_i
= Signal(max=n_regs
, reset_less
=True) # oper2 R# in
427 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
430 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
431 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
433 # for branch speculation experiment. branch_direction = 0 if
434 # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
435 # branch_succ and branch_fail are requests to have the current
436 # instruction be dependent on the branch unit "shadow" capability.
437 self
.branch_succ_i
= Signal(reset_less
=True)
438 self
.branch_fail_i
= Signal(reset_less
=True)
439 self
.branch_direction_o
= Signal(2, reset_less
=True)
441 def elaborate(self
, platform
):
446 m
.submodules
.intregs
= self
.intregs
447 m
.submodules
.fpregs
= self
.fpregs
448 m
.submodules
.mem
= mem
= self
.mem
451 int_dest
= self
.intregs
.write_port("dest")
452 int_src1
= self
.intregs
.read_port("src1")
453 int_src2
= self
.intregs
.read_port("src2")
455 fp_dest
= self
.fpregs
.write_port("dest")
456 fp_src1
= self
.fpregs
.read_port("src1")
457 fp_src2
= self
.fpregs
.read_port("src2")
459 # Int ALUs and BR ALUs
461 cua
= CompUnitALUs(self
.rwid
, 3, n_alus
=self
.aluissue
.n_insns
)
462 cub
= CompUnitBR(self
.rwid
, 3) # 1 BR ALUs
466 cul
= CompUnitLDSTs(self
.rwid
, 4, self
.lsissue
.n_insns
, None)
469 m
.submodules
.cu
= cu
= CompUnitsBase(self
.rwid
, [cua
, cul
, cub
])
470 bgt
= cub
.bgt
# get at the branch computation unit
474 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
)
477 m
.submodules
.memfus
= memfus
= MemFunctionUnits(n_ldsts
, 5)
479 # Memory Priority Picker 1: one gateway per memory port
480 mempick1
= GroupPicker(n_ldsts
) # picks 1 reader and 1 writer to intreg
481 m
.submodules
.mempick1
= mempick1
483 # Count of number of FUs
484 n_intfus
= n_int_alus
485 n_fp_fus
= 0 # for now
487 # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
488 intpick1
= GroupPicker(n_intfus
) # picks 1 reader and 1 writer to intreg
489 m
.submodules
.intpick1
= intpick1
492 regdecode
= RegDecode(self
.n_regs
)
493 m
.submodules
.regdecode
= regdecode
494 issueunit
= IssueUnitArray([self
.aluissue
, self
.lsissue
, self
.brissue
])
495 m
.submodules
.issueunit
= issueunit
497 # Shadow Matrix. currently n_intfus shadows, to be used for
498 # write-after-write hazards. NOTE: there is one extra for branches,
499 # so the shadow width is increased by 1
500 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_intfus
, n_intfus
, True)
501 m
.submodules
.bshadow
= bshadow
= ShadowMatrix(n_intfus
, 1, False)
503 # record previous instruction to cast shadow on current instruction
504 prev_shadow
= Signal(n_intfus
)
506 # Branch Speculation recorder. tracks the success/fail state as
507 # each instruction is issued, so that when the branch occurs the
508 # allow/cancel can be issued as appropriate.
509 m
.submodules
.specrec
= bspec
= BranchSpeculationRecord(n_intfus
)
512 # ok start wiring things together...
513 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
514 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
518 # Issue Unit is where it starts. set up some in/outs for this module
520 comb
+= [ regdecode
.dest_i
.eq(self
.int_dest_i
),
521 regdecode
.src1_i
.eq(self
.int_src1_i
),
522 regdecode
.src2_i
.eq(self
.int_src2_i
),
523 regdecode
.enable_i
.eq(self
.reg_enable_i
),
524 self
.issue_o
.eq(issueunit
.issue_o
)
527 # take these to outside (issue needs them)
528 comb
+= cua
.oper_i
.eq(self
.alu_oper_i
)
529 comb
+= cua
.imm_i
.eq(self
.alu_imm_i
)
530 comb
+= cub
.oper_i
.eq(self
.br_oper_i
)
531 comb
+= cub
.imm_i
.eq(self
.br_imm_i
)
532 comb
+= cul
.oper_i
.eq(self
.ls_oper_i
)
533 comb
+= cul
.imm_i
.eq(self
.ls_imm_i
)
535 # TODO: issueunit.f (FP)
537 # and int function issue / busy arrays, and dest/src1/src2
538 comb
+= intfus
.dest_i
.eq(regdecode
.dest_o
)
539 comb
+= intfus
.src1_i
.eq(regdecode
.src1_o
)
540 comb
+= intfus
.src2_i
.eq(regdecode
.src2_o
)
542 fn_issue_o
= issueunit
.fn_issue_o
544 comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
545 comb
+= issueunit
.busy_i
.eq(cu
.busy_o
)
546 comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
549 # Memory Function Unit
551 reset_b
= Signal(cul
.n_units
, reset_less
=True)
552 sync
+= reset_b
.eq(cul
.go_st_i | cul
.go_wr_i | cul
.go_die_i
)
554 comb
+= memfus
.fn_issue_i
.eq(cul
.issue_i
) # Comp Unit Issue -> Mem FUs
555 comb
+= memfus
.addr_en_i
.eq(cul
.adr_rel_o
) # Match enable on adr rel
556 comb
+= memfus
.addr_rs_i
.eq(reset_b
) # reset same as LDSTCompUnit
558 # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
559 # in a transitive fashion). This cycle activates based on LDSTCompUnit
560 # issue_i. multi-issue gets a bit more complex but not a lot.
561 prior_ldsts
= Signal(cul
.n_units
, reset_less
=True)
562 sync
+= prior_ldsts
.eq(memfus
.g_int_ld_pend_o | memfus
.g_int_st_pend_o
)
563 with m
.If(self
.ls_oper_i
[2]): # LD bit of operand
564 comb
+= memfus
.ld_i
.eq(cul
.issue_i | prior_ldsts
)
565 with m
.If(self
.ls_oper_i
[3]): # ST bit of operand
566 comb
+= memfus
.st_i
.eq(cul
.issue_i | prior_ldsts
)
568 # TODO: adr_rel_o needs to go into L1 Cache. for now,
569 # just immediately activate go_adr
570 comb
+= cul
.go_ad_i
.eq(cul
.adr_rel_o
)
572 # connect up address data
573 comb
+= memfus
.addrs_i
[0].eq(cul
.units
[0].addr_o
)
574 comb
+= memfus
.addrs_i
[1].eq(cul
.units
[1].addr_o
)
576 # connect loadable / storable to go_ld/go_st.
577 # XXX should only be done when the memory ld/st has actually happened!
578 go_st_i
= Signal(cul
.n_units
, reset_less
=True)
579 go_ld_i
= Signal(cul
.n_units
, reset_less
=True)
580 comb
+= go_ld_i
.eq(memfus
.loadable_o
& memfus
.addr_nomatch_o
&\
581 cul
.req_rel_o
& cul
.ld_o
)
582 comb
+= go_st_i
.eq(memfus
.storable_o
& memfus
.addr_nomatch_o
&\
583 cul
.sto_rel_o
& cul
.st_o
)
584 comb
+= memfus
.go_ld_i
.eq(go_ld_i
)
585 comb
+= memfus
.go_st_i
.eq(go_st_i
)
586 #comb += cul.go_wr_i.eq(go_ld_i)
587 comb
+= cul
.go_st_i
.eq(go_st_i
)
589 #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
590 #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
591 #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
594 # merge shadow matrices outputs
597 # these are explained in ShadowMatrix docstring, and are to be
598 # connected to the FUReg and FUFU Matrices, to get them to reset
599 anydie
= Signal(n_intfus
, reset_less
=True)
600 allshadown
= Signal(n_intfus
, reset_less
=True)
601 shreset
= Signal(n_intfus
, reset_less
=True)
602 comb
+= allshadown
.eq(shadows
.shadown_o
& bshadow
.shadown_o
)
603 comb
+= anydie
.eq(shadows
.go_die_o | bshadow
.go_die_o
)
604 comb
+= shreset
.eq(bspec
.match_g_o | bspec
.match_f_o
)
607 # connect fu-fu matrix
610 # Group Picker... done manually for now.
611 go_rd_o
= intpick1
.go_rd_o
612 go_wr_o
= intpick1
.go_wr_o
613 go_rd_i
= intfus
.go_rd_i
614 go_wr_i
= intfus
.go_wr_i
615 go_die_i
= intfus
.go_die_i
616 # NOTE: connect to the shadowed versions so that they can "die" (reset)
617 comb
+= go_rd_i
[0:n_intfus
].eq(go_rd_o
[0:n_intfus
]) # rd
618 comb
+= go_wr_i
[0:n_intfus
].eq(go_wr_o
[0:n_intfus
]) # wr
619 comb
+= go_die_i
[0:n_intfus
].eq(anydie
[0:n_intfus
]) # die
623 comb
+= intpick1
.rd_rel_i
[0:n_intfus
].eq(cu
.rd_rel_o
[0:n_intfus
])
624 comb
+= intpick1
.req_rel_i
[0:n_intfus
].eq(cu
.req_rel_o
[0:n_intfus
])
625 int_rd_o
= intfus
.readable_o
626 int_wr_o
= intfus
.writable_o
627 comb
+= intpick1
.readable_i
[0:n_intfus
].eq(int_rd_o
[0:n_intfus
])
628 comb
+= intpick1
.writable_i
[0:n_intfus
].eq(int_wr_o
[0:n_intfus
])
634 comb
+= shadows
.issue_i
.eq(fn_issue_o
)
635 #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
636 comb
+= shadows
.reset_i
[0:n_intfus
].eq(bshadow
.go_die_o
[0:n_intfus
])
638 # NOTE: this setup is for the instruction order preservation...
640 # connect shadows / go_dies to Computation Units
641 comb
+= cu
.shadown_i
[0:n_intfus
].eq(allshadown
)
642 comb
+= cu
.go_die_i
[0:n_intfus
].eq(anydie
)
644 # ok connect first n_int_fu shadows to busy lines, to create an
645 # instruction-order linked-list-like arrangement, using a bit-matrix
646 # (instead of e.g. a ring buffer).
648 # when written, the shadow can be cancelled (and was good)
649 for i
in range(n_intfus
):
650 comb
+= shadows
.s_good_i
[i
][0:n_intfus
].eq(go_wr_o
[0:n_intfus
])
652 # *previous* instruction shadows *current* instruction, and, obviously,
653 # if the previous is completed (!busy) don't cast the shadow!
654 comb
+= prev_shadow
.eq(~fn_issue_o
& cu
.busy_o
)
655 for i
in range(n_intfus
):
656 comb
+= shadows
.shadow_i
[i
][0:n_intfus
].eq(prev_shadow
)
659 # ... and this is for branch speculation. it uses the extra bit
660 # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
661 # only needs to set shadow_i, s_fail_i and s_good_i
663 # issue captures shadow_i (if enabled)
664 comb
+= bshadow
.reset_i
[0:n_intfus
].eq(shreset
[0:n_intfus
])
666 bactive
= Signal(reset_less
=True)
667 comb
+= bactive
.eq((bspec
.active_i | br1
.issue_i
) & ~br1
.go_wr_i
)
669 # instruction being issued (fn_issue_o) has a shadow cast by the branch
670 with m
.If(bactive
& (self
.branch_succ_i | self
.branch_fail_i
)):
671 comb
+= bshadow
.issue_i
.eq(fn_issue_o
)
672 for i
in range(n_intfus
):
673 with m
.If(fn_issue_o
& (Const(1<<i
))):
674 comb
+= bshadow
.shadow_i
[i
][0].eq(1)
676 # finally, we need an indicator to the test infrastructure as to
677 # whether the branch succeeded or failed, plus, link up to the
678 # "recorder" of whether the instruction was under shadow or not
680 with m
.If(br1
.issue_i
):
681 sync
+= bspec
.active_i
.eq(1)
682 with m
.If(self
.branch_succ_i
):
683 comb
+= bspec
.good_i
.eq(fn_issue_o
& 0x1f) # XXX MAGIC CONSTANT
684 with m
.If(self
.branch_fail_i
):
685 comb
+= bspec
.fail_i
.eq(fn_issue_o
& 0x1f) # XXX MAGIC CONSTANT
687 # branch is active (TODO: a better signal: this is over-using the
688 # go_write signal - actually the branch should not be "writing")
689 with m
.If(br1
.go_wr_i
):
690 sync
+= self
.branch_direction_o
.eq(br1
.data_o
+Const(1, 2))
691 sync
+= bspec
.active_i
.eq(0)
692 comb
+= bspec
.br_i
.eq(1)
693 # branch occurs if data == 1, failed if data == 0
694 comb
+= bspec
.br_ok_i
.eq(br1
.data_o
== 1)
695 for i
in range(n_intfus
):
696 # *expected* direction of the branch matched against *actual*
697 comb
+= bshadow
.s_good_i
[i
][0].eq(bspec
.match_g_o
[i
])
699 comb
+= bshadow
.s_fail_i
[i
][0].eq(bspec
.match_f_o
[i
])
702 # Connect Register File(s)
704 comb
+= int_dest
.wen
.eq(intfus
.dest_rsel_o
)
705 comb
+= int_src1
.ren
.eq(intfus
.src1_rsel_o
)
706 comb
+= int_src2
.ren
.eq(intfus
.src2_rsel_o
)
708 # connect ALUs to regfile
709 comb
+= int_dest
.data_i
.eq(cu
.data_o
)
710 comb
+= cu
.src1_i
.eq(int_src1
.data_o
)
711 comb
+= cu
.src2_i
.eq(int_src2
.data_o
)
713 # connect ALU Computation Units
714 comb
+= cu
.go_rd_i
[0:n_intfus
].eq(go_rd_o
[0:n_intfus
])
715 comb
+= cu
.go_wr_i
[0:n_intfus
].eq(go_wr_o
[0:n_intfus
])
716 comb
+= cu
.issue_i
[0:n_intfus
].eq(fn_issue_o
[0:n_intfus
])
721 yield from self
.intregs
722 yield from self
.fpregs
723 yield self
.int_dest_i
724 yield self
.int_src1_i
725 yield self
.int_src2_i
727 yield self
.branch_succ_i
728 yield self
.branch_fail_i
729 yield self
.branch_direction_o
735 class IssueToScoreboard(Elaboratable
):
737 def __init__(self
, qlen
, n_in
, n_out
, rwid
, opwid
, n_regs
):
745 mqbits
= (int(log(qlen
) / log(2))+2, False)
746 self
.p_add_i
= Signal(mqbits
) # instructions to add (from data_i)
747 self
.p_ready_o
= Signal() # instructions were added
748 self
.data_i
= Instruction
.nq(n_in
, "data_i", rwid
, opwid
)
750 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
751 self
.qlen_o
= Signal(mqbits
, reset_less
=True)
753 def elaborate(self
, platform
):
758 iq
= InstructionQ(self
.rwid
, self
.opw
, self
.qlen
, self
.n_in
, self
.n_out
)
759 sc
= Scoreboard(self
.rwid
, self
.n_regs
)
763 # get at the regfile for testing
764 self
.intregs
= sc
.intregs
766 # and the "busy" signal and instruction queue length
767 comb
+= self
.busy_o
.eq(sc
.busy_o
)
768 comb
+= self
.qlen_o
.eq(iq
.qlen_o
)
770 # link up instruction queue
771 comb
+= iq
.p_add_i
.eq(self
.p_add_i
)
772 comb
+= self
.p_ready_o
.eq(iq
.p_ready_o
)
773 for i
in range(self
.n_in
):
774 comb
+= eq(iq
.data_i
[i
], self
.data_i
[i
])
776 # take instruction and process it. note that it's possible to
777 # "inspect" the queue contents *without* actually removing the
778 # items. items are only removed when the
781 wait_issue_br
= Signal()
782 wait_issue_alu
= Signal()
783 wait_issue_ls
= Signal()
785 with m
.If(wait_issue_br | wait_issue_alu | wait_issue_ls
):
786 # set instruction pop length to 1 if the unit accepted
787 with m
.If(wait_issue_ls
& (sc
.lsissue
.fn_issue_o
!= 0)):
788 with m
.If(iq
.qlen_o
!= 0):
789 comb
+= iq
.n_sub_i
.eq(1)
790 with m
.If(wait_issue_br
& (sc
.brissue
.fn_issue_o
!= 0)):
791 with m
.If(iq
.qlen_o
!= 0):
792 comb
+= iq
.n_sub_i
.eq(1)
793 with m
.If(wait_issue_alu
& (sc
.aluissue
.fn_issue_o
!= 0)):
794 with m
.If(iq
.qlen_o
!= 0):
795 comb
+= iq
.n_sub_i
.eq(1)
797 # see if some instruction(s) are here. note that this is
798 # "inspecting" the in-place queue. note also that on the
799 # cycle following "waiting" for fn_issue_o to be set, the
800 # "resetting" done above (insn_i=0) could be re-ASSERTed.
801 with m
.If(iq
.qlen_o
!= 0):
802 # get the operands and operation
803 imm
= iq
.data_o
[0].imm_i
804 dest
= iq
.data_o
[0].dest_i
805 src1
= iq
.data_o
[0].src1_i
806 src2
= iq
.data_o
[0].src2_i
807 op
= iq
.data_o
[0].oper_i
808 opi
= iq
.data_o
[0].opim_i
# immediate set
810 # set the src/dest regs
811 comb
+= sc
.int_dest_i
.eq(dest
)
812 comb
+= sc
.int_src1_i
.eq(src1
)
813 comb
+= sc
.int_src2_i
.eq(src2
)
814 comb
+= sc
.reg_enable_i
.eq(1) # enable the regfile
816 # choose a Function-Unit-Group
817 with m
.If((op
& (0x3<<2)) != 0): # branch
818 comb
+= sc
.br_oper_i
.eq(Cat(op
[0:2], opi
))
819 comb
+= sc
.br_imm_i
.eq(imm
)
820 comb
+= sc
.brissue
.insn_i
.eq(1)
821 comb
+= wait_issue_br
.eq(1)
822 with m
.Elif((op
& (0x3<<4)) != 0): # ld/st
828 comb
+= sc
.ls_oper_i
.eq(Cat(op
[0], opi
[0], op
[4:6]))
829 comb
+= sc
.ls_imm_i
.eq(imm
)
830 comb
+= sc
.lsissue
.insn_i
.eq(1)
831 comb
+= wait_issue_ls
.eq(1)
833 comb
+= sc
.alu_oper_i
.eq(Cat(op
[0:2], opi
))
834 comb
+= sc
.alu_imm_i
.eq(imm
)
835 comb
+= sc
.aluissue
.insn_i
.eq(1)
836 comb
+= wait_issue_alu
.eq(1)
839 # these indicate that the instruction is to be made
840 # shadow-dependent on
841 # (either) branch success or branch fail
842 #yield sc.branch_fail_i.eq(branch_fail)
843 #yield sc.branch_succ_i.eq(branch_success)
849 for o
in self
.data_i
:
868 def __init__(self
, rwidth
, nregs
):
870 self
.regs
= [0] * nregs
872 def op(self
, op
, op_imm
, imm
, src1
, src2
, dest
):
873 maxbits
= (1 << self
.rwidth
) - 1
874 src1
= self
.regs
[src1
] & maxbits
878 src2
= self
.regs
[src2
] & maxbits
886 val
= src1
>> (src2
& maxbits
)
888 val
= int(src1
> src2
)
890 val
= int(src1
< src2
)
892 val
= int(src1
== src2
)
894 val
= int(src1
!= src2
)
896 return 0 # LD/ST TODO
898 self
.setval(dest
, val
)
def setval(self, dest, val):
    """Log the write, then record ``val`` in ``self.regs[dest]``."""
    shown = hex(val)
    print("sim setval", dest, shown)
    self.regs[dest] = val
906 for i
, val
in enumerate(self
.regs
):
907 reg
= yield dut
.intregs
.regs
[i
].reg
908 okstr
= "OK" if reg
== val
else "!ok"
909 print("reg %d expected %x received %x %s" % (i
, val
, reg
, okstr
))
911 def check(self
, dut
):
912 for i
, val
in enumerate(self
.regs
):
913 reg
= yield dut
.intregs
.regs
[i
].reg
915 print("reg %d expected %x received %x\n" % (i
, val
, reg
))
916 yield from self
.dump(dut
)
919 def instr_q(dut
, op
, op_imm
, imm
, src1
, src2
, dest
,
920 branch_success
, branch_fail
):
921 instrs
= [{'oper_i': op
, 'dest_i': dest
, 'imm_i': imm
, 'opim_i': op_imm
,
922 'src1_i': src1
, 'src2_i': src2
}]
925 for idx
in range(sendlen
):
926 yield from eq(dut
.data_i
[idx
], instrs
[idx
])
927 di
= yield dut
.data_i
[idx
]
928 print ("senddata %d %x" % (idx
, di
))
929 yield dut
.p_add_i
.eq(sendlen
)
931 o_p_ready
= yield dut
.p_ready_o
934 o_p_ready
= yield dut
.p_ready_o
936 yield dut
.p_add_i
.eq(0)
939 def int_instr(dut
, op
, imm
, src1
, src2
, dest
, branch_success
, branch_fail
):
940 yield from disable_issue(dut
)
941 yield dut
.int_dest_i
.eq(dest
)
942 yield dut
.int_src1_i
.eq(src1
)
943 yield dut
.int_src2_i
.eq(src2
)
944 if (op
& (0x3<<2)) != 0: # branch
945 yield dut
.brissue
.insn_i
.eq(1)
946 yield dut
.br_oper_i
.eq(Const(op
& 0x3, 2))
947 yield dut
.br_imm_i
.eq(imm
)
948 dut_issue
= dut
.brissue
950 yield dut
.aluissue
.insn_i
.eq(1)
951 yield dut
.alu_oper_i
.eq(Const(op
& 0x3, 2))
952 yield dut
.alu_imm_i
.eq(imm
)
953 dut_issue
= dut
.aluissue
954 yield dut
.reg_enable_i
.eq(1)
956 # these indicate that the instruction is to be made shadow-dependent on
957 # (either) branch success or branch fail
958 yield dut
.branch_fail_i
.eq(branch_fail
)
959 yield dut
.branch_succ_i
.eq(branch_success
)
962 yield from wait_for_issue(dut
, dut_issue
)
965 def print_reg(dut
, rnums
):
968 reg
= yield dut
.intregs
.regs
[rnum
].reg
969 rs
.append("%x" % reg
)
970 rnums
= map(str, rnums
)
971 print ("reg %s: %s" % (','.join(rnums
), ','.join(rs
)))
974 def create_random_ops(dut
, n_ops
, shadowing
=False, max_opnums
=3):
976 for i
in range(n_ops
):
977 src1
= randint(1, dut
.n_regs
-1)
978 src2
= randint(1, dut
.n_regs
-1)
979 imm
= randint(1, (1<<dut
.rwid
)-1)
980 dest
= randint(1, dut
.n_regs
-1)
981 op
= randint(0, max_opnums
)
982 opi
= 0 if randint(0, 2) else 1 # set true if random is nonzero
985 insts
.append((src1
, src2
, dest
, op
, opi
, imm
, (0, 0)))
987 insts
.append((src1
, src2
, dest
, op
, opi
, imm
))
991 def wait_for_busy_clear(dut
):
993 busy_o
= yield dut
.busy_o
def disable_issue(dut):
    """Yield assignments that clear ``insn_i`` on every issue group.

    The ALU, branch and load/store groups are handled in that order, one
    ``insn_i.eq(0)`` statement per step of the generator.
    """
    for group_name in ("aluissue", "brissue", "lsissue"):
        # looked up lazily so each step touches only its own issue group
        yield getattr(dut, group_name).insn_i.eq(0)
1005 def wait_for_issue(dut
, dut_issue
):
1007 issue_o
= yield dut_issue
.fn_issue_o
1009 yield from disable_issue(dut
)
1010 yield dut
.reg_enable_i
.eq(0)
1013 #yield from print_reg(dut, [1,2,3])
1015 #yield from print_reg(dut, [1,2,3])
# Simulation process exercising branch handling on the scoreboard DUT,
# mirrored against the `alusim` reference model.
# Visible steps: drive dut.branch_direction_o to 0, load random values into
# registers 1..n_regs-1 of both the DUT and alusim, build an instruction
# list containing branch entries (whose "dest" slot is a
# (branch_ok, branch_fail) pair of instruction lists), issue entries through
# int_instr(), then replay a deep copy of the list through alusim.op() and
# finish with alusim.check(dut) / alusim.dump(dut).
# NOTE(review): several control-flow lines of this function (loop headers,
# guards) are not present in this listing, so the exact structure must be
# confirmed against the full file before relying on the notes below.
1017 def scoreboard_branch_sim(dut
, alusim
):
1023 print ("rseed", iseed
)
1027 yield dut
.branch_direction_o
.eq(0)
1029 # set random values in the registers
1030 for i
in range(1, dut
.n_regs
):
1032 val
= randint(0, (1<<alusim
.rwidth
)-1)
# same value goes into the DUT register and the reference model
1033 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
1034 alusim
.setval(i
, val
)
1037 # create some instructions: branches create a tree
# NOTE(review): create_random_ops appends tuples that include opi and imm
# (6 or 7 elements), whereas the pop(0) further down unpacks exactly 5
# elements -- confirm this call is still meant to feed that loop.
1038 insts
= create_random_ops(dut
, 1, True, 1)
1039 #insts.append((6, 6, 1, 2, (0, 0)))
1040 #insts.append((4, 3, 3, 0, (0, 0)))
1042 src1
= randint(1, dut
.n_regs
-1)
1043 src2
= randint(1, dut
.n_regs
-1)
1045 op
= 4 # only BGT at the moment
1047 branch_ok
= create_random_ops(dut
, 1, True, 1)
1048 branch_fail
= create_random_ops(dut
, 1, True, 1)
# branch entry: the dest slot carries the (branch_ok, branch_fail) lists
1050 insts
.append((src1
, src2
, (branch_ok
, branch_fail
), op
, (0, 0)))
1054 insts
.append( (3, 5, 2, 0, (0, 0)) )
1057 #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
1058 branch_ok
.append( None )
1059 branch_fail
.append( (1, 1, 2, 0, (0, 1)) )
1060 #branch_fail.append( None )
1061 insts
.append( (6, 4, (branch_ok
, branch_fail
), 4, (0, 0)) )
# independent copy for the reference-model pass: insts itself is consumed
# by pop(0) while issuing
1063 siminsts
= deepcopy(insts
)
1065 # issue instruction(s)
1068 branch_direction
= 0
1073 branch_direction
= yield dut
.branch_direction_o
# way branch went
# entry layout here: (src1, src2, dest, op, (shadow_on, shadow_off))
1074 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = insts
.pop(0)
1075 if branch_direction
== 1 and shadow_on
:
1076 print ("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
1077 continue # branch was "success" and this is a "failed"... skip
1078 if branch_direction
== 2 and shadow_off
:
1079 print ("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
1080 continue # branch was "fail" and this is a "success"... skip
1081 if branch_direction
!= 0:
# for a branch entry, dest is the (branch_ok, branch_fail) pair
1086 branch_ok
, branch_fail
= dest
1088 # ok zip up the branch success / fail instructions and
1089 # drop them into the queue, one marked "to have branch success"
1090 # the other to be marked shadow branch "fail".
1091 # one out of each of these will be cancelled
1092 for ok
, fl
in zip(branch_ok
, branch_fail
):
# NOTE(review): `instrs` is not assigned anywhere in the visible part of
# this function (the queue is named `insts`) -- likely a typo, confirm.
1094 instrs
.append((ok
[0], ok
[1], ok
[2], ok
[3], (1, 0)))
1096 instrs
.append((fl
[0], fl
[1], fl
[2], fl
[3], (0, 1)))
1097 print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
1098 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
# hand the instruction to the DUT together with its shadow flags
1099 yield from int_instr(dut
, op
, src1
, src2
, dest
,
1100 shadow_on
, shadow_off
)
1102 # wait for all instructions to stop before checking
1104 yield from wait_for_busy_clear(dut
)
# reference-model pass over the copied instruction list
1108 instr
= siminsts
.pop(0)
1111 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = instr
1115 branch_ok
, branch_fail
= dest
1117 print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
1118 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
# result of the model's op is kept as branch_res; the lines that select
# between the two appends below are not shown in this listing
1119 branch_res
= alusim
.op(op
, src1
, src2
, dest
)
1122 siminsts
+= branch_ok
1124 siminsts
+= branch_fail
# compare DUT state against the model, then dump it
1127 yield from alusim
.check(dut
)
1128 yield from alusim
.dump(dut
)
# Main scoreboard simulation process: loads random values into registers
# 1..n_regs-1 of both the DUT and the alusim model, assembles a list of
# instruction tuples, then for each one applies it to alusim via
# alusim.op() and submits it to the DUT via instr_q(); afterwards it reads
# dut.qlen_o, waits with wait_for_busy_clear() and compares/dumps state via
# alusim.check(dut) / alusim.dump(dut).
# NOTE(review): apart from the one `if True:` below, the guards that
# select between the groups of instrs.append(...) calls are not present
# in this listing -- confirm which groups are live against the full file.
1131 def scoreboard_sim(dut
, alusim
):
1137 # set random values in the registers
1138 for i
in range(1, dut
.n_regs
):
1139 val
= randint(0, (1<<alusim
.rwidth
)-1)
# same value goes into the DUT register and the reference model
1142 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
1143 alusim
.setval(i
, val
)
1145 # create some instructions (some random, some regression tests)
1148 instrs
= create_random_ops(dut
, 15, True, 4)
1150 if True: # LD/ST test (with immediate)
1151 instrs
.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )
1152 #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )
1155 instrs
.append( (1, 2, 2, 1, 1, 20, (0, 0)) )
# NOTE(review): the issue loop at the end unpacks 7 fields
# (src1, src2, dest, op, opi, imm, (br_ok, br_fail)); the 4-, 5- and
# 6-element tuples in the groups below would not unpack if their group
# were enabled -- confirm they are disabled or bring them up to date.
1158 instrs
.append( (7, 3, 2, 4, (0, 0)) )
1159 instrs
.append( (7, 6, 6, 2, (0, 0)) )
1160 instrs
.append( (1, 7, 2, 2, (0, 0)) )
1163 instrs
.append((2, 3, 3, 0, 0, 0, (0, 0)))
1164 instrs
.append((5, 3, 3, 1, 0, 0, (0, 0)))
1165 instrs
.append((3, 5, 5, 2, 0, 0, (0, 0)))
1166 instrs
.append((5, 3, 3, 3, 0, 0, (0, 0)))
1167 instrs
.append((3, 5, 5, 0, 0, 0, (0, 0)))
1170 instrs
.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
1171 instrs
.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
1172 instrs
.append( (1, 4, 7, 4, 1, 23652, (0, 0)))
1175 instrs
.append((5, 6, 2, 1))
1176 instrs
.append((2, 2, 4, 0))
1177 #instrs.append((2, 2, 3, 1))
1180 instrs
.append((2, 1, 2, 3))
1183 instrs
.append((2, 6, 2, 1))
1184 instrs
.append((2, 1, 2, 0))
1187 instrs
.append((1, 2, 7, 2))
1188 instrs
.append((7, 1, 5, 0))
1189 instrs
.append((4, 4, 1, 1))
1192 instrs
.append((5, 6, 2, 2))
1193 instrs
.append((1, 1, 4, 1))
1194 instrs
.append((6, 5, 3, 0))
1197 # Write-after-Write Hazard
1198 instrs
.append( (3, 6, 7, 2) )
1199 instrs
.append( (4, 4, 7, 1) )
1202 # self-read/write-after-write followed by Read-after-Write
1203 instrs
.append((1, 1, 1, 1))
1204 instrs
.append((1, 5, 3, 0))
1207 # Read-after-Write followed by self-read-after-write
1208 instrs
.append((5, 6, 1, 2))
1209 instrs
.append((1, 1, 1, 1))
1212 # self-read-write sandwich
1213 instrs
.append((5, 6, 1, 2))
1214 instrs
.append((1, 1, 1, 1))
1215 instrs
.append((1, 5, 3, 0))
1218 # very weird failure
1219 instrs
.append( (5, 2, 5, 2) )
1220 instrs
.append( (2, 6, 3, 0) )
1221 instrs
.append( (4, 2, 2, 1) )
# register 5 is preset to v1 in both DUT and model; register 3 is set to 5
# on the DUT (v1 is assigned on a line not shown in this listing)
1225 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1226 alusim
.setval(5, v1
)
1227 yield dut
.intregs
.regs
[3].reg
.eq(5)
1229 instrs
.append((5, 3, 3, 4, (0, 0)))
1230 instrs
.append((4, 2, 1, 2, (0, 1)))
# same register preset as above, followed by the opposite shadow flag pair
1234 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1235 alusim
.setval(5, v1
)
1236 yield dut
.intregs
.regs
[3].reg
.eq(5)
1238 instrs
.append((5, 3, 3, 4, (0, 0)))
1239 instrs
.append((4, 2, 1, 2, (1, 0)))
1242 instrs
.append( (4, 3, 5, 1, 0, (0, 0)) )
1243 instrs
.append( (5, 2, 3, 1, 0, (0, 0)) )
1244 instrs
.append( (7, 1, 5, 2, 0, (0, 0)) )
1245 instrs
.append( (5, 6, 6, 4, 0, (0, 0)) )
1246 instrs
.append( (7, 5, 2, 2, 0, (1, 0)) )
1247 instrs
.append( (1, 7, 5, 0, 0, (0, 1)) )
1248 instrs
.append( (1, 6, 1, 2, 0, (1, 0)) )
1249 instrs
.append( (1, 6, 7, 3, 0, (0, 0)) )
1250 instrs
.append( (6, 7, 7, 0, 0, (0, 0)) )
1252 # issue instruction(s), wait for issue to be free before proceeding
1253 for i
, instr
in enumerate(instrs
):
# expected tuple layout: (src1, src2, dest, op, opi, imm, (br_ok, br_fail))
1254 src1
, src2
, dest
, op
, opi
, imm
, (br_ok
, br_fail
) = instr
1256 print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
1257 (i
, src1
, src2
, dest
, op
, opi
, imm
))
# apply to the reference model first, then queue the same instruction on
# the DUT
1258 alusim
.op(op
, opi
, imm
, src1
, src2
, dest
)
1259 yield from instr_q(dut
, op
, opi
, imm
, src1
, src2
, dest
,
1262 # wait for all instructions to stop before checking
1264 iqlen
= yield dut
.qlen_o
1272 yield from wait_for_busy_clear(dut
)
# compare DUT state against the model, then dump it
1275 yield from alusim
.check(dut
)
1276 yield from alusim
.dump(dut
)
# Test entry point: builds an IssueToScoreboard DUT plus RegSim / MemSim
# reference models, converts the DUT to RTLIL (opening
# "test_scoreboard6600.il" for writing), and runs scoreboard_sim under
# run_simulation with waveforms written to 'test_scoreboard6600.vcd'.
# NOTE(review): memsim is created but not referenced in the visible lines,
# and the body of the `with` block is not present in this listing --
# presumably it writes `vl` to the file; confirm against the full file.
1279 def test_scoreboard():
1280 dut
= IssueToScoreboard(2, 1, 1, 16, 8, 8)
1281 alusim
= RegSim(16, 8)
1282 memsim
= MemSim(16, 16)
1283 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
1284 with
open("test_scoreboard6600.il", "w") as f
:
1287 run_simulation(dut
, scoreboard_sim(dut
, alusim
),
1288 vcd_name
='test_scoreboard6600.vcd')
# alternative run using the branch simulation process, currently disabled
1290 #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
1291 # vcd_name='test_scoreboard6600.vcd')
1294 if __name__
== '__main__':