1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
7 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
8 from scoreboard
.global_pending
import GlobalPending
9 from scoreboard
.group_picker
import GroupPicker
10 from scoreboard
.issue_unit
import IssueUnitGroup
, IssueUnitArray
, RegDecode
11 from scoreboard
.shadow
import ShadowMatrix
, BranchSpeculationRecord
12 from scoreboard
.instruction_q
import Instruction
, InstructionQ
14 from compalu
import ComputationUnitNoDelay
16 from alu_hier
import ALU
, BranchALU
17 from nmutil
.latch
import SRLatch
18 from nmutil
.nmoperator
import eq
20 from random
import randint
, seed
21 from copy
import deepcopy
25 class Memory(Elaboratable
):
26 def __init__(self
, regwid
, addrw
):
27 self
.ddepth
= regwid
/8
28 depth
= (1<<addrw
) / self
.ddepth
29 self
.adr
= Signal(addrw
)
30 self
.dat_r
= Signal(regwid
)
31 self
.dat_w
= Signal(regwid
)
33 self
.mem
= Memory(width
=regwid
, depth
=depth
, init
=range(0, depth
))
35 def elaborate(self
, platform
):
37 m
.submodules
.rdport
= rdport
= self
.mem
.read_port()
38 m
.submodules
.wrport
= wrport
= self
.mem
.write_port()
40 rdport
.addr
.eq(self
.adr
[self
.ddepth
:]), # ignore low bits
41 self
.dat_r
.eq(rdport
.data
),
42 wrport
.addr
.eq(self
.adr
),
43 wrport
.data
.eq(self
.dat_w
),
44 wrport
.en
.eq(self
.we
),
50 def __init__(self
, regwid
, addrw
):
52 self
.ddepth
= regwid
//8
53 depth
= (1<<addrw
) // self
.ddepth
54 self
.mem
= list(range(0, depth
))
57 return self
.mem
[addr
>>self
.ddepth
]
def st(self, addr, data):
    """Store ``data`` into the simulated memory at ``addr``.

    The value is truncated to ``self.regwid`` bits before being written,
    and the list slot is selected by ``addr >> self.ddepth``.

    NOTE(review): the shift amount is ``ddepth`` itself, not log2 of it.
    The read path visible next to this method uses the same expression,
    so loads and stores agree with each other -- confirm the intended
    address-to-word mapping before changing either one.
    """
    # keep only the low regwid bits of the value being stored
    truncated = data & ((1 << self.regwid) - 1)
    slot = addr >> self.ddepth
    self.mem[slot] = truncated
63 class CompUnitsBase(Elaboratable
):
64 """ Computation Unit Base class.
66 Amazingly, this class works recursively. It's supposed to just
67 look after some ALUs (that can handle the same operations),
68 grouping them together, however it turns out that the same code
69 can also group *groups* of Computation Units together as well.
71 Basically it was intended just to concatenate the ALU's issue,
72 go_rd etc. signals together, which start out as bits and become
73 sequences. Turns out that the same trick works just as well
76 So this class may be used recursively to present a top-level
77 sequential concatenation of all the signals in and out of
78 ALUs, whilst at the same time making it convenient to group
81 At the lower level, the intent is that groups of (identical)
82 ALUs may be passed the same operation. Even beyond that,
83 the intent is that that group of (identical) ALUs actually
84 share the *same pipeline* and as such become a "Concurrent
85 Computation Unit" as defined by Mitch Alsup (see section
88 def __init__(self
, rwid
, units
):
91 * :rwid: bit width of register file(s) - both FP and INT
92 * :units: sequence of ALUs (or CompUnitsBase derivatives)
97 if units
and isinstance(units
[0], CompUnitsBase
):
100 self
.n_units
+= u
.n_units
102 self
.n_units
= len(units
)
104 n_units
= self
.n_units
107 self
.issue_i
= Signal(n_units
, reset_less
=True)
108 self
.go_rd_i
= Signal(n_units
, reset_less
=True)
109 self
.go_wr_i
= Signal(n_units
, reset_less
=True)
110 self
.shadown_i
= Signal(n_units
, reset_less
=True)
111 self
.go_die_i
= Signal(n_units
, reset_less
=True)
114 self
.busy_o
= Signal(n_units
, reset_less
=True)
115 self
.rd_rel_o
= Signal(n_units
, reset_less
=True)
116 self
.req_rel_o
= Signal(n_units
, reset_less
=True)
118 # in/out register data (note: not register#, actual data)
119 self
.data_o
= Signal(rwid
, reset_less
=True)
120 self
.src1_i
= Signal(rwid
, reset_less
=True)
121 self
.src2_i
= Signal(rwid
, reset_less
=True)
124 def elaborate(self
, platform
):
128 for i
, alu
in enumerate(self
.units
):
129 setattr(m
.submodules
, "comp%d" % i
, alu
)
139 for alu
in self
.units
:
140 req_rel_l
.append(alu
.req_rel_o
)
141 rd_rel_l
.append(alu
.rd_rel_o
)
142 shadow_l
.append(alu
.shadown_i
)
143 godie_l
.append(alu
.go_die_i
)
144 go_wr_l
.append(alu
.go_wr_i
)
145 go_rd_l
.append(alu
.go_rd_i
)
146 issue_l
.append(alu
.issue_i
)
147 busy_l
.append(alu
.busy_o
)
148 comb
+= self
.rd_rel_o
.eq(Cat(*rd_rel_l
))
149 comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
150 comb
+= self
.busy_o
.eq(Cat(*busy_l
))
151 comb
+= Cat(*godie_l
).eq(self
.go_die_i
)
152 comb
+= Cat(*shadow_l
).eq(self
.shadown_i
)
153 comb
+= Cat(*go_wr_l
).eq(self
.go_wr_i
)
154 comb
+= Cat(*go_rd_l
).eq(self
.go_rd_i
)
155 comb
+= Cat(*issue_l
).eq(self
.issue_i
)
157 # connect data register input/output
159 # merge (OR) all integer FU / ALU outputs to a single value
160 # bit of a hack: treereduce needs a list with an item named "data_o"
162 data_o
= treereduce(self
.units
)
163 comb
+= self
.data_o
.eq(data_o
)
165 for i
, alu
in enumerate(self
.units
):
166 comb
+= alu
.src1_i
.eq(self
.src1_i
)
167 comb
+= alu
.src2_i
.eq(self
.src2_i
)
172 class CompUnitALUs(CompUnitsBase
):
174 def __init__(self
, rwid
, opwid
):
177 * :rwid: bit width of register file(s) - both FP and INT
178 * :opwid: operand bit width
183 self
.oper_i
= Signal(opwid
, reset_less
=True)
192 for alu
in [add
, sub
, mul
, shf
]:
193 aluopwid
= 3 # extra bit for immediate mode
194 units
.append(ComputationUnitNoDelay(rwid
, aluopwid
, alu
))
196 CompUnitsBase
.__init
__(self
, rwid
, units
)
198 def elaborate(self
, platform
):
199 m
= CompUnitsBase
.elaborate(self
, platform
)
202 # hand the same operation to all units, only lower 2 bits though
203 for alu
in self
.units
:
204 comb
+= alu
.oper_i
[0:2].eq(self
.oper_i
)
209 class CompUnitBR(CompUnitsBase
):
211 def __init__(self
, rwid
, opwid
):
214 * :rwid: bit width of register file(s) - both FP and INT
215 * :opwid: operand bit width
217 Note: bgt unit is returned so that a shadow unit can be created
223 self
.oper_i
= Signal(opwid
, reset_less
=True)
226 self
.bgt
= BranchALU(rwid
)
227 self
.br1
= ComputationUnitNoDelay(rwid
, 3, self
.bgt
)
228 CompUnitsBase
.__init
__(self
, rwid
, [self
.br1
])
230 def elaborate(self
, platform
):
231 m
= CompUnitsBase
.elaborate(self
, platform
)
234 # hand the same operation to all units
235 for alu
in self
.units
:
236 comb
+= alu
.oper_i
.eq(self
.oper_i
)
241 class FunctionUnits(Elaboratable
):
243 def __init__(self
, n_regs
, n_int_alus
):
245 self
.n_int_alus
= n_int_alus
247 self
.dest_i
= Signal(n_regs
, reset_less
=True) # Dest R# in
248 self
.src1_i
= Signal(n_regs
, reset_less
=True) # oper1 R# in
249 self
.src2_i
= Signal(n_regs
, reset_less
=True) # oper2 R# in
251 self
.g_int_rd_pend_o
= Signal(n_regs
, reset_less
=True)
252 self
.g_int_wr_pend_o
= Signal(n_regs
, reset_less
=True)
254 self
.dest_rsel_o
= Signal(n_regs
, reset_less
=True) # dest reg (bot)
255 self
.src1_rsel_o
= Signal(n_regs
, reset_less
=True) # src1 reg (bot)
256 self
.src2_rsel_o
= Signal(n_regs
, reset_less
=True) # src2 reg (bot)
258 self
.req_rel_i
= Signal(n_int_alus
, reset_less
= True)
259 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
260 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
262 self
.go_rd_i
= Signal(n_int_alus
, reset_less
=True)
263 self
.go_wr_i
= Signal(n_int_alus
, reset_less
=True)
264 self
.go_die_i
= Signal(n_int_alus
, reset_less
=True)
265 self
.req_rel_o
= Signal(n_int_alus
, reset_less
=True)
266 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
268 # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
270 def elaborate(self
, platform
):
275 n_intfus
= self
.n_int_alus
277 # Integer FU-FU Dep Matrix
278 intfudeps
= FUFUDepMatrix(n_intfus
, n_intfus
)
279 m
.submodules
.intfudeps
= intfudeps
280 # Integer FU-Reg Dep Matrix
281 intregdeps
= FURegDepMatrix(n_intfus
, self
.n_regs
)
282 m
.submodules
.intregdeps
= intregdeps
284 comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.rd_rsel_o
)
285 comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.wr_rsel_o
)
287 comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.rd_rsel_o
)
288 comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.wr_rsel_o
)
290 comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
291 comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
292 self
.wr_pend_o
= intregdeps
.wr_pend_o
# also output for use in WaWGrid
294 comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
295 comb
+= intfudeps
.go_rd_i
.eq(self
.go_rd_i
)
296 comb
+= intfudeps
.go_wr_i
.eq(self
.go_wr_i
)
297 comb
+= intfudeps
.go_die_i
.eq(self
.go_die_i
)
298 comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
299 comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
301 # Connect function issue / arrays, and dest/src1/src2
302 comb
+= intregdeps
.dest_i
.eq(self
.dest_i
)
303 comb
+= intregdeps
.src1_i
.eq(self
.src1_i
)
304 comb
+= intregdeps
.src2_i
.eq(self
.src2_i
)
306 comb
+= intregdeps
.go_rd_i
.eq(self
.go_rd_i
)
307 comb
+= intregdeps
.go_wr_i
.eq(self
.go_wr_i
)
308 comb
+= intregdeps
.go_die_i
.eq(self
.go_die_i
)
309 comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
311 comb
+= self
.dest_rsel_o
.eq(intregdeps
.dest_rsel_o
)
312 comb
+= self
.src1_rsel_o
.eq(intregdeps
.src1_rsel_o
)
313 comb
+= self
.src2_rsel_o
.eq(intregdeps
.src2_rsel_o
)
318 class Scoreboard(Elaboratable
):
319 def __init__(self
, rwid
, n_regs
):
322 * :rwid: bit width of register file(s) - both FP and INT
323 * :n_regs: depth of register file(s) - number of FP and INT regs
329 self
.intregs
= RegFileArray(rwid
, n_regs
)
330 self
.fpregs
= RegFileArray(rwid
, n_regs
)
332 # issue q needs to get at these
333 self
.aluissue
= IssueUnitGroup(4)
334 self
.brissue
= IssueUnitGroup(1)
336 self
.alu_oper_i
= Signal(4, reset_less
=True)
337 self
.br_oper_i
= Signal(4, reset_less
=True)
340 self
.int_dest_i
= Signal(max=n_regs
, reset_less
=True) # Dest R# in
341 self
.int_src1_i
= Signal(max=n_regs
, reset_less
=True) # oper1 R# in
342 self
.int_src2_i
= Signal(max=n_regs
, reset_less
=True) # oper2 R# in
343 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
346 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
347 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
349 # for branch speculation experiment. branch_direction = 0 if
350 # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
351 # branch_succ and branch_fail are requests to have the current
352 # instruction be dependent on the branch unit "shadow" capability.
353 self
.branch_succ_i
= Signal(reset_less
=True)
354 self
.branch_fail_i
= Signal(reset_less
=True)
355 self
.branch_direction_o
= Signal(2, reset_less
=True)
357 def elaborate(self
, platform
):
362 m
.submodules
.intregs
= self
.intregs
363 m
.submodules
.fpregs
= self
.fpregs
366 int_dest
= self
.intregs
.write_port("dest")
367 int_src1
= self
.intregs
.read_port("src1")
368 int_src2
= self
.intregs
.read_port("src2")
370 fp_dest
= self
.fpregs
.write_port("dest")
371 fp_src1
= self
.fpregs
.read_port("src1")
372 fp_src2
= self
.fpregs
.read_port("src2")
374 # Int ALUs and Comp Units
376 cua
= CompUnitALUs(self
.rwid
, 3)
377 cub
= CompUnitBR(self
.rwid
, 2)
378 m
.submodules
.cu
= cu
= CompUnitsBase(self
.rwid
, [cua
, cub
])
379 bgt
= cub
.bgt
# get at the branch computation unit
383 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
)
385 # Count of number of FUs
386 n_intfus
= n_int_alus
387 n_fp_fus
= 0 # for now
389 # Integer Priority Picker 1: Adder + Subtractor
390 intpick1
= GroupPicker(n_intfus
) # picks between add, sub, mul and shf
391 m
.submodules
.intpick1
= intpick1
394 regdecode
= RegDecode(self
.n_regs
)
395 m
.submodules
.regdecode
= regdecode
396 issueunit
= IssueUnitArray([self
.aluissue
, self
.brissue
])
397 m
.submodules
.issueunit
= issueunit
399 # Shadow Matrix. currently n_intfus shadows, to be used for
400 # write-after-write hazards. NOTE: there is one extra for branches,
401 # so the shadow width is increased by 1
402 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_intfus
, n_intfus
, True)
403 m
.submodules
.bshadow
= bshadow
= ShadowMatrix(n_intfus
, 1, False)
405 # record previous instruction to cast shadow on current instruction
406 prev_shadow
= Signal(n_intfus
)
408 # Branch Speculation recorder. tracks the success/fail state as
409 # each instruction is issued, so that when the branch occurs the
410 # allow/cancel can be issued as appropriate.
411 m
.submodules
.specrec
= bspec
= BranchSpeculationRecord(n_intfus
)
414 # ok start wiring things together...
415 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
416 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
420 # Issue Unit is where it starts. set up some in/outs for this module
422 comb
+= [ regdecode
.dest_i
.eq(self
.int_dest_i
),
423 regdecode
.src1_i
.eq(self
.int_src1_i
),
424 regdecode
.src2_i
.eq(self
.int_src2_i
),
425 regdecode
.enable_i
.eq(self
.reg_enable_i
),
426 self
.issue_o
.eq(issueunit
.issue_o
)
429 # take these to outside (issue needs them)
430 comb
+= cua
.oper_i
.eq(self
.alu_oper_i
)
431 comb
+= cub
.oper_i
.eq(self
.br_oper_i
)
433 # TODO: issueunit.f (FP)
435 # and int function issue / busy arrays, and dest/src1/src2
436 comb
+= intfus
.dest_i
.eq(regdecode
.dest_o
)
437 comb
+= intfus
.src1_i
.eq(regdecode
.src1_o
)
438 comb
+= intfus
.src2_i
.eq(regdecode
.src2_o
)
440 fn_issue_o
= issueunit
.fn_issue_o
442 comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
443 comb
+= issueunit
.busy_i
.eq(cu
.busy_o
)
444 comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
447 # merge shadow matrices outputs
450 # these are explained in ShadowMatrix docstring, and are to be
451 # connected to the FUReg and FUFU Matrices, to get them to reset
452 anydie
= Signal(n_intfus
, reset_less
=True)
453 allshadown
= Signal(n_intfus
, reset_less
=True)
454 shreset
= Signal(n_intfus
, reset_less
=True)
455 comb
+= allshadown
.eq(shadows
.shadown_o
& bshadow
.shadown_o
)
456 comb
+= anydie
.eq(shadows
.go_die_o | bshadow
.go_die_o
)
457 comb
+= shreset
.eq(bspec
.match_g_o | bspec
.match_f_o
)
460 # connect fu-fu matrix
463 # Group Picker... done manually for now.
464 go_rd_o
= intpick1
.go_rd_o
465 go_wr_o
= intpick1
.go_wr_o
466 go_rd_i
= intfus
.go_rd_i
467 go_wr_i
= intfus
.go_wr_i
468 go_die_i
= intfus
.go_die_i
469 # NOTE: connect to the shadowed versions so that they can "die" (reset)
470 comb
+= go_rd_i
[0:n_intfus
].eq(go_rd_o
[0:n_intfus
]) # rd
471 comb
+= go_wr_i
[0:n_intfus
].eq(go_wr_o
[0:n_intfus
]) # wr
472 comb
+= go_die_i
[0:n_intfus
].eq(anydie
[0:n_intfus
]) # die
476 comb
+= intpick1
.rd_rel_i
[0:n_intfus
].eq(cu
.rd_rel_o
[0:n_intfus
])
477 comb
+= intpick1
.req_rel_i
[0:n_intfus
].eq(cu
.req_rel_o
[0:n_intfus
])
478 int_rd_o
= intfus
.readable_o
479 int_wr_o
= intfus
.writable_o
480 comb
+= intpick1
.readable_i
[0:n_intfus
].eq(int_rd_o
[0:n_intfus
])
481 comb
+= intpick1
.writable_i
[0:n_intfus
].eq(int_wr_o
[0:n_intfus
])
487 comb
+= shadows
.issue_i
.eq(fn_issue_o
)
488 #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
489 comb
+= shadows
.reset_i
[0:n_intfus
].eq(bshadow
.go_die_o
[0:n_intfus
])
491 # NOTE: this setup is for the instruction order preservation...
493 # connect shadows / go_dies to Computation Units
494 comb
+= cu
.shadown_i
[0:n_intfus
].eq(allshadown
)
495 comb
+= cu
.go_die_i
[0:n_intfus
].eq(anydie
)
497 # ok connect first n_int_fu shadows to busy lines, to create an
498 # instruction-order linked-list-like arrangement, using a bit-matrix
499 # (instead of e.g. a ring buffer).
502 # when written, the shadow can be cancelled (and was good)
503 for i
in range(n_intfus
):
504 comb
+= shadows
.s_good_i
[i
][0:n_intfus
].eq(go_wr_o
[0:n_intfus
])
506 # *previous* instruction shadows *current* instruction, and, obviously,
507 # if the previous is completed (!busy) don't cast the shadow!
508 comb
+= prev_shadow
.eq(~fn_issue_o
& cu
.busy_o
)
509 for i
in range(n_intfus
):
510 comb
+= shadows
.shadow_i
[i
][0:n_intfus
].eq(prev_shadow
)
513 # ... and this is for branch speculation. it uses the extra bit
514 # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
515 # only needs to set shadow_i, s_fail_i and s_good_i
517 # issue captures shadow_i (if enabled)
518 comb
+= bshadow
.reset_i
[0:n_intfus
].eq(shreset
[0:n_intfus
])
520 bactive
= Signal(reset_less
=True)
521 comb
+= bactive
.eq((bspec
.active_i | br1
.issue_i
) & ~br1
.go_wr_i
)
523 # instruction being issued (fn_issue_o) has a shadow cast by the branch
524 with m
.If(bactive
& (self
.branch_succ_i | self
.branch_fail_i
)):
525 comb
+= bshadow
.issue_i
.eq(fn_issue_o
)
526 for i
in range(n_intfus
):
527 with m
.If(fn_issue_o
& (Const(1<<i
))):
528 comb
+= bshadow
.shadow_i
[i
][0].eq(1)
530 # finally, we need an indicator to the test infrastructure as to
531 # whether the branch succeeded or failed, plus, link up to the
532 # "recorder" of whether the instruction was under shadow or not
534 with m
.If(br1
.issue_i
):
535 sync
+= bspec
.active_i
.eq(1)
536 with m
.If(self
.branch_succ_i
):
537 comb
+= bspec
.good_i
.eq(fn_issue_o
& 0x1f)
538 with m
.If(self
.branch_fail_i
):
539 comb
+= bspec
.fail_i
.eq(fn_issue_o
& 0x1f)
541 # branch is active (TODO: a better signal: this is over-using the
542 # go_write signal - actually the branch should not be "writing")
543 with m
.If(br1
.go_wr_i
):
544 sync
+= self
.branch_direction_o
.eq(br1
.data_o
+Const(1, 2))
545 sync
+= bspec
.active_i
.eq(0)
546 comb
+= bspec
.br_i
.eq(1)
547 # branch occurs if data == 1, failed if data == 0
548 comb
+= bspec
.br_ok_i
.eq(br1
.data_o
== 1)
549 for i
in range(n_intfus
):
550 # *expected* direction of the branch matched against *actual*
551 comb
+= bshadow
.s_good_i
[i
][0].eq(bspec
.match_g_o
[i
])
553 comb
+= bshadow
.s_fail_i
[i
][0].eq(bspec
.match_f_o
[i
])
556 # Connect Register File(s)
558 comb
+= int_dest
.wen
.eq(intfus
.dest_rsel_o
)
559 comb
+= int_src1
.ren
.eq(intfus
.src1_rsel_o
)
560 comb
+= int_src2
.ren
.eq(intfus
.src2_rsel_o
)
562 # connect ALUs to regfile
563 comb
+= int_dest
.data_i
.eq(cu
.data_o
)
564 comb
+= cu
.src1_i
.eq(int_src1
.data_o
)
565 comb
+= cu
.src2_i
.eq(int_src2
.data_o
)
567 # connect ALU Computation Units
568 comb
+= cu
.go_rd_i
[0:n_intfus
].eq(go_rd_o
[0:n_intfus
])
569 comb
+= cu
.go_wr_i
[0:n_intfus
].eq(go_wr_o
[0:n_intfus
])
570 comb
+= cu
.issue_i
[0:n_intfus
].eq(fn_issue_o
[0:n_intfus
])
575 yield from self
.intregs
576 yield from self
.fpregs
577 yield self
.int_dest_i
578 yield self
.int_src1_i
579 yield self
.int_src2_i
581 yield self
.branch_succ_i
582 yield self
.branch_fail_i
583 yield self
.branch_direction_o
589 class IssueToScoreboard(Elaboratable
):
591 def __init__(self
, qlen
, n_in
, n_out
, rwid
, opwid
, n_regs
):
599 mqbits
= (int(log(qlen
) / log(2))+2, False)
600 self
.p_add_i
= Signal(mqbits
) # instructions to add (from data_i)
601 self
.p_ready_o
= Signal() # instructions were added
602 self
.data_i
= Instruction
.nq(n_in
, "data_i", rwid
, opwid
)
604 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
605 self
.qlen_o
= Signal(mqbits
, reset_less
=True)
607 def elaborate(self
, platform
):
612 iq
= InstructionQ(self
.rwid
, self
.opw
, self
.qlen
, self
.n_in
, self
.n_out
)
613 sc
= Scoreboard(self
.rwid
, self
.n_regs
)
617 # get at the regfile for testing
618 self
.intregs
= sc
.intregs
620 # and the "busy" signal and instruction queue length
621 comb
+= self
.busy_o
.eq(sc
.busy_o
)
622 comb
+= self
.qlen_o
.eq(iq
.qlen_o
)
624 # link up instruction queue
625 comb
+= iq
.p_add_i
.eq(self
.p_add_i
)
626 comb
+= self
.p_ready_o
.eq(iq
.p_ready_o
)
627 for i
in range(self
.n_in
):
628 comb
+= eq(iq
.data_i
[i
], self
.data_i
[i
])
630 # take instruction and process it. note that it's possible to
631 # "inspect" the queue contents *without* actually removing the
632 # items. items are only removed when the
635 wait_issue_br
= Signal()
636 wait_issue_alu
= Signal()
638 with m
.If(wait_issue_br | wait_issue_alu
):
639 # set instruction pop length to 1 if the unit accepted
640 with m
.If(wait_issue_br
& (sc
.brissue
.fn_issue_o
!= 0)):
641 with m
.If(iq
.qlen_o
!= 0):
642 comb
+= iq
.n_sub_i
.eq(1)
643 with m
.If(wait_issue_alu
& (sc
.aluissue
.fn_issue_o
!= 0)):
644 with m
.If(iq
.qlen_o
!= 0):
645 comb
+= iq
.n_sub_i
.eq(1)
647 # see if some instruction(s) are here. note that this is
648 # "inspecting" the in-place queue. note also that on the
649 # cycle following "waiting" for fn_issue_o to be set, the
650 # "resetting" done above (insn_i=0) could be re-ASSERTed.
651 with m
.If(iq
.qlen_o
!= 0):
652 # get the operands and operation
653 dest
= iq
.data_o
[0].dest_i
654 src1
= iq
.data_o
[0].src1_i
655 src2
= iq
.data_o
[0].src2_i
656 op
= iq
.data_o
[0].oper_i
657 opi
= iq
.data_o
[0].opim_i
# immediate set
659 # set the src/dest regs
660 comb
+= sc
.int_dest_i
.eq(dest
)
661 comb
+= sc
.int_src1_i
.eq(src1
)
662 comb
+= sc
.int_src2_i
.eq(src2
)
663 comb
+= sc
.reg_enable_i
.eq(1) # enable the regfile
665 # choose a Function-Unit-Group
666 with m
.If((op
& (0x3<<2)) != 0): # branch
667 comb
+= sc
.brissue
.insn_i
.eq(1)
668 comb
+= sc
.br_oper_i
.eq(op
& 0x3)
669 comb
+= wait_issue_br
.eq(1)
671 comb
+= sc
.aluissue
.insn_i
.eq(1)
672 comb
+= sc
.alu_oper_i
.eq(Cat(op
& 0x3, opi
))
673 comb
+= wait_issue_alu
.eq(1)
676 # these indicate that the instruction is to be made
677 # shadow-dependent on
678 # (either) branch success or branch fail
679 #yield sc.branch_fail_i.eq(branch_fail)
680 #yield sc.branch_succ_i.eq(branch_success)
686 for o
in self
.data_i
:
704 def __init__(self
, rwidth
, nregs
):
706 self
.regs
= [0] * nregs
708 def op(self
, op
, op_imm
, src1
, src2
, dest
):
709 maxbits
= (1 << self
.rwidth
) - 1
710 src1
= self
.regs
[src1
] & maxbits
711 if not op_imm
: # put op in src2
712 src2
= self
.regs
[src2
] & maxbits
720 val
= src1
>> (src2
& maxbits
)
722 val
= int(src1
> src2
)
724 val
= int(src1
< src2
)
726 val
= int(src1
== src2
)
728 val
= int(src1
!= src2
)
730 self
.setval(dest
, val
)
def setval(self, dest, val):
    """Record ``val`` as the expected contents of register ``dest``.

    Prints a trace line (``sim setval <dest> <hex value>``) and then
    overwrites ``self.regs[dest]``. The value is stored as given; no
    masking is applied here.
    """
    shown = hex(val)
    print("sim setval", dest, shown)
    self.regs[dest] = val
738 for i
, val
in enumerate(self
.regs
):
739 reg
= yield dut
.intregs
.regs
[i
].reg
740 okstr
= "OK" if reg
== val
else "!ok"
741 print("reg %d expected %x received %x %s" % (i
, val
, reg
, okstr
))
743 def check(self
, dut
):
744 for i
, val
in enumerate(self
.regs
):
745 reg
= yield dut
.intregs
.regs
[i
].reg
747 print("reg %d expected %x received %x\n" % (i
, val
, reg
))
748 yield from self
.dump(dut
)
751 def instr_q(dut
, op
, op_imm
, src1
, src2
, dest
, branch_success
, branch_fail
):
752 instrs
= [{'oper_i': op
, 'dest_i': dest
, 'opim_i': op_imm
,
753 'src1_i': src1
, 'src2_i': src2
}]
756 for idx
in range(sendlen
):
757 yield from eq(dut
.data_i
[idx
], instrs
[idx
])
758 di
= yield dut
.data_i
[idx
]
759 print ("senddata %d %x" % (idx
, di
))
760 yield dut
.p_add_i
.eq(sendlen
)
762 o_p_ready
= yield dut
.p_ready_o
765 o_p_ready
= yield dut
.p_ready_o
767 yield dut
.p_add_i
.eq(0)
770 def int_instr(dut
, op
, src1
, src2
, dest
, branch_success
, branch_fail
):
771 yield from disable_issue(dut
)
772 yield dut
.int_dest_i
.eq(dest
)
773 yield dut
.int_src1_i
.eq(src1
)
774 yield dut
.int_src2_i
.eq(src2
)
775 if (op
& (0x3<<2)) != 0: # branch
776 yield dut
.brissue
.insn_i
.eq(1)
777 yield dut
.br_oper_i
.eq(Const(op
& 0x3, 2))
778 dut_issue
= dut
.brissue
780 yield dut
.aluissue
.insn_i
.eq(1)
781 yield dut
.alu_oper_i
.eq(Const(op
& 0x3, 2))
782 dut_issue
= dut
.aluissue
783 yield dut
.reg_enable_i
.eq(1)
785 # these indicate that the instruction is to be made shadow-dependent on
786 # (either) branch success or branch fail
787 yield dut
.branch_fail_i
.eq(branch_fail
)
788 yield dut
.branch_succ_i
.eq(branch_success
)
791 yield from wait_for_issue(dut
, dut_issue
)
794 def print_reg(dut
, rnums
):
797 reg
= yield dut
.intregs
.regs
[rnum
].reg
798 rs
.append("%x" % reg
)
799 rnums
= map(str, rnums
)
800 print ("reg %s: %s" % (','.join(rnums
), ','.join(rs
)))
803 def create_random_ops(dut
, n_ops
, shadowing
=False, max_opnums
=3):
805 for i
in range(n_ops
):
806 src1
= randint(1, dut
.n_regs
-1)
807 src2
= randint(1, dut
.n_regs
-1)
808 dest
= randint(1, dut
.n_regs
-1)
809 op
= randint(0, max_opnums
)
810 opi
= 0 if randint(0, 3) else 1 # set true only when random is zero
813 insts
.append((src1
, src2
, dest
, op
, opi
, (0, 0)))
815 insts
.append((src1
, src2
, dest
, op
, opi
))
819 def wait_for_busy_clear(dut
):
821 busy_o
= yield dut
.busy_o
def disable_issue(dut):
    """Generator that clears the issue request on both issue groups.

    Yields ``insn_i.eq(0)`` for ``dut.aluissue`` first and then for
    ``dut.brissue``. Callers in this file drive it with ``yield from``.
    """
    # attribute lookups stay lazy: each group is resolved just before
    # its own statement is yielded, alu first and branch second
    for group_name in ("aluissue", "brissue"):
        yield getattr(dut, group_name).insn_i.eq(0)
832 def wait_for_issue(dut
, dut_issue
):
834 issue_o
= yield dut_issue
.fn_issue_o
836 yield from disable_issue(dut
)
837 yield dut
.reg_enable_i
.eq(0)
840 #yield from print_reg(dut, [1,2,3])
842 #yield from print_reg(dut, [1,2,3])
844 def scoreboard_branch_sim(dut
, alusim
):
850 print ("rseed", iseed
)
854 yield dut
.branch_direction_o
.eq(0)
856 # set random values in the registers
857 for i
in range(1, dut
.n_regs
):
859 val
= randint(0, (1<<alusim
.rwidth
)-1)
860 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
861 alusim
.setval(i
, val
)
864 # create some instructions: branches create a tree
865 insts
= create_random_ops(dut
, 1, True, 1)
866 #insts.append((6, 6, 1, 2, (0, 0)))
867 #insts.append((4, 3, 3, 0, (0, 0)))
869 src1
= randint(1, dut
.n_regs
-1)
870 src2
= randint(1, dut
.n_regs
-1)
872 op
= 4 # only BGT at the moment
874 branch_ok
= create_random_ops(dut
, 1, True, 1)
875 branch_fail
= create_random_ops(dut
, 1, True, 1)
877 insts
.append((src1
, src2
, (branch_ok
, branch_fail
), op
, (0, 0)))
881 insts
.append( (3, 5, 2, 0, (0, 0)) )
884 #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
885 branch_ok
.append( None )
886 branch_fail
.append( (1, 1, 2, 0, (0, 1)) )
887 #branch_fail.append( None )
888 insts
.append( (6, 4, (branch_ok
, branch_fail
), 4, (0, 0)) )
890 siminsts
= deepcopy(insts
)
892 # issue instruction(s)
900 branch_direction
= yield dut
.branch_direction_o
# way branch went
901 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = insts
.pop(0)
902 if branch_direction
== 1 and shadow_on
:
903 print ("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
904 continue # branch was "success" and this is a "failed"... skip
905 if branch_direction
== 2 and shadow_off
:
906 print ("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
907 continue # branch was "fail" and this is a "success"... skip
908 if branch_direction
!= 0:
913 branch_ok
, branch_fail
= dest
915 # ok zip up the branch success / fail instructions and
916 # drop them into the queue, one marked "to have branch success"
917 # the other to be marked shadow branch "fail".
918 # one out of each of these will be cancelled
919 for ok
, fl
in zip(branch_ok
, branch_fail
):
921 instrs
.append((ok
[0], ok
[1], ok
[2], ok
[3], (1, 0)))
923 instrs
.append((fl
[0], fl
[1], fl
[2], fl
[3], (0, 1)))
924 print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
925 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
926 yield from int_instr(dut
, op
, src1
, src2
, dest
,
927 shadow_on
, shadow_off
)
929 # wait for all instructions to stop before checking
931 yield from wait_for_busy_clear(dut
)
935 instr
= siminsts
.pop(0)
938 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = instr
942 branch_ok
, branch_fail
= dest
944 print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
945 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
946 branch_res
= alusim
.op(op
, src1
, src2
, dest
)
949 siminsts
+= branch_ok
951 siminsts
+= branch_fail
954 yield from alusim
.check(dut
)
955 yield from alusim
.dump(dut
)
958 def scoreboard_sim(dut
, alusim
):
964 # set random values in the registers
965 for i
in range(1, dut
.n_regs
):
966 val
= randint(0, (1<<alusim
.rwidth
)-1)
969 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
970 alusim
.setval(i
, val
)
972 # create some instructions (some random, some regression tests)
975 instrs
= create_random_ops(dut
, 15, True, 3)
978 instrs
.append( (7, 3, 2, 4, (0, 0)) )
979 instrs
.append( (7, 6, 6, 2, (0, 0)) )
980 instrs
.append( (1, 7, 2, 2, (0, 0)) )
984 instrs
.append((2, 3, 3, 0, (0, 0)))
985 instrs
.append((5, 3, 3, 1, (0, 0)))
986 instrs
.append((3, 5, 5, 2, (0, 0)))
987 instrs
.append((5, 3, 3, 3, (0, 0)))
988 instrs
.append((3, 5, 5, 0, (0, 0)))
991 instrs
.append((5, 6, 2, 1))
992 instrs
.append((2, 2, 4, 0))
993 #instrs.append((2, 2, 3, 1))
996 instrs
.append((2, 1, 2, 3))
999 instrs
.append((2, 6, 2, 1))
1000 instrs
.append((2, 1, 2, 0))
1003 instrs
.append((1, 2, 7, 2))
1004 instrs
.append((7, 1, 5, 0))
1005 instrs
.append((4, 4, 1, 1))
1008 instrs
.append((5, 6, 2, 2))
1009 instrs
.append((1, 1, 4, 1))
1010 instrs
.append((6, 5, 3, 0))
1013 # Write-after-Write Hazard
1014 instrs
.append( (3, 6, 7, 2) )
1015 instrs
.append( (4, 4, 7, 1) )
1018 # self-read/write-after-write followed by Read-after-Write
1019 instrs
.append((1, 1, 1, 1))
1020 instrs
.append((1, 5, 3, 0))
1023 # Read-after-Write followed by self-read-after-write
1024 instrs
.append((5, 6, 1, 2))
1025 instrs
.append((1, 1, 1, 1))
1028 # self-read-write sandwich
1029 instrs
.append((5, 6, 1, 2))
1030 instrs
.append((1, 1, 1, 1))
1031 instrs
.append((1, 5, 3, 0))
1034 # very weird failure
1035 instrs
.append( (5, 2, 5, 2) )
1036 instrs
.append( (2, 6, 3, 0) )
1037 instrs
.append( (4, 2, 2, 1) )
1041 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1042 alusim
.setval(5, v1
)
1043 yield dut
.intregs
.regs
[3].reg
.eq(5)
1045 instrs
.append((5, 3, 3, 4, (0, 0)))
1046 instrs
.append((4, 2, 1, 2, (0, 1)))
1050 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1051 alusim
.setval(5, v1
)
1052 yield dut
.intregs
.regs
[3].reg
.eq(5)
1054 instrs
.append((5, 3, 3, 4, (0, 0)))
1055 instrs
.append((4, 2, 1, 2, (1, 0)))
1058 instrs
.append( (4, 3, 5, 1, 0, (0, 0)) )
1059 instrs
.append( (5, 2, 3, 1, 0, (0, 0)) )
1060 instrs
.append( (7, 1, 5, 2, 0, (0, 0)) )
1061 instrs
.append( (5, 6, 6, 4, 0, (0, 0)) )
1062 instrs
.append( (7, 5, 2, 2, 0, (1, 0)) )
1063 instrs
.append( (1, 7, 5, 0, 0, (0, 1)) )
1064 instrs
.append( (1, 6, 1, 2, 0, (1, 0)) )
1065 instrs
.append( (1, 6, 7, 3, 0, (0, 0)) )
1066 instrs
.append( (6, 7, 7, 0, 0, (0, 0)) )
1068 # issue instruction(s), wait for issue to be free before proceeding
1069 for i
, instr
in enumerate(instrs
):
1070 src1
, src2
, dest
, op
, opi
, (br_ok
, br_fail
) = instr
1072 print ("instr %d: (%d, %d, %d, %d)" % (i
, src1
, src2
, dest
, op
))
1073 alusim
.op(op
, opi
, src1
, src2
, dest
)
1074 yield from instr_q(dut
, op
, opi
, src1
, src2
, dest
, br_ok
, br_fail
)
1076 # wait for all instructions to stop before checking
1078 iqlen
= yield dut
.qlen_o
1086 yield from wait_for_busy_clear(dut
)
1089 yield from alusim
.check(dut
)
1090 yield from alusim
.dump(dut
)
1093 def test_scoreboard():
1094 dut
= IssueToScoreboard(2, 1, 1, 16, 8, 8)
1095 alusim
= RegSim(16, 8)
1096 memsim
= MemSim(16, 16)
1097 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
1098 with
open("test_scoreboard6600.il", "w") as f
:
1101 run_simulation(dut
, scoreboard_sim(dut
, alusim
),
1102 vcd_name
='test_scoreboard6600.vcd')
1104 #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
1105 # vcd_name='test_scoreboard6600.vcd')
1108 if __name__
== '__main__':