1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
7 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
8 from scoreboard
.global_pending
import GlobalPending
9 from scoreboard
.group_picker
import GroupPicker
10 from scoreboard
.issue_unit
import IssueUnitGroup
, IssueUnitArray
, RegDecode
11 from scoreboard
.shadow
import ShadowMatrix
, BranchSpeculationRecord
12 from scoreboard
.instruction_q
import Instruction
, InstructionQ
14 from compalu
import ComputationUnitNoDelay
16 from alu_hier
import ALU
, BranchALU
17 from nmutil
.latch
import SRLatch
18 from nmutil
.nmoperator
import eq
20 from random
import randint
, seed
21 from copy
import deepcopy
25 class CompUnitsBase(Elaboratable
):
26 """ Computation Unit Base class.
28 Amazingly, this class works recursively. It's supposed to just
29 look after some ALUs (that can handle the same operations),
30 grouping them together, however it turns out that the same code
31 can also group *groups* of Computation Units together as well.
33 Basically it was intended just to concatenate the ALU's issue,
34 go_rd etc. signals together, which start out as bits and become
35 sequences. Turns out that the same trick works just as well
38 So this class may be used recursively to present a top-level
39 sequential concatenation of all the signals in and out of
40 ALUs, whilst at the same time making it convenient to group
43 At the lower level, the intent is that groups of (identical)
44 ALUs may be passed the same operation. Even beyond that,
45 the intent is that that group of (identical) ALUs actually
46 share the *same pipeline* and as such become a "Concurrent
47 Computation Unit" as defined by Mitch Alsup (see section
50 def __init__(self
, rwid
, units
):
53 * :rwid: bit width of register file(s) - both FP and INT
54 * :units: sequence of ALUs (or CompUnitsBase derivatives)
59 if units
and isinstance(units
[0], CompUnitsBase
):
62 self
.n_units
+= u
.n_units
64 self
.n_units
= len(units
)
66 n_units
= self
.n_units
69 self
.issue_i
= Signal(n_units
, reset_less
=True)
70 self
.go_rd_i
= Signal(n_units
, reset_less
=True)
71 self
.go_wr_i
= Signal(n_units
, reset_less
=True)
72 self
.shadown_i
= Signal(n_units
, reset_less
=True)
73 self
.go_die_i
= Signal(n_units
, reset_less
=True)
76 self
.busy_o
= Signal(n_units
, reset_less
=True)
77 self
.rd_rel_o
= Signal(n_units
, reset_less
=True)
78 self
.req_rel_o
= Signal(n_units
, reset_less
=True)
80 # in/out register data (note: not register#, actual data)
81 self
.data_o
= Signal(rwid
, reset_less
=True)
82 self
.src1_i
= Signal(rwid
, reset_less
=True)
83 self
.src2_i
= Signal(rwid
, reset_less
=True)
86 def elaborate(self
, platform
):
90 for i
, alu
in enumerate(self
.units
):
91 setattr(m
.submodules
, "comp%d" % i
, alu
)
101 for alu
in self
.units
:
102 req_rel_l
.append(alu
.req_rel_o
)
103 rd_rel_l
.append(alu
.rd_rel_o
)
104 shadow_l
.append(alu
.shadown_i
)
105 godie_l
.append(alu
.go_die_i
)
106 go_wr_l
.append(alu
.go_wr_i
)
107 go_rd_l
.append(alu
.go_rd_i
)
108 issue_l
.append(alu
.issue_i
)
109 busy_l
.append(alu
.busy_o
)
110 comb
+= self
.rd_rel_o
.eq(Cat(*rd_rel_l
))
111 comb
+= self
.req_rel_o
.eq(Cat(*req_rel_l
))
112 comb
+= self
.busy_o
.eq(Cat(*busy_l
))
113 comb
+= Cat(*godie_l
).eq(self
.go_die_i
)
114 comb
+= Cat(*shadow_l
).eq(self
.shadown_i
)
115 comb
+= Cat(*go_wr_l
).eq(self
.go_wr_i
)
116 comb
+= Cat(*go_rd_l
).eq(self
.go_rd_i
)
117 comb
+= Cat(*issue_l
).eq(self
.issue_i
)
119 # connect data register input/output
121 # merge (OR) all integer FU / ALU outputs to a single value
122 # bit of a hack: treereduce needs a list with an item named "data_o"
124 data_o
= treereduce(self
.units
)
125 comb
+= self
.data_o
.eq(data_o
)
127 for i
, alu
in enumerate(self
.units
):
128 comb
+= alu
.src1_i
.eq(self
.src1_i
)
129 comb
+= alu
.src2_i
.eq(self
.src2_i
)
134 class CompUnitALUs(CompUnitsBase
):
136 def __init__(self
, rwid
, opwid
):
139 * :rwid: bit width of register file(s) - both FP and INT
140 * :opwid: operand bit width
145 self
.oper_i
= Signal(opwid
, reset_less
=True)
154 for alu
in [add
, sub
, mul
, shf
]:
155 units
.append(ComputationUnitNoDelay(rwid
, 2, alu
))
157 CompUnitsBase
.__init
__(self
, rwid
, units
)
159 def elaborate(self
, platform
):
160 m
= CompUnitsBase
.elaborate(self
, platform
)
163 # hand the same operation to all units
164 for alu
in self
.units
:
165 comb
+= alu
.oper_i
.eq(self
.oper_i
)
166 #comb += self.units[0].oper_i.eq(Const(0, 2)) # op=add
167 #comb += self.units[1].oper_i.eq(Const(1, 2)) # op=sub
168 #comb += self.units[2].oper_i.eq(Const(2, 2)) # op=mul
169 #comb += self.units[3].oper_i.eq(Const(3, 2)) # op=shf
174 class CompUnitBR(CompUnitsBase
):
176 def __init__(self
, rwid
, opwid
):
179 * :rwid: bit width of register file(s) - both FP and INT
180 * :opwid: operand bit width
182 Note: bgt unit is returned so that a shadow unit can be created
188 self
.oper_i
= Signal(opwid
, reset_less
=True)
191 self
.bgt
= BranchALU(rwid
)
192 self
.br1
= ComputationUnitNoDelay(rwid
, 3, self
.bgt
)
193 CompUnitsBase
.__init
__(self
, rwid
, [self
.br1
])
195 def elaborate(self
, platform
):
196 m
= CompUnitsBase
.elaborate(self
, platform
)
199 # hand the same operation to all units
200 for alu
in self
.units
:
201 comb
+= alu
.oper_i
.eq(self
.oper_i
)
202 #comb += self.br1.oper_i.eq(Const(4, 3)) # op=bgt
207 class FunctionUnits(Elaboratable
):
209 def __init__(self
, n_regs
, n_int_alus
):
211 self
.n_int_alus
= n_int_alus
213 self
.dest_i
= Signal(n_regs
, reset_less
=True) # Dest R# in
214 self
.src1_i
= Signal(n_regs
, reset_less
=True) # oper1 R# in
215 self
.src2_i
= Signal(n_regs
, reset_less
=True) # oper2 R# in
217 self
.g_int_rd_pend_o
= Signal(n_regs
, reset_less
=True)
218 self
.g_int_wr_pend_o
= Signal(n_regs
, reset_less
=True)
220 self
.dest_rsel_o
= Signal(n_regs
, reset_less
=True) # dest reg (bot)
221 self
.src1_rsel_o
= Signal(n_regs
, reset_less
=True) # src1 reg (bot)
222 self
.src2_rsel_o
= Signal(n_regs
, reset_less
=True) # src2 reg (bot)
224 self
.req_rel_i
= Signal(n_int_alus
, reset_less
= True)
225 self
.readable_o
= Signal(n_int_alus
, reset_less
=True)
226 self
.writable_o
= Signal(n_int_alus
, reset_less
=True)
228 self
.go_rd_i
= Signal(n_int_alus
, reset_less
=True)
229 self
.go_wr_i
= Signal(n_int_alus
, reset_less
=True)
230 self
.go_die_i
= Signal(n_int_alus
, reset_less
=True)
231 self
.req_rel_o
= Signal(n_int_alus
, reset_less
=True)
232 self
.fn_issue_i
= Signal(n_int_alus
, reset_less
=True)
234 # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
236 def elaborate(self
, platform
):
241 n_intfus
= self
.n_int_alus
243 # Integer FU-FU Dep Matrix
244 intfudeps
= FUFUDepMatrix(n_intfus
, n_intfus
)
245 m
.submodules
.intfudeps
= intfudeps
246 # Integer FU-Reg Dep Matrix
247 intregdeps
= FURegDepMatrix(n_intfus
, self
.n_regs
)
248 m
.submodules
.intregdeps
= intregdeps
250 comb
+= self
.g_int_rd_pend_o
.eq(intregdeps
.rd_rsel_o
)
251 comb
+= self
.g_int_wr_pend_o
.eq(intregdeps
.wr_rsel_o
)
253 comb
+= intregdeps
.rd_pend_i
.eq(intregdeps
.rd_rsel_o
)
254 comb
+= intregdeps
.wr_pend_i
.eq(intregdeps
.wr_rsel_o
)
256 comb
+= intfudeps
.rd_pend_i
.eq(intregdeps
.rd_pend_o
)
257 comb
+= intfudeps
.wr_pend_i
.eq(intregdeps
.wr_pend_o
)
258 self
.wr_pend_o
= intregdeps
.wr_pend_o
# also output for use in WaWGrid
260 comb
+= intfudeps
.issue_i
.eq(self
.fn_issue_i
)
261 comb
+= intfudeps
.go_rd_i
.eq(self
.go_rd_i
)
262 comb
+= intfudeps
.go_wr_i
.eq(self
.go_wr_i
)
263 comb
+= intfudeps
.go_die_i
.eq(self
.go_die_i
)
264 comb
+= self
.readable_o
.eq(intfudeps
.readable_o
)
265 comb
+= self
.writable_o
.eq(intfudeps
.writable_o
)
267 # Connect function issue / arrays, and dest/src1/src2
268 comb
+= intregdeps
.dest_i
.eq(self
.dest_i
)
269 comb
+= intregdeps
.src1_i
.eq(self
.src1_i
)
270 comb
+= intregdeps
.src2_i
.eq(self
.src2_i
)
272 comb
+= intregdeps
.go_rd_i
.eq(self
.go_rd_i
)
273 comb
+= intregdeps
.go_wr_i
.eq(self
.go_wr_i
)
274 comb
+= intregdeps
.go_die_i
.eq(self
.go_die_i
)
275 comb
+= intregdeps
.issue_i
.eq(self
.fn_issue_i
)
277 comb
+= self
.dest_rsel_o
.eq(intregdeps
.dest_rsel_o
)
278 comb
+= self
.src1_rsel_o
.eq(intregdeps
.src1_rsel_o
)
279 comb
+= self
.src2_rsel_o
.eq(intregdeps
.src2_rsel_o
)
284 class Scoreboard(Elaboratable
):
285 def __init__(self
, rwid
, n_regs
):
288 * :rwid: bit width of register file(s) - both FP and INT
289 * :n_regs: depth of register file(s) - number of FP and INT regs
295 self
.intregs
= RegFileArray(rwid
, n_regs
)
296 self
.fpregs
= RegFileArray(rwid
, n_regs
)
298 # issue q needs to get at these
299 self
.aluissue
= IssueUnitGroup(4)
300 self
.brissue
= IssueUnitGroup(1)
302 self
.alu_oper_i
= Signal(4, reset_less
=True)
303 self
.br_oper_i
= Signal(4, reset_less
=True)
306 self
.int_dest_i
= Signal(max=n_regs
, reset_less
=True) # Dest R# in
307 self
.int_src1_i
= Signal(max=n_regs
, reset_less
=True) # oper1 R# in
308 self
.int_src2_i
= Signal(max=n_regs
, reset_less
=True) # oper2 R# in
309 self
.reg_enable_i
= Signal(reset_less
=True) # enable reg decode
312 self
.issue_o
= Signal(reset_less
=True) # instruction was accepted
313 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
315 # for branch speculation experiment. branch_direction = 0 if
316 # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
317 # branch_succ and branch_fail are requests to have the current
318 # instruction be dependent on the branch unit "shadow" capability.
319 self
.branch_succ_i
= Signal(reset_less
=True)
320 self
.branch_fail_i
= Signal(reset_less
=True)
321 self
.branch_direction_o
= Signal(2, reset_less
=True)
323 def elaborate(self
, platform
):
328 m
.submodules
.intregs
= self
.intregs
329 m
.submodules
.fpregs
= self
.fpregs
332 int_dest
= self
.intregs
.write_port("dest")
333 int_src1
= self
.intregs
.read_port("src1")
334 int_src2
= self
.intregs
.read_port("src2")
336 fp_dest
= self
.fpregs
.write_port("dest")
337 fp_src1
= self
.fpregs
.read_port("src1")
338 fp_src2
= self
.fpregs
.read_port("src2")
340 # Int ALUs and Comp Units
342 cua
= CompUnitALUs(self
.rwid
, 2)
343 cub
= CompUnitBR(self
.rwid
, 2)
344 m
.submodules
.cu
= cu
= CompUnitsBase(self
.rwid
, [cua
, cub
])
345 bgt
= cub
.bgt
# get at the branch computation unit
349 m
.submodules
.intfus
= intfus
= FunctionUnits(self
.n_regs
, n_int_alus
)
351 # Count of number of FUs
352 n_intfus
= n_int_alus
353 n_fp_fus
= 0 # for now
355 # Integer Priority Picker 1: Adder + Subtractor
356 intpick1
= GroupPicker(n_intfus
) # picks between add, sub, mul and shf
357 m
.submodules
.intpick1
= intpick1
360 regdecode
= RegDecode(self
.n_regs
)
361 m
.submodules
.regdecode
= regdecode
362 issueunit
= IssueUnitArray([self
.aluissue
, self
.brissue
])
363 m
.submodules
.issueunit
= issueunit
365 # Shadow Matrix. currently n_intfus shadows, to be used for
366 # write-after-write hazards. NOTE: there is one extra for branches,
367 # so the shadow width is increased by 1
368 m
.submodules
.shadows
= shadows
= ShadowMatrix(n_intfus
, n_intfus
, True)
369 m
.submodules
.bshadow
= bshadow
= ShadowMatrix(n_intfus
, 1, False)
371 # record previous instruction to cast shadow on current instruction
372 fn_issue_prev
= Signal(n_intfus
)
373 prev_shadow
= Signal(n_intfus
)
375 # Branch Speculation recorder. tracks the success/fail state as
376 # each instruction is issued, so that when the branch occurs the
377 # allow/cancel can be issued as appropriate.
378 m
.submodules
.specrec
= bspec
= BranchSpeculationRecord(n_intfus
)
381 # ok start wiring things together...
382 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
383 # https://www.youtube.com/watch?v=pYb8Wm6-QfA
387 # Issue Unit is where it starts. set up some in/outs for this module
389 comb
+= [ regdecode
.dest_i
.eq(self
.int_dest_i
),
390 regdecode
.src1_i
.eq(self
.int_src1_i
),
391 regdecode
.src2_i
.eq(self
.int_src2_i
),
392 regdecode
.enable_i
.eq(self
.reg_enable_i
),
393 self
.issue_o
.eq(issueunit
.issue_o
)
396 # take these to outside (issue needs them)
397 comb
+= cua
.oper_i
.eq(self
.alu_oper_i
)
398 comb
+= cub
.oper_i
.eq(self
.br_oper_i
)
400 # TODO: issueunit.f (FP)
402 # and int function issue / busy arrays, and dest/src1/src2
403 comb
+= intfus
.dest_i
.eq(regdecode
.dest_o
)
404 comb
+= intfus
.src1_i
.eq(regdecode
.src1_o
)
405 comb
+= intfus
.src2_i
.eq(regdecode
.src2_o
)
407 fn_issue_o
= issueunit
.fn_issue_o
409 comb
+= intfus
.fn_issue_i
.eq(fn_issue_o
)
410 comb
+= issueunit
.busy_i
.eq(cu
.busy_o
)
411 comb
+= self
.busy_o
.eq(cu
.busy_o
.bool())
414 # merge shadow matrices outputs
417 # these are explained in ShadowMatrix docstring, and are to be
418 # connected to the FUReg and FUFU Matrices, to get them to reset
419 anydie
= Signal(n_intfus
, reset_less
=True)
420 allshadown
= Signal(n_intfus
, reset_less
=True)
421 shreset
= Signal(n_intfus
, reset_less
=True)
422 comb
+= allshadown
.eq(shadows
.shadown_o
& bshadow
.shadown_o
)
423 comb
+= anydie
.eq(shadows
.go_die_o | bshadow
.go_die_o
)
424 comb
+= shreset
.eq(bspec
.match_g_o | bspec
.match_f_o
)
427 # connect fu-fu matrix
430 # Group Picker... done manually for now.
431 go_rd_o
= intpick1
.go_rd_o
432 go_wr_o
= intpick1
.go_wr_o
433 go_rd_i
= intfus
.go_rd_i
434 go_wr_i
= intfus
.go_wr_i
435 go_die_i
= intfus
.go_die_i
436 # NOTE: connect to the shadowed versions so that they can "die" (reset)
437 comb
+= go_rd_i
[0:n_intfus
].eq(go_rd_o
[0:n_intfus
]) # rd
438 comb
+= go_wr_i
[0:n_intfus
].eq(go_wr_o
[0:n_intfus
]) # wr
439 comb
+= go_die_i
[0:n_intfus
].eq(anydie
[0:n_intfus
]) # die
443 comb
+= intpick1
.rd_rel_i
[0:n_intfus
].eq(cu
.rd_rel_o
[0:n_intfus
])
444 comb
+= intpick1
.req_rel_i
[0:n_intfus
].eq(cu
.req_rel_o
[0:n_intfus
])
445 int_rd_o
= intfus
.readable_o
446 int_wr_o
= intfus
.writable_o
447 comb
+= intpick1
.readable_i
[0:n_intfus
].eq(int_rd_o
[0:n_intfus
])
448 comb
+= intpick1
.writable_i
[0:n_intfus
].eq(int_wr_o
[0:n_intfus
])
454 comb
+= shadows
.issue_i
.eq(fn_issue_o
)
455 #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
456 comb
+= shadows
.reset_i
[0:n_intfus
].eq(bshadow
.go_die_o
[0:n_intfus
])
458 # NOTE: this setup is for the instruction order preservation...
460 # connect shadows / go_dies to Computation Units
461 comb
+= cu
.shadown_i
[0:n_intfus
].eq(allshadown
)
462 comb
+= cu
.go_die_i
[0:n_intfus
].eq(anydie
)
464 # ok connect first n_int_fu shadows to busy lines, to create an
465 # instruction-order linked-list-like arrangement, using a bit-matrix
466 # (instead of e.g. a ring buffer).
469 # when written, the shadow can be cancelled (and was good)
470 for i
in range(n_intfus
):
471 comb
+= shadows
.s_good_i
[i
][0:n_intfus
].eq(go_wr_o
[0:n_intfus
])
473 # work out the current-activated busy unit (by recording the old one)
474 with m
.If(fn_issue_o
): # only update prev bit if instruction issued
475 sync
+= fn_issue_prev
.eq(fn_issue_o
)
477 # *previous* instruction shadows *current* instruction, and, obviously,
478 # if the previous is completed (!busy) don't cast the shadow!
479 comb
+= prev_shadow
.eq(~fn_issue_o
& cu
.busy_o
)
480 for i
in range(n_intfus
):
481 comb
+= shadows
.shadow_i
[i
][0:n_intfus
].eq(prev_shadow
)
484 # ... and this is for branch speculation. it uses the extra bit
485 # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
486 # only needs to set shadow_i, s_fail_i and s_good_i
488 # issue captures shadow_i (if enabled)
489 comb
+= bshadow
.reset_i
[0:n_intfus
].eq(shreset
[0:n_intfus
])
491 bactive
= Signal(reset_less
=True)
492 comb
+= bactive
.eq((bspec
.active_i | br1
.issue_i
) & ~br1
.go_wr_i
)
494 # instruction being issued (fn_issue_o) has a shadow cast by the branch
495 with m
.If(bactive
& (self
.branch_succ_i | self
.branch_fail_i
)):
496 comb
+= bshadow
.issue_i
.eq(fn_issue_o
)
497 for i
in range(n_intfus
):
498 with m
.If(fn_issue_o
& (Const(1<<i
))):
499 comb
+= bshadow
.shadow_i
[i
][0].eq(1)
501 # finally, we need an indicator to the test infrastructure as to
502 # whether the branch succeeded or failed, plus, link up to the
503 # "recorder" of whether the instruction was under shadow or not
505 with m
.If(br1
.issue_i
):
506 sync
+= bspec
.active_i
.eq(1)
507 with m
.If(self
.branch_succ_i
):
508 comb
+= bspec
.good_i
.eq(fn_issue_o
& 0x1f)
509 with m
.If(self
.branch_fail_i
):
510 comb
+= bspec
.fail_i
.eq(fn_issue_o
& 0x1f)
512 # branch is active (TODO: a better signal: this is over-using the
513 # go_write signal - actually the branch should not be "writing")
514 with m
.If(br1
.go_wr_i
):
515 sync
+= self
.branch_direction_o
.eq(br1
.data_o
+Const(1, 2))
516 sync
+= bspec
.active_i
.eq(0)
517 comb
+= bspec
.br_i
.eq(1)
518 # branch occurs if data == 1, failed if data == 0
519 comb
+= bspec
.br_ok_i
.eq(br1
.data_o
== 1)
520 for i
in range(n_intfus
):
521 # *expected* direction of the branch matched against *actual*
522 comb
+= bshadow
.s_good_i
[i
][0].eq(bspec
.match_g_o
[i
])
524 comb
+= bshadow
.s_fail_i
[i
][0].eq(bspec
.match_f_o
[i
])
527 # Connect Register File(s)
529 comb
+= int_dest
.wen
.eq(intfus
.dest_rsel_o
)
530 comb
+= int_src1
.ren
.eq(intfus
.src1_rsel_o
)
531 comb
+= int_src2
.ren
.eq(intfus
.src2_rsel_o
)
533 # connect ALUs to regfile
534 comb
+= int_dest
.data_i
.eq(cu
.data_o
)
535 comb
+= cu
.src1_i
.eq(int_src1
.data_o
)
536 comb
+= cu
.src2_i
.eq(int_src2
.data_o
)
538 # connect ALU Computation Units
539 comb
+= cu
.go_rd_i
[0:n_intfus
].eq(go_rd_o
[0:n_intfus
])
540 comb
+= cu
.go_wr_i
[0:n_intfus
].eq(go_wr_o
[0:n_intfus
])
541 comb
+= cu
.issue_i
[0:n_intfus
].eq(fn_issue_o
[0:n_intfus
])
546 yield from self
.intregs
547 yield from self
.fpregs
548 yield self
.int_dest_i
549 yield self
.int_src1_i
550 yield self
.int_src2_i
552 yield self
.branch_succ_i
553 yield self
.branch_fail_i
554 yield self
.branch_direction_o
559 class IssueToScoreboard(Elaboratable
):
561 def __init__(self
, qlen
, n_in
, n_out
, rwid
, opwid
, n_regs
):
569 mqbits
= (int(log(qlen
) / log(2))+2, False)
570 self
.p_add_i
= Signal(mqbits
) # instructions to add (from data_i)
571 self
.p_ready_o
= Signal() # instructions were added
572 self
.data_i
= Instruction
.nq(n_in
, "data_i", rwid
, opwid
)
574 self
.busy_o
= Signal(reset_less
=True) # at least one CU is busy
575 self
.qlen_o
= Signal(mqbits
, reset_less
=True)
577 def elaborate(self
, platform
):
582 iq
= InstructionQ(self
.rwid
, self
.opw
, self
.qlen
, self
.n_in
, self
.n_out
)
583 sc
= Scoreboard(self
.rwid
, self
.n_regs
)
587 # get at the regfile for testing
588 self
.intregs
= sc
.intregs
590 # and the "busy" signal and instruction queue length
591 comb
+= self
.busy_o
.eq(sc
.busy_o
)
592 comb
+= self
.qlen_o
.eq(iq
.qlen_o
)
594 # link up instruction queue
595 comb
+= iq
.p_add_i
.eq(self
.p_add_i
)
596 comb
+= self
.p_ready_o
.eq(iq
.p_ready_o
)
597 for i
in range(self
.n_in
):
598 comb
+= eq(iq
.data_i
[i
], self
.data_i
[i
])
600 # take instruction and process it. note that it's possible to
601 # "inspect" the queue contents *without* actually removing the
602 # items. items are only removed when the
605 wait_issue_br
= Signal()
606 wait_issue_alu
= Signal()
608 with m
.If(wait_issue_br | wait_issue_alu
):
609 # set instruction pop length to 1 if the unit accepted
610 with m
.If(wait_issue_br
& (sc
.brissue
.fn_issue_o
!= 0)):
611 with m
.If(iq
.qlen_o
!= 0):
612 comb
+= iq
.n_sub_i
.eq(1)
613 with m
.If(wait_issue_alu
& (sc
.aluissue
.fn_issue_o
!= 0)):
614 with m
.If(iq
.qlen_o
!= 0):
615 comb
+= iq
.n_sub_i
.eq(1)
617 # see if some instruction(s) are here. note that this is
618 # "inspecting" the in-place queue. note also that on the
619 # cycle following "waiting" for fn_issue_o to be set, the
620 # "resetting" done above (insn_i=0) could be re-ASSERTed.
621 with m
.If(iq
.qlen_o
!= 0):
622 # get the operands and operation
623 dest
= iq
.data_o
[0].dest_i
624 src1
= iq
.data_o
[0].src1_i
625 src2
= iq
.data_o
[0].src2_i
626 op
= iq
.data_o
[0].oper_i
628 # set the src/dest regs
629 comb
+= sc
.int_dest_i
.eq(dest
)
630 comb
+= sc
.int_src1_i
.eq(src1
)
631 comb
+= sc
.int_src2_i
.eq(src2
)
632 comb
+= sc
.reg_enable_i
.eq(1) # enable the regfile
634 # choose a Function-Unit-Group
635 with m
.If((op
& (0x3<<2)) != 0): # branch
636 comb
+= sc
.brissue
.insn_i
.eq(1)
637 comb
+= sc
.br_oper_i
.eq(op
& 0x3)
638 comb
+= wait_issue_br
.eq(1)
640 comb
+= sc
.aluissue
.insn_i
.eq(1)
641 comb
+= sc
.alu_oper_i
.eq(op
& 0x3)
642 comb
+= wait_issue_alu
.eq(1)
645 # these indicate that the instruction is to be made
646 # shadow-dependent on
647 # (either) branch success or branch fail
648 #yield sc.branch_fail_i.eq(branch_fail)
649 #yield sc.branch_succ_i.eq(branch_success)
655 for o
in self
.data_i
:
672 def __init__(self
, rwidth
, nregs
):
674 self
.regs
= [0] * nregs
676 def op(self
, op
, src1
, src2
, dest
):
677 maxbits
= (1 << self
.rwidth
) - 1
678 src1
= self
.regs
[src1
] & maxbits
679 src2
= self
.regs
[src2
] & maxbits
687 val
= src1
>> (src2
& maxbits
)
689 val
= int(src1
> src2
)
691 val
= int(src1
< src2
)
693 val
= int(src1
== src2
)
695 val
= int(src1
!= src2
)
697 self
.setval(dest
, val
)
def setval(self, dest, val):
    """Store ``val`` in the simulated register numbered ``dest``.

    Method of the software register model (its ``class`` header lies
    outside the visible source; presumably ``RegSim`` -- confirm).

    * :dest: index into ``self.regs``
    * :val:  integer to store; it is also passed to ``hex()`` for the
             trace message, so it must be an int

    The value is stored as given: no masking to the register width is
    applied here.
    """
    # trace every write so the simulated register state can be followed
    # on the console
    print ("sim setval", dest, hex(val))
    self.regs[dest] = val
705 for i
, val
in enumerate(self
.regs
):
706 reg
= yield dut
.intregs
.regs
[i
].reg
707 okstr
= "OK" if reg
== val
else "!ok"
708 print("reg %d expected %x received %x %s" % (i
, val
, reg
, okstr
))
710 def check(self
, dut
):
711 for i
, val
in enumerate(self
.regs
):
712 reg
= yield dut
.intregs
.regs
[i
].reg
714 print("reg %d expected %x received %x\n" % (i
, val
, reg
))
715 yield from self
.dump(dut
)
718 def instr_q(dut
, op
, src1
, src2
, dest
, branch_success
, branch_fail
):
719 instrs
= [{'oper_i': op
, 'dest_i': dest
, 'src1_i': src1
, 'src2_i': src2
}]
722 for idx
in range(sendlen
):
723 yield from eq(dut
.data_i
[idx
], instrs
[idx
])
724 di
= yield dut
.data_i
[idx
]
725 print ("senddata %d %x" % (idx
, di
))
726 yield dut
.p_add_i
.eq(sendlen
)
728 o_p_ready
= yield dut
.p_ready_o
731 o_p_ready
= yield dut
.p_ready_o
733 yield dut
.p_add_i
.eq(0)
736 def int_instr(dut
, op
, src1
, src2
, dest
, branch_success
, branch_fail
):
737 yield from disable_issue(dut
)
738 yield dut
.int_dest_i
.eq(dest
)
739 yield dut
.int_src1_i
.eq(src1
)
740 yield dut
.int_src2_i
.eq(src2
)
741 if (op
& (0x3<<2)) != 0: # branch
742 yield dut
.brissue
.insn_i
.eq(1)
743 yield dut
.br_oper_i
.eq(Const(op
& 0x3, 2))
744 dut_issue
= dut
.brissue
746 yield dut
.aluissue
.insn_i
.eq(1)
747 yield dut
.alu_oper_i
.eq(Const(op
& 0x3, 2))
748 dut_issue
= dut
.aluissue
749 yield dut
.reg_enable_i
.eq(1)
751 # these indicate that the instruction is to be made shadow-dependent on
752 # (either) branch success or branch fail
753 yield dut
.branch_fail_i
.eq(branch_fail
)
754 yield dut
.branch_succ_i
.eq(branch_success
)
757 yield from wait_for_issue(dut
, dut_issue
)
760 def print_reg(dut
, rnums
):
763 reg
= yield dut
.intregs
.regs
[rnum
].reg
764 rs
.append("%x" % reg
)
765 rnums
= map(str, rnums
)
766 print ("reg %s: %s" % (','.join(rnums
), ','.join(rs
)))
769 def create_random_ops(dut
, n_ops
, shadowing
=False, max_opnums
=3):
771 for i
in range(n_ops
):
772 src1
= randint(1, dut
.n_regs
-1)
773 src2
= randint(1, dut
.n_regs
-1)
774 dest
= randint(1, dut
.n_regs
-1)
775 op
= randint(0, max_opnums
)
778 insts
.append((src1
, src2
, dest
, op
, (0, 0)))
780 insts
.append((src1
, src2
, dest
, op
))
784 def wait_for_busy_clear(dut
):
786 busy_o
= yield dut
.busy_o
def disable_issue(dut):
    """Drive the instruction-issue request of both issue groups to 0.

    Simulation helper written as a generator: it yields one assignment
    per issue group, first ``dut.aluissue.insn_i`` and then
    ``dut.brissue.insn_i``, each set to 0.  Callers consume it with
    ``yield from disable_issue(dut)``.

    * :dut: device under test exposing ``aluissue`` and ``brissue``,
            each with an ``insn_i`` signal
    """
    yield dut.aluissue.insn_i.eq(0)
    yield dut.brissue.insn_i.eq(0)
797 def wait_for_issue(dut
, dut_issue
):
799 issue_o
= yield dut_issue
.fn_issue_o
801 yield from disable_issue(dut
)
802 yield dut
.reg_enable_i
.eq(0)
805 #yield from print_reg(dut, [1,2,3])
807 #yield from print_reg(dut, [1,2,3])
809 def scoreboard_branch_sim(dut
, alusim
):
815 print ("rseed", iseed
)
819 yield dut
.branch_direction_o
.eq(0)
821 # set random values in the registers
822 for i
in range(1, dut
.n_regs
):
824 val
= randint(0, (1<<alusim
.rwidth
)-1)
825 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
826 alusim
.setval(i
, val
)
829 # create some instructions: branches create a tree
830 insts
= create_random_ops(dut
, 1, True, 1)
831 #insts.append((6, 6, 1, 2, (0, 0)))
832 #insts.append((4, 3, 3, 0, (0, 0)))
834 src1
= randint(1, dut
.n_regs
-1)
835 src2
= randint(1, dut
.n_regs
-1)
837 op
= 4 # only BGT at the moment
839 branch_ok
= create_random_ops(dut
, 1, True, 1)
840 branch_fail
= create_random_ops(dut
, 1, True, 1)
842 insts
.append((src1
, src2
, (branch_ok
, branch_fail
), op
, (0, 0)))
846 insts
.append( (3, 5, 2, 0, (0, 0)) )
849 #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
850 branch_ok
.append( None )
851 branch_fail
.append( (1, 1, 2, 0, (0, 1)) )
852 #branch_fail.append( None )
853 insts
.append( (6, 4, (branch_ok
, branch_fail
), 4, (0, 0)) )
855 siminsts
= deepcopy(insts
)
857 # issue instruction(s)
865 branch_direction
= yield dut
.branch_direction_o
# way branch went
866 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = insts
.pop(0)
867 if branch_direction
== 1 and shadow_on
:
868 print ("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
869 continue # branch was "success" and this is a "failed"... skip
870 if branch_direction
== 2 and shadow_off
:
871 print ("skip", i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
)
872 continue # branch was "fail" and this is a "success"... skip
873 if branch_direction
!= 0:
878 branch_ok
, branch_fail
= dest
880 # ok zip up the branch success / fail instructions and
881 # drop them into the queue, one marked "to have branch success"
882 # the other to be marked shadow branch "fail".
883 # one out of each of these will be cancelled
884 for ok
, fl
in zip(branch_ok
, branch_fail
):
886 instrs
.append((ok
[0], ok
[1], ok
[2], ok
[3], (1, 0)))
888 instrs
.append((fl
[0], fl
[1], fl
[2], fl
[3], (0, 1)))
889 print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
890 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
891 yield from int_instr(dut
, op
, src1
, src2
, dest
,
892 shadow_on
, shadow_off
)
894 # wait for all instructions to stop before checking
896 yield from wait_for_busy_clear(dut
)
900 instr
= siminsts
.pop(0)
903 (src1
, src2
, dest
, op
, (shadow_on
, shadow_off
)) = instr
907 branch_ok
, branch_fail
= dest
909 print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
910 (i
, src1
, src2
, dest
, op
, shadow_on
, shadow_off
))
911 branch_res
= alusim
.op(op
, src1
, src2
, dest
)
914 siminsts
+= branch_ok
916 siminsts
+= branch_fail
919 yield from alusim
.check(dut
)
920 yield from alusim
.dump(dut
)
923 def scoreboard_sim(dut
, alusim
):
929 # set random values in the registers
930 for i
in range(1, dut
.n_regs
):
931 val
= randint(0, (1<<alusim
.rwidth
)-1)
934 yield dut
.intregs
.regs
[i
].reg
.eq(val
)
935 alusim
.setval(i
, val
)
937 # create some instructions (some random, some regression tests)
940 instrs
= create_random_ops(dut
, 15, True, 3)
943 instrs
.append( (7, 3, 2, 4, (0, 0)) )
944 instrs
.append( (7, 6, 6, 2, (0, 0)) )
945 instrs
.append( (1, 7, 2, 2, (0, 0)) )
949 instrs
.append((2, 3, 3, 0, (0, 0)))
950 instrs
.append((5, 3, 3, 1, (0, 0)))
951 instrs
.append((3, 5, 5, 2, (0, 0)))
952 instrs
.append((5, 3, 3, 3, (0, 0)))
953 instrs
.append((3, 5, 5, 0, (0, 0)))
956 instrs
.append((5, 6, 2, 1))
957 instrs
.append((2, 2, 4, 0))
958 #instrs.append((2, 2, 3, 1))
961 instrs
.append((2, 1, 2, 3))
964 instrs
.append((2, 6, 2, 1))
965 instrs
.append((2, 1, 2, 0))
968 instrs
.append((1, 2, 7, 2))
969 instrs
.append((7, 1, 5, 0))
970 instrs
.append((4, 4, 1, 1))
973 instrs
.append((5, 6, 2, 2))
974 instrs
.append((1, 1, 4, 1))
975 instrs
.append((6, 5, 3, 0))
978 # Write-after-Write Hazard
979 instrs
.append( (3, 6, 7, 2) )
980 instrs
.append( (4, 4, 7, 1) )
983 # self-read/write-after-write followed by Read-after-Write
984 instrs
.append((1, 1, 1, 1))
985 instrs
.append((1, 5, 3, 0))
988 # Read-after-Write followed by self-read-after-write
989 instrs
.append((5, 6, 1, 2))
990 instrs
.append((1, 1, 1, 1))
993 # self-read-write sandwich
994 instrs
.append((5, 6, 1, 2))
995 instrs
.append((1, 1, 1, 1))
996 instrs
.append((1, 5, 3, 0))
1000 instrs
.append( (5, 2, 5, 2) )
1001 instrs
.append( (2, 6, 3, 0) )
1002 instrs
.append( (4, 2, 2, 1) )
1006 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1007 alusim
.setval(5, v1
)
1008 yield dut
.intregs
.regs
[3].reg
.eq(5)
1010 instrs
.append((5, 3, 3, 4, (0, 0)))
1011 instrs
.append((4, 2, 1, 2, (0, 1)))
1015 yield dut
.intregs
.regs
[5].reg
.eq(v1
)
1016 alusim
.setval(5, v1
)
1017 yield dut
.intregs
.regs
[3].reg
.eq(5)
1019 instrs
.append((5, 3, 3, 4, (0, 0)))
1020 instrs
.append((4, 2, 1, 2, (1, 0)))
1023 instrs
.append( (4, 3, 5, 1, (0, 0)) )
1024 instrs
.append( (5, 2, 3, 1, (0, 0)) )
1025 instrs
.append( (7, 1, 5, 2, (0, 0)) )
1026 instrs
.append( (5, 6, 6, 4, (0, 0)) )
1027 instrs
.append( (7, 5, 2, 2, (1, 0)) )
1028 instrs
.append( (1, 7, 5, 0, (0, 1)) )
1029 instrs
.append( (1, 6, 1, 2, (1, 0)) )
1030 instrs
.append( (1, 6, 7, 3, (0, 0)) )
1031 instrs
.append( (6, 7, 7, 0, (0, 0)) )
1033 # issue instruction(s), wait for issue to be free before proceeding
1034 for i
, (src1
, src2
, dest
, op
, (br_ok
, br_fail
)) in enumerate(instrs
):
1036 print ("instr %d: (%d, %d, %d, %d)" % (i
, src1
, src2
, dest
, op
))
1037 alusim
.op(op
, src1
, src2
, dest
)
1038 yield from instr_q(dut
, op
, src1
, src2
, dest
, br_ok
, br_fail
)
1040 # wait for all instructions to stop before checking
1042 iqlen
= yield dut
.qlen_o
1050 yield from wait_for_busy_clear(dut
)
1053 yield from alusim
.check(dut
)
1054 yield from alusim
.dump(dut
)
1057 def test_scoreboard():
1058 dut
= IssueToScoreboard(2, 1, 1, 16, 8, 8)
1059 alusim
= RegSim(16, 8)
1060 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
1061 with
open("test_scoreboard6600.il", "w") as f
:
1064 run_simulation(dut
, scoreboard_sim(dut
, alusim
),
1065 vcd_name
='test_scoreboard6600.vcd')
1067 #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
1068 # vcd_name='test_scoreboard6600.vcd')
1071 if __name__
== '__main__':