3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 (update: actually this is being added now:
21 https://bugs.libre-soc.org/show_bug.cgi?id=737)
24 from nmigen
import (Elaboratable
, Module
, Signal
, ResetSignal
, Cat
, Mux
,
26 from nmigen
.cli
import rtlil
28 from openpower
.decoder
.power_decoder2
import PowerDecodeSubset
29 from openpower
.decoder
.power_regspec_map
import regspec_decode
30 from openpower
.sv
.svp64
import SVP64Rec
32 from nmutil
.picker
import PriorityPicker
33 from nmutil
.util
import treereduce
34 from nmutil
.singlepipe
import ControlBase
36 from soc
.fu
.compunits
.compunits
import AllFunctionUnits
, LDSTFunctionUnit
37 from soc
.regfile
.regfiles
import RegFiles
38 from openpower
.decoder
.power_decoder2
import get_rdflags
39 from soc
.experiment
.l0_cache
import TstL0CacheBuffer
# test only
40 from soc
.config
.test
.test_loadstore
import TestMemPspec
41 from openpower
.decoder
.power_enums
import MicrOp
, Function
42 from soc
.simple
.core_data
import CoreInput
, CoreOutput
44 from collections
import defaultdict
, namedtuple
47 from nmutil
.util
import rising_edge
49 FUSpec
= namedtuple("FUSpec", ["funame", "fu", "idx"])
50 ByRegSpec
= namedtuple("ByRegSpec", ["okflag", "regport", "wid", "specs"])
52 # helper function for reducing a list of signals down to a single value by ORing them together (via treereduce)
def ortreereduce(tree, attr="o_data"):
    """OR-reduce one attribute taken from every item of ``tree``.

    ``tree`` is handed to ``treereduce`` with ``operator.or_`` as the
    combining operator.  ``attr`` (default ``"o_data"``) names the
    attribute fetched from each item before the items are combined.
    """
    def pick_attr(item):
        # the per-item accessor that treereduce applies before ORing
        return getattr(item, attr)

    return treereduce(tree, operator.or_, pick_attr)
def ortreereduce_sig(tree):
    """OR-reduce the items of ``tree`` directly.

    Same reduction as ``ortreereduce`` (``treereduce`` driven by
    ``operator.or_``), except that each item is used as-is instead of
    having an attribute extracted from it first.
    """
    def as_is(item):
        # identity accessor: the item itself is what gets ORed
        return item

    return treereduce(tree, operator.or_, as_is)
62 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the items of ``fuspecs`` as a list, "full" entries first.

    ``fuspecs`` is a mapping of register-port name to its spec.  The
    result is a list of ``(regname, fspec)`` tuples in which every entry
    whose name starts with ``"full"`` precedes all other entries; within
    each of the two groups the mapping's own iteration order is kept.
    An empty mapping yields an empty list.
    """
    # snapshot the items once so both groups are drawn from the same view
    entries = list(fuspecs.items())
    full_first = [(regname, fspec) for regname, fspec in entries
                  if regname.startswith("full")]
    remainder = [(regname, fspec) for regname, fspec in entries
                 if not regname.startswith("full")]
    return full_first + remainder
74 # a hazard bitvector "remap" function which returns an AST expression
75 # that remaps read/write hazard regfile port numbers to either a full
76 # bitvector or a reduced subset one. SPR for example is reduced to a single bit.
78 # CRITICALLY-IMPORTANT NOTE: these bitvectors *have* to match up per
79 # regfile! therefore the remapping is per regfile, *NOT* per regfile
80 # port and certainly not based on whether it is a read port or write port.
81 # note that any reductions here will result in degraded performance due
82 # to conflicts, but at least it keeps the hazard matrix sizes down to "sane"
83 def bitvector_remap(regfile
, rfile
, port
):
84 # 8-bits (at the moment, no SVP64), CR is unary: no remap
87 # 3 bits, unary already: return the port
90 # 3 bits, unary: return the port
93 # 3 bits, unary: return the port
94 if regfile
== 'SVSTATE':
96 # 9 bits (9 entries), might be unary already
98 if rfile
.unary
: # FAST might be unary already
102 # 10 bits (!!) - reduce to one
104 if rfile
.unary
: # FAST might be unary already
109 if rfile
.unary
: # INT, check if unary/binary
115 # derive from ControlBase rather than have a separate Stage instance,
116 # this is simpler to do
117 class NonProductionCore(ControlBase
):
118 def __init__(self
, pspec
):
121 # test if SVP64 is to be enabled
122 self
.svp64_en
= hasattr(pspec
, "svp64") and (pspec
.svp64
== True)
124 # test to see if regfile ports should be reduced
125 self
.regreduce_en
= (hasattr(pspec
, "regreduce") and
126 (pspec
.regreduce
== True))
128 # test to see if overlapping of instructions is allowed
129 # (not normally enabled for TestIssuer FSM but useful for checking
130 # the bitvector hazard detection, before doing In-Order)
131 self
.allow_overlap
= (hasattr(pspec
, "allow_overlap") and
132 (pspec
.allow_overlap
== True))
135 self
.make_hazard_vecs
= self
.allow_overlap
136 self
.core_type
= "fsm"
137 if hasattr(pspec
, "core_type"):
138 self
.core_type
= pspec
.core_type
140 super().__init
__(stage
=self
)
142 # single LD/ST funnel for memory access
143 self
.l0
= l0
= TstL0CacheBuffer(pspec
, n_units
=1)
146 # function units (only one each)
147 # only include mmu if enabled in pspec
148 self
.fus
= AllFunctionUnits(pspec
, pilist
=[pi
])
150 # link LoadStore1 into MMU
151 mmu
= self
.fus
.get_fu('mmu0')
152 ldst0
= self
.fus
.get_fu('ldst0')
153 print ("core pspec", pspec
.ldst_ifacetype
)
154 print ("core mmu", mmu
)
156 lsi
= l0
.cmpi
.lsmem
.lsi
# a LoadStore1 Interface object
157 print ("core lsmem.lsi", lsi
)
158 mmu
.alu
.set_ldst_interface(lsi
)
159 # urr store I-Cache in core so it is easier to get at
160 self
.icache
= lsi
.icache
162 self
.msr_at_reset
= 0x0
163 if hasattr(pspec
, "msr_reset") and isinstance(pspec
.msr_reset
, int):
164 self
.msr_at_reset
= pspec
.msr_reset
165 state_resets
= [0x0, # PC at reset
166 self
.msr_at_reset
, # MSR at reset
167 0x0] # SVSTATE at reset
169 # register files (yes plural)
170 self
.regs
= RegFiles(pspec
, make_hazard_vecs
=self
.make_hazard_vecs
,
171 state_resets
=state_resets
)
173 # set up input and output: unusual requirement to set data directly
174 # (due to the way that the core is set up in a different domain,
175 # see TestIssuer.setup_peripherals
176 self
.p
.i_data
, self
.n
.o_data
= self
.new_specs(None)
177 self
.i
, self
.o
= self
.p
.i_data
, self
.n
.o_data
179 # actual internal input data used (captured)
180 self
.ireg
= self
.ispec()
182 # create per-FU instruction decoders (subsetted). these "satellite"
183 # decoders reduce wire fan-out from the one (main) PowerDecoder2
184 # (used directly by the trap unit) to the *twelve* (or more)
185 # Function Units. we can either have 32 wires (the instruction)
186 # to each, or we can have well over a 200 wire fan-out (to 12
187 # ALUs). it's an easy choice to make.
191 # eep, these should be *per FU* i.e. for FunctionUnitBaseMulti
192 # they should be shared (put into the ALU *once*).
194 for funame
, fu
in self
.fus
.fus
.items():
195 f_name
= fu
.fnunit
.name
196 fnunit
= fu
.fnunit
.value
197 opkls
= fu
.opsubsetkls
199 # TRAP decoder is the *main* decoder
200 self
.trapunit
= funame
202 assert funame
not in self
.decoders
203 self
.decoders
[funame
] = PowerDecodeSubset(None, opkls
, f_name
,
205 state
=self
.ireg
.state
,
206 svp64_en
=self
.svp64_en
,
207 regreduce_en
=self
.regreduce_en
)
208 self
.des
[funame
] = self
.decoders
[funame
].do
209 print ("create decoder subset", funame
, opkls
, self
.des
[funame
])
211 # create per-Function Unit write-after-write hazard signals
212 # yes, really, this should have been added in ReservationStations
214 for funame
, fu
in self
.fus
.fus
.items():
215 fu
._waw
_hazard
= Signal(name
="waw_%s" % funame
)
217 # share the SPR decoder with the MMU if it exists
218 if "mmu0" in self
.decoders
:
219 self
.decoders
["mmu0"].mmu0_spr_dec
= self
.decoders
["spr0"]
221 # next 3 functions are Stage API Compliance
222 def setup(self
, m
, i
):
226 return CoreInput(self
.pspec
, self
.svp64_en
, self
.regreduce_en
)
231 # elaborate function to create HDL
232 def elaborate(self
, platform
):
233 m
= super().elaborate(platform
)
235 # for testing purposes, to cut down on build time in coriolis2
236 if hasattr(self
.pspec
, "nocore") and self
.pspec
.nocore
== True:
237 x
= Signal() # dummy signal
242 m
.submodules
.fus
= self
.fus
243 m
.submodules
.l0
= l0
= self
.l0
244 self
.regs
.elaborate_into(m
, platform
)
248 # amalgamate write-hazards into a single top-level Signal
249 self
.waw_hazard
= Signal()
251 for funame
, fu
in self
.fus
.fus
.items():
252 whaz
.append(fu
._waw
_hazard
)
253 comb
+= self
.waw_hazard
.eq(Cat(*whaz
).bool())
256 self
.connect_satellite_decoders(m
)
258 # ssh, cheat: trap uses the main decoder because of the rewriting
259 self
.des
[self
.trapunit
] = self
.ireg
.e
.do
261 # connect up Function Units, then read/write ports, and hazard conflict
262 self
.issue_conflict
= Signal()
263 fu_bitdict
, fu_selected
= self
.connect_instruction(m
)
264 raw_hazard
= self
.connect_rdports(m
, fu_bitdict
, fu_selected
)
265 self
.connect_wrports(m
, fu_bitdict
, fu_selected
)
266 if self
.allow_overlap
:
267 comb
+= self
.issue_conflict
.eq(raw_hazard
)
269 # note if an exception happened. in a pipelined or OoO design
270 # this needs to be accompanied by "shadowing" (or stalling)
272 for exc
in self
.fus
.excs
.values():
273 el
.append(exc
.happened
)
274 if len(el
) > 0: # at least one exception
275 comb
+= self
.o
.exc_happened
.eq(Cat(*el
).bool())
279 def connect_satellite_decoders(self
, m
):
281 for k
, v
in self
.decoders
.items():
282 # connect each satellite decoder and give it the instruction.
283 # as subset decoders this massively reduces wire fanout given
284 # the large number of ALUs
285 m
.submodules
["dec_%s" % k
] = v
286 comb
+= v
.dec
.raw_opcode_in
.eq(self
.ireg
.raw_insn_i
)
287 comb
+= v
.dec
.bigendian
.eq(self
.ireg
.bigendian_i
)
288 # sigh due to SVP64 RA_OR_ZERO detection connect these too
289 comb
+= v
.sv_a_nz
.eq(self
.ireg
.sv_a_nz
)
290 if not self
.svp64_en
:
292 comb
+= v
.pred_sm
.eq(self
.ireg
.sv_pred_sm
)
293 comb
+= v
.pred_dm
.eq(self
.ireg
.sv_pred_dm
)
294 if k
== self
.trapunit
:
296 comb
+= v
.sv_rm
.eq(self
.ireg
.sv_rm
) # pass through SVP64 RM
297 comb
+= v
.is_svp64_mode
.eq(self
.ireg
.is_svp64_mode
)
298 # only the LDST PowerDecodeSubset *actually* needs to
299 # know to use the alternative decoder. this is all
301 if not k
.lower().startswith("ldst"):
303 comb
+= v
.use_svp64_ldst_dec
.eq( self
.ireg
.use_svp64_ldst_dec
)
305 def connect_instruction(self
, m
):
306 """connect_instruction
308 uses decoded (from PowerOp) function unit information from CSV files
309 to ascertain which Function Unit should deal with the current
312 some (such as OP_ATTN, OP_NOP) are dealt with here, including
313 ignoring it and halting the processor. OP_NOP is a bit annoying
314 because the issuer expects busy flag still to be raised then lowered.
315 (this requires a fake counter to be set).
317 comb
, sync
= m
.d
.comb
, m
.d
.sync
320 # indicate if core is busy
321 busy_o
= self
.o
.busy_o
322 any_busy_o
= self
.o
.any_busy_o
324 # connect up temporary copy of incoming instruction. the FSM will
325 # either blat the incoming instruction (if valid) into self.ireg
326 # or if the instruction could not be delivered, keep dropping the
327 # latched copy into ireg
328 ilatch
= self
.ispec()
329 self
.instr_active
= Signal()
331 # enable/busy-signals for each FU, get one bit for each FU (by name)
332 fu_enable
= Signal(len(fus
), reset_less
=True)
333 fu_busy
= Signal(len(fus
), reset_less
=True)
336 for i
, funame
in enumerate(fus
.keys()):
337 fu_bitdict
[funame
] = fu_enable
[i
]
338 fu_selected
[funame
] = fu_busy
[i
]
340 # identify function units and create a list by fnunit so that
341 # PriorityPickers can be created for selecting one of them that
342 # isn't busy at the time the incoming instruction needs passing on
343 by_fnunit
= defaultdict(list)
344 for fname
, member
in Function
.__members
__.items():
345 for funame
, fu
in fus
.items():
346 fnunit
= fu
.fnunit
.value
347 if member
.value
& fnunit
: # this FU handles this type of op
348 by_fnunit
[fname
].append((funame
, fu
)) # add by Function
350 # ok now just print out the list of FUs by Function, because we can
351 for fname
, fu_list
in by_fnunit
.items():
352 print ("FUs by type", fname
, fu_list
)
354 # now create a PriorityPicker per FU-type such that only one
355 # non-busy FU will be picked
357 fu_found
= Signal() # take a note if no Function Unit was available
358 for fname
, fu_list
in by_fnunit
.items():
359 i_pp
= PriorityPicker(len(fu_list
))
360 m
.submodules
['i_pp_%s' % fname
] = i_pp
362 for i
, (funame
, fu
) in enumerate(fu_list
):
363 # match the decoded instruction (e.do.fn_unit) against the
364 # "capability" of this FU, gate that by whether that FU is
365 # busy, and drop that into the PriorityPicker.
366 # this will give us an output of the first available *non-busy*
367 # Function Unit (Reservation Station) capable of handling this
369 fnunit
= fu
.fnunit
.value
370 en_req
= Signal(name
="issue_en_%s" % funame
, reset_less
=True)
371 fnmatch
= (self
.ireg
.e
.do
.fn_unit
& fnunit
).bool()
372 comb
+= en_req
.eq(fnmatch
& ~fu
.busy_o
&
374 i_l
.append(en_req
) # store in list for doing the Cat-trick
375 # picker output, gated by enable: store in fu_bitdict
376 po
= Signal(name
="o_issue_pick_"+funame
) # picker output
377 comb
+= po
.eq(i_pp
.o
[i
] & i_pp
.en_o
)
378 comb
+= fu_bitdict
[funame
].eq(po
)
379 comb
+= fu_selected
[funame
].eq(fu
.busy_o | po
)
380 # if we don't do this, then when there are no FUs available,
381 # the "p.o_ready" signal will go back "ok we accepted this
382 # instruction" which of course isn't true.
383 with m
.If(i_pp
.en_o
):
384 comb
+= fu_found
.eq(1)
385 # for each input, Cat them together and drop them into the picker
386 comb
+= i_pp
.i
.eq(Cat(*i_l
))
388 # rdmask, which is for registers needs to come from the *main* decoder
389 for funame
, fu
in fus
.items():
390 rdmask
= get_rdflags(m
, self
.ireg
.e
, fu
)
391 comb
+= fu
.rdmaskn
.eq(~rdmask
)
393 # sigh - need a NOP counter
395 with m
.If(counter
!= 0):
396 sync
+= counter
.eq(counter
- 1)
399 # default to reading from incoming instruction: may be overridden
400 # by copy from latch when "waiting"
401 comb
+= self
.ireg
.eq(self
.i
)
402 # always say "ready" except if overridden
403 comb
+= self
.p
.o_ready
.eq(1)
406 with m
.State("READY"):
407 with m
.If(self
.p
.i_valid
): # run only when valid
408 with m
.Switch(self
.ireg
.e
.do
.insn_type
):
409 # check for ATTN: halt if true
410 with m
.Case(MicrOp
.OP_ATTN
):
411 m
.d
.sync
+= self
.o
.core_terminate_o
.eq(1)
413 # fake NOP - this isn't really used (Issuer detects NOP)
414 with m
.Case(MicrOp
.OP_NOP
):
415 sync
+= counter
.eq(2)
419 comb
+= self
.instr_active
.eq(1)
420 comb
+= self
.p
.o_ready
.eq(0)
421 # connect instructions. only one enabled at a time
422 for funame
, fu
in fus
.items():
423 do
= self
.des
[funame
]
424 enable
= fu_bitdict
[funame
]
426 # run this FunctionUnit if enabled route op,
427 # issue, busy, read flags and mask to FU
429 # operand comes from the *local* decoder
430 # do not actually issue, though, if there
431 # is a waw hazard. decoder has to still
432 # be asserted in order to detect that, tho
433 comb
+= fu
.oper_i
.eq_from(do
)
435 # URRR this is truly dreadful.
436 # OP_FETCH_FAILED is a "fake" op.
437 # no instruction creates it. OP_TRAP
438 # uses the *main* decoder: this is
439 # a *Satellite* decoder that reacts
440 # on *insn_in*... not fake ops. gaah.
441 main_op
= self
.ireg
.e
.do
442 with m
.If(main_op
.insn_type
==
443 MicrOp
.OP_FETCH_FAILED
):
444 comb
+= fu
.oper_i
.insn_type
.eq(
445 MicrOp
.OP_FETCH_FAILED
)
446 comb
+= fu
.oper_i
.fn_unit
.eq(
448 # issue when valid (and no write-hazard)
449 comb
+= fu
.issue_i
.eq(~self
.waw_hazard
)
450 # instruction ok, indicate ready
451 comb
+= self
.p
.o_ready
.eq(1)
453 if self
.allow_overlap
:
454 with m
.If(~fu_found | self
.waw_hazard
):
455 # latch copy of instruction
456 sync
+= ilatch
.eq(self
.i
)
457 comb
+= self
.p
.o_ready
.eq(1) # accept
461 with m
.State("WAITING"):
462 comb
+= self
.instr_active
.eq(1)
463 comb
+= self
.p
.o_ready
.eq(0)
465 # using copy of instruction, keep waiting until an FU is free
466 comb
+= self
.ireg
.eq(ilatch
)
467 with m
.If(fu_found
): # wait for conflict to clear
468 # connect instructions. only one enabled at a time
469 for funame
, fu
in fus
.items():
470 do
= self
.des
[funame
]
471 enable
= fu_bitdict
[funame
]
473 # run this FunctionUnit if enabled route op,
474 # issue, busy, read flags and mask to FU
476 # operand comes from the *local* decoder,
477 # which is asserted even if not issued,
478 # so that WaW-detection can check for hazards.
479 # only if the waw hazard is clear does the
480 # instruction actually get issued
481 comb
+= fu
.oper_i
.eq_from(do
)
483 comb
+= fu
.issue_i
.eq(~self
.waw_hazard
)
484 with m
.If(~self
.waw_hazard
):
485 comb
+= self
.p
.o_ready
.eq(1)
489 print ("core: overlap allowed", self
.allow_overlap
)
490 # true when any FU is busy (including the cycle where it is perhaps
491 # to be issued - because that's what fu_busy is)
492 comb
+= any_busy_o
.eq(fu_busy
.bool())
493 if not self
.allow_overlap
:
494 # for simple non-overlap, if any instruction is busy, set
495 # busy output for core.
496 comb
+= busy_o
.eq(any_busy_o
)
498 # sigh deal with a fun situation that needs to be investigated
500 with m
.If(self
.issue_conflict
):
502 # make sure that LDST, SPR, MMU, Branch and Trap all say "busy"
503 # and do not allow overlap. these are all the ones that
504 # are non-forward-progressing: exceptions etc. that otherwise
505 # change CoreState for some reason (MSR, PC, SVSTATE)
506 for funame
, fu
in fus
.items():
507 if (funame
.lower().startswith('ldst') or
508 funame
.lower().startswith('branch') or
509 funame
.lower().startswith('mmu') or
510 funame
.lower().startswith('spr') or
511 funame
.lower().startswith('trap')):
512 with m
.If(fu
.busy_o
):
515 # return both the function unit "enable" dict as well as the "busy".
516 # the "busy-or-issued" can be passed in to the Read/Write port
517 # connecters to give them permission to request access to regfiles
518 return fu_bitdict
, fu_selected
520 def connect_rdport(self
, m
, fu_bitdict
, fu_selected
,
521 rdpickers
, regfile
, regname
, fspec
):
522 comb
, sync
= m
.d
.comb
, m
.d
.sync
528 # select the required read port. these are pre-defined sizes
529 rfile
= regs
.rf
[regfile
.lower()]
530 rport
= rfile
.r_ports
[rpidx
]
531 print("read regfile", rpidx
, regfile
, regs
.rf
.keys(),
534 # for checking if the read port has an outstanding write
535 if self
.make_hazard_vecs
:
536 wv
= regs
.wv
[regfile
.lower()]
537 wvchk
= wv
.q_int
# write-vec bit-level hazard check
539 # if a hazard is detected on this read port, simply blithely block
540 # every FU from reading on it. this is complete overkill but very
542 hazard_detected
= Signal(name
="raw_%s_%s" % (regfile
, rpidx
))
545 if not isinstance(fspecs
, list):
551 for i
, fspec
in enumerate(fspecs
):
552 # get the regfile specs for this regfile port
553 print ("fpsec", i
, fspec
, len(fspec
.specs
))
554 name
= "%s_%s_%d" % (regfile
, regname
, i
)
555 ppoffs
.append(pplen
) # record offset for picker
556 pplen
+= len(fspec
.specs
)
557 rdflag
= Signal(name
="rdflag_"+name
, reset_less
=True)
558 comb
+= rdflag
.eq(fspec
.okflag
)
559 rdflags
.append(rdflag
)
561 print ("pplen", pplen
)
563 # create a priority picker to manage this port
564 rdpickers
[regfile
][rpidx
] = rdpick
= PriorityPicker(pplen
)
565 m
.submodules
["rdpick_%s_%s" % (regfile
, rpidx
)] = rdpick
571 for i
, fspec
in enumerate(fspecs
):
572 (rf
, _read
, wid
, fuspecs
) = \
573 (fspec
.okflag
, fspec
.regport
, fspec
.wid
, fspec
.specs
)
574 # connect up the FU req/go signals, and the reg-read to the FU
575 # and create a Read Broadcast Bus
576 for pi
, fuspec
in enumerate(fspec
.specs
):
577 (funame
, fu
, idx
) = (fuspec
.funame
, fuspec
.fu
, fuspec
.idx
)
579 name
= "%s_%s_%s_%i" % (regfile
, rpidx
, funame
, pi
)
580 fu_active
= fu_selected
[funame
]
581 fu_issued
= fu_bitdict
[funame
]
583 # get (or set up) a latched copy of read register number
584 # and (sigh) also the read-ok flag
585 # TODO: use nmutil latchregister
586 rhname
= "%s_%s_%d" % (regfile
, regname
, i
)
587 rdflag
= Signal(name
="rdflag_%s_%s" % (funame
, rhname
),
589 if rhname
not in fu
.rf_latches
:
590 rfl
= Signal(name
="rdflag_latch_%s_%s" % (funame
, rhname
))
591 fu
.rf_latches
[rhname
] = rfl
592 with m
.If(fu
.issue_i
):
593 sync
+= rfl
.eq(rdflags
[i
])
595 rfl
= fu
.rf_latches
[rhname
]
597 # now the register port
598 rname
= "%s_%s_%s_%d" % (funame
, regfile
, regname
, pi
)
599 read
= Signal
.like(_read
, name
="read_"+rname
)
600 if rname
not in fu
.rd_latches
:
601 rdl
= Signal
.like(_read
, name
="rdlatch_"+rname
)
602 fu
.rd_latches
[rname
] = rdl
603 with m
.If(fu
.issue_i
):
604 sync
+= rdl
.eq(_read
)
606 rdl
= fu
.rd_latches
[rname
]
608 # make the read immediately available on issue cycle
609 # after the read cycle, otherwise use the latched copy.
610 # this captures the regport and okflag on issue
611 with m
.If(fu
.issue_i
):
612 comb
+= read
.eq(_read
)
613 comb
+= rdflag
.eq(rdflags
[i
])
616 comb
+= rdflag
.eq(rfl
)
618 # connect request-read to picker input, and output to go-rd
619 addr_en
= Signal
.like(read
, name
="addr_en_"+name
)
620 pick
= Signal(name
="pick_"+name
) # picker input
621 rp
= Signal(name
="rp_"+name
) # picker output
622 delay_pick
= Signal(name
="dp_"+name
) # read-enable "underway"
623 rhazard
= Signal(name
="rhaz_"+name
)
625 # exclude any currently-enabled read-request (mask out active)
626 # entirely block anything hazarded from being picked
627 comb
+= pick
.eq(fu
.rd_rel_o
[idx
] & fu_active
& rdflag
&
628 ~delay_pick
& ~rhazard
)
629 comb
+= rdpick
.i
[pi
].eq(pick
)
630 comb
+= fu
.go_rd_i
[idx
].eq(delay_pick
) # pass in *delayed* pick
632 # if picked, select read-port "reg select" number to port
633 comb
+= rp
.eq(rdpick
.o
[pi
] & rdpick
.en_o
)
634 sync
+= delay_pick
.eq(rp
) # delayed "pick"
635 comb
+= addr_en
.eq(Mux(rp
, read
, 0))
637 # the read-enable happens combinatorially (see mux-bus below)
638 # but it results in the data coming out on a one-cycle delay.
642 addrs
.append(addr_en
)
645 # use the *delayed* pick signal to put requested data onto bus
646 with m
.If(delay_pick
):
647 # connect regfile port to input, creating fan-out Bus
649 print("reg connect widths",
650 regfile
, regname
, pi
, funame
,
651 src
.shape(), rport
.o_data
.shape())
652 # all FUs connect to same port
653 comb
+= src
.eq(rport
.o_data
)
655 if not self
.make_hazard_vecs
:
658 # read the write-hazard bitvector (wv) for any bit that is
659 wvchk_en
= Signal(len(wvchk
), name
="wv_chk_addr_en_"+name
)
660 issue_active
= Signal(name
="rd_iactive_"+name
)
661 # XXX combinatorial loop here
662 comb
+= issue_active
.eq(fu_active
& rdflag
)
663 with m
.If(issue_active
):
665 comb
+= wvchk_en
.eq(read
)
667 comb
+= wvchk_en
.eq(1<<read
)
668 # if FU is busy (which doesn't get set at the same time as
669 # issue) and no hazard was detected, clear wvchk_en (i.e.
670 # stop checking for hazards). there is a loop here, but it's
671 # via a DFF, so is ok. some linters may complain, but hey.
672 with m
.If(fu
.busy_o
& ~rhazard
):
673 comb
+= wvchk_en
.eq(0)
675 # read-hazard is ANDed with (filtered by) what is actually
677 comb
+= rhazard
.eq((wvchk
& wvchk_en
).bool())
679 wvens
.append(wvchk_en
)
681 # or-reduce the muxed read signals
683 # for unary-addressed
684 comb
+= rport
.ren
.eq(ortreereduce_sig(rens
))
686 # for binary-addressed
687 comb
+= rport
.addr
.eq(ortreereduce_sig(addrs
))
688 comb
+= rport
.ren
.eq(Cat(*rens
).bool())
689 print ("binary", regfile
, rpidx
, rport
, rport
.ren
, rens
, addrs
)
691 if not self
.make_hazard_vecs
:
692 return Const(0) # declare "no hazards"
694 # enable the read bitvectors for this issued instruction
695 # and return whether any write-hazard bit is set
696 wvchk_and
= Signal(len(wvchk
), name
="wv_chk_"+name
)
697 comb
+= wvchk_and
.eq(wvchk
& ortreereduce_sig(wvens
))
698 comb
+= hazard_detected
.eq(wvchk_and
.bool())
699 return hazard_detected
701 def connect_rdports(self
, m
, fu_bitdict
, fu_selected
):
702 """connect read ports
704 orders the read regspecs into a dict-of-dicts, by regfile, by
705 regport name, then connects all FUs that want that regport by
706 way of a PriorityPicker.
708 comb
, sync
= m
.d
.comb
, m
.d
.sync
713 # dictionary of lists of regfile read ports
714 byregfiles_rdspec
= self
.get_byregfiles(m
, True)
716 # okaay, now we need a PriorityPicker per regfile per regfile port
717 # loootta pickers... peter piper picked a pack of pickled peppers...
719 for regfile
, fuspecs
in byregfiles_rdspec
.items():
720 rdpickers
[regfile
] = {}
722 # argh. an experiment to merge RA and RB in the INT regfile
723 # (we have too many read/write ports)
724 if self
.regreduce_en
:
726 fuspecs
['rabc'] = [fuspecs
.pop('rb')]
727 fuspecs
['rabc'].append(fuspecs
.pop('rc'))
728 fuspecs
['rabc'].append(fuspecs
.pop('ra'))
729 if regfile
== 'FAST':
730 fuspecs
['fast1'] = [fuspecs
.pop('fast1')]
731 if 'fast2' in fuspecs
:
732 fuspecs
['fast1'].append(fuspecs
.pop('fast2'))
733 if 'fast3' in fuspecs
:
734 fuspecs
['fast1'].append(fuspecs
.pop('fast3'))
736 # for each named regfile port, connect up all FUs to that port
737 # also return (and collate) hazard detection)
738 for (regname
, fspec
) in sort_fuspecs(fuspecs
):
739 print("connect rd", regname
, fspec
)
740 rh
= self
.connect_rdport(m
, fu_bitdict
, fu_selected
,
745 return Cat(*rd_hazard
).bool()
747 def make_hazards(self
, m
, regfile
, rfile
, wvclr
, wvset
,
748 funame
, regname
, idx
,
749 addr_en
, wp
, fu
, fu_active
, wrflag
, write
,
751 """make_hazards: a setter and a clearer for the regfile write ports
753 setter is at issue time (using PowerDecoder2 regfile write numbers)
754 clearer is at regfile write time (when FU has said what to write to)
756 there is *one* unusual case here which has to be dealt with:
757 when the Function Unit does *NOT* request a write to the regfile
758 (has its data.ok bit CLEARED). this is perfectly legitimate.
761 comb
, sync
= m
.d
.comb
, m
.d
.sync
762 name
= "%s_%s_%d" % (funame
, regname
, idx
)
764 # connect up the bitvector write hazard. unlike the
765 # regfile writeports, a ONE must be written to the corresponding
766 # bit of the hazard bitvector (to indicate the existence of
769 # the detection of what shall be written to is based
770 # on *issue*. it is delayed by 1 cycle so that instructions
771 # "addi 5,5,0x2" do not cause combinatorial loops due to
772 # fake-dependency on *themselves*. this will totally fail
773 # spectacularly when doing multi-issue
774 print ("write vector (for regread)", regfile
, wvset
)
775 wviaddr_en
= Signal(len(wvset
), name
="wv_issue_addr_en_"+name
)
776 issue_active
= Signal(name
="iactive_"+name
)
777 sync
+= issue_active
.eq(fu
.issue_i
& fu_active
& wrflag
)
778 with m
.If(issue_active
):
780 comb
+= wviaddr_en
.eq(write
)
782 comb
+= wviaddr_en
.eq(1<<write
)
784 # deal with write vector clear: this kicks in when the regfile
785 # is written to, and clears the corresponding bitvector entry
786 print ("write vector", regfile
, wvclr
)
787 wvaddr_en
= Signal(len(wvclr
), name
="wvaddr_en_"+name
)
789 comb
+= wvaddr_en
.eq(addr_en
)
792 comb
+= wvaddr_en
.eq(1<<addr_en
)
794 # XXX ASSUME that LDSTFunctionUnit always sets the data it intends to
795 # this may NOT be the case when an exception occurs
796 if isinstance(fu
, LDSTFunctionUnit
):
797 return wvaddr_en
, wviaddr_en
799 # okaaay, this is preparation for the awkward case.
800 # * latch a copy of wrflag when issue goes high.
801 # * when the fu_wrok (data.ok) flag is NOT set,
802 # but the FU is done, the FU is NEVER going to write
803 # so the bitvector has to be cleared.
804 latch_wrflag
= Signal(name
="latch_wrflag_"+name
)
805 with m
.If(~fu
.busy_o
):
806 sync
+= latch_wrflag
.eq(0)
807 with m
.If(fu
.issue_i
& fu_active
):
808 sync
+= latch_wrflag
.eq(wrflag
)
809 with m
.If(fu
.alu_done_o
& latch_wrflag
& ~fu_wrok
):
811 comb
+= wvaddr_en
.eq(write
) # addr_en gated with wp, don't use
813 comb
+= wvaddr_en
.eq(1<<addr_en
) # binary addr_en not gated
815 return wvaddr_en
, wviaddr_en
817 def connect_wrport(self
, m
, fu_bitdict
, fu_selected
,
818 wrpickers
, regfile
, regname
, fspec
):
819 comb
, sync
= m
.d
.comb
, m
.d
.sync
825 # select the required write port. these are pre-defined sizes
826 rfile
= regs
.rf
[regfile
.lower()]
827 wport
= rfile
.w_ports
[rpidx
]
829 print("connect wr", regname
, "unary", rfile
.unary
, fspec
)
830 print(regfile
, regs
.rf
.keys())
832 # select the write-protection hazard vector. note that this still
833 # requires to WRITE to the hazard bitvector! read-requests need
834 # to RAISE the bitvector (set it to 1), which, duh, requires a WRITE
835 if self
.make_hazard_vecs
:
836 wv
= regs
.wv
[regfile
.lower()]
837 wvset
= wv
.s
# write-vec bit-level hazard ctrl
838 wvclr
= wv
.r
# write-vec bit-level hazard ctrl
839 wvchk
= wv
.q
# write-after-write hazard check
842 if not isinstance(fspecs
, list):
849 for i
, fspec
in enumerate(fspecs
):
850 # get the regfile specs for this regfile port
851 (wf
, _write
, wid
, fuspecs
) = \
852 (fspec
.okflag
, fspec
.regport
, fspec
.wid
, fspec
.specs
)
853 print ("fpsec", i
, "wrflag", wf
, fspec
, len(fuspecs
))
854 ppoffs
.append(pplen
) # record offset for picker
855 pplen
+= len(fuspecs
)
857 name
= "%s_%s_%d" % (regfile
, regname
, i
)
858 wrflag
= Signal(name
="wr_flag_"+name
)
860 comb
+= wrflag
.eq(wf
)
863 wrflags
.append(wrflag
)
865 # create a priority picker to manage this port
866 wrpickers
[regfile
][rpidx
] = wrpick
= PriorityPicker(pplen
)
867 m
.submodules
["wrpick_%s_%s" % (regfile
, rpidx
)] = wrpick
874 #wvens = [] - not needed: reading of writevec is permanently held hi
876 for i
, fspec
in enumerate(fspecs
):
877 # connect up the FU req/go signals and the reg-read to the FU
878 # these are arbitrated by Data.ok signals
879 (wf
, _write
, wid
, fuspecs
) = \
880 (fspec
.okflag
, fspec
.regport
, fspec
.wid
, fspec
.specs
)
881 for pi
, fuspec
in enumerate(fspec
.specs
):
882 (funame
, fu
, idx
) = (fuspec
.funame
, fuspec
.fu
, fuspec
.idx
)
883 fu_requested
= fu_bitdict
[funame
]
885 name
= "%s_%s_%s_%d" % (funame
, regfile
, regname
, idx
)
886 # get (or set up) a write-latched copy of write register number
887 write
= Signal
.like(_write
, name
="write_"+name
)
888 rname
= "%s_%s_%s_%d" % (funame
, regfile
, regname
, idx
)
889 if rname
not in fu
.wr_latches
:
890 wrl
= Signal
.like(_write
, name
="wrlatch_"+rname
)
891 fu
.wr_latches
[rname
] = write
892 # do not depend on fu.issue_i here, it creates a
893 # combinatorial loop on waw checking. using the FU
894 # "enable" bitdict entry for this FU is sufficient,
895 # because the PowerDecoder2 read/write nums are
896 # valid continuously when the instruction is valid
897 with m
.If(fu_requested
):
898 sync
+= wrl
.eq(_write
)
899 comb
+= write
.eq(_write
)
901 comb
+= write
.eq(wrl
)
903 write
= fu
.wr_latches
[rname
]
905 # write-request comes from dest.ok
906 dest
= fu
.get_out(idx
)
907 fu_dest_latch
= fu
.get_fu_out(idx
) # latched output
908 name
= "%s_%s_%d" % (funame
, regname
, idx
)
909 fu_wrok
= Signal(name
="fu_wrok_"+name
, reset_less
=True)
910 comb
+= fu_wrok
.eq(dest
.ok
& fu
.busy_o
)
912 # connect request-write to picker input, and output to go-wr
913 fu_active
= fu_selected
[funame
]
914 pick
= fu
.wr
.rel_o
[idx
] & fu_active
915 comb
+= wrpick
.i
[pi
].eq(pick
)
916 # create a single-pulse go write from the picker output
917 wr_pick
= Signal(name
="wpick_%s_%s_%d" % (funame
, regname
, idx
))
918 comb
+= wr_pick
.eq(wrpick
.o
[pi
] & wrpick
.en_o
)
919 comb
+= fu
.go_wr_i
[idx
].eq(rising_edge(m
, wr_pick
))
921 # connect the regspec write "reg select" number to this port
922 # only if one FU actually requests (and is granted) the port
923 # will the write-enable be activated
924 wname
= "waddr_en_%s_%s_%d" % (funame
, regname
, idx
)
925 addr_en
= Signal
.like(write
, name
=wname
)
927 comb
+= wp
.eq(wr_pick
& wrpick
.en_o
)
928 comb
+= addr_en
.eq(Mux(wp
, write
, 0))
932 addrs
.append(addr_en
)
935 # connect regfile port to input
936 print("reg connect widths",
937 regfile
, regname
, pi
, funame
,
938 dest
.shape(), wport
.i_data
.shape())
939 wsigs
.append(fu_dest_latch
)
941 # now connect up the bitvector write hazard
942 if not self
.make_hazard_vecs
:
944 res
= self
.make_hazards(m
, regfile
, rfile
, wvclr
, wvset
,
945 funame
, regname
, idx
,
946 addr_en
, wp
, fu
, fu_active
,
947 wrflags
[i
], write
, fu_wrok
)
948 wvaddr_en
, wv_issue_en
= res
949 wvclren
.append(wvaddr_en
) # set only: no data => clear bit
950 wvseten
.append(wv_issue_en
) # set data same as enable
952 # read the write-hazard bitvector (wv) for any bit that is
953 fu_requested
= fu_bitdict
[funame
]
954 wvchk_en
= Signal(len(wvchk
), name
="waw_chk_addr_en_"+name
)
955 issue_active
= Signal(name
="waw_iactive_"+name
)
956 whazard
= Signal(name
="whaz_"+name
)
958 # XXX EEK! STATE regfile (branch) does not have an
959 # write-active indicator in regspec_decode_write()
960 print ("XXX FIXME waw_iactive", issue_active
,
963 # check bits from the incoming instruction. note (back
964 # in connect_instruction) that the decoder is held for
965 # us to be able to do this, here... *without* issue being
966 # held HI. we MUST NOT gate this with fu.issue_i or
967 # with fu_bitdict "enable": it would create a loop
968 comb
+= issue_active
.eq(wf
)
969 with m
.If(issue_active
):
971 comb
+= wvchk_en
.eq(write
)
973 comb
+= wvchk_en
.eq(1<<write
)
974 # if FU is busy (which doesn't get set at the same time as
975 # issue) and no hazard was detected, clear wvchk_en (i.e.
976 # stop checking for hazards). there is a loop here, but it's
977 # via a DFF, so is ok. some linters may complain, but hey.
978 with m
.If(fu
.busy_o
& ~whazard
):
979 comb
+= wvchk_en
.eq(0)
981 # write-hazard is ANDed with (filtered by) what is actually
982 # being requested. the wvchk data is on a one-clock delay,
983 # and wvchk_en comes directly from the main decoder
984 comb
+= whazard
.eq((wvchk
& wvchk_en
).bool())
986 comb
+= fu
._waw
_hazard
.eq(1)
988 #wvens.append(wvchk_en)
990 # here is where we create the Write Broadcast Bus. simple, eh?
991 comb
+= wport
.i_data
.eq(ortreereduce_sig(wsigs
))
993 # for unary-addressed
994 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
996 # for binary-addressed
997 comb
+= wport
.addr
.eq(ortreereduce_sig(addrs
))
998 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
1000 if not self
.make_hazard_vecs
:
1003 # return these here rather than set wvclr/wvset directly,
1004 # because there may be more than one write-port to a given
1005 # regfile. example: XER has a write-port for SO, CA, and OV
1006 # and the *last one added* of those would overwrite the other
1007 # two. solution: have connect_wrports collate all the
1008 # or-tree-reduced bitvector set/clear requests and drop them
1009 # in as a single "thing". this can only be done because the
1010 # set/get is an unary bitvector.
1011 print ("make write-vecs", regfile
, regname
, wvset
, wvclr
)
1012 return (wvclren
, # clear (regfile write)
1013 wvseten
) # set (issue time)
def connect_wrports(self, m, fu_bitdict, fu_selected):
    """connect write ports

    orders the write regspecs into a dict-of-dicts, by regfile,
    by regport name (see get_byregfiles), then hands every
    (regfile, regport) group to connect_wrport.

    connect_wrport returns two lists per port: write-hazard "clear"
    enables and write-hazard "set" enables.  these are collated per
    regfile (lower-cased regfile name) and, if hazard vectors are
    enabled, or-reduced into one single assignment per regfile.

    * m           - the Module being constructed
    * fu_bitdict  - passed through unmodified to connect_wrport
    * fu_selected - passed through unmodified to connect_wrport

    returns None.

    NOTE(review): the earlier docstring said the write-port wen, data
    and go_wr_i "all use sync" because of a combinatorial loop bug.
    every assignment made directly in this method is combinatorial:
    confirm against connect_wrport before relying on that statement.
    """
    comb = m.d.comb

    # dictionary (by regfile) of dictionaries (by port name) of write specs
    byregfiles_wrspec = self.get_byregfiles(m, False)

    # per-regfile picker storage.  NOTE(review): connect_wrport's signature
    # is outside this view; wrpickers is assumed to be the argument that
    # sits between fu_selected and regfile -- confirm.
    wrpickers = {}
    wvclrers = defaultdict(list)  # hazard-clear enables, by regfile
    wvseters = defaultdict(list)  # hazard-set enables, by regfile

    for regfile, fuspecs in byregfiles_wrspec.items():
        wrpickers[regfile] = {}

        if self.regreduce_en:
            # argh, more port-merging: several named ports are folded
            # into one list stored under the first port's name
            if regfile == 'INT':
                fuspecs['o'] = [fuspecs.pop('o')]
                fuspecs['o'].append(fuspecs.pop('o1'))
            if regfile == 'FAST':
                fuspecs['fast1'] = [fuspecs.pop('fast1')]
                for extra in ('fast2', 'fast3'):
                    if extra in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop(extra))

        # collate these and record them by regfile because there
        # are sometimes more write-ports per regfile
        for (regname, fspec) in sort_fuspecs(fuspecs):
            wvclren, wvseten = self.connect_wrport(m,
                                                   fu_bitdict, fu_selected,
                                                   wrpickers,
                                                   regfile, regname, fspec)
            wvclrers[regfile.lower()] += wvclren
            wvseters[regfile.lower()] += wvseten

    # without hazard vectors there is nothing further to connect
    if not self.make_hazard_vecs:
        return

    # for write-vectors: reduce the clr-ers and set-ers down to a single
    # set of bits.  otherwise if there are two write ports (on some
    # regfiles), the last one doing comb += on the wv instance "wins"
    # (and all others are ignored, whoops).
    # NOTE(review): the binding of the register-file collection is not
    # visible in this part of the file; assumed to be self.regs.
    for regfile, wvclren in wvclrers.items():
        wv = self.regs.wv[regfile]
        wvseten = wvseters[regfile]
        # clear (regfile write)
        comb += wv.r.eq(ortreereduce_sig(wvclren))
        # set (issue time)
        comb += wv.s.eq(ortreereduce_sig(wvseten))
def get_byregfiles(self, m, readmode):
    """group Function Unit register requirements by regfile and port

    * m        - the Module being constructed (handed to regspec_decode)
    * readmode - True: analyse FU source operands (read ports).
                 False: analyse FU destinations (write ports).

    returns a dict-of-dicts: first key is the regfile name, second key
    is the regfile port name, value is a ByRegSpec of
    (okflag, regport, wid, specs) where specs is the list of
    FUSpec(funame, fu, idx) entries that need that port.

    side-effects: creates empty latch dictionaries on every Function
    Unit (rd_latches and rf_latches in read mode, wr_latches in write
    mode) and prints a summary of the groupings.
    """
    mode = "read" if readmode else "write"

    # NOTE(review): the binding of `fus` is not visible in this part of
    # the file; assumed to be the name->FU dictionary self.fus.fus.
    fus = self.fus.fus
    e = self.ireg.e  # decoded instruction to execute

    # dictionary of dictionaries of lists/tuples of regfile ports.
    # first key: regfile.  second key: regfile port name
    byregfiles_spec = defaultdict(dict)

    for (funame, fu) in fus.items():
        # create in each FU a receptacle for the read/write register
        # hazard numbers (and okflags for read).  to be latched in
        # connect_rd/write_ports.  only the current mode's receptacles
        # are created, so a write-mode call does not wipe read latches.
        if readmode:
            fu.rd_latches = {}  # read reg number latches
            fu.rf_latches = {}  # read flag latches
        else:
            fu.wr_latches = {}  # write reg number latches

        # construct regfile specs: read uses inspec, write outspec
        print("%s ports for %s" % (mode, funame))
        n_ports = fu.n_src if readmode else fu.n_dst
        for idx in range(n_ports):
            (regfile, regname, wid) = fu.get_io_spec(readmode, idx)
            print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))

            # the PowerDecoder2 (main one, not the satellites) contains
            # the decoded regfile numbers.  obtain these now
            decinfo = regspec_decode(m, readmode, e, regfile, regname)
            okflag, regport = decinfo.okflag, decinfo.regport

            # construct the dictionary of regspec information by regfile
            ports = byregfiles_spec[regfile]
            if regname not in ports:
                ports[regname] = ByRegSpec(okflag, regport, wid, [])

            # here we start to create "lanes" where each Function Unit
            # requiring access to a given [single-contended resource]
            # regfile port is appended to a list, so that PriorityPickers
            # can be created to give uncontested access to it
            ports[regname].specs.append(FUSpec(funame, fu, idx))

    # ok just print that all out, for convenience
    for regfile, fuspecs in byregfiles_spec.items():
        print("regfile %s ports:" % mode, regfile)
        for regname, fspec in fuspecs.items():
            # unpack under a fresh name: do not rebind the dictionary
            # that is currently being iterated over
            okflag, regport, wid, lanes = fspec
            print(" rf %s port %s lane: %s" % (mode, regfile, regname))
            print(" %s" % regname, wid, okflag, regport)
            for (funame, fu, idx) in lanes:
                fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                print(" ", funame, fu.__class__.__name__, idx, fusig)

    return byregfiles_spec
1139 yield from self
.fus
.ports()
1140 yield from self
.i
.e
.ports()
1141 yield from self
.l0
.ports()
1148 if __name__
== '__main__':
1149 pspec
= TestMemPspec(ldst_ifacetype
='testpi',
1155 dut
= NonProductionCore(pspec
)
1156 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
1157 with
open("test_core.il", "w") as f
: