3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
22 from nmigen
import Elaboratable
, Module
, Signal
, ResetSignal
, Cat
, Mux
23 from nmigen
.cli
import rtlil
25 from openpower
.decoder
.power_decoder2
import PowerDecodeSubset
26 from openpower
.decoder
.power_regspec_map
import regspec_decode_read
27 from openpower
.decoder
.power_regspec_map
import regspec_decode_write
28 from openpower
.sv
.svp64
import SVP64Rec
30 from nmutil
.picker
import PriorityPicker
31 from nmutil
.util
import treereduce
32 from nmutil
.singlepipe
import ControlBase
34 from soc
.fu
.compunits
.compunits
import AllFunctionUnits
, LDSTFunctionUnit
35 from soc
.regfile
.regfiles
import RegFiles
36 from openpower
.decoder
.power_decoder2
import get_rdflags
37 from soc
.experiment
.l0_cache
import TstL0CacheBuffer
# test only
38 from soc
.config
.test
.test_loadstore
import TestMemPspec
39 from openpower
.decoder
.power_enums
import MicrOp
, Function
40 from soc
.simple
.core_data
import CoreInput
, CoreOutput
42 from collections
import defaultdict
45 from nmutil
.util
import rising_edge
48 # helper function for reducing a list of signals down to a single, parallel (tree-structured) ORed signal
def ortreereduce(tree, attr="o_data"):
    """OR-combine one attribute taken from every item of ``tree``.

    Delegates to ``treereduce`` with ``operator.or_`` as the combining
    operator; each item contributes ``getattr(item, attr)``.

    tree: the collection of objects to reduce
    attr: name of the attribute to read from each item (default "o_data")

    Returns whatever ``treereduce`` returns for those arguments.
    """
    def fetch(item):
        # pull the requested attribute off each item before it is ORed
        return getattr(item, attr)

    return treereduce(tree, operator.or_, fetch)
def ortreereduce_sig(tree):
    """OR-combine the items of ``tree`` directly.

    Same as ``ortreereduce`` except that the items themselves (rather
    than an attribute of each item) are handed to ``treereduce`` with
    ``operator.or_`` as the combining operator.

    tree: the collection of items to reduce

    Returns whatever ``treereduce`` returns for those arguments.
    """
    def identity(item):
        # items are used as-is: no attribute lookup
        return item

    return treereduce(tree, operator.or_, identity)
58 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the items of ``fuspecs`` with "full" register specs first.

    fuspecs: dict mapping a register-port name (str) to its spec

    Returns a list of ``(regname, fspec)`` tuples.  Every entry whose
    name starts with "full" comes before every entry whose name does
    not; within each of those two groups the dict's own iteration order
    is preserved.  An empty dict gives an empty list.
    """
    # sorted() is stable, and False sorts before True, so keying on
    # "does NOT start with full" floats the full-register entries to the
    # front without disturbing the relative order inside either group.
    return sorted(fuspecs.items(),
                  key=lambda item: not item[0].startswith("full"))
70 # derive from ControlBase rather than have a separate Stage instance,
71 # this is simpler to do
72 class NonProductionCore(ControlBase
):
73 def __init__(self
, pspec
):
76 # test if SVP64 is to be enabled
77 self
.svp64_en
= hasattr(pspec
, "svp64") and (pspec
.svp64
== True)
79 # test to see if regfile ports should be reduced
80 self
.regreduce_en
= (hasattr(pspec
, "regreduce") and
81 (pspec
.regreduce
== True))
83 # test to see if overlapping of instructions is allowed
84 # (not normally enabled for TestIssuer FSM but useful for checking
85 # the bitvector hazard detection, before doing In-Order)
86 self
.allow_overlap
= (hasattr(pspec
, "allow_overlap") and
87 (pspec
.allow_overlap
== True))
90 self
.make_hazard_vecs
= True
91 self
.core_type
= "fsm"
92 if hasattr(pspec
, "core_type"):
93 self
.core_type
= pspec
.core_type
95 super().__init
__(stage
=self
)
97 # single LD/ST funnel for memory access
98 self
.l0
= l0
= TstL0CacheBuffer(pspec
, n_units
=1)
101 # function units (only one each)
102 # only include mmu if enabled in pspec
103 self
.fus
= AllFunctionUnits(pspec
, pilist
=[pi
])
105 # link LoadStore1 into MMU
106 mmu
= self
.fus
.get_fu('mmu0')
107 print ("core pspec", pspec
.ldst_ifacetype
)
108 print ("core mmu", mmu
)
110 print ("core lsmem.lsi", l0
.cmpi
.lsmem
.lsi
)
111 mmu
.alu
.set_ldst_interface(l0
.cmpi
.lsmem
.lsi
)
113 # register files (yes plural)
114 self
.regs
= RegFiles(pspec
, make_hazard_vecs
=self
.make_hazard_vecs
)
116 # set up input and output: unusual requirement to set data directly
117 # (due to the way that the core is set up in a different domain,
118 # see TestIssuer.setup_peripherals
119 self
.p
.i_data
, self
.n
.o_data
= self
.new_specs(None)
120 self
.i
, self
.o
= self
.p
.i_data
, self
.n
.o_data
122 # actual internal input data used (captured)
123 self
.ireg
= self
.ispec()
125 # create per-FU instruction decoders (subsetted). these "satellite"
126 # decoders reduce wire fan-out from the one (main) PowerDecoder2
127 # (used directly by the trap unit) to the *twelve* (or more)
128 # Function Units. we can either have 32 wires (the instruction)
129 # to each, or we can have well over a 200 wire fan-out (to 12
130 # ALUs). it's an easy choice to make.
134 for funame
, fu
in self
.fus
.fus
.items():
135 f_name
= fu
.fnunit
.name
136 fnunit
= fu
.fnunit
.value
137 opkls
= fu
.opsubsetkls
139 # TRAP decoder is the *main* decoder
140 self
.trapunit
= funame
142 self
.decoders
[funame
] = PowerDecodeSubset(None, opkls
, f_name
,
144 state
=self
.ireg
.state
,
145 svp64_en
=self
.svp64_en
,
146 regreduce_en
=self
.regreduce_en
)
147 self
.des
[funame
] = self
.decoders
[funame
].do
149 # share the SPR decoder with the MMU if it exists
150 if "mmu0" in self
.decoders
:
151 self
.decoders
["mmu0"].mmu0_spr_dec
= self
.decoders
["spr0"]
153 # next 3 functions are Stage API Compliance
154 def setup(self
, m
, i
):
158 return CoreInput(self
.pspec
, self
.svp64_en
, self
.regreduce_en
)
163 # elaborate function to create HDL
164 def elaborate(self
, platform
):
165 m
= super().elaborate(platform
)
167 # for testing purposes, to cut down on build time in coriolis2
168 if hasattr(self
.pspec
, "nocore") and self
.pspec
.nocore
== True:
169 x
= Signal() # dummy signal
174 m
.submodules
.fus
= self
.fus
175 m
.submodules
.l0
= l0
= self
.l0
176 self
.regs
.elaborate_into(m
, platform
)
180 # connect up temporary copy of incoming instruction
181 print ("connect ireg, i", self
.ireg
, self
.i
)
182 comb
+= self
.ireg
.eq(self
.i
)
185 self
.connect_satellite_decoders(m
)
187 # ssh, cheat: trap uses the main decoder because of the rewriting
188 self
.des
[self
.trapunit
] = self
.ireg
.e
.do
190 # connect up Function Units, then read/write ports, and hazard conflict
191 issue_conflict
= Signal()
192 fu_bitdict
, fu_selected
= self
.connect_instruction(m
, issue_conflict
)
193 raw_hazard
= self
.connect_rdports(m
, fu_selected
)
194 self
.connect_wrports(m
, fu_selected
)
195 comb
+= issue_conflict
.eq(raw_hazard
)
197 # note if an exception happened. in a pipelined or OoO design
198 # this needs to be accompanied by "shadowing" (or stalling)
200 for exc
in self
.fus
.excs
.values():
201 el
.append(exc
.happened
)
202 if len(el
) > 0: # at least one exception
203 comb
+= self
.o
.exc_happened
.eq(Cat(*el
).bool())
207 def connect_satellite_decoders(self
, m
):
209 for k
, v
in self
.decoders
.items():
210 # connect each satellite decoder and give it the instruction.
211 # as subset decoders this massively reduces wire fanout given
212 # the large number of ALUs
213 setattr(m
.submodules
, "dec_%s" % v
.fn_name
, v
)
214 comb
+= v
.dec
.raw_opcode_in
.eq(self
.ireg
.raw_insn_i
)
215 comb
+= v
.dec
.bigendian
.eq(self
.ireg
.bigendian_i
)
216 # sigh due to SVP64 RA_OR_ZERO detection connect these too
217 comb
+= v
.sv_a_nz
.eq(self
.ireg
.sv_a_nz
)
219 comb
+= v
.pred_sm
.eq(self
.ireg
.sv_pred_sm
)
220 comb
+= v
.pred_dm
.eq(self
.ireg
.sv_pred_dm
)
221 if k
!= self
.trapunit
:
222 comb
+= v
.sv_rm
.eq(self
.ireg
.sv_rm
) # pass through SVP64 RM
223 comb
+= v
.is_svp64_mode
.eq(self
.ireg
.is_svp64_mode
)
224 # only the LDST PowerDecodeSubset *actually* needs to
225 # know to use the alternative decoder. this is all
227 if k
.lower().startswith("ldst"):
228 comb
+= v
.use_svp64_ldst_dec
.eq(
229 self
.ireg
.use_svp64_ldst_dec
)
231 def connect_instruction(self
, m
, issue_conflict
):
232 """connect_instruction
234 uses decoded (from PowerOp) function unit information from CSV files
235 to ascertain which Function Unit should deal with the current
238 some (such as OP_ATTN, OP_NOP) are dealt with here, including
239 ignoring it and halting the processor. OP_NOP is a bit annoying
240 because the issuer expects busy flag still to be raised then lowered.
241 (this requires a fake counter to be set).
243 comb
, sync
= m
.d
.comb
, m
.d
.sync
246 # indicate if core is busy
247 busy_o
= self
.o
.busy_o
249 # enable/busy-signals for each FU, get one bit for each FU (by name)
250 fu_enable
= Signal(len(fus
), reset_less
=True)
251 fu_busy
= Signal(len(fus
), reset_less
=True)
254 for i
, funame
in enumerate(fus
.keys()):
255 fu_bitdict
[funame
] = fu_enable
[i
]
256 fu_selected
[funame
] = fu_busy
[i
]
258 # identify function units and create a list by fnunit so that
259 # PriorityPickers can be created for selecting one of them that
260 # isn't busy at the time the incoming instruction needs passing on
261 by_fnunit
= defaultdict(list)
262 for fname
, member
in Function
.__members
__.items():
263 for funame
, fu
in fus
.items():
264 fnunit
= fu
.fnunit
.value
265 if member
.value
& fnunit
: # this FU handles this type of op
266 by_fnunit
[fname
].append((funame
, fu
)) # add by Function
268 # ok now just print out the list of FUs by Function, because we can
269 for fname
, fu_list
in by_fnunit
.items():
270 print ("FUs by type", fname
, fu_list
)
272 # now create a PriorityPicker per FU-type such that only one
273 # non-busy FU will be picked
275 fu_found
= Signal() # take a note if no Function Unit was available
276 for fname
, fu_list
in by_fnunit
.items():
277 i_pp
= PriorityPicker(len(fu_list
))
278 m
.submodules
['i_pp_%s' % fname
] = i_pp
280 for i
, (funame
, fu
) in enumerate(fu_list
):
281 # match the decoded instruction (e.do.fn_unit) against the
282 # "capability" of this FU, gate that by whether that FU is
283 # busy, and drop that into the PriorityPicker.
284 # this will give us an output of the first available *non-busy*
285 # Function Unit (Reservation Station) capable of handling this
287 fnunit
= fu
.fnunit
.value
288 en_req
= Signal(name
="issue_en_%s" % funame
, reset_less
=True)
289 fnmatch
= (self
.ireg
.e
.do
.fn_unit
& fnunit
).bool()
290 comb
+= en_req
.eq(fnmatch
& ~fu
.busy_o
& self
.p
.i_valid
)
291 i_l
.append(en_req
) # store in list for doing the Cat-trick
292 # picker output, gated by enable: store in fu_bitdict
293 po
= Signal(name
="o_issue_pick_"+funame
) # picker output
294 comb
+= po
.eq(i_pp
.o
[i
] & i_pp
.en_o
)
295 comb
+= fu_bitdict
[funame
].eq(po
)
296 comb
+= fu_selected
[funame
].eq(fu
.busy_o | po
)
297 # if we don't do this, then when there are no FUs available,
298 # the "p.o_ready" signal will go back "ok we accepted this
299 # instruction" which of course isn't true.
300 with m
.If(~issue_conflict
& i_pp
.en_o
):
301 comb
+= fu_found
.eq(1)
302 # for each input, Cat them together and drop them into the picker
303 comb
+= i_pp
.i
.eq(Cat(*i_l
))
305 # sigh - need a NOP counter
307 with m
.If(counter
!= 0):
308 sync
+= counter
.eq(counter
- 1)
311 with m
.If(self
.p
.i_valid
): # run only when valid
312 with m
.Switch(self
.ireg
.e
.do
.insn_type
):
313 # check for ATTN: halt if true
314 with m
.Case(MicrOp
.OP_ATTN
):
315 m
.d
.sync
+= self
.o
.core_terminate_o
.eq(1)
317 # fake NOP - this isn't really used (Issuer detects NOP)
318 with m
.Case(MicrOp
.OP_NOP
):
319 sync
+= counter
.eq(2)
323 # connect up instructions. only one enabled at a time
324 for funame
, fu
in fus
.items():
325 do
= self
.des
[funame
]
326 enable
= fu_bitdict
[funame
]
328 # run this FunctionUnit if enabled
329 # route op, issue, busy, read flags and mask to FU
331 # operand comes from the *local* decoder
332 comb
+= fu
.oper_i
.eq_from(do
)
333 comb
+= fu
.issue_i
.eq(1) # issue when input valid
334 # rdmask, which is for registers, needs to come
335 # from the *main* decoder
336 rdmask
= get_rdflags(self
.ireg
.e
, fu
)
337 comb
+= fu
.rdmaskn
.eq(~rdmask
)
339 print ("core: overlap allowed", self
.allow_overlap
)
340 if not self
.allow_overlap
:
341 # for simple non-overlap, if any instruction is busy, set
342 # busy output for core.
343 busys
= map(lambda fu
: fu
.busy_o
, fus
.values())
344 comb
+= busy_o
.eq(Cat(*busys
).bool())
346 # for the overlap case, only set busy if an FU is not found,
347 # and an FU will not be found if the write hazards are blocked
348 comb
+= busy_o
.eq(~fu_found | issue_conflict
)
350 # ready/valid signalling. if busy, means refuse incoming issue.
351 # also, if there was no fu found we must not send back a valid
352 # indicator. BUT, of course, when there is no instruction
353 # we must ignore the fu_found flag, otherwise o_ready will never
354 # be set when everything is idle
355 comb
+= self
.p
.o_ready
.eq(fu_found | ~self
.p
.i_valid
)
357 # return both the function unit "enable" dict as well as the "busy".
358 # the "busy-or-issued" can be passed in to the Read/Write port
359 # connecters to give them permission to request access to regfiles
360 return fu_bitdict
, fu_selected
362 def connect_rdport(self
, m
, fu_bitdict
, rdpickers
, regfile
, regname
, fspec
):
363 comb
, sync
= m
.d
.comb
, m
.d
.sync
369 # select the required read port. these are pre-defined sizes
370 rfile
= regs
.rf
[regfile
.lower()]
371 rport
= rfile
.r_ports
[rpidx
]
372 print("read regfile", rpidx
, regfile
, regs
.rf
.keys(),
375 # for checking if the read port has an outstanding write
376 if self
.make_hazard_vecs
:
377 wv
= regs
.wv
[regfile
.lower()]
378 wvchk
= wv
.r_ports
["issue"] # write-vec bit-level hazard check
381 if not isinstance(fspecs
, list):
388 for i
, fspec
in enumerate(fspecs
):
389 # get the regfile specs for this regfile port
390 (rf
, wf
, read
, write
, wid
, fuspec
) = fspec
391 print ("fpsec", i
, fspec
, len(fuspec
))
392 ppoffs
.append(pplen
) # record offset for picker
394 name
= "rdflag_%s_%s_%d" % (regfile
, regname
, i
)
395 rdflag
= Signal(name
=name
, reset_less
=True)
396 comb
+= rdflag
.eq(rf
)
397 rdflags
.append(rdflag
)
400 print ("pplen", pplen
)
402 # create a priority picker to manage this port
403 rdpickers
[regfile
][rpidx
] = rdpick
= PriorityPicker(pplen
)
404 setattr(m
.submodules
, "rdpick_%s_%s" % (regfile
, rpidx
), rdpick
)
410 for i
, fspec
in enumerate(fspecs
):
411 (rf
, wf
, read
, write
, wid
, fuspec
) = fspec
412 # connect up the FU req/go signals, and the reg-read to the FU
413 # and create a Read Broadcast Bus
414 for pi
, (funame
, fu
, idx
) in enumerate(fuspec
):
417 # connect request-read to picker input, and output to go-rd
418 fu_active
= fu_bitdict
[funame
]
419 name
= "%s_%s_%s_%i" % (regfile
, rpidx
, funame
, pi
)
420 addr_en
= Signal
.like(reads
[i
], name
="addr_en_"+name
)
421 pick
= Signal(name
="pick_"+name
) # picker input
422 rp
= Signal(name
="rp_"+name
) # picker output
423 delay_pick
= Signal(name
="dp_"+name
) # read-enable "underway"
425 # exclude any currently-enabled read-request (mask out active)
426 comb
+= pick
.eq(fu
.rd_rel_o
[idx
] & fu_active
& rdflags
[i
] &
428 comb
+= rdpick
.i
[pi
].eq(pick
)
429 comb
+= fu
.go_rd_i
[idx
].eq(delay_pick
) # pass in *delayed* pick
431 # if picked, select read-port "reg select" number to port
432 comb
+= rp
.eq(rdpick
.o
[pi
] & rdpick
.en_o
)
433 sync
+= delay_pick
.eq(rp
) # delayed "pick"
434 comb
+= addr_en
.eq(Mux(rp
, reads
[i
], 0))
436 # the read-enable happens combinatorially (see mux-bus below)
437 # but it results in the data coming out on a one-cycle delay.
441 addrs
.append(addr_en
)
444 # use the *delayed* pick signal to put requested data onto bus
445 with m
.If(delay_pick
):
446 # connect regfile port to input, creating fan-out Bus
448 print("reg connect widths",
449 regfile
, regname
, pi
, funame
,
450 src
.shape(), rport
.o_data
.shape())
451 # all FUs connect to same port
452 comb
+= src
.eq(rport
.o_data
)
454 if not self
.make_hazard_vecs
:
457 # read the write-hazard bitvector (wv) for any bit that is
458 wvchk_en
= Signal(len(wvchk
.ren
), name
="wv_chk_addr_en_"+name
)
459 issue_active
= Signal(name
="rd_iactive_"+name
)
460 comb
+= issue_active
.eq(fu
.issue_i
& rdflags
[i
])
461 with m
.If(issue_active
):
463 comb
+= wvchk_en
.eq(reads
[i
])
465 comb
+= wvchk_en
.eq(1<<reads
[i
])
466 wvens
.append(wvchk_en
)
468 # or-reduce the muxed read signals
470 # for unary-addressed
471 comb
+= rport
.ren
.eq(ortreereduce_sig(rens
))
473 # for binary-addressed
474 comb
+= rport
.addr
.eq(ortreereduce_sig(addrs
))
475 comb
+= rport
.ren
.eq(Cat(*rens
).bool())
476 print ("binary", regfile
, rpidx
, rport
, rport
.ren
, rens
, addrs
)
478 if not self
.make_hazard_vecs
:
479 return Const(0) # declare "no hazards"
481 # enable the read bitvectors for this issued instruction
482 # and return whether any write-hazard bit is set
483 comb
+= wvchk
.ren
.eq(ortreereduce_sig(wvens
))
484 hazard_detected
= Signal(name
="raw_%s_%s" % (regfile
, rpidx
))
485 comb
+= hazard_detected
.eq(wvchk
.o_data
.bool())
486 return hazard_detected
488 def connect_rdports(self
, m
, fu_bitdict
):
489 """connect read ports
491 orders the read regspecs into a dict-of-dicts, by regfile, by
492 regport name, then connects all FUs that want that regport by
493 way of a PriorityPicker.
495 comb
, sync
= m
.d
.comb
, m
.d
.sync
500 # dictionary of lists of regfile read ports
501 byregfiles_rd
, byregfiles_rdspec
= self
.get_byregfiles(True)
503 # okaay, now we need a PriorityPicker per regfile per regfile port
504 # loootta pickers... peter piper picked a pack of pickled peppers...
506 for regfile
, spec
in byregfiles_rd
.items():
507 fuspecs
= byregfiles_rdspec
[regfile
]
508 rdpickers
[regfile
] = {}
510 # argh. an experiment to merge RA and RB in the INT regfile
511 # (we have too many read/write ports)
512 if self
.regreduce_en
:
514 fuspecs
['rabc'] = [fuspecs
.pop('rb')]
515 fuspecs
['rabc'].append(fuspecs
.pop('rc'))
516 fuspecs
['rabc'].append(fuspecs
.pop('ra'))
517 if regfile
== 'FAST':
518 fuspecs
['fast1'] = [fuspecs
.pop('fast1')]
519 if 'fast2' in fuspecs
:
520 fuspecs
['fast1'].append(fuspecs
.pop('fast2'))
521 if 'fast3' in fuspecs
:
522 fuspecs
['fast1'].append(fuspecs
.pop('fast3'))
524 # for each named regfile port, connect up all FUs to that port
525 # also return (and collate) hazard detection
526 for (regname
, fspec
) in sort_fuspecs(fuspecs
):
527 print("connect rd", regname
, fspec
)
528 rh
= self
.connect_rdport(m
, fu_bitdict
, rdpickers
, regfile
,
532 return Cat(*rd_hazard
).bool()
534 def make_hazards(self
, m
, regfile
, rfile
, wvclr
, wvset
,
535 funame
, regname
, idx
,
536 addr_en
, wp
, fu
, fu_active
, wrflag
, write
,
538 """make_hazards: a setter and a clearer for the regfile write ports
540 setter is at issue time (using PowerDecoder2 regfile write numbers)
541 clearer is at regfile write time (when FU has said what to write to)
543 there is *one* unusual case here which has to be dealt with:
544 when the Function Unit does *NOT* request a write to the regfile
545 (has its data.ok bit CLEARED). this is perfectly legitimate.
548 comb
, sync
= m
.d
.comb
, m
.d
.sync
549 name
= "%s_%s_%d" % (funame
, regname
, idx
)
551 # connect up the bitvector write hazard. unlike the
552 # regfile writeports, a ONE must be written to the corresponding
553 # bit of the hazard bitvector (to indicate the existence of
556 # the detection of what shall be written to is based
558 print ("write vector (for regread)", regfile
, wvset
)
559 wviaddr_en
= Signal(len(wvset
.wen
), name
="wv_issue_addr_en_"+name
)
560 issue_active
= Signal(name
="iactive_"+name
)
561 comb
+= issue_active
.eq(fu
.issue_i
& fu_active
& wrflag
)
562 with m
.If(issue_active
):
564 comb
+= wviaddr_en
.eq(write
)
566 comb
+= wviaddr_en
.eq(1<<write
)
568 # deal with write vector clear: this kicks in when the regfile
569 # is written to, and clears the corresponding bitvector entry
570 print ("write vector", regfile
, wvclr
)
571 wvaddr_en
= Signal(len(wvclr
.wen
), name
="wvaddr_en_"+name
)
573 comb
+= wvaddr_en
.eq(addr_en
)
576 comb
+= wvaddr_en
.eq(1<<addr_en
)
578 # XXX ASSUME that LDSTFunctionUnit always sets the data it intends to
579 # this may NOT be the case when an exception occurs
580 if isinstance(fu
, LDSTFunctionUnit
):
581 return wvaddr_en
, wviaddr_en
583 # okaaay, this is preparation for the awkward case.
584 # * latch a copy of wrflag when issue goes high.
585 # * when the fu_wrok (data.ok) flag is NOT set,
586 # but the FU is done, the FU is NEVER going to write
587 # so the bitvector has to be cleared.
588 latch_wrflag
= Signal(name
="latch_wrflag_"+name
)
589 with m
.If(~fu
.busy_o
):
590 sync
+= latch_wrflag
.eq(0)
591 with m
.If(fu
.issue_i
& fu_active
):
592 sync
+= latch_wrflag
.eq(wrflag
)
593 with m
.If(fu
.alu_done_o
& latch_wrflag
& ~fu_wrok
):
595 comb
+= wvaddr_en
.eq(write
) # addr_en gated with wp, don't use
597 comb
+= wvaddr_en
.eq(1<<addr_en
) # binary addr_en not gated
599 return wvaddr_en
, wviaddr_en
601 def connect_wrport(self
, m
, fu_bitdict
, wrpickers
, regfile
, regname
, fspec
):
602 comb
, sync
= m
.d
.comb
, m
.d
.sync
608 # select the required write port. these are pre-defined sizes
609 rfile
= regs
.rf
[regfile
.lower()]
610 wport
= rfile
.w_ports
[rpidx
]
612 print("connect wr", regname
, "unary", rfile
.unary
, fspec
)
613 print(regfile
, regs
.rf
.keys())
615 # select the write-protection hazard vector. note that this still
616 # requires to WRITE to the hazard bitvector! read-requests need
617 # to RAISE the bitvector (set it to 1), which, duh, requires a WRITE
618 if self
.make_hazard_vecs
:
619 wv
= regs
.wv
[regfile
.lower()]
620 wvset
= wv
.w_ports
["set"] # write-vec bit-level hazard ctrl
621 wvclr
= wv
.w_ports
["clr"] # write-vec bit-level hazard ctrl
624 if not isinstance(fspecs
, list):
632 for i
, fspec
in enumerate(fspecs
):
633 # get the regfile specs for this regfile port
634 (rf
, wf
, read
, write
, wid
, fuspec
) = fspec
635 print ("fpsec", i
, "wrflag", wf
, fspec
, len(fuspec
))
636 ppoffs
.append(pplen
) # record offset for picker
639 name
= "%s_%s_%d" % (regfile
, regname
, i
)
640 rdflag
= Signal(name
="rd_flag_"+name
)
641 wrflag
= Signal(name
="wr_flag_"+name
)
643 comb
+= rdflag
.eq(rf
)
647 comb
+= wrflag
.eq(wf
)
650 rdflags
.append(rdflag
)
651 wrflags
.append(wrflag
)
653 # create a priority picker to manage this port
654 wrpickers
[regfile
][rpidx
] = wrpick
= PriorityPicker(pplen
)
655 setattr(m
.submodules
, "wrpick_%s_%s" % (regfile
, rpidx
), wrpick
)
663 for i
, fspec
in enumerate(fspecs
):
664 # connect up the FU req/go signals and the reg-read to the FU
665 # these are arbitrated by Data.ok signals
666 (rf
, wf
, read
, _write
, wid
, fuspec
) = fspec
667 wrname
= "write_%s_%s_%d" % (regfile
, regname
, i
)
668 write
= Signal
.like(_write
, name
=wrname
)
669 comb
+= write
.eq(_write
)
670 for pi
, (funame
, fu
, idx
) in enumerate(fuspec
):
673 # write-request comes from dest.ok
674 dest
= fu
.get_out(idx
)
675 fu_dest_latch
= fu
.get_fu_out(idx
) # latched output
676 name
= "fu_wrok_%s_%s_%d" % (funame
, regname
, idx
)
677 fu_wrok
= Signal(name
=name
, reset_less
=True)
678 comb
+= fu_wrok
.eq(dest
.ok
& fu
.busy_o
)
680 # connect request-write to picker input, and output to go-wr
681 fu_active
= fu_bitdict
[funame
]
682 pick
= fu
.wr
.rel_o
[idx
] & fu_active
683 comb
+= wrpick
.i
[pi
].eq(pick
)
684 # create a single-pulse go write from the picker output
685 wr_pick
= Signal(name
="wpick_%s_%s_%d" % (funame
, regname
, idx
))
686 comb
+= wr_pick
.eq(wrpick
.o
[pi
] & wrpick
.en_o
)
687 comb
+= fu
.go_wr_i
[idx
].eq(rising_edge(m
, wr_pick
))
689 # connect the regspec write "reg select" number to this port
690 # only if one FU actually requests (and is granted) the port
691 # will the write-enable be activated
692 wname
= "waddr_en_%s_%s_%d" % (funame
, regname
, idx
)
693 addr_en
= Signal
.like(write
, name
=wname
)
695 comb
+= wp
.eq(wr_pick
& wrpick
.en_o
)
696 comb
+= addr_en
.eq(Mux(wp
, write
, 0))
700 addrs
.append(addr_en
)
703 # connect regfile port to input
704 print("reg connect widths",
705 regfile
, regname
, pi
, funame
,
706 dest
.shape(), wport
.i_data
.shape())
707 wsigs
.append(fu_dest_latch
)
709 # now connect up the bitvector write hazard
710 if not self
.make_hazard_vecs
:
712 res
= self
.make_hazards(m
, regfile
, rfile
, wvclr
, wvset
,
713 funame
, regname
, idx
,
714 addr_en
, wp
, fu
, fu_active
,
715 wrflags
[i
], write
, fu_wrok
)
716 wvaddr_en
, wv_issue_en
= res
717 wvclren
.append(wvaddr_en
) # set only: no data => clear bit
718 wvseten
.append(wv_issue_en
) # set data same as enable
719 wvsets
.append(wv_issue_en
) # because enable needs a 1
721 # here is where we create the Write Broadcast Bus. simple, eh?
722 comb
+= wport
.i_data
.eq(ortreereduce_sig(wsigs
))
724 # for unary-addressed
725 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
727 # for binary-addressed
728 comb
+= wport
.addr
.eq(ortreereduce_sig(addrs
))
729 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
731 if not self
.make_hazard_vecs
:
735 comb
+= wvclr
.wen
.eq(ortreereduce_sig(wvclren
)) # clear (regfile write)
736 comb
+= wvset
.wen
.eq(ortreereduce_sig(wvseten
)) # set (issue time)
737 comb
+= wvset
.i_data
.eq(ortreereduce_sig(wvsets
))
739 def connect_wrports(self
, m
, fu_bitdict
):
740 """connect write ports
742 orders the write regspecs into a dict-of-dicts, by regfile,
743 by regport name, then connects all FUs that want that regport
744 by way of a PriorityPicker.
746 note that the write-port wen, write-port data, and go_wr_i all need to
747 be on the exact same clock cycle. as there is a combinatorial loop bug
748 at the moment, these all use sync.
750 comb
, sync
= m
.d
.comb
, m
.d
.sync
753 # dictionary of lists of regfile write ports
754 byregfiles_wr
, byregfiles_wrspec
= self
.get_byregfiles(False)
756 # same for write ports.
757 # BLECH! complex code-duplication! BLECH!
759 for regfile
, spec
in byregfiles_wr
.items():
760 fuspecs
= byregfiles_wrspec
[regfile
]
761 wrpickers
[regfile
] = {}
763 if self
.regreduce_en
:
764 # argh, more port-merging
766 fuspecs
['o'] = [fuspecs
.pop('o')]
767 fuspecs
['o'].append(fuspecs
.pop('o1'))
768 if regfile
== 'FAST':
769 fuspecs
['fast1'] = [fuspecs
.pop('fast1')]
770 if 'fast2' in fuspecs
:
771 fuspecs
['fast1'].append(fuspecs
.pop('fast2'))
772 if 'fast3' in fuspecs
:
773 fuspecs
['fast1'].append(fuspecs
.pop('fast3'))
775 for (regname
, fspec
) in sort_fuspecs(fuspecs
):
776 self
.connect_wrport(m
, fu_bitdict
, wrpickers
,
777 regfile
, regname
, fspec
)
779 def get_byregfiles(self
, readmode
):
781 mode
= "read" if readmode
else "write"
784 e
= self
.ireg
.e
# decoded instruction to execute
786 # dictionary of lists of regfile ports
789 for (funame
, fu
) in fus
.items():
790 print("%s ports for %s" % (mode
, funame
))
791 for idx
in range(fu
.n_src
if readmode
else fu
.n_dst
):
793 (regfile
, regname
, wid
) = fu
.get_in_spec(idx
)
795 (regfile
, regname
, wid
) = fu
.get_out_spec(idx
)
796 print(" %d %s %s %s" % (idx
, regfile
, regname
, str(wid
)))
798 rdflag
, read
= regspec_decode_read(e
, regfile
, regname
)
799 wrport
, write
= None, None
801 rdflag
, read
= None, None
802 wrport
, write
= regspec_decode_write(e
, regfile
, regname
)
803 if regfile
not in byregfiles
:
804 byregfiles
[regfile
] = {}
805 byregfiles_spec
[regfile
] = {}
806 if regname
not in byregfiles_spec
[regfile
]:
807 byregfiles_spec
[regfile
][regname
] = \
808 (rdflag
, wrport
, read
, write
, wid
, [])
809 # here we start to create "lanes"
810 if idx
not in byregfiles
[regfile
]:
811 byregfiles
[regfile
][idx
] = []
812 fuspec
= (funame
, fu
, idx
)
813 byregfiles
[regfile
][idx
].append(fuspec
)
814 byregfiles_spec
[regfile
][regname
][5].append(fuspec
)
816 # ok just print that out, for convenience
817 for regfile
, spec
in byregfiles
.items():
818 print("regfile %s ports:" % mode
, regfile
)
819 fuspecs
= byregfiles_spec
[regfile
]
820 for regname
, fspec
in fuspecs
.items():
821 [rdflag
, wrflag
, read
, write
, wid
, fuspec
] = fspec
822 print(" rf %s port %s lane: %s" % (mode
, regfile
, regname
))
823 print(" %s" % regname
, wid
, read
, write
, rdflag
, wrflag
)
824 for (funame
, fu
, idx
) in fuspec
:
825 fusig
= fu
.src_i
[idx
] if readmode
else fu
.dest
[idx
]
826 print(" ", funame
, fu
.__class
__.__name
__, idx
, fusig
)
829 return byregfiles
, byregfiles_spec
832 yield from self
.fus
.ports()
833 yield from self
.i
.e
.ports()
834 yield from self
.l0
.ports()
841 if __name__
== '__main__':
842 pspec
= TestMemPspec(ldst_ifacetype
='testpi',
847 dut
= NonProductionCore(pspec
)
848 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
849 with
open("test_core.il", "w") as f
: