3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
22 from nmigen
import Elaboratable
, Module
, Signal
, ResetSignal
, Cat
, Mux
23 from nmigen
.cli
import rtlil
25 from openpower
.decoder
.power_decoder2
import PowerDecodeSubset
26 from openpower
.decoder
.power_regspec_map
import regspec_decode_read
27 from openpower
.decoder
.power_regspec_map
import regspec_decode_write
28 from openpower
.sv
.svp64
import SVP64Rec
30 from nmutil
.picker
import PriorityPicker
31 from nmutil
.util
import treereduce
33 from soc
.fu
.compunits
.compunits
import AllFunctionUnits
34 from soc
.regfile
.regfiles
import RegFiles
35 from openpower
.decoder
.decode2execute1
import Decode2ToExecute1Type
36 from openpower
.decoder
.decode2execute1
import IssuerDecode2ToOperand
37 from openpower
.decoder
.power_decoder2
import get_rdflags
38 from openpower
.decoder
.decode2execute1
import Data
39 from soc
.experiment
.l0_cache
import TstL0CacheBuffer
# test only
40 from soc
.config
.test
.test_loadstore
import TestMemPspec
41 from openpower
.decoder
.power_enums
import MicrOp
42 from soc
.config
.state
import CoreState
46 from nmutil
.util
import rising_edge
# helper function for reducing a list of signals down to a parallel
# OR-ed result
def ortreereduce(tree, attr="data_o"):
    """OR-reduce one named attribute taken from every item of ``tree``.

    tree: the items to reduce; each must have an attribute called ``attr``
    attr: name of the attribute to extract from each item (default "data_o")

    returns whatever ``treereduce`` produces when handed ``operator.or_``
    as the combining operation and an attribute-extracting function.
    NOTE(review): treereduce comes from nmutil.util; presumably it combines
    the extracted values pairwise in a tree - confirm against nmutil.
    """
    def pick_attr(item):
        # fetch the requested attribute from one item of the tree
        return getattr(item, attr)

    return treereduce(tree, operator.or_, pick_attr)
def ortreereduce_sig(tree):
    """OR-reduce the items of ``tree`` themselves (no attribute lookup).

    tree: the items to reduce; each is handed to ``operator.or_`` unchanged

    returns whatever ``treereduce`` produces when handed ``operator.or_``
    as the combining operation and an identity function.
    NOTE(review): treereduce comes from nmutil.util; presumably it combines
    the items pairwise in a tree - confirm against nmutil.
    """
    def identity(sig):
        # items are used as-is
        return sig

    return treereduce(tree, operator.or_, identity)
# helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the (regname, fspec) pairs of ``fuspecs``, "full*" names first.

    fuspecs: dict mapping a register port name to its spec

    returns a new list of (regname, fspec) tuples: every entry whose name
    starts with "full" comes first, followed by all the others.  within
    each group the dict's own iteration order is kept.  the input dict is
    not modified.
    """
    pairs = list(fuspecs.items())
    # two order-preserving passes: "full" declarations, then the rest
    full_first = [(regname, fspec) for regname, fspec in pairs
                  if regname.startswith("full")]
    others = [(regname, fspec) for regname, fspec in pairs
              if not regname.startswith("full")]
    return full_first + others
71 class NonProductionCore(Elaboratable
):
72 def __init__(self
, pspec
):
75 # test is SVP64 is to be enabled
76 self
.svp64_en
= hasattr(pspec
, "svp64") and (pspec
.svp64
== True)
78 # test to see if regfile ports should be reduced
79 self
.regreduce_en
= (hasattr(pspec
, "regreduce") and
80 (pspec
.regreduce
== True))
82 # single LD/ST funnel for memory access
83 self
.l0
= l0
= TstL0CacheBuffer(pspec
, n_units
=1)
86 # function units (only one each)
87 # only include mmu if enabled in pspec
88 self
.fus
= AllFunctionUnits(pspec
, pilist
=[pi
])
90 # link LoadStore1 into MMU
91 mmu
= self
.fus
.get_fu('mmu0')
92 print ("core pspec", pspec
.ldst_ifacetype
)
93 print ("core mmu", mmu
)
94 print ("core lsmem.lsi", l0
.cmpi
.lsmem
.lsi
)
96 mmu
.alu
.set_ldst_interface(l0
.cmpi
.lsmem
.lsi
)
98 # register files (yes plural)
99 self
.regs
= RegFiles(pspec
)
101 # instruction decoder - needs a Trap-capable Record (captures EINT etc.)
102 self
.e
= Decode2ToExecute1Type("core", opkls
=IssuerDecode2ToOperand
,
103 regreduce_en
=self
.regreduce_en
)
105 # SVP64 RA_OR_ZERO needs to know if the relevant EXTRA2/3 field is zero
106 self
.sv_a_nz
= Signal()
108 # state and raw instruction (and SVP64 ReMap fields)
109 self
.state
= CoreState("core")
110 self
.raw_insn_i
= Signal(32) # raw instruction
111 self
.bigendian_i
= Signal() # bigendian - TODO, set by MSR.BE
113 self
.sv_rm
= SVP64Rec(name
="core_svp64_rm") # SVP64 RM field
114 self
.is_svp64_mode
= Signal() # set if SVP64 mode is enabled
115 self
.sv_pred_sm
= Signal() # TODO: SIMD width
116 self
.sv_pred_dm
= Signal() # TODO: SIMD width
118 # issue/valid/busy signalling
119 self
.ivalid_i
= Signal(reset_less
=True) # instruction is valid
120 self
.issue_i
= Signal(reset_less
=True)
121 self
.busy_o
= Signal(name
="corebusy_o", reset_less
=True)
123 # start/stop and terminated signalling
124 self
.core_terminate_o
= Signal(reset
=0) # indicates stopped
126 # create per-FU instruction decoders (subsetted)
130 for funame
, fu
in self
.fus
.fus
.items():
131 f_name
= fu
.fnunit
.name
132 fnunit
= fu
.fnunit
.value
133 opkls
= fu
.opsubsetkls
135 # TRAP decoder is the *main* decoder
136 self
.trapunit
= funame
138 self
.decoders
[funame
] = PowerDecodeSubset(None, opkls
, f_name
,
141 svp64_en
=self
.svp64_en
,
142 regreduce_en
=self
.regreduce_en
)
143 self
.des
[funame
] = self
.decoders
[funame
].do
145 if "mmu0" in self
.decoders
:
146 self
.decoders
["mmu0"].mmu0_spr_dec
= self
.decoders
["spr0"]
148 def elaborate(self
, platform
):
150 # for testing purposes, to cut down on build time in coriolis2
151 if hasattr(self
.pspec
, "nocore") and self
.pspec
.nocore
== True:
152 x
= Signal() # dummy signal
157 m
.submodules
.fus
= self
.fus
158 m
.submodules
.l0
= l0
= self
.l0
159 self
.regs
.elaborate_into(m
, platform
)
164 for k
, v
in self
.decoders
.items():
165 setattr(m
.submodules
, "dec_%s" % v
.fn_name
, v
)
166 comb
+= v
.dec
.raw_opcode_in
.eq(self
.raw_insn_i
)
167 comb
+= v
.dec
.bigendian
.eq(self
.bigendian_i
)
168 # sigh due to SVP64 RA_OR_ZERO detection connect these too
169 comb
+= v
.sv_a_nz
.eq(self
.sv_a_nz
)
171 comb
+= v
.pred_sm
.eq(self
.sv_pred_sm
)
172 comb
+= v
.pred_dm
.eq(self
.sv_pred_dm
)
173 if k
!= self
.trapunit
:
174 comb
+= v
.sv_rm
.eq(self
.sv_rm
) # pass through SVP64 ReMap
175 comb
+= v
.is_svp64_mode
.eq(self
.is_svp64_mode
)
177 # ssh, cheat: trap uses the main decoder because of the rewriting
178 self
.des
[self
.trapunit
] = self
.e
.do
180 # connect up Function Units, then read/write ports
181 fu_bitdict
= self
.connect_instruction(m
)
182 self
.connect_rdports(m
, fu_bitdict
)
183 self
.connect_wrports(m
, fu_bitdict
)
187 def connect_instruction(self
, m
):
188 """connect_instruction
190 uses decoded (from PowerOp) function unit information from CSV files
191 to ascertain which Function Unit should deal with the current
194 some (such as OP_ATTN, OP_NOP) are dealt with here, including
195 ignoring it and halting the processor. OP_NOP is a bit annoying
196 because the issuer expects busy flag still to be raised then lowered.
197 (this requires a fake counter to be set).
199 comb
, sync
= m
.d
.comb
, m
.d
.sync
202 # enable-signals for each FU, get one bit for each FU (by name)
203 fu_enable
= Signal(len(fus
), reset_less
=True)
205 for i
, funame
in enumerate(fus
.keys()):
206 fu_bitdict
[funame
] = fu_enable
[i
]
208 # enable the required Function Unit based on the opcode decode
209 # note: this *only* works correctly for simple core when one and
210 # *only* one FU is allocated per instruction
211 for funame
, fu
in fus
.items():
212 fnunit
= fu
.fnunit
.value
213 enable
= Signal(name
="en_%s" % funame
, reset_less
=True)
214 comb
+= enable
.eq((self
.e
.do
.fn_unit
& fnunit
).bool())
215 comb
+= fu_bitdict
[funame
].eq(enable
)
217 # sigh - need a NOP counter
219 with m
.If(counter
!= 0):
220 sync
+= counter
.eq(counter
- 1)
221 comb
+= self
.busy_o
.eq(1)
223 with m
.If(self
.ivalid_i
): # run only when valid
224 with m
.Switch(self
.e
.do
.insn_type
):
225 # check for ATTN: halt if true
226 with m
.Case(MicrOp
.OP_ATTN
):
227 m
.d
.sync
+= self
.core_terminate_o
.eq(1)
229 with m
.Case(MicrOp
.OP_NOP
):
230 sync
+= counter
.eq(2)
231 comb
+= self
.busy_o
.eq(1)
234 # connect up instructions. only one enabled at a time
235 for funame
, fu
in fus
.items():
236 do
= self
.des
[funame
]
237 enable
= fu_bitdict
[funame
]
239 # run this FunctionUnit if enabled
240 # route op, issue, busy, read flags and mask to FU
242 # operand comes from the *local* decoder
243 comb
+= fu
.oper_i
.eq_from(do
)
244 #comb += fu.oper_i.eq_from_execute1(e)
245 comb
+= fu
.issue_i
.eq(self
.issue_i
)
246 comb
+= self
.busy_o
.eq(fu
.busy_o
)
247 # rdmask, which is for registers, needs to come
248 # from the *main* decoder
249 rdmask
= get_rdflags(self
.e
, fu
)
250 comb
+= fu
.rdmaskn
.eq(~rdmask
)
254 def connect_rdport(self
, m
, fu_bitdict
, rdpickers
, regfile
, regname
, fspec
):
255 comb
, sync
= m
.d
.comb
, m
.d
.sync
261 # select the required read port. these are pre-defined sizes
262 rfile
= regs
.rf
[regfile
.lower()]
263 rport
= rfile
.r_ports
[rpidx
]
264 print("read regfile", rpidx
, regfile
, regs
.rf
.keys(),
268 if not isinstance(fspecs
, list):
275 for i
, fspec
in enumerate(fspecs
):
276 # get the regfile specs for this regfile port
277 (rf
, read
, write
, wid
, fuspec
) = fspec
278 print ("fpsec", i
, fspec
, len(fuspec
))
279 ppoffs
.append(pplen
) # record offset for picker
281 name
= "rdflag_%s_%s_%d" % (regfile
, regname
, i
)
282 rdflag
= Signal(name
=name
, reset_less
=True)
283 comb
+= rdflag
.eq(rf
)
284 rdflags
.append(rdflag
)
287 print ("pplen", pplen
)
289 # create a priority picker to manage this port
290 rdpickers
[regfile
][rpidx
] = rdpick
= PriorityPicker(pplen
)
291 setattr(m
.submodules
, "rdpick_%s_%s" % (regfile
, rpidx
), rdpick
)
295 for i
, fspec
in enumerate(fspecs
):
296 (rf
, read
, write
, wid
, fuspec
) = fspec
297 # connect up the FU req/go signals, and the reg-read to the FU
298 # and create a Read Broadcast Bus
299 for pi
, (funame
, fu
, idx
) in enumerate(fuspec
):
302 # connect request-read to picker input, and output to go-rd
303 fu_active
= fu_bitdict
[funame
]
304 name
= "%s_%s_%s_%i" % (regfile
, rpidx
, funame
, pi
)
305 addr_en
= Signal
.like(reads
[i
], name
="addr_en_"+name
)
306 pick
= Signal(name
="pick_"+name
) # picker input
307 rp
= Signal(name
="rp_"+name
) # picker output
308 delay_pick
= Signal(name
="dp_"+name
) # read-enable "underway"
310 # exclude any currently-enabled read-request (mask out active)
311 comb
+= pick
.eq(fu
.rd_rel_o
[idx
] & fu_active
& rdflags
[i
] &
313 comb
+= rdpick
.i
[pi
].eq(pick
)
314 comb
+= fu
.go_rd_i
[idx
].eq(delay_pick
) # pass in *delayed* pick
316 # if picked, select read-port "reg select" number to port
317 comb
+= rp
.eq(rdpick
.o
[pi
] & rdpick
.en_o
)
318 sync
+= delay_pick
.eq(rp
) # delayed "pick"
319 comb
+= addr_en
.eq(Mux(rp
, reads
[i
], 0))
321 # the read-enable happens combinatorially (see mux-bus below)
322 # but it results in the data coming out on a one-cycle delay.
326 addrs
.append(addr_en
)
329 # use the *delayed* pick signal to put requested data onto bus
330 with m
.If(delay_pick
):
331 # connect regfile port to input, creating fan-out Bus
333 print("reg connect widths",
334 regfile
, regname
, pi
, funame
,
335 src
.shape(), rport
.data_o
.shape())
336 # all FUs connect to same port
337 comb
+= src
.eq(rport
.data_o
)
339 # or-reduce the muxed read signals
341 # for unary-addressed
342 comb
+= rport
.ren
.eq(ortreereduce_sig(rens
))
344 # for binary-addressed
345 comb
+= rport
.addr
.eq(ortreereduce_sig(addrs
))
346 comb
+= rport
.ren
.eq(Cat(*rens
).bool())
347 print ("binary", regfile
, rpidx
, rport
, rport
.ren
, rens
, addrs
)
def connect_rdports(self, m, fu_bitdict):
    """connect read ports

    orders the read regspecs into a dict-of-dicts, by regfile, by
    regport name, then connects all FUs that want that regport by
    way of a PriorityPicker.

    m: the module being built; handed on to connect_rdport
    fu_bitdict: dict of per-Function-Unit enable bits, keyed by FU name;
                handed on to connect_rdport

    when self.regreduce_en is set, several named ports are merged into a
    single list-valued entry so that they share one regfile port:
    rb/rc/ra become "rabc" on the INT regfile, and fast2/fast3 (when
    present) are folded into "fast1" on the FAST regfile.
    """
    # dictionary of lists of regfile read ports
    byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

    # okaay, now we need a PriorityPicker per regfile per regfile port
    # loootta pickers... peter piper picked a pack of pickled peppers...
    rdpickers = {}
    for regfile in byregfiles_rd:
        fuspecs = byregfiles_rdspec[regfile]
        rdpickers[regfile] = {}

        # argh.  an experiment to merge RA and RB in the INT regfile
        # (we have too many read/write ports)
        if self.regreduce_en:
            if regfile == 'INT':
                # only the INT regfile has ra/rb/rc ports to merge
                fuspecs['rabc'] = [fuspecs.pop(name)
                                   for name in ('rb', 'rc', 'ra')]
            if regfile == 'FAST':
                merged = [fuspecs.pop('fast1')]
                # fast2 and fast3 are optional
                merged.extend(fuspecs.pop(name)
                              for name in ('fast2', 'fast3')
                              if name in fuspecs)
                fuspecs['fast1'] = merged

        # for each named regfile port, connect up all FUs to that port
        for regname, fspec in sort_fuspecs(fuspecs):
            print("connect rd", regname, fspec)
            self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
                                regname, fspec)
390 def connect_wrport(self
, m
, fu_bitdict
, wrpickers
, regfile
, regname
, fspec
):
391 comb
, sync
= m
.d
.comb
, m
.d
.sync
395 print("connect wr", regname
, fspec
)
398 # select the required write port. these are pre-defined sizes
399 print(regfile
, regs
.rf
.keys())
400 rfile
= regs
.rf
[regfile
.lower()]
401 wport
= rfile
.w_ports
[rpidx
]
404 if not isinstance(fspecs
, list):
410 for i
, fspec
in enumerate(fspecs
):
411 # get the regfile specs for this regfile port
412 (rf
, read
, write
, wid
, fuspec
) = fspec
413 print ("fpsec", i
, fspec
, len(fuspec
))
414 ppoffs
.append(pplen
) # record offset for picker
417 # create a priority picker to manage this port
418 wrpickers
[regfile
][rpidx
] = wrpick
= PriorityPicker(pplen
)
419 setattr(m
.submodules
, "wrpick_%s_%s" % (regfile
, rpidx
), wrpick
)
424 for i
, fspec
in enumerate(fspecs
):
425 # connect up the FU req/go signals and the reg-read to the FU
426 # these are arbitrated by Data.ok signals
427 (rf
, read
, write
, wid
, fuspec
) = fspec
428 for pi
, (funame
, fu
, idx
) in enumerate(fuspec
):
431 # write-request comes from dest.ok
432 dest
= fu
.get_out(idx
)
433 fu_dest_latch
= fu
.get_fu_out(idx
) # latched output
434 name
= "wrflag_%s_%s_%d" % (funame
, regname
, idx
)
435 wrflag
= Signal(name
=name
, reset_less
=True)
436 comb
+= wrflag
.eq(dest
.ok
& fu
.busy_o
)
438 # connect request-write to picker input, and output to go-wr
439 fu_active
= fu_bitdict
[funame
]
440 pick
= fu
.wr
.rel_o
[idx
] & fu_active
# & wrflag
441 comb
+= wrpick
.i
[pi
].eq(pick
)
442 # create a single-pulse go write from the picker output
444 comb
+= wr_pick
.eq(wrpick
.o
[pi
] & wrpick
.en_o
)
445 comb
+= fu
.go_wr_i
[idx
].eq(rising_edge(m
, wr_pick
))
447 # connect the regspec write "reg select" number to this port
448 # only if one FU actually requests (and is granted) the port
449 # will the write-enable be activated
450 addr_en
= Signal
.like(write
)
452 comb
+= wp
.eq(wr_pick
& wrpick
.en_o
)
453 comb
+= addr_en
.eq(Mux(wp
, write
, 0))
457 addrs
.append(addr_en
)
460 # connect regfile port to input
461 print("reg connect widths",
462 regfile
, regname
, pi
, funame
,
463 dest
.shape(), wport
.data_i
.shape())
464 wsigs
.append(fu_dest_latch
)
466 # here is where we create the Write Broadcast Bus. simple, eh?
467 comb
+= wport
.data_i
.eq(ortreereduce_sig(wsigs
))
469 # for unary-addressed
470 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
472 # for binary-addressed
473 comb
+= wport
.addr
.eq(ortreereduce_sig(addrs
))
474 comb
+= wport
.wen
.eq(ortreereduce_sig(wens
))
def connect_wrports(self, m, fu_bitdict):
    """connect write ports

    orders the write regspecs into a dict-of-dicts, by regfile,
    by regport name, then connects all FUs that want that regport
    by way of a PriorityPicker.

    note that the write-port wen, write-port data, and go_wr_i all need to
    be on the exact same clock cycle.  as there is a combinatorial loop bug
    at the moment, these all use sync.

    m: the module being built; handed on to connect_wrport
    fu_bitdict: dict of per-Function-Unit enable bits, keyed by FU name;
                handed on to connect_wrport

    when self.regreduce_en is set, several named ports are merged into a
    single list-valued entry so that they share one regfile port:
    o/o1 become "o" on the INT regfile, and fast2/fast3 (when present)
    are folded into "fast1" on the FAST regfile.
    """
    # dictionary of lists of regfile write ports
    byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

    # same for write ports.
    # BLECH!  complex code-duplication! BLECH!
    wrpickers = {}
    for regfile in byregfiles_wr:
        fuspecs = byregfiles_wrspec[regfile]
        wrpickers[regfile] = {}

        if self.regreduce_en:
            # argh, more port-merging
            if regfile == 'INT':
                # only the INT regfile has o/o1 ports to merge
                fuspecs['o'] = [fuspecs.pop(name) for name in ('o', 'o1')]
            if regfile == 'FAST':
                merged = [fuspecs.pop('fast1')]
                # fast2 and fast3 are optional
                merged.extend(fuspecs.pop(name)
                              for name in ('fast2', 'fast3')
                              if name in fuspecs)
                fuspecs['fast1'] = merged

        for regname, fspec in sort_fuspecs(fuspecs):
            self.connect_wrport(m, fu_bitdict, wrpickers,
                                regfile, regname, fspec)
def get_byregfiles(self, readmode):
    """Group the Function Units' register port needs by register file.

    readmode: True to collect read (source) ports, False to collect
              write (destination) ports

    returns a tuple (byregfiles, byregfiles_spec):

    * byregfiles[regfile][idx] is a list of (funame, fu, idx) tuples,
      one per Function Unit operand with that index on that regfile
    * byregfiles_spec[regfile][regname] is a tuple
      (rdflag, read, write, wid, fuspecs) where fuspecs is the list of
      (funame, fu, idx) tuples that use that named port.  rdflag, read,
      write and wid are recorded from the first Function Unit found to
      use the port.  in read mode write is None; in write mode rdflag
      and read are None.

    the groupings are also printed out, for convenience.
    """
    mode = "read" if readmode else "write"
    fus = self.fus.fus
    e = self.e  # decoded instruction to execute

    # dictionary of lists of regfile ports
    byregfiles = {}
    byregfiles_spec = {}
    for funame, fu in fus.items():
        print("%s ports for %s" % (mode, funame))
        for idx in range(fu.n_src if readmode else fu.n_dst):
            # in-specs describe sources, out-specs describe destinations
            if readmode:
                regfile, regname, wid = fu.get_in_spec(idx)
            else:
                regfile, regname, wid = fu.get_out_spec(idx)
            print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
            if readmode:
                rdflag, read = regspec_decode_read(e, regfile, regname)
                write = None
            else:
                rdflag, read = None, None
                _wrport, write = regspec_decode_write(e, regfile, regname)
            # both dictionaries gain their per-regfile entry together
            lanes = byregfiles.setdefault(regfile, {})
            specs = byregfiles_spec.setdefault(regfile, {})
            if regname not in specs:
                specs[regname] = (rdflag, read, write, wid, [])
            # here we start to create "lanes"
            fuspec = (funame, fu, idx)
            lanes.setdefault(idx, []).append(fuspec)
            specs[regname][4].append(fuspec)

    # ok just print that out, for convenience
    for regfile in byregfiles:
        print("regfile %s ports:" % mode, regfile)
        for regname, fspec in byregfiles_spec[regfile].items():
            rdflag, read, write, wid, fuspec = fspec
            print(" rf %s port %s lane: %s" % (mode, regfile, regname))
            print(" %s" % regname, wid, read, write, rdflag)
            for funame, fu, idx in fuspec:
                fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                print(" ", funame, fu, idx, fusig)

    return byregfiles, byregfiles_spec
569 yield from self
.fus
.ports()
570 yield from self
.e
.ports()
571 yield from self
.l0
.ports()
578 if __name__
== '__main__':
579 pspec
= TestMemPspec(ldst_ifacetype
='testpi',
584 dut
= NonProductionCore(pspec
)
585 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
586 with
open("test_core.il", "w") as f
: