from nmigen.cli import rtlil
from openpower.decoder.power_decoder2 import PowerDecodeSubset
-from openpower.decoder.power_regspec_map import regspec_decode_read
-from openpower.decoder.power_regspec_map import regspec_decode_write
+from openpower.decoder.power_regspec_map import regspec_decode
from openpower.sv.svp64 import SVP64Rec
from nmutil.picker import PriorityPicker
from nmutil.util import rising_edge
FUSpec = namedtuple("FUSpec", ["funame", "fu", "idx"])
-ByRegSpec = namedtuple("ByRegSpec", ["rdport", "wrport", "read",
- "write", "wid", "specs"])
+ByRegSpec = namedtuple("ByRegSpec", ["okflag", "regport", "wid", "specs"])
# helper function for reducing a list of signals down to a parallel
# ORed single signal.
return res # enumerate(res)
+# a hazard bitvector "remap" function which returns an AST expression
+# that remaps read/write hazard regfile port numbers to either a full
+# bitvector or a reduced subset one. SPR for example is reduced to a
+# single bit.
+# CRITICALLY-IMPORTANT NOTE: these bitvectors *have* to match up per
+# regfile! therefore the remapping is per regfile, *NOT* per regfile
+# port and certainly not based on whether it is a read port or write port.
+# note that any reductions here will result in degraded performance due
+# to conflicts, but at least it keeps the hazard matrix sizes down to "sane"
+def bitvector_remap(regfile, rfile, port):
+ # 8-bits (at the moment, no SVP64), CR is unary: no remap
+ if regfile == 'CR':
+ return port
+ # 3 bits, unary already: return the port
+ if regfile == 'XER':
+ return port
+ # 3 bits, unary: return the port
+ if regfile == 'XER':
+ return port
+ # 5 bits, unary: return the port
+ if regfile == 'STATE':
+ return port
+ # 9 bits (9 entries), might be unary already
+ if regfile == 'FAST':
+ if rfile.unary: # FAST might be unary already
+ return port
+ else:
+ return 1 << port
+ # 10 bits (!!) - TODO reduce to one (currently handled same as FAST)
+ if regfile == 'SPR':
+ if rfile.unary: # SPR might be unary already
+ return port
+ else:
+ return 1 << port
+ if regfile == 'INT':
+ if rfile.unary: # INT, check if unary/binary
+ return port
+ else:
+ return 1 << port
+
+
# derive from ControlBase rather than have a separate Stage instance,
# this is simpler to do
class NonProductionCore(ControlBase):
# link LoadStore1 into MMU
mmu = self.fus.get_fu('mmu0')
+ ldst0 = self.fus.get_fu('ldst0')
print ("core pspec", pspec.ldst_ifacetype)
print ("core mmu", mmu)
if mmu is not None:
- print ("core lsmem.lsi", l0.cmpi.lsmem.lsi)
- mmu.alu.set_ldst_interface(l0.cmpi.lsmem.lsi)
+ lsi = l0.cmpi.lsmem.lsi # a LoadStore1 Interface object
+ print ("core lsmem.lsi", lsi)
+ mmu.alu.set_ldst_interface(lsi)
+ # urr store I-Cache in core so it is easier to get at
+ self.icache = lsi.icache
+
+ # alternative reset values for STATE regs. these probably shouldn't
+ # be set, here, instead have them done by Issuer. which they are.
+ # as well. because core.state overrides them. sigh.
+ self.msr_at_reset = 0x0
+ self.pc_at_reset = 0x0
+ if hasattr(pspec, "msr_reset") and isinstance(pspec.msr_reset, int):
+ self.msr_at_reset = pspec.msr_reset
+ if hasattr(pspec, "pc_reset") and isinstance(pspec.pc_reset, int):
+ self.pc_at_reset = pspec.pc_reset
+ state_resets = [self.pc_at_reset, # PC at reset
+ self.msr_at_reset, # MSR at reset
+ 0x0, # SVSTATE at reset
+ 0x0, # DEC at reset
+ 0x0] # TB at reset
# register files (yes plural)
- self.regs = RegFiles(pspec, make_hazard_vecs=self.make_hazard_vecs)
+ self.regs = RegFiles(pspec, make_hazard_vecs=self.make_hazard_vecs,
+ state_resets=state_resets)
# set up input and output: unusual requirement to set data directly
# (due to the way that the core is set up in a different domain,
self.decoders = {}
self.des = {}
+ # eep, these should be *per FU* i.e. for FunctionUnitBaseMulti
+ # they should be shared (put into the ALU *once*).
+
for funame, fu in self.fus.fus.items():
f_name = fu.fnunit.name
fnunit = fu.fnunit.value
# TRAP decoder is the *main* decoder
self.trapunit = funame
continue
+ assert funame not in self.decoders
self.decoders[funame] = PowerDecodeSubset(None, opkls, f_name,
final=True,
state=self.ireg.state,
svp64_en=self.svp64_en,
regreduce_en=self.regreduce_en)
self.des[funame] = self.decoders[funame].do
+ print ("create decoder subset", funame, opkls, self.des[funame])
# create per-Function Unit write-after-write hazard signals
# yes, really, this should have been added in ReservationStations
if "mmu0" in self.decoders:
self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"]
+ # allow pausing of the DEC/TB FSM back in Issuer, by spotting
+ # if there is an MTSPR instruction
+ self.pause_dec_tb = Signal()
+
# next 3 functions are Stage API Compliance
def setup(self, m, i):
pass
# connect each satellite decoder and give it the instruction.
# as subset decoders this massively reduces wire fanout given
# the large number of ALUs
- m.submodules["dec_%s" % v.fn_name] = v
+ m.submodules["dec_%s" % k] = v
comb += v.dec.raw_opcode_in.eq(self.ireg.raw_insn_i)
comb += v.dec.bigendian.eq(self.ireg.bigendian_i)
# sigh due to SVP64 RA_OR_ZERO detection connect these too
comb += v.sv_a_nz.eq(self.ireg.sv_a_nz)
- if self.svp64_en:
- comb += v.pred_sm.eq(self.ireg.sv_pred_sm)
- comb += v.pred_dm.eq(self.ireg.sv_pred_dm)
- if k != self.trapunit:
- comb += v.sv_rm.eq(self.ireg.sv_rm) # pass through SVP64 RM
- comb += v.is_svp64_mode.eq(self.ireg.is_svp64_mode)
- # only the LDST PowerDecodeSubset *actually* needs to
- # know to use the alternative decoder. this is all
- # a terrible hack
- if k.lower().startswith("ldst"):
- comb += v.use_svp64_ldst_dec.eq(
- self.ireg.use_svp64_ldst_dec)
+ if not self.svp64_en:
+ continue
+ comb += v.pred_sm.eq(self.ireg.sv_pred_sm)
+ comb += v.pred_dm.eq(self.ireg.sv_pred_dm)
+ if k == self.trapunit:
+ continue
+ comb += v.sv_rm.eq(self.ireg.sv_rm) # pass through SVP64 RM
+ comb += v.is_svp64_mode.eq(self.ireg.is_svp64_mode)
+ # only the LDST PowerDecodeSubset *actually* needs to
+ # know to use the alternative decoder. this is all
+ # a terrible hack
+ if not k.lower().startswith("ldst"):
+ continue
+ comb += v.use_svp64_ldst_dec.eq( self.ireg.use_svp64_ldst_dec)
def connect_instruction(self, m):
"""connect_instruction
# rdmask, which is for registers needs to come from the *main* decoder
for funame, fu in fus.items():
- rdmask = get_rdflags(self.ireg.e, fu)
+ rdmask = get_rdflags(m, self.ireg.e, fu)
comb += fu.rdmaskn.eq(~rdmask)
# sigh - need a NOP counter
# is a waw hazard. decoder has to still
# be asserted in order to detect that, tho
comb += fu.oper_i.eq_from(do)
+ if funame == 'mmu0':
+ # URRR this is truly dreadful.
+ # OP_FETCH_FAILED is a "fake" op.
+ # no instruction creates it. OP_TRAP
+ # uses the *main* decoder: this is
+ # a *Satellite* decoder that reacts
+ # on *insn_in*... not fake ops. gaah.
+ main_op = self.ireg.e.do
+ with m.If(main_op.insn_type ==
+ MicrOp.OP_FETCH_FAILED):
+ comb += fu.oper_i.insn_type.eq(
+ MicrOp.OP_FETCH_FAILED)
+ comb += fu.oper_i.fn_unit.eq(
+ Function.MMU)
# issue when valid (and no write-hazard)
comb += fu.issue_i.eq(~self.waw_hazard)
# instruction ok, indicate ready
funame.lower().startswith('trap')):
with m.If(fu.busy_o):
comb += busy_o.eq(1)
+ # for SPR pipeline pause dec/tb FSM to avoid race condition
+ # TODO: really this should be much more sophisticated,
+ # spot MTSPR, spot that DEC/TB is what is to be updated.
+ # a job for PowerDecoder2, there
+ if funame.lower().startswith('spr'):
+ with m.If(fu.busy_o #& fu.oper_i.insn_type == OP_MTSPR
+ ):
+ comb += self.pause_dec_tb.eq(1)
# return both the function unit "enable" dict as well as the "busy".
# the "busy-or-issued" can be passed in to the Read/Write port
ppoffs = []
for i, fspec in enumerate(fspecs):
# get the regfile specs for this regfile port
- (rf, wf, _read, _write, wid, fuspecs) = \
- (fspec.rdport, fspec.wrport, fspec.read, fspec.write,
- fspec.wid, fspec.specs)
- print ("fpsec", i, fspec, len(fuspecs))
+ print ("fpsec", i, fspec, len(fspec.specs))
+ name = "%s_%s_%d" % (regfile, regname, i)
ppoffs.append(pplen) # record offset for picker
pplen += len(fspec.specs)
- name = "rdflag_%s_%s_%d" % (regfile, regname, i)
- rdflag = Signal(name=name, reset_less=True)
- comb += rdflag.eq(fspec.rdport)
+ rdflag = Signal(name="rdflag_"+name, reset_less=True)
+ comb += rdflag.eq(fspec.okflag)
rdflags.append(rdflag)
print ("pplen", pplen)
wvens = []
for i, fspec in enumerate(fspecs):
- (rf, wf, _read, _write, wid, fuspecs) = \
- (fspec.rdport, fspec.wrport, fspec.read, fspec.write,
- fspec.wid, fspec.specs)
+ (rf, _read, wid, fuspecs) = \
+ (fspec.okflag, fspec.regport, fspec.wid, fspec.specs)
# connect up the FU req/go signals, and the reg-read to the FU
# and create a Read Broadcast Bus
for pi, fuspec in enumerate(fspec.specs):
fu_issued = fu_bitdict[funame]
# get (or set up) a latched copy of read register number
+ # and (sigh) also the read-ok flag
+ # TODO: use nmutil latchregister
+ rhname = "%s_%s_%d" % (regfile, regname, i)
+ rdflag = Signal(name="rdflag_%s_%s" % (funame, rhname),
+ reset_less=True)
+ if rhname not in fu.rf_latches:
+ rfl = Signal(name="rdflag_latch_%s_%s" % (funame, rhname))
+ fu.rf_latches[rhname] = rfl
+ with m.If(fu.issue_i):
+ sync += rfl.eq(rdflags[i])
+ else:
+ rfl = fu.rf_latches[rhname]
+
+ # now the register port
rname = "%s_%s_%s_%d" % (funame, regfile, regname, pi)
- read = Signal.like(_read, name="read_"+name)
+ read = Signal.like(_read, name="read_"+rname)
if rname not in fu.rd_latches:
rdl = Signal.like(_read, name="rdlatch_"+rname)
fu.rd_latches[rname] = rdl
sync += rdl.eq(_read)
else:
rdl = fu.rd_latches[rname]
- # latch to make the read immediately available on issue cycle
- # after the read cycle, use the latched copy
+
+ # make the read immediately available on issue cycle
+ # after the read cycle, otherwise use the latched copy.
+ # this captures the regport and okflag on issue
with m.If(fu.issue_i):
comb += read.eq(_read)
+ comb += rdflag.eq(rdflags[i])
with m.Else():
comb += read.eq(rdl)
+ comb += rdflag.eq(rfl)
# connect request-read to picker input, and output to go-rd
addr_en = Signal.like(read, name="addr_en_"+name)
# exclude any currently-enabled read-request (mask out active)
# entirely block anything hazarded from being picked
- comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
+ comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflag &
~delay_pick & ~rhazard)
comb += rdpick.i[pi].eq(pick)
comb += fu.go_rd_i[idx].eq(delay_pick) # pass in *delayed* pick
wvchk_en = Signal(len(wvchk), name="wv_chk_addr_en_"+name)
issue_active = Signal(name="rd_iactive_"+name)
# XXX combinatorial loop here
- comb += issue_active.eq(fu_active & rf)
+ comb += issue_active.eq(fu_active & rdflag)
with m.If(issue_active):
if rfile.unary:
comb += wvchk_en.eq(read)
rd_hazard = []
# dictionary of lists of regfile read ports
- byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)
+ byregfiles_rdspec = self.get_byregfiles(m, True)
# okaay, now we need a PriorityPicker per regfile per regfile port
# loootta pickers... peter piper picked a pack of pickled peppers...
rdpickers = {}
- for regfile, spec in byregfiles_rd.items():
- fuspecs = byregfiles_rdspec[regfile]
+ for regfile, fuspecs in byregfiles_rdspec.items():
rdpickers[regfile] = {}
# argh. an experiment to merge RA and RB in the INT regfile
wvset = wv.s # write-vec bit-level hazard ctrl
wvclr = wv.r # write-vec bit-level hazard ctrl
wvchk = wv.q # write-after-write hazard check
- wvchk_qint = wv.q_int # write-after-write hazard check, delayed
fspecs = fspec
if not isinstance(fspecs, list):
pplen = 0
writes = []
ppoffs = []
- rdflags = []
wrflags = []
for i, fspec in enumerate(fspecs):
# get the regfile specs for this regfile port
- (rf, wf, _read, _write, wid, fuspecs) = \
- (fspec.rdport, fspec.wrport, fspec.read, fspec.write,
- fspec.wid, fspec.specs)
+ (wf, _write, wid, fuspecs) = \
+ (fspec.okflag, fspec.regport, fspec.wid, fspec.specs)
print ("fpsec", i, "wrflag", wf, fspec, len(fuspecs))
ppoffs.append(pplen) # record offset for picker
pplen += len(fuspecs)
name = "%s_%s_%d" % (regfile, regname, i)
- rdflag = Signal(name="rd_flag_"+name)
wrflag = Signal(name="wr_flag_"+name)
- if rf is not None:
- comb += rdflag.eq(rf)
- else:
- comb += rdflag.eq(0)
if wf is not None:
comb += wrflag.eq(wf)
else:
comb += wrflag.eq(0)
- rdflags.append(rdflag)
wrflags.append(wrflag)
# create a priority picker to manage this port
for i, fspec in enumerate(fspecs):
# connect up the FU req/go signals and the reg-read to the FU
# these are arbitrated by Data.ok signals
- (rf, wf, _read, _write, wid, fuspecs) = \
- (fspec.rdport, fspec.wrport, fspec.read, fspec.write,
- fspec.wid, fspec.specs)
+ (wf, _write, wid, fuspecs) = \
+ (fspec.okflag, fspec.regport, fspec.wid, fspec.specs)
for pi, fuspec in enumerate(fspec.specs):
(funame, fu, idx) = (fuspec.funame, fuspec.fu, fuspec.idx)
fu_requested = fu_bitdict[funame]
# write-hazard is ANDed with (filtered by) what is actually
# being requested. the wvchk data is on a one-clock delay,
# and wvchk_en comes directly from the main decoder
- comb += whazard.eq((wvchk_qint & wvchk_en).bool())
+ comb += whazard.eq((wvchk & wvchk_en).bool())
with m.If(whazard):
comb += fu._waw_hazard.eq(1)
# in as a single "thing". this can only be done because the
# set/get is an unary bitvector.
print ("make write-vecs", regfile, regname, wvset, wvclr)
- return (ortreereduce_sig(wvclren), # clear (regfile write)
- ortreereduce_sig(wvseten)) # set (issue time)
+ return (wvclren, # clear (regfile write)
+ wvseten) # set (issue time)
def connect_wrports(self, m, fu_bitdict, fu_selected):
"""connect write ports
fus = self.fus.fus
regs = self.regs
# dictionary of lists of regfile write ports
- byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)
+ byregfiles_wrspec = self.get_byregfiles(m, False)
# same for write ports.
# BLECH! complex code-duplication! BLECH!
wrpickers = {}
wvclrers = defaultdict(list)
wvseters = defaultdict(list)
- for regfile, spec in byregfiles_wr.items():
- fuspecs = byregfiles_wrspec[regfile]
+ for regfile, fuspecs in byregfiles_wrspec.items():
wrpickers[regfile] = {}
if self.regreduce_en:
fu_bitdict, fu_selected,
wrpickers,
regfile, regname, fspec)
- wvclrers[regfile.lower()].append(wvclren)
- wvseters[regfile.lower()].append(wvseten)
+ wvclrers[regfile.lower()] += wvclren
+ wvseters[regfile.lower()] += wvseten
if not self.make_hazard_vecs:
return
comb += wvclr.eq(ortreereduce_sig(wvclren)) # clear (regfile write)
comb += wvset.eq(ortreereduce_sig(wvseten)) # set (issue time)
- def get_byregfiles(self, readmode):
+ def get_byregfiles(self, m, readmode):
mode = "read" if readmode else "write"
regs = self.regs
# dictionary of dictionaries of lists/tuples of regfile ports.
# first key: regfile. second key: regfile port name
- byregfiles = defaultdict(lambda: defaultdict(list))
byregfiles_spec = defaultdict(dict)
for (funame, fu) in fus.items():
# create in each FU a receptacle for the read/write register
- # hazard numbers. to be latched in connect_rd/write_ports
- # XXX better that this is moved into the actual FUs, but
- # the issue there is that this function is actually better
- # suited at the moment
+ # hazard numbers (and okflags for read). to be latched in
+ # connect_rd/write_ports
if readmode:
- fu.rd_latches = {}
+ fu.rd_latches = {} # read reg number latches
+ fu.rf_latches = {} # read flag latches
else:
fu.wr_latches = {}
+ # construct regfile specs: read uses inspec, write outspec
print("%s ports for %s" % (mode, funame))
for idx in range(fu.n_src if readmode else fu.n_dst):
- # construct regfile specs: read uses inspec, write outspec
- if readmode:
- (regfile, regname, wid) = fu.get_in_spec(idx)
- else:
- (regfile, regname, wid) = fu.get_out_spec(idx)
+ (regfile, regname, wid) = fu.get_io_spec(readmode, idx)
print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
# the PowerDecoder2 (main one, not the satellites) contains
# the decoded regfile numbers. obtain these now
- if readmode:
- rdport, read = regspec_decode_read(e, regfile, regname)
- wrport, write = None, None
- else:
- rdport, read = None, None
- wrport, write = regspec_decode_write(e, regfile, regname)
+ decinfo = regspec_decode(m, readmode, e, regfile, regname)
+ okflag, regport = decinfo.okflag, decinfo.regport
# construct the dictionary of regspec information by regfile
if regname not in byregfiles_spec[regfile]:
byregfiles_spec[regfile][regname] = \
- ByRegSpec(rdport, wrport, read, write, wid, [])
- # here we start to create "lanes"
+ ByRegSpec(okflag, regport, wid, [])
+
+ # here we start to create "lanes" where each Function Unit
+ # requiring access to a given [single-contended resource]
+ # regfile port is appended to a list, so that PriorityPickers
+ # can be created to give uncontested access to it
fuspec = FUSpec(funame, fu, idx)
- byregfiles[regfile][idx].append(fuspec)
byregfiles_spec[regfile][regname].specs.append(fuspec)
- continue
- # append a latch Signal to the FU's list of latches
- rname = "%s_%s" % (regfile, regname)
- if readmode:
- if rname not in fu.rd_latches:
- rdl = Signal.like(read, name="rdlatch_"+rname)
- fu.rd_latches[rname] = rdl
- else:
- if rname not in fu.wr_latches:
- wrl = Signal.like(write, name="wrlatch_"+rname)
- fu.wr_latches[rname] = wrl
-
# ok just print that all out, for convenience
- for regfile, spec in byregfiles.items():
+ for regfile, fuspecs in byregfiles_spec.items():
print("regfile %s ports:" % mode, regfile)
- fuspecs = byregfiles_spec[regfile]
for regname, fspec in fuspecs.items():
- [rdport, wrport, read, write, wid, fuspecs] = fspec
+ [okflag, regport, wid, fuspecs] = fspec
print(" rf %s port %s lane: %s" % (mode, regfile, regname))
- print(" %s" % regname, wid, read, write, rdport, wrport)
+ print(" %s" % regname, wid, okflag, regport)
for (funame, fu, idx) in fuspecs:
fusig = fu.src_i[idx] if readmode else fu.dest[idx]
print(" ", funame, fu.__class__.__name__, idx, fusig)
print()
- return byregfiles, byregfiles_spec
+ return byregfiles_spec
def __iter__(self):
yield from self.fus.ports()
if __name__ == '__main__':
pspec = TestMemPspec(ldst_ifacetype='testpi',
imem_ifacetype='',
- addr_wid=48,
+ addr_wid=64,
allow_overlap=True,
mask_wid=8,
reg_wid=64)