# connect each satellite decoder and give it the instruction.
# as subset decoders this massively reduces wire fanout given
# the large number of ALUs
- setattr(m.submodules, "dec_%s" % v.fn_name, v)
+ m.submodules["dec_%s" % v.fn_name] = v
comb += v.dec.raw_opcode_in.eq(self.ireg.raw_insn_i)
comb += v.dec.bigendian.eq(self.ireg.bigendian_i)
# sigh due to SVP64 RA_OR_ZERO detection connect these too
# and resolved
with m.If(self.issue_conflict):
comb += busy_o.eq(1)
+ # make sure that LDST, SPR, MMU, Branch and Trap all say "busy"
+ # and do not allow overlap. these are all the ones that
+ # are non-forward-progressing: exceptions etc. that otherwise
+ # change CoreState for some reason (MSR, PC, SVSTATE)
+ for funame, fu in fus.items():
+ if (funame.lower().startswith('ldst') or
+ funame.lower().startswith('branch') or
+ funame.lower().startswith('mmu') or
+ funame.lower().startswith('spr') or
+ funame.lower().startswith('trap')):
+ with m.If(fu.busy_o):
+ comb += busy_o.eq(1)
# return both the function unit "enable" dict as well as the "busy".
# the "busy-or-issued" can be passed in to the Read/Write port
(fspec.rdport, fspec.wrport, fspec.read, fspec.write,
fspec.wid, fspec.specs)
print ("fpsec", i, fspec, len(fuspecs))
+ name = "%s_%s_%d" % (regfile, regname, i)
ppoffs.append(pplen) # record offset for picker
pplen += len(fspec.specs)
- name = "rdflag_%s_%s_%d" % (regfile, regname, i)
- rdflag = Signal(name=name, reset_less=True)
+ rdflag = Signal(name="rdflag_"+name, reset_less=True)
comb += rdflag.eq(fspec.rdport)
rdflags.append(rdflag)
# create a priority picker to manage this port
rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
- setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)
+ m.submodules["rdpick_%s_%s" % (regfile, rpidx)] = rdpick
rens = []
addrs = []
fu_issued = fu_bitdict[funame]
# get (or set up) a latched copy of read register number
+ # and (sigh) also the read-ok flag
rname = "%s_%s_%s_%d" % (funame, regfile, regname, pi)
+ rhname = "%s_%s_%d" % (regfile, regname, i)
read = Signal.like(_read, name="read_"+name)
+ rdflag = Signal(name="rdflag_%s_%s" % (funame, rhname),
+ reset_less=True)
+ if rhname not in fu.rf_latches:
+ rfl = Signal(name="rdflag_latch_"+rname)
+ fu.rf_latches[rhname] = rfl
+ with m.If(fu.issue_i):
+ sync += rfl.eq(rdflags[i])
+ else:
+ rfl = fu.rf_latches[rhname]
if rname not in fu.rd_latches:
rdl = Signal.like(_read, name="rdlatch_"+rname)
fu.rd_latches[rname] = rdl
# after the read cycle, use the latched copy
with m.If(fu.issue_i):
comb += read.eq(_read)
+ comb += rdflag.eq(rdflags[i])
with m.Else():
comb += read.eq(rdl)
+ comb += rdflag.eq(rfl)
# connect request-read to picker input, and output to go-rd
addr_en = Signal.like(read, name="addr_en_"+name)
# exclude any currently-enabled read-request (mask out active)
# entirely block anything hazarded from being picked
- comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
+ comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflag &
~delay_pick & ~rhazard)
comb += rdpick.i[pi].eq(pick)
comb += fu.go_rd_i[idx].eq(delay_pick) # pass in *delayed* pick
wvchk_en = Signal(len(wvchk), name="wv_chk_addr_en_"+name)
issue_active = Signal(name="rd_iactive_"+name)
# XXX combinatorial loop here
- comb += issue_active.eq(fu_active & rf)
+ comb += issue_active.eq(fu_active & rdflag)
with m.If(issue_active):
if rfile.unary:
comb += wvchk_en.eq(read)
wvset = wv.s # write-vec bit-level hazard ctrl
wvclr = wv.r # write-vec bit-level hazard ctrl
wvchk = wv.q # write-after-write hazard check
- wvchk_qint = wv.q_int # write-after-write hazard check, delayed
+ wvchk_qint = wv.q # write-after-write hazard check, NOT delayed
fspecs = fspec
if not isinstance(fspecs, list):
# create a priority picker to manage this port
wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
- setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)
+ m.submodules["wrpick_%s_%s" % (regfile, rpidx)] = wrpick
wsigs = []
wens = []
comb += wport.wen.eq(ortreereduce_sig(wens))
if not self.make_hazard_vecs:
- return
-
- # for write-vectors
- comb += wvclr.eq(ortreereduce_sig(wvclren)) # clear (regfile write)
- comb += wvset.eq(ortreereduce_sig(wvseten)) # set (issue time)
+ return [], []
+
+ # return these here rather than set wvclr/wvset directly,
+ # because there may be more than one write-port to a given
+ # regfile. example: XER has a write-port for SO, CA, and OV
+ # and the *last one added* of those would overwrite the other
+ # two. solution: have connect_wrports collate all the
+ # or-tree-reduced bitvector set/clear requests and drop them
+ # in as a single "thing". this can only be done because the
+ # set/clear is a unary bitvector.
+ print ("make write-vecs", regfile, regname, wvset, wvclr)
+ return (ortreereduce_sig(wvclren), # clear (regfile write)
+ ortreereduce_sig(wvseten)) # set (issue time)
def connect_wrports(self, m, fu_bitdict, fu_selected):
"""connect write ports
# same for write ports.
# BLECH! complex code-duplication! BLECH!
wrpickers = {}
+ wvclrers = defaultdict(list)
+ wvseters = defaultdict(list)
for regfile, spec in byregfiles_wr.items():
fuspecs = byregfiles_wrspec[regfile]
wrpickers[regfile] = {}
if 'fast3' in fuspecs:
fuspecs['fast1'].append(fuspecs.pop('fast3'))
+ # collate these and record them by regfile because there
+ # is sometimes more than one write-port per regfile
for (regname, fspec) in sort_fuspecs(fuspecs):
- self.connect_wrport(m, fu_bitdict, fu_selected, wrpickers,
+ wvclren, wvseten = self.connect_wrport(m,
+ fu_bitdict, fu_selected,
+ wrpickers,
regfile, regname, fspec)
+ wvclrers[regfile.lower()].append(wvclren)
+ wvseters[regfile.lower()].append(wvseten)
+
+ if not self.make_hazard_vecs:
+ return
+
+ # for write-vectors: reduce the clr-ers and set-ers down to
+ # a single set of bits. otherwise if there are two write
+ # ports (on some regfiles), the last one doing comb += on
+ # the reg.wv[regfile] instance "wins" (and all others are ignored,
+ # whoops). if there was only one write-port per wv regfile this would
+ # not be an issue.
+ for regfile in wvclrers.keys():
+ wv = regs.wv[regfile]
+ wvset = wv.s # write-vec bit-level hazard ctrl
+ wvclr = wv.r # write-vec bit-level hazard ctrl
+ wvclren = wvclrers[regfile]
+ wvseten = wvseters[regfile]
+ comb += wvclr.eq(ortreereduce_sig(wvclren)) # clear (regfile write)
+ comb += wvset.eq(ortreereduce_sig(wvseten)) # set (issue time)
def get_byregfiles(self, readmode):
# the issue there is that this function is actually better
# suited at the moment
if readmode:
- fu.rd_latches = {}
+ fu.rd_latches = {} # read reg number latches
+ fu.rf_latches = {} # read flag latches
else:
fu.wr_latches = {}
pspec = TestMemPspec(ldst_ifacetype='testpi',
imem_ifacetype='',
addr_wid=48,
+ allow_overlap=True,
mask_wid=8,
reg_wid=64)
dut = NonProductionCore(pspec)
from openpower.decoder.power_decoder2 import PowerDecode2
from openpower.decoder.selectable_int import SelectableInt
from openpower.decoder.isa.all import ISA
+from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
+from openpower.state import CoreState
# note that using SPRreduced has to be done to match the
# PowerDecoder2 SPR map
from openpower.decoder.power_enums import spr_dict, Function, XER_bits
from soc.config.test.test_loadstore import TestMemPspec
from openpower.endian import bigendian
+from soc.regfile.regfiles import StateRegs
from soc.simple.core import NonProductionCore
from soc.experiment.compalu_multi import find_ok # hack
from soc.fu.cr.test.test_pipe_caller import CRTestCase
from soc.fu.branch.test.test_pipe_caller import BranchTestCase
from soc.fu.ldst.test.test_pipe_caller import LDSTTestCase
+from openpower.test.general.overlap_hazards import HazardTestCase
from openpower.util import spr_to_fast_reg
from openpower.consts import StateRegsEnum
def wait_for_busy_clear(cu):
while True:
- busy_o = yield cu.busy_o
- terminate_o = yield cu.core_terminate_o
+ busy_o = yield cu.o.busy_o
+ terminate_o = yield cu.o.core_terminate_o
if not busy_o:
print("busy/terminate:", busy_o, terminate_o)
break
m = Module()
comb = m.d.comb
instruction = Signal(32)
- ivalid_i = Signal()
pspec = TestMemPspec(ldst_ifacetype='testpi',
imem_ifacetype='',
addr_wid=48,
mask_wid=8,
+ allow_overlap=True,
reg_wid=64)
+ cur_state = CoreState("cur") # current state (MSR/PC/SVSTATE)
+ pdecode2 = PowerDecode2(None, state=cur_state,
+ #opkls=IssuerDecode2ToOperand,
+ svp64_en=True, # self.svp64_en,
+ regreduce_en=False, #self.regreduce_en
+ )
+
m.submodules.core = core = NonProductionCore(pspec)
- pdecode2 = core.pdecode2
+ m.submodules.pdecode2 = pdecode2
+ core.pdecode2 = pdecode2
l0 = core.l0
- comb += core.raw_opcode_i.eq(instruction)
- comb += core.ivalid_i.eq(ivalid_i)
+ comb += pdecode2.dec.raw_opcode_in.eq(instruction)
+ comb += pdecode2.dec.bigendian.eq(bigendian) # little / big?
+ comb += core.i.e.eq(pdecode2.e)
+ comb += core.i.state.eq(cur_state)
+ comb += core.i.raw_insn_i.eq(instruction)
+ comb += core.i.bigendian_i.eq(bigendian)
+
+ # set the PC StateRegs read port to always send back the PC
+ stateregs = core.regs.state
+ pc_regnum = StateRegs.PC
+ comb += stateregs.r_ports['cia'].ren.eq(1<<pc_regnum)
# temporary hack: says "go" immediately for both address gen and ST
ldst = core.fus.fus['ldst0']
- m.d.comb += ldst.ad.go.eq(ldst.ad.rel) # link addr-go direct to rel
- m.d.comb += ldst.st.go.eq(ldst.st.rel) # link store-go direct to rel
+ m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o) # link addr-go to rel
+ m.d.comb += ldst.st.go_i.eq(ldst.st.rel_o) # link store-go to rel
# nmigen Simulation
sim = Simulator(m)
sim.add_clock(1e-6)
def process():
- yield core.issue_i.eq(0)
yield
for test in self.test_data:
instructions = list(zip(gen, program.assembly.splitlines()))
yield from setup_tst_memory(l0, test.mem)
- yield from setup_regs(core, test)
+ yield from setup_regs(pdecode2, core, test)
index = sim.pc.CIA.value // 4
while index < len(instructions):
print(code)
# ask the decoder to decode this binary data (endian'd)
- yield core.bigendian_i.eq(bigendian) # little / big?
yield instruction.eq(ins) # raw binary instr.
- yield ivalid_i.eq(1)
yield Settle()
- # fn_unit = yield pdecode2.e.fn_unit
- #fuval = self.funit.value
- #self.assertEqual(fn_unit & fuval, fuval)
-
- # set operand and get inputs
- yield from set_issue(core, pdecode2, sim)
- yield Settle()
-
- yield from wait_for_busy_clear(core)
- yield ivalid_i.eq(0)
- yield
print("sim", code)
# call simulated operation
opname = code.split(' ')[0]
yield from sim.call(opname)
- index = sim.pc.CIA.value // 4
+ pc = sim.pc.CIA.value
+ nia = sim.pc.NIA.value
+ index = pc // 4
+
+ # set the PC to the same simulated value
+ # (core is not able to do this itself, except
+ # for branch / TRAP)
+ print ("after call, pc nia", pc, nia)
+ yield stateregs.regs[pc_regnum].reg.eq(pc)
+ yield Settle()
+
+ yield core.p.i_valid.eq(1)
+ yield
+ o_ready = yield core.p.o_ready
+ while True:
+ if o_ready:
+ break
+ yield
+ o_ready = yield core.p.o_ready
+ yield core.p.i_valid.eq(0)
+
+ # set operand and get inputs
+ yield from wait_for_busy_clear(core)
+
+ # synchronised (non-overlap) is fine to check
+ if not core.allow_overlap:
+ # register check
+ yield from check_regs(self, sim, core, test, code)
+
+ # Memory check
+ yield from check_mem(self, sim, core, test, code)
+
+ # overlap mode is only fine to check right at the end
+ if core.allow_overlap:
+ # wait until all settled
+ # XXX really this should be in DMI, which should in turn
+ # use issuer.any_busy to not send back "stopped" signal
+ while (yield core.o.any_busy_o):
+ yield
+ yield Settle()
# register check
yield from check_regs(self, sim, core, test, code)
# Memory check
yield from check_mem(self, sim, core, test, code)
+ # give a couple extra clock cycles for gtkwave display to be happy
+ yield
+ yield
+
sim.add_sync_process(process)
with sim.write_vcd("core_simulator.vcd", "core_simulator.gtkw",
traces=[]):
if __name__ == "__main__":
unittest.main(exit=False)
suite = unittest.TestSuite()
- suite.addTest(TestRunner(LDSTTestCase().test_data))
- suite.addTest(TestRunner(CRTestCase().test_data))
- suite.addTest(TestRunner(ShiftRotTestCase().test_data))
+ suite.addTest(TestRunner(HazardTestCase().test_data))
+ #suite.addTest(TestRunner(LDSTTestCase().test_data))
+ #suite.addTest(TestRunner(CRTestCase().test_data))
+ #suite.addTest(TestRunner(ShiftRotTestCase().test_data))
suite.addTest(TestRunner(LogicalTestCase().test_data))
suite.addTest(TestRunner(ALUTestCase().test_data))
- suite.addTest(TestRunner(BranchTestCase().test_data))
+ #suite.addTest(TestRunner(BranchTestCase().test_data))
runner = unittest.TextTestRunner()
runner.run(suite)