"""
from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
- ClockDomain, DomainRenamer, Mux, Const)
+ ClockDomain, DomainRenamer, Mux, Const, Repl)
from nmigen.cli import rtlil
from nmigen.cli import main
import sys
from soc.simple.core import NonProductionCore
from soc.config.test.test_loadstore import TestMemPspec
from soc.config.ifetch import ConfigFetchUnit
-from soc.decoder.power_enums import MicrOp
+from soc.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
+ SVP64PredMode)
from soc.debug.dmi import CoreDebug, DMIInterface
from soc.debug.jtag import JTAG
from soc.config.pinouts import get_pinspecs
comb += res.eq(regfile.data_o)
return res
+def get_predint(m, mask, name):
+    """work out how an SVP64 integer predicate mask is to be obtained
+
+    mirrors the equivalent function in ISACaller, with one difference:
+    no INT register is read here.  only the "recipe" is decoded, i.e.
+    which GPR is involved, whether its value is used as a shift amount,
+    and whether the value is bit-inverted.
+
+    * m    - Module that receives the combinatorial decode statements
+    * mask - predicate mask field, matched against SVP64PredInt values
+    * name - prefix for the names of the Signals created here
+
+    returns the tuple (regread, invert, unary, all1s):
+
+    * regread - GPR register number to be read (3, 10 or 30)
+    * invert  - set when the register value is to be inverted
+    * unary   - set when the mask is 1<<r3 rather than r3 itself
+    * all1s   - set when no mask is to be applied (all ones)
+    """
+    comb = m.d.comb
+    regread = Signal(5, name=name+"regread")
+    invert = Signal(name=name+"invert")
+    unary = Signal(name=name+"unary")
+    all1s = Signal(name=name+"all1s")
+
+    # (mask encoding, GPR number, bit-invert?) for every mode that uses
+    # a register value directly as the mask
+    plain_modes = (
+        (SVP64PredInt.R3, 3, False),
+        (SVP64PredInt.R3_N, 3, True),
+        (SVP64PredInt.R10, 10, False),
+        (SVP64PredInt.R10_N, 10, True),
+        (SVP64PredInt.R30, 30, False),
+        (SVP64PredInt.R30_N, 30, True),
+    )
+
+    with m.Switch(mask):
+        # the two special modes first: "no predication" and "1<<r3"
+        with m.Case(SVP64PredInt.ALWAYS.value):
+            comb += all1s.eq(1)  # use 0b1111 (all ones)
+        with m.Case(SVP64PredInt.R3_UNARY.value):
+            comb += regread.eq(3)
+            comb += unary.eq(1)  # 1<<r3 - shift r3 (single bit)
+        # one Case per plain register mode, generated from the table
+        for mode, gpr, inverted in plain_modes:
+            with m.Case(mode.value):
+                comb += regread.eq(gpr)
+                if inverted:
+                    comb += invert.eq(1)
+
+    return regread, invert, unary, all1s
+
+def get_predcr(m, mask, name):
+    """decode SVP64 predicate CR to reg number field and invert status
+    this is identical to _get_predcr in ISACaller
+
+    * m    - Module that receives the combinatorial decode statements
+    * mask - predicate mask field, matched against SVP64PredCR values
+    * name - prefix for the names of the Signals created here
+
+    returns the tuple (idx, invert):
+
+    * idx    - 2-bit index selecting which bit of a CR field is tested.
+               each complementary pair of conditions shares one index:
+               LT/GE=0, GT/LE=1, EQ/NE=2, SO/NS=3
+    * invert - 1 for LT, GT, EQ and SO; 0 for GE, LE, NE and NS.
+               NOTE(review): how consumers combine this flag with the
+               selected CR bit is not visible here - confirm against
+               the user of the returned value.
+    """
+    comb = m.d.comb
+    idx = Signal(2, name=name+"idx")
+    invert = Signal(name=name+"crinvert")
+    with m.Switch(mask):
+        with m.Case(SVP64PredCR.LT.value):
+            comb += idx.eq(0)
+            comb += invert.eq(1)
+        with m.Case(SVP64PredCR.GE.value):
+            comb += idx.eq(0)
+            comb += invert.eq(0)
+        with m.Case(SVP64PredCR.GT.value):
+            comb += idx.eq(1)
+            comb += invert.eq(1)
+        with m.Case(SVP64PredCR.LE.value):
+            comb += idx.eq(1)
+            comb += invert.eq(0)
+        with m.Case(SVP64PredCR.EQ.value):
+            comb += idx.eq(2)
+            comb += invert.eq(1)
+        with m.Case(SVP64PredCR.NE.value):
+            # NE is the complement of EQ, so it must test the same bit
+            # as EQ (index 2); index 1 is the GT/LE bit
+            comb += idx.eq(2)
+            comb += invert.eq(0)
+        with m.Case(SVP64PredCR.SO.value):
+            comb += idx.eq(3)
+            comb += invert.eq(1)
+        with m.Case(SVP64PredCR.NS.value):
+            comb += idx.eq(3)
+            comb += invert.eq(0)
+    return idx, invert
+
class TestIssuerInternal(Elaboratable):
"""TestIssuer - reads instructions from TestMemory and issues them
- efficiency and speed is not the main goal here: functional correctness is.
+ efficiency and speed is not the main goal here: functional correctness
+ and code clarity is. optimisations (which almost 100% interfere with
+ easy understanding) come later.
"""
def __init__(self, pspec):
self.simple_gpio = SimpleGPIO()
self.gpio_o = self.simple_gpio.gpio_o
- # main instruction core25
+ # main instruction core. suitable for prototyping / demo only
self.core = core = NonProductionCore(pspec)
# instruction decoder. goes into Trap Record
pdecode = create_pdecode()
- self.cur_state = CoreState("cur") # current state (MSR/PC/EINT/SVSTATE)
+ self.cur_state = CoreState("cur") # current state (MSR/PC/SVSTATE)
self.pdecode2 = PowerDecode2(pdecode, state=self.cur_state,
opkls=IssuerDecode2ToOperand,
svp64_en=self.svp64_en)
# Test Instruction memory
self.imem = ConfigFetchUnit(pspec).fu
- # one-row cache of instruction read
- self.iline = Signal(64) # one instruction line
- self.iprev_adr = Signal(64) # previous address: if different, do read
# DMI interface
self.dbg = CoreDebug()
self.pc_o = Signal(64, reset_less=True)
self.pc_i = Data(64, "pc_i") # set "ok" to indicate "please change me"
self.svstate_i = Data(32, "svstate_i") # ditto
- self.core_bigendian_i = Signal()
+ self.core_bigendian_i = Signal() # TODO: set based on MSR.LE
self.busy_o = Signal(reset_less=True)
self.memerr_o = Signal(reset_less=True)
self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
self.xer_r = xerrf.r_ports['full_xer'] # XER read
+ # for predication
+ self.int_pred = intrf.r_ports['pred'] # INT predicate read
+ self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read
+
# hack method of keeping an eye on whether branch/trap set the PC
self.state_nia = self.core.regs.rf['state'].w_ports['nia']
self.state_nia.wen.name = 'state_nia_wen'
fetch_pc_ready_o, fetch_pc_valid_i,
fetch_insn_valid_o, fetch_insn_ready_i):
"""fetch FSM
+
this FSM performs fetch of raw instruction data, partial-decodes
it 32-bit at a time to detect SVP64 prefixes, and will optionally
read a 2nd 32-bit quantity if that occurs.
sync += dec_opcode_i.eq(insn)
m.next = "INSN_READY"
# TODO: probably can start looking at pdecode2.rm_dec
- # here (or maybe even in INSN_READ state, if svp64_mode
+ # here or maybe even in INSN_READ state, if svp64_mode
# detected, in order to trigger - and wait for - the
# predicate reading.
+ if self.svp64_en:
+ pmode = pdecode2.rm_dec.predmode
+ """
+ if pmode != SVP64PredMode.ALWAYS.value:
+ fire predicate loading FSM and wait before
+ moving to INSN_READY
+ else:
+ sync += self.srcmask.eq(-1) # set to all 1s
+ sync += self.dstmask.eq(-1) # set to all 1s
+ m.next = "INSN_READY"
+ """
with m.State("INSN_READY"):
# hand over the instruction, to be decoded
with m.If(fetch_insn_ready_i):
m.next = "IDLE"
- def fetch_predicate_fsm(self, m, core, TODO):
+ def fetch_predicate_fsm(self, m,
+ pred_insn_valid_i, pred_insn_ready_o,
+ pred_mask_valid_o, pred_mask_ready_i):
"""fetch_predicate_fsm - obtains (constructs in the case of CR)
src/dest predicate masks
rm_dec = pdecode2.rm_dec # SVP64RMModeDecode
predmode = rm_dec.predmode
srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
+ cr_pred, int_pred = self.cr_pred, self.int_pred # read regfiles
+
+ # elif predmode == CR:
+ # CR-src sidx, sinvert = get_predcr(m, srcpred)
+ # CR-dst didx, dinvert = get_predcr(m, dstpred)
+ # TODO read CR-src and CR-dst into self.srcmask+dstmask with loop
+ # has to cope with first one then the other
+ # for cr_idx = FSM-state-loop(0..VL-1):
+ # FSM-state-trigger-CR-read:
+ # cr_ren = (1<<7-(cr_idx+SVP64CROffs.CRPred))
+ # comb += cr_pred.ren.eq(cr_ren)
+ # FSM-state-1-clock-later-actual-Read:
+ # cr_field = Signal(4)
+ # cr_bit = Signal(1)
+ # # read the CR field, select the appropriate bit
+ # comb += cr_field.eq(cr_pred.data_o)
+ # comb += cr_bit.eq(cr_field.bit_select(idx))
+ # # just like in branch BO tests
+ # comb += self.srcmask[cr_idx].eq(inv ^ cr_bit)
+
+ # decode predicates
+ sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
+ dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
+ sidx, scrinvert = get_predcr(m, srcpred, 's')
+ didx, dcrinvert = get_predcr(m, dstpred, 'd')
+
+ with m.FSM(name="fetch_predicate"):
+
+ with m.State("FETCH_PRED_IDLE"):
+ comb += pred_insn_ready_o.eq(1)
+ with m.If(pred_insn_valid_i):
+ with m.If(predmode == SVP64PredMode.INT):
+ # skip fetching destination mask register, when zero
+ with m.If(dall1s):
+ sync += self.dstmask.eq(-1)
+ # directly go to fetch source mask register
+ # guaranteed not to be zero (otherwise predmode
+ # would be SVP64PredMode.ALWAYS, not INT)
+ comb += int_pred.addr.eq(sregread)
+ comb += int_pred.ren.eq(1)
+ m.next = "INT_SRC_READ"
+ # fetch destination predicate register
+ with m.Else():
+ comb += int_pred.addr.eq(dregread)
+ comb += int_pred.ren.eq(1)
+ m.next = "INT_DST_READ"
+ with m.Else():
+ sync += self.srcmask.eq(-1)
+ sync += self.dstmask.eq(-1)
+ m.next = "FETCH_PRED_DONE"
+
+ with m.State("INT_DST_READ"):
+ # store destination mask
+ inv = Repl(dinvert, 64)
+ sync += self.dstmask.eq(self.int_pred.data_o ^ inv)
+ # skip fetching source mask register, when zero
+ with m.If(sall1s):
+ sync += self.srcmask.eq(-1)
+ m.next = "FETCH_PRED_DONE"
+ # fetch source predicate register
+ with m.Else():
+ comb += int_pred.addr.eq(sregread)
+ comb += int_pred.ren.eq(1)
+ m.next = "INT_SRC_READ"
+
+ with m.State("INT_SRC_READ"):
+ # store source mask
+ inv = Repl(sinvert, 64)
+ sync += self.srcmask.eq(self.int_pred.data_o ^ inv)
+ m.next = "FETCH_PRED_DONE"
+
+ with m.State("FETCH_PRED_DONE"):
+ comb += pred_mask_valid_o.eq(1)
+ with m.If(pred_mask_ready_i):
+ m.next = "FETCH_PRED_IDLE"
def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
dbg, core_rst, is_svp64_mode,
fetch_pc_ready_o, fetch_pc_valid_i,
fetch_insn_valid_o, fetch_insn_ready_i,
+ pred_insn_valid_i, pred_insn_ready_o,
+ pred_mask_valid_o, pred_mask_ready_i,
exec_insn_valid_i, exec_insn_ready_o,
exec_pc_valid_o, exec_pc_ready_i):
"""issue FSM
new_svstate = SVSTATERec("new_svstate")
comb += new_svstate.eq(cur_state.svstate)
+ # precalculate srcstep+1 and dststep+1
+ cur_srcstep = cur_state.svstate.srcstep
+ cur_dststep = cur_state.svstate.dststep
+ next_srcstep = Signal.like(cur_srcstep)
+ next_dststep = Signal.like(cur_dststep)
+ comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
+ comb += next_dststep.eq(cur_state.svstate.dststep+1)
+
with m.FSM(name="issue_fsm"):
# sync with the "fetch" phase which is reading the instruction
comb += self.insn_done.eq(1)
m.next = "ISSUE_START"
with m.Else():
- m.next = "INSN_EXECUTE" # move to "execute"
+ if self.svp64_en:
+ m.next = "PRED_START" # start fetching predicate
+ else:
+ m.next = "INSN_EXECUTE" # skip predication
+
+ with m.State("PRED_START"):
+ comb += pred_insn_valid_i.eq(1) # tell fetch_pred to start
+ with m.If(pred_insn_ready_o): # fetch_pred acknowledged us
+ m.next = "MASK_WAIT"
+
+ with m.State("MASK_WAIT"):
+ comb += pred_mask_ready_i.eq(1) # ready to receive the masks
+ with m.If(pred_mask_valid_o): # predication masks are ready
+ m.next = "INSN_EXECUTE"
# handshake with execution FSM, move to "wait" once acknowledged
with m.State("INSN_EXECUTE"):
# from self.srcmask and self.dstmask
# https://bugs.libre-soc.org/show_bug.cgi?id=617#c3
# but still without exceeding VL in either case
+ # IMPORTANT: when changing src/dest step, have to
+ # jump to m.next = "DECODE_SV" to deal with the change in
+ # SVSTATE
+
+ with m.If(is_svp64_mode):
+
+ if self.svp64_en:
+ pred_src_zero = pdecode2.rm_dec.pred_sz
+ pred_dst_zero = pdecode2.rm_dec.pred_dz
+
+ """
+ if not pred_src_zero:
+ if (((1<<cur_srcstep) & self.srcmask) == 0) and
+ (cur_srcstep != vl):
+ comb += update_svstate.eq(1)
+ comb += new_svstate.srcstep.eq(next_srcstep)
+ sync += sv_changed.eq(1)
+
+ if not pred_dst_zero:
+ if (((1<<cur_dststep) & self.dstmask) == 0) and
+ (cur_dststep != vl):
+ comb += new_svstate.dststep.eq(next_dststep)
+ comb += update_svstate.eq(1)
+ sync += sv_changed.eq(1)
+
+ if update_svstate:
+ m.next = "DECODE_SV"
+ """
+
comb += exec_insn_valid_i.eq(1) # trigger execute
with m.If(exec_insn_ready_o): # execute acknowledged us
m.next = "EXECUTE_WAIT"
with m.If(~dbg.core_stop_o & ~core_rst):
comb += exec_pc_ready_i.eq(1)
with m.If(exec_pc_valid_o):
- # precalculate srcstep+1 and dststep+1
- next_srcstep = Signal.like(cur_state.svstate.srcstep)
- next_dststep = Signal.like(cur_state.svstate.dststep)
- comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
- comb += next_dststep.eq(cur_state.svstate.dststep+1)
# was this the last loop iteration?
is_last = Signal()
comb += self.insn_done.eq(1)
m.next = "INSN_START" # back to fetch
- def elaborate(self, platform):
- m = Module()
+ def setup_peripherals(self, m):
comb, sync = m.d.comb, m.d.sync
m.submodules.core = core = DomainRenamer("coresync")(self.core)
m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o) # link addr-go direct to rel
m.d.comb += ldst.st.go_i.eq(st_go_edge) # link store-go to rising rel
+ return core_rst
+
+ def elaborate(self, platform):
+ m = Module()
+ # convenience
+ comb, sync = m.d.comb, m.d.sync
+ cur_state = self.cur_state
+ pdecode2 = self.pdecode2
+ dbg = self.dbg
+ core = self.core
+
+ # set up peripherals and core
+ core_rst = self.setup_peripherals(m)
+
# PC and instruction from I-Memory
comb += self.pc_o.eq(cur_state.pc)
pc_changed = Signal() # note write to PC
sv_changed = Signal() # note write to SVSTATE
# read state either from incoming override or from regfile
- # TODO: really should be doing MSR in the same say
+ # TODO: really should be doing MSR in the same way
pc = state_get(m, self.pc_i, "pc", # read PC
self.state_r_pc, StateRegs.PC)
svstate = state_get(m, self.svstate_i, "svstate", # read SVSTATE
fetch_insn_valid_o = Signal()
fetch_insn_ready_i = Signal()
+ # predicate fetch FSM decodes and fetches the predicate
+ pred_insn_valid_i = Signal()
+ pred_insn_ready_o = Signal()
+
+ # predicate fetch FSM delivers the masks
+ pred_mask_valid_o = Signal()
+ pred_mask_ready_i = Signal()
+
# issue FSM delivers the instruction to be executed
exec_insn_valid_i = Signal()
exec_insn_ready_o = Signal()
# (as opposed to using sync - which would be on a clock's delay)
# this includes the actual opcode, valid flags and so on.
- # Fetch, then Issue, then Execute. Issue is where the VL for-loop
- # lives. the ready/valid signalling is used to communicate between
- # the three.
+ # Fetch, then predicate fetch, then Issue, then Execute.
+ # Issue is where the VL for-loop lives. the ready/valid
+ # signalling is used to communicate between the four.
self.fetch_fsm(m, core, pc, svstate, nia, is_svp64_mode,
fetch_pc_ready_o, fetch_pc_valid_i,
dbg, core_rst, is_svp64_mode,
fetch_pc_ready_o, fetch_pc_valid_i,
fetch_insn_valid_o, fetch_insn_ready_i,
+ pred_insn_valid_i, pred_insn_ready_o,
+ pred_mask_valid_o, pred_mask_ready_i,
exec_insn_valid_i, exec_insn_ready_o,
exec_pc_valid_o, exec_pc_ready_i)
+ if self.svp64_en:
+ self.fetch_predicate_fsm(m,
+ pred_insn_valid_i, pred_insn_ready_o,
+ pred_mask_valid_o, pred_mask_ready_i)
+
self.execute_fsm(m, core, pc_changed, sv_changed,
exec_insn_valid_i, exec_insn_ready_o,
exec_pc_valid_o, exec_pc_ready_i)
return m
def do_dmi(self, m, dbg):
+ """deals with DMI debug requests
+
+ currently only provides read requests for the INT regfile, CR and XER
+ it will later also deal with *writing* to these regfiles.
+ """
comb = m.d.comb
sync = m.d.sync
dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer