make sure non-svp64-mode works
[soc.git] / src / soc / simple / issuer.py
index 3d6c3ce6c62bb430278117415463ed2f4fd396da..482d364080f9514e6f86ad0ad6c524ed05abb773 100644 (file)
@@ -16,7 +16,7 @@ improved.
 """
 
 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
-                    ClockDomain, DomainRenamer, Mux, Const)
+                    ClockDomain, DomainRenamer, Mux, Const, Repl)
 from nmigen.cli import rtlil
 from nmigen.cli import main
 import sys
@@ -30,7 +30,8 @@ from soc.regfile.regfiles import StateRegs, FastRegs
 from soc.simple.core import NonProductionCore
 from soc.config.test.test_loadstore import TestMemPspec
 from soc.config.ifetch import ConfigFetchUnit
-from soc.decoder.power_enums import MicrOp
+from soc.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
+                                     SVP64PredMode)
 from soc.debug.dmi import CoreDebug, DMIInterface
 from soc.debug.jtag import JTAG
 from soc.config.pinouts import get_pinspecs
@@ -71,22 +72,28 @@ def state_get(m, state_i, name, regfile, regnum):
         comb += res.eq(regfile.data_o)
     return res
 
-def get_predint(m, mask):
+def get_predint(m, mask, name):
     """decode SVP64 predicate integer mask field to reg number and invert
     this is identical to the equivalent function in ISACaller except that
     it doesn't read the INT directly, it just decodes "what needs to be done"
     i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
+
+    * all1s is set to indicate that no mask is to be applied.
+    * regread indicates the GPR register number to be read
+    * invert is set to indicate that the register value is to be inverted
+    * unary indicates that the contents of the register is to be shifted 1<<r3
     """
-    regread = Signal(5)
-    invert = Signal()
-    unary = Signal()
+    comb = m.d.comb
+    regread = Signal(5, name=name+"regread")
+    invert = Signal(name=name+"invert")
+    unary = Signal(name=name+"unary")
+    all1s = Signal(name=name+"all1s")
     with m.Switch(mask):
         with m.Case(SVP64PredInt.ALWAYS.value):
-            comb += regread.eq(0)
-            comb += invert.eq(1)
+            comb += all1s.eq(1)      # use 0b1111 (all ones)
         with m.Case(SVP64PredInt.R3_UNARY.value):
             comb += regread.eq(3)
-            comb += unary.eq(1)
+            comb += unary.eq(1)        # 1<<r3 - shift r3 (single bit)
         with m.Case(SVP64PredInt.R3.value):
             comb += regread.eq(3)
         with m.Case(SVP64PredInt.R3_N.value):
@@ -102,14 +109,15 @@ def get_predint(m, mask):
         with m.Case(SVP64PredInt.R30_N.value):
             comb += regread.eq(30)
             comb += invert.eq(1)
-    return regread, invert, unary
+    return regread, invert, unary, all1s
 
-def get_predcr(m, mask):
+def get_predcr(m, mask, name):
     """decode SVP64 predicate CR to reg number field and invert status
     this is identical to _get_predcr in ISACaller
     """
-    idx = Signal(2)
-    invert = Signal()
+    comb = m.d.comb
+    idx = Signal(2, name=name+"idx")
+    invert = Signal(name=name+"crinvert")
     with m.Switch(mask):
         with m.Case(SVP64PredCR.LT.value):
             comb += idx.eq(0)
@@ -141,7 +149,9 @@ def get_predcr(m, mask):
 class TestIssuerInternal(Elaboratable):
     """TestIssuer - reads instructions from TestMemory and issues them
 
-    efficiency and speed is not the main goal here: functional correctness is.
+    efficiency and speed is not the main goal here: functional correctness
+    and code clarity is.  optimisations (which almost 100% interfere with
+    easy understanding) come later.
     """
     def __init__(self, pspec):
 
@@ -251,6 +261,7 @@ class TestIssuerInternal(Elaboratable):
                         fetch_pc_ready_o, fetch_pc_valid_i,
                         fetch_insn_valid_o, fetch_insn_ready_i):
         """fetch FSM
+
         this FSM performs fetch of raw instruction data, partial-decodes
         it 32-bit at a time to detect SVP64 prefixes, and will optionally
         read a 2nd 32-bit quantity if that occurs.
@@ -339,13 +350,20 @@ class TestIssuerInternal(Elaboratable):
                     sync += dec_opcode_i.eq(insn)
                     m.next = "INSN_READY"
                     # TODO: probably can start looking at pdecode2.rm_dec
-                    # here (or maybe even in INSN_READ state, if svp64_mode
+                    # here or maybe even in INSN_READ state, if svp64_mode
                     # detected, in order to trigger - and wait for - the
                     # predicate reading.
-                    pmode = pdecode2.rm_dec.predmode
-                    sv_ptype = pdecode2.dec.op.SV_Ptype
-                    srcpred = pdecode2.rm_dec.srcpred
-                    dstpred = pdecode2.rm_dec.dstpred
+                    if self.svp64_en:
+                        pmode = pdecode2.rm_dec.predmode
+                    """
+                    if pmode != SVP64PredMode.ALWAYS.value:
+                        fire predicate loading FSM and wait before
+                        moving to INSN_READY
+                    else:
+                        sync += self.srcmask.eq(-1) # set to all 1s
+                        sync += self.dstmask.eq(-1) # set to all 1s
+                        m.next = "INSN_READY"
+                    """
 
             with m.State("INSN_READY"):
                 # hand over the instruction, to be decoded
@@ -353,7 +371,9 @@ class TestIssuerInternal(Elaboratable):
                 with m.If(fetch_insn_ready_i):
                     m.next = "IDLE"
 
-    def fetch_predicate_fsm(self, m, core, TODO):
+    def fetch_predicate_fsm(self, m,
+                            pred_insn_valid_i, pred_insn_ready_o,
+                            pred_mask_valid_o, pred_mask_ready_i):
         """fetch_predicate_fsm - obtains (constructs in the case of CR)
            src/dest predicate masks
 
@@ -372,22 +392,87 @@ class TestIssuerInternal(Elaboratable):
         predmode = rm_dec.predmode
         srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
         cr_pred, int_pred = self.cr_pred, self.int_pred   # read regfiles
-        # if predmode == INT:
-        #    INT-src sregread, sinvert, sunary = get_predint(m, srcpred)
-        #    INT-dst dregread, dinvert, dunary = get_predint(m, dstpred)
-        #    TODO read INT-src and INT-dst into self.srcmask+dstmask
+
         # elif predmode == CR:
         #    CR-src sidx, sinvert = get_predcr(m, srcpred)
         #    CR-dst didx, dinvert = get_predcr(m, dstpred)
         #    TODO read CR-src and CR-dst into self.srcmask+dstmask with loop
-        # else
-        #    sync += self.srcmask.eq(-1) # set to all 1s
-        #    sync += self.dstmask.eq(-1) # set to all 1s
+        #         has to cope with first one then the other
+        #    for cr_idx = FSM-state-loop(0..VL-1):
+        #        FSM-state-trigger-CR-read:
+        #               cr_ren = (1<<7-(cr_idx+SVP64CROffs.CRPred))
+        #               comb += cr_pred.ren.eq(cr_ren)
+        #        FSM-state-1-clock-later-actual-Read:
+        #               cr_field = Signal(4)
+        #               cr_bit = Signal(1)
+        #               # read the CR field, select the appropriate bit
+        #               comb += cr_field.eq(cr_pred.data_o)
+        #               comb += cr_bit.eq(cr_field.bit_select(idx)))
+        #               # just like in branch BO tests
+        #               comd += self.srcmask[cr_idx].eq(inv ^ cr_bit)
+
+        # decode predicates
+        sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
+        dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
+        sidx, scrinvert = get_predcr(m, srcpred, 's')
+        didx, dcrinvert = get_predcr(m, dstpred, 'd')
+
+        with m.FSM(name="fetch_predicate"):
+
+            with m.State("FETCH_PRED_IDLE"):
+                comb += pred_insn_ready_o.eq(1)
+                with m.If(pred_insn_valid_i):
+                    with m.If(predmode == SVP64PredMode.INT):
+                        # skip fetching destination mask register, when zero
+                        with m.If(dall1s):
+                            sync += self.dstmask.eq(-1)
+                            # directly go to fetch source mask register
+                            # guaranteed not to be zero (otherwise predmode
+                            # would be SVP64PredMode.ALWAYS, not INT)
+                            comb += int_pred.addr.eq(sregread)
+                            comb += int_pred.ren.eq(1)
+                            m.next = "INT_SRC_READ"
+                        # fetch destination predicate register
+                        with m.Else():
+                            comb += int_pred.addr.eq(dregread)
+                            comb += int_pred.ren.eq(1)
+                            m.next = "INT_DST_READ"
+                    with m.Else():
+                        sync += self.srcmask.eq(-1)
+                        sync += self.dstmask.eq(-1)
+                        m.next = "FETCH_PRED_DONE"
+
+            with m.State("INT_DST_READ"):
+                # store destination mask
+                inv = Repl(dinvert, 64)
+                sync += self.dstmask.eq(self.int_pred.data_o ^ inv)
+                # skip fetching source mask register, when zero
+                with m.If(sall1s):
+                    sync += self.srcmask.eq(-1)
+                    m.next = "FETCH_PRED_DONE"
+                # fetch source predicate register
+                with m.Else():
+                    comb += int_pred.addr.eq(sregread)
+                    comb += int_pred.ren.eq(1)
+                    m.next = "INT_SRC_READ"
+
+            with m.State("INT_SRC_READ"):
+                # store source mask
+                inv = Repl(sinvert, 64)
+                sync += self.srcmask.eq(self.int_pred.data_o ^ inv)
+                m.next = "FETCH_PRED_DONE"
+
+            with m.State("FETCH_PRED_DONE"):
+                comb += pred_mask_valid_o.eq(1)
+                with m.If(pred_mask_ready_i):
+                    m.next = "FETCH_PRED_IDLE"
 
     def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
                   dbg, core_rst, is_svp64_mode,
                   fetch_pc_ready_o, fetch_pc_valid_i,
                   fetch_insn_valid_o, fetch_insn_ready_i,
+                  pred_insn_valid_i, pred_insn_ready_o,
+                  pred_mask_valid_o, pred_mask_ready_i,
                   exec_insn_valid_i, exec_insn_ready_o,
                   exec_pc_valid_o, exec_pc_ready_i):
         """issue FSM
@@ -472,7 +557,20 @@ class TestIssuerInternal(Elaboratable):
                         comb += self.insn_done.eq(1)
                         m.next = "ISSUE_START"
                     with m.Else():
-                        m.next = "INSN_EXECUTE"  # move to "execute"
+                        if self.svp64_en:
+                            m.next = "PRED_START"  # start fetching predicate
+                        else:
+                            m.next = "INSN_EXECUTE" # skip predication
+
+            with m.State("PRED_START"):
+                comb += pred_insn_valid_i.eq(1)  # tell fetch_pred to start
+                with m.If(pred_insn_ready_o):  # fetch_pred acknowledged us
+                    m.next = "MASK_WAIT"
+
+            with m.State("MASK_WAIT"):
+                comb += pred_mask_ready_i.eq(1) # ready to receive the masks
+                with m.If(pred_mask_valid_o): # predication masks are ready
+                    m.next = "INSN_EXECUTE"
 
             # handshake with execution FSM, move to "wait" once acknowledged
             with m.State("INSN_EXECUTE"):
@@ -487,8 +585,9 @@ class TestIssuerInternal(Elaboratable):
 
                 with m.If(is_svp64_mode):
 
-                    pred_src_zero = pdecode2.rm_dec.pred_sz
-                    pred_dst_zero = pdecode2.rm_dec.pred_dz
+                    if self.svp64_en:
+                        pred_src_zero = pdecode2.rm_dec.pred_sz
+                        pred_dst_zero = pdecode2.rm_dec.pred_dz
 
                     """
                     if not pred_src_zero:
@@ -768,6 +867,14 @@ class TestIssuerInternal(Elaboratable):
         fetch_insn_valid_o = Signal()
         fetch_insn_ready_i = Signal()
 
+        # predicate fetch FSM decodes and fetches the predicate
+        pred_insn_valid_i = Signal()
+        pred_insn_ready_o = Signal()
+
+        # predicate fetch FSM delivers the masks
+        pred_mask_valid_o = Signal()
+        pred_mask_ready_i = Signal()
+
         # issue FSM delivers the instruction to the be executed
         exec_insn_valid_i = Signal()
         exec_insn_ready_o = Signal()
@@ -781,9 +888,9 @@ class TestIssuerInternal(Elaboratable):
         # (as opposed to using sync - which would be on a clock's delay)
         # this includes the actual opcode, valid flags and so on.
 
-        # Fetch, then Issue, then Execute. Issue is where the VL for-loop
-        # lives.  the ready/valid signalling is used to communicate between
-        # the three.
+        # Fetch, then predicate fetch, then Issue, then Execute.
+        # Issue is where the VL for-loop # lives.  the ready/valid
+        # signalling is used to communicate between the four.
 
         self.fetch_fsm(m, core, pc, svstate, nia, is_svp64_mode,
                        fetch_pc_ready_o, fetch_pc_valid_i,
@@ -793,9 +900,16 @@ class TestIssuerInternal(Elaboratable):
                        dbg, core_rst, is_svp64_mode,
                        fetch_pc_ready_o, fetch_pc_valid_i,
                        fetch_insn_valid_o, fetch_insn_ready_i,
+                       pred_insn_valid_i, pred_insn_ready_o,
+                       pred_mask_valid_o, pred_mask_ready_i,
                        exec_insn_valid_i, exec_insn_ready_o,
                        exec_pc_valid_o, exec_pc_ready_i)
 
+        if self.svp64_en:
+            self.fetch_predicate_fsm(m,
+                                     pred_insn_valid_i, pred_insn_ready_o,
+                                     pred_mask_valid_o, pred_mask_ready_i)
+
         self.execute_fsm(m, core, pc_changed, sv_changed,
                          exec_insn_valid_i, exec_insn_ready_o,
                          exec_pc_valid_o, exec_pc_ready_i)
@@ -811,6 +925,11 @@ class TestIssuerInternal(Elaboratable):
         return m
 
     def do_dmi(self, m, dbg):
+        """deals with DMI debug requests
+
+        currently only provides read requests for the INT regfile, CR and XER
+        it will later also deal with *writing* to these regfiles.
+        """
         comb = m.d.comb
         sync = m.d.sync
         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer