Merge branch 'master' of git.libre-soc.org:soc
author Alain D D Williams <addw@phcomp.co.uk>
Tue, 30 Mar 2021 18:10:09 +0000 (19:10 +0100)
committer Alain D D Williams <addw@phcomp.co.uk>
Tue, 30 Mar 2021 18:10:09 +0000 (19:10 +0100)
22 files changed:
Makefile
libreriscv
pinmux
src/soc/decoder/decode2execute1.py
src/soc/decoder/isa/mem.py
src/soc/decoder/isa/radixmmu.py
src/soc/decoder/power_decoder2.py
src/soc/decoder/power_enums.py
src/soc/decoder/pseudo/pywriter.py
src/soc/fu/alu/test/svp64_cases.py
src/soc/fu/mmu/fsm.py
src/soc/fu/spr/main_stage.py
src/soc/litex/florent
src/soc/regfile/regfiles.py
src/soc/regfile/util.py
src/soc/simple/core.py
src/soc/simple/issuer.py
src/soc/simple/issuer_verilog.py
src/soc/simple/test/test_core.py
src/soc/simple/test/test_runner.py
src/soc/simulator/test_sim.py
src/soc/sv/trans/svp64.py

index 20fe492187474443575dfca71efe2fe25a01b2b1..abb446dd00b02008e36f3412235f37be5194d312 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -11,7 +11,7 @@ mkpinmux:
        cp pinmux/ls180/ls180_pins.py src/soc/debug
        cp pinmux/ls180/ls180_pins.py src/soc/litex/florent/libresoc
 
-install: develop mkpinmux
+install: gitupdate develop mkpinmux svanalysis
 
 pywriter:
        python3 src/soc/decoder/pseudo/pywriter.py
@@ -21,13 +21,14 @@ svanalysis:
 
 develop:
        python3 setup.py develop # yes, develop, not install
-       python3 src/soc/decoder/pseudo/pywriter.py
 
-run_sim: install
-       python3 src/soc/simple/issuer_verilog.py --disable-svp64\
+# build and run libresoc litex simulation
+run_sim:
+       python3 src/soc/simple/issuer_verilog.py --disable-svp64 \
                        src/soc/litex/florent/libresoc/libresoc.v
        python3 src/soc/litex/florent/sim.py --cpu=libresoc
 
+# and with test gpio (useful for XICS IRC testing)
 testgpio_run_sim:
        python3 src/soc/simple/issuer_verilog.py \
                        src/soc/litex/florent/libresoc/libresoc.v \
@@ -38,9 +39,24 @@ testgpio_run_sim:
 ls180_verilog:
        python3 src/soc/simple/issuer_verilog.py \
                --debug=jtag --enable-core --enable-pll \
-               --enable-xics --enable-sram4x4kblock --disable-svp64
+               --enable-xics --disable-svp64 \
                        src/soc/litex/florent/libresoc/libresoc.v
 
+ls180_4k_verilog:
+       python3 src/soc/simple/issuer_verilog.py \
+               --debug=jtag --enable-core --enable-pll \
+               --enable-xics --enable-sram4x4kblock --disable-svp64 \
+                       src/soc/litex/florent/libresoc/libresoc.v
+
+# build the litex libresoc SoC without 4k SRAMs
+ls180_verilog_build: ls180_verilog
+       make -C soc/soc/litex/florent ls180
+
+# build the litex libresoc SoC with 4k SRAMs
+ls180_4ksram_verilog_build: ls180_4k_verilog
+       make -C soc/soc/litex/florent ls1804k
+
+# testing (usually done at install time)
 test: install
        python3 setup.py test # could just run nosetest3...
 
@@ -58,13 +74,16 @@ BUILDDIR      = build
 help:
        @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
 
+# copies all documentation to libre-soc (libre-soc admins only)
+htmlupload: clean html
+       rsync -HPavz --delete build/html/* \
+        libre-soc.org:/var/www/libre-soc.org/docs/soc/
+
 # Catch-all target: route all unknown targets to Sphinx using the new
 # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
 %: Makefile
+       echo "catch-all falling through to sphinx for document building"
        mkdir -p "$(SOURCEDIR)"/src/gen
        sphinx-apidoc --ext-autodoc -o "$(SOURCEDIR)"/src/gen ./src/soc
        @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
 
-htmlupload: clean html
-       rsync -HPavz --delete build/html/* \
-        libre-soc.org:/var/www/libre-soc.org/docs/soc/
index 84cababea353b8c1882eb3b1512438579c31d37c..59c4a072402ffaf98cb43fc2a33084e7bf1f1a6b 160000 (submodule)
@@ -1 +1 @@
-Subproject commit 84cababea353b8c1882eb3b1512438579c31d37c
+Subproject commit 59c4a072402ffaf98cb43fc2a33084e7bf1f1a6b
diff --git a/pinmux b/pinmux
index 6ea0beaabfa993fc6cb369ab1c5731d8f6a839c3..160c3204e21612289e5b1a0a1a1f8647e9f83870 160000 (submodule)
--- a/pinmux
+++ b/pinmux
@@ -1 +1 @@
-Subproject commit 6ea0beaabfa993fc6cb369ab1c5731d8f6a839c3
+Subproject commit 160c3204e21612289e5b1a0a1a1f8647e9f83870
index a6ac6262367c445d55ad7df6f64c72d0efb575d9..6333dcdf87641405b68fc438761cf222e7755d34 100644 (file)
@@ -5,7 +5,8 @@ based on Anton Blanchard microwatt decode2.vhdl
 """
 from nmigen import Signal, Record
 from nmutil.iocontrol import RecordObject
-from soc.decoder.power_enums import MicrOp, CryIn, Function, SPR, LDSTMode
+from soc.decoder.power_enums import (MicrOp, CryIn, Function,
+                                     SPRfull, SPRreduced, LDSTMode)
 from soc.consts import TT
 from soc.experiment.mem_types import LDSTException
 
@@ -83,7 +84,13 @@ class Decode2ToOperand(IssuerDecode2ToOperand):
 
 class Decode2ToExecute1Type(RecordObject):
 
-    def __init__(self, name=None, asmcode=True, opkls=None, do=None):
+    def __init__(self, name=None, asmcode=True, opkls=None, do=None,
+                       regreduce_en=False):
+
+        if regreduce_en:
+            SPR = SPRreduced
+        else:
+            SPR = SPRfull
 
         if do is None and opkls is None:
             opkls = Decode2ToOperand
index 7a806a402b9b3599257fbfc43c3e0cdc2ee52789..15df1cbd9cb4d3b440df64b75ec111f16d6458a9 100644 (file)
@@ -16,8 +16,6 @@ from copy import copy
 from soc.decoder.selectable_int import (FieldSelectableInt, SelectableInt,
                                         selectconcat)
 
-from soc.decoder.power_enums import SPR as DEC_SPR
-
 from soc.decoder.helpers import exts, gtu, ltu, undefined
 import math
 import sys
@@ -49,7 +47,11 @@ class Mem:
             for i, val in enumerate(mem):
                 initial_mem[startaddr + row_bytes*i] = (val, row_bytes)
 
-        for addr, (val, width) in initial_mem.items():
+        for addr, val in initial_mem.items():
+            if isinstance(val, tuple):
+                (val, width) = val
+            else:
+                width = row_bytes # assume same width
             #val = swap_order(val, width)
             self.st(addr, val, width, swap=False)
 
index edb3c8bb12a32004308658927cf50cb62a980f67..d1894e7d1f9e50cb4625eef913731c5ae363696e 100644 (file)
@@ -198,7 +198,6 @@ def NLS(x):
 
 """
 
-testaddr = 0x10000
 testmem = {
 
            0x10000:    # PARTITION_TABLE_2 (not implemented yet)
@@ -208,7 +207,6 @@ testmem = {
            0x30000:     # RADIX_ROOT_PTE
                         # V = 1 L = 0 NLB = 0x400 NLS = 9
            0x8000000000040009,
-########   0x4000000 #### wrong address calculated by _get_pgtable_addr
            0x40000:     # RADIX_SECOND_LEVEL
                         #         V = 1 L = 1 SW = 0 RPN = 0
                            # R = 1 C = 1 ATT = 0 EAA 0x7
@@ -218,8 +216,36 @@ testmem = {
                        # RTS1 = 0x2 RPDB = 0x300 RTS2 = 0x5 RPDS = 13
            0x40000000000300ad,
           }
-          
 
+# this one has a 2nd-level RADIX entry with an RPN of 0x5000
+testmem2 = {
+
+           0x10000:    # PARTITION_TABLE_2 (not implemented yet)
+                       # PATB_GR=1 PRTB=0x1000 PRTS=0xb
+           0x800000000100000b,
+
+           0x30000:     # RADIX_ROOT_PTE
+                        # V = 1 L = 0 NLB = 0x400 NLS = 9
+           0x8000000000040009,
+           0x40000:     # RADIX_SECOND_LEVEL
+                        #         V = 1 L = 1 SW = 0 RPN = 0x5000
+                           # R = 1 C = 1 ATT = 0 EAA 0x7
+           0xc000000005000187,
+
+           0x1000000:   # PROCESS_TABLE_3
+                       # RTS1 = 0x2 RPDB = 0x300 RTS2 = 0x5 RPDS = 13
+           0x40000000000300ad,
+          }
+
+
+testresult = """
+    prtbl = 1000000
+    DCACHE GET 1000000 PROCESS_TABLE_3
+    DCACHE GET 30000 RADIX_ROOT_PTE V = 1 L = 0
+    DCACHE GET 40000 RADIX_SECOND_LEVEL V = 1 L = 1
+    DCACHE GET 10000 PARTITION_TABLE_2
+translated done 1 err 0 badtree 0 addr 40000 pte 0
+"""
 
 # see qemu/target/ppc/mmu-radix64.c for reference
 class RADIX:
@@ -258,6 +284,7 @@ class RADIX:
         #shift = SelectableInt(0, 32)
 
         pte = self._walk_tree(addr, pgbase, mode, mbits, shift, priv)
+
         # use pte to caclculate phys address
         return self.mem.ld(address, width, swap, check_in_mem)
 
@@ -285,19 +312,27 @@ class RADIX:
     def _next_level(self, addr, entry_width, swap, check_in_mem):
         # implement read access to mmu mem here
 
-        value = 0
-        if addr.value in testmem:
-            value = testmem[addr.value]
-        else:
-            print("not found")
+        value = self.mem.ld(addr.value, 8, False, check_in_mem)
+        assert(value is not None, "address lookup %x not found" % addr.value)
 
-        ##value = self.mem.ld(addr.value, entry_width, swap, check_in_mem)
         print("addr", hex(addr.value))
         data = SelectableInt(value, 64) # convert to SelectableInt
         print("value", hex(value))
         # index += 1
         return data;
 
+    def _prtable_lookup(self, prtbl, addr, pid):
+        # v.shift := unsigned('0' & r.prtbl(4 downto 0));
+        shift = prtbl[59:63]
+        print("shift",shift)
+        prtable_addr = self._get_prtable_addr(shift, prtbl, addr, pid)
+        print("prtable_addr",prtable_addr)
+        # TODO check and loop if needed
+
+        assert(prtable_addr==0x1000000)
+        print("fetch data from PROCESS_TABLE_3")
+        return "TODO"
+
     def _walk_tree(self, addr, pgbase, mode, mbits, shift, priv=1):
         """walk tree
 
@@ -375,8 +410,15 @@ class RADIX:
         print("last 8 bits ----------")
         print
 
+        prtbl = SelectableInt(0x1000000,64) #FIXME do not hardcode
+
         # get address of root entry
+        shift = selectconcat(SelectableInt(0,1),prtbl[58:63]) # TODO verify
         addr_next = self._get_prtable_addr(shift, prtbl, addr, pidr)
+        print("starting with prtable, addr_next",addr_next)
+
+        assert(addr_next.bits == 64)
+        assert(addr_next.value == 0x1000000) #TODO
 
         addr_next = SelectableInt(0x30000,64) # radix root for testing
 
@@ -396,9 +438,12 @@ class RADIX:
             if not valid:
                 return "invalid" # TODO: return error
             if leaf:
+                print ("is leaf, checking perms")
                 ok = self._check_perms(data, priv, mode)
                 if ok == True: # data was ok, found phys address, return it?
-                    return addr_next
+                    paddr = self._get_pte(addrsh, addr, data)
+                    print ("    phys addr", hex(paddr.value))
+                    return paddr
                 return ok # return the error code
             else:
                 newlookup = self._new_lookup(data, mbits, shift)
@@ -411,13 +456,11 @@ class RADIX:
                 print(mask)    #SelectableInt(value=0x9, bits=4)
                 print(pgbase)  #SelectableInt(value=0x40000, bits=56)
                 print(shift)   #SelectableInt(value=0x4, bits=16) #FIXME
-                pgbase = SelectableInt(pgbase.value,64)
+                pgbase = SelectableInt(pgbase.value, 64)
                 addrsh = addrshift(addr,shift)
                 addr_next = self._get_pgtable_addr(mask, pgbase, addrsh)
                 print("addr_next",addr_next)
                 print("addrsh",addrsh)
-                assert(addr_next == 0x40000)
-                return "TODO verify next level"
 
     def _new_lookup(self, data, mbits, shift):
         """
@@ -483,7 +526,7 @@ class RADIX:
         # below *directly* match the spec, unlike microwatt which
         # has to turn them around (to LE)
         mask = genmask(shift, 44)
-        nonzero = addr[1:32] & mask[13:44] # mask 31 LSBs (BE numbered 13:44)
+        nonzero = addr[2:33] & mask[13:44] # mask 31 LSBs (BE numbered 13:44)
         print ("RADIX _segment_check nonzero", bin(nonzero.value))
         print ("RADIX _segment_check addr[0-1]", addr[0].value, addr[1].value)
         if addr[0] != addr[1] or nonzero != 0:
@@ -572,16 +615,16 @@ class RADIX:
                 (effpid(31 downto 8) and finalmask(23 downto 0))) &
                 effpid(7 downto 0) & "0000";
         """
-        print ("_get_prtable_addr_", shift, prtbl, addr, pid)
+        print ("_get_prtable_addr", shift, prtbl, addr, pid)
         finalmask = genmask(shift, 44)
         finalmask24 = finalmask[20:44]
         if addr[0].value == 1:
             effpid = SelectableInt(0, 32)
         else:
             effpid = pid #self.pid # TODO, check on this
-        zero16 = SelectableInt(0, 16)
+        zero8 = SelectableInt(0, 8)
         zero4 = SelectableInt(0, 4)
-        res = selectconcat(zero16,
+        res = selectconcat(zero8,
                            prtbl[8:28],                        #
                            (prtbl[28:52] & ~finalmask24) |     #
                            (effpid[0:24] & finalmask24),       #
@@ -614,16 +657,22 @@ class RADIX:
          (r.addr(55 downto 12) and finalmask))
         & r.pde(11 downto 0);
         """
+        shift.value = 12
         finalmask = genmask(shift, 44)
         zero8 = SelectableInt(0, 8)
+        rpn = pde[8:52]       # RPN = Real Page Number
+        abits = addr[8:52] # non-masked address bits
+        print("     get_pte RPN", hex(rpn.value))
+        print("             abits", hex(abits.value))
+        print("             shift", shift.value)
+        print("             finalmask", bin(finalmask.value))
         res = selectconcat(zero8,
-                           (pde[8:52]  & ~finalmask) | #
-                           (addr[8:52] & finalmask),   #
-                           pde[52:64],
+                           (rpn  & ~finalmask) | #
+                           (abits & finalmask),   #
+                           addr[52:64],
                            )
         return res
 
-
 class TestRadixMMU(unittest.TestCase):
 
     def test_genmask(self):
@@ -631,7 +680,7 @@ class TestRadixMMU(unittest.TestCase):
         mask = genmask(shift, 43)
         print ("    mask", bin(mask.value))
 
-        self.assertEqual(sum([1, 2, 3]), 6, "Should be 6")
+        self.assertEqual(mask.value, 0b11111, "mask should be 5 1s")
 
     def test_get_pgtable_addr(self):
 
@@ -643,10 +692,84 @@ class TestRadixMMU(unittest.TestCase):
         pgbase = SelectableInt(0,64)
         addrsh = SelectableInt(0,16)
         ret = dut._get_pgtable_addr(mask_size, pgbase, addrsh)
-        print("ret=",ret)
-        assert(ret==0)
+        print("ret=", ret)
+        self.assertEqual(ret, 0, "pgtbl_addr should be 0")
+
+    def test_prtable_lookup(self):
+
+        mem = None
+        caller = None
+        dut = RADIX(mem, caller)
+
+        prtbl = SelectableInt(0x1000000,64)
+        addr = SelectableInt(0, 64)
+        pid = SelectableInt(0, 64)
+        ret = dut._prtable_lookup(prtbl, addr, pid)
+
+    def test_walk_tree_1(self):
+
+        # test address as in
+        # https://github.com/power-gem5/gem5/blob/gem5-experimental/src/arch/power/radix_walk_example.txt#L65
+        testaddr = 0x1000
+        expected = 0x1000
+
+        # set up dummy minimal ISACaller
+        spr = {'DSISR': SelectableInt(0, 64),
+               'DAR': SelectableInt(0, 64),
+               'PIDR': SelectableInt(0, 64),
+               'PRTBL': SelectableInt(0, 64)
+        }
+        # set problem state (MSR.PR) to 0 (other unit tests set it to 1)
+        msr = SelectableInt(0, 64)
+        msr[MSRb.PR] = 0
+        class ISACaller: pass
+        caller = ISACaller()
+        caller.spr = spr
+        caller.msr = msr
+
+        shift = SelectableInt(5, 6)
+        mask = genmask(shift, 43)
+        print ("    mask", bin(mask.value))
+
+        mem = Mem(row_bytes=8, initial_mem=testmem)
+        mem = RADIX(mem, caller)
+        # -----------------------------------------------
+        # |/|RTS1|/|     RPDB          | RTS2 |  RPDS   |
+        # -----------------------------------------------
+        # |0|1  2|3|4                55|56  58|59     63|
+        data = SelectableInt(0, 64)
+        data[1:3] = 0b01
+        data[56:59] = 0b11
+        data[59:64] = 0b01101 # mask
+        data[55] = 1
+        (rts, mbits, pgbase) = mem._decode_prte(data)
+        print ("    rts", bin(rts.value), rts.bits)
+        print ("    mbits", bin(mbits.value), mbits.bits)
+        print ("    pgbase", hex(pgbase.value), pgbase.bits)
+        addr = SelectableInt(0x1000, 64)
+        check = mem._segment_check(addr, mbits, shift)
+        print ("    segment check", check)
+
+        print("walking tree")
+        addr = SelectableInt(testaddr,64)
+        # pgbase = None
+        mode = None
+        #mbits = None
+        shift = rts
+        result = mem._walk_tree(addr, pgbase, mode, mbits, shift)
+        print("     walking tree result", result)
+        print("should be", testresult)
+        self.assertEqual(result.value, expected,
+                             "expected 0x%x got 0x%x" % (expected,
+                                                    result.value))
+
+
+    def test_walk_tree_2(self):
+
+        # test address slightly different
+        testaddr = 0x1101
+        expected = 0x5001101
 
-    def test_walk_tree(self):
         # set up dummy minimal ISACaller
         spr = {'DSISR': SelectableInt(0, 64),
                'DAR': SelectableInt(0, 64),
@@ -665,7 +788,7 @@ class TestRadixMMU(unittest.TestCase):
         mask = genmask(shift, 43)
         print ("    mask", bin(mask.value))
 
-        mem = Mem(row_bytes=8)
+        mem = Mem(row_bytes=8, initial_mem=testmem2)
         mem = RADIX(mem, caller)
         # -----------------------------------------------
         # |/|RTS1|/|     RPDB          | RTS2 |  RPDS   |
@@ -692,6 +815,10 @@ class TestRadixMMU(unittest.TestCase):
         shift = rts
         result = mem._walk_tree(addr, pgbase, mode, mbits, shift)
         print("     walking tree result", result)
+        print("should be", testresult)
+        self.assertEqual(result.value, expected,
+                             "expected 0x%x got 0x%x" % (expected,
+                                                    result.value))
 
 
 if __name__ == '__main__':
index abed03ecbd1cb29a34b8f2724791139bbfa4c661..c0d523d680352d1aa782d45e43ebbec7321f0e45 100644 (file)
@@ -27,7 +27,8 @@ from soc.decoder.power_decoder import create_pdecode
 from soc.decoder.power_enums import (MicrOp, CryIn, Function,
                                      CRInSel, CROutSel,
                                      LdstLen, In1Sel, In2Sel, In3Sel,
-                                     OutSel, SPR, RC, LDSTMode,
+                                     OutSel, SPRfull, SPRreduced,
+                                     RC, LDSTMode,
                                      SVEXTRA, SVEtype, SVPtype)
 from soc.decoder.decode2execute1 import (Decode2ToExecute1Type, Data,
                                          Decode2ToOperand)
@@ -65,13 +66,23 @@ class SPRMap(Elaboratable):
     """SPRMap: maps POWER9 SPR numbers to internal enum values, fast and slow
     """
 
-    def __init__(self):
+    def __init__(self, regreduce_en):
+        self.regreduce_en = regreduce_en
+        if regreduce_en:
+            SPR = SPRreduced
+        else:
+            SPR = SPRfull
+
         self.spr_i = Signal(10, reset_less=True)
         self.spr_o = Data(SPR, name="spr_o")
         self.fast_o = Data(3, name="fast_o")
 
     def elaborate(self, platform):
         m = Module()
+        if self.regreduce_en:
+            SPR = SPRreduced
+        else:
+            SPR = SPRfull
         with m.Switch(self.spr_i):
             for i, x in enumerate(SPR):
                 with m.Case(x.value):
@@ -90,7 +101,12 @@ class DecodeA(Elaboratable):
     decodes register RA, implicit and explicit CSRs
     """
 
-    def __init__(self, dec):
+    def __init__(self, dec, regreduce_en):
+        self.regreduce_en = regreduce_en
+        if self.regreduce_en:
+            SPR = SPRreduced
+        else:
+            SPR = SPRfull
         self.dec = dec
         self.sel_in = Signal(In1Sel, reset_less=True)
         self.insn_in = Signal(32, reset_less=True)
@@ -104,7 +120,7 @@ class DecodeA(Elaboratable):
         comb = m.d.comb
         op = self.dec.op
         reg = self.reg_out
-        m.submodules.sprmap = sprmap = SPRMap()
+        m.submodules.sprmap = sprmap = SPRMap(self.regreduce_en)
 
         # select Register A field, if *full 7 bits* are zero (2 more from SVP64)
         ra = Signal(5, reset_less=True)
@@ -325,7 +341,12 @@ class DecodeOut(Elaboratable):
     decodes output register RA, RT or SPR
     """
 
-    def __init__(self, dec):
+    def __init__(self, dec, regreduce_en):
+        self.regreduce_en = regreduce_en
+        if self.regreduce_en:
+            SPR = SPRreduced
+        else:
+            SPR = SPRfull
         self.dec = dec
         self.sel_in = Signal(OutSel, reset_less=True)
         self.insn_in = Signal(32, reset_less=True)
@@ -336,7 +357,7 @@ class DecodeOut(Elaboratable):
     def elaborate(self, platform):
         m = Module()
         comb = m.d.comb
-        m.submodules.sprmap = sprmap = SPRMap()
+        m.submodules.sprmap = sprmap = SPRMap(self.regreduce_en)
         op = self.dec.op
         reg = self.reg_out
 
@@ -684,9 +705,10 @@ class PowerDecodeSubset(Elaboratable):
     only fields actually requested are copied over. hence, "subset" (duh).
     """
     def __init__(self, dec, opkls=None, fn_name=None, final=False, state=None,
-                            svp64_en=True):
+                            svp64_en=True, regreduce_en=False):
 
         self.svp64_en = svp64_en
+        self.regreduce_en = regreduce_en
         if svp64_en:
             self.sv_rm = SVP64Rec(name="dec_svp64") # SVP64 RM field
         self.sv_a_nz = Signal(1)
@@ -700,7 +722,8 @@ class PowerDecodeSubset(Elaboratable):
 
         # only needed for "main" PowerDecode2
         if not self.final:
-            self.e = Decode2ToExecute1Type(name=self.fn_name, do=self.do)
+            self.e = Decode2ToExecute1Type(name=self.fn_name, do=self.do,
+                                           regreduce_en=regreduce_en)
 
         # create decoder if one not already given
         if dec is None:
@@ -766,6 +789,10 @@ class PowerDecodeSubset(Elaboratable):
         return getattr(self.dec.op, op_field, None)
 
     def elaborate(self, platform):
+        if self.regreduce_en:
+            SPR = SPRreduced
+        else:
+            SPR = SPRfull
         m = Module()
         comb = m.d.comb
         state = self.state
@@ -778,7 +805,8 @@ class PowerDecodeSubset(Elaboratable):
                 name = "tmp"
             else:
                 name = self.fn_name + "tmp"
-            self.e_tmp = Decode2ToExecute1Type(name=name, opkls=self.opkls)
+            self.e_tmp = Decode2ToExecute1Type(name=name, opkls=self.opkls,
+                                           regreduce_en=self.regreduce_en)
 
         # set up submodule decoders
         m.submodules.dec = self.dec
@@ -909,8 +937,9 @@ class PowerDecode2(PowerDecodeSubset):
     """
 
     def __init__(self, dec, opkls=None, fn_name=None, final=False,
-                            state=None, svp64_en=True):
-        super().__init__(dec, opkls, fn_name, final, state, svp64_en)
+                            state=None, svp64_en=True, regreduce_en=False):
+        super().__init__(dec, opkls, fn_name, final, state, svp64_en,
+                         regreduce_en=False)
         self.exc = LDSTException("dec2_exc")
 
         if self.svp64_en:
@@ -968,10 +997,10 @@ class PowerDecode2(PowerDecodeSubset):
         # copy over if non-exception, non-privileged etc. is detected
 
         # set up submodule decoders
-        m.submodules.dec_a = dec_a = DecodeA(self.dec)
+        m.submodules.dec_a = dec_a = DecodeA(self.dec, self.regreduce_en)
         m.submodules.dec_b = dec_b = DecodeB(self.dec)
         m.submodules.dec_c = dec_c = DecodeC(self.dec)
-        m.submodules.dec_o = dec_o = DecodeOut(self.dec)
+        m.submodules.dec_o = dec_o = DecodeOut(self.dec, self.regreduce_en)
         m.submodules.dec_o2 = dec_o2 = DecodeOut2(self.dec)
         m.submodules.dec_cr_in = self.dec_cr_in = DecodeCRIn(self.dec)
         m.submodules.dec_cr_out = self.dec_cr_out = DecodeCROut(self.dec)
index faa8cfc1c9ee827b7cd4480e1f29d8a5bde612c1..3353d202526ee2a885fd645282ab5b94c7320ef5 100644 (file)
@@ -8,6 +8,10 @@ Note: for SV, from v3.1B p12:
 
     The designated SPR sandbox consists of non-privileged SPRs 704-719 and
     privileged SPRs 720-735.
+
+Note: a much shorter list of SPRs can be selected instead, to reduce
+regfile size in HDL.  This is SPRreduced; the supported list is in
+get_spr_enum.
 """
 
 from enum import Enum, unique
@@ -430,19 +434,35 @@ class CROutSel(Enum):
 # http://libre-riscv.org/openpower/isatables/sprs.csv
 # http://bugs.libre-riscv.org/show_bug.cgi?id=261
 
-spr_csv = get_csv("sprs.csv")
-spr_info = namedtuple('spr_info', 'SPR priv_mtspr priv_mfspr length idx')
-spr_dict = {}
-spr_byname = {}
-for row in spr_csv:
-    info = spr_info(SPR=row['SPR'], priv_mtspr=row['priv_mtspr'],
-                    priv_mfspr=row['priv_mfspr'], length=int(row['len']),
-                    idx=int(row['Idx']))
-    spr_dict[int(row['Idx'])] = info
-    spr_byname[row['SPR']] = info
-fields = [(row['SPR'], int(row['Idx'])) for row in spr_csv]
-SPR = Enum('SPR', fields)
-
+def get_spr_enum(full_file):
+    """get_spr_enum - creates an Enum of SPRs, dynamically
+    with the option (full_file=False) to reduce it to a much shorter list.
+    This drastically reduces the size of the regfile.
+    """
+    short_list = {'PIDR', 'DAR', 'PRTBL', 'DSISR', 'SVSRR0', 'SVSTATE',
+                  'SPRG0_priv', 'SPRG1_priv', 'SPRG2_priv', 'SPRG3_priv',
+                  'SPRG3'
+                 }
+    spr_csv = []
+    for row in get_csv("sprs.csv"):
+        if full_file or row['SPR'] in short_list:
+            spr_csv.append(row)
+
+    spr_info = namedtuple('spr_info', 'SPR priv_mtspr priv_mfspr length idx')
+    spr_dict = {}
+    spr_byname = {}
+    for row in spr_csv:
+        info = spr_info(SPR=row['SPR'], priv_mtspr=row['priv_mtspr'],
+                        priv_mfspr=row['priv_mfspr'], length=int(row['len']),
+                        idx=int(row['Idx']))
+        spr_dict[int(row['Idx'])] = info
+        spr_byname[row['SPR']] = info
+    fields = [(row['SPR'], int(row['Idx'])) for row in spr_csv]
+    SPR = Enum('SPR', fields)
+    return SPR, spr_dict, spr_byname
+
+SPRfull, spr_dict, spr_byname = get_spr_enum(full_file=True)
+SPRreduced, _, _ = get_spr_enum(full_file=False)
 
 XER_bits = {
     'SO': 32,
@@ -454,11 +474,13 @@ XER_bits = {
 
 if __name__ == '__main__':
     # find out what the heck is in SPR enum :)
-    print("sprs", len(SPR))
-    print(dir(SPR))
+    print("sprs full", len(SPRfull))
+    print(dir(SPRfull))
+    print("sprs reduced", len(SPRreduced))
+    print(dir(SPRreduced))
     print(dir(Enum))
-    print(SPR.__members__['TAR'])
-    for x in SPR:
+    print(SPRfull.__members__['TAR'])
+    for x in SPRfull:
         print(x, x.value, str(x), x.name)
 
     print("function", Function.ALU.name)
index a274dccb51491ddcaf7284fef7b7b1272b33622f..77ff775ebd815c2ac121318109d64cfd02b4ad7a 100644 (file)
@@ -131,15 +131,16 @@ class PyISAWriter(ISA):
 
 if __name__ == '__main__':
     isa = PyISAWriter()
+    write_isa_class = True
     if len(sys.argv) == 1:  # quick way to do it
         print(dir(isa))
         sources = isa.page.keys()
     else:
         sources = sys.argv[1:]
-    write_isa_class = True
-    if sources[0] == "noall": # don't rewrite all.py
-        write_isa_class = False
-        sources.pop(0)
+        if sources[0] == "noall": # don't rewrite all.py
+            write_isa_class = False
+            sources.pop(0)
+    print ("sources", write_isa_class, sources)
     for source in sources:
         isa.write_pysource(source)
         isa.patch_if_needed(source)
index d734122413fe82431c7ddf8fc3db4c871043ce62..18da37aeb7177c7d0b9fbca8a4f3ffc012ca7214 100644 (file)
@@ -228,7 +228,7 @@ class SVP64ALUTestCase(TestAccumulatorBase):
         # expected results:
         # r5 = 0x0                   dest r3 is 0b10: skip
         # r6 = 0xffff_ffff_ffff_ff91 2nd bit of r3 is 1
-        isa = SVP64Asm(['sv.extsb/sm=~r3/m=r3 5.v, 9.v'])
+        isa = SVP64Asm(['sv.extsb/sm=~r3/dm=r3 5.v, 9.v'])
         lst = list(isa)
         print("listing", lst)
 
@@ -246,7 +246,6 @@ class SVP64ALUTestCase(TestAccumulatorBase):
         self.add_case(Program(lst, bigendian), initial_regs,
                       initial_svstate=svstate)
 
-    @skip_case("Predication not implemented yet")
     def case_10_intpred_vcompress(self):
         #   reg num        0 1 2 3 4 5 6 7 8 9 10 11
         #   src r3=0b101                     Y  N  Y
@@ -293,7 +292,7 @@ class SVP64ALUTestCase(TestAccumulatorBase):
         # r5 = 0xffff_ffff_ffff_ff90 1st bit of r3 is 1
         # r6 = 0x0                   skip
         # r7 = 0xffff_ffff_ffff_ff91 3nd bit of r3 is 1
-        isa = SVP64Asm(['sv.extsb/m=r3 5.v, 9.v'])
+        isa = SVP64Asm(['sv.extsb/dm=r3 5.v, 9.v'])
         lst = list(isa)
         print("listing", lst)
 
@@ -311,3 +310,35 @@ class SVP64ALUTestCase(TestAccumulatorBase):
 
         self.add_case(Program(lst, bigendian), initial_regs,
                       initial_svstate=svstate)
+
+    @skip_case("Predication not implemented yet")
+    def case_12_sv_twinpred(self):
+        #   reg num        0 1 2 3 4 5 6 7 8 9 10 11
+        #   src r3=0b101                     Y  N  Y
+        #                                    |
+        #                              +-----+
+        #                              |
+        #   dest ~r3=0b010           N Y N
+
+        # expected results:
+        # r5 = 0x0                   dest ~r3 is 0b010: skip
+        # r6 = 0xffff_ffff_ffff_ff90 2nd bit of ~r3 is 1
+        # r7 = 0x0                   dest ~r3 is 0b010: skip
+        isa = SVP64Asm(['sv.extsb/sm=r3/dm=~r3 5.v, 9.v'])
+        lst = list(isa)
+        print("listing", lst)
+
+        # initial values in GPR regfile
+        initial_regs = [0] * 32
+        initial_regs[3] = 0b101  # predicate mask
+        initial_regs[9] = 0x90   # source r3 is 0b101 so this will be used
+        initial_regs[10] = 0x91  # this gets skipped
+        initial_regs[11] = 0x92  # VL loop runs out before we can use it
+        # SVSTATE (in this case, VL=3)
+        svstate = SVP64State()
+        svstate.vl[0:7] = 3  # VL
+        svstate.maxvl[0:7] = 3  # MAXVL
+        print("SVSTATE", bin(svstate.spr.asint()))
+
+        self.add_case(Program(lst, bigendian), initial_regs,
+                      initial_svstate=svstate)
index bd0c36eec53c25ac4460f49d23cddb66c8529ff0..76cff311ecae9a880ee24d2cc412c610a4650ca6 100644 (file)
@@ -10,7 +10,7 @@ from soc.experiment.dcache import DCache
 from soc.decoder.power_fields import DecodeFields
 from soc.decoder.power_fieldsn import SignalBitRange
 from soc.decoder.power_decoder2 import decode_spr_num
-from soc.decoder.power_enums import MicrOp, SPR, XER_bits
+from soc.decoder.power_enums import MicrOp, XER_bits
 
 from soc.experiment.pimem import PortInterface
 from soc.experiment.pimem import PortInterfaceBase
index 1a1d5d4ed92afb93c96e5ddcd9c4ff4dad84f51f..cca0c24e3c94ac9e18071310701198d31449fa63 100644 (file)
@@ -7,7 +7,7 @@
 from nmigen import (Module, Signal, Cat)
 from nmutil.pipemodbase import PipeModBase
 from soc.fu.spr.pipe_data import SPRInputData, SPROutputData
-from soc.decoder.power_enums import MicrOp, SPR, XER_bits
+from soc.decoder.power_enums import MicrOp, SPRfull, SPRreduced, XER_bits
 
 from soc.decoder.power_fields import DecodeFields
 from soc.decoder.power_fieldsn import SignalBitRange
@@ -17,6 +17,10 @@ from soc.decoder.power_decoder2 import decode_spr_num
 class SPRMainStage(PipeModBase):
     def __init__(self, pspec):
         super().__init__(pspec, "spr_main")
+        # test if regfiles are reduced
+        self.regreduce_en = (hasattr(pspec, "regreduce") and
+                                            (pspec.regreduce == True))
+
         self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
         self.fields.create_specs()
 
@@ -27,6 +31,10 @@ class SPRMainStage(PipeModBase):
         return SPROutputData(self.pspec)
 
     def elaborate(self, platform):
+        if self.regreduce_en:
+            SPR = SPRreduced
+        else:
+            SPR = SPRfull
         m = Module()
         comb = m.d.comb
         op = self.i.ctx.op
index cba78e3d60e6f67ce99adbdcbf6a09f9728dc849..47083f3531935d83fd1dfe98faf465cad8804cff 160000 (submodule)
@@ -1 +1 @@
-Subproject commit cba78e3d60e6f67ce99adbdcbf6a09f9728dc849
+Subproject commit 47083f3531935d83fd1dfe98faf465cad8804cff
index 2512d3ae82d37f9e7e745ea62d7064992b54167a..167ae5118a37d838227cace3b53da9b15f2e41fb 100644 (file)
@@ -26,7 +26,7 @@ Links:
 
 from soc.regfile.regfile import RegFile, RegFileArray, RegFileMem
 from soc.regfile.virtual_port import VirtualRegPort
-from soc.decoder.power_enums import SPR
+from soc.decoder.power_enums import SPRfull, SPRreduced
 
 
 # "State" Regfile
@@ -48,7 +48,7 @@ class StateRegs(RegFileArray):
     PC = 0
     MSR = 1
     SVSTATE = 2
-    def __init__(self):
+    def __init__(self, svp64_en=False, regreduce_en=False):
         super().__init__(64, 3)
         self.w_ports = {'nia': self.write_port("nia"),
                         'msr': self.write_port("msr"),
@@ -69,16 +69,21 @@ class IntRegs(RegFileMem): #class IntRegs(RegFileArray):
     * Array-based unary-indexed (not binary-indexed)
     * write-through capability (read on same cycle as write)
     """
-    def __init__(self):
+    def __init__(self, svp64_en=False, regreduce_en=False):
         super().__init__(64, 32)
         self.w_ports = {'o': self.write_port("dest1"),
-                        #'o1': self.write_port("dest2") # for now (LD/ST update)
                         }
-        self.r_ports = {'ra': self.read_port("src1"),
-                        'rb': self.read_port("src2"),
-                        'rc': self.read_port("src3"),
-                        'pred': self.read_port("pred"), # for predicate mask
+        self.r_ports = {
                         'dmi': self.read_port("dmi")} # needed for Debug (DMI)
+        if svp64_en:
+            self.r_ports['pred'] = self.read_port("pred") # for predicate mask
+        if not regreduce_en:
+            self.w_ports['o1'] = self.write_port("dest2") # (LD/ST update)
+            self.r_ports['ra'] = self.read_port("src1")
+            self.r_ports['rb'] = self.read_port("src2")
+            self.r_ports['rc'] = self.read_port("src3")
+        else:
+            self.r_ports['rabc'] = self.read_port("src1")
 
 
 # Fast SPRs Regfile
@@ -103,15 +108,16 @@ class FastRegs(RegFileMem): #RegFileArray):
     DEC = 6
     TB = 7
     N_REGS = 8 # maximum number of regs
-    def __init__(self):
+    def __init__(self, svp64_en=False, regreduce_en=False):
         super().__init__(64, self.N_REGS)
         self.w_ports = {'fast1': self.write_port("dest1"),
                         'issue': self.write_port("issue"), # writing DEC/TB
                        }
         self.r_ports = {'fast1': self.read_port("src1"),
-                        'fast2': self.read_port("src2"),
                         'issue': self.read_port("issue"), # reading DEC/TB
                         }
+        if not regreduce_en:
+            self.r_ports['fast2'] = self.read_port("src2")
 
 
 # CR Regfile
@@ -123,17 +129,18 @@ class CRRegs(VirtualRegPort):
     * Array-based unary-indexed (not binary-indexed)
     * write-through capability (read on same cycle as write)
     """
-    def __init__(self):
+    def __init__(self, svp64_en=False, regreduce_en=False):
         super().__init__(32, 8, rd2=True)
         self.w_ports = {'full_cr': self.full_wr, # 32-bit (masked, 8-en lines)
                         'cr_a': self.write_port("dest1"), # 4-bit, unary-indexed
                         'cr_b': self.write_port("dest2")} # 4-bit, unary-indexed
         self.r_ports = {'full_cr': self.full_rd, # 32-bit (masked, 8-en lines)
                         'full_cr_dbg': self.full_rd2, # for DMI
-                        'cr_pred': self.read_port("cr_pred"), # for predicate
                         'cr_a': self.read_port("src1"),
                         'cr_b': self.read_port("src2"),
                         'cr_c': self.read_port("src3")}
+        if svp64_en:
+            self.r_ports['cr_pred'] = self.read_port("cr_pred") # for predicate
 
 
 # XER Regfile
@@ -148,7 +155,7 @@ class XERRegs(VirtualRegPort):
     SO=0 # this is actually 2-bit but we ignore 1 bit of it
     CA=1 # CA and CA32
     OV=2 # OV and OV32
-    def __init__(self):
+    def __init__(self, svp64_en=False, regreduce_en=False):
         super().__init__(6, 3)
         self.w_ports = {'full_xer': self.full_wr, # 6-bit (masked, 3-en lines)
                         'xer_so': self.write_port("dest1"),
@@ -169,8 +176,11 @@ class SPRRegs(RegFileMem):
     * binary-indexed but REQUIRES MAPPING
     * write-through capability (read on same cycle as write)
     """
-    def __init__(self):
-        n_sprs = len(SPR)
+    def __init__(self, svp64_en=False, regreduce_en=False):
+        if regreduce_en:
+            n_sprs = len(SPRreduced)
+        else:
+            n_sprs = len(SPRfull)
         super().__init__(width=64, depth=n_sprs)
         self.w_ports = {'spr1': self.write_port("spr1")}
         self.r_ports = {'spr1': self.read_port("spr1")}
@@ -178,7 +188,14 @@ class SPRRegs(RegFileMem):
 
 # class containing all regfiles: int, cr, xer, fast, spr
 class RegFiles:
-    def __init__(self):
+    def __init__(self, pspec):
+        # test if SVP64 is to be enabled
+        svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
+
+        # and regfile port reduction
+        regreduce_en = hasattr(pspec, "regreduce") and \
+                      (pspec.regreduce == True)
+
         self.rf = {}
         # create regfiles here, Factory style
         for (name, kls) in [('int', IntRegs),
@@ -187,7 +204,7 @@ class RegFiles:
                             ('fast', FastRegs),
                             ('state', StateRegs),
                             ('spr', SPRRegs),]:
-            rf = self.rf[name] = kls()
+            rf = self.rf[name] = kls(svp64_en, regreduce_en)
             # also add these as instances, self.state, self.fast, self.cr etc.
             setattr(self, name, rf)
 
index 5ccc4c54c607d1c6cc18f4d0fbd672353d8646d7..e5f095dc9bff939896e5330cf6cbe02ea9dd7023 100644 (file)
@@ -1,6 +1,8 @@
 from soc.regfile.regfiles import FastRegs
-from soc.decoder.power_enums import SPR, spr_dict
+from soc.decoder.power_enums import SPRfull as SPR, spr_dict
 
+# note that we can get away with using SPRfull here because the values
+# (numerical values) are what is used for lookup.
 spr_to_fast = { SPR.CTR: FastRegs.CTR,
                 SPR.LR: FastRegs.LR,
                 SPR.TAR: FastRegs.TAR,
index 23e12f373d080d401757802ff20a264842d4dcf5..91e639a57e5c7336dd79eb55277502bdd58af141 100644 (file)
@@ -71,6 +71,13 @@ class NonProductionCore(Elaboratable):
     def __init__(self, pspec):
         self.pspec = pspec
 
+        # test if SVP64 is to be enabled
+        self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
+
+        # test to see if regfile ports should be reduced
+        self.regreduce_en = (hasattr(pspec, "regreduce") and
+                             (pspec.regreduce == True))
+
         # single LD/ST funnel for memory access
         self.l0 = TstL0CacheBuffer(pspec, n_units=1)
         pi = self.l0.l0.dports[0]
@@ -80,10 +87,11 @@ class NonProductionCore(Elaboratable):
         self.fus = AllFunctionUnits(pspec, pilist=[pi])
 
         # register files (yes plural)
-        self.regs = RegFiles()
+        self.regs = RegFiles(pspec)
 
         # instruction decoder - needs a Trap-capable Record (captures EINT etc.)
-        self.e = Decode2ToExecute1Type("core", opkls=IssuerDecode2ToOperand)
+        self.e = Decode2ToExecute1Type("core", opkls=IssuerDecode2ToOperand,
+                                regreduce_en=self.regreduce_en)
 
         # SVP64 RA_OR_ZERO needs to know if the relevant EXTRA2/3 field is zero
         self.sv_a_nz = Signal()
@@ -115,7 +123,9 @@ class NonProductionCore(Elaboratable):
                 continue
             self.decoders[funame] = PowerDecodeSubset(None, opkls, f_name,
                                                       final=True,
-                                                      state=self.state)
+                                                      state=self.state,
+                                            svp64_en=self.svp64_en,
+                                            regreduce_en=self.regreduce_en)
             self.des[funame] = self.decoders[funame].do
 
         if "mmu0" in self.decoders:
@@ -339,14 +349,15 @@ class NonProductionCore(Elaboratable):
 
             # argh.  an experiment to merge RA and RB in the INT regfile
             # (we have too many read/write ports)
-            #if regfile == 'INT':
-                #fuspecs['rabc'] = [fuspecs.pop('rb')]
-                #fuspecs['rabc'].append(fuspecs.pop('rc'))
-                #fuspecs['rabc'].append(fuspecs.pop('ra'))
-            #if regfile == 'FAST':
-            #    fuspecs['fast1'] = [fuspecs.pop('fast1')]
-            #    if 'fast2' in fuspecs:
-            #        fuspecs['fast1'].append(fuspecs.pop('fast2'))
+            if self.regreduce_en:
+                if regfile == 'INT':
+                    fuspecs['rabc'] = [fuspecs.pop('rb')]
+                    fuspecs['rabc'].append(fuspecs.pop('rc'))
+                    fuspecs['rabc'].append(fuspecs.pop('ra'))
+                if regfile == 'FAST':
+                    fuspecs['fast1'] = [fuspecs.pop('fast1')]
+                    if 'fast2' in fuspecs:
+                        fuspecs['fast1'].append(fuspecs.pop('fast2'))
 
             # for each named regfile port, connect up all FUs to that port
             for (regname, fspec) in sort_fuspecs(fuspecs):
@@ -464,14 +475,15 @@ class NonProductionCore(Elaboratable):
             fuspecs = byregfiles_wrspec[regfile]
             wrpickers[regfile] = {}
 
-            # argh, more port-merging
-            if regfile == 'INT':
-                fuspecs['o'] = [fuspecs.pop('o')]
-                fuspecs['o'].append(fuspecs.pop('o1'))
-            if regfile == 'FAST':
-                fuspecs['fast1'] = [fuspecs.pop('fast1')]
-                if 'fast2' in fuspecs:
-                    fuspecs['fast1'].append(fuspecs.pop('fast2'))
+            if self.regreduce_en:
+                # argh, more port-merging
+                if regfile == 'INT':
+                    fuspecs['o'] = [fuspecs.pop('o')]
+                    fuspecs['o'].append(fuspecs.pop('o1'))
+                if regfile == 'FAST':
+                    fuspecs['fast1'] = [fuspecs.pop('fast1')]
+                    if 'fast2' in fuspecs:
+                        fuspecs['fast1'].append(fuspecs.pop('fast2'))
 
             for (regname, fspec) in sort_fuspecs(fuspecs):
                 self.connect_wrport(m, fu_bitdict, wrpickers,
index aaed35085c3f407976ce3e1cb36d2e0c05173cd7..e2f66a50fa1142bad706ea8a1210dcf1cbf89b05 100644 (file)
@@ -16,11 +16,13 @@ improved.
 """
 
 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
-                    ClockDomain, DomainRenamer, Mux, Const, Repl)
+                    ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
 from nmigen.cli import rtlil
 from nmigen.cli import main
 import sys
 
+from nmigen.lib.coding import PriorityEncoder
+
 from soc.decoder.power_decoder import create_pdecode
 from soc.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
 from soc.decoder.decode2execute1 import IssuerDecode2ToOperand
@@ -158,6 +160,10 @@ class TestIssuerInternal(Elaboratable):
         # test is SVP64 is to be enabled
         self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
 
+        # and if regfiles are reduced
+        self.regreduce_en = (hasattr(pspec, "regreduce") and
+                                            (pspec.regreduce == True))
+
         # JTAG interface.  add this right at the start because if it's
         # added it *modifies* the pspec, by adding enable/disable signals
         # for parts of the rest of the core
@@ -207,7 +213,8 @@ class TestIssuerInternal(Elaboratable):
         self.cur_state = CoreState("cur") # current state (MSR/PC/SVSTATE)
         self.pdecode2 = PowerDecode2(pdecode, state=self.cur_state,
                                      opkls=IssuerDecode2ToOperand,
-                                     svp64_en=self.svp64_en)
+                                     svp64_en=self.svp64_en,
+                                     regreduce_en=self.regreduce_en)
         if self.svp64_en:
             self.svp64 = SVP64PrefixDecoder() # for decoding SVP64 prefix
 
@@ -241,9 +248,10 @@ class TestIssuerInternal(Elaboratable):
         self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
         self.xer_r = xerrf.r_ports['full_xer'] # XER read
 
-        # for predication
-        self.int_pred = intrf.r_ports['pred'] # INT predicate read
-        self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read
+        if self.svp64_en:
+            # for predication
+            self.int_pred = intrf.r_ports['pred'] # INT predicate read
+            self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read
 
         # hack method of keeping an eye on whether branch/trap set the PC
         self.state_nia = self.core.regs.rf['state'].w_ports['nia']
@@ -384,6 +392,8 @@ class TestIssuerInternal(Elaboratable):
         later, a faster way would be to use the 32-bit-wide CR port but
         this is more complex decoding, here.  equivalent code used in
         ISACaller is "from soc.decoder.isa.caller import get_predcr"
+
+        note: this ENTIRE FSM is not to be called when svp64 is disabled
         """
         comb = m.d.comb
         sync = m.d.sync
@@ -533,17 +543,10 @@ class TestIssuerInternal(Elaboratable):
                         comb += update_svstate.eq(1)
                         sync += sv_changed.eq(1)
 
-            # decode the instruction when it arrives
+            # wait for an instruction to arrive from Fetch
             with m.State("INSN_WAIT"):
                 comb += fetch_insn_ready_i.eq(1)
                 with m.If(fetch_insn_valid_o):
-                    # decode the instruction
-                    sync += core.e.eq(pdecode2.e)
-                    sync += core.state.eq(cur_state)
-                    sync += core.raw_insn_i.eq(dec_opcode_i)
-                    sync += core.bigendian_i.eq(self.core_bigendian_i)
-                    # set RA_OR_ZERO detection in satellite decoders
-                    sync += core.sv_a_nz.eq(pdecode2.sv_a_nz)
                     # loop into ISSUE_START if it's a SVP64 instruction
                     # and VL == 0.  this because VL==0 is a for-loop
                     # from 0 to 0 i.e. always, always a NOP.
@@ -560,7 +563,7 @@ class TestIssuerInternal(Elaboratable):
                         if self.svp64_en:
                             m.next = "PRED_START"  # start fetching predicate
                         else:
-                            m.next = "INSN_EXECUTE" # skip predication
+                            m.next = "DECODE_SV"  # skip predication
 
             with m.State("PRED_START"):
                 comb += pred_insn_valid_i.eq(1)  # tell fetch_pred to start
@@ -570,42 +573,79 @@ class TestIssuerInternal(Elaboratable):
             with m.State("MASK_WAIT"):
                 comb += pred_mask_ready_i.eq(1) # ready to receive the masks
                 with m.If(pred_mask_valid_o): # predication masks are ready
-                    m.next = "INSN_EXECUTE"
-
-            # handshake with execution FSM, move to "wait" once acknowledged
-            with m.State("INSN_EXECUTE"):
-                # with m.If(is_svp64_mode):
-                #    TODO advance src/dst step to "skip" over predicated-out
-                #    from self.srcmask and self.dstmask
-                #    https://bugs.libre-soc.org/show_bug.cgi?id=617#c3
-                #    but still without exceeding VL in either case
-                # IMPORTANT: when changing src/dest step, have to
-                # jump to m.next = "DECODE_SV" to deal with the change in
-                # SVSTATE
-
-                with m.If(is_svp64_mode):
+                    m.next = "PRED_SKIP"
 
+            # skip zeros in predicate
+            with m.State("PRED_SKIP"):
+                with m.If(~is_svp64_mode):
+                    m.next = "DECODE_SV"  # nothing to do
+                with m.Else():
                     if self.svp64_en:
                         pred_src_zero = pdecode2.rm_dec.pred_sz
                         pred_dst_zero = pdecode2.rm_dec.pred_dz
 
-                    """
-                    if not pred_src_zero:
-                        if (((1<<cur_srcstep) & self.srcmask) == 0) and
-                              (cur_srcstep != vl):
+                        # new srcstep, after skipping zeros
+                        skip_srcstep = Signal.like(cur_srcstep)
+                        # value to be added to the current srcstep
+                        src_delta = Signal.like(cur_srcstep)
+                        # add leading zeros to srcstep, if not in zero mode
+                        with m.If(~pred_src_zero):
+                            # priority encoder (count leading zeros)
+                            # append guard bit, in case the mask is all zeros
+                            pri_enc_src = PriorityEncoder(65)
+                            m.submodules.pri_enc_src = pri_enc_src
+                            comb += pri_enc_src.i.eq(Cat(self.srcmask, 1))
+                            comb += src_delta.eq(pri_enc_src.o)
+                        # apply delta to srcstep
+                        comb += skip_srcstep.eq(cur_srcstep + src_delta)
+                        # shift-out all leading zeros from the mask
+                        # plus the leading "one" bit
+                        sync += self.srcmask.eq(self.srcmask >> (src_delta+1))
+
+                        # same as above, but for dststep
+                        skip_dststep = Signal.like(cur_dststep)
+                        dst_delta = Signal.like(cur_dststep)
+                        with m.If(~pred_dst_zero):
+                            pri_enc_dst = PriorityEncoder(65)
+                            m.submodules.pri_enc_dst = pri_enc_dst
+                            comb += pri_enc_dst.i.eq(Cat(self.dstmask, 1))
+                            comb += dst_delta.eq(pri_enc_dst.o)
+                        comb += skip_dststep.eq(cur_dststep + dst_delta)
+                        sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))
+
+                        # TODO: initialize mask[VL]=1 to avoid passing past VL
+                        with m.If((skip_srcstep >= cur_vl) |
+                                  (skip_dststep >= cur_vl)):
+                            # end of VL loop. Update PC and reset src/dst step
+                            comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
+                            comb += self.state_w_pc.data_i.eq(nia)
+                            comb += new_svstate.srcstep.eq(0)
+                            comb += new_svstate.dststep.eq(0)
                             comb += update_svstate.eq(1)
-                            comb += new_svstate.srcstep.eq(next_srcstep)
-
-                    if not pred_dst_zero:
-                        if (((1<<cur_dststep) & self.dstmask) == 0) and
-                              (cur_dststep != vl):
-                            comb += new_svstate.dststep.eq(next_dststep)
+                            # go back to Issue
+                            m.next = "ISSUE_START"
+                        with m.Else():
+                            # update new src/dst step
+                            comb += new_svstate.srcstep.eq(skip_srcstep)
+                            comb += new_svstate.dststep.eq(skip_dststep)
                             comb += update_svstate.eq(1)
+                            # proceed to Decode
+                            m.next = "DECODE_SV"
 
-                    if update_svstate:
-                        m.next = "DECODE_SV"
-                    """
+            # after src/dst step have been updated, we are ready
+            # to decode the instruction
+            with m.State("DECODE_SV"):
+                # decode the instruction
+                sync += core.e.eq(pdecode2.e)
+                sync += core.state.eq(cur_state)
+                sync += core.raw_insn_i.eq(dec_opcode_i)
+                sync += core.bigendian_i.eq(self.core_bigendian_i)
+                # set RA_OR_ZERO detection in satellite decoders
+                sync += core.sv_a_nz.eq(pdecode2.sv_a_nz)
+                m.next = "INSN_EXECUTE"  # move to "execute"
 
+            # handshake with execution FSM, move to "wait" once acknowledged
+            with m.State("INSN_EXECUTE"):
                 comb += exec_insn_valid_i.eq(1) # trigger execute
                 with m.If(exec_insn_ready_o):   # execute acknowledged us
                     m.next = "EXECUTE_WAIT"
@@ -651,7 +691,8 @@ class TestIssuerInternal(Elaboratable):
                             comb += new_svstate.srcstep.eq(next_srcstep)
                             comb += new_svstate.dststep.eq(next_dststep)
                             comb += update_svstate.eq(1)
-                            m.next = "DECODE_SV"
+                            # return to mask skip loop
+                            m.next = "PRED_SKIP"
 
                 with m.Else():
                     comb += core.core_stopped_i.eq(1)
@@ -666,17 +707,6 @@ class TestIssuerInternal(Elaboratable):
                         comb += update_svstate.eq(1)
                         sync += sv_changed.eq(1)
 
-            # need to decode the instruction again, after updating SRCSTEP
-            # in the previous state.
-            # mostly a copy of INSN_WAIT, but without the actual wait
-            with m.State("DECODE_SV"):
-                # decode the instruction
-                sync += core.e.eq(pdecode2.e)
-                sync += core.state.eq(cur_state)
-                sync += core.bigendian_i.eq(self.core_bigendian_i)
-                sync += core.sv_a_nz.eq(pdecode2.sv_a_nz)
-                m.next = "INSN_EXECUTE"  # move to "execute"
-
         # check if svstate needs updating: if so, write it to State Regfile
         with m.If(update_svstate):
             comb += self.state_w_sv.wen.eq(1<<StateRegs.SVSTATE)
index a00ec4b1e69d42ac87068f855ab672a4a15dc171..446b7f419731773df6c6a754174773213cd56613 100644 (file)
@@ -18,6 +18,14 @@ if __name__ == '__main__':
     parser.add_argument("--disable-xics", dest='xics', action="store_false",
                         help="Disable interrupts",
                         default=False)
+    parser.add_argument("--enable-lessports", dest='lessports',
+                        action="store_true",
+                        help="Enable less regfile ports",
+                        default=True)
+    parser.add_argument("--disable-lessports", dest='lessports',
+                        action="store_false",
+                        help="enable more regfile ports",
+                        default=False)
     parser.add_argument("--enable-core", dest='core', action="store_true",
                         help="Enable main core",
                         default=True)
@@ -70,6 +78,7 @@ if __name__ == '__main__':
                          #wb_data_wid=32,
                          xics=args.xics, # XICS interrupt controller
                          nocore=not args.core, # test coriolis2 ioring
+                         regreduce = args.lessports, # less regfile ports
                          use_pll=args.pll,  # bypass PLL
                          gpio=args.enable_testgpio, # for test purposes
                          sram4x4kblock=args.enable_sram4x4kblock, # add SRAMs
@@ -78,6 +87,7 @@ if __name__ == '__main__':
                          units=units)
 
     print("nocore", pspec.__dict__["nocore"])
+    print("regreduce", pspec.__dict__["regreduce"])
     print("gpio", pspec.__dict__["gpio"])
     print("sram4x4kblock", pspec.__dict__["sram4x4kblock"])
     print("xics", pspec.__dict__["xics"])
index 0bf1ee42d019ef594df0333e025a2bf7b0371030..a7387e856da12b508e06f8edc7a8994ca66b4a0a 100644 (file)
@@ -14,7 +14,9 @@ from soc.decoder.power_decoder import create_pdecode
 from soc.decoder.power_decoder2 import PowerDecode2
 from soc.decoder.selectable_int import SelectableInt
 from soc.decoder.isa.all import ISA
-from soc.decoder.power_enums import SPR, spr_dict, Function, XER_bits
+
+# note that for testing using SPRfull should be ok here
+from soc.decoder.power_enums import SPRfull as SPR, spr_dict, Function, XER_bits
 from soc.config.test.test_loadstore import TestMemPspec
 from soc.config.endian import bigendian
 
index 75c95742204574d9895ae6a78878eb7aa1c21556..b6be2f3a20581a2482e9beb80490153bdd97230c 100644 (file)
@@ -145,6 +145,7 @@ class TestRunner(FHDLTestCase):
                              nocore=False,
                              xics=False,
                              gpio=False,
+                             regreduce=True,
                              svp64=self.svp64,
                              mmu=self.microwatt_mmu,
                              reg_wid=64)
@@ -154,10 +155,11 @@ class TestRunner(FHDLTestCase):
         dmi = issuer.dbg.dmi
         pdecode2 = issuer.pdecode2
         l0 = core.l0
+        regreduce_en = pspec.regreduce_en == True
 
         # copy of the decoder for simulator
         simdec = create_pdecode()
-        simdec2 = PowerDecode2(simdec)
+        simdec2 = PowerDecode2(simdec, regreduce_en=regreduce_en)
         m.submodules.simdec2 = simdec2  # pain in the neck
 
         # run core clock at same rate as test clock
@@ -393,6 +395,7 @@ class TestRunner(FHDLTestCase):
             {'comment': 'instruction memory'},
             'imem.sram.rdport.memory(0)[63:0]',
             {'comment': 'registers'},
+            # match with soc.regfile.regfiles.IntRegs port names
             'core.int.rp_src1.memory(0)[63:0]',
             'core.int.rp_src1.memory(1)[63:0]',
             'core.int.rp_src1.memory(2)[63:0]',
index 1fe38e73880e734134bea00fd241ffb2e1a5b836..4d586fa7808fd7fb8f23ce9551742feed3eb8213 100644 (file)
@@ -6,7 +6,7 @@ from soc.decoder.power_decoder import (create_pdecode)
 from soc.decoder.power_enums import (Function, MicrOp,
                                      In1Sel, In2Sel, In3Sel,
                                      OutSel, RC, LdstLen, CryIn,
-                                     single_bit_flags, Form, SPR,
+                                     single_bit_flags, Form,
                                      get_signal_name, get_csv)
 from soc.decoder.power_decoder2 import (PowerDecode2)
 from soc.simulator.program import Program
index bf507a91f0c7aba49851c0a303379872e7162fa9..3ef1f37d6839666be1d46dc95cc6339495a4eda7 100644 (file)
@@ -402,6 +402,7 @@ class SVP64Asm:
             smask = 0 # bits 16-18 but only for twin-predication
             mode = 0 # bits 19-23
 
+            mask_m_specified = False
             has_pmask = False
             has_smask = False
 
@@ -421,6 +422,7 @@ class SVP64Asm:
             for encmode in opmodes:
                 # predicate mask (src and dest)
                 if encmode.startswith("m="):
+                    mask_m_specified = True
                     pme = encmode
                     pmmode, pmask = decode_predicate(encmode[2:])
                     smmode, smask = pmmode, pmask
@@ -490,7 +492,7 @@ class SVP64Asm:
                         (pme, sme)
 
             # sanity-check that twin-predication mask only specified in 2P mode
-            if ptype == '1P':
+            if not mask_m_specified and ptype == '1P':
                 assert has_smask == False, \
                     "source-mask can only be specified on Twin-predicate ops"
 
@@ -637,6 +639,7 @@ if __name__ == '__main__':
                  'sv.extsw./satu/sz/dz/sm=r3/m=r3 5, 31',
                  'sv.extsw./pr=eq 5.v, 31',
                  'sv.add. 5.v, 2.v, 1.v',
+                 'sv.add./m=r3 5.v, 2.v, 1.v',
                 ]
     lst += [
                  'sv.stw 5.v, 4(1.v)',