From 5ddbf2700780ecada5285b722025452641a28b0f Mon Sep 17 00:00:00 2001
From: Cesar Strauss <cestrauss@gmail.com>
Date: Tue, 16 Feb 2021 14:48:33 -0300
Subject: [PATCH] Fix MSB0 issues for SVP64

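Main changes are:
1) Convert indices from MSB0 to LSB0 when extracting fields
   (the slices of "extra" in SVP64ExtraSpec, power_decoder2.py)
2) Convert indices from LSB0 to MSB0 when inserting fields
   (the shift amounts and prefix bit placement in sv/trans/svp64.py)
3) Reorder nMigen Records (SVP64Rec, SVSTATERec) to start from the LSB,
   because nMigen fills a Record from the LSB upwards; the list passed
   to Cat() in SVP64PrefixDecoder is reversed for the same reason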

This was verified by inspecting the GTKWave output for
test_issuer_svp64.py, checking the instruction memory against a manually
assembled instruction, and checking that the decoded fields correspond to
the original instruction.
---
 src/soc/decoder/isa/caller.py     |  2 +-
 src/soc/decoder/power_decoder2.py | 30 ++++++++++++++++--------------
 src/soc/sv/svp64.py               | 14 ++++++++------
 src/soc/sv/svstate.py             | 12 ++++++------
 src/soc/sv/trans/svp64.py         | 18 +++++++++---------
 5 files changed, 40 insertions(+), 36 deletions(-)

diff --git a/src/soc/decoder/isa/caller.py b/src/soc/decoder/isa/caller.py
index 59b665e2..b87ddc0e 100644
--- a/src/soc/decoder/isa/caller.py
+++ b/src/soc/decoder/isa/caller.py
@@ -708,7 +708,7 @@ class ISACaller:
         print ("svp64.rm", bin(pfx.rm.asint(msb0=True)))
         print ("    svstate.vl", self.svstate.vl.asint(msb0=True))
         print ("    svstate.mvl", self.svstate.maxvl.asint(msb0=True))
-        sv_rm = pfx.rm.asint()
+        sv_rm = pfx.rm.asint(msb0=True)
         ins = self.imem.ld(pc+4, 4, False, True)
         print("     svsetup: 0x%x 0x%x %s" % (pc+4, ins & 0xffffffff, bin(ins)))
         yield self.dec2.dec.raw_opcode_in.eq(ins & 0xffffffff) # v3.0B suffix
diff --git a/src/soc/decoder/power_decoder2.py b/src/soc/decoder/power_decoder2.py
index e2e4e8af..fbfd3640 100644
--- a/src/soc/decoder/power_decoder2.py
+++ b/src/soc/decoder/power_decoder2.py
@@ -104,23 +104,23 @@ class SVP64ExtraSpec(Elaboratable):
             # 2-bit index selection mode
             with m.Case(SVEtype.EXTRA2):
                 with m.Switch(self.idx):
-                    with m.Case(SVEXTRA.Idx0): # 1st 2 bits
-                        comb += spec[1:3].eq(self.extra[0:2])
-                    with m.Case(SVEXTRA.Idx1): # 2nd 2 bits
-                        comb += spec[1:3].eq(self.extra[2:4])
-                    with m.Case(SVEXTRA.Idx2): # 3rd 2 bits
-                        comb += spec[1:3].eq(self.extra[4:6])
-                    with m.Case(SVEXTRA.Idx3): # 4th 2 bits
-                        comb += spec[1:3].eq(self.extra[6:8])
+                    with m.Case(SVEXTRA.Idx0):  # 1st 2 bits [0:1]
+                        comb += spec[1:3].eq(self.extra[8-1:9])
+                    with m.Case(SVEXTRA.Idx1):  # 2nd 2 bits [2:3]
+                        comb += spec[1:3].eq(self.extra[8-3:8-1])
+                    with m.Case(SVEXTRA.Idx2):  # 3rd 2 bits [4:5]
+                        comb += spec[1:3].eq(self.extra[8-5:8-3])
+                    with m.Case(SVEXTRA.Idx3):  # 4th 2 bits [6:7]
+                        comb += spec[1:3].eq(self.extra[8-7:8-5])
             # 3-bit index selection mode
             with m.Case(SVEtype.EXTRA3):
                 with m.Switch(self.idx):
-                    with m.Case(SVEXTRA.Idx0): # 1st 3 bits
-                        comb += spec.eq(self.extra[0:3])
-                    with m.Case(SVEXTRA.Idx1): # 2nd 3 bits
-                        comb += spec.eq(self.extra[3:6])
-                    with m.Case(SVEXTRA.Idx2): # 3rd 3 bits
-                        comb += spec.eq(self.extra[6:9])
+                    with m.Case(SVEXTRA.Idx0):  # 1st 3 bits [0:2]
+                        comb += spec.eq(self.extra[8-2:9])
+                    with m.Case(SVEXTRA.Idx1):  # 2nd 3 bits [3:5]
+                        comb += spec.eq(self.extra[8-5:8-2])
+                    with m.Case(SVEXTRA.Idx2):  # 3rd 3 bits [6:8]
+                        comb += spec.eq(self.extra[8-8:8-5])
                     # cannot fit more than 9 bits so there is no 4th thing
 
         return m
@@ -1331,6 +1331,8 @@ class SVP64PrefixDecoder(Elaboratable):
         l = []
         for idx in rmfields:
             l.append(self.opcode_in[31-idx])
+        # in nMigen, Cat begins at the LSB and proceeds upwards
+        l.reverse()  # put the LSB at the start of the list
         with m.If(self.is_svp64_mode):
             comb += self.svp64_rm.eq(Cat(*l))
 
diff --git a/src/soc/sv/svp64.py b/src/soc/sv/svp64.py
index 5a8be91f..3911fb4c 100644
--- a/src/soc/sv/svp64.py
+++ b/src/soc/sv/svp64.py
@@ -18,15 +18,17 @@ https://libre-soc.org/openpower/sv/svp64/
 
 from nmigen import Record
 
+
+# in nMigen, Record begins at the LSB and fills upwards
 class SVP64Rec(Record):
     def __init__(self, name=None):
-        Record.__init__(self, layout=[("mmode"   , 1),
-                                      ("mask"    , 3),
-                                      ("elwidth" , 2),
-                                      ("ewsrc"   , 2),
-                                      ("subvl"   , 2),
+        Record.__init__(self, layout=[("mode"    , 5),
                                       ("extra"   , 9),
-                                      ("mode"    , 5)], name=name)
+                                      ("subvl"   , 2),
+                                      ("ewsrc"   , 2),
+                                      ("elwidth" , 2),
+                                      ("mask"    , 3),
+                                      ("mmode"   , 1)], name=name)
 
     def ports(self):
         return [self.mmode, self.mask, self.elwidth, self.ewsrc,
diff --git a/src/soc/sv/svstate.py b/src/soc/sv/svstate.py
index c97efa48..6052a8f1 100644
--- a/src/soc/sv/svstate.py
+++ b/src/soc/sv/svstate.py
@@ -19,13 +19,13 @@ from nmutil.iocontrol import RecordObject
 from nmigen import Signal
 
 
+# In nMigen, Record order is from LSB to MSB
 class SVSTATERec(RecordObject):
     def __init__(self, name=None):
         super().__init__(name=name)
-        self.maxvl = Signal(7)
-        self.vl = Signal(7)
-        self.srcstep = Signal(7)
-        self.dststep = Signal(7)
-        self.subvl = Signal(2)
         self.svstep = Signal(2)
-
+        self.subvl = Signal(2)
+        self.dststep = Signal(7)
+        self.srcstep = Signal(7)
+        self.vl = Signal(7)
+        self.maxvl = Signal(7)
diff --git a/src/soc/sv/trans/svp64.py b/src/soc/sv/trans/svp64.py
index a2427fd6..5702deb8 100644
--- a/src/soc/sv/trans/svp64.py
+++ b/src/soc/sv/trans/svp64.py
@@ -336,7 +336,7 @@ class SVP64Asm:
                 if idx is None: continue
                 # start at bit 10, work up 2/3 times EXTRA idx
                 offs = 2 if etype == 'EXTRA2' else 3 # 2 or 3 bits
-                svp64_rm |= sv_extra << (10+idx*offs)
+                svp64_rm |= sv_extra << (24-offs-(10+idx*offs))
 
             # parts of svp64_rm
             mmode = 0  # bit 0
@@ -503,20 +503,20 @@ class SVP64Asm:
             # whewww.... modes all done :)
             # now put into svp64_rm
             mode |= sv_mode
-            svp64_rm |= (mode << 19) # mode: bits 19-23
+            svp64_rm |= (mode << 23-23)       # mode: bits 19-23
 
             # put in predicate masks into svp64_rm
             if ptype == '2P':
-                svp64_rm |= (smask << 16) # source pred: bits 16-18
-            svp64_rm |= (mmode)           # mask mode: bit 0
-            svp64_rm |= (pmask << 1)      # 1-pred: bits 1-3
+                svp64_rm |= (smask << 23-18)  # source pred: bits 16-18
+            svp64_rm |= (mmode << 23-0)       # mask mode: bit 0
+            svp64_rm |= (pmask << 23-3)       # 1-pred: bits 1-3
 
             # and subvl
-            svp64_rm += (subvl << 8)      # subvl: bits 8-9
+            svp64_rm += (subvl << 23-9)       # subvl: bits 8-9
 
             # put in elwidths
-            svp64_rm += (srcwid << 6)      # srcwid: bits 6-7
-            svp64_rm += (destwid << 4)     # destwid: bits 4-5
+            svp64_rm += (srcwid << 23-7)      # srcwid: bits 6-7
+            svp64_rm += (destwid << 23-5)     # destwid: bits 4-5
 
             # nice debug printout. (and now for something completely different)
             # https://youtu.be/u0WOIwlXE9g?t=146
@@ -544,7 +544,7 @@ class SVP64Asm:
             svp64_prefix |= 0x1 << (31-9) # SVP64 marker 2
             rmfields = [6, 8] + list(range(10,32)) # SVP64 24-bit RM
             for i, x in enumerate(rmfields):
-                svp64_prefix |= ((svp64_rm>>i)&0b1) << (31-x)
+                svp64_prefix |= ((svp64_rm>>(23-i))&0b1) << (31-x)
 
             # fiinally yield the svp64 prefix and the thingy.  v3.0b opcode
             yield ".long 0x%x" % svp64_prefix
-- 
2.30.2