add spr test, add decode of spr in/out
author Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Sat, 4 Jul 2020 13:19:38 +0000 (14:19 +0100)
committer Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Sat, 4 Jul 2020 13:19:38 +0000 (14:19 +0100)
libreriscv
src/soc/decoder/power_decoder2.py
src/soc/decoder/power_enums.py
src/soc/fu/spr/main_stage.py
src/soc/fu/spr/pipe_data.py
src/soc/fu/spr/pipeline.py
src/soc/fu/spr/test/test_pipe_caller.py [new file with mode: 0644]
src/soc/fu/test/common.py

index 1d09d2455985e602a5799e1fafac5cea6b1cb72d..3cde5558a3df42b4a08be86881b03dbbdf5ea3e2 160000 (submodule)
@@ -1 +1 @@
-Subproject commit 1d09d2455985e602a5799e1fafac5cea6b1cb72d
+Subproject commit 3cde5558a3df42b4a08be86881b03dbbdf5ea3e2
index e6ee584035d77db781e75a4ddd5b119496f573fb..dd7b7a5d940c021764da338d27b6a90a181b01ac 100644 (file)
@@ -27,6 +27,9 @@ TT_PRIV = 1<<1
 TT_TRAP = 1<<2
 TT_ADDR = 1<<3
 
+def decode_spr_num(spr):
+    return Cat(spr[5:10], spr[0:5])
+
 
 def instr_is_priv(m, op, insn):
     """determines if the instruction is privileged or not
@@ -100,9 +103,29 @@ class DecodeA(Elaboratable):
 
         # MFSPR move from SPRs
         with m.If(op.internal_op == InternalOp.OP_MFSPR):
-            # XXX TODO: fast/slow SPR decoding and mapping
-            comb += self.spr_out.data.eq(self.dec.SPR) # SPR field, XFX
-            comb += self.spr_out.ok.eq(1)
+            spr = Signal(10, reset_less=True)
+            comb += spr.eq(decode_spr_num(self.dec.SPR)) # from XFX
+            with m.Switch(spr):
+                # fast SPRs
+                with m.Case(SPR.CTR):
+                    self.fast_out.data.eq(FastRegs.CTR)
+                    self.fast_out.ok.eq(1)
+                with m.Case(SPR.LR):
+                    self.fast_out.data.eq(FastRegs.LR)
+                    self.fast_out.ok.eq(1)
+                with m.Case(SPR.TAR):
+                    self.fast_out.data.eq(FastRegs.TAR)
+                    self.fast_out.ok.eq(1)
+                with m.Case(SPR.SRR0):
+                    self.fast_out.data.eq(FastRegs.SRR0)
+                    self.fast_out.ok.eq(1)
+                with m.Case(SPR.SRR1):
+                    self.fast_out.data.eq(FastRegs.SRR1)
+                    self.fast_out.ok.eq(1)
+                with m.Default():
+                    comb += self.spr_out.data.eq(self.dec.SPR) # from XFX
+                    comb += self.spr_out.ok.eq(1)
+
 
         return m
 
@@ -243,26 +266,30 @@ class DecodeOut(Elaboratable):
                 comb += self.reg_out.data.eq(self.dec.RA)
                 comb += self.reg_out.ok.eq(1)
             with m.Case(OutSel.SPR):
-                comb += self.spr_out.data.eq(self.dec.SPR) # from XFX
-                comb += self.spr_out.ok.eq(1)
+                spr = Signal(10, reset_less=True)
+                comb += spr.eq(decode_spr_num(self.dec.SPR)) # from XFX
                 # TODO MTSPR 1st spr (fast)
                 with m.If(op.internal_op == InternalOp.OP_MTSPR):
-                    pass
-                    """
-                    sprn := decode_spr_num(f_in.insn);
-                    v.ispr1 := fast_spr_num(sprn);
-                    -- Make slow SPRs single issue
-                    if is_fast_spr(v.ispr1) = '0' then
-                        v.decode.sgl_pipe := '1';
-                        -- send MMU-related SPRs to loadstore1
-                        case sprn is
-                        when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PRTBL =>
-                            v.decode.unit := LDST;
-                        when others =>
-                        end case;
-                    end if;
-                    """
-
+                    with m.Switch(spr):
+                        # fast SPRs
+                        with m.Case(SPR.CTR):
+                            self.fast_out.data.eq(FastRegs.CTR)
+                            self.fast_out.ok.eq(1)
+                        with m.Case(SPR.LR):
+                            self.fast_out.data.eq(FastRegs.LR)
+                            self.fast_out.ok.eq(1)
+                        with m.Case(SPR.TAR):
+                            self.fast_out.data.eq(FastRegs.TAR)
+                            self.fast_out.ok.eq(1)
+                        with m.Case(SPR.SRR0):
+                            self.fast_out.data.eq(FastRegs.SRR0)
+                            self.fast_out.ok.eq(1)
+                        with m.Case(SPR.SRR1):
+                            self.fast_out.data.eq(FastRegs.SRR1)
+                            self.fast_out.ok.eq(1)
+                        with m.Default():
+                            comb += self.spr_out.data.eq(self.dec.SPR) # from XFX
+                            comb += self.spr_out.ok.eq(1)
 
         # BC or BCREG: potential implicit register (CTR) NOTE: same in DecodeA
         op = self.dec.op
index 931101ecb0aea1c5237ea4b34da5ae01164ecadd..40dd718ca264fad595f13d99d3e12fe259143cb6 100644 (file)
@@ -55,6 +55,7 @@ class Function(Enum):
     TRAP = 1<<7
     MUL = 1<<8
     DIV = 1<<9
+    SPR = 1<<10
 
 
 @unique
index 5786eff8637796c01896dc286ee107a33b7c746c..f16d24dff54c867cace9ecdaac53b56e3005dc49 100644 (file)
@@ -4,23 +4,19 @@
 * https://libre-soc.org/openpower/isa/sprset/
 """
 
-from nmigen import (Module, Signal, Cat, Mux, Const, signed)
+from nmigen import (Module, Signal, Cat)
 from nmutil.pipemodbase import PipeModBase
-from nmutil.extend import exts
 from soc.fu.spr.pipe_data import SPRInputData, SPROutputData
-from soc.fu.branch.main_stage import br_ext
-from soc.decoder.power_enums import InternalOp
+from soc.decoder.power_enums import InternalOp, SPR, XER_bits
 
 from soc.decoder.power_fields import DecodeFields
 from soc.decoder.power_fieldsn import SignalBitRange
-
-def decode_spr_num(spr):
-    return Cat(spr[5:10], spr[0:5])
+from soc.decoder.power_decoder2 import decode_spr_num
 
 
 class SPRMainStage(PipeModBase):
     def __init__(self, pspec):
-        super().__init__(pspec, "main")
+        super().__init__(pspec, "spr_main")
         self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
         self.fields.create_specs()
 
@@ -44,7 +40,7 @@ class SPRMainStage(PipeModBase):
         # take copy of D-Form TO field
         x_fields = self.fields.FormXFX
         spr = Signal(x_fields.SPR[0:-1].shape())
-        comb += spr.eq(decode_spr_num(i_fields.SPR[0:-1]))
+        comb += spr.eq(decode_spr_num(x_fields.SPR[0:-1]))
 
         # TODO: some #defines for the bits n stuff.
         with m.Switch(op.insn_type):
index 1054b147f53b676eb8b13af6c059f2d23e08db4d..f4b62aefc84e6fdf84d5f2d6d90863412b79ba11 100644 (file)
@@ -11,6 +11,7 @@ Links:
 
 from soc.fu.pipe_data import IntegerData
 from soc.fu.spr.spr_input_record import CompSPROpSubset
+from soc.fu.alu.pipe_data import CommonPipeSpec
 
 
 class SPRInputData(IntegerData):
@@ -37,6 +38,6 @@ class SPROutputData(IntegerData):
         super().__init__(pspec, True)
 
 
-class SPRPipeSpec:
+class SPRPipeSpec(CommonPipeSpec):
     regspec = (SPRInputData.regspec, SPROutputData.regspec)
     opsubsetkls = CompSPROpSubset
index fc3c7c299bba255712458a95a7dd1cf2c114217b..fc76c223244ca61d2ae69d369859df8334e95c28 100644 (file)
@@ -1,6 +1,6 @@
 from nmutil.singlepipe import ControlBase
 from nmutil.pipemodbase import PipeModBaseChain
-from soc.fu.trap.main_stage import SPRMainStage
+from soc.fu.spr.main_stage import SPRMainStage
 
 class SPRStages(PipeModBaseChain):
     def get_chain(self):
diff --git a/src/soc/fu/spr/test/test_pipe_caller.py b/src/soc/fu/spr/test/test_pipe_caller.py
new file mode 100644 (file)
index 0000000..7eca695
--- /dev/null
@@ -0,0 +1,213 @@
+from nmigen import Module, Signal
+from nmigen.back.pysim import Simulator, Delay, Settle
+from nmutil.formaltest import FHDLTestCase
+from nmigen.cli import rtlil
+import unittest
+from soc.decoder.isa.caller import ISACaller, special_sprs
+from soc.decoder.power_decoder import (create_pdecode)
+from soc.decoder.power_decoder2 import (PowerDecode2)
+from soc.decoder.power_enums import (XER_bits, Function, InternalOp, CryIn)
+from soc.decoder.selectable_int import SelectableInt
+from soc.simulator.program import Program
+from soc.decoder.isa.all import ISA
+
+
+from soc.fu.test.common import (TestCase, ALUHelpers)
+from soc.fu.spr.pipeline import SPRBasePipe
+from soc.fu.spr.pipe_data import SPRPipeSpec
+import random
+
+
+def get_cu_inputs(dec2, sim):
+    """naming (res) must conform to SPRFunctionUnit input regspec
+    """
+    res = {}
+
+    yield from ALUHelpers.get_sim_int_ra(res, sim, dec2) # RA
+    yield from ALUHelpers.get_sim_int_rb(res, sim, dec2) # RB
+    yield from ALUHelpers.get_sim_fast_spr1(res, sim, dec2) # SPR1
+    yield from ALUHelpers.get_rd_sim_xer_ca(res, sim, dec2) # XER.ca
+    yield from ALUHelpers.get_sim_xer_ov(res, sim, dec2) # XER.ov
+    yield from ALUHelpers.get_sim_xer_so(res, sim, dec2) # XER.so
+
+    print ("spr get_cu_inputs", res)
+
+    return res
+
+
+
+def set_alu_inputs(alu, dec2, sim):
+    # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
+    # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok))
+    # and place it into data_i.b
+
+    inp = yield from get_cu_inputs(dec2, sim)
+    yield from ALUHelpers.set_int_ra(alu, dec2, inp)
+    yield from ALUHelpers.set_xer_ca(alu, dec2, inp)
+    yield from ALUHelpers.set_xer_ov(alu, dec2, inp)
+    yield from ALUHelpers.set_xer_so(alu, dec2, inp)
+
+    # XXX TODO slow spr1
+    yield from ALUHelpers.set_fast_spr1(alu, dec2, inp)
+
+
+# This test bench is a bit different from the usual. Initially, when I
+# was writing it, I had all of the tests call a function to create a
+# device under test and simulator, initialize the DUT, run the
+# simulation for ~2 cycles, and assert that the DUT output what it
+# should have. However, this was really slow, since it needed to
+# create and tear down the DUT and simulator for every test case.
+
+# Now, instead of doing that, every test case in SPRTestCase puts some
+# data into the test_data list below, describing the instructions to
+# be tested and the initial state. Once all the tests have been run,
+# test_data gets passed to TestRunner which then sets up the DUT and
+# simulator once, runs all the data through it, and asserts that the
+# results match the pseudocode sim at every cycle.
+
+# By doing this, I've reduced the time it takes to run the test suite
+# massively. Before, it took around 1 minute on my computer; now it
+# takes around 3 seconds.
+
+
+class SPRTestCase(FHDLTestCase):
+    test_data = []
+
+    def __init__(self, name):
+        super().__init__(name)
+        self.test_name = name
+
+    def run_tst_program(self, prog, initial_regs=None, initial_sprs=None):
+        tc = TestCase(prog, self.test_name, initial_regs, initial_sprs)
+        self.test_data.append(tc)
+
+    def test_1_mfspr(self):
+        lst = ["mfspr 1, 26", # SRR0
+               "mfspr 2, 27",] # SRR1
+        initial_regs = [0] * 32
+        initial_sprs = {'SRR0': 0x12345678, 'SRR1': 0x5678}
+        self.run_tst_program(Program(lst), initial_regs, initial_sprs)
+
+    def test_ilang(self):
+        pspec = SPRPipeSpec(id_wid=2)
+        alu = SPRBasePipe(pspec)
+        vl = rtlil.convert(alu, ports=alu.ports())
+        with open("trap_pipeline.il", "w") as f:
+            f.write(vl)
+
+
+class TestRunner(FHDLTestCase):
+    def __init__(self, test_data):
+        super().__init__("run_all")
+        self.test_data = test_data
+
+    def run_all(self):
+        m = Module()
+        comb = m.d.comb
+        instruction = Signal(32)
+
+        pdecode = create_pdecode()
+
+        m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode)
+
+        pspec = SPRPipeSpec(id_wid=2)
+        m.submodules.alu = alu = SPRBasePipe(pspec)
+
+        comb += alu.p.data_i.ctx.op.eq_from_execute1(pdecode2.e)
+        comb += alu.p.valid_i.eq(1)
+        comb += alu.n.ready_i.eq(1)
+        comb += pdecode2.dec.raw_opcode_in.eq(instruction)
+        sim = Simulator(m)
+
+        sim.add_clock(1e-6)
+        def process():
+            for test in self.test_data:
+                print(test.name)
+                program = test.program
+                self.subTest(test.name)
+                sim = ISA(pdecode2, test.regs, test.sprs, test.cr,
+                                test.mem, test.msr)
+                gen = program.generate_instructions()
+                instructions = list(zip(gen, program.assembly.splitlines()))
+
+                pc = sim.pc.CIA.value
+                index = pc//4
+                while index < len(instructions):
+                    ins, code = instructions[index]
+
+                    print("pc %08x instr: %08x" % (pc, ins & 0xffffffff))
+                    print(code)
+                    if 'XER' in sim.spr:
+                        so = 1 if sim.spr['XER'][XER_bits['SO']] else 0
+                        ov = 1 if sim.spr['XER'][XER_bits['OV']] else 0
+                        ov32 = 1 if sim.spr['XER'][XER_bits['OV32']] else 0
+                        print ("before: so/ov/32", so, ov, ov32)
+
+                    # ask the decoder to decode this binary data (endian'd)
+                    yield pdecode2.dec.bigendian.eq(0)  # little / big?
+                    yield instruction.eq(ins)          # raw binary instr.
+                    yield Settle()
+                    fn_unit = yield pdecode2.e.fn_unit
+                    self.assertEqual(fn_unit, Function.SPR.value)
+                    yield from set_alu_inputs(alu, pdecode2, sim)
+                    yield
+                    opname = code.split(' ')[0]
+                    yield from sim.call(opname)
+                    pc = sim.pc.CIA.value
+                    index = pc//4
+                    print("pc after %08x" % (pc))
+
+                    vld = yield alu.n.valid_o
+                    while not vld:
+                        yield
+                        vld = yield alu.n.valid_o
+                    yield
+
+                    yield from self.check_alu_outputs(alu, pdecode2, sim, code)
+
+        sim.add_sync_process(process)
+        with sim.write_vcd("alu_simulator.vcd", "simulator.gtkw",
+                            traces=[]):
+            sim.run()
+
+    def check_alu_outputs(self, alu, dec2, sim, code):
+
+        rc = yield dec2.e.rc.data
+        cridx_ok = yield dec2.e.write_cr.ok
+        cridx = yield dec2.e.write_cr.data
+
+        print ("check extra output", repr(code), cridx_ok, cridx)
+        if rc:
+            self.assertEqual(cridx, 0, code)
+
+        sim_o = {}
+        res = {}
+
+        yield from ALUHelpers.get_int_o(res, alu, dec2)
+        yield from ALUHelpers.get_fast_spr1(res, alu, dec2)
+        yield from ALUHelpers.get_xer_ov(res, alu, dec2)
+        yield from ALUHelpers.get_xer_ca(res, alu, dec2)
+        yield from ALUHelpers.get_xer_so(res, alu, dec2)
+
+        print ("output", res)
+
+        yield from ALUHelpers.get_sim_int_o(sim_o, sim, dec2)
+        yield from ALUHelpers.get_wr_sim_cr_a(sim_o, sim, dec2)
+        yield from ALUHelpers.get_sim_xer_ov(sim_o, sim, dec2)
+        yield from ALUHelpers.get_wr_sim_xer_ca(sim_o, sim, dec2)
+        yield from ALUHelpers.get_wr_fast_spr1(sim_o, sim, dec2)
+
+        ALUHelpers.check_xer_ov(self, res, sim_o, code)
+        ALUHelpers.check_xer_ca(self, res, sim_o, code)
+        ALUHelpers.check_int_o(self, res, sim_o, code)
+        ALUHelpers.check_fast_spr1(self, res, sim_o, code)
+        ALUHelpers.check_xer_so(self, res, sim_o, code)
+
+
+if __name__ == "__main__":
+    unittest.main(exit=False)
+    suite = unittest.TestSuite()
+    suite.addTest(TestRunner(SPRTestCase.test_data))
+
+    runner = unittest.TextTestRunner()
+    runner.run(suite)
index 4b43b47ea72514f2cb776d2b8f8a3a01f79c8cb1..a14e2277b17f8c160e05b38b3fbe1bcbcc0e5775 100644 (file)
@@ -116,6 +116,11 @@ class ALUHelpers:
             yield alu.p.data_i.xer_ca.eq(inp['xer_ca'])
             print ("extra inputs: CA/32", bin(inp['xer_ca']))
 
+    def set_xer_ov(alu, dec2, inp):
+        if 'xer_ov' in inp:
+            yield alu.p.data_i.xer_ov.eq(inp['xer_ov'])
+            print ("extra inputs: OV/32", bin(inp['xer_ov']))
+
     def set_xer_so(alu, dec2, inp):
         if 'xer_so' in inp:
             so = inp['xer_so']
@@ -233,6 +238,7 @@ class ALUHelpers:
         ok = yield dec2.e.write_fast2.ok
         if ok:
             spr_num = yield dec2.e.write_fast2.data
+            spr_num = fast_reg_to_spr(spr_num)
             spr_name = spr_dict[spr_num]
             res['spr2'] = sim.spr[spr_name]
 
@@ -240,6 +246,7 @@ class ALUHelpers:
         ok = yield dec2.e.write_fast1.ok
         if ok:
             spr_num = yield dec2.e.write_fast1.data
+            spr_num = fast_reg_to_spr(spr_num)
             spr_name = spr_dict[spr_num]
             res['spr1'] = sim.spr[spr_name]
 
@@ -264,6 +271,13 @@ class ALUHelpers:
         if oe and oe_ok:
             res['xer_so'] = 1 if sim.spr['XER'][XER_bits['SO']] else 0
 
+    def check_fast_spr1(dut, res, sim_o, msg):
+        if 'fast1' in res:
+            expected = sim_o['fast1']
+            alu_out = res['fast1']
+            print(f"expected {expected:x}, actual: {alu_out:x}")
+            dut.assertEqual(expected, alu_out, msg)
+
     def check_int_o1(dut, res, sim_o, msg):
         if 'o1' in res:
             expected = sim_o['o1']