sort out misaligned store in LoadStore1
author Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Fri, 28 Jan 2022 03:19:40 +0000 (03:19 +0000)
committer Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Fri, 28 Jan 2022 03:19:40 +0000 (03:19 +0000)
src/soc/experiment/pimem.py
src/soc/experiment/test/test_loadstore1.py
src/soc/fu/ldst/loadstore.py

index d7e97ebaf4f18983d55655f361043c317bfc8a8a..498ecb8f4b25c199e58a1205b7c113d6b77c79dc 100644 (file)
@@ -296,7 +296,7 @@ class PortInterfaceBase(Elaboratable):
         with m.If(st_active.q & pi.st.ok):
             # shift data up before storing.  lenexp *bit* version of mask is
             # passed straight through as byte-level "write-enable" lines.
-            stdata = Signal(self.regwid, reset_less=True)
+            stdata = Signal(self.regwid*2, reset_less=True)
             comb += stdata.eq(pi.st.data << (lenexp.addr_i*8))
             # TODO: replace with link to LoadStoreUnitInterface.x_store_data
             # and also handle the ready/stall/busy protocol
index a21a4e0884c114eb8d454601ad1d4aa79f484869..e79e0c127c22ef55947363498624f70b67d66806 100644 (file)
@@ -20,7 +20,7 @@ from soc.experiment.test import pagetables
 
 from nmigen.compat.sim import run_simulation
 from random import random
-from openpower.test.wb_get import wb_get
+from openpower.test.wb_get import wb_get_classic
 from openpower.test import wb_get as wbget
 from openpower.exceptions import LDSTExceptionTuple
 
@@ -540,15 +540,25 @@ def _test_loadstore1_microwatt_mmu_bin_test5(dut, mem):
     wbget.stop = True
 
 
-def test_pi_ld_misalign(pi,addr,data_len,msr):
+def test_pi_ld_misalign(pi, addr, data_len, msr):
     for i in range(0,data_len):
         ld_data, exctype, exc = yield from pi_ld(pi, addr+i, data_len, msr=msr)
         yield
-        if i == 0:
-            assert exc is None # use "is None" not "== None"
-            print("MISALIGN: test_pi_ld_misalign returned",hex(ld_data))
-        else:
-            assert exc.alignment == 1
+        assert exc is None # use "is None" not "== None"
+        print("MISALIGN: test_pi_ld_misalign returned",hex(ld_data))
+
+
+def test_pi_st_ld_misalign(pi, addr, data_len, msr):
+    data = 0x0102030405060708
+    for i in range(0, data_len):
+        exctype, exc = yield from pi_st(pi, addr+i, data, data_len, msr=msr)
+        print (exctype, exc)
+        assert exc is None # use "is None" not "== None"
+        ld_data, exctype, exc = yield from pi_ld(pi, addr+i, data_len, msr=msr)
+        yield
+        assert exc is None # use "is None" not "== None"
+        print("MISALIGN: test_pi_ld_misalign returned",hex(ld_data))
+        assert ld_data == data
 
 
 def _test_loadstore1_misalign(dut, mem):
@@ -566,6 +576,8 @@ def _test_loadstore1_misalign(dut, mem):
 
     yield from test_pi_ld_misalign(pi,0,8,msr)
 
+    yield from test_pi_st_ld_misalign(pi,0,8,msr)
+
     wbget.stop = True
 
 
@@ -855,10 +867,10 @@ def test_loadstore1_ifetch_unit_iface():
     sim.add_clock(1e-6)
 
     sim.add_sync_process(wrap(_test_loadstore1_ifetch_iface(m, mem)))
-    # add two wb_get processes onto the *same* memory dictionary.
+    # add two wb_get_classic processes onto the *same* memory dictionary.
     # this shouuuld work.... cross-fingers...
-    sim.add_sync_process(wrap(wb_get(cmpi.wb_bus(), mem)))
-    sim.add_sync_process(wrap(wb_get(icache.ibus, mem))) # ibus not bus
+    sim.add_sync_process(wrap(wb_get_classic(cmpi.wb_bus(), mem)))
+    sim.add_sync_process(wrap(wb_get_classic(icache.ibus, mem))) # ibus not bus
     with sim.write_vcd('test_loadstore1_ifetch_iface.vcd',
                       traces=[m.debug_status]): # include extra debug
         sim.run()
@@ -876,10 +888,10 @@ def test_loadstore1_ifetch():
 
     icache = m.submodules.ldst.icache
     sim.add_sync_process(wrap(_test_loadstore1_ifetch(m, mem)))
-    # add two wb_get processes onto the *same* memory dictionary.
+    # add two wb_get_classic processes onto the *same* memory dictionary.
     # this shouuuld work.... cross-fingers...
-    sim.add_sync_process(wrap(wb_get(cmpi.wb_bus(), mem)))
-    sim.add_sync_process(wrap(wb_get(icache.bus, mem)))
+    sim.add_sync_process(wrap(wb_get_classic(cmpi.wb_bus(), mem)))
+    sim.add_sync_process(wrap(wb_get_classic(icache.bus, mem)))
     with sim.write_vcd('test_loadstore1_ifetch.vcd',
                       traces=[m.debug_status]): # include extra debug
         sim.run()
@@ -896,7 +908,7 @@ def test_loadstore1():
     sim.add_clock(1e-6)
 
     sim.add_sync_process(wrap(_test_loadstore1(m, mem)))
-    sim.add_sync_process(wrap(wb_get(cmpi.wb_bus(), mem)))
+    sim.add_sync_process(wrap(wb_get_classic(cmpi.wb_bus(), mem)))
     with sim.write_vcd('test_loadstore1.vcd'):
         sim.run()
 
@@ -912,7 +924,7 @@ def test_loadstore1_microwatt_mmu_bin_test2():
     sim.add_clock(1e-6)
 
     sim.add_sync_process(wrap(_test_loadstore1_microwatt_mmu_bin_test2(m, mem)))
-    sim.add_sync_process(wrap(wb_get(cmpi.wb_bus(), mem)))
+    sim.add_sync_process(wrap(wb_get_classic(cmpi.wb_bus(), mem)))
     with sim.write_vcd('test_microwatt_mmu_test2.vcd'):
         sim.run()
 
@@ -928,7 +940,7 @@ def test_loadstore1_microwatt_mmu_bin_test5():
     sim.add_clock(1e-6)
 
     sim.add_sync_process(wrap(_test_loadstore1_microwatt_mmu_bin_test5(m, mem)))
-    sim.add_sync_process(wrap(wb_get(cmpi.wb_bus(), mem)))
+    sim.add_sync_process(wrap(wb_get_classic(cmpi.wb_bus(), mem)))
     with sim.write_vcd('test_microwatt_mmu_test5.vcd'):
         sim.run()
 
@@ -945,11 +957,13 @@ def test_loadstore1_misalign():
 
     ###########1122334455667788
     mem[0] = 0x0102030405060708
+    mem[8] = 0xffffffffffffffff
 
     sim.add_sync_process(wrap(_test_loadstore1_misalign(m, mem)))
-    sim.add_sync_process(wrap(wb_get(cmpi.wb_bus(), mem)))
+    sim.add_sync_process(wrap(wb_get_classic(cmpi.wb_bus(), mem)))
     with sim.write_vcd('test_loadstore1_misalign.vcd'):
         sim.run()
+    print ("mem", mem)
 
 
 def test_loadstore1_invalid():
@@ -963,7 +977,7 @@ def test_loadstore1_invalid():
     sim.add_clock(1e-6)
 
     sim.add_sync_process(wrap(_test_loadstore1_invalid(m, mem)))
-    sim.add_sync_process(wrap(wb_get(cmpi.wb_bus(), mem)))
+    sim.add_sync_process(wrap(wb_get_classic(cmpi.wb_bus(), mem)))
     with sim.write_vcd('test_loadstore1_invalid.vcd'):
         sim.run()
 
@@ -981,10 +995,10 @@ def test_loadstore1_ifetch_invalid():
 
     icache = m.submodules.ldst.icache
     sim.add_sync_process(wrap(_test_loadstore1_ifetch_invalid(m, mem)))
-    # add two wb_get processes onto the *same* memory dictionary.
+    # add two wb_get_classic processes onto the *same* memory dictionary.
     # this shouuuld work.... cross-fingers...
-    sim.add_sync_process(wrap(wb_get(cmpi.wb_bus(), mem)))
-    sim.add_sync_process(wrap(wb_get(icache.bus, mem)))
+    sim.add_sync_process(wrap(wb_get_classic(cmpi.wb_bus(), mem)))
+    sim.add_sync_process(wrap(wb_get_classic(icache.bus, mem)))
     with sim.write_vcd('test_loadstore1_ifetch_invalid.vcd',
                       traces=[m.debug_status]): # include extra debug
         sim.run()
@@ -1009,10 +1023,10 @@ def test_loadstore1_ifetch_multi():
     sim.add_clock(1e-6)
 
     sim.add_sync_process(wrap(_test_loadstore1_ifetch_multi(m, mem)))
-    # add two wb_get processes onto the *same* memory dictionary.
+    # add two wb_get_classic processes onto the *same* memory dictionary.
     # this shouuuld work.... cross-fingers...
-    sim.add_sync_process(wrap(wb_get(cmpi.wb_bus(), mem)))
-    sim.add_sync_process(wrap(wb_get(icache.ibus, mem))) # ibus not bus
+    sim.add_sync_process(wrap(wb_get_classic(cmpi.wb_bus(), mem)))
+    sim.add_sync_process(wrap(wb_get_classic(icache.ibus, mem))) # ibus not bus
     with sim.write_vcd('test_loadstore1_ifetch_multi.vcd',
                       traces=[m.debug_status]): # include extra debug
         sim.run()
@@ -1020,10 +1034,10 @@ def test_loadstore1_ifetch_multi():
 if __name__ == '__main__':
     #test_loadstore1()
     #test_loadstore1_microwatt_mmu_bin_test2()
-    test_loadstore1_microwatt_mmu_bin_test5()
+    #test_loadstore1_microwatt_mmu_bin_test5()
     #test_loadstore1_invalid()
     #test_loadstore1_ifetch() #FIXME
     #test_loadstore1_ifetch_invalid()
     #test_loadstore1_ifetch_unit_iface() # guess: should be working
     #test_loadstore1_ifetch_multi()
-    #test_loadstore1_misalign()
+    test_loadstore1_misalign()
index 500024aa5494249919cdd265d91c251966a983b0..6af17736944c4bacaa879f82dafc7c69aa86e851 100644 (file)
@@ -112,8 +112,9 @@ class LoadStore1(PortInterfaceBase):
         self.dcbz          = Signal()
         self.raddr          = Signal(64)
         self.maddr          = Signal(64)
-        self.store_data    = Signal(128)   # 128-bit to cope with
-        self.load_data     = Signal(128)   # misalignment
+        self.store_data    = Signal(64)   # first half (aligned)
+        self.store_data2   = Signal(64)   # second half (misaligned)
+        self.load_data     = Signal(128)   # 128 to cope with misalignment
         self.load_data_delay = Signal(128) # perform 2 LD/STs
         self.byte_sel      = Signal(16)    # also for misaligned, 16-bit
         self.alignstate    = Signal(Misalign) # progress of alignment request
@@ -208,6 +209,7 @@ class LoadStore1(PortInterfaceBase):
         # put data into comb which is picked up in main elaborate()
         m.d.comb += self.d_w_valid.eq(1)
         m.d.comb += self.store_data.eq(data)
+        m.d.sync += self.store_data2.eq(data[64:128])
         st_ok = self.done # TODO indicates write data is valid
         m.d.comb += self.pi.store_done.data.eq(self.d_in.store_done)
         m.d.comb += self.pi.store_done.ok.eq(1)
@@ -258,6 +260,7 @@ class LoadStore1(PortInterfaceBase):
         # fsm skeleton
         with m.Switch(self.state):
             with m.Case(State.IDLE):
+                sync += self.load_data_delay.eq(0) # clear out
                 with m.If((self.d_validblip | self.instr_fault) &
                           ~exc.happened):
                     comb += self.busy.eq(1)
@@ -321,6 +324,8 @@ class LoadStore1(PortInterfaceBase):
                         with m.If(ldst_r.load):
                             m.d.comb += self.load_data[0:63].eq(d_in.data)
                             sync += self.load_data_delay[0:64].eq(d_in.data)
+                        with m.Else():
+                            m.d.sync += d_out.data.eq(self.store_data2)
                         # mmm kinda cheating, make a 2nd blip.
                         # use an aligned version of the address
                         addr_aligned, z3 = Signal(64), Const(0, 3)
@@ -435,12 +440,9 @@ class LoadStore1(PortInterfaceBase):
         if hasattr(dbus, "stall"):
             comb += dcache.bus.stall.eq(dbus.stall)
 
-        # update out d data when flag set
+        # update out d data when flag set, for first half (second done in FSM)
         with m.If(self.d_w_valid):
-            with m.If(ldst_r.alignstate == Misalign.WAITSECOND):
-                m.d.sync += d_out.data.eq(self.store_data[64:128])
-            with m.Else():
-                m.d.sync += d_out.data.eq(self.store_data[0:64])
+            m.d.sync += d_out.data.eq(self.store_data)
         #with m.Else():
         #    m.d.sync += d_out.data.eq(0)
         # unit test passes with that change