mem-ruby: LL/SC fixes
authorTimothy Hayes <timothy.hayes@arm.com>
Mon, 27 Apr 2020 14:03:26 +0000 (15:03 +0100)
committerPouya Fotouhi <pfotouhi@ucdavis.edu>
Sat, 2 May 2020 06:47:11 +0000 (06:47 +0000)
The implementation for load-linked/store-conditional did not work
correctly for multi-core simulations. Since load-links were treated as
stores, it was not possible for a line to have multiple readers which
often resulted in livelock when using these instructions to implemented
mutexes. This improved implementation treats load-linked instructions
similarly to loads but locks the line after a copy has been fetched
locally. Writes to a monitored address ensure the 'linked' property is
blown away and any subsequent store-conditional will fail.

Change-Id: I19bd74459e26732c92c8b594901936e6439fb073
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/27103
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Maintainer: Bobby R. Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
src/mem/ruby/protocol/RubySlicc_Types.sm
src/mem/ruby/system/Sequencer.cc
src/mem/ruby/system/Sequencer.hh

index fd7628965cc914636607da19fa9335a94af24c63..f8de9ed4c33ec2caa88a310e7e21cda98ef26286 100644 (file)
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2020 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 1999-2005 Mark D. Hill and David A. Wood
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved.
@@ -112,6 +124,10 @@ structure (Sequencer, external = "yes") {
   void writeCallback(Addr, DataBlock, bool, MachineType,
                      Cycles, Cycles, Cycles);
 
+  // ll/sc support
+  void writeCallbackScFail(Addr, DataBlock);
+  bool llscCheckMonitor(Addr);
+
   void checkCoherence(Addr);
   void evictionCallback(Addr);
   void recordRequestType(SequencerRequestType);
index 1f538c37581879546f92d8fb3abcacd6fd1a2e82..0287e13013c5b2989629b97b53b40d8c110ac8c7 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited
+ * Copyright (c) 2019-2020 ARM Limited
  * All rights reserved.
  *
  * The license below extends only to copyright in the software and shall
@@ -45,6 +45,7 @@
 #include "base/logging.hh"
 #include "base/str.hh"
 #include "cpu/testers/rubytest/RubyTester.hh"
+#include "debug/LLSC.hh"
 #include "debug/MemoryAccess.hh"
 #include "debug/ProtocolTrace.hh"
 #include "debug/RubySequencer.hh"
@@ -89,6 +90,64 @@ Sequencer::~Sequencer()
 {
 }
 
+void
+Sequencer::llscLoadLinked(const Addr claddr)
+{
+    AbstractCacheEntry *line = m_dataCache_ptr->lookup(claddr);
+    if (line) {
+        line->setLocked(m_version);
+        DPRINTF(LLSC, "LLSC Monitor - inserting load linked - "
+                      "addr=0x%lx - cpu=%u\n", claddr, m_version);
+    }
+}
+
+void
+Sequencer::llscClearMonitor(const Addr claddr)
+{
+    AbstractCacheEntry *line = m_dataCache_ptr->lookup(claddr);
+    if (line && line->isLocked(m_version)) {
+        line->clearLocked();
+        DPRINTF(LLSC, "LLSC Monitor - clearing due to store - "
+                      "addr=0x%lx - cpu=%u\n", claddr, m_version);
+    }
+}
+
+bool
+Sequencer::llscStoreConditional(const Addr claddr)
+{
+    AbstractCacheEntry *line = m_dataCache_ptr->lookup(claddr);
+    if (!line)
+        return false;
+
+    DPRINTF(LLSC, "LLSC Monitor - clearing due to "
+                  "store conditional - "
+                  "addr=0x%lx - cpu=%u\n",
+                  claddr, m_version);
+
+    if (line->isLocked(m_version)) {
+        line->clearLocked();
+        return true;
+    } else {
+        line->clearLocked();
+        return false;
+    }
+}
+
+bool
+Sequencer::llscCheckMonitor(const Addr address)
+{
+    const Addr claddr = makeLineAddress(address);
+    AbstractCacheEntry *line = m_dataCache_ptr->lookup(claddr);
+    if (!line)
+        return false;
+
+    if (line->isLocked(m_version)) {
+        return true;
+    } else {
+        return false;
+    }
+}
+
 void
 Sequencer::wakeup()
 {
@@ -202,62 +261,6 @@ Sequencer::markRemoved()
     m_outstanding_count--;
 }
 
-void
-Sequencer::invalidateSC(Addr address)
-{
-    AbstractCacheEntry *e = m_dataCache_ptr->lookup(address);
-    // The controller has lost the coherence permissions, hence the lock
-    // on the cache line maintained by the cache should be cleared.
-    if (e && e->isLocked(m_version)) {
-        e->clearLocked();
-    }
-}
-
-bool
-Sequencer::handleLlsc(Addr address, SequencerRequest* request)
-{
-    AbstractCacheEntry *e = m_dataCache_ptr->lookup(address);
-    if (!e)
-        return true;
-
-    // The success flag indicates whether the LLSC operation was successful.
-    // LL ops will always succeed, but SC may fail if the cache line is no
-    // longer locked.
-    bool success = true;
-    if (request->m_type == RubyRequestType_Store_Conditional) {
-        if (!e->isLocked(m_version)) {
-            //
-            // For failed SC requests, indicate the failure to the cpu by
-            // setting the extra data to zero.
-            //
-            request->pkt->req->setExtraData(0);
-            success = false;
-        } else {
-            //
-            // For successful SC requests, indicate the success to the cpu by
-            // setting the extra data to one.
-            //
-            request->pkt->req->setExtraData(1);
-        }
-        //
-        // Independent of success, all SC operations must clear the lock
-        //
-        e->clearLocked();
-    } else if (request->m_type == RubyRequestType_Load_Linked) {
-        //
-        // Note: To fully follow Alpha LLSC semantics, should the LL clear any
-        // previously locked cache lines?
-        //
-        e->setLocked(m_version);
-    } else if (e->isLocked(m_version)) {
-        //
-        // Normal writes should clear the locked address
-        //
-        e->clearLocked();
-    }
-    return success;
-}
-
 void
 Sequencer::recordMissLatency(SequencerRequest* srequest, bool llscSuccess,
                              const MachineType respondingMach,
@@ -324,6 +327,13 @@ Sequencer::recordMissLatency(SequencerRequest* srequest, bool llscSuccess,
     }
 }
 
+void
+Sequencer::writeCallbackScFail(Addr address, DataBlock& data)
+{
+    llscClearMonitor(address);
+    writeCallback(address, data);
+}
+
 void
 Sequencer::writeCallback(Addr address, DataBlock& data,
                          const bool externalHit, const MachineType mach,
@@ -349,21 +359,27 @@ Sequencer::writeCallback(Addr address, DataBlock& data,
         SequencerRequest &seq_req = seq_req_list.front();
         if (ruby_request) {
             assert(seq_req.m_type != RubyRequestType_LD);
+            assert(seq_req.m_type != RubyRequestType_Load_Linked);
             assert(seq_req.m_type != RubyRequestType_IFETCH);
         }
 
         // handle write request
         if ((seq_req.m_type != RubyRequestType_LD) &&
+            (seq_req.m_type != RubyRequestType_Load_Linked) &&
             (seq_req.m_type != RubyRequestType_IFETCH)) {
-            //
-            // For Alpha, properly handle LL, SC, and write requests with
-            // respect to locked cache blocks.
-            //
-            // Not valid for Garnet_standalone protocl
-            //
-            bool success = true;
-            if (!m_runningGarnetStandalone)
-                success = handleLlsc(address, &seq_req);
+            // LL/SC support (tested with ARMv8)
+            bool success = false;
+
+            if (seq_req.m_type != RubyRequestType_Store_Conditional) {
+                // Regular stores to addresses being monitored
+                // will fail (remove) the monitor entry.
+                llscClearMonitor(address);
+            } else {
+                // Store conditionals must first check the monitor
+                // if they will succeed or not
+                success = llscStoreConditional(address);
+                seq_req.pkt->req->setExtraData(success ? 1 : 0);
+            }
 
             // Handle SLICC block_on behavior for Locked_RMW accesses. NOTE: the
             // address variable here is assumed to be a line address, so when
@@ -435,11 +451,13 @@ Sequencer::readCallback(Addr address, DataBlock& data,
         SequencerRequest &seq_req = seq_req_list.front();
         if (ruby_request) {
             assert((seq_req.m_type == RubyRequestType_LD) ||
+                   (seq_req.m_type == RubyRequestType_Load_Linked) ||
                    (seq_req.m_type == RubyRequestType_IFETCH));
         } else {
             aliased_loads++;
         }
         if ((seq_req.m_type != RubyRequestType_LD) &&
+            (seq_req.m_type != RubyRequestType_Load_Linked) &&
             (seq_req.m_type != RubyRequestType_IFETCH)) {
             // Write request: reissue request to the cache hierarchy
             issueRequest(seq_req.pkt, seq_req.m_second_type);
@@ -480,6 +498,12 @@ Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data,
     Addr request_address(pkt->getAddr());
     RubyRequestType type = srequest->m_type;
 
+    // Load-linked handling
+    if (type == RubyRequestType_Load_Linked) {
+        Addr line_addr = makeLineAddress(request_address);
+        llscLoadLinked(line_addr);
+    }
+
     // update the data unless it is a non-data-carrying flush
     if (RubySystem::getWarmupEnabled()) {
         data.setData(pkt->getConstPtr<uint8_t>(),
@@ -553,23 +577,26 @@ Sequencer::makeRequest(PacketPtr pkt)
     RubyRequestType secondary_type = RubyRequestType_NULL;
 
     if (pkt->isLLSC()) {
-        //
-        // Alpha LL/SC instructions need to be handled carefully by the cache
+        // LL/SC instructions need to be handled carefully by the cache
         // coherence protocol to ensure they follow the proper semantics. In
         // particular, by identifying the operations as atomic, the protocol
         // should understand that migratory sharing optimizations should not
         // be performed (i.e. a load between the LL and SC should not steal
         // away exclusive permission).
         //
+        // The following logic works correctly with the semantics
+        // of armV8 LDEX/STEX instructions.
+
         if (pkt->isWrite()) {
             DPRINTF(RubySequencer, "Issuing SC\n");
             primary_type = RubyRequestType_Store_Conditional;
+            secondary_type = RubyRequestType_ST;
         } else {
             DPRINTF(RubySequencer, "Issuing LL\n");
             assert(pkt->isRead());
             primary_type = RubyRequestType_Load_Linked;
+            secondary_type = RubyRequestType_LD;
         }
-        secondary_type = RubyRequestType_ATOMIC;
     } else if (pkt->req->isLockedRMW()) {
         //
         // x86 locked instructions are translated to store cache coherence
@@ -724,6 +751,7 @@ Sequencer::recordRequestType(SequencerRequestType requestType) {
 void
 Sequencer::evictionCallback(Addr address)
 {
+    llscClearMonitor(address);
     ruby_eviction_callback(address);
 }
 
index bb2819b15b283b3122e9e41125a25999a01ad20a..bb93607192aa108ee0aa7fdb08eb13ab73a0e4f6 100644 (file)
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2019 ARM Limited
- * All rights reserved.
+ * Copyright (c) 2019-2020 ARM Limited
+ * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
  * not be construed as granting a license to any other intellectual
@@ -84,6 +84,13 @@ class Sequencer : public RubyPort
     Sequencer(const Params *);
     ~Sequencer();
 
+    /**
+     * Proxy function to writeCallback that first
+     * invalidates the line address in the local monitor.
+     */
+    void writeCallbackScFail(Addr address,
+                        DataBlock& data);
+
     // Public Methods
     void wakeup(); // Used only for deadlock detection
     void resetStats() override;
@@ -121,7 +128,6 @@ class Sequencer : public RubyPort
 
     void markRemoved();
     void evictionCallback(Addr address);
-    void invalidateSC(Addr address);
     int coreId() const { return m_coreId; }
 
     virtual int functionalWrite(Packet *func_pkt) override;
@@ -191,7 +197,6 @@ class Sequencer : public RubyPort
 
     RequestStatus insertRequest(PacketPtr pkt, RubyRequestType primary_type,
                                 RubyRequestType secondary_type);
-    bool handleLlsc(Addr address, SequencerRequest* request);
 
     // Private copy constructor and assignment operator
     Sequencer(const Sequencer& obj);
@@ -257,6 +262,39 @@ class Sequencer : public RubyPort
     std::vector<Stats::Counter> m_IncompleteTimes;
 
     EventFunctionWrapper deadlockCheckEvent;
+
+    // support for LL/SC
+
+    /**
+     * Places the cache line address into the global monitor
+     * tagged with this Sequencer object's version id.
+     */
+    void llscLoadLinked(const Addr);
+
+    /**
+     * Removes the cache line address from the global monitor.
+     * This is independent of this Sequencer object's version id.
+     */
+    void llscClearMonitor(const Addr);
+
+    /**
+     * Searches for cache line address in the global monitor
+     * tagged with this Sequencer object's version id.
+     * If a match is found, the entry is is erased from
+     * the global monitor.
+     *
+     * @return a boolean indicating if the line address was found.
+     */
+    bool llscStoreConditional(const Addr);
+
+  public:
+    /**
+     * Searches for cache line address in the global monitor
+     * tagged with this Sequencer object's version id.
+     *
+     * @return a boolean indicating if the line address was found.
+     */
+    bool llscCheckMonitor(const Addr);
 };
 
 inline std::ostream&