config: Support full-system with SST's memory system
author Curtis Dunham <Curtis.Dunham@arm.com>
Wed, 8 Apr 2015 20:56:06 +0000 (15:56 -0500)
committer Curtis Dunham <Curtis.Dunham@arm.com>
Wed, 8 Apr 2015 20:56:06 +0000 (15:56 -0500)
This patch adds an example configuration in ext/sst/tests/ that allows
an SST/gem5 instance to simulate a 4-core AArch64 system with SST's
memHierarchy components providing all the caches and memories.

configs/common/CacheConfig.py
configs/common/FSConfig.py
configs/common/MemConfig.py
configs/common/Options.py
configs/example/fs.py
ext/sst/tests/test6_arm_4c.py [new file with mode: 0644]
src/dev/arm/RealView.py

index 66fe491e1ffecc9950ceac6fc851d23e4daf2f04..899090af57e62c8be65d072664ef9a1a14776700 100644 (file)
@@ -1,4 +1,4 @@
-# Copyright (c) 2012-2013 ARM Limited
+# Copyright (c) 2012-2013, 2015 ARM Limited
 # All rights reserved
 # 
 # The license below extends only to copyright in the software and shall
@@ -46,6 +46,13 @@ from m5.objects import *
 from Caches import *
 
 def config_cache(options, system):
+    if options.external_memory_system and (options.caches or options.l2cache):
+        print "External caches and internal caches are exclusive options.\n"
+        sys.exit(1)
+
+    if options.external_memory_system:
+        ExternalCache = ExternalCacheFactory(options.external_memory_system)
+
     if options.cpu_type == "arm_detailed":
         try:
             from O3_ARM_v7a import *
@@ -114,10 +121,50 @@ def config_cache(options, system):
                 system.cpu[i].dcache = dcache_real
                 system.cpu[i].dcache_mon = dcache_mon
 
+        elif options.external_memory_system:
+            # These port names are presented to whatever 'external' system
+            # gem5 is connecting to.  Its configuration will likely depend
+            # on these names.  For simplicity, we would advise configuring
+            # it to use this naming scheme; if this isn't possible, change
+            # the names below.
+            if buildEnv['TARGET_ISA'] in ['x86', 'arm']:
+                system.cpu[i].addPrivateSplitL1Caches(
+                        ExternalCache("cpu%d.icache" % i),
+                        ExternalCache("cpu%d.dcache" % i),
+                        ExternalCache("cpu%d.itb_walker_cache" % i),
+                        ExternalCache("cpu%d.dtb_walker_cache" % i))
+            else:
+                system.cpu[i].addPrivateSplitL1Caches(
+                        ExternalCache("cpu%d.icache" % i),
+                        ExternalCache("cpu%d.dcache" % i))
+
         system.cpu[i].createInterruptController()
         if options.l2cache:
             system.cpu[i].connectAllPorts(system.tol2bus, system.membus)
+        elif options.external_memory_system:
+            system.cpu[i].connectUncachedPorts(system.membus)
         else:
             system.cpu[i].connectAllPorts(system.membus)
 
     return system
+
+# ExternalSlave provides a "port", but when that port connects to a cache,
+# the connecting CPU SimObject wants to refer to its "cpu_side".
+# The 'ExternalCache' class provides this adaptation by rewriting the name,
+# eliminating distracting changes elsewhere in the config code.
+class ExternalCache(ExternalSlave):
+    """ExternalSlave that also answers to the port name 'cpu_side'.
+
+    Reads and writes of the attribute 'cpu_side' are redirected to 'port';
+    every other attribute name is passed through to the parent unchanged.
+    """
+
+    def __getattr__(cls, attr):
+        # Alias the cache-style port name onto ExternalSlave's 'port'.
+        if (attr == "cpu_side"):
+            attr = "port"
+        # Start the lookup after ExternalCache (not after ExternalSlave) so
+        # that a hook defined by ExternalSlave itself is never bypassed.
+        return super(ExternalCache, cls).__getattr__(attr)
+
+    def __setattr__(cls, attr, value):
+        # Same aliasing as __getattr__, applied to assignments.
+        if (attr == "cpu_side"):
+            attr = "port"
+        return super(ExternalCache, cls).__setattr__(attr, value)
+
+def ExternalCacheFactory(port_type):
+    """Return a constructor for ExternalCache objects of one port_type.
+
+    The returned callable takes a single name, which is passed on as the
+    object's port_data; every object it builds is given AllMemory as its
+    only address range.
+    """
+    def make(name):
+        return ExternalCache(port_data=name, port_type=port_type,
+                             addr_ranges=[AllMemory])
+    return make
index cfc156649ec641014e7eda3be710fe5310d3189d..17f1f7641915757a5f03df4d86ee07c9d8a341c4 100644 (file)
@@ -1,4 +1,4 @@
-# Copyright (c) 2010-2012 ARM Limited
+# Copyright (c) 2010-2012, 2015 ARM Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -203,7 +203,8 @@ def makeSparcSystem(mem_mode, mdesc=None):
     return self
 
 def makeArmSystem(mem_mode, machine_type, num_cpus=1, mdesc=None,
-                  dtb_filename=None, bare_metal=False, cmdline=None):
+                  dtb_filename=None, bare_metal=False, cmdline=None,
+                  external_memory=""):
     assert machine_type
 
     if bare_metal:
@@ -293,7 +294,15 @@ def makeArmSystem(mem_mode, machine_type, num_cpus=1, mdesc=None,
                       'lpj=19988480 norandmaps rw loglevel=8 ' + \
                       'mem=%(mem)s root=%(rootdev)s'
 
-        self.realview.setupBootLoader(self.membus, self, binary)
+        # When using external memory, gem5 writes the boot loader to nvmem
+        # and then SST will read from it, but SST can only get to nvmem from
+        # iobus, as gem5's membus is only used for initialization and
+        # SST doesn't use it.  Attaching nvmem to iobus solves this issue.
+        # During initialization, system_port -> membus -> iobus -> nvmem.
+        if external_memory:
+            self.realview.setupBootLoader(self.iobus,  self, binary)
+        else:
+            self.realview.setupBootLoader(self.membus, self, binary)
         self.gic_cpu_addr = self.realview.gic.cpu_addr
         self.flags_addr = self.realview.realview_io.pio_addr + 0x30
 
@@ -322,7 +331,24 @@ def makeArmSystem(mem_mode, machine_type, num_cpus=1, mdesc=None,
 
         self.boot_osflags = fillInCmdline(mdesc, cmdline)
 
-    self.realview.attachOnChipIO(self.membus, self.bridge)
+    if external_memory:
+        # I/O traffic enters iobus
+        self.external_io = ExternalMaster(port_data="external_io",
+                                          port_type=external_memory)
+        self.external_io.port = self.iobus.slave
+
+        # Ensure iocache only receives traffic destined for (actual) memory.
+        self.iocache = ExternalSlave(port_data="iocache",
+                                     port_type=external_memory,
+                                     addr_ranges=self.mem_ranges)
+        self.iocache.port = self.iobus.master
+
+        # Let system_port get to nvmem and nothing else.
+        self.bridge.ranges = [self.realview.nvmem.range]
+
+        self.realview.attachOnChipIO(self.iobus)
+    else:
+        self.realview.attachOnChipIO(self.membus, self.bridge)
     self.realview.attachIO(self.iobus)
     self.intrctrl = IntrControl()
     self.terminal = Terminal()
index b0ac444064531f5f300ba055f7e50536a82d0841..5266667ec56aab455d048d96a70b782169ee4fd7 100644 (file)
@@ -189,6 +189,14 @@ def config_mem(options, system):
     them.
     """
 
+    if options.external_memory_system:
+        system.external_memory = m5.objects.ExternalSlave(
+            port_type=options.external_memory_system,
+            port_data="init_mem0", port=system.membus.master,
+            addr_ranges=system.mem_ranges)
+        system.kernel_addr_check = False
+        return
+
     nbr_mem_ctrls = options.mem_channels
     import math
     from m5.util import fatal
index f110f7dfb50965e7ca5da3a757a94e86f79b3741..a383b40caeb669a639a12a21afc34eaa81ece29f 100644 (file)
@@ -104,6 +104,8 @@ def addCommonOptions(parser):
     parser.add_option("--memchecker", action="store_true")
 
     # Cache Options
+    parser.add_option("--external-memory-system", type="string",
+                      help="use external ports of this port_type for caches")
     parser.add_option("--caches", action="store_true")
     parser.add_option("--l2cache", action="store_true")
     parser.add_option("--fastmem", action="store_true")
index 98c7db4803db4c0fd745aec296824e8d71d59d73..70a3b950e42c142c8c753bb7ad00e7176ff92bb6 100644 (file)
@@ -98,7 +98,8 @@ def build_test_system(np):
         test_sys = makeArmSystem(test_mem_mode, options.machine_type,
                                  options.num_cpus, bm[0], options.dtb_filename,
                                  bare_metal=options.bare_metal,
-                                 cmdline=cmdline)
+                                 cmdline=cmdline,
+                                 external_memory=options.external_memory_system)
         if options.enable_context_switch_stats_dump:
             test_sys.enable_context_switch_stats_dump = True
     else:
@@ -185,7 +186,7 @@ def build_test_system(np):
             test_sys.iocache = IOCache(addr_ranges = test_sys.mem_ranges)
             test_sys.iocache.cpu_side = test_sys.iobus.master
             test_sys.iocache.mem_side = test_sys.membus.slave
-        else:
+        elif not options.external_memory_system:
             test_sys.iobridge = Bridge(delay='50ns', ranges = test_sys.mem_ranges)
             test_sys.iobridge.slave = test_sys.iobus.master
             test_sys.iobridge.master = test_sys.membus.slave
diff --git a/ext/sst/tests/test6_arm_4c.py b/ext/sst/tests/test6_arm_4c.py
new file mode 100644 (file)
index 0000000..e868314
--- /dev/null
@@ -0,0 +1,199 @@
+# Copyright (c)2015 ARM Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Curtis Dunham
+
+import sst
+import sys
+import os
+
+lat="1 ns"
+buslat="2 ns"
+clockRate = "1GHz"
+
+
+def getenv(name):
+    """Return the value of environment variable 'name', or "" if unset."""
+    # A lookup with a default replaces the try/except KeyError/pass idiom.
+    return os.environ.get(name, "")
+
+baseCacheParams = ({
+    "debug" :getenv("DEBUG"),
+    "debug_level" : 6,
+    "coherence_protocol" : "MSI",
+    "replacement_policy" : "LRU",
+    "cache_line_size" : 64,
+    "cache_frequency" : clockRate,
+    "statistics" : 1
+    })
+
+l1CacheParams = ({
+    "debug" : getenv("DEBUG"),
+    "debug_level" : 6,
+    "L1" : 1,
+    "cache_size" : "64 KB",
+    "associativity" : 4,
+    "access_latency_cycles" : 2,
+    "low_network_links" : 1
+    })
+
+l2CacheParams = ({
+    "debug" : getenv("DEBUG"),
+    "debug_level" : 6,
+    "L1" : 0,
+    "cache_size" : "256 KB",
+    "associativity" : 8,
+    "access_latency_cycles" : 8,
+    "high_network_links" : 1,
+    "mshr_num_entries" : 4096,
+    "low_network_links" : 1
+    })
+
+
+GEM5 = sst.Component("system", "gem5.gem5")
+GEM5.addParams({
+    "comp_debug" : getenv("GEM5_DEBUG"),
+    "gem5DebugFlags" : getenv("M5_DEBUG"),
+    "frequency" : clockRate,
+    "cmd" : "configs/example/fs.py --num-cpus 4 --disk-image=vexpress64-openembedded_minimal-armv8_20130623-376.img --root-device=/dev/sda2 --kernel=vmlinux.aarch64.20140821 --dtb-filename=vexpress.aarch64.20140821.dtb --mem-size=256MB --machine-type=VExpress_EMM64 --cpu-type=timing --external-memory-system=sst --initialize-only"
+    })
+
+bus = sst.Component("membus", "memHierarchy.Bus")
+bus.addParams({
+    "bus_frequency": "2GHz",
+    "debug" : getenv("DEBUG"),
+    "debug_level" : 8
+    })
+
+def buildL1(name, m5, connector):
+    """Create a memHierarchy cache with L1 parameters and link it to 'm5'.
+
+    name      -- SST component name of the new cache; also used to form
+                 the link name "cpu_<name>_link"
+    m5        -- component whose port 'connector' is linked to the cache
+    connector -- port name on 'm5' to attach to the cache's high_network_0
+
+    Returns the cache component.  Only the high-network side is connected
+    here; the low-network side is left for the caller.
+    """
+    cache = sst.Component(name, "memHierarchy.Cache")
+    # Shared base parameters first, then the L1-specific ones.
+    cache.addParams(baseCacheParams)
+    cache.addParams(l1CacheParams)
+    link = sst.Link("cpu_%s_link"%name)
+    link.connect((m5, connector, lat), (cache, "high_network_0", lat))
+    return cache
+
+SysBusConn = buildL1("gem5SystemBus", GEM5, "system.external_memory.port")
+bus_port = 0
+link = sst.Link("sysbus_bus_link")
+link.connect((SysBusConn, "low_network_0", buslat), (bus, "high_network_%u" % bus_port, buslat))
+
+bus_port = bus_port + 1
+ioCache = buildL1("ioCache", GEM5, "system.iocache.port")
+ioCache.addParams({
+    "debug" : 0,
+    "debug_level" : 6,
+    "cache_size" : "16 KB",
+    "associativity" : 4
+    })
+link = sst.Link("ioCache_bus_link")
+link.connect((ioCache, "low_network_0", buslat), (bus, "high_network_%u" % bus_port, buslat))
+
+def buildCPU(m5, num):
+    """Build the four caches for CPU 'num' and attach each to the bus.
+
+    The instruction, data and two TLB-walker caches are created with
+    buildL1() against the matching system.cpu<num>.* ports of 'm5'.  Each
+    is then linked to a high_network port of the module-level 'bus'.
+    Advances the module-level 'bus_port' counter by four; returns nothing.
+    """
+    l1iCache = buildL1("cpu%u.l1iCache" % num, m5, "system.cpu%u.icache.port" % num)
+    l1dCache = buildL1("cpu%u.l1dCache" % num, m5, "system.cpu%u.dcache.port" % num)
+    itlbCache = buildL1("cpu%u.itlbCache" % num, m5, "system.cpu%u.itb_walker_cache.port" % num)
+    dtlbCache = buildL1("cpu%u.dtlbCache" % num, m5, "system.cpu%u.dtb_walker_cache.port" % num)
+    # Extra parameters applied to the data cache only.
+    l1dCache.addParams({
+        "debug" : 0,
+        "debug_level" : 10,
+        "snoop_l1_invalidations" : 1
+    })
+
+    # bus_port is incremented before each connect, so every link below
+    # claims the port one past the last one used.
+    global bus_port
+    link = sst.Link("cpu%u.l1iCache_bus_link" % num) ; bus_port = bus_port + 1
+    link.connect((l1iCache, "low_network_0", buslat), (bus, "high_network_%u" % bus_port, buslat))
+    link = sst.Link("cpu%u.l1dCache_bus_link" % num) ; bus_port = bus_port + 1
+    link.connect((l1dCache, "low_network_0", buslat), (bus, "high_network_%u" % bus_port, buslat))
+    link = sst.Link("cpu%u.itlbCache_bus_link" % num) ; bus_port = bus_port + 1
+    link.connect((itlbCache, "low_network_0", buslat), (bus, "high_network_%u" % bus_port, buslat))
+    link = sst.Link("cpu%u.dtlbCache_bus_link" % num) ; bus_port = bus_port + 1
+    link.connect((dtlbCache, "low_network_0", buslat), (bus, "high_network_%u" % bus_port, buslat))
+
+buildCPU(GEM5, 0)
+buildCPU(GEM5, 1)
+buildCPU(GEM5, 2)
+buildCPU(GEM5, 3)
+
+l2cache = sst.Component("l2cache", "memHierarchy.Cache")
+l2cache.addParams(baseCacheParams)
+l2cache.addParams(l2CacheParams)
+l2cache.addParams({
+      "network_address" : "2",
+      "directory_at_next_level" : "1"
+})
+
+link = sst.Link("l2cache_bus_link")
+link.connect((l2cache, "high_network_0", buslat), (bus, "low_network_0", buslat))
+
+memory = sst.Component("memory", "memHierarchy.MemController")
+memory.addParams({
+    "request_width" : 64,
+    "coherence_protocol" : "MSI",
+    "access_time" : "25 ns",
+    "backend.mem_size" : 256,
+    "clock" : "2GHz",
+    "debug" : getenv("DEBUG"),
+    "range_start" : 0, # 2 * (1024 ** 3), # it's behind a directory controller.
+    })
+
+comp_chiprtr = sst.Component("chiprtr", "merlin.hr_router")
+comp_chiprtr.addParams({
+      "xbar_bw" : "16GB/s",
+      "link_bw" : "16GB/s",
+      "input_buf_size" : "1KB",
+      "num_ports" : "3",
+      "flit_size" : "72B",
+      "output_buf_size" : "1KB",
+      "id" : "0",
+      "topology" : "merlin.singlerouter"
+})
+comp_dirctrl = sst.Component("dirctrl", "memHierarchy.DirectoryController")
+comp_dirctrl.addParams({
+      "coherence_protocol" : "MSI",
+      "network_address" : "1",
+      "entry_cache_size" : "16384",
+      "network_bw" : "1GB/s",
+      "addr_range_start" : 2 * (1024 ** 3),
+      "addr_range_end" : 2 * (1024 ** 3) + 256 * (1024 ** 2)
+})
+
+sst.Link("link_cache_net_0").connect((l2cache, "directory", "10ns"), (comp_chiprtr, "port2", "2ns"))
+sst.Link("link_dir_net_0").connect((comp_chiprtr, "port1", "2ns"), (comp_dirctrl, "network", "2ns"))
+sst.Link("l2cache_io_link").connect((comp_chiprtr, "port0", "2ns"), (GEM5, "network", buslat))
+sst.Link("link_dir_mem_link").connect((comp_dirctrl, "memory", "10ns"), (memory, "direct_link", "10ns"))
index 9c9eff710d734546928a36d8575ebab0c400bf31..95edb9d5317a3b5776b99edfea12889e739ee7ed 100644 (file)
@@ -1,4 +1,4 @@
-# Copyright (c) 2009-2014 ARM Limited
+# Copyright (c) 2009-2015 ARM Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -509,21 +509,22 @@ class VExpress_EMM(RealView):
 
     # Attach I/O devices that are on chip and also set the appropriate
     # ranges for the bridge
-    def attachOnChipIO(self, bus, bridge):
-       self.gic.pio = bus.master
-       self.local_cpu_timer.pio = bus.master
-       if hasattr(self, "gicv2m"):
-           self.gicv2m.pio      = bus.master
-       self.hdlcd.dma           = bus.slave
-       # Bridge ranges based on excluding what is part of on-chip I/O
-       # (gic, a9scu)
-       bridge.ranges = [AddrRange(0x2F000000, size='16MB'),
-                        AddrRange(0x2B000000, size='4MB'),
-                        AddrRange(0x30000000, size='256MB'),
-                        AddrRange(0x40000000, size='512MB'),
-                        AddrRange(0x18000000, size='64MB'),
-                        AddrRange(0x1C000000, size='64MB')]
-       self.vgic.pio = bus.master
+    def attachOnChipIO(self, bus, bridge=None):
+        # 'bridge' is optional: when it is omitted (or otherwise falsy) only
+        # the device ports below are attached and no bridge ranges are set.
+        self.gic.pio             = bus.master
+        self.vgic.pio            = bus.master
+        self.local_cpu_timer.pio = bus.master
+        if hasattr(self, "gicv2m"):
+            self.gicv2m.pio      = bus.master
+        self.hdlcd.dma           = bus.slave
+        if bridge:
+            # Bridge ranges based on excluding what is part of on-chip I/O
+            # (gic, a9scu)
+            bridge.ranges = [AddrRange(0x2F000000, size='16MB'),
+                             AddrRange(0x2B000000, size='4MB'),
+                             AddrRange(0x30000000, size='256MB'),
+                             AddrRange(0x40000000, size='512MB'),
+                             AddrRange(0x18000000, size='64MB'),
+                             AddrRange(0x1C000000, size='64MB')]
 
 
     # Set the clock domain for IO objects that are considered