work-in-progress
author Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Fri, 15 Apr 2022 16:48:09 +0000 (17:48 +0100)
committer Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Fri, 15 Apr 2022 16:48:12 +0000 (17:48 +0100)
asynchronous DRAM wishbone bridge, which is omitted when
dram_clk_freq is not requested

src/ls2.py

index cc55d1998fadbc5c0b395050d98f0e00dc901a36..f7586cf2263ea6e0b42945455b12197d0d484374 100644 (file)
@@ -34,6 +34,7 @@ from soc.bus.tercel import Tercel # SPI XIP master
 from soc.bus.opencores_ethmac import EthMAC # OpenCores 10/100 Ethernet MAC
 from soc.bus.external_core import ExternalCore # external libresoc/microwatt
 from soc.bus.wb_downconvert import WishboneDownConvert
+from soc.bus.wb_async import WBAsyncBridge
 from soc.bus.syscon import MicrowattSYSCON
 from soc.interrupts.xics import XICS_ICP, XICS_ICS
 
@@ -249,6 +250,7 @@ class DDR3SoC(SoC, Elaboratable):
                  hyperram_pins=None,
                  xics_icp_addr=None, xics_ics_addr=None,
                  clk_freq=50e6,
+                 dram_clk_freq=None,
                  add_cpu=True):
 
         # wishbone routing is as follows:
@@ -262,11 +264,13 @@ class DDR3SoC(SoC, Elaboratable):
         #          |
         #      64to32DownCvt
         #          |
-        #       arbiter------------------------------------------+
-        #          |                                             |
-        #   +---decoder----+--------+---------+-------+--------+ |
-        #   |      |       |        |         |       |        | |
-        #  uart  XICS    CSRs     DRAM     XIP SPI HyperRAM   EthMAC
+        #       arbiter------------------------------------------------------+
+        #          |                                                         |
+        #   +---decoder----+--------+---------------+-------------+--------+ |
+        #   |      |       |        |               |             |        | |
+        #   |      |       |  WBAsyncBridge         |             |        | |
+        #   |      |       |        |               |             |        | |
+        #  uart  XICS    CSRs     DRAM          XIP SPI       HyperRAM   EthMAC
 
         # set up wishbone bus arbiter and decoder. arbiter routes,
         # decoder maps local-relative addressed satellites to global addresses
@@ -287,10 +291,15 @@ class DDR3SoC(SoC, Elaboratable):
                     'orangecrab']:
             if fpga in ['isim']:
                 pod_bits = 6
-            self.crg = ECP5CRG(clk_freq, dram_clk_freq=None, pod_bits=pod_bits)
+            self.crg = ECP5CRG(clk_freq, dram_clk_freq=dram_clk_freq,
+                               pod_bits=pod_bits)
         if fpga in ['arty_a7']:
             self.crg = ArtyA7CRG(clk_freq)
 
+        self.dram_clk_freq = dram_clk_freq
+        if self.dram_clk_freq is None:
+            self.dram_clk_freq = clk_freq
+
         # set up CPU, with 64-to-32-bit downconverters
         if add_cpu:
             self.cpu = ExternalCore(name="ext_core")
@@ -345,6 +354,7 @@ class DDR3SoC(SoC, Elaboratable):
         spi_offset = 2<<20 if (spi_0_pins is not None) else None
         dram_offset = ddr_addr if (ddr_pins is not None) else None
         self.syscon = MicrowattSYSCON(sys_clk_freq=clk_freq,
+                                      mem_clk_freq=self.dram_clk_freq,
                                       has_uart=(uart_pins is not None),
                                       spi_offset=spi_offset,
                                       dram_addr=dram_offset)
@@ -394,57 +404,131 @@ class DDR3SoC(SoC, Elaboratable):
                                                     tRAS=44)}
         """
 
-        # DRAM Module
+        # DRAM Module. first, create the (triple) modules:
+        # * DDR PHY
+        # * gram Core: presents PHY with a DFI Interface
+        # * gram Bone (aka gram-with-wishbone) connects wishbone to DFI
+        # from there it gets a little complicated because of supporting
+        # several options: simulation, synchronous, and asynchronous clocks.
+        # dram_clk_freq can *never* be set equal to clk_freq, if it is,
+        # it's assumed to be synchronous, and the dram Domains need renaming
+
         if ddr_pins is not None: # or fpga == 'sim':
-            ddrmodule = dram_cls(clk_freq, "1:2") # match DDR3 ASIC P/N
+            ddrmodule = dram_cls(self.dram_clk_freq, "1:2") # match DDR3 P/N
 
             # remap both the sync domain (wherever it occurs) and
-            # the sync2x domain.  technically this should NOT be done.
-            # it's a bit of a mess.  ok: this should be done only
-            # when dramsync===sync (and dramsync2x===sync2x)
-            drs = DomainRenamer({"sync": "dramsync",
-                                 "sync2x": "dramsync2x"})
-
-            # HOWEVER, when the ASyncBridge is deployed, the two domains
-            # must NOT be renamed, instead this used:
-            #drs = lambda x: x
-            # and then the ASyncBridge takes care of the two.
-            # but, back in ecp5_crg.py, when ASyncBridge is added,
-            # dram_clk_freq must be passed to ECP5CRG, which will call
-            # ECP5CRG.phase2_domain on your behalf, setting up the
-            # necessary dramsync2x which is needed for the xdr=4 IOpads
+            # the sync2x domain, but only when no separate dram frequency
+            # is specified (and not simulating): otherwise leave them alone
+            drs = None
+            if dram_clk_freq is not None or fpga == 'sim':
+                drs = lambda x: x
+            else:
+                drs = DomainRenamer({"sync": "dramsync",
+                                     "sync2x": "dramsync2x"})
+
+            features = set()
+            if dram_clk_freq is None:
+                features.add("stall")
 
+            # create the PHY (fake one for sim)
             if fpga == 'sim':
+                settings = sim_ddr3_settings(self.dram_clk_freq)
                 self.ddrphy = FakePHY(module=ddrmodule,
-                                      settings=sim_ddr3_settings(clk_freq),
+                                      settings=settings,
                                       verbosity=SDRAM_VERBOSE_DBG,
-                                      clk_freq=clk_freq)
+                                      clk_freq=self.dram_clk_freq)
             else:
-                self.ddrphy = drs(ECP5DDRPHY(ddr_pins, sys_clk_freq=clk_freq))
-            self._decoder.add(self.ddrphy.bus, addr=ddrphy_addr)
+                self.ddrphy = drs(ECP5DDRPHY(ddr_pins,
+                                             #features=features,
+                                             sys_clk_freq=self.dram_clk_freq))
 
+            # create the core (bridge from PHY to DFI)
             dramcore = gramCore(phy=self.ddrphy,
                                 geom_settings=ddrmodule.geom_settings,
                                 timing_settings=ddrmodule.timing_settings,
-                                clk_freq=clk_freq)
-            if fpga == 'sim':
-                self.dramcore = dramcore
-            else:
-                self.dramcore = drs(dramcore)
-            self._decoder.add(self.dramcore.bus, addr=dramcore_addr)
-
-            # map the DRAM onto Wishbone, XXX use stall but set classic below
-            # XXX WHEN ADDING ASYNCBRIDGE IT IS THE **BRIDGE** THAT MUST
-            # XXX HAVE THE STALL SIGNAL, AND THE **BRIDGE** THAT MUST HAVE
-            # XXX stall=stb&~ack APPLIED
-            drambone = gramWishbone(dramcore, features={'stall'})
-            if fpga == 'sim':
-                self.drambone = drambone
-            else:
-                self.drambone = drs(drambone)
-            # XXX ADD THE ASYNCBRIDGE NOT THE DRAMBONE.BUS, THEN
-            # XXX ADD DRAMBONE.BUS TO ASYNCBRIDGE
-            self._decoder.add(self.drambone.bus, addr=ddr_addr)
+                                #features=features,
+                                clk_freq=self.dram_clk_freq)
+            self.dramcore = drs(dramcore)
+
+            # create the wishbone presentation (wishbone to DFI)
+            drambone = gramWishbone(dramcore, features=features)
+            self.drambone = drs(drambone)
+
+        # this is the case where sys_clk === dram_clk. no ASync Bridge
+        # needed, so just let the phy core and wb-dfi be connected
+        # directly to WB decoder.  both are running in "sync" domain
+        # (because of the DomainRenamer, above)
+
+        if ddr_pins is not None and dram_clk_freq is None:
+            self.ddrphy_bus = self.ddrphy.bus
+            self.dramcore_bus = self.dramcore.bus
+            self.drambone_bus = self.drambone.bus
+
+        # this covers the case where sys_clk != dram_clk: three separate
+        # ASync Bridges are constructed (!) and the interface that's to
+        # be wired to the WB decoder is the async bus because that's running
+        # in the "sync" domain.
+
+        if ddr_pins is not None and dram_clk_freq is not None:
+            # Set up Wishbone asynchronous bridge
+            pabus = wishbone.Interface(addr_width=self.ddrphy.bus.addr_width,
+                                       data_width=self.ddrphy.bus.data_width,
+                                       granularity=self.ddrphy.bus.granularity,
+                                       features={'stall'})
+            self.ddrphy_bus = pabus
+            self.ddrphy_bus.memory_map = self.ddrphy.bus.memory_map
+
+            pabr = WBAsyncBridge(master_bus=self.ddrphy_bus,
+                                 slave_bus=self.ddrphy.bus,
+                                 master_clock_domain=None,
+                                 slave_clock_domain="dramsync",
+                                 address_width=self.ddrphy.bus.addr_width,
+                                 data_width=self.ddrphy.bus.data_width,
+                                 granularity=self.ddrphy.bus.granularity,
+                                 master_features={'stall'})
+            self.ddrphy_async_br = pabr
+
+            # Set up Wishbone asynchronous bridge
+            dab = wishbone.Interface(addr_width=self.dramcore.bus.addr_width,
+                                     data_width=self.dramcore.bus.data_width,
+                                     granularity=self.dramcore.bus.granularity,
+                                     features={'stall'})
+            self.dramcore_bus = dab
+            self.dramcore_bus.memory_map = self.dramcore.bus.memory_map
+
+            dac = WBAsyncBridge(master_bus=self.dramcore_bus,
+                                slave_bus=self.dramcore.bus,
+                                master_clock_domain=None,
+                                slave_clock_domain="dramsync",
+                                address_width=self.dramcore.bus.addr_width,
+                                data_width=self.dramcore.bus.data_width,
+                                granularity=self.dramcore.bus.granularity,
+                                master_features={'stall'})
+            self.dramcore_async_br = dac
+
+            # Set up Wishbone asynchronous bridge
+            bab = wishbone.Interface(addr_width=self.drambone.bus.addr_width,
+                                     data_width=self.drambone.bus.data_width,
+                                     granularity=self.drambone.bus.granularity,
+                                     features={'stall'})
+            self.drambone_bus = bab
+            self.drambone_bus.memory_map = self.drambone.bus.memory_map
+
+            bab = WBAsyncBridge(master_bus=self.drambone_bus,
+                                slave_bus=self.drambone.bus,
+                                master_clock_domain=None,
+                                slave_clock_domain="dramsync",
+                                address_width=self.drambone.bus.addr_width,
+                                data_width=self.drambone.bus.data_width,
+                                granularity=self.drambone.bus.granularity,
+                                master_features={'stall'})
+            self.drambone_async_br = bab
+
+        if ddr_pins is not None:
+            # Add wishbone decoders
+            self._decoder.add(self.dramcore_bus, addr=dramcore_addr)
+            self._decoder.add(self.drambone_bus, addr=ddr_addr)
+            self._decoder.add(self.ddrphy_bus, addr=ddrphy_addr)
 
         # additional SRAM at address if DRAM is not also at 0x0
         # (TODO, check Flash, and HyperRAM as well)
@@ -567,10 +651,38 @@ class DDR3SoC(SoC, Elaboratable):
             m.submodules.ddrphy = self.ddrphy
             m.submodules.dramcore = self.dramcore
             m.submodules.drambone = drambone = self.drambone
-            # grrr, same problem with drambone: not WB4-pipe compliant
-            # XXX TAKE THIS OUT, REPLACE WITH ASYNCBRIDGE HAVING
-            # XXX asyncbridge.bus.stall.eq(asyncbridge.bus.cyc & ...)
-            comb += drambone.bus.stall.eq(drambone.bus.cyc & ~drambone.bus.ack)
+
+            # add async wishbone bridges
+            if hasattr(self, "ddrphy_async_br"):
+                m.submodules.ddrphy_async_br = self.ddrphy_async_br
+            if hasattr(self, "dramcore_async_br"):
+                m.submodules.dramcore_async_br = self.dramcore_async_br
+            if hasattr(self, "drambone_async_br"):
+                m.submodules.drambone_async_br = self.drambone_async_br
+
+            # grrr, same problem with WB async bridge: not WB4-pipe compliant
+            dab = self.ddrphy_bus
+            if hasattr(dab, "stall"):
+                comb += dab.stall.eq(dab.cyc & ~dab.ack)
+            dab = self.dramcore_bus
+            if hasattr(dab, "stall"):
+                comb += dab.stall.eq(dab.cyc & ~dab.ack)
+            dab = self.drambone_bus
+            comb += dab.stall.eq(dab.cyc & ~dab.ack)
+
+            # add wb async bridge verilog source. assumes directory structure
+            # where bridge has been checked out in a common subdirectory with:
+            # git clone https://github.com/alexforencich/verilog-wishbone.git
+            # git checkout d1fa24a0
+            verilog_wishbone = "../../verilog-wishbone/rtl"
+            pth = os.path.split(__file__)[0]
+            pth = os.path.join(pth, verilog_wishbone)
+            fname = os.path.abspath(pth)
+            print (fname)
+            if hasattr(self, "ddrphy_async_br"):
+                self.dramcore_async_br.add_verilog_source(fname, platform)
+            if hasattr(self, "drambone_async_br"):
+                self.drambone_async_br.add_verilog_source(fname, platform)
 
         # add hyperram module
         if hasattr(self, "hyperram"):
@@ -735,21 +847,29 @@ def build_platform(fpga, firmware):
 
     # set clock frequency
     clk_freq = 70e6
+    dram_clk_freq = None
     if fpga == 'sim':
         clk_freq = 100e6
+        dram_clk_freq = clk_freq
     if fpga == 'isim':
         clk_freq = 55e6 # below 50 mhz, stops DRAM being enabled
     if fpga == 'versa_ecp5':
         clk_freq = 50e6 # crank right down to test hyperram
+        #dram_clk_freq = 100e6
     if fpga == 'versa_ecp5_85':
         # 50MHz works.  100MHz works.  55MHz does NOT work.
         # Stick with multiples of 50MHz...
         clk_freq = 50e6
+        dram_clk_freq = 100e6
     if fpga == 'arty_a7':
         clk_freq = 50e6
     if fpga == 'ulx3s':
         clk_freq = 40.0e6
 
+    # merge dram_clk_freq with clk_freq if the same
+    if clk_freq == dram_clk_freq:
+        dram_clk_freq = None
+
     # select a firmware address
     fw_addr = None
     if firmware is not None:
@@ -769,14 +889,14 @@ def build_platform(fpga, firmware):
         fpga in ['versa_ecp5', 'versa_ecp5_85', 'arty_a7', 'isim']):
         ddr_pins = platform.request("ddr3", 0,
                                     dir={"dq":"-", "dqs":"-"},
-                                    xdr={"rst": 1, "clk":4, "a":4,
+                                    xdr={"rst": 4, "clk":4, "a":4,
                                          "ba":4, "clk_en":4,
                                          "odt":4, "ras":4, "cas":4, "we":4,
                                          "cs": 4})
 
     # Get SPI resource pins
     spi_0_pins = None
-    if platform is not None and \
+    if False and platform is not None and \
        fpga in ['versa_ecp5', 'versa_ecp5_85', 'isim']:
         # Override here to get FlashResource out of the way and enable Tercel
         # direct access to the SPI flash.
@@ -840,16 +960,16 @@ def build_platform(fpga, firmware):
         ethmac_0_pins = platform.request("ethmac_0", 0,
                                         dir={"mtx_clk":"i", "mtxd":"o",
                                              "mtxen":"o",
-                                             "mtxerr":"o", "mrx_clk":"i", 
+                                             "mtxerr":"o", "mrx_clk":"i",
                                              "mrxd":"i",
-                                             "mrxdv":"i", "mrxerr":"i", 
+                                             "mrxdv":"i", "mrxerr":"i",
                                              "mcoll":"i",
                                              "mcrs":"i", "mdc":"o", "md":"io"},
-                                        xdr={"mtx_clk": 0, "mtxd": 0, 
+                                        xdr={"mtx_clk": 0, "mtxd": 0,
                                              "mtxen": 0,
-                                             "mtxerr": 0, "mrx_clk": 0, 
+                                             "mtxerr": 0, "mrx_clk": 0,
                                              "mrxd": 0,
-                                             "mrxdv": 0, "mrxerr": 0, 
+                                             "mrxdv": 0, "mrxerr": 0,
                                              "mcoll": 0,
                                              "mcrs": 0, "mdc": 0, "md": 0})
     print ("ethmac pins", ethmac_0_pins)
@@ -912,6 +1032,7 @@ def build_platform(fpga, firmware):
                   xics_icp_addr=0xc000_4000, # XICS_ICP_BASE
                   xics_ics_addr=0xc000_5000, # XICS_ICS_BASE
                   clk_freq=clk_freq,
+                  dram_clk_freq=dram_clk_freq,
                   add_cpu=True)
 
     if toolchain == 'Trellis':