From 2b9b7be75a2b405456fedace894a47bc32ed2b14 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Fri, 15 Apr 2022 17:48:09 +0100 Subject: [PATCH] work-in-progress asynchronous DRAM wishbone bridge which is optional when dram_clk is not requested --- src/ls2.py | 233 ++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 177 insertions(+), 56 deletions(-) diff --git a/src/ls2.py b/src/ls2.py index cc55d19..f7586cf 100644 --- a/src/ls2.py +++ b/src/ls2.py @@ -34,6 +34,7 @@ from soc.bus.tercel import Tercel # SPI XIP master from soc.bus.opencores_ethmac import EthMAC # OpenCores 10/100 Ethernet MAC from soc.bus.external_core import ExternalCore # external libresoc/microwatt from soc.bus.wb_downconvert import WishboneDownConvert +from soc.bus.wb_async import WBAsyncBridge from soc.bus.syscon import MicrowattSYSCON from soc.interrupts.xics import XICS_ICP, XICS_ICS @@ -249,6 +250,7 @@ class DDR3SoC(SoC, Elaboratable): hyperram_pins=None, xics_icp_addr=None, xics_ics_addr=None, clk_freq=50e6, + dram_clk_freq=None, add_cpu=True): # wishbone routing is as follows: @@ -262,11 +264,13 @@ class DDR3SoC(SoC, Elaboratable): # | # 64to32DownCvt # | - # arbiter------------------------------------------+ - # | | - # +---decoder----+--------+---------+-------+--------+ | - # | | | | | | | | - # uart XICS CSRs DRAM XIP SPI HyperRAM EthMAC + # arbiter------------------------------------------------------+ + # | | + # +---decoder----+--------+---------------+-------------+--------+ | + # | | | | | | | | + # | | | WBAsyncBridge | | | | + # | | | | | | | | + # uart XICS CSRs DRAM XIP SPI HyperRAM EthMAC # set up wishbone bus arbiter and decoder. 
arbiter routes, # decoder maps local-relative addressed satellites to global addresses @@ -287,10 +291,15 @@ class DDR3SoC(SoC, Elaboratable): 'orangecrab']: if fpga in ['isim']: pod_bits = 6 - self.crg = ECP5CRG(clk_freq, dram_clk_freq=None, pod_bits=pod_bits) + self.crg = ECP5CRG(clk_freq, dram_clk_freq=dram_clk_freq, + pod_bits=pod_bits) if fpga in ['arty_a7']: self.crg = ArtyA7CRG(clk_freq) + self.dram_clk_freq = dram_clk_freq + if self.dram_clk_freq is None: + self.dram_clk_freq = clk_freq + # set up CPU, with 64-to-32-bit downconverters if add_cpu: self.cpu = ExternalCore(name="ext_core") @@ -345,6 +354,7 @@ class DDR3SoC(SoC, Elaboratable): spi_offset = 2<<20 if (spi_0_pins is not None) else None dram_offset = ddr_addr if (ddr_pins is not None) else None self.syscon = MicrowattSYSCON(sys_clk_freq=clk_freq, + mem_clk_freq=self.dram_clk_freq, has_uart=(uart_pins is not None), spi_offset=spi_offset, dram_addr=dram_offset) @@ -394,57 +404,131 @@ class DDR3SoC(SoC, Elaboratable): tRAS=44)} """ - # DRAM Module + # DRAM Module. first, create the (triple) modules: + # * DDR PHY + # * gram Core: presents PHY with a DFI Interface + # * gram Bone (aka gram-with-wishbone) connects wishbone to DFI + # from there it gets a little complicated because of supporting + # several options: simulation, synchronous, and asynchronous clocks. + # dram_clk_freq can *never* be set equal to clk_freq, if it is, + # it's assumed to be synchronous, and the dram Domains need renaming + if ddr_pins is not None: # or fpga == 'sim': - ddrmodule = dram_cls(clk_freq, "1:2") # match DDR3 ASIC P/N + ddrmodule = dram_cls(self.dram_clk_freq, "1:2") # match DDR3 P/N # remap both the sync domain (wherever it occurs) and - # the sync2x domain. technically this should NOT be done. - # it's a bit of a mess. 
ok: this should be done only - # when dramsync===sync (and dramsync2x===sync2x) - drs = DomainRenamer({"sync": "dramsync", - "sync2x": "dramsync2x"}) - - # HOWEVER, when the ASyncBridge is deployed, the two domains - # must NOT be renamed, instead this used: - #drs = lambda x: x - # and then the ASyncBridge takes care of the two. - # but, back in ecp5_crg.py, when ASyncBridge is added, - # dram_clk_freq must be passed to ECP5CRG, which will call - # ECP5CRG.phase2_domain on your behalf, setting up the - # necessary dramsync2x which is needed for the xdr=4 IOpads + # the sync2x domain, if dram frequency is specified and + # not equal to the core clock + drs = None + if dram_clk_freq is not None or fpga == 'sim': + drs = lambda x: x + else: + drs = DomainRenamer({"sync": "dramsync", + "sync2x": "dramsync2x"}) + + features = set() + if dram_clk_freq is None: + features.add("stall") + # create the PHY (fake one for sim) if fpga == 'sim': + settings = sim_ddr3_settings(self.dram_clk_freq) self.ddrphy = FakePHY(module=ddrmodule, - settings=sim_ddr3_settings(clk_freq), + settings=settings, verbosity=SDRAM_VERBOSE_DBG, - clk_freq=clk_freq) + clk_freq=self.dram_clk_freq) else: - self.ddrphy = drs(ECP5DDRPHY(ddr_pins, sys_clk_freq=clk_freq)) - self._decoder.add(self.ddrphy.bus, addr=ddrphy_addr) + self.ddrphy = drs(ECP5DDRPHY(ddr_pins, + #features=features, + sys_clk_freq=self.dram_clk_freq)) + # create the core (bridge from PHY to DFI) dramcore = gramCore(phy=self.ddrphy, geom_settings=ddrmodule.geom_settings, timing_settings=ddrmodule.timing_settings, - clk_freq=clk_freq) - if fpga == 'sim': - self.dramcore = dramcore - else: - self.dramcore = drs(dramcore) - self._decoder.add(self.dramcore.bus, addr=dramcore_addr) - - # map the DRAM onto Wishbone, XXX use stall but set classic below - # XXX WHEN ADDING ASYNCBRIDGE IT IS THE **BRIDGE** THAT MUST - # XXX HAVE THE STALL SIGNAL, AND THE **BRIDGE** THAT MUST HAVE - # XXX stall=stb&~ack APPLIED - drambone = 
gramWishbone(dramcore, features={'stall'}) - if fpga == 'sim': - self.drambone = drambone - else: - self.drambone = drs(drambone) - # XXX ADD THE ASYNCBRIDGE NOT THE DRAMBONE.BUS, THEN - # XXX ADD DRAMBONE.BUS TO ASYNCBRIDGE - self._decoder.add(self.drambone.bus, addr=ddr_addr) + #features=features, + clk_freq=self.dram_clk_freq) + self.dramcore = drs(dramcore) + + # create the wishbone presentation (wishbone to DFI) + drambone = gramWishbone(dramcore, features=features) + self.drambone = drs(drambone) + + # this is the case where sys_clk === dram_clk. no ASync Bridge + # needed, so just let the phy, core and wb-dfi be connected + # directly to WB decoder. all three are running in "sync" domain + # (because of the DomainRenamer, above) + + if ddr_pins is not None and dram_clk_freq is None: + self.ddrphy_bus = self.ddrphy.bus + self.dramcore_bus = self.dramcore.bus + self.drambone_bus = self.drambone.bus + + # this covers the case where sys_clk != dram_clk: three separate + # ASync Bridges are constructed (!) and the interface that's to + # be wired to the WB decoder is the async bus because that's running + # in the "sync" domain. 
+ if ddr_pins is not None and dram_clk_freq is not None: + # Set up Wishbone asynchronous bridge for the DDR PHY bus + pabus = wishbone.Interface(addr_width=self.ddrphy.bus.addr_width, + data_width=self.ddrphy.bus.data_width, + granularity=self.ddrphy.bus.granularity, + features={'stall'}) + self.ddrphy_bus = pabus + self.ddrphy_bus.memory_map = self.ddrphy.bus.memory_map + + pabr = WBAsyncBridge(master_bus=self.ddrphy_bus, + slave_bus=self.ddrphy.bus, + master_clock_domain=None, + slave_clock_domain="dramsync", + address_width=self.ddrphy.bus.addr_width, + data_width=self.ddrphy.bus.data_width, + granularity=self.ddrphy.bus.granularity, + master_features={'stall'}) + self.ddrphy_async_br = pabr + + # Set up Wishbone asynchronous bridge for the gram core bus + dab = wishbone.Interface(addr_width=self.dramcore.bus.addr_width, + data_width=self.dramcore.bus.data_width, + granularity=self.dramcore.bus.granularity, + features={'stall'}) + self.dramcore_bus = dab + self.dramcore_bus.memory_map = self.dramcore.bus.memory_map + + dac = WBAsyncBridge(master_bus=self.dramcore_bus, + slave_bus=self.dramcore.bus, + master_clock_domain=None, + slave_clock_domain="dramsync", + address_width=self.dramcore.bus.addr_width, + data_width=self.dramcore.bus.data_width, + granularity=self.dramcore.bus.granularity, + master_features={'stall'}) + self.dramcore_async_br = dac + + # Set up Wishbone asynchronous bridge for the gram wishbone bus + bab = wishbone.Interface(addr_width=self.drambone.bus.addr_width, + data_width=self.drambone.bus.data_width, + granularity=self.drambone.bus.granularity, + features={'stall'}) + self.drambone_bus = bab + self.drambone_bus.memory_map = self.drambone.bus.memory_map + + bab = WBAsyncBridge(master_bus=self.drambone_bus, + slave_bus=self.drambone.bus, + master_clock_domain=None, + slave_clock_domain="dramsync", + address_width=self.drambone.bus.addr_width, + data_width=self.drambone.bus.data_width, + granularity=self.drambone.bus.granularity, + master_features={'stall'}) + self.drambone_async_br = bab + + if ddr_pins 
is not None: + # Add wishbone decoders + self._decoder.add(self.dramcore_bus, addr=dramcore_addr) + self._decoder.add(self.drambone_bus, addr=ddr_addr) + self._decoder.add(self.ddrphy_bus, addr=ddrphy_addr) # additional SRAM at address if DRAM is not also at 0x0 # (TODO, check Flash, and HyperRAM as well) @@ -567,10 +651,38 @@ class DDR3SoC(SoC, Elaboratable): m.submodules.ddrphy = self.ddrphy m.submodules.dramcore = self.dramcore m.submodules.drambone = drambone = self.drambone - # grrr, same problem with drambone: not WB4-pipe compliant - # XXX TAKE THIS OUT, REPLACE WITH ASYNCBRIDGE HAVING - # XXX asyncbridge.bus.stall.eq(asyncbridge.bus.cyc & ...) - comb += drambone.bus.stall.eq(drambone.bus.cyc & ~drambone.bus.ack) + + # add async wishbone bridges + if hasattr(self, "ddrphy_async_br"): + m.submodules.ddrphy_async_br = self.ddrphy_async_br + if hasattr(self, "dramcore_async_br"): + m.submodules.dramcore_async_br = self.dramcore_async_br + if hasattr(self, "drambone_async_br"): + m.submodules.drambone_async_br = self.drambone_async_br + + # grrr, same problem with WB async bridge: not WB4-pipe compliant + dab = self.ddrphy_bus + if hasattr(dab, "stall"): + comb += dab.stall.eq(dab.cyc & ~dab.ack) + dab = self.dramcore_bus + if hasattr(dab, "stall"): + comb += dab.stall.eq(dab.cyc & ~dab.ack) + dab = self.drambone_bus + comb += dab.stall.eq(dab.cyc & ~dab.ack) + + # add wb async bridge verilog source. 
assumes directory structure + # where bridge has been checked out in a common subdirectory with: + # git clone https://github.com/alexforencich/verilog-wishbone.git + # git checkout d1fa24a0 + verilog_wishbone = "../../verilog-wishbone/rtl" + pth = os.path.split(__file__)[0] + pth = os.path.join(pth, verilog_wishbone) + fname = os.path.abspath(pth) + print (fname) + if hasattr(self, "ddrphy_async_br"): + self.dramcore_async_br.add_verilog_source(fname, platform) + if hasattr(self, "drambone_async_br"): + self.drambone_async_br.add_verilog_source(fname, platform) # add hyperram module if hasattr(self, "hyperram"): @@ -735,21 +847,29 @@ def build_platform(fpga, firmware): # set clock frequency clk_freq = 70e6 + dram_clk_freq = None if fpga == 'sim': clk_freq = 100e6 + dram_clk_freq = clk_freq if fpga == 'isim': clk_freq = 55e6 # below 50 mhz, stops DRAM being enabled if fpga == 'versa_ecp5': clk_freq = 50e6 # crank right down to test hyperram + #dram_clk_freq = 100e6 if fpga == 'versa_ecp5_85': # 50MHz works. 100MHz works. 55MHz does NOT work. # Stick with multiples of 50MHz... clk_freq = 50e6 + dram_clk_freq = 100e6 if fpga == 'arty_a7': clk_freq = 50e6 if fpga == 'ulx3s': clk_freq = 40.0e6 + # merge dram_clk_freq with clk_freq if the same + if clk_freq == dram_clk_freq: + dram_clk_freq = None + # select a firmware address fw_addr = None if firmware is not None: @@ -769,14 +889,14 @@ def build_platform(fpga, firmware): fpga in ['versa_ecp5', 'versa_ecp5_85', 'arty_a7', 'isim']): ddr_pins = platform.request("ddr3", 0, dir={"dq":"-", "dqs":"-"}, - xdr={"rst": 1, "clk":4, "a":4, + xdr={"rst": 4, "clk":4, "a":4, "ba":4, "clk_en":4, "odt":4, "ras":4, "cas":4, "we":4, "cs": 4}) # Get SPI resource pins spi_0_pins = None - if platform is not None and \ + if False and platform is not None and \ fpga in ['versa_ecp5', 'versa_ecp5_85', 'isim']: # Override here to get FlashResource out of the way and enable Tercel # direct access to the SPI flash. 
@@ -840,16 +960,16 @@ def build_platform(fpga, firmware): ethmac_0_pins = platform.request("ethmac_0", 0, dir={"mtx_clk":"i", "mtxd":"o", "mtxen":"o", - "mtxerr":"o", "mrx_clk":"i", + "mtxerr":"o", "mrx_clk":"i", "mrxd":"i", - "mrxdv":"i", "mrxerr":"i", + "mrxdv":"i", "mrxerr":"i", "mcoll":"i", "mcrs":"i", "mdc":"o", "md":"io"}, - xdr={"mtx_clk": 0, "mtxd": 0, + xdr={"mtx_clk": 0, "mtxd": 0, "mtxen": 0, - "mtxerr": 0, "mrx_clk": 0, + "mtxerr": 0, "mrx_clk": 0, "mrxd": 0, - "mrxdv": 0, "mrxerr": 0, + "mrxdv": 0, "mrxerr": 0, "mcoll": 0, "mcrs": 0, "mdc": 0, "md": 0}) print ("ethmac pins", ethmac_0_pins) @@ -912,6 +1032,7 @@ def build_platform(fpga, firmware): xics_icp_addr=0xc000_4000, # XICS_ICP_BASE xics_ics_addr=0xc000_5000, # XICS_ICS_BASE clk_freq=clk_freq, + dram_clk_freq=dram_clk_freq, add_cpu=True) if toolchain == 'Trellis': -- 2.30.2