add fabric compatibility mode
diff --git a/src/soc/experiment/dcache.py b/src/soc/experiment/dcache.py
index 0b74c4cd35fc4f084213a4f1755599c0128eb6fe..eae0bc7582866b702318609491f974f9cc9e8e38 100644
@@ -513,12 +513,14 @@ class DTLBUpdate(Elaboratable):
         print ("    TLB_NUM_WAYS", cfg.TLB_NUM_WAYS)
 
         # TAG and PTE Memory SRAMs. transparent, write-enables are TLB_NUM_WAYS
-        tagway = Memory(depth=cfg.TLB_SET_SIZE, width=cfg.TLB_TAG_WAY_BITS)
+        tagway = Memory(depth=cfg.TLB_SET_SIZE, width=cfg.TLB_TAG_WAY_BITS,
+                        attrs={'syn_ramstyle': "block_ram"})
         m.submodules.rd_tagway = rd_tagway = tagway.read_port()
         m.submodules.wr_tagway = wr_tagway = tagway.write_port(
                                     granularity=cfg.TLB_EA_TAG_BITS)
 
-        pteway = Memory(depth=cfg.TLB_SET_SIZE, width=cfg.TLB_PTE_WAY_BITS)
+        pteway = Memory(depth=cfg.TLB_SET_SIZE, width=cfg.TLB_PTE_WAY_BITS,
+                        attrs={'syn_ramstyle': "block_ram"})
         m.submodules.rd_pteway = rd_pteway = pteway.read_port()
         m.submodules.wr_pteway = wr_pteway = pteway.write_port(
                                     granularity=cfg.TLB_PTE_BITS)
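
The attrs dict is passed through nmigen's Memory and emitted as an attribute
on the memory in the generated netlist, where BRAM-capable synthesis flows
treat syn_ramstyle as a mapping hint.  A minimal sketch of the same pattern
in isolation (depth/width here are placeholders, not the TLB's):

    from nmigen import Module, Memory

    m = Module()
    mem = Memory(depth=16, width=32, attrs={'syn_ramstyle': "block_ram"})
    m.submodules.rd = rd = mem.read_port()
    m.submodules.wr = wr = mem.write_port(granularity=8)
    # the generated verilog carries (* syn_ramstyle = "block_ram" *) on the
    # memory, which the synthesis tool may honour when inferring block RAM
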
@@ -739,18 +741,39 @@ class DCache(Elaboratable, DCacheConfig):
 
         self.log_out   = Signal(20)
 
+        # test if small cache is to be enabled
+        self.small_cache = (hasattr(pspec, "small_cache") and
+                            (pspec.small_cache == True))
         # test if microwatt compatibility is to be enabled
         self.microwatt_compat = (hasattr(pspec, "microwatt_compat") and
                                  (pspec.microwatt_compat == True))
-
-        if self.microwatt_compat:
-            # reduce way sizes and num lines
-            super().__init__(NUM_LINES = 16,
-                              NUM_WAYS = 1,
-                              TLB_NUM_WAYS = 1,
-                              TLB_SET_SIZE=16) # XXX needs device-tree entry
-        else:
-            super().__init__()
+        # test if fabric compatibility is to be enabled
+        self.fabric_compat = (hasattr(pspec, "fabric_compat") and
+                              (pspec.fabric_compat == True))
+
+        XLEN = pspec.XLEN
+        TLB_SET_SIZE = 8
+        TLB_NUM_WAYS = 2
+        NUM_LINES = 8
+        NUM_WAYS = 2
+
+        if self.small_cache:
+            # reduce ways, lines and TLB sets to ridiculously small values
+            TLB_SET_SIZE = 2
+            TLB_NUM_WAYS = 1
+            NUM_LINES = 2
+            NUM_WAYS = 1
+        if self.microwatt_compat or self.fabric_compat:
+            # reduce number of ways to 1
+            NUM_WAYS = 1
+            TLB_NUM_WAYS = 1
+
+        super().__init__(TLB_SET_SIZE=TLB_SET_SIZE,
+                         # XLEN=XLEN, # TODO
+                         TLB_NUM_WAYS=TLB_NUM_WAYS,
+                         NUM_LINES=NUM_LINES,
+                         NUM_WAYS=NUM_WAYS
+                        )
 
     def stage_0(self, m, r0, r1, r0_full):
         """Latch the request in r0.req as long as we're not stalling
@@ -845,7 +868,7 @@ class DCache(Elaboratable, DCacheConfig):
             return
 
         # suite of PLRUs with a selection and output mechanism
-        tlb_plrus = PLRUs(self.TLB_SET_SIZE, self.TLB_WAY_BITS)
+        tlb_plrus = PLRUs("d_tlb", self.TLB_SET_SIZE, self.TLB_WAY_BITS)
         m.submodules.tlb_plrus = tlb_plrus
         comb += tlb_plrus.way.eq(r1.tlb_hit.way)
         comb += tlb_plrus.valid.eq(r1.tlb_hit.valid)
@@ -951,7 +974,8 @@ class DCache(Elaboratable, DCacheConfig):
             return
 
         # suite of PLRUs with a selection and output mechanism
-        m.submodules.plrus = plrus = PLRUs(self.NUM_LINES, self.WAY_BITS)
+        m.submodules.plrus = plrus = PLRUs("dtag", self.NUM_LINES,
+                                           self.WAY_BITS)
         comb += plrus.way.eq(r1.hit_way)
         comb += plrus.valid.eq(r1.cache_hit)
         comb += plrus.index.eq(r1.hit_index)
@@ -966,8 +990,9 @@ class DCache(Elaboratable, DCacheConfig):
 
         m_in, d_in = self.m_in, self.d_in
 
-        # synchronous tag read-port
-        m.submodules.rd_tag = rd_tag = self.tagmem.read_port()
+        # synchronous tag read-port: NOT TRANSPARENT (a same-cycle write to
+        # the address being read is not forwarded), seems to pass tests ok
+        m.submodules.rd_tag = rd_tag = self.tagmem.read_port(transparent=False)
 
         index = Signal(self.INDEX_BITS)
 
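
On nmigen's synchronous read ports the transparent flag only controls
same-cycle write forwarding: a transparent port returns data being written
to the same address in the same cycle, while a non-transparent one returns
the old contents (the new data appears a cycle later), which is what typical
FPGA block-RAM read ports do.  Sketched in isolation:

    from nmigen import Module, Memory

    m = Module()
    mem = Memory(depth=4, width=8)
    # default: transparent, same-cycle write data is forwarded to the read
    m.submodules.rd_t = rd_t = mem.read_port()
    # non-transparent, as for the tag RAM above: a read clashing with a
    # write to the same address returns the stale (pre-write) contents
    m.submodules.rd_nt = rd_nt = mem.read_port(transparent=False)
    m.submodules.wr = wr = mem.write_port()
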
@@ -1586,20 +1611,14 @@ class DCache(Elaboratable, DCacheConfig):
                         pass
 
             with m.Case(State.RELOAD_WAIT_ACK):
-                ld_stbs_done = Signal()
-                # Requests are all sent if stb is 0
-                comb += ld_stbs_done.eq(~r1.wb.stb)
 
                 # If we are still sending requests, was one accepted?
                 with m.If((~bus.stall) & r1.wb.stb):
-                    # That was the last word?  We are done sending.
-                    # Clear stb and set ld_stbs_done so we can handle an
-                    # eventual last ack on the same cycle.
+                    # That was the last word?  We are done sending.  Clear stb
                     # sigh - reconstruct wb adr with 3 extra 0s at front
                     wb_adr = Cat(Const(0, self.ROW_OFF_BITS), r1.wb.adr)
                     with m.If(self.is_last_row_addr(wb_adr, r1.end_row_ix)):
                         sync += r1.wb.stb.eq(0)
-                        comb += ld_stbs_done.eq(1)
 
                     # Calculate the next row address in the current cache line
                     rlen = self.LINE_OFF_BITS-self.ROW_OFF_BITS
@@ -1619,11 +1638,15 @@ class DCache(Elaboratable, DCacheConfig):
                     # Compare the whole address in case the
                     # request in r1.req is not the one that
                     # started this refill.
+                    rowmatch = Signal()
+                    lastrow = Signal()
+                    comb += rowmatch.eq(r1.store_row ==
+                                        self.get_row(r1.req.real_addr))
+                    comb += lastrow.eq(self.is_last_row(r1.store_row,
+                                                        r1.end_row_ix))
                     with m.If(r1.full & r1.req.same_tag &
                               ((r1.dcbz & req.dcbz) |
-                               (r1.req.op == Op.OP_LOAD_MISS)) &
-                                (r1.store_row ==
-                                 self.get_row(r1.req.real_addr))):
+                               (r1.req.op == Op.OP_LOAD_MISS)) & rowmatch):
                         sync += r1.full.eq(r1_next_cycle)
                         sync += r1.slow_valid.eq(1)
                         with m.If(r1.mmu_req):
@@ -1634,8 +1657,7 @@ class DCache(Elaboratable, DCacheConfig):
                         sync += r1.use_forward1.eq(1)
 
                     # Check for completion
-                    with m.If(ld_stbs_done & self.is_last_row(r1.store_row,
-                                                      r1.end_row_ix)):
+                    with m.If(lastrow):
                         # Complete wishbone cycle
                         sync += r1.wb.cyc.eq(0)
 
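
The rowmatch/lastrow change is an instance of a simple pattern: pull
sub-expressions out of a wide m.If condition into named one-bit Signals, so
they show up in waveform traces and the condition stays readable.  The
pattern in isolation (widths and names illustrative, not the dcache's):

    from nmigen import Module, Signal

    m = Module()
    store_row, req_row, end_row = Signal(4), Signal(4), Signal(4)

    rowmatch = Signal()  # current store row matches the request's row
    lastrow = Signal()   # current store row is the final row of the line
    m.d.comb += rowmatch.eq(store_row == req_row)
    m.d.comb += lastrow.eq(store_row == end_row)

    with m.If(rowmatch & lastrow):
        pass  # act on the named one-bit results instead of inline compares
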
@@ -1754,7 +1776,8 @@ class DCache(Elaboratable, DCacheConfig):
         cache_valids     = self.CacheValidsArray()
         cache_tag_set    = Signal(self.TAG_RAM_WIDTH)
 
-        self.tagmem = Memory(depth=self.NUM_LINES, width=self.TAG_RAM_WIDTH)
+        self.tagmem = Memory(depth=self.NUM_LINES, width=self.TAG_RAM_WIDTH,
+                             attrs={'syn_ramstyle': "block_ram"})
 
         """note: these are passed to nmigen.hdl.Memory as "attributes".
            don't know how, just that they are.
@@ -1835,7 +1858,7 @@ class DCache(Elaboratable, DCacheConfig):
         # deal with litex not doing wishbone pipeline mode
         # XXX in wrong way.  FIFOs are needed in the SRAM test
         # so that stb/ack match up. same thing done in icache.py
-        if not self.microwatt_compat:
+        if not (self.microwatt_compat or self.fabric_compat):
             comb += self.bus.stall.eq(self.bus.cyc & ~self.bus.ack)
 
         # Wire up wishbone request latch out of stage 1
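
The stall assignment is what lets a pipelined Wishbone master talk to a
classic (non-pipelined) slave: with stall = cyc & ~ack a new stb cannot be
issued until the outstanding request has been acknowledged, so at most one
transaction is in flight.  The equation on bare Signals (names illustrative):

    from nmigen import Module, Signal

    m = Module()
    cyc, stb, ack, stall = Signal(), Signal(), Signal(), Signal()
    m.d.comb += stall.eq(cyc & ~ack)  # same equation as above
    # a pipelined master treats stb & ~stall as "request accepted", so the
    # next request waits until the previous ack has arrived
    accepted = Signal()
    m.d.comb += accepted.eq(stb & ~stall)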