1 """ Load / Store partial address matcher
3 Loads and Stores do not need a full match (CAM), they need "good enough"
4 avoidance. Around 11 bits on a 64-bit address is "good enough".
6 The simplest way to use this module is to ignore not only the top bits,
7 but also the bottom bits as well: in this case (this RV64 processor),
enough to cover a DWORD (64-bit). That means ignore the bottom 4 bits,
9 due to the possibility of 64-bit LD/ST being misaligned.
11 To reiterate: the use of this module is an *optimisation*. All it has
12 to do is cover the cases that are *definitely* matches (by checking 11
13 bits or so), and if a few opportunities for parallel LD/STs are missed
14 because the top (or bottom) bits weren't checked, so what: all that
15 happens is: the mis-matched addresses are LD/STd on single-cycles. Big Deal.
17 However, if we wanted to enhance this algorithm (without using a CAM and
18 without using expensive comparators) probably the best way to do so would
19 be to turn the last 16 bits into a byte-level bitmap. LD/ST on a byte
20 would have 1 of the 16 bits set. LD/ST on a DWORD would have 8 of the 16
21 bits set (offset if the LD/ST was misaligned). TODO.
25 > I have used bits <11:6> as they are not translated (4KB pages)
26 > and larger than a cache line (64 bytes).
27 > I have used bits <11:4> when the L1 cache was QuadW sized and
> the L2 cache was Line sized.
"""
from nmigen.compat.sim import run_simulation
from nmigen.cli import verilog, rtlil
from nmigen import Module, Signal, Const, Array, Cat, Elaboratable

from nmutil.latch import latchregister, SRLatch
class PartialAddrMatch(Elaboratable):
    """A partial address matcher.

    Latches up to ``n_adr`` addresses of ``bitwid`` bits each and reports,
    combinatorially, which latched addresses do *not* clash with any other
    currently-active latched address.

    Signals:
    * addrs_i[i]          -- i-th incoming address
    * addr_we_i           -- per-address write-enable
                             NOTE(review): declared but not used by
                             _elaborate below; confirm intended use.
    * addr_en_i           -- per-address "latch this address in" (sets latch)
    * addr_rs_i           -- per-address "deactivate" (resets latch)
    * addr_nomatch_o      -- bit i set when address i matches no other address
    * addr_nomatch_a_o[i] -- per-address bitmap: which addresses i does NOT
                             match (masked by the active-latch vector)
    """

    def __init__(self, n_adr, bitwid):
        # geometry must be recorded: _elaborate reads both attributes
        self.n_adr = n_adr
        self.bitwid = bitwid

        # inputs
        self.addrs_i = Array(Signal(bitwid, name="addr")
                             for i in range(n_adr))
        self.addr_we_i = Signal(n_adr)  # write-enable for incoming address
        self.addr_en_i = Signal(n_adr)  # address latched in
        self.addr_rs_i = Signal(n_adr)  # address deactivated

        # outputs
        self.addr_nomatch_o = Signal(n_adr, name="nomatch_o")
        self.addr_nomatch_a_o = Array(Signal(n_adr, name="nomatch_array_o")
                                      for i in range(n_adr))

    def elaborate(self, platform):
        # Module is created here (not in _elaborate) so that subclasses
        # can reuse _elaborate with their own Module.
        m = Module()
        return self._elaborate(m, platform)

    def _elaborate(self, m, platform):
        comb = m.d.comb

        # one SR latch per address: records which address slots are active
        m.submodules.l = l = SRLatch(llen=self.n_adr, sync=False)
        addrs_r = Array(Signal(self.bitwid, name="a_r")
                        for i in range(self.n_adr))

        # latch set on "enable", reset on "deactivate"
        comb += l.s.eq(self.addr_en_i)
        comb += l.r.eq(self.addr_rs_i)

        # copy in addresses (and "enable" signals)
        for i in range(self.n_adr):
            latchregister(m, self.addrs_i[i], addrs_r[i], l.q[i])

        # is there a clash, yes/no: compare every address against every
        # other (never against itself), masked by the active-latch vector
        matchgrp = []
        for i in range(self.n_adr):
            match = []
            for j in range(self.n_adr):
                if i == j:
                    match.append(Const(0))  # don't match against self!
                else:
                    match.append(addrs_r[i] == addrs_r[j])
            comb += self.addr_nomatch_a_o[i].eq(~Cat(*match) & l.q)
            # address i is globally clash-free when it mis-matches
            # every active address
            matchgrp.append(self.addr_nomatch_a_o[i] == l.q)
        comb += self.addr_nomatch_o.eq(Cat(*matchgrp) & l.q)

        return m

    def __iter__(self):
        """Yield all external signals (for ports())."""
        yield from self.addrs_i
        yield self.addr_we_i
        yield self.addr_en_i
        yield from self.addr_nomatch_a_o
        yield self.addr_nomatch_o

    def ports(self):
        return list(self)
def part_addr_sim(dut):
    """Simulation stimulus generator.

    Each bare ``yield`` advances the simulation by one step; without them
    every assignment lands in the same delta and the simulation never
    advances (the bare yields had been dropped — restored here).

    NOTE(review): drives dest_i/issue_i/src1_i/go_rd_i/go_wr_i, none of
    which exist on PartialAddrMatch — this looks copied from a function-unit
    dependency-cell test; confirm against the intended DUT.
    """
    yield dut.dest_i.eq(1)
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield
    yield dut.src1_i.eq(1)
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield
    yield dut.go_rd_i.eq(1)
    yield
    yield dut.go_rd_i.eq(0)
    yield
    yield dut.go_wr_i.eq(1)
    yield
    yield dut.go_wr_i.eq(0)
    yield
def test_part_addr():
    """Convert a 3-entry, 10-bit PartialAddrMatch to RTLIL, write it to
    test_part_addr.il, then run the stimulus sim (VCD written alongside).
    """
    dut = PartialAddrMatch(3, 10)
    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_part_addr.il", "w") as f:
        # was missing: vl was generated but never written out
        f.write(vl)

    run_simulation(dut, part_addr_sim(dut), vcd_name='test_part_addr.vcd')
if __name__ == '__main__':
    # guard body had been dropped: actually run the test when executed
    # as a script
    test_part_addr()