move all source directories to soc so that "import soc.scoreboard" etc is used
[soc.git] / src / soc / scoreboard / addr_match.py
1 """ Load / Store partial address matcher
2
3 Loads and Stores do not need a full match (CAM), they need "good enough"
4 avoidance. Around 11 bits on a 64-bit address is "good enough".
5
6 The simplest way to use this module is to ignore not only the top bits,
7 but also the bottom bits as well: in this case (this RV64 processor),
8 enough to cover a DWORD (64-bit). That means ignore the bottom 4 bits,
9 due to the possibility of 64-bit LD/ST being misaligned.
10
11 To reiterate: the use of this module is an *optimisation*. All it has
12 to do is cover the cases that are *definitely* matches (by checking 11
13 bits or so), and if a few opportunities for parallel LD/STs are missed
14 because the top (or bottom) bits weren't checked, so what: all that
15 happens is: the mis-matched addresses are LD/STd on single-cycles. Big Deal.
16
17 However, if we wanted to enhance this algorithm (without using a CAM and
18 without using expensive comparators) probably the best way to do so would
19 be to turn the last 16 bits into a byte-level bitmap. LD/ST on a byte
20 would have 1 of the 16 bits set. LD/ST on a DWORD would have 8 of the 16
21 bits set (offset if the LD/ST was misaligned). TODO.
22
23 Notes:
24
25 > I have used bits <11:6> as they are not translated (4KB pages)
26 > and larger than a cache line (64 bytes).
27 > I have used bits <11:4> when the L1 cache was QuadW sized and
28 > the L2 cache was Line sized.
29 """
30
31 from nmigen.compat.sim import run_simulation
32 from nmigen.cli import verilog, rtlil
33 from nmigen import Module, Signal, Const, Array, Cat, Elaboratable
34
35 from nmutil.latch import latchregister, SRLatch
36
37
class PartialAddrMatch(Elaboratable):
    """A partial address matcher.

    Holds ``n_adr`` address slots of ``bitwid`` bits each and compares every
    held address against every other one, reporting which slots do *not*
    clash with each other.

    Inputs:

    * ``addrs_i``   - Array of ``n_adr`` incoming addresses.
    * ``addr_we_i`` - per-slot write-enable.  Declared and exported as a
      port, but not referenced by ``_elaborate``.
    * ``addr_en_i`` - per-slot "set" input of the internal SR latch.
    * ``addr_rs_i`` - per-slot "reset" input of the internal SR latch.

    Outputs:

    * ``addr_nomatch_a_o`` - Array of ``n_adr`` vectors.  Bit ``j`` of
      entry ``i`` is set when latch bit ``j`` is set and held address
      ``j`` differs from held address ``i`` (the self-comparison is
      forced to "no match").
    * ``addr_nomatch_o``   - bit ``i`` is set when latch bit ``i`` is set
      and entry ``i`` of ``addr_nomatch_a_o`` equals the latch output,
      i.e. no latched slot matches slot ``i``.
    """

    def __init__(self, n_adr, bitwid):
        """Create the port signals.

        n_adr:  number of address slots to compare against each other.
        bitwid: width in bits of each (partial) address.
        """
        self.n_adr = n_adr
        self.bitwid = bitwid
        # inputs
        self.addrs_i = Array(Signal(bitwid, name="addr")
                             for i in range(n_adr))
        self.addr_we_i = Signal(n_adr)  # write-enable for incoming address
        self.addr_en_i = Signal(n_adr)  # address latched in
        self.addr_rs_i = Signal(n_adr)  # address deactivated

        # output
        self.addr_nomatch_o = Signal(n_adr, name="nomatch_o")
        self.addr_nomatch_a_o = Array(Signal(n_adr, name="nomatch_array_o")
                                      for i in range(n_adr))

    def elaborate(self, platform):
        """Create a fresh Module and fill it in via ``_elaborate``."""
        m = Module()
        return self._elaborate(m, platform)

    def _elaborate(self, m, platform):
        """Add the latch, the address holding registers and the comparators
        to Module ``m``, and return ``m``.
        """
        comb = m.d.comb

        # one latch bit per slot, driven by addr_en_i / addr_rs_i below.
        # NOTE(review): presumably l.q is the "slot is active" flag, set by
        # l.s and cleared by l.r - confirm against nmutil.latch.SRLatch.
        m.submodules.l = l = SRLatch(llen=self.n_adr, sync=False)
        addrs_r = Array(Signal(self.bitwid, name="a_r")
                        for i in range(self.n_adr))

        # latch set/reset
        comb += l.s.eq(self.addr_en_i)
        comb += l.r.eq(self.addr_rs_i)

        # copy in addresses (and "enable" signals)
        # NOTE(review): latchregister is an nmutil helper; presumably it
        # passes addrs_i[i] through to addrs_r[i] while l.q[i] is set and
        # holds the value otherwise - confirm against nmutil.latch.
        for i in range(self.n_adr):
            latchregister(m, self.addrs_i[i], addrs_r[i], l.q[i])

        # is there a clash, yes/no
        matchgrp = []
        for i in range(self.n_adr):
            match = []
            for j in range(self.n_adr):
                if i == j:
                    match.append(Const(0))  # don't match against self!
                else:
                    match.append(addrs_r[i] == addrs_r[j])
            # invert "match" into "no match", masked by the latch output
            comb += self.addr_nomatch_a_o[i].eq(~Cat(*match) & l.q)
            # slot i is clash-free when its no-match vector equals l.q
            matchgrp.append(self.addr_nomatch_a_o[i] == l.q)
        # only slots whose latch bit is set may report "no match"
        comb += self.addr_nomatch_o.eq(Cat(*matchgrp) & l.q)

        return m

    def __iter__(self):
        """Yield every port signal, inputs first and then outputs."""
        yield from self.addrs_i
        yield self.addr_we_i
        yield self.addr_en_i
        # the reset input drives l.r, so it must be exported as a port too
        yield self.addr_rs_i
        yield from self.addr_nomatch_a_o
        yield self.addr_nomatch_o

    def ports(self):
        """Return the port signals as a list (see ``__iter__``)."""
        return list(self)
99
100
def part_addr_sim(dut):
    """Simulation stimulus for a PartialAddrMatch instance.

    Drives only ports that PartialAddrMatch actually declares
    (``addrs_i``, ``addr_en_i``, ``addr_rs_i``):

    1. for each slot in turn, present the same address value and pulse
       that slot's bit of ``addr_en_i`` for one cycle, followed by one
       idle cycle;
    2. for each slot in turn, pulse that slot's bit of ``addr_rs_i`` for
       one cycle, followed by one idle cycle.

    The function makes no assertions; it only produces activity for the
    waveform dump.
    """
    # step 1: feed every slot the same address so that they all clash
    for idx in range(dut.n_adr):
        yield dut.addrs_i[idx].eq(1)
        yield dut.addr_en_i.eq(1 << idx)
        yield
        yield dut.addr_en_i.eq(0)
        yield

    # step 2: release the slots one at a time
    for idx in range(dut.n_adr):
        yield dut.addr_rs_i.eq(1 << idx)
        yield
        yield dut.addr_rs_i.eq(0)
        yield
120
def test_part_addr():
    """Build a 3-slot, 10-bit matcher, dump its RTLIL to
    ``test_part_addr.il`` and run ``part_addr_sim`` against it, writing
    the waveform to ``test_part_addr.vcd``.
    """
    matcher = PartialAddrMatch(3, 10)

    # write out the RTLIL for inspection
    il_text = rtlil.convert(matcher, ports=matcher.ports())
    with open("test_part_addr.il", "w") as il_file:
        il_file.write(il_text)

    # then simulate, recording a VCD trace
    stimulus = part_addr_sim(matcher)
    run_simulation(matcher, stimulus, vcd_name='test_part_addr.vcd')
128
# run the self-test when this file is executed as a script
if __name__ == "__main__":
    test_part_addr()