1 """ Load / Store partial address matcher
3 Loads and Stores do not need a full match (CAM), they need "good enough"
4 avoidance. Around 11 bits on a 64-bit address is "good enough".
6 The simplest way to use this module is to ignore not only the top bits,
7 but also the bottom bits as well: in this case (this RV64 processor),
enough to cover a DWORD (64-bit). That means ignore the bottom 4 bits,
9 due to the possibility of 64-bit LD/ST being misaligned.
11 To reiterate: the use of this module is an *optimisation*. All it has
12 to do is cover the cases that are *definitely* matches (by checking 11
13 bits or so), and if a few opportunities for parallel LD/STs are missed
14 because the top (or bottom) bits weren't checked, so what: all that
15 happens is: the mis-matched addresses are LD/STd on single-cycles. Big Deal.
17 However, if we wanted to enhance this algorithm (without using a CAM and
18 without using expensive comparators) probably the best way to do so would
19 be to turn the last 16 bits into a byte-level bitmap. LD/ST on a byte
20 would have 1 of the 16 bits set. LD/ST on a DWORD would have 8 of the 16
21 bits set (offset if the LD/ST was misaligned). TODO.
25 > I have used bits <11:6> as they are not translated (4KB pages)
26 > and larger than a cache line (64 bytes).
27 > I have used bits <11:4> when the L1 cache was QuadW sized and
> the L2 cache was Line sized.
"""
from nmigen.compat.sim import run_simulation
from nmigen.cli import verilog, rtlil
from nmigen import Module, Signal, Const, Array, Cat, Elaboratable

from nmutil.latch import latchregister, SRLatch
class PartialAddrMatch(Elaboratable):
    """A partial address matcher.

    Latches up to ``n_adr`` addresses of ``bitwid`` bits each and reports,
    combinatorially, which latched addresses do *not* clash with any other
    currently-active latched address.

    Signals:
    * addrs_i[i]          -- i-th incoming address
    * addr_we_i           -- per-address write-enable
                             NOTE(review): declared but not used by
                             _elaborate below; confirm intended use.
    * addr_en_i           -- per-address "latch this address in" (sets latch)
    * addr_rs_i           -- per-address "deactivate" (resets latch)
    * addr_nomatch_o      -- bit i set when address i matches no other address
    * addr_nomatch_a_o[i] -- per-address bitmap: which addresses i does NOT
                             match (masked by the active-latch vector)
    """

    def __init__(self, n_adr, bitwid):
        # geometry must be recorded: _elaborate reads both attributes
        self.n_adr = n_adr
        self.bitwid = bitwid

        # inputs
        self.addrs_i = Array(Signal(bitwid, name="addr")
                             for i in range(n_adr))
        self.addr_we_i = Signal(n_adr)  # write-enable for incoming address
        self.addr_en_i = Signal(n_adr)  # address latched in
        self.addr_rs_i = Signal(n_adr)  # address deactivated

        # outputs
        self.addr_nomatch_o = Signal(n_adr, name="nomatch_o")
        self.addr_nomatch_a_o = Array(Signal(n_adr, name="nomatch_array_o")
                                      for i in range(n_adr))

    def elaborate(self, platform):
        # Module is created here (not in _elaborate) so that subclasses
        # can reuse _elaborate with their own Module.
        m = Module()
        return self._elaborate(m, platform)

    def _elaborate(self, m, platform):
        comb = m.d.comb

        # one SR latch per address: records which address slots are active
        m.submodules.l = l = SRLatch(llen=self.n_adr, sync=False)
        addrs_r = Array(Signal(self.bitwid, name="a_r")
                        for i in range(self.n_adr))

        # latch set on "enable", reset on "deactivate"
        comb += l.s.eq(self.addr_en_i)
        comb += l.r.eq(self.addr_rs_i)

        # copy in addresses (and "enable" signals)
        for i in range(self.n_adr):
            latchregister(m, self.addrs_i[i], addrs_r[i], l.q[i])

        # is there a clash, yes/no: compare every address against every
        # other (never against itself), masked by the active-latch vector
        matchgrp = []
        for i in range(self.n_adr):
            match = []
            for j in range(self.n_adr):
                if i == j:
                    match.append(Const(0))  # don't match against self!
                else:
                    match.append(addrs_r[i] == addrs_r[j])
            comb += self.addr_nomatch_a_o[i].eq(~Cat(*match) & l.q)
            # address i is globally clash-free when it mis-matches
            # every active address
            matchgrp.append(self.addr_nomatch_a_o[i] == l.q)
        comb += self.addr_nomatch_o.eq(Cat(*matchgrp) & l.q)

        return m

    def __iter__(self):
        """Yield all external signals (for ports())."""
        yield from self.addrs_i
        yield self.addr_we_i
        yield self.addr_en_i
        yield from self.addr_nomatch_a_o
        yield self.addr_nomatch_o

    def ports(self):
        return list(self)
def part_addr_sim(dut):
    """Simulation stimulus generator.

    Each bare ``yield`` advances the simulation by one step; without them
    every assignment lands in the same delta and the simulation never
    advances (the bare yields had been dropped — restored here).

    NOTE(review): drives dest_i/issue_i/src1_i/go_rd_i/go_wr_i, none of
    which exist on PartialAddrMatch — this looks copied from a function-unit
    dependency-cell test; confirm against the intended DUT.
    """
    yield dut.dest_i.eq(1)
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield
    yield dut.src1_i.eq(1)
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield
    yield dut.go_rd_i.eq(1)
    yield
    yield dut.go_rd_i.eq(0)
    yield
    yield dut.go_wr_i.eq(1)
    yield
    yield dut.go_wr_i.eq(0)
    yield
def test_part_addr():
    """Convert a 3-entry, 10-bit PartialAddrMatch to RTLIL, write it to
    test_part_addr.il, then run the stimulus sim (VCD written alongside).
    """
    dut = PartialAddrMatch(3, 10)
    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_part_addr.il", "w") as f:
        # was missing: vl was generated but never written out
        f.write(vl)

    run_simulation(dut, part_addr_sim(dut), vcd_name='test_part_addr.vcd')
if __name__ == '__main__':
    # guard body had been dropped: actually run the test when executed
    # as a script
    test_part_addr()