-# based on ariane plru, from tlb.sv
+# based on microwatt plru.vhdl
+# https://github.com/antonblanchard/microwatt/blob/f67b1431655c291fc1c99857a5c1ef624d5b264c/plru.vhdl
# new PLRU API, once all users have migrated to new API in plru2.py, then
# plru2.py will be renamed to plru.py.
-from nmigen import Signal, Module, Cat, Const, Repl, Array
-from nmigen.hdl.ir import Elaboratable
+from nmigen.hdl.ir import Elaboratable, Display, Signal, Array, Const, Value
+from nmigen.hdl.dsl import Module
from nmigen.cli import rtlil
-from nmigen.utils import log2_int
from nmigen.lib.coding import Decoder
lvl0 0
/ \
/ \
- lvl1 1 2
- / \ / \
- lvl2 3 4 5 6
- / \ /\/\ /\
+ / \
+ lvl1 1 2
+ / \ / \
+ lvl2 3 4 5 6
+ / \ / \ / \ / \
... ... ... ...
"""
- def __init__(self, BITS):
- self.BITS = BITS
- self.acc_i = Signal(BITS)
- self.acc_en = Signal()
- self.lru_o = Signal(BITS)
-
- self._plru_tree = Signal(self.TLBSZ)
+ def __init__(self, log2_num_ways, debug=False):
+ # type: (int, bool) -> None
+ """
+ Arguments:
+ log2_num_ways: int
+ the log-base-2 of the number of cache ways -- BITS in plru.vhdl
+ debug: bool
+ true if this should print debugging messages at simulation time.
+ """
+ assert log2_num_ways > 0
+ self.log2_num_ways = log2_num_ways
+ self.debug = debug
+ self.acc_i = Signal(log2_num_ways)
+ self.acc_en_i = Signal()
+ self.lru_o = Signal(log2_num_ways)
+
+ def mk_tree(i):
+ return Signal(name=f"tree_{i}", reset=0)
+
+ # the original VHDL declares the tree array one entry too big (its last
+ # entry is never used), so create only num_ways - 1 tree nodes here
+ self._tree = Array(mk_tree(i) for i in range(self.num_ways - 1))
""" exposed only for testing """
- @property
- def TLBSZ(self):
- return 2 * (self.BITS - 1)
-
- def elaborate(self, platform=None):
- m = Module()
+ def mk_node(i, prefix):
+ return Signal(range(self.num_ways), name=f"{prefix}_node_{i}",
+ reset=0)
- # Tree (bit per entry)
-
- # Just predefine which nodes will be set/cleared
- # E.g. for a TLB with 8 entries, the for-loop is semantically
- # equivalent to the following pseudo-code:
- # unique case (1'b1)
- # acc_en[7]: plru_tree[0, 2, 6] = {1, 1, 1};
- # acc_en[6]: plru_tree[0, 2, 6] = {1, 1, 0};
- # acc_en[5]: plru_tree[0, 2, 5] = {1, 0, 1};
- # acc_en[4]: plru_tree[0, 2, 5] = {1, 0, 0};
- # acc_en[3]: plru_tree[0, 1, 4] = {0, 1, 1};
- # acc_en[2]: plru_tree[0, 1, 4] = {0, 1, 0};
- # acc_en[1]: plru_tree[0, 1, 3] = {0, 0, 1};
- # acc_en[0]: plru_tree[0, 1, 3] = {0, 0, 0};
- # default: begin /* No hit */ end
- # endcase
-
- LOG_TLB = log2_int(self.BITS, False)
- hit = Signal(self.BITS, reset_less=True)
- m.d.comb += hit.eq(Repl(self.acc_en, self.BITS) & self.acc_i)
-
- for i in range(self.BITS):
- # we got a hit so update the pointer as it was least recently used
- with m.If(hit[i]):
- # Set the nodes to the values we would expect
- for lvl in range(LOG_TLB):
- idx_base = (1 << lvl)-1
- # lvl0 <=> MSB, lvl1 <=> MSB-1, ...
- shift = LOG_TLB - lvl
- new_idx = Const(~((i >> (shift-1)) & 1), 1)
- plru_idx = idx_base + (i >> shift)
- # print("plru", i, lvl, hex(idx_base),
- # plru_idx, shift, new_idx)
- m.d.sync += self._plru_tree[plru_idx].eq(new_idx)
-
- # Decode tree to write enable signals
- # Next for-loop basically creates the following logic for e.g.
- # an 8 entry TLB (note: pseudo-code obviously):
- # replace_en[7] = &plru_tree[ 6, 2, 0]; #plru_tree[0,2,6]=={1,1,1}
- # replace_en[6] = &plru_tree[~6, 2, 0]; #plru_tree[0,2,6]=={1,1,0}
- # replace_en[5] = &plru_tree[ 5,~2, 0]; #plru_tree[0,2,5]=={1,0,1}
- # replace_en[4] = &plru_tree[~5,~2, 0]; #plru_tree[0,2,5]=={1,0,0}
- # replace_en[3] = &plru_tree[ 4, 1,~0]; #plru_tree[0,1,4]=={0,1,1}
- # replace_en[2] = &plru_tree[~4, 1,~0]; #plru_tree[0,1,4]=={0,1,0}
- # replace_en[1] = &plru_tree[ 3,~1,~0]; #plru_tree[0,1,3]=={0,0,1}
- # replace_en[0] = &plru_tree[~3,~1,~0]; #plru_tree[0,1,3]=={0,0,0}
- # For each entry traverse the tree. If every tree-node matches
- # the corresponding bit of the entry's index, this is
- # the next entry to replace.
- replace = []
- for i in range(self.BITS):
- en = []
- for lvl in range(LOG_TLB):
- idx_base = (1 << lvl)-1
- # lvl0 <=> MSB, lvl1 <=> MSB-1, ...
- shift = LOG_TLB - lvl
- new_idx = (i >> (shift-1)) & 1
- plru_idx = idx_base + (i >> shift)
- plru = Signal(reset_less=True,
- name="plru-%d-%d-%d-%d" %
- (i, lvl, plru_idx, new_idx))
- m.d.comb += plru.eq(self._plru_tree[plru_idx])
- if new_idx:
- en.append(~plru) # yes inverted (using bool() below)
- else:
- en.append(plru) # yes inverted (using bool() below)
- #print("plru", i, en)
- # boolean logic manipulation:
- # plru0 & plru1 & plru2 == ~(~plru0 | ~plru1 | ~plru2)
- replace.append(~Cat(*en).bool())
- m.d.comb += self.lru_o.eq(Cat(*replace))
+ nodes_range = range(self.log2_num_ways)
- return m
-
- def ports(self):
- return [self.acc_en, self.lru_o, self.acc_i]
+ self._get_lru_nodes = [mk_node(i, "get_lru") for i in nodes_range]
+ """ exposed only for testing """
+ self._upd_lru_nodes = [mk_node(i, "upd_lru") for i in nodes_range]
+ """ exposed only for testing """
-class PLRUs(Elaboratable):
- def __init__(self, n_plrus, n_bits):
- self.n_plrus = n_plrus
- self.n_bits = n_bits
- self.valid = Signal()
- self.way = Signal(n_bits)
- self.index = Signal(n_plrus.bit_length())
- self.isel = Signal(n_plrus.bit_length())
- self.o_index = Signal(n_bits)
+ @property
+ def num_ways(self):
+ # number of cache ways, i.e. 2 ** log2_num_ways
+ return 1 << self.log2_num_ways
+
+ def _display(self, msg, *args):
+ # Returns a list of statements, so callers can `+=` the result onto a
+ # domain unconditionally: the list is empty when self.debug is false,
+ # otherwise it holds a single Display of `msg` with `args`.
+ if not self.debug:
+ return []
+ # work around not yet having
+ # https://gitlab.com/nmigen/nmigen/-/merge_requests/10
+ # by sending through Value.cast()
+ return [Display(msg, *map(Value.cast, args))]
+
+ def _get_lru(self, m):
+ """ get_lru process in plru.vhdl """
+ # Drives lru_o combinatorially. The walk starts at tree node 0; on
+ # iteration i the visited node's tree bit is copied to
+ # lru_o[log2_num_ways - 1 - i] (so the MSB is produced first) and the
+ # next node becomes 2*node + 2 when that bit is set, else 2*node + 1.
+ # XXX Check if we can turn that into a little ROM instead that
+ # takes the tree bit vector and returns the LRU. See if it's better
+ # in terms of FPGA resource usage...
+ m.d.comb += self._get_lru_nodes[0].eq(0)
+ for i in range(self.log2_num_ways):
+ node = self._get_lru_nodes[i]
+ val = self._tree[node]
+ m.d.comb += self._display("GET: i:%i node:%#x val:%i",
+ i, node, val)
+ m.d.comb += self.lru_o[self.log2_num_ways - 1 - i].eq(val)
+ if i != self.log2_num_ways - 1:
+ # Differs from the microwatt version, which indexes the tree with
+ # the doubled `node * 2` value at this point; here the choice uses
+ # `val`, read with the undoubled node earlier in this iteration.
+ node <<= 1
+ with m.If(val):
+ m.d.comb += self._get_lru_nodes[i + 1].eq(node + 2)
+ with m.Else():
+ m.d.comb += self._get_lru_nodes[i + 1].eq(node + 1)
+
+ def _update_lru(self, m):
+ """ update_lru process in plru.vhdl """
+ # On an access (acc_en_i set) the tree is walked from node 0 along
+ # the path selected by acc_i, MSB first. Each visited node's tree bit
+ # is registered (sync domain) as the inverse of the matching acc_i
+ # bit, so a later _get_lru walk takes the opposite child at that node.
+ # The next node is 2*node + 2 when the acc_i bit is set, else
+ # 2*node + 1.
+ with m.If(self.acc_en_i):
+ m.d.comb += self._upd_lru_nodes[0].eq(0)
+ for i in range(self.log2_num_ways):
+ node = self._upd_lru_nodes[i]
+ abit = self.acc_i[self.log2_num_ways - 1 - i]
+ m.d.sync += [
+ self._tree[node].eq(~abit),
+ self._display("UPD: i:%i node:%#x val:%i",
+ i, node, ~abit),
+ ]
+ if i != self.log2_num_ways - 1:
+ node <<= 1
+ with m.If(abit):
+ m.d.comb += self._upd_lru_nodes[i + 1].eq(node + 2)
+ with m.Else():
+ m.d.comb += self._upd_lru_nodes[i + 1].eq(node + 1)
- def elaborate(self, platform):
- """Generate TLB PLRUs
- """
+ def elaborate(self, platform=None):
+ # Builds the module from the two processes: the combinatorial lru_o
+ # read-out (_get_lru) and the tree update on access (_update_lru).
m = Module()
- comb = m.d.comb
-
- if self.n_plrus == 0:
- return m
-
- # Binary-to-Unary one-hot, enabled by valid
- m.submodules.te = te = Decoder(self.n_plrus)
- comb += te.n.eq(~self.valid)
- comb += te.i.eq(self.index)
-
- out = Array(Signal(self.n_bits, name="plru_out%d" % x)
- for x in range(self.n_plrus))
-
- for i in range(self.n_plrus):
- # PLRU interface
- m.submodules["plru_%d" % i] = plru = PLRU(self.n_bits)
-
- comb += plru.acc_en.eq(te.o[i])
- comb += plru.acc_i.eq(self.way)
- comb += out[i].eq(plru.lru_o)
-
- # select output based on index
- comb += self.o_index.eq(out[self.isel])
-
+ self._get_lru(m)
+ self._update_lru(m)
return m
+ def __iter__(self):
+ # yields the port signals in a fixed order: acc_i, acc_en_i, lru_o
+ yield self.acc_i
+ yield self.acc_en_i
+ yield self.lru_o
+
def ports(self):
- return [self.valid, self.way, self.index, self.isel, self.o_index]
+ # the port list is whatever iterating over this object yields
+ return list(self)
+
+
+# FIXME: convert PLRUs to new API
+# class PLRUs(Elaboratable):
+# def __init__(self, n_plrus, n_bits):
+# self.n_plrus = n_plrus
+# self.n_bits = n_bits
+# self.valid = Signal()
+# self.way = Signal(n_bits)
+# self.index = Signal(n_plrus.bit_length())
+# self.isel = Signal(n_plrus.bit_length())
+# self.o_index = Signal(n_bits)
+#
+# def elaborate(self, platform):
+# """Generate TLB PLRUs
+# """
+# m = Module()
+# comb = m.d.comb
+#
+# if self.n_plrus == 0:
+# return m
+#
+# # Binary-to-Unary one-hot, enabled by valid
+# m.submodules.te = te = Decoder(self.n_plrus)
+# comb += te.n.eq(~self.valid)
+# comb += te.i.eq(self.index)
+#
+# out = Array(Signal(self.n_bits, name="plru_out%d" % x)
+# for x in range(self.n_plrus))
+#
+# for i in range(self.n_plrus):
+# # PLRU interface
+# m.submodules["plru_%d" % i] = plru = PLRU(self.n_bits)
+#
+# comb += plru.acc_en.eq(te.o[i])
+# comb += plru.acc_i.eq(self.way)
+# comb += out[i].eq(plru.lru_o)
+#
+# # select output based on index
+# comb += self.o_index.eq(out[self.isel])
+#
+# return m
+#
+# def ports(self):
+# return [self.valid, self.way, self.index, self.isel, self.o_index]
if __name__ == '__main__':
with open("test_plru.il", "w") as f:
f.write(vl)
- dut = PLRUs(4, 2)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_plrus.il", "w") as f:
- f.write(vl)
+ # dut = PLRUs(4, 2)
+ # vl = rtlil.convert(dut, ports=dut.ports())
+ # with open("test_plrus.il", "w") as f:
+ # f.write(vl)