reorg TLB src
authorLuke Kenneth Casson Leighton <lkcl@lkcl.net>
Tue, 7 May 2019 05:42:28 +0000 (06:42 +0100)
committerLuke Kenneth Casson Leighton <lkcl@lkcl.net>
Tue, 7 May 2019 05:42:28 +0000 (06:42 +0100)
46 files changed:
src/TLB/AddressEncoder.py [new file with mode: 0644]
src/TLB/Cam.py [new file with mode: 0644]
src/TLB/CamEntry.py [new file with mode: 0644]
src/TLB/LFSR.py [new file with mode: 0644]
src/TLB/LFSR.pyi [new file with mode: 0644]
src/TLB/Makefile [new file with mode: 0644]
src/TLB/MemorySet.py [new file with mode: 0644]
src/TLB/PermissionValidator.py [new file with mode: 0644]
src/TLB/PteEntry.py [new file with mode: 0644]
src/TLB/SetAssociativeCache.py [new file with mode: 0644]
src/TLB/TLB.py [new file with mode: 0644]
src/TLB/__init__.py [new file with mode: 0644]
src/TLB/ariane/TreePLRU.cpp [new file with mode: 0644]
src/TLB/ariane/p_lru.txt [new file with mode: 0644]
src/TLB/ariane/src/exceptcause.py [new file with mode: 0644]
src/TLB/ariane/src/mmu.py [new file with mode: 0644]
src/TLB/ariane/src/plru.py [new file with mode: 0644]
src/TLB/ariane/src/ptw.py [new file with mode: 0644]
src/TLB/ariane/src/tlb.py [new file with mode: 0644]
src/TLB/ariane/src/tlb_content.py [new file with mode: 0644]
src/TLB/ariane/test/test_plru.py [new file with mode: 0644]
src/TLB/ariane/test/test_ptw.py [new file with mode: 0644]
src/TLB/ariane/test/test_tlb.py [new file with mode: 0644]
src/TLB/src/AddressEncoder.py [deleted file]
src/TLB/src/Cam.py [deleted file]
src/TLB/src/CamEntry.py [deleted file]
src/TLB/src/LFSR.py [deleted file]
src/TLB/src/LFSR.pyi [deleted file]
src/TLB/src/Makefile [deleted file]
src/TLB/src/MemorySet.py [deleted file]
src/TLB/src/PermissionValidator.py [deleted file]
src/TLB/src/PteEntry.py [deleted file]
src/TLB/src/SetAssociativeCache.py [deleted file]
src/TLB/src/TLB.py [deleted file]
src/TLB/src/__init__.py [deleted file]
src/TLB/src/ariane/TreePLRU.cpp [deleted file]
src/TLB/src/ariane/p_lru.txt [deleted file]
src/TLB/src/ariane/src/exceptcause.py [deleted file]
src/TLB/src/ariane/src/mmu.py [deleted file]
src/TLB/src/ariane/src/plru.py [deleted file]
src/TLB/src/ariane/src/ptw.py [deleted file]
src/TLB/src/ariane/src/tlb.py [deleted file]
src/TLB/src/ariane/src/tlb_content.py [deleted file]
src/TLB/src/ariane/test/test_plru.py [deleted file]
src/TLB/src/ariane/test/test_ptw.py [deleted file]
src/TLB/src/ariane/test/test_tlb.py [deleted file]

diff --git a/src/TLB/AddressEncoder.py b/src/TLB/AddressEncoder.py
new file mode 100644 (file)
index 0000000..4c4b8d7
--- /dev/null
@@ -0,0 +1,75 @@
+from nmigen import Module, Signal
+from nmigen.lib.coding import Encoder, PriorityEncoder
+
+class AddressEncoder():
+    """Address Encoder
+
+       The purpose of this module is to take in a vector and
+       encode the bits that are one hot into an address. This module
+       combines both nmigen's Encoder and PriorityEncoder and will state
+       whether the input line has a single bit hot, multiple bits hot,
+       or no bits hot. The output line will always have the lowest value
+       address output.
+
+       Usage:
+       The output is valid when either single or multiple match is high.
+       Otherwise output is 0.
+    """
+    def __init__(self, width):
+        """ Arguments:
+            * width: The desired length of the input vector
+        """
+        # Internal
+        self.encoder = Encoder(width)
+        self.p_encoder = PriorityEncoder(width)
+
+        # Input
+        self.i = Signal(width)
+
+        # Output
+        self.single_match = Signal(1)
+        self.multiple_match = Signal(1)
+        self.o = Signal(max=width)
+
+    def elaborate(self, platform=None):
+        m = Module()
+
+        # Add internal submodules
+        m.submodules.encoder = self.encoder
+        m.submodules.p_encoder = self.p_encoder
+
+        m.d.comb += [
+            self.encoder.i.eq(self.i),
+            self.p_encoder.i.eq(self.i)
+        ]
+
+        # Steps:
+        # 1. check if the input vector is non-zero
+        # 2. if non-zero, check if single match or multiple match
+        # 3. set output line to be lowest value address output
+
+        # If the priority encoder recieves an input of 0
+        # If n is 1 then the output is not valid
+        with m.If(self.p_encoder.n):
+            m.d.comb += [
+                self.single_match.eq(0),
+                self.multiple_match.eq(0),
+                self.o.eq(0)
+            ]
+        # If the priority encoder recieves an input > 0
+        with m.Else():
+            # Multiple Match if encoder n is invalid
+            with m.If(self.encoder.n):
+                m.d.comb += [
+                    self.single_match.eq(0),
+                    self.multiple_match.eq(1)
+                ]
+            # Single Match if encoder n is valid
+            with m.Else():
+                m.d.comb += [
+                    self.single_match.eq(1),
+                    self.multiple_match.eq(0)
+                ]
+            # Always set output based on priority encoder output
+            m.d.comb += self.o.eq(self.p_encoder.o)
+        return m
diff --git a/src/TLB/Cam.py b/src/TLB/Cam.py
new file mode 100644 (file)
index 0000000..3c49921
--- /dev/null
@@ -0,0 +1,124 @@
+from nmigen import Array, Cat, Module, Signal
+from nmigen.lib.coding import Decoder
+from nmigen.cli import main #, verilog
+
+from CamEntry import CamEntry
+from AddressEncoder import AddressEncoder
+
+class Cam():
+    """ Content Addressable Memory (CAM)
+
+        The purpose of this module is to quickly look up whether an
+        entry exists given a data key.
+        This module will search for the given data in all internal entries
+        and output whether a  single or multiple match was found.
+        If an single entry is found the address be returned and single_match
+        is set HIGH. If multiple entries are found the lowest address is
+        returned and multiple_match is set HIGH. If neither single_match or
+        multiple_match are HIGH this implies no match was found. To write
+        to the CAM set the address bus to the desired entry and set write_enable
+        HIGH. Entry managment should be performed one level above this block
+        as lookup is performed within.
+
+        Notes:
+        The read and write operations take one clock cycle to complete.
+        Currently the read_warning line is present for interfacing but
+        is not necessary for this design. This module is capable of writing
+        in the first cycle, reading on the second, and output the correct
+        address on the third.
+    """
+
+    def __init__(self, data_size, cam_size):
+        """ Arguments:
+            * data_size: (bits) The bit size of the data
+            * cam_size: (number) The number of entries in the CAM
+        """
+
+        # Internal
+        self.cam_size = cam_size
+        self.encoder = AddressEncoder(cam_size)
+        self.decoder = Decoder(cam_size)
+        self.entry_array = Array(CamEntry(data_size) for x in range(cam_size))
+
+        # Input
+        self.enable = Signal(1)
+        self.write_enable = Signal(1)
+        self.data_in = Signal(data_size) # The data to be written
+        self.data_mask = Signal(data_size) # mask for ternary writes
+        self.address_in = Signal(max=cam_size) # address of CAM Entry to write
+
+        # Output
+        self.read_warning = Signal(1) # High when a read interrupts a write
+        self.single_match = Signal(1) # High when there is only one match
+        self.multiple_match = Signal(1) # High when there at least two matches
+        self.match_address = Signal(max=cam_size) # The lowest address matched
+
+    def elaborate(self, platform=None):
+        m = Module()
+        # AddressEncoder for match types and output address
+        m.submodules.AddressEncoder = self.encoder
+        # Decoder is used to select which entry will be written to
+        m.submodules.Decoder = self.decoder
+        # CamEntry Array Submodules
+        # Note these area added anonymously
+        entry_array = self.entry_array
+        m.submodules += entry_array
+
+        # Decoder logic
+        m.d.comb += [
+            self.decoder.i.eq(self.address_in),
+            self.decoder.n.eq(0)
+        ]
+
+        encoder_vector = []
+        with m.If(self.enable):
+            # Set the key value for every CamEntry
+            for index in range(self.cam_size):
+
+                # Write Operation
+                with m.If(self.write_enable):
+                    with m.If(self.decoder.o[index]):
+                        m.d.comb += entry_array[index].command.eq(2)
+                    with m.Else():
+                        m.d.comb += entry_array[index].command.eq(0)
+
+                # Read Operation
+                with m.Else():
+                    m.d.comb += entry_array[index].command.eq(1)
+
+                # Send data input to all entries
+                m.d.comb += entry_array[index].data_in.eq(self.data_in)
+                # Send all entry matches to encoder
+                ematch = entry_array[index].match
+                encoder_vector.append(ematch)
+
+            # Give input to and accept output from encoder module
+            m.d.comb += [
+                self.encoder.i.eq(Cat(*encoder_vector)),
+                self.single_match.eq(self.encoder.single_match),
+                self.multiple_match.eq(self.encoder.multiple_match),
+                self.match_address.eq(self.encoder.o)
+            ]
+
+        # If the CAM is not enabled set all outputs to 0
+        with m.Else():
+            m.d.comb += [
+                    self.read_warning.eq(0),
+                    self.single_match.eq(0),
+                    self.multiple_match.eq(0),
+                    self.match_address.eq(0)
+            ]
+
+        return m
+
+    def ports(self):
+        return [self.enable, self.write_enable,
+                     self.data_in, self.data_mask,
+                     self.read_warning, self.single_match,
+                     self.multiple_match, self.match_address]
+
+
+if __name__ == '__main__':
+    cam = Cam(4, 4)
+    main(cam, ports=cam.ports())
+
diff --git a/src/TLB/CamEntry.py b/src/TLB/CamEntry.py
new file mode 100644 (file)
index 0000000..73081ce
--- /dev/null
@@ -0,0 +1,45 @@
+from nmigen import Module, Signal
+
+class CamEntry:
+    """ Content Addressable Memory (CAM) Entry
+
+        The purpose of this module is to represent an entry within a CAM.
+        This module when given a read command will compare  the given data
+        and output whether a match was found or not. When given a write
+        command it will write the given data into internal registers.
+    """
+
+    def __init__(self, data_size):
+        """ Arguments:
+            * data_size: (bit count) The size of the data
+        """
+        # Input
+        self.command = Signal(2) # 00 => NA 01 => Read 10 => Write 11 => Reset
+        self.data_in = Signal(data_size) # Data input when writing
+
+        # Output
+        self.match = Signal(1) # Result of the internal/input key comparison
+        self.data = Signal(data_size)
+
+    def elaborate(self, platform=None):
+        m = Module()
+        with m.Switch(self.command):
+            with m.Case("00"):
+                m.d.sync += self.match.eq(0)
+            with m.Case("01"):
+                with m.If(self.data == self.data_in):
+                    m.d.sync += self.match.eq(1)
+                with m.Else():
+                    m.d.sync += self.match.eq(0)
+            with m.Case("10"):
+                m.d.sync += [
+                    self.data.eq(self.data_in),
+                    self.match.eq(0)
+                ]
+            with m.Case():
+                m.d.sync += [
+                    self.match.eq(0),
+                    self.data.eq(0)
+                ]
+
+        return m
diff --git a/src/TLB/LFSR.py b/src/TLB/LFSR.py
new file mode 100644 (file)
index 0000000..d8b606e
--- /dev/null
@@ -0,0 +1,109 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+# See Notices.txt for copyright information
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+from nmigen.cli import verilog, rtlil
+
+
+class LFSRPolynomial(set):
+    """ implements a polynomial for use in LFSR
+    """
+    def __init__(self, exponents=()):
+        for e in exponents:
+            assert isinstance(e, int), TypeError("%s must be an int" % repr(e))
+            assert (e >= 0), ValueError("%d must not be negative" % e)
+        set.__init__(self, set(exponents).union({0})) # must contain zero
+
+    @property
+    def max_exponent(self):
+        return max(self) # derived from set, so this returns the max exponent
+
+    @property
+    def exponents(self):
+        exponents = list(self) # get elements of set as a list
+        exponents.sort(reverse=True)
+        return exponents
+
+    def __str__(self):
+        expd = {0: "1", 1: 'x', 2: "x^{}"} # case 2 isn't 2, it's min(i,2)
+        retval = map(lambda i: expd[min(i,2)].format(i), self.exponents)
+        return " + ".join(retval)
+
+    def __repr__(self):
+        return "LFSRPolynomial(%s)" % self.exponents
+
+
+# list of selected polynomials from https://web.archive.org/web/20190418121923/https://en.wikipedia.org/wiki/Linear-feedback_shift_register#Some_polynomials_for_maximal_LFSRs  # noqa
+LFSR_POLY_2 = LFSRPolynomial([2, 1, 0])
+LFSR_POLY_3 = LFSRPolynomial([3, 2, 0])
+LFSR_POLY_4 = LFSRPolynomial([4, 3, 0])
+LFSR_POLY_5 = LFSRPolynomial([5, 3, 0])
+LFSR_POLY_6 = LFSRPolynomial([6, 5, 0])
+LFSR_POLY_7 = LFSRPolynomial([7, 6, 0])
+LFSR_POLY_8 = LFSRPolynomial([8, 6, 5, 4, 0])
+LFSR_POLY_9 = LFSRPolynomial([9, 5, 0])
+LFSR_POLY_10 = LFSRPolynomial([10, 7, 0])
+LFSR_POLY_11 = LFSRPolynomial([11, 9, 0])
+LFSR_POLY_12 = LFSRPolynomial([12, 11, 10, 4, 0])
+LFSR_POLY_13 = LFSRPolynomial([13, 12, 11, 8, 0])
+LFSR_POLY_14 = LFSRPolynomial([14, 13, 12, 2, 0])
+LFSR_POLY_15 = LFSRPolynomial([15, 14, 0])
+LFSR_POLY_16 = LFSRPolynomial([16, 15, 13, 4, 0])
+LFSR_POLY_17 = LFSRPolynomial([17, 14, 0])
+LFSR_POLY_18 = LFSRPolynomial([18, 11, 0])
+LFSR_POLY_19 = LFSRPolynomial([19, 18, 17, 14, 0])
+LFSR_POLY_20 = LFSRPolynomial([20, 17, 0])
+LFSR_POLY_21 = LFSRPolynomial([21, 19, 0])
+LFSR_POLY_22 = LFSRPolynomial([22, 21, 0])
+LFSR_POLY_23 = LFSRPolynomial([23, 18, 0])
+LFSR_POLY_24 = LFSRPolynomial([24, 23, 22, 17, 0])
+
+
+class LFSR(LFSRPolynomial, Elaboratable):
+    """ implements a Linear Feedback Shift Register
+    """
+    def __init__(self, polynomial):
+        """ Inputs:
+            ------
+            :polynomial: the polynomial to feedback on.  may be a LFSRPolynomial
+                         instance or an iterable of ints (list/tuple/generator)
+            :enable:     enable (set LO to disable.  NOTE: defaults to HI)
+
+            Outputs:
+            -------
+            :state: the LFSR state.  bitwidth is taken from the polynomial
+                    maximum exponent.
+
+            Note: if an LFSRPolynomial is passed in as the input, because
+            LFSRPolynomial is derived from set() it's ok:
+            LFSRPolynomial(LFSRPolynomial(p)) == LFSRPolynomial(p)
+        """
+        LFSRPolynomial.__init__(self, polynomial)
+        self.state = Signal(self.max_exponent, reset=1)
+        self.enable = Signal(reset=1)
+
+    def elaborate(self, platform):
+        m = Module()
+        # do absolutely nothing if the polynomial is empty (always has a zero)
+        if self.max_exponent <= 1:
+            return m
+
+        # create XOR-bunch, select bits from state based on exponent
+        feedback = Const(0) # doesn't do any harm starting from 0b0 (xor chain)
+        for exponent in self:
+            if exponent > 0: # don't have to skip, saves CPU cycles though
+                feedback ^= self.state[exponent - 1]
+
+        # if enabled, shift-and-feedback
+        with m.If(self.enable):
+            # shift up lower bits by Cat'ing in a new bit zero (feedback)
+            newstate = Cat(feedback, self.state[:-1])
+            m.d.sync += self.state.eq(newstate)
+
+        return m
+
+
+# example: Poly24
+if __name__ == '__main__':
+    p24 = rtlil.convert(LFSR(LFSR_POLY_24))
+    with open("lfsr2_p24.il", "w") as f:
+        f.write(p24)
diff --git a/src/TLB/LFSR.pyi b/src/TLB/LFSR.pyi
new file mode 100644 (file)
index 0000000..64eb911
--- /dev/null
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+# See Notices.txt for copyright information
+from nmigen import Module
+from typing import Iterable, Optional, Iterator, Any, Union
+from typing_extensions import final
+
+
+@final
+class LFSRPolynomial(set):
+    def __init__(self, exponents: Iterable[int] = ()):
+        def elements() -> Iterable[int]: ...
+    @property
+    def exponents(self) -> list[int]: ...
+    def __str__(self) -> str: ...
+    def __repr__(self) -> str: ...
+
+
+@final
+class LFSR:
+    def __init__(self, polynomial: Union[Iterable[int], LFSRPolynomial]): ...
+    @property
+    def width(self) -> int: ...
+    def elaborate(self, platform: Any) -> Module: ...
diff --git a/src/TLB/Makefile b/src/TLB/Makefile
new file mode 100644 (file)
index 0000000..1eb67ac
--- /dev/null
@@ -0,0 +1,2 @@
+verilog:
+       python3 Cam.py generate -t v > Cam.v
diff --git a/src/TLB/MemorySet.py b/src/TLB/MemorySet.py
new file mode 100644 (file)
index 0000000..ea61bdf
--- /dev/null
@@ -0,0 +1,66 @@
+from nmigen import Cat, Memory, Module, Signal, Elaboratable
+from nmigen.cli import main
+from nmigen.cli import verilog, rtlil
+
+
+class MemorySet(Elaboratable):
+    def __init__(self, data_size, tag_size, set_count, active):
+        self.active = active
+        input_size = tag_size + data_size # Size of the input data
+        memory_width = input_size + 1 # The width of the cache memory
+        self.active = active
+        self.data_size = data_size
+        self.tag_size = tag_size
+
+        # XXX TODO, use rd-enable and wr-enable?
+        self.mem = Memory(memory_width, set_count)
+        self.r = self.mem.read_port()
+        self.w = self.mem.write_port()
+
+        # inputs (address)
+        self.cset = Signal(max=set_count)  # The set to be checked
+        self.tag = Signal(tag_size)        # The tag to find
+        self.data_i = Signal(data_size)    # Incoming data
+
+        # outputs
+        self.valid = Signal()
+        self.data_o = Signal(data_size)    # Outgoing data (excludes tag)
+
+    def elaborate(self, platform):
+        m = Module()
+        m.submodules.mem = self.mem
+        m.submodules.r = self.r
+        m.submodules.w = self.w
+
+        # temporaries
+        active_bit = Signal()
+        tag_valid = Signal()
+        data_start = self.active + 1
+        data_end = data_start + self.data_size
+        tag_start = data_end
+        tag_end = tag_start + self.tag_size
+
+        # connect the read port address to the set/entry
+        read_port = self.r
+        m.d.comb += read_port.addr.eq(self.cset)
+        # Pull out active bit from data
+        data = read_port.data
+        m.d.comb += active_bit.eq(data[self.active])
+        # Validate given tag vs stored tag
+        tag = data[tag_start:tag_end]
+        m.d.comb += tag_valid.eq(self.tag == tag)
+        # An entry is only valid if the tags match AND
+        # is marked as a valid entry
+        m.d.comb += self.valid.eq(tag_valid & active_bit)
+
+        # output data: TODO, check rd-enable?
+        m.d.comb += self.data_o.eq(data[data_start:data_end])
+
+        # connect the write port addr to the set/entry (only if write enabled)
+        # (which is only done on a match, see SAC.write_entry below)
+        write_port = self.w
+        with m.If(write_port.en):
+            m.d.comb += write_port.addr.eq(self.cset)
+            m.d.comb += write_port.data.eq(Cat(1, self.data_i, self.tag))
+
+        return m
diff --git a/src/TLB/PermissionValidator.py b/src/TLB/PermissionValidator.py
new file mode 100644 (file)
index 0000000..14f01e4
--- /dev/null
@@ -0,0 +1,67 @@
+from nmigen import Module, Signal
+from nmigen.cli import main
+
+from PteEntry import PteEntry
+
+class PermissionValidator():
+    """ The purpose of this Module is to check the Permissions of a given PTE
+        against the requested access permissions.
+
+        This module will either validate (by setting the valid bit HIGH)
+        the request or find a permission fault and invalidate (by setting
+        the valid bit LOW) the request
+    """
+
+    def __init__(self, asid_size, pte_size):
+        """ Arguments:
+            * asid_size: (bit count) The size of the asid to be processed
+            * pte_size: (bit count) The size of the pte to be processed
+
+            Return:
+            * valid HIGH when permissions are correct
+        """
+        # Internal
+        self.pte_entry = PteEntry(asid_size, pte_size)
+
+        # Input
+        self.data = Signal(asid_size + pte_size);
+        self.xwr = Signal(3) # Execute, Write, Read
+        self.super_mode = Signal(1) # Supervisor Mode
+        self.super_access = Signal(1) # Supervisor Access
+        self.asid = Signal(15) # Address Space IDentifier (ASID)
+
+        # Output
+        self.valid = Signal(1) # Denotes if the permissions are correct
+
+    def elaborate(self, platform=None):
+        m = Module()
+
+        m.submodules.pte_entry = self.pte_entry
+
+        m.d.comb += self.pte_entry.i.eq(self.data)
+
+        # Check if the entry is valid
+        with m.If(self.pte_entry.v):
+            # ASID match or Global Permission
+            # Note that the MSB bound is exclusive
+            with m.If((self.pte_entry.asid == self.asid) | self.pte_entry.g):
+                # Check Execute, Write, Read (XWR) Permissions
+                with m.If(self.pte_entry.xwr == self.xwr):
+                    # Supervisor Logic
+                    with m.If(self.super_mode):
+                        # Valid if entry is not in user mode or supervisor
+                        # has Supervisor User Memory (SUM) access via the
+                        # SUM bit in the sstatus register
+                        m.d.comb += self.valid.eq((~self.pte_entry.u) \
+                                                  | self.super_access)
+                    # User logic
+                    with m.Else():
+                        # Valid if the entry is in user mode only
+                        m.d.comb += self.valid.eq(self.pte_entry.u)
+                with m.Else():
+                    m.d.comb += self.valid.eq(0)
+            with m.Else():
+                m.d.comb += self.valid.eq(0)
+        with m.Else():
+            m.d.comb += self.valid.eq(0)
+        return m
\ No newline at end of file
diff --git a/src/TLB/PteEntry.py b/src/TLB/PteEntry.py
new file mode 100644 (file)
index 0000000..c070545
--- /dev/null
@@ -0,0 +1,66 @@
+from nmigen import Module, Signal
+from nmigen.cli import main
+
+class PteEntry():
+    """ The purpose of this Module is to  centralize the parsing of Page
+        Table Entries (PTE) into one module to prevent common mistakes
+        and duplication of code. The control bits are parsed out for
+        ease of use.
+
+        This module parses according to the standard PTE given by the
+        Volume II: RISC-V Privileged Architectures V1.10 Pg 60.
+        The Address Space IDentifier (ASID) is appended to the MSB of the input
+        and is parsed out as such.
+
+        An valid input Signal would be:
+              ASID   PTE
+        Bits:[78-64][63-0]
+
+        The output PTE value will include the control bits.
+    """
+    def __init__(self, asid_size, pte_size):
+        """ Arguments:
+            * asid_size: (bit count) The size of the asid to be processed
+            * pte_size: (bit count) The size of the pte to be processed
+
+            Return:
+            * d The Dirty bit from the PTE portion of i
+            * a The Accessed bit from the PTE portion of i
+            * g The Global bit from the PTE portion of i
+            * u The User Mode bit from the PTE portion of i
+            * xwr The Execute/Write/Read bit from the PTE portion of i
+            * v The Valid bit from the PTE portion of i
+            * asid The asid portion of i
+            * pte The pte portion of i
+        """
+        # Internal
+        self.asid_start = pte_size
+        self.asid_end = pte_size + asid_size
+
+        # Input
+        self.i = Signal(asid_size + pte_size)
+
+        # Output
+        self.d = Signal(1) # Dirty bit (From pte)
+        self.a = Signal(1) # Accessed bit (From pte)
+        self.g = Signal(1) # Global Access (From pte)
+        self.u = Signal(1) # User Mode (From pte)
+        self.xwr = Signal(3) # Execute Read Write (From pte)
+        self.v = Signal(1) # Valid (From pte)
+        self.asid = Signal(asid_size) # Associated Address Space IDentifier
+        self.pte = Signal(pte_size) # Full Page Table Entry
+
+    def elaborate(self, platform=None):
+        m = Module()
+        # Pull out all control bites from PTE
+        m.d.comb += [
+            self.d.eq(self.i[7]),
+            self.a.eq(self.i[6]),
+            self.g.eq(self.i[5]),
+            self.u.eq(self.i[4]),
+            self.xwr.eq(self.i[1:4]),
+            self.v.eq(self.i[0])
+        ]
+        m.d.comb += self.asid.eq(self.i[self.asid_start:self.asid_end])
+        m.d.comb += self.pte.eq(self.i[0:self.asid_start])
+        return m
\ No newline at end of file
diff --git a/src/TLB/SetAssociativeCache.py b/src/TLB/SetAssociativeCache.py
new file mode 100644 (file)
index 0000000..0acd348
--- /dev/null
@@ -0,0 +1,274 @@
+"""
+
+Online simulator of 4-way set-associative cache:
+http://www.ntu.edu.sg/home/smitha/ParaCache/Paracache/sa4.html
+
+Python simulator of a N-way set-associative cache:
+https://github.com/vaskevich/CacheSim/blob/master/cachesim.py
+"""
+import sys
+sys.path.append("ariane/src/")
+
+from nmigen import Array, Cat, Memory, Module, Signal, Mux, Elaboratable
+from nmigen.compat.genlib import fsm
+from nmigen.cli import main
+from nmigen.cli import verilog, rtlil
+
+from AddressEncoder import AddressEncoder
+from MemorySet import MemorySet
+
+# TODO: use a LFSR that advances continuously and picking the bottom
+# few bits from it to select which cache line to replace, instead of PLRU
+# http://bugs.libre-riscv.org/show_bug.cgi?id=71
+from plru import PLRU
+from LFSR import LFSR, LFSR_POLY_24
+
+SA_NA = "00" # no action (none)
+SA_RD = "01" # read
+SA_WR = "10" # write
+
+
+class SetAssociativeCache(Elaboratable):
+    """ Set Associative Cache Memory
+
+        The purpose of this module is to generate a memory cache given the
+        constraints passed in. This will create a n-way set associative cache.
+        It is expected for the SV TLB that the VMA will provide the set number
+        while the ASID provides the tag (still to be decided).
+
+    """
+    def __init__(self, tag_size, data_size, set_count, way_count, lfsr=False):
+        """ Arguments
+            * tag_size (bits): The bit count of the tag
+            * data_size (bits): The bit count of the data to be stored
+            * set_count (number): The number of sets/entries in the cache
+            * way_count (number): The number of slots a data can be stored
+                                  in one set
+            * lfsr: if set, use an LFSR for (pseudo-randomly) selecting
+                    set/entry to write to.  otherwise, use a PLRU
+        """
+        # Internals
+        self.lfsr_mode = lfsr
+        self.way_count = way_count  # The number of slots in one set
+        self.tag_size = tag_size    # The bit count of the tag
+        self.data_size = data_size  # The bit count of the data to be stored
+
+        # set up Memory array
+        self.mem_array = Array() # memory array
+        for i in range(way_count):
+            ms = MemorySet(data_size, tag_size, set_count, active=0)
+            self.mem_array.append(ms)
+
+        # Finds valid entries
+        self.encoder = AddressEncoder(way_count)
+
+        # setup PLRU or LFSR
+        if lfsr:
+            # LFSR mode
+            self.lfsr = LFSR(LFSR_POLY_24)
+        else:
+            # PLRU mode
+            self.plru = PLRU(way_count) # One block to handle plru calculations
+            self.plru_array = Array() # PLRU data on each set
+            for i in range(set_count):
+                name="plru%d" % i
+                self.plru_array.append(Signal(self.plru.TLBSZ, name=name))
+
+        # Input
+        self.enable = Signal(1)   # Whether the cache is enabled
+        self.command = Signal(2)  # 00=None, 01=Read, 10=Write (see SA_XX)
+        self.cset = Signal(max=set_count)  # The set to be checked
+        self.tag = Signal(tag_size)        # The tag to find
+        self.data_i = Signal(data_size)    # The input data
+
+        # Output
+        self.ready = Signal(1) # 0 => Processing 1 => Ready for commands
+        self.hit = Signal(1)            # Tag matched one way in the given set
+        self.multiple_hit = Signal(1)   # Tag matched many ways in the given set
+        self.data_o = Signal(data_size) # The data linked to the matched tag
+
+    def check_tags(self, m):
+        """ Validate the tags in the selected set. If one and only one
+            tag matches set its state to zero and increment all others
+            by one. We only advance to next state if a single hit is found.
+        """
+        # Vector to store way valid results
+        # A zero denotes a way is invalid
+        valid_vector = []
+        # Loop through memory to prep read/write ports and set valid_vector
+        for i in range(self.way_count):
+            valid_vector.append(self.mem_array[i].valid)
+
+        # Pass encoder the valid vector
+        m.d.comb += self.encoder.i.eq(Cat(*valid_vector))
+
+        # Only one entry should be marked
+        # This is due to already verifying the tags
+        # matched and the valid bit is high
+        with m.If(self.hit):
+            m.next = "FINISHED_READ"
+            # Pull out data from the read port
+            data = self.mem_array[self.encoder.o].data_o
+            m.d.comb += self.data_o.eq(data)
+            if not self.lfsr_mode:
+                self.access_plru(m)
+
+        # Oh no! Seal the gates! Multiple tags matched?!? kasd;ljkafdsj;k
+        with m.Elif(self.multiple_hit):
+            # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck
+            m.d.comb += self.data_o.eq(0)
+
+        # No tag matches means no data
+        with m.Else():
+            # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck
+            m.d.comb += self.data_o.eq(0)
+
+    def access_plru(self, m):
+        """ An entry was accessed and the plru tree must now be updated
+        """
+        # Pull out the set's entry being edited
+        plru_entry = self.plru_array[self.cset]
+        m.d.comb += [
+            # Set the plru data to the current state
+            self.plru.plru_tree.eq(plru_entry),
+            # Set that the cache was accessed
+            self.plru.lu_access_i.eq(1)
+        ]
+
+    def read(self, m):
+        """ Go through the read process of the cache.
+            This takes two cycles to complete. First it checks for a valid tag
+            and secondly it updates the LRU values.
+        """
+        with m.FSM() as fsm_read:
+            with m.State("READY"):
+                m.d.comb += self.ready.eq(0)
+                # check_tags will set the state if the conditions are met
+                self.check_tags(m)
+            with m.State("FINISHED_READ"):
+                m.next = "READY"
+                m.d.comb += self.ready.eq(1)
+                if not self.lfsr_mode:
+                    plru_tree_o = self.plru.plru_tree_o
+                    m.d.sync += self.plru_array[self.cset].eq(plru_tree_o)
+
+    def write_entry(self, m):
+        if not self.lfsr_mode:
+            m.d.comb += [# set cset (mem address) into PLRU
+                         self.plru.plru_tree.eq(self.plru_array[self.cset]),
+                         # and connect plru to encoder for write
+                         self.encoder.i.eq(self.plru.replace_en_o)
+                        ]
+            write_port = self.mem_array[self.encoder.o].w
+        else:
+            # use the LFSR to generate a random(ish) one of the mem array
+            lfsr_output = Signal(max=self.way_count)
+            lfsr_random = Signal(max=self.way_count)
+            m.d.comb += lfsr_output.eq(self.lfsr.state) # lose some bits
+            # address too big, limit to range of array
+            m.d.comb += lfsr_random.eq(Mux(lfsr_output > self.way_count,
+                                           lfsr_output - self.way_count,
+                                           lfsr_output))
+            write_port = self.mem_array[lfsr_random].w
+
+        # then if there is a match from the encoder, enable the selected write
+        with m.If(self.encoder.single_match):
+            m.d.comb += write_port.en.eq(1)
+
+    def write(self, m):
+        """ Go through the write process of the cache.
+            This takes two cycles to complete. First it writes the entry,
+            and secondly it updates the PLRU (in plru mode)
+        """
+        with m.FSM() as fsm_write:
+            with m.State("READY"):
+                m.d.comb += self.ready.eq(0)
+                self.write_entry(m)
+                m.next ="FINISHED_WRITE"
+            with m.State("FINISHED_WRITE"):
+                m.d.comb += self.ready.eq(1)
+                if not self.lfsr_mode:
+                    plru_entry = self.plru_array[self.cset]
+                    m.d.sync += plru_entry.eq(self.plru.plru_tree_o)
+                m.next = "READY"
+
+
+    def elaborate(self, platform=None):
+        m = Module()
+
+        # ----
+        # set up Modules: AddressEncoder, LFSR/PLRU, Mem Array
+        # ----
+
+        m.submodules.AddressEncoder = self.encoder
+        if self.lfsr_mode:
+            m.submodules.LFSR = self.lfsr
+        else:
+            m.submodules.PLRU = self.plru
+
+        for i, mem in enumerate(self.mem_array):
+            setattr(m.submodules, "mem%d" % i, mem)
+
+        # ----
+        # select mode: PLRU connect to encoder, LFSR do... something
+        # ----
+
+        if not self.lfsr_mode:
+            # Set what entry was hit
+            m.d.comb += self.plru.lu_hit.eq(self.encoder.o)
+        else:
+            # enable LFSR
+            m.d.comb += self.lfsr.enable.eq(self.enable)
+
+        # ----
+        # connect hit/multiple hit to encoder output
+        # ----
+
+        m.d.comb += [
+            self.hit.eq(self.encoder.single_match),
+            self.multiple_hit.eq(self.encoder.multiple_match),
+        ]
+
+        # ----
+        # connect incoming data/tag/cset(addr) to mem_array
+        # ----
+
+        for mem in self.mem_array:
+            write_port = mem.w
+            m.d.comb += [mem.cset.eq(self.cset),
+                         mem.tag.eq(self.tag),
+                         mem.data_i.eq(self.data_i),
+                         write_port.en.eq(0), # default: disable write
+                        ]
+        # ----
+        # Commands: READ/WRITE/TODO
+        # ----
+
+        with m.If(self.enable):
+            with m.Switch(self.command):
+                # Search all sets at a particular tag
+                with m.Case(SA_RD):
+                    self.read(m)
+                with m.Case(SA_WR):
+                    self.write(m)
+                    # Maybe catch multiple tags write here?
+                    # TODO
+                # TODO: invalidate/flush, flush-all?
+
+        return m
+
+    def ports(self):
+        return [self.enable, self.command, self.cset, self.tag, self.data_i,
+                self.ready, self.hit, self.multiple_hit, self.data_o]
+
+
+if __name__ == '__main__':
+    sac = SetAssociativeCache(4, 8, 4, 6)
+    vl = rtlil.convert(sac, ports=sac.ports())
+    with open("SetAssociativeCache.il", "w") as f:
+        f.write(vl)
+
+    sac_lfsr = SetAssociativeCache(4, 8, 4, 6, True)
+    vl = rtlil.convert(sac_lfsr, ports=sac_lfsr.ports())
+    with open("SetAssociativeCacheLFSR.il", "w") as f:
+        f.write(vl)
diff --git a/src/TLB/TLB.py b/src/TLB/TLB.py
new file mode 100644 (file)
index 0000000..3538bdc
--- /dev/null
@@ -0,0 +1,173 @@
+""" TLB Module
+
+    The expected form of the data is:
+    * Item (Bits)
+    * Tag (N - 79) / ASID (78 - 64) / PTE (63 - 0)
+"""
+
+from nmigen import Memory, Module, Signal, Cat
+from nmigen.cli import main
+
+from PermissionValidator import PermissionValidator
+from Cam import Cam
+
+class TLB():
+    def __init__(self, asid_size, vma_size, pte_size, L1_size):
+        """ Arguments
+            * asid_size: Address Space IDentifier (ASID) typically 15 bits
+            * vma_size: Virtual Memory Address (VMA) typically 36 bits
+            * pte_size: Page Table Entry (PTE) typically 64 bits
+
+            Notes:
+            These arguments should represent the largest possible size
+            defined by the MODE settings. See
+            Volume II: RISC-V Privileged Architectures V1.10 Page 57
+        """
+
+        # Internal
+        self.state = 0
+        # L1 Cache Modules
+        L1_size = 8 # XXX overridden incoming argument?
+        self.cam_L1 = Cam(vma_size, L1_size)
+        self.mem_L1 = Memory(asid_size + pte_size, L1_size)
+
+        # Permission Validator
+        self.perm_validator = PermissionValidator(asid_size, pte_size)
+
+        # Inputs
+        self.supermode = Signal(1) # Supervisor Mode
+        self.super_access = Signal(1) # Supervisor Access
+        self.command = Signal(2) # 00=None, 01=Search, 10=Write L1, 11=Write L2
+        self.xwr = Signal(3) # Execute, Write, Read
+        self.mode = Signal(4) # 4 bits for access to Sv48 on Rv64
+        self.address_L1 = Signal(max=L1_size)
+        self.asid = Signal(asid_size) # Address Space IDentifier (ASID)
+        self.vma = Signal(vma_size) # Virtual Memory Address (VMA)
+        self.pte_in = Signal(pte_size) # To be saved Page Table Entry (PTE)
+
+        # Outputs
+        self.hit = Signal(1) # Denotes if the VMA had a mapped PTE
+        self.perm_valid = Signal(1) # Denotes if the permissions are correct
+        self.pte_out = Signal(pte_size) # PTE that was mapped to by the VMA
+
+    def search(self, m, read_L1, write_L1):
+        """ searches the TLB
+        """
+        m.d.comb += [
+            write_L1.en.eq(0),
+            self.cam_L1.write_enable.eq(0),
+            self.cam_L1.data_in.eq(self.vma)
+        ]
+        # Match found in L1 CAM
+        match_found = Signal(reset_less=True)
+        m.d.comb += match_found.eq(self.cam_L1.single_match
+                              | self.cam_L1.multiple_match)
+        with m.If(match_found):
+            # Memory shortcut variables
+            mem_address = self.cam_L1.match_address
+            # Memory Logic
+            m.d.comb += read_L1.addr.eq(mem_address)
+            # Permission Validator Logic
+            m.d.comb += [
+                self.hit.eq(1),
+                # Set permission validator data to the correct
+                # register file data according to CAM match
+                # address
+                self.perm_validator.data.eq(read_L1.data),
+                # Execute, Read, Write
+                self.perm_validator.xwr.eq(self.xwr),
+                # Supervisor Mode
+                self.perm_validator.super_mode.eq(self.supermode),
+                # Supverisor Access
+                self.perm_validator.super_access.eq(self.super_access),
+                # Address Space IDentifier (ASID)
+                self.perm_validator.asid.eq(self.asid),
+                # Output result of permission validation
+                self.perm_valid.eq(self.perm_validator.valid)
+            ]
+            # Only output PTE if permissions are valid
+            with m.If(self.perm_validator.valid):
+                # XXX TODO - dummy for now
+                reg_data = Signal.like(self.pte_out)
+                m.d.comb += [
+                    self.pte_out.eq(reg_data)
+                ]
+            with m.Else():
+                m.d.comb += [
+                    self.pte_out.eq(0)
+                ]
+        # Miss Logic
+        with m.Else():
+            m.d.comb += [
+                self.hit.eq(0),
+                self.perm_valid.eq(0),
+                self.pte_out.eq(0)
+            ]
+
+    def write_l1(self, m, read_L1, write_L1):
+        """ writes to the L1 cache
+        """
+        # Memory_L1 Logic
+        m.d.comb += [
+            write_L1.en.eq(1),
+            write_L1.addr.eq(self.address_L1),
+            # The Cat places arguments from LSB -> MSB
+            write_L1.data.eq(Cat(self.pte_in, self.asid))
+        ]
+        # CAM_L1 Logic
+        m.d.comb += [
+            self.cam_L1.write_enable.eq(1),
+            self.cam_L1.data_in.eq(self.vma),
+        ]
+
+    def elaborate(self, platform):
+        m = Module()
+        # Add submodules
+        # Submodules for L1 Cache
+        m.d.submodules.cam_L1 = self.cam_L1
+        m.d.sumbmodules.read_L1 = read_L1 = self.mem_L1.read_port()
+        m.d.sumbmodules.read_L1 = write_L1 = self.mem_L1.write_port()
+        # Permission Validator Submodule
+        m.d.submodules.perm_valididator = self.perm_validator
+
+        # When MODE specifies translation
+        # TODO add in different bit length handling ie prefix 0s
+        tlb_enable = Signal(reset_less=True)
+        m.d.comb += tlb_enable.eq(self.mode != 0)
+
+        with m.If(tlb_enable):
+            m.d.comb += [
+                self.cam_L1.enable.eq(1)
+            ]
+            with m.Switch(self.command):
+                # Search
+                with m.Case("01"):
+                    self.search(m, read_L1, write_L1)
+
+                # Write L1
+                # Expected that the miss will be handled in software
+                with m.Case("10"):
+                    self.write_l1(m, read_L1, write_L1)
+
+                # TODO
+                #with m.Case("11"):
+
+        # When disabled
+        with m.Else():
+            m.d.comb += [
+                self.cam_L1.enable.eq(0),
+                # XXX TODO - self.reg_file.enable.eq(0),
+                self.hit.eq(0),
+                self.perm_valid.eq(0), # XXX TODO, check this
+                self.pte_out.eq(0)
+            ]
+        return m
+
+
+if __name__ == '__main__':
+    tlb = TLB(15, 36, 64, 4)
+    main(tlb, ports=[ tlb.supermode, tlb.super_access, tlb.command,
+        tlb.xwr, tlb.mode, tlb.address_L1, tlb.asid,
+        tlb.vma, tlb.pte_in,
+        tlb.hit, tlb.perm_valid, tlb.pte_out,
+        ] + tlb.cam_L1.ports())
diff --git a/src/TLB/__init__.py b/src/TLB/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/src/TLB/ariane/TreePLRU.cpp b/src/TLB/ariane/TreePLRU.cpp
new file mode 100644 (file)
index 0000000..2f6aeea
--- /dev/null
@@ -0,0 +1,211 @@
+#include <cstdint>
+#include <iostream>
+#include <cmath>
+
+
+#define NWAY 4
+#define NLINE 256
+#define HIT 0
+#define MISS 1
+#define MS 1000
+/*
+Detailed TreePLRU inference see here: https://docs.google.com/spreadsheets/d/14zQpPYPwDAbCCjBT_a3KLaE5FEk-RNhI8Z7Qm_biW8g/edit?usp=sharing
+Ref: https://people.cs.clemson.edu/~mark/464/p_lru.txt
+four-way set associative - three bits
+   each bit represents one branch point in a binary decision tree; let 1
+   represent that the left side has been referenced more recently than the
+   right side, and 0 vice-versa
+              are all 4 lines valid?
+                   /       \
+                 yes        no, use an invalid line
+                  |
+                  |
+                  |
+             bit_0 == 0?            state | replace      ref to | next state
+              /       \             ------+--------      -------+-----------
+             y         n             00x  |  line_0      line_0 |    11_
+            /           \            01x  |  line_1      line_1 |    10_
+     bit_1 == 0?    bit_2 == 0?      1x0  |  line_2      line_2 |    0_1
+       /    \          /    \        1x1  |  line_3      line_3 |    0_0
+      y      n        y      n
+     /        \      /        \        ('x' means       ('_' means unchanged)
+   line_0  line_1  line_2  line_3      don't care)
+ 8-way set associative - 7  = 1+2+4 bits
+16-way set associative - 15 = 1+2+4+8 bits
+32-way set associative - 31 = 1+2+4+8+16 bits
+64-way set associative - 63 = 1+2+4+8+16+32 bits
+*/
+using namespace std;
+struct AddressField {
+    uint64_t wd_idx : 2;//Unused
+    uint64_t offset : 4;//Unused
+    uint64_t index  : 8;//NLINE = 256 = 2^8
+    uint64_t tag    : 50;
+};
+
+union Address {
+    uint32_t* p;
+    AddressField fields;
+};
+
+struct Cell {
+    bool v;
+    uint64_t tag;
+
+    Cell() : v(false), tag(0) {}
+
+    bool isHit(uint64_t tag) {
+        return v && (tag == this->tag);
+    }
+
+    void fetch(uint32_t* address) {
+        Address addr;
+        addr.p = address;
+        addr.fields.offset = 0;
+        addr.fields.wd_idx = 0;
+        tag = addr.fields.tag;
+        v = true;
+    }
+};
+
+ostream& operator<<(ostream & out, const Cell& cell) {
+    out << " v:" << cell.v << " tag:" << hex << cell.tag;
+    return out;
+}
+
+struct Block {
+    Cell cell[NWAY];
+    uint32_t state;
+    uint64_t *mask;//Mask the state to get accurate value for specified 1 bit.
+    uint64_t *value;
+    uint64_t *next_value;
+
+    Block() : state(0) {
+        switch (NWAY) {
+            case 4:
+                mask = new uint64_t[4]{0b110, 0b110, 0b101, 0b101};
+                value = new uint64_t[4]{0b000, 0b010, 0b100, 0b101};
+                next_value = new uint64_t[4]{0b110, 0b100, 0b001, 0b000};
+                break;
+            case 8:
+                mask = new uint64_t[8]{0b1101000, 0b1101000, 0b1100100, 0b1100100, 0b1010010, 0b1010010, 0b1010001,
+                                       0b1010001};
+                value = new uint64_t[8]{0b0000000, 0b0001000, 0b0100000, 0b0100100, 0b1000000, 0b1000010, 0b1010000,
+                                        0b1010001};
+                next_value = new uint64_t[8]{0b1101000, 0b1100000, 0b1000100, 0b1000000, 0b0010010, 0b0010000,
+                                             0b0000001, 0b0000000};
+                break;
+                //TODO - more NWAY goes here.
+            default:
+                std::cout << "Error definition NWAY = " << NWAY << std::endl;
+        }
+    }
+
+    uint32_t *getByTag(uint64_t tag, uint32_t *pway) {
+        for (int i = 0; i < NWAY; ++i) {
+            if (cell[i].isHit(tag)) {
+                *pway = i;
+                return pway;
+            }
+        }
+        return NULL;
+    }
+
+    void setLRU(uint32_t *address) {
+        int way = 0;
+        uint32_t st = state;
+        for (int i = 0; i < NWAY; ++i) {
+            if ((state & mask[i]) == value[i]) {
+                state ^= mask[i];
+                way = i;
+                break;
+            }
+        }
+        cell[way].fetch(address);
+        cout << "MISS: way:" << way << " address:" << address << " state:" << st << "->" << state << endl;
+    }
+
+    uint32_t *get(uint32_t *address, uint32_t *pway) {
+        Address addr;
+        addr.p = address;
+        uint32_t *d = getByTag(addr.fields.tag, pway);
+        if (d != NULL) {
+            return &d[addr.fields.offset];
+        }
+        return d;
+    }
+
+    int set(uint32_t *address) {
+        uint32_t way = 0;
+        uint32_t *p = get(address, &way);
+        if (p != NULL) {
+            printf("HIT: address:%p ref_to way:%d state %X --> ", address, way, state);
+            state &= ~mask[way];
+            printf("%X --> ", state);
+            state |= next_value[way];
+            printf("%X\n", state);
+            // *p = *address; //skip since address is fake.
+            return HIT;
+        } else {
+            setLRU(address);
+            return MISS;
+        }
+    }
+};
+
+ostream& operator<<(ostream & out, const Block& block) {
+    out << "state:" << block.state << " ";
+    for (int i = 0; i<NWAY; i++) {
+        out << block.cell[i];
+    }
+    return out;
+}
+
+struct Cache {
+    Block block[NLINE];
+    uint32_t count[2];
+    Cache() { count[HIT] = 0; count[MISS] = 0; }
+
+    void access(uint32_t* address) {
+        Address addr;
+        addr.p = address;
+        Block& b = block[addr.fields.index];
+        ++count[b.set(address)];
+    }
+
+};
+ostream& operator<<(ostream & out, const Cache& cache) {
+    out << "\n==Summary==\n\tHit: " << cache.count[HIT] <<  " Miss: " << cache.count[MISS] << std::endl;
+    for (int i = 0; i < NLINE; i++) {
+        out << cache.block[i] << endl;
+    }
+    return out;
+}
+
+Cache cache;
+void multiply(uint32_t* m1, uint32_t* m2, uint32_t* res)
+{
+    int x, i, j;
+    for (i = 0; i < MS; i++) {
+        for (j = 0; j < MS; j++) {
+            cache.access(res + i*MS +j);
+            for (x = 0; x < MS; x++) {
+                cache.access(m1 + i*MS + x);
+                cache.access(m2 + x*MS + j);
+                cache.access(res + i*MS +j);
+                // res[i][j] += m1[i][x] * m2[x][j];
+                cache.access(res + i*MS +j);
+            }
+        }
+    }
+}
+
+int main()
+{
+    uint32_t* m1 = (uint32_t*) 0xFACE00A000000000LL;  // fake virtual address; don’t access it
+    uint32_t* m2 = (uint32_t*) 0xFACE00B000000000LL;  // fake virtual address; don’t access it
+    uint32_t* res =  (uint32_t*) 0xFACE00C000000000LL; // fake virtual address; don’t access it
+    multiply(m1, m2, res);
+    cout << cache << endl;
+    return 0;
+}
diff --git a/src/TLB/ariane/p_lru.txt b/src/TLB/ariane/p_lru.txt
new file mode 100644 (file)
index 0000000..4bac768
--- /dev/null
@@ -0,0 +1,51 @@
+pseudo-LRU
+
+two-way set associative - one bit
+
+   indicates which line of the two has been reference more recently
+
+
+four-way set associative - three bits
+
+   each bit represents one branch point in a binary decision tree; let 1
+   represent that the left side has been referenced more recently than the
+   right side, and 0 vice-versa
+
+              are all 4 lines valid?
+                   /       \
+                 yes        no, use an invalid line
+                  |
+                  |
+                  |
+             bit_0 == 0?            state | replace      ref to | next state
+              /       \             ------+--------      -------+-----------
+             y         n             00x  |  line_0      line_0 |    11_
+            /           \            01x  |  line_1      line_1 |    10_
+     bit_1 == 0?    bit_2 == 0?      1x0  |  line_2      line_2 |    0_1
+       /    \          /    \        1x1  |  line_3      line_3 |    0_0
+      y      n        y      n
+     /        \      /        \        ('x' means       ('_' means unchanged)
+   line_0  line_1  line_2  line_3      don't care)
+
+   (see Figure 3-7, p. 3-18, in Intel Embedded Pentium Processor Family Dev.
+    Manual, 1998, http://www.intel.com/design/intarch/manuals/273204.htm)
+
+
+note that there is a 6-bit encoding for true LRU for four-way set associative
+
+  bit 0: bank[1] more recently used than bank[0]
+  bit 1: bank[2] more recently used than bank[0]
+  bit 2: bank[2] more recently used than bank[1]
+  bit 3: bank[3] more recently used than bank[0]
+  bit 4: bank[3] more recently used than bank[1]
+  bit 5: bank[3] more recently used than bank[2]
+
+  this results in 24 valid bit patterns within the 64 possible bit patterns
+  (4! possible valid traces for bank references)
+
+  e.g., a trace of 0 1 2 3, where 0 is LRU and 3 is MRU, is encoded as 111111
+
+  you can implement a state machine with a 256x6 ROM (6-bit state encoding
+  appended with a 2-bit bank reference input will yield a new 6-bit state),
+  and you can implement an LRU bank indicator with a 64x2 ROM
+
diff --git a/src/TLB/ariane/src/exceptcause.py b/src/TLB/ariane/src/exceptcause.py
new file mode 100644 (file)
index 0000000..4c5cb2d
--- /dev/null
@@ -0,0 +1,16 @@
+from nmigen import Const
+
+INSTR_ADDR_MISALIGNED = Const(0, 64)
+INSTR_ACCESS_FAULT    = Const(1, 64)
+ILLEGAL_INSTR         = Const(2, 64)
+BREAKPOINT            = Const(3, 64)
+LD_ADDR_MISALIGNED    = Const(4, 64)
+LD_ACCESS_FAULT       = Const(5, 64)
+ST_ADDR_MISALIGNED    = Const(6, 64)
+ST_ACCESS_FAULT       = Const(7, 64)
+ENV_CALL_UMODE        = Const(8, 64)  # environment call from user mode
+ENV_CALL_SMODE        = Const(9, 64)  # environment call from supervisor mode
+ENV_CALL_MMODE        = Const(11, 64) # environment call from machine mode
+INSTR_PAGE_FAULT      = Const(12, 64) # Instruction page fault
+LOAD_PAGE_FAULT       = Const(13, 64) # Load page fault
+STORE_PAGE_FAULT      = Const(15, 64) # Store page fault
diff --git a/src/TLB/ariane/src/mmu.py b/src/TLB/ariane/src/mmu.py
new file mode 100644 (file)
index 0000000..c0739cc
--- /dev/null
@@ -0,0 +1,462 @@
+"""
+# Copyright 2018 ETH Zurich and University of Bologna.
+# Copyright and related rights are licensed under the Solderpad Hardware
+# License, Version 0.51 (the "License"); you may not use this file except in
+# compliance with the License.  You may obtain a copy of the License at
+# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# or agreed to in writing, software, hardware and materials distributed under
+# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# Author: Florian Zaruba, ETH Zurich
+# Date: 19/04/2017
+# Description: Memory Management Unit for Ariane, contains TLB and
+#              address translation unit. SV39 as defined in RISC-V
+#              privilege specification 1.11-WIP
+
+import ariane_pkg::*;
+"""
+
+from nmigen import Const, Signal, Cat, Module, Mux
+from nmigen.cli import verilog, rtlil
+
+from ptw import DCacheReqI, DCacheReqO, TLBUpdate, PTE, PTW
+from tlb import TLB
+from exceptcause import (INSTR_ACCESS_FAULT, INSTR_PAGE_FAULT,
+                         LOAD_PAGE_FAULT, STORE_PAGE_FAULT)
+
+PRIV_LVL_M = Const(0b11, 2)
+PRIV_LVL_S = Const(0b01, 2)
+PRIV_LVL_U = Const(0b00, 2)
+
+
+class RVException:
+    def __init__(self):
+         self.cause = Signal(64) # cause of exception
+         self.tval = Signal(64) # more info of causing exception
+                                # (e.g.: instruction causing it),
+                                #        address of LD/ST fault
+         self.valid = Signal()
+
+    def eq(self, inp):
+        res = []
+        for (o, i) in zip(self.ports(), inp.ports()):
+            res.append(o.eq(i))
+        return res
+
+    def __iter__(self):
+        yield self.cause
+        yield self.tval
+        yield self.valid
+
+    def ports(self):
+        return list(self)
+
+
+class ICacheReqI:
+    def __init__(self):
+        self.fetch_valid = Signal()   # address translation valid
+        self.fetch_paddr = Signal(64) # physical address in
+        self.fetch_exception = RVException() # exception occurred during fetch
+
+    def __iter__(self):
+        yield self.fetch_valid
+        yield self.fetch_paddr
+        yield from self.fetch_exception
+
+    def ports(self):
+        return list(self)
+
+
+class ICacheReqO:
+    def __init__(self):
+        self.fetch_req = Signal()     # address translation request
+        self.fetch_vaddr = Signal(64) # virtual address out
+
+    def __iter__(self):
+        yield self.fetch_req
+        yield self.fetch_vaddr
+
+    def ports(self):
+        return list(self)
+
+
+class MMU:
+    def __init__(self, instr_tlb_entries = 4,
+                       data_tlb_entries  = 4,
+                       asid_width        = 1):
+        self.instr_tlb_entries = instr_tlb_entries
+        self.data_tlb_entries = data_tlb_entries
+        self.asid_width = asid_width
+
+        self.flush_i = Signal()
+        self.enable_translation_i = Signal()
+        self.en_ld_st_translation_i = Signal() # enable VM translation for LD/ST
+        # IF interface
+        self.icache_areq_i = ICacheReqO()
+        self.icache_areq_o = ICacheReqI()
+        # LSU interface
+        # this is a more minimalistic interface because the actual addressing
+        # logic is handled in the LSU as we distinguish load and stores,
+        # what we do here is simple address translation
+        self.misaligned_ex_i = RVException()
+        self.lsu_req_i = Signal()   # request address translation
+        self.lsu_vaddr_i = Signal(64) # virtual address in
+        self.lsu_is_store_i = Signal() # the translation is requested by a store
+        # if we need to walk the page table we can't grant in the same cycle
+
+        # Cycle 0
+        self.lsu_dtlb_hit_o = Signal() # sent in the same cycle as the request
+                                       # if translation hits in the DTLB
+        # Cycle 1
+        self.lsu_valid_o = Signal()  # translation is valid
+        self.lsu_paddr_o = Signal(64) # translated address
+        self.lsu_exception_o = RVException() # addr translate threw exception
+
+        # General control signals
+        self.priv_lvl_i = Signal(2)
+        self.ld_st_priv_lvl_i = Signal(2)
+        self.sum_i = Signal()
+        self.mxr_i = Signal()
+        # input logic flag_mprv_i,
+        self.satp_ppn_i = Signal(44)
+        self.asid_i = Signal(self.asid_width)
+        self.flush_tlb_i = Signal()
+        # Performance counters
+        self.itlb_miss_o = Signal()
+        self.dtlb_miss_o = Signal()
+        # PTW memory interface
+        self.req_port_i = DCacheReqO()
+        self.req_port_o = DCacheReqI()
+
+    def elaborate(self, platform):
+        m = Module()
+
+        iaccess_err = Signal()   # insufficient priv to access instr page
+        daccess_err = Signal()   # insufficient priv to access data page
+        ptw_active = Signal()    # PTW is currently walking a page table
+        walking_instr = Signal() # PTW is walking because of an ITLB miss
+        ptw_error = Signal()     # PTW threw an exception
+
+        update_vaddr = Signal(39)
+        uaddr64 = Cat(update_vaddr, Const(0, 25)) # extend to 64bit with zeros
+        update_ptw_itlb = TLBUpdate(self.asid_width)
+        update_ptw_dtlb = TLBUpdate(self.asid_width)
+
+        itlb_lu_access = Signal()
+        itlb_content = PTE()
+        itlb_is_2M = Signal()
+        itlb_is_1G = Signal()
+        itlb_lu_hit = Signal()
+
+        dtlb_lu_access = Signal()
+        dtlb_content = PTE()
+        dtlb_is_2M = Signal()
+        dtlb_is_1G = Signal()
+        dtlb_lu_hit = Signal()
+
+        # Assignments
+        m.d.comb += [itlb_lu_access.eq(self.icache_areq_i.fetch_req),
+                     dtlb_lu_access.eq(self.lsu_req_i)
+                    ]
+
+        # ITLB
+        m.submodules.i_tlb = i_tlb = TLB(self.instr_tlb_entries,
+                                         self.asid_width)
+        m.d.comb += [i_tlb.flush_i.eq(self.flush_tlb_i),
+                     i_tlb.update_i.eq(update_ptw_itlb),
+                     i_tlb.lu_access_i.eq(itlb_lu_access),
+                     i_tlb.lu_asid_i.eq(self.asid_i),
+                     i_tlb.lu_vaddr_i.eq(self.icache_areq_i.fetch_vaddr),
+                     itlb_content.eq(i_tlb.lu_content_o),
+                     itlb_is_2M.eq(i_tlb.lu_is_2M_o),
+                     itlb_is_1G.eq(i_tlb.lu_is_1G_o),
+                     itlb_lu_hit.eq(i_tlb.lu_hit_o),
+                    ]
+
+        # DTLB
+        m.submodules.d_tlb = d_tlb = TLB(self.data_tlb_entries,
+                                         self.asid_width)
+        m.d.comb += [d_tlb.flush_i.eq(self.flush_tlb_i),
+                     d_tlb.update_i.eq(update_ptw_dtlb),
+                     d_tlb.lu_access_i.eq(dtlb_lu_access),
+                     d_tlb.lu_asid_i.eq(self.asid_i),
+                     d_tlb.lu_vaddr_i.eq(self.lsu_vaddr_i),
+                     dtlb_content.eq(d_tlb.lu_content_o),
+                     dtlb_is_2M.eq(d_tlb.lu_is_2M_o),
+                     dtlb_is_1G.eq(d_tlb.lu_is_1G_o),
+                     dtlb_lu_hit.eq(d_tlb.lu_hit_o),
+                    ]
+
+        # PTW
+        m.submodules.ptw = ptw = PTW(self.asid_width)
+        m.d.comb += [ptw_active.eq(ptw.ptw_active_o),
+                     walking_instr.eq(ptw.walking_instr_o),
+                     ptw_error.eq(ptw.ptw_error_o),
+                     ptw.enable_translation_i.eq(self.enable_translation_i),
+
+                     update_vaddr.eq(ptw.update_vaddr_o),
+                     update_ptw_itlb.eq(ptw.itlb_update_o),
+                     update_ptw_dtlb.eq(ptw.dtlb_update_o),
+
+                     ptw.itlb_access_i.eq(itlb_lu_access),
+                     ptw.itlb_hit_i.eq(itlb_lu_hit),
+                     ptw.itlb_vaddr_i.eq(self.icache_areq_i.fetch_vaddr),
+
+                     ptw.dtlb_access_i.eq(dtlb_lu_access),
+                     ptw.dtlb_hit_i.eq(dtlb_lu_hit),
+                     ptw.dtlb_vaddr_i.eq(self.lsu_vaddr_i),
+
+                     ptw.req_port_i.eq(self.req_port_i),
+                     self.req_port_o.eq(ptw.req_port_o),
+                    ]
+
+        # ila_1 i_ila_1 (
+        #     .clk(clk_i), # input wire clk
+        #     .probe0({req_port_o.address_tag, req_port_o.address_index}),
+        #     .probe1(req_port_o.data_req), # input wire [63:0]  probe1
+        #     .probe2(req_port_i.data_gnt), # input wire [0:0]  probe2
+        #     .probe3(req_port_i.data_rdata), # input wire [0:0]  probe3
+        #     .probe4(req_port_i.data_rvalid), # input wire [0:0]  probe4
+        #     .probe5(ptw_error), # input wire [1:0]  probe5
+        #     .probe6(update_vaddr), # input wire [0:0]  probe6
+        #     .probe7(update_ptw_itlb.valid), # input wire [0:0]  probe7
+        #     .probe8(update_ptw_dtlb.valid), # input wire [0:0]  probe8
+        #     .probe9(dtlb_lu_access), # input wire [0:0]  probe9
+        #     .probe10(lsu_vaddr_i), # input wire [0:0]  probe10
+        #     .probe11(dtlb_lu_hit), # input wire [0:0]  probe11
+        #     .probe12(itlb_lu_access), # input wire [0:0]  probe12
+        #     .probe13(icache_areq_i.fetch_vaddr), # input wire [0:0]  probe13
+        #     .probe14(itlb_lu_hit) # input wire [0:0]  probe13
+        # );
+
+        #-----------------------
+        # Instruction Interface
+        #-----------------------
+        # The instruction interface is a simple request response interface
+
+        # MMU disabled: just pass through
+        m.d.comb += [self.icache_areq_o.fetch_valid.eq(
+                                                self.icache_areq_i.fetch_req),
+                     # play through in case we disabled address translation
+                     self.icache_areq_o.fetch_paddr.eq(
+                                                self.icache_areq_i.fetch_vaddr)
+                    ]
+        # two potential exception sources:
+        # 1. HPTW threw an exception -> signal with a page fault exception
+        # 2. We got an access error because of insufficient permissions ->
+        #    throw an access exception
+        m.d.comb += self.icache_areq_o.fetch_exception.valid.eq(0)
+        # Check whether we are allowed to access this memory region
+        # from a fetch perspective
+
+        # XXX TODO: use PermissionValidator instead [we like modules]
+        m.d.comb += iaccess_err.eq(self.icache_areq_i.fetch_req & \
+                                   (((self.priv_lvl_i == PRIV_LVL_U) & \
+                                      ~itlb_content.u) | \
+                                   ((self.priv_lvl_i == PRIV_LVL_S) & \
+                                    itlb_content.u)))
+
+        # MMU enabled: address from TLB, request delayed until hit.
+        # Error when TLB hit and no access right or TLB hit and
+        # translated address not valid (e.g.  AXI decode error),
+        # or when PTW performs walk due to ITLB miss and raises
+        # an error.
+        with m.If (self.enable_translation_i):
+            # we work with SV39, so if VM is enabled, check that
+            # all bits [63:38] are equal
+            with m.If (self.icache_areq_i.fetch_req & \
+                ~(((~self.icache_areq_i.fetch_vaddr[38:64]) == 0) | \
+                 (self.icache_areq_i.fetch_vaddr[38:64]) == 0)):
+                fe = self.icache_areq_o.fetch_exception
+                m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT),
+                             fe.tval.eq(self.icache_areq_i.fetch_vaddr),
+                             fe.valid.eq(1)
+                            ]
+
+            m.d.comb += self.icache_areq_o.fetch_valid.eq(0)
+
+            # 4K page
+            paddr = Signal.like(self.icache_areq_o.fetch_paddr)
+            paddr4k = Cat(self.icache_areq_i.fetch_vaddr[0:12],
+                          itlb_content.ppn)
+            m.d.comb += paddr.eq(paddr4k)
+            # Mega page
+            with m.If(itlb_is_2M):
+                m.d.comb += paddr[12:21].eq(
+                          self.icache_areq_i.fetch_vaddr[12:21])
+            # Giga page
+            with m.If(itlb_is_1G):
+                m.d.comb += paddr[12:30].eq(
+                          self.icache_areq_i.fetch_vaddr[12:30])
+            m.d.comb += self.icache_areq_o.fetch_paddr.eq(paddr)
+
+            # ---------
+            # ITLB Hit
+            # --------
+            # if we hit the ITLB output the request signal immediately
+            with m.If(itlb_lu_hit):
+                m.d.comb += self.icache_areq_o.fetch_valid.eq(
+                                          self.icache_areq_i.fetch_req)
+                # we got an access error
+                with m.If (iaccess_err):
+                    # throw a page fault
+                    fe = self.icache_areq_o.fetch_exception
+                    m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT),
+                                 fe.tval.eq(self.icache_areq_i.fetch_vaddr),
+                                 fe.valid.eq(1)
+                                ]
+            # ---------
+            # ITLB Miss
+            # ---------
+            # watch out for exceptions happening during walking the page table
+            with m.Elif(ptw_active & walking_instr):
+                m.d.comb += self.icache_areq_o.fetch_valid.eq(ptw_error)
+                fe = self.icache_areq_o.fetch_exception
+                m.d.comb += [fe.cause.eq(INSTR_PAGE_FAULT),
+                             fe.tval.eq(uaddr64),
+                             fe.valid.eq(1)
+                            ]
+
+        #-----------------------
+        # Data Interface
+        #-----------------------
+
+        lsu_vaddr = Signal(64)
+        dtlb_pte = PTE()
+        misaligned_ex = RVException()
+        lsu_req = Signal()
+        lsu_is_store = Signal()
+        dtlb_hit = Signal()
+        dtlb_is_2M = Signal()
+        dtlb_is_1G = Signal()
+
+        # check if we need to do translation or if we are always
+        # ready (e.g.: we are not translating anything)
+        m.d.comb += self.lsu_dtlb_hit_o.eq(Mux(self.en_ld_st_translation_i,
+                                          dtlb_lu_hit, 1))
+
+        # The data interface is simpler and only consists of a
+        # request/response interface
+        m.d.comb += [
+            # save request and DTLB response
+            lsu_vaddr.eq(self.lsu_vaddr_i),
+            lsu_req.eq(self.lsu_req_i),
+            misaligned_ex.eq(self.misaligned_ex_i),
+            dtlb_pte.eq(dtlb_content),
+            dtlb_hit.eq(dtlb_lu_hit),
+            lsu_is_store.eq(self.lsu_is_store_i),
+            dtlb_is_2M.eq(dtlb_is_2M),
+            dtlb_is_1G.eq(dtlb_is_1G),
+        ]
+        m.d.sync += [
+            self.lsu_paddr_o.eq(lsu_vaddr),
+            self.lsu_valid_o.eq(lsu_req),
+            self.lsu_exception_o.eq(misaligned_ex),
+        ]
+
+        sverr = Signal()
+        usrerr = Signal()
+
+        m.d.comb += [
+            # mute misaligned exceptions if there is no request
+            # otherwise they will throw accidental exceptions
+            misaligned_ex.valid.eq(self.misaligned_ex_i.valid & self.lsu_req_i),
+
+            # SUM is not set and we are trying to access a user
+            # page in supervisor mode
+            sverr.eq(self.ld_st_priv_lvl_i == PRIV_LVL_S & ~self.sum_i & \
+                       dtlb_pte.u),
+            # this is not a user page but we are in user mode and
+            # trying to access it
+            usrerr.eq(self.ld_st_priv_lvl_i == PRIV_LVL_U & ~dtlb_pte.u),
+
+            # Check if the User flag is set, then we may only
+            # access it in supervisor mode if SUM is enabled
+            daccess_err.eq(sverr | usrerr),
+            ]
+
+        # translation is enabled and no misaligned exception occurred
+        with m.If(self.en_ld_st_translation_i & ~misaligned_ex.valid):
+            m.d.comb += lsu_req.eq(0)
+            # 4K page
+            paddr = Signal.like(lsu_vaddr)
+            paddr4k = Cat(lsu_vaddr[0:12], itlb_content.ppn)
+            m.d.comb += paddr.eq(paddr4k)
+            # Mega page
+            with m.If(dtlb_is_2M):
+                m.d.comb += paddr[12:21].eq(lsu_vaddr[12:21])
+            # Giga page
+            with m.If(dtlb_is_1G):
+                m.d.comb += paddr[12:30].eq(lsu_vaddr[12:30])
+            m.d.sync += self.lsu_paddr_o.eq(paddr)
+
+            # ---------
+            # DTLB Hit
+            # --------
+            with m.If(dtlb_hit & lsu_req):
+                m.d.comb += lsu_req.eq(1)
+                # this is a store
+                with m.If (lsu_is_store):
+                    # check if the page is write-able and
+                    # we are not violating privileges
+                    # also check if the dirty flag is set
+                    with m.If(~dtlb_pte.w | daccess_err | ~dtlb_pte.d):
+                        le = self.lsu_exception_o
+                        m.d.sync += [le.cause.eq(STORE_PAGE_FAULT),
+                                     le.tval.eq(lsu_vaddr),
+                                     le.valid.eq(1)
+                                    ]
+
+                # this is a load, check for sufficient access
+                # privileges - throw a page fault if necessary
+                with m.Elif(daccess_err):
+                    le = self.lsu_exception_o
+                    m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT),
+                                 le.tval.eq(lsu_vaddr),
+                                 le.valid.eq(1)
+                                ]
+            # ---------
+            # DTLB Miss
+            # ---------
+            # watch out for exceptions
+            with m.Elif (ptw_active & ~walking_instr):
+                # page table walker threw an exception
+                with m.If (ptw_error):
+                    # an error makes the translation valid
+                    m.d.comb += lsu_req.eq(1)
+                    # the page table walker can only throw page faults
+                    with m.If (lsu_is_store):
+                        le = self.lsu_exception_o
+                        m.d.sync += [le.cause.eq(STORE_PAGE_FAULT),
+                                     le.tval.eq(uaddr64),
+                                     le.valid.eq(1)
+                                    ]
+                    with m.Else():
+                        m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT),
+                                     le.tval.eq(uaddr64),
+                                     le.valid.eq(1)
+                                    ]
+
+        return m
+
+    def ports(self):
+        return [self.flush_i, self.enable_translation_i,
+                self.en_ld_st_translation_i,
+                self.lsu_req_i,
+                self.lsu_vaddr_i, self.lsu_is_store_i, self.lsu_dtlb_hit_o,
+                self.lsu_valid_o, self.lsu_paddr_o,
+                self.priv_lvl_i, self.ld_st_priv_lvl_i, self.sum_i, self.mxr_i,
+                self.satp_ppn_i, self.asid_i, self.flush_tlb_i,
+                self.itlb_miss_o, self.dtlb_miss_o] + \
+                self.icache_areq_i.ports() + self.icache_areq_o.ports() + \
+                self.req_port_i.ports() + self.req_port_o.ports() + \
+                self.misaligned_ex_i.ports() + self.lsu_exception_o.ports()
+
+if __name__ == '__main__':
+    mmu = MMU()
+    vl = rtlil.convert(mmu, ports=mmu.ports())
+    with open("test_mmu.il", "w") as f:
+        f.write(vl)
+
diff --git a/src/TLB/ariane/src/plru.py b/src/TLB/ariane/src/plru.py
new file mode 100644 (file)
index 0000000..95d515c
--- /dev/null
@@ -0,0 +1,106 @@
+from nmigen import Signal, Module, Cat, Const
+from nmigen.hdl.ir import Elaboratable
+from math import log2
+
+from ptw import TLBUpdate, PTE, ASID_WIDTH
+
+class PLRU(Elaboratable):
+    """ PLRU - Pseudo Least Recently Used Replacement
+
+        PLRU-tree indexing:
+        lvl0        0
+                   / \
+                  /   \
+        lvl1     1     2
+                / \   / \
+        lvl2   3   4 5   6
+              / \ /\/\  /\
+             ... ... ... ...
+    """
+    def __init__(self, entries):
+        self.entries = entries
+        self.lu_hit = Signal(entries)
+        self.replace_en_o = Signal(entries)
+        self.lu_access_i = Signal()
+        # Tree (bit per entry)
+        self.TLBSZ = 2*(self.entries-1)
+        self.plru_tree = Signal(self.TLBSZ)
+        self.plru_tree_o = Signal(self.TLBSZ)
+
+    def elaborate(self, platform=None):
+        m = Module()
+
+        # Just predefine which nodes will be set/cleared
+        # E.g. for a TLB with 8 entries, the for-loop is semantically
+        # equivalent to the following pseudo-code:
+        # unique case (1'b1)
+        # lu_hit[7]: plru_tree[0, 2, 6] = {1, 1, 1};
+        # lu_hit[6]: plru_tree[0, 2, 6] = {1, 1, 0};
+        # lu_hit[5]: plru_tree[0, 2, 5] = {1, 0, 1};
+        # lu_hit[4]: plru_tree[0, 2, 5] = {1, 0, 0};
+        # lu_hit[3]: plru_tree[0, 1, 4] = {0, 1, 1};
+        # lu_hit[2]: plru_tree[0, 1, 4] = {0, 1, 0};
+        # lu_hit[1]: plru_tree[0, 1, 3] = {0, 0, 1};
+        # lu_hit[0]: plru_tree[0, 1, 3] = {0, 0, 0};
+        # default: begin /* No hit */ end
+        # endcase
+        LOG_TLB = int(log2(self.entries))
+        print(LOG_TLB)
+        for i in range(self.entries):
+            # we got a hit so update the pointer as it was least recently used
+            hit = Signal(reset_less=True)
+            m.d.comb += hit.eq(self.lu_hit[i] & self.lu_access_i)
+            with m.If(hit):
+                # Set the nodes to the values we would expect
+                for lvl in range(LOG_TLB):
+                    idx_base = (1<<lvl)-1
+                    # lvl0 <=> MSB, lvl1 <=> MSB-1, ...
+                    shift = LOG_TLB - lvl;
+                    new_idx = Const(~((i >> (shift-1)) & 1), (1, False))
+                    plru_idx = idx_base + (i >> shift)
+                    print ("plru", i, lvl, hex(idx_base),
+                                  plru_idx, shift, new_idx)
+                    m.d.comb += self.plru_tree_o[plru_idx].eq(new_idx)
+
+        # Decode tree to write enable signals
+        # Next for-loop basically creates the following logic for e.g.
+        # an 8 entry TLB (note: pseudo-code obviously):
+        # replace_en[7] = &plru_tree[ 6, 2, 0]; #plru_tree[0,2,6]=={1,1,1}
+        # replace_en[6] = &plru_tree[~6, 2, 0]; #plru_tree[0,2,6]=={1,1,0}
+        # replace_en[5] = &plru_tree[ 5,~2, 0]; #plru_tree[0,2,5]=={1,0,1}
+        # replace_en[4] = &plru_tree[~5,~2, 0]; #plru_tree[0,2,5]=={1,0,0}
+        # replace_en[3] = &plru_tree[ 4, 1,~0]; #plru_tree[0,1,4]=={0,1,1}
+        # replace_en[2] = &plru_tree[~4, 1,~0]; #plru_tree[0,1,4]=={0,1,0}
+        # replace_en[1] = &plru_tree[ 3,~1,~0]; #plru_tree[0,1,3]=={0,0,1}
+        # replace_en[0] = &plru_tree[~3,~1,~0]; #plru_tree[0,1,3]=={0,0,0}
+        # For each entry traverse the tree. If every tree-node matches
+        # the corresponding bit of the entry's index, this is
+        # the next entry to replace.
+        replace = []
+        for i in range(self.entries):
+            en = []
+            for lvl in range(LOG_TLB):
+                idx_base = (1<<lvl)-1
+                # lvl0 <=> MSB, lvl1 <=> MSB-1, ...
+                shift = LOG_TLB - lvl;
+                new_idx = (i >> (shift-1)) & 1;
+                plru_idx = idx_base + (i>>shift)
+                plru = Signal(reset_less=True,
+                              name="plru-%d-%d-%d" % (i, lvl, plru_idx))
+                m.d.comb += plru.eq(self.plru_tree[plru_idx])
+                # en &= plru_tree_q[idx_base + (i>>shift)] == new_idx;
+                if new_idx:
+                    en.append(~plru) # yes inverted (using bool())
+                else:
+                    en.append(plru)  # yes inverted (using bool())
+            print ("plru", i, en)
+            # boolean logic manipulation:
+            # plru0 & plru1 & plru2 == ~(~plru0 | ~plru1 | ~plru2)
+            replace.append(~Cat(*en).bool())
+        m.d.comb += self.replace_en_o.eq(Cat(*replace))
+
+        return m
+
+    def ports(self):
+        return [self.entries, self.lu_hit, self.replace_en_o,
+                self.lu_access_i, self.plru_tree, self.plru_tree_o]
\ No newline at end of file
diff --git a/src/TLB/ariane/src/ptw.py b/src/TLB/ariane/src/ptw.py
new file mode 100644 (file)
index 0000000..05ec2d7
--- /dev/null
@@ -0,0 +1,539 @@
+"""
+# Copyright 2018 ETH Zurich and University of Bologna.
+# Copyright and related rights are licensed under the Solderpad Hardware
+# License, Version 0.51 (the "License"); you may not use this file except in
+# compliance with the License.  You may obtain a copy of the License at
+# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# or agreed to in writing, software, hardware and materials distributed under
+# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# Author: David Schaffenrath, TU Graz
+# Author: Florian Zaruba, ETH Zurich
+# Date: 24.4.2017
+# Description: Hardware-PTW
+
+/* verilator lint_off WIDTH */
+import ariane_pkg::*;
+
+see linux kernel source:
+
+* "arch/riscv/include/asm/page.h"
+* "arch/riscv/include/asm/mmu_context.h"
+* "arch/riscv/Kconfig" (CONFIG_PAGE_OFFSET)
+
+"""
+
+from nmigen import Const, Signal, Cat, Module
+from nmigen.hdl.ast import ArrayProxy
+from nmigen.cli import verilog, rtlil
+from math import log2
+
+
+DCACHE_SET_ASSOC = 8
+CONFIG_L1D_SIZE =  32*1024
+DCACHE_INDEX_WIDTH = int(log2(CONFIG_L1D_SIZE / DCACHE_SET_ASSOC))
+DCACHE_TAG_WIDTH = 56 - DCACHE_INDEX_WIDTH
+
+ASID_WIDTH = 8
+
+
+class DCacheReqI:
+    def __init__(self):
+        self.address_index = Signal(DCACHE_INDEX_WIDTH)
+        self.address_tag = Signal(DCACHE_TAG_WIDTH)
+        self.data_wdata = Signal(64)
+        self.data_req = Signal()
+        self.data_we = Signal()
+        self.data_be = Signal(8)
+        self.data_size = Signal(2)
+        self.kill_req = Signal()
+        self.tag_valid = Signal()
+
+    def eq(self, inp):
+        res = []
+        for (o, i) in zip(self.ports(), inp.ports()):
+            res.append(o.eq(i))
+        return res
+
+    def ports(self):
+        return [self.address_index, self.address_tag,
+                self.data_wdata, self.data_req,
+                self.data_we, self.data_be, self.data_size,
+                self.kill_req, self.tag_valid,
+            ]
+
+class DCacheReqO:
+    def __init__(self):
+        self.data_gnt = Signal()
+        self.data_rvalid = Signal()
+        self.data_rdata = Signal(64) # actually in PTE object format
+
+    def eq(self, inp):
+        res = []
+        for (o, i) in zip(self.ports(), inp.ports()):
+            res.append(o.eq(i))
+        return res
+
+    def ports(self):
+        return [self.data_gnt, self.data_rvalid, self.data_rdata]
+
+
+class PTE: #(RecordObject):
+    def __init__(self):
+        self.v = Signal()
+        self.r = Signal()
+        self.w = Signal()
+        self.x = Signal()
+        self.u = Signal()
+        self.g = Signal()
+        self.a = Signal()
+        self.d = Signal()
+        self.rsw = Signal(2)
+        self.ppn = Signal(44)
+        self.reserved = Signal(10)
+
+    def flatten(self):
+        return Cat(*self.ports())
+
+    def eq(self, x):
+        if isinstance(x, ArrayProxy):
+            res = []
+            for o in self.ports():
+                i = getattr(x, o.name)
+                res.append(i)
+            x = Cat(*res)
+        else:
+            x = x.flatten()
+        return self.flatten().eq(x)
+
+    def __iter__(self):
+        """ order is critical so that flatten creates LSB to MSB
+        """
+        yield self.v
+        yield self.r
+        yield self.w
+        yield self.x
+        yield self.u
+        yield self.g
+        yield self.a
+        yield self.d
+        yield self.rsw
+        yield self.ppn
+        yield self.reserved
+
+    def ports(self):
+        return list(self)
+
+
+class TLBUpdate:
+    def __init__(self, asid_width):
+        self.valid = Signal()      # valid flag
+        self.is_2M = Signal()
+        self.is_1G = Signal()
+        self.vpn = Signal(27)
+        self.asid = Signal(asid_width)
+        self.content = PTE()
+
+    def flatten(self):
+        return Cat(*self.ports())
+
+    def eq(self, x):
+        return self.flatten().eq(x.flatten())
+
+    def ports(self):
+        return [self.valid, self.is_2M, self.is_1G, self.vpn, self.asid] + \
+                self.content.ports()
+
+
+# SV39 defines three levels of page tables
+LVL1 = Const(0, 2) # defined to 0 so that ptw_lvl default-resets to LVL1
+LVL2 = Const(1, 2)
+LVL3 = Const(2, 2)
+
+
+class PTW:
+    def __init__(self, asid_width=8):
+        self.asid_width = asid_width
+
+        self.flush_i = Signal() # flush everything, we need to do this because
+        # actually everything we do is speculative at this stage
+        # e.g.: there could be a CSR instruction that changes everything
+        self.ptw_active_o = Signal(reset=1)    # active if not IDLE
+        self.walking_instr_o = Signal()        # set when walking for TLB
+        self.ptw_error_o = Signal()            # set when an error occurred
+        self.enable_translation_i = Signal()   # CSRs indicate to enable SV39
+        self.en_ld_st_translation_i = Signal() # enable VM translation for ld/st
+
+        self.lsu_is_store_i = Signal()       # translation triggered by store
+        # PTW memory interface
+        self.req_port_i = DCacheReqO()
+        self.req_port_o = DCacheReqI()
+
+        # to TLBs, update logic
+        self.itlb_update_o = TLBUpdate(asid_width)
+        self.dtlb_update_o = TLBUpdate(asid_width)
+
+        self.update_vaddr_o = Signal(39)
+
+        self.asid_i = Signal(self.asid_width)
+        # from TLBs
+        # did we miss?
+        self.itlb_access_i = Signal()
+        self.itlb_hit_i = Signal()
+        self.itlb_vaddr_i = Signal(64)
+
+        self.dtlb_access_i = Signal()
+        self.dtlb_hit_i = Signal()
+        self.dtlb_vaddr_i = Signal(64)
+        # from CSR file
+        self.satp_ppn_i = Signal(44) # ppn from satp
+        self.mxr_i = Signal()
+        # Performance counters
+        self.itlb_miss_o = Signal()
+        self.dtlb_miss_o = Signal()
+
+    def ports(self):
+        return [self.ptw_active_o, self.walking_instr_o, self.ptw_error_o,
+                ]
+        return [
+                self.enable_translation_i, self.en_ld_st_translation_i,
+                self.lsu_is_store_i, self.req_port_i, self.req_port_o,
+                self.update_vaddr_o,
+                self.asid_i,
+                self.itlb_access_i, self.itlb_hit_i, self.itlb_vaddr_i,
+                self.dtlb_access_i, self.dtlb_hit_i, self.dtlb_vaddr_i,
+                self.satp_ppn_i, self.mxr_i,
+                self.itlb_miss_o, self.dtlb_miss_o
+            ] + self.itlb_update_o.ports() + self.dtlb_update_o.ports()
+
+    def elaborate(self, platform):
+        m = Module()
+
+        # input registers
+        data_rvalid = Signal()
+        data_rdata = Signal(64)
+
+        # NOTE: pte decodes the incoming bit-field (data_rdata). data_rdata
+        # is spec'd in 64-bit binary-format: better to spec as Record?
+        pte = PTE()
+        m.d.comb += pte.flatten().eq(data_rdata)
+
+        # SV39 defines three levels of page tables
+        ptw_lvl = Signal(2) # default=0=LVL1 on reset (see above)
+        ptw_lvl1 = Signal()
+        ptw_lvl2 = Signal()
+        ptw_lvl3 = Signal()
+        m.d.comb += [ptw_lvl1.eq(ptw_lvl == LVL1),
+                     ptw_lvl2.eq(ptw_lvl == LVL2),
+                     ptw_lvl3.eq(ptw_lvl == LVL3)]
+
+        # is this an instruction page table walk?
+        is_instr_ptw = Signal()
+        global_mapping = Signal()
+        # latched tag signal
+        tag_valid = Signal()
+        # register the ASID
+        tlb_update_asid = Signal(self.asid_width)
+        # register VPN we need to walk, SV39 defines a 39 bit virtual addr
+        vaddr = Signal(64)
+        # 4 byte aligned physical pointer
+        ptw_pptr = Signal(56)
+
+        end = DCACHE_INDEX_WIDTH + DCACHE_TAG_WIDTH
+        m.d.sync += [
+            # Assignments
+            self.update_vaddr_o.eq(vaddr),
+
+            self.walking_instr_o.eq(is_instr_ptw),
+            # directly output the correct physical address
+            self.req_port_o.address_index.eq(ptw_pptr[0:DCACHE_INDEX_WIDTH]),
+            self.req_port_o.address_tag.eq(ptw_pptr[DCACHE_INDEX_WIDTH:end]),
+            # we are never going to kill this request
+            self.req_port_o.kill_req.eq(0),              # XXX assign comb?
+            # we are never going to write with the HPTW
+            self.req_port_o.data_wdata.eq(Const(0, 64)), # XXX assign comb?
+            # -----------
+            # TLB Update
+            # -----------
+            self.itlb_update_o.vpn.eq(vaddr[12:39]),
+            self.dtlb_update_o.vpn.eq(vaddr[12:39]),
+            # update the correct page table level
+            self.itlb_update_o.is_2M.eq(ptw_lvl2),
+            self.itlb_update_o.is_1G.eq(ptw_lvl1),
+            self.dtlb_update_o.is_2M.eq(ptw_lvl2),
+            self.dtlb_update_o.is_1G.eq(ptw_lvl1),
+            # output the correct ASID
+            self.itlb_update_o.asid.eq(tlb_update_asid),
+            self.dtlb_update_o.asid.eq(tlb_update_asid),
+            # set the global mapping bit
+            self.itlb_update_o.content.eq(pte),
+            self.itlb_update_o.content.g.eq(global_mapping),
+            self.dtlb_update_o.content.eq(pte),
+            self.dtlb_update_o.content.g.eq(global_mapping),
+
+            self.req_port_o.tag_valid.eq(tag_valid),
+        ]
+
+        #-------------------
+        # Page table walker
+        #-------------------
+        # A virtual address va is translated into a physical address pa as
+        # follows:
+        # 1. Let a be sptbr.ppn Ã— PAGESIZE, and let i = LEVELS-1. (For Sv39,
+        #    PAGESIZE=2^12 and LEVELS=3.)
+        # 2. Let pte be the value of the PTE at address a+va.vpn[i]×PTESIZE.
+        #    (For Sv32, PTESIZE=4.)
+        # 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise an
+        #    access exception.
+        # 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to
+        #    step 5.  Otherwise, this PTE is a pointer to the next level of
+        #    the page table.
+        #    Let i=i-1. If i < 0, stop and raise an access exception.
+        #    Otherwise, let a = pte.ppn Ã— PAGESIZE and go to step 2.
+        # 5. A leaf PTE has been found. Determine if the requested memory
+        #    access is allowed by the pte.r, pte.w, and pte.x bits. If not,
+        #    stop and raise an access exception. Otherwise, the translation is
+        #    successful.  Set pte.a to 1, and, if the memory access is a
+        #    store, set pte.d to 1.
+        #    The translated physical address is given as follows:
+        #      - pa.pgoff = va.pgoff.
+        #      - If i > 0, then this is a superpage translation and
+        #        pa.ppn[i-1:0] = va.vpn[i-1:0].
+        #      - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i].
+        # 6. If i > 0 and pa.ppn[i âˆ’ 1 : 0] != 0, this is a misaligned
+        #    superpage stop and raise a page-fault exception.
+
+        m.d.sync += tag_valid.eq(0)
+
+        # default assignments
+        m.d.comb += [
+            # PTW memory interface
+            self.req_port_o.data_req.eq(0),
+            self.req_port_o.data_be.eq(Const(0xFF, 8)),
+            self.req_port_o.data_size.eq(Const(0b11, 2)),
+            self.req_port_o.data_we.eq(0),
+            self.ptw_error_o.eq(0),
+            self.itlb_update_o.valid.eq(0),
+            self.dtlb_update_o.valid.eq(0),
+
+            self.itlb_miss_o.eq(0),
+            self.dtlb_miss_o.eq(0),
+        ]
+
+        # ------------
+        # State Machine
+        # ------------
+
+        with m.FSM() as fsm:
+
+            with m.State("IDLE"):
+                self.idle(m, is_instr_ptw, ptw_lvl, global_mapping,
+                          ptw_pptr, vaddr, tlb_update_asid)
+
+            with m.State("WAIT_GRANT"):
+                self.grant(m, tag_valid, data_rvalid)
+
+            with m.State("PTE_LOOKUP"):
+                # we wait for the valid signal
+                with m.If(data_rvalid):
+                    self.lookup(m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3,
+                                data_rvalid, global_mapping,
+                                is_instr_ptw, ptw_pptr)
+
+            # Propagate error to MMU/LSU
+            with m.State("PROPAGATE_ERROR"):
+                m.next = "IDLE"
+                m.d.comb += self.ptw_error_o.eq(1)
+
+            # wait for the rvalid before going back to IDLE
+            with m.State("WAIT_RVALID"):
+                with m.If(data_rvalid):
+                    m.next = "IDLE"
+
+        m.d.sync += [data_rdata.eq(self.req_port_i.data_rdata),
+                     data_rvalid.eq(self.req_port_i.data_rvalid)
+                    ]
+
+        return m
+
+    def set_grant_state(self, m):
+        # should we have flushed before we got an rvalid,
+        # wait for it until going back to IDLE
+        with m.If(self.flush_i):
+            with m.If (self.req_port_i.data_gnt):
+                m.next = "WAIT_RVALID"
+            with m.Else():
+                m.next = "IDLE"
+        with m.Else():
+            m.next = "WAIT_GRANT"
+
+    def idle(self, m, is_instr_ptw, ptw_lvl, global_mapping,
+                          ptw_pptr, vaddr, tlb_update_asid):
+        # by default we start with the top-most page table
+        m.d.sync += [is_instr_ptw.eq(0),
+                     ptw_lvl.eq(LVL1),
+                     global_mapping.eq(0),
+                     self.ptw_active_o.eq(0), # deactive (IDLE)
+                    ]
+        # work out itlb/dtlb miss
+        m.d.comb += self.itlb_miss_o.eq(self.enable_translation_i & \
+                                 self.itlb_access_i & \
+                                 ~self.itlb_hit_i & \
+                                 ~self.dtlb_access_i)
+        m.d.comb += self.dtlb_miss_o.eq(self.en_ld_st_translation_i & \
+                                        self.dtlb_access_i & \
+                                        ~self.dtlb_hit_i)
+        # we got an ITLB miss?
+        with m.If(self.itlb_miss_o):
+            pptr = Cat(Const(0, 3), self.itlb_vaddr_i[30:39],
+                       self.satp_ppn_i)
+            m.d.sync += [ptw_pptr.eq(pptr),
+                         is_instr_ptw.eq(1),
+                         vaddr.eq(self.itlb_vaddr_i),
+                         tlb_update_asid.eq(self.asid_i),
+                        ]
+            self.set_grant_state(m)
+
+        # we got a DTLB miss?
+        with m.Elif(self.dtlb_miss_o):
+            pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[30:39],
+                       self.satp_ppn_i)
+            m.d.sync += [ptw_pptr.eq(pptr),
+                         vaddr.eq(self.dtlb_vaddr_i),
+                         tlb_update_asid.eq(self.asid_i),
+                        ]
+            self.set_grant_state(m)
+
+    def grant(self, m, tag_valid, data_rvalid):
+        # we've got a data WAIT_GRANT so tell the
+        # cache that the tag is valid
+
+        # send a request out
+        m.d.comb += self.req_port_o.data_req.eq(1)
+        # wait for the WAIT_GRANT
+        with m.If(self.req_port_i.data_gnt):
+            # send the tag valid signal one cycle later
+            m.d.sync += tag_valid.eq(1)
+            # should we have flushed before we got an rvalid,
+            # wait for it until going back to IDLE
+            with m.If(self.flush_i):
+                with m.If (~data_rvalid):
+                    m.next = "WAIT_RVALID"
+                with m.Else():
+                    m.next = "IDLE"
+            with m.Else():
+                m.next = "PTE_LOOKUP"
+
+    def lookup(self, m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3,
+                            data_rvalid, global_mapping,
+                            is_instr_ptw, ptw_pptr):
+        # temporaries
+        pte_rx = Signal(reset_less=True)
+        pte_exe = Signal(reset_less=True)
+        pte_inv = Signal(reset_less=True)
+        pte_a = Signal(reset_less=True)
+        st_wd = Signal(reset_less=True)
+        m.d.comb += [pte_rx.eq(pte.r | pte.x),
+                    pte_exe.eq(~pte.x | ~pte.a),
+                    pte_inv.eq(~pte.v | (~pte.r & pte.w)),
+                    pte_a.eq(pte.a & (pte.r | (pte.x & self.mxr_i))),
+                    st_wd.eq(self.lsu_is_store_i & (~pte.w | ~pte.d))]
+
+        l1err = Signal(reset_less=True)
+        l2err = Signal(reset_less=True)
+        m.d.comb += [l2err.eq((ptw_lvl2) & pte.ppn[0:9] != Const(0, 9)),
+                     l1err.eq((ptw_lvl1) & pte.ppn[0:18] != Const(0, 18)) ]
+
+        # check if the global mapping bit is set
+        with m.If (pte.g):
+            m.d.sync += global_mapping.eq(1)
+
+        m.next = "IDLE"
+
+        # -------------
+        # Invalid PTE
+        # -------------
+        # If pte.v = 0, or if pte.r = 0 and pte.w = 1,
+        # stop and raise a page-fault exception.
+        with m.If (pte_inv):
+            m.next = "PROPAGATE_ERROR"
+
+        # -----------
+        # Valid PTE
+        # -----------
+
+        # it is a valid PTE
+        # if pte.r = 1 or pte.x = 1 it is a valid PTE
+        with m.Elif (pte_rx):
+            # Valid translation found (either 1G, 2M or 4K)
+            with m.If(is_instr_ptw):
+                # ------------
+                # Update ITLB
+                # ------------
+                # If page not executable, we can directly raise error.
+                # This doesn't put a useless entry into the TLB.
+                # The same idea applies to the access flag since we let
+                # the access flag be managed by SW.
+                with m.If (pte_exe):
+                    m.next = "IDLE"
+                with m.Else():
+                    m.d.comb += self.itlb_update_o.valid.eq(1)
+
+            with m.Else():
+                # ------------
+                # Update DTLB
+                # ------------
+                # Check if the access flag has been set, otherwise
+                # throw page-fault and let software handle those bits.
+                # If page not readable (there are no write-only pages)
+                # directly raise an error. This doesn't put a useless
+                # entry into the TLB.
+                with m.If(pte_a):
+                    m.d.comb += self.dtlb_update_o.valid.eq(1)
+                with m.Else():
+                    m.next = "PROPAGATE_ERROR"
+                # Request is a store: perform additional checks
+                # If the request was a store and the page not
+                # write-able, raise an error
+                # the same applies if the dirty flag is not set
+                with m.If (st_wd):
+                    m.d.comb += self.dtlb_update_o.valid.eq(0)
+                    m.next = "PROPAGATE_ERROR"
+
+            # check if the ppn is correctly aligned: Case (6)
+            with m.If(l1err | l2err):
+                m.next = "PROPAGATE_ERROR"
+                m.d.comb += [self.dtlb_update_o.valid.eq(0),
+                             self.itlb_update_o.valid.eq(0)]
+
+        # this is a pointer to the next TLB level
+        with m.Else():
+            # pointer to next level of page table
+            with m.If (ptw_lvl1):
+                # we are in the second level now
+                pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[21:30], pte.ppn)
+                m.d.sync += [ptw_pptr.eq(pptr),
+                             ptw_lvl.eq(LVL2)
+                            ]
+            with m.If(ptw_lvl2):
+                # here we received a pointer to the third level
+                pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[12:21], pte.ppn)
+                m.d.sync += [ptw_pptr.eq(pptr),
+                             ptw_lvl.eq(LVL3)
+                            ]
+            self.set_grant_state(m)
+
+            with m.If (ptw_lvl3):
+                # Should already be the last level
+                # page table => Error
+                m.d.sync += ptw_lvl.eq(LVL3)
+                m.next = "PROPAGATE_ERROR"
+
+
+if __name__ == '__main__':
+    ptw = PTW()
+    vl = rtlil.convert(ptw, ports=ptw.ports())
+    with open("test_ptw.il", "w") as f:
+        f.write(vl)
diff --git a/src/TLB/ariane/src/tlb.py b/src/TLB/ariane/src/tlb.py
new file mode 100644 (file)
index 0000000..f768571
--- /dev/null
@@ -0,0 +1,170 @@
+"""
+# Copyright 2018 ETH Zurich and University of Bologna.
+# Copyright and related rights are licensed under the Solderpad Hardware
+# License, Version 0.51 (the "License"); you may not use this file except in
+# compliance with the License.  You may obtain a copy of the License at
+# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# or agreed to in writing, software, hardware and materials distributed under
+# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# Author: David Schaffenrath, TU Graz
+# Author: Florian Zaruba, ETH Zurich
+# Date: 21.4.2017
+# Description: Translation Lookaside Buffer, SV39
+#              fully set-associative
+
+Implementation in c++:
+https://raw.githubusercontent.com/Tony-Hu/TreePLRU/master/TreePLRU.cpp
+
+Text description:
+https://people.cs.clemson.edu/~mark/464/p_lru.txt
+
+Online simulator:
+http://www.ntu.edu.sg/home/smitha/ParaCache/Paracache/vm.html
+"""
+from math import log2
+from nmigen import Signal, Module, Cat, Const, Array
+from nmigen.cli import verilog, rtlil
+from nmigen.lib.coding import Encoder
+
+from ptw import TLBUpdate, PTE, ASID_WIDTH
+from plru import PLRU
+from tlb_content import TLBContent
+
+TLB_ENTRIES = 8
+
+class TLB:
+    def __init__(self, tlb_entries=8, asid_width=8):
+        self.tlb_entries = tlb_entries
+        self.asid_width = asid_width
+
+        self.flush_i = Signal()  # Flush signal
+        # Lookup signals
+        self.lu_access_i = Signal()
+        self.lu_asid_i = Signal(self.asid_width)
+        self.lu_vaddr_i = Signal(64)
+        self.lu_content_o = PTE()
+        self.lu_is_2M_o = Signal()
+        self.lu_is_1G_o = Signal()
+        self.lu_hit_o = Signal()
+        # Update TLB
+        self.pte_width = len(self.lu_content_o.flatten())
+        self.update_i = TLBUpdate(asid_width)
+
+    def elaborate(self, platform):
+        m = Module()
+
+        vpn2 = Signal(9)
+        vpn1 = Signal(9)
+        vpn0 = Signal(9)
+
+        #-------------
+        # Translation
+        #-------------
+
+        # SV39 defines three levels of page tables
+        m.d.comb += [ vpn0.eq(self.lu_vaddr_i[12:21]),
+                      vpn1.eq(self.lu_vaddr_i[21:30]),
+                      vpn2.eq(self.lu_vaddr_i[30:39]),
+                    ]
+
+        tc = []
+        for i in range(self.tlb_entries):
+            tlc = TLBContent(self.pte_width, self.asid_width)
+            setattr(m.submodules, "tc%d" % i, tlc)
+            tc.append(tlc)
+            # connect inputs
+            tlc.update_i = self.update_i     # saves a lot of graphviz links
+            m.d.comb += [tlc.vpn0.eq(vpn0),
+                         tlc.vpn1.eq(vpn1),
+                         tlc.vpn2.eq(vpn2),
+                         tlc.flush_i.eq(self.flush_i),
+                         #tlc.update_i.eq(self.update_i),
+                         tlc.lu_asid_i.eq(self.lu_asid_i)]
+        tc = Array(tc)
+
+        #--------------
+        # Select hit
+        #--------------
+
+        # use Encoder to select hit index
+        # XXX TODO: assert that there's only one valid entry (one lu_hit)
+        hitsel = Encoder(self.tlb_entries)
+        m.submodules.hitsel = hitsel
+
+        hits = []
+        for i in range(self.tlb_entries):
+            hits.append(tc[i].lu_hit_o)
+        m.d.comb += hitsel.i.eq(Cat(*hits)) # (goes into plru as well)
+        idx = hitsel.o
+
+        active = Signal(reset_less=True)
+        m.d.comb += active.eq(~hitsel.n)
+        with m.If(active):
+            # active hit, send selected as output
+            m.d.comb += [ self.lu_is_1G_o.eq(tc[idx].lu_is_1G_o),
+                          self.lu_is_2M_o.eq(tc[idx].lu_is_2M_o),
+                          self.lu_hit_o.eq(1),
+                          self.lu_content_o.flatten().eq(tc[idx].lu_content_o),
+                        ]
+
+        #--------------
+        # PLRU.
+        #--------------
+
+        p = PLRU(self.tlb_entries)
+        plru_tree = Signal(p.TLBSZ)
+        m.submodules.plru = p
+
+        # connect PLRU inputs/outputs
+        # XXX TODO: assert that there's only one valid entry (one replace_en)
+        en = []
+        for i in range(self.tlb_entries):
+            en.append(tc[i].replace_en_i)
+        m.d.comb += [Cat(*en).eq(p.replace_en_o), # output from PLRU into tags
+                     p.lu_hit.eq(hitsel.i),
+                     p.lu_access_i.eq(self.lu_access_i),
+                     p.plru_tree.eq(plru_tree)]
+        m.d.sync += plru_tree.eq(p.plru_tree_o)
+
+        #--------------
+        # Sanity checks
+        #--------------
+
+        assert (self.tlb_entries % 2 == 0) and (self.tlb_entries > 1), \
+            "TLB size must be a multiple of 2 and greater than 1"
+        assert (self.asid_width >= 1), \
+            "ASID width must be at least 1"
+
+        return m
+
+        """
+        # Just for checking
+        function int countSetBits(logic[self.tlb_entries-1:0] vector);
+          automatic int count = 0;
+          foreach (vector[idx]) begin
+            count += vector[idx];
+          end
+          return count;
+        endfunction
+
+        assert property (@(posedge clk_i)(countSetBits(lu_hit) <= 1))
+          else $error("More then one hit in TLB!"); $stop(); end
+        assert property (@(posedge clk_i)(countSetBits(replace_en) <= 1))
+          else $error("More then one TLB entry selected for next replace!");
+        """
+
+    def ports(self):
+        return [self.flush_i, self.lu_access_i,
+                 self.lu_asid_i, self.lu_vaddr_i,
+                 self.lu_is_2M_o, self.lu_is_1G_o, self.lu_hit_o,
+                ] + self.lu_content_o.ports() + self.update_i.ports()
+
+if __name__ == '__main__':
+    tlb = TLB()
+    vl = rtlil.convert(tlb, ports=tlb.ports())
+    with open("test_tlb.il", "w") as f:
+        f.write(vl)
+
diff --git a/src/TLB/ariane/src/tlb_content.py b/src/TLB/ariane/src/tlb_content.py
new file mode 100644 (file)
index 0000000..024c569
--- /dev/null
@@ -0,0 +1,125 @@
+from nmigen import Signal, Module, Cat, Const
+
+from ptw import TLBUpdate, PTE
+
+class TLBEntry:
+    def __init__(self, asid_width):
+        self.asid = Signal(asid_width)
+        # SV39 defines three levels of page tables
+        self.vpn0 = Signal(9)
+        self.vpn1 = Signal(9)
+        self.vpn2 = Signal(9)
+        self.is_2M = Signal()
+        self.is_1G = Signal()
+        self.valid = Signal()
+
+    def flatten(self):
+        return Cat(*self.ports())
+
+    def eq(self, x):
+        return self.flatten().eq(x.flatten())
+
+    def ports(self):
+        return [self.asid, self.vpn0, self.vpn1, self.vpn2,
+                self.is_2M, self.is_1G, self.valid]
+
+class TLBContent:
+    def __init__(self, pte_width, asid_width):
+        self.asid_width = asid_width
+        self.pte_width = pte_width
+        self.flush_i = Signal()  # Flush signal
+        # Update TLB
+        self.update_i = TLBUpdate(asid_width)
+        self.vpn2 = Signal(9)
+        self.vpn1 = Signal(9)
+        self.vpn0 = Signal(9)
+        self.replace_en_i = Signal() # replace the following entry,
+                                     # set by replacement strategy
+        # Lookup signals
+        self.lu_asid_i = Signal(asid_width)
+        self.lu_content_o = Signal(pte_width)
+        self.lu_is_2M_o = Signal()
+        self.lu_is_1G_o = Signal()
+        self.lu_hit_o = Signal()
+
+    def elaborate(self, platform):
+        m = Module()
+
+        tags = TLBEntry(self.asid_width)
+        content = Signal(self.pte_width)
+
+        m.d.comb += [self.lu_hit_o.eq(0),
+                     self.lu_is_2M_o.eq(0),
+                     self.lu_is_1G_o.eq(0)]
+
+        # temporaries for 1st level match
+        asid_ok = Signal(reset_less=True)
+        vpn2_ok = Signal(reset_less=True)
+        tags_ok = Signal(reset_less=True)
+        vpn2_hit = Signal(reset_less=True)
+        m.d.comb += [tags_ok.eq(tags.valid),
+                     asid_ok.eq(tags.asid == self.lu_asid_i),
+                     vpn2_ok.eq(tags.vpn2 == self.vpn2),
+                     vpn2_hit.eq(tags_ok & asid_ok & vpn2_ok)]
+        # temporaries for 2nd level match
+        vpn1_ok = Signal(reset_less=True)
+        tags_2M = Signal(reset_less=True)
+        vpn0_ok = Signal(reset_less=True)
+        vpn0_or_2M = Signal(reset_less=True)
+        m.d.comb += [vpn1_ok.eq(self.vpn1 == tags.vpn1),
+                     tags_2M.eq(tags.is_2M),
+                     vpn0_ok.eq(self.vpn0 == tags.vpn0),
+                     vpn0_or_2M.eq(tags_2M | vpn0_ok)]
+        # first level match, this may be a giga page,
+        # check the ASID flags as well
+        with m.If(vpn2_hit):
+            # second level
+            with m.If (tags.is_1G):
+                m.d.comb += [ self.lu_content_o.eq(content),
+                              self.lu_is_1G_o.eq(1),
+                              self.lu_hit_o.eq(1),
+                            ]
+            # not a giga page hit so check further
+            with m.Elif(vpn1_ok):
+                # this could be a 2 mega page hit or a 4 kB hit
+                # output accordingly
+                with m.If(vpn0_or_2M):
+                    m.d.comb += [ self.lu_content_o.eq(content),
+                                  self.lu_is_2M_o.eq(tags.is_2M),
+                                  self.lu_hit_o.eq(1),
+                                ]
+        # ------------------
+        # Update or Flush
+        # ------------------
+
+        # temporaries
+        replace_valid = Signal(reset_less=True)
+        m.d.comb += replace_valid.eq(self.update_i.valid & self.replace_en_i)
+
+        # flush
+        with m.If (self.flush_i):
+            # invalidate (flush) conditions: all if zero or just this ASID
+            with m.If (self.lu_asid_i == Const(0, self.asid_width) |
+                      (self.lu_asid_i == tags.asid)):
+                m.d.sync += tags.valid.eq(0)
+
+        # normal replacement
+        with m.Elif(replace_valid):
+            m.d.sync += [ # update tag array
+                          tags.asid.eq(self.update_i.asid),
+                          tags.vpn2.eq(self.update_i.vpn[18:27]),
+                          tags.vpn1.eq(self.update_i.vpn[9:18]),
+                          tags.vpn0.eq(self.update_i.vpn[0:9]),
+                          tags.is_1G.eq(self.update_i.is_1G),
+                          tags.is_2M.eq(self.update_i.is_2M),
+                          tags.valid.eq(1),
+                          # and content as well
+                          content.eq(self.update_i.content.flatten())
+                        ]
+        return m
+
+    def ports(self):
+        return [self.flush_i,
+                 self.lu_asid_i,
+                 self.lu_is_2M_o, self.lu_is_1G_o, self.lu_hit_o,
+                ] + self.update_i.content.ports() + self.update_i.ports()
diff --git a/src/TLB/ariane/test/test_plru.py b/src/TLB/ariane/test/test_plru.py
new file mode 100644 (file)
index 0000000..9b040e1
--- /dev/null
@@ -0,0 +1,15 @@
+import sys
+sys.path.append("../src")
+sys.path.append("../../../TestUtil")
+
+from plru import PLRU
+
+from nmigen.compat.sim import run_simulation
+
+def testbench(dut):
+    yield
+
+if __name__ == "__main__":
+    dut = PLRU(4)
+    run_simulation(dut, testbench(dut), vcd_name="test_plru.vcd")
+    print("PLRU Unit Test Success")
\ No newline at end of file
diff --git a/src/TLB/ariane/test/test_ptw.py b/src/TLB/ariane/test/test_ptw.py
new file mode 100644 (file)
index 0000000..e9c5324
--- /dev/null
@@ -0,0 +1,127 @@
+import sys
+sys.path.append("../src")
+sys.path.append("../../../TestUtil")
+
+from nmigen.compat.sim import run_simulation
+
+from ptw import PTW, PTE
+
+
+def testbench(dut):
+
+    addr = 0x8000000
+
+    #pte = PTE()
+    #yield pte.v.eq(1)
+    #yield pte.r.eq(1)
+
+    yield dut.req_port_i.data_gnt.eq(1)
+    yield dut.req_port_i.data_rvalid.eq(1)
+    yield dut.req_port_i.data_rdata.eq(0x43)#pte.flatten())
+
+    # data lookup
+    yield dut.en_ld_st_translation_i.eq(1)
+    yield dut.asid_i.eq(1)
+
+    yield dut.dtlb_access_i.eq(1)
+    yield dut.dtlb_hit_i.eq(0)
+    yield dut.dtlb_vaddr_i.eq(0x400000000)
+
+    yield
+    yield
+    yield
+
+    yield dut.dtlb_access_i.eq(1)
+    yield dut.dtlb_hit_i.eq(0)
+    yield dut.dtlb_vaddr_i.eq(0x200000)
+
+    yield
+    yield
+    yield
+
+    yield dut.req_port_i.data_gnt.eq(0)
+    yield dut.dtlb_access_i.eq(1)
+    yield dut.dtlb_hit_i.eq(0)
+    yield dut.dtlb_vaddr_i.eq(0x400000011)
+
+    yield
+    yield dut.req_port_i.data_gnt.eq(1)
+    yield
+    yield
+
+    # data lookup, PTW levels 1-2-3
+    addr = 0x4000000
+    yield dut.dtlb_vaddr_i.eq(addr)
+    yield dut.mxr_i.eq(0x1)
+    yield dut.req_port_i.data_gnt.eq(1)
+    yield dut.req_port_i.data_rvalid.eq(1)
+    yield dut.req_port_i.data_rdata.eq(0x41 | (addr>>12)<<10)#pte.flatten())
+
+    yield dut.en_ld_st_translation_i.eq(1)
+    yield dut.asid_i.eq(1)
+
+    yield dut.dtlb_access_i.eq(1)
+    yield dut.dtlb_hit_i.eq(0)
+    yield dut.dtlb_vaddr_i.eq(addr)
+
+    yield
+    yield
+    yield
+    yield
+    yield
+    yield
+    yield
+    yield
+
+    yield dut.req_port_i.data_gnt.eq(0)
+    yield dut.dtlb_access_i.eq(1)
+    yield dut.dtlb_hit_i.eq(0)
+    yield dut.dtlb_vaddr_i.eq(0x400000011)
+
+    yield
+    yield dut.req_port_i.data_gnt.eq(1)
+    yield
+    yield
+    yield
+    yield
+
+
+    # instruction lookup
+    yield dut.en_ld_st_translation_i.eq(0)
+    yield dut.enable_translation_i.eq(1)
+    yield dut.asid_i.eq(1)
+
+    yield dut.itlb_access_i.eq(1)
+    yield dut.itlb_hit_i.eq(0)
+    yield dut.itlb_vaddr_i.eq(0x800000)
+
+    yield
+    yield
+    yield
+
+    yield dut.itlb_access_i.eq(1)
+    yield dut.itlb_hit_i.eq(0)
+    yield dut.itlb_vaddr_i.eq(0x200000)
+
+    yield
+    yield
+    yield
+
+    yield dut.req_port_i.data_gnt.eq(0)
+    yield dut.itlb_access_i.eq(1)
+    yield dut.itlb_hit_i.eq(0)
+    yield dut.itlb_vaddr_i.eq(0x800011)
+
+    yield
+    yield dut.req_port_i.data_gnt.eq(1)
+    yield
+    yield
+
+    yield
+
+
+
+if __name__ == "__main__":
+    dut = PTW()
+    run_simulation(dut, testbench(dut), vcd_name="test_ptw.vcd")
+    print("PTW Unit Test Success")
diff --git a/src/TLB/ariane/test/test_tlb.py b/src/TLB/ariane/test/test_tlb.py
new file mode 100644 (file)
index 0000000..aab1d43
--- /dev/null
@@ -0,0 +1,69 @@
+import sys
+sys.path.append("../src")
+sys.path.append("../../../TestUtil")
+
+from nmigen.compat.sim import run_simulation
+
+from tlb import TLB
+
+def set_vaddr(addr):
+    yield dut.lu_vaddr_i.eq(addr)
+    yield dut.update_i.vpn.eq(addr>>12)
+
+
+def testbench(dut):
+    yield dut.lu_access_i.eq(1)
+    yield dut.lu_asid_i.eq(1)
+    yield dut.update_i.valid.eq(1)
+    yield dut.update_i.is_1G.eq(0)
+    yield dut.update_i.is_2M.eq(0)
+    yield dut.update_i.asid.eq(1)
+    yield dut.update_i.content.ppn.eq(0)
+    yield dut.update_i.content.rsw.eq(0)
+    yield dut.update_i.content.r.eq(1)
+
+    yield
+
+    addr = 0x80000
+    yield from set_vaddr(addr)
+    yield
+
+    addr = 0x90001
+    yield from set_vaddr(addr)
+    yield
+
+    addr = 0x28000000
+    yield from set_vaddr(addr)
+    yield
+
+    addr = 0x28000001
+    yield from set_vaddr(addr)
+
+    addr = 0x28000001
+    yield from set_vaddr(addr)
+    yield
+
+    addr = 0x1000040000
+    yield from set_vaddr(addr)
+    yield
+
+    addr = 0x1000040001
+    yield from set_vaddr(addr)
+    yield
+
+    yield dut.update_i.is_1G.eq(1)
+    addr = 0x2040000
+    yield from set_vaddr(addr)
+    yield
+
+    yield dut.update_i.is_1G.eq(1)
+    addr = 0x2040001
+    yield from set_vaddr(addr)
+    yield
+
+    yield
+
+
+if __name__ == "__main__":
+    dut = TLB()
+    run_simulation(dut, testbench(dut), vcd_name="test_tlb.vcd")
diff --git a/src/TLB/src/AddressEncoder.py b/src/TLB/src/AddressEncoder.py
deleted file mode 100644 (file)
index 4c4b8d7..0000000
+++ /dev/null
@@ -1,75 +0,0 @@
-from nmigen import Module, Signal
-from nmigen.lib.coding import Encoder, PriorityEncoder
-
-class AddressEncoder():
-    """Address Encoder
-
-       The purpose of this module is to take in a vector and
-       encode the bits that are one hot into an address. This module
-       combines both nmigen's Encoder and PriorityEncoder and will state
-       whether the input line has a single bit hot, multiple bits hot,
-       or no bits hot. The output line will always have the lowest value
-       address output.
-
-       Usage:
-       The output is valid when either single or multiple match is high.
-       Otherwise output is 0.
-    """
-    def __init__(self, width):
-        """ Arguments:
-            * width: The desired length of the input vector
-        """
-        # Internal
-        self.encoder = Encoder(width)
-        self.p_encoder = PriorityEncoder(width)
-
-        # Input
-        self.i = Signal(width)
-
-        # Output
-        self.single_match = Signal(1)
-        self.multiple_match = Signal(1)
-        self.o = Signal(max=width)
-
-    def elaborate(self, platform=None):
-        m = Module()
-
-        # Add internal submodules
-        m.submodules.encoder = self.encoder
-        m.submodules.p_encoder = self.p_encoder
-
-        m.d.comb += [
-            self.encoder.i.eq(self.i),
-            self.p_encoder.i.eq(self.i)
-        ]
-
-        # Steps:
-        # 1. check if the input vector is non-zero
-        # 2. if non-zero, check if single match or multiple match
-        # 3. set output line to be lowest value address output
-
-        # If the priority encoder recieves an input of 0
-        # If n is 1 then the output is not valid
-        with m.If(self.p_encoder.n):
-            m.d.comb += [
-                self.single_match.eq(0),
-                self.multiple_match.eq(0),
-                self.o.eq(0)
-            ]
-        # If the priority encoder recieves an input > 0
-        with m.Else():
-            # Multiple Match if encoder n is invalid
-            with m.If(self.encoder.n):
-                m.d.comb += [
-                    self.single_match.eq(0),
-                    self.multiple_match.eq(1)
-                ]
-            # Single Match if encoder n is valid
-            with m.Else():
-                m.d.comb += [
-                    self.single_match.eq(1),
-                    self.multiple_match.eq(0)
-                ]
-            # Always set output based on priority encoder output
-            m.d.comb += self.o.eq(self.p_encoder.o)
-        return m
diff --git a/src/TLB/src/Cam.py b/src/TLB/src/Cam.py
deleted file mode 100644 (file)
index 3c49921..0000000
+++ /dev/null
@@ -1,124 +0,0 @@
-from nmigen import Array, Cat, Module, Signal
-from nmigen.lib.coding import Decoder
-from nmigen.cli import main #, verilog
-
-from CamEntry import CamEntry
-from AddressEncoder import AddressEncoder
-
-class Cam():
-    """ Content Addressable Memory (CAM)
-
-        The purpose of this module is to quickly look up whether an
-        entry exists given a data key.
-        This module will search for the given data in all internal entries
-        and output whether a  single or multiple match was found.
-        If an single entry is found the address be returned and single_match
-        is set HIGH. If multiple entries are found the lowest address is
-        returned and multiple_match is set HIGH. If neither single_match or
-        multiple_match are HIGH this implies no match was found. To write
-        to the CAM set the address bus to the desired entry and set write_enable
-        HIGH. Entry managment should be performed one level above this block
-        as lookup is performed within.
-
-        Notes:
-        The read and write operations take one clock cycle to complete.
-        Currently the read_warning line is present for interfacing but
-        is not necessary for this design. This module is capable of writing
-        in the first cycle, reading on the second, and output the correct
-        address on the third.
-    """
-
-    def __init__(self, data_size, cam_size):
-        """ Arguments:
-            * data_size: (bits) The bit size of the data
-            * cam_size: (number) The number of entries in the CAM
-        """
-
-        # Internal
-        self.cam_size = cam_size
-        self.encoder = AddressEncoder(cam_size)
-        self.decoder = Decoder(cam_size)
-        self.entry_array = Array(CamEntry(data_size) for x in range(cam_size))
-
-        # Input
-        self.enable = Signal(1)
-        self.write_enable = Signal(1)
-        self.data_in = Signal(data_size) # The data to be written
-        self.data_mask = Signal(data_size) # mask for ternary writes
-        self.address_in = Signal(max=cam_size) # address of CAM Entry to write
-
-        # Output
-        self.read_warning = Signal(1) # High when a read interrupts a write
-        self.single_match = Signal(1) # High when there is only one match
-        self.multiple_match = Signal(1) # High when there at least two matches
-        self.match_address = Signal(max=cam_size) # The lowest address matched
-
-    def elaborate(self, platform=None):
-        m = Module()
-        # AddressEncoder for match types and output address
-        m.submodules.AddressEncoder = self.encoder
-        # Decoder is used to select which entry will be written to
-        m.submodules.Decoder = self.decoder
-        # CamEntry Array Submodules
-        # Note these area added anonymously
-        entry_array = self.entry_array
-        m.submodules += entry_array
-
-        # Decoder logic
-        m.d.comb += [
-            self.decoder.i.eq(self.address_in),
-            self.decoder.n.eq(0)
-        ]
-
-        encoder_vector = []
-        with m.If(self.enable):
-            # Set the key value for every CamEntry
-            for index in range(self.cam_size):
-
-                # Write Operation
-                with m.If(self.write_enable):
-                    with m.If(self.decoder.o[index]):
-                        m.d.comb += entry_array[index].command.eq(2)
-                    with m.Else():
-                        m.d.comb += entry_array[index].command.eq(0)
-
-                # Read Operation
-                with m.Else():
-                    m.d.comb += entry_array[index].command.eq(1)
-
-                # Send data input to all entries
-                m.d.comb += entry_array[index].data_in.eq(self.data_in)
-                # Send all entry matches to encoder
-                ematch = entry_array[index].match
-                encoder_vector.append(ematch)
-
-            # Give input to and accept output from encoder module
-            m.d.comb += [
-                self.encoder.i.eq(Cat(*encoder_vector)),
-                self.single_match.eq(self.encoder.single_match),
-                self.multiple_match.eq(self.encoder.multiple_match),
-                self.match_address.eq(self.encoder.o)
-            ]
-
-        # If the CAM is not enabled set all outputs to 0
-        with m.Else():
-            m.d.comb += [
-                    self.read_warning.eq(0),
-                    self.single_match.eq(0),
-                    self.multiple_match.eq(0),
-                    self.match_address.eq(0)
-            ]
-
-        return m
-
-    def ports(self):
-        return [self.enable, self.write_enable,
-                     self.data_in, self.data_mask,
-                     self.read_warning, self.single_match,
-                     self.multiple_match, self.match_address]
-
-
-if __name__ == '__main__':
-    cam = Cam(4, 4)
-    main(cam, ports=cam.ports())
-
diff --git a/src/TLB/src/CamEntry.py b/src/TLB/src/CamEntry.py
deleted file mode 100644 (file)
index 73081ce..0000000
+++ /dev/null
@@ -1,45 +0,0 @@
-from nmigen import Module, Signal
-
-class CamEntry:
-    """ Content Addressable Memory (CAM) Entry
-
-        The purpose of this module is to represent an entry within a CAM.
-        This module when given a read command will compare  the given data
-        and output whether a match was found or not. When given a write
-        command it will write the given data into internal registers.
-    """
-
-    def __init__(self, data_size):
-        """ Arguments:
-            * data_size: (bit count) The size of the data
-        """
-        # Input
-        self.command = Signal(2) # 00 => NA 01 => Read 10 => Write 11 => Reset
-        self.data_in = Signal(data_size) # Data input when writing
-
-        # Output
-        self.match = Signal(1) # Result of the internal/input key comparison
-        self.data = Signal(data_size)
-
-    def elaborate(self, platform=None):
-        m = Module()
-        with m.Switch(self.command):
-            with m.Case("00"):
-                m.d.sync += self.match.eq(0)
-            with m.Case("01"):
-                with m.If(self.data == self.data_in):
-                    m.d.sync += self.match.eq(1)
-                with m.Else():
-                    m.d.sync += self.match.eq(0)
-            with m.Case("10"):
-                m.d.sync += [
-                    self.data.eq(self.data_in),
-                    self.match.eq(0)
-                ]
-            with m.Case():
-                m.d.sync += [
-                    self.match.eq(0),
-                    self.data.eq(0)
-                ]
-
-        return m
diff --git a/src/TLB/src/LFSR.py b/src/TLB/src/LFSR.py
deleted file mode 100644 (file)
index d8b606e..0000000
+++ /dev/null
@@ -1,109 +0,0 @@
-# SPDX-License-Identifier: LGPL-2.1-or-later
-# See Notices.txt for copyright information
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-from nmigen.cli import verilog, rtlil
-
-
-class LFSRPolynomial(set):
-    """ implements a polynomial for use in LFSR
-    """
-    def __init__(self, exponents=()):
-        for e in exponents:
-            assert isinstance(e, int), TypeError("%s must be an int" % repr(e))
-            assert (e >= 0), ValueError("%d must not be negative" % e)
-        set.__init__(self, set(exponents).union({0})) # must contain zero
-
-    @property
-    def max_exponent(self):
-        return max(self) # derived from set, so this returns the max exponent
-
-    @property
-    def exponents(self):
-        exponents = list(self) # get elements of set as a list
-        exponents.sort(reverse=True)
-        return exponents
-
-    def __str__(self):
-        expd = {0: "1", 1: 'x', 2: "x^{}"} # case 2 isn't 2, it's min(i,2)
-        retval = map(lambda i: expd[min(i,2)].format(i), self.exponents)
-        return " + ".join(retval)
-
-    def __repr__(self):
-        return "LFSRPolynomial(%s)" % self.exponents
-
-
-# list of selected polynomials from https://web.archive.org/web/20190418121923/https://en.wikipedia.org/wiki/Linear-feedback_shift_register#Some_polynomials_for_maximal_LFSRs  # noqa
-LFSR_POLY_2 = LFSRPolynomial([2, 1, 0])
-LFSR_POLY_3 = LFSRPolynomial([3, 2, 0])
-LFSR_POLY_4 = LFSRPolynomial([4, 3, 0])
-LFSR_POLY_5 = LFSRPolynomial([5, 3, 0])
-LFSR_POLY_6 = LFSRPolynomial([6, 5, 0])
-LFSR_POLY_7 = LFSRPolynomial([7, 6, 0])
-LFSR_POLY_8 = LFSRPolynomial([8, 6, 5, 4, 0])
-LFSR_POLY_9 = LFSRPolynomial([9, 5, 0])
-LFSR_POLY_10 = LFSRPolynomial([10, 7, 0])
-LFSR_POLY_11 = LFSRPolynomial([11, 9, 0])
-LFSR_POLY_12 = LFSRPolynomial([12, 11, 10, 4, 0])
-LFSR_POLY_13 = LFSRPolynomial([13, 12, 11, 8, 0])
-LFSR_POLY_14 = LFSRPolynomial([14, 13, 12, 2, 0])
-LFSR_POLY_15 = LFSRPolynomial([15, 14, 0])
-LFSR_POLY_16 = LFSRPolynomial([16, 15, 13, 4, 0])
-LFSR_POLY_17 = LFSRPolynomial([17, 14, 0])
-LFSR_POLY_18 = LFSRPolynomial([18, 11, 0])
-LFSR_POLY_19 = LFSRPolynomial([19, 18, 17, 14, 0])
-LFSR_POLY_20 = LFSRPolynomial([20, 17, 0])
-LFSR_POLY_21 = LFSRPolynomial([21, 19, 0])
-LFSR_POLY_22 = LFSRPolynomial([22, 21, 0])
-LFSR_POLY_23 = LFSRPolynomial([23, 18, 0])
-LFSR_POLY_24 = LFSRPolynomial([24, 23, 22, 17, 0])
-
-
-class LFSR(LFSRPolynomial, Elaboratable):
-    """ implements a Linear Feedback Shift Register
-    """
-    def __init__(self, polynomial):
-        """ Inputs:
-            ------
-            :polynomial: the polynomial to feedback on.  may be a LFSRPolynomial
-                         instance or an iterable of ints (list/tuple/generator)
-            :enable:     enable (set LO to disable.  NOTE: defaults to HI)
-
-            Outputs:
-            -------
-            :state: the LFSR state.  bitwidth is taken from the polynomial
-                    maximum exponent.
-
-            Note: if an LFSRPolynomial is passed in as the input, because
-            LFSRPolynomial is derived from set() it's ok:
-            LFSRPolynomial(LFSRPolynomial(p)) == LFSRPolynomial(p)
-        """
-        LFSRPolynomial.__init__(self, polynomial)
-        self.state = Signal(self.max_exponent, reset=1)
-        self.enable = Signal(reset=1)
-
-    def elaborate(self, platform):
-        m = Module()
-        # do absolutely nothing if the polynomial is empty (always has a zero)
-        if self.max_exponent <= 1:
-            return m
-
-        # create XOR-bunch, select bits from state based on exponent
-        feedback = Const(0) # doesn't do any harm starting from 0b0 (xor chain)
-        for exponent in self:
-            if exponent > 0: # don't have to skip, saves CPU cycles though
-                feedback ^= self.state[exponent - 1]
-
-        # if enabled, shift-and-feedback
-        with m.If(self.enable):
-            # shift up lower bits by Cat'ing in a new bit zero (feedback)
-            newstate = Cat(feedback, self.state[:-1])
-            m.d.sync += self.state.eq(newstate)
-
-        return m
-
-
-# example: Poly24
-if __name__ == '__main__':
-    p24 = rtlil.convert(LFSR(LFSR_POLY_24))
-    with open("lfsr2_p24.il", "w") as f:
-        f.write(p24)
diff --git a/src/TLB/src/LFSR.pyi b/src/TLB/src/LFSR.pyi
deleted file mode 100644 (file)
index 64eb911..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-License-Identifier: LGPL-2.1-or-later
-# See Notices.txt for copyright information
-from nmigen import Module
-from typing import Iterable, Optional, Iterator, Any, Union
-from typing_extensions import final
-
-
-@final
-class LFSRPolynomial(set):
-    def __init__(self, exponents: Iterable[int] = ()):
-        def elements() -> Iterable[int]: ...
-    @property
-    def exponents(self) -> list[int]: ...
-    def __str__(self) -> str: ...
-    def __repr__(self) -> str: ...
-
-
-@final
-class LFSR:
-    def __init__(self, polynomial: Union[Iterable[int], LFSRPolynomial]): ...
-    @property
-    def width(self) -> int: ...
-    def elaborate(self, platform: Any) -> Module: ...
diff --git a/src/TLB/src/Makefile b/src/TLB/src/Makefile
deleted file mode 100644 (file)
index 1eb67ac..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-verilog:
-       python3 Cam.py generate -t v > Cam.v
diff --git a/src/TLB/src/MemorySet.py b/src/TLB/src/MemorySet.py
deleted file mode 100644 (file)
index ea61bdf..0000000
+++ /dev/null
@@ -1,66 +0,0 @@
-from nmigen import Cat, Memory, Module, Signal, Elaboratable
-from nmigen.cli import main
-from nmigen.cli import verilog, rtlil
-
-
-class MemorySet(Elaboratable):
-    def __init__(self, data_size, tag_size, set_count, active):
-        self.active = active
-        input_size = tag_size + data_size # Size of the input data
-        memory_width = input_size + 1 # The width of the cache memory
-        self.active = active
-        self.data_size = data_size
-        self.tag_size = tag_size
-
-        # XXX TODO, use rd-enable and wr-enable?
-        self.mem = Memory(memory_width, set_count)
-        self.r = self.mem.read_port()
-        self.w = self.mem.write_port()
-
-        # inputs (address)
-        self.cset = Signal(max=set_count)  # The set to be checked
-        self.tag = Signal(tag_size)        # The tag to find
-        self.data_i = Signal(data_size)    # Incoming data
-
-        # outputs
-        self.valid = Signal()
-        self.data_o = Signal(data_size)    # Outgoing data (excludes tag)
-
-    def elaborate(self, platform):
-        m = Module()
-        m.submodules.mem = self.mem
-        m.submodules.r = self.r
-        m.submodules.w = self.w
-
-        # temporaries
-        active_bit = Signal()
-        tag_valid = Signal()
-        data_start = self.active + 1
-        data_end = data_start + self.data_size
-        tag_start = data_end
-        tag_end = tag_start + self.tag_size
-
-        # connect the read port address to the set/entry
-        read_port = self.r
-        m.d.comb += read_port.addr.eq(self.cset)
-        # Pull out active bit from data
-        data = read_port.data
-        m.d.comb += active_bit.eq(data[self.active])
-        # Validate given tag vs stored tag
-        tag = data[tag_start:tag_end]
-        m.d.comb += tag_valid.eq(self.tag == tag)
-        # An entry is only valid if the tags match AND
-        # is marked as a valid entry
-        m.d.comb += self.valid.eq(tag_valid & active_bit)
-
-        # output data: TODO, check rd-enable?
-        m.d.comb += self.data_o.eq(data[data_start:data_end])
-
-        # connect the write port addr to the set/entry (only if write enabled)
-        # (which is only done on a match, see SAC.write_entry below)
-        write_port = self.w
-        with m.If(write_port.en):
-            m.d.comb += write_port.addr.eq(self.cset)
-            m.d.comb += write_port.data.eq(Cat(1, self.data_i, self.tag))
-
-        return m
diff --git a/src/TLB/src/PermissionValidator.py b/src/TLB/src/PermissionValidator.py
deleted file mode 100644 (file)
index 14f01e4..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-from nmigen import Module, Signal
-from nmigen.cli import main
-
-from PteEntry import PteEntry
-
-class PermissionValidator():
-    """ The purpose of this Module is to check the Permissions of a given PTE
-        against the requested access permissions.
-
-        This module will either validate (by setting the valid bit HIGH)
-        the request or find a permission fault and invalidate (by setting
-        the valid bit LOW) the request
-    """
-
-    def __init__(self, asid_size, pte_size):
-        """ Arguments:
-            * asid_size: (bit count) The size of the asid to be processed
-            * pte_size: (bit count) The size of the pte to be processed
-
-            Return:
-            * valid HIGH when permissions are correct
-        """
-        # Internal
-        self.pte_entry = PteEntry(asid_size, pte_size)
-
-        # Input
-        self.data = Signal(asid_size + pte_size);
-        self.xwr = Signal(3) # Execute, Write, Read
-        self.super_mode = Signal(1) # Supervisor Mode
-        self.super_access = Signal(1) # Supervisor Access
-        self.asid = Signal(15) # Address Space IDentifier (ASID)
-
-        # Output
-        self.valid = Signal(1) # Denotes if the permissions are correct
-
-    def elaborate(self, platform=None):
-        m = Module()
-
-        m.submodules.pte_entry = self.pte_entry
-
-        m.d.comb += self.pte_entry.i.eq(self.data)
-
-        # Check if the entry is valid
-        with m.If(self.pte_entry.v):
-            # ASID match or Global Permission
-            # Note that the MSB bound is exclusive
-            with m.If((self.pte_entry.asid == self.asid) | self.pte_entry.g):
-                # Check Execute, Write, Read (XWR) Permissions
-                with m.If(self.pte_entry.xwr == self.xwr):
-                    # Supervisor Logic
-                    with m.If(self.super_mode):
-                        # Valid if entry is not in user mode or supervisor
-                        # has Supervisor User Memory (SUM) access via the
-                        # SUM bit in the sstatus register
-                        m.d.comb += self.valid.eq((~self.pte_entry.u) \
-                                                  | self.super_access)
-                    # User logic
-                    with m.Else():
-                        # Valid if the entry is in user mode only
-                        m.d.comb += self.valid.eq(self.pte_entry.u)
-                with m.Else():
-                    m.d.comb += self.valid.eq(0)
-            with m.Else():
-                m.d.comb += self.valid.eq(0)
-        with m.Else():
-            m.d.comb += self.valid.eq(0)
-        return m
\ No newline at end of file
diff --git a/src/TLB/src/PteEntry.py b/src/TLB/src/PteEntry.py
deleted file mode 100644 (file)
index c070545..0000000
+++ /dev/null
@@ -1,66 +0,0 @@
-from nmigen import Module, Signal
-from nmigen.cli import main
-
-class PteEntry():
-    """ The purpose of this Module is to  centralize the parsing of Page
-        Table Entries (PTE) into one module to prevent common mistakes
-        and duplication of code. The control bits are parsed out for
-        ease of use.
-
-        This module parses according to the standard PTE given by the
-        Volume II: RISC-V Privileged Architectures V1.10 Pg 60.
-        The Address Space IDentifier (ASID) is appended to the MSB of the input
-        and is parsed out as such.
-
-        An valid input Signal would be:
-              ASID   PTE
-        Bits:[78-64][63-0]
-
-        The output PTE value will include the control bits.
-    """
-    def __init__(self, asid_size, pte_size):
-        """ Arguments:
-            * asid_size: (bit count) The size of the asid to be processed
-            * pte_size: (bit count) The size of the pte to be processed
-
-            Return:
-            * d The Dirty bit from the PTE portion of i
-            * a The Accessed bit from the PTE portion of i
-            * g The Global bit from the PTE portion of i
-            * u The User Mode bit from the PTE portion of i
-            * xwr The Execute/Write/Read bit from the PTE portion of i
-            * v The Valid bit from the PTE portion of i
-            * asid The asid portion of i
-            * pte The pte portion of i
-        """
-        # Internal
-        self.asid_start = pte_size
-        self.asid_end = pte_size + asid_size
-
-        # Input
-        self.i = Signal(asid_size + pte_size)
-
-        # Output
-        self.d = Signal(1) # Dirty bit (From pte)
-        self.a = Signal(1) # Accessed bit (From pte)
-        self.g = Signal(1) # Global Access (From pte)
-        self.u = Signal(1) # User Mode (From pte)
-        self.xwr = Signal(3) # Execute Read Write (From pte)
-        self.v = Signal(1) # Valid (From pte)
-        self.asid = Signal(asid_size) # Associated Address Space IDentifier
-        self.pte = Signal(pte_size) # Full Page Table Entry
-
-    def elaborate(self, platform=None):
-        m = Module()
-        # Pull out all control bites from PTE
-        m.d.comb += [
-            self.d.eq(self.i[7]),
-            self.a.eq(self.i[6]),
-            self.g.eq(self.i[5]),
-            self.u.eq(self.i[4]),
-            self.xwr.eq(self.i[1:4]),
-            self.v.eq(self.i[0])
-        ]
-        m.d.comb += self.asid.eq(self.i[self.asid_start:self.asid_end])
-        m.d.comb += self.pte.eq(self.i[0:self.asid_start])
-        return m
\ No newline at end of file
diff --git a/src/TLB/src/SetAssociativeCache.py b/src/TLB/src/SetAssociativeCache.py
deleted file mode 100644 (file)
index 0acd348..0000000
+++ /dev/null
@@ -1,274 +0,0 @@
-"""
-
-Online simulator of 4-way set-associative cache:
-http://www.ntu.edu.sg/home/smitha/ParaCache/Paracache/sa4.html
-
-Python simulator of a N-way set-associative cache:
-https://github.com/vaskevich/CacheSim/blob/master/cachesim.py
-"""
-import sys
-sys.path.append("ariane/src/")
-
-from nmigen import Array, Cat, Memory, Module, Signal, Mux, Elaboratable
-from nmigen.compat.genlib import fsm
-from nmigen.cli import main
-from nmigen.cli import verilog, rtlil
-
-from AddressEncoder import AddressEncoder
-from MemorySet import MemorySet
-
-# TODO: use a LFSR that advances continuously and picking the bottom
-# few bits from it to select which cache line to replace, instead of PLRU
-# http://bugs.libre-riscv.org/show_bug.cgi?id=71
-from plru import PLRU
-from LFSR import LFSR, LFSR_POLY_24
-
-SA_NA = "00" # no action (none)
-SA_RD = "01" # read
-SA_WR = "10" # write
-
-
-class SetAssociativeCache(Elaboratable):
-    """ Set Associative Cache Memory
-
-        The purpose of this module is to generate a memory cache given the
-        constraints passed in. This will create a n-way set associative cache.
-        It is expected for the SV TLB that the VMA will provide the set number
-        while the ASID provides the tag (still to be decided).
-
-    """
-    def __init__(self, tag_size, data_size, set_count, way_count, lfsr=False):
-        """ Arguments
-            * tag_size (bits): The bit count of the tag
-            * data_size (bits): The bit count of the data to be stored
-            * set_count (number): The number of sets/entries in the cache
-            * way_count (number): The number of slots a data can be stored
-                                  in one set
-            * lfsr: if set, use an LFSR for (pseudo-randomly) selecting
-                    set/entry to write to.  otherwise, use a PLRU
-        """
-        # Internals
-        self.lfsr_mode = lfsr
-        self.way_count = way_count  # The number of slots in one set
-        self.tag_size = tag_size    # The bit count of the tag
-        self.data_size = data_size  # The bit count of the data to be stored
-
-        # set up Memory array
-        self.mem_array = Array() # memory array
-        for i in range(way_count):
-            ms = MemorySet(data_size, tag_size, set_count, active=0)
-            self.mem_array.append(ms)
-
-        # Finds valid entries
-        self.encoder = AddressEncoder(way_count)
-
-        # setup PLRU or LFSR
-        if lfsr:
-            # LFSR mode
-            self.lfsr = LFSR(LFSR_POLY_24)
-        else:
-            # PLRU mode
-            self.plru = PLRU(way_count) # One block to handle plru calculations
-            self.plru_array = Array() # PLRU data on each set
-            for i in range(set_count):
-                name="plru%d" % i
-                self.plru_array.append(Signal(self.plru.TLBSZ, name=name))
-
-        # Input
-        self.enable = Signal(1)   # Whether the cache is enabled
-        self.command = Signal(2)  # 00=None, 01=Read, 10=Write (see SA_XX)
-        self.cset = Signal(max=set_count)  # The set to be checked
-        self.tag = Signal(tag_size)        # The tag to find
-        self.data_i = Signal(data_size)    # The input data
-
-        # Output
-        self.ready = Signal(1) # 0 => Processing 1 => Ready for commands
-        self.hit = Signal(1)            # Tag matched one way in the given set
-        self.multiple_hit = Signal(1)   # Tag matched many ways in the given set
-        self.data_o = Signal(data_size) # The data linked to the matched tag
-
-    def check_tags(self, m):
-        """ Validate the tags in the selected set. If one and only one
-            tag matches set its state to zero and increment all others
-            by one. We only advance to next state if a single hit is found.
-        """
-        # Vector to store way valid results
-        # A zero denotes a way is invalid
-        valid_vector = []
-        # Loop through memory to prep read/write ports and set valid_vector
-        for i in range(self.way_count):
-            valid_vector.append(self.mem_array[i].valid)
-
-        # Pass encoder the valid vector
-        m.d.comb += self.encoder.i.eq(Cat(*valid_vector))
-
-        # Only one entry should be marked
-        # This is due to already verifying the tags
-        # matched and the valid bit is high
-        with m.If(self.hit):
-            m.next = "FINISHED_READ"
-            # Pull out data from the read port
-            data = self.mem_array[self.encoder.o].data_o
-            m.d.comb += self.data_o.eq(data)
-            if not self.lfsr_mode:
-                self.access_plru(m)
-
-        # Oh no! Seal the gates! Multiple tags matched?!? kasd;ljkafdsj;k
-        with m.Elif(self.multiple_hit):
-            # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck
-            m.d.comb += self.data_o.eq(0)
-
-        # No tag matches means no data
-        with m.Else():
-            # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck
-            m.d.comb += self.data_o.eq(0)
-
-    def access_plru(self, m):
-        """ An entry was accessed and the plru tree must now be updated
-        """
-        # Pull out the set's entry being edited
-        plru_entry = self.plru_array[self.cset]
-        m.d.comb += [
-            # Set the plru data to the current state
-            self.plru.plru_tree.eq(plru_entry),
-            # Set that the cache was accessed
-            self.plru.lu_access_i.eq(1)
-        ]
-
-    def read(self, m):
-        """ Go through the read process of the cache.
-            This takes two cycles to complete. First it checks for a valid tag
-            and secondly it updates the LRU values.
-        """
-        with m.FSM() as fsm_read:
-            with m.State("READY"):
-                m.d.comb += self.ready.eq(0)
-                # check_tags will set the state if the conditions are met
-                self.check_tags(m)
-            with m.State("FINISHED_READ"):
-                m.next = "READY"
-                m.d.comb += self.ready.eq(1)
-                if not self.lfsr_mode:
-                    plru_tree_o = self.plru.plru_tree_o
-                    m.d.sync += self.plru_array[self.cset].eq(plru_tree_o)
-
-    def write_entry(self, m):
-        if not self.lfsr_mode:
-            m.d.comb += [# set cset (mem address) into PLRU
-                         self.plru.plru_tree.eq(self.plru_array[self.cset]),
-                         # and connect plru to encoder for write
-                         self.encoder.i.eq(self.plru.replace_en_o)
-                        ]
-            write_port = self.mem_array[self.encoder.o].w
-        else:
-            # use the LFSR to generate a random(ish) one of the mem array
-            lfsr_output = Signal(max=self.way_count)
-            lfsr_random = Signal(max=self.way_count)
-            m.d.comb += lfsr_output.eq(self.lfsr.state) # lose some bits
-            # address too big, limit to range of array
-            m.d.comb += lfsr_random.eq(Mux(lfsr_output > self.way_count,
-                                           lfsr_output - self.way_count,
-                                           lfsr_output))
-            write_port = self.mem_array[lfsr_random].w
-
-        # then if there is a match from the encoder, enable the selected write
-        with m.If(self.encoder.single_match):
-            m.d.comb += write_port.en.eq(1)
-
-    def write(self, m):
-        """ Go through the write process of the cache.
-            This takes two cycles to complete. First it writes the entry,
-            and secondly it updates the PLRU (in plru mode)
-        """
-        with m.FSM() as fsm_write:
-            with m.State("READY"):
-                m.d.comb += self.ready.eq(0)
-                self.write_entry(m)
-                m.next ="FINISHED_WRITE"
-            with m.State("FINISHED_WRITE"):
-                m.d.comb += self.ready.eq(1)
-                if not self.lfsr_mode:
-                    plru_entry = self.plru_array[self.cset]
-                    m.d.sync += plru_entry.eq(self.plru.plru_tree_o)
-                m.next = "READY"
-
-
-    def elaborate(self, platform=None):
-        m = Module()
-
-        # ----
-        # set up Modules: AddressEncoder, LFSR/PLRU, Mem Array
-        # ----
-
-        m.submodules.AddressEncoder = self.encoder
-        if self.lfsr_mode:
-            m.submodules.LFSR = self.lfsr
-        else:
-            m.submodules.PLRU = self.plru
-
-        for i, mem in enumerate(self.mem_array):
-            setattr(m.submodules, "mem%d" % i, mem)
-
-        # ----
-        # select mode: PLRU connect to encoder, LFSR do... something
-        # ----
-
-        if not self.lfsr_mode:
-            # Set what entry was hit
-            m.d.comb += self.plru.lu_hit.eq(self.encoder.o)
-        else:
-            # enable LFSR
-            m.d.comb += self.lfsr.enable.eq(self.enable)
-
-        # ----
-        # connect hit/multiple hit to encoder output
-        # ----
-
-        m.d.comb += [
-            self.hit.eq(self.encoder.single_match),
-            self.multiple_hit.eq(self.encoder.multiple_match),
-        ]
-
-        # ----
-        # connect incoming data/tag/cset(addr) to mem_array
-        # ----
-
-        for mem in self.mem_array:
-            write_port = mem.w
-            m.d.comb += [mem.cset.eq(self.cset),
-                         mem.tag.eq(self.tag),
-                         mem.data_i.eq(self.data_i),
-                         write_port.en.eq(0), # default: disable write
-                        ]
-        # ----
-        # Commands: READ/WRITE/TODO
-        # ----
-
-        with m.If(self.enable):
-            with m.Switch(self.command):
-                # Search all sets at a particular tag
-                with m.Case(SA_RD):
-                    self.read(m)
-                with m.Case(SA_WR):
-                    self.write(m)
-                    # Maybe catch multiple tags write here?
-                    # TODO
-                # TODO: invalidate/flush, flush-all?
-
-        return m
-
-    def ports(self):
-        return [self.enable, self.command, self.cset, self.tag, self.data_i,
-                self.ready, self.hit, self.multiple_hit, self.data_o]
-
-
-if __name__ == '__main__':
-    sac = SetAssociativeCache(4, 8, 4, 6)
-    vl = rtlil.convert(sac, ports=sac.ports())
-    with open("SetAssociativeCache.il", "w") as f:
-        f.write(vl)
-
-    sac_lfsr = SetAssociativeCache(4, 8, 4, 6, True)
-    vl = rtlil.convert(sac_lfsr, ports=sac_lfsr.ports())
-    with open("SetAssociativeCacheLFSR.il", "w") as f:
-        f.write(vl)
diff --git a/src/TLB/src/TLB.py b/src/TLB/src/TLB.py
deleted file mode 100644 (file)
index 3538bdc..0000000
+++ /dev/null
@@ -1,173 +0,0 @@
-""" TLB Module
-
-    The expected form of the data is:
-    * Item (Bits)
-    * Tag (N - 79) / ASID (78 - 64) / PTE (63 - 0)
-"""
-
-from nmigen import Memory, Module, Signal, Cat
-from nmigen.cli import main
-
-from PermissionValidator import PermissionValidator
-from Cam import Cam
-
-class TLB():
-    def __init__(self, asid_size, vma_size, pte_size, L1_size):
-        """ Arguments
-            * asid_size: Address Space IDentifier (ASID) typically 15 bits
-            * vma_size: Virtual Memory Address (VMA) typically 36 bits
-            * pte_size: Page Table Entry (PTE) typically 64 bits
-
-            Notes:
-            These arguments should represent the largest possible size
-            defined by the MODE settings. See
-            Volume II: RISC-V Privileged Architectures V1.10 Page 57
-        """
-
-        # Internal
-        self.state = 0
-        # L1 Cache Modules
-        L1_size = 8 # XXX overridden incoming argument?
-        self.cam_L1 = Cam(vma_size, L1_size)
-        self.mem_L1 = Memory(asid_size + pte_size, L1_size)
-
-        # Permission Validator
-        self.perm_validator = PermissionValidator(asid_size, pte_size)
-
-        # Inputs
-        self.supermode = Signal(1) # Supervisor Mode
-        self.super_access = Signal(1) # Supervisor Access
-        self.command = Signal(2) # 00=None, 01=Search, 10=Write L1, 11=Write L2
-        self.xwr = Signal(3) # Execute, Write, Read
-        self.mode = Signal(4) # 4 bits for access to Sv48 on Rv64
-        self.address_L1 = Signal(max=L1_size)
-        self.asid = Signal(asid_size) # Address Space IDentifier (ASID)
-        self.vma = Signal(vma_size) # Virtual Memory Address (VMA)
-        self.pte_in = Signal(pte_size) # To be saved Page Table Entry (PTE)
-
-        # Outputs
-        self.hit = Signal(1) # Denotes if the VMA had a mapped PTE
-        self.perm_valid = Signal(1) # Denotes if the permissions are correct
-        self.pte_out = Signal(pte_size) # PTE that was mapped to by the VMA
-
-    def search(self, m, read_L1, write_L1):
-        """ searches the TLB
-        """
-        m.d.comb += [
-            write_L1.en.eq(0),
-            self.cam_L1.write_enable.eq(0),
-            self.cam_L1.data_in.eq(self.vma)
-        ]
-        # Match found in L1 CAM
-        match_found = Signal(reset_less=True)
-        m.d.comb += match_found.eq(self.cam_L1.single_match
-                              | self.cam_L1.multiple_match)
-        with m.If(match_found):
-            # Memory shortcut variables
-            mem_address = self.cam_L1.match_address
-            # Memory Logic
-            m.d.comb += read_L1.addr.eq(mem_address)
-            # Permission Validator Logic
-            m.d.comb += [
-                self.hit.eq(1),
-                # Set permission validator data to the correct
-                # register file data according to CAM match
-                # address
-                self.perm_validator.data.eq(read_L1.data),
-                # Execute, Read, Write
-                self.perm_validator.xwr.eq(self.xwr),
-                # Supervisor Mode
-                self.perm_validator.super_mode.eq(self.supermode),
-                # Supverisor Access
-                self.perm_validator.super_access.eq(self.super_access),
-                # Address Space IDentifier (ASID)
-                self.perm_validator.asid.eq(self.asid),
-                # Output result of permission validation
-                self.perm_valid.eq(self.perm_validator.valid)
-            ]
-            # Only output PTE if permissions are valid
-            with m.If(self.perm_validator.valid):
-                # XXX TODO - dummy for now
-                reg_data = Signal.like(self.pte_out)
-                m.d.comb += [
-                    self.pte_out.eq(reg_data)
-                ]
-            with m.Else():
-                m.d.comb += [
-                    self.pte_out.eq(0)
-                ]
-        # Miss Logic
-        with m.Else():
-            m.d.comb += [
-                self.hit.eq(0),
-                self.perm_valid.eq(0),
-                self.pte_out.eq(0)
-            ]
-
-    def write_l1(self, m, read_L1, write_L1):
-        """ writes to the L1 cache
-        """
-        # Memory_L1 Logic
-        m.d.comb += [
-            write_L1.en.eq(1),
-            write_L1.addr.eq(self.address_L1),
-            # The Cat places arguments from LSB -> MSB
-            write_L1.data.eq(Cat(self.pte_in, self.asid))
-        ]
-        # CAM_L1 Logic
-        m.d.comb += [
-            self.cam_L1.write_enable.eq(1),
-            self.cam_L1.data_in.eq(self.vma),
-        ]
-
-    def elaborate(self, platform):
-        m = Module()
-        # Add submodules
-        # Submodules for L1 Cache
-        m.d.submodules.cam_L1 = self.cam_L1
-        m.d.sumbmodules.read_L1 = read_L1 = self.mem_L1.read_port()
-        m.d.sumbmodules.read_L1 = write_L1 = self.mem_L1.write_port()
-        # Permission Validator Submodule
-        m.d.submodules.perm_valididator = self.perm_validator
-
-        # When MODE specifies translation
-        # TODO add in different bit length handling ie prefix 0s
-        tlb_enable = Signal(reset_less=True)
-        m.d.comb += tlb_enable.eq(self.mode != 0)
-
-        with m.If(tlb_enable):
-            m.d.comb += [
-                self.cam_L1.enable.eq(1)
-            ]
-            with m.Switch(self.command):
-                # Search
-                with m.Case("01"):
-                    self.search(m, read_L1, write_L1)
-
-                # Write L1
-                # Expected that the miss will be handled in software
-                with m.Case("10"):
-                    self.write_l1(m, read_L1, write_L1)
-
-                # TODO
-                #with m.Case("11"):
-
-        # When disabled
-        with m.Else():
-            m.d.comb += [
-                self.cam_L1.enable.eq(0),
-                # XXX TODO - self.reg_file.enable.eq(0),
-                self.hit.eq(0),
-                self.perm_valid.eq(0), # XXX TODO, check this
-                self.pte_out.eq(0)
-            ]
-        return m
-
-
-if __name__ == '__main__':
-    tlb = TLB(15, 36, 64, 4)
-    main(tlb, ports=[ tlb.supermode, tlb.super_access, tlb.command,
-        tlb.xwr, tlb.mode, tlb.address_L1, tlb.asid,
-        tlb.vma, tlb.pte_in,
-        tlb.hit, tlb.perm_valid, tlb.pte_out,
-        ] + tlb.cam_L1.ports())
diff --git a/src/TLB/src/__init__.py b/src/TLB/src/__init__.py
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/src/TLB/src/ariane/TreePLRU.cpp b/src/TLB/src/ariane/TreePLRU.cpp
deleted file mode 100644 (file)
index 2f6aeea..0000000
+++ /dev/null
@@ -1,211 +0,0 @@
-#include <cstdint>
-#include <iostream>
-#include <cmath>
-
-
-#define NWAY 4
-#define NLINE 256
-#define HIT 0
-#define MISS 1
-#define MS 1000
-/*
-Detailed TreePLRU inference see here: https://docs.google.com/spreadsheets/d/14zQpPYPwDAbCCjBT_a3KLaE5FEk-RNhI8Z7Qm_biW8g/edit?usp=sharing
-Ref: https://people.cs.clemson.edu/~mark/464/p_lru.txt
-four-way set associative - three bits
-   each bit represents one branch point in a binary decision tree; let 1
-   represent that the left side has been referenced more recently than the
-   right side, and 0 vice-versa
-              are all 4 lines valid?
-                   /       \
-                 yes        no, use an invalid line
-                  |
-                  |
-                  |
-             bit_0 == 0?            state | replace      ref to | next state
-              /       \             ------+--------      -------+-----------
-             y         n             00x  |  line_0      line_0 |    11_
-            /           \            01x  |  line_1      line_1 |    10_
-     bit_1 == 0?    bit_2 == 0?      1x0  |  line_2      line_2 |    0_1
-       /    \          /    \        1x1  |  line_3      line_3 |    0_0
-      y      n        y      n
-     /        \      /        \        ('x' means       ('_' means unchanged)
-   line_0  line_1  line_2  line_3      don't care)
- 8-way set associative - 7  = 1+2+4 bits
-16-way set associative - 15 = 1+2+4+8 bits
-32-way set associative - 31 = 1+2+4+8+16 bits
-64-way set associative - 63 = 1+2+4+8+16+32 bits
-*/
-using namespace std;
-struct AddressField {
-    uint64_t wd_idx : 2;//Unused
-    uint64_t offset : 4;//Unused
-    uint64_t index  : 8;//NLINE = 256 = 2^8
-    uint64_t tag    : 50;
-};
-
-union Address {
-    uint32_t* p;
-    AddressField fields;
-};
-
-struct Cell {
-    bool v;
-    uint64_t tag;
-
-    Cell() : v(false), tag(0) {}
-
-    bool isHit(uint64_t tag) {
-        return v && (tag == this->tag);
-    }
-
-    void fetch(uint32_t* address) {
-        Address addr;
-        addr.p = address;
-        addr.fields.offset = 0;
-        addr.fields.wd_idx = 0;
-        tag = addr.fields.tag;
-        v = true;
-    }
-};
-
-ostream& operator<<(ostream & out, const Cell& cell) {
-    out << " v:" << cell.v << " tag:" << hex << cell.tag;
-    return out;
-}
-
-struct Block {
-    Cell cell[NWAY];
-    uint32_t state;
-    uint64_t *mask;//Mask the state to get accurate value for specified 1 bit.
-    uint64_t *value;
-    uint64_t *next_value;
-
-    Block() : state(0) {
-        switch (NWAY) {
-            case 4:
-                mask = new uint64_t[4]{0b110, 0b110, 0b101, 0b101};
-                value = new uint64_t[4]{0b000, 0b010, 0b100, 0b101};
-                next_value = new uint64_t[4]{0b110, 0b100, 0b001, 0b000};
-                break;
-            case 8:
-                mask = new uint64_t[8]{0b1101000, 0b1101000, 0b1100100, 0b1100100, 0b1010010, 0b1010010, 0b1010001,
-                                       0b1010001};
-                value = new uint64_t[8]{0b0000000, 0b0001000, 0b0100000, 0b0100100, 0b1000000, 0b1000010, 0b1010000,
-                                        0b1010001};
-                next_value = new uint64_t[8]{0b1101000, 0b1100000, 0b1000100, 0b1000000, 0b0010010, 0b0010000,
-                                             0b0000001, 0b0000000};
-                break;
-                //TODO - more NWAY goes here.
-            default:
-                std::cout << "Error definition NWAY = " << NWAY << std::endl;
-        }
-    }
-
-    uint32_t *getByTag(uint64_t tag, uint32_t *pway) {
-        for (int i = 0; i < NWAY; ++i) {
-            if (cell[i].isHit(tag)) {
-                *pway = i;
-                return pway;
-            }
-        }
-        return NULL;
-    }
-
-    void setLRU(uint32_t *address) {
-        int way = 0;
-        uint32_t st = state;
-        for (int i = 0; i < NWAY; ++i) {
-            if ((state & mask[i]) == value[i]) {
-                state ^= mask[i];
-                way = i;
-                break;
-            }
-        }
-        cell[way].fetch(address);
-        cout << "MISS: way:" << way << " address:" << address << " state:" << st << "->" << state << endl;
-    }
-
-    uint32_t *get(uint32_t *address, uint32_t *pway) {
-        Address addr;
-        addr.p = address;
-        uint32_t *d = getByTag(addr.fields.tag, pway);
-        if (d != NULL) {
-            return &d[addr.fields.offset];
-        }
-        return d;
-    }
-
-    int set(uint32_t *address) {
-        uint32_t way = 0;
-        uint32_t *p = get(address, &way);
-        if (p != NULL) {
-            printf("HIT: address:%p ref_to way:%d state %X --> ", address, way, state);
-            state &= ~mask[way];
-            printf("%X --> ", state);
-            state |= next_value[way];
-            printf("%X\n", state);
-            // *p = *address; //skip since address is fake.
-            return HIT;
-        } else {
-            setLRU(address);
-            return MISS;
-        }
-    }
-};
-
-ostream& operator<<(ostream & out, const Block& block) {
-    out << "state:" << block.state << " ";
-    for (int i = 0; i<NWAY; i++) {
-        out << block.cell[i];
-    }
-    return out;
-}
-
-struct Cache {
-    Block block[NLINE];
-    uint32_t count[2];
-    Cache() { count[HIT] = 0; count[MISS] = 0; }
-
-    void access(uint32_t* address) {
-        Address addr;
-        addr.p = address;
-        Block& b = block[addr.fields.index];
-        ++count[b.set(address)];
-    }
-
-};
-ostream& operator<<(ostream & out, const Cache& cache) {
-    out << "\n==Summary==\n\tHit: " << cache.count[HIT] <<  " Miss: " << cache.count[MISS] << std::endl;
-    for (int i = 0; i < NLINE; i++) {
-        out << cache.block[i] << endl;
-    }
-    return out;
-}
-
-Cache cache;
-void multiply(uint32_t* m1, uint32_t* m2, uint32_t* res)
-{
-    int x, i, j;
-    for (i = 0; i < MS; i++) {
-        for (j = 0; j < MS; j++) {
-            cache.access(res + i*MS +j);
-            for (x = 0; x < MS; x++) {
-                cache.access(m1 + i*MS + x);
-                cache.access(m2 + x*MS + j);
-                cache.access(res + i*MS +j);
-                // res[i][j] += m1[i][x] * m2[x][j];
-                cache.access(res + i*MS +j);
-            }
-        }
-    }
-}
-
-int main()
-{
-    uint32_t* m1 = (uint32_t*) 0xFACE00A000000000LL;  // fake virtual address; don’t access it
-    uint32_t* m2 = (uint32_t*) 0xFACE00B000000000LL;  // fake virtual address; don’t access it
-    uint32_t* res =  (uint32_t*) 0xFACE00C000000000LL; // fake virtual address; don’t access it
-    multiply(m1, m2, res);
-    cout << cache << endl;
-    return 0;
-}
diff --git a/src/TLB/src/ariane/p_lru.txt b/src/TLB/src/ariane/p_lru.txt
deleted file mode 100644 (file)
index 4bac768..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-pseudo-LRU
-
-two-way set associative - one bit
-
-   indicates which line of the two has been reference more recently
-
-
-four-way set associative - three bits
-
-   each bit represents one branch point in a binary decision tree; let 1
-   represent that the left side has been referenced more recently than the
-   right side, and 0 vice-versa
-
-              are all 4 lines valid?
-                   /       \
-                 yes        no, use an invalid line
-                  |
-                  |
-                  |
-             bit_0 == 0?            state | replace      ref to | next state
-              /       \             ------+--------      -------+-----------
-             y         n             00x  |  line_0      line_0 |    11_
-            /           \            01x  |  line_1      line_1 |    10_
-     bit_1 == 0?    bit_2 == 0?      1x0  |  line_2      line_2 |    0_1
-       /    \          /    \        1x1  |  line_3      line_3 |    0_0
-      y      n        y      n
-     /        \      /        \        ('x' means       ('_' means unchanged)
-   line_0  line_1  line_2  line_3      don't care)
-
-   (see Figure 3-7, p. 3-18, in Intel Embedded Pentium Processor Family Dev.
-    Manual, 1998, http://www.intel.com/design/intarch/manuals/273204.htm)
-
-
-note that there is a 6-bit encoding for true LRU for four-way set associative
-
-  bit 0: bank[1] more recently used than bank[0]
-  bit 1: bank[2] more recently used than bank[0]
-  bit 2: bank[2] more recently used than bank[1]
-  bit 3: bank[3] more recently used than bank[0]
-  bit 4: bank[3] more recently used than bank[1]
-  bit 5: bank[3] more recently used than bank[2]
-
-  this results in 24 valid bit patterns within the 64 possible bit patterns
-  (4! possible valid traces for bank references)
-
-  e.g., a trace of 0 1 2 3, where 0 is LRU and 3 is MRU, is encoded as 111111
-
-  you can implement a state machine with a 256x6 ROM (6-bit state encoding
-  appended with a 2-bit bank reference input will yield a new 6-bit state),
-  and you can implement an LRU bank indicator with a 64x2 ROM
-
diff --git a/src/TLB/src/ariane/src/exceptcause.py b/src/TLB/src/ariane/src/exceptcause.py
deleted file mode 100644 (file)
index 4c5cb2d..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-from nmigen import Const
-
-INSTR_ADDR_MISALIGNED = Const(0, 64)
-INSTR_ACCESS_FAULT    = Const(1, 64)
-ILLEGAL_INSTR         = Const(2, 64)
-BREAKPOINT            = Const(3, 64)
-LD_ADDR_MISALIGNED    = Const(4, 64)
-LD_ACCESS_FAULT       = Const(5, 64)
-ST_ADDR_MISALIGNED    = Const(6, 64)
-ST_ACCESS_FAULT       = Const(7, 64)
-ENV_CALL_UMODE        = Const(8, 64)  # environment call from user mode
-ENV_CALL_SMODE        = Const(9, 64)  # environment call from supervisor mode
-ENV_CALL_MMODE        = Const(11, 64) # environment call from machine mode
-INSTR_PAGE_FAULT      = Const(12, 64) # Instruction page fault
-LOAD_PAGE_FAULT       = Const(13, 64) # Load page fault
-STORE_PAGE_FAULT      = Const(15, 64) # Store page fault
diff --git a/src/TLB/src/ariane/src/mmu.py b/src/TLB/src/ariane/src/mmu.py
deleted file mode 100644 (file)
index c0739cc..0000000
+++ /dev/null
@@ -1,462 +0,0 @@
-"""
-# Copyright 2018 ETH Zurich and University of Bologna.
-# Copyright and related rights are licensed under the Solderpad Hardware
-# License, Version 0.51 (the "License"); you may not use this file except in
-# compliance with the License.  You may obtain a copy of the License at
-# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# or agreed to in writing, software, hardware and materials distributed under
-# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations under the License.
-#
-# Author: Florian Zaruba, ETH Zurich
-# Date: 19/04/2017
-# Description: Memory Management Unit for Ariane, contains TLB and
-#              address translation unit. SV39 as defined in RISC-V
-#              privilege specification 1.11-WIP
-
-import ariane_pkg::*;
-"""
-
-from nmigen import Const, Signal, Cat, Module, Mux
-from nmigen.cli import verilog, rtlil
-
-from ptw import DCacheReqI, DCacheReqO, TLBUpdate, PTE, PTW
-from tlb import TLB
-from exceptcause import (INSTR_ACCESS_FAULT, INSTR_PAGE_FAULT,
-                         LOAD_PAGE_FAULT, STORE_PAGE_FAULT)
-
-PRIV_LVL_M = Const(0b11, 2)
-PRIV_LVL_S = Const(0b01, 2)
-PRIV_LVL_U = Const(0b00, 2)
-
-
-class RVException:
-    def __init__(self):
-         self.cause = Signal(64) # cause of exception
-         self.tval = Signal(64) # more info of causing exception
-                                # (e.g.: instruction causing it),
-                                #        address of LD/ST fault
-         self.valid = Signal()
-
-    def eq(self, inp):
-        res = []
-        for (o, i) in zip(self.ports(), inp.ports()):
-            res.append(o.eq(i))
-        return res
-
-    def __iter__(self):
-        yield self.cause
-        yield self.tval
-        yield self.valid
-
-    def ports(self):
-        return list(self)
-
-
-class ICacheReqI:
-    def __init__(self):
-        self.fetch_valid = Signal()   # address translation valid
-        self.fetch_paddr = Signal(64) # physical address in
-        self.fetch_exception = RVException() # exception occurred during fetch
-
-    def __iter__(self):
-        yield self.fetch_valid
-        yield self.fetch_paddr
-        yield from self.fetch_exception
-
-    def ports(self):
-        return list(self)
-
-
-class ICacheReqO:
-    def __init__(self):
-        self.fetch_req = Signal()     # address translation request
-        self.fetch_vaddr = Signal(64) # virtual address out
-
-    def __iter__(self):
-        yield self.fetch_req
-        yield self.fetch_vaddr
-
-    def ports(self):
-        return list(self)
-
-
-class MMU:
-    def __init__(self, instr_tlb_entries = 4,
-                       data_tlb_entries  = 4,
-                       asid_width        = 1):
-        self.instr_tlb_entries = instr_tlb_entries
-        self.data_tlb_entries = data_tlb_entries
-        self.asid_width = asid_width
-
-        self.flush_i = Signal()
-        self.enable_translation_i = Signal()
-        self.en_ld_st_translation_i = Signal() # enable VM translation for LD/ST
-        # IF interface
-        self.icache_areq_i = ICacheReqO()
-        self.icache_areq_o = ICacheReqI()
-        # LSU interface
-        # this is a more minimalistic interface because the actual addressing
-        # logic is handled in the LSU as we distinguish load and stores,
-        # what we do here is simple address translation
-        self.misaligned_ex_i = RVException()
-        self.lsu_req_i = Signal()   # request address translation
-        self.lsu_vaddr_i = Signal(64) # virtual address in
-        self.lsu_is_store_i = Signal() # the translation is requested by a store
-        # if we need to walk the page table we can't grant in the same cycle
-
-        # Cycle 0
-        self.lsu_dtlb_hit_o = Signal() # sent in the same cycle as the request
-                                       # if translation hits in the DTLB
-        # Cycle 1
-        self.lsu_valid_o = Signal()  # translation is valid
-        self.lsu_paddr_o = Signal(64) # translated address
-        self.lsu_exception_o = RVException() # addr translate threw exception
-
-        # General control signals
-        self.priv_lvl_i = Signal(2)
-        self.ld_st_priv_lvl_i = Signal(2)
-        self.sum_i = Signal()
-        self.mxr_i = Signal()
-        # input logic flag_mprv_i,
-        self.satp_ppn_i = Signal(44)
-        self.asid_i = Signal(self.asid_width)
-        self.flush_tlb_i = Signal()
-        # Performance counters
-        self.itlb_miss_o = Signal()
-        self.dtlb_miss_o = Signal()
-        # PTW memory interface
-        self.req_port_i = DCacheReqO()
-        self.req_port_o = DCacheReqI()
-
-    def elaborate(self, platform):
-        m = Module()
-
-        iaccess_err = Signal()   # insufficient priv to access instr page
-        daccess_err = Signal()   # insufficient priv to access data page
-        ptw_active = Signal()    # PTW is currently walking a page table
-        walking_instr = Signal() # PTW is walking because of an ITLB miss
-        ptw_error = Signal()     # PTW threw an exception
-
-        update_vaddr = Signal(39)
-        uaddr64 = Cat(update_vaddr, Const(0, 25)) # extend to 64bit with zeros
-        update_ptw_itlb = TLBUpdate(self.asid_width)
-        update_ptw_dtlb = TLBUpdate(self.asid_width)
-
-        itlb_lu_access = Signal()
-        itlb_content = PTE()
-        itlb_is_2M = Signal()
-        itlb_is_1G = Signal()
-        itlb_lu_hit = Signal()
-
-        dtlb_lu_access = Signal()
-        dtlb_content = PTE()
-        dtlb_is_2M = Signal()
-        dtlb_is_1G = Signal()
-        dtlb_lu_hit = Signal()
-
-        # Assignments
-        m.d.comb += [itlb_lu_access.eq(self.icache_areq_i.fetch_req),
-                     dtlb_lu_access.eq(self.lsu_req_i)
-                    ]
-
-        # ITLB
-        m.submodules.i_tlb = i_tlb = TLB(self.instr_tlb_entries,
-                                         self.asid_width)
-        m.d.comb += [i_tlb.flush_i.eq(self.flush_tlb_i),
-                     i_tlb.update_i.eq(update_ptw_itlb),
-                     i_tlb.lu_access_i.eq(itlb_lu_access),
-                     i_tlb.lu_asid_i.eq(self.asid_i),
-                     i_tlb.lu_vaddr_i.eq(self.icache_areq_i.fetch_vaddr),
-                     itlb_content.eq(i_tlb.lu_content_o),
-                     itlb_is_2M.eq(i_tlb.lu_is_2M_o),
-                     itlb_is_1G.eq(i_tlb.lu_is_1G_o),
-                     itlb_lu_hit.eq(i_tlb.lu_hit_o),
-                    ]
-
-        # DTLB
-        m.submodules.d_tlb = d_tlb = TLB(self.data_tlb_entries,
-                                         self.asid_width)
-        m.d.comb += [d_tlb.flush_i.eq(self.flush_tlb_i),
-                     d_tlb.update_i.eq(update_ptw_dtlb),
-                     d_tlb.lu_access_i.eq(dtlb_lu_access),
-                     d_tlb.lu_asid_i.eq(self.asid_i),
-                     d_tlb.lu_vaddr_i.eq(self.lsu_vaddr_i),
-                     dtlb_content.eq(d_tlb.lu_content_o),
-                     dtlb_is_2M.eq(d_tlb.lu_is_2M_o),
-                     dtlb_is_1G.eq(d_tlb.lu_is_1G_o),
-                     dtlb_lu_hit.eq(d_tlb.lu_hit_o),
-                    ]
-
-        # PTW
-        m.submodules.ptw = ptw = PTW(self.asid_width)
-        m.d.comb += [ptw_active.eq(ptw.ptw_active_o),
-                     walking_instr.eq(ptw.walking_instr_o),
-                     ptw_error.eq(ptw.ptw_error_o),
-                     ptw.enable_translation_i.eq(self.enable_translation_i),
-
-                     update_vaddr.eq(ptw.update_vaddr_o),
-                     update_ptw_itlb.eq(ptw.itlb_update_o),
-                     update_ptw_dtlb.eq(ptw.dtlb_update_o),
-
-                     ptw.itlb_access_i.eq(itlb_lu_access),
-                     ptw.itlb_hit_i.eq(itlb_lu_hit),
-                     ptw.itlb_vaddr_i.eq(self.icache_areq_i.fetch_vaddr),
-
-                     ptw.dtlb_access_i.eq(dtlb_lu_access),
-                     ptw.dtlb_hit_i.eq(dtlb_lu_hit),
-                     ptw.dtlb_vaddr_i.eq(self.lsu_vaddr_i),
-
-                     ptw.req_port_i.eq(self.req_port_i),
-                     self.req_port_o.eq(ptw.req_port_o),
-                    ]
-
-        # ila_1 i_ila_1 (
-        #     .clk(clk_i), # input wire clk
-        #     .probe0({req_port_o.address_tag, req_port_o.address_index}),
-        #     .probe1(req_port_o.data_req), # input wire [63:0]  probe1
-        #     .probe2(req_port_i.data_gnt), # input wire [0:0]  probe2
-        #     .probe3(req_port_i.data_rdata), # input wire [0:0]  probe3
-        #     .probe4(req_port_i.data_rvalid), # input wire [0:0]  probe4
-        #     .probe5(ptw_error), # input wire [1:0]  probe5
-        #     .probe6(update_vaddr), # input wire [0:0]  probe6
-        #     .probe7(update_ptw_itlb.valid), # input wire [0:0]  probe7
-        #     .probe8(update_ptw_dtlb.valid), # input wire [0:0]  probe8
-        #     .probe9(dtlb_lu_access), # input wire [0:0]  probe9
-        #     .probe10(lsu_vaddr_i), # input wire [0:0]  probe10
-        #     .probe11(dtlb_lu_hit), # input wire [0:0]  probe11
-        #     .probe12(itlb_lu_access), # input wire [0:0]  probe12
-        #     .probe13(icache_areq_i.fetch_vaddr), # input wire [0:0]  probe13
-        #     .probe14(itlb_lu_hit) # input wire [0:0]  probe13
-        # );
-
-        #-----------------------
-        # Instruction Interface
-        #-----------------------
-        # The instruction interface is a simple request response interface
-
-        # MMU disabled: just pass through
-        m.d.comb += [self.icache_areq_o.fetch_valid.eq(
-                                                self.icache_areq_i.fetch_req),
-                     # play through in case we disabled address translation
-                     self.icache_areq_o.fetch_paddr.eq(
-                                                self.icache_areq_i.fetch_vaddr)
-                    ]
-        # two potential exception sources:
-        # 1. HPTW threw an exception -> signal with a page fault exception
-        # 2. We got an access error because of insufficient permissions ->
-        #    throw an access exception
-        m.d.comb += self.icache_areq_o.fetch_exception.valid.eq(0)
-        # Check whether we are allowed to access this memory region
-        # from a fetch perspective
-
-        # XXX TODO: use PermissionValidator instead [we like modules]
-        m.d.comb += iaccess_err.eq(self.icache_areq_i.fetch_req & \
-                                   (((self.priv_lvl_i == PRIV_LVL_U) & \
-                                      ~itlb_content.u) | \
-                                   ((self.priv_lvl_i == PRIV_LVL_S) & \
-                                    itlb_content.u)))
-
-        # MMU enabled: address from TLB, request delayed until hit.
-        # Error when TLB hit and no access right or TLB hit and
-        # translated address not valid (e.g.  AXI decode error),
-        # or when PTW performs walk due to ITLB miss and raises
-        # an error.
-        with m.If (self.enable_translation_i):
-            # we work with SV39, so if VM is enabled, check that
-            # all bits [63:38] are equal
-            with m.If (self.icache_areq_i.fetch_req & \
-                ~(((~self.icache_areq_i.fetch_vaddr[38:64]) == 0) | \
-                 (self.icache_areq_i.fetch_vaddr[38:64]) == 0)):
-                fe = self.icache_areq_o.fetch_exception
-                m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT),
-                             fe.tval.eq(self.icache_areq_i.fetch_vaddr),
-                             fe.valid.eq(1)
-                            ]
-
-            m.d.comb += self.icache_areq_o.fetch_valid.eq(0)
-
-            # 4K page
-            paddr = Signal.like(self.icache_areq_o.fetch_paddr)
-            paddr4k = Cat(self.icache_areq_i.fetch_vaddr[0:12],
-                          itlb_content.ppn)
-            m.d.comb += paddr.eq(paddr4k)
-            # Mega page
-            with m.If(itlb_is_2M):
-                m.d.comb += paddr[12:21].eq(
-                          self.icache_areq_i.fetch_vaddr[12:21])
-            # Giga page
-            with m.If(itlb_is_1G):
-                m.d.comb += paddr[12:30].eq(
-                          self.icache_areq_i.fetch_vaddr[12:30])
-            m.d.comb += self.icache_areq_o.fetch_paddr.eq(paddr)
-
-            # ---------
-            # ITLB Hit
-            # --------
-            # if we hit the ITLB output the request signal immediately
-            with m.If(itlb_lu_hit):
-                m.d.comb += self.icache_areq_o.fetch_valid.eq(
-                                          self.icache_areq_i.fetch_req)
-                # we got an access error
-                with m.If (iaccess_err):
-                    # throw a page fault
-                    fe = self.icache_areq_o.fetch_exception
-                    m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT),
-                                 fe.tval.eq(self.icache_areq_i.fetch_vaddr),
-                                 fe.valid.eq(1)
-                                ]
-            # ---------
-            # ITLB Miss
-            # ---------
-            # watch out for exceptions happening during walking the page table
-            with m.Elif(ptw_active & walking_instr):
-                m.d.comb += self.icache_areq_o.fetch_valid.eq(ptw_error)
-                fe = self.icache_areq_o.fetch_exception
-                m.d.comb += [fe.cause.eq(INSTR_PAGE_FAULT),
-                             fe.tval.eq(uaddr64),
-                             fe.valid.eq(1)
-                            ]
-
-        #-----------------------
-        # Data Interface
-        #-----------------------
-
-        lsu_vaddr = Signal(64)
-        dtlb_pte = PTE()
-        misaligned_ex = RVException()
-        lsu_req = Signal()
-        lsu_is_store = Signal()
-        dtlb_hit = Signal()
-        dtlb_is_2M = Signal()
-        dtlb_is_1G = Signal()
-
-        # check if we need to do translation or if we are always
-        # ready (e.g.: we are not translating anything)
-        m.d.comb += self.lsu_dtlb_hit_o.eq(Mux(self.en_ld_st_translation_i,
-                                          dtlb_lu_hit, 1))
-
-        # The data interface is simpler and only consists of a
-        # request/response interface
-        m.d.comb += [
-            # save request and DTLB response
-            lsu_vaddr.eq(self.lsu_vaddr_i),
-            lsu_req.eq(self.lsu_req_i),
-            misaligned_ex.eq(self.misaligned_ex_i),
-            dtlb_pte.eq(dtlb_content),
-            dtlb_hit.eq(dtlb_lu_hit),
-            lsu_is_store.eq(self.lsu_is_store_i),
-            dtlb_is_2M.eq(dtlb_is_2M),
-            dtlb_is_1G.eq(dtlb_is_1G),
-        ]
-        m.d.sync += [
-            self.lsu_paddr_o.eq(lsu_vaddr),
-            self.lsu_valid_o.eq(lsu_req),
-            self.lsu_exception_o.eq(misaligned_ex),
-        ]
-
-        sverr = Signal()
-        usrerr = Signal()
-
-        m.d.comb += [
-            # mute misaligned exceptions if there is no request
-            # otherwise they will throw accidental exceptions
-            misaligned_ex.valid.eq(self.misaligned_ex_i.valid & self.lsu_req_i),
-
-            # SUM is not set and we are trying to access a user
-            # page in supervisor mode
-            sverr.eq(self.ld_st_priv_lvl_i == PRIV_LVL_S & ~self.sum_i & \
-                       dtlb_pte.u),
-            # this is not a user page but we are in user mode and
-            # trying to access it
-            usrerr.eq(self.ld_st_priv_lvl_i == PRIV_LVL_U & ~dtlb_pte.u),
-
-            # Check if the User flag is set, then we may only
-            # access it in supervisor mode if SUM is enabled
-            daccess_err.eq(sverr | usrerr),
-            ]
-
-        # translation is enabled and no misaligned exception occurred
-        with m.If(self.en_ld_st_translation_i & ~misaligned_ex.valid):
-            m.d.comb += lsu_req.eq(0)
-            # 4K page
-            paddr = Signal.like(lsu_vaddr)
-            paddr4k = Cat(lsu_vaddr[0:12], itlb_content.ppn)
-            m.d.comb += paddr.eq(paddr4k)
-            # Mega page
-            with m.If(dtlb_is_2M):
-                m.d.comb += paddr[12:21].eq(lsu_vaddr[12:21])
-            # Giga page
-            with m.If(dtlb_is_1G):
-                m.d.comb += paddr[12:30].eq(lsu_vaddr[12:30])
-            m.d.sync += self.lsu_paddr_o.eq(paddr)
-
-            # ---------
-            # DTLB Hit
-            # --------
-            with m.If(dtlb_hit & lsu_req):
-                m.d.comb += lsu_req.eq(1)
-                # this is a store
-                with m.If (lsu_is_store):
-                    # check if the page is write-able and
-                    # we are not violating privileges
-                    # also check if the dirty flag is set
-                    with m.If(~dtlb_pte.w | daccess_err | ~dtlb_pte.d):
-                        le = self.lsu_exception_o
-                        m.d.sync += [le.cause.eq(STORE_PAGE_FAULT),
-                                     le.tval.eq(lsu_vaddr),
-                                     le.valid.eq(1)
-                                    ]
-
-                # this is a load, check for sufficient access
-                # privileges - throw a page fault if necessary
-                with m.Elif(daccess_err):
-                    le = self.lsu_exception_o
-                    m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT),
-                                 le.tval.eq(lsu_vaddr),
-                                 le.valid.eq(1)
-                                ]
-            # ---------
-            # DTLB Miss
-            # ---------
-            # watch out for exceptions
-            with m.Elif (ptw_active & ~walking_instr):
-                # page table walker threw an exception
-                with m.If (ptw_error):
-                    # an error makes the translation valid
-                    m.d.comb += lsu_req.eq(1)
-                    # the page table walker can only throw page faults
-                    with m.If (lsu_is_store):
-                        le = self.lsu_exception_o
-                        m.d.sync += [le.cause.eq(STORE_PAGE_FAULT),
-                                     le.tval.eq(uaddr64),
-                                     le.valid.eq(1)
-                                    ]
-                    with m.Else():
-                        m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT),
-                                     le.tval.eq(uaddr64),
-                                     le.valid.eq(1)
-                                    ]
-
-        return m
-
-    def ports(self):
-        return [self.flush_i, self.enable_translation_i,
-                self.en_ld_st_translation_i,
-                self.lsu_req_i,
-                self.lsu_vaddr_i, self.lsu_is_store_i, self.lsu_dtlb_hit_o,
-                self.lsu_valid_o, self.lsu_paddr_o,
-                self.priv_lvl_i, self.ld_st_priv_lvl_i, self.sum_i, self.mxr_i,
-                self.satp_ppn_i, self.asid_i, self.flush_tlb_i,
-                self.itlb_miss_o, self.dtlb_miss_o] + \
-                self.icache_areq_i.ports() + self.icache_areq_o.ports() + \
-                self.req_port_i.ports() + self.req_port_o.ports() + \
-                self.misaligned_ex_i.ports() + self.lsu_exception_o.ports()
-
-if __name__ == '__main__':
-    mmu = MMU()
-    vl = rtlil.convert(mmu, ports=mmu.ports())
-    with open("test_mmu.il", "w") as f:
-        f.write(vl)
-
diff --git a/src/TLB/src/ariane/src/plru.py b/src/TLB/src/ariane/src/plru.py
deleted file mode 100644 (file)
index 95d515c..0000000
+++ /dev/null
@@ -1,106 +0,0 @@
-from nmigen import Signal, Module, Cat, Const
-from nmigen.hdl.ir import Elaboratable
-from math import log2
-
-from ptw import TLBUpdate, PTE, ASID_WIDTH
-
-class PLRU(Elaboratable):
-    """ PLRU - Pseudo Least Recently Used Replacement
-
-        PLRU-tree indexing:
-        lvl0        0
-                   / \
-                  /   \
-        lvl1     1     2
-                / \   / \
-        lvl2   3   4 5   6
-              / \ /\/\  /\
-             ... ... ... ...
-    """
-    def __init__(self, entries):
-        self.entries = entries
-        self.lu_hit = Signal(entries)
-        self.replace_en_o = Signal(entries)
-        self.lu_access_i = Signal()
-        # Tree (bit per entry)
-        self.TLBSZ = 2*(self.entries-1)
-        self.plru_tree = Signal(self.TLBSZ)
-        self.plru_tree_o = Signal(self.TLBSZ)
-
-    def elaborate(self, platform=None):
-        m = Module()
-
-        # Just predefine which nodes will be set/cleared
-        # E.g. for a TLB with 8 entries, the for-loop is semantically
-        # equivalent to the following pseudo-code:
-        # unique case (1'b1)
-        # lu_hit[7]: plru_tree[0, 2, 6] = {1, 1, 1};
-        # lu_hit[6]: plru_tree[0, 2, 6] = {1, 1, 0};
-        # lu_hit[5]: plru_tree[0, 2, 5] = {1, 0, 1};
-        # lu_hit[4]: plru_tree[0, 2, 5] = {1, 0, 0};
-        # lu_hit[3]: plru_tree[0, 1, 4] = {0, 1, 1};
-        # lu_hit[2]: plru_tree[0, 1, 4] = {0, 1, 0};
-        # lu_hit[1]: plru_tree[0, 1, 3] = {0, 0, 1};
-        # lu_hit[0]: plru_tree[0, 1, 3] = {0, 0, 0};
-        # default: begin /* No hit */ end
-        # endcase
-        LOG_TLB = int(log2(self.entries))
-        print(LOG_TLB)
-        for i in range(self.entries):
-            # we got a hit so update the pointer as it was least recently used
-            hit = Signal(reset_less=True)
-            m.d.comb += hit.eq(self.lu_hit[i] & self.lu_access_i)
-            with m.If(hit):
-                # Set the nodes to the values we would expect
-                for lvl in range(LOG_TLB):
-                    idx_base = (1<<lvl)-1
-                    # lvl0 <=> MSB, lvl1 <=> MSB-1, ...
-                    shift = LOG_TLB - lvl;
-                    new_idx = Const(~((i >> (shift-1)) & 1), (1, False))
-                    plru_idx = idx_base + (i >> shift)
-                    print ("plru", i, lvl, hex(idx_base),
-                                  plru_idx, shift, new_idx)
-                    m.d.comb += self.plru_tree_o[plru_idx].eq(new_idx)
-
-        # Decode tree to write enable signals
-        # Next for-loop basically creates the following logic for e.g.
-        # an 8 entry TLB (note: pseudo-code obviously):
-        # replace_en[7] = &plru_tree[ 6, 2, 0]; #plru_tree[0,2,6]=={1,1,1}
-        # replace_en[6] = &plru_tree[~6, 2, 0]; #plru_tree[0,2,6]=={1,1,0}
-        # replace_en[5] = &plru_tree[ 5,~2, 0]; #plru_tree[0,2,5]=={1,0,1}
-        # replace_en[4] = &plru_tree[~5,~2, 0]; #plru_tree[0,2,5]=={1,0,0}
-        # replace_en[3] = &plru_tree[ 4, 1,~0]; #plru_tree[0,1,4]=={0,1,1}
-        # replace_en[2] = &plru_tree[~4, 1,~0]; #plru_tree[0,1,4]=={0,1,0}
-        # replace_en[1] = &plru_tree[ 3,~1,~0]; #plru_tree[0,1,3]=={0,0,1}
-        # replace_en[0] = &plru_tree[~3,~1,~0]; #plru_tree[0,1,3]=={0,0,0}
-        # For each entry traverse the tree. If every tree-node matches
-        # the corresponding bit of the entry's index, this is
-        # the next entry to replace.
-        replace = []
-        for i in range(self.entries):
-            en = []
-            for lvl in range(LOG_TLB):
-                idx_base = (1<<lvl)-1
-                # lvl0 <=> MSB, lvl1 <=> MSB-1, ...
-                shift = LOG_TLB - lvl;
-                new_idx = (i >> (shift-1)) & 1;
-                plru_idx = idx_base + (i>>shift)
-                plru = Signal(reset_less=True,
-                              name="plru-%d-%d-%d" % (i, lvl, plru_idx))
-                m.d.comb += plru.eq(self.plru_tree[plru_idx])
-                # en &= plru_tree_q[idx_base + (i>>shift)] == new_idx;
-                if new_idx:
-                    en.append(~plru) # yes inverted (using bool())
-                else:
-                    en.append(plru)  # yes inverted (using bool())
-            print ("plru", i, en)
-            # boolean logic manipulation:
-            # plru0 & plru1 & plru2 == ~(~plru0 | ~plru1 | ~plru2)
-            replace.append(~Cat(*en).bool())
-        m.d.comb += self.replace_en_o.eq(Cat(*replace))
-
-        return m
-
-    def ports(self):
-        return [self.entries, self.lu_hit, self.replace_en_o,
-                self.lu_access_i, self.plru_tree, self.plru_tree_o]
\ No newline at end of file
diff --git a/src/TLB/src/ariane/src/ptw.py b/src/TLB/src/ariane/src/ptw.py
deleted file mode 100644 (file)
index 05ec2d7..0000000
+++ /dev/null
@@ -1,539 +0,0 @@
-"""
-# Copyright 2018 ETH Zurich and University of Bologna.
-# Copyright and related rights are licensed under the Solderpad Hardware
-# License, Version 0.51 (the "License"); you may not use this file except in
-# compliance with the License.  You may obtain a copy of the License at
-# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# or agreed to in writing, software, hardware and materials distributed under
-# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations under the License.
-#
-# Author: David Schaffenrath, TU Graz
-# Author: Florian Zaruba, ETH Zurich
-# Date: 24.4.2017
-# Description: Hardware-PTW
-
-/* verilator lint_off WIDTH */
-import ariane_pkg::*;
-
-see linux kernel source:
-
-* "arch/riscv/include/asm/page.h"
-* "arch/riscv/include/asm/mmu_context.h"
-* "arch/riscv/Kconfig" (CONFIG_PAGE_OFFSET)
-
-"""
-
-from nmigen import Const, Signal, Cat, Module
-from nmigen.hdl.ast import ArrayProxy
-from nmigen.cli import verilog, rtlil
-from math import log2
-
-
-DCACHE_SET_ASSOC = 8
-CONFIG_L1D_SIZE =  32*1024
-DCACHE_INDEX_WIDTH = int(log2(CONFIG_L1D_SIZE / DCACHE_SET_ASSOC))
-DCACHE_TAG_WIDTH = 56 - DCACHE_INDEX_WIDTH
-
-ASID_WIDTH = 8
-
-
-class DCacheReqI:
-    def __init__(self):
-        self.address_index = Signal(DCACHE_INDEX_WIDTH)
-        self.address_tag = Signal(DCACHE_TAG_WIDTH)
-        self.data_wdata = Signal(64)
-        self.data_req = Signal()
-        self.data_we = Signal()
-        self.data_be = Signal(8)
-        self.data_size = Signal(2)
-        self.kill_req = Signal()
-        self.tag_valid = Signal()
-
-    def eq(self, inp):
-        res = []
-        for (o, i) in zip(self.ports(), inp.ports()):
-            res.append(o.eq(i))
-        return res
-
-    def ports(self):
-        return [self.address_index, self.address_tag,
-                self.data_wdata, self.data_req,
-                self.data_we, self.data_be, self.data_size,
-                self.kill_req, self.tag_valid,
-            ]
-
-class DCacheReqO:
-    def __init__(self):
-        self.data_gnt = Signal()
-        self.data_rvalid = Signal()
-        self.data_rdata = Signal(64) # actually in PTE object format
-
-    def eq(self, inp):
-        res = []
-        for (o, i) in zip(self.ports(), inp.ports()):
-            res.append(o.eq(i))
-        return res
-
-    def ports(self):
-        return [self.data_gnt, self.data_rvalid, self.data_rdata]
-
-
-class PTE: #(RecordObject):
-    def __init__(self):
-        self.v = Signal()
-        self.r = Signal()
-        self.w = Signal()
-        self.x = Signal()
-        self.u = Signal()
-        self.g = Signal()
-        self.a = Signal()
-        self.d = Signal()
-        self.rsw = Signal(2)
-        self.ppn = Signal(44)
-        self.reserved = Signal(10)
-
-    def flatten(self):
-        return Cat(*self.ports())
-
-    def eq(self, x):
-        if isinstance(x, ArrayProxy):
-            res = []
-            for o in self.ports():
-                i = getattr(x, o.name)
-                res.append(i)
-            x = Cat(*res)
-        else:
-            x = x.flatten()
-        return self.flatten().eq(x)
-
-    def __iter__(self):
-        """ order is critical so that flatten creates LSB to MSB
-        """
-        yield self.v
-        yield self.r
-        yield self.w
-        yield self.x
-        yield self.u
-        yield self.g
-        yield self.a
-        yield self.d
-        yield self.rsw
-        yield self.ppn
-        yield self.reserved
-
-    def ports(self):
-        return list(self)
-
-
-class TLBUpdate:
-    def __init__(self, asid_width):
-        self.valid = Signal()      # valid flag
-        self.is_2M = Signal()
-        self.is_1G = Signal()
-        self.vpn = Signal(27)
-        self.asid = Signal(asid_width)
-        self.content = PTE()
-
-    def flatten(self):
-        return Cat(*self.ports())
-
-    def eq(self, x):
-        return self.flatten().eq(x.flatten())
-
-    def ports(self):
-        return [self.valid, self.is_2M, self.is_1G, self.vpn, self.asid] + \
-                self.content.ports()
-
-
-# SV39 defines three levels of page tables
-LVL1 = Const(0, 2) # defined to 0 so that ptw_lvl default-resets to LVL1
-LVL2 = Const(1, 2)
-LVL3 = Const(2, 2)
-
-
-class PTW:
-    def __init__(self, asid_width=8):
-        self.asid_width = asid_width
-
-        self.flush_i = Signal() # flush everything, we need to do this because
-        # actually everything we do is speculative at this stage
-        # e.g.: there could be a CSR instruction that changes everything
-        self.ptw_active_o = Signal(reset=1)    # active if not IDLE
-        self.walking_instr_o = Signal()        # set when walking for TLB
-        self.ptw_error_o = Signal()            # set when an error occurred
-        self.enable_translation_i = Signal()   # CSRs indicate to enable SV39
-        self.en_ld_st_translation_i = Signal() # enable VM translation for ld/st
-
-        self.lsu_is_store_i = Signal()       # translation triggered by store
-        # PTW memory interface
-        self.req_port_i = DCacheReqO()
-        self.req_port_o = DCacheReqI()
-
-        # to TLBs, update logic
-        self.itlb_update_o = TLBUpdate(asid_width)
-        self.dtlb_update_o = TLBUpdate(asid_width)
-
-        self.update_vaddr_o = Signal(39)
-
-        self.asid_i = Signal(self.asid_width)
-        # from TLBs
-        # did we miss?
-        self.itlb_access_i = Signal()
-        self.itlb_hit_i = Signal()
-        self.itlb_vaddr_i = Signal(64)
-
-        self.dtlb_access_i = Signal()
-        self.dtlb_hit_i = Signal()
-        self.dtlb_vaddr_i = Signal(64)
-        # from CSR file
-        self.satp_ppn_i = Signal(44) # ppn from satp
-        self.mxr_i = Signal()
-        # Performance counters
-        self.itlb_miss_o = Signal()
-        self.dtlb_miss_o = Signal()
-
-    def ports(self):
-        return [self.ptw_active_o, self.walking_instr_o, self.ptw_error_o,
-                ]
-        return [
-                self.enable_translation_i, self.en_ld_st_translation_i,
-                self.lsu_is_store_i, self.req_port_i, self.req_port_o,
-                self.update_vaddr_o,
-                self.asid_i,
-                self.itlb_access_i, self.itlb_hit_i, self.itlb_vaddr_i,
-                self.dtlb_access_i, self.dtlb_hit_i, self.dtlb_vaddr_i,
-                self.satp_ppn_i, self.mxr_i,
-                self.itlb_miss_o, self.dtlb_miss_o
-            ] + self.itlb_update_o.ports() + self.dtlb_update_o.ports()
-
-    def elaborate(self, platform):
-        m = Module()
-
-        # input registers
-        data_rvalid = Signal()
-        data_rdata = Signal(64)
-
-        # NOTE: pte decodes the incoming bit-field (data_rdata). data_rdata
-        # is spec'd in 64-bit binary-format: better to spec as Record?
-        pte = PTE()
-        m.d.comb += pte.flatten().eq(data_rdata)
-
-        # SV39 defines three levels of page tables
-        ptw_lvl = Signal(2) # default=0=LVL1 on reset (see above)
-        ptw_lvl1 = Signal()
-        ptw_lvl2 = Signal()
-        ptw_lvl3 = Signal()
-        m.d.comb += [ptw_lvl1.eq(ptw_lvl == LVL1),
-                     ptw_lvl2.eq(ptw_lvl == LVL2),
-                     ptw_lvl3.eq(ptw_lvl == LVL3)]
-
-        # is this an instruction page table walk?
-        is_instr_ptw = Signal()
-        global_mapping = Signal()
-        # latched tag signal
-        tag_valid = Signal()
-        # register the ASID
-        tlb_update_asid = Signal(self.asid_width)
-        # register VPN we need to walk, SV39 defines a 39 bit virtual addr
-        vaddr = Signal(64)
-        # 4 byte aligned physical pointer
-        ptw_pptr = Signal(56)
-
-        end = DCACHE_INDEX_WIDTH + DCACHE_TAG_WIDTH
-        m.d.sync += [
-            # Assignments
-            self.update_vaddr_o.eq(vaddr),
-
-            self.walking_instr_o.eq(is_instr_ptw),
-            # directly output the correct physical address
-            self.req_port_o.address_index.eq(ptw_pptr[0:DCACHE_INDEX_WIDTH]),
-            self.req_port_o.address_tag.eq(ptw_pptr[DCACHE_INDEX_WIDTH:end]),
-            # we are never going to kill this request
-            self.req_port_o.kill_req.eq(0),              # XXX assign comb?
-            # we are never going to write with the HPTW
-            self.req_port_o.data_wdata.eq(Const(0, 64)), # XXX assign comb?
-            # -----------
-            # TLB Update
-            # -----------
-            self.itlb_update_o.vpn.eq(vaddr[12:39]),
-            self.dtlb_update_o.vpn.eq(vaddr[12:39]),
-            # update the correct page table level
-            self.itlb_update_o.is_2M.eq(ptw_lvl2),
-            self.itlb_update_o.is_1G.eq(ptw_lvl1),
-            self.dtlb_update_o.is_2M.eq(ptw_lvl2),
-            self.dtlb_update_o.is_1G.eq(ptw_lvl1),
-            # output the correct ASID
-            self.itlb_update_o.asid.eq(tlb_update_asid),
-            self.dtlb_update_o.asid.eq(tlb_update_asid),
-            # set the global mapping bit
-            self.itlb_update_o.content.eq(pte),
-            self.itlb_update_o.content.g.eq(global_mapping),
-            self.dtlb_update_o.content.eq(pte),
-            self.dtlb_update_o.content.g.eq(global_mapping),
-
-            self.req_port_o.tag_valid.eq(tag_valid),
-        ]
-
-        #-------------------
-        # Page table walker
-        #-------------------
-        # A virtual address va is translated into a physical address pa as
-        # follows:
-        # 1. Let a be sptbr.ppn Ã— PAGESIZE, and let i = LEVELS-1. (For Sv39,
-        #    PAGESIZE=2^12 and LEVELS=3.)
-        # 2. Let pte be the value of the PTE at address a+va.vpn[i]×PTESIZE.
-        #    (For Sv32, PTESIZE=4.)
-        # 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise an
-        #    access exception.
-        # 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to
-        #    step 5.  Otherwise, this PTE is a pointer to the next level of
-        #    the page table.
-        #    Let i=i-1. If i < 0, stop and raise an access exception.
-        #    Otherwise, let a = pte.ppn Ã— PAGESIZE and go to step 2.
-        # 5. A leaf PTE has been found. Determine if the requested memory
-        #    access is allowed by the pte.r, pte.w, and pte.x bits. If not,
-        #    stop and raise an access exception. Otherwise, the translation is
-        #    successful.  Set pte.a to 1, and, if the memory access is a
-        #    store, set pte.d to 1.
-        #    The translated physical address is given as follows:
-        #      - pa.pgoff = va.pgoff.
-        #      - If i > 0, then this is a superpage translation and
-        #        pa.ppn[i-1:0] = va.vpn[i-1:0].
-        #      - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i].
-        # 6. If i > 0 and pa.ppn[i âˆ’ 1 : 0] != 0, this is a misaligned
-        #    superpage stop and raise a page-fault exception.
-
-        m.d.sync += tag_valid.eq(0)
-
-        # default assignments
-        m.d.comb += [
-            # PTW memory interface
-            self.req_port_o.data_req.eq(0),
-            self.req_port_o.data_be.eq(Const(0xFF, 8)),
-            self.req_port_o.data_size.eq(Const(0b11, 2)),
-            self.req_port_o.data_we.eq(0),
-            self.ptw_error_o.eq(0),
-            self.itlb_update_o.valid.eq(0),
-            self.dtlb_update_o.valid.eq(0),
-
-            self.itlb_miss_o.eq(0),
-            self.dtlb_miss_o.eq(0),
-        ]
-
-        # ------------
-        # State Machine
-        # ------------
-
-        with m.FSM() as fsm:
-
-            with m.State("IDLE"):
-                self.idle(m, is_instr_ptw, ptw_lvl, global_mapping,
-                          ptw_pptr, vaddr, tlb_update_asid)
-
-            with m.State("WAIT_GRANT"):
-                self.grant(m, tag_valid, data_rvalid)
-
-            with m.State("PTE_LOOKUP"):
-                # we wait for the valid signal
-                with m.If(data_rvalid):
-                    self.lookup(m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3,
-                                data_rvalid, global_mapping,
-                                is_instr_ptw, ptw_pptr)
-
-            # Propagate error to MMU/LSU
-            with m.State("PROPAGATE_ERROR"):
-                m.next = "IDLE"
-                m.d.comb += self.ptw_error_o.eq(1)
-
-            # wait for the rvalid before going back to IDLE
-            with m.State("WAIT_RVALID"):
-                with m.If(data_rvalid):
-                    m.next = "IDLE"
-
-        m.d.sync += [data_rdata.eq(self.req_port_i.data_rdata),
-                     data_rvalid.eq(self.req_port_i.data_rvalid)
-                    ]
-
-        return m
-
-    def set_grant_state(self, m):
-        # should we have flushed before we got an rvalid,
-        # wait for it until going back to IDLE
-        with m.If(self.flush_i):
-            with m.If (self.req_port_i.data_gnt):
-                m.next = "WAIT_RVALID"
-            with m.Else():
-                m.next = "IDLE"
-        with m.Else():
-            m.next = "WAIT_GRANT"
-
-    def idle(self, m, is_instr_ptw, ptw_lvl, global_mapping,
-                          ptw_pptr, vaddr, tlb_update_asid):
-        # by default we start with the top-most page table
-        m.d.sync += [is_instr_ptw.eq(0),
-                     ptw_lvl.eq(LVL1),
-                     global_mapping.eq(0),
-                     self.ptw_active_o.eq(0), # deactive (IDLE)
-                    ]
-        # work out itlb/dtlb miss
-        m.d.comb += self.itlb_miss_o.eq(self.enable_translation_i & \
-                                 self.itlb_access_i & \
-                                 ~self.itlb_hit_i & \
-                                 ~self.dtlb_access_i)
-        m.d.comb += self.dtlb_miss_o.eq(self.en_ld_st_translation_i & \
-                                        self.dtlb_access_i & \
-                                        ~self.dtlb_hit_i)
-        # we got an ITLB miss?
-        with m.If(self.itlb_miss_o):
-            pptr = Cat(Const(0, 3), self.itlb_vaddr_i[30:39],
-                       self.satp_ppn_i)
-            m.d.sync += [ptw_pptr.eq(pptr),
-                         is_instr_ptw.eq(1),
-                         vaddr.eq(self.itlb_vaddr_i),
-                         tlb_update_asid.eq(self.asid_i),
-                        ]
-            self.set_grant_state(m)
-
-        # we got a DTLB miss?
-        with m.Elif(self.dtlb_miss_o):
-            pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[30:39],
-                       self.satp_ppn_i)
-            m.d.sync += [ptw_pptr.eq(pptr),
-                         vaddr.eq(self.dtlb_vaddr_i),
-                         tlb_update_asid.eq(self.asid_i),
-                        ]
-            self.set_grant_state(m)
-
-    def grant(self, m, tag_valid, data_rvalid):
-        # we've got a data WAIT_GRANT so tell the
-        # cache that the tag is valid
-
-        # send a request out
-        m.d.comb += self.req_port_o.data_req.eq(1)
-        # wait for the WAIT_GRANT
-        with m.If(self.req_port_i.data_gnt):
-            # send the tag valid signal one cycle later
-            m.d.sync += tag_valid.eq(1)
-            # should we have flushed before we got an rvalid,
-            # wait for it until going back to IDLE
-            with m.If(self.flush_i):
-                with m.If (~data_rvalid):
-                    m.next = "WAIT_RVALID"
-                with m.Else():
-                    m.next = "IDLE"
-            with m.Else():
-                m.next = "PTE_LOOKUP"
-
-    def lookup(self, m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3,
-                            data_rvalid, global_mapping,
-                            is_instr_ptw, ptw_pptr):
-        # temporaries
-        pte_rx = Signal(reset_less=True)
-        pte_exe = Signal(reset_less=True)
-        pte_inv = Signal(reset_less=True)
-        pte_a = Signal(reset_less=True)
-        st_wd = Signal(reset_less=True)
-        m.d.comb += [pte_rx.eq(pte.r | pte.x),
-                    pte_exe.eq(~pte.x | ~pte.a),
-                    pte_inv.eq(~pte.v | (~pte.r & pte.w)),
-                    pte_a.eq(pte.a & (pte.r | (pte.x & self.mxr_i))),
-                    st_wd.eq(self.lsu_is_store_i & (~pte.w | ~pte.d))]
-
-        l1err = Signal(reset_less=True)
-        l2err = Signal(reset_less=True)
-        m.d.comb += [l2err.eq((ptw_lvl2) & pte.ppn[0:9] != Const(0, 9)),
-                     l1err.eq((ptw_lvl1) & pte.ppn[0:18] != Const(0, 18)) ]
-
-        # check if the global mapping bit is set
-        with m.If (pte.g):
-            m.d.sync += global_mapping.eq(1)
-
-        m.next = "IDLE"
-
-        # -------------
-        # Invalid PTE
-        # -------------
-        # If pte.v = 0, or if pte.r = 0 and pte.w = 1,
-        # stop and raise a page-fault exception.
-        with m.If (pte_inv):
-            m.next = "PROPAGATE_ERROR"
-
-        # -----------
-        # Valid PTE
-        # -----------
-
-        # it is a valid PTE
-        # if pte.r = 1 or pte.x = 1 it is a valid PTE
-        with m.Elif (pte_rx):
-            # Valid translation found (either 1G, 2M or 4K)
-            with m.If(is_instr_ptw):
-                # ------------
-                # Update ITLB
-                # ------------
-                # If page not executable, we can directly raise error.
-                # This doesn't put a useless entry into the TLB.
-                # The same idea applies to the access flag since we let
-                # the access flag be managed by SW.
-                with m.If (pte_exe):
-                    m.next = "IDLE"
-                with m.Else():
-                    m.d.comb += self.itlb_update_o.valid.eq(1)
-
-            with m.Else():
-                # ------------
-                # Update DTLB
-                # ------------
-                # Check if the access flag has been set, otherwise
-                # throw page-fault and let software handle those bits.
-                # If page not readable (there are no write-only pages)
-                # directly raise an error. This doesn't put a useless
-                # entry into the TLB.
-                with m.If(pte_a):
-                    m.d.comb += self.dtlb_update_o.valid.eq(1)
-                with m.Else():
-                    m.next = "PROPAGATE_ERROR"
-                # Request is a store: perform additional checks
-                # If the request was a store and the page not
-                # write-able, raise an error
-                # the same applies if the dirty flag is not set
-                with m.If (st_wd):
-                    m.d.comb += self.dtlb_update_o.valid.eq(0)
-                    m.next = "PROPAGATE_ERROR"
-
-            # check if the ppn is correctly aligned: Case (6)
-            with m.If(l1err | l2err):
-                m.next = "PROPAGATE_ERROR"
-                m.d.comb += [self.dtlb_update_o.valid.eq(0),
-                             self.itlb_update_o.valid.eq(0)]
-
-        # this is a pointer to the next TLB level
-        with m.Else():
-            # pointer to next level of page table
-            with m.If (ptw_lvl1):
-                # we are in the second level now
-                pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[21:30], pte.ppn)
-                m.d.sync += [ptw_pptr.eq(pptr),
-                             ptw_lvl.eq(LVL2)
-                            ]
-            with m.If(ptw_lvl2):
-                # here we received a pointer to the third level
-                pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[12:21], pte.ppn)
-                m.d.sync += [ptw_pptr.eq(pptr),
-                             ptw_lvl.eq(LVL3)
-                            ]
-            self.set_grant_state(m)
-
-            with m.If (ptw_lvl3):
-                # Should already be the last level
-                # page table => Error
-                m.d.sync += ptw_lvl.eq(LVL3)
-                m.next = "PROPAGATE_ERROR"
-
-
-if __name__ == '__main__':
-    ptw = PTW()
-    vl = rtlil.convert(ptw, ports=ptw.ports())
-    with open("test_ptw.il", "w") as f:
-        f.write(vl)
diff --git a/src/TLB/src/ariane/src/tlb.py b/src/TLB/src/ariane/src/tlb.py
deleted file mode 100644 (file)
index f768571..0000000
+++ /dev/null
@@ -1,170 +0,0 @@
-"""
-# Copyright 2018 ETH Zurich and University of Bologna.
-# Copyright and related rights are licensed under the Solderpad Hardware
-# License, Version 0.51 (the "License"); you may not use this file except in
-# compliance with the License.  You may obtain a copy of the License at
-# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# or agreed to in writing, software, hardware and materials distributed under
-# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations under the License.
-#
-# Author: David Schaffenrath, TU Graz
-# Author: Florian Zaruba, ETH Zurich
-# Date: 21.4.2017
-# Description: Translation Lookaside Buffer, SV39
-#              fully set-associative
-
-Implementation in c++:
-https://raw.githubusercontent.com/Tony-Hu/TreePLRU/master/TreePLRU.cpp
-
-Text description:
-https://people.cs.clemson.edu/~mark/464/p_lru.txt
-
-Online simulator:
-http://www.ntu.edu.sg/home/smitha/ParaCache/Paracache/vm.html
-"""
-from math import log2
-from nmigen import Signal, Module, Cat, Const, Array
-from nmigen.cli import verilog, rtlil
-from nmigen.lib.coding import Encoder
-
-from ptw import TLBUpdate, PTE, ASID_WIDTH
-from plru import PLRU
-from tlb_content import TLBContent
-
-TLB_ENTRIES = 8
-
-class TLB:
-    def __init__(self, tlb_entries=8, asid_width=8):
-        self.tlb_entries = tlb_entries
-        self.asid_width = asid_width
-
-        self.flush_i = Signal()  # Flush signal
-        # Lookup signals
-        self.lu_access_i = Signal()
-        self.lu_asid_i = Signal(self.asid_width)
-        self.lu_vaddr_i = Signal(64)
-        self.lu_content_o = PTE()
-        self.lu_is_2M_o = Signal()
-        self.lu_is_1G_o = Signal()
-        self.lu_hit_o = Signal()
-        # Update TLB
-        self.pte_width = len(self.lu_content_o.flatten())
-        self.update_i = TLBUpdate(asid_width)
-
-    def elaborate(self, platform):
-        m = Module()
-
-        vpn2 = Signal(9)
-        vpn1 = Signal(9)
-        vpn0 = Signal(9)
-
-        #-------------
-        # Translation
-        #-------------
-
-        # SV39 defines three levels of page tables
-        m.d.comb += [ vpn0.eq(self.lu_vaddr_i[12:21]),
-                      vpn1.eq(self.lu_vaddr_i[21:30]),
-                      vpn2.eq(self.lu_vaddr_i[30:39]),
-                    ]
-
-        tc = []
-        for i in range(self.tlb_entries):
-            tlc = TLBContent(self.pte_width, self.asid_width)
-            setattr(m.submodules, "tc%d" % i, tlc)
-            tc.append(tlc)
-            # connect inputs
-            tlc.update_i = self.update_i     # saves a lot of graphviz links
-            m.d.comb += [tlc.vpn0.eq(vpn0),
-                         tlc.vpn1.eq(vpn1),
-                         tlc.vpn2.eq(vpn2),
-                         tlc.flush_i.eq(self.flush_i),
-                         #tlc.update_i.eq(self.update_i),
-                         tlc.lu_asid_i.eq(self.lu_asid_i)]
-        tc = Array(tc)
-
-        #--------------
-        # Select hit
-        #--------------
-
-        # use Encoder to select hit index
-        # XXX TODO: assert that there's only one valid entry (one lu_hit)
-        hitsel = Encoder(self.tlb_entries)
-        m.submodules.hitsel = hitsel
-
-        hits = []
-        for i in range(self.tlb_entries):
-            hits.append(tc[i].lu_hit_o)
-        m.d.comb += hitsel.i.eq(Cat(*hits)) # (goes into plru as well)
-        idx = hitsel.o
-
-        active = Signal(reset_less=True)
-        m.d.comb += active.eq(~hitsel.n)
-        with m.If(active):
-            # active hit, send selected as output
-            m.d.comb += [ self.lu_is_1G_o.eq(tc[idx].lu_is_1G_o),
-                          self.lu_is_2M_o.eq(tc[idx].lu_is_2M_o),
-                          self.lu_hit_o.eq(1),
-                          self.lu_content_o.flatten().eq(tc[idx].lu_content_o),
-                        ]
-
-        #--------------
-        # PLRU.
-        #--------------
-
-        p = PLRU(self.tlb_entries)
-        plru_tree = Signal(p.TLBSZ)
-        m.submodules.plru = p
-
-        # connect PLRU inputs/outputs
-        # XXX TODO: assert that there's only one valid entry (one replace_en)
-        en = []
-        for i in range(self.tlb_entries):
-            en.append(tc[i].replace_en_i)
-        m.d.comb += [Cat(*en).eq(p.replace_en_o), # output from PLRU into tags
-                     p.lu_hit.eq(hitsel.i),
-                     p.lu_access_i.eq(self.lu_access_i),
-                     p.plru_tree.eq(plru_tree)]
-        m.d.sync += plru_tree.eq(p.plru_tree_o)
-
-        #--------------
-        # Sanity checks
-        #--------------
-
-        assert (self.tlb_entries % 2 == 0) and (self.tlb_entries > 1), \
-            "TLB size must be a multiple of 2 and greater than 1"
-        assert (self.asid_width >= 1), \
-            "ASID width must be at least 1"
-
-        return m
-
-        """
-        # Just for checking
-        function int countSetBits(logic[self.tlb_entries-1:0] vector);
-          automatic int count = 0;
-          foreach (vector[idx]) begin
-            count += vector[idx];
-          end
-          return count;
-        endfunction
-
-        assert property (@(posedge clk_i)(countSetBits(lu_hit) <= 1))
-          else $error("More then one hit in TLB!"); $stop(); end
-        assert property (@(posedge clk_i)(countSetBits(replace_en) <= 1))
-          else $error("More then one TLB entry selected for next replace!");
-        """
-
-    def ports(self):
-        return [self.flush_i, self.lu_access_i,
-                 self.lu_asid_i, self.lu_vaddr_i,
-                 self.lu_is_2M_o, self.lu_is_1G_o, self.lu_hit_o,
-                ] + self.lu_content_o.ports() + self.update_i.ports()
-
-if __name__ == '__main__':
-    tlb = TLB()
-    vl = rtlil.convert(tlb, ports=tlb.ports())
-    with open("test_tlb.il", "w") as f:
-        f.write(vl)
-
diff --git a/src/TLB/src/ariane/src/tlb_content.py b/src/TLB/src/ariane/src/tlb_content.py
deleted file mode 100644 (file)
index 024c569..0000000
+++ /dev/null
@@ -1,125 +0,0 @@
-from nmigen import Signal, Module, Cat, Const
-
-from ptw import TLBUpdate, PTE
-
-class TLBEntry:
-    def __init__(self, asid_width):
-        self.asid = Signal(asid_width)
-        # SV39 defines three levels of page tables
-        self.vpn0 = Signal(9)
-        self.vpn1 = Signal(9)
-        self.vpn2 = Signal(9)
-        self.is_2M = Signal()
-        self.is_1G = Signal()
-        self.valid = Signal()
-
-    def flatten(self):
-        return Cat(*self.ports())
-
-    def eq(self, x):
-        return self.flatten().eq(x.flatten())
-
-    def ports(self):
-        return [self.asid, self.vpn0, self.vpn1, self.vpn2,
-                self.is_2M, self.is_1G, self.valid]
-
-class TLBContent:
-    def __init__(self, pte_width, asid_width):
-        self.asid_width = asid_width
-        self.pte_width = pte_width
-        self.flush_i = Signal()  # Flush signal
-        # Update TLB
-        self.update_i = TLBUpdate(asid_width)
-        self.vpn2 = Signal(9)
-        self.vpn1 = Signal(9)
-        self.vpn0 = Signal(9)
-        self.replace_en_i = Signal() # replace the following entry,
-                                     # set by replacement strategy
-        # Lookup signals
-        self.lu_asid_i = Signal(asid_width)
-        self.lu_content_o = Signal(pte_width)
-        self.lu_is_2M_o = Signal()
-        self.lu_is_1G_o = Signal()
-        self.lu_hit_o = Signal()
-
-    def elaborate(self, platform):
-        m = Module()
-
-        tags = TLBEntry(self.asid_width)
-        content = Signal(self.pte_width)
-
-        m.d.comb += [self.lu_hit_o.eq(0),
-                     self.lu_is_2M_o.eq(0),
-                     self.lu_is_1G_o.eq(0)]
-
-        # temporaries for 1st level match
-        asid_ok = Signal(reset_less=True)
-        vpn2_ok = Signal(reset_less=True)
-        tags_ok = Signal(reset_less=True)
-        vpn2_hit = Signal(reset_less=True)
-        m.d.comb += [tags_ok.eq(tags.valid),
-                     asid_ok.eq(tags.asid == self.lu_asid_i),
-                     vpn2_ok.eq(tags.vpn2 == self.vpn2),
-                     vpn2_hit.eq(tags_ok & asid_ok & vpn2_ok)]
-        # temporaries for 2nd level match
-        vpn1_ok = Signal(reset_less=True)
-        tags_2M = Signal(reset_less=True)
-        vpn0_ok = Signal(reset_less=True)
-        vpn0_or_2M = Signal(reset_less=True)
-        m.d.comb += [vpn1_ok.eq(self.vpn1 == tags.vpn1),
-                     tags_2M.eq(tags.is_2M),
-                     vpn0_ok.eq(self.vpn0 == tags.vpn0),
-                     vpn0_or_2M.eq(tags_2M | vpn0_ok)]
-        # first level match, this may be a giga page,
-        # check the ASID flags as well
-        with m.If(vpn2_hit):
-            # second level
-            with m.If (tags.is_1G):
-                m.d.comb += [ self.lu_content_o.eq(content),
-                              self.lu_is_1G_o.eq(1),
-                              self.lu_hit_o.eq(1),
-                            ]
-            # not a giga page hit so check further
-            with m.Elif(vpn1_ok):
-                # this could be a 2 mega page hit or a 4 kB hit
-                # output accordingly
-                with m.If(vpn0_or_2M):
-                    m.d.comb += [ self.lu_content_o.eq(content),
-                                  self.lu_is_2M_o.eq(tags.is_2M),
-                                  self.lu_hit_o.eq(1),
-                                ]
-        # ------------------
-        # Update or Flush
-        # ------------------
-
-        # temporaries
-        replace_valid = Signal(reset_less=True)
-        m.d.comb += replace_valid.eq(self.update_i.valid & self.replace_en_i)
-
-        # flush
-        with m.If (self.flush_i):
-            # invalidate (flush) conditions: all if zero or just this ASID
-            with m.If (self.lu_asid_i == Const(0, self.asid_width) |
-                      (self.lu_asid_i == tags.asid)):
-                m.d.sync += tags.valid.eq(0)
-
-        # normal replacement
-        with m.Elif(replace_valid):
-            m.d.sync += [ # update tag array
-                          tags.asid.eq(self.update_i.asid),
-                          tags.vpn2.eq(self.update_i.vpn[18:27]),
-                          tags.vpn1.eq(self.update_i.vpn[9:18]),
-                          tags.vpn0.eq(self.update_i.vpn[0:9]),
-                          tags.is_1G.eq(self.update_i.is_1G),
-                          tags.is_2M.eq(self.update_i.is_2M),
-                          tags.valid.eq(1),
-                          # and content as well
-                          content.eq(self.update_i.content.flatten())
-                        ]
-        return m
-
-    def ports(self):
-        return [self.flush_i,
-                 self.lu_asid_i,
-                 self.lu_is_2M_o, self.lu_is_1G_o, self.lu_hit_o,
-                ] + self.update_i.content.ports() + self.update_i.ports()
diff --git a/src/TLB/src/ariane/test/test_plru.py b/src/TLB/src/ariane/test/test_plru.py
deleted file mode 100644 (file)
index 9b040e1..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-import sys
-sys.path.append("../src")
-sys.path.append("../../../TestUtil")
-
-from plru import PLRU
-
-from nmigen.compat.sim import run_simulation
-
-def testbench(dut):
-    yield
-
-if __name__ == "__main__":
-    dut = PLRU(4)
-    run_simulation(dut, testbench(dut), vcd_name="test_plru.vcd")
-    print("PLRU Unit Test Success")
\ No newline at end of file
diff --git a/src/TLB/src/ariane/test/test_ptw.py b/src/TLB/src/ariane/test/test_ptw.py
deleted file mode 100644 (file)
index e9c5324..0000000
+++ /dev/null
@@ -1,127 +0,0 @@
-import sys
-sys.path.append("../src")
-sys.path.append("../../../TestUtil")
-
-from nmigen.compat.sim import run_simulation
-
-from ptw import PTW, PTE
-
-
-def testbench(dut):
-
-    addr = 0x8000000
-
-    #pte = PTE()
-    #yield pte.v.eq(1)
-    #yield pte.r.eq(1)
-
-    yield dut.req_port_i.data_gnt.eq(1)
-    yield dut.req_port_i.data_rvalid.eq(1)
-    yield dut.req_port_i.data_rdata.eq(0x43)#pte.flatten())
-
-    # data lookup
-    yield dut.en_ld_st_translation_i.eq(1)
-    yield dut.asid_i.eq(1)
-
-    yield dut.dtlb_access_i.eq(1)
-    yield dut.dtlb_hit_i.eq(0)
-    yield dut.dtlb_vaddr_i.eq(0x400000000)
-
-    yield
-    yield
-    yield
-
-    yield dut.dtlb_access_i.eq(1)
-    yield dut.dtlb_hit_i.eq(0)
-    yield dut.dtlb_vaddr_i.eq(0x200000)
-
-    yield
-    yield
-    yield
-
-    yield dut.req_port_i.data_gnt.eq(0)
-    yield dut.dtlb_access_i.eq(1)
-    yield dut.dtlb_hit_i.eq(0)
-    yield dut.dtlb_vaddr_i.eq(0x400000011)
-
-    yield
-    yield dut.req_port_i.data_gnt.eq(1)
-    yield
-    yield
-
-    # data lookup, PTW levels 1-2-3
-    addr = 0x4000000
-    yield dut.dtlb_vaddr_i.eq(addr)
-    yield dut.mxr_i.eq(0x1)
-    yield dut.req_port_i.data_gnt.eq(1)
-    yield dut.req_port_i.data_rvalid.eq(1)
-    yield dut.req_port_i.data_rdata.eq(0x41 | (addr>>12)<<10)#pte.flatten())
-
-    yield dut.en_ld_st_translation_i.eq(1)
-    yield dut.asid_i.eq(1)
-
-    yield dut.dtlb_access_i.eq(1)
-    yield dut.dtlb_hit_i.eq(0)
-    yield dut.dtlb_vaddr_i.eq(addr)
-
-    yield
-    yield
-    yield
-    yield
-    yield
-    yield
-    yield
-    yield
-
-    yield dut.req_port_i.data_gnt.eq(0)
-    yield dut.dtlb_access_i.eq(1)
-    yield dut.dtlb_hit_i.eq(0)
-    yield dut.dtlb_vaddr_i.eq(0x400000011)
-
-    yield
-    yield dut.req_port_i.data_gnt.eq(1)
-    yield
-    yield
-    yield
-    yield
-
-
-    # instruction lookup
-    yield dut.en_ld_st_translation_i.eq(0)
-    yield dut.enable_translation_i.eq(1)
-    yield dut.asid_i.eq(1)
-
-    yield dut.itlb_access_i.eq(1)
-    yield dut.itlb_hit_i.eq(0)
-    yield dut.itlb_vaddr_i.eq(0x800000)
-
-    yield
-    yield
-    yield
-
-    yield dut.itlb_access_i.eq(1)
-    yield dut.itlb_hit_i.eq(0)
-    yield dut.itlb_vaddr_i.eq(0x200000)
-
-    yield
-    yield
-    yield
-
-    yield dut.req_port_i.data_gnt.eq(0)
-    yield dut.itlb_access_i.eq(1)
-    yield dut.itlb_hit_i.eq(0)
-    yield dut.itlb_vaddr_i.eq(0x800011)
-
-    yield
-    yield dut.req_port_i.data_gnt.eq(1)
-    yield
-    yield
-
-    yield
-
-
-
-if __name__ == "__main__":
-    dut = PTW()
-    run_simulation(dut, testbench(dut), vcd_name="test_ptw.vcd")
-    print("PTW Unit Test Success")
diff --git a/src/TLB/src/ariane/test/test_tlb.py b/src/TLB/src/ariane/test/test_tlb.py
deleted file mode 100644 (file)
index aab1d43..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-import sys
-sys.path.append("../src")
-sys.path.append("../../../TestUtil")
-
-from nmigen.compat.sim import run_simulation
-
-from tlb import TLB
-
-def set_vaddr(addr):
-    yield dut.lu_vaddr_i.eq(addr)
-    yield dut.update_i.vpn.eq(addr>>12)
-
-
-def testbench(dut):
-    yield dut.lu_access_i.eq(1)
-    yield dut.lu_asid_i.eq(1)
-    yield dut.update_i.valid.eq(1)
-    yield dut.update_i.is_1G.eq(0)
-    yield dut.update_i.is_2M.eq(0)
-    yield dut.update_i.asid.eq(1)
-    yield dut.update_i.content.ppn.eq(0)
-    yield dut.update_i.content.rsw.eq(0)
-    yield dut.update_i.content.r.eq(1)
-
-    yield
-
-    addr = 0x80000
-    yield from set_vaddr(addr)
-    yield
-
-    addr = 0x90001
-    yield from set_vaddr(addr)
-    yield
-
-    addr = 0x28000000
-    yield from set_vaddr(addr)
-    yield
-
-    addr = 0x28000001
-    yield from set_vaddr(addr)
-
-    addr = 0x28000001
-    yield from set_vaddr(addr)
-    yield
-
-    addr = 0x1000040000
-    yield from set_vaddr(addr)
-    yield
-
-    addr = 0x1000040001
-    yield from set_vaddr(addr)
-    yield
-
-    yield dut.update_i.is_1G.eq(1)
-    addr = 0x2040000
-    yield from set_vaddr(addr)
-    yield
-
-    yield dut.update_i.is_1G.eq(1)
-    addr = 0x2040001
-    yield from set_vaddr(addr)
-    yield
-
-    yield
-
-
-if __name__ == "__main__":
-    dut = TLB()
-    run_simulation(dut, testbench(dut), vcd_name="test_tlb.vcd")