src/soc/fu/ldst/loadstore.py

   1 """LoadStore1 FSM.
   2
   3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
   4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
   5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
   6 by way of PortInterface.  PortInterface is where things need extending,
   7 such as adding dcbz support, etc.
   8
   9 this module basically handles "pure" load / store operations, and
  10 its first job is to ask the D-Cache for the data.  if that fails,
  11 the second task (if virtual memory is enabled) is to ask the MMU
to perform a TLB lookup, then to go *back* to the cache and ask again.
  13
  14 Links:
  15
  16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
  17
  18 """
  19
  20 from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
  21                     Record, Memory,
  22                     Const)
  23 from nmutil.iocontrol import RecordObject
  24 from nmutil.util import rising_edge
  25 from enum import Enum, unique
  26
  27 from soc.experiment.dcache import DCache
  28 from soc.experiment.pimem import PortInterfaceBase
  29 from soc.experiment.mem_types import LoadStore1ToMMUType
  30 from soc.experiment.mem_types import MMUToLoadStore1Type
  31
  32 from soc.minerva.wishbone import make_wb_layout
  33 from soc.bus.sram import SRAM
  34 from nmutil.util import Display
  35
  36
  37 @unique
  38 class State(Enum):
  39     IDLE = 0       # ready for instruction
  40     ACK_WAIT = 1   # waiting for ack from dcache
  41     MMU_LOOKUP = 2 # waiting for MMU to look up translation
  42     TLBIE_WAIT = 3 # waiting for MMU to finish doing a tlbie
  43
  44
  45 # captures the LDSTRequest from the PortInterface, which "blips" most
  46 # of this at us (pipeline-style).
  47 class LDSTRequest(RecordObject):
  48     def __init__(self, name=None):
  49         RecordObject.__init__(self, name=name)
  50
  51         self.load          = Signal()
  52         self.dcbz          = Signal()
  53         self.addr          = Signal(64)
  54         # self.store_data    = Signal(64) # this is already sync (on a delay)
  55         self.byte_sel      = Signal(8)
  56         self.nc            = Signal()              # non-cacheable access
  57         self.virt_mode     = Signal()
  58         self.priv_mode     = Signal()
  59         self.align_intr    = Signal()
  60
  61 # glue logic for microwatt mmu and dcache
  62 class LoadStore1(PortInterfaceBase):
  63     def __init__(self, pspec):
  64         self.pspec = pspec
  65         self.disable_cache = (hasattr(pspec, "disable_cache") and
  66                               pspec.disable_cache == True)
  67         regwid = pspec.reg_wid
  68         addrwid = pspec.addr_wid
  69
  70         super().__init__(regwid, addrwid)
  71         self.dcache = DCache()
  72         # these names are from the perspective of here (LoadStore1)
  73         self.d_out  = self.dcache.d_in     # in to dcache is out for LoadStore
  74         self.d_in = self.dcache.d_out      # out from dcache is in for LoadStore
  75         self.m_out  = LoadStore1ToMMUType() # out *to* MMU
  76         self.m_in = MMUToLoadStore1Type()   # in *from* MMU
  77         self.req = LDSTRequest(name="ldst_req")
  78
  79         # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
  80         self.dbus = Record(make_wb_layout(pspec))
  81
  82         # for creating a single clock blip to DCache
  83         self.d_valid = Signal()
  84         self.d_w_valid = Signal()
  85         self.d_validblip = Signal()
  86
  87         # DSISR and DAR cached values.  note that the MMU FSM is where
  88         # these are accessed by OP_MTSPR/OP_MFSPR, on behalf of LoadStore1.
  89         # by contrast microwatt has the spr set/get done *in* loadstore1.vhdl
  90         self.dsisr = Signal(64)
  91         self.dar = Signal(64)
  92
  93         # state info for LD/ST
  94         self.done          = Signal()
  95         # latch most of the input request
  96         self.load          = Signal()
  97         self.tlbie         = Signal()
  98         self.dcbz          = Signal()
  99         self.addr          = Signal(64)
 100         self.store_data    = Signal(64)
 101         self.load_data     = Signal(64)
 102         self.byte_sel      = Signal(8)
 103         #self.xerc         : xer_common_t;
 104         #self.reserve       = Signal()
 105         #self.atomic        = Signal()
 106         #self.atomic_last   = Signal()
 107         #self.rc            = Signal()
 108         self.nc            = Signal()              # non-cacheable access
 109         self.virt_mode     = Signal()
 110         self.priv_mode     = Signal()
 111         self.state        = Signal(State)
 112         self.instr_fault   = Signal()
 113         self.align_intr    = Signal()
 114         self.busy          = Signal()
 115         self.wait_dcache   = Signal()
 116         self.wait_mmu      = Signal()
 117         #self.mode_32bit    = Signal()
 118         #self.intr_vec     : integer range 0 to 16#fff#;
 119         #self.nia           = Signal(64)
 120         #self.srr1          = Signal(16)
 121
 122     def set_wr_addr(self, m, addr, mask, misalign, msr_pr):
 123         m.d.comb += self.req.load.eq(0) # store operation
 124         m.d.comb += self.req.byte_sel.eq(mask)
 125         m.d.comb += self.req.addr.eq(addr)
 126         m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem  ==> priv
 127         m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
 128         m.d.comb += self.req.align_intr.eq(misalign)
 129         # option to disable the cache entirely for write
 130         if self.disable_cache:
 131             m.d.comb += self.req.nc.eq(1)
 132         return None
 133
 134     def set_rd_addr(self, m, addr, mask, misalign, msr_pr):
 135         m.d.comb += self.d_valid.eq(1)
 136         m.d.comb += self.req.load.eq(1) # load operation
 137         m.d.comb += self.req.byte_sel.eq(mask)
 138         m.d.comb += self.req.align_intr.eq(misalign)
 139         m.d.comb += self.req.addr.eq(addr)
 140         m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem  ==> priv
 141         m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
 142         # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
 143         # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
 144         with m.If(addr[28:] == Const(0xc, 4)):
 145             m.d.comb += self.req.nc.eq(1)
 146         # option to disable the cache entirely for read
 147         if self.disable_cache:
 148             m.d.comb += self.req.nc.eq(1)
 149         return None #FIXME return value
 150
 151     def set_wr_data(self, m, data, wen):
 152         # do the "blip" on write data
 153         m.d.comb += self.d_valid.eq(1)
 154         # put data into comb which is picked up in main elaborate()
 155         m.d.comb += self.d_w_valid.eq(1)
 156         m.d.comb += self.store_data.eq(data)
 157         #m.d.sync += self.d_out.byte_sel.eq(wen) # this might not be needed
 158         st_ok = self.done # TODO indicates write data is valid
 159         return st_ok
 160
 161     def get_rd_data(self, m):
 162         ld_ok = self.done     # indicates read data is valid
 163         data = self.load_data # actual read data
 164         return data, ld_ok
 165
 166     def elaborate(self, platform):
 167         m = super().elaborate(platform)
 168         comb, sync = m.d.comb, m.d.sync
 169
 170         # create dcache module
 171         m.submodules.dcache = dcache = self.dcache
 172
 173         # temp vars
 174         d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
 175         m_out, m_in = self.m_out, self.m_in
 176         exc = self.pi.exc_o
 177         exception = exc.happened
 178         mmureq = Signal()
 179
 180         # copy of address, but gets over-ridden for OP_FETCH_FAILED
 181         maddr = Signal(64)
 182         m.d.comb += maddr.eq(self.addr)
 183
 184         # create a blip (single pulse) on valid read/write request
 185         # this can be over-ridden in the FSM to get dcache to re-run
 186         # a request when MMU_LOOKUP completes.
 187         m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
 188         ldst_r = LDSTRequest("ldst_r")
 189
 190         # fsm skeleton
 191         with m.Switch(self.state):
 192             with m.Case(State.IDLE):
 193                 with m.If(self.d_validblip & ~exc.happened):
 194                     comb += self.busy.eq(1)
 195                     sync += self.state.eq(State.ACK_WAIT)
 196                     sync += ldst_r.eq(self.req) # copy of LDSTRequest on "blip"
 197                 with m.Else():
 198                     sync += ldst_r.eq(0)
 199
 200             # waiting for completion
 201             with m.Case(State.ACK_WAIT):
 202                 comb += self.busy.eq(~exc.happened)
 203
 204                 with m.If(d_in.error):
 205                     # cache error is not necessarily "final", it could
 206                     # be that it was just a TLB miss
 207                     with m.If(d_in.cache_paradox):
 208                         comb += exception.eq(1)
 209                         sync += self.state.eq(State.IDLE)
 210                         sync += ldst_r.eq(0)
 211                         sync += self.dsisr[63 - 38].eq(~self.load)
 212                         # XXX there is no architected bit for this
 213                         # (probably should be a machine check in fact)
 214                         sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)
 215
 216                     with m.Else():
 217                         # Look up the translation for TLB miss
 218                         # and also for permission error and RC error
 219                         # in case the PTE has been updated.
 220                         comb += mmureq.eq(1)
 221                         sync += self.state.eq(State.MMU_LOOKUP)
 222                 with m.If(d_in.valid):
 223                     m.d.comb += self.done.eq(~mmureq) # done if not doing MMU
 224                     with m.If(self.done):
 225                         sync += Display("ACK_WAIT, done %x", self.addr)
 226                     sync += self.state.eq(State.IDLE)
 227                     sync += ldst_r.eq(0)
 228                     with m.If(self.load):
 229                         m.d.comb += self.load_data.eq(d_in.data)
 230
 231             # waiting here for the MMU TLB lookup to complete.
 232             # either re-try the dcache lookup or throw MMU exception
 233             with m.Case(State.MMU_LOOKUP):
 234                 comb += self.busy.eq(1)
 235                 with m.If(m_in.done):
 236                     with m.If(~self.instr_fault):
 237                         sync += Display("MMU_LOOKUP, done %x -> %x",
 238                                         self.addr, d_out.addr)
 239                         # retry the request now that the MMU has
 240                         # installed a TLB entry, if not exception raised
 241                         m.d.comb += self.d_out.valid.eq(~exception)
 242                         sync += self.state.eq(State.ACK_WAIT)
 243                         sync += ldst_r.eq(0)
 244                     with m.Else():
 245                         sync += Display("MMU_LOOKUP, exception %x", self.addr)
 246                         # instruction lookup fault: store address in DAR
 247                         comb += exc.happened.eq(1)
 248                         sync += self.dar.eq(self.addr)
 249
 250                 with m.If(m_in.err):
 251                     # MMU RADIX exception thrown
 252                     comb += exception.eq(1)
 253                     sync += self.dsisr[63 - 33].eq(m_in.invalid)
 254                     sync += self.dsisr[63 - 36].eq(m_in.perm_error)
 255                     sync += self.dsisr[63 - 38].eq(self.load)
 256                     sync += self.dsisr[63 - 44].eq(m_in.badtree)
 257                     sync += self.dsisr[63 - 45].eq(m_in.rc_error)
 258
 259             with m.Case(State.TLBIE_WAIT):
 260                 pass
 261
 262         # alignment error: store address in DAR
 263         with m.If(self.align_intr):
 264             comb += exc.happened.eq(1)
 265             sync += self.dar.eq(self.addr)
 266
 267         # happened, alignment, instr_fault, invalid.
 268         # note that all of these flow through - eventually to the TRAP
 269         # pipeline, via PowerDecoder2.
 270         comb += exc.invalid.eq(m_in.invalid)
 271         comb += exc.alignment.eq(self.align_intr)
 272         comb += exc.instr_fault.eq(self.instr_fault)
 273         # badtree, perm_error, rc_error, segment_fault
 274         comb += exc.badtree.eq(m_in.badtree)
 275         comb += exc.perm_error.eq(m_in.perm_error)
 276         comb += exc.rc_error.eq(m_in.rc_error)
 277         comb += exc.segment_fault.eq(m_in.segerr)
 278
 279         # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
 280         comb += dbus.adr.eq(dcache.wb_out.adr)
 281         comb += dbus.dat_w.eq(dcache.wb_out.dat)
 282         comb += dbus.sel.eq(dcache.wb_out.sel)
 283         comb += dbus.cyc.eq(dcache.wb_out.cyc)
 284         comb += dbus.stb.eq(dcache.wb_out.stb)
 285         comb += dbus.we.eq(dcache.wb_out.we)
 286
 287         comb += dcache.wb_in.dat.eq(dbus.dat_r)
 288         comb += dcache.wb_in.ack.eq(dbus.ack)
 289         if hasattr(dbus, "stall"):
 290             comb += dcache.wb_in.stall.eq(dbus.stall)
 291
 292         # update out d data when flag set
 293         with m.If(self.d_w_valid):
 294             m.d.sync += d_out.data.eq(self.store_data)
 295         #with m.Else():
 296         #    m.d.sync += d_out.data.eq(0)
 297         # unit test passes with that change
 298
 299         # this must move into the FSM, conditionally noticing that
 300         # the "blip" comes from self.d_validblip.
 301         # task 1: look up in dcache
 302         # task 2: if dcache fails, look up in MMU.
 303         # do **NOT** confuse the two.
 304         with m.If(self.d_validblip):
 305             m.d.comb += self.d_out.valid.eq(~exc.happened)
 306             m.d.comb += d_out.load.eq(self.req.load)
 307             m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
 308             m.d.comb += self.addr.eq(self.req.addr)
 309             m.d.comb += d_out.nc.eq(self.req.nc)
 310             m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
 311             m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
 312             m.d.comb += self.align_intr.eq(self.req.align_intr)
 313         with m.Else():
 314             m.d.comb += d_out.load.eq(ldst_r.load)
 315             m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
 316             m.d.comb += self.addr.eq(ldst_r.addr)
 317             m.d.comb += d_out.nc.eq(ldst_r.nc)
 318             m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
 319             m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
 320             m.d.comb += self.align_intr.eq(ldst_r.align_intr)
 321
 322         # XXX these should be possible to remove but for some reason
 323         # cannot be... yet. TODO, investigate
 324         m.d.comb += self.load_data.eq(d_in.data)
 325         m.d.comb += d_out.addr.eq(self.addr)
 326
 327         # Update outputs to MMU
 328         m.d.comb += m_out.valid.eq(mmureq)
 329         m.d.comb += m_out.iside.eq(self.instr_fault)
 330         m.d.comb += m_out.load.eq(ldst_r.load)
 331         # m_out.priv <= r.priv_mode; TODO
 332         m.d.comb += m_out.tlbie.eq(self.tlbie)
 333         # m_out.mtspr <= mmu_mtspr; # TODO
 334         # m_out.sprn <= sprn; # TODO
 335         m.d.comb += m_out.addr.eq(maddr)
 336         # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
 337         # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate
 338
 339         return m
 340
    def ports(self):
        """Yield the ports reported by the parent class's ports().

        Nothing specific to LoadStore1 is added yet (see TODO below).
        """
        yield from super().ports()
        # TODO: memory ports
 344
 345
 346 class TestSRAMLoadStore1(LoadStore1):
 347     def __init__(self, pspec):
 348         super().__init__(pspec)
 349         pspec = self.pspec
 350         # small 32-entry Memory
 351         if (hasattr(pspec, "dmem_test_depth") and
 352                 isinstance(pspec.dmem_test_depth, int)):
 353             depth = pspec.dmem_test_depth
 354         else:
 355             depth = 32
 356         print("TestSRAMBareLoadStoreUnit depth", depth)
 357
 358         self.mem = Memory(width=pspec.reg_wid, depth=depth)
 359
 360     def elaborate(self, platform):
 361         m = super().elaborate(platform)
 362         comb = m.d.comb
 363         m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
 364                                         features={'cti', 'bte', 'err'})
 365         dbus = self.dbus
 366
 367         # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
 368         # note: SRAM is a target (slave), dbus is initiator (master)
 369         fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
 370         fanins = ['dat_r', 'ack', 'err']
 371         for fanout in fanouts:
 372             print("fanout", fanout, getattr(sram.bus, fanout).shape(),
 373                   getattr(dbus, fanout).shape())
 374             comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
 375             comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
 376         for fanin in fanins:
 377             comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
 378         # connect address
 379         comb += sram.bus.adr.eq(dbus.adr)
 380
 381         return m
 382