src/soc/fu/ldst/loadstore.py

   1 """LoadStore1 FSM.
   2
   3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
   4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
   5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
   6 by way of PortInterface.  PortInterface is where things need extending,
   7 such as adding dcbz support, etc.
   8
   9 this module basically handles "pure" load / store operations, and
  10 its first job is to ask the D-Cache for the data.  if that fails,
  11 the second task (if virtual memory is enabled) is to ask the MMU
  12 to perform a TLB, then to go *back* to the cache and ask again.
  13
  14 Links:
  15
  16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
  17
  18 """
  19
  20 from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
  21                     Record, Memory,
  22                     Const)
  23 from nmutil.iocontrol import RecordObject
  24 from nmutil.util import rising_edge, Display
  25 from enum import Enum, unique
  26
  27 from soc.experiment.dcache import DCache
  28 from soc.experiment.pimem import PortInterfaceBase
  29 from soc.experiment.mem_types import LoadStore1ToMMUType
  30 from soc.experiment.mem_types import MMUToLoadStore1Type
  31
  32 from soc.minerva.wishbone import make_wb_layout
  33 from soc.bus.sram import SRAM
  34 from nmutil.util import Display
  35
  36
  37 @unique
  38 class State(Enum):
  39     IDLE = 0       # ready for instruction
  40     ACK_WAIT = 1   # waiting for ack from dcache
  41     MMU_LOOKUP = 2 # waiting for MMU to look up translation
  42     TLBIE_WAIT = 3 # waiting for MMU to finish doing a tlbie
  43
  44
  45 # captures the LDSTRequest from the PortInterface, which "blips" most
  46 # of this at us (pipeline-style).
  47 class LDSTRequest(RecordObject):
  48     def __init__(self, name=None):
  49         RecordObject.__init__(self, name=name)
  50
  51         self.load          = Signal()
  52         self.dcbz          = Signal()
  53         self.addr          = Signal(64)
  54         # self.store_data    = Signal(64) # this is already sync (on a delay)
  55         self.byte_sel      = Signal(8)
  56         self.nc            = Signal()              # non-cacheable access
  57         self.virt_mode     = Signal()
  58         self.priv_mode     = Signal()
  59         self.align_intr    = Signal()
  60
  61 # glue logic for microwatt mmu and dcache
  62 class LoadStore1(PortInterfaceBase):
  63     def __init__(self, pspec):
  64         self.pspec = pspec
  65         self.disable_cache = (hasattr(pspec, "disable_cache") and
  66                               pspec.disable_cache == True)
  67         regwid = pspec.reg_wid
  68         addrwid = pspec.addr_wid
  69
  70         super().__init__(regwid, addrwid)
  71         self.dcache = DCache()
  72         # these names are from the perspective of here (LoadStore1)
  73         self.d_out  = self.dcache.d_in     # in to dcache is out for LoadStore
  74         self.d_in = self.dcache.d_out      # out from dcache is in for LoadStore
  75         self.m_out  = LoadStore1ToMMUType() # out *to* MMU
  76         self.m_in = MMUToLoadStore1Type()   # in *from* MMU
  77         self.req = LDSTRequest(name="ldst_req")
  78
  79         # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
  80         self.dbus = Record(make_wb_layout(pspec))
  81
  82         # for creating a single clock blip to DCache
  83         self.d_valid = Signal()
  84         self.d_w_valid = Signal()
  85         self.d_validblip = Signal()
  86
  87         # DSISR and DAR cached values.  note that the MMU FSM is where
  88         # these are accessed by OP_MTSPR/OP_MFSPR, on behalf of LoadStore1.
  89         # by contrast microwatt has the spr set/get done *in* loadstore1.vhdl
  90         self.dsisr = Signal(64)
  91         self.dar = Signal(64)
  92
  93         # state info for LD/ST
  94         self.done          = Signal()
  95         # latch most of the input request
  96         self.load          = Signal()
  97         self.tlbie         = Signal()
  98         self.dcbz          = Signal()
  99         self.addr          = Signal(64)
 100         self.store_data    = Signal(64)
 101         self.load_data     = Signal(64)
 102         self.byte_sel      = Signal(8)
 103         #self.xerc         : xer_common_t;
 104         #self.reserve       = Signal()
 105         #self.atomic        = Signal()
 106         #self.atomic_last   = Signal()
 107         #self.rc            = Signal()
 108         self.nc            = Signal()              # non-cacheable access
 109         self.virt_mode     = Signal()
 110         self.priv_mode     = Signal()
 111         self.state        = Signal(State)
 112         self.instr_fault   = Signal()
 113         self.align_intr    = Signal()
 114         self.busy          = Signal()
 115         self.wait_dcache   = Signal()
 116         self.wait_mmu      = Signal()
 117         #self.mode_32bit    = Signal()
 118         #self.intr_vec     : integer range 0 to 16#fff#;
 119         #self.nia           = Signal(64)
 120         #self.srr1          = Signal(16)
 121
 122     def set_dcbz_addr(self, m, addr):
 123         m.d.comb += self.req.load.eq(0) #not a load operation
 124         m.d.comb += self.req.dcbz.eq(1)
 125         #m.d.comb += self.req.byte_sel.eq(mask)
 126         m.d.comb += self.req.addr.eq(addr)
 127         m.d.comb += Display("set_dcbz_addr %i",addr)
 128         #m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem  ==> priv
 129         #m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
 130         #m.d.comb += self.req.align_intr.eq(misalign)
 131         return None
 132
 133     def set_wr_addr(self, m, addr, mask, misalign, msr_pr):
 134         m.d.comb += self.req.load.eq(0) # store operation
 135         m.d.comb += self.req.byte_sel.eq(mask)
 136         m.d.comb += self.req.addr.eq(addr)
 137         m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem  ==> priv
 138         m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
 139         m.d.comb += self.req.align_intr.eq(misalign)
 140
 141         # option to disable the cache entirely for write
 142         if self.disable_cache:
 143             m.d.comb += self.req.nc.eq(1)
 144         return None
 145
 146     def set_rd_addr(self, m, addr, mask, misalign, msr_pr):
 147         m.d.comb += self.d_valid.eq(1)
 148         m.d.comb += self.req.load.eq(1) # load operation
 149         m.d.comb += self.req.byte_sel.eq(mask)
 150         m.d.comb += self.req.align_intr.eq(misalign)
 151         m.d.comb += self.req.addr.eq(addr)
 152         m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem  ==> priv
 153         m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
 154         # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
 155         # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
 156         with m.If(addr[28:] == Const(0xc, 4)):
 157             m.d.comb += self.req.nc.eq(1)
 158         # option to disable the cache entirely for read
 159         if self.disable_cache:
 160             m.d.comb += self.req.nc.eq(1)
 161         return None #FIXME return value
 162
 163     def set_wr_data(self, m, data, wen):
 164         # do the "blip" on write data
 165         m.d.comb += self.d_valid.eq(1)
 166         # put data into comb which is picked up in main elaborate()
 167         m.d.comb += self.d_w_valid.eq(1)
 168         m.d.comb += self.store_data.eq(data)
 169         #m.d.sync += self.d_out.byte_sel.eq(wen) # this might not be needed
 170         st_ok = self.done # TODO indicates write data is valid
 171         return st_ok
 172
 173     def get_rd_data(self, m):
 174         ld_ok = self.done     # indicates read data is valid
 175         data = self.load_data # actual read data
 176         return data, ld_ok
 177
 178     def elaborate(self, platform):
 179         m = super().elaborate(platform)
 180         comb, sync = m.d.comb, m.d.sync
 181
 182         # create dcache module
 183         m.submodules.dcache = dcache = self.dcache
 184
 185         # temp vars
 186         d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
 187         m_out, m_in = self.m_out, self.m_in
 188         exc = self.pi.exc_o
 189         exception = exc.happened
 190         mmureq = Signal()
 191
 192         # copy of address, but gets over-ridden for OP_FETCH_FAILED
 193         maddr = Signal(64)
 194         m.d.comb += maddr.eq(self.addr)
 195
 196         # create a blip (single pulse) on valid read/write request
 197         # this can be over-ridden in the FSM to get dcache to re-run
 198         # a request when MMU_LOOKUP completes.
 199         m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
 200         ldst_r = LDSTRequest("ldst_r")
 201
 202         # fsm skeleton
 203         with m.Switch(self.state):
 204             with m.Case(State.IDLE):
 205                 with m.If(self.d_validblip & ~exc.happened):
 206                     comb += self.busy.eq(1)
 207                     sync += self.state.eq(State.ACK_WAIT)
 208                     sync += ldst_r.eq(self.req) # copy of LDSTRequest on "blip"
 209                 with m.Else():
 210                     sync += ldst_r.eq(0)
 211
 212             # waiting for completion
 213             with m.Case(State.ACK_WAIT):
 214                 comb += self.busy.eq(~exc.happened)
 215
 216                 with m.If(d_in.error):
 217                     # cache error is not necessarily "final", it could
 218                     # be that it was just a TLB miss
 219                     with m.If(d_in.cache_paradox):
 220                         comb += exception.eq(1)
 221                         sync += self.state.eq(State.IDLE)
 222                         sync += ldst_r.eq(0)
 223                         sync += self.dsisr[63 - 38].eq(~self.load)
 224                         # XXX there is no architected bit for this
 225                         # (probably should be a machine check in fact)
 226                         sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)
 227
 228                     with m.Else():
 229                         # Look up the translation for TLB miss
 230                         # and also for permission error and RC error
 231                         # in case the PTE has been updated.
 232                         comb += mmureq.eq(1)
 233                         sync += self.state.eq(State.MMU_LOOKUP)
 234                 with m.If(d_in.valid):
 235                     m.d.comb += self.done.eq(~mmureq) # done if not doing MMU
 236                     with m.If(self.done):
 237                         sync += Display("ACK_WAIT, done %x", self.addr)
 238                     sync += self.state.eq(State.IDLE)
 239                     sync += ldst_r.eq(0)
 240                     with m.If(self.load):
 241                         m.d.comb += self.load_data.eq(d_in.data)
 242
 243             # waiting here for the MMU TLB lookup to complete.
 244             # either re-try the dcache lookup or throw MMU exception
 245             with m.Case(State.MMU_LOOKUP):
 246                 comb += self.busy.eq(1)
 247                 with m.If(m_in.done):
 248                     with m.If(~self.instr_fault):
 249                         sync += Display("MMU_LOOKUP, done %x -> %x",
 250                                         self.addr, d_out.addr)
 251                         # retry the request now that the MMU has
 252                         # installed a TLB entry, if not exception raised
 253                         m.d.comb += self.d_out.valid.eq(~exception)
 254                         sync += self.state.eq(State.ACK_WAIT)
 255                         sync += ldst_r.eq(0)
 256                     with m.Else():
 257                         sync += Display("MMU_LOOKUP, exception %x", self.addr)
 258                         # instruction lookup fault: store address in DAR
 259                         comb += exc.happened.eq(1)
 260                         sync += self.dar.eq(self.addr)
 261
 262                 with m.If(m_in.err):
 263                     # MMU RADIX exception thrown
 264                     comb += exception.eq(1)
 265                     sync += self.dsisr[63 - 33].eq(m_in.invalid)
 266                     sync += self.dsisr[63 - 36].eq(m_in.perm_error)
 267                     sync += self.dsisr[63 - 38].eq(self.load)
 268                     sync += self.dsisr[63 - 44].eq(m_in.badtree)
 269                     sync += self.dsisr[63 - 45].eq(m_in.rc_error)
 270
 271             with m.Case(State.TLBIE_WAIT):
 272                 pass
 273
 274         # alignment error: store address in DAR
 275         with m.If(self.align_intr):
 276             comb += exc.happened.eq(1)
 277             sync += self.dar.eq(self.addr)
 278
 279         # happened, alignment, instr_fault, invalid.
 280         # note that all of these flow through - eventually to the TRAP
 281         # pipeline, via PowerDecoder2.
 282         comb += exc.invalid.eq(m_in.invalid)
 283         comb += exc.alignment.eq(self.align_intr)
 284         comb += exc.instr_fault.eq(self.instr_fault)
 285         # badtree, perm_error, rc_error, segment_fault
 286         comb += exc.badtree.eq(m_in.badtree)
 287         comb += exc.perm_error.eq(m_in.perm_error)
 288         comb += exc.rc_error.eq(m_in.rc_error)
 289         comb += exc.segment_fault.eq(m_in.segerr)
 290
 291         # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
 292         comb += dbus.adr.eq(dcache.wb_out.adr)
 293         comb += dbus.dat_w.eq(dcache.wb_out.dat)
 294         comb += dbus.sel.eq(dcache.wb_out.sel)
 295         comb += dbus.cyc.eq(dcache.wb_out.cyc)
 296         comb += dbus.stb.eq(dcache.wb_out.stb)
 297         comb += dbus.we.eq(dcache.wb_out.we)
 298
 299         comb += dcache.wb_in.dat.eq(dbus.dat_r)
 300         comb += dcache.wb_in.ack.eq(dbus.ack)
 301         if hasattr(dbus, "stall"):
 302             comb += dcache.wb_in.stall.eq(dbus.stall)
 303
 304         # update out d data when flag set
 305         with m.If(self.d_w_valid):
 306             m.d.sync += d_out.data.eq(self.store_data)
 307         #with m.Else():
 308         #    m.d.sync += d_out.data.eq(0)
 309         # unit test passes with that change
 310
 311         # this must move into the FSM, conditionally noticing that
 312         # the "blip" comes from self.d_validblip.
 313         # task 1: look up in dcache
 314         # task 2: if dcache fails, look up in MMU.
 315         # do **NOT** confuse the two.
 316         with m.If(self.d_validblip):
 317             m.d.comb += self.d_out.valid.eq(~exc.happened)
 318             m.d.comb += d_out.load.eq(self.req.load)
 319             m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
 320             m.d.comb += self.addr.eq(self.req.addr)
 321             m.d.comb += d_out.nc.eq(self.req.nc)
 322             m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
 323             m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
 324             m.d.comb += self.align_intr.eq(self.req.align_intr)
 325             #m.d.comb += Display("validblip dcbz=%i addr=%x",self.req.dcbz,self.req.addr)
 326             m.d.comb += d_out.dcbz.eq(self.req.dcbz)
 327         with m.Else():
 328             m.d.comb += d_out.load.eq(ldst_r.load)
 329             m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
 330             m.d.comb += self.addr.eq(ldst_r.addr)
 331             m.d.comb += d_out.nc.eq(ldst_r.nc)
 332             m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
 333             m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
 334             m.d.comb += self.align_intr.eq(ldst_r.align_intr)
 335             #m.d.comb += Display("no_validblip dcbz=%i addr=%x",ldst_r.dcbz,ldst_r.addr)
 336             m.d.comb += d_out.dcbz.eq(ldst_r.dcbz)
 337
 338         # XXX these should be possible to remove but for some reason
 339         # cannot be... yet. TODO, investigate
 340         m.d.comb += self.load_data.eq(d_in.data)
 341         m.d.comb += d_out.addr.eq(self.addr)
 342
 343         # Update outputs to MMU
 344         m.d.comb += m_out.valid.eq(mmureq)
 345         m.d.comb += m_out.iside.eq(self.instr_fault)
 346         m.d.comb += m_out.load.eq(ldst_r.load)
 347         # m_out.priv <= r.priv_mode; TODO
 348         m.d.comb += m_out.tlbie.eq(self.tlbie)
 349         # m_out.mtspr <= mmu_mtspr; # TODO
 350         # m_out.sprn <= sprn; # TODO
 351         m.d.comb += m_out.addr.eq(maddr)
 352         # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
 353         # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate
 354
 355         return m
 356
 357     def ports(self):
 358         yield from super().ports()
 359         # TODO: memory ports
 360
 361
 362 class TestSRAMLoadStore1(LoadStore1):
 363     def __init__(self, pspec):
 364         super().__init__(pspec)
 365         pspec = self.pspec
 366         # small 32-entry Memory
 367         if (hasattr(pspec, "dmem_test_depth") and
 368                 isinstance(pspec.dmem_test_depth, int)):
 369             depth = pspec.dmem_test_depth
 370         else:
 371             depth = 32
 372         print("TestSRAMBareLoadStoreUnit depth", depth)
 373
 374         self.mem = Memory(width=pspec.reg_wid, depth=depth)
 375
 376     def elaborate(self, platform):
 377         m = super().elaborate(platform)
 378         comb = m.d.comb
 379         m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
 380                                         features={'cti', 'bte', 'err'})
 381         dbus = self.dbus
 382
 383         # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
 384         # note: SRAM is a target (slave), dbus is initiator (master)
 385         fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
 386         fanins = ['dat_r', 'ack', 'err']
 387         for fanout in fanouts:
 388             print("fanout", fanout, getattr(sram.bus, fanout).shape(),
 389                   getattr(dbus, fanout).shape())
 390             comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
 391             comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
 392         for fanin in fanins:
 393             comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
 394         # connect address
 395         comb += sram.bus.adr.eq(dbus.adr)
 396
 397         return m
 398