src/soc/fu/ldst/loadstore.py

   1 """LoadStore1 FSM.
   2
   3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
   4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
   5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
   6 by way of PortInterface.  PortInterface is where things need extending,
   7 such as adding dcbz support, etc.
   8
   9 this module basically handles "pure" load / store operations, and
  10 its first job is to ask the D-Cache for the data.  if that fails,
  11 the second task (if virtual memory is enabled) is to ask the MMU
  12 to perform a TLB lookup, then to go *back* to the cache and ask again.
  13
  14 Links:
  15
  16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
  17
  18 """
  19
  20 from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
  21                     Record, Memory,
  22                     Const)
  23 from nmutil.iocontrol import RecordObject
  24 from nmutil.util import rising_edge, Display
  25 from enum import Enum, unique
  26
  27 from soc.experiment.dcache import DCache
  28 from soc.experiment.pimem import PortInterfaceBase
  29 from soc.experiment.mem_types import LoadStore1ToMMUType
  30 from soc.experiment.mem_types import MMUToLoadStore1Type
  31
  32 from soc.minerva.wishbone import make_wb_layout
  33 from soc.bus.sram import SRAM
  34 from nmutil.util import Display
  35
  36
  37 @unique
  38 class State(Enum):
  39     IDLE = 0       # ready for instruction
  40     ACK_WAIT = 1   # waiting for ack from dcache
  41     MMU_LOOKUP = 2 # waiting for MMU to look up translation
  42     TLBIE_WAIT = 3 # waiting for MMU to finish doing a tlbie
  43
  44
  45 # captures the LDSTRequest from the PortInterface, which "blips" most
  46 # of this at us (pipeline-style).
  47 class LDSTRequest(RecordObject):
  48     def __init__(self, name=None):
  49         RecordObject.__init__(self, name=name)
  50
  51         self.load          = Signal()
  52         self.dcbz          = Signal()
  53         self.addr          = Signal(64)
  54         # self.store_data    = Signal(64) # this is already sync (on a delay)
  55         self.byte_sel      = Signal(8)
  56         self.nc            = Signal()              # non-cacheable access
  57         self.virt_mode     = Signal()
  58         self.priv_mode     = Signal()
  59         self.align_intr    = Signal()
  60
  61 # glue logic for microwatt mmu and dcache
  62 class LoadStore1(PortInterfaceBase):
  63     def __init__(self, pspec):
  64         self.pspec = pspec
  65         self.disable_cache = (hasattr(pspec, "disable_cache") and
  66                               pspec.disable_cache == True)
  67         regwid = pspec.reg_wid
  68         addrwid = pspec.addr_wid
  69
  70         super().__init__(regwid, addrwid)
  71         self.dcache = DCache()
  72         # these names are from the perspective of here (LoadStore1)
  73         self.d_out  = self.dcache.d_in     # in to dcache is out for LoadStore
  74         self.d_in = self.dcache.d_out      # out from dcache is in for LoadStore
  75         self.m_out  = LoadStore1ToMMUType() # out *to* MMU
  76         self.m_in = MMUToLoadStore1Type()   # in *from* MMU
  77         self.req = LDSTRequest(name="ldst_req")
  78
  79         # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
  80         self.dbus = Record(make_wb_layout(pspec))
  81
  82         # for creating a single clock blip to DCache
  83         self.d_valid = Signal()
  84         self.d_w_valid = Signal()
  85         self.d_validblip = Signal()
  86
  87         # DSISR and DAR cached values.  note that the MMU FSM is where
  88         # these are accessed by OP_MTSPR/OP_MFSPR, on behalf of LoadStore1.
  89         # by contrast microwatt has the spr set/get done *in* loadstore1.vhdl
  90         self.dsisr = Signal(64)
  91         self.dar = Signal(64)
  92
  93         # state info for LD/ST
  94         self.done          = Signal()
  95         # latch most of the input request
  96         self.load          = Signal()
  97         self.tlbie         = Signal()
  98         self.dcbz          = Signal()
  99         self.addr          = Signal(64)
 100         self.store_data    = Signal(64)
 101         self.load_data     = Signal(64)
 102         self.byte_sel      = Signal(8)
 103         #self.xerc         : xer_common_t;
 104         #self.reserve       = Signal()
 105         #self.atomic        = Signal()
 106         #self.atomic_last   = Signal()
 107         #self.rc            = Signal()
 108         self.nc            = Signal()              # non-cacheable access
 109         self.virt_mode     = Signal()
 110         self.priv_mode     = Signal()
 111         self.state        = Signal(State)
 112         self.instr_fault   = Signal()
 113         self.align_intr    = Signal()
 114         self.busy          = Signal()
 115         self.wait_dcache   = Signal()
 116         self.wait_mmu      = Signal()
 117         #self.mode_32bit    = Signal()
 118         #self.intr_vec     : integer range 0 to 16#fff#;
 119         #self.nia           = Signal(64)
 120         #self.srr1          = Signal(16)
 121
 122     def set_wr_addr(self, m, addr, mask, misalign, msr_pr):
 123         m.d.comb += self.req.load.eq(0) # store operation
 124         m.d.comb += self.req.byte_sel.eq(mask)
 125         m.d.comb += self.req.addr.eq(addr)
 126         m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem  ==> priv
 127         m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
 128         m.d.comb += self.req.align_intr.eq(misalign)
 129
 130         dcbz = self.pi.is_dcbz
 131         with m.If(dcbz):
 132             m.d.comb += Display("set_wr_addr: is_dcbz")
 133         m.d.comb += self.req.dcbz.eq(dcbz)
 134
 135         # option to disable the cache entirely for write
 136         if self.disable_cache:
 137             m.d.comb += self.req.nc.eq(1)
 138         return None
 139
 140     def set_rd_addr(self, m, addr, mask, misalign, msr_pr):
 141         m.d.comb += self.d_valid.eq(1)
 142         m.d.comb += self.req.load.eq(1) # load operation
 143         m.d.comb += self.req.byte_sel.eq(mask)
 144         m.d.comb += self.req.align_intr.eq(misalign)
 145         m.d.comb += self.req.addr.eq(addr)
 146         m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem  ==> priv
 147         m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
 148         # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
 149         # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
 150         with m.If(addr[28:] == Const(0xc, 4)):
 151             m.d.comb += self.req.nc.eq(1)
 152         # option to disable the cache entirely for read
 153         if self.disable_cache:
 154             m.d.comb += self.req.nc.eq(1)
 155         return None #FIXME return value
 156
 157     def set_wr_data(self, m, data, wen):
 158         # do the "blip" on write data
 159         m.d.comb += self.d_valid.eq(1)
 160         # put data into comb which is picked up in main elaborate()
 161         m.d.comb += self.d_w_valid.eq(1)
 162         m.d.comb += self.store_data.eq(data)
 163         #m.d.sync += self.d_out.byte_sel.eq(wen) # this might not be needed
 164         st_ok = self.done # TODO indicates write data is valid
 165         return st_ok
 166
 167     def get_rd_data(self, m):
 168         ld_ok = self.done     # indicates read data is valid
 169         data = self.load_data # actual read data
 170         return data, ld_ok
 171
 172     def elaborate(self, platform):
 173         m = super().elaborate(platform)
 174         comb, sync = m.d.comb, m.d.sync
 175
 176         # create dcache module
 177         m.submodules.dcache = dcache = self.dcache
 178
 179         # temp vars
 180         d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
 181         m_out, m_in = self.m_out, self.m_in
 182         exc = self.pi.exc_o
 183         exception = exc.happened
 184         mmureq = Signal()
 185
 186         # copy of address, but gets over-ridden for OP_FETCH_FAILED
 187         maddr = Signal(64)
 188         m.d.comb += maddr.eq(self.addr)
 189
 190         # create a blip (single pulse) on valid read/write request
 191         # this can be over-ridden in the FSM to get dcache to re-run
 192         # a request when MMU_LOOKUP completes.
 193         m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
 194         ldst_r = LDSTRequest("ldst_r")
 195
 196         # fsm skeleton
 197         with m.Switch(self.state):
 198             with m.Case(State.IDLE):
 199                 with m.If(self.d_validblip & ~exc.happened):
 200                     comb += self.busy.eq(1)
 201                     sync += self.state.eq(State.ACK_WAIT)
 202                     sync += ldst_r.eq(self.req) # copy of LDSTRequest on "blip"
 203                 with m.Else():
 204                     sync += ldst_r.eq(0)
 205
 206             # waiting for completion
 207             with m.Case(State.ACK_WAIT):
 208                 comb += self.busy.eq(~exc.happened)
 209
 210                 with m.If(d_in.error):
 211                     # cache error is not necessarily "final", it could
 212                     # be that it was just a TLB miss
 213                     with m.If(d_in.cache_paradox):
 214                         comb += exception.eq(1)
 215                         sync += self.state.eq(State.IDLE)
 216                         sync += ldst_r.eq(0)
 217                         sync += self.dsisr[63 - 38].eq(~self.load)
 218                         # XXX there is no architected bit for this
 219                         # (probably should be a machine check in fact)
 220                         sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)
 221
 222                     with m.Else():
 223                         # Look up the translation for TLB miss
 224                         # and also for permission error and RC error
 225                         # in case the PTE has been updated.
 226                         comb += mmureq.eq(1)
 227                         sync += self.state.eq(State.MMU_LOOKUP)
 228                 with m.If(d_in.valid):
 229                     m.d.comb += self.done.eq(~mmureq) # done if not doing MMU
 230                     with m.If(self.done):
 231                         sync += Display("ACK_WAIT, done %x", self.addr)
 232                     sync += self.state.eq(State.IDLE)
 233                     sync += ldst_r.eq(0)
 234                     with m.If(self.load):
 235                         m.d.comb += self.load_data.eq(d_in.data)
 236
 237             # waiting here for the MMU TLB lookup to complete.
 238             # either re-try the dcache lookup or throw MMU exception
 239             with m.Case(State.MMU_LOOKUP):
 240                 comb += self.busy.eq(1)
 241                 with m.If(m_in.done):
 242                     with m.If(~self.instr_fault):
 243                         sync += Display("MMU_LOOKUP, done %x -> %x",
 244                                         self.addr, d_out.addr)
 245                         # retry the request now that the MMU has
 246                         # installed a TLB entry, if not exception raised
 247                         m.d.comb += self.d_out.valid.eq(~exception)
 248                         sync += self.state.eq(State.ACK_WAIT)
 249                         sync += ldst_r.eq(0)
 250                     with m.Else():
 251                         sync += Display("MMU_LOOKUP, exception %x", self.addr)
 252                         # instruction lookup fault: store address in DAR
 253                         comb += exc.happened.eq(1)
 254                         sync += self.dar.eq(self.addr)
 255
 256                 with m.If(m_in.err):
 257                     # MMU RADIX exception thrown
 258                     comb += exception.eq(1)
 259                     sync += self.dsisr[63 - 33].eq(m_in.invalid)
 260                     sync += self.dsisr[63 - 36].eq(m_in.perm_error)
 261                     sync += self.dsisr[63 - 38].eq(self.load)
 262                     sync += self.dsisr[63 - 44].eq(m_in.badtree)
 263                     sync += self.dsisr[63 - 45].eq(m_in.rc_error)
 264
 265             with m.Case(State.TLBIE_WAIT):
 266                 pass
 267
 268         # alignment error: store address in DAR
 269         with m.If(self.align_intr):
 270             comb += exc.happened.eq(1)
 271             sync += self.dar.eq(self.addr)
 272
 273         # happened, alignment, instr_fault, invalid.
 274         # note that all of these flow through - eventually to the TRAP
 275         # pipeline, via PowerDecoder2.
 276         comb += exc.invalid.eq(m_in.invalid)
 277         comb += exc.alignment.eq(self.align_intr)
 278         comb += exc.instr_fault.eq(self.instr_fault)
 279         # badtree, perm_error, rc_error, segment_fault
 280         comb += exc.badtree.eq(m_in.badtree)
 281         comb += exc.perm_error.eq(m_in.perm_error)
 282         comb += exc.rc_error.eq(m_in.rc_error)
 283         comb += exc.segment_fault.eq(m_in.segerr)
 284
 285         # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
 286         comb += dbus.adr.eq(dcache.wb_out.adr)
 287         comb += dbus.dat_w.eq(dcache.wb_out.dat)
 288         comb += dbus.sel.eq(dcache.wb_out.sel)
 289         comb += dbus.cyc.eq(dcache.wb_out.cyc)
 290         comb += dbus.stb.eq(dcache.wb_out.stb)
 291         comb += dbus.we.eq(dcache.wb_out.we)
 292
 293         comb += dcache.wb_in.dat.eq(dbus.dat_r)
 294         comb += dcache.wb_in.ack.eq(dbus.ack)
 295         if hasattr(dbus, "stall"):
 296             comb += dcache.wb_in.stall.eq(dbus.stall)
 297
 298         # update out d data when flag set
 299         with m.If(self.d_w_valid):
 300             m.d.sync += d_out.data.eq(self.store_data)
 301         #with m.Else():
 302         #    m.d.sync += d_out.data.eq(0)
 303         # unit test passes with that change
 304
 305         # this must move into the FSM, conditionally noticing that
 306         # the "blip" comes from self.d_validblip.
 307         # task 1: look up in dcache
 308         # task 2: if dcache fails, look up in MMU.
 309         # do **NOT** confuse the two.
 310         with m.If(self.d_validblip):
 311             m.d.comb += self.d_out.valid.eq(~exc.happened)
 312             m.d.comb += d_out.load.eq(self.req.load)
 313             m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
 314             m.d.comb += self.addr.eq(self.req.addr)
 315             m.d.comb += d_out.nc.eq(self.req.nc)
 316             m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
 317             m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
 318             m.d.comb += self.align_intr.eq(self.req.align_intr)
 319             #m.d.comb += Display("validblip dcbz=%i addr=%x",self.req.dcbz,self.req.addr)
 320             m.d.comb += d_out.dcbz.eq(self.req.dcbz)
 321         with m.Else():
 322             m.d.comb += d_out.load.eq(ldst_r.load)
 323             m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
 324             m.d.comb += self.addr.eq(ldst_r.addr)
 325             m.d.comb += d_out.nc.eq(ldst_r.nc)
 326             m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
 327             m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
 328             m.d.comb += self.align_intr.eq(ldst_r.align_intr)
 329             #m.d.comb += Display("no_validblip dcbz=%i addr=%x",ldst_r.dcbz,ldst_r.addr)
 330             m.d.comb += d_out.dcbz.eq(ldst_r.dcbz)
 331
 332         # XXX these should be possible to remove but for some reason
 333         # cannot be... yet. TODO, investigate
 334         m.d.comb += self.load_data.eq(d_in.data)
 335         m.d.comb += d_out.addr.eq(self.addr)
 336
 337         # Update outputs to MMU
 338         m.d.comb += m_out.valid.eq(mmureq)
 339         m.d.comb += m_out.iside.eq(self.instr_fault)
 340         m.d.comb += m_out.load.eq(ldst_r.load)
 341         # m_out.priv <= r.priv_mode; TODO
 342         m.d.comb += m_out.tlbie.eq(self.tlbie)
 343         # m_out.mtspr <= mmu_mtspr; # TODO
 344         # m_out.sprn <= sprn; # TODO
 345         m.d.comb += m_out.addr.eq(maddr)
 346         # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
 347         # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate
 348
 349         return m
 350
 351     def ports(self):
 352         yield from super().ports()
 353         # TODO: memory ports
 354
 355
 356 class TestSRAMLoadStore1(LoadStore1):
 357     def __init__(self, pspec):
 358         super().__init__(pspec)
 359         pspec = self.pspec
 360         # small 32-entry Memory
 361         if (hasattr(pspec, "dmem_test_depth") and
 362                 isinstance(pspec.dmem_test_depth, int)):
 363             depth = pspec.dmem_test_depth
 364         else:
 365             depth = 32
 366         print("TestSRAMBareLoadStoreUnit depth", depth)
 367
 368         self.mem = Memory(width=pspec.reg_wid, depth=depth)
 369
 370     def elaborate(self, platform):
 371         m = super().elaborate(platform)
 372         comb = m.d.comb
 373         m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
 374                                         features={'cti', 'bte', 'err'})
 375         dbus = self.dbus
 376
 377         # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
 378         # note: SRAM is a target (slave), dbus is initiator (master)
 379         fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
 380         fanins = ['dat_r', 'ack', 'err']
 381         for fanout in fanouts:
 382             print("fanout", fanout, getattr(sram.bus, fanout).shape(),
 383                   getattr(dbus, fanout).shape())
 384             comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
 385             comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
 386         for fanin in fanins:
 387             comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
 388         # connect address
 389         comb += sram.bus.adr.eq(dbus.adr)
 390
 391         return m
 392