src/soc/fu/ldst/loadstore.py

   1 """LoadStore1 FSM.
   2
   3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
   4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
   5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
   6 by way of PortInterface.  PortInterface is where things need extending,
   7 such as adding dcbz support, etc.
   8
   9 this module basically handles "pure" load / store operations, and
  10 its first job is to ask the D-Cache for the data.  if that fails,
  11 the second task (if virtual memory is enabled) is to ask the MMU
  12 to perform a TLB, then to go *back* to the cache and ask again.
  13
  14 Links:
  15
  16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
  17
  18 """
  19
  20 from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
  21                     Record, Memory,
  22                     Const)
  23 from nmutil.iocontrol import RecordObject
  24 from nmutil.util import rising_edge
  25 from enum import Enum, unique
  26
  27 from soc.experiment.dcache import DCache
  28 from soc.experiment.pimem import PortInterfaceBase
  29 from soc.experiment.mem_types import LoadStore1ToMMUType
  30 from soc.experiment.mem_types import MMUToLoadStore1Type
  31
  32 from soc.minerva.wishbone import make_wb_layout
  33 from soc.bus.sram import SRAM
  34 from nmutil.util import Display
  35
  36
  37 @unique
  38 class State(Enum):
  39     IDLE = 0       # ready for instruction
  40     ACK_WAIT = 1   # waiting for ack from dcache
  41     MMU_LOOKUP = 2 # waiting for MMU to look up translation
  42     TLBIE_WAIT = 3 # waiting for MMU to finish doing a tlbie
  43
  44
  45 # captures the LDSTRequest from the PortInterface, which "blips" most
  46 # of this at us (pipeline-style).
  47 class LDSTRequest(RecordObject):
  48     def __init__(self, name=None):
  49         RecordObject.__init__(self, name=name)
  50
  51         self.load          = Signal()
  52         self.dcbz          = Signal()
  53         self.addr          = Signal(64)
  54         # self.store_data    = Signal(64) # this is already sync (on a delay)
  55         self.byte_sel      = Signal(8)
  56         self.nc            = Signal()              # non-cacheable access
  57         self.virt_mode     = Signal()
  58         self.priv_mode     = Signal()
  59         self.align_intr    = Signal()
  60
  61 # glue logic for microwatt mmu and dcache
  62 class LoadStore1(PortInterfaceBase):
  63     def __init__(self, pspec):
  64         self.pspec = pspec
  65         self.disable_cache = (hasattr(pspec, "disable_cache") and
  66                               pspec.disable_cache == True)
  67         regwid = pspec.reg_wid
  68         addrwid = pspec.addr_wid
  69
  70         super().__init__(regwid, addrwid)
  71         self.dcache = DCache()
  72         # these names are from the perspective of here (LoadStore1)
  73         self.d_out  = self.dcache.d_in     # in to dcache is out for LoadStore
  74         self.d_in = self.dcache.d_out      # out from dcache is in for LoadStore
  75         self.m_out  = LoadStore1ToMMUType() # out *to* MMU
  76         self.m_in = MMUToLoadStore1Type()   # in *from* MMU
  77         self.req = LDSTRequest(name="ldst_req")
  78
  79         # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
  80         self.dbus = Record(make_wb_layout(pspec))
  81
  82         # for creating a single clock blip to DCache
  83         self.d_valid = Signal()
  84         self.d_w_valid = Signal()
  85         self.d_validblip = Signal()
  86
  87         # DSISR and DAR cached values.  note that the MMU FSM is where
  88         # these are accessed by OP_MTSPR/OP_MFSPR, on behalf of LoadStore1.
  89         # by contrast microwatt has the spr set/get done *in* loadstore1.vhdl
  90         self.dsisr = Signal(64)
  91         self.dar = Signal(64)
  92
  93         # state info for LD/ST
  94         self.done          = Signal()
  95         # latch most of the input request
  96         self.load          = Signal()
  97         self.tlbie         = Signal()
  98         self.dcbz          = Signal()
  99         self.addr          = Signal(64)
 100         self.store_data    = Signal(64)
 101         self.load_data     = Signal(64)
 102         self.byte_sel      = Signal(8)
 103         #self.xerc         : xer_common_t;
 104         #self.reserve       = Signal()
 105         #self.atomic        = Signal()
 106         #self.atomic_last   = Signal()
 107         #self.rc            = Signal()
 108         self.nc            = Signal()              # non-cacheable access
 109         self.virt_mode     = Signal()
 110         self.priv_mode     = Signal()
 111         self.state        = Signal(State)
 112         self.instr_fault   = Signal()
 113         self.align_intr    = Signal()
 114         self.busy          = Signal()
 115         self.wait_dcache   = Signal()
 116         self.wait_mmu      = Signal()
 117         #self.mode_32bit    = Signal()
 118         #self.intr_vec     : integer range 0 to 16#fff#;
 119         #self.nia           = Signal(64)
 120         #self.srr1          = Signal(16)
 121
 122     def set_wr_addr(self, m, addr, mask, misalign, msr_pr):
 123         m.d.comb += self.req.load.eq(0) # store operation
 124         m.d.comb += self.req.byte_sel.eq(mask)
 125         m.d.comb += self.req.addr.eq(addr)
 126         m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem  ==> priv
 127         m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
 128         m.d.comb += self.req.align_intr.eq(misalign)
 129         # option to disable the cache entirely for write
 130         if self.disable_cache:
 131             m.d.comb += self.req.nc.eq(1)
 132         return None
 133
 134     def set_rd_addr(self, m, addr, mask, misalign, msr_pr):
 135         m.d.comb += self.d_out.valid.eq(self.d_validblip)
 136         m.d.comb += self.d_valid.eq(1)
 137         m.d.comb += self.req.load.eq(1) # load operation
 138         m.d.comb += self.req.byte_sel.eq(mask)
 139         m.d.comb += self.req.align_intr.eq(misalign)
 140         m.d.comb += self.req.addr.eq(addr)
 141         m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem  ==> priv
 142         m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
 143         # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
 144         # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
 145         with m.If(addr[28:] == Const(0xc, 4)):
 146             m.d.comb += self.req.nc.eq(1)
 147         # option to disable the cache entirely for read
 148         if self.disable_cache:
 149             m.d.comb += self.req.nc.eq(1)
 150         return None #FIXME return value
 151
 152     def set_wr_data(self, m, data, wen):
 153         # do the "blip" on write data
 154         m.d.comb += self.d_out.valid.eq(self.d_validblip)
 155         m.d.comb += self.d_valid.eq(1)
 156         # put data into comb which is picked up in main elaborate()
 157         m.d.comb += self.d_w_valid.eq(1)
 158         m.d.comb += self.store_data.eq(data)
 159         #m.d.sync += self.d_out.byte_sel.eq(wen) # this might not be needed
 160         st_ok = self.done # TODO indicates write data is valid
 161         return st_ok
 162
 163     def get_rd_data(self, m):
 164         ld_ok = self.done     # indicates read data is valid
 165         data = self.load_data # actual read data
 166         return data, ld_ok
 167
 168     def elaborate(self, platform):
 169         m = super().elaborate(platform)
 170         comb, sync = m.d.comb, m.d.sync
 171
 172         # create dcache module
 173         m.submodules.dcache = dcache = self.dcache
 174
 175         # temp vars
 176         d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
 177         m_out, m_in = self.m_out, self.m_in
 178         exc = self.pi.exc_o
 179         exception = exc.happened
 180         mmureq = Signal()
 181
 182         # copy of address, but gets over-ridden for OP_FETCH_FAILED
 183         maddr = Signal(64)
 184         m.d.comb += maddr.eq(self.addr)
 185
 186         # create a blip (single pulse) on valid read/write request
 187         # this can be over-ridden in the FSM to get dcache to re-run
 188         # a request when MMU_LOOKUP completes.
 189         m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
 190         ldst_r = LDSTRequest("ldst_r")
 191         with m.If(self.d_validblip):
 192             sync += ldst_r.eq(self.req) # copy of LDSTRequest on "blip"
 193
 194         # fsm skeleton
 195         with m.Switch(self.state):
 196             with m.Case(State.IDLE):
 197                 with m.If(self.d_validblip):
 198                     comb += self.busy.eq(1)
 199                     sync += self.state.eq(State.ACK_WAIT)
 200
 201             # waiting for completion
 202             with m.Case(State.ACK_WAIT):
 203                 comb += self.busy.eq(1)
 204
 205                 with m.If(d_in.error):
 206                     # cache error is not necessarily "final", it could
 207                     # be that it was just a TLB miss
 208                     with m.If(d_in.cache_paradox):
 209                         comb += exception.eq(1)
 210                         sync += self.state.eq(State.IDLE)
 211                         sync += self.dsisr[63 - 38].eq(~self.load)
 212                         # XXX there is no architected bit for this
 213                         # (probably should be a machine check in fact)
 214                         sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)
 215
 216                     with m.Else():
 217                         # Look up the translation for TLB miss
 218                         # and also for permission error and RC error
 219                         # in case the PTE has been updated.
 220                         comb += mmureq.eq(1)
 221                         sync += self.state.eq(State.MMU_LOOKUP)
 222                 with m.If(d_in.valid):
 223                     m.d.comb += self.done.eq(~mmureq) # done if not doing MMU
 224                     with m.If(self.done):
 225                         sync += Display("ACK_WAIT, done %x", self.addr)
 226                     sync += self.state.eq(State.IDLE)
 227                     with m.If(self.load):
 228                         m.d.comb += self.load_data.eq(d_in.data)
 229
 230             # waiting here for the MMU TLB lookup to complete.
 231             # either re-try the dcache lookup or throw MMU exception
 232             with m.Case(State.MMU_LOOKUP):
 233                 comb += self.busy.eq(1)
 234                 with m.If(m_in.done):
 235                     with m.If(~self.instr_fault):
 236                         sync += Display("MMU_LOOKUP, done %x -> %x",
 237                                         self.addr, d_out.addr)
 238                         # retry the request now that the MMU has
 239                         # installed a TLB entry, if not exception raised
 240                         m.d.comb += self.d_out.valid.eq(~exception)
 241                         sync += self.state.eq(State.ACK_WAIT)
 242                     with m.Else():
 243                         sync += Display("MMU_LOOKUP, exception %x", self.addr)
 244                         # instruction lookup fault: store address in DAR
 245                         comb += exc.happened.eq(1)
 246                         sync += self.dar.eq(self.addr)
 247
 248                 with m.If(m_in.err):
 249                     # MMU RADIX exception thrown
 250                     comb += exception.eq(1)
 251                     sync += self.dsisr[63 - 33].eq(m_in.invalid)
 252                     sync += self.dsisr[63 - 36].eq(m_in.perm_error)
 253                     sync += self.dsisr[63 - 38].eq(self.load)
 254                     sync += self.dsisr[63 - 44].eq(m_in.badtree)
 255                     sync += self.dsisr[63 - 45].eq(m_in.rc_error)
 256
 257             with m.Case(State.TLBIE_WAIT):
 258                 pass
 259
 260         # alignment error: store address in DAR
 261         with m.If(self.align_intr):
 262             comb += exc.happened.eq(1)
 263             sync += self.dar.eq(self.addr)
 264
 265         # happened, alignment, instr_fault, invalid.
 266         # note that all of these flow through - eventually to the TRAP
 267         # pipeline, via PowerDecoder2.
 268         comb += exc.invalid.eq(m_in.invalid)
 269         comb += exc.alignment.eq(self.align_intr)
 270         comb += exc.instr_fault.eq(self.instr_fault)
 271         # badtree, perm_error, rc_error, segment_fault
 272         comb += exc.badtree.eq(m_in.badtree)
 273         comb += exc.perm_error.eq(m_in.perm_error)
 274         comb += exc.rc_error.eq(m_in.rc_error)
 275         comb += exc.segment_fault.eq(m_in.segerr)
 276
 277         # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
 278         comb += dbus.adr.eq(dcache.wb_out.adr)
 279         comb += dbus.dat_w.eq(dcache.wb_out.dat)
 280         comb += dbus.sel.eq(dcache.wb_out.sel)
 281         comb += dbus.cyc.eq(dcache.wb_out.cyc)
 282         comb += dbus.stb.eq(dcache.wb_out.stb)
 283         comb += dbus.we.eq(dcache.wb_out.we)
 284
 285         comb += dcache.wb_in.dat.eq(dbus.dat_r)
 286         comb += dcache.wb_in.ack.eq(dbus.ack)
 287         if hasattr(dbus, "stall"):
 288             comb += dcache.wb_in.stall.eq(dbus.stall)
 289
 290         # write out d data only when flag set
 291         with m.If(self.d_w_valid):
 292             m.d.sync += d_out.data.eq(self.store_data)
 293         with m.Else():
 294             m.d.sync += d_out.data.eq(0)
 295
 296         # this must move into the FSM, conditionally noticing that
 297         # the "blip" comes from self.d_validblip.
 298         # task 1: look up in dcache
 299         # task 2: if dcache fails, look up in MMU.
 300         # do **NOT** confuse the two.
 301         with m.If(self.d_validblip):
 302             m.d.comb += d_out.load.eq(self.req.load)
 303             m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
 304             m.d.comb += self.addr.eq(self.req.addr)
 305             m.d.comb += d_out.nc.eq(self.req.nc)
 306             m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
 307             m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
 308         with m.Else():
 309             m.d.comb += d_out.load.eq(ldst_r.load)
 310             m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
 311             m.d.comb += self.addr.eq(ldst_r.addr)
 312             m.d.comb += d_out.nc.eq(ldst_r.nc)
 313             m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
 314             m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
 315
 316         # XXX these should be possible to remove but for some reason
 317         # cannot be... yet. TODO, investigate
 318         m.d.comb += self.load_data.eq(d_in.data)
 319         m.d.comb += d_out.addr.eq(self.addr)
 320
 321         # Update outputs to MMU
 322         m.d.comb += m_out.valid.eq(mmureq)
 323         m.d.comb += m_out.iside.eq(self.instr_fault)
 324         m.d.comb += m_out.load.eq(self.load)
 325         # m_out.priv <= r.priv_mode; TODO
 326         m.d.comb += m_out.tlbie.eq(self.tlbie)
 327         # m_out.mtspr <= mmu_mtspr; # TODO
 328         # m_out.sprn <= sprn; # TODO
 329         m.d.comb += m_out.addr.eq(maddr)
 330         # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
 331         # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate
 332
 333         return m
 334
 335     def ports(self):
 336         yield from super().ports()
 337         # TODO: memory ports
 338
 339
 340 class TestSRAMLoadStore1(LoadStore1):
 341     def __init__(self, pspec):
 342         super().__init__(pspec)
 343         pspec = self.pspec
 344         # small 32-entry Memory
 345         if (hasattr(pspec, "dmem_test_depth") and
 346                 isinstance(pspec.dmem_test_depth, int)):
 347             depth = pspec.dmem_test_depth
 348         else:
 349             depth = 32
 350         print("TestSRAMBareLoadStoreUnit depth", depth)
 351
 352         self.mem = Memory(width=pspec.reg_wid, depth=depth)
 353
 354     def elaborate(self, platform):
 355         m = super().elaborate(platform)
 356         comb = m.d.comb
 357         m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
 358                                         features={'cti', 'bte', 'err'})
 359         dbus = self.dbus
 360
 361         # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
 362         # note: SRAM is a target (slave), dbus is initiator (master)
 363         fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
 364         fanins = ['dat_r', 'ack', 'err']
 365         for fanout in fanouts:
 366             print("fanout", fanout, getattr(sram.bus, fanout).shape(),
 367                   getattr(dbus, fanout).shape())
 368             comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
 369             comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
 370         for fanin in fanins:
 371             comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
 372         # connect address
 373         comb += sram.bus.adr.eq(dbus.adr)
 374
 375         return m
 376