# add SECOND_REQ state to loadstore.py, not yet implemented
# [soc.git] / src / soc / fu / ldst / loadstore.py
1 """LoadStore1 FSM.
2
3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
6 by way of PortInterface. PortInterface is where things need extending,
7 such as adding dcbz support, etc.
8
9 this module basically handles "pure" load / store operations, and
10 its first job is to ask the D-Cache for the data. if that fails,
11 the second task (if virtual memory is enabled) is to ask the MMU
12 to perform a TLB, then to go *back* to the cache and ask again.
13
14 Links:
15
16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
17
18 """
19
20 from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
21 Record, Memory,
22 Const)
23 from nmutil.iocontrol import RecordObject
24 from nmutil.util import rising_edge, Display
25 from enum import Enum, unique
26
27 from soc.experiment.dcache import DCache
28 from soc.experiment.icache import ICache
29 from soc.experiment.pimem import PortInterfaceBase
30 from soc.experiment.mem_types import LoadStore1ToMMUType
31 from soc.experiment.mem_types import MMUToLoadStore1Type
32
33 from soc.minerva.wishbone import make_wb_layout
34 from soc.bus.sram import SRAM
35 from nmutil.util import Display
36
37
@unique
class State(Enum):
    """LoadStore1 FSM states (follows microwatt loadstore1.vhdl)."""
    IDLE = 0            # ready for instruction
    ACK_WAIT = 1        # waiting for ack from dcache
    MMU_LOOKUP = 2      # waiting for MMU to look up translation
    SECOND_REQ = 3      # second request for unaligned transfer
                        # NOTE(review): declared but not yet handled in the
                        # LoadStore1 FSM below ("not yet implemented")
44
45
# captures the LDSTRequest from the PortInterface, which "blips" most
# of this at us (pipeline-style).
class LDSTRequest(RecordObject):
    """Latched snapshot of one load/store request from the PortInterface."""

    def __init__(self, name=None):
        super().__init__(name=name)

        self.load = Signal()            # 1 = load, 0 = store
        self.dcbz = Signal()            # dcbz cache-line-zeroing store
        self.raddr = Signal(64)         # effective (real/virtual) address
        # self.store_data = Signal(64)  # this is already sync (on a delay)
        self.byte_sel = Signal(8)       # per-byte lane enables
        self.nc = Signal()              # non-cacheable access
        self.virt_mode = Signal()       # MSR.DR: address translation on
        self.priv_mode = Signal()       # not MSR.PR: privileged access
        self.mode_32bit = Signal()      # XXX UNUSED AT PRESENT
        self.align_intr = Signal()      # request raises alignment interrupt
62
63
# glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    """FSM glue between PortInterface, the D-cache/I-cache and the MMU.

    First job on a request is to ask the D-cache for the data.  If the
    D-cache reports an error that may merely be a TLB miss, the MMU is
    asked to perform a page-table walk (MMU_LOOKUP) and the D-cache
    request is then re-issued.  Also latches DSISR/DAR on exceptions and
    accepts DSISR/DAR updates from the MMU (OP_MTSPR path).
    """

    def __init__(self, pspec):
        self.pspec = pspec
        # option to force every access non-cacheable (see set_rd/wr_addr)
        self.disable_cache = (hasattr(pspec, "disable_cache") and
                              pspec.disable_cache == True)
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache(pspec)
        self.icache = ICache(pspec)
        # these names are from the perspective of here (LoadStore1)
        self.d_out = self.dcache.d_in     # in to dcache is out for LoadStore
        self.d_in = self.dcache.d_out     # out from dcache is in for LoadStore
        self.i_out = self.icache.i_in     # in to icache is out for LoadStore
        self.i_in = self.icache.i_out     # out from icache is in for LoadStore
        self.m_out = LoadStore1ToMMUType("m_out")   # out *to* MMU
        self.m_in = MMUToLoadStore1Type("m_in")     # in *from* MMU
        self.req = LDSTRequest(name="ldst_req")

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))
        self.ibus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()
        self.d_w_valid = Signal()
        self.d_validblip = Signal()

        # state info for LD/ST
        self.done = Signal()
        self.done_delay = Signal()      # done, one cycle later (see elaborate)
        # latch most of the input request
        self.load = Signal()
        self.tlbie = Signal()
        self.dcbz = Signal()
        self.raddr = Signal(64)
        self.maddr = Signal(64)         # address forwarded to the MMU
        self.store_data = Signal(64)
        self.load_data = Signal(64)
        self.load_data_delay = Signal(64)
        self.byte_sel = Signal(8)
        #self.xerc : xer_common_t;
        #self.reserve = Signal()
        #self.atomic = Signal()
        #self.atomic_last = Signal()
        #self.rc = Signal()
        self.nc = Signal()              # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.mode_32bit = Signal()      # XXX UNUSED AT PRESENT
        self.state = Signal(State)      # FSM state (see State enum)
        self.instr_fault = Signal()     # indicator to request i-cache MMU lookup
        self.r_instr_fault = Signal()   # accessed in external_busy
        self.align_intr = Signal()
        self.busy = Signal()
        self.wait_dcache = Signal()
        self.wait_mmu = Signal()
        #self.intr_vec : integer range 0 to 16#fff#;
        #self.nia = Signal(64)
        #self.srr1 = Signal(16)
        # use these to set the dsisr or dar respectively
        self.mmu_set_spr = Signal()
        self.mmu_set_dsisr = Signal()
        self.mmu_set_dar = Signal()
        self.sprval_in = Signal(64)

        # ONLY access these read-only, do NOT attempt to change
        self.dsisr = Signal(32)
        self.dar = Signal(64)

    # when external_busy set, do not allow PortInterface to proceed
    def external_busy(self, m):
        """Return a busy condition: an i-cache MMU lookup is in flight."""
        return self.instr_fault | self.r_instr_fault

    def set_wr_addr(self, m, addr, mask, misalign, msr, is_dcbz):
        """Latch a *store* request into self.req (combinatorially).

        addr/mask come from the PortInterface; msr supplies the
        privilege (PR), translation (DR) and 64-bit (SF) context.
        """
        m.d.comb += self.req.load.eq(0)  # store operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.raddr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr.pr)  # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr.dr)   # DR ==> virt
        m.d.comb += self.req.mode_32bit.eq(~msr.sf) # not-sixty-four ==> 32bit
        m.d.comb += self.req.dcbz.eq(is_dcbz)
        # XXX TODO sort out misalignment, mmu test5 fails
        m.d.comb += self.req.align_intr.eq(misalign)

        # m.d.comb += Display("set_wr_addr %i dcbz %i",addr,is_dcbz)

        # option to disable the cache entirely for write
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)

        # dcbz cannot do no-cache
        with m.If(is_dcbz & self.req.nc):
            m.d.comb += self.req.align_intr.eq(1)

        return None

    def set_rd_addr(self, m, addr, mask, misalign, msr):
        """Latch a *load* request into self.req and start the "blip".

        Unlike set_wr_addr this also raises d_valid immediately: reads
        have no separate write-data phase.
        """
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.req.load.eq(1)  # load operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.raddr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr.pr)  # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr.dr)   # DR ==> virt
        m.d.comb += self.req.mode_32bit.eq(~msr.sf) # not-sixty-four ==> 32bit
        # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
        with m.If(addr[28:] == Const(0xc, 4)):
            m.d.comb += self.req.nc.eq(1)
        # option to disable the cache entirely for read
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        # XXX TODO sort out misalignment, mmu test5 fails
        m.d.comb += self.req.align_intr.eq(misalign)
        return None #FIXME return value

    def set_wr_data(self, m, data, wen):
        """Present store data; returns the store-acknowledge condition."""
        # do the "blip" on write data
        m.d.comb += self.d_valid.eq(1)
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.store_data.eq(data)
        #m.d.sync += self.d_out.byte_sel.eq(wen) # this might not be needed
        st_ok = self.done  # TODO indicates write data is valid
        return st_ok

    def get_rd_data(self, m):
        """Return (data, valid) for a completed load, one cycle delayed."""
        ld_ok = self.done_delay      # indicates read data is valid
        data = self.load_data_delay  # actual read data
        return data, ld_ok

    def elaborate(self, platform):
        """Build the FSM, the dcache/icache submodules and MMU wiring."""
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync

        # microwatt takes one more cycle before next operation can be issued
        sync += self.done_delay.eq(self.done)
        sync += self.load_data_delay.eq(self.load_data)

        # create dcache and icache module
        m.submodules.dcache = dcache = self.dcache
        m.submodules.icache = icache = self.icache

        # temp vars
        d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
        i_out, i_in, ibus = self.i_out, self.i_in, self.ibus
        m_out, m_in = self.m_out, self.m_in
        exc = self.pi.exc_o
        exception = exc.happened
        mmureq = Signal()

        # copy of address, but gets over-ridden for instr_fault
        maddr = Signal(64)
        m.d.comb += maddr.eq(self.raddr)

        # create a blip (single pulse) on valid read/write request
        # this can be over-ridden in the FSM to get dcache to re-run
        # a request when MMU_LOOKUP completes.
        m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
        ldst_r = LDSTRequest("ldst_r")
        comb += Display("MMUTEST: LoadStore1 d_in.error=%i",d_in.error)

        # fsm skeleton
        with m.Switch(self.state):
            with m.Case(State.IDLE):
                with m.If((self.d_validblip | self.instr_fault) &
                          ~exc.happened):
                    comb += self.busy.eq(1)
                    sync += self.state.eq(State.ACK_WAIT)
                    sync += ldst_r.eq(self.req)  # copy of LDSTRequest on "blip"
                    # sync += Display("validblip self.req.virt_mode=%i",
                    # self.req.virt_mode)
                    # an i-cache miss bypasses the dcache entirely and
                    # goes straight to the MMU for the i-side lookup
                    with m.If(self.instr_fault):
                        comb += mmureq.eq(1)
                        sync += self.r_instr_fault.eq(1)
                        comb += maddr.eq(self.maddr)
                        sync += self.state.eq(State.MMU_LOOKUP)
                    with m.Else():
                        sync += self.r_instr_fault.eq(0)
                with m.Else():
                    sync += ldst_r.eq(0)

            # waiting for completion
            with m.Case(State.ACK_WAIT):
                comb += Display("MMUTEST: ACK_WAIT")
                comb += self.busy.eq(~exc.happened)

                with m.If(d_in.error):
                    # cache error is not necessarily "final", it could
                    # be that it was just a TLB miss
                    with m.If(d_in.cache_paradox):
                        comb += exception.eq(1)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        sync += Display("cache error -> update dsisr")
                        # NOTE: (63 - n) converts the Power-ISA MSB0 bit
                        # numbering to nmigen's LSB0 indexing
                        sync += self.dsisr[63 - 38].eq(~ldst_r.load)
                        # XXX there is no architected bit for this
                        # (probably should be a machine check in fact)
                        sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)
                        sync += self.r_instr_fault.eq(0)

                    with m.Else():
                        # Look up the translation for TLB miss
                        # and also for permission error and RC error
                        # in case the PTE has been updated.
                        comb += mmureq.eq(1)
                        sync += self.state.eq(State.MMU_LOOKUP)
                with m.If(d_in.valid):
                    m.d.comb += self.done.eq(~mmureq)  # done if not doing MMU
                    with m.If(self.done):
                        sync += Display("ACK_WAIT, done %x", self.raddr)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                    with m.If(self.load):
                        m.d.comb += self.load_data.eq(d_in.data)

            # waiting here for the MMU TLB lookup to complete.
            # either re-try the dcache lookup or throw MMU exception
            with m.Case(State.MMU_LOOKUP):
                comb += self.busy.eq(~exception)
                with m.If(m_in.done):
                    with m.If(~self.r_instr_fault):
                        sync += Display("MMU_LOOKUP, done %x -> %x",
                                        self.raddr, d_out.addr)
                        # retry the request now that the MMU has
                        # installed a TLB entry, if not exception raised
                        m.d.comb += self.d_out.valid.eq(~exception)
                        sync += self.state.eq(State.ACK_WAIT)
                        sync += ldst_r.eq(0)
                    with m.Else():
                        # i-side lookup: nothing to re-issue to dcache
                        sync += self.state.eq(State.IDLE)
                        sync += self.r_instr_fault.eq(0)
                        comb += self.done.eq(1)

                with m.If(m_in.err):
                    # MMU RADIX exception thrown. XXX
                    # TODO: critical that the write here has to
                    # notify the MMU FSM of the change to dsisr
                    comb += exception.eq(1)
                    comb += self.done.eq(1)
                    sync += Display("MMU RADIX exception thrown")
                    sync += self.dsisr[63 - 33].eq(m_in.invalid)
                    sync += self.dsisr[63 - 36].eq(m_in.perm_error)  # noexec
                    sync += self.dsisr[63 - 38].eq(~ldst_r.load)
                    sync += self.dsisr[63 - 44].eq(m_in.badtree)
                    sync += self.dsisr[63 - 45].eq(m_in.rc_error)
                    sync += self.state.eq(State.IDLE)
                    # exception thrown, clear out instruction fault state
                    sync += self.r_instr_fault.eq(0)

        # MMU FSM communicating a request to update DSISR or DAR (OP_MTSPR)
        with m.If(self.mmu_set_spr):
            with m.If(self.mmu_set_dsisr):
                sync += self.dsisr.eq(self.sprval_in)
            with m.If(self.mmu_set_dar):
                sync += self.dar.eq(self.sprval_in)

        # hmmm, alignment occurs in set_rd_addr/set_wr_addr, note exception
        with m.If(self.align_intr):
            comb += exc.happened.eq(1)
        # check for updating DAR
        with m.If(exception):
            sync += Display("exception %x", self.raddr)
            # alignment error: store address in DAR
            with m.If(self.align_intr):
                sync += Display("alignment error: addr in DAR %x", self.raddr)
                sync += self.dar.eq(self.raddr)
            with m.Elif(~self.r_instr_fault):
                sync += Display("not instr fault, addr in DAR %x", self.raddr)
                sync += self.dar.eq(self.raddr)

        # when done or exception, return to idle state
        with m.If(self.done | exception):
            sync += self.state.eq(State.IDLE)
            comb += self.busy.eq(0)

        # happened, alignment, instr_fault, invalid.
        # note that all of these flow through - eventually to the TRAP
        # pipeline, via PowerDecoder2.
        comb += self.align_intr.eq(self.req.align_intr)
        comb += exc.invalid.eq(m_in.invalid)
        comb += exc.alignment.eq(self.align_intr)
        comb += exc.instr_fault.eq(self.r_instr_fault)
        # badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(m_in.badtree)
        comb += exc.perm_error.eq(m_in.perm_error)
        comb += exc.rc_error.eq(m_in.rc_error)
        comb += exc.segment_fault.eq(m_in.segerr)

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        comb += dbus.adr.eq(dcache.bus.adr)
        comb += dbus.dat_w.eq(dcache.bus.dat_w)
        comb += dbus.sel.eq(dcache.bus.sel)
        comb += dbus.cyc.eq(dcache.bus.cyc)
        comb += dbus.stb.eq(dcache.bus.stb)
        comb += dbus.we.eq(dcache.bus.we)

        comb += dcache.bus.dat_r.eq(dbus.dat_r)
        comb += dcache.bus.ack.eq(dbus.ack)
        if hasattr(dbus, "stall"):
            comb += dcache.bus.stall.eq(dbus.stall)

        # update out d data when flag set
        with m.If(self.d_w_valid):
            m.d.sync += d_out.data.eq(self.store_data)
        #with m.Else():
        #    m.d.sync += d_out.data.eq(0)
        # unit test passes with that change

        # this must move into the FSM, conditionally noticing that
        # the "blip" comes from self.d_validblip.
        # task 1: look up in dcache
        # task 2: if dcache fails, look up in MMU.
        # do **NOT** confuse the two.
        # on the blip cycle the live request drives dcache; on all
        # other cycles the latched copy (ldst_r) drives it
        with m.If(self.d_validblip):
            m.d.comb += self.d_out.valid.eq(~exc.happened)
            m.d.comb += d_out.load.eq(self.req.load)
            m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
            m.d.comb += self.raddr.eq(self.req.raddr)
            m.d.comb += d_out.nc.eq(self.req.nc)
            m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
            m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
            #m.d.comb += Display("validblip dcbz=%i addr=%x",
            #self.req.dcbz,self.req.addr)
            m.d.comb += d_out.dcbz.eq(self.req.dcbz)
        with m.Else():
            m.d.comb += d_out.load.eq(ldst_r.load)
            m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
            m.d.comb += self.raddr.eq(ldst_r.raddr)
            m.d.comb += d_out.nc.eq(ldst_r.nc)
            m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
            m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
            #m.d.comb += Display("no_validblip dcbz=%i addr=%x",
            #ldst_r.dcbz,ldst_r.addr)
            m.d.comb += d_out.dcbz.eq(ldst_r.dcbz)

        # XXX these should be possible to remove but for some reason
        # cannot be... yet. TODO, investigate
        m.d.comb += self.load_data.eq(d_in.data)
        m.d.comb += d_out.addr.eq(self.raddr)

        # Update outputs to MMU
        m.d.comb += m_out.valid.eq(mmureq)
        m.d.comb += m_out.iside.eq(self.instr_fault)
        m.d.comb += m_out.load.eq(ldst_r.load)
        m.d.comb += m_out.priv.eq(self.priv_mode)
        # m_out.priv <= r.priv_mode; TODO
        m.d.comb += m_out.tlbie.eq(self.tlbie)
        # m_out.mtspr <= mmu_mtspr; # TODO
        # m_out.sprn <= sprn; # TODO
        m.d.comb += m_out.addr.eq(maddr)
        # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
        # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate

        return m

    def ports(self):
        """Yield simulation/synthesis ports (delegates to PortInterfaceBase)."""
        yield from super().ports()
        # TODO: memory ports
425
426
class TestSRAMLoadStore1(LoadStore1):
    """LoadStore1 variant whose Wishbone D-bus is backed by a small SRAM.

    Used for unit testing: the dcache's Wishbone master (dbus) is wired
    directly to an on-chip SRAM target instead of a real memory system.
    """

    def __init__(self, pspec):
        super().__init__(pspec)
        pspec = self.pspec
        # small 32-entry Memory unless pspec overrides the depth
        if (hasattr(pspec, "dmem_test_depth") and
                isinstance(pspec.dmem_test_depth, int)):
            depth = pspec.dmem_test_depth
        else:
            depth = 32
        print("TestSRAMBareLoadStoreUnit depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        """Build LoadStore1 then attach the SRAM to the D-bus."""
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            # fix: the original added this identical comb assignment twice
            # per fanout signal (copy-paste duplicate) - once is enough
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        for fanin in fanins:
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m
463