src/soc/fu/ldst/loadstore.py

   1 """LoadStore1 FSM.
   2
   3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
   4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
   5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
   6 by way of PortInterface.  PortInterface is where things need extending,
   7 such as adding dcbz support, etc.
   8
   9 this module basically handles "pure" load / store operations, and
  10 its first job is to ask the D-Cache for the data.  if that fails,
  11 the second task (if virtual memory is enabled) is to ask the MMU
  12 to perform a TLB lookup, then to go *back* to the cache and ask again.
  13
  14 Links:
  15
  16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
  17
  18 """
  19
  20 from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
  21                     Record, Memory,
  22                     Const)
  23 from nmutil.iocontrol import RecordObject
  24 from nmutil.util import rising_edge, Display
  25 from enum import Enum, unique
  26
  27 from soc.experiment.dcache import DCache
  28 from soc.experiment.icache import ICache
  29 from soc.experiment.pimem import PortInterfaceBase
  30 from soc.experiment.mem_types import LoadStore1ToMMUType
  31 from soc.experiment.mem_types import MMUToLoadStore1Type
  32
  33 from soc.minerva.wishbone import make_wb_layout
  34 from soc.bus.sram import SRAM
  35 from nmutil.util import Display
  36
  37
  38 @unique
  39 class State(Enum):
  40     IDLE = 0       # ready for instruction
  41     ACK_WAIT = 1   # waiting for ack from dcache
  42     MMU_LOOKUP = 2 # waiting for MMU to look up translation
  43     SECOND_REQ = 3 # second request for unaligned transfer
  44
  45 @unique
  46 class Misalign(Enum):
  47     ONEWORD = 0    # only one word needed, all good
  48     NEED2WORDS = 1 # need to send/receive two words
  49     WAITFIRST = 2  # waiting for the first word
  50     WAITSECOND = 3 # waiting for the second word
  51
  52
  53 # captures the LDSTRequest from the PortInterface, which "blips" most
  54 # of this at us (pipeline-style).
  55 class LDSTRequest(RecordObject):
  56     def __init__(self, name=None):
  57         RecordObject.__init__(self, name=name)
  58
  59         self.load          = Signal()
  60         self.dcbz          = Signal()
  61         self.raddr          = Signal(64)
  62         # self.store_data    = Signal(64) # this is already sync (on a delay)
  63         self.byte_sel      = Signal(16)
  64         self.nc            = Signal()              # non-cacheable access
  65         self.virt_mode     = Signal()
  66         self.priv_mode     = Signal()
  67         self.mode_32bit    = Signal() # XXX UNUSED AT PRESENT
  68         self.alignstate    = Signal(Misalign) # progress of alignment request
  69         self.align_intr    = Signal()
  70
  71
  72 # glue logic for microwatt mmu and dcache
  73 class LoadStore1(PortInterfaceBase):
  74     def __init__(self, pspec):
  75         self.pspec = pspec
  76         self.disable_cache = (hasattr(pspec, "disable_cache") and
  77                               pspec.disable_cache == True)
  78         regwid = pspec.reg_wid
  79         addrwid = pspec.addr_wid
  80
  81         super().__init__(regwid, addrwid)
  82         self.dcache = DCache(pspec)
  83         self.icache = ICache(pspec)
  84         # these names are from the perspective of here (LoadStore1)
  85         self.d_out  = self.dcache.d_in     # in to dcache is out for LoadStore
  86         self.d_in = self.dcache.d_out      # out from dcache is in for LoadStore
  87         self.i_out  = self.icache.i_in     # in to icache is out for LoadStore
  88         self.i_in = self.icache.i_out      # out from icache is in for LoadStore
  89         self.m_out  = LoadStore1ToMMUType("m_out") # out *to* MMU
  90         self.m_in = MMUToLoadStore1Type("m_in")   # in *from* MMU
  91         self.req = LDSTRequest(name="ldst_req")
  92
  93         # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
  94         self.dbus = Record(make_wb_layout(pspec))
  95         self.ibus = Record(make_wb_layout(pspec))
  96
  97         # for creating a single clock blip to DCache
  98         self.d_valid = Signal()
  99         self.d_w_valid = Signal()
 100         self.d_validblip = Signal()
 101
 102         # state info for LD/ST
 103         self.done          = Signal()
 104         self.done_delay    = Signal()
 105         # latch most of the input request
 106         self.load          = Signal()
 107         self.tlbie         = Signal()
 108         self.dcbz          = Signal()
 109         self.raddr          = Signal(64)
 110         self.maddr          = Signal(64)
 111         self.store_data    = Signal(128)   # 128-bit to cope with
 112         self.load_data     = Signal(128)   # misalignment
 113         self.load_data_delay = Signal(128) # perform 2 LD/STs
 114         self.byte_sel      = Signal(16)    # also for misaligned, 16-bit
 115         self.alignstate    = Signal(Misalign) # progress of alignment request
 116         #self.xerc         : xer_common_t;
 117         #self.reserve       = Signal()
 118         #self.atomic        = Signal()
 119         #self.atomic_last   = Signal()
 120         #self.rc            = Signal()
 121         self.nc            = Signal()              # non-cacheable access
 122         self.virt_mode     = Signal()
 123         self.priv_mode     = Signal()
 124         self.mode_32bit    = Signal() # XXX UNUSED AT PRESENT
 125         self.state         = Signal(State)
 126         self.instr_fault   = Signal()  # indicator to request i-cache MMU lookup
 127         self.r_instr_fault  = Signal() # accessed in external_busy
 128         self.align_intr    = Signal()
 129         self.busy          = Signal()
 130         self.wait_dcache   = Signal()
 131         self.wait_mmu      = Signal()
 132         #self.intr_vec     : integer range 0 to 16#fff#;
 133         #self.nia           = Signal(64)
 134         #self.srr1          = Signal(16)
 135         # use these to set the dsisr or dar respectively
 136         self.mmu_set_spr    = Signal()
 137         self.mmu_set_dsisr  = Signal()
 138         self.mmu_set_dar    = Signal()
 139         self.sprval_in      = Signal(64)
 140
 141         # ONLY access these read-only, do NOT attempt to change
 142         self.dsisr          = Signal(32)
 143         self.dar            = Signal(64)
 144
 145     # when external_busy set, do not allow PortInterface to proceed
 146     def external_busy(self, m):
 147         return self.instr_fault | self.r_instr_fault
 148
 149     def set_wr_addr(self, m, addr, mask, misalign, msr, is_dcbz):
 150         m.d.comb += self.req.load.eq(0) # store operation
 151         m.d.comb += self.req.byte_sel.eq(mask)
 152         m.d.comb += self.req.raddr.eq(addr)
 153         m.d.comb += self.req.priv_mode.eq(~msr.pr) # not-problem  ==> priv
 154         m.d.comb += self.req.virt_mode.eq(msr.dr) # DR ==> virt
 155         m.d.comb += self.req.mode_32bit.eq(~msr.sf) # not-sixty-four ==> 32bit
 156         m.d.comb += self.req.dcbz.eq(is_dcbz)
 157         with m.If(misalign):
 158             m.d.comb += self.req.alignstate.eq(Misalign.NEED2WORDS)
 159
 160         # m.d.comb += Display("set_wr_addr %i dcbz %i",addr,is_dcbz)
 161
 162         # option to disable the cache entirely for write
 163         if self.disable_cache:
 164             m.d.comb += self.req.nc.eq(1)
 165
 166         # dcbz cannot do no-cache
 167         with m.If(is_dcbz & self.req.nc):
 168             m.d.comb += self.req.align_intr.eq(1)
 169
 170         return None
 171
 172     def set_rd_addr(self, m, addr, mask, misalign, msr):
 173         m.d.comb += self.d_valid.eq(1)
 174         m.d.comb += self.req.load.eq(1) # load operation
 175         m.d.comb += self.req.byte_sel.eq(mask)
 176         m.d.comb += self.req.raddr.eq(addr)
 177         m.d.comb += self.req.priv_mode.eq(~msr.pr) # not-problem  ==> priv
 178         m.d.comb += self.req.virt_mode.eq(msr.dr) # DR ==> virt
 179         m.d.comb += self.req.mode_32bit.eq(~msr.sf) # not-sixty-four ==> 32bit
 180         # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
 181         # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
 182         with m.If(addr[28:] == Const(0xc, 4)):
 183             m.d.comb += self.req.nc.eq(1)
 184         # option to disable the cache entirely for read
 185         if self.disable_cache:
 186             m.d.comb += self.req.nc.eq(1)
 187         with m.If(misalign):
 188             m.d.comb += self.req.alignstate.eq(Misalign.NEED2WORDS)
 189         return None #FIXME return value
 190
 191     def set_wr_data(self, m, data, wen):
 192         # do the "blip" on write data
 193         m.d.comb += self.d_valid.eq(1)
 194         # put data into comb which is picked up in main elaborate()
 195         m.d.comb += self.d_w_valid.eq(1)
 196         m.d.comb += self.store_data.eq(data)
 197         #m.d.sync += self.d_out.byte_sel.eq(wen) # this might not be needed
 198         st_ok = self.done # TODO indicates write data is valid
 199         return st_ok
 200
 201     def get_rd_data(self, m):
 202         ld_ok = self.done_delay # indicates read data is valid
 203         data = self.load_data_delay   # actual read data
 204         return data, ld_ok
 205
 206     def elaborate(self, platform):
 207         m = super().elaborate(platform)
 208         comb, sync = m.d.comb, m.d.sync
 209
 210         # microwatt takes one more cycle before next operation can be issued
 211         sync += self.done_delay.eq(self.done)
 212         sync += self.load_data_delay.eq(self.load_data)
 213
 214         # create dcache and icache module
 215         m.submodules.dcache = dcache = self.dcache
 216         m.submodules.icache = icache = self.icache
 217
 218         # temp vars
 219         d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
 220         i_out, i_in, ibus = self.i_out, self.i_in, self.ibus
 221         m_out, m_in = self.m_out, self.m_in
 222         exc = self.pi.exc_o
 223         exception = exc.happened
 224         mmureq = Signal()
 225
 226         # copy of address, but gets over-ridden for instr_fault
 227         maddr = Signal(64)
 228         m.d.comb += maddr.eq(self.raddr)
 229
 230         # create a blip (single pulse) on valid read/write request
 231         # this can be over-ridden in the FSM to get dcache to re-run
 232         # a request when MMU_LOOKUP completes.
 233         m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
 234         ldst_r = LDSTRequest("ldst_r")
 235         comb += Display("MMUTEST: LoadStore1 d_in.error=%i",d_in.error)
 236
 237         # fsm skeleton
 238         with m.Switch(self.state):
 239             with m.Case(State.IDLE):
 240                 with m.If((self.d_validblip | self.instr_fault) &
 241                           ~exc.happened):
 242                     comb += self.busy.eq(1)
 243                     sync += self.state.eq(State.ACK_WAIT)
 244                     sync += ldst_r.eq(self.req) # copy of LDSTRequest on "blip"
 245 #                   sync += Display("validblip self.req.virt_mode=%i",
 246 #                   self.req.virt_mode)
 247                     with m.If(self.instr_fault):
 248                         comb += mmureq.eq(1)
 249                         sync += self.r_instr_fault.eq(1)
 250                         comb += maddr.eq(self.maddr)
 251                         sync += self.state.eq(State.MMU_LOOKUP)
 252                     with m.Else():
 253                         sync += self.r_instr_fault.eq(0)
 254                     # if the LD/ST requires two dwords, move to waiting
 255                     # for first word
 256                     with m.If(self.req.alignstate == Misalign.NEED2WORDS):
 257                         sync += ldst_r.alignstate.eq(Misalign.WAITFIRST)
 258                 with m.Else():
 259                     sync += ldst_r.eq(0)
 260
 261             # in second request, shuffle the data, indicate this is
 262             # the last dword request, initiate a 2nd request and wait for it
 263             with m.Case(State.SECOND_REQ):
 264                 sync += self.load_data_delay[64:128].eq(self.load_data[0:64])
 265
 266             # waiting for completion
 267             with m.Case(State.ACK_WAIT):
 268                 comb += Display("MMUTEST: ACK_WAIT")
 269                 comb += self.busy.eq(~exc.happened)
 270
 271                 with m.If(d_in.error):
 272                     # cache error is not necessarily "final", it could
 273                     # be that it was just a TLB miss
 274                     with m.If(d_in.cache_paradox):
 275                         comb += exception.eq(1)
 276                         sync += self.state.eq(State.IDLE)
 277                         sync += ldst_r.eq(0)
 278                         sync += Display("cache error -> update dsisr")
 279                         sync += self.dsisr[63 - 38].eq(~ldst_r.load)
 280                         # XXX there is no architected bit for this
 281                         # (probably should be a machine check in fact)
 282                         sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)
 283                         sync += self.r_instr_fault.eq(0)
 284
 285                     with m.Else():
 286                         # Look up the translation for TLB miss
 287                         # and also for permission error and RC error
 288                         # in case the PTE has been updated.
 289                         comb += mmureq.eq(1)
 290                         sync += self.state.eq(State.MMU_LOOKUP)
 291                 with m.If(d_in.valid):
 292                     m.d.comb += self.done.eq(~mmureq) # done if not doing MMU
 293                     with m.If(self.done):
 294                         sync += Display("ACK_WAIT, done %x", self.raddr)
 295                     with m.If(self.req.alignstate == Misalign.ONEWORD):
 296                         # done if there is only one dcache operation
 297                         sync += self.state.eq(State.IDLE)
 298                         sync += ldst_r.eq(0)
 299                         with m.If(self.load):
 300                             m.d.comb += self.load_data.eq(d_in.data)
 301                     with m.Elif(self.req.alignstate == Misalign.WAITFIRST):
 302                         # first LD done: load data, initiate 2nd request
 303                         with m.If(self.load):
 304                             m.d.comb += self.load_data[0:63].eq(d_in.data)
 305                         m.d.comb += self.d_validblip.eq(1)
 306                         comb += self.req.eq(ldst_r) # from copy of request
 307                         comb += self.req.raddr.eq(ldst_r.raddr + 8)
 308                         comb += self.req.alignstate.eq(Misalign.WAITSECOND)
 309
 310             # waiting here for the MMU TLB lookup to complete.
 311             # either re-try the dcache lookup or throw MMU exception
 312             with m.Case(State.MMU_LOOKUP):
 313                 comb += self.busy.eq(~exception)
 314                 with m.If(m_in.done):
 315                     with m.If(~self.r_instr_fault):
 316                         sync += Display("MMU_LOOKUP, done %x -> %x",
 317                                         self.raddr, d_out.addr)
 318                         # retry the request now that the MMU has
 319                         # installed a TLB entry, if not exception raised
 320                         m.d.comb += self.d_out.valid.eq(~exception)
 321                         sync += self.state.eq(State.ACK_WAIT)
 322                         sync += ldst_r.eq(0)
 323                     with m.Else():
 324                         sync += self.state.eq(State.IDLE)
 325                         sync += self.r_instr_fault.eq(0)
 326                         comb += self.done.eq(1)
 327
 328                 with m.If(m_in.err):
 329                     # MMU RADIX exception thrown. XXX
 330                     # TODO: critical that the write here has to
 331                     # notify the MMU FSM of the change to dsisr
 332                     comb += exception.eq(1)
 333                     comb += self.done.eq(1)
 334                     sync += Display("MMU RADIX exception thrown")
 335                     sync += self.dsisr[63 - 33].eq(m_in.invalid)
 336                     sync += self.dsisr[63 - 36].eq(m_in.perm_error) # noexec
 337                     sync += self.dsisr[63 - 38].eq(~ldst_r.load)
 338                     sync += self.dsisr[63 - 44].eq(m_in.badtree)
 339                     sync += self.dsisr[63 - 45].eq(m_in.rc_error)
 340                     sync += self.state.eq(State.IDLE)
 341                     # exception thrown, clear out instruction fault state
 342                     sync += self.r_instr_fault.eq(0)
 343
 344         # MMU FSM communicating a request to update DSISR or DAR (OP_MTSPR)
 345         with m.If(self.mmu_set_spr):
 346             with m.If(self.mmu_set_dsisr):
 347                 sync += self.dsisr.eq(self.sprval_in)
 348             with m.If(self.mmu_set_dar):
 349                 sync += self.dar.eq(self.sprval_in)
 350
 351         # hmmm, alignment occurs in set_rd_addr/set_wr_addr, note exception
 352         with m.If(self.align_intr):
 353             comb += exc.happened.eq(1)
 354         # check for updating DAR
 355         with m.If(exception):
 356             sync += Display("exception %x", self.raddr)
 357             # alignment error: store address in DAR
 358             with m.If(self.align_intr):
 359                 sync += Display("alignment error: addr in DAR %x", self.raddr)
 360                 sync += self.dar.eq(self.raddr)
 361             with m.Elif(~self.r_instr_fault):
 362                 sync += Display("not instr fault, addr in DAR %x", self.raddr)
 363                 sync += self.dar.eq(self.raddr)
 364
 365         # when done or exception, return to idle state
 366         with m.If(self.done | exception):
 367             sync += self.state.eq(State.IDLE)
 368             comb += self.busy.eq(0)
 369
 370         # happened, alignment, instr_fault, invalid.
 371         # note that all of these flow through - eventually to the TRAP
 372         # pipeline, via PowerDecoder2.
 373         comb += self.align_intr.eq(self.req.align_intr)
 374         comb += exc.invalid.eq(m_in.invalid)
 375         comb += exc.alignment.eq(self.align_intr)
 376         comb += exc.instr_fault.eq(self.r_instr_fault)
 377         # badtree, perm_error, rc_error, segment_fault
 378         comb += exc.badtree.eq(m_in.badtree)
 379         comb += exc.perm_error.eq(m_in.perm_error)
 380         comb += exc.rc_error.eq(m_in.rc_error)
 381         comb += exc.segment_fault.eq(m_in.segerr)
 382
 383         # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
 384         comb += dbus.adr.eq(dcache.bus.adr)
 385         comb += dbus.dat_w.eq(dcache.bus.dat_w)
 386         comb += dbus.sel.eq(dcache.bus.sel)
 387         comb += dbus.cyc.eq(dcache.bus.cyc)
 388         comb += dbus.stb.eq(dcache.bus.stb)
 389         comb += dbus.we.eq(dcache.bus.we)
 390
 391         comb += dcache.bus.dat_r.eq(dbus.dat_r)
 392         comb += dcache.bus.ack.eq(dbus.ack)
 393         if hasattr(dbus, "stall"):
 394             comb += dcache.bus.stall.eq(dbus.stall)
 395
 396         # update out d data when flag set
 397         with m.If(self.d_w_valid):
 398             with m.If(ldst_r.alignstate == Misalign.WAITSECOND):
 399                 m.d.sync += d_out.data.eq(self.store_data[64:128])
 400             with m.Else():
 401                 m.d.sync += d_out.data.eq(self.store_data[0:64])
 402         #with m.Else():
 403         #    m.d.sync += d_out.data.eq(0)
 404         # unit test passes with that change
 405
 406         # this must move into the FSM, conditionally noticing that
 407         # the "blip" comes from self.d_validblip.
 408         # task 1: look up in dcache
 409         # task 2: if dcache fails, look up in MMU.
 410         # do **NOT** confuse the two.
 411         with m.If(self.d_validblip):
 412             m.d.comb += self.d_out.valid.eq(~exc.happened)
 413             m.d.comb += d_out.load.eq(self.req.load)
 414             m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
 415             m.d.comb += self.raddr.eq(self.req.raddr)
 416             m.d.comb += d_out.nc.eq(self.req.nc)
 417             m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
 418             m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
 419             #m.d.comb += Display("validblip dcbz=%i addr=%x",
 420             #self.req.dcbz,self.req.addr)
 421             m.d.comb += d_out.dcbz.eq(self.req.dcbz)
 422         with m.Else():
 423             m.d.comb += d_out.load.eq(ldst_r.load)
 424             m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
 425             m.d.comb += self.raddr.eq(ldst_r.raddr)
 426             m.d.comb += d_out.nc.eq(ldst_r.nc)
 427             m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
 428             m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
 429             #m.d.comb += Display("no_validblip dcbz=%i addr=%x",
 430             #ldst_r.dcbz,ldst_r.addr)
 431             m.d.comb += d_out.dcbz.eq(ldst_r.dcbz)
 432
 433         # XXX these should be possible to remove but for some reason
 434         # cannot be... yet. TODO, investigate
 435         m.d.comb += self.load_data.eq(d_in.data)
 436         m.d.comb += d_out.addr.eq(self.raddr)
 437
 438         # Update outputs to MMU
 439         m.d.comb += m_out.valid.eq(mmureq)
 440         m.d.comb += m_out.iside.eq(self.instr_fault)
 441         m.d.comb += m_out.load.eq(ldst_r.load)
 442         m.d.comb += m_out.priv.eq(self.priv_mode)
 443         # m_out.priv <= r.priv_mode; TODO
 444         m.d.comb += m_out.tlbie.eq(self.tlbie)
 445         # m_out.mtspr <= mmu_mtspr; # TODO
 446         # m_out.sprn <= sprn; # TODO
 447         m.d.comb += m_out.addr.eq(maddr)
 448         # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
 449         # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate
 450
 451         return m
 452
    def ports(self):
        """Yield the ports of this unit: currently exactly those yielded
        by the base class's ports().
        """
        yield from super().ports()
        # TODO: memory ports
 456
 457
 458 class TestSRAMLoadStore1(LoadStore1):
 459     def __init__(self, pspec):
 460         super().__init__(pspec)
 461         pspec = self.pspec
 462         # small 32-entry Memory
 463         if (hasattr(pspec, "dmem_test_depth") and
 464                 isinstance(pspec.dmem_test_depth, int)):
 465             depth = pspec.dmem_test_depth
 466         else:
 467             depth = 32
 468         print("TestSRAMBareLoadStoreUnit depth", depth)
 469
 470         self.mem = Memory(width=pspec.reg_wid, depth=depth)
 471
 472     def elaborate(self, platform):
 473         m = super().elaborate(platform)
 474         comb = m.d.comb
 475         m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
 476                                         features={'cti', 'bte', 'err'})
 477         dbus = self.dbus
 478
 479         # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
 480         # note: SRAM is a target (slave), dbus is initiator (master)
 481         fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
 482         fanins = ['dat_r', 'ack', 'err']
 483         for fanout in fanouts:
 484             print("fanout", fanout, getattr(sram.bus, fanout).shape(),
 485                   getattr(dbus, fanout).shape())
 486             comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
 487             comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
 488         for fanin in fanins:
 489             comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
 490         # connect address
 491         comb += sram.bus.adr.eq(dbus.adr)
 492
 493         return m
 494