1 """LoadStore1 FSM.
2
3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
6 by way of PortInterface. PortInterface is where things need extending,
7 such as adding dcbz support, etc.
8
9 this module basically handles "pure" load / store operations, and
10 its first job is to ask the D-Cache for the data. if that fails,
11 the second task (if virtual memory is enabled) is to ask the MMU
12 to perform a TLB, then to go *back* to the cache and ask again.
13
14 Links:
15
16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
17
18 """
19
from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
                    Record, Memory, Const)
from nmutil.iocontrol import RecordObject
from nmutil.util import rising_edge, Display
from enum import Enum, unique

from soc.experiment.dcache import DCache
from soc.experiment.icache import ICache
from soc.experiment.pimem import PortInterfaceBase
from soc.experiment.mem_types import LoadStore1ToMMUType
from soc.experiment.mem_types import MMUToLoadStore1Type

from soc.minerva.wishbone import make_wb_layout
from soc.bus.sram import SRAM


@unique
class State(Enum):
    IDLE = 0        # ready for instruction
    ACK_WAIT = 1    # waiting for ack from dcache
    MMU_LOOKUP = 2  # waiting for MMU to look up translation


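# summary of the state flow implemented in LoadStore1.elaborate() below:
#
#   IDLE       -> ACK_WAIT   on a valid load/store "blip" from PortInterface
#   IDLE       -> MMU_LOOKUP on an instruction fault (i-side MMU lookup)
#   ACK_WAIT   -> IDLE       when the dcache acks (or on a "final" cache error)
#   ACK_WAIT   -> MMU_LOOKUP when the dcache error may simply be a TLB miss
#   MMU_LOOKUP -> ACK_WAIT   MMU done: re-run the dcache request now that a
#                            TLB entry has been installed
#   MMU_LOOKUP -> IDLE       on MMU error (exception raised) or when the
#                            lookup was for an instruction fault
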
# captures the LDSTRequest from the PortInterface, which "blips" most
# of this at us (pipeline-style).
class LDSTRequest(RecordObject):
    def __init__(self, name=None):
        RecordObject.__init__(self, name=name)

        self.load = Signal()
        self.dcbz = Signal()
        self.raddr = Signal(64)
        # self.store_data = Signal(64)  # this is already sync (on a delay)
        self.byte_sel = Signal(8)
        self.nc = Signal()              # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.mode_32bit = Signal()      # XXX UNUSED AT PRESENT
        self.align_intr = Signal()


# glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    def __init__(self, pspec):
        self.pspec = pspec
        self.disable_cache = (hasattr(pspec, "disable_cache") and
                              pspec.disable_cache == True)
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache()
        self.icache = ICache(pspec)
        # these names are from the perspective of here (LoadStore1)
        self.d_out = self.dcache.d_in    # in to dcache is out for LoadStore
        self.d_in = self.dcache.d_out    # out from dcache is in for LoadStore
        self.i_out = self.icache.i_in    # in to icache is out for LoadStore
        self.i_in = self.icache.i_out    # out from icache is in for LoadStore
        self.m_out = LoadStore1ToMMUType("m_out")  # out *to* MMU
        self.m_in = MMUToLoadStore1Type("m_in")    # in *from* MMU
        self.req = LDSTRequest(name="ldst_req")

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))
        self.ibus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()
        self.d_w_valid = Signal()
        self.d_validblip = Signal()

        # state info for LD/ST
        self.done = Signal()
        self.done_delay = Signal()
        # latch most of the input request
        self.load = Signal()
        self.tlbie = Signal()
        self.dcbz = Signal()
        self.raddr = Signal(64)
        self.maddr = Signal(64)
        self.store_data = Signal(64)
        self.load_data = Signal(64)
        self.load_data_delay = Signal(64)
        self.byte_sel = Signal(8)
        #self.xerc : xer_common_t;
        #self.reserve = Signal()
        #self.atomic = Signal()
        #self.atomic_last = Signal()
        #self.rc = Signal()
        self.nc = Signal()              # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.mode_32bit = Signal()      # XXX UNUSED AT PRESENT
        self.state = Signal(State)
        self.instr_fault = Signal()     # indicator to request i-cache MMU lookup
        self.r_instr_fault = Signal()   # accessed in external_busy
        self.align_intr = Signal()
        self.busy = Signal()
        self.wait_dcache = Signal()
        self.wait_mmu = Signal()
        #self.intr_vec : integer range 0 to 16#fff#;
        #self.nia = Signal(64)
        #self.srr1 = Signal(16)
        # use these to set the dsisr or dar respectively
        self.mmu_set_spr = Signal()
        self.mmu_set_dsisr = Signal()
        self.mmu_set_dar = Signal()
        self.sprval_in = Signal(64)

        # ONLY access these read-only, do NOT attempt to change
        self.dsisr = Signal(32)
        self.dar = Signal(64)

    # when external_busy set, do not allow PortInterface to proceed
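    # (an instruction fault is initiated by the fetch/i-cache side rather
    # than via a PortInterface request, so "busy" has to be signalled from
    # here while that i-side MMU lookup is still in progress)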
    def external_busy(self, m):
        return self.instr_fault | self.r_instr_fault

    def set_wr_addr(self, m, addr, mask, misalign, msr, is_dcbz):
        m.d.comb += self.req.load.eq(0)  # store operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.raddr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr.pr)   # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr.dr)    # DR ==> virt
        m.d.comb += self.req.mode_32bit.eq(~msr.sf)  # not-sixty-four ==> 32bit
        m.d.comb += self.req.dcbz.eq(is_dcbz)
        # XXX TODO sort out misalignment, mmu test5 fails
        m.d.comb += self.req.align_intr.eq(misalign)

        # m.d.comb += Display("set_wr_addr %i dcbz %i", addr, is_dcbz)

        # option to disable the cache entirely for write
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)

        # dcbz cannot do no-cache
        with m.If(is_dcbz & self.req.nc):
            m.d.comb += self.req.align_intr.eq(1)

        return None

    def set_rd_addr(self, m, addr, mask, misalign, msr):
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.req.load.eq(1)  # load operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.raddr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr.pr)   # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr.dr)    # DR ==> virt
        m.d.comb += self.req.mode_32bit.eq(~msr.sf)  # not-sixty-four ==> 32bit
        # BAD HACK! disable caching on LD when address is 0xCxxx_xxxx
        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
        with m.If(addr[28:] == Const(0xc, 4)):
            m.d.comb += self.req.nc.eq(1)
        # option to disable the cache entirely for read
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        # XXX TODO sort out misalignment, mmu test5 fails
        m.d.comb += self.req.align_intr.eq(misalign)
        return None  # FIXME return value

    def set_wr_data(self, m, data, wen):
        # do the "blip" on write data
        m.d.comb += self.d_valid.eq(1)
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.store_data.eq(data)
        #m.d.sync += self.d_out.byte_sel.eq(wen)  # this might not be needed
        st_ok = self.done  # TODO indicates write data is valid
        return st_ok

    def get_rd_data(self, m):
        ld_ok = self.done_delay      # indicates read data is valid
        data = self.load_data_delay  # actual read data
        return data, ld_ok

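    # note on the "blip" mechanism: set_rd_addr/set_wr_addr/set_wr_data drive
    # d_valid combinatorially; rising_edge() (in elaborate, below) turns that
    # into the single-cycle d_validblip which both kicks the dcache and
    # latches the request into ldst_r.  done/load_data are delayed by one
    # cycle (done_delay/load_data_delay) to match microwatt's timing.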
    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync

        # microwatt takes one more cycle before next operation can be issued
        sync += self.done_delay.eq(self.done)
        sync += self.load_data_delay.eq(self.load_data)

        # create dcache and icache module
        m.submodules.dcache = dcache = self.dcache
        m.submodules.icache = icache = self.icache

        # temp vars
        d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
        i_out, i_in, ibus = self.i_out, self.i_in, self.ibus
        m_out, m_in = self.m_out, self.m_in
        exc = self.pi.exc_o
        exception = exc.happened
        mmureq = Signal()

        # copy of address, but gets over-ridden for instr_fault
        maddr = Signal(64)
        m.d.comb += maddr.eq(self.raddr)

        # create a blip (single pulse) on valid read/write request
        # this can be over-ridden in the FSM to get dcache to re-run
        # a request when MMU_LOOKUP completes.
        m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
        ldst_r = LDSTRequest("ldst_r")
        comb += Display("MMUTEST: LoadStore1 d_in.error=%i", d_in.error)

        # fsm skeleton
        with m.Switch(self.state):
            with m.Case(State.IDLE):
                with m.If((self.d_validblip | self.instr_fault) &
                          ~exc.happened):
                    comb += self.busy.eq(1)
                    sync += self.state.eq(State.ACK_WAIT)
                    sync += ldst_r.eq(self.req)  # copy of LDSTRequest on "blip"
                    # sync += Display("validblip self.req.virt_mode=%i",
                    #                 self.req.virt_mode)
                    with m.If(self.instr_fault):
                        comb += mmureq.eq(1)
                        sync += self.r_instr_fault.eq(1)
                        comb += maddr.eq(self.maddr)
                        sync += self.state.eq(State.MMU_LOOKUP)
                    with m.Else():
                        sync += self.r_instr_fault.eq(0)
                with m.Else():
                    sync += ldst_r.eq(0)

            # waiting for completion
            with m.Case(State.ACK_WAIT):
                comb += Display("MMUTEST: ACK_WAIT")
                comb += self.busy.eq(~exc.happened)

                with m.If(d_in.error):
                    # cache error is not necessarily "final", it could
                    # be that it was just a TLB miss
                    with m.If(d_in.cache_paradox):
                        comb += exception.eq(1)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        sync += Display("cache error -> update dsisr")
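                        # note: the [63 - n] indices convert IBM MSB0 bit
                        # numbers (of the 64-bit register view) into LSB0
                        # bit positions within the 32-bit DSISR Signal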
                        sync += self.dsisr[63 - 38].eq(~ldst_r.load)
                        # XXX there is no architected bit for this
                        # (probably should be a machine check in fact)
                        sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)
                        sync += self.r_instr_fault.eq(0)

                    with m.Else():
                        # Look up the translation for TLB miss
                        # and also for permission error and RC error
                        # in case the PTE has been updated.
                        comb += mmureq.eq(1)
                        sync += self.state.eq(State.MMU_LOOKUP)
                with m.If(d_in.valid):
                    m.d.comb += self.done.eq(~mmureq)  # done if not doing MMU
                    with m.If(self.done):
                        sync += Display("ACK_WAIT, done %x", self.raddr)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        with m.If(self.load):
                            m.d.comb += self.load_data.eq(d_in.data)

            # waiting here for the MMU TLB lookup to complete.
            # either re-try the dcache lookup or throw MMU exception
            with m.Case(State.MMU_LOOKUP):
                comb += self.busy.eq(~exception)
                with m.If(m_in.done):
                    with m.If(~self.r_instr_fault):
                        sync += Display("MMU_LOOKUP, done %x -> %x",
                                        self.raddr, d_out.addr)
                        # retry the request now that the MMU has
                        # installed a TLB entry, if not exception raised
                        m.d.comb += self.d_out.valid.eq(~exception)
                        sync += self.state.eq(State.ACK_WAIT)
                        sync += ldst_r.eq(0)
                    with m.Else():
                        sync += self.state.eq(State.IDLE)
                        sync += self.r_instr_fault.eq(0)
                        comb += self.done.eq(1)

                with m.If(m_in.err):
                    # MMU RADIX exception thrown. XXX
                    # TODO: critical that the write here has to
                    # notify the MMU FSM of the change to dsisr
                    comb += exception.eq(1)
                    comb += self.done.eq(1)
                    sync += Display("MMU RADIX exception thrown")
                    sync += self.dsisr[63 - 33].eq(m_in.invalid)
                    sync += self.dsisr[63 - 36].eq(m_in.perm_error)  # noexec
                    sync += self.dsisr[63 - 38].eq(~ldst_r.load)
                    sync += self.dsisr[63 - 44].eq(m_in.badtree)
                    sync += self.dsisr[63 - 45].eq(m_in.rc_error)
                    sync += self.state.eq(State.IDLE)
                    # exception thrown, clear out instruction fault state
                    sync += self.r_instr_fault.eq(0)

        # MMU FSM communicating a request to update DSISR or DAR (OP_MTSPR)
        with m.If(self.mmu_set_spr):
            with m.If(self.mmu_set_dsisr):
                sync += self.dsisr.eq(self.sprval_in)
            with m.If(self.mmu_set_dar):
                sync += self.dar.eq(self.sprval_in)

        # alignment errors are detected in set_rd_addr/set_wr_addr:
        # flag the exception here
        with m.If(self.align_intr):
            comb += exc.happened.eq(1)
        # check for updating DAR
        with m.If(exception):
            sync += Display("exception %x", self.raddr)
            # alignment error: store address in DAR
            with m.If(self.align_intr):
                sync += Display("alignment error: addr in DAR %x", self.raddr)
                sync += self.dar.eq(self.raddr)
            with m.Elif(~self.r_instr_fault):
                sync += Display("not instr fault, addr in DAR %x", self.raddr)
                sync += self.dar.eq(self.raddr)

        # when done or exception, return to idle state
        with m.If(self.done | exception):
            sync += self.state.eq(State.IDLE)
            comb += self.busy.eq(0)

        # happened, alignment, instr_fault, invalid.
        # note that all of these flow through - eventually to the TRAP
        # pipeline, via PowerDecoder2.
        comb += self.align_intr.eq(self.req.align_intr)
        comb += exc.invalid.eq(m_in.invalid)
        comb += exc.alignment.eq(self.align_intr)
        comb += exc.instr_fault.eq(self.r_instr_fault)
        # badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(m_in.badtree)
        comb += exc.perm_error.eq(m_in.perm_error)
        comb += exc.rc_error.eq(m_in.rc_error)
        comb += exc.segment_fault.eq(m_in.segerr)

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        comb += dbus.adr.eq(dcache.bus.adr)
        comb += dbus.dat_w.eq(dcache.bus.dat_w)
        comb += dbus.sel.eq(dcache.bus.sel)
        comb += dbus.cyc.eq(dcache.bus.cyc)
        comb += dbus.stb.eq(dcache.bus.stb)
        comb += dbus.we.eq(dcache.bus.we)

        comb += dcache.bus.dat_r.eq(dbus.dat_r)
        comb += dcache.bus.ack.eq(dbus.ack)
        if hasattr(dbus, "stall"):
            comb += dcache.bus.stall.eq(dbus.stall)
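        # (the generated Wishbone layout only includes "stall" in some
        # configurations - e.g. pipelined mode - hence the hasattr check)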

        # update out d data when flag set
        with m.If(self.d_w_valid):
            m.d.sync += d_out.data.eq(self.store_data)
        #with m.Else():
        #    m.d.sync += d_out.data.eq(0)
        # unit test passes with that change

        # this must move into the FSM, conditionally noticing that
        # the "blip" comes from self.d_validblip.
        # task 1: look up in dcache
        # task 2: if dcache fails, look up in MMU.
        # do **NOT** confuse the two.
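        # on the blip cycle itself the registered copy (ldst_r) has not yet
        # been latched, so the dcache request fields are taken directly from
        # self.req; on subsequent cycles the latched ldst_r copy is used.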
        with m.If(self.d_validblip):
            m.d.comb += self.d_out.valid.eq(~exc.happened)
            m.d.comb += d_out.load.eq(self.req.load)
            m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
            m.d.comb += self.raddr.eq(self.req.raddr)
            m.d.comb += d_out.nc.eq(self.req.nc)
            m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
            m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
            #m.d.comb += Display("validblip dcbz=%i addr=%x",
            #                    self.req.dcbz, self.req.addr)
            m.d.comb += d_out.dcbz.eq(self.req.dcbz)
        with m.Else():
            m.d.comb += d_out.load.eq(ldst_r.load)
            m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
            m.d.comb += self.raddr.eq(ldst_r.raddr)
            m.d.comb += d_out.nc.eq(ldst_r.nc)
            m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
            m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
            #m.d.comb += Display("no_validblip dcbz=%i addr=%x",
            #                    ldst_r.dcbz, ldst_r.addr)
            m.d.comb += d_out.dcbz.eq(ldst_r.dcbz)

        # XXX these should be possible to remove but for some reason
        # cannot be... yet. TODO, investigate
        m.d.comb += self.load_data.eq(d_in.data)
        m.d.comb += d_out.addr.eq(self.raddr)

        # Update outputs to MMU
        m.d.comb += m_out.valid.eq(mmureq)
        m.d.comb += m_out.iside.eq(self.instr_fault)
        m.d.comb += m_out.load.eq(ldst_r.load)
        m.d.comb += m_out.priv.eq(self.priv_mode)
        # m_out.priv <= r.priv_mode; TODO
        m.d.comb += m_out.tlbie.eq(self.tlbie)
        # m_out.mtspr <= mmu_mtspr; # TODO
        # m_out.sprn <= sprn; # TODO
        m.d.comb += m_out.addr.eq(maddr)
        # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
        # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate

        return m

    def ports(self):
        yield from super().ports()
        # TODO: memory ports


class TestSRAMLoadStore1(LoadStore1):
    def __init__(self, pspec):
        super().__init__(pspec)
        pspec = self.pspec
        # small test Memory (default depth 32 entries)
        if (hasattr(pspec, "dmem_test_depth") and
                isinstance(pspec.dmem_test_depth, int)):
            depth = pspec.dmem_test_depth
        else:
            depth = 32
        print("TestSRAMLoadStore1 depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        for fanin in fanins:
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m

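
# A minimal usage sketch (illustrative only, not part of this module).
# The pspec normally comes from the SoC test/config infrastructure; here a
# hypothetical SimpleNamespace stands in, carrying only the attributes read
# directly in this file (DCache/ICache/make_wb_layout may require more):
#
#     from types import SimpleNamespace
#     pspec = SimpleNamespace(reg_wid=64, addr_wid=64, dmem_test_depth=32)
#     ldst = TestSRAMLoadStore1(pspec)
#     # ldst.pi is the PortInterface used by LDSTCompUnit; ldst.dbus is the
#     # Wishbone bus, wired up in elaborate() to the backing test SRAM.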