src/soc/fu/ldst/loadstore.py

   1 """LoadStore1 FSM.
   2
   3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
   4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
   5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
   6 by way of PortInterface.  PortInterface is where things need extending,
   7 such as adding dcbz support, etc.
   8
   9 this module basically handles "pure" load / store operations, and
  10 its first job is to ask the D-Cache for the data.  if that fails,
  11 the second task (if virtual memory is enabled) is to ask the MMU
  12 to look up the translation, then to go *back* to the cache and ask again.
  13
  14 Links:
  15
  16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
  17
  18 """
  19
  20 from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
  21                     Record, Memory,
  22                     Const)
  23 from nmutil.iocontrol import RecordObject
  24 from nmutil.util import rising_edge, Display
  25 from enum import Enum, unique
  26
  27 from soc.experiment.dcache import DCache
  28 from soc.experiment.pimem import PortInterfaceBase
  29 from soc.experiment.mem_types import LoadStore1ToMMUType
  30 from soc.experiment.mem_types import MMUToLoadStore1Type
  31
  32 from soc.minerva.wishbone import make_wb_layout
  33 from soc.bus.sram import SRAM
  34 from nmutil.util import Display
  35
  36
  37 @unique
  38 class State(Enum):
  39     IDLE = 0       # ready for instruction
  40     ACK_WAIT = 1   # waiting for ack from dcache
  41     MMU_LOOKUP = 2 # waiting for MMU to look up translation
  42     TLBIE_WAIT = 3 # waiting for MMU to finish doing a tlbie
  43
  44
  45 # captures the LDSTRequest from the PortInterface, which "blips" most
  46 # of this at us (pipeline-style).
  47 class LDSTRequest(RecordObject):
  48     def __init__(self, name=None):
  49         RecordObject.__init__(self, name=name)
  50
  51         self.load          = Signal()
  52         self.dcbz          = Signal()
  53         self.addr          = Signal(64)
  54         # self.store_data    = Signal(64) # this is already sync (on a delay)
  55         self.byte_sel      = Signal(8)
  56         self.nc            = Signal()              # non-cacheable access
  57         self.virt_mode     = Signal()
  58         self.priv_mode     = Signal()
  59         self.align_intr    = Signal()
  60
  61 # glue logic for microwatt mmu and dcache
  62 class LoadStore1(PortInterfaceBase):
  63     def __init__(self, pspec):
  64         self.pspec = pspec
  65         self.disable_cache = (hasattr(pspec, "disable_cache") and
  66                               pspec.disable_cache == True)
  67         regwid = pspec.reg_wid
  68         addrwid = pspec.addr_wid
  69
  70         super().__init__(regwid, addrwid)
  71         self.dcache = DCache()
  72         # these names are from the perspective of here (LoadStore1)
  73         self.d_out  = self.dcache.d_in     # in to dcache is out for LoadStore
  74         self.d_in = self.dcache.d_out      # out from dcache is in for LoadStore
  75         self.m_out  = LoadStore1ToMMUType() # out *to* MMU
  76         self.m_in = MMUToLoadStore1Type()   # in *from* MMU
  77         self.req = LDSTRequest(name="ldst_req")
  78
  79         # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
  80         self.dbus = Record(make_wb_layout(pspec))
  81
  82         # for creating a single clock blip to DCache
  83         self.d_valid = Signal()
  84         self.d_w_valid = Signal()
  85         self.d_validblip = Signal()
  86
  87         # state info for LD/ST
  88         self.done          = Signal()
  89         self.done_delay    = Signal()
  90         # latch most of the input request
  91         self.load          = Signal()
  92         self.tlbie         = Signal()
  93         self.dcbz          = Signal()
  94         self.addr          = Signal(64)
  95         self.store_data    = Signal(64)
  96         self.load_data     = Signal(64)
  97         self.load_data_delay = Signal(64)
  98         self.byte_sel      = Signal(8)
  99         #self.xerc         : xer_common_t;
 100         #self.reserve       = Signal()
 101         #self.atomic        = Signal()
 102         #self.atomic_last   = Signal()
 103         #self.rc            = Signal()
 104         self.nc            = Signal()              # non-cacheable access
 105         self.virt_mode     = Signal()
 106         self.priv_mode     = Signal()
 107         self.state        = Signal(State)
 108         self.instr_fault   = Signal()
 109         self.align_intr    = Signal()
 110         self.busy          = Signal()
 111         self.wait_dcache   = Signal()
 112         self.wait_mmu      = Signal()
 113         #self.mode_32bit    = Signal()
 114         #self.intr_vec     : integer range 0 to 16#fff#;
 115         #self.nia           = Signal(64)
 116         #self.srr1          = Signal(16)
 117         # use these to set the dsisr or dar respectively
 118         self.mmu_set_spr    = Signal()
 119         self.mmu_set_dsisr  = Signal()
 120         self.mmu_set_dar    = Signal()
 121         self.sprval_in      = Signal(64)
 122
 123     def set_wr_addr(self, m, addr, mask, misalign, msr_pr, is_dcbz):
 124         m.d.comb += self.req.load.eq(0) # store operation
 125         m.d.comb += self.req.byte_sel.eq(mask)
 126         m.d.comb += self.req.addr.eq(addr)
 127         m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem  ==> priv
 128         m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
 129         m.d.comb += self.req.align_intr.eq(misalign)
 130         m.d.comb += self.req.dcbz.eq(is_dcbz)
 131
 132         # m.d.comb += Display("set_wr_addr %i dcbz %i",addr,is_dcbz)
 133
 134         # option to disable the cache entirely for write
 135         if self.disable_cache:
 136             m.d.comb += self.req.nc.eq(1)
 137         return None
 138
 139     def set_rd_addr(self, m, addr, mask, misalign, msr_pr):
 140         m.d.comb += self.d_valid.eq(1)
 141         m.d.comb += self.req.load.eq(1) # load operation
 142         m.d.comb += self.req.byte_sel.eq(mask)
 143         m.d.comb += self.req.align_intr.eq(misalign)
 144         m.d.comb += self.req.addr.eq(addr)
 145         m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem  ==> priv
 146         m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
 147         # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
 148         # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
 149         with m.If(addr[28:] == Const(0xc, 4)):
 150             m.d.comb += self.req.nc.eq(1)
 151         # option to disable the cache entirely for read
 152         if self.disable_cache:
 153             m.d.comb += self.req.nc.eq(1)
 154         return None #FIXME return value
 155
 156     def set_wr_data(self, m, data, wen):
 157         # do the "blip" on write data
 158         m.d.comb += self.d_valid.eq(1)
 159         # put data into comb which is picked up in main elaborate()
 160         m.d.comb += self.d_w_valid.eq(1)
 161         m.d.comb += self.store_data.eq(data)
 162         #m.d.sync += self.d_out.byte_sel.eq(wen) # this might not be needed
 163         st_ok = self.done # TODO indicates write data is valid
 164         return st_ok
 165
 166     def get_rd_data(self, m):
 167         ld_ok = self.done_delay # indicates read data is valid
 168         data = self.load_data_delay   # actual read data
 169         return data, ld_ok
 170
    def elaborate(self, platform):
        """Build the LoadStore1 hardware on top of the superclass module.

        Starting from the module returned by the superclass elaborate(),
        this adds the dcache as a submodule, implements the FSM held in
        self.state (IDLE / ACK_WAIT / MMU_LOOKUP / TLBIE_WAIT), forwards
        the dcache wishbone signals to self.dbus, drives the exception
        record self.pi.exc_o, and drives the request to the MMU (m_out).

        Returns the module.
        """
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync

        # microwatt takes one more cycle before next operation can be issued
        sync += self.done_delay.eq(self.done)
        sync += self.load_data_delay.eq(self.load_data)

        # create dcache module
        m.submodules.dcache = dcache = self.dcache

        # temp vars
        d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
        m_out, m_in = self.m_out, self.m_in
        exc = self.pi.exc_o
        # "exception" is just another name for exc.happened: assignments
        # through either name drive the same signal
        exception = exc.happened
        # raised (comb) in ACK_WAIT when the MMU has to be consulted
        mmureq = Signal()

        # copy of address, but gets over-ridden for OP_FETCH_FAILED
        # NOTE(review): nothing in this method over-rides maddr; the
        # over-ride presumably happens elsewhere or is still to do.
        maddr = Signal(64)
        m.d.comb += maddr.eq(self.addr)

        # DO NOT access these directly, they are internal
        # (dsisr is 32 bits wide, dar 64)
        dsisr          = Signal(32)
        dar            = Signal(64)

        # create a blip (single pulse) on valid read/write request
        # this can be over-ridden in the FSM to get dcache to re-run
        # a request when MMU_LOOKUP completes.
        m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
        # registered copy of the request, captured on the blip (see IDLE)
        ldst_r = LDSTRequest("ldst_r")
        comb += Display("MMUTEST: LoadStore1 d_in.error=%i",d_in.error)

        # fsm skeleton
        with m.Switch(self.state):
            with m.Case(State.IDLE):
                # a new request is accepted only if no exception is raised
                with m.If(self.d_validblip & ~exc.happened):
                    comb += self.busy.eq(1)
                    sync += self.state.eq(State.ACK_WAIT)
                    sync += ldst_r.eq(self.req) # copy of LDSTRequest on "blip"
#                   sync += Display("validblip self.req.virt_mode=%i",
#                   self.req.virt_mode)
                with m.Else():
                    sync += ldst_r.eq(0)

            # waiting for completion
            with m.Case(State.ACK_WAIT):
                comb += Display("MMUTEST: ACK_WAIT")
                comb += self.busy.eq(~exc.happened)

                with m.If(d_in.error):
                    # cache error is not necessarily "final", it could
                    # be that it was just a TLB miss
                    with m.If(d_in.cache_paradox):
                        comb += exception.eq(1)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        sync += Display("cache error -> update dsisr")
                        # NOTE(review): self.load is not assigned anywhere
                        # in this file; unless it is driven externally this
                        # bit is always set.  Presumably ldst_r.load was
                        # intended - confirm.
                        sync += dsisr[63 - 38].eq(~self.load)
                        # XXX there is no architected bit for this
                        # (probably should be a machine check in fact)
                        sync += dsisr[63 - 35].eq(d_in.cache_paradox)

                    with m.Else():
                        # Look up the translation for TLB miss
                        # and also for permission error and RC error
                        # in case the PTE has been updated.
                        comb += mmureq.eq(1)
                        sync += self.state.eq(State.MMU_LOOKUP)
                # note: a separate If (not an Elif), so this is evaluated
                # independently of the d_in.error test above
                with m.If(d_in.valid):
                    m.d.comb += self.done.eq(~mmureq) # done if not doing MMU
                    with m.If(self.done):
                        sync += Display("ACK_WAIT, done %x", self.addr)
                    sync += self.state.eq(State.IDLE)
                    sync += ldst_r.eq(0)
                    # NOTE(review): same concern as above about self.load
                    # having no driver in this file; load_data is in any
                    # case assigned unconditionally further down.
                    with m.If(self.load):
                        m.d.comb += self.load_data.eq(d_in.data)

            # waiting here for the MMU TLB lookup to complete.
            # either re-try the dcache lookup or throw MMU exception
            with m.Case(State.MMU_LOOKUP):
                comb += self.busy.eq(~exception)
                with m.If(m_in.done):
                    with m.If(~self.instr_fault):
                        sync += Display("MMU_LOOKUP, done %x -> %x",
                                        self.addr, d_out.addr)
                        # retry the request now that the MMU has
                        # installed a TLB entry, if not exception raised
                        m.d.comb += self.d_out.valid.eq(~exception)
                        sync += self.state.eq(State.ACK_WAIT)
                        sync += ldst_r.eq(0)
                    with m.Else():
                        sync += Display("MMU_LOOKUP, exception %x", self.addr)
                        # instruction lookup fault: store address in DAR
                        comb += exc.happened.eq(1) # reason = MMU_LOOKUP
                        # mark dar as updated ?
                        sync += dar.eq(self.addr)
                        sync += self.state.eq(State.IDLE)

                with m.If(m_in.err):
                    # MMU RADIX exception thrown. XXX
                    # TODO: critical that the write here has to
                    # notify the MMU FSM of the change to dsisr
                    comb += exception.eq(1)
                    sync += Display("MMU RADIX exception thrown")
                    sync += Display("TODO: notify MMU of change to dsisr")
                    # record the individual MMU error flags in dsisr
                    # (the "63 - n" indices presumably convert MSB0 bit
                    # numbers to LSB0 - confirm against the ISA)
                    sync += dsisr[63 - 33].eq(m_in.invalid)
                    sync += dsisr[63 - 36].eq(m_in.perm_error) # noexec fault
                    sync += dsisr[63 - 38].eq(~self.load)
                    sync += dsisr[63 - 44].eq(m_in.badtree)
                    sync += dsisr[63 - 45].eq(m_in.rc_error)
                    sync += self.state.eq(State.IDLE)

            # placeholder: no behaviour yet, and nothing in this method
            # ever moves the FSM into this state
            with m.Case(State.TLBIE_WAIT):
                pass

        # MMU FSM communicating a request to update dsisr or dar
        # (from OP_MTSPR)
        # NOTE(review): sprval_in is 64 bits but dsisr only 32, so the
        # upper half is presumably dropped on the dsisr write - confirm.
        with m.If(self.mmu_set_spr):
            with m.If(self.mmu_set_dsisr):
                sync += dsisr.eq(self.sprval_in)
            with m.If(self.mmu_set_dar):
                sync += dar.eq(self.sprval_in)

        # alignment error: store address in DAR
        with m.If(self.align_intr):
            comb += exc.happened.eq(1) # reason = alignment
            sync += Display("alignment error: addr in DAR %x", self.addr)
            sync += dar.eq(self.addr)

        # when done or exception, return to idle state
        # (stated after the FSM, so that these assignments to state and
        # busy come later than the ones made inside the Switch)
        with m.If(self.done | exception):
            sync += self.state.eq(State.IDLE)
            comb += self.busy.eq(0)

        # happened, alignment, instr_fault, invalid.
        # note that all of these flow through - eventually to the TRAP
        # pipeline, via PowerDecoder2.
        # (align_intr follows the *live* request, not the latched ldst_r)
        comb += self.align_intr.eq(self.req.align_intr)
        comb += exc.invalid.eq(m_in.invalid)
        comb += exc.alignment.eq(self.align_intr)
        comb += exc.instr_fault.eq(self.instr_fault)
        # badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(m_in.badtree)
        comb += exc.perm_error.eq(m_in.perm_error)
        comb += exc.rc_error.eq(m_in.rc_error)
        comb += exc.segment_fault.eq(m_in.segerr)

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        # dcache -> bus
        comb += dbus.adr.eq(dcache.wb_out.adr)
        comb += dbus.dat_w.eq(dcache.wb_out.dat)
        comb += dbus.sel.eq(dcache.wb_out.sel)
        comb += dbus.cyc.eq(dcache.wb_out.cyc)
        comb += dbus.stb.eq(dcache.wb_out.stb)
        comb += dbus.we.eq(dcache.wb_out.we)

        # bus -> dcache
        comb += dcache.wb_in.dat.eq(dbus.dat_r)
        comb += dcache.wb_in.ack.eq(dbus.ack)
        # stall is only connected when the bus record has such a field
        # (a Python-level test, decided when this method runs)
        if hasattr(dbus, "stall"):
            comb += dcache.wb_in.stall.eq(dbus.stall)

        # update out d data when flag set
        with m.If(self.d_w_valid):
            m.d.sync += d_out.data.eq(self.store_data)
        #with m.Else():
        #    m.d.sync += d_out.data.eq(0)
        # unit test passes with that change

        # this must move into the FSM, conditionally noticing that
        # the "blip" comes from self.d_validblip.
        # task 1: look up in dcache
        # task 2: if dcache fails, look up in MMU.
        # do **NOT** confuse the two.
        # on the blip the dcache is fed from the live request (self.req);
        # at all other times it is fed from the latched copy (ldst_r)
        with m.If(self.d_validblip):
            m.d.comb += self.d_out.valid.eq(~exc.happened)
            m.d.comb += d_out.load.eq(self.req.load)
            m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
            m.d.comb += self.addr.eq(self.req.addr)
            m.d.comb += d_out.nc.eq(self.req.nc)
            m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
            m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
            #m.d.comb += Display("validblip dcbz=%i addr=%x",
            #self.req.dcbz,self.req.addr)
            m.d.comb += d_out.dcbz.eq(self.req.dcbz)
        with m.Else():
            m.d.comb += d_out.load.eq(ldst_r.load)
            m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
            m.d.comb += self.addr.eq(ldst_r.addr)
            m.d.comb += d_out.nc.eq(ldst_r.nc)
            m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
            m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
            #m.d.comb += Display("no_validblip dcbz=%i addr=%x",
            #ldst_r.dcbz,ldst_r.addr)
            m.d.comb += d_out.dcbz.eq(ldst_r.dcbz)

        # XXX these should be possible to remove but for some reason
        # cannot be... yet. TODO, investigate
        # NOTE(review): the conditional load_data assignment in ACK_WAIT
        # is gated on self.load, which has no driver in this file; that
        # may be why this unconditional one is still needed - confirm.
        m.d.comb += self.load_data.eq(d_in.data)
        m.d.comb += d_out.addr.eq(self.addr)

        # Update outputs to MMU
        m.d.comb += m_out.valid.eq(mmureq)
        m.d.comb += m_out.iside.eq(self.instr_fault)
        m.d.comb += m_out.load.eq(ldst_r.load)
        # m_out.priv <= r.priv_mode; TODO
        m.d.comb += m_out.tlbie.eq(self.tlbie)
        # m_out.mtspr <= mmu_mtspr; # TODO
        # m_out.sprn <= sprn; # TODO
        m.d.comb += m_out.addr.eq(maddr)
        # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
        # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate

        return m
 384
    def ports(self):
        """Yield the ports of the superclass; none are added here yet."""
        yield from super().ports()
        # TODO: memory ports
 388
 389
 390 class TestSRAMLoadStore1(LoadStore1):
 391     def __init__(self, pspec):
 392         super().__init__(pspec)
 393         pspec = self.pspec
 394         # small 32-entry Memory
 395         if (hasattr(pspec, "dmem_test_depth") and
 396                 isinstance(pspec.dmem_test_depth, int)):
 397             depth = pspec.dmem_test_depth
 398         else:
 399             depth = 32
 400         print("TestSRAMBareLoadStoreUnit depth", depth)
 401
 402         self.mem = Memory(width=pspec.reg_wid, depth=depth)
 403
 404     def elaborate(self, platform):
 405         m = super().elaborate(platform)
 406         comb = m.d.comb
 407         m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
 408                                         features={'cti', 'bte', 'err'})
 409         dbus = self.dbus
 410
 411         # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
 412         # note: SRAM is a target (slave), dbus is initiator (master)
 413         fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
 414         fanins = ['dat_r', 'ack', 'err']
 415         for fanout in fanouts:
 416             print("fanout", fanout, getattr(sram.bus, fanout).shape(),
 417                   getattr(dbus, fanout).shape())
 418             comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
 419             comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
 420         for fanin in fanins:
 421             comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
 422         # connect address
 423         comb += sram.bus.adr.eq(dbus.adr)
 424
 425         return m
 426