fix issue with priv_mode not being passed correctly to MMU
[soc.git] / src / soc / fu / ldst / loadstore.py
1 """LoadStore1 FSM.
2
3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
6 by way of PortInterface. PortInterface is where things need extending,
7 such as adding dcbz support, etc.
8
9 this module basically handles "pure" load / store operations, and
10 its first job is to ask the D-Cache for the data. if that fails,
11 the second task (if virtual memory is enabled) is to ask the MMU
12 to perform a TLB look-up, then to go *back* to the cache and ask again.
13
14 Links:
15
16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
17
18 """
19
20 from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
21 Record, Memory,
22 Const)
23 from nmutil.iocontrol import RecordObject
24 from nmutil.util import rising_edge, Display
25 from enum import Enum, unique
26
27 from soc.experiment.dcache import DCache
28 from soc.experiment.icache import ICache
29 from soc.experiment.pimem import PortInterfaceBase
30 from soc.experiment.mem_types import LoadStore1ToMMUType
31 from soc.experiment.mem_types import MMUToLoadStore1Type
32
33 from soc.minerva.wishbone import make_wb_layout
34 from soc.bus.sram import SRAM
35 from nmutil.util import Display
36
37
@unique
class State(Enum):
    """Top-level FSM state for LoadStore1 (see elaborate())."""
    IDLE = 0            # ready for instruction
    ACK_WAIT = 1        # waiting for ack from dcache
    MMU_LOOKUP = 2      # waiting for MMU to look up translation
    #SECOND_REQ = 3     # second request for unaligned transfer
44
@unique
class Misalign(Enum):
    """Progress of a (possibly) misaligned transfer that needs two dwords."""
    ONEWORD = 0         # only one word needed, all good
    NEED2WORDS = 1      # need to send/receive two words
    WAITFIRST = 2       # waiting for the first word
    WAITSECOND = 3      # waiting for the second word
51
52
# captures the LDSTRequest from the PortInterface, which "blips" most
# of this at us (pipeline-style).
class LDSTRequest(RecordObject):
    """Latched copy of one load/store request from the PortInterface.

    Field creation order defines the Record layout - do not reorder.
    """
    def __init__(self, name=None):
        RecordObject.__init__(self, name=name)

        self.load = Signal()             # 1 = load, 0 = store
        self.dcbz = Signal()             # data-cache-block-zero store
        self.raddr = Signal(64)          # request address
        # self.store_data = Signal(64) # this is already sync (on a delay)
        self.byte_sel = Signal(16)       # byte enables, 16-wide for misaligned
        self.nc = Signal()               # non-cacheable access
        self.virt_mode = Signal()        # from MSR.DR (DR ==> virt)
        self.priv_mode = Signal()        # from ~MSR.PR (not-problem ==> priv)
        self.mode_32bit = Signal() # XXX UNUSED AT PRESENT
        self.alignstate = Signal(Misalign) # progress of alignment request
        self.align_intr = Signal()       # request raises alignment interrupt
71
72 # glue logic for microwatt mmu and dcache
# glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    """FSM glue between PortInterface, the D-Cache/I-Cache and the MMU.

    Latches an LDSTRequest from the PortInterface "blip", issues it to
    the D-Cache and, on a D-Cache error that is not a cache paradox,
    asks the MMU for a translation before re-trying the D-Cache.
    Misaligned transfers that need two dwords are split into two
    back-to-back D-Cache operations (tracked via Misalign).
    """

    def __init__(self, pspec):
        self.pspec = pspec
        # option (from pspec) to disable the cache entirely, for testing
        self.disable_cache = (hasattr(pspec, "disable_cache") and
                              pspec.disable_cache == True)
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache(pspec)
        self.icache = ICache(pspec)
        # these names are from the perspective of here (LoadStore1)
        self.d_out = self.dcache.d_in     # in to dcache is out for LoadStore
        self.d_in = self.dcache.d_out     # out from dcache is in for LoadStore
        self.i_out = self.icache.i_in     # in to icache is out for LoadStore
        self.i_in = self.icache.i_out     # out from icache is in for LoadStore
        self.m_out = LoadStore1ToMMUType("m_out")   # out *to* MMU
        self.m_in = MMUToLoadStore1Type("m_in")     # in *from* MMU
        self.req = LDSTRequest(name="ldst_req")

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))
        self.ibus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()
        self.d_w_valid = Signal()
        self.d_validblip = Signal()

        # state info for LD/ST
        self.done = Signal()
        self.done_delay = Signal()
        # latch most of the input request
        self.load = Signal()
        self.tlbie = Signal()
        self.dcbz = Signal()
        self.raddr = Signal(64)
        self.maddr = Signal(64)
        self.store_data = Signal(128)       # 128-bit to cope with
        self.load_data = Signal(128)        # misalignment
        self.load_data_delay = Signal(128)  # perform 2 LD/STs
        self.byte_sel = Signal(16)          # also for misaligned, 16-bit
        self.alignstate = Signal(Misalign)  # progress of alignment request
        #self.xerc : xer_common_t;
        #self.reserve = Signal()
        #self.atomic = Signal()
        #self.atomic_last = Signal()
        #self.rc = Signal()
        self.nc = Signal()              # non-cacheable access
        self.mode_32bit = Signal()      # XXX UNUSED AT PRESENT
        self.state = Signal(State)
        self.instr_fault = Signal()     # indicator to request i-cache MMU lookup
        self.r_instr_fault = Signal()   # accessed in external_busy
        self.priv_mode = Signal()       # only for instruction fetch (not LDST)
        self.align_intr = Signal()
        self.busy = Signal()
        self.wait_dcache = Signal()
        self.wait_mmu = Signal()
        #self.intr_vec : integer range 0 to 16#fff#;
        #self.nia = Signal(64)
        #self.srr1 = Signal(16)
        # use these to set the dsisr or dar respectively
        self.mmu_set_spr = Signal()
        self.mmu_set_dsisr = Signal()
        self.mmu_set_dar = Signal()
        self.sprval_in = Signal(64)

        # ONLY access these read-only, do NOT attempt to change
        self.dsisr = Signal(32)
        self.dar = Signal(64)

    # when external_busy set, do not allow PortInterface to proceed
    def external_busy(self, m):
        """Hold off PortInterface while an i-side MMU lookup is in flight."""
        return self.instr_fault | self.r_instr_fault

    def set_wr_addr(self, m, addr, mask, misalign, msr, is_dcbz):
        """Drive a store request (comb) into self.req from PortInterface."""
        m.d.comb += self.req.load.eq(0) # store operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.raddr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr.pr) # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr.dr) # DR ==> virt
        m.d.comb += self.req.mode_32bit.eq(~msr.sf) # not-sixty-four ==> 32bit
        m.d.comb += self.req.dcbz.eq(is_dcbz)
        with m.If(misalign):
            # two dcache operations will be needed for this store
            m.d.comb += self.req.alignstate.eq(Misalign.NEED2WORDS)

        # m.d.comb += Display("set_wr_addr %i dcbz %i",addr,is_dcbz)

        # option to disable the cache entirely for write
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)

        # dcbz cannot do no-cache
        with m.If(is_dcbz & self.req.nc):
            m.d.comb += self.req.align_intr.eq(1)

        return None

    def set_rd_addr(self, m, addr, mask, misalign, msr):
        """Drive a load request (comb) into self.req from PortInterface."""
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.req.load.eq(1) # load operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.raddr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr.pr) # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr.dr) # DR ==> virt
        m.d.comb += self.req.mode_32bit.eq(~msr.sf) # not-sixty-four ==> 32bit
        # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
        with m.If(addr[28:] == Const(0xc, 4)):
            m.d.comb += self.req.nc.eq(1)
        # option to disable the cache entirely for read
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        with m.If(misalign):
            # two dcache operations will be needed for this load
            m.d.comb += self.req.alignstate.eq(Misalign.NEED2WORDS)
        return None #FIXME return value

    def set_wr_data(self, m, data, wen):
        """Latch store data and "blip" d_valid; returns store-done flag."""
        # do the "blip" on write data
        m.d.comb += self.d_valid.eq(1)
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.store_data.eq(data)
        #m.d.sync += self.d_out.byte_sel.eq(wen) # this might not be needed
        st_ok = self.done # TODO indicates write data is valid
        return st_ok

    def get_rd_data(self, m):
        """Return (data, valid) for a completed load (one-cycle delayed)."""
        ld_ok = self.done_delay # indicates read data is valid
        data = self.load_data_delay # actual read data
        return data, ld_ok

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync

        # microwatt takes one more cycle before next operation can be issued
        sync += self.done_delay.eq(self.done)
        #sync += self.load_data_delay[0:64].eq(self.load_data[0:64])

        # create dcache and icache module
        m.submodules.dcache = dcache = self.dcache
        m.submodules.icache = icache = self.icache

        # temp vars
        d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
        i_out, i_in, ibus = self.i_out, self.i_in, self.ibus
        m_out, m_in = self.m_out, self.m_in
        exc = self.pi.exc_o
        exception = exc.happened
        mmureq = Signal()

        # copy of address, but gets over-ridden for instr_fault
        maddr = Signal(64)
        m.d.comb += maddr.eq(self.raddr)

        # create a blip (single pulse) on valid read/write request
        # this can be over-ridden in the FSM to get dcache to re-run
        # a request when MMU_LOOKUP completes.
        m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
        ldst_r = LDSTRequest("ldst_r")
        sync += Display("MMUTEST: LoadStore1 d_in.error=%i",d_in.error)

        # fsm skeleton
        with m.Switch(self.state):
            with m.Case(State.IDLE):
                with m.If((self.d_validblip | self.instr_fault) &
                          ~exc.happened):
                    comb += self.busy.eq(1)
                    sync += self.state.eq(State.ACK_WAIT)
                    sync += ldst_r.eq(self.req) # copy of LDSTRequest on "blip"
                    # sync += Display("validblip self.req.virt_mode=%i",
                    # self.req.virt_mode)
                    with m.If(self.instr_fault):
                        # i-side fault: go straight to the MMU, using the
                        # instruction-fetch address rather than the LDST one
                        comb += mmureq.eq(1)
                        sync += self.r_instr_fault.eq(1)
                        comb += maddr.eq(self.maddr)
                        sync += self.state.eq(State.MMU_LOOKUP)
                    with m.Else():
                        sync += self.r_instr_fault.eq(0)
                    # if the LD/ST requires two dwords, move to waiting
                    # for first word
                    with m.If(self.req.alignstate == Misalign.NEED2WORDS):
                        sync += ldst_r.alignstate.eq(Misalign.WAITFIRST)
                with m.Else():
                    # no request: clear the latched copy
                    sync += ldst_r.eq(0)

            # waiting for completion
            with m.Case(State.ACK_WAIT):
                sync += Display("MMUTEST: ACK_WAIT")
                comb += self.busy.eq(~exc.happened)

                with m.If(d_in.error):
                    # cache error is not necessarily "final", it could
                    # be that it was just a TLB miss
                    with m.If(d_in.cache_paradox):
                        # "final" error: report DSI and return to idle
                        comb += exception.eq(1)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        sync += Display("cache error -> update dsisr")
                        sync += self.dsisr[63 - 38].eq(~ldst_r.load)
                        # XXX there is no architected bit for this
                        # (probably should be a machine check in fact)
                        sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)
                        sync += self.r_instr_fault.eq(0)

                    with m.Else():
                        # Look up the translation for TLB miss
                        # and also for permission error and RC error
                        # in case the PTE has been updated.
                        comb += mmureq.eq(1)
                        sync += self.state.eq(State.MMU_LOOKUP)
                with m.If(d_in.valid):
                    with m.If(self.done):
                        sync += Display("ACK_WAIT, done %x", self.raddr)
                    with m.If(ldst_r.alignstate == Misalign.ONEWORD):
                        # done if there is only one dcache operation
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        with m.If(ldst_r.load):
                            m.d.comb += self.load_data.eq(d_in.data)
                            sync += self.load_data_delay[0:64].eq(d_in.data)
                        m.d.comb += self.done.eq(~mmureq) # done if not MMU
                    with m.Elif(ldst_r.alignstate == Misalign.WAITFIRST):
                        # first LD done: load data, initiate 2nd request.
                        # leave in ACK_WAIT state
                        with m.If(ldst_r.load):
                            # FIX: was load_data[0:63], a 63-bit slice that
                            # silently dropped bit 63 of the first dword
                            # (cf. [0:64] on the delay copy and [64:128]
                            # for the second dword below)
                            m.d.comb += self.load_data[0:64].eq(d_in.data)
                            sync += self.load_data_delay[0:64].eq(d_in.data)
                        # mmm kinda cheating, make a 2nd blip
                        m.d.comb += self.d_validblip.eq(1)
                        comb += self.req.eq(ldst_r) # from copy of request
                        comb += self.req.raddr.eq(ldst_r.raddr + 8)
                        comb += self.req.byte_sel.eq(ldst_r.byte_sel[8:])
                        comb += self.req.alignstate.eq(Misalign.WAITSECOND)
                        sync += ldst_r.raddr.eq(ldst_r.raddr + 8)
                        sync += ldst_r.byte_sel.eq(ldst_r.byte_sel[8:])
                        sync += ldst_r.alignstate.eq(Misalign.WAITSECOND)
                        sync += Display(" second req %x", self.req.raddr)
                    with m.Elif(ldst_r.alignstate == Misalign.WAITSECOND):
                        sync += Display(" done second %x", d_in.data)
                        # done second load
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        with m.If(ldst_r.load):
                            m.d.comb += self.load_data[64:128].eq(d_in.data)
                            sync += self.load_data_delay[64:128].eq(d_in.data)
                        m.d.comb += self.done.eq(~mmureq) # done if not MMU

            # waiting here for the MMU TLB lookup to complete.
            # either re-try the dcache lookup or throw MMU exception
            with m.Case(State.MMU_LOOKUP):
                comb += self.busy.eq(~exception)
                with m.If(m_in.done):
                    with m.If(~self.r_instr_fault):
                        sync += Display("MMU_LOOKUP, done %x -> %x",
                                        self.raddr, d_out.addr)
                        # retry the request now that the MMU has
                        # installed a TLB entry, if not exception raised
                        m.d.comb += self.d_out.valid.eq(~exception)
                        sync += self.state.eq(State.ACK_WAIT)
                    with m.Else():
                        # i-side lookup complete: nothing more to do here
                        sync += self.state.eq(State.IDLE)
                        sync += self.r_instr_fault.eq(0)
                        comb += self.done.eq(1)

                with m.If(m_in.err):
                    # MMU RADIX exception thrown. XXX
                    # TODO: critical that the write here has to
                    # notify the MMU FSM of the change to dsisr
                    comb += exception.eq(1)
                    comb += self.done.eq(1)
                    sync += Display("MMU RADIX exception thrown")
                    sync += self.dsisr[63 - 33].eq(m_in.invalid)
                    sync += self.dsisr[63 - 36].eq(m_in.perm_error) # noexec
                    sync += self.dsisr[63 - 38].eq(~ldst_r.load)
                    sync += self.dsisr[63 - 44].eq(m_in.badtree)
                    sync += self.dsisr[63 - 45].eq(m_in.rc_error)
                    sync += self.state.eq(State.IDLE)
                    # exception thrown, clear out instruction fault state
                    sync += self.r_instr_fault.eq(0)

        # MMU FSM communicating a request to update DSISR or DAR (OP_MTSPR)
        with m.If(self.mmu_set_spr):
            with m.If(self.mmu_set_dsisr):
                sync += self.dsisr.eq(self.sprval_in)
            with m.If(self.mmu_set_dar):
                sync += self.dar.eq(self.sprval_in)

        # hmmm, alignment occurs in set_rd_addr/set_wr_addr, note exception
        with m.If(self.align_intr):
            comb += exc.happened.eq(1)
        # check for updating DAR
        with m.If(exception):
            sync += Display("exception %x", self.raddr)
            # alignment error: store address in DAR
            with m.If(self.align_intr):
                sync += Display("alignment error: addr in DAR %x", self.raddr)
                sync += self.dar.eq(self.raddr)
            with m.Elif(~self.r_instr_fault):
                sync += Display("not instr fault, addr in DAR %x", self.raddr)
                sync += self.dar.eq(self.raddr)

        # when done or exception, return to idle state
        with m.If(self.done | exception):
            sync += self.state.eq(State.IDLE)
            comb += self.busy.eq(0)

        # happened, alignment, instr_fault, invalid.
        # note that all of these flow through - eventually to the TRAP
        # pipeline, via PowerDecoder2.
        comb += self.align_intr.eq(self.req.align_intr)
        comb += exc.invalid.eq(m_in.invalid)
        comb += exc.alignment.eq(self.align_intr)
        comb += exc.instr_fault.eq(self.r_instr_fault)
        # badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(m_in.badtree)
        comb += exc.perm_error.eq(m_in.perm_error)
        comb += exc.rc_error.eq(m_in.rc_error)
        comb += exc.segment_fault.eq(m_in.segerr)

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        comb += dbus.adr.eq(dcache.bus.adr)
        comb += dbus.dat_w.eq(dcache.bus.dat_w)
        comb += dbus.sel.eq(dcache.bus.sel)
        comb += dbus.cyc.eq(dcache.bus.cyc)
        comb += dbus.stb.eq(dcache.bus.stb)
        comb += dbus.we.eq(dcache.bus.we)

        comb += dcache.bus.dat_r.eq(dbus.dat_r)
        comb += dcache.bus.ack.eq(dbus.ack)
        if hasattr(dbus, "stall"):
            comb += dcache.bus.stall.eq(dbus.stall)

        # update out d data when flag set
        with m.If(self.d_w_valid):
            with m.If(ldst_r.alignstate == Misalign.WAITSECOND):
                # second half of a misaligned store
                m.d.sync += d_out.data.eq(self.store_data[64:128])
            with m.Else():
                m.d.sync += d_out.data.eq(self.store_data[0:64])
        #with m.Else():
        #    m.d.sync += d_out.data.eq(0)
        # unit test passes with that change

        # this must move into the FSM, conditionally noticing that
        # the "blip" comes from self.d_validblip.
        # task 1: look up in dcache
        # task 2: if dcache fails, look up in MMU.
        # do **NOT** confuse the two.
        with m.If(self.d_validblip):
            # new request this cycle: take fields from the live request
            m.d.comb += self.d_out.valid.eq(~exc.happened)
            m.d.comb += d_out.load.eq(self.req.load)
            m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
            m.d.comb += self.raddr.eq(self.req.raddr)
            m.d.comb += d_out.nc.eq(self.req.nc)
            m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
            m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
            #m.d.comb += Display("validblip dcbz=%i addr=%x",
            #self.req.dcbz,self.req.addr)
            m.d.comb += d_out.dcbz.eq(self.req.dcbz)
        with m.Else():
            # in-progress request: take fields from the latched copy
            m.d.comb += d_out.load.eq(ldst_r.load)
            m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
            m.d.comb += self.raddr.eq(ldst_r.raddr)
            m.d.comb += d_out.nc.eq(ldst_r.nc)
            m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
            m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
            #m.d.comb += Display("no_validblip dcbz=%i addr=%x",
            #ldst_r.dcbz,ldst_r.addr)
            m.d.comb += d_out.dcbz.eq(ldst_r.dcbz)
        m.d.comb += d_out.addr.eq(self.raddr)

        # Update outputs to MMU
        m.d.comb += m_out.valid.eq(mmureq)
        m.d.comb += m_out.iside.eq(self.instr_fault)
        m.d.comb += m_out.load.eq(ldst_r.load)
        # i-side lookups use the fetch-unit privilege level, d-side
        # lookups use the privilege level latched with the LDST request
        with m.If(self.instr_fault):
            m.d.comb += m_out.priv.eq(self.priv_mode)
        with m.Else():
            m.d.comb += m_out.priv.eq(ldst_r.priv_mode)
        m.d.comb += m_out.tlbie.eq(self.tlbie)
        # m_out.mtspr <= mmu_mtspr; # TODO
        # m_out.sprn <= sprn; # TODO
        m.d.comb += m_out.addr.eq(maddr)
        # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
        # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate

        return m

    def ports(self):
        yield from super().ports()
        # TODO: memory ports
465
466
class TestSRAMLoadStore1(LoadStore1):
    """LoadStore1 with its Wishbone data bus wired to a small internal
    SRAM, for unit testing without an external memory model.
    """
    def __init__(self, pspec):
        super().__init__(pspec)
        pspec = self.pspec
        # small 32-entry Memory, unless pspec overrides the depth
        if (hasattr(pspec, "dmem_test_depth") and
                isinstance(pspec.dmem_test_depth, int)):
            depth = pspec.dmem_test_depth
        else:
            depth = 32
        print("TestSRAMBareLoadStoreUnit depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            # (this statement was accidentally duplicated; once suffices)
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        for fanin in fanins:
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m
503