7200540d3baa86b0ba24a23c67d87073b1378e63
[soc.git] / src / soc / fu / ldst / loadstore.py
1 """LoadStore1 FSM.
2
3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
6 by way of PortInterface. PortInterface is where things need extending,
7 such as adding dcbz support, etc.
8
9 this module basically handles "pure" load / store operations, and
10 its first job is to ask the D-Cache for the data. if that fails,
11 the second task (if virtual memory is enabled) is to ask the MMU
12 to perform a TLB, then to go *back* to the cache and ask again.
13
14 Links:
15
16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
17
18 """
19
20 from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
21 Record, Memory,
22 Const)
23 from nmutil.iocontrol import RecordObject
24 from nmutil.util import rising_edge, Display
25 from enum import Enum, unique
26
27 from soc.experiment.dcache import DCache
28 from soc.experiment.icache import ICache
29 from soc.experiment.pimem import PortInterfaceBase
30 from soc.experiment.mem_types import LoadStore1ToMMUType
31 from soc.experiment.mem_types import MMUToLoadStore1Type
32
33 from soc.minerva.wishbone import make_wb_layout
34 from soc.bus.sram import SRAM
35 from nmutil.util import Display
36
37
@unique
class State(Enum):
    """Top-level LoadStore1 FSM state (held in LoadStore1.state)."""
    IDLE = 0 # ready for instruction
    ACK_WAIT = 1 # waiting for ack from dcache
    MMU_LOOKUP = 2 # waiting for MMU to look up translation
    #SECOND_REQ = 3 # second request for unaligned transfer
44
@unique
class Misalign(Enum):
    """Progress of a (potentially) misaligned transfer.

    A misaligned LD/ST is split into two dword requests to the dcache:
    NEED2WORDS is set on issue, then the FSM walks WAITFIRST -> WAITSECOND.
    """
    ONEWORD = 0 # only one word needed, all good
    NEED2WORDS = 1 # need to send/receive two words
    WAITFIRST = 2 # waiting for the first word
    WAITSECOND = 3 # waiting for the second word
51
52
53 # captures the LDSTRequest from the PortInterface, which "blips" most
54 # of this at us (pipeline-style).
class LDSTRequest(RecordObject):
    """Latched capture of one load/store request from PortInterface.

    PortInterface "blips" (single-cycle pulses) most of these fields at
    LoadStore1; they are copied into an instance of this record so the
    request survives for the duration of the dcache/MMU transaction.
    """
    def __init__(self, name=None):
        RecordObject.__init__(self, name=name)

        self.load = Signal()               # 1 = load, 0 = store
        self.dcbz = Signal()               # dcbz (cache-block-zero) request
        self.raddr = Signal(64)            # request address
        # self.store_data = Signal(64) # this is already sync (on a delay)
        self.byte_sel = Signal(16)         # 16 bits to cover misaligned LD/ST
        self.nc = Signal() # non-cacheable access
        self.virt_mode = Signal()          # from MSR.DR (DR ==> virt)
        self.priv_mode = Signal()          # from ~MSR.PR (not-problem ==> priv)
        self.mode_32bit = Signal() # XXX UNUSED AT PRESENT
        self.alignstate = Signal(Misalign) # progress of alignment request
        self.align_intr = Signal()         # request raises alignment interrupt
        # atomic (LR/SC reservation)
        self.reserve = Signal()
        self.atomic = Signal()
        self.atomic_last = Signal()
74
75
76 # glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    """Glue logic between PortInterface, the D-Cache/I-Cache and the MMU.

    Handles "pure" load/store operations on behalf of LDSTCompUnit:
    first asks the D-Cache for the data; if that fails (TLB miss,
    permission/RC error) and virtual memory is enabled, asks the MMU to
    perform a lookup, then re-runs the dcache request.  Misaligned
    transfers are split into two dword requests (see Misalign).

    Fix applied: the first-dword load capture used slice [0:63] (63 bits),
    dropping bit 63; corrected to [0:64] to match load_data_delay and the
    second-dword slice [64:128].
    """
    def __init__(self, pspec):
        self.pspec = pspec
        # cache may be disabled entirely via pspec for testing
        self.disable_cache = (hasattr(pspec, "disable_cache") and
                              pspec.disable_cache == True)
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache(pspec)
        self.icache = ICache(pspec)
        # these names are from the perspective of here (LoadStore1)
        self.d_out = self.dcache.d_in     # in to dcache is out for LoadStore
        self.d_in = self.dcache.d_out     # out from dcache is in for LoadStore
        self.i_out = self.icache.i_in     # in to icache is out for LoadStore
        self.i_in = self.icache.i_out     # out from icache is in for LoadStore
        self.m_out = LoadStore1ToMMUType("m_out") # out *to* MMU
        self.m_in = MMUToLoadStore1Type("m_in")   # in *from* MMU
        self.req = LDSTRequest(name="ldst_req")

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))
        self.ibus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()
        self.d_w_valid = Signal()
        self.d_validblip = Signal()

        # state info for LD/ST
        self.done = Signal()
        self.done_delay = Signal()
        # latch most of the input request
        self.load = Signal()
        self.tlbie = Signal()
        self.dcbz = Signal()
        self.raddr = Signal(64)
        self.maddr = Signal(64)
        self.store_data = Signal(128)      # 128-bit to cope with
        self.load_data = Signal(128)       # misalignment
        self.load_data_delay = Signal(128) # perform 2 LD/STs
        self.byte_sel = Signal(16)         # also for misaligned, 16-bit
        self.alignstate = Signal(Misalign) # progress of alignment request
        #self.xerc : xer_common_t;
        #self.rc = Signal()
        self.nc = Signal()                 # non-cacheable access
        self.mode_32bit = Signal()         # XXX UNUSED AT PRESENT
        self.state = Signal(State)
        self.instr_fault = Signal()   # indicator to request i-cache MMU lookup
        self.r_instr_fault = Signal() # accessed in external_busy
        self.priv_mode = Signal() # only for instruction fetch (not LDST)
        self.align_intr = Signal()
        self.busy = Signal()
        self.wait_dcache = Signal()
        self.wait_mmu = Signal()
        #self.intr_vec : integer range 0 to 16#fff#;
        #self.nia = Signal(64)
        #self.srr1 = Signal(16)
        # use these to set the dsisr or dar respectively
        self.mmu_set_spr = Signal()
        self.mmu_set_dsisr = Signal()
        self.mmu_set_dar = Signal()
        self.sprval_in = Signal(64)

        # ONLY access these read-only, do NOT attempt to change
        self.dsisr = Signal(32)
        self.dar = Signal(64)

    # when external_busy set, do not allow PortInterface to proceed
    def external_busy(self, m):
        """Return a condition that stalls PortInterface while an
        instruction-fault (i-side MMU lookup) is pending."""
        return self.instr_fault | self.r_instr_fault

    def set_wr_addr(self, m, addr, mask, misalign, msr, is_dcbz):
        """Capture a store request into self.req (combinatorial).

        addr/mask come from PortInterface; msr supplies privilege and
        translation mode; is_dcbz marks a cache-block-zero operation.
        """
        m.d.comb += self.req.load.eq(0) # store operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.raddr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr.pr) # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr.dr) # DR ==> virt
        m.d.comb += self.req.mode_32bit.eq(~msr.sf) # not-sixty-four ==> 32bit
        m.d.comb += self.req.dcbz.eq(is_dcbz)
        with m.If(misalign):
            m.d.comb += self.req.alignstate.eq(Misalign.NEED2WORDS)

        # m.d.comb += Display("set_wr_addr %i dcbz %i",addr,is_dcbz)

        # option to disable the cache entirely for write
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)

        # dcbz cannot do no-cache
        with m.If(is_dcbz & self.req.nc):
            m.d.comb += self.req.align_intr.eq(1)

        # hmm, rather than add yet another argument to set_wr_addr
        # read direct from PortInterface
        m.d.comb += self.req.reserve.eq(self.pi.reserve) # atomic request

        return None

    def set_rd_addr(self, m, addr, mask, misalign, msr):
        """Capture a load request into self.req and raise the d_valid
        flag (turned into a one-cycle "blip" in elaborate())."""
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.req.load.eq(1) # load operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.raddr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr.pr) # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr.dr) # DR ==> virt
        m.d.comb += self.req.mode_32bit.eq(~msr.sf) # not-sixty-four ==> 32bit
        # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
        with m.If(addr[28:] == Const(0xc, 4)):
            m.d.comb += self.req.nc.eq(1)
        # option to disable the cache entirely for read
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        with m.If(misalign):
            m.d.comb += self.req.alignstate.eq(Misalign.NEED2WORDS)

        # hmm, rather than add yet another argument to set_rd_addr
        # read direct from PortInterface
        m.d.comb += self.req.reserve.eq(self.pi.reserve) # atomic request

        return None #FIXME return value

    def set_wr_data(self, m, data, wen):
        """Latch store data; returns the "store ok" condition."""
        # do the "blip" on write data
        m.d.comb += self.d_valid.eq(1)
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.store_data.eq(data)
        #m.d.sync += self.d_out.byte_sel.eq(wen) # this might not be needed
        st_ok = self.done # TODO indicates write data is valid
        return st_ok

    def get_rd_data(self, m):
        """Return (data, valid) for a completed load."""
        ld_ok = self.done_delay # indicates read data is valid
        data = self.load_data_delay # actual read data
        return data, ld_ok

    def elaborate(self, platform):
        """Build the FSM plus dcache/icache/MMU wiring (see State and
        Misalign for the state encodings)."""
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync

        # microwatt takes one more cycle before next operation can be issued
        sync += self.done_delay.eq(self.done)
        #sync += self.load_data_delay[0:64].eq(self.load_data[0:64])

        # create dcache and icache module
        m.submodules.dcache = dcache = self.dcache
        m.submodules.icache = icache = self.icache

        # temp vars
        d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
        i_out, i_in, ibus = self.i_out, self.i_in, self.ibus
        m_out, m_in = self.m_out, self.m_in
        exc = self.pi.exc_o
        exception = exc.happened
        mmureq = Signal()

        # copy of address, but gets over-ridden for instr_fault
        maddr = Signal(64)
        m.d.comb += maddr.eq(self.raddr)

        # create a blip (single pulse) on valid read/write request
        # this can be over-ridden in the FSM to get dcache to re-run
        # a request when MMU_LOOKUP completes.
        m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
        ldst_r = LDSTRequest("ldst_r")
        sync += Display("MMUTEST: LoadStore1 d_in.error=%i",d_in.error)

        # fsm skeleton
        with m.Switch(self.state):
            with m.Case(State.IDLE):
                with m.If((self.d_validblip | self.instr_fault) &
                          ~exc.happened):
                    comb += self.busy.eq(1)
                    sync += self.state.eq(State.ACK_WAIT)
                    sync += ldst_r.eq(self.req) # copy of LDSTRequest on "blip"
                    # sync += Display("validblip self.req.virt_mode=%i",
                    # self.req.virt_mode)
                    with m.If(self.instr_fault):
                        # i-side fault: hand the (instruction) address
                        # straight to the MMU instead of the dcache
                        comb += mmureq.eq(1)
                        sync += self.r_instr_fault.eq(1)
                        comb += maddr.eq(self.maddr)
                        sync += self.state.eq(State.MMU_LOOKUP)
                    with m.Else():
                        sync += self.r_instr_fault.eq(0)
                        # if the LD/ST requires two dwords, move to waiting
                        # for first word
                        with m.If(self.req.alignstate == Misalign.NEED2WORDS):
                            sync += ldst_r.alignstate.eq(Misalign.WAITFIRST)
                with m.Else():
                    sync += ldst_r.eq(0)

            # waiting for completion
            with m.Case(State.ACK_WAIT):
                sync += Display("MMUTEST: ACK_WAIT")
                comb += self.busy.eq(~exc.happened)

                with m.If(d_in.error):
                    # cache error is not necessarily "final", it could
                    # be that it was just a TLB miss
                    with m.If(d_in.cache_paradox):
                        comb += exception.eq(1)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        sync += Display("cache error -> update dsisr")
                        sync += self.dsisr[63 - 38].eq(~ldst_r.load)
                        # XXX there is no architected bit for this
                        # (probably should be a machine check in fact)
                        sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)
                        sync += self.r_instr_fault.eq(0)

                    with m.Else():
                        # Look up the translation for TLB miss
                        # and also for permission error and RC error
                        # in case the PTE has been updated.
                        comb += mmureq.eq(1)
                        sync += self.state.eq(State.MMU_LOOKUP)
                with m.If(d_in.valid):
                    with m.If(self.done):
                        sync += Display("ACK_WAIT, done %x", self.raddr)
                    with m.If(ldst_r.alignstate == Misalign.ONEWORD):
                        # done if there is only one dcache operation
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        with m.If(ldst_r.load):
                            m.d.comb += self.load_data.eq(d_in.data)
                            sync += self.load_data_delay[0:64].eq(d_in.data)
                        m.d.comb += self.done.eq(~mmureq) # done if not MMU
                    with m.Elif(ldst_r.alignstate == Misalign.WAITFIRST):
                        # first LD done: load data, initiate 2nd request.
                        # leave in ACK_WAIT state
                        with m.If(ldst_r.load):
                            # FIX: was [0:63] - off-by-one which dropped
                            # bit 63 of the first dword; first dword
                            # occupies [0:64] (matches load_data_delay)
                            m.d.comb += self.load_data[0:64].eq(d_in.data)
                            sync += self.load_data_delay[0:64].eq(d_in.data)
                        # mmm kinda cheating, make a 2nd blip
                        m.d.comb += self.d_validblip.eq(1)
                        comb += self.req.eq(ldst_r) # from copy of request
                        comb += self.req.raddr.eq(ldst_r.raddr + 8)
                        comb += self.req.byte_sel.eq(ldst_r.byte_sel[8:])
                        comb += self.req.alignstate.eq(Misalign.WAITSECOND)
                        sync += ldst_r.raddr.eq(ldst_r.raddr + 8)
                        sync += ldst_r.byte_sel.eq(ldst_r.byte_sel[8:])
                        sync += ldst_r.alignstate.eq(Misalign.WAITSECOND)
                        sync += Display(" second req %x", self.req.raddr)
                    with m.Elif(ldst_r.alignstate == Misalign.WAITSECOND):
                        sync += Display(" done second %x", d_in.data)
                        # done second load
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        with m.If(ldst_r.load):
                            m.d.comb += self.load_data[64:128].eq(d_in.data)
                            sync += self.load_data_delay[64:128].eq(d_in.data)
                        m.d.comb += self.done.eq(~mmureq) # done if not MMU

            # waiting here for the MMU TLB lookup to complete.
            # either re-try the dcache lookup or throw MMU exception
            with m.Case(State.MMU_LOOKUP):
                comb += self.busy.eq(~exception)
                with m.If(m_in.done):
                    with m.If(~self.r_instr_fault):
                        sync += Display("MMU_LOOKUP, done %x -> %x",
                                        self.raddr, d_out.addr)
                        # retry the request now that the MMU has
                        # installed a TLB entry, if not exception raised
                        m.d.comb += self.d_out.valid.eq(~exception)
                        sync += self.state.eq(State.ACK_WAIT)
                    with m.Else():
                        # i-side lookup complete: i-cache re-fetches
                        sync += self.state.eq(State.IDLE)
                        sync += self.r_instr_fault.eq(0)
                        comb += self.done.eq(1)

                with m.If(m_in.err):
                    # MMU RADIX exception thrown. XXX
                    # TODO: critical that the write here has to
                    # notify the MMU FSM of the change to dsisr
                    comb += exception.eq(1)
                    comb += self.done.eq(1)
                    sync += Display("MMU RADIX exception thrown")
                    # DSISR bits use PowerISA big-endian numbering (63-n)
                    sync += self.dsisr[63 - 33].eq(m_in.invalid)
                    sync += self.dsisr[63 - 36].eq(m_in.perm_error) # noexec
                    sync += self.dsisr[63 - 38].eq(~ldst_r.load)
                    sync += self.dsisr[63 - 44].eq(m_in.badtree)
                    sync += self.dsisr[63 - 45].eq(m_in.rc_error)
                    sync += self.state.eq(State.IDLE)
                    # exception thrown, clear out instruction fault state
                    sync += self.r_instr_fault.eq(0)

        # MMU FSM communicating a request to update DSISR or DAR (OP_MTSPR)
        with m.If(self.mmu_set_spr):
            with m.If(self.mmu_set_dsisr):
                sync += self.dsisr.eq(self.sprval_in)
            with m.If(self.mmu_set_dar):
                sync += self.dar.eq(self.sprval_in)

        # hmmm, alignment occurs in set_rd_addr/set_wr_addr, note exception
        with m.If(self.align_intr):
            comb += exc.happened.eq(1)
        # check for updating DAR
        with m.If(exception):
            sync += Display("exception %x", self.raddr)
            # alignment error: store address in DAR
            with m.If(self.align_intr):
                sync += Display("alignment error: addr in DAR %x", self.raddr)
                sync += self.dar.eq(self.raddr)
            with m.Elif(~self.r_instr_fault):
                sync += Display("not instr fault, addr in DAR %x", self.raddr)
                sync += self.dar.eq(self.raddr)

        # when done or exception, return to idle state
        with m.If(self.done | exception):
            sync += self.state.eq(State.IDLE)
            comb += self.busy.eq(0)

        # happened, alignment, instr_fault, invalid.
        # note that all of these flow through - eventually to the TRAP
        # pipeline, via PowerDecoder2.
        comb += self.align_intr.eq(self.req.align_intr)
        comb += exc.invalid.eq(m_in.invalid)
        comb += exc.alignment.eq(self.align_intr)
        comb += exc.instr_fault.eq(self.r_instr_fault)
        # badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(m_in.badtree)
        comb += exc.perm_error.eq(m_in.perm_error)
        comb += exc.rc_error.eq(m_in.rc_error)
        comb += exc.segment_fault.eq(m_in.segerr)

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        comb += dbus.adr.eq(dcache.bus.adr)
        comb += dbus.dat_w.eq(dcache.bus.dat_w)
        comb += dbus.sel.eq(dcache.bus.sel)
        comb += dbus.cyc.eq(dcache.bus.cyc)
        comb += dbus.stb.eq(dcache.bus.stb)
        comb += dbus.we.eq(dcache.bus.we)

        comb += dcache.bus.dat_r.eq(dbus.dat_r)
        comb += dcache.bus.ack.eq(dbus.ack)
        if hasattr(dbus, "stall"):
            comb += dcache.bus.stall.eq(dbus.stall)

        # update out d data when flag set, selecting the dword that
        # corresponds to the current misalignment phase
        with m.If(self.d_w_valid):
            with m.If(ldst_r.alignstate == Misalign.WAITSECOND):
                m.d.sync += d_out.data.eq(self.store_data[64:128])
            with m.Else():
                m.d.sync += d_out.data.eq(self.store_data[0:64])
        #with m.Else():
        #    m.d.sync += d_out.data.eq(0)
        # unit test passes with that change

        # this must move into the FSM, conditionally noticing that
        # the "blip" comes from self.d_validblip.
        # task 1: look up in dcache
        # task 2: if dcache fails, look up in MMU.
        # do **NOT** confuse the two.
        with m.If(self.d_validblip):
            # drive dcache from the live request on the blip cycle...
            m.d.comb += self.d_out.valid.eq(~exc.happened)
            m.d.comb += d_out.load.eq(self.req.load)
            m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
            m.d.comb += self.raddr.eq(self.req.raddr)
            m.d.comb += d_out.nc.eq(self.req.nc)
            m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
            m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
            m.d.comb += d_out.reserve.eq(self.req.reserve)
            m.d.comb += d_out.atomic.eq(self.req.atomic)
            m.d.comb += d_out.atomic_last.eq(self.req.atomic_last)
            #m.d.comb += Display("validblip dcbz=%i addr=%x",
            #self.req.dcbz,self.req.addr)
            m.d.comb += d_out.dcbz.eq(self.req.dcbz)
        with m.Else():
            # ... and from the latched copy (ldst_r) on all other cycles
            m.d.comb += d_out.load.eq(ldst_r.load)
            m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
            m.d.comb += self.raddr.eq(ldst_r.raddr)
            m.d.comb += d_out.nc.eq(ldst_r.nc)
            m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
            m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
            m.d.comb += d_out.reserve.eq(ldst_r.reserve)
            m.d.comb += d_out.atomic.eq(ldst_r.atomic)
            m.d.comb += d_out.atomic_last.eq(ldst_r.atomic_last)
            #m.d.comb += Display("no_validblip dcbz=%i addr=%x",
            #ldst_r.dcbz,ldst_r.addr)
            m.d.comb += d_out.dcbz.eq(ldst_r.dcbz)
        m.d.comb += d_out.addr.eq(self.raddr)

        # Update outputs to MMU
        m.d.comb += m_out.valid.eq(mmureq)
        m.d.comb += m_out.iside.eq(self.instr_fault)
        m.d.comb += m_out.load.eq(ldst_r.load)
        with m.If(self.instr_fault):
            m.d.comb += m_out.priv.eq(self.priv_mode)
        with m.Else():
            m.d.comb += m_out.priv.eq(ldst_r.priv_mode)
        m.d.comb += m_out.tlbie.eq(self.tlbie)
        # m_out.mtspr <= mmu_mtspr; # TODO
        # m_out.sprn <= sprn; # TODO
        m.d.comb += m_out.addr.eq(maddr)
        # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
        # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate

        return m

    def ports(self):
        """Yield simulation/synthesis ports (superclass ports only)."""
        yield from super().ports()
        # TODO: memory ports
481
482
class TestSRAMLoadStore1(LoadStore1):
    """LoadStore1 with its Wishbone data bus wired to a small test SRAM.

    Used for unit testing: instead of an external memory system, dbus is
    connected directly to an nmigen-soc SRAM (depth configurable via
    pspec.dmem_test_depth, default 32 entries).

    Fix applied: the fanout connection loop emitted the same `comb +=`
    statement twice per signal (accidental duplication); the redundant
    second assignment has been removed.
    """
    def __init__(self, pspec):
        super().__init__(pspec)
        pspec = self.pspec
        # small 32-entry Memory
        if (hasattr(pspec, "dmem_test_depth") and
                isinstance(pspec.dmem_test_depth, int)):
            depth = pspec.dmem_test_depth
        else:
            depth = 32
        print("TestSRAMBareLoadStoreUnit depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        """Elaborate LoadStore1 then attach the test SRAM to dbus."""
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        for fanin in fanins:
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m
519