# [soc.git] / src / soc / fu / ldst / loadstore.py
"""LoadStore1 FSM.

based on microwatt loadstore1.vhdl, but conforming to PortInterface.
unlike loadstore1.vhdl this does *not* deal with actual Load/Store
ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
by way of PortInterface. PortInterface is where things need extending,
such as adding dcbz support, etc.

this module basically handles "pure" load / store operations, and
its first job is to ask the D-Cache for the data. if that fails,
the second task (if virtual memory is enabled) is to ask the MMU
to perform a TLB look-up, then to go *back* to the cache and ask again.

Links:

* https://bugs.libre-soc.org/show_bug.cgi?id=465

"""
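
# Illustrative usage sketch (not from the original file; the names used are
# the ones this module itself relies on -- pspec with reg_wid/addr_wid,
# self.pi from PortInterfaceBase, and the dbus/ibus Wishbone Records):
#
#   m.submodules.ldst = ldst = LoadStore1(pspec)
#   pi = ldst.pi                        # PortInterface, used by LDSTCompUnit
#   dbus, ibus = ldst.dbus, ldst.ibus   # Wishbone buses out to the fabric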

from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
                    Record, Memory,
                    Const)
from nmutil.iocontrol import RecordObject
from nmutil.util import rising_edge, Display
from enum import Enum, unique

from soc.experiment.dcache import DCache
from soc.experiment.icache import ICache
from soc.experiment.pimem import PortInterfaceBase
from soc.experiment.mem_types import LoadStore1ToMMUType
from soc.experiment.mem_types import MMUToLoadStore1Type

from soc.minerva.wishbone import make_wb_layout
from soc.bus.sram import SRAM


@unique
class State(Enum):
    IDLE = 0 # ready for instruction
    ACK_WAIT = 1 # waiting for ack from dcache
    MMU_LOOKUP = 2 # waiting for MMU to look up translation
    #SECOND_REQ = 3 # second request for unaligned transfer
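
# Misaligned transfers: when the incoming request is flagged as misaligned,
# set_rd_addr/set_wr_addr mark it NEED2WORDS.  The FSM then performs two
# back-to-back dcache operations (WAITFIRST, then WAITSECOND, with the
# address advanced by 8 and the upper half of the 16-bit byte_sel mask),
# assembling the data in the 128-bit load_data/store_data registers.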
@unique
class Misalign(Enum):
    ONEWORD = 0 # only one word needed, all good
    NEED2WORDS = 1 # need to send/receive two words
    WAITFIRST = 2 # waiting for the first word
    WAITSECOND = 3 # waiting for the second word

# captures the LDSTRequest from the PortInterface, which "blips" most
# of this at us (pipeline-style).
class LDSTRequest(RecordObject):
    def __init__(self, name=None):
        RecordObject.__init__(self, name=name)

        self.load = Signal()
        self.dcbz = Signal()
        self.raddr = Signal(64)
        # self.store_data = Signal(64) # this is already sync (on a delay)
        self.byte_sel = Signal(16)
        self.nc = Signal() # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.mode_32bit = Signal() # XXX UNUSED AT PRESENT
        self.alignstate = Signal(Misalign) # progress of alignment request
        self.align_intr = Signal()


# glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    def __init__(self, pspec):
        self.pspec = pspec
        self.disable_cache = (hasattr(pspec, "disable_cache") and
                              pspec.disable_cache == True)
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache(pspec)
        self.icache = ICache(pspec)
        # these names are from the perspective of here (LoadStore1)
        self.d_out = self.dcache.d_in # in to dcache is out for LoadStore
        self.d_in = self.dcache.d_out # out from dcache is in for LoadStore
        self.i_out = self.icache.i_in # in to icache is out for LoadStore
        self.i_in = self.icache.i_out # out from icache is in for LoadStore
        self.m_out = LoadStore1ToMMUType("m_out") # out *to* MMU
        self.m_in = MMUToLoadStore1Type("m_in") # in *from* MMU
        self.req = LDSTRequest(name="ldst_req")

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))
        self.ibus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()
        self.d_w_valid = Signal()
        self.d_validblip = Signal()
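        # (d_valid is the level request, set for reads in set_rd_addr and
        #  for write data in set_wr_data; d_validblip is its one-clock
        #  rising-edge pulse, created in elaborate(); d_w_valid flags that
        #  store_data should be captured into d_out.data)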

        # state info for LD/ST
        self.done = Signal()
        self.done_delay = Signal()
        # latch most of the input request
        self.load = Signal()
        self.tlbie = Signal()
        self.dcbz = Signal()
        self.raddr = Signal(64)
        self.maddr = Signal(64)
        self.store_data = Signal(128) # 128-bit to cope with
        self.load_data = Signal(128) # misalignment
        self.load_data_delay = Signal(128) # perform 2 LD/STs
        self.byte_sel = Signal(16) # also for misaligned, 16-bit
        self.alignstate = Signal(Misalign) # progress of alignment request
        #self.xerc : xer_common_t;
        #self.reserve = Signal()
        #self.atomic = Signal()
        #self.atomic_last = Signal()
        #self.rc = Signal()
        self.nc = Signal() # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.mode_32bit = Signal() # XXX UNUSED AT PRESENT
        self.state = Signal(State)
        self.instr_fault = Signal() # indicator to request i-cache MMU lookup
        self.r_instr_fault = Signal() # accessed in external_busy
        self.align_intr = Signal()
        self.busy = Signal()
        self.wait_dcache = Signal()
        self.wait_mmu = Signal()
        #self.intr_vec : integer range 0 to 16#fff#;
        #self.nia = Signal(64)
        #self.srr1 = Signal(16)
        # use these to set the dsisr or dar respectively
        self.mmu_set_spr = Signal()
        self.mmu_set_dsisr = Signal()
        self.mmu_set_dar = Signal()
        self.sprval_in = Signal(64)

        # ONLY access these read-only, do NOT attempt to change
        self.dsisr = Signal(32)
        self.dar = Signal(64)
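        # note: DSISR bit positions elsewhere in this file use PowerISA MSB0
        # numbering (bits 32..63); "63 - n" converts them to an LSB0 index
        # into this 32-bit Signal (e.g. 63 - 38 = 25)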

    # when external_busy set, do not allow PortInterface to proceed
    def external_busy(self, m):
        return self.instr_fault | self.r_instr_fault

    def set_wr_addr(self, m, addr, mask, misalign, msr, is_dcbz):
        m.d.comb += self.req.load.eq(0) # store operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.raddr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr.pr) # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr.dr) # DR ==> virt
        m.d.comb += self.req.mode_32bit.eq(~msr.sf) # not-sixty-four ==> 32bit
        m.d.comb += self.req.dcbz.eq(is_dcbz)
        with m.If(misalign):
            m.d.comb += self.req.alignstate.eq(Misalign.NEED2WORDS)

        # m.d.comb += Display("set_wr_addr %i dcbz %i",addr,is_dcbz)

        # option to disable the cache entirely for write
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)

        # dcbz cannot do no-cache
        with m.If(is_dcbz & self.req.nc):
            m.d.comb += self.req.align_intr.eq(1)

        return None

    def set_rd_addr(self, m, addr, mask, misalign, msr):
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.req.load.eq(1) # load operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.raddr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr.pr) # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr.dr) # DR ==> virt
        m.d.comb += self.req.mode_32bit.eq(~msr.sf) # not-sixty-four ==> 32bit
        # BAD HACK! disable caching on LD when address is 0xCxxx_xxxx
        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
        with m.If(addr[28:] == Const(0xc, 4)):
            m.d.comb += self.req.nc.eq(1)
        # option to disable the cache entirely for read
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        with m.If(misalign):
            m.d.comb += self.req.alignstate.eq(Misalign.NEED2WORDS)
        return None #FIXME return value

    def set_wr_data(self, m, data, wen):
        # do the "blip" on write data
        m.d.comb += self.d_valid.eq(1)
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.store_data.eq(data)
        #m.d.sync += self.d_out.byte_sel.eq(wen) # this might not be needed
        st_ok = self.done # TODO indicates write data is valid
        return st_ok

    def get_rd_data(self, m):
        ld_ok = self.done_delay # indicates read data is valid
        data = self.load_data_delay # actual read data
        return data, ld_ok

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync

        # microwatt takes one more cycle before next operation can be issued
        sync += self.done_delay.eq(self.done)
        #sync += self.load_data_delay[0:64].eq(self.load_data[0:64])

        # create dcache and icache module
        m.submodules.dcache = dcache = self.dcache
        m.submodules.icache = icache = self.icache

        # temp vars
        d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
        i_out, i_in, ibus = self.i_out, self.i_in, self.ibus
        m_out, m_in = self.m_out, self.m_in
        exc = self.pi.exc_o
        exception = exc.happened
        mmureq = Signal()

        # copy of address, but gets over-ridden for instr_fault
        maddr = Signal(64)
        m.d.comb += maddr.eq(self.raddr)

        # create a blip (single pulse) on valid read/write request
        # this can be over-ridden in the FSM to get dcache to re-run
        # a request when MMU_LOOKUP completes.
        m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
        ldst_r = LDSTRequest("ldst_r")
        sync += Display("MMUTEST: LoadStore1 d_in.error=%i", d_in.error)

        # fsm skeleton
        with m.Switch(self.state):
            with m.Case(State.IDLE):
                with m.If((self.d_validblip | self.instr_fault) &
                          ~exc.happened):
                    comb += self.busy.eq(1)
                    sync += self.state.eq(State.ACK_WAIT)
                    sync += ldst_r.eq(self.req) # copy of LDSTRequest on "blip"
                    # sync += Display("validblip self.req.virt_mode=%i",
                    # self.req.virt_mode)
                    with m.If(self.instr_fault):
                        comb += mmureq.eq(1)
                        sync += self.r_instr_fault.eq(1)
                        comb += maddr.eq(self.maddr)
                        sync += self.state.eq(State.MMU_LOOKUP)
                    with m.Else():
                        sync += self.r_instr_fault.eq(0)
                        # if the LD/ST requires two dwords, move to waiting
                        # for first word
                        with m.If(self.req.alignstate == Misalign.NEED2WORDS):
                            sync += ldst_r.alignstate.eq(Misalign.WAITFIRST)
                with m.Else():
                    sync += ldst_r.eq(0)

            # waiting for completion
            with m.Case(State.ACK_WAIT):
                sync += Display("MMUTEST: ACK_WAIT")
                comb += self.busy.eq(~exc.happened)

                with m.If(d_in.error):
                    # cache error is not necessarily "final", it could
                    # be that it was just a TLB miss
                    with m.If(d_in.cache_paradox):
                        comb += exception.eq(1)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        sync += Display("cache error -> update dsisr")
                        sync += self.dsisr[63 - 38].eq(~ldst_r.load)
                        # XXX there is no architected bit for this
                        # (probably should be a machine check in fact)
                        sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)
                        sync += self.r_instr_fault.eq(0)

                    with m.Else():
                        # Look up the translation for TLB miss
                        # and also for permission error and RC error
                        # in case the PTE has been updated.
                        comb += mmureq.eq(1)
                        sync += self.state.eq(State.MMU_LOOKUP)
                with m.If(d_in.valid):
                    with m.If(self.done):
                        sync += Display("ACK_WAIT, done %x", self.raddr)
                    with m.If(ldst_r.alignstate == Misalign.ONEWORD):
                        # done if there is only one dcache operation
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        with m.If(ldst_r.load):
                            m.d.comb += self.load_data.eq(d_in.data)
                            sync += self.load_data_delay[0:64].eq(d_in.data)
                        m.d.comb += self.done.eq(~mmureq) # done if not MMU
                    with m.Elif(ldst_r.alignstate == Misalign.WAITFIRST):
                        # first LD done: load data, initiate 2nd request.
                        # leave in ACK_WAIT state
                        with m.If(ldst_r.load):
                            m.d.comb += self.load_data[0:64].eq(d_in.data)
                            sync += self.load_data_delay[0:64].eq(d_in.data)
                        # mmm kinda cheating, make a 2nd blip
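                        # the second request targets the next doubleword:
                        # the address advances by 8 and the upper half of
                        # the 16-bit byte_sel mask is used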
                        m.d.comb += self.d_validblip.eq(1)
                        comb += self.req.eq(ldst_r) # from copy of request
                        comb += self.req.raddr.eq(ldst_r.raddr + 8)
                        comb += self.req.byte_sel.eq(ldst_r.byte_sel[8:])
                        comb += self.req.alignstate.eq(Misalign.WAITSECOND)
                        sync += ldst_r.raddr.eq(ldst_r.raddr + 8)
                        sync += ldst_r.byte_sel.eq(ldst_r.byte_sel[8:])
                        sync += ldst_r.alignstate.eq(Misalign.WAITSECOND)
                        sync += Display(" second req %x", self.req.raddr)
                    with m.Elif(ldst_r.alignstate == Misalign.WAITSECOND):
                        sync += Display(" done second %x", d_in.data)
                        # done second load
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        with m.If(ldst_r.load):
                            m.d.comb += self.load_data[64:128].eq(d_in.data)
                            sync += self.load_data_delay[64:128].eq(d_in.data)
                        m.d.comb += self.done.eq(~mmureq) # done if not MMU

            # waiting here for the MMU TLB lookup to complete.
            # either re-try the dcache lookup or throw MMU exception
            with m.Case(State.MMU_LOOKUP):
                comb += self.busy.eq(~exception)
                with m.If(m_in.done):
                    with m.If(~self.r_instr_fault):
                        sync += Display("MMU_LOOKUP, done %x -> %x",
                                        self.raddr, d_out.addr)
                        # retry the request now that the MMU has
                        # installed a TLB entry, if not exception raised
                        m.d.comb += self.d_out.valid.eq(~exception)
                        sync += self.state.eq(State.ACK_WAIT)
                    with m.Else():
                        sync += self.state.eq(State.IDLE)
                        sync += self.r_instr_fault.eq(0)
                        comb += self.done.eq(1)

                with m.If(m_in.err):
                    # MMU RADIX exception thrown. XXX
                    # TODO: critical that the write here has to
                    # notify the MMU FSM of the change to dsisr
                    comb += exception.eq(1)
                    comb += self.done.eq(1)
                    sync += Display("MMU RADIX exception thrown")
                    sync += self.dsisr[63 - 33].eq(m_in.invalid)
                    sync += self.dsisr[63 - 36].eq(m_in.perm_error) # noexec
                    sync += self.dsisr[63 - 38].eq(~ldst_r.load)
                    sync += self.dsisr[63 - 44].eq(m_in.badtree)
                    sync += self.dsisr[63 - 45].eq(m_in.rc_error)
                    sync += self.state.eq(State.IDLE)
                    # exception thrown, clear out instruction fault state
                    sync += self.r_instr_fault.eq(0)

        # MMU FSM communicating a request to update DSISR or DAR (OP_MTSPR)
        with m.If(self.mmu_set_spr):
            with m.If(self.mmu_set_dsisr):
                sync += self.dsisr.eq(self.sprval_in)
            with m.If(self.mmu_set_dar):
                sync += self.dar.eq(self.sprval_in)

        # hmmm, alignment occurs in set_rd_addr/set_wr_addr, note exception
        with m.If(self.align_intr):
            comb += exc.happened.eq(1)
        # check for updating DAR
        with m.If(exception):
            sync += Display("exception %x", self.raddr)
            # alignment error: store address in DAR
            with m.If(self.align_intr):
                sync += Display("alignment error: addr in DAR %x", self.raddr)
                sync += self.dar.eq(self.raddr)
            with m.Elif(~self.r_instr_fault):
                sync += Display("not instr fault, addr in DAR %x", self.raddr)
                sync += self.dar.eq(self.raddr)

        # when done or exception, return to idle state
        with m.If(self.done | exception):
            sync += self.state.eq(State.IDLE)
            comb += self.busy.eq(0)

        # happened, alignment, instr_fault, invalid.
        # note that all of these flow through - eventually to the TRAP
        # pipeline, via PowerDecoder2.
        comb += self.align_intr.eq(self.req.align_intr)
        comb += exc.invalid.eq(m_in.invalid)
        comb += exc.alignment.eq(self.align_intr)
        comb += exc.instr_fault.eq(self.r_instr_fault)
        # badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(m_in.badtree)
        comb += exc.perm_error.eq(m_in.perm_error)
        comb += exc.rc_error.eq(m_in.rc_error)
        comb += exc.segment_fault.eq(m_in.segerr)

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        comb += dbus.adr.eq(dcache.bus.adr)
        comb += dbus.dat_w.eq(dcache.bus.dat_w)
        comb += dbus.sel.eq(dcache.bus.sel)
        comb += dbus.cyc.eq(dcache.bus.cyc)
        comb += dbus.stb.eq(dcache.bus.stb)
        comb += dbus.we.eq(dcache.bus.we)

        comb += dcache.bus.dat_r.eq(dbus.dat_r)
        comb += dcache.bus.ack.eq(dbus.ack)
        if hasattr(dbus, "stall"):
            comb += dcache.bus.stall.eq(dbus.stall)

        # update out d data when flag set
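        # (the first transfer of a misaligned store presents the lower
        #  64 bits of store_data; the WAITSECOND transfer presents the
        #  upper 64 bits)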
        with m.If(self.d_w_valid):
            with m.If(ldst_r.alignstate == Misalign.WAITSECOND):
                m.d.sync += d_out.data.eq(self.store_data[64:128])
            with m.Else():
                m.d.sync += d_out.data.eq(self.store_data[0:64])
        #with m.Else():
        #    m.d.sync += d_out.data.eq(0)
        # unit test passes with that change

        # this must move into the FSM, conditionally noticing that
        # the "blip" comes from self.d_validblip.
        # task 1: look up in dcache
        # task 2: if dcache fails, look up in MMU.
        # do **NOT** confuse the two.
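        # while the blip is active the dcache request is driven directly
        # from the incoming self.req; otherwise the latched copy (ldst_r)
        # is used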
        with m.If(self.d_validblip):
            m.d.comb += self.d_out.valid.eq(~exc.happened)
            m.d.comb += d_out.load.eq(self.req.load)
            m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
            m.d.comb += self.raddr.eq(self.req.raddr)
            m.d.comb += d_out.nc.eq(self.req.nc)
            m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
            m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
            #m.d.comb += Display("validblip dcbz=%i addr=%x",
            #self.req.dcbz, self.req.raddr)
            m.d.comb += d_out.dcbz.eq(self.req.dcbz)
        with m.Else():
            m.d.comb += d_out.load.eq(ldst_r.load)
            m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
            m.d.comb += self.raddr.eq(ldst_r.raddr)
            m.d.comb += d_out.nc.eq(ldst_r.nc)
            m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
            m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
            #m.d.comb += Display("no_validblip dcbz=%i addr=%x",
            #ldst_r.dcbz, ldst_r.raddr)
            m.d.comb += d_out.dcbz.eq(ldst_r.dcbz)
        m.d.comb += d_out.addr.eq(self.raddr)

        # Update outputs to MMU
        m.d.comb += m_out.valid.eq(mmureq)
        m.d.comb += m_out.iside.eq(self.instr_fault)
        m.d.comb += m_out.load.eq(ldst_r.load)
        m.d.comb += m_out.priv.eq(self.priv_mode)
        # m_out.priv <= r.priv_mode; TODO
        m.d.comb += m_out.tlbie.eq(self.tlbie)
        # m_out.mtspr <= mmu_mtspr; # TODO
        # m_out.sprn <= sprn; # TODO
        m.d.comb += m_out.addr.eq(maddr)
        # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
        # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate

        return m

    def ports(self):
        yield from super().ports()
        # TODO: memory ports
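
# LoadStore1 variant for unit tests: instead of wiring dbus out to an
# external Wishbone fabric, it attaches a small internal SRAM so that
# LD/ST operations can be simulated stand-alone.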

class TestSRAMLoadStore1(LoadStore1):
    def __init__(self, pspec):
        super().__init__(pspec)
        pspec = self.pspec
        # small test Memory (depth from pspec.dmem_test_depth, default 32)
        if (hasattr(pspec, "dmem_test_depth") and
                isinstance(pspec.dmem_test_depth, int)):
            depth = pspec.dmem_test_depth
        else:
            depth = 32
        print("TestSRAMLoadStore1 depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        for fanin in fanins:
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m