MMU lookup DSISR load bit inverted in LoadStore1
[soc.git] / src / soc / fu / ldst / loadstore.py
1 """LoadStore1 FSM.
2
3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
6 by way of PortInterface. PortInterface is where things need extending,
7 such as adding dcbz support, etc.
8
9 this module basically handles "pure" load / store operations, and
10 its first job is to ask the D-Cache for the data. if that fails,
11 the second task (if virtual memory is enabled) is to ask the MMU
12 to perform a TLB look-up (page-table walk), then to go *back* to the cache and ask again.
13
14 Links:
15
16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
17
18 """
19
20 from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
21 Record, Memory,
22 Const)
23 from nmutil.iocontrol import RecordObject
24 from nmutil.util import rising_edge, Display
25 from enum import Enum, unique
26
27 from soc.experiment.dcache import DCache
28 from soc.experiment.pimem import PortInterfaceBase
29 from soc.experiment.mem_types import LoadStore1ToMMUType
30 from soc.experiment.mem_types import MMUToLoadStore1Type
31
32 from soc.minerva.wishbone import make_wb_layout
33 from soc.bus.sram import SRAM
34 from nmutil.util import Display
35
36
@unique
class State(Enum):
    """States of the LoadStore1 FSM (one-hot-encoded by nmigen via Signal).

    Transitions are driven in LoadStore1.elaborate(): IDLE issues a
    request to the dcache, ACK_WAIT waits for its answer, MMU_LOOKUP
    retries via the MMU on a TLB miss, TLBIE_WAIT parks during a tlbie.
    """
    IDLE       = 0  # ready to accept a new instruction
    ACK_WAIT   = 1  # request issued, awaiting ack from the dcache
    MMU_LOOKUP = 2  # dcache missed; MMU performing the translation
    TLBIE_WAIT = 3  # MMU busy completing a tlbie operation
43
44
# captures the LDSTRequest from the PortInterface, which "blips" most
# of this at us (pipeline-style).
class LDSTRequest(RecordObject):
    """One Load/Store request as presented by the PortInterface.

    NOTE: field declaration order defines the RecordObject layout, and
    whole instances are copied in one go (``ldst_r.eq(self.req)`` in the
    LoadStore1 FSM) — do not reorder fields.
    """
    def __init__(self, name=None):
        RecordObject.__init__(self, name=name)

        self.load = Signal()        # 1 = load, 0 = store
        self.dcbz = Signal()        # data-cache block zero request
        self.addr = Signal(64)      # effective address of the access
        # self.store_data = Signal(64) # this is already sync (on a delay)
        self.byte_sel = Signal(8)   # per-byte lane enables
        self.nc = Signal()          # non-cacheable access
        self.virt_mode = Signal()   # set from MSR.PR: problem-state => virtual
        self.priv_mode = Signal()   # inverse of MSR.PR: privileged access
        self.align_intr = Signal()  # misaligned: raise alignment interrupt
60
61 # glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    """Load/Store FSM: glue logic between PortInterface, D-Cache and MMU.

    Based on microwatt loadstore1.vhdl.  Requests arrive ("blip") via the
    PortInterfaceBase hooks (set_rd_addr / set_wr_addr / set_wr_data);
    the FSM first asks the D-Cache, and on a cache error that is not a
    cache paradox it asks the MMU to walk the page tables, then re-runs
    the D-Cache request.  Exceptions (alignment, MMU faults) flow out
    through ``self.pi.exc_o`` to the TRAP pipeline via PowerDecoder2.
    """
    def __init__(self, pspec):
        self.pspec = pspec
        # pspec option to force every access non-cacheable (testing aid)
        self.disable_cache = (hasattr(pspec, "disable_cache") and
                              pspec.disable_cache == True)
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache()
        # these names are from the perspective of here (LoadStore1)
        self.d_out = self.dcache.d_in      # in to dcache is out for LoadStore
        self.d_in = self.dcache.d_out      # out from dcache is in for LoadStore
        self.m_out = LoadStore1ToMMUType() # out *to* MMU
        self.m_in = MMUToLoadStore1Type()  # in *from* MMU
        self.req = LDSTRequest(name="ldst_req")

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()
        self.d_w_valid = Signal()
        self.d_validblip = Signal()

        # state info for LD/ST
        self.done = Signal()        # current op complete (comb, from FSM)
        self.done_delay = Signal()  # done, one cycle later (microwatt timing)
        # latch most of the input request
        # NOTE(review): self.load is declared here and read by the FSM
        # (dsisr store-bit, load_data capture) but is never driven in this
        # file — presumably driven by the base class or a TODO; confirm
        # whether ldst_r.load was intended instead.
        self.load = Signal()
        self.tlbie = Signal()
        self.dcbz = Signal()
        self.addr = Signal(64)
        self.store_data = Signal(64)
        self.load_data = Signal(64)
        self.load_data_delay = Signal(64)  # load_data, one cycle later
        self.byte_sel = Signal(8)
        #self.xerc : xer_common_t;
        #self.reserve = Signal()
        #self.atomic = Signal()
        #self.atomic_last = Signal()
        #self.rc = Signal()
        self.nc = Signal()              # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.state = Signal(State)      # FSM state, see State enum above
        self.instr_fault = Signal()
        self.align_intr = Signal()
        self.busy = Signal()
        self.wait_dcache = Signal()
        self.wait_mmu = Signal()
        #self.mode_32bit = Signal()
        #self.intr_vec : integer range 0 to 16#fff#;
        #self.nia = Signal(64)
        #self.srr1 = Signal(16)
        # use these to set the dsisr or dar respectively
        self.mmu_set_spr = Signal()
        self.mmu_set_dsisr = Signal()
        self.mmu_set_dar = Signal()
        self.sprval_in = Signal(64)

    def set_wr_addr(self, m, addr, mask, misalign, msr_pr, is_dcbz):
        """PortInterfaceBase hook: present a store request to the dcache.

        Fills in self.req (combinatorially); the FSM latches it on the
        d_validblip pulse.  Returns None (no "ok" signal for address phase).
        """
        m.d.comb += self.req.load.eq(0) # store operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
        m.d.comb += self.req.align_intr.eq(misalign)
        m.d.comb += self.req.dcbz.eq(is_dcbz)

        # m.d.comb += Display("set_wr_addr %i dcbz %i",addr,is_dcbz)

        # option to disable the cache entirely for write
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None

    def set_rd_addr(self, m, addr, mask, misalign, msr_pr):
        """PortInterfaceBase hook: present a load request to the dcache.

        Also raises d_valid so elaborate() can derive the single-cycle
        d_validblip pulse.  Returns None (FIXME upstream).
        """
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.req.load.eq(1) # load operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.align_intr.eq(misalign)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
        # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
        with m.If(addr[28:] == Const(0xc, 4)):
            m.d.comb += self.req.nc.eq(1)
        # option to disable the cache entirely for read
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None #FIXME return value

    def set_wr_data(self, m, data, wen):
        """PortInterfaceBase hook: supply store data.

        Latches the data via self.store_data / d_w_valid (picked up in
        elaborate()) and returns self.done as the write-complete signal.
        """
        # do the "blip" on write data
        m.d.comb += self.d_valid.eq(1)
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.store_data.eq(data)
        #m.d.sync += self.d_out.byte_sel.eq(wen) # this might not be needed
        st_ok = self.done # TODO indicates write data is valid
        return st_ok

    def get_rd_data(self, m):
        """PortInterfaceBase hook: return (load data, data-valid).

        Uses the one-cycle-delayed copies to match microwatt timing.
        """
        ld_ok = self.done_delay # indicates read data is valid
        data = self.load_data_delay # actual read data
        return data, ld_ok

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync

        # microwatt takes one more cycle before next operation can be issued
        sync += self.done_delay.eq(self.done)
        sync += self.load_data_delay.eq(self.load_data)

        # create dcache module
        m.submodules.dcache = dcache = self.dcache

        # temp vars
        d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
        m_out, m_in = self.m_out, self.m_in
        exc = self.pi.exc_o
        exception = exc.happened
        mmureq = Signal()

        # copy of address, but gets over-ridden for OP_FETCH_FAILED
        maddr = Signal(64)
        m.d.comb += maddr.eq(self.addr)

        # DO NOT access these directly, they are internal
        dsisr = Signal(32)
        dar = Signal(64)

        # create a blip (single pulse) on valid read/write request
        # this can be over-ridden in the FSM to get dcache to re-run
        # a request when MMU_LOOKUP completes.
        m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
        # latched copy of the request, valid after the blip cycle
        ldst_r = LDSTRequest("ldst_r")
        comb += Display("MMUTEST: LoadStore1 d_in.error=%i",d_in.error)

        # fsm skeleton
        with m.Switch(self.state):
            with m.Case(State.IDLE):
                with m.If(self.d_validblip & ~exc.happened):
                    comb += self.busy.eq(1)
                    sync += self.state.eq(State.ACK_WAIT)
                    sync += ldst_r.eq(self.req) # copy of LDSTRequest on "blip"
                    # sync += Display("validblip self.req.virt_mode=%i",
                    # self.req.virt_mode)
                with m.Else():
                    sync += ldst_r.eq(0)

            # waiting for completion
            with m.Case(State.ACK_WAIT):
                comb += Display("MMUTEST: ACK_WAIT")
                comb += self.busy.eq(~exc.happened)

                with m.If(d_in.error):
                    # cache error is not necessarily "final", it could
                    # be that it was just a TLB miss
                    with m.If(d_in.cache_paradox):
                        comb += exception.eq(1)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        sync += Display("cache error -> update dsisr")
                        # DSISR bit 38 set => access was a store.
                        # NOTE(review): self.load not driven in this file
                        # (see __init__) — confirm; ldst_r.load may be meant
                        sync += dsisr[63 - 38].eq(~self.load)
                        # XXX there is no architected bit for this
                        # (probably should be a machine check in fact)
                        sync += dsisr[63 - 35].eq(d_in.cache_paradox)

                    with m.Else():
                        # Look up the translation for TLB miss
                        # and also for permission error and RC error
                        # in case the PTE has been updated.
                        comb += mmureq.eq(1)
                        sync += self.state.eq(State.MMU_LOOKUP)
                with m.If(d_in.valid):
                    m.d.comb += self.done.eq(~mmureq) # done if not doing MMU
                    with m.If(self.done):
                        sync += Display("ACK_WAIT, done %x", self.addr)
                    sync += self.state.eq(State.IDLE)
                    sync += ldst_r.eq(0)
                    with m.If(self.load):
                        m.d.comb += self.load_data.eq(d_in.data)

            # waiting here for the MMU TLB lookup to complete.
            # either re-try the dcache lookup or throw MMU exception
            with m.Case(State.MMU_LOOKUP):
                comb += self.busy.eq(~exception)
                with m.If(m_in.done):
                    with m.If(~self.instr_fault):
                        sync += Display("MMU_LOOKUP, done %x -> %x",
                                        self.addr, d_out.addr)
                        # retry the request now that the MMU has
                        # installed a TLB entry, if not exception raised
                        m.d.comb += self.d_out.valid.eq(~exception)
                        sync += self.state.eq(State.ACK_WAIT)
                        sync += ldst_r.eq(0)
                    with m.Else():
                        sync += Display("MMU_LOOKUP, exception %x", self.addr)
                        # instruction lookup fault: store address in DAR
                        comb += exc.happened.eq(1) # reason = MMU_LOOKUP
                        # mark dar as updated ?
                        sync += dar.eq(self.addr)
                        sync += self.state.eq(State.IDLE)

                with m.If(m_in.err):
                    # MMU RADIX exception thrown. XXX
                    # TODO: critical that the write here has to
                    # notify the MMU FSM of the change to dsisr
                    comb += exception.eq(1)
                    sync += Display("MMU RADIX exception thrown")
                    sync += Display("TODO: notify MMU of change to dsisr")
                    # record the fault cause in the architected DSISR bits
                    sync += dsisr[63 - 33].eq(m_in.invalid)
                    sync += dsisr[63 - 36].eq(m_in.perm_error) # noexec fault
                    # bit 38 set => store access (see NOTE on self.load above)
                    sync += dsisr[63 - 38].eq(~self.load)
                    sync += dsisr[63 - 44].eq(m_in.badtree)
                    sync += dsisr[63 - 45].eq(m_in.rc_error)
                    sync += self.state.eq(State.IDLE)

            with m.Case(State.TLBIE_WAIT):
                pass

        # MMU FSM communicating a request to update dsisr or dar
        # (from OP_MTSPR)
        with m.If(self.mmu_set_spr):
            with m.If(self.mmu_set_dsisr):
                sync += dsisr.eq(self.sprval_in)
            with m.If(self.mmu_set_dar):
                sync += dar.eq(self.sprval_in)

        # alignment error: store address in DAR
        with m.If(self.align_intr):
            comb += exc.happened.eq(1) # reason = alignment
            sync += Display("alignment error: addr in DAR %x", self.addr)
            sync += dar.eq(self.addr)

        # when done or exception, return to idle state
        with m.If(self.done | exception):
            sync += self.state.eq(State.IDLE)
            comb += self.busy.eq(0)

        # happened, alignment, instr_fault, invalid.
        # note that all of these flow through - eventually to the TRAP
        # pipeline, via PowerDecoder2.
        comb += self.align_intr.eq(self.req.align_intr)
        comb += exc.invalid.eq(m_in.invalid)
        comb += exc.alignment.eq(self.align_intr)
        comb += exc.instr_fault.eq(self.instr_fault)
        # badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(m_in.badtree)
        comb += exc.perm_error.eq(m_in.perm_error)
        comb += exc.rc_error.eq(m_in.rc_error)
        comb += exc.segment_fault.eq(m_in.segerr)

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        comb += dbus.adr.eq(dcache.wb_out.adr)
        comb += dbus.dat_w.eq(dcache.wb_out.dat)
        comb += dbus.sel.eq(dcache.wb_out.sel)
        comb += dbus.cyc.eq(dcache.wb_out.cyc)
        comb += dbus.stb.eq(dcache.wb_out.stb)
        comb += dbus.we.eq(dcache.wb_out.we)

        comb += dcache.wb_in.dat.eq(dbus.dat_r)
        comb += dcache.wb_in.ack.eq(dbus.ack)
        if hasattr(dbus, "stall"):
            comb += dcache.wb_in.stall.eq(dbus.stall)

        # update out d data when flag set
        with m.If(self.d_w_valid):
            m.d.sync += d_out.data.eq(self.store_data)
        #with m.Else():
        #    m.d.sync += d_out.data.eq(0)
        # unit test passes with that change

        # this must move into the FSM, conditionally noticing that
        # the "blip" comes from self.d_validblip.
        # task 1: look up in dcache
        # task 2: if dcache fails, look up in MMU.
        # do **NOT** confuse the two.
        with m.If(self.d_validblip):
            # blip cycle: drive dcache from the live request
            m.d.comb += self.d_out.valid.eq(~exc.happened)
            m.d.comb += d_out.load.eq(self.req.load)
            m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
            m.d.comb += self.addr.eq(self.req.addr)
            m.d.comb += d_out.nc.eq(self.req.nc)
            m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
            m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
            #m.d.comb += Display("validblip dcbz=%i addr=%x",
            #self.req.dcbz,self.req.addr)
            m.d.comb += d_out.dcbz.eq(self.req.dcbz)
        with m.Else():
            # after the blip: drive dcache from the latched copy
            m.d.comb += d_out.load.eq(ldst_r.load)
            m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
            m.d.comb += self.addr.eq(ldst_r.addr)
            m.d.comb += d_out.nc.eq(ldst_r.nc)
            m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
            m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
            #m.d.comb += Display("no_validblip dcbz=%i addr=%x",
            #ldst_r.dcbz,ldst_r.addr)
            m.d.comb += d_out.dcbz.eq(ldst_r.dcbz)

        # XXX these should be possible to remove but for some reason
        # cannot be... yet. TODO, investigate
        m.d.comb += self.load_data.eq(d_in.data)
        m.d.comb += d_out.addr.eq(self.addr)

        # Update outputs to MMU
        m.d.comb += m_out.valid.eq(mmureq)
        m.d.comb += m_out.iside.eq(self.instr_fault)
        m.d.comb += m_out.load.eq(ldst_r.load)
        # m_out.priv <= r.priv_mode; TODO
        m.d.comb += m_out.tlbie.eq(self.tlbie)
        # m_out.mtspr <= mmu_mtspr; # TODO
        # m_out.sprn <= sprn; # TODO
        m.d.comb += m_out.addr.eq(maddr)
        # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
        # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate

        return m

    def ports(self):
        yield from super().ports()
        # TODO: memory ports
388
389
class TestSRAMLoadStore1(LoadStore1):
    """LoadStore1 with its Wishbone data bus wired to a small test SRAM.

    Used for unit testing: instead of an external memory system, dbus
    is connected directly to an nmigen-soc SRAM peripheral.

    Fix: the fanout connection was duplicated (the identical comb
    statement was added twice per signal) — a copy-paste error; the
    second assignment merely re-drove the same value.  One copy removed.
    """
    def __init__(self, pspec):
        """Set up LoadStore1 then create the backing Memory.

        Depth comes from pspec.dmem_test_depth when present (and an int),
        otherwise defaults to a small 32-entry Memory.
        """
        super().__init__(pspec)
        pspec = self.pspec
        # small 32-entry Memory
        if (hasattr(pspec, "dmem_test_depth") and
                isinstance(pspec.dmem_test_depth, int)):
            depth = pspec.dmem_test_depth
        else:
            depth = 32
        print("TestSRAMBareLoadStoreUnit depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        for fanin in fanins:
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m
426