1 """LoadStore1 FSM.
2
3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
6 by way of PortInterface. PortInterface is where things need extending,
7 such as adding dcbz support, etc.
8
9 this module basically handles "pure" load / store operations, and
10 its first job is to ask the D-Cache for the data. if that fails,
11 the second task (if virtual memory is enabled) is to ask the MMU
12 to perform a TLB, then to go *back* to the cache and ask again.
13
14 Links:
15
16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
17
18 """
19
from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
                    Record, Memory, Const)
from nmutil.iocontrol import RecordObject
from nmutil.util import rising_edge, Display
from enum import Enum, unique

from soc.experiment.dcache import DCache
from soc.experiment.icache import ICache
from soc.experiment.pimem import PortInterfaceBase
from soc.experiment.mem_types import LoadStore1ToMMUType
from soc.experiment.mem_types import MMUToLoadStore1Type

from soc.minerva.wishbone import make_wb_layout
from soc.bus.sram import SRAM


@unique
class State(Enum):
    IDLE = 0        # ready for instruction
    ACK_WAIT = 1    # waiting for ack from dcache
    MMU_LOOKUP = 2  # waiting for MMU to look up translation


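# summary of the state flow implemented in LoadStore1.elaborate() below:
#
#   IDLE       -> ACK_WAIT   on a valid load/store "blip" from PortInterface
#   IDLE       -> MMU_LOOKUP on an instruction fault (i-side MMU lookup)
#   ACK_WAIT   -> IDLE       when the dcache acks (or on a "final" cache error)
#   ACK_WAIT   -> MMU_LOOKUP when the dcache error may simply be a TLB miss
#   MMU_LOOKUP -> ACK_WAIT   MMU done: re-run the dcache request now that a
#                            TLB entry has been installed
#   MMU_LOOKUP -> IDLE       on MMU error (exception raised) or when the
#                            lookup was for an instruction fault
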
# captures the LDSTRequest from the PortInterface, which "blips" most
# of this at us (pipeline-style).
class LDSTRequest(RecordObject):
    def __init__(self, name=None):
        RecordObject.__init__(self, name=name)

        self.load = Signal()
        self.dcbz = Signal()
        self.raddr = Signal(64)
        # self.store_data = Signal(64)  # this is already sync (on a delay)
        self.byte_sel = Signal(8)
        self.nc = Signal()              # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.mode_32bit = Signal()      # XXX UNUSED AT PRESENT
        self.align_intr = Signal()


# glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    def __init__(self, pspec):
        self.pspec = pspec
        self.disable_cache = (hasattr(pspec, "disable_cache") and
                              pspec.disable_cache == True)
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache()
        self.icache = ICache(pspec)
        # these names are from the perspective of here (LoadStore1)
        self.d_out = self.dcache.d_in    # in to dcache is out for LoadStore
        self.d_in = self.dcache.d_out    # out from dcache is in for LoadStore
        self.i_out = self.icache.i_in    # in to icache is out for LoadStore
        self.i_in = self.icache.i_out    # out from icache is in for LoadStore
        self.m_out = LoadStore1ToMMUType("m_out")  # out *to* MMU
        self.m_in = MMUToLoadStore1Type("m_in")    # in *from* MMU
        self.req = LDSTRequest(name="ldst_req")

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))
        self.ibus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()
        self.d_w_valid = Signal()
        self.d_validblip = Signal()

        # state info for LD/ST
        self.done = Signal()
        self.done_delay = Signal()
        # latch most of the input request
        self.load = Signal()
        self.tlbie = Signal()
        self.dcbz = Signal()
        self.raddr = Signal(64)
        self.maddr = Signal(64)
        self.store_data = Signal(64)
        self.load_data = Signal(64)
        self.load_data_delay = Signal(64)
        self.byte_sel = Signal(8)
        #self.xerc : xer_common_t;
        #self.reserve = Signal()
        #self.atomic = Signal()
        #self.atomic_last = Signal()
        #self.rc = Signal()
        self.nc = Signal()              # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.mode_32bit = Signal()      # XXX UNUSED AT PRESENT
        self.state = Signal(State)
        self.instr_fault = Signal()     # indicator to request i-cache MMU lookup
        self.r_instr_fault = Signal()   # accessed in external_busy
        self.align_intr = Signal()
        self.busy = Signal()
        self.wait_dcache = Signal()
        self.wait_mmu = Signal()
        #self.intr_vec : integer range 0 to 16#fff#;
        #self.nia = Signal(64)
        #self.srr1 = Signal(16)
        # use these to set the dsisr or dar respectively
        self.mmu_set_spr = Signal()
        self.mmu_set_dsisr = Signal()
        self.mmu_set_dar = Signal()
        self.sprval_in = Signal(64)

        # ONLY access these read-only, do NOT attempt to change
        self.dsisr = Signal(32)
        self.dar = Signal(64)

    # when external_busy set, do not allow PortInterface to proceed
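    # (an instruction fault is initiated by the fetch/i-cache side rather
    # than via a PortInterface request, so "busy" has to be signalled from
    # here while that i-side MMU lookup is still in progress)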
    def external_busy(self, m):
        return self.instr_fault | self.r_instr_fault

    def set_wr_addr(self, m, addr, mask, misalign, msr, is_dcbz):
        m.d.comb += self.req.load.eq(0)  # store operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.raddr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr.pr)   # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr.dr)    # DR ==> virt
        m.d.comb += self.req.mode_32bit.eq(~msr.sf)  # not-sixty-four ==> 32bit
        m.d.comb += self.req.dcbz.eq(is_dcbz)
        # XXX TODO sort out misalignment, mmu test5 fails
        m.d.comb += self.req.align_intr.eq(misalign)

        # m.d.comb += Display("set_wr_addr %i dcbz %i", addr, is_dcbz)

        # option to disable the cache entirely for write
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)

        # dcbz cannot do no-cache
        with m.If(is_dcbz & self.req.nc):
            m.d.comb += self.req.align_intr.eq(1)

        return None

    def set_rd_addr(self, m, addr, mask, misalign, msr):
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.req.load.eq(1)  # load operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.raddr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr.pr)   # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr.dr)    # DR ==> virt
        m.d.comb += self.req.mode_32bit.eq(~msr.sf)  # not-sixty-four ==> 32bit
        # BAD HACK! disable caching on LD when address is 0xCxxx_xxxx
        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
        with m.If(addr[28:] == Const(0xc, 4)):
            m.d.comb += self.req.nc.eq(1)
        # option to disable the cache entirely for read
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        # XXX TODO sort out misalignment, mmu test5 fails
        m.d.comb += self.req.align_intr.eq(misalign)
        return None  # FIXME return value

    def set_wr_data(self, m, data, wen):
        # do the "blip" on write data
        m.d.comb += self.d_valid.eq(1)
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.store_data.eq(data)
        #m.d.sync += self.d_out.byte_sel.eq(wen)  # this might not be needed
        st_ok = self.done  # TODO indicates write data is valid
        return st_ok

    def get_rd_data(self, m):
        ld_ok = self.done_delay      # indicates read data is valid
        data = self.load_data_delay  # actual read data
        return data, ld_ok

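    # note on the "blip" mechanism: set_rd_addr/set_wr_addr/set_wr_data drive
    # d_valid combinatorially; rising_edge() (in elaborate, below) turns that
    # into the single-cycle d_validblip which both kicks the dcache and
    # latches the request into ldst_r.  done/load_data are delayed by one
    # cycle (done_delay/load_data_delay) to match microwatt's timing.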
    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync

        # microwatt takes one more cycle before next operation can be issued
        sync += self.done_delay.eq(self.done)
        sync += self.load_data_delay.eq(self.load_data)

        # create dcache and icache module
        m.submodules.dcache = dcache = self.dcache
        m.submodules.icache = icache = self.icache

        # temp vars
        d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
        i_out, i_in, ibus = self.i_out, self.i_in, self.ibus
        m_out, m_in = self.m_out, self.m_in
        exc = self.pi.exc_o
        exception = exc.happened
        mmureq = Signal()

        # copy of address, but gets over-ridden for instr_fault
        maddr = Signal(64)
        m.d.comb += maddr.eq(self.raddr)

        # create a blip (single pulse) on valid read/write request
        # this can be over-ridden in the FSM to get dcache to re-run
        # a request when MMU_LOOKUP completes.
        m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
        ldst_r = LDSTRequest("ldst_r")
        comb += Display("MMUTEST: LoadStore1 d_in.error=%i", d_in.error)

        # fsm skeleton
        with m.Switch(self.state):
            with m.Case(State.IDLE):
                with m.If((self.d_validblip | self.instr_fault) &
                          ~exc.happened):
                    comb += self.busy.eq(1)
                    sync += self.state.eq(State.ACK_WAIT)
                    sync += ldst_r.eq(self.req)  # copy of LDSTRequest on "blip"
                    # sync += Display("validblip self.req.virt_mode=%i",
                    #                 self.req.virt_mode)
                    with m.If(self.instr_fault):
                        comb += mmureq.eq(1)
                        sync += self.r_instr_fault.eq(1)
                        comb += maddr.eq(self.maddr)
                        sync += self.state.eq(State.MMU_LOOKUP)
                    with m.Else():
                        sync += self.r_instr_fault.eq(0)
                with m.Else():
                    sync += ldst_r.eq(0)

            # waiting for completion
            with m.Case(State.ACK_WAIT):
                comb += Display("MMUTEST: ACK_WAIT")
                comb += self.busy.eq(~exc.happened)

                with m.If(d_in.error):
                    # cache error is not necessarily "final", it could
                    # be that it was just a TLB miss
                    with m.If(d_in.cache_paradox):
                        comb += exception.eq(1)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        sync += Display("cache error -> update dsisr")
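                        # note: the [63 - n] indices convert IBM MSB0 bit
                        # numbers (of the 64-bit register view) into LSB0
                        # bit positions within the 32-bit DSISR Signal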
                        sync += self.dsisr[63 - 38].eq(~ldst_r.load)
                        # XXX there is no architected bit for this
                        # (probably should be a machine check in fact)
                        sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)
                        sync += self.r_instr_fault.eq(0)

                    with m.Else():
                        # Look up the translation for TLB miss
                        # and also for permission error and RC error
                        # in case the PTE has been updated.
                        comb += mmureq.eq(1)
                        sync += self.state.eq(State.MMU_LOOKUP)
                with m.If(d_in.valid):
                    m.d.comb += self.done.eq(~mmureq)  # done if not doing MMU
                    with m.If(self.done):
                        sync += Display("ACK_WAIT, done %x", self.raddr)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        with m.If(self.load):
                            m.d.comb += self.load_data.eq(d_in.data)

            # waiting here for the MMU TLB lookup to complete.
            # either re-try the dcache lookup or throw MMU exception
            with m.Case(State.MMU_LOOKUP):
                comb += self.busy.eq(~exception)
                with m.If(m_in.done):
                    with m.If(~self.r_instr_fault):
                        sync += Display("MMU_LOOKUP, done %x -> %x",
                                        self.raddr, d_out.addr)
                        # retry the request now that the MMU has
                        # installed a TLB entry, if not exception raised
                        m.d.comb += self.d_out.valid.eq(~exception)
                        sync += self.state.eq(State.ACK_WAIT)
                        sync += ldst_r.eq(0)
                    with m.Else():
                        sync += self.state.eq(State.IDLE)
                        sync += self.r_instr_fault.eq(0)
                        comb += self.done.eq(1)

                with m.If(m_in.err):
                    # MMU RADIX exception thrown. XXX
                    # TODO: critical that the write here has to
                    # notify the MMU FSM of the change to dsisr
                    comb += exception.eq(1)
                    comb += self.done.eq(1)
                    sync += Display("MMU RADIX exception thrown")
                    sync += self.dsisr[63 - 33].eq(m_in.invalid)
                    sync += self.dsisr[63 - 36].eq(m_in.perm_error)  # noexec
                    sync += self.dsisr[63 - 38].eq(~ldst_r.load)
                    sync += self.dsisr[63 - 44].eq(m_in.badtree)
                    sync += self.dsisr[63 - 45].eq(m_in.rc_error)
                    sync += self.state.eq(State.IDLE)
                    # exception thrown, clear out instruction fault state
                    sync += self.r_instr_fault.eq(0)

        # MMU FSM communicating a request to update DSISR or DAR (OP_MTSPR)
        with m.If(self.mmu_set_spr):
            with m.If(self.mmu_set_dsisr):
                sync += self.dsisr.eq(self.sprval_in)
            with m.If(self.mmu_set_dar):
                sync += self.dar.eq(self.sprval_in)

        # alignment errors are detected in set_rd_addr/set_wr_addr:
        # flag the exception here
        with m.If(self.align_intr):
            comb += exc.happened.eq(1)
        # check for updating DAR
        with m.If(exception):
            sync += Display("exception %x", self.raddr)
            # alignment error: store address in DAR
            with m.If(self.align_intr):
                sync += Display("alignment error: addr in DAR %x", self.raddr)
                sync += self.dar.eq(self.raddr)
            with m.Elif(~self.r_instr_fault):
                sync += Display("not instr fault, addr in DAR %x", self.raddr)
                sync += self.dar.eq(self.raddr)

        # when done or exception, return to idle state
        with m.If(self.done | exception):
            sync += self.state.eq(State.IDLE)
            comb += self.busy.eq(0)

        # happened, alignment, instr_fault, invalid.
        # note that all of these flow through - eventually to the TRAP
        # pipeline, via PowerDecoder2.
        comb += self.align_intr.eq(self.req.align_intr)
        comb += exc.invalid.eq(m_in.invalid)
        comb += exc.alignment.eq(self.align_intr)
        comb += exc.instr_fault.eq(self.r_instr_fault)
        # badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(m_in.badtree)
        comb += exc.perm_error.eq(m_in.perm_error)
        comb += exc.rc_error.eq(m_in.rc_error)
        comb += exc.segment_fault.eq(m_in.segerr)

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        comb += dbus.adr.eq(dcache.bus.adr)
        comb += dbus.dat_w.eq(dcache.bus.dat_w)
        comb += dbus.sel.eq(dcache.bus.sel)
        comb += dbus.cyc.eq(dcache.bus.cyc)
        comb += dbus.stb.eq(dcache.bus.stb)
        comb += dbus.we.eq(dcache.bus.we)

        comb += dcache.bus.dat_r.eq(dbus.dat_r)
        comb += dcache.bus.ack.eq(dbus.ack)
        if hasattr(dbus, "stall"):
            comb += dcache.bus.stall.eq(dbus.stall)
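        # (the generated Wishbone layout only includes "stall" in some
        # configurations - e.g. pipelined mode - hence the hasattr check)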

        # update out d data when flag set
        with m.If(self.d_w_valid):
            m.d.sync += d_out.data.eq(self.store_data)
        #with m.Else():
        #    m.d.sync += d_out.data.eq(0)
        # unit test passes with that change

        # this must move into the FSM, conditionally noticing that
        # the "blip" comes from self.d_validblip.
        # task 1: look up in dcache
        # task 2: if dcache fails, look up in MMU.
        # do **NOT** confuse the two.
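        # on the blip cycle itself the registered copy (ldst_r) has not yet
        # been latched, so the dcache request fields are taken directly from
        # self.req; on subsequent cycles the latched ldst_r copy is used.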
        with m.If(self.d_validblip):
            m.d.comb += self.d_out.valid.eq(~exc.happened)
            m.d.comb += d_out.load.eq(self.req.load)
            m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
            m.d.comb += self.raddr.eq(self.req.raddr)
            m.d.comb += d_out.nc.eq(self.req.nc)
            m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
            m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
            #m.d.comb += Display("validblip dcbz=%i addr=%x",
            #                    self.req.dcbz, self.req.addr)
            m.d.comb += d_out.dcbz.eq(self.req.dcbz)
        with m.Else():
            m.d.comb += d_out.load.eq(ldst_r.load)
            m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
            m.d.comb += self.raddr.eq(ldst_r.raddr)
            m.d.comb += d_out.nc.eq(ldst_r.nc)
            m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
            m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
            #m.d.comb += Display("no_validblip dcbz=%i addr=%x",
            #                    ldst_r.dcbz, ldst_r.addr)
            m.d.comb += d_out.dcbz.eq(ldst_r.dcbz)

        # XXX these should be possible to remove but for some reason
        # cannot be... yet. TODO, investigate
        m.d.comb += self.load_data.eq(d_in.data)
        m.d.comb += d_out.addr.eq(self.raddr)

        # Update outputs to MMU
        m.d.comb += m_out.valid.eq(mmureq)
        m.d.comb += m_out.iside.eq(self.instr_fault)
        m.d.comb += m_out.load.eq(ldst_r.load)
        m.d.comb += m_out.priv.eq(self.priv_mode)
        # m_out.priv <= r.priv_mode; TODO
        m.d.comb += m_out.tlbie.eq(self.tlbie)
        # m_out.mtspr <= mmu_mtspr; # TODO
        # m_out.sprn <= sprn; # TODO
        m.d.comb += m_out.addr.eq(maddr)
        # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
        # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate

        return m

    def ports(self):
        yield from super().ports()
        # TODO: memory ports


class TestSRAMLoadStore1(LoadStore1):
    def __init__(self, pspec):
        super().__init__(pspec)
        pspec = self.pspec
        # small test Memory (default depth 32 entries)
        if (hasattr(pspec, "dmem_test_depth") and
                isinstance(pspec.dmem_test_depth, int)):
            depth = pspec.dmem_test_depth
        else:
            depth = 32
        print("TestSRAMLoadStore1 depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        for fanin in fanins:
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m

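
# A minimal usage sketch (illustrative only, not part of this module).
# The pspec normally comes from the SoC test/config infrastructure; here a
# hypothetical SimpleNamespace stands in, carrying only the attributes read
# directly in this file (DCache/ICache/make_wb_layout may require more):
#
#     from types import SimpleNamespace
#     pspec = SimpleNamespace(reg_wid=64, addr_wid=64, dmem_test_depth=32)
#     ldst = TestSRAMLoadStore1(pspec)
#     # ldst.pi is the PortInterface used by LDSTCompUnit; ldst.dbus is the
#     # Wishbone bus, wired up in elaborate() to the backing test SRAM.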