connect up I-Cache to FetchUnitInterface
[soc.git] / src / soc / fu / ldst / loadstore.py
"""LoadStore1 FSM.

based on microwatt loadstore1.vhdl, but conforming to PortInterface.
unlike loadstore1.vhdl this does *not* deal with actual Load/Store
ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
by way of PortInterface. PortInterface is where things need extending,
such as adding dcbz support, etc.

this module basically handles "pure" load / store operations, and
its first job is to ask the D-Cache for the data. if that fails,
the second task (if virtual memory is enabled) is to ask the MMU
to perform a TLB look-up (page-table walk), then to go *back* to the
cache and ask again (a simplified sketch of this retry loop is given
just after the State enum, below).

Links:

* https://bugs.libre-soc.org/show_bug.cgi?id=465

"""

from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
                    Record, Memory,
                    Const)
from nmutil.iocontrol import RecordObject
from nmutil.util import rising_edge, Display
from enum import Enum, unique

from soc.experiment.dcache import DCache
from soc.experiment.icache import ICache
from soc.experiment.pimem import PortInterfaceBase
from soc.experiment.mem_types import LoadStore1ToMMUType
from soc.experiment.mem_types import MMUToLoadStore1Type

from soc.minerva.wishbone import make_wb_layout
from soc.bus.sram import SRAM


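# NOTE: illustrative helper only, not used by this module.  The DSISR updates
# in LoadStore1.elaborate() below index bits as "63 - n" because Power ISA
# documents register bits in MSB0 order (bit 0 = most-significant bit of the
# 64-bit doubleword), whereas nmigen Signals are indexed from the LSB.
def _msb0_to_lsb0(bit, width=64):
    "convert a Power-ISA MSB0 bit number to an LSB0 (nmigen) Signal index"
    return (width - 1) - bit

# example: "bit 38" in MSB0 numbering is Signal index 63 - 38 == 25

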
@unique
class State(Enum):
    IDLE = 0            # ready for instruction
    ACK_WAIT = 1        # waiting for ack from dcache
    MMU_LOOKUP = 2      # waiting for MMU to look up translation
    TLBIE_WAIT = 3      # waiting for MMU to finish doing a tlbie

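# Illustrative sketch only, not part of the core logic: a stripped-down FSM
# with the same IDLE -> ACK_WAIT -> MMU_LOOKUP -> ACK_WAIT retry shape that
# LoadStore1 implements below.  The signal names (req_valid, dc_ack, dc_miss,
# mmu_done) are hypothetical stand-ins for the real DCache/MMU record fields.
class _RetryFSMSketch(Elaboratable):
    def __init__(self):
        self.req_valid = Signal()   # request "blip" from PortInterface
        self.dc_ack = Signal()      # d-cache returned valid data
        self.dc_miss = Signal()     # d-cache reported an error / TLB miss
        self.mmu_done = Signal()    # MMU finished the translation look-up
        self.state = Signal(State)

    def elaborate(self, platform):
        m = Module()
        with m.Switch(self.state):
            with m.Case(State.IDLE):
                with m.If(self.req_valid):
                    m.d.sync += self.state.eq(State.ACK_WAIT)
            with m.Case(State.ACK_WAIT):
                with m.If(self.dc_miss):   # ask the MMU, then retry
                    m.d.sync += self.state.eq(State.MMU_LOOKUP)
                with m.Elif(self.dc_ack):  # done: return to idle
                    m.d.sync += self.state.eq(State.IDLE)
            with m.Case(State.MMU_LOOKUP):
                with m.If(self.mmu_done):  # re-run the d-cache request
                    m.d.sync += self.state.eq(State.ACK_WAIT)
            with m.Case(State.TLBIE_WAIT):
                pass                       # tlbie handling not sketched here
        return m

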
# captures the LDSTRequest from the PortInterface, which "blips" most
# of this at us (pipeline-style).
class LDSTRequest(RecordObject):
    def __init__(self, name=None):
        RecordObject.__init__(self, name=name)

        self.load = Signal()
        self.dcbz = Signal()
        self.addr = Signal(64)
        # self.store_data = Signal(64) # this is already sync (on a delay)
        self.byte_sel = Signal(8)
        self.nc = Signal()              # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.align_intr = Signal()


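# Illustrative sketch only, not used by this module: LoadStore1.elaborate()
# below uses nmutil's rising_edge(m, sig) to turn d_valid into d_validblip,
# a single-clock "blip".  An equivalent edge detector, written out long-hand
# (the signal names here are hypothetical):
class _BlipSketch(Elaboratable):
    def __init__(self):
        self.sig = Signal()    # level input (e.g. d_valid)
        self.blip = Signal()   # one-cycle pulse output (e.g. d_validblip)

    def elaborate(self, platform):
        m = Module()
        sig_r = Signal()  # registered copy of the input, one clock late
        m.d.sync += sig_r.eq(self.sig)
        # high for exactly one cycle when sig rises from 0 to 1
        m.d.comb += self.blip.eq(self.sig & ~sig_r)
        return m

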
# glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    def __init__(self, pspec):
        self.pspec = pspec
        self.disable_cache = (hasattr(pspec, "disable_cache") and
                              pspec.disable_cache == True)
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache()
        self.icache = ICache(pspec)
        # these names are from the perspective of here (LoadStore1)
        self.d_out = self.dcache.d_in   # in to dcache is out for LoadStore
        self.d_in = self.dcache.d_out   # out from dcache is in for LoadStore
        self.i_out = self.icache.i_in   # in to icache is out for LoadStore
        self.i_in = self.icache.i_out   # out from icache is in for LoadStore
        self.m_out = LoadStore1ToMMUType("m_out")   # out *to* MMU
        self.m_in = MMUToLoadStore1Type("m_in")     # in *from* MMU
        self.req = LDSTRequest(name="ldst_req")

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))
        self.ibus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()
        self.d_w_valid = Signal()
        self.d_validblip = Signal()

        # state info for LD/ST
        self.done = Signal()
        self.done_delay = Signal()
        # latch most of the input request
        self.load = Signal()
        self.tlbie = Signal()
        self.dcbz = Signal()
        self.addr = Signal(64)
        self.maddr = Signal(64)
        self.store_data = Signal(64)
        self.load_data = Signal(64)
        self.load_data_delay = Signal(64)
        self.byte_sel = Signal(8)
        #self.xerc : xer_common_t;
        #self.reserve = Signal()
        #self.atomic = Signal()
        #self.atomic_last = Signal()
        #self.rc = Signal()
        self.nc = Signal()              # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.state = Signal(State)
        self.instr_fault = Signal()    # indicator to request i-cache MMU lookup
        self.r_instr_fault = Signal()  # accessed in external_busy
        self.align_intr = Signal()
        self.busy = Signal()
        self.wait_dcache = Signal()
        self.wait_mmu = Signal()
        #self.mode_32bit = Signal()
        #self.intr_vec : integer range 0 to 16#fff#;
        #self.nia = Signal(64)
        #self.srr1 = Signal(16)
        # use these to set the dsisr or dar respectively
        self.mmu_set_spr = Signal()
        self.mmu_set_dsisr = Signal()
        self.mmu_set_dar = Signal()
        self.sprval_in = Signal(64)

        # ONLY access these read-only, do NOT attempt to change
        self.dsisr = Signal(32)
        self.dar = Signal(64)

    # when external_busy set, do not allow PortInterface to proceed
    def external_busy(self, m):
        return self.instr_fault | self.r_instr_fault

    def set_wr_addr(self, m, addr, mask, misalign, msr_pr, is_dcbz):
        m.d.comb += self.req.load.eq(0)  # store operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr_pr)  # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr_pr)   # problem-state ==> virt
        m.d.comb += self.req.align_intr.eq(misalign)
        m.d.comb += self.req.dcbz.eq(is_dcbz)

        # m.d.comb += Display("set_wr_addr %i dcbz %i",addr,is_dcbz)

        # option to disable the cache entirely for write
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None

    def set_rd_addr(self, m, addr, mask, misalign, msr_pr):
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.req.load.eq(1)  # load operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.align_intr.eq(misalign)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr_pr)  # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr_pr)   # problem-state ==> virt
        # BAD HACK! disable caching on LD when address is 0xCxxx_xxxx
        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
        with m.If(addr[28:] == Const(0xc, 4)):
            m.d.comb += self.req.nc.eq(1)
        # option to disable the cache entirely for read
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None  # FIXME return value

    def set_wr_data(self, m, data, wen):
        # do the "blip" on write data
        m.d.comb += self.d_valid.eq(1)
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.store_data.eq(data)
        #m.d.sync += self.d_out.byte_sel.eq(wen) # this might not be needed
        st_ok = self.done  # TODO indicates write data is valid
        return st_ok

    def get_rd_data(self, m):
        ld_ok = self.done_delay      # indicates read data is valid
        data = self.load_data_delay  # actual read data
        return data, ld_ok

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync

        # microwatt takes one more cycle before next operation can be issued
        sync += self.done_delay.eq(self.done)
        sync += self.load_data_delay.eq(self.load_data)

        # create dcache and icache module
        m.submodules.dcache = dcache = self.dcache
        m.submodules.icache = icache = self.icache

        # temp vars
        d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
        i_out, i_in, ibus = self.i_out, self.i_in, self.ibus
        m_out, m_in = self.m_out, self.m_in
        exc = self.pi.exc_o
        exception = exc.happened
        mmureq = Signal()

        # copy of address, but gets overridden for instr_fault
        maddr = Signal(64)
        m.d.comb += maddr.eq(self.addr)

        # create a blip (single pulse) on valid read/write request
        # this can be overridden in the FSM to get dcache to re-run
        # a request when MMU_LOOKUP completes.
        m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
        ldst_r = LDSTRequest("ldst_r")
        comb += Display("MMUTEST: LoadStore1 d_in.error=%i", d_in.error)

        # fsm skeleton
        with m.Switch(self.state):
            with m.Case(State.IDLE):
                with m.If((self.d_validblip | self.instr_fault) &
                          ~exc.happened):
                    comb += self.busy.eq(1)
                    sync += self.state.eq(State.ACK_WAIT)
                    sync += ldst_r.eq(self.req)  # copy of LDSTRequest on "blip"
                    # sync += Display("validblip self.req.virt_mode=%i",
                    #                 self.req.virt_mode)
                    with m.If(self.instr_fault):
                        comb += mmureq.eq(1)
                        sync += self.r_instr_fault.eq(1)
                        comb += maddr.eq(self.maddr)
                        sync += self.state.eq(State.MMU_LOOKUP)
                with m.Else():
                    sync += ldst_r.eq(0)

            # waiting for completion
            with m.Case(State.ACK_WAIT):
                comb += Display("MMUTEST: ACK_WAIT")
                comb += self.busy.eq(~exc.happened)

                with m.If(d_in.error):
                    # cache error is not necessarily "final", it could
                    # be that it was just a TLB miss
                    with m.If(d_in.cache_paradox):
                        comb += exception.eq(1)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        sync += Display("cache error -> update dsisr")
                        # MSB0 bit 38: access was a store, not a load
                        sync += self.dsisr[63 - 38].eq(~self.load)
                        # XXX there is no architected bit for this
                        # (probably should be a machine check in fact)
                        sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)

                    with m.Else():
                        # Look up the translation for TLB miss
                        # and also for permission error and RC error
                        # in case the PTE has been updated.
                        comb += mmureq.eq(1)
                        sync += self.state.eq(State.MMU_LOOKUP)
                with m.If(d_in.valid):
                    m.d.comb += self.done.eq(~mmureq)  # done if not doing MMU
                    with m.If(self.done):
                        sync += Display("ACK_WAIT, done %x", self.addr)
                    sync += self.state.eq(State.IDLE)
                    sync += ldst_r.eq(0)
                    with m.If(self.load):
                        m.d.comb += self.load_data.eq(d_in.data)

            # waiting here for the MMU TLB lookup to complete.
            # either re-try the dcache lookup or throw MMU exception
            with m.Case(State.MMU_LOOKUP):
                comb += self.busy.eq(~exception)
                with m.If(m_in.done):
                    with m.If(~self.r_instr_fault):
                        sync += Display("MMU_LOOKUP, done %x -> %x",
                                        self.addr, d_out.addr)
                        # retry the request now that the MMU has
                        # installed a TLB entry, if not exception raised
                        m.d.comb += self.d_out.valid.eq(~exception)
                        sync += self.state.eq(State.ACK_WAIT)
                        sync += ldst_r.eq(0)
                    with m.Else():
                        sync += self.state.eq(State.IDLE)
                        sync += self.r_instr_fault.eq(0)
                        comb += self.done.eq(1)

                with m.If(m_in.err):
                    # MMU RADIX exception thrown. XXX
                    # TODO: critical that the write here has to
                    # notify the MMU FSM of the change to dsisr
                    comb += exception.eq(1)
                    sync += Display("MMU RADIX exception thrown")
                    sync += Display("TODO: notify MMU of change to dsisr")
                    sync += self.dsisr[63 - 33].eq(m_in.invalid)
                    sync += self.dsisr[63 - 36].eq(m_in.perm_error)  # noexec
                    sync += self.dsisr[63 - 38].eq(~self.load)
                    sync += self.dsisr[63 - 44].eq(m_in.badtree)
                    sync += self.dsisr[63 - 45].eq(m_in.rc_error)
                    sync += self.state.eq(State.IDLE)

            with m.Case(State.TLBIE_WAIT):
                pass

        # MMU FSM communicating a request to update DSISR or DAR (OP_MTSPR)
        with m.If(self.mmu_set_spr):
            with m.If(self.mmu_set_dsisr):
                sync += self.dsisr.eq(self.sprval_in)
            with m.If(self.mmu_set_dar):
                sync += self.dar.eq(self.sprval_in)

        # alignment was checked in set_rd_addr/set_wr_addr: raise the exception
        with m.If(self.align_intr):
            comb += exc.happened.eq(1)
        # check for updating DAR
        with m.If(exception):
            sync += Display("exception %x", self.addr)
            # alignment error: store address in DAR
            with m.If(self.align_intr):
                sync += Display("alignment error: addr in DAR %x", self.addr)
                sync += self.dar.eq(self.addr)
            with m.Elif(~self.r_instr_fault):
                sync += Display("not instr fault, addr in DAR %x", self.addr)
                sync += self.dar.eq(self.addr)

        # when done or exception, return to idle state
        with m.If(self.done | exception):
            sync += self.state.eq(State.IDLE)
            comb += self.busy.eq(0)

        # happened, alignment, instr_fault, invalid.
        # note that all of these flow through - eventually to the TRAP
        # pipeline, via PowerDecoder2.
        comb += self.align_intr.eq(self.req.align_intr)
        comb += exc.invalid.eq(m_in.invalid)
        comb += exc.alignment.eq(self.align_intr)
        comb += exc.instr_fault.eq(self.r_instr_fault)
        # badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(m_in.badtree)
        comb += exc.perm_error.eq(m_in.perm_error)
        comb += exc.rc_error.eq(m_in.rc_error)
        comb += exc.segment_fault.eq(m_in.segerr)

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        comb += dbus.adr.eq(dcache.bus.adr)
        comb += dbus.dat_w.eq(dcache.bus.dat_w)
        comb += dbus.sel.eq(dcache.bus.sel)
        comb += dbus.cyc.eq(dcache.bus.cyc)
        comb += dbus.stb.eq(dcache.bus.stb)
        comb += dbus.we.eq(dcache.bus.we)

        comb += dcache.bus.dat_r.eq(dbus.dat_r)
        comb += dcache.bus.ack.eq(dbus.ack)
        if hasattr(dbus, "stall"):
            comb += dcache.bus.stall.eq(dbus.stall)

        # update d_out data when the write-valid flag is set
        with m.If(self.d_w_valid):
            m.d.sync += d_out.data.eq(self.store_data)
        #with m.Else():
        #    m.d.sync += d_out.data.eq(0)
        # unit test passes with that change

        # this must move into the FSM, conditionally noticing that
        # the "blip" comes from self.d_validblip.
        # task 1: look up in dcache
        # task 2: if dcache fails, look up in MMU.
        # do **NOT** confuse the two.
        with m.If(self.d_validblip):
            m.d.comb += self.d_out.valid.eq(~exc.happened)
            m.d.comb += d_out.load.eq(self.req.load)
            m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
            m.d.comb += self.addr.eq(self.req.addr)
            m.d.comb += d_out.nc.eq(self.req.nc)
            m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
            m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
            #m.d.comb += Display("validblip dcbz=%i addr=%x",
            #                    self.req.dcbz, self.req.addr)
            m.d.comb += d_out.dcbz.eq(self.req.dcbz)
        with m.Else():
            m.d.comb += d_out.load.eq(ldst_r.load)
            m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
            m.d.comb += self.addr.eq(ldst_r.addr)
            m.d.comb += d_out.nc.eq(ldst_r.nc)
            m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
            m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
            #m.d.comb += Display("no_validblip dcbz=%i addr=%x",
            #                    ldst_r.dcbz, ldst_r.addr)
            m.d.comb += d_out.dcbz.eq(ldst_r.dcbz)

        # XXX these should be possible to remove but for some reason
        # cannot be... yet. TODO, investigate
        m.d.comb += self.load_data.eq(d_in.data)
        m.d.comb += d_out.addr.eq(self.addr)

        # Update outputs to MMU
        m.d.comb += m_out.valid.eq(mmureq)
        m.d.comb += m_out.iside.eq(self.instr_fault)
        m.d.comb += m_out.load.eq(ldst_r.load)
        m.d.comb += m_out.priv.eq(self.priv_mode)
        # m_out.priv <= r.priv_mode; TODO
        m.d.comb += m_out.tlbie.eq(self.tlbie)
        # m_out.mtspr <= mmu_mtspr; # TODO
        # m_out.sprn <= sprn; # TODO
        m.d.comb += m_out.addr.eq(maddr)
        # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
        # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate

        return m

    def ports(self):
        yield from super().ports()
        # TODO: memory ports


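# Usage notes (informational only, no logic here).  LoadStore1 is not normally
# instantiated stand-alone: LDSTCompUnit drives the PortInterface (self.pi)
# created by PortInterfaceBase, which in turn invokes the set_rd_addr /
# set_wr_addr / set_wr_data / get_rd_data hooks above.  The surrounding core
# is then expected to wire up:
#   * self.dbus / self.ibus   - nmigen Wishbone Records (make_wb_layout)
#   * self.m_out / self.m_in  - the request/response link to the MMU FSM
#   * self.dsisr / self.dar   - read-only here; updated via mmu_set_spr
# TestSRAMLoadStore1 below is a unit-test convenience that replaces the
# external Wishbone fabric with a small SRAM.

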
class TestSRAMLoadStore1(LoadStore1):
    def __init__(self, pspec):
        super().__init__(pspec)
        pspec = self.pspec
        # small Memory (default depth: 32 entries)
        if (hasattr(pspec, "dmem_test_depth") and
                isinstance(pspec.dmem_test_depth, int)):
            depth = pspec.dmem_test_depth
        else:
            depth = 32
        print("TestSRAMLoadStore1 depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        for fanin in fanins:
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m

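
# Illustrative sketch only, not used above: the initiator-to-target hookup
# pattern from TestSRAMLoadStore1.elaborate(), written as a stand-alone
# helper.  It assumes "initiator" and "target" are nmigen Records carrying
# standard Wishbone field names (as produced by make_wb_layout() or SRAM.bus);
# the helper name itself is hypothetical.
def connect_wb_initiator_to_target(comb, initiator, target):
    "field-by-field hookup of a Wishbone initiator Record to a target Record"
    # fan-out: fields driven by the initiator, received by the target
    for name in ['adr', 'dat_w', 'sel', 'cyc', 'stb', 'we']:
        comb += getattr(target, name).eq(getattr(initiator, name))
    # fan-in: fields driven by the target, received by the initiator
    for name in ['dat_r', 'ack']:
        comb += getattr(initiator, name).eq(getattr(target, name))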