# add SECOND_REQ state to loadstore.py, not yet implemented
# [soc.git] / src / soc / fu / ldst / loadstore.py
1 """LoadStore1 FSM.
2
3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
6 by way of PortInterface. PortInterface is where things need extending,
7 such as adding dcbz support, etc.
8
9 this module basically handles "pure" load / store operations, and
10 its first job is to ask the D-Cache for the data. if that fails,
11 the second task (if virtual memory is enabled) is to ask the MMU
12 to perform a TLB, then to go *back* to the cache and ask again.
13
14 Links:
15
16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
17
18 """
19
20 from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
21 Record, Memory,
22 Const)
23 from nmutil.iocontrol import RecordObject
24 from nmutil.util import rising_edge, Display
25 from enum import Enum, unique
26
27 from soc.experiment.dcache import DCache
28 from soc.experiment.icache import ICache
29 from soc.experiment.pimem import PortInterfaceBase
30 from soc.experiment.mem_types import LoadStore1ToMMUType
31 from soc.experiment.mem_types import MMUToLoadStore1Type
32
33 from soc.minerva.wishbone import make_wb_layout
34 from soc.bus.sram import SRAM
35 from nmutil.util import Display
36
37
@unique
class State(Enum):
    """LoadStore1 FSM states (follows microwatt loadstore1.vhdl)."""
    IDLE = 0            # ready for instruction
    ACK_WAIT = 1        # waiting for ack from dcache
    MMU_LOOKUP = 2      # waiting for MMU to look up translation
    SECOND_REQ = 3      # second request for unaligned transfer
                        # NOTE(review): declared but not yet handled in the
                        # LoadStore1 FSM below ("not yet implemented")
44
45
# captures the LDSTRequest from the PortInterface, which "blips" most
# of this at us (pipeline-style).
class LDSTRequest(RecordObject):
    """Latched snapshot of one load/store request from the PortInterface."""

    def __init__(self, name=None):
        super().__init__(name=name)

        self.load = Signal()            # 1 = load, 0 = store
        self.dcbz = Signal()            # dcbz cache-line-zeroing store
        self.raddr = Signal(64)         # effective (real/virtual) address
        # self.store_data = Signal(64)  # this is already sync (on a delay)
        self.byte_sel = Signal(8)       # per-byte lane enables
        self.nc = Signal()              # non-cacheable access
        self.virt_mode = Signal()       # MSR.DR: address translation on
        self.priv_mode = Signal()       # not MSR.PR: privileged access
        self.mode_32bit = Signal()      # XXX UNUSED AT PRESENT
        self.align_intr = Signal()      # request raises alignment interrupt
62
63
# glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    """FSM glue between PortInterface, the D-cache/I-cache and the MMU.

    First job on a request is to ask the D-cache for the data.  If the
    D-cache reports an error that may merely be a TLB miss, the MMU is
    asked to perform a page-table walk (MMU_LOOKUP) and the D-cache
    request is then re-issued.  Also latches DSISR/DAR on exceptions and
    accepts DSISR/DAR updates from the MMU (OP_MTSPR path).
    """

    def __init__(self, pspec):
        self.pspec = pspec
        # option to force every access non-cacheable (see set_rd/wr_addr)
        self.disable_cache = (hasattr(pspec, "disable_cache") and
                              pspec.disable_cache == True)
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache(pspec)
        self.icache = ICache(pspec)
        # these names are from the perspective of here (LoadStore1)
        self.d_out = self.dcache.d_in     # in to dcache is out for LoadStore
        self.d_in = self.dcache.d_out     # out from dcache is in for LoadStore
        self.i_out = self.icache.i_in     # in to icache is out for LoadStore
        self.i_in = self.icache.i_out     # out from icache is in for LoadStore
        self.m_out = LoadStore1ToMMUType("m_out")   # out *to* MMU
        self.m_in = MMUToLoadStore1Type("m_in")     # in *from* MMU
        self.req = LDSTRequest(name="ldst_req")

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))
        self.ibus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()
        self.d_w_valid = Signal()
        self.d_validblip = Signal()

        # state info for LD/ST
        self.done = Signal()
        self.done_delay = Signal()      # done, one cycle later (see elaborate)
        # latch most of the input request
        self.load = Signal()
        self.tlbie = Signal()
        self.dcbz = Signal()
        self.raddr = Signal(64)
        self.maddr = Signal(64)         # address forwarded to the MMU
        self.store_data = Signal(64)
        self.load_data = Signal(64)
        self.load_data_delay = Signal(64)
        self.byte_sel = Signal(8)
        #self.xerc : xer_common_t;
        #self.reserve = Signal()
        #self.atomic = Signal()
        #self.atomic_last = Signal()
        #self.rc = Signal()
        self.nc = Signal()              # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.mode_32bit = Signal()      # XXX UNUSED AT PRESENT
        self.state = Signal(State)      # FSM state (see State enum)
        self.instr_fault = Signal()     # indicator to request i-cache MMU lookup
        self.r_instr_fault = Signal()   # accessed in external_busy
        self.align_intr = Signal()
        self.busy = Signal()
        self.wait_dcache = Signal()
        self.wait_mmu = Signal()
        #self.intr_vec : integer range 0 to 16#fff#;
        #self.nia = Signal(64)
        #self.srr1 = Signal(16)
        # use these to set the dsisr or dar respectively
        self.mmu_set_spr = Signal()
        self.mmu_set_dsisr = Signal()
        self.mmu_set_dar = Signal()
        self.sprval_in = Signal(64)

        # ONLY access these read-only, do NOT attempt to change
        self.dsisr = Signal(32)
        self.dar = Signal(64)

    # when external_busy set, do not allow PortInterface to proceed
    def external_busy(self, m):
        """Return a busy condition: an i-cache MMU lookup is in flight."""
        return self.instr_fault | self.r_instr_fault

    def set_wr_addr(self, m, addr, mask, misalign, msr, is_dcbz):
        """Latch a *store* request into self.req (combinatorially).

        addr/mask come from the PortInterface; msr supplies the
        privilege (PR), translation (DR) and 64-bit (SF) context.
        """
        m.d.comb += self.req.load.eq(0)  # store operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.raddr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr.pr)  # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr.dr)   # DR ==> virt
        m.d.comb += self.req.mode_32bit.eq(~msr.sf) # not-sixty-four ==> 32bit
        m.d.comb += self.req.dcbz.eq(is_dcbz)
        # XXX TODO sort out misalignment, mmu test5 fails
        m.d.comb += self.req.align_intr.eq(misalign)

        # m.d.comb += Display("set_wr_addr %i dcbz %i",addr,is_dcbz)

        # option to disable the cache entirely for write
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)

        # dcbz cannot do no-cache
        with m.If(is_dcbz & self.req.nc):
            m.d.comb += self.req.align_intr.eq(1)

        return None

    def set_rd_addr(self, m, addr, mask, misalign, msr):
        """Latch a *load* request into self.req and start the "blip".

        Unlike set_wr_addr this also raises d_valid immediately: reads
        have no separate write-data phase.
        """
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.req.load.eq(1)  # load operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.raddr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr.pr)  # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr.dr)   # DR ==> virt
        m.d.comb += self.req.mode_32bit.eq(~msr.sf) # not-sixty-four ==> 32bit
        # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
        with m.If(addr[28:] == Const(0xc, 4)):
            m.d.comb += self.req.nc.eq(1)
        # option to disable the cache entirely for read
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        # XXX TODO sort out misalignment, mmu test5 fails
        m.d.comb += self.req.align_intr.eq(misalign)
        return None #FIXME return value

    def set_wr_data(self, m, data, wen):
        """Present store data; returns the store-acknowledge condition."""
        # do the "blip" on write data
        m.d.comb += self.d_valid.eq(1)
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.store_data.eq(data)
        #m.d.sync += self.d_out.byte_sel.eq(wen) # this might not be needed
        st_ok = self.done  # TODO indicates write data is valid
        return st_ok

    def get_rd_data(self, m):
        """Return (data, valid) for a completed load, one cycle delayed."""
        ld_ok = self.done_delay      # indicates read data is valid
        data = self.load_data_delay  # actual read data
        return data, ld_ok

    def elaborate(self, platform):
        """Build the FSM, the dcache/icache submodules and MMU wiring."""
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync

        # microwatt takes one more cycle before next operation can be issued
        sync += self.done_delay.eq(self.done)
        sync += self.load_data_delay.eq(self.load_data)

        # create dcache and icache module
        m.submodules.dcache = dcache = self.dcache
        m.submodules.icache = icache = self.icache

        # temp vars
        d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
        i_out, i_in, ibus = self.i_out, self.i_in, self.ibus
        m_out, m_in = self.m_out, self.m_in
        exc = self.pi.exc_o
        exception = exc.happened
        mmureq = Signal()

        # copy of address, but gets over-ridden for instr_fault
        maddr = Signal(64)
        m.d.comb += maddr.eq(self.raddr)

        # create a blip (single pulse) on valid read/write request
        # this can be over-ridden in the FSM to get dcache to re-run
        # a request when MMU_LOOKUP completes.
        m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
        ldst_r = LDSTRequest("ldst_r")
        comb += Display("MMUTEST: LoadStore1 d_in.error=%i",d_in.error)

        # fsm skeleton
        with m.Switch(self.state):
            with m.Case(State.IDLE):
                with m.If((self.d_validblip | self.instr_fault) &
                          ~exc.happened):
                    comb += self.busy.eq(1)
                    sync += self.state.eq(State.ACK_WAIT)
                    sync += ldst_r.eq(self.req)  # copy of LDSTRequest on "blip"
                    # sync += Display("validblip self.req.virt_mode=%i",
                    # self.req.virt_mode)
                    # an i-cache miss bypasses the dcache entirely and
                    # goes straight to the MMU for the i-side lookup
                    with m.If(self.instr_fault):
                        comb += mmureq.eq(1)
                        sync += self.r_instr_fault.eq(1)
                        comb += maddr.eq(self.maddr)
                        sync += self.state.eq(State.MMU_LOOKUP)
                    with m.Else():
                        sync += self.r_instr_fault.eq(0)
                with m.Else():
                    sync += ldst_r.eq(0)

            # waiting for completion
            with m.Case(State.ACK_WAIT):
                comb += Display("MMUTEST: ACK_WAIT")
                comb += self.busy.eq(~exc.happened)

                with m.If(d_in.error):
                    # cache error is not necessarily "final", it could
                    # be that it was just a TLB miss
                    with m.If(d_in.cache_paradox):
                        comb += exception.eq(1)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        sync += Display("cache error -> update dsisr")
                        # NOTE: (63 - n) converts the Power-ISA MSB0 bit
                        # numbering to nmigen's LSB0 indexing
                        sync += self.dsisr[63 - 38].eq(~ldst_r.load)
                        # XXX there is no architected bit for this
                        # (probably should be a machine check in fact)
                        sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)
                        sync += self.r_instr_fault.eq(0)

                    with m.Else():
                        # Look up the translation for TLB miss
                        # and also for permission error and RC error
                        # in case the PTE has been updated.
                        comb += mmureq.eq(1)
                        sync += self.state.eq(State.MMU_LOOKUP)
                with m.If(d_in.valid):
                    m.d.comb += self.done.eq(~mmureq)  # done if not doing MMU
                    with m.If(self.done):
                        sync += Display("ACK_WAIT, done %x", self.raddr)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                    with m.If(self.load):
                        m.d.comb += self.load_data.eq(d_in.data)

            # waiting here for the MMU TLB lookup to complete.
            # either re-try the dcache lookup or throw MMU exception
            with m.Case(State.MMU_LOOKUP):
                comb += self.busy.eq(~exception)
                with m.If(m_in.done):
                    with m.If(~self.r_instr_fault):
                        sync += Display("MMU_LOOKUP, done %x -> %x",
                                        self.raddr, d_out.addr)
                        # retry the request now that the MMU has
                        # installed a TLB entry, if not exception raised
                        m.d.comb += self.d_out.valid.eq(~exception)
                        sync += self.state.eq(State.ACK_WAIT)
                        sync += ldst_r.eq(0)
                    with m.Else():
                        # i-side lookup: nothing to re-issue to dcache
                        sync += self.state.eq(State.IDLE)
                        sync += self.r_instr_fault.eq(0)
                        comb += self.done.eq(1)

                with m.If(m_in.err):
                    # MMU RADIX exception thrown. XXX
                    # TODO: critical that the write here has to
                    # notify the MMU FSM of the change to dsisr
                    comb += exception.eq(1)
                    comb += self.done.eq(1)
                    sync += Display("MMU RADIX exception thrown")
                    sync += self.dsisr[63 - 33].eq(m_in.invalid)
                    sync += self.dsisr[63 - 36].eq(m_in.perm_error)  # noexec
                    sync += self.dsisr[63 - 38].eq(~ldst_r.load)
                    sync += self.dsisr[63 - 44].eq(m_in.badtree)
                    sync += self.dsisr[63 - 45].eq(m_in.rc_error)
                    sync += self.state.eq(State.IDLE)
                    # exception thrown, clear out instruction fault state
                    sync += self.r_instr_fault.eq(0)

        # MMU FSM communicating a request to update DSISR or DAR (OP_MTSPR)
        with m.If(self.mmu_set_spr):
            with m.If(self.mmu_set_dsisr):
                sync += self.dsisr.eq(self.sprval_in)
            with m.If(self.mmu_set_dar):
                sync += self.dar.eq(self.sprval_in)

        # hmmm, alignment occurs in set_rd_addr/set_wr_addr, note exception
        with m.If(self.align_intr):
            comb += exc.happened.eq(1)
        # check for updating DAR
        with m.If(exception):
            sync += Display("exception %x", self.raddr)
            # alignment error: store address in DAR
            with m.If(self.align_intr):
                sync += Display("alignment error: addr in DAR %x", self.raddr)
                sync += self.dar.eq(self.raddr)
            with m.Elif(~self.r_instr_fault):
                sync += Display("not instr fault, addr in DAR %x", self.raddr)
                sync += self.dar.eq(self.raddr)

        # when done or exception, return to idle state
        with m.If(self.done | exception):
            sync += self.state.eq(State.IDLE)
            comb += self.busy.eq(0)

        # happened, alignment, instr_fault, invalid.
        # note that all of these flow through - eventually to the TRAP
        # pipeline, via PowerDecoder2.
        comb += self.align_intr.eq(self.req.align_intr)
        comb += exc.invalid.eq(m_in.invalid)
        comb += exc.alignment.eq(self.align_intr)
        comb += exc.instr_fault.eq(self.r_instr_fault)
        # badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(m_in.badtree)
        comb += exc.perm_error.eq(m_in.perm_error)
        comb += exc.rc_error.eq(m_in.rc_error)
        comb += exc.segment_fault.eq(m_in.segerr)

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        comb += dbus.adr.eq(dcache.bus.adr)
        comb += dbus.dat_w.eq(dcache.bus.dat_w)
        comb += dbus.sel.eq(dcache.bus.sel)
        comb += dbus.cyc.eq(dcache.bus.cyc)
        comb += dbus.stb.eq(dcache.bus.stb)
        comb += dbus.we.eq(dcache.bus.we)

        comb += dcache.bus.dat_r.eq(dbus.dat_r)
        comb += dcache.bus.ack.eq(dbus.ack)
        if hasattr(dbus, "stall"):
            comb += dcache.bus.stall.eq(dbus.stall)

        # update out d data when flag set
        with m.If(self.d_w_valid):
            m.d.sync += d_out.data.eq(self.store_data)
        #with m.Else():
        #    m.d.sync += d_out.data.eq(0)
        # unit test passes with that change

        # this must move into the FSM, conditionally noticing that
        # the "blip" comes from self.d_validblip.
        # task 1: look up in dcache
        # task 2: if dcache fails, look up in MMU.
        # do **NOT** confuse the two.
        # on the blip cycle the live request drives dcache; on all
        # other cycles the latched copy (ldst_r) drives it
        with m.If(self.d_validblip):
            m.d.comb += self.d_out.valid.eq(~exc.happened)
            m.d.comb += d_out.load.eq(self.req.load)
            m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
            m.d.comb += self.raddr.eq(self.req.raddr)
            m.d.comb += d_out.nc.eq(self.req.nc)
            m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
            m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
            #m.d.comb += Display("validblip dcbz=%i addr=%x",
            #self.req.dcbz,self.req.addr)
            m.d.comb += d_out.dcbz.eq(self.req.dcbz)
        with m.Else():
            m.d.comb += d_out.load.eq(ldst_r.load)
            m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
            m.d.comb += self.raddr.eq(ldst_r.raddr)
            m.d.comb += d_out.nc.eq(ldst_r.nc)
            m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
            m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
            #m.d.comb += Display("no_validblip dcbz=%i addr=%x",
            #ldst_r.dcbz,ldst_r.addr)
            m.d.comb += d_out.dcbz.eq(ldst_r.dcbz)

        # XXX these should be possible to remove but for some reason
        # cannot be... yet. TODO, investigate
        m.d.comb += self.load_data.eq(d_in.data)
        m.d.comb += d_out.addr.eq(self.raddr)

        # Update outputs to MMU
        m.d.comb += m_out.valid.eq(mmureq)
        m.d.comb += m_out.iside.eq(self.instr_fault)
        m.d.comb += m_out.load.eq(ldst_r.load)
        m.d.comb += m_out.priv.eq(self.priv_mode)
        # m_out.priv <= r.priv_mode; TODO
        m.d.comb += m_out.tlbie.eq(self.tlbie)
        # m_out.mtspr <= mmu_mtspr; # TODO
        # m_out.sprn <= sprn; # TODO
        m.d.comb += m_out.addr.eq(maddr)
        # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
        # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate

        return m

    def ports(self):
        """Yield simulation/synthesis ports (delegates to PortInterfaceBase)."""
        yield from super().ports()
        # TODO: memory ports
425
426
class TestSRAMLoadStore1(LoadStore1):
    """LoadStore1 variant whose Wishbone D-bus is backed by a small SRAM.

    Used for unit testing: the dcache's Wishbone master (dbus) is wired
    directly to an on-chip SRAM target instead of a real memory system.
    """

    def __init__(self, pspec):
        super().__init__(pspec)
        pspec = self.pspec
        # small 32-entry Memory unless pspec overrides the depth
        if (hasattr(pspec, "dmem_test_depth") and
                isinstance(pspec.dmem_test_depth, int)):
            depth = pspec.dmem_test_depth
        else:
            depth = 32
        print("TestSRAMBareLoadStoreUnit depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        """Build LoadStore1 then attach the SRAM to the D-bus."""
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            # fix: the original added this identical comb assignment twice
            # per fanout signal (copy-paste duplicate) - once is enough
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        for fanin in fanins:
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m
463