3 # License for original copyright mmu.vhdl by microwatt authors: CC4
4 # License for copyrighted modifications made in mmu.py: LGPLv3+
6 # This derivative work although includes CC4 licensed material is
7 # covered by the LGPLv3+
11 based on Anton Blanchard microwatt mmu.vhdl
14 from enum
import Enum
, unique
15 from nmigen
import (C
, Module
, Signal
, Elaboratable
, Mux
, Cat
, Repl
, Signal
)
16 from nmigen
.cli
import main
17 from nmigen
.cli
import rtlil
18 from nmutil
.iocontrol
import RecordObject
19 from nmutil
.byterev
import byte_reverse
20 from nmutil
.mask
import Mask
, masked
21 from nmutil
.util
import Display
23 # NOTE: to use cxxsim, export NMIGEN_SIM_MODE=cxxsim from the shell
24 # Also, check out the cxxsim nmigen branch, and latest yosys from git
25 from nmutil
.sim_tmp_alternative
import Simulator
, Settle
27 from nmutil
.util
import wrap
29 from soc
.experiment
.mem_types
import (LoadStore1ToMMUType
,
# NOTE(review): this whole file is a line-exploded capture of mmu.py; the
# leading integers embedded in each line are the ORIGINAL file's line
# numbers, not code.  Gaps in that numbering mean lines are missing from
# this capture.  Code bytes are left untouched below; only comments added.
# Debug switch: when True, extra Display() messages fire on invalid-PTE
# paths (see the "MMUBUG: mbits is invalid" message further down).
37 display_invalid
= True
# First value of the walk state machine; comment below notes that zero is
# deliberately chosen so that r.state resets to IDLE.
41 IDLE
= 0 # zero is default on reset for r.state
# RegStage: the MMU's registered (per-clock latched) state, built on
# nmutil's RecordObject so each field below behaves as a named Signal.
# NOTE(review): original lines 57-60, 63, 65-66, 68-69 and 77 are missing
# from this capture -- RegStage presumably declares additional fields
# (e.g. valid/iside/store/priv and the pde/pid registers referenced by the
# methods further down) -- TODO confirm against the upstream file.
53 class RegStage(RecordObject
):
54 def __init__(self
, name
=None):
55 super().__init
__(name
=name
)
56 # latched request from loadstore1
61 self
.addr
= Signal(64)
62 self
.inval_all
= Signal()
# cached process-table base register (64-bit)
64 self
.prtbl
= Signal(64)
# current state-machine state; per the inline comment, resets to IDLE
67 self
.state
= Signal(State
) # resets to IDLE
# cached page-table root entries: pgtbl0/pgtbl3 with their valid flags
# (selected elsewhere by the top address bit, addr[63])
70 self
.pgtbl0
= Signal(64)
71 self
.pt0_valid
= Signal()
72 self
.pgtbl3
= Signal(64)
73 self
.pt3_valid
= Signal()
# radix-walk bookkeeping: current shift amount, mask size, and the
# 56-bit page/table base used for address generation
74 self
.shift
= Signal(6)
75 self
.mask_size
= Signal(5)
76 self
.pgbase
= Signal(56)
# error flags reported back to loadstore1 at RADIX_FINISH
78 self
.invalid
= Signal()
79 self
.badtree
= Signal()
80 self
.segerror
= Signal()
81 self
.perm_err
= Signal()
82 self
.rc_error
= Signal()
# MMU: radix-tree page-table walker (nmigen Elaboratable).
# NOTE(review): the docstring's opening quotes (original lines 86-87) and
# the `def __init__(self):` line (original lines 91-92) are missing from
# this capture; lines 88-90 below are the docstring body.
85 class MMU(Elaboratable
):
88 Supports 4-level trees as in arch 3.0B, but not the
89 two-step translation for guests under a hypervisor
90 (i.e. there is no gRA -> hRA translation).
# Port records: request in from loadstore1, response out to loadstore1,
# request out to / completion in from the dcache, and TLB-load port out
# to the icache.  Types come from soc.experiment.mem_types.
93 self
.l_in
= LoadStore1ToMMUType()
94 self
.l_out
= MMUToLoadStore1Type()
95 self
.d_out
= MMUToDCacheType()
96 self
.d_in
= DCacheToMMUType()
97 self
.i_out
= MMUToICacheType()
# radix_tree_idle: combinatorial logic for the IDLE state.  Selects the
# cached page-table root by addr[63], extracts rts/mbits from it, and on
# an incoming request (l_in.valid / l_in.tlbie / l_in.mtspr) decides the
# next state: DO_TLBIE, PROC_TBL_READ, SEGMENT_CHECK or RADIX_FINISH.
# NOTE(review): original lines 100-107 are missing -- presumably the
# declarations of the locals used below (comb, sync, pgtbl, pt_valid,
# rts, mbits) -- TODO confirm against the upstream file.
99 def radix_tree_idle(self
, m
, l_in
, r
, v
):
# top address bit selects which cached root to use: pgtbl0 when clear...
108 with m
.If(~l_in
.addr
[63]):
109 comb
+= pgtbl
.eq(r
.pgtbl0
)
110 comb
+= pt_valid
.eq(r
.pt0_valid
)
# ...pgtbl3 otherwise.  NOTE(review): original line 111 (presumably
# `with m.Else():`) is missing from this capture.
112 comb
+= pgtbl
.eq(r
.pgtbl3
)
113 comb
+= pt_valid
.eq(r
.pt3_valid
)
115 # rts == radix tree size, number of address bits
# (assembled from two bit-fields of the root entry)
117 comb
+= rts
.eq(Cat(pgtbl
[5:8], pgtbl
[61:63]))
119 # mbits == number of address bits to index top
121 comb
+= mbits
.eq(pgtbl
[0:5])
123 # set v.shift to rts so that we can use finalmask
124 # for the segment check
125 comb
+= v
.shift
.eq(rts
)
126 comb
+= v
.mask_size
.eq(mbits
[0:5])
127 comb
+= v
.pgbase
.eq(Cat(C(0, 8), pgtbl
[8:56]))
# latch the incoming request: address, instruction-side flag, store
# (neither a load nor an instruction fetch), and privilege.
129 with m
.If(l_in
.valid
):
130 comb
+= v
.addr
.eq(l_in
.addr
)
131 comb
+= v
.iside
.eq(l_in
.iside
)
132 comb
+= v
.store
.eq(~
(l_in
.load | l_in
.iside
))
133 comb
+= v
.priv
.eq(l_in
.priv
)
135 comb
+= Display("state %d l_in.valid addr %x iside %d store %d "
136 "rts %x mbits %x pt_valid %d",
137 v
.state
, v
.addr
, v
.iside
, v
.store
,
138 rts
, mbits
, pt_valid
)
# tlbie request path.  NOTE(review): original lines 143 and 145-150 are
# missing -- the inval_all expression at line 144 is visibly truncated
# (no closing paren) and presumably continues with further terms.
140 with m
.If(l_in
.tlbie
):
141 # Invalidate all iTLB/dTLB entries for
142 # tlbie with RB[IS] != 0 or RB[AP] != 0,
144 comb
+= v
.inval_all
.eq(l_in
.slbia
151 # The RIC field of the tlbie instruction
152 # comes across on the sprn bus as bits 2--3.
153 # RIC=2 flushes process table caches.
154 with m
.If(l_in
.sprn
[3]):
155 comb
+= v
.pt0_valid
.eq(0)
156 comb
+= v
.pt3_valid
.eq(0)
157 comb
+= v
.state
.eq(State
.DO_TLBIE
)
# translation-request path.  NOTE(review): original line 158 (presumably
# the `with m.Else():` guarding the non-tlbie case) is missing.
159 comb
+= v
.valid
.eq(1)
160 with m
.If(~pt_valid
):
161 # need to fetch process table entry
162 # set v.shift so we can use finalmask
163 # for generating the process table
165 comb
+= v
.shift
.eq(r
.prtbl
[0:5])
166 comb
+= v
.state
.eq(State
.PROC_TBL_READ
)
168 with m
.Elif(mbits
== 0):
169 # Use RPDS = 0 to disable radix tree walks
170 comb
+= v
.state
.eq(State
.RADIX_FINISH
)
171 comb
+= v
.invalid
.eq(1)
# NOTE(review): original line 172 (guard, presumably
# `if(display_invalid):`) and the continuation of the Display string
# (lines 174-175) are missing; line 173 is visibly truncated.
173 sync
+= Display("MMUBUG: Use RPDS = 0 to disable"
# normal case: go do the segment check.  NOTE(review): the enclosing
# `with m.Else():` (original line 177 area) is missing.
176 comb
+= v
.state
.eq(State
.SEGMENT_CHECK
)
# mtspr path: writes to PID invalidate L1 TLBs and pgtbl0; writes to
# PRTBL additionally invalidate pgtbl3 (see comments below).
178 with m
.If(l_in
.mtspr
):
179 # Move to PID needs to invalidate L1 TLBs
180 # and cached pgtbl0 value. Move to PRTBL
181 # does that plus invalidating the cached
182 # pgtbl3 value as well.
183 with m
.If(~l_in
.sprn
[9]):
184 comb
+= v
.pid
.eq(l_in
.rs
[0:32])
# NOTE(review): original line 185 (presumably `with m.Else():`) and
# line 188 are missing from this capture.
186 comb
+= v
.prtbl
.eq(l_in
.rs
)
187 comb
+= v
.pt3_valid
.eq(0)
189 comb
+= v
.pt0_valid
.eq(0)
190 comb
+= v
.inval_all
.eq(1)
191 comb
+= v
.state
.eq(State
.DO_TLBIE
)
# proc_tbl_wait: handle completion of the process-table read.  Caches the
# returned entry into pgtbl3 or pgtbl0 (by addr[63]), re-derives rts and
# mbits from it, and moves to SEGMENT_CHECK -- or RADIX_FINISH/invalid
# when mbits is unusable.
# NOTE(review): original line 194 (presumably `comb = m.d.comb`) and the
# Else/declaration lines at 198, 201-204, 207, 210, 215-216, 218 are
# missing from this capture.
193 def proc_tbl_wait(self
, m
, v
, r
, data
):
195 with m
.If(r
.addr
[63]):
196 comb
+= v
.pgtbl3
.eq(data
)
197 comb
+= v
.pt3_valid
.eq(1)
# NOTE(review): the `with m.Else():` (original line 198) is missing.
199 comb
+= v
.pgtbl0
.eq(data
)
200 comb
+= v
.pt0_valid
.eq(1)
205 # rts == radix tree size, # address bits being translated
206 comb
+= rts
.eq(Cat(data
[5:8], data
[61:63]))
208 # mbits == # address bits to index top level of tree
209 comb
+= mbits
.eq(data
[0:5])
211 # set v.shift to rts so that we can use finalmask for the segment check
212 comb
+= v
.shift
.eq(rts
)
213 comb
+= v
.mask_size
.eq(mbits
[0:5])
214 comb
+= v
.pgbase
.eq(Cat(C(0, 8), data
[8:56]))
# success path: proceed to the segment check.  NOTE(review): the guard
# around lines 217 vs 219-221 (presumably an If/Else on mbits validity,
# original lines 215-216 and 218) is missing from this capture.
217 comb
+= v
.state
.eq(State
.SEGMENT_CHECK
)
219 comb
+= v
.state
.eq(State
.RADIX_FINISH
)
220 comb
+= v
.invalid
.eq(1)
221 if(display_invalid
): m
.d
.sync
+= Display("MMUBUG: mbits is invalid")
# radix_read_wait: handle a returned page-directory/table entry.  A leaf
# entry gets its permission (EAA bits) and reference/change bits checked
# before loading the TLB; a non-leaf entry advances the walk one level
# (RADIX_LOOKUP); an invalid entry finishes with an error.
# NOTE(review): original lines 224-233 are missing -- presumably the
# declarations of comb/sync and the locals perm_ok, rc_ok, mbits, valid,
# leaf, badtree used below -- TODO confirm against the upstream file.
223 def radix_read_wait(self
, m
, v
, r
, d_in
, data
):
234 comb
+= Display("RDW %016x done %d "
235 "perm %d rc %d mbits %d shf %d "
236 "valid %d leaf %d bad %d",
237 data
, d_in
.done
, perm_ok
, rc_ok
,
238 mbits
, r
.shift
, valid
, leaf
, badtree
)
# latch the returned entry as the current PDE
241 comb
+= v
.pde
.eq(data
)
# decode the entry's top bits
244 comb
+= valid
.eq(data
[63]) # valid=data[63]
245 comb
+= leaf
.eq(data
[62]) # leaf=data[62]
# NOTE(review): duplicate of line 241 above; the branch structure that
# distinguishes them (original lines 242-243, 246, 248-250) is missing.
247 comb
+= v
.pde
.eq(data
)
251 # check permissions and RC bits
252 with m
.If(r
.priv | ~data
[3]):
# NOTE(review): original lines 253 and 255 are missing (presumably an
# If/Else on r.iside selecting between the two perm_ok forms).
254 comb
+= perm_ok
.eq(data
[1] |
(data
[2] & ~r
.store
))
256 # no IAMR, so no KUEP support for now
257 # deny execute permission if cache inhibited
258 comb
+= perm_ok
.eq(data
[0] & ~data
[5])
# reference bit, plus change bit required for stores
260 comb
+= rc_ok
.eq(data
[8] & (data
[7] | ~r
.store
))
261 with m
.If(perm_ok
& rc_ok
):
262 comb
+= v
.state
.eq(State
.RADIX_LOAD_TLB
)
# NOTE(review): `with m.Else():` (original line 263) is missing.
264 comb
+= v
.state
.eq(State
.RADIX_FINISH
)
265 comb
+= v
.perm_err
.eq(~perm_ok
)
266 # permission error takes precedence over RC error
267 comb
+= v
.rc_error
.eq(perm_ok
)
# non-leaf entry: validate the next-level size field.  NOTE(review):
# original lines 268-270 (branch header) and 273-275 (rest of the
# badtree expression -- line 272 is visibly truncated) are missing.
271 comb
+= mbits
.eq(data
[0:5])
272 comb
+= badtree
.eq((mbits
< 5) |
276 comb
+= v
.state
.eq(State
.RADIX_FINISH
)
277 comb
+= v
.badtree
.eq(1)
# descend one level: reduce shift, set mask size and next table base
279 comb
+= v
.shift
.eq(r
.shift
- mbits
)
280 comb
+= v
.mask_size
.eq(mbits
[0:5])
281 comb
+= v
.pgbase
.eq(Cat(C(0, 8), data
[8:56]))
282 comb
+= v
.state
.eq(State
.RADIX_LOOKUP
)
285 # non-present PTE, generate a DSI
286 comb
+= v
.state
.eq(State
.RADIX_FINISH
)
287 comb
+= v
.invalid
.eq(1)
# NOTE(review): the `if(display_invalid):` guard (original line 288)
# appears to be missing before this Display.
289 sync
+= Display("MMUBUG: non-present PTE, generate a DSI")
# segment_check: validate the effective address against the segment size.
# Raises segerror when the address's upper bits are not a sign-extension
# (addr[63] != addr[62]) or when address bits outside finalmask are set;
# raises badtree for out-of-range mbits; otherwise proceeds to
# RADIX_LOOKUP.
# NOTE(review): original lines 292-295 are missing -- presumably comb and
# the local declarations of mbits and nonzero -- TODO confirm.
291 def segment_check(self
, m
, v
, r
, data
, finalmask
):
296 comb
+= mbits
.eq(r
.mask_size
)
# adjust shift for the 31-12 segment-vs-page offset
297 comb
+= v
.shift
.eq(r
.shift
+ (31 - 12) - mbits
)
298 comb
+= nonzero
.eq((r
.addr
[31:62] & ~finalmask
[0:31]).bool())
299 with m
.If((r
.addr
[63] ^ r
.addr
[62]) | nonzero
):
300 comb
+= v
.state
.eq(State
.RADIX_FINISH
)
301 comb
+= v
.segerror
.eq(1)
302 with m
.Elif((mbits
< 5) |
(mbits
> 16) |
303 (mbits
> (r
.shift
+ (31-12)))):
304 comb
+= v
.state
.eq(State
.RADIX_FINISH
)
305 comb
+= v
.badtree
.eq(1)
# success path.  NOTE(review): `with m.Else():` (original line 306) is
# missing from this capture.
307 comb
+= v
.state
.eq(State
.RADIX_LOOKUP
)
# mmu_0: SPR read-back multiplexing plus debug Display tracing of the
# state machine (miss arrival, completion with/without error, lookups).
# NOTE(review): original lines 310-312 are missing -- presumably the
# comb/sync declarations used below.
309 def mmu_0(self
, m
, r
, rin
, l_in
, l_out
, d_out
, addrsh
, mask
):
313 # Multiplex internal SPR values back to loadstore1,
314 # selected by l_in.sprn.
315 with m
.If(l_in
.sprn
[9]):
316 comb
+= l_out
.sprval
.eq(r
.prtbl
)
# NOTE(review): `with m.Else():` (original line 317) is missing.
318 comb
+= l_out
.sprval
.eq(r
.pid
)
320 with m
.If(rin
.valid
):
321 sync
+= Display("MMU got tlb miss for %x", rin
.addr
)
323 with m
.If(l_out
.done
):
324 sync
+= Display("MMU completing op without error")
326 with m
.If(l_out
.err
):
327 sync
+= Display("MMU completing op with err invalid="
328 "%d badtree=%d", l_out
.invalid
, l_out
.badtree
)
330 with m
.If(rin
.state
== State
.RADIX_LOOKUP
):
331 sync
+= Display ("radix lookup shift=%d msize=%d",
332 rin
.shift
, rin
.mask_size
)
# NOTE(review): the `f` string prefix here (original line 335) looks
# accidental -- the string has no {..} interpolations and uses %-style
# Display arguments like every other call in this file; suggest
# dropping the prefix upstream.
334 with m
.If(r
.state
== State
.RADIX_LOOKUP
):
335 sync
+= Display(f
"send load addr=%x addrsh=%d mask=%x",
336 d_out
.addr
, addrsh
, mask
)
# elaborate: builds the MMU module -- registered state, default values,
# byte-swap of dcache data, mask generators, the main state-machine
# Switch, address generation for process-table / page-table reads, and
# the output wiring to loadstore1, dcache and icache.
# NOTE(review): this capture has large gaps (original lines 340-346,
# 348, 350-357, 359-366, 369, 371-373, 375-377, 388, 392, 397, 403, 406,
# 410, 412, 415, 419, 422, 425, 429-430, 433, 436, 439, 441, 446, 449,
# 452-453, 455, 458, 462, 468, 471, 475, 479, 483-487, 495, 497, 505,
# 512, and everything after 517) -- the Module/locals declarations
# (m, comb, sync, r, v, l_in/l_out/d_in/d_out/i_out, addrsh, mask,
# dcreq, tlb_load, itlb_load, tlbie_req, prtbl_rd, effpid, addr, pte)
# and several Else branches are presumably among the missing lines --
# TODO confirm against the upstream file.
339 def elaborate(self
, platform
):
347 finalmask
= Signal(44)
349 self
.rin
= rin
= RegStage("r_in")
358 self
.mmu_0(m
, r
, rin
, l_in
, l_out
, d_out
, addrsh
, mask
)
# generated dcache addresses for process-table and page-table reads
367 prtb_adr
= Signal(64)
368 pgtb_adr
= Signal(64)
370 tlb_data
= Signal(64)
# per-cycle defaults: clear all flags/requests unless a state sets them
374 comb
+= v
.valid
.eq(0)
378 comb
+= v
.invalid
.eq(0)
379 comb
+= v
.badtree
.eq(0)
380 comb
+= v
.segerror
.eq(0)
381 comb
+= v
.perm_err
.eq(0)
382 comb
+= v
.rc_error
.eq(0)
383 comb
+= tlb_load
.eq(0)
384 comb
+= itlb_load
.eq(0)
385 comb
+= tlbie_req
.eq(0)
386 comb
+= v
.inval_all
.eq(0)
387 comb
+= prtbl_rd
.eq(0)
389 # Radix tree data structures in memory are
390 # big-endian, so we need to byte-swap them
391 data
= byte_reverse(m
, "data", d_in
.data
, 8)
393 # generate mask for extracting address fields for PTE addr generation
394 m
.submodules
.pte_mask
= pte_mask
= Mask(16-5)
395 comb
+= pte_mask
.shift
.eq(r
.mask_size
- 5)
396 comb
+= mask
.eq(Cat(C(0x1f, 5), pte_mask
.mask
))
398 # generate mask for extracting address bits to go in
399 # TLB entry in order to support pages > 4kB
400 m
.submodules
.tlb_mask
= tlb_mask
= Mask(44)
401 comb
+= tlb_mask
.shift
.eq(r
.shift
)
402 comb
+= finalmask
.eq(tlb_mask
.mask
)
404 with m
.If(r
.state
!= State
.IDLE
):
405 sync
+= Display("MMU state %d %016x", r
.state
, data
)
# main state machine: one Case per State, mostly delegating to the
# per-state methods defined above
407 with m
.Switch(r
.state
):
408 with m
.Case(State
.IDLE
):
409 self
.radix_tree_idle(m
, l_in
, r
, v
)
411 with m
.Case(State
.DO_TLBIE
):
413 comb
+= tlbie_req
.eq(1)
414 comb
+= v
.state
.eq(State
.TLB_WAIT
)
416 with m
.Case(State
.TLB_WAIT
):
417 with m
.If(d_in
.done
):
418 comb
+= v
.state
.eq(State
.RADIX_FINISH
)
420 with m
.Case(State
.PROC_TBL_READ
):
421 sync
+= Display(" TBL_READ %016x", prtb_adr
)
423 comb
+= prtbl_rd
.eq(1)
424 comb
+= v
.state
.eq(State
.PROC_TBL_WAIT
)
426 with m
.Case(State
.PROC_TBL_WAIT
):
427 with m
.If(d_in
.done
):
428 self
.proc_tbl_wait(m
, v
, r
, data
)
# NOTE(review): original lines 429-430 (presumably an error branch
# header, e.g. `with m.If(d_in.err):`) are missing here.
431 comb
+= v
.state
.eq(State
.RADIX_FINISH
)
432 comb
+= v
.badtree
.eq(1)
434 with m
.Case(State
.SEGMENT_CHECK
):
435 self
.segment_check(m
, v
, r
, data
, finalmask
)
437 with m
.Case(State
.RADIX_LOOKUP
):
438 sync
+= Display(" RADIX_LOOKUP")
440 comb
+= v
.state
.eq(State
.RADIX_READ_WAIT
)
442 with m
.Case(State
.RADIX_READ_WAIT
):
443 sync
+= Display(" READ_WAIT")
444 with m
.If(d_in
.done
):
445 self
.radix_read_wait(m
, v
, r
, d_in
, data
)
# NOTE(review): as above, an error-branch header (original line 446)
# appears to be missing before this badtree finish.
447 comb
+= v
.state
.eq(State
.RADIX_FINISH
)
448 comb
+= v
.badtree
.eq(1)
450 with m
.Case(State
.RADIX_LOAD_TLB
):
451 comb
+= tlb_load
.eq(1)
# NOTE(review): original lines 452-453 and 455 are missing --
# presumably an If/Else on r.iside choosing dcache TLB_WAIT vs
# icache itlb_load + IDLE.
454 comb
+= v
.state
.eq(State
.TLB_WAIT
)
456 comb
+= itlb_load
.eq(1)
457 comb
+= v
.state
.eq(State
.IDLE
)
459 with m
.Case(State
.RADIX_FINISH
):
460 sync
+= Display(" RADIX_FINISH")
461 comb
+= v
.state
.eq(State
.IDLE
)
# completion: latch err (OR of all error flags) and done (= no error)
463 with m
.If((v
.state
== State
.RADIX_FINISH
) |
464 ((v
.state
== State
.RADIX_LOAD_TLB
) & r
.iside
)):
465 comb
+= v
.err
.eq(v
.invalid | v
.badtree | v
.segerror
466 | v
.perm_err | v
.rc_error
)
467 comb
+= v
.done
.eq(~v
.err
)
# effective PID: r.pid for user addresses (addr[63] clear); the Else
# branch (original line 471) is missing from this capture.
469 with m
.If(~r
.addr
[63]):
470 comb
+= effpid
.eq(r
.pid
)
# process-table entry address: masked mix of prtbl base and effpid
472 pr24
= Signal(24, reset_less
=True)
473 comb
+= pr24
.eq(masked(r
.prtbl
[12:36], effpid
[8:32], finalmask
))
474 comb
+= prtb_adr
.eq(Cat(C(0, 4), effpid
[0:8], pr24
, r
.prtbl
[36:56]))
# page-table entry address: masked mix of pgbase and shifted addr bits
476 pg16
= Signal(16, reset_less
=True)
477 comb
+= pg16
.eq(masked(r
.pgbase
[3:19], addrsh
, mask
))
478 comb
+= pgtb_adr
.eq(Cat(C(0, 3), pg16
, r
.pgbase
[19:56]))
# final TLB PTE: low 12 bits of the PDE plus masked-in address bits
480 pd44
= Signal(44, reset_less
=True)
481 comb
+= pd44
.eq(masked(r
.pde
[12:56], r
.addr
[12:56], finalmask
))
482 comb
+= pte
.eq(Cat(r
.pde
[0:12], pd44
))
# select the dcache request address by request type
488 with m
.If(tlbie_req
):
489 comb
+= addr
.eq(r
.addr
)
490 with m
.Elif(tlb_load
):
491 comb
+= addr
.eq(Cat(C(0, 12), r
.addr
[12:64]))
492 comb
+= tlb_data
.eq(pte
)
493 with m
.Elif(prtbl_rd
):
494 comb
+= addr
.eq(prtb_adr
)
# default: page-table read.  NOTE(review): `with m.Else():` (original
# line 495) is missing from this capture.
496 comb
+= addr
.eq(pgtb_adr
)
# outputs to loadstore1
498 comb
+= l_out
.done
.eq(r
.done
)
499 comb
+= l_out
.err
.eq(r
.err
)
500 comb
+= l_out
.invalid
.eq(r
.invalid
)
501 comb
+= l_out
.badtree
.eq(r
.badtree
)
502 comb
+= l_out
.segerr
.eq(r
.segerror
)
503 comb
+= l_out
.perm_error
.eq(r
.perm_err
)
504 comb
+= l_out
.rc_error
.eq(r
.rc_error
)
# outputs to dcache
506 comb
+= d_out
.valid
.eq(dcreq
)
507 comb
+= d_out
.tlbie
.eq(tlbie_req
)
508 comb
+= d_out
.doall
.eq(r
.inval_all
)
509 comb
+= d_out
.tlbld
.eq(tlb_load
)
510 comb
+= d_out
.addr
.eq(addr
)
511 comb
+= d_out
.pte
.eq(tlb_data
)
# outputs to icache
513 comb
+= i_out
.tlbld
.eq(itlb_load
)
514 comb
+= i_out
.tlbie
.eq(tlbie_req
)
515 comb
+= i_out
.doall
.eq(r
.inval_all
)
516 comb
+= i_out
.addr
.eq(addr
)
517 comb
+= i_out
.pte
.eq(tlb_data
)
# dcache_get: simulator process that services the MMU's dcache port from
# a fake in-memory page-table image.
# NOTE(review): the `def dcache_get(dut):` line, the closing of the
# docstring, the `def b(x):` helper header, and many loop-body lines
# (original 525-529, 532, 534, 538, 542, 547, 551-553, 555-556, 558-560,
# 562, 564-568, 572) are missing from this capture.
524 """simulator process for getting memory load requests
# b(): byteswap a 64-bit value (little <-> big endian) -- the radix
# structures in memory are big-endian.
530 return int.from_bytes(x
.to_bytes(8, byteorder
='little'),
531 byteorder
='big', signed
=False)
# fake memory image: partition table, radix root, second level, and the
# process table, keyed by address (values byteswapped via b()).
533 mem
= {0x0: 0x000000, # to get mtspr prtbl working
535 0x10000: # PARTITION_TABLE_2
536 # PATB_GR=1 PRTB=0x1000 PRTS=0xb
537 b(0x800000000100000b),
539 0x30000: # RADIX_ROOT_PTE
540 # V = 1 L = 0 NLB = 0x400 NLS = 9
541 b(0x8000000000040009),
543 0x40000: # RADIX_SECOND_LEVEL
544 # V = 1 L = 1 SW = 0 RPN = 0
545 # R = 1 C = 1 ATT = 0 EAA 0x7
546 b(0xc000000000000187),
548 0x1000000: # PROCESS_TABLE_3
549 # RTS1 = 0x2 RPDB = 0x300 RTS2 = 0x5 RPDS = 13
550 b(0x40000000000300ad),
# service loop: wait for d_out.valid, look the address up in mem,
# drive d_in.data and pulse d_in.done for one cycle.
554 while True: # wait for dc_valid
557 dc_valid
= yield (dut
.d_out
.valid
)
561 addr
= yield dut
.d_out
.addr
563 print (" DCACHE LOOKUP FAIL %x" % (addr
))
569 yield dut
.d_in
.data
.eq(data
)
570 print (" DCACHE GET %x data %x" % (addr
, data
))
571 yield dut
.d_in
.done
.eq(1)
573 yield dut
.d_in
.done
.eq(0)
# mmu_wait (body fragment): poll the MMU's loadstore1-facing outputs each
# cycle until the operation completes (done) or any error flag rises,
# then deassert the request inputs.
# NOTE(review): the `def mmu_wait(dut):` header and surrounding lines
# (original 574-576, 587-588) are missing from this capture; `stop` is
# presumably a loop-control flag declared in the missing header.
577 while not stop
: # wait for dc_valid / err
578 l_done
= yield (dut
.l_out
.done
)
579 l_err
= yield (dut
.l_out
.err
)
580 l_badtree
= yield (dut
.l_out
.badtree
)
581 l_permerr
= yield (dut
.l_out
.perm_error
)
582 l_rc_err
= yield (dut
.l_out
.rc_error
)
583 l_segerr
= yield (dut
.l_out
.segerr
)
584 l_invalid
= yield (dut
.l_out
.invalid
)
585 if (l_done
or l_err
or l_badtree
or
586 l_permerr
or l_rc_err
or l_segerr
or l_invalid
):
# completion: drop the request so the MMU returns to idle
589 yield dut
.l_in
.valid
.eq(0) # data already in MMU by now
590 yield dut
.l_in
.mtspr
.eq(0) # captured by RegStage(s)
591 yield dut
.l_in
.load
.eq(0) # can reset everything safely
# mmu_sim (body fragment): test stimulus.  First performs an MTSPR to set
# the process-table base (PRTBL = 0x1000000) and asserts it latched into
# rin.prtbl, then issues a privileged load at 0x10000 and prints the
# translation result.
# NOTE(review): the `def mmu_sim(dut):` header and several lines
# (original 592-595, 602, 605-606, 610, 612-615, 621, 629, 632 onward)
# are missing from this capture.
596 # MMU MTSPR set prtbl
597 yield dut
.l_in
.mtspr
.eq(1)
598 yield dut
.l_in
.sprn
[9].eq(1) # totally fake way to set SPR=prtbl
599 yield dut
.l_in
.rs
.eq(0x1000000) # set process table
600 yield dut
.l_in
.valid
.eq(1)
601 yield from mmu_wait(dut
)
# clear the MTSPR operands again
603 yield dut
.l_in
.sprn
.eq(0)
604 yield dut
.l_in
.rs
.eq(0)
# confirm the process-table base was captured by the register stage
607 prtbl
= yield (dut
.rin
.prtbl
)
608 print ("prtbl after MTSPR %x" % prtbl
)
609 assert prtbl
== 0x1000000
611 #yield dut.rin.prtbl.eq(0x1000000) # manually set process table
# issue a privileged load request at 0x10000 and wait for completion
616 yield dut
.l_in
.load
.eq(1)
617 yield dut
.l_in
.priv
.eq(1)
618 yield dut
.l_in
.addr
.eq(0x10000)
619 yield dut
.l_in
.valid
.eq(1)
620 yield from mmu_wait(dut
)
# read back and report the translation outcome
622 addr
= yield dut
.d_out
.addr
623 pte
= yield dut
.d_out
.pte
624 l_done
= yield (dut
.l_out
.done
)
625 l_err
= yield (dut
.l_out
.err
)
626 l_badtree
= yield (dut
.l_out
.badtree
)
627 print ("translated done %d err %d badtree %d addr %x pte %x" % \
628 (l_done
, l_err
, l_badtree
, addr
, pte
))
630 yield dut
.l_in
.priv
.eq(0)
631 yield dut
.l_in
.addr
.eq(0)
# test_mmu (body fragment): dumps the design to RTLIL, wraps it in a
# Module, and runs the mmu_sim and dcache_get processes under the
# simulator with VCD output.
# NOTE(review): the `def test_mmu():` header and setup lines (original
# 633-638, 641-643, 645-649, 653-654, and everything after 655) are
# missing from this capture.
639 vl
= rtlil
.convert(dut
, ports
=[])#dut.ports())
640 with
open("test_mmu.il", "w") as f
:
644 m
.submodules
.mmu
= dut
# register both simulator processes and run with a VCD trace
650 sim
.add_sync_process(wrap(mmu_sim(dut
)))
651 sim
.add_sync_process(wrap(dcache_get(dut
)))
652 with sim
.write_vcd('test_mmu.vcd'):
# script entry point (body after the guard is missing from this capture)
655 if __name__
== '__main__':