Merge pull request #168 from shenki/flash-arty
[microwatt.git] / mmu.vhdl
1 library ieee;
2 use ieee.std_logic_1164.all;
3 use ieee.numeric_std.all;
4
5 library work;
6 use work.common.all;
7
8 -- Radix MMU
9 -- Supports 4-level trees as in arch 3.0B, but not the two-step translation for
10 -- guests under a hypervisor (i.e. there is no gRA -> hRA translation).
11
-- Port interface of the radix MMU.
entity mmu is
    port (
        -- Clock; all MMU state is registered on its rising edge.
        clk   : in  std_ulogic;
        -- Reset, sampled synchronously on the rising edge of clk.
        rst   : in  std_ulogic;

        -- loadstore1 interface: translation, tlbie/slbia and mtspr
        -- requests come in on l_in; completion, error flags and SPR
        -- read data go back on l_out.
        l_in  : in  Loadstore1ToMmuType;
        l_out : out MmuToLoadstore1Type;

        -- dcache interface: table-entry reads, dTLB loads and TLB
        -- invalidations go out on d_out; done/err/data return on d_in.
        d_out : out MmuToDcacheType;
        d_in  : in  DcacheToMmuType;

        -- icache interface: iTLB loads and invalidations.
        i_out : out MmuToIcacheType
    );
end entity mmu;
26
27 architecture behave of mmu is
28
-- States of the translation state machine implemented in mmu_1.
-- The order of the literals is significant for the type and is unchanged.
type state_t is (
    IDLE,             -- accept a request (translate, tlbie or mtspr)
    TLB_WAIT,         -- wait for d_in.done after a TLB invalidate / dTLB load
    PROC_TBL_READ,    -- issue the process table entry read to the dcache
    PROC_TBL_WAIT,    -- wait for the process table entry and cache it
    SEGMENT_CHECK,    -- range-check the address and the top-level index size
    RADIX_LOOKUP,     -- issue a radix tree entry read to the dcache
    RADIX_READ_WAIT,  -- wait for the entry, then descend, finish or fail
    RADIX_LOAD_TLB,   -- hand the final PTE to the dTLB or iTLB
    RADIX_ERROR       -- signal completion with the error flags set
);
39
-- All registered state of the MMU (signal r and its next value rin).
type reg_stage_t is record
    -- Request latched from loadstore1 ------------------------------------
    -- valid: high for one cycle when a translation request (not a
    -- tlbie) is accepted in IDLE.
    valid     : std_ulogic;
    iside     : std_ulogic;                      -- copy of l_in.iside
    store     : std_ulogic;                      -- not (l_in.load or l_in.iside)
    priv      : std_ulogic;                      -- copy of l_in.priv
    addr      : std_ulogic_vector(63 downto 0);  -- address being translated

    -- Configuration SPRs, written through mtspr --------------------------
    prtbl     : std_ulogic_vector(63 downto 0);
    pid       : std_ulogic_vector(31 downto 0);

    -- Internal state -----------------------------------------------------
    state     : state_t;
    -- Cached process table entries: pgtbl0 is used when addr(63) = '0',
    -- pgtbl3 when addr(63) = '1'; each has its own valid flag.
    pgtbl0    : std_ulogic_vector(63 downto 0);
    pt0_valid : std_ulogic;
    pgtbl3    : std_ulogic_vector(63 downto 0);
    pt3_valid : std_ulogic;
    -- shift: bit count that drives both addrshifter and finalmaskgen;
    -- its meaning depends on the current state.
    shift     : unsigned(5 downto 0);
    -- mask_size: number of index bits for the table currently being
    -- indexed (drives addrmaskgen).
    mask_size : unsigned(4 downto 0);
    -- pgbase: base address of the table currently being indexed.
    pgbase    : std_ulogic_vector(55 downto 0);
    -- pde: most recent radix tree entry read (byte-swapped).
    pde       : std_ulogic_vector(63 downto 0);
    -- Error flags reported on l_out; mmu_1 clears them by default on
    -- every evaluation, so each is set for a single cycle.
    invalid   : std_ulogic;
    badtree   : std_ulogic;
    segerror  : std_ulogic;
    perm_err  : std_ulogic;
    rc_error  : std_ulogic;
end record;
66
-- r is the registered MMU state; rin is the next value, computed
-- combinationally by mmu_1 and latched by mmu_0.
signal r   : reg_stage_t;
signal rin : reg_stage_t;

-- Outputs of the addrshifter, addrmaskgen and finalmaskgen processes.
signal addrsh    : std_ulogic_vector(15 downto 0);
signal mask      : std_ulogic_vector(15 downto 0);
signal finalmask : std_ulogic_vector(43 downto 0);
72
73 begin
74 -- Multiplex internal SPR values back to loadstore1, selected
75 -- by l_in.sprn.
with l_in.sprn(9) select l_out.sprval <=
    r.prtbl             when '1',
    x"00000000" & r.pid when others;
77
-- Sequential process: on each rising clock edge either applies the reset
-- values or latches rin (computed by mmu_1) into r.  Outside reset it
-- also emits simulation trace messages.
mmu_0: process(clk)
begin
    if rising_edge(clk) then
        if rst = '1' then
            r.state <= IDLE;
            r.valid <= '0';
            r.pt0_valid <= '0';
            r.pt3_valid <= '0';
            r.prtbl <= (others => '0');
            -- PID is readable by software through l_out.sprval and is
            -- used (via effpid) to form the process table entry address,
            -- so it needs a defined value after reset just like PRTBL.
            r.pid <= (others => '0');
        else
            -- Trace messages; rin.* refers to the value being latched
            -- on this edge, r.* to the value before it.
            if rin.valid = '1' then
                report "MMU got tlb miss for " & to_hstring(rin.addr);
            end if;
            if l_out.done = '1' then
                report "MMU completing op with invalid=" & std_ulogic'image(l_out.invalid) &
                    " badtree=" & std_ulogic'image(l_out.badtree);
            end if;
            if rin.state = RADIX_LOOKUP then
                report "radix lookup shift=" & integer'image(to_integer(rin.shift)) &
                    " msize=" & integer'image(to_integer(rin.mask_size));
            end if;
            if r.state = RADIX_LOOKUP then
                report "send load addr=" & to_hstring(d_out.addr) &
                    " addrsh=" & to_hstring(addrsh) & " mask=" & to_hstring(mask);
            end if;
            r <= rin;
        end if;
    end if;
end process;
107
108 -- Shift address bits 61--12 right by 0--47 bits and
109 -- supply the least significant 16 bits of the result.
addrshifter: process(all)
    -- Address bits 61..12 with 13 zero bits above them, so that every
    -- 31-bit window taken by the coarse stage stays in range.
    variable field  : std_ulogic_vector(62 downto 0);
    variable coarse : std_ulogic_vector(30 downto 0);
    variable medium : std_ulogic_vector(18 downto 0);
    -- Least significant bit of the window selected at each stage.
    variable lsb16  : integer range 0 to 32;
    variable lsb4   : integer range 0 to 12;
    variable lsb1   : integer range 0 to 3;
begin
    field := (others => '0');
    field(49 downto 0) := r.addr(61 downto 12);

    -- Coarse stage: shift by 0, 16 or 32 bits.  Both "10" and "11"
    -- (and any metavalue) select the 32-bit shift.
    case r.shift(5 downto 4) is
        when "00" =>
            lsb16 := 0;
        when "01" =>
            lsb16 := 16;
        when others =>
            lsb16 := 32;
    end case;
    coarse := field(lsb16 + 30 downto lsb16);

    -- Medium stage: shift by 0, 4, 8 or 12 bits.
    case r.shift(3 downto 2) is
        when "00" =>
            lsb4 := 0;
        when "01" =>
            lsb4 := 4;
        when "10" =>
            lsb4 := 8;
        when others =>
            lsb4 := 12;
    end case;
    medium := coarse(lsb4 + 18 downto lsb4);

    -- Fine stage: shift by 0..3 bits and keep the low 16 bits.
    case r.shift(1 downto 0) is
        when "00" =>
            lsb1 := 0;
        when "01" =>
            lsb1 := 1;
        when "10" =>
            lsb1 := 2;
        when others =>
            lsb1 := 3;
    end case;
    addrsh <= medium(lsb1 + 15 downto lsb1);
end process;
145
146 -- generate mask for extracting address fields for PTE address generation
addrmaskgen: process(all)
    variable width : natural;
    variable bits  : std_ulogic_vector(15 downto 0);
begin
    width := to_integer(r.mask_size);
    for pos in bits'range loop
        -- Bits 0..4 are always set (mask_size has to be >= 5); above
        -- that, bit pos is set only when pos < mask_size.
        if pos < 5 or pos < width then
            bits(pos) := '1';
        else
            bits(pos) := '0';
        end if;
    end loop;
    mask <= bits;
end process;
159
160 -- generate mask for extracting address bits to go in TLB entry
161 -- in order to support pages > 4kB
finalmaskgen: process(all)
    variable nbits : natural;
    variable fm    : std_ulogic_vector(43 downto 0);
begin
    -- Produce a mask with the low r.shift bits set (all 44 bits when
    -- r.shift is 44 or more).
    nbits := to_integer(r.shift);
    for pos in fm'range loop
        if pos < nbits then
            fm(pos) := '1';
        else
            fm(pos) := '0';
        end if;
    end loop;
    finalmask <= fm;
end process;
173
-- Combinational next-state and output logic of the MMU.
-- Reads the registered state r plus the l_in and d_in inputs, computes
-- the next state (assigned to rin, latched by mmu_0) and drives the
-- l_out, d_out and i_out outputs.
174 mmu_1: process(all)
175 variable v : reg_stage_t; -- working copy of r; assigned to rin at the end
176 variable dcreq : std_ulogic; -- drives d_out.valid
177 variable done : std_ulogic; -- drives l_out.done
178 variable tlb_load : std_ulogic; -- drives d_out.tlbld; selects pte as the data output
179 variable itlb_load : std_ulogic; -- drives i_out.tlbld
180 variable tlbie_req : std_ulogic; -- drives d_out.tlbie and i_out.tlbie
181 variable inval_all : std_ulogic; -- drives d_out.doall and i_out.doall
182 variable prtbl_rd : std_ulogic; -- selects prtable_addr as the address output
183 variable pt_valid : std_ulogic; -- valid flag of the selected cached process table entry
184 variable effpid : std_ulogic_vector(31 downto 0); -- PID used to index the process table
185 variable prtable_addr : std_ulogic_vector(63 downto 0); -- address of the process table entry
186 variable rts : unsigned(5 downto 0);
187 variable mbits : unsigned(5 downto 0);
188 variable pgtable_addr : std_ulogic_vector(63 downto 0); -- address of the radix tree entry to read
189 variable pte : std_ulogic_vector(63 downto 0); -- PTE value handed to the TLBs
190 variable tlb_data : std_ulogic_vector(63 downto 0); -- value driven on d_out.pte / i_out.pte
191 variable nonzero : std_ulogic;
192 variable pgtbl : std_ulogic_vector(63 downto 0); -- selected cached process table entry
193 variable perm_ok : std_ulogic;
194 variable rc_ok : std_ulogic;
195 variable addr : std_ulogic_vector(63 downto 0); -- value driven on d_out.addr / i_out.addr
196 variable data : std_ulogic_vector(63 downto 0); -- d_in.data with its bytes reversed
197 begin
-- Defaults: start from the current state and clear every one-cycle
-- strobe and error flag; the state branches below set what they need.
198 v := r;
199 v.valid := '0';
200 dcreq := '0';
201 done := '0';
202 v.invalid := '0';
203 v.badtree := '0';
204 v.segerror := '0';
205 v.perm_err := '0';
206 v.rc_error := '0';
207 tlb_load := '0';
208 itlb_load := '0';
209 tlbie_req := '0';
210 inval_all := '0';
211 prtbl_rd := '0';
212
213 -- Radix tree data structures in memory are big-endian,
214 -- so we need to byte-swap them
215 for i in 0 to 7 loop
216 data(i * 8 + 7 downto i * 8) := d_in.data((7 - i) * 8 + 7 downto (7 - i) * 8);
217 end loop;
218
219 case r.state is
220 when IDLE =>
-- Pick the cached process table entry by the top bit of the incoming
-- address: pgtbl0 for addr(63) = '0', pgtbl3 for addr(63) = '1'.
221 if l_in.addr(63) = '0' then
222 pgtbl := r.pgtbl0;
223 pt_valid := r.pt0_valid;
224 else
225 pgtbl := r.pgtbl3;
226 pt_valid := r.pt3_valid;
227 end if;
228 -- rts == radix tree size, # address bits being translated
229 rts := unsigned('0' & pgtbl(62 downto 61) & pgtbl(7 downto 5));
230 -- mbits == # address bits to index top level of tree
231 mbits := unsigned('0' & pgtbl(4 downto 0));
232 -- set v.shift to rts so that we can use finalmask for the segment check
233 v.shift := rts;
234 v.mask_size := mbits(4 downto 0);
235 v.pgbase := pgtbl(55 downto 8) & x"00";
236
237 if l_in.valid = '1' then
-- Latch the request; an access is treated as a store when it is
-- neither a load nor an instruction-side access.
238 v.addr := l_in.addr;
239 v.iside := l_in.iside;
240 v.store := not (l_in.load or l_in.iside);
241 v.priv := l_in.priv;
242 if l_in.tlbie = '1' then
-- TLB invalidate: forwarded to the caches in this same cycle
-- (tlbie_req selects l_in.addr / l_in.rs as the outputs below).
243 dcreq := '1';
244 tlbie_req := '1';
245 -- Invalidate all iTLB/dTLB entries for tlbie with
246 -- RB[IS] != 0 or RB[AP] != 0, or for slbia
247 inval_all := l_in.slbia or l_in.addr(11) or l_in.addr(10) or
248 l_in.addr(7) or l_in.addr(6) or l_in.addr(5);
249 -- The RIC field of the tlbie instruction comes across on the
250 -- sprn bus as bits 2--3. RIC=2 flushes process table caches.
251 if l_in.sprn(3) = '1' then
252 v.pt0_valid := '0';
253 v.pt3_valid := '0';
254 end if;
255 v.state := TLB_WAIT;
256 else
-- Translation request.
257 v.valid := '1';
258 if pt_valid = '0' then
259 -- need to fetch process table entry
260 -- set v.shift so we can use finalmask for generating
261 -- the process table entry address
262 v.shift := unsigned('0' & r.prtbl(4 downto 0));
263 v.state := PROC_TBL_READ;
264 elsif mbits = 0 then
265 -- Use RPDS = 0 to disable radix tree walks
266 v.state := RADIX_ERROR;
267 v.invalid := '1';
268 else
269 v.state := SEGMENT_CHECK;
270 end if;
271 end if;
272 end if;
-- mtspr is handled after the request branch above, so when both
-- l_in.valid and l_in.mtspr are set these assignments take effect.
273 if l_in.mtspr = '1' then
274 -- Move to PID needs to invalidate L1 TLBs and cached
275 -- pgtbl0 value. Move to PRTBL does that plus
276 -- invalidating the cached pgtbl3 value as well.
277 if l_in.sprn(9) = '0' then
278 v.pid := l_in.rs(31 downto 0);
279 else
280 v.prtbl := l_in.rs;
281 v.pt3_valid := '0';
282 end if;
283 v.pt0_valid := '0';
284 dcreq := '1';
285 tlbie_req := '1';
286 inval_all := '1';
287 v.state := TLB_WAIT;
288 end if;
289
290 when TLB_WAIT =>
-- Wait for the dcache to finish the TLB operation, then report
-- completion to loadstore1.
291 if d_in.done = '1' then
292 done := '1';
293 v.state := IDLE;
294 end if;
295
296 when PROC_TBL_READ =>
-- Issue the process table entry read; prtbl_rd makes the output
-- mux below drive prtable_addr.
297 dcreq := '1';
298 prtbl_rd := '1';
299 v.state := PROC_TBL_WAIT;
300
301 when PROC_TBL_WAIT =>
302 if d_in.done = '1' then
303 if d_in.err = '0' then
-- Cache the entry in the slot selected by the top address bit.
304 if r.addr(63) = '1' then
305 v.pgtbl3 := data;
306 v.pt3_valid := '1';
307 else
308 v.pgtbl0 := data;
309 v.pt0_valid := '1';
310 end if;
311 -- rts == radix tree size, # address bits being translated
312 rts := unsigned('0' & data(62 downto 61) & data(7 downto 5));
313 -- mbits == # address bits to index top level of tree
314 mbits := unsigned('0' & data(4 downto 0));
315 -- set v.shift to rts so that we can use finalmask for the segment check
316 v.shift := rts;
317 v.mask_size := mbits(4 downto 0);
318 v.pgbase := data(55 downto 8) & x"00";
319 if mbits = 0 then
320 v.state := RADIX_ERROR;
321 v.invalid := '1';
322 else
323 v.state := SEGMENT_CHECK;
324 end if;
325 else
-- The dcache reported an error on the process table read.
326 v.state := RADIX_ERROR;
327 v.badtree := '1';
328 end if;
329 end if;
330
331 when SEGMENT_CHECK =>
-- On entry r.shift holds rts, so finalmask has its low rts bits set.
-- The new shift is rts + 19 - mbits; it takes effect in RADIX_LOOKUP,
-- where addrshifter uses it to extract the top-level index.
332 mbits := '0' & r.mask_size;
333 v.shift := r.shift + (31 - 12) - mbits;
-- nonzero is set when any of address bits 61..31 outside the low rts
-- bits of that field is set, i.e. the address exceeds the tree size.
334 nonzero := or(r.addr(61 downto 31) and not finalmask(30 downto 0));
335 if r.addr(63) /= r.addr(62) or nonzero = '1' then
336 v.state := RADIX_ERROR;
337 v.segerror := '1';
338 elsif mbits < 5 or mbits > 16 or mbits > (r.shift + (31 - 12)) then
339 v.state := RADIX_ERROR;
340 v.badtree := '1';
341 else
342 v.state := RADIX_LOOKUP;
343 end if;
344
345 when RADIX_LOOKUP =>
-- Issue the radix tree entry read; with no other select active the
-- output mux below drives pgtable_addr.
346 dcreq := '1';
347 v.state := RADIX_READ_WAIT;
348
349 when RADIX_READ_WAIT =>
350 if d_in.done = '1' then
351 if d_in.err = '0' then
352 v.pde := data;
353 -- test valid bit
354 if data(63) = '1' then
355 -- test leaf bit
356 if data(62) = '1' then
357 -- check permissions and RC bits
358 perm_ok := '0';
-- Access is considered only if the request is privileged or
-- data(3) is clear.
359 if r.priv = '1' or data(3) = '0' then
360 if r.iside = '0' then
-- Data side: data(1) permits any access, data(2) permits
-- non-store accesses only.
361 perm_ok := data(1) or (data(2) and not r.store);
362 else
363 -- no IAMR, so no KUEP support for now
364 -- deny execute permission if cache inhibited
365 perm_ok := data(0) and not data(5);
366 end if;
367 end if;
-- data(8) must be set for any access and data(7) as well for
-- stores (presumably the PTE reference and change bits).
368 rc_ok := data(8) and (data(7) or not r.store);
369 if perm_ok = '1' and rc_ok = '1' then
370 v.state := RADIX_LOAD_TLB;
371 else
372 v.state := RADIX_ERROR;
373 v.perm_err := not perm_ok;
374 -- permission error takes precedence over RC error
375 v.rc_error := perm_ok;
376 end if;
377 else
-- Non-leaf entry: descend one level.  mbits is the index size of the
-- next level and must not exceed the remaining shift.
378 mbits := unsigned('0' & data(4 downto 0));
379 if mbits < 5 or mbits > 16 or mbits > r.shift then
380 v.state := RADIX_ERROR;
381 v.badtree := '1';
382 else
383 v.shift := v.shift - mbits;
384 v.mask_size := mbits(4 downto 0);
385 v.pgbase := data(55 downto 8) & x"00";
386 v.state := RADIX_LOOKUP;
387 end if;
388 end if;
389 else
390 -- non-present PTE, generate a DSI
391 v.state := RADIX_ERROR;
392 v.invalid := '1';
393 end if;
394 else
-- The dcache reported an error on the radix tree read.
395 v.state := RADIX_ERROR;
396 v.badtree := '1';
397 end if;
398 end if;
399
400 when RADIX_LOAD_TLB =>
-- Hand the final PTE to a TLB.  A data-side load goes through the
-- dcache and completes in TLB_WAIT; an instruction-side load is
-- signalled to the icache and completes immediately.
401 tlb_load := '1';
402 if r.iside = '0' then
403 dcreq := '1';
404 v.state := TLB_WAIT;
405 else
406 itlb_load := '1';
407 done := '1';
408 v.state := IDLE;
409 end if;
410
411 when RADIX_ERROR =>
-- The error flags were registered on entry to this state and are
-- visible on l_out (driven from r) while done is asserted.
412 done := '1';
413 v.state := IDLE;
414
415 end case;
416
-- Addresses with addr(63) = '1' use PID 0; others use the PID register.
417 if r.addr(63) = '1' then
418 effpid := x"00000000";
419 else
420 effpid := r.pid;
421 end if;
-- Process table entry address: PRTBL base with its low bits replaced
-- by effpid under finalmask, and a zero low nibble (effpid * 16).
422 prtable_addr := x"00" & r.prtbl(55 downto 36) &
423 ((r.prtbl(35 downto 12) and not finalmask(23 downto 0)) or
424 (effpid(31 downto 8) and finalmask(23 downto 0))) &
425 effpid(7 downto 0) & "0000";
426
-- Radix tree entry address: table base with its index field replaced
-- by the shifted address bits under mask, and three zero low bits.
427 pgtable_addr := x"00" & r.pgbase(55 downto 19) &
428 ((r.pgbase(18 downto 3) and not mask) or (addrsh and mask)) &
429 "000";
-- PTE for the TLBs: bits 55..12 of the leaf entry, with the bits
-- selected by finalmask taken from the address instead (large pages).
430 pte := x"00" &
431 ((r.pde(55 downto 12) and not finalmask) or (r.addr(55 downto 12) and finalmask))
432 & r.pde(11 downto 0);
433
434 -- update registers
435 rin <= v;
436
437 -- drive outputs
-- Address/data selection priority: TLB invalidate, then TLB load,
-- then process table read, otherwise radix tree entry read.
438 if tlbie_req = '1' then
439 addr := l_in.addr;
440 tlb_data := l_in.rs;
441 elsif tlb_load = '1' then
442 addr := r.addr(63 downto 12) & x"000";
443 tlb_data := pte;
444 elsif prtbl_rd = '1' then
445 addr := prtable_addr;
446 tlb_data := (others => '0');
447 else
448 addr := pgtable_addr;
449 tlb_data := (others => '0');
450 end if;
451
-- done is combinational; the error flags come from the registered
-- state r.
452 l_out.done <= done;
453 l_out.invalid <= r.invalid;
454 l_out.badtree <= r.badtree;
455 l_out.segerr <= r.segerror;
456 l_out.perm_error <= r.perm_err;
457 l_out.rc_error <= r.rc_error;
458
459 d_out.valid <= dcreq;
460 d_out.tlbie <= tlbie_req;
461 d_out.doall <= inval_all;
462 d_out.tlbld <= tlb_load;
463 d_out.addr <= addr;
464 d_out.pte <= tlb_data;
465
-- The icache receives the same address, data and invalidate strobes
-- as the dcache, but its own load strobe.
466 i_out.tlbld <= itlb_load;
467 i_out.tlbie <= tlbie_req;
468 i_out.doall <= inval_all;
469 i_out.addr <= addr;
470 i_out.pte <= tlb_data;
471
472 end process;
473 end;