Merge pull request #206 from Jbalkind/icachecleanup
[microwatt.git] / mmu.vhdl
1 library ieee;
2 use ieee.std_logic_1164.all;
3 use ieee.numeric_std.all;
4
5 library work;
6 use work.common.all;
7
-- Radix MMU
-- Supports 4-level radix trees as defined in Power ISA v3.0B, but not the
-- two-step translation used for guests running under a hypervisor (i.e. there
-- is no guest real address (gRA) -> host real address (hRA) translation).
11
-- Top-level interface of the radix MMU.
-- Translation, tlbie and SPR requests arrive from loadstore1; the MMU uses
-- the dcache port to read table entries from memory and to update or
-- invalidate the dTLB, and the icache port to update or invalidate the iTLB.
entity mmu is
    port (
        clk   : in std_ulogic;   -- state is registered on the rising edge
        rst   : in std_ulogic;   -- synchronous reset, active high

        l_in  : in Loadstore1ToMmuType;    -- requests from loadstore1
        l_out : out MmuToLoadstore1Type;   -- completion, error flags and SPR read data

        d_out : out MmuToDcacheType;       -- memory reads and dTLB load/invalidate requests
        d_in  : in DcacheToMmuType;        -- done/err status and read data from the dcache

        i_out : out MmuToIcacheType        -- iTLB load/invalidate requests
        );
end mmu;
26
27 architecture behave of mmu is
28
    -- States of the MMU state machine (see process mmu_1 for the transitions).
    type state_t is (IDLE,             -- waiting for a request or an mtspr
                     DO_TLBIE,         -- issue a TLB invalidation request
                     TLB_WAIT,         -- wait for the dcache to report done
                     PROC_TBL_READ,    -- issue the process table entry read
                     PROC_TBL_WAIT,    -- wait for the process table entry data
                     SEGMENT_CHECK,    -- check the address against the tree size
                     RADIX_LOOKUP,     -- issue a read of the next tree entry
                     RADIX_READ_WAIT,  -- wait for the tree entry and decode it
                     RADIX_LOAD_TLB,   -- write the resulting PTE into a TLB
                     RADIX_ERROR       -- signal completion with an error flag set
                     );
40
    -- All registered state of the MMU.
    type reg_stage_t is record
        -- latched request from loadstore1
        valid     : std_ulogic;   -- set for one cycle when a translation request is accepted
        iside     : std_ulogic;   -- request is for the instruction side
        store     : std_ulogic;   -- request is neither a load nor an instruction fetch
        priv      : std_ulogic;   -- privilege flag copied from the request
        addr      : std_ulogic_vector(63 downto 0);   -- address from the request
        inval_all : std_ulogic;   -- invalidate all TLB entries (drives the doall outputs)
        -- config SPRs
        prtbl     : std_ulogic_vector(63 downto 0);   -- process table base/size SPR
        pid       : std_ulogic_vector(31 downto 0);   -- process ID SPR
        -- internal state
        state     : state_t;
        pgtbl0    : std_ulogic_vector(63 downto 0);   -- cached tree root for addr(63) = '0'
        pt0_valid : std_ulogic;                       -- pgtbl0 holds a valid entry
        pgtbl3    : std_ulogic_vector(63 downto 0);   -- cached tree root for addr(63) = '1'
        pt3_valid : std_ulogic;                       -- pgtbl3 holds a valid entry
        shift     : unsigned(5 downto 0);    -- shift count used by addrshifter and finalmaskgen
        mask_size : unsigned(4 downto 0);    -- number of index bits at the current tree level
        pgbase    : std_ulogic_vector(55 downto 0);   -- base address of the current table
        pde       : std_ulogic_vector(63 downto 0);   -- last tree entry read from memory
        -- error flags reported to loadstore1
        invalid   : std_ulogic;
        badtree   : std_ulogic;
        segerror  : std_ulogic;
        perm_err  : std_ulogic;
        rc_error  : std_ulogic;
    end record;
68
    -- r is the current registered state, rin the next state computed by mmu_1.
    signal r, rin : reg_stage_t;

    -- r.addr shifted right by r.shift, low 16 bits (from addrshifter)
    signal addrsh : std_ulogic_vector(15 downto 0);
    -- selects which addrsh bits index the current table (from addrmaskgen)
    signal mask : std_ulogic_vector(15 downto 0);
    -- low r.shift bits set (from finalmaskgen)
    signal finalmask : std_ulogic_vector(43 downto 0);
74
75 begin
76 -- Multiplex internal SPR values back to loadstore1, selected
77 -- by l_in.sprn.
78 l_out.sprval <= r.prtbl when l_in.sprn(9) = '1' else x"00000000" & r.pid;
79
80 mmu_0: process(clk)
81 begin
82 if rising_edge(clk) then
83 if rst = '1' then
84 r.state <= IDLE;
85 r.valid <= '0';
86 r.pt0_valid <= '0';
87 r.pt3_valid <= '0';
88 r.prtbl <= (others => '0');
89 else
90 if rin.valid = '1' then
91 report "MMU got tlb miss for " & to_hstring(rin.addr);
92 end if;
93 if l_out.done = '1' then
94 report "MMU completing op with invalid=" & std_ulogic'image(l_out.invalid) &
95 " badtree=" & std_ulogic'image(l_out.badtree);
96 end if;
97 if rin.state = RADIX_LOOKUP then
98 report "radix lookup shift=" & integer'image(to_integer(rin.shift)) &
99 " msize=" & integer'image(to_integer(rin.mask_size));
100 end if;
101 if r.state = RADIX_LOOKUP then
102 report "send load addr=" & to_hstring(d_out.addr) &
103 " addrsh=" & to_hstring(addrsh) & " mask=" & to_hstring(mask);
104 end if;
105 r <= rin;
106 end if;
107 end if;
108 end process;
109
110 -- Shift address bits 61--12 right by 0--47 bits and
111 -- supply the least significant 16 bits of the result.
112 addrshifter: process(all)
113 variable sh1 : std_ulogic_vector(30 downto 0);
114 variable sh2 : std_ulogic_vector(18 downto 0);
115 variable result : std_ulogic_vector(15 downto 0);
116 begin
117 case r.shift(5 downto 4) is
118 when "00" =>
119 sh1 := r.addr(42 downto 12);
120 when "01" =>
121 sh1 := r.addr(58 downto 28);
122 when others =>
123 sh1 := "0000000000000" & r.addr(61 downto 44);
124 end case;
125 case r.shift(3 downto 2) is
126 when "00" =>
127 sh2 := sh1(18 downto 0);
128 when "01" =>
129 sh2 := sh1(22 downto 4);
130 when "10" =>
131 sh2 := sh1(26 downto 8);
132 when others =>
133 sh2 := sh1(30 downto 12);
134 end case;
135 case r.shift(1 downto 0) is
136 when "00" =>
137 result := sh2(15 downto 0);
138 when "01" =>
139 result := sh2(16 downto 1);
140 when "10" =>
141 result := sh2(17 downto 2);
142 when others =>
143 result := sh2(18 downto 3);
144 end case;
145 addrsh <= result;
146 end process;
147
148 -- generate mask for extracting address fields for PTE address generation
149 addrmaskgen: process(all)
150 variable m : std_ulogic_vector(15 downto 0);
151 begin
152 -- mask_count has to be >= 5
153 m := x"001f";
154 for i in 5 to 15 loop
155 if i < to_integer(r.mask_size) then
156 m(i) := '1';
157 end if;
158 end loop;
159 mask <= m;
160 end process;
161
162 -- generate mask for extracting address bits to go in TLB entry
163 -- in order to support pages > 4kB
164 finalmaskgen: process(all)
165 variable m : std_ulogic_vector(43 downto 0);
166 begin
167 m := (others => '0');
168 for i in 0 to 43 loop
169 if i < to_integer(r.shift) then
170 m(i) := '1';
171 end if;
172 end loop;
173 finalmask <= m;
174 end process;
175
176 mmu_1: process(all)
177 variable v : reg_stage_t;
178 variable dcreq : std_ulogic;
179 variable done : std_ulogic;
180 variable tlb_load : std_ulogic;
181 variable itlb_load : std_ulogic;
182 variable tlbie_req : std_ulogic;
183 variable prtbl_rd : std_ulogic;
184 variable pt_valid : std_ulogic;
185 variable effpid : std_ulogic_vector(31 downto 0);
186 variable prtable_addr : std_ulogic_vector(63 downto 0);
187 variable rts : unsigned(5 downto 0);
188 variable mbits : unsigned(5 downto 0);
189 variable pgtable_addr : std_ulogic_vector(63 downto 0);
190 variable pte : std_ulogic_vector(63 downto 0);
191 variable tlb_data : std_ulogic_vector(63 downto 0);
192 variable nonzero : std_ulogic;
193 variable pgtbl : std_ulogic_vector(63 downto 0);
194 variable perm_ok : std_ulogic;
195 variable rc_ok : std_ulogic;
196 variable addr : std_ulogic_vector(63 downto 0);
197 variable data : std_ulogic_vector(63 downto 0);
198 begin
199 v := r;
200 v.valid := '0';
201 dcreq := '0';
202 done := '0';
203 v.invalid := '0';
204 v.badtree := '0';
205 v.segerror := '0';
206 v.perm_err := '0';
207 v.rc_error := '0';
208 tlb_load := '0';
209 itlb_load := '0';
210 tlbie_req := '0';
211 v.inval_all := '0';
212 prtbl_rd := '0';
213
214 -- Radix tree data structures in memory are big-endian,
215 -- so we need to byte-swap them
216 for i in 0 to 7 loop
217 data(i * 8 + 7 downto i * 8) := d_in.data((7 - i) * 8 + 7 downto (7 - i) * 8);
218 end loop;
219
220 case r.state is
221 when IDLE =>
222 if l_in.addr(63) = '0' then
223 pgtbl := r.pgtbl0;
224 pt_valid := r.pt0_valid;
225 else
226 pgtbl := r.pgtbl3;
227 pt_valid := r.pt3_valid;
228 end if;
229 -- rts == radix tree size, # address bits being translated
230 rts := unsigned('0' & pgtbl(62 downto 61) & pgtbl(7 downto 5));
231 -- mbits == # address bits to index top level of tree
232 mbits := unsigned('0' & pgtbl(4 downto 0));
233 -- set v.shift to rts so that we can use finalmask for the segment check
234 v.shift := rts;
235 v.mask_size := mbits(4 downto 0);
236 v.pgbase := pgtbl(55 downto 8) & x"00";
237
238 if l_in.valid = '1' then
239 v.addr := l_in.addr;
240 v.iside := l_in.iside;
241 v.store := not (l_in.load or l_in.iside);
242 v.priv := l_in.priv;
243 if l_in.tlbie = '1' then
244 -- Invalidate all iTLB/dTLB entries for tlbie with
245 -- RB[IS] != 0 or RB[AP] != 0, or for slbia
246 v.inval_all := l_in.slbia or l_in.addr(11) or l_in.addr(10) or
247 l_in.addr(7) or l_in.addr(6) or l_in.addr(5);
248 -- The RIC field of the tlbie instruction comes across on the
249 -- sprn bus as bits 2--3. RIC=2 flushes process table caches.
250 if l_in.sprn(3) = '1' then
251 v.pt0_valid := '0';
252 v.pt3_valid := '0';
253 end if;
254 v.state := DO_TLBIE;
255 else
256 v.valid := '1';
257 if pt_valid = '0' then
258 -- need to fetch process table entry
259 -- set v.shift so we can use finalmask for generating
260 -- the process table entry address
261 v.shift := unsigned('0' & r.prtbl(4 downto 0));
262 v.state := PROC_TBL_READ;
263 elsif mbits = 0 then
264 -- Use RPDS = 0 to disable radix tree walks
265 v.state := RADIX_ERROR;
266 v.invalid := '1';
267 else
268 v.state := SEGMENT_CHECK;
269 end if;
270 end if;
271 end if;
272 if l_in.mtspr = '1' then
273 -- Move to PID needs to invalidate L1 TLBs and cached
274 -- pgtbl0 value. Move to PRTBL does that plus
275 -- invalidating the cached pgtbl3 value as well.
276 if l_in.sprn(9) = '0' then
277 v.pid := l_in.rs(31 downto 0);
278 else
279 v.prtbl := l_in.rs;
280 v.pt3_valid := '0';
281 end if;
282 v.pt0_valid := '0';
283 v.inval_all := '1';
284 v.state := DO_TLBIE;
285 end if;
286
287 when DO_TLBIE =>
288 dcreq := '1';
289 tlbie_req := '1';
290 v.state := TLB_WAIT;
291
292 when TLB_WAIT =>
293 if d_in.done = '1' then
294 done := '1';
295 v.state := IDLE;
296 end if;
297
298 when PROC_TBL_READ =>
299 dcreq := '1';
300 prtbl_rd := '1';
301 v.state := PROC_TBL_WAIT;
302
303 when PROC_TBL_WAIT =>
304 if d_in.done = '1' then
305 if d_in.err = '0' then
306 if r.addr(63) = '1' then
307 v.pgtbl3 := data;
308 v.pt3_valid := '1';
309 else
310 v.pgtbl0 := data;
311 v.pt0_valid := '1';
312 end if;
313 -- rts == radix tree size, # address bits being translated
314 rts := unsigned('0' & data(62 downto 61) & data(7 downto 5));
315 -- mbits == # address bits to index top level of tree
316 mbits := unsigned('0' & data(4 downto 0));
317 -- set v.shift to rts so that we can use finalmask for the segment check
318 v.shift := rts;
319 v.mask_size := mbits(4 downto 0);
320 v.pgbase := data(55 downto 8) & x"00";
321 if mbits = 0 then
322 v.state := RADIX_ERROR;
323 v.invalid := '1';
324 else
325 v.state := SEGMENT_CHECK;
326 end if;
327 else
328 v.state := RADIX_ERROR;
329 v.badtree := '1';
330 end if;
331 end if;
332
333 when SEGMENT_CHECK =>
334 mbits := '0' & r.mask_size;
335 v.shift := r.shift + (31 - 12) - mbits;
336 nonzero := or(r.addr(61 downto 31) and not finalmask(30 downto 0));
337 if r.addr(63) /= r.addr(62) or nonzero = '1' then
338 v.state := RADIX_ERROR;
339 v.segerror := '1';
340 elsif mbits < 5 or mbits > 16 or mbits > (r.shift + (31 - 12)) then
341 v.state := RADIX_ERROR;
342 v.badtree := '1';
343 else
344 v.state := RADIX_LOOKUP;
345 end if;
346
347 when RADIX_LOOKUP =>
348 dcreq := '1';
349 v.state := RADIX_READ_WAIT;
350
351 when RADIX_READ_WAIT =>
352 if d_in.done = '1' then
353 if d_in.err = '0' then
354 v.pde := data;
355 -- test valid bit
356 if data(63) = '1' then
357 -- test leaf bit
358 if data(62) = '1' then
359 -- check permissions and RC bits
360 perm_ok := '0';
361 if r.priv = '1' or data(3) = '0' then
362 if r.iside = '0' then
363 perm_ok := data(1) or (data(2) and not r.store);
364 else
365 -- no IAMR, so no KUEP support for now
366 -- deny execute permission if cache inhibited
367 perm_ok := data(0) and not data(5);
368 end if;
369 end if;
370 rc_ok := data(8) and (data(7) or not r.store);
371 if perm_ok = '1' and rc_ok = '1' then
372 v.state := RADIX_LOAD_TLB;
373 else
374 v.state := RADIX_ERROR;
375 v.perm_err := not perm_ok;
376 -- permission error takes precedence over RC error
377 v.rc_error := perm_ok;
378 end if;
379 else
380 mbits := unsigned('0' & data(4 downto 0));
381 if mbits < 5 or mbits > 16 or mbits > r.shift then
382 v.state := RADIX_ERROR;
383 v.badtree := '1';
384 else
385 v.shift := v.shift - mbits;
386 v.mask_size := mbits(4 downto 0);
387 v.pgbase := data(55 downto 8) & x"00";
388 v.state := RADIX_LOOKUP;
389 end if;
390 end if;
391 else
392 -- non-present PTE, generate a DSI
393 v.state := RADIX_ERROR;
394 v.invalid := '1';
395 end if;
396 else
397 v.state := RADIX_ERROR;
398 v.badtree := '1';
399 end if;
400 end if;
401
402 when RADIX_LOAD_TLB =>
403 tlb_load := '1';
404 if r.iside = '0' then
405 dcreq := '1';
406 v.state := TLB_WAIT;
407 else
408 itlb_load := '1';
409 done := '1';
410 v.state := IDLE;
411 end if;
412
413 when RADIX_ERROR =>
414 done := '1';
415 v.state := IDLE;
416
417 end case;
418
419 if r.addr(63) = '1' then
420 effpid := x"00000000";
421 else
422 effpid := r.pid;
423 end if;
424 prtable_addr := x"00" & r.prtbl(55 downto 36) &
425 ((r.prtbl(35 downto 12) and not finalmask(23 downto 0)) or
426 (effpid(31 downto 8) and finalmask(23 downto 0))) &
427 effpid(7 downto 0) & "0000";
428
429 pgtable_addr := x"00" & r.pgbase(55 downto 19) &
430 ((r.pgbase(18 downto 3) and not mask) or (addrsh and mask)) &
431 "000";
432 pte := x"00" &
433 ((r.pde(55 downto 12) and not finalmask) or (r.addr(55 downto 12) and finalmask))
434 & r.pde(11 downto 0);
435
436 -- update registers
437 rin <= v;
438
439 -- drive outputs
440 if tlbie_req = '1' then
441 addr := r.addr;
442 tlb_data := (others => '0');
443 elsif tlb_load = '1' then
444 addr := r.addr(63 downto 12) & x"000";
445 tlb_data := pte;
446 elsif prtbl_rd = '1' then
447 addr := prtable_addr;
448 tlb_data := (others => '0');
449 else
450 addr := pgtable_addr;
451 tlb_data := (others => '0');
452 end if;
453
454 l_out.done <= done;
455 l_out.invalid <= r.invalid;
456 l_out.badtree <= r.badtree;
457 l_out.segerr <= r.segerror;
458 l_out.perm_error <= r.perm_err;
459 l_out.rc_error <= r.rc_error;
460
461 d_out.valid <= dcreq;
462 d_out.tlbie <= tlbie_req;
463 d_out.doall <= r.inval_all;
464 d_out.tlbld <= tlb_load;
465 d_out.addr <= addr;
466 d_out.pte <= tlb_data;
467
468 i_out.tlbld <= itlb_load;
469 i_out.tlbie <= tlbie_req;
470 i_out.doall <= r.inval_all;
471 i_out.addr <= addr;
472 i_out.pte <= tlb_data;
473
474 end process;
475 end;