MMU: Implement data segment interrupts
[microwatt.git] / mmu.vhdl
1 library ieee;
2 use ieee.std_logic_1164.all;
3 use ieee.numeric_std.all;
4
5 library work;
6 use work.common.all;
7
8 -- Radix MMU
9 -- Supports 4-level trees as in arch 3.0B, but not the two-step translation for
10 -- guests under a hypervisor (i.e. there is no gRA -> hRA translation).
11
-- Radix MMU entity.  One request/response channel towards loadstore1
-- and one towards the dcache.
entity mmu is
    port (
        clk   : in  std_ulogic;
        -- Reset is sampled on the rising edge of clk.
        rst   : in  std_ulogic;

        -- loadstore1 side: incoming requests and the MMU's replies
        l_in  : in  Loadstore1ToMmuType;
        l_out : out MmuToLoadstore1Type;

        -- dcache side: requests issued by the MMU and the dcache's replies
        d_out : out MmuToDcacheType;
        d_in  : in  DcacheToMmuType
        );
end entity mmu;
24
25 architecture behave of mmu is
26
-- States of the translation state machine driven by mmu_1.
type state_t is (
    IDLE,             -- waiting for a request from loadstore1
    TLB_WAIT,         -- waiting for the dcache to signal done
    SEGMENT_CHECK,    -- range-checking the address and the top-level tree size
    RADIX_LOOKUP,     -- issuing a page-table read to the dcache
    RADIX_READ_WAIT,  -- waiting for the page-table read to complete
    RADIX_LOAD_TLB,   -- handing the final PTE to the dcache
    RADIX_ERROR       -- signalling completion after a failed translation
    );
35
-- All registered state of the MMU.
type reg_stage_t is record
    -- Request captured from loadstore1
    valid     : std_ulogic;                       -- a translation request was just accepted
    addr      : std_ulogic_vector(63 downto 0);   -- effective address of the request

    -- Walker state
    state     : state_t;
    pgtbl0    : std_ulogic_vector(63 downto 0);   -- SPR written via mtspr, read back on sprval
    shift     : unsigned(5 downto 0);             -- shift amount used by addrshifter/finalmaskgen
    mask_size : unsigned(4 downto 0);             -- number of index bits at the current level
    pgbase    : std_ulogic_vector(55 downto 0);   -- base address of the current page table
    pde       : std_ulogic_vector(63 downto 0);   -- most recent entry read from the tree

    -- Error flags reported to loadstore1
    invalid   : std_ulogic;
    badtree   : std_ulogic;
    segerror  : std_ulogic;
end record;
51
-- Registered state and its combinational next-state value.
signal r   : reg_stage_t;
signal rin : reg_stage_t;

-- Outputs of the helper processes below.
signal addrsh    : std_ulogic_vector(15 downto 0);   -- from addrshifter
signal mask      : std_ulogic_vector(15 downto 0);   -- from addrmaskgen
signal finalmask : std_ulogic_vector(43 downto 0);   -- from finalmaskgen
57
58 begin
-- SPR read-back path to loadstore1.  This is where a multiplexer keyed
-- on l_in.sprn would sit; with a single SPR there is nothing to select.
l_out.sprval <= r.pgtbl0;
62
-- Clocked process: holds the MMU registers and emits simulation traces.
mmu_0: process(clk)
begin
    if rising_edge(clk) then
        if rst = '1' then
            -- Only the fields that matter for start-up are reset.
            r.state <= IDLE;
            r.valid <= '0';
            r.pgtbl0 <= (others => '0');
        else
            -- Register update.  r keeps its old value for the rest of this
            -- process run, so the traces below still see the current state.
            r <= rin;

            -- Trace messages
            if rin.valid = '1' then
                report "MMU got tlb miss for " & to_hstring(rin.addr);
            end if;
            if l_out.done = '1' then
                report "MMU completing op with invalid=" & std_ulogic'image(l_out.invalid) &
                    " badtree=" & std_ulogic'image(l_out.badtree);
            end if;
            if rin.state = RADIX_LOOKUP then
                report "radix lookup shift=" & integer'image(to_integer(rin.shift)) &
                    " msize=" & integer'image(to_integer(rin.mask_size));
            end if;
            if r.state = RADIX_LOOKUP then
                report "send load addr=" & to_hstring(d_out.addr) &
                    " addrsh=" & to_hstring(addrsh) & " mask=" & to_hstring(mask);
            end if;
        end if;
    end if;
end process;
90
-- Three-stage shifter: moves address bits 61..12 right by r.shift
-- (0 to 47 places) and outputs the low 16 bits of the result on addrsh.
-- The stages shift by multiples of 16, 4 and 1 respectively.
addrshifter: process(all)
    variable by16 : std_ulogic_vector(30 downto 0);
    variable by4  : std_ulogic_vector(18 downto 0);
    variable by1  : std_ulogic_vector(15 downto 0);
begin
    -- Stage 1: shift by 0, 16 or 32 according to r.shift(5 downto 4).
    case r.shift(5 downto 4) is
        when "00" =>
            by16 := r.addr(42 downto 12);
        when "01" =>
            by16 := r.addr(58 downto 28);
        when others =>
            -- Shift by 32: only addr(61 downto 44) remains, zero-extended.
            by16 := (others => '0');
            by16(17 downto 0) := r.addr(61 downto 44);
    end case;

    -- Stage 2: shift by 0, 4, 8 or 12 according to r.shift(3 downto 2).
    case r.shift(3 downto 2) is
        when "00" =>
            by4 := by16(18 downto 0);
        when "01" =>
            by4 := by16(22 downto 4);
        when "10" =>
            by4 := by16(26 downto 8);
        when others =>
            by4 := by16(30 downto 12);
    end case;

    -- Stage 3: shift by 0 to 3 according to r.shift(1 downto 0).
    case r.shift(1 downto 0) is
        when "00" =>
            by1 := by4(15 downto 0);
        when "01" =>
            by1 := by4(16 downto 1);
        when "10" =>
            by1 := by4(17 downto 2);
        when others =>
            by1 := by4(18 downto 3);
    end case;

    addrsh <= by1;
end process;
128
-- Builds the 16-bit mask that selects which bits of the shifted address
-- are used as the index when forming a page-table entry address.
addrmaskgen: process(all)
    variable width : natural;
    variable bits  : std_ulogic_vector(15 downto 0);
begin
    width := to_integer(r.mask_size);

    -- mask_size has to be >= 5, so the low five bits are always set
    bits := (others => '0');
    bits(4 downto 0) := (others => '1');

    -- Bits 5..15 are set when they lie below mask_size.
    for k in 15 downto 5 loop
        if k < width then
            bits(k) := '1';
        end if;
    end loop;

    mask <= bits;
end process;
142
-- Builds the mask of address bits that are copied into the TLB entry
-- in place of PTE bits, which is what allows pages larger than 4kB.
-- Bit i of the mask is set when i < r.shift.
finalmaskgen: process(all)
    variable count : natural;
    variable bits  : std_ulogic_vector(43 downto 0);
begin
    count := to_integer(r.shift);

    bits := (others => '0');
    for k in bits'range loop
        if k < count then
            bits(k) := '1';
        end if;
    end loop;

    finalmask <= bits;
end process;
156
-- Combinational process: computes the next register state (rin) and
-- drives the outputs to loadstore1 and the dcache from the current
-- state (r) and the inputs.
mmu_1: process(all)
    variable nxt         : reg_stage_t;
    variable dc_valid    : std_ulogic;
    variable op_done     : std_ulogic;
    variable ld_tlb      : std_ulogic;
    variable inval_req   : std_ulogic;
    variable tree_bits   : unsigned(5 downto 0);
    variable idx_bits    : unsigned(5 downto 0);
    variable level_shift : unsigned(5 downto 0);
    variable pte_index   : std_ulogic_vector(15 downto 0);
    variable walk_addr   : std_ulogic_vector(63 downto 0);
    variable tlb_pte     : std_ulogic_vector(63 downto 0);
    variable swapped     : std_ulogic_vector(63 downto 0);
    variable hi_bits_set : std_ulogic;
begin
    -- Defaults: hold the registers, clear the one-cycle flags.
    nxt := r;
    nxt.valid := '0';
    nxt.invalid := '0';
    nxt.badtree := '0';
    nxt.segerror := '0';
    dc_valid := '0';
    op_done := '0';
    ld_tlb := '0';
    inval_req := '0';

    -- The radix tree is stored big-endian in memory, so reverse the
    -- byte order of the doubleword returned by the dcache.
    for b in 0 to 7 loop
        swapped(63 - 8 * b downto 56 - 8 * b) := d_in.data(8 * b + 7 downto 8 * b);
    end loop;

    case r.state is
        when IDLE =>
            -- Radix tree size: the number of address bits being translated.
            tree_bits := unsigned('0' & r.pgtbl0(62 downto 61) & r.pgtbl0(7 downto 5));
            -- Number of address bits that index the top level of the tree.
            idx_bits := unsigned('0' & r.pgtbl0(4 downto 0));
            -- Loading shift with the tree size lets SEGMENT_CHECK reuse
            -- finalmask for its range check.
            nxt.shift := tree_bits;
            nxt.mask_size := idx_bits(4 downto 0);
            nxt.pgbase := r.pgtbl0(55 downto 8) & x"00";

            if l_in.valid = '1' then
                nxt.addr := l_in.addr;
                if l_in.tlbie = '1' then
                    -- Pass the invalidation straight on to the dcache.
                    dc_valid := '1';
                    inval_req := '1';
                    nxt.state := TLB_WAIT;
                elsif idx_bits = 0 then
                    -- RPDS = 0 is used to disable radix tree walks.
                    nxt.valid := '1';
                    nxt.invalid := '1';
                    nxt.state := RADIX_ERROR;
                else
                    nxt.valid := '1';
                    nxt.state := SEGMENT_CHECK;
                end if;
            end if;

            if l_in.mtspr = '1' then
                nxt.pgtbl0 := l_in.rs;
            end if;

        when TLB_WAIT =>
            if d_in.done = '1' then
                op_done := '1';
                nxt.state := IDLE;
            end if;

        when SEGMENT_CHECK =>
            idx_bits := '0' & r.mask_size;
            level_shift := r.shift + (31 - 12);
            nxt.shift := level_shift - idx_bits;
            -- Any address bit in 61..31 outside finalmask is out of range.
            hi_bits_set := or(r.addr(61 downto 31) and not finalmask(30 downto 0));

            if r.addr(63) /= r.addr(62) or hi_bits_set = '1' then
                nxt.segerror := '1';
                nxt.state := RADIX_ERROR;
            elsif idx_bits < 5 or idx_bits > 16 or idx_bits > level_shift then
                nxt.badtree := '1';
                nxt.state := RADIX_ERROR;
            else
                nxt.state := RADIX_LOOKUP;
            end if;

        when RADIX_LOOKUP =>
            dc_valid := '1';
            nxt.state := RADIX_READ_WAIT;

        when RADIX_READ_WAIT =>
            if d_in.done = '1' then
                if d_in.err /= '0' then
                    -- The read itself failed.
                    nxt.badtree := '1';
                    nxt.state := RADIX_ERROR;
                else
                    nxt.pde := swapped;
                    if swapped(63) /= '1' then
                        -- Valid bit clear: non-present PTE, generate a DSI.
                        nxt.invalid := '1';
                        nxt.state := RADIX_ERROR;
                    elsif swapped(62) = '1' then
                        -- Leaf bit set: this is the final PTE.
                        nxt.state := RADIX_LOAD_TLB;
                    else
                        -- Directory entry: descend one level if its size is sane.
                        idx_bits := unsigned('0' & swapped(4 downto 0));
                        if idx_bits < 5 or idx_bits > 16 or idx_bits > r.shift then
                            nxt.badtree := '1';
                            nxt.state := RADIX_ERROR;
                        else
                            nxt.shift := r.shift - idx_bits;
                            nxt.mask_size := idx_bits(4 downto 0);
                            nxt.pgbase := swapped(55 downto 8) & x"00";
                            nxt.state := RADIX_LOOKUP;
                        end if;
                    end if;
                end if;
            end if;

        when RADIX_LOAD_TLB =>
            ld_tlb := '1';
            dc_valid := '1';
            nxt.state := TLB_WAIT;

        when RADIX_ERROR =>
            op_done := '1';
            nxt.state := IDLE;

    end case;

    -- Address of the next page-table entry: the table base with its
    -- index field replaced by the masked, shifted address bits.
    pte_index := (r.pgbase(18 downto 3) and not mask) or (addrsh and mask);
    walk_addr := x"00" & r.pgbase(55 downto 19) & pte_index & "000";

    -- PTE handed to the TLB: for bits covered by finalmask, the
    -- effective address bits replace the PTE bits.
    tlb_pte := x"00" &
               ((r.pde(55 downto 12) and not finalmask) or (r.addr(55 downto 12) and finalmask))
               & r.pde(11 downto 0);

    -- Next register state
    rin <= nxt;

    -- Outputs to loadstore1
    l_out.done <= op_done;
    l_out.invalid <= r.invalid;
    l_out.badtree <= r.badtree;
    l_out.segerr <= r.segerror;

    -- Outputs to the dcache.  The page-table read is the default; a
    -- tlbie or TLB load overrides the address and PTE below.
    d_out.valid <= dc_valid;
    d_out.tlbie <= inval_req;
    d_out.tlbld <= ld_tlb;
    d_out.addr <= walk_addr;
    d_out.pte <= (others => '0');
    if inval_req = '1' then
        d_out.addr <= l_in.addr;
        d_out.pte <= l_in.rs;
    elsif ld_tlb = '1' then
        d_out.addr <= r.addr(63 downto 12) & x"000";
        d_out.pte <= tlb_pte;
    end if;
end process;
315 end;