Add Tercel PHY reset synchronization
[microwatt.git] / loadstore1.vhdl
1 library ieee;
2 use ieee.std_logic_1164.all;
3 use ieee.numeric_std.all;
4
5 library work;
6 use work.decode_types.all;
7 use work.common.all;
8 use work.insn_helpers.all;
9 use work.helpers.all;
10
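11 -- Three-stage load/store unit (LSU)
12 -- We calculate the address in the first cycle, format store data in the second, and complete the access (dcache/MMU results, writeback, interrupts) in the third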
13
-- Load/store unit.
-- Receives requests on l_in, presents accesses to the dcache on d_out and
-- operations to the MMU on m_out, and reports completion, register write
-- data and interrupts on l_out.  Status (busy / in_progress) is returned
-- on e_out.
14 entity loadstore1 is
15 generic (
-- When true, the DP<->SP conversion logic used for single-precision
-- floating-point loads and stores is generated.
16 HAS_FPU : boolean := true;
17 -- Non-zero to enable log data collection
18 LOG_LENGTH : natural := 0
19 );
20 port (
21 clk : in std_ulogic;
22 rst : in std_ulogic; -- synchronous reset, sampled on the rising edge of clk
23
24 l_in : in Execute1ToLoadstore1Type; -- incoming request (l_in.valid is gated with busy inside execute1)
25 e_out : out Loadstore1ToExecute1Type; -- busy / in_progress status
26 l_out : out Loadstore1ToWritebackType; -- completion, write-back data and interrupt information
27
28 d_out : out Loadstore1ToDcacheType; -- request to the dcache
29 d_in : in DcacheToLoadstore1Type; -- dcache response (valid, error, data, store_done, cache_paradox)
30
31 m_out : out Loadstore1ToMmuType; -- request to the MMU
32 m_in : in MmuToLoadstore1Type; -- MMU response (done, err, error details, sprval)
33
34 dc_stall : in std_ulogic; -- while set, stage 1 does not present a new dcache request
35
36 log_out : out std_ulogic_vector(9 downto 0) -- only driven when LOG_LENGTH > 0
37 );
38 end loadstore1;
39
40 architecture behave of loadstore1 is
41
42 -- State of the third pipeline stage (held in r3.state)
43 type state_t is (IDLE, -- ready for instruction
44 MMU_LOOKUP, -- waiting for MMU to look up translation
45 TLBIE_WAIT, -- waiting for MMU to finish doing a tlbie
46 FINISH_LFS -- write back converted SP data for lfs*
47 );
48
-- For each of the 8 result bytes, the index of the source byte within the
-- dcache data doubleword (used by the load formatter in stage 3).
49 type byte_index_t is array(0 to 7) of unsigned(2 downto 0);
-- Per-byte control for the load trim/sign-extend step in stage 3:
--   "11" = take the byte from r3.load_data (saved from the first doubleword)
--   "10" = take the byte from the permuted dcache data
--   any other value = fill with the sign-extension bit (or zero)
50 subtype byte_trim_t is std_ulogic_vector(1 downto 0);
51 type trim_ctl_t is array(0 to 7) of byte_trim_t;
52
-- Internal request format.  One of these is built from l_in by the
-- loadstore1_in process and then carried through r1 and r2.
53 type request_t is record
54 valid : std_ulogic;
55 dc_req : std_ulogic; -- needs a dcache access: valid load/store/dcbz with no alignment interrupt
56 load : std_ulogic;
57 store : std_ulogic;
58 tlbie : std_ulogic;
59 dcbz : std_ulogic;
60 read_spr : std_ulogic;
61 write_spr : std_ulogic;
62 mmu_op : std_ulogic; -- handled by the MMU: tlbie, instruction fetch fault, or mtspr with sprn(9) or sprn(5) set
63 instr_fault : std_ulogic; -- set for OP_FETCH_FAILED; the MMU is asked to do the radix walk
64 load_zero : std_ulogic;
65 do_update : std_ulogic; -- second half of an update-form load: write the address back to RA
66 noop : std_ulogic;
67 mode_32bit : std_ulogic;
68 addr : std_ulogic_vector(63 downto 0); -- address presented to the dcache / MMU
69 addr0 : std_ulogic_vector(63 downto 0); -- computed effective address; used for byte rotation and the RA update value
70 byte_sel : std_ulogic_vector(7 downto 0); -- byte enables for the current doubleword
71 second_bytes : std_ulogic_vector(7 downto 0); -- byte enables for the following doubleword of a misaligned access
72 store_data : std_ulogic_vector(63 downto 0);
73 instr_tag : instr_tag_t;
74 write_reg : gspr_index_t;
75 length : std_ulogic_vector(3 downto 0);
76 elt_length : std_ulogic_vector(3 downto 0);
77 byte_reverse : std_ulogic;
78 brev_mask : unsigned(2 downto 0); -- length - 1 when byte_reverse is set, otherwise 0
79 sign_extend : std_ulogic;
80 update : std_ulogic;
81 xerc : xer_common_t;
82 reserve : std_ulogic;
83 atomic : std_ulogic;
84 atomic_last : std_ulogic;
85 rc : std_ulogic;
86 nc : std_ulogic; -- non-cacheable access
87 virt_mode : std_ulogic;
88 priv_mode : std_ulogic;
89 load_sp : std_ulogic; -- single-precision FP load needing SP->DP conversion
90 sprn : std_ulogic_vector(9 downto 0);
91 is_slbia : std_ulogic;
92 align_intr : std_ulogic; -- request must raise an alignment interrupt
93 dword_index : std_ulogic; -- '1' for the second doubleword of a two-doubleword access
94 two_dwords : std_ulogic; -- access spans two doublewords (second_bytes is non-zero)
95 nia : std_ulogic_vector(63 downto 0);
96 end record;
-- Default value: every field cleared.
97 constant request_init : request_t := (valid => '0', dc_req => '0', load => '0', store => '0', tlbie => '0',
98 dcbz => '0', read_spr => '0', write_spr => '0', mmu_op => '0',
99 instr_fault => '0', load_zero => '0', do_update => '0', noop => '0',
100 mode_32bit => '0', addr => (others => '0'), addr0 => (others => '0'),
101 byte_sel => x"00", second_bytes => x"00",
102 store_data => (others => '0'), instr_tag => instr_tag_init,
103 write_reg => 7x"00", length => x"0",
104 elt_length => x"0", byte_reverse => '0', brev_mask => "000",
105 sign_extend => '0', update => '0',
106 xerc => xerc_init, reserve => '0',
107 atomic => '0', atomic_last => '0', rc => '0', nc => '0',
108 virt_mode => '0', priv_mode => '0', load_sp => '0',
109 sprn => 10x"0", is_slbia => '0', align_intr => '0',
110 dword_index => '0', two_dwords => '0',
111 nia => (others => '0'));
112
-- Stage 1 register: the request being issued to the dcache.
113 type reg_stage1_t is record
114 req : request_t;
115 issued : std_ulogic; -- the dcache request for req has been presented
116 end record;
117
-- Stage 2 register: the request waiting for its result, plus the
-- controls precomputed for the stage 3 load formatter.
118 type reg_stage2_t is record
119 req : request_t;
120 byte_index : byte_index_t;
121 use_second : std_ulogic_vector(7 downto 0);
122 wait_dc : std_ulogic; -- request completes when d_in.valid is seen
123 wait_mmu : std_ulogic; -- request completes when m_in.done is seen
124 one_cycle : std_ulogic; -- request completes without waiting for dcache or MMU
125 wr_sel : std_ulogic_vector(1 downto 0); -- write data source: "00" SPR value, "01" addr0, "10" lfs result, others load data
126 end record;
127
-- Stage 3 register: state machine, DAR/DSISR, saved load data and the
-- interrupt information presented to writeback.
128 type reg_stage3_t is record
129 state : state_t;
130 instr_tag : instr_tag_t;
131 write_enable : std_ulogic;
132 write_reg : gspr_index_t;
133 write_data : std_ulogic_vector(63 downto 0);
134 rc : std_ulogic;
135 xerc : xer_common_t;
136 store_done : std_ulogic;
137 convert_lfs : std_ulogic; -- set for the cycle in which an lfs conversion result is written back
138 load_data : std_ulogic_vector(63 downto 0); -- permuted data saved from the most recent load response
139 dar : std_ulogic_vector(63 downto 0);
140 dsisr : std_ulogic_vector(31 downto 0);
141 ld_sp_data : std_ulogic_vector(31 downto 0); -- trimmed SP load data, input to sp_to_dp
142 ld_sp_nz : std_ulogic; -- SP fraction field is non-zero
143 ld_sp_lz : std_ulogic_vector(5 downto 0); -- count_left_zeroes of the SP fraction field
144 stage1_en : std_ulogic; -- cleared while an MMU lookup for a dcache error is outstanding
145 interrupt : std_ulogic;
146 intr_vec : integer range 0 to 16#fff#;
147 nia : std_ulogic_vector(63 downto 0);
148 srr1 : std_ulogic_vector(15 downto 0);
149 end record;
150
-- Decoded form of l_in, produced by loadstore1_in
151 signal req_in : request_t;
-- Pipeline registers (rN) and their next-state values (rNin)
152 signal r1, r1in : reg_stage1_t;
153 signal r2, r2in : reg_stage2_t;
154 signal r3, r3in : reg_stage3_t;
155
156 signal busy : std_ulogic;
157 signal complete : std_ulogic;
158 signal in_progress : std_ulogic;
-- Set from when a request with align_intr enters r1 until the interrupt is raised
159 signal flushing : std_ulogic;
160
-- Results of the DP->SP (store) and SP->DP (load) conversions; only
-- driven when HAS_FPU is true
161 signal store_sp_data : std_ulogic_vector(31 downto 0);
162 signal load_dp_data : std_ulogic_vector(63 downto 0);
-- r1.req.store_data after byte reversal and rotation (from loadstore1_2)
163 signal store_data : std_ulogic_vector(63 downto 0);
164
165 signal stage1_issue_enable : std_ulogic;
166 signal stage1_req : request_t;
167 signal stage1_dcreq : std_ulogic;
-- stage1_dcreq delayed by one clock
168 signal stage1_dreq : std_ulogic;
169 signal stage2_busy_next : std_ulogic;
170 signal stage3_busy_next : std_ulogic;
171
-- Turn an access size in bytes (1, 2, 4 or 8) into a right-justified
-- byte-enable mask.  Any other size value gives an all-zero mask.
function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is
begin
    if length = "0001" then
        return x"01";
    elsif length = "0010" then
        return x"03";
    elsif length = "0100" then
        return x"0F";
    elsif length = "1000" then
        return x"FF";
    end if;
    -- unsupported size: no bytes enabled
    return x"00";
end function length_to_sel;
188
-- Byte enables for a possibly misaligned transfer.
-- The 8-bit mask for the given size is zero-extended to 16 bits and moved
-- left by the byte offset in 'address'.  Bits 7..0 of the result select
-- bytes in the first doubleword, bits 15..8 bytes in the following one.
function xfer_data_sel(size : in std_logic_vector(3 downto 0);
                       address : in std_logic_vector(2 downto 0))
    return std_ulogic_vector is
    variable base_sel : std_ulogic_vector(7 downto 0);
    variable widened  : unsigned(15 downto 0);
    variable offset   : natural range 0 to 7;
begin
    base_sel := length_to_sel(size);
    widened  := unsigned'(x"00") & unsigned(base_sel);
    offset   := to_integer(unsigned(address));
    return std_ulogic_vector(shift_left(widened, offset));
end function xfer_data_sel;
201
-- 23-bit logical right shifter for DP -> SP float conversions.
-- The shift is applied in two steps: 0..3 places selected by shift(1 downto 0),
-- then 0, 4, 8, 12 or 16 places selected by shift(4 downto 2).  Any other
-- value of shift(4 downto 2) selects a 20-place second step.
function shifter_23r(frac: std_ulogic_vector(22 downto 0); shift: unsigned(4 downto 0))
    return std_ulogic_vector is
    variable fine_amt   : natural range 0 to 3;
    variable coarse_amt : natural range 0 to 20;
    variable partial    : unsigned(22 downto 0);
    variable result     : std_ulogic_vector(22 downto 0);
begin
    -- first step: shift by 0 to 3 places
    case shift(1 downto 0) is
        when "00"   => fine_amt := 0;
        when "01"   => fine_amt := 1;
        when "10"   => fine_amt := 2;
        when others => fine_amt := 3;
    end case;

    -- second step: shift by a multiple of 4 places, limited to 20
    case shift(4 downto 2) is
        when "000"  => coarse_amt := 0;
        when "001"  => coarse_amt := 4;
        when "010"  => coarse_amt := 8;
        when "011"  => coarse_amt := 12;
        when "100"  => coarse_amt := 16;
        when others => coarse_amt := 20;
    end case;

    partial := shift_right(unsigned(frac), fine_amt);
    result  := std_ulogic_vector(shift_right(partial, coarse_amt));
    return result;
end;
234
-- 23-bit logical left shifter for SP -> DP float conversions.
-- The shift is applied in two steps: 0..3 places selected by shift(1 downto 0),
-- then 0, 4, 8, 12 or 16 places selected by shift(4 downto 2).  Any other
-- value of shift(4 downto 2) selects a 20-place second step.
function shifter_23l(frac: std_ulogic_vector(22 downto 0); shift: unsigned(4 downto 0))
    return std_ulogic_vector is
    variable fine_amt   : natural range 0 to 3;
    variable coarse_amt : natural range 0 to 20;
    variable partial    : unsigned(22 downto 0);
    variable result     : std_ulogic_vector(22 downto 0);
begin
    -- first step: shift by 0 to 3 places
    case shift(1 downto 0) is
        when "00"   => fine_amt := 0;
        when "01"   => fine_amt := 1;
        when "10"   => fine_amt := 2;
        when others => fine_amt := 3;
    end case;

    -- second step: shift by a multiple of 4 places, limited to 20
    case shift(4 downto 2) is
        when "000"  => coarse_amt := 0;
        when "001"  => coarse_amt := 4;
        when "010"  => coarse_amt := 8;
        when "011"  => coarse_amt := 12;
        when "100"  => coarse_amt := 16;
        when others => coarse_amt := 20;
    end case;

    partial := shift_left(unsigned(frac), fine_amt);
    result  := std_ulogic_vector(shift_left(partial, coarse_amt));
    return result;
end;
267
268 begin
-- Pipeline registers.
-- On reset only the listed control fields are initialised; the remaining
-- fields of r1/r2/r3 are left as they are.  Otherwise r1/r2/r3 take their
-- next-state values every clock.
269 loadstore1_reg: process(clk)
270 begin
271 if rising_edge(clk) then
272 if rst = '1' then
273 r1.req.valid <= '0';
274 r2.req.valid <= '0';
275 r2.wait_dc <= '0';
276 r2.wait_mmu <= '0';
277 r2.one_cycle <= '0';
278 r3.dar <= (others => '0');
279 r3.dsisr <= (others => '0');
280 r3.state <= IDLE;
281 r3.write_enable <= '0';
282 r3.interrupt <= '0';
283 r3.stage1_en <= '1';
284 r3.convert_lfs <= '0';
285 flushing <= '0';
286 else
287 r1 <= r1in;
288 r2 <= r2in;
289 r3 <= r3in;
-- flushing becomes set when a valid request with align_intr is loaded
-- into r1 and is cleared when stage 3 raises the interrupt
290 flushing <= (flushing or (r1in.req.valid and r1in.req.align_intr)) and
291 not r3in.interrupt;
292 end if;
-- Updated on every clock edge, including while rst is asserted
293 stage1_dreq <= stage1_dcreq;
-- Consistency checks: dcache and MMU responses must only arrive
-- while a matching request is held in r2
294 if d_in.valid = '1' then
295 assert r2.req.valid = '1' and r2.req.dc_req = '1' and r3.state = IDLE severity failure;
296 end if;
297 if d_in.error = '1' then
298 assert r2.req.valid = '1' and r2.req.dc_req = '1' and r3.state = IDLE severity failure;
299 end if;
300 if m_in.done = '1' or m_in.err = '1' then
301 assert r2.req.valid = '1' and (r3.state = MMU_LOOKUP or r3.state = TLBIE_WAIT) severity failure;
302 end if;
303 end if;
304 end process;
305
-- Floating-point format conversion logic, generated only when HAS_FPU is true.
306 ls_fp_conv: if HAS_FPU generate
307 -- Convert DP data to SP for stfs
-- Input is l_in.data, output is store_sp_data.  The sign is always copied;
-- the remaining bits default to zero and are overridden below depending
-- on the DP exponent.
308 dp_to_sp: process(all)
309 variable exp : unsigned(10 downto 0);
310 variable frac : std_ulogic_vector(22 downto 0);
311 variable shift : unsigned(4 downto 0);
312 begin
313 store_sp_data(31) <= l_in.data(63);
314 store_sp_data(30 downto 0) <= (others => '0');
315 exp := unsigned(l_in.data(62 downto 52));
316 if exp > 896 then
-- copy the top exponent bit, the low 7 exponent bits and the
-- top 23 fraction bits
317 store_sp_data(30) <= l_in.data(62);
318 store_sp_data(29 downto 0) <= l_in.data(58 downto 29);
319 elsif exp >= 874 then
320 -- denormalization required
-- prepend a '1' to the top 22 fraction bits and shift right by the
-- 5-bit negation of the low exponent bits
321 frac := '1' & l_in.data(51 downto 30);
322 shift := 0 - exp(4 downto 0);
323 store_sp_data(22 downto 0) <= shifter_23r(frac, shift);
324 end if;
-- exp below 874: exponent and fraction stay zero
325 end process;
326
327 -- Convert SP data to DP for lfs
-- Input is r3.ld_sp_data (with r3.ld_sp_nz / r3.ld_sp_lz describing its
-- fraction field), output is load_dp_data.
328 sp_to_dp: process(all)
329 variable exp : unsigned(7 downto 0);
330 variable exp_dp : unsigned(10 downto 0);
331 variable exp_nz : std_ulogic;
332 variable exp_ao : std_ulogic;
333 variable frac : std_ulogic_vector(22 downto 0);
334 variable frac_shift : unsigned(4 downto 0);
335 begin
336 frac := r3.ld_sp_data(22 downto 0);
337 exp := unsigned(r3.ld_sp_data(30 downto 23));
338 exp_nz := or (r3.ld_sp_data(30 downto 23));
339 exp_ao := and (r3.ld_sp_data(30 downto 23));
340 frac_shift := (others => '0');
341 if exp_ao = '1' then
342 exp_dp := to_unsigned(2047, 11); -- infinity or NaN
343 elsif exp_nz = '1' then
344 exp_dp := 896 + resize(exp, 11); -- finite normalized value
345 elsif r3.ld_sp_nz = '0' then
346 exp_dp := to_unsigned(0, 11); -- zero
347 else
348 -- denormalized SP operand, need to normalize
-- shifting left by (leading zero count + 1) moves the leading
-- one bit out of the 23-bit fraction field
349 exp_dp := 896 - resize(unsigned(r3.ld_sp_lz), 11);
350 frac_shift := unsigned(r3.ld_sp_lz(4 downto 0)) + 1;
351 end if;
352 load_dp_data(63) <= r3.ld_sp_data(31);
353 load_dp_data(62 downto 52) <= std_ulogic_vector(exp_dp);
354 load_dp_data(51 downto 29) <= shifter_23l(frac, frac_shift);
355 load_dp_data(28 downto 0) <= (others => '0');
356 end process;
357 end generate;
358
359 -- Translate a load/store instruction into the internal request format
360 -- XXX this should only depend on l_in, but actually depends on
361 -- r1.req.addr0 as well (in the l_in.second = 1 case).
-- The result is driven onto req_in.
362 loadstore1_in: process(all)
363 variable v : request_t;
364 variable lsu_sum : std_ulogic_vector(63 downto 0);
365 variable brev_lenm1 : unsigned(2 downto 0);
366 variable long_sel : std_ulogic_vector(15 downto 0);
367 variable addr : std_ulogic_vector(63 downto 0);
368 variable sprn : std_ulogic_vector(9 downto 0);
369 variable misaligned : std_ulogic;
370 variable addr_mask : std_ulogic_vector(2 downto 0);
371 begin
-- Start from the all-clear default so that only the fields set below
-- are non-zero
372 v := request_init;
373 sprn := std_ulogic_vector(to_unsigned(decode_spr_num(l_in.insn), 10));
374
375 v.valid := l_in.valid;
376 v.instr_tag := l_in.instr_tag;
377 v.mode_32bit := l_in.mode_32bit;
378 v.write_reg := l_in.write_reg;
379 v.length := l_in.length;
380 v.elt_length := l_in.length;
381 v.byte_reverse := l_in.byte_reverse;
382 v.sign_extend := l_in.sign_extend;
383 v.update := l_in.update;
384 v.xerc := l_in.xerc;
385 v.reserve := l_in.reserve;
386 v.rc := l_in.rc;
387 v.nc := l_in.ci;
388 v.virt_mode := l_in.virt_mode;
389 v.priv_mode := l_in.priv_mode;
390 v.sprn := sprn;
391 v.nia := l_in.nia;
392
-- Effective address is the sum of the two address operands
393 lsu_sum := std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2));
394
-- 32-bit FP stores use the DP->SP converted value in the low word
395 if HAS_FPU and l_in.is_32bit = '1' then
396 v.store_data := x"00000000" & store_sp_data;
397 else
398 v.store_data := l_in.data;
399 end if;
400
401 addr := lsu_sum;
402
403 if l_in.second = '1' then
404 if l_in.update = '0' then
405 -- for the second half of a 16-byte transfer,
406 -- use the previous address plus 8.
407 addr := std_ulogic_vector(unsigned(r1.req.addr0(63 downto 3)) + 1) & r1.req.addr0(2 downto 0);
408 else
409 -- for an update-form load, use the previous address
410 -- as the value to write back to RA.
411 addr := r1.req.addr0;
412 end if;
413 end if;
-- In 32-bit mode the upper half of the address is forced to zero
414 if l_in.mode_32bit = '1' then
415 addr(63 downto 32) := (others => '0');
416 end if;
417 v.addr := addr;
418 v.addr0 := addr;
419
420 -- XXX Temporary hack. Mark the op as non-cachable if the address
421 -- is the form 0xc------- for a real-mode access.
422 if addr(31 downto 28) = "1100" and l_in.virt_mode = '0' then
423 v.nc := '1';
424 end if;
425
-- length - 1, used as a mask on the low address bits for the
-- alignment check below
426 addr_mask := std_ulogic_vector(unsigned(l_in.length(2 downto 0)) - 1);
427
428 -- Do length_to_sel and work out if we are doing 2 dwords
429 long_sel := xfer_data_sel(v.length, addr(2 downto 0));
430 v.byte_sel := long_sel(7 downto 0);
431 v.second_bytes := long_sel(15 downto 8);
432 if long_sel(15 downto 8) /= "00000000" then
433 v.two_dwords := '1';
434 end if;
435
436 -- check alignment for larx/stcx
437 misaligned := or (addr_mask and addr(2 downto 0));
438 v.align_intr := l_in.reserve and misaligned;
439 if l_in.repeat = '1' and l_in.second = '0' and l_in.update = '0' and addr(3) = '1' then
440 -- length is really 16 not 8
441 -- Make misaligned lq cause an alignment interrupt in LE mode,
442 -- in order to avoid the case with RA = RT + 1 where the second half
443 -- faults but the first doesn't (and updates RT+1, destroying RA).
444 -- The equivalent BE case doesn't occur because RA = RT is illegal.
445 misaligned := '1';
446 if l_in.reserve = '1' or (l_in.op = OP_LOAD and l_in.byte_reverse = '0') then
447 v.align_intr := '1';
448 end if;
449 end if;
450
451 v.atomic := not misaligned;
452 v.atomic_last := not misaligned and (l_in.second or not l_in.repeat);
453
-- Set the per-operation flags
454 case l_in.op is
455 when OP_STORE =>
456 v.store := '1';
457 when OP_LOAD =>
458 if l_in.update = '0' or l_in.second = '0' then
459 v.load := '1';
460 if HAS_FPU and l_in.is_32bit = '1' then
461 -- Allow an extra cycle for SP->DP precision conversion
462 v.load_sp := '1';
463 end if;
464 else
465 -- write back address to RA
466 v.do_update := '1';
467 end if;
468 when OP_DCBZ =>
469 v.dcbz := '1';
-- dcbz to a non-cacheable address raises an alignment interrupt
470 v.align_intr := v.nc;
471 when OP_TLBIE =>
472 v.tlbie := '1';
473 v.addr := l_in.addr2; -- address from RB for tlbie
474 v.is_slbia := l_in.insn(7);
475 v.mmu_op := '1';
476 when OP_MFSPR =>
477 v.read_spr := '1';
478 when OP_MTSPR =>
479 v.write_spr := '1';
-- SPR numbers with bit 9 or bit 5 set are written via the MMU;
-- the others (DAR/DSISR) are handled in stage 3
480 v.mmu_op := sprn(9) or sprn(5);
481 when OP_FETCH_FAILED =>
482 -- send it to the MMU to do the radix walk
483 v.instr_fault := '1';
484 v.addr := l_in.nia;
485 v.mmu_op := '1';
486 when others =>
487 end case;
-- A dcache access is needed for a valid load, store or dcbz that does
-- not raise an alignment interrupt
488 v.dc_req := l_in.valid and (v.load or v.store or v.dcbz) and not v.align_intr;
489
490 -- Work out controls for load and store formatting
491 brev_lenm1 := "000";
492 if v.byte_reverse = '1' then
493 brev_lenm1 := unsigned(v.length(2 downto 0)) - 1;
494 end if;
495 v.brev_mask := brev_lenm1;
496
497 req_in <= v;
498 end process;
499
-- busy: the request in r1 cannot be replaced next cycle because its dcache
-- request is not yet issued, the dcache reported an error for it, stage 2
-- will still be occupied, or the second doubleword has still to be issued.
500 busy <= r1.req.valid and ((r1.req.dc_req and not r1.issued) or
501 (r1.issued and d_in.error) or
502 stage2_busy_next or
503 (r1.req.dc_req and r1.req.two_dwords and not r1.req.dword_index));
-- complete: the request in r2 finishes this cycle
504 complete <= r2.one_cycle or (r2.wait_dc and d_in.valid) or
505 (r2.wait_mmu and m_in.done) or r3.convert_lfs;
-- in_progress: a request is held in r1, or in r2 and not completing now
506 in_progress <= r1.req.valid or (r2.req.valid and not complete);
507
-- Stage 1 may issue to the dcache only when stage 3 allows it and no
-- MMU operation is held in r1 or r2
508 stage1_issue_enable <= r3.stage1_en and not (r1.req.valid and r1.req.mmu_op) and
509 not (r2.req.valid and r2.req.mmu_op);
510
511 -- Processing done in the first cycle of a load/store instruction
-- Chooses the request presented to the dcache this cycle (stage1_req and
-- stage1_dcreq) and the next contents of r1 (r1in).
512 loadstore1_1: process(all)
513 variable v : reg_stage1_t;
514 variable req : request_t;
515 variable dcreq : std_ulogic;
516 variable addr : std_ulogic_vector(63 downto 0);
517 begin
518 v := r1;
519 dcreq := '0';
520 req := req_in;
521 if flushing = '1' then
522 -- Make this a no-op request rather than simply invalid.
523 -- It will never get to stage 3 since there is a request ahead of
524 -- it with align_intr = 1.
525 req.dc_req := '0';
526 end if;
527
528 -- Note that l_in.valid is gated with busy inside execute1
529 if l_in.valid = '1' then
-- New request: issue to the dcache now if permitted
530 dcreq := req.dc_req and stage1_issue_enable and not d_in.error and not dc_stall;
531 v.req := req;
532 v.issued := dcreq;
533 elsif r1.req.valid = '1' then
534 if r1.req.dc_req = '1' and r1.issued = '0' then
-- Request in r1 has not been issued yet: try again
535 req := r1.req;
536 dcreq := stage1_issue_enable and not dc_stall and not d_in.error;
537 v.issued := dcreq;
538 elsif r1.issued = '1' and d_in.error = '1' then
-- The dcache reported an error: mark r1 as not issued so that
-- it is presented again
539 v.issued := '0';
540 elsif stage2_busy_next = '0' then
541 -- we can change what's in r1 next cycle because the current thing
542 -- in r1 will go into r2
543 if r1.req.dc_req = '1' and r1.req.two_dwords = '1' and r1.req.dword_index = '0' then
544 -- construct the second request for a misaligned access
545 v.req.dword_index := '1';
546 v.req.addr := std_ulogic_vector(unsigned(r1.req.addr(63 downto 3)) + 1) & "000";
-- discard a carry out of bit 31 in 32-bit mode
547 if r1.req.mode_32bit = '1' then
548 v.req.addr(32) := '0';
549 end if;
550 v.req.byte_sel := r1.req.second_bytes;
551 v.issued := stage1_issue_enable and not dc_stall;
552 dcreq := stage1_issue_enable and not dc_stall;
553 req := v.req;
554 else
555 v.req.valid := '0';
556 end if;
557 end if;
558 end if;
-- An interrupt being raised by stage 3 cancels the stage 1 request
559 if r3in.interrupt = '1' then
560 v.req.valid := '0';
561 dcreq := '0';
562 end if;
563
564 stage1_req <= req;
565 stage1_dcreq <= dcreq;
566 r1in <= v;
567 end process;
568
569 -- Processing done in the second cycle of a load/store instruction.
570 -- Store data is formatted here and sent to the dcache.
571 -- The request in r1 is sent to stage 3 if stage 3 will not be busy next cycle.
-- Drives store_data, stage2_busy_next and r2in.
572 loadstore1_2: process(all)
573 variable v : reg_stage2_t;
574 variable j : integer;
575 variable k : unsigned(2 downto 0);
576 variable kk : unsigned(3 downto 0);
577 variable idx : unsigned(2 downto 0);
578 variable byte_offset : unsigned(2 downto 0);
579 begin
580 v := r2;
581
582 -- Byte reversing and rotating for stores.
583 -- Done in the second cycle (the cycle after l_in.valid = 1).
-- Output byte i is taken from source byte ((i - byte_offset) xor brev_mask)
584 byte_offset := unsigned(r1.req.addr0(2 downto 0));
585 for i in 0 to 7 loop
586 k := (to_unsigned(i, 3) - byte_offset) xor r1.req.brev_mask;
587 j := to_integer(k) * 8;
588 store_data(i * 8 + 7 downto i * 8) <= r1.req.store_data(j + 7 downto j);
589 end loop;
590
-- Take the r1 request when stage 3 will be free and the request has
-- either been issued to the dcache or does not need the dcache
591 if stage3_busy_next = '0' and
592 (r1.req.valid = '0' or r1.issued = '1' or r1.req.dc_req = '0') then
593 v.req := r1.req;
594 v.req.store_data := store_data;
-- wait_dc is not set for lfs-type loads or for the first half of a
-- two-doubleword access
595 v.wait_dc := r1.req.valid and r1.req.dc_req and not r1.req.load_sp and
596 not (r1.req.two_dwords and not r1.req.dword_index);
597 v.wait_mmu := r1.req.valid and r1.req.mmu_op;
598 v.one_cycle := r1.req.valid and (r1.req.noop or r1.req.read_spr or
599 (r1.req.write_spr and not r1.req.mmu_op) or
600 r1.req.load_zero or r1.req.do_update);
-- Select the source of the register write data used in stage 3:
-- "00" SPR value, "01" addr0, "10" lfs result, "11" load data
601 if r1.req.read_spr = '1' then
602 v.wr_sel := "00";
603 elsif r1.req.do_update = '1' or r1.req.store = '1' then
604 v.wr_sel := "01";
605 elsif r1.req.load_sp = '1' then
606 v.wr_sel := "10";
607 else
608 v.wr_sel := "11";
609 end if;
610
611 -- Work out load formatter controls for next cycle
-- For result byte i: byte_index is the source byte within a
-- doubleword, use_second is the carry out of the index addition
612 for i in 0 to 7 loop
613 idx := to_unsigned(i, 3) xor r1.req.brev_mask;
614 kk := ('0' & idx) + ('0' & byte_offset);
615 v.use_second(i) := kk(3);
616 v.byte_index(i) := kk(2 downto 0);
617 end loop;
618 elsif stage3_busy_next = '0' then
-- Stage 3 is free but r1 is not ready to move on: leave r2 empty
619 v.req.valid := '0';
620 v.wait_dc := '0';
621 v.wait_mmu := '0';
622 end if;
623
624 stage2_busy_next <= r1.req.valid and stage3_busy_next;
625
-- An interrupt being raised by stage 3 cancels the stage 2 request
626 if r3in.interrupt = '1' then
627 v.req.valid := '0';
628 end if;
629
630 r2in <= v;
631 end process;
632
633 -- Processing done in the third cycle of a load/store instruction.
634 -- At this stage we can do things that have side effects without
635 -- fear of the instruction getting flushed. This is the point at
636 -- which requests get sent to the MMU.
-- This process also formats load data, runs the r3.state state machine,
-- builds interrupt information, and drives the d_out, m_out, l_out and
-- e_out ports as well as stage3_busy_next and r3in.
637 loadstore1_3: process(all)
638 variable v : reg_stage3_t;
639 variable j : integer;
640 variable req : std_ulogic;
641 variable mmureq : std_ulogic;
642 variable mmu_mtspr : std_ulogic;
643 variable write_enable : std_ulogic;
644 variable write_data : std_ulogic_vector(63 downto 0);
645 variable do_update : std_ulogic;
646 variable done : std_ulogic;
647 variable part_done : std_ulogic;
648 variable exception : std_ulogic;
649 variable data_permuted : std_ulogic_vector(63 downto 0);
650 variable data_trimmed : std_ulogic_vector(63 downto 0);
651 variable sprval : std_ulogic_vector(63 downto 0);
652 variable negative : std_ulogic;
653 variable dsisr : std_ulogic_vector(31 downto 0);
654 variable itlb_fault : std_ulogic;
655 variable trim_ctl : trim_ctl_t;
656 begin
657 v := r3;
658
-- Defaults for this cycle
659 req := '0';
660 mmureq := '0';
661 mmu_mtspr := '0';
662 done := '0';
663 part_done := '0';
664 exception := '0';
665 dsisr := (others => '0');
666 write_enable := '0';
667 sprval := (others => '0');
668 do_update := '0';
669 v.convert_lfs := '0';
670 v.srr1 := (others => '0');
671
672 -- load data formatting
673 -- shift and byte-reverse data bytes
674 for i in 0 to 7 loop
675 j := to_integer(r2.byte_index(i)) * 8;
676 data_permuted(i * 8 + 7 downto i * 8) := d_in.data(j + 7 downto j);
677 end loop;
678
679 -- Work out the sign bit for sign extension.
680 -- For unaligned loads crossing two dwords, the sign bit is in the
681 -- first dword for big-endian (byte_reverse = 1), or the second dword
682 -- for little-endian.
683 if r2.req.dword_index = '1' and r2.req.byte_reverse = '1' then
684 negative := (r2.req.length(3) and r3.load_data(63)) or
685 (r2.req.length(2) and r3.load_data(31)) or
686 (r2.req.length(1) and r3.load_data(15)) or
687 (r2.req.length(0) and r3.load_data(7));
688 else
689 negative := (r2.req.length(3) and data_permuted(63)) or
690 (r2.req.length(2) and data_permuted(31)) or
691 (r2.req.length(1) and data_permuted(15)) or
692 (r2.req.length(0) and data_permuted(7));
693 end if;
694
695 -- trim and sign-extend
-- Bytes below the access length come from the load data ("10"), or for
-- the second doubleword possibly from the saved first doubleword ("11");
-- bytes at or above the length are filled ("00")
696 for i in 0 to 7 loop
697 if i < to_integer(unsigned(r2.req.length)) then
698 if r2.req.dword_index = '1' then
699 trim_ctl(i) := '1' & not r2.use_second(i);
700 else
701 trim_ctl(i) := "10";
702 end if;
703 else
704 trim_ctl(i) := "00";
705 end if;
706 end loop;
707
708 for i in 0 to 7 loop
709 case trim_ctl(i) is
710 when "11" =>
711 data_trimmed(i * 8 + 7 downto i * 8) := r3.load_data(i * 8 + 7 downto i * 8);
712 when "10" =>
713 data_trimmed(i * 8 + 7 downto i * 8) := data_permuted(i * 8 + 7 downto i * 8);
714 when others =>
715 data_trimmed(i * 8 + 7 downto i * 8) := (others => negative and r2.req.sign_extend);
716 end case;
717 end loop;
718
719 if HAS_FPU then
720 -- Single-precision FP conversion for loads
-- These registered values feed the sp_to_dp process next cycle
721 v.ld_sp_data := data_trimmed(31 downto 0);
722 v.ld_sp_nz := or (data_trimmed(22 downto 0));
723 v.ld_sp_lz := count_left_zeroes(data_trimmed(22 downto 0));
724 end if;
725
-- Save the permuted load data; it supplies the first-doubleword bytes
-- (and sign bit) when the second half of a two-doubleword load arrives
726 if d_in.valid = '1' and r2.req.load = '1' then
727 v.load_data := data_permuted;
728 end if;
729
730 if r2.req.valid = '1' then
731 if r2.req.read_spr = '1' then
732 write_enable := '1';
733 -- partial decode on SPR number should be adequate given
734 -- the restricted set that get sent down this path
735 if r2.req.sprn(9) = '0' and r2.req.sprn(5) = '0' then
736 if r2.req.sprn(0) = '0' then
737 sprval := x"00000000" & r3.dsisr;
738 else
739 sprval := r3.dar;
740 end if;
741 else
742 -- reading one of the SPRs in the MMU
743 sprval := m_in.sprval;
744 end if;
745 end if;
746 if r2.req.align_intr = '1' then
747 -- generate alignment interrupt
748 exception := '1';
749 end if;
750 if r2.req.load_zero = '1' then
751 write_enable := '1';
752 end if;
753 if r2.req.do_update = '1' then
754 do_update := '1';
755 end if;
756 end if;
757
758 case r3.state is
759 when IDLE =>
760 if d_in.valid = '1' then
761 if r2.req.two_dwords = '0' or r2.req.dword_index = '1' then
-- Final (or only) doubleword has arrived
762 write_enable := r2.req.load and not r2.req.load_sp;
763 if HAS_FPU and r2.req.load_sp = '1' then
764 -- SP to DP conversion takes a cycle
765 v.state := FINISH_LFS;
766 v.convert_lfs := '1';
767 else
768 -- stores write back rA update
769 do_update := r2.req.update and r2.req.store;
770 end if;
771 else
-- First doubleword of a two-doubleword access
772 part_done := '1';
773 end if;
774 end if;
775 if d_in.error = '1' then
776 if d_in.cache_paradox = '1' then
777 -- signal an interrupt straight away
778 exception := '1';
779 dsisr(63 - 38) := not r2.req.load;
780 -- XXX there is no architected bit for this
781 -- (probably should be a machine check in fact)
782 dsisr(63 - 35) := d_in.cache_paradox;
783 else
784 -- Look up the translation for TLB miss
785 -- and also for permission error and RC error
786 -- in case the PTE has been updated.
787 mmureq := '1';
788 v.state := MMU_LOOKUP;
789 v.stage1_en := '0';
790 end if;
791 end if;
792 if r2.req.valid = '1' then
793 if r2.req.mmu_op = '1' then
794 -- send request (tlbie, mtspr, itlb miss) to MMU
795 mmureq := not r2.req.write_spr;
796 mmu_mtspr := r2.req.write_spr;
797 if r2.req.instr_fault = '1' then
798 v.state := MMU_LOOKUP;
799 else
800 v.state := TLBIE_WAIT;
801 end if;
802 elsif r2.req.write_spr = '1' then
-- mtspr to DSISR (sprn(0) = 0) or DAR (sprn(0) = 1)
803 if r2.req.sprn(0) = '0' then
804 v.dsisr := r2.req.store_data(31 downto 0);
805 else
806 v.dar := r2.req.store_data;
807 end if;
808 end if;
809 end if;
810
811 when MMU_LOOKUP =>
812 if m_in.done = '1' then
813 if r2.req.instr_fault = '0' then
814 -- retry the request now that the MMU has installed a TLB entry
815 req := '1';
816 v.stage1_en := '1';
817 v.state := IDLE;
818 end if;
819 end if;
820 if m_in.err = '1' then
821 exception := '1';
822 dsisr(63 - 33) := m_in.invalid;
823 dsisr(63 - 36) := m_in.perm_error;
824 dsisr(63 - 38) := r2.req.store or r2.req.dcbz;
825 dsisr(63 - 44) := m_in.badtree;
826 dsisr(63 - 45) := m_in.rc_error;
827 end if;
828
829 when TLBIE_WAIT =>
-- No action here; the state is left through the complete/exception
-- check below
830
831 when FINISH_LFS =>
832 write_enable := '1';
833
834 end case;
835
-- Completion or an exception returns stage 3 to IDLE and re-enables
-- issue from stage 1
836 if complete = '1' or exception = '1' then
837 v.stage1_en := '1';
838 v.state := IDLE;
839 end if;
840
841 -- generate DSI or DSegI for load/store exceptions
842 -- or ISI or ISegI for instruction fetch exceptions
843 v.interrupt := exception;
844 if exception = '1' then
845 v.nia := r2.req.nia;
846 if r2.req.align_intr = '1' then
-- alignment interrupt
847 v.intr_vec := 16#600#;
848 v.dar := r2.req.addr;
849 elsif r2.req.instr_fault = '0' then
-- data-side fault: record the faulting address in DAR
850 v.dar := r2.req.addr;
851 if m_in.segerr = '0' then
852 v.intr_vec := 16#300#;
853 v.dsisr := dsisr;
854 else
855 v.intr_vec := 16#380#;
856 end if;
857 else
-- instruction-side fault: error details go into srr1
858 if m_in.segerr = '0' then
859 v.srr1(47 - 33) := m_in.invalid;
860 v.srr1(47 - 35) := m_in.perm_error; -- noexec fault
861 v.srr1(47 - 44) := m_in.badtree;
862 v.srr1(47 - 45) := m_in.rc_error;
863 v.intr_vec := 16#400#;
864 else
865 v.intr_vec := 16#480#;
866 end if;
867 end if;
868 end if;
869
870 case r2.wr_sel is
871 when "00" =>
872 -- mfspr result
873 write_data := sprval;
874 when "01" =>
875 -- update reg
876 write_data := r2.req.addr0;
877 when "10" =>
878 -- lfs result
879 write_data := load_dp_data;
880 when others =>
881 -- load data
882 write_data := data_trimmed;
883 end case;
884
885 -- Update outputs to dcache
-- While stage 1 may issue, the dcache sees the stage 1 request.
-- Otherwise it sees the request held in r2, with valid asserted only
-- for the retry after a successful MMU lookup (req).
886 if stage1_issue_enable = '1' then
887 d_out.valid <= stage1_dcreq;
888 d_out.load <= stage1_req.load;
889 d_out.dcbz <= stage1_req.dcbz;
890 d_out.nc <= stage1_req.nc;
891 d_out.reserve <= stage1_req.reserve;
892 d_out.atomic <= stage1_req.atomic;
893 d_out.atomic_last <= stage1_req.atomic_last;
894 d_out.addr <= stage1_req.addr;
895 d_out.byte_sel <= stage1_req.byte_sel;
896 d_out.virt_mode <= stage1_req.virt_mode;
897 d_out.priv_mode <= stage1_req.priv_mode;
898 else
899 d_out.valid <= req;
900 d_out.load <= r2.req.load;
901 d_out.dcbz <= r2.req.dcbz;
902 d_out.nc <= r2.req.nc;
903 d_out.reserve <= r2.req.reserve;
904 d_out.atomic <= r2.req.atomic;
905 d_out.atomic_last <= r2.req.atomic_last;
906 d_out.addr <= r2.req.addr;
907 d_out.byte_sel <= r2.req.byte_sel;
908 d_out.virt_mode <= r2.req.virt_mode;
909 d_out.priv_mode <= r2.req.priv_mode;
910 end if;
-- In the cycle after a stage 1 dcache request the freshly formatted
-- store data is sent; otherwise the copy saved in r2 is used
911 if stage1_dreq = '1' then
912 d_out.data <= store_data;
913 else
914 d_out.data <= r2.req.store_data;
915 end if;
916 d_out.hold <= r2.req.valid and r2.req.load_sp and d_in.valid;
917
918 -- Update outputs to MMU
919 m_out.valid <= mmureq;
920 m_out.iside <= r2.req.instr_fault;
921 m_out.load <= r2.req.load;
922 m_out.priv <= r2.req.priv_mode;
923 m_out.tlbie <= r2.req.tlbie;
924 m_out.mtspr <= mmu_mtspr;
925 m_out.sprn <= r2.req.sprn;
926 m_out.addr <= r2.req.addr;
927 m_out.slbia <= r2.req.is_slbia;
928 m_out.rs <= r2.req.store_data;
929
930 -- Update outputs to writeback
-- Completion and write data are combinational from r2 and this cycle's
-- results; the interrupt outputs come from the r3 register
931 l_out.valid <= complete;
932 l_out.instr_tag <= r2.req.instr_tag;
933 l_out.write_enable <= write_enable or do_update;
934 l_out.write_reg <= r2.req.write_reg;
935 l_out.write_data <= write_data;
936 l_out.xerc <= r2.req.xerc;
937 l_out.rc <= r2.req.rc and complete;
938 l_out.store_done <= d_in.store_done;
939 l_out.interrupt <= r3.interrupt;
940 l_out.intr_vec <= r3.intr_vec;
941 l_out.srr0 <= r3.nia;
942 l_out.srr1 <= r3.srr1;
943
944 -- update busy signal back to execute1
945 e_out.busy <= busy;
946 e_out.in_progress <= in_progress;
947
948 -- Busy calculation.
-- Stage 3 stays occupied unless the r2 request completes, finishes its
-- first doubleword, or takes an exception this cycle
949 stage3_busy_next <= r2.req.valid and not (complete or part_done or exception);
950
951 -- Update registers
952 r3in <= v;
953
954 end process;
955
-- Optional logging, generated only when LOG_LENGTH is non-zero.
-- A 10-bit snapshot of status signals is registered every clock and
-- driven onto log_out.
956 l1_log: if LOG_LENGTH > 0 generate
957 signal log_data : std_ulogic_vector(9 downto 0);
958 begin
959 ls1_log: process(clk)
960 begin
961 if rising_edge(clk) then
962 log_data <= e_out.busy & -- bit 9
963 l_out.interrupt & -- bit 8
964 l_out.valid & -- bit 7
965 m_out.valid & -- bit 6
966 d_out.valid & -- bit 5
967 m_in.done & -- bit 4
968 r2.req.dword_index & -- bit 3
969 std_ulogic_vector(to_unsigned(state_t'pos(r3.state), 3)); -- bits 2..0: position of r3.state in state_t
970 end if;
971 end process;
972 log_out <= log_data;
973 end generate;
974
975 end;