decode: Add a facility field to the instruction decode tables
[microwatt.git] / loadstore1.vhdl
1 library ieee;
2 use ieee.std_logic_1164.all;
3 use ieee.numeric_std.all;
4
5 library work;
6 use work.decode_types.all;
7 use work.common.all;
8 use work.insn_helpers.all;
9 use work.helpers.all;
10
11 -- 2 cycle LSU
12 -- We calculate the address in the first cycle
13
-- Load/store unit interface.  Requests arrive on l_in; the architecture
-- issues dcache requests on d_out and MMU requests on m_out, returns
-- results on l_out and reports busy/exception status on e_out.
14 entity loadstore1 is
15 generic (
-- When true, the single-precision conversion logic (ls_fp_conv) is
-- generated and the FPR_CONV / FINISH_LFS paths are used.
16 HAS_FPU : boolean := true;
17 -- Non-zero to enable log data collection
18 LOG_LENGTH : natural := 0
19 );
20 port (
21 clk : in std_ulogic;
-- Sampled on the rising edge of clk (synchronous reset)
22 rst : in std_ulogic;
23
-- Request input; l_in.valid is gated with busy inside execute1
24 l_in : in Execute1ToLoadstore1Type;
-- Busy and exception information back to execute1
25 e_out : out Loadstore1ToExecute1Type;
-- Register write-back data (load data, rA update, or SPR value)
26 l_out : out Loadstore1ToWritebackType;
27
-- Request to, and response from, the dcache
28 d_out : out Loadstore1ToDcacheType;
29 d_in : in DcacheToLoadstore1Type;
30
-- Request to, and response from, the MMU
31 m_out : out Loadstore1ToMmuType;
32 m_in : in MmuToLoadstore1Type;
33
-- Not referenced by the architecture in this file
34 dc_stall : in std_ulogic;
35
-- Driven only when LOG_LENGTH > 0 (see the l1_log generate)
36 log_out : out std_ulogic_vector(9 downto 0)
37 );
38 end loadstore1;
39
40 -- Note, we don't currently use the stall output from the dcache because
41 -- we know it can take two requests without stalling when idle, we are
42 -- its only user, and we know it never stalls when idle.
43
44 architecture behave of loadstore1 is
45
46 -- State machine for unaligned loads/stores
-- NOTE: the position of each literal (state_t'pos) is emitted as a 3-bit
-- code in the log output, so the order and the count (at most 8) matter.
47 type state_t is (IDLE, -- ready for instruction
48 FPR_CONV, -- converting double to float for store
49 SECOND_REQ, -- send 2nd request of unaligned xfer
50 ACK_WAIT, -- waiting for ack from dcache
51 MMU_LOOKUP, -- waiting for MMU to look up translation
52 TLBIE_WAIT, -- waiting for MMU to finish doing a tlbie
53 FINISH_LFS, -- write back converted SP data for lfs*
54 COMPLETE -- extra cycle to complete an operation
55 );
56
-- All registered state of the load/store unit.  The clocked process copies
-- rin into r each cycle; only state, busy and do_update are reset.
57 type reg_stage_t is record
58 -- latch most of the input request
59 load : std_ulogic; -- set for OP_LOAD
60 tlbie : std_ulogic; -- set for OP_TLBIE
61 dcbz : std_ulogic; -- set for OP_DCBZ
62 mfspr : std_ulogic; -- set for one cycle for OP_MFSPR; selects sprval for write-back
63 addr : std_ulogic_vector(63 downto 0); -- request address (upper half zeroed in 32-bit mode)
64 store_data : std_ulogic_vector(63 downto 0); -- store data after byte rotation/reversal
65 load_data : std_ulogic_vector(63 downto 0); -- permuted data from the first dword of a 2-dword load
66 write_reg : gspr_index_t;
67 length : std_ulogic_vector(3 downto 0); -- access size in bytes
68 byte_reverse : std_ulogic;
69 sign_extend : std_ulogic;
70 update : std_ulogic; -- rA update requested
71 update_reg : gpr_index_t; -- register receiving the rA update (written with addr)
72 xerc : xer_common_t;
73 reserve : std_ulogic; -- larx/stcx style access (alignment is checked)
74 atomic : std_ulogic; -- '1' when the access is not misaligned
75 atomic_last : std_ulogic; -- atomic and this is the final (or only) part of the transfer
76 rc : std_ulogic;
77 nc : std_ulogic; -- non-cacheable access
78 virt_mode : std_ulogic;
79 priv_mode : std_ulogic;
80 state : state_t;
81 dwords_done : std_ulogic; -- first dword of a 2-dword transfer has been acknowledged
82 last_dword : std_ulogic; -- '1' when the next dcache ack completes the transfer
83 first_bytes : std_ulogic_vector(7 downto 0); -- byte enables for the first dword
84 second_bytes : std_ulogic_vector(7 downto 0); -- byte enables for the second dword (zero if none)
85 dar : std_ulogic_vector(63 downto 0); -- written by mtspr or on a data-side exception
86 dsisr : std_ulogic_vector(31 downto 0); -- written by mtspr or on a data-side exception
87 instr_fault : std_ulogic; -- set for OP_FETCH_FAILED (instruction-side MMU lookup)
88 align_intr : std_ulogic; -- alignment interrupt to be raised in the COMPLETE state
89 sprval : std_ulogic_vector(63 downto 0); -- value returned for mfspr
90 busy : std_ulogic;
91 wait_dcache : std_ulogic; -- busy may drop when d_in.valid arrives
92 wait_mmu : std_ulogic; -- busy may drop when m_in.done arrives
93 do_update : std_ulogic; -- write back the rA update in the following cycle
94 extra_cycle : std_ulogic; -- load needs a cycle after the ack (rA update or SP conversion)
95 mode_32bit : std_ulogic;
96 load_sp : std_ulogic; -- single-precision FP load (needs SP -> DP conversion)
97 ld_sp_data : std_ulogic_vector(31 downto 0); -- low word of the trimmed load data
98 ld_sp_nz : std_ulogic; -- OR of the 23 fraction bits of the loaded SP value
99 ld_sp_lz : std_ulogic_vector(5 downto 0); -- count_left_zeroes of the 23 fraction bits
100 st_sp_data : std_ulogic_vector(31 downto 0); -- registered output of the DP -> SP converter
101 end record;
102
-- One flag per byte lane of a doubleword
103 type byte_sel_t is array(0 to 7) of std_ulogic;
-- Per-byte control used when trimming/sign-extending load data:
-- "11" take saved first-dword data, "10" take current dcache data,
-- "01" fill with 0xFF, "00" fill with 0x00
104 subtype byte_trim_t is std_ulogic_vector(1 downto 0);
105 type trim_ctl_t is array(0 to 7) of byte_trim_t;
106
-- r holds the registered state; rin is its next value computed combinationally
107 signal r, rin : reg_stage_t;
-- Sum of l_in.addr1 and l_in.addr2 (zero when l_in.valid is not '1')
108 signal lsu_sum : std_ulogic_vector(63 downto 0);
109
-- Outputs of the FP format converters; driven only when HAS_FPU is true
110 signal store_sp_data : std_ulogic_vector(31 downto 0);
111 signal load_dp_data : std_ulogic_vector(63 downto 0);
112
111 signal load_dp_data : std_ulogic_vector(63 downto 0);
112
-- Translate an access size in bytes (1, 2, 4 or 8) into a right-justified
-- byte-enable mask.  Any other size value selects no bytes at all.
function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is
begin
    if length = "0001" then
        return "00000001";
    elsif length = "0010" then
        return "00000011";
    elsif length = "0100" then
        return "00001111";
    elsif length = "1000" then
        return "11111111";
    end if;
    -- unsupported size: no byte lanes enabled
    return "00000000";
end function length_to_sel;
129
-- Byte enables for a possibly unaligned transfer.
-- The 8-bit mask for `size` is zero-extended to 16 bits and moved left by
-- the byte offset in `address`: bits 7..0 select bytes in the first
-- doubleword and bits 15..8 select bytes in the following one.
function xfer_data_sel(size : in std_logic_vector(3 downto 0);
                       address : in std_logic_vector(2 downto 0))
    return std_ulogic_vector is
    variable offset : natural range 0 to 7;
    variable wide   : unsigned(15 downto 0);
begin
    offset := to_integer(unsigned(address));
    wide   := resize(unsigned(length_to_sel(size)), 16);
    return std_ulogic_vector(shift_left(wide, offset));
end function xfer_data_sel;
142
-- 23-bit logical right shifter used by the DP -> SP float conversion.
-- Implemented as two stages: a fine stage of 0..3 bits selected by
-- shift(1 downto 0), followed by a coarse stage of 0, 4, 8, 12, 16 or 20
-- bits selected by shift(4 downto 2).  Every coarse code above "100"
-- shifts by 20, so the coarse amount never exceeds 20 bits.
function shifter_23r(frac: std_ulogic_vector(22 downto 0); shift: unsigned(4 downto 0))
    return std_ulogic_vector is
    variable acc : unsigned(22 downto 0);
begin
    acc := unsigned(frac);

    -- fine stage
    case shift(1 downto 0) is
        when "00" =>
            null;
        when "01" =>
            acc := shift_right(acc, 1);
        when "10" =>
            acc := shift_right(acc, 2);
        when others =>
            acc := shift_right(acc, 3);
    end case;

    -- coarse stage
    case shift(4 downto 2) is
        when "000" =>
            null;
        when "001" =>
            acc := shift_right(acc, 4);
        when "010" =>
            acc := shift_right(acc, 8);
        when "011" =>
            acc := shift_right(acc, 12);
        when "100" =>
            acc := shift_right(acc, 16);
        when others =>
            acc := shift_right(acc, 20);
    end case;

    return std_ulogic_vector(acc);
end;
175
-- 23-bit logical left shifter used by the SP -> DP float conversion.
-- Implemented as two stages: a fine stage of 0..3 bits selected by
-- shift(1 downto 0), followed by a coarse stage of 0, 4, 8, 12, 16 or 20
-- bits selected by shift(4 downto 2).  Every coarse code above "100"
-- shifts by 20, so the coarse amount never exceeds 20 bits.
function shifter_23l(frac: std_ulogic_vector(22 downto 0); shift: unsigned(4 downto 0))
    return std_ulogic_vector is
    variable acc : unsigned(22 downto 0);
begin
    acc := unsigned(frac);

    -- fine stage
    case shift(1 downto 0) is
        when "00" =>
            null;
        when "01" =>
            acc := shift_left(acc, 1);
        when "10" =>
            acc := shift_left(acc, 2);
        when others =>
            acc := shift_left(acc, 3);
    end case;

    -- coarse stage
    case shift(4 downto 2) is
        when "000" =>
            null;
        when "001" =>
            acc := shift_left(acc, 4);
        when "010" =>
            acc := shift_left(acc, 8);
        when "011" =>
            acc := shift_left(acc, 12);
        when "100" =>
            acc := shift_left(acc, 16);
        when others =>
            acc := shift_left(acc, 20);
    end case;

    return std_ulogic_vector(acc);
end;
208
209 begin
-- Address generation for the first cycle: addr1 + addr2 while a request
-- is being presented, all zeroes otherwise.
lsu_sum <= (others => '0') when l_in.valid /= '1'
           else std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2));

-- State register.  Outside reset the whole record is loaded from rin;
-- during reset only state, busy and do_update are forced and every other
-- field keeps its previous value.
loadstore1_0: process(clk)
begin
    if rising_edge(clk) then
        if rst /= '1' then
            r <= rin;
        else
            r.do_update <= '0';
            r.busy <= '0';
            r.state <= IDLE;
        end if;
    end if;
end process;
225
ls_fp_conv: if HAS_FPU generate
    -- Double-precision to single-precision format conversion for stfs.
    -- The result is assembled in a local variable and driven onto
    -- store_sp_data with a single assignment.
    dp_to_sp: process(all)
        variable dp_exp : unsigned(10 downto 0);
        variable mant   : std_ulogic_vector(22 downto 0);
        variable rshift : unsigned(4 downto 0);
        variable sp     : std_ulogic_vector(31 downto 0);
    begin
        dp_exp := unsigned(l_in.data(62 downto 52));

        -- default: signed zero
        sp := (others => '0');
        sp(31) := l_in.data(63);

        if dp_exp > 896 then
            -- representable as a normalized SP value: keep the top exponent
            -- bit, then the low 7 exponent bits and the top 23 fraction bits
            sp(30 downto 0) := l_in.data(62) & l_in.data(58 downto 29);
        elsif dp_exp >= 874 then
            -- denormalization required: make the implicit one explicit and
            -- shift right by the negated low five exponent bits
            mant := '1' & l_in.data(51 downto 30);
            rshift := 0 - dp_exp(4 downto 0);
            sp(22 downto 0) := shifter_23r(mant, rshift);
        end if;

        store_sp_data <= sp;
    end process;

    -- Single-precision to double-precision format conversion for lfs.
    -- Works from the registered SP word and its fraction statistics.
    sp_to_dp: process(all)
        variable sp_exp  : unsigned(7 downto 0);
        variable dp_exp  : unsigned(10 downto 0);
        variable exp_any : std_ulogic;
        variable exp_all : std_ulogic;
        variable mant    : std_ulogic_vector(22 downto 0);
        variable lshift  : unsigned(4 downto 0);
        variable dp      : std_ulogic_vector(63 downto 0);
    begin
        mant    := r.ld_sp_data(22 downto 0);
        sp_exp  := unsigned(r.ld_sp_data(30 downto 23));
        exp_any := or (r.ld_sp_data(30 downto 23));
        exp_all := and (r.ld_sp_data(30 downto 23));

        -- defaults describe a zero operand
        lshift := (others => '0');
        dp_exp := to_unsigned(0, 11);

        if exp_all = '1' then
            -- infinity or NaN
            dp_exp := to_unsigned(2047, 11);
        elsif exp_any = '1' then
            -- finite normalized value
            dp_exp := 896 + resize(sp_exp, 11);
        elsif r.ld_sp_nz /= '0' then
            -- denormalized SP operand, need to normalize
            dp_exp := 896 - resize(unsigned(r.ld_sp_lz), 11);
            lshift := unsigned(r.ld_sp_lz(4 downto 0)) + 1;
        end if;

        dp := (others => '0');
        dp(63) := r.ld_sp_data(31);
        dp(62 downto 52) := std_ulogic_vector(dp_exp);
        dp(51 downto 29) := shifter_23l(mant, lshift);
        load_dp_data <= dp;
    end process;
end generate;
278
-- Combinational process: computes the next register value (rin) from the
-- current registers (r) and the inputs, and drives the outputs to the
-- dcache (d_out), the MMU (m_out), writeback (l_out) and execute1 (e_out).
279 loadstore1_1: process(all)
-- v is the working copy of r that becomes rin at the end of the process
280 variable v : reg_stage_t;
281 variable brev_lenm1 : unsigned(2 downto 0);
282 variable byte_offset : unsigned(2 downto 0);
283 variable j : integer;
284 variable k : unsigned(2 downto 0);
285 variable kk : unsigned(3 downto 0);
286 variable long_sel : std_ulogic_vector(15 downto 0);
287 variable byte_sel : std_ulogic_vector(7 downto 0);
-- req: issue a dcache request this cycle (subject to v.align_intr)
288 variable req : std_ulogic;
289 variable busy : std_ulogic;
-- addr goes to the dcache, maddr goes to the MMU
290 variable addr : std_ulogic_vector(63 downto 0);
291 variable maddr : std_ulogic_vector(63 downto 0);
292 variable wdata : std_ulogic_vector(63 downto 0);
293 variable write_enable : std_ulogic;
294 variable do_update : std_ulogic;
295 variable done : std_ulogic;
296 variable data_permuted : std_ulogic_vector(63 downto 0);
297 variable data_trimmed : std_ulogic_vector(63 downto 0);
298 variable store_data : std_ulogic_vector(63 downto 0);
299 variable data_in : std_ulogic_vector(63 downto 0);
300 variable byte_rev : std_ulogic;
301 variable length : std_ulogic_vector(3 downto 0);
302 variable use_second : byte_sel_t;
303 variable trim_ctl : trim_ctl_t;
304 variable negative : std_ulogic;
305 variable sprn : std_ulogic_vector(9 downto 0);
306 variable exception : std_ulogic;
307 variable next_addr : std_ulogic_vector(63 downto 0);
-- mmureq: issue an MMU request this cycle
308 variable mmureq : std_ulogic;
309 variable dsisr : std_ulogic_vector(31 downto 0);
310 variable mmu_mtspr : std_ulogic;
311 variable itlb_fault : std_ulogic;
312 variable misaligned : std_ulogic;
313 variable fp_reg_conv : std_ulogic;
314 variable lfs_done : std_ulogic;
315 begin
-- Start from the current register contents; fields not assigned below
-- keep their value.  Per-cycle strobes are cleared to their defaults.
316 v := r;
317 req := '0';
318 v.mfspr := '0';
319 mmu_mtspr := '0';
320 itlb_fault := '0';
321 sprn := std_ulogic_vector(to_unsigned(decode_spr_num(l_in.insn), 10));
322 dsisr := (others => '0');
323 mmureq := '0';
324 fp_reg_conv := '0';
325
326 write_enable := '0';
327 lfs_done := '0';
328
-- An rA update requested in the previous cycle (r.do_update) is written
-- back now; the registered flag defaults to clear for the next cycle.
329 do_update := r.do_update;
330 v.do_update := '0';
331
332 -- load data formatting
333 byte_offset := unsigned(r.addr(2 downto 0));
334 brev_lenm1 := "000";
335 if r.byte_reverse = '1' then
336 brev_lenm1 := unsigned(r.length(2 downto 0)) - 1;
337 end if;
338
339 -- shift and byte-reverse data bytes
-- For each result byte i, kk is the source byte index within the 16-byte
-- window starting at the first dword; kk(3) says the byte comes from the
-- second dword, and kk(2 downto 0) selects the byte of d_in.data.
340 for i in 0 to 7 loop
341 kk := ('0' & (to_unsigned(i, 3) xor brev_lenm1)) + ('0' & byte_offset);
342 use_second(i) := kk(3);
343 j := to_integer(kk(2 downto 0)) * 8;
344 data_permuted(i * 8 + 7 downto i * 8) := d_in.data(j + 7 downto j);
345 end loop;
346
347 -- Work out the sign bit for sign extension.
348 -- For unaligned loads crossing two dwords, the sign bit is in the
349 -- first dword for big-endian (byte_reverse = 1), or the second dword
350 -- for little-endian.
351 if r.dwords_done = '1' and r.byte_reverse = '1' then
352 negative := (r.length(3) and r.load_data(63)) or
353 (r.length(2) and r.load_data(31)) or
354 (r.length(1) and r.load_data(15)) or
355 (r.length(0) and r.load_data(7));
356 else
357 negative := (r.length(3) and data_permuted(63)) or
358 (r.length(2) and data_permuted(31)) or
359 (r.length(1) and data_permuted(15)) or
360 (r.length(0) and data_permuted(7));
361 end if;
362
363 -- trim and sign-extend
-- trim_ctl(i) encoding: "11" byte from the saved first dword (r.load_data),
-- "10" byte from the current dcache data, "01" sign fill (0xFF),
-- "00" zero fill.  Bytes at or beyond the access length are filled.
364 for i in 0 to 7 loop
365 if i < to_integer(unsigned(r.length)) then
366 if r.dwords_done = '1' then
367 trim_ctl(i) := '1' & not use_second(i);
368 else
369 trim_ctl(i) := "10";
370 end if;
371 else
372 trim_ctl(i) := '0' & (negative and r.sign_extend);
373 end if;
374 case trim_ctl(i) is
375 when "11" =>
376 data_trimmed(i * 8 + 7 downto i * 8) := r.load_data(i * 8 + 7 downto i * 8);
377 when "10" =>
378 data_trimmed(i * 8 + 7 downto i * 8) := data_permuted(i * 8 + 7 downto i * 8);
379 when "01" =>
380 data_trimmed(i * 8 + 7 downto i * 8) := x"FF";
381 when others =>
382 data_trimmed(i * 8 + 7 downto i * 8) := x"00";
383 end case;
384 end loop;
385
386 if HAS_FPU then
387 -- Single-precision FP conversion
-- Register the converter input/output and the fraction statistics
-- (non-zero flag and leading-zero count) used by sp_to_dp next cycle.
388 v.st_sp_data := store_sp_data;
389 v.ld_sp_data := data_trimmed(31 downto 0);
390 v.ld_sp_nz := or (data_trimmed(22 downto 0));
391 v.ld_sp_lz := count_left_zeroes(data_trimmed(22 downto 0));
392 end if;
393
394 -- Byte reversing and rotating for stores.
395 -- Done in the first cycle (when l_in.valid = 1) for integer stores
396 -- and DP float stores, and in the second cycle for SP float stores.
397 store_data := r.store_data;
398 if l_in.valid = '1' or (HAS_FPU and r.state = FPR_CONV) then
399 if HAS_FPU and r.state = FPR_CONV then
400 data_in := x"00000000" & r.st_sp_data;
401 byte_offset := unsigned(r.addr(2 downto 0));
402 byte_rev := r.byte_reverse;
403 length := r.length;
404 else
405 data_in := l_in.data;
406 byte_offset := unsigned(lsu_sum(2 downto 0));
407 byte_rev := l_in.byte_reverse;
408 length := l_in.length;
409 end if;
410 brev_lenm1 := "000";
411 if byte_rev = '1' then
412 brev_lenm1 := unsigned(length(2 downto 0)) - 1;
413 end if;
-- Destination byte lane i takes source byte ((i - byte_offset) xor brev_lenm1)
414 for i in 0 to 7 loop
415 k := (to_unsigned(i, 3) - byte_offset) xor brev_lenm1;
416 j := to_integer(k) * 8;
417 store_data(i * 8 + 7 downto i * 8) := data_in(j + 7 downto j);
418 end loop;
419 end if;
420 v.store_data := store_data;
421
422 -- compute (addr + 8) & ~7 for the second doubleword when unaligned
423 next_addr := std_ulogic_vector(unsigned(r.addr(63 downto 3)) + 1) & "000";
424
425 -- Busy calculation.
426 -- We need to minimize the delay from clock to busy valid because it
427 -- gates the start of execution of the next instruction.
428 busy := r.busy and not ((r.wait_dcache and d_in.valid) or (r.wait_mmu and m_in.done));
429 v.busy := busy;
430
-- An operation in progress is reported done in the first cycle in which
-- busy is clear.
431 done := '0';
432 if r.state /= IDLE and busy = '0' then
433 done := '1';
434 end if;
435 exception := '0';
436
-- Default request address and byte enables for requests issued from the
-- state machine (second half of a 2-dword transfer, or a retry).
437 if r.dwords_done = '1' or r.state = SECOND_REQ then
438 addr := next_addr;
439 byte_sel := r.second_bytes;
440 else
441 addr := r.addr;
442 byte_sel := r.first_bytes;
443 end if;
444 if r.mode_32bit = '1' then
445 addr(63 downto 32) := (others => '0');
446 end if;
447 maddr := addr;
448
449 case r.state is
450 when IDLE =>
451
452 when FPR_CONV =>
-- The converted SP store data is now available; issue the request
453 req := '1';
454 if r.second_bytes /= "00000000" then
455 v.state := SECOND_REQ;
456 else
457 v.state := ACK_WAIT;
458 end if;
459
460 when SECOND_REQ =>
-- Issue the request for the second dword; two acks are now expected
461 req := '1';
462 v.state := ACK_WAIT;
463 v.last_dword := '0';
464
465 when ACK_WAIT =>
466 if d_in.error = '1' then
467 -- dcache will discard the second request if it
468 -- gets an error on the 1st of two requests
469 if d_in.cache_paradox = '1' then
470 -- signal an interrupt straight away
471 exception := '1';
472 dsisr(63 - 38) := not r.load;
473 -- XXX there is no architected bit for this
474 dsisr(63 - 35) := d_in.cache_paradox;
475 else
476 -- Look up the translation for TLB miss
477 -- and also for permission error and RC error
478 -- in case the PTE has been updated.
479 mmureq := '1';
480 v.state := MMU_LOOKUP;
481 end if;
482 end if;
483 if d_in.valid = '1' then
484 if r.last_dword = '0' then
-- Ack for the first of two dwords: remember the permuted load data
-- and wait for the second ack
485 v.dwords_done := '1';
486 v.last_dword := '1';
487 if r.load = '1' then
488 v.load_data := data_permuted;
489 end if;
490 else
-- Final ack: write back load data now unless SP conversion is pending
491 write_enable := r.load and not r.load_sp;
492 if HAS_FPU and r.load_sp = '1' then
493 -- SP to DP conversion takes a cycle
494 -- Write back rA update in this cycle if needed
495 do_update := r.update;
496 v.state := FINISH_LFS;
497 elsif r.extra_cycle = '1' then
498 -- loads with rA update need an extra cycle
499 v.state := COMPLETE;
500 v.do_update := r.update;
501 else
502 -- stores write back rA update in this cycle
503 do_update := r.update;
504 end if;
505 v.busy := '0';
506 end if;
507 end if;
508 -- r.wait_dcache gets set one cycle after we come into ACK_WAIT state,
509 -- which is OK because the dcache always takes at least two cycles.
510 v.wait_dcache := r.last_dword and not r.extra_cycle;
511
512 when MMU_LOOKUP =>
513 if m_in.done = '1' then
514 if r.instr_fault = '0' then
515 -- retry the request now that the MMU has installed a TLB entry
516 req := '1';
517 if r.last_dword = '0' then
518 v.state := SECOND_REQ;
519 else
520 v.state := ACK_WAIT;
521 end if;
522 end if;
523 end if;
524 if m_in.err = '1' then
-- Translation failed: raise an exception and report the cause in DSISR
525 exception := '1';
526 dsisr(63 - 33) := m_in.invalid;
527 dsisr(63 - 36) := m_in.perm_error;
528 dsisr(63 - 38) := not r.load;
529 dsisr(63 - 44) := m_in.badtree;
530 dsisr(63 - 45) := m_in.rc_error;
531 end if;
532
533 when TLBIE_WAIT =>
534
535 when FINISH_LFS =>
-- Select the converted DP value (load_dp_data) for write-back
536 lfs_done := '1';
537
538 when COMPLETE =>
539 exception := r.align_intr;
540
541 end case;
542
-- Completion or exception returns the state machine to IDLE
543 if done = '1' or exception = '1' then
544 v.state := IDLE;
545 v.busy := '0';
546 end if;
547
548 -- Note that l_in.valid is gated with busy inside execute1
549 if l_in.valid = '1' then
-- New request: latch its parameters and clear per-operation state
550 v.mode_32bit := l_in.mode_32bit;
551 v.load := '0';
552 v.dcbz := '0';
553 v.tlbie := '0';
554 v.instr_fault := '0';
555 v.align_intr := '0';
556 v.dwords_done := '0';
557 v.last_dword := '1';
558 v.write_reg := l_in.write_reg;
559 v.length := l_in.length;
560 v.byte_reverse := l_in.byte_reverse;
561 v.sign_extend := l_in.sign_extend;
562 v.update := l_in.update;
563 v.update_reg := l_in.update_reg;
564 v.xerc := l_in.xerc;
565 v.reserve := l_in.reserve;
566 v.rc := l_in.rc;
567 v.nc := l_in.ci;
568 v.virt_mode := l_in.virt_mode;
569 v.priv_mode := l_in.priv_mode;
570 v.load_sp := '0';
571 v.wait_dcache := '0';
572 v.wait_mmu := '0';
573 v.do_update := '0';
574 v.extra_cycle := '0';
575
576 addr := lsu_sum;
577 if l_in.second = '1' then
578 -- for the second half of a 16-byte transfer, use next_addr
579 addr := next_addr;
580 end if;
581 if l_in.mode_32bit = '1' then
582 addr(63 downto 32) := (others => '0');
583 end if;
584 v.addr := addr;
585 maddr := l_in.addr2; -- address from RB for tlbie
586
587 -- XXX Temporary hack. Mark the op as non-cachable if the address
588 -- is the form 0xc------- for a real-mode access.
589 if addr(31 downto 28) = "1100" and l_in.virt_mode = '0' then
590 v.nc := '1';
591 end if;
592
593 if l_in.second = '0' then
594 -- Do length_to_sel and work out if we are doing 2 dwords
595 long_sel := xfer_data_sel(l_in.length, lsu_sum(2 downto 0));
596 byte_sel := long_sel(7 downto 0);
597 v.first_bytes := byte_sel;
598 v.second_bytes := long_sel(15 downto 8);
599 else
-- Second half of a 16-byte transfer reuses the registered byte enables
600 byte_sel := r.first_bytes;
601 long_sel := r.second_bytes & r.first_bytes;
602 end if;
603
604 -- check alignment for larx/stcx
-- misaligned is set when any address bit below the access size is non-zero
605 misaligned := or (std_ulogic_vector(unsigned(l_in.length(2 downto 0)) - 1) and addr(2 downto 0));
606 v.align_intr := l_in.reserve and misaligned;
607 if l_in.repeat = '1' and l_in.second = '0' and addr(3) = '1' then
608 -- length is really 16 not 8
609 -- Make misaligned lq cause an alignment interrupt in LE mode,
610 -- in order to avoid the case with RA = RT + 1 where the second half
611 -- faults but the first doesn't (and updates RT+1, destroying RA).
612 -- The equivalent BE case doesn't occur because RA = RT is illegal.
613 misaligned := '1';
614 if l_in.reserve = '1' or (l_in.op = OP_LOAD and l_in.byte_reverse = '0') then
615 v.align_intr := '1';
616 end if;
617 end if;
618
619 v.atomic := not misaligned;
620 v.atomic_last := not misaligned and (l_in.second or not l_in.repeat);
621
622 case l_in.op is
623 when OP_STORE =>
624 if HAS_FPU and l_in.is_32bit = '1' then
-- SP float store: spend a cycle in FPR_CONV before issuing the request
625 v.state := FPR_CONV;
626 fp_reg_conv := '1';
627 else
628 req := '1';
629 end if;
630 when OP_LOAD =>
631 req := '1';
632 v.load := '1';
633 -- Allow an extra cycle for RA update on loads
634 v.extra_cycle := l_in.update;
635 if HAS_FPU and l_in.is_32bit = '1' then
636 -- Allow an extra cycle for SP->DP precision conversion
637 v.load_sp := '1';
638 v.extra_cycle := '1';
639 end if;
640 when OP_DCBZ =>
-- dcbz to a non-cacheable address raises an alignment interrupt
641 v.align_intr := v.nc;
642 req := '1';
643 v.dcbz := '1';
644 when OP_TLBIE =>
645 mmureq := '1';
646 v.tlbie := '1';
647 v.state := TLBIE_WAIT;
648 v.wait_mmu := '1';
649 when OP_MFSPR =>
650 v.mfspr := '1';
651 -- partial decode on SPR number should be adequate given
652 -- the restricted set that get sent down this path
653 if sprn(9) = '0' and sprn(5) = '0' then
654 if sprn(0) = '0' then
655 v.sprval := x"00000000" & r.dsisr;
656 else
657 v.sprval := r.dar;
658 end if;
659 else
660 -- reading one of the SPRs in the MMU
661 v.sprval := m_in.sprval;
662 end if;
663 v.state := COMPLETE;
664 when OP_MTSPR =>
665 if sprn(9) = '0' and sprn(5) = '0' then
666 if sprn(0) = '0' then
667 v.dsisr := l_in.data(31 downto 0);
668 else
669 v.dar := l_in.data;
670 end if;
671 v.state := COMPLETE;
672 else
673 -- writing one of the SPRs in the MMU
674 mmu_mtspr := '1';
675 v.state := TLBIE_WAIT;
676 v.wait_mmu := '1';
677 end if;
678 when OP_FETCH_FAILED =>
679 -- send it to the MMU to do the radix walk
680 maddr := l_in.nia;
681 v.instr_fault := '1';
682 mmureq := '1';
683 v.state := MMU_LOOKUP;
684 v.wait_mmu := '1';
685 when others =>
686 assert false report "unknown op sent to loadstore1";
687 end case;
688
689 if req = '1' then
-- Choose the next state for a dcache request: COMPLETE to deliver an
-- alignment interrupt, SECOND_REQ if the access spills into a second
-- dword, otherwise wait for the single ack.
690 if v.align_intr = '1' then
691 v.state := COMPLETE;
692 elsif long_sel(15 downto 8) = "00000000" then
693 v.state := ACK_WAIT;
694 else
695 v.state := SECOND_REQ;
696 end if;
697 end if;
698
-- Operations that set none of these flags (mfspr, and mtspr to DAR/DSISR)
-- leave busy clear and so are reported done from the COMPLETE state in the
-- next cycle.
699 v.busy := req or mmureq or mmu_mtspr or fp_reg_conv;
700 end if;
701
702 -- Update outputs to dcache
-- The request is suppressed when an alignment interrupt is being raised
703 d_out.valid <= req and not v.align_intr;
704 d_out.load <= v.load;
705 d_out.dcbz <= v.dcbz;
706 d_out.nc <= v.nc;
707 d_out.reserve <= v.reserve;
708 d_out.atomic <= v.atomic;
709 d_out.atomic_last <= v.atomic_last;
710 d_out.addr <= addr;
711 d_out.data <= store_data;
712 d_out.byte_sel <= byte_sel;
713 d_out.virt_mode <= v.virt_mode;
714 d_out.priv_mode <= v.priv_mode;
715
716 -- Update outputs to MMU
717 m_out.valid <= mmureq;
718 m_out.iside <= v.instr_fault;
719 m_out.load <= r.load;
720 m_out.priv <= r.priv_mode;
721 m_out.tlbie <= v.tlbie;
722 m_out.mtspr <= mmu_mtspr;
723 m_out.sprn <= sprn;
724 m_out.addr <= maddr;
725 m_out.slbia <= l_in.insn(7);
726 m_out.rs <= l_in.data;
727
728 -- Update outputs to writeback
729 -- Multiplex either cache data to the destination GPR or
730 -- the address for the rA update.
-- Priority: mfspr value, then rA update, then converted lfs data,
-- then trimmed load data.
731 l_out.valid <= done;
732 if r.mfspr = '1' then
733 l_out.write_enable <= '1';
734 l_out.write_reg <= r.write_reg;
735 l_out.write_data <= r.sprval;
736 elsif do_update = '1' then
737 l_out.write_enable <= '1';
738 l_out.write_reg <= gpr_to_gspr(r.update_reg);
739 l_out.write_data <= r.addr;
740 elsif lfs_done = '1' then
741 l_out.write_enable <= '1';
742 l_out.write_reg <= r.write_reg;
743 l_out.write_data <= load_dp_data;
744 else
745 l_out.write_enable <= write_enable;
746 l_out.write_reg <= r.write_reg;
747 l_out.write_data <= data_trimmed;
748 end if;
749 l_out.xerc <= r.xerc;
750 l_out.rc <= r.rc and done;
751 l_out.store_done <= d_in.store_done;
752
753 -- update exception info back to execute1
754 e_out.busy <= busy;
755 e_out.exception <= exception;
756 e_out.alignment <= r.align_intr;
757 e_out.instr_fault <= r.instr_fault;
758 e_out.invalid <= m_in.invalid;
759 e_out.badtree <= m_in.badtree;
760 e_out.perm_error <= m_in.perm_error;
761 e_out.rc_error <= m_in.rc_error;
762 e_out.segment_fault <= m_in.segerr;
-- On a data-side exception record the address in DAR; DSISR is updated
-- except for segment errors and alignment interrupts.
763 if exception = '1' and r.instr_fault = '0' then
764 v.dar := addr;
765 if m_in.segerr = '0' and r.align_intr = '0' then
766 v.dsisr := dsisr;
767 end if;
768 end if;
769
770 -- Update registers
771 rin <= v;
772
773 end process;
774
-- Optional logging: when LOG_LENGTH is non-zero, a 10-bit status word is
-- captured on every rising clock edge and presented on log_out.
l1_log: if LOG_LENGTH > 0 generate
    signal log_reg : std_ulogic_vector(9 downto 0);
begin
    log_out <= log_reg;

    ls1_log: process(clk)
    begin
        if rising_edge(clk) then
            -- bits 9..3: individual status flags
            log_reg(9) <= e_out.busy;
            log_reg(8) <= e_out.exception;
            log_reg(7) <= l_out.valid;
            log_reg(6) <= m_out.valid;
            log_reg(5) <= d_out.valid;
            log_reg(4) <= m_in.done;
            log_reg(3) <= r.dwords_done;
            -- bits 2..0: position of the current state within state_t
            log_reg(2 downto 0) <= std_ulogic_vector(to_unsigned(state_t'pos(r.state), 3));
        end if;
    end process;
end generate;
793
794 end;