core: Crack update-form loads into two internal ops
[microwatt.git] / loadstore1.vhdl
1 library ieee;
2 use ieee.std_logic_1164.all;
3 use ieee.numeric_std.all;
4
5 library work;
6 use work.decode_types.all;
7 use work.common.all;
8 use work.insn_helpers.all;
9 use work.helpers.all;
10
11 -- 2 cycle LSU
12 -- We calculate the address in the first cycle
13
-- Load/store unit interface.
-- Requests arrive from execute1 on l_in; the unit talks to the dcache
-- (d_out/d_in) and the MMU (m_out/m_in), returns results to writeback on
-- l_out and reports busy/exception status to execute1 on e_out.
14 entity loadstore1 is
15 generic (
-- When true, the SP<->DP floating-point conversion logic (ls_fp_conv)
-- is generated and the lfs*/stfs* handling in the main process is enabled.
16 HAS_FPU : boolean := true;
17 -- Non-zero to enable log data collection
18 LOG_LENGTH : natural := 0
19 );
20 port (
21 clk : in std_ulogic;
-- Synchronous reset: sampled on the rising edge of clk; it only
-- clears r.state, r.busy and r.do_update.
22 rst : in std_ulogic;
23
-- Request from execute1, and status (busy, exception details) back to it
24 l_in : in Execute1ToLoadstore1Type;
25 e_out : out Loadstore1ToExecute1Type;
-- Result (load data, SPR value or rA update address) to writeback
26 l_out : out Loadstore1ToWritebackType;
27
-- Request to / response from the data cache
28 d_out : out Loadstore1ToDcacheType;
29 d_in : in DcacheToLoadstore1Type;
30
-- Request to / response from the MMU
31 m_out : out Loadstore1ToMmuType;
32 m_in : in MmuToLoadstore1Type;
33
-- Not read anywhere in the architecture below (see the note after the entity)
34 dc_stall : in std_ulogic;
35
-- Only driven when LOG_LENGTH > 0 (by the l1_log generate block)
36 log_out : out std_ulogic_vector(9 downto 0)
37 );
38 end loadstore1;
39
40 -- Note, we don't currently use the stall output from the dcache because
41 -- we know it can take two requests without stalling when idle, we are
42 -- its only user, and we know it never stalls when idle.
43
44 architecture behave of loadstore1 is
45
46 -- State machine for unaligned loads/stores
-- The same state register also sequences MMU lookups, tlbie and MMU SPR
-- writes, and the single-cycle completion of operations that need no
-- dcache access.  The declaration order matters: the log output encodes
-- state_t'pos(r.state) in 3 bits.
47 type state_t is (IDLE, -- ready for instruction
48 SECOND_REQ, -- send 2nd request of unaligned xfer
49 ACK_WAIT, -- waiting for ack from dcache
50 MMU_LOOKUP, -- waiting for MMU to look up translation
51 TLBIE_WAIT, -- waiting for MMU to finish doing a tlbie
52 FINISH_LFS, -- write back converted SP data for lfs*
53 COMPLETE -- extra cycle to complete an operation
54 );
55
-- Load formatter controls, one entry per destination byte (0 to 7).
-- byte_index_t: which byte (0..7) of the dcache data feeds destination byte i.
56 type byte_index_t is array(0 to 7) of unsigned(2 downto 0);
-- byte_trim_t selects the source of a destination byte:
--   "11" = byte saved in r.load_data (first dword of an unaligned load)
--   "10" = byte from the current permuted dcache data
--   "01" = fill with the sign bit
--   anything else = zero
57 subtype byte_trim_t is std_ulogic_vector(1 downto 0);
58 type trim_ctl_t is array(0 to 7) of byte_trim_t;
59
-- All registered state of the load/store unit.  The main combinational
-- process copies r into a variable, modifies it and assigns it to rin;
-- the clocked process then loads r from rin.
60 type reg_stage_t is record
61 -- latch most of the input request
62 load : std_ulogic; -- set for OP_LOAD
63 tlbie : std_ulogic; -- set for OP_TLBIE
64 dcbz : std_ulogic; -- set for OP_DCBZ
65 addr : std_ulogic_vector(63 downto 0); -- request address; also the value written back for an rA update
66 store_data : std_ulogic_vector(63 downto 0); -- l_in.data, or the SP-converted value for 32-bit FP stores
67 load_data : std_ulogic_vector(63 downto 0); -- permuted data from the first dword of a two-dword load
68 write_reg : gspr_index_t; -- from l_in.write_reg, passed to writeback
69 length : std_ulogic_vector(3 downto 0); -- transfer size in bytes, from l_in.length
70 byte_reverse : std_ulogic; -- from l_in.byte_reverse
71 byte_offset : unsigned(2 downto 0); -- low 3 bits of the computed address (store rotation)
72 brev_mask : unsigned(2 downto 0); -- length - 1 when byte-reversing, else 0 (store formatting)
73 sign_extend : std_ulogic; -- from l_in.sign_extend
74 update : std_ulogic; -- from l_in.update (update-form instruction)
75 xerc : xer_common_t; -- from l_in.xerc, passed to writeback
76 reserve : std_ulogic; -- from l_in.reserve, passed to the dcache
77 atomic : std_ulogic; -- not misaligned; passed to the dcache
78 atomic_last : std_ulogic; -- atomic and (second half or not a repeated op)
79 rc : std_ulogic; -- from l_in.rc, passed to writeback when done
80 nc : std_ulogic; -- non-cacheable access
81 virt_mode : std_ulogic; -- from l_in.virt_mode, passed to the dcache
82 priv_mode : std_ulogic; -- from l_in.priv_mode, passed to the dcache and MMU
83 state : state_t; -- current state of the sequencing state machine
84 dwords_done : std_ulogic; -- first dword of a two-dword transfer has been acked
85 last_dword : std_ulogic; -- the next ack is for the last (or only) dword
86 first_bytes : std_ulogic_vector(7 downto 0); -- byte selects for the first dword
87 second_bytes : std_ulogic_vector(7 downto 0); -- byte selects for the second dword (zero if not needed)
88 dar : std_ulogic_vector(63 downto 0); -- DAR value: set on exceptions, read/written by mfspr/mtspr
89 dsisr : std_ulogic_vector(31 downto 0); -- DSISR value: set on exceptions, read/written by mfspr/mtspr
90 instr_fault : std_ulogic; -- set for OP_FETCH_FAILED (MMU lookup for an instruction fetch)
91 align_intr : std_ulogic; -- raise an alignment interrupt in the COMPLETE state
92 sprval : std_ulogic_vector(63 downto 0); -- value latched for mfspr
93 busy : std_ulogic; -- operation in progress
94 wait_dcache : std_ulogic; -- a dcache ack (d_in.valid) clears busy
95 wait_mmu : std_ulogic; -- MMU completion (m_in.done) clears busy
96 do_update : std_ulogic; -- write r.addr back to rA this cycle (second half of load with update)
97 extra_cycle : std_ulogic; -- set for SP FP loads; prevents wait_dcache from being set
98 mode_32bit : std_ulogic; -- from l_in.mode_32bit; upper 32 address bits are forced to zero
99 byte_index : byte_index_t; -- load formatter: source byte for each destination byte
100 use_second : std_ulogic_vector(7 downto 0); -- load formatter: byte index carried past 7 (byte is in the second dword)
101 trim_ctl : trim_ctl_t; -- load formatter: per-byte source select (see byte_trim_t)
102 load_sp : std_ulogic; -- load needs SP -> DP conversion
103 ld_sp_data : std_ulogic_vector(31 downto 0); -- low 32 bits of the trimmed load data
104 ld_sp_nz : std_ulogic; -- OR of the 23 fraction bits of the loaded SP value
105 ld_sp_lz : std_ulogic_vector(5 downto 0); -- count_left_zeroes of the 23 fraction bits
106 wr_sel : std_ulogic_vector(1 downto 0); -- writeback mux: "00" sprval, "01" addr, "10" DP-converted data, else load data
107 end record;
108
-- r is the registered state; rin is the next-state value computed by the
-- combinational process loadstore1_1 and loaded into r on each clock.
109 signal r, rin : reg_stage_t;
-- l_in.addr1 + l_in.addr2 while l_in.valid = '1', otherwise zero
110 signal lsu_sum : std_ulogic_vector(63 downto 0);
111
-- Outputs of the FP conversion processes; only driven when HAS_FPU is true.
112 signal store_sp_data : std_ulogic_vector(31 downto 0);
113 signal load_dp_data : std_ulogic_vector(63 downto 0);
114
-- Byte-enable mask for an access of `length` bytes starting at byte 0.
-- Only lengths of 1, 2, 4 and 8 bytes are recognised; any other value
-- (including one containing metavalues) yields an all-zero mask.
function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is
begin
    if length = "1000" then
        -- doubleword
        return "11111111";
    elsif length = "0100" then
        -- word
        return "00001111";
    elsif length = "0010" then
        -- halfword
        return "00000011";
    elsif length = "0001" then
        -- byte
        return "00000001";
    end if;
    -- unsupported size: no bytes enabled
    return "00000000";
end function length_to_sel;
131
-- Byte enables for a possibly unaligned transfer.
-- The size mask from length_to_sel is placed in a 16-bit field and moved
-- up by the byte offset given in `address`.  Bits 7..0 of the result are
-- the selects for the first doubleword, bits 15..8 those for the second
-- doubleword (non-zero only when the access crosses a doubleword boundary).
function xfer_data_sel(size : in std_logic_vector(3 downto 0);
                       address : in std_logic_vector(2 downto 0))
    return std_ulogic_vector is
    variable base_sel : unsigned(15 downto 0);
    variable offset   : natural;
begin
    -- zero-extend the 8-bit mask to 16 bits
    base_sel := resize(unsigned(length_to_sel(size)), 16);
    offset := to_integer(unsigned(address));
    return std_ulogic_vector(shift_left(base_sel, offset));
end function xfer_data_sel;
144
-- 23-bit logical right shifter for DP -> SP float conversions.
-- The shift is done in two stages: bits 1..0 of `shift` give a shift of
-- 0 to 3 places, bits 4..2 a further shift of 0, 4, 8, 12 or 16 places.
-- Upper-field values above "100" (and metavalues) shift by 20 places.
-- Vacated bit positions are filled with '0'.
function shifter_23r(frac: std_ulogic_vector(22 downto 0); shift: unsigned(4 downto 0))
    return std_ulogic_vector is
    variable fine   : unsigned(22 downto 0);
    variable coarse : unsigned(22 downto 0);
begin
    -- first stage: shift by 0..3
    case shift(1 downto 0) is
        when "00" =>
            fine := unsigned(frac);
        when "01" =>
            fine := shift_right(unsigned(frac), 1);
        when "10" =>
            fine := shift_right(unsigned(frac), 2);
        when others =>
            fine := shift_right(unsigned(frac), 3);
    end case;
    -- second stage: shift by a multiple of 4
    case shift(4 downto 2) is
        when "000" =>
            coarse := fine;
        when "001" =>
            coarse := shift_right(fine, 4);
        when "010" =>
            coarse := shift_right(fine, 8);
        when "011" =>
            coarse := shift_right(fine, 12);
        when "100" =>
            coarse := shift_right(fine, 16);
        when others =>
            coarse := shift_right(fine, 20);
    end case;
    return std_ulogic_vector(coarse);
end;
177
-- 23-bit logical left shifter for SP -> DP float conversions.
-- The shift is done in two stages: bits 1..0 of `shift` give a shift of
-- 0 to 3 places, bits 4..2 a further shift of 0, 4, 8, 12 or 16 places.
-- Upper-field values above "100" (and metavalues) shift by 20 places.
-- Bits shifted out at the top are lost; '0' is shifted in at the bottom.
function shifter_23l(frac: std_ulogic_vector(22 downto 0); shift: unsigned(4 downto 0))
    return std_ulogic_vector is
    variable fine   : unsigned(22 downto 0);
    variable coarse : unsigned(22 downto 0);
begin
    -- first stage: shift by 0..3
    case shift(1 downto 0) is
        when "00" =>
            fine := unsigned(frac);
        when "01" =>
            fine := shift_left(unsigned(frac), 1);
        when "10" =>
            fine := shift_left(unsigned(frac), 2);
        when others =>
            fine := shift_left(unsigned(frac), 3);
    end case;
    -- second stage: shift by a multiple of 4
    case shift(4 downto 2) is
        when "000" =>
            coarse := fine;
        when "001" =>
            coarse := shift_left(fine, 4);
        when "010" =>
            coarse := shift_left(fine, 8);
        when "011" =>
            coarse := shift_left(fine, 12);
        when "100" =>
            coarse := shift_left(fine, 16);
        when others =>
            coarse := shift_left(fine, 20);
    end case;
    return std_ulogic_vector(coarse);
end;
210
211 begin
-- First-cycle address calculation: the sum of the two address operands
-- from execute1.  The adder output is forced to zero whenever there is
-- no valid request.
lsu_sum <= (others => '0') when l_in.valid /= '1' else
           std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2));
214
-- State register.  On every rising clock edge r takes the next-state
-- value rin, except while rst is asserted, when only the fields that
-- control sequencing are cleared (all other fields keep their value).
loadstore1_0: process(clk)
begin
    if rising_edge(clk) then
        if rst /= '1' then
            r <= rin;
        else
            r.busy <= '0';
            r.do_update <= '0';
            r.state <= IDLE;
        end if;
    end if;
end process;
227
ls_fp_conv: if HAS_FPU generate
    -- Store path (stfs): convert the double-precision value in l_in.data
    -- to single-precision format in store_sp_data.
    -- 896 is 1023 - 127, the difference between the DP and SP exponent
    -- biases, so a DP exponent above 896 gives a normalized SP result.
    dp_to_sp: process(all)
        variable dp_exp : unsigned(10 downto 0);
        variable mant   : std_ulogic_vector(22 downto 0);
        variable rshift : unsigned(4 downto 0);
        variable sp     : std_ulogic_vector(31 downto 0);
    begin
        -- default result: signed zero
        sp := (others => '0');
        sp(31) := l_in.data(63);
        dp_exp := unsigned(l_in.data(62 downto 52));
        if dp_exp > 896 then
            -- normalized: keep the exponent MSB plus its low 7 bits and
            -- the top 23 fraction bits
            sp(30) := l_in.data(62);
            sp(29 downto 0) := l_in.data(58 downto 29);
        elsif dp_exp >= 874 then
            -- result is an SP denormal: make the implicit 1 explicit and
            -- shift the fraction right
            mant := '1' & l_in.data(51 downto 30);
            rshift := 0 - dp_exp(4 downto 0);
            sp(22 downto 0) := shifter_23r(mant, rshift);
        end if;
        store_sp_data <= sp;
    end process;

    -- Load path (lfs): convert the single-precision value latched in
    -- r.ld_sp_data to double-precision format in load_dp_data, using the
    -- fraction-non-zero flag and leading-zero count latched alongside it.
    sp_to_dp: process(all)
        variable sp_exp  : unsigned(7 downto 0);
        variable dp_exp  : unsigned(10 downto 0);
        variable any_exp : std_ulogic;
        variable all_exp : std_ulogic;
        variable mant    : std_ulogic_vector(22 downto 0);
        variable lshift  : unsigned(4 downto 0);
        variable dp      : std_ulogic_vector(63 downto 0);
    begin
        mant := r.ld_sp_data(22 downto 0);
        sp_exp := unsigned(r.ld_sp_data(30 downto 23));
        any_exp := or (r.ld_sp_data(30 downto 23));
        all_exp := and (r.ld_sp_data(30 downto 23));
        lshift := (others => '0');
        if all_exp = '1' then
            -- infinity or NaN: exponent all ones
            dp_exp := (others => '1');
        elsif any_exp = '1' then
            -- finite normalized value: rebias the exponent
            dp_exp := to_unsigned(896, 11) + resize(sp_exp, 11);
        elsif r.ld_sp_nz = '0' then
            -- zero
            dp_exp := (others => '0');
        else
            -- SP denormal: normalize by shifting out the leading zeroes
            -- and the first 1 bit, adjusting the exponent to match
            dp_exp := to_unsigned(896, 11) - resize(unsigned(r.ld_sp_lz), 11);
            lshift := unsigned(r.ld_sp_lz(4 downto 0)) + 1;
        end if;
        -- the low 29 fraction bits of the DP result are always zero
        dp := (others => '0');
        dp(63) := r.ld_sp_data(31);
        dp(62 downto 52) := std_ulogic_vector(dp_exp);
        dp(51 downto 29) := shifter_23l(mant, lshift);
        load_dp_data <= dp;
    end process;
end generate;
280
-- Main combinational process.
-- Computes the next register state (rin) from the current state (r) and
-- the inputs from execute1 (l_in), the dcache (d_in) and the MMU (m_in),
-- and drives all outputs to the dcache, MMU, writeback and execute1.
-- Statement order matters: v starts as a copy of r and later assignments
-- override earlier ones; several outputs use the final value of v.
281 loadstore1_1: process(all)
282 variable v : reg_stage_t;
283 variable brev_lenm1 : unsigned(2 downto 0);
284 variable byte_offset : unsigned(2 downto 0);
285 variable j : integer;
286 variable k : unsigned(2 downto 0);
287 variable kk : unsigned(3 downto 0);
288 variable long_sel : std_ulogic_vector(15 downto 0);
289 variable byte_sel : std_ulogic_vector(7 downto 0);
290 variable req : std_ulogic;
291 variable busy : std_ulogic;
292 variable addr : std_ulogic_vector(63 downto 0);
293 variable maddr : std_ulogic_vector(63 downto 0);
294 variable wdata : std_ulogic_vector(63 downto 0);
295 variable write_enable : std_ulogic;
296 variable do_update : std_ulogic;
297 variable done : std_ulogic;
298 variable data_permuted : std_ulogic_vector(63 downto 0);
299 variable data_trimmed : std_ulogic_vector(63 downto 0);
300 variable store_data : std_ulogic_vector(63 downto 0);
301 variable byte_rev : std_ulogic;
302 variable length : std_ulogic_vector(3 downto 0);
303 variable negative : std_ulogic;
304 variable sprn : std_ulogic_vector(9 downto 0);
305 variable exception : std_ulogic;
306 variable next_addr : std_ulogic_vector(63 downto 0);
307 variable mmureq : std_ulogic;
308 variable dsisr : std_ulogic_vector(31 downto 0);
309 variable mmu_mtspr : std_ulogic;
310 variable itlb_fault : std_ulogic;
311 variable misaligned : std_ulogic;
312 begin
-- Start from the current register values; any field not assigned
-- below keeps its value.
313 v := r;
314 req := '0';
315 mmu_mtspr := '0';
316 itlb_fault := '0';
317 sprn := std_ulogic_vector(to_unsigned(decode_spr_num(l_in.insn), 10));
318 dsisr := (others => '0');
319 mmureq := '0';
-- Default writeback mux selection for the next cycle: load data
-- (the "others" branch of the case on r.wr_sel further down).
320 v.wr_sel := "11";
321
322 write_enable := '0';
323
-- r.do_update was set by the previous cycle's request; it is cleared
-- again here unless a new request sets it below.
324 do_update := r.do_update;
325 v.do_update := '0';
326
327 -- load data formatting
328 -- shift and byte-reverse data bytes
-- Destination byte i is taken from byte r.byte_index(i) of the dcache data.
329 for i in 0 to 7 loop
330 j := to_integer(r.byte_index(i)) * 8;
331 data_permuted(i * 8 + 7 downto i * 8) := d_in.data(j + 7 downto j);
332 end loop;
333
334 -- Work out the sign bit for sign extension.
335 -- For unaligned loads crossing two dwords, the sign bit is in the
336 -- first dword for big-endian (byte_reverse = 1), or the second dword
337 -- for little-endian.
338 if r.dwords_done = '1' and r.byte_reverse = '1' then
339 negative := (r.length(3) and r.load_data(63)) or
340 (r.length(2) and r.load_data(31)) or
341 (r.length(1) and r.load_data(15)) or
342 (r.length(0) and r.load_data(7));
343 else
344 negative := (r.length(3) and data_permuted(63)) or
345 (r.length(2) and data_permuted(31)) or
346 (r.length(1) and data_permuted(15)) or
347 (r.length(0) and data_permuted(7));
348 end if;
349
350 -- trim and sign-extend
-- Per byte: "11" = byte saved from the first dword, "10" = byte from
-- the current dcache data, "01" = sign fill, otherwise zero.
351 for i in 0 to 7 loop
352 case r.trim_ctl(i) is
353 when "11" =>
354 data_trimmed(i * 8 + 7 downto i * 8) := r.load_data(i * 8 + 7 downto i * 8);
355 when "10" =>
356 data_trimmed(i * 8 + 7 downto i * 8) := data_permuted(i * 8 + 7 downto i * 8);
357 when "01" =>
358 data_trimmed(i * 8 + 7 downto i * 8) := (others => negative);
359 when others =>
360 data_trimmed(i * 8 + 7 downto i * 8) := x"00";
361 end case;
362 end loop;
363
364 if HAS_FPU then
365 -- Single-precision FP conversion for loads
-- Latch the raw SP word plus a fraction-non-zero flag and a
-- leading-zero count for the sp_to_dp process to use next cycle.
366 v.ld_sp_data := data_trimmed(31 downto 0);
367 v.ld_sp_nz := or (data_trimmed(22 downto 0));
368 v.ld_sp_lz := count_left_zeroes(data_trimmed(22 downto 0));
369 end if;
370
371 -- Byte reversing and rotating for stores.
372 -- Done in the second cycle (the cycle after l_in.valid = 1).
373 for i in 0 to 7 loop
374 k := (to_unsigned(i, 3) - r.byte_offset) xor r.brev_mask;
375 j := to_integer(k) * 8;
376 store_data(i * 8 + 7 downto i * 8) := r.store_data(j + 7 downto j);
377 end loop;
378
379 -- compute (addr + 8) & ~7 for the second doubleword when unaligned
380 next_addr := std_ulogic_vector(unsigned(r.addr(63 downto 3)) + 1) & "000";
381
382 -- Busy calculation.
383 -- We need to minimize the delay from clock to busy valid because it
384 -- gates the start of execution of the next instruction.
385 busy := r.busy and not ((r.wait_dcache and d_in.valid) or (r.wait_mmu and m_in.done));
386 v.busy := busy;
387
-- An operation is done in the first cycle where we are not idle and
-- busy is (or has become) 0.
388 done := '0';
389 if r.state /= IDLE and busy = '0' then
390 done := '1';
391 end if;
392 exception := '0';
393
-- Default address and byte selects for requests issued from the
-- state machine: second dword once the first is done or when in
-- SECOND_REQ, otherwise the first dword.
394 if r.dwords_done = '1' or r.state = SECOND_REQ then
395 addr := next_addr;
396 byte_sel := r.second_bytes;
397 else
398 addr := r.addr;
399 byte_sel := r.first_bytes;
400 end if;
401 if r.mode_32bit = '1' then
402 addr(63 downto 32) := (others => '0');
403 end if;
404 maddr := addr;
405
406 case r.state is
407 when IDLE =>
408
409 when SECOND_REQ =>
-- issue the request for the second dword of an unaligned transfer
410 req := '1';
411 v.state := ACK_WAIT;
412 v.last_dword := '0';
413
414 when ACK_WAIT =>
415 -- r.wr_sel gets set one cycle after we come into ACK_WAIT state,
416 -- which is OK because the dcache always takes at least two cycles.
-- stores with update write the address (r.addr) back to rA
417 if r.update = '1' and r.load = '0' then
418 v.wr_sel := "01";
419 end if;
420 if d_in.error = '1' then
421 -- dcache will discard the second request if it
422 -- gets an error on the 1st of two requests
423 if d_in.cache_paradox = '1' then
424 -- signal an interrupt straight away
425 exception := '1';
-- this DSISR bit is set when the access is not a load
426 dsisr(63 - 38) := not r.load;
427 -- XXX there is no architected bit for this
428 dsisr(63 - 35) := d_in.cache_paradox;
429 else
430 -- Look up the translation for TLB miss
431 -- and also for permission error and RC error
432 -- in case the PTE has been updated.
433 mmureq := '1';
434 v.state := MMU_LOOKUP;
435 end if;
436 end if;
437 if d_in.valid = '1' then
438 if r.last_dword = '0' then
-- ack for the first of two dwords: remember its data (loads)
-- and wait for the second ack
439 v.dwords_done := '1';
440 v.last_dword := '1';
441 if r.load = '1' then
442 v.load_data := data_permuted;
443 end if;
444 else
-- ack for the last (or only) dword
445 write_enable := r.load and not r.load_sp;
446 if HAS_FPU and r.load_sp = '1' then
447 -- SP to DP conversion takes a cycle
448 v.wr_sel := "10";
449 v.state := FINISH_LFS;
450 elsif r.load = '0' then
451 -- stores write back rA update in this cycle
452 do_update := r.update;
453 end if;
454 v.busy := '0';
455 end if;
456 end if;
457 -- r.wait_dcache gets set one cycle after we come into ACK_WAIT state,
458 -- which is OK because the dcache always takes at least two cycles.
459 v.wait_dcache := r.last_dword and not r.extra_cycle;
460
461 when MMU_LOOKUP =>
462 if m_in.done = '1' then
463 if r.instr_fault = '0' then
464 -- retry the request now that the MMU has installed a TLB entry
465 req := '1';
466 if r.last_dword = '0' then
467 v.state := SECOND_REQ;
468 else
469 v.state := ACK_WAIT;
470 end if;
471 end if;
472 end if;
473 if m_in.err = '1' then
-- translation failed: raise an exception and build the
-- DSISR value from the MMU's error indications
474 exception := '1';
475 dsisr(63 - 33) := m_in.invalid;
476 dsisr(63 - 36) := m_in.perm_error;
477 dsisr(63 - 38) := not r.load;
478 dsisr(63 - 44) := m_in.badtree;
479 dsisr(63 - 45) := m_in.rc_error;
480 end if;
481
482 when TLBIE_WAIT =>
483
484 when FINISH_LFS =>
485
486 when COMPLETE =>
487 exception := r.align_intr;
488
489 end case;
490
-- Return to IDLE on completion or exception.  A new request arriving
-- in the same cycle overrides v.state and v.busy below.
491 if done = '1' or exception = '1' then
492 v.state := IDLE;
493 v.busy := '0';
494 end if;
495
496 -- Note that l_in.valid is gated with busy inside execute1
497 if l_in.valid = '1' then
-- New request: latch its parameters and clear per-operation state.
498 v.mode_32bit := l_in.mode_32bit;
499 v.load := '0';
500 v.dcbz := '0';
501 v.tlbie := '0';
502 v.instr_fault := '0';
503 v.align_intr := '0';
504 v.dwords_done := '0';
505 v.last_dword := '1';
506 v.write_reg := l_in.write_reg;
507 v.length := l_in.length;
508 v.byte_reverse := l_in.byte_reverse;
509 v.sign_extend := l_in.sign_extend;
510 v.update := l_in.update;
511 v.xerc := l_in.xerc;
512 v.reserve := l_in.reserve;
513 v.rc := l_in.rc;
514 v.nc := l_in.ci;
515 v.virt_mode := l_in.virt_mode;
516 v.priv_mode := l_in.priv_mode;
517 v.load_sp := '0';
518 v.wait_dcache := '0';
519 v.wait_mmu := '0';
520 v.extra_cycle := '0';
521
-- 32-bit FP stores use the DP -> SP converted value
522 if HAS_FPU and l_in.is_32bit = '1' then
523 v.store_data := x"00000000" & store_sp_data;
524 else
525 v.store_data := l_in.data;
526 end if;
527
528 addr := lsu_sum;
529 if l_in.second = '1' then
530 -- second half of load with update does the update
531 if l_in.op = OP_LOAD and l_in.update = '1' then
532 v.do_update := '1';
533 else
534 -- for the second half of a 16-byte transfer, use next_addr
535 addr := next_addr;
536 end if;
537 end if;
538 if l_in.mode_32bit = '1' then
539 addr(63 downto 32) := (others => '0');
540 end if;
541 if v.do_update = '0' then
542 -- preserve previous r.addr for load with update
543 v.addr := addr;
544 end if;
545 maddr := l_in.addr2; -- address from RB for tlbie
546
547 -- XXX Temporary hack. Mark the op as non-cachable if the address
548 -- is the form 0xc------- for a real-mode access.
549 if addr(31 downto 28) = "1100" and l_in.virt_mode = '0' then
550 v.nc := '1';
551 end if;
552
553 if l_in.second = '0' then
554 -- Do length_to_sel and work out if we are doing 2 dwords
555 long_sel := xfer_data_sel(l_in.length, lsu_sum(2 downto 0));
556 byte_sel := long_sel(7 downto 0);
557 v.first_bytes := byte_sel;
558 v.second_bytes := long_sel(15 downto 8);
559 else
-- second half: reuse the byte selects latched for the first half
560 byte_sel := r.first_bytes;
561 long_sel := r.second_bytes & r.first_bytes;
562 end if;
563
564 -- check alignment for larx/stcx
-- misaligned = some address bit below the transfer size is set
565 misaligned := or (std_ulogic_vector(unsigned(l_in.length(2 downto 0)) - 1) and addr(2 downto 0));
566 v.align_intr := l_in.reserve and misaligned;
567 if l_in.repeat = '1' and l_in.second = '0' and l_in.update = '0' and addr(3) = '1' then
568 -- length is really 16 not 8
569 -- Make misaligned lq cause an alignment interrupt in LE mode,
570 -- in order to avoid the case with RA = RT + 1 where the second half
571 -- faults but the first doesn't (and updates RT+1, destroying RA).
572 -- The equivalent BE case doesn't occur because RA = RT is illegal.
573 misaligned := '1';
574 if l_in.reserve = '1' or (l_in.op = OP_LOAD and l_in.byte_reverse = '0') then
575 v.align_intr := '1';
576 end if;
577 end if;
578
579 v.atomic := not misaligned;
580 v.atomic_last := not misaligned and (l_in.second or not l_in.repeat);
581
-- Decode the operation: set req for dcache accesses, mmureq or
-- mmu_mtspr for MMU operations, or go straight to COMPLETE for
-- operations handled locally.
582 case l_in.op is
583 when OP_STORE =>
584 req := '1';
585 when OP_LOAD =>
586 v.load := '1';
587 if l_in.second = '1' and l_in.update = '1' then
-- second half of a load with update: no dcache access,
-- just write the preserved address back next cycle
588 v.wr_sel := "01";
589 v.state := COMPLETE;
590 else
591 req := '1';
592 if HAS_FPU and l_in.is_32bit = '1' then
593 -- Allow an extra cycle for SP->DP precision conversion
594 v.load_sp := '1';
595 v.extra_cycle := '1';
596 end if;
597 end if;
598 when OP_DCBZ =>
-- dcbz to a non-cacheable address gives an alignment interrupt
599 v.align_intr := v.nc;
600 req := '1';
601 v.dcbz := '1';
602 when OP_TLBIE =>
603 mmureq := '1';
604 v.tlbie := '1';
605 v.state := TLBIE_WAIT;
606 v.wait_mmu := '1';
607 when OP_MFSPR =>
608 v.wr_sel := "00";
609 -- partial decode on SPR number should be adequate given
610 -- the restricted set that get sent down this path
611 if sprn(9) = '0' and sprn(5) = '0' then
612 if sprn(0) = '0' then
613 v.sprval := x"00000000" & r.dsisr;
614 else
615 v.sprval := r.dar;
616 end if;
617 else
618 -- reading one of the SPRs in the MMU
619 v.sprval := m_in.sprval;
620 end if;
621 v.state := COMPLETE;
622 when OP_MTSPR =>
623 if sprn(9) = '0' and sprn(5) = '0' then
624 if sprn(0) = '0' then
625 v.dsisr := l_in.data(31 downto 0);
626 else
627 v.dar := l_in.data;
628 end if;
629 v.state := COMPLETE;
630 else
631 -- writing one of the SPRs in the MMU
632 mmu_mtspr := '1';
633 v.state := TLBIE_WAIT;
634 v.wait_mmu := '1';
635 end if;
636 when OP_FETCH_FAILED =>
637 -- send it to the MMU to do the radix walk
638 maddr := l_in.nia;
639 v.instr_fault := '1';
640 mmureq := '1';
641 v.state := MMU_LOOKUP;
642 v.wait_mmu := '1';
643 when others =>
644 assert false report "unknown op sent to loadstore1";
645 end case;
646
647 if req = '1' then
-- Pick the next state for a dcache access: COMPLETE to raise an
-- alignment interrupt, ACK_WAIT for a single-dword transfer,
-- SECOND_REQ when a second dword is needed.
648 if v.align_intr = '1' then
649 v.state := COMPLETE;
650 elsif long_sel(15 downto 8) = "00000000" then
651 v.state := ACK_WAIT;
652 else
653 v.state := SECOND_REQ;
654 end if;
655 end if;
656
-- Operations that go straight to COMPLETE without any dcache or
-- MMU request are not busy, so they finish in the next cycle.
657 v.busy := req or mmureq or mmu_mtspr;
658 end if;
659
660 -- Work out controls for store formatting
661 if l_in.valid = '1' then
662 byte_offset := unsigned(lsu_sum(2 downto 0));
663 byte_rev := l_in.byte_reverse;
664 length := l_in.length;
665 brev_lenm1 := "000";
666 if byte_rev = '1' then
667 brev_lenm1 := unsigned(length(2 downto 0)) - 1;
668 end if;
669 v.byte_offset := byte_offset;
670 v.brev_mask := brev_lenm1;
671 end if;
672
673 -- Work out load formatter controls for next cycle
674 byte_offset := unsigned(v.addr(2 downto 0));
675 brev_lenm1 := "000";
676 if v.byte_reverse = '1' then
677 brev_lenm1 := unsigned(v.length(2 downto 0)) - 1;
678 end if;
679
-- Source byte for destination byte i is (i xor brev_lenm1) + offset;
-- the carry out of the 3-bit sum is latched as use_second(i).
680 for i in 0 to 7 loop
681 kk := ('0' & (to_unsigned(i, 3) xor brev_lenm1)) + ('0' & byte_offset);
682 v.use_second(i) := kk(3);
683 v.byte_index(i) := kk(2 downto 0);
684 end loop;
685
-- Bytes within the transfer length take data (from the saved first
-- dword or the current dword once dwords_done is set); bytes beyond
-- the length are sign-filled or zeroed according to sign_extend.
686 for i in 0 to 7 loop
687 if i < to_integer(unsigned(v.length)) then
688 if v.dwords_done = '1' then
689 v.trim_ctl(i) := '1' & not v.use_second(i);
690 else
691 v.trim_ctl(i) := "10";
692 end if;
693 else
694 v.trim_ctl(i) := '0' & v.sign_extend;
695 end if;
696 end loop;
697
698 -- Update outputs to dcache
-- No request is sent when an alignment interrupt is going to be raised.
699 d_out.valid <= req and not v.align_intr;
700 d_out.load <= v.load;
701 d_out.dcbz <= v.dcbz;
702 d_out.nc <= v.nc;
703 d_out.reserve <= v.reserve;
704 d_out.atomic <= v.atomic;
705 d_out.atomic_last <= v.atomic_last;
706 d_out.addr <= addr;
707 d_out.data <= store_data;
708 d_out.byte_sel <= byte_sel;
709 d_out.virt_mode <= v.virt_mode;
710 d_out.priv_mode <= v.priv_mode;
711
712 -- Update outputs to MMU
713 m_out.valid <= mmureq;
714 m_out.iside <= v.instr_fault;
715 m_out.load <= r.load;
716 m_out.priv <= r.priv_mode;
717 m_out.tlbie <= v.tlbie;
718 m_out.mtspr <= mmu_mtspr;
719 m_out.sprn <= sprn;
720 m_out.addr <= maddr;
721 m_out.slbia <= l_in.insn(7);
722 m_out.rs <= l_in.data;
723
724 -- Update outputs to writeback
725 -- Multiplex either cache data to the destination GPR or
726 -- the address for the rA update.
-- r.wr_sel: "00" = SPR value (mfspr), "01" = r.addr for an rA update,
-- "10" = SP load converted to DP, others = formatted load data.
727 l_out.valid <= done;
728 l_out.write_reg <= r.write_reg;
729 case r.wr_sel is
730 when "00" =>
731 l_out.write_enable <= '1';
732 l_out.write_data <= r.sprval;
733 when "01" =>
734 l_out.write_enable <= do_update;
735 l_out.write_data <= r.addr;
736 when "10" =>
737 l_out.write_enable <= '1';
738 l_out.write_data <= load_dp_data;
739 when others =>
740 l_out.write_enable <= write_enable;
741 l_out.write_data <= data_trimmed;
742 end case;
743 l_out.xerc <= r.xerc;
744 l_out.rc <= r.rc and done;
745 l_out.store_done <= d_in.store_done;
746
747 -- update exception info back to execute1
748 e_out.busy <= busy;
749 e_out.exception <= exception;
750 e_out.alignment <= r.align_intr;
751 e_out.instr_fault <= r.instr_fault;
752 e_out.invalid <= m_in.invalid;
753 e_out.badtree <= m_in.badtree;
754 e_out.perm_error <= m_in.perm_error;
755 e_out.rc_error <= m_in.rc_error;
756 e_out.segment_fault <= m_in.segerr;
-- On a data-side exception record the faulting address in DAR;
-- DSISR is left unchanged for segment faults and alignment interrupts.
757 if exception = '1' and r.instr_fault = '0' then
758 v.dar := addr;
759 if m_in.segerr = '0' and r.align_intr = '0' then
760 v.dsisr := dsisr;
761 end if;
762 end if;
763
764 -- Update registers
765 rin <= v;
766
767 end process;
768
-- Optional logging, generated only when LOG_LENGTH is non-zero.
-- Each clock a 10-bit sample is registered and presented on log_out:
--   bit 9      e_out.busy
--   bit 8      e_out.exception
--   bit 7      l_out.valid
--   bit 6      m_out.valid
--   bit 5      d_out.valid
--   bit 4      m_in.done
--   bit 3      r.dwords_done
--   bits 2..0  position of r.state within state_t
l1_log: if LOG_LENGTH > 0 generate
    signal log_data : std_ulogic_vector(9 downto 0);
begin
    ls1_log: process(clk)
    begin
        if rising_edge(clk) then
            log_data(9) <= e_out.busy;
            log_data(8) <= e_out.exception;
            log_data(7) <= l_out.valid;
            log_data(6) <= m_out.valid;
            log_data(5) <= d_out.valid;
            log_data(4) <= m_in.done;
            log_data(3) <= r.dwords_done;
            log_data(2 downto 0) <= std_ulogic_vector(to_unsigned(state_t'pos(r.state), 3));
        end if;
    end process;
    log_out <= log_data;
end generate;
787
788 end;