loadstore1: Improve timing of data path from cache RAM to writeback
[microwatt.git] / loadstore1.vhdl
1 library ieee;
2 use ieee.std_logic_1164.all;
3 use ieee.numeric_std.all;
4
5 library work;
6 use work.decode_types.all;
7 use work.common.all;
8 use work.insn_helpers.all;
9 use work.helpers.all;
10
11 -- 2 cycle LSU
12 -- We calculate the address in the first cycle
13
-- Entity declaration for the load/store unit.
-- It takes requests from execute1, talks to the dcache and the MMU, and
-- sends results to writeback and status/exception information to execute1.
14 entity loadstore1 is
15 generic (
-- When true, the single-precision FP conversion logic (the ls_fp_conv
-- generate block in the architecture) is included.
16 HAS_FPU : boolean := true;
17 -- Non-zero to enable log data collection
18 LOG_LENGTH : natural := 0
19 );
20 port (
21 clk : in std_ulogic;
-- Synchronous reset: it is only examined on the rising edge of clk.
22 rst : in std_ulogic;
23
-- Request from execute1 (l_in), busy/exception status back to execute1
-- (e_out) and result data to writeback (l_out).
24 l_in : in Execute1ToLoadstore1Type;
25 e_out : out Loadstore1ToExecute1Type;
26 l_out : out Loadstore1ToWritebackType;
27
-- Request to, and response from, the data cache.
28 d_out : out Loadstore1ToDcacheType;
29 d_in : in DcacheToLoadstore1Type;
30
-- Request to, and response from, the MMU.
31 m_out : out Loadstore1ToMmuType;
32 m_in : in MmuToLoadstore1Type;
33
-- Not referenced by the architecture in this file (see the note that
-- follows the entity declaration).
34 dc_stall : in std_ulogic;
35
-- Only driven when LOG_LENGTH > 0 (by the l1_log generate block).
36 log_out : out std_ulogic_vector(9 downto 0)
37 );
38 end loadstore1;
39
40 -- Note, we don't currently use the stall output from the dcache because
41 -- we know it can take two requests without stalling when idle, we are
42 -- its only user, and we know it never stalls when idle.
43
44 architecture behave of loadstore1 is
45
46 -- State machine for unaligned loads/stores
47 type state_t is (IDLE, -- ready for instruction
48 FPR_CONV, -- converting double to float for store
49 SECOND_REQ, -- send 2nd request of unaligned xfer
50 ACK_WAIT, -- waiting for ack from dcache
51 MMU_LOOKUP, -- waiting for MMU to look up translation
52 TLBIE_WAIT, -- waiting for MMU to finish doing a tlbie
53 FINISH_LFS, -- write back converted SP data for lfs*
54 COMPLETE -- extra cycle to complete an operation
55 );
56
-- byte_index_t: for each of the 8 result bytes, the index of the byte of
-- d_in.data that supplies it when formatting load data.
-- byte_trim_t / trim_ctl_t: per-result-byte 2-bit control for the load
-- formatter: "11" = byte saved in load_data, "10" = byte from the permuted
-- dcache data, "01" = sign fill, anything else = zero.
57 type byte_index_t is array(0 to 7) of unsigned(2 downto 0);
58 subtype byte_trim_t is std_ulogic_vector(1 downto 0);
59 type trim_ctl_t is array(0 to 7) of byte_trim_t;
60
-- All state carried from one clock to the next: r is the registered copy,
-- rin is the next value computed by the loadstore1_1 process.
61 type reg_stage_t is record
62 -- latch most of the input request
63 load : std_ulogic;
64 tlbie : std_ulogic;
65 dcbz : std_ulogic;
66 addr : std_ulogic_vector(63 downto 0);
67 store_data : std_ulogic_vector(63 downto 0);
-- permuted data captured when the first of two doublewords is acked
68 load_data : std_ulogic_vector(63 downto 0);
69 write_reg : gspr_index_t;
70 length : std_ulogic_vector(3 downto 0);
71 byte_reverse : std_ulogic;
72 sign_extend : std_ulogic;
73 update : std_ulogic;
74 update_reg : gpr_index_t;
75 xerc : xer_common_t;
76 reserve : std_ulogic;
77 atomic : std_ulogic;
78 atomic_last : std_ulogic;
79 rc : std_ulogic;
80 nc : std_ulogic; -- non-cacheable access
81 virt_mode : std_ulogic;
82 priv_mode : std_ulogic;
83 state : state_t;
-- dwords_done is set once the first doubleword of a two-doubleword
-- access has been acked; last_dword is '0' while that first ack is
-- still outstanding.
84 dwords_done : std_ulogic;
85 last_dword : std_ulogic;
-- byte enables for the first and second doubleword of the access
86 first_bytes : std_ulogic_vector(7 downto 0);
87 second_bytes : std_ulogic_vector(7 downto 0);
88 dar : std_ulogic_vector(63 downto 0);
89 dsisr : std_ulogic_vector(31 downto 0);
90 instr_fault : std_ulogic;
91 align_intr : std_ulogic;
92 sprval : std_ulogic_vector(63 downto 0);
-- busy stays set until the event selected by wait_dcache (d_in.valid)
-- or wait_mmu (m_in.done) arrives
93 busy : std_ulogic;
94 wait_dcache : std_ulogic;
95 wait_mmu : std_ulogic;
96 do_update : std_ulogic;
97 extra_cycle : std_ulogic;
98 mode_32bit : std_ulogic;
-- load formatter controls, worked out one cycle ahead of their use
99 byte_index : byte_index_t;
100 use_second : std_ulogic_vector(7 downto 0);
101 trim_ctl : trim_ctl_t;
102 load_sp : std_ulogic;
-- trimmed SP load data, plus the non-zero flag and leading-zero count
-- of its low 23 bits, used by the sp_to_dp conversion
103 ld_sp_data : std_ulogic_vector(31 downto 0);
104 ld_sp_nz : std_ulogic;
105 ld_sp_lz : std_ulogic_vector(5 downto 0);
106 st_sp_data : std_ulogic_vector(31 downto 0);
-- selects the writeback data source: "00" = sprval, "01" = addr (rA
-- update), "10" = load_dp_data, otherwise the trimmed load data
107 wr_sel : std_ulogic_vector(1 downto 0);
108 end record;
109
110 signal r, rin : reg_stage_t;
-- address sum computed combinationally from l_in.addr1 and l_in.addr2
111 signal lsu_sum : std_ulogic_vector(63 downto 0);
112
-- outputs of the dp_to_sp and sp_to_dp conversion processes
113 signal store_sp_data : std_ulogic_vector(31 downto 0);
114 signal load_dp_data : std_ulogic_vector(63 downto 0);
115
-- Translate a one-hot transfer size (1, 2, 4 or 8 bytes) into the byte
-- enables of an access that starts at byte 0.  Any other size encoding
-- (including metavalues) selects no bytes at all.
function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is
begin
    if length = "0001" then
        return "00000001";
    elsif length = "0010" then
        return "00000011";
    elsif length = "0100" then
        return "00001111";
    elsif length = "1000" then
        return "11111111";
    end if;
    -- not a supported size: no byte enables
    return "00000000";
end function length_to_sel;
132
-- Work out the byte enables for an access of the given size that starts
-- at byte offset `address` within a doubleword.
-- The result is 16 bits wide: bits 7..0 are the enables for the first
-- doubleword and bits 15..8 those for the following doubleword, which are
-- non-zero only when the access is unaligned and crosses into it.
function xfer_data_sel(size    : in std_logic_vector(3 downto 0);
                       address : in std_logic_vector(2 downto 0))
    return std_ulogic_vector is
    variable offset : natural;
    variable wide   : unsigned(15 downto 0);
begin
    offset := to_integer(unsigned(address));
    -- aligned enables, zero-extended to cover two doublewords
    wide := x"00" & unsigned(length_to_sel(size));
    -- slide them up to the starting byte
    return std_ulogic_vector(wide sll offset);
end function xfer_data_sel;
145
-- 23-bit logical right shifter used for DP -> SP float conversions.
-- The shift is done in two stages: shift(1 downto 0) gives a shift of
-- 0 to 3 bits, then shift(4 downto 2) gives a shift of 0, 4, 8, 12 or 16
-- bits.  Every other value of shift(4 downto 2) shifts by 20 bits.
-- Vacated bit positions are filled with zeroes.
function shifter_23r(frac: std_ulogic_vector(22 downto 0); shift: unsigned(4 downto 0))
    return std_ulogic_vector is
    variable fine   : std_ulogic_vector(22 downto 0) := (others => '0');
    variable coarse : std_ulogic_vector(22 downto 0) := (others => '0');
begin
    -- stage 1: shift by 0..3; the untouched upper bits stay zero
    case shift(1 downto 0) is
        when "00" =>
            fine := frac;
        when "01" =>
            fine(21 downto 0) := frac(22 downto 1);
        when "10" =>
            fine(20 downto 0) := frac(22 downto 2);
        when others =>
            fine(19 downto 0) := frac(22 downto 3);
    end case;
    -- stage 2: shift by a multiple of 4
    case shift(4 downto 2) is
        when "000" =>
            coarse := fine;
        when "001" =>
            coarse(18 downto 0) := fine(22 downto 4);
        when "010" =>
            coarse(14 downto 0) := fine(22 downto 8);
        when "011" =>
            coarse(10 downto 0) := fine(22 downto 12);
        when "100" =>
            coarse(6 downto 0) := fine(22 downto 16);
        when others =>
            coarse(2 downto 0) := fine(22 downto 20);
    end case;
    return coarse;
end;
178
-- 23-bit logical left shifter used for SP -> DP float conversions.
-- The shift is done in two stages: shift(1 downto 0) gives a shift of
-- 0 to 3 bits, then shift(4 downto 2) gives a shift of 0, 4, 8, 12 or 16
-- bits.  Every other value of shift(4 downto 2) shifts by 20 bits.
-- Vacated bit positions are filled with zeroes.
function shifter_23l(frac: std_ulogic_vector(22 downto 0); shift: unsigned(4 downto 0))
    return std_ulogic_vector is
    variable fine   : std_ulogic_vector(22 downto 0) := (others => '0');
    variable coarse : std_ulogic_vector(22 downto 0) := (others => '0');
begin
    -- stage 1: shift by 0..3; the untouched lower bits stay zero
    case shift(1 downto 0) is
        when "00" =>
            fine := frac;
        when "01" =>
            fine(22 downto 1) := frac(21 downto 0);
        when "10" =>
            fine(22 downto 2) := frac(20 downto 0);
        when others =>
            fine(22 downto 3) := frac(19 downto 0);
    end case;
    -- stage 2: shift by a multiple of 4
    case shift(4 downto 2) is
        when "000" =>
            coarse := fine;
        when "001" =>
            coarse(22 downto 4) := fine(18 downto 0);
        when "010" =>
            coarse(22 downto 8) := fine(14 downto 0);
        when "011" =>
            coarse(22 downto 12) := fine(10 downto 0);
        when "100" =>
            coarse(22 downto 16) := fine(6 downto 0);
        when others =>
            coarse(22 downto 20) := fine(2 downto 0);
    end case;
    return coarse;
end;
211
212 begin
-- Calculate the address in the first cycle: addr1 + addr2 of the incoming
-- request.  The sum is forced to zero whenever the request is not valid.
lsu_sum <= (others => '0') when l_in.valid /= '1'
           else std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2));
215
-- State register: copies rin into r on every rising clock edge.
-- While rst is asserted only the state, busy and do_update fields are
-- forced to their idle values; no other field of r is assigned, so the
-- rest of the record keeps whatever it held before.
loadstore1_0: process(clk)
begin
    if rising_edge(clk) then
        if rst /= '1' then
            r <= rin;
        else
            r.do_update <= '0';
            r.busy <= '0';
            r.state <= IDLE;
        end if;
    end if;
end process;
228
-- Single-precision <-> double-precision format conversion, present only
-- when the FPU is configured in.
ls_fp_conv: if HAS_FPU generate
    -- Convert DP data to SP for stfs.
    -- Reads the DP value on l_in.data and drives store_sp_data.
    dp_to_sp: process(all)
        variable dp_exp  : unsigned(10 downto 0);
        variable mant    : std_ulogic_vector(22 downto 0);
        variable rshift  : unsigned(4 downto 0);
        variable sp_word : std_ulogic_vector(31 downto 0);
    begin
        dp_exp := unsigned(l_in.data(62 downto 52));

        -- Start from a zero with the sign of the source; this is also the
        -- result when the exponent is below 874.
        sp_word := (others => '0');
        sp_word(31) := l_in.data(63);

        if dp_exp > 896 then
            -- Keep the top exponent bit, then take the 30 bits from bit 58
            -- down to bit 29 (low exponent bits and upper fraction bits).
            sp_word(30 downto 0) := l_in.data(62) & l_in.data(58 downto 29);
        elsif dp_exp >= 874 then
            -- denormalization required: make the leading 1 explicit and
            -- shift right by the low 5 bits of (0 - exponent); the SP
            -- exponent field stays zero
            mant := '1' & l_in.data(51 downto 30);
            rshift := 0 - dp_exp(4 downto 0);
            sp_word(22 downto 0) := shifter_23r(mant, rshift);
        end if;

        store_sp_data <= sp_word;
    end process;

    -- Convert SP data to DP for lfs.
    -- Reads the registered SP value (r.ld_sp_data) together with the
    -- non-zero flag and leading-zero count of its fraction (r.ld_sp_nz,
    -- r.ld_sp_lz) and drives load_dp_data.
    sp_to_dp: process(all)
        variable sp_exp  : unsigned(7 downto 0);
        variable dp_exp  : unsigned(10 downto 0);
        variable exp_any : std_ulogic;
        variable exp_all : std_ulogic;
        variable mant    : std_ulogic_vector(22 downto 0);
        variable lshift  : unsigned(4 downto 0);
        variable dp_word : std_ulogic_vector(63 downto 0);
    begin
        mant := r.ld_sp_data(22 downto 0);
        sp_exp := unsigned(r.ld_sp_data(30 downto 23));
        exp_any := or (r.ld_sp_data(30 downto 23));
        exp_all := and (r.ld_sp_data(30 downto 23));
        lshift := (others => '0');

        if exp_any = '1' then
            if exp_all = '1' then
                -- infinity or NaN
                dp_exp := to_unsigned(2047, 11);
            else
                -- finite normalized value
                dp_exp := 896 + resize(sp_exp, 11);
            end if;
        elsif r.ld_sp_nz = '0' then
            -- zero
            dp_exp := to_unsigned(0, 11);
        else
            -- denormalized SP operand, need to normalize: shift the
            -- fraction left past its leading 1 and lower the exponent to
            -- match
            dp_exp := 896 - resize(unsigned(r.ld_sp_lz), 11);
            lshift := unsigned(r.ld_sp_lz(4 downto 0)) + 1;
        end if;

        -- Assemble sign, exponent and fraction; the low 29 fraction bits
        -- of the DP result are always zero.
        dp_word := (others => '0');
        dp_word(63) := r.ld_sp_data(31);
        dp_word(62 downto 52) := std_ulogic_vector(dp_exp);
        dp_word(51 downto 29) := shifter_23l(mant, lshift);
        load_dp_data <= dp_word;
    end process;
end generate;
281
-- Main combinational process of the load/store unit.
-- Starting from the registered state r and the current inputs it computes
-- the next state (v, assigned to rin at the end) and drives the outputs to
-- the dcache (d_out), the MMU (m_out), writeback (l_out) and execute1
-- (e_out).  Statement order matters: v starts as a copy of r and later
-- assignments override earlier ones.
282 loadstore1_1: process(all)
283 variable v : reg_stage_t;
284 variable brev_lenm1 : unsigned(2 downto 0);
285 variable byte_offset : unsigned(2 downto 0);
286 variable j : integer;
287 variable k : unsigned(2 downto 0);
288 variable kk : unsigned(3 downto 0);
289 variable long_sel : std_ulogic_vector(15 downto 0);
290 variable byte_sel : std_ulogic_vector(7 downto 0);
291 variable req : std_ulogic;
292 variable busy : std_ulogic;
293 variable addr : std_ulogic_vector(63 downto 0);
294 variable maddr : std_ulogic_vector(63 downto 0);
295 variable wdata : std_ulogic_vector(63 downto 0);
296 variable write_enable : std_ulogic;
297 variable do_update : std_ulogic;
298 variable done : std_ulogic;
299 variable data_permuted : std_ulogic_vector(63 downto 0);
300 variable data_trimmed : std_ulogic_vector(63 downto 0);
301 variable store_data : std_ulogic_vector(63 downto 0);
302 variable data_in : std_ulogic_vector(63 downto 0);
303 variable byte_rev : std_ulogic;
304 variable length : std_ulogic_vector(3 downto 0);
305 variable negative : std_ulogic;
306 variable sprn : std_ulogic_vector(9 downto 0);
307 variable exception : std_ulogic;
308 variable next_addr : std_ulogic_vector(63 downto 0);
309 variable mmureq : std_ulogic;
310 variable dsisr : std_ulogic_vector(31 downto 0);
311 variable mmu_mtspr : std_ulogic;
312 variable itlb_fault : std_ulogic;
313 variable misaligned : std_ulogic;
314 variable fp_reg_conv : std_ulogic;
315 begin
-- Defaults: keep the registered state and issue no request this cycle.
316 v := r;
317 req := '0';
318 mmu_mtspr := '0';
319 itlb_fault := '0';
320 sprn := std_ulogic_vector(to_unsigned(decode_spr_num(l_in.insn), 10));
321 dsisr := (others => '0');
322 mmureq := '0';
323 fp_reg_conv := '0';
-- Default writeback source for the next cycle is the trimmed load data
-- (the "others" arm of the case on r.wr_sel near the end of this process).
324 v.wr_sel := "11";
325
326 write_enable := '0';
327
-- do_update is the write enable used when the writeback mux selects the
-- rA update; it starts from the registered request and may also be set
-- directly in the ACK_WAIT state below.
328 do_update := r.do_update;
329 v.do_update := '0';
330
331 -- load data formatting
332 -- shift and byte-reverse data bytes
-- r.byte_index(i) names the byte of d_in.data that becomes result byte i;
-- it was worked out in the previous cycle.
333 for i in 0 to 7 loop
334 j := to_integer(r.byte_index(i)) * 8;
335 data_permuted(i * 8 + 7 downto i * 8) := d_in.data(j + 7 downto j);
336 end loop;
337
338 -- Work out the sign bit for sign extension.
339 -- For unaligned loads crossing two dwords, the sign bit is in the
340 -- first dword for big-endian (byte_reverse = 1), or the second dword
341 -- for little-endian.
342 if r.dwords_done = '1' and r.byte_reverse = '1' then
343 negative := (r.length(3) and r.load_data(63)) or
344 (r.length(2) and r.load_data(31)) or
345 (r.length(1) and r.load_data(15)) or
346 (r.length(0) and r.load_data(7));
347 else
348 negative := (r.length(3) and data_permuted(63)) or
349 (r.length(2) and data_permuted(31)) or
350 (r.length(1) and data_permuted(15)) or
351 (r.length(0) and data_permuted(7));
352 end if;
353
354 -- trim and sign-extend
-- Per-byte source selection from r.trim_ctl: "11" takes the byte saved
-- from the first doubleword, "10" the byte from the current dcache data,
-- "01" fills with the sign bit and anything else gives zero.
355 for i in 0 to 7 loop
356 case r.trim_ctl(i) is
357 when "11" =>
358 data_trimmed(i * 8 + 7 downto i * 8) := r.load_data(i * 8 + 7 downto i * 8);
359 when "10" =>
360 data_trimmed(i * 8 + 7 downto i * 8) := data_permuted(i * 8 + 7 downto i * 8);
361 when "01" =>
362 data_trimmed(i * 8 + 7 downto i * 8) := (others => negative);
363 when others =>
364 data_trimmed(i * 8 + 7 downto i * 8) := x"00";
365 end case;
366 end loop;
367
368 if HAS_FPU then
369 -- Single-precision FP conversion
-- Register the inputs of the conversion processes: the converted store
-- data, and the trimmed load data with the non-zero flag and
-- leading-zero count of its low 23 bits.
370 v.st_sp_data := store_sp_data;
371 v.ld_sp_data := data_trimmed(31 downto 0);
372 v.ld_sp_nz := or (data_trimmed(22 downto 0));
373 v.ld_sp_lz := count_left_zeroes(data_trimmed(22 downto 0));
374 end if;
375
376 -- Byte reversing and rotating for stores.
377 -- Done in the first cycle (when l_in.valid = 1) for integer stores
378 -- and DP float stores, and in the second cycle for SP float stores.
-- In all other cycles the previously formatted r.store_data is kept.
379 store_data := r.store_data;
380 if l_in.valid = '1' or (HAS_FPU and r.state = FPR_CONV) then
381 if HAS_FPU and r.state = FPR_CONV then
382 data_in := x"00000000" & r.st_sp_data;
383 byte_offset := unsigned(r.addr(2 downto 0));
384 byte_rev := r.byte_reverse;
385 length := r.length;
386 else
387 data_in := l_in.data;
388 byte_offset := unsigned(lsu_sum(2 downto 0));
389 byte_rev := l_in.byte_reverse;
390 length := l_in.length;
391 end if;
392 brev_lenm1 := "000";
393 if byte_rev = '1' then
394 brev_lenm1 := unsigned(length(2 downto 0)) - 1;
395 end if;
-- Byte i of the dcache data comes from source byte
-- (i - byte_offset) xor brev_lenm1, computed modulo 8.
396 for i in 0 to 7 loop
397 k := (to_unsigned(i, 3) - byte_offset) xor brev_lenm1;
398 j := to_integer(k) * 8;
399 store_data(i * 8 + 7 downto i * 8) := data_in(j + 7 downto j);
400 end loop;
401 end if;
402 v.store_data := store_data;
403
404 -- compute (addr + 8) & ~7 for the second doubleword when unaligned
405 next_addr := std_ulogic_vector(unsigned(r.addr(63 downto 3)) + 1) & "000";
406
407 -- Busy calculation.
408 -- We need to minimize the delay from clock to busy valid because it
409 -- gates the start of execution of the next instruction.
410 busy := r.busy and not ((r.wait_dcache and d_in.valid) or (r.wait_mmu and m_in.done));
411 v.busy := busy;
412
-- An operation in any state other than IDLE is done in the cycle where
-- busy is clear.
413 done := '0';
414 if r.state /= IDLE and busy = '0' then
415 done := '1';
416 end if;
417 exception := '0';
418
-- Address and byte enables for a request issued by the state machine:
-- those of the second doubleword once the first one is done or while in
-- SECOND_REQ, otherwise those of the first doubleword.
419 if r.dwords_done = '1' or r.state = SECOND_REQ then
420 addr := next_addr;
421 byte_sel := r.second_bytes;
422 else
423 addr := r.addr;
424 byte_sel := r.first_bytes;
425 end if;
426 if r.mode_32bit = '1' then
427 addr(63 downto 32) := (others => '0');
428 end if;
429 maddr := addr;
430
431 case r.state is
432 when IDLE =>
433
434 when FPR_CONV =>
-- The SP store data is now available: issue the request.
435 req := '1';
436 if r.second_bytes /= "00000000" then
437 v.state := SECOND_REQ;
438 else
439 v.state := ACK_WAIT;
440 end if;
441
442 when SECOND_REQ =>
443 req := '1';
444 v.state := ACK_WAIT;
445 v.last_dword := '0';
446
447 when ACK_WAIT =>
448 -- r.wr_sel gets set one cycle after we come into ACK_WAIT state,
449 -- which is OK because the dcache always takes at least two cycles.
450 if r.update = '1' and (r.load = '0' or (HAS_FPU and r.load_sp = '1')) then
451 v.wr_sel := "01";
452 end if;
453 if d_in.error = '1' then
454 -- dcache will discard the second request if it
455 -- gets an error on the 1st of two requests
456 if d_in.cache_paradox = '1' then
457 -- signal an interrupt straight away
458 exception := '1';
459 dsisr(63 - 38) := not r.load;
460 -- XXX there is no architected bit for this
461 dsisr(63 - 35) := d_in.cache_paradox;
462 else
463 -- Look up the translation for TLB miss
464 -- and also for permission error and RC error
465 -- in case the PTE has been updated.
466 mmureq := '1';
467 v.state := MMU_LOOKUP;
468 end if;
469 end if;
470 if d_in.valid = '1' then
471 if r.last_dword = '0' then
-- Ack for the first of two doublewords: remember its data (loads only)
-- and keep waiting for the second ack.
472 v.dwords_done := '1';
473 v.last_dword := '1';
474 if r.load = '1' then
475 v.load_data := data_permuted;
476 end if;
477 else
-- Ack for the last (or only) doubleword.
478 write_enable := r.load and not r.load_sp;
479 if HAS_FPU and r.load_sp = '1' then
480 -- SP to DP conversion takes a cycle
481 -- Write back rA update in this cycle if needed
482 do_update := r.update;
483 v.wr_sel := "10";
484 v.state := FINISH_LFS;
485 elsif r.extra_cycle = '1' then
486 -- loads with rA update need an extra cycle
487 v.wr_sel := "01";
488 v.state := COMPLETE;
489 v.do_update := r.update;
490 else
491 -- stores write back rA update in this cycle
492 do_update := r.update;
493 end if;
494 v.busy := '0';
495 end if;
496 end if;
497 -- r.wait_dcache gets set one cycle after we come into ACK_WAIT state,
498 -- which is OK because the dcache always takes at least two cycles.
499 v.wait_dcache := r.last_dword and not r.extra_cycle;
500
501 when MMU_LOOKUP =>
502 if m_in.done = '1' then
503 if r.instr_fault = '0' then
504 -- retry the request now that the MMU has installed a TLB entry
505 req := '1';
506 if r.last_dword = '0' then
507 v.state := SECOND_REQ;
508 else
509 v.state := ACK_WAIT;
510 end if;
511 end if;
512 end if;
513 if m_in.err = '1' then
-- The MMU reported a fault: raise an exception and collect the DSISR bits.
514 exception := '1';
515 dsisr(63 - 33) := m_in.invalid;
516 dsisr(63 - 36) := m_in.perm_error;
517 dsisr(63 - 38) := not r.load;
518 dsisr(63 - 44) := m_in.badtree;
519 dsisr(63 - 45) := m_in.rc_error;
520 end if;
521
-- TLBIE_WAIT and FINISH_LFS have no work of their own; they end through
-- the done/exception check that follows this case statement.
522 when TLBIE_WAIT =>
523
524 when FINISH_LFS =>
525
526 when COMPLETE =>
527 exception := r.align_intr;
528
529 end case;
530
-- A finished or faulting operation returns the state machine to IDLE.
531 if done = '1' or exception = '1' then
532 v.state := IDLE;
533 v.busy := '0';
534 end if;
535
536 -- Note that l_in.valid is gated with busy inside execute1
-- New request: latch its parameters and decide how to start it.  These
-- assignments override the state-machine results computed above.
537 if l_in.valid = '1' then
538 v.mode_32bit := l_in.mode_32bit;
539 v.load := '0';
540 v.dcbz := '0';
541 v.tlbie := '0';
542 v.instr_fault := '0';
543 v.align_intr := '0';
544 v.dwords_done := '0';
545 v.last_dword := '1';
546 v.write_reg := l_in.write_reg;
547 v.length := l_in.length;
548 v.byte_reverse := l_in.byte_reverse;
549 v.sign_extend := l_in.sign_extend;
550 v.update := l_in.update;
551 v.update_reg := l_in.update_reg;
552 v.xerc := l_in.xerc;
553 v.reserve := l_in.reserve;
554 v.rc := l_in.rc;
555 v.nc := l_in.ci;
556 v.virt_mode := l_in.virt_mode;
557 v.priv_mode := l_in.priv_mode;
558 v.load_sp := '0';
559 v.wait_dcache := '0';
560 v.wait_mmu := '0';
561 v.do_update := '0';
562 v.extra_cycle := '0';
563
564 addr := lsu_sum;
565 if l_in.second = '1' then
566 -- for the second half of a 16-byte transfer, use next_addr
567 addr := next_addr;
568 end if;
569 if l_in.mode_32bit = '1' then
570 addr(63 downto 32) := (others => '0');
571 end if;
572 v.addr := addr;
573 maddr := l_in.addr2; -- address from RB for tlbie
574
575 -- XXX Temporary hack. Mark the op as non-cachable if the address
576 -- is the form 0xc------- for a real-mode access.
577 if addr(31 downto 28) = "1100" and l_in.virt_mode = '0' then
578 v.nc := '1';
579 end if;
580
581 if l_in.second = '0' then
582 -- Do length_to_sel and work out if we are doing 2 dwords
583 long_sel := xfer_data_sel(l_in.length, lsu_sum(2 downto 0));
584 byte_sel := long_sel(7 downto 0);
585 v.first_bytes := byte_sel;
586 v.second_bytes := long_sel(15 downto 8);
587 else
-- second half of a 16-byte transfer: reuse the byte enables worked out
-- for the first half
588 byte_sel := r.first_bytes;
589 long_sel := r.second_bytes & r.first_bytes;
590 end if;
591
592 -- check alignment for larx/stcx
-- misaligned is set when any address bit below the access length is
-- non-zero, i.e. (length - 1) and addr(2 downto 0) is not all zero.
593 misaligned := or (std_ulogic_vector(unsigned(l_in.length(2 downto 0)) - 1) and addr(2 downto 0));
594 v.align_intr := l_in.reserve and misaligned;
595 if l_in.repeat = '1' and l_in.second = '0' and addr(3) = '1' then
596 -- length is really 16 not 8
597 -- Make misaligned lq cause an alignment interrupt in LE mode,
598 -- in order to avoid the case with RA = RT + 1 where the second half
599 -- faults but the first doesn't (and updates RT+1, destroying RA).
600 -- The equivalent BE case doesn't occur because RA = RT is illegal.
601 misaligned := '1';
602 if l_in.reserve = '1' or (l_in.op = OP_LOAD and l_in.byte_reverse = '0') then
603 v.align_intr := '1';
604 end if;
605 end if;
606
607 v.atomic := not misaligned;
608 v.atomic_last := not misaligned and (l_in.second or not l_in.repeat);
609
610 case l_in.op is
611 when OP_STORE =>
612 if HAS_FPU and l_in.is_32bit = '1' then
-- SP float store: spend one cycle in FPR_CONV before issuing the request
613 v.state := FPR_CONV;
614 fp_reg_conv := '1';
615 else
616 req := '1';
617 end if;
618 when OP_LOAD =>
619 req := '1';
620 v.load := '1';
621 -- Allow an extra cycle for RA update on loads
622 v.extra_cycle := l_in.update;
623 if HAS_FPU and l_in.is_32bit = '1' then
624 -- Allow an extra cycle for SP->DP precision conversion
625 v.load_sp := '1';
626 v.extra_cycle := '1';
627 end if;
628 when OP_DCBZ =>
-- dcbz on a non-cacheable access is turned into an alignment interrupt
629 v.align_intr := v.nc;
630 req := '1';
631 v.dcbz := '1';
632 when OP_TLBIE =>
633 mmureq := '1';
634 v.tlbie := '1';
635 v.state := TLBIE_WAIT;
636 v.wait_mmu := '1';
637 when OP_MFSPR =>
638 v.wr_sel := "00";
639 -- partial decode on SPR number should be adequate given
640 -- the restricted set that get sent down this path
641 if sprn(9) = '0' and sprn(5) = '0' then
642 if sprn(0) = '0' then
643 v.sprval := x"00000000" & r.dsisr;
644 else
645 v.sprval := r.dar;
646 end if;
647 else
648 -- reading one of the SPRs in the MMU
649 v.sprval := m_in.sprval;
650 end if;
651 v.state := COMPLETE;
652 when OP_MTSPR =>
653 if sprn(9) = '0' and sprn(5) = '0' then
654 if sprn(0) = '0' then
655 v.dsisr := l_in.data(31 downto 0);
656 else
657 v.dar := l_in.data;
658 end if;
659 v.state := COMPLETE;
660 else
661 -- writing one of the SPRs in the MMU
662 mmu_mtspr := '1';
663 v.state := TLBIE_WAIT;
664 v.wait_mmu := '1';
665 end if;
666 when OP_FETCH_FAILED =>
667 -- send it to the MMU to do the radix walk
668 maddr := l_in.nia;
669 v.instr_fault := '1';
670 mmureq := '1';
671 v.state := MMU_LOOKUP;
672 v.wait_mmu := '1';
673 when others =>
674 assert false report "unknown op sent to loadstore1";
675 end case;
676
-- For a dcache request choose the next state: COMPLETE when an alignment
-- interrupt is pending, SECOND_REQ when the upper half of long_sel shows
-- the access spills into a second doubleword, ACK_WAIT otherwise.
677 if req = '1' then
678 if v.align_intr = '1' then
679 v.state := COMPLETE;
680 elsif long_sel(15 downto 8) = "00000000" then
681 v.state := ACK_WAIT;
682 else
683 v.state := SECOND_REQ;
684 end if;
685 end if;
686
-- Ops that set none of these flags (mfspr, and mtspr to DSISR/DAR) leave
-- busy clear.
687 v.busy := req or mmureq or mmu_mtspr or fp_reg_conv;
688 end if;
689
690 -- Work out load formatter controls for next cycle
691 byte_offset := unsigned(v.addr(2 downto 0));
692 brev_lenm1 := "000";
693 if v.byte_reverse = '1' then
694 brev_lenm1 := unsigned(v.length(2 downto 0)) - 1;
695 end if;
696
-- kk is the 4-bit source position of result byte i: its low 3 bits index
-- the byte within a doubleword and bit 3 (the carry) is recorded in
-- use_second.
697 for i in 0 to 7 loop
698 kk := ('0' & (to_unsigned(i, 3) xor brev_lenm1)) + ('0' & byte_offset);
699 v.use_second(i) := kk(3);
700 v.byte_index(i) := kk(2 downto 0);
701 end loop;
702
-- Result bytes below the access length come from memory data (from the
-- saved first doubleword when use_second is clear and the first doubleword
-- is already done); bytes at or above the length are zero or sign fill
-- according to sign_extend.
703 for i in 0 to 7 loop
704 if i < to_integer(unsigned(v.length)) then
705 if v.dwords_done = '1' then
706 v.trim_ctl(i) := '1' & not v.use_second(i);
707 else
708 v.trim_ctl(i) := "10";
709 end if;
710 else
711 v.trim_ctl(i) := '0' & v.sign_extend;
712 end if;
713 end loop;
714
715 -- Update outputs to dcache
-- No request is sent to the dcache when an alignment interrupt is pending.
716 d_out.valid <= req and not v.align_intr;
717 d_out.load <= v.load;
718 d_out.dcbz <= v.dcbz;
719 d_out.nc <= v.nc;
720 d_out.reserve <= v.reserve;
721 d_out.atomic <= v.atomic;
722 d_out.atomic_last <= v.atomic_last;
723 d_out.addr <= addr;
724 d_out.data <= store_data;
725 d_out.byte_sel <= byte_sel;
726 d_out.virt_mode <= v.virt_mode;
727 d_out.priv_mode <= v.priv_mode;
728
729 -- Update outputs to MMU
730 m_out.valid <= mmureq;
731 m_out.iside <= v.instr_fault;
732 m_out.load <= r.load;
733 m_out.priv <= r.priv_mode;
734 m_out.tlbie <= v.tlbie;
735 m_out.mtspr <= mmu_mtspr;
736 m_out.sprn <= sprn;
737 m_out.addr <= maddr;
738 m_out.slbia <= l_in.insn(7);
739 m_out.rs <= l_in.data;
740
741 -- Update outputs to writeback
742 -- Multiplex either cache data to the destination GPR or
743 -- the address for the rA update.
744 l_out.valid <= done;
-- r.wr_sel: "00" = SPR value (mfspr), "01" = address for the rA update,
-- "10" = SP load data converted to DP, otherwise the trimmed load data.
745 case r.wr_sel is
746 when "00" =>
747 l_out.write_enable <= '1';
748 l_out.write_reg <= r.write_reg;
749 l_out.write_data <= r.sprval;
750 when "01" =>
751 l_out.write_enable <= do_update;
752 l_out.write_reg <= gpr_to_gspr(r.update_reg);
753 l_out.write_data <= r.addr;
754 when "10" =>
755 l_out.write_enable <= '1';
756 l_out.write_reg <= r.write_reg;
757 l_out.write_data <= load_dp_data;
758 when others =>
759 l_out.write_enable <= write_enable;
760 l_out.write_reg <= r.write_reg;
761 l_out.write_data <= data_trimmed;
762 end case;
763 l_out.xerc <= r.xerc;
764 l_out.rc <= r.rc and done;
765 l_out.store_done <= d_in.store_done;
766
767 -- update exception info back to execute1
768 e_out.busy <= busy;
769 e_out.exception <= exception;
770 e_out.alignment <= r.align_intr;
771 e_out.instr_fault <= r.instr_fault;
772 e_out.invalid <= m_in.invalid;
773 e_out.badtree <= m_in.badtree;
774 e_out.perm_error <= m_in.perm_error;
775 e_out.rc_error <= m_in.rc_error;
776 e_out.segment_fault <= m_in.segerr;
-- For exceptions other than instruction faults, record the faulting
-- address in DAR; DSISR is only updated when the cause is neither a
-- segment error nor an alignment interrupt.
777 if exception = '1' and r.instr_fault = '0' then
778 v.dar := addr;
779 if m_in.segerr = '0' and r.align_intr = '0' then
780 v.dsisr := dsisr;
781 end if;
782 end if;
783
784 -- Update registers
785 rin <= v;
786
787 end process;
788
-- Optional logging, present only when LOG_LENGTH is non-zero.
-- On each rising clock edge a 10-bit snapshot is registered and presented
-- on log_out.  Bit layout, from bit 9 down to bit 0:
--   e_out.busy, e_out.exception, l_out.valid, m_out.valid, d_out.valid,
--   m_in.done, r.dwords_done, then the position number of r.state in
--   state_t as a 3-bit value.
l1_log: if LOG_LENGTH > 0 generate
    signal log_data : std_ulogic_vector(9 downto 0);
begin
    ls1_log: process(clk)
        variable state_code : std_ulogic_vector(2 downto 0);
    begin
        if rising_edge(clk) then
            state_code := std_ulogic_vector(to_unsigned(state_t'pos(r.state), 3));
            log_data(9) <= e_out.busy;
            log_data(8) <= e_out.exception;
            log_data(7) <= l_out.valid;
            log_data(6) <= m_out.valid;
            log_data(5) <= d_out.valid;
            log_data(4) <= m_in.done;
            log_data(3) <= r.dwords_done;
            log_data(2 downto 0) <= state_code;
        end if;
    end process;
    log_out <= log_data;
end generate;
807
808 end;