bring ulx3s frequency down to 12.5 mhz
[microwatt.git] / loadstore1.vhdl
1 library ieee;
2 use ieee.std_logic_1164.all;
3 use ieee.numeric_std.all;
4
5 library work;
6 use work.decode_types.all;
7 use work.common.all;
8 use work.insn_helpers.all;
9 use work.helpers.all;
10
11 -- 2 cycle LSU
12 -- We calculate the address in the first cycle
13
-- loadstore1: load/store unit.
-- Takes requests from execute1 on l_in, issues accesses to the dcache
-- (d_out / d_in) and translation or SPR requests to the MMU (m_out / m_in),
-- and reports results to writeback on l_out and a busy flag to execute1
-- on e_out.
14 entity loadstore1 is
15 generic (
16 HAS_FPU : boolean := true; -- when true, the SP<->DP conversion logic (ls_fp_conv) is generated
17 -- Non-zero to enable log data collection
18 LOG_LENGTH : natural := 0
19 );
20 port (
21 clk : in std_ulogic;
22 rst : in std_ulogic; -- sampled on the rising edge of clk (synchronous reset)
23
24 l_in : in Execute1ToLoadstore1Type; -- incoming request; acted on when l_in.valid = '1'
25 e_out : out Loadstore1ToExecute1Type; -- carries the busy flag back to execute1
26 l_out : out Loadstore1ToWritebackType; -- completion, write data and interrupt information
27
28 d_out : out Loadstore1ToDcacheType; -- request to the dcache
29 d_in : in DcacheToLoadstore1Type; -- data / valid / error response from the dcache
30
31 m_out : out Loadstore1ToMmuType; -- translation, tlbie and SPR requests to the MMU
32 m_in : in MmuToLoadstore1Type; -- done / err response and SPR read value from the MMU
33
34 dc_stall : in std_ulogic; -- not read anywhere in the architecture in this file
35
36 log_out : out std_ulogic_vector(9 downto 0) -- only driven when LOG_LENGTH > 0
37 );
38 end loadstore1;
39
40 -- Note, we don't currently use the stall output from the dcache because
41 -- we know it can take two requests without stalling when idle, we are
42 -- its only user, and we know it never stalls when idle.
43
44 architecture behave of loadstore1 is
45
46 -- State machine for unaligned loads/stores
-- The position of each literal (state_t'pos) is what the optional log
-- output records in its low 3 bits.
47 type state_t is (IDLE, -- ready for instruction
48 SECOND_REQ, -- send 2nd request of unaligned xfer
49 ACK_WAIT, -- waiting for ack from dcache
50 MMU_LOOKUP, -- waiting for MMU to look up translation
51 TLBIE_WAIT, -- waiting for MMU to finish doing a tlbie
52 FINISH_LFS, -- write back converted SP data for lfs*
53 COMPLETE -- extra cycle to complete an operation
54 );
55
-- Per-destination-byte controls used by the load data formatter:
-- byte_index_t holds, for each result byte, which byte of d_in.data to take;
-- trim_ctl_t holds, for each result byte, a 2-bit source select.
56 type byte_index_t is array(0 to 7) of unsigned(2 downto 0);
57 subtype byte_trim_t is std_ulogic_vector(1 downto 0);
58 type trim_ctl_t is array(0 to 7) of byte_trim_t;
59
-- All state registered by the loadstore1_0 process.
60 type reg_stage_t is record
61 -- latch most of the input request
62 load : std_ulogic; -- set when the latched op is OP_LOAD
63 tlbie : std_ulogic; -- set when the latched op is OP_TLBIE
64 dcbz : std_ulogic; -- set when the latched op is OP_DCBZ
65 addr : std_ulogic_vector(63 downto 0); -- latched access address (held across the update half of a load with update)
66 store_data : std_ulogic_vector(63 downto 0); -- store data as received (or converted to SP), before rotation
67 load_data : std_ulogic_vector(63 downto 0); -- permuted data of the first dword of a two-dword load
68 instr_tag : instr_tag_t;
69 write_reg : gspr_index_t;
70 length : std_ulogic_vector(3 downto 0);
71 byte_reverse : std_ulogic;
72 byte_offset : unsigned(2 downto 0); -- low 3 address bits, used to rotate store data
73 brev_mask : unsigned(2 downto 0); -- length - 1 when byte-reversing, else 0
74 sign_extend : std_ulogic;
75 update : std_ulogic;
76 xerc : xer_common_t;
77 reserve : std_ulogic;
78 atomic : std_ulogic;
79 atomic_last : std_ulogic;
80 rc : std_ulogic;
81 nc : std_ulogic; -- non-cacheable access
82 virt_mode : std_ulogic;
83 priv_mode : std_ulogic;
84 state : state_t;
85 dwords_done : std_ulogic; -- first dword of a two-dword transfer has been acknowledged
86 last_dword : std_ulogic; -- the next dcache response is for the final dword
87 first_bytes : std_ulogic_vector(7 downto 0); -- byte enables for the first dword
88 second_bytes : std_ulogic_vector(7 downto 0); -- byte enables for the second dword (all zero if not needed)
89 dar : std_ulogic_vector(63 downto 0); -- written on exceptions and by OP_MTSPR, read by OP_MFSPR
90 dsisr : std_ulogic_vector(31 downto 0); -- written on exceptions and by OP_MTSPR, read by OP_MFSPR
91 instr_fault : std_ulogic; -- set for OP_FETCH_FAILED (instruction-side MMU lookup)
92 align_intr : std_ulogic; -- request must raise an alignment interrupt instead of accessing memory
93 sprval : std_ulogic_vector(63 downto 0); -- value returned for OP_MFSPR
94 busy : std_ulogic;
95 wait_dcache : std_ulogic; -- busy may be released by d_in.valid
96 wait_mmu : std_ulogic; -- busy may be released by m_in.done
97 do_update : std_ulogic;
98 extra_cycle : std_ulogic; -- set for SP loads, which need a cycle for conversion
99 mode_32bit : std_ulogic;
100 byte_index : byte_index_t;
101 use_second : std_ulogic_vector(7 downto 0);
102 trim_ctl : trim_ctl_t;
103 load_sp : std_ulogic; -- single-precision FP load in progress
104 ld_sp_data : std_ulogic_vector(31 downto 0); -- low word of the trimmed load data
105 ld_sp_nz : std_ulogic; -- OR of the SP fraction bits
106 ld_sp_lz : std_ulogic_vector(5 downto 0); -- count_left_zeroes of the SP fraction bits
107 wr_sel : std_ulogic_vector(1 downto 0); -- selects the source of l_out.write_data
108 interrupt : std_ulogic;
109 intr_vec : integer range 0 to 16#fff#;
110 nia : std_ulogic_vector(63 downto 0); -- l_in.nia of the request; driven out as l_out.srr0
111 srr1 : std_ulogic_vector(15 downto 0);
112 end record;
113
-- r is the registered state, rin its next value computed combinatorially.
114 signal r, rin : reg_stage_t;
115 signal lsu_sum : std_ulogic_vector(63 downto 0); -- l_in.addr1 + l_in.addr2, zero when l_in.valid is not '1'
116
117 signal store_sp_data : std_ulogic_vector(31 downto 0); -- l_in.data converted to SP format (driven only if HAS_FPU)
118 signal load_dp_data : std_ulogic_vector(63 downto 0); -- r.ld_sp_data converted to DP format (driven only if HAS_FPU)
119
-- Map a one-hot access size (1, 2, 4 or 8 bytes) to a byte-enable mask
-- with that many low-order bits set.  Any other value of "length"
-- produces an all-zero mask.
function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is
    -- Ascending range, matching the index range a bare string literal
    -- would have when returned as an unconstrained std_ulogic_vector.
    variable mask : std_ulogic_vector(0 to 7);
begin
    if length = "0001" then
        mask := "00000001";
    elsif length = "0010" then
        mask := "00000011";
    elsif length = "0100" then
        mask := "00001111";
    elsif length = "1000" then
        mask := "11111111";
    else
        mask := "00000000";
    end if;
    return mask;
end function length_to_sel;
136
-- Byte enables for a possibly unaligned transfer.
-- The 8-bit mask for "size" is zero-extended to 16 bits and shifted left
-- by the low three address bits.  Bits 7..0 of the result select bytes in
-- the first doubleword and bits 15..8 select bytes in the following one.
function xfer_data_sel(size : in std_logic_vector(3 downto 0);
                       address : in std_logic_vector(2 downto 0))
    return std_ulogic_vector is
    variable wide_mask : unsigned(15 downto 0);
    variable offset    : natural;
begin
    wide_mask := resize(unsigned(length_to_sel(size)), 16);
    offset := to_integer(unsigned(address));
    return std_ulogic_vector(shift_left(wide_mask, offset));
end function xfer_data_sel;
149
-- 23-bit logical right shifter for DP -> SP float conversions.
-- The shift is applied in two stages: shift(1 downto 0) gives a shift of
-- 0..3 bits, then shift(4 downto 2) gives a further shift of 0, 4, 8, 12
-- or 16 bits.  Any other value of shift(4 downto 2) shifts by 20 bits.
-- Vacated high-order bits are filled with zeros.
function shifter_23r(frac: std_ulogic_vector(22 downto 0); shift: unsigned(4 downto 0))
    return std_ulogic_vector is
    variable fine_amt   : natural range 0 to 3;
    variable coarse_amt : natural range 0 to 20;
    variable shifted    : unsigned(22 downto 0);
begin
    -- first stage: 0 to 3 bit positions
    case shift(1 downto 0) is
        when "00"   => fine_amt := 0;
        when "01"   => fine_amt := 1;
        when "10"   => fine_amt := 2;
        when others => fine_amt := 3;
    end case;
    -- second stage: multiples of 4 bit positions
    case shift(4 downto 2) is
        when "000"  => coarse_amt := 0;
        when "001"  => coarse_amt := 4;
        when "010"  => coarse_amt := 8;
        when "011"  => coarse_amt := 12;
        when "100"  => coarse_amt := 16;
        when others => coarse_amt := 20;
    end case;
    shifted := shift_right(shift_right(unsigned(frac), fine_amt), coarse_amt);
    return std_ulogic_vector(shifted);
end;
182
-- 23-bit logical left shifter for SP -> DP float conversions.
-- The shift is applied in two stages: shift(1 downto 0) gives a shift of
-- 0..3 bits, then shift(4 downto 2) gives a further shift of 0, 4, 8, 12
-- or 16 bits.  Any other value of shift(4 downto 2) shifts by 20 bits.
-- Vacated low-order bits are filled with zeros.
function shifter_23l(frac: std_ulogic_vector(22 downto 0); shift: unsigned(4 downto 0))
    return std_ulogic_vector is
    variable fine_amt   : natural range 0 to 3;
    variable coarse_amt : natural range 0 to 20;
    variable shifted    : unsigned(22 downto 0);
begin
    -- first stage: 0 to 3 bit positions
    case shift(1 downto 0) is
        when "00"   => fine_amt := 0;
        when "01"   => fine_amt := 1;
        when "10"   => fine_amt := 2;
        when others => fine_amt := 3;
    end case;
    -- second stage: multiples of 4 bit positions
    case shift(4 downto 2) is
        when "000"  => coarse_amt := 0;
        when "001"  => coarse_amt := 4;
        when "010"  => coarse_amt := 8;
        when "011"  => coarse_amt := 12;
        when "100"  => coarse_amt := 16;
        when others => coarse_amt := 20;
    end case;
    shifted := shift_left(shift_left(unsigned(frac), fine_amt), coarse_amt);
    return std_ulogic_vector(shifted);
end;
215
216 begin
-- First-cycle address calculation: the sum of the two address operands
-- of the incoming request, forced to zero whenever l_in.valid is not '1'.
lsu_sum <= (others => '0') when l_in.valid /= '1'
           else std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2));
219
-- Register stage: on every rising clock edge r takes the value of rin,
-- except while rst is '1', when only the state, busy, do_update and
-- interrupt fields are forced to their idle values and all other
-- fields of r are left unassigned (they keep their previous contents).
loadstore1_0: process(clk)
begin
    if rising_edge(clk) then
        if rst /= '1' then
            r <= rin;
        else
            r.state <= IDLE;
            r.busy <= '0';
            r.do_update <= '0';
            r.interrupt <= '0';
        end if;
    end if;
end process;
233
234 ls_fp_conv: if HAS_FPU generate
-- Convert the DP value on l_in.data to SP format for stfs.
-- The sign is always copied.  With a biased DP exponent above 896 the
-- top exponent bit and data bits 58..29 are copied across.  With an
-- exponent from 874 to 896 the result is an SP denormal: the fraction,
-- with its leading 1 made explicit, is shifted right by
-- (0 - exponent) modulo 32.  Anything smaller gives a signed zero.
dp_to_sp: process(all)
    variable dp_exp : unsigned(10 downto 0);
    variable mant   : std_ulogic_vector(22 downto 0);
    variable rshift : unsigned(4 downto 0);
    variable sp     : std_ulogic_vector(31 downto 0);
begin
    sp := (others => '0');
    sp(31) := l_in.data(63);
    dp_exp := unsigned(l_in.data(62 downto 52));
    if dp_exp > 896 then
        sp(30) := l_in.data(62);
        sp(29 downto 0) := l_in.data(58 downto 29);
    elsif dp_exp >= 874 then
        -- denormalization required
        mant := '1' & l_in.data(51 downto 30);
        rshift := 0 - dp_exp(4 downto 0);
        sp(22 downto 0) := shifter_23r(mant, rshift);
    end if;
    store_sp_data <= sp;
end process;
254
-- Convert the SP value held in r.ld_sp_data to DP format for lfs.
-- The DP exponent depends on the SP exponent field:
--   all ones          -> 2047 (infinity or NaN)
--   non-zero          -> SP exponent + 896
--   zero, fraction 0  -> 0 (zero)
--   zero, fraction /= 0 -> denormal: 896 - leading-zero count, and the
--                        fraction is shifted left by that count + 1
-- The 23-bit fraction lands in bits 51..29; bits 28..0 are zero.
sp_to_dp: process(all)
    variable sp_exp     : unsigned(7 downto 0);
    variable dp_exp     : unsigned(10 downto 0);
    variable any_exp    : std_ulogic;
    variable all_exp    : std_ulogic;
    variable mant       : std_ulogic_vector(22 downto 0);
    variable norm_shift : unsigned(4 downto 0);
    variable dp         : std_ulogic_vector(63 downto 0);
begin
    sp_exp := unsigned(r.ld_sp_data(30 downto 23));
    mant := r.ld_sp_data(22 downto 0);
    any_exp := or (r.ld_sp_data(30 downto 23));
    all_exp := and (r.ld_sp_data(30 downto 23));
    norm_shift := (others => '0');

    if all_exp = '1' then
        -- infinity or NaN
        dp_exp := to_unsigned(2047, 11);
    elsif any_exp = '1' then
        -- finite normalized value
        dp_exp := 896 + resize(sp_exp, 11);
    elsif r.ld_sp_nz = '0' then
        -- zero
        dp_exp := to_unsigned(0, 11);
    else
        -- denormalized SP operand, need to normalize
        norm_shift := unsigned(r.ld_sp_lz(4 downto 0)) + 1;
        dp_exp := 896 - resize(unsigned(r.ld_sp_lz), 11);
    end if;

    dp := (others => '0');
    dp(63) := r.ld_sp_data(31);
    dp(62 downto 52) := std_ulogic_vector(dp_exp);
    dp(51 downto 29) := shifter_23l(mant, norm_shift);
    load_dp_data <= dp;
end process;
285 end generate;
286
-- loadstore1_1: combinatorial process that computes the next register
-- value (rin) and every output from the registered state r and the inputs.
-- The statements are evaluated in this order:
--   1. format load data from d_in.data (permute, find sign, trim)
--   2. rotate / byte-reverse the store data held in r.store_data
--   3. work out busy / done and the address and byte selects for a
--      request that is already in flight
--   4. advance the state machine according to r.state
--   5. latch a new request when l_in.valid = '1'
--   6. precompute store and load formatter controls for the next cycle
--   7. on an exception, choose the interrupt vector and DAR/DSISR/SRR1
--   8. drive d_out, m_out, l_out, e_out and rin
-- Later assignments to the same variable deliberately override earlier
-- ones, so the order of the sections matters.
287 loadstore1_1: process(all)
288 variable v : reg_stage_t;
289 variable brev_lenm1 : unsigned(2 downto 0);
290 variable byte_offset : unsigned(2 downto 0);
291 variable j : integer;
292 variable k : unsigned(2 downto 0);
293 variable kk : unsigned(3 downto 0);
294 variable long_sel : std_ulogic_vector(15 downto 0);
295 variable byte_sel : std_ulogic_vector(7 downto 0);
296 variable req : std_ulogic;
297 variable busy : std_ulogic;
298 variable addr : std_ulogic_vector(63 downto 0);
299 variable maddr : std_ulogic_vector(63 downto 0);
300 variable wdata : std_ulogic_vector(63 downto 0);
301 variable write_enable : std_ulogic;
302 variable do_update : std_ulogic;
303 variable done : std_ulogic;
304 variable data_permuted : std_ulogic_vector(63 downto 0);
305 variable data_trimmed : std_ulogic_vector(63 downto 0);
306 variable store_data : std_ulogic_vector(63 downto 0);
307 variable byte_rev : std_ulogic;
308 variable length : std_ulogic_vector(3 downto 0);
309 variable negative : std_ulogic;
310 variable sprn : std_ulogic_vector(9 downto 0);
311 variable exception : std_ulogic;
312 variable next_addr : std_ulogic_vector(63 downto 0);
313 variable mmureq : std_ulogic;
314 variable dsisr : std_ulogic_vector(31 downto 0);
315 variable mmu_mtspr : std_ulogic;
316 variable itlb_fault : std_ulogic;
317 variable misaligned : std_ulogic;
318 begin
-- Start from the current register contents; any field of v that is not
-- assigned below keeps its value.
319 v := r;
320 req := '0';
321 mmu_mtspr := '0';
322 itlb_fault := '0';
323 sprn := std_ulogic_vector(to_unsigned(decode_spr_num(l_in.insn), 10));
324 dsisr := (others => '0');
325 mmureq := '0';
-- Default writeback source for the next cycle: "11" is handled by the
-- "others" arm of the r.wr_sel case near the end (data_trimmed).
326 v.wr_sel := "11";
327
328 write_enable := '0';
329
330 do_update := r.do_update;
331 v.do_update := '0';
332
333 -- load data formatting
334 -- shift and byte-reverse data bytes
335 for i in 0 to 7 loop
336 j := to_integer(r.byte_index(i)) * 8;
337 data_permuted(i * 8 + 7 downto i * 8) := d_in.data(j + 7 downto j);
338 end loop;
339
340 -- Work out the sign bit for sign extension.
341 -- For unaligned loads crossing two dwords, the sign bit is in the
342 -- first dword for big-endian (byte_reverse = 1), or the second dword
343 -- for little-endian.
344 if r.dwords_done = '1' and r.byte_reverse = '1' then
345 negative := (r.length(3) and r.load_data(63)) or
346 (r.length(2) and r.load_data(31)) or
347 (r.length(1) and r.load_data(15)) or
348 (r.length(0) and r.load_data(7));
349 else
350 negative := (r.length(3) and data_permuted(63)) or
351 (r.length(2) and data_permuted(31)) or
352 (r.length(1) and data_permuted(15)) or
353 (r.length(0) and data_permuted(7));
354 end if;
355
356 -- trim and sign-extend
-- Per-byte source select: "11" = byte saved earlier in r.load_data,
-- "10" = byte from this cycle's data_permuted, "01" = copies of the sign
-- bit, anything else = zero.
357 for i in 0 to 7 loop
358 case r.trim_ctl(i) is
359 when "11" =>
360 data_trimmed(i * 8 + 7 downto i * 8) := r.load_data(i * 8 + 7 downto i * 8);
361 when "10" =>
362 data_trimmed(i * 8 + 7 downto i * 8) := data_permuted(i * 8 + 7 downto i * 8);
363 when "01" =>
364 data_trimmed(i * 8 + 7 downto i * 8) := (others => negative);
365 when others =>
366 data_trimmed(i * 8 + 7 downto i * 8) := x"00";
367 end case;
368 end loop;
369
370 if HAS_FPU then
371 -- Single-precision FP conversion for loads
372 v.ld_sp_data := data_trimmed(31 downto 0);
373 v.ld_sp_nz := or (data_trimmed(22 downto 0));
374 v.ld_sp_lz := count_left_zeroes(data_trimmed(22 downto 0));
375 end if;
376
377 -- Byte reversing and rotating for stores.
378 -- Done in the second cycle (the cycle after l_in.valid = 1).
379 for i in 0 to 7 loop
380 k := (to_unsigned(i, 3) - r.byte_offset) xor r.brev_mask;
381 j := to_integer(k) * 8;
382 store_data(i * 8 + 7 downto i * 8) := r.store_data(j + 7 downto j);
383 end loop;
384
385 -- compute (addr + 8) & ~7 for the second doubleword when unaligned
386 next_addr := std_ulogic_vector(unsigned(r.addr(63 downto 3)) + 1) & "000";
387
388 -- Busy calculation.
389 -- We need to minimize the delay from clock to busy valid because it
390 -- gates the start of execution of the next instruction.
391 busy := r.busy and not ((r.wait_dcache and d_in.valid) or (r.wait_mmu and m_in.done));
392 v.busy := busy;
393
-- An operation completes in the cycle where the state is not IDLE and
-- busy has dropped.
394 done := '0';
395 if r.state /= IDLE and busy = '0' then
396 done := '1';
397 end if;
398 exception := '0';
399
-- Address and byte selects for a request already in flight; these are
-- overridden further down when a new request arrives on l_in.
400 if r.dwords_done = '1' or r.state = SECOND_REQ then
401 addr := next_addr;
402 byte_sel := r.second_bytes;
403 else
404 addr := r.addr;
405 byte_sel := r.first_bytes;
406 end if;
407 if r.mode_32bit = '1' then
408 addr(63 downto 32) := (others => '0');
409 end if;
410 maddr := addr;
411
412 case r.state is
413 when IDLE =>
414
415 when SECOND_REQ =>
416 req := '1';
417 v.state := ACK_WAIT;
418 v.last_dword := '0';
419
420 when ACK_WAIT =>
421 -- r.wr_sel gets set one cycle after we come into ACK_WAIT state,
422 -- which is OK because the dcache always takes at least two cycles.
423 if r.update = '1' and r.load = '0' then
424 v.wr_sel := "01";
425 end if;
426 if d_in.error = '1' then
427 -- dcache will discard the second request if it
428 -- gets an error on the 1st of two requests
429 if d_in.cache_paradox = '1' then
430 -- signal an interrupt straight away
431 exception := '1';
432 dsisr(63 - 38) := not r.load;
433 -- XXX there is no architected bit for this
434 dsisr(63 - 35) := d_in.cache_paradox;
435 else
436 -- Look up the translation for TLB miss
437 -- and also for permission error and RC error
438 -- in case the PTE has been updated.
439 mmureq := '1';
440 v.state := MMU_LOOKUP;
441 end if;
442 end if;
443 if d_in.valid = '1' then
444 if r.last_dword = '0' then
-- first of two dwords acknowledged: remember its data for loads
445 v.dwords_done := '1';
446 v.last_dword := '1';
447 if r.load = '1' then
448 v.load_data := data_permuted;
449 end if;
450 else
451 write_enable := r.load and not r.load_sp;
452 if HAS_FPU and r.load_sp = '1' then
453 -- SP to DP conversion takes a cycle
454 v.wr_sel := "10";
455 v.state := FINISH_LFS;
456 elsif r.load = '0' then
457 -- stores write back rA update in this cycle
458 do_update := r.update;
459 end if;
460 v.busy := '0';
461 end if;
462 end if;
463 -- r.wait_dcache gets set one cycle after we come into ACK_WAIT state,
464 -- which is OK because the dcache always takes at least two cycles.
465 v.wait_dcache := r.last_dword and not r.extra_cycle;
466
467 when MMU_LOOKUP =>
468 if m_in.done = '1' then
469 if r.instr_fault = '0' then
470 -- retry the request now that the MMU has installed a TLB entry
471 req := '1';
472 if r.last_dword = '0' then
473 v.state := SECOND_REQ;
474 else
475 v.state := ACK_WAIT;
476 end if;
477 end if;
478 end if;
479 if m_in.err = '1' then
480 exception := '1';
481 dsisr(63 - 33) := m_in.invalid;
482 dsisr(63 - 36) := m_in.perm_error;
483 dsisr(63 - 38) := not r.load;
484 dsisr(63 - 44) := m_in.badtree;
485 dsisr(63 - 45) := m_in.rc_error;
486 end if;
487
488 when TLBIE_WAIT =>
489
490 when FINISH_LFS =>
491
492 when COMPLETE =>
493 exception := r.align_intr;
494
495 end case;
496
-- Completion or an exception returns the state machine to IDLE; this
-- overrides any v.state chosen in the case statement above.
497 if done = '1' or exception = '1' then
498 v.state := IDLE;
499 v.busy := '0';
500 end if;
501
502 -- Note that l_in.valid is gated with busy inside execute1
503 if l_in.valid = '1' then
504 v.mode_32bit := l_in.mode_32bit;
505 v.load := '0';
506 v.dcbz := '0';
507 v.tlbie := '0';
508 v.instr_fault := '0';
509 v.align_intr := '0';
510 v.dwords_done := '0';
511 v.last_dword := '1';
512 v.instr_tag := l_in.instr_tag;
513 v.write_reg := l_in.write_reg;
514 v.length := l_in.length;
515 v.byte_reverse := l_in.byte_reverse;
516 v.sign_extend := l_in.sign_extend;
517 v.update := l_in.update;
518 v.xerc := l_in.xerc;
519 v.reserve := l_in.reserve;
520 v.rc := l_in.rc;
521 v.nc := l_in.ci;
522 v.virt_mode := l_in.virt_mode;
523 v.priv_mode := l_in.priv_mode;
524 v.load_sp := '0';
525 v.wait_dcache := '0';
526 v.wait_mmu := '0';
527 v.extra_cycle := '0';
528 v.nia := l_in.nia;
529 v.srr1 := (others => '0');
530
-- store data: use the SP-converted value for 32-bit FP stores
531 if HAS_FPU and l_in.is_32bit = '1' then
532 v.store_data := x"00000000" & store_sp_data;
533 else
534 v.store_data := l_in.data;
535 end if;
536
537 addr := lsu_sum;
538 if l_in.second = '1' then
539 -- second half of load with update does the update
540 if l_in.op = OP_LOAD and l_in.update = '1' then
541 v.do_update := '1';
542 else
543 -- for the second half of a 16-byte transfer, use next_addr
544 addr := next_addr;
545 end if;
546 end if;
547 if l_in.mode_32bit = '1' then
548 addr(63 downto 32) := (others => '0');
549 end if;
550 if v.do_update = '0' then
551 -- preserve previous r.addr for load with update
552 v.addr := addr;
553 end if;
554 maddr := l_in.addr2; -- address from RB for tlbie
555
556 -- XXX Temporary hack. Mark the op as non-cachable if the address
557 -- is the form 0xc------- for a real-mode access.
558 if addr(31 downto 28) = "1100" and l_in.virt_mode = '0' then
559 v.nc := '1';
560 end if;
561
562 if l_in.second = '0' then
563 -- Do length_to_sel and work out if we are doing 2 dwords
564 long_sel := xfer_data_sel(l_in.length, lsu_sum(2 downto 0));
565 byte_sel := long_sel(7 downto 0);
566 v.first_bytes := byte_sel;
567 v.second_bytes := long_sel(15 downto 8);
568 else
-- second half of a split instruction reuses the selects latched for the first half
569 byte_sel := r.first_bytes;
570 long_sel := r.second_bytes & r.first_bytes;
571 end if;
572
573 -- check alignment for larx/stcx
574 misaligned := or (std_ulogic_vector(unsigned(l_in.length(2 downto 0)) - 1) and addr(2 downto 0));
575 v.align_intr := l_in.reserve and misaligned;
576 if l_in.repeat = '1' and l_in.second = '0' and l_in.update = '0' and addr(3) = '1' then
577 -- length is really 16 not 8
578 -- Make misaligned lq cause an alignment interrupt in LE mode,
579 -- in order to avoid the case with RA = RT + 1 where the second half
580 -- faults but the first doesn't (and updates RT+1, destroying RA).
581 -- The equivalent BE case doesn't occur because RA = RT is illegal.
582 misaligned := '1';
583 if l_in.reserve = '1' or (l_in.op = OP_LOAD and l_in.byte_reverse = '0') then
584 v.align_intr := '1';
585 end if;
586 end if;
587
588 v.atomic := not misaligned;
589 v.atomic_last := not misaligned and (l_in.second or not l_in.repeat);
590
-- Per-operation setup.  req requests a dcache access, mmureq and
-- mmu_mtspr request MMU activity; ops that set none of these go straight
-- to COMPLETE.
591 case l_in.op is
592 when OP_STORE =>
593 req := '1';
594 when OP_LOAD =>
595 v.load := '1';
596 if l_in.second = '1' and l_in.update = '1' then
597 v.wr_sel := "01";
598 v.state := COMPLETE;
599 else
600 req := '1';
601 if HAS_FPU and l_in.is_32bit = '1' then
602 -- Allow an extra cycle for SP->DP precision conversion
603 v.load_sp := '1';
604 v.extra_cycle := '1';
605 end if;
606 end if;
607 when OP_DCBZ =>
608 v.align_intr := v.nc;
609 req := '1';
610 v.dcbz := '1';
611 when OP_TLBIE =>
612 mmureq := '1';
613 v.tlbie := '1';
614 v.state := TLBIE_WAIT;
615 v.wait_mmu := '1';
616 when OP_MFSPR =>
617 v.wr_sel := "00";
618 -- partial decode on SPR number should be adequate given
619 -- the restricted set that get sent down this path
620 if sprn(9) = '0' and sprn(5) = '0' then
621 if sprn(0) = '0' then
622 v.sprval := x"00000000" & r.dsisr;
623 else
624 v.sprval := r.dar;
625 end if;
626 else
627 -- reading one of the SPRs in the MMU
628 v.sprval := m_in.sprval;
629 end if;
630 v.state := COMPLETE;
631 when OP_MTSPR =>
632 if sprn(9) = '0' and sprn(5) = '0' then
633 if sprn(0) = '0' then
634 v.dsisr := l_in.data(31 downto 0);
635 else
636 v.dar := l_in.data;
637 end if;
638 v.state := COMPLETE;
639 else
640 -- writing one of the SPRs in the MMU
641 mmu_mtspr := '1';
642 v.state := TLBIE_WAIT;
643 v.wait_mmu := '1';
644 end if;
645 when OP_FETCH_FAILED =>
646 -- send it to the MMU to do the radix walk
647 maddr := l_in.nia;
648 v.instr_fault := '1';
649 mmureq := '1';
650 v.state := MMU_LOOKUP;
651 v.wait_mmu := '1';
652 when others =>
653 assert false report "unknown op sent to loadstore1";
654 end case;
655
-- For dcache requests pick the next state: COMPLETE if an alignment
-- interrupt is pending, ACK_WAIT if only one dword is touched, otherwise
-- SECOND_REQ to issue the second dword.
656 if req = '1' then
657 if v.align_intr = '1' then
658 v.state := COMPLETE;
659 elsif long_sel(15 downto 8) = "00000000" then
660 v.state := ACK_WAIT;
661 else
662 v.state := SECOND_REQ;
663 end if;
664 end if;
665
-- Only operations that use the dcache or the MMU assert busy.
666 v.busy := req or mmureq or mmu_mtspr;
667 end if;
668
669 -- Work out controls for store formatting
670 if l_in.valid = '1' then
671 byte_offset := unsigned(lsu_sum(2 downto 0));
672 byte_rev := l_in.byte_reverse;
673 length := l_in.length;
674 brev_lenm1 := "000";
675 if byte_rev = '1' then
676 brev_lenm1 := unsigned(length(2 downto 0)) - 1;
677 end if;
678 v.byte_offset := byte_offset;
679 v.brev_mask := brev_lenm1;
680 end if;
681
682 -- Work out load formatter controls for next cycle
683 byte_offset := unsigned(v.addr(2 downto 0));
684 brev_lenm1 := "000";
685 if v.byte_reverse = '1' then
686 brev_lenm1 := unsigned(v.length(2 downto 0)) - 1;
687 end if;
688
-- For each result byte i: the source byte index is
-- (i xor brev_lenm1) + byte_offset; the carry out of that 3-bit sum
-- (kk(3)) is recorded in use_second.
689 for i in 0 to 7 loop
690 kk := ('0' & (to_unsigned(i, 3) xor brev_lenm1)) + ('0' & byte_offset);
691 v.use_second(i) := kk(3);
692 v.byte_index(i) := kk(2 downto 0);
693 end loop;
694
-- Result bytes below the access length take load data (from the saved
-- first dword or the current dword once dwords_done is set); bytes at or
-- above the length are sign-filled or zeroed according to v.sign_extend.
695 for i in 0 to 7 loop
696 if i < to_integer(unsigned(v.length)) then
697 if v.dwords_done = '1' then
698 v.trim_ctl(i) := '1' & not v.use_second(i);
699 else
700 v.trim_ctl(i) := "10";
701 end if;
702 else
703 v.trim_ctl(i) := '0' & v.sign_extend;
704 end if;
705 end loop;
706
707 -- generate DSI or DSegI for load/store exceptions
708 -- or ISI or ISegI for instruction fetch exceptions
-- Vector selection: 16#600# when r.align_intr is set; otherwise
-- 16#300# / 16#380# for data-side faults and 16#400# / 16#480# for
-- instruction-side faults, with m_in.segerr selecting the second value
-- of each pair.
709 v.interrupt := exception;
710 if exception = '1' then
711 if r.align_intr = '1' then
712 v.intr_vec := 16#600#;
713 v.dar := addr;
714 elsif r.instr_fault = '0' then
715 v.dar := addr;
716 if m_in.segerr = '0' then
717 v.intr_vec := 16#300#;
718 v.dsisr := dsisr;
719 else
720 v.intr_vec := 16#380#;
721 end if;
722 else
723 if m_in.segerr = '0' then
724 v.srr1(47 - 33) := m_in.invalid;
725 v.srr1(47 - 35) := m_in.perm_error; -- noexec fault
726 v.srr1(47 - 44) := m_in.badtree;
727 v.srr1(47 - 45) := m_in.rc_error;
728 v.intr_vec := 16#400#;
729 else
730 v.intr_vec := 16#480#;
731 end if;
732 end if;
733 end if;
734
735 -- Update outputs to dcache
-- These use the next-state values in v, so a new request reaches the
-- dcache in the same cycle in which it arrives on l_in.
736 d_out.valid <= req and not v.align_intr;
737 d_out.load <= v.load;
738 d_out.dcbz <= v.dcbz;
739 d_out.nc <= v.nc;
740 d_out.reserve <= v.reserve;
741 d_out.atomic <= v.atomic;
742 d_out.atomic_last <= v.atomic_last;
743 d_out.addr <= addr;
744 d_out.data <= store_data;
745 d_out.byte_sel <= byte_sel;
746 d_out.virt_mode <= v.virt_mode;
747 d_out.priv_mode <= v.priv_mode;
748 d_out.hold <= '0';
749
750 -- Update outputs to MMU
751 m_out.valid <= mmureq;
752 m_out.iside <= v.instr_fault;
753 m_out.load <= r.load;
754 m_out.priv <= r.priv_mode;
755 m_out.tlbie <= v.tlbie;
756 m_out.mtspr <= mmu_mtspr;
757 m_out.sprn <= sprn;
758 m_out.addr <= maddr;
759 m_out.slbia <= l_in.insn(7);
760 m_out.rs <= l_in.data;
761
762 -- Update outputs to writeback
763 -- Multiplex either cache data to the destination GPR or
764 -- the address for the rA update.
-- r.wr_sel: "00" = SPR read value, "01" = r.addr (written only when
-- do_update is set), "10" = SP load converted to DP, others = trimmed
-- load data (written only when write_enable is set).
765 l_out.valid <= done;
766 l_out.instr_tag <= r.instr_tag;
767 l_out.write_reg <= r.write_reg;
768 case r.wr_sel is
769 when "00" =>
770 l_out.write_enable <= '1';
771 l_out.write_data <= r.sprval;
772 when "01" =>
773 l_out.write_enable <= do_update;
774 l_out.write_data <= r.addr;
775 when "10" =>
776 l_out.write_enable <= '1';
777 l_out.write_data <= load_dp_data;
778 when others =>
779 l_out.write_enable <= write_enable;
780 l_out.write_data <= data_trimmed;
781 end case;
782 l_out.xerc <= r.xerc;
783 l_out.rc <= r.rc and done;
784 l_out.store_done <= d_in.store_done;
785 l_out.interrupt <= r.interrupt;
786 l_out.intr_vec <= r.intr_vec;
787 l_out.srr0 <= r.nia;
788 l_out.srr1 <= r.srr1;
789
790 -- update busy signal back to execute1
791 e_out.busy <= busy;
792
793 -- Update registers
794 rin <= v;
795
796 end process;
797
-- Optional logging, generated only when LOG_LENGTH is non-zero.
-- Every rising clock edge a 10-bit snapshot is registered and presented
-- on log_out.  Bit layout, most significant first:
--   9: e_out.busy       8: l_out.interrupt   7: l_out.valid
--   6: m_out.valid      5: d_out.valid       4: m_in.done
--   3: r.dwords_done    2..0: position of r.state within state_t
l1_log: if LOG_LENGTH > 0 generate
    signal log_data : std_ulogic_vector(9 downto 0);
begin
    ls1_log: process(clk)
    begin
        if rising_edge(clk) then
            log_data(9) <= e_out.busy;
            log_data(8) <= l_out.interrupt;
            log_data(7) <= l_out.valid;
            log_data(6) <= m_out.valid;
            log_data(5) <= d_out.valid;
            log_data(4) <= m_in.done;
            log_data(3) <= r.dwords_done;
            log_data(2 downto 0) <= std_ulogic_vector(to_unsigned(state_t'pos(r.state), 3));
        end if;
    end process;
    log_out <= log_data;
end generate;
816
817 end;