2 use ieee.std_logic_1164.all;
3 use ieee.numeric_std.all;
6 use work.decode_types.all;
10 -- We calculate the address in the first cycle
14 -- Non-zero to enable log data collection
15 LOG_LENGTH : natural := 0
21 l_in : in Execute1ToLoadstore1Type;
22 e_out : out Loadstore1ToExecute1Type;
23 l_out : out Loadstore1ToWritebackType;
25 d_out : out Loadstore1ToDcacheType;
26 d_in : in DcacheToLoadstore1Type;
28 m_out : out Loadstore1ToMmuType;
29 m_in : in MmuToLoadstore1Type;
31 dc_stall : in std_ulogic;
33 log_out : out std_ulogic_vector(9 downto 0)
37 -- Note, we don't currently use the stall output from the dcache because
38 -- we know it can take two requests without stalling when idle, we are
39 -- its only user, and we know it never stalls when idle.
41 architecture behave of loadstore1 is
43 -- State machine for unaligned loads/stores
44 type state_t is (IDLE, -- ready for instruction
45 SECOND_REQ, -- send 2nd request of unaligned xfer
46 ACK_WAIT, -- waiting for ack from dcache
47 MMU_LOOKUP, -- waiting for MMU to look up translation
48 TLBIE_WAIT, -- waiting for MMU to finish doing a tlbie
49 COMPLETE -- extra cycle to complete an operation
52 type reg_stage_t is record -- type of the r / rin state signals used by the processes below
53 -- latch most of the input request (fields are copied from l_in when l_in.valid = '1')
58 addr : std_ulogic_vector(63 downto 0); -- access address; bits 2..0 give the byte offset for load formatting; also the data written for an rA update
59 store_data : std_ulogic_vector(63 downto 0); -- store data after byte reversing and rotating
60 load_data : std_ulogic_vector(63 downto 0); -- copy of data_permuted kept from an earlier dcache response
61 write_reg : gpr_index_t; -- destination GPR for load data or an mfspr result
62 length : std_ulogic_vector(3 downto 0); -- access length; bytes at index >= length are trimmed or sign-filled
63 byte_reverse : std_ulogic; -- '1' selects byte-reversed data ordering
64 sign_extend : std_ulogic; -- '1' fills the trimmed bytes with the sign of the loaded value
66 update_reg : gpr_index_t; -- GPR that receives r.addr when an rA update is written back
70 nc : std_ulogic; -- non-cacheable access
71 virt_mode : std_ulogic; -- latched from l_in.virt_mode, forwarded to the dcache
72 priv_mode : std_ulogic; -- latched from l_in.priv_mode, forwarded to the dcache and the MMU
74 dwords_done : std_ulogic; -- set when d_in.valid arrives while last_dword = '0'; cleared on a new request
75 last_dword : std_ulogic; -- '0' while a second request still has to be sent (see SECOND_REQ)
76 first_bytes : std_ulogic_vector(7 downto 0); -- byte selects for the first transfer (bits 7..0 of xfer_data_sel)
77 second_bytes : std_ulogic_vector(7 downto 0); -- byte selects for the second transfer (bits 15..8 of xfer_data_sel)
78 dar : std_ulogic_vector(63 downto 0);
79 dsisr : std_ulogic_vector(31 downto 0); -- DSISR contents; can be copied into sprval and loaded from l_in.data(31 downto 0)
80 instr_fault : std_ulogic; -- set for OP_FETCH_FAILED; drives m_out.iside and e_out.instr_fault
81 sprval : std_ulogic_vector(63 downto 0); -- value sent to writeback when r.mfspr = '1'
83 wait_dcache : std_ulogic; -- together with d_in.valid, drops busy in the same cycle
84 wait_mmu : std_ulogic; -- together with m_in.done, drops busy in the same cycle
85 do_update : std_ulogic; -- registered request to write r.addr to update_reg
86 extra_cycle : std_ulogic; -- loads with rA update need an extra cycle
89 type byte_sel_t is array(0 to 7) of std_ulogic;
90 subtype byte_trim_t is std_ulogic_vector(1 downto 0);
91 type trim_ctl_t is array(0 to 7) of byte_trim_t;
93 signal r, rin : reg_stage_t;
94 signal lsu_sum : std_ulogic_vector(63 downto 0);
96 -- Generate byte enables from sizes
97 function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is
111 end function length_to_sel;
113 -- Calculate byte enables for an access of the given size starting at byte offset `address`.
114 -- This returns 16 bits, giving the select signals for two transfers,
115 -- to account for unaligned loads or stores: the caller uses bits 7..0 for the first transfer and bits 15..8 for the second.
116 function xfer_data_sel(size : in std_logic_vector(3 downto 0);
117 address : in std_logic_vector(2 downto 0))
118 return std_ulogic_vector is
119 variable longsel : std_ulogic_vector(15 downto 0); -- 8-bit length mask widened to 16 bits
121 longsel := "00000000" & length_to_sel(size); -- zero-extend the mask from length_to_sel
122 return std_ulogic_vector(shift_left(unsigned(longsel), -- shift the mask up by the byte offset;
123 to_integer(unsigned(address)))); -- bits that pass bit 7 land in the second transfer's selects
124 end function xfer_data_sel;
127 -- Calculate the address in the first cycle
128 lsu_sum <= std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2)) when l_in.valid = '1' else (others => '0');
130 loadstore1_0: process(clk)
132 if rising_edge(clk) then
143 loadstore1_1: process(all)
144 variable v : reg_stage_t;
145 variable brev_lenm1 : unsigned(2 downto 0);
146 variable byte_offset : unsigned(2 downto 0);
147 variable j : integer;
148 variable k : unsigned(2 downto 0);
149 variable kk : unsigned(3 downto 0);
150 variable long_sel : std_ulogic_vector(15 downto 0);
151 variable byte_sel : std_ulogic_vector(7 downto 0);
152 variable req : std_ulogic;
153 variable busy : std_ulogic;
154 variable addr : std_ulogic_vector(63 downto 0);
155 variable maddr : std_ulogic_vector(63 downto 0);
156 variable wdata : std_ulogic_vector(63 downto 0);
157 variable write_enable : std_ulogic;
158 variable do_update : std_ulogic;
159 variable done : std_ulogic;
160 variable data_permuted : std_ulogic_vector(63 downto 0);
161 variable data_trimmed : std_ulogic_vector(63 downto 0);
162 variable store_data : std_ulogic_vector(63 downto 0);
163 variable use_second : byte_sel_t;
164 variable trim_ctl : trim_ctl_t;
165 variable negative : std_ulogic;
166 variable sprn : std_ulogic_vector(9 downto 0);
167 variable exception : std_ulogic;
168 variable next_addr : std_ulogic_vector(63 downto 0);
169 variable mmureq : std_ulogic;
170 variable dsisr : std_ulogic_vector(31 downto 0);
171 variable mmu_mtspr : std_ulogic;
172 variable itlb_fault : std_ulogic;
179 sprn := std_ulogic_vector(to_unsigned(decode_spr_num(l_in.insn), 10));
180 dsisr := (others => '0');
185 do_update := r.do_update;
188 -- load data formatting
189 byte_offset := unsigned(r.addr(2 downto 0));
191 if r.byte_reverse = '1' then
192 brev_lenm1 := unsigned(r.length(2 downto 0)) - 1;
195 -- shift and byte-reverse data bytes
197 kk := ('0' & (to_unsigned(i, 3) xor brev_lenm1)) + ('0' & byte_offset);
198 use_second(i) := kk(3);
199 j := to_integer(kk(2 downto 0)) * 8;
200 data_permuted(i * 8 + 7 downto i * 8) := d_in.data(j + 7 downto j);
203 -- Work out the sign bit for sign extension.
204 -- For unaligned loads crossing two dwords, the sign bit is in the
205 -- first dword for big-endian (byte_reverse = 1), or the second dword
206 -- for little-endian.
207 if r.dwords_done = '1' and r.byte_reverse = '1' then
208 negative := (r.length(3) and r.load_data(63)) or
209 (r.length(2) and r.load_data(31)) or
210 (r.length(1) and r.load_data(15)) or
211 (r.length(0) and r.load_data(7));
213 negative := (r.length(3) and data_permuted(63)) or
214 (r.length(2) and data_permuted(31)) or
215 (r.length(1) and data_permuted(15)) or
216 (r.length(0) and data_permuted(7));
219 -- trim and sign-extend
221 if i < to_integer(unsigned(r.length)) then
222 if r.dwords_done = '1' then
223 trim_ctl(i) := '1' & not use_second(i);
228 trim_ctl(i) := '0' & (negative and r.sign_extend);
232 data_trimmed(i * 8 + 7 downto i * 8) := r.load_data(i * 8 + 7 downto i * 8);
234 data_trimmed(i * 8 + 7 downto i * 8) := data_permuted(i * 8 + 7 downto i * 8);
236 data_trimmed(i * 8 + 7 downto i * 8) := x"FF";
238 data_trimmed(i * 8 + 7 downto i * 8) := x"00";
242 -- Byte reversing and rotating for stores
243 -- Done in the first cycle (when l_in.valid = 1)
244 store_data := r.store_data;
245 if l_in.valid = '1' then
246 byte_offset := unsigned(lsu_sum(2 downto 0));
248 if l_in.byte_reverse = '1' then
249 brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1;
252 k := (to_unsigned(i, 3) - byte_offset) xor brev_lenm1;
253 j := to_integer(k) * 8;
254 store_data(i * 8 + 7 downto i * 8) := l_in.data(j + 7 downto j);
257 v.store_data := store_data;
259 -- compute (addr + 8) & ~7 for the second doubleword when unaligned
260 next_addr := std_ulogic_vector(unsigned(r.addr(63 downto 3)) + 1) & "000";
263 -- We need to minimize the delay from clock to busy valid because it
264 -- gates the start of execution of the next instruction.
265 busy := r.busy and not ((r.wait_dcache and d_in.valid) or (r.wait_mmu and m_in.done));
269 if r.state /= IDLE and busy = '0' then
274 if r.dwords_done = '1' or r.state = SECOND_REQ then
276 byte_sel := r.second_bytes;
279 byte_sel := r.first_bytes;
292 if d_in.error = '1' then
293 -- dcache will discard the second request if it
294 -- gets an error on the 1st of two requests
295 if d_in.cache_paradox = '1' then
296 -- signal an interrupt straight away
298 dsisr(63 - 38) := not r.load;
299 -- XXX there is no architected bit for this
300 dsisr(63 - 35) := d_in.cache_paradox;
302 -- Look up the translation for TLB miss
303 -- and also for permission error and RC error
304 -- in case the PTE has been updated.
306 v.state := MMU_LOOKUP;
309 if d_in.valid = '1' then
310 if r.last_dword = '0' then
311 v.dwords_done := '1';
314 v.load_data := data_permuted;
317 write_enable := r.load;
318 if r.extra_cycle = '1' then
319 -- loads with rA update need an extra cycle
321 v.do_update := r.update;
323 -- stores write back rA update in this cycle
324 do_update := r.update;
329 -- r.wait_dcache gets set one cycle after we come into ACK_WAIT state,
330 -- which is OK because the dcache always takes at least two cycles.
331 v.wait_dcache := r.last_dword and not r.extra_cycle;
334 if m_in.done = '1' then
335 if r.instr_fault = '0' then
336 -- retry the request now that the MMU has installed a TLB entry
338 if r.last_dword = '0' then
339 v.state := SECOND_REQ;
345 if m_in.err = '1' then
347 dsisr(63 - 33) := m_in.invalid;
348 dsisr(63 - 36) := m_in.perm_error;
349 dsisr(63 - 38) := not r.load;
350 dsisr(63 - 44) := m_in.badtree;
351 dsisr(63 - 45) := m_in.rc_error;
360 if done = '1' or exception = '1' then
365 -- Note that l_in.valid is gated with busy inside execute1
366 if l_in.valid = '1' then
371 v.instr_fault := '0';
372 v.dwords_done := '0';
374 v.write_reg := l_in.write_reg;
375 v.length := l_in.length;
376 v.byte_reverse := l_in.byte_reverse;
377 v.sign_extend := l_in.sign_extend;
378 v.update := l_in.update;
379 v.update_reg := l_in.update_reg;
381 v.reserve := l_in.reserve;
384 v.virt_mode := l_in.virt_mode;
385 v.priv_mode := l_in.priv_mode;
386 v.wait_dcache := '0';
389 v.extra_cycle := '0';
392 maddr := l_in.addr2; -- address from RB for tlbie
394 -- XXX Temporary hack. Mark the op as non-cacheable if the address
395 -- is of the form 0xc------- for a real-mode access.
396 if lsu_sum(31 downto 28) = "1100" and l_in.virt_mode = '0' then
400 -- Do length_to_sel and work out if we are doing 2 dwords
401 long_sel := xfer_data_sel(l_in.length, v.addr(2 downto 0));
402 byte_sel := long_sel(7 downto 0);
403 v.first_bytes := byte_sel;
404 v.second_bytes := long_sel(15 downto 8);
412 -- Allow an extra cycle for RA update on loads
413 v.extra_cycle := l_in.update;
420 v.state := TLBIE_WAIT;
424 -- partial decode on SPR number should be adequate given
425 -- the restricted set that gets sent down this path
426 if sprn(9) = '0' and sprn(5) = '0' then
427 if sprn(0) = '0' then
428 v.sprval := x"00000000" & r.dsisr;
433 -- reading one of the SPRs in the MMU
434 v.sprval := m_in.sprval;
438 if sprn(9) = '0' and sprn(5) = '0' then
439 if sprn(0) = '0' then
440 v.dsisr := l_in.data(31 downto 0);
446 -- writing one of the SPRs in the MMU
448 v.state := TLBIE_WAIT;
451 when OP_FETCH_FAILED =>
452 -- send it to the MMU to do the radix walk
454 v.instr_fault := '1';
456 v.state := MMU_LOOKUP;
459 assert false report "unknown op sent to loadstore1";
463 if long_sel(15 downto 8) = "00000000" then
466 v.state := SECOND_REQ;
470 v.busy := req or mmureq or mmu_mtspr;
473 -- Update outputs to dcache
475 d_out.load <= v.load;
476 d_out.dcbz <= v.dcbz;
478 d_out.reserve <= v.reserve;
480 d_out.data <= store_data;
481 d_out.byte_sel <= byte_sel;
482 d_out.virt_mode <= v.virt_mode;
483 d_out.priv_mode <= v.priv_mode;
485 -- Update outputs to MMU
486 m_out.valid <= mmureq;
487 m_out.iside <= v.instr_fault;
488 m_out.load <= r.load;
489 m_out.priv <= r.priv_mode;
490 m_out.tlbie <= v.tlbie;
491 m_out.mtspr <= mmu_mtspr;
494 m_out.slbia <= l_in.insn(7);
495 m_out.rs <= l_in.data;
497 -- Update outputs to writeback
498 -- Multiplex the SPR value (for mfspr), the address for the rA update,
499 -- or the formatted cache data to the destination GPR.
501 if r.mfspr = '1' then
502 l_out.write_enable <= '1';
503 l_out.write_reg <= r.write_reg;
504 l_out.write_data <= r.sprval;
505 elsif do_update = '1' then
506 l_out.write_enable <= '1';
507 l_out.write_reg <= r.update_reg;
508 l_out.write_data <= r.addr;
510 l_out.write_enable <= write_enable;
511 l_out.write_reg <= r.write_reg;
512 l_out.write_data <= data_trimmed;
514 l_out.xerc <= r.xerc;
515 l_out.rc <= r.rc and done;
516 l_out.store_done <= d_in.store_done;
518 -- update exception info back to execute1
520 e_out.exception <= exception;
521 e_out.instr_fault <= r.instr_fault;
522 e_out.invalid <= m_in.invalid;
523 e_out.badtree <= m_in.badtree;
524 e_out.perm_error <= m_in.perm_error;
525 e_out.rc_error <= m_in.rc_error;
526 e_out.segment_fault <= m_in.segerr;
527 if exception = '1' and r.instr_fault = '0' then
529 if m_in.segerr = '0' then
539 l1_log: if LOG_LENGTH > 0 generate
540 signal log_data : std_ulogic_vector(9 downto 0);
542 ls1_log: process(clk)
544 if rising_edge(clk) then
545 log_data <= e_out.busy &
552 std_ulogic_vector(to_unsigned(state_t'pos(r.state), 3));