2 -- Set associative icache
4 -- TODO (in no specific order):
6 -- * Add debug interface to inspect cache content
7 -- * Add multi-hit error detection
8 -- * Maybe add parity ? There's a few bits free in each BRAM row on Xilinx
9 -- * Add optimization: service hits on partially loaded lines
10 -- * Add optimization: (maybe) interrupt reload on flush/redirect
11 -- * Check if playing with the geometry of the cache tags allows for more
12 -- efficient use of distributed RAM and less logic/muxes. Currently we
13 -- write TAG_BITS width which may not match full ram blocks and might
14 -- cause muxes to be inferred for "partial writes".
17 use ieee.std_logic_1164.all;
18 use ieee.numeric_std.all;
23 use work.decode_types.all;
24 use work.wishbone_types.all;
26 -- 64 bit set associative icache. All instructions are 4B aligned.
30 SIM : boolean := false;
31 HAS_FPU : boolean := true;
33 LINE_SIZE : positive := 64;
34 -- BRAM organisation: We never access more than wishbone_data_bits at
35 -- a time so to save resources we make the array only that wide, and
36 -- use consecutive indices to make a cache "line"
38 -- ROW_SIZE is the width in bytes of the BRAM (based on WB, so 64-bits)
39 ROW_SIZE : positive := wishbone_data_bits / 8;
40 -- Number of lines in a set
41 NUM_LINES : positive := 32;
43 NUM_WAYS : positive := 4;
44 -- L1 ITLB number of entries (direct mapped)
45 TLB_SIZE : positive := 64;
46 -- L1 ITLB log_2(page_size)
47 TLB_LG_PGSZ : positive := 12;
48 -- Non-zero to enable log data collection
49 LOG_LENGTH : natural := 0
55 i_in : in Fetch1ToIcacheType;
56 i_out : out IcacheToDecode1Type;
58 m_in : in MmuToIcacheType;
60 stall_in : in std_ulogic;
61 stall_out : out std_ulogic;
62 flush_in : in std_ulogic;
63 inval_in : in std_ulogic;
65 wishbone_out : out wishbone_master_out;
66 wishbone_in : in wishbone_slave_out;
68 wb_snoop_in : in wishbone_master_out := wishbone_master_out_init;
70 events : out IcacheEventType;
71 log_out : out std_ulogic_vector(57 downto 0)
75 architecture rtl of icache is
76 constant ROW_SIZE_BITS : natural := ROW_SIZE*8;
77 -- ROW_PER_LINE is the number of rows (wishbone transactions) in a line
78 constant ROW_PER_LINE : natural := LINE_SIZE / ROW_SIZE;
79 -- BRAM_ROWS is the number of rows in BRAM needed to represent the full
81 constant BRAM_ROWS : natural := NUM_LINES * ROW_PER_LINE;
82 -- INSN_PER_ROW is the number of 32bit instructions per BRAM row
83 constant INSN_PER_ROW : natural := ROW_SIZE_BITS / 32;
84 -- Bit fields counts in the address
86 -- INSN_BITS is the number of bits to select an instruction in a row
87 constant INSN_BITS : natural := log2(INSN_PER_ROW);
88 -- ROW_BITS is the number of bits to select a row
89 constant ROW_BITS : natural := log2(BRAM_ROWS);
90 -- ROW_LINEBITS is the number of bits to select a row within a line
91 constant ROW_LINEBITS : natural := log2(ROW_PER_LINE);
92 -- LINE_OFF_BITS is the number of bits for the offset in a cache line
93 constant LINE_OFF_BITS : natural := log2(LINE_SIZE);
94 -- ROW_OFF_BITS is the number of bits for the offset in a row
95 constant ROW_OFF_BITS : natural := log2(ROW_SIZE);
96 -- INDEX_BITS is the number of bits to select a cache line
97 constant INDEX_BITS : natural := log2(NUM_LINES);
98 -- SET_SIZE_BITS is the log base 2 of the set size
99 constant SET_SIZE_BITS : natural := LINE_OFF_BITS + INDEX_BITS;
100 -- TAG_BITS is the number of bits of the tag part of the address
101 -- the +1 is to allow the endianness to be stored in the tag
102 constant TAG_BITS : natural := REAL_ADDR_BITS - SET_SIZE_BITS + 1;
103 -- WAY_BITS is the number of bits to select a way
104 -- Make sure this is at least 1, to avoid 0-element vectors
105 constant WAY_BITS : natural := maximum(log2(NUM_WAYS), 1);
107 -- Example of layout for 32 lines of 64 bytes:
109 -- .. tag |index| line |
111 -- .. | | | |00| zero (2)
112 -- .. | | |-| | INSN_BITS (1)
113 -- .. | |---| | ROW_LINEBITS (3)
114 -- .. | |--- - --| LINE_OFF_BITS (6)
115 -- .. | |- --| ROW_OFF_BITS (3)
116 -- .. |----- ---| | ROW_BITS (8)
117 -- .. |-----| | INDEX_BITS (5)
118 -- .. --------| | TAG_BITS (53)
120 subtype row_t is unsigned(ROW_BITS-1 downto 0);
121 subtype index_t is integer range 0 to NUM_LINES-1;
122 subtype index_sig_t is unsigned(INDEX_BITS-1 downto 0);
123 subtype way_t is integer range 0 to NUM_WAYS-1;
124 subtype way_sig_t is unsigned(WAY_BITS-1 downto 0);
125 subtype row_in_line_t is unsigned(ROW_LINEBITS-1 downto 0);
127 -- We store a pre-decoded 10-bit insn_code along with the bottom 26 bits of
128 -- each instruction, giving a total of 36 bits per instruction, which
129 -- fits neatly into the block RAMs available on FPGAs.
130 -- For illegal instructions, the top 4 bits are ones and the bottom 6 bits
131 -- are the instruction's primary opcode, so we have the whole instruction
132 -- word available (e.g. to put in HEIR). For other instructions, the
133 -- primary opcode is not stored but could be determined from the insn_code.
134 constant PREDECODE_BITS : natural := 10;
135 constant INSN_IMAGE_BITS : natural := 26;
136 constant ICWORDLEN : natural := PREDECODE_BITS + INSN_IMAGE_BITS;
137 constant ROW_WIDTH : natural := INSN_PER_ROW * ICWORDLEN;
139 -- The cache data BRAM organized as described above for each way
140 subtype cache_row_t is std_ulogic_vector(ROW_WIDTH-1 downto 0);
142 -- The cache tags LUTRAM has a row per set. Vivado is a pain and will
143 -- not handle a clean (commented) definition of the cache tags as a 3d
144 -- memory. For now, work around it by putting all the tags
145 subtype cache_tag_t is std_logic_vector(TAG_BITS-1 downto 0);
146 -- type cache_tags_set_t is array(way_t) of cache_tag_t;
147 -- type cache_tags_array_t is array(index_t) of cache_tags_set_t;
148 constant TAG_RAM_WIDTH : natural := TAG_BITS * NUM_WAYS;
149 subtype cache_tags_set_t is std_logic_vector(TAG_RAM_WIDTH-1 downto 0);
150 type cache_tags_array_t is array(index_t) of cache_tags_set_t;
152 -- The cache valid bits
153 subtype cache_way_valids_t is std_ulogic_vector(NUM_WAYS-1 downto 0);
154 type cache_valids_t is array(index_t) of cache_way_valids_t;
155 type row_per_line_valid_t is array(0 to ROW_PER_LINE - 1) of std_ulogic;
157 -- Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
158 signal cache_tags : cache_tags_array_t;
159 signal cache_valids : cache_valids_t;
161 attribute ram_style : string;
162 attribute ram_style of cache_tags : signal is "distributed";
165 constant TLB_BITS : natural := log2(TLB_SIZE);
166 constant TLB_EA_TAG_BITS : natural := 64 - (TLB_LG_PGSZ + TLB_BITS);
167 constant TLB_PTE_BITS : natural := 64;
169 subtype tlb_index_t is integer range 0 to TLB_SIZE - 1;
170 type tlb_valids_t is array(tlb_index_t) of std_ulogic;
171 subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0);
172 type tlb_tags_t is array(tlb_index_t) of tlb_tag_t;
173 subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
174 type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t;
176 signal itlb_valids : tlb_valids_t;
177 signal itlb_tags : tlb_tags_t;
178 signal itlb_ptes : tlb_ptes_t;
179 attribute ram_style of itlb_tags : signal is "distributed";
180 attribute ram_style of itlb_ptes : signal is "distributed";
182 -- Privilege bit from PTE EAA field
183 signal eaa_priv : std_ulogic;
185 -- Cache reload state machine
186 type state_t is (IDLE, STOP_RELOAD, CLR_TAG, WAIT_ACK);
188 type reg_internal_t is record
189 -- Cache hit state (Latches for 1 cycle BRAM access)
191 hit_nia : std_ulogic_vector(63 downto 0);
192 hit_smark : std_ulogic;
193 hit_valid : std_ulogic;
194 big_endian: std_ulogic;
195 predicted : std_ulogic;
196 pred_ntaken: std_ulogic;
198 -- Cache miss state (reload state machine)
200 wb : wishbone_master_out;
201 store_way : way_sig_t;
202 store_index : index_sig_t;
204 recv_valid : std_ulogic;
206 store_tag : cache_tag_t;
207 store_valid : std_ulogic;
208 end_row_ix : row_in_line_t;
209 rows_valid : row_per_line_valid_t;
212 fetch_failed : std_ulogic;
215 signal r : reg_internal_t;
217 signal ev : IcacheEventType;
219 -- Async signals on incoming request
220 signal req_index : index_sig_t;
221 signal req_row : row_t;
222 signal req_hit_way : way_sig_t;
223 signal req_tag : cache_tag_t;
224 signal req_is_hit : std_ulogic;
225 signal req_is_miss : std_ulogic;
226 signal req_raddr : real_addr_t;
228 signal real_addr : real_addr_t;
229 signal ra_valid : std_ulogic;
230 signal priv_fault : std_ulogic;
231 signal access_ok : std_ulogic;
233 -- Cache RAM interface
234 type cache_ram_out_t is array(way_t) of cache_row_t;
235 signal cache_out : cache_ram_out_t;
236 signal cache_wr_data : std_ulogic_vector(ROW_WIDTH - 1 downto 0);
237 signal wb_rd_data : std_ulogic_vector(ROW_SIZE_BITS - 1 downto 0);
239 -- PLRU output interface
240 signal plru_victim : way_sig_t;
242 -- Memory write snoop signals
243 signal snoop_valid : std_ulogic;
244 signal snoop_index : index_sig_t;
245 signal snoop_hits : cache_way_valids_t;
247 signal log_insn : std_ulogic_vector(35 downto 0);
249 -- Return the cache line index (tag index) for an address
250 function get_index(addr: std_ulogic_vector) return index_sig_t is
252 return unsigned(addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS));
255 -- Return the cache row index (data memory) for an address
256 function get_row(addr: std_ulogic_vector) return row_t is
258 return unsigned(addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS));
261 -- Return the index of a row within a line
262 function get_row_of_line(row: row_t) return row_in_line_t is
264 return row(ROW_LINEBITS-1 downto 0);
267 -- Returns whether this is the last row of a line
268 function is_last_row_wb_addr(wb_addr: wishbone_addr_type; last: row_in_line_t) return boolean is
270 return unsigned(wb_addr(LINE_OFF_BITS - ROW_OFF_BITS - 1 downto 0)) = last;
273 -- Returns whether this is the last row of a line
274 function is_last_row(row: row_t; last: row_in_line_t) return boolean is
276 return get_row_of_line(row) = last;
279 -- Return the address of the next row in the current cache line
280 function next_row_wb_addr(wb_addr: wishbone_addr_type)
281 return std_ulogic_vector is
282 variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
283 variable result : wishbone_addr_type;
285 -- Is there no simpler way in VHDL to generate that 3 bits adder ?
286 row_idx := wb_addr(ROW_LINEBITS - 1 downto 0);
287 row_idx := std_ulogic_vector(unsigned(row_idx) + 1);
289 result(ROW_LINEBITS - 1 downto 0) := row_idx;
293 -- Return the next row in the current cache line. We use a dedicated
294 -- function in order to limit the size of the generated adder to be
295 -- only the bits within a cache line (3 bits with default settings)
297 function next_row(row: row_t) return row_t is
298 variable row_v : std_ulogic_vector(ROW_BITS-1 downto 0);
299 variable row_idx : unsigned(ROW_LINEBITS-1 downto 0);
300 variable result : std_ulogic_vector(ROW_BITS-1 downto 0);
302 row_v := std_ulogic_vector(row);
303 row_idx := row(ROW_LINEBITS-1 downto 0);
304 row_v(ROW_LINEBITS-1 downto 0) := std_ulogic_vector(row_idx + 1);
305 return unsigned(row_v);
308 -- Read the instruction word for the given address in the current cache row
309 function read_insn_word(addr: std_ulogic_vector(63 downto 0);
310 data: cache_row_t) return std_ulogic_vector is
311 variable word: integer range 0 to INSN_PER_ROW-1;
313 assert not is_X(addr) severity failure;
314 word := to_integer(unsigned(addr(INSN_BITS+2-1 downto 2)));
315 return data(word * ICWORDLEN + ICWORDLEN - 1 downto word * ICWORDLEN);
318 -- Get the tag value from the address
319 function get_tag(addr: real_addr_t; endian: std_ulogic) return cache_tag_t is
321 return endian & addr(addr'left downto SET_SIZE_BITS);
324 -- Read a tag from a tag memory row
325 function read_tag(way: way_t; tagset: cache_tags_set_t) return cache_tag_t is
327 return tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS);
330 -- Write a tag to tag memory row
331 procedure write_tag(way: in way_t; tagset: inout cache_tags_set_t;
334 tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS) := tag;
337 -- Simple hash for direct-mapped TLB index
338 function hash_ea(addr: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
339 variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
341 hash := addr(TLB_LG_PGSZ + TLB_BITS - 1 downto TLB_LG_PGSZ)
342 xor addr(TLB_LG_PGSZ + 2 * TLB_BITS - 1 downto TLB_LG_PGSZ + TLB_BITS)
343 xor addr(TLB_LG_PGSZ + 3 * TLB_BITS - 1 downto TLB_LG_PGSZ + 2 * TLB_BITS);
349 -- byte-swap read data if big endian
353 if r.store_tag(TAG_BITS - 1) = '0' then
354 wb_rd_data <= wishbone_in.dat;
356 for ii in 0 to (wishbone_in.dat'length / 8) - 1 loop
357 j := ((ii / 4) * 4) + (3 - (ii mod 4));
358 wb_rd_data(ii * 8 + 7 downto ii * 8) <= wishbone_in.dat(j * 8 + 7 downto j * 8);
363 predecoder_0: entity work.predecoder
366 WIDTH => INSN_PER_ROW,
367 ICODE_LEN => PREDECODE_BITS,
368 IMAGE_LEN => INSN_IMAGE_BITS
372 valid_in => wishbone_in.ack,
373 insns_in => wb_rd_data,
374 icodes_out => cache_wr_data
377 assert LINE_SIZE mod ROW_SIZE = 0;
378 assert ispow2(LINE_SIZE) report "LINE_SIZE not power of 2" severity FAILURE;
379 assert ispow2(NUM_LINES) report "NUM_LINES not power of 2" severity FAILURE;
380 assert ispow2(ROW_PER_LINE) report "ROW_PER_LINE not power of 2" severity FAILURE;
381 assert ispow2(INSN_PER_ROW) report "INSN_PER_ROW not power of 2" severity FAILURE;
382 assert (ROW_BITS = INDEX_BITS + ROW_LINEBITS)
383 report "geometry bits don't add up" severity FAILURE;
384 assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS)
385 report "geometry bits don't add up" severity FAILURE;
386 assert (REAL_ADDR_BITS + 1 = TAG_BITS + INDEX_BITS + LINE_OFF_BITS)
387 report "geometry bits don't add up" severity FAILURE;
388 assert (REAL_ADDR_BITS + 1 = TAG_BITS + ROW_BITS + ROW_OFF_BITS)
389 report "geometry bits don't add up" severity FAILURE;
391 sim_debug: if SIM generate
394 report "ROW_SIZE = " & natural'image(ROW_SIZE);
395 report "ROW_PER_LINE = " & natural'image(ROW_PER_LINE);
396 report "BRAM_ROWS = " & natural'image(BRAM_ROWS);
397 report "INSN_PER_ROW = " & natural'image(INSN_PER_ROW);
398 report "INSN_BITS = " & natural'image(INSN_BITS);
399 report "ROW_BITS = " & natural'image(ROW_BITS);
400 report "ROW_LINEBITS = " & natural'image(ROW_LINEBITS);
401 report "LINE_OFF_BITS = " & natural'image(LINE_OFF_BITS);
402 report "ROW_OFF_BITS = " & natural'image(ROW_OFF_BITS);
403 report "INDEX_BITS = " & natural'image(INDEX_BITS);
404 report "TAG_BITS = " & natural'image(TAG_BITS);
405 report "WAY_BITS = " & natural'image(WAY_BITS);
410 -- Generate a cache RAM for each way
411 rams: for i in 0 to NUM_WAYS-1 generate
412 signal do_read : std_ulogic;
413 signal do_write : std_ulogic;
414 signal rd_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
415 signal wr_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
416 signal dout : cache_row_t;
417 signal wr_sel : std_ulogic_vector(0 downto 0);
419 way: entity work.cache_ram
421 ROW_BITS => ROW_BITS,
432 wr_data => cache_wr_data
436 do_read <= not stall_in;
438 if r.recv_valid = '1' and r.store_way = to_unsigned(i, WAY_BITS) then
441 cache_out(i) <= dout;
442 rd_addr <= std_ulogic_vector(req_row);
443 wr_addr <= std_ulogic_vector(r.store_row);
444 wr_sel(0) <= do_write;
449 maybe_plrus: if NUM_WAYS > 1 generate
450 type plru_array is array(index_t) of std_ulogic_vector(NUM_WAYS - 2 downto 0);
451 signal plru_ram : plru_array;
452 signal plru_cur : std_ulogic_vector(NUM_WAYS - 2 downto 0);
453 signal plru_upd : std_ulogic_vector(NUM_WAYS - 2 downto 0);
454 signal plru_acc : std_ulogic_vector(WAY_BITS-1 downto 0);
455 signal plru_out : std_ulogic_vector(WAY_BITS-1 downto 0);
457 plru : entity work.plrufn
464 tree_out => plru_upd,
470 -- Read PLRU bits from array
471 if is_X(r.hit_nia) then
472 plru_cur <= (others => 'X');
474 plru_cur <= plru_ram(to_integer(get_index(r.hit_nia)));
478 plru_acc <= std_ulogic_vector(r.hit_way);
479 plru_victim <= unsigned(plru_out);
482 -- synchronous writes to PLRU array
485 if rising_edge(clk) then
486 if r.hit_valid = '1' then
487 assert not is_X(r.hit_nia) severity failure;
488 plru_ram(to_integer(get_index(r.hit_nia))) <= plru_upd;
494 -- TLB hit detection and real address generation
495 itlb_lookup : process(all)
496 variable pte : tlb_pte_t;
497 variable ttag : tlb_tag_t;
498 variable tlb_req_index : std_ulogic_vector(TLB_BITS - 1 downto 0);
500 tlb_req_index := hash_ea(i_in.nia);
501 if is_X(tlb_req_index) then
502 pte := (others => 'X');
503 ttag := (others => 'X');
505 pte := itlb_ptes(to_integer(unsigned(tlb_req_index)));
506 ttag := itlb_tags(to_integer(unsigned(tlb_req_index)));
508 if i_in.virt_mode = '1' then
509 real_addr <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
510 i_in.nia(TLB_LG_PGSZ - 1 downto 0);
511 if ttag = i_in.nia(63 downto TLB_LG_PGSZ + TLB_BITS) then
512 if is_X(tlb_req_index) then
515 ra_valid <= itlb_valids(to_integer(unsigned(tlb_req_index)));
522 real_addr <= addr_to_real(i_in.nia);
527 -- no IAMR, so no KUEP support for now
528 priv_fault <= eaa_priv and not i_in.priv_mode;
529 access_ok <= ra_valid and not priv_fault;
533 itlb_update: process(clk)
534 variable wr_index : std_ulogic_vector(TLB_BITS - 1 downto 0);
536 if rising_edge(clk) then
537 wr_index := hash_ea(m_in.addr);
538 if rst = '1' or (m_in.tlbie = '1' and m_in.doall = '1') then
539 -- clear all valid bits
540 for i in tlb_index_t loop
541 itlb_valids(i) <= '0';
543 elsif m_in.tlbie = '1' then
544 assert not is_X(wr_index) report "icache index invalid on write" severity FAILURE;
545 -- clear entry regardless of hit or miss
546 itlb_valids(to_integer(unsigned(wr_index))) <= '0';
547 elsif m_in.tlbld = '1' then
548 assert not is_X(wr_index) report "icache index invalid on write" severity FAILURE;
549 itlb_tags(to_integer(unsigned(wr_index))) <= m_in.addr(63 downto TLB_LG_PGSZ + TLB_BITS);
550 itlb_ptes(to_integer(unsigned(wr_index))) <= m_in.pte;
551 itlb_valids(to_integer(unsigned(wr_index))) <= '1';
553 ev.itlb_miss_resolved <= m_in.tlbld and not rst;
557 -- Cache hit detection, output to fetch2 and other misc logic
558 icache_comb : process(all)
559 variable is_hit : std_ulogic;
560 variable hit_way : way_sig_t;
561 variable insn : std_ulogic_vector(ICWORDLEN - 1 downto 0);
562 variable icode : insn_code;
564 -- Extract line, row and tag from request
565 req_index <= get_index(i_in.nia);
566 req_row <= get_row(i_in.nia);
567 req_tag <= get_tag(real_addr, i_in.big_endian);
569 -- Calculate address of beginning of cache row, will be
570 -- used for cache miss processing if needed
572 req_raddr <= real_addr(REAL_ADDR_BITS - 1 downto ROW_OFF_BITS) &
573 (ROW_OFF_BITS-1 downto 0 => '0');
575 -- Test if pending request is a hit on any way
576 hit_way := to_unsigned(0, WAY_BITS);
578 if i_in.req = '1' then
579 assert not is_X(req_index) and not is_X(req_row) severity failure;
582 if i_in.req = '1' and
583 (cache_valids(to_integer(req_index))(i) = '1' or
584 (r.state = WAIT_ACK and
585 req_index = r.store_index and
586 to_unsigned(i, WAY_BITS) = r.store_way and
587 r.rows_valid(to_integer(req_row(ROW_LINEBITS-1 downto 0))) = '1')) then
588 if read_tag(i, cache_tags(to_integer(req_index))) = req_tag then
589 hit_way := to_unsigned(i, WAY_BITS);
595 -- Generate the "hit" and "miss" signals for the synchronous blocks
596 if i_in.req = '1' and access_ok = '1' and flush_in = '0' and rst = '0' then
597 req_is_hit <= is_hit;
598 req_is_miss <= not is_hit;
603 req_hit_way <= hit_way;
605 -- Output instruction from current cache row
607 -- Note: This is a mild violation of our design principle of having pipeline
608 -- stages output from a clean latch. In this case we output the result
609 -- of a mux. The alternative would be output an entire row which
610 -- I prefer not to do just yet as it would force fetch2 to know about
611 -- some of the cache geometry information.
613 insn := (others => '0');
614 icode := INSN_illegal;
615 if r.hit_valid = '1' then
616 assert not is_X(r.hit_way) severity failure;
617 insn := read_insn_word(r.hit_nia, cache_out(to_integer(r.hit_way)));
618 -- Currently we use only the top bit for indicating illegal
619 -- instructions because we know that insn_codes fit into 9 bits.
621 insn := (others => '0');
622 elsif insn(ICWORDLEN - 1) = '0' then
623 icode := insn_code'val(to_integer(unsigned(insn(ICWORDLEN-1 downto INSN_IMAGE_BITS))));
626 i_out.insn <= insn(31 downto 0);
627 i_out.icode <= icode;
629 i_out.valid <= r.hit_valid;
630 i_out.nia <= r.hit_nia;
631 i_out.stop_mark <= r.hit_smark;
632 i_out.fetch_failed <= r.fetch_failed;
633 i_out.big_endian <= r.big_endian;
634 i_out.next_predicted <= r.predicted;
635 i_out.next_pred_ntaken <= r.pred_ntaken;
637 -- Stall fetch1 if we have a miss on cache or TLB or a protection fault
638 stall_out <= not (is_hit and access_ok);
640 -- Wishbone requests output (from the cache miss reload machine)
641 wishbone_out <= r.wb;
644 -- Cache hit synchronous machine
645 icache_hit : process(clk)
647 if rising_edge(clk) then
648 -- keep outputs to fetch2 unchanged on a stall
649 -- except that flush or reset sets valid to 0
650 if stall_in = '1' then
651 if rst = '1' or flush_in = '1' then
655 -- On a hit, latch the request for the next cycle, when the BRAM data
656 -- will be available on the cache_out output of the corresponding way
658 r.hit_valid <= req_is_hit;
659 if req_is_hit = '1' then
660 r.hit_way <= req_hit_way;
661 -- this is a bit fragile but better than propogating bad values
662 assert not is_X(i_in.nia) report "metavalue in NIA" severity FAILURE;
664 report "cache hit nia:" & to_hstring(i_in.nia) &
665 " IR:" & std_ulogic'image(i_in.virt_mode) &
666 " SM:" & std_ulogic'image(i_in.stop_mark) &
667 " idx:" & to_hstring(req_index) &
668 " tag:" & to_hstring(req_tag) &
669 " way:" & to_hstring(req_hit_way) &
670 " RA:" & to_hstring(real_addr);
673 if stall_in = '0' then
674 -- Send stop marks and NIA down regardless of validity
675 r.hit_smark <= i_in.stop_mark;
676 r.hit_nia <= i_in.nia;
677 r.big_endian <= i_in.big_endian;
678 r.predicted <= i_in.predicted;
679 r.pred_ntaken <= i_in.pred_ntaken;
681 if i_out.valid = '1' then
682 assert not is_X(i_out.insn) severity failure;
687 -- Cache miss/reload synchronous machine
688 icache_miss : process(clk)
689 variable tagset : cache_tags_set_t;
690 variable tag : cache_tag_t;
691 variable snoop_addr : real_addr_t;
692 variable snoop_tag : cache_tag_t;
693 variable snoop_cache_tags : cache_tags_set_t;
694 variable replace_way : way_sig_t;
696 if rising_edge(clk) then
697 ev.icache_miss <= '0';
699 -- On reset, clear all valid bits to force misses
701 for i in index_t loop
702 cache_valids(i) <= (others => '0');
708 -- We only ever do reads on wishbone
709 r.wb.dat <= (others => '0');
710 r.wb.sel <= "11111111";
713 -- Not useful normally but helps avoid tons of sim warnings
714 r.wb.adr <= (others => '0');
717 snoop_index <= to_unsigned(0, INDEX_BITS);
718 snoop_hits <= (others => '0');
720 -- Detect snooped writes and decode address into index and tag
721 -- Since we never write, any write should be snooped
722 snoop_valid <= wb_snoop_in.cyc and wb_snoop_in.stb and wb_snoop_in.we;
723 snoop_addr := addr_to_real(wb_to_addr(wb_snoop_in.adr));
724 snoop_index <= get_index(snoop_addr);
725 snoop_tag := get_tag(snoop_addr, '0');
726 snoop_hits <= (others => '0');
727 if snoop_valid = '1' then
728 if is_X(snoop_addr) then
729 report "metavalue in snoop_addr" severity FAILURE;
731 snoop_cache_tags := cache_tags(to_integer(get_index(snoop_addr)));
733 tag := read_tag(i, snoop_cache_tags);
734 -- Ignore endian bit in comparison
735 tag(TAG_BITS - 1) := '0';
736 if tag = snoop_tag then
737 snoop_hits(i) <= '1';
742 -- Process cache invalidations
743 if inval_in = '1' then
744 for i in index_t loop
745 cache_valids(i) <= (others => '0');
747 r.store_valid <= '0';
749 -- Do invalidations from snooped stores to memory, one
750 -- cycle after the address appears on wb_snoop_in.
752 if snoop_hits(i) = '1' then
753 assert not is_X(snoop_index) severity failure;
754 cache_valids(to_integer(snoop_index))(i) <= '0';
759 -- Main state machine
762 -- Reset per-row valid flags, only used in WAIT_ACK
763 for i in 0 to ROW_PER_LINE - 1 loop
764 r.rows_valid(i) <= '0';
767 -- We need to read a cache line
768 if req_is_miss = '1' then
769 report "cache miss nia:" & to_hstring(i_in.nia) &
770 " IR:" & std_ulogic'image(i_in.virt_mode) &
771 " SM:" & std_ulogic'image(i_in.stop_mark) &
772 " idx:" & to_hstring(req_index) &
773 " tag:" & to_hstring(req_tag) &
774 " RA:" & to_hstring(real_addr);
775 ev.icache_miss <= '1';
777 -- Keep track of our index and way for subsequent stores
778 r.store_index <= req_index;
779 r.recv_row <= get_row(req_raddr);
780 r.store_row <= get_row(req_raddr);
781 r.store_tag <= req_tag;
782 r.store_valid <= '1';
783 r.end_row_ix <= get_row_of_line(get_row(req_raddr)) - 1;
785 -- Prep for first wishbone read. We calculate the address of
786 -- the start of the cache line and start the WB cycle.
788 r.wb.adr <= addr_to_wb(req_raddr);
792 -- Track that we had one request sent
796 when CLR_TAG | WAIT_ACK =>
797 assert not is_X(r.store_index) severity failure;
798 assert not is_X(r.store_row) severity failure;
799 assert not is_X(r.recv_row) severity failure;
800 if r.state = CLR_TAG then
801 replace_way := to_unsigned(0, WAY_BITS);
803 -- Get victim way from plru
804 replace_way := plru_victim;
806 r.store_way <= replace_way;
808 -- Force misses on that way while reloading that line
809 assert not is_X(replace_way) severity failure;
810 cache_valids(to_integer(r.store_index))(to_integer(replace_way)) <= '0';
812 -- Store new tag in selected way
813 for i in 0 to NUM_WAYS-1 loop
814 if to_unsigned(i, WAY_BITS) = replace_way then
815 tagset := cache_tags(to_integer(r.store_index));
816 write_tag(i, tagset, r.store_tag);
817 cache_tags(to_integer(r.store_index)) <= tagset;
824 -- If we are writing in this cycle, mark row valid and see if we are done
825 if r.recv_valid = '1' then
826 r.rows_valid(to_integer(r.store_row(ROW_LINEBITS-1 downto 0))) <= not inval_in;
827 if is_last_row(r.store_row, r.end_row_ix) then
828 -- Cache line is now valid
829 cache_valids(to_integer(r.store_index))(to_integer(r.store_way)) <=
830 r.store_valid and not inval_in;
834 -- Increment store row counter
835 r.store_row <= r.recv_row;
838 -- If we are still sending requests, was one accepted ?
839 if wishbone_in.stall = '0' and r.wb.stb = '1' then
840 -- That was the last word ? We are done sending. Clear stb.
842 if is_last_row_wb_addr(r.wb.adr, r.end_row_ix) then
846 -- Calculate the next row address
847 r.wb.adr <= next_row_wb_addr(r.wb.adr);
850 -- Abort reload if we get an invalidation
851 if inval_in = '1' then
853 r.state <= STOP_RELOAD;
856 -- Incoming acks processing
857 if wishbone_in.ack = '1' then
858 -- Check for completion
859 if is_last_row(r.recv_row, r.end_row_ix) then
860 -- Complete wishbone cycle
865 -- Increment receive row counter
866 r.recv_row <= next_row(r.recv_row);
870 -- Wait for all outstanding requests to be satisfied, then
872 if get_row_of_line(r.recv_row) = get_row_of_line(get_row(wb_to_addr(r.wb.adr))) then
876 if wishbone_in.ack = '1' then
877 -- Increment receive row counter
878 r.recv_row <= next_row(r.recv_row);
883 -- TLB miss and protection fault processing
884 if rst = '1' or flush_in = '1' or m_in.tlbld = '1' then
885 r.fetch_failed <= '0';
886 elsif i_in.req = '1' and access_ok = '0' and stall_in = '0' then
887 r.fetch_failed <= '1';
892 icache_log: if LOG_LENGTH > 0 generate
893 -- Output data to logger
894 signal log_data : std_ulogic_vector(57 downto 0);
896 data_log: process(clk)
897 variable lway: way_sig_t;
898 variable wstate: std_ulogic;
900 if rising_edge(clk) then
903 if r.state /= IDLE then
906 log_data <= i_out.valid &
909 r.wb.adr(2 downto 0) &
910 r.wb.stb & r.wb.cyc &
914 r.hit_nia(5 downto 2) &
916 std_ulogic_vector(resize(lway, 3)) &
917 req_is_hit & req_is_miss &