dcache: Add a dcache
[microwatt.git] / dcache.vhdl
1 --
2 -- Set associative dcache write-through
3 --
4 -- TODO (in no specific order):
5 --
6 -- * See list in icache.vhdl
7 -- * Complete load misses on the cycle when WB data comes instead of
8 -- at the end of line (this requires dealing with requests coming in
9 -- while not idle...)
10 --
11 library ieee;
12 use ieee.std_logic_1164.all;
13 use ieee.numeric_std.all;
14
15 library work;
16 use work.common.all;
17 use work.helpers.all;
18 use work.wishbone_types.all;
19
-- Set-associative write-through data cache. Geometry is set by the
-- generics; requests come from loadstore1 and memory is accessed over
-- a 64-bit wishbone master port.
entity dcache is
    generic (
        -- Line size in bytes
        LINE_SIZE : positive := 64;
        -- Number of lines in a set
        NUM_LINES : positive := 32;
        -- Number of ways
        NUM_WAYS  : positive := 4
        );
    port (
        clk : in std_ulogic;
        rst : in std_ulogic;

        -- Request from / response to the load/store unit
        d_in  : in Loadstore1ToDcacheType;
        d_out : out DcacheToWritebackType;

        -- Memory-side wishbone master interface
        wishbone_out : out wishbone_master_out;
        wishbone_in  : in wishbone_slave_out
        );
end entity dcache;
40
architecture rtl of dcache is
    -- Integer floor(log2(i)), used to derive address field widths from
    -- the geometry generics. Returns 0 for i <= 1.
    function log2(i : natural) return integer is
        variable tmp : integer := i;
        variable ret : integer := 0;
    begin
        -- Count how many times the value can be halved before reaching 1
        while tmp > 1 loop
            ret := ret + 1;
            tmp := tmp / 2;
        end loop;
        return ret;
    end function;
52
53 function ispow2(i : integer) return boolean is
54 begin
55 if to_integer(to_unsigned(i, 32) and to_unsigned(i - 1, 32)) = 0 then
56 return true;
57 else
58 return false;
59 end if;
60 end function;
61
    -- BRAM organisation: We never access more than wishbone_data_bits at
    -- a time so to save resources we make the array only that wide, and
    -- use consecutive indices to make a cache "line"
    --
    -- ROW_SIZE is the width in bytes of the BRAM (based on WB, so 64-bits)
    constant ROW_SIZE : natural := wishbone_data_bits / 8;
    -- ROW_PER_LINE is the number of rows (wishbone transactions) in a line
    constant ROW_PER_LINE : natural := LINE_SIZE / ROW_SIZE;
    -- BRAM_ROWS is the number of rows in BRAM needed to represent the full
    -- dcache
    constant BRAM_ROWS : natural := NUM_LINES * ROW_PER_LINE;

    -- Bit fields counts in the address

    -- ROW_BITS is the number of bits to select a row
    constant ROW_BITS : natural := log2(BRAM_ROWS);
    -- ROW_LINEBITS is the number of bits to select a row within a line
    constant ROW_LINEBITS : natural := log2(ROW_PER_LINE);
    -- LINE_OFF_BITS is the number of bits for the offset in a cache line
    constant LINE_OFF_BITS : natural := log2(LINE_SIZE);
    -- ROW_OFF_BITS is the number of bits for the offset in a row
    constant ROW_OFF_BITS : natural := log2(ROW_SIZE);
    -- INDEX_BITS is the number of bits to select a cache line
    constant INDEX_BITS : natural := log2(NUM_LINES);
    -- TAG_BITS is the number of bits of the tag part of the address
    constant TAG_BITS : natural := 64 - LINE_OFF_BITS - INDEX_BITS;
    -- WAY_BITS is the number of bits to select a way
    constant WAY_BITS : natural := log2(NUM_WAYS);

    -- Example of layout for 32 lines of 64 bytes:
    --
    -- ..  tag    |index|  line  |
    -- ..         |   row   |    |
    -- ..         |     |---|    | ROW_LINEBITS  (3)
    -- ..         |     |--- - --| LINE_OFF_BITS (6)
    -- ..         |         |- --| ROW_OFF_BITS  (3)
    -- ..         |----- ---|    | ROW_BITS      (8)
    -- ..         |-----|        | INDEX_BITS    (5)
    -- .. --------|              | TAG_BITS      (53)

    subtype row_t is integer range 0 to BRAM_ROWS-1;
    subtype index_t is integer range 0 to NUM_LINES-1;
    subtype way_t is integer range 0 to NUM_WAYS-1;

    -- The cache data BRAM organized as described above for each way
    subtype cache_row_t is std_ulogic_vector(wishbone_data_bits-1 downto 0);

    -- The cache tags LUTRAM has a row per set. Vivado is a pain and will
    -- not handle a clean (commented) definition of the cache tags as a 3d
    -- memory. For now, work around it by putting all the tags of a set
    -- side by side in one wide word.
    subtype cache_tag_t is std_logic_vector(TAG_BITS-1 downto 0);
    -- type cache_tags_set_t is array(way_t) of cache_tag_t;
    -- type cache_tags_array_t is array(index_t) of cache_tags_set_t;
    constant TAG_RAM_WIDTH : natural := TAG_BITS * NUM_WAYS;
    subtype cache_tags_set_t is std_logic_vector(TAG_RAM_WIDTH-1 downto 0);
    type cache_tags_array_t is array(index_t) of cache_tags_set_t;

    -- The cache valid bits, one per way for every line
    subtype cache_way_valids_t is std_ulogic_vector(NUM_WAYS-1 downto 0);
    type cache_valids_t is array(index_t) of cache_way_valids_t;

    -- Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
    signal cache_tags   : cache_tags_array_t;
    signal cache_valids : cache_valids_t;

    -- Keep the tag RAM in LUTRAM (distributed) rather than block RAM
    attribute ram_style : string;
    attribute ram_style of cache_tags : signal is "distributed";
130 -- Type of operation on a "valid" input
    -- Type of operation on a "valid" input
    type op_t is (OP_NONE,
                  OP_LOAD_HIT,      -- Cache hit on load
                  OP_LOAD_MISS,     -- Load missing cache
                  OP_LOAD_NC,       -- Non-cachable load
                  OP_BAD,           -- BAD: Cache hit on NC load/store
                  OP_STORE_HIT,     -- Store hitting cache
                  OP_STORE_MISS);   -- Store missing cache

    -- Cache state machine
    type state_t is (IDLE,             -- Normal load hit processing
                     LOAD_UPDATE,      -- Load with update address update cycle
                     RELOAD_WAIT_ACK,  -- Cache reload wait ack
                     STORE_WAIT_ACK,   -- Store wait ack
                     NC_LOAD_WAIT_ACK);-- Non-cachable load wait ack

    -- All synchronous state of the dcache, written by the two clocked
    -- processes below.
    type reg_internal_t is record
        -- Latched copy of the request (d_in.valid is a one-cycle pulse)
        req_latch : Loadstore1ToDcacheType;

        -- Cache hit state (Latches for 1 cycle BRAM access)
        hit_way : way_t;
        hit_load_valid : std_ulogic;

        -- Register update (load/store with update)
        update_valid : std_ulogic;

        -- Data buffer for "slow" read ops (load miss and NC loads).
        slow_data : std_ulogic_vector(63 downto 0);
        slow_valid : std_ulogic;

        -- Cache miss state (reload state machine)
        state : state_t;
        wb : wishbone_master_out;
        store_way : way_t;
        store_index : index_t;
    end record;

    signal r : reg_internal_t;

    -- Async signals decoded combinationally from the incoming request
    signal req_index : index_t;
    signal req_row : row_t;
    signal req_hit_way : way_t;
    signal req_tag : cache_tag_t;
    signal req_op : op_t;

    -- Cache RAM interface: one read data word per way
    type cache_ram_out_t is array(way_t) of cache_row_t;
    signal cache_out : cache_ram_out_t;

    -- PLRU output interface: victim way per line
    type plru_out_t is array(index_t) of std_ulogic_vector(WAY_BITS-1 downto 0);
    signal plru_victim : plru_out_t;

    -- Wishbone read/write/cache write formatting signals
    signal bus_sel : wishbone_sel_type;
    signal store_data : wishbone_data_type;
187
188 -- Return the cache line index (tag index) for an address
189 function get_index(addr: std_ulogic_vector(63 downto 0)) return index_t is
190 begin
191 return to_integer(unsigned(addr(63-TAG_BITS downto LINE_OFF_BITS)));
192 end;
193
194 -- Return the cache row index (data memory) for an address
195 function get_row(addr: std_ulogic_vector(63 downto 0)) return row_t is
196 begin
197 return to_integer(unsigned(addr(63-TAG_BITS downto ROW_OFF_BITS)));
198 end;
199
200 -- Returns whether this is the last row of a line
201 function is_last_row(addr: std_ulogic_vector(63 downto 0)) return boolean is
202 constant ones : std_ulogic_vector(ROW_LINEBITS-1 downto 0) := (others => '1');
203 begin
204 return addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS) = ones;
205 end;
206
207 -- Return the address of the next row in the current cache line
208 function next_row_addr(addr: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
209 variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
210 variable result : std_ulogic_vector(63 downto 0);
211 begin
212 -- Is there no simpler way in VHDL to generate that 3 bits adder ?
213 row_idx := addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS);
214 row_idx := std_ulogic_vector(unsigned(row_idx) + 1);
215 result := addr;
216 result(LINE_OFF_BITS-1 downto ROW_OFF_BITS) := row_idx;
217 return result;
218 end;
219
220 -- Get the tag value from the address
221 function get_tag(addr: std_ulogic_vector(63 downto 0)) return cache_tag_t is
222 begin
223 return addr(63 downto 64-TAG_BITS);
224 end;
225
    -- Read a tag from a tag memory row: extract the TAG_BITS-wide slot
    -- for the given way out of the flattened per-set tag word
    function read_tag(way: way_t; tagset: cache_tags_set_t) return cache_tag_t is
    begin
        return tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS);
    end;

    -- Write a tag to tag memory row: update only the given way's slot
    -- in the flattened per-set tag word
    procedure write_tag(way: in way_t; tagset: inout cache_tags_set_t;
                        tag: cache_tag_t) is
    begin
        tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS) := tag;
    end;
238
239 -- Generate byte enables from sizes
240 function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is
241 begin
242 case length is
243 when "0001" =>
244 return "00000001";
245 when "0010" =>
246 return "00000011";
247 when "0100" =>
248 return "00001111";
249 when "1000" =>
250 return "11111111";
251 when others =>
252 return "00000000";
253 end case;
254 end function length_to_sel;
255
256 -- Calculate shift and byte enables for wishbone
257 function wishbone_data_shift(address : in std_ulogic_vector(63 downto 0)) return natural is
258 begin
259 return to_integer(unsigned(address(2 downto 0))) * 8;
260 end function wishbone_data_shift;
261
262 function wishbone_data_sel(size : in std_logic_vector(3 downto 0);
263 address : in std_logic_vector(63 downto 0))
264 return std_ulogic_vector is
265 begin
266 return std_ulogic_vector(shift_left(unsigned(length_to_sel(size)),
267 to_integer(unsigned(address(2 downto 0)))));
268 end function wishbone_data_sel;
269
begin

    -- Elaboration-time sanity checks: the address-slicing helpers above
    -- only work when the geometry generics satisfy these relationships.
    assert LINE_SIZE mod ROW_SIZE = 0 report "LINE_SIZE not multiple of ROW_SIZE" severity FAILURE;
    assert ispow2(LINE_SIZE) report "LINE_SIZE not power of 2" severity FAILURE;
    assert ispow2(NUM_LINES) report "NUM_LINES not power of 2" severity FAILURE;
    assert ispow2(ROW_PER_LINE) report "ROW_PER_LINE not power of 2" severity FAILURE;
    assert (ROW_BITS = INDEX_BITS + ROW_LINEBITS)
        report "geometry bits don't add up" severity FAILURE;
    assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS)
        report "geometry bits don't add up" severity FAILURE;
    assert (64 = TAG_BITS + INDEX_BITS + LINE_OFF_BITS)
        report "geometry bits don't add up" severity FAILURE;
    assert (64 = TAG_BITS + ROW_BITS + ROW_OFF_BITS)
        report "geometry bits don't add up" severity FAILURE;
    assert (64 = wishbone_data_bits)
        report "Can't yet handle a wishbone width that isn't 64-bits" severity FAILURE;
286
    -- Generate PLRUs: one pseudo-LRU tracker per cache line, only when
    -- there is more than one way to choose between
    maybe_plrus: if NUM_WAYS > 1 generate
    begin
        plrus: for i in 0 to NUM_LINES-1 generate
            -- PLRU interface
            signal plru_acc    : std_ulogic_vector(WAY_BITS-1 downto 0);
            signal plru_acc_en : std_ulogic;
            signal plru_out    : std_ulogic_vector(WAY_BITS-1 downto 0);

        begin
            plru : entity work.plru
                generic map (
                    BITS => WAY_BITS
                    )
                port map (
                    clk => clk,
                    rst => rst,
                    acc => plru_acc,
                    acc_en => plru_acc_en,
                    lru => plru_out
                    );

            process(req_index, req_op, req_hit_way, plru_out)
            begin
                -- PLRU interface: record an access whenever a load or
                -- store hits this particular line, so the victim choice
                -- tracks recency of use
                if (req_op = OP_LOAD_HIT or
                    req_op = OP_STORE_HIT) and req_index = i then
                    plru_acc_en <= '1';
                else
                    plru_acc_en <= '0';
                end if;
                plru_acc <= std_ulogic_vector(to_unsigned(req_hit_way, WAY_BITS));
                -- Expose this line's current victim to the miss handler
                plru_victim(i) <= plru_out;
            end process;
        end generate;
    end generate;
323
324 -- Cache request parsing and hit detection
325 dcache_request : process(all)
326 variable is_hit : std_ulogic;
327 variable hit_way : way_t;
328 variable op : op_t;
329 variable tmp : std_ulogic_vector(63 downto 0);
330 variable data : std_ulogic_vector(63 downto 0);
331 variable opsel : std_ulogic_vector(3 downto 0);
332 begin
333 -- Extract line, row and tag from request
334 req_index <= get_index(d_in.addr);
335 req_row <= get_row(d_in.addr);
336 req_tag <= get_tag(d_in.addr);
337
338 -- Test if pending request is a hit on any way
339 hit_way := 0;
340 is_hit := '0';
341 for i in way_t loop
342 if d_in.valid = '1' and cache_valids(req_index)(i) = '1' then
343 if read_tag(i, cache_tags(req_index)) = req_tag then
344 hit_way := i;
345 is_hit := '1';
346 end if;
347 end if;
348 end loop;
349
350 -- The way that matched on a hit
351 req_hit_way <= hit_way;
352
353 -- Combine the request and cache his status to decide what
354 -- operation needs to be done
355 --
356 opsel := d_in.valid & d_in.load & d_in.nc & is_hit;
357 case opsel is
358 when "1101" => op := OP_LOAD_HIT;
359 when "1100" => op := OP_LOAD_MISS;
360 when "1110" => op := OP_LOAD_NC;
361 when "1001" => op := OP_STORE_HIT;
362 when "1000" => op := OP_STORE_MISS;
363 when "1010" => op := OP_STORE_MISS;
364 when "1011" => op := OP_BAD;
365 when "1111" => op := OP_BAD;
366 when others => op := OP_NONE;
367 end case;
368
369 req_op <= op;
370
371 -- XXX GENERATE ERRORS
372 -- err_nc_collision <= '1' when op = OP_BAD else '0';
373
374 -- XXX Generate stalls
375 -- stall_out <= r.state /= IDLE ?
376
377 end process;
378
    -- Wire up wishbone request latch out of the register
    wishbone_out <= r.wb;

    -- Writeback (loads and reg updates) & completion control logic.
    -- Combinational; later assignments in the process override earlier
    -- defaults, so statement order matters here.
    --
    writeback_control: process(all)
        variable writeback_format : boolean;
    begin

        -- The mux on d_out.write reg defaults to the normal load hit case.
        d_out.write_enable <= '0';
        d_out.valid <= '0';
        d_out.write_reg <= r.req_latch.write_reg;
        d_out.write_data <= cache_out(r.hit_way);
        d_out.write_len <= r.req_latch.length;
        d_out.write_shift <= r.req_latch.addr(2 downto 0);
        d_out.sign_extend <= r.req_latch.sign_extend;
        d_out.byte_reverse <= r.req_latch.byte_reverse;
        d_out.second_word <= '0';

        -- By default writeback doesn't need formatting
        writeback_format := false;

        -- We have a valid load or store hit or we just completed a slow
        -- op such as a load miss, a NC load or a store
        --
        if r.hit_load_valid = '1' or r.slow_valid = '1' then
            if r.req_latch.load = '1' then
                -- If it's a load, enable write back and enable formatting
                d_out.write_enable <= '1';
                writeback_format := true;

                -- If it's a slow load (miss or NC) source it from the buffer
                if r.slow_valid = '1' then
                    d_out.write_data <= r.slow_data;
                end if;

                -- If it's a normal load (not a load with update), we complete
                -- now, otherwise we wait for the delayed update.
                --
                if r.req_latch.update = '0' then
                    d_out.valid <= '1';
                end if;
            else
                -- It's a store, complete always
                d_out.valid <= '1';
            end if;

            -- Sanity: the update cycle is always a separate cycle from
            -- the hit/slow-completion cycle, so both can share d_out
            assert r.update_valid = '0' report "unexpected update_valid"
                severity FAILURE;
        end if;

        -- We have a register update to do: send the (already computed)
        -- effective address back to the update register
        if r.update_valid = '1' then
            d_out.write_enable <= '1';
            d_out.write_reg <= r.req_latch.update_reg;
            d_out.write_data <= r.req_latch.addr;

            -- If it was a load, this completes the operation (the data
            -- itself went out on a previous cycle)
            if r.req_latch.load = '1' then
                d_out.valid <= '1';
            end if;
        end if;

        -- Address writeback (and anything non-load) goes out unformatted:
        -- full 8-byte width, no shift, no sign extend, no byte reverse
        if not writeback_format then
            d_out.write_len <= "1000";
            d_out.write_shift <= "000";
            d_out.sign_extend <= '0';
            d_out.byte_reverse <= '0';
        end if;

    end process;
452
453 -- Misc data & sel signals
454 misc: process(d_in)
455 begin
456 -- Wishbone & BRAM write data formatting for stores (most of it already
457 -- happens in loadstore1, this is the remaining sel generation and shifting)
458 --
459 store_data <= std_logic_vector(shift_left(unsigned(d_in.data),
460 wishbone_data_shift(d_in.addr)));
461
462 -- Wishbone read and write and BRAM write sel bits generation
463 bus_sel <= wishbone_data_sel(d_in.length, d_in.addr);
464 end process;
465
    -- Generate a cache RAM for each way. This handles the normal
    -- reads, writes from reloads and the special store-hit update
    -- path as well. The write mux below is priority-ordered: the
    -- store-hit assignment overrides the reload defaults.
    --
    rams: for i in 0 to NUM_WAYS-1 generate
        signal do_read  : std_ulogic;
        signal rd_addr  : std_ulogic_vector(ROW_BITS-1 downto 0);
        signal do_write : std_ulogic;
        signal wr_addr  : std_ulogic_vector(ROW_BITS-1 downto 0);
        signal wr_data  : std_ulogic_vector(wishbone_data_bits-1 downto 0);
        signal wr_sel   : std_ulogic_vector(ROW_SIZE-1 downto 0);
        signal dout     : cache_row_t;
    begin
        way: entity work.cache_ram
            generic map (
                ROW_BITS => ROW_BITS,
                WIDTH => wishbone_data_bits
                )
            port map (
                clk => clk,
                rd_en => do_read,
                rd_addr => rd_addr,
                rd_data => dout,
                wr_en => do_write,
                wr_sel => wr_sel,
                wr_addr => wr_addr,
                wr_data => wr_data
                );
        process(all)
        begin
            do_read <= '0';
            do_write <= '0';

            -- Cache hit reads: only the hit way enables its read port
            if req_op = OP_LOAD_HIT and req_hit_way = i then
                do_read <= '1';
            end if;
            rd_addr <= std_ulogic_vector(to_unsigned(req_row, ROW_BITS));
            cache_out(i) <= dout;

            -- Write mux:
            --
            -- Defaults to wishbone read responses (cache refill): full
            -- row writes at the row the reload machine is fetching,
            -- enabled only for the way being reloaded on each ack
            --
            wr_data <= wishbone_in.dat;
            wr_sel  <= (others => '1');
            wr_addr <= std_ulogic_vector(to_unsigned(get_row(r.wb.adr), ROW_BITS));
            if r.state = RELOAD_WAIT_ACK and wishbone_in.ack = '1' and r.store_way = i then
                do_write <= '1';
            end if;

            -- Alternatively, store-hit BRAM update case (exclusive from the
            -- above since we are never in RELOAD_WAIT_ACK when a new request
            -- is accepted): partial write of the store data at the request row
            if req_op = OP_STORE_HIT and req_hit_way = i then
                report "store_data:" & to_hstring(store_data);
                wr_addr <= std_ulogic_vector(to_unsigned(req_row, ROW_BITS));
                wr_data <= store_data;
                wr_sel  <= bus_sel;
                do_write <= '1';
            end if;
        end process;
    end generate;
527
    -- Cache hit synchronous machine for the easy case. This handles
    -- non-update form load hits: it latches the request and, on a load
    -- hit, arms the one-cycle hit_load_valid pulse that drives
    -- writeback_control while the BRAM read completes.
    --
    dcache_fast_hit : process(clk)
    begin
        if rising_edge(clk) then
            -- On-cycle pulse values get reset on every cycle
            r.hit_load_valid <= '0';

            -- If we have a request incoming, we have to latch it as d_in.valid
            -- is only set for a single cycle. It's up to the control logic to
            -- ensure we don't override an uncompleted request (for now we are
            -- single issue on load/stores so we are fine, later, we can generate
            -- a stall output if necessary).

            if d_in.valid = '1' then
                r.req_latch <= d_in;

                report "dcache op:" & op_t'image(req_op) &
                    " addr:" & to_hstring(d_in.addr) &
                    " upd:" & std_ulogic'image(d_in.update) &
                    " nc:" & std_ulogic'image(d_in.nc) &
                    " reg:" & to_hstring(d_in.write_reg) &
                    " idx:" & integer'image(req_index) &
                    " tag:" & to_hstring(req_tag) &
                    " way: " & integer'image(req_hit_way);
            end if;

            -- Fast path for load/store hits. Set signals for the writeback controls.
            if req_op = OP_LOAD_HIT then
                r.hit_way <= req_hit_way;
                r.hit_load_valid <= '1';
            end if;
        end if;
    end process;
563
    -- Every other case is handled by this state machine:
    --
    --   * Cache load miss/reload (in conjunction with "rams")
    --   * Load hits for update forms
    --   * Load hits for non-cachable forms
    --   * Stores (the collision case is handled in "rams")
    --
    -- All wishbone requests generation is done here
    --
    dcache_slow : process(clk)
        -- NOTE(review): "way" is a process variable and therefore
        -- persists across clock edges; the RELOAD_WAIT_ACK state below
        -- relies on it still holding the victim chosen in IDLE. That
        -- works, but r.store_way carries the same value and would be
        -- more robust -- confirm before relying on this further.
        variable way : integer range 0 to NUM_WAYS-1;
        variable tagset : cache_tags_set_t;
    begin
        if rising_edge(clk) then
            -- On reset, clear all valid bits to force misses
            if rst = '1' then
                for i in index_t loop
                    cache_valids(i) <= (others => '0');
                end loop;
                r.state <= IDLE;
                r.slow_valid <= '0';
                r.update_valid <= '0';
                r.wb.cyc <= '0';
                r.wb.stb <= '0';

                -- Not useful normally but helps avoiding tons of sim warnings
                r.wb.adr <= (others => '0');
            else
                -- One cycle pulses reset
                r.slow_valid <= '0';
                r.update_valid <= '0';

                -- We cannot currently process a new request when not idle
                assert req_op = OP_NONE or r.state = IDLE report "request " &
                    op_t'image(req_op) & " while in state " & state_t'image(r.state)
                    severity FAILURE;

                -- Main state machine
                case r.state is
                when IDLE =>
                    case req_op is
                    when OP_LOAD_HIT =>
                        -- We have a load with update hit, we need the delayed update cycle
                        if d_in.update = '1' then
                            r.state <= LOAD_UPDATE;
                        end if;

                    when OP_LOAD_MISS =>
                        -- Normal load cache miss, start the reload machine
                        --
                        -- First find a victim way from the PLRU
                        --
                        way := to_integer(unsigned(plru_victim(req_index)));

                        report "cache miss addr:" & to_hstring(d_in.addr) &
                            " idx:" & integer'image(req_index) &
                            " way:" & integer'image(way) &
                            " tag:" & to_hstring(req_tag);

                        -- Force misses on that way while reloading that line
                        cache_valids(req_index)(way) <= '0';

                        -- Store new tag in selected way
                        for i in 0 to NUM_WAYS-1 loop
                            if i = way then
                                tagset := cache_tags(req_index);
                                write_tag(i, tagset, req_tag);
                                cache_tags(req_index) <= tagset;
                            end if;
                        end loop;

                        -- Keep track of our index and way for subsequent stores.
                        r.store_index <= req_index;
                        r.store_way <= way;

                        -- Prep for first wishbone read. We calculate the address of
                        -- the start of the cache line and kick off the burst
                        --
                        r.wb.adr <= d_in.addr(63 downto LINE_OFF_BITS) &
                                    (LINE_OFF_BITS-1 downto 0 => '0');
                        r.wb.sel <= (others => '1');
                        r.wb.we  <= '0';
                        r.wb.cyc <= '1';
                        r.wb.stb <= '1';
                        r.state <= RELOAD_WAIT_ACK;

                    when OP_LOAD_NC =>
                        -- Non-cachable load: single doubleword-aligned
                        -- wishbone read, bypassing the cache arrays
                        r.wb.sel <= bus_sel;
                        r.wb.adr <= d_in.addr(63 downto 3) & "000";
                        r.wb.cyc <= '1';
                        r.wb.stb <= '1';
                        r.wb.we <= '0';
                        r.state <= NC_LOAD_WAIT_ACK;

                    when OP_STORE_HIT | OP_STORE_MISS =>
                        -- Write-through: both hit and miss go to memory;
                        -- the BRAM update for the hit case happens in "rams"
                        -- For store-with-update do the register update
                        if d_in.update = '1' then
                            r.update_valid <= '1';
                        end if;
                        r.wb.sel <= bus_sel;
                        r.wb.adr <= d_in.addr(63 downto 3) & "000";
                        r.wb.dat <= store_data;
                        r.wb.cyc <= '1';
                        r.wb.stb <= '1';
                        r.wb.we <= '1';
                        r.state <= STORE_WAIT_ACK;

                    -- OP_NONE and OP_BAD do nothing
                    when OP_NONE =>
                    when OP_BAD =>
                    end case;

                when RELOAD_WAIT_ACK =>
                    if wishbone_in.ack = '1' then
                        -- Is this the data we were looking for ? Latch it so
                        -- we can respond later. We don't currently complete the
                        -- pending miss request immediately, we wait for the
                        -- whole line to be loaded. The reason is that if we
                        -- did, we would potentially get new requests in while
                        -- not idle, which we don't currently know how to deal
                        -- with.
                        --
                        if r.wb.adr(LINE_OFF_BITS-1 downto ROW_OFF_BITS) =
                            r.req_latch.addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS) then
                            r.slow_data <= wishbone_in.dat;
                        end if;

                        -- That was the last word ? We are done
                        if is_last_row(r.wb.adr) then
                            -- Line fully reloaded: re-validate the victim way
                            -- ("way" kept its IDLE-cycle value, see note above)
                            cache_valids(r.store_index)(way) <= '1';
                            r.wb.cyc <= '0';
                            r.wb.stb <= '0';

                            -- Complete the load that missed. For load with update
                            -- we also need to do the deferred update cycle.
                            --
                            r.slow_valid <= '1';
                            if r.req_latch.load = '1' and r.req_latch.update = '1' then
                                r.state <= LOAD_UPDATE;
                                report "completing miss with load-update !";
                            else
                                r.state <= IDLE;
                                report "completing miss !";
                            end if;
                        else
                            -- Otherwise, calculate the next row address
                            r.wb.adr <= next_row_addr(r.wb.adr);
                        end if;
                    end if;

                when LOAD_UPDATE =>
                    -- We need the extra cycle to complete a load with update
                    r.update_valid <= '1';
                    r.state <= IDLE;

                when STORE_WAIT_ACK | NC_LOAD_WAIT_ACK =>
                    if wishbone_in.ack = '1' then
                        -- Only NC loads carry read data back
                        if r.state = NC_LOAD_WAIT_ACK then
                            r.slow_data <= wishbone_in.dat;
                        end if;
                        r.slow_valid <= '1';
                        r.wb.cyc <= '0';
                        r.wb.stb <= '0';
                        r.state <= IDLE;
                    end if;
                end case;
            end if;
        end if;
    end process;
end;