whitespace
[soc.git] / src / soc / experiment / icache.py
1 """ICache
2
3 based on Anton Blanchard microwatt icache.vhdl
4
5 Set associative icache
6
7 TODO (in no specific order):
8 * Add debug interface to inspect cache content
9 * Add snoop/invalidate path
10 * Add multi-hit error detection
11 * Pipelined bus interface (wb or axi)
12 * Maybe add parity? There's a few bits free in each BRAM row on Xilinx
13 * Add optimization: service hits on partially loaded lines
14 * Add optimization: (maybe) interrupt reload on flush/redirect
15 * Check if playing with the geometry of the cache tags allows for more
16 efficient use of distributed RAM and less logic/muxes. Currently we
17 write TAG_BITS width which may not match full ram blocks and might
18 cause muxes to be inferred for "partial writes".
19 * Check if making the read size of PLRU a ROM helps utilization
20
21 """
22 from enum import Enum, unique
23 from nmigen import (Module, Signal, Elaboratable, Cat, Array, Const, Repl)
24 from nmigen.cli import main, rtlil
25 from nmutil.iocontrol import RecordObject
26 from nmigen.utils import log2_int
27 from nmutil.util import Display
28
29 #from nmutil.plru import PLRU
30 from soc.experiment.cache_ram import CacheRam
31 from soc.experiment.plru import PLRU
32
33 from soc.experiment.mem_types import (Fetch1ToICacheType,
34 ICacheToDecode1Type,
35 MMUToICacheType)
36
37 from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS,
38 WB_SEL_BITS, WBAddrType, WBDataType,
39 WBSelType, WBMasterOut, WBSlaveOut,
40 WBMasterOutVector, WBSlaveOutVector,
41 WBIOMasterOut, WBIOSlaveOut)
42
43 # for test
44 from nmigen_soc.wishbone.sram import SRAM
45 from nmigen import Memory
46 from nmutil.util import wrap
47 from nmigen.cli import main, rtlil
48 if True:
49 from nmigen.back.pysim import Simulator, Delay, Settle
50 else:
51 from nmigen.sim.cxxsim import Simulator, Delay, Settle
52
53
SIM = 0
# Cache line size in bytes
LINE_SIZE = 64
# BRAM organisation: We never access more than wishbone_data_bits
# at a time so to save resources we make the array only that wide,
# and use consecutive indices to make a cache "line"
#
# ROW_SIZE is the width in bytes of the BRAM (based on WB, so 64-bits)
ROW_SIZE = WB_DATA_BITS // 8
# Number of lines in a set
NUM_LINES = 16
# Number of ways
NUM_WAYS = 4
# L1 ITLB number of entries (direct mapped)
TLB_SIZE = 64
# L1 ITLB log_2(page_size)
TLB_LG_PGSZ = 12
# Number of real address bits that we store
REAL_ADDR_BITS = 56
# Non-zero to enable log data collection
LOG_LENGTH = 0

ROW_SIZE_BITS = ROW_SIZE * 8
# ROW_PER_LINE is the number of row
# (wishbone) transactions in a line
ROW_PER_LINE = LINE_SIZE // ROW_SIZE
# BRAM_ROWS is the number of rows in
# BRAM needed to represent the full icache
BRAM_ROWS = NUM_LINES * ROW_PER_LINE
# INSN_PER_ROW is the number of 32bit
# instructions per BRAM row
INSN_PER_ROW = ROW_SIZE_BITS // 32

# Bit fields counts in the address
#
# INSN_BITS is the number of bits to
# select an instruction in a row
INSN_BITS = log2_int(INSN_PER_ROW)
# ROW_BITS is the number of bits to
# select a row
ROW_BITS = log2_int(BRAM_ROWS)
# ROW_LINE_BITS is the number of bits to
# select a row within a line
ROW_LINE_BITS = log2_int(ROW_PER_LINE)
# LINE_OFF_BITS is the number of bits for
# the offset in a cache line
LINE_OFF_BITS = log2_int(LINE_SIZE)
# ROW_OFF_BITS is the number of bits for
# the offset in a row
ROW_OFF_BITS = log2_int(ROW_SIZE)
# INDEX_BITS is the number of bits to
# select a cache line
INDEX_BITS = log2_int(NUM_LINES)
# SET_SIZE_BITS is the log base 2 of
# the set size
SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS
# TAG_BITS is the number of bits of
# the tag part of the address
TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS
# TAG_WIDTH is the width in bits of each way of the tag RAM
# (TAG_BITS rounded up to the next byte boundary)
TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)

# WAY_BITS is the number of bits to
# select a way
WAY_BITS = log2_int(NUM_WAYS)
TAG_RAM_WIDTH = TAG_BITS * NUM_WAYS

# L1 ITLB geometry (direct mapped)
TLB_BITS = log2_int(TLB_SIZE)
TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_BITS)
TLB_PTE_BITS = 64

print("BRAM_ROWS =", BRAM_ROWS)
print("INDEX_BITS =", INDEX_BITS)
print("INSN_BITS =", INSN_BITS)
print("INSN_PER_ROW =", INSN_PER_ROW)
print("LINE_SIZE =", LINE_SIZE)
print("LINE_OFF_BITS =", LINE_OFF_BITS)
print("LOG_LENGTH =", LOG_LENGTH)
print("NUM_LINES =", NUM_LINES)
print("NUM_WAYS =", NUM_WAYS)
print("REAL_ADDR_BITS =", REAL_ADDR_BITS)
print("ROW_BITS =", ROW_BITS)
print("ROW_OFF_BITS =", ROW_OFF_BITS)
print("ROW_LINE_BITS =", ROW_LINE_BITS)
print("ROW_PER_LINE =", ROW_PER_LINE)
print("ROW_SIZE =", ROW_SIZE)
print("ROW_SIZE_BITS =", ROW_SIZE_BITS)
print("SET_SIZE_BITS =", SET_SIZE_BITS)
print("SIM =", SIM)
print("TAG_BITS =", TAG_BITS)
print("TAG_RAM_WIDTH =", TAG_RAM_WIDTH)
# was a duplicate "TAG_BITS" line; TAG_WIDTH was computed but never shown
print("TAG_WIDTH =", TAG_WIDTH)
print("TLB_BITS =", TLB_BITS)
print("TLB_EA_TAG_BITS =", TLB_EA_TAG_BITS)
print("TLB_LG_PGSZ =", TLB_LG_PGSZ)
print("TLB_PTE_BITS =", TLB_PTE_BITS)
print("TLB_SIZE =", TLB_SIZE)
print("WAY_BITS =", WAY_BITS)
155
# from microwatt/utils.vhdl
def ispow2(n):
    """Return True when *n* is a positive power of two.

    The previous version shifted both operands left by 32 bits before
    AND-ing — a literal transliteration of the VHDL original that is
    redundant with Python's arbitrary-precision integers, and which
    also mis-reported 0 as a power of two.
    """
    return (n > 0) and (n & (n - 1)) == 0
163
# Sanity-check that the geometry constants above are mutually
# consistent: the derived field widths must tile exactly into the
# REAL_ADDR_BITS address layout (tag | index | line offset).
assert LINE_SIZE % ROW_SIZE == 0
assert ispow2(LINE_SIZE), "LINE_SIZE not power of 2"
assert ispow2(NUM_LINES), "NUM_LINES not power of 2"
assert ispow2(ROW_PER_LINE), "ROW_PER_LINE not power of 2"
assert ispow2(INSN_PER_ROW), "INSN_PER_ROW not power of 2"
assert (ROW_BITS == (INDEX_BITS + ROW_LINE_BITS)), \
    "geometry bits don't add up"
assert (LINE_OFF_BITS == (ROW_OFF_BITS + ROW_LINE_BITS)), \
    "geometry bits don't add up"
assert (REAL_ADDR_BITS == (TAG_BITS + INDEX_BITS + LINE_OFF_BITS)), \
    "geometry bits don't add up"
assert (REAL_ADDR_BITS == (TAG_BITS + ROW_BITS + ROW_OFF_BITS)), \
    "geometry bits don't add up"
177
178 # architecture rtl of icache is
179 #constant ROW_SIZE_BITS : natural := ROW_SIZE*8;
180 #-- ROW_PER_LINE is the number of row (wishbone
181 #-- transactions) in a line
182 #constant ROW_PER_LINE : natural := LINE_SIZE / ROW_SIZE;
183 #-- BRAM_ROWS is the number of rows in BRAM
184 #-- needed to represent the full
185 #-- icache
186 #constant BRAM_ROWS : natural := NUM_LINES * ROW_PER_LINE;
187 #-- INSN_PER_ROW is the number of 32bit instructions per BRAM row
188 #constant INSN_PER_ROW : natural := ROW_SIZE_BITS / 32;
189 #-- Bit fields counts in the address
190 #
191 #-- INSN_BITS is the number of bits to select
192 #-- an instruction in a row
193 #constant INSN_BITS : natural := log2(INSN_PER_ROW);
194 #-- ROW_BITS is the number of bits to select a row
195 #constant ROW_BITS : natural := log2(BRAM_ROWS);
196 #-- ROW_LINE_BITS is the number of bits to
197 #-- select a row within a line
198 #constant ROW_LINE_BITS : natural := log2(ROW_PER_LINE);
199 #-- LINE_OFF_BITS is the number of bits for the offset
200 #-- in a cache line
201 #constant LINE_OFF_BITS : natural := log2(LINE_SIZE);
202 #-- ROW_OFF_BITS is the number of bits for the offset in a row
203 #constant ROW_OFF_BITS : natural := log2(ROW_SIZE);
204 #-- INDEX_BITS is the number of bits to select a cache line
205 #constant INDEX_BITS : natural := log2(NUM_LINES);
206 #-- SET_SIZE_BITS is the log base 2 of the set size
207 #constant SET_SIZE_BITS : natural := LINE_OFF_BITS + INDEX_BITS;
208 #-- TAG_BITS is the number of bits of the tag part of the address
209 #constant TAG_BITS : natural := REAL_ADDR_BITS - SET_SIZE_BITS;
210 #-- WAY_BITS is the number of bits to select a way
211 #constant WAY_BITS : natural := log2(NUM_WAYS);
212
213 #-- Example of layout for 32 lines of 64 bytes:
214 #--
215 #-- .. tag |index| line |
216 #-- .. | row | |
217 #-- .. | | | |00| zero (2)
218 #-- .. | | |-| | INSN_BITS (1)
219 #-- .. | |---| | ROW_LINE_BITS (3)
220 #-- .. | |--- - --| LINE_OFF_BITS (6)
221 #-- .. | |- --| ROW_OFF_BITS (3)
222 #-- .. |----- ---| | ROW_BITS (8)
223 #-- .. |-----| | INDEX_BITS (5)
224 #-- .. --------| | TAG_BITS (53)
225 # Example of layout for 32 lines of 64 bytes:
226 #
227 # .. tag |index| line |
228 # .. | row | |
229 # .. | | | |00| zero (2)
230 # .. | | |-| | INSN_BITS (1)
231 # .. | |---| | ROW_LINE_BITS (3)
232 # .. | |--- - --| LINE_OFF_BITS (6)
233 # .. | |- --| ROW_OFF_BITS (3)
234 # .. |----- ---| | ROW_BITS (8)
235 # .. |-----| | INDEX_BITS (5)
236 # .. --------| | TAG_BITS (53)
237
238 #subtype row_t is integer range 0 to BRAM_ROWS-1;
239 #subtype index_t is integer range 0 to NUM_LINES-1;
240 #subtype way_t is integer range 0 to NUM_WAYS-1;
241 #subtype row_in_line_t is unsigned(ROW_LINE_BITS-1 downto 0);
242 #
243 #-- The cache data BRAM organized as described above for each way
244 #subtype cache_row_t is std_ulogic_vector(ROW_SIZE_BITS-1 downto 0);
245 #
246 #-- The cache tags LUTRAM has a row per set. Vivado is a pain and will
247 #-- not handle a clean (commented) definition of the cache tags as a 3d
248 #-- memory. For now, work around it by putting all the tags
249 #subtype cache_tag_t is std_logic_vector(TAG_BITS-1 downto 0);
250 # type cache_tags_set_t is array(way_t) of cache_tag_t;
251 # type cache_tags_array_t is array(index_t) of cache_tags_set_t;
252 #constant TAG_RAM_WIDTH : natural := TAG_BITS * NUM_WAYS;
253 #subtype cache_tags_set_t is std_logic_vector(TAG_RAM_WIDTH-1 downto 0);
254 #type cache_tags_array_t is array(index_t) of cache_tags_set_t;
def CacheTagArray():
    """Tag RAM: one TAG_RAM_WIDTH-wide row per cache line index,
    holding the tags of all ways concatenated together."""
    rows = [Signal(TAG_RAM_WIDTH, name="cachetag_%d" % x)
            for x in range(NUM_LINES)]
    return Array(rows)
258
259 #-- The cache valid bits
260 #subtype cache_way_valids_t is std_ulogic_vector(NUM_WAYS-1 downto 0);
261 #type cache_valids_t is array(index_t) of cache_way_valids_t;
262 #type row_per_line_valid_t is array(0 to ROW_PER_LINE - 1) of std_ulogic;
def CacheValidBitsArray():
    """Valid flags: one NUM_WAYS-wide bit-vector per cache line index."""
    valids = [Signal(NUM_WAYS, name="cachevalid_%d" % x)
              for x in range(NUM_LINES)]
    return Array(valids)
266
def RowPerLineValidArray():
    """One valid flag per row of the line currently being reloaded."""
    flags = [Signal(name="rows_valid_%d" %x)
             for x in range(ROW_PER_LINE)]
    return Array(flags)
270
271
272 #attribute ram_style : string;
273 #attribute ram_style of cache_tags : signal is "distributed";
274 # TODO to be passed to nigmen as ram attributes
275 # attribute ram_style : string;
276 # attribute ram_style of cache_tags : signal is "distributed";
277
278
279 #subtype tlb_index_t is integer range 0 to TLB_SIZE - 1;
280 #type tlb_valids_t is array(tlb_index_t) of std_ulogic;
281 #subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0);
282 #type tlb_tags_t is array(tlb_index_t) of tlb_tag_t;
283 #subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
284 #type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t;
def TLBValidBitsArray():
    """One valid bit per entry of the direct-mapped iTLB."""
    valids = [Signal(name="tlbvalid_%d" %x)
              for x in range(TLB_SIZE)]
    return Array(valids)
288
def TLBTagArray():
    """Effective-address tag storage, one tag per iTLB entry."""
    tags = [Signal(TLB_EA_TAG_BITS, name="tlbtag_%d" %x)
            for x in range(TLB_SIZE)]
    return Array(tags)
292
def TLBPtesArray():
    """PTE storage, one TLB_PTE_BITS-wide entry per iTLB slot."""
    ptes = [Signal(TLB_PTE_BITS, name="tlbptes_%d" %x)
            for x in range(TLB_SIZE)]
    return Array(ptes)
296
297
298 #-- Cache RAM interface
299 #type cache_ram_out_t is array(way_t) of cache_row_t;
300 # Cache RAM interface
# Cache RAM interface
def CacheRamOut():
    """Read-data bus from each way's BRAM (one row-wide signal per way)."""
    outs = [Signal(ROW_SIZE_BITS, name="cache_out_%d" %x)
            for x in range(NUM_WAYS)]
    return Array(outs)
304
305 #-- PLRU output interface
306 #type plru_out_t is array(index_t) of
307 # std_ulogic_vector(WAY_BITS-1 downto 0);
308 # PLRU output interface
# PLRU output interface
def PLRUOut():
    """Victim-way output from the per-index PLRU trackers."""
    victims = [Signal(WAY_BITS, name="plru_out_%d" %x)
               for x in range(NUM_LINES)]
    return Array(victims)
312
313 # -- Return the cache line index (tag index) for an address
314 # function get_index(addr: std_ulogic_vector(63 downto 0))
315 # return index_t is
316 # begin
317 # return to_integer(unsigned(
318 # addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS)
319 # ));
320 # end;
# Return the cache line index (tag index) for an address
def get_index(addr):
    """Slice the set-index field, bits [LINE_OFF_BITS:SET_SIZE_BITS),
    out of *addr*."""
    return addr[LINE_OFF_BITS:SET_SIZE_BITS]
324
325 # -- Return the cache row index (data memory) for an address
326 # function get_row(addr: std_ulogic_vector(63 downto 0))
327 # return row_t is
328 # begin
329 # return to_integer(unsigned(
330 # addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS)
331 # ));
332 # end;
# Return the cache row index (data memory) for an address
def get_row(addr):
    """Slice the BRAM row index, bits [ROW_OFF_BITS:SET_SIZE_BITS),
    out of *addr*."""
    return addr[ROW_OFF_BITS:SET_SIZE_BITS]
336
337 # -- Return the index of a row within a line
338 # function get_row_of_line(row: row_t) return row_in_line_t is
339 # variable row_v : unsigned(ROW_BITS-1 downto 0);
340 # begin
341 # row_v := to_unsigned(row, ROW_BITS);
342 # return row_v(ROW_LINE_BITS-1 downto 0);
343 # end;
# Return the index of a row within a line
def get_row_of_line(row):
    """Low ROW_LINE_BITS of a row number: its position inside its line."""
    return row[:ROW_LINE_BITS]
347
348 # -- Returns whether this is the last row of a line
349 # function is_last_row_addr(addr: wishbone_addr_type;
350 # last: row_in_line_t
351 # )
352 # return boolean is
353 # begin
354 # return unsigned(
355 # addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS)
356 # ) = last;
357 # end;
# Returns whether this is the last row of a line
def is_last_row_addr(addr, last):
    """True when the row-within-line field of *addr* equals *last*."""
    return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last
361
362 # -- Returns whether this is the last row of a line
363 # function is_last_row(row: row_t;
364 # last: row_in_line_t) return boolean is
365 # begin
366 # return get_row_of_line(row) = last;
367 # end;
# Returns whether this is the last row of a line
def is_last_row(row, last):
    """True when row number *row* sits at position *last* within its line."""
    return get_row_of_line(row) == last
371
372 # -- Return the next row in the current cache line. We use a dedicated
373 # -- function in order to limit the size of the generated adder to be
374 # -- only the bits within a cache line (3 bits with default settings)
375 # function next_row(row: row_t) return row_t is
376 # variable row_v : std_ulogic_vector(ROW_BITS-1 downto 0);
377 # variable row_idx : std_ulogic_vector(ROW_LINE_BITS-1 downto 0);
378 # variable result : std_ulogic_vector(ROW_BITS-1 downto 0);
379 # begin
380 # row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
381 # row_idx := row_v(ROW_LINE_BITS-1 downto 0);
382 # row_v(ROW_LINE_BITS-1 downto 0) :=
383 # std_ulogic_vector(unsigned(row_idx) + 1);
384 # return to_integer(unsigned(row_v));
385 # end;
# Return the next row in the current cache line. We use a dedicated
# function in order to limit the size of the generated adder to be
# only the bits within a cache line (3 bits with default settings)
def next_row(row):
    """Increment only the row-within-line field of *row*; the upper
    (line-select) bits pass through unchanged, keeping the adder
    ROW_LINE_BITS wide."""
    row_in_line = row[:ROW_LINE_BITS]
    upper_bits = row[ROW_LINE_BITS:]
    incremented = row_in_line + 1
    return Cat(incremented[:ROW_LINE_BITS], upper_bits)
392 # -- Read the instruction word for the given address in the
393 # -- current cache row
394 # function read_insn_word(addr: std_ulogic_vector(63 downto 0);
395 # data: cache_row_t) return std_ulogic_vector is
396 # variable word: integer range 0 to INSN_PER_ROW-1;
397 # begin
398 # word := to_integer(unsigned(addr(INSN_BITS+2-1 downto 2)));
399 # return data(31+word*32 downto word*32);
400 # end;
# Read the instruction word for the given address
# in the current cache row
def read_insn_word(addr, data):
    """Mux the 32-bit instruction selected by *addr* out of row *data*."""
    # word index within the row comes from addr bits [2:INSN_BITS+2]
    # (bits [0:2] are the byte offset within the instruction)
    word = addr[2:INSN_BITS+2]
    return data.word_select(word, 32)
406
407 # -- Get the tag value from the address
408 # function get_tag(
409 # addr: std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0)
410 # )
411 # return cache_tag_t is
412 # begin
413 # return addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS);
414 # end;
# Get the tag value from the address
def get_tag(addr):
    """Slice the tag field, bits [SET_SIZE_BITS:REAL_ADDR_BITS),
    out of *addr*."""
    return addr[SET_SIZE_BITS:REAL_ADDR_BITS]
418
419 # -- Read a tag from a tag memory row
420 # function read_tag(way: way_t; tagset: cache_tags_set_t)
421 # return cache_tag_t is
422 # begin
423 # return tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS);
424 # end;
# Read a tag from a tag memory row
def read_tag(way, tagset):
    """TAG_BITS-wide slot number *way* of the concatenated row *tagset*."""
    return tagset.word_select(way, TAG_BITS)
428
429 # -- Write a tag to tag memory row
430 # procedure write_tag(way: in way_t;
431 # tagset: inout cache_tags_set_t; tag: cache_tag_t) is
432 # begin
433 # tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS) := tag;
434 # end;
# Write a tag to tag memory row
def write_tag(way, tagset, tag):
    """Return an assignment statement setting way *way*'s slot of
    *tagset* to *tag* (caller adds it to a comb/sync domain)."""
    return read_tag(way, tagset).eq(tag)
438
439 # -- Simple hash for direct-mapped TLB index
440 # function hash_ea(addr: std_ulogic_vector(63 downto 0))
441 # return tlb_index_t is
442 # variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
443 # begin
444 # hash := addr(TLB_LG_PGSZ + TLB_BITS - 1 downto TLB_LG_PGSZ)
445 # xor addr(
446 # TLB_LG_PGSZ + 2 * TLB_BITS - 1 downto
447 # TLB_LG_PGSZ + TLB_BITS
448 # )
449 # xor addr(
450 # TLB_LG_PGSZ + 3 * TLB_BITS - 1 downto
451 # TLB_LG_PGSZ + 2 * TLB_BITS
452 # );
453 # return to_integer(unsigned(hash));
454 # end;
# Simple hash for direct-mapped TLB index
def hash_ea(addr):
    """Fold three consecutive TLB_BITS-wide fields of the effective
    address (starting at the page-size boundary) together with XOR to
    form the direct-mapped TLB index."""
    fld0 = addr[TLB_LG_PGSZ:TLB_LG_PGSZ + TLB_BITS]
    fld1 = addr[TLB_LG_PGSZ + TLB_BITS:TLB_LG_PGSZ + 2 * TLB_BITS]
    fld2 = addr[TLB_LG_PGSZ + 2 * TLB_BITS:TLB_LG_PGSZ + 3 * TLB_BITS]
    return fld0 ^ fld1 ^ fld2
463
464
# Cache reload state machine
@unique
class State(Enum):
    """icache miss/reload FSM states (see icache_miss_* methods)."""
    IDLE = 0      # no reload in progress
    CLR_TAG = 1   # victim way chosen; write tag, clear its valid bit
    WAIT_ACK = 2  # wishbone burst in flight; waiting for final ack
471
472
class RegInternal(RecordObject):
    """All synchronous (clocked) state of the icache, gathered into
    one record so the hit and miss machines share a single register
    bundle."""
    def __init__(self):
        super().__init__()
        # Cache hit state (Latches for 1 cycle BRAM access)
        # NOTE(review): hit_way holds a way *index* but is declared
        # NUM_WAYS (4) bits wide; WAY_BITS (2) would suffice — verify
        # before narrowing, as comparisons elsewhere rely on it.
        self.hit_way = Signal(NUM_WAYS)
        self.hit_nia = Signal(64)
        self.hit_smark = Signal()
        self.hit_valid = Signal()

        # Cache miss state (reload state machine)
        self.state = Signal(State, reset=State.IDLE)
        self.wb = WBMasterOut("wb")
        self.req_adr = Signal(64)
        # NOTE(review): store_way/store_index/store_row are likewise
        # count-sized (NUM_WAYS/NUM_LINES/BRAM_ROWS bits) rather than
        # log2-sized — oversized but behaviorally harmless.
        self.store_way = Signal(NUM_WAYS)
        self.store_index = Signal(NUM_LINES)
        self.store_row = Signal(BRAM_ROWS)
        self.store_tag = Signal(TAG_BITS)
        self.store_valid = Signal()
        self.end_row_ix = Signal(ROW_LINE_BITS)
        self.rows_valid = RowPerLineValidArray()

        # TLB miss state
        self.fetch_failed = Signal()
496
497 # -- 64 bit direct mapped icache. All instructions are 4B aligned.
498 #
499 # entity icache is
500 # generic (
501 # SIM : boolean := false;
502 # -- Line size in bytes
503 # LINE_SIZE : positive := 64;
504 # -- BRAM organisation: We never access more
505 # -- than wishbone_data_bits
506 # -- at a time so to save resources we make the
507 # -- array only that wide,
508 # -- and use consecutive indices for to make a cache "line"
509 # --
510 # -- ROW_SIZE is the width in bytes of the BRAM (based on WB,
511 # -- so 64-bits)
512 # ROW_SIZE : positive := wishbone_data_bits / 8;
513 # -- Number of lines in a set
514 # NUM_LINES : positive := 32;
515 # -- Number of ways
516 # NUM_WAYS : positive := 4;
517 # -- L1 ITLB number of entries (direct mapped)
518 # TLB_SIZE : positive := 64;
519 # -- L1 ITLB log_2(page_size)
520 # TLB_LG_PGSZ : positive := 12;
521 # -- Number of real address bits that we store
522 # REAL_ADDR_BITS : positive := 56;
523 # -- Non-zero to enable log data collection
524 # LOG_LENGTH : natural := 0
525 # );
526 # port (
527 # clk : in std_ulogic;
528 # rst : in std_ulogic;
529 #
530 # i_in : in Fetch1ToIcacheType;
531 # i_out : out IcacheToDecode1Type;
532 #
533 # m_in : in MmuToIcacheType;
534 #
535 # stall_in : in std_ulogic;
536 # stall_out : out std_ulogic;
537 # flush_in : in std_ulogic;
538 # inval_in : in std_ulogic;
539 #
540 # wishbone_out : out wishbone_master_out;
541 # wishbone_in : in wishbone_slave_out;
542 #
543 # log_out : out std_ulogic_vector(53 downto 0)
544 # );
545 # end entity icache;
546 # 64 bit direct mapped icache. All instructions are 4B aligned.
547 class ICache(Elaboratable):
548 """64 bit direct mapped icache. All instructions are 4B aligned."""
    def __init__(self):
        """Declare the icache's external ports (fetch1/decode1/MMU
        interfaces, pipeline controls, wishbone master, log output)."""
        # fetch request in, decoded-instruction result out
        self.i_in = Fetch1ToICacheType(name="i_in")
        self.i_out = ICacheToDecode1Type(name="i_out")

        # TLB load/invalidate requests from the MMU
        self.m_in = MMUToICacheType(name="m_in")

        # pipeline controls
        self.stall_in = Signal()
        self.stall_out = Signal()
        self.flush_in = Signal()
        self.inval_in = Signal()

        # wishbone master interface used for cache line reloads
        self.wb_out = WBMasterOut(name="wb_out")
        self.wb_in = WBSlaveOut(name="wb_in")

        # debug/log output (LOG_LENGTH support)
        self.log_out = Signal(54)
562
563 self.log_out = Signal(54)
564
565
566 # Generate a cache RAM for each way
    def rams(self, m, r, cache_out_row, use_previous,
             replace_way, req_row):
        """Instantiate one CacheRam per way and wire up its read and
        write ports.

        Reads happen every non-stalled cycle at req_row; writes happen
        on wishbone acks into the way being reloaded (replace_way) at
        r.store_row.  The way that hit last cycle (r.hit_way) drives
        cache_out_row.
        """
        comb = m.d.comb
        sync = m.d.sync

        wb_in, stall_in = self.wb_in, self.stall_in

        for i in range(NUM_WAYS):
            do_read = Signal(name="do_rd_%d" % i)
            do_write = Signal(name="do_wr_%d" % i)
            rd_addr = Signal(ROW_BITS)
            wr_addr = Signal(ROW_BITS)
            d_out = Signal(ROW_SIZE_BITS, name="d_out_%d" % i)
            wr_sel = Signal(ROW_SIZE)

            way = CacheRam(ROW_BITS, ROW_SIZE_BITS, True)
            setattr(m.submodules, "cacheram_%d" % i, way)

            comb += way.rd_en.eq(do_read)
            comb += way.rd_addr.eq(rd_addr)
            comb += d_out.eq(way.rd_data_o)
            comb += way.wr_sel.eq(wr_sel)
            comb += way.wr_addr.eq(wr_addr)
            # reload data comes straight off the wishbone data bus
            comb += way.wr_data.eq(wb_in.dat)

            # hold the read when stalled or when reusing last cycle's row
            comb += do_read.eq(~(stall_in | use_previous))
            # write only the way selected for reload, on each ack
            comb += do_write.eq(wb_in.ack & (replace_way == i))

            with m.If(do_write):
                sync += Display("cache write adr: %x data: %lx",
                                wr_addr, way.wr_data)

            with m.If(r.hit_way == i):
                comb += cache_out_row.eq(d_out)
                with m.If(do_read):
                    sync += Display("cache read adr: %x data: %x",
                                    req_row, d_out)

            comb += rd_addr.eq(req_row)
            comb += wr_addr.eq(r.store_row)
            # all byte lanes written together on a reload beat
            comb += wr_sel.eq(Repl(do_write, ROW_SIZE))
609
610 # Generate PLRUs
611 def maybe_plrus(self, m, r, plru_victim):
612 comb = m.d.comb
613
614 with m.If(NUM_WAYS > 1):
615 for i in range(NUM_LINES):
616 plru_acc_i = Signal(WAY_BITS)
617 plru_acc_en = Signal()
618 plru = PLRU(WAY_BITS)
619 setattr(m.submodules, "plru_%d" % i, plru)
620
621 comb += plru.acc_i.eq(plru_acc_i)
622 comb += plru.acc_en.eq(plru_acc_en)
623
624 # PLRU interface
625 with m.If(get_index(r.hit_nia) == i):
626 comb += plru.acc_en.eq(r.hit_valid)
627
628 comb += plru.acc_i.eq(r.hit_way)
629 comb += plru_victim[i].eq(plru.lru_o)
630
631 # TLB hit detection and real address generation
    # TLB hit detection and real address generation
    def itlb_lookup(self, m, tlb_req_index, itlb_ptes, itlb_tags,
                    real_addr, itlb_valid_bits, ra_valid, eaa_priv,
                    priv_fault, access_ok):
        """Translate i_in.nia through the direct-mapped iTLB.

        In virtual mode the real address is the page offset of nia
        glued to the PTE's real page number, valid only when the
        stored EA tag matches; in real mode nia passes through
        directly and is always valid.  Also derives the privilege
        fault and overall access_ok signals.
        """
        comb = m.d.comb

        i_in = self.i_in

        pte = Signal(TLB_PTE_BITS)
        ttag = Signal(TLB_EA_TAG_BITS)

        # direct-mapped: entry selected by the hashed effective address
        comb += tlb_req_index.eq(hash_ea(i_in.nia))
        comb += pte.eq(itlb_ptes[tlb_req_index])
        comb += ttag.eq(itlb_tags[tlb_req_index])

        with m.If(i_in.virt_mode):
            # page offset from nia, real page number from the PTE
            comb += real_addr.eq(Cat(
                i_in.nia[:TLB_LG_PGSZ],
                pte[TLB_LG_PGSZ:REAL_ADDR_BITS]
            ))

            # translation valid only if the stored EA tag matches
            with m.If(ttag == i_in.nia[TLB_LG_PGSZ + TLB_BITS:64]):
                comb += ra_valid.eq(itlb_valid_bits[tlb_req_index])

            # EAA bit 3 of the PTE gates privileged access
            comb += eaa_priv.eq(pte[3])

        with m.Else():
            # real mode: no translation, no privilege restriction
            comb += real_addr.eq(i_in.nia[:REAL_ADDR_BITS])
            comb += ra_valid.eq(1)
            comb += eaa_priv.eq(1)

        # No IAMR, so no KUEP support for now
        comb += priv_fault.eq(eaa_priv & ~i_in.priv_mode)
        comb += access_ok.eq(ra_valid & ~priv_fault)
666
667 # iTLB update
668 def itlb_update(self, m, itlb_valid_bits, itlb_tags, itlb_ptes):
669 comb = m.d.comb
670 sync = m.d.sync
671
672 m_in = self.m_in
673
674 wr_index = Signal(TLB_SIZE)
675 comb += wr_index.eq(hash_ea(m_in.addr))
676
677 with m.If(m_in.tlbie & m_in.doall):
678 # Clear all valid bits
679 for i in range(TLB_SIZE):
680 sync += itlb_valid_bits[i].eq(0)
681
682 with m.Elif(m_in.tlbie):
683 # Clear entry regardless of hit or miss
684 sync += itlb_valid_bits[wr_index].eq(0)
685
686 with m.Elif(m_in.tlbld):
687 sync += itlb_tags[wr_index].eq(
688 m_in.addr[TLB_LG_PGSZ + TLB_BITS:64]
689 )
690 sync += itlb_ptes[wr_index].eq(m_in.pte)
691 sync += itlb_valid_bits[wr_index].eq(1)
692
693 # Cache hit detection, output to fetch2 and other misc logic
    # Cache hit detection, output to fetch2 and other misc logic
    def icache_comb(self, m, use_previous, r, req_index, req_row,
                    req_hit_way, req_tag, real_addr, req_laddr,
                    cache_valid_bits, cache_tags, access_ok,
                    req_is_hit, req_is_miss, replace_way,
                    plru_victim, cache_out_row):
        """All combinatorial decode for the current fetch request:
        index/row/tag extraction, hit/miss detection across the ways,
        victim-way selection, instruction output muxing, stall
        generation and wishbone output fan-out."""
        comb = m.d.comb

        i_in, i_out, wb_out = self.i_in, self.i_out, self.wb_out
        flush_in, stall_out = self.flush_in, self.stall_out

        is_hit = Signal()
        # NOTE(review): way index held in a NUM_WAYS-wide signal
        # (matches r.hit_way); WAY_BITS would suffice
        hit_way = Signal(NUM_WAYS)

        # i_in.sequential means that i_in.nia this cycle is 4 more than
        # last cycle. If we read more than 32 bits at a time, had a
        # cache hit last cycle, and we don't want the first 32-bit chunk
        # then we can keep the data we read last cycle and just use that.
        with m.If(i_in.nia[2:INSN_BITS+2] != 0):
            comb += use_previous.eq(i_in.sequential & r.hit_valid)

        # Extract line, row and tag from request
        comb += req_index.eq(get_index(i_in.nia))
        comb += req_row.eq(get_row(i_in.nia))
        comb += req_tag.eq(get_tag(real_addr))

        # Calculate address of beginning of cache row, will be
        # used for cache miss processing if needed
        comb += req_laddr.eq(Cat(
            Const(0, ROW_OFF_BITS),
            real_addr[ROW_OFF_BITS:REAL_ADDR_BITS],
        ))

        # Test if pending request is a hit on any way.  hitcond allows
        # hitting the line currently being reloaded, but only on rows
        # that have already arrived (rows_valid).
        hitcond = Signal()
        comb += hitcond.eq((r.state == State.WAIT_ACK)
                           & (req_index == r.store_index)
                           & r.rows_valid[req_row % ROW_PER_LINE])
        with m.If(i_in.req):
            cvb = Signal(NUM_WAYS)
            ctag = Signal(TAG_RAM_WIDTH)
            comb += ctag.eq(cache_tags[req_index])
            comb += cvb.eq(cache_valid_bits[req_index])
            for i in range(NUM_WAYS):
                tagi = Signal(TAG_BITS, name="tag_i%d" % i)
                comb += tagi.eq(read_tag(i, ctag))
                hit_test = Signal(name="hit_test%d" % i)
                comb += hit_test.eq(i == r.store_way)
                # hit if way is valid (or is the partially-loaded way)
                # and its tag matches the request tag
                with m.If((cvb[i] | (hitcond & hit_test))
                          & (tagi == req_tag)):
                    comb += hit_way.eq(i)
                    comb += is_hit.eq(1)

        # Generate the "hit" and "miss" signals
        # for the synchronous blocks
        with m.If(i_in.req & access_ok & ~flush_in):
            comb += req_is_hit.eq(is_hit)
            comb += req_is_miss.eq(~is_hit)

        with m.Else():
            comb += req_is_hit.eq(0)
            comb += req_is_miss.eq(0)

        comb += req_hit_way.eq(hit_way)

        # The way to replace on a miss: fresh PLRU victim while
        # choosing (CLR_TAG), then the latched choice during reload
        with m.If(r.state == State.CLR_TAG):
            comb += replace_way.eq(plru_victim[r.store_index])
        with m.Else():
            comb += replace_way.eq(r.store_way)

        # Output instruction from current cache row
        #
        # Note: This is a mild violation of our design principle of
        # having pipeline stages output from a clean latch. In this
        # case we output the result of a mux. The alternative would
        # be output an entire row which I prefer not to do just yet
        # as it would force fetch2 to know about some of the cache
        # geometry information.
        comb += i_out.insn.eq(read_insn_word(r.hit_nia, cache_out_row))
        comb += i_out.valid.eq(r.hit_valid)
        comb += i_out.nia.eq(r.hit_nia)
        comb += i_out.stop_mark.eq(r.hit_smark)
        comb += i_out.fetch_failed.eq(r.fetch_failed)

        # Stall fetch1 if we have a miss on cache or TLB
        # or a protection fault
        comb += stall_out.eq(~(is_hit & access_ok))

        # Wishbone requests output (from the cache miss reload machine)
        comb += wb_out.eq(r.wb)
785
786 # Cache hit synchronous machine
    # Cache hit synchronous machine
    def icache_hit(self, m, use_previous, r, req_is_hit, req_hit_way,
                   req_index, req_tag, real_addr):
        """Latch the hit-path state for the next cycle: hit_valid /
        hit_way on an accepted request, plus nia and stop mark every
        non-stalled cycle."""
        sync = m.d.sync

        i_in, stall_in = self.i_in, self.stall_in
        flush_in = self.flush_in

        # keep outputs to fetch2 unchanged on a stall
        # except that flush or reset sets valid to 0
        # If use_previous, keep the same data as last
        # cycle and use the second half
        with m.If(stall_in | use_previous):
            with m.If(flush_in):
                sync += r.hit_valid.eq(0)
        with m.Else():
            # On a hit, latch the request for the next cycle,
            # when the BRAM data will be available on the
            # cache_out output of the corresponding way
            sync += r.hit_valid.eq(req_is_hit)

            with m.If(req_is_hit):
                sync += r.hit_way.eq(req_hit_way)
                sync += Display(
                    "cache hit nia:%x IR:%x SM:%x idx:%x tag:%x " \
                    "way:%x RA:%x", i_in.nia, i_in.virt_mode, \
                    i_in.stop_mark, req_index, req_tag, \
                    req_hit_way, real_addr
                )

        with m.If(~stall_in):
            # Send stop marks and NIA down regardless of validity
            sync += r.hit_smark.eq(i_in.stop_mark)
            sync += r.hit_nia.eq(i_in.nia)
822
    def icache_miss_idle(self, m, r, req_is_miss, req_laddr,
                         req_index, req_tag, replace_way, real_addr):
        """IDLE state of the reload FSM: on a miss, latch the target
        index/row/tag, start the wishbone cycle at the line's base
        address and advance to CLR_TAG."""
        comb = m.d.comb
        sync = m.d.sync

        i_in = self.i_in

        # Reset per-row valid flags, only used in WAIT_ACK
        for i in range(ROW_PER_LINE):
            sync += r.rows_valid[i].eq(0)

        # We need to read a cache line
        with m.If(req_is_miss):
            sync += Display(
                "cache miss nia:%x IR:%x SM:%x idx:%x "
                " way:%x tag:%x RA:%x", i_in.nia,
                i_in.virt_mode, i_in.stop_mark, req_index,
                replace_way, req_tag, real_addr
            )

            # Keep track of our index and way for subsequent stores
            # NOTE(review): st_row is BRAM_ROWS bits wide though it
            # carries a row index (ROW_BITS would do) — harmless
            st_row = Signal(BRAM_ROWS)
            comb += st_row.eq(get_row(req_laddr))
            sync += r.store_index.eq(req_index)
            sync += r.store_row.eq(st_row)
            sync += r.store_tag.eq(req_tag)
            sync += r.store_valid.eq(1)
            # last row of the line, relative to the starting row
            sync += r.end_row_ix.eq(get_row_of_line(st_row) - 1)

            # Prep for first wishbone read.  We calculate the address
            # of the start of the cache line and start the WB cycle.
            sync += r.req_adr.eq(req_laddr)
            sync += r.wb.cyc.eq(1)
            sync += r.wb.stb.eq(1)

            # Track that we had one request sent
            sync += r.state.eq(State.CLR_TAG)
860
861 def icache_miss_clr_tag(self, m, r, replace_way,
862 cache_valid_bits, req_index,
863 tagset, cache_tags):
864
865 comb = m.d.comb
866 sync = m.d.sync
867
868 # Get victim way from plru
869 sync += r.store_way.eq(replace_way)
870 # Force misses on that way while reloading that line
871 cv = Signal(INDEX_BITS)
872 comb += cv.eq(cache_valid_bits[req_index])
873 comb += cv.bit_select(replace_way, 1).eq(0)
874 sync += cache_valid_bits[req_index].eq(cv)
875
876 for i in range(NUM_WAYS):
877 with m.If(i == replace_way):
878 comb += tagset.eq(cache_tags[r.store_index])
879 comb += write_tag(i, tagset, r.store_tag)
880 sync += cache_tags[r.store_index].eq(tagset)
881
882 sync += r.state.eq(State.WAIT_ACK)
883
    def icache_miss_wait_ack(self, m, r, replace_way, inval_in,
                             stbs_done, cache_valid_bits):
        """WAIT_ACK state body of the cache-miss reload machine.

        Keeps issuing wishbone requests (advancing the row address
        after each accepted strobe) and retires the incoming acks one
        row at a time into the cache line being reloaded.

        :param m: nmigen Module being elaborated
        :param r: RegInternal register record (wishbone request, store
                  row/index, FSM state)
        :param replace_way: way selected for the reload
        :param inval_in: cache-invalidate request input
        :param stbs_done: combinatorial flag, set when all strobes for
                          the line have been sent
        :param cache_valid_bits: per-line valid-bit storage
        """
        comb = m.d.comb
        sync = m.d.sync

        wb_in = self.wb_in

        # Requests are all sent if stb is 0
        stbs_zero = Signal()
        comb += stbs_zero.eq(r.wb.stb == 0)
        comb += stbs_done.eq(stbs_zero)

        # If we are still sending requests, was one accepted?
        with m.If(~wb_in.stall & ~stbs_zero):
            # That was the last word?  We are done sending.
            # Clear stb and set stbs_done so we can handle
            # an eventual last ack on the same cycle.
            with m.If(is_last_row_addr(r.req_adr, r.end_row_ix)):
                sync += Display(
                    "IS_LAST_ROW_ADDR r.wb.addr:%x " \
                    "r.end_row_ix:%x r.wb.stb:%x stbs_zero:%x " \
                    "stbs_done:%x", r.wb.adr, r.end_row_ix,
                    r.wb.stb, stbs_zero, stbs_done
                )
                sync += r.wb.stb.eq(0)
                # last-assignment-wins: overrides the stbs_zero default
                comb += stbs_done.eq(1)

            # Calculate the next row address: increment only the
            # row-within-line slice of the request address
            rarange = Signal(LINE_OFF_BITS - ROW_OFF_BITS)
            comb += rarange.eq(
                r.req_adr[ROW_OFF_BITS:LINE_OFF_BITS] + 1
            )
            sync += r.req_adr[ROW_OFF_BITS:LINE_OFF_BITS].eq(
                rarange
            )
            sync += Display("RARANGE r.req_adr:%x rarange:%x "
                            "stbs_zero:%x stbs_done:%x",
                            r.req_adr, rarange, stbs_zero, stbs_done)

        # Incoming acks processing
        with m.If(wb_in.ack):
            sync += Display("WB_IN_ACK data:%x stbs_zero:%x "
                            "stbs_done:%x",
                            wb_in.dat, stbs_zero, stbs_done)

            # mark the row just received as valid for partial-line hits
            sync += r.rows_valid[r.store_row % ROW_PER_LINE].eq(1)

            # Check for completion: all strobes sent and this ack is
            # for the final row of the line
            with m.If(stbs_done &
                      is_last_row(r.store_row, r.end_row_ix)):
                # Complete wishbone cycle
                sync += r.wb.cyc.eq(0)
                sync += r.req_adr.eq(0) # be nice, clear addr

                # Cache line is now valid: set the replaced way's valid
                # bit, unless an invalidation arrived during the reload
                # NOTE(review): cv holds one valid bit per way, so
                # Signal(INDEX_BITS) looks like it should be
                # Signal(NUM_WAYS) -- confirm the widths match
                cv = Signal(INDEX_BITS)
                comb += cv.eq(cache_valid_bits[r.store_index])
                comb += cv.bit_select(replace_way, 1).eq(
                    r.store_valid & ~inval_in
                )
                sync += cache_valid_bits[r.store_index].eq(cv)

                sync += r.state.eq(State.IDLE)

            # not completed, move on to next request in row
            with m.Else():
                # Increment store row counter
                sync += r.store_row.eq(next_row(r.store_row))
952
953
    # Cache miss/reload synchronous machine
    def icache_miss(self, m, cache_valid_bits, r, req_is_miss,
                    req_index, req_laddr, req_tag, replace_way,
                    cache_tags, access_ok, real_addr):
        """Cache miss/reload state machine.

        Dispatches to the per-state helpers (IDLE, CLR_TAG, WAIT_ACK),
        handles cache invalidation requests, and records TLB-miss /
        protection-fault status in r.fetch_failed.

        :param m: nmigen Module being elaborated
        :param cache_valid_bits: per-line valid-bit storage
        :param r: RegInternal register record
        :param req_is_miss: request missed the cache this cycle
        :param req_index: cache line index of the request
        :param req_laddr: line-aligned address of the request
        :param req_tag: tag of the requested address
        :param replace_way: way chosen for replacement
        :param cache_tags: tag RAM storage
        :param access_ok: TLB/privilege check passed
        :param real_addr: translated real address
        """
        comb = m.d.comb
        sync = m.d.sync

        i_in, wb_in, m_in = self.i_in, self.wb_in, self.m_in
        stall_in, flush_in = self.stall_in, self.flush_in
        inval_in = self.inval_in

        # ported from VHDL:
        # variable tagset : cache_tags_set_t;
        # variable stbs_done : boolean;

        tagset = Signal(TAG_RAM_WIDTH)
        stbs_done = Signal()

        # wishbone request: always full-width reads, word-addressed
        # (drop the 3 byte-offset bits of the 64-bit-wide bus)
        comb += r.wb.sel.eq(-1)
        comb += r.wb.adr.eq(r.req_adr[3:])

        # Process cache invalidations: clear every line's valid bits
        # and drop any in-flight store validity
        with m.If(inval_in):
            for i in range(NUM_LINES):
                sync += cache_valid_bits[i].eq(0)
            sync += r.store_valid.eq(0)

        # Main state machine
        with m.Switch(r.state):

            with m.Case(State.IDLE):
                self.icache_miss_idle(
                    m, r, req_is_miss, req_laddr,
                    req_index, req_tag, replace_way,
                    real_addr
                )

            # CLR_TAG falls straight through into the WAIT_ACK logic
            # so the first strobe/ack can be handled in the same state
            with m.Case(State.CLR_TAG, State.WAIT_ACK):
                with m.If(r.state == State.CLR_TAG):
                    self.icache_miss_clr_tag(
                        m, r, replace_way,
                        cache_valid_bits, req_index,
                        tagset, cache_tags
                    )

                self.icache_miss_wait_ack(
                    m, r, replace_way, inval_in,
                    stbs_done, cache_valid_bits
                )

        # TLB miss and protection fault processing: a flush or a TLB
        # load clears the fault; a request failing the access check
        # (while not stalled) raises it
        with m.If(flush_in | m_in.tlbld):
            sync += r.fetch_failed.eq(0)
        with m.Elif(i_in.req & ~access_ok & ~stall_in):
            sync += r.fetch_failed.eq(1)
1008
    # icache_log: if LOG_LENGTH > 0 generate
    def icache_log(self, m, req_hit_way, ra_valid, access_ok,
                   req_is_miss, req_is_hit, lway, wstate, r):
        """Debug logger: pack a snapshot of cache activity into log_out.

        Port of microwatt's icache_log generate block; only active when
        LOG_LENGTH > 0.

        NOTE(review): the loop-local lway/wstate Signals shadow the
        lway/wstate parameters -- the parameters are never used.
        Confirm whether they can be dropped from the signature.
        """
        comb = m.d.comb
        sync = m.d.sync

        wb_in, i_out = self.wb_in, self.i_out
        log_out, stall_out = self.log_out, self.stall_out

        # Output data to logger
        # (VHDL: signal log_data : std_ulogic_vector(53 downto 0))
        for i in range(LOG_LENGTH):
            # Output data to logger
            log_data = Signal(54)
            # NOTE(review): the VHDL packed lway into 3 bits
            # (to_unsigned(lway, 3)); a NUM_WAYS-wide Signal here may
            # not match the 54-bit log_data layout -- confirm widths
            lway = Signal(NUM_WAYS)
            wstate = Signal()

            # registered copy of the hit way; wstate flags "FSM busy"
            sync += lway.eq(req_hit_way)
            sync += wstate.eq(0)

            # last-assignment-wins: override wstate while reloading
            with m.If(r.state != State.IDLE):
                sync += wstate.eq(1)

            # Pack the snapshot, LSB first (Cat order is the reverse of
            # the VHDL concatenation):
            #   ra_valid, access_ok, req_is_miss, req_is_hit,
            #   lway, wstate, hit_nia(5 downto 2), fetch_failed,
            #   stall_out, wb stall, wb cyc, wb stb,
            #   wb adr(5 downto 3), wb ack, insn, insn valid
            sync += log_data.eq(Cat(
                ra_valid, access_ok, req_is_miss, req_is_hit,
                lway, wstate, r.hit_nia[2:6], r.fetch_failed,
                stall_out, wb_in.stall, r.wb.cyc, r.wb.stb,
                r.wb.adr[3:6], wb_in.ack, i_out.insn, i_out.valid
            ))
            # log_out <= log_data;
            comb += log_out.eq(log_data)
        # end generate
1070
1071 def elaborate(self, platform):
1072
1073 m = Module()
1074 comb = m.d.comb
1075
1076 # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
1077 cache_tags = CacheTagArray()
1078 cache_valid_bits = CacheValidBitsArray()
1079
1080 itlb_valid_bits = TLBValidBitsArray()
1081 itlb_tags = TLBTagArray()
1082 itlb_ptes = TLBPtesArray()
1083 # TODO to be passed to nmigen as ram attributes
1084 # attribute ram_style of itlb_tags : signal is "distributed";
1085 # attribute ram_style of itlb_ptes : signal is "distributed";
1086
1087 # Privilege bit from PTE EAA field
1088 eaa_priv = Signal()
1089
1090 r = RegInternal()
1091
1092 # Async signal on incoming request
1093 req_index = Signal(NUM_LINES)
1094 req_row = Signal(BRAM_ROWS)
1095 req_hit_way = Signal(NUM_WAYS)
1096 req_tag = Signal(TAG_BITS)
1097 req_is_hit = Signal()
1098 req_is_miss = Signal()
1099 req_laddr = Signal(64)
1100
1101 tlb_req_index = Signal(TLB_SIZE)
1102 real_addr = Signal(REAL_ADDR_BITS)
1103 ra_valid = Signal()
1104 priv_fault = Signal()
1105 access_ok = Signal()
1106 use_previous = Signal()
1107
1108 cache_out_row = Signal(ROW_SIZE_BITS)
1109
1110 plru_victim = PLRUOut()
1111 replace_way = Signal(NUM_WAYS)
1112
1113 # call sub-functions putting everything together,
1114 # using shared signals established above
1115 self.rams(m, r, cache_out_row, use_previous, replace_way, req_row)
1116 self.maybe_plrus(m, r, plru_victim)
1117 self.itlb_lookup(m, tlb_req_index, itlb_ptes, itlb_tags, real_addr,
1118 itlb_valid_bits, ra_valid, eaa_priv, priv_fault,
1119 access_ok)
1120 self.itlb_update(m, itlb_valid_bits, itlb_tags, itlb_ptes)
1121 self.icache_comb(m, use_previous, r, req_index, req_row, req_hit_way,
1122 req_tag, real_addr, req_laddr, cache_valid_bits,
1123 cache_tags, access_ok, req_is_hit, req_is_miss,
1124 replace_way, plru_victim, cache_out_row)
1125 self.icache_hit(m, use_previous, r, req_is_hit, req_hit_way,
1126 req_index, req_tag, real_addr)
1127 self.icache_miss(m, cache_valid_bits, r, req_is_miss, req_index,
1128 req_laddr, req_tag, replace_way, cache_tags,
1129 access_ok, real_addr)
1130 #self.icache_log(m, log_out, req_hit_way, ra_valid, access_ok,
1131 # req_is_miss, req_is_hit, lway, wstate, r)
1132
1133 return m
1134
1135
def icache_sim(dut):
    """Simulation stimulus for the ICache.

    Drives four fetches through the DUT: an initial miss, a hit on the
    same line, a miss on another line, and finally an address that
    aliases the first line, checking the returned instruction words.

    Naming is from the DUT's point of view: ``i_out`` is the request
    record we drive (dut.i_in), ``i_in`` is the response record we
    sample (dut.i_out).

    :param dut: the ICache instance under simulation
    """
    i_out = dut.i_in
    i_in = dut.i_out
    m_out = dut.m_in

    # quiesce all request/MMU inputs
    # NOTE(review): this first line drives a DUT *output* record;
    # harmless in simulation but probably unintended -- confirm
    yield i_in.valid.eq(0)
    yield i_out.priv_mode.eq(1)
    yield i_out.req.eq(0)
    yield i_out.nia.eq(0)
    yield i_out.stop_mark.eq(0)
    yield m_out.tlbld.eq(0)
    yield m_out.tlbie.eq(0)
    yield m_out.addr.eq(0)
    yield m_out.pte.eq(0)
    yield
    yield
    yield
    yield

    # first fetch: a miss, so allow plenty of cycles for the reload
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000004, 64))
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    nia = yield i_out.nia
    insn = yield i_in.insn
    print(f"valid? {valid}")
    assert valid
    assert insn == 0x00000001, \
        "insn @%x=%x expected 00000001" % (nia, insn)
    yield i_out.req.eq(0)
    yield

    # hit on the now-loaded line: only a couple of cycles needed
    yield
    yield
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000008, 64))
    yield
    yield
    valid = yield i_in.valid
    nia = yield i_in.nia
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000002, \
        "insn @%x=%x expected 00000002" % (nia, insn)
    yield

    # another miss
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000040, 64))
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    nia = yield i_out.nia
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000010, \
        "insn @%x=%x expected 00000010" % (nia, insn)

    # test something that aliases: must miss first
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000100, 64))
    yield
    yield
    valid = yield i_in.valid
    # bug fix: was "assert ~valid" -- Python's ~ on an int is always
    # truthy (~0 == -1), so the check never fired; "not valid" really
    # asserts that the fetch has not completed yet
    assert not valid
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000040, \
        "insn @%x=%x expected 00000040" % (nia, insn)
    yield i_out.req.eq(0)
1214
1215
1216
def test_icache(mem):
    """Build an ICache wired to a wishbone SRAM preloaded with *mem*,
    then run the icache_sim stimulus against it under the nmigen
    simulator, dumping a VCD trace.

    :param mem: list of 64-bit init words for the backing memory
    """
    dut = ICache()

    backing = Memory(width=64, depth=512, init=mem)
    sram = SRAM(memory=backing, granularity=8)

    m = Module()
    m.submodules.icache = dut
    m.submodules.sram = sram

    # master -> slave: forward the icache's wishbone outputs to the SRAM
    for field in ("cyc", "stb", "we", "sel", "adr"):
        m.d.comb += getattr(sram.bus, field).eq(getattr(dut.wb_out, field))
    m.d.comb += sram.bus.dat_w.eq(dut.wb_out.dat)

    # slave -> master: ack and read data back into the icache
    m.d.comb += dut.wb_in.ack.eq(sram.bus.ack)
    m.d.comb += dut.wb_in.dat.eq(sram.bus.dat_r)

    # nmigen Simulation
    sim = Simulator(m)
    sim.add_clock(1e-6)
    sim.add_sync_process(wrap(icache_sim(dut)))
    with sim.write_vcd('test_icache.vcd'):
        sim.run()
1245
if __name__ == '__main__':
    # emit RTLIL for inspection
    dut = ICache()
    vl = rtlil.convert(dut, ports=[])
    with open("test_icache.il", "w") as f:
        f.write(vl)

    # backing-store pattern: word i holds (2i) in the low half and
    # (2i+1) in the high half, i.e. consecutive 32-bit "instructions"
    mem = [(i * 2) | ((i * 2 + 1) << 32) for i in range(512)]

    test_icache(mem)
1257