1 """ICache
2
3 based on Anton Blanchard microwatt icache.vhdl
4
5 Set associative icache
6
7 TODO (in no specific order):
8 * Add debug interface to inspect cache content
9 * Add snoop/invalidate path
10 * Add multi-hit error detection
11 * Pipelined bus interface (wb or axi)
12 * Maybe add parity? There's a few bits free in each BRAM row on Xilinx
13 * Add optimization: service hits on partially loaded lines
14 * Add optimization: (maybe) interrupt reload on fluch/redirect
15 * Check if playing with the geometry of the cache tags allow for more
16 efficient use of distributed RAM and less logic/muxes. Currently we
17 write TAG_BITS width which may not match full ram blocks and might
18 cause muxes to be inferred for "partial writes".
19 * Check if making the read size of PLRU a ROM helps utilization
20
21 """
from enum import Enum, unique
from nmigen import (Module, Signal, Elaboratable, Cat, Array, Const)
from nmigen.cli import main, rtlil
from nmutil.iocontrol import RecordObject
from nmigen.utils import log2_int
from nmutil.util import Display

#from nmutil.plru import PLRU
from soc.experiment.cache_ram import CacheRam
from soc.experiment.plru import PLRU

from soc.experiment.mem_types import (Fetch1ToICacheType,
                                      ICacheToDecode1Type,
                                      MMUToICacheType)

from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS,
                                     WB_SEL_BITS, WBAddrType, WBDataType,
                                     WBSelType, WBMasterOut, WBSlaveOut,
                                     WBMasterOutVector, WBSlaveOutVector,
                                     WBIOMasterOut, WBIOSlaveOut)

# for test
from nmigen_soc.wishbone.sram import SRAM
from nmigen import Memory
from nmutil.util import wrap
if True:
    from nmigen.back.pysim import Simulator, Delay, Settle
else:
    from nmigen.sim.cxxsim import Simulator, Delay, Settle


SIM = 0
LINE_SIZE = 64
# BRAM organisation: We never access more than wishbone_data_bits
# at a time so to save resources we make the array only that wide,
# and use consecutive indices to make a cache "line"
#
# ROW_SIZE is the width in bytes of the BRAM (based on WB, so 64-bits)
ROW_SIZE = WB_DATA_BITS // 8
# Number of lines in a set
NUM_LINES = 32
# Number of ways
NUM_WAYS = 4
# L1 ITLB number of entries (direct mapped)
TLB_SIZE = 64
# L1 ITLB log_2(page_size)
TLB_LG_PGSZ = 12
# Number of real address bits that we store
REAL_ADDR_BITS = 56
# Non-zero to enable log data collection
LOG_LENGTH = 0

ROW_SIZE_BITS = ROW_SIZE * 8
# ROW_PER_LINE is the number of row
# (wishbone) transactions in a line
ROW_PER_LINE = LINE_SIZE // ROW_SIZE
# BRAM_ROWS is the number of rows in
# BRAM needed to represent the full icache
BRAM_ROWS = NUM_LINES * ROW_PER_LINE
# INSN_PER_ROW is the number of 32bit
# instructions per BRAM row
INSN_PER_ROW = ROW_SIZE_BITS // 32

print("ROW_SIZE", ROW_SIZE)
print("ROW_SIZE_BITS", ROW_SIZE_BITS)
print("ROW_PER_LINE", ROW_PER_LINE)
print("BRAM_ROWS", BRAM_ROWS)
print("INSN_PER_ROW", INSN_PER_ROW)

# Bit field counts in the address
#
# INSN_BITS is the number of bits to
# select an instruction in a row
INSN_BITS = log2_int(INSN_PER_ROW)
# ROW_BITS is the number of bits to
# select a row
ROW_BITS = log2_int(BRAM_ROWS)
# ROW_LINE_BITS is the number of bits to
# select a row within a line
ROW_LINE_BITS = log2_int(ROW_PER_LINE)
# LINE_OFF_BITS is the number of bits for
# the offset in a cache line
LINE_OFF_BITS = log2_int(LINE_SIZE)
# ROW_OFF_BITS is the number of bits for
# the offset in a row
ROW_OFF_BITS = log2_int(ROW_SIZE)
# INDEX_BITS is the number of bits to
# select a cache line
INDEX_BITS = log2_int(NUM_LINES)
# SET_SIZE_BITS is the log base 2 of
# the set size
SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS
# TAG_BITS is the number of bits of
# the tag part of the address
TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS
# TAG_WIDTH is the width in bits of each way of the tag RAM,
# rounded up to the next multiple of 8
TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)

# WAY_BITS is the number of bits to
# select a way
WAY_BITS = log2_int(NUM_WAYS)
TAG_RAM_WIDTH = TAG_BITS * NUM_WAYS

# -- L1 ITLB.
# constant TLB_BITS : natural := log2(TLB_SIZE);
# constant TLB_EA_TAG_BITS : natural := 64 - (TLB_LG_PGSZ + TLB_BITS);
# constant TLB_PTE_BITS : natural := 64;
TLB_BITS = log2_int(TLB_SIZE)
TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_BITS)
TLB_PTE_BITS = 64


print("INSN_BITS", INSN_BITS)
print("ROW_BITS", ROW_BITS)
print("ROW_LINE_BITS", ROW_LINE_BITS)
print("LINE_OFF_BITS", LINE_OFF_BITS)
print("ROW_OFF_BITS", ROW_OFF_BITS)
print("INDEX_BITS", INDEX_BITS)
print("SET_SIZE_BITS", SET_SIZE_BITS)
print("TAG_BITS", TAG_BITS)
print("WAY_BITS", WAY_BITS)
print("TAG_RAM_WIDTH", TAG_RAM_WIDTH)
print("TLB_BITS", TLB_BITS)
print("TLB_EA_TAG_BITS", TLB_EA_TAG_BITS)
print("TLB_PTE_BITS", TLB_PTE_BITS)
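
# Worked example with the default geometry above (LINE_SIZE=64,
# NUM_LINES=32, NUM_WAYS=4, WB_DATA_BITS=64, REAL_ADDR_BITS=56,
# TLB_SIZE=64, TLB_LG_PGSZ=12) -- these are the values the prints
# above produce:
#   ROW_SIZE=8       ROW_SIZE_BITS=64  ROW_PER_LINE=8   BRAM_ROWS=256
#   INSN_PER_ROW=2   INSN_BITS=1       ROW_BITS=8       ROW_LINE_BITS=3
#   LINE_OFF_BITS=6  ROW_OFF_BITS=3    INDEX_BITS=5     SET_SIZE_BITS=11
#   TAG_BITS=45      TAG_WIDTH=48      WAY_BITS=2       TAG_RAM_WIDTH=180
#   TLB_BITS=6       TLB_EA_TAG_BITS=46                 TLB_PTE_BITS=64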



# architecture rtl of icache is
#constant ROW_SIZE_BITS : natural := ROW_SIZE*8;
#-- ROW_PER_LINE is the number of row (wishbone
#-- transactions) in a line
#constant ROW_PER_LINE : natural := LINE_SIZE / ROW_SIZE;
#-- BRAM_ROWS is the number of rows in BRAM
#-- needed to represent the full
#-- icache
#constant BRAM_ROWS : natural := NUM_LINES * ROW_PER_LINE;
#-- INSN_PER_ROW is the number of 32bit instructions per BRAM row
#constant INSN_PER_ROW : natural := ROW_SIZE_BITS / 32;
#-- Bit fields counts in the address
#
#-- INSN_BITS is the number of bits to select
#-- an instruction in a row
#constant INSN_BITS : natural := log2(INSN_PER_ROW);
#-- ROW_BITS is the number of bits to select a row
#constant ROW_BITS : natural := log2(BRAM_ROWS);
#-- ROW_LINEBITS is the number of bits to
#-- select a row within a line
#constant ROW_LINEBITS : natural := log2(ROW_PER_LINE);
#-- LINE_OFF_BITS is the number of bits for the offset
#-- in a cache line
#constant LINE_OFF_BITS : natural := log2(LINE_SIZE);
#-- ROW_OFF_BITS is the number of bits for the offset in a row
#constant ROW_OFF_BITS : natural := log2(ROW_SIZE);
#-- INDEX_BITS is the number of bits to select a cache line
#constant INDEX_BITS : natural := log2(NUM_LINES);
#-- SET_SIZE_BITS is the log base 2 of the set size
#constant SET_SIZE_BITS : natural := LINE_OFF_BITS + INDEX_BITS;
#-- TAG_BITS is the number of bits of the tag part of the address
#constant TAG_BITS : natural := REAL_ADDR_BITS - SET_SIZE_BITS;
#-- WAY_BITS is the number of bits to select a way
#constant WAY_BITS : natural := log2(NUM_WAYS);

#-- Example of layout for 32 lines of 64 bytes:
#--
#-- ..  tag    |index| line  |
#-- ..         |   row   |   |
#-- ..         |     |   |00| zero           (2)
#-- ..         |     |  |-|  | INSN_BITS      (1)
#-- ..         |     |---|   | ROW_LINEBITS   (3)
#-- ..         |     |--- - --| LINE_OFF_BITS (6)
#-- ..         |         |- --| ROW_OFF_BITS  (3)
#-- ..         |----- ---|    | ROW_BITS      (8)
#-- ..         |-----|        | INDEX_BITS    (5)
#-- .. --------|              | TAG_BITS      (53)
# Example of layout for 32 lines of 64 bytes:
#
# ..  tag    |index| line  |
# ..         |   row   |   |
# ..         |     |   |00| zero           (2)
# ..         |     |  |-|  | INSN_BITS      (1)
# ..         |     |---|   | ROW_LINEBITS   (3)
# ..         |     |--- - --| LINE_OFF_BITS (6)
# ..         |         |- --| ROW_OFF_BITS  (3)
# ..         |----- ---|    | ROW_BITS      (8)
# ..         |-----|        | INDEX_BITS    (5)
# .. --------|              | TAG_BITS      (53)

#subtype row_t is integer range 0 to BRAM_ROWS-1;
#subtype index_t is integer range 0 to NUM_LINES-1;
#subtype way_t is integer range 0 to NUM_WAYS-1;
#subtype row_in_line_t is unsigned(ROW_LINEBITS-1 downto 0);
#
#-- The cache data BRAM organized as described above for each way
#subtype cache_row_t is std_ulogic_vector(ROW_SIZE_BITS-1 downto 0);
#
#-- The cache tags LUTRAM has a row per set. Vivado is a pain and will
#-- not handle a clean (commented) definition of the cache tags as a 3d
#-- memory. For now, work around it by putting all the tags
#subtype cache_tag_t is std_logic_vector(TAG_BITS-1 downto 0);
# type cache_tags_set_t is array(way_t) of cache_tag_t;
# type cache_tags_array_t is array(index_t) of cache_tags_set_t;
#constant TAG_RAM_WIDTH : natural := TAG_BITS * NUM_WAYS;
#subtype cache_tags_set_t is std_logic_vector(TAG_RAM_WIDTH-1 downto 0);
#type cache_tags_array_t is array(index_t) of cache_tags_set_t;
def CacheTagArray():
    return Array(Signal(TAG_RAM_WIDTH, name="cachetag_%d" % x) \
                 for x in range(NUM_LINES))

#-- The cache valid bits
#subtype cache_way_valids_t is std_ulogic_vector(NUM_WAYS-1 downto 0);
#type cache_valids_t is array(index_t) of cache_way_valids_t;
#type row_per_line_valid_t is array(0 to ROW_PER_LINE - 1) of std_ulogic;
def CacheValidBitsArray():
    return Array(Signal(NUM_WAYS, name="cachevalid_%d" % x) \
                 for x in range(NUM_LINES))

def RowPerLineValidArray():
    return Array(Signal(name="rows_valid_%d" % x) \
                 for x in range(ROW_PER_LINE))


#attribute ram_style : string;
#attribute ram_style of cache_tags : signal is "distributed";
# TODO: to be passed to nmigen as ram attributes
# attribute ram_style : string;
# attribute ram_style of cache_tags : signal is "distributed";


#subtype tlb_index_t is integer range 0 to TLB_SIZE - 1;
#type tlb_valids_t is array(tlb_index_t) of std_ulogic;
#subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0);
#type tlb_tags_t is array(tlb_index_t) of tlb_tag_t;
#subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
#type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t;
def TLBValidBitsArray():
    return Array(Signal(name="tlbvalid_%d" % x) \
                 for x in range(TLB_SIZE))

def TLBTagArray():
    return Array(Signal(TLB_EA_TAG_BITS, name="tlbtag_%d" % x) \
                 for x in range(TLB_SIZE))

def TLBPtesArray():
    return Array(Signal(TLB_PTE_BITS, name="tlbptes_%d" % x) \
                 for x in range(TLB_SIZE))


#-- Cache RAM interface
#type cache_ram_out_t is array(way_t) of cache_row_t;
# Cache RAM interface
def CacheRamOut():
    return Array(Signal(ROW_SIZE_BITS, name="cache_out_%d" % x) \
                 for x in range(NUM_WAYS))

#-- PLRU output interface
#type plru_out_t is array(index_t) of
#    std_ulogic_vector(WAY_BITS-1 downto 0);
# PLRU output interface
def PLRUOut():
    return Array(Signal(WAY_BITS, name="plru_out_%d" % x) \
                 for x in range(NUM_LINES))

# -- Return the cache line index (tag index) for an address
# function get_index(addr: std_ulogic_vector(63 downto 0))
#     return index_t is
# begin
#     return to_integer(unsigned(
#         addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS)
#     ));
# end;
# Return the cache line index (tag index) for an address
def get_index(addr):
    return addr[LINE_OFF_BITS:SET_SIZE_BITS]

# -- Return the cache row index (data memory) for an address
# function get_row(addr: std_ulogic_vector(63 downto 0))
#     return row_t is
# begin
#     return to_integer(unsigned(
#         addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS)
#     ));
# end;
# Return the cache row index (data memory) for an address
def get_row(addr):
    return addr[ROW_OFF_BITS:SET_SIZE_BITS]

# -- Return the index of a row within a line
# function get_row_of_line(row: row_t) return row_in_line_t is
#     variable row_v : unsigned(ROW_BITS-1 downto 0);
# begin
#     row_v := to_unsigned(row, ROW_BITS);
#     return row_v(ROW_LINEBITS-1 downto 0);
# end;
# Return the index of a row within a line
def get_row_of_line(row):
    return row[:ROW_LINE_BITS]

# -- Returns whether this is the last row of a line
# function is_last_row_addr(addr: wishbone_addr_type;
#     last: row_in_line_t) return boolean is
# begin
#     return unsigned(
#         addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS)
#     ) = last;
# end;
# Returns whether this is the last row of a line
def is_last_row_addr(addr, last):
    return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last

# -- Returns whether this is the last row of a line
# function is_last_row(row: row_t;
#     last: row_in_line_t) return boolean is
# begin
#     return get_row_of_line(row) = last;
# end;
# Returns whether this is the last row of a line
def is_last_row(row, last):
    return get_row_of_line(row) == last

# -- Return the next row in the current cache line. We use a dedicated
# -- function in order to limit the size of the generated adder to be
# -- only the bits within a cache line (3 bits with default settings)
# function next_row(row: row_t) return row_t is
#     variable row_v   : std_ulogic_vector(ROW_BITS-1 downto 0);
#     variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
#     variable result  : std_ulogic_vector(ROW_BITS-1 downto 0);
# begin
#     row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
#     row_idx := row_v(ROW_LINEBITS-1 downto 0);
#     row_v(ROW_LINEBITS-1 downto 0) :=
#         std_ulogic_vector(unsigned(row_idx) + 1);
#     return to_integer(unsigned(row_v));
# end;
# Return the next row in the current cache line. We use a dedicated
# function in order to limit the size of the generated adder to be
# only the bits within a cache line (3 bits with default settings)
def next_row(row):
    row_v = row[0:ROW_LINE_BITS] + 1
    return Cat(row_v[:ROW_LINE_BITS], row[ROW_LINE_BITS:])
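
# Example with ROW_LINE_BITS=3: row 0b00000111 wraps to 0b00000000; only
# the low ROW_LINE_BITS bits increment, the line-selecting upper bits of
# "row" pass through unchanged, so the adder stays 3 bits wide.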

# -- Read the instruction word for the given address in the
# -- current cache row
# function read_insn_word(addr: std_ulogic_vector(63 downto 0);
#     data: cache_row_t) return std_ulogic_vector is
#     variable word: integer range 0 to INSN_PER_ROW-1;
# begin
#     word := to_integer(unsigned(addr(INSN_BITS+2-1 downto 2)));
#     return data(31+word*32 downto word*32);
# end;
# Read the instruction word for the given address
# in the current cache row
def read_insn_word(addr, data):
    word = addr[2:INSN_BITS+2]
    return data.word_select(word, 32)

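# Note: data.word_select(word, 32) selects bits [word*32 : word*32 + 32]
# of the row, the nmigen equivalent of the VHDL slice
# data(31+word*32 downto word*32) above.
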
# -- Get the tag value from the address
# function get_tag(
#     addr: std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0)
# ) return cache_tag_t is
# begin
#     return addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS);
# end;
# Get the tag value from the address
def get_tag(addr):
    return addr[SET_SIZE_BITS:REAL_ADDR_BITS]

# -- Read a tag from a tag memory row
# function read_tag(way: way_t; tagset: cache_tags_set_t)
#     return cache_tag_t is
# begin
#     return tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS);
# end;
# Read a tag from a tag memory row
def read_tag(way, tagset):
    return tagset.word_select(way, TAG_BITS)

# -- Write a tag to tag memory row
# procedure write_tag(way: in way_t;
#     tagset: inout cache_tags_set_t; tag: cache_tag_t) is
# begin
#     tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS) := tag;
# end;
# Write a tag to tag memory row
def write_tag(way, tagset, tag):
    return read_tag(way, tagset).eq(tag)

# -- Simple hash for direct-mapped TLB index
# function hash_ea(addr: std_ulogic_vector(63 downto 0))
#     return tlb_index_t is
#     variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
# begin
#     hash := addr(TLB_LG_PGSZ + TLB_BITS - 1 downto TLB_LG_PGSZ)
#             xor addr(
#                 TLB_LG_PGSZ + 2 * TLB_BITS - 1 downto
#                 TLB_LG_PGSZ + TLB_BITS
#             )
#             xor addr(
#                 TLB_LG_PGSZ + 3 * TLB_BITS - 1 downto
#                 TLB_LG_PGSZ + 2 * TLB_BITS
#             );
#     return to_integer(unsigned(hash));
# end;
# Simple hash for direct-mapped TLB index
def hash_ea(addr):
    hsh = addr[TLB_LG_PGSZ:TLB_LG_PGSZ + TLB_BITS] ^ addr[
              TLB_LG_PGSZ + TLB_BITS:TLB_LG_PGSZ + 2 * TLB_BITS
          ] ^ addr[
              TLB_LG_PGSZ + 2 * TLB_BITS:TLB_LG_PGSZ + 3 * TLB_BITS
          ]
    return hsh

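# With the defaults (TLB_LG_PGSZ=12, TLB_BITS=6) the hash folds three
# 6-bit fields of the effective address together:
#     hash = addr[12:18] ^ addr[18:24] ^ addr[24:30]
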
# begin
#
#     assert LINE_SIZE mod ROW_SIZE = 0;
#     assert ispow2(LINE_SIZE) report "LINE_SIZE not power of 2"
#         severity FAILURE;
#     assert ispow2(NUM_LINES) report "NUM_LINES not power of 2"
#         severity FAILURE;
#     assert ispow2(ROW_PER_LINE) report "ROW_PER_LINE not power of 2"
#         severity FAILURE;
#     assert ispow2(INSN_PER_ROW) report "INSN_PER_ROW not power of 2"
#         severity FAILURE;
#     assert (ROW_BITS = INDEX_BITS + ROW_LINEBITS)
#         report "geometry bits don't add up" severity FAILURE;
#     assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS)
#         report "geometry bits don't add up" severity FAILURE;
#     assert (REAL_ADDR_BITS = TAG_BITS + INDEX_BITS + LINE_OFF_BITS)
#         report "geometry bits don't add up" severity FAILURE;
#     assert (REAL_ADDR_BITS = TAG_BITS + ROW_BITS + ROW_OFF_BITS)
#         report "geometry bits don't add up" severity FAILURE;
#
#     sim_debug: if SIM generate
#     debug: process
#     begin
#         report "ROW_SIZE      = " & natural'image(ROW_SIZE);
#         report "ROW_PER_LINE  = " & natural'image(ROW_PER_LINE);
#         report "BRAM_ROWS     = " & natural'image(BRAM_ROWS);
#         report "INSN_PER_ROW  = " & natural'image(INSN_PER_ROW);
#         report "INSN_BITS     = " & natural'image(INSN_BITS);
#         report "ROW_BITS      = " & natural'image(ROW_BITS);
#         report "ROW_LINEBITS  = " & natural'image(ROW_LINEBITS);
#         report "LINE_OFF_BITS = " & natural'image(LINE_OFF_BITS);
#         report "ROW_OFF_BITS  = " & natural'image(ROW_OFF_BITS);
#         report "INDEX_BITS    = " & natural'image(INDEX_BITS);
#         report "TAG_BITS      = " & natural'image(TAG_BITS);
#         report "WAY_BITS      = " & natural'image(WAY_BITS);
#         wait;
#     end process;
#     end generate;

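# Import-time sanity checks on the geometry: a minimal Python translation
# (a sketch) of the commented VHDL asserts above. The ispow2 helper is
# introduced here for illustration; it is not part of the original code.
def ispow2(x):
    return x > 0 and (x & (x - 1)) == 0

assert LINE_SIZE % ROW_SIZE == 0, "LINE_SIZE not multiple of ROW_SIZE"
assert ispow2(LINE_SIZE), "LINE_SIZE not power of 2"
assert ispow2(NUM_LINES), "NUM_LINES not power of 2"
assert ispow2(ROW_PER_LINE), "ROW_PER_LINE not power of 2"
assert ispow2(INSN_PER_ROW), "INSN_PER_ROW not power of 2"
assert ROW_BITS == INDEX_BITS + ROW_LINE_BITS, \
    "geometry bits don't add up"
assert LINE_OFF_BITS == ROW_OFF_BITS + ROW_LINE_BITS, \
    "geometry bits don't add up"
assert REAL_ADDR_BITS == TAG_BITS + INDEX_BITS + LINE_OFF_BITS, \
    "geometry bits don't add up"
assert REAL_ADDR_BITS == TAG_BITS + ROW_BITS + ROW_OFF_BITS, \
    "geometry bits don't add up"
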
# Cache reload state machine
@unique
class State(Enum):
    IDLE     = 0
    CLR_TAG  = 1
    WAIT_ACK = 2

# type reg_internal_t is record
#     -- Cache hit state (Latches for 1 cycle BRAM access)
#     hit_way   : way_t;
#     hit_nia   : std_ulogic_vector(63 downto 0);
#     hit_smark : std_ulogic;
#     hit_valid : std_ulogic;
#
#     -- Cache miss state (reload state machine)
#     state       : state_t;
#     wb          : wishbone_master_out;
#     store_way   : way_t;
#     store_index : index_t;
#     store_row   : row_t;
#     store_tag   : cache_tag_t;
#     store_valid : std_ulogic;
#     end_row_ix  : row_in_line_t;
#     rows_valid  : row_per_line_valid_t;
#
#     -- TLB miss state
#     fetch_failed : std_ulogic;
# end record;
class RegInternal(RecordObject):
    def __init__(self):
        super().__init__()
        # Cache hit state (Latches for 1 cycle BRAM access)
        self.hit_way = Signal(NUM_WAYS)
        self.hit_nia = Signal(64)
        self.hit_smark = Signal()
        self.hit_valid = Signal()

        # Cache miss state (reload state machine)
        self.state = Signal(State, reset=State.IDLE)
        self.wb = WBMasterOut("wb")
        self.req_adr = Signal(64)
        self.store_way = Signal(NUM_WAYS)
        self.store_index = Signal(NUM_LINES)
        self.store_row = Signal(BRAM_ROWS)
        self.store_tag = Signal(TAG_BITS)
        self.store_valid = Signal()
        self.end_row_ix = Signal(ROW_LINE_BITS)
        self.rows_valid = RowPerLineValidArray()

        # TLB miss state
        self.fetch_failed = Signal()

# -- 64 bit direct mapped icache. All instructions are 4B aligned.
#
# entity icache is
#     generic (
#         SIM : boolean := false;
#         -- Line size in bytes
#         LINE_SIZE : positive := 64;
#         -- BRAM organisation: We never access more
#         -- than wishbone_data_bits
#         -- at a time so to save resources we make the
#         -- array only that wide,
#         -- and use consecutive indices for to make a cache "line"
#         --
#         -- ROW_SIZE is the width in bytes of the BRAM (based on WB,
#         -- so 64-bits)
#         ROW_SIZE : positive := wishbone_data_bits / 8;
#         -- Number of lines in a set
#         NUM_LINES : positive := 32;
#         -- Number of ways
#         NUM_WAYS : positive := 4;
#         -- L1 ITLB number of entries (direct mapped)
#         TLB_SIZE : positive := 64;
#         -- L1 ITLB log_2(page_size)
#         TLB_LG_PGSZ : positive := 12;
#         -- Number of real address bits that we store
#         REAL_ADDR_BITS : positive := 56;
#         -- Non-zero to enable log data collection
#         LOG_LENGTH : natural := 0
#     );
#     port (
#         clk : in std_ulogic;
#         rst : in std_ulogic;
#
#         i_in  : in Fetch1ToIcacheType;
#         i_out : out IcacheToDecode1Type;
#
#         m_in  : in MmuToIcacheType;
#
#         stall_in  : in std_ulogic;
#         stall_out : out std_ulogic;
#         flush_in  : in std_ulogic;
#         inval_in  : in std_ulogic;
#
#         wishbone_out : out wishbone_master_out;
#         wishbone_in  : in wishbone_slave_out;
#
#         log_out : out std_ulogic_vector(53 downto 0)
#     );
# end entity icache;
# 64 bit direct mapped icache. All instructions are 4B aligned.
class ICache(Elaboratable):
    """64 bit direct mapped icache. All instructions are 4B aligned."""
    def __init__(self):
        self.i_in = Fetch1ToICacheType(name="i_in")
        self.i_out = ICacheToDecode1Type(name="i_out")

        self.m_in = MMUToICacheType(name="m_in")

        self.stall_in = Signal()
        self.stall_out = Signal()
        self.flush_in = Signal()
        self.inval_in = Signal()

        self.wb_out = WBMasterOut(name="wb_out")
        self.wb_in = WBSlaveOut(name="wb_in")

        self.log_out = Signal(54)


    # -- Generate a cache RAM for each way
    # rams: for i in 0 to NUM_WAYS-1 generate
    #     signal do_read  : std_ulogic;
    #     signal do_write : std_ulogic;
    #     signal rd_addr  : std_ulogic_vector(ROW_BITS-1 downto 0);
    #     signal wr_addr  : std_ulogic_vector(ROW_BITS-1 downto 0);
    #     signal dout     : cache_row_t;
    #     signal wr_sel   : std_ulogic_vector(ROW_SIZE-1 downto 0);
    # begin
    #     way: entity work.cache_ram
    #         generic map (
    #             ROW_BITS => ROW_BITS,
    #             WIDTH    => ROW_SIZE_BITS
    #         )
    #         port map (
    #             clk     => clk,
    #             rd_en   => do_read,
    #             rd_addr => rd_addr,
    #             rd_data => dout,
    #             wr_sel  => wr_sel,
    #             wr_addr => wr_addr,
    #             wr_data => wishbone_in.dat
    #         );
    #     process(all)
    #     begin
    #         do_read <= not (stall_in or use_previous);
    #         do_write <= '0';
    #         if wishbone_in.ack = '1' and replace_way = i then
    #             do_write <= '1';
    #         end if;
    #         cache_out(i) <= dout;
    #         rd_addr <=
    #             std_ulogic_vector(to_unsigned(req_row, ROW_BITS));
    #         wr_addr <=
    #             std_ulogic_vector(to_unsigned(r.store_row, ROW_BITS));
    #         for i in 0 to ROW_SIZE-1 loop
    #             wr_sel(i) <= do_write;
    #         end loop;
    #     end process;
    # end generate;
    def rams(self, m, r, cache_out_row, use_previous, replace_way, req_row):
        comb = m.d.comb

        wb_in, stall_in = self.wb_in, self.stall_in

        for i in range(NUM_WAYS):
            do_read = Signal(name="do_rd_%d" % i)
            do_write = Signal(name="do_wr_%d" % i)
            rd_addr = Signal(ROW_BITS)
            wr_addr = Signal(ROW_BITS)
            d_out = Signal(ROW_SIZE_BITS, name="d_out_%d" % i)
            wr_sel = Signal(ROW_SIZE)

            way = CacheRam(ROW_BITS, ROW_SIZE_BITS, True)
            setattr(m.submodules, "cacheram_%d" % i, way)

            comb += way.rd_en.eq(do_read)
            comb += way.rd_addr.eq(rd_addr)
            comb += d_out.eq(way.rd_data_o)
            comb += way.wr_sel.eq(wr_sel)
            comb += way.wr_addr.eq(wr_addr)
            comb += way.wr_data.eq(wb_in.dat)

            comb += do_read.eq(~(stall_in | use_previous))

            with m.If(wb_in.ack & (replace_way == i)):
                comb += do_write.eq(1)

            # the way that hit drives the shared read-data bus
            with m.If(r.hit_way == i):
                comb += cache_out_row.eq(d_out)

            # read/write addresses are driven unconditionally,
            # as in the VHDL process above
            comb += rd_addr.eq(req_row)
            comb += wr_addr.eq(r.store_row)
            for j in range(ROW_SIZE):
                comb += wr_sel[j].eq(do_write)

    # -- Generate PLRUs
    # maybe_plrus: if NUM_WAYS > 1 generate
    # begin
    #     plrus: for i in 0 to NUM_LINES-1 generate
    #         -- PLRU interface
    #         signal plru_acc    : std_ulogic_vector(WAY_BITS-1 downto 0);
    #         signal plru_acc_en : std_ulogic;
    #         signal plru_out    : std_ulogic_vector(WAY_BITS-1 downto 0);
    #     begin
    #         plru : entity work.plru
    #             generic map (
    #                 BITS => WAY_BITS
    #             )
    #             port map (
    #                 clk    => clk,
    #                 rst    => rst,
    #                 acc    => plru_acc,
    #                 acc_en => plru_acc_en,
    #                 lru    => plru_out
    #             );
    #
    #         process(all)
    #         begin
    #             -- PLRU interface
    #             if get_index(r.hit_nia) = i then
    #                 plru_acc_en <= r.hit_valid;
    #             else
    #                 plru_acc_en <= '0';
    #             end if;
    #             plru_acc <=
    #                 std_ulogic_vector(to_unsigned(r.hit_way, WAY_BITS));
    #             plru_victim(i) <= plru_out;
    #         end process;
    #     end generate;
    # end generate;
    def maybe_plrus(self, m, r, plru_victim):
        comb = m.d.comb

        # the VHDL "if NUM_WAYS > 1 generate" is an elaboration-time
        # condition, so a plain Python "if" is used here rather than m.If
        if NUM_WAYS > 1:
            for i in range(NUM_LINES):
                plru_acc_i = Signal(WAY_BITS)
                plru_acc_en = Signal()
                plru = PLRU(WAY_BITS)
                setattr(m.submodules, "plru_%d" % i, plru)

                comb += plru.acc_i.eq(plru_acc_i)
                comb += plru.acc_en.eq(plru_acc_en)

                # PLRU interface
                with m.If(get_index(r.hit_nia) == i):
                    comb += plru.acc_en.eq(r.hit_valid)

                comb += plru.acc_i.eq(r.hit_way)
                comb += plru_victim[i].eq(plru.lru_o)

    # -- TLB hit detection and real address generation
    # itlb_lookup : process(all)
    #     variable pte  : tlb_pte_t;
    #     variable ttag : tlb_tag_t;
    # begin
    #     tlb_req_index <= hash_ea(i_in.nia);
    #     pte  := itlb_ptes(tlb_req_index);
    #     ttag := itlb_tags(tlb_req_index);
    #     if i_in.virt_mode = '1' then
    #         real_addr <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
    #                      i_in.nia(TLB_LG_PGSZ - 1 downto 0);
    #         if ttag = i_in.nia(63 downto TLB_LG_PGSZ + TLB_BITS) then
    #             ra_valid <= itlb_valids(tlb_req_index);
    #         else
    #             ra_valid <= '0';
    #         end if;
    #         eaa_priv <= pte(3);
    #     else
    #         real_addr <= i_in.nia(REAL_ADDR_BITS - 1 downto 0);
    #         ra_valid <= '1';
    #         eaa_priv <= '1';
    #     end if;
    #
    #     -- no IAMR, so no KUEP support for now
    #     priv_fault <= eaa_priv and not i_in.priv_mode;
    #     access_ok <= ra_valid and not priv_fault;
    # end process;
    # TLB hit detection and real address generation
    def itlb_lookup(self, m, tlb_req_index, itlb_ptes, itlb_tags,
                    real_addr, itlb_valid_bits, ra_valid, eaa_priv,
                    priv_fault, access_ok):
        comb = m.d.comb

        i_in = self.i_in

        pte = Signal(TLB_PTE_BITS)
        ttag = Signal(TLB_EA_TAG_BITS)

        comb += tlb_req_index.eq(hash_ea(i_in.nia))
        comb += pte.eq(itlb_ptes[tlb_req_index])
        comb += ttag.eq(itlb_tags[tlb_req_index])

        with m.If(i_in.virt_mode):
            comb += real_addr.eq(Cat(
                        i_in.nia[:TLB_LG_PGSZ],
                        pte[TLB_LG_PGSZ:REAL_ADDR_BITS]
                    ))

            with m.If(ttag == i_in.nia[TLB_LG_PGSZ + TLB_BITS:64]):
                comb += ra_valid.eq(itlb_valid_bits[tlb_req_index])

            comb += eaa_priv.eq(pte[3])

        with m.Else():
            comb += real_addr.eq(i_in.nia[:REAL_ADDR_BITS])
            comb += ra_valid.eq(1)
            comb += eaa_priv.eq(1)

        # No IAMR, so no KUEP support for now
        comb += priv_fault.eq(eaa_priv & ~i_in.priv_mode)
        comb += access_ok.eq(ra_valid & ~priv_fault)

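    # Note on the Cat() above: nmigen concatenation is LSB-first, so
    # Cat(i_in.nia[:TLB_LG_PGSZ], pte[TLB_LG_PGSZ:REAL_ADDR_BITS]) builds
    # the same value as the MSB-first VHDL concatenation
    # "pte(...) & i_in.nia(...)" quoted in the comments.
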
    # -- iTLB update
    # itlb_update: process(clk)
    #     variable wr_index : tlb_index_t;
    # begin
    #     if rising_edge(clk) then
    #         wr_index := hash_ea(m_in.addr);
    #         if rst = '1' or
    #            (m_in.tlbie = '1' and m_in.doall = '1') then
    #             -- clear all valid bits
    #             for i in tlb_index_t loop
    #                 itlb_valids(i) <= '0';
    #             end loop;
    #         elsif m_in.tlbie = '1' then
    #             -- clear entry regardless of hit or miss
    #             itlb_valids(wr_index) <= '0';
    #         elsif m_in.tlbld = '1' then
    #             itlb_tags(wr_index) <=
    #                 m_in.addr(63 downto TLB_LG_PGSZ + TLB_BITS);
    #             itlb_ptes(wr_index) <= m_in.pte;
    #             itlb_valids(wr_index) <= '1';
    #         end if;
    #     end if;
    # end process;
    # iTLB update
    def itlb_update(self, m, itlb_valid_bits, itlb_tags, itlb_ptes):
        comb = m.d.comb
        sync = m.d.sync

        m_in = self.m_in

        wr_index = Signal(TLB_SIZE)
        # the index is a combinatorial function of m_in.addr, matching
        # the same-cycle VHDL variable assignment above (a sync
        # assignment here would use the previous cycle's address)
        comb += wr_index.eq(hash_ea(m_in.addr))

        with m.If(m_in.tlbie & m_in.doall):
            # Clear all valid bits
            for i in range(TLB_SIZE):
                sync += itlb_valid_bits[i].eq(0)

        with m.Elif(m_in.tlbie):
            # Clear entry regardless of hit or miss
            sync += itlb_valid_bits[wr_index].eq(0)

        with m.Elif(m_in.tlbld):
            sync += itlb_tags[wr_index].eq(
                        m_in.addr[TLB_LG_PGSZ + TLB_BITS:64]
                    )
            sync += itlb_ptes[wr_index].eq(m_in.pte)
            sync += itlb_valid_bits[wr_index].eq(1)

    # -- Cache hit detection, output to fetch2 and other misc logic
    # icache_comb : process(all)
    # Cache hit detection, output to fetch2 and other misc logic
    def icache_comb(self, m, use_previous, r, req_index, req_row,
                    req_hit_way, req_tag, real_addr, req_laddr,
                    cache_valid_bits, cache_tags, access_ok, req_is_hit,
                    req_is_miss, replace_way, plru_victim, cache_out_row):
        # variable is_hit  : std_ulogic;
        # variable hit_way : way_t;
        comb = m.d.comb

        #comb += Display("ENTER icache_comb - use_previous:%x req_index:%x "
        #                "req_row:%x req_tag:%x real_addr:%x req_laddr:%x "
        #                "access_ok:%x req_is_hit:%x req_is_miss:%x "
        #                "replace_way:%x", use_previous, req_index, req_row,
        #                req_tag, real_addr, req_laddr, access_ok,
        #                req_is_hit, req_is_miss, replace_way)

        i_in, i_out, wb_out = self.i_in, self.i_out, self.wb_out
        flush_in, stall_out = self.flush_in, self.stall_out

        is_hit = Signal()
        hit_way = Signal(NUM_WAYS)
        # begin
        #     -- i_in.sequential means that i_in.nia this cycle
        #     -- is 4 more than last cycle. If we read more
        #     -- than 32 bits at a time, had a cache hit last
        #     -- cycle, and we don't want the first 32-bit chunk
        #     -- then we can keep the data we read last cycle
        #     -- and just use that.
        #     if unsigned(i_in.nia(INSN_BITS+2-1 downto 2)) /= 0 then
        #         use_previous <= i_in.sequential and r.hit_valid;
        #     else
        #         use_previous <= '0';
        #     end if;
        # i_in.sequential means that i_in.nia this cycle is 4 more than
        # last cycle. If we read more than 32 bits at a time, had a
        # cache hit last cycle, and we don't want the first 32-bit chunk
        # then we can keep the data we read last cycle and just use that.
        with m.If(i_in.nia[2:INSN_BITS+2] != 0):
            comb += use_previous.eq(i_in.sequential & r.hit_valid)

        #     -- Extract line, row and tag from request
        #     req_index <= get_index(i_in.nia);
        #     req_row <= get_row(i_in.nia);
        #     req_tag <= get_tag(real_addr);
        # Extract line, row and tag from request
        comb += req_index.eq(get_index(i_in.nia))
        comb += req_row.eq(get_row(i_in.nia))
        comb += req_tag.eq(get_tag(real_addr))

        #     -- Calculate address of beginning of cache row, will be
        #     -- used for cache miss processing if needed
        #     req_laddr <=
        #         (63 downto REAL_ADDR_BITS => '0') &
        #         real_addr(REAL_ADDR_BITS - 1 downto ROW_OFF_BITS) &
        #         (ROW_OFF_BITS-1 downto 0 => '0');
        # Calculate address of beginning of cache row, will be
        # used for cache miss processing if needed
        comb += req_laddr.eq(Cat(
                    Const(0b0, ROW_OFF_BITS),
                    real_addr[ROW_OFF_BITS:REAL_ADDR_BITS],
                    Const(0b0, 8)  # zero-extend: 64 - REAL_ADDR_BITS = 8
                ))

        #     -- Test if pending request is a hit on any way
        #     hit_way := 0;
        #     is_hit := '0';
        #     for i in way_t loop
        #         if i_in.req = '1' and
        #             (cache_valids(req_index)(i) = '1' or
        #              (r.state = WAIT_ACK and
        #               req_index = r.store_index and
        #               i = r.store_way and
        #               r.rows_valid(req_row mod ROW_PER_LINE) = '1')) then
        #             if read_tag(i, cache_tags(req_index)) = req_tag then
        #                 hit_way := i;
        #                 is_hit := '1';
        #             end if;
        #         end if;
        #     end loop;
        # Test if pending request is a hit on any way
        hitcond = Signal()
        comb += hitcond.eq((r.state == State.WAIT_ACK)
                           & (req_index == r.store_index)
                           & r.rows_valid[req_row % ROW_PER_LINE])
        with m.If(i_in.req):
            cvb = Signal(NUM_WAYS)
            ctag = Signal(TAG_RAM_WIDTH)
            comb += ctag.eq(cache_tags[req_index])
            comb += cvb.eq(cache_valid_bits[req_index])
            for i in range(NUM_WAYS):
                tagi = Signal(TAG_BITS, name="ti%d" % i)
                comb += tagi.eq(read_tag(i, ctag))
                hit_test = Signal(name="hit_test%d" % i)
                comb += hit_test.eq(i == r.store_way)
                with m.If((cvb[i] | (hitcond & hit_test)) &
                          (tagi == req_tag)):
                    comb += hit_way.eq(i)
                    comb += is_hit.eq(1)

        #     -- Generate the "hit" and "miss" signals
        #     -- for the synchronous blocks
        #     if i_in.req = '1' and access_ok = '1' and flush_in = '0'
        #        and rst = '0' then
        #         req_is_hit  <= is_hit;
        #         req_is_miss <= not is_hit;
        #     else
        #         req_is_hit  <= '0';
        #         req_is_miss <= '0';
        #     end if;
        #     req_hit_way <= hit_way;
        # Generate the "hit" and "miss" signals
        # for the synchronous blocks
        with m.If(i_in.req & access_ok & ~flush_in):
            comb += req_is_hit.eq(is_hit)
            comb += req_is_miss.eq(~is_hit)

        with m.Else():
            comb += req_is_hit.eq(0)
            comb += req_is_miss.eq(0)

        # the VHDL assigns req_hit_way unconditionally; without this the
        # hit way would never reach the synchronous hit machine
        comb += req_hit_way.eq(hit_way)

        #     -- The way to replace on a miss
        #     if r.state = CLR_TAG then
        #         replace_way <=
        #             to_integer(unsigned(plru_victim(r.store_index)));
        #     else
        #         replace_way <= r.store_way;
        #     end if;
        # The way to replace on a miss
        with m.If(r.state == State.CLR_TAG):
            comb += replace_way.eq(plru_victim[r.store_index])

        with m.Else():
            comb += replace_way.eq(r.store_way)

        #     -- Output instruction from current cache row
        #     --
        #     -- Note: This is a mild violation of our design principle of
        #     -- having pipeline stages output from a clean latch. In this
        #     -- case we output the result of a mux. The alternative would
        #     -- be output an entire row which I prefer not to do just yet
        #     -- as it would force fetch2 to know about some of the cache
        #     -- geometry information.
        #     i_out.insn <= read_insn_word(r.hit_nia, cache_out(r.hit_way));
        #     i_out.valid <= r.hit_valid;
        #     i_out.nia <= r.hit_nia;
        #     i_out.stop_mark <= r.hit_smark;
        #     i_out.fetch_failed <= r.fetch_failed;
        # Output instruction from current cache row
        #
        # Note: This is a mild violation of our design principle of
        # having pipeline stages output from a clean latch. In this
        # case we output the result of a mux. The alternative would
        # be to output an entire row, which I prefer not to do just yet
        # as it would force fetch2 to know about some of the cache
        # geometry information.
        #comb += Display("BEFORE read_insn_word - r.hit_nia:%x "
        #                "r.hit_way:%x, cache_out[r.hit_way]:%x", r.hit_nia,
        #                r.hit_way, cache_out[r.hit_way])
        comb += i_out.insn.eq(read_insn_word(r.hit_nia, cache_out_row))
        comb += i_out.valid.eq(r.hit_valid)
        comb += i_out.nia.eq(r.hit_nia)
        comb += i_out.stop_mark.eq(r.hit_smark)
        comb += i_out.fetch_failed.eq(r.fetch_failed)

        #     -- Stall fetch1 if we have a miss on cache or TLB
        #     -- or a protection fault
        #     stall_out <= not (is_hit and access_ok);
        # Stall fetch1 if we have a miss on cache or TLB
        # or a protection fault
        comb += stall_out.eq(~(is_hit & access_ok))

        #     -- Wishbone requests output (from the cache miss reload machine)
        #     wishbone_out <= r.wb;
        # Wishbone requests output (from the cache miss reload machine)
        comb += wb_out.eq(r.wb)
        # end process;

    # -- Cache hit synchronous machine
    # icache_hit : process(clk)
    # Cache hit synchronous machine
    def icache_hit(self, m, use_previous, r, req_is_hit, req_hit_way,
                   req_index, req_tag, real_addr):
        sync = m.d.sync

        i_in, stall_in = self.i_in, self.stall_in
        flush_in = self.flush_in

        # begin
        #     if rising_edge(clk) then
        #         -- keep outputs to fetch2 unchanged on a stall
        #         -- except that flush or reset sets valid to 0
        #         -- If use_previous, keep the same data as last
        #         -- cycle and use the second half
        #         if stall_in = '1' or use_previous = '1' then
        #             if rst = '1' or flush_in = '1' then
        #                 r.hit_valid <= '0';
        #             end if;
        # keep outputs to fetch2 unchanged on a stall
        # except that flush or reset sets valid to 0
        # If use_previous, keep the same data as last
        # cycle and use the second half
        with m.If(stall_in | use_previous):
            with m.If(flush_in):
                sync += r.hit_valid.eq(0)
        #         else
        #             -- On a hit, latch the request for the next cycle,
        #             -- when the BRAM data will be available on the
        #             -- cache_out output of the corresponding way
        #             r.hit_valid <= req_is_hit;
        #             if req_is_hit = '1' then
        #                 r.hit_way <= req_hit_way;
        with m.Else():
            # On a hit, latch the request for the next cycle,
            # when the BRAM data will be available on the
            # cache_out output of the corresponding way
            sync += r.hit_valid.eq(req_is_hit)

            with m.If(req_is_hit):
                sync += r.hit_way.eq(req_hit_way)

                # report "cache hit nia:" & to_hstring(i_in.nia) &
                #     " IR:" & std_ulogic'image(i_in.virt_mode) &
                #     " SM:" & std_ulogic'image(i_in.stop_mark) &
                #     " idx:" & integer'image(req_index) &
                #     " tag:" & to_hstring(req_tag) &
                #     " way:" & integer'image(req_hit_way) &
                #     " RA:" & to_hstring(real_addr);
                sync += Display("cache hit nia:%x IR:%x SM:%x idx:%x "
                                "tag:%x way:%x RA:%x", i_in.nia,
                                i_in.virt_mode, i_in.stop_mark, req_index,
                                req_tag, req_hit_way, real_addr)

        #         end if;
        #     end if;
        #     if stall_in = '0' then
        #         -- Send stop marks and NIA down regardless of validity
        #         r.hit_smark <= i_in.stop_mark;
        #         r.hit_nia <= i_in.nia;
        #     end if;
        with m.If(~stall_in):
            # Send stop marks and NIA down regardless of validity
            sync += r.hit_smark.eq(i_in.stop_mark)
            sync += r.hit_nia.eq(i_in.nia)
        # end if;
        # end process;

    # -- Cache miss/reload synchronous machine
    # icache_miss : process(clk)
    # Cache miss/reload synchronous machine
    def icache_miss(self, m, cache_valid_bits, r, req_is_miss,
                    req_index, req_laddr, req_tag, replace_way,
                    cache_tags, access_ok, real_addr):
        comb = m.d.comb
        sync = m.d.sync

        i_in, wb_in, m_in = self.i_in, self.wb_in, self.m_in
        stall_in, flush_in = self.stall_in, self.flush_in
        inval_in = self.inval_in

        # variable tagset    : cache_tags_set_t;
        # variable stbs_done : boolean;

        tagset = Signal(TAG_RAM_WIDTH)
        stbs_done = Signal()

        # begin
        #     if rising_edge(clk) then
        #         -- On reset, clear all valid bits to force misses
        #         if rst = '1' then
        #             for i in index_t loop
        #                 cache_valids(i) <= (others => '0');
        #             end loop;
        #             r.state <= IDLE;
        #             r.wb.cyc <= '0';
        #             r.wb.stb <= '0';
        #             -- We only ever do reads on wishbone
        #             r.wb.dat <= (others => '0');
        #             r.wb.sel <= "11111111";
        #             r.wb.we  <= '0';
        #
        #             -- Not useful normally but helps avoiding
        #             -- tons of sim warnings
        #             r.wb.adr <= (others => '0');
        #         else
        #             -- Process cache invalidations
        #             if inval_in = '1' then
        #                 for i in index_t loop
        #                     cache_valids(i) <= (others => '0');
        #                 end loop;
        #                 r.store_valid <= '0';
        #             end if;
        # We only ever do reads on wishbone: sel is all-ones (every byte
        # lane enabled), and the row address is the upper bits of req_adr
        comb += r.wb.sel.eq(-1)
        comb += r.wb.adr.eq(r.req_adr[3:])

        # Process cache invalidations
        with m.If(inval_in):
            for i in range(NUM_LINES):
                sync += cache_valid_bits[i].eq(0)
            sync += r.store_valid.eq(0)

        #         -- Main state machine
        #         case r.state is
        # Main state machine
        with m.Switch(r.state):

            # when IDLE =>
            with m.Case(State.IDLE):
                #     -- Reset per-row valid flags,
                #     -- only used in WAIT_ACK
                #     for i in 0 to ROW_PER_LINE - 1 loop
                #         r.rows_valid(i) <= '0';
                #     end loop;
                # Reset per-row valid flags,
                # only used in WAIT_ACK
                for i in range(ROW_PER_LINE):
                    sync += r.rows_valid[i].eq(0)

                #     -- We need to read a cache line
                #     if req_is_miss = '1' then
                #         report "cache miss nia:" & to_hstring(i_in.nia) &
                #             " IR:" & std_ulogic'image(i_in.virt_mode) &
                #             " SM:" & std_ulogic'image(i_in.stop_mark) &
                #             " idx:" & integer'image(req_index) &
                #             " way:" & integer'image(replace_way) &
                #             " tag:" & to_hstring(req_tag) &
                #             " RA:" & to_hstring(real_addr);
                # We need to read a cache line
                with m.If(req_is_miss):
                    sync += Display(
                                "cache miss nia:%x IR:%x SM:%x idx:%x "
                                "way:%x tag:%x RA:%x", i_in.nia,
                                i_in.virt_mode, i_in.stop_mark, req_index,
                                replace_way, req_tag, real_addr)

                    #     -- Keep track of our index and way for
                    #     -- subsequent stores
                    #     r.store_index <= req_index;
                    #     r.store_row <= get_row(req_laddr);
                    #     r.store_tag <= req_tag;
                    #     r.store_valid <= '1';
                    #     r.end_row_ix <=
                    #         get_row_of_line(get_row(req_laddr)) - 1;
                    # Keep track of our index and way
                    # for subsequent stores
                    sync += r.store_index.eq(req_index)
                    sync += r.store_row.eq(get_row(req_laddr))
                    sync += r.store_tag.eq(req_tag)
                    sync += r.store_valid.eq(1)
                    sync += r.end_row_ix.eq(
                                get_row_of_line(get_row(req_laddr)) - 1
                            )

                    #     -- Prep for first wishbone read. We calculate the
                    #     -- address of the start of the cache line and
                    #     -- start the WB cycle.
                    #     r.wb.adr <= req_laddr(r.wb.adr'left downto 0);
                    #     r.wb.cyc <= '1';
                    #     r.wb.stb <= '1';
                    # Prep for first wishbone read. We calculate the
                    # address of the start of the cache line and
                    # start the WB cycle.
                    sync += r.req_adr.eq(req_laddr)
                    sync += r.wb.cyc.eq(1)
                    sync += r.wb.stb.eq(1)

                    #     -- Track that we had one request sent
                    #     r.state <= CLR_TAG;
                    # Track that we had one request sent
                    sync += r.state.eq(State.CLR_TAG)
                # end if;

            # when CLR_TAG | WAIT_ACK =>
            with m.Case(State.CLR_TAG, State.WAIT_ACK):
                # if r.state = CLR_TAG then
                with m.If(r.state == State.CLR_TAG):
                    #     -- Get victim way from plru
                    #     r.store_way <= replace_way;
                    # Get victim way from plru
                    sync += r.store_way.eq(replace_way)

                    #     -- Force misses on that way while
                    #     -- reloading that line
                    #     cache_valids(req_index)(replace_way) <= '0';
                    # Force misses on that way while
                    # reloading that line
                    cv = Signal(NUM_WAYS)
                    comb += cv.eq(cache_valid_bits[req_index])
                    comb += cv.bit_select(replace_way, 1).eq(0)
                    sync += cache_valid_bits[req_index].eq(cv)

                    #     -- Store new tag in selected way
                    #     for i in 0 to NUM_WAYS-1 loop
                    #         if i = replace_way then
                    #             tagset := cache_tags(r.store_index);
                    #             write_tag(i, tagset, r.store_tag);
                    #             cache_tags(r.store_index) <= tagset;
                    #         end if;
                    #     end loop;
                    for i in range(NUM_WAYS):
                        with m.If(i == replace_way):
                            comb += tagset.eq(cache_tags[r.store_index])
                            comb += write_tag(i, tagset, r.store_tag)
                            sync += cache_tags[r.store_index].eq(tagset)

                    # r.state <= WAIT_ACK;
                    sync += r.state.eq(State.WAIT_ACK)
                # end if;

                #     -- Requests are all sent if stb is 0
                #     stbs_done := r.wb.stb = '0';
                # Requests are all sent if stb is 0
                stbs_zero = Signal()
                comb += stbs_zero.eq(r.wb.stb == 0)
                comb += stbs_done.eq(stbs_zero)

                #     -- If we are still sending requests,
                #     -- was one accepted ?
                #     if wishbone_in.stall = '0' and not stbs_done then
                # If we are still sending requests,
                # was one accepted?
                with m.If(~wb_in.stall & ~stbs_zero):
                    #     -- That was the last word? We are done sending.
                    #     -- Clear stb and set stbs_done so we can handle
                    #     -- an eventual last ack on the same cycle.
                    #     if is_last_row_addr(r.wb.adr, r.end_row_ix) then
                    #         r.wb.stb <= '0';
                    #         stbs_done := true;
                    #     end if;
                    # That was the last word? We are done sending.
                    # Clear stb and set stbs_done so we can handle
                    # an eventual last ack on the same cycle.
                    with m.If(is_last_row_addr(r.req_adr, r.end_row_ix)):
                        sync += Display("IS_LAST_ROW_ADDR "
                                        "r.wb.addr:%x r.end_row_ix:%x "
                                        "r.wb.stb:%x stbs_zero:%x "
                                        "stbs_done:%x", r.wb.adr,
                                        r.end_row_ix, r.wb.stb,
                                        stbs_zero, stbs_done)
                        sync += r.wb.stb.eq(0)
                        comb += stbs_done.eq(1)

                    #     -- Calculate the next row address
                    #     r.wb.adr <= next_row_addr(r.wb.adr);
                    # Calculate the next row address
                    rarange = Signal(LINE_OFF_BITS - ROW_OFF_BITS)
                    comb += rarange.eq(
                                r.req_adr[ROW_OFF_BITS:LINE_OFF_BITS] + 1
                            )
                    sync += r.req_adr[ROW_OFF_BITS:LINE_OFF_BITS].eq(
                                rarange
                            )
                    sync += Display("RARANGE r.wb.adr:%x stbs_zero:%x "
                                    "stbs_done:%x", rarange, stbs_zero,
                                    stbs_done)
                # end if;

                #     -- Incoming acks processing
                #     if wishbone_in.ack = '1' then
                # Incoming acks processing
                with m.If(wb_in.ack):
                    #     r.rows_valid(r.store_row mod ROW_PER_LINE)
                    #         <= '1';
                    sync += Display("WB_IN_ACK stbs_zero:%x "
                                    "stbs_done:%x",
                                    stbs_zero, stbs_done)

                    sync += r.rows_valid[r.store_row % ROW_PER_LINE].eq(1)

                    #     -- Check for completion
                    #     if stbs_done and
                    #        is_last_row(r.store_row, r.end_row_ix) then
                    # Check for completion
                    with m.If(stbs_done &
                              is_last_row(r.store_row, r.end_row_ix)):
                        #     -- Complete wishbone cycle
                        #     r.wb.cyc <= '0';
                        # Complete wishbone cycle
                        sync += r.wb.cyc.eq(0)

                        #     -- Cache line is now valid
                        #     cache_valids(r.store_index)(replace_way) <=
                        #         r.store_valid and not inval_in;
                        # Cache line is now valid
                        cv = Signal(NUM_WAYS)
                        comb += cv.eq(cache_valid_bits[r.store_index])
                        comb += cv.bit_select(replace_way, 1).eq(
                                    r.store_valid & ~inval_in
                                )
                        sync += cache_valid_bits[r.store_index].eq(cv)

                        #     -- We are done
                        #     r.state <= IDLE;
                        # We are done
                        sync += r.state.eq(State.IDLE)
                    # end if;

                    #     -- Increment store row counter
                    #     r.store_row <= next_row(r.store_row);
                    # Increment store row counter
                    sync += r.store_row.eq(next_row(r.store_row))
                # end if;
        # end case;
        # end if;

        #     -- TLB miss and protection fault processing
        #     if rst = '1' or flush_in = '1' or m_in.tlbld = '1' then
        #         r.fetch_failed <= '0';
        #     elsif i_in.req = '1' and access_ok = '0' and
        #           stall_in = '0' then
        #         r.fetch_failed <= '1';
        #     end if;
        # TLB miss and protection fault processing
        with m.If(flush_in | m_in.tlbld):
            sync += r.fetch_failed.eq(0)

        with m.Elif(i_in.req & ~access_ok & ~stall_in):
            sync += r.fetch_failed.eq(1)
        # end if;
        # end process;

    # icache_log: if LOG_LENGTH > 0 generate
    def icache_log(self, m, req_hit_way, ra_valid, access_ok,
                   req_is_miss, req_is_hit, lway, wstate, r):
        comb = m.d.comb
        sync = m.d.sync

        wb_in, i_out = self.wb_in, self.i_out
        log_out, stall_out = self.log_out, self.stall_out

        # -- Output data to logger
        # signal log_data : std_ulogic_vector(53 downto 0);
        # begin
        #     data_log: process(clk)
        #         variable lway: way_t;
        #         variable wstate: std_ulogic;
        # Output data to logger
        for i in range(LOG_LENGTH):
            log_data = Signal(54)
            lway = Signal(NUM_WAYS)
            wstate = Signal()

            # begin
            #     if rising_edge(clk) then
            #         lway := req_hit_way;
            #         wstate := '0';
            sync += lway.eq(req_hit_way)
            sync += wstate.eq(0)

            #         if r.state /= IDLE then
            #             wstate := '1';
            #         end if;
            with m.If(r.state != State.IDLE):
                sync += wstate.eq(1)

            #         log_data <= i_out.valid &
            #                     i_out.insn &
            #                     wishbone_in.ack &
            #                     r.wb.adr(5 downto 3) &
            #                     r.wb.stb & r.wb.cyc &
            #                     wishbone_in.stall &
            #                     stall_out &
            #                     r.fetch_failed &
            #                     r.hit_nia(5 downto 2) &
            #                     wstate &
            #                     std_ulogic_vector(to_unsigned(lway, 3)) &
            #                     req_is_hit & req_is_miss &
            #                     access_ok &
            #                     ra_valid;
            sync += log_data.eq(Cat(
                        ra_valid, access_ok, req_is_miss, req_is_hit,
                        lway, wstate, r.hit_nia[2:6],
                        r.fetch_failed, stall_out, wb_in.stall, r.wb.cyc,
                        r.wb.stb, r.wb.adr[3:6], wb_in.ack, i_out.insn,
                        i_out.valid
                    ))
            #     end if;
            # end process;
            # log_out <= log_data;
            comb += log_out.eq(log_data)
    # end generate;
    # end;

    def elaborate(self, platform):

        m = Module()
        comb = m.d.comb

        # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
        cache_tags = CacheTagArray()
        cache_valid_bits = CacheValidBitsArray()

        # signal itlb_valids : tlb_valids_t;
        # signal itlb_tags : tlb_tags_t;
        # signal itlb_ptes : tlb_ptes_t;
        # attribute ram_style of itlb_tags : signal is "distributed";
        # attribute ram_style of itlb_ptes : signal is "distributed";
        itlb_valid_bits = TLBValidBitsArray()
        itlb_tags = TLBTagArray()
        itlb_ptes = TLBPtesArray()
        # TODO: to be passed to nmigen as ram attributes
        # attribute ram_style of itlb_tags : signal is "distributed";
        # attribute ram_style of itlb_ptes : signal is "distributed";

        # -- Privilege bit from PTE EAA field
        # signal eaa_priv : std_ulogic;
        # Privilege bit from PTE EAA field
        eaa_priv = Signal()

        # signal r : reg_internal_t;
        r = RegInternal()

        # -- Async signals on incoming request
        # signal req_index   : index_t;
        # signal req_row     : row_t;
        # signal req_hit_way : way_t;
        # signal req_tag     : cache_tag_t;
        # signal req_is_hit  : std_ulogic;
        # signal req_is_miss : std_ulogic;
        # signal req_laddr   : std_ulogic_vector(63 downto 0);
        # Async signals on incoming request
        req_index = Signal(NUM_LINES)
        req_row = Signal(BRAM_ROWS)
        req_hit_way = Signal(NUM_WAYS)
        req_tag = Signal(TAG_BITS)
        req_is_hit = Signal()
        req_is_miss = Signal()
        req_laddr = Signal(64)

        # signal tlb_req_index : tlb_index_t;
        # signal real_addr : std_ulogic_vector(
        #     REAL_ADDR_BITS - 1 downto 0
        # );
        # signal ra_valid : std_ulogic;
        # signal priv_fault : std_ulogic;
        # signal access_ok : std_ulogic;
        # signal use_previous : std_ulogic;
        tlb_req_index = Signal(TLB_SIZE)
        real_addr = Signal(REAL_ADDR_BITS)
        ra_valid = Signal()
        priv_fault = Signal()
        access_ok = Signal()
        use_previous = Signal()

        # signal cache_out : cache_ram_out_t;
        cache_out_row = Signal(ROW_SIZE_BITS)

        # signal plru_victim : plru_out_t;
        # signal replace_way : way_t;
        plru_victim = PLRUOut()
        replace_way = Signal(NUM_WAYS)

        # call sub-functions putting everything together,
        # using shared signals established above
        self.rams(m, r, cache_out_row, use_previous, replace_way, req_row)
        self.maybe_plrus(m, r, plru_victim)
        self.itlb_lookup(m, tlb_req_index, itlb_ptes, itlb_tags,
                         real_addr, itlb_valid_bits, ra_valid, eaa_priv,
                         priv_fault, access_ok)
        self.itlb_update(m, itlb_valid_bits, itlb_tags, itlb_ptes)
        self.icache_comb(m, use_previous, r, req_index, req_row,
                         req_hit_way, req_tag, real_addr, req_laddr,
                         cache_valid_bits, cache_tags, access_ok,
                         req_is_hit, req_is_miss, replace_way,
                         plru_victim, cache_out_row)
        self.icache_hit(m, use_previous, r, req_is_hit, req_hit_way,
                        req_index, req_tag, real_addr)
        self.icache_miss(m, cache_valid_bits, r, req_is_miss, req_index,
                         req_laddr, req_tag, replace_way, cache_tags,
                         access_ok, real_addr)
        #self.icache_log(m, log_out, req_hit_way, ra_valid, access_ok,
        #                req_is_miss, req_is_hit, lway, wstate, r)

        return m



# icache_tb.vhdl
#
# library ieee;
# use ieee.std_logic_1164.all;
#
# library work;
# use work.common.all;
# use work.wishbone_types.all;
#
# entity icache_tb is
# end icache_tb;
#
# architecture behave of icache_tb is
#     signal clk : std_ulogic;
#     signal rst : std_ulogic;
#
#     signal i_out : Fetch1ToIcacheType;
#     signal i_in  : IcacheToDecode1Type;
#
#     signal m_out : MmuToIcacheType;
#
#     signal wb_bram_in  : wishbone_master_out;
#     signal wb_bram_out : wishbone_slave_out;
#
#     constant clk_period : time := 10 ns;
# begin
#     icache0: entity work.icache
#         generic map(
#             LINE_SIZE => 64,
#             NUM_LINES => 4
#         )
#         port map(
#             clk => clk,
#             rst => rst,
#             i_in => i_out,
#             i_out => i_in,
#             m_in => m_out,
#             stall_in => '0',
#             flush_in => '0',
#             inval_in => '0',
#             wishbone_out => wb_bram_in,
#             wishbone_in => wb_bram_out
#         );
#
#     -- BRAM Memory slave
#     bram0: entity work.wishbone_bram_wrapper
#         generic map(
#             MEMORY_SIZE => 1024,
#             RAM_INIT_FILE => "icache_test.bin"
#         )
#         port map(
#             clk => clk,
#             rst => rst,
#             wishbone_in => wb_bram_in,
#             wishbone_out => wb_bram_out
#         );
#
#     clk_process: process
#     begin
#         clk <= '0';
#         wait for clk_period/2;
#         clk <= '1';
#         wait for clk_period/2;
#     end process;
#
#     rst_process: process
#     begin
#         rst <= '1';
#         wait for 2*clk_period;
#         rst <= '0';
#         wait;
#     end process;
#
#     stim: process
#     begin
#         i_out.req <= '0';
#         i_out.nia <= (others => '0');
#         i_out.stop_mark <= '0';
#
#         m_out.tlbld <= '0';
#         m_out.tlbie <= '0';
#         m_out.addr <= (others => '0');
#         m_out.pte <= (others => '0');
#
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#
#         i_out.req <= '1';
#         i_out.nia <= x"0000000000000004";
#
#         wait for 30*clk_period;
#         wait until rising_edge(clk);
#
#         assert i_in.valid = '1' severity failure;
#         assert i_in.insn = x"00000001"
#             report "insn @" & to_hstring(i_out.nia) &
#                 "=" & to_hstring(i_in.insn) &
#                 " expected 00000001"
#             severity failure;
#
#         i_out.req <= '0';
#
#         wait until rising_edge(clk);
#
#         -- hit
#         i_out.req <= '1';
#         i_out.nia <= x"0000000000000008";
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         assert i_in.valid = '1' severity failure;
#         assert i_in.insn = x"00000002"
#             report "insn @" & to_hstring(i_out.nia) &
#                 "=" & to_hstring(i_in.insn) &
#                 " expected 00000002"
#             severity failure;
#         wait until rising_edge(clk);
#
#         -- another miss
#         i_out.req <= '1';
#         i_out.nia <= x"0000000000000040";
#
#         wait for 30*clk_period;
#         wait until rising_edge(clk);
#
#         assert i_in.valid = '1' severity failure;
#         assert i_in.insn = x"00000010"
#             report "insn @" & to_hstring(i_out.nia) &
#                 "=" & to_hstring(i_in.insn) &
#                 " expected 00000010"
#             severity failure;
#
#         -- test something that aliases
#         i_out.req <= '1';
#         i_out.nia <= x"0000000000000100";
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         assert i_in.valid = '0' severity failure;
#         wait until rising_edge(clk);
#
#         wait for 30*clk_period;
#         wait until rising_edge(clk);
#
#         assert i_in.valid = '1' severity failure;
#         assert i_in.insn = x"00000040"
#             report "insn @" & to_hstring(i_out.nia) &
#                 "=" & to_hstring(i_in.insn) &
#                 " expected 00000040"
#             severity failure;
#
#         i_out.req <= '0';
#
#         std.env.finish;
#     end process;
# end;
def icache_sim(dut):
    i_out = dut.i_in
    i_in = dut.i_out
    m_out = dut.m_in

    yield i_in.valid.eq(0)
    yield i_out.priv_mode.eq(1)
    yield i_out.req.eq(0)
    yield i_out.nia.eq(0)
    yield i_out.stop_mark.eq(0)
    yield m_out.tlbld.eq(0)
    yield m_out.tlbie.eq(0)
    yield m_out.addr.eq(0)
    yield m_out.pte.eq(0)
    yield
    yield
    yield
    yield
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000004, 64))
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    nia = yield i_out.nia
    insn = yield i_in.insn
    print(f"valid? {valid}")
    assert valid
    assert insn == 0x00000001, \
        "insn @%x=%x expected 00000001" % (nia, insn)
    yield i_out.req.eq(0)
    yield

    # hit
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000008, 64))
    yield
    yield
    valid = yield i_in.valid
    nia = yield i_in.nia
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000002, \
        "insn @%x=%x expected 00000002" % (nia, insn)
    yield

    # another miss
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000040, 64))
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    nia = yield i_out.nia
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000010, \
        "insn @%x=%x expected 00000010" % (nia, insn)

    # test something that aliases
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000100, 64))
    yield
    yield
    valid = yield i_in.valid
    # "assert ~valid" would always pass (~0 == -1 is truthy in Python),
    # so "not" is needed for the intended check
    assert not valid
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    nia = yield i_out.nia
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000040, \
        "insn @%x=%x expected 00000040" % (nia, insn)
    yield i_out.req.eq(0)



def test_icache(mem):
    dut = ICache()

    memory = Memory(width=64, depth=16*64, init=mem)
    sram = SRAM(memory=memory, granularity=8)

    m = Module()

    m.submodules.icache = dut
    m.submodules.sram = sram

    m.d.comb += sram.bus.cyc.eq(dut.wb_out.cyc)
    m.d.comb += sram.bus.stb.eq(dut.wb_out.stb)
    m.d.comb += sram.bus.we.eq(dut.wb_out.we)
    m.d.comb += sram.bus.sel.eq(dut.wb_out.sel)
    m.d.comb += sram.bus.adr.eq(dut.wb_out.adr)
    m.d.comb += sram.bus.dat_w.eq(dut.wb_out.dat)

    m.d.comb += dut.wb_in.ack.eq(sram.bus.ack)
    m.d.comb += dut.wb_in.dat.eq(sram.bus.dat_r)

    # nmigen Simulation
    sim = Simulator(m)
    sim.add_clock(1e-6)

    sim.add_sync_process(wrap(icache_sim(dut)))
    with sim.write_vcd('test_icache.vcd'):
        sim.run()


if __name__ == '__main__':
    dut = ICache()
    vl = rtlil.convert(dut, ports=[])
    with open("test_icache.il", "w") as f:
        f.write(vl)

    mem = []
    for i in range(512):
        mem.append((i*2) | ((i*2+1) << 32))

    test_icache(mem)

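# Running this file directly ("python icache.py") first converts the
# ICache module to RTLIL (written to test_icache.il) and then runs the
# icache_sim testbench above, producing a waveform trace in
# test_icache.vcd.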