icache.py trying to sort out test failure, added r field req_adr to
[soc.git] / src / soc / experiment / icache.py
1 """ICache
2
3 based on Anton Blanchard microwatt icache.vhdl
4
5 Set associative icache
6
7 TODO (in no specific order):
8 * Add debug interface to inspect cache content
9 * Add snoop/invalidate path
10 * Add multi-hit error detection
11 * Pipelined bus interface (wb or axi)
12 * Maybe add parity? There's a few bits free in each BRAM row on Xilinx
13 * Add optimization: service hits on partially loaded lines
 * Add optimization: (maybe) interrupt reload on flush/redirect
15 * Check if playing with the geometry of the cache tags allow for more
16 efficient use of distributed RAM and less logic/muxes. Currently we
17 write TAG_BITS width which may not match full ram blocks and might
18 cause muxes to be inferred for "partial writes".
19 * Check if making the read size of PLRU a ROM helps utilization
20
21 """
22 from enum import Enum, unique
23 from nmigen import (Module, Signal, Elaboratable, Cat, Array, Const)
24 from nmigen.cli import main, rtlil
25 from nmutil.iocontrol import RecordObject
26 from nmigen.utils import log2_int
27 from nmutil.util import Display
28
29 #from nmutil.plru import PLRU
30 from soc.experiment.cache_ram import CacheRam
31 from soc.experiment.plru import PLRU
32
33 from soc.experiment.mem_types import (Fetch1ToICacheType,
34 ICacheToDecode1Type,
35 MMUToICacheType)
36
37 from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS,
38 WB_SEL_BITS, WBAddrType, WBDataType,
39 WBSelType, WBMasterOut, WBSlaveOut,
40 WBMasterOutVector, WBSlaveOutVector,
41 WBIOMasterOut, WBIOSlaveOut)
42
43 # for test
44 from nmigen_soc.wishbone.sram import SRAM
45 from nmigen import Memory
46 from nmutil.util import wrap
47 from nmigen.cli import main, rtlil
48 if True:
49 from nmigen.back.pysim import Simulator, Delay, Settle
50 else:
51 from nmigen.sim.cxxsim import Simulator, Delay, Settle
52
53
# Non-zero to enable simulation-only behaviour
SIM = 0
# Line size in bytes
LINE_SIZE = 64
# BRAM organisation: We never access more than wishbone_data_bits
# at a time so to save resources we make the array only that wide,
# and use consecutive indices to make a cache "line"
#
# ROW_SIZE is the width in bytes of the BRAM (based on WB, so 64-bits)
ROW_SIZE = WB_DATA_BITS // 8
# Number of lines in a set
NUM_LINES = 32
# Number of ways
NUM_WAYS = 4
# L1 ITLB number of entries (direct mapped)
TLB_SIZE = 64
# L1 ITLB log_2(page_size)
TLB_LG_PGSZ = 12
# Number of real address bits that we store
REAL_ADDR_BITS = 56
# Non-zero to enable log data collection
LOG_LENGTH = 0

# width in bits of one BRAM row (one wishbone transfer)
ROW_SIZE_BITS = ROW_SIZE * 8
# ROW_PER_LINE is the number of row
# (wishbone) transactions in a line
ROW_PER_LINE = LINE_SIZE // ROW_SIZE
# BRAM_ROWS is the number of rows in
# BRAM needed to represent the full icache
BRAM_ROWS = NUM_LINES * ROW_PER_LINE
# INSN_PER_ROW is the number of 32bit
# instructions per BRAM row
INSN_PER_ROW = ROW_SIZE_BITS // 32

print("ROW_SIZE", ROW_SIZE)
print("ROW_SIZE_BITS", ROW_SIZE_BITS)
print("ROW_PER_LINE", ROW_PER_LINE)
print("BRAM_ROWS", BRAM_ROWS)
print("INSN_PER_ROW", INSN_PER_ROW)

# Bit fields counts in the address
#
# INSN_BITS is the number of bits to
# select an instruction in a row
INSN_BITS = log2_int(INSN_PER_ROW)
# ROW_BITS is the number of bits to
# select a row
ROW_BITS = log2_int(BRAM_ROWS)
# ROW_LINE_BITS is the number of bits to
# select a row within a line
ROW_LINE_BITS = log2_int(ROW_PER_LINE)
# LINE_OFF_BITS is the number of bits for
# the offset in a cache line
LINE_OFF_BITS = log2_int(LINE_SIZE)
# ROW_OFF_BITS is the number of bits for
# the offset in a row
ROW_OFF_BITS = log2_int(ROW_SIZE)
# INDEX_BITS is the number of bits to
# select a cache line
INDEX_BITS = log2_int(NUM_LINES)
# SET_SIZE_BITS is the log base 2 of
# the set size
SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS
# TAG_BITS is the number of bits of
# the tag part of the address
TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS
# TAG_WIDTH is TAG_BITS rounded up to the next multiple of 8
# (byte-padded tag width, for BRAM-friendly packing)
TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)

# WAY_BITS is the number of bits to
# select a way
WAY_BITS = log2_int(NUM_WAYS)
# one tag-RAM row holds all ways' tags, packed at TAG_BITS stride
TAG_RAM_WIDTH = TAG_BITS * NUM_WAYS

# -- L1 ITLB.
# constant TLB_BITS : natural := log2(TLB_SIZE);
# constant TLB_EA_TAG_BITS : natural := 64 - (TLB_LG_PGSZ + TLB_BITS);
# constant TLB_PTE_BITS : natural := 64;
TLB_BITS = log2_int(TLB_SIZE)
TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_BITS)
TLB_PTE_BITS = 64


print("INSN_BITS", INSN_BITS)
print("ROW_BITS", ROW_BITS)
print("ROW_LINE_BITS", ROW_LINE_BITS)
print("LINE_OFF_BITS", LINE_OFF_BITS)
print("ROW_OFF_BITS", ROW_OFF_BITS)
print("INDEX_BITS", INDEX_BITS)
print("SET_SIZE_BITS", SET_SIZE_BITS)
print("TAG_BITS", TAG_BITS)
print("WAY_BITS", WAY_BITS)
print("TAG_RAM_WIDTH", TAG_RAM_WIDTH)
print("TLB_BITS", TLB_BITS)
print("TLB_EA_TAG_BITS", TLB_EA_TAG_BITS)
print("TLB_PTE_BITS", TLB_PTE_BITS)
148
149
150
151
152 # architecture rtl of icache is
153 #constant ROW_SIZE_BITS : natural := ROW_SIZE*8;
154 #-- ROW_PER_LINE is the number of row (wishbone
155 #-- transactions) in a line
156 #constant ROW_PER_LINE : natural := LINE_SIZE / ROW_SIZE;
157 #-- BRAM_ROWS is the number of rows in BRAM
158 #-- needed to represent the full
159 #-- icache
160 #constant BRAM_ROWS : natural := NUM_LINES * ROW_PER_LINE;
161 #-- INSN_PER_ROW is the number of 32bit instructions per BRAM row
162 #constant INSN_PER_ROW : natural := ROW_SIZE_BITS / 32;
163 #-- Bit fields counts in the address
164 #
165 #-- INSN_BITS is the number of bits to select
166 #-- an instruction in a row
167 #constant INSN_BITS : natural := log2(INSN_PER_ROW);
168 #-- ROW_BITS is the number of bits to select a row
169 #constant ROW_BITS : natural := log2(BRAM_ROWS);
170 #-- ROW_LINEBITS is the number of bits to
171 #-- select a row within a line
172 #constant ROW_LINEBITS : natural := log2(ROW_PER_LINE);
173 #-- LINE_OFF_BITS is the number of bits for the offset
174 #-- in a cache line
175 #constant LINE_OFF_BITS : natural := log2(LINE_SIZE);
176 #-- ROW_OFF_BITS is the number of bits for the offset in a row
177 #constant ROW_OFF_BITS : natural := log2(ROW_SIZE);
178 #-- INDEX_BITS is the number of bits to select a cache line
179 #constant INDEX_BITS : natural := log2(NUM_LINES);
180 #-- SET_SIZE_BITS is the log base 2 of the set size
181 #constant SET_SIZE_BITS : natural := LINE_OFF_BITS + INDEX_BITS;
182 #-- TAG_BITS is the number of bits of the tag part of the address
183 #constant TAG_BITS : natural := REAL_ADDR_BITS - SET_SIZE_BITS;
184 #-- WAY_BITS is the number of bits to select a way
185 #constant WAY_BITS : natural := log2(NUM_WAYS);
186
187 #-- Example of layout for 32 lines of 64 bytes:
188 #--
189 #-- .. tag |index| line |
190 #-- .. | row | |
191 #-- .. | | | |00| zero (2)
192 #-- .. | | |-| | INSN_BITS (1)
193 #-- .. | |---| | ROW_LINEBITS (3)
194 #-- .. | |--- - --| LINE_OFF_BITS (6)
195 #-- .. | |- --| ROW_OFF_BITS (3)
196 #-- .. |----- ---| | ROW_BITS (8)
197 #-- .. |-----| | INDEX_BITS (5)
198 #-- .. --------| | TAG_BITS (53)
199 # Example of layout for 32 lines of 64 bytes:
200 #
201 # .. tag |index| line |
202 # .. | row | |
203 # .. | | | |00| zero (2)
204 # .. | | |-| | INSN_BITS (1)
205 # .. | |---| | ROW_LINEBITS (3)
206 # .. | |--- - --| LINE_OFF_BITS (6)
207 # .. | |- --| ROW_OFF_BITS (3)
208 # .. |----- ---| | ROW_BITS (8)
209 # .. |-----| | INDEX_BITS (5)
210 # .. --------| | TAG_BITS (53)
211
212 #subtype row_t is integer range 0 to BRAM_ROWS-1;
213 #subtype index_t is integer range 0 to NUM_LINES-1;
214 #subtype way_t is integer range 0 to NUM_WAYS-1;
215 #subtype row_in_line_t is unsigned(ROW_LINEBITS-1 downto 0);
216 #
217 #-- The cache data BRAM organized as described above for each way
218 #subtype cache_row_t is std_ulogic_vector(ROW_SIZE_BITS-1 downto 0);
219 #
220 #-- The cache tags LUTRAM has a row per set. Vivado is a pain and will
221 #-- not handle a clean (commented) definition of the cache tags as a 3d
222 #-- memory. For now, work around it by putting all the tags
223 #subtype cache_tag_t is std_logic_vector(TAG_BITS-1 downto 0);
224 # type cache_tags_set_t is array(way_t) of cache_tag_t;
225 # type cache_tags_array_t is array(index_t) of cache_tags_set_t;
226 #constant TAG_RAM_WIDTH : natural := TAG_BITS * NUM_WAYS;
227 #subtype cache_tags_set_t is std_logic_vector(TAG_RAM_WIDTH-1 downto 0);
228 #type cache_tags_array_t is array(index_t) of cache_tags_set_t;
def CacheTagArray():
    """Tag RAM: one TAG_RAM_WIDTH row (all ways' tags packed) per line."""
    tags = [Signal(TAG_RAM_WIDTH, name="cachetag_%d" %x)
            for x in range(NUM_LINES)]
    return Array(tags)
232
233 #-- The cache valid bits
234 #subtype cache_way_valids_t is std_ulogic_vector(NUM_WAYS-1 downto 0);
235 #type cache_valids_t is array(index_t) of cache_way_valids_t;
236 #type row_per_line_valid_t is array(0 to ROW_PER_LINE - 1) of std_ulogic;
def CacheValidBitsArray():
    """Valid bits: one NUM_WAYS-wide signal (a bit per way) per line."""
    valids = [Signal(NUM_WAYS, name="cachevalid_%d" %x)
              for x in range(NUM_LINES)]
    return Array(valids)
240
def RowPerLineValidArray():
    """One valid bit per row of a line (used while a line is reloading)."""
    rows = [Signal(name="rows_valid_%d" %x)
            for x in range(ROW_PER_LINE)]
    return Array(rows)
244
245
246 #attribute ram_style : string;
247 #attribute ram_style of cache_tags : signal is "distributed";
248 # TODO to be passed to nigmen as ram attributes
249 # attribute ram_style : string;
250 # attribute ram_style of cache_tags : signal is "distributed";
251
252
253 #subtype tlb_index_t is integer range 0 to TLB_SIZE - 1;
254 #type tlb_valids_t is array(tlb_index_t) of std_ulogic;
255 #subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0);
256 #type tlb_tags_t is array(tlb_index_t) of tlb_tag_t;
257 #subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
258 #type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t;
def TLBValidBitsArray():
    """One valid bit per (direct-mapped) ITLB entry."""
    valids = [Signal(name="tlbvalid_%d" %x)
              for x in range(TLB_SIZE)]
    return Array(valids)
262
def TLBTagArray():
    """Effective-address tag per ITLB entry (TLB_EA_TAG_BITS wide)."""
    tags = [Signal(TLB_EA_TAG_BITS, name="tlbtag_%d" %x)
            for x in range(TLB_SIZE)]
    return Array(tags)
266
def TLBPtesArray():
    """PTE storage per ITLB entry (TLB_PTE_BITS wide)."""
    ptes = [Signal(TLB_PTE_BITS, name="tlbptes_%d" %x)
            for x in range(TLB_SIZE)]
    return Array(ptes)
270
271
272 #-- Cache RAM interface
273 #type cache_ram_out_t is array(way_t) of cache_row_t;
274 # Cache RAM interface
def CacheRamOut():
    """Cache RAM read data: one ROW_SIZE_BITS-wide output per way."""
    outs = [Signal(ROW_SIZE_BITS, name="cache_out_%d" %x)
            for x in range(NUM_WAYS)]
    return Array(outs)
278
279 #-- PLRU output interface
280 #type plru_out_t is array(index_t) of
281 # std_ulogic_vector(WAY_BITS-1 downto 0);
282 # PLRU output interface
def PLRUOut():
    """PLRU victim-way outputs: one WAY_BITS-wide signal per line."""
    outs = [Signal(WAY_BITS, name="plru_out_%d" %x)
            for x in range(NUM_LINES)]
    return Array(outs)
286
287 # -- Return the cache line index (tag index) for an address
288 # function get_index(addr: std_ulogic_vector(63 downto 0))
289 # return index_t is
290 # begin
291 # return to_integer(unsigned(
292 # addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS)
293 # ));
294 # end;
295 # Return the cache line index (tag index) for an address
def get_index(addr):
    """Return the cache line index (tag index) for an address."""
    index = addr[LINE_OFF_BITS:SET_SIZE_BITS]
    return index
298
299 # -- Return the cache row index (data memory) for an address
300 # function get_row(addr: std_ulogic_vector(63 downto 0))
301 # return row_t is
302 # begin
303 # return to_integer(unsigned(
304 # addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS)
305 # ));
306 # end;
307 # Return the cache row index (data memory) for an address
def get_row(addr):
    """Return the cache row index (into the data BRAM) for an address."""
    row = addr[ROW_OFF_BITS:SET_SIZE_BITS]
    return row
310
311 # -- Return the index of a row within a line
312 # function get_row_of_line(row: row_t) return row_in_line_t is
313 # variable row_v : unsigned(ROW_BITS-1 downto 0);
314 # begin
315 # row_v := to_unsigned(row, ROW_BITS);
316 # return row_v(ROW_LINEBITS-1 downto 0);
317 # end;
318 # Return the index of a row within a line
def get_row_of_line(row):
    """Return the index of *row* within its cache line (low bits)."""
    row_in_line = row[:ROW_LINE_BITS]
    return row_in_line
321
322 # -- Returns whether this is the last row of a line
323 # function is_last_row_addr(addr: wishbone_addr_type;
324 # last: row_in_line_t
325 # )
326 # return boolean is
327 # begin
328 # return unsigned(
329 # addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS)
330 # ) = last;
331 # end;
332 # Returns whether this is the last row of a line
def is_last_row_addr(addr, last):
    """True when *addr* points at row *last* of its cache line."""
    row_in_line = addr[ROW_OFF_BITS:LINE_OFF_BITS]
    return row_in_line == last
335
336 # -- Returns whether this is the last row of a line
337 # function is_last_row(row: row_t;
338 # last: row_in_line_t) return boolean is
339 # begin
340 # return get_row_of_line(row) = last;
341 # end;
342 # Returns whether this is the last row of a line
def is_last_row(row, last):
    """True when *row* is row *last* of its cache line."""
    row_in_line = get_row_of_line(row)
    return row_in_line == last
345
346 # -- Return the next row in the current cache line. We use a dedicated
347 # -- function in order to limit the size of the generated adder to be
348 # -- only the bits within a cache line (3 bits with default settings)
349 # function next_row(row: row_t) return row_t is
350 # variable row_v : std_ulogic_vector(ROW_BITS-1 downto 0);
351 # variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
352 # variable result : std_ulogic_vector(ROW_BITS-1 downto 0);
353 # begin
354 # row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
355 # row_idx := row_v(ROW_LINEBITS-1 downto 0);
356 # row_v(ROW_LINEBITS-1 downto 0) :=
357 # std_ulogic_vector(unsigned(row_idx) + 1);
358 # return to_integer(unsigned(row_v));
359 # end;
360 # Return the next row in the current cache line. We use a dedicated
361 # function in order to limit the size of the generated adder to be
362 # only the bits within a cache line (3 bits with default settings)
def next_row(row):
    """Next row within the same cache line.

    Only the low ROW_LINE_BITS are incremented (and wrap), so the
    generated adder is just ROW_LINE_BITS wide; the upper bits of
    *row* pass through unchanged.
    """
    row_idx = row[0:ROW_LINE_BITS]
    incremented = row_idx + 1
    return Cat(incremented[:ROW_LINE_BITS], row[ROW_LINE_BITS:])
366 # -- Read the instruction word for the given address in the
367 # -- current cache row
368 # function read_insn_word(addr: std_ulogic_vector(63 downto 0);
369 # data: cache_row_t) return std_ulogic_vector is
370 # variable word: integer range 0 to INSN_PER_ROW-1;
371 # begin
372 # word := to_integer(unsigned(addr(INSN_BITS+2-1 downto 2)));
373 # return data(31+word*32 downto word*32);
374 # end;
375 # Read the instruction word for the given address
376 # in the current cache row
def read_insn_word(addr, data):
    """Select the 32-bit instruction at *addr* out of cache row *data*."""
    word_idx = addr[2:INSN_BITS+2]
    return data.word_select(word_idx, 32)
380
381 # -- Get the tag value from the address
382 # function get_tag(
383 # addr: std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0)
384 # )
385 # return cache_tag_t is
386 # begin
387 # return addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS);
388 # end;
389 # Get the tag value from the address
def get_tag(addr):
    """Return the tag portion (bits above the set) of a real address."""
    tag = addr[SET_SIZE_BITS:REAL_ADDR_BITS]
    return tag
392
393 # -- Read a tag from a tag memory row
394 # function read_tag(way: way_t; tagset: cache_tags_set_t)
395 # return cache_tag_t is
396 # begin
397 # return tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS);
398 # end;
399 # Read a tag from a tag memory row
# Read a tag from a tag memory row
def read_tag(way, tagset):
    """Extract way *way*'s tag from a tag-RAM row *tagset*.

    The row is packed at TAG_BITS stride (TAG_RAM_WIDTH is
    TAG_BITS * NUM_WAYS, and write_tag() stores at way * TAG_BITS),
    so the selection stride must be TAG_BITS.  The previous
    word_select(way, TAG_WIDTH) used the byte-rounded width, which
    disagrees with write_tag()'s layout and, for the highest ways,
    selects bits beyond the row — tags never matched what was written.
    """
    return tagset.word_select(way, TAG_BITS)
402
403 # -- Write a tag to tag memory row
404 # procedure write_tag(way: in way_t;
405 # tagset: inout cache_tags_set_t; tag: cache_tag_t) is
406 # begin
407 # tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS) := tag;
408 # end;
409 # Write a tag to tag memory row
def write_tag(way, tagset, tag):
    """Assignment storing *tag* into way *way*'s TAG_BITS slot of a row."""
    lo = way * TAG_BITS
    return tagset[lo:lo + TAG_BITS].eq(tag)
412
413 # -- Simple hash for direct-mapped TLB index
414 # function hash_ea(addr: std_ulogic_vector(63 downto 0))
415 # return tlb_index_t is
416 # variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
417 # begin
418 # hash := addr(TLB_LG_PGSZ + TLB_BITS - 1 downto TLB_LG_PGSZ)
419 # xor addr(
420 # TLB_LG_PGSZ + 2 * TLB_BITS - 1 downto
421 # TLB_LG_PGSZ + TLB_BITS
422 # )
423 # xor addr(
424 # TLB_LG_PGSZ + 3 * TLB_BITS - 1 downto
425 # TLB_LG_PGSZ + 2 * TLB_BITS
426 # );
427 # return to_integer(unsigned(hash));
428 # end;
429 # Simple hash for direct-mapped TLB index
def hash_ea(addr):
    """Simple hash for the direct-mapped ITLB index.

    XORs together three consecutive TLB_BITS-wide slices of the
    address, starting just above the page offset.
    """
    fold0 = addr[TLB_LG_PGSZ:TLB_LG_PGSZ + TLB_BITS]
    fold1 = addr[TLB_LG_PGSZ + TLB_BITS:TLB_LG_PGSZ + 2 * TLB_BITS]
    fold2 = addr[TLB_LG_PGSZ + 2 * TLB_BITS:TLB_LG_PGSZ + 3 * TLB_BITS]
    return fold0 ^ fold1 ^ fold2
437
438 # begin
439 #
440 # assert LINE_SIZE mod ROW_SIZE = 0;
441 # assert ispow2(LINE_SIZE) report "LINE_SIZE not power of 2"
442 # severity FAILURE;
443 # assert ispow2(NUM_LINES) report "NUM_LINES not power of 2"
444 # severity FAILURE;
445 # assert ispow2(ROW_PER_LINE) report "ROW_PER_LINE not power of 2"
446 # severity FAILURE;
447 # assert ispow2(INSN_PER_ROW) report "INSN_PER_ROW not power of 2"
448 # severity FAILURE;
449 # assert (ROW_BITS = INDEX_BITS + ROW_LINEBITS)
450 # report "geometry bits don't add up" severity FAILURE;
451 # assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS)
452 # report "geometry bits don't add up" severity FAILURE;
453 # assert (REAL_ADDR_BITS = TAG_BITS + INDEX_BITS + LINE_OFF_BITS)
454 # report "geometry bits don't add up" severity FAILURE;
455 # assert (REAL_ADDR_BITS = TAG_BITS + ROW_BITS + ROW_OFF_BITS)
456 # report "geometry bits don't add up" severity FAILURE;
457 #
458 # sim_debug: if SIM generate
459 # debug: process
460 # begin
461 # report "ROW_SIZE = " & natural'image(ROW_SIZE);
462 # report "ROW_PER_LINE = " & natural'image(ROW_PER_LINE);
463 # report "BRAM_ROWS = " & natural'image(BRAM_ROWS);
464 # report "INSN_PER_ROW = " & natural'image(INSN_PER_ROW);
465 # report "INSN_BITS = " & natural'image(INSN_BITS);
466 # report "ROW_BITS = " & natural'image(ROW_BITS);
467 # report "ROW_LINEBITS = " & natural'image(ROW_LINEBITS);
468 # report "LINE_OFF_BITS = " & natural'image(LINE_OFF_BITS);
469 # report "ROW_OFF_BITS = " & natural'image(ROW_OFF_BITS);
470 # report "INDEX_BITS = " & natural'image(INDEX_BITS);
471 # report "TAG_BITS = " & natural'image(TAG_BITS);
472 # report "WAY_BITS = " & natural'image(WAY_BITS);
473 # wait;
474 # end process;
475 # end generate;
476
477 # Cache reload state machine
@unique
class State(Enum):
    """Cache-line reload state machine (VHDL state_t)."""
    IDLE = 0      # no reload in progress
    CLR_TAG = 1   # victim way selected from the PLRU (see replace_way mux)
    WAIT_ACK = 2  # refill in flight; partial line tracked in rows_valid
483
484 # type reg_internal_t is record
485 # -- Cache hit state (Latches for 1 cycle BRAM access)
486 # hit_way : way_t;
487 # hit_nia : std_ulogic_vector(63 downto 0);
488 # hit_smark : std_ulogic;
489 # hit_valid : std_ulogic;
490 #
491 # -- Cache miss state (reload state machine)
492 # state : state_t;
493 # wb : wishbone_master_out;
494 # store_way : way_t;
495 # store_index : index_t;
496 # store_row : row_t;
497 # store_tag : cache_tag_t;
498 # store_valid : std_ulogic;
499 # end_row_ix : row_in_line_t;
500 # rows_valid : row_per_line_valid_t;
501 #
502 # -- TLB miss state
503 # fetch_failed : std_ulogic;
504 # end record;
class RegInternal(RecordObject):
    """Internal register state (VHDL reg_internal_t).

    Widths follow the VHDL subtypes: way_t/index_t/row_t are integer
    ranges, i.e. log2-width binary indices (WAY_BITS, INDEX_BITS,
    ROW_BITS) — not one signal bit per element.  The previous
    Signal(NUM_WAYS)/Signal(NUM_LINES)/Signal(BRAM_ROWS) widths were
    oversized; the held values are unchanged.
    """
    def __init__(self):
        super().__init__()
        # Cache hit state (Latches for 1 cycle BRAM access)
        self.hit_way = Signal(WAY_BITS)    # way_t
        self.hit_nia = Signal(64)
        self.hit_smark = Signal()
        self.hit_valid = Signal()

        # Cache miss state (reload state machine)
        self.state = Signal(State, reset=State.IDLE)
        self.wb = WBMasterOut("wb")
        self.req_adr = Signal(64)          # latched request address
        self.store_way = Signal(WAY_BITS)  # way_t
        self.store_index = Signal(INDEX_BITS)  # index_t
        self.store_row = Signal(ROW_BITS)  # row_t
        self.store_tag = Signal(TAG_BITS)
        self.store_valid = Signal()
        self.end_row_ix = Signal(ROW_LINE_BITS)
        self.rows_valid = RowPerLineValidArray()

        # TLB miss state
        self.fetch_failed = Signal()
528
529 # -- 64 bit direct mapped icache. All instructions are 4B aligned.
530 #
531 # entity icache is
532 # generic (
533 # SIM : boolean := false;
534 # -- Line size in bytes
535 # LINE_SIZE : positive := 64;
536 # -- BRAM organisation: We never access more
537 # -- than wishbone_data_bits
538 # -- at a time so to save resources we make the
539 # -- array only that wide,
540 # -- and use consecutive indices for to make a cache "line"
541 # --
542 # -- ROW_SIZE is the width in bytes of the BRAM (based on WB,
543 # -- so 64-bits)
544 # ROW_SIZE : positive := wishbone_data_bits / 8;
545 # -- Number of lines in a set
546 # NUM_LINES : positive := 32;
547 # -- Number of ways
548 # NUM_WAYS : positive := 4;
549 # -- L1 ITLB number of entries (direct mapped)
550 # TLB_SIZE : positive := 64;
551 # -- L1 ITLB log_2(page_size)
552 # TLB_LG_PGSZ : positive := 12;
553 # -- Number of real address bits that we store
554 # REAL_ADDR_BITS : positive := 56;
555 # -- Non-zero to enable log data collection
556 # LOG_LENGTH : natural := 0
557 # );
558 # port (
559 # clk : in std_ulogic;
560 # rst : in std_ulogic;
561 #
562 # i_in : in Fetch1ToIcacheType;
563 # i_out : out IcacheToDecode1Type;
564 #
565 # m_in : in MmuToIcacheType;
566 #
567 # stall_in : in std_ulogic;
568 # stall_out : out std_ulogic;
569 # flush_in : in std_ulogic;
570 # inval_in : in std_ulogic;
571 #
572 # wishbone_out : out wishbone_master_out;
573 # wishbone_in : in wishbone_slave_out;
574 #
575 # log_out : out std_ulogic_vector(53 downto 0)
576 # );
577 # end entity icache;
578 # 64 bit direct mapped icache. All instructions are 4B aligned.
579 class ICache(Elaboratable):
580 """64 bit direct mapped icache. All instructions are 4B aligned."""
    def __init__(self):
        """Declare the icache's external ports (mirrors the VHDL entity)."""
        # fetch1 request in, instruction to decode1 out
        self.i_in = Fetch1ToICacheType(name="i_in")
        self.i_out = ICacheToDecode1Type(name="i_out")

        # MMU interface (iTLB load / invalidate)
        self.m_in = MMUToICacheType(name="m_in")

        self.stall_in = Signal()
        self.stall_out = Signal()
        self.flush_in = Signal()
        self.inval_in = Signal()

        # wishbone master, used for cache line refills
        self.wb_out = WBMasterOut(name="wb_out")
        self.wb_in = WBSlaveOut(name="wb_in")

        # debug/log output (54 bits, cf. LOG_LENGTH)
        self.log_out = Signal(54)
596
597
598 # -- Generate a cache RAM for each way
599 # rams: for i in 0 to NUM_WAYS-1 generate
600 # signal do_read : std_ulogic;
601 # signal do_write : std_ulogic;
602 # signal rd_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
603 # signal wr_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
604 # signal dout : cache_row_t;
605 # signal wr_sel : std_ulogic_vector(ROW_SIZE-1 downto 0);
606 # begin
607 # way: entity work.cache_ram
608 # generic map (
609 # ROW_BITS => ROW_BITS,
610 # WIDTH => ROW_SIZE_BITS
611 # )
612 # port map (
613 # clk => clk,
614 # rd_en => do_read,
615 # rd_addr => rd_addr,
616 # rd_data => dout,
617 # wr_sel => wr_sel,
618 # wr_addr => wr_addr,
619 # wr_data => wishbone_in.dat
620 # );
621 # process(all)
622 # begin
623 # do_read <= not (stall_in or use_previous);
624 # do_write <= '0';
625 # if wishbone_in.ack = '1' and replace_way = i then
626 # do_write <= '1';
627 # end if;
628 # cache_out(i) <= dout;
629 # rd_addr <=
630 # std_ulogic_vector(to_unsigned(req_row, ROW_BITS));
631 # wr_addr <=
632 # std_ulogic_vector(to_unsigned(r.store_row, ROW_BITS));
633 # for i in 0 to ROW_SIZE-1 loop
634 # wr_sel(i) <= do_write;
635 # end loop;
636 # end process;
637 # end generate;
638 def rams(self, m, r, cache_out, use_previous, replace_way, req_row):
639 comb = m.d.comb
640
641 wb_in, stall_in = self.wb_in, self.stall_in
642
643
644 for i in range(NUM_WAYS):
645 do_read = Signal(name="do_rd_%d" % i)
646 do_write = Signal(name="do_wr_%d" % i)
647 rd_addr = Signal(ROW_BITS)
648 wr_addr = Signal(ROW_BITS)
649 d_out = Signal(ROW_SIZE_BITS, name="d_out_%d" % i)
650 wr_sel = Signal(ROW_SIZE)
651
652 way = CacheRam(ROW_BITS, ROW_SIZE_BITS, True)
653 setattr(m.submodules, "cacheram_%d" % i, way)
654
655 comb += way.rd_en.eq(do_read)
656 comb += way.rd_addr.eq(rd_addr)
657 comb += d_out.eq(way.rd_data_o)
658 comb += way.wr_sel.eq(wr_sel)
659 comb += way.wr_addr.eq(wr_addr)
660 comb += way.wr_data.eq(wb_in.dat)
661
662 comb += do_read.eq(~(stall_in | use_previous))
663
664 with m.If(wb_in.ack & (replace_way == i)):
665 comb += do_write.eq(1)
666
667 comb += cache_out[i].eq(d_out)
668 comb += rd_addr.eq(req_row)
669 comb += wr_addr.eq(r.store_row)
670 for j in range(ROW_SIZE):
671 comb += wr_sel[j].eq(do_write)
672
673 # -- Generate PLRUs
674 # maybe_plrus: if NUM_WAYS > 1 generate
675 # begin
676 # plrus: for i in 0 to NUM_LINES-1 generate
677 # -- PLRU interface
678 # signal plru_acc : std_ulogic_vector(WAY_BITS-1 downto 0);
679 # signal plru_acc_en : std_ulogic;
680 # signal plru_out : std_ulogic_vector(WAY_BITS-1 downto 0);
681 #
682 # begin
683 # plru : entity work.plru
684 # generic map (
685 # BITS => WAY_BITS
686 # )
687 # port map (
688 # clk => clk,
689 # rst => rst,
690 # acc => plru_acc,
691 # acc_en => plru_acc_en,
692 # lru => plru_out
693 # );
694 #
695 # process(all)
696 # begin
697 # -- PLRU interface
698 # if get_index(r.hit_nia) = i then
699 # plru_acc_en <= r.hit_valid;
700 # else
701 # plru_acc_en <= '0';
702 # end if;
703 # plru_acc <=
704 # std_ulogic_vector(to_unsigned(r.hit_way, WAY_BITS));
705 # plru_victim(i) <= plru_out;
706 # end process;
707 # end generate;
708 # end generate;
709 def maybe_plrus(self, m, r, plru_victim):
710 comb = m.d.comb
711
712 with m.If(NUM_WAYS > 1):
713 for i in range(NUM_LINES):
714 plru_acc_i = Signal(WAY_BITS)
715 plru_acc_en = Signal()
716 plru_out = Signal(WAY_BITS)
717 plru = PLRU(WAY_BITS)
718 comb += plru.acc_i.eq(plru_acc_i)
719 comb += plru.acc_en.eq(plru_acc_en)
720 comb += plru.lru_o.eq(plru_out)
721
722 # PLRU interface
723 with m.If(get_index(r.hit_nia) == i):
724 comb += plru.acc_en.eq(r.hit_valid)
725
726 comb += plru.acc_i.eq(r.hit_way)
727 comb += plru_victim[i].eq(plru.lru_o)
728
729 # -- TLB hit detection and real address generation
730 # itlb_lookup : process(all)
731 # variable pte : tlb_pte_t;
732 # variable ttag : tlb_tag_t;
733 # begin
734 # tlb_req_index <= hash_ea(i_in.nia);
735 # pte := itlb_ptes(tlb_req_index);
736 # ttag := itlb_tags(tlb_req_index);
737 # if i_in.virt_mode = '1' then
738 # real_addr <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
739 # i_in.nia(TLB_LG_PGSZ - 1 downto 0);
740 # if ttag = i_in.nia(63 downto TLB_LG_PGSZ + TLB_BITS) then
741 # ra_valid <= itlb_valids(tlb_req_index);
742 # else
743 # ra_valid <= '0';
744 # end if;
745 # eaa_priv <= pte(3);
746 # else
747 # real_addr <= i_in.nia(REAL_ADDR_BITS - 1 downto 0);
748 # ra_valid <= '1';
749 # eaa_priv <= '1';
750 # end if;
751 #
752 # -- no IAMR, so no KUEP support for now
753 # priv_fault <= eaa_priv and not i_in.priv_mode;
754 # access_ok <= ra_valid and not priv_fault;
755 # end process;
756 # TLB hit detection and real address generation
757 def itlb_lookup(self, m, tlb_req_index, itlb_ptes, itlb_tags,
758 real_addr, itlb_valid_bits, ra_valid, eaa_priv,
759 priv_fault, access_ok):
760 comb = m.d.comb
761
762 i_in = self.i_in
763
764 pte = Signal(TLB_PTE_BITS)
765 ttag = Signal(TLB_EA_TAG_BITS)
766
767 comb += tlb_req_index.eq(hash_ea(i_in.nia))
768 comb += pte.eq(itlb_ptes[tlb_req_index])
769 comb += ttag.eq(itlb_tags[tlb_req_index])
770
771 with m.If(i_in.virt_mode):
772 comb += real_addr.eq(Cat(
773 i_in.nia[:TLB_LG_PGSZ],
774 pte[TLB_LG_PGSZ:REAL_ADDR_BITS]
775 ))
776
777 with m.If(ttag == i_in.nia[TLB_LG_PGSZ + TLB_BITS:64]):
778 comb += ra_valid.eq(itlb_valid_bits[tlb_req_index])
779
780 comb += eaa_priv.eq(pte[3])
781
782 with m.Else():
783 comb += real_addr.eq(i_in.nia[:REAL_ADDR_BITS])
784 comb += ra_valid.eq(1)
785 comb += eaa_priv.eq(1)
786
787 # No IAMR, so no KUEP support for now
788 comb += priv_fault.eq(eaa_priv & ~i_in.priv_mode)
789 comb += access_ok.eq(ra_valid & ~priv_fault)
790
791 # -- iTLB update
792 # itlb_update: process(clk)
793 # variable wr_index : tlb_index_t;
794 # begin
795 # if rising_edge(clk) then
796 # wr_index := hash_ea(m_in.addr);
797 # if rst = '1' or
798 # (m_in.tlbie = '1' and m_in.doall = '1') then
799 # -- clear all valid bits
800 # for i in tlb_index_t loop
801 # itlb_valids(i) <= '0';
802 # end loop;
803 # elsif m_in.tlbie = '1' then
804 # -- clear entry regardless of hit or miss
805 # itlb_valids(wr_index) <= '0';
806 # elsif m_in.tlbld = '1' then
807 # itlb_tags(wr_index) <=
808 # m_in.addr(63 downto TLB_LG_PGSZ + TLB_BITS);
809 # itlb_ptes(wr_index) <= m_in.pte;
810 # itlb_valids(wr_index) <= '1';
811 # end if;
812 # end if;
813 # end process;
814 # iTLB update
815 def itlb_update(self, m, itlb_valid_bits, itlb_tags, itlb_ptes):
816 comb = m.d.comb
817 sync = m.d.sync
818
819 m_in = self.m_in
820
821 wr_index = Signal(TLB_SIZE)
822 sync += wr_index.eq(hash_ea(m_in.addr))
823
824 with m.If(m_in.tlbie & m_in.doall):
825 # Clear all valid bits
826 for i in range(TLB_SIZE):
827 sync += itlb_valid_bits[i].eq(0)
828
829 with m.Elif(m_in.tlbie):
830 # Clear entry regardless of hit or miss
831 sync += itlb_valid_bits[wr_index].eq(0)
832
833 with m.Elif(m_in.tlbld):
834 sync += itlb_tags[wr_index].eq(
835 m_in.addr[TLB_LG_PGSZ + TLB_BITS:64]
836 )
837 sync += itlb_ptes[wr_index].eq(m_in.pte)
838 sync += itlb_valid_bits[wr_index].eq(1)
839
840 # -- Cache hit detection, output to fetch2 and other misc logic
841 # icache_comb : process(all)
842 # Cache hit detection, output to fetch2 and other misc logic
843 def icache_comb(self, m, use_previous, r, req_index, req_row,
844 req_tag, real_addr, req_laddr, cache_valid_bits,
845 cache_tags, access_ok, req_is_hit,
846 req_is_miss, replace_way, plru_victim, cache_out):
847 # variable is_hit : std_ulogic;
848 # variable hit_way : way_t;
849 comb = m.d.comb
850
851 #comb += Display("ENTER icache_comb - use_previous:%x req_index:%x " \
852 # "req_row:%x req_tag:%x real_addr:%x req_laddr:%x " \
853 # "access_ok:%x req_is_hit:%x req_is_miss:%x " \
854 # "replace_way:%x", use_previous, req_index, req_row, \
855 # req_tag, real_addr, req_laddr, access_ok, \
856 # req_is_hit, req_is_miss, replace_way)
857
858 i_in, i_out, wb_out = self.i_in, self.i_out, self.wb_out
859 flush_in, stall_out = self.flush_in, self.stall_out
860
861 is_hit = Signal()
862 hit_way = Signal(NUM_WAYS)
863 # begin
864 # -- i_in.sequential means that i_in.nia this cycle
865 # -- is 4 more than last cycle. If we read more
866 # -- than 32 bits at a time, had a cache hit last
867 # -- cycle, and we don't want the first 32-bit chunk
868 # -- then we can keep the data we read last cycle
869 # -- and just use that.
870 # if unsigned(i_in.nia(INSN_BITS+2-1 downto 2)) /= 0 then
871 # use_previous <= i_in.sequential and r.hit_valid;
872 # else
873 # use_previous <= '0';
874 # end if;
875 # i_in.sequential means that i_in.nia this cycle is 4 more than
876 # last cycle. If we read more than 32 bits at a time, had a
877 # cache hit last cycle, and we don't want the first 32-bit chunk
878 # then we can keep the data we read last cycle and just use that.
879 with m.If(i_in.nia[2:INSN_BITS+2] != 0):
880 comb += use_previous.eq(i_in.sequential & r.hit_valid)
881
882 # -- Extract line, row and tag from request
883 # req_index <= get_index(i_in.nia);
884 # req_row <= get_row(i_in.nia);
885 # req_tag <= get_tag(real_addr);
886 # Extract line, row and tag from request
887 comb += req_index.eq(get_index(i_in.nia))
888 comb += req_row.eq(get_row(i_in.nia))
889 comb += req_tag.eq(get_tag(real_addr))
890
891 # -- Calculate address of beginning of cache row, will be
892 # -- used for cache miss processing if needed
893 # req_laddr <=
894 # (63 downto REAL_ADDR_BITS => '0') &
895 # real_addr(REAL_ADDR_BITS - 1 downto ROW_OFF_BITS) &
896 # (ROW_OFF_BITS-1 downto 0 => '0');
897 # Calculate address of beginning of cache row, will be
898 # used for cache miss processing if needed
899 comb += req_laddr.eq(Cat(
900 Const(0b0, ROW_OFF_BITS),
901 real_addr[ROW_OFF_BITS:REAL_ADDR_BITS],
902 Const(0b0, 8)
903 ))
904
905 # -- Test if pending request is a hit on any way
906 # hit_way := 0;
907 # is_hit := '0';
908 # for i in way_t loop
909 # if i_in.req = '1' and
910 # (cache_valids(req_index)(i) = '1' or
911 # (r.state = WAIT_ACK and
912 # req_index = r.store_index and
913 # i = r.store_way and
914 # r.rows_valid(req_row mod ROW_PER_LINE) = '1')) then
915 # if read_tag(i, cache_tags(req_index)) = req_tag then
916 # hit_way := i;
917 # is_hit := '1';
918 # end if;
919 # end if;
920 # end loop;
921 # Test if pending request is a hit on any way
922 for i in range(NUM_WAYS):
923 with m.If(i_in.req &
924 (cache_valid_bits[req_index][i] |
925 ((r.state == State.WAIT_ACK)
926 & (req_index == r.store_index)
927 & (i == r.store_way)
928 & r.rows_valid[req_row % ROW_PER_LINE]))):
929 with m.If(read_tag(i, cache_tags[req_index]) == req_tag):
930 comb += hit_way.eq(i)
931 comb += is_hit.eq(1)
932
933 # -- Generate the "hit" and "miss" signals
934 # -- for the synchronous blocks
935 # if i_in.req = '1' and access_ok = '1' and flush_in = '0'
936 # and rst = '0' then
937 # req_is_hit <= is_hit;
938 # req_is_miss <= not is_hit;
939 # else
940 # req_is_hit <= '0';
941 # req_is_miss <= '0';
942 # end if;
943 # req_hit_way <= hit_way;
944 # Generate the "hit" and "miss" signals
945 # for the synchronous blocks
946 with m.If(i_in.req & access_ok & ~flush_in):
947 comb += req_is_hit.eq(is_hit)
948 comb += req_is_miss.eq(~is_hit)
949
950 with m.Else():
951 comb += req_is_hit.eq(0)
952 comb += req_is_miss.eq(0)
953
954 # -- The way to replace on a miss
955 # if r.state = CLR_TAG then
956 # replace_way <=
957 # to_integer(unsigned(plru_victim(r.store_index)));
958 # else
959 # replace_way <= r.store_way;
960 # end if;
961 # The way to replace on a miss
962 with m.If(r.state == State.CLR_TAG):
963 comb += replace_way.eq(plru_victim[r.store_index])
964
965 with m.Else():
966 comb += replace_way.eq(r.store_way)
967
968 # -- Output instruction from current cache row
969 # --
970 # -- Note: This is a mild violation of our design principle of
971 # -- having pipeline stages output from a clean latch. In this
972 # -- case we output the result of a mux. The alternative would
973 # -- be output an entire row which I prefer not to do just yet
974 # -- as it would force fetch2 to know about some of the cache
975 # -- geometry information.
976 # i_out.insn <= read_insn_word(r.hit_nia, cache_out(r.hit_way));
977 # i_out.valid <= r.hit_valid;
978 # i_out.nia <= r.hit_nia;
979 # i_out.stop_mark <= r.hit_smark;
980 # i_out.fetch_failed <= r.fetch_failed;
981 # Output instruction from current cache row
982 #
983 # Note: This is a mild violation of our design principle of
984 # having pipeline stages output from a clean latch. In this
985 # case we output the result of a mux. The alternative would
986 # be output an entire row which I prefer not to do just yet
987 # as it would force fetch2 to know about some of the cache
988 # geometry information.
989 #comb += Display("BEFORE read_insn_word - r.hit_nia:%x " \
990 # "r.hit_way:%x, cache_out[r.hit_way]:%x", r.hit_nia, \
991 # r.hit_way, cache_out[r.hit_way])
992 comb += i_out.insn.eq(read_insn_word(r.hit_nia, cache_out[r.hit_way]))
993 comb += i_out.valid.eq(r.hit_valid)
994 comb += i_out.nia.eq(r.hit_nia)
995 comb += i_out.stop_mark.eq(r.hit_smark)
996 comb += i_out.fetch_failed.eq(r.fetch_failed)
997
998 # -- Stall fetch1 if we have a miss on cache or TLB
999 # -- or a protection fault
1000 # stall_out <= not (is_hit and access_ok);
1001 # Stall fetch1 if we have a miss on cache or TLB
1002 # or a protection fault
1003 comb += stall_out.eq(~(is_hit & access_ok))
1004
1005 # -- Wishbone requests output (from the cache miss reload machine)
1006 # wishbone_out <= r.wb;
1007 # Wishbone requests output (from the cache miss reload machine)
1008 comb += wb_out.eq(r.wb)
1009 # end process;
1010
1011 # -- Cache hit synchronous machine
1012 # icache_hit : process(clk)
1013 # Cache hit synchronous machine
1014 def icache_hit(self, m, use_previous, r, req_is_hit, req_hit_way,
1015 req_index, req_tag, real_addr):
1016 sync = m.d.sync
1017
1018 i_in, stall_in = self.i_in, self.stall_in
1019 flush_in = self.flush_in
1020
1021 # begin
1022 # if rising_edge(clk) then
1023 # -- keep outputs to fetch2 unchanged on a stall
1024 # -- except that flush or reset sets valid to 0
1025 # -- If use_previous, keep the same data as last
1026 # -- cycle and use the second half
1027 # if stall_in = '1' or use_previous = '1' then
1028 # if rst = '1' or flush_in = '1' then
1029 # r.hit_valid <= '0';
1030 # end if;
1031 # keep outputs to fetch2 unchanged on a stall
1032 # except that flush or reset sets valid to 0
1033 # If use_previous, keep the same data as last
1034 # cycle and use the second half
1035 with m.If(stall_in | use_previous):
1036 with m.If(flush_in):
1037 sync += r.hit_valid.eq(0)
1038 # else
1039 # -- On a hit, latch the request for the next cycle,
1040 # -- when the BRAM data will be available on the
1041 # -- cache_out output of the corresponding way
1042 # r.hit_valid <= req_is_hit;
1043 # if req_is_hit = '1' then
1044 # r.hit_way <= req_hit_way;
1045 with m.Else():
1046 # On a hit, latch the request for the next cycle,
1047 # when the BRAM data will be available on the
1048 # cache_out output of the corresponding way
1049 sync += r.hit_valid.eq(req_is_hit)
1050
1051 with m.If(req_is_hit):
1052 sync += r.hit_way.eq(req_hit_way)
1053
1054 # report "cache hit nia:" & to_hstring(i_in.nia) &
1055 # " IR:" & std_ulogic'image(i_in.virt_mode) &
1056 # " SM:" & std_ulogic'image(i_in.stop_mark) &
1057 # " idx:" & integer'image(req_index) &
1058 # " tag:" & to_hstring(req_tag) &
1059 # " way:" & integer'image(req_hit_way) &
1060 # " RA:" & to_hstring(real_addr);
1061 sync += Display("cache hit nia:%x, IR:%x, SM:%x, idx:%x, " \
1062 "tag:%x, way:%x, RA:%x", i_in.nia, \
1063 i_in.virt_mode, i_in.stop_mark, req_index, \
1064 req_tag, req_hit_way, real_addr)
1065
1066
1067
1068 # end if;
1069 # end if;
1070 # if stall_in = '0' then
1071 # -- Send stop marks and NIA down regardless of validity
1072 # r.hit_smark <= i_in.stop_mark;
1073 # r.hit_nia <= i_in.nia;
1074 # end if;
1075 with m.If(~stall_in):
1076 # Send stop marks and NIA down regardless of validity
1077 sync += r.hit_smark.eq(i_in.stop_mark)
1078 sync += r.hit_nia.eq(i_in.nia)
1079 # end if;
1080 # end process;
1081
1082 # -- Cache miss/reload synchronous machine
1083 # icache_miss : process(clk)
1084 # Cache miss/reload synchronous machine
1085 def icache_miss(self, m, cache_valid_bits, r, req_is_miss,
1086 req_index, req_laddr, req_tag, replace_way,
1087 cache_tags, access_ok, real_addr):
1088 comb = m.d.comb
1089 sync = m.d.sync
1090
1091 i_in, wb_in, m_in = self.i_in, self.wb_in, self.m_in
1092 stall_in, flush_in = self.stall_in, self.flush_in
1093 inval_in = self.inval_in
1094
1095 # variable tagset : cache_tags_set_t;
1096 # variable stbs_done : boolean;
1097
1098 tagset = Signal(TAG_RAM_WIDTH)
1099 stbs_done = Signal()
1100
1101 # begin
1102 # if rising_edge(clk) then
1103 # -- On reset, clear all valid bits to force misses
1104 # if rst = '1' then
1105 # On reset, clear all valid bits to force misses
1106 # for i in index_t loop
1107 # cache_valids(i) <= (others => '0');
1108 # end loop;
1109 # r.state <= IDLE;
1110 # r.wb.cyc <= '0';
1111 # r.wb.stb <= '0';
1112 # -- We only ever do reads on wishbone
1113 # r.wb.dat <= (others => '0');
1114 # r.wb.sel <= "11111111";
1115 # r.wb.we <= '0';
1116
1117 # -- Not useful normally but helps avoiding
1118 # -- tons of sim warnings
1119 # r.wb.adr <= (others => '0');
1120
1121 # else
1122
1123 # -- Process cache invalidations
1124 # if inval_in = '1' then
1125 # for i in index_t loop
1126 # cache_valids(i) <= (others => '0');
1127 # end loop;
1128 # r.store_valid <= '0';
1129 # end if;
1130 comb += r.wb.sel.eq(-1)
1131 comb += r.wb.adr.eq(r.req_adr[3:])
1132
1133 # Process cache invalidations
1134 with m.If(inval_in):
1135 for i in range(NUM_LINES):
1136 sync += cache_valid_bits[i].eq(0)
1137 sync += r.store_valid.eq(0)
1138
1139 # -- Main state machine
1140 # case r.state is
1141 # Main state machine
1142 with m.Switch(r.state):
1143
1144 # when IDLE =>
1145 with m.Case(State.IDLE):
1146 # -- Reset per-row valid flags,
1147 # -- only used in WAIT_ACK
1148 # for i in 0 to ROW_PER_LINE - 1 loop
1149 # r.rows_valid(i) <= '0';
1150 # end loop;
1151 # Reset per-row valid flags,
1152 # only used in WAIT_ACK
1153 for i in range(ROW_PER_LINE):
1154 sync += r.rows_valid[i].eq(0)
1155
1156 # -- We need to read a cache line
1157 # if req_is_miss = '1' then
1158 # report "cache miss nia:" & to_hstring(i_in.nia) &
1159 # " IR:" & std_ulogic'image(i_in.virt_mode) &
1160 # " SM:" & std_ulogic'image(i_in.stop_mark) &
1161 # " idx:" & integer'image(req_index) &
1162 # " way:" & integer'image(replace_way) &
1163 # " tag:" & to_hstring(req_tag) &
1164 # " RA:" & to_hstring(real_addr);
1165 # We need to read a cache line
1166 with m.If(req_is_miss):
1167 sync += Display(
1168 "cache miss nia:%x IR:%x SM:%x idx:%x way:%x " \
1169 "tag:%x RA:%x", i_in.nia, i_in.virt_mode, \
1170 i_in.stop_mark, req_index, replace_way, \
1171 req_tag, real_addr)
1172
1173 # -- Keep track of our index and way for
1174 # -- subsequent stores
1175 # r.store_index <= req_index;
1176 # r.store_row <= get_row(req_laddr);
1177 # r.store_tag <= req_tag;
1178 # r.store_valid <= '1';
1179 # r.end_row_ix <=
1180 # get_row_of_line(get_row(req_laddr)) - 1;
1181 # Keep track of our index and way
1182 # for subsequent stores
1183 sync += r.store_index.eq(req_index)
1184 sync += r.store_row.eq(get_row(req_laddr))
1185 sync += r.store_tag.eq(req_tag)
1186 sync += r.store_valid.eq(1)
1187 sync += r.end_row_ix.eq(
1188 get_row_of_line(
1189 get_row(req_laddr)
1190 ) - 1
1191 )
1192
1193 # -- Prep for first wishbone read. We calculate the
1194 # -- address of the start of the cache line and
1195 # -- start the WB cycle.
1196 # r.wb.adr <= req_laddr(r.wb.adr'left downto 0);
1197 # r.wb.cyc <= '1';
1198 # r.wb.stb <= '1';
1199 # Prep for first wishbone read.
1200 # We calculate the
1201 # address of the start of the cache line and
1202 # start the WB cycle.
1203 sync += r.req_adr.eq(req_laddr)
1204 sync += r.wb.cyc.eq(1)
1205 sync += r.wb.stb.eq(1)
1206
1207 # -- Track that we had one request sent
1208 # r.state <= CLR_TAG;
1209 # Track that we had one request sent
1210 sync += r.state.eq(State.CLR_TAG)
1211 # end if;
1212
1213 # when CLR_TAG | WAIT_ACK =>
1214 with m.Case(State.CLR_TAG, State.WAIT_ACK):
1215 # if r.state = CLR_TAG then
1216 with m.If(r.state == State.CLR_TAG):
1217 # -- Get victim way from plru
1218 # r.store_way <= replace_way;
1219 # Get victim way from plru
1220 sync += r.store_way.eq(replace_way)
1221 #
1222 # -- Force misses on that way while
1223 # -- reloading that line
1224 # cache_valids(req_index)(replace_way) <= '0';
1225 # Force misses on that way while
1226 # realoading that line
1227 cv = Signal(INDEX_BITS)
1228 comb += cv.eq(cache_valid_bits[req_index])
1229 comb += cv.bit_select(replace_way, 1).eq(0)
1230 sync += cache_valid_bits[req_index].eq(cv)
1231
1232 # -- Store new tag in selected way
1233 # for i in 0 to NUM_WAYS-1 loop
1234 # if i = replace_way then
1235 # tagset := cache_tags(r.store_index);
1236 # write_tag(i, tagset, r.store_tag);
1237 # cache_tags(r.store_index) <= tagset;
1238 # end if;
1239 # end loop;
1240 for i in range(NUM_WAYS):
1241 with m.If(i == replace_way):
1242 comb += tagset.eq(cache_tags[r.store_index])
1243 comb += write_tag(i, tagset, r.store_tag)
1244 sync += cache_tags[r.store_index].eq(tagset)
1245
1246 # r.state <= WAIT_ACK;
1247 sync += r.state.eq(State.WAIT_ACK)
1248 # end if;
1249
1250 # -- Requests are all sent if stb is 0
1251 # stbs_done := r.wb.stb = '0';
1252 # Requests are all sent if stb is 0
1253 stbs_zero = Signal()
1254 comb += stbs_zero.eq(r.wb.stb == 0)
1255 comb += stbs_done.eq(stbs_zero)
1256
1257 # -- If we are still sending requests,
1258 # -- was one accepted ?
1259 # if wishbone_in.stall = '0' and not stbs_done then
1260 # If we are still sending requests,
1261 # was one accepted?
1262 with m.If(~wb_in.stall & ~stbs_done):
1263 # -- That was the last word ? We are done sending.
1264 # -- Clear stb and set stbs_done so we can handle
1265 # -- an eventual last ack on the same cycle.
1266 # if is_last_row_addr(r.wb.adr, r.end_row_ix) then
1267 # r.wb.stb <= '0';
1268 # stbs_done := true;
1269 # end if;
1270 # That was the last word ?
1271 # We are done sending.
1272 # Clear stb and set stbs_done
1273 # so we can handle
1274 # an eventual last ack on
1275 # the same cycle.
1276 with m.If(is_last_row_addr(r.req_adr, r.end_row_ix)):
1277 sync += Display("r.wb.addr:%x r.end_row_ix:%x " \
1278 "r.wb.stb:%x stbs_zero:%x " \
1279 "stbs_done:%x", r.wb.adr, \
1280 r.end_row_ix, r.wb.stb, \
1281 stbs_zero, stbs_done)
1282 sync += r.wb.stb.eq(0)
1283 comb += stbs_done.eq(1)
1284
1285 # -- Calculate the next row address
1286 # r.wb.adr <= next_row_addr(r.wb.adr);
1287 # Calculate the next row address
1288 rarange = Signal(64)
1289 comb += rarange.eq(
1290 r.req_adr[ROW_OFF_BITS:LINE_OFF_BITS] + 1
1291 )
1292 sync += r.req_adr.eq(rarange)
1293 sync += Display("r.wb.adr:%x stbs_zero:%x " \
1294 "stbs_done:%x", rarange, stbs_zero, \
1295 stbs_done)
1296 # end if;
1297
1298 # -- Incoming acks processing
1299 # if wishbone_in.ack = '1' then
1300 # Incoming acks processing
1301 with m.If(wb_in.ack):
1302 # r.rows_valid(r.store_row mod ROW_PER_LINE)
1303 # <= '1';
1304 sync += Display("wb_in.ack TEST stbs_zero:%x " \
1305 "stbs_done:%x", \
1306 stbs_zero, stbs_done)
1307
1308 sync += r.rows_valid[r.store_row % ROW_PER_LINE].eq(1)
1309
1310 # -- Check for completion
1311 # if stbs_done and
1312 # is_last_row(r.store_row, r.end_row_ix) then
1313 # Check for completion
1314 with m.If(stbs_done &
1315 is_last_row(r.store_row, r.end_row_ix)):
1316 # -- Complete wishbone cycle
1317 # r.wb.cyc <= '0';
1318 # Complete wishbone cycle
1319 sync += r.wb.cyc.eq(0)
1320
1321 # -- Cache line is now valid
1322 # cache_valids(r.store_index)(replace_way) <=
1323 # r.store_valid and not inval_in;
1324 # Cache line is now valid
1325 cv = Signal(INDEX_BITS)
1326 comb += cv.eq(cache_valid_bits[r.store_index])
1327 comb += cv.bit_select(replace_way, 1).eq(
1328 r.store_valid & ~inval_in
1329 )
1330 sync += cache_valid_bits[r.store_index].eq(cv)
1331
1332 # -- We are done
1333 # r.state <= IDLE;
1334 # We are done
1335 sync += r.state.eq(State.IDLE)
1336 # end if;
1337
1338 # -- Increment store row counter
1339 # r.store_row <= next_row(r.store_row);
1340 # Increment store row counter
1341 sync += r.store_row.eq(next_row(r.store_row))
1342 # end if;
1343 # end case;
1344 # end if;
1345 #
1346 # -- TLB miss and protection fault processing
1347 # if rst = '1' or flush_in = '1' or m_in.tlbld = '1' then
1348 # r.fetch_failed <= '0';
1349 # elsif i_in.req = '1' and access_ok = '0' and
1350 # stall_in = '0' then
1351 # r.fetch_failed <= '1';
1352 # end if;
1353 # TLB miss and protection fault processing
1354 with m.If(flush_in | m_in.tlbld):
1355 sync += r.fetch_failed.eq(0)
1356
1357 with m.Elif(i_in.req & ~access_ok & ~stall_in):
1358 sync += r.fetch_failed.eq(1)
1359 # end if;
1360 # end process;
1361
1362 # icache_log: if LOG_LENGTH > 0 generate
    def icache_log(self, m, req_hit_way, ra_valid, access_ok,
                   req_is_miss, req_is_hit, lway, wstate, r):
        """Debug logger, transliterated from the microwatt
        ``icache_log: if LOG_LENGTH > 0 generate`` block: registers a
        54-bit snapshot of hit/miss/wishbone state each cycle and drives
        it on ``log_out``.

        NOTE(review): the ``lway`` and ``wstate`` parameters are
        immediately shadowed by the local Signals created in the loop
        below, so the caller's signals are never driven — confirm
        whether those parameters can be dropped.
        """
        comb = m.d.comb
        sync = m.d.sync

        wb_in, i_out = self.wb_in, self.i_out
        log_out, stall_out = self.log_out, self.stall_out

        # -- Output data to logger
        # signal log_data : std_ulogic_vector(53 downto 0);
        # begin
        # data_log: process(clk)
        # variable lway: way_t;
        # variable wstate: std_ulogic;
        # Output data to logger
        # NOTE(review): the VHDL original is a conditional generate
        # ("if LOG_LENGTH > 0"), not a loop; this loop would drive
        # log_out from multiple comb statements if LOG_LENGTH were ever
        # greater than 1 — confirm LOG_LENGTH is only 0 or 1.
        for i in range(LOG_LENGTH):
            # Output data to logger
            log_data = Signal(54)
            lway = Signal(NUM_WAYS)
            wstate = Signal()

            # begin
            # if rising_edge(clk) then
            # lway := req_hit_way;
            # wstate := '0';
            sync += lway.eq(req_hit_way)
            sync += wstate.eq(0)

            # if r.state /= IDLE then
            # wstate := '1';
            # end if;
            # wstate flags that the reload state machine is busy
            with m.If(r.state != State.IDLE):
                sync += wstate.eq(1)

            # log_data <= i_out.valid &
            # i_out.insn &
            # wishbone_in.ack &
            # r.wb.adr(5 downto 3) &
            # r.wb.stb & r.wb.cyc &
            # wishbone_in.stall &
            # stall_out &
            # r.fetch_failed &
            # r.hit_nia(5 downto 2) &
            # wstate &
            # std_ulogic_vector(to_unsigned(lway, 3)) &
            # req_is_hit & req_is_miss &
            # access_ok &
            # ra_valid;
            # Cat() concatenates LSB-first, hence the reversed order
            # relative to the VHDL '&' concatenation above.
            sync += log_data.eq(Cat(
                ra_valid, access_ok, req_is_miss, req_is_hit,
                lway, wstate, r.hit_nia[2:6],
                r.fetch_failed, stall_out, wb_in.stall, r.wb.cyc,
                r.wb.stb, r.wb.adr[3:6], wb_in.ack, i_out.insn,
                i_out.valid
            ))
            # end if;
            # end process;
            # log_out <= log_data;
            comb += log_out.eq(log_data)
        # end generate;
        # end;
1424
    def elaborate(self, platform):
        """Build the icache module: declare the storage arrays and the
        shared request/TLB/PLRU signals, then wire the sub-processes
        (rams, plrus, TLB lookup/update, comb logic, hit and miss
        machines) together.
        """

        m = Module()
        comb = m.d.comb

        # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
        cache_tags = CacheTagArray()
        cache_valid_bits = CacheValidBitsArray()

        # signal itlb_valids : tlb_valids_t;
        # signal itlb_tags : tlb_tags_t;
        # signal itlb_ptes : tlb_ptes_t;
        # attribute ram_style of itlb_tags : signal is "distributed";
        # attribute ram_style of itlb_ptes : signal is "distributed";
        itlb_valid_bits = TLBValidBitsArray()
        itlb_tags = TLBTagArray()
        itlb_ptes = TLBPtesArray()
        # TODO to be passed to nmigen as ram attributes
        # attribute ram_style of itlb_tags : signal is "distributed";
        # attribute ram_style of itlb_ptes : signal is "distributed";

        # -- Privilege bit from PTE EAA field
        # signal eaa_priv : std_ulogic;
        # Privilege bit from PTE EAA field
        eaa_priv = Signal()

        # signal r : reg_internal_t;
        r = RegInternal()

        # -- Async signals on incoming request
        # signal req_index : index_t;
        # signal req_row : row_t;
        # signal req_hit_way : way_t;
        # signal req_tag : cache_tag_t;
        # signal req_is_hit : std_ulogic;
        # signal req_is_miss : std_ulogic;
        # signal req_laddr : std_ulogic_vector(63 downto 0);
        # Async signal on incoming request
        # NOTE(review): several widths below are element *counts*
        # (NUM_LINES, BRAM_ROWS, TLB_SIZE, NUM_WAYS) where an index
        # only needs log2_int() bits — oversized but presumably
        # harmless; confirm the intended widths.
        req_index = Signal(NUM_LINES)
        req_row = Signal(BRAM_ROWS)
        req_hit_way = Signal(NUM_WAYS)
        req_tag = Signal(TAG_BITS)
        req_is_hit = Signal()
        req_is_miss = Signal()
        req_laddr = Signal(64)

        # signal tlb_req_index : tlb_index_t;
        # signal real_addr : std_ulogic_vector(
        # REAL_ADDR_BITS - 1 downto 0
        # );
        # signal ra_valid : std_ulogic;
        # signal priv_fault : std_ulogic;
        # signal access_ok : std_ulogic;
        # signal use_previous : std_ulogic;
        tlb_req_index = Signal(TLB_SIZE)
        real_addr = Signal(REAL_ADDR_BITS)
        ra_valid = Signal()
        priv_fault = Signal()
        access_ok = Signal()
        use_previous = Signal()

        # signal cache_out : cache_ram_out_t;
        cache_out = CacheRamOut()

        # signal plru_victim : plru_out_t;
        # signal replace_way : way_t;
        plru_victim = PLRUOut()
        replace_way = Signal(NUM_WAYS)

        # call sub-functions putting everything together, using shared
        # signals established above
        self.rams(m, r, cache_out, use_previous, replace_way, req_row)
        self.maybe_plrus(m, r, plru_victim)
        self.itlb_lookup(m, tlb_req_index, itlb_ptes, itlb_tags,
                         real_addr, itlb_valid_bits, ra_valid, eaa_priv,
                         priv_fault, access_ok)
        self.itlb_update(m, itlb_valid_bits, itlb_tags, itlb_ptes)
        self.icache_comb(m, use_previous, r, req_index, req_row,
                         req_tag, real_addr, req_laddr, cache_valid_bits,
                         cache_tags, access_ok, req_is_hit, req_is_miss,
                         replace_way, plru_victim, cache_out)
        self.icache_hit(m, use_previous, r, req_is_hit, req_hit_way,
                        req_index, req_tag, real_addr)
        self.icache_miss(m, cache_valid_bits, r, req_is_miss, req_index,
                         req_laddr, req_tag, replace_way, cache_tags,
                         access_ok, real_addr)
        #self.icache_log(m, log_out, req_hit_way, ra_valid, access_ok,
        # req_is_miss, req_is_hit, lway, wstate, r)

        return m
1515
1516
1517 # icache_tb.vhdl
1518 #
1519 # library ieee;
1520 # use ieee.std_logic_1164.all;
1521 #
1522 # library work;
1523 # use work.common.all;
1524 # use work.wishbone_types.all;
1525 #
1526 # entity icache_tb is
1527 # end icache_tb;
1528 #
1529 # architecture behave of icache_tb is
1530 # signal clk : std_ulogic;
1531 # signal rst : std_ulogic;
1532 #
1533 # signal i_out : Fetch1ToIcacheType;
1534 # signal i_in : IcacheToDecode1Type;
1535 #
1536 # signal m_out : MmuToIcacheType;
1537 #
1538 # signal wb_bram_in : wishbone_master_out;
1539 # signal wb_bram_out : wishbone_slave_out;
1540 #
1541 # constant clk_period : time := 10 ns;
1542 # begin
1543 # icache0: entity work.icache
1544 # generic map(
1545 # LINE_SIZE => 64,
1546 # NUM_LINES => 4
1547 # )
1548 # port map(
1549 # clk => clk,
1550 # rst => rst,
1551 # i_in => i_out,
1552 # i_out => i_in,
1553 # m_in => m_out,
1554 # stall_in => '0',
1555 # flush_in => '0',
1556 # inval_in => '0',
1557 # wishbone_out => wb_bram_in,
1558 # wishbone_in => wb_bram_out
1559 # );
1560 #
1561 # -- BRAM Memory slave
1562 # bram0: entity work.wishbone_bram_wrapper
1563 # generic map(
1564 # MEMORY_SIZE => 1024,
1565 # RAM_INIT_FILE => "icache_test.bin"
1566 # )
1567 # port map(
1568 # clk => clk,
1569 # rst => rst,
1570 # wishbone_in => wb_bram_in,
1571 # wishbone_out => wb_bram_out
1572 # );
1573 #
1574 # clk_process: process
1575 # begin
1576 # clk <= '0';
1577 # wait for clk_period/2;
1578 # clk <= '1';
1579 # wait for clk_period/2;
1580 # end process;
1581 #
1582 # rst_process: process
1583 # begin
1584 # rst <= '1';
1585 # wait for 2*clk_period;
1586 # rst <= '0';
1587 # wait;
1588 # end process;
1589 #
1590 # stim: process
1591 # begin
1592 # i_out.req <= '0';
1593 # i_out.nia <= (others => '0');
1594 # i_out.stop_mark <= '0';
1595 #
1596 # m_out.tlbld <= '0';
1597 # m_out.tlbie <= '0';
1598 # m_out.addr <= (others => '0');
1599 # m_out.pte <= (others => '0');
1600 #
1601 # wait until rising_edge(clk);
1602 # wait until rising_edge(clk);
1603 # wait until rising_edge(clk);
1604 # wait until rising_edge(clk);
1605 #
1606 # i_out.req <= '1';
1607 # i_out.nia <= x"0000000000000004";
1608 #
1609 # wait for 30*clk_period;
1610 # wait until rising_edge(clk);
1611 #
1612 # assert i_in.valid = '1' severity failure;
1613 # assert i_in.insn = x"00000001"
1614 # report "insn @" & to_hstring(i_out.nia) &
1615 # "=" & to_hstring(i_in.insn) &
1616 # " expected 00000001"
1617 # severity failure;
1618 #
1619 # i_out.req <= '0';
1620 #
1621 # wait until rising_edge(clk);
1622 #
1623 # -- hit
1624 # i_out.req <= '1';
1625 # i_out.nia <= x"0000000000000008";
1626 # wait until rising_edge(clk);
1627 # wait until rising_edge(clk);
1628 # assert i_in.valid = '1' severity failure;
1629 # assert i_in.insn = x"00000002"
1630 # report "insn @" & to_hstring(i_out.nia) &
1631 # "=" & to_hstring(i_in.insn) &
1632 # " expected 00000002"
1633 # severity failure;
1634 # wait until rising_edge(clk);
1635 #
1636 # -- another miss
1637 # i_out.req <= '1';
1638 # i_out.nia <= x"0000000000000040";
1639 #
1640 # wait for 30*clk_period;
1641 # wait until rising_edge(clk);
1642 #
1643 # assert i_in.valid = '1' severity failure;
1644 # assert i_in.insn = x"00000010"
1645 # report "insn @" & to_hstring(i_out.nia) &
1646 # "=" & to_hstring(i_in.insn) &
1647 # " expected 00000010"
1648 # severity failure;
1649 #
1650 # -- test something that aliases
1651 # i_out.req <= '1';
1652 # i_out.nia <= x"0000000000000100";
1653 # wait until rising_edge(clk);
1654 # wait until rising_edge(clk);
1655 # assert i_in.valid = '0' severity failure;
1656 # wait until rising_edge(clk);
1657 #
1658 # wait for 30*clk_period;
1659 # wait until rising_edge(clk);
1660 #
1661 # assert i_in.valid = '1' severity failure;
1662 # assert i_in.insn = x"00000040"
1663 # report "insn @" & to_hstring(i_out.nia) &
1664 # "=" & to_hstring(i_in.insn) &
1665 # " expected 00000040"
1666 # severity failure;
1667 #
1668 # i_out.req <= '0';
1669 #
1670 # std.env.finish;
1671 # end process;
1672 # end;
def icache_sim(dut):
    """Simulation stimulus for the icache testbench (mirrors the VHDL
    ``icache_tb`` stim process): a miss, a hit on the same line, a
    second miss, then an access that aliases the first line and must
    therefore miss before returning data.
    """
    i_out = dut.i_in    # fetch1 -> icache request port
    i_in = dut.i_out    # icache -> decode1 result port
    m_out = dut.m_in    # MMU -> icache port

    # idle all inputs for a few cycles
    yield i_in.valid.eq(0)
    yield i_out.priv_mode.eq(1)
    yield i_out.req.eq(0)
    yield i_out.nia.eq(0)
    yield i_out.stop_mark.eq(0)
    yield m_out.tlbld.eq(0)
    yield m_out.tlbie.eq(0)
    yield m_out.addr.eq(0)
    yield m_out.pte.eq(0)
    yield
    yield
    yield
    yield

    # miss: fetch @0x04, allow 30 cycles for the line reload
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000004, 64))
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    nia = yield i_out.nia
    insn = yield i_in.insn
    print(f"valid? {valid}")
    assert valid
    assert insn == 0x00000001, \
        "insn @%x=%x expected 00000001" % (nia, insn)
    yield i_out.req.eq(0)
    yield

    # hit: fetch @0x08 from the line just loaded
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000008, 64))
    yield
    yield
    valid = yield i_in.valid
    nia = yield i_in.nia
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000002, \
        "insn @%x=%x expected 00000002" % (nia, insn)
    yield

    # another miss: fetch @0x40 (the next cache line)
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000040, 64))
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    nia = yield i_out.nia
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000010, \
        "insn @%x=%x expected 00000010" % (nia, insn)

    # test something that aliases: must be a miss first
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000100, 64))
    yield
    yield
    valid = yield i_in.valid
    # BUGFIX: was "assert ~valid" — Python bitwise ~0 == -1 is truthy,
    # so that assertion could never fail.  "not valid" actually checks
    # for the expected miss (VHDL: assert i_in.valid = '0').
    assert not valid
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    nia = yield i_in.nia
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000040, \
        "insn @%x=%x expected 00000040" % (nia, insn)
    yield i_out.req.eq(0)
1749
1750
1751
def test_icache(mem):
    """Run the icache against a wishbone SRAM model initialised with
    *mem*, driving the whole thing with the icache_sim() stimulus and
    recording a VCD trace.
    """
    dut = ICache()

    memory = Memory(width=64, depth=16*64, init=mem)
    sram = SRAM(memory=memory, granularity=8)

    m = Module()
    m.submodules.icache = dut
    m.submodules.sram = sram

    # connect the icache wishbone master to the SRAM slave:
    # outgoing request signals, then the returning ack/data
    m.d.comb += [
        sram.bus.cyc.eq(dut.wb_out.cyc),
        sram.bus.stb.eq(dut.wb_out.stb),
        sram.bus.we.eq(dut.wb_out.we),
        sram.bus.sel.eq(dut.wb_out.sel),
        sram.bus.adr.eq(dut.wb_out.adr),
        sram.bus.dat_w.eq(dut.wb_out.dat),
        dut.wb_in.ack.eq(sram.bus.ack),
        dut.wb_in.dat.eq(sram.bus.dat_r),
    ]

    # nmigen Simulation
    sim = Simulator(m)
    sim.add_clock(1e-6)

    sim.add_sync_process(wrap(icache_sim(dut)))
    with sim.write_vcd('test_icache.vcd'):
        sim.run()
1780
if __name__ == '__main__':
    # emit RTLIL for a standalone synthesis/ilang check
    dut = ICache()
    vl = rtlil.convert(dut, ports=[])
    with open("test_icache.il", "w") as f:
        f.write(vl)

    # test memory: each 64-bit word packs the two consecutive 32-bit
    # values 2i (low half) and 2i+1 (high half)
    mem = [(i * 2) | ((i * 2 + 1) << 32) for i in range(512)]

    test_icache(mem)
1792