# src/soc/experiment/icache.py
1 """ICache
2
3 based on Anton Blanchard microwatt icache.vhdl
4
5 Set associative icache
6
7 TODO (in no specific order):
8 * Add debug interface to inspect cache content
9 * Add snoop/invalidate path
10 * Add multi-hit error detection
11 * Pipelined bus interface (wb or axi)
12 * Maybe add parity? There's a few bits free in each BRAM row on Xilinx
13 * Add optimization: service hits on partially loaded lines
14 * Add optimization: (maybe) interrupt reload on fluch/redirect
15 * Check if playing with the geometry of the cache tags allow for more
16 efficient use of distributed RAM and less logic/muxes. Currently we
17 write TAG_BITS width which may not match full ram blocks and might
18 cause muxes to be inferred for "partial writes".
19 * Check if making the read size of PLRU a ROM helps utilization
20
21 """
from enum import Enum, unique
from nmigen import (Module, Signal, Elaboratable, Cat, Array, Const, Repl)
from nmigen.cli import main, rtlil
from nmutil.iocontrol import RecordObject
from nmigen.utils import log2_int
from nmutil.util import Display

#from nmutil.plru import PLRU
from soc.experiment.cache_ram import CacheRam
from soc.experiment.plru import PLRU

from soc.experiment.mem_types import (Fetch1ToICacheType,
                                      ICacheToDecode1Type,
                                      MMUToICacheType)

from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS,
                                     WB_SEL_BITS, WBAddrType, WBDataType,
                                     WBSelType, WBMasterOut, WBSlaveOut,
                                     WBMasterOutVector, WBSlaveOutVector,
                                     WBIOMasterOut, WBIOSlaveOut)

# for test
from nmigen_soc.wishbone.sram import SRAM
from nmigen import Memory
from nmutil.util import wrap

# toggle between the standard python simulator and the experimental cxxsim
if True:
    from nmigen.back.pysim import Simulator, Delay, Settle
else:
    from nmigen.sim.cxxsim import Simulator, Delay, Settle

SIM = 0
LINE_SIZE = 64
# BRAM organisation: We never access more than wishbone_data_bits
# at a time so to save resources we make the array only that wide,
# and use consecutive indices to make a cache "line"
#
# ROW_SIZE is the width in bytes of the BRAM (based on WB, so 64-bits)
ROW_SIZE = WB_DATA_BITS // 8
# Number of lines in a set
NUM_LINES = 16
# Number of ways
NUM_WAYS = 4
# L1 ITLB number of entries (direct mapped)
TLB_SIZE = 64
# L1 ITLB log_2(page_size)
TLB_LG_PGSZ = 12
# Number of real address bits that we store
REAL_ADDR_BITS = 56
# Non-zero to enable log data collection
LOG_LENGTH = 0

ROW_SIZE_BITS = ROW_SIZE * 8
# ROW_PER_LINE is the number of row
# (wishbone) transactions in a line
ROW_PER_LINE = LINE_SIZE // ROW_SIZE
# BRAM_ROWS is the number of rows in
# BRAM needed to represent the full icache
BRAM_ROWS = NUM_LINES * ROW_PER_LINE
# INSN_PER_ROW is the number of 32bit
# instructions per BRAM row
INSN_PER_ROW = ROW_SIZE_BITS // 32

print("ROW_SIZE", ROW_SIZE)
print("ROW_SIZE_BITS", ROW_SIZE_BITS)
print("ROW_PER_LINE", ROW_PER_LINE)
print("BRAM_ROWS", BRAM_ROWS)
print("INSN_PER_ROW", INSN_PER_ROW)

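# A quick worked example, assuming a 64-bit wishbone bus (WB_DATA_BITS=64)
# as the comment above implies: ROW_SIZE=8 bytes, ROW_SIZE_BITS=64,
# ROW_PER_LINE=8, BRAM_ROWS=128 and INSN_PER_ROW=2, which is what the
# print statements above report at import time.
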
# Bit fields counts in the address
#
# INSN_BITS is the number of bits to
# select an instruction in a row
INSN_BITS = log2_int(INSN_PER_ROW)
# ROW_BITS is the number of bits to
# select a row
ROW_BITS = log2_int(BRAM_ROWS)
# ROW_LINE_BITS is the number of bits to
# select a row within a line
ROW_LINE_BITS = log2_int(ROW_PER_LINE)
# LINE_OFF_BITS is the number of bits for
# the offset in a cache line
LINE_OFF_BITS = log2_int(LINE_SIZE)
# ROW_OFF_BITS is the number of bits for
# the offset in a row
ROW_OFF_BITS = log2_int(ROW_SIZE)
# INDEX_BITS is the number of bits to
# select a cache line
INDEX_BITS = log2_int(NUM_LINES)
# SET_SIZE_BITS is the log base 2 of
# the set size
SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS
# TAG_BITS is the number of bits of
# the tag part of the address
TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS
# TAG_WIDTH is the width in bits of each way of the tag RAM
# (TAG_BITS rounded up to the next multiple of 8)
TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)

# WAY_BITS is the number of bits to
# select a way
WAY_BITS = log2_int(NUM_WAYS)
TAG_RAM_WIDTH = TAG_BITS * NUM_WAYS

# -- L1 ITLB.
# constant TLB_BITS : natural := log2(TLB_SIZE);
# constant TLB_EA_TAG_BITS : natural := 64 - (TLB_LG_PGSZ + TLB_BITS);
# constant TLB_PTE_BITS : natural := 64;
TLB_BITS = log2_int(TLB_SIZE)
TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_BITS)
TLB_PTE_BITS = 64


print("INSN_BITS", INSN_BITS)
print("ROW_BITS", ROW_BITS)
print("ROW_LINE_BITS", ROW_LINE_BITS)
print("LINE_OFF_BITS", LINE_OFF_BITS)
print("ROW_OFF_BITS", ROW_OFF_BITS)
print("INDEX_BITS", INDEX_BITS)
print("SET_SIZE_BITS", SET_SIZE_BITS)
print("TAG_BITS", TAG_BITS)
print("WAY_BITS", WAY_BITS)
print("TAG_RAM_WIDTH", TAG_RAM_WIDTH)
print("TLB_BITS", TLB_BITS)
print("TLB_EA_TAG_BITS", TLB_EA_TAG_BITS)
print("TLB_PTE_BITS", TLB_PTE_BITS)


# Example of layout for 32 lines of 64 bytes:
#
# ..  tag    |index|  line  |
# ..         |   row   |    |
# ..         |     |   | |00| zero (2)
# ..         |     |   |-|  | INSN_BITS (1)
# ..         |     |---|    | ROW_LINE_BITS (3)
# ..         |     |--- - --| LINE_OFF_BITS (6)
# ..         |         |- --| ROW_OFF_BITS  (3)
# ..         |----- ---|    | ROW_BITS (8)
# ..         |-----|        | INDEX_BITS (5)
# .. --------|              | TAG_BITS (53)

#subtype row_t is integer range 0 to BRAM_ROWS-1;
#subtype index_t is integer range 0 to NUM_LINES-1;
#subtype way_t is integer range 0 to NUM_WAYS-1;
#subtype row_in_line_t is unsigned(ROW_LINEBITS-1 downto 0);
#
#-- The cache data BRAM organized as described above for each way
#subtype cache_row_t is std_ulogic_vector(ROW_SIZE_BITS-1 downto 0);
#
#-- The cache tags LUTRAM has a row per set. Vivado is a pain and will
#-- not handle a clean (commented) definition of the cache tags as a 3d
#-- memory. For now, work around it by putting all the tags
#subtype cache_tag_t is std_logic_vector(TAG_BITS-1 downto 0);
# type cache_tags_set_t is array(way_t) of cache_tag_t;
# type cache_tags_array_t is array(index_t) of cache_tags_set_t;
#constant TAG_RAM_WIDTH : natural := TAG_BITS * NUM_WAYS;
#subtype cache_tags_set_t is std_logic_vector(TAG_RAM_WIDTH-1 downto 0);
#type cache_tags_array_t is array(index_t) of cache_tags_set_t;
def CacheTagArray():
    return Array(Signal(TAG_RAM_WIDTH, name="cachetag_%d" % x)
                 for x in range(NUM_LINES))

#-- The cache valid bits
#subtype cache_way_valids_t is std_ulogic_vector(NUM_WAYS-1 downto 0);
#type cache_valids_t is array(index_t) of cache_way_valids_t;
#type row_per_line_valid_t is array(0 to ROW_PER_LINE - 1) of std_ulogic;
def CacheValidBitsArray():
    return Array(Signal(NUM_WAYS, name="cachevalid_%d" % x)
                 for x in range(NUM_LINES))

def RowPerLineValidArray():
    return Array(Signal(name="rows_valid_%d" % x)
                 for x in range(ROW_PER_LINE))


# TODO to be passed to nmigen as ram attributes
# attribute ram_style : string;
# attribute ram_style of cache_tags : signal is "distributed";


#subtype tlb_index_t is integer range 0 to TLB_SIZE - 1;
#type tlb_valids_t is array(tlb_index_t) of std_ulogic;
#subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0);
#type tlb_tags_t is array(tlb_index_t) of tlb_tag_t;
#subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
#type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t;
def TLBValidBitsArray():
    return Array(Signal(name="tlbvalid_%d" % x)
                 for x in range(TLB_SIZE))

def TLBTagArray():
    return Array(Signal(TLB_EA_TAG_BITS, name="tlbtag_%d" % x)
                 for x in range(TLB_SIZE))

def TLBPtesArray():
    return Array(Signal(TLB_PTE_BITS, name="tlbptes_%d" % x)
                 for x in range(TLB_SIZE))


#-- Cache RAM interface
#type cache_ram_out_t is array(way_t) of cache_row_t;
# Cache RAM interface
def CacheRamOut():
    return Array(Signal(ROW_SIZE_BITS, name="cache_out_%d" % x)
                 for x in range(NUM_WAYS))

#-- PLRU output interface
#type plru_out_t is array(index_t) of
#    std_ulogic_vector(WAY_BITS-1 downto 0);
# PLRU output interface
def PLRUOut():
    return Array(Signal(WAY_BITS, name="plru_out_%d" % x)
                 for x in range(NUM_LINES))

# -- Return the cache line index (tag index) for an address
# function get_index(addr: std_ulogic_vector(63 downto 0))
#     return index_t is
# begin
#     return to_integer(unsigned(
#         addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS)
#     ));
# end;
# Return the cache line index (tag index) for an address
def get_index(addr):
    return addr[LINE_OFF_BITS:SET_SIZE_BITS]

# -- Return the cache row index (data memory) for an address
# function get_row(addr: std_ulogic_vector(63 downto 0))
#     return row_t is
# begin
#     return to_integer(unsigned(
#         addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS)
#     ));
# end;
# Return the cache row index (data memory) for an address
def get_row(addr):
    return addr[ROW_OFF_BITS:SET_SIZE_BITS]

# -- Return the index of a row within a line
# function get_row_of_line(row: row_t) return row_in_line_t is
#     variable row_v : unsigned(ROW_BITS-1 downto 0);
# begin
#     row_v := to_unsigned(row, ROW_BITS);
#     return row_v(ROW_LINEBITS-1 downto 0);
# end;
# Return the index of a row within a line
def get_row_of_line(row):
    return row[:ROW_LINE_BITS]

# -- Returns whether this is the last row of a line
# function is_last_row_addr(addr: wishbone_addr_type;
#     last: row_in_line_t) return boolean is
# begin
#     return unsigned(
#         addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS)
#     ) = last;
# end;
# Returns whether this is the last row of a line
def is_last_row_addr(addr, last):
    return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last

# -- Returns whether this is the last row of a line
# function is_last_row(row: row_t;
#     last: row_in_line_t) return boolean is
# begin
#     return get_row_of_line(row) = last;
# end;
# Returns whether this is the last row of a line
def is_last_row(row, last):
    return get_row_of_line(row) == last

# -- Return the next row in the current cache line. We use a dedicated
# -- function in order to limit the size of the generated adder to be
# -- only the bits within a cache line (3 bits with default settings)
# function next_row(row: row_t) return row_t is
#     variable row_v   : std_ulogic_vector(ROW_BITS-1 downto 0);
#     variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
#     variable result  : std_ulogic_vector(ROW_BITS-1 downto 0);
# begin
#     row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
#     row_idx := row_v(ROW_LINEBITS-1 downto 0);
#     row_v(ROW_LINEBITS-1 downto 0) :=
#         std_ulogic_vector(unsigned(row_idx) + 1);
#     return to_integer(unsigned(row_v));
# end;
# Return the next row in the current cache line. We use a dedicated
# function in order to limit the size of the generated adder to be
# only the bits within a cache line (3 bits with default settings)
def next_row(row):
    row_v = row[0:ROW_LINE_BITS] + 1
    return Cat(row_v[:ROW_LINE_BITS], row[ROW_LINE_BITS:])
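
# As an illustration: with ROW_LINE_BITS=3, next_row(0b0000111) wraps to
# 0b0000000, and the increment never carries into the upper (index) bits,
# so the adder stays ROW_LINE_BITS wide.
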
# -- Read the instruction word for the given address in the
# -- current cache row
# function read_insn_word(addr: std_ulogic_vector(63 downto 0);
#     data: cache_row_t) return std_ulogic_vector is
#     variable word: integer range 0 to INSN_PER_ROW-1;
# begin
#     word := to_integer(unsigned(addr(INSN_BITS+2-1 downto 2)));
#     return data(31+word*32 downto word*32);
# end;
# Read the instruction word for the given address
# in the current cache row
def read_insn_word(addr, data):
    word = addr[2:INSN_BITS+2]
    return data.word_select(word, 32)

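# As an example: with the default INSN_PER_ROW=2 (INSN_BITS=1), address
# bit 2 alone selects the low or high 32-bit word of the 64-bit row.
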
# -- Get the tag value from the address
# function get_tag(
#     addr: std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0))
#     return cache_tag_t is
# begin
#     return addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS);
# end;
# Get the tag value from the address
def get_tag(addr):
    return addr[SET_SIZE_BITS:REAL_ADDR_BITS]

# -- Read a tag from a tag memory row
# function read_tag(way: way_t; tagset: cache_tags_set_t)
#     return cache_tag_t is
# begin
#     return tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS);
# end;
# Read a tag from a tag memory row
def read_tag(way, tagset):
    return tagset.word_select(way, TAG_BITS)

# -- Write a tag to tag memory row
# procedure write_tag(way: in way_t;
#     tagset: inout cache_tags_set_t; tag: cache_tag_t) is
# begin
#     tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS) := tag;
# end;
# Write a tag to tag memory row
def write_tag(way, tagset, tag):
    return read_tag(way, tagset).eq(tag)

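# Note that write_tag returns an nmigen assignment statement rather than
# performing the write itself: the caller adds it to a domain, e.g.
# comb += write_tag(way, tagset, tag), as done in icache_miss below.
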
# -- Simple hash for direct-mapped TLB index
# function hash_ea(addr: std_ulogic_vector(63 downto 0))
#     return tlb_index_t is
#     variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
# begin
#     hash := addr(TLB_LG_PGSZ + TLB_BITS - 1 downto TLB_LG_PGSZ)
#             xor addr(TLB_LG_PGSZ + 2 * TLB_BITS - 1 downto
#                      TLB_LG_PGSZ + TLB_BITS)
#             xor addr(TLB_LG_PGSZ + 3 * TLB_BITS - 1 downto
#                      TLB_LG_PGSZ + 2 * TLB_BITS);
#     return to_integer(unsigned(hash));
# end;
# Simple hash for direct-mapped TLB index
def hash_ea(addr):
    hsh = (addr[TLB_LG_PGSZ:TLB_LG_PGSZ + TLB_BITS] ^
           addr[TLB_LG_PGSZ + TLB_BITS:TLB_LG_PGSZ + 2 * TLB_BITS] ^
           addr[TLB_LG_PGSZ + 2 * TLB_BITS:TLB_LG_PGSZ + 3 * TLB_BITS])
    return hsh

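# The hash XOR-folds three consecutive TLB_BITS-wide fields of the
# effective address above the page offset.  With the defaults
# (TLB_LG_PGSZ=12, TLB_BITS=6) this is addr[12:18] ^ addr[18:24] ^
# addr[24:30], yielding a 6-bit direct-mapped TLB index.
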
# begin
#
# assert LINE_SIZE mod ROW_SIZE = 0;
# assert ispow2(LINE_SIZE)    report "LINE_SIZE not power of 2"
# assert ispow2(NUM_LINES)    report "NUM_LINES not power of 2"
# assert ispow2(ROW_PER_LINE) report "ROW_PER_LINE not power of 2"
# assert ispow2(INSN_PER_ROW) report "INSN_PER_ROW not power of 2"
# assert (ROW_BITS = INDEX_BITS + ROW_LINEBITS)
#     report "geometry bits don't add up"
# assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS)
#     report "geometry bits don't add up"
# assert (REAL_ADDR_BITS = TAG_BITS + INDEX_BITS + LINE_OFF_BITS)
#     report "geometry bits don't add up"
# assert (REAL_ADDR_BITS = TAG_BITS + ROW_BITS + ROW_OFF_BITS)
#     report "geometry bits don't add up"
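
# Python translation of the VHDL asserts above (the "put these assert
# statements in - as python asserts" TODO): a plain sanity-check of the
# geometry constants, run once at import time.  ispow2 is a local
# helper, not something taken from microwatt.
def ispow2(n):
    return n != 0 and (n & (n - 1)) == 0

assert LINE_SIZE % ROW_SIZE == 0, "LINE_SIZE not multiple of ROW_SIZE"
assert ispow2(LINE_SIZE), "LINE_SIZE not power of 2"
assert ispow2(NUM_LINES), "NUM_LINES not power of 2"
assert ispow2(ROW_PER_LINE), "ROW_PER_LINE not power of 2"
assert ispow2(INSN_PER_ROW), "INSN_PER_ROW not power of 2"
assert ROW_BITS == INDEX_BITS + ROW_LINE_BITS, \
    "geometry bits don't add up"
assert LINE_OFF_BITS == ROW_OFF_BITS + ROW_LINE_BITS, \
    "geometry bits don't add up"
assert REAL_ADDR_BITS == TAG_BITS + INDEX_BITS + LINE_OFF_BITS, \
    "geometry bits don't add up"
assert REAL_ADDR_BITS == TAG_BITS + ROW_BITS + ROW_OFF_BITS, \
    "geometry bits don't add up"
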
# sim_debug: if SIM generate
# debug: process
# begin
#     report "ROW_SIZE      = " & natural'image(ROW_SIZE);
#     report "ROW_PER_LINE  = " & natural'image(ROW_PER_LINE);
#     report "BRAM_ROWS     = " & natural'image(BRAM_ROWS);
#     report "INSN_PER_ROW  = " & natural'image(INSN_PER_ROW);
#     report "INSN_BITS     = " & natural'image(INSN_BITS);
#     report "ROW_BITS      = " & natural'image(ROW_BITS);
#     report "ROW_LINEBITS  = " & natural'image(ROW_LINEBITS);
#     report "LINE_OFF_BITS = " & natural'image(LINE_OFF_BITS);
#     report "ROW_OFF_BITS  = " & natural'image(ROW_OFF_BITS);
#     report "INDEX_BITS    = " & natural'image(INDEX_BITS);
#     report "TAG_BITS      = " & natural'image(TAG_BITS);
#     report "WAY_BITS      = " & natural'image(WAY_BITS);
#     wait;
# end process;
# end generate;

# Cache reload state machine
@unique
class State(Enum):
    IDLE = 0
    CLR_TAG = 1
    WAIT_ACK = 2


# type reg_internal_t is record
#     -- Cache hit state (Latches for 1 cycle BRAM access)
#     hit_way   : way_t;
#     hit_nia   : std_ulogic_vector(63 downto 0);
#     hit_smark : std_ulogic;
#     hit_valid : std_ulogic;
#
#     -- Cache miss state (reload state machine)
#     state       : state_t;
#     wb          : wishbone_master_out;
#     store_way   : way_t;
#     store_index : index_t;
#     store_row   : row_t;
#     store_tag   : cache_tag_t;
#     store_valid : std_ulogic;
#     end_row_ix  : row_in_line_t;
#     rows_valid  : row_per_line_valid_t;
#
#     -- TLB miss state
#     fetch_failed : std_ulogic;
# end record;
class RegInternal(RecordObject):
    def __init__(self):
        super().__init__()
        # Cache hit state (Latches for 1 cycle BRAM access)
        self.hit_way = Signal(NUM_WAYS)
        self.hit_nia = Signal(64)
        self.hit_smark = Signal()
        self.hit_valid = Signal()

        # Cache miss state (reload state machine)
        self.state = Signal(State, reset=State.IDLE)
        self.wb = WBMasterOut("wb")
        self.req_adr = Signal(64)
        self.store_way = Signal(NUM_WAYS)
        self.store_index = Signal(NUM_LINES)
        self.store_row = Signal(BRAM_ROWS)
        self.store_tag = Signal(TAG_BITS)
        self.store_valid = Signal()
        self.end_row_ix = Signal(ROW_LINE_BITS)
        self.rows_valid = RowPerLineValidArray()

        # TLB miss state
        self.fetch_failed = Signal()


# -- 64 bit direct mapped icache. All instructions are 4B aligned.
#
# entity icache is
#     generic (
#         SIM : boolean := false;
#         -- Line size in bytes
#         LINE_SIZE : positive := 64;
#         -- BRAM organisation: We never access more
#         -- than wishbone_data_bits
#         -- at a time so to save resources we make the
#         -- array only that wide,
#         -- and use consecutive indices to make a cache "line"
#         --
#         -- ROW_SIZE is the width in bytes of the BRAM (based on WB,
#         -- so 64-bits)
#         ROW_SIZE : positive := wishbone_data_bits / 8;
#         -- Number of lines in a set
#         NUM_LINES : positive := 32;
#         -- Number of ways
#         NUM_WAYS : positive := 4;
#         -- L1 ITLB number of entries (direct mapped)
#         TLB_SIZE : positive := 64;
#         -- L1 ITLB log_2(page_size)
#         TLB_LG_PGSZ : positive := 12;
#         -- Number of real address bits that we store
#         REAL_ADDR_BITS : positive := 56;
#         -- Non-zero to enable log data collection
#         LOG_LENGTH : natural := 0
#     );
#     port (
#         clk : in std_ulogic;
#         rst : in std_ulogic;
#
#         i_in  : in Fetch1ToIcacheType;
#         i_out : out IcacheToDecode1Type;
#
#         m_in  : in MmuToIcacheType;
#
#         stall_in  : in std_ulogic;
#         stall_out : out std_ulogic;
#         flush_in  : in std_ulogic;
#         inval_in  : in std_ulogic;
#
#         wishbone_out : out wishbone_master_out;
#         wishbone_in  : in wishbone_slave_out;
#
#         log_out : out std_ulogic_vector(53 downto 0)
#     );
# end entity icache;
class ICache(Elaboratable):
    """64 bit direct mapped icache. All instructions are 4B aligned."""
    def __init__(self):
        self.i_in = Fetch1ToICacheType(name="i_in")
        self.i_out = ICacheToDecode1Type(name="i_out")

        self.m_in = MMUToICacheType(name="m_in")

        self.stall_in = Signal()
        self.stall_out = Signal()
        self.flush_in = Signal()
        self.inval_in = Signal()

        self.wb_out = WBMasterOut(name="wb_out")
        self.wb_in = WBSlaveOut(name="wb_in")

        self.log_out = Signal(54)

    # -- Generate a cache RAM for each way
    # rams: for i in 0 to NUM_WAYS-1 generate
    #     signal do_read  : std_ulogic;
    #     signal do_write : std_ulogic;
    #     signal rd_addr  : std_ulogic_vector(ROW_BITS-1 downto 0);
    #     signal wr_addr  : std_ulogic_vector(ROW_BITS-1 downto 0);
    #     signal dout     : cache_row_t;
    #     signal wr_sel   : std_ulogic_vector(ROW_SIZE-1 downto 0);
    # begin
    #     way: entity work.cache_ram
    #         generic map (
    #             ROW_BITS => ROW_BITS,
    #             WIDTH    => ROW_SIZE_BITS
    #         )
    #         port map (
    #             clk     => clk,
    #             rd_en   => do_read,
    #             rd_addr => rd_addr,
    #             rd_data => dout,
    #             wr_sel  => wr_sel,
    #             wr_addr => wr_addr,
    #             wr_data => wishbone_in.dat
    #         );
    #     process(all)
    #     begin
    #         do_read <= not (stall_in or use_previous);
    #         do_write <= '0';
    #         if wishbone_in.ack = '1' and replace_way = i then
    #             do_write <= '1';
    #         end if;
    #         cache_out(i) <= dout;
    #         rd_addr <= std_ulogic_vector(to_unsigned(req_row, ROW_BITS));
    #         wr_addr <= std_ulogic_vector(to_unsigned(r.store_row, ROW_BITS));
    #         for i in 0 to ROW_SIZE-1 loop
    #             wr_sel(i) <= do_write;
    #         end loop;
    #     end process;
    # end generate;
    def rams(self, m, r, cache_out_row, use_previous, replace_way, req_row):
        comb = m.d.comb

        wb_in, stall_in = self.wb_in, self.stall_in

        for i in range(NUM_WAYS):
            do_read = Signal(name="do_rd_%d" % i)
            do_write = Signal(name="do_wr_%d" % i)
            rd_addr = Signal(ROW_BITS)
            wr_addr = Signal(ROW_BITS)
            d_out = Signal(ROW_SIZE_BITS, name="d_out_%d" % i)
            wr_sel = Signal(ROW_SIZE)

            way = CacheRam(ROW_BITS, ROW_SIZE_BITS, True)
            setattr(m.submodules, "cacheram_%d" % i, way)

            comb += way.rd_en.eq(do_read)
            comb += way.rd_addr.eq(rd_addr)
            comb += d_out.eq(way.rd_data_o)
            comb += way.wr_sel.eq(wr_sel)
            comb += way.wr_addr.eq(wr_addr)
            comb += way.wr_data.eq(wb_in.dat)

            comb += do_read.eq(~(stall_in | use_previous))
            comb += do_write.eq(wb_in.ack & (replace_way == i))

            # only the hitting way drives the (muxed) row output
            with m.If(r.hit_way == i):
                comb += cache_out_row.eq(d_out)
            comb += rd_addr.eq(req_row)
            comb += wr_addr.eq(r.store_row)
            comb += wr_sel.eq(Repl(do_write, ROW_SIZE))

    # -- Generate PLRUs
    # maybe_plrus: if NUM_WAYS > 1 generate
    # begin
    #     plrus: for i in 0 to NUM_LINES-1 generate
    #         -- PLRU interface
    #         signal plru_acc    : std_ulogic_vector(WAY_BITS-1 downto 0);
    #         signal plru_acc_en : std_ulogic;
    #         signal plru_out    : std_ulogic_vector(WAY_BITS-1 downto 0);
    #     begin
    #         plru : entity work.plru
    #             generic map (
    #                 BITS => WAY_BITS
    #             )
    #             port map (
    #                 clk    => clk,
    #                 rst    => rst,
    #                 acc    => plru_acc,
    #                 acc_en => plru_acc_en,
    #                 lru    => plru_out
    #             );
    #
    #         process(all)
    #         begin
    #             -- PLRU interface
    #             if get_index(r.hit_nia) = i then
    #                 plru_acc_en <= r.hit_valid;
    #             else
    #                 plru_acc_en <= '0';
    #             end if;
    #             plru_acc <=
    #                 std_ulogic_vector(to_unsigned(r.hit_way, WAY_BITS));
    #             plru_victim(i) <= plru_out;
    #         end process;
    #     end generate;
    # end generate;
    def maybe_plrus(self, m, r, plru_victim):
        comb = m.d.comb

        # NUM_WAYS is an elaboration-time constant, so this is a plain
        # python conditional, not hardware
        if NUM_WAYS > 1:
            for i in range(NUM_LINES):
                plru = PLRU(WAY_BITS)
                setattr(m.submodules, "plru_%d" % i, plru)

                # PLRU interface: record an access on a valid hit
                # to this line
                with m.If(get_index(r.hit_nia) == i):
                    comb += plru.acc_en.eq(r.hit_valid)

                comb += plru.acc_i.eq(r.hit_way)
                comb += plru_victim[i].eq(plru.lru_o)

    # -- TLB hit detection and real address generation
    # itlb_lookup : process(all)
    #     variable pte  : tlb_pte_t;
    #     variable ttag : tlb_tag_t;
    # begin
    #     tlb_req_index <= hash_ea(i_in.nia);
    #     pte  := itlb_ptes(tlb_req_index);
    #     ttag := itlb_tags(tlb_req_index);
    #     if i_in.virt_mode = '1' then
    #         real_addr <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
    #                      i_in.nia(TLB_LG_PGSZ - 1 downto 0);
    #         if ttag = i_in.nia(63 downto TLB_LG_PGSZ + TLB_BITS) then
    #             ra_valid <= itlb_valids(tlb_req_index);
    #         else
    #             ra_valid <= '0';
    #         end if;
    #         eaa_priv <= pte(3);
    #     else
    #         real_addr <= i_in.nia(REAL_ADDR_BITS - 1 downto 0);
    #         ra_valid <= '1';
    #         eaa_priv <= '1';
    #     end if;
    #
    #     -- no IAMR, so no KUEP support for now
    #     priv_fault <= eaa_priv and not i_in.priv_mode;
    #     access_ok <= ra_valid and not priv_fault;
    # end process;
    # TLB hit detection and real address generation
    def itlb_lookup(self, m, tlb_req_index, itlb_ptes, itlb_tags,
                    real_addr, itlb_valid_bits, ra_valid, eaa_priv,
                    priv_fault, access_ok):
        comb = m.d.comb

        i_in = self.i_in

        pte = Signal(TLB_PTE_BITS)
        ttag = Signal(TLB_EA_TAG_BITS)

        comb += tlb_req_index.eq(hash_ea(i_in.nia))
        comb += pte.eq(itlb_ptes[tlb_req_index])
        comb += ttag.eq(itlb_tags[tlb_req_index])

        with m.If(i_in.virt_mode):
            comb += real_addr.eq(Cat(i_in.nia[:TLB_LG_PGSZ],
                                     pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))

            with m.If(ttag == i_in.nia[TLB_LG_PGSZ + TLB_BITS:64]):
                comb += ra_valid.eq(itlb_valid_bits[tlb_req_index])

            comb += eaa_priv.eq(pte[3])

        with m.Else():
            comb += real_addr.eq(i_in.nia[:REAL_ADDR_BITS])
            comb += ra_valid.eq(1)
            comb += eaa_priv.eq(1)

        # No IAMR, so no KUEP support for now
        comb += priv_fault.eq(eaa_priv & ~i_in.priv_mode)
        comb += access_ok.eq(ra_valid & ~priv_fault)

    # -- iTLB update
    # itlb_update: process(clk)
    #     variable wr_index : tlb_index_t;
    # begin
    #     if rising_edge(clk) then
    #         wr_index := hash_ea(m_in.addr);
    #         if rst = '1' or
    #             (m_in.tlbie = '1' and m_in.doall = '1') then
    #             -- clear all valid bits
    #             for i in tlb_index_t loop
    #                 itlb_valids(i) <= '0';
    #             end loop;
    #         elsif m_in.tlbie = '1' then
    #             -- clear entry regardless of hit or miss
    #             itlb_valids(wr_index) <= '0';
    #         elsif m_in.tlbld = '1' then
    #             itlb_tags(wr_index) <=
    #                 m_in.addr(63 downto TLB_LG_PGSZ + TLB_BITS);
    #             itlb_ptes(wr_index) <= m_in.pte;
    #             itlb_valids(wr_index) <= '1';
    #         end if;
    #     end if;
    # end process;
    # iTLB update
    def itlb_update(self, m, itlb_valid_bits, itlb_tags, itlb_ptes):
        comb = m.d.comb
        sync = m.d.sync

        m_in = self.m_in

        # wr_index is a VHDL "variable": combinatorial, not registered
        # (registering it would update the entry selected by the
        # *previous* cycle's address)
        wr_index = Signal(TLB_BITS)
        comb += wr_index.eq(hash_ea(m_in.addr))

        with m.If(m_in.tlbie & m_in.doall):
            # Clear all valid bits
            for i in range(TLB_SIZE):
                sync += itlb_valid_bits[i].eq(0)

        with m.Elif(m_in.tlbie):
            # Clear entry regardless of hit or miss
            sync += itlb_valid_bits[wr_index].eq(0)

        with m.Elif(m_in.tlbld):
            sync += itlb_tags[wr_index].eq(m_in.addr[TLB_LG_PGSZ + TLB_BITS:64])
            sync += itlb_ptes[wr_index].eq(m_in.pte)
            sync += itlb_valid_bits[wr_index].eq(1)

    # -- Cache hit detection, output to fetch2 and other misc logic
    # icache_comb : process(all)
    # Cache hit detection, output to fetch2 and other misc logic
    def icache_comb(self, m, use_previous, r, req_index, req_row,
                    req_tag, real_addr, req_laddr, cache_valid_bits,
                    cache_tags, access_ok, req_is_hit,
                    req_is_miss, replace_way, plru_victim, cache_out_row):
        # variable is_hit  : std_ulogic;
        # variable hit_way : way_t;
        comb = m.d.comb

        #comb += Display("ENTER icache_comb - use_previous:%x req_index:%x "
        #                "req_row:%x req_tag:%x real_addr:%x req_laddr:%x "
        #                "access_ok:%x req_is_hit:%x req_is_miss:%x "
        #                "replace_way:%x", use_previous, req_index, req_row,
        #                req_tag, real_addr, req_laddr, access_ok,
        #                req_is_hit, req_is_miss, replace_way)

        i_in, i_out, wb_out = self.i_in, self.i_out, self.wb_out
        flush_in, stall_out = self.flush_in, self.stall_out

        is_hit = Signal()
        hit_way = Signal(NUM_WAYS)

        # begin
        #     -- i_in.sequential means that i_in.nia this cycle
        #     -- is 4 more than last cycle. If we read more
        #     -- than 32 bits at a time, had a cache hit last
        #     -- cycle, and we don't want the first 32-bit chunk
        #     -- then we can keep the data we read last cycle
        #     -- and just use that.
        #     if unsigned(i_in.nia(INSN_BITS+2-1 downto 2)) /= 0 then
        #         use_previous <= i_in.sequential and r.hit_valid;
        #     else
        #         use_previous <= '0';
        #     end if;
        # i_in.sequential means that i_in.nia this cycle is 4 more than
        # last cycle. If we read more than 32 bits at a time, had a
        # cache hit last cycle, and we don't want the first 32-bit chunk
        # then we can keep the data we read last cycle and just use that.
        with m.If(i_in.nia[2:INSN_BITS+2] != 0):
            comb += use_previous.eq(i_in.sequential & r.hit_valid)

        # -- Extract line, row and tag from request
        # req_index <= get_index(i_in.nia);
        # req_row <= get_row(i_in.nia);
        # req_tag <= get_tag(real_addr);
        # Extract line, row and tag from request
        comb += req_index.eq(get_index(i_in.nia))
        comb += req_row.eq(get_row(i_in.nia))
        comb += req_tag.eq(get_tag(real_addr))

        # -- Calculate address of beginning of cache row, will be
        # -- used for cache miss processing if needed
        # req_laddr <=
        #     (63 downto REAL_ADDR_BITS => '0') &
        #     real_addr(REAL_ADDR_BITS - 1 downto ROW_OFF_BITS) &
        #     (ROW_OFF_BITS-1 downto 0 => '0');
        # Calculate address of beginning of cache row, will be
        # used for cache miss processing if needed
        comb += req_laddr.eq(Cat(
                 Const(0, ROW_OFF_BITS),
                 real_addr[ROW_OFF_BITS:REAL_ADDR_BITS],
                 Const(0, 64 - REAL_ADDR_BITS)  # zero-pad to 64 bits
                ))

        # -- Test if pending request is a hit on any way
        # hit_way := 0;
        # is_hit := '0';
        # for i in way_t loop
        #     if i_in.req = '1' and
        #         (cache_valids(req_index)(i) = '1' or
        #          (r.state = WAIT_ACK and
        #           req_index = r.store_index and
        #           i = r.store_way and
        #           r.rows_valid(req_row mod ROW_PER_LINE) = '1')) then
        #         if read_tag(i, cache_tags(req_index)) = req_tag then
        #             hit_way := i;
        #             is_hit := '1';
        #         end if;
        #     end if;
        # end loop;
        # Test if pending request is a hit on any way
        hitcond = Signal()
        comb += hitcond.eq((r.state == State.WAIT_ACK)
                           & (req_index == r.store_index)
                           & r.rows_valid[req_row % ROW_PER_LINE])
        with m.If(i_in.req):
            cvb = Signal(NUM_WAYS)
            ctag = Signal(TAG_RAM_WIDTH)
            comb += ctag.eq(cache_tags[req_index])
            comb += cvb.eq(cache_valid_bits[req_index])
            for i in range(NUM_WAYS):
                tagi = Signal(TAG_BITS, name="ti%d" % i)
                comb += tagi.eq(read_tag(i, ctag))
                hit_test = Signal(name="hit_test%d" % i)
                comb += hit_test.eq(i == r.store_way)
                with m.If((cvb[i] | (hitcond & hit_test)) &
                          (tagi == req_tag)):
                    comb += hit_way.eq(i)
                    comb += is_hit.eq(1)

        # -- Generate the "hit" and "miss" signals
        # -- for the synchronous blocks
        # if i_in.req = '1' and access_ok = '1' and flush_in = '0'
        #     and rst = '0' then
        #     req_is_hit  <= is_hit;
        #     req_is_miss <= not is_hit;
        # else
        #     req_is_hit  <= '0';
        #     req_is_miss <= '0';
        # end if;
        # req_hit_way <= hit_way;
        # Generate the "hit" and "miss" signals
        # for the synchronous blocks
        with m.If(i_in.req & access_ok & ~flush_in):
            comb += req_is_hit.eq(is_hit)
            comb += req_is_miss.eq(~is_hit)

        with m.Else():
            comb += req_is_hit.eq(0)
            comb += req_is_miss.eq(0)

        # -- The way to replace on a miss
        # if r.state = CLR_TAG then
        #     replace_way <=
        #         to_integer(unsigned(plru_victim(r.store_index)));
        # else
        #     replace_way <= r.store_way;
        # end if;
        # The way to replace on a miss
        with m.If(r.state == State.CLR_TAG):
            comb += replace_way.eq(plru_victim[r.store_index])

        with m.Else():
            comb += replace_way.eq(r.store_way)

        # -- Output instruction from current cache row
        # --
        # -- Note: This is a mild violation of our design principle of
        # -- having pipeline stages output from a clean latch. In this
        # -- case we output the result of a mux. The alternative would
        # -- be output an entire row which I prefer not to do just yet
        # -- as it would force fetch2 to know about some of the cache
        # -- geometry information.
        # i_out.insn <= read_insn_word(r.hit_nia, cache_out(r.hit_way));
        # i_out.valid <= r.hit_valid;
        # i_out.nia <= r.hit_nia;
        # i_out.stop_mark <= r.hit_smark;
        # i_out.fetch_failed <= r.fetch_failed;
        # Output instruction from current cache row
        #
        # Note: This is a mild violation of our design principle of
        # having pipeline stages output from a clean latch. In this
        # case we output the result of a mux. The alternative would
        # be output an entire row which I prefer not to do just yet
        # as it would force fetch2 to know about some of the cache
        # geometry information.
        #comb += Display("BEFORE read_insn_word - r.hit_nia:%x "
        #                "r.hit_way:%x, cache_out[r.hit_way]:%x", r.hit_nia,
        #                r.hit_way, cache_out[r.hit_way])
        comb += i_out.insn.eq(read_insn_word(r.hit_nia, cache_out_row))
        comb += i_out.valid.eq(r.hit_valid)
        comb += i_out.nia.eq(r.hit_nia)
        comb += i_out.stop_mark.eq(r.hit_smark)
        comb += i_out.fetch_failed.eq(r.fetch_failed)

        # -- Stall fetch1 if we have a miss on cache or TLB
        # -- or a protection fault
        # stall_out <= not (is_hit and access_ok);
        # Stall fetch1 if we have a miss on cache or TLB
        # or a protection fault
        comb += stall_out.eq(~(is_hit & access_ok))

        # -- Wishbone requests output (from the cache miss reload machine)
        # wishbone_out <= r.wb;
        # Wishbone requests output (from the cache miss reload machine)
        comb += wb_out.eq(r.wb)
        # end process;

    # -- Cache hit synchronous machine
    # icache_hit : process(clk)
    # Cache hit synchronous machine
    def icache_hit(self, m, use_previous, r, req_is_hit, req_hit_way,
                   req_index, req_tag, real_addr):
        sync = m.d.sync

        i_in, stall_in = self.i_in, self.stall_in
        flush_in = self.flush_in

        # begin
        #     if rising_edge(clk) then
        #         -- keep outputs to fetch2 unchanged on a stall
        #         -- except that flush or reset sets valid to 0
        #         -- If use_previous, keep the same data as last
        #         -- cycle and use the second half
        #         if stall_in = '1' or use_previous = '1' then
        #             if rst = '1' or flush_in = '1' then
        #                 r.hit_valid <= '0';
        #             end if;
        # keep outputs to fetch2 unchanged on a stall
        # except that flush or reset sets valid to 0
        # If use_previous, keep the same data as last
        # cycle and use the second half
        with m.If(stall_in | use_previous):
            with m.If(flush_in):
                sync += r.hit_valid.eq(0)
        #         else
        #             -- On a hit, latch the request for the next cycle,
        #             -- when the BRAM data will be available on the
        #             -- cache_out output of the corresponding way
        #             r.hit_valid <= req_is_hit;
        #             if req_is_hit = '1' then
        #                 r.hit_way <= req_hit_way;
        with m.Else():
            # On a hit, latch the request for the next cycle,
            # when the BRAM data will be available on the
            # cache_out output of the corresponding way
            sync += r.hit_valid.eq(req_is_hit)

            with m.If(req_is_hit):
                sync += r.hit_way.eq(req_hit_way)

                # report "cache hit nia:" & to_hstring(i_in.nia) &
                #     " IR:" & std_ulogic'image(i_in.virt_mode) &
                #     " SM:" & std_ulogic'image(i_in.stop_mark) &
                #     " idx:" & integer'image(req_index) &
                #     " tag:" & to_hstring(req_tag) &
                #     " way:" & integer'image(req_hit_way) &
                #     " RA:" & to_hstring(real_addr);
                sync += Display("cache hit nia:%x IR:%x SM:%x idx:%x "
                                "tag:%x way:%x RA:%x", i_in.nia,
                                i_in.virt_mode, i_in.stop_mark, req_index,
                                req_tag, req_hit_way, real_addr)
        #         end if;
        #     end if;
        #     if stall_in = '0' then
        #         -- Send stop marks and NIA down regardless of validity
        #         r.hit_smark <= i_in.stop_mark;
        #         r.hit_nia <= i_in.nia;
        #     end if;
        with m.If(~stall_in):
            # Send stop marks and NIA down regardless of validity
            sync += r.hit_smark.eq(i_in.stop_mark)
            sync += r.hit_nia.eq(i_in.nia)
        # end if;
        # end process;

    # -- Cache miss/reload synchronous machine
    # icache_miss : process(clk)
    # Cache miss/reload synchronous machine
    def icache_miss(self, m, cache_valid_bits, r, req_is_miss,
                    req_index, req_laddr, req_tag, replace_way,
                    cache_tags, access_ok, real_addr):
        comb = m.d.comb
        sync = m.d.sync

        i_in, wb_in, m_in = self.i_in, self.wb_in, self.m_in
        stall_in, flush_in = self.stall_in, self.flush_in
        inval_in = self.inval_in

        # variable tagset    : cache_tags_set_t;
        # variable stbs_done : boolean;

        tagset = Signal(TAG_RAM_WIDTH)
        stbs_done = Signal()

        # begin
        #     if rising_edge(clk) then
        #         -- On reset, clear all valid bits to force misses
        #         if rst = '1' then
        #             for i in index_t loop
        #                 cache_valids(i) <= (others => '0');
        #             end loop;
        #             r.state <= IDLE;
        #             r.wb.cyc <= '0';
        #             r.wb.stb <= '0';
        #             -- We only ever do reads on wishbone
        #             r.wb.dat <= (others => '0');
        #             r.wb.sel <= "11111111";
        #             r.wb.we  <= '0';
        #
        #             -- Not useful normally but helps avoiding
        #             -- tons of sim warnings
        #             r.wb.adr <= (others => '0');
        #         else
        #             -- Process cache invalidations
        #             if inval_in = '1' then
        #                 for i in index_t loop
        #                     cache_valids(i) <= (others => '0');
        #                 end loop;
        #                 r.store_valid <= '0';
        #             end if;
        # We only ever do reads on wishbone: sel is all-ones and the
        # (word) bus address tracks r.req_adr
        comb += r.wb.sel.eq(-1)
        comb += r.wb.adr.eq(r.req_adr[3:])

        # Process cache invalidations
        with m.If(inval_in):
            for i in range(NUM_LINES):
                sync += cache_valid_bits[i].eq(0)
            sync += r.store_valid.eq(0)

        # -- Main state machine
        # case r.state is
        # Main state machine
        with m.Switch(r.state):

            # when IDLE =>
            with m.Case(State.IDLE):
                # -- Reset per-row valid flags,
                # -- only used in WAIT_ACK
                # for i in 0 to ROW_PER_LINE - 1 loop
                #     r.rows_valid(i) <= '0';
                # end loop;
                # Reset per-row valid flags,
                # only used in WAIT_ACK
                for i in range(ROW_PER_LINE):
                    sync += r.rows_valid[i].eq(0)

                # -- We need to read a cache line
                # if req_is_miss = '1' then
                #     report "cache miss nia:" & to_hstring(i_in.nia) &
                #         " IR:" & std_ulogic'image(i_in.virt_mode) &
                #         " SM:" & std_ulogic'image(i_in.stop_mark) &
                #         " idx:" & integer'image(req_index) &
                #         " way:" & integer'image(replace_way) &
                #         " tag:" & to_hstring(req_tag) &
                #         " RA:" & to_hstring(real_addr);
                # We need to read a cache line
                with m.If(req_is_miss):
                    sync += Display(
                             "cache miss nia:%x IR:%x SM:%x idx:%x "
                             "way:%x tag:%x RA:%x", i_in.nia,
                             i_in.virt_mode, i_in.stop_mark, req_index,
                             replace_way, req_tag, real_addr)

                    # -- Keep track of our index and way for
                    # -- subsequent stores
                    # r.store_index <= req_index;
                    # r.store_row <= get_row(req_laddr);
                    # r.store_tag <= req_tag;
                    # r.store_valid <= '1';
                    # r.end_row_ix <=
                    #     get_row_of_line(get_row(req_laddr)) - 1;
                    # Keep track of our index and way
                    # for subsequent stores
                    sync += r.store_index.eq(req_index)
                    sync += r.store_row.eq(get_row(req_laddr))
                    sync += r.store_tag.eq(req_tag)
                    sync += r.store_valid.eq(1)
                    sync += r.end_row_ix.eq(
                             get_row_of_line(get_row(req_laddr)) - 1)

                    # -- Prep for first wishbone read. We calculate the
                    # -- address of the start of the cache line and
                    # -- start the WB cycle.
                    # r.wb.adr <= req_laddr(r.wb.adr'left downto 0);
                    # r.wb.cyc <= '1';
                    # r.wb.stb <= '1';
                    # Prep for first wishbone read.  We calculate the
                    # address of the start of the cache line and
                    # start the WB cycle.
                    sync += r.req_adr.eq(req_laddr)
                    sync += r.wb.cyc.eq(1)
                    sync += r.wb.stb.eq(1)

                    # -- Track that we had one request sent
                    # r.state <= CLR_TAG;
                    # Track that we had one request sent
                    sync += r.state.eq(State.CLR_TAG)
                # end if;

            # when CLR_TAG | WAIT_ACK =>
            with m.Case(State.CLR_TAG, State.WAIT_ACK):
                # if r.state = CLR_TAG then
                with m.If(r.state == State.CLR_TAG):
                    # -- Get victim way from plru
                    # r.store_way <= replace_way;
                    # Get victim way from plru
                    sync += r.store_way.eq(replace_way)

                    # -- Force misses on that way while
                    # -- reloading that line
                    # cache_valids(req_index)(replace_way) <= '0';
                    # Force misses on that way while
                    # reloading that line
                    cv = Signal(NUM_WAYS)  # valid bits, one per way
                    comb += cv.eq(cache_valid_bits[req_index])
                    comb += cv.bit_select(replace_way, 1).eq(0)
                    sync += cache_valid_bits[req_index].eq(cv)

                    # -- Store new tag in selected way
                    # for i in 0 to NUM_WAYS-1 loop
                    #     if i = replace_way then
                    #         tagset := cache_tags(r.store_index);
                    #         write_tag(i, tagset, r.store_tag);
                    #         cache_tags(r.store_index) <= tagset;
                    #     end if;
                    # end loop;
                    for i in range(NUM_WAYS):
                        with m.If(i == replace_way):
                            comb += tagset.eq(cache_tags[r.store_index])
                            comb += write_tag(i, tagset, r.store_tag)
                            sync += cache_tags[r.store_index].eq(tagset)

                    # r.state <= WAIT_ACK;
                    sync += r.state.eq(State.WAIT_ACK)
                # end if;

                # -- Requests are all sent if stb is 0
                # stbs_done := r.wb.stb = '0';
                # Requests are all sent if stb is 0
                stbs_zero = Signal()
                comb += stbs_zero.eq(r.wb.stb == 0)
                comb += stbs_done.eq(stbs_zero)

                # -- If we are still sending requests,
                # -- was one accepted ?
                # if wishbone_in.stall = '0' and not stbs_done then
                # If we are still sending requests,
                # was one accepted?
                with m.If(~wb_in.stall & ~stbs_zero):
                    # -- That was the last word ? We are done sending.
                    # -- Clear stb and set stbs_done so we can handle
                    # -- an eventual last ack on the same cycle.
                    # if is_last_row_addr(r.wb.adr, r.end_row_ix) then
                    #     r.wb.stb <= '0';
                    #     stbs_done := true;
                    # end if;
                    # That was the last word?  We are done sending.
                    # Clear stb and set stbs_done so we can handle
                    # an eventual last ack on the same cycle.
                    with m.If(is_last_row_addr(r.req_adr, r.end_row_ix)):
                        sync += Display("IS_LAST_ROW_ADDR "
                                        "r.wb.adr:%x r.end_row_ix:%x "
                                        "r.wb.stb:%x stbs_zero:%x "
                                        "stbs_done:%x", r.wb.adr,
                                        r.end_row_ix, r.wb.stb,
                                        stbs_zero, stbs_done)
                        sync += r.wb.stb.eq(0)
                        comb += stbs_done.eq(1)

                    # -- Calculate the next row address
                    # r.wb.adr <= next_row_addr(r.wb.adr);
                    # Calculate the next row address
                    rarange = Signal(LINE_OFF_BITS - ROW_OFF_BITS)
                    comb += rarange.eq(r.req_adr[ROW_OFF_BITS:LINE_OFF_BITS] + 1)
                    sync += r.req_adr[ROW_OFF_BITS:LINE_OFF_BITS].eq(rarange)
                    sync += Display("RARANGE rarange:%x stbs_zero:%x "
                                    "stbs_done:%x", rarange, stbs_zero,
                                    stbs_done)
                # end if;

                # -- Incoming acks processing
                # if wishbone_in.ack = '1' then
                # Incoming acks processing
                with m.If(wb_in.ack):
                    # r.rows_valid(r.store_row mod ROW_PER_LINE) <= '1';
                    sync += Display("WB_IN_ACK stbs_zero:%x "
                                    "stbs_done:%x",
                                    stbs_zero, stbs_done)

                    sync += r.rows_valid[r.store_row % ROW_PER_LINE].eq(1)

                    # -- Check for completion
                    # if stbs_done and
                    #     is_last_row(r.store_row, r.end_row_ix) then
                    # Check for completion
                    with m.If(stbs_done &
                              is_last_row(r.store_row, r.end_row_ix)):
                        # -- Complete wishbone cycle
                        # r.wb.cyc <= '0';
                        # Complete wishbone cycle
                        sync += r.wb.cyc.eq(0)

                        # -- Cache line is now valid
                        # cache_valids(r.store_index)(replace_way) <=
                        #     r.store_valid and not inval_in;
                        # Cache line is now valid
                        cv = Signal(NUM_WAYS)  # valid bits, one per way
                        comb += cv.eq(cache_valid_bits[r.store_index])
                        comb += cv.bit_select(replace_way, 1).eq(
                                 r.store_valid & ~inval_in)
                        sync += cache_valid_bits[r.store_index].eq(cv)

                        # -- We are done
                        # r.state <= IDLE;
                        # We are done
                        sync += r.state.eq(State.IDLE)
                    # end if;

                    # -- Increment store row counter
                    # r.store_row <= next_row(r.store_row);
                    # Increment store row counter
                    sync += r.store_row.eq(next_row(r.store_row))
                # end if;
        # end case;
        # end if;

        # -- TLB miss and protection fault processing
        # if rst = '1' or flush_in = '1' or m_in.tlbld = '1' then
        #     r.fetch_failed <= '0';
        # elsif i_in.req = '1' and access_ok = '0' and
        #     stall_in = '0' then
        #     r.fetch_failed <= '1';
        # end if;
        # TLB miss and protection fault processing
        with m.If(flush_in | m_in.tlbld):
            sync += r.fetch_failed.eq(0)

        with m.Elif(i_in.req & ~access_ok & ~stall_in):
            sync += r.fetch_failed.eq(1)
        # end if;
        # end process;

    # icache_log: if LOG_LENGTH > 0 generate
    def icache_log(self, m, req_hit_way, ra_valid, access_ok,
                   req_is_miss, req_is_hit, lway, wstate, r):
        comb = m.d.comb
        sync = m.d.sync

        wb_in, i_out = self.wb_in, self.i_out
        log_out, stall_out = self.log_out, self.stall_out

        # -- Output data to logger
        # signal log_data : std_ulogic_vector(53 downto 0);
        # begin
        #     data_log: process(clk)
        #         variable lway: way_t;
        #         variable wstate: std_ulogic;
        # Output data to logger
        for i in range(LOG_LENGTH):
            log_data = Signal(54)
            lway = Signal(NUM_WAYS)
            wstate = Signal()

            # begin
            #     if rising_edge(clk) then
            #         lway := req_hit_way;
            #         wstate := '0';
            sync += lway.eq(req_hit_way)
            sync += wstate.eq(0)

            # if r.state /= IDLE then
            #     wstate := '1';
            # end if;
            with m.If(r.state != State.IDLE):
                sync += wstate.eq(1)

            # log_data <= i_out.valid &
            #             i_out.insn &
            #             wishbone_in.ack &
            #             r.wb.adr(5 downto 3) &
            #             r.wb.stb & r.wb.cyc &
            #             wishbone_in.stall &
            #             stall_out &
            #             r.fetch_failed &
            #             r.hit_nia(5 downto 2) &
            #             wstate &
            #             std_ulogic_vector(to_unsigned(lway, 3)) &
            #             req_is_hit & req_is_miss &
            #             access_ok &
            #             ra_valid;
            sync += log_data.eq(Cat(
                     ra_valid, access_ok, req_is_miss, req_is_hit,
                     lway, wstate, r.hit_nia[2:6],
                     r.fetch_failed, stall_out, wb_in.stall, r.wb.cyc,
                     r.wb.stb, r.wb.adr[3:6], wb_in.ack, i_out.insn,
                     i_out.valid))
            # end if;
            # end process;
            # log_out <= log_data;
            comb += log_out.eq(log_data)
        # end generate;
        # end;

    def elaborate(self, platform):

        m = Module()
        comb = m.d.comb

        # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
        cache_tags = CacheTagArray()
        cache_valid_bits = CacheValidBitsArray()

        # signal itlb_valids : tlb_valids_t;
        # signal itlb_tags : tlb_tags_t;
        # signal itlb_ptes : tlb_ptes_t;
        # attribute ram_style of itlb_tags : signal is "distributed";
        # attribute ram_style of itlb_ptes : signal is "distributed";
        itlb_valid_bits = TLBValidBitsArray()
        itlb_tags = TLBTagArray()
        itlb_ptes = TLBPtesArray()
        # TODO to be passed to nmigen as ram attributes
        # attribute ram_style of itlb_tags : signal is "distributed";
        # attribute ram_style of itlb_ptes : signal is "distributed";

        # -- Privilege bit from PTE EAA field
        # signal eaa_priv  : std_ulogic;
        # Privilege bit from PTE EAA field
        eaa_priv = Signal()

        # signal r : reg_internal_t;
        r = RegInternal()

        # -- Async signals on incoming request
        # signal req_index   : index_t;
        # signal req_row     : row_t;
        # signal req_hit_way : way_t;
        # signal req_tag     : cache_tag_t;
        # signal req_is_hit  : std_ulogic;
        # signal req_is_miss : std_ulogic;
        # signal req_laddr   : std_ulogic_vector(63 downto 0);
        # Async signals on incoming request
        req_index = Signal(NUM_LINES)
        req_row = Signal(BRAM_ROWS)
        req_hit_way = Signal(NUM_WAYS)
        req_tag = Signal(TAG_BITS)
        req_is_hit = Signal()
        req_is_miss = Signal()
        req_laddr = Signal(64)

        # signal tlb_req_index : tlb_index_t;
        # signal real_addr     : std_ulogic_vector(
        #     REAL_ADDR_BITS - 1 downto 0);
        # signal ra_valid      : std_ulogic;
        # signal priv_fault    : std_ulogic;
        # signal access_ok     : std_ulogic;
        # signal use_previous  : std_ulogic;
        tlb_req_index = Signal(TLB_SIZE)
        real_addr = Signal(REAL_ADDR_BITS)
        ra_valid = Signal()
        priv_fault = Signal()
        access_ok = Signal()
        use_previous = Signal()

        # signal cache_out : cache_ram_out_t;
        cache_out_row = Signal(ROW_SIZE_BITS)

        # signal plru_victim : plru_out_t;
        # signal replace_way : way_t;
        plru_victim = PLRUOut()
        replace_way = Signal(NUM_WAYS)

        # call sub-functions putting everything together, using shared
        # signals established above
        self.rams(m, r, cache_out_row, use_previous, replace_way, req_row)
        self.maybe_plrus(m, r, plru_victim)
        self.itlb_lookup(m, tlb_req_index, itlb_ptes, itlb_tags,
                         real_addr, itlb_valid_bits, ra_valid, eaa_priv,
                         priv_fault, access_ok)
        self.itlb_update(m, itlb_valid_bits, itlb_tags, itlb_ptes)
        self.icache_comb(m, use_previous, r, req_index, req_row,
                         req_tag, real_addr, req_laddr, cache_valid_bits,
                         cache_tags, access_ok, req_is_hit, req_is_miss,
                         replace_way, plru_victim, cache_out_row)
        self.icache_hit(m, use_previous, r, req_is_hit, req_hit_way,
                        req_index, req_tag, real_addr)
        self.icache_miss(m, cache_valid_bits, r, req_is_miss, req_index,
                         req_laddr, req_tag, replace_way, cache_tags,
                         access_ok, real_addr)
        #self.icache_log(m, log_out, req_hit_way, ra_valid, access_ok,
        #                req_is_miss, req_is_hit, lway, wstate, r)

        return m


# icache_tb.vhdl
#
# library ieee;
# use ieee.std_logic_1164.all;
#
# library work;
# use work.common.all;
# use work.wishbone_types.all;
#
# entity icache_tb is
# end icache_tb;
#
# architecture behave of icache_tb is
#     signal clk : std_ulogic;
#     signal rst : std_ulogic;
#
#     signal i_out : Fetch1ToIcacheType;
#     signal i_in  : IcacheToDecode1Type;
#
#     signal m_out : MmuToIcacheType;
#
#     signal wb_bram_in  : wishbone_master_out;
#     signal wb_bram_out : wishbone_slave_out;
#
#     constant clk_period : time := 10 ns;
# begin
#     icache0: entity work.icache
#         generic map(
#             LINE_SIZE => 64,
#             NUM_LINES => 4
#         )
#         port map(
#             clk => clk,
#             rst => rst,
#             i_in => i_out,
#             i_out => i_in,
#             m_in => m_out,
#             stall_in => '0',
#             flush_in => '0',
#             inval_in => '0',
#             wishbone_out => wb_bram_in,
#             wishbone_in => wb_bram_out
#         );
#
#     -- BRAM Memory slave
#     bram0: entity work.wishbone_bram_wrapper
#         generic map(
#             MEMORY_SIZE   => 1024,
#             RAM_INIT_FILE => "icache_test.bin"
#         )
#         port map(
#             clk => clk,
#             rst => rst,
#             wishbone_in => wb_bram_in,
#             wishbone_out => wb_bram_out
#         );
#
#     clk_process: process
#     begin
#         clk <= '0';
#         wait for clk_period/2;
#         clk <= '1';
#         wait for clk_period/2;
#     end process;
#
#     rst_process: process
#     begin
#         rst <= '1';
#         wait for 2*clk_period;
#         rst <= '0';
#         wait;
#     end process;
#
#     stim: process
#     begin
#         i_out.req <= '0';
#         i_out.nia <= (others => '0');
#         i_out.stop_mark <= '0';
#
#         m_out.tlbld <= '0';
#         m_out.tlbie <= '0';
#         m_out.addr <= (others => '0');
#         m_out.pte <= (others => '0');
#
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#
#         i_out.req <= '1';
#         i_out.nia <= x"0000000000000004";
#
#         wait for 30*clk_period;
#         wait until rising_edge(clk);
#
#         assert i_in.valid = '1' severity failure;
#         assert i_in.insn = x"00000001"
#             report "insn @" & to_hstring(i_out.nia) &
#                 "=" & to_hstring(i_in.insn) &
#                 " expected 00000001"
#             severity failure;
#
#         i_out.req <= '0';
#
#         wait until rising_edge(clk);
#
#         -- hit
#         i_out.req <= '1';
#         i_out.nia <= x"0000000000000008";
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         assert i_in.valid = '1' severity failure;
#         assert i_in.insn = x"00000002"
#             report "insn @" & to_hstring(i_out.nia) &
#                 "=" & to_hstring(i_in.insn) &
#                 " expected 00000002"
#             severity failure;
#         wait until rising_edge(clk);
#
#         -- another miss
#         i_out.req <= '1';
#         i_out.nia <= x"0000000000000040";
#
#         wait for 30*clk_period;
#         wait until rising_edge(clk);
#
#         assert i_in.valid = '1' severity failure;
#         assert i_in.insn = x"00000010"
#             report "insn @" & to_hstring(i_out.nia) &
#                 "=" & to_hstring(i_in.insn) &
#                 " expected 00000010"
#             severity failure;
#
#         -- test something that aliases
#         i_out.req <= '1';
#         i_out.nia <= x"0000000000000100";
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         assert i_in.valid = '0' severity failure;
#         wait until rising_edge(clk);
#
#         wait for 30*clk_period;
#         wait until rising_edge(clk);
#
#         assert i_in.valid = '1' severity failure;
#         assert i_in.insn = x"00000040"
#             report "insn @" & to_hstring(i_out.nia) &
#                 "=" & to_hstring(i_in.insn) &
#                 " expected 00000040"
#             severity failure;
#
#         i_out.req <= '0';
#
#         std.env.finish;
#     end process;
# end;
def icache_sim(dut):
    i_out = dut.i_in
    i_in = dut.i_out
    m_out = dut.m_in

    yield i_in.valid.eq(0)
    yield i_out.priv_mode.eq(1)
    yield i_out.req.eq(0)
    yield i_out.nia.eq(0)
    yield i_out.stop_mark.eq(0)
    yield m_out.tlbld.eq(0)
    yield m_out.tlbie.eq(0)
    yield m_out.addr.eq(0)
    yield m_out.pte.eq(0)
    yield
    yield
    yield
    yield

    # first fetch is a miss: wait out the reload
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000004, 64))
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    nia = yield i_out.nia
    insn = yield i_in.insn
    print(f"valid? {valid}")
    assert valid
    assert insn == 0x00000001, \
        "insn @%x=%x expected 00000001" % (nia, insn)
    yield i_out.req.eq(0)
    yield

    # hit
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000008, 64))
    yield
    yield
    valid = yield i_in.valid
    nia = yield i_in.nia
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000002, \
        "insn @%x=%x expected 00000002" % (nia, insn)
    yield

    # another miss
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000040, 64))
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    nia = yield i_out.nia
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000010, \
        "insn @%x=%x expected 00000010" % (nia, insn)

    # test something that aliases
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000100, 64))
    yield
    yield
    valid = yield i_in.valid
    # note: "assert not valid", since "~valid" on a python int is
    # always truthy and would never fail
    assert not valid
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    nia = yield i_in.nia
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000040, \
        "insn @%x=%x expected 00000040" % (nia, insn)
    yield i_out.req.eq(0)


def test_icache(mem):
    dut = ICache()

    memory = Memory(width=64, depth=16*64, init=mem)
    sram = SRAM(memory=memory, granularity=8)

    m = Module()

    m.submodules.icache = dut
    m.submodules.sram = sram

    m.d.comb += sram.bus.cyc.eq(dut.wb_out.cyc)
    m.d.comb += sram.bus.stb.eq(dut.wb_out.stb)
    m.d.comb += sram.bus.we.eq(dut.wb_out.we)
    m.d.comb += sram.bus.sel.eq(dut.wb_out.sel)
    m.d.comb += sram.bus.adr.eq(dut.wb_out.adr)
    m.d.comb += sram.bus.dat_w.eq(dut.wb_out.dat)

    m.d.comb += dut.wb_in.ack.eq(sram.bus.ack)
    m.d.comb += dut.wb_in.dat.eq(sram.bus.dat_r)

    # nmigen Simulation
    sim = Simulator(m)
    sim.add_clock(1e-6)

    sim.add_sync_process(wrap(icache_sim(dut)))
    with sim.write_vcd('test_icache.vcd'):
        sim.run()

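# Running this module directly first emits the design as RTLIL
# (test_icache.il), then runs the simulation above against an SRAM in
# which each 64-bit row i holds the two 32-bit "instructions" 2*i (low
# half) and 2*i+1 (high half), matching the values icache_sim asserts.
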
if __name__ == '__main__':
    dut = ICache()
    vl = rtlil.convert(dut, ports=[])
    with open("test_icache.il", "w") as f:
        f.write(vl)

    mem = []
    for i in range(512):
        mem.append((i*2) | ((i*2+1) << 32))

    test_icache(mem)