1 """ICache
2
3 based on Anton Blanchard microwatt icache.vhdl
4
5 Set associative icache
6
7 TODO (in no specific order):
8 * Add debug interface to inspect cache content
9 * Add snoop/invalidate path
10 * Add multi-hit error detection
11 * Pipelined bus interface (wb or axi)
12 * Maybe add parity? There's a few bits free in each BRAM row on Xilinx
13 * Add optimization: service hits on partially loaded lines
14 * Add optimization: (maybe) interrupt reload on fluch/redirect
15 * Check if playing with the geometry of the cache tags allow for more
16 efficient use of distributed RAM and less logic/muxes. Currently we
17 write TAG_BITS width which may not match full ram blocks and might
18 cause muxes to be inferred for "partial writes".
19 * Check if making the read size of PLRU a ROM helps utilization
20
21 """
from enum import Enum, unique
from nmigen import (Module, Signal, Elaboratable, Cat, Array, Const, Repl)
from nmigen.cli import main, rtlil
from nmutil.iocontrol import RecordObject
from nmigen.utils import log2_int
from nmutil.util import Display

#from nmutil.plru import PLRU
from soc.experiment.cache_ram import CacheRam
from soc.experiment.plru import PLRU

from soc.experiment.mem_types import (Fetch1ToICacheType,
                                      ICacheToDecode1Type,
                                      MMUToICacheType)

from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS,
                                     WB_SEL_BITS, WBAddrType, WBDataType,
                                     WBSelType, WBMasterOut, WBSlaveOut,
                                     WBMasterOutVector, WBSlaveOutVector,
                                     WBIOMasterOut, WBIOSlaveOut)

# for test
from nmigen_soc.wishbone.sram import SRAM
from nmigen import Memory
from nmutil.util import wrap
# select the python simulator (cxxsim is the alternative back-end)
if True:
    from nmigen.back.pysim import Simulator, Delay, Settle
else:
    from nmigen.sim.cxxsim import Simulator, Delay, Settle


SIM = 0
LINE_SIZE = 64
# BRAM organisation: We never access more than wishbone_data_bits
# at a time so to save resources we make the array only that wide,
# and use consecutive indices to make a cache "line"
#
# ROW_SIZE is the width in bytes of the BRAM (based on WB, so 64-bits)
ROW_SIZE = WB_DATA_BITS // 8
# Number of lines in a set
NUM_LINES = 32
# Number of ways
NUM_WAYS = 4
# L1 ITLB number of entries (direct mapped)
TLB_SIZE = 64
# L1 ITLB log_2(page_size)
TLB_LG_PGSZ = 12
# Number of real address bits that we store
REAL_ADDR_BITS = 56
# Non-zero to enable log data collection
LOG_LENGTH = 0

ROW_SIZE_BITS = ROW_SIZE * 8
# ROW_PER_LINE is the number of rows
# (wishbone transactions) in a line
ROW_PER_LINE = LINE_SIZE // ROW_SIZE
# BRAM_ROWS is the number of rows in
# BRAM needed to represent the full icache
BRAM_ROWS = NUM_LINES * ROW_PER_LINE
# INSN_PER_ROW is the number of 32bit
# instructions per BRAM row
INSN_PER_ROW = ROW_SIZE_BITS // 32

print("ROW_SIZE", ROW_SIZE)
print("ROW_SIZE_BITS", ROW_SIZE_BITS)
print("ROW_PER_LINE", ROW_PER_LINE)
print("BRAM_ROWS", BRAM_ROWS)
print("INSN_PER_ROW", INSN_PER_ROW)

# Bit field counts in the address
#
# INSN_BITS is the number of bits to
# select an instruction in a row
INSN_BITS = log2_int(INSN_PER_ROW)
# ROW_BITS is the number of bits to
# select a row
ROW_BITS = log2_int(BRAM_ROWS)
# ROW_LINE_BITS is the number of bits to
# select a row within a line
ROW_LINE_BITS = log2_int(ROW_PER_LINE)
# LINE_OFF_BITS is the number of bits for
# the offset in a cache line
LINE_OFF_BITS = log2_int(LINE_SIZE)
# ROW_OFF_BITS is the number of bits for
# the offset in a row
ROW_OFF_BITS = log2_int(ROW_SIZE)
# INDEX_BITS is the number of bits to
# select a cache line
INDEX_BITS = log2_int(NUM_LINES)
# SET_SIZE_BITS is the log base 2 of
# the set size
SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS
# TAG_BITS is the number of bits of
# the tag part of the address
TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS
# TAG_WIDTH is the width in bits of each way of the tag RAM
TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)
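# (worked example, not in the original: this rounds TAG_BITS up to the
# next multiple of 8, so with the default geometry TAG_BITS = 45 gives
# TAG_WIDTH = 45 + 7 - ((45 + 7) % 8) = 52 - 4 = 48)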

# WAY_BITS is the number of bits to
# select a way
WAY_BITS = log2_int(NUM_WAYS)
TAG_RAM_WIDTH = TAG_BITS * NUM_WAYS

# -- L1 ITLB.
# constant TLB_BITS : natural := log2(TLB_SIZE);
# constant TLB_EA_TAG_BITS : natural := 64 - (TLB_LG_PGSZ + TLB_BITS);
# constant TLB_PTE_BITS : natural := 64;
TLB_BITS = log2_int(TLB_SIZE)
TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_BITS)
TLB_PTE_BITS = 64


print("INSN_BITS", INSN_BITS)
print("ROW_BITS", ROW_BITS)
print("ROW_LINE_BITS", ROW_LINE_BITS)
print("LINE_OFF_BITS", LINE_OFF_BITS)
print("ROW_OFF_BITS", ROW_OFF_BITS)
print("INDEX_BITS", INDEX_BITS)
print("SET_SIZE_BITS", SET_SIZE_BITS)
print("TAG_BITS", TAG_BITS)
print("WAY_BITS", WAY_BITS)
print("TAG_RAM_WIDTH", TAG_RAM_WIDTH)
print("TLB_BITS", TLB_BITS)
print("TLB_EA_TAG_BITS", TLB_EA_TAG_BITS)
print("TLB_PTE_BITS", TLB_PTE_BITS)


# architecture rtl of icache is
#constant ROW_SIZE_BITS : natural := ROW_SIZE*8;
#-- ROW_PER_LINE is the number of row (wishbone
#-- transactions) in a line
#constant ROW_PER_LINE : natural := LINE_SIZE / ROW_SIZE;
#-- BRAM_ROWS is the number of rows in BRAM
#-- needed to represent the full
#-- icache
#constant BRAM_ROWS : natural := NUM_LINES * ROW_PER_LINE;
#-- INSN_PER_ROW is the number of 32bit instructions per BRAM row
#constant INSN_PER_ROW : natural := ROW_SIZE_BITS / 32;
#-- Bit fields counts in the address
#
#-- INSN_BITS is the number of bits to select
#-- an instruction in a row
#constant INSN_BITS : natural := log2(INSN_PER_ROW);
#-- ROW_BITS is the number of bits to select a row
#constant ROW_BITS : natural := log2(BRAM_ROWS);
#-- ROW_LINEBITS is the number of bits to
#-- select a row within a line
#constant ROW_LINEBITS : natural := log2(ROW_PER_LINE);
#-- LINE_OFF_BITS is the number of bits for the offset
#-- in a cache line
#constant LINE_OFF_BITS : natural := log2(LINE_SIZE);
#-- ROW_OFF_BITS is the number of bits for the offset in a row
#constant ROW_OFF_BITS : natural := log2(ROW_SIZE);
#-- INDEX_BITS is the number of bits to select a cache line
#constant INDEX_BITS : natural := log2(NUM_LINES);
#-- SET_SIZE_BITS is the log base 2 of the set size
#constant SET_SIZE_BITS : natural := LINE_OFF_BITS + INDEX_BITS;
#-- TAG_BITS is the number of bits of the tag part of the address
#constant TAG_BITS : natural := REAL_ADDR_BITS - SET_SIZE_BITS;
#-- WAY_BITS is the number of bits to select a way
#constant WAY_BITS : natural := log2(NUM_WAYS);

#-- Example of layout for 32 lines of 64 bytes:
#--
#-- ..  tag    |index| line  |
#-- ..         |   row   |   |
#-- ..         |     |   | |00| zero          (2)
#-- ..         |     |   |-|  | INSN_BITS     (1)
#-- ..         |     |---|    | ROW_LINEBITS  (3)
#-- ..         |     |--- - --| LINE_OFF_BITS (6)
#-- ..         |         |- --| ROW_OFF_BITS  (3)
#-- ..         |----- ---|    | ROW_BITS      (8)
#-- ..         |-----|        | INDEX_BITS    (5)
#-- .. --------|              | TAG_BITS      (53)
# Example of layout for 32 lines of 64 bytes:
#
# ..  tag    |index| line  |
# ..         |   row   |   |
# ..         |     |   | |00| zero          (2)
# ..         |     |   |-|  | INSN_BITS     (1)
# ..         |     |---|    | ROW_LINE_BITS (3)
# ..         |     |--- - --| LINE_OFF_BITS (6)
# ..         |         |- --| ROW_OFF_BITS  (3)
# ..         |----- ---|    | ROW_BITS      (8)
# ..         |-----|        | INDEX_BITS    (5)
# .. --------|              | TAG_BITS      (53)
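# (worked example, not in the original: with the default geometry here,
# LINE_OFF_BITS=6, INDEX_BITS=5, ROW_OFF_BITS=3, so for real address
# 0x1FC4 the helpers defined below give get_index() = addr[6:11] = 0x1F,
# get_row() = addr[3:11] = 0xF8, and get_tag() = addr[11:56] = 0x3)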

#subtype row_t is integer range 0 to BRAM_ROWS-1;
#subtype index_t is integer range 0 to NUM_LINES-1;
#subtype way_t is integer range 0 to NUM_WAYS-1;
#subtype row_in_line_t is unsigned(ROW_LINEBITS-1 downto 0);
#
#-- The cache data BRAM organized as described above for each way
#subtype cache_row_t is std_ulogic_vector(ROW_SIZE_BITS-1 downto 0);
#
#-- The cache tags LUTRAM has a row per set. Vivado is a pain and will
#-- not handle a clean (commented) definition of the cache tags as a 3d
#-- memory. For now, work around it by putting all the tags
#subtype cache_tag_t is std_logic_vector(TAG_BITS-1 downto 0);
# type cache_tags_set_t is array(way_t) of cache_tag_t;
# type cache_tags_array_t is array(index_t) of cache_tags_set_t;
#constant TAG_RAM_WIDTH : natural := TAG_BITS * NUM_WAYS;
#subtype cache_tags_set_t is std_logic_vector(TAG_RAM_WIDTH-1 downto 0);
#type cache_tags_array_t is array(index_t) of cache_tags_set_t;
def CacheTagArray():
    return Array(Signal(TAG_RAM_WIDTH, name="cachetag_%d" %x) \
                 for x in range(NUM_LINES))

#-- The cache valid bits
#subtype cache_way_valids_t is std_ulogic_vector(NUM_WAYS-1 downto 0);
#type cache_valids_t is array(index_t) of cache_way_valids_t;
#type row_per_line_valid_t is array(0 to ROW_PER_LINE - 1) of std_ulogic;
def CacheValidBitsArray():
    return Array(Signal(NUM_WAYS, name="cachevalid_%d" %x) \
                 for x in range(NUM_LINES))

def RowPerLineValidArray():
    return Array(Signal(name="rows_valid_%d" %x) \
                 for x in range(ROW_PER_LINE))

#attribute ram_style : string;
#attribute ram_style of cache_tags : signal is "distributed";
# TODO to be passed to nmigen as ram attributes
# attribute ram_style : string;
# attribute ram_style of cache_tags : signal is "distributed";


#subtype tlb_index_t is integer range 0 to TLB_SIZE - 1;
#type tlb_valids_t is array(tlb_index_t) of std_ulogic;
#subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0);
#type tlb_tags_t is array(tlb_index_t) of tlb_tag_t;
#subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
#type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t;
def TLBValidBitsArray():
    return Array(Signal(name="tlbvalid_%d" %x) \
                 for x in range(TLB_SIZE))

def TLBTagArray():
    return Array(Signal(TLB_EA_TAG_BITS, name="tlbtag_%d" %x) \
                 for x in range(TLB_SIZE))

def TLBPtesArray():
    return Array(Signal(TLB_PTE_BITS, name="tlbptes_%d" %x) \
                 for x in range(TLB_SIZE))


#-- Cache RAM interface
#type cache_ram_out_t is array(way_t) of cache_row_t;
# Cache RAM interface
def CacheRamOut():
    return Array(Signal(ROW_SIZE_BITS, name="cache_out_%d" %x) \
                 for x in range(NUM_WAYS))

#-- PLRU output interface
#type plru_out_t is array(index_t) of
#    std_ulogic_vector(WAY_BITS-1 downto 0);
# PLRU output interface
def PLRUOut():
    return Array(Signal(WAY_BITS, name="plru_out_%d" %x) \
                 for x in range(NUM_LINES))

# -- Return the cache line index (tag index) for an address
# function get_index(addr: std_ulogic_vector(63 downto 0))
#     return index_t is
# begin
#     return to_integer(unsigned(
#         addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS)
#     ));
# end;
# Return the cache line index (tag index) for an address
def get_index(addr):
    return addr[LINE_OFF_BITS:SET_SIZE_BITS]

# -- Return the cache row index (data memory) for an address
# function get_row(addr: std_ulogic_vector(63 downto 0))
#     return row_t is
# begin
#     return to_integer(unsigned(
#         addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS)
#     ));
# end;
# Return the cache row index (data memory) for an address
def get_row(addr):
    return addr[ROW_OFF_BITS:SET_SIZE_BITS]

# -- Return the index of a row within a line
# function get_row_of_line(row: row_t) return row_in_line_t is
#     variable row_v : unsigned(ROW_BITS-1 downto 0);
# begin
#     row_v := to_unsigned(row, ROW_BITS);
#     return row_v(ROW_LINEBITS-1 downto 0);
# end;
# Return the index of a row within a line
def get_row_of_line(row):
    return row[:ROW_LINE_BITS]

# -- Returns whether this is the last row of a line
# function is_last_row_addr(addr: wishbone_addr_type;
#     last: row_in_line_t
# )
#     return boolean is
# begin
#     return unsigned(
#         addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS)
#     ) = last;
# end;
# Returns whether this is the last row of a line
def is_last_row_addr(addr, last):
    return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last

# -- Returns whether this is the last row of a line
# function is_last_row(row: row_t;
#     last: row_in_line_t) return boolean is
# begin
#     return get_row_of_line(row) = last;
# end;
# Returns whether this is the last row of a line
def is_last_row(row, last):
    return get_row_of_line(row) == last

# -- Return the next row in the current cache line. We use a dedicated
# -- function in order to limit the size of the generated adder to be
# -- only the bits within a cache line (3 bits with default settings)
# function next_row(row: row_t) return row_t is
#     variable row_v   : std_ulogic_vector(ROW_BITS-1 downto 0);
#     variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
#     variable result  : std_ulogic_vector(ROW_BITS-1 downto 0);
# begin
#     row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
#     row_idx := row_v(ROW_LINEBITS-1 downto 0);
#     row_v(ROW_LINEBITS-1 downto 0) :=
#         std_ulogic_vector(unsigned(row_idx) + 1);
#     return to_integer(unsigned(row_v));
# end;
# Return the next row in the current cache line. We use a dedicated
# function in order to limit the size of the generated adder to be
# only the bits within a cache line (3 bits with default settings)
def next_row(row):
    row_v = row[0:ROW_LINE_BITS] + 1
    return Cat(row_v[:ROW_LINE_BITS], row[ROW_LINE_BITS:])
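
# (worked example, not in the original: with ROW_LINE_BITS = 3,
# next_row(0b11111111) increments only the low 3 bits, which wrap to
# 0b000, giving 0b11111000 - the adder never crosses a line boundary)
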
# -- Read the instruction word for the given address in the
# -- current cache row
# function read_insn_word(addr: std_ulogic_vector(63 downto 0);
#     data: cache_row_t) return std_ulogic_vector is
#     variable word: integer range 0 to INSN_PER_ROW-1;
# begin
#     word := to_integer(unsigned(addr(INSN_BITS+2-1 downto 2)));
#     return data(31+word*32 downto word*32);
# end;
# Read the instruction word for the given address
# in the current cache row
def read_insn_word(addr, data):
    word = addr[2:INSN_BITS+2]
    return data.word_select(word, 32)
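
# (worked example, not in the original: with INSN_PER_ROW = 2 the word
# index is the single address bit addr[2], so word_select() returns
# data[0:32] or data[32:64] accordingly)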

# -- Get the tag value from the address
# function get_tag(
#     addr: std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0)
# )
#     return cache_tag_t is
# begin
#     return addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS);
# end;
# Get the tag value from the address
def get_tag(addr):
    return addr[SET_SIZE_BITS:REAL_ADDR_BITS]

# -- Read a tag from a tag memory row
# function read_tag(way: way_t; tagset: cache_tags_set_t)
#     return cache_tag_t is
# begin
#     return tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS);
# end;
# Read a tag from a tag memory row
def read_tag(way, tagset):
    return tagset.word_select(way, TAG_BITS)

# -- Write a tag to tag memory row
# procedure write_tag(way: in way_t;
#     tagset: inout cache_tags_set_t; tag: cache_tag_t) is
# begin
#     tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS) := tag;
# end;
# Write a tag to tag memory row
def write_tag(way, tagset, tag):
    return read_tag(way, tagset).eq(tag)
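
# (usage note, not in the original: write_tag() returns an assignment to
# a TAG_BITS-wide slice of tagset, so it is added to a domain, e.g.
# comb += write_tag(way, tagset, r.store_tag), as done in icache_miss)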

# -- Simple hash for direct-mapped TLB index
# function hash_ea(addr: std_ulogic_vector(63 downto 0))
#     return tlb_index_t is
#     variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
# begin
#     hash := addr(TLB_LG_PGSZ + TLB_BITS - 1 downto TLB_LG_PGSZ)
#         xor addr(
#             TLB_LG_PGSZ + 2 * TLB_BITS - 1 downto
#             TLB_LG_PGSZ + TLB_BITS
#         )
#         xor addr(
#             TLB_LG_PGSZ + 3 * TLB_BITS - 1 downto
#             TLB_LG_PGSZ + 2 * TLB_BITS
#         );
#     return to_integer(unsigned(hash));
# end;
# Simple hash for direct-mapped TLB index
def hash_ea(addr):
    hsh = addr[TLB_LG_PGSZ:TLB_LG_PGSZ + TLB_BITS] ^ addr[
              TLB_LG_PGSZ + TLB_BITS:TLB_LG_PGSZ + 2 * TLB_BITS
          ] ^ addr[
              TLB_LG_PGSZ + 2 * TLB_BITS:TLB_LG_PGSZ + 3 * TLB_BITS
          ]
    return hsh
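
# (worked example, not in the original: with TLB_LG_PGSZ = 12 and
# TLB_BITS = 6 this XOR-folds three 6-bit fields of the effective
# address, ea[12:18] ^ ea[18:24] ^ ea[24:30], into a 6-bit TLB index)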

# begin
#
#     assert LINE_SIZE mod ROW_SIZE = 0;
#     assert ispow2(LINE_SIZE) report "LINE_SIZE not power of 2"
#         severity FAILURE;
#     assert ispow2(NUM_LINES) report "NUM_LINES not power of 2"
#         severity FAILURE;
#     assert ispow2(ROW_PER_LINE) report "ROW_PER_LINE not power of 2"
#         severity FAILURE;
#     assert ispow2(INSN_PER_ROW) report "INSN_PER_ROW not power of 2"
#         severity FAILURE;
#     assert (ROW_BITS = INDEX_BITS + ROW_LINEBITS)
#         report "geometry bits don't add up" severity FAILURE;
#     assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS)
#         report "geometry bits don't add up" severity FAILURE;
#     assert (REAL_ADDR_BITS = TAG_BITS + INDEX_BITS + LINE_OFF_BITS)
#         report "geometry bits don't add up" severity FAILURE;
#     assert (REAL_ADDR_BITS = TAG_BITS + ROW_BITS + ROW_OFF_BITS)
#         report "geometry bits don't add up" severity FAILURE;
#
#     sim_debug: if SIM generate
#     debug: process
#     begin
#         report "ROW_SIZE      = " & natural'image(ROW_SIZE);
#         report "ROW_PER_LINE  = " & natural'image(ROW_PER_LINE);
#         report "BRAM_ROWS     = " & natural'image(BRAM_ROWS);
#         report "INSN_PER_ROW  = " & natural'image(INSN_PER_ROW);
#         report "INSN_BITS     = " & natural'image(INSN_BITS);
#         report "ROW_BITS      = " & natural'image(ROW_BITS);
#         report "ROW_LINEBITS  = " & natural'image(ROW_LINEBITS);
#         report "LINE_OFF_BITS = " & natural'image(LINE_OFF_BITS);
#         report "ROW_OFF_BITS  = " & natural'image(ROW_OFF_BITS);
#         report "INDEX_BITS    = " & natural'image(INDEX_BITS);
#         report "TAG_BITS      = " & natural'image(TAG_BITS);
#         report "WAY_BITS      = " & natural'image(WAY_BITS);
#         wait;
#     end process;
#     end generate;

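# (not in the original: a direct Python translation of the VHDL geometry
# sanity checks quoted above, as a hedged safety net on the constants;
# ispow2() is a local helper, not an nmigen API)
def ispow2(x):
    return (1 << log2_int(x, False)) == x

assert LINE_SIZE % ROW_SIZE == 0, "LINE_SIZE not multiple of ROW_SIZE"
assert ispow2(LINE_SIZE), "LINE_SIZE not power of 2"
assert ispow2(NUM_LINES), "NUM_LINES not power of 2"
assert ispow2(ROW_PER_LINE), "ROW_PER_LINE not power of 2"
assert ispow2(INSN_PER_ROW), "INSN_PER_ROW not power of 2"
assert ROW_BITS == (INDEX_BITS + ROW_LINE_BITS), \
    "geometry bits don't add up"
assert LINE_OFF_BITS == (ROW_OFF_BITS + ROW_LINE_BITS), \
    "geometry bits don't add up"
assert REAL_ADDR_BITS == (TAG_BITS + INDEX_BITS + LINE_OFF_BITS), \
    "geometry bits don't add up"
assert REAL_ADDR_BITS == (TAG_BITS + ROW_BITS + ROW_OFF_BITS), \
    "geometry bits don't add up"
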
# Cache reload state machine
@unique
class State(Enum):
    IDLE = 0
    CLR_TAG = 1
    WAIT_ACK = 2

# type reg_internal_t is record
#     -- Cache hit state (Latches for 1 cycle BRAM access)
#     hit_way : way_t;
#     hit_nia : std_ulogic_vector(63 downto 0);
#     hit_smark : std_ulogic;
#     hit_valid : std_ulogic;
#
#     -- Cache miss state (reload state machine)
#     state : state_t;
#     wb : wishbone_master_out;
#     store_way : way_t;
#     store_index : index_t;
#     store_row : row_t;
#     store_tag : cache_tag_t;
#     store_valid : std_ulogic;
#     end_row_ix : row_in_line_t;
#     rows_valid : row_per_line_valid_t;
#
#     -- TLB miss state
#     fetch_failed : std_ulogic;
# end record;
class RegInternal(RecordObject):
    def __init__(self):
        super().__init__()
        # Cache hit state (Latches for 1 cycle BRAM access)
        self.hit_way = Signal(NUM_WAYS)
        self.hit_nia = Signal(64)
        self.hit_smark = Signal()
        self.hit_valid = Signal()

        # Cache miss state (reload state machine)
        self.state = Signal(State, reset=State.IDLE)
        self.wb = WBMasterOut("wb")
        self.req_adr = Signal(64)
        self.store_way = Signal(NUM_WAYS)
        self.store_index = Signal(NUM_LINES)
        self.store_row = Signal(BRAM_ROWS)
        self.store_tag = Signal(TAG_BITS)
        self.store_valid = Signal()
        self.end_row_ix = Signal(ROW_LINE_BITS)
        self.rows_valid = RowPerLineValidArray()

        # TLB miss state
        self.fetch_failed = Signal()

# -- 64 bit direct mapped icache. All instructions are 4B aligned.
#
# entity icache is
#     generic (
#         SIM : boolean := false;
#         -- Line size in bytes
#         LINE_SIZE : positive := 64;
#         -- BRAM organisation: We never access more
#         -- than wishbone_data_bits
#         -- at a time so to save resources we make the
#         -- array only that wide,
#         -- and use consecutive indices for to make a cache "line"
#         --
#         -- ROW_SIZE is the width in bytes of the BRAM (based on WB,
#         -- so 64-bits)
#         ROW_SIZE : positive := wishbone_data_bits / 8;
#         -- Number of lines in a set
#         NUM_LINES : positive := 32;
#         -- Number of ways
#         NUM_WAYS : positive := 4;
#         -- L1 ITLB number of entries (direct mapped)
#         TLB_SIZE : positive := 64;
#         -- L1 ITLB log_2(page_size)
#         TLB_LG_PGSZ : positive := 12;
#         -- Number of real address bits that we store
#         REAL_ADDR_BITS : positive := 56;
#         -- Non-zero to enable log data collection
#         LOG_LENGTH : natural := 0
#     );
#     port (
#         clk : in std_ulogic;
#         rst : in std_ulogic;
#
#         i_in : in Fetch1ToIcacheType;
#         i_out : out IcacheToDecode1Type;
#
#         m_in : in MmuToIcacheType;
#
#         stall_in : in std_ulogic;
#         stall_out : out std_ulogic;
#         flush_in : in std_ulogic;
#         inval_in : in std_ulogic;
#
#         wishbone_out : out wishbone_master_out;
#         wishbone_in : in wishbone_slave_out;
#
#         log_out : out std_ulogic_vector(53 downto 0)
#     );
# end entity icache;
# 64 bit direct mapped icache. All instructions are 4B aligned.
class ICache(Elaboratable):
    """64 bit direct mapped icache. All instructions are 4B aligned."""
    def __init__(self):
        self.i_in = Fetch1ToICacheType(name="i_in")
        self.i_out = ICacheToDecode1Type(name="i_out")

        self.m_in = MMUToICacheType(name="m_in")

        self.stall_in = Signal()
        self.stall_out = Signal()
        self.flush_in = Signal()
        self.inval_in = Signal()

        self.wb_out = WBMasterOut(name="wb_out")
        self.wb_in = WBSlaveOut(name="wb_in")

        self.log_out = Signal(54)


    # -- Generate a cache RAM for each way
    # rams: for i in 0 to NUM_WAYS-1 generate
    #     signal do_read  : std_ulogic;
    #     signal do_write : std_ulogic;
    #     signal rd_addr  : std_ulogic_vector(ROW_BITS-1 downto 0);
    #     signal wr_addr  : std_ulogic_vector(ROW_BITS-1 downto 0);
    #     signal dout     : cache_row_t;
    #     signal wr_sel   : std_ulogic_vector(ROW_SIZE-1 downto 0);
    # begin
    #     way: entity work.cache_ram
    #         generic map (
    #             ROW_BITS => ROW_BITS,
    #             WIDTH => ROW_SIZE_BITS
    #         )
    #         port map (
    #             clk => clk,
    #             rd_en => do_read,
    #             rd_addr => rd_addr,
    #             rd_data => dout,
    #             wr_sel => wr_sel,
    #             wr_addr => wr_addr,
    #             wr_data => wishbone_in.dat
    #         );
    #     process(all)
    #     begin
    #         do_read <= not (stall_in or use_previous);
    #         do_write <= '0';
    #         if wishbone_in.ack = '1' and replace_way = i then
    #             do_write <= '1';
    #         end if;
    #         cache_out(i) <= dout;
    #         rd_addr <=
    #             std_ulogic_vector(to_unsigned(req_row, ROW_BITS));
    #         wr_addr <=
    #             std_ulogic_vector(to_unsigned(r.store_row, ROW_BITS));
    #         for i in 0 to ROW_SIZE-1 loop
    #             wr_sel(i) <= do_write;
    #         end loop;
    #     end process;
    # end generate;
    def rams(self, m, r, cache_out_row, use_previous, replace_way, req_row):
        comb = m.d.comb

        wb_in, stall_in = self.wb_in, self.stall_in

        for i in range(NUM_WAYS):
            do_read = Signal(name="do_rd_%d" % i)
            do_write = Signal(name="do_wr_%d" % i)
            rd_addr = Signal(ROW_BITS)
            wr_addr = Signal(ROW_BITS)
            d_out = Signal(ROW_SIZE_BITS, name="d_out_%d" % i)
            wr_sel = Signal(ROW_SIZE)

            way = CacheRam(ROW_BITS, ROW_SIZE_BITS, True)
            setattr(m.submodules, "cacheram_%d" % i, way)

            comb += way.rd_en.eq(do_read)
            comb += way.rd_addr.eq(rd_addr)
            comb += d_out.eq(way.rd_data_o)
            comb += way.wr_sel.eq(wr_sel)
            comb += way.wr_addr.eq(wr_addr)
            comb += way.wr_data.eq(wb_in.dat)

            comb += do_read.eq(~(stall_in | use_previous))
            comb += do_write.eq(wb_in.ack & (replace_way == i))

            with m.If(r.hit_way == i):
                comb += cache_out_row.eq(d_out)

            comb += rd_addr.eq(req_row)
            comb += wr_addr.eq(r.store_row)
            comb += wr_sel.eq(Repl(do_write, ROW_SIZE))
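            # (note, not in the original: Repl() replicates the single
            # do_write bit across all ROW_SIZE byte-select lanes, the
            # nmigen equivalent of the per-bit for-loop in the VHDL above)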

    # -- Generate PLRUs
    # maybe_plrus: if NUM_WAYS > 1 generate
    # begin
    #     plrus: for i in 0 to NUM_LINES-1 generate
    #         -- PLRU interface
    #         signal plru_acc : std_ulogic_vector(WAY_BITS-1 downto 0);
    #         signal plru_acc_en : std_ulogic;
    #         signal plru_out : std_ulogic_vector(WAY_BITS-1 downto 0);
    #
    #     begin
    #         plru : entity work.plru
    #             generic map (
    #                 BITS => WAY_BITS
    #             )
    #             port map (
    #                 clk => clk,
    #                 rst => rst,
    #                 acc => plru_acc,
    #                 acc_en => plru_acc_en,
    #                 lru => plru_out
    #             );
    #
    #         process(all)
    #         begin
    #             -- PLRU interface
    #             if get_index(r.hit_nia) = i then
    #                 plru_acc_en <= r.hit_valid;
    #             else
    #                 plru_acc_en <= '0';
    #             end if;
    #             plru_acc <=
    #                 std_ulogic_vector(to_unsigned(r.hit_way, WAY_BITS));
    #             plru_victim(i) <= plru_out;
    #         end process;
    #     end generate;
    # end generate;
    def maybe_plrus(self, m, r, plru_victim):
        comb = m.d.comb

        # (fix: NUM_WAYS is a python constant, so this is an
        # elaboration-time "generate" decision, not a hardware mux)
        if NUM_WAYS > 1:
            for i in range(NUM_LINES):
                plru_acc_i = Signal(WAY_BITS)
                plru_acc_en = Signal()
                plru = PLRU(WAY_BITS)
                setattr(m.submodules, "plru_%d" % i, plru)

                comb += plru.acc_i.eq(plru_acc_i)
                comb += plru.acc_en.eq(plru_acc_en)

                # PLRU interface
                with m.If(get_index(r.hit_nia) == i):
                    comb += plru.acc_en.eq(r.hit_valid)

                comb += plru.acc_i.eq(r.hit_way)
                comb += plru_victim[i].eq(plru.lru_o)

    # -- TLB hit detection and real address generation
    # itlb_lookup : process(all)
    #     variable pte : tlb_pte_t;
    #     variable ttag : tlb_tag_t;
    # begin
    #     tlb_req_index <= hash_ea(i_in.nia);
    #     pte := itlb_ptes(tlb_req_index);
    #     ttag := itlb_tags(tlb_req_index);
    #     if i_in.virt_mode = '1' then
    #         real_addr <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
    #                      i_in.nia(TLB_LG_PGSZ - 1 downto 0);
    #         if ttag = i_in.nia(63 downto TLB_LG_PGSZ + TLB_BITS) then
    #             ra_valid <= itlb_valids(tlb_req_index);
    #         else
    #             ra_valid <= '0';
    #         end if;
    #         eaa_priv <= pte(3);
    #     else
    #         real_addr <= i_in.nia(REAL_ADDR_BITS - 1 downto 0);
    #         ra_valid <= '1';
    #         eaa_priv <= '1';
    #     end if;
    #
    #     -- no IAMR, so no KUEP support for now
    #     priv_fault <= eaa_priv and not i_in.priv_mode;
    #     access_ok <= ra_valid and not priv_fault;
    # end process;
    # TLB hit detection and real address generation
    def itlb_lookup(self, m, tlb_req_index, itlb_ptes, itlb_tags,
                    real_addr, itlb_valid_bits, ra_valid, eaa_priv,
                    priv_fault, access_ok):
        comb = m.d.comb

        i_in = self.i_in

        pte = Signal(TLB_PTE_BITS)
        ttag = Signal(TLB_EA_TAG_BITS)

        comb += tlb_req_index.eq(hash_ea(i_in.nia))
        comb += pte.eq(itlb_ptes[tlb_req_index])
        comb += ttag.eq(itlb_tags[tlb_req_index])

        with m.If(i_in.virt_mode):
            comb += real_addr.eq(Cat(
                        i_in.nia[:TLB_LG_PGSZ],
                        pte[TLB_LG_PGSZ:REAL_ADDR_BITS]
                    ))

            with m.If(ttag == i_in.nia[TLB_LG_PGSZ + TLB_BITS:64]):
                comb += ra_valid.eq(itlb_valid_bits[tlb_req_index])

            comb += eaa_priv.eq(pte[3])

        with m.Else():
            comb += real_addr.eq(i_in.nia[:REAL_ADDR_BITS])
            comb += ra_valid.eq(1)
            comb += eaa_priv.eq(1)

        # No IAMR, so no KUEP support for now
        comb += priv_fault.eq(eaa_priv & ~i_in.priv_mode)
        comb += access_ok.eq(ra_valid & ~priv_fault)

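    # (worked example, not in the original: in virt_mode the real address
    # is the PTE's real page number concatenated over the page offset;
    # e.g. for nia = 0x3F24 the low TLB_LG_PGSZ = 12 bits (0xF24) come
    # straight from nia and bits [12:56] come from the matching PTE)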

    # -- iTLB update
    # itlb_update: process(clk)
    #     variable wr_index : tlb_index_t;
    # begin
    #     if rising_edge(clk) then
    #         wr_index := hash_ea(m_in.addr);
    #         if rst = '1' or
    #             (m_in.tlbie = '1' and m_in.doall = '1') then
    #             -- clear all valid bits
    #             for i in tlb_index_t loop
    #                 itlb_valids(i) <= '0';
    #             end loop;
    #         elsif m_in.tlbie = '1' then
    #             -- clear entry regardless of hit or miss
    #             itlb_valids(wr_index) <= '0';
    #         elsif m_in.tlbld = '1' then
    #             itlb_tags(wr_index) <=
    #                 m_in.addr(63 downto TLB_LG_PGSZ + TLB_BITS);
    #             itlb_ptes(wr_index) <= m_in.pte;
    #             itlb_valids(wr_index) <= '1';
    #         end if;
    #     end if;
    # end process;
    # iTLB update
    def itlb_update(self, m, itlb_valid_bits, itlb_tags, itlb_ptes):
        comb = m.d.comb
        sync = m.d.sync

        m_in = self.m_in

        wr_index = Signal(TLB_SIZE)
        sync += wr_index.eq(hash_ea(m_in.addr))

        with m.If(m_in.tlbie & m_in.doall):
            # Clear all valid bits
            for i in range(TLB_SIZE):
                sync += itlb_valid_bits[i].eq(0)

        with m.Elif(m_in.tlbie):
            # Clear entry regardless of hit or miss
            sync += itlb_valid_bits[wr_index].eq(0)

        with m.Elif(m_in.tlbld):
            sync += itlb_tags[wr_index].eq(
                        m_in.addr[TLB_LG_PGSZ + TLB_BITS:64]
                    )
            sync += itlb_ptes[wr_index].eq(m_in.pte)
            sync += itlb_valid_bits[wr_index].eq(1)

    # -- Cache hit detection, output to fetch2 and other misc logic
    # icache_comb : process(all)
    # Cache hit detection, output to fetch2 and other misc logic
    # (fix: req_hit_way added to the parameter list; it was previously
    # never driven, leaving r.hit_way stuck at way 0 - see the
    # assignment further down)
    def icache_comb(self, m, use_previous, r, req_index, req_row,
                    req_hit_way, req_tag, real_addr, req_laddr,
                    cache_valid_bits, cache_tags, access_ok, req_is_hit,
                    req_is_miss, replace_way, plru_victim, cache_out_row):
        # variable is_hit : std_ulogic;
        # variable hit_way : way_t;
        comb = m.d.comb

        #comb += Display("ENTER icache_comb - use_previous:%x req_index:%x " \
        #                "req_row:%x req_tag:%x real_addr:%x req_laddr:%x " \
        #                "access_ok:%x req_is_hit:%x req_is_miss:%x " \
        #                "replace_way:%x", use_previous, req_index, req_row, \
        #                req_tag, real_addr, req_laddr, access_ok, \
        #                req_is_hit, req_is_miss, replace_way)

        i_in, i_out, wb_out = self.i_in, self.i_out, self.wb_out
        flush_in, stall_out = self.flush_in, self.stall_out

        is_hit = Signal()
        hit_way = Signal(NUM_WAYS)
        # begin
        # -- i_in.sequential means that i_in.nia this cycle
        # -- is 4 more than last cycle. If we read more
        # -- than 32 bits at a time, had a cache hit last
        # -- cycle, and we don't want the first 32-bit chunk
        # -- then we can keep the data we read last cycle
        # -- and just use that.
        # if unsigned(i_in.nia(INSN_BITS+2-1 downto 2)) /= 0 then
        #     use_previous <= i_in.sequential and r.hit_valid;
        # else
        #     use_previous <= '0';
        # end if;
        # i_in.sequential means that i_in.nia this cycle is 4 more than
        # last cycle. If we read more than 32 bits at a time, had a
        # cache hit last cycle, and we don't want the first 32-bit chunk
        # then we can keep the data we read last cycle and just use that.
        with m.If(i_in.nia[2:INSN_BITS+2] != 0):
            comb += use_previous.eq(i_in.sequential & r.hit_valid)

        # -- Extract line, row and tag from request
        # req_index <= get_index(i_in.nia);
        # req_row <= get_row(i_in.nia);
        # req_tag <= get_tag(real_addr);
        # Extract line, row and tag from request
        comb += req_index.eq(get_index(i_in.nia))
        comb += req_row.eq(get_row(i_in.nia))
        comb += req_tag.eq(get_tag(real_addr))

        # -- Calculate address of beginning of cache row, will be
        # -- used for cache miss processing if needed
        # req_laddr <=
        #     (63 downto REAL_ADDR_BITS => '0') &
        #     real_addr(REAL_ADDR_BITS - 1 downto ROW_OFF_BITS) &
        #     (ROW_OFF_BITS-1 downto 0 => '0');
        # Calculate address of beginning of cache row, will be
        # used for cache miss processing if needed
        comb += req_laddr.eq(Cat(
                    Const(0b0, ROW_OFF_BITS),
                    real_addr[ROW_OFF_BITS:REAL_ADDR_BITS],
                    Const(0b0, 64-REAL_ADDR_BITS)  # pad up to 64 bits
                ))

        # -- Test if pending request is a hit on any way
        # hit_way := 0;
        # is_hit := '0';
        # for i in way_t loop
        #     if i_in.req = '1' and
        #         (cache_valids(req_index)(i) = '1' or
        #          (r.state = WAIT_ACK and
        #           req_index = r.store_index and
        #           i = r.store_way and
        #           r.rows_valid(req_row mod ROW_PER_LINE) = '1')) then
        #         if read_tag(i, cache_tags(req_index)) = req_tag then
        #             hit_way := i;
        #             is_hit := '1';
        #         end if;
        #     end if;
        # end loop;
        # Test if pending request is a hit on any way
        hitcond = Signal()
        comb += hitcond.eq((r.state == State.WAIT_ACK)
                           & (req_index == r.store_index)
                           & r.rows_valid[req_row % ROW_PER_LINE])
        with m.If(i_in.req):
            cvb = Signal(NUM_WAYS)
            ctag = Signal(TAG_RAM_WIDTH)
            comb += ctag.eq(cache_tags[req_index])
            comb += cvb.eq(cache_valid_bits[req_index])
            for i in range(NUM_WAYS):
                tagi = Signal(TAG_BITS, name="ti%d" % i)
                comb += tagi.eq(read_tag(i, ctag))
                hit_test = Signal(name="hit_test%d" % i)
                comb += hit_test.eq(i == r.store_way)
                with m.If((cvb[i] | (hitcond & hit_test)) & (tagi == req_tag)):
                    comb += hit_way.eq(i)
                    comb += is_hit.eq(1)

        # -- Generate the "hit" and "miss" signals
        # -- for the synchronous blocks
        # if i_in.req = '1' and access_ok = '1' and flush_in = '0'
        #     and rst = '0' then
        #     req_is_hit <= is_hit;
        #     req_is_miss <= not is_hit;
        # else
        #     req_is_hit <= '0';
        #     req_is_miss <= '0';
        # end if;
        # req_hit_way <= hit_way;
        # Generate the "hit" and "miss" signals
        # for the synchronous blocks
        with m.If(i_in.req & access_ok & ~flush_in):
            comb += req_is_hit.eq(is_hit)
            comb += req_is_miss.eq(~is_hit)

        with m.Else():
            comb += req_is_hit.eq(0)
            comb += req_is_miss.eq(0)

        # (fix: this assignment was missing, leaving req_hit_way undriven;
        # it is the translation of "req_hit_way <= hit_way" above)
        comb += req_hit_way.eq(hit_way)

        # -- The way to replace on a miss
        # if r.state = CLR_TAG then
        #     replace_way <=
        #         to_integer(unsigned(plru_victim(r.store_index)));
        # else
        #     replace_way <= r.store_way;
        # end if;
        # The way to replace on a miss
        with m.If(r.state == State.CLR_TAG):
            comb += replace_way.eq(plru_victim[r.store_index])

        with m.Else():
            comb += replace_way.eq(r.store_way)

        # -- Output instruction from current cache row
        # --
        # -- Note: This is a mild violation of our design principle of
        # -- having pipeline stages output from a clean latch. In this
        # -- case we output the result of a mux. The alternative would
        # -- be output an entire row which I prefer not to do just yet
        # -- as it would force fetch2 to know about some of the cache
        # -- geometry information.
        # i_out.insn <= read_insn_word(r.hit_nia, cache_out(r.hit_way));
        # i_out.valid <= r.hit_valid;
        # i_out.nia <= r.hit_nia;
        # i_out.stop_mark <= r.hit_smark;
        # i_out.fetch_failed <= r.fetch_failed;
        # Output instruction from current cache row
        #
        # Note: This is a mild violation of our design principle of
        # having pipeline stages output from a clean latch. In this
        # case we output the result of a mux. The alternative would
        # be output an entire row which I prefer not to do just yet
        # as it would force fetch2 to know about some of the cache
        # geometry information.
        #comb += Display("BEFORE read_insn_word - r.hit_nia:%x " \
        #                "r.hit_way:%x, cache_out[r.hit_way]:%x", r.hit_nia, \
        #                r.hit_way, cache_out[r.hit_way])
        comb += i_out.insn.eq(read_insn_word(r.hit_nia, cache_out_row))
        comb += i_out.valid.eq(r.hit_valid)
        comb += i_out.nia.eq(r.hit_nia)
        comb += i_out.stop_mark.eq(r.hit_smark)
        comb += i_out.fetch_failed.eq(r.fetch_failed)

        # -- Stall fetch1 if we have a miss on cache or TLB
        # -- or a protection fault
        # stall_out <= not (is_hit and access_ok);
        # Stall fetch1 if we have a miss on cache or TLB
        # or a protection fault
        comb += stall_out.eq(~(is_hit & access_ok))

        # -- Wishbone requests output (from the cache miss reload machine)
        # wishbone_out <= r.wb;
        # Wishbone requests output (from the cache miss reload machine)
        comb += wb_out.eq(r.wb)
        # end process;

    # -- Cache hit synchronous machine
    # icache_hit : process(clk)
    # Cache hit synchronous machine
    def icache_hit(self, m, use_previous, r, req_is_hit, req_hit_way,
                   req_index, req_tag, real_addr):
        sync = m.d.sync

        i_in, stall_in = self.i_in, self.stall_in
        flush_in = self.flush_in

        # begin
        # if rising_edge(clk) then
        #     -- keep outputs to fetch2 unchanged on a stall
        #     -- except that flush or reset sets valid to 0
        #     -- If use_previous, keep the same data as last
        #     -- cycle and use the second half
        #     if stall_in = '1' or use_previous = '1' then
        #         if rst = '1' or flush_in = '1' then
        #             r.hit_valid <= '0';
        #         end if;
        # keep outputs to fetch2 unchanged on a stall
        # except that flush or reset sets valid to 0
        # If use_previous, keep the same data as last
        # cycle and use the second half
        with m.If(stall_in | use_previous):
            with m.If(flush_in):
                sync += r.hit_valid.eq(0)
        #     else
        #         -- On a hit, latch the request for the next cycle,
        #         -- when the BRAM data will be available on the
        #         -- cache_out output of the corresponding way
        #         r.hit_valid <= req_is_hit;
        #         if req_is_hit = '1' then
        #             r.hit_way <= req_hit_way;
        with m.Else():
            # On a hit, latch the request for the next cycle,
            # when the BRAM data will be available on the
            # cache_out output of the corresponding way
            sync += r.hit_valid.eq(req_is_hit)

            with m.If(req_is_hit):
                sync += r.hit_way.eq(req_hit_way)

                # report "cache hit nia:" & to_hstring(i_in.nia) &
                #     " IR:" & std_ulogic'image(i_in.virt_mode) &
                #     " SM:" & std_ulogic'image(i_in.stop_mark) &
                #     " idx:" & integer'image(req_index) &
                #     " tag:" & to_hstring(req_tag) &
                #     " way:" & integer'image(req_hit_way) &
                #     " RA:" & to_hstring(real_addr);
                sync += Display("cache hit nia:%x IR:%x SM:%x idx:%x " \
                                "tag:%x way:%x RA:%x", i_in.nia, \
                                i_in.virt_mode, i_in.stop_mark, req_index, \
                                req_tag, req_hit_way, real_addr)

        #     end if;
        # end if;
        # if stall_in = '0' then
        #     -- Send stop marks and NIA down regardless of validity
        #     r.hit_smark <= i_in.stop_mark;
        #     r.hit_nia <= i_in.nia;
        # end if;
        with m.If(~stall_in):
            # Send stop marks and NIA down regardless of validity
            sync += r.hit_smark.eq(i_in.stop_mark)
            sync += r.hit_nia.eq(i_in.nia)
        # end if;
        # end process;

    # -- Cache miss/reload synchronous machine
    # icache_miss : process(clk)
    # Cache miss/reload synchronous machine
    def icache_miss(self, m, cache_valid_bits, r, req_is_miss,
                    req_index, req_laddr, req_tag, replace_way,
                    cache_tags, access_ok, real_addr):
        comb = m.d.comb
        sync = m.d.sync

        i_in, wb_in, m_in = self.i_in, self.wb_in, self.m_in
        stall_in, flush_in = self.stall_in, self.flush_in
        inval_in = self.inval_in

        # variable tagset : cache_tags_set_t;
        # variable stbs_done : boolean;

        tagset = Signal(TAG_RAM_WIDTH)
        stbs_done = Signal()

        # begin
        # if rising_edge(clk) then
        #     -- On reset, clear all valid bits to force misses
        #     if rst = '1' then
        #         On reset, clear all valid bits to force misses
        #         for i in index_t loop
        #             cache_valids(i) <= (others => '0');
        #         end loop;
        #         r.state <= IDLE;
        #         r.wb.cyc <= '0';
        #         r.wb.stb <= '0';
        #         -- We only ever do reads on wishbone
        #         r.wb.dat <= (others => '0');
        #         r.wb.sel <= "11111111";
        #         r.wb.we <= '0';

        #         -- Not useful normally but helps avoiding
        #         -- tons of sim warnings
        #         r.wb.adr <= (others => '0');

        #     else

        #         -- Process cache invalidations
        #         if inval_in = '1' then
        #             for i in index_t loop
        #                 cache_valids(i) <= (others => '0');
        #             end loop;
        #             r.store_valid <= '0';
        #         end if;
        comb += r.wb.sel.eq(-1)
        comb += r.wb.adr.eq(r.req_adr[3:])

        # Process cache invalidations
        with m.If(inval_in):
            for i in range(NUM_LINES):
                sync += cache_valid_bits[i].eq(0)
            sync += r.store_valid.eq(0)

        # -- Main state machine
        # case r.state is
        # Main state machine
        with m.Switch(r.state):

            # when IDLE =>
            with m.Case(State.IDLE):
                # -- Reset per-row valid flags,
                # -- only used in WAIT_ACK
                # for i in 0 to ROW_PER_LINE - 1 loop
                #     r.rows_valid(i) <= '0';
                # end loop;
                # Reset per-row valid flags,
                # only used in WAIT_ACK
                for i in range(ROW_PER_LINE):
                    sync += r.rows_valid[i].eq(0)

                # -- We need to read a cache line
                # if req_is_miss = '1' then
                #     report "cache miss nia:" & to_hstring(i_in.nia) &
                #         " IR:" & std_ulogic'image(i_in.virt_mode) &
                #         " SM:" & std_ulogic'image(i_in.stop_mark) &
                #         " idx:" & integer'image(req_index) &
                #         " way:" & integer'image(replace_way) &
                #         " tag:" & to_hstring(req_tag) &
                #         " RA:" & to_hstring(real_addr);
                # We need to read a cache line
                with m.If(req_is_miss):
                    sync += Display(
                            "cache miss nia:%x IR:%x SM:%x idx:%x " \
                            " way:%x tag:%x RA:%x", i_in.nia, \
                            i_in.virt_mode, i_in.stop_mark, req_index, \
                            replace_way, req_tag, real_addr)

                    # -- Keep track of our index and way for
                    # -- subsequent stores
                    # r.store_index <= req_index;
                    # r.store_row <= get_row(req_laddr);
                    # r.store_tag <= req_tag;
                    # r.store_valid <= '1';
                    # r.end_row_ix <=
                    #     get_row_of_line(get_row(req_laddr)) - 1;
                    # Keep track of our index and way
                    # for subsequent stores
                    sync += r.store_index.eq(req_index)
                    sync += r.store_row.eq(get_row(req_laddr))
                    sync += r.store_tag.eq(req_tag)
                    sync += r.store_valid.eq(1)
                    sync += r.end_row_ix.eq(
                        get_row_of_line(
                            get_row(req_laddr)
                        ) - 1
                    )

                    # -- Prep for first wishbone read. We calculate the
                    # -- address of the start of the cache line and
                    # -- start the WB cycle.
                    # r.wb.adr <= req_laddr(r.wb.adr'left downto 0);
                    # r.wb.cyc <= '1';
                    # r.wb.stb <= '1';
                    # Prep for first wishbone read.
                    # We calculate the
                    # address of the start of the cache line and
                    # start the WB cycle.
                    sync += r.req_adr.eq(req_laddr)
                    sync += r.wb.cyc.eq(1)
                    sync += r.wb.stb.eq(1)

                    # -- Track that we had one request sent
                    # r.state <= CLR_TAG;
                    # Track that we had one request sent
                    sync += r.state.eq(State.CLR_TAG)
                # end if;

            # when CLR_TAG | WAIT_ACK =>
            with m.Case(State.CLR_TAG, State.WAIT_ACK):
                # if r.state = CLR_TAG then
                with m.If(r.state == State.CLR_TAG):
                    # -- Get victim way from plru
                    # r.store_way <= replace_way;
                    # Get victim way from plru
                    sync += r.store_way.eq(replace_way)
                    #
                    # -- Force misses on that way while
                    # -- reloading that line
                    # cache_valids(req_index)(replace_way) <= '0';
                    # Force misses on that way while
                    # reloading that line
                    # (fix: cv holds the per-way valid bits, so size it
                    # by NUM_WAYS rather than INDEX_BITS)
                    cv = Signal(NUM_WAYS)
                    comb += cv.eq(cache_valid_bits[req_index])
                    comb += cv.bit_select(replace_way, 1).eq(0)
                    sync += cache_valid_bits[req_index].eq(cv)

                    # -- Store new tag in selected way
                    # for i in 0 to NUM_WAYS-1 loop
                    #     if i = replace_way then
                    #         tagset := cache_tags(r.store_index);
                    #         write_tag(i, tagset, r.store_tag);
                    #         cache_tags(r.store_index) <= tagset;
                    #     end if;
                    # end loop;
                    for i in range(NUM_WAYS):
                        with m.If(i == replace_way):
                            comb += tagset.eq(cache_tags[r.store_index])
                            comb += write_tag(i, tagset, r.store_tag)
                            sync += cache_tags[r.store_index].eq(tagset)

                    # r.state <= WAIT_ACK;
                    sync += r.state.eq(State.WAIT_ACK)
                # end if;

                # -- Requests are all sent if stb is 0
                # stbs_done := r.wb.stb = '0';
                # Requests are all sent if stb is 0
                stbs_zero = Signal()
                comb += stbs_zero.eq(r.wb.stb == 0)
                comb += stbs_done.eq(stbs_zero)

                # -- If we are still sending requests,
                # -- was one accepted ?
                # if wishbone_in.stall = '0' and not stbs_done then
                # If we are still sending requests,
                # was one accepted?
                with m.If(~wb_in.stall & ~stbs_zero):
                    # -- That was the last word ? We are done sending.
                    # -- Clear stb and set stbs_done so we can handle
                    # -- an eventual last ack on the same cycle.
                    # if is_last_row_addr(r.wb.adr, r.end_row_ix) then
                    #     r.wb.stb <= '0';
                    #     stbs_done := true;
                    # end if;
                    # That was the last word ?
                    # We are done sending.
                    # Clear stb and set stbs_done
                    # so we can handle
                    # an eventual last ack on
                    # the same cycle.
                    with m.If(is_last_row_addr(r.req_adr, r.end_row_ix)):
                        sync += Display("IS_LAST_ROW_ADDR " \
                                        "r.wb.addr:%x r.end_row_ix:%x " \
                                        "r.wb.stb:%x stbs_zero:%x " \
                                        "stbs_done:%x", r.wb.adr, \
                                        r.end_row_ix, r.wb.stb, \
                                        stbs_zero, stbs_done)
                        sync += r.wb.stb.eq(0)
                        comb += stbs_done.eq(1)

                    # -- Calculate the next row address
                    # r.wb.adr <= next_row_addr(r.wb.adr);
                    # Calculate the next row address
                    rarange = Signal(LINE_OFF_BITS - ROW_OFF_BITS)
                    comb += rarange.eq(
                        r.req_adr[ROW_OFF_BITS:LINE_OFF_BITS] + 1
                    )
                    sync += r.req_adr[ROW_OFF_BITS:LINE_OFF_BITS].eq(
                        rarange
                    )
                    sync += Display("RARANGE r.wb.adr:%x stbs_zero:%x " \
                                    "stbs_done:%x", rarange, stbs_zero, \
                                    stbs_done)
                # end if;

                # -- Incoming acks processing
                # if wishbone_in.ack = '1' then
                # Incoming acks processing
                with m.If(wb_in.ack):
                    # r.rows_valid(r.store_row mod ROW_PER_LINE)
                    #     <= '1';
                    sync += Display("WB_IN_ACK stbs_zero:%x " \
                                    "stbs_done:%x", \
                                    stbs_zero, stbs_done)

                    sync += r.rows_valid[r.store_row % ROW_PER_LINE].eq(1)

                    # -- Check for completion
                    # if stbs_done and
                    #     is_last_row(r.store_row, r.end_row_ix) then
                    # Check for completion
                    with m.If(stbs_done &
                              is_last_row(r.store_row, r.end_row_ix)):
                        # -- Complete wishbone cycle
                        # r.wb.cyc <= '0';
                        # Complete wishbone cycle
                        sync += r.wb.cyc.eq(0)

                        # -- Cache line is now valid
                        # cache_valids(r.store_index)(replace_way) <=
                        #     r.store_valid and not inval_in;
                        # Cache line is now valid
                        # (fix: per-way valid bits, so NUM_WAYS wide,
                        # not INDEX_BITS)
                        cv = Signal(NUM_WAYS)
                        comb += cv.eq(cache_valid_bits[r.store_index])
                        comb += cv.bit_select(replace_way, 1).eq(
                            r.store_valid & ~inval_in
                        )
                        sync += cache_valid_bits[r.store_index].eq(cv)

                        # -- We are done
                        # r.state <= IDLE;
                        # We are done
                        sync += r.state.eq(State.IDLE)
                    # end if;

                    # -- Increment store row counter
                    # r.store_row <= next_row(r.store_row);
                    # Increment store row counter
                    sync += r.store_row.eq(next_row(r.store_row))
                # end if;
            # end case;
        # end if;
        #
        # -- TLB miss and protection fault processing
        # if rst = '1' or flush_in = '1' or m_in.tlbld = '1' then
        #     r.fetch_failed <= '0';
        # elsif i_in.req = '1' and access_ok = '0' and
        #     stall_in = '0' then
        #     r.fetch_failed <= '1';
        # end if;
        # TLB miss and protection fault processing
        with m.If(flush_in | m_in.tlbld):
            sync += r.fetch_failed.eq(0)

        with m.Elif(i_in.req & ~access_ok & ~stall_in):
            sync += r.fetch_failed.eq(1)
        # end if;
        # end process;

    # icache_log: if LOG_LENGTH > 0 generate
    def icache_log(self, m, req_hit_way, ra_valid, access_ok,
                   req_is_miss, req_is_hit, lway, wstate, r):
        comb = m.d.comb
        sync = m.d.sync

        wb_in, i_out = self.wb_in, self.i_out
        log_out, stall_out = self.log_out, self.stall_out

        # -- Output data to logger
        # signal log_data : std_ulogic_vector(53 downto 0);
        # begin
        # data_log: process(clk)
        #     variable lway: way_t;
        #     variable wstate: std_ulogic;
        # Output data to logger
        for i in range(LOG_LENGTH):
            # Output data to logger
            log_data = Signal(54)
            lway = Signal(NUM_WAYS)
            wstate = Signal()

            # begin
            # if rising_edge(clk) then
            #     lway := req_hit_way;
            #     wstate := '0';
            sync += lway.eq(req_hit_way)
            sync += wstate.eq(0)

            # if r.state /= IDLE then
            #     wstate := '1';
            # end if;
            with m.If(r.state != State.IDLE):
                sync += wstate.eq(1)

            # log_data <= i_out.valid &
            #             i_out.insn &
            #             wishbone_in.ack &
            #             r.wb.adr(5 downto 3) &
            #             r.wb.stb & r.wb.cyc &
            #             wishbone_in.stall &
            #             stall_out &
            #             r.fetch_failed &
            #             r.hit_nia(5 downto 2) &
            #             wstate &
            #             std_ulogic_vector(to_unsigned(lway, 3)) &
            #             req_is_hit & req_is_miss &
            #             access_ok &
            #             ra_valid;
            sync += log_data.eq(Cat(
                    ra_valid, access_ok, req_is_miss, req_is_hit,
                    lway, wstate, r.hit_nia[2:6],
                    r.fetch_failed, stall_out, wb_in.stall, r.wb.cyc,
                    r.wb.stb, r.wb.adr[3:6], wb_in.ack, i_out.insn,
                    i_out.valid
                ))
            # end if;
            # end process;
            # log_out <= log_data;
            comb += log_out.eq(log_data)
    # end generate;
    # end;

    def elaborate(self, platform):

        m = Module()
        comb = m.d.comb

        # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
        cache_tags = CacheTagArray()
        cache_valid_bits = CacheValidBitsArray()

        # signal itlb_valids : tlb_valids_t;
        # signal itlb_tags : tlb_tags_t;
        # signal itlb_ptes : tlb_ptes_t;
        # attribute ram_style of itlb_tags : signal is "distributed";
        # attribute ram_style of itlb_ptes : signal is "distributed";
        itlb_valid_bits = TLBValidBitsArray()
        itlb_tags = TLBTagArray()
        itlb_ptes = TLBPtesArray()
        # TODO to be passed to nmigen as ram attributes
        # attribute ram_style of itlb_tags : signal is "distributed";
        # attribute ram_style of itlb_ptes : signal is "distributed";

        # -- Privilege bit from PTE EAA field
        # signal eaa_priv : std_ulogic;
        # Privilege bit from PTE EAA field
        eaa_priv = Signal()

        # signal r : reg_internal_t;
        r = RegInternal()

        # -- Async signals on incoming request
        # signal req_index : index_t;
        # signal req_row : row_t;
        # signal req_hit_way : way_t;
        # signal req_tag : cache_tag_t;
        # signal req_is_hit : std_ulogic;
        # signal req_is_miss : std_ulogic;
        # signal req_laddr : std_ulogic_vector(63 downto 0);
        # Async signals on incoming request
        req_index = Signal(NUM_LINES)
        req_row = Signal(BRAM_ROWS)
        req_hit_way = Signal(NUM_WAYS)
        req_tag = Signal(TAG_BITS)
        req_is_hit = Signal()
        req_is_miss = Signal()
        req_laddr = Signal(64)

        # signal tlb_req_index : tlb_index_t;
        # signal real_addr : std_ulogic_vector(
        #     REAL_ADDR_BITS - 1 downto 0
        # );
        # signal ra_valid : std_ulogic;
        # signal priv_fault : std_ulogic;
        # signal access_ok : std_ulogic;
        # signal use_previous : std_ulogic;
        tlb_req_index = Signal(TLB_SIZE)
        real_addr = Signal(REAL_ADDR_BITS)
        ra_valid = Signal()
        priv_fault = Signal()
        access_ok = Signal()
        use_previous = Signal()

        # signal cache_out : cache_ram_out_t;
        cache_out_row = Signal(ROW_SIZE_BITS)

        # signal plru_victim : plru_out_t;
        # signal replace_way : way_t;
        plru_victim = PLRUOut()
        replace_way = Signal(NUM_WAYS)

        # call sub-functions putting everything together, using shared
        # signals established above
        self.rams(m, r, cache_out_row, use_previous, replace_way, req_row)
        self.maybe_plrus(m, r, plru_victim)
        self.itlb_lookup(m, tlb_req_index, itlb_ptes, itlb_tags,
                         real_addr, itlb_valid_bits, ra_valid, eaa_priv,
                         priv_fault, access_ok)
        self.itlb_update(m, itlb_valid_bits, itlb_tags, itlb_ptes)
        # (fix: req_hit_way is now passed in so icache_comb can drive it)
        self.icache_comb(m, use_previous, r, req_index, req_row,
                         req_hit_way, req_tag, real_addr, req_laddr,
                         cache_valid_bits, cache_tags, access_ok,
                         req_is_hit, req_is_miss, replace_way,
                         plru_victim, cache_out_row)
        self.icache_hit(m, use_previous, r, req_is_hit, req_hit_way,
                        req_index, req_tag, real_addr)
        self.icache_miss(m, cache_valid_bits, r, req_is_miss, req_index,
                         req_laddr, req_tag, replace_way, cache_tags,
                         access_ok, real_addr)
        #self.icache_log(m, log_out, req_hit_way, ra_valid, access_ok,
        #                req_is_miss, req_is_hit, lway, wstate, r)

        return m



# icache_tb.vhdl
#
# library ieee;
# use ieee.std_logic_1164.all;
#
# library work;
# use work.common.all;
# use work.wishbone_types.all;
#
# entity icache_tb is
# end icache_tb;
#
# architecture behave of icache_tb is
#     signal clk : std_ulogic;
#     signal rst : std_ulogic;
#
#     signal i_out : Fetch1ToIcacheType;
#     signal i_in : IcacheToDecode1Type;
#
#     signal m_out : MmuToIcacheType;
#
#     signal wb_bram_in : wishbone_master_out;
#     signal wb_bram_out : wishbone_slave_out;
#
#     constant clk_period : time := 10 ns;
# begin
#     icache0: entity work.icache
#         generic map(
#             LINE_SIZE => 64,
#             NUM_LINES => 4
#         )
#         port map(
#             clk => clk,
#             rst => rst,
#             i_in => i_out,
#             i_out => i_in,
#             m_in => m_out,
#             stall_in => '0',
#             flush_in => '0',
#             inval_in => '0',
#             wishbone_out => wb_bram_in,
#             wishbone_in => wb_bram_out
#         );
#
#     -- BRAM Memory slave
#     bram0: entity work.wishbone_bram_wrapper
#         generic map(
#             MEMORY_SIZE => 1024,
#             RAM_INIT_FILE => "icache_test.bin"
#         )
#         port map(
#             clk => clk,
#             rst => rst,
#             wishbone_in => wb_bram_in,
#             wishbone_out => wb_bram_out
#         );
#
#     clk_process: process
#     begin
#         clk <= '0';
#         wait for clk_period/2;
#         clk <= '1';
#         wait for clk_period/2;
#     end process;
#
#     rst_process: process
#     begin
#         rst <= '1';
#         wait for 2*clk_period;
#         rst <= '0';
#         wait;
#     end process;
#
#     stim: process
#     begin
#         i_out.req <= '0';
#         i_out.nia <= (others => '0');
#         i_out.stop_mark <= '0';
#
#         m_out.tlbld <= '0';
#         m_out.tlbie <= '0';
#         m_out.addr <= (others => '0');
#         m_out.pte <= (others => '0');
#
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#
#         i_out.req <= '1';
#         i_out.nia <= x"0000000000000004";
#
#         wait for 30*clk_period;
#         wait until rising_edge(clk);
#
#         assert i_in.valid = '1' severity failure;
#         assert i_in.insn = x"00000001"
#             report "insn @" & to_hstring(i_out.nia) &
#                 "=" & to_hstring(i_in.insn) &
#                 " expected 00000001"
#             severity failure;
#
#         i_out.req <= '0';
#
#         wait until rising_edge(clk);
#
#         -- hit
#         i_out.req <= '1';
#         i_out.nia <= x"0000000000000008";
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         assert i_in.valid = '1' severity failure;
#         assert i_in.insn = x"00000002"
#             report "insn @" & to_hstring(i_out.nia) &
#                 "=" & to_hstring(i_in.insn) &
#                 " expected 00000002"
#             severity failure;
#         wait until rising_edge(clk);
#
#         -- another miss
#         i_out.req <= '1';
#         i_out.nia <= x"0000000000000040";
#
#         wait for 30*clk_period;
#         wait until rising_edge(clk);
#
#         assert i_in.valid = '1' severity failure;
#         assert i_in.insn = x"00000010"
#             report "insn @" & to_hstring(i_out.nia) &
#                 "=" & to_hstring(i_in.insn) &
#                 " expected 00000010"
#             severity failure;
#
#         -- test something that aliases
#         i_out.req <= '1';
#         i_out.nia <= x"0000000000000100";
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         assert i_in.valid = '0' severity failure;
#         wait until rising_edge(clk);
#
#         wait for 30*clk_period;
#         wait until rising_edge(clk);
#
#         assert i_in.valid = '1' severity failure;
#         assert i_in.insn = x"00000040"
#             report "insn @" & to_hstring(i_out.nia) &
#                 "=" & to_hstring(i_in.insn) &
#                 " expected 00000040"
#             severity failure;
#
#         i_out.req <= '0';
#
#         std.env.finish;
#     end process;
# end;
def icache_sim(dut):
    i_out = dut.i_in
    i_in = dut.i_out
    m_out = dut.m_in

    yield i_in.valid.eq(0)
    yield i_out.priv_mode.eq(1)
    yield i_out.req.eq(0)
    yield i_out.nia.eq(0)
    yield i_out.stop_mark.eq(0)
    yield m_out.tlbld.eq(0)
    yield m_out.tlbie.eq(0)
    yield m_out.addr.eq(0)
    yield m_out.pte.eq(0)
    yield
    yield
    yield
    yield
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000004, 64))
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    nia = yield i_out.nia
    insn = yield i_in.insn
    print(f"valid? {valid}")
    assert valid
    assert insn == 0x00000001, \
        "insn @%x=%x expected 00000001" % (nia, insn)
    yield i_out.req.eq(0)
    yield

    # hit
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000008, 64))
    yield
    yield
    valid = yield i_in.valid
    nia = yield i_in.nia
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000002, \
        "insn @%x=%x expected 00000002" % (nia, insn)
    yield

    # another miss
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000040, 64))
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    nia = yield i_out.nia
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000010, \
        "insn @%x=%x expected 00000010" % (nia, insn)

    # test something that aliases
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000100, 64))
    yield
    yield
    valid = yield i_in.valid
    # (fix: "assert ~valid" is always true in python because bitwise-not
    # of 0 or 1 is a non-zero int; "not" is the correct operator)
    assert not valid
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    nia = yield i_out.nia
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000040, \
        "insn @%x=%x expected 00000040" % (nia, insn)
    yield i_out.req.eq(0)



def test_icache(mem):
    dut = ICache()

    memory = Memory(width=64, depth=16*64, init=mem)
    sram = SRAM(memory=memory, granularity=8)

    m = Module()

    m.submodules.icache = dut
    m.submodules.sram = sram

    m.d.comb += sram.bus.cyc.eq(dut.wb_out.cyc)
    m.d.comb += sram.bus.stb.eq(dut.wb_out.stb)
    m.d.comb += sram.bus.we.eq(dut.wb_out.we)
    m.d.comb += sram.bus.sel.eq(dut.wb_out.sel)
    m.d.comb += sram.bus.adr.eq(dut.wb_out.adr)
    m.d.comb += sram.bus.dat_w.eq(dut.wb_out.dat)

    m.d.comb += dut.wb_in.ack.eq(sram.bus.ack)
    m.d.comb += dut.wb_in.dat.eq(sram.bus.dat_r)

    # nmigen Simulation
    sim = Simulator(m)
    sim.add_clock(1e-6)

    sim.add_sync_process(wrap(icache_sim(dut)))
    with sim.write_vcd('test_icache.vcd'):
        sim.run()


if __name__ == '__main__':
    dut = ICache()
    vl = rtlil.convert(dut, ports=[])
    with open("test_icache.il", "w") as f:
        f.write(vl)

    mem = []
    for i in range(512):
        mem.append((i*2) | ((i*2+1)<<32))

    test_icache(mem)