icache.py move icache_miss WAIT_ACK FSM state into method icache_miss_wait_ack()...
[soc.git] / src / soc / experiment / icache.py
1 """ICache
2
3 based on Anton Blanchard microwatt icache.vhdl
4
5 Set associative icache
6
7 TODO (in no specific order):
8 * Add debug interface to inspect cache content
9 * Add snoop/invalidate path
10 * Add multi-hit error detection
11 * Pipelined bus interface (wb or axi)
12 * Maybe add parity? There's a few bits free in each BRAM row on Xilinx
13 * Add optimization: service hits on partially loaded lines
14 * Add optimization: (maybe) interrupt reload on flush/redirect
15 * Check if playing with the geometry of the cache tags allows for more
16 efficient use of distributed RAM and less logic/muxes. Currently we
17 write TAG_BITS width which may not match full ram blocks and might
18 cause muxes to be inferred for "partial writes".
19 * Check if making the read size of PLRU a ROM helps utilization
20
21 """
22 from enum import Enum, unique
23 from nmigen import (Module, Signal, Elaboratable, Cat, Array, Const, Repl)
24 from nmigen.cli import main, rtlil
25 from nmutil.iocontrol import RecordObject
26 from nmigen.utils import log2_int
27 from nmutil.util import Display
28
29 #from nmutil.plru import PLRU
30 from soc.experiment.cache_ram import CacheRam
31 from soc.experiment.plru import PLRU
32
33 from soc.experiment.mem_types import (Fetch1ToICacheType,
34 ICacheToDecode1Type,
35 MMUToICacheType)
36
37 from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS,
38 WB_SEL_BITS, WBAddrType, WBDataType,
39 WBSelType, WBMasterOut, WBSlaveOut,
40 WBMasterOutVector, WBSlaveOutVector,
41 WBIOMasterOut, WBIOSlaveOut)
42
43 # for test
44 from nmigen_soc.wishbone.sram import SRAM
45 from nmigen import Memory
46 from nmutil.util import wrap
47 from nmigen.cli import main, rtlil
48 if True:
49 from nmigen.back.pysim import Simulator, Delay, Settle
50 else:
51 from nmigen.sim.cxxsim import Simulator, Delay, Settle
52
53
54 SIM = 0
55 LINE_SIZE = 64
56 # BRAM organisation: We never access more than wishbone_data_bits
57 # at a time so to save resources we make the array only that wide,
58 # and use consecutive indices for to make a cache "line"
59 #
60 # ROW_SIZE is the width in bytes of the BRAM (based on WB, so 64-bits)
61 ROW_SIZE = WB_DATA_BITS // 8
62 # Number of lines in a set
63 NUM_LINES = 16
64 # Number of ways
65 NUM_WAYS = 4
66 # L1 ITLB number of entries (direct mapped)
67 TLB_SIZE = 64
68 # L1 ITLB log_2(page_size)
69 TLB_LG_PGSZ = 12
70 # Number of real address bits that we store
71 REAL_ADDR_BITS = 56
72 # Non-zero to enable log data collection
73 LOG_LENGTH = 0
74
75 ROW_SIZE_BITS = ROW_SIZE * 8
76 # ROW_PER_LINE is the number of row
77 # (wishbone) transactions in a line
78 ROW_PER_LINE = LINE_SIZE // ROW_SIZE
79 # BRAM_ROWS is the number of rows in
80 # BRAM needed to represent the full icache
81 BRAM_ROWS = NUM_LINES * ROW_PER_LINE
82 # INSN_PER_ROW is the number of 32bit
83 # instructions per BRAM row
84 INSN_PER_ROW = ROW_SIZE_BITS // 32
85
86 print("ROW_SIZE", ROW_SIZE)
87 print("ROW_SIZE_BITS", ROW_SIZE_BITS)
88 print("ROW_PER_LINE", ROW_PER_LINE)
89 print("BRAM_ROWS", BRAM_ROWS)
90 print("INSN_PER_ROW", INSN_PER_ROW)
91
92 # Bit fields counts in the address
93 #
94 # INSN_BITS is the number of bits to
95 # select an instruction in a row
96 INSN_BITS = log2_int(INSN_PER_ROW)
97 # ROW_BITS is the number of bits to
98 # select a row
99 ROW_BITS = log2_int(BRAM_ROWS)
100 # ROW_LINEBITS is the number of bits to
101 # select a row within a line
102 ROW_LINEBITS = log2_int(ROW_PER_LINE)
103 # LINE_OFF_BITS is the number of bits for
104 # the offset in a cache line
105 LINE_OFF_BITS = log2_int(LINE_SIZE)
106 # ROW_OFF_BITS is the number of bits for
107 # the offset in a row
108 ROW_OFF_BITS = log2_int(ROW_SIZE)
109 # INDEX_BITS is the number of bits to
110 # select a cache line
111 INDEX_BITS = log2_int(NUM_LINES)
112 # SET_SIZE_BITS is the log base 2 of
113 # the set size
114 SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS
115 # TAG_BITS is the number of bits of
116 # the tag part of the address
117 TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS
118 # TAG_WIDTH is the width in bits of each way of the tag RAM
119 TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)
120
121 # WAY_BITS is the number of bits to
122 # select a way
123 WAY_BITS = log2_int(NUM_WAYS)
124 TAG_RAM_WIDTH = TAG_BITS * NUM_WAYS
125
126 # -- L1 ITLB.
127 # constant TLB_BITS : natural := log2(TLB_SIZE);
128 # constant TLB_EA_TAG_BITS : natural := 64 - (TLB_LG_PGSZ + TLB_BITS);
129 # constant TLB_PTE_BITS : natural := 64;
130 TLB_BITS = log2_int(TLB_SIZE)
131 TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_BITS)
132 TLB_PTE_BITS = 64
133
134
135 print("INSN_BITS", INSN_BITS)
136 print("ROW_BITS", ROW_BITS)
137 print("ROW_LINEBITS", ROW_LINEBITS)
138 print("LINE_OFF_BITS", LINE_OFF_BITS)
139 print("ROW_OFF_BITS", ROW_OFF_BITS)
140 print("INDEX_BITS", INDEX_BITS)
141 print("SET_SIZE_BITS", SET_SIZE_BITS)
142 print("TAG_BITS", TAG_BITS)
143 print("WAY_BITS", WAY_BITS)
144 print("TAG_RAM_WIDTH", TAG_RAM_WIDTH)
145 print("TLB_BITS", TLB_BITS)
146 print("TLB_EA_TAG_BITS", TLB_EA_TAG_BITS)
147 print("TLB_PTE_BITS", TLB_PTE_BITS)
148
149
150
151
152 # architecture rtl of icache is
153 #constant ROW_SIZE_BITS : natural := ROW_SIZE*8;
154 #-- ROW_PER_LINE is the number of row (wishbone
155 #-- transactions) in a line
156 #constant ROW_PER_LINE : natural := LINE_SIZE / ROW_SIZE;
157 #-- BRAM_ROWS is the number of rows in BRAM
158 #-- needed to represent the full
159 #-- icache
160 #constant BRAM_ROWS : natural := NUM_LINES * ROW_PER_LINE;
161 #-- INSN_PER_ROW is the number of 32bit instructions per BRAM row
162 #constant INSN_PER_ROW : natural := ROW_SIZE_BITS / 32;
163 #-- Bit fields counts in the address
164 #
165 #-- INSN_BITS is the number of bits to select
166 #-- an instruction in a row
167 #constant INSN_BITS : natural := log2(INSN_PER_ROW);
168 #-- ROW_BITS is the number of bits to select a row
169 #constant ROW_BITS : natural := log2(BRAM_ROWS);
170 #-- ROW_LINEBITS is the number of bits to
171 #-- select a row within a line
172 #constant ROW_LINEBITS : natural := log2(ROW_PER_LINE);
173 #-- LINE_OFF_BITS is the number of bits for the offset
174 #-- in a cache line
175 #constant LINE_OFF_BITS : natural := log2(LINE_SIZE);
176 #-- ROW_OFF_BITS is the number of bits for the offset in a row
177 #constant ROW_OFF_BITS : natural := log2(ROW_SIZE);
178 #-- INDEX_BITS is the number of bits to select a cache line
179 #constant INDEX_BITS : natural := log2(NUM_LINES);
180 #-- SET_SIZE_BITS is the log base 2 of the set size
181 #constant SET_SIZE_BITS : natural := LINE_OFF_BITS + INDEX_BITS;
182 #-- TAG_BITS is the number of bits of the tag part of the address
183 #constant TAG_BITS : natural := REAL_ADDR_BITS - SET_SIZE_BITS;
184 #-- WAY_BITS is the number of bits to select a way
185 #constant WAY_BITS : natural := log2(NUM_WAYS);
186
187 #-- Example of layout for 32 lines of 64 bytes:
188 #--
189 #-- .. tag |index| line |
190 #-- .. | row | |
191 #-- .. | | | |00| zero (2)
192 #-- .. | | |-| | INSN_BITS (1)
193 #-- .. | |---| | ROW_LINEBITS (3)
194 #-- .. | |--- - --| LINE_OFF_BITS (6)
195 #-- .. | |- --| ROW_OFF_BITS (3)
196 #-- .. |----- ---| | ROW_BITS (8)
197 #-- .. |-----| | INDEX_BITS (5)
198 #-- .. --------| | TAG_BITS (53)
199 # Example of layout for 32 lines of 64 bytes:
200 #
201 # .. tag |index| line |
202 # .. | row | |
203 # .. | | | |00| zero (2)
204 # .. | | |-| | INSN_BITS (1)
205 # .. | |---| | ROW_LINEBITS (3)
206 # .. | |--- - --| LINE_OFF_BITS (6)
207 # .. | |- --| ROW_OFF_BITS (3)
208 # .. |----- ---| | ROW_BITS (8)
209 # .. |-----| | INDEX_BITS (5)
210 # .. --------| | TAG_BITS (53)
211
212 #subtype row_t is integer range 0 to BRAM_ROWS-1;
213 #subtype index_t is integer range 0 to NUM_LINES-1;
214 #subtype way_t is integer range 0 to NUM_WAYS-1;
215 #subtype row_in_line_t is unsigned(ROW_LINEBITS-1 downto 0);
216 #
217 #-- The cache data BRAM organized as described above for each way
218 #subtype cache_row_t is std_ulogic_vector(ROW_SIZE_BITS-1 downto 0);
219 #
220 #-- The cache tags LUTRAM has a row per set. Vivado is a pain and will
221 #-- not handle a clean (commented) definition of the cache tags as a 3d
222 #-- memory. For now, work around it by putting all the tags
223 #subtype cache_tag_t is std_logic_vector(TAG_BITS-1 downto 0);
224 # type cache_tags_set_t is array(way_t) of cache_tag_t;
225 # type cache_tags_array_t is array(index_t) of cache_tags_set_t;
226 #constant TAG_RAM_WIDTH : natural := TAG_BITS * NUM_WAYS;
227 #subtype cache_tags_set_t is std_logic_vector(TAG_RAM_WIDTH-1 downto 0);
228 #type cache_tags_array_t is array(index_t) of cache_tags_set_t;
229 def CacheTagArray():
230 return Array(Signal(TAG_RAM_WIDTH, name="cachetag_%d" %x) \
231 for x in range(NUM_LINES))
232
233 #-- The cache valid bits
234 #subtype cache_way_valids_t is std_ulogic_vector(NUM_WAYS-1 downto 0);
235 #type cache_valids_t is array(index_t) of cache_way_valids_t;
236 #type row_per_line_valid_t is array(0 to ROW_PER_LINE - 1) of std_ulogic;
237 def CacheValidBitsArray():
238 return Array(Signal(NUM_WAYS, name="cachevalid_%d" %x) \
239 for x in range(NUM_LINES))
240
241 def RowPerLineValidArray():
242 return Array(Signal(name="rows_valid_%d" %x) \
243 for x in range(ROW_PER_LINE))
244
245
246 #attribute ram_style : string;
247 #attribute ram_style of cache_tags : signal is "distributed";
248 # TODO to be passed to nigmen as ram attributes
249 # attribute ram_style : string;
250 # attribute ram_style of cache_tags : signal is "distributed";
251
252
253 #subtype tlb_index_t is integer range 0 to TLB_SIZE - 1;
254 #type tlb_valids_t is array(tlb_index_t) of std_ulogic;
255 #subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0);
256 #type tlb_tags_t is array(tlb_index_t) of tlb_tag_t;
257 #subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
258 #type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t;
259 def TLBValidBitsArray():
260 return Array(Signal(name="tlbvalid_%d" %x) \
261 for x in range(TLB_SIZE))
262
263 def TLBTagArray():
264 return Array(Signal(TLB_EA_TAG_BITS, name="tlbtag_%d" %x) \
265 for x in range(TLB_SIZE))
266
267 def TLBPtesArray():
268 return Array(Signal(TLB_PTE_BITS, name="tlbptes_%d" %x) \
269 for x in range(TLB_SIZE))
270
271
272 #-- Cache RAM interface
273 #type cache_ram_out_t is array(way_t) of cache_row_t;
274 # Cache RAM interface
275 def CacheRamOut():
276 return Array(Signal(ROW_SIZE_BITS, name="cache_out_%d" %x) \
277 for x in range(NUM_WAYS))
278
279 #-- PLRU output interface
280 #type plru_out_t is array(index_t) of
281 # std_ulogic_vector(WAY_BITS-1 downto 0);
282 # PLRU output interface
283 def PLRUOut():
284 return Array(Signal(WAY_BITS, name="plru_out_%d" %x) \
285 for x in range(NUM_LINES))
286
287 # -- Return the cache line index (tag index) for an address
288 # function get_index(addr: std_ulogic_vector(63 downto 0))
289 # return index_t is
290 # begin
291 # return to_integer(unsigned(
292 # addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS)
293 # ));
294 # end;
295 # Return the cache line index (tag index) for an address
296 def get_index(addr):
297 return addr[LINE_OFF_BITS:SET_SIZE_BITS]
298
299 # -- Return the cache row index (data memory) for an address
300 # function get_row(addr: std_ulogic_vector(63 downto 0))
301 # return row_t is
302 # begin
303 # return to_integer(unsigned(
304 # addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS)
305 # ));
306 # end;
307 # Return the cache row index (data memory) for an address
308 def get_row(addr):
309 return addr[ROW_OFF_BITS:SET_SIZE_BITS]
310
311 # -- Return the index of a row within a line
312 # function get_row_of_line(row: row_t) return row_in_line_t is
313 # variable row_v : unsigned(ROW_BITS-1 downto 0);
314 # begin
315 # row_v := to_unsigned(row, ROW_BITS);
316 # return row_v(ROW_LINEBITS-1 downto 0);
317 # end;
318 # Return the index of a row within a line
319 def get_row_of_line(row):
320 return row[:ROW_LINEBITS]
321
322 # -- Returns whether this is the last row of a line
323 # function is_last_row_addr(addr: wishbone_addr_type;
324 # last: row_in_line_t
325 # )
326 # return boolean is
327 # begin
328 # return unsigned(
329 # addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS)
330 # ) = last;
331 # end;
332 # Returns whether this is the last row of a line
333 def is_last_row_addr(addr, last):
334 return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last
335
336 # -- Returns whether this is the last row of a line
337 # function is_last_row(row: row_t;
338 # last: row_in_line_t) return boolean is
339 # begin
340 # return get_row_of_line(row) = last;
341 # end;
342 # Returns whether this is the last row of a line
343 def is_last_row(row, last):
344 return get_row_of_line(row) == last
345
346 # -- Return the next row in the current cache line. We use a dedicated
347 # -- function in order to limit the size of the generated adder to be
348 # -- only the bits within a cache line (3 bits with default settings)
349 # function next_row(row: row_t) return row_t is
350 # variable row_v : std_ulogic_vector(ROW_BITS-1 downto 0);
351 # variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
352 # variable result : std_ulogic_vector(ROW_BITS-1 downto 0);
353 # begin
354 # row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
355 # row_idx := row_v(ROW_LINEBITS-1 downto 0);
356 # row_v(ROW_LINEBITS-1 downto 0) :=
357 # std_ulogic_vector(unsigned(row_idx) + 1);
358 # return to_integer(unsigned(row_v));
359 # end;
360 # Return the next row in the current cache line. We use a dedicated
361 # function in order to limit the size of the generated adder to be
362 # only the bits within a cache line (3 bits with default settings)
363 def next_row(row):
364 row_v = row[0:ROW_LINEBITS] + 1
365 return Cat(row_v[:ROW_LINEBITS], row[ROW_LINEBITS:])
366 # -- Read the instruction word for the given address in the
367 # -- current cache row
368 # function read_insn_word(addr: std_ulogic_vector(63 downto 0);
369 # data: cache_row_t) return std_ulogic_vector is
370 # variable word: integer range 0 to INSN_PER_ROW-1;
371 # begin
372 # word := to_integer(unsigned(addr(INSN_BITS+2-1 downto 2)));
373 # return data(31+word*32 downto word*32);
374 # end;
375 # Read the instruction word for the given address
376 # in the current cache row
377 def read_insn_word(addr, data):
378 word = addr[2:INSN_BITS+2]
379 return data.word_select(word, 32)
380
381 # -- Get the tag value from the address
382 # function get_tag(
383 # addr: std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0)
384 # )
385 # return cache_tag_t is
386 # begin
387 # return addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS);
388 # end;
389 # Get the tag value from the address
390 def get_tag(addr):
391 return addr[SET_SIZE_BITS:REAL_ADDR_BITS]
392
393 # -- Read a tag from a tag memory row
394 # function read_tag(way: way_t; tagset: cache_tags_set_t)
395 # return cache_tag_t is
396 # begin
397 # return tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS);
398 # end;
399 # Read a tag from a tag memory row
400 def read_tag(way, tagset):
401 return tagset.word_select(way, TAG_BITS)
402
403 # -- Write a tag to tag memory row
404 # procedure write_tag(way: in way_t;
405 # tagset: inout cache_tags_set_t; tag: cache_tag_t) is
406 # begin
407 # tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS) := tag;
408 # end;
409 # Write a tag to tag memory row
410 def write_tag(way, tagset, tag):
411 return read_tag(way, tagset).eq(tag)
412
413 # -- Simple hash for direct-mapped TLB index
414 # function hash_ea(addr: std_ulogic_vector(63 downto 0))
415 # return tlb_index_t is
416 # variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
417 # begin
418 # hash := addr(TLB_LG_PGSZ + TLB_BITS - 1 downto TLB_LG_PGSZ)
419 # xor addr(
420 # TLB_LG_PGSZ + 2 * TLB_BITS - 1 downto
421 # TLB_LG_PGSZ + TLB_BITS
422 # )
423 # xor addr(
424 # TLB_LG_PGSZ + 3 * TLB_BITS - 1 downto
425 # TLB_LG_PGSZ + 2 * TLB_BITS
426 # );
427 # return to_integer(unsigned(hash));
428 # end;
429 # Simple hash for direct-mapped TLB index
430 def hash_ea(addr):
431 hsh = addr[TLB_LG_PGSZ:TLB_LG_PGSZ + TLB_BITS] ^ addr[
432 TLB_LG_PGSZ + TLB_BITS:TLB_LG_PGSZ + 2 * TLB_BITS
433 ] ^ addr[
434 TLB_LG_PGSZ + 2 * TLB_BITS:TLB_LG_PGSZ + 3 * TLB_BITS
435 ]
436 return hsh
437
438 # begin
439 #
440 # XXX put these assert statements in - as python asserts
441 #
442 # assert LINE_SIZE mod ROW_SIZE = 0;
443 # assert ispow2(LINE_SIZE) report "LINE_SIZE not power of 2"
444 # assert ispow2(NUM_LINES) report "NUM_LINES not power of 2"
445 # assert ispow2(ROW_PER_LINE) report "ROW_PER_LINE not power of 2"
446 # assert ispow2(INSN_PER_ROW) report "INSN_PER_ROW not power of 2"
447 # assert (ROW_BITS = INDEX_BITS + ROW_LINEBITS)
448 # report "geometry bits don't add up"
449 # assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS)
450 # report "geometry bits don't add up"
451 # assert (REAL_ADDR_BITS = TAG_BITS + INDEX_BITS + LINE_OFF_BITS)
452 # report "geometry bits don't add up"
453 # assert (REAL_ADDR_BITS = TAG_BITS + ROW_BITS + ROW_OFF_BITS)
454 # report "geometry bits don't add up"
455 #
456 # sim_debug: if SIM generate
457 # debug: process
458 # begin
459 # report "ROW_SIZE = " & natural'image(ROW_SIZE);
460 # report "ROW_PER_LINE = " & natural'image(ROW_PER_LINE);
461 # report "BRAM_ROWS = " & natural'image(BRAM_ROWS);
462 # report "INSN_PER_ROW = " & natural'image(INSN_PER_ROW);
463 # report "INSN_BITS = " & natural'image(INSN_BITS);
464 # report "ROW_BITS = " & natural'image(ROW_BITS);
465 # report "ROW_LINEBITS = " & natural'image(ROW_LINEBITS);
466 # report "LINE_OFF_BITS = " & natural'image(LINE_OFF_BITS);
467 # report "ROW_OFF_BITS = " & natural'image(ROW_OFF_BITS);
468 # report "INDEX_BITS = " & natural'image(INDEX_BITS);
469 # report "TAG_BITS = " & natural'image(TAG_BITS);
470 # report "WAY_BITS = " & natural'image(WAY_BITS);
471 # wait;
472 # end process;
473 # end generate;
474
475 # Cache reload state machine
476 @unique
477 class State(Enum):
478 IDLE = 0
479 CLR_TAG = 1
480 WAIT_ACK = 2
481
482
483 class RegInternal(RecordObject):
484 def __init__(self):
485 super().__init__()
486 # Cache hit state (Latches for 1 cycle BRAM access)
487 self.hit_way = Signal(NUM_WAYS)
488 self.hit_nia = Signal(64)
489 self.hit_smark = Signal()
490 self.hit_valid = Signal()
491
492 # Cache miss state (reload state machine)
493 self.state = Signal(State, reset=State.IDLE)
494 self.wb = WBMasterOut("wb")
495 self.req_adr = Signal(64)
496 self.store_way = Signal(NUM_WAYS)
497 self.store_index = Signal(NUM_LINES)
498 self.store_row = Signal(BRAM_ROWS)
499 self.store_tag = Signal(TAG_BITS)
500 self.store_valid = Signal()
501 self.end_row_ix = Signal(ROW_LINEBITS)
502 self.rows_valid = RowPerLineValidArray()
503
504 # TLB miss state
505 self.fetch_failed = Signal()
506
507 # -- 64 bit direct mapped icache. All instructions are 4B aligned.
508 #
509 # entity icache is
510 # generic (
511 # SIM : boolean := false;
512 # -- Line size in bytes
513 # LINE_SIZE : positive := 64;
514 # -- BRAM organisation: We never access more
515 # -- than wishbone_data_bits
516 # -- at a time so to save resources we make the
517 # -- array only that wide,
518 # -- and use consecutive indices for to make a cache "line"
519 # --
520 # -- ROW_SIZE is the width in bytes of the BRAM (based on WB,
521 # -- so 64-bits)
522 # ROW_SIZE : positive := wishbone_data_bits / 8;
523 # -- Number of lines in a set
524 # NUM_LINES : positive := 32;
525 # -- Number of ways
526 # NUM_WAYS : positive := 4;
527 # -- L1 ITLB number of entries (direct mapped)
528 # TLB_SIZE : positive := 64;
529 # -- L1 ITLB log_2(page_size)
530 # TLB_LG_PGSZ : positive := 12;
531 # -- Number of real address bits that we store
532 # REAL_ADDR_BITS : positive := 56;
533 # -- Non-zero to enable log data collection
534 # LOG_LENGTH : natural := 0
535 # );
536 # port (
537 # clk : in std_ulogic;
538 # rst : in std_ulogic;
539 #
540 # i_in : in Fetch1ToIcacheType;
541 # i_out : out IcacheToDecode1Type;
542 #
543 # m_in : in MmuToIcacheType;
544 #
545 # stall_in : in std_ulogic;
546 # stall_out : out std_ulogic;
547 # flush_in : in std_ulogic;
548 # inval_in : in std_ulogic;
549 #
550 # wishbone_out : out wishbone_master_out;
551 # wishbone_in : in wishbone_slave_out;
552 #
553 # log_out : out std_ulogic_vector(53 downto 0)
554 # );
555 # end entity icache;
556 # 64 bit direct mapped icache. All instructions are 4B aligned.
557 class ICache(Elaboratable):
558 """64 bit direct mapped icache. All instructions are 4B aligned."""
559 def __init__(self):
560 self.i_in = Fetch1ToICacheType(name="i_in")
561 self.i_out = ICacheToDecode1Type(name="i_out")
562
563 self.m_in = MMUToICacheType(name="m_in")
564
565 self.stall_in = Signal()
566 self.stall_out = Signal()
567 self.flush_in = Signal()
568 self.inval_in = Signal()
569
570 self.wb_out = WBMasterOut(name="wb_out")
571 self.wb_in = WBSlaveOut(name="wb_in")
572
573 self.log_out = Signal(54)
574
575
576 # Generate a cache RAM for each way
577 def rams(self, m, r, cache_out_row, use_previous, replace_way, req_row):
578 comb = m.d.comb
579 sync = m.d.sync
580
581 wb_in, stall_in = self.wb_in, self.stall_in
582
583 for i in range(NUM_WAYS):
584 do_read = Signal(name="do_rd_%d" % i)
585 do_write = Signal(name="do_wr_%d" % i)
586 rd_addr = Signal(ROW_BITS)
587 wr_addr = Signal(ROW_BITS)
588 d_out = Signal(ROW_SIZE_BITS, name="d_out_%d" % i)
589 wr_sel = Signal(ROW_SIZE)
590
591 way = CacheRam(ROW_BITS, ROW_SIZE_BITS, True)
592 setattr(m.submodules, "cacheram_%d" % i, way)
593
594 comb += way.rd_en.eq(do_read)
595 comb += way.rd_addr.eq(rd_addr)
596 comb += d_out.eq(way.rd_data_o)
597 comb += way.wr_sel.eq(wr_sel)
598 comb += way.wr_addr.eq(wr_addr)
599 comb += way.wr_data.eq(wb_in.dat)
600
601 comb += do_read.eq(~(stall_in | use_previous))
602 comb += do_write.eq(wb_in.ack & (replace_way == i))
603
604 with m.If(do_write):
605 sync += Display("cache write adr: %x data: %lx",
606 wr_addr, way.wr_data)
607
608 with m.If(r.hit_way == i):
609 comb += cache_out_row.eq(d_out)
610 with m.If(do_read):
611 sync += Display("cache read adr: %x data: %x",
612 req_row, d_out)
613
614 comb += rd_addr.eq(req_row)
615 comb += wr_addr.eq(r.store_row)
616 comb += wr_sel.eq(Repl(do_write, ROW_SIZE))
617
618 # -- Generate PLRUs
619 def maybe_plrus(self, m, r, plru_victim):
620 comb = m.d.comb
621
622 with m.If(NUM_WAYS > 1):
623 for i in range(NUM_LINES):
624 plru_acc_i = Signal(WAY_BITS)
625 plru_acc_en = Signal()
626 plru = PLRU(WAY_BITS)
627 setattr(m.submodules, "plru_%d" % i, plru)
628
629 comb += plru.acc_i.eq(plru_acc_i)
630 comb += plru.acc_en.eq(plru_acc_en)
631
632 # PLRU interface
633 with m.If(get_index(r.hit_nia) == i):
634 comb += plru.acc_en.eq(r.hit_valid)
635
636 comb += plru.acc_i.eq(r.hit_way)
637 comb += plru_victim[i].eq(plru.lru_o)
638
639 # TLB hit detection and real address generation
640 def itlb_lookup(self, m, tlb_req_index, itlb_ptes, itlb_tags,
641 real_addr, itlb_valid_bits, ra_valid, eaa_priv,
642 priv_fault, access_ok):
643 comb = m.d.comb
644
645 i_in = self.i_in
646
647 pte = Signal(TLB_PTE_BITS)
648 ttag = Signal(TLB_EA_TAG_BITS)
649
650 comb += tlb_req_index.eq(hash_ea(i_in.nia))
651 comb += pte.eq(itlb_ptes[tlb_req_index])
652 comb += ttag.eq(itlb_tags[tlb_req_index])
653
654 with m.If(i_in.virt_mode):
655 comb += real_addr.eq(Cat(
656 i_in.nia[:TLB_LG_PGSZ],
657 pte[TLB_LG_PGSZ:REAL_ADDR_BITS]
658 ))
659
660 with m.If(ttag == i_in.nia[TLB_LG_PGSZ + TLB_BITS:64]):
661 comb += ra_valid.eq(itlb_valid_bits[tlb_req_index])
662
663 comb += eaa_priv.eq(pte[3])
664
665 with m.Else():
666 comb += real_addr.eq(i_in.nia[:REAL_ADDR_BITS])
667 comb += ra_valid.eq(1)
668 comb += eaa_priv.eq(1)
669
670 # No IAMR, so no KUEP support for now
671 comb += priv_fault.eq(eaa_priv & ~i_in.priv_mode)
672 comb += access_ok.eq(ra_valid & ~priv_fault)
673
674 # iTLB update
675 def itlb_update(self, m, itlb_valid_bits, itlb_tags, itlb_ptes):
676 comb = m.d.comb
677 sync = m.d.sync
678
679 m_in = self.m_in
680
681 wr_index = Signal(TLB_SIZE)
682 comb += wr_index.eq(hash_ea(m_in.addr))
683
684 with m.If(m_in.tlbie & m_in.doall):
685 # Clear all valid bits
686 for i in range(TLB_SIZE):
687 sync += itlb_valid_bits[i].eq(0)
688
689 with m.Elif(m_in.tlbie):
690 # Clear entry regardless of hit or miss
691 sync += itlb_valid_bits[wr_index].eq(0)
692
693 with m.Elif(m_in.tlbld):
694 sync += itlb_tags[wr_index].eq(
695 m_in.addr[TLB_LG_PGSZ + TLB_BITS:64]
696 )
697 sync += itlb_ptes[wr_index].eq(m_in.pte)
698 sync += itlb_valid_bits[wr_index].eq(1)
699
700 # Cache hit detection, output to fetch2 and other misc logic
701 def icache_comb(self, m, use_previous, r, req_index, req_row, req_hit_way,
702 req_tag, real_addr, req_laddr, cache_valid_bits,
703 cache_tags, access_ok, req_is_hit,
704 req_is_miss, replace_way, plru_victim, cache_out_row):
705 comb = m.d.comb
706
707 #comb += Display("ENTER icache_comb - use_previous:%x req_index:%x "
708 # "req_row:%x req_tag:%x real_addr:%x req_laddr:%x "
709 # "access_ok:%x req_is_hit:%x req_is_miss:%x "
710 # "replace_way:%x", use_previous, req_index, req_row,
711 # req_tag, real_addr, req_laddr, access_ok,
712 # req_is_hit, req_is_miss, replace_way)
713
714 i_in, i_out, wb_out = self.i_in, self.i_out, self.wb_out
715 flush_in, stall_out = self.flush_in, self.stall_out
716
717 is_hit = Signal()
718 hit_way = Signal(NUM_WAYS)
719
720 # i_in.sequential means that i_in.nia this cycle is 4 more than
721 # last cycle. If we read more than 32 bits at a time, had a
722 # cache hit last cycle, and we don't want the first 32-bit chunk
723 # then we can keep the data we read last cycle and just use that.
724 with m.If(i_in.nia[2:INSN_BITS+2] != 0):
725 comb += use_previous.eq(i_in.sequential & r.hit_valid)
726
727 # Extract line, row and tag from request
728 comb += req_index.eq(get_index(i_in.nia))
729 comb += req_row.eq(get_row(i_in.nia))
730 comb += req_tag.eq(get_tag(real_addr))
731
732 # Calculate address of beginning of cache row, will be
733 # used for cache miss processing if needed
734 comb += req_laddr.eq(Cat(
735 Const(0, ROW_OFF_BITS),
736 real_addr[ROW_OFF_BITS:REAL_ADDR_BITS],
737 ))
738
739 # Test if pending request is a hit on any way
740 hitcond = Signal()
741 comb += hitcond.eq((r.state == State.WAIT_ACK)
742 & (req_index == r.store_index)
743 & r.rows_valid[req_row % ROW_PER_LINE])
744 with m.If(i_in.req):
745 cvb = Signal(NUM_WAYS)
746 ctag = Signal(TAG_RAM_WIDTH)
747 comb += ctag.eq(cache_tags[req_index])
748 comb += cvb.eq(cache_valid_bits[req_index])
749 for i in range(NUM_WAYS):
750 tagi = Signal(TAG_BITS, name="ti%d" % i)
751 comb += tagi.eq(read_tag(i, ctag))
752 hit_test = Signal(name="hit_test%d" % i)
753 comb += hit_test.eq(i == r.store_way)
754 with m.If((cvb[i] | (hitcond & hit_test)) & (tagi == req_tag)):
755 comb += hit_way.eq(i)
756 comb += is_hit.eq(1)
757
758 # Generate the "hit" and "miss" signals
759 # for the synchronous blocks
760 with m.If(i_in.req & access_ok & ~flush_in):
761 comb += req_is_hit.eq(is_hit)
762 comb += req_is_miss.eq(~is_hit)
763
764 with m.Else():
765 comb += req_is_hit.eq(0)
766 comb += req_is_miss.eq(0)
767
768 comb += req_hit_way.eq(hit_way)
769
770 # The way to replace on a miss
771 with m.If(r.state == State.CLR_TAG):
772 comb += replace_way.eq(plru_victim[r.store_index])
773 with m.Else():
774 comb += replace_way.eq(r.store_way)
775
776 # Output instruction from current cache row
777 #
778 # Note: This is a mild violation of our design principle of
779 # having pipeline stages output from a clean latch. In this
780 # case we output the result of a mux. The alternative would
781 # be output an entire row which I prefer not to do just yet
782 # as it would force fetch2 to know about some of the cache
783 # geometry information.
784 #comb += Display("BEFORE read_insn_word - r.hit_nia:%x " \
785 # "r.hit_way:%x, cache_out[r.hit_way]:%x", r.hit_nia, \
786 # r.hit_way, cache_out[r.hit_way])
787 comb += i_out.insn.eq(read_insn_word(r.hit_nia, cache_out_row))
788 comb += i_out.valid.eq(r.hit_valid)
789 comb += i_out.nia.eq(r.hit_nia)
790 comb += i_out.stop_mark.eq(r.hit_smark)
791 comb += i_out.fetch_failed.eq(r.fetch_failed)
792
793 # Stall fetch1 if we have a miss on cache or TLB
794 # or a protection fault
795 comb += stall_out.eq(~(is_hit & access_ok))
796
797 # Wishbone requests output (from the cache miss reload machine)
798 comb += wb_out.eq(r.wb)
799
800 # Cache hit synchronous machine
801 def icache_hit(self, m, use_previous, r, req_is_hit, req_hit_way,
802 req_index, req_tag, real_addr):
803 sync = m.d.sync
804
805 i_in, stall_in = self.i_in, self.stall_in
806 flush_in = self.flush_in
807
808 # keep outputs to fetch2 unchanged on a stall
809 # except that flush or reset sets valid to 0
810 # If use_previous, keep the same data as last
811 # cycle and use the second half
812 with m.If(stall_in | use_previous):
813 with m.If(flush_in):
814 sync += r.hit_valid.eq(0)
815 with m.Else():
816 # On a hit, latch the request for the next cycle,
817 # when the BRAM data will be available on the
818 # cache_out output of the corresponding way
819 sync += r.hit_valid.eq(req_is_hit)
820
821 with m.If(req_is_hit):
822 sync += r.hit_way.eq(req_hit_way)
823 sync += Display("cache hit nia:%x IR:%x SM:%x idx:%x " \
824 "tag:%x way:%x RA:%x", i_in.nia, \
825 i_in.virt_mode, i_in.stop_mark, req_index, \
826 req_tag, req_hit_way, real_addr)
827
828
829
830 with m.If(~stall_in):
831 # Send stop marks and NIA down regardless of validity
832 sync += r.hit_smark.eq(i_in.stop_mark)
833 sync += r.hit_nia.eq(i_in.nia)
834
835 def icache_miss_idle(self, m, r, req_is_miss, req_laddr,
836 req_index, req_tag, replace_way, real_addr):
837 comb = m.d.comb
838 sync = m.d.sync
839
840 i_in = self.i_in
841
842 # Reset per-row valid flags,
843 # only used in WAIT_ACK
844 for i in range(ROW_PER_LINE):
845 sync += r.rows_valid[i].eq(0)
846
847 # We need to read a cache line
848 with m.If(req_is_miss):
849 sync += Display(
850 "cache miss nia:%x IR:%x SM:%x idx:%x "
851 " way:%x tag:%x RA:%x", i_in.nia,
852 i_in.virt_mode, i_in.stop_mark, req_index,
853 replace_way, req_tag, real_addr
854 )
855
856 # Keep track of our index and way
857 # for subsequent stores
858 st_row = Signal(BRAM_ROWS)
859 comb += st_row.eq(get_row(req_laddr))
860 sync += r.store_index.eq(req_index)
861 sync += r.store_row.eq(st_row)
862 sync += r.store_tag.eq(req_tag)
863 sync += r.store_valid.eq(1)
864 sync += r.end_row_ix.eq(get_row_of_line(st_row) - 1)
865
866 # Prep for first wishbone read. We calculate the
867 # address of the start of the cache line and
868 # start the WB cycle.
869 sync += r.req_adr.eq(req_laddr)
870 sync += r.wb.cyc.eq(1)
871 sync += r.wb.stb.eq(1)
872
873 # Track that we had one request sent
874 sync += r.state.eq(State.CLR_TAG)
875
876 def icache_miss_clr_tag(self, m, r, replace_way,
877 cache_valid_bits, req_index,
878 tagset, cache_tags):
879
880 comb = m.d.comb
881 sync = m.d.sync
882
883 # Get victim way from plru
884 sync += r.store_way.eq(replace_way)
885 # Force misses on that way while reloading that line
886 cv = Signal(INDEX_BITS)
887 comb += cv.eq(cache_valid_bits[req_index])
888 comb += cv.bit_select(replace_way, 1).eq(0)
889 sync += cache_valid_bits[req_index].eq(cv)
890
891 for i in range(NUM_WAYS):
892 with m.If(i == replace_way):
893 comb += tagset.eq(cache_tags[r.store_index])
894 comb += write_tag(i, tagset, r.store_tag)
895 sync += cache_tags[r.store_index].eq(tagset)
896
897 sync += r.state.eq(State.WAIT_ACK)
898
    def icache_miss_wait_ack(self, m, r, replace_way, inval_in,
                             stbs_done, cache_valid_bits):
        """WAIT_ACK FSM state: stream the remaining wishbone reads for
        the cache line and consume the incoming acks.

        While stb is still asserted, each un-stalled cycle advances the
        request address to the next row; stb is dropped once the last
        row address has gone out.  Each ack marks the corresponding row
        valid; on the final ack the wishbone cycle is closed, the line
        is marked valid (unless an invalidate arrived meanwhile) and
        the FSM returns to IDLE.
        """
        comb = m.d.comb
        sync = m.d.sync

        wb_in = self.wb_in

        # Requests are all sent if stb is 0
        stbs_zero = Signal()
        comb += stbs_zero.eq(r.wb.stb == 0)
        comb += stbs_done.eq(stbs_zero)

        # If we are still sending requests, was one accepted?
        with m.If(~wb_in.stall & ~stbs_zero):
            # Was that the last word?  Then we are done sending:
            # clear stb and force stbs_done so an eventual last ack
            # can be handled on this same cycle.
            with m.If(is_last_row_addr(r.req_adr, r.end_row_ix)):
                sync += Display("IS_LAST_ROW_ADDR " \
                                "r.wb.addr:%x r.end_row_ix:%x " \
                                "r.wb.stb:%x stbs_zero:%x " \
                                "stbs_done:%x", r.wb.adr, \
                                r.end_row_ix, r.wb.stb, \
                                stbs_zero, stbs_done)
                sync += r.wb.stb.eq(0)
                comb += stbs_done.eq(1)

            # Calculate the next row address: increment only the
            # row-within-line portion of the request address (wraps
            # naturally within the line)
            rarange = Signal(LINE_OFF_BITS - ROW_OFF_BITS)
            comb += rarange.eq(
                r.req_adr[ROW_OFF_BITS:LINE_OFF_BITS] + 1
            )
            sync += r.req_adr[ROW_OFF_BITS:LINE_OFF_BITS].eq(
                rarange
            )
            sync += Display("RARANGE r.req_adr:%x rarange:%x "
                            "stbs_zero:%x stbs_done:%x",
                            r.req_adr, rarange, stbs_zero, stbs_done)

        # Incoming acks processing
        with m.If(wb_in.ack):
            sync += Display("WB_IN_ACK data:%x stbs_zero:%x "
                            "stbs_done:%x",
                            wb_in.dat, stbs_zero, stbs_done)

            # mark the just-received row of the line as valid
            sync += r.rows_valid[r.store_row % ROW_PER_LINE].eq(1)

            # Check for completion: all requests sent AND this ack is
            # for the last row of the line
            with m.If(stbs_done &
                      is_last_row(r.store_row, r.end_row_ix)):
                # Complete wishbone cycle
                sync += r.wb.cyc.eq(0)
                sync += r.req_adr.eq(0) # be nice, clear addr

                # Cache line is now valid -- set the victim way's
                # valid bit, unless the store was cancelled or an
                # invalidate arrived during the reload.
                # NOTE(review): same width question as in
                # icache_miss_clr_tag -- cv is a per-way vector but is
                # declared Signal(INDEX_BITS); confirm intent.
                cv = Signal(INDEX_BITS)
                comb += cv.eq(cache_valid_bits[r.store_index])
                comb += cv.bit_select(replace_way, 1).eq(
                    r.store_valid & ~inval_in
                )
                sync += cache_valid_bits[r.store_index].eq(cv)

                sync += r.state.eq(State.IDLE)

            # not completed, move on to next request in row
            with m.Else():
                # Increment store row counter
                sync += r.store_row.eq(next_row(r.store_row))
967
968
969 # Cache miss/reload synchronous machine
    def icache_miss(self, m, cache_valid_bits, r, req_is_miss,
                    req_index, req_laddr, req_tag, replace_way,
                    cache_tags, access_ok, real_addr):
        """Cache miss/reload synchronous state machine.

        Drives the wishbone master outputs, handles whole-cache
        invalidation requests, dispatches the IDLE / CLR_TAG /
        WAIT_ACK states to their helper methods, and tracks TLB-miss /
        protection-fault status in r.fetch_failed.
        """
        comb = m.d.comb
        sync = m.d.sync

        i_in, wb_in, m_in = self.i_in, self.wb_in, self.m_in
        stall_in, flush_in = self.stall_in, self.flush_in
        inval_in = self.inval_in

        # variable tagset : cache_tags_set_t;
        # variable stbs_done : boolean;

        tagset = Signal(TAG_RAM_WIDTH)
        stbs_done = Signal()

        # always read full rows: all byte-select lanes enabled;
        # wishbone address is the row address (byte address >> 3)
        comb += r.wb.sel.eq(-1)
        comb += r.wb.adr.eq(r.req_adr[3:])

        # Process cache invalidations: clear every line's valid bits
        # and cancel any in-flight store
        with m.If(inval_in):
            for i in range(NUM_LINES):
                sync += cache_valid_bits[i].eq(0)
            sync += r.store_valid.eq(0)

        # Main state machine
        with m.Switch(r.state):

            with m.Case(State.IDLE):
                self.icache_miss_idle(
                    m, r, req_is_miss, req_laddr,
                    req_index, req_tag, replace_way,
                    real_addr
                )

            # CLR_TAG and WAIT_ACK share a case deliberately: in the
            # CLR_TAG cycle the tag is written AND the WAIT_ACK logic
            # already runs, so the first wishbone handshake is not
            # delayed by an extra state-transition cycle.
            with m.Case(State.CLR_TAG, State.WAIT_ACK):
                with m.If(r.state == State.CLR_TAG):
                    self.icache_miss_clr_tag(
                        m, r, replace_way,
                        cache_valid_bits, req_index,
                        tagset, cache_tags
                    )

                self.icache_miss_wait_ack(
                    m, r, replace_way, inval_in,
                    stbs_done, cache_valid_bits
                )

        # TLB miss and protection fault processing: a flush or a TLB
        # load clears the failure; a request that fails the access
        # check (while not stalled) latches it
        with m.If(flush_in | m_in.tlbld):
            sync += r.fetch_failed.eq(0)
        with m.Elif(i_in.req & ~access_ok & ~stall_in):
            sync += r.fetch_failed.eq(1)
1023
1024 # icache_log: if LOG_LENGTH > 0 generate
    def icache_log(self, m, req_hit_way, ra_valid, access_ok,
                   req_is_miss, req_is_hit, lway, wstate, r):
        """Optional debug logger (microwatt "icache_log" generate).

        When LOG_LENGTH > 0, registers a 54-bit snapshot of the cache's
        externally-observable state each cycle and drives it out on
        log_out.

        NOTE(review): the `lway` and `wstate` parameters are shadowed
        by the local Signals created inside the loop, so the arguments
        are never used -- confirm whether they should be dropped from
        the signature (callers would need updating).
        """
        comb = m.d.comb
        sync = m.d.sync

        wb_in, i_out = self.wb_in, self.i_out
        log_out, stall_out = self.log_out, self.stall_out

        # -- Output data to logger
        # signal log_data : std_ulogic_vector(53 downto 0);
        # begin
        # data_log: process(clk)
        # variable lway: way_t;
        # variable wstate: std_ulogic;
        # Output data to logger
        for i in range(LOG_LENGTH):
            # Output data to logger
            log_data = Signal(54)
            lway = Signal(NUM_WAYS)
            wstate = Signal()

            # begin
            # if rising_edge(clk) then
            # lway := req_hit_way;
            # wstate := '0';
            # register the hit way; wstate flags "FSM busy"
            sync += lway.eq(req_hit_way)
            sync += wstate.eq(0)

            # if r.state /= IDLE then
            # wstate := '1';
            # end if;
            with m.If(r.state != State.IDLE):
                sync += wstate.eq(1)

            # log_data <= i_out.valid &
            # i_out.insn &
            # wishbone_in.ack &
            # r.wb.adr(5 downto 3) &
            # r.wb.stb & r.wb.cyc &
            # wishbone_in.stall &
            # stall_out &
            # r.fetch_failed &
            # r.hit_nia(5 downto 2) &
            # wstate &
            # std_ulogic_vector(to_unsigned(lway, 3)) &
            # req_is_hit & req_is_miss &
            # access_ok &
            # ra_valid;
            # pack the snapshot LSB-first (Cat order is reversed
            # relative to the VHDL '&' concatenation above)
            sync += log_data.eq(Cat(
                ra_valid, access_ok, req_is_miss, req_is_hit,
                lway, wstate, r.hit_nia[2:6],
                r.fetch_failed, stall_out, wb_in.stall, r.wb.cyc,
                r.wb.stb, r.wb.adr[3:6], wb_in.ack, i_out.insn,
                i_out.valid
            ))
            # end if;
            # end process;
            # log_out <= log_data;
            comb += log_out.eq(log_data)
        # end generate;
        # end;
1086
    def elaborate(self, platform):
        """Build the icache module: declare shared storage and request
        signals, then call the sub-elaboration methods (RAMs, PLRUs,
        TLB lookup/update, combinatorial hit path, hit latching, and
        the miss/reload state machine) that wire them together.
        """
        m = Module()
        comb = m.d.comb

        # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
        cache_tags = CacheTagArray()
        cache_valid_bits = CacheValidBitsArray()

        # signal itlb_valids : tlb_valids_t;
        # signal itlb_tags : tlb_tags_t;
        # signal itlb_ptes : tlb_ptes_t;
        # attribute ram_style of itlb_tags : signal is "distributed";
        # attribute ram_style of itlb_ptes : signal is "distributed";
        itlb_valid_bits = TLBValidBitsArray()
        itlb_tags = TLBTagArray()
        itlb_ptes = TLBPtesArray()
        # TODO to be passed to nmigen as ram attributes
        # attribute ram_style of itlb_tags : signal is "distributed";
        # attribute ram_style of itlb_ptes : signal is "distributed";

        # -- Privilege bit from PTE EAA field
        # signal eaa_priv : std_ulogic;
        # Privilege bit from PTE EAA field
        eaa_priv = Signal()

        # signal r : reg_internal_t;
        # all FSM / latched request state lives in this record
        r = RegInternal()

        # -- Async signals on incoming request
        # signal req_index : index_t;
        # signal req_row : row_t;
        # signal req_hit_way : way_t;
        # signal req_tag : cache_tag_t;
        # signal req_is_hit : std_ulogic;
        # signal req_is_miss : std_ulogic;
        # signal req_laddr : std_ulogic_vector(63 downto 0);
        # Async signal on incoming request
        # NOTE(review): these widths use the "count" constants
        # (NUM_LINES, BRAM_ROWS, NUM_WAYS) rather than their log2,
        # giving oversized-but-functional signals -- confirm whether
        # log2_int widths were intended.
        req_index = Signal(NUM_LINES)
        req_row = Signal(BRAM_ROWS)
        req_hit_way = Signal(NUM_WAYS)
        req_tag = Signal(TAG_BITS)
        req_is_hit = Signal()
        req_is_miss = Signal()
        req_laddr = Signal(64)

        # signal tlb_req_index : tlb_index_t;
        # signal real_addr : std_ulogic_vector(
        # REAL_ADDR_BITS - 1 downto 0
        # );
        # signal ra_valid : std_ulogic;
        # signal priv_fault : std_ulogic;
        # signal access_ok : std_ulogic;
        # signal use_previous : std_ulogic;
        tlb_req_index = Signal(TLB_SIZE)
        real_addr = Signal(REAL_ADDR_BITS)
        ra_valid = Signal()
        priv_fault = Signal()
        access_ok = Signal()
        use_previous = Signal()

        # signal cache_out : cache_ram_out_t;
        cache_out_row = Signal(ROW_SIZE_BITS)

        # signal plru_victim : plru_out_t;
        # signal replace_way : way_t;
        plru_victim = PLRUOut()
        replace_way = Signal(NUM_WAYS)

        # call sub-functions putting everything together, using shared
        # signals established above
        self.rams(m, r, cache_out_row, use_previous, replace_way, req_row)
        self.maybe_plrus(m, r, plru_victim)
        self.itlb_lookup(m, tlb_req_index, itlb_ptes, itlb_tags,
                         real_addr, itlb_valid_bits, ra_valid, eaa_priv,
                         priv_fault, access_ok)
        self.itlb_update(m, itlb_valid_bits, itlb_tags, itlb_ptes)
        self.icache_comb(m, use_previous, r, req_index, req_row, req_hit_way,
                         req_tag, real_addr, req_laddr, cache_valid_bits,
                         cache_tags, access_ok, req_is_hit, req_is_miss,
                         replace_way, plru_victim, cache_out_row)
        self.icache_hit(m, use_previous, r, req_is_hit, req_hit_way,
                        req_index, req_tag, real_addr)
        self.icache_miss(m, cache_valid_bits, r, req_is_miss, req_index,
                         req_laddr, req_tag, replace_way, cache_tags,
                         access_ok, real_addr)
        #self.icache_log(m, log_out, req_hit_way, ra_valid, access_ok,
        #                req_is_miss, req_is_hit, lway, wstate, r)

        return m
1177
1178
1179 # icache_tb.vhdl
1180 #
1181 # library ieee;
1182 # use ieee.std_logic_1164.all;
1183 #
1184 # library work;
1185 # use work.common.all;
1186 # use work.wishbone_types.all;
1187 #
1188 # entity icache_tb is
1189 # end icache_tb;
1190 #
1191 # architecture behave of icache_tb is
1192 # signal clk : std_ulogic;
1193 # signal rst : std_ulogic;
1194 #
1195 # signal i_out : Fetch1ToIcacheType;
1196 # signal i_in : IcacheToDecode1Type;
1197 #
1198 # signal m_out : MmuToIcacheType;
1199 #
1200 # signal wb_bram_in : wishbone_master_out;
1201 # signal wb_bram_out : wishbone_slave_out;
1202 #
1203 # constant clk_period : time := 10 ns;
1204 # begin
1205 # icache0: entity work.icache
1206 # generic map(
1207 # LINE_SIZE => 64,
1208 # NUM_LINES => 4
1209 # )
1210 # port map(
1211 # clk => clk,
1212 # rst => rst,
1213 # i_in => i_out,
1214 # i_out => i_in,
1215 # m_in => m_out,
1216 # stall_in => '0',
1217 # flush_in => '0',
1218 # inval_in => '0',
1219 # wishbone_out => wb_bram_in,
1220 # wishbone_in => wb_bram_out
1221 # );
1222 #
1223 # -- BRAM Memory slave
1224 # bram0: entity work.wishbone_bram_wrapper
1225 # generic map(
1226 # MEMORY_SIZE => 1024,
1227 # RAM_INIT_FILE => "icache_test.bin"
1228 # )
1229 # port map(
1230 # clk => clk,
1231 # rst => rst,
1232 # wishbone_in => wb_bram_in,
1233 # wishbone_out => wb_bram_out
1234 # );
1235 #
1236 # clk_process: process
1237 # begin
1238 # clk <= '0';
1239 # wait for clk_period/2;
1240 # clk <= '1';
1241 # wait for clk_period/2;
1242 # end process;
1243 #
1244 # rst_process: process
1245 # begin
1246 # rst <= '1';
1247 # wait for 2*clk_period;
1248 # rst <= '0';
1249 # wait;
1250 # end process;
1251 #
1252 # stim: process
1253 # begin
1254 # i_out.req <= '0';
1255 # i_out.nia <= (others => '0');
1256 # i_out.stop_mark <= '0';
1257 #
1258 # m_out.tlbld <= '0';
1259 # m_out.tlbie <= '0';
1260 # m_out.addr <= (others => '0');
1261 # m_out.pte <= (others => '0');
1262 #
1263 # wait until rising_edge(clk);
1264 # wait until rising_edge(clk);
1265 # wait until rising_edge(clk);
1266 # wait until rising_edge(clk);
1267 #
1268 # i_out.req <= '1';
1269 # i_out.nia <= x"0000000000000004";
1270 #
1271 # wait for 30*clk_period;
1272 # wait until rising_edge(clk);
1273 #
1274 # assert i_in.valid = '1' severity failure;
1275 # assert i_in.insn = x"00000001"
1276 # report "insn @" & to_hstring(i_out.nia) &
1277 # "=" & to_hstring(i_in.insn) &
1278 # " expected 00000001"
1279 # severity failure;
1280 #
1281 # i_out.req <= '0';
1282 #
1283 # wait until rising_edge(clk);
1284 #
1285 # -- hit
1286 # i_out.req <= '1';
1287 # i_out.nia <= x"0000000000000008";
1288 # wait until rising_edge(clk);
1289 # wait until rising_edge(clk);
1290 # assert i_in.valid = '1' severity failure;
1291 # assert i_in.insn = x"00000002"
1292 # report "insn @" & to_hstring(i_out.nia) &
1293 # "=" & to_hstring(i_in.insn) &
1294 # " expected 00000002"
1295 # severity failure;
1296 # wait until rising_edge(clk);
1297 #
1298 # -- another miss
1299 # i_out.req <= '1';
1300 # i_out.nia <= x"0000000000000040";
1301 #
1302 # wait for 30*clk_period;
1303 # wait until rising_edge(clk);
1304 #
1305 # assert i_in.valid = '1' severity failure;
1306 # assert i_in.insn = x"00000010"
1307 # report "insn @" & to_hstring(i_out.nia) &
1308 # "=" & to_hstring(i_in.insn) &
1309 # " expected 00000010"
1310 # severity failure;
1311 #
1312 # -- test something that aliases
1313 # i_out.req <= '1';
1314 # i_out.nia <= x"0000000000000100";
1315 # wait until rising_edge(clk);
1316 # wait until rising_edge(clk);
1317 # assert i_in.valid = '0' severity failure;
1318 # wait until rising_edge(clk);
1319 #
1320 # wait for 30*clk_period;
1321 # wait until rising_edge(clk);
1322 #
1323 # assert i_in.valid = '1' severity failure;
1324 # assert i_in.insn = x"00000040"
1325 # report "insn @" & to_hstring(i_out.nia) &
1326 # "=" & to_hstring(i_in.insn) &
1327 # " expected 00000040"
1328 # severity failure;
1329 #
1330 # i_out.req <= '0';
1331 #
1332 # std.env.finish;
1333 # end process;
1334 # end;
def icache_sim(dut):
    """Simulation stimulus for the ICache.

    Exercises: a cold miss (line reload), a hit on the same line, a
    second miss on the next line, and an aliasing address that must
    first miss before being served, asserting the returned instruction
    words against the test-memory pattern (word i contains value i/4).

    Fix: the aliasing check used `assert ~valid`, which is bitwise NOT
    on an int (~0 == -1, ~1 == -2 -- both truthy) and therefore could
    never fail; it is now `assert not valid`.
    """
    i_out = dut.i_in    # fetch request side (Fetch1 -> icache)
    i_in = dut.i_out    # result side (icache -> Decode1)
    m_out = dut.m_in    # MMU -> icache (idle throughout this test)

    # drive all inputs to a known idle state
    yield i_in.valid.eq(0)
    yield i_out.priv_mode.eq(1)
    yield i_out.req.eq(0)
    yield i_out.nia.eq(0)
    yield i_out.stop_mark.eq(0)
    yield m_out.tlbld.eq(0)
    yield m_out.tlbie.eq(0)
    yield m_out.addr.eq(0)
    yield m_out.pte.eq(0)
    yield
    yield
    yield
    yield

    # cold miss: request word at 0x4 and wait out the line reload
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000004, 64))
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    nia = yield i_out.nia
    insn = yield i_in.insn
    print(f"valid? {valid}")
    assert valid
    assert insn == 0x00000001, \
        "insn @%x=%x expected 00000001" % (nia, insn)
    yield i_out.req.eq(0)
    yield

    # hit: same line, should be valid after two cycles
    yield
    yield
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000008, 64))
    yield
    yield
    valid = yield i_in.valid
    nia = yield i_in.nia
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000002, \
        "insn @%x=%x expected 00000002" % (nia, insn)
    yield

    # another miss: next cache line
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000040, 64))
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    nia = yield i_out.nia
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000010, \
        "insn @%x=%x expected 00000010" % (nia, insn)

    # test something that aliases: must miss first...
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000100, 64))
    yield
    yield
    valid = yield i_in.valid
    assert not valid        # was `assert ~valid`: always-true bug
    for i in range(30):
        yield
    yield
    # ...then be served after the reload completes
    valid = yield i_in.valid
    nia = yield i_out.nia   # re-read nia so the message isn't stale
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000040, \
        "insn @%x=%x expected 00000040" % (nia, insn)
    yield i_out.req.eq(0)
1413
1414
1415
def test_icache(mem):
    """Instantiate an ICache wired to a wishbone SRAM model initialised
    with *mem*, and run the icache_sim stimulus under the nmigen
    simulator, dumping a VCD trace to test_icache.vcd.
    """
    dut = ICache()

    init_mem = Memory(width=64, depth=512, init=mem)
    sram = SRAM(memory=init_mem, granularity=8)

    m = Module()
    m.submodules.icache = dut
    m.submodules.sram = sram

    # wire the icache's wishbone master to the SRAM slave, and the
    # slave's responses back to the icache
    m.d.comb += [
        sram.bus.cyc.eq(dut.wb_out.cyc),
        sram.bus.stb.eq(dut.wb_out.stb),
        sram.bus.we.eq(dut.wb_out.we),
        sram.bus.sel.eq(dut.wb_out.sel),
        sram.bus.adr.eq(dut.wb_out.adr),
        sram.bus.dat_w.eq(dut.wb_out.dat),
        dut.wb_in.ack.eq(sram.bus.ack),
        dut.wb_in.dat.eq(sram.bus.dat_r),
    ]

    # nmigen Simulation
    sim = Simulator(m)
    sim.add_clock(1e-6)
    sim.add_sync_process(wrap(icache_sim(dut)))
    with sim.write_vcd('test_icache.vcd'):
        sim.run()
1444
if __name__ == '__main__':
    # emit standalone RTLIL of the icache for inspection/synthesis
    dut = ICache()
    vl = rtlil.convert(dut, ports=[])
    with open("test_icache.il", "w") as f:
        f.write(vl)

    # test memory pattern: each 64-bit dword packs two consecutive
    # 32-bit words -- (2*i) in the low half, (2*i+1) in the high half
    mem = [(i * 2) | ((i * 2 + 1) << 32) for i in range(512)]

    test_icache(mem)
1456