1 """ICache
2
3 based on Anton Blanchard microwatt icache.vhdl
4
5 Set associative icache
6
7 TODO (in no specific order):
8 * Add debug interface to inspect cache content
9 * Add snoop/invalidate path
10 * Add multi-hit error detection
11 * Pipelined bus interface (wb or axi)
12 * Maybe add parity? There's a few bits free in each BRAM row on Xilinx
13 * Add optimization: service hits on partially loaded lines
14 * Add optimization: (maybe) interrupt reload on fluch/redirect
15 * Check if playing with the geometry of the cache tags allow for more
16 efficient use of distributed RAM and less logic/muxes. Currently we
17 write TAG_BITS width which may not match full ram blocks and might
18 cause muxes to be inferred for "partial writes".
19 * Check if making the read size of PLRU a ROM helps utilization
20
21 """
from enum import Enum, unique
from nmigen import (Module, Signal, Elaboratable, Cat, Array, Const)
from nmigen.cli import main
from nmigen.cli import rtlil
from nmutil.iocontrol import RecordObject
from nmutil.byterev import byte_reverse
from nmutil.mask import Mask
from nmigen.utils import log2_int
from nmutil.util import Display

from soc.experiment.mem_types import (Fetch1ToICacheType,
                                      ICacheToDecode1Type,
                                      MMUToICacheType)

from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS,
                                     WB_SEL_BITS, WBAddrType, WBDataType,
                                     WBSelType, WBMasterOut, WBSlaveOut,
                                     WBMasterOutVector, WBSlaveOutVector,
                                     WBIOMasterOut, WBIOSlaveOut)

from soc.experiment.cache_ram import CacheRam
from soc.experiment.plru import PLRU

# for test
from nmigen_soc.wishbone.sram import SRAM
from nmigen import Memory
# select the python simulator (set to False to use cxxsim)
if True:
    from nmigen.back.pysim import Simulator, Delay, Settle
else:
    from nmigen.sim.cxxsim import Simulator, Delay, Settle
from nmutil.util import wrap


SIM = 0
LINE_SIZE = 64
# BRAM organisation: We never access more than wishbone_data_bits
# at a time so to save resources we make the array only that wide,
# and use consecutive indices to make a cache "line"
#
# ROW_SIZE is the width in bytes of the BRAM (based on WB, so 64-bits)
ROW_SIZE = WB_DATA_BITS // 8
# Number of lines in a set
NUM_LINES = 32
# Number of ways
NUM_WAYS = 4
# L1 ITLB number of entries (direct mapped)
TLB_SIZE = 64
# L1 ITLB log_2(page_size)
TLB_LG_PGSZ = 12
# Number of real address bits that we store
REAL_ADDR_BITS = 56
# Non-zero to enable log data collection
LOG_LENGTH = 0

ROW_SIZE_BITS = ROW_SIZE * 8
# ROW_PER_LINE is the number of rows
# (wishbone transactions) in a line
ROW_PER_LINE = LINE_SIZE // ROW_SIZE
# BRAM_ROWS is the number of rows in
# BRAM needed to represent the full icache
BRAM_ROWS = NUM_LINES * ROW_PER_LINE
# INSN_PER_ROW is the number of 32bit
# instructions per BRAM row
INSN_PER_ROW = ROW_SIZE_BITS // 32

# Bit fields counts in the address
#
# INSN_BITS is the number of bits to
# select an instruction in a row
INSN_BITS = log2_int(INSN_PER_ROW)
# ROW_BITS is the number of bits to
# select a row
ROW_BITS = log2_int(BRAM_ROWS)
# ROW_LINE_BITS is the number of bits to
# select a row within a line
ROW_LINE_BITS = log2_int(ROW_PER_LINE)
# LINE_OFF_BITS is the number of bits for
# the offset in a cache line
LINE_OFF_BITS = log2_int(LINE_SIZE)
# ROW_OFF_BITS is the number of bits for
# the offset in a row
ROW_OFF_BITS = log2_int(ROW_SIZE)
# INDEX_BITS is the number of bits to
# select a cache line
INDEX_BITS = log2_int(NUM_LINES)
# SET_SIZE_BITS is the log base 2 of
# the set size
SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS
# TAG_BITS is the number of bits of
# the tag part of the address
TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS
# WAY_BITS is the number of bits to
# select a way
WAY_BITS = log2_int(NUM_WAYS)
TAG_RAM_WIDTH = TAG_BITS * NUM_WAYS

# -- L1 ITLB.
# constant TLB_BITS : natural := log2(TLB_SIZE);
# constant TLB_EA_TAG_BITS : natural := 64 - (TLB_LG_PGSZ + TLB_BITS);
# constant TLB_PTE_BITS : natural := 64;
TLB_BITS = log2_int(TLB_SIZE)
TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_BITS)
TLB_PTE_BITS = 64

# architecture rtl of icache is

# Example of layout for 32 lines of 64 bytes:
#
# ..  tag    |index|  line  |
# ..         |   row   |    |
# ..         |     |   | |00| zero           (2)
# ..         |     |   |-|  | INSN_BITS      (1)
# ..         |     |---|    | ROW_LINE_BITS  (3)
# ..         |     |--- - --| LINE_OFF_BITS  (6)
# ..         |         |- --| ROW_OFF_BITS   (3)
# ..         |----- ---|    | ROW_BITS       (8)
# ..         |-----|        | INDEX_BITS     (5)
# .. --------|              | TAG_BITS       (53)

#subtype row_t is integer range 0 to BRAM_ROWS-1;
#subtype index_t is integer range 0 to NUM_LINES-1;
#subtype way_t is integer range 0 to NUM_WAYS-1;
#subtype row_in_line_t is unsigned(ROW_LINEBITS-1 downto 0);
#
#-- The cache data BRAM organized as described above for each way
#subtype cache_row_t is std_ulogic_vector(ROW_SIZE_BITS-1 downto 0);
#
#-- The cache tags LUTRAM has a row per set. Vivado is a pain and will
#-- not handle a clean (commented) definition of the cache tags as a 3d
#-- memory. For now, work around it by putting all the tags
#subtype cache_tag_t is std_logic_vector(TAG_BITS-1 downto 0);
# type cache_tags_set_t is array(way_t) of cache_tag_t;
# type cache_tags_array_t is array(index_t) of cache_tags_set_t;
#constant TAG_RAM_WIDTH : natural := TAG_BITS * NUM_WAYS;
#subtype cache_tags_set_t is std_logic_vector(TAG_RAM_WIDTH-1 downto 0);
#type cache_tags_array_t is array(index_t) of cache_tags_set_t;
def CacheTagArray():
    return Array(Signal(TAG_RAM_WIDTH, name="cachetag_%d" % x)
                 for x in range(NUM_LINES))

#-- The cache valid bits
#subtype cache_way_valids_t is std_ulogic_vector(NUM_WAYS-1 downto 0);
#type cache_valids_t is array(index_t) of cache_way_valids_t;
#type row_per_line_valid_t is array(0 to ROW_PER_LINE - 1) of std_ulogic;
def CacheValidBitsArray():
    return Array(Signal(NUM_WAYS, name="cachevalid_%d" % x)
                 for x in range(NUM_LINES))

def RowPerLineValidArray():
    return Array(Signal(name="rows_valid_%d" % x)
                 for x in range(ROW_PER_LINE))


# TODO to be passed to nmigen as ram attributes
# attribute ram_style : string;
# attribute ram_style of cache_tags : signal is "distributed";


#subtype tlb_index_t is integer range 0 to TLB_SIZE - 1;
#type tlb_valids_t is array(tlb_index_t) of std_ulogic;
#subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0);
#type tlb_tags_t is array(tlb_index_t) of tlb_tag_t;
#subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
#type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t;
def TLBValidBitsArray():
    return Array(Signal(name="tlbvalid_%d" % x)
                 for x in range(TLB_SIZE))

def TLBTagArray():
    return Array(Signal(TLB_EA_TAG_BITS, name="tlbtag_%d" % x)
                 for x in range(TLB_SIZE))

def TLBPTEArray():
    return Array(Signal(TLB_PTE_BITS, name="tlbpte_%d" % x)
                 for x in range(TLB_SIZE))


#-- Cache RAM interface
#type cache_ram_out_t is array(way_t) of cache_row_t;
# Cache RAM interface
def CacheRamOut():
    return Array(Signal(ROW_SIZE_BITS, name="cache_out_%d" % x)
                 for x in range(NUM_WAYS))

#-- PLRU output interface
#type plru_out_t is array(index_t) of
#    std_ulogic_vector(WAY_BITS-1 downto 0);
# PLRU output interface
def PLRUOut():
    return Array(Signal(WAY_BITS, name="plru_out_%d" % x)
                 for x in range(NUM_LINES))

# -- Return the cache line index (tag index) for an address
# function get_index(addr: std_ulogic_vector(63 downto 0))
#     return index_t is
# begin
#     return to_integer(unsigned(
#      addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS)
#     ));
# end;
# Return the cache line index (tag index) for an address
def get_index(addr):
    return addr[LINE_OFF_BITS:SET_SIZE_BITS]

# -- Return the cache row index (data memory) for an address
# function get_row(addr: std_ulogic_vector(63 downto 0))
#     return row_t is
# begin
#     return to_integer(unsigned(
#      addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS)
#     ));
# end;
# Return the cache row index (data memory) for an address
def get_row(addr):
    return addr[ROW_OFF_BITS:SET_SIZE_BITS]

# -- Return the index of a row within a line
# function get_row_of_line(row: row_t) return row_in_line_t is
#     variable row_v : unsigned(ROW_BITS-1 downto 0);
# begin
#     row_v := to_unsigned(row, ROW_BITS);
#     return row_v(ROW_LINEBITS-1 downto 0);
# end;
# Return the index of a row within a line
def get_row_of_line(row):
    return row[:ROW_LINE_BITS]

# -- Returns whether this is the last row of a line
# function is_last_row_addr(addr: wishbone_addr_type;
#                           last: row_in_line_t)
#     return boolean is
# begin
#     return unsigned(
#      addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS)
#     ) = last;
# end;
# Returns whether this is the last row of a line
def is_last_row_addr(addr, last):
    return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last

# -- Returns whether this is the last row of a line
# function is_last_row(row: row_t;
#                      last: row_in_line_t) return boolean is
# begin
#     return get_row_of_line(row) = last;
# end;
# Returns whether this is the last row of a line
def is_last_row(row, last):
    return get_row_of_line(row) == last

# -- Return the address of the next row in the current cache line
# function next_row_addr(addr: wishbone_addr_type)
#     return std_ulogic_vector is
#     variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
#     variable result  : wishbone_addr_type;
# begin
#     -- Is there no simpler way in VHDL to generate that 3 bits adder ?
#     row_idx := addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS);
#     row_idx := std_ulogic_vector(unsigned(row_idx) + 1);
#     result := addr;
#     result(LINE_OFF_BITS-1 downto ROW_OFF_BITS) := row_idx;
#     return result;
# end;
# Return the address of the next row in the current cache line:
# increment the row-within-line field, leaving the rest of the address
# unchanged (wraps within the line, as in the VHDL)
def next_row_addr(addr):
    row_idx = addr[ROW_OFF_BITS:LINE_OFF_BITS] + 1
    return Cat(addr[:ROW_OFF_BITS],
               row_idx[:ROW_LINE_BITS],
               addr[LINE_OFF_BITS:])

# -- Return the next row in the current cache line. We use a dedicated
# -- function in order to limit the size of the generated adder to be
# -- only the bits within a cache line (3 bits with default settings)
# function next_row(row: row_t) return row_t is
#     variable row_v   : std_ulogic_vector(ROW_BITS-1 downto 0);
#     variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
#     variable result  : std_ulogic_vector(ROW_BITS-1 downto 0);
# begin
#     row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
#     row_idx := row_v(ROW_LINEBITS-1 downto 0);
#     row_v(ROW_LINEBITS-1 downto 0) :=
#      std_ulogic_vector(unsigned(row_idx) + 1);
#     return to_integer(unsigned(row_v));
# end;
# Return the next row in the current cache line. We use a dedicated
# function in order to limit the size of the generated adder to be
# only the bits within a cache line (3 bits with default settings)
def next_row(row):
    row_idx = row[:ROW_LINE_BITS] + 1
    return Cat(row_idx[:ROW_LINE_BITS], row[ROW_LINE_BITS:])

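# A quick pure-python mirror of next_row's wrap-around behaviour (a
# sketch for checking by hand; _next_row_int is a hypothetical helper,
# not used by the hardware above). With the default ROW_PER_LINE = 8,
# the row-within-line field counts 0..7 and wraps, while the upper
# (line-select) bits of the row number stay put:
def _next_row_int(row):
    line = row & ~(ROW_PER_LINE - 1)           # upper bits: which line
    return line | ((row + 1) % ROW_PER_LINE)   # low bits wrap in-line

assert _next_row_int(0b10110) == 0b10111   # mid-line: plain increment
assert _next_row_int(0b10111) == 0b10000   # last row wraps to first
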
# -- Read the instruction word for the given address in the
# -- current cache row
# function read_insn_word(addr: std_ulogic_vector(63 downto 0);
#             data: cache_row_t) return std_ulogic_vector is
#     variable word: integer range 0 to INSN_PER_ROW-1;
# begin
#     word := to_integer(unsigned(addr(INSN_BITS+2-1 downto 2)));
#     return data(31+word*32 downto word*32);
# end;
# Read the instruction word for the given address
# in the current cache row
def read_insn_word(addr, data):
    word = addr[2:INSN_BITS+2]
    return data.word_select(word, 32)

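# read_insn_word picks one of the INSN_PER_ROW 32-bit words out of a
# BRAM row: addr bits [2:INSN_BITS+2] select the word. A pure-integer
# sketch (_read_insn_word_int is a hypothetical helper, illustration
# only, assuming the default 64-bit rows with two instructions each):
def _read_insn_word_int(addr, data):
    word = (addr >> 2) & (INSN_PER_ROW - 1)
    return (data >> (32 * word)) & 0xffffffff

assert _read_insn_word_int(0x0, 0x1111222233334444) == 0x33334444
assert _read_insn_word_int(0x4, 0x1111222233334444) == 0x11112222
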
# -- Get the tag value from the address
# function get_tag(
#     addr: std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0)
# )
#     return cache_tag_t is
# begin
#     return addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS);
# end;
# Get the tag value from the address
def get_tag(addr):
    return addr[SET_SIZE_BITS:REAL_ADDR_BITS]

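# Worked example of the three field extractors above, as a sketch in
# plain integers (the nmigen slices in get_index, get_row and get_tag
# correspond to these shifts and masks; _decode_addr_int is a
# hypothetical helper, not used by the hardware):
def _decode_addr_int(addr):
    index = (addr >> LINE_OFF_BITS) & (NUM_LINES - 1)  # as get_index
    row = (addr >> ROW_OFF_BITS) & (BRAM_ROWS - 1)     # as get_row
    tag = addr >> SET_SIZE_BITS                        # as get_tag
    return index, row, tag

# with the default geometry: index = bits [6:11], row = bits [3:11],
# tag = bits [11:56]; note row == index * ROW_PER_LINE + row-in-line
assert _decode_addr_int(0x12345678) == (0x19, 0xcf, 0x2468a)
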
# -- Read a tag from a tag memory row
# function read_tag(way: way_t; tagset: cache_tags_set_t)
#     return cache_tag_t is
# begin
#     return tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS);
# end;
# Read a tag from a tag memory row
def read_tag(way, tagset):
    return tagset[way * TAG_BITS:(way + 1) * TAG_BITS]

# -- Write a tag to tag memory row
# procedure write_tag(way: in way_t;
#     tagset: inout cache_tags_set_t; tag: cache_tag_t) is
# begin
#     tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS) := tag;
# end;
# Write a tag to tag memory row: returns an assignment statement
# (to be added to comb or sync) rather than mutating a python object
def write_tag(way, tagset, tag):
    return read_tag(way, tagset).eq(tag)

# -- Simple hash for direct-mapped TLB index
# function hash_ea(addr: std_ulogic_vector(63 downto 0))
#     return tlb_index_t is
#     variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
# begin
#     hash := addr(TLB_LG_PGSZ + TLB_BITS - 1 downto TLB_LG_PGSZ)
#             xor addr(
#              TLB_LG_PGSZ + 2 * TLB_BITS - 1 downto
#              TLB_LG_PGSZ + TLB_BITS
#             )
#             xor addr(
#              TLB_LG_PGSZ + 3 * TLB_BITS - 1 downto
#              TLB_LG_PGSZ + 2 * TLB_BITS
#             );
#     return to_integer(unsigned(hash));
# end;
# Simple hash for direct-mapped TLB index
def hash_ea(addr):
    hsh = (addr[TLB_LG_PGSZ:TLB_LG_PGSZ + TLB_BITS] ^
           addr[TLB_LG_PGSZ + TLB_BITS:TLB_LG_PGSZ + 2 * TLB_BITS] ^
           addr[TLB_LG_PGSZ + 2 * TLB_BITS:TLB_LG_PGSZ + 3 * TLB_BITS])
    return hsh

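# The hash above XOR-folds the three TLB_BITS-wide fields of the
# effective address sitting just above the page offset, so that large
# power-of-2 strides do not all land on the same direct-mapped TLB
# entry. A pure-integer sketch (_hash_ea_int is a hypothetical helper
# for illustration only):
def _hash_ea_int(addr):
    fld = lambda n: (addr >> (TLB_LG_PGSZ + n*TLB_BITS)) & (TLB_SIZE-1)
    return fld(0) ^ fld(1) ^ fld(2)

assert _hash_ea_int(0) == 0
assert _hash_ea_int(1 << TLB_LG_PGSZ) == 1          # lowest folded bit
assert _hash_ea_int(0x3f << TLB_LG_PGSZ) == 0x3f    # one whole field
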
# begin
#
#     assert LINE_SIZE mod ROW_SIZE = 0;
#     assert ispow2(LINE_SIZE) report "LINE_SIZE not power of 2"
#         severity FAILURE;
#     assert ispow2(NUM_LINES) report "NUM_LINES not power of 2"
#         severity FAILURE;
#     assert ispow2(ROW_PER_LINE) report "ROW_PER_LINE not power of 2"
#         severity FAILURE;
#     assert ispow2(INSN_PER_ROW) report "INSN_PER_ROW not power of 2"
#         severity FAILURE;
#     assert (ROW_BITS = INDEX_BITS + ROW_LINEBITS)
#         report "geometry bits don't add up" severity FAILURE;
#     assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS)
#         report "geometry bits don't add up" severity FAILURE;
#     assert (REAL_ADDR_BITS = TAG_BITS + INDEX_BITS + LINE_OFF_BITS)
#         report "geometry bits don't add up" severity FAILURE;
#     assert (REAL_ADDR_BITS = TAG_BITS + ROW_BITS + ROW_OFF_BITS)
#         report "geometry bits don't add up" severity FAILURE;
#
#     sim_debug: if SIM generate
#     debug: process
#     begin
#         report "ROW_SIZE = " & natural'image(ROW_SIZE);
#         report "ROW_PER_LINE = " & natural'image(ROW_PER_LINE);
#         report "BRAM_ROWS = " & natural'image(BRAM_ROWS);
#         report "INSN_PER_ROW = " & natural'image(INSN_PER_ROW);
#         report "INSN_BITS = " & natural'image(INSN_BITS);
#         report "ROW_BITS = " & natural'image(ROW_BITS);
#         report "ROW_LINEBITS = " & natural'image(ROW_LINEBITS);
#         report "LINE_OFF_BITS = " & natural'image(LINE_OFF_BITS);
#         report "ROW_OFF_BITS = " & natural'image(ROW_OFF_BITS);
#         report "INDEX_BITS = " & natural'image(INDEX_BITS);
#         report "TAG_BITS = " & natural'image(TAG_BITS);
#         report "WAY_BITS = " & natural'image(WAY_BITS);
#         wait;
#     end process;
#     end generate;

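# The VHDL assertions above are left untranslated in the original; a
# minimal python equivalent (a sketch, checked once at import time)
# verifies the same geometry invariants. ispow2 is a small local
# helper, not an nmigen/nmutil function:
def ispow2(x):
    return x != 0 and (x & (x - 1)) == 0

assert LINE_SIZE % ROW_SIZE == 0, "LINE_SIZE not multiple of ROW_SIZE"
assert ispow2(LINE_SIZE), "LINE_SIZE not power of 2"
assert ispow2(NUM_LINES), "NUM_LINES not power of 2"
assert ispow2(ROW_PER_LINE), "ROW_PER_LINE not power of 2"
assert ispow2(INSN_PER_ROW), "INSN_PER_ROW not power of 2"
assert ROW_BITS == INDEX_BITS + ROW_LINE_BITS, \
    "geometry bits don't add up"
assert LINE_OFF_BITS == ROW_OFF_BITS + ROW_LINE_BITS, \
    "geometry bits don't add up"
assert REAL_ADDR_BITS == TAG_BITS + INDEX_BITS + LINE_OFF_BITS, \
    "geometry bits don't add up"
assert REAL_ADDR_BITS == TAG_BITS + ROW_BITS + ROW_OFF_BITS, \
    "geometry bits don't add up"
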
# Cache reload state machine
@unique
class State(Enum):
    IDLE = 0
    CLR_TAG = 1
    WAIT_ACK = 2

# type reg_internal_t is record
#     -- Cache hit state (Latches for 1 cycle BRAM access)
#     hit_way   : way_t;
#     hit_nia   : std_ulogic_vector(63 downto 0);
#     hit_smark : std_ulogic;
#     hit_valid : std_ulogic;
#
#     -- Cache miss state (reload state machine)
#     state       : state_t;
#     wb          : wishbone_master_out;
#     store_way   : way_t;
#     store_index : index_t;
#     store_row   : row_t;
#     store_tag   : cache_tag_t;
#     store_valid : std_ulogic;
#     end_row_ix  : row_in_line_t;
#     rows_valid  : row_per_line_valid_t;
#
#     -- TLB miss state
#     fetch_failed : std_ulogic;
# end record;
class RegInternal(RecordObject):
    def __init__(self):
        super().__init__()
        # Cache hit state (Latches for 1 cycle BRAM access)
        self.hit_way = Signal(WAY_BITS)
        self.hit_nia = Signal(64)
        self.hit_smark = Signal()
        self.hit_valid = Signal()

        # Cache miss state (reload state machine)
        self.state = Signal(State)
        self.wb = WBMasterOut("wb")
        self.store_way = Signal(WAY_BITS)
        self.store_index = Signal(INDEX_BITS)
        self.store_row = Signal(ROW_BITS)
        self.store_tag = Signal(TAG_BITS)
        self.store_valid = Signal()
        self.end_row_ix = Signal(ROW_LINE_BITS)
        self.rows_valid = RowPerLineValidArray()

        # TLB miss state
        self.fetch_failed = Signal()

# -- 64 bit direct mapped icache. All instructions are 4B aligned.
#
# entity icache is
#     generic (
#         SIM : boolean := false;
#         -- Line size in bytes
#         LINE_SIZE : positive := 64;
#         -- BRAM organisation: We never access more
#         -- than wishbone_data_bits
#         -- at a time so to save resources we make the
#         -- array only that wide,
#         -- and use consecutive indices for to make a cache "line"
#         --
#         -- ROW_SIZE is the width in bytes of the BRAM (based on WB,
#         -- so 64-bits)
#         ROW_SIZE : positive := wishbone_data_bits / 8;
#         -- Number of lines in a set
#         NUM_LINES : positive := 32;
#         -- Number of ways
#         NUM_WAYS : positive := 4;
#         -- L1 ITLB number of entries (direct mapped)
#         TLB_SIZE : positive := 64;
#         -- L1 ITLB log_2(page_size)
#         TLB_LG_PGSZ : positive := 12;
#         -- Number of real address bits that we store
#         REAL_ADDR_BITS : positive := 56;
#         -- Non-zero to enable log data collection
#         LOG_LENGTH : natural := 0
#     );
#     port (
#         clk : in std_ulogic;
#         rst : in std_ulogic;
#
#         i_in  : in Fetch1ToIcacheType;
#         i_out : out IcacheToDecode1Type;
#
#         m_in  : in MmuToIcacheType;
#
#         stall_in  : in std_ulogic;
#         stall_out : out std_ulogic;
#         flush_in  : in std_ulogic;
#         inval_in  : in std_ulogic;
#
#         wishbone_out : out wishbone_master_out;
#         wishbone_in  : in wishbone_slave_out;
#
#         log_out : out std_ulogic_vector(53 downto 0)
#     );
# end entity icache;
# 64 bit direct mapped icache. All instructions are 4B aligned.
class ICache(Elaboratable):
    """64 bit direct mapped icache. All instructions are 4B aligned."""
    def __init__(self):
        self.i_in = Fetch1ToICacheType(name="i_in")
        self.i_out = ICacheToDecode1Type(name="i_out")

        self.m_in = MMUToICacheType(name="m_in")

        self.stall_in = Signal()
        self.stall_out = Signal()
        self.flush_in = Signal()
        self.inval_in = Signal()

        self.wb_out = WBMasterOut(name="wb_out")
        self.wb_in = WBSlaveOut(name="wb_in")

        self.log_out = Signal(54)


    # -- Generate a cache RAM for each way
    # rams: for i in 0 to NUM_WAYS-1 generate
    #     signal do_read  : std_ulogic;
    #     signal do_write : std_ulogic;
    #     signal rd_addr  : std_ulogic_vector(ROW_BITS-1 downto 0);
    #     signal wr_addr  : std_ulogic_vector(ROW_BITS-1 downto 0);
    #     signal dout     : cache_row_t;
    #     signal wr_sel   : std_ulogic_vector(ROW_SIZE-1 downto 0);
    # begin
    #     way: entity work.cache_ram
    #         generic map (
    #             ROW_BITS => ROW_BITS,
    #             WIDTH => ROW_SIZE_BITS
    #         )
    #         port map (
    #             clk     => clk,
    #             rd_en   => do_read,
    #             rd_addr => rd_addr,
    #             rd_data => dout,
    #             wr_sel  => wr_sel,
    #             wr_addr => wr_addr,
    #             wr_data => wishbone_in.dat
    #         );
    #     process(all)
    #     begin
    #         do_read <= not (stall_in or use_previous);
    #         do_write <= '0';
    #         if wishbone_in.ack = '1' and replace_way = i then
    #             do_write <= '1';
    #         end if;
    #         cache_out(i) <= dout;
    #         rd_addr <=
    #          std_ulogic_vector(to_unsigned(req_row, ROW_BITS));
    #         wr_addr <=
    #          std_ulogic_vector(to_unsigned(r.store_row, ROW_BITS));
    #         for i in 0 to ROW_SIZE-1 loop
    #             wr_sel(i) <= do_write;
    #         end loop;
    #     end process;
    # end generate;
    def rams(self, m, r, cache_out, use_previous, replace_way, req_row):
        comb = m.d.comb

        wb_in, stall_in = self.wb_in, self.stall_in

        for i in range(NUM_WAYS):
            # the VHDL declares these inside the generate loop, so
            # each way must get its own copy of the control signals
            do_read = Signal(name="do_rd_%d" % i)
            do_write = Signal(name="do_wr_%d" % i)
            rd_addr = Signal(ROW_BITS, name="rd_addr_%d" % i)
            wr_addr = Signal(ROW_BITS, name="wr_addr_%d" % i)
            d_out = Signal(ROW_SIZE_BITS, name="d_out_%d" % i)
            wr_sel = Signal(ROW_SIZE, name="wr_sel_%d" % i)

            way = CacheRam(ROW_BITS, ROW_SIZE_BITS)
            setattr(m.submodules, "cacheram_%d" % i, way)

            comb += way.rd_en.eq(do_read)
            comb += way.rd_addr.eq(rd_addr)
            comb += d_out.eq(way.rd_data_o)  # read *from* the RAM output
            comb += way.wr_sel.eq(wr_sel)
            comb += way.wr_addr.eq(wr_addr)
            comb += way.wr_data.eq(wb_in.dat)

            comb += do_read.eq(~(stall_in | use_previous))

            with m.If(wb_in.ack & (replace_way == i)):
                comb += do_write.eq(1)

            comb += cache_out[i].eq(d_out)
            comb += rd_addr.eq(req_row)
            comb += wr_addr.eq(r.store_row)
            for j in range(ROW_SIZE):
                comb += wr_sel[j].eq(do_write)

    # -- Generate PLRUs
    # maybe_plrus: if NUM_WAYS > 1 generate
    # begin
    #     plrus: for i in 0 to NUM_LINES-1 generate
    #         -- PLRU interface
    #         signal plru_acc    : std_ulogic_vector(WAY_BITS-1 downto 0);
    #         signal plru_acc_en : std_ulogic;
    #         signal plru_out    : std_ulogic_vector(WAY_BITS-1 downto 0);
    #
    #     begin
    #         plru : entity work.plru
    #             generic map (
    #                 BITS => WAY_BITS
    #             )
    #             port map (
    #                 clk    => clk,
    #                 rst    => rst,
    #                 acc    => plru_acc,
    #                 acc_en => plru_acc_en,
    #                 lru    => plru_out
    #             );
    #
    #         process(all)
    #         begin
    #             -- PLRU interface
    #             if get_index(r.hit_nia) = i then
    #                 plru_acc_en <= r.hit_valid;
    #             else
    #                 plru_acc_en <= '0';
    #             end if;
    #             plru_acc <=
    #              std_ulogic_vector(to_unsigned(r.hit_way, WAY_BITS));
    #             plru_victim(i) <= plru_out;
    #         end process;
    #     end generate;
    # end generate;
    def maybe_plrus(self, m, r, plru_victim):
        comb = m.d.comb

        # NUM_WAYS is an elaboration-time constant, so this is a
        # plain python "if", not an m.If
        if NUM_WAYS > 1:
            for i in range(NUM_LINES):
                plru = PLRU(WAY_BITS)
                setattr(m.submodules, "plru_%d" % i, plru)

                # PLRU interface
                with m.If(get_index(r.hit_nia) == i):
                    comb += plru.acc_en.eq(r.hit_valid)

                comb += plru.acc_i.eq(r.hit_way)
                comb += plru_victim[i].eq(plru.lru_o)

    # -- TLB hit detection and real address generation
    # itlb_lookup : process(all)
    #     variable pte  : tlb_pte_t;
    #     variable ttag : tlb_tag_t;
    # begin
    #     tlb_req_index <= hash_ea(i_in.nia);
    #     pte := itlb_ptes(tlb_req_index);
    #     ttag := itlb_tags(tlb_req_index);
    #     if i_in.virt_mode = '1' then
    #         real_addr <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
    #                      i_in.nia(TLB_LG_PGSZ - 1 downto 0);
    #         if ttag = i_in.nia(63 downto TLB_LG_PGSZ + TLB_BITS) then
    #             ra_valid <= itlb_valids(tlb_req_index);
    #         else
    #             ra_valid <= '0';
    #         end if;
    #         eaa_priv <= pte(3);
    #     else
    #         real_addr <= i_in.nia(REAL_ADDR_BITS - 1 downto 0);
    #         ra_valid <= '1';
    #         eaa_priv <= '1';
    #     end if;
    #
    #     -- no IAMR, so no KUEP support for now
    #     priv_fault <= eaa_priv and not i_in.priv_mode;
    #     access_ok <= ra_valid and not priv_fault;
    # end process;
    # TLB hit detection and real address generation
    def itlb_lookup(self, m, tlb_req_index, itlb_ptes, itlb_tags,
                    real_addr, itlb_valid_bits, ra_valid, eaa_priv,
                    priv_fault, access_ok):
        comb = m.d.comb

        i_in = self.i_in

        pte = Signal(TLB_PTE_BITS)
        ttag = Signal(TLB_EA_TAG_BITS)

        comb += tlb_req_index.eq(hash_ea(i_in.nia))
        comb += pte.eq(itlb_ptes[tlb_req_index])
        comb += ttag.eq(itlb_tags[tlb_req_index])

        with m.If(i_in.virt_mode):
            comb += real_addr.eq(Cat(
                     i_in.nia[:TLB_LG_PGSZ],
                     pte[TLB_LG_PGSZ:REAL_ADDR_BITS]
                    ))

            with m.If(ttag == i_in.nia[TLB_LG_PGSZ + TLB_BITS:64]):
                comb += ra_valid.eq(itlb_valid_bits[tlb_req_index])
            # (the VHDL "else ra_valid <= '0'" case is the nmigen
            # comb default of zero)

            comb += eaa_priv.eq(pte[3])

        with m.Else():
            comb += real_addr.eq(i_in.nia[:REAL_ADDR_BITS])
            comb += ra_valid.eq(1)
            comb += eaa_priv.eq(1)

        # No IAMR, so no KUEP support for now
        comb += priv_fault.eq(eaa_priv & ~i_in.priv_mode)
        comb += access_ok.eq(ra_valid & ~priv_fault)

    # -- iTLB update
    # itlb_update: process(clk)
    #     variable wr_index : tlb_index_t;
    # begin
    #     if rising_edge(clk) then
    #         wr_index := hash_ea(m_in.addr);
    #         if rst = '1' or
    #          (m_in.tlbie = '1' and m_in.doall = '1') then
    #             -- clear all valid bits
    #             for i in tlb_index_t loop
    #                 itlb_valids(i) <= '0';
    #             end loop;
    #         elsif m_in.tlbie = '1' then
    #             -- clear entry regardless of hit or miss
    #             itlb_valids(wr_index) <= '0';
    #         elsif m_in.tlbld = '1' then
    #             itlb_tags(wr_index) <=
    #              m_in.addr(63 downto TLB_LG_PGSZ + TLB_BITS);
    #             itlb_ptes(wr_index) <= m_in.pte;
    #             itlb_valids(wr_index) <= '1';
    #         end if;
    #     end if;
    # end process;
    # iTLB update (the VHDL rst branch is covered by nmigen's
    # synchronous reset of the valid bits)
    def itlb_update(self, m, itlb_valid_bits, itlb_tags, itlb_ptes):
        comb = m.d.comb
        sync = m.d.sync

        m_in = self.m_in

        wr_index = Signal(TLB_BITS)
        comb += wr_index.eq(hash_ea(m_in.addr))

        with m.If(m_in.tlbie & m_in.doall):
            # Clear all valid bits
            for i in range(TLB_SIZE):
                sync += itlb_valid_bits[i].eq(0)

        with m.Elif(m_in.tlbie):
            # Clear entry regardless of hit or miss
            sync += itlb_valid_bits[wr_index].eq(0)

        with m.Elif(m_in.tlbld):
            sync += itlb_tags[wr_index].eq(
                     m_in.addr[TLB_LG_PGSZ + TLB_BITS:64]
                    )
            sync += itlb_ptes[wr_index].eq(m_in.pte)
            sync += itlb_valid_bits[wr_index].eq(1)

    # -- Cache hit detection, output to fetch2 and other misc logic
    # icache_comb : process(all)
    # Cache hit detection, output to fetch2 and other misc logic
    def icache_comb(self, m, use_previous, r, req_index, req_row,
                    req_hit_way, req_tag, real_addr, req_laddr,
                    cache_valid_bits, cache_tags, access_ok, req_is_hit,
                    req_is_miss, replace_way, plru_victim, cache_out):
        # variable is_hit  : std_ulogic;
        # variable hit_way : way_t;
        comb = m.d.comb

        i_in, i_out, wb_out = self.i_in, self.i_out, self.wb_out
        flush_in, stall_out = self.flush_in, self.stall_out

        is_hit = Signal()
        hit_way = Signal(WAY_BITS)

        # begin
        #     -- i_in.sequential means that i_in.nia this cycle
        #     -- is 4 more than last cycle. If we read more
        #     -- than 32 bits at a time, had a cache hit last
        #     -- cycle, and we don't want the first 32-bit chunk
        #     -- then we can keep the data we read last cycle
        #     -- and just use that.
        #     if unsigned(i_in.nia(INSN_BITS+2-1 downto 2)) /= 0 then
        #         use_previous <= i_in.sequential and r.hit_valid;
        #     else
        #         use_previous <= '0';
        #     end if;
        # i_in.sequential means that i_in.nia this cycle is 4 more than
        # last cycle. If we read more than 32 bits at a time, had a
        # cache hit last cycle, and we don't want the first 32-bit chunk
        # then we can keep the data we read last cycle and just use that.
        with m.If(i_in.nia[2:INSN_BITS+2] != 0):
            comb += use_previous.eq(i_in.sequential & r.hit_valid)

        # -- Extract line, row and tag from request
        # req_index <= get_index(i_in.nia);
        # req_row <= get_row(i_in.nia);
        # req_tag <= get_tag(real_addr);
        # Extract line, row and tag from request
        comb += req_index.eq(get_index(i_in.nia))
        comb += req_row.eq(get_row(i_in.nia))
        comb += req_tag.eq(get_tag(real_addr))

        # -- Calculate address of beginning of cache row, will be
        # -- used for cache miss processing if needed
        # req_laddr <=
        #  (63 downto REAL_ADDR_BITS => '0') &
        #  real_addr(REAL_ADDR_BITS - 1 downto ROW_OFF_BITS) &
        #  (ROW_OFF_BITS-1 downto 0 => '0');
        # Calculate address of beginning of cache row, will be
        # used for cache miss processing if needed
        comb += req_laddr.eq(Cat(
                 Const(0b0, ROW_OFF_BITS),
                 real_addr[ROW_OFF_BITS:REAL_ADDR_BITS]
                ))

        # -- Test if pending request is a hit on any way
        # hit_way := 0;
        # is_hit := '0';
        # for i in way_t loop
        #     if i_in.req = '1' and
        #         (cache_valids(req_index)(i) = '1' or
        #          (r.state = WAIT_ACK and
        #           req_index = r.store_index and
        #           i = r.store_way and
        #           r.rows_valid(req_row mod ROW_PER_LINE) = '1')) then
        #         if read_tag(i, cache_tags(req_index)) = req_tag then
        #             hit_way := i;
        #             is_hit := '1';
        #         end if;
        #     end if;
        # end loop;
        # Test if pending request is a hit on any way
        for i in range(NUM_WAYS):
            with m.If(i_in.req &
                      (cache_valid_bits[req_index][i] |
                       ((r.state == State.WAIT_ACK)
                        & (req_index == r.store_index)
                        & (i == r.store_way)
                        & r.rows_valid[req_row % ROW_PER_LINE]))):
                with m.If(read_tag(i, cache_tags[req_index]) == req_tag):
                    comb += hit_way.eq(i)
                    comb += is_hit.eq(1)

        # -- Generate the "hit" and "miss" signals
        # -- for the synchronous blocks
        # if i_in.req = '1' and access_ok = '1' and flush_in = '0'
        #  and rst = '0' then
        #     req_is_hit  <= is_hit;
        #     req_is_miss <= not is_hit;
        # else
        #     req_is_hit  <= '0';
        #     req_is_miss <= '0';
        # end if;
        # req_hit_way <= hit_way;
        # Generate the "hit" and "miss" signals
        # for the synchronous blocks
        with m.If(i_in.req & access_ok & ~flush_in):
            comb += req_is_hit.eq(is_hit)
            comb += req_is_miss.eq(~is_hit)

        with m.Else():
            comb += req_is_hit.eq(0)
            comb += req_is_miss.eq(0)

        comb += req_hit_way.eq(hit_way)

        # -- The way to replace on a miss
        # if r.state = CLR_TAG then
        #     replace_way <=
        #      to_integer(unsigned(plru_victim(r.store_index)));
        # else
        #     replace_way <= r.store_way;
        # end if;
        # The way to replace on a miss
        with m.If(r.state == State.CLR_TAG):
            comb += replace_way.eq(plru_victim[r.store_index])

        with m.Else():
            comb += replace_way.eq(r.store_way)

        # -- Output instruction from current cache row
        # --
        # -- Note: This is a mild violation of our design principle of
        # -- having pipeline stages output from a clean latch. In this
        # -- case we output the result of a mux. The alternative would
        # -- be output an entire row which I prefer not to do just yet
        # -- as it would force fetch2 to know about some of the cache
        # -- geometry information.
        # i_out.insn <= read_insn_word(r.hit_nia, cache_out(r.hit_way));
        # i_out.valid <= r.hit_valid;
        # i_out.nia <= r.hit_nia;
        # i_out.stop_mark <= r.hit_smark;
        # i_out.fetch_failed <= r.fetch_failed;
        # Output instruction from current cache row
        #
        # Note: This is a mild violation of our design principle of
        # having pipeline stages output from a clean latch. In this
        # case we output the result of a mux. The alternative would
        # be output an entire row which I prefer not to do just yet
        # as it would force fetch2 to know about some of the cache
        # geometry information.
        comb += i_out.insn.eq(read_insn_word(r.hit_nia,
                                             cache_out[r.hit_way]))
        comb += i_out.valid.eq(r.hit_valid)
        comb += i_out.nia.eq(r.hit_nia)
        comb += i_out.stop_mark.eq(r.hit_smark)
        comb += i_out.fetch_failed.eq(r.fetch_failed)

        # -- Stall fetch1 if we have a miss on cache or TLB
        # -- or a protection fault
        # stall_out <= not (is_hit and access_ok);
        # Stall fetch1 if we have a miss on cache or TLB
        # or a protection fault
        comb += stall_out.eq(~(is_hit & access_ok))

        # -- Wishbone requests output (from the cache miss reload machine)
        # wishbone_out <= r.wb;
        # Wishbone requests output (from the cache miss reload machine)
        comb += wb_out.eq(r.wb)
        # end process;

    # -- Cache hit synchronous machine
    # icache_hit : process(clk)
    # Cache hit synchronous machine
    def icache_hit(self, m, use_previous, r, req_is_hit, req_hit_way,
                   req_index, req_tag, real_addr):
        sync = m.d.sync

        i_in, stall_in = self.i_in, self.stall_in
        flush_in = self.flush_in

        # begin
        #     if rising_edge(clk) then
        #         -- keep outputs to fetch2 unchanged on a stall
        #         -- except that flush or reset sets valid to 0
        #         -- If use_previous, keep the same data as last
        #         -- cycle and use the second half
        #         if stall_in = '1' or use_previous = '1' then
        #             if rst = '1' or flush_in = '1' then
        #                 r.hit_valid <= '0';
        #             end if;
        # keep outputs to fetch2 unchanged on a stall
        # except that flush or reset sets valid to 0
        # If use_previous, keep the same data as last
        # cycle and use the second half
        with m.If(stall_in | use_previous):
            with m.If(flush_in):
                sync += r.hit_valid.eq(0)
        #         else
        #             -- On a hit, latch the request for the next cycle,
        #             -- when the BRAM data will be available on the
        #             -- cache_out output of the corresponding way
        #             r.hit_valid <= req_is_hit;
        #             if req_is_hit = '1' then
        #                 r.hit_way <= req_hit_way;
        with m.Else():
            # On a hit, latch the request for the next cycle,
            # when the BRAM data will be available on the
            # cache_out output of the corresponding way
            sync += r.hit_valid.eq(req_is_hit)

            with m.If(req_is_hit):
                sync += r.hit_way.eq(req_hit_way)

                # report "cache hit nia:" & to_hstring(i_in.nia) &
                #     " IR:" & std_ulogic'image(i_in.virt_mode) &
                #     " SM:" & std_ulogic'image(i_in.stop_mark) &
                #     " idx:" & integer'image(req_index) &
                #     " tag:" & to_hstring(req_tag) &
                #     " way:" & integer'image(req_hit_way) &
                #     " RA:" & to_hstring(real_addr);
                sync += Display("cache hit nia:%x IR:%x SM:%x idx:%x "
                                "tag:%x way:%x RA:%x",
                                i_in.nia, i_in.virt_mode, i_in.stop_mark,
                                req_index, req_tag, req_hit_way,
                                real_addr)
        #         end if;
        #         if stall_in = '0' then
        #             -- Send stop marks and NIA down regardless of validity
        #             r.hit_smark <= i_in.stop_mark;
        #             r.hit_nia <= i_in.nia;
        #         end if;
        with m.If(~stall_in):
            # Send stop marks and NIA down regardless of validity
            sync += r.hit_smark.eq(i_in.stop_mark)
            sync += r.hit_nia.eq(i_in.nia)
        #     end if;
        # end process;

    # -- Cache miss/reload synchronous machine
    # icache_miss : process(clk)
    # Cache miss/reload synchronous machine
    def icache_miss(self, m, cache_valid_bits, r, req_is_miss,
                    req_index, req_laddr, req_tag, replace_way,
                    cache_tags, access_ok, real_addr):
        comb = m.d.comb
        sync = m.d.sync

        i_in, wb_in, m_in = self.i_in, self.wb_in, self.m_in
        stall_in, flush_in = self.stall_in, self.flush_in
        inval_in = self.inval_in

        # variable tagset    : cache_tags_set_t;
        # variable stbs_done : boolean;

        tagset = Signal(TAG_RAM_WIDTH)
        stbs_done = Signal()

        # begin
        #     if rising_edge(clk) then
        #         -- On reset, clear all valid bits to force misses
        #         if rst = '1' then
        #             for i in index_t loop
        #                 cache_valids(i) <= (others => '0');
        #             end loop;
        #             r.state <= IDLE;
        #             r.wb.cyc <= '0';
        #             r.wb.stb <= '0';
        #             -- We only ever do reads on wishbone
        #             r.wb.dat <= (others => '0');
        #             r.wb.sel <= "11111111";
        #             r.wb.we  <= '0';
        #             -- Not useful normally but helps avoiding
        #             -- tons of sim warnings
        #             r.wb.adr <= (others => '0');
        # (on reset, nmigen clears the valid bits, state and wishbone
        # strobes to their default of zero for us)

        # We only ever do reads on wishbone
        comb += r.wb.sel.eq(~0)  # set to all 1s

        #         else
        #             -- Process cache invalidations
        #             if inval_in = '1' then
        #                 for i in index_t loop
        #                     cache_valids(i) <= (others => '0');
        #                 end loop;
        #                 r.store_valid <= '0';
        #             end if;
        # Process cache invalidations
        with m.If(inval_in):
            for i in range(NUM_LINES):
                sync += cache_valid_bits[i].eq(0)
            sync += r.store_valid.eq(0)

        # -- Main state machine
        # case r.state is
        # Main state machine
        with m.Switch(r.state):

            # when IDLE =>
            with m.Case(State.IDLE):
                # -- Reset per-row valid flags,
                # -- only used in WAIT_ACK
                # for i in 0 to ROW_PER_LINE - 1 loop
                #     r.rows_valid(i) <= '0';
                # end loop;
                # Reset per-row valid flags,
                # only used in WAIT_ACK
                for i in range(ROW_PER_LINE):
                    sync += r.rows_valid[i].eq(0)

                # -- We need to read a cache line
                # if req_is_miss = '1' then
                #     report "cache miss nia:" & to_hstring(i_in.nia) &
                #         " IR:" & std_ulogic'image(i_in.virt_mode) &
                #         " SM:" & std_ulogic'image(i_in.stop_mark) &
                #         " idx:" & integer'image(req_index) &
                #         " way:" & integer'image(replace_way) &
                #         " tag:" & to_hstring(req_tag) &
                #         " RA:" & to_hstring(real_addr);
                # We need to read a cache line
                with m.If(req_is_miss):
                    sync += Display("cache miss nia:%x IR:%x SM:%x "
                                    "idx:%x way:%x tag:%x RA:%x",
                                    i_in.nia, i_in.virt_mode,
                                    i_in.stop_mark, req_index,
                                    replace_way, req_tag, real_addr)

                    # -- Keep track of our index and way for
                    # -- subsequent stores
                    # r.store_index <= req_index;
                    # r.store_row <= get_row(req_laddr);
                    # r.store_tag <= req_tag;
                    # r.store_valid <= '1';
                    # r.end_row_ix <=
                    #  get_row_of_line(get_row(req_laddr)) - 1;
                    # Keep track of our index and way
                    # for subsequent stores
                    sync += r.store_index.eq(req_index)
                    sync += r.store_row.eq(get_row(req_laddr))
                    sync += r.store_tag.eq(req_tag)
                    sync += r.store_valid.eq(1)
                    sync += r.end_row_ix.eq(
                             get_row_of_line(get_row(req_laddr)) - 1
                            )

                    # -- Prep for first wishbone read. We calculate the
                    # -- address of the start of the cache line and
                    # -- start the WB cycle.
                    # r.wb.adr <= req_laddr(r.wb.adr'left downto 0);
                    # r.wb.cyc <= '1';
                    # r.wb.stb <= '1';
                    # Prep for first wishbone read.
                    # We calculate the address of the start of the
                    # cache line and start the WB cycle.
                    sync += r.wb.adr.eq(req_laddr)
                    sync += r.wb.cyc.eq(1)
                    sync += r.wb.stb.eq(1)

                    # -- Track that we had one request sent
                    # r.state <= CLR_TAG;
                    # Track that we had one request sent
                    sync += r.state.eq(State.CLR_TAG)
                # end if;

            # when CLR_TAG | WAIT_ACK =>
            with m.Case(State.CLR_TAG, State.WAIT_ACK):
                # if r.state = CLR_TAG then
                with m.If(r.state == State.CLR_TAG):
                    # -- Get victim way from plru
                    # r.store_way <= replace_way;
                    # Get victim way from plru
                    sync += r.store_way.eq(replace_way)

                    # -- Force misses on that way while
                    # -- reloading that line
                    # cache_valids(req_index)(replace_way) <= '0';
                    # Force misses on that way while
                    # reloading that line
                    sync += cache_valid_bits[req_index][replace_way].eq(0)

                    # -- Store new tag in selected way
                    # for i in 0 to NUM_WAYS-1 loop
                    #     if i = replace_way then
                    #         tagset := cache_tags(r.store_index);
                    #         write_tag(i, tagset, r.store_tag);
                    #         cache_tags(r.store_index) <= tagset;
                    #     end if;
                    # end loop;
                    # Store new tag in selected way
                    for i in range(NUM_WAYS):
                        with m.If(i == replace_way):
                            comb += tagset.eq(cache_tags[r.store_index])
                            comb += write_tag(i, tagset, r.store_tag)
                            sync += cache_tags[r.store_index].eq(tagset)

                    # r.state <= WAIT_ACK;
                    sync += r.state.eq(State.WAIT_ACK)
                # end if;

                # -- Requests are all sent if stb is 0
                # stbs_done := r.wb.stb = '0';
                # Requests are all sent if stb is 0
                comb += stbs_done.eq(r.wb.stb == 0)

                # -- If we are still sending requests,
                # -- was one accepted ?
                # if wishbone_in.stall = '0' and not stbs_done then
                # If we are still sending requests,
                # was one accepted?
                with m.If(~wb_in.stall & ~stbs_done):
                    # -- That was the last word ? We are done sending.
                    # -- Clear stb and set stbs_done so we can handle
                    # -- an eventual last ack on the same cycle.
                    # if is_last_row_addr(r.wb.adr, r.end_row_ix) then
                    #     r.wb.stb <= '0';
                    #     stbs_done := true;
                    # end if;
                    # That was the last word? We are done sending.
                    # Clear stb and set stbs_done so we can handle
                    # an eventual last ack on the same cycle.
                    with m.If(is_last_row_addr(r.wb.adr, r.end_row_ix)):
                        sync += r.wb.stb.eq(0)
                        comb += stbs_done.eq(1)

                    # -- Calculate the next row address
                    # r.wb.adr <= next_row_addr(r.wb.adr);
                    # Calculate the next row address
                    sync += r.wb.adr.eq(next_row_addr(r.wb.adr))
                # end if;

                # -- Incoming acks processing
                # if wishbone_in.ack = '1' then
                # Incoming acks processing
                with m.If(wb_in.ack):
                    # r.rows_valid(r.store_row mod ROW_PER_LINE)
                    #  <= '1';
                    sync += r.rows_valid[r.store_row % ROW_PER_LINE].eq(1)

                    # -- Check for completion
                    # if stbs_done and
                    #  is_last_row(r.store_row, r.end_row_ix) then
                    # Check for completion
                    with m.If(stbs_done &
                              is_last_row(r.store_row, r.end_row_ix)):
                        # -- Complete wishbone cycle
                        # r.wb.cyc <= '0';
                        # Complete wishbone cycle
                        sync += r.wb.cyc.eq(0)

                        # -- Cache line is now valid
                        # cache_valids(r.store_index)(replace_way) <=
                        #  r.store_valid and not inval_in;
                        # Cache line is now valid
                        sync += cache_valid_bits[r.store_index][
                                 replace_way].eq(r.store_valid & ~inval_in)

                        # -- We are done
                        # r.state <= IDLE;
                        # We are done
                        sync += r.state.eq(State.IDLE)
                    # end if;

                    # -- Increment store row counter
                    # r.store_row <= next_row(r.store_row);
                    # Increment store row counter
                    sync += r.store_row.eq(next_row(r.store_row))
                # end if;
        # end case;
        # end if;

        # -- TLB miss and protection fault processing
        # if rst = '1' or flush_in = '1' or m_in.tlbld = '1' then
        #     r.fetch_failed <= '0';
        # elsif i_in.req = '1' and access_ok = '0' and
        #  stall_in = '0' then
        #     r.fetch_failed <= '1';
        # end if;
        # TLB miss and protection fault processing
        with m.If(flush_in | m_in.tlbld):
            sync += r.fetch_failed.eq(0)
        with m.Elif(i_in.req & ~access_ok & ~stall_in):
            sync += r.fetch_failed.eq(1)
        # end process;

    # icache_log: if LOG_LENGTH > 0 generate
    def icache_log(self, m, req_hit_way, ra_valid, access_ok,
                   req_is_miss, req_is_hit, lway, wstate, r):
        comb = m.d.comb
        sync = m.d.sync

        wb_in, i_out = self.wb_in, self.i_out
        log_out, stall_out = self.log_out, self.stall_out

        # -- Output data to logger
        # signal log_data : std_ulogic_vector(53 downto 0);
        # begin
        #     data_log: process(clk)
        #         variable lway: way_t;
        #         variable wstate: std_ulogic;
        # Output data to logger
        for i in range(LOG_LENGTH):
            log_data = Signal(54)
            lway = Signal(3)  # 3 bits, to match the 54-bit log layout
            wstate = Signal()

            # begin
            #     if rising_edge(clk) then
            #         lway := req_hit_way;
            #         wstate := '0';
            # lway and wstate are VHDL variables: combinatorial here
            comb += lway.eq(req_hit_way)
            comb += wstate.eq(0)

            # if r.state /= IDLE then
            #     wstate := '1';
            # end if;
            with m.If(r.state != State.IDLE):
                comb += wstate.eq(1)

            # log_data <= i_out.valid &
            #             i_out.insn &
            #             wishbone_in.ack &
            #             r.wb.adr(5 downto 3) &
            #             r.wb.stb & r.wb.cyc &
            #             wishbone_in.stall &
            #             stall_out &
            #             r.fetch_failed &
            #             r.hit_nia(5 downto 2) &
            #             wstate &
            #             std_ulogic_vector(to_unsigned(lway, 3)) &
            #             req_is_hit & req_is_miss &
            #             access_ok &
            #             ra_valid;
            sync += log_data.eq(Cat(
                     ra_valid, access_ok, req_is_miss, req_is_hit,
                     lway, wstate, r.hit_nia[2:6],
                     r.fetch_failed, stall_out, wb_in.stall, r.wb.cyc,
                     r.wb.stb, r.wb.adr[3:6], wb_in.ack, i_out.insn,
                     i_out.valid
                    ))
            # end if;
            # end process;
            # log_out <= log_data;
            comb += log_out.eq(log_data)
        # end generate;
        # end;

    def elaborate(self, platform):

        m = Module()
        comb = m.d.comb

        # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
        cache_tags = CacheTagArray()
        cache_valid_bits = CacheValidBitsArray()

        # signal itlb_valids : tlb_valids_t;
        # signal itlb_tags : tlb_tags_t;
        # signal itlb_ptes : tlb_ptes_t;
        # attribute ram_style of itlb_tags : signal is "distributed";
        # attribute ram_style of itlb_ptes : signal is "distributed";
        itlb_valid_bits = TLBValidBitsArray()
        itlb_tags = TLBTagArray()
        itlb_ptes = TLBPTEArray()
        # TODO to be passed to nmigen as ram attributes
        # attribute ram_style of itlb_tags : signal is "distributed";
        # attribute ram_style of itlb_ptes : signal is "distributed";

        # -- Privilege bit from PTE EAA field
        # signal eaa_priv  : std_ulogic;
        # Privilege bit from PTE EAA field
        eaa_priv = Signal()

        # signal r : reg_internal_t;
        r = RegInternal()

        # -- Async signals on incoming request
        # signal req_index   : index_t;
        # signal req_row     : row_t;
        # signal req_hit_way : way_t;
        # signal req_tag     : cache_tag_t;
        # signal req_is_hit  : std_ulogic;
        # signal req_is_miss : std_ulogic;
        # signal req_laddr   : std_ulogic_vector(63 downto 0);
        # Async signals on incoming request
        req_index = Signal(INDEX_BITS)
        req_row = Signal(ROW_BITS)
        req_hit_way = Signal(WAY_BITS)
        req_tag = Signal(TAG_BITS)
        req_is_hit = Signal()
        req_is_miss = Signal()
        req_laddr = Signal(64)

        # signal tlb_req_index : tlb_index_t;
        # signal real_addr : std_ulogic_vector(
        #  REAL_ADDR_BITS - 1 downto 0
        # );
        # signal ra_valid : std_ulogic;
        # signal priv_fault : std_ulogic;
        # signal access_ok : std_ulogic;
        # signal use_previous : std_ulogic;
        tlb_req_index = Signal(TLB_BITS)
        real_addr = Signal(REAL_ADDR_BITS)
        ra_valid = Signal()
        priv_fault = Signal()
        access_ok = Signal()
        use_previous = Signal()

        # signal cache_out : cache_ram_out_t;
        cache_out = CacheRamOut()

        # signal plru_victim : plru_out_t;
        # signal replace_way : way_t;
        plru_victim = PLRUOut()
        replace_way = Signal(WAY_BITS)

        # call sub-functions putting everything together, using shared
        # signals established above
        self.rams(m, r, cache_out, use_previous, replace_way, req_row)
        self.maybe_plrus(m, r, plru_victim)
        self.itlb_lookup(m, tlb_req_index, itlb_ptes, itlb_tags,
                         real_addr, itlb_valid_bits, ra_valid, eaa_priv,
                         priv_fault, access_ok)
        self.itlb_update(m, itlb_valid_bits, itlb_tags, itlb_ptes)
        self.icache_comb(m, use_previous, r, req_index, req_row,
                         req_hit_way, req_tag, real_addr, req_laddr,
                         cache_valid_bits, cache_tags, access_ok,
                         req_is_hit, req_is_miss, replace_way,
                         plru_victim, cache_out)
        self.icache_hit(m, use_previous, r, req_is_hit, req_hit_way,
                        req_index, req_tag, real_addr)
        self.icache_miss(m, cache_valid_bits, r, req_is_miss, req_index,
                         req_laddr, req_tag, replace_way, cache_tags,
                         access_ok, real_addr)
        #self.icache_log(m, req_hit_way, ra_valid, access_ok,
        #                req_is_miss, req_is_hit, lway, wstate, r)

        return m


# icache_tb.vhdl
#
# library ieee;
# use ieee.std_logic_1164.all;
#
# library work;
# use work.common.all;
# use work.wishbone_types.all;
#
# entity icache_tb is
# end icache_tb;
#
# architecture behave of icache_tb is
#     signal clk : std_ulogic;
#     signal rst : std_ulogic;
#
#     signal i_out : Fetch1ToIcacheType;
#     signal i_in  : IcacheToDecode1Type;
#
#     signal m_out : MmuToIcacheType;
#
#     signal wb_bram_in  : wishbone_master_out;
#     signal wb_bram_out : wishbone_slave_out;
#
#     constant clk_period : time := 10 ns;
# begin
#     icache0: entity work.icache
#         generic map(
#             LINE_SIZE => 64,
#             NUM_LINES => 4
#         )
#         port map(
#             clk => clk,
#             rst => rst,
#             i_in => i_out,
#             i_out => i_in,
#             m_in => m_out,
#             stall_in => '0',
#             flush_in => '0',
#             inval_in => '0',
#             wishbone_out => wb_bram_in,
#             wishbone_in => wb_bram_out
#         );
#
#     -- BRAM Memory slave
#     bram0: entity work.wishbone_bram_wrapper
#         generic map(
#             MEMORY_SIZE => 1024,
#             RAM_INIT_FILE => "icache_test.bin"
#         )
#         port map(
#             clk => clk,
#             rst => rst,
#             wishbone_in => wb_bram_in,
#             wishbone_out => wb_bram_out
#         );
#
#     clk_process: process
#     begin
#         clk <= '0';
#         wait for clk_period/2;
#         clk <= '1';
#         wait for clk_period/2;
#     end process;
#
#     rst_process: process
#     begin
#         rst <= '1';
#         wait for 2*clk_period;
#         rst <= '0';
#         wait;
#     end process;
#
#     stim: process
#     begin
#         i_out.req <= '0';
#         i_out.nia <= (others => '0');
#         i_out.stop_mark <= '0';
#
#         m_out.tlbld <= '0';
#         m_out.tlbie <= '0';
#         m_out.addr <= (others => '0');
#         m_out.pte <= (others => '0');
#
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#
#         i_out.req <= '1';
#         i_out.nia <= x"0000000000000004";
#
#         wait for 30*clk_period;
#         wait until rising_edge(clk);
#
#         assert i_in.valid = '1' severity failure;
#         assert i_in.insn = x"00000001"
#             report "insn @" & to_hstring(i_out.nia) &
#                 "=" & to_hstring(i_in.insn) &
#                 " expected 00000001"
#             severity failure;
#
#         i_out.req <= '0';
#
#         wait until rising_edge(clk);
#
#         -- hit
#         i_out.req <= '1';
#         i_out.nia <= x"0000000000000008";
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         assert i_in.valid = '1' severity failure;
#         assert i_in.insn = x"00000002"
#             report "insn @" & to_hstring(i_out.nia) &
#                 "=" & to_hstring(i_in.insn) &
#                 " expected 00000002"
#             severity failure;
#         wait until rising_edge(clk);
#
#         -- another miss
#         i_out.req <= '1';
#         i_out.nia <= x"0000000000000040";
#
#         wait for 30*clk_period;
#         wait until rising_edge(clk);
#
#         assert i_in.valid = '1' severity failure;
#         assert i_in.insn = x"00000010"
#             report "insn @" & to_hstring(i_out.nia) &
#                 "=" & to_hstring(i_in.insn) &
#                 " expected 00000010"
#             severity failure;
#
#         -- test something that aliases
#         i_out.req <= '1';
#         i_out.nia <= x"0000000000000100";
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         assert i_in.valid = '0' severity failure;
#         wait until rising_edge(clk);
#
#         wait for 30*clk_period;
#         wait until rising_edge(clk);
#
#         assert i_in.valid = '1' severity failure;
#         assert i_in.insn = x"00000040"
#             report "insn @" & to_hstring(i_out.nia) &
#                 "=" & to_hstring(i_in.insn) &
#                 " expected 00000040"
#             severity failure;
#
#         i_out.req <= '0';
#
#         std.env.finish;
#     end process;
# end;
def icache_sim(dut):
    # testbench-side names mirror the VHDL testbench above: i_out is
    # the request driven *into* the icache (dut.i_in) and i_in is the
    # result read *out* of it (dut.i_out); m_out drives the MMU port
    i_out = dut.i_in
    i_in = dut.i_out
    m_out = dut.m_in

    yield i_out.req.eq(0)
    yield i_out.nia.eq(0)
    yield i_out.stop_mark.eq(0)
    yield m_out.tlbld.eq(0)
    yield m_out.tlbie.eq(0)
    yield m_out.addr.eq(0)
    yield m_out.pte.eq(0)
    yield
    yield
    yield
    yield
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000004, 64))
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    nia = yield i_out.nia
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000001, \
        "insn @%x=%x expected 00000001" % (nia, insn)
    yield i_out.req.eq(0)
    yield

    # hit
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000008, 64))
    yield
    yield
    valid = yield i_in.valid
    nia = yield i_out.nia
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000002, \
        "insn @%x=%x expected 00000002" % (nia, insn)
    yield

    # another miss
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000040, 64))
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    nia = yield i_out.nia
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000010, \
        "insn @%x=%x expected 00000010" % (nia, insn)

    # test something that aliases (the VHDL expects a miss first)
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000100, 64))
    yield
    yield
    valid = yield i_in.valid
    assert not valid
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    nia = yield i_out.nia
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000040, \
        "insn @%x=%x expected 00000040" % (nia, insn)
    yield i_out.req.eq(0)


def test_icache():
    dut = ICache()

    m = Module()
    m.submodules.icache = dut

    # nmigen Simulation
    sim = Simulator(m)
    sim.add_clock(1e-6)

    sim.add_sync_process(wrap(icache_sim(dut)))
    with sim.write_vcd('test_icache.vcd'):
        sim.run()

if __name__ == '__main__':
    dut = ICache()
    vl = rtlil.convert(dut, ports=[])
    with open("test_icache.il", "w") as f:
        f.write(vl)

    test_icache()