1 """ICache
2
3 based on Anton Blanchard microwatt icache.vhdl
4
5 Set associative icache
6
7 TODO (in no specific order):
8 * Add debug interface to inspect cache content
9 * Add snoop/invalidate path
10 * Add multi-hit error detection
11 * Pipelined bus interface (wb or axi)
12 * Maybe add parity? There's a few bits free in each BRAM row on Xilinx
13 * Add optimization: service hits on partially loaded lines
14 * Add optimization: (maybe) interrupt reload on fluch/redirect
15 * Check if playing with the geometry of the cache tags allow for more
16 efficient use of distributed RAM and less logic/muxes. Currently we
17 write TAG_BITS width which may not match full ram blocks and might
18 cause muxes to be inferred for "partial writes".
19 * Check if making the read size of PLRU a ROM helps utilization
20
21 """
from enum import Enum, unique
from nmigen import (Module, Signal, Elaboratable, Cat, Array, Const)
from nmigen.cli import main
from nmigen.cli import rtlil
from nmutil.iocontrol import RecordObject
from nmutil.byterev import byte_reverse
from nmutil.mask import Mask
from nmigen.utils import log2_int
from nmutil.util import Display

from soc.experiment.mem_types import (Fetch1ToICacheType,
                                      ICacheToDecode1Type,
                                      MMUToICacheType)

from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS,
                                     WB_SEL_BITS, WBAddrType, WBDataType,
                                     WBSelType, WBMasterOut, WBSlaveOut,
                                     WBMasterOutVector, WBSlaveOutVector,
                                     WBIOMasterOut, WBIOSlaveOut)

from soc.experiment.cache_ram import CacheRam
from soc.experiment.plru import PLRU

# for test
from nmigen_soc.wishbone.sram import SRAM
from nmigen import Memory
from nmigen.cli import rtlil
if True:
    from nmigen.back.pysim import Simulator, Delay, Settle
else:
    from nmigen.sim.cxxsim import Simulator, Delay, Settle
from nmutil.util import wrap


SIM = 0
LINE_SIZE = 64
# BRAM organisation: We never access more than wishbone_data_bits
# at a time so to save resources we make the array only that wide,
# and use consecutive indices to make a cache "line"
#
# ROW_SIZE is the width in bytes of the BRAM (based on WB, so 64-bits)
ROW_SIZE = WB_DATA_BITS // 8
# Number of lines in a set
NUM_LINES = 32
# Number of ways
NUM_WAYS = 4
# L1 ITLB number of entries (direct mapped)
TLB_SIZE = 64
# L1 ITLB log_2(page_size)
TLB_LG_PGSZ = 12
# Number of real address bits that we store
REAL_ADDR_BITS = 56
# Non-zero to enable log data collection
LOG_LENGTH = 0

ROW_SIZE_BITS = ROW_SIZE * 8
# ROW_PER_LINE is the number of row
# (wishbone) transactions in a line
ROW_PER_LINE = LINE_SIZE // ROW_SIZE
# BRAM_ROWS is the number of rows in
# BRAM needed to represent the full icache
BRAM_ROWS = NUM_LINES * ROW_PER_LINE
# INSN_PER_ROW is the number of 32bit
# instructions per BRAM row
INSN_PER_ROW = ROW_SIZE_BITS // 32

# Bit fields counts in the address
#
# INSN_BITS is the number of bits to
# select an instruction in a row
INSN_BITS = log2_int(INSN_PER_ROW)
# ROW_BITS is the number of bits to
# select a row
ROW_BITS = log2_int(BRAM_ROWS)
# ROW_LINE_BITS is the number of bits to
# select a row within a line
ROW_LINE_BITS = log2_int(ROW_PER_LINE)
# LINE_OFF_BITS is the number of bits for
# the offset in a cache line
LINE_OFF_BITS = log2_int(LINE_SIZE)
# ROW_OFF_BITS is the number of bits for
# the offset in a row
ROW_OFF_BITS = log2_int(ROW_SIZE)
# INDEX_BITS is the number of bits to
# select a cache line
INDEX_BITS = log2_int(NUM_LINES)
# SET_SIZE_BITS is the log base 2 of
# the set size
SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS
# TAG_BITS is the number of bits of
# the tag part of the address
TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS
# WAY_BITS is the number of bits to
# select a way
WAY_BITS = log2_int(NUM_WAYS)
TAG_RAM_WIDTH = TAG_BITS * NUM_WAYS
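
# With the defaults above (LINE_SIZE=64, NUM_LINES=32, NUM_WAYS=4,
# WB_DATA_BITS=64, REAL_ADDR_BITS=56) these work out to: ROW_SIZE=8,
# ROW_PER_LINE=8, BRAM_ROWS=256, INSN_PER_ROW=2, INSN_BITS=1,
# ROW_BITS=8, ROW_LINE_BITS=3, LINE_OFF_BITS=6, ROW_OFF_BITS=3,
# INDEX_BITS=5, SET_SIZE_BITS=11, TAG_BITS=45, WAY_BITS=2,
# TAG_RAM_WIDTH=180.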

# -- L1 ITLB.
# constant TLB_BITS : natural := log2(TLB_SIZE);
# constant TLB_EA_TAG_BITS : natural := 64 - (TLB_LG_PGSZ + TLB_BITS);
# constant TLB_PTE_BITS : natural := 64;
TLB_BITS = log2_int(TLB_SIZE)
TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_BITS)
TLB_PTE_BITS = 64
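# With the defaults: TLB_BITS = 6 and TLB_EA_TAG_BITS = 46 (64 - 12 - 6).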

# architecture rtl of icache is
#constant ROW_SIZE_BITS : natural := ROW_SIZE*8;
#-- ROW_PER_LINE is the number of row (wishbone
#-- transactions) in a line
#constant ROW_PER_LINE : natural := LINE_SIZE / ROW_SIZE;
#-- BRAM_ROWS is the number of rows in BRAM
#-- needed to represent the full
#-- icache
#constant BRAM_ROWS : natural := NUM_LINES * ROW_PER_LINE;
#-- INSN_PER_ROW is the number of 32bit instructions per BRAM row
#constant INSN_PER_ROW : natural := ROW_SIZE_BITS / 32;
#-- Bit fields counts in the address
#
#-- INSN_BITS is the number of bits to select
#-- an instruction in a row
#constant INSN_BITS : natural := log2(INSN_PER_ROW);
#-- ROW_BITS is the number of bits to select a row
#constant ROW_BITS : natural := log2(BRAM_ROWS);
#-- ROW_LINEBITS is the number of bits to
#-- select a row within a line
#constant ROW_LINEBITS : natural := log2(ROW_PER_LINE);
#-- LINE_OFF_BITS is the number of bits for the offset
#-- in a cache line
#constant LINE_OFF_BITS : natural := log2(LINE_SIZE);
#-- ROW_OFF_BITS is the number of bits for the offset in a row
#constant ROW_OFF_BITS : natural := log2(ROW_SIZE);
#-- INDEX_BITS is the number of bits to select a cache line
#constant INDEX_BITS : natural := log2(NUM_LINES);
#-- SET_SIZE_BITS is the log base 2 of the set size
#constant SET_SIZE_BITS : natural := LINE_OFF_BITS + INDEX_BITS;
#-- TAG_BITS is the number of bits of the tag part of the address
#constant TAG_BITS : natural := REAL_ADDR_BITS - SET_SIZE_BITS;
#-- WAY_BITS is the number of bits to select a way
#constant WAY_BITS : natural := log2(NUM_WAYS);

#-- Example of layout for 32 lines of 64 bytes:
#--
#-- ..  tag    |index|  line  |
#-- ..         |   row   |    |
#-- ..         |     |   | |00| zero          (2)
#-- ..         |     |   |-|  | INSN_BITS     (1)
#-- ..         |     |---|    | ROW_LINEBITS  (3)
#-- ..         |     |--- - --| LINE_OFF_BITS (6)
#-- ..         |         |- --| ROW_OFF_BITS  (3)
#-- ..         |----- ---|    | ROW_BITS      (8)
#-- ..         |-----|        | INDEX_BITS    (5)
#-- .. --------|              | TAG_BITS      (53)
# Example of layout for 32 lines of 64 bytes:
#
# ..  tag    |index|  line  |
# ..         |   row   |    |
# ..         |     |   | |00| zero          (2)
# ..         |     |   |-|  | INSN_BITS     (1)
# ..         |     |---|    | ROW_LINEBITS  (3)
# ..         |     |--- - --| LINE_OFF_BITS (6)
# ..         |         |- --| ROW_OFF_BITS  (3)
# ..         |----- ---|    | ROW_BITS      (8)
# ..         |-----|        | INDEX_BITS    (5)
# .. --------|              | TAG_BITS      (53)

#subtype row_t is integer range 0 to BRAM_ROWS-1;
#subtype index_t is integer range 0 to NUM_LINES-1;
#subtype way_t is integer range 0 to NUM_WAYS-1;
#subtype row_in_line_t is unsigned(ROW_LINEBITS-1 downto 0);
#
#-- The cache data BRAM organized as described above for each way
#subtype cache_row_t is std_ulogic_vector(ROW_SIZE_BITS-1 downto 0);
#
#-- The cache tags LUTRAM has a row per set. Vivado is a pain and will
#-- not handle a clean (commented) definition of the cache tags as a 3d
#-- memory. For now, work around it by putting all the tags
#subtype cache_tag_t is std_logic_vector(TAG_BITS-1 downto 0);
# type cache_tags_set_t is array(way_t) of cache_tag_t;
# type cache_tags_array_t is array(index_t) of cache_tags_set_t;
#constant TAG_RAM_WIDTH : natural := TAG_BITS * NUM_WAYS;
#subtype cache_tags_set_t is std_logic_vector(TAG_RAM_WIDTH-1 downto 0);
#type cache_tags_array_t is array(index_t) of cache_tags_set_t;
def CacheTagArray():
    return Array(Signal(TAG_RAM_WIDTH) for x in range(NUM_LINES))

#-- The cache valid bits
#subtype cache_way_valids_t is std_ulogic_vector(NUM_WAYS-1 downto 0);
#type cache_valids_t is array(index_t) of cache_way_valids_t;
#type row_per_line_valid_t is array(0 to ROW_PER_LINE - 1) of std_ulogic;
def CacheValidBitsArray():
    return Array(Signal(NUM_WAYS) for x in range(NUM_LINES))

def RowPerLineValidArray():
    return Array(Signal() for x in range(ROW_PER_LINE))


#attribute ram_style : string;
#attribute ram_style of cache_tags : signal is "distributed";
# TODO to be passed to nmigen as ram attributes
# attribute ram_style : string;
# attribute ram_style of cache_tags : signal is "distributed";


#subtype tlb_index_t is integer range 0 to TLB_SIZE - 1;
#type tlb_valids_t is array(tlb_index_t) of std_ulogic;
#subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0);
#type tlb_tags_t is array(tlb_index_t) of tlb_tag_t;
#subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
#type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t;
def TLBValidBitsArray():
    return Array(Signal() for x in range(TLB_SIZE))

def TLBTagArray():
    return Array(Signal(TLB_EA_TAG_BITS) for x in range(TLB_SIZE))

def TLBPTEArray():
    return Array(Signal(TLB_PTE_BITS) for x in range(TLB_SIZE))


#-- Cache RAM interface
#type cache_ram_out_t is array(way_t) of cache_row_t;
# Cache RAM interface
def CacheRamOut():
    return Array(Signal(ROW_SIZE_BITS) for x in range(NUM_WAYS))

#-- PLRU output interface
#type plru_out_t is array(index_t) of
# std_ulogic_vector(WAY_BITS-1 downto 0);
# PLRU output interface
def PLRUOut():
    return Array(Signal(WAY_BITS) for x in range(NUM_LINES))

# -- Return the cache line index (tag index) for an address
# function get_index(addr: std_ulogic_vector(63 downto 0))
#     return index_t is
# begin
#     return to_integer(unsigned(
#      addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS)
#     ));
# end;
# Return the cache line index (tag index) for an address
def get_index(addr):
    return addr[LINE_OFF_BITS:SET_SIZE_BITS]

# -- Return the cache row index (data memory) for an address
# function get_row(addr: std_ulogic_vector(63 downto 0))
#     return row_t is
# begin
#     return to_integer(unsigned(
#      addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS)
#     ));
# end;
# Return the cache row index (data memory) for an address
def get_row(addr):
    return addr[ROW_OFF_BITS:SET_SIZE_BITS]

# -- Return the index of a row within a line
# function get_row_of_line(row: row_t) return row_in_line_t is
#     variable row_v : unsigned(ROW_BITS-1 downto 0);
# begin
#     row_v := to_unsigned(row, ROW_BITS);
#     return row_v(ROW_LINEBITS-1 downto 0);
# end;
# Return the index of a row within a line
def get_row_of_line(row):
    return row[:ROW_LINE_BITS]

# -- Returns whether this is the last row of a line
# function is_last_row_addr(addr: wishbone_addr_type;
#     last: row_in_line_t) return boolean is
# begin
#     return unsigned(
#      addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS)
#     ) = last;
# end;
# Returns whether this is the last row of a line
def is_last_row_addr(addr, last):
    return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last

# -- Returns whether this is the last row of a line
# function is_last_row(row: row_t;
#     last: row_in_line_t) return boolean is
# begin
#     return get_row_of_line(row) = last;
# end;
# Returns whether this is the last row of a line
def is_last_row(row, last):
    return get_row_of_line(row) == last

# -- Return the address of the next row in the current cache line
# function next_row_addr(addr: wishbone_addr_type)
#     return std_ulogic_vector is
#     variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
#     variable result  : wishbone_addr_type;
# begin
#     -- Is there no simpler way in VHDL to generate that 3 bits adder ?
#     row_idx := addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS);
#     row_idx := std_ulogic_vector(unsigned(row_idx) + 1);
#     result := addr;
#     result(LINE_OFF_BITS-1 downto ROW_OFF_BITS) := row_idx;
#     return result;
# end;
# Return the address of the next row in the current cache line
def next_row_addr(addr):
    # The successive VHDL ":=" variable assignments take effect
    # immediately, so the second one simply overwrites the first:
    # the net effect is to increment the row-index field of the
    # address.  Rebuild the address with bits
    # [ROW_OFF_BITS:LINE_OFF_BITS] incremented (wrapping within
    # the cache line).
    row_idx = addr[ROW_OFF_BITS:LINE_OFF_BITS] + 1
    return Cat(addr[:ROW_OFF_BITS],
               row_idx[:ROW_LINE_BITS],
               addr[LINE_OFF_BITS:])

# -- Return the next row in the current cache line. We use a dedicated
# -- function in order to limit the size of the generated adder to be
# -- only the bits within a cache line (3 bits with default settings)
# function next_row(row: row_t) return row_t is
#     variable row_v   : std_ulogic_vector(ROW_BITS-1 downto 0);
#     variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
#     variable result  : std_ulogic_vector(ROW_BITS-1 downto 0);
# begin
#     row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
#     row_idx := row_v(ROW_LINEBITS-1 downto 0);
#     row_v(ROW_LINEBITS-1 downto 0) :=
#      std_ulogic_vector(unsigned(row_idx) + 1);
#     return to_integer(unsigned(row_v));
# end;
# Return the next row in the current cache line. We use a dedicated
# function in order to limit the size of the generated adder to be
# only the bits within a cache line (3 bits with default settings)
def next_row(row):
    # as in next_row_addr: only the row-within-line bits are
    # incremented, so the adder stays ROW_LINE_BITS wide
    row_idx = row[:ROW_LINE_BITS] + 1
    return Cat(row_idx[:ROW_LINE_BITS], row[ROW_LINE_BITS:])

# -- Read the instruction word for the given address in the
# -- current cache row
# function read_insn_word(addr: std_ulogic_vector(63 downto 0);
#     data: cache_row_t) return std_ulogic_vector is
#     variable word: integer range 0 to INSN_PER_ROW-1;
# begin
#     word := to_integer(unsigned(addr(INSN_BITS+2-1 downto 2)));
#     return data(31+word*32 downto word*32);
# end;
# Read the instruction word for the given address
# in the current cache row
def read_insn_word(addr, data):
    # addr(INSN_BITS+2-1 downto 2) in the VHDL, i.e. [2:INSN_BITS+2]
    word = addr[2:INSN_BITS+2]
    return data.word_select(word, 32)
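
# (example: with the 64-bit rows used here, INSN_PER_ROW=2 and
# INSN_BITS=1, so addr[2:3] is a single bit choosing between the
# two 32-bit instruction slots of a row)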

# -- Get the tag value from the address
# function get_tag(
#     addr: std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0)
# ) return cache_tag_t is
# begin
#     return addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS);
# end;
# Get the tag value from the address
def get_tag(addr):
    return addr[SET_SIZE_BITS:REAL_ADDR_BITS]

# -- Read a tag from a tag memory row
# function read_tag(way: way_t; tagset: cache_tags_set_t)
#     return cache_tag_t is
# begin
#     return tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS);
# end;
# Read a tag from a tag memory row
def read_tag(way, tagset):
    return tagset[way * TAG_BITS:(way + 1) * TAG_BITS]

# -- Write a tag to tag memory row
# procedure write_tag(way: in way_t;
#     tagset: inout cache_tags_set_t; tag: cache_tag_t) is
# begin
#     tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS) := tag;
# end;
# Write a tag to tag memory row
def write_tag(way, tagset, tag):
    # return an assignment for the caller to add to a domain, e.g.
    # "comb += write_tag(way, tagset, tag)": python item-assignment
    # to a slice of a Signal is not supported by nmigen
    return read_tag(way, tagset).eq(tag)

# -- Simple hash for direct-mapped TLB index
# function hash_ea(addr: std_ulogic_vector(63 downto 0))
#     return tlb_index_t is
#     variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
# begin
#     hash := addr(TLB_LG_PGSZ + TLB_BITS - 1 downto TLB_LG_PGSZ)
#             xor addr(TLB_LG_PGSZ + 2 * TLB_BITS - 1 downto
#                      TLB_LG_PGSZ + TLB_BITS)
#             xor addr(TLB_LG_PGSZ + 3 * TLB_BITS - 1 downto
#                      TLB_LG_PGSZ + 2 * TLB_BITS);
#     return to_integer(unsigned(hash));
# end;
# Simple hash for direct-mapped TLB index
def hash_ea(addr):
    hsh = (addr[TLB_LG_PGSZ:TLB_LG_PGSZ + TLB_BITS] ^
           addr[TLB_LG_PGSZ + TLB_BITS:TLB_LG_PGSZ + 2 * TLB_BITS] ^
           addr[TLB_LG_PGSZ + 2 * TLB_BITS:TLB_LG_PGSZ + 3 * TLB_BITS])
    return hsh
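
# (example: with TLB_LG_PGSZ=12 and TLB_BITS=6 this folds addr[12:18],
# addr[18:24] and addr[24:30] together with XOR to form the 6-bit
# TLB index)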

# begin
#
#     assert LINE_SIZE mod ROW_SIZE = 0;
#     assert ispow2(LINE_SIZE) report "LINE_SIZE not power of 2"
#         severity FAILURE;
#     assert ispow2(NUM_LINES) report "NUM_LINES not power of 2"
#         severity FAILURE;
#     assert ispow2(ROW_PER_LINE) report "ROW_PER_LINE not power of 2"
#         severity FAILURE;
#     assert ispow2(INSN_PER_ROW) report "INSN_PER_ROW not power of 2"
#         severity FAILURE;
#     assert (ROW_BITS = INDEX_BITS + ROW_LINEBITS)
#         report "geometry bits don't add up" severity FAILURE;
#     assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS)
#         report "geometry bits don't add up" severity FAILURE;
#     assert (REAL_ADDR_BITS = TAG_BITS + INDEX_BITS + LINE_OFF_BITS)
#         report "geometry bits don't add up" severity FAILURE;
#     assert (REAL_ADDR_BITS = TAG_BITS + ROW_BITS + ROW_OFF_BITS)
#         report "geometry bits don't add up" severity FAILURE;
#
#     sim_debug: if SIM generate
#     debug: process
#     begin
#         report "ROW_SIZE = " & natural'image(ROW_SIZE);
#         report "ROW_PER_LINE = " & natural'image(ROW_PER_LINE);
#         report "BRAM_ROWS = " & natural'image(BRAM_ROWS);
#         report "INSN_PER_ROW = " & natural'image(INSN_PER_ROW);
#         report "INSN_BITS = " & natural'image(INSN_BITS);
#         report "ROW_BITS = " & natural'image(ROW_BITS);
#         report "ROW_LINEBITS = " & natural'image(ROW_LINEBITS);
#         report "LINE_OFF_BITS = " & natural'image(LINE_OFF_BITS);
#         report "ROW_OFF_BITS = " & natural'image(ROW_OFF_BITS);
#         report "INDEX_BITS = " & natural'image(INDEX_BITS);
#         report "TAG_BITS = " & natural'image(TAG_BITS);
#         report "WAY_BITS = " & natural'image(WAY_BITS);
#         wait;
#     end process;
#     end generate;
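
# Sanity-check the geometry at import time, mirroring the VHDL asserts
# above (a minimal translation: the power-of-2 checks are implicit in
# log2_int, which raises on a non-power-of-2 argument).
assert LINE_SIZE % ROW_SIZE == 0, "LINE_SIZE not multiple of ROW_SIZE"
assert ROW_BITS == INDEX_BITS + ROW_LINE_BITS, \
    "geometry bits don't add up"
assert LINE_OFF_BITS == ROW_OFF_BITS + ROW_LINE_BITS, \
    "geometry bits don't add up"
assert REAL_ADDR_BITS == TAG_BITS + INDEX_BITS + LINE_OFF_BITS, \
    "geometry bits don't add up"
assert REAL_ADDR_BITS == TAG_BITS + ROW_BITS + ROW_OFF_BITS, \
    "geometry bits don't add up"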

# Cache reload state machine
@unique
class State(Enum):
    IDLE = 0
    CLR_TAG = 1
    WAIT_ACK = 2

# type reg_internal_t is record
#     -- Cache hit state (Latches for 1 cycle BRAM access)
#     hit_way   : way_t;
#     hit_nia   : std_ulogic_vector(63 downto 0);
#     hit_smark : std_ulogic;
#     hit_valid : std_ulogic;
#
#     -- Cache miss state (reload state machine)
#     state       : state_t;
#     wb          : wishbone_master_out;
#     store_way   : way_t;
#     store_index : index_t;
#     store_row   : row_t;
#     store_tag   : cache_tag_t;
#     store_valid : std_ulogic;
#     end_row_ix  : row_in_line_t;
#     rows_valid  : row_per_line_valid_t;
#
#     -- TLB miss state
#     fetch_failed : std_ulogic;
# end record;
class RegInternal(RecordObject):
    def __init__(self):
        super().__init__()
        # Cache hit state (Latches for 1 cycle BRAM access)
        self.hit_way = Signal(WAY_BITS)
        self.hit_nia = Signal(64)
        self.hit_smark = Signal()
        self.hit_valid = Signal()

        # Cache miss state (reload state machine)
        # way/index/row fields are indices, so they use the log2
        # widths of their VHDL subtypes (way_t, index_t, row_t)
        self.state = Signal(State)
        self.wb = WBMasterOut()
        self.store_way = Signal(WAY_BITS)
        self.store_index = Signal(INDEX_BITS)
        self.store_row = Signal(ROW_BITS)
        self.store_tag = Signal(TAG_BITS)
        self.store_valid = Signal()
        self.end_row_ix = Signal(ROW_LINE_BITS)
        self.rows_valid = RowPerLineValidArray()

        # TLB miss state
        self.fetch_failed = Signal()

# -- 64 bit direct mapped icache. All instructions are 4B aligned.
#
# entity icache is
#     generic (
#         SIM : boolean := false;
#         -- Line size in bytes
#         LINE_SIZE : positive := 64;
#         -- BRAM organisation: We never access more
#         -- than wishbone_data_bits
#         -- at a time so to save resources we make the
#         -- array only that wide,
#         -- and use consecutive indices for to make a cache "line"
#         --
#         -- ROW_SIZE is the width in bytes of the BRAM (based on WB,
#         -- so 64-bits)
#         ROW_SIZE : positive := wishbone_data_bits / 8;
#         -- Number of lines in a set
#         NUM_LINES : positive := 32;
#         -- Number of ways
#         NUM_WAYS : positive := 4;
#         -- L1 ITLB number of entries (direct mapped)
#         TLB_SIZE : positive := 64;
#         -- L1 ITLB log_2(page_size)
#         TLB_LG_PGSZ : positive := 12;
#         -- Number of real address bits that we store
#         REAL_ADDR_BITS : positive := 56;
#         -- Non-zero to enable log data collection
#         LOG_LENGTH : natural := 0
#     );
#     port (
#         clk : in std_ulogic;
#         rst : in std_ulogic;
#
#         i_in  : in Fetch1ToIcacheType;
#         i_out : out IcacheToDecode1Type;
#
#         m_in  : in MmuToIcacheType;
#
#         stall_in  : in std_ulogic;
#         stall_out : out std_ulogic;
#         flush_in  : in std_ulogic;
#         inval_in  : in std_ulogic;
#
#         wishbone_out : out wishbone_master_out;
#         wishbone_in  : in wishbone_slave_out;
#
#         log_out : out std_ulogic_vector(53 downto 0)
#     );
# end entity icache;
# 64 bit direct mapped icache. All instructions are 4B aligned.
class ICache(Elaboratable):
    """64 bit direct mapped icache. All instructions are 4B aligned."""
    def __init__(self):
        self.i_in = Fetch1ToICacheType()
        self.i_out = ICacheToDecode1Type()

        self.m_in = MMUToICacheType()

        self.stall_in = Signal()
        self.stall_out = Signal()
        self.flush_in = Signal()
        self.inval_in = Signal()

        self.wb_out = WBMasterOut()
        self.wb_in = WBSlaveOut()

        self.log_out = Signal(54)


    # -- Generate a cache RAM for each way
    # rams: for i in 0 to NUM_WAYS-1 generate
    #     signal do_read  : std_ulogic;
    #     signal do_write : std_ulogic;
    #     signal rd_addr  : std_ulogic_vector(ROW_BITS-1 downto 0);
    #     signal wr_addr  : std_ulogic_vector(ROW_BITS-1 downto 0);
    #     signal dout     : cache_row_t;
    #     signal wr_sel   : std_ulogic_vector(ROW_SIZE-1 downto 0);
    # begin
    #     way: entity work.cache_ram
    #         generic map (
    #             ROW_BITS => ROW_BITS,
    #             WIDTH => ROW_SIZE_BITS
    #         )
    #         port map (
    #             clk     => clk,
    #             rd_en   => do_read,
    #             rd_addr => rd_addr,
    #             rd_data => dout,
    #             wr_sel  => wr_sel,
    #             wr_addr => wr_addr,
    #             wr_data => wishbone_in.dat
    #         );
    #     process(all)
    #     begin
    #         do_read <= not (stall_in or use_previous);
    #         do_write <= '0';
    #         if wishbone_in.ack = '1' and replace_way = i then
    #             do_write <= '1';
    #         end if;
    #         cache_out(i) <= dout;
    #         rd_addr <=
    #          std_ulogic_vector(to_unsigned(req_row, ROW_BITS));
    #         wr_addr <=
    #          std_ulogic_vector(to_unsigned(r.store_row, ROW_BITS));
    #         for i in 0 to ROW_SIZE-1 loop
    #             wr_sel(i) <= do_write;
    #         end loop;
    #     end process;
    # end generate;
    def rams(self, m, r, cache_out, use_previous, replace_way, req_row):
        comb = m.d.comb

        wb_in, stall_in = self.wb_in, self.stall_in

        for i in range(NUM_WAYS):
            # one set of signals per way: the VHDL declares these
            # inside the generate loop, and rd_data is an output of
            # the RAM, driving _d_out (not the other way around)
            do_read = Signal(name="do_rd_%d" % i)
            do_write = Signal(name="do_wr_%d" % i)
            rd_addr = Signal(ROW_BITS)
            wr_addr = Signal(ROW_BITS)
            _d_out = Signal(ROW_SIZE_BITS, name="d_out_%d" % i)
            wr_sel = Signal(ROW_SIZE)

            way = CacheRam(ROW_BITS, ROW_SIZE_BITS)
            m.submodules += way
            comb += way.rd_en.eq(do_read)
            comb += way.rd_addr.eq(rd_addr)
            comb += _d_out.eq(way.rd_data_o)
            comb += way.wr_sel.eq(wr_sel)
            comb += way.wr_addr.eq(wr_addr)
            comb += way.wr_data.eq(wb_in.dat)

            comb += do_read.eq(~(stall_in | use_previous))
            comb += do_write.eq(0)

            with m.If(wb_in.ack & (replace_way == i)):
                comb += do_write.eq(1)

            comb += cache_out[i].eq(_d_out)
            comb += rd_addr.eq(req_row)
            comb += wr_addr.eq(r.store_row)
            for j in range(ROW_SIZE):
                comb += wr_sel[j].eq(do_write)

    # -- Generate PLRUs
    # maybe_plrus: if NUM_WAYS > 1 generate
    # begin
    #     plrus: for i in 0 to NUM_LINES-1 generate
    #         -- PLRU interface
    #         signal plru_acc    : std_ulogic_vector(WAY_BITS-1 downto 0);
    #         signal plru_acc_en : std_ulogic;
    #         signal plru_out    : std_ulogic_vector(WAY_BITS-1 downto 0);
    #
    #     begin
    #         plru : entity work.plru
    #             generic map (
    #                 BITS => WAY_BITS
    #             )
    #             port map (
    #                 clk => clk,
    #                 rst => rst,
    #                 acc => plru_acc,
    #                 acc_en => plru_acc_en,
    #                 lru => plru_out
    #             );
    #
    #         process(all)
    #         begin
    #             -- PLRU interface
    #             if get_index(r.hit_nia) = i then
    #                 plru_acc_en <= r.hit_valid;
    #             else
    #                 plru_acc_en <= '0';
    #             end if;
    #             plru_acc <=
    #              std_ulogic_vector(to_unsigned(r.hit_way, WAY_BITS));
    #             plru_victim(i) <= plru_out;
    #         end process;
    #     end generate;
    # end generate;
    def maybe_plrus(self, m, r, plru_victim):
        comb = m.d.comb

        # NUM_WAYS is an elaboration-time constant, so a plain
        # python "if" matches the VHDL generate-if
        if NUM_WAYS > 1:
            for i in range(NUM_LINES):
                plru = PLRU(WAY_BITS)
                m.submodules += plru

                # PLRU interface
                with m.If(get_index(r.hit_nia) == i):
                    comb += plru.acc_en.eq(r.hit_valid)
                with m.Else():
                    comb += plru.acc_en.eq(0)

                comb += plru.acc.eq(r.hit_way)
                comb += plru_victim[i].eq(plru.lru_o)

    # -- TLB hit detection and real address generation
    # itlb_lookup : process(all)
    #     variable pte  : tlb_pte_t;
    #     variable ttag : tlb_tag_t;
    # begin
    #     tlb_req_index <= hash_ea(i_in.nia);
    #     pte := itlb_ptes(tlb_req_index);
    #     ttag := itlb_tags(tlb_req_index);
    #     if i_in.virt_mode = '1' then
    #         real_addr <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
    #                      i_in.nia(TLB_LG_PGSZ - 1 downto 0);
    #         if ttag = i_in.nia(63 downto TLB_LG_PGSZ + TLB_BITS) then
    #             ra_valid <= itlb_valids(tlb_req_index);
    #         else
    #             ra_valid <= '0';
    #         end if;
    #         eaa_priv <= pte(3);
    #     else
    #         real_addr <= i_in.nia(REAL_ADDR_BITS - 1 downto 0);
    #         ra_valid <= '1';
    #         eaa_priv <= '1';
    #     end if;
    #
    #     -- no IAMR, so no KUEP support for now
    #     priv_fault <= eaa_priv and not i_in.priv_mode;
    #     access_ok <= ra_valid and not priv_fault;
    # end process;
    # TLB hit detection and real address generation
    def itlb_lookup(self, m, tlb_req_index, itlb_ptes, itlb_tags,
                    real_addr, itlb_valid_bits, ra_valid, eaa_priv,
                    priv_fault, access_ok):
        comb = m.d.comb

        i_in = self.i_in

        pte = Signal(TLB_PTE_BITS)
        ttag = Signal(TLB_EA_TAG_BITS)

        comb += tlb_req_index.eq(hash_ea(i_in.nia))
        comb += pte.eq(itlb_ptes[tlb_req_index])
        comb += ttag.eq(itlb_tags[tlb_req_index])

        with m.If(i_in.virt_mode):
            comb += real_addr.eq(Cat(
                     i_in.nia[:TLB_LG_PGSZ],
                     pte[TLB_LG_PGSZ:REAL_ADDR_BITS]
                    ))

            with m.If(ttag == i_in.nia[TLB_LG_PGSZ + TLB_BITS:64]):
                comb += ra_valid.eq(itlb_valid_bits[tlb_req_index])
            with m.Else():
                comb += ra_valid.eq(0)

            # privilege bit from the PTE EAA field
            # ("eaa_priv <= pte(3)" in the VHDL above)
            comb += eaa_priv.eq(pte[3])

        with m.Else():
            comb += real_addr.eq(i_in.nia[:REAL_ADDR_BITS])
            comb += ra_valid.eq(1)
            comb += eaa_priv.eq(1)

        # No IAMR, so no KUEP support for now
        comb += priv_fault.eq(eaa_priv & ~i_in.priv_mode)
        comb += access_ok.eq(ra_valid & ~priv_fault)

    # -- iTLB update
    # itlb_update: process(clk)
    #     variable wr_index : tlb_index_t;
    # begin
    #     if rising_edge(clk) then
    #         wr_index := hash_ea(m_in.addr);
    #         if rst = '1' or
    #             (m_in.tlbie = '1' and m_in.doall = '1') then
    #             -- clear all valid bits
    #             for i in tlb_index_t loop
    #                 itlb_valids(i) <= '0';
    #             end loop;
    #         elsif m_in.tlbie = '1' then
    #             -- clear entry regardless of hit or miss
    #             itlb_valids(wr_index) <= '0';
    #         elsif m_in.tlbld = '1' then
    #             itlb_tags(wr_index) <=
    #              m_in.addr(63 downto TLB_LG_PGSZ + TLB_BITS);
    #             itlb_ptes(wr_index) <= m_in.pte;
    #             itlb_valids(wr_index) <= '1';
    #         end if;
    #     end if;
    # end process;
    # iTLB update
    def itlb_update(self, m, itlb_valid_bits, itlb_tags, itlb_ptes):
        comb = m.d.comb
        sync = m.d.sync

        m_in = self.m_in

        wr_index = Signal(TLB_BITS)
        comb += wr_index.eq(hash_ea(m_in.addr))

        # (the rst term of the VHDL is covered by nmigen's synchronous
        # reset, which clears all the valid bits)
        with m.If(m_in.tlbie & m_in.doall):
            # Clear all valid bits
            for i in range(TLB_SIZE):
                sync += itlb_valid_bits[i].eq(0)

        with m.Elif(m_in.tlbie):
            # Clear entry regardless of hit or miss
            sync += itlb_valid_bits[wr_index].eq(0)

        with m.Elif(m_in.tlbld):
            sync += itlb_tags[wr_index].eq(
                     m_in.addr[TLB_LG_PGSZ + TLB_BITS:64]
                    )
            sync += itlb_ptes[wr_index].eq(m_in.pte)
            sync += itlb_valid_bits[wr_index].eq(1)

    # -- Cache hit detection, output to fetch2 and other misc logic
    # icache_comb : process(all)
    # Cache hit detection, output to fetch2 and other misc logic
    def icache_comb(self, m, use_previous, r, req_index, req_row,
                    req_hit_way, req_tag, real_addr, req_laddr,
                    cache_valid_bits, cache_tags, access_ok, req_is_hit,
                    req_is_miss, replace_way, plru_victim, cache_out):
        # variable is_hit  : std_ulogic;
        # variable hit_way : way_t;
        comb = m.d.comb

        i_in, i_out, wb_out = self.i_in, self.i_out, self.wb_out
        flush_in, stall_out = self.flush_in, self.stall_out

        is_hit = Signal()
        hit_way = Signal(WAY_BITS)
        # begin
        # -- i_in.sequential means that i_in.nia this cycle
        # -- is 4 more than last cycle. If we read more
        # -- than 32 bits at a time, had a cache hit last
        # -- cycle, and we don't want the first 32-bit chunk
        # -- then we can keep the data we read last cycle
        # -- and just use that.
        # if unsigned(i_in.nia(INSN_BITS+2-1 downto 2)) /= 0 then
        #     use_previous <= i_in.sequential and r.hit_valid;
        # else
        #     use_previous <= '0';
        # end if;
        # i_in.sequential means that i_in.nia this cycle is 4 more than
        # last cycle. If we read more than 32 bits at a time, had a
        # cache hit last cycle, and we don't want the first 32-bit chunk
        # then we can keep the data we read last cycle and just use that.
        with m.If(i_in.nia[2:INSN_BITS+2] != 0):
            comb += use_previous.eq(i_in.sequential & r.hit_valid)
        with m.Else():
            comb += use_previous.eq(0)

        # -- Extract line, row and tag from request
        # req_index <= get_index(i_in.nia);
        # req_row <= get_row(i_in.nia);
        # req_tag <= get_tag(real_addr);
        # Extract line, row and tag from request
        comb += req_index.eq(get_index(i_in.nia))
        comb += req_row.eq(get_row(i_in.nia))
        comb += req_tag.eq(get_tag(real_addr))

        # -- Calculate address of beginning of cache row, will be
        # -- used for cache miss processing if needed
        # req_laddr <=
        #  (63 downto REAL_ADDR_BITS => '0') &
        #  real_addr(REAL_ADDR_BITS - 1 downto ROW_OFF_BITS) &
        #  (ROW_OFF_BITS-1 downto 0 => '0');
        # Calculate address of beginning of cache row, will be
        # used for cache miss processing if needed
        comb += req_laddr.eq(Cat(
                 Const(0, ROW_OFF_BITS),
                 real_addr[ROW_OFF_BITS:REAL_ADDR_BITS],
                 Const(0, 64 - REAL_ADDR_BITS)  # zero-pad to 64 bits
                ))

        # -- Test if pending request is a hit on any way
        # hit_way := 0;
        # is_hit := '0';
        # for i in way_t loop
        #     if i_in.req = '1' and
        #         (cache_valids(req_index)(i) = '1' or
        #          (r.state = WAIT_ACK and
        #           req_index = r.store_index and
        #           i = r.store_way and
        #           r.rows_valid(req_row mod ROW_PER_LINE) = '1')) then
        #         if read_tag(i, cache_tags(req_index)) = req_tag then
        #             hit_way := i;
        #             is_hit := '1';
        #         end if;
        #     end if;
        # end loop;
        # Test if pending request is a hit on any way
        for i in range(NUM_WAYS):
            with m.If(i_in.req &
                      (cache_valid_bits[req_index][i] |
                       ((r.state == State.WAIT_ACK)
                        & (req_index == r.store_index)
                        & (i == r.store_way)
                        & r.rows_valid[req_row % ROW_PER_LINE]))):
                with m.If(read_tag(i, cache_tags[req_index]) == req_tag):
                    comb += hit_way.eq(i)
                    comb += is_hit.eq(1)

        # -- Generate the "hit" and "miss" signals
        # -- for the synchronous blocks
        # if i_in.req = '1' and access_ok = '1' and flush_in = '0'
        #     and rst = '0' then
        #     req_is_hit  <= is_hit;
        #     req_is_miss <= not is_hit;
        # else
        #     req_is_hit  <= '0';
        #     req_is_miss <= '0';
        # end if;
        # req_hit_way <= hit_way;
        # Generate the "hit" and "miss" signals
        # for the synchronous blocks
        with m.If(i_in.req & access_ok & ~flush_in):
            comb += req_is_hit.eq(is_hit)
            comb += req_is_miss.eq(~is_hit)
        with m.Else():
            comb += req_is_hit.eq(0)
            comb += req_is_miss.eq(0)

        comb += req_hit_way.eq(hit_way)

        # -- The way to replace on a miss
        # if r.state = CLR_TAG then
        #     replace_way <=
        #         to_integer(unsigned(plru_victim(r.store_index)));
        # else
        #     replace_way <= r.store_way;
        # end if;
        # The way to replace on a miss
        with m.If(r.state == State.CLR_TAG):
            comb += replace_way.eq(plru_victim[r.store_index])
        with m.Else():
            comb += replace_way.eq(r.store_way)

        # -- Output instruction from current cache row
        # --
        # -- Note: This is a mild violation of our design principle of
        # -- having pipeline stages output from a clean latch. In this
        # -- case we output the result of a mux. The alternative would
        # -- be output an entire row which I prefer not to do just yet
        # -- as it would force fetch2 to know about some of the cache
        # -- geometry information.
        # i_out.insn <= read_insn_word(r.hit_nia, cache_out(r.hit_way));
        # i_out.valid <= r.hit_valid;
        # i_out.nia <= r.hit_nia;
        # i_out.stop_mark <= r.hit_smark;
        # i_out.fetch_failed <= r.fetch_failed;
        # Output instruction from current cache row
        #
        # Note: This is a mild violation of our design principle of
        # having pipeline stages output from a clean latch. In this
        # case we output the result of a mux. The alternative would
        # be output an entire row which I prefer not to do just yet
        # as it would force fetch2 to know about some of the cache
        # geometry information.
        comb += i_out.insn.eq(
                 read_insn_word(r.hit_nia, cache_out[r.hit_way])
                )
        comb += i_out.valid.eq(r.hit_valid)
        comb += i_out.nia.eq(r.hit_nia)
        comb += i_out.stop_mark.eq(r.hit_smark)
        comb += i_out.fetch_failed.eq(r.fetch_failed)

        # -- Stall fetch1 if we have a miss on cache or TLB
        # -- or a protection fault
        # stall_out <= not (is_hit and access_ok);
        # Stall fetch1 if we have a miss on cache or TLB
        # or a protection fault
        comb += stall_out.eq(~(is_hit & access_ok))

        # -- Wishbone requests output (from the cache miss reload machine)
        # wishbone_out <= r.wb;
        # Wishbone requests output (from the cache miss reload machine)
        comb += wb_out.eq(r.wb)
        # end process;

    # -- Cache hit synchronous machine
    # icache_hit : process(clk)
    # Cache hit synchronous machine
    def icache_hit(self, m, use_previous, r, req_is_hit, req_hit_way,
                   req_index, req_tag, real_addr):
        sync = m.d.sync

        i_in, stall_in = self.i_in, self.stall_in
        flush_in = self.flush_in

        # begin
        # if rising_edge(clk) then
        #     -- keep outputs to fetch2 unchanged on a stall
        #     -- except that flush or reset sets valid to 0
        #     -- If use_previous, keep the same data as last
        #     -- cycle and use the second half
        #     if stall_in = '1' or use_previous = '1' then
        #         if rst = '1' or flush_in = '1' then
        #             r.hit_valid <= '0';
        #         end if;
        # keep outputs to fetch2 unchanged on a stall
        # except that flush or reset sets valid to 0
        # If use_previous, keep the same data as last
        # cycle and use the second half
        with m.If(stall_in | use_previous):
            with m.If(flush_in):
                sync += r.hit_valid.eq(0)
        # else
        #     -- On a hit, latch the request for the next cycle,
        #     -- when the BRAM data will be available on the
        #     -- cache_out output of the corresponding way
        #     r.hit_valid <= req_is_hit;
        #     if req_is_hit = '1' then
        #         r.hit_way <= req_hit_way;
        with m.Else():
            # On a hit, latch the request for the next cycle,
            # when the BRAM data will be available on the
            # cache_out output of the corresponding way
            sync += r.hit_valid.eq(req_is_hit)

            with m.If(req_is_hit):
                sync += r.hit_way.eq(req_hit_way)

                # report "cache hit nia:" & to_hstring(i_in.nia) &
                #  " IR:" & std_ulogic'image(i_in.virt_mode) &
                #  " SM:" & std_ulogic'image(i_in.stop_mark) &
                #  " idx:" & integer'image(req_index) &
                #  " tag:" & to_hstring(req_tag) &
                #  " way:" & integer'image(req_hit_way) &
                #  " RA:" & to_hstring(real_addr);
                # (Display reports at simulation time; a plain python
                # print here would fire once, at elaboration)
                sync += Display("cache hit nia:%x IR:%x SM:%x idx:%x "
                                "tag:%x way:%x RA:%x",
                                i_in.nia, i_in.virt_mode, i_in.stop_mark,
                                req_index, req_tag, req_hit_way, real_addr)
        #     end if;
        # end if;
        # if stall_in = '0' then
        #     -- Send stop marks and NIA down regardless of validity
        #     r.hit_smark <= i_in.stop_mark;
        #     r.hit_nia <= i_in.nia;
        # end if;
        with m.If(~stall_in):
            # Send stop marks and NIA down regardless of validity
            sync += r.hit_smark.eq(i_in.stop_mark)
            sync += r.hit_nia.eq(i_in.nia)
        # end if;
        # end process;

    # -- Cache miss/reload synchronous machine
    # icache_miss : process(clk)
    # Cache miss/reload synchronous machine
    def icache_miss(self, m, cache_valid_bits, r, req_is_miss,
                    req_index, req_laddr, req_tag, replace_way,
                    cache_tags, access_ok, real_addr):
        comb = m.d.comb
        sync = m.d.sync

        i_in, wb_in, m_in = self.i_in, self.wb_in, self.m_in
        stall_in, flush_in = self.stall_in, self.flush_in
        inval_in = self.inval_in

        # variable tagset    : cache_tags_set_t;
        # variable stbs_done : boolean;

        tagset = Signal(TAG_RAM_WIDTH)
        stbs_done = Signal()

        # begin
        # if rising_edge(clk) then
        #     -- On reset, clear all valid bits to force misses
        #     if rst = '1' then
        #         for i in index_t loop
        #             cache_valids(i) <= (others => '0');
        #         end loop;
        #         r.state <= IDLE;
        #         r.wb.cyc <= '0';
        #         r.wb.stb <= '0';
        #         -- We only ever do reads on wishbone
        #         r.wb.dat <= (others => '0');
        #         r.wb.sel <= "11111111";
        #         r.wb.we  <= '0';
        # (the rst branch is covered by nmigen's synchronous reset,
        # which clears the valid bits, r.state and the wb strobes)

        # We only ever do reads on wishbone
        comb += r.wb.sel.eq(~0) # set to all 1s

        # -- Not useful normally but helps avoiding
        # -- tons of sim warnings
        # r.wb.adr <= (others => '0');

        # else

        # -- Process cache invalidations
        # if inval_in = '1' then
        #     for i in index_t loop
        #         cache_valids(i) <= (others => '0');
        #     end loop;
        #     r.store_valid <= '0';
        # end if;
        # Process cache invalidations
        with m.If(inval_in):
            for i in range(NUM_LINES):
                sync += cache_valid_bits[i].eq(0)
            sync += r.store_valid.eq(0)

        # -- Main state machine
        # case r.state is
        # Main state machine
        with m.Switch(r.state):

            # when IDLE =>
            with m.Case(State.IDLE):
                # -- Reset per-row valid flags,
                # -- only used in WAIT_ACK
                # for i in 0 to ROW_PER_LINE - 1 loop
                #     r.rows_valid(i) <= '0';
                # end loop;
                # Reset per-row valid flags,
                # only used in WAIT_ACK
                for i in range(ROW_PER_LINE):
                    sync += r.rows_valid[i].eq(0)

                # -- We need to read a cache line
                # if req_is_miss = '1' then
                #     report "cache miss nia:" & to_hstring(i_in.nia) &
                #      " IR:" & std_ulogic'image(i_in.virt_mode) &
                #      " SM:" & std_ulogic'image(i_in.stop_mark) &
                #      " idx:" & integer'image(req_index) &
                #      " way:" & integer'image(replace_way) &
                #      " tag:" & to_hstring(req_tag) &
                #      " RA:" & to_hstring(real_addr);
                # We need to read a cache line
                with m.If(req_is_miss):
                    sync += Display("cache miss nia:%x IR:%x SM:%x "
                                    "idx:%x way:%x tag:%x RA:%x",
                                    i_in.nia, i_in.virt_mode,
                                    i_in.stop_mark, req_index,
                                    replace_way, req_tag, real_addr)

                    # -- Keep track of our index and way for
                    # -- subsequent stores
                    # r.store_index <= req_index;
                    # r.store_row <= get_row(req_laddr);
                    # r.store_tag <= req_tag;
                    # r.store_valid <= '1';
                    # r.end_row_ix <=
                    #     get_row_of_line(get_row(req_laddr)) - 1;
                    # Keep track of our index and way
                    # for subsequent stores
                    sync += r.store_index.eq(req_index)
                    sync += r.store_row.eq(get_row(req_laddr))
                    sync += r.store_tag.eq(req_tag)
                    sync += r.store_valid.eq(1)
                    sync += r.end_row_ix.eq(
                             get_row_of_line(get_row(req_laddr)) - 1
                            )

                    # -- Prep for first wishbone read. We calculate the
                    # -- address of the start of the cache line and
                    # -- start the WB cycle.
                    # r.wb.adr <= req_laddr(r.wb.adr'left downto 0);
                    # r.wb.cyc <= '1';
                    # r.wb.stb <= '1';
                    # Prep for first wishbone read. We calculate the
                    # address of the start of the cache line and
                    # start the WB cycle (req_laddr is wider than
                    # wb.adr: nmigen truncates to the target width)
                    sync += r.wb.adr.eq(req_laddr)
                    sync += r.wb.cyc.eq(1)
                    sync += r.wb.stb.eq(1)

                    # -- Track that we had one request sent
                    # r.state <= CLR_TAG;
                    # Track that we had one request sent
                    sync += r.state.eq(State.CLR_TAG)
                # end if;

            # when CLR_TAG | WAIT_ACK =>
            with m.Case(State.CLR_TAG, State.WAIT_ACK):
                # if r.state = CLR_TAG then
                with m.If(r.state == State.CLR_TAG):
                    # -- Get victim way from plru
                    # r.store_way <= replace_way;
                    # Get victim way from plru
                    sync += r.store_way.eq(replace_way)

                    # -- Force misses on that way while
                    # -- reloading that line
                    # cache_valids(req_index)(replace_way) <= '0';
                    # Force misses on that way while
                    # reloading that line
                    # (a Signal cannot be indexed by another Signal,
                    # so clear the bit via bit_select on a copy)
                    cv_clr = Signal(NUM_WAYS)
                    comb += cv_clr.eq(cache_valid_bits[req_index])
                    comb += cv_clr.bit_select(replace_way, 1).eq(0)
                    sync += cache_valid_bits[req_index].eq(cv_clr)

                    # -- Store new tag in selected way
                    # for i in 0 to NUM_WAYS-1 loop
                    #     if i = replace_way then
                    #         tagset := cache_tags(r.store_index);
                    #         write_tag(i, tagset, r.store_tag);
                    #         cache_tags(r.store_index) <= tagset;
                    #     end if;
                    # end loop;
                    for i in range(NUM_WAYS):
                        with m.If(i == replace_way):
                            comb += tagset.eq(cache_tags[r.store_index])
                            comb += write_tag(i, tagset, r.store_tag)
                            sync += cache_tags[r.store_index].eq(tagset)

                    # r.state <= WAIT_ACK;
                    sync += r.state.eq(State.WAIT_ACK)
                # end if;

                # -- Requests are all sent if stb is 0
                # stbs_done := r.wb.stb = '0';
                # Requests are all sent if stb is 0
                comb += stbs_done.eq(r.wb.stb == 0)

                # -- If we are still sending requests,
                # -- was one accepted ?
                # if wishbone_in.stall = '0' and not stbs_done then
                # If we are still sending requests,
                # was one accepted?
                with m.If(~wb_in.stall & ~stbs_done):
                    # -- That was the last word ? We are done sending.
                    # -- Clear stb and set stbs_done so we can handle
                    # -- an eventual last ack on the same cycle.
                    # if is_last_row_addr(r.wb.adr, r.end_row_ix) then
                    #     r.wb.stb <= '0';
                    #     stbs_done := true;
                    # end if;
                    # That was the last word? We are done sending.
                    # Clear stb and set stbs_done so we can handle
                    # an eventual last ack on the same cycle.
                    with m.If(is_last_row_addr(r.wb.adr, r.end_row_ix)):
                        sync += r.wb.stb.eq(0)
                        comb += stbs_done.eq(1)

                    # -- Calculate the next row address
                    # r.wb.adr <= next_row_addr(r.wb.adr);
                    # Calculate the next row address
                    sync += r.wb.adr.eq(next_row_addr(r.wb.adr))
                # end if;

                # -- Incoming acks processing
                # if wishbone_in.ack = '1' then
                # Incoming acks processing
                with m.If(wb_in.ack):
                    # r.rows_valid(r.store_row mod ROW_PER_LINE)
                    #     <= '1';
                    sync += r.rows_valid[
                             r.store_row % ROW_PER_LINE
                            ].eq(1)

                    # -- Check for completion
                    # if stbs_done and
                    #     is_last_row(r.store_row, r.end_row_ix) then
                    # Check for completion
                    with m.If(stbs_done &
                              is_last_row(r.store_row, r.end_row_ix)):
                        # -- Complete wishbone cycle
                        # r.wb.cyc <= '0';
                        # Complete wishbone cycle
                        sync += r.wb.cyc.eq(0)

                        # -- Cache line is now valid
                        # cache_valids(r.store_index)(replace_way) <=
                        #     r.store_valid and not inval_in;
                        # Cache line is now valid
                        cv_set = Signal(NUM_WAYS)
                        comb += cv_set.eq(cache_valid_bits[r.store_index])
                        comb += cv_set.bit_select(replace_way, 1).eq(
                                 r.store_valid & ~inval_in
                                )
                        sync += cache_valid_bits[r.store_index].eq(cv_set)

                        # -- We are done
                        # r.state <= IDLE;
                        # We are done
                        sync += r.state.eq(State.IDLE)
                    # end if;

                    # -- Increment store row counter
                    # r.store_row <= next_row(r.store_row);
                    # Increment store row counter
                    sync += r.store_row.eq(next_row(r.store_row))
                # end if;
            # end case;
        # end if;

        # -- TLB miss and protection fault processing
        # if rst = '1' or flush_in = '1' or m_in.tlbld = '1' then
        #     r.fetch_failed <= '0';
        # elsif i_in.req = '1' and access_ok = '0' and
        #     stall_in = '0' then
        #     r.fetch_failed <= '1';
        # end if;
        # TLB miss and protection fault processing
        # (the rst term is handled by nmigen's synchronous reset)
        with m.If(flush_in | m_in.tlbld):
            sync += r.fetch_failed.eq(0)
        with m.Elif(i_in.req & ~access_ok & ~stall_in):
            sync += r.fetch_failed.eq(1)
        # end if;
        # end process;

    # icache_log: if LOG_LENGTH > 0 generate
    def icache_log(self, m, req_hit_way, ra_valid, access_ok,
                   req_is_miss, req_is_hit, lway, wstate, r):
        comb = m.d.comb
        sync = m.d.sync

        wb_in, i_out = self.wb_in, self.i_out
        log_out, stall_out = self.log_out, self.stall_out

        # -- Output data to logger
        # signal log_data : std_ulogic_vector(53 downto 0);
        # begin
        # data_log: process(clk)
        #     variable lway: way_t;
        #     variable wstate: std_ulogic;
        # Output data to logger
        # (LOG_LENGTH is an elaboration-time constant, so a plain
        # python "if" matches the VHDL generate-if)
        if LOG_LENGTH > 0:
            log_data = Signal(54)
            lway = Signal(3)  # packed as 3 bits in the log, as in the VHDL
            wstate = Signal()

            # begin
            # if rising_edge(clk) then
            #     lway := req_hit_way;
            #     wstate := '0';
            comb += lway.eq(req_hit_way)
            comb += wstate.eq(0)

            # if r.state /= IDLE then
            #     wstate := '1';
            # end if;
            # (wstate is a VHDL variable, so both assignments belong
            # to the comb domain)
            with m.If(r.state != State.IDLE):
                comb += wstate.eq(1)

            # log_data <= i_out.valid &
            #             i_out.insn &
            #             wishbone_in.ack &
            #             r.wb.adr(5 downto 3) &
            #             r.wb.stb & r.wb.cyc &
            #             wishbone_in.stall &
            #             stall_out &
            #             r.fetch_failed &
            #             r.hit_nia(5 downto 2) &
            #             wstate &
            #             std_ulogic_vector(to_unsigned(lway, 3)) &
            #             req_is_hit & req_is_miss &
            #             access_ok &
            #             ra_valid;
            sync += log_data.eq(Cat(
                     ra_valid, access_ok, req_is_miss, req_is_hit,
                     lway, wstate, r.hit_nia[2:6],
                     r.fetch_failed, stall_out, wb_in.stall, r.wb.cyc,
                     r.wb.stb, r.wb.adr[3:6], wb_in.ack, i_out.insn,
                     i_out.valid
                    ))
            # end if;
            # end process;
            # log_out <= log_data;
            comb += log_out.eq(log_data)
        # end generate;
    # end;

    def elaborate(self, platform):

        m = Module()
        comb = m.d.comb

        # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
        cache_tags = CacheTagArray()
        cache_valid_bits = CacheValidBitsArray()

        # signal itlb_valids : tlb_valids_t;
        # signal itlb_tags : tlb_tags_t;
        # signal itlb_ptes : tlb_ptes_t;
        # attribute ram_style of itlb_tags : signal is "distributed";
        # attribute ram_style of itlb_ptes : signal is "distributed";
        itlb_valid_bits = TLBValidBitsArray()
        itlb_tags = TLBTagArray()
        itlb_ptes = TLBPTEArray()
        # TODO to be passed to nmigen as ram attributes
        # attribute ram_style of itlb_tags : signal is "distributed";
        # attribute ram_style of itlb_ptes : signal is "distributed";

        # -- Privilege bit from PTE EAA field
        # signal eaa_priv : std_ulogic;
        # Privilege bit from PTE EAA field
        eaa_priv = Signal()

        # signal r : reg_internal_t;
        r = RegInternal()

        # -- Async signals on incoming request
        # signal req_index : index_t;
        # signal req_row : row_t;
        # signal req_hit_way : way_t;
        # signal req_tag : cache_tag_t;
        # signal req_is_hit : std_ulogic;
        # signal req_is_miss : std_ulogic;
        # signal req_laddr : std_ulogic_vector(63 downto 0);
        # Async signals on incoming request
        # (indices use the log2 widths of their VHDL subtypes)
        req_index = Signal(INDEX_BITS)
        req_row = Signal(ROW_BITS)
        req_hit_way = Signal(WAY_BITS)
        req_tag = Signal(TAG_BITS)
        req_is_hit = Signal()
        req_is_miss = Signal()
        req_laddr = Signal(64)

        # signal tlb_req_index : tlb_index_t;
        # signal real_addr : std_ulogic_vector(
        #  REAL_ADDR_BITS - 1 downto 0
        # );
        # signal ra_valid : std_ulogic;
        # signal priv_fault : std_ulogic;
        # signal access_ok : std_ulogic;
        # signal use_previous : std_ulogic;
        tlb_req_index = Signal(TLB_BITS)
        real_addr = Signal(REAL_ADDR_BITS)
        ra_valid = Signal()
        priv_fault = Signal()
        access_ok = Signal()
        use_previous = Signal()

        # signal cache_out : cache_ram_out_t;
        cache_out = CacheRamOut()

        # signal plru_victim : plru_out_t;
        # signal replace_way : way_t;
        plru_victim = PLRUOut()
        replace_way = Signal(WAY_BITS)

        # call sub-functions putting everything together, using shared
        # signals established above
        self.rams(m, r, cache_out, use_previous, replace_way, req_row)
        self.maybe_plrus(m, r, plru_victim)
        self.itlb_lookup(m, tlb_req_index, itlb_ptes, itlb_tags,
                         real_addr, itlb_valid_bits, ra_valid, eaa_priv,
                         priv_fault, access_ok)
        self.itlb_update(m, itlb_valid_bits, itlb_tags, itlb_ptes)
        self.icache_comb(m, use_previous, r, req_index, req_row,
                         req_hit_way, req_tag, real_addr, req_laddr,
                         cache_valid_bits, cache_tags, access_ok,
                         req_is_hit, req_is_miss, replace_way,
                         plru_victim, cache_out)
        self.icache_hit(m, use_previous, r, req_is_hit, req_hit_way,
                        req_index, req_tag, real_addr)
        self.icache_miss(m, cache_valid_bits, r, req_is_miss, req_index,
                         req_laddr, req_tag, replace_way, cache_tags,
                         access_ok, real_addr)
        #self.icache_log(m, req_hit_way, ra_valid, access_ok,
        #                req_is_miss, req_is_hit, lway, wstate, r)

        return m


# icache_tb.vhdl
#
# library ieee;
# use ieee.std_logic_1164.all;
#
# library work;
# use work.common.all;
# use work.wishbone_types.all;
#
# entity icache_tb is
# end icache_tb;
#
# architecture behave of icache_tb is
#     signal clk : std_ulogic;
#     signal rst : std_ulogic;
#
#     signal i_out : Fetch1ToIcacheType;
#     signal i_in  : IcacheToDecode1Type;
#
#     signal m_out : MmuToIcacheType;
#
#     signal wb_bram_in  : wishbone_master_out;
#     signal wb_bram_out : wishbone_slave_out;
#
#     constant clk_period : time := 10 ns;
# begin
#     icache0: entity work.icache
#         generic map(
#             LINE_SIZE => 64,
#             NUM_LINES => 4
#         )
#         port map(
#             clk => clk,
#             rst => rst,
#             i_in => i_out,
#             i_out => i_in,
#             m_in => m_out,
#             stall_in => '0',
#             flush_in => '0',
#             inval_in => '0',
#             wishbone_out => wb_bram_in,
#             wishbone_in => wb_bram_out
#         );
#
#     -- BRAM Memory slave
#     bram0: entity work.wishbone_bram_wrapper
#         generic map(
#             MEMORY_SIZE => 1024,
#             RAM_INIT_FILE => "icache_test.bin"
#         )
#         port map(
#             clk => clk,
#             rst => rst,
#             wishbone_in => wb_bram_in,
#             wishbone_out => wb_bram_out
#         );
#
#     clk_process: process
#     begin
#         clk <= '0';
#         wait for clk_period/2;
#         clk <= '1';
#         wait for clk_period/2;
#     end process;
#
#     rst_process: process
#     begin
#         rst <= '1';
#         wait for 2*clk_period;
#         rst <= '0';
#         wait;
#     end process;
#
#     stim: process
#     begin
#         i_out.req <= '0';
#         i_out.nia <= (others => '0');
#         i_out.stop_mark <= '0';
#
#         m_out.tlbld <= '0';
#         m_out.tlbie <= '0';
#         m_out.addr <= (others => '0');
#         m_out.pte <= (others => '0');
#
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#
#         i_out.req <= '1';
#         i_out.nia <= x"0000000000000004";
#
#         wait for 30*clk_period;
#         wait until rising_edge(clk);
#
#         assert i_in.valid = '1' severity failure;
#         assert i_in.insn = x"00000001"
#             report "insn @" & to_hstring(i_out.nia) &
#                 "=" & to_hstring(i_in.insn) &
#                 " expected 00000001"
#             severity failure;
#
#         i_out.req <= '0';
#
#         wait until rising_edge(clk);
#
#         -- hit
#         i_out.req <= '1';
#         i_out.nia <= x"0000000000000008";
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         assert i_in.valid = '1' severity failure;
#         assert i_in.insn = x"00000002"
#             report "insn @" & to_hstring(i_out.nia) &
#                 "=" & to_hstring(i_in.insn) &
#                 " expected 00000002"
#             severity failure;
#         wait until rising_edge(clk);
#
#         -- another miss
#         i_out.req <= '1';
#         i_out.nia <= x"0000000000000040";
#
#         wait for 30*clk_period;
#         wait until rising_edge(clk);
#
#         assert i_in.valid = '1' severity failure;
#         assert i_in.insn = x"00000010"
#             report "insn @" & to_hstring(i_out.nia) &
#                 "=" & to_hstring(i_in.insn) &
#                 " expected 00000010"
#             severity failure;
#
#         -- test something that aliases
#         i_out.req <= '1';
#         i_out.nia <= x"0000000000000100";
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         assert i_in.valid = '0' severity failure;
#         wait until rising_edge(clk);
#
#         wait for 30*clk_period;
#         wait until rising_edge(clk);
#
#         assert i_in.valid = '1' severity failure;
#         assert i_in.insn = x"00000040"
#             report "insn @" & to_hstring(i_out.nia) &
#                 "=" & to_hstring(i_in.insn) &
#                 " expected 00000040"
#             severity failure;
#
#         i_out.req <= '0';
#
#         std.env.finish;
#     end process;
# end;
def icache_sim(dut):
    # testbench-side names, matching the VHDL testbench above:
    # i_out drives the cache's fetch input (dut.i_in), i_in observes
    # its decode output (dut.i_out), m_out drives the MMU input
    i_out = dut.i_in
    i_in = dut.i_out
    m_out = dut.m_in

    yield i_out.req.eq(0)
    yield i_out.nia.eq(0)
    yield i_out.stop_mark.eq(0)
    yield m_out.tlbld.eq(0)
    yield m_out.tlbie.eq(0)
    yield m_out.addr.eq(0)
    yield m_out.pte.eq(0)
    yield
    yield
    yield
    yield
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000004, 64))
    for i in range(30):
        yield
    yield
    # signal values must be read back with "yield sig" in a
    # generator-based simulation
    valid = yield i_in.valid
    nia = yield i_out.nia
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000001, \
        "insn @%x=%x expected 00000001" % (nia, insn)
    yield i_out.req.eq(0)
    yield

    # hit
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000008, 64))
    yield
    yield
    valid = yield i_in.valid
    nia = yield i_out.nia
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000002, \
        "insn @%x=%x expected 00000002" % (nia, insn)
    yield

    # another miss
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000040, 64))
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    nia = yield i_out.nia
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000010, \
        "insn @%x=%x expected 00000010" % (nia, insn)

    # test something that aliases: should miss first
    # (the VHDL checks i_in.valid = '0' here)
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000100, 64))
    yield
    yield
    valid = yield i_in.valid
    assert not valid
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    nia = yield i_out.nia
    insn = yield i_in.insn
    assert valid
    assert insn == 0x00000040, \
        "insn @%x=%x expected 00000040" % (nia, insn)
    yield i_out.req.eq(0)


def test_icache():
    dut = ICache()

    # The VHDL testbench attaches a BRAM slave initialised from
    # "icache_test.bin"; here we attach a wishbone SRAM filled so that
    # the 32-bit word at byte address A reads back as A>>2, which is
    # what icache_sim expects (@0x04 = 1, @0x08 = 2, @0x40 = 0x10, ...)
    mem = []
    for i in range(512):
        mem.append((2*i + 1) << 32 | (2*i))
    memory = Memory(width=64, depth=512, init=mem)
    sram = SRAM(memory=memory, granularity=8)

    m = Module()
    m.submodules.icache = dut
    m.submodules.sram = sram

    # wire the icache master port to the sram slave (wb_out.adr is a
    # byte address, and the nmigen-soc SRAM is word-addressed, hence
    # the [3:] slice)
    m.d.comb += sram.bus.cyc.eq(dut.wb_out.cyc)
    m.d.comb += sram.bus.stb.eq(dut.wb_out.stb)
    m.d.comb += sram.bus.we.eq(dut.wb_out.we)
    m.d.comb += sram.bus.sel.eq(dut.wb_out.sel)
    m.d.comb += sram.bus.adr.eq(dut.wb_out.adr[3:])
    m.d.comb += sram.bus.dat_w.eq(dut.wb_out.dat)

    m.d.comb += dut.wb_in.ack.eq(sram.bus.ack)
    m.d.comb += dut.wb_in.dat.eq(sram.bus.dat_r)

    # nmigen Simulation
    sim = Simulator(m)
    sim.add_clock(1e-6)

    sim.add_sync_process(wrap(icache_sim(dut)))
    with sim.write_vcd('test_icache.vcd'):
        sim.run()

if __name__ == '__main__':
    dut = ICache()
    vl = rtlil.convert(dut, ports=[])
    with open("test_icache.il", "w") as f:
        f.write(vl)

    test_icache()