# use word_select
# [soc.git] / src / soc / experiment / icache.py
1 """ICache
2
3 based on Anton Blanchard microwatt icache.vhdl
4
5 Set associative icache
6
7 TODO (in no specific order):
8 * Add debug interface to inspect cache content
9 * Add snoop/invalidate path
10 * Add multi-hit error detection
11 * Pipelined bus interface (wb or axi)
12 * Maybe add parity? There's a few bits free in each BRAM row on Xilinx
13 * Add optimization: service hits on partially loaded lines
* Add optimization: (maybe) interrupt reload on flush/redirect
15 * Check if playing with the geometry of the cache tags allow for more
16 efficient use of distributed RAM and less logic/muxes. Currently we
17 write TAG_BITS width which may not match full ram blocks and might
18 cause muxes to be inferred for "partial writes".
19 * Check if making the read size of PLRU a ROM helps utilization
20
21 """
22 from enum import Enum, unique
23 from nmigen import (Module, Signal, Elaboratable, Cat, Array, Const)
24 from nmigen.cli import main
25 from nmigen.cli import rtlil
26 from nmutil.iocontrol import RecordObject
27 from nmutil.byterev import byte_reverse
28 from nmutil.mask import Mask
29 from nmigen.utils import log2_int
30 from nmutil.util import Display
31
32 from soc.experiment.mem_types import (Fetch1ToICacheType,
33 ICacheToDecode1Type,
34 MMUToICacheType)
35
36 from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS,
37 WB_SEL_BITS, WBAddrType, WBDataType,
38 WBSelType, WBMasterOut, WBSlaveOut,
39 WBMasterOutVector, WBSlaveOutVector,
40 WBIOMasterOut, WBIOSlaveOut)
41
42 from soc.experiment.cache_ram import CacheRam
43 from soc.experiment.plru import PLRU
44
45 # for test
46 from nmigen_soc.wishbone.sram import SRAM
47 from nmigen import Memory
48 from nmigen.cli import rtlil
49 if True:
50 from nmigen.back.pysim import Simulator, Delay, Settle
51 else:
52 from nmigen.sim.cxxsim import Simulator, Delay, Settle
53 from nmutil.util import wrap
54
55
56
57 SIM = 0
58 LINE_SIZE = 64
59 # BRAM organisation: We never access more than wishbone_data_bits
60 # at a time so to save resources we make the array only that wide,
61 # and use consecutive indices for to make a cache "line"
62 #
63 # ROW_SIZE is the width in bytes of the BRAM (based on WB, so 64-bits)
64 ROW_SIZE = WB_DATA_BITS // 8
65 # Number of lines in a set
66 NUM_LINES = 32
67 # Number of ways
68 NUM_WAYS = 4
69 # L1 ITLB number of entries (direct mapped)
70 TLB_SIZE = 64
71 # L1 ITLB log_2(page_size)
72 TLB_LG_PGSZ = 12
73 # Number of real address bits that we store
74 REAL_ADDR_BITS = 56
75 # Non-zero to enable log data collection
76 LOG_LENGTH = 0
77
78 ROW_SIZE_BITS = ROW_SIZE * 8
79 # ROW_PER_LINE is the number of row
80 # (wishbone) transactions in a line
81 ROW_PER_LINE = LINE_SIZE // ROW_SIZE
82 # BRAM_ROWS is the number of rows in
83 # BRAM needed to represent the full icache
84 BRAM_ROWS = NUM_LINES * ROW_PER_LINE
85 # INSN_PER_ROW is the number of 32bit
86 # instructions per BRAM row
87 INSN_PER_ROW = ROW_SIZE_BITS // 32
88
89 # Bit fields counts in the address
90 #
91 # INSN_BITS is the number of bits to
92 # select an instruction in a row
93 INSN_BITS = log2_int(INSN_PER_ROW)
94 # ROW_BITS is the number of bits to
95 # select a row
96 ROW_BITS = log2_int(BRAM_ROWS)
97 # ROW_LINEBITS is the number of bits to
98 # select a row within a line
99 ROW_LINE_BITS = log2_int(ROW_PER_LINE)
100 # LINE_OFF_BITS is the number of bits for
101 # the offset in a cache line
102 LINE_OFF_BITS = log2_int(LINE_SIZE)
103 # ROW_OFF_BITS is the number of bits for
104 # the offset in a row
105 ROW_OFF_BITS = log2_int(ROW_SIZE)
106 # INDEX_BITS is the number of bits to
107 # select a cache line
108 INDEX_BITS = log2_int(NUM_LINES)
109 # SET_SIZE_BITS is the log base 2 of
110 # the set size
111 SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS
112 # TAG_BITS is the number of bits of
113 # the tag part of the address
114 TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS
115 # WAY_BITS is the number of bits to
116 # select a way
117 WAY_BITS = log2_int(NUM_WAYS)
118 TAG_RAM_WIDTH = TAG_BITS * NUM_WAYS
119
120 # -- L1 ITLB.
121 # constant TLB_BITS : natural := log2(TLB_SIZE);
122 # constant TLB_EA_TAG_BITS : natural := 64 - (TLB_LG_PGSZ + TLB_BITS);
123 # constant TLB_PTE_BITS : natural := 64;
124 TLB_BITS = log2_int(TLB_SIZE)
125 TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_BITS)
126 TLB_PTE_BITS = 64
127
128 # architecture rtl of icache is
129 #constant ROW_SIZE_BITS : natural := ROW_SIZE*8;
130 #-- ROW_PER_LINE is the number of row (wishbone
131 #-- transactions) in a line
132 #constant ROW_PER_LINE : natural := LINE_SIZE / ROW_SIZE;
133 #-- BRAM_ROWS is the number of rows in BRAM
134 #-- needed to represent the full
135 #-- icache
136 #constant BRAM_ROWS : natural := NUM_LINES * ROW_PER_LINE;
137 #-- INSN_PER_ROW is the number of 32bit instructions per BRAM row
138 #constant INSN_PER_ROW : natural := ROW_SIZE_BITS / 32;
139 #-- Bit fields counts in the address
140 #
141 #-- INSN_BITS is the number of bits to select
142 #-- an instruction in a row
143 #constant INSN_BITS : natural := log2(INSN_PER_ROW);
144 #-- ROW_BITS is the number of bits to select a row
145 #constant ROW_BITS : natural := log2(BRAM_ROWS);
146 #-- ROW_LINEBITS is the number of bits to
147 #-- select a row within a line
148 #constant ROW_LINEBITS : natural := log2(ROW_PER_LINE);
149 #-- LINE_OFF_BITS is the number of bits for the offset
150 #-- in a cache line
151 #constant LINE_OFF_BITS : natural := log2(LINE_SIZE);
152 #-- ROW_OFF_BITS is the number of bits for the offset in a row
153 #constant ROW_OFF_BITS : natural := log2(ROW_SIZE);
154 #-- INDEX_BITS is the number of bits to select a cache line
155 #constant INDEX_BITS : natural := log2(NUM_LINES);
156 #-- SET_SIZE_BITS is the log base 2 of the set size
157 #constant SET_SIZE_BITS : natural := LINE_OFF_BITS + INDEX_BITS;
158 #-- TAG_BITS is the number of bits of the tag part of the address
159 #constant TAG_BITS : natural := REAL_ADDR_BITS - SET_SIZE_BITS;
160 #-- WAY_BITS is the number of bits to select a way
161 #constant WAY_BITS : natural := log2(NUM_WAYS);
162
163 #-- Example of layout for 32 lines of 64 bytes:
164 #--
165 #-- .. tag |index| line |
166 #-- .. | row | |
167 #-- .. | | | |00| zero (2)
168 #-- .. | | |-| | INSN_BITS (1)
169 #-- .. | |---| | ROW_LINEBITS (3)
170 #-- .. | |--- - --| LINE_OFF_BITS (6)
171 #-- .. | |- --| ROW_OFF_BITS (3)
172 #-- .. |----- ---| | ROW_BITS (8)
173 #-- .. |-----| | INDEX_BITS (5)
174 #-- .. --------| | TAG_BITS (53)
175 # Example of layout for 32 lines of 64 bytes:
176 #
177 # .. tag |index| line |
178 # .. | row | |
179 # .. | | | |00| zero (2)
180 # .. | | |-| | INSN_BITS (1)
181 # .. | |---| | ROW_LINEBITS (3)
182 # .. | |--- - --| LINE_OFF_BITS (6)
183 # .. | |- --| ROW_OFF_BITS (3)
184 # .. |----- ---| | ROW_BITS (8)
185 # .. |-----| | INDEX_BITS (5)
186 # .. --------| | TAG_BITS (53)
187
188 #subtype row_t is integer range 0 to BRAM_ROWS-1;
189 #subtype index_t is integer range 0 to NUM_LINES-1;
190 #subtype way_t is integer range 0 to NUM_WAYS-1;
191 #subtype row_in_line_t is unsigned(ROW_LINEBITS-1 downto 0);
192 #
193 #-- The cache data BRAM organized as described above for each way
194 #subtype cache_row_t is std_ulogic_vector(ROW_SIZE_BITS-1 downto 0);
195 #
196 #-- The cache tags LUTRAM has a row per set. Vivado is a pain and will
197 #-- not handle a clean (commented) definition of the cache tags as a 3d
198 #-- memory. For now, work around it by putting all the tags
199 #subtype cache_tag_t is std_logic_vector(TAG_BITS-1 downto 0);
200 # type cache_tags_set_t is array(way_t) of cache_tag_t;
201 # type cache_tags_array_t is array(index_t) of cache_tags_set_t;
202 #constant TAG_RAM_WIDTH : natural := TAG_BITS * NUM_WAYS;
203 #subtype cache_tags_set_t is std_logic_vector(TAG_RAM_WIDTH-1 downto 0);
204 #type cache_tags_array_t is array(index_t) of cache_tags_set_t;
def CacheTagArray():
    """One tag-RAM row per cache line, all ways' tags concatenated."""
    return Array([Signal(TAG_RAM_WIDTH) for idx in range(NUM_LINES)])
207
208 #-- The cache valid bits
209 #subtype cache_way_valids_t is std_ulogic_vector(NUM_WAYS-1 downto 0);
210 #type cache_valids_t is array(index_t) of cache_way_valids_t;
211 #type row_per_line_valid_t is array(0 to ROW_PER_LINE - 1) of std_ulogic;
def CacheValidBitsArray():
    """Per-line cache valid bits, one bit per way."""
    return Array([Signal(NUM_WAYS) for idx in range(NUM_LINES)])
214
def RowPerLineValidArray():
    """One valid bit per (wishbone) row within a cache line."""
    return Array([Signal() for idx in range(ROW_PER_LINE)])
217
218
219 #attribute ram_style : string;
220 #attribute ram_style of cache_tags : signal is "distributed";
221 # TODO to be passed to nigmen as ram attributes
222 # attribute ram_style : string;
223 # attribute ram_style of cache_tags : signal is "distributed";
224
225
226 #subtype tlb_index_t is integer range 0 to TLB_SIZE - 1;
227 #type tlb_valids_t is array(tlb_index_t) of std_ulogic;
228 #subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0);
229 #type tlb_tags_t is array(tlb_index_t) of tlb_tag_t;
230 #subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
231 #type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t;
def TLBValidBitsArray():
    """One valid bit per direct-mapped iTLB entry."""
    return Array([Signal() for idx in range(TLB_SIZE)])
234
def TLBTagArray():
    """One effective-address tag per iTLB entry."""
    return Array([Signal(TLB_EA_TAG_BITS) for idx in range(TLB_SIZE)])
237
def TLBPTEArray():
    """One page-table entry per iTLB entry."""
    return Array([Signal(TLB_PTE_BITS) for idx in range(TLB_SIZE)])
240
241
242 #-- Cache RAM interface
243 #type cache_ram_out_t is array(way_t) of cache_row_t;
244 # Cache RAM interface
# Cache RAM interface
def CacheRamOut():
    """Cache RAM read data: one full BRAM row per way."""
    return Array([Signal(ROW_SIZE_BITS) for idx in range(NUM_WAYS)])
247
248 #-- PLRU output interface
249 #type plru_out_t is array(index_t) of
250 # std_ulogic_vector(WAY_BITS-1 downto 0);
251 # PLRU output interface
# PLRU output interface
def PLRUOut():
    """PLRU victim-way selection, one WAY_BITS value per cache line."""
    return Array([Signal(WAY_BITS) for idx in range(NUM_LINES)])
254
255 # -- Return the cache line index (tag index) for an address
256 # function get_index(addr: std_ulogic_vector(63 downto 0))
257 # return index_t is
258 # begin
259 # return to_integer(unsigned(
260 # addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS)
261 # ));
262 # end;
263 # Return the cache line index (tag index) for an address
def get_index(addr):
    """Return the cache line index (tag index) field of *addr*."""
    line_index = addr[LINE_OFF_BITS:SET_SIZE_BITS]
    return line_index
266
267 # -- Return the cache row index (data memory) for an address
268 # function get_row(addr: std_ulogic_vector(63 downto 0))
269 # return row_t is
270 # begin
271 # return to_integer(unsigned(
272 # addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS)
273 # ));
274 # end;
275 # Return the cache row index (data memory) for an address
def get_row(addr):
    """Return the cache row index (BRAM data index) field of *addr*."""
    row_index = addr[ROW_OFF_BITS:SET_SIZE_BITS]
    return row_index
278
279 # -- Return the index of a row within a line
280 # function get_row_of_line(row: row_t) return row_in_line_t is
281 # variable row_v : unsigned(ROW_BITS-1 downto 0);
282 # begin
283 # row_v := to_unsigned(row, ROW_BITS);
284 # return row_v(ROW_LINEBITS-1 downto 0);
285 # end;
286 # Return the index of a row within a line
def get_row_of_line(row):
    """Return the index of a row within its cache line.

    Bug fix: the original computed the slice but never returned it,
    so every caller (e.g. is_last_row) received None.
    """
    return row[:ROW_LINE_BITS]
289
290 # -- Returns whether this is the last row of a line
291 # function is_last_row_addr(addr: wishbone_addr_type;
292 # last: row_in_line_t
293 # )
294 # return boolean is
295 # begin
296 # return unsigned(
297 # addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS)
298 # ) = last;
299 # end;
300 # Returns whether this is the last row of a line
def is_last_row_addr(addr, last):
    """True when *addr* points at the last row of its cache line."""
    row_in_line = addr[ROW_OFF_BITS:LINE_OFF_BITS]
    return row_in_line == last
303
304 # -- Returns whether this is the last row of a line
305 # function is_last_row(row: row_t;
306 # last: row_in_line_t) return boolean is
307 # begin
308 # return get_row_of_line(row) = last;
309 # end;
310 # Returns whether this is the last row of a line
def is_last_row(row, last):
    """True when *row* is the last row of its cache line."""
    row_in_line = get_row_of_line(row)
    return row_in_line == last
313
314 # -- Return the address of the next row in the current cache line
315 # function next_row_addr(addr: wishbone_addr_type)
316 # return std_ulogic_vector is
317 # variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
318 # variable result : wishbone_addr_type;
319 # begin
320 # -- Is there no simpler way in VHDL to generate that 3 bits adder ?
321 # row_idx := addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS);
322 # row_idx := std_ulogic_vector(unsigned(row_idx) + 1);
323 # result := addr;
324 # result(LINE_OFF_BITS-1 downto ROW_OFF_BITS) := row_idx;
325 # return result;
326 # end;
327 # Return the address of the next row in the current cache line
def next_row_addr(addr):
    """Return *addr* advanced to the next row of the same cache line.

    Implements the VHDL stub the original left as ``pass``: only the
    row-within-line field (ROW_OFF_BITS..LINE_OFF_BITS) is incremented,
    wrapping within the line, which keeps the generated adder to
    ROW_LINE_BITS wide.  The double assignment in the VHDL is just
    "copy addr, then overwrite the row-index field"; here that is a
    single Cat() of the unchanged low bits, the incremented row index,
    and the unchanged high bits.
    """
    row_idx = addr[ROW_OFF_BITS:LINE_OFF_BITS] + 1
    return Cat(addr[:ROW_OFF_BITS], row_idx[:ROW_LINE_BITS],
               addr[LINE_OFF_BITS:])
332
333 # -- Return the next row in the current cache line. We use a dedicated
334 # -- function in order to limit the size of the generated adder to be
335 # -- only the bits within a cache line (3 bits with default settings)
336 # function next_row(row: row_t) return row_t is
337 # variable row_v : std_ulogic_vector(ROW_BITS-1 downto 0);
338 # variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
339 # variable result : std_ulogic_vector(ROW_BITS-1 downto 0);
340 # begin
341 # row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
342 # row_idx := row_v(ROW_LINEBITS-1 downto 0);
343 # row_v(ROW_LINEBITS-1 downto 0) :=
344 # std_ulogic_vector(unsigned(row_idx) + 1);
345 # return to_integer(unsigned(row_v));
346 # end;
347 # Return the next row in the current cache line. We use a dedicated
348 # function in order to limit the size of the generated adder to be
349 # only the bits within a cache line (3 bits with default settings)
# Return the next row in the current cache line. We use a dedicated
# function in order to limit the size of the generated adder to be
# only the bits within a cache line (3 bits with default settings)
def next_row(row):
    """Return the next row index within the current cache line.

    Implements the VHDL stub the original left as ``pass``: increment
    only the low ROW_LINE_BITS of the row number (wrapping within the
    line) and keep the upper bits, so the synthesized adder is only
    ROW_LINE_BITS wide.
    """
    row_idx = row[:ROW_LINE_BITS] + 1
    return Cat(row_idx[:ROW_LINE_BITS], row[ROW_LINE_BITS:])
354
355 # -- Read the instruction word for the given address in the
356 # -- current cache row
357 # function read_insn_word(addr: std_ulogic_vector(63 downto 0);
358 # data: cache_row_t) return std_ulogic_vector is
359 # variable word: integer range 0 to INSN_PER_ROW-1;
360 # begin
361 # word := to_integer(unsigned(addr(INSN_BITS+2-1 downto 2)));
362 # return data(31+word*32 downto word*32);
363 # end;
364 # Read the instruction word for the given address
365 # in the current cache row
def read_insn_word(addr, data):
    """Return the 32-bit instruction word selected by *addr* from
    cache row *data*.

    Bug fix: the word-select field is addr[2:INSN_BITS+2] (INSN_BITS
    wide, matching VHDL ``addr(INSN_BITS+2-1 downto 2)``).  The
    original sliced one bit too many (INSN_BITS+3), so word_select
    could index past the end of the row.
    """
    word = addr[2:INSN_BITS+2]
    return data.word_select(word, 32)
369
370 # -- Get the tag value from the address
371 # function get_tag(
372 # addr: std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0)
373 # )
374 # return cache_tag_t is
375 # begin
376 # return addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS);
377 # end;
378 # Get the tag value from the address
def get_tag(addr):
    """Extract the tag field from a real address."""
    tag = addr[SET_SIZE_BITS:REAL_ADDR_BITS]
    return tag
381
382 # -- Read a tag from a tag memory row
383 # function read_tag(way: way_t; tagset: cache_tags_set_t)
384 # return cache_tag_t is
385 # begin
386 # return tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS);
387 # end;
388 # Read a tag from a tag memory row
def read_tag(way, tagset):
    """Return *way*'s TAG_BITS-wide tag from tag-RAM row *tagset*."""
    lo = way * TAG_BITS
    return tagset[lo:lo + TAG_BITS]
391
392 # -- Write a tag to tag memory row
393 # procedure write_tag(way: in way_t;
394 # tagset: inout cache_tags_set_t; tag: cache_tag_t) is
395 # begin
396 # tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS) := tag;
397 # end;
398 # Write a tag to tag memory row
def write_tag(way, tagset, tag):
    """Return an assignment that writes *tag* into *way*'s slot of
    tag-RAM row *tagset*.

    Bug fix: nmigen Values do not support item assignment, so the
    original ``tagset[...] = tag`` raised TypeError at elaboration.
    The .eq() statement is returned instead; the caller must add it
    to a comb/sync domain.
    """
    return tagset[way * TAG_BITS:(way + 1) * TAG_BITS].eq(tag)
401
402 # -- Simple hash for direct-mapped TLB index
403 # function hash_ea(addr: std_ulogic_vector(63 downto 0))
404 # return tlb_index_t is
405 # variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
406 # begin
407 # hash := addr(TLB_LG_PGSZ + TLB_BITS - 1 downto TLB_LG_PGSZ)
408 # xor addr(
409 # TLB_LG_PGSZ + 2 * TLB_BITS - 1 downto
410 # TLB_LG_PGSZ + TLB_BITS
411 # )
412 # xor addr(
413 # TLB_LG_PGSZ + 3 * TLB_BITS - 1 downto
414 # TLB_LG_PGSZ + 2 * TLB_BITS
415 # );
416 # return to_integer(unsigned(hash));
417 # end;
418 # Simple hash for direct-mapped TLB index
def hash_ea(addr):
    """XOR-fold three TLB_BITS-wide fields of *addr* (above the page
    offset) into a direct-mapped iTLB index."""
    fold0 = addr[TLB_LG_PGSZ:TLB_LG_PGSZ + TLB_BITS]
    fold1 = addr[TLB_LG_PGSZ + TLB_BITS:TLB_LG_PGSZ + 2 * TLB_BITS]
    fold2 = addr[TLB_LG_PGSZ + 2 * TLB_BITS:TLB_LG_PGSZ + 3 * TLB_BITS]
    return fold0 ^ fold1 ^ fold2
426
427 # begin
428 #
429 # assert LINE_SIZE mod ROW_SIZE = 0;
430 # assert ispow2(LINE_SIZE) report "LINE_SIZE not power of 2"
431 # severity FAILURE;
432 # assert ispow2(NUM_LINES) report "NUM_LINES not power of 2"
433 # severity FAILURE;
434 # assert ispow2(ROW_PER_LINE) report "ROW_PER_LINE not power of 2"
435 # severity FAILURE;
436 # assert ispow2(INSN_PER_ROW) report "INSN_PER_ROW not power of 2"
437 # severity FAILURE;
438 # assert (ROW_BITS = INDEX_BITS + ROW_LINEBITS)
439 # report "geometry bits don't add up" severity FAILURE;
440 # assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS)
441 # report "geometry bits don't add up" severity FAILURE;
442 # assert (REAL_ADDR_BITS = TAG_BITS + INDEX_BITS + LINE_OFF_BITS)
443 # report "geometry bits don't add up" severity FAILURE;
444 # assert (REAL_ADDR_BITS = TAG_BITS + ROW_BITS + ROW_OFF_BITS)
445 # report "geometry bits don't add up" severity FAILURE;
446 #
447 # sim_debug: if SIM generate
448 # debug: process
449 # begin
450 # report "ROW_SIZE = " & natural'image(ROW_SIZE);
451 # report "ROW_PER_LINE = " & natural'image(ROW_PER_LINE);
452 # report "BRAM_ROWS = " & natural'image(BRAM_ROWS);
453 # report "INSN_PER_ROW = " & natural'image(INSN_PER_ROW);
454 # report "INSN_BITS = " & natural'image(INSN_BITS);
455 # report "ROW_BITS = " & natural'image(ROW_BITS);
456 # report "ROW_LINEBITS = " & natural'image(ROW_LINEBITS);
457 # report "LINE_OFF_BITS = " & natural'image(LINE_OFF_BITS);
458 # report "ROW_OFF_BITS = " & natural'image(ROW_OFF_BITS);
459 # report "INDEX_BITS = " & natural'image(INDEX_BITS);
460 # report "TAG_BITS = " & natural'image(TAG_BITS);
461 # report "WAY_BITS = " & natural'image(WAY_BITS);
462 # wait;
463 # end process;
464 # end generate;
465
466 # Cache reload state machine
@unique
class State(Enum):
    """Cache reload state machine states."""
    IDLE = 0      # no reload in progress
    CLR_TAG = 1   # victim way being selected (see replace_way mux)
    WAIT_ACK = 2  # refilling a line; waiting for wishbone acks
472
473 # type reg_internal_t is record
474 # -- Cache hit state (Latches for 1 cycle BRAM access)
475 # hit_way : way_t;
476 # hit_nia : std_ulogic_vector(63 downto 0);
477 # hit_smark : std_ulogic;
478 # hit_valid : std_ulogic;
479 #
480 # -- Cache miss state (reload state machine)
481 # state : state_t;
482 # wb : wishbone_master_out;
483 # store_way : way_t;
484 # store_index : index_t;
485 # store_row : row_t;
486 # store_tag : cache_tag_t;
487 # store_valid : std_ulogic;
488 # end_row_ix : row_in_line_t;
489 # rows_valid : row_per_line_valid_t;
490 #
491 # -- TLB miss state
492 # fetch_failed : std_ulogic;
493 # end record;
class RegInternal(RecordObject):
    """Internal latched state (VHDL reg_internal_t): cache-hit outputs,
    the reload state machine, and TLB-miss status.

    Field declaration order defines the RecordObject layout, so it is
    kept exactly as in the VHDL record.
    """
    def __init__(self):
        super().__init__()
        # Cache hit state (Latches for 1 cycle BRAM access)
        # NOTE(review): hit_way/store_way/store_index/store_row widths
        # are the entry *counts* (NUM_WAYS/NUM_LINES/BRAM_ROWS) rather
        # than index widths (WAY_BITS/INDEX_BITS/ROW_BITS) -- wider
        # than needed but functional; confirm intended.
        self.hit_way = Signal(NUM_WAYS)
        self.hit_nia = Signal(64)
        self.hit_smark = Signal()
        self.hit_valid = Signal()

        # Cache miss state (reload state machine)
        self.state = Signal(State)          # State enum: IDLE/CLR_TAG/WAIT_ACK
        self.wb = WBMasterOut()             # wishbone request being issued
        self.store_way = Signal(NUM_WAYS)
        self.store_index = Signal(NUM_LINES)
        self.store_row = Signal(BRAM_ROWS)
        self.store_tag = Signal(TAG_BITS)
        self.store_valid = Signal()
        self.end_row_ix = Signal(ROW_LINE_BITS)   # last row of line to fetch
        self.rows_valid = RowPerLineValidArray()  # rows of line already loaded

        # TLB miss state
        self.fetch_failed = Signal()
516
517 # -- 64 bit direct mapped icache. All instructions are 4B aligned.
518 #
519 # entity icache is
520 # generic (
521 # SIM : boolean := false;
522 # -- Line size in bytes
523 # LINE_SIZE : positive := 64;
524 # -- BRAM organisation: We never access more
525 # -- than wishbone_data_bits
526 # -- at a time so to save resources we make the
527 # -- array only that wide,
528 # -- and use consecutive indices for to make a cache "line"
529 # --
530 # -- ROW_SIZE is the width in bytes of the BRAM (based on WB,
531 # -- so 64-bits)
532 # ROW_SIZE : positive := wishbone_data_bits / 8;
533 # -- Number of lines in a set
534 # NUM_LINES : positive := 32;
535 # -- Number of ways
536 # NUM_WAYS : positive := 4;
537 # -- L1 ITLB number of entries (direct mapped)
538 # TLB_SIZE : positive := 64;
539 # -- L1 ITLB log_2(page_size)
540 # TLB_LG_PGSZ : positive := 12;
541 # -- Number of real address bits that we store
542 # REAL_ADDR_BITS : positive := 56;
543 # -- Non-zero to enable log data collection
544 # LOG_LENGTH : natural := 0
545 # );
546 # port (
547 # clk : in std_ulogic;
548 # rst : in std_ulogic;
549 #
550 # i_in : in Fetch1ToIcacheType;
551 # i_out : out IcacheToDecode1Type;
552 #
553 # m_in : in MmuToIcacheType;
554 #
555 # stall_in : in std_ulogic;
556 # stall_out : out std_ulogic;
557 # flush_in : in std_ulogic;
558 # inval_in : in std_ulogic;
559 #
560 # wishbone_out : out wishbone_master_out;
561 # wishbone_in : in wishbone_slave_out;
562 #
563 # log_out : out std_ulogic_vector(53 downto 0)
564 # );
565 # end entity icache;
566 # 64 bit direct mapped icache. All instructions are 4B aligned.
567 class ICache(Elaboratable):
568 """64 bit direct mapped icache. All instructions are 4B aligned."""
    def __init__(self):
        """Declare the icache's external ports (mirrors the VHDL
        ``entity icache`` port list)."""
        self.i_in = Fetch1ToICacheType()     # fetch request from fetch1
        self.i_out = ICacheToDecode1Type()   # instruction out to decode1

        self.m_in = MMUToICacheType()        # TLB load/invalidate from MMU

        self.stall_in = Signal()
        self.stall_out = Signal()
        self.flush_in = Signal()
        self.inval_in = Signal()

        self.wb_out = WBMasterOut()          # wishbone master (line refill)
        self.wb_in = WBSlaveOut()            # wishbone response

        self.log_out = Signal(54)            # debug log data (LOG_LENGTH)
584
585
586 # -- Generate a cache RAM for each way
587 # rams: for i in 0 to NUM_WAYS-1 generate
588 # signal do_read : std_ulogic;
589 # signal do_write : std_ulogic;
590 # signal rd_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
591 # signal wr_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
592 # signal dout : cache_row_t;
593 # signal wr_sel : std_ulogic_vector(ROW_SIZE-1 downto 0);
594 # begin
595 # way: entity work.cache_ram
596 # generic map (
597 # ROW_BITS => ROW_BITS,
598 # WIDTH => ROW_SIZE_BITS
599 # )
600 # port map (
601 # clk => clk,
602 # rd_en => do_read,
603 # rd_addr => rd_addr,
604 # rd_data => dout,
605 # wr_sel => wr_sel,
606 # wr_addr => wr_addr,
607 # wr_data => wishbone_in.dat
608 # );
609 # process(all)
610 # begin
611 # do_read <= not (stall_in or use_previous);
612 # do_write <= '0';
613 # if wishbone_in.ack = '1' and replace_way = i then
614 # do_write <= '1';
615 # end if;
616 # cache_out(i) <= dout;
617 # rd_addr <=
618 # std_ulogic_vector(to_unsigned(req_row, ROW_BITS));
619 # wr_addr <=
620 # std_ulogic_vector(to_unsigned(r.store_row, ROW_BITS));
621 # for i in 0 to ROW_SIZE-1 loop
622 # wr_sel(i) <= do_write;
623 # end loop;
624 # end process;
625 # end generate;
626 def rams(self, m, r, cache_out, use_previous, replace_way, req_row):
627 comb = m.d.comb
628
629 wb_in, stall_in = self.wb_in, self.stall_in
630
631 do_read = Signal()
632 do_write = Signal()
633 rd_addr = Signal(ROW_BITS)
634 wr_addr = Signal(ROW_BITS)
635 _d_out = Signal(ROW_SIZE_BITS)
636 wr_sel = Signal(ROW_SIZE)
637
638 for i in range(NUM_WAYS):
639 way = CacheRam(ROW_BITS, ROW_SIZE_BITS)
640 comb += way.rd_en.eq(do_read)
641 comb += way.rd_addr.eq(rd_addr)
642 comb += way.rd_data_o.eq(_d_out)
643 comb += way.wr_sel.eq(wr_sel)
644 comb += way.wr_addr.eq(wr_addr)
645 comb += way.wr_data.eq(wb_in.dat)
646
647 comb += do_read.eq(~(stall_in | use_previous))
648 comb += do_write.eq(0)
649
650 with m.If(wb_in.ack & (replace_way == i)):
651 comb += do_write.eq(1)
652
653 comb += cache_out[i].eq(_d_out)
654 comb += rd_addr.eq(req_row)
655 comb += wr_addr.eq(r.store_row)
656 for j in range(ROW_SIZE):
657 comb += wr_sel[j].eq(do_write)
658
659 # -- Generate PLRUs
660 # maybe_plrus: if NUM_WAYS > 1 generate
661 # begin
662 # plrus: for i in 0 to NUM_LINES-1 generate
663 # -- PLRU interface
664 # signal plru_acc : std_ulogic_vector(WAY_BITS-1 downto 0);
665 # signal plru_acc_en : std_ulogic;
666 # signal plru_out : std_ulogic_vector(WAY_BITS-1 downto 0);
667 #
668 # begin
669 # plru : entity work.plru
670 # generic map (
671 # BITS => WAY_BITS
672 # )
673 # port map (
674 # clk => clk,
675 # rst => rst,
676 # acc => plru_acc,
677 # acc_en => plru_acc_en,
678 # lru => plru_out
679 # );
680 #
681 # process(all)
682 # begin
683 # -- PLRU interface
684 # if get_index(r.hit_nia) = i then
685 # plru_acc_en <= r.hit_valid;
686 # else
687 # plru_acc_en <= '0';
688 # end if;
689 # plru_acc <=
690 # std_ulogic_vector(to_unsigned(r.hit_way, WAY_BITS));
691 # plru_victim(i) <= plru_out;
692 # end process;
693 # end generate;
694 # end generate;
695 def maybe_plrus(self, m, r, plru_victim):
696 comb = m.d.comb
697
698 with m.If(NUM_WAYS > 1):
699 for i in range(NUM_LINES):
700 plru_acc = Signal(WAY_BITS)
701 plru_acc_en = Signal()
702 plru_out = Signal(WAY_BITS)
703 plru = PLRU(WAY_BITS)
704 comb += plru.acc.eq(plru_acc)
705 comb += plru.acc_en.eq(plru_acc_en)
706 comb += plru.lru_o.eq(plru_out)
707
708 # PLRU interface
709 with m.If(get_index(r.hit_nia) == i):
710 comb += plru.acc_en.eq(r.hit_valid)
711
712 with m.Else():
713 comb += plru.acc_en.eq(0)
714
715 comb += plru.acc.eq(r.hit_way)
716 comb += plru_victim[i].eq(plru.lru_o)
717
718 # -- TLB hit detection and real address generation
719 # itlb_lookup : process(all)
720 # variable pte : tlb_pte_t;
721 # variable ttag : tlb_tag_t;
722 # begin
723 # tlb_req_index <= hash_ea(i_in.nia);
724 # pte := itlb_ptes(tlb_req_index);
725 # ttag := itlb_tags(tlb_req_index);
726 # if i_in.virt_mode = '1' then
727 # real_addr <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
728 # i_in.nia(TLB_LG_PGSZ - 1 downto 0);
729 # if ttag = i_in.nia(63 downto TLB_LG_PGSZ + TLB_BITS) then
730 # ra_valid <= itlb_valids(tlb_req_index);
731 # else
732 # ra_valid <= '0';
733 # end if;
734 # eaa_priv <= pte(3);
735 # else
736 # real_addr <= i_in.nia(REAL_ADDR_BITS - 1 downto 0);
737 # ra_valid <= '1';
738 # eaa_priv <= '1';
739 # end if;
740 #
741 # -- no IAMR, so no KUEP support for now
742 # priv_fault <= eaa_priv and not i_in.priv_mode;
743 # access_ok <= ra_valid and not priv_fault;
744 # end process;
745 # TLB hit detection and real address generation
746 def itlb_lookup(self, m, tlb_req_index, itlb_ptes, itlb_tags,
747 real_addr, itlb_valid_bits, ra_valid, eaa_priv,
748 priv_fault, access_ok):
749 comb = m.d.comb
750
751 i_in = self.i_in
752
753 pte = Signal(TLB_PTE_BITS)
754 ttag = Signal(TLB_EA_TAG_BITS)
755
756 comb += tlb_req_index.eq(hash_ea(i_in.nia))
757 comb += pte.eq(itlb_ptes[tlb_req_index])
758 comb += ttag.eq(itlb_tags[tlb_req_index])
759
760 with m.If(i_in.virt_mode):
761 comb += real_addr.eq(Cat(
762 i_in.nia[:TLB_LG_PGSZ],
763 pte[TLB_LG_PGSZ:REAL_ADDR_BITS]
764 ))
765
766 with m.If(ttag == i_in.nia[TLB_LG_PGSZ + TLB_BITS:64]):
767 comb += ra_valid.eq(itlb_valid_bits[tlb_req_index])
768
769 with m.Else():
770 comb += ra_valid.eq(0)
771
772 with m.Else():
773 comb += real_addr.eq(i_in.nia[:REAL_ADDR_BITS])
774 comb += ra_valid.eq(1)
775 comb += eaa_priv.eq(1)
776
777 # No IAMR, so no KUEP support for now
778 comb += priv_fault.eq(eaa_priv & ~i_in.priv_mode)
779 comb += access_ok.eq(ra_valid & ~priv_fault)
780
781 # -- iTLB update
782 # itlb_update: process(clk)
783 # variable wr_index : tlb_index_t;
784 # begin
785 # if rising_edge(clk) then
786 # wr_index := hash_ea(m_in.addr);
787 # if rst = '1' or
788 # (m_in.tlbie = '1' and m_in.doall = '1') then
789 # -- clear all valid bits
790 # for i in tlb_index_t loop
791 # itlb_valids(i) <= '0';
792 # end loop;
793 # elsif m_in.tlbie = '1' then
794 # -- clear entry regardless of hit or miss
795 # itlb_valids(wr_index) <= '0';
796 # elsif m_in.tlbld = '1' then
797 # itlb_tags(wr_index) <=
798 # m_in.addr(63 downto TLB_LG_PGSZ + TLB_BITS);
799 # itlb_ptes(wr_index) <= m_in.pte;
800 # itlb_valids(wr_index) <= '1';
801 # end if;
802 # end if;
803 # end process;
804 # iTLB update
805 def itlb_update(self, m, itlb_valid_bits, itlb_tags, itlb_ptes):
806 comb = m.d.comb
807 sync = m.d.sync
808
809 m_in = self.m_in
810
811 wr_index = Signal(TLB_SIZE)
812 comb += wr_index.eq(hash_ea(m_in.addr))
813
814 with m.If(m_in.tlbie & m_in.doall):
815 # Clear all valid bits
816 for i in range(TLB_SIZE):
817 sync += itlb_valid_bits[i].eq(0)
818
819 with m.Elif(m_in.tlbie):
820 # Clear entry regardless of hit or miss
821 sync += itlb_valid_bits[wr_index].eq(0)
822
823 with m.Elif(m_in.tlbld):
824 sync += itlb_tags[wr_index].eq(
825 m_in.addr[TLB_LG_PGSZ + TLB_BITS:64]
826 )
827 sync += itlb_ptes[wr_index].eq(m_in.pte)
828 sync += itlb_valid_bits[wr_index].eq(1)
829
830 # -- Cache hit detection, output to fetch2 and other misc logic
831 # icache_comb : process(all)
832 # Cache hit detection, output to fetch2 and other misc logic
833 def icache_comb(self, m, use_previous, r, req_index, req_row,
834 req_tag, real_addr, req_laddr, cache_valid_bits,
835 cache_tags, access_ok, req_is_hit,
836 req_is_miss, replace_way, plru_victim, cache_out):
837 # variable is_hit : std_ulogic;
838 # variable hit_way : way_t;
839 comb = m.d.comb
840
841 i_in, i_out, wb_out = self.i_in, self.i_out, self.wb_out
842 flush_in, stall_out = self.flush_in, self.stall_out
843
844 is_hit = Signal()
845 hit_way = Signal(NUM_WAYS)
846 # begin
847 # -- i_in.sequential means that i_in.nia this cycle
848 # -- is 4 more than last cycle. If we read more
849 # -- than 32 bits at a time, had a cache hit last
850 # -- cycle, and we don't want the first 32-bit chunk
851 # -- then we can keep the data we read last cycle
852 # -- and just use that.
853 # if unsigned(i_in.nia(INSN_BITS+2-1 downto 2)) /= 0 then
854 # use_previous <= i_in.sequential and r.hit_valid;
855 # else
856 # use_previous <= '0';
857 # end if;
858 # i_in.sequential means that i_in.nia this cycle is 4 more than
859 # last cycle. If we read more than 32 bits at a time, had a
860 # cache hit last cycle, and we don't want the first 32-bit chunk
861 # then we can keep the data we read last cycle and just use that.
862 with m.If(i_in.nia[2:INSN_BITS+2] != 0):
863 comb += use_previous.eq(i_in.sequential & r.hit_valid)
864
865 with m.Else():
866 comb += use_previous.eq(0)
867
868 # -- Extract line, row and tag from request
869 # req_index <= get_index(i_in.nia);
870 # req_row <= get_row(i_in.nia);
871 # req_tag <= get_tag(real_addr);
872 # Extract line, row and tag from request
873 comb += req_index.eq(get_index(i_in.nia))
874 comb += req_row.eq(get_row(i_in.nia))
875 comb += req_tag.eq(get_tag(real_addr))
876
877 # -- Calculate address of beginning of cache row, will be
878 # -- used for cache miss processing if needed
879 # req_laddr <=
880 # (63 downto REAL_ADDR_BITS => '0') &
881 # real_addr(REAL_ADDR_BITS - 1 downto ROW_OFF_BITS) &
882 # (ROW_OFF_BITS-1 downto 0 => '0');
883 # Calculate address of beginning of cache row, will be
884 # used for cache miss processing if needed
885 comb += req_laddr.eq(Cat(
886 Const(0b0, ROW_OFF_BITS),
887 real_addr[ROW_OFF_BITS:REAL_ADDR_BITS],
888 Const(0, REAL_ADDR_BITS)
889 ))
890
891 # -- Test if pending request is a hit on any way
892 # hit_way := 0;
893 # is_hit := '0';
894 # for i in way_t loop
895 # if i_in.req = '1' and
896 # (cache_valids(req_index)(i) = '1' or
897 # (r.state = WAIT_ACK and
898 # req_index = r.store_index and
899 # i = r.store_way and
900 # r.rows_valid(req_row mod ROW_PER_LINE) = '1')) then
901 # if read_tag(i, cache_tags(req_index)) = req_tag then
902 # hit_way := i;
903 # is_hit := '1';
904 # end if;
905 # end if;
906 # end loop;
907 # Test if pending request is a hit on any way
908 for i in range(NUM_WAYS):
909 with m.If(i_in.req &
910 (cache_valid_bits[req_index][i] |
911 ((r.state == State.WAIT_ACK)
912 & (req_index == r.store_index)
913 & (i == r.store_way)
914 & r.rows_valid[req_row % ROW_PER_LINE]))):
915 with m.If(read_tag(i, cache_tags[req_index]) == req_tag):
916 comb += hit_way.eq(i)
917 comb += is_hit.eq(1)
918
919 # -- Generate the "hit" and "miss" signals
920 # -- for the synchronous blocks
921 # if i_in.req = '1' and access_ok = '1' and flush_in = '0'
922 # and rst = '0' then
923 # req_is_hit <= is_hit;
924 # req_is_miss <= not is_hit;
925 # else
926 # req_is_hit <= '0';
927 # req_is_miss <= '0';
928 # end if;
929 # req_hit_way <= hit_way;
930 # Generate the "hit" and "miss" signals
931 # for the synchronous blocks
932 with m.If(i_in.req & access_ok & ~flush_in):
933 comb += req_is_hit.eq(is_hit)
934 comb += req_is_miss.eq(~is_hit)
935
936 with m.Else():
937 comb += req_is_hit.eq(0)
938 comb += req_is_miss.eq(0)
939
940 # -- The way to replace on a miss
941 # if r.state = CLR_TAG then
942 # replace_way <=
943 # to_integer(unsigned(plru_victim(r.store_index)));
944 # else
945 # replace_way <= r.store_way;
946 # end if;
947 # The way to replace on a miss
948 with m.If(r.state == State.CLR_TAG):
949 comb += replace_way.eq(plru_victim[r.store_index])
950
951 with m.Else():
952 comb += replace_way.eq(r.store_way)
953
954 # -- Output instruction from current cache row
955 # --
956 # -- Note: This is a mild violation of our design principle of
957 # -- having pipeline stages output from a clean latch. In this
958 # -- case we output the result of a mux. The alternative would
959 # -- be output an entire row which I prefer not to do just yet
960 # -- as it would force fetch2 to know about some of the cache
961 # -- geometry information.
962 # i_out.insn <= read_insn_word(r.hit_nia, cache_out(r.hit_way));
963 # i_out.valid <= r.hit_valid;
964 # i_out.nia <= r.hit_nia;
965 # i_out.stop_mark <= r.hit_smark;
966 # i_out.fetch_failed <= r.fetch_failed;
967 # Output instruction from current cache row
968 #
969 # Note: This is a mild violation of our design principle of
970 # having pipeline stages output from a clean latch. In this
971 # case we output the result of a mux. The alternative would
972 # be output an entire row which I prefer not to do just yet
973 # as it would force fetch2 to know about some of the cache
974 # geometry information.
975 comb += i_out.insn.eq(
976 read_insn_word(r.hit_nia, cache_out[r.hit_way])
977 )
978 comb += i_out.valid.eq(r.hit_valid)
979 comb += i_out.nia.eq(r.hit_nia)
980 comb += i_out.stop_mark.eq(r.hit_smark)
981 comb += i_out.fetch_failed.eq(r.fetch_failed)
982
983 # -- Stall fetch1 if we have a miss on cache or TLB
984 # -- or a protection fault
985 # stall_out <= not (is_hit and access_ok);
986 # Stall fetch1 if we have a miss on cache or TLB
987 # or a protection fault
988 comb += stall_out.eq(~(is_hit & access_ok))
989
990 # -- Wishbone requests output (from the cache miss reload machine)
991 # wishbone_out <= r.wb;
992 # Wishbone requests output (from the cache miss reload machine)
993 comb += wb_out.eq(r.wb)
994 # end process;
995
996 # -- Cache hit synchronous machine
997 # icache_hit : process(clk)
998 # Cache hit synchronous machine
    def icache_hit(self, m, use_previous, r, req_is_hit, req_hit_way,
                   req_index, req_tag, real_addr):
        """Cache hit synchronous machine.

        Translation of the microwatt icache.vhdl ``icache_hit`` process.
        Latches the combinational hit decode into ``r`` so that on the
        next cycle, when the BRAM data appears on the corresponding
        way's ``cache_out``, fetch2 can consume it.

        :param m:            nmigen Module being elaborated
        :param use_previous: hold last cycle's output (second half-word)
        :param r:            internal register record (sync domain)
        :param req_is_hit:   combinational "this request hits" flag
        :param req_hit_way:  way number that hit
        :param req_index:    cache line index of the request
        :param req_tag:      tag of the request
        :param real_addr:    translated real address (debug print only)
        """
        sync = m.d.sync

        i_in, stall_in = self.i_in, self.stall_in
        flush_in = self.flush_in

        # VHDL:
        #   if stall_in = '1' or use_previous = '1' then
        #       if rst = '1' or flush_in = '1' then
        #           r.hit_valid <= '0';
        # keep outputs to fetch2 unchanged on a stall
        # except that flush or reset sets valid to 0
        # If use_previous, keep the same data as last
        # cycle and use the second half
        # NOTE(review): the VHDL also clears hit_valid on rst; here only
        # flush_in does -- this relies on the nmigen sync domain reset
        # zeroing r.hit_valid. Confirm RegInternal gives it reset=0.
        with m.If(stall_in | use_previous):
            with m.If(flush_in):
                sync += r.hit_valid.eq(0)
        # On a hit, latch the request for the next cycle,
        # when the BRAM data will be available on the
        # cache_out output of the corresponding way
        with m.Else():
            sync += r.hit_valid.eq(req_is_hit)

            with m.If(req_is_hit):
                sync += r.hit_way.eq(req_hit_way)

                # NOTE(review): this print runs once at *elaboration*
                # time and shows Signal reprs, not simulated values.
                # The VHDL "report" was a simulation-time message;
                # consider nmutil Display instead -- TODO confirm.
                print(f"cache hit nia:{i_in.nia}, " \
                      f"IR:{i_in.virt_mode}, " \
                      f"SM:{i_in.stop_mark}, idx:{req_index}, " \
                      f"tag:{req_tag}, way:{req_hit_way}, " \
                      f"RA:{real_addr}")

        # Send stop marks and NIA down regardless of validity
        # (VHDL: if stall_in = '0' then ... end if)
        with m.If(~stall_in):
            sync += r.hit_smark.eq(i_in.stop_mark)
            sync += r.hit_nia.eq(i_in.nia)
1064
1065 # -- Cache miss/reload synchronous machine
1066 # icache_miss : process(clk)
1067 # Cache miss/reload synchronous machine
1068 def icache_miss(self, m, cache_valid_bits, r, req_is_miss,
1069 req_index, req_laddr, req_tag, replace_way,
1070 cache_tags, access_ok):
1071 comb = m.d.comb
1072 sync = m.d.sync
1073
1074 i_in, wb_in, m_in = self.i_in, self.wb_in, self.m_in
1075 stall_in, flush_in = self.stall_in, self.flush_in
1076 inval_in = self.inval_in
1077
1078 # variable tagset : cache_tags_set_t;
1079 # variable stbs_done : boolean;
1080
1081 tagset = Signal(TAG_RAM_WIDTH)
1082 stbs_done = Signal()
1083
1084 # begin
1085 # if rising_edge(clk) then
1086 # -- On reset, clear all valid bits to force misses
1087 # if rst = '1' then
1088 # On reset, clear all valid bits to force misses
1089 with m.If('''TODO rst nmigen'''):
1090 # for i in index_t loop
1091 # cache_valids(i) <= (others => '0');
1092 # end loop;
1093 for i in Signal(NUM_LINES):
1094 sync += cache_valid_bits[i].eq(~1)
1095
1096 # r.state <= IDLE;
1097 # r.wb.cyc <= '0';
1098 # r.wb.stb <= '0';
1099 sync += r.state.eq(State.IDLE)
1100 sync += r.wb.cyc.eq(0)
1101 sync += r.wb.stb.eq(0)
1102
1103 # -- We only ever do reads on wishbone
1104 # r.wb.dat <= (others => '0');
1105 # r.wb.sel <= "11111111";
1106 # r.wb.we <= '0';
1107 # We only ever do reads on wishbone
1108 sync += r.wb.dat.eq(~1)
1109 sync += r.wb.sel.eq(Const(0b11111111, 8))
1110 sync += r.wb.we.eq(0)
1111
1112 # -- Not useful normally but helps avoiding
1113 # -- tons of sim warnings
1114 # r.wb.adr <= (others => '0');
1115 # Not useful normally but helps avoiding tons of sim warnings
1116 sync += r.wb.adr.eq(~1)
1117
1118 # else
1119 with m.Else():
1120 # -- Process cache invalidations
1121 # if inval_in = '1' then
1122 # for i in index_t loop
1123 # cache_valids(i) <= (others => '0');
1124 # end loop;
1125 # r.store_valid <= '0';
1126 # end if;
1127 # Process cache invalidations
1128 with m.If(inval_in):
1129 for i in range(NUM_LINES):
1130 sync += cache_valid_bits[i].eq(~1)
1131
1132 sync += r.store_valid.eq(0)
1133
1134 # -- Main state machine
1135 # case r.state is
1136 # Main state machine
1137 with m.Switch(r.state):
1138
1139 # when IDLE =>
1140 with m.Case(State.IDLE):
1141 # -- Reset per-row valid flags,
1142 # -- only used in WAIT_ACK
1143 # for i in 0 to ROW_PER_LINE - 1 loop
1144 # r.rows_valid(i) <= '0';
1145 # end loop;
1146 # Reset per-row valid flags,
1147 # only used in WAIT_ACK
1148 for i in range(ROW_PER_LINE):
1149 sync += r.rows_valid[i].eq(0)
1150
1151 # -- We need to read a cache line
1152 # if req_is_miss = '1' then
1153 # report "cache miss nia:" & to_hstring(i_in.nia) &
1154 # " IR:" & std_ulogic'image(i_in.virt_mode) &
1155 # " SM:" & std_ulogic'image(i_in.stop_mark) &
1156 # " idx:" & integer'image(req_index) &
1157 # " way:" & integer'image(replace_way) &
1158 # " tag:" & to_hstring(req_tag) &
1159 # " RA:" & to_hstring(real_addr);
1160 # We need to read a cache line
1161 with m.If(req_is_miss):
1162 print(f"cache miss nia:{i_in.nia} " \
1163 f"IR:{i_in.virt_mode} " \
1164 f"SM:{i_in.stop_mark} " \
1165 F"idx:{req_index} " \
1166 f"way:{replace_way} tag:{req_tag} " \
1167 f"RA:{real_addr}")
1168
1169 # -- Keep track of our index and way for
1170 # -- subsequent stores
1171 # r.store_index <= req_index;
1172 # r.store_row <= get_row(req_laddr);
1173 # r.store_tag <= req_tag;
1174 # r.store_valid <= '1';
1175 # r.end_row_ix <=
1176 # get_row_of_line(get_row(req_laddr)) - 1;
1177 # Keep track of our index and way
1178 # for subsequent stores
1179 sync += r.store_index.eq(req_index)
1180 sync += r.store_row.eq(get_row(req_laddr))
1181 sync += r.store_tag.eq(req_tag)
1182 sync += r.store_valid.eq(1)
1183 sync += r.end_row_ix.eq(
1184 get_row_of_line(
1185 get_row(req_laddr)
1186 ) - 1
1187 )
1188
1189 # -- Prep for first wishbone read. We calculate the
1190 # -- address of the start of the cache line and
1191 # -- start the WB cycle.
1192 # r.wb.adr <= req_laddr(r.wb.adr'left downto 0);
1193 # r.wb.cyc <= '1';
1194 # r.wb.stb <= '1';
1195 # Prep for first wishbone read.
1196 # We calculate the
1197 # address of the start of the cache line and
1198 # start the WB cycle.
1199 sync += r.wb.adr.eq(
1200 req_laddr[:r.wb.adr]
1201 )
1202
1203 # -- Track that we had one request sent
1204 # r.state <= CLR_TAG;
1205 # Track that we had one request sent
1206 sync += r.state.eq(State.CLR_TAG)
1207 # end if;
1208
1209 # when CLR_TAG | WAIT_ACK =>
1210 with m.Case(State.CLR_TAG, State.WAIT_ACK):
1211 # if r.state = CLR_TAG then
1212 with m.If(r.state == State.CLR_TAG):
1213 # -- Get victim way from plru
1214 # r.store_way <= replace_way;
1215 # Get victim way from plru
1216 sync += r.store_way.eq(replace_way)
1217 #
1218 # -- Force misses on that way while
1219 # -- reloading that line
1220 # cache_valids(req_index)(replace_way) <= '0';
1221 # Force misses on that way while
1222 # realoading that line
1223 sync += cache_valid_bits[
1224 req_index
1225 ][replace_way].eq(0)
1226
1227 # -- Store new tag in selected way
1228 # for i in 0 to NUM_WAYS-1 loop
1229 # if i = replace_way then
1230 # tagset := cache_tags(r.store_index);
1231 # write_tag(i, tagset, r.store_tag);
1232 # cache_tags(r.store_index) <= tagset;
1233 # end if;
1234 # end loop;
1235 for i in range(NUM_WAYS):
1236 with m.If(i == replace_way):
1237 comb += tagset.eq(
1238 cache_tags[r.store_index]
1239 )
1240 sync += write_tag(
1241 i, tagset, r.store_tag
1242 )
1243 sync += cache_tags[r.store_index].eq(
1244 tagset
1245 )
1246
1247 # r.state <= WAIT_ACK;
1248 sync += r.state.eq(State.WAIT_ACK)
1249 # end if;
1250
1251 # -- Requests are all sent if stb is 0
1252 # stbs_done := r.wb.stb = '0';
1253 # Requests are all sent if stb is 0
1254 comb += stbs_done.eq(r.wb.stb == 0)
1255
1256 # -- If we are still sending requests,
1257 # -- was one accepted ?
1258 # if wishbone_in.stall = '0' and not stbs_done then
1259 # If we are still sending requests,
1260 # was one accepted?
1261 with m.If(~wb_in.stall & ~stbs_done):
1262 # -- That was the last word ? We are done sending.
1263 # -- Clear stb and set stbs_done so we can handle
1264 # -- an eventual last ack on the same cycle.
1265 # if is_last_row_addr(r.wb.adr, r.end_row_ix) then
1266 # r.wb.stb <= '0';
1267 # stbs_done := true;
1268 # end if;
1269 # That was the last word ?
1270 # We are done sending.
1271 # Clear stb and set stbs_done
1272 # so we can handle
1273 # an eventual last ack on
1274 # the same cycle.
1275 with m.If(is_last_row_addr(
1276 r.wb.adr, r.end_row_ix)):
1277 sync += r.wb.stb.eq(0)
1278 stbs_done.eq(1)
1279
1280 # -- Calculate the next row address
1281 # r.wb.adr <= next_row_addr(r.wb.adr);
1282 # Calculate the next row address
1283 sync += r.wb.adr.eq(next_row_addr(r.wb.adr))
1284 # end if;
1285
1286 # -- Incoming acks processing
1287 # if wishbone_in.ack = '1' then
1288 # Incoming acks processing
1289 with m.If(wb_in.ack):
1290 # r.rows_valid(r.store_row mod ROW_PER_LINE)
1291 # <= '1';
1292 sync += r.rows_valid[
1293 r.store_row & ROW_PER_LINE
1294 ].eq(1)
1295
1296 # -- Check for completion
1297 # if stbs_done and
1298 # is_last_row(r.store_row, r.end_row_ix) then
1299 # Check for completion
1300 with m.If(stbs_done & is_last_row(
1301 r.store_row, r.end_row_ix)):
1302 # -- Complete wishbone cycle
1303 # r.wb.cyc <= '0';
1304 # Complete wishbone cycle
1305 sync += r.wb.cyc.eq(0)
1306
1307 # -- Cache line is now valid
1308 # cache_valids(r.store_index)(replace_way) <=
1309 # r.store_valid and not inval_in;
1310 # Cache line is now valid
1311 sync += cache_valid_bits[
1312 r.store_index
1313 ][relace_way].eq(
1314 r.store_valid & ~inval_in
1315 )
1316
1317 # -- We are done
1318 # r.state <= IDLE;
1319 # We are done
1320 sync += r.state.eq(State.IDLE)
1321 # end if;
1322
1323 # -- Increment store row counter
1324 # r.store_row <= next_row(r.store_row);
1325 # Increment store row counter
1326 sync += store_row.eq(next_row(r.store_row))
1327 # end if;
1328 # end case;
1329 # end if;
1330 #
1331 # -- TLB miss and protection fault processing
1332 # if rst = '1' or flush_in = '1' or m_in.tlbld = '1' then
1333 # r.fetch_failed <= '0';
1334 # elsif i_in.req = '1' and access_ok = '0' and
1335 # stall_in = '0' then
1336 # r.fetch_failed <= '1';
1337 # end if;
1338 # TLB miss and protection fault processing
1339 with m.If('''TODO nmigen rst''' | flush_in | m_in.tlbld):
1340 sync += r.fetch_failed.eq(0)
1341
1342 with m.Elif(i_in.req & ~access_ok & ~stall_in):
1343 sync += r.fetch_failed.eq(1)
1344 # end if;
1345 # end process;
1346
1347 # icache_log: if LOG_LENGTH > 0 generate
1348 def icache_log(self, m, req_hit_way, ra_valid, access_ok,
1349 req_is_miss, req_is_hit, lway, wstate, r):
1350 comb = m.d.comb
1351 sync = m.d.sync
1352
1353 wb_in, i_out = self.wb_in, self.i_out
1354 log_out, stall_out = self.log_out, self.stall_out
1355
1356 # -- Output data to logger
1357 # signal log_data : std_ulogic_vector(53 downto 0);
1358 # begin
1359 # data_log: process(clk)
1360 # variable lway: way_t;
1361 # variable wstate: std_ulogic;
1362 # Output data to logger
1363 for i in range(LOG_LENGTH):
1364 # Output data to logger
1365 log_data = Signal(54)
1366 lway = Signal(NUM_WAYS)
1367 wstate = Signal()
1368
1369 # begin
1370 # if rising_edge(clk) then
1371 # lway := req_hit_way;
1372 # wstate := '0';
1373 comb += lway.eq(req_hit_way)
1374 comb += wstate.eq(0)
1375
1376 # if r.state /= IDLE then
1377 # wstate := '1';
1378 # end if;
1379 with m.If(r.state != State.IDLE):
1380 sync += wstate.eq(1)
1381
1382 # log_data <= i_out.valid &
1383 # i_out.insn &
1384 # wishbone_in.ack &
1385 # r.wb.adr(5 downto 3) &
1386 # r.wb.stb & r.wb.cyc &
1387 # wishbone_in.stall &
1388 # stall_out &
1389 # r.fetch_failed &
1390 # r.hit_nia(5 downto 2) &
1391 # wstate &
1392 # std_ulogic_vector(to_unsigned(lway, 3)) &
1393 # req_is_hit & req_is_miss &
1394 # access_ok &
1395 # ra_valid;
1396 sync += log_data.eq(Cat(
1397 ra_valid, access_ok, req_is_miss, req_is_hit,
1398 lway, wstate, r.hit_nia[2:6],
1399 r.fetch_failed, stall_out, wb_in.stall, r.wb.cyc,
1400 r.wb.stb, r.wb.adr[3:6], wb_in.ack, i_out.insn,
1401 i_out.valid
1402 ))
1403 # end if;
1404 # end process;
1405 # log_out <= log_data;
1406 comb += log_out.eq(log_data)
1407 # end generate;
1408 # end;
1409
    def elaborate(self, platform):
        """Build the icache: declare the shared storage and request
        decode signals, then wire the sub-processes (RAMs, PLRUs, TLB
        lookup/update, combinational decode, hit and miss machines)
        together.  Mirrors the microwatt icache.vhdl architecture body.
        """
        m = Module()
        comb = m.d.comb

        # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
        cache_tags = CacheTagArray()
        cache_valid_bits = CacheValidBitsArray()

        # ITLB storage (VHDL: itlb_valids/itlb_tags/itlb_ptes)
        itlb_valid_bits = TLBValidBitsArray()
        itlb_tags = TLBTagArray()
        itlb_ptes = TLBPTEArray()
        # TODO to be passed to nmigen as ram attributes
        # attribute ram_style of itlb_tags : signal is "distributed";
        # attribute ram_style of itlb_ptes : signal is "distributed";

        # Privilege bit from PTE EAA field
        eaa_priv = Signal()

        # All synchronous state of the cache (VHDL: signal r)
        r = RegInternal()

        # Async signal on incoming request.
        # NOTE(review): the VHDL types are integer *indices*
        # (index_t, row_t, way_t); Signal(NUM_LINES) etc. allocate
        # NUM_LINES *bits* where log2_int(NUM_LINES) would represent an
        # index -- widths are over-sized but comparisons still work;
        # confirm and consider log2_int here.
        req_index = Signal(NUM_LINES)
        req_row = Signal(BRAM_ROWS)
        req_hit_way = Signal(NUM_WAYS)
        req_tag = Signal(TAG_BITS)
        req_is_hit = Signal()
        req_is_miss = Signal()
        req_laddr = Signal(64)

        # TLB lookup results (same width caveat as above for
        # tlb_req_index)
        tlb_req_index = Signal(TLB_SIZE)
        real_addr = Signal(REAL_ADDR_BITS)
        ra_valid = Signal()
        priv_fault = Signal()
        access_ok = Signal()
        use_previous = Signal()

        # Per-way BRAM read data (VHDL: cache_ram_out_t)
        cache_out = CacheRamOut()

        # PLRU victim per line, and the way chosen for replacement
        plru_victim = PLRUOut()
        replace_way = Signal(NUM_WAYS)

        # call sub-functions putting everything together, using shared
        # signals established above
        self.rams(m, r, cache_out, use_previous, replace_way, req_row)
        self.maybe_plrus(m, r, plru_victim)
        self.itlb_lookup(m, tlb_req_index, itlb_ptes, itlb_tags,
                         real_addr, itlb_valid_bits, ra_valid, eaa_priv,
                         priv_fault, access_ok)
        self.itlb_update(m, itlb_valid_bits, itlb_tags, itlb_ptes)
        self.icache_comb(m, use_previous, r, req_index, req_row,
                         req_tag, real_addr, req_laddr, cache_valid_bits,
                         cache_tags, access_ok, req_is_hit, req_is_miss,
                         replace_way, plru_victim, cache_out)
        self.icache_hit(m, use_previous, r, req_is_hit, req_hit_way,
                        req_index, req_tag, real_addr)
        self.icache_miss(m, cache_valid_bits, r, req_is_miss, req_index,
                         req_laddr, req_tag, replace_way, cache_tags,
                         access_ok)
        # logger left unwired: lway/wstate/log_out arguments here do
        # not exist as locals yet
        #self.icache_log(m, log_out, req_hit_way, ra_valid, access_ok,
        #                req_is_miss, req_is_hit, lway, wstate, r)

        return m
1500
1501
1502 # icache_tb.vhdl
1503 #
1504 # library ieee;
1505 # use ieee.std_logic_1164.all;
1506 #
1507 # library work;
1508 # use work.common.all;
1509 # use work.wishbone_types.all;
1510 #
1511 # entity icache_tb is
1512 # end icache_tb;
1513 #
1514 # architecture behave of icache_tb is
1515 # signal clk : std_ulogic;
1516 # signal rst : std_ulogic;
1517 #
1518 # signal i_out : Fetch1ToIcacheType;
1519 # signal i_in : IcacheToDecode1Type;
1520 #
1521 # signal m_out : MmuToIcacheType;
1522 #
1523 # signal wb_bram_in : wishbone_master_out;
1524 # signal wb_bram_out : wishbone_slave_out;
1525 #
1526 # constant clk_period : time := 10 ns;
1527 # begin
1528 # icache0: entity work.icache
1529 # generic map(
1530 # LINE_SIZE => 64,
1531 # NUM_LINES => 4
1532 # )
1533 # port map(
1534 # clk => clk,
1535 # rst => rst,
1536 # i_in => i_out,
1537 # i_out => i_in,
1538 # m_in => m_out,
1539 # stall_in => '0',
1540 # flush_in => '0',
1541 # inval_in => '0',
1542 # wishbone_out => wb_bram_in,
1543 # wishbone_in => wb_bram_out
1544 # );
1545 #
1546 # -- BRAM Memory slave
1547 # bram0: entity work.wishbone_bram_wrapper
1548 # generic map(
1549 # MEMORY_SIZE => 1024,
1550 # RAM_INIT_FILE => "icache_test.bin"
1551 # )
1552 # port map(
1553 # clk => clk,
1554 # rst => rst,
1555 # wishbone_in => wb_bram_in,
1556 # wishbone_out => wb_bram_out
1557 # );
1558 #
1559 # clk_process: process
1560 # begin
1561 # clk <= '0';
1562 # wait for clk_period/2;
1563 # clk <= '1';
1564 # wait for clk_period/2;
1565 # end process;
1566 #
1567 # rst_process: process
1568 # begin
1569 # rst <= '1';
1570 # wait for 2*clk_period;
1571 # rst <= '0';
1572 # wait;
1573 # end process;
1574 #
1575 # stim: process
1576 # begin
1577 # i_out.req <= '0';
1578 # i_out.nia <= (others => '0');
1579 # i_out.stop_mark <= '0';
1580 #
1581 # m_out.tlbld <= '0';
1582 # m_out.tlbie <= '0';
1583 # m_out.addr <= (others => '0');
1584 # m_out.pte <= (others => '0');
1585 #
1586 # wait until rising_edge(clk);
1587 # wait until rising_edge(clk);
1588 # wait until rising_edge(clk);
1589 # wait until rising_edge(clk);
1590 #
1591 # i_out.req <= '1';
1592 # i_out.nia <= x"0000000000000004";
1593 #
1594 # wait for 30*clk_period;
1595 # wait until rising_edge(clk);
1596 #
1597 # assert i_in.valid = '1' severity failure;
1598 # assert i_in.insn = x"00000001"
1599 # report "insn @" & to_hstring(i_out.nia) &
1600 # "=" & to_hstring(i_in.insn) &
1601 # " expected 00000001"
1602 # severity failure;
1603 #
1604 # i_out.req <= '0';
1605 #
1606 # wait until rising_edge(clk);
1607 #
1608 # -- hit
1609 # i_out.req <= '1';
1610 # i_out.nia <= x"0000000000000008";
1611 # wait until rising_edge(clk);
1612 # wait until rising_edge(clk);
1613 # assert i_in.valid = '1' severity failure;
1614 # assert i_in.insn = x"00000002"
1615 # report "insn @" & to_hstring(i_out.nia) &
1616 # "=" & to_hstring(i_in.insn) &
1617 # " expected 00000002"
1618 # severity failure;
1619 # wait until rising_edge(clk);
1620 #
1621 # -- another miss
1622 # i_out.req <= '1';
1623 # i_out.nia <= x"0000000000000040";
1624 #
1625 # wait for 30*clk_period;
1626 # wait until rising_edge(clk);
1627 #
1628 # assert i_in.valid = '1' severity failure;
1629 # assert i_in.insn = x"00000010"
1630 # report "insn @" & to_hstring(i_out.nia) &
1631 # "=" & to_hstring(i_in.insn) &
1632 # " expected 00000010"
1633 # severity failure;
1634 #
1635 # -- test something that aliases
1636 # i_out.req <= '1';
1637 # i_out.nia <= x"0000000000000100";
1638 # wait until rising_edge(clk);
1639 # wait until rising_edge(clk);
1640 # assert i_in.valid = '0' severity failure;
1641 # wait until rising_edge(clk);
1642 #
1643 # wait for 30*clk_period;
1644 # wait until rising_edge(clk);
1645 #
1646 # assert i_in.valid = '1' severity failure;
1647 # assert i_in.insn = x"00000040"
1648 # report "insn @" & to_hstring(i_out.nia) &
1649 # "=" & to_hstring(i_in.insn) &
1650 # " expected 00000040"
1651 # severity failure;
1652 #
1653 # i_out.req <= '0';
1654 #
1655 # std.env.finish;
1656 # end process;
1657 # end;
def icache_sim(dut):
    """Stimulus process for test_icache().

    Mirrors the VHDL icache_tb stim process: a cold miss, a hit on the
    same line, a miss on another line, then an access that aliases the
    first line.  DUT outputs are read with "yield sig" so the asserts
    compare simulated values, not Signal objects (nmigen Values raise
    on bool()).
    """
    i_out, i_in, m_out, m_in = dut.i_out, dut.i_in, dut.m_out, dut.m_in

    # idle the fetch request and MMU inputs
    # (VHDL: (others => '0'), i.e. zero -- "~1" set all bits but bit 0)
    yield i_out.req.eq(0)
    yield i_out.nia.eq(0)
    yield i_out.stop_mark.eq(0)
    yield m_out.tlbld.eq(0)
    yield m_out.tlbie.eq(0)
    yield m_out.addr.eq(0)
    yield m_out.pte.eq(0)
    yield
    yield
    yield
    yield

    # cold miss on 0x4: wait for the line reload to complete
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000004, 64))
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    insn = yield i_in.insn
    nia = yield i_out.nia
    assert valid
    assert insn == 0x00000001, \
        "insn @%x=%x expected 00000001" % (nia, insn)
    yield i_out.req.eq(0)
    yield

    # hit on the freshly loaded line
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000008, 64))
    yield
    yield
    valid = yield i_in.valid
    insn = yield i_in.insn
    nia = yield i_out.nia
    assert valid
    assert insn == 0x00000002, \
        "insn @%x=%x expected 00000002" % (nia, insn)
    yield

    # another miss (was "yield i_out.req(1)": missing .eq)
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000040, 64))
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    insn = yield i_in.insn
    nia = yield i_out.nia
    assert valid
    assert insn == 0x00000010, \
        "insn @%x=%x expected 00000010" % (nia, insn)

    # test something that aliases: must initially be a miss
    # (VHDL: assert i_in.valid = '0' -- the assert was inverted)
    yield i_out.req.eq(1)
    yield i_out.nia.eq(Const(0x0000000000000100, 64))
    yield
    yield
    valid = yield i_in.valid
    assert not valid
    for i in range(30):
        yield
    yield
    valid = yield i_in.valid
    insn = yield i_in.insn
    nia = yield i_out.nia
    assert valid
    assert insn == 0x00000040, \
        "insn @%x=%x expected 00000040" % (nia, insn)
    yield i_out.req.eq(0)
1716
1717
def test_icache():
    """Instantiate the ICache, attach the stimulus generator and run
    the simulation, dumping waveforms to test_icache.vcd."""
    top = Module()
    dut = ICache()
    top.submodules.icache = dut

    # nmigen simulation: 1us clock, stimulus from icache_sim()
    sim = Simulator(top)
    sim.add_clock(1e-6)
    sim.add_sync_process(wrap(icache_sim(dut)))

    with sim.write_vcd('test_icache.vcd'):
        sim.run()
1731
if __name__ == '__main__':
    # emit RTLIL for inspection, then run the simulation testbench
    icache = ICache()
    il = rtlil.convert(icache, ports=[])
    with open("test_icache.il", "w") as f:
        f.write(il)

    test_icache()