src/soc/experiment/icache.py

   1 """ICache
   2
   3 based on Anton Blanchard microwatt icache.vhdl
   4
   5 Set associative icache
   6
   7 TODO (in no specific order):
   8 * Add debug interface to inspect cache content
   9 * Add snoop/invalidate path
  10 * Add multi-hit error detection
  11 * Pipelined bus interface (wb or axi)
  12 * Maybe add parity? There's a few bits free in each BRAM row on Xilinx
  13 * Add optimization: service hits on partially loaded lines
* Add optimization: (maybe) interrupt reload on flush/redirect
  15 * Check if playing with the geometry of the cache tags allow for more
  16   efficient use of distributed RAM and less logic/muxes. Currently we
  17   write TAG_BITS width which may not match full ram blocks and might
  18   cause muxes to be inferred for "partial writes".
  19 * Check if making the read size of PLRU a ROM helps utilization
  20
  21 """
  22 from enum import Enum, unique
  23 from nmigen import (Module, Signal, Elaboratable, Cat, Array, Const)
  24 from nmigen.cli import main, rtlil
  25 from nmutil.iocontrol import RecordObject
  26 from nmigen.utils import log2_int
  27 from nmutil.util import Display
  28
  29 #from nmutil.plru import PLRU
  30 from soc.experiment.cache_ram import CacheRam
  31 from soc.experiment.plru import PLRU
  32
  33 from soc.experiment.mem_types import (Fetch1ToICacheType,
  34                                       ICacheToDecode1Type,
  35                                       MMUToICacheType)
  36
  37 from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS,
  38                                      WB_SEL_BITS, WBAddrType, WBDataType,
  39                                      WBSelType, WBMasterOut, WBSlaveOut,
  40                                      WBMasterOutVector, WBSlaveOutVector,
  41                                      WBIOMasterOut, WBIOSlaveOut)
  42
  43 # for test
  44 from nmigen_soc.wishbone.sram import SRAM
  45 from nmigen import Memory
  46 from nmutil.util import wrap
  47 from nmigen.cli import main, rtlil
  48 if True:
  49     from nmigen.back.pysim import Simulator, Delay, Settle
  50 else:
  51     from nmigen.sim.cxxsim import Simulator, Delay, Settle
  52
  53
  54 SIM            = 0
  55 LINE_SIZE      = 64
  56 # BRAM organisation: We never access more than wishbone_data_bits
  57 # at a time so to save resources we make the array only that wide,
  58 # and use consecutive indices for to make a cache "line"
  59 #
  60 # ROW_SIZE is the width in bytes of the BRAM (based on WB, so 64-bits)
  61 ROW_SIZE       = WB_DATA_BITS // 8
  62 # Number of lines in a set
  63 NUM_LINES      = 32
  64 # Number of ways
  65 NUM_WAYS       = 4
  66 # L1 ITLB number of entries (direct mapped)
  67 TLB_SIZE       = 64
  68 # L1 ITLB log_2(page_size)
  69 TLB_LG_PGSZ    = 12
  70 # Number of real address bits that we store
  71 REAL_ADDR_BITS = 56
  72 # Non-zero to enable log data collection
  73 LOG_LENGTH     = 0
  74
  75 ROW_SIZE_BITS  = ROW_SIZE * 8
  76 # ROW_PER_LINE is the number of row
  77 # (wishbone) transactions in a line
  78 ROW_PER_LINE   = LINE_SIZE // ROW_SIZE
  79 # BRAM_ROWS is the number of rows in
  80 # BRAM needed to represent the full icache
  81 BRAM_ROWS      = NUM_LINES * ROW_PER_LINE
  82 # INSN_PER_ROW is the number of 32bit
  83 # instructions per BRAM row
  84 INSN_PER_ROW   = ROW_SIZE_BITS // 32
  85
  86 print("ROW_SIZE", ROW_SIZE)
  87 print("ROW_SIZE_BITS", ROW_SIZE_BITS)
  88 print("ROW_PER_LINE", ROW_PER_LINE)
  89 print("BRAM_ROWS", BRAM_ROWS)
  90 print("INSN_PER_ROW", INSN_PER_ROW)
  91
  92 # Bit fields counts in the address
  93 #
  94 # INSN_BITS is the number of bits to
  95 # select an instruction in a row
  96 INSN_BITS      = log2_int(INSN_PER_ROW)
  97 # ROW_BITS is the number of bits to
  98 # select a row
  99 ROW_BITS       = log2_int(BRAM_ROWS)
 100 # ROW_LINEBITS is the number of bits to
 101 # select a row within a line
 102 ROW_LINE_BITS   = log2_int(ROW_PER_LINE)
 103 # LINE_OFF_BITS is the number of bits for
 104 # the offset in a cache line
 105 LINE_OFF_BITS  = log2_int(LINE_SIZE)
 106 # ROW_OFF_BITS is the number of bits for
 107 # the offset in a row
 108 ROW_OFF_BITS   = log2_int(ROW_SIZE)
 109 # INDEX_BITS is the number of bits to
 110 # select a cache line
 111 INDEX_BITS     = log2_int(NUM_LINES)
 112 # SET_SIZE_BITS is the log base 2 of
 113 # the set size
 114 SET_SIZE_BITS  = LINE_OFF_BITS + INDEX_BITS
 115 # TAG_BITS is the number of bits of
 116 # the tag part of the address
 117 TAG_BITS       = REAL_ADDR_BITS - SET_SIZE_BITS
 118 # TAG_WIDTH is the width in bits of each way of the tag RAM
 119 TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)
 120
 121 # WAY_BITS is the number of bits to
 122 # select a way
 123 WAY_BITS       = log2_int(NUM_WAYS)
 124 TAG_RAM_WIDTH  = TAG_BITS * NUM_WAYS
 125
 126 #     -- L1 ITLB.
 127 #     constant TLB_BITS : natural := log2(TLB_SIZE);
 128 #     constant TLB_EA_TAG_BITS : natural := 64 - (TLB_LG_PGSZ + TLB_BITS);
 129 #     constant TLB_PTE_BITS : natural := 64;
 130 TLB_BITS        = log2_int(TLB_SIZE)
 131 TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_BITS)
 132 TLB_PTE_BITS    = 64
 133
 134
 135 print("INSN_BITS", INSN_BITS)
 136 print("ROW_BITS", ROW_BITS)
 137 print("ROW_LINE_BITS", ROW_LINE_BITS)
 138 print("LINE_OFF_BITS", LINE_OFF_BITS)
 139 print("ROW_OFF_BITS", ROW_OFF_BITS)
 140 print("INDEX_BITS", INDEX_BITS)
 141 print("SET_SIZE_BITS", SET_SIZE_BITS)
 142 print("TAG_BITS", TAG_BITS)
 143 print("WAY_BITS", WAY_BITS)
 144 print("TAG_RAM_WIDTH", TAG_RAM_WIDTH)
 145 print("TLB_BITS", TLB_BITS)
 146 print("TLB_EA_TAG_BITS", TLB_EA_TAG_BITS)
 147 print("TLB_PTE_BITS", TLB_PTE_BITS)
 148
 149
 150
 151
 152 # architecture rtl of icache is
 153 #constant ROW_SIZE_BITS : natural := ROW_SIZE*8;
 154 #-- ROW_PER_LINE is the number of row (wishbone
 155 #-- transactions) in a line
 156 #constant ROW_PER_LINE  : natural := LINE_SIZE / ROW_SIZE;
 157 #-- BRAM_ROWS is the number of rows in BRAM
 158 #-- needed to represent the full
 159 #-- icache
 160 #constant BRAM_ROWS     : natural := NUM_LINES * ROW_PER_LINE;
 161 #-- INSN_PER_ROW is the number of 32bit instructions per BRAM row
 162 #constant INSN_PER_ROW  : natural := ROW_SIZE_BITS / 32;
 163 #-- Bit fields counts in the address
 164 #
 165 #-- INSN_BITS is the number of bits to select
 166 #-- an instruction in a row
 167 #constant INSN_BITS     : natural := log2(INSN_PER_ROW);
 168 #-- ROW_BITS is the number of bits to select a row
 169 #constant ROW_BITS      : natural := log2(BRAM_ROWS);
 170 #-- ROW_LINEBITS is the number of bits to
 171 #-- select a row within a line
 172 #constant ROW_LINEBITS  : natural := log2(ROW_PER_LINE);
 173 #-- LINE_OFF_BITS is the number of bits for the offset
 174 #-- in a cache line
 175 #constant LINE_OFF_BITS : natural := log2(LINE_SIZE);
 176 #-- ROW_OFF_BITS is the number of bits for the offset in a row
 177 #constant ROW_OFF_BITS  : natural := log2(ROW_SIZE);
 178 #-- INDEX_BITS is the number of bits to select a cache line
 179 #constant INDEX_BITS    : natural := log2(NUM_LINES);
 180 #-- SET_SIZE_BITS is the log base 2 of the set size
 181 #constant SET_SIZE_BITS : natural := LINE_OFF_BITS + INDEX_BITS;
 182 #-- TAG_BITS is the number of bits of the tag part of the address
 183 #constant TAG_BITS      : natural := REAL_ADDR_BITS - SET_SIZE_BITS;
 184 #-- WAY_BITS is the number of bits to select a way
 185 #constant WAY_BITS     : natural := log2(NUM_WAYS);
 186
 187 #-- Example of layout for 32 lines of 64 bytes:
 188 #--
 189 #-- ..  tag    |index|  line  |
 190 #-- ..         |   row   |    |
 191 #-- ..         |     |   | |00| zero          (2)
 192 #-- ..         |     |   |-|  | INSN_BITS     (1)
 193 #-- ..         |     |---|    | ROW_LINEBITS  (3)
 194 #-- ..         |     |--- - --| LINE_OFF_BITS (6)
 195 #-- ..         |         |- --| ROW_OFF_BITS  (3)
 196 #-- ..         |----- ---|    | ROW_BITS      (8)
 197 #-- ..         |-----|        | INDEX_BITS    (5)
 198 #-- .. --------|              | TAG_BITS      (53)
 199    # Example of layout for 32 lines of 64 bytes:
 200    #
 201    # ..  tag    |index|  line  |
 202    # ..         |   row   |    |
 203    # ..         |     |   | |00| zero          (2)
 204    # ..         |     |   |-|  | INSN_BITS     (1)
 205    # ..         |     |---|    | ROW_LINEBITS  (3)
 206    # ..         |     |--- - --| LINE_OFF_BITS (6)
 207    # ..         |         |- --| ROW_OFF_BITS  (3)
 208    # ..         |----- ---|    | ROW_BITS      (8)
 209    # ..         |-----|        | INDEX_BITS    (5)
 210    # .. --------|              | TAG_BITS      (53)
 211
 212 #subtype row_t is integer range 0 to BRAM_ROWS-1;
 213 #subtype index_t is integer range 0 to NUM_LINES-1;
 214 #subtype way_t is integer range 0 to NUM_WAYS-1;
 215 #subtype row_in_line_t is unsigned(ROW_LINEBITS-1 downto 0);
 216 #
 217 #-- The cache data BRAM organized as described above for each way
 218 #subtype cache_row_t is std_ulogic_vector(ROW_SIZE_BITS-1 downto 0);
 219 #
 220 #-- The cache tags LUTRAM has a row per set. Vivado is a pain and will
 221 #-- not handle a clean (commented) definition of the cache tags as a 3d
 222 #-- memory. For now, work around it by putting all the tags
 223 #subtype cache_tag_t is std_logic_vector(TAG_BITS-1 downto 0);
 224 #  type cache_tags_set_t is array(way_t) of cache_tag_t;
 225 #  type cache_tags_array_t is array(index_t) of cache_tags_set_t;
 226 #constant TAG_RAM_WIDTH : natural := TAG_BITS * NUM_WAYS;
 227 #subtype cache_tags_set_t is std_logic_vector(TAG_RAM_WIDTH-1 downto 0);
 228 #type cache_tags_array_t is array(index_t) of cache_tags_set_t;
 229 def CacheTagArray():
 230     return Array(Signal(TAG_RAM_WIDTH, name="cachetag_%d" %x) \
 231                  for x in range(NUM_LINES))
 232
 233 #-- The cache valid bits
 234 #subtype cache_way_valids_t is std_ulogic_vector(NUM_WAYS-1 downto 0);
 235 #type cache_valids_t is array(index_t) of cache_way_valids_t;
 236 #type row_per_line_valid_t is array(0 to ROW_PER_LINE - 1) of std_ulogic;
 237 def CacheValidBitsArray():
 238     return Array(Signal(NUM_WAYS, name="cachevalid_%d" %x) \
 239                  for x in range(NUM_LINES))
 240
 241 def RowPerLineValidArray():
 242     return Array(Signal(name="rows_valid_%d" %x) \
 243                  for x in range(ROW_PER_LINE))
 244
 245
 246 #attribute ram_style : string;
 247 #attribute ram_style of cache_tags : signal is "distributed";
# TODO to be passed to nmigen as ram attributes
 249    # attribute ram_style : string;
 250    # attribute ram_style of cache_tags : signal is "distributed";
 251
 252
 253 #subtype tlb_index_t is integer range 0 to TLB_SIZE - 1;
 254 #type tlb_valids_t is array(tlb_index_t) of std_ulogic;
 255 #subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0);
 256 #type tlb_tags_t is array(tlb_index_t) of tlb_tag_t;
 257 #subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
 258 #type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t;
 259 def TLBValidBitsArray():
 260     return Array(Signal(name="tlbvalid_%d" %x) \
 261                  for x in range(TLB_SIZE))
 262
 263 def TLBTagArray():
 264     return Array(Signal(TLB_EA_TAG_BITS, name="tlbtag_%d" %x) \
 265                  for x in range(TLB_SIZE))
 266
 267 def TLBPtesArray():
 268     return Array(Signal(TLB_PTE_BITS, name="tlbptes_%d" %x) \
 269                  for x in range(TLB_SIZE))
 270
 271
 272 #-- Cache RAM interface
 273 #type cache_ram_out_t is array(way_t) of cache_row_t;
 274 # Cache RAM interface
 275 def CacheRamOut():
 276     return Array(Signal(ROW_SIZE_BITS, name="cache_out_%d" %x) \
 277                  for x in range(NUM_WAYS))
 278
 279 #-- PLRU output interface
 280 #type plru_out_t is array(index_t) of
 281 # std_ulogic_vector(WAY_BITS-1 downto 0);
 282 # PLRU output interface
 283 def PLRUOut():
 284     return Array(Signal(WAY_BITS, name="plru_out_%d" %x) \
 285                  for x in range(NUM_LINES))
 286
 287 #     -- Return the cache line index (tag index) for an address
 288 #     function get_index(addr: std_ulogic_vector(63 downto 0))
 289 #      return index_t is
 290 #     begin
 291 #         return to_integer(unsigned(
 292 #          addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS)
 293 #         ));
 294 #     end;
 295 # Return the cache line index (tag index) for an address
 296 def get_index(addr):
 297     return addr[LINE_OFF_BITS:SET_SIZE_BITS]
 298
 299 #     -- Return the cache row index (data memory) for an address
 300 #     function get_row(addr: std_ulogic_vector(63 downto 0))
 301 #       return row_t is
 302 #     begin
 303 #         return to_integer(unsigned(
 304 #          addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS)
 305 #         ));
 306 #     end;
 307 # Return the cache row index (data memory) for an address
 308 def get_row(addr):
 309     return addr[ROW_OFF_BITS:SET_SIZE_BITS]
 310
 311 #     -- Return the index of a row within a line
 312 #     function get_row_of_line(row: row_t) return row_in_line_t is
 313 #       variable row_v : unsigned(ROW_BITS-1 downto 0);
 314 #     begin
 315 #       row_v := to_unsigned(row, ROW_BITS);
 316 #         return row_v(ROW_LINEBITS-1 downto 0);
 317 #     end;
 318 # Return the index of a row within a line
 319 def get_row_of_line(row):
 320     return row[:ROW_LINE_BITS]
 321
 322 #     -- Returns whether this is the last row of a line
 323 #     function is_last_row_addr(addr: wishbone_addr_type;
 324 #      last: row_in_line_t
 325 #     )
 326 #      return boolean is
 327 #     begin
 328 #       return unsigned(
 329 #        addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS)
 330 #       ) = last;
 331 #     end;
 332 # Returns whether this is the last row of a line
 333 def is_last_row_addr(addr, last):
 334     return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last
 335
 336 #     -- Returns whether this is the last row of a line
 337 #     function is_last_row(row: row_t;
 338 #      last: row_in_line_t) return boolean is
 339 #     begin
 340 #       return get_row_of_line(row) = last;
 341 #     end;
 342 # Returns whether this is the last row of a line
 343 def is_last_row(row, last):
 344     return get_row_of_line(row) == last
 345
 346 #     -- Return the next row in the current cache line. We use a dedicated
 347 #     -- function in order to limit the size of the generated adder to be
 348 #     -- only the bits within a cache line (3 bits with default settings)
 349 #     function next_row(row: row_t) return row_t is
 350 #       variable row_v   : std_ulogic_vector(ROW_BITS-1 downto 0);
 351 #       variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
 352 #       variable result  : std_ulogic_vector(ROW_BITS-1 downto 0);
 353 #     begin
 354 #       row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
 355 #       row_idx := row_v(ROW_LINEBITS-1 downto 0);
 356 #       row_v(ROW_LINEBITS-1 downto 0) :=
 357 #        std_ulogic_vector(unsigned(row_idx) + 1);
 358 #       return to_integer(unsigned(row_v));
 359 #     end;
 360 # Return the next row in the current cache line. We use a dedicated
 361 # function in order to limit the size of the generated adder to be
 362 # only the bits within a cache line (3 bits with default settings)
 363 def next_row(row):
 364     row_v = row[0:ROW_LINE_BITS] + 1
 365     return Cat(row_v[:ROW_LINE_BITS], row[ROW_LINE_BITS:])
 366 #     -- Read the instruction word for the given address in the
 367 #     -- current cache row
 368 #     function read_insn_word(addr: std_ulogic_vector(63 downto 0);
 369 #                           data: cache_row_t) return std_ulogic_vector is
 370 #       variable word: integer range 0 to INSN_PER_ROW-1;
 371 #     begin
 372 #         word := to_integer(unsigned(addr(INSN_BITS+2-1 downto 2)));
 373 #       return data(31+word*32 downto word*32);
 374 #     end;
 375 # Read the instruction word for the given address
 376 # in the current cache row
 377 def read_insn_word(addr, data):
 378     word = addr[2:INSN_BITS+2]
 379     return data.word_select(word, 32)
 380
 381 #     -- Get the tag value from the address
 382 #     function get_tag(
 383 #      addr: std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0)
 384 #     )
 385 #      return cache_tag_t is
 386 #     begin
 387 #         return addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS);
 388 #     end;
 389 # Get the tag value from the address
 390 def get_tag(addr):
 391     return addr[SET_SIZE_BITS:REAL_ADDR_BITS]
 392
 393 #     -- Read a tag from a tag memory row
 394 #     function read_tag(way: way_t; tagset: cache_tags_set_t)
 395 #      return cache_tag_t is
 396 #     begin
 397 #       return tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS);
 398 #     end;
 399 # Read a tag from a tag memory row
 400 def read_tag(way, tagset):
 401     return tagset.word_select(way, TAG_BITS)
 402
 403 #     -- Write a tag to tag memory row
 404 #     procedure write_tag(way: in way_t;
 405 #      tagset: inout cache_tags_set_t; tag: cache_tag_t) is
 406 #     begin
 407 #       tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS) := tag;
 408 #     end;
 409 # Write a tag to tag memory row
 410 def write_tag(way, tagset, tag):
 411     return read_tag(way, tagset).eq(tag)
 412
 413 #     -- Simple hash for direct-mapped TLB index
 414 #     function hash_ea(addr: std_ulogic_vector(63 downto 0))
 415 #      return tlb_index_t is
 416 #         variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
 417 #     begin
 418 #         hash := addr(TLB_LG_PGSZ + TLB_BITS - 1 downto TLB_LG_PGSZ)
 419 #                 xor addr(
 420 #                  TLB_LG_PGSZ + 2 * TLB_BITS - 1 downto
 421 #                  TLB_LG_PGSZ + TLB_BITS
 422 #                 )
 423 #                 xor addr(
 424 #                  TLB_LG_PGSZ + 3 * TLB_BITS - 1 downto
 425 #                  TLB_LG_PGSZ + 2 * TLB_BITS
 426 #                 );
 427 #         return to_integer(unsigned(hash));
 428 #     end;
 429 # Simple hash for direct-mapped TLB index
 430 def hash_ea(addr):
 431     hsh = addr[TLB_LG_PGSZ:TLB_LG_PGSZ + TLB_BITS] ^ addr[
 432            TLB_LG_PGSZ + TLB_BITS:TLB_LG_PGSZ + 2 * TLB_BITS
 433           ] ^ addr[
 434            TLB_LG_PGSZ + 2 * TLB_BITS:TLB_LG_PGSZ + 3 * TLB_BITS
 435           ]
 436     return hsh
 437
 438 # begin
 439 #
 440 #     assert LINE_SIZE mod ROW_SIZE = 0;
 441 #     assert ispow2(LINE_SIZE) report "LINE_SIZE not power of 2"
 442 #      severity FAILURE;
 443 #     assert ispow2(NUM_LINES) report "NUM_LINES not power of 2"
 444 #      severity FAILURE;
 445 #     assert ispow2(ROW_PER_LINE) report "ROW_PER_LINE not power of 2"
 446 #      severity FAILURE;
 447 #     assert ispow2(INSN_PER_ROW) report "INSN_PER_ROW not power of 2"
 448 #      severity FAILURE;
 449 #     assert (ROW_BITS = INDEX_BITS + ROW_LINEBITS)
 450 #       report "geometry bits don't add up" severity FAILURE;
 451 #     assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS)
 452 #       report "geometry bits don't add up" severity FAILURE;
 453 #     assert (REAL_ADDR_BITS = TAG_BITS + INDEX_BITS + LINE_OFF_BITS)
 454 #       report "geometry bits don't add up" severity FAILURE;
 455 #     assert (REAL_ADDR_BITS = TAG_BITS + ROW_BITS + ROW_OFF_BITS)
 456 #       report "geometry bits don't add up" severity FAILURE;
 457 #
 458 #     sim_debug: if SIM generate
 459 #     debug: process
 460 #     begin
 461 #       report "ROW_SIZE      = " & natural'image(ROW_SIZE);
 462 #       report "ROW_PER_LINE  = " & natural'image(ROW_PER_LINE);
 463 #       report "BRAM_ROWS     = " & natural'image(BRAM_ROWS);
 464 #       report "INSN_PER_ROW  = " & natural'image(INSN_PER_ROW);
 465 #       report "INSN_BITS     = " & natural'image(INSN_BITS);
 466 #       report "ROW_BITS      = " & natural'image(ROW_BITS);
 467 #       report "ROW_LINEBITS  = " & natural'image(ROW_LINEBITS);
 468 #       report "LINE_OFF_BITS = " & natural'image(LINE_OFF_BITS);
 469 #       report "ROW_OFF_BITS  = " & natural'image(ROW_OFF_BITS);
 470 #       report "INDEX_BITS    = " & natural'image(INDEX_BITS);
 471 #       report "TAG_BITS      = " & natural'image(TAG_BITS);
 472 #       report "WAY_BITS      = " & natural'image(WAY_BITS);
 473 #       wait;
 474 #     end process;
 475 #     end generate;
 476
 477 # Cache reload state machine
 478 @unique
 479 class State(Enum):
 480     IDLE     = 0
 481     CLR_TAG  = 1
 482     WAIT_ACK = 2
 483
 484 #     type reg_internal_t is record
 485 #       -- Cache hit state (Latches for 1 cycle BRAM access)
 486 #       hit_way   : way_t;
 487 #       hit_nia   : std_ulogic_vector(63 downto 0);
 488 #       hit_smark : std_ulogic;
 489 #       hit_valid : std_ulogic;
 490 #
 491 #       -- Cache miss state (reload state machine)
 492 #         state            : state_t;
 493 #         wb               : wishbone_master_out;
 494 #       store_way        : way_t;
 495 #         store_index      : index_t;
 496 #       store_row        : row_t;
 497 #         store_tag        : cache_tag_t;
 498 #         store_valid      : std_ulogic;
 499 #         end_row_ix       : row_in_line_t;
 500 #         rows_valid       : row_per_line_valid_t;
 501 #
 502 #         -- TLB miss state
 503 #         fetch_failed     : std_ulogic;
 504 #     end record;
 505 class RegInternal(RecordObject):
 506     def __init__(self):
 507         super().__init__()
 508         # Cache hit state (Latches for 1 cycle BRAM access)
 509         self.hit_way      = Signal(NUM_WAYS)
 510         self.hit_nia      = Signal(64)
 511         self.hit_smark    = Signal()
 512         self.hit_valid    = Signal()
 513
 514         # Cache miss state (reload state machine)
 515         self.state        = Signal(State, reset=State.IDLE)
 516         self.wb           = WBMasterOut("wb")
 517         self.req_adr      = Signal(64)
 518         self.store_way    = Signal(NUM_WAYS)
 519         self.store_index  = Signal(NUM_LINES)
 520         self.store_row    = Signal(BRAM_ROWS)
 521         self.store_tag    = Signal(TAG_BITS)
 522         self.store_valid  = Signal()
 523         self.end_row_ix   = Signal(ROW_LINE_BITS)
 524         self.rows_valid   = RowPerLineValidArray()
 525
 526         # TLB miss state
 527         self.fetch_failed = Signal()
 528
 529 # -- 64 bit direct mapped icache. All instructions are 4B aligned.
 530 #
 531 # entity icache is
 532 #     generic (
 533 #         SIM : boolean := false;
 534 #         -- Line size in bytes
 535 #         LINE_SIZE : positive := 64;
 536 #         -- BRAM organisation: We never access more
 537 #         -- than wishbone_data_bits
 538 #         -- at a time so to save resources we make the
 539 #         -- array only that wide,
 540 #         -- and use consecutive indices for to make a cache "line"
 541 #         --
 542 #         -- ROW_SIZE is the width in bytes of the BRAM (based on WB,
 543 #         -- so 64-bits)
 544 #         ROW_SIZE  : positive := wishbone_data_bits / 8;
 545 #         -- Number of lines in a set
 546 #         NUM_LINES : positive := 32;
 547 #         -- Number of ways
 548 #         NUM_WAYS  : positive := 4;
 549 #         -- L1 ITLB number of entries (direct mapped)
 550 #         TLB_SIZE : positive := 64;
 551 #         -- L1 ITLB log_2(page_size)
 552 #         TLB_LG_PGSZ : positive := 12;
 553 #         -- Number of real address bits that we store
 554 #         REAL_ADDR_BITS : positive := 56;
 555 #         -- Non-zero to enable log data collection
 556 #         LOG_LENGTH : natural := 0
 557 #         );
 558 #     port (
 559 #         clk          : in std_ulogic;
 560 #         rst          : in std_ulogic;
 561 #
 562 #         i_in         : in Fetch1ToIcacheType;
 563 #         i_out        : out IcacheToDecode1Type;
 564 #
 565 #         m_in         : in MmuToIcacheType;
 566 #
 567 #         stall_in     : in std_ulogic;
 568 #       stall_out    : out std_ulogic;
 569 #       flush_in     : in std_ulogic;
 570 #       inval_in     : in std_ulogic;
 571 #
 572 #         wishbone_out : out wishbone_master_out;
 573 #         wishbone_in  : in wishbone_slave_out;
 574 #
 575 #         log_out      : out std_ulogic_vector(53 downto 0)
 576 #         );
 577 # end entity icache;
# 64 bit set associative icache. All instructions are 4B aligned.
 579 class ICache(Elaboratable):
    """64 bit set associative icache. All instructions are 4B aligned."""
 581     def __init__(self):
 582         self.i_in           = Fetch1ToICacheType(name="i_in")
 583         self.i_out          = ICacheToDecode1Type(name="i_out")
 584
 585         self.m_in           = MMUToICacheType(name="m_in")
 586
 587         self.stall_in       = Signal()
 588         self.stall_out      = Signal()
 589         self.flush_in       = Signal()
 590         self.inval_in       = Signal()
 591
 592         self.wb_out         = WBMasterOut(name="wb_out")
 593         self.wb_in          = WBSlaveOut(name="wb_in")
 594
 595         self.log_out        = Signal(54)
 596
 597
 598 #     -- Generate a cache RAM for each way
 599 #     rams: for i in 0 to NUM_WAYS-1 generate
 600 #       signal do_read  : std_ulogic;
 601 #       signal do_write : std_ulogic;
 602 #       signal rd_addr  : std_ulogic_vector(ROW_BITS-1 downto 0);
 603 #       signal wr_addr  : std_ulogic_vector(ROW_BITS-1 downto 0);
 604 #       signal dout     : cache_row_t;
 605 #       signal wr_sel   : std_ulogic_vector(ROW_SIZE-1 downto 0);
 606 #     begin
 607 #       way: entity work.cache_ram
 608 #           generic map (
 609 #               ROW_BITS => ROW_BITS,
 610 #               WIDTH => ROW_SIZE_BITS
 611 #               )
 612 #           port map (
 613 #               clk     => clk,
 614 #               rd_en   => do_read,
 615 #               rd_addr => rd_addr,
 616 #               rd_data => dout,
 617 #               wr_sel  => wr_sel,
 618 #               wr_addr => wr_addr,
 619 #               wr_data => wishbone_in.dat
 620 #               );
 621 #       process(all)
 622 #       begin
 623 #           do_read <= not (stall_in or use_previous);
 624 #           do_write <= '0';
 625 #           if wishbone_in.ack = '1' and replace_way = i then
 626 #               do_write <= '1';
 627 #           end if;
 628 #           cache_out(i) <= dout;
 629 #           rd_addr <=
 630 #            std_ulogic_vector(to_unsigned(req_row, ROW_BITS));
 631 #           wr_addr <=
 632 #            std_ulogic_vector(to_unsigned(r.store_row, ROW_BITS));
 633 #             for i in 0 to ROW_SIZE-1 loop
 634 #                 wr_sel(i) <= do_write;
 635 #             end loop;
 636 #       end process;
 637 #     end generate;
 638     def rams(self, m, r, cache_out_row, use_previous, replace_way, req_row):
 639         comb = m.d.comb
 640
 641         wb_in, stall_in = self.wb_in, self.stall_in
 642
 643
 644         for i in range(NUM_WAYS):
 645             do_read  = Signal(name="do_rd_%d" % i)
 646             do_write = Signal(name="do_wr_%d" % i)
 647             rd_addr  = Signal(ROW_BITS)
 648             wr_addr  = Signal(ROW_BITS)
 649             d_out    = Signal(ROW_SIZE_BITS, name="d_out_%d" % i)
 650             wr_sel   = Signal(ROW_SIZE)
 651
 652             way = CacheRam(ROW_BITS, ROW_SIZE_BITS, True)
 653             setattr(m.submodules, "cacheram_%d" % i, way)
 654
 655             comb += way.rd_en.eq(do_read)
 656             comb += way.rd_addr.eq(rd_addr)
 657             comb += d_out.eq(way.rd_data_o)
 658             comb += way.wr_sel.eq(wr_sel)
 659             comb += way.wr_addr.eq(wr_addr)
 660             comb += way.wr_data.eq(wb_in.dat)
 661
 662             comb += do_read.eq(~(stall_in | use_previous))
 663
 664             with m.If(wb_in.ack & (replace_way == i)):
 665                 comb += do_write.eq(1)
 666
 667             with m.If(r.hit_way == i):
 668                 comb += cache_out_row.eq(d_out)
 669             comb += rd_addr.eq(req_row)
 670             comb += wr_addr.eq(r.store_row)
 671             for j in range(ROW_SIZE):
 672                 comb += wr_sel[j].eq(do_write)
 673
 674 #     -- Generate PLRUs
 675 #     maybe_plrus: if NUM_WAYS > 1 generate
 676 #     begin
 677 #       plrus: for i in 0 to NUM_LINES-1 generate
 678 #           -- PLRU interface
 679 #           signal plru_acc    : std_ulogic_vector(WAY_BITS-1 downto 0);
 680 #           signal plru_acc_en : std_ulogic;
 681 #           signal plru_out    : std_ulogic_vector(WAY_BITS-1 downto 0);
 682 #
 683 #       begin
 684 #           plru : entity work.plru
 685 #               generic map (
 686 #                   BITS => WAY_BITS
 687 #                   )
 688 #               port map (
 689 #                   clk => clk,
 690 #                   rst => rst,
 691 #                   acc => plru_acc,
 692 #                   acc_en => plru_acc_en,
 693 #                   lru => plru_out
 694 #                   );
 695 #
 696 #           process(all)
 697 #           begin
 698 #               -- PLRU interface
 699 #               if get_index(r.hit_nia) = i then
 700 #                   plru_acc_en <= r.hit_valid;
 701 #               else
 702 #                   plru_acc_en <= '0';
 703 #               end if;
 704 #               plru_acc <=
 705 #                std_ulogic_vector(to_unsigned(r.hit_way, WAY_BITS));
 706 #               plru_victim(i) <= plru_out;
 707 #           end process;
 708 #       end generate;
 709 #     end generate;
 710     def maybe_plrus(self, m, r, plru_victim):
 711         comb = m.d.comb
 712
 713         with m.If(NUM_WAYS > 1):
 714             for i in range(NUM_LINES):
 715                 plru_acc_i  = Signal(WAY_BITS)
 716                 plru_acc_en = Signal()
 717                 plru        = PLRU(WAY_BITS)
 718                 setattr(m.submodules, "plru_%d" % i, plru)
 719
 720                 comb += plru.acc_i.eq(plru_acc_i)
 721                 comb += plru.acc_en.eq(plru_acc_en)
 722
 723                 # PLRU interface
 724                 with m.If(get_index(r.hit_nia) == i):
 725                     comb += plru.acc_en.eq(r.hit_valid)
 726
 727                 comb += plru.acc_i.eq(r.hit_way)
 728                 comb += plru_victim[i].eq(plru.lru_o)
 729
 730 #     -- TLB hit detection and real address generation
 731 #     itlb_lookup : process(all)
 732 #         variable pte : tlb_pte_t;
 733 #         variable ttag : tlb_tag_t;
 734 #     begin
 735 #         tlb_req_index <= hash_ea(i_in.nia);
 736 #         pte := itlb_ptes(tlb_req_index);
 737 #         ttag := itlb_tags(tlb_req_index);
 738 #         if i_in.virt_mode = '1' then
 739 #             real_addr <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
 740 #                          i_in.nia(TLB_LG_PGSZ - 1 downto 0);
 741 #             if ttag = i_in.nia(63 downto TLB_LG_PGSZ + TLB_BITS) then
 742 #                 ra_valid <= itlb_valids(tlb_req_index);
 743 #             else
 744 #                 ra_valid <= '0';
 745 #             end if;
 746 #             eaa_priv <= pte(3);
 747 #         else
 748 #             real_addr <= i_in.nia(REAL_ADDR_BITS - 1 downto 0);
 749 #             ra_valid <= '1';
 750 #             eaa_priv <= '1';
 751 #         end if;
 752 #
 753 #         -- no IAMR, so no KUEP support for now
 754 #         priv_fault <= eaa_priv and not i_in.priv_mode;
 755 #         access_ok <= ra_valid and not priv_fault;
 756 #     end process;
 757     # TLB hit detection and real address generation
 758     def itlb_lookup(self, m, tlb_req_index, itlb_ptes, itlb_tags,
 759                     real_addr, itlb_valid_bits, ra_valid, eaa_priv,
 760                     priv_fault, access_ok):
 761         comb = m.d.comb
 762
 763         i_in = self.i_in
 764
 765         pte  = Signal(TLB_PTE_BITS)
 766         ttag = Signal(TLB_EA_TAG_BITS)
 767
 768         comb += tlb_req_index.eq(hash_ea(i_in.nia))
 769         comb += pte.eq(itlb_ptes[tlb_req_index])
 770         comb += ttag.eq(itlb_tags[tlb_req_index])
 771
 772         with m.If(i_in.virt_mode):
 773             comb += real_addr.eq(Cat(
 774                      i_in.nia[:TLB_LG_PGSZ],
 775                      pte[TLB_LG_PGSZ:REAL_ADDR_BITS]
 776                     ))
 777
 778             with m.If(ttag == i_in.nia[TLB_LG_PGSZ + TLB_BITS:64]):
 779                 comb += ra_valid.eq(itlb_valid_bits[tlb_req_index])
 780
 781             comb += eaa_priv.eq(pte[3])
 782
 783         with m.Else():
 784             comb += real_addr.eq(i_in.nia[:REAL_ADDR_BITS])
 785             comb += ra_valid.eq(1)
 786             comb += eaa_priv.eq(1)
 787
 788         # No IAMR, so no KUEP support for now
 789         comb += priv_fault.eq(eaa_priv & ~i_in.priv_mode)
 790         comb += access_ok.eq(ra_valid & ~priv_fault)
 791
 792 #     -- iTLB update
 793 #     itlb_update: process(clk)
 794 #         variable wr_index : tlb_index_t;
 795 #     begin
 796 #         if rising_edge(clk) then
 797 #             wr_index := hash_ea(m_in.addr);
 798 #             if rst = '1' or
 799 #              (m_in.tlbie = '1' and m_in.doall = '1') then
 800 #                 -- clear all valid bits
 801 #                 for i in tlb_index_t loop
 802 #                     itlb_valids(i) <= '0';
 803 #                 end loop;
 804 #             elsif m_in.tlbie = '1' then
 805 #                 -- clear entry regardless of hit or miss
 806 #                 itlb_valids(wr_index) <= '0';
 807 #             elsif m_in.tlbld = '1' then
 808 #                 itlb_tags(wr_index) <=
 809 #                  m_in.addr(63 downto TLB_LG_PGSZ + TLB_BITS);
 810 #                 itlb_ptes(wr_index) <= m_in.pte;
 811 #                 itlb_valids(wr_index) <= '1';
 812 #             end if;
 813 #         end if;
 814 #     end process;
 815     # iTLB update
 816     def itlb_update(self, m, itlb_valid_bits, itlb_tags, itlb_ptes):
 817         comb = m.d.comb
 818         sync = m.d.sync
 819
 820         m_in = self.m_in
 821
 822         wr_index = Signal(TLB_SIZE)
 823         sync += wr_index.eq(hash_ea(m_in.addr))
 824
 825         with m.If(m_in.tlbie & m_in.doall):
 826             # Clear all valid bits
 827             for i in range(TLB_SIZE):
 828                 sync += itlb_valid_bits[i].eq(0)
 829
 830         with m.Elif(m_in.tlbie):
 831             # Clear entry regardless of hit or miss
 832             sync += itlb_valid_bits[wr_index].eq(0)
 833
 834         with m.Elif(m_in.tlbld):
 835             sync += itlb_tags[wr_index].eq(
 836                      m_in.addr[TLB_LG_PGSZ + TLB_BITS:64]
 837                     )
 838             sync += itlb_ptes[wr_index].eq(m_in.pte)
 839             sync += itlb_valid_bits[wr_index].eq(1)
 840
 841 #     -- Cache hit detection, output to fetch2 and other misc logic
 842 #     icache_comb : process(all)
 843     # Cache hit detection, output to fetch2 and other misc logic
 844     def icache_comb(self, m, use_previous, r, req_index, req_row,
 845                     req_tag, real_addr, req_laddr, cache_valid_bits,
 846                     cache_tags, access_ok, req_is_hit,
 847                     req_is_miss, replace_way, plru_victim, cache_out_row):
 848 #       variable is_hit  : std_ulogic;
 849 #       variable hit_way : way_t;
 850         comb = m.d.comb
 851
 852         #comb += Display("ENTER icache_comb - use_previous:%x req_index:%x " \
 853         #                "req_row:%x req_tag:%x real_addr:%x req_laddr:%x " \
 854         #                "access_ok:%x req_is_hit:%x req_is_miss:%x " \
 855         #                "replace_way:%x", use_previous, req_index, req_row, \
 856         #                req_tag, real_addr, req_laddr, access_ok, \
 857         #                req_is_hit, req_is_miss, replace_way)
 858
 859         i_in, i_out, wb_out = self.i_in, self.i_out, self.wb_out
 860         flush_in, stall_out = self.flush_in, self.stall_out
 861
 862         is_hit  = Signal()
 863         hit_way = Signal(NUM_WAYS)
 864 #     begin
 865 #         -- i_in.sequential means that i_in.nia this cycle
 866 #         -- is 4 more than last cycle.  If we read more
 867 #         -- than 32 bits at a time, had a cache hit last
 868 #         -- cycle, and we don't want the first 32-bit chunk
 869 #         -- then we can keep the data we read last cycle
 870 #         -- and just use that.
 871 #         if unsigned(i_in.nia(INSN_BITS+2-1 downto 2)) /= 0 then
 872 #             use_previous <= i_in.sequential and r.hit_valid;
 873 #         else
 874 #             use_previous <= '0';
 875 #         end if;
 876         # i_in.sequential means that i_in.nia this cycle is 4 more than
 877         # last cycle.  If we read more than 32 bits at a time, had a
 878         # cache hit last cycle, and we don't want the first 32-bit chunk
 879         # then we can keep the data we read last cycle and just use that.
 880         with m.If(i_in.nia[2:INSN_BITS+2] != 0):
 881             comb += use_previous.eq(i_in.sequential & r.hit_valid)
 882
 883 #       -- Extract line, row and tag from request
 884 #         req_index <= get_index(i_in.nia);
 885 #         req_row <= get_row(i_in.nia);
 886 #         req_tag <= get_tag(real_addr);
 887         # Extract line, row and tag from request
 888         comb += req_index.eq(get_index(i_in.nia))
 889         comb += req_row.eq(get_row(i_in.nia))
 890         comb += req_tag.eq(get_tag(real_addr))
 891
 892 #       -- Calculate address of beginning of cache row, will be
 893 #       -- used for cache miss processing if needed
 894 #       req_laddr <=
 895 #        (63 downto REAL_ADDR_BITS => '0') &
 896 #        real_addr(REAL_ADDR_BITS - 1 downto ROW_OFF_BITS) &
 897 #        (ROW_OFF_BITS-1 downto 0 => '0');
 898         # Calculate address of beginning of cache row, will be
 899         # used for cache miss processing if needed
 900         comb += req_laddr.eq(Cat(
 901                  Const(0b0, ROW_OFF_BITS),
 902                  real_addr[ROW_OFF_BITS:REAL_ADDR_BITS],
 903                  Const(0b0, 8)
 904                 ))
 905
 906 #       -- Test if pending request is a hit on any way
 907 #       hit_way := 0;
 908 #       is_hit := '0';
 909 #       for i in way_t loop
 910 #           if i_in.req = '1' and
 911 #                 (cache_valids(req_index)(i) = '1' or
 912 #                  (r.state = WAIT_ACK and
 913 #                   req_index = r.store_index and
 914 #                   i = r.store_way and
 915 #                   r.rows_valid(req_row mod ROW_PER_LINE) = '1')) then
 916 #               if read_tag(i, cache_tags(req_index)) = req_tag then
 917 #                   hit_way := i;
 918 #                   is_hit := '1';
 919 #               end if;
 920 #           end if;
 921 #       end loop;
 922         # Test if pending request is a hit on any way
 923         hitcond = Signal()
 924         comb += hitcond.eq((r.state == State.WAIT_ACK)
 925                     & (req_index == r.store_index)
 926                     & r.rows_valid[req_row % ROW_PER_LINE])
 927         with m.If(i_in.req):
 928             cvb = Signal(NUM_WAYS)
 929             ctag = Signal(TAG_RAM_WIDTH)
 930             comb += ctag.eq(cache_tags[req_index])
 931             comb += cvb.eq(cache_valid_bits[req_index])
 932             for i in range(NUM_WAYS):
 933                 tagi = Signal(TAG_BITS, name="ti%d" % i)
 934                 comb += tagi.eq(read_tag(i, ctag))
 935                 hit_test = Signal(name="hit_test%d" % i)
 936                 comb += hit_test.eq(i == r.store_way)
 937                 with m.If((cvb[i] | (hitcond & hit_test)) & (tagi == req_tag)):
 938                     comb += hit_way.eq(i)
 939                     comb += is_hit.eq(1)
 940
 941 #       -- Generate the "hit" and "miss" signals
 942 #       -- for the synchronous blocks
 943 #       if i_in.req = '1' and access_ok = '1' and flush_in = '0'
 944 #        and rst = '0' then
 945 #           req_is_hit  <= is_hit;
 946 #           req_is_miss <= not is_hit;
 947 #       else
 948 #           req_is_hit  <= '0';
 949 #           req_is_miss <= '0';
 950 #       end if;
 951 #       req_hit_way <= hit_way;
 952         # Generate the "hit" and "miss" signals
 953         # for the synchronous blocks
 954         with m.If(i_in.req & access_ok & ~flush_in):
 955             comb += req_is_hit.eq(is_hit)
 956             comb += req_is_miss.eq(~is_hit)
 957
 958         with m.Else():
 959             comb += req_is_hit.eq(0)
 960             comb += req_is_miss.eq(0)
 961
 962 #       -- The way to replace on a miss
 963 #       if r.state = CLR_TAG then
 964 #           replace_way <=
 965 #            to_integer(unsigned(plru_victim(r.store_index)));
 966 #       else
 967 #           replace_way <= r.store_way;
 968 #       end if;
 969         # The way to replace on a miss
 970         with m.If(r.state == State.CLR_TAG):
 971             comb += replace_way.eq(plru_victim[r.store_index])
 972
 973         with m.Else():
 974             comb += replace_way.eq(r.store_way)
 975
 976 #       -- Output instruction from current cache row
 977 #       --
 978 #       -- Note: This is a mild violation of our design principle of
 979 #       -- having pipeline stages output from a clean latch. In this
 980 #       -- case we output the result of a mux. The alternative would
 981 #       -- be output an entire row which I prefer not to do just yet
 982 #       -- as it would force fetch2 to know about some of the cache
 983 #       -- geometry information.
 984 #       i_out.insn <= read_insn_word(r.hit_nia, cache_out(r.hit_way));
 985 #       i_out.valid <= r.hit_valid;
 986 #       i_out.nia <= r.hit_nia;
 987 #       i_out.stop_mark <= r.hit_smark;
 988 #       i_out.fetch_failed <= r.fetch_failed;
 989         # Output instruction from current cache row
 990         #
 991         # Note: This is a mild violation of our design principle of
 992         # having pipeline stages output from a clean latch. In this
 993         # case we output the result of a mux. The alternative would
 994         # be output an entire row which I prefer not to do just yet
 995         # as it would force fetch2 to know about some of the cache
 996         # geometry information.
 997         #comb += Display("BEFORE read_insn_word - r.hit_nia:%x " \
 998         #                "r.hit_way:%x, cache_out[r.hit_way]:%x", r.hit_nia, \
 999         #                r.hit_way, cache_out[r.hit_way])
1000         comb += i_out.insn.eq(read_insn_word(r.hit_nia, cache_out_row))
1001         comb += i_out.valid.eq(r.hit_valid)
1002         comb += i_out.nia.eq(r.hit_nia)
1003         comb += i_out.stop_mark.eq(r.hit_smark)
1004         comb += i_out.fetch_failed.eq(r.fetch_failed)
1005
1006 #       -- Stall fetch1 if we have a miss on cache or TLB
1007 #       -- or a protection fault
1008 #       stall_out <= not (is_hit and access_ok);
1009         # Stall fetch1 if we have a miss on cache or TLB
1010         # or a protection fault
1011         comb += stall_out.eq(~(is_hit & access_ok))
1012
1013 #       -- Wishbone requests output (from the cache miss reload machine)
1014 #       wishbone_out <= r.wb;
1015         # Wishbone requests output (from the cache miss reload machine)
1016         comb += wb_out.eq(r.wb)
1017 #     end process;
1018
1019 #     -- Cache hit synchronous machine
1020 #     icache_hit : process(clk)
1021     # Cache hit synchronous machine
1022     def icache_hit(self, m, use_previous, r, req_is_hit, req_hit_way,
1023                    req_index, req_tag, real_addr):
1024         sync = m.d.sync
1025
1026         i_in, stall_in = self.i_in, self.stall_in
1027         flush_in       = self.flush_in
1028
1029 #     begin
1030 #         if rising_edge(clk) then
1031 #             -- keep outputs to fetch2 unchanged on a stall
1032 #             -- except that flush or reset sets valid to 0
1033 #             -- If use_previous, keep the same data as last
1034 #             -- cycle and use the second half
1035 #             if stall_in = '1' or use_previous = '1' then
1036 #                 if rst = '1' or flush_in = '1' then
1037 #                     r.hit_valid <= '0';
1038 #             end if;
1039         # keep outputs to fetch2 unchanged on a stall
1040         # except that flush or reset sets valid to 0
1041         # If use_previous, keep the same data as last
1042         # cycle and use the second half
1043         with m.If(stall_in | use_previous):
1044             with m.If(flush_in):
1045                 sync += r.hit_valid.eq(0)
1046 #             else
1047 #                 -- On a hit, latch the request for the next cycle,
1048 #                 -- when the BRAM data will be available on the
1049 #                 -- cache_out output of the corresponding way
1050 #                 r.hit_valid <= req_is_hit;
1051 #                 if req_is_hit = '1' then
1052 #                     r.hit_way <= req_hit_way;
1053         with m.Else():
1054             # On a hit, latch the request for the next cycle,
1055             # when the BRAM data will be available on the
1056             # cache_out output of the corresponding way
1057             sync += r.hit_valid.eq(req_is_hit)
1058
1059             with m.If(req_is_hit):
1060                 sync += r.hit_way.eq(req_hit_way)
1061
1062 #                     report "cache hit nia:" & to_hstring(i_in.nia) &
1063 #                         " IR:" & std_ulogic'image(i_in.virt_mode) &
1064 #                         " SM:" & std_ulogic'image(i_in.stop_mark) &
1065 #                         " idx:" & integer'image(req_index) &
1066 #                         " tag:" & to_hstring(req_tag) &
1067 #                         " way:" & integer'image(req_hit_way) &
1068 #                         " RA:" & to_hstring(real_addr);
1069                 sync += Display("cache hit nia:%x IR:%x SM:%x idx:%x " \
1070                                 "tag:%x way:%x RA:%x", i_in.nia, \
1071                                 i_in.virt_mode, i_in.stop_mark, req_index, \
1072                                 req_tag, req_hit_way, real_addr)
1073
1074
1075
1076 #                 end if;
1077 #           end if;
1078 #             if stall_in = '0' then
1079 #                 -- Send stop marks and NIA down regardless of validity
1080 #                 r.hit_smark <= i_in.stop_mark;
1081 #                 r.hit_nia <= i_in.nia;
1082 #             end if;
1083         with m.If(~stall_in):
1084             # Send stop marks and NIA down regardless of validity
1085             sync += r.hit_smark.eq(i_in.stop_mark)
1086             sync += r.hit_nia.eq(i_in.nia)
1087 #       end if;
1088 #     end process;
1089
1090 #     -- Cache miss/reload synchronous machine
1091 #     icache_miss : process(clk)
1092     # Cache miss/reload synchronous machine
1093     def icache_miss(self, m, cache_valid_bits, r, req_is_miss,
1094                     req_index, req_laddr, req_tag, replace_way,
1095                     cache_tags, access_ok, real_addr):
1096         comb = m.d.comb
1097         sync = m.d.sync
1098
1099         i_in, wb_in, m_in  = self.i_in, self.wb_in, self.m_in
1100         stall_in, flush_in = self.stall_in, self.flush_in
1101         inval_in           = self.inval_in
1102
1103 #       variable tagset    : cache_tags_set_t;
1104 #       variable stbs_done : boolean;
1105
1106         tagset    = Signal(TAG_RAM_WIDTH)
1107         stbs_done = Signal()
1108
1109 #     begin
1110 #         if rising_edge(clk) then
1111 #           -- On reset, clear all valid bits to force misses
1112 #             if rst = '1' then
1113         # On reset, clear all valid bits to force misses
1114 #               for i in index_t loop
1115 #                   cache_valids(i) <= (others => '0');
1116 #               end loop;
1117 #                 r.state <= IDLE;
1118 #                 r.wb.cyc <= '0';
1119 #                 r.wb.stb <= '0';
1120 #               -- We only ever do reads on wishbone
1121 #               r.wb.dat <= (others => '0');
1122 #               r.wb.sel <= "11111111";
1123 #               r.wb.we  <= '0';
1124
1125 #               -- Not useful normally but helps avoiding
1126 #               -- tons of sim warnings
1127 #               r.wb.adr <= (others => '0');
1128
1129 #             else
1130
1131 #                 -- Process cache invalidations
1132 #                 if inval_in = '1' then
1133 #                     for i in index_t loop
1134 #                         cache_valids(i) <= (others => '0');
1135 #                     end loop;
1136 #                     r.store_valid <= '0';
1137 #                 end if;
1138         comb += r.wb.sel.eq(-1)
1139         comb += r.wb.adr.eq(r.req_adr[3:])
1140
1141         # Process cache invalidations
1142         with m.If(inval_in):
1143             for i in range(NUM_LINES):
1144                 sync += cache_valid_bits[i].eq(0)
1145             sync += r.store_valid.eq(0)
1146
1147 #               -- Main state machine
1148 #               case r.state is
1149         # Main state machine
1150         with m.Switch(r.state):
1151
1152 #           when IDLE =>
1153             with m.Case(State.IDLE):
1154 #                 -- Reset per-row valid flags,
1155 #                 -- only used in WAIT_ACK
1156 #                 for i in 0 to ROW_PER_LINE - 1 loop
1157 #                     r.rows_valid(i) <= '0';
1158 #                 end loop;
1159                 # Reset per-row valid flags,
1160                 # only used in WAIT_ACK
1161                 for i in range(ROW_PER_LINE):
1162                     sync += r.rows_valid[i].eq(0)
1163
1164 #               -- We need to read a cache line
1165 #               if req_is_miss = '1' then
1166 #               report "cache miss nia:" & to_hstring(i_in.nia) &
1167 #                         " IR:" & std_ulogic'image(i_in.virt_mode) &
1168 #                   " SM:" & std_ulogic'image(i_in.stop_mark) &
1169 #                   " idx:" & integer'image(req_index) &
1170 #                   " way:" & integer'image(replace_way) &
1171 #                   " tag:" & to_hstring(req_tag) &
1172 #                         " RA:" & to_hstring(real_addr);
1173                 # We need to read a cache line
1174                 with m.If(req_is_miss):
1175                     sync += Display(
1176                              "cache miss nia:%x IR:%x SM:%x idx:%x " \
1177                              " way:%x tag:%x RA:%x", i_in.nia, \
1178                              i_in.virt_mode, i_in.stop_mark, req_index, \
1179                              replace_way, req_tag, real_addr)
1180
1181 #               -- Keep track of our index and way for
1182 #                   -- subsequent stores
1183 #               r.store_index <= req_index;
1184 #               r.store_row <= get_row(req_laddr);
1185 #                   r.store_tag <= req_tag;
1186 #                   r.store_valid <= '1';
1187 #                   r.end_row_ix <=
1188 #                    get_row_of_line(get_row(req_laddr)) - 1;
1189                     # Keep track of our index and way
1190                     # for subsequent stores
1191                     sync += r.store_index.eq(req_index)
1192                     sync += r.store_row.eq(get_row(req_laddr))
1193                     sync += r.store_tag.eq(req_tag)
1194                     sync += r.store_valid.eq(1)
1195                     sync += r.end_row_ix.eq(
1196                              get_row_of_line(
1197                               get_row(req_laddr)
1198                              ) - 1
1199                             )
1200
1201 #               -- Prep for first wishbone read. We calculate the
1202 #                   -- address of the start of the cache line and
1203 #                   -- start the WB cycle.
1204 #               r.wb.adr <= req_laddr(r.wb.adr'left downto 0);
1205 #               r.wb.cyc <= '1';
1206 #               r.wb.stb <= '1';
1207                     # Prep for first wishbone read.
1208                     # We calculate the
1209                     # address of the start of the cache line and
1210                     # start the WB cycle.
1211                     sync += r.req_adr.eq(req_laddr)
1212                     sync += r.wb.cyc.eq(1)
1213                     sync += r.wb.stb.eq(1)
1214
1215 #               -- Track that we had one request sent
1216 #               r.state <= CLR_TAG;
1217                     # Track that we had one request sent
1218                     sync += r.state.eq(State.CLR_TAG)
1219 #               end if;
1220
1221 #           when CLR_TAG | WAIT_ACK =>
1222             with m.Case(State.CLR_TAG, State.WAIT_ACK):
1223 #                 if r.state = CLR_TAG then
1224                 with m.If(r.state == State.CLR_TAG):
1225 #                     -- Get victim way from plru
1226 #               r.store_way <= replace_way;
1227                     # Get victim way from plru
1228                     sync += r.store_way.eq(replace_way)
1229 #
1230 #               -- Force misses on that way while
1231 #                   -- reloading that line
1232 #               cache_valids(req_index)(replace_way) <= '0';
1233                     # Force misses on that way while
1234                     # realoading that line
1235                     cv = Signal(INDEX_BITS)
1236                     comb += cv.eq(cache_valid_bits[req_index])
1237                     comb += cv.bit_select(replace_way, 1).eq(0)
1238                     sync += cache_valid_bits[req_index].eq(cv)
1239
1240 #               -- Store new tag in selected way
1241 #               for i in 0 to NUM_WAYS-1 loop
1242 #                   if i = replace_way then
1243 #                       tagset := cache_tags(r.store_index);
1244 #                       write_tag(i, tagset, r.store_tag);
1245 #                       cache_tags(r.store_index) <= tagset;
1246 #                   end if;
1247 #               end loop;
1248                     for i in range(NUM_WAYS):
1249                         with m.If(i == replace_way):
1250                             comb += tagset.eq(cache_tags[r.store_index])
1251                             comb += write_tag(i, tagset, r.store_tag)
1252                             sync += cache_tags[r.store_index].eq(tagset)
1253
1254 #                     r.state <= WAIT_ACK;
1255                     sync += r.state.eq(State.WAIT_ACK)
1256 #                 end if;
1257
1258 #               -- Requests are all sent if stb is 0
1259 #               stbs_done := r.wb.stb = '0';
1260                 # Requests are all sent if stb is 0
1261                 stbs_zero = Signal()
1262                 comb += stbs_zero.eq(r.wb.stb == 0)
1263                 comb += stbs_done.eq(stbs_zero)
1264
1265 #               -- If we are still sending requests,
1266 #               -- was one accepted ?
1267 #               if wishbone_in.stall = '0' and not stbs_done then
1268                 # If we are still sending requests,
1269                 # was one accepted?
1270                 with m.If(~wb_in.stall & ~stbs_zero):
1271 #               -- That was the last word ? We are done sending.
1272 #                   -- Clear stb and set stbs_done so we can handle
1273 #                   -- an eventual last ack on the same cycle.
1274 #               if is_last_row_addr(r.wb.adr, r.end_row_ix) then
1275 #                   r.wb.stb <= '0';
1276 #                   stbs_done := true;
1277 #               end if;
1278                     # That was the last word ?
1279                     # We are done sending.
1280                     # Clear stb and set stbs_done
1281                     # so we can handle
1282                     # an eventual last ack on
1283                     # the same cycle.
1284                     with m.If(is_last_row_addr(r.req_adr, r.end_row_ix)):
1285                         sync += Display("IS_LAST_ROW_ADDR " \
1286                                         "r.wb.addr:%x r.end_row_ix:%x " \
1287                                         "r.wb.stb:%x stbs_zero:%x " \
1288                                         "stbs_done:%x", r.wb.adr, \
1289                                         r.end_row_ix, r.wb.stb, \
1290                                         stbs_zero, stbs_done)
1291                         sync += r.wb.stb.eq(0)
1292                         comb += stbs_done.eq(1)
1293
1294 #               -- Calculate the next row address
1295 #               r.wb.adr <= next_row_addr(r.wb.adr);
1296                     # Calculate the next row address
1297                     rarange = Signal(LINE_OFF_BITS - ROW_OFF_BITS)
1298                     comb += rarange.eq(
1299                              r.req_adr[ROW_OFF_BITS:LINE_OFF_BITS] + 1
1300                             )
1301                     sync += r.req_adr[ROW_OFF_BITS:LINE_OFF_BITS].eq(
1302                              rarange
1303                             )
1304                     sync += Display("RARANGE r.wb.adr:%x stbs_zero:%x " \
1305                                     "stbs_done:%x", rarange, stbs_zero, \
1306                                     stbs_done)
1307 #               end if;
1308
1309 #               -- Incoming acks processing
1310 #               if wishbone_in.ack = '1' then
1311                 # Incoming acks processing
1312                 with m.If(wb_in.ack):
1313 #                     r.rows_valid(r.store_row mod ROW_PER_LINE)
1314 #                      <= '1';
1315                     sync += Display("WB_IN_ACK stbs_zero:%x " \
1316                                     "stbs_done:%x", \
1317                                     stbs_zero, stbs_done)
1318
1319                     sync += r.rows_valid[r.store_row % ROW_PER_LINE].eq(1)
1320
1321 #               -- Check for completion
1322 #               if stbs_done and
1323 #                    is_last_row(r.store_row, r.end_row_ix) then
1324                     # Check for completion
1325                     with m.If(stbs_done &
1326                               is_last_row(r.store_row, r.end_row_ix)):
1327 #                   -- Complete wishbone cycle
1328 #                   r.wb.cyc <= '0';
1329                         # Complete wishbone cycle
1330                         sync += r.wb.cyc.eq(0)
1331
1332 #                   -- Cache line is now valid
1333 #                   cache_valids(r.store_index)(replace_way) <=
1334 #                        r.store_valid and not inval_in;
1335                         # Cache line is now valid
1336                         cv = Signal(INDEX_BITS)
1337                         comb += cv.eq(cache_valid_bits[r.store_index])
1338                         comb += cv.bit_select(replace_way, 1).eq(
1339                                  r.store_valid & ~inval_in
1340                                 )
1341                         sync += cache_valid_bits[r.store_index].eq(cv)
1342
1343 #                   -- We are done
1344 #                   r.state <= IDLE;
1345                         # We are done
1346                         sync += r.state.eq(State.IDLE)
1347 #               end if;
1348
1349 #               -- Increment store row counter
1350 #               r.store_row <= next_row(r.store_row);
1351                     # Increment store row counter
1352                     sync += r.store_row.eq(next_row(r.store_row))
1353 #               end if;
1354 #           end case;
1355 #       end if;
1356 #
1357 #             -- TLB miss and protection fault processing
1358 #             if rst = '1' or flush_in = '1' or m_in.tlbld = '1' then
1359 #                 r.fetch_failed <= '0';
1360 #             elsif i_in.req = '1' and access_ok = '0' and
1361 #              stall_in = '0' then
1362 #                 r.fetch_failed <= '1';
1363 #             end if;
1364         # TLB miss and protection fault processing
1365         with m.If(flush_in | m_in.tlbld):
1366             sync += r.fetch_failed.eq(0)
1367
1368         with m.Elif(i_in.req & ~access_ok & ~stall_in):
1369             sync += r.fetch_failed.eq(1)
1370 #       end if;
1371 #     end process;
1372
1373 #     icache_log: if LOG_LENGTH > 0 generate
1374     def icache_log(self, m, req_hit_way, ra_valid, access_ok,
1375                    req_is_miss, req_is_hit, lway, wstate, r):
1376         comb = m.d.comb
1377         sync = m.d.sync
1378
1379         wb_in, i_out       = self.wb_in, self.i_out
1380         log_out, stall_out = self.log_out, self.stall_out
1381
1382 #         -- Output data to logger
1383 #         signal log_data    : std_ulogic_vector(53 downto 0);
1384 #     begin
1385 #         data_log: process(clk)
1386 #             variable lway: way_t;
1387 #             variable wstate: std_ulogic;
1388         # Output data to logger
1389         for i in range(LOG_LENGTH):
1390             # Output data to logger
1391             log_data = Signal(54)
1392             lway     = Signal(NUM_WAYS)
1393             wstate   = Signal()
1394
1395 #         begin
1396 #             if rising_edge(clk) then
1397 #                 lway := req_hit_way;
1398 #                 wstate := '0';
1399             sync += lway.eq(req_hit_way)
1400             sync += wstate.eq(0)
1401
1402 #                 if r.state /= IDLE then
1403 #                     wstate := '1';
1404 #                 end if;
1405             with m.If(r.state != State.IDLE):
1406                 sync += wstate.eq(1)
1407
1408 #                 log_data <= i_out.valid &
1409 #                             i_out.insn &
1410 #                             wishbone_in.ack &
1411 #                             r.wb.adr(5 downto 3) &
1412 #                             r.wb.stb & r.wb.cyc &
1413 #                             wishbone_in.stall &
1414 #                             stall_out &
1415 #                             r.fetch_failed &
1416 #                             r.hit_nia(5 downto 2) &
1417 #                             wstate &
1418 #                             std_ulogic_vector(to_unsigned(lway, 3)) &
1419 #                             req_is_hit & req_is_miss &
1420 #                             access_ok &
1421 #                             ra_valid;
1422             sync += log_data.eq(Cat(
1423                      ra_valid, access_ok, req_is_miss, req_is_hit,
1424                      lway, wstate, r.hit_nia[2:6],
1425                      r.fetch_failed, stall_out, wb_in.stall, r.wb.cyc,
1426                      r.wb.stb, r.wb.adr[3:6], wb_in.ack, i_out.insn,
1427                      i_out.valid
1428                     ))
1429 #             end if;
1430 #         end process;
1431 #         log_out <= log_data;
1432             comb += log_out.eq(log_data)
1433 #     end generate;
1434 # end;
1435
1436     def elaborate(self, platform):
1437
1438         m                = Module()
1439         comb             = m.d.comb
1440
1441         # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
1442         cache_tags       = CacheTagArray()
1443         cache_valid_bits = CacheValidBitsArray()
1444
1445 #     signal itlb_valids : tlb_valids_t;
1446 #     signal itlb_tags : tlb_tags_t;
1447 #     signal itlb_ptes : tlb_ptes_t;
1448 #     attribute ram_style of itlb_tags : signal is "distributed";
1449 #     attribute ram_style of itlb_ptes : signal is "distributed";
1450         itlb_valid_bits  = TLBValidBitsArray()
1451         itlb_tags        = TLBTagArray()
1452         itlb_ptes        = TLBPtesArray()
1453         # TODO to be passed to nmigen as ram attributes
1454         # attribute ram_style of itlb_tags : signal is "distributed";
1455         # attribute ram_style of itlb_ptes : signal is "distributed";
1456
1457 #     -- Privilege bit from PTE EAA field
1458 #     signal eaa_priv  : std_ulogic;
1459         # Privilege bit from PTE EAA field
1460         eaa_priv         = Signal()
1461
1462 #     signal r : reg_internal_t;
1463         r                = RegInternal()
1464
1465 #     -- Async signals on incoming request
1466 #     signal req_index   : index_t;
1467 #     signal req_row     : row_t;
1468 #     signal req_hit_way : way_t;
1469 #     signal req_tag     : cache_tag_t;
1470 #     signal req_is_hit  : std_ulogic;
1471 #     signal req_is_miss : std_ulogic;
1472 #     signal req_laddr   : std_ulogic_vector(63 downto 0);
1473         # Async signal on incoming request
1474         req_index        = Signal(NUM_LINES)
1475         req_row          = Signal(BRAM_ROWS)
1476         req_hit_way      = Signal(NUM_WAYS)
1477         req_tag          = Signal(TAG_BITS)
1478         req_is_hit       = Signal()
1479         req_is_miss      = Signal()
1480         req_laddr        = Signal(64)
1481
1482 #     signal tlb_req_index : tlb_index_t;
1483 #     signal real_addr     : std_ulogic_vector(
1484 #                             REAL_ADDR_BITS - 1 downto 0
1485 #                            );
1486 #     signal ra_valid      : std_ulogic;
1487 #     signal priv_fault    : std_ulogic;
1488 #     signal access_ok     : std_ulogic;
1489 #     signal use_previous  : std_ulogic;
1490         tlb_req_index    = Signal(TLB_SIZE)
1491         real_addr        = Signal(REAL_ADDR_BITS)
1492         ra_valid         = Signal()
1493         priv_fault       = Signal()
1494         access_ok        = Signal()
1495         use_previous     = Signal()
1496
1497 #     signal cache_out   : cache_ram_out_t;
1498         cache_out_row    = Signal(ROW_SIZE_BITS)
1499
1500 #     signal plru_victim : plru_out_t;
1501 #     signal replace_way : way_t;
1502         plru_victim      = PLRUOut()
1503         replace_way      = Signal(NUM_WAYS)
1504
1505         # call sub-functions putting everything together, using shared
1506         # signals established above
1507         self.rams(m, r, cache_out_row, use_previous, replace_way, req_row)
1508         self.maybe_plrus(m, r, plru_victim)
1509         self.itlb_lookup(m, tlb_req_index, itlb_ptes, itlb_tags,
1510                          real_addr, itlb_valid_bits, ra_valid, eaa_priv,
1511                          priv_fault, access_ok)
1512         self.itlb_update(m, itlb_valid_bits, itlb_tags, itlb_ptes)
1513         self.icache_comb(m, use_previous, r, req_index, req_row,
1514                          req_tag, real_addr, req_laddr, cache_valid_bits,
1515                          cache_tags, access_ok, req_is_hit, req_is_miss,
1516                          replace_way, plru_victim, cache_out_row)
1517         self.icache_hit(m, use_previous, r, req_is_hit, req_hit_way,
1518                         req_index, req_tag, real_addr)
1519         self.icache_miss(m, cache_valid_bits, r, req_is_miss, req_index,
1520                          req_laddr, req_tag, replace_way, cache_tags,
1521                          access_ok, real_addr)
1522         #self.icache_log(m, log_out, req_hit_way, ra_valid, access_ok,
1523         #                req_is_miss, req_is_hit, lway, wstate, r)
1524
1525         return m
1526
1527
1528 # icache_tb.vhdl
1529 #
1530 # library ieee;
1531 # use ieee.std_logic_1164.all;
1532 #
1533 # library work;
1534 # use work.common.all;
1535 # use work.wishbone_types.all;
1536 #
1537 # entity icache_tb is
1538 # end icache_tb;
1539 #
1540 # architecture behave of icache_tb is
1541 #     signal clk          : std_ulogic;
1542 #     signal rst          : std_ulogic;
1543 #
1544 #     signal i_out        : Fetch1ToIcacheType;
1545 #     signal i_in         : IcacheToDecode1Type;
1546 #
1547 #     signal m_out        : MmuToIcacheType;
1548 #
1549 #     signal wb_bram_in   : wishbone_master_out;
1550 #     signal wb_bram_out  : wishbone_slave_out;
1551 #
1552 #     constant clk_period : time := 10 ns;
1553 # begin
1554 #     icache0: entity work.icache
1555 #         generic map(
1556 #             LINE_SIZE => 64,
1557 #             NUM_LINES => 4
1558 #             )
1559 #         port map(
1560 #             clk => clk,
1561 #             rst => rst,
1562 #             i_in => i_out,
1563 #             i_out => i_in,
1564 #             m_in => m_out,
1565 #             stall_in => '0',
1566 #           flush_in => '0',
1567 #             inval_in => '0',
1568 #             wishbone_out => wb_bram_in,
1569 #             wishbone_in => wb_bram_out
1570 #             );
1571 #
1572 #     -- BRAM Memory slave
1573 #     bram0: entity work.wishbone_bram_wrapper
1574 #         generic map(
1575 #             MEMORY_SIZE   => 1024,
1576 #             RAM_INIT_FILE => "icache_test.bin"
1577 #             )
1578 #         port map(
1579 #             clk => clk,
1580 #             rst => rst,
1581 #             wishbone_in => wb_bram_in,
1582 #             wishbone_out => wb_bram_out
1583 #             );
1584 #
1585 #     clk_process: process
1586 #     begin
1587 #         clk <= '0';
1588 #         wait for clk_period/2;
1589 #         clk <= '1';
1590 #         wait for clk_period/2;
1591 #     end process;
1592 #
1593 #     rst_process: process
1594 #     begin
1595 #         rst <= '1';
1596 #         wait for 2*clk_period;
1597 #         rst <= '0';
1598 #         wait;
1599 #     end process;
1600 #
1601 #     stim: process
1602 #     begin
1603 #         i_out.req <= '0';
1604 #         i_out.nia <= (others => '0');
1605 #       i_out.stop_mark <= '0';
1606 #
1607 #         m_out.tlbld <= '0';
1608 #         m_out.tlbie <= '0';
1609 #         m_out.addr <= (others => '0');
1610 #         m_out.pte <= (others => '0');
1611 #
1612 #         wait until rising_edge(clk);
1613 #         wait until rising_edge(clk);
1614 #         wait until rising_edge(clk);
1615 #         wait until rising_edge(clk);
1616 #
1617 #         i_out.req <= '1';
1618 #         i_out.nia <= x"0000000000000004";
1619 #
1620 #         wait for 30*clk_period;
1621 #         wait until rising_edge(clk);
1622 #
1623 #         assert i_in.valid = '1' severity failure;
1624 #         assert i_in.insn = x"00000001"
1625 #           report "insn @" & to_hstring(i_out.nia) &
1626 #           "=" & to_hstring(i_in.insn) &
1627 #           " expected 00000001"
1628 #           severity failure;
1629 #
1630 #         i_out.req <= '0';
1631 #
1632 #         wait until rising_edge(clk);
1633 #
1634 #         -- hit
1635 #         i_out.req <= '1';
1636 #         i_out.nia <= x"0000000000000008";
1637 #         wait until rising_edge(clk);
1638 #         wait until rising_edge(clk);
1639 #         assert i_in.valid = '1' severity failure;
1640 #         assert i_in.insn = x"00000002"
1641 #           report "insn @" & to_hstring(i_out.nia) &
1642 #           "=" & to_hstring(i_in.insn) &
1643 #           " expected 00000002"
1644 #           severity failure;
1645 #         wait until rising_edge(clk);
1646 #
1647 #         -- another miss
1648 #         i_out.req <= '1';
1649 #         i_out.nia <= x"0000000000000040";
1650 #
1651 #         wait for 30*clk_period;
1652 #         wait until rising_edge(clk);
1653 #
1654 #         assert i_in.valid = '1' severity failure;
1655 #         assert i_in.insn = x"00000010"
1656 #           report "insn @" & to_hstring(i_out.nia) &
1657 #           "=" & to_hstring(i_in.insn) &
1658 #           " expected 00000010"
1659 #           severity failure;
1660 #
1661 #         -- test something that aliases
1662 #         i_out.req <= '1';
1663 #         i_out.nia <= x"0000000000000100";
1664 #         wait until rising_edge(clk);
1665 #         wait until rising_edge(clk);
1666 #         assert i_in.valid = '0' severity failure;
1667 #         wait until rising_edge(clk);
1668 #
1669 #         wait for 30*clk_period;
1670 #         wait until rising_edge(clk);
1671 #
1672 #         assert i_in.valid = '1' severity failure;
1673 #         assert i_in.insn = x"00000040"
1674 #           report "insn @" & to_hstring(i_out.nia) &
1675 #           "=" & to_hstring(i_in.insn) &
1676 #           " expected 00000040"
1677 #           severity failure;
1678 #
1679 #         i_out.req <= '0';
1680 #
1681 #         std.env.finish;
1682 #     end process;
1683 # end;
def icache_sim(dut):
    """Simulation stimulus for the icache, following icache_tb.vhdl "stim".

    Generator intended to be run as a synchronous simulator process.
    It issues four fetch requests and checks the returned instruction:

    * nia 0x004 - first request, checked after 31 clocks, expects insn 1
    * nia 0x008 - same line, checked after 2 clocks, expects insn 2
    * nia 0x040 - another line, checked after 31 clocks, expects 0x10
    * nia 0x100 - must NOT be valid after 2 clocks, then expects 0x40
      after a further 31 clocks

    dut: object providing the i_in, i_out and m_in records that are
         driven / sampled here.

    Raises AssertionError when any of the checks fails.
    """
    # Names are from the testbench's point of view (as in the VHDL):
    # i_out is what is driven *into* the cache, i_in is what the cache
    # returns.  Only cache inputs are driven; i_in is only sampled.
    i_out = dut.i_in
    i_in  = dut.i_out
    m_out = dut.m_in

    # idle request and MMU interfaces
    yield i_out.priv_mode.eq(1)
    yield i_out.req.eq(0)
    yield i_out.nia.eq(0)
    yield i_out.stop_mark.eq(0)
    yield m_out.tlbld.eq(0)
    yield m_out.tlbie.eq(0)
    yield m_out.addr.eq(0)
    yield m_out.pte.eq(0)
    yield
    yield
    yield
    yield

    # first request
    yield i_out.req.eq(1)
    yield i_out.nia.eq(0x0000000000000004)
    for _ in range(30):
        yield
    yield
    valid = yield i_in.valid
    nia   = yield i_out.nia
    insn  = yield i_in.insn
    print(f"valid? {valid}")
    assert valid
    assert insn == 0x00000001, \
        "insn @%x=%x expected 00000001" % (nia, insn)
    yield i_out.req.eq(0)
    yield

    # hit
    yield i_out.req.eq(1)
    yield i_out.nia.eq(0x0000000000000008)
    yield
    yield
    valid = yield i_in.valid
    # report the *requested* address, like every other check here
    nia   = yield i_out.nia
    insn  = yield i_in.insn
    assert valid
    assert insn == 0x00000002, \
        "insn @%x=%x expected 00000002" % (nia, insn)
    yield

    # another miss
    yield i_out.req.eq(1)
    yield i_out.nia.eq(0x0000000000000040)
    for _ in range(30):
        yield
    yield
    valid = yield i_in.valid
    nia   = yield i_out.nia
    insn  = yield i_in.insn
    assert valid
    assert insn == 0x00000010, \
        "insn @%x=%x expected 00000010" % (nia, insn)

    # test something that aliases
    yield i_out.req.eq(1)
    yield i_out.nia.eq(0x0000000000000100)
    yield
    yield
    valid = yield i_in.valid
    # "valid" is a plain Python int here: bitwise ~ would give -1 or -2,
    # both truthy, so the check has to use logical "not".
    assert not valid
    for _ in range(30):
        yield
    yield
    valid = yield i_in.valid
    nia   = yield i_out.nia
    insn  = yield i_in.insn
    assert valid
    assert insn == 0x00000040, \
         "insn @%x=%x expected 00000040" % (nia, insn)
    yield i_out.req.eq(0)
1760
1761
1762
def test_icache(mem):
    """Simulate an ICache wired to a wishbone SRAM, driven by icache_sim.

    mem: initial contents of the backing memory (passed as "init" to a
         64-bit wide, 16*64 deep Memory).

    The waveform is written to 'test_icache.vcd'.
    """
    icache  = ICache()

    backing = Memory(width=64, depth=16*64, init=mem)
    sram    = SRAM(memory=backing, granularity=8)

    top = Module()
    top.submodules.icache = icache
    top.submodules.sram   = sram

    bus    = sram.bus
    wb_out = icache.wb_out
    wb_in  = icache.wb_in

    # icache wishbone master outputs -> SRAM bus
    top.d.comb += [
        bus.cyc.eq(wb_out.cyc),
        bus.stb.eq(wb_out.stb),
        bus.we.eq(wb_out.we),
        bus.sel.eq(wb_out.sel),
        bus.adr.eq(wb_out.adr),
        bus.dat_w.eq(wb_out.dat),
    ]

    # SRAM bus responses -> icache wishbone inputs
    top.d.comb += [
        wb_in.ack.eq(bus.ack),
        wb_in.dat.eq(bus.dat_r),
    ]

    # nmigen Simulation
    sim = Simulator(top)
    sim.add_clock(1e-6)
    sim.add_sync_process(wrap(icache_sim(icache)))

    with sim.write_vcd('test_icache.vcd'):
        sim.run()
1791
if __name__ == "__main__":
    # Dump the RTLIL for a standalone ICache.
    icache = ICache()
    il_text = rtlil.convert(icache, ports=[])
    with open("test_icache.il", "w") as il_file:
        il_file.write(il_text)

    # 512 64-bit words: word k holds 2*k in its low 32 bits and 2*k+1 in
    # its high 32 bits.  icache_sim expects e.g. insn 1 at address 0x4
    # and insn 0x10 at address 0x40.
    mem_init = [(k * 2) | ((k * 2 + 1) << 32) for k in range(512)]

    test_icache(mem_init)
1803