src/soc/experiment/icache.py

   1 """ICache
   2
   3 based on Anton Blanchard microwatt icache.vhdl
   4
   5 Set associative icache
   6
   7 TODO (in no specific order):
   8 * Add debug interface to inspect cache content
   9 * Add snoop/invalidate path
  10 * Add multi-hit error detection
  11 * Pipelined bus interface (wb or axi)
  12 * Maybe add parity? There's a few bits free in each BRAM row on Xilinx
  13 * Add optimization: service hits on partially loaded lines
  14 * Add optimization: (maybe) interrupt reload on flush/redirect
  15 * Check if playing with the geometry of the cache tags allow for more
  16   efficient use of distributed RAM and less logic/muxes. Currently we
  17   write TAG_BITS width which may not match full ram blocks and might
  18   cause muxes to be inferred for "partial writes".
  19 * Check if making the read size of PLRU a ROM helps utilization
  20
  21 """
  22 from enum import Enum, unique
  23 from nmigen import (Module, Signal, Elaboratable, Cat, Array, Const, Repl)
  24 from nmigen.cli import main, rtlil
  25 from nmutil.iocontrol import RecordObject
  26 from nmigen.utils import log2_int
  27 from nmutil.util import Display
  28
  29 #from nmutil.plru import PLRU
  30 from soc.experiment.cache_ram import CacheRam
  31 from soc.experiment.plru import PLRU
  32
  33 from soc.experiment.mem_types import (Fetch1ToICacheType,
  34                                       ICacheToDecode1Type,
  35                                       MMUToICacheType)
  36
  37 from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS,
  38                                      WB_SEL_BITS, WBAddrType, WBDataType,
  39                                      WBSelType, WBMasterOut, WBSlaveOut,
  40                                      WBMasterOutVector, WBSlaveOutVector,
  41                                      WBIOMasterOut, WBIOSlaveOut)
  42
  43 # for test
  44 from nmigen_soc.wishbone.sram import SRAM
  45 from nmigen import Memory
  46 from nmutil.util import wrap
  47 from nmigen.cli import main, rtlil
  48 if True:
  49     from nmigen.back.pysim import Simulator, Delay, Settle
  50 else:
  51     from nmigen.sim.cxxsim import Simulator, Delay, Settle
  52
  53
  54 SIM            = 0
  55 LINE_SIZE      = 64
  56 # BRAM organisation: We never access more than wishbone_data_bits
  57 # at a time so to save resources we make the array only that wide,
  58 # and use consecutive indices for to make a cache "line"
  59 #
  60 # ROW_SIZE is the width in bytes of the BRAM (based on WB, so 64-bits)
  61 ROW_SIZE       = WB_DATA_BITS // 8
  62 # Number of lines in a set
  63 NUM_LINES      = 16
  64 # Number of ways
  65 NUM_WAYS       = 4
  66 # L1 ITLB number of entries (direct mapped)
  67 TLB_SIZE       = 64
  68 # L1 ITLB log_2(page_size)
  69 TLB_LG_PGSZ    = 12
  70 # Number of real address bits that we store
  71 REAL_ADDR_BITS = 56
  72 # Non-zero to enable log data collection
  73 LOG_LENGTH     = 0
  74
  75 ROW_SIZE_BITS  = ROW_SIZE * 8
  76 # ROW_PER_LINE is the number of row
  77 # (wishbone) transactions in a line
  78 ROW_PER_LINE   = LINE_SIZE // ROW_SIZE
  79 # BRAM_ROWS is the number of rows in
  80 # BRAM needed to represent the full icache
  81 BRAM_ROWS      = NUM_LINES * ROW_PER_LINE
  82 # INSN_PER_ROW is the number of 32bit
  83 # instructions per BRAM row
  84 INSN_PER_ROW   = ROW_SIZE_BITS // 32
  85
  86 print("ROW_SIZE", ROW_SIZE)
  87 print("ROW_SIZE_BITS", ROW_SIZE_BITS)
  88 print("ROW_PER_LINE", ROW_PER_LINE)
  89 print("BRAM_ROWS", BRAM_ROWS)
  90 print("INSN_PER_ROW", INSN_PER_ROW)
  91
  92 # Bit fields counts in the address
  93 #
  94 # INSN_BITS is the number of bits to
  95 # select an instruction in a row
  96 INSN_BITS      = log2_int(INSN_PER_ROW)
  97 # ROW_BITS is the number of bits to
  98 # select a row
  99 ROW_BITS       = log2_int(BRAM_ROWS)
 100 # ROW_LINEBITS is the number of bits to
 101 # select a row within a line
 102 ROW_LINE_BITS   = log2_int(ROW_PER_LINE)
 103 # LINE_OFF_BITS is the number of bits for
 104 # the offset in a cache line
 105 LINE_OFF_BITS  = log2_int(LINE_SIZE)
 106 # ROW_OFF_BITS is the number of bits for
 107 # the offset in a row
 108 ROW_OFF_BITS   = log2_int(ROW_SIZE)
 109 # INDEX_BITS is the number of bits to
 110 # select a cache line
 111 INDEX_BITS     = log2_int(NUM_LINES)
 112 # SET_SIZE_BITS is the log base 2 of
 113 # the set size
 114 SET_SIZE_BITS  = LINE_OFF_BITS + INDEX_BITS
 115 # TAG_BITS is the number of bits of
 116 # the tag part of the address
 117 TAG_BITS       = REAL_ADDR_BITS - SET_SIZE_BITS
 118 # TAG_WIDTH is the width in bits of each way of the tag RAM
 119 TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)
 120
 121 # WAY_BITS is the number of bits to
 122 # select a way
 123 WAY_BITS       = log2_int(NUM_WAYS)
 124 TAG_RAM_WIDTH  = TAG_BITS * NUM_WAYS
 125
 126 #     -- L1 ITLB.
 127 #     constant TLB_BITS : natural := log2(TLB_SIZE);
 128 #     constant TLB_EA_TAG_BITS : natural := 64 - (TLB_LG_PGSZ + TLB_BITS);
 129 #     constant TLB_PTE_BITS : natural := 64;
 130 TLB_BITS        = log2_int(TLB_SIZE)
 131 TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_BITS)
 132 TLB_PTE_BITS    = 64
 133
 134
 135 print("INSN_BITS", INSN_BITS)
 136 print("ROW_BITS", ROW_BITS)
 137 print("ROW_LINE_BITS", ROW_LINE_BITS)
 138 print("LINE_OFF_BITS", LINE_OFF_BITS)
 139 print("ROW_OFF_BITS", ROW_OFF_BITS)
 140 print("INDEX_BITS", INDEX_BITS)
 141 print("SET_SIZE_BITS", SET_SIZE_BITS)
 142 print("TAG_BITS", TAG_BITS)
 143 print("WAY_BITS", WAY_BITS)
 144 print("TAG_RAM_WIDTH", TAG_RAM_WIDTH)
 145 print("TLB_BITS", TLB_BITS)
 146 print("TLB_EA_TAG_BITS", TLB_EA_TAG_BITS)
 147 print("TLB_PTE_BITS", TLB_PTE_BITS)
 148
 149
 150
 151
 152 # architecture rtl of icache is
 153 #constant ROW_SIZE_BITS : natural := ROW_SIZE*8;
 154 #-- ROW_PER_LINE is the number of row (wishbone
 155 #-- transactions) in a line
 156 #constant ROW_PER_LINE  : natural := LINE_SIZE / ROW_SIZE;
 157 #-- BRAM_ROWS is the number of rows in BRAM
 158 #-- needed to represent the full
 159 #-- icache
 160 #constant BRAM_ROWS     : natural := NUM_LINES * ROW_PER_LINE;
 161 #-- INSN_PER_ROW is the number of 32bit instructions per BRAM row
 162 #constant INSN_PER_ROW  : natural := ROW_SIZE_BITS / 32;
 163 #-- Bit fields counts in the address
 164 #
 165 #-- INSN_BITS is the number of bits to select
 166 #-- an instruction in a row
 167 #constant INSN_BITS     : natural := log2(INSN_PER_ROW);
 168 #-- ROW_BITS is the number of bits to select a row
 169 #constant ROW_BITS      : natural := log2(BRAM_ROWS);
 170 #-- ROW_LINEBITS is the number of bits to
 171 #-- select a row within a line
 172 #constant ROW_LINEBITS  : natural := log2(ROW_PER_LINE);
 173 #-- LINE_OFF_BITS is the number of bits for the offset
 174 #-- in a cache line
 175 #constant LINE_OFF_BITS : natural := log2(LINE_SIZE);
 176 #-- ROW_OFF_BITS is the number of bits for the offset in a row
 177 #constant ROW_OFF_BITS  : natural := log2(ROW_SIZE);
 178 #-- INDEX_BITS is the number of bits to select a cache line
 179 #constant INDEX_BITS    : natural := log2(NUM_LINES);
 180 #-- SET_SIZE_BITS is the log base 2 of the set size
 181 #constant SET_SIZE_BITS : natural := LINE_OFF_BITS + INDEX_BITS;
 182 #-- TAG_BITS is the number of bits of the tag part of the address
 183 #constant TAG_BITS      : natural := REAL_ADDR_BITS - SET_SIZE_BITS;
 184 #-- WAY_BITS is the number of bits to select a way
 185 #constant WAY_BITS     : natural := log2(NUM_WAYS);
 186
 187 #-- Example of layout for 32 lines of 64 bytes:
 188 #--
 189 #-- ..  tag    |index|  line  |
 190 #-- ..         |   row   |    |
 191 #-- ..         |     |   | |00| zero          (2)
 192 #-- ..         |     |   |-|  | INSN_BITS     (1)
 193 #-- ..         |     |---|    | ROW_LINEBITS  (3)
 194 #-- ..         |     |--- - --| LINE_OFF_BITS (6)
 195 #-- ..         |         |- --| ROW_OFF_BITS  (3)
 196 #-- ..         |----- ---|    | ROW_BITS      (8)
 197 #-- ..         |-----|        | INDEX_BITS    (5)
 198 #-- .. --------|              | TAG_BITS      (53)
 199    # Example of layout for 32 lines of 64 bytes:
 200    #
 201    # ..  tag    |index|  line  |
 202    # ..         |   row   |    |
 203    # ..         |     |   | |00| zero          (2)
 204    # ..         |     |   |-|  | INSN_BITS     (1)
 205    # ..         |     |---|    | ROW_LINEBITS  (3)
 206    # ..         |     |--- - --| LINE_OFF_BITS (6)
 207    # ..         |         |- --| ROW_OFF_BITS  (3)
 208    # ..         |----- ---|    | ROW_BITS      (8)
 209    # ..         |-----|        | INDEX_BITS    (5)
 210    # .. --------|              | TAG_BITS      (53)
 211
 212 #subtype row_t is integer range 0 to BRAM_ROWS-1;
 213 #subtype index_t is integer range 0 to NUM_LINES-1;
 214 #subtype way_t is integer range 0 to NUM_WAYS-1;
 215 #subtype row_in_line_t is unsigned(ROW_LINEBITS-1 downto 0);
 216 #
 217 #-- The cache data BRAM organized as described above for each way
 218 #subtype cache_row_t is std_ulogic_vector(ROW_SIZE_BITS-1 downto 0);
 219 #
 220 #-- The cache tags LUTRAM has a row per set. Vivado is a pain and will
 221 #-- not handle a clean (commented) definition of the cache tags as a 3d
 222 #-- memory. For now, work around it by putting all the tags
 223 #subtype cache_tag_t is std_logic_vector(TAG_BITS-1 downto 0);
 224 #  type cache_tags_set_t is array(way_t) of cache_tag_t;
 225 #  type cache_tags_array_t is array(index_t) of cache_tags_set_t;
 226 #constant TAG_RAM_WIDTH : natural := TAG_BITS * NUM_WAYS;
 227 #subtype cache_tags_set_t is std_logic_vector(TAG_RAM_WIDTH-1 downto 0);
 228 #type cache_tags_array_t is array(index_t) of cache_tags_set_t;
 229 def CacheTagArray():
 230     return Array(Signal(TAG_RAM_WIDTH, name="cachetag_%d" %x) \
 231                  for x in range(NUM_LINES))
 232
 233 #-- The cache valid bits
 234 #subtype cache_way_valids_t is std_ulogic_vector(NUM_WAYS-1 downto 0);
 235 #type cache_valids_t is array(index_t) of cache_way_valids_t;
 236 #type row_per_line_valid_t is array(0 to ROW_PER_LINE - 1) of std_ulogic;
 237 def CacheValidBitsArray():
 238     return Array(Signal(NUM_WAYS, name="cachevalid_%d" %x) \
 239                  for x in range(NUM_LINES))
 240
 241 def RowPerLineValidArray():
 242     return Array(Signal(name="rows_valid_%d" %x) \
 243                  for x in range(ROW_PER_LINE))
 244
 245
 246 #attribute ram_style : string;
 247 #attribute ram_style of cache_tags : signal is "distributed";
 248    # TODO to be passed to nmigen as ram attributes
 249    # attribute ram_style : string;
 250    # attribute ram_style of cache_tags : signal is "distributed";
 251
 252
 253 #subtype tlb_index_t is integer range 0 to TLB_SIZE - 1;
 254 #type tlb_valids_t is array(tlb_index_t) of std_ulogic;
 255 #subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0);
 256 #type tlb_tags_t is array(tlb_index_t) of tlb_tag_t;
 257 #subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
 258 #type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t;
 259 def TLBValidBitsArray():
 260     return Array(Signal(name="tlbvalid_%d" %x) \
 261                  for x in range(TLB_SIZE))
 262
 263 def TLBTagArray():
 264     return Array(Signal(TLB_EA_TAG_BITS, name="tlbtag_%d" %x) \
 265                  for x in range(TLB_SIZE))
 266
 267 def TLBPtesArray():
 268     return Array(Signal(TLB_PTE_BITS, name="tlbptes_%d" %x) \
 269                  for x in range(TLB_SIZE))
 270
 271
 272 #-- Cache RAM interface
 273 #type cache_ram_out_t is array(way_t) of cache_row_t;
 274 # Cache RAM interface
 275 def CacheRamOut():
 276     return Array(Signal(ROW_SIZE_BITS, name="cache_out_%d" %x) \
 277                  for x in range(NUM_WAYS))
 278
 279 #-- PLRU output interface
 280 #type plru_out_t is array(index_t) of
 281 # std_ulogic_vector(WAY_BITS-1 downto 0);
 282 # PLRU output interface
 283 def PLRUOut():
 284     return Array(Signal(WAY_BITS, name="plru_out_%d" %x) \
 285                  for x in range(NUM_LINES))
 286
 287 #     -- Return the cache line index (tag index) for an address
 288 #     function get_index(addr: std_ulogic_vector(63 downto 0))
 289 #      return index_t is
 290 #     begin
 291 #         return to_integer(unsigned(
 292 #          addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS)
 293 #         ));
 294 #     end;
 295 # Return the cache line index (tag index) for an address
 296 def get_index(addr):
 297     return addr[LINE_OFF_BITS:SET_SIZE_BITS]
 298
 299 #     -- Return the cache row index (data memory) for an address
 300 #     function get_row(addr: std_ulogic_vector(63 downto 0))
 301 #       return row_t is
 302 #     begin
 303 #         return to_integer(unsigned(
 304 #          addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS)
 305 #         ));
 306 #     end;
 307 # Return the cache row index (data memory) for an address
 308 def get_row(addr):
 309     return addr[ROW_OFF_BITS:SET_SIZE_BITS]
 310
 311 #     -- Return the index of a row within a line
 312 #     function get_row_of_line(row: row_t) return row_in_line_t is
 313 #       variable row_v : unsigned(ROW_BITS-1 downto 0);
 314 #     begin
 315 #       row_v := to_unsigned(row, ROW_BITS);
 316 #         return row_v(ROW_LINEBITS-1 downto 0);
 317 #     end;
 318 # Return the index of a row within a line
 319 def get_row_of_line(row):
 320     return row[:ROW_LINE_BITS]
 321
 322 #     -- Returns whether this is the last row of a line
 323 #     function is_last_row_addr(addr: wishbone_addr_type;
 324 #      last: row_in_line_t
 325 #     )
 326 #      return boolean is
 327 #     begin
 328 #       return unsigned(
 329 #        addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS)
 330 #       ) = last;
 331 #     end;
 332 # Returns whether this is the last row of a line
 333 def is_last_row_addr(addr, last):
 334     return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last
 335
 336 #     -- Returns whether this is the last row of a line
 337 #     function is_last_row(row: row_t;
 338 #      last: row_in_line_t) return boolean is
 339 #     begin
 340 #       return get_row_of_line(row) = last;
 341 #     end;
 342 # Returns whether this is the last row of a line
 343 def is_last_row(row, last):
 344     return get_row_of_line(row) == last
 345
 346 #     -- Return the next row in the current cache line. We use a dedicated
 347 #     -- function in order to limit the size of the generated adder to be
 348 #     -- only the bits within a cache line (3 bits with default settings)
 349 #     function next_row(row: row_t) return row_t is
 350 #       variable row_v   : std_ulogic_vector(ROW_BITS-1 downto 0);
 351 #       variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
 352 #       variable result  : std_ulogic_vector(ROW_BITS-1 downto 0);
 353 #     begin
 354 #       row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
 355 #       row_idx := row_v(ROW_LINEBITS-1 downto 0);
 356 #       row_v(ROW_LINEBITS-1 downto 0) :=
 357 #        std_ulogic_vector(unsigned(row_idx) + 1);
 358 #       return to_integer(unsigned(row_v));
 359 #     end;
 360 # Return the next row in the current cache line. We use a dedicated
 361 # function in order to limit the size of the generated adder to be
 362 # only the bits within a cache line (3 bits with default settings)
 363 def next_row(row):
 364     row_v = row[0:ROW_LINE_BITS] + 1
 365     return Cat(row_v[:ROW_LINE_BITS], row[ROW_LINE_BITS:])
 366 #     -- Read the instruction word for the given address in the
 367 #     -- current cache row
 368 #     function read_insn_word(addr: std_ulogic_vector(63 downto 0);
 369 #                           data: cache_row_t) return std_ulogic_vector is
 370 #       variable word: integer range 0 to INSN_PER_ROW-1;
 371 #     begin
 372 #         word := to_integer(unsigned(addr(INSN_BITS+2-1 downto 2)));
 373 #       return data(31+word*32 downto word*32);
 374 #     end;
 375 # Read the instruction word for the given address
 376 # in the current cache row
 377 def read_insn_word(addr, data):
 378     word = addr[2:INSN_BITS+2]
 379     return data.word_select(word, 32)
 380
 381 #     -- Get the tag value from the address
 382 #     function get_tag(
 383 #      addr: std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0)
 384 #     )
 385 #      return cache_tag_t is
 386 #     begin
 387 #         return addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS);
 388 #     end;
 389 # Get the tag value from the address
 390 def get_tag(addr):
 391     return addr[SET_SIZE_BITS:REAL_ADDR_BITS]
 392
 393 #     -- Read a tag from a tag memory row
 394 #     function read_tag(way: way_t; tagset: cache_tags_set_t)
 395 #      return cache_tag_t is
 396 #     begin
 397 #       return tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS);
 398 #     end;
 399 # Read a tag from a tag memory row
 400 def read_tag(way, tagset):
 401     return tagset.word_select(way, TAG_BITS)
 402
 403 #     -- Write a tag to tag memory row
 404 #     procedure write_tag(way: in way_t;
 405 #      tagset: inout cache_tags_set_t; tag: cache_tag_t) is
 406 #     begin
 407 #       tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS) := tag;
 408 #     end;
 409 # Write a tag to tag memory row
 410 def write_tag(way, tagset, tag):
 411     return read_tag(way, tagset).eq(tag)
 412
 413 #     -- Simple hash for direct-mapped TLB index
 414 #     function hash_ea(addr: std_ulogic_vector(63 downto 0))
 415 #      return tlb_index_t is
 416 #         variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
 417 #     begin
 418 #         hash := addr(TLB_LG_PGSZ + TLB_BITS - 1 downto TLB_LG_PGSZ)
 419 #                 xor addr(
 420 #                  TLB_LG_PGSZ + 2 * TLB_BITS - 1 downto
 421 #                  TLB_LG_PGSZ + TLB_BITS
 422 #                 )
 423 #                 xor addr(
 424 #                  TLB_LG_PGSZ + 3 * TLB_BITS - 1 downto
 425 #                  TLB_LG_PGSZ + 2 * TLB_BITS
 426 #                 );
 427 #         return to_integer(unsigned(hash));
 428 #     end;
 429 # Simple hash for direct-mapped TLB index
 430 def hash_ea(addr):
 431     hsh = addr[TLB_LG_PGSZ:TLB_LG_PGSZ + TLB_BITS] ^ addr[
 432            TLB_LG_PGSZ + TLB_BITS:TLB_LG_PGSZ + 2 * TLB_BITS
 433           ] ^ addr[
 434            TLB_LG_PGSZ + 2 * TLB_BITS:TLB_LG_PGSZ + 3 * TLB_BITS
 435           ]
 436     return hsh
 437
 438 # begin
 439 #
 440 # XXX put these assert statements in - as python asserts
 441 #
 442 #     assert LINE_SIZE mod ROW_SIZE = 0;
 443 #     assert ispow2(LINE_SIZE) report "LINE_SIZE not power of 2"
 444 #     assert ispow2(NUM_LINES) report "NUM_LINES not power of 2"
 445 #     assert ispow2(ROW_PER_LINE) report "ROW_PER_LINE not power of 2"
 446 #     assert ispow2(INSN_PER_ROW) report "INSN_PER_ROW not power of 2"
 447 #     assert (ROW_BITS = INDEX_BITS + ROW_LINEBITS)
 448 #       report "geometry bits don't add up"
 449 #     assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS)
 450 #       report "geometry bits don't add up"
 451 #     assert (REAL_ADDR_BITS = TAG_BITS + INDEX_BITS + LINE_OFF_BITS)
 452 #       report "geometry bits don't add up"
 453 #     assert (REAL_ADDR_BITS = TAG_BITS + ROW_BITS + ROW_OFF_BITS)
 454 #       report "geometry bits don't add up"
 455 #
 456 #     sim_debug: if SIM generate
 457 #     debug: process
 458 #     begin
 459 #       report "ROW_SIZE      = " & natural'image(ROW_SIZE);
 460 #       report "ROW_PER_LINE  = " & natural'image(ROW_PER_LINE);
 461 #       report "BRAM_ROWS     = " & natural'image(BRAM_ROWS);
 462 #       report "INSN_PER_ROW  = " & natural'image(INSN_PER_ROW);
 463 #       report "INSN_BITS     = " & natural'image(INSN_BITS);
 464 #       report "ROW_BITS      = " & natural'image(ROW_BITS);
 465 #       report "ROW_LINEBITS  = " & natural'image(ROW_LINEBITS);
 466 #       report "LINE_OFF_BITS = " & natural'image(LINE_OFF_BITS);
 467 #       report "ROW_OFF_BITS  = " & natural'image(ROW_OFF_BITS);
 468 #       report "INDEX_BITS    = " & natural'image(INDEX_BITS);
 469 #       report "TAG_BITS      = " & natural'image(TAG_BITS);
 470 #       report "WAY_BITS      = " & natural'image(WAY_BITS);
 471 #       wait;
 472 #     end process;
 473 #     end generate;
 474
 475 # Cache reload state machine
@unique
class State(Enum):
    """States of the cache reload state machine.

    Used as the shape (and reset value) of RegInternal.state.
    """
    # reset state
    IDLE     = 0
    # NOTE(review): presumably the step that clears the tag of the line
    # about to be reloaded - confirm against the state machine code
    CLR_TAG  = 1
    # NOTE(review): presumably waiting for wishbone acks during a
    # reload - confirm against the state machine code
    WAIT_ACK = 2
 481
 482 #     type reg_internal_t is record
 483 #       -- Cache hit state (Latches for 1 cycle BRAM access)
 484 #       hit_way   : way_t;
 485 #       hit_nia   : std_ulogic_vector(63 downto 0);
 486 #       hit_smark : std_ulogic;
 487 #       hit_valid : std_ulogic;
 488 #
 489 #       -- Cache miss state (reload state machine)
 490 #         state            : state_t;
 491 #         wb               : wishbone_master_out;
 492 #       store_way        : way_t;
 493 #         store_index      : index_t;
 494 #       store_row        : row_t;
 495 #         store_tag        : cache_tag_t;
 496 #         store_valid      : std_ulogic;
 497 #         end_row_ix       : row_in_line_t;
 498 #         rows_valid       : row_per_line_valid_t;
 499 #
 500 #         -- TLB miss state
 501 #         fetch_failed     : std_ulogic;
 502 #     end record;
 503 class RegInternal(RecordObject):
 504     def __init__(self):
 505         super().__init__()
 506         # Cache hit state (Latches for 1 cycle BRAM access)
 507         self.hit_way      = Signal(NUM_WAYS)
 508         self.hit_nia      = Signal(64)
 509         self.hit_smark    = Signal()
 510         self.hit_valid    = Signal()
 511
 512         # Cache miss state (reload state machine)
 513         self.state        = Signal(State, reset=State.IDLE)
 514         self.wb           = WBMasterOut("wb")
 515         self.req_adr      = Signal(64)
 516         self.store_way    = Signal(NUM_WAYS)
 517         self.store_index  = Signal(NUM_LINES)
 518         self.store_row    = Signal(BRAM_ROWS)
 519         self.store_tag    = Signal(TAG_BITS)
 520         self.store_valid  = Signal()
 521         self.end_row_ix   = Signal(ROW_LINE_BITS)
 522         self.rows_valid   = RowPerLineValidArray()
 523
 524         # TLB miss state
 525         self.fetch_failed = Signal()
 526
 527 # -- 64 bit direct mapped icache. All instructions are 4B aligned.
 528 #
 529 # entity icache is
 530 #     generic (
 531 #         SIM : boolean := false;
 532 #         -- Line size in bytes
 533 #         LINE_SIZE : positive := 64;
 534 #         -- BRAM organisation: We never access more
 535 #         -- than wishbone_data_bits
 536 #         -- at a time so to save resources we make the
 537 #         -- array only that wide,
 538 #         -- and use consecutive indices for to make a cache "line"
 539 #         --
 540 #         -- ROW_SIZE is the width in bytes of the BRAM (based on WB,
 541 #         -- so 64-bits)
 542 #         ROW_SIZE  : positive := wishbone_data_bits / 8;
 543 #         -- Number of lines in a set
 544 #         NUM_LINES : positive := 32;
 545 #         -- Number of ways
 546 #         NUM_WAYS  : positive := 4;
 547 #         -- L1 ITLB number of entries (direct mapped)
 548 #         TLB_SIZE : positive := 64;
 549 #         -- L1 ITLB log_2(page_size)
 550 #         TLB_LG_PGSZ : positive := 12;
 551 #         -- Number of real address bits that we store
 552 #         REAL_ADDR_BITS : positive := 56;
 553 #         -- Non-zero to enable log data collection
 554 #         LOG_LENGTH : natural := 0
 555 #         );
 556 #     port (
 557 #         clk          : in std_ulogic;
 558 #         rst          : in std_ulogic;
 559 #
 560 #         i_in         : in Fetch1ToIcacheType;
 561 #         i_out        : out IcacheToDecode1Type;
 562 #
 563 #         m_in         : in MmuToIcacheType;
 564 #
 565 #         stall_in     : in std_ulogic;
 566 #       stall_out    : out std_ulogic;
 567 #       flush_in     : in std_ulogic;
 568 #       inval_in     : in std_ulogic;
 569 #
 570 #         wishbone_out : out wishbone_master_out;
 571 #         wishbone_in  : in wishbone_slave_out;
 572 #
 573 #         log_out      : out std_ulogic_vector(53 downto 0)
 574 #         );
 575 # end entity icache;
 576 # 64 bit direct mapped icache. All instructions are 4B aligned.
 577 class ICache(Elaboratable):
 578     """64 bit direct mapped icache. All instructions are 4B aligned."""
    def __init__(self):
        """Create the ports of the icache.

        NOTE: keep these as plain attribute assignments - the unnamed
        Signals take their name from the attribute they are assigned to.
        """
        # fetch1 -> icache request and icache -> decode1 response
        self.i_in           = Fetch1ToICacheType(name="i_in")
        self.i_out          = ICacheToDecode1Type(name="i_out")

        # MMU -> icache interface (read by the iTLB update logic)
        self.m_in           = MMUToICacheType(name="m_in")

        # stall / flush / invalidate controls
        self.stall_in       = Signal()
        self.stall_out      = Signal()
        self.flush_in       = Signal()
        self.inval_in       = Signal()

        # wishbone master interface: wb_out to the bus, wb_in from it
        self.wb_out         = WBMasterOut(name="wb_out")
        self.wb_in          = WBSlaveOut(name="wb_in")

        # log data output (see LOG_LENGTH)
        self.log_out        = Signal(54)
 594
 595
 596 #     -- Generate a cache RAM for each way
 597 #     rams: for i in 0 to NUM_WAYS-1 generate
 598 #       signal do_read  : std_ulogic;
 599 #       signal do_write : std_ulogic;
 600 #       signal rd_addr  : std_ulogic_vector(ROW_BITS-1 downto 0);
 601 #       signal wr_addr  : std_ulogic_vector(ROW_BITS-1 downto 0);
 602 #       signal dout     : cache_row_t;
 603 #       signal wr_sel   : std_ulogic_vector(ROW_SIZE-1 downto 0);
 604 #     begin
 605 #       way: entity work.cache_ram
 606 #           generic map (
 607 #               ROW_BITS => ROW_BITS,
 608 #               WIDTH => ROW_SIZE_BITS
 609 #               )
 610 #           port map (
 611 #               clk     => clk,
 612 #               rd_en   => do_read,
 613 #               rd_addr => rd_addr,
 614 #               rd_data => dout,
 615 #               wr_sel  => wr_sel,
 616 #               wr_addr => wr_addr,
 617 #               wr_data => wishbone_in.dat
 618 #               );
 619 #       process(all)
 620 #       begin
 621 #           do_read <= not (stall_in or use_previous);
 622 #           do_write <= '0';
 623 #           if wishbone_in.ack = '1' and replace_way = i then
 624 #               do_write <= '1';
 625 #           end if;
 626 #           cache_out(i) <= dout;
 627 #           rd_addr <=
 628 #            std_ulogic_vector(to_unsigned(req_row, ROW_BITS));
 629 #           wr_addr <=
 630 #            std_ulogic_vector(to_unsigned(r.store_row, ROW_BITS));
 631 #             for i in 0 to ROW_SIZE-1 loop
 632 #                 wr_sel(i) <= do_write;
 633 #             end loop;
 634 #       end process;
 635 #     end generate;
 636     def rams(self, m, r, cache_out_row, use_previous, replace_way, req_row):
 637         comb = m.d.comb
 638
 639         wb_in, stall_in = self.wb_in, self.stall_in
 640
 641
 642         for i in range(NUM_WAYS):
 643             do_read  = Signal(name="do_rd_%d" % i)
 644             do_write = Signal(name="do_wr_%d" % i)
 645             rd_addr  = Signal(ROW_BITS)
 646             wr_addr  = Signal(ROW_BITS)
 647             d_out    = Signal(ROW_SIZE_BITS, name="d_out_%d" % i)
 648             wr_sel   = Signal(ROW_SIZE)
 649
 650             way = CacheRam(ROW_BITS, ROW_SIZE_BITS, True)
 651             setattr(m.submodules, "cacheram_%d" % i, way)
 652
 653             comb += way.rd_en.eq(do_read)
 654             comb += way.rd_addr.eq(rd_addr)
 655             comb += d_out.eq(way.rd_data_o)
 656             comb += way.wr_sel.eq(wr_sel)
 657             comb += way.wr_addr.eq(wr_addr)
 658             comb += way.wr_data.eq(wb_in.dat)
 659
 660             comb += do_read.eq(~(stall_in | use_previous))
 661             comb += do_write.eq(wb_in.ack & (replace_way == i))
 662
 663             with m.If(r.hit_way == i):
 664                 comb += cache_out_row.eq(d_out)
 665             comb += rd_addr.eq(req_row)
 666             comb += wr_addr.eq(r.store_row)
 667             comb += wr_sel.eq(Repl(do_write, ROW_SIZE))
 668
 669 #     -- Generate PLRUs
 670 #     maybe_plrus: if NUM_WAYS > 1 generate
 671 #     begin
 672 #       plrus: for i in 0 to NUM_LINES-1 generate
 673 #           -- PLRU interface
 674 #           signal plru_acc    : std_ulogic_vector(WAY_BITS-1 downto 0);
 675 #           signal plru_acc_en : std_ulogic;
 676 #           signal plru_out    : std_ulogic_vector(WAY_BITS-1 downto 0);
 677 #
 678 #       begin
 679 #           plru : entity work.plru
 680 #               generic map (
 681 #                   BITS => WAY_BITS
 682 #                   )
 683 #               port map (
 684 #                   clk => clk,
 685 #                   rst => rst,
 686 #                   acc => plru_acc,
 687 #                   acc_en => plru_acc_en,
 688 #                   lru => plru_out
 689 #                   );
 690 #
 691 #           process(all)
 692 #           begin
 693 #               -- PLRU interface
 694 #               if get_index(r.hit_nia) = i then
 695 #                   plru_acc_en <= r.hit_valid;
 696 #               else
 697 #                   plru_acc_en <= '0';
 698 #               end if;
 699 #               plru_acc <=
 700 #                std_ulogic_vector(to_unsigned(r.hit_way, WAY_BITS));
 701 #               plru_victim(i) <= plru_out;
 702 #           end process;
 703 #       end generate;
 704 #     end generate;
 705     def maybe_plrus(self, m, r, plru_victim):
 706         comb = m.d.comb
 707
 708         with m.If(NUM_WAYS > 1):
 709             for i in range(NUM_LINES):
 710                 plru_acc_i  = Signal(WAY_BITS)
 711                 plru_acc_en = Signal()
 712                 plru        = PLRU(WAY_BITS)
 713                 setattr(m.submodules, "plru_%d" % i, plru)
 714
 715                 comb += plru.acc_i.eq(plru_acc_i)
 716                 comb += plru.acc_en.eq(plru_acc_en)
 717
 718                 # PLRU interface
 719                 with m.If(get_index(r.hit_nia) == i):
 720                     comb += plru.acc_en.eq(r.hit_valid)
 721
 722                 comb += plru.acc_i.eq(r.hit_way)
 723                 comb += plru_victim[i].eq(plru.lru_o)
 724
 725 #     -- TLB hit detection and real address generation
 726 #     itlb_lookup : process(all)
 727 #         variable pte : tlb_pte_t;
 728 #         variable ttag : tlb_tag_t;
 729 #     begin
 730 #         tlb_req_index <= hash_ea(i_in.nia);
 731 #         pte := itlb_ptes(tlb_req_index);
 732 #         ttag := itlb_tags(tlb_req_index);
 733 #         if i_in.virt_mode = '1' then
 734 #             real_addr <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
 735 #                          i_in.nia(TLB_LG_PGSZ - 1 downto 0);
 736 #             if ttag = i_in.nia(63 downto TLB_LG_PGSZ + TLB_BITS) then
 737 #                 ra_valid <= itlb_valids(tlb_req_index);
 738 #             else
 739 #                 ra_valid <= '0';
 740 #             end if;
 741 #             eaa_priv <= pte(3);
 742 #         else
 743 #             real_addr <= i_in.nia(REAL_ADDR_BITS - 1 downto 0);
 744 #             ra_valid <= '1';
 745 #             eaa_priv <= '1';
 746 #         end if;
 747 #
 748 #         -- no IAMR, so no KUEP support for now
 749 #         priv_fault <= eaa_priv and not i_in.priv_mode;
 750 #         access_ok <= ra_valid and not priv_fault;
 751 #     end process;
 752     # TLB hit detection and real address generation
 753     def itlb_lookup(self, m, tlb_req_index, itlb_ptes, itlb_tags,
 754                     real_addr, itlb_valid_bits, ra_valid, eaa_priv,
 755                     priv_fault, access_ok):
 756         comb = m.d.comb
 757
 758         i_in = self.i_in
 759
 760         pte  = Signal(TLB_PTE_BITS)
 761         ttag = Signal(TLB_EA_TAG_BITS)
 762
 763         comb += tlb_req_index.eq(hash_ea(i_in.nia))
 764         comb += pte.eq(itlb_ptes[tlb_req_index])
 765         comb += ttag.eq(itlb_tags[tlb_req_index])
 766
 767         with m.If(i_in.virt_mode):
 768             comb += real_addr.eq(Cat(
 769                      i_in.nia[:TLB_LG_PGSZ],
 770                      pte[TLB_LG_PGSZ:REAL_ADDR_BITS]
 771                     ))
 772
 773             with m.If(ttag == i_in.nia[TLB_LG_PGSZ + TLB_BITS:64]):
 774                 comb += ra_valid.eq(itlb_valid_bits[tlb_req_index])
 775
 776             comb += eaa_priv.eq(pte[3])
 777
 778         with m.Else():
 779             comb += real_addr.eq(i_in.nia[:REAL_ADDR_BITS])
 780             comb += ra_valid.eq(1)
 781             comb += eaa_priv.eq(1)
 782
 783         # No IAMR, so no KUEP support for now
 784         comb += priv_fault.eq(eaa_priv & ~i_in.priv_mode)
 785         comb += access_ok.eq(ra_valid & ~priv_fault)
 786
 787 #     -- iTLB update
 788 #     itlb_update: process(clk)
 789 #         variable wr_index : tlb_index_t;
 790 #     begin
 791 #         if rising_edge(clk) then
 792 #             wr_index := hash_ea(m_in.addr);
 793 #             if rst = '1' or
 794 #              (m_in.tlbie = '1' and m_in.doall = '1') then
 795 #                 -- clear all valid bits
 796 #                 for i in tlb_index_t loop
 797 #                     itlb_valids(i) <= '0';
 798 #                 end loop;
 799 #             elsif m_in.tlbie = '1' then
 800 #                 -- clear entry regardless of hit or miss
 801 #                 itlb_valids(wr_index) <= '0';
 802 #             elsif m_in.tlbld = '1' then
 803 #                 itlb_tags(wr_index) <=
 804 #                  m_in.addr(63 downto TLB_LG_PGSZ + TLB_BITS);
 805 #                 itlb_ptes(wr_index) <= m_in.pte;
 806 #                 itlb_valids(wr_index) <= '1';
 807 #             end if;
 808 #         end if;
 809 #     end process;
 810     # iTLB update
 811     def itlb_update(self, m, itlb_valid_bits, itlb_tags, itlb_ptes):
 812         comb = m.d.comb
 813         sync = m.d.sync
 814
 815         m_in = self.m_in
 816
 817         wr_index = Signal(TLB_SIZE)
 818         sync += wr_index.eq(hash_ea(m_in.addr))
 819
 820         with m.If(m_in.tlbie & m_in.doall):
 821             # Clear all valid bits
 822             for i in range(TLB_SIZE):
 823                 sync += itlb_valid_bits[i].eq(0)
 824
 825         with m.Elif(m_in.tlbie):
 826             # Clear entry regardless of hit or miss
 827             sync += itlb_valid_bits[wr_index].eq(0)
 828
 829         with m.Elif(m_in.tlbld):
 830             sync += itlb_tags[wr_index].eq(
 831                      m_in.addr[TLB_LG_PGSZ + TLB_BITS:64]
 832                     )
 833             sync += itlb_ptes[wr_index].eq(m_in.pte)
 834             sync += itlb_valid_bits[wr_index].eq(1)
 835
 836 #     -- Cache hit detection, output to fetch2 and other misc logic
 837 #     icache_comb : process(all)
 838     # Cache hit detection, output to fetch2 and other misc logic
 839     def icache_comb(self, m, use_previous, r, req_index, req_row,
 840                     req_tag, real_addr, req_laddr, cache_valid_bits,
 841                     cache_tags, access_ok, req_is_hit,
 842                     req_is_miss, replace_way, plru_victim, cache_out_row):
 843 #       variable is_hit  : std_ulogic;
 844 #       variable hit_way : way_t;
 845         comb = m.d.comb
 846
 847         #comb += Display("ENTER icache_comb - use_previous:%x req_index:%x " \
 848         #                "req_row:%x req_tag:%x real_addr:%x req_laddr:%x " \
 849         #                "access_ok:%x req_is_hit:%x req_is_miss:%x " \
 850         #                "replace_way:%x", use_previous, req_index, req_row, \
 851         #                req_tag, real_addr, req_laddr, access_ok, \
 852         #                req_is_hit, req_is_miss, replace_way)
 853
 854         i_in, i_out, wb_out = self.i_in, self.i_out, self.wb_out
 855         flush_in, stall_out = self.flush_in, self.stall_out
 856
 857         is_hit  = Signal()
 858         hit_way = Signal(NUM_WAYS)
 859 #     begin
 860 #         -- i_in.sequential means that i_in.nia this cycle
 861 #         -- is 4 more than last cycle.  If we read more
 862 #         -- than 32 bits at a time, had a cache hit last
 863 #         -- cycle, and we don't want the first 32-bit chunk
 864 #         -- then we can keep the data we read last cycle
 865 #         -- and just use that.
 866 #         if unsigned(i_in.nia(INSN_BITS+2-1 downto 2)) /= 0 then
 867 #             use_previous <= i_in.sequential and r.hit_valid;
 868 #         else
 869 #             use_previous <= '0';
 870 #         end if;
 871         # i_in.sequential means that i_in.nia this cycle is 4 more than
 872         # last cycle.  If we read more than 32 bits at a time, had a
 873         # cache hit last cycle, and we don't want the first 32-bit chunk
 874         # then we can keep the data we read last cycle and just use that.
 875         with m.If(i_in.nia[2:INSN_BITS+2] != 0):
 876             comb += use_previous.eq(i_in.sequential & r.hit_valid)
 877
 878 #       -- Extract line, row and tag from request
 879 #         req_index <= get_index(i_in.nia);
 880 #         req_row <= get_row(i_in.nia);
 881 #         req_tag <= get_tag(real_addr);
 882         # Extract line, row and tag from request
 883         comb += req_index.eq(get_index(i_in.nia))
 884         comb += req_row.eq(get_row(i_in.nia))
 885         comb += req_tag.eq(get_tag(real_addr))
 886
 887 #       -- Calculate address of beginning of cache row, will be
 888 #       -- used for cache miss processing if needed
 889 #       req_laddr <=
 890 #        (63 downto REAL_ADDR_BITS => '0') &
 891 #        real_addr(REAL_ADDR_BITS - 1 downto ROW_OFF_BITS) &
 892 #        (ROW_OFF_BITS-1 downto 0 => '0');
 893         # Calculate address of beginning of cache row, will be
 894         # used for cache miss processing if needed
 895         comb += req_laddr.eq(Cat(
 896                  Const(0b0, ROW_OFF_BITS),
 897                  real_addr[ROW_OFF_BITS:REAL_ADDR_BITS],
 898                  Const(0b0, 8)
 899                 ))
 900
 901 #       -- Test if pending request is a hit on any way
 902 #       hit_way := 0;
 903 #       is_hit := '0';
 904 #       for i in way_t loop
 905 #           if i_in.req = '1' and
 906 #                 (cache_valids(req_index)(i) = '1' or
 907 #                  (r.state = WAIT_ACK and
 908 #                   req_index = r.store_index and
 909 #                   i = r.store_way and
 910 #                   r.rows_valid(req_row mod ROW_PER_LINE) = '1')) then
 911 #               if read_tag(i, cache_tags(req_index)) = req_tag then
 912 #                   hit_way := i;
 913 #                   is_hit := '1';
 914 #               end if;
 915 #           end if;
 916 #       end loop;
 917         # Test if pending request is a hit on any way
 918         hitcond = Signal()
 919         comb += hitcond.eq((r.state == State.WAIT_ACK)
 920                     & (req_index == r.store_index)
 921                     & r.rows_valid[req_row % ROW_PER_LINE])
 922         with m.If(i_in.req):
 923             cvb = Signal(NUM_WAYS)
 924             ctag = Signal(TAG_RAM_WIDTH)
 925             comb += ctag.eq(cache_tags[req_index])
 926             comb += cvb.eq(cache_valid_bits[req_index])
 927             for i in range(NUM_WAYS):
 928                 tagi = Signal(TAG_BITS, name="ti%d" % i)
 929                 comb += tagi.eq(read_tag(i, ctag))
 930                 hit_test = Signal(name="hit_test%d" % i)
 931                 comb += hit_test.eq(i == r.store_way)
 932                 with m.If((cvb[i] | (hitcond & hit_test)) & (tagi == req_tag)):
 933                     comb += hit_way.eq(i)
 934                     comb += is_hit.eq(1)
 935
 936 #       -- Generate the "hit" and "miss" signals
 937 #       -- for the synchronous blocks
 938 #       if i_in.req = '1' and access_ok = '1' and flush_in = '0'
 939 #        and rst = '0' then
 940 #           req_is_hit  <= is_hit;
 941 #           req_is_miss <= not is_hit;
 942 #       else
 943 #           req_is_hit  <= '0';
 944 #           req_is_miss <= '0';
 945 #       end if;
 946 #       req_hit_way <= hit_way;
 947         # Generate the "hit" and "miss" signals
 948         # for the synchronous blocks
 949         with m.If(i_in.req & access_ok & ~flush_in):
 950             comb += req_is_hit.eq(is_hit)
 951             comb += req_is_miss.eq(~is_hit)
 952
 953         with m.Else():
 954             comb += req_is_hit.eq(0)
 955             comb += req_is_miss.eq(0)
 956
 957 #       -- The way to replace on a miss
 958 #       if r.state = CLR_TAG then
 959 #           replace_way <=
 960 #            to_integer(unsigned(plru_victim(r.store_index)));
 961 #       else
 962 #           replace_way <= r.store_way;
 963 #       end if;
 964         # The way to replace on a miss
 965         with m.If(r.state == State.CLR_TAG):
 966             comb += replace_way.eq(plru_victim[r.store_index])
 967
 968         with m.Else():
 969             comb += replace_way.eq(r.store_way)
 970
 971 #       -- Output instruction from current cache row
 972 #       --
 973 #       -- Note: This is a mild violation of our design principle of
 974 #       -- having pipeline stages output from a clean latch. In this
 975 #       -- case we output the result of a mux. The alternative would
 976 #       -- be output an entire row which I prefer not to do just yet
 977 #       -- as it would force fetch2 to know about some of the cache
 978 #       -- geometry information.
 979 #       i_out.insn <= read_insn_word(r.hit_nia, cache_out(r.hit_way));
 980 #       i_out.valid <= r.hit_valid;
 981 #       i_out.nia <= r.hit_nia;
 982 #       i_out.stop_mark <= r.hit_smark;
 983 #       i_out.fetch_failed <= r.fetch_failed;
 984         # Output instruction from current cache row
 985         #
 986         # Note: This is a mild violation of our design principle of
 987         # having pipeline stages output from a clean latch. In this
 988         # case we output the result of a mux. The alternative would
 989         # be output an entire row which I prefer not to do just yet
 990         # as it would force fetch2 to know about some of the cache
 991         # geometry information.
 992         #comb += Display("BEFORE read_insn_word - r.hit_nia:%x " \
 993         #                "r.hit_way:%x, cache_out[r.hit_way]:%x", r.hit_nia, \
 994         #                r.hit_way, cache_out[r.hit_way])
 995         comb += i_out.insn.eq(read_insn_word(r.hit_nia, cache_out_row))
 996         comb += i_out.valid.eq(r.hit_valid)
 997         comb += i_out.nia.eq(r.hit_nia)
 998         comb += i_out.stop_mark.eq(r.hit_smark)
 999         comb += i_out.fetch_failed.eq(r.fetch_failed)
1000
1001 #       -- Stall fetch1 if we have a miss on cache or TLB
1002 #       -- or a protection fault
1003 #       stall_out <= not (is_hit and access_ok);
1004         # Stall fetch1 if we have a miss on cache or TLB
1005         # or a protection fault
1006         comb += stall_out.eq(~(is_hit & access_ok))
1007
1008 #       -- Wishbone requests output (from the cache miss reload machine)
1009 #       wishbone_out <= r.wb;
1010         # Wishbone requests output (from the cache miss reload machine)
1011         comb += wb_out.eq(r.wb)
1012 #     end process;
1013
1014 #     -- Cache hit synchronous machine
1015 #     icache_hit : process(clk)
1016     # Cache hit synchronous machine
1017     def icache_hit(self, m, use_previous, r, req_is_hit, req_hit_way,
1018                    req_index, req_tag, real_addr):
1019         sync = m.d.sync
1020
1021         i_in, stall_in = self.i_in, self.stall_in
1022         flush_in       = self.flush_in
1023
1024 #     begin
1025 #         if rising_edge(clk) then
1026 #             -- keep outputs to fetch2 unchanged on a stall
1027 #             -- except that flush or reset sets valid to 0
1028 #             -- If use_previous, keep the same data as last
1029 #             -- cycle and use the second half
1030 #             if stall_in = '1' or use_previous = '1' then
1031 #                 if rst = '1' or flush_in = '1' then
1032 #                     r.hit_valid <= '0';
1033 #             end if;
1034         # keep outputs to fetch2 unchanged on a stall
1035         # except that flush or reset sets valid to 0
1036         # If use_previous, keep the same data as last
1037         # cycle and use the second half
1038         with m.If(stall_in | use_previous):
1039             with m.If(flush_in):
1040                 sync += r.hit_valid.eq(0)
1041 #             else
1042 #                 -- On a hit, latch the request for the next cycle,
1043 #                 -- when the BRAM data will be available on the
1044 #                 -- cache_out output of the corresponding way
1045 #                 r.hit_valid <= req_is_hit;
1046 #                 if req_is_hit = '1' then
1047 #                     r.hit_way <= req_hit_way;
1048         with m.Else():
1049             # On a hit, latch the request for the next cycle,
1050             # when the BRAM data will be available on the
1051             # cache_out output of the corresponding way
1052             sync += r.hit_valid.eq(req_is_hit)
1053
1054             with m.If(req_is_hit):
1055                 sync += r.hit_way.eq(req_hit_way)
1056
1057 #                     report "cache hit nia:" & to_hstring(i_in.nia) &
1058 #                         " IR:" & std_ulogic'image(i_in.virt_mode) &
1059 #                         " SM:" & std_ulogic'image(i_in.stop_mark) &
1060 #                         " idx:" & integer'image(req_index) &
1061 #                         " tag:" & to_hstring(req_tag) &
1062 #                         " way:" & integer'image(req_hit_way) &
1063 #                         " RA:" & to_hstring(real_addr);
1064                 sync += Display("cache hit nia:%x IR:%x SM:%x idx:%x " \
1065                                 "tag:%x way:%x RA:%x", i_in.nia, \
1066                                 i_in.virt_mode, i_in.stop_mark, req_index, \
1067                                 req_tag, req_hit_way, real_addr)
1068
1069
1070
1071 #                 end if;
1072 #           end if;
1073 #             if stall_in = '0' then
1074 #                 -- Send stop marks and NIA down regardless of validity
1075 #                 r.hit_smark <= i_in.stop_mark;
1076 #                 r.hit_nia <= i_in.nia;
1077 #             end if;
1078         with m.If(~stall_in):
1079             # Send stop marks and NIA down regardless of validity
1080             sync += r.hit_smark.eq(i_in.stop_mark)
1081             sync += r.hit_nia.eq(i_in.nia)
1082 #       end if;
1083 #     end process;
1084
1085 #     -- Cache miss/reload synchronous machine
1086 #     icache_miss : process(clk)
1087     # Cache miss/reload synchronous machine
1088     def icache_miss(self, m, cache_valid_bits, r, req_is_miss,
1089                     req_index, req_laddr, req_tag, replace_way,
1090                     cache_tags, access_ok, real_addr):
1091         comb = m.d.comb
1092         sync = m.d.sync
1093
1094         i_in, wb_in, m_in  = self.i_in, self.wb_in, self.m_in
1095         stall_in, flush_in = self.stall_in, self.flush_in
1096         inval_in           = self.inval_in
1097
1098 #       variable tagset    : cache_tags_set_t;
1099 #       variable stbs_done : boolean;
1100
1101         tagset    = Signal(TAG_RAM_WIDTH)
1102         stbs_done = Signal()
1103
1104 #     begin
1105 #         if rising_edge(clk) then
1106 #           -- On reset, clear all valid bits to force misses
1107 #             if rst = '1' then
1108         # On reset, clear all valid bits to force misses
1109 #               for i in index_t loop
1110 #                   cache_valids(i) <= (others => '0');
1111 #               end loop;
1112 #                 r.state <= IDLE;
1113 #                 r.wb.cyc <= '0';
1114 #                 r.wb.stb <= '0';
1115 #               -- We only ever do reads on wishbone
1116 #               r.wb.dat <= (others => '0');
1117 #               r.wb.sel <= "11111111";
1118 #               r.wb.we  <= '0';
1119
1120 #               -- Not useful normally but helps avoiding
1121 #               -- tons of sim warnings
1122 #               r.wb.adr <= (others => '0');
1123
1124 #             else
1125
1126 #                 -- Process cache invalidations
1127 #                 if inval_in = '1' then
1128 #                     for i in index_t loop
1129 #                         cache_valids(i) <= (others => '0');
1130 #                     end loop;
1131 #                     r.store_valid <= '0';
1132 #                 end if;
1133         comb += r.wb.sel.eq(-1)
1134         comb += r.wb.adr.eq(r.req_adr[3:])
1135
1136         # Process cache invalidations
1137         with m.If(inval_in):
1138             for i in range(NUM_LINES):
1139                 sync += cache_valid_bits[i].eq(0)
1140             sync += r.store_valid.eq(0)
1141
1142 #               -- Main state machine
1143 #               case r.state is
1144         # Main state machine
1145         with m.Switch(r.state):
1146
1147 #           when IDLE =>
1148             with m.Case(State.IDLE):
1149 #                 -- Reset per-row valid flags,
1150 #                 -- only used in WAIT_ACK
1151 #                 for i in 0 to ROW_PER_LINE - 1 loop
1152 #                     r.rows_valid(i) <= '0';
1153 #                 end loop;
1154                 # Reset per-row valid flags,
1155                 # only used in WAIT_ACK
1156                 for i in range(ROW_PER_LINE):
1157                     sync += r.rows_valid[i].eq(0)
1158
1159 #               -- We need to read a cache line
1160 #               if req_is_miss = '1' then
1161 #               report "cache miss nia:" & to_hstring(i_in.nia) &
1162 #                         " IR:" & std_ulogic'image(i_in.virt_mode) &
1163 #                   " SM:" & std_ulogic'image(i_in.stop_mark) &
1164 #                   " idx:" & integer'image(req_index) &
1165 #                   " way:" & integer'image(replace_way) &
1166 #                   " tag:" & to_hstring(req_tag) &
1167 #                         " RA:" & to_hstring(real_addr);
1168                 # We need to read a cache line
1169                 with m.If(req_is_miss):
1170                     sync += Display(
1171                              "cache miss nia:%x IR:%x SM:%x idx:%x " \
1172                              " way:%x tag:%x RA:%x", i_in.nia, \
1173                              i_in.virt_mode, i_in.stop_mark, req_index, \
1174                              replace_way, req_tag, real_addr)
1175
1176 #               -- Keep track of our index and way for
1177 #                   -- subsequent stores
1178 #               r.store_index <= req_index;
1179 #               r.store_row <= get_row(req_laddr);
1180 #                   r.store_tag <= req_tag;
1181 #                   r.store_valid <= '1';
1182 #                   r.end_row_ix <=
1183 #                    get_row_of_line(get_row(req_laddr)) - 1;
1184                     # Keep track of our index and way
1185                     # for subsequent stores
1186                     sync += r.store_index.eq(req_index)
1187                     sync += r.store_row.eq(get_row(req_laddr))
1188                     sync += r.store_tag.eq(req_tag)
1189                     sync += r.store_valid.eq(1)
1190                     sync += r.end_row_ix.eq(
1191                              get_row_of_line(
1192                               get_row(req_laddr)
1193                              ) - 1
1194                             )
1195
1196 #               -- Prep for first wishbone read. We calculate the
1197 #                   -- address of the start of the cache line and
1198 #                   -- start the WB cycle.
1199 #               r.wb.adr <= req_laddr(r.wb.adr'left downto 0);
1200 #               r.wb.cyc <= '1';
1201 #               r.wb.stb <= '1';
1202                     # Prep for first wishbone read.
1203                     # We calculate the
1204                     # address of the start of the cache line and
1205                     # start the WB cycle.
1206                     sync += r.req_adr.eq(req_laddr)
1207                     sync += r.wb.cyc.eq(1)
1208                     sync += r.wb.stb.eq(1)
1209
1210 #               -- Track that we had one request sent
1211 #               r.state <= CLR_TAG;
1212                     # Track that we had one request sent
1213                     sync += r.state.eq(State.CLR_TAG)
1214 #               end if;
1215
1216 #           when CLR_TAG | WAIT_ACK =>
1217             with m.Case(State.CLR_TAG, State.WAIT_ACK):
1218 #                 if r.state = CLR_TAG then
1219                 with m.If(r.state == State.CLR_TAG):
1220 #                     -- Get victim way from plru
1221 #               r.store_way <= replace_way;
1222                     # Get victim way from plru
1223                     sync += r.store_way.eq(replace_way)
1224 #
1225 #               -- Force misses on that way while
1226 #                   -- reloading that line
1227 #               cache_valids(req_index)(replace_way) <= '0';
1228                     # Force misses on that way while
1229                     # realoading that line
1230                     cv = Signal(INDEX_BITS)
1231                     comb += cv.eq(cache_valid_bits[req_index])
1232                     comb += cv.bit_select(replace_way, 1).eq(0)
1233                     sync += cache_valid_bits[req_index].eq(cv)
1234
1235 #               -- Store new tag in selected way
1236 #               for i in 0 to NUM_WAYS-1 loop
1237 #                   if i = replace_way then
1238 #                       tagset := cache_tags(r.store_index);
1239 #                       write_tag(i, tagset, r.store_tag);
1240 #                       cache_tags(r.store_index) <= tagset;
1241 #                   end if;
1242 #               end loop;
1243                     for i in range(NUM_WAYS):
1244                         with m.If(i == replace_way):
1245                             comb += tagset.eq(cache_tags[r.store_index])
1246                             comb += write_tag(i, tagset, r.store_tag)
1247                             sync += cache_tags[r.store_index].eq(tagset)
1248
1249 #                     r.state <= WAIT_ACK;
1250                     sync += r.state.eq(State.WAIT_ACK)
1251 #                 end if;
1252
1253 #               -- Requests are all sent if stb is 0
1254 #               stbs_done := r.wb.stb = '0';
1255                 # Requests are all sent if stb is 0
1256                 stbs_zero = Signal()
1257                 comb += stbs_zero.eq(r.wb.stb == 0)
1258                 comb += stbs_done.eq(stbs_zero)
1259
1260 #               -- If we are still sending requests,
1261 #               -- was one accepted ?
1262 #               if wishbone_in.stall = '0' and not stbs_done then
1263                 # If we are still sending requests,
1264                 # was one accepted?
1265                 with m.If(~wb_in.stall & ~stbs_zero):
1266 #               -- That was the last word ? We are done sending.
1267 #                   -- Clear stb and set stbs_done so we can handle
1268 #                   -- an eventual last ack on the same cycle.
1269 #               if is_last_row_addr(r.wb.adr, r.end_row_ix) then
1270 #                   r.wb.stb <= '0';
1271 #                   stbs_done := true;
1272 #               end if;
1273                     # That was the last word ?
1274                     # We are done sending.
1275                     # Clear stb and set stbs_done
1276                     # so we can handle
1277                     # an eventual last ack on
1278                     # the same cycle.
1279                     with m.If(is_last_row_addr(r.req_adr, r.end_row_ix)):
1280                         sync += Display("IS_LAST_ROW_ADDR " \
1281                                         "r.wb.addr:%x r.end_row_ix:%x " \
1282                                         "r.wb.stb:%x stbs_zero:%x " \
1283                                         "stbs_done:%x", r.wb.adr, \
1284                                         r.end_row_ix, r.wb.stb, \
1285                                         stbs_zero, stbs_done)
1286                         sync += r.wb.stb.eq(0)
1287                         comb += stbs_done.eq(1)
1288
1289 #               -- Calculate the next row address
1290 #               r.wb.adr <= next_row_addr(r.wb.adr);
1291                     # Calculate the next row address
1292                     rarange = Signal(LINE_OFF_BITS - ROW_OFF_BITS)
1293                     comb += rarange.eq(
1294                              r.req_adr[ROW_OFF_BITS:LINE_OFF_BITS] + 1
1295                             )
1296                     sync += r.req_adr[ROW_OFF_BITS:LINE_OFF_BITS].eq(
1297                              rarange
1298                             )
1299                     sync += Display("RARANGE r.wb.adr:%x stbs_zero:%x " \
1300                                     "stbs_done:%x", rarange, stbs_zero, \
1301                                     stbs_done)
1302 #               end if;
1303
1304 #               -- Incoming acks processing
1305 #               if wishbone_in.ack = '1' then
1306                 # Incoming acks processing
1307                 with m.If(wb_in.ack):
1308 #                     r.rows_valid(r.store_row mod ROW_PER_LINE)
1309 #                      <= '1';
1310                     sync += Display("WB_IN_ACK stbs_zero:%x " \
1311                                     "stbs_done:%x", \
1312                                     stbs_zero, stbs_done)
1313
1314                     sync += r.rows_valid[r.store_row % ROW_PER_LINE].eq(1)
1315
1316 #               -- Check for completion
1317 #               if stbs_done and
1318 #                    is_last_row(r.store_row, r.end_row_ix) then
1319                     # Check for completion
1320                     with m.If(stbs_done &
1321                               is_last_row(r.store_row, r.end_row_ix)):
1322 #                   -- Complete wishbone cycle
1323 #                   r.wb.cyc <= '0';
1324                         # Complete wishbone cycle
1325                         sync += r.wb.cyc.eq(0)
1326
1327 #                   -- Cache line is now valid
1328 #                   cache_valids(r.store_index)(replace_way) <=
1329 #                        r.store_valid and not inval_in;
1330                         # Cache line is now valid
1331                         cv = Signal(INDEX_BITS)
1332                         comb += cv.eq(cache_valid_bits[r.store_index])
1333                         comb += cv.bit_select(replace_way, 1).eq(
1334                                  r.store_valid & ~inval_in
1335                                 )
1336                         sync += cache_valid_bits[r.store_index].eq(cv)
1337
1338 #                   -- We are done
1339 #                   r.state <= IDLE;
1340                         # We are done
1341                         sync += r.state.eq(State.IDLE)
1342 #               end if;
1343
1344 #               -- Increment store row counter
1345 #               r.store_row <= next_row(r.store_row);
1346                     # Increment store row counter
1347                     sync += r.store_row.eq(next_row(r.store_row))
1348 #               end if;
1349 #           end case;
1350 #       end if;
1351 #
1352 #             -- TLB miss and protection fault processing
1353 #             if rst = '1' or flush_in = '1' or m_in.tlbld = '1' then
1354 #                 r.fetch_failed <= '0';
1355 #             elsif i_in.req = '1' and access_ok = '0' and
1356 #              stall_in = '0' then
1357 #                 r.fetch_failed <= '1';
1358 #             end if;
1359         # TLB miss and protection fault processing
1360         with m.If(flush_in | m_in.tlbld):
1361             sync += r.fetch_failed.eq(0)
1362
1363         with m.Elif(i_in.req & ~access_ok & ~stall_in):
1364             sync += r.fetch_failed.eq(1)
1365 #       end if;
1366 #     end process;
1367
1368 #     icache_log: if LOG_LENGTH > 0 generate
1369     def icache_log(self, m, req_hit_way, ra_valid, access_ok,
1370                    req_is_miss, req_is_hit, lway, wstate, r):
1371         comb = m.d.comb
1372         sync = m.d.sync
1373
1374         wb_in, i_out       = self.wb_in, self.i_out
1375         log_out, stall_out = self.log_out, self.stall_out
1376
1377 #         -- Output data to logger
1378 #         signal log_data    : std_ulogic_vector(53 downto 0);
1379 #     begin
1380 #         data_log: process(clk)
1381 #             variable lway: way_t;
1382 #             variable wstate: std_ulogic;
1383         # Output data to logger
1384         for i in range(LOG_LENGTH):
1385             # Output data to logger
1386             log_data = Signal(54)
1387             lway     = Signal(NUM_WAYS)
1388             wstate   = Signal()
1389
1390 #         begin
1391 #             if rising_edge(clk) then
1392 #                 lway := req_hit_way;
1393 #                 wstate := '0';
1394             sync += lway.eq(req_hit_way)
1395             sync += wstate.eq(0)
1396
1397 #                 if r.state /= IDLE then
1398 #                     wstate := '1';
1399 #                 end if;
1400             with m.If(r.state != State.IDLE):
1401                 sync += wstate.eq(1)
1402
1403 #                 log_data <= i_out.valid &
1404 #                             i_out.insn &
1405 #                             wishbone_in.ack &
1406 #                             r.wb.adr(5 downto 3) &
1407 #                             r.wb.stb & r.wb.cyc &
1408 #                             wishbone_in.stall &
1409 #                             stall_out &
1410 #                             r.fetch_failed &
1411 #                             r.hit_nia(5 downto 2) &
1412 #                             wstate &
1413 #                             std_ulogic_vector(to_unsigned(lway, 3)) &
1414 #                             req_is_hit & req_is_miss &
1415 #                             access_ok &
1416 #                             ra_valid;
1417             sync += log_data.eq(Cat(
1418                      ra_valid, access_ok, req_is_miss, req_is_hit,
1419                      lway, wstate, r.hit_nia[2:6],
1420                      r.fetch_failed, stall_out, wb_in.stall, r.wb.cyc,
1421                      r.wb.stb, r.wb.adr[3:6], wb_in.ack, i_out.insn,
1422                      i_out.valid
1423                     ))
1424 #             end if;
1425 #         end process;
1426 #         log_out <= log_data;
1427             comb += log_out.eq(log_data)
1428 #     end generate;
1429 # end;
1430
    def elaborate(self, platform):
        """Build the icache Module by wiring the helper methods together.

        Creates the storage arrays and the shared ("async") signals that
        the individual stages communicate through, then calls each helper
        method of this class with the subset of those objects it needs.
        The helpers add their logic to the same Module ``m``.

        The commented-out VHDL lines are the microwatt icache.vhdl
        declarations that the nmigen statement below each of them was
        ported from; they are kept as a reference.

        :param platform: not used in this method body.
        :returns: the populated nmigen Module.
        """

        m                = Module()
        # NOTE(review): `comb` is not referenced again in this method;
        # all combinatorial logic is added inside the helper methods.
        comb             = m.d.comb

        # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
        cache_tags       = CacheTagArray()
        cache_valid_bits = CacheValidBitsArray()

#     signal itlb_valids : tlb_valids_t;
#     signal itlb_tags : tlb_tags_t;
#     signal itlb_ptes : tlb_ptes_t;
#     attribute ram_style of itlb_tags : signal is "distributed";
#     attribute ram_style of itlb_ptes : signal is "distributed";
        # iTLB storage: valid bits, tags and PTEs
        itlb_valid_bits  = TLBValidBitsArray()
        itlb_tags        = TLBTagArray()
        itlb_ptes        = TLBPtesArray()
        # TODO to be passed to nmigen as ram attributes
        # attribute ram_style of itlb_tags : signal is "distributed";
        # attribute ram_style of itlb_ptes : signal is "distributed";

#     -- Privilege bit from PTE EAA field
#     signal eaa_priv  : std_ulogic;
        # Privilege bit from PTE EAA field
        eaa_priv         = Signal()

#     signal r : reg_internal_t;
        # internal register state, shared by most of the helpers below
        r                = RegInternal()

#     -- Async signals on incoming request
#     signal req_index   : index_t;
#     signal req_row     : row_t;
#     signal req_hit_way : way_t;
#     signal req_tag     : cache_tag_t;
#     signal req_is_hit  : std_ulogic;
#     signal req_is_miss : std_ulogic;
#     signal req_laddr   : std_ulogic_vector(63 downto 0);
        # Async signal on incoming request
        # NOTE(review): Signal(n) declares an n-bit wide signal, whereas
        # the VHDL types named above (index_t, row_t, way_t) are presumably
        # integer ranges -- confirm that NUM_LINES / BRAM_ROWS / NUM_WAYS
        # bits (rather than log2 of them) is the intended width.
        req_index        = Signal(NUM_LINES)
        req_row          = Signal(BRAM_ROWS)
        req_hit_way      = Signal(NUM_WAYS)
        req_tag          = Signal(TAG_BITS)
        req_is_hit       = Signal()
        req_is_miss      = Signal()
        req_laddr        = Signal(64)

#     signal tlb_req_index : tlb_index_t;
#     signal real_addr     : std_ulogic_vector(
#                             REAL_ADDR_BITS - 1 downto 0
#                            );
#     signal ra_valid      : std_ulogic;
#     signal priv_fault    : std_ulogic;
#     signal access_ok     : std_ulogic;
#     signal use_previous  : std_ulogic;
        # address-translation results and access-check flags
        # NOTE(review): same width question as above for tlb_req_index.
        tlb_req_index    = Signal(TLB_SIZE)
        real_addr        = Signal(REAL_ADDR_BITS)
        ra_valid         = Signal()
        priv_fault       = Signal()
        access_ok        = Signal()
        use_previous     = Signal()

#     signal cache_out   : cache_ram_out_t;
        # one row of cache data, as wide as a cache row
        cache_out_row    = Signal(ROW_SIZE_BITS)

#     signal plru_victim : plru_out_t;
#     signal replace_way : way_t;
        plru_victim      = PLRUOut()
        replace_way      = Signal(NUM_WAYS)

        # call sub-functions putting everything together, using shared
        # signals established above
        self.rams(m, r, cache_out_row, use_previous, replace_way, req_row)
        self.maybe_plrus(m, r, plru_victim)
        self.itlb_lookup(m, tlb_req_index, itlb_ptes, itlb_tags,
                         real_addr, itlb_valid_bits, ra_valid, eaa_priv,
                         priv_fault, access_ok)
        self.itlb_update(m, itlb_valid_bits, itlb_tags, itlb_ptes)
        self.icache_comb(m, use_previous, r, req_index, req_row,
                         req_tag, real_addr, req_laddr, cache_valid_bits,
                         cache_tags, access_ok, req_is_hit, req_is_miss,
                         replace_way, plru_victim, cache_out_row)
        # NOTE(review): req_hit_way is only passed to icache_hit here; no
        # other call in this method receives it -- confirm something
        # actually drives it.
        self.icache_hit(m, use_previous, r, req_is_hit, req_hit_way,
                        req_index, req_tag, real_addr)
        self.icache_miss(m, cache_valid_bits, r, req_is_miss, req_index,
                         req_laddr, req_tag, replace_way, cache_tags,
                         access_ok, real_addr)
        # the logging helper is currently disabled
        #self.icache_log(m, log_out, req_hit_way, ra_valid, access_ok,
        #                req_is_miss, req_is_hit, lway, wstate, r)

        return m
1521
1522
1523 # icache_tb.vhdl
1524 #
1525 # library ieee;
1526 # use ieee.std_logic_1164.all;
1527 #
1528 # library work;
1529 # use work.common.all;
1530 # use work.wishbone_types.all;
1531 #
1532 # entity icache_tb is
1533 # end icache_tb;
1534 #
1535 # architecture behave of icache_tb is
1536 #     signal clk          : std_ulogic;
1537 #     signal rst          : std_ulogic;
1538 #
1539 #     signal i_out        : Fetch1ToIcacheType;
1540 #     signal i_in         : IcacheToDecode1Type;
1541 #
1542 #     signal m_out        : MmuToIcacheType;
1543 #
1544 #     signal wb_bram_in   : wishbone_master_out;
1545 #     signal wb_bram_out  : wishbone_slave_out;
1546 #
1547 #     constant clk_period : time := 10 ns;
1548 # begin
1549 #     icache0: entity work.icache
1550 #         generic map(
1551 #             LINE_SIZE => 64,
1552 #             NUM_LINES => 4
1553 #             )
1554 #         port map(
1555 #             clk => clk,
1556 #             rst => rst,
1557 #             i_in => i_out,
1558 #             i_out => i_in,
1559 #             m_in => m_out,
1560 #             stall_in => '0',
1561 #           flush_in => '0',
1562 #             inval_in => '0',
1563 #             wishbone_out => wb_bram_in,
1564 #             wishbone_in => wb_bram_out
1565 #             );
1566 #
1567 #     -- BRAM Memory slave
1568 #     bram0: entity work.wishbone_bram_wrapper
1569 #         generic map(
1570 #             MEMORY_SIZE   => 1024,
1571 #             RAM_INIT_FILE => "icache_test.bin"
1572 #             )
1573 #         port map(
1574 #             clk => clk,
1575 #             rst => rst,
1576 #             wishbone_in => wb_bram_in,
1577 #             wishbone_out => wb_bram_out
1578 #             );
1579 #
1580 #     clk_process: process
1581 #     begin
1582 #         clk <= '0';
1583 #         wait for clk_period/2;
1584 #         clk <= '1';
1585 #         wait for clk_period/2;
1586 #     end process;
1587 #
1588 #     rst_process: process
1589 #     begin
1590 #         rst <= '1';
1591 #         wait for 2*clk_period;
1592 #         rst <= '0';
1593 #         wait;
1594 #     end process;
1595 #
1596 #     stim: process
1597 #     begin
1598 #         i_out.req <= '0';
1599 #         i_out.nia <= (others => '0');
1600 #       i_out.stop_mark <= '0';
1601 #
1602 #         m_out.tlbld <= '0';
1603 #         m_out.tlbie <= '0';
1604 #         m_out.addr <= (others => '0');
1605 #         m_out.pte <= (others => '0');
1606 #
1607 #         wait until rising_edge(clk);
1608 #         wait until rising_edge(clk);
1609 #         wait until rising_edge(clk);
1610 #         wait until rising_edge(clk);
1611 #
1612 #         i_out.req <= '1';
1613 #         i_out.nia <= x"0000000000000004";
1614 #
1615 #         wait for 30*clk_period;
1616 #         wait until rising_edge(clk);
1617 #
1618 #         assert i_in.valid = '1' severity failure;
1619 #         assert i_in.insn = x"00000001"
1620 #           report "insn @" & to_hstring(i_out.nia) &
1621 #           "=" & to_hstring(i_in.insn) &
1622 #           " expected 00000001"
1623 #           severity failure;
1624 #
1625 #         i_out.req <= '0';
1626 #
1627 #         wait until rising_edge(clk);
1628 #
1629 #         -- hit
1630 #         i_out.req <= '1';
1631 #         i_out.nia <= x"0000000000000008";
1632 #         wait until rising_edge(clk);
1633 #         wait until rising_edge(clk);
1634 #         assert i_in.valid = '1' severity failure;
1635 #         assert i_in.insn = x"00000002"
1636 #           report "insn @" & to_hstring(i_out.nia) &
1637 #           "=" & to_hstring(i_in.insn) &
1638 #           " expected 00000002"
1639 #           severity failure;
1640 #         wait until rising_edge(clk);
1641 #
1642 #         -- another miss
1643 #         i_out.req <= '1';
1644 #         i_out.nia <= x"0000000000000040";
1645 #
1646 #         wait for 30*clk_period;
1647 #         wait until rising_edge(clk);
1648 #
1649 #         assert i_in.valid = '1' severity failure;
1650 #         assert i_in.insn = x"00000010"
1651 #           report "insn @" & to_hstring(i_out.nia) &
1652 #           "=" & to_hstring(i_in.insn) &
1653 #           " expected 00000010"
1654 #           severity failure;
1655 #
1656 #         -- test something that aliases
1657 #         i_out.req <= '1';
1658 #         i_out.nia <= x"0000000000000100";
1659 #         wait until rising_edge(clk);
1660 #         wait until rising_edge(clk);
1661 #         assert i_in.valid = '0' severity failure;
1662 #         wait until rising_edge(clk);
1663 #
1664 #         wait for 30*clk_period;
1665 #         wait until rising_edge(clk);
1666 #
1667 #         assert i_in.valid = '1' severity failure;
1668 #         assert i_in.insn = x"00000040"
1669 #           report "insn @" & to_hstring(i_out.nia) &
1670 #           "=" & to_hstring(i_in.insn) &
1671 #           " expected 00000040"
1672 #           severity failure;
1673 #
1674 #         i_out.req <= '0';
1675 #
1676 #         std.env.finish;
1677 #     end process;
1678 # end;
def icache_sim(dut):
    """Simulation stimulus for the icache, ported from icache_tb.vhdl.

    This is a generator intended to be run as an nmigen sync process.
    A bare ``yield`` advances one clock, ``yield sig.eq(x)`` drives a
    signal and ``value = yield sig`` samples one.

    The sequence is: a first fetch at 0x4 (miss), a fetch at 0x8 (hit in
    the line just loaded), a fetch at 0x40 (another miss) and finally a
    fetch at 0x100, which the reference testbench describes as aliasing:
    it must *not* be valid two cycles after the request and must return
    the right data after the reload.

    Changes relative to the previous version of this function:

    * ``assert ~valid`` could never fail, because ``~`` on a Python int
      is never zero for the values 0 and 1.  It is now ``assert not
      valid``, matching ``assert i_in.valid = '0'`` in the VHDL.
    * the final check reported a stale ``nia`` in its failure message and
      sampled ``insn`` twice.
    * every check now reports the *requested* address, as the VHDL does.

    :param dut: object exposing ``i_in`` (fetch request), ``i_out``
                (fetch result) and ``m_in`` (MMU interface) records.
    :raises AssertionError: when the cache output differs from the
                            expected valid flag / instruction word.
    """
    # Names follow the VHDL testbench, i.e. they are seen from the
    # testbench side: "i_out" is what we drive into the cache, "i_in" is
    # what the cache hands back.
    i_out = dut.i_in
    i_in  = dut.i_out
    m_out = dut.m_in

    def request(addr):
        # plain ints are accepted by .eq(), no explicit Const needed
        yield i_out.req.eq(1)
        yield i_out.nia.eq(addr)

    def wait(cycles):
        for _ in range(cycles):
            yield

    def expect_insn(expected, verbose=False):
        valid = yield i_in.valid
        nia   = yield i_out.nia
        insn  = yield i_in.insn
        if verbose:
            print(f"valid? {valid}")
        assert valid, "insn @%x not valid" % nia
        assert insn == expected, \
            "insn @%x=%x expected %08x" % (nia, insn, expected)

    # NOTE(review): this drives an *output* of the dut; kept from the
    # previous version, confirm whether it is needed at all.
    yield i_in.valid.eq(0)

    # idle inputs
    yield i_out.priv_mode.eq(1)
    yield i_out.req.eq(0)
    yield i_out.nia.eq(0)
    yield i_out.stop_mark.eq(0)
    yield m_out.tlbld.eq(0)
    yield m_out.tlbie.eq(0)
    yield m_out.addr.eq(0)
    yield m_out.pte.eq(0)
    yield from wait(4)

    # first fetch: nothing is cached yet, allow time for the reload
    yield from request(0x0000000000000004)
    yield from wait(31)
    yield from expect_insn(0x00000001, verbose=True)
    yield i_out.req.eq(0)
    yield

    # hit
    yield from request(0x0000000000000008)
    yield from wait(2)
    yield from expect_insn(0x00000002)
    yield

    # another miss
    yield from request(0x0000000000000040)
    yield from wait(31)
    yield from expect_insn(0x00000010)

    # test something that aliases
    yield from request(0x0000000000000100)
    yield from wait(2)
    valid = yield i_in.valid
    # `valid` is a Python int here: use `not`, never `~`
    assert not valid, "aliasing fetch @100 reported valid before reload"
    yield from wait(31)
    yield from expect_insn(0x00000040)
    yield i_out.req.eq(0)
1755
1756
1757
def test_icache(mem):
    """Simulate an ICache connected to a wishbone SRAM and run icache_sim.

    :param mem: initial contents of the 64-bit wide backing memory.

    The simulation trace is written to ``test_icache.vcd``.
    """
    dut = ICache()

    # backing store behind the wishbone bus
    memory = Memory(width=64, depth=16*64, init=mem)
    sram = SRAM(memory=memory, granularity=8)

    m = Module()
    m.submodules.icache = dut
    m.submodules.sram = sram

    # cache (master) -> sram (slave): same-named fields first, then the
    # write data which is called "dat" on one side and "dat_w" on the other
    for field in ("cyc", "stb", "we", "sel", "adr"):
        m.d.comb += getattr(sram.bus, field).eq(getattr(dut.wb_out, field))
    m.d.comb += sram.bus.dat_w.eq(dut.wb_out.dat)

    # sram (slave) -> cache (master)
    m.d.comb += [
        dut.wb_in.ack.eq(sram.bus.ack),
        dut.wb_in.dat.eq(sram.bus.dat_r),
    ]

    # nmigen Simulation
    sim = Simulator(m)
    sim.add_clock(1e-6)
    sim.add_sync_process(wrap(icache_sim(dut)))

    with sim.write_vcd('test_icache.vcd'):
        sim.run()
1786
if __name__ == '__main__':
    # dump the RTLIL of a stand-alone icache
    icache = ICache()
    rtlil_text = rtlil.convert(icache, ports=[])
    with open("test_icache.il", "w") as il_file:
        il_file.write(rtlil_text)

    # 64-bit word i packs 2*i in its low half and 2*i+1 in its high half,
    # i.e. the k-th 32-bit value is k (the values icache_sim checks for)
    mem = [(2 * i) | ((2 * i + 1) << 32) for i in range(512)]

    test_icache(mem)
1798