1 """DCache
2
3 based on Anton Blanchard microwatt dcache.vhdl
4
5 note that the microwatt dcache wishbone interface expects "stall".
6 for simplicity at the moment this is hard-coded to cyc & ~ack.
7 see WB4 spec, p84, section 5.2.1
8
9 IMPORTANT: for store, the data is sampled the cycle AFTER the "valid"
10 is raised. sigh
11
12 Links:
13
14 * https://libre-soc.org/3d_gpu/architecture/set_associative_cache.jpg
15 * https://bugs.libre-soc.org/show_bug.cgi?id=469
16
17 """

import sys

from nmutil.gtkw import write_gtkw

sys.setrecursionlimit(1000000)

from enum import Enum, unique

from nmigen import (Module, Signal, Elaboratable, Cat, Repl, Array, Const,
                    Record)
from nmutil.util import Display

from copy import deepcopy
from random import randint, seed

from nmigen_soc.wishbone.bus import Interface

from nmigen.cli import main
from nmutil.iocontrol import RecordObject
from nmigen.utils import log2_int
from soc.experiment.mem_types import (LoadStore1ToDCacheType,
                                      DCacheToLoadStore1Type,
                                      MMUToDCacheType,
                                      DCacheToMMUType)

from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS, WB_SEL_BITS,
                                     WBAddrType, WBDataType, WBSelType,
                                     WBMasterOut, WBSlaveOut,
                                     WBMasterOutVector, WBSlaveOutVector,
                                     WBIOMasterOut, WBIOSlaveOut)

from soc.experiment.cache_ram import CacheRam
#from soc.experiment.plru import PLRU
from nmutil.plru import PLRU

# for test
from soc.bus.sram import SRAM
from nmigen import Memory
from nmigen.cli import rtlil

# NOTE: to use cxxsim, export NMIGEN_SIM_MODE=cxxsim from the shell
# Also, check out the cxxsim nmigen branch, and latest yosys from git
from nmutil.sim_tmp_alternative import Simulator

from nmutil.util import wrap

# TODO: make these parameters of DCache at some point
LINE_SIZE = 64    # Line size in bytes
NUM_LINES = 16    # Number of lines in a set
NUM_WAYS = 4      # Number of ways
TLB_SET_SIZE = 64 # L1 DTLB number of sets
TLB_NUM_WAYS = 2  # L1 DTLB number of ways
TLB_LG_PGSZ = 12  # L1 DTLB log_2(page_size)
LOG_LENGTH = 0    # Non-zero to enable log data collection

# BRAM organisation: We never access more than
# WB_DATA_BITS at a time so to save
# resources we make the array only that wide, and
# use consecutive indices to make a cache "line"
#
# ROW_SIZE is the width in bytes of the BRAM
# (based on WB, so 64-bits)
ROW_SIZE = WB_DATA_BITS // 8

# ROW_PER_LINE is the number of rows (wishbone
# transactions) in a line
ROW_PER_LINE = LINE_SIZE // ROW_SIZE

# BRAM_ROWS is the number of rows in BRAM needed
# to represent the full dcache
BRAM_ROWS = NUM_LINES * ROW_PER_LINE

print ("ROW_SIZE", ROW_SIZE)
print ("ROW_PER_LINE", ROW_PER_LINE)
print ("BRAM_ROWS", BRAM_ROWS)
print ("NUM_WAYS", NUM_WAYS)

# Bit field counts in the address

# REAL_ADDR_BITS is the number of real address
# bits that we store
REAL_ADDR_BITS = 56

# ROW_BITS is the number of bits to select a row
ROW_BITS = log2_int(BRAM_ROWS)

# ROW_LINE_BITS is the number of bits to select
# a row within a line
ROW_LINE_BITS = log2_int(ROW_PER_LINE)

# LINE_OFF_BITS is the number of bits for
# the offset in a cache line
LINE_OFF_BITS = log2_int(LINE_SIZE)

# ROW_OFF_BITS is the number of bits for
# the offset in a row
ROW_OFF_BITS = log2_int(ROW_SIZE)

# INDEX_BITS is the number of bits to
# select a cache line
INDEX_BITS = log2_int(NUM_LINES)

# SET_SIZE_BITS is the log base 2 of the set size
SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS

# TAG_BITS is the number of bits of
# the tag part of the address
TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS

# TAG_WIDTH is the width in bits of each way of the tag RAM
TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)

# WAY_BITS is the number of bits to select a way
WAY_BITS = log2_int(NUM_WAYS)

# Example of layout for 32 lines of 64 bytes:
layout = """\
  ..  tag    |index|  line  |
  ..         |   row   |    |
  ..         |     |---|    | ROW_LINE_BITS  (3)
  ..         |     |--- - --| LINE_OFF_BITS (6)
  ..         |         |- --| ROW_OFF_BITS  (3)
  ..         |----- ---|    | ROW_BITS      (8)
  ..         |-----|        | INDEX_BITS    (5)
  .. --------|              | TAG_BITS      (45)
"""
print (layout)
print ("Dcache TAG %d IDX %d ROW_BITS %d ROFF %d LOFF %d RLB %d" % \
            (TAG_BITS, INDEX_BITS, ROW_BITS,
             ROW_OFF_BITS, LINE_OFF_BITS, ROW_LINE_BITS))
print ("index @: %d-%d" % (LINE_OFF_BITS, SET_SIZE_BITS))
print ("row @: %d-%d" % (ROW_OFF_BITS, SET_SIZE_BITS))
print ("tag @: %d-%d width %d" % (SET_SIZE_BITS, REAL_ADDR_BITS, TAG_WIDTH))

TAG_RAM_WIDTH = TAG_WIDTH * NUM_WAYS

print ("TAG_RAM_WIDTH", TAG_RAM_WIDTH)

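# Worked example with the default parameters above (LINE_SIZE=64,
# NUM_LINES=16, NUM_WAYS=4, WB_DATA_BITS=64) -- note that the layout
# diagram above uses 32 lines, so its numbers differ slightly:
#   ROW_SIZE=8, ROW_PER_LINE=8, BRAM_ROWS=128
#   ROW_OFF_BITS=3, ROW_LINE_BITS=3, LINE_OFF_BITS=6
#   INDEX_BITS=4, SET_SIZE_BITS=10, ROW_BITS=7, WAY_BITS=2
#   TAG_BITS=46, TAG_WIDTH=48 (rounded up to a byte multiple),
#   TAG_RAM_WIDTH=192
# so a real address decomposes as tag=addr[10:56], index=addr[6:10],
# row=addr[3:10] and byte-within-row=addr[0:3].
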
def CacheTagArray():
    tag_layout = [('valid', NUM_WAYS), # one valid bit per way
                  ('tag', TAG_RAM_WIDTH),
                 ]
    return Array(Record(tag_layout, name="tag%d" % x) for x in range(NUM_LINES))

def RowPerLineValidArray():
    return Array(Signal(name="rows_valid%d" % x) \
                        for x in range(ROW_PER_LINE))

# L1 TLB
TLB_SET_BITS = log2_int(TLB_SET_SIZE)
TLB_WAY_BITS = log2_int(TLB_NUM_WAYS)
TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_SET_BITS)
TLB_TAG_WAY_BITS = TLB_NUM_WAYS * TLB_EA_TAG_BITS
TLB_PTE_BITS = 64
TLB_PTE_WAY_BITS = TLB_NUM_WAYS * TLB_PTE_BITS

def ispow2(x):
    return (1<<log2_int(x, False)) == x

assert (LINE_SIZE % ROW_SIZE) == 0, "LINE_SIZE not multiple of ROW_SIZE"
assert ispow2(LINE_SIZE), "LINE_SIZE not power of 2"
assert ispow2(NUM_LINES), "NUM_LINES not power of 2"
assert ispow2(ROW_PER_LINE), "ROW_PER_LINE not power of 2"
assert ROW_BITS == (INDEX_BITS + ROW_LINE_BITS), "geometry bits don't add up"
assert (LINE_OFF_BITS == ROW_OFF_BITS + ROW_LINE_BITS), \
        "geometry bits don't add up"
assert REAL_ADDR_BITS == (TAG_BITS + INDEX_BITS + LINE_OFF_BITS), \
        "geometry bits don't add up"
assert REAL_ADDR_BITS == (TAG_BITS + ROW_BITS + ROW_OFF_BITS), \
        "geometry bits don't add up"
assert 64 == WB_DATA_BITS, "Can't yet handle wb width that isn't 64-bits"
assert SET_SIZE_BITS <= TLB_LG_PGSZ, "Set indexed by virtual address"

def TLBHit(name):
    return Record([('valid', 1),
                   ('way', TLB_WAY_BITS)], name=name)

def TLBTagEAArray():
    return Array(Signal(TLB_EA_TAG_BITS, name="tlbtagea%d" % x) \
                 for x in range(TLB_NUM_WAYS))

def TLBRecord(name):
    tlb_layout = [('valid', TLB_NUM_WAYS),
                  ('tag', TLB_TAG_WAY_BITS),
                  ('pte', TLB_PTE_WAY_BITS)
                 ]
    return Record(tlb_layout, name=name)

def TLBArray():
    return Array(TLBRecord(name="tlb%d" % x) for x in range(TLB_SET_SIZE))

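# With the default TLB parameters this is a 64-set, 2-way TLB: each
# TLBRecord bundles, per set, 2 valid bits, 2 x 46-bit EA tags and
# 2 x 64-bit PTEs, with the set selected by addr[12:18] (see tlb_read).
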
def HitWaySet():
    return Array(Signal(WAY_BITS, name="hitway_%d" % x) \
                        for x in range(TLB_NUM_WAYS))

# Cache RAM interface
def CacheRamOut():
    return Array(Signal(WB_DATA_BITS, name="cache_out%d" % x) \
                 for x in range(NUM_WAYS))

# PLRU output interface
def PLRUOut():
    return Array(Signal(WAY_BITS, name="plru_out%d" % x) \
                 for x in range(NUM_LINES))

# TLB PLRU output interface
def TLBPLRUOut():
    return Array(Signal(TLB_WAY_BITS, name="tlbplru_out%d" % x) \
                 for x in range(TLB_SET_SIZE))

# Helper functions to decode incoming requests
#
# Return the cache line index (tag index) for an address
def get_index(addr):
    return addr[LINE_OFF_BITS:SET_SIZE_BITS]

# Return the cache row index (data memory) for an address
def get_row(addr):
    return addr[ROW_OFF_BITS:SET_SIZE_BITS]

# Return the index of a row within a line
def get_row_of_line(row):
    return row[:ROW_BITS][:ROW_LINE_BITS]

# Returns whether this is the last row of a line,
# given the address of the row
def is_last_row_addr(addr, last):
    return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last

# Returns whether this is the last row of a line,
# given the row index
def is_last_row(row, last):
    return get_row_of_line(row) == last

# Return the next row in the current cache line. We use a
# dedicated function in order to limit the size of the
# generated adder to be only the bits within a cache line
# (3 bits with default settings)
def next_row(row):
    row_v = row[0:ROW_LINE_BITS] + 1
    return Cat(row_v[:ROW_LINE_BITS], row[ROW_LINE_BITS:])

# Get the tag value from the address
def get_tag(addr):
    return addr[SET_SIZE_BITS:REAL_ADDR_BITS]

# Read a tag from a tag memory row
def read_tag(way, tagset):
    return tagset.word_select(way, TAG_WIDTH)[:TAG_BITS]

# Read a TLB tag from a TLB tag memory row
def read_tlb_tag(way, tags):
    return tags.word_select(way, TLB_EA_TAG_BITS)

# Write a TLB tag to a TLB tag memory row
def write_tlb_tag(way, tags, tag):
    return read_tlb_tag(way, tags).eq(tag)

# Read a PTE from a TLB PTE memory row
def read_tlb_pte(way, ptes):
    return ptes.word_select(way, TLB_PTE_BITS)

def write_tlb_pte(way, ptes, newpte):
    return read_tlb_pte(way, ptes).eq(newpte)

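# The helpers above operate on nmigen Signals.  The plain-integer
# sketch below (illustrative only, not used by the HDL) shows the same
# bit-slicing on ordinary numbers, including the deliberate wrap-around
# of next_row within a cache line (only the low ROW_LINE_BITS take part
# in the increment):

def _decode_example(addr):
    "integer model of get_index/get_row/get_tag for a real address"
    index = (addr >> LINE_OFF_BITS) & ((1 << INDEX_BITS) - 1)
    row = (addr >> ROW_OFF_BITS) & ((1 << ROW_BITS) - 1)
    tag = (addr >> SET_SIZE_BITS) & ((1 << TAG_BITS) - 1)
    return index, row, tag

def _next_row_example(row):
    "integer model of next_row: the increment wraps within the line"
    mask = (1 << ROW_LINE_BITS) - 1
    return (row & ~mask) | ((row + 1) & mask)
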

# Record for storing permission, attribute, etc. bits from a PTE
class PermAttr(RecordObject):
    def __init__(self, name=None):
        super().__init__(name=name)
        self.reference = Signal()
        self.changed = Signal()
        self.nocache = Signal()
        self.priv = Signal()
        self.rd_perm = Signal()
        self.wr_perm = Signal()


def extract_perm_attr(pte):
    # stub: the PTE permission/attribute fields are currently decoded
    # inline in tlb_search rather than here
    pa = PermAttr()
    return pa


# Type of operation on a "valid" input
@unique
class Op(Enum):
    OP_NONE = 0
    OP_BAD = 1           # NC cache hit, TLB miss, prot/RC failure
    OP_STCX_FAIL = 2     # conditional store w/o reservation
    OP_LOAD_HIT = 3      # Cache hit on load
    OP_LOAD_MISS = 4     # Load missing cache
    OP_LOAD_NC = 5       # Non-cacheable load
    OP_STORE_HIT = 6     # Store hitting cache
    OP_STORE_MISS = 7    # Store missing cache


# Cache state machine
@unique
class State(Enum):
    IDLE = 0             # Normal load hit processing
    RELOAD_WAIT_ACK = 1  # Cache reload wait ack
    STORE_WAIT_ACK = 2   # Store wait ack
    NC_LOAD_WAIT_ACK = 3 # Non-cacheable load wait ack


# Dcache operations:
#
# In order to make timing, we use the BRAMs with
# an output buffer, which means that the BRAM
# output is delayed by an extra cycle.
#
# Thus, the dcache has a 2-stage internal pipeline
# for cache hits with no stalls.
#
# All other operations are handled via stalling
# in the first stage.
#
# The second stage can thus complete a hit at the same
# time as the first stage emits a stall for a complex op.
#
# Stage 0 register, basically contains just the latched request

class RegStage0(RecordObject):
    def __init__(self, name=None):
        super().__init__(name=name)
        self.req = LoadStore1ToDCacheType(name="lsmem")
        self.tlbie = Signal()   # indicates a tlbie request (from MMU)
        self.doall = Signal()   # with tlbie, indicates flush whole TLB
        self.tlbld = Signal()   # indicates a TLB load request (from MMU)
        self.mmu_req = Signal() # indicates source of request
        self.d_valid = Signal() # indicates req.data is valid now


class MemAccessRequest(RecordObject):
    def __init__(self, name=None):
        super().__init__(name=name)
        self.op = Signal(Op)
        self.valid = Signal()
        self.dcbz = Signal()
        self.real_addr = Signal(REAL_ADDR_BITS)
        self.data = Signal(64)
        self.byte_sel = Signal(8)
        self.hit_way = Signal(WAY_BITS)
        self.same_tag = Signal()
        self.mmu_req = Signal()


# First stage register, contains state for stage 1 of load hits
# and for the state machine used by all other operations
class RegStage1(RecordObject):
    def __init__(self, name=None):
        super().__init__(name=name)
        # Info about the request
        self.full = Signal()    # have uncompleted request
        self.mmu_req = Signal() # request is from MMU
        self.req = MemAccessRequest(name="reqmem")

        # Cache hit state
        self.hit_way = Signal(WAY_BITS)
        self.hit_load_valid = Signal()
        self.hit_index = Signal(INDEX_BITS)
        self.cache_hit = Signal()

        # TLB hit state
        self.tlb_hit = TLBHit("tlb_hit")
        self.tlb_hit_index = Signal(TLB_SET_BITS)

        # 2-stage data buffer for data forwarded from writes to reads
        self.forward_data1 = Signal(64)
        self.forward_data2 = Signal(64)
        self.forward_sel1 = Signal(8)
        self.forward_valid1 = Signal()
        self.forward_way1 = Signal(WAY_BITS)
        self.forward_row1 = Signal(ROW_BITS)
        self.use_forward1 = Signal()
        self.forward_sel = Signal(8)

        # Cache miss state (reload state machine)
        self.state = Signal(State)
        self.dcbz = Signal()
        self.write_bram = Signal()
        self.write_tag = Signal()
        self.slow_valid = Signal()
        self.wb = WBMasterOut("wb")
        self.reload_tag = Signal(TAG_BITS)
        self.store_way = Signal(WAY_BITS)
        self.store_row = Signal(ROW_BITS)
        self.store_index = Signal(INDEX_BITS)
        self.end_row_ix = Signal(ROW_LINE_BITS)
        self.rows_valid = RowPerLineValidArray()
        self.acks_pending = Signal(3)
        self.inc_acks = Signal()
        self.dec_acks = Signal()

        # Signals to complete (possibly with error)
        self.ls_valid = Signal()
        self.ls_error = Signal()
        self.mmu_done = Signal()
        self.mmu_error = Signal()
        self.cache_paradox = Signal()

        # Signal to complete a failed stcx.
        self.stcx_fail = Signal()


# Reservation information
class Reservation(RecordObject):
    def __init__(self):
        super().__init__()
        self.valid = Signal()
        self.addr = Signal(64-LINE_OFF_BITS)


class DTLBUpdate(Elaboratable):
    def __init__(self):
        self.tlbie = Signal()
        self.tlbwe = Signal()
        self.doall = Signal()
        self.updated = Signal()
        self.v_updated = Signal()
        self.tlb_hit = TLBHit("tlb_hit")
        self.tlb_req_index = Signal(TLB_SET_BITS)

        self.tlb_tag_way = Signal(TLB_TAG_WAY_BITS)
        self.tlb_pte_way = Signal(TLB_PTE_WAY_BITS)
        self.repl_way = Signal(TLB_WAY_BITS)
        self.eatag = Signal(TLB_EA_TAG_BITS)
        self.pte_data = Signal(TLB_PTE_BITS)

        self.dv = Signal(TLB_NUM_WAYS) # tlb_way_valids_t

        self.tb_out = Signal(TLB_TAG_WAY_BITS) # tlb_way_tags_t
        self.db_out = Signal(TLB_NUM_WAYS)     # tlb_way_valids_t
        self.pb_out = Signal(TLB_PTE_WAY_BITS) # tlb_way_ptes_t

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        tagset = Signal(TLB_TAG_WAY_BITS)
        pteset = Signal(TLB_PTE_WAY_BITS)

        tb_out, pb_out, db_out = self.tb_out, self.pb_out, self.db_out
        comb += db_out.eq(self.dv)

        with m.If(self.tlbie & self.doall):
            pass # clear all back in parent
        with m.Elif(self.tlbie):
            with m.If(self.tlb_hit.valid):
                comb += db_out.bit_select(self.tlb_hit.way, 1).eq(0)
                comb += self.v_updated.eq(1)

        with m.Elif(self.tlbwe):

            comb += tagset.eq(self.tlb_tag_way)
            comb += write_tlb_tag(self.repl_way, tagset, self.eatag)
            comb += tb_out.eq(tagset)

            comb += pteset.eq(self.tlb_pte_way)
            comb += write_tlb_pte(self.repl_way, pteset, self.pte_data)
            comb += pb_out.eq(pteset)

            comb += db_out.bit_select(self.repl_way, 1).eq(1)

            comb += self.updated.eq(1)
            comb += self.v_updated.eq(1)

        return m


class DCachePendingHit(Elaboratable):

    def __init__(self, tlb_way,
                 cache_i_validdx, cache_tag_set,
                 req_addr,
                 hit_set):

        self.go = Signal()
        self.virt_mode = Signal()
        self.is_hit = Signal()
        self.tlb_hit = TLBHit("tlb_hit")
        self.hit_way = Signal(WAY_BITS)
        self.rel_match = Signal()
        self.req_index = Signal(INDEX_BITS)
        self.reload_tag = Signal(TAG_BITS)

        self.tlb_way = tlb_way
        self.cache_i_validdx = cache_i_validdx
        self.cache_tag_set = cache_tag_set
        self.req_addr = req_addr
        self.hit_set = hit_set

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        go = self.go
        virt_mode = self.virt_mode
        is_hit = self.is_hit
        tlb_way = self.tlb_way
        cache_i_validdx = self.cache_i_validdx
        cache_tag_set = self.cache_tag_set
        req_addr = self.req_addr
        tlb_hit = self.tlb_hit
        hit_set = self.hit_set
        hit_way = self.hit_way
        rel_match = self.rel_match
        req_index = self.req_index
        reload_tag = self.reload_tag

        rel_matches = Array(Signal(name="rel_matches_%d" % i) \
                            for i in range(TLB_NUM_WAYS))
        hit_way_set = HitWaySet()

        # Test if pending request is a hit on any way.
        # In order to make timing in virtual mode, when we are using
        # the TLB, we compare the cache tags of each way against the
        # real addresses computed from each TLB way, and then decide
        # later which match to use.

        with m.If(virt_mode):
            for j in range(TLB_NUM_WAYS): # tlb_num_way_t
                s_tag = Signal(TAG_BITS, name="s_tag%d" % j)
                s_hit = Signal()
                s_pte = Signal(TLB_PTE_BITS)
                s_ra = Signal(REAL_ADDR_BITS)
                comb += s_pte.eq(read_tlb_pte(j, tlb_way.pte))
                comb += s_ra.eq(Cat(req_addr[0:TLB_LG_PGSZ],
                                    s_pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
                comb += s_tag.eq(get_tag(s_ra))

                for i in range(NUM_WAYS): # way_t
                    is_tag_hit = Signal(name="is_tag_hit_%d_%d" % (j, i))
                    comb += is_tag_hit.eq(go & cache_i_validdx[i] &
                                  (read_tag(i, cache_tag_set) == s_tag)
                                  & (tlb_way.valid[j]))
                    with m.If(is_tag_hit):
                        comb += hit_way_set[j].eq(i)
                        comb += s_hit.eq(1)
                comb += hit_set[j].eq(s_hit)
                with m.If(s_tag == reload_tag):
                    comb += rel_matches[j].eq(1)
            with m.If(tlb_hit.valid):
                comb += is_hit.eq(hit_set[tlb_hit.way])
                comb += hit_way.eq(hit_way_set[tlb_hit.way])
                comb += rel_match.eq(rel_matches[tlb_hit.way])
        with m.Else():
            s_tag = Signal(TAG_BITS)
            comb += s_tag.eq(get_tag(req_addr))
            for i in range(NUM_WAYS): # way_t
                is_tag_hit = Signal(name="is_tag_hit_%d" % i)
                comb += is_tag_hit.eq(go & cache_i_validdx[i] &
                          (read_tag(i, cache_tag_set) == s_tag))
                with m.If(is_tag_hit):
                    comb += hit_way.eq(i)
                    comb += is_hit.eq(1)
            with m.If(s_tag == reload_tag):
                comb += rel_match.eq(1)

        return m


class DCache(Elaboratable):
    """Set associative dcache write-through

    TODO (in no specific order):
    * See list in icache.vhdl
    * Complete load misses on the cycle when WB data comes instead of
      at the end of line (this requires dealing with requests coming in
      while not idle...)
    """
    def __init__(self):
        self.d_in = LoadStore1ToDCacheType("d_in")
        self.d_out = DCacheToLoadStore1Type("d_out")

        self.m_in = MMUToDCacheType("m_in")
        self.m_out = DCacheToMMUType("m_out")

        self.stall_out = Signal()

        # standard naming (wired to non-standard for compatibility)
        self.bus = Interface(addr_width=32,
                             data_width=64,
                             granularity=8,
                             features={'stall'},
                             alignment=0,
                             name="dcache")

        self.log_out = Signal(20)

    def stage_0(self, m, r0, r1, r0_full):
        """Latch the request in r0.req as long as we're not stalling
        """
        comb = m.d.comb
        sync = m.d.sync
        d_in, d_out, m_in = self.d_in, self.d_out, self.m_in

        r = RegStage0("stage0")

        # TODO, this goes in unit tests and formal proofs
        with m.If(d_in.valid & m_in.valid):
            sync += Display("request collision loadstore vs MMU")

        with m.If(m_in.valid):
            comb += r.req.valid.eq(1)
            comb += r.req.load.eq(~(m_in.tlbie | m_in.tlbld)) # no invalidate
            comb += r.req.dcbz.eq(0)
            comb += r.req.nc.eq(0)
            comb += r.req.reserve.eq(0)
            comb += r.req.virt_mode.eq(0)
            comb += r.req.priv_mode.eq(1)
            comb += r.req.addr.eq(m_in.addr)
            comb += r.req.data.eq(m_in.pte)
            comb += r.req.byte_sel.eq(~0) # Const -1 sets all to 0b111....
            comb += r.tlbie.eq(m_in.tlbie)
            comb += r.doall.eq(m_in.doall)
            comb += r.tlbld.eq(m_in.tlbld)
            comb += r.mmu_req.eq(1)
            m.d.sync += Display("    DCACHE req mmu addr %x pte %x ld %d",
                                m_in.addr, m_in.pte, r.req.load)

        with m.Else():
            comb += r.req.eq(d_in)
            comb += r.req.data.eq(0)
            comb += r.tlbie.eq(0)
            comb += r.doall.eq(0)
            comb += r.tlbld.eq(0)
            comb += r.mmu_req.eq(0)
        with m.If((~r1.full & ~d_in.hold) | ~r0_full):
            sync += r0.eq(r)
            sync += r0_full.eq(r.req.valid)
        # Sample data the cycle after a request comes in from loadstore1.
        # If another request has come in already then the data will get
        # put directly into req.data below.
        with m.If(r0.req.valid & ~r.req.valid & ~r0.d_valid &
                  ~r0.mmu_req):
            sync += r0.req.data.eq(d_in.data)
            sync += r0.d_valid.eq(1)
        with m.If(d_in.valid):
            m.d.sync += Display("    DCACHE req cache "
                                "virt %d addr %x data %x ld %d",
                                r.req.virt_mode, r.req.addr,
                                r.req.data, r.req.load)

    def tlb_read(self, m, r0_stall, tlb_way, dtlb):
        """TLB
        Operates in the second cycle on the request latched in r0.req.
        TLB updates write the entry at the end of the second cycle.
        """
        comb = m.d.comb
        sync = m.d.sync
        m_in, d_in = self.m_in, self.d_in

        index = Signal(TLB_SET_BITS)
        addrbits = Signal(TLB_SET_BITS)

        amin = TLB_LG_PGSZ
        amax = TLB_LG_PGSZ + TLB_SET_BITS

        with m.If(m_in.valid):
            comb += addrbits.eq(m_in.addr[amin : amax])
        with m.Else():
            comb += addrbits.eq(d_in.addr[amin : amax])
        comb += index.eq(addrbits)

        # If we have any op and the previous op isn't finished,
        # then keep the same output for next cycle.
        with m.If(~r0_stall):
            sync += tlb_way.eq(dtlb[index])

    def maybe_tlb_plrus(self, m, r1, tlb_plru_victim):
        """Generate TLB PLRUs
        """
        comb = m.d.comb
        sync = m.d.sync

        if TLB_NUM_WAYS == 0:
            return
        for i in range(TLB_SET_SIZE):
            # TLB PLRU interface
            tlb_plru = PLRU(TLB_WAY_BITS)
            setattr(m.submodules, "maybe_plru_%d" % i, tlb_plru)
            tlb_plru_acc_en = Signal()

            comb += tlb_plru_acc_en.eq(r1.tlb_hit.valid &
                                       (r1.tlb_hit_index == i))
            comb += tlb_plru.acc_en.eq(tlb_plru_acc_en)
            comb += tlb_plru.acc_i.eq(r1.tlb_hit.way)
            comb += tlb_plru_victim[i].eq(tlb_plru.lru_o)

    def tlb_search(self, m, tlb_req_index, r0, r0_valid,
                   tlb_way,
                   pte, tlb_hit, valid_ra, perm_attr, ra):

        comb = m.d.comb

        hitway = Signal(TLB_WAY_BITS)
        hit = Signal()
        eatag = Signal(TLB_EA_TAG_BITS)

        TLB_LG_END = TLB_LG_PGSZ + TLB_SET_BITS
        comb += tlb_req_index.eq(r0.req.addr[TLB_LG_PGSZ : TLB_LG_END])
        comb += eatag.eq(r0.req.addr[TLB_LG_END : 64 ])

        for i in range(TLB_NUM_WAYS):
            is_tag_hit = Signal(name="is_tag_hit%d" % i)
            tlb_tag = Signal(TLB_EA_TAG_BITS, name="tlb_tag%d" % i)
            comb += tlb_tag.eq(read_tlb_tag(i, tlb_way.tag))
            comb += is_tag_hit.eq((tlb_way.valid[i]) & (tlb_tag == eatag))
            with m.If(is_tag_hit):
                comb += hitway.eq(i)
                comb += hit.eq(1)

        comb += tlb_hit.valid.eq(hit & r0_valid)
        comb += tlb_hit.way.eq(hitway)

        with m.If(tlb_hit.valid):
            comb += pte.eq(read_tlb_pte(hitway, tlb_way.pte))
        comb += valid_ra.eq(tlb_hit.valid | ~r0.req.virt_mode)

        with m.If(r0.req.virt_mode):
            comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
                              r0.req.addr[ROW_OFF_BITS:TLB_LG_PGSZ],
                              pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
            comb += perm_attr.reference.eq(pte[8])
            comb += perm_attr.changed.eq(pte[7])
            comb += perm_attr.nocache.eq(pte[5])
            comb += perm_attr.priv.eq(pte[3])
            comb += perm_attr.rd_perm.eq(pte[2])
            comb += perm_attr.wr_perm.eq(pte[1])
        with m.Else():
            comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
                              r0.req.addr[ROW_OFF_BITS:REAL_ADDR_BITS]))
            comb += perm_attr.reference.eq(1)
            comb += perm_attr.changed.eq(1)
            comb += perm_attr.nocache.eq(0)
            comb += perm_attr.priv.eq(1)
            comb += perm_attr.rd_perm.eq(1)
            comb += perm_attr.wr_perm.eq(1)

        with m.If(valid_ra):
            m.d.sync += Display("DCACHE virt mode %d hit %d ra %x pte %x",
                                r0.req.virt_mode, tlb_hit.valid, ra, pte)
            m.d.sync += Display("       perm ref=%d", perm_attr.reference)
            m.d.sync += Display("       perm chg=%d", perm_attr.changed)
            m.d.sync += Display("       perm noc=%d", perm_attr.nocache)
            m.d.sync += Display("       perm prv=%d", perm_attr.priv)
            m.d.sync += Display("       perm rdp=%d", perm_attr.rd_perm)
            m.d.sync += Display("       perm wrp=%d", perm_attr.wr_perm)

    def tlb_update(self, m, r0_valid, r0, dtlb, tlb_req_index,
                   tlb_hit, tlb_plru_victim, tlb_way):

        comb = m.d.comb
        sync = m.d.sync

        tlbie = Signal()
        tlbwe = Signal()

        comb += tlbie.eq(r0_valid & r0.tlbie)
        comb += tlbwe.eq(r0_valid & r0.tlbld)

        m.submodules.tlb_update = d = DTLBUpdate()
        with m.If(tlbie & r0.doall):
            # clear all valid bits at once
            for i in range(TLB_SET_SIZE):
                sync += dtlb[i].valid.eq(0)
        with m.If(d.updated):
            sync += dtlb[tlb_req_index].tag.eq(d.tb_out)
            sync += dtlb[tlb_req_index].pte.eq(d.pb_out)
        with m.If(d.v_updated):
            sync += dtlb[tlb_req_index].valid.eq(d.db_out)

        comb += d.dv.eq(dtlb[tlb_req_index].valid)

        comb += d.tlbie.eq(tlbie)
        comb += d.tlbwe.eq(tlbwe)
        comb += d.doall.eq(r0.doall)
        comb += d.tlb_hit.eq(tlb_hit)
        comb += d.tlb_tag_way.eq(tlb_way.tag)
        comb += d.tlb_pte_way.eq(tlb_way.pte)
        comb += d.tlb_req_index.eq(tlb_req_index)

        with m.If(tlb_hit.valid):
            comb += d.repl_way.eq(tlb_hit.way)
        with m.Else():
            comb += d.repl_way.eq(tlb_plru_victim[tlb_req_index])
        comb += d.eatag.eq(r0.req.addr[TLB_LG_PGSZ + TLB_SET_BITS:64])
        comb += d.pte_data.eq(r0.req.data)

    def maybe_plrus(self, m, r1, plru_victim):
        """Generate PLRUs
        """
        comb = m.d.comb
        sync = m.d.sync

        if NUM_WAYS == 0:
            return

        for i in range(NUM_LINES):
            # PLRU interface
            plru = PLRU(WAY_BITS)
            setattr(m.submodules, "plru%d" % i, plru)
            plru_acc_en = Signal()

            comb += plru_acc_en.eq(r1.cache_hit & (r1.hit_index == i))
            comb += plru.acc_en.eq(plru_acc_en)
            comb += plru.acc_i.eq(r1.hit_way)
            comb += plru_victim[i].eq(plru.lru_o)

    def cache_tag_read(self, m, r0_stall, req_index, cache_tag_set, cache_tags):
        """Cache tag RAM read port
        """
        comb = m.d.comb
        sync = m.d.sync
        m_in, d_in = self.m_in, self.d_in

        index = Signal(INDEX_BITS)

        with m.If(r0_stall):
            comb += index.eq(req_index)
        with m.Elif(m_in.valid):
            comb += index.eq(get_index(m_in.addr))
        with m.Else():
            comb += index.eq(get_index(d_in.addr))
        sync += cache_tag_set.eq(cache_tags[index].tag)

    def dcache_request(self, m, r0, ra, req_index, req_row, req_tag,
                       r0_valid, r1, cache_tags, replace_way,
                       use_forward1_next, use_forward2_next,
                       req_hit_way, plru_victim, rc_ok, perm_attr,
                       valid_ra, perm_ok, access_ok, req_op, req_go,
                       tlb_hit, tlb_way, cache_tag_set,
                       cancel_store, req_same_tag, r0_stall, early_req_row):
        """Cache request parsing and hit detection
        """

        comb = m.d.comb
        m_in, d_in = self.m_in, self.d_in

        is_hit = Signal()
        hit_way = Signal(WAY_BITS)
        op = Signal(Op)
        opsel = Signal(3)
        go = Signal()
        nc = Signal()
        hit_set = Array(Signal(name="hit_set_%d" % i) \
                        for i in range(TLB_NUM_WAYS))
        cache_i_validdx = Signal(NUM_WAYS)

        # Extract line, row and tag from request
        comb += req_index.eq(get_index(r0.req.addr))
        comb += req_row.eq(get_row(r0.req.addr))
        comb += req_tag.eq(get_tag(ra))

        if False: # display on comb is a bit... busy.
            comb += Display("dcache_req addr:%x ra: %x idx: %x tag: %x row: %x",
                            r0.req.addr, ra, req_index, req_tag, req_row)

        comb += go.eq(r0_valid & ~(r0.tlbie | r0.tlbld) & ~r1.ls_error)
        comb += cache_i_validdx.eq(cache_tags[req_index].valid)

        m.submodules.dcache_pend = dc = DCachePendingHit(tlb_way,
                                            cache_i_validdx, cache_tag_set,
                                            r0.req.addr,
                                            hit_set)
        comb += dc.tlb_hit.eq(tlb_hit)
        comb += dc.reload_tag.eq(r1.reload_tag)
        comb += dc.virt_mode.eq(r0.req.virt_mode)
        comb += dc.go.eq(go)
        comb += dc.req_index.eq(req_index)

        comb += is_hit.eq(dc.is_hit)
        comb += hit_way.eq(dc.hit_way)
        comb += req_same_tag.eq(dc.rel_match)

        # See if the request matches the line currently being reloaded
        with m.If((r1.state == State.RELOAD_WAIT_ACK) &
                  (req_index == r1.store_index) & req_same_tag):
            # For a store, consider this a hit even if the row isn't
            # valid since it will be by the time we perform the store.
            # For a load, check the appropriate row valid bit.
            rrow = Signal(ROW_LINE_BITS)
            comb += rrow.eq(req_row)
            valid = r1.rows_valid[rrow]
            comb += is_hit.eq((~r0.req.load) | valid)
            comb += hit_way.eq(replace_way)

        # Whether to use forwarded data for a load or not
        with m.If((get_row(r1.req.real_addr) == req_row) &
                  (r1.req.hit_way == hit_way)):
            # Only need to consider r1.write_bram here, since if we
            # are writing refill data here, then we don't have a
            # cache hit this cycle on the line being refilled.
            # (There is the possibility that the load following the
            # load miss that started the refill could be to the old
            # contents of the victim line, since it is a couple of
            # cycles after the refill starts before we see the updated
            # cache tag. In that case we don't use the bypass.)
            comb += use_forward1_next.eq(r1.write_bram)
        with m.If((r1.forward_row1 == req_row) & (r1.forward_way1 == hit_way)):
            comb += use_forward2_next.eq(r1.forward_valid1)

        # The way that matched on a hit
        comb += req_hit_way.eq(hit_way)

        # The way to replace on a miss
        with m.If(r1.write_tag):
            comb += replace_way.eq(plru_victim[r1.store_index])
        with m.Else():
            comb += replace_way.eq(r1.store_way)

        # work out whether we have permission for this access
        # NB we don't yet implement AMR, thus no KUAP
        comb += rc_ok.eq(perm_attr.reference
                         & (r0.req.load | perm_attr.changed))
        comb += perm_ok.eq((r0.req.priv_mode | (~perm_attr.priv)) &
                           (perm_attr.wr_perm |
                            (r0.req.load & perm_attr.rd_perm)))
        comb += access_ok.eq(valid_ra & perm_ok & rc_ok)

        # Combine the request and cache hit status to decide what
        # operation needs to be done
        comb += nc.eq(r0.req.nc | perm_attr.nocache)
        comb += op.eq(Op.OP_NONE)
        with m.If(go):
            with m.If(~access_ok):
                m.d.sync += Display("DCACHE access fail valid_ra=%d p=%d rc=%d",
                                    valid_ra, perm_ok, rc_ok)
                comb += op.eq(Op.OP_BAD)
            with m.Elif(cancel_store):
                m.d.sync += Display("DCACHE cancel store")
                comb += op.eq(Op.OP_STCX_FAIL)
            with m.Else():
                m.d.sync += Display("DCACHE valid_ra=%d nc=%d ld=%d",
                                    valid_ra, nc, r0.req.load)
                # opsel is Cat(is_hit, nc, load): LSB is the hit bit,
                # so e.g. case 0b101 reads load=1, nc=0, is_hit=1
                comb += opsel.eq(Cat(is_hit, nc, r0.req.load))
                with m.Switch(opsel):
                    with m.Case(0b101): comb += op.eq(Op.OP_LOAD_HIT)
                    with m.Case(0b100): comb += op.eq(Op.OP_LOAD_MISS)
                    with m.Case(0b110): comb += op.eq(Op.OP_LOAD_NC)
                    with m.Case(0b001): comb += op.eq(Op.OP_STORE_HIT)
                    with m.Case(0b000): comb += op.eq(Op.OP_STORE_MISS)
                    with m.Case(0b010): comb += op.eq(Op.OP_STORE_MISS)
                    with m.Case(0b011): comb += op.eq(Op.OP_BAD)
                    with m.Case(0b111): comb += op.eq(Op.OP_BAD)
        comb += req_op.eq(op)
        comb += req_go.eq(go)

        # Version of the row number that is valid one cycle earlier
        # in the cases where we need to read the cache data BRAM.
        # If we're stalling then we need to keep reading the last
        # row requested.
        with m.If(~r0_stall):
            with m.If(m_in.valid):
                comb += early_req_row.eq(get_row(m_in.addr))
            with m.Else():
                comb += early_req_row.eq(get_row(d_in.addr))
        with m.Else():
            comb += early_req_row.eq(req_row)

    def reservation_comb(self, m, cancel_store, set_rsrv, clear_rsrv,
                         r0_valid, r0, reservation):
        """Handle load-with-reservation and store-conditional instructions
        """
        comb = m.d.comb

        with m.If(r0_valid & r0.req.reserve):
            # XXX generate alignment interrupt if address is not aligned,
            # XXX or if r0.req.nc = '1'
            with m.If(r0.req.load):
                comb += set_rsrv.eq(r0.req.atomic_last) # load with reservation
            with m.Else():
                comb += clear_rsrv.eq(r0.req.atomic_last) # store conditional
                with m.If((~reservation.valid) |
                          (r0.req.addr[LINE_OFF_BITS:64] != reservation.addr)):
                    comb += cancel_store.eq(1)

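    # Illustrative flow (documentation only, no additional logic): a
    # larx to address A sets the reservation below in reservation_reg,
    # recording A's line address with valid=1.  A later stcx to a
    # different line, or with no valid reservation, raises cancel_store
    # above, which dcache_request turns into OP_STCX_FAIL instead of a
    # store.
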
    def reservation_reg(self, m, r0_valid, access_ok, set_rsrv, clear_rsrv,
                        reservation, r0):

        comb = m.d.comb
        sync = m.d.sync

        with m.If(r0_valid & access_ok):
            with m.If(clear_rsrv):
                sync += reservation.valid.eq(0)
            with m.Elif(set_rsrv):
                sync += reservation.valid.eq(1)
                sync += reservation.addr.eq(r0.req.addr[LINE_OFF_BITS:64])

    def writeback_control(self, m, r1, cache_out_row):
        """Return data for loads & completion control logic
        """
        comb = m.d.comb
        sync = m.d.sync
        d_out, m_out = self.d_out, self.m_out

        data_out = Signal(64)
        data_fwd = Signal(64)

        # Use the bypass if we are reading the row that was
        # written 1 or 2 cycles ago, including for the
        # slow_valid = 1 case (i.e. completing a load
        # miss or a non-cacheable load): forward_data1 holds
        # last cycle's write, forward_data2 the one before.
        with m.If(r1.use_forward1):
            comb += data_fwd.eq(r1.forward_data1)
        with m.Else():
            comb += data_fwd.eq(r1.forward_data2)

        comb += data_out.eq(cache_out_row)

        for i in range(8):
            with m.If(r1.forward_sel[i]):
                dsel = data_fwd.word_select(i, 8)
                comb += data_out.word_select(i, 8).eq(dsel)

        comb += d_out.valid.eq(r1.ls_valid)
        comb += d_out.data.eq(data_out)
        comb += d_out.store_done.eq(~r1.stcx_fail)
        comb += d_out.error.eq(r1.ls_error)
        comb += d_out.cache_paradox.eq(r1.cache_paradox)

        # Outputs to MMU
        comb += m_out.done.eq(r1.mmu_done)
        comb += m_out.err.eq(r1.mmu_error)
        comb += m_out.data.eq(data_out)

        # We have a valid load or store hit or we just completed
        # a slow op such as a load miss, a NC load or a store
        #
        # Note: the load hit is delayed by one cycle. However it
        # can still not collide with r.slow_valid (well unless I
        # miscalculated) because slow_valid can only be set on a
        # subsequent request and not on its first cycle (the state
        # machine must have advanced), which makes slow_valid
        # at least 2 cycles from the previous hit_load_valid.

        # Sanity: Only one of these must be set in any given cycle

        if False: # TODO: need Display to get this to work
            assert (r1.slow_valid & r1.stcx_fail) != 1, \
                "unexpected slow_valid collision with stcx_fail"

            assert ((r1.slow_valid | r1.stcx_fail) | r1.hit_load_valid) != 1, \
                "unexpected hit_load_delayed collision with slow_valid"

        with m.If(~r1.mmu_req):
            # Request came from loadstore1...
            # Load hit case is the standard path
            with m.If(r1.hit_load_valid):
                sync += Display("completing load hit data=%x", data_out)

            # error cases complete without stalling
            with m.If(r1.ls_error):
                with m.If(r1.dcbz):
                    sync += Display("completing dcbz with error")
                with m.Else():
                    sync += Display("completing ld/st with error")

            # Slow ops (load miss, NC, stores)
            with m.If(r1.slow_valid):
                sync += Display("completing store or load miss adr=%x data=%x",
                                r1.req.real_addr, data_out)

        with m.Else():
            # Request came from MMU
            with m.If(r1.hit_load_valid):
                sync += Display("completing load hit to MMU, data=%x",
                                m_out.data)
            # error cases complete without stalling
            with m.If(r1.mmu_error):
                sync += Display("completing MMU ld with error")

            # Slow ops (i.e. load miss)
            with m.If(r1.slow_valid):
                sync += Display("completing MMU load miss, adr=%x data=%x",
                                r1.req.real_addr, m_out.data)

    def rams(self, m, r1, early_req_row, cache_out_row, replace_way):
        """rams
        Generate a cache RAM for each way. This handles the normal
        reads, writes from reloads and the special store-hit update
        path as well.

        Note: the BRAMs have an extra read buffer, meaning the output
        is pipelined an extra cycle. This differs from the
        icache. The writeback logic needs to take that into
        account by using 1-cycle delayed signals for load hits.
        """
        comb = m.d.comb
        bus = self.bus

        for i in range(NUM_WAYS):
            do_read = Signal(name="do_rd%d" % i)
            rd_addr = Signal(ROW_BITS, name="rd_addr_%d" % i)
            do_write = Signal(name="do_wr%d" % i)
            wr_addr = Signal(ROW_BITS, name="wr_addr_%d" % i)
            wr_data = Signal(WB_DATA_BITS, name="din_%d" % i)
            wr_sel = Signal(ROW_SIZE)
            wr_sel_m = Signal(ROW_SIZE)
            _d_out = Signal(WB_DATA_BITS, name="dout_%d" % i) # cache_row_t

            way = CacheRam(ROW_BITS, WB_DATA_BITS, ADD_BUF=True, ram_num=i)
            setattr(m.submodules, "cacheram_%d" % i, way)

            comb += way.rd_en.eq(do_read)
            comb += way.rd_addr.eq(rd_addr)
            comb += _d_out.eq(way.rd_data_o)
            comb += way.wr_sel.eq(wr_sel_m)
            comb += way.wr_addr.eq(wr_addr)
            comb += way.wr_data.eq(wr_data)

            # Cache hit reads
            comb += do_read.eq(1)
            comb += rd_addr.eq(early_req_row)
            with m.If(r1.hit_way == i):
                comb += cache_out_row.eq(_d_out)

            # Write mux:
            #
            # Defaults to wishbone read responses (cache refill)
            #
            # For timing, the mux on wr_data/sel/addr is not
            # dependent on anything other than the current state.

            with m.If(r1.write_bram):
                # Write store data to BRAM. This happens one
                # cycle after the store is in r0.
                comb += wr_data.eq(r1.req.data)
                comb += wr_sel.eq(r1.req.byte_sel)
                comb += wr_addr.eq(get_row(r1.req.real_addr))

                with m.If(i == r1.req.hit_way):
                    comb += do_write.eq(1)
            with m.Else():
                # Otherwise, we might be doing a reload or a DCBZ
                with m.If(r1.dcbz):
                    comb += wr_data.eq(0)
                with m.Else():
                    comb += wr_data.eq(bus.dat_r)
                comb += wr_addr.eq(r1.store_row)
                comb += wr_sel.eq(~0) # all 1s

                with m.If((r1.state == State.RELOAD_WAIT_ACK)
                          & bus.ack & (replace_way == i)):
                    comb += do_write.eq(1)

            # Mask write selects with do_write since BRAM
            # doesn't have a global write-enable
            with m.If(do_write):
                comb += wr_sel_m.eq(wr_sel)

    # Cache hit synchronous machine for the easy case.
    # This handles load hits.
    # It also handles error cases (TLB miss, cache paradox)
    def dcache_fast_hit(self, m, req_op, r0_valid, r0, r1,
                        req_hit_way, req_index, req_tag, access_ok,
                        tlb_hit, tlb_req_index):

        comb = m.d.comb
        sync = m.d.sync

        with m.If(req_op != Op.OP_NONE):
            sync += Display("op:%d addr:%x nc: %d idx: %x tag: %x way: %x",
                            req_op, r0.req.addr, r0.req.nc,
                            req_index, req_tag, req_hit_way)

        with m.If(r0_valid):
            sync += r1.mmu_req.eq(r0.mmu_req)

        # Fast path for load/store hits.
        # Set signals for the writeback controls.
        sync += r1.hit_way.eq(req_hit_way)
        sync += r1.hit_index.eq(req_index)

        with m.If(req_op == Op.OP_LOAD_HIT):
            sync += r1.hit_load_valid.eq(1)
        with m.Else():
            sync += r1.hit_load_valid.eq(0)

        with m.If((req_op == Op.OP_LOAD_HIT) | (req_op == Op.OP_STORE_HIT)):
            sync += r1.cache_hit.eq(1)
        with m.Else():
            sync += r1.cache_hit.eq(0)

        with m.If(req_op == Op.OP_BAD):
            sync += Display("Signalling ld/st error "
                            "ls_error=%i mmu_error=%i cache_paradox=%i",
                            ~r0.mmu_req, r0.mmu_req, access_ok)
            sync += r1.ls_error.eq(~r0.mmu_req)
            sync += r1.mmu_error.eq(r0.mmu_req)
            sync += r1.cache_paradox.eq(access_ok)

        with m.Else():
            sync += r1.ls_error.eq(0)
            sync += r1.mmu_error.eq(0)
            sync += r1.cache_paradox.eq(0)

        with m.If(req_op == Op.OP_STCX_FAIL):
            sync += r1.stcx_fail.eq(1)
        with m.Else():
            sync += r1.stcx_fail.eq(0)

        # Record TLB hit information for updating TLB PLRU
        sync += r1.tlb_hit.eq(tlb_hit)
        sync += r1.tlb_hit_index.eq(tlb_req_index)

    # Memory accesses are handled by this state machine:
    #
    #   * Cache load miss/reload (in conjunction with "rams")
    #   * Load hits for non-cacheable forms
    #   * Stores (the collision case is handled in "rams")
    #
    # All wishbone request generation is done here.
    # This machine operates at stage 1.
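    #
    # Summary of the transitions implemented below (for reference):
    #   IDLE             -> RELOAD_WAIT_ACK  on load miss or dcbz
    #   IDLE             -> NC_LOAD_WAIT_ACK on non-cacheable load
    #   IDLE             -> STORE_WAIT_ACK   on store (hit or miss)
    #   RELOAD_WAIT_ACK  -> IDLE             when the last row is acked
    #   STORE_WAIT_ACK   -> IDLE             when all pending acks arrive
    #   NC_LOAD_WAIT_ACK -> IDLE             on ack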
    def dcache_slow(self, m, r1, use_forward1_next, use_forward2_next,
                    r0, replace_way,
                    req_hit_way, req_same_tag,
                    r0_valid, req_op, cache_tags, req_go, ra):

        comb = m.d.comb
        sync = m.d.sync
        bus = self.bus
        d_in = self.d_in

        req = MemAccessRequest("mreq_ds")

        req_row = Signal(ROW_BITS)
        req_idx = Signal(INDEX_BITS)
        req_tag = Signal(TAG_BITS)
        comb += req_idx.eq(get_index(req.real_addr))
        comb += req_row.eq(get_row(req.real_addr))
        comb += req_tag.eq(get_tag(req.real_addr))

        sync += r1.use_forward1.eq(use_forward1_next)
        sync += r1.forward_sel.eq(0)

        with m.If(use_forward1_next):
            sync += r1.forward_sel.eq(r1.req.byte_sel)
        with m.Elif(use_forward2_next):
            sync += r1.forward_sel.eq(r1.forward_sel1)

        sync += r1.forward_data2.eq(r1.forward_data1)
        with m.If(r1.write_bram):
            sync += r1.forward_data1.eq(r1.req.data)
            sync += r1.forward_sel1.eq(r1.req.byte_sel)
            sync += r1.forward_way1.eq(r1.req.hit_way)
            sync += r1.forward_row1.eq(get_row(r1.req.real_addr))
            sync += r1.forward_valid1.eq(1)
        with m.Else():
            with m.If(r1.dcbz):
                sync += r1.forward_data1.eq(0)
            with m.Else():
                sync += r1.forward_data1.eq(bus.dat_r)
            sync += r1.forward_sel1.eq(~0) # all 1s
            sync += r1.forward_way1.eq(replace_way)
            sync += r1.forward_row1.eq(r1.store_row)
            sync += r1.forward_valid1.eq(0)

        # One cycle pulses reset
        sync += r1.slow_valid.eq(0)
        sync += r1.write_bram.eq(0)
        sync += r1.inc_acks.eq(0)
        sync += r1.dec_acks.eq(0)

        sync += r1.ls_valid.eq(0)
        # complete tlbies and TLB loads in the third cycle
        sync += r1.mmu_done.eq(r0_valid & (r0.tlbie | r0.tlbld))

        with m.If((req_op == Op.OP_LOAD_HIT) | (req_op == Op.OP_STCX_FAIL)):
            with m.If(~r0.mmu_req):
                sync += r1.ls_valid.eq(1)
            with m.Else():
                sync += r1.mmu_done.eq(1)

        with m.If(r1.write_tag):
            # Store new tag in selected way
            for i in range(NUM_WAYS):
                with m.If(i == replace_way):
                    ct = Signal(TAG_RAM_WIDTH)
                    comb += ct.eq(cache_tags[r1.store_index].tag)
                    """
                    TODO: check this

                    cache_tags(r1.store_index)((i + 1) * TAG_WIDTH - 1
                                               downto i * TAG_WIDTH) <=
                        (TAG_WIDTH - 1 downto TAG_BITS => '0') & r1.reload_tag;
                    """
                    comb += ct.word_select(i, TAG_WIDTH).eq(r1.reload_tag)
                    sync += cache_tags[r1.store_index].tag.eq(ct)
            sync += r1.store_way.eq(replace_way)
            sync += r1.write_tag.eq(0)

        # Take request from r1.req if there is one there,
        # else from req_op, ra, etc.
        with m.If(r1.full):
            comb += req.eq(r1.req)
        with m.Else():
            comb += req.op.eq(req_op)
            comb += req.valid.eq(req_go)
            comb += req.mmu_req.eq(r0.mmu_req)
            comb += req.dcbz.eq(r0.req.dcbz)
            comb += req.real_addr.eq(ra)

            with m.If(r0.req.dcbz):
                # force data to 0 for dcbz
                comb += req.data.eq(0)
            with m.Elif(r0.d_valid):
                comb += req.data.eq(r0.req.data)
            with m.Else():
                comb += req.data.eq(d_in.data)

            # Select all bytes for dcbz
            # and for cacheable loads
            with m.If(r0.req.dcbz | (r0.req.load & ~r0.req.nc)):
                comb += req.byte_sel.eq(~0) # all 1s
            with m.Else():
                comb += req.byte_sel.eq(r0.req.byte_sel)
            comb += req.hit_way.eq(req_hit_way)
            comb += req.same_tag.eq(req_same_tag)

        # Store the incoming request from r0,
        # if it is a slow request
        # Note that r1.full = 1 implies req_op = OP_NONE
        with m.If((req_op == Op.OP_LOAD_MISS)
                  | (req_op == Op.OP_LOAD_NC)
                  | (req_op == Op.OP_STORE_MISS)
                  | (req_op == Op.OP_STORE_HIT)):
            sync += r1.req.eq(req)
            sync += r1.full.eq(1)

        # Main state machine
        with m.Switch(r1.state):

            with m.Case(State.IDLE):
                # wb.adr is the row address (the low ROW_OFF_BITS of
                # the real address are stripped off)
                sync += r1.wb.adr.eq(req.real_addr[ROW_OFF_BITS:])
                sync += r1.wb.sel.eq(req.byte_sel)
                sync += r1.wb.dat.eq(req.data)
                sync += r1.dcbz.eq(req.dcbz)

                # Keep track of our index and way
                # for subsequent stores.
                sync += r1.store_index.eq(req_idx)
                sync += r1.store_row.eq(req_row)
                sync += r1.end_row_ix.eq(get_row_of_line(req_row)-1)
                sync += r1.reload_tag.eq(req_tag)
                sync += r1.req.same_tag.eq(1)

                with m.If(req.op == Op.OP_STORE_HIT):
                    sync += r1.store_way.eq(req.hit_way)

                # Reset per-row valid bits,
                # ready for handling OP_LOAD_MISS
                for i in range(ROW_PER_LINE):
                    sync += r1.rows_valid[i].eq(0)

                with m.If(req_op != Op.OP_NONE):
                    sync += Display("cache op %d", req.op)

                with m.Switch(req.op):
                    with m.Case(Op.OP_LOAD_HIT):
                        # stay in IDLE state
                        pass

                    with m.Case(Op.OP_LOAD_MISS):
                        sync += Display("cache miss real addr: %x " \
                                        "idx: %x tag: %x",
                                        req.real_addr, req_row, req_tag)

                        # Start the wishbone cycle
                        sync += r1.wb.we.eq(0)
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)

                        # Track that we had one request sent
                        sync += r1.state.eq(State.RELOAD_WAIT_ACK)
                        sync += r1.write_tag.eq(1)

                    with m.Case(Op.OP_LOAD_NC):
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)
                        sync += r1.wb.we.eq(0)
                        sync += r1.state.eq(State.NC_LOAD_WAIT_ACK)

                    with m.Case(Op.OP_STORE_HIT, Op.OP_STORE_MISS):
                        with m.If(~req.dcbz):
                            sync += r1.state.eq(State.STORE_WAIT_ACK)
                            sync += r1.acks_pending.eq(1)
                            sync += r1.full.eq(0)
                            sync += r1.slow_valid.eq(1)

                            with m.If(~req.mmu_req):
                                sync += r1.ls_valid.eq(1)
                            with m.Else():
                                sync += r1.mmu_done.eq(1)

                            with m.If(req.op == Op.OP_STORE_HIT):
                                sync += r1.write_bram.eq(1)
                        with m.Else():
                            # dcbz is handled much like a load miss except
                            # that we are writing to memory instead of reading
                            sync += r1.state.eq(State.RELOAD_WAIT_ACK)

                            with m.If(req.op == Op.OP_STORE_MISS):
                                sync += r1.write_tag.eq(1)

                        sync += r1.wb.we.eq(1)
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)

                    # OP_NONE and OP_BAD do nothing
                    # OP_BAD & OP_STCX_FAIL were
                    # handled above already
                    with m.Case(Op.OP_NONE):
                        pass
                    with m.Case(Op.OP_BAD):
                        pass
                    with m.Case(Op.OP_STCX_FAIL):
                        pass

            with m.Case(State.RELOAD_WAIT_ACK):
                ld_stbs_done = Signal()
                # Requests are all sent if stb is 0
                comb += ld_stbs_done.eq(~r1.wb.stb)

                # If we are still sending requests, was one accepted?
                with m.If((~bus.stall) & r1.wb.stb):
                    # That was the last word?  We are done sending.
                    # Clear stb and set ld_stbs_done so we can handle an
                    # eventual last ack on the same cycle.
                    # sigh - reconstruct wb adr with 3 extra 0s at front
                    wb_adr = Cat(Const(0, ROW_OFF_BITS), r1.wb.adr)
                    with m.If(is_last_row_addr(wb_adr, r1.end_row_ix)):
                        sync += r1.wb.stb.eq(0)
                        comb += ld_stbs_done.eq(1)

                    # Calculate the next row address in the current cache line
                    row = Signal(LINE_OFF_BITS-ROW_OFF_BITS)
                    comb += row.eq(r1.wb.adr)
                    sync += r1.wb.adr[:LINE_OFF_BITS-ROW_OFF_BITS].eq(row+1)

                # Incoming acks processing
                sync += r1.forward_valid1.eq(bus.ack)
                with m.If(bus.ack):
                    srow = Signal(ROW_LINE_BITS)
                    comb += srow.eq(r1.store_row)
                    sync += r1.rows_valid[srow].eq(1)

                    # If this is the data we were looking for,
                    # we can complete the request next cycle.
                    # Compare the whole address in case the
                    # request in r1.req is not the one that
                    # started this refill.
                    with m.If(req.valid & r1.req.same_tag &
                              ((r1.dcbz & r1.req.dcbz) |
                               (~r1.dcbz & (r1.req.op == Op.OP_LOAD_MISS))) &
                              (r1.store_row == get_row(req.real_addr))):
                        sync += r1.full.eq(0)
                        sync += r1.slow_valid.eq(1)
                        with m.If(~r1.mmu_req):
                            sync += r1.ls_valid.eq(1)
                        with m.Else():
                            sync += r1.mmu_done.eq(1)
                        sync += r1.forward_sel.eq(~0) # all 1s
                        sync += r1.use_forward1.eq(1)

                    # Check for completion
                    with m.If(ld_stbs_done & is_last_row(r1.store_row,
                                                         r1.end_row_ix)):
                        # Complete wishbone cycle
                        sync += r1.wb.cyc.eq(0)

                        # Cache line is now valid
                        cv = Signal(NUM_WAYS) # per-way valid bits
                        comb += cv.eq(cache_tags[r1.store_index].valid)
                        comb += cv.bit_select(r1.store_way, 1).eq(1)
                        sync += cache_tags[r1.store_index].valid.eq(cv)

                        sync += r1.state.eq(State.IDLE)
                        sync += Display("cache valid set %x "
                                        "idx %d way %d",
                                        cv, r1.store_index, r1.store_way)

                    # Increment store row counter
                    sync += r1.store_row.eq(next_row(r1.store_row))

            with m.Case(State.STORE_WAIT_ACK):
                st_stbs_done = Signal()
                acks = Signal(3)
                adjust_acks = Signal(3)

                comb += st_stbs_done.eq(~r1.wb.stb)
                comb += acks.eq(r1.acks_pending)

                with m.If(r1.inc_acks != r1.dec_acks):
                    with m.If(r1.inc_acks):
                        comb += adjust_acks.eq(acks + 1)
                    with m.Else():
                        comb += adjust_acks.eq(acks - 1)
                with m.Else():
                    comb += adjust_acks.eq(acks)

                sync += r1.acks_pending.eq(adjust_acks)

                # Clear stb when slave accepted request
                with m.If(~bus.stall):
                    # See if there is another store waiting
                    # to be done which is in the same real page.
                    # (update only the in-page part of the row address)
                    with m.If(req.valid):
                        _ra = req.real_addr[ROW_OFF_BITS:SET_SIZE_BITS]
                        sync += r1.wb.adr[0:SET_SIZE_BITS-ROW_OFF_BITS].eq(_ra)
                        sync += r1.wb.dat.eq(req.data)
                        sync += r1.wb.sel.eq(req.byte_sel)

                    with m.If((adjust_acks < 7) & req.same_tag &
                              ((req.op == Op.OP_STORE_MISS)
                               | (req.op == Op.OP_STORE_HIT))):
                        sync += r1.wb.stb.eq(1)
                        comb += st_stbs_done.eq(0)

                        with m.If(req.op == Op.OP_STORE_HIT):
                            sync += r1.write_bram.eq(1)
                        sync += r1.full.eq(0)
                        sync += r1.slow_valid.eq(1)

                        # Store requests never come from the MMU
                        sync += r1.ls_valid.eq(1)
                        comb += st_stbs_done.eq(0)
                        sync += r1.inc_acks.eq(1)
                    with m.Else():
                        sync += r1.wb.stb.eq(0)
                        comb += st_stbs_done.eq(1)

                # Got ack ? See if complete.
                with m.If(bus.ack):
                    with m.If(st_stbs_done & (adjust_acks == 1)):
                        sync += r1.state.eq(State.IDLE)
                        sync += r1.wb.cyc.eq(0)
                        sync += r1.wb.stb.eq(0)
                    sync += r1.dec_acks.eq(1)

            with m.Case(State.NC_LOAD_WAIT_ACK):
                # Clear stb when slave accepted request
                with m.If(~bus.stall):
                    sync += r1.wb.stb.eq(0)

                # Got ack ? complete.
                with m.If(bus.ack):
                    sync += r1.state.eq(State.IDLE)
                    sync += r1.full.eq(0)
                    sync += r1.slow_valid.eq(1)

                    with m.If(~r1.mmu_req):
                        sync += r1.ls_valid.eq(1)
                    with m.Else():
                        sync += r1.mmu_done.eq(1)

                    sync += r1.forward_sel.eq(~0) # all 1s
                    sync += r1.use_forward1.eq(1)
                    sync += r1.wb.cyc.eq(0)
                    sync += r1.wb.stb.eq(0)

    # NOTE: currently not wired up (the call in elaborate below is
    # commented out); req_op and r1.real_adr would need to be plumbed
    # through before re-enabling it.
    def dcache_log(self, m, r1, valid_ra, tlb_hit, stall_out):

        sync = m.d.sync
        d_out, bus, log_out = self.d_out, self.bus, self.log_out

        sync += log_out.eq(Cat(r1.state[:3], valid_ra, tlb_hit.way[:3],
                               stall_out, req_op[:3], d_out.valid, d_out.error,
                               r1.wb.cyc, r1.wb.stb, bus.ack, bus.stall,
                               r1.real_adr[3:6]))

    def elaborate(self, platform):

        m = Module()
        comb = m.d.comb
        d_in = self.d_in

        # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
        cache_tags = CacheTagArray()
        cache_tag_set = Signal(TAG_RAM_WIDTH)

        # TODO attribute ram_style : string;
        # TODO attribute ram_style of cache_tags : signal is "distributed";

        """note: these are passed to nmigen.hdl.Memory as "attributes".
           don't know how, just that they are.
        """
        dtlb = TLBArray()
        # TODO attribute ram_style of
        #  dtlb_tags : signal is "distributed";
        # TODO attribute ram_style of
        #  dtlb_ptes : signal is "distributed";

        r0 = RegStage0("r0")
        r0_full = Signal()

        r1 = RegStage1("r1")

        reservation = Reservation()

        # Async signals on incoming request
        req_index = Signal(INDEX_BITS)
        req_row = Signal(ROW_BITS)
        req_hit_way = Signal(WAY_BITS)
        req_tag = Signal(TAG_BITS)
        req_op = Signal(Op)
        req_data = Signal(64)
        req_same_tag = Signal()
        req_go = Signal()

        early_req_row = Signal(ROW_BITS)

        cancel_store = Signal()
        set_rsrv = Signal()
        clear_rsrv = Signal()

        r0_valid = Signal()
        r0_stall = Signal()

        use_forward1_next = Signal()
        use_forward2_next = Signal()

        cache_out_row = Signal(WB_DATA_BITS)

        plru_victim = PLRUOut()
        replace_way = Signal(WAY_BITS)

        # Wishbone read/write/cache write formatting signals
        bus_sel = Signal(8)

        # TLB signals
        tlb_way = TLBRecord("tlb_way")
        tlb_req_index = Signal(TLB_SET_BITS)
        tlb_hit = TLBHit("tlb_hit")
        pte = Signal(TLB_PTE_BITS)
        ra = Signal(REAL_ADDR_BITS)
        valid_ra = Signal()
        perm_attr = PermAttr("dc_perms")
        rc_ok = Signal()
        perm_ok = Signal()
        access_ok = Signal()

        tlb_plru_victim = TLBPLRUOut()

        # we don't yet handle collisions between loadstore1 requests
        # and MMU requests
        comb += self.m_out.stall.eq(0)

        # Hold off the request in r0 when r1 has an uncompleted request
        comb += r0_stall.eq(r0_full & (r1.full | d_in.hold))
        comb += r0_valid.eq(r0_full & ~r1.full & ~d_in.hold)
        comb += self.stall_out.eq(r0_stall)

        # deal with litex not doing wishbone pipeline mode
        # XXX in wrong way.  FIFOs are needed in the SRAM test
        # so that stb/ack match up. same thing done in icache.py
        comb += self.bus.stall.eq(self.bus.cyc & ~self.bus.ack)

        # Wire up wishbone request latch out of stage 1
        comb += self.bus.we.eq(r1.wb.we)
        comb += self.bus.adr.eq(r1.wb.adr)
        comb += self.bus.sel.eq(r1.wb.sel)
        comb += self.bus.stb.eq(r1.wb.stb)
        comb += self.bus.dat_w.eq(r1.wb.dat)
        comb += self.bus.cyc.eq(r1.wb.cyc)

        # call sub-functions putting everything together, using shared
        # signals established above
        self.stage_0(m, r0, r1, r0_full)
        self.tlb_read(m, r0_stall, tlb_way, dtlb)
        self.tlb_search(m, tlb_req_index, r0, r0_valid,
                        tlb_way,
                        pte, tlb_hit, valid_ra, perm_attr, ra)
        self.tlb_update(m, r0_valid, r0, dtlb, tlb_req_index,
                        tlb_hit, tlb_plru_victim,
                        tlb_way)
        self.maybe_plrus(m, r1, plru_victim)
        self.maybe_tlb_plrus(m, r1, tlb_plru_victim)
        self.cache_tag_read(m, r0_stall, req_index, cache_tag_set, cache_tags)
        self.dcache_request(m, r0, ra, req_index, req_row, req_tag,
                            r0_valid, r1, cache_tags, replace_way,
                            use_forward1_next, use_forward2_next,
                            req_hit_way, plru_victim, rc_ok, perm_attr,
                            valid_ra, perm_ok, access_ok, req_op, req_go,
                            tlb_hit, tlb_way, cache_tag_set,
                            cancel_store, req_same_tag, r0_stall,
                            early_req_row)
        self.reservation_comb(m, cancel_store, set_rsrv, clear_rsrv,
                              r0_valid, r0, reservation)
        self.reservation_reg(m, r0_valid, access_ok, set_rsrv, clear_rsrv,
                             reservation, r0)
        self.writeback_control(m, r1, cache_out_row)
        self.rams(m, r1, early_req_row, cache_out_row, replace_way)
        self.dcache_fast_hit(m, req_op, r0_valid, r0, r1,
                             req_hit_way, req_index, req_tag, access_ok,
                             tlb_hit, tlb_req_index)
        self.dcache_slow(m, r1, use_forward1_next, use_forward2_next,
                         r0, replace_way,
                         req_hit_way, req_same_tag,
                         r0_valid, req_op, cache_tags, req_go, ra)
        #self.dcache_log(m, r1, valid_ra, tlb_hit, stall_out)

        return m

if __name__ == '__main__':
    dut = DCache()
    vl = rtlil.convert(dut, ports=[])
    with open("test_dcache.il", "w") as f:
        f.write(vl)