1 """DCache
2
3 based on Anton Blanchard microwatt dcache.vhdl
4
5 note that the microwatt dcache wishbone interface expects "stall".
6 for simplicity at the moment this is hard-coded to cyc & ~ack.
7 see WB4 spec, p84, section 5.2.1
8
9 IMPORTANT: for store, the data is sampled the cycle AFTER the "valid"
10 is raised. sigh
11
12 Links:
13
14 * https://libre-soc.org/3d_gpu/architecture/set_associative_cache.jpg
15 * https://bugs.libre-soc.org/show_bug.cgi?id=469
16
17 """
18
19 import sys
20
21 from nmutil.gtkw import write_gtkw
22
23 sys.setrecursionlimit(1000000)
24
25 from enum import Enum, unique
26
27 from nmigen import (Module, Signal, Elaboratable, Cat, Repl, Array, Const,
28 Record)
29 from nmutil.util import Display
30
31 from copy import deepcopy
32 from random import randint, seed
33
34 from nmigen_soc.wishbone.bus import Interface
35
36 from nmigen.cli import main
37 from nmutil.iocontrol import RecordObject
38 from nmigen.utils import log2_int
39 from soc.experiment.mem_types import (LoadStore1ToDCacheType,
40 DCacheToLoadStore1Type,
41 MMUToDCacheType,
42 DCacheToMMUType)
43
44 from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS, WB_SEL_BITS,
45 WBAddrType, WBDataType, WBSelType,
46 WBMasterOut, WBSlaveOut,
47 WBMasterOutVector, WBSlaveOutVector,
48 WBIOMasterOut, WBIOSlaveOut)
49
50 from soc.experiment.cache_ram import CacheRam
51 #from soc.experiment.plru import PLRU
52 from nmutil.plru import PLRU
53
54 # for test
55 from soc.bus.sram import SRAM
56 from nmigen import Memory
57 from nmigen.cli import rtlil
58
59 # NOTE: to use cxxsim, export NMIGEN_SIM_MODE=cxxsim from the shell
60 # Also, check out the cxxsim nmigen branch, and latest yosys from git
61 from nmutil.sim_tmp_alternative import Simulator
62
63 from nmutil.util import wrap
64
65
66 # TODO: make these parameters of DCache at some point
67 LINE_SIZE = 64 # Line size in bytes
68 NUM_LINES = 16 # Number of lines in a set
69 NUM_WAYS = 4 # Number of ways
70 TLB_SET_SIZE = 64 # L1 DTLB entries per set
71 TLB_NUM_WAYS = 2 # L1 DTLB number of sets
72 TLB_LG_PGSZ = 12 # L1 DTLB log_2(page_size)
73 LOG_LENGTH = 0 # Non-zero to enable log data collection
74
75 # BRAM organisation: We never access more than
76 # -- WB_DATA_BITS at a time so to save
77 # -- resources we make the array only that wide, and
78 # -- use consecutive indices to make a cache "line"
79 # --
80 # -- ROW_SIZE is the width in bytes of the BRAM
81 # -- (based on WB, so 64-bits)
82 ROW_SIZE = WB_DATA_BITS // 8;
83
84 # ROW_PER_LINE is the number of row (wishbone
85 # transactions) in a line
86 ROW_PER_LINE = LINE_SIZE // ROW_SIZE
87
88 # BRAM_ROWS is the number of rows in BRAM needed
89 # to represent the full dcache
90 BRAM_ROWS = NUM_LINES * ROW_PER_LINE
91
92 print ("ROW_SIZE", ROW_SIZE)
93 print ("ROW_PER_LINE", ROW_PER_LINE)
94 print ("BRAM_ROWS", BRAM_ROWS)
95 print ("NUM_WAYS", NUM_WAYS)
96
97 # Bit fields counts in the address
98
99 # REAL_ADDR_BITS is the number of real address
100 # bits that we store
101 REAL_ADDR_BITS = 56
102
103 # ROW_BITS is the number of bits to select a row
104 ROW_BITS = log2_int(BRAM_ROWS)
105
106 # ROW_LINE_BITS is the number of bits to select
107 # a row within a line
108 ROW_LINE_BITS = log2_int(ROW_PER_LINE)
109
110 # LINE_OFF_BITS is the number of bits for
111 # the offset in a cache line
112 LINE_OFF_BITS = log2_int(LINE_SIZE)
113
114 # ROW_OFF_BITS is the number of bits for
115 # the offset in a row
116 ROW_OFF_BITS = log2_int(ROW_SIZE)
117
118 # INDEX_BITS is the number if bits to
119 # select a cache line
120 INDEX_BITS = log2_int(NUM_LINES)
121
122 # SET_SIZE_BITS is the log base 2 of the set size
123 SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS
124
125 # TAG_BITS is the number of bits of
126 # the tag part of the address
127 TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS
128
129 # TAG_WIDTH is the width in bits of each way of the tag RAM
130 TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)
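# (i.e. TAG_BITS rounded up to the next whole number of bytes)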

# WAY_BITS is the number of bits to select a way
WAY_BITS = log2_int(NUM_WAYS)

# Example of layout for 32 lines of 64 bytes:
layout = """\
  ..  tag    |index| line  |
  ..         |   row   |   |
  ..         |     |---|   | ROW_LINE_BITS  (3)
  ..         |     |--- - --| LINE_OFF_BITS (6)
  ..         |         |- --| ROW_OFF_BITS  (3)
  ..         |----- ---|   | ROW_BITS      (8)
  ..         |-----|       | INDEX_BITS    (5)
  ..  --------|            | TAG_BITS      (45)
"""
print (layout)
print ("Dcache TAG %d IDX %d ROW_BITS %d ROFF %d LOFF %d RLB %d" % \
            (TAG_BITS, INDEX_BITS, ROW_BITS,
             ROW_OFF_BITS, LINE_OFF_BITS, ROW_LINE_BITS))
print ("index @: %d-%d" % (LINE_OFF_BITS, SET_SIZE_BITS))
print ("row @: %d-%d" % (ROW_OFF_BITS, SET_SIZE_BITS))
print ("tag @: %d-%d width %d" % (SET_SIZE_BITS, REAL_ADDR_BITS, TAG_WIDTH))

TAG_RAM_WIDTH = TAG_WIDTH * NUM_WAYS

print ("TAG_RAM_WIDTH", TAG_RAM_WIDTH)

def CacheTagArray():
    tag_layout = [('valid', NUM_WAYS), # one valid bit per way
                  ('tag', TAG_RAM_WIDTH),
                 ]
    return Array(Record(tag_layout, name="tag%d" % x) for x in range(NUM_LINES))

def RowPerLineValidArray():
    return Array(Signal(name="rows_valid%d" % x) \
                        for x in range(ROW_PER_LINE))

# L1 TLB
TLB_SET_BITS = log2_int(TLB_SET_SIZE)
TLB_WAY_BITS = log2_int(TLB_NUM_WAYS)
TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_SET_BITS)
TLB_TAG_WAY_BITS = TLB_NUM_WAYS * TLB_EA_TAG_BITS
TLB_PTE_BITS = 64
TLB_PTE_WAY_BITS = TLB_NUM_WAYS * TLB_PTE_BITS
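# With the defaults above, a 64-bit effective address splits for
# the TLB as: | EA tag (46) | set index (6) | page offset (12) |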

def ispow2(x):
    return (1<<log2_int(x, False)) == x

assert (LINE_SIZE % ROW_SIZE) == 0, "LINE_SIZE not multiple of ROW_SIZE"
assert ispow2(LINE_SIZE), "LINE_SIZE not power of 2"
assert ispow2(NUM_LINES), "NUM_LINES not power of 2"
assert ispow2(ROW_PER_LINE), "ROW_PER_LINE not power of 2"
assert ROW_BITS == (INDEX_BITS + ROW_LINE_BITS), "geometry bits don't add up"
assert (LINE_OFF_BITS == ROW_OFF_BITS + ROW_LINE_BITS), \
        "geometry bits don't add up"
assert REAL_ADDR_BITS == (TAG_BITS + INDEX_BITS + LINE_OFF_BITS), \
        "geometry bits don't add up"
assert REAL_ADDR_BITS == (TAG_BITS + ROW_BITS + ROW_OFF_BITS), \
        "geometry bits don't add up"
assert 64 == WB_DATA_BITS, "Can't yet handle wb width that isn't 64-bits"
assert SET_SIZE_BITS <= TLB_LG_PGSZ, "Set indexed by virtual address"

def TLBHit(name):
    return Record([('valid', 1),
                   ('way', TLB_WAY_BITS)], name=name)

def TLBTagEAArray():
    return Array(Signal(TLB_EA_TAG_BITS, name="tlbtagea%d" % x) \
                 for x in range(TLB_NUM_WAYS))

def TLBRecord(name):
    tlb_layout = [('valid', TLB_NUM_WAYS),
                  ('tag', TLB_TAG_WAY_BITS),
                  ('pte', TLB_PTE_WAY_BITS)
                 ]
    return Record(tlb_layout, name=name)

def TLBArray():
    return Array(TLBRecord(name="tlb%d" % x) for x in range(TLB_SET_SIZE))

def HitWaySet():
    return Array(Signal(WAY_BITS, name="hitway_%d" % x) \
                 for x in range(TLB_NUM_WAYS))

# Cache RAM interface
def CacheRamOut():
    return Array(Signal(WB_DATA_BITS, name="cache_out%d" % x) \
                 for x in range(NUM_WAYS))

# PLRU output interface
def PLRUOut():
    return Array(Signal(WAY_BITS, name="plru_out%d" % x) \
                 for x in range(NUM_LINES))

# TLB PLRU output interface
def TLBPLRUOut():
    return Array(Signal(TLB_WAY_BITS, name="tlbplru_out%d" % x) \
                 for x in range(TLB_SET_SIZE))

# Helper functions to decode incoming requests
#
# Return the cache line index (tag index) for an address
def get_index(addr):
    return addr[LINE_OFF_BITS:SET_SIZE_BITS]

# Return the cache row index (data memory) for an address
def get_row(addr):
    return addr[ROW_OFF_BITS:SET_SIZE_BITS]

# Return the index of a row within a line
def get_row_of_line(row):
    return row[:ROW_BITS][:ROW_LINE_BITS]

# Returns whether this is the last row of a line
def is_last_row_addr(addr, last):
    return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last

# Returns whether this is the last row of a line
def is_last_row(row, last):
    return get_row_of_line(row) == last

# Return the next row in the current cache line. We use a
# dedicated function in order to limit the size of the
# generated adder to be only the bits within a cache line
# (3 bits with default settings)
def next_row(row):
    row_v = row[0:ROW_LINE_BITS] + 1
    return Cat(row_v[:ROW_LINE_BITS], row[ROW_LINE_BITS:])
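# e.g. with ROW_LINE_BITS=3: next_row(0b0101_111) == 0b0101_000;
# the increment wraps within the line, and the upper (line-select)
# bits pass through untouched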

# Get the tag value from the address
def get_tag(addr):
    return addr[SET_SIZE_BITS:REAL_ADDR_BITS]

# Read a tag from a tag memory row
def read_tag(way, tagset):
    return tagset.word_select(way, TAG_WIDTH)[:TAG_BITS]
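# (each way's tag occupies a byte-aligned TAG_WIDTH slot in the
#  tag RAM row; only the low TAG_BITS of the slot are the tag)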

# Read a TLB tag from a TLB tag memory row
def read_tlb_tag(way, tags):
    return tags.word_select(way, TLB_EA_TAG_BITS)

# Write a TLB tag to a TLB tag memory row
def write_tlb_tag(way, tags, tag):
    return read_tlb_tag(way, tags).eq(tag)

# Read a PTE from a TLB PTE memory row
def read_tlb_pte(way, ptes):
    return ptes.word_select(way, TLB_PTE_BITS)

def write_tlb_pte(way, ptes, newpte):
    return read_tlb_pte(way, ptes).eq(newpte)

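
# Editor's illustration (not part of the original design): the same
# address split that get_index/get_row/get_tag perform on nmigen
# Signals, done with plain integers, so the cache geometry can be
# sanity-checked without elaborating any HDL.
def _example_address_split(real_addr):
    """Return (tag, index, row) fields of an integer real address."""
    index = (real_addr >> LINE_OFF_BITS) & ((1 << INDEX_BITS) - 1)
    row = (real_addr >> ROW_OFF_BITS) & ((1 << ROW_BITS) - 1)
    tag = (real_addr >> SET_SIZE_BITS) & ((1 << TAG_BITS) - 1)
    return tag, index, row
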

# Record for storing permission, attribute, etc. bits from a PTE
class PermAttr(RecordObject):
    def __init__(self, name=None):
        super().__init__(name=name)
        self.reference = Signal()
        self.changed = Signal()
        self.nocache = Signal()
        self.priv = Signal()
        self.rd_perm = Signal()
        self.wr_perm = Signal()


def extract_perm_attr(pte):
    # NOTE: currently a stub; the PTE permission/attribute bits are
    # decoded inline in DCache.tlb_search instead
    pa = PermAttr()
    return pa


# Type of operation on a "valid" input
@unique
class Op(Enum):
    OP_NONE       = 0
    OP_BAD        = 1  # NC cache hit, TLB miss, prot/RC failure
    OP_STCX_FAIL  = 2  # conditional store w/o reservation
    OP_LOAD_HIT   = 3  # Cache hit on load
    OP_LOAD_MISS  = 4  # Load missing cache
    OP_LOAD_NC    = 5  # Non-cachable load
    OP_STORE_HIT  = 6  # Store hitting cache
    OP_STORE_MISS = 7  # Store missing cache


# Cache state machine
@unique
class State(Enum):
    IDLE             = 0  # Normal load hit processing
    RELOAD_WAIT_ACK  = 1  # Cache reload wait ack
    STORE_WAIT_ACK   = 2  # Store wait ack
    NC_LOAD_WAIT_ACK = 3  # Non-cachable load wait ack


# Dcache operations:
#
# In order to make timing, we use the BRAMs with
# an output buffer, which means that the BRAM
# output is delayed by an extra cycle.
#
# Thus, the dcache has a 2-stage internal pipeline
# for cache hits with no stalls.
#
# All other operations are handled via stalling
# in the first stage.
#
# The second stage can thus complete a hit at the same
# time as the first stage emits a stall for a complex op.
#
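# A rough hit-path cycle sketch (editor's illustration, approximate):
#
#   cycle 1: request latched into r0 (stage 0)
#   cycle 2: tag/TLB compare, BRAM read issued (stage 1)
#   cycle 3: BRAM output buffer valid, load data returned
#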
# Stage 0 register, basically contains just the latched request

class RegStage0(RecordObject):
    def __init__(self, name=None):
        super().__init__(name=name)
        self.req = LoadStore1ToDCacheType(name="lsmem")
        self.tlbie = Signal()   # indicates a tlbie request (from MMU)
        self.doall = Signal()   # with tlbie, indicates flush whole TLB
        self.tlbld = Signal()   # indicates a TLB load request (from MMU)
        self.mmu_req = Signal() # indicates source of request
        self.d_valid = Signal() # indicates req.data is valid now


class MemAccessRequest(RecordObject):
    def __init__(self, name=None):
        super().__init__(name=name)
        self.op = Signal(Op)
        self.valid = Signal()
        self.dcbz = Signal()
        self.real_addr = Signal(REAL_ADDR_BITS)
        self.data = Signal(64)
        self.byte_sel = Signal(8)
        self.hit_way = Signal(WAY_BITS)
        self.same_tag = Signal()
        self.mmu_req = Signal()


# First stage register, contains state for stage 1 of load hits
# and for the state machine used by all other operations
class RegStage1(RecordObject):
    def __init__(self, name=None):
        super().__init__(name=name)
        # Info about the request
        self.full = Signal()    # have uncompleted request
        self.mmu_req = Signal() # request is from MMU
        self.req = MemAccessRequest(name="reqmem")

        # Cache hit state
        self.hit_way = Signal(WAY_BITS)
        self.hit_load_valid = Signal()
        self.hit_index = Signal(INDEX_BITS)
        self.cache_hit = Signal()

        # TLB hit state
        self.tlb_hit = TLBHit("tlb_hit")
        self.tlb_hit_index = Signal(TLB_SET_BITS)

        # 2-stage data buffer for data forwarded from writes to reads
        self.forward_data1 = Signal(64)
        self.forward_data2 = Signal(64)
        self.forward_sel1 = Signal(8)
        self.forward_valid1 = Signal()
        self.forward_way1 = Signal(WAY_BITS)
        self.forward_row1 = Signal(ROW_BITS)
        self.use_forward1 = Signal()
        self.forward_sel = Signal(8)

        # Cache miss state (reload state machine)
        self.state = Signal(State)
        self.dcbz = Signal()
        self.write_bram = Signal()
        self.write_tag = Signal()
        self.slow_valid = Signal()
        self.wb = WBMasterOut("wb")
        self.reload_tag = Signal(TAG_BITS)
        self.store_way = Signal(WAY_BITS)
        self.store_row = Signal(ROW_BITS)
        self.store_index = Signal(INDEX_BITS)
        self.end_row_ix = Signal(ROW_LINE_BITS)
        self.rows_valid = RowPerLineValidArray()
        self.acks_pending = Signal(3)
        self.inc_acks = Signal()
        self.dec_acks = Signal()

        # Signals to complete (possibly with error)
        self.ls_valid = Signal()
        self.ls_error = Signal()
        self.mmu_done = Signal()
        self.mmu_error = Signal()
        self.cache_paradox = Signal()

        # Signal to complete a failed stcx.
        self.stcx_fail = Signal()


# Reservation information
class Reservation(RecordObject):
    def __init__(self):
        super().__init__()
        self.valid = Signal()
        self.addr = Signal(64-LINE_OFF_BITS)


class DTLBUpdate(Elaboratable):
    def __init__(self):
        self.tlbie = Signal()
        self.tlbwe = Signal()
        self.doall = Signal()
        self.updated = Signal()
        self.v_updated = Signal()
        self.tlb_hit = TLBHit("tlb_hit")
        self.tlb_req_index = Signal(TLB_SET_BITS)

        self.tlb_tag_way = Signal(TLB_TAG_WAY_BITS)
        self.tlb_pte_way = Signal(TLB_PTE_WAY_BITS)
        self.repl_way = Signal(TLB_WAY_BITS)
        self.eatag = Signal(TLB_EA_TAG_BITS)
        self.pte_data = Signal(TLB_PTE_BITS)

        self.dv = Signal(TLB_NUM_WAYS) # tlb_way_valids_t

        self.tb_out = Signal(TLB_TAG_WAY_BITS) # tlb_way_tags_t
        self.db_out = Signal(TLB_NUM_WAYS)     # tlb_way_valids_t
        self.pb_out = Signal(TLB_PTE_WAY_BITS) # tlb_way_ptes_t

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        tagset = Signal(TLB_TAG_WAY_BITS)
        pteset = Signal(TLB_PTE_WAY_BITS)

        tb_out, pb_out, db_out = self.tb_out, self.pb_out, self.db_out
        comb += db_out.eq(self.dv)

        with m.If(self.tlbie & self.doall):
            pass # clear all back in parent
        with m.Elif(self.tlbie):
            with m.If(self.tlb_hit.valid):
                comb += db_out.bit_select(self.tlb_hit.way, 1).eq(0)
                comb += self.v_updated.eq(1)

        with m.Elif(self.tlbwe):

            comb += tagset.eq(self.tlb_tag_way)
            comb += write_tlb_tag(self.repl_way, tagset, self.eatag)
            comb += tb_out.eq(tagset)

            comb += pteset.eq(self.tlb_pte_way)
            comb += write_tlb_pte(self.repl_way, pteset, self.pte_data)
            comb += pb_out.eq(pteset)

            comb += db_out.bit_select(self.repl_way, 1).eq(1)

            comb += self.updated.eq(1)
            comb += self.v_updated.eq(1)

        return m


class DCachePendingHit(Elaboratable):

    def __init__(self, tlb_way,
                 cache_i_validdx, cache_tag_set,
                 req_addr,
                 hit_set):

        self.go = Signal()
        self.virt_mode = Signal()
        self.is_hit = Signal()
        self.tlb_hit = TLBHit("tlb_hit")
        self.hit_way = Signal(WAY_BITS)
        self.rel_match = Signal()
        self.req_index = Signal(INDEX_BITS)
        self.reload_tag = Signal(TAG_BITS)

        self.tlb_way = tlb_way
        self.cache_i_validdx = cache_i_validdx
        self.cache_tag_set = cache_tag_set
        self.req_addr = req_addr
        self.hit_set = hit_set

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        go = self.go
        virt_mode = self.virt_mode
        is_hit = self.is_hit
        tlb_way = self.tlb_way
        cache_i_validdx = self.cache_i_validdx
        cache_tag_set = self.cache_tag_set
        req_addr = self.req_addr
        tlb_hit = self.tlb_hit
        hit_set = self.hit_set
        hit_way = self.hit_way
        rel_match = self.rel_match
        req_index = self.req_index
        reload_tag = self.reload_tag

        rel_matches = Array(Signal(name="rel_matches_%d" % i) \
                            for i in range(TLB_NUM_WAYS))
        hit_way_set = HitWaySet()

        # Test if pending request is a hit on any way
        # In order to make timing in virtual mode,
        # when we are using the TLB, we compare each
        # way with each of the real addresses from each way of
        # the TLB, and then decide later which match to use.

        with m.If(virt_mode):
            for j in range(TLB_NUM_WAYS): # tlb_num_way_t
                s_tag = Signal(TAG_BITS, name="s_tag%d" % j)
                s_hit = Signal()
                s_pte = Signal(TLB_PTE_BITS)
                s_ra = Signal(REAL_ADDR_BITS)
                comb += s_pte.eq(read_tlb_pte(j, tlb_way.pte))
                comb += s_ra.eq(Cat(req_addr[0:TLB_LG_PGSZ],
                                    s_pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
                comb += s_tag.eq(get_tag(s_ra))

                for i in range(NUM_WAYS): # way_t
                    is_tag_hit = Signal(name="is_tag_hit_%d_%d" % (j, i))
                    comb += is_tag_hit.eq(go & cache_i_validdx[i] &
                                          (read_tag(i, cache_tag_set) == s_tag)
                                          & (tlb_way.valid[j]))
                    with m.If(is_tag_hit):
                        comb += hit_way_set[j].eq(i)
                        comb += s_hit.eq(1)
                comb += hit_set[j].eq(s_hit)
                with m.If(s_tag == reload_tag):
                    comb += rel_matches[j].eq(1)
            with m.If(tlb_hit.way):
                comb += is_hit.eq(hit_set[tlb_hit.way])
                comb += hit_way.eq(hit_way_set[tlb_hit.way])
                comb += rel_match.eq(rel_matches[tlb_hit.way])
        with m.Else():
            s_tag = Signal(TAG_BITS)
            comb += s_tag.eq(get_tag(req_addr))
            for i in range(NUM_WAYS): # way_t
                is_tag_hit = Signal(name="is_tag_hit_%d" % i)
                comb += is_tag_hit.eq(go & cache_i_validdx[i] &
                                      (read_tag(i, cache_tag_set) == s_tag))
                with m.If(is_tag_hit):
                    comb += hit_way.eq(i)
                    comb += is_hit.eq(1)
            with m.If(s_tag == reload_tag):
                comb += rel_match.eq(1)

        return m


class DCache(Elaboratable):
    """Set associative dcache write-through

    TODO (in no specific order):
    * See list in icache.vhdl
    * Complete load misses on the cycle when WB data comes instead of
      at the end of line (this requires dealing with requests coming in
      while not idle...)
    """
    def __init__(self):
        self.d_in = LoadStore1ToDCacheType("d_in")
        self.d_out = DCacheToLoadStore1Type("d_out")

        self.m_in = MMUToDCacheType("m_in")
        self.m_out = DCacheToMMUType("m_out")

        self.stall_out = Signal()

        # standard naming (wired to non-standard for compatibility)
        self.bus = Interface(addr_width=32,
                             data_width=64,
                             granularity=8,
                             features={'stall'},
                             alignment=0,
                             name="dcache")

        self.log_out = Signal(20)

    def stage_0(self, m, r0, r1, r0_full):
        """Latch the request in r0.req as long as we're not stalling
        """
        comb = m.d.comb
        sync = m.d.sync
        d_in, d_out, m_in = self.d_in, self.d_out, self.m_in

        r = RegStage0("stage0")

        # TODO, this goes in unit tests and formal proofs
        with m.If(d_in.valid & m_in.valid):
            sync += Display("request collision loadstore vs MMU")

        with m.If(m_in.valid):
            comb += r.req.valid.eq(1)
            comb += r.req.load.eq(~(m_in.tlbie | m_in.tlbld)) # no invalidate
            comb += r.req.dcbz.eq(0)
            comb += r.req.nc.eq(0)
            comb += r.req.reserve.eq(0)
            comb += r.req.virt_mode.eq(0)
            comb += r.req.priv_mode.eq(1)
            comb += r.req.addr.eq(m_in.addr)
            comb += r.req.data.eq(m_in.pte)
            comb += r.req.byte_sel.eq(~0) # Const -1 sets all to 0b111....
            comb += r.tlbie.eq(m_in.tlbie)
            comb += r.doall.eq(m_in.doall)
            comb += r.tlbld.eq(m_in.tlbld)
            comb += r.mmu_req.eq(1)
            m.d.sync += Display("    DCACHE req mmu addr %x pte %x ld %d",
                                m_in.addr, m_in.pte, r.req.load)

        with m.Else():
            comb += r.req.eq(d_in)
            comb += r.req.data.eq(0)
            comb += r.tlbie.eq(0)
            comb += r.doall.eq(0)
            comb += r.tlbld.eq(0)
            comb += r.mmu_req.eq(0)
        with m.If((~r1.full & ~d_in.hold) | ~r0_full):
            sync += r0.eq(r)
            sync += r0_full.eq(r.req.valid)
        # Sample data the cycle after a request comes in from loadstore1.
        # If another request has come in already then the data will get
        # put directly into req.data below.
        with m.If(r0.req.valid & ~r.req.valid & ~r0.d_valid &
                  ~r0.mmu_req):
            sync += r0.req.data.eq(d_in.data)
            sync += r0.d_valid.eq(1)
        with m.If(d_in.valid):
            m.d.sync += Display("    DCACHE req cache "
                                "virt %d addr %x data %x ld %d",
                                r.req.virt_mode, r.req.addr,
                                r.req.data, r.req.load)

    def tlb_read(self, m, r0_stall, tlb_way, dtlb):
        """TLB
        Operates in the second cycle on the request latched in r0.req.
        TLB updates write the entry at the end of the second cycle.
        """
        comb = m.d.comb
        sync = m.d.sync
        m_in, d_in = self.m_in, self.d_in

        index = Signal(TLB_SET_BITS)
        addrbits = Signal(TLB_SET_BITS)

        amin = TLB_LG_PGSZ
        amax = TLB_LG_PGSZ + TLB_SET_BITS

        with m.If(m_in.valid):
            comb += addrbits.eq(m_in.addr[amin:amax])
        with m.Else():
            comb += addrbits.eq(d_in.addr[amin:amax])
        comb += index.eq(addrbits)

        # If we have any op and the previous op isn't finished,
        # then keep the same output for next cycle.
        with m.If(~r0_stall):
            sync += tlb_way.eq(dtlb[index])

    def maybe_tlb_plrus(self, m, r1, tlb_plru_victim):
        """Generate TLB PLRUs
        """
        comb = m.d.comb
        sync = m.d.sync

        if TLB_NUM_WAYS == 0:
            return
        for i in range(TLB_SET_SIZE):
            # TLB PLRU interface
            tlb_plru = PLRU(TLB_WAY_BITS)
            setattr(m.submodules, "maybe_plru_%d" % i, tlb_plru)
            tlb_plru_acc_en = Signal()

            comb += tlb_plru_acc_en.eq(r1.tlb_hit.valid &
                                       (r1.tlb_hit_index == i))
            comb += tlb_plru.acc_en.eq(tlb_plru_acc_en)
            comb += tlb_plru.acc_i.eq(r1.tlb_hit.way)
            comb += tlb_plru_victim[i].eq(tlb_plru.lru_o)

    def tlb_search(self, m, tlb_req_index, r0, r0_valid,
                   tlb_way,
                   pte, tlb_hit, valid_ra, perm_attr, ra):

        comb = m.d.comb

        hitway = Signal(TLB_WAY_BITS)
        hit = Signal()
        eatag = Signal(TLB_EA_TAG_BITS)

        TLB_LG_END = TLB_LG_PGSZ + TLB_SET_BITS
        comb += tlb_req_index.eq(r0.req.addr[TLB_LG_PGSZ:TLB_LG_END])
        comb += eatag.eq(r0.req.addr[TLB_LG_END:64])

        for i in range(TLB_NUM_WAYS):
            is_tag_hit = Signal(name="is_tag_hit%d" % i)
            tlb_tag = Signal(TLB_EA_TAG_BITS, name="tlb_tag%d" % i)
            comb += tlb_tag.eq(read_tlb_tag(i, tlb_way.tag))
            comb += is_tag_hit.eq((tlb_way.valid[i]) & (tlb_tag == eatag))
            with m.If(is_tag_hit):
                comb += hitway.eq(i)
                comb += hit.eq(1)

        comb += tlb_hit.valid.eq(hit & r0_valid)
        comb += tlb_hit.way.eq(hitway)

        with m.If(tlb_hit.valid):
            comb += pte.eq(read_tlb_pte(hitway, tlb_way.pte))
        comb += valid_ra.eq(tlb_hit.valid | ~r0.req.virt_mode)

        with m.If(r0.req.virt_mode):
            comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
                              r0.req.addr[ROW_OFF_BITS:TLB_LG_PGSZ],
                              pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
            comb += perm_attr.reference.eq(pte[8])
            comb += perm_attr.changed.eq(pte[7])
            comb += perm_attr.nocache.eq(pte[5])
            comb += perm_attr.priv.eq(pte[3])
            comb += perm_attr.rd_perm.eq(pte[2])
            comb += perm_attr.wr_perm.eq(pte[1])
        with m.Else():
            comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
                              r0.req.addr[ROW_OFF_BITS:REAL_ADDR_BITS]))
            comb += perm_attr.reference.eq(1)
            comb += perm_attr.changed.eq(1)
            comb += perm_attr.nocache.eq(0)
            comb += perm_attr.priv.eq(1)
            comb += perm_attr.rd_perm.eq(1)
            comb += perm_attr.wr_perm.eq(1)

        with m.If(valid_ra):
            m.d.sync += Display("DCACHE virt mode %d hit %d ra %x pte %x",
                                r0.req.virt_mode, tlb_hit.valid, ra, pte)
            m.d.sync += Display("       perm ref=%d", perm_attr.reference)
            m.d.sync += Display("       perm chg=%d", perm_attr.changed)
            m.d.sync += Display("       perm noc=%d", perm_attr.nocache)
            m.d.sync += Display("       perm prv=%d", perm_attr.priv)
            m.d.sync += Display("       perm rdp=%d", perm_attr.rd_perm)
            m.d.sync += Display("       perm wrp=%d", perm_attr.wr_perm)

    def tlb_update(self, m, r0_valid, r0, dtlb, tlb_req_index,
                   tlb_hit, tlb_plru_victim, tlb_way):

        comb = m.d.comb
        sync = m.d.sync

        tlbie = Signal()
        tlbwe = Signal()

        comb += tlbie.eq(r0_valid & r0.tlbie)
        comb += tlbwe.eq(r0_valid & r0.tlbld)

        m.submodules.tlb_update = d = DTLBUpdate()
        with m.If(tlbie & r0.doall):
            # clear all valid bits at once
            for i in range(TLB_SET_SIZE):
                sync += dtlb[i].valid.eq(0)
        with m.If(d.updated):
            sync += dtlb[tlb_req_index].tag.eq(d.tb_out)
            sync += dtlb[tlb_req_index].pte.eq(d.pb_out)
        with m.If(d.v_updated):
            sync += dtlb[tlb_req_index].valid.eq(d.db_out)

        comb += d.dv.eq(dtlb[tlb_req_index].valid)

        comb += d.tlbie.eq(tlbie)
        comb += d.tlbwe.eq(tlbwe)
        comb += d.doall.eq(r0.doall)
        comb += d.tlb_hit.eq(tlb_hit)
        comb += d.tlb_tag_way.eq(tlb_way.tag)
        comb += d.tlb_pte_way.eq(tlb_way.pte)
        comb += d.tlb_req_index.eq(tlb_req_index)

        with m.If(tlb_hit.valid):
            comb += d.repl_way.eq(tlb_hit.way)
        with m.Else():
            comb += d.repl_way.eq(tlb_plru_victim[tlb_req_index])
        comb += d.eatag.eq(r0.req.addr[TLB_LG_PGSZ + TLB_SET_BITS:64])
        comb += d.pte_data.eq(r0.req.data)

    def maybe_plrus(self, m, r1, plru_victim):
        """Generate PLRUs
        """
        comb = m.d.comb
        sync = m.d.sync

        if NUM_WAYS == 0:
            return

        for i in range(NUM_LINES):
            # PLRU interface
            plru = PLRU(WAY_BITS)
            setattr(m.submodules, "plru%d" % i, plru)
            plru_acc_en = Signal()

            comb += plru_acc_en.eq(r1.cache_hit & (r1.hit_index == i))
            comb += plru.acc_en.eq(plru_acc_en)
            comb += plru.acc_i.eq(r1.hit_way)
            comb += plru_victim[i].eq(plru.lru_o)

    def cache_tag_read(self, m, r0_stall, req_index, cache_tag_set, cache_tags):
        """Cache tag RAM read port
        """
        comb = m.d.comb
        sync = m.d.sync
        m_in, d_in = self.m_in, self.d_in

        index = Signal(INDEX_BITS)

        with m.If(r0_stall):
            comb += index.eq(req_index)
        with m.Elif(m_in.valid):
            comb += index.eq(get_index(m_in.addr))
        with m.Else():
            comb += index.eq(get_index(d_in.addr))
        sync += cache_tag_set.eq(cache_tags[index].tag)

    def dcache_request(self, m, r0, ra, req_index, req_row, req_tag,
                       r0_valid, r1, cache_tags, replace_way,
                       use_forward1_next, use_forward2_next,
                       req_hit_way, plru_victim, rc_ok, perm_attr,
                       valid_ra, perm_ok, access_ok, req_op, req_go,
                       tlb_hit, tlb_way, cache_tag_set,
                       cancel_store, req_same_tag, r0_stall, early_req_row):
        """Cache request parsing and hit detection
        """

        comb = m.d.comb
        m_in, d_in = self.m_in, self.d_in

        is_hit = Signal()
        hit_way = Signal(WAY_BITS)
        op = Signal(Op)
        opsel = Signal(3)
        go = Signal()
        nc = Signal()
        hit_set = Array(Signal(name="hit_set_%d" % i) \
                        for i in range(TLB_NUM_WAYS))
        cache_i_validdx = Signal(NUM_WAYS)

        # Extract line, row and tag from request
        comb += req_index.eq(get_index(r0.req.addr))
        comb += req_row.eq(get_row(r0.req.addr))
        comb += req_tag.eq(get_tag(ra))

        if False: # display on comb is a bit... busy.
            comb += Display("dcache_req addr:%x ra: %x idx: %x tag: %x row: %x",
                            r0.req.addr, ra, req_index, req_tag, req_row)

        comb += go.eq(r0_valid & ~(r0.tlbie | r0.tlbld) & ~r1.ls_error)
        comb += cache_i_validdx.eq(cache_tags[req_index].valid)

        m.submodules.dcache_pend = dc = DCachePendingHit(tlb_way,
                                            cache_i_validdx, cache_tag_set,
                                            r0.req.addr,
                                            hit_set)
        comb += dc.tlb_hit.eq(tlb_hit)
        comb += dc.reload_tag.eq(r1.reload_tag)
        comb += dc.virt_mode.eq(r0.req.virt_mode)
        comb += dc.go.eq(go)
        comb += dc.req_index.eq(req_index)

        comb += is_hit.eq(dc.is_hit)
        comb += hit_way.eq(dc.hit_way)
        comb += req_same_tag.eq(dc.rel_match)

        # See if the request matches the line currently being reloaded
        with m.If((r1.state == State.RELOAD_WAIT_ACK) &
                  (req_index == r1.store_index) & req_same_tag):
            # For a store, consider this a hit even if the row isn't
            # valid since it will be by the time we perform the store.
            # For a load, check the appropriate row valid bit.
            rrow = Signal(ROW_LINE_BITS)
            comb += rrow.eq(req_row)
            valid = r1.rows_valid[rrow]
            comb += is_hit.eq((~r0.req.load) | valid)
            comb += hit_way.eq(replace_way)

        # Whether to use forwarded data for a load or not
        with m.If((get_row(r1.req.real_addr) == req_row) &
                  (r1.req.hit_way == hit_way)):
            # Only need to consider r1.write_bram here, since if we
            # are writing refill data here, then we don't have a
            # cache hit this cycle on the line being refilled.
            # (There is the possibility that the load following the
            # load miss that started the refill could be to the old
            # contents of the victim line, since it is a couple of
            # cycles after the refill starts before we see the updated
            # cache tag. In that case we don't use the bypass.)
            comb += use_forward1_next.eq(r1.write_bram)
        with m.If((r1.forward_row1 == req_row) & (r1.forward_way1 == hit_way)):
            comb += use_forward2_next.eq(r1.forward_valid1)

        # The way that matched on a hit
        comb += req_hit_way.eq(hit_way)

        # The way to replace on a miss
        with m.If(r1.write_tag):
            comb += replace_way.eq(plru_victim[r1.store_index])
        with m.Else():
            comb += replace_way.eq(r1.store_way)

        # work out whether we have permission for this access
        # NB we don't yet implement AMR, thus no KUAP
        comb += rc_ok.eq(perm_attr.reference
                         & (r0.req.load | perm_attr.changed))
        comb += perm_ok.eq((r0.req.priv_mode | (~perm_attr.priv)) &
                           (perm_attr.wr_perm |
                            (r0.req.load & perm_attr.rd_perm)))
        comb += access_ok.eq(valid_ra & perm_ok & rc_ok)

        # Combine the request and cache hit status to decide what
        # operation needs to be done
        comb += nc.eq(r0.req.nc | perm_attr.nocache)
        comb += op.eq(Op.OP_NONE)
        with m.If(go):
            with m.If(~access_ok):
                m.d.sync += Display("DCACHE access fail valid_ra=%d p=%d rc=%d",
                                    valid_ra, perm_ok, rc_ok)
                comb += op.eq(Op.OP_BAD)
            with m.Elif(cancel_store):
                m.d.sync += Display("DCACHE cancel store")
                comb += op.eq(Op.OP_STCX_FAIL)
            with m.Else():
                m.d.sync += Display("DCACHE valid_ra=%d nc=%d ld=%d",
                                    valid_ra, nc, r0.req.load)
                comb += opsel.eq(Cat(is_hit, nc, r0.req.load))
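                # opsel bit 0 = is_hit, bit 1 = nc, bit 2 = load,
                # so e.g. 0b101 below is a cacheable load that hit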
                with m.Switch(opsel):
                    with m.Case(0b101): comb += op.eq(Op.OP_LOAD_HIT)
                    with m.Case(0b100): comb += op.eq(Op.OP_LOAD_MISS)
                    with m.Case(0b110): comb += op.eq(Op.OP_LOAD_NC)
                    with m.Case(0b001): comb += op.eq(Op.OP_STORE_HIT)
                    with m.Case(0b000): comb += op.eq(Op.OP_STORE_MISS)
                    with m.Case(0b010): comb += op.eq(Op.OP_STORE_MISS)
                    with m.Case(0b011): comb += op.eq(Op.OP_BAD)
                    with m.Case(0b111): comb += op.eq(Op.OP_BAD)
        comb += req_op.eq(op)
        comb += req_go.eq(go)

        # Version of the row number that is valid one cycle earlier
        # in the cases where we need to read the cache data BRAM.
        # If we're stalling then we need to keep reading the last
        # row requested.
        with m.If(~r0_stall):
            with m.If(m_in.valid):
                comb += early_req_row.eq(get_row(m_in.addr))
            with m.Else():
                comb += early_req_row.eq(get_row(d_in.addr))
        with m.Else():
            comb += early_req_row.eq(req_row)

    def reservation_comb(self, m, cancel_store, set_rsrv, clear_rsrv,
                         r0_valid, r0, reservation):
        """Handle load-with-reservation and store-conditional instructions
        """
        comb = m.d.comb

        with m.If(r0_valid & r0.req.reserve):
            # XXX generate alignment interrupt if address
            # is not aligned XXX or if r0.req.nc = '1'
            with m.If(r0.req.load):
                comb += set_rsrv.eq(r0.req.atomic_last) # load with reservation
            with m.Else():
                comb += clear_rsrv.eq(r0.req.atomic_last) # store conditional
                with m.If((~reservation.valid) |
                          (r0.req.addr[LINE_OFF_BITS:64] != reservation.addr)):
                    comb += cancel_store.eq(1)

    def reservation_reg(self, m, r0_valid, access_ok, set_rsrv, clear_rsrv,
                        reservation, r0):
        comb = m.d.comb
        sync = m.d.sync

        with m.If(r0_valid & access_ok):
            with m.If(clear_rsrv):
                sync += reservation.valid.eq(0)
            with m.Elif(set_rsrv):
                sync += reservation.valid.eq(1)
                sync += reservation.addr.eq(r0.req.addr[LINE_OFF_BITS:64])

    def writeback_control(self, m, r1, cache_out_row):
        """Return data for loads & completion control logic
        """
        comb = m.d.comb
        sync = m.d.sync
        d_out, m_out = self.d_out, self.m_out

        data_out = Signal(64)
        data_fwd = Signal(64)

        # Use the bypass if we are reading the row that was
        # written 1 or 2 cycles ago, including for the
        # slow_valid = 1 case (i.e. completing a load
        # miss or a non-cacheable load).
        with m.If(r1.use_forward1):
            comb += data_fwd.eq(r1.forward_data1)
        with m.Else():
            comb += data_fwd.eq(r1.forward_data2)

        comb += data_out.eq(cache_out_row)

        for i in range(8):
            with m.If(r1.forward_sel[i]):
                dsel = data_fwd.word_select(i, 8)
                comb += data_out.word_select(i, 8).eq(dsel)

        # DCache output to LoadStore
        comb += d_out.valid.eq(r1.ls_valid)
        comb += d_out.data.eq(data_out)
        comb += d_out.store_done.eq(~r1.stcx_fail)
        comb += d_out.error.eq(r1.ls_error)
        comb += d_out.cache_paradox.eq(r1.cache_paradox)

        # Outputs to MMU
        comb += m_out.done.eq(r1.mmu_done)
        comb += m_out.err.eq(r1.mmu_error)
        comb += m_out.data.eq(data_out)

        # We have a valid load or store hit or we just completed
        # a slow op such as a load miss, a NC load or a store
        #
        # Note: the load hit is delayed by one cycle. However it
        # still cannot collide with r.slow_valid (well, unless I
        # miscalculated) because slow_valid can only be set on a
        # subsequent request and not on its first cycle (the state
        # machine must have advanced), which makes slow_valid
        # at least 2 cycles from the previous hit_load_valid.

        # Sanity: Only one of these must be set in any given cycle

        if False: # TODO: need Display to get this to work
            assert (r1.slow_valid & r1.stcx_fail) != 1, \
                "unexpected slow_valid collision with stcx_fail"

            assert ((r1.slow_valid | r1.stcx_fail) | r1.hit_load_valid) != 1, \
                "unexpected hit_load_delayed collision with slow_valid"

        with m.If(~r1.mmu_req):
            # Request came from loadstore1...
            # Load hit case is the standard path
            with m.If(r1.hit_load_valid):
                sync += Display("completing load hit data=%x", data_out)

            # error cases complete without stalling
            with m.If(r1.ls_error):
                with m.If(r1.dcbz):
                    sync += Display("completing dcbz with error")
                with m.Else():
                    sync += Display("completing ld/st with error")

            # Slow ops (load miss, NC, stores)
            with m.If(r1.slow_valid):
                sync += Display("completing store or load miss adr=%x data=%x",
                                r1.req.real_addr, data_out)

        with m.Else():
            # Request came from MMU
            with m.If(r1.hit_load_valid):
                sync += Display("completing load hit to MMU, data=%x",
                                m_out.data)
            # error cases complete without stalling
            with m.If(r1.mmu_error):
                sync += Display("completing MMU ld with error")

            # Slow ops (i.e. load miss)
            with m.If(r1.slow_valid):
                sync += Display("completing MMU load miss, adr=%x data=%x",
                                r1.req.real_addr, m_out.data)

    def rams(self, m, r1, early_req_row, cache_out_row, replace_way):
        """rams
        Generate a cache RAM for each way. This handles the normal
        reads, writes from reloads and the special store-hit update
        path as well.

        Note: the BRAMs have an extra read buffer, meaning the output
        is pipelined an extra cycle. This differs from the
        icache. The writeback logic needs to take that into
        account by using 1-cycle delayed signals for load hits.
        """
        comb = m.d.comb
        bus = self.bus

        for i in range(NUM_WAYS):
            do_read = Signal(name="do_rd%d" % i)
            rd_addr = Signal(ROW_BITS, name="rd_addr_%d" % i)
            do_write = Signal(name="do_wr%d" % i)
            wr_addr = Signal(ROW_BITS, name="wr_addr_%d" % i)
            wr_data = Signal(WB_DATA_BITS, name="din_%d" % i)
            wr_sel = Signal(ROW_SIZE)
            wr_sel_m = Signal(ROW_SIZE)
            _d_out = Signal(WB_DATA_BITS, name="dout_%d" % i) # cache_row_t

            way = CacheRam(ROW_BITS, WB_DATA_BITS, ADD_BUF=True, ram_num=i)
            setattr(m.submodules, "cacheram_%d" % i, way)

            comb += way.rd_en.eq(do_read)
            comb += way.rd_addr.eq(rd_addr)
            comb += _d_out.eq(way.rd_data_o)
            comb += way.wr_sel.eq(wr_sel_m)
            comb += way.wr_addr.eq(wr_addr)
            comb += way.wr_data.eq(wr_data)

            # Cache hit reads
            comb += do_read.eq(1)
            comb += rd_addr.eq(early_req_row)
            with m.If(r1.hit_way == i):
                comb += cache_out_row.eq(_d_out)

            # Write mux:
            #
            # Defaults to wishbone read responses (cache refill)
            #
            # For timing, the mux on wr_data/sel/addr is not
            # dependent on anything other than the current state.

            with m.If(r1.write_bram):
                # Write store data to BRAM.  This happens one
                # cycle after the store is in r0.
                comb += wr_data.eq(r1.req.data)
                comb += wr_sel.eq(r1.req.byte_sel)
                comb += wr_addr.eq(get_row(r1.req.real_addr))

                with m.If(i == r1.req.hit_way):
                    comb += do_write.eq(1)
            with m.Else():
                # Otherwise, we might be doing a reload or a DCBZ
                with m.If(r1.dcbz):
                    comb += wr_data.eq(0)
                with m.Else():
                    comb += wr_data.eq(bus.dat_r)
                comb += wr_addr.eq(r1.store_row)
                comb += wr_sel.eq(~0) # all 1s

                with m.If((r1.state == State.RELOAD_WAIT_ACK)
                          & bus.ack & (replace_way == i)):
                    comb += do_write.eq(1)

            # Mask write selects with do_write since BRAM
            # doesn't have a global write-enable
            with m.If(do_write):
                comb += wr_sel_m.eq(wr_sel)

    # Cache hit synchronous machine for the easy case.
    # This handles load hits.
    # It also handles error cases (TLB miss, cache paradox)
    def dcache_fast_hit(self, m, req_op, r0_valid, r0, r1,
                        req_hit_way, req_index, req_tag, access_ok,
                        tlb_hit, tlb_req_index):
        comb = m.d.comb
        sync = m.d.sync

        with m.If(req_op != Op.OP_NONE):
            sync += Display("op:%d addr:%x nc: %d idx: %x tag: %x way: %x",
                            req_op, r0.req.addr, r0.req.nc,
                            req_index, req_tag, req_hit_way)

        with m.If(r0_valid):
            sync += r1.mmu_req.eq(r0.mmu_req)

        # Fast path for load/store hits.
        # Set signals for the writeback controls.
        sync += r1.hit_way.eq(req_hit_way)
        sync += r1.hit_index.eq(req_index)

        sync += r1.hit_load_valid.eq(req_op == Op.OP_LOAD_HIT)
        sync += r1.cache_hit.eq((req_op == Op.OP_LOAD_HIT) |
                                (req_op == Op.OP_STORE_HIT))

        with m.If(req_op == Op.OP_BAD):
            sync += Display("Signalling ld/st error "
                            "ls_error=%i mmu_error=%i cache_paradox=%i",
                            ~r0.mmu_req, r0.mmu_req, access_ok)
            sync += r1.ls_error.eq(~r0.mmu_req)
            sync += r1.mmu_error.eq(r0.mmu_req)
            sync += r1.cache_paradox.eq(access_ok)
        with m.Else():
            sync += r1.ls_error.eq(0)
            sync += r1.mmu_error.eq(0)
            sync += r1.cache_paradox.eq(0)

        sync += r1.stcx_fail.eq(req_op == Op.OP_STCX_FAIL)

        # Record TLB hit information for updating TLB PLRU
        sync += r1.tlb_hit.eq(tlb_hit)
        sync += r1.tlb_hit_index.eq(tlb_req_index)

    # Memory accesses are handled by this state machine:
    #
    #   * Cache load miss/reload (in conjunction with "rams")
    #   * Load hits for non-cachable forms
    #   * Stores (the collision case is handled in "rams")
    #
    # All wishbone requests generation is done here.
    # This machine operates at stage 1.
    def dcache_slow(self, m, r1, use_forward1_next, use_forward2_next,
                    r0, replace_way,
                    req_hit_way, req_same_tag,
                    r0_valid, req_op, cache_tags, req_go, ra):

        comb = m.d.comb
        sync = m.d.sync
        bus = self.bus
        d_in = self.d_in

        req = MemAccessRequest("mreq_ds")

        req_row = Signal(ROW_BITS)
        req_idx = Signal(INDEX_BITS)
        req_tag = Signal(TAG_BITS)
        comb += req_idx.eq(get_index(req.real_addr))
        comb += req_row.eq(get_row(req.real_addr))
        comb += req_tag.eq(get_tag(req.real_addr))

        sync += r1.use_forward1.eq(use_forward1_next)
        sync += r1.forward_sel.eq(0)

        with m.If(use_forward1_next):
            sync += r1.forward_sel.eq(r1.req.byte_sel)
        with m.Elif(use_forward2_next):
            sync += r1.forward_sel.eq(r1.forward_sel1)

        sync += r1.forward_data2.eq(r1.forward_data1)
        with m.If(r1.write_bram):
            sync += r1.forward_data1.eq(r1.req.data)
            sync += r1.forward_sel1.eq(r1.req.byte_sel)
            sync += r1.forward_way1.eq(r1.req.hit_way)
            sync += r1.forward_row1.eq(get_row(r1.req.real_addr))
            sync += r1.forward_valid1.eq(1)
        with m.Else():
            with m.If(r1.dcbz):
                sync += r1.forward_data1.eq(0)
            with m.Else():
                sync += r1.forward_data1.eq(bus.dat_r)
            sync += r1.forward_sel1.eq(~0) # all 1s
            sync += r1.forward_way1.eq(replace_way)
            sync += r1.forward_row1.eq(r1.store_row)
            sync += r1.forward_valid1.eq(0)

        # One cycle pulses reset
        sync += r1.slow_valid.eq(0)
        sync += r1.write_bram.eq(0)
        sync += r1.inc_acks.eq(0)
        sync += r1.dec_acks.eq(0)

        sync += r1.ls_valid.eq(0)
        # complete tlbies and TLB loads in the third cycle
        sync += r1.mmu_done.eq(r0_valid & (r0.tlbie | r0.tlbld))

        with m.If((req_op == Op.OP_LOAD_HIT) | (req_op == Op.OP_STCX_FAIL)):
            with m.If(~r0.mmu_req):
                sync += r1.ls_valid.eq(1)
            with m.Else():
                sync += r1.mmu_done.eq(1)

        with m.If(r1.write_tag):
            # Store new tag in selected way
            for i in range(NUM_WAYS):
                with m.If(i == replace_way):
                    ct = Signal(TAG_RAM_WIDTH)
                    comb += ct.eq(cache_tags[r1.store_index].tag)
                    comb += ct.word_select(i, TAG_WIDTH).eq(r1.reload_tag)
                    sync += cache_tags[r1.store_index].tag.eq(ct)
            sync += r1.store_way.eq(replace_way)
            sync += r1.write_tag.eq(0)

        # Take request from r1.req if there is one there,
        # else from req_op, ra, etc.
        with m.If(r1.full):
            comb += req.eq(r1.req)
        with m.Else():
            comb += req.op.eq(req_op)
            comb += req.valid.eq(req_go)
            comb += req.mmu_req.eq(r0.mmu_req)
            comb += req.dcbz.eq(r0.req.dcbz)
            comb += req.real_addr.eq(ra)

            with m.If(r0.req.dcbz):
                # force data to 0 for dcbz
                comb += req.data.eq(0)
            with m.Elif(r0.d_valid):
                comb += req.data.eq(r0.req.data)
            with m.Else():
                comb += req.data.eq(d_in.data)

            # Select all bytes for dcbz
            # and for cacheable loads
            with m.If(r0.req.dcbz | (r0.req.load & ~r0.req.nc)):
                comb += req.byte_sel.eq(~0) # all 1s
            with m.Else():
                comb += req.byte_sel.eq(r0.req.byte_sel)
            comb += req.hit_way.eq(req_hit_way)
            comb += req.same_tag.eq(req_same_tag)

        # Store the incoming request from r0,
        # if it is a slow request
        # Note that r1.full = 1 implies req_op = OP_NONE
        with m.If((req_op == Op.OP_LOAD_MISS)
                  | (req_op == Op.OP_LOAD_NC)
                  | (req_op == Op.OP_STORE_MISS)
                  | (req_op == Op.OP_STORE_HIT)):
            sync += r1.req.eq(req)
            sync += r1.full.eq(1)

        # Main state machine
        with m.Switch(r1.state):

            with m.Case(State.IDLE):
                sync += r1.wb.adr.eq(req.real_addr[ROW_LINE_BITS:])
                sync += r1.wb.sel.eq(req.byte_sel)
                sync += r1.wb.dat.eq(req.data)
                sync += r1.dcbz.eq(req.dcbz)

                # Keep track of our index and way
                # for subsequent stores.
                sync += r1.store_index.eq(req_idx)
                sync += r1.store_row.eq(req_row)
                sync += r1.end_row_ix.eq(get_row_of_line(req_row)-1)
                sync += r1.reload_tag.eq(req_tag)
                sync += r1.req.same_tag.eq(1)

                with m.If(req.op == Op.OP_STORE_HIT):
                    sync += r1.store_way.eq(req.hit_way)

                # Reset per-row valid bits,
                # ready for handling OP_LOAD_MISS
                for i in range(ROW_PER_LINE):
                    sync += r1.rows_valid[i].eq(0)

                with m.If(req_op != Op.OP_NONE):
                    sync += Display("cache op %d", req.op)

                with m.Switch(req.op):
                    with m.Case(Op.OP_LOAD_HIT):
                        # stay in IDLE state
                        pass

                    with m.Case(Op.OP_LOAD_MISS):
                        sync += Display("cache miss real addr: %x " \
                                        "row: %x tag: %x",
                                        req.real_addr, req_row, req_tag)

                        # Start the wishbone cycle
                        sync += r1.wb.we.eq(0)
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)

                        # Track that we had one request sent
                        sync += r1.state.eq(State.RELOAD_WAIT_ACK)
                        sync += r1.write_tag.eq(1)

                    with m.Case(Op.OP_LOAD_NC):
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)
                        sync += r1.wb.we.eq(0)
                        sync += r1.state.eq(State.NC_LOAD_WAIT_ACK)

                    with m.Case(Op.OP_STORE_HIT, Op.OP_STORE_MISS):
                        with m.If(~req.dcbz):
                            sync += r1.state.eq(State.STORE_WAIT_ACK)
                            sync += r1.acks_pending.eq(1)
                            sync += r1.full.eq(0)
                            sync += r1.slow_valid.eq(1)

                            with m.If(~req.mmu_req):
                                sync += r1.ls_valid.eq(1)
                            with m.Else():
                                sync += r1.mmu_done.eq(1)

                            with m.If(req.op == Op.OP_STORE_HIT):
                                sync += r1.write_bram.eq(1)
                        with m.Else():
                            # dcbz is handled much like a load miss except
                            # that we are writing to memory instead of reading
                            sync += r1.state.eq(State.RELOAD_WAIT_ACK)

                            with m.If(req.op == Op.OP_STORE_MISS):
                                sync += r1.write_tag.eq(1)

                        sync += r1.wb.we.eq(1)
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)

                    # OP_NONE and OP_BAD do nothing
                    # OP_BAD & OP_STCX_FAIL were
                    # handled above already
                    with m.Case(Op.OP_NONE):
                        pass
                    with m.Case(Op.OP_BAD):
                        pass
                    with m.Case(Op.OP_STCX_FAIL):
                        pass

            with m.Case(State.RELOAD_WAIT_ACK):
                ld_stbs_done = Signal()
                # Requests are all sent if stb is 0
                comb += ld_stbs_done.eq(~r1.wb.stb)

                # If we are still sending requests, was one accepted?
                with m.If((~bus.stall) & r1.wb.stb):
                    # That was the last word?  We are done sending.
                    # Clear stb and set ld_stbs_done so we can handle an
                    # eventual last ack on the same cycle.
                    # sigh - reconstruct wb adr with 3 extra 0s at front
                    wb_adr = Cat(Const(0, ROW_OFF_BITS), r1.wb.adr)
                    with m.If(is_last_row_addr(wb_adr, r1.end_row_ix)):
                        sync += r1.wb.stb.eq(0)
                        comb += ld_stbs_done.eq(1)

                    # Calculate the next row address in the current cache line
                    row = Signal(LINE_OFF_BITS-ROW_OFF_BITS)
                    comb += row.eq(r1.wb.adr)
                    sync += r1.wb.adr[:LINE_OFF_BITS-ROW_OFF_BITS].eq(row+1)

                # Incoming acks processing
                sync += r1.forward_valid1.eq(bus.ack)
                with m.If(bus.ack):
                    srow = Signal(ROW_LINE_BITS)
                    comb += srow.eq(r1.store_row)
                    sync += r1.rows_valid[srow].eq(1)

                    # If this is the data we were looking for,
                    # we can complete the request next cycle.
                    # Compare the whole address in case the
                    # request in r1.req is not the one that
                    # started this refill.
                    with m.If(req.valid & r1.req.same_tag &
                              ((r1.dcbz & r1.req.dcbz) |
                               (~r1.dcbz & (r1.req.op == Op.OP_LOAD_MISS))) &
                              (r1.store_row == get_row(req.real_addr))):
                        sync += r1.full.eq(0)
                        sync += r1.slow_valid.eq(1)
                        with m.If(~r1.mmu_req):
                            sync += r1.ls_valid.eq(1)
                        with m.Else():
                            sync += r1.mmu_done.eq(1)
                        sync += r1.forward_sel.eq(~0) # all 1s
                        sync += r1.use_forward1.eq(1)

                    # Check for completion
                    with m.If(ld_stbs_done & is_last_row(r1.store_row,
                                                         r1.end_row_ix)):
                        # Complete wishbone cycle
                        sync += r1.wb.cyc.eq(0)

                        # Cache line is now valid
                        cv = Signal(NUM_WAYS) # one valid bit per way
                        comb += cv.eq(cache_tags[r1.store_index].valid)
                        comb += cv.bit_select(r1.store_way, 1).eq(1)
                        sync += cache_tags[r1.store_index].valid.eq(cv)

                        sync += r1.state.eq(State.IDLE)
                        sync += Display("cache valid set %x "
                                        "idx %d way %d",
                                        cv, r1.store_index, r1.store_way)

                    # Increment store row counter
                    sync += r1.store_row.eq(next_row(r1.store_row))

            with m.Case(State.STORE_WAIT_ACK):
                st_stbs_done = Signal()
                acks = Signal(3)
                adjust_acks = Signal(3)

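                # acks_pending counts stores still awaiting an ack on
                # the bus; it is 3 bits wide, and the (adjust_acks < 7)
                # guard below keeps it from overflowing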
                comb += st_stbs_done.eq(~r1.wb.stb)
                comb += acks.eq(r1.acks_pending)

                with m.If(r1.inc_acks != r1.dec_acks):
                    with m.If(r1.inc_acks):
                        comb += adjust_acks.eq(acks + 1)
                    with m.Else():
                        comb += adjust_acks.eq(acks - 1)
                with m.Else():
                    comb += adjust_acks.eq(acks)

                sync += r1.acks_pending.eq(adjust_acks)

                # Clear stb when slave accepted request
                with m.If(~bus.stall):
                    # See if there is another store waiting
                    # to be done which is in the same real page.
                    with m.If(req.valid):
                        _ra = req.real_addr[ROW_LINE_BITS:SET_SIZE_BITS]
                        sync += r1.wb.adr[0:SET_SIZE_BITS].eq(_ra)
                        sync += r1.wb.dat.eq(req.data)
                        sync += r1.wb.sel.eq(req.byte_sel)

                    with m.If((adjust_acks < 7) & req.same_tag &
                              ((req.op == Op.OP_STORE_MISS)
                               | (req.op == Op.OP_STORE_HIT))):
                        sync += r1.wb.stb.eq(1)
                        comb += st_stbs_done.eq(0)

                        with m.If(req.op == Op.OP_STORE_HIT):
                            sync += r1.write_bram.eq(1)
                        sync += r1.full.eq(0)
                        sync += r1.slow_valid.eq(1)

                        # Store requests never come from the MMU
                        sync += r1.ls_valid.eq(1)
                        comb += st_stbs_done.eq(0)
                        sync += r1.inc_acks.eq(1)
                    with m.Else():
                        sync += r1.wb.stb.eq(0)
                        comb += st_stbs_done.eq(1)

                # Got ack ? See if complete.
                with m.If(bus.ack):
                    with m.If(st_stbs_done & (adjust_acks == 1)):
                        sync += r1.state.eq(State.IDLE)
                        sync += r1.wb.cyc.eq(0)
                        sync += r1.wb.stb.eq(0)
                    sync += r1.dec_acks.eq(1)

            with m.Case(State.NC_LOAD_WAIT_ACK):
                # Clear stb when slave accepted request
                with m.If(~bus.stall):
                    sync += r1.wb.stb.eq(0)

                # Got ack ? complete.
                with m.If(bus.ack):
                    sync += r1.state.eq(State.IDLE)
                    sync += r1.full.eq(0)
                    sync += r1.slow_valid.eq(1)

                    with m.If(~r1.mmu_req):
                        sync += r1.ls_valid.eq(1)
                    with m.Else():
                        sync += r1.mmu_done.eq(1)

                    sync += r1.forward_sel.eq(~0) # all 1s
                    sync += r1.use_forward1.eq(1)
                    sync += r1.wb.cyc.eq(0)
                    sync += r1.wb.stb.eq(0)

    def dcache_log(self, m, r1, valid_ra, tlb_hit, stall_out, req_op):

        sync = m.d.sync
        d_out, bus, log_out = self.d_out, self.bus, self.log_out

        # note: r1.wb.adr has the ROW_OFF_BITS low bits of the real
        # address stripped, so wb.adr[:3] are real address bits 3 to 6
        sync += log_out.eq(Cat(r1.state[:3], valid_ra, tlb_hit.way[:3],
                               stall_out, req_op[:3], d_out.valid, d_out.error,
                               r1.wb.cyc, r1.wb.stb, bus.ack, bus.stall,
                               r1.wb.adr[:3]))

    def elaborate(self, platform):

        m = Module()
        comb = m.d.comb
        d_in = self.d_in

        # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
        cache_tags = CacheTagArray()
        cache_tag_set = Signal(TAG_RAM_WIDTH)

        # TODO attribute ram_style : string;
        # TODO attribute ram_style of cache_tags : signal is "distributed";

        """note: these are passed to nmigen.hdl.Memory as "attributes".
           don't know how, just that they are.
        """
        dtlb = TLBArray()
        # TODO attribute ram_style of
        #  dtlb_tags : signal is "distributed";
        # TODO attribute ram_style of
        #  dtlb_ptes : signal is "distributed";

        r0 = RegStage0("r0")
        r0_full = Signal()

        r1 = RegStage1("r1")

        reservation = Reservation()

        # Async signals on incoming request
        req_index = Signal(INDEX_BITS)
        req_row = Signal(ROW_BITS)
        req_hit_way = Signal(WAY_BITS)
        req_tag = Signal(TAG_BITS)
        req_op = Signal(Op)
        req_data = Signal(64)
        req_same_tag = Signal()
        req_go = Signal()

        early_req_row = Signal(ROW_BITS)

        cancel_store = Signal()
        set_rsrv = Signal()
        clear_rsrv = Signal()

        r0_valid = Signal()
        r0_stall = Signal()

        use_forward1_next = Signal()
        use_forward2_next = Signal()

        cache_out_row = Signal(WB_DATA_BITS)

        plru_victim = PLRUOut()
        replace_way = Signal(WAY_BITS)

        # Wishbone read/write/cache write formatting signals
        bus_sel = Signal(8)

        # TLB signals
        tlb_way = TLBRecord("tlb_way")
        tlb_req_index = Signal(TLB_SET_BITS)
        tlb_hit = TLBHit("tlb_hit")
        pte = Signal(TLB_PTE_BITS)
        ra = Signal(REAL_ADDR_BITS)
        valid_ra = Signal()
        perm_attr = PermAttr("dc_perms")
        rc_ok = Signal()
        perm_ok = Signal()
        access_ok = Signal()

        tlb_plru_victim = TLBPLRUOut()

        # we don't yet handle collisions between loadstore1 requests
        # and MMU requests
        comb += self.m_out.stall.eq(0)

        # Hold off the request in r0 when r1 has an uncompleted request
        comb += r0_stall.eq(r0_full & (r1.full | d_in.hold))
        comb += r0_valid.eq(r0_full & ~r1.full & ~d_in.hold)
        comb += self.stall_out.eq(r0_stall)

        # deal with litex not doing wishbone pipeline mode
        # XXX in wrong way.  FIFOs are needed in the SRAM test
        # so that stb/ack match up. same thing done in icache.py
        comb += self.bus.stall.eq(self.bus.cyc & ~self.bus.ack)

        # Wire up wishbone request latch out of stage 1
        comb += self.bus.we.eq(r1.wb.we)
        comb += self.bus.adr.eq(r1.wb.adr)
        comb += self.bus.sel.eq(r1.wb.sel)
        comb += self.bus.stb.eq(r1.wb.stb)
        comb += self.bus.dat_w.eq(r1.wb.dat)
        comb += self.bus.cyc.eq(r1.wb.cyc)

        # call sub-functions putting everything together, using shared
        # signals established above
        self.stage_0(m, r0, r1, r0_full)
        self.tlb_read(m, r0_stall, tlb_way, dtlb)
        self.tlb_search(m, tlb_req_index, r0, r0_valid,
                        tlb_way,
                        pte, tlb_hit, valid_ra, perm_attr, ra)
        self.tlb_update(m, r0_valid, r0, dtlb, tlb_req_index,
                        tlb_hit, tlb_plru_victim,
                        tlb_way)
        self.maybe_plrus(m, r1, plru_victim)
        self.maybe_tlb_plrus(m, r1, tlb_plru_victim)
        self.cache_tag_read(m, r0_stall, req_index, cache_tag_set, cache_tags)
        self.dcache_request(m, r0, ra, req_index, req_row, req_tag,
                            r0_valid, r1, cache_tags, replace_way,
                            use_forward1_next, use_forward2_next,
                            req_hit_way, plru_victim, rc_ok, perm_attr,
                            valid_ra, perm_ok, access_ok, req_op, req_go,
                            tlb_hit, tlb_way, cache_tag_set,
                            cancel_store, req_same_tag, r0_stall,
                            early_req_row)
        self.reservation_comb(m, cancel_store, set_rsrv, clear_rsrv,
                              r0_valid, r0, reservation)
        self.reservation_reg(m, r0_valid, access_ok, set_rsrv, clear_rsrv,
                             reservation, r0)
        self.writeback_control(m, r1, cache_out_row)
        self.rams(m, r1, early_req_row, cache_out_row, replace_way)
        self.dcache_fast_hit(m, req_op, r0_valid, r0, r1,
                             req_hit_way, req_index, req_tag, access_ok,
                             tlb_hit, tlb_req_index)
        self.dcache_slow(m, r1, use_forward1_next, use_forward2_next,
                         r0, replace_way,
                         req_hit_way, req_same_tag,
                         r0_valid, req_op, cache_tags, req_go, ra)
        #self.dcache_log(m, r1, valid_ra, tlb_hit, self.stall_out, req_op)

        return m

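
# Editor's addition, a minimal smoke-test sketch: clock the DCache for a
# few cycles with no request driven and the wishbone bus left unconnected.
# It only demonstrates that the design elaborates and clocks under the
# Simulator imported above; it does not check functional behaviour.
def _smoke_sim(vcd_name="test_dcache_smoke.vcd"):
    m = Module()
    m.submodules.dcache = dut = DCache()
    sim = Simulator(m)
    sim.add_clock(1e-6)

    def process():
        # d_in.valid / m_in.valid stay 0: no request, just step the clock
        for _ in range(5):
            yield

    sim.add_sync_process(process)
    with sim.write_vcd(vcd_name):
        sim.run()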


if __name__ == '__main__':
    dut = DCache()
    vl = rtlil.convert(dut, ports=[])
    with open("test_dcache.il", "w") as f:
        f.write(vl)