1 """DCache
2
3 based on Anton Blanchard microwatt dcache.vhdl
4
5 note that the microwatt dcache wishbone interface expects "stall".
6 for simplicity at the moment this is hard-coded to cyc & ~ack.
7 see WB4 spec, p84, section 5.2.1
8
9 IMPORTANT: for store, the data is sampled the cycle AFTER the "valid"
10 is raised. sigh
11
12 Links:
13
14 * https://libre-soc.org/3d_gpu/architecture/set_associative_cache.jpg
15 * https://bugs.libre-soc.org/show_bug.cgi?id=469
16
17 """
18
19 import sys
20
21 from nmutil.gtkw import write_gtkw
22
23 sys.setrecursionlimit(1000000)
24
25 from enum import Enum, unique
26
27 from nmigen import (Module, Signal, Elaboratable, Cat, Repl, Array, Const,
28 Record)
29 from nmutil.util import Display
30
31 from copy import deepcopy
32 from random import randint, seed
33
34 from nmigen_soc.wishbone.bus import Interface
35
36 from nmigen.cli import main
37 from nmutil.iocontrol import RecordObject
38 from nmigen.utils import log2_int
39 from soc.experiment.mem_types import (LoadStore1ToDCacheType,
40 DCacheToLoadStore1Type,
41 MMUToDCacheType,
42 DCacheToMMUType)
43
44 from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS, WB_SEL_BITS,
45 WBAddrType, WBDataType, WBSelType,
46 WBMasterOut, WBSlaveOut,
47 WBMasterOutVector, WBSlaveOutVector,
48 WBIOMasterOut, WBIOSlaveOut)
49
50 from soc.experiment.cache_ram import CacheRam
51 #from soc.experiment.plru import PLRU
52 from nmutil.plru import PLRU
53
54 # for test
55 from soc.bus.sram import SRAM
56 from nmigen import Memory
57 from nmigen.cli import rtlil
58
59 # NOTE: to use cxxsim, export NMIGEN_SIM_MODE=cxxsim from the shell
60 # Also, check out the cxxsim nmigen branch, and latest yosys from git
61 from nmutil.sim_tmp_alternative import Simulator
62
63 from nmutil.util import wrap
64
65
66 # TODO: make these parameters of DCache at some point
67 LINE_SIZE = 64 # Line size in bytes
68 NUM_LINES = 16 # Number of lines in a set
69 NUM_WAYS = 4 # Number of ways
70 TLB_SET_SIZE = 64 # L1 DTLB number of sets (entries per way)
71 TLB_NUM_WAYS = 2 # L1 DTLB number of ways (associativity)
72 TLB_LG_PGSZ = 12 # L1 DTLB log_2(page_size)
73 LOG_LENGTH = 0 # Non-zero to enable log data collection
74
75 # BRAM organisation: We never access more than
76 # WB_DATA_BITS at a time, so to save resources
77 # we make the array only that wide, and use
78 # consecutive indices to make a cache "line"
79 #
80 # ROW_SIZE is the width in bytes of the BRAM
81 # (based on WB, so 64 bits)
82 ROW_SIZE = WB_DATA_BITS // 8
83
84 # ROW_PER_LINE is the number of rows (wishbone
85 # transactions) in a line
86 ROW_PER_LINE = LINE_SIZE // ROW_SIZE
87
88 # BRAM_ROWS is the number of rows in BRAM needed
89 # to represent the full dcache
90 BRAM_ROWS = NUM_LINES * ROW_PER_LINE
91
92 print ("ROW_SIZE", ROW_SIZE)
93 print ("ROW_PER_LINE", ROW_PER_LINE)
94 print ("BRAM_ROWS", BRAM_ROWS)
95 print ("NUM_WAYS", NUM_WAYS)
96
97 # Bit field counts in the address
98
99 # REAL_ADDR_BITS is the number of real address
100 # bits that we store
101 REAL_ADDR_BITS = 56
102
103 # ROW_BITS is the number of bits to select a row
104 ROW_BITS = log2_int(BRAM_ROWS)
105
106 # ROW_LINE_BITS is the number of bits to select
107 # a row within a line
108 ROW_LINE_BITS = log2_int(ROW_PER_LINE)
109
110 # LINE_OFF_BITS is the number of bits for
111 # the offset in a cache line
112 LINE_OFF_BITS = log2_int(LINE_SIZE)
113
114 # ROW_OFF_BITS is the number of bits for
115 # the offset in a row
116 ROW_OFF_BITS = log2_int(ROW_SIZE)
117
118 # INDEX_BITS is the number of bits to
119 # select a cache line
120 INDEX_BITS = log2_int(NUM_LINES)
121
122 # SET_SIZE_BITS is the log base 2 of the set size
123 SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS
124
125 # TAG_BITS is the number of bits of
126 # the tag part of the address
127 TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS
128
129 # TAG_WIDTH is the width in bits of each way of the tag RAM
130 TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)
131
132 # WAY_BITS is the number of bits to select a way
133 WAY_BITS = log2_int(NUM_WAYS)
134
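# For reference, the defaults above (LINE_SIZE=64, NUM_LINES=16,
# NUM_WAYS=4, WB_DATA_BITS=64) give: ROW_SIZE=8, ROW_PER_LINE=8,
# BRAM_ROWS=128, ROW_BITS=7, ROW_LINE_BITS=3, LINE_OFF_BITS=6,
# ROW_OFF_BITS=3, INDEX_BITS=4, SET_SIZE_BITS=10, TAG_BITS=46,
# TAG_WIDTH=48, WAY_BITS=2.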
135 # Example of layout for 32 lines of 64 bytes:
136 layout = """\
137 .. tag |index| line |
138 .. | row | |
139 .. | |---| | ROW_LINE_BITS (3)
140 .. | |--- - --| LINE_OFF_BITS (6)
141 .. | |- --| ROW_OFF_BITS (3)
142 .. |----- ---| | ROW_BITS (8)
143 .. |-----| | INDEX_BITS (5)
144 .. --------| | TAG_BITS (45)
145 """
146 print (layout)
147 print ("Dcache TAG %d IDX %d ROW_BITS %d ROFF %d LOFF %d RLB %d" % \
148 (TAG_BITS, INDEX_BITS, ROW_BITS,
149 ROW_OFF_BITS, LINE_OFF_BITS, ROW_LINE_BITS))
150 print ("index @: %d-%d" % (LINE_OFF_BITS, SET_SIZE_BITS))
151 print ("row @: %d-%d" % (LINE_OFF_BITS, ROW_OFF_BITS))
152 print ("tag @: %d-%d width %d" % (SET_SIZE_BITS, REAL_ADDR_BITS, TAG_WIDTH))
153
154 TAG_RAM_WIDTH = TAG_WIDTH * NUM_WAYS
155
156 print ("TAG_RAM_WIDTH", TAG_RAM_WIDTH)
157
158 def CacheTagArray():
159 tag_layout = [('valid', NUM_WAYS), # one valid bit per way
160 ('tag', TAG_RAM_WIDTH),
161 ]
162 return Array(Record(tag_layout, name="tag%d" % x) for x in range(NUM_LINES))
163
164 def RowPerLineValidArray():
165 return Array(Signal(name="rows_valid%d" % x) \
166 for x in range(ROW_PER_LINE))
167
168 # L1 TLB
169 TLB_SET_BITS = log2_int(TLB_SET_SIZE)
170 TLB_WAY_BITS = log2_int(TLB_NUM_WAYS)
171 TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_SET_BITS)
172 TLB_TAG_WAY_BITS = TLB_NUM_WAYS * TLB_EA_TAG_BITS
173 TLB_PTE_BITS = 64
174 TLB_PTE_WAY_BITS = TLB_NUM_WAYS * TLB_PTE_BITS
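# with the defaults above: TLB_SET_BITS=6, TLB_WAY_BITS=1,
# TLB_EA_TAG_BITS=46, TLB_TAG_WAY_BITS=92, TLB_PTE_WAY_BITS=128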
175
176 def ispow2(x):
177 return (1<<log2_int(x, False)) == x
178
179 assert (LINE_SIZE % ROW_SIZE) == 0, "LINE_SIZE not multiple of ROW_SIZE"
180 assert ispow2(LINE_SIZE), "LINE_SIZE not power of 2"
181 assert ispow2(NUM_LINES), "NUM_LINES not power of 2"
182 assert ispow2(ROW_PER_LINE), "ROW_PER_LINE not power of 2"
183 assert ROW_BITS == (INDEX_BITS + ROW_LINE_BITS), "geometry bits don't add up"
184 assert (LINE_OFF_BITS == ROW_OFF_BITS + ROW_LINE_BITS), \
185 "geometry bits don't add up"
186 assert REAL_ADDR_BITS == (TAG_BITS + INDEX_BITS + LINE_OFF_BITS), \
187 "geometry bits don't add up"
188 assert REAL_ADDR_BITS == (TAG_BITS + ROW_BITS + ROW_OFF_BITS), \
189 "geometry bits don't add up"
190 assert 64 == WB_DATA_BITS, "Can't yet handle wb width that isn't 64-bits"
191 assert SET_SIZE_BITS <= TLB_LG_PGSZ, "Set indexed by virtual address"
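# The last assertion is what allows the cache to be indexed by the
# virtual address safely: the entire set index lies within the page
# offset, so the index bits are identical in the virtual and real
# address and indexing before translation cannot create aliases.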
192
193
194 def TLBTagEAArray():
195 return Array(Signal(TLB_EA_TAG_BITS, name="tlbtagea%d" % x) \
196 for x in range (TLB_NUM_WAYS))
197
198 def TLBRecord(name):
199 tlb_layout = [('valid', TLB_NUM_WAYS),
200 ('tag', TLB_TAG_WAY_BITS),
201 ('pte', TLB_PTE_WAY_BITS)
202 ]
203 return Record(tlb_layout, name=name)
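# with the defaults above, one TLB set record is therefore
# 2 (valid) + 92 (tag) + 128 (pte) = 222 bits wide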
204
205 def TLBArray():
206 return Array(TLBRecord(name="tlb%d" % x) for x in range(TLB_SET_SIZE))
207
208 def HitWaySet():
209 return Array(Signal(WAY_BITS, name="hitway_%d" % x) \
210 for x in range(TLB_NUM_WAYS))
211
212 # Cache RAM interface
213 def CacheRamOut():
214 return Array(Signal(WB_DATA_BITS, name="cache_out%d" % x) \
215 for x in range(NUM_WAYS))
216
217 # PLRU output interface
218 def PLRUOut():
219 return Array(Signal(WAY_BITS, name="plru_out%d" % x) \
220 for x in range(NUM_LINES))
221
222 # TLB PLRU output interface
223 def TLBPLRUOut():
224 return Array(Signal(TLB_WAY_BITS, name="tlbplru_out%d" % x) \
225 for x in range(TLB_SET_SIZE))
226
227 # Helper functions to decode incoming requests
228 #
229 # Return the cache line index (tag index) for an address
230 def get_index(addr):
231 return addr[LINE_OFF_BITS:SET_SIZE_BITS]
232
233 # Return the cache row index (data memory) for an address
234 def get_row(addr):
235 return addr[ROW_OFF_BITS:SET_SIZE_BITS]
236
237 # Return the index of a row within a line
238 def get_row_of_line(row):
239 return row[:ROW_BITS][:ROW_LINE_BITS]
240
241 # Returns whether this is the last row of a line
242 def is_last_row_addr(addr, last):
243 return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last
244
245 # Returns whether this is the last row of a line
246 def is_last_row(row, last):
247 return get_row_of_line(row) == last
248
249 # Return the next row in the current cache line. We use a
250 # dedicated function in order to limit the size of the
251 # generated adder to be only the bits within a cache line
252 # (3 bits with default settings)
253 def next_row(row):
254 row_v = row[0:ROW_LINE_BITS] + 1
255 return Cat(row_v[:ROW_LINE_BITS], row[ROW_LINE_BITS:])
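# Illustrative plain-Python sketch of the wrap-around (not used by the
# hardware, just a model of the Cat() above): with ROW_LINE_BITS == 3,
# incrementing the last row of a line wraps within the line instead of
# carrying into the line-select bits.
def _next_row_model(row):
    line = row & ~(ROW_PER_LINE - 1)                # line-select bits kept
    return line | ((row + 1) & (ROW_PER_LINE - 1))  # low bits wrap

assert _next_row_model(0b10111) == 0b10000  # 0b10_111 -> 0b10_000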
256
257 # Get the tag value from the address
258 def get_tag(addr):
259 return addr[SET_SIZE_BITS:REAL_ADDR_BITS]
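# decode example with the default geometry, for real address 0x12345678:
#   get_index -> addr[6:10]  = 0x9    (cache line 9)
#   get_row   -> addr[3:10]  = 0x4f   (BRAM row 79)
#   get_tag   -> addr[10:56] = 0x48d15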
260
261 # Read a tag from a tag memory row
262 def read_tag(way, tagset):
263 return tagset.word_select(way, TAG_WIDTH)[:TAG_BITS]
264
265 # Read a TLB tag from a TLB tag memory row
266 def read_tlb_tag(way, tags):
267 return tags.word_select(way, TLB_EA_TAG_BITS)
268
269 # Write a TLB tag to a TLB tag memory row
270 def write_tlb_tag(way, tags, tag):
271 return read_tlb_tag(way, tags).eq(tag)
272
273 # Read a PTE from a TLB PTE memory row
274 def read_tlb_pte(way, ptes):
275 return ptes.word_select(way, TLB_PTE_BITS)
276
277 def write_tlb_pte(way, ptes, newpte):
278 return read_tlb_pte(way, ptes).eq(newpte)
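# word_select() example: with TLB_NUM_WAYS == 2 and TLB_PTE_BITS == 64,
# "ptes" is a 128-bit row; read_tlb_pte(1, ptes) selects bits [64:128]
# and write_tlb_pte(1, ptes, v) assigns to that same slice.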
279
280
281 # Record for storing permission, attribute, etc. bits from a PTE
282 class PermAttr(RecordObject):
283 def __init__(self, name=None):
284 super().__init__(name=name)
285 self.reference = Signal()
286 self.changed = Signal()
287 self.nocache = Signal()
288 self.priv = Signal()
289 self.rd_perm = Signal()
290 self.wr_perm = Signal()
291
292
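# NOTE: unused stub -- tlb_search() below extracts the PTE
# permission bits in-line instead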
293 def extract_perm_attr(pte):
294 pa = PermAttr()
295 return pa
296
297
298 # Type of operation on a "valid" input
299 @unique
300 class Op(Enum):
301 OP_NONE = 0
302 OP_BAD = 1 # NC cache hit, TLB miss, prot/RC failure
303 OP_STCX_FAIL = 2 # conditional store w/o reservation
304 OP_LOAD_HIT = 3 # Cache hit on load
305 OP_LOAD_MISS = 4 # Load missing cache
306 OP_LOAD_NC = 5 # Non-cachable load
307 OP_STORE_HIT = 6 # Store hitting cache
308 OP_STORE_MISS = 7 # Store missing cache
309
310
311 # Cache state machine
312 @unique
313 class State(Enum):
314 IDLE = 0 # Normal load hit processing
315 RELOAD_WAIT_ACK = 1 # Cache reload wait ack
316 STORE_WAIT_ACK = 2 # Store wait ack
317 NC_LOAD_WAIT_ACK = 3 # Non-cachable load wait ack
318
319
320 # Dcache operations:
321 #
322 # In order to make timing, we use the BRAMs with
323 # an output buffer, which means that the BRAM
324 # output is delayed by an extra cycle.
325 #
326 # Thus, the dcache has a 2-stage internal pipeline
327 # for cache hits with no stalls.
328 #
329 # All other operations are handled via stalling
330 # in the first stage.
331 #
332 # The second stage can thus complete a hit at the same
333 # time as the first stage emits a stall for a complex op.
334 #
335 # Stage 0 register, basically contains just the latched request
336
337 class RegStage0(RecordObject):
338 def __init__(self, name=None):
339 super().__init__(name=name)
340 self.req = LoadStore1ToDCacheType(name="lsmem")
341 self.tlbie = Signal() # indicates a tlbie request (from MMU)
342 self.doall = Signal() # with tlbie, indicates flush whole TLB
343 self.tlbld = Signal() # indicates a TLB load request (from MMU)
344 self.mmu_req = Signal() # indicates source of request
345 self.d_valid = Signal() # indicates req.data is valid now
346
347
348 class MemAccessRequest(RecordObject):
349 def __init__(self, name=None):
350 super().__init__(name=name)
351 self.op = Signal(Op)
352 self.valid = Signal()
353 self.dcbz = Signal()
354 self.real_addr = Signal(REAL_ADDR_BITS)
355 self.data = Signal(64)
356 self.byte_sel = Signal(8)
357 self.hit_way = Signal(WAY_BITS)
358 self.same_tag = Signal()
359 self.mmu_req = Signal()
360
361
362 # First stage register, contains state for stage 1 of load hits
363 # and for the state machine used by all other operations
364 class RegStage1(RecordObject):
365 def __init__(self, name=None):
366 super().__init__(name=name)
367 # Info about the request
368 self.full = Signal() # have uncompleted request
369 self.mmu_req = Signal() # request is from MMU
370 self.req = MemAccessRequest(name="reqmem")
371
372 # Cache hit state
373 self.hit_way = Signal(WAY_BITS)
374 self.hit_load_valid = Signal()
375 self.hit_index = Signal(INDEX_BITS)
376 self.cache_hit = Signal()
377
378 # TLB hit state
379 self.tlb_hit = Signal()
380 self.tlb_hit_way = Signal(TLB_NUM_WAYS)
381 self.tlb_hit_index = Signal(TLB_WAY_BITS)
382
383 # 2-stage data buffer for data forwarded from writes to reads
384 self.forward_data1 = Signal(64)
385 self.forward_data2 = Signal(64)
386 self.forward_sel1 = Signal(8)
387 self.forward_valid1 = Signal()
388 self.forward_way1 = Signal(WAY_BITS)
389 self.forward_row1 = Signal(ROW_BITS)
390 self.use_forward1 = Signal()
391 self.forward_sel = Signal(8)
392
393 # Cache miss state (reload state machine)
394 self.state = Signal(State)
395 self.dcbz = Signal()
396 self.write_bram = Signal()
397 self.write_tag = Signal()
398 self.slow_valid = Signal()
399 self.wb = WBMasterOut("wb")
400 self.reload_tag = Signal(TAG_BITS)
401 self.store_way = Signal(WAY_BITS)
402 self.store_row = Signal(ROW_BITS)
403 self.store_index = Signal(INDEX_BITS)
404 self.end_row_ix = Signal(ROW_LINE_BITS)
405 self.rows_valid = RowPerLineValidArray()
406 self.acks_pending = Signal(3)
407 self.inc_acks = Signal()
408 self.dec_acks = Signal()
409
410 # Signals to complete (possibly with error)
411 self.ls_valid = Signal()
412 self.ls_error = Signal()
413 self.mmu_done = Signal()
414 self.mmu_error = Signal()
415 self.cache_paradox = Signal()
416
417 # Signal to complete a failed stcx.
418 self.stcx_fail = Signal()
419
420
421 # Reservation information
422 class Reservation(RecordObject):
423 def __init__(self):
424 super().__init__()
425 self.valid = Signal()
426 self.addr = Signal(64-LINE_OFF_BITS)
427
428
429 class DTLBUpdate(Elaboratable):
430 def __init__(self):
431 self.tlbie = Signal()
432 self.tlbwe = Signal()
433 self.doall = Signal()
434 self.updated = Signal()
435 self.v_updated = Signal()
436 self.tlb_hit = Signal()
437 self.tlb_req_index = Signal(TLB_SET_BITS)
438
439 self.tlb_hit_way = Signal(TLB_WAY_BITS)
440 self.tlb_tag_way = Signal(TLB_TAG_WAY_BITS)
441 self.tlb_pte_way = Signal(TLB_PTE_WAY_BITS)
442 self.repl_way = Signal(TLB_WAY_BITS)
443 self.eatag = Signal(TLB_EA_TAG_BITS)
444 self.pte_data = Signal(TLB_PTE_BITS)
445
446 self.dv = Signal(TLB_NUM_WAYS) # tlb_way_valids_t
447
448 self.tb_out = Signal(TLB_TAG_WAY_BITS) # tlb_way_tags_t
449 self.db_out = Signal(TLB_NUM_WAYS) # tlb_way_valids_t
450 self.pb_out = Signal(TLB_PTE_WAY_BITS) # tlb_way_ptes_t
451
452 def elaborate(self, platform):
453 m = Module()
454 comb = m.d.comb
455 sync = m.d.sync
456
457 tagset = Signal(TLB_TAG_WAY_BITS)
458 pteset = Signal(TLB_PTE_WAY_BITS)
459
460 tb_out, pb_out, db_out = self.tb_out, self.pb_out, self.db_out
461 comb += db_out.eq(self.dv)
462
463 with m.If(self.tlbie & self.doall):
464 pass # clear all back in parent
465 with m.Elif(self.tlbie):
466 with m.If(self.tlb_hit):
467 comb += db_out.bit_select(self.tlb_hit_way, 1).eq(0)
468 comb += self.v_updated.eq(1)
469
470 with m.Elif(self.tlbwe):
471
472 comb += tagset.eq(self.tlb_tag_way)
473 comb += write_tlb_tag(self.repl_way, tagset, self.eatag)
474 comb += tb_out.eq(tagset)
475
476 comb += pteset.eq(self.tlb_pte_way)
477 comb += write_tlb_pte(self.repl_way, pteset, self.pte_data)
478 comb += pb_out.eq(pteset)
479
480 comb += db_out.bit_select(self.repl_way, 1).eq(1)
481
482 comb += self.updated.eq(1)
483 comb += self.v_updated.eq(1)
484
485 return m
486
487
488 class DCachePendingHit(Elaboratable):
489
490 def __init__(self, tlb_way, tlb_hit_way,
491 cache_i_validdx, cache_tag_set,
492 req_addr,
493 hit_set):
494
495 self.go = Signal()
496 self.virt_mode = Signal()
497 self.is_hit = Signal()
498 self.tlb_hit = Signal()
499 self.hit_way = Signal(WAY_BITS)
500 self.rel_match = Signal()
501 self.req_index = Signal(INDEX_BITS)
502 self.reload_tag = Signal(TAG_BITS)
503
504 self.tlb_hit_way = tlb_hit_way
505 self.tlb_way = tlb_way
506 self.cache_i_validdx = cache_i_validdx
507 self.cache_tag_set = cache_tag_set
508 self.req_addr = req_addr
509 self.hit_set = hit_set
510
511 def elaborate(self, platform):
512 m = Module()
513 comb = m.d.comb
514 sync = m.d.sync
515
516 go = self.go
517 virt_mode = self.virt_mode
518 is_hit = self.is_hit
519 tlb_way = self.tlb_way
520 cache_i_validdx = self.cache_i_validdx
521 cache_tag_set = self.cache_tag_set
522 req_addr = self.req_addr
523 tlb_hit_way = self.tlb_hit_way
524 tlb_hit = self.tlb_hit
525 hit_set = self.hit_set
526 hit_way = self.hit_way
527 rel_match = self.rel_match
528 req_index = self.req_index
529 reload_tag = self.reload_tag
530
531 rel_matches = Array(Signal(name="rel_matches_%d" % i) \
532 for i in range(TLB_NUM_WAYS))
533 hit_way_set = HitWaySet()
534
535 # Test if pending request is a hit on any way
536 # In order to make timing in virtual mode,
537 # when we are using the TLB, we compare each
538 # way with each of the real addresses from each way of
539 # the TLB, and then decide later which match to use.
540
541 with m.If(virt_mode):
542 for j in range(TLB_NUM_WAYS): # tlb_num_way_t
543 s_tag = Signal(TAG_BITS, name="s_tag%d" % j)
544 s_hit = Signal()
545 s_pte = Signal(TLB_PTE_BITS)
546 s_ra = Signal(REAL_ADDR_BITS)
547 comb += s_pte.eq(read_tlb_pte(j, tlb_way.pte))
548 comb += s_ra.eq(Cat(req_addr[0:TLB_LG_PGSZ],
549 s_pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
550 comb += s_tag.eq(get_tag(s_ra))
551
552 for i in range(NUM_WAYS): # way_t
553 is_tag_hit = Signal(name="is_tag_hit_%d_%d" % (j, i))
554 comb += is_tag_hit.eq(go & cache_i_validdx[i] &
555 (read_tag(i, cache_tag_set) == s_tag)
556 & (tlb_way.valid[j]))
557 with m.If(is_tag_hit):
558 comb += hit_way_set[j].eq(i)
559 comb += s_hit.eq(1)
560 comb += hit_set[j].eq(s_hit)
561 with m.If(s_tag == reload_tag):
562 comb += rel_matches[j].eq(1)
563 with m.If(tlb_hit):
564 comb += is_hit.eq(hit_set[tlb_hit_way])
565 comb += hit_way.eq(hit_way_set[tlb_hit_way])
566 comb += rel_match.eq(rel_matches[tlb_hit_way])
567 with m.Else():
568 s_tag = Signal(TAG_BITS)
569 comb += s_tag.eq(get_tag(req_addr))
570 for i in range(NUM_WAYS): # way_t
571 is_tag_hit = Signal(name="is_tag_hit_%d" % i)
572 comb += is_tag_hit.eq(go & cache_i_validdx[i] &
573 (read_tag(i, cache_tag_set) == s_tag))
574 with m.If(is_tag_hit):
575 comb += hit_way.eq(i)
576 comb += is_hit.eq(1)
577 with m.If(s_tag == reload_tag):
578 comb += rel_match.eq(1)
579
580 return m
581
582
583 class DCache(Elaboratable):
584 """Set associative dcache write-through
585
586 TODO (in no specific order):
587 * See list in icache.vhdl
588 * Complete load misses on the cycle when WB data comes instead of
589 at the end of line (this requires dealing with requests coming in
590 while not idle...)
591 """
592 def __init__(self):
593 self.d_in = LoadStore1ToDCacheType("d_in")
594 self.d_out = DCacheToLoadStore1Type("d_out")
595
596 self.m_in = MMUToDCacheType("m_in")
597 self.m_out = DCacheToMMUType("m_out")
598
599 self.stall_out = Signal()
600
601 # standard naming (wired to non-standard for compatibility)
602 self.bus = Interface(addr_width=32,
603 data_width=64,
604 granularity=8,
605 features={'stall'},
606 alignment=0,
607 name="dcache")
608
609 self.log_out = Signal(20)
610
611 def stage_0(self, m, r0, r1, r0_full):
612 """Latch the request in r0.req as long as we're not stalling
613 """
614 comb = m.d.comb
615 sync = m.d.sync
616 d_in, d_out, m_in = self.d_in, self.d_out, self.m_in
617
618 r = RegStage0("stage0")
619
620 # TODO, this goes in unit tests and formal proofs
621 with m.If(d_in.valid & m_in.valid):
622 sync += Display("request collision loadstore vs MMU")
623
624 with m.If(m_in.valid):
625 comb += r.req.valid.eq(1)
626 comb += r.req.load.eq(~(m_in.tlbie | m_in.tlbld)) # no invalidate
627 comb += r.req.dcbz.eq(0)
628 comb += r.req.nc.eq(0)
629 comb += r.req.reserve.eq(0)
630 comb += r.req.virt_mode.eq(0)
631 comb += r.req.priv_mode.eq(1)
632 comb += r.req.addr.eq(m_in.addr)
633 comb += r.req.data.eq(m_in.pte)
634 comb += r.req.byte_sel.eq(~0) # Const -1 sets all to 0b111....
635 comb += r.tlbie.eq(m_in.tlbie)
636 comb += r.doall.eq(m_in.doall)
637 comb += r.tlbld.eq(m_in.tlbld)
638 comb += r.mmu_req.eq(1)
639 m.d.sync += Display(" DCACHE req mmu addr %x pte %x ld %d",
640 m_in.addr, m_in.pte, r.req.load)
641
642 with m.Else():
643 comb += r.req.eq(d_in)
644 comb += r.req.data.eq(0)
645 comb += r.tlbie.eq(0)
646 comb += r.doall.eq(0)
647 comb += r.tlbld.eq(0)
648 comb += r.mmu_req.eq(0)
649 with m.If((~r1.full & ~d_in.hold) | ~r0_full):
650 sync += r0.eq(r)
651 sync += r0_full.eq(r.req.valid)
652 # Sample data the cycle after a request comes in from loadstore1.
653 # If another request has come in already then the data will get
654 # put directly into req.data below.
655 with m.If(r0.req.valid & ~r.req.valid & ~r0.d_valid &
656 ~r0.mmu_req):
657 sync += r0.req.data.eq(d_in.data)
658 sync += r0.d_valid.eq(1)
659 with m.If(d_in.valid):
660 m.d.sync += Display(" DCACHE req cache "
661 "virt %d addr %x data %x ld %d",
662 r.req.virt_mode, r.req.addr,
663 r.req.data, r.req.load)
664
665 def tlb_read(self, m, r0_stall, tlb_way, dtlb):
666 """TLB
667 Operates in the second cycle on the request latched in r0.req.
668 TLB updates write the entry at the end of the second cycle.
669 """
670 comb = m.d.comb
671 sync = m.d.sync
672 m_in, d_in = self.m_in, self.d_in
673
674 index = Signal(TLB_SET_BITS)
675 addrbits = Signal(TLB_SET_BITS)
676
677 amin = TLB_LG_PGSZ
678 amax = TLB_LG_PGSZ + TLB_SET_BITS
679
680 with m.If(m_in.valid):
681 comb += addrbits.eq(m_in.addr[amin : amax])
682 with m.Else():
683 comb += addrbits.eq(d_in.addr[amin : amax])
684 comb += index.eq(addrbits)
685
686 # If we have any op and the previous op isn't finished,
687 # then keep the same output for next cycle.
688 with m.If(~r0_stall):
689 sync += tlb_way.eq(dtlb[index])
690
691 def maybe_tlb_plrus(self, m, r1, tlb_plru_victim):
692 """Generate TLB PLRUs
693 """
694 comb = m.d.comb
695 sync = m.d.sync
696
697 if TLB_NUM_WAYS == 0:
698 return
699 for i in range(TLB_SET_SIZE):
700 # TLB PLRU interface
701 tlb_plru = PLRU(TLB_WAY_BITS)
702 setattr(m.submodules, "maybe_plru_%d" % i, tlb_plru)
703 tlb_plru_acc_en = Signal()
704
705 comb += tlb_plru_acc_en.eq(r1.tlb_hit & (r1.tlb_hit_index == i))
706 comb += tlb_plru.acc_en.eq(tlb_plru_acc_en)
707 comb += tlb_plru.acc_i.eq(r1.tlb_hit_way)
708 comb += tlb_plru_victim[i].eq(tlb_plru.lru_o)
709
710 def tlb_search(self, m, tlb_req_index, r0, r0_valid,
711 tlb_way, tlb_hit_way,
712 pte, tlb_hit, valid_ra, perm_attr, ra):
713
714 comb = m.d.comb
715
716 hitway = Signal(TLB_WAY_BITS)
717 hit = Signal()
718 eatag = Signal(TLB_EA_TAG_BITS)
719
720 TLB_LG_END = TLB_LG_PGSZ + TLB_SET_BITS
721 comb += tlb_req_index.eq(r0.req.addr[TLB_LG_PGSZ : TLB_LG_END])
722 comb += eatag.eq(r0.req.addr[TLB_LG_END : 64 ])
723
724 for i in range(TLB_NUM_WAYS):
725 is_tag_hit = Signal(name="is_tag_hit%d" % i)
726 tlb_tag = Signal(TLB_EA_TAG_BITS, name="tlb_tag%d" % i)
727 comb += tlb_tag.eq(read_tlb_tag(i, tlb_way.tag))
728 comb += is_tag_hit.eq((tlb_way.valid[i]) & (tlb_tag == eatag))
729 with m.If(is_tag_hit):
730 comb += hitway.eq(i)
731 comb += hit.eq(1)
732
733 comb += tlb_hit.eq(hit & r0_valid)
734 comb += tlb_hit_way.eq(hitway)
735
736 with m.If(tlb_hit):
737 comb += pte.eq(read_tlb_pte(hitway, tlb_way.pte))
738 comb += valid_ra.eq(tlb_hit | ~r0.req.virt_mode)
739
740 with m.If(r0.req.virt_mode):
741 comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
742 r0.req.addr[ROW_OFF_BITS:TLB_LG_PGSZ],
743 pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
744 comb += perm_attr.reference.eq(pte[8])
745 comb += perm_attr.changed.eq(pte[7])
746 comb += perm_attr.nocache.eq(pte[5])
747 comb += perm_attr.priv.eq(pte[3])
748 comb += perm_attr.rd_perm.eq(pte[2])
749 comb += perm_attr.wr_perm.eq(pte[1])
750 with m.Else():
751 comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
752 r0.req.addr[ROW_OFF_BITS:REAL_ADDR_BITS]))
753 comb += perm_attr.reference.eq(1)
754 comb += perm_attr.changed.eq(1)
755 comb += perm_attr.nocache.eq(0)
756 comb += perm_attr.priv.eq(1)
757 comb += perm_attr.rd_perm.eq(1)
758 comb += perm_attr.wr_perm.eq(1)
759
760 with m.If(valid_ra):
761 m.d.sync += Display("DCACHE virt mode %d hit %d ra %x pte %x",
762 r0.req.virt_mode, tlb_hit, ra, pte)
763 m.d.sync += Display(" perm ref=%d", perm_attr.reference)
764 m.d.sync += Display(" perm chg=%d", perm_attr.changed)
765 m.d.sync += Display(" perm noc=%d", perm_attr.nocache)
766 m.d.sync += Display(" perm prv=%d", perm_attr.priv)
767 m.d.sync += Display(" perm rdp=%d", perm_attr.rd_perm)
768 m.d.sync += Display(" perm wrp=%d", perm_attr.wr_perm)
769
770 def tlb_update(self, m, r0_valid, r0, dtlb, tlb_req_index,
771 tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_way):
772
773 comb = m.d.comb
774 sync = m.d.sync
775
776 tlbie = Signal()
777 tlbwe = Signal()
778
779 comb += tlbie.eq(r0_valid & r0.tlbie)
780 comb += tlbwe.eq(r0_valid & r0.tlbld)
781
782 m.submodules.tlb_update = d = DTLBUpdate()
783 with m.If(tlbie & r0.doall):
784 # clear all valid bits at once
785 for i in range(TLB_SET_SIZE):
786 sync += dtlb[i].valid.eq(0)
787 with m.If(d.updated):
788 sync += dtlb[tlb_req_index].tag.eq(d.tb_out)
789 sync += dtlb[tlb_req_index].pte.eq(d.pb_out)
790 with m.If(d.v_updated):
791 sync += dtlb[tlb_req_index].valid.eq(d.db_out)
792
793 comb += d.dv.eq(dtlb[tlb_req_index].valid)
794
795 comb += d.tlbie.eq(tlbie)
796 comb += d.tlbwe.eq(tlbwe)
797 comb += d.doall.eq(r0.doall)
798 comb += d.tlb_hit.eq(tlb_hit)
799 comb += d.tlb_hit_way.eq(tlb_hit_way)
800 comb += d.tlb_tag_way.eq(tlb_way.tag)
801 comb += d.tlb_pte_way.eq(tlb_way.pte)
802 comb += d.tlb_req_index.eq(tlb_req_index)
803
804 with m.If(tlb_hit):
805 comb += d.repl_way.eq(tlb_hit_way)
806 with m.Else():
807 comb += d.repl_way.eq(tlb_plru_victim[tlb_req_index])
808 comb += d.eatag.eq(r0.req.addr[TLB_LG_PGSZ + TLB_SET_BITS:64])
809 comb += d.pte_data.eq(r0.req.data)
810
811 def maybe_plrus(self, m, r1, plru_victim):
812 """Generate PLRUs
813 """
814 comb = m.d.comb
815 sync = m.d.sync
816
817 if NUM_WAYS == 0:
818 return
819
820 for i in range(NUM_LINES):
821 # PLRU interface
822 plru = PLRU(WAY_BITS)
823 setattr(m.submodules, "plru%d" % i, plru)
824 plru_acc_en = Signal()
825
826 comb += plru_acc_en.eq(r1.cache_hit & (r1.hit_index == i))
827 comb += plru.acc_en.eq(plru_acc_en)
828 comb += plru.acc_i.eq(r1.hit_way)
829 comb += plru_victim[i].eq(plru.lru_o)
830
831 def cache_tag_read(self, m, r0_stall, req_index, cache_tag_set, cache_tags):
832 """Cache tag RAM read port
833 """
834 comb = m.d.comb
835 sync = m.d.sync
836 m_in, d_in = self.m_in, self.d_in
837
838 index = Signal(INDEX_BITS)
839
840 with m.If(r0_stall):
841 comb += index.eq(req_index)
842 with m.Elif(m_in.valid):
843 comb += index.eq(get_index(m_in.addr))
844 with m.Else():
845 comb += index.eq(get_index(d_in.addr))
846 sync += cache_tag_set.eq(cache_tags[index].tag)
847
848 def dcache_request(self, m, r0, ra, req_index, req_row, req_tag,
849 r0_valid, r1, cache_tags, replace_way,
850 use_forward1_next, use_forward2_next,
851 req_hit_way, plru_victim, rc_ok, perm_attr,
852 valid_ra, perm_ok, access_ok, req_op, req_go,
853 tlb_hit, tlb_hit_way, tlb_way, cache_tag_set,
854 cancel_store, req_same_tag, r0_stall, early_req_row):
855 """Cache request parsing and hit detection
856 """
857
858 comb = m.d.comb
859 m_in, d_in = self.m_in, self.d_in
860
861 is_hit = Signal()
862 hit_way = Signal(WAY_BITS)
863 op = Signal(Op)
864 opsel = Signal(3)
865 go = Signal()
866 nc = Signal()
867 hit_set = Array(Signal(name="hit_set_%d" % i) \
868 for i in range(TLB_NUM_WAYS))
869 cache_i_validdx = Signal(NUM_WAYS)
870
871 # Extract line, row and tag from request
872 comb += req_index.eq(get_index(r0.req.addr))
873 comb += req_row.eq(get_row(r0.req.addr))
874 comb += req_tag.eq(get_tag(ra))
875
876 if False: # display on comb is a bit... busy.
877 comb += Display("dcache_req addr:%x ra: %x idx: %x tag: %x row: %x",
878 r0.req.addr, ra, req_index, req_tag, req_row)
879
880 comb += go.eq(r0_valid & ~(r0.tlbie | r0.tlbld) & ~r1.ls_error)
881 comb += cache_i_validdx.eq(cache_tags[req_index].valid)
882
883 m.submodules.dcache_pend = dc = DCachePendingHit(
884 tlb_way, tlb_hit_way,
885 cache_i_validdx, cache_tag_set,
886 r0.req.addr,
887 hit_set)
888
889 comb += dc.tlb_hit.eq(tlb_hit)
890 comb += dc.reload_tag.eq(r1.reload_tag)
891 comb += dc.virt_mode.eq(r0.req.virt_mode)
892 comb += dc.go.eq(go)
893 comb += dc.req_index.eq(req_index)
894 comb += is_hit.eq(dc.is_hit)
895 comb += hit_way.eq(dc.hit_way)
896 comb += req_same_tag.eq(dc.rel_match)
897
898 # See if the request matches the line currently being reloaded
899 with m.If((r1.state == State.RELOAD_WAIT_ACK) &
900 (req_index == r1.store_index) & req_same_tag):
901 # For a store, consider this a hit even if the row isn't
902 # valid since it will be by the time we perform the store.
903 # For a load, check the appropriate row valid bit.
904 rrow = Signal(ROW_LINE_BITS)
905 comb += rrow.eq(req_row)
906 valid = r1.rows_valid[rrow]
907 comb += is_hit.eq((~r0.req.load) | valid)
908 comb += hit_way.eq(replace_way)
909
910 # Whether to use forwarded data for a load or not
911 with m.If((get_row(r1.req.real_addr) == req_row) &
912 (r1.req.hit_way == hit_way)):
913 # Only need to consider r1.write_bram here, since if we
914 # are writing refill data here, then we don't have a
915 # cache hit this cycle on the line being refilled.
916 # (There is the possibility that the load following the
917 # load miss that started the refill could be to the old
918 # contents of the victim line, since it is a couple of
919 # cycles after the refill starts before we see the updated
920 # cache tag. In that case we don't use the bypass.)
921 comb += use_forward1_next.eq(r1.write_bram)
922 with m.If((r1.forward_row1 == req_row) & (r1.forward_way1 == hit_way)):
923 comb += use_forward2_next.eq(r1.forward_valid1)
924
925 # The way that matched on a hit
926 comb += req_hit_way.eq(hit_way)
927
928 # The way to replace on a miss
929 with m.If(r1.write_tag):
930 comb += replace_way.eq(plru_victim[r1.store_index])
931 with m.Else():
932 comb += replace_way.eq(r1.store_way)
933
934 # work out whether we have permission for this access
935 # NB we don't yet implement AMR, thus no KUAP
936 comb += rc_ok.eq(perm_attr.reference
937 & (r0.req.load | perm_attr.changed))
938 comb += perm_ok.eq((r0.req.priv_mode | (~perm_attr.priv)) &
939 (perm_attr.wr_perm |
940 (r0.req.load & perm_attr.rd_perm)))
941 comb += access_ok.eq(valid_ra & perm_ok & rc_ok)
942 # Combine the request and cache hit status to decide what
943 # operation needs to be done
944 comb += nc.eq(r0.req.nc | perm_attr.nocache)
945 comb += op.eq(Op.OP_NONE)
946 with m.If(go):
947 with m.If(~access_ok):
948 m.d.sync += Display("DCACHE access fail valid_ra=%d p=%d rc=%d",
949 valid_ra, perm_ok, rc_ok)
950 comb += op.eq(Op.OP_BAD)
951 with m.Elif(cancel_store):
952 m.d.sync += Display("DCACHE cancel store")
953 comb += op.eq(Op.OP_STCX_FAIL)
954 with m.Else():
955 m.d.sync += Display("DCACHE valid_ra=%d nc=%d ld=%d",
956 valid_ra, nc, r0.req.load)
957 comb += opsel.eq(Cat(is_hit, nc, r0.req.load))
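# Cat() packs LSB first: opsel[0]=is_hit, opsel[1]=nc,
# opsel[2]=r0.req.load, so e.g. 0b101 is "cacheable load hit"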
958 with m.Switch(opsel):
959 with m.Case(0b101): comb += op.eq(Op.OP_LOAD_HIT)
960 with m.Case(0b100): comb += op.eq(Op.OP_LOAD_MISS)
961 with m.Case(0b110): comb += op.eq(Op.OP_LOAD_NC)
962 with m.Case(0b001): comb += op.eq(Op.OP_STORE_HIT)
963 with m.Case(0b000): comb += op.eq(Op.OP_STORE_MISS)
964 with m.Case(0b010): comb += op.eq(Op.OP_STORE_MISS)
965 with m.Case(0b011): comb += op.eq(Op.OP_BAD)
966 with m.Case(0b111): comb += op.eq(Op.OP_BAD)
967 comb += req_op.eq(op)
968 comb += req_go.eq(go)
969
970 # Version of the row number that is valid one cycle earlier
971 # in the cases where we need to read the cache data BRAM.
972 # If we're stalling then we need to keep reading the last
973 # row requested.
974 with m.If(~r0_stall):
975 with m.If(m_in.valid):
976 comb += early_req_row.eq(get_row(m_in.addr))
977 with m.Else():
978 comb += early_req_row.eq(get_row(d_in.addr))
979 with m.Else():
980 comb += early_req_row.eq(req_row)
981
982 def reservation_comb(self, m, cancel_store, set_rsrv, clear_rsrv,
983 r0_valid, r0, reservation):
984 """Handle load-with-reservation and store-conditional instructions
985 """
986 comb = m.d.comb
987
988 with m.If(r0_valid & r0.req.reserve):
989 # XXX generate alignment interrupt if address
990 # is not aligned XXX or if r0.req.nc = '1'
991 with m.If(r0.req.load):
992 comb += set_rsrv.eq(r0.req.atomic_last) # load with reservation
993 with m.Else():
994 comb += clear_rsrv.eq(r0.req.atomic_last) # store conditional
995 with m.If((~reservation.valid) |
996 (r0.req.addr[LINE_OFF_BITS:64] != reservation.addr)):
997 comb += cancel_store.eq(1)
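# note: the reservation granule is a cache line -- the address is
# compared only down to LINE_OFF_BITS, i.e. 64-byte granularity here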
998
999 def reservation_reg(self, m, r0_valid, access_ok, set_rsrv, clear_rsrv,
1000 reservation, r0):
1001
1002 comb = m.d.comb
1003 sync = m.d.sync
1004
1005 with m.If(r0_valid & access_ok):
1006 with m.If(clear_rsrv):
1007 sync += reservation.valid.eq(0)
1008 with m.Elif(set_rsrv):
1009 sync += reservation.valid.eq(1)
1010 sync += reservation.addr.eq(r0.req.addr[LINE_OFF_BITS:64])
1011
1012 def writeback_control(self, m, r1, cache_out_row):
1013 """Return data for loads & completion control logic
1014 """
1015 comb = m.d.comb
1016 sync = m.d.sync
1017 d_out, m_out = self.d_out, self.m_out
1018
1019 data_out = Signal(64)
1020 data_fwd = Signal(64)
1021
1022 # Use the bypass if are reading the row that was
1023 # written 1 or 2 cycles ago, including for the
1024 # slow_valid = 1 case (i.e. completing a load
1025 # miss or a non-cacheable load).
1026 with m.If(r1.use_forward1):
1027 comb += data_fwd.eq(r1.forward_data1)
1028 with m.Else():
1029 comb += data_fwd.eq(r1.forward_data2)
1030
1031 comb += data_out.eq(cache_out_row)
1032
1033 for i in range(8):
1034 with m.If(r1.forward_sel[i]):
1035 dsel = data_fwd.word_select(i, 8)
1036 comb += data_out.word_select(i, 8).eq(dsel)
1037
1038 comb += d_out.valid.eq(r1.ls_valid)
1039 comb += d_out.data.eq(data_out)
1040 comb += d_out.store_done.eq(~r1.stcx_fail)
1041 comb += d_out.error.eq(r1.ls_error)
1042 comb += d_out.cache_paradox.eq(r1.cache_paradox)
1043
1044 # Outputs to MMU
1045 comb += m_out.done.eq(r1.mmu_done)
1046 comb += m_out.err.eq(r1.mmu_error)
1047 comb += m_out.data.eq(data_out)
1048
1049 # We have a valid load or store hit or we just completed
1050 # a slow op such as a load miss, a NC load or a store
1051 #
1052 # Note: the load hit is delayed by one cycle. However it
1053 # can still not collide with r.slow_valid (well unless I
1054 # miscalculated) because slow_valid can only be set on a
1055 # subsequent request and not on its first cycle (the state
1056 # machine must have advanced), which makes slow_valid
1057 # at least 2 cycles from the previous hit_load_valid.
1058
1059 # Sanity: Only one of these must be set in any given cycle
1060
1061 if False: # TODO: need Display to get this to work
1062 assert (r1.slow_valid & r1.stcx_fail) != 1, \
1063 "unexpected slow_valid collision with stcx_fail"
1064
1065 assert ((r1.slow_valid | r1.stcx_fail) | r1.hit_load_valid) != 1, \
1066 "unexpected hit_load_delayed collision with slow_valid"
1067
1068 with m.If(~r1.mmu_req):
1069 # Request came from loadstore1...
1070 # Load hit case is the standard path
1071 with m.If(r1.hit_load_valid):
1072 sync += Display("completing load hit data=%x", data_out)
1073
1074 # error cases complete without stalling
1075 with m.If(r1.ls_error):
1076 with m.If(r1.dcbz):
1077 sync += Display("completing dcbz with error")
1078 with m.Else():
1079 sync += Display("completing ld/st with error")
1080
1081 # Slow ops (load miss, NC, stores)
1082 with m.If(r1.slow_valid):
1083 sync += Display("completing store or load miss adr=%x data=%x",
1084 r1.req.real_addr, data_out)
1085
1086 with m.Else():
1087 # Request came from MMU
1088 with m.If(r1.hit_load_valid):
1089 sync += Display("completing load hit to MMU, data=%x",
1090 m_out.data)
1091 # error cases complete without stalling
1092 with m.If(r1.mmu_error):
1093 sync += Display("combpleting MMU ld with error")
1094
1095 # Slow ops (i.e. load miss)
1096 with m.If(r1.slow_valid):
1097 sync += Display("completing MMU load miss, adr=%x data=%x",
1098 r1.req.real_addr, m_out.data)
1099
1100 def rams(self, m, r1, early_req_row, cache_out_row, replace_way):
1101 """rams
1102 Generate a cache RAM for each way. This handles the normal
1103 reads, writes from reloads and the special store-hit update
1104 path as well.
1105
1106 Note: the BRAMs have an extra read buffer, meaning the output
1107 is pipelined an extra cycle. This differs from the
1108 icache. The writeback logic needs to take that into
1109 account by using 1-cycle delayed signals for load hits.
1110 """
1111 comb = m.d.comb
1112 bus = self.bus
1113
1114 for i in range(NUM_WAYS):
1115 do_read = Signal(name="do_rd%d" % i)
1116 rd_addr = Signal(ROW_BITS, name="rd_addr_%d" % i)
1117 do_write = Signal(name="do_wr%d" % i)
1118 wr_addr = Signal(ROW_BITS, name="wr_addr_%d" % i)
1119 wr_data = Signal(WB_DATA_BITS, name="din_%d" % i)
1120 wr_sel = Signal(ROW_SIZE)
1121 wr_sel_m = Signal(ROW_SIZE)
1122 _d_out = Signal(WB_DATA_BITS, name="dout_%d" % i) # cache_row_t
1123
1124 way = CacheRam(ROW_BITS, WB_DATA_BITS, ADD_BUF=True, ram_num=i)
1125 setattr(m.submodules, "cacheram_%d" % i, way)
1126
1127 comb += way.rd_en.eq(do_read)
1128 comb += way.rd_addr.eq(rd_addr)
1129 comb += _d_out.eq(way.rd_data_o)
1130 comb += way.wr_sel.eq(wr_sel_m)
1131 comb += way.wr_addr.eq(wr_addr)
1132 comb += way.wr_data.eq(wr_data)
1133
1134 # Cache hit reads
1135 comb += do_read.eq(1)
1136 comb += rd_addr.eq(early_req_row)
1137 with m.If(r1.hit_way == i):
1138 comb += cache_out_row.eq(_d_out)
1139
1140 # Write mux:
1141 #
1142 # Defaults to wishbone read responses (cache refill)
1143 #
1144 # For timing, the mux on wr_data/sel/addr is not
1145 # dependent on anything other than the current state.
1146
1147 with m.If(r1.write_bram):
1148 # Write store data to BRAM. This happens one
1149 # cycle after the store is in r0.
1150 comb += wr_data.eq(r1.req.data)
1151 comb += wr_sel.eq(r1.req.byte_sel)
1152 comb += wr_addr.eq(get_row(r1.req.real_addr))
1153
1154 with m.If(i == r1.req.hit_way):
1155 comb += do_write.eq(1)
1156 with m.Else():
1157 # Otherwise, we might be doing a reload or a DCBZ
1158 with m.If(r1.dcbz):
1159 comb += wr_data.eq(0)
1160 with m.Else():
1161 comb += wr_data.eq(bus.dat_r)
1162 comb += wr_addr.eq(r1.store_row)
1163 comb += wr_sel.eq(~0) # all 1s
1164
1165 with m.If((r1.state == State.RELOAD_WAIT_ACK)
1166 & bus.ack & (replace_way == i)):
1167 comb += do_write.eq(1)
1168
1169 # Mask write selects with do_write since BRAM
1170 # doesn't have a global write-enable
1171 with m.If(do_write):
1172 comb += wr_sel_m.eq(wr_sel)
1173
1174 # Cache hit synchronous machine for the easy case.
1175 # This handles load hits.
1176 # It also handles error cases (TLB miss, cache paradox)
1177 def dcache_fast_hit(self, m, req_op, r0_valid, r0, r1,
1178 req_hit_way, req_index, req_tag, access_ok,
1179 tlb_hit, tlb_hit_way, tlb_req_index):
1180
1181 comb = m.d.comb
1182 sync = m.d.sync
1183
1184 with m.If(req_op != Op.OP_NONE):
1185 sync += Display("op:%d addr:%x nc: %d idx: %x tag: %x way: %x",
1186 req_op, r0.req.addr, r0.req.nc,
1187 req_index, req_tag, req_hit_way)
1188
1189 with m.If(r0_valid):
1190 sync += r1.mmu_req.eq(r0.mmu_req)
1191
1192 # Fast path for load/store hits.
1193 # Set signals for the writeback controls.
1194 sync += r1.hit_way.eq(req_hit_way)
1195 sync += r1.hit_index.eq(req_index)
1196
1197 with m.If(req_op == Op.OP_LOAD_HIT):
1198 sync += r1.hit_load_valid.eq(1)
1199 with m.Else():
1200 sync += r1.hit_load_valid.eq(0)
1201
1202 with m.If((req_op == Op.OP_LOAD_HIT) | (req_op == Op.OP_STORE_HIT)):
1203 sync += r1.cache_hit.eq(1)
1204 with m.Else():
1205 sync += r1.cache_hit.eq(0)
1206
1207 with m.If(req_op == Op.OP_BAD):
1208 sync += Display("Signalling ld/st error "
1209 "ls_error=%i mmu_error=%i cache_paradox=%i",
1210 ~r0.mmu_req, r0.mmu_req, access_ok)
1211 sync += r1.ls_error.eq(~r0.mmu_req)
1212 sync += r1.mmu_error.eq(r0.mmu_req)
1213 sync += r1.cache_paradox.eq(access_ok)
1214
1215 with m.Else():
1216 sync += r1.ls_error.eq(0)
1217 sync += r1.mmu_error.eq(0)
1218 sync += r1.cache_paradox.eq(0)
1219
1220 with m.If(req_op == Op.OP_STCX_FAIL):
1221 sync += r1.stcx_fail.eq(1)
1222 with m.Else():
1223 sync += r1.stcx_fail.eq(0)
1224
1225 # Record TLB hit information for updating TLB PLRU
1226 sync += r1.tlb_hit.eq(tlb_hit)
1227 sync += r1.tlb_hit_way.eq(tlb_hit_way)
1228 sync += r1.tlb_hit_index.eq(tlb_req_index)
1229
1230 # Memory accesses are handled by this state machine:
1231 #
1232 # * Cache load miss/reload (in conjunction with "rams")
1233 # * Load hits for non-cachable forms
1234 # * Stores (the collision case is handled in "rams")
1235 #
1236 # All wishbone requests generation is done here.
1237 # This machine operates at stage 1.
1238 def dcache_slow(self, m, r1, use_forward1_next, use_forward2_next,
1239 r0, replace_way,
1240 req_hit_way, req_same_tag,
1241 r0_valid, req_op, cache_tags, req_go, ra):
1242
1243 comb = m.d.comb
1244 sync = m.d.sync
1245 bus = self.bus
1246 d_in = self.d_in
1247
1248 req = MemAccessRequest("mreq_ds")
1249
1250 req_row = Signal(ROW_BITS)
1251 req_idx = Signal(INDEX_BITS)
1252 req_tag = Signal(TAG_BITS)
1253 comb += req_idx.eq(get_index(req.real_addr))
1254 comb += req_row.eq(get_row(req.real_addr))
1255 comb += req_tag.eq(get_tag(req.real_addr))
1256
1257 sync += r1.use_forward1.eq(use_forward1_next)
1258 sync += r1.forward_sel.eq(0)
1259
1260 with m.If(use_forward1_next):
1261 sync += r1.forward_sel.eq(r1.req.byte_sel)
1262 with m.Elif(use_forward2_next):
1263 sync += r1.forward_sel.eq(r1.forward_sel1)
1264
1265 sync += r1.forward_data2.eq(r1.forward_data1)
1266 with m.If(r1.write_bram):
1267 sync += r1.forward_data1.eq(r1.req.data)
1268 sync += r1.forward_sel1.eq(r1.req.byte_sel)
1269 sync += r1.forward_way1.eq(r1.req.hit_way)
1270 sync += r1.forward_row1.eq(get_row(r1.req.real_addr))
1271 sync += r1.forward_valid1.eq(1)
1272 with m.Else():
1273 with m.If(r1.dcbz):
1274 sync += r1.forward_data1.eq(0)
1275 with m.Else():
1276 sync += r1.forward_data1.eq(bus.dat_r)
1277 sync += r1.forward_sel1.eq(~0) # all 1s
1278 sync += r1.forward_way1.eq(replace_way)
1279 sync += r1.forward_row1.eq(r1.store_row)
1280 sync += r1.forward_valid1.eq(0)
1281
1282 # One cycle pulses reset
1283 sync += r1.slow_valid.eq(0)
1284 sync += r1.write_bram.eq(0)
1285 sync += r1.inc_acks.eq(0)
1286 sync += r1.dec_acks.eq(0)
1287
1288 sync += r1.ls_valid.eq(0)
1289 # complete tlbies and TLB loads in the third cycle
1290 sync += r1.mmu_done.eq(r0_valid & (r0.tlbie | r0.tlbld))
1291
1292 with m.If((req_op == Op.OP_LOAD_HIT) | (req_op == Op.OP_STCX_FAIL)):
1293 with m.If(~r0.mmu_req):
1294 sync += r1.ls_valid.eq(1)
1295 with m.Else():
1296 sync += r1.mmu_done.eq(1)
1297
1298 with m.If(r1.write_tag):
1299 # Store new tag in selected way
1300 for i in range(NUM_WAYS):
1301 with m.If(i == replace_way):
1302 ct = Signal(TAG_RAM_WIDTH)
1303 comb += ct.eq(cache_tags[r1.store_index].tag)
1304 """
1305 TODO: check this
1306 cache_tags(r1.store_index)((i + 1) * TAG_WIDTH - 1 downto i * TAG_WIDTH) <=
1307 (TAG_WIDTH - 1 downto TAG_BITS => '0') & r1.reload_tag;
1308 """
1309 comb += ct.word_select(i, TAG_WIDTH).eq(r1.reload_tag)
1310 sync += cache_tags[r1.store_index].tag.eq(ct)
1311 sync += r1.store_way.eq(replace_way)
1312 sync += r1.write_tag.eq(0)
1313
1314 # Take request from r1.req if there is one there,
1315 # else from req_op, ra, etc.
1316 with m.If(r1.full):
1317 comb += req.eq(r1.req)
1318 with m.Else():
1319 comb += req.op.eq(req_op)
1320 comb += req.valid.eq(req_go)
1321 comb += req.mmu_req.eq(r0.mmu_req)
1322 comb += req.dcbz.eq(r0.req.dcbz)
1323 comb += req.real_addr.eq(ra)
1324
1325 with m.If(r0.req.dcbz):
1326 # force data to 0 for dcbz
1327 comb += req.data.eq(0)
1328 with m.Elif(r0.d_valid):
1329 comb += req.data.eq(r0.req.data)
1330 with m.Else():
1331 comb += req.data.eq(d_in.data)
1332
1333 # Select all bytes for dcbz
1334 # and for cacheable loads
1335 with m.If(r0.req.dcbz | (r0.req.load & ~r0.req.nc)):
1336 comb += req.byte_sel.eq(~0) # all 1s
1337 with m.Else():
1338 comb += req.byte_sel.eq(r0.req.byte_sel)
1339 comb += req.hit_way.eq(req_hit_way)
1340 comb += req.same_tag.eq(req_same_tag)
1341
1342 # Store the incoming request from r0,
1343 # if it is a slow request
1344 # Note that r1.full = 1 implies req_op = OP_NONE
1345 with m.If((req_op == Op.OP_LOAD_MISS)
1346 | (req_op == Op.OP_LOAD_NC)
1347 | (req_op == Op.OP_STORE_MISS)
1348 | (req_op == Op.OP_STORE_HIT)):
1349 sync += r1.req.eq(req)
1350 sync += r1.full.eq(1)
1351
1352 # Main state machine
1353 with m.Switch(r1.state):
1354
1355 with m.Case(State.IDLE):
1356 sync += r1.wb.adr.eq(req.real_addr[ROW_OFF_BITS:])
1357 sync += r1.wb.sel.eq(req.byte_sel)
1358 sync += r1.wb.dat.eq(req.data)
1359 sync += r1.dcbz.eq(req.dcbz)
1360
1361 # Keep track of our index and way
1362 # for subsequent stores.
1363 sync += r1.store_index.eq(req_idx)
1364 sync += r1.store_row.eq(req_row)
1365 sync += r1.end_row_ix.eq(get_row_of_line(req_row)-1)
1366 sync += r1.reload_tag.eq(req_tag)
1367 sync += r1.req.same_tag.eq(1)
1368
1369 with m.If(req.op == Op.OP_STORE_HIT):
1370 sync += r1.store_way.eq(req.hit_way)
1371
1372 # Reset per-row valid bits,
1373 # ready for handling OP_LOAD_MISS
1374 for i in range(ROW_PER_LINE):
1375 sync += r1.rows_valid[i].eq(0)
1376
1377 with m.If(req_op != Op.OP_NONE):
1378 sync += Display("cache op %d", req.op)
1379
1380 with m.Switch(req.op):
1381 with m.Case(Op.OP_LOAD_HIT):
1382 # stay in IDLE state
1383 pass
1384
1385 with m.Case(Op.OP_LOAD_MISS):
1386 sync += Display("cache miss real addr: %x " \
1387 "idx: %x tag: %x",
1388 req.real_addr, req_row, req_tag)
1389
1390 # Start the wishbone cycle
1391 sync += r1.wb.we.eq(0)
1392 sync += r1.wb.cyc.eq(1)
1393 sync += r1.wb.stb.eq(1)
1394
1395 # Track that we had one request sent
1396 sync += r1.state.eq(State.RELOAD_WAIT_ACK)
1397 sync += r1.write_tag.eq(1)
1398
1399 with m.Case(Op.OP_LOAD_NC):
1400 sync += r1.wb.cyc.eq(1)
1401 sync += r1.wb.stb.eq(1)
1402 sync += r1.wb.we.eq(0)
1403 sync += r1.state.eq(State.NC_LOAD_WAIT_ACK)
1404
1405 with m.Case(Op.OP_STORE_HIT, Op.OP_STORE_MISS):
1406 with m.If(~req.dcbz):
1407 sync += r1.state.eq(State.STORE_WAIT_ACK)
1408 sync += r1.acks_pending.eq(1)
1409 sync += r1.full.eq(0)
1410 sync += r1.slow_valid.eq(1)
1411
1412 with m.If(~req.mmu_req):
1413 sync += r1.ls_valid.eq(1)
1414 with m.Else():
1415 sync += r1.mmu_done.eq(1)
1416
1417 with m.If(req.op == Op.OP_STORE_HIT):
1418 sync += r1.write_bram.eq(1)
1419 with m.Else():
1420 # dcbz is handled much like a load miss except
1421 # that we are writing to memory instead of reading
1422 sync += r1.state.eq(State.RELOAD_WAIT_ACK)
1423
1424 with m.If(req.op == Op.OP_STORE_MISS):
1425 sync += r1.write_tag.eq(1)
1426
1427 sync += r1.wb.we.eq(1)
1428 sync += r1.wb.cyc.eq(1)
1429 sync += r1.wb.stb.eq(1)
1430
1431 # OP_NONE and OP_BAD do nothing
1432 # OP_BAD & OP_STCX_FAIL were
1433 # handled above already
1434 with m.Case(Op.OP_NONE):
1435 pass
1436 with m.Case(Op.OP_BAD):
1437 pass
1438 with m.Case(Op.OP_STCX_FAIL):
1439 pass
1440
1441 with m.Case(State.RELOAD_WAIT_ACK):
1442 ld_stbs_done = Signal()
1443 # Requests are all sent if stb is 0
1444 comb += ld_stbs_done.eq(~r1.wb.stb)
1445
1446 # If we are still sending requests, was one accepted?
1447 with m.If((~bus.stall) & r1.wb.stb):
1448 # That was the last word? We are done sending.
1449 # Clear stb and set ld_stbs_done so we can handle an
1450 # eventual last ack on the same cycle.
1451 # sigh - reconstruct wb adr with 3 extra 0s at front
1452 wb_adr = Cat(Const(0, ROW_OFF_BITS), r1.wb.adr)
1453 with m.If(is_last_row_addr(wb_adr, r1.end_row_ix)):
1454 sync += r1.wb.stb.eq(0)
1455 comb += ld_stbs_done.eq(1)
1456
1457 # Calculate the next row address in the current cache line
1458 row = Signal(LINE_OFF_BITS-ROW_OFF_BITS)
1459 comb += row.eq(r1.wb.adr)
1460 sync += r1.wb.adr[:LINE_OFF_BITS-ROW_OFF_BITS].eq(row+1)
1461
1462 # Incoming acks processing
1463 sync += r1.forward_valid1.eq(bus.ack)
1464 with m.If(bus.ack):
1465 srow = Signal(ROW_LINE_BITS)
1466 comb += srow.eq(r1.store_row)
1467 sync += r1.rows_valid[srow].eq(1)
1468
1469 # If this is the data we were looking for,
1470 # we can complete the request next cycle.
1471 # Compare the whole address in case the
1472 # request in r1.req is not the one that
1473 # started this refill.
1474 with m.If(req.valid & r1.req.same_tag &
1475 ((r1.dcbz & r1.req.dcbz) |
1476 (~r1.dcbz & (r1.req.op == Op.OP_LOAD_MISS))) &
1477 (r1.store_row == get_row(req.real_addr))):
1478 sync += r1.full.eq(0)
1479 sync += r1.slow_valid.eq(1)
1480 with m.If(~r1.mmu_req):
1481 sync += r1.ls_valid.eq(1)
1482 with m.Else():
1483 sync += r1.mmu_done.eq(1)
1484 sync += r1.forward_sel.eq(~0) # all 1s
1485 sync += r1.use_forward1.eq(1)
1486
1487 # Check for completion
1488 with m.If(ld_stbs_done & is_last_row(r1.store_row,
1489 r1.end_row_ix)):
1490 # Complete wishbone cycle
1491 sync += r1.wb.cyc.eq(0)
1492
1493 # Cache line is now valid
1494 cv = Signal(NUM_WAYS)
1495 comb += cv.eq(cache_tags[r1.store_index].valid)
1496 comb += cv.bit_select(r1.store_way, 1).eq(1)
1497 sync += cache_tags[r1.store_index].valid.eq(cv)
1498
1499 sync += r1.state.eq(State.IDLE)
1500 sync += Display("cache valid set %x "
1501 "idx %d way %d",
1502 cv, r1.store_index, r1.store_way)
1503
1504 # Increment store row counter
1505 sync += r1.store_row.eq(next_row(r1.store_row))
1506
1507 with m.Case(State.STORE_WAIT_ACK):
1508 st_stbs_done = Signal()
1509 acks = Signal(3)
1510 adjust_acks = Signal(3)
1511
1512 comb += st_stbs_done.eq(~r1.wb.stb)
1513 comb += acks.eq(r1.acks_pending)
1514
1515 with m.If(r1.inc_acks != r1.dec_acks):
1516 with m.If(r1.inc_acks):
1517 comb += adjust_acks.eq(acks + 1)
1518 with m.Else():
1519 comb += adjust_acks.eq(acks - 1)
1520 with m.Else():
1521 comb += adjust_acks.eq(acks)
1522
1523 sync += r1.acks_pending.eq(adjust_acks)
1524
1525 # Clear stb when slave accepted request
1526 with m.If(~bus.stall):
1527 # See if there is another store waiting
1528 # to be done which is in the same real page.
1529 with m.If(req.valid):
1530 _ra = req.real_addr[ROW_OFF_BITS:SET_SIZE_BITS]
1531 sync += r1.wb.adr[0:SET_SIZE_BITS-ROW_OFF_BITS].eq(_ra)
1532 sync += r1.wb.dat.eq(req.data)
1533 sync += r1.wb.sel.eq(req.byte_sel)
1534
1535 with m.If((adjust_acks < 7) & req.same_tag &
1536 ((req.op == Op.OP_STORE_MISS)
1537 | (req.op == Op.OP_STORE_HIT))):
1538 sync += r1.wb.stb.eq(1)
1539 comb += st_stbs_done.eq(0)
1540
1541 with m.If(req.op == Op.OP_STORE_HIT):
1542 sync += r1.write_bram.eq(1)
1543 sync += r1.full.eq(0)
1544 sync += r1.slow_valid.eq(1)
1545
1546 # Store requests never come from the MMU
1547 sync += r1.ls_valid.eq(1)
1548 comb += st_stbs_done.eq(0)
1549 sync += r1.inc_acks.eq(1)
1550 with m.Else():
1551 sync += r1.wb.stb.eq(0)
1552 comb += st_stbs_done.eq(1)
1553
1554 # Got ack ? See if complete.
1555 with m.If(bus.ack):
1556 with m.If(st_stbs_done & (adjust_acks == 1)):
1557 sync += r1.state.eq(State.IDLE)
1558 sync += r1.wb.cyc.eq(0)
1559 sync += r1.wb.stb.eq(0)
1560 sync += r1.dec_acks.eq(1)
1561
1562 with m.Case(State.NC_LOAD_WAIT_ACK):
1563 # Clear stb when slave accepted request
1564 with m.If(~bus.stall):
1565 sync += r1.wb.stb.eq(0)
1566
1567 # Got ack ? complete.
1568 with m.If(bus.ack):
1569 sync += r1.state.eq(State.IDLE)
1570 sync += r1.full.eq(0)
1571 sync += r1.slow_valid.eq(1)
1572
1573 with m.If(~r1.mmu_req):
1574 sync += r1.ls_valid.eq(1)
1575 with m.Else():
1576 sync += r1.mmu_done.eq(1)
1577
1578 sync += r1.forward_sel.eq(~0) # all 1s
1579 sync += r1.use_forward1.eq(1)
1580 sync += r1.wb.cyc.eq(0)
1581 sync += r1.wb.stb.eq(0)
1582
1583 def dcache_log(self, m, r1, valid_ra, tlb_hit_way, stall_out, req_op):
1584
1585 sync = m.d.sync
1586 d_out, bus, log_out = self.d_out, self.bus, self.log_out
1587
1588 sync += log_out.eq(Cat(r1.state[:3], valid_ra, tlb_hit_way[:3],
1589 stall_out, req_op[:3], d_out.valid, d_out.error,
1590 r1.wb.cyc, r1.wb.stb, bus.ack, bus.stall,
1591 r1.req.real_addr[3:6]))
1592
1593 def elaborate(self, platform):
1594
1595 m = Module()
1596 comb = m.d.comb
1597 d_in = self.d_in
1598
1599 # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
1600 cache_tags = CacheTagArray()
1601 cache_tag_set = Signal(TAG_RAM_WIDTH)
1602
1603 # TODO attribute ram_style : string;
1604 # TODO attribute ram_style of cache_tags : signal is "distributed";
1605
1606 """note: these are passed to nmigen.hdl.Memory as "attributes".
1607 don't know how, just that they are.
1608 """
1609 dtlb = TLBArray()
1610 # TODO attribute ram_style of
1611 # dtlb_tags : signal is "distributed";
1612 # TODO attribute ram_style of
1613 # dtlb_ptes : signal is "distributed";
1614
1615 r0 = RegStage0("r0")
1616 r0_full = Signal()
1617
1618 r1 = RegStage1("r1")
1619
1620 reservation = Reservation()
1621
1622 # Async signals on incoming request
1623 req_index = Signal(INDEX_BITS)
1624 req_row = Signal(ROW_BITS)
1625 req_hit_way = Signal(WAY_BITS)
1626 req_tag = Signal(TAG_BITS)
1627 req_op = Signal(Op)
1628 req_data = Signal(64)
1629 req_same_tag = Signal()
1630 req_go = Signal()
1631
1632 early_req_row = Signal(ROW_BITS)
1633
1634 cancel_store = Signal()
1635 set_rsrv = Signal()
1636 clear_rsrv = Signal()
1637
1638 r0_valid = Signal()
1639 r0_stall = Signal()
1640
1641 use_forward1_next = Signal()
1642 use_forward2_next = Signal()
1643
1644 cache_out_row = Signal(WB_DATA_BITS)
1645
1646 plru_victim = PLRUOut()
1647 replace_way = Signal(WAY_BITS)
1648
1649 # Wishbone read/write/cache write formatting signals
1650 bus_sel = Signal(8)
1651
1652 # TLB signals
1653 tlb_way = TLBRecord("tlb_way")
1654 tlb_req_index = Signal(TLB_SET_BITS)
1655 tlb_hit = Signal()
1656 tlb_hit_way = Signal(TLB_WAY_BITS)
1657 pte = Signal(TLB_PTE_BITS)
1658 ra = Signal(REAL_ADDR_BITS)
1659 valid_ra = Signal()
1660 perm_attr = PermAttr("dc_perms")
1661 rc_ok = Signal()
1662 perm_ok = Signal()
1663 access_ok = Signal()
1664
1665 tlb_plru_victim = TLBPLRUOut()
1666
1667 # we don't yet handle collisions between loadstore1 requests
1668 # and MMU requests
1669 comb += self.m_out.stall.eq(0)
1670
1671 # Hold off the request in r0 when r1 has an uncompleted request
1672 comb += r0_stall.eq(r0_full & (r1.full | d_in.hold))
1673 comb += r0_valid.eq(r0_full & ~r1.full & ~d_in.hold)
1674 comb += self.stall_out.eq(r0_stall)
1675
1676 # deal with litex not doing wishbone pipeline mode
1677 # XXX in wrong way. FIFOs are needed in the SRAM test
1678 # so that stb/ack match up. same thing done in icache.py
1679 comb += self.bus.stall.eq(self.bus.cyc & ~self.bus.ack)
1680
1681 # Wire up wishbone request latch out of stage 1
1682 comb += self.bus.we.eq(r1.wb.we)
1683 comb += self.bus.adr.eq(r1.wb.adr)
1684 comb += self.bus.sel.eq(r1.wb.sel)
1685 comb += self.bus.stb.eq(r1.wb.stb)
1686 comb += self.bus.dat_w.eq(r1.wb.dat)
1687 comb += self.bus.cyc.eq(r1.wb.cyc)
1688
1689 # call sub-functions putting everything together, using shared
1690 # signals established above
1691 self.stage_0(m, r0, r1, r0_full)
1692 self.tlb_read(m, r0_stall, tlb_way, dtlb)
1693 self.tlb_search(m, tlb_req_index, r0, r0_valid,
1694 tlb_way, tlb_hit_way,
1695 pte, tlb_hit, valid_ra, perm_attr, ra)
1696 self.tlb_update(m, r0_valid, r0, dtlb, tlb_req_index,
1697 tlb_hit_way, tlb_hit, tlb_plru_victim,
1698 tlb_way)
1699 self.maybe_plrus(m, r1, plru_victim)
1700 self.maybe_tlb_plrus(m, r1, tlb_plru_victim)
1701 self.cache_tag_read(m, r0_stall, req_index, cache_tag_set, cache_tags)
1702 self.dcache_request(m, r0, ra, req_index, req_row, req_tag,
1703 r0_valid, r1, cache_tags, replace_way,
1704 use_forward1_next, use_forward2_next,
1705 req_hit_way, plru_victim, rc_ok, perm_attr,
1706 valid_ra, perm_ok, access_ok, req_op, req_go,
1707 tlb_hit, tlb_hit_way, tlb_way, cache_tag_set,
1708 cancel_store, req_same_tag, r0_stall, early_req_row)
1709 self.reservation_comb(m, cancel_store, set_rsrv, clear_rsrv,
1710 r0_valid, r0, reservation)
1711 self.reservation_reg(m, r0_valid, access_ok, set_rsrv, clear_rsrv,
1712 reservation, r0)
1713 self.writeback_control(m, r1, cache_out_row)
1714 self.rams(m, r1, early_req_row, cache_out_row, replace_way)
1715 self.dcache_fast_hit(m, req_op, r0_valid, r0, r1,
1716 req_hit_way, req_index, req_tag, access_ok,
1717 tlb_hit, tlb_hit_way, tlb_req_index)
1718 self.dcache_slow(m, r1, use_forward1_next, use_forward2_next,
1719 r0, replace_way,
1720 req_hit_way, req_same_tag,
1721 r0_valid, req_op, cache_tags, req_go, ra)
1722 #self.dcache_log(m, r1, valid_ra, tlb_hit_way, self.stall_out, req_op)
1723
1724 return m
1725
1726
1727 if __name__ == '__main__':
1728 dut = DCache()
1729 vl = rtlil.convert(dut, ports=[])
1730 with open("test_dcache.il", "w") as f:
1731 f.write(vl)