1 """DCache
2
3 based on Anton Blanchard microwatt dcache.vhdl
4
5 """
6
7 from enum import Enum, unique
8
9 from nmigen import Module, Signal, Elaboratable, Cat, Repl, Array, Const
10 from nmigen.cli import main
11 from nmutil.iocontrol import RecordObject
12 from nmigen.utils import log2_int
13 from nmigen.cli import rtlil
14
15
16 from soc.experiment.mem_types import (LoadStore1ToDCacheType,
17 DCacheToLoadStore1Type,
18 MMUToDCacheType,
19 DCacheToMMUType)
20
21 from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS, WB_SEL_BITS,
22 WBAddrType, WBDataType, WBSelType,
23 WBMasterOut, WBSlaveOut,
24 WBMasterOutVector, WBSlaveOutVector,
25 WBIOMasterOut, WBIOSlaveOut)
26
27 from soc.experiment.cache_ram import CacheRam
28 from soc.experiment.plru import PLRU
29
30
# TODO: make these parameters of DCache at some point
LINE_SIZE = 64    # Line size in bytes
NUM_LINES = 32    # Number of lines in a set
NUM_WAYS = 4      # Number of ways
TLB_SET_SIZE = 64 # L1 DTLB entries per set
TLB_NUM_WAYS = 2  # L1 DTLB number of sets
TLB_LG_PGSZ = 12  # L1 DTLB log_2(page_size)
LOG_LENGTH = 0    # Non-zero to enable log data collection

# BRAM organisation: We never access more than
#     WB_DATA_BITS at a time so to save
#     resources we make the array only that wide, and
#     use consecutive indices to make a cache "line"
#
#     ROW_SIZE is the width in bytes of the BRAM
#     (based on WB, so 64-bits)
ROW_SIZE = WB_DATA_BITS // 8

# ROW_PER_LINE is the number of rows (wishbone
# transactions) in a line
ROW_PER_LINE = LINE_SIZE // ROW_SIZE

# BRAM_ROWS is the number of rows in BRAM needed
# to represent the full dcache
BRAM_ROWS = NUM_LINES * ROW_PER_LINE
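# For reference, a worked example with the default geometry above:
#   ROW_SIZE     = 64 // 8 = 8 bytes per BRAM row
#   ROW_PER_LINE = 64 // 8 = 8 rows per cache line
#   BRAM_ROWS    = 32 * 8  = 256 rows per way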


# Bit field counts in the address

# REAL_ADDR_BITS is the number of real address
# bits that we store
REAL_ADDR_BITS = 56

# ROW_BITS is the number of bits to select a row
ROW_BITS = log2_int(BRAM_ROWS)

# ROW_LINE_BITS is the number of bits to select
# a row within a line
ROW_LINE_BITS = log2_int(ROW_PER_LINE)

# LINE_OFF_BITS is the number of bits for
# the offset in a cache line
LINE_OFF_BITS = log2_int(LINE_SIZE)

# ROW_OFF_BITS is the number of bits for
# the offset in a row
ROW_OFF_BITS = log2_int(ROW_SIZE)

# INDEX_BITS is the number of bits to
# select a cache line
INDEX_BITS = log2_int(NUM_LINES)

# SET_SIZE_BITS is the log base 2 of the set size
SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS

# TAG_BITS is the number of bits of
# the tag part of the address
TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS

# TAG_WIDTH is the width in bits of each way of the tag RAM
TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)

# WAY_BITS is the number of bits to select a way
WAY_BITS = log2_int(NUM_WAYS)

# Example of layout for 32 lines of 64 bytes:
#
# ..  tag    |index|  line  |
# ..         |   row   |    |
# ..         |     |---|    | ROW_LINE_BITS  (3)
# ..         |     |--- - --| LINE_OFF_BITS (6)
# ..         |         |- --| ROW_OFF_BITS  (3)
# ..         |----- ---|    | ROW_BITS      (8)
# ..         |-----|        | INDEX_BITS    (5)
# .. --------|              | TAG_BITS      (45)

TAG_RAM_WIDTH = TAG_WIDTH * NUM_WAYS
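# For reference, with the defaults: TAG_BITS = 56 - 11 = 45, padded up
# to the next byte multiple gives TAG_WIDTH = 48, and one tag RAM row
# holds all ways, so TAG_RAM_WIDTH = 48 * 4 = 192.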

def CacheTagArray():
    return Array(Signal(TAG_RAM_WIDTH) for x in range(NUM_LINES))

def CacheValidBitsArray():
    # one valid bit per way, for each cache line
    return Array(Signal(NUM_WAYS) for x in range(NUM_LINES))

def RowPerLineValidArray():
    return Array(Signal() for x in range(ROW_PER_LINE))

# L1 TLB
TLB_SET_BITS = log2_int(TLB_SET_SIZE)
TLB_WAY_BITS = log2_int(TLB_NUM_WAYS)
TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_SET_BITS)
TLB_TAG_WAY_BITS = TLB_NUM_WAYS * TLB_EA_TAG_BITS
TLB_PTE_BITS = 64
TLB_PTE_WAY_BITS = TLB_NUM_WAYS * TLB_PTE_BITS
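# For reference, the derived TLB geometry with the defaults:
#   TLB_SET_BITS = 6, TLB_WAY_BITS = 1
#   TLB_EA_TAG_BITS  = 64 - (12 + 6) = 46
#   TLB_TAG_WAY_BITS = 2 * 46 = 92, TLB_PTE_WAY_BITS = 2 * 64 = 128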

assert (LINE_SIZE % ROW_SIZE) == 0, "LINE_SIZE not multiple of ROW_SIZE"
assert (LINE_SIZE & (LINE_SIZE - 1)) == 0, "LINE_SIZE not power of 2"
assert (NUM_LINES & (NUM_LINES - 1)) == 0, "NUM_LINES not power of 2"
assert (ROW_PER_LINE & (ROW_PER_LINE - 1)) == 0, "ROW_PER_LINE not power of 2"
assert ROW_BITS == (INDEX_BITS + ROW_LINE_BITS), "geometry bits don't add up"
assert (LINE_OFF_BITS == ROW_OFF_BITS + ROW_LINE_BITS), \
        "geometry bits don't add up"
assert REAL_ADDR_BITS == (TAG_BITS + INDEX_BITS + LINE_OFF_BITS), \
        "geometry bits don't add up"
assert REAL_ADDR_BITS == (TAG_BITS + ROW_BITS + ROW_OFF_BITS), \
        "geometry bits don't add up"
assert 64 == WB_DATA_BITS, "Can't yet handle wb width that isn't 64-bits"
assert SET_SIZE_BITS <= TLB_LG_PGSZ, "Set indexed by virtual address"


def TLBValidBitsArray():
    return Array(Signal(TLB_NUM_WAYS) for x in range(TLB_SET_SIZE))

def TLBTagEAArray():
    return Array(Signal(TLB_EA_TAG_BITS) for x in range(TLB_NUM_WAYS))

def TLBTagsArray():
    return Array(Signal(TLB_TAG_WAY_BITS) for x in range(TLB_SET_SIZE))

def TLBPtesArray():
    return Array(Signal(TLB_PTE_WAY_BITS) for x in range(TLB_SET_SIZE))

def HitWaySet():
    return Array(Signal(NUM_WAYS) for x in range(TLB_NUM_WAYS))

# Cache RAM interface
def CacheRamOut():
    return Array(Signal(WB_DATA_BITS) for x in range(NUM_WAYS))

# PLRU output interface
def PLRUOut():
    return Array(Signal(WAY_BITS) for x in range(NUM_LINES))

# TLB PLRU output interface
def TLBPLRUOut():
    return Array(Signal(TLB_WAY_BITS) for x in range(TLB_SET_SIZE))

# Helper functions to decode incoming requests
#
# Return the cache line index (tag index) for an address
def get_index(addr):
    return addr[LINE_OFF_BITS:SET_SIZE_BITS]

# Return the cache row index (data memory) for an address
def get_row(addr):
    return addr[ROW_OFF_BITS:SET_SIZE_BITS]

# Return the index of a row within a line
def get_row_of_line(row):
    return row[:ROW_LINE_BITS]

# Return whether the address is the last row of a cache line
def is_last_row_addr(addr, last):
    return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last

# Return whether a row is the last row of a cache line
def is_last_row(row, last):
    return get_row_of_line(row) == last

# Return the next row in the current cache line. We use a
# dedicated function in order to limit the size of the
# generated adder to be only the bits within a cache line
# (3 bits with default settings)
def next_row(row):
    row_v = row[0:ROW_LINE_BITS] + 1
    return Cat(row_v[:ROW_LINE_BITS], row[ROW_LINE_BITS:])
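# For example, with ROW_LINE_BITS = 3 a row index of 0b10_111 (line 2,
# row 7) wraps to 0b10_000 (line 2, row 0): only the low 3 bits are
# incremented, and the upper (line-select) bits pass through unchanged.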

# Get the tag value from the address
def get_tag(addr):
    return addr[SET_SIZE_BITS:REAL_ADDR_BITS]
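# For reference, with the default geometry the decode helpers above
# slice a real address as follows: addr[3:6] is the offset of a row
# within its line, addr[6:11] is the line index (get_index), addr[3:11]
# is the BRAM row (get_row) and addr[11:56] is the tag (get_tag).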

# Read a tag from a tag memory row
def read_tag(way, tagset):
    return tagset.word_select(way, TAG_WIDTH)[:TAG_BITS]

# Read a TLB tag from a TLB tag memory row
def read_tlb_tag(way, tags):
    return tags.word_select(way, TLB_EA_TAG_BITS)

# Write a TLB tag to a TLB tag memory row
def write_tlb_tag(way, tags, tag):
    return read_tlb_tag(way, tags).eq(tag)

# Read a PTE from a TLB PTE memory row
def read_tlb_pte(way, ptes):
    return ptes.word_select(way, TLB_PTE_BITS)

def write_tlb_pte(way, ptes, newpte):
    return read_tlb_pte(way, ptes).eq(newpte)


# Record for storing permission, attribute, etc. bits from a PTE
class PermAttr(RecordObject):
    def __init__(self):
        super().__init__()
        self.reference = Signal()
        self.changed = Signal()
        self.nocache = Signal()
        self.priv = Signal()
        self.rd_perm = Signal()
        self.wr_perm = Signal()


def extract_perm_attr(pte):
    pa = PermAttr()
    pa.reference = pte[8]
    pa.changed = pte[7]
    pa.nocache = pte[5]
    pa.priv = pte[3]
    pa.rd_perm = pte[2]
    pa.wr_perm = pte[1]
    return pa


# Type of operation on a "valid" input
@unique
class Op(Enum):
    OP_NONE = 0
    OP_BAD = 1           # NC cache hit, TLB miss, prot/RC failure
    OP_STCX_FAIL = 2     # conditional store w/o reservation
    OP_LOAD_HIT = 3      # Cache hit on load
    OP_LOAD_MISS = 4     # Load missing cache
    OP_LOAD_NC = 5       # Non-cachable load
    OP_STORE_HIT = 6     # Store hitting cache
    OP_STORE_MISS = 7    # Store missing cache


# Cache state machine
@unique
class State(Enum):
    IDLE = 0             # Normal load hit processing
    RELOAD_WAIT_ACK = 1  # Cache reload wait ack
    STORE_WAIT_ACK = 2   # Store wait ack
    NC_LOAD_WAIT_ACK = 3 # Non-cachable load wait ack


# Dcache operations:
#
# In order to make timing, we use the BRAMs with
# an output buffer, which means that the BRAM
# output is delayed by an extra cycle.
#
# Thus, the dcache has a 2-stage internal pipeline
# for cache hits with no stalls.
#
# All other operations are handled via stalling
# in the first stage.
#
# The second stage can thus complete a hit at the same
# time as the first stage emits a stall for a complex op.
#
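# A rough sketch of the hit path, assuming no stalls: the request is
# latched into r0 while the data BRAM read is already being issued
# (see early_req_row below); the next cycle performs the TLB and tag
# compare on r0.req; one cycle later the BRAM output buffer is valid
# and writeback_control muxes the hit data out to d_out.
#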
# Stage 0 register, basically contains just the latched request

class RegStage0(RecordObject):
    def __init__(self):
        super().__init__()
        self.req = LoadStore1ToDCacheType()
        self.tlbie = Signal()
        self.doall = Signal()
        self.tlbld = Signal()
        self.mmu_req = Signal() # indicates source of request


class MemAccessRequest(RecordObject):
    def __init__(self):
        super().__init__()
        self.op = Signal(Op)
        self.valid = Signal()
        self.dcbz = Signal()
        self.real_addr = Signal(REAL_ADDR_BITS)
        self.data = Signal(64)
        self.byte_sel = Signal(8)
        self.hit_way = Signal(WAY_BITS)
        self.same_tag = Signal()
        self.mmu_req = Signal()


# First stage register, contains state for stage 1 of load hits
# and for the state machine used by all other operations
class RegStage1(RecordObject):
    def __init__(self):
        super().__init__()
        # Info about the request
        self.full = Signal()    # have uncompleted request
        self.mmu_req = Signal() # request is from MMU
        self.req = MemAccessRequest()

        # Cache hit state
        self.hit_way = Signal(WAY_BITS)
        self.hit_load_valid = Signal()
        self.hit_index = Signal(INDEX_BITS)
        self.cache_hit = Signal()

        # TLB hit state
        self.tlb_hit = Signal()
        self.tlb_hit_way = Signal(TLB_WAY_BITS)
        self.tlb_hit_index = Signal(TLB_SET_BITS)

        # 2-stage data buffer for data forwarded from writes to reads
        self.forward_data1 = Signal(64)
        self.forward_data2 = Signal(64)
        self.forward_sel1 = Signal(8)
        self.forward_valid1 = Signal()
        self.forward_way1 = Signal(WAY_BITS)
        self.forward_row1 = Signal(ROW_BITS)
        self.use_forward1 = Signal()
        self.forward_sel = Signal(8)

        # Cache miss state (reload state machine)
        self.state = Signal(State)
        self.dcbz = Signal()
        self.write_bram = Signal()
        self.write_tag = Signal()
        self.slow_valid = Signal()
        self.wb = WBMasterOut()
        self.reload_tag = Signal(TAG_BITS)
        self.store_way = Signal(WAY_BITS)
        self.store_row = Signal(ROW_BITS)
        self.store_index = Signal(INDEX_BITS)
        self.end_row_ix = Signal(ROW_LINE_BITS) # a row index within a line
        self.rows_valid = RowPerLineValidArray()
        self.acks_pending = Signal(3)
        self.inc_acks = Signal()
        self.dec_acks = Signal()

        # Signals to complete (possibly with error)
        self.ls_valid = Signal()
        self.ls_error = Signal()
        self.mmu_done = Signal()
        self.mmu_error = Signal()
        self.cache_paradox = Signal()

        # Signal to complete a failed stcx.
        self.stcx_fail = Signal()


# Reservation information
class Reservation(RecordObject):
    def __init__(self):
        super().__init__()
        self.valid = Signal()
        self.addr = Signal(64-LINE_OFF_BITS)


class DTLBUpdate(Elaboratable):
    def __init__(self):
        self.tlbie = Signal()
        self.tlbwe = Signal()
        self.doall = Signal()
        self.updated = Signal()
        self.v_updated = Signal()
        self.tlb_hit = Signal()
        self.tlb_req_index = Signal(TLB_SET_BITS)

        self.tlb_hit_way = Signal(TLB_WAY_BITS)
        self.tlb_tag_way = Signal(TLB_TAG_WAY_BITS)
        self.tlb_pte_way = Signal(TLB_PTE_WAY_BITS)
        self.repl_way = Signal(TLB_WAY_BITS)
        self.eatag = Signal(TLB_EA_TAG_BITS)
        self.pte_data = Signal(TLB_PTE_BITS)

        self.dv = Signal(TLB_NUM_WAYS)         # valid bits for the set

        self.tb_out = Signal(TLB_TAG_WAY_BITS) # tag set out
        self.pb_out = Signal(TLB_PTE_WAY_BITS) # PTE set out
        self.db_out = Signal(TLB_NUM_WAYS)     # valid bits out

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        tagset = Signal(TLB_TAG_WAY_BITS)
        pteset = Signal(TLB_PTE_WAY_BITS)

        tb_out, pb_out, db_out = self.tb_out, self.pb_out, self.db_out

        with m.If(self.tlbie & self.doall):
            pass # clear all back in parent
        with m.Elif(self.tlbie):
            with m.If(self.tlb_hit):
                comb += db_out.eq(self.dv)
                # clear the valid bit of the hit way (invalidate entry)
                comb += db_out.bit_select(self.tlb_hit_way, 1).eq(0)
                comb += self.v_updated.eq(1)

        with m.Elif(self.tlbwe):

            comb += tagset.eq(self.tlb_tag_way)
            comb += write_tlb_tag(self.repl_way, tagset, self.eatag)
            comb += tb_out.eq(tagset)

            comb += pteset.eq(self.tlb_pte_way)
            comb += write_tlb_pte(self.repl_way, pteset, self.pte_data)
            comb += pb_out.eq(pteset)

            comb += db_out.bit_select(self.repl_way, 1).eq(1)

            comb += self.updated.eq(1)
            comb += self.v_updated.eq(1)

        return m


class DCachePendingHit(Elaboratable):

    def __init__(self, tlb_pte_way, tlb_valid_way, tlb_hit_way,
                 cache_valid_idx, cache_tag_set,
                 req_addr, hit_set):

        self.go = Signal()
        self.virt_mode = Signal()
        self.is_hit = Signal()
        self.tlb_hit = Signal()
        self.hit_way = Signal(WAY_BITS)
        self.rel_match = Signal()
        self.req_index = Signal(INDEX_BITS)
        self.reload_tag = Signal(TAG_BITS)

        self.tlb_hit_way = tlb_hit_way
        self.tlb_pte_way = tlb_pte_way
        self.tlb_valid_way = tlb_valid_way
        self.cache_valid_idx = cache_valid_idx
        self.cache_tag_set = cache_tag_set
        self.req_addr = req_addr
        self.hit_set = hit_set

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        go = self.go
        virt_mode = self.virt_mode
        is_hit = self.is_hit
        tlb_pte_way = self.tlb_pte_way
        tlb_valid_way = self.tlb_valid_way
        cache_valid_idx = self.cache_valid_idx
        cache_tag_set = self.cache_tag_set
        req_addr = self.req_addr
        tlb_hit_way = self.tlb_hit_way
        tlb_hit = self.tlb_hit
        hit_set = self.hit_set
        hit_way = self.hit_way
        rel_match = self.rel_match
        req_index = self.req_index
        reload_tag = self.reload_tag

        rel_matches = Array(Signal() for i in range(TLB_NUM_WAYS))
        hit_way_set = HitWaySet()

        # Test if pending request is a hit on any way
        # In order to make timing in virtual mode,
        # when we are using the TLB, we compare each
        # way with each of the real addresses from each way of
        # the TLB, and then decide later which match to use.

        with m.If(virt_mode):
            for j in range(TLB_NUM_WAYS):
                s_tag = Signal(TAG_BITS, name="s_tag%d" % j)
                s_hit = Signal()
                s_pte = Signal(TLB_PTE_BITS)
                s_ra = Signal(REAL_ADDR_BITS)
                comb += s_pte.eq(read_tlb_pte(j, tlb_pte_way))
                comb += s_ra.eq(Cat(req_addr[0:TLB_LG_PGSZ],
                                    s_pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
                comb += s_tag.eq(get_tag(s_ra))

                for i in range(NUM_WAYS):
                    is_tag_hit = Signal()
                    comb += is_tag_hit.eq(go & cache_valid_idx[i] &
                                          (read_tag(i, cache_tag_set) == s_tag)
                                          & tlb_valid_way[j])
                    with m.If(is_tag_hit):
                        comb += hit_way_set[j].eq(i)
                        comb += s_hit.eq(1)
                comb += hit_set[j].eq(s_hit)
                with m.If(s_tag == reload_tag):
                    comb += rel_matches[j].eq(1)
            with m.If(tlb_hit):
                comb += is_hit.eq(hit_set[tlb_hit_way])
                comb += hit_way.eq(hit_way_set[tlb_hit_way])
                comb += rel_match.eq(rel_matches[tlb_hit_way])
        with m.Else():
            s_tag = Signal(TAG_BITS)
            comb += s_tag.eq(get_tag(req_addr))
            for i in range(NUM_WAYS):
                is_tag_hit = Signal()
                # parentheses around the "==" are essential: "&" binds
                # more tightly than "==" in python
                comb += is_tag_hit.eq(go & cache_valid_idx[i] &
                                      (read_tag(i, cache_tag_set) == s_tag))
                with m.If(is_tag_hit):
                    comb += hit_way.eq(i)
                    comb += is_hit.eq(1)
            with m.If(s_tag == reload_tag):
                comb += rel_match.eq(1)

        return m


class DCache(Elaboratable):
    """Set associative dcache write-through
    TODO (in no specific order):
    * See list in icache.vhdl
    * Complete load misses on the cycle when WB data comes instead of
      at the end of line (this requires dealing with requests coming in
      while not idle...)
    """
    def __init__(self):
        self.d_in = LoadStore1ToDCacheType()
        self.d_out = DCacheToLoadStore1Type()

        self.m_in = MMUToDCacheType()
        self.m_out = DCacheToMMUType()

        self.stall_out = Signal()

        self.wb_out = WBMasterOut()
        self.wb_in = WBSlaveOut()

        self.log_out = Signal(20)

    def stage_0(self, m, r0, r1, r0_full):
        """Latch the request in r0.req as long as we're not stalling
        """
        comb = m.d.comb
        sync = m.d.sync
        d_in, d_out, m_in = self.d_in, self.d_out, self.m_in

        r = RegStage0()

        # TODO, this goes in unit tests and formal proofs
        with m.If(d_in.valid & m_in.valid):
            #sync += Display("request collision loadstore vs MMU")
            pass

        # r is combinatorial staging only: it is registered (once) into
        # r0 below, matching the process-variable idiom of the VHDL
        with m.If(m_in.valid):
            comb += r.req.valid.eq(1)
            comb += r.req.load.eq(~(m_in.tlbie | m_in.tlbld))
            comb += r.req.dcbz.eq(0)
            comb += r.req.nc.eq(0)
            comb += r.req.reserve.eq(0)
            comb += r.req.virt_mode.eq(1)
            comb += r.req.priv_mode.eq(1)
            comb += r.req.addr.eq(m_in.addr)
            comb += r.req.data.eq(m_in.pte)
            comb += r.req.byte_sel.eq(~0) # Const -1 sets all to 0b111....
            comb += r.tlbie.eq(m_in.tlbie)
            comb += r.doall.eq(m_in.doall)
            comb += r.tlbld.eq(m_in.tlbld)
            comb += r.mmu_req.eq(1)
        with m.Else():
            comb += r.req.eq(d_in)
            comb += r.tlbie.eq(0)
            comb += r.doall.eq(0)
            comb += r.tlbld.eq(0)
            comb += r.mmu_req.eq(0)
        with m.If(~(r1.full & r0_full)):
            sync += r0.eq(r)
            sync += r0_full.eq(r.req.valid)

    def tlb_read(self, m, r0_stall, tlb_valid_way,
                 tlb_tag_way, tlb_pte_way, dtlb_valid_bits,
                 dtlb_tags, dtlb_ptes):
        """TLB
        Operates in the second cycle on the request latched in r0.req.
        TLB updates write the entry at the end of the second cycle.
        """
        comb = m.d.comb
        sync = m.d.sync
        m_in, d_in = self.m_in, self.d_in

        index = Signal(TLB_SET_BITS)
        addrbits = Signal(TLB_SET_BITS)

        amin = TLB_LG_PGSZ
        amax = TLB_LG_PGSZ + TLB_SET_BITS

        with m.If(m_in.valid):
            comb += addrbits.eq(m_in.addr[amin : amax])
        with m.Else():
            comb += addrbits.eq(d_in.addr[amin : amax])
        comb += index.eq(addrbits)

        # If we have any op and the previous op isn't finished,
        # then keep the same output for next cycle.
        with m.If(~r0_stall):
            sync += tlb_valid_way.eq(dtlb_valid_bits[index])
            sync += tlb_tag_way.eq(dtlb_tags[index])
            sync += tlb_pte_way.eq(dtlb_ptes[index])

    def maybe_tlb_plrus(self, m, r1, tlb_plru_victim):
        """Generate TLB PLRUs
        """
        comb = m.d.comb
        sync = m.d.sync

        if TLB_NUM_WAYS == 0:
            return
        for i in range(TLB_SET_SIZE):
            # TLB PLRU interface
            tlb_plru = PLRU(TLB_WAY_BITS)
            setattr(m.submodules, "maybe_plru_%d" % i, tlb_plru)
            tlb_plru_acc_en = Signal()

            comb += tlb_plru_acc_en.eq(r1.tlb_hit & (r1.tlb_hit_index == i))
            comb += tlb_plru.acc_en.eq(tlb_plru_acc_en)
            comb += tlb_plru.acc.eq(r1.tlb_hit_way)
            comb += tlb_plru_victim[i].eq(tlb_plru.lru_o)

    def tlb_search(self, m, tlb_req_index, r0, r0_valid,
                   tlb_valid_way, tlb_tag_way, tlb_hit_way,
                   tlb_pte_way, pte, tlb_hit, valid_ra, perm_attr, ra):

        comb = m.d.comb
        sync = m.d.sync

        hitway = Signal(TLB_WAY_BITS)
        hit = Signal()
        eatag = Signal(TLB_EA_TAG_BITS)

        TLB_LG_END = TLB_LG_PGSZ + TLB_SET_BITS
        comb += tlb_req_index.eq(r0.req.addr[TLB_LG_PGSZ : TLB_LG_END])
        comb += eatag.eq(r0.req.addr[TLB_LG_END : 64 ])

        for i in range(TLB_NUM_WAYS):
            is_tag_hit = Signal()
            # parentheses around the "==" are essential: "&" binds more
            # tightly than "==" in python
            comb += is_tag_hit.eq(tlb_valid_way[i]
                                  & (read_tlb_tag(i, tlb_tag_way) == eatag))
            with m.If(is_tag_hit):
                comb += hitway.eq(i)
                comb += hit.eq(1)

        comb += tlb_hit.eq(hit & r0_valid)
        comb += tlb_hit_way.eq(hitway)

        with m.If(tlb_hit):
            comb += pte.eq(read_tlb_pte(hitway, tlb_pte_way))
        with m.Else():
            comb += pte.eq(0)
        comb += valid_ra.eq(tlb_hit | ~r0.req.virt_mode)
        with m.If(r0.req.virt_mode):
            comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
                              r0.req.addr[ROW_OFF_BITS:TLB_LG_PGSZ],
                              pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
            comb += perm_attr.eq(extract_perm_attr(pte))
        with m.Else():
            comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
                              r0.req.addr[ROW_OFF_BITS:REAL_ADDR_BITS]))

            comb += perm_attr.reference.eq(1)
            comb += perm_attr.changed.eq(1)
            comb += perm_attr.priv.eq(1)
            comb += perm_attr.nocache.eq(0)
            comb += perm_attr.rd_perm.eq(1)
            comb += perm_attr.wr_perm.eq(1)

    def tlb_update(self, m, r0_valid, r0, dtlb_valid_bits, tlb_req_index,
                   tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_tag_way,
                   dtlb_tags, tlb_pte_way, dtlb_ptes):

        comb = m.d.comb
        sync = m.d.sync

        tlbie = Signal()
        tlbwe = Signal()

        comb += tlbie.eq(r0_valid & r0.tlbie)
        comb += tlbwe.eq(r0_valid & r0.tlbld)

        m.submodules.tlb_update = d = DTLBUpdate()
        with m.If(tlbie & r0.doall):
            # clear all valid bits at once
            for i in range(TLB_SET_SIZE):
                sync += dtlb_valid_bits[i].eq(0)
        with m.If(d.updated):
            sync += dtlb_tags[tlb_req_index].eq(d.tb_out)
            sync += dtlb_ptes[tlb_req_index].eq(d.pb_out)
        with m.If(d.v_updated):
            sync += dtlb_valid_bits[tlb_req_index].eq(d.db_out)

        comb += d.dv.eq(dtlb_valid_bits[tlb_req_index])

        comb += d.tlbie.eq(tlbie)
        comb += d.tlbwe.eq(tlbwe)
        comb += d.doall.eq(r0.doall)
        comb += d.tlb_hit.eq(tlb_hit)
        comb += d.tlb_hit_way.eq(tlb_hit_way)
        comb += d.tlb_tag_way.eq(tlb_tag_way)
        comb += d.tlb_pte_way.eq(tlb_pte_way)
        comb += d.tlb_req_index.eq(tlb_req_index)

        with m.If(tlb_hit):
            comb += d.repl_way.eq(tlb_hit_way)
        with m.Else():
            comb += d.repl_way.eq(tlb_plru_victim[tlb_req_index])
        comb += d.eatag.eq(r0.req.addr[TLB_LG_PGSZ + TLB_SET_BITS:64])
        comb += d.pte_data.eq(r0.req.data)

    def maybe_plrus(self, m, r1, plru_victim):
        """Generate PLRUs
        """
        comb = m.d.comb
        sync = m.d.sync

        if NUM_WAYS == 0:
            return

        for i in range(NUM_LINES):
            # PLRU interface
            plru = PLRU(WAY_BITS)
            setattr(m.submodules, "plru%d" % i, plru)
            plru_acc_en = Signal()

            comb += plru_acc_en.eq(r1.cache_hit & (r1.hit_index == i))
            comb += plru.acc_en.eq(plru_acc_en)
            comb += plru.acc.eq(r1.hit_way)
            comb += plru_victim[i].eq(plru.lru_o)

    def cache_tag_read(self, m, r0_stall, req_index, cache_tag_set, cache_tags):
        """Cache tag RAM read port
        """
        comb = m.d.comb
        sync = m.d.sync
        m_in, d_in = self.m_in, self.d_in

        index = Signal(INDEX_BITS)

        with m.If(r0_stall):
            comb += index.eq(req_index)
        with m.Elif(m_in.valid):
            comb += index.eq(get_index(m_in.addr))
        with m.Else():
            comb += index.eq(get_index(d_in.addr))
        sync += cache_tag_set.eq(cache_tags[index])

    def dcache_request(self, m, r0, ra, req_index, req_row, req_tag,
                       r0_valid, r1, cache_valid_bits, replace_way,
                       use_forward1_next, use_forward2_next,
                       req_hit_way, plru_victim, rc_ok, perm_attr,
                       valid_ra, perm_ok, access_ok, req_op, req_go,
                       tlb_pte_way,
                       tlb_hit, tlb_hit_way, tlb_valid_way, cache_tag_set,
                       cancel_store, req_same_tag, r0_stall, early_req_row):
        """Cache request parsing and hit detection
        """

        comb = m.d.comb
        sync = m.d.sync
        m_in, d_in = self.m_in, self.d_in

        is_hit = Signal()
        hit_way = Signal(WAY_BITS)
        op = Signal(Op)
        opsel = Signal(3)
        go = Signal()
        nc = Signal()
        hit_set = Array(Signal() for i in range(TLB_NUM_WAYS))
        cache_valid_idx = Signal(NUM_WAYS) # one valid bit per way

        # Extract line, row and tag from request
        comb += req_index.eq(get_index(r0.req.addr))
        comb += req_row.eq(get_row(r0.req.addr))
        comb += req_tag.eq(get_tag(ra))

        comb += go.eq(r0_valid & ~(r0.tlbie | r0.tlbld) & ~r1.ls_error)
        comb += cache_valid_idx.eq(cache_valid_bits[req_index])

        m.submodules.dcache_pend = dc = DCachePendingHit(tlb_pte_way,
                                            tlb_valid_way, tlb_hit_way,
                                            cache_valid_idx, cache_tag_set,
                                            r0.req.addr,
                                            hit_set)

        comb += dc.tlb_hit.eq(tlb_hit)
        comb += dc.reload_tag.eq(r1.reload_tag)
        comb += dc.virt_mode.eq(r0.req.virt_mode)
        comb += dc.go.eq(go)
        comb += dc.req_index.eq(req_index)
        comb += is_hit.eq(dc.is_hit)
        comb += hit_way.eq(dc.hit_way)
        comb += req_same_tag.eq(dc.rel_match)

        # See if the request matches the line currently being reloaded
        with m.If((r1.state == State.RELOAD_WAIT_ACK) &
                  (req_index == r1.store_index) & req_same_tag):
            # For a store, consider this a hit even if the row isn't
            # valid since it will be by the time we perform the store.
            # For a load, check the appropriate row valid bit.
            valid = r1.rows_valid[req_row % ROW_PER_LINE]
            comb += is_hit.eq(~r0.req.load | valid)
            comb += hit_way.eq(replace_way)

        # Whether to use forwarded data for a load or not
        comb += use_forward1_next.eq(0)
        with m.If((get_row(r1.req.real_addr) == req_row) &
                  (r1.req.hit_way == hit_way)):
            # Only need to consider r1.write_bram here, since if we
            # are writing refill data here, then we don't have a
            # cache hit this cycle on the line being refilled.
            # (There is the possibility that the load following the
            # load miss that started the refill could be to the old
            # contents of the victim line, since it is a couple of
            # cycles after the refill starts before we see the updated
            # cache tag. In that case we don't use the bypass.)
            comb += use_forward1_next.eq(r1.write_bram)
        comb += use_forward2_next.eq(0)
        with m.If((r1.forward_row1 == req_row) & (r1.forward_way1 == hit_way)):
            comb += use_forward2_next.eq(r1.forward_valid1)

        # The way that matched on a hit
        comb += req_hit_way.eq(hit_way)

        # The way to replace on a miss
        with m.If(r1.write_tag):
            comb += replace_way.eq(plru_victim[r1.store_index])
        with m.Else():
            comb += replace_way.eq(r1.store_way)

        # work out whether we have permission for this access
        # NB we don't yet implement AMR, thus no KUAP
        comb += rc_ok.eq(perm_attr.reference
                         & (r0.req.load | perm_attr.changed))
        comb += perm_ok.eq((r0.req.priv_mode | ~perm_attr.priv)
                           & perm_attr.wr_perm
                           | (r0.req.load & perm_attr.rd_perm))
        comb += access_ok.eq(valid_ra & perm_ok & rc_ok)
        # Combine the request and cache hit status to decide what
        # operation needs to be done
        comb += nc.eq(r0.req.nc | perm_attr.nocache)
        comb += op.eq(Op.OP_NONE)
        with m.If(go):
            with m.If(~access_ok):
                comb += op.eq(Op.OP_BAD)
            with m.Elif(cancel_store):
                comb += op.eq(Op.OP_STCX_FAIL)
            with m.Else():
                comb += opsel.eq(Cat(is_hit, nc, r0.req.load))
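                # For readability: the encoding of opsel follows from
                # the Cat() above (bit 0 is is_hit, bit 1 is nc,
                # bit 2 is load) and the switch cases below:
                #
                #   load nc hit | op
                #   ------------+--------------
                #    1    0  1  | OP_LOAD_HIT
                #    1    0  0  | OP_LOAD_MISS
                #    1    1  0  | OP_LOAD_NC
                #    0    0  1  | OP_STORE_HIT
                #    0    0  0  | OP_STORE_MISS
                #    0    1  0  | OP_STORE_MISS
                #    0    1  1  | OP_BAD (NC store hit: cache paradox)
                #    1    1  1  | OP_BAD (NC load hit: cache paradox)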
                with m.Switch(opsel):
                    with m.Case(0b101):
                        comb += op.eq(Op.OP_LOAD_HIT)
                    with m.Case(0b100):
                        comb += op.eq(Op.OP_LOAD_MISS)
                    with m.Case(0b110):
                        comb += op.eq(Op.OP_LOAD_NC)
                    with m.Case(0b001):
                        comb += op.eq(Op.OP_STORE_HIT)
                    with m.Case(0b000):
                        comb += op.eq(Op.OP_STORE_MISS)
                    with m.Case(0b010):
                        comb += op.eq(Op.OP_STORE_MISS)
                    with m.Case(0b011):
                        comb += op.eq(Op.OP_BAD)
                    with m.Case(0b111):
                        comb += op.eq(Op.OP_BAD)
                    with m.Default():
                        comb += op.eq(Op.OP_NONE)
        comb += req_op.eq(op)
        comb += req_go.eq(go)

        # Version of the row number that is valid one cycle earlier
        # in the cases where we need to read the cache data BRAM.
        # If we're stalling then we need to keep reading the last
        # row requested.
        with m.If(~r0_stall):
            with m.If(m_in.valid):
                comb += early_req_row.eq(get_row(m_in.addr))
            with m.Else():
                comb += early_req_row.eq(get_row(d_in.addr))
        with m.Else():
            comb += early_req_row.eq(req_row)

    def reservation_comb(self, m, cancel_store, set_rsrv, clear_rsrv,
                         r0_valid, r0, reservation):
        """Handle load-with-reservation and store-conditional instructions
        """
        comb = m.d.comb
        sync = m.d.sync

        with m.If(r0_valid & r0.req.reserve):

            # XXX generate alignment interrupt if address
            # is not aligned XXX or if r0.req.nc = '1'
            with m.If(r0.req.load):
                comb += set_rsrv.eq(1) # load with reservation
            with m.Else():
                comb += clear_rsrv.eq(1) # store conditional
                with m.If(~reservation.valid |
                          (r0.req.addr[LINE_OFF_BITS:64] !=
                           reservation.addr)):
                    comb += cancel_store.eq(1)

    def reservation_reg(self, m, r0_valid, access_ok, set_rsrv, clear_rsrv,
                        reservation, r0):

        comb = m.d.comb
        sync = m.d.sync

        with m.If(r0_valid & access_ok):
            with m.If(clear_rsrv):
                sync += reservation.valid.eq(0)
            with m.Elif(set_rsrv):
                sync += reservation.valid.eq(1)
                sync += reservation.addr.eq(r0.req.addr[LINE_OFF_BITS:64])

    def writeback_control(self, m, r1, cache_out):
        """Return data for loads & completion control logic
        """
        comb = m.d.comb
        sync = m.d.sync
        d_out, m_out = self.d_out, self.m_out

        data_out = Signal(64)
        data_fwd = Signal(64)

        # Use the bypass if we are reading the row that was
        # written 1 or 2 cycles ago, including for the
        # slow_valid = 1 case (i.e. completing a load
        # miss or a non-cacheable load).
        with m.If(r1.use_forward1):
            comb += data_fwd.eq(r1.forward_data1)
        with m.Else():
            comb += data_fwd.eq(r1.forward_data2)

        comb += data_out.eq(cache_out[r1.hit_way])

        for i in range(8):
            with m.If(r1.forward_sel[i]):
                dsel = data_fwd.word_select(i, 8)
                comb += data_out.word_select(i, 8).eq(dsel)

        comb += d_out.valid.eq(r1.ls_valid)
        comb += d_out.data.eq(data_out)
        comb += d_out.store_done.eq(~r1.stcx_fail)
        comb += d_out.error.eq(r1.ls_error)
        comb += d_out.cache_paradox.eq(r1.cache_paradox)

        # Outputs to MMU
        comb += m_out.done.eq(r1.mmu_done)
        comb += m_out.err.eq(r1.mmu_error)
        comb += m_out.data.eq(data_out)

        # We have a valid load or store hit or we just completed
        # a slow op such as a load miss, a NC load or a store
        #
        # Note: the load hit is delayed by one cycle. However it
        # can still not collide with r.slow_valid (well unless I
        # miscalculated) because slow_valid can only be set on a
        # subsequent request and not on its first cycle (the state
        # machine must have advanced), which makes slow_valid
        # at least 2 cycles from the previous hit_load_valid.

        # Sanity: Only one of these must be set in any given cycle

        if False: # TODO: need Display to get this to work
            assert (r1.slow_valid & r1.stcx_fail) != 1, \
                "unexpected slow_valid collision with stcx_fail"

            assert ((r1.slow_valid | r1.stcx_fail) | r1.hit_load_valid) != 1, \
                "unexpected hit_load_delayed collision with slow_valid"

        with m.If(~r1.mmu_req):
            # Request came from loadstore1...
            # Load hit case is the standard path
            with m.If(r1.hit_load_valid):
                #Display(f"completing load hit data={data_out}")
                pass

            # error cases complete without stalling
            with m.If(r1.ls_error):
                # Display("completing ld/st with error")
                pass

            # Slow ops (load miss, NC, stores)
            with m.If(r1.slow_valid):
                #Display(f"completing store or load miss data={data_out}")
                pass

        with m.Else():
            # Request came from MMU
            with m.If(r1.hit_load_valid):
                # Display(f"completing load hit to MMU, data={m_out.data}")
                pass
            # error cases complete without stalling
            with m.If(r1.mmu_error):
                #Display("completing MMU ld with error")
                pass

            # Slow ops (i.e. load miss)
            with m.If(r1.slow_valid):
                #Display("completing MMU load miss, data={m_out.data}")
                pass

    def rams(self, m, r1, early_req_row, cache_out, replace_way):
        """rams
        Generate a cache RAM for each way. This handles the normal
        reads, writes from reloads and the special store-hit update
        path as well.

        Note: the BRAMs have an extra read buffer, meaning the output
        is pipelined an extra cycle. This differs from the
        icache. The writeback logic needs to take that into
        account by using 1-cycle delayed signals for load hits.
        """
        comb = m.d.comb
        wb_in = self.wb_in

        for i in range(NUM_WAYS):
            do_read = Signal()
            rd_addr = Signal(ROW_BITS)
            do_write = Signal()
            wr_addr = Signal(ROW_BITS)
            wr_data = Signal(WB_DATA_BITS)
            wr_sel = Signal(ROW_SIZE)
            wr_sel_m = Signal(ROW_SIZE)
            _d_out = Signal(WB_DATA_BITS)

            way = CacheRam(ROW_BITS, WB_DATA_BITS, True)
            setattr(m.submodules, "cacheram_%d" % i, way)

            comb += way.rd_en.eq(do_read)
            comb += way.rd_addr.eq(rd_addr)
            comb += _d_out.eq(way.rd_data_o)
            comb += way.wr_sel.eq(wr_sel_m)
            comb += way.wr_addr.eq(wr_addr)
            comb += way.wr_data.eq(wr_data)

            # Cache hit reads
            comb += do_read.eq(1)
            comb += rd_addr.eq(early_req_row)
            comb += cache_out[i].eq(_d_out)

            # Write mux:
            #
            # Defaults to wishbone read responses (cache refill)
            #
            # For timing, the mux on wr_data/sel/addr is not
            # dependent on anything other than the current state.

            with m.If(r1.write_bram):
                # Write store data to BRAM. This happens one
                # cycle after the store is in r0.
                comb += wr_data.eq(r1.req.data)
                comb += wr_sel.eq(r1.req.byte_sel)
                comb += wr_addr.eq(get_row(r1.req.real_addr))

                with m.If(i == r1.req.hit_way):
                    comb += do_write.eq(1)
            with m.Else():
                # Otherwise, we might be doing a reload or a DCBZ
                with m.If(r1.dcbz):
                    comb += wr_data.eq(0)
                with m.Else():
                    comb += wr_data.eq(wb_in.dat)
                comb += wr_addr.eq(r1.store_row)
                comb += wr_sel.eq(~0) # all 1s

                with m.If((r1.state == State.RELOAD_WAIT_ACK)
                          & wb_in.ack & (replace_way == i)):
                    comb += do_write.eq(1)

            # Mask write selects with do_write since BRAM
            # doesn't have a global write-enable
            with m.If(do_write):
                comb += wr_sel_m.eq(wr_sel)

    # Cache hit synchronous machine for the easy case.
    # This handles load hits.
    # It also handles error cases (TLB miss, cache paradox)
    def dcache_fast_hit(self, m, req_op, r0_valid, r0, r1,
                        req_hit_way, req_index, access_ok,
                        tlb_hit, tlb_hit_way, tlb_req_index):

        comb = m.d.comb
        sync = m.d.sync

        with m.If(req_op != Op.OP_NONE):
            #Display(f"op:{req_op} addr:{r0.req.addr} nc: {r0.req.nc}" \
            #      f"idx:{req_index} tag:{req_tag} way: {req_hit_way}"
            #     )
            pass

        with m.If(r0_valid):
            sync += r1.mmu_req.eq(r0.mmu_req)

        # Fast path for load/store hits.
        # Set signals for the writeback controls.
        sync += r1.hit_way.eq(req_hit_way)
        sync += r1.hit_index.eq(req_index)

        with m.If(req_op == Op.OP_LOAD_HIT):
            sync += r1.hit_load_valid.eq(1)
        with m.Else():
            sync += r1.hit_load_valid.eq(0)

        with m.If((req_op == Op.OP_LOAD_HIT) | (req_op == Op.OP_STORE_HIT)):
            sync += r1.cache_hit.eq(1)
        with m.Else():
            sync += r1.cache_hit.eq(0)

        with m.If(req_op == Op.OP_BAD):
            # Display(f"Signalling ld/st error valid_ra={valid_ra}"
            #      f"rc_ok={rc_ok} perm_ok={perm_ok}"
            sync += r1.ls_error.eq(~r0.mmu_req)
            sync += r1.mmu_error.eq(r0.mmu_req)
            sync += r1.cache_paradox.eq(access_ok)

        with m.Else():
            sync += r1.ls_error.eq(0)
            sync += r1.mmu_error.eq(0)
            sync += r1.cache_paradox.eq(0)

        with m.If(req_op == Op.OP_STCX_FAIL):
            sync += r1.stcx_fail.eq(1)
        with m.Else():
            sync += r1.stcx_fail.eq(0)

        # Record TLB hit information for updating TLB PLRU
        sync += r1.tlb_hit.eq(tlb_hit)
        sync += r1.tlb_hit_way.eq(tlb_hit_way)
        sync += r1.tlb_hit_index.eq(tlb_req_index)

    # Memory accesses are handled by this state machine:
    #
    #   * Cache load miss/reload (in conjunction with "rams")
    #   * Load hits for non-cachable forms
    #   * Stores (the collision case is handled in "rams")
    #
    # All wishbone requests generation is done here.
    # This machine operates at stage 1.
    def dcache_slow(self, m, r1, use_forward1_next, use_forward2_next,
                    cache_valid_bits, r0, replace_way,
                    req_hit_way, req_same_tag,
                    r0_valid, req_op, cache_tag, req_go, ra):

        comb = m.d.comb
        sync = m.d.sync
        wb_in = self.wb_in

        req = MemAccessRequest()
        acks = Signal(3)
        adjust_acks = Signal(3)
        stbs_done = Signal()

        sync += r1.use_forward1.eq(use_forward1_next)
        sync += r1.forward_sel.eq(0)

        with m.If(use_forward1_next):
            sync += r1.forward_sel.eq(r1.req.byte_sel)
        with m.Elif(use_forward2_next):
            sync += r1.forward_sel.eq(r1.forward_sel1)

        sync += r1.forward_data2.eq(r1.forward_data1)
        with m.If(r1.write_bram):
            sync += r1.forward_data1.eq(r1.req.data)
            sync += r1.forward_sel1.eq(r1.req.byte_sel)
            sync += r1.forward_way1.eq(r1.req.hit_way)
            sync += r1.forward_row1.eq(get_row(r1.req.real_addr))
            sync += r1.forward_valid1.eq(1)
        with m.Else():
            with m.If(r1.dcbz):
                sync += r1.forward_data1.eq(0)
            with m.Else():
                sync += r1.forward_data1.eq(wb_in.dat)
            sync += r1.forward_sel1.eq(~0) # all 1s
            sync += r1.forward_way1.eq(replace_way)
            sync += r1.forward_row1.eq(r1.store_row)
            sync += r1.forward_valid1.eq(0)

        # One cycle pulses reset
        sync += r1.slow_valid.eq(0)
        sync += r1.write_bram.eq(0)
        sync += r1.inc_acks.eq(0)
        sync += r1.dec_acks.eq(0)

        sync += r1.ls_valid.eq(0)
        # complete tlbies and TLB loads in the third cycle
        sync += r1.mmu_done.eq(r0_valid & (r0.tlbie | r0.tlbld))

        with m.If((req_op == Op.OP_LOAD_HIT)
                  | (req_op == Op.OP_STCX_FAIL)):
            with m.If(~r0.mmu_req):
                sync += r1.ls_valid.eq(1)
            with m.Else():
                sync += r1.mmu_done.eq(1)

        with m.If(r1.write_tag):
            # Store new tag in selected way
            for i in range(NUM_WAYS):
                with m.If(i == replace_way):
                    ct = Signal(TAG_RAM_WIDTH)
                    comb += ct.eq(cache_tag[r1.store_index])
                    comb += ct.word_select(i, TAG_WIDTH).eq(r1.reload_tag)
                    sync += cache_tag[r1.store_index].eq(ct)
            sync += r1.store_way.eq(replace_way)
            sync += r1.write_tag.eq(0)

        # Take request from r1.req if there is one there,
        # else from req_op, ra, etc.
        with m.If(r1.full):
            comb += req.eq(r1.req)
        with m.Else():
            comb += req.op.eq(req_op)
            comb += req.valid.eq(req_go)
            comb += req.mmu_req.eq(r0.mmu_req)
            comb += req.dcbz.eq(r0.req.dcbz)
            comb += req.real_addr.eq(ra)

            with m.If(~r0.req.dcbz):
                comb += req.data.eq(r0.req.data)
            with m.Else():
                comb += req.data.eq(0)

            # Select all bytes for dcbz
            # and for cacheable loads
            with m.If(r0.req.dcbz | (r0.req.load & ~r0.req.nc)):
                comb += req.byte_sel.eq(~0) # all 1s
            with m.Else():
                comb += req.byte_sel.eq(r0.req.byte_sel)
            comb += req.hit_way.eq(req_hit_way)
            comb += req.same_tag.eq(req_same_tag)

        # Store the incoming request from r0,
        # if it is a slow request
        # Note that r1.full = 1 implies req_op = OP_NONE
        with m.If((req_op == Op.OP_LOAD_MISS)
                  | (req_op == Op.OP_LOAD_NC)
                  | (req_op == Op.OP_STORE_MISS)
                  | (req_op == Op.OP_STORE_HIT)):
            sync += r1.req.eq(req)
            sync += r1.full.eq(1)

        # Main state machine
        with m.Switch(r1.state):

            with m.Case(State.IDLE):
                # XXX check 'left downto. probably means len(r1.wb.adr)
                #     r1.wb.adr <= req.real_addr(
                #                   r1.wb.adr'left downto 0
                #                  );
                sync += r1.wb.adr.eq(req.real_addr)
                sync += r1.wb.sel.eq(req.byte_sel)
                sync += r1.wb.dat.eq(req.data)
                sync += r1.dcbz.eq(req.dcbz)

                # Keep track of our index and way
                # for subsequent stores.
                sync += r1.store_index.eq(get_index(req.real_addr))
                sync += r1.store_row.eq(get_row(req.real_addr))
                sync += r1.end_row_ix.eq(
                         get_row_of_line(get_row(req.real_addr)))
                sync += r1.reload_tag.eq(get_tag(req.real_addr))
                sync += r1.req.same_tag.eq(1)

                with m.If(req.op == Op.OP_STORE_HIT):
                    sync += r1.store_way.eq(req.hit_way)

                # Reset per-row valid bits,
                # ready for handling OP_LOAD_MISS
                for i in range(ROW_PER_LINE):
                    sync += r1.rows_valid[i].eq(0)

                with m.Switch(req.op):
                    with m.Case(Op.OP_LOAD_HIT):
                        # stay in IDLE state
                        pass

                    with m.Case(Op.OP_LOAD_MISS):
                        #Display(f"cache miss real addr:" \
                        #      f"{req_real_addr}" \
                        #      f" idx:{get_index(req_real_addr)}" \
                        #      f" tag:{get_tag(req.real_addr)}")

                        # Start the wishbone cycle
                        sync += r1.wb.we.eq(0)
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)

                        # Track that we had one request sent
                        sync += r1.state.eq(State.RELOAD_WAIT_ACK)
                        sync += r1.write_tag.eq(1)

                    with m.Case(Op.OP_LOAD_NC):
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)
                        sync += r1.wb.we.eq(0)
                        sync += r1.state.eq(State.NC_LOAD_WAIT_ACK)

                    with m.Case(Op.OP_STORE_HIT, Op.OP_STORE_MISS):
                        with m.If(~req.dcbz):
                            sync += r1.state.eq(State.STORE_WAIT_ACK)
                            sync += r1.acks_pending.eq(1)
                            sync += r1.full.eq(0)
                            sync += r1.slow_valid.eq(1)

                            with m.If(~req.mmu_req):
                                sync += r1.ls_valid.eq(1)
                            with m.Else():
                                sync += r1.mmu_done.eq(1)

                            with m.If(req.op == Op.OP_STORE_HIT):
                                sync += r1.write_bram.eq(1)
                        with m.Else():
                            sync += r1.state.eq(State.RELOAD_WAIT_ACK)

                            with m.If(req.op == Op.OP_STORE_MISS):
                                sync += r1.write_tag.eq(1)

                        sync += r1.wb.we.eq(1)
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)

                    # OP_NONE and OP_BAD do nothing
                    # OP_BAD & OP_STCX_FAIL were
                    # handled above already
                    with m.Case(Op.OP_NONE):
                        pass
                    with m.Case(Op.OP_BAD):
                        pass
                    with m.Case(Op.OP_STCX_FAIL):
                        pass

            with m.Case(State.RELOAD_WAIT_ACK):
                # Requests are all sent if stb is 0
                comb += stbs_done.eq(~r1.wb.stb)

                with m.If(~wb_in.stall & ~stbs_done):
                    # That was the last word?
                    # We are done sending.
                    # Clear stb and set stbs_done
                    # so we can handle an eventual
                    # last ack on the same cycle.
                    with m.If(is_last_row_addr(r1.wb.adr, r1.end_row_ix)):
                        sync += r1.wb.stb.eq(0)
                        comb += stbs_done.eq(0)

                    # Calculate the next row address in the current cache line
                    rarange = r1.wb.adr[ROW_OFF_BITS : LINE_OFF_BITS]
                    sync += rarange.eq(rarange + 1)

                # Incoming acks processing
                sync += r1.forward_valid1.eq(wb_in.ack)
                with m.If(wb_in.ack):
                    # XXX needs an Array bit-accessor here
                    sync += r1.rows_valid[r1.store_row % ROW_PER_LINE].eq(1)

                    # If this is the data we were looking for,
                    # we can complete the request next cycle.
                    # Compare the whole address in case the
                    # request in r1.req is not the one that
                    # started this refill.
                    with m.If(r1.full & r1.req.same_tag &
                              ((r1.dcbz & r1.req.dcbz) |
                               (~r1.dcbz &
                                (r1.req.op == Op.OP_LOAD_MISS))) &
                              (r1.store_row == get_row(r1.req.real_addr))):
                        sync += r1.full.eq(0)
                        sync += r1.slow_valid.eq(1)
                        with m.If(~r1.mmu_req):
                            sync += r1.ls_valid.eq(1)
                        with m.Else():
                            sync += r1.mmu_done.eq(1)
                        sync += r1.forward_sel.eq(~0) # all 1s
                        sync += r1.use_forward1.eq(1)

                    # Check for completion
                    with m.If(stbs_done & is_last_row(r1.store_row,
                                                      r1.end_row_ix)):
                        # Complete wishbone cycle
                        sync += r1.wb.cyc.eq(0)

                        # Cache line is now valid
                        cv = Signal(NUM_WAYS)
                        comb += cv.eq(cache_valid_bits[r1.store_index])
                        comb += cv.bit_select(r1.store_way, 1).eq(1)
                        sync += cache_valid_bits[r1.store_index].eq(cv)
                        sync += r1.state.eq(State.IDLE)

                    # Increment store row counter
                    sync += r1.store_row.eq(next_row(r1.store_row))

            with m.Case(State.STORE_WAIT_ACK):
                comb += stbs_done.eq(~r1.wb.stb)
                comb += acks.eq(r1.acks_pending)

                with m.If(r1.inc_acks != r1.dec_acks):
                    with m.If(r1.inc_acks):
                        comb += adjust_acks.eq(acks + 1)
                    with m.Else():
                        comb += adjust_acks.eq(acks - 1)
                with m.Else():
                    comb += adjust_acks.eq(acks)

                sync += r1.acks_pending.eq(adjust_acks)

                # Clear stb when slave accepted request
                with m.If(~wb_in.stall):
                    # See if there is another store waiting
                    # to be done which is in the same real page.
                    with m.If(req.valid):
                        ra = req.real_addr[0:SET_SIZE_BITS]
                        sync += r1.wb.adr[0:SET_SIZE_BITS].eq(ra)
                        sync += r1.wb.dat.eq(req.data)
                        sync += r1.wb.sel.eq(req.byte_sel)

                    with m.Elif((adjust_acks < 7) & req.same_tag &
                                ((req.op == Op.OP_STORE_MISS)
                                 | (req.op == Op.OP_STORE_HIT))):
                        sync += r1.wb.stb.eq(1)
                        comb += stbs_done.eq(0)

                        with m.If(req.op == Op.OP_STORE_HIT):
                            sync += r1.write_bram.eq(1)
                        sync += r1.full.eq(0)
                        sync += r1.slow_valid.eq(1)

                        # Store requests never come from the MMU
                        sync += r1.ls_valid.eq(1)
                        comb += stbs_done.eq(0)
                        sync += r1.inc_acks.eq(1)
                    with m.Else():
                        sync += r1.wb.stb.eq(0)
                        comb += stbs_done.eq(1)

                # Got ack ? See if complete.
                with m.If(wb_in.ack):
                    with m.If(stbs_done & (adjust_acks == 1)):
                        sync += r1.state.eq(State.IDLE)
                        sync += r1.wb.cyc.eq(0)
                        sync += r1.wb.stb.eq(0)
                    sync += r1.dec_acks.eq(1)

            with m.Case(State.NC_LOAD_WAIT_ACK):
                # Clear stb when slave accepted request
                with m.If(~wb_in.stall):
                    sync += r1.wb.stb.eq(0)

                # Got ack ? complete.
                with m.If(wb_in.ack):
                    sync += r1.state.eq(State.IDLE)
                    sync += r1.full.eq(0)
                    sync += r1.slow_valid.eq(1)

                    with m.If(~r1.mmu_req):
                        sync += r1.ls_valid.eq(1)
                    with m.Else():
                        sync += r1.mmu_done.eq(1)

                    sync += r1.forward_sel.eq(~0) # all 1s
                    sync += r1.use_forward1.eq(1)
                    sync += r1.wb.cyc.eq(0)
                    sync += r1.wb.stb.eq(0)

    def dcache_log(self, m, r1, valid_ra, tlb_hit_way, req_op, stall_out):

        sync = m.d.sync
        d_out, wb_in, log_out = self.d_out, self.wb_in, self.log_out

        sync += log_out.eq(Cat(r1.state[:3], valid_ra, tlb_hit_way[:3],
                               stall_out, req_op[:3], d_out.valid, d_out.error,
                               r1.wb.cyc, r1.wb.stb, wb_in.ack, wb_in.stall,
                               r1.wb.adr[3:6]))

    def elaborate(self, platform):

        m = Module()
        comb = m.d.comb

        # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
        cache_tags = CacheTagArray()
        cache_tag_set = Signal(TAG_RAM_WIDTH)
        cache_valid_bits = CacheValidBitsArray()

        # TODO attribute ram_style : string;
        # TODO attribute ram_style of cache_tags : signal is "distributed";

        """note: these are passed to nmigen.hdl.Memory as "attributes".
           don't know how, just that they are.
        """
        dtlb_valid_bits = TLBValidBitsArray()
        dtlb_tags = TLBTagsArray()
        dtlb_ptes = TLBPtesArray()
        # TODO attribute ram_style of
        #  dtlb_tags : signal is "distributed";
        # TODO attribute ram_style of
        #  dtlb_ptes : signal is "distributed";

        r0 = RegStage0()
        r0_full = Signal()

        r1 = RegStage1()

        reservation = Reservation()

        # Async signals on incoming request
        req_index = Signal(INDEX_BITS)
        req_row = Signal(ROW_BITS)
        req_hit_way = Signal(WAY_BITS)
        req_tag = Signal(TAG_BITS)
        req_op = Signal(Op)
        req_data = Signal(64)
        req_same_tag = Signal()
        req_go = Signal()

        early_req_row = Signal(ROW_BITS)

        cancel_store = Signal()
        set_rsrv = Signal()
        clear_rsrv = Signal()

        r0_valid = Signal()
        r0_stall = Signal()

        use_forward1_next = Signal()
        use_forward2_next = Signal()

        cache_out = CacheRamOut()

        plru_victim = PLRUOut()
        replace_way = Signal(WAY_BITS)

        # Wishbone read/write/cache write formatting signals
        bus_sel = Signal(8)

        # TLB signals
        tlb_tag_way = Signal(TLB_TAG_WAY_BITS)
        tlb_pte_way = Signal(TLB_PTE_WAY_BITS)
        tlb_valid_way = Signal(TLB_NUM_WAYS)
        tlb_req_index = Signal(TLB_SET_BITS)
        tlb_hit = Signal()
        tlb_hit_way = Signal(TLB_WAY_BITS)
        pte = Signal(TLB_PTE_BITS)
        ra = Signal(REAL_ADDR_BITS)
        valid_ra = Signal()
        perm_attr = PermAttr()
        rc_ok = Signal()
        perm_ok = Signal()
        access_ok = Signal()

        tlb_plru_victim = TLBPLRUOut()

        # we don't yet handle collisions between loadstore1 requests
        # and MMU requests
        comb += self.m_out.stall.eq(0)

        # Hold off the request in r0 when r1 has an uncompleted request
        comb += r0_stall.eq(r0_full & r1.full)
        comb += r0_valid.eq(r0_full & ~r1.full)
        comb += self.stall_out.eq(r0_stall)

        # Wire up wishbone request latch out of stage 1
        comb += self.wb_out.eq(r1.wb)

        # call sub-functions putting everything together, using shared
        # signals established above
        self.stage_0(m, r0, r1, r0_full)
        self.tlb_read(m, r0_stall, tlb_valid_way,
                      tlb_tag_way, tlb_pte_way, dtlb_valid_bits,
                      dtlb_tags, dtlb_ptes)
        self.tlb_search(m, tlb_req_index, r0, r0_valid,
                        tlb_valid_way, tlb_tag_way, tlb_hit_way,
                        tlb_pte_way, pte, tlb_hit, valid_ra, perm_attr, ra)
        self.tlb_update(m, r0_valid, r0, dtlb_valid_bits, tlb_req_index,
                        tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_tag_way,
                        dtlb_tags, tlb_pte_way, dtlb_ptes)
        self.maybe_plrus(m, r1, plru_victim)
        self.maybe_tlb_plrus(m, r1, tlb_plru_victim)
        self.cache_tag_read(m, r0_stall, req_index, cache_tag_set, cache_tags)
        self.dcache_request(m, r0, ra, req_index, req_row, req_tag,
                            r0_valid, r1, cache_valid_bits, replace_way,
                            use_forward1_next, use_forward2_next,
                            req_hit_way, plru_victim, rc_ok, perm_attr,
                            valid_ra, perm_ok, access_ok, req_op, req_go,
                            tlb_pte_way,
                            tlb_hit, tlb_hit_way, tlb_valid_way, cache_tag_set,
                            cancel_store, req_same_tag, r0_stall,
                            early_req_row)
        self.reservation_comb(m, cancel_store, set_rsrv, clear_rsrv,
                              r0_valid, r0, reservation)
        self.reservation_reg(m, r0_valid, access_ok, set_rsrv, clear_rsrv,
                             reservation, r0)
        self.writeback_control(m, r1, cache_out)
        self.rams(m, r1, early_req_row, cache_out, replace_way)
        self.dcache_fast_hit(m, req_op, r0_valid, r0, r1,
                             req_hit_way, req_index, access_ok,
                             tlb_hit, tlb_hit_way, tlb_req_index)
        self.dcache_slow(m, r1, use_forward1_next, use_forward2_next,
                         cache_valid_bits, r0, replace_way,
                         req_hit_way, req_same_tag,
                         r0_valid, req_op, cache_tags, req_go, ra)
        #self.dcache_log(m, r1, valid_ra, tlb_hit_way, req_op, self.stall_out)

        return m



# dcache_tb.vhdl
#
# entity dcache_tb is
# end dcache_tb;
#
# architecture behave of dcache_tb is
#     signal clk          : std_ulogic;
#     signal rst          : std_ulogic;
#
#     signal d_in         : Loadstore1ToDcacheType;
#     signal d_out        : DcacheToLoadstore1Type;
#
#     signal m_in         : MmuToDcacheType;
#     signal m_out        : DcacheToMmuType;
#
#     signal wb_bram_in   : wishbone_master_out;
#     signal wb_bram_out  : wishbone_slave_out;
#
#     constant clk_period : time := 10 ns;
# begin
#     dcache0: entity work.dcache
#         generic map(
#
#             LINE_SIZE => 64,
#             NUM_LINES => 4
#             )
#         port map(
#             clk => clk,
#             rst => rst,
#             d_in => d_in,
#             d_out => d_out,
#             m_in => m_in,
#             m_out => m_out,
#             wishbone_out => wb_bram_in,
#             wishbone_in => wb_bram_out
#             );
#
#     -- BRAM Memory slave
#     bram0: entity work.wishbone_bram_wrapper
#         generic map(
#             MEMORY_SIZE   => 1024,
#             RAM_INIT_FILE => "icache_test.bin"
#             )
#         port map(
#             clk => clk,
#             rst => rst,
#             wishbone_in => wb_bram_in,
#             wishbone_out => wb_bram_out
#             );
#
#     clk_process: process
#     begin
#         clk <= '0';
#         wait for clk_period/2;
#         clk <= '1';
#         wait for clk_period/2;
#     end process;
#
#     rst_process: process
#     begin
#         rst <= '1';
#         wait for 2*clk_period;
#         rst <= '0';
#         wait;
#     end process;
#
#     stim: process
#     begin
#         -- Clear stuff
#         d_in.valid <= '0';
#         d_in.load <= '0';
#         d_in.nc <= '0';
#         d_in.addr <= (others => '0');
#         d_in.data <= (others => '0');
#         m_in.valid <= '0';
#         m_in.addr <= (others => '0');
#         m_in.pte <= (others => '0');
#
#         wait for 4*clk_period;
#         wait until rising_edge(clk);
#
#         -- Cacheable read of address 4
#         d_in.load <= '1';
#         d_in.nc <= '0';
#         d_in.addr <= x"0000000000000004";
#         d_in.valid <= '1';
#         wait until rising_edge(clk);
#         d_in.valid <= '0';
#
#         wait until rising_edge(clk) and d_out.valid = '1';
#         assert d_out.data = x"0000000100000000"
#             report "data @" & to_hstring(d_in.addr) &
#                 "=" & to_hstring(d_out.data) &
#                 " expected 0000000100000000"
#             severity failure;
#         -- wait for clk_period;
#
#         -- Cacheable read of address 30
#         d_in.load <= '1';
#         d_in.nc <= '0';
#         d_in.addr <= x"0000000000000030";
#         d_in.valid <= '1';
#         wait until rising_edge(clk);
#         d_in.valid <= '0';
#
#         wait until rising_edge(clk) and d_out.valid = '1';
#         assert d_out.data = x"0000000D0000000C"
#             report "data @" & to_hstring(d_in.addr) &
#                 "=" & to_hstring(d_out.data) &
#                 " expected 0000000D0000000C"
#             severity failure;
#
#         -- Non-cacheable read of address 100
#         d_in.load <= '1';
#         d_in.nc <= '1';
#         d_in.addr <= x"0000000000000100";
#         d_in.valid <= '1';
#         wait until rising_edge(clk);
#         d_in.valid <= '0';
#         wait until rising_edge(clk) and d_out.valid = '1';
#         assert d_out.data = x"0000004100000040"
#             report "data @" & to_hstring(d_in.addr) &
#                 "=" & to_hstring(d_out.data) &
#                 " expected 0000004100000040"
#             severity failure;
#
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#
#         std.env.finish;
#     end process;
# end;


def dcache_sim(dut):
    # clear stuff
    yield dut.d_in.valid.eq(0)
    yield dut.d_in.load.eq(0)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(0)
    yield dut.d_in.data.eq(0)
    yield dut.m_in.valid.eq(0)
    yield dut.m_in.addr.eq(0)
    yield dut.m_in.pte.eq(0)
    # wait 4 * clk_period
    yield
    yield
    yield
    yield
    # wait_until rising_edge(clk)
    yield
    # Cacheable read of address 4
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(Const(0x0000000000000004, 64))
    yield dut.d_in.valid.eq(1)
    # wait-until rising_edge(clk)
    yield
    yield dut.d_in.valid.eq(0)
    yield
    while not (yield dut.d_out.valid):
        yield
    data = yield dut.d_out.data
    assert data == 0x0000000100000000, \
        f"data @ 0x04 = {data:x} expected 0000000100000000"


    # Cacheable read of address 30
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(Const(0x0000000000000030, 64))
    yield dut.d_in.valid.eq(1)
    yield
    yield dut.d_in.valid.eq(0)
    yield
    while not (yield dut.d_out.valid):
        yield
    data = yield dut.d_out.data
    assert data == 0x0000000D0000000C, \
        f"data @ 0x30 = {data:x} expected 0000000D0000000C"

    # Non-cacheable read of address 100
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(1)
    yield dut.d_in.addr.eq(Const(0x0000000000000100, 64))
    yield dut.d_in.valid.eq(1)
    yield
    yield dut.d_in.valid.eq(0)
    yield
    while not (yield dut.d_out.valid):
        yield
    data = yield dut.d_out.data
    assert data == 0x0000004100000040, \
        f"data @ 0x100 = {data:x} expected 0000004100000040"

    yield
    yield
    yield
    yield


def test_dcache():
    dut = DCache()
    vl = rtlil.convert(dut, ports=[])
    with open("test_dcache.il", "w") as f:
        f.write(vl)

    #run_simulation(dut, dcache_sim(dut), vcd_name='test_dcache.vcd')

if __name__ == '__main__':
    test_dcache()