1 """DCache
2
3 based on Anton Blanchard microwatt dcache.vhdl
4
5 """
6
7 from enum import Enum, unique
8
9 from nmigen import Module, Signal, Elaboratable, Cat, Repl, Array, Const
10 from nmigen.cli import main
11 from nmutil.iocontrol import RecordObject
12 from nmigen.utils import log2_int
13 from nmigen.cli import rtlil
14
15
16 from soc.experiment.mem_types import (LoadStore1ToDCacheType,
17 DCacheToLoadStore1Type,
18 MMUToDCacheType,
19 DCacheToMMUType)
20
21 from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS, WB_SEL_BITS,
22 WBAddrType, WBDataType, WBSelType,
23 WBMasterOut, WBSlaveOut,
24 WBMasterOutVector, WBSlaveOutVector,
25 WBIOMasterOut, WBIOSlaveOut)
26
27 from soc.experiment.cache_ram import CacheRam
28 from soc.experiment.plru import PLRU
29
30
# TODO: make these parameters of DCache at some point
LINE_SIZE = 64    # Line size in bytes
NUM_LINES = 32    # Number of lines in a set
NUM_WAYS = 4      # Number of ways
TLB_SET_SIZE = 64 # L1 DTLB entries per set
TLB_NUM_WAYS = 2  # L1 DTLB number of sets
TLB_LG_PGSZ = 12  # L1 DTLB log_2(page_size)
LOG_LENGTH = 0    # Non-zero to enable log data collection

# BRAM organisation: We never access more than
#     WB_DATA_BITS at a time so to save
#     resources we make the array only that wide, and
#     use consecutive indices to make a cache "line"
#
#     ROW_SIZE is the width in bytes of the BRAM
#     (based on WB, so 64-bits)
ROW_SIZE = WB_DATA_BITS // 8

# ROW_PER_LINE is the number of rows (wishbone
# transactions) in a line
ROW_PER_LINE = LINE_SIZE // ROW_SIZE

# BRAM_ROWS is the number of rows in BRAM needed
# to represent the full dcache
BRAM_ROWS = NUM_LINES * ROW_PER_LINE
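# For reference, a worked example with the default geometry above:
#   ROW_SIZE     = 64 // 8 = 8 bytes per BRAM row
#   ROW_PER_LINE = 64 // 8 = 8 rows per cache line
#   BRAM_ROWS    = 32 * 8  = 256 rows per way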


# Bit field counts in the address

# REAL_ADDR_BITS is the number of real address
# bits that we store
REAL_ADDR_BITS = 56

# ROW_BITS is the number of bits to select a row
ROW_BITS = log2_int(BRAM_ROWS)

# ROW_LINE_BITS is the number of bits to select
# a row within a line
ROW_LINE_BITS = log2_int(ROW_PER_LINE)

# LINE_OFF_BITS is the number of bits for
# the offset in a cache line
LINE_OFF_BITS = log2_int(LINE_SIZE)

# ROW_OFF_BITS is the number of bits for
# the offset in a row
ROW_OFF_BITS = log2_int(ROW_SIZE)

# INDEX_BITS is the number of bits to
# select a cache line
INDEX_BITS = log2_int(NUM_LINES)

# SET_SIZE_BITS is the log base 2 of the set size
SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS

# TAG_BITS is the number of bits of
# the tag part of the address
TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS

# TAG_WIDTH is the width in bits of each way of the tag RAM
TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)

# WAY_BITS is the number of bits to select a way
WAY_BITS = log2_int(NUM_WAYS)

# Example of layout for 32 lines of 64 bytes:
#
# ..  tag    |index|  line  |
# ..         |   row   |    |
# ..         |     |---|    | ROW_LINE_BITS  (3)
# ..         |     |--- - --| LINE_OFF_BITS (6)
# ..         |         |- --| ROW_OFF_BITS  (3)
# ..         |----- ---|    | ROW_BITS      (8)
# ..         |-----|        | INDEX_BITS    (5)
# .. --------|              | TAG_BITS      (45)

TAG_RAM_WIDTH = TAG_WIDTH * NUM_WAYS
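# For reference, with the defaults: TAG_BITS = 56 - 11 = 45, padded up
# to the next byte multiple gives TAG_WIDTH = 48, and one tag RAM row
# holds all ways, so TAG_RAM_WIDTH = 48 * 4 = 192.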

def CacheTagArray():
    return Array(Signal(TAG_RAM_WIDTH) for x in range(NUM_LINES))

def CacheValidBitsArray():
    # one valid bit per way, for each cache line
    return Array(Signal(NUM_WAYS) for x in range(NUM_LINES))

def RowPerLineValidArray():
    return Array(Signal() for x in range(ROW_PER_LINE))

# L1 TLB
TLB_SET_BITS = log2_int(TLB_SET_SIZE)
TLB_WAY_BITS = log2_int(TLB_NUM_WAYS)
TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_SET_BITS)
TLB_TAG_WAY_BITS = TLB_NUM_WAYS * TLB_EA_TAG_BITS
TLB_PTE_BITS = 64
TLB_PTE_WAY_BITS = TLB_NUM_WAYS * TLB_PTE_BITS
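# For reference, the derived TLB geometry with the defaults:
#   TLB_SET_BITS = 6, TLB_WAY_BITS = 1
#   TLB_EA_TAG_BITS  = 64 - (12 + 6) = 46
#   TLB_TAG_WAY_BITS = 2 * 46 = 92, TLB_PTE_WAY_BITS = 2 * 64 = 128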

assert (LINE_SIZE % ROW_SIZE) == 0, "LINE_SIZE not multiple of ROW_SIZE"
assert (LINE_SIZE & (LINE_SIZE - 1)) == 0, "LINE_SIZE not power of 2"
assert (NUM_LINES & (NUM_LINES - 1)) == 0, "NUM_LINES not power of 2"
assert (ROW_PER_LINE & (ROW_PER_LINE - 1)) == 0, "ROW_PER_LINE not power of 2"
assert ROW_BITS == (INDEX_BITS + ROW_LINE_BITS), "geometry bits don't add up"
assert (LINE_OFF_BITS == ROW_OFF_BITS + ROW_LINE_BITS), \
        "geometry bits don't add up"
assert REAL_ADDR_BITS == (TAG_BITS + INDEX_BITS + LINE_OFF_BITS), \
        "geometry bits don't add up"
assert REAL_ADDR_BITS == (TAG_BITS + ROW_BITS + ROW_OFF_BITS), \
        "geometry bits don't add up"
assert 64 == WB_DATA_BITS, "Can't yet handle wb width that isn't 64-bits"
assert SET_SIZE_BITS <= TLB_LG_PGSZ, "Set indexed by virtual address"


def TLBValidBitsArray():
    return Array(Signal(TLB_NUM_WAYS) for x in range(TLB_SET_SIZE))

def TLBTagEAArray():
    return Array(Signal(TLB_EA_TAG_BITS) for x in range(TLB_NUM_WAYS))

def TLBTagsArray():
    return Array(Signal(TLB_TAG_WAY_BITS) for x in range(TLB_SET_SIZE))

def TLBPtesArray():
    return Array(Signal(TLB_PTE_WAY_BITS) for x in range(TLB_SET_SIZE))

def HitWaySet():
    return Array(Signal(NUM_WAYS) for x in range(TLB_NUM_WAYS))

# Cache RAM interface
def CacheRamOut():
    return Array(Signal(WB_DATA_BITS) for x in range(NUM_WAYS))

# PLRU output interface
def PLRUOut():
    return Array(Signal(WAY_BITS) for x in range(NUM_LINES))

# TLB PLRU output interface
def TLBPLRUOut():
    return Array(Signal(TLB_WAY_BITS) for x in range(TLB_SET_SIZE))

# Helper functions to decode incoming requests
#
# Return the cache line index (tag index) for an address
def get_index(addr):
    return addr[LINE_OFF_BITS:SET_SIZE_BITS]

# Return the cache row index (data memory) for an address
def get_row(addr):
    return addr[ROW_OFF_BITS:SET_SIZE_BITS]

# Return the index of a row within a line
def get_row_of_line(row):
    return row[:ROW_LINE_BITS]

# Return whether the address is the last row of a cache line
def is_last_row_addr(addr, last):
    return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last

# Return whether a row is the last row of a cache line
def is_last_row(row, last):
    return get_row_of_line(row) == last

# Return the next row in the current cache line. We use a
# dedicated function in order to limit the size of the
# generated adder to be only the bits within a cache line
# (3 bits with default settings)
def next_row(row):
    row_v = row[0:ROW_LINE_BITS] + 1
    return Cat(row_v[:ROW_LINE_BITS], row[ROW_LINE_BITS:])
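# For example, with ROW_LINE_BITS = 3 a row index of 0b10_111 (line 2,
# row 7) wraps to 0b10_000 (line 2, row 0): only the low 3 bits are
# incremented, and the upper (line-select) bits pass through unchanged.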

# Get the tag value from the address
def get_tag(addr):
    return addr[SET_SIZE_BITS:REAL_ADDR_BITS]
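# For reference, with the default geometry the decode helpers above
# slice a real address as follows: addr[3:6] is the offset of a row
# within its line, addr[6:11] is the line index (get_index), addr[3:11]
# is the BRAM row (get_row) and addr[11:56] is the tag (get_tag).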

# Read a tag from a tag memory row
def read_tag(way, tagset):
    return tagset.word_select(way, TAG_WIDTH)[:TAG_BITS]

# Read a TLB tag from a TLB tag memory row
def read_tlb_tag(way, tags):
    return tags.word_select(way, TLB_EA_TAG_BITS)

# Write a TLB tag to a TLB tag memory row
def write_tlb_tag(way, tags, tag):
    return read_tlb_tag(way, tags).eq(tag)

# Read a PTE from a TLB PTE memory row
def read_tlb_pte(way, ptes):
    return ptes.word_select(way, TLB_PTE_BITS)

def write_tlb_pte(way, ptes, newpte):
    return read_tlb_pte(way, ptes).eq(newpte)


# Record for storing permission, attribute, etc. bits from a PTE
class PermAttr(RecordObject):
    def __init__(self):
        super().__init__()
        self.reference = Signal()
        self.changed = Signal()
        self.nocache = Signal()
        self.priv = Signal()
        self.rd_perm = Signal()
        self.wr_perm = Signal()


def extract_perm_attr(pte):
    pa = PermAttr()
    pa.reference = pte[8]
    pa.changed = pte[7]
    pa.nocache = pte[5]
    pa.priv = pte[3]
    pa.rd_perm = pte[2]
    pa.wr_perm = pte[1]
    return pa


# Type of operation on a "valid" input
@unique
class Op(Enum):
    OP_NONE = 0
    OP_BAD = 1           # NC cache hit, TLB miss, prot/RC failure
    OP_STCX_FAIL = 2     # conditional store w/o reservation
    OP_LOAD_HIT = 3      # Cache hit on load
    OP_LOAD_MISS = 4     # Load missing cache
    OP_LOAD_NC = 5       # Non-cachable load
    OP_STORE_HIT = 6     # Store hitting cache
    OP_STORE_MISS = 7    # Store missing cache


# Cache state machine
@unique
class State(Enum):
    IDLE = 0             # Normal load hit processing
    RELOAD_WAIT_ACK = 1  # Cache reload wait ack
    STORE_WAIT_ACK = 2   # Store wait ack
    NC_LOAD_WAIT_ACK = 3 # Non-cachable load wait ack


# Dcache operations:
#
# In order to make timing, we use the BRAMs with
# an output buffer, which means that the BRAM
# output is delayed by an extra cycle.
#
# Thus, the dcache has a 2-stage internal pipeline
# for cache hits with no stalls.
#
# All other operations are handled via stalling
# in the first stage.
#
# The second stage can thus complete a hit at the same
# time as the first stage emits a stall for a complex op.
#
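# A rough sketch of the hit path, assuming no stalls: the request is
# latched into r0 while the data BRAM read is already being issued
# (see early_req_row below); the next cycle performs the TLB and tag
# compare on r0.req; one cycle later the BRAM output buffer is valid
# and writeback_control muxes the hit data out to d_out.
#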
# Stage 0 register, basically contains just the latched request

class RegStage0(RecordObject):
    def __init__(self):
        super().__init__()
        self.req = LoadStore1ToDCacheType()
        self.tlbie = Signal()
        self.doall = Signal()
        self.tlbld = Signal()
        self.mmu_req = Signal() # indicates source of request


class MemAccessRequest(RecordObject):
    def __init__(self):
        super().__init__()
        self.op = Signal(Op)
        self.valid = Signal()
        self.dcbz = Signal()
        self.real_addr = Signal(REAL_ADDR_BITS)
        self.data = Signal(64)
        self.byte_sel = Signal(8)
        self.hit_way = Signal(WAY_BITS)
        self.same_tag = Signal()
        self.mmu_req = Signal()


# First stage register, contains state for stage 1 of load hits
# and for the state machine used by all other operations
class RegStage1(RecordObject):
    def __init__(self):
        super().__init__()
        # Info about the request
        self.full = Signal()    # have uncompleted request
        self.mmu_req = Signal() # request is from MMU
        self.req = MemAccessRequest()

        # Cache hit state
        self.hit_way = Signal(WAY_BITS)
        self.hit_load_valid = Signal()
        self.hit_index = Signal(INDEX_BITS)
        self.cache_hit = Signal()

        # TLB hit state
        self.tlb_hit = Signal()
        self.tlb_hit_way = Signal(TLB_WAY_BITS)
        self.tlb_hit_index = Signal(TLB_SET_BITS)

        # 2-stage data buffer for data forwarded from writes to reads
        self.forward_data1 = Signal(64)
        self.forward_data2 = Signal(64)
        self.forward_sel1 = Signal(8)
        self.forward_valid1 = Signal()
        self.forward_way1 = Signal(WAY_BITS)
        self.forward_row1 = Signal(ROW_BITS)
        self.use_forward1 = Signal()
        self.forward_sel = Signal(8)

        # Cache miss state (reload state machine)
        self.state = Signal(State)
        self.dcbz = Signal()
        self.write_bram = Signal()
        self.write_tag = Signal()
        self.slow_valid = Signal()
        self.wb = WBMasterOut()
        self.reload_tag = Signal(TAG_BITS)
        self.store_way = Signal(WAY_BITS)
        self.store_row = Signal(ROW_BITS)
        self.store_index = Signal(INDEX_BITS)
        self.end_row_ix = Signal(ROW_LINE_BITS) # a row index within a line
        self.rows_valid = RowPerLineValidArray()
        self.acks_pending = Signal(3)
        self.inc_acks = Signal()
        self.dec_acks = Signal()

        # Signals to complete (possibly with error)
        self.ls_valid = Signal()
        self.ls_error = Signal()
        self.mmu_done = Signal()
        self.mmu_error = Signal()
        self.cache_paradox = Signal()

        # Signal to complete a failed stcx.
        self.stcx_fail = Signal()


# Reservation information
class Reservation(RecordObject):
    def __init__(self):
        super().__init__()
        self.valid = Signal()
        self.addr = Signal(64-LINE_OFF_BITS)


class DTLBUpdate(Elaboratable):
    def __init__(self):
        self.tlbie = Signal()
        self.tlbwe = Signal()
        self.doall = Signal()
        self.updated = Signal()
        self.v_updated = Signal()
        self.tlb_hit = Signal()
        self.tlb_req_index = Signal(TLB_SET_BITS)

        self.tlb_hit_way = Signal(TLB_WAY_BITS)
        self.tlb_tag_way = Signal(TLB_TAG_WAY_BITS)
        self.tlb_pte_way = Signal(TLB_PTE_WAY_BITS)
        self.repl_way = Signal(TLB_WAY_BITS)
        self.eatag = Signal(TLB_EA_TAG_BITS)
        self.pte_data = Signal(TLB_PTE_BITS)

        self.dv = Signal(TLB_NUM_WAYS)         # valid bits for the set

        self.tb_out = Signal(TLB_TAG_WAY_BITS) # tag set out
        self.pb_out = Signal(TLB_PTE_WAY_BITS) # PTE set out
        self.db_out = Signal(TLB_NUM_WAYS)     # valid bits out

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        tagset = Signal(TLB_TAG_WAY_BITS)
        pteset = Signal(TLB_PTE_WAY_BITS)

        tb_out, pb_out, db_out = self.tb_out, self.pb_out, self.db_out

        with m.If(self.tlbie & self.doall):
            pass # clear all back in parent
        with m.Elif(self.tlbie):
            with m.If(self.tlb_hit):
                comb += db_out.eq(self.dv)
                # clear the valid bit of the hit way (invalidate entry)
                comb += db_out.bit_select(self.tlb_hit_way, 1).eq(0)
                comb += self.v_updated.eq(1)

        with m.Elif(self.tlbwe):

            comb += tagset.eq(self.tlb_tag_way)
            comb += write_tlb_tag(self.repl_way, tagset, self.eatag)
            comb += tb_out.eq(tagset)

            comb += pteset.eq(self.tlb_pte_way)
            comb += write_tlb_pte(self.repl_way, pteset, self.pte_data)
            comb += pb_out.eq(pteset)

            comb += db_out.bit_select(self.repl_way, 1).eq(1)

            comb += self.updated.eq(1)
            comb += self.v_updated.eq(1)

        return m


class DCachePendingHit(Elaboratable):

    def __init__(self, tlb_pte_way, tlb_valid_way, tlb_hit_way,
                 cache_valid_idx, cache_tag_set,
                 req_addr, hit_set):

        self.go = Signal()
        self.virt_mode = Signal()
        self.is_hit = Signal()
        self.tlb_hit = Signal()
        self.hit_way = Signal(WAY_BITS)
        self.rel_match = Signal()
        self.req_index = Signal(INDEX_BITS)
        self.reload_tag = Signal(TAG_BITS)

        self.tlb_hit_way = tlb_hit_way
        self.tlb_pte_way = tlb_pte_way
        self.tlb_valid_way = tlb_valid_way
        self.cache_valid_idx = cache_valid_idx
        self.cache_tag_set = cache_tag_set
        self.req_addr = req_addr
        self.hit_set = hit_set

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        go = self.go
        virt_mode = self.virt_mode
        is_hit = self.is_hit
        tlb_pte_way = self.tlb_pte_way
        tlb_valid_way = self.tlb_valid_way
        cache_valid_idx = self.cache_valid_idx
        cache_tag_set = self.cache_tag_set
        req_addr = self.req_addr
        tlb_hit_way = self.tlb_hit_way
        tlb_hit = self.tlb_hit
        hit_set = self.hit_set
        hit_way = self.hit_way
        rel_match = self.rel_match
        req_index = self.req_index
        reload_tag = self.reload_tag

        rel_matches = Array(Signal() for i in range(TLB_NUM_WAYS))
        hit_way_set = HitWaySet()

        # Test if pending request is a hit on any way
        # In order to make timing in virtual mode,
        # when we are using the TLB, we compare each
        # way with each of the real addresses from each way of
        # the TLB, and then decide later which match to use.

        with m.If(virt_mode):
            for j in range(TLB_NUM_WAYS):
                s_tag = Signal(TAG_BITS, name="s_tag%d" % j)
                s_hit = Signal()
                s_pte = Signal(TLB_PTE_BITS)
                s_ra = Signal(REAL_ADDR_BITS)
                comb += s_pte.eq(read_tlb_pte(j, tlb_pte_way))
                comb += s_ra.eq(Cat(req_addr[0:TLB_LG_PGSZ],
                                    s_pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
                comb += s_tag.eq(get_tag(s_ra))

                for i in range(NUM_WAYS):
                    is_tag_hit = Signal()
                    comb += is_tag_hit.eq(go & cache_valid_idx[i] &
                                          (read_tag(i, cache_tag_set) == s_tag)
                                          & tlb_valid_way[j])
                    with m.If(is_tag_hit):
                        comb += hit_way_set[j].eq(i)
                        comb += s_hit.eq(1)
                comb += hit_set[j].eq(s_hit)
                with m.If(s_tag == reload_tag):
                    comb += rel_matches[j].eq(1)
            with m.If(tlb_hit):
                comb += is_hit.eq(hit_set[tlb_hit_way])
                comb += hit_way.eq(hit_way_set[tlb_hit_way])
                comb += rel_match.eq(rel_matches[tlb_hit_way])
        with m.Else():
            s_tag = Signal(TAG_BITS)
            comb += s_tag.eq(get_tag(req_addr))
            for i in range(NUM_WAYS):
                is_tag_hit = Signal()
                # parentheses around the "==" are essential: "&" binds
                # more tightly than "==" in python
                comb += is_tag_hit.eq(go & cache_valid_idx[i] &
                                      (read_tag(i, cache_tag_set) == s_tag))
                with m.If(is_tag_hit):
                    comb += hit_way.eq(i)
                    comb += is_hit.eq(1)
            with m.If(s_tag == reload_tag):
                comb += rel_match.eq(1)

        return m


class DCache(Elaboratable):
    """Set associative dcache write-through
    TODO (in no specific order):
    * See list in icache.vhdl
    * Complete load misses on the cycle when WB data comes instead of
      at the end of line (this requires dealing with requests coming in
      while not idle...)
    """
    def __init__(self):
        self.d_in = LoadStore1ToDCacheType()
        self.d_out = DCacheToLoadStore1Type()

        self.m_in = MMUToDCacheType()
        self.m_out = DCacheToMMUType()

        self.stall_out = Signal()

        self.wb_out = WBMasterOut()
        self.wb_in = WBSlaveOut()

        self.log_out = Signal(20)

    def stage_0(self, m, r0, r1, r0_full):
        """Latch the request in r0.req as long as we're not stalling
        """
        comb = m.d.comb
        sync = m.d.sync
        d_in, d_out, m_in = self.d_in, self.d_out, self.m_in

        r = RegStage0()

        # TODO, this goes in unit tests and formal proofs
        with m.If(d_in.valid & m_in.valid):
            #sync += Display("request collision loadstore vs MMU")
            pass

        # r is combinatorial staging only: it is registered (once) into
        # r0 below, matching the process-variable idiom of the VHDL
        with m.If(m_in.valid):
            comb += r.req.valid.eq(1)
            comb += r.req.load.eq(~(m_in.tlbie | m_in.tlbld))
            comb += r.req.dcbz.eq(0)
            comb += r.req.nc.eq(0)
            comb += r.req.reserve.eq(0)
            comb += r.req.virt_mode.eq(1)
            comb += r.req.priv_mode.eq(1)
            comb += r.req.addr.eq(m_in.addr)
            comb += r.req.data.eq(m_in.pte)
            comb += r.req.byte_sel.eq(~0) # Const -1 sets all to 0b111....
            comb += r.tlbie.eq(m_in.tlbie)
            comb += r.doall.eq(m_in.doall)
            comb += r.tlbld.eq(m_in.tlbld)
            comb += r.mmu_req.eq(1)
        with m.Else():
            comb += r.req.eq(d_in)
            comb += r.tlbie.eq(0)
            comb += r.doall.eq(0)
            comb += r.tlbld.eq(0)
            comb += r.mmu_req.eq(0)
        with m.If(~(r1.full & r0_full)):
            sync += r0.eq(r)
            sync += r0_full.eq(r.req.valid)

    def tlb_read(self, m, r0_stall, tlb_valid_way,
                 tlb_tag_way, tlb_pte_way, dtlb_valid_bits,
                 dtlb_tags, dtlb_ptes):
        """TLB
        Operates in the second cycle on the request latched in r0.req.
        TLB updates write the entry at the end of the second cycle.
        """
        comb = m.d.comb
        sync = m.d.sync
        m_in, d_in = self.m_in, self.d_in

        index = Signal(TLB_SET_BITS)
        addrbits = Signal(TLB_SET_BITS)

        amin = TLB_LG_PGSZ
        amax = TLB_LG_PGSZ + TLB_SET_BITS

        with m.If(m_in.valid):
            comb += addrbits.eq(m_in.addr[amin : amax])
        with m.Else():
            comb += addrbits.eq(d_in.addr[amin : amax])
        comb += index.eq(addrbits)

        # If we have any op and the previous op isn't finished,
        # then keep the same output for next cycle.
        with m.If(~r0_stall):
            sync += tlb_valid_way.eq(dtlb_valid_bits[index])
            sync += tlb_tag_way.eq(dtlb_tags[index])
            sync += tlb_pte_way.eq(dtlb_ptes[index])

    def maybe_tlb_plrus(self, m, r1, tlb_plru_victim):
        """Generate TLB PLRUs
        """
        comb = m.d.comb
        sync = m.d.sync

        if TLB_NUM_WAYS == 0:
            return
        for i in range(TLB_SET_SIZE):
            # TLB PLRU interface
            tlb_plru = PLRU(TLB_WAY_BITS)
            setattr(m.submodules, "maybe_plru_%d" % i, tlb_plru)
            tlb_plru_acc_en = Signal()

            comb += tlb_plru_acc_en.eq(r1.tlb_hit & (r1.tlb_hit_index == i))
            comb += tlb_plru.acc_en.eq(tlb_plru_acc_en)
            comb += tlb_plru.acc.eq(r1.tlb_hit_way)
            comb += tlb_plru_victim[i].eq(tlb_plru.lru_o)

    def tlb_search(self, m, tlb_req_index, r0, r0_valid,
                   tlb_valid_way, tlb_tag_way, tlb_hit_way,
                   tlb_pte_way, pte, tlb_hit, valid_ra, perm_attr, ra):

        comb = m.d.comb
        sync = m.d.sync

        hitway = Signal(TLB_WAY_BITS)
        hit = Signal()
        eatag = Signal(TLB_EA_TAG_BITS)

        TLB_LG_END = TLB_LG_PGSZ + TLB_SET_BITS
        comb += tlb_req_index.eq(r0.req.addr[TLB_LG_PGSZ : TLB_LG_END])
        comb += eatag.eq(r0.req.addr[TLB_LG_END : 64 ])

        for i in range(TLB_NUM_WAYS):
            is_tag_hit = Signal()
            # parentheses around the "==" are essential: "&" binds more
            # tightly than "==" in python
            comb += is_tag_hit.eq(tlb_valid_way[i]
                                  & (read_tlb_tag(i, tlb_tag_way) == eatag))
            with m.If(is_tag_hit):
                comb += hitway.eq(i)
                comb += hit.eq(1)

        comb += tlb_hit.eq(hit & r0_valid)
        comb += tlb_hit_way.eq(hitway)

        with m.If(tlb_hit):
            comb += pte.eq(read_tlb_pte(hitway, tlb_pte_way))
        with m.Else():
            comb += pte.eq(0)
        comb += valid_ra.eq(tlb_hit | ~r0.req.virt_mode)
        with m.If(r0.req.virt_mode):
            comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
                              r0.req.addr[ROW_OFF_BITS:TLB_LG_PGSZ],
                              pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
            comb += perm_attr.eq(extract_perm_attr(pte))
        with m.Else():
            comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
                              r0.req.addr[ROW_OFF_BITS:REAL_ADDR_BITS]))

            comb += perm_attr.reference.eq(1)
            comb += perm_attr.changed.eq(1)
            comb += perm_attr.priv.eq(1)
            comb += perm_attr.nocache.eq(0)
            comb += perm_attr.rd_perm.eq(1)
            comb += perm_attr.wr_perm.eq(1)

    def tlb_update(self, m, r0_valid, r0, dtlb_valid_bits, tlb_req_index,
                   tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_tag_way,
                   dtlb_tags, tlb_pte_way, dtlb_ptes):

        comb = m.d.comb
        sync = m.d.sync

        tlbie = Signal()
        tlbwe = Signal()

        comb += tlbie.eq(r0_valid & r0.tlbie)
        comb += tlbwe.eq(r0_valid & r0.tlbld)

        m.submodules.tlb_update = d = DTLBUpdate()
        with m.If(tlbie & r0.doall):
            # clear all valid bits at once
            for i in range(TLB_SET_SIZE):
                sync += dtlb_valid_bits[i].eq(0)
        with m.If(d.updated):
            sync += dtlb_tags[tlb_req_index].eq(d.tb_out)
            sync += dtlb_ptes[tlb_req_index].eq(d.pb_out)
        with m.If(d.v_updated):
            sync += dtlb_valid_bits[tlb_req_index].eq(d.db_out)

        comb += d.dv.eq(dtlb_valid_bits[tlb_req_index])

        comb += d.tlbie.eq(tlbie)
        comb += d.tlbwe.eq(tlbwe)
        comb += d.doall.eq(r0.doall)
        comb += d.tlb_hit.eq(tlb_hit)
        comb += d.tlb_hit_way.eq(tlb_hit_way)
        comb += d.tlb_tag_way.eq(tlb_tag_way)
        comb += d.tlb_pte_way.eq(tlb_pte_way)
        comb += d.tlb_req_index.eq(tlb_req_index)

        with m.If(tlb_hit):
            comb += d.repl_way.eq(tlb_hit_way)
        with m.Else():
            comb += d.repl_way.eq(tlb_plru_victim[tlb_req_index])
        comb += d.eatag.eq(r0.req.addr[TLB_LG_PGSZ + TLB_SET_BITS:64])
        comb += d.pte_data.eq(r0.req.data)

    def maybe_plrus(self, m, r1, plru_victim):
        """Generate PLRUs
        """
        comb = m.d.comb
        sync = m.d.sync

        if NUM_WAYS == 0:
            return

        for i in range(NUM_LINES):
            # PLRU interface
            plru = PLRU(WAY_BITS)
            setattr(m.submodules, "plru%d" % i, plru)
            plru_acc_en = Signal()

            comb += plru_acc_en.eq(r1.cache_hit & (r1.hit_index == i))
            comb += plru.acc_en.eq(plru_acc_en)
            comb += plru.acc.eq(r1.hit_way)
            comb += plru_victim[i].eq(plru.lru_o)

    def cache_tag_read(self, m, r0_stall, req_index, cache_tag_set, cache_tags):
        """Cache tag RAM read port
        """
        comb = m.d.comb
        sync = m.d.sync
        m_in, d_in = self.m_in, self.d_in

        index = Signal(INDEX_BITS)

        with m.If(r0_stall):
            comb += index.eq(req_index)
        with m.Elif(m_in.valid):
            comb += index.eq(get_index(m_in.addr))
        with m.Else():
            comb += index.eq(get_index(d_in.addr))
        sync += cache_tag_set.eq(cache_tags[index])

    def dcache_request(self, m, r0, ra, req_index, req_row, req_tag,
                       r0_valid, r1, cache_valid_bits, replace_way,
                       use_forward1_next, use_forward2_next,
                       req_hit_way, plru_victim, rc_ok, perm_attr,
                       valid_ra, perm_ok, access_ok, req_op, req_go,
                       tlb_pte_way,
                       tlb_hit, tlb_hit_way, tlb_valid_way, cache_tag_set,
                       cancel_store, req_same_tag, r0_stall, early_req_row):
        """Cache request parsing and hit detection
        """

        comb = m.d.comb
        sync = m.d.sync
        m_in, d_in = self.m_in, self.d_in

        is_hit = Signal()
        hit_way = Signal(WAY_BITS)
        op = Signal(Op)
        opsel = Signal(3)
        go = Signal()
        nc = Signal()
        hit_set = Array(Signal() for i in range(TLB_NUM_WAYS))
        cache_valid_idx = Signal(NUM_WAYS) # one valid bit per way

        # Extract line, row and tag from request
        comb += req_index.eq(get_index(r0.req.addr))
        comb += req_row.eq(get_row(r0.req.addr))
        comb += req_tag.eq(get_tag(ra))

        comb += go.eq(r0_valid & ~(r0.tlbie | r0.tlbld) & ~r1.ls_error)
        comb += cache_valid_idx.eq(cache_valid_bits[req_index])

        m.submodules.dcache_pend = dc = DCachePendingHit(tlb_pte_way,
                                            tlb_valid_way, tlb_hit_way,
                                            cache_valid_idx, cache_tag_set,
                                            r0.req.addr,
                                            hit_set)

        comb += dc.tlb_hit.eq(tlb_hit)
        comb += dc.reload_tag.eq(r1.reload_tag)
        comb += dc.virt_mode.eq(r0.req.virt_mode)
        comb += dc.go.eq(go)
        comb += dc.req_index.eq(req_index)
        comb += is_hit.eq(dc.is_hit)
        comb += hit_way.eq(dc.hit_way)
        comb += req_same_tag.eq(dc.rel_match)

        # See if the request matches the line currently being reloaded
        with m.If((r1.state == State.RELOAD_WAIT_ACK) &
                  (req_index == r1.store_index) & req_same_tag):
            # For a store, consider this a hit even if the row isn't
            # valid since it will be by the time we perform the store.
            # For a load, check the appropriate row valid bit.
            valid = r1.rows_valid[req_row % ROW_PER_LINE]
            comb += is_hit.eq(~r0.req.load | valid)
            comb += hit_way.eq(replace_way)

        # Whether to use forwarded data for a load or not
        comb += use_forward1_next.eq(0)
        with m.If((get_row(r1.req.real_addr) == req_row) &
                  (r1.req.hit_way == hit_way)):
            # Only need to consider r1.write_bram here, since if we
            # are writing refill data here, then we don't have a
            # cache hit this cycle on the line being refilled.
            # (There is the possibility that the load following the
            # load miss that started the refill could be to the old
            # contents of the victim line, since it is a couple of
            # cycles after the refill starts before we see the updated
            # cache tag. In that case we don't use the bypass.)
            comb += use_forward1_next.eq(r1.write_bram)
        comb += use_forward2_next.eq(0)
        with m.If((r1.forward_row1 == req_row) & (r1.forward_way1 == hit_way)):
            comb += use_forward2_next.eq(r1.forward_valid1)

        # The way that matched on a hit
        comb += req_hit_way.eq(hit_way)

        # The way to replace on a miss
        with m.If(r1.write_tag):
            comb += replace_way.eq(plru_victim[r1.store_index])
        with m.Else():
            comb += replace_way.eq(r1.store_way)

        # work out whether we have permission for this access
        # NB we don't yet implement AMR, thus no KUAP
        comb += rc_ok.eq(perm_attr.reference
                         & (r0.req.load | perm_attr.changed))
        comb += perm_ok.eq((r0.req.priv_mode | ~perm_attr.priv)
                           & perm_attr.wr_perm
                           | (r0.req.load & perm_attr.rd_perm))
        comb += access_ok.eq(valid_ra & perm_ok & rc_ok)
        # Combine the request and cache hit status to decide what
        # operation needs to be done
        comb += nc.eq(r0.req.nc | perm_attr.nocache)
        comb += op.eq(Op.OP_NONE)
        with m.If(go):
            with m.If(~access_ok):
                comb += op.eq(Op.OP_BAD)
            with m.Elif(cancel_store):
                comb += op.eq(Op.OP_STCX_FAIL)
            with m.Else():
                comb += opsel.eq(Cat(is_hit, nc, r0.req.load))
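                # For readability: the encoding of opsel follows from
                # the Cat() above (bit 0 is is_hit, bit 1 is nc,
                # bit 2 is load) and the switch cases below:
                #
                #   load nc hit | op
                #   ------------+--------------
                #    1    0  1  | OP_LOAD_HIT
                #    1    0  0  | OP_LOAD_MISS
                #    1    1  0  | OP_LOAD_NC
                #    0    0  1  | OP_STORE_HIT
                #    0    0  0  | OP_STORE_MISS
                #    0    1  0  | OP_STORE_MISS
                #    0    1  1  | OP_BAD (NC store hit: cache paradox)
                #    1    1  1  | OP_BAD (NC load hit: cache paradox)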
                with m.Switch(opsel):
                    with m.Case(0b101):
                        comb += op.eq(Op.OP_LOAD_HIT)
                    with m.Case(0b100):
                        comb += op.eq(Op.OP_LOAD_MISS)
                    with m.Case(0b110):
                        comb += op.eq(Op.OP_LOAD_NC)
                    with m.Case(0b001):
                        comb += op.eq(Op.OP_STORE_HIT)
                    with m.Case(0b000):
                        comb += op.eq(Op.OP_STORE_MISS)
                    with m.Case(0b010):
                        comb += op.eq(Op.OP_STORE_MISS)
                    with m.Case(0b011):
                        comb += op.eq(Op.OP_BAD)
                    with m.Case(0b111):
                        comb += op.eq(Op.OP_BAD)
                    with m.Default():
                        comb += op.eq(Op.OP_NONE)
        comb += req_op.eq(op)
        comb += req_go.eq(go)

        # Version of the row number that is valid one cycle earlier
        # in the cases where we need to read the cache data BRAM.
        # If we're stalling then we need to keep reading the last
        # row requested.
        with m.If(~r0_stall):
            with m.If(m_in.valid):
                comb += early_req_row.eq(get_row(m_in.addr))
            with m.Else():
                comb += early_req_row.eq(get_row(d_in.addr))
        with m.Else():
            comb += early_req_row.eq(req_row)

    def reservation_comb(self, m, cancel_store, set_rsrv, clear_rsrv,
                         r0_valid, r0, reservation):
        """Handle load-with-reservation and store-conditional instructions
        """
        comb = m.d.comb
        sync = m.d.sync

        with m.If(r0_valid & r0.req.reserve):

            # XXX generate alignment interrupt if address
            # is not aligned XXX or if r0.req.nc = '1'
            with m.If(r0.req.load):
                comb += set_rsrv.eq(1) # load with reservation
            with m.Else():
                comb += clear_rsrv.eq(1) # store conditional
                with m.If(~reservation.valid |
                          (r0.req.addr[LINE_OFF_BITS:64] !=
                           reservation.addr)):
                    comb += cancel_store.eq(1)

    def reservation_reg(self, m, r0_valid, access_ok, set_rsrv, clear_rsrv,
                        reservation, r0):

        comb = m.d.comb
        sync = m.d.sync

        with m.If(r0_valid & access_ok):
            with m.If(clear_rsrv):
                sync += reservation.valid.eq(0)
            with m.Elif(set_rsrv):
                sync += reservation.valid.eq(1)
                sync += reservation.addr.eq(r0.req.addr[LINE_OFF_BITS:64])

    def writeback_control(self, m, r1, cache_out):
        """Return data for loads & completion control logic
        """
        comb = m.d.comb
        sync = m.d.sync
        d_out, m_out = self.d_out, self.m_out

        data_out = Signal(64)
        data_fwd = Signal(64)

        # Use the bypass if we are reading the row that was
        # written 1 or 2 cycles ago, including for the
        # slow_valid = 1 case (i.e. completing a load
        # miss or a non-cacheable load).
        with m.If(r1.use_forward1):
            comb += data_fwd.eq(r1.forward_data1)
        with m.Else():
            comb += data_fwd.eq(r1.forward_data2)

        comb += data_out.eq(cache_out[r1.hit_way])

        for i in range(8):
            with m.If(r1.forward_sel[i]):
                dsel = data_fwd.word_select(i, 8)
                comb += data_out.word_select(i, 8).eq(dsel)

        comb += d_out.valid.eq(r1.ls_valid)
        comb += d_out.data.eq(data_out)
        comb += d_out.store_done.eq(~r1.stcx_fail)
        comb += d_out.error.eq(r1.ls_error)
        comb += d_out.cache_paradox.eq(r1.cache_paradox)

        # Outputs to MMU
        comb += m_out.done.eq(r1.mmu_done)
        comb += m_out.err.eq(r1.mmu_error)
        comb += m_out.data.eq(data_out)

        # We have a valid load or store hit or we just completed
        # a slow op such as a load miss, a NC load or a store
        #
        # Note: the load hit is delayed by one cycle. However it
        # can still not collide with r.slow_valid (well unless I
        # miscalculated) because slow_valid can only be set on a
        # subsequent request and not on its first cycle (the state
        # machine must have advanced), which makes slow_valid
        # at least 2 cycles from the previous hit_load_valid.

        # Sanity: Only one of these must be set in any given cycle

        if False: # TODO: need Display to get this to work
            assert (r1.slow_valid & r1.stcx_fail) != 1, \
                "unexpected slow_valid collision with stcx_fail"

            assert ((r1.slow_valid | r1.stcx_fail) | r1.hit_load_valid) != 1, \
                "unexpected hit_load_delayed collision with slow_valid"

        with m.If(~r1.mmu_req):
            # Request came from loadstore1...
            # Load hit case is the standard path
            with m.If(r1.hit_load_valid):
                #Display(f"completing load hit data={data_out}")
                pass

            # error cases complete without stalling
            with m.If(r1.ls_error):
                # Display("completing ld/st with error")
                pass

            # Slow ops (load miss, NC, stores)
            with m.If(r1.slow_valid):
                #Display(f"completing store or load miss data={data_out}")
                pass

        with m.Else():
            # Request came from MMU
            with m.If(r1.hit_load_valid):
                # Display(f"completing load hit to MMU, data={m_out.data}")
                pass
            # error cases complete without stalling
            with m.If(r1.mmu_error):
                #Display("completing MMU ld with error")
                pass

            # Slow ops (i.e. load miss)
            with m.If(r1.slow_valid):
                #Display("completing MMU load miss, data={m_out.data}")
                pass

    def rams(self, m, r1, early_req_row, cache_out, replace_way):
        """rams
        Generate a cache RAM for each way. This handles the normal
        reads, writes from reloads and the special store-hit update
        path as well.

        Note: the BRAMs have an extra read buffer, meaning the output
        is pipelined an extra cycle. This differs from the
        icache. The writeback logic needs to take that into
        account by using 1-cycle delayed signals for load hits.
        """
        comb = m.d.comb
        wb_in = self.wb_in

        for i in range(NUM_WAYS):
            do_read = Signal()
            rd_addr = Signal(ROW_BITS)
            do_write = Signal()
            wr_addr = Signal(ROW_BITS)
            wr_data = Signal(WB_DATA_BITS)
            wr_sel = Signal(ROW_SIZE)
            wr_sel_m = Signal(ROW_SIZE)
            _d_out = Signal(WB_DATA_BITS)

            way = CacheRam(ROW_BITS, WB_DATA_BITS, True)
            setattr(m.submodules, "cacheram_%d" % i, way)

            comb += way.rd_en.eq(do_read)
            comb += way.rd_addr.eq(rd_addr)
            comb += _d_out.eq(way.rd_data_o)
            comb += way.wr_sel.eq(wr_sel_m)
            comb += way.wr_addr.eq(wr_addr)
            comb += way.wr_data.eq(wr_data)

            # Cache hit reads
            comb += do_read.eq(1)
            comb += rd_addr.eq(early_req_row)
            comb += cache_out[i].eq(_d_out)

            # Write mux:
            #
            # Defaults to wishbone read responses (cache refill)
            #
            # For timing, the mux on wr_data/sel/addr is not
            # dependent on anything other than the current state.

            with m.If(r1.write_bram):
                # Write store data to BRAM. This happens one
                # cycle after the store is in r0.
                comb += wr_data.eq(r1.req.data)
                comb += wr_sel.eq(r1.req.byte_sel)
                comb += wr_addr.eq(get_row(r1.req.real_addr))

                with m.If(i == r1.req.hit_way):
                    comb += do_write.eq(1)
            with m.Else():
                # Otherwise, we might be doing a reload or a DCBZ
                with m.If(r1.dcbz):
                    comb += wr_data.eq(0)
                with m.Else():
                    comb += wr_data.eq(wb_in.dat)
                comb += wr_addr.eq(r1.store_row)
                comb += wr_sel.eq(~0) # all 1s

                with m.If((r1.state == State.RELOAD_WAIT_ACK)
                          & wb_in.ack & (replace_way == i)):
                    comb += do_write.eq(1)

            # Mask write selects with do_write since BRAM
            # doesn't have a global write-enable
            with m.If(do_write):
                comb += wr_sel_m.eq(wr_sel)

    # Cache hit synchronous machine for the easy case.
    # This handles load hits.
    # It also handles error cases (TLB miss, cache paradox)
    def dcache_fast_hit(self, m, req_op, r0_valid, r0, r1,
                        req_hit_way, req_index, access_ok,
                        tlb_hit, tlb_hit_way, tlb_req_index):

        comb = m.d.comb
        sync = m.d.sync

        with m.If(req_op != Op.OP_NONE):
            #Display(f"op:{req_op} addr:{r0.req.addr} nc: {r0.req.nc}" \
            #      f"idx:{req_index} tag:{req_tag} way: {req_hit_way}"
            #     )
            pass

        with m.If(r0_valid):
            sync += r1.mmu_req.eq(r0.mmu_req)

        # Fast path for load/store hits.
        # Set signals for the writeback controls.
        sync += r1.hit_way.eq(req_hit_way)
        sync += r1.hit_index.eq(req_index)

        with m.If(req_op == Op.OP_LOAD_HIT):
            sync += r1.hit_load_valid.eq(1)
        with m.Else():
            sync += r1.hit_load_valid.eq(0)

        with m.If((req_op == Op.OP_LOAD_HIT) | (req_op == Op.OP_STORE_HIT)):
            sync += r1.cache_hit.eq(1)
        with m.Else():
            sync += r1.cache_hit.eq(0)

        with m.If(req_op == Op.OP_BAD):
            # Display(f"Signalling ld/st error valid_ra={valid_ra}"
            #      f"rc_ok={rc_ok} perm_ok={perm_ok}"
            sync += r1.ls_error.eq(~r0.mmu_req)
            sync += r1.mmu_error.eq(r0.mmu_req)
            sync += r1.cache_paradox.eq(access_ok)

        with m.Else():
            sync += r1.ls_error.eq(0)
            sync += r1.mmu_error.eq(0)
            sync += r1.cache_paradox.eq(0)

        with m.If(req_op == Op.OP_STCX_FAIL):
            sync += r1.stcx_fail.eq(1)
        with m.Else():
            sync += r1.stcx_fail.eq(0)

        # Record TLB hit information for updating TLB PLRU
        sync += r1.tlb_hit.eq(tlb_hit)
        sync += r1.tlb_hit_way.eq(tlb_hit_way)
        sync += r1.tlb_hit_index.eq(tlb_req_index)

    # Memory accesses are handled by this state machine:
    #
    #   * Cache load miss/reload (in conjunction with "rams")
    #   * Load hits for non-cachable forms
    #   * Stores (the collision case is handled in "rams")
    #
    # All wishbone requests generation is done here.
    # This machine operates at stage 1.
    def dcache_slow(self, m, r1, use_forward1_next, use_forward2_next,
                    cache_valid_bits, r0, replace_way,
                    req_hit_way, req_same_tag,
                    r0_valid, req_op, cache_tag, req_go, ra):

        comb = m.d.comb
        sync = m.d.sync
        wb_in = self.wb_in

        req = MemAccessRequest()
        acks = Signal(3)
        adjust_acks = Signal(3)
        stbs_done = Signal()

        sync += r1.use_forward1.eq(use_forward1_next)
        sync += r1.forward_sel.eq(0)

        with m.If(use_forward1_next):
            sync += r1.forward_sel.eq(r1.req.byte_sel)
        with m.Elif(use_forward2_next):
            sync += r1.forward_sel.eq(r1.forward_sel1)

        sync += r1.forward_data2.eq(r1.forward_data1)
        with m.If(r1.write_bram):
            sync += r1.forward_data1.eq(r1.req.data)
            sync += r1.forward_sel1.eq(r1.req.byte_sel)
            sync += r1.forward_way1.eq(r1.req.hit_way)
            sync += r1.forward_row1.eq(get_row(r1.req.real_addr))
            sync += r1.forward_valid1.eq(1)
        with m.Else():
            with m.If(r1.dcbz):
                sync += r1.forward_data1.eq(0)
            with m.Else():
                sync += r1.forward_data1.eq(wb_in.dat)
            sync += r1.forward_sel1.eq(~0) # all 1s
            sync += r1.forward_way1.eq(replace_way)
            sync += r1.forward_row1.eq(r1.store_row)
            sync += r1.forward_valid1.eq(0)

        # One cycle pulses reset
        sync += r1.slow_valid.eq(0)
        sync += r1.write_bram.eq(0)
        sync += r1.inc_acks.eq(0)
        sync += r1.dec_acks.eq(0)

        sync += r1.ls_valid.eq(0)
        # complete tlbies and TLB loads in the third cycle
        sync += r1.mmu_done.eq(r0_valid & (r0.tlbie | r0.tlbld))

        with m.If((req_op == Op.OP_LOAD_HIT)
                  | (req_op == Op.OP_STCX_FAIL)):
            with m.If(~r0.mmu_req):
                sync += r1.ls_valid.eq(1)
            with m.Else():
                sync += r1.mmu_done.eq(1)

        with m.If(r1.write_tag):
            # Store new tag in selected way
            for i in range(NUM_WAYS):
                with m.If(i == replace_way):
                    ct = Signal(TAG_RAM_WIDTH)
                    comb += ct.eq(cache_tag[r1.store_index])
                    comb += ct.word_select(i, TAG_WIDTH).eq(r1.reload_tag)
                    sync += cache_tag[r1.store_index].eq(ct)
            sync += r1.store_way.eq(replace_way)
            sync += r1.write_tag.eq(0)

        # Take request from r1.req if there is one there,
        # else from req_op, ra, etc.
        with m.If(r1.full):
            comb += req.eq(r1.req)
        with m.Else():
            comb += req.op.eq(req_op)
            comb += req.valid.eq(req_go)
            comb += req.mmu_req.eq(r0.mmu_req)
            comb += req.dcbz.eq(r0.req.dcbz)
            comb += req.real_addr.eq(ra)

            with m.If(~r0.req.dcbz):
                comb += req.data.eq(r0.req.data)
            with m.Else():
                comb += req.data.eq(0)

            # Select all bytes for dcbz
            # and for cacheable loads
            with m.If(r0.req.dcbz | (r0.req.load & ~r0.req.nc)):
                comb += req.byte_sel.eq(~0) # all 1s
            with m.Else():
                comb += req.byte_sel.eq(r0.req.byte_sel)
            comb += req.hit_way.eq(req_hit_way)
            comb += req.same_tag.eq(req_same_tag)

        # Store the incoming request from r0,
        # if it is a slow request
        # Note that r1.full = 1 implies req_op = OP_NONE
        with m.If((req_op == Op.OP_LOAD_MISS)
                  | (req_op == Op.OP_LOAD_NC)
                  | (req_op == Op.OP_STORE_MISS)
                  | (req_op == Op.OP_STORE_HIT)):
            sync += r1.req.eq(req)
            sync += r1.full.eq(1)

        # Main state machine
        with m.Switch(r1.state):

            with m.Case(State.IDLE):
                # XXX check 'left downto. probably means len(r1.wb.adr)
                #     r1.wb.adr <= req.real_addr(
                #                   r1.wb.adr'left downto 0
                #                  );
                sync += r1.wb.adr.eq(req.real_addr)
                sync += r1.wb.sel.eq(req.byte_sel)
                sync += r1.wb.dat.eq(req.data)
                sync += r1.dcbz.eq(req.dcbz)

                # Keep track of our index and way
                # for subsequent stores.
                sync += r1.store_index.eq(get_index(req.real_addr))
                sync += r1.store_row.eq(get_row(req.real_addr))
                sync += r1.end_row_ix.eq(
                         get_row_of_line(get_row(req.real_addr)))
                sync += r1.reload_tag.eq(get_tag(req.real_addr))
                sync += r1.req.same_tag.eq(1)

                with m.If(req.op == Op.OP_STORE_HIT):
                    sync += r1.store_way.eq(req.hit_way)

                # Reset per-row valid bits,
                # ready for handling OP_LOAD_MISS
                for i in range(ROW_PER_LINE):
                    sync += r1.rows_valid[i].eq(0)

                with m.Switch(req.op):
                    with m.Case(Op.OP_LOAD_HIT):
                        # stay in IDLE state
                        pass

                    with m.Case(Op.OP_LOAD_MISS):
                        #Display(f"cache miss real addr:" \
                        #      f"{req_real_addr}" \
                        #      f" idx:{get_index(req_real_addr)}" \
                        #      f" tag:{get_tag(req.real_addr)}")

                        # Start the wishbone cycle
                        sync += r1.wb.we.eq(0)
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)

                        # Track that we had one request sent
                        sync += r1.state.eq(State.RELOAD_WAIT_ACK)
                        sync += r1.write_tag.eq(1)

                    with m.Case(Op.OP_LOAD_NC):
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)
                        sync += r1.wb.we.eq(0)
                        sync += r1.state.eq(State.NC_LOAD_WAIT_ACK)

                    with m.Case(Op.OP_STORE_HIT, Op.OP_STORE_MISS):
                        with m.If(~req.dcbz):
                            sync += r1.state.eq(State.STORE_WAIT_ACK)
                            sync += r1.acks_pending.eq(1)
                            sync += r1.full.eq(0)
                            sync += r1.slow_valid.eq(1)

                            with m.If(~req.mmu_req):
                                sync += r1.ls_valid.eq(1)
                            with m.Else():
                                sync += r1.mmu_done.eq(1)

                            with m.If(req.op == Op.OP_STORE_HIT):
                                sync += r1.write_bram.eq(1)
                        with m.Else():
                            sync += r1.state.eq(State.RELOAD_WAIT_ACK)

                            with m.If(req.op == Op.OP_STORE_MISS):
                                sync += r1.write_tag.eq(1)

                        sync += r1.wb.we.eq(1)
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)

                    # OP_NONE and OP_BAD do nothing
                    # OP_BAD & OP_STCX_FAIL were
                    # handled above already
                    with m.Case(Op.OP_NONE):
                        pass
                    with m.Case(Op.OP_BAD):
                        pass
                    with m.Case(Op.OP_STCX_FAIL):
                        pass

            with m.Case(State.RELOAD_WAIT_ACK):
                # Requests are all sent if stb is 0
                comb += stbs_done.eq(~r1.wb.stb)

                with m.If(~wb_in.stall & ~stbs_done):
                    # That was the last word?
                    # We are done sending.
                    # Clear stb and set stbs_done
                    # so we can handle an eventual
                    # last ack on the same cycle.
                    with m.If(is_last_row_addr(r1.wb.adr, r1.end_row_ix)):
                        sync += r1.wb.stb.eq(0)
                        comb += stbs_done.eq(0)

                    # Calculate the next row address in the current cache line
                    rarange = r1.wb.adr[ROW_OFF_BITS : LINE_OFF_BITS]
                    sync += rarange.eq(rarange + 1)

                # Incoming acks processing
                sync += r1.forward_valid1.eq(wb_in.ack)
                with m.If(wb_in.ack):
                    # XXX needs an Array bit-accessor here
                    sync += r1.rows_valid[r1.store_row % ROW_PER_LINE].eq(1)

                    # If this is the data we were looking for,
                    # we can complete the request next cycle.
                    # Compare the whole address in case the
                    # request in r1.req is not the one that
                    # started this refill.
                    with m.If(r1.full & r1.req.same_tag &
                              ((r1.dcbz & r1.req.dcbz) |
                               (~r1.dcbz &
                                (r1.req.op == Op.OP_LOAD_MISS))) &
                              (r1.store_row == get_row(r1.req.real_addr))):
                        sync += r1.full.eq(0)
                        sync += r1.slow_valid.eq(1)
                        with m.If(~r1.mmu_req):
                            sync += r1.ls_valid.eq(1)
                        with m.Else():
                            sync += r1.mmu_done.eq(1)
                        sync += r1.forward_sel.eq(~0) # all 1s
                        sync += r1.use_forward1.eq(1)

                    # Check for completion
                    with m.If(stbs_done & is_last_row(r1.store_row,
                                                      r1.end_row_ix)):
                        # Complete wishbone cycle
                        sync += r1.wb.cyc.eq(0)

                        # Cache line is now valid
                        cv = Signal(NUM_WAYS)
                        comb += cv.eq(cache_valid_bits[r1.store_index])
                        comb += cv.bit_select(r1.store_way, 1).eq(1)
                        sync += cache_valid_bits[r1.store_index].eq(cv)
                        sync += r1.state.eq(State.IDLE)

                    # Increment store row counter
                    sync += r1.store_row.eq(next_row(r1.store_row))

            with m.Case(State.STORE_WAIT_ACK):
                comb += stbs_done.eq(~r1.wb.stb)
                comb += acks.eq(r1.acks_pending)

                with m.If(r1.inc_acks != r1.dec_acks):
                    with m.If(r1.inc_acks):
                        comb += adjust_acks.eq(acks + 1)
                    with m.Else():
                        comb += adjust_acks.eq(acks - 1)
                with m.Else():
                    comb += adjust_acks.eq(acks)

                sync += r1.acks_pending.eq(adjust_acks)

                # Clear stb when slave accepted request
                with m.If(~wb_in.stall):
                    # See if there is another store waiting
                    # to be done which is in the same real page.
                    with m.If(req.valid):
                        ra = req.real_addr[0:SET_SIZE_BITS]
                        sync += r1.wb.adr[0:SET_SIZE_BITS].eq(ra)
                        sync += r1.wb.dat.eq(req.data)
                        sync += r1.wb.sel.eq(req.byte_sel)

                    with m.Elif((adjust_acks < 7) & req.same_tag &
                                ((req.op == Op.OP_STORE_MISS)
                                 | (req.op == Op.OP_STORE_HIT))):
                        sync += r1.wb.stb.eq(1)
                        comb += stbs_done.eq(0)

                        with m.If(req.op == Op.OP_STORE_HIT):
                            sync += r1.write_bram.eq(1)
                        sync += r1.full.eq(0)
                        sync += r1.slow_valid.eq(1)

                        # Store requests never come from the MMU
                        sync += r1.ls_valid.eq(1)
                        comb += stbs_done.eq(0)
                        sync += r1.inc_acks.eq(1)
                    with m.Else():
                        sync += r1.wb.stb.eq(0)
                        comb += stbs_done.eq(1)

                # Got ack ? See if complete.
                with m.If(wb_in.ack):
                    with m.If(stbs_done & (adjust_acks == 1)):
                        sync += r1.state.eq(State.IDLE)
                        sync += r1.wb.cyc.eq(0)
                        sync += r1.wb.stb.eq(0)
                    sync += r1.dec_acks.eq(1)

            with m.Case(State.NC_LOAD_WAIT_ACK):
                # Clear stb when slave accepted request
                with m.If(~wb_in.stall):
                    sync += r1.wb.stb.eq(0)

                # Got ack ? complete.
                with m.If(wb_in.ack):
                    sync += r1.state.eq(State.IDLE)
                    sync += r1.full.eq(0)
                    sync += r1.slow_valid.eq(1)

                    with m.If(~r1.mmu_req):
                        sync += r1.ls_valid.eq(1)
                    with m.Else():
                        sync += r1.mmu_done.eq(1)

                    sync += r1.forward_sel.eq(~0) # all 1s
                    sync += r1.use_forward1.eq(1)
                    sync += r1.wb.cyc.eq(0)
                    sync += r1.wb.stb.eq(0)

    def dcache_log(self, m, r1, valid_ra, tlb_hit_way, req_op, stall_out):

        sync = m.d.sync
        d_out, wb_in, log_out = self.d_out, self.wb_in, self.log_out

        sync += log_out.eq(Cat(r1.state[:3], valid_ra, tlb_hit_way[:3],
                               stall_out, req_op[:3], d_out.valid, d_out.error,
                               r1.wb.cyc, r1.wb.stb, wb_in.ack, wb_in.stall,
                               r1.wb.adr[3:6]))

    def elaborate(self, platform):

        m = Module()
        comb = m.d.comb

        # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
        cache_tags = CacheTagArray()
        cache_tag_set = Signal(TAG_RAM_WIDTH)
        cache_valid_bits = CacheValidBitsArray()

        # TODO attribute ram_style : string;
        # TODO attribute ram_style of cache_tags : signal is "distributed";

        """note: these are passed to nmigen.hdl.Memory as "attributes".
           don't know how, just that they are.
        """
        dtlb_valid_bits = TLBValidBitsArray()
        dtlb_tags = TLBTagsArray()
        dtlb_ptes = TLBPtesArray()
        # TODO attribute ram_style of
        #  dtlb_tags : signal is "distributed";
        # TODO attribute ram_style of
        #  dtlb_ptes : signal is "distributed";

        r0 = RegStage0()
        r0_full = Signal()

        r1 = RegStage1()

        reservation = Reservation()

        # Async signals on incoming request
        req_index = Signal(INDEX_BITS)
        req_row = Signal(ROW_BITS)
        req_hit_way = Signal(WAY_BITS)
        req_tag = Signal(TAG_BITS)
        req_op = Signal(Op)
        req_data = Signal(64)
        req_same_tag = Signal()
        req_go = Signal()

        early_req_row = Signal(ROW_BITS)

        cancel_store = Signal()
        set_rsrv = Signal()
        clear_rsrv = Signal()

        r0_valid = Signal()
        r0_stall = Signal()

        use_forward1_next = Signal()
        use_forward2_next = Signal()

        cache_out = CacheRamOut()

        plru_victim = PLRUOut()
        replace_way = Signal(WAY_BITS)

        # Wishbone read/write/cache write formatting signals
        bus_sel = Signal(8)

        # TLB signals
        tlb_tag_way = Signal(TLB_TAG_WAY_BITS)
        tlb_pte_way = Signal(TLB_PTE_WAY_BITS)
        tlb_valid_way = Signal(TLB_NUM_WAYS)
        tlb_req_index = Signal(TLB_SET_BITS)
        tlb_hit = Signal()
        tlb_hit_way = Signal(TLB_WAY_BITS)
        pte = Signal(TLB_PTE_BITS)
        ra = Signal(REAL_ADDR_BITS)
        valid_ra = Signal()
        perm_attr = PermAttr()
        rc_ok = Signal()
        perm_ok = Signal()
        access_ok = Signal()

        tlb_plru_victim = TLBPLRUOut()

        # we don't yet handle collisions between loadstore1 requests
        # and MMU requests
        comb += self.m_out.stall.eq(0)

        # Hold off the request in r0 when r1 has an uncompleted request
        comb += r0_stall.eq(r0_full & r1.full)
        comb += r0_valid.eq(r0_full & ~r1.full)
        comb += self.stall_out.eq(r0_stall)

        # Wire up wishbone request latch out of stage 1
        comb += self.wb_out.eq(r1.wb)

        # call sub-functions putting everything together, using shared
        # signals established above
        self.stage_0(m, r0, r1, r0_full)
        self.tlb_read(m, r0_stall, tlb_valid_way,
                      tlb_tag_way, tlb_pte_way, dtlb_valid_bits,
                      dtlb_tags, dtlb_ptes)
        self.tlb_search(m, tlb_req_index, r0, r0_valid,
                        tlb_valid_way, tlb_tag_way, tlb_hit_way,
                        tlb_pte_way, pte, tlb_hit, valid_ra, perm_attr, ra)
        self.tlb_update(m, r0_valid, r0, dtlb_valid_bits, tlb_req_index,
                        tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_tag_way,
                        dtlb_tags, tlb_pte_way, dtlb_ptes)
        self.maybe_plrus(m, r1, plru_victim)
        self.maybe_tlb_plrus(m, r1, tlb_plru_victim)
        self.cache_tag_read(m, r0_stall, req_index, cache_tag_set, cache_tags)
        self.dcache_request(m, r0, ra, req_index, req_row, req_tag,
                            r0_valid, r1, cache_valid_bits, replace_way,
                            use_forward1_next, use_forward2_next,
                            req_hit_way, plru_victim, rc_ok, perm_attr,
                            valid_ra, perm_ok, access_ok, req_op, req_go,
                            tlb_pte_way,
                            tlb_hit, tlb_hit_way, tlb_valid_way, cache_tag_set,
                            cancel_store, req_same_tag, r0_stall,
                            early_req_row)
        self.reservation_comb(m, cancel_store, set_rsrv, clear_rsrv,
                              r0_valid, r0, reservation)
        self.reservation_reg(m, r0_valid, access_ok, set_rsrv, clear_rsrv,
                             reservation, r0)
        self.writeback_control(m, r1, cache_out)
        self.rams(m, r1, early_req_row, cache_out, replace_way)
        self.dcache_fast_hit(m, req_op, r0_valid, r0, r1,
                             req_hit_way, req_index, access_ok,
                             tlb_hit, tlb_hit_way, tlb_req_index)
        self.dcache_slow(m, r1, use_forward1_next, use_forward2_next,
                         cache_valid_bits, r0, replace_way,
                         req_hit_way, req_same_tag,
                         r0_valid, req_op, cache_tags, req_go, ra)
        #self.dcache_log(m, r1, valid_ra, tlb_hit_way, req_op, self.stall_out)

        return m



# dcache_tb.vhdl
#
# entity dcache_tb is
# end dcache_tb;
#
# architecture behave of dcache_tb is
#     signal clk          : std_ulogic;
#     signal rst          : std_ulogic;
#
#     signal d_in         : Loadstore1ToDcacheType;
#     signal d_out        : DcacheToLoadstore1Type;
#
#     signal m_in         : MmuToDcacheType;
#     signal m_out        : DcacheToMmuType;
#
#     signal wb_bram_in   : wishbone_master_out;
#     signal wb_bram_out  : wishbone_slave_out;
#
#     constant clk_period : time := 10 ns;
# begin
#     dcache0: entity work.dcache
#         generic map(
#
#             LINE_SIZE => 64,
#             NUM_LINES => 4
#             )
#         port map(
#             clk => clk,
#             rst => rst,
#             d_in => d_in,
#             d_out => d_out,
#             m_in => m_in,
#             m_out => m_out,
#             wishbone_out => wb_bram_in,
#             wishbone_in => wb_bram_out
#             );
#
#     -- BRAM Memory slave
#     bram0: entity work.wishbone_bram_wrapper
#         generic map(
#             MEMORY_SIZE   => 1024,
#             RAM_INIT_FILE => "icache_test.bin"
#             )
#         port map(
#             clk => clk,
#             rst => rst,
#             wishbone_in => wb_bram_in,
#             wishbone_out => wb_bram_out
#             );
#
#     clk_process: process
#     begin
#         clk <= '0';
#         wait for clk_period/2;
#         clk <= '1';
#         wait for clk_period/2;
#     end process;
#
#     rst_process: process
#     begin
#         rst <= '1';
#         wait for 2*clk_period;
#         rst <= '0';
#         wait;
#     end process;
#
#     stim: process
#     begin
#         -- Clear stuff
#         d_in.valid <= '0';
#         d_in.load <= '0';
#         d_in.nc <= '0';
#         d_in.addr <= (others => '0');
#         d_in.data <= (others => '0');
#         m_in.valid <= '0';
#         m_in.addr <= (others => '0');
#         m_in.pte <= (others => '0');
#
#         wait for 4*clk_period;
#         wait until rising_edge(clk);
#
#         -- Cacheable read of address 4
#         d_in.load <= '1';
#         d_in.nc <= '0';
#         d_in.addr <= x"0000000000000004";
#         d_in.valid <= '1';
#         wait until rising_edge(clk);
#         d_in.valid <= '0';
#
#         wait until rising_edge(clk) and d_out.valid = '1';
#         assert d_out.data = x"0000000100000000"
#             report "data @" & to_hstring(d_in.addr) &
#                 "=" & to_hstring(d_out.data) &
#                 " expected 0000000100000000"
#             severity failure;
#         -- wait for clk_period;
#
#         -- Cacheable read of address 30
#         d_in.load <= '1';
#         d_in.nc <= '0';
#         d_in.addr <= x"0000000000000030";
#         d_in.valid <= '1';
#         wait until rising_edge(clk);
#         d_in.valid <= '0';
#
#         wait until rising_edge(clk) and d_out.valid = '1';
#         assert d_out.data = x"0000000D0000000C"
#             report "data @" & to_hstring(d_in.addr) &
#                 "=" & to_hstring(d_out.data) &
#                 " expected 0000000D0000000C"
#             severity failure;
#
#         -- Non-cacheable read of address 100
#         d_in.load <= '1';
#         d_in.nc <= '1';
#         d_in.addr <= x"0000000000000100";
#         d_in.valid <= '1';
#         wait until rising_edge(clk);
#         d_in.valid <= '0';
#         wait until rising_edge(clk) and d_out.valid = '1';
#         assert d_out.data = x"0000004100000040"
#             report "data @" & to_hstring(d_in.addr) &
#                 "=" & to_hstring(d_out.data) &
#                 " expected 0000004100000040"
#             severity failure;
#
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#         wait until rising_edge(clk);
#
#         std.env.finish;
#     end process;
# end;


def dcache_sim(dut):
    # clear stuff
    yield dut.d_in.valid.eq(0)
    yield dut.d_in.load.eq(0)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(0)
    yield dut.d_in.data.eq(0)
    yield dut.m_in.valid.eq(0)
    yield dut.m_in.addr.eq(0)
    yield dut.m_in.pte.eq(0)
    # wait 4 * clk_period
    yield
    yield
    yield
    yield
    # wait_until rising_edge(clk)
    yield
    # Cacheable read of address 4
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(Const(0x0000000000000004, 64))
    yield dut.d_in.valid.eq(1)
    # wait-until rising_edge(clk)
    yield
    yield dut.d_in.valid.eq(0)
    yield
    while not (yield dut.d_out.valid):
        yield
    data = yield dut.d_out.data
    assert data == 0x0000000100000000, \
        f"data @ 0x04 = {data:x} expected 0000000100000000"


    # Cacheable read of address 30
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(Const(0x0000000000000030, 64))
    yield dut.d_in.valid.eq(1)
    yield
    yield dut.d_in.valid.eq(0)
    yield
    while not (yield dut.d_out.valid):
        yield
    data = yield dut.d_out.data
    assert data == 0x0000000D0000000C, \
        f"data @ 0x30 = {data:x} expected 0000000D0000000C"

    # Non-cacheable read of address 100
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(1)
    yield dut.d_in.addr.eq(Const(0x0000000000000100, 64))
    yield dut.d_in.valid.eq(1)
    yield
    yield dut.d_in.valid.eq(0)
    yield
    while not (yield dut.d_out.valid):
        yield
    data = yield dut.d_out.data
    assert data == 0x0000004100000040, \
        f"data @ 0x100 = {data:x} expected 0000004100000040"

    yield
    yield
    yield
    yield


def test_dcache():
    dut = DCache()
    vl = rtlil.convert(dut, ports=[])
    with open("test_dcache.il", "w") as f:
        f.write(vl)

    #run_simulation(dut, dcache_sim(dut), vcd_name='test_dcache.vcd')

if __name__ == '__main__':
    test_dcache()