3 based on Anton Blanchard microwatt dcache.vhdl
7 from enum
import Enum
, unique
9 from nmigen
import Module
, Signal
, Elaboratable
, Cat
, Repl
, Array
, Const
10 from nmigen
.cli
import main
11 from nmutil
.iocontrol
import RecordObject
12 from nmigen
.utils
import log2_int
13 from nmigen
.cli
import rtlil
16 from soc
.experiment
.mem_types
import (LoadStore1ToDCacheType
,
17 DCacheToLoadStore1Type
,
21 from soc
.experiment
.wb_types
import (WB_ADDR_BITS
, WB_DATA_BITS
, WB_SEL_BITS
,
22 WBAddrType
, WBDataType
, WBSelType
,
23 WBMasterOut
, WBSlaveOut
,
24 WBMasterOutVector
, WBSlaveOutVector
,
25 WBIOMasterOut
, WBIOSlaveOut
)
27 from soc
.experiment
.cache_ram
import CacheRam
28 from soc
.experiment
.plru
import PLRU
31 # TODO: make these parameters of DCache at some point
# Cache geometry parameters.
# TODO: make these parameters of DCache at some point
LINE_SIZE = 64     # Line size in bytes
NUM_LINES = 32     # Number of lines in a set
NUM_WAYS = 4       # Number of ways
TLB_SET_SIZE = 64  # L1 DTLB entries per set
TLB_NUM_WAYS = 2   # L1 DTLB number of sets
TLB_LG_PGSZ = 12   # L1 DTLB log_2(page_size)
LOG_LENGTH = 0     # Non-zero to enable log data collection

# BRAM organisation: We never access more than
# WB_DATA_BITS at a time so to save
# resources we make the array only that wide, and
# use consecutive indices for to make a cache "line"
#
# ROW_SIZE is the width in bytes of the BRAM
# (based on WB, so 64-bits)
ROW_SIZE = WB_DATA_BITS // 8  # fixed: dropped stray trailing semicolon

# ROW_PER_LINE is the number of row (wishbone
# transactions) in a line
ROW_PER_LINE = LINE_SIZE // ROW_SIZE

# BRAM_ROWS is the number of rows in BRAM needed
# to represent the full dcache
BRAM_ROWS = NUM_LINES * ROW_PER_LINE
58 # Bit fields counts in the address
60 # REAL_ADDR_BITS is the number of real address
# ROW_BITS is the number of bits to select a row
ROW_BITS = log2_int(BRAM_ROWS)

# ROW_LINE_BITS is the number of bits to select
# a row within a line
ROW_LINE_BITS = log2_int(ROW_PER_LINE)

# LINE_OFF_BITS is the number of bits for
# the offset in a cache line
LINE_OFF_BITS = log2_int(LINE_SIZE)

# ROW_OFF_BITS is the number of bits for
# the offset in a row
ROW_OFF_BITS = log2_int(ROW_SIZE)

# INDEX_BITS is the number of bits to
# select a cache line
INDEX_BITS = log2_int(NUM_LINES)

# SET_SIZE_BITS is the log base 2 of the set size
SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS

# TAG_BITS is the number of bits of
# the tag part of the address
# NOTE(review): REAL_ADDR_BITS is assigned elsewhere in this file
# (its defining line is not visible in this chunk) -- confirm its value.
TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS

# TAG_WIDTH is the width in bits of each way of the tag RAM,
# i.e. TAG_BITS rounded up to the next multiple of 8
TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)

# WAY_BITS is the number of bits to select a way
WAY_BITS = log2_int(NUM_WAYS)

# Example of layout for 32 lines of 64 bytes:
#
# ..  tag    |index|  line  |
# ..         |     |---|    | ROW_LINE_BITS  (3)
# ..         |     |--- - --| LINE_OFF_BITS (6)
# ..         |         |- --| ROW_OFF_BITS  (3)
# ..         |----- ---|    | ROW_BITS      (8)
# ..         |-----|        | INDEX_BITS    (5)
# .. --------|              | TAG_BITS      (45)

# Total width of one tag-RAM row: one TAG_WIDTH field per way.
TAG_RAM_WIDTH = TAG_WIDTH * NUM_WAYS
110 return Array(Signal(TAG_RAM_WIDTH
) for x
in range(NUM_LINES
))
def CacheValidBitsArray():
    """Per-line cache valid bits: one Signal for each of the NUM_LINES lines.

    NOTE(review): the signal width is INDEX_BITS, which looks odd for a
    per-way valid mask (NUM_WAYS would be expected) -- confirm upstream.
    """
    valid_bits = [Signal(INDEX_BITS) for _ in range(NUM_LINES)]
    return Array(valid_bits)
def RowPerLineValidArray():
    """One single-bit valid Signal per row within a cache line."""
    row_valids = [Signal() for _ in range(ROW_PER_LINE)]
    return Array(row_valids)
# L1 DTLB geometry, derived from the parameters above.
TLB_SET_BITS = log2_int(TLB_SET_SIZE)    # bits to select a TLB set
TLB_WAY_BITS = log2_int(TLB_NUM_WAYS)    # bits to select a TLB way
# EA tag: the effective-address bits above the page offset and set index
TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_SET_BITS)
# Width of one TLB tag memory row (all ways concatenated)
TLB_TAG_WAY_BITS = TLB_NUM_WAYS * TLB_EA_TAG_BITS
# Width of one TLB PTE memory row (all ways concatenated).
# NOTE(review): TLB_PTE_BITS is assigned elsewhere in this file
# (its defining line is not visible in this chunk) -- confirm its value.
TLB_PTE_WAY_BITS = TLB_NUM_WAYS * TLB_PTE_BITS  # fixed: dropped stray semicolon
# Sanity-check the cache geometry at import time.
# Fixed: the original used `x % 2 == 0`, which only checks evenness,
# while the messages claim "not power of 2"; `x & (x - 1) == 0` is the
# real power-of-2 test.  No behavioral change at the default parameters.
assert (LINE_SIZE % ROW_SIZE) == 0, "LINE_SIZE not multiple of ROW_SIZE"
assert (LINE_SIZE & (LINE_SIZE - 1)) == 0, "LINE_SIZE not power of 2"
assert (NUM_LINES & (NUM_LINES - 1)) == 0, "NUM_LINES not power of 2"
assert (ROW_PER_LINE & (ROW_PER_LINE - 1)) == 0, "ROW_PER_LINE not power of 2"
assert ROW_BITS == (INDEX_BITS + ROW_LINE_BITS), "geometry bits don't add up"
assert (LINE_OFF_BITS == ROW_OFF_BITS + ROW_LINE_BITS), \
    "geometry bits don't add up"
assert REAL_ADDR_BITS == (TAG_BITS + INDEX_BITS + LINE_OFF_BITS), \
    "geometry bits don't add up"
assert REAL_ADDR_BITS == (TAG_BITS + ROW_BITS + ROW_OFF_BITS), \
    "geometry bits don't add up"
assert 64 == WB_DATA_BITS, "Can't yet handle wb width that isn't 64-bits"
assert SET_SIZE_BITS <= TLB_LG_PGSZ, "Set indexed by virtual address"
def TLBValidBitsArray():
    """Per-set TLB valid bits: a TLB_NUM_WAYS-wide Signal for each set."""
    per_set = [Signal(TLB_NUM_WAYS) for _ in range(TLB_SET_SIZE)]
    return Array(per_set)
145 return Array(Signal(TLB_EA_TAG_BITS
) for x
in range (TLB_NUM_WAYS
))
148 return Array(Signal(TLB_TAG_WAY_BITS
) for x
in range (TLB_SET_SIZE
))
151 return Array(Signal(TLB_PTE_WAY_BITS
) for x
in range(TLB_SET_SIZE
))
154 return Array(Signal(NUM_WAYS
) for x
in range(TLB_NUM_WAYS
))
156 # Cache RAM interface
158 return Array(Signal(WB_DATA_BITS
) for x
in range(NUM_WAYS
))
160 # PLRU output interface
162 return Array(Signal(WAY_BITS
) for x
in range(NUM_LINES
))
164 # TLB PLRU output interface
166 return Array(Signal(TLB_WAY_BITS
) for x
in range(TLB_SET_SIZE
))
168 # Helper functions to decode incoming requests
170 # Return the cache line index (tag index) for an address
172 return addr
[LINE_OFF_BITS
:SET_SIZE_BITS
]
174 # Return the cache row index (data memory) for an address
176 return addr
[ROW_OFF_BITS
:SET_SIZE_BITS
]
178 # Return the index of a row within a line
def get_row_of_line(row):
    """Return the index of *row* within its cache line.

    Takes the low ROW_LINE_BITS of the row number.
    """
    return row[0:ROW_LINE_BITS]
182 # Returns whether this is the last row of a line
def is_last_row_addr(addr, last):
    """Return whether *addr* points at the last row of its cache line.

    Compares the row-within-line field of the address against *last*.
    """
    row_in_line = addr[ROW_OFF_BITS:LINE_OFF_BITS]
    return row_in_line == last
186 # Returns whether this is the last row of a line
def is_last_row(row, last):
    """Return whether *row* is the last row of its cache line."""
    row_in_line = get_row_of_line(row)
    return row_in_line == last
190 # Return the next row in the current cache line. We use a
191 # dedicated function in order to limit the size of the
192 # generated adder to be only the bits within a cache line
193 # (3 bits with default settings)
195 row_v
= row
[0:ROW_LINE_BITS
] + 1
196 return Cat(row_v
[:ROW_LINE_BITS
], row
[ROW_LINE_BITS
:])
198 # Get the tag value from the address
200 return addr
[SET_SIZE_BITS
:REAL_ADDR_BITS
]
202 # Read a tag from a tag memory row
def read_tag(way, tagset):
    """Read the tag of *way* out of a tag-RAM row *tagset*.

    Each way occupies a TAG_WIDTH-wide slot; only the low TAG_BITS
    carry the actual tag (the rest is padding to a byte multiple).
    """
    slot = tagset.word_select(way, TAG_WIDTH)
    return slot[0:TAG_BITS]
206 # Read a TLB tag from a TLB tag memory row
def read_tlb_tag(way, tags):
    """Read the EA tag of *way* out of a TLB tag memory row *tags*."""
    ea_tag = tags.word_select(way, TLB_EA_TAG_BITS)
    return ea_tag
210 # Write a TLB tag to a TLB tag memory row
def write_tlb_tag(way, tags, tag):
    """Return an assignment storing *tag* into *way* of TLB tag row *tags*."""
    slot = read_tlb_tag(way, tags)
    return slot.eq(tag)
214 # Read a PTE from a TLB PTE memory row
def read_tlb_pte(way, ptes):
    """Read the PTE of *way* out of a TLB PTE memory row *ptes*."""
    pte = ptes.word_select(way, TLB_PTE_BITS)
    return pte
def write_tlb_pte(way, ptes, newpte):
    """Return an assignment storing *newpte* into *way* of PTE row *ptes*."""
    slot = read_tlb_pte(way, ptes)
    return slot.eq(newpte)
222 # Record for storing permission, attribute, etc. bits from a PTE
223 class PermAttr(RecordObject
):
226 self
.reference
= Signal()
227 self
.changed
= Signal()
228 self
.nocache
= Signal()
230 self
.rd_perm
= Signal()
231 self
.wr_perm
= Signal()
234 def extract_perm_attr(pte
):
236 pa
.reference
= pte
[8]
245 # Type of operation on a "valid" input
249 OP_BAD
= 1 # NC cache hit, TLB miss, prot/RC failure
250 OP_STCX_FAIL
= 2 # conditional store w/o reservation
251 OP_LOAD_HIT
= 3 # Cache hit on load
252 OP_LOAD_MISS
= 4 # Load missing cache
253 OP_LOAD_NC
= 5 # Non-cachable load
254 OP_STORE_HIT
= 6 # Store hitting cache
255 OP_STORE_MISS
= 7 # Store missing cache
258 # Cache state machine
261 IDLE
= 0 # Normal load hit processing
262 RELOAD_WAIT_ACK
= 1 # Cache reload wait ack
263 STORE_WAIT_ACK
= 2 # Store wait ack
264 NC_LOAD_WAIT_ACK
= 3 # Non-cachable load wait ack
269 # In order to make timing, we use the BRAMs with
270 # an output buffer, which means that the BRAM
271 # output is delayed by an extra cycle.
273 # Thus, the dcache has a 2-stage internal pipeline
274 # for cache hits with no stalls.
276 # All other operations are handled via stalling
277 # in the first stage.
279 # The second stage can thus complete a hit at the same
280 # time as the first stage emits a stall for a complex op.
282 # Stage 0 register, basically contains just the latched request
284 class RegStage0(RecordObject
):
287 self
.req
= LoadStore1ToDCacheType()
288 self
.tlbie
= Signal()
289 self
.doall
= Signal()
290 self
.tlbld
= Signal()
291 self
.mmu_req
= Signal() # indicates source of request
294 class MemAccessRequest(RecordObject
):
298 self
.valid
= Signal()
300 self
.real_addr
= Signal(REAL_ADDR_BITS
)
301 self
.data
= Signal(64)
302 self
.byte_sel
= Signal(8)
303 self
.hit_way
= Signal(WAY_BITS
)
304 self
.same_tag
= Signal()
305 self
.mmu_req
= Signal()
308 # First stage register, contains state for stage 1 of load hits
309 # and for the state machine used by all other operations
310 class RegStage1(RecordObject
):
313 # Info about the request
314 self
.full
= Signal() # have uncompleted request
315 self
.mmu_req
= Signal() # request is from MMU
316 self
.req
= MemAccessRequest()
319 self
.hit_way
= Signal(WAY_BITS
)
320 self
.hit_load_valid
= Signal()
321 self
.hit_index
= Signal(INDEX_BITS
)
322 self
.cache_hit
= Signal()
325 self
.tlb_hit
= Signal()
326 self
.tlb_hit_way
= Signal(TLB_NUM_WAYS
)
327 self
.tlb_hit_index
= Signal(TLB_WAY_BITS
)
329 # 2-stage data buffer for data forwarded from writes to reads
330 self
.forward_data1
= Signal(64)
331 self
.forward_data2
= Signal(64)
332 self
.forward_sel1
= Signal(8)
333 self
.forward_valid1
= Signal()
334 self
.forward_way1
= Signal(WAY_BITS
)
335 self
.forward_row1
= Signal(ROW_BITS
)
336 self
.use_forward1
= Signal()
337 self
.forward_sel
= Signal(8)
339 # Cache miss state (reload state machine)
340 self
.state
= Signal(State
)
342 self
.write_bram
= Signal()
343 self
.write_tag
= Signal()
344 self
.slow_valid
= Signal()
345 self
.wb
= WBMasterOut()
346 self
.reload_tag
= Signal(TAG_BITS
)
347 self
.store_way
= Signal(WAY_BITS
)
348 self
.store_row
= Signal(ROW_BITS
)
349 self
.store_index
= Signal(INDEX_BITS
)
350 self
.end_row_ix
= Signal(log2_int(ROW_LINE_BITS
, False))
351 self
.rows_valid
= RowPerLineValidArray()
352 self
.acks_pending
= Signal(3)
353 self
.inc_acks
= Signal()
354 self
.dec_acks
= Signal()
356 # Signals to complete (possibly with error)
357 self
.ls_valid
= Signal()
358 self
.ls_error
= Signal()
359 self
.mmu_done
= Signal()
360 self
.mmu_error
= Signal()
361 self
.cache_paradox
= Signal()
363 # Signal to complete a failed stcx.
364 self
.stcx_fail
= Signal()
367 # Reservation information
368 class Reservation(RecordObject
):
371 self
.valid
= Signal()
372 self
.addr
= Signal(64-LINE_OFF_BITS
)
375 class DTLBUpdate(Elaboratable
):
377 self
.tlbie
= Signal()
378 self
.tlbwe
= Signal()
379 self
.doall
= Signal()
380 self
.updated
= Signal()
381 self
.v_updated
= Signal()
382 self
.tlb_hit
= Signal()
383 self
.tlb_req_index
= Signal(TLB_SET_BITS
)
385 self
.tlb_hit_way
= Signal(TLB_WAY_BITS
)
386 self
.tlb_tag_way
= Signal(TLB_TAG_WAY_BITS
)
387 self
.tlb_pte_way
= Signal(TLB_PTE_WAY_BITS
)
388 self
.repl_way
= Signal(TLB_WAY_BITS
)
389 self
.eatag
= Signal(TLB_EA_TAG_BITS
)
390 self
.pte_data
= Signal(TLB_PTE_BITS
)
392 self
.dv
= Signal(TLB_PTE_WAY_BITS
)
394 self
.tb_out
= Signal(TLB_TAG_WAY_BITS
)
395 self
.pb_out
= Signal(TLB_NUM_WAYS
)
396 self
.db_out
= Signal(TLB_PTE_WAY_BITS
)
398 def elaborate(self
, platform
):
403 tagset
= Signal(TLB_TAG_WAY_BITS
)
404 pteset
= Signal(TLB_PTE_WAY_BITS
)
406 tb_out
, pb_out
, db_out
= self
.tb_out
, self
.pb_out
, self
.db_out
408 with m
.If(self
.tlbie
& self
.doall
):
409 pass # clear all back in parent
410 with m
.Elif(self
.tlbie
):
411 with m
.If(self
.tlb_hit
):
412 comb
+= db_out
.eq(self
.dv
)
413 comb
+= db_out
.bit_select(self
.tlb_hit_way
, 1).eq(1)
414 comb
+= self
.v_updated
.eq(1)
416 with m
.Elif(self
.tlbwe
):
418 comb
+= tagset
.eq(self
.tlb_tag_way
)
419 comb
+= write_tlb_tag(self
.repl_way
, tagset
, self
.eatag
)
420 comb
+= tb_out
.eq(tagset
)
422 comb
+= pteset
.eq(self
.tlb_pte_way
)
423 comb
+= write_tlb_pte(self
.repl_way
, pteset
, self
.pte_data
)
424 comb
+= pb_out
.eq(pteset
)
426 comb
+= db_out
.bit_select(self
.repl_way
, 1).eq(1)
428 comb
+= self
.updated
.eq(1)
429 comb
+= self
.v_updated
.eq(1)
433 def dcache_request(self
, m
, r0
, ra
, req_index
, req_row
, req_tag
,
434 r0_valid
, r1
, cache_valid_bits
, replace_way
,
435 use_forward1_next
, use_forward2_next
,
436 req_hit_way
, plru_victim
, rc_ok
, perm_attr
,
437 valid_ra
, perm_ok
, access_ok
, req_op
, req_go
,
439 tlb_hit
, tlb_hit_way
, tlb_valid_way
, cache_tag_set
,
440 cancel_store
, req_same_tag
, r0_stall
, early_req_row
):
441 """Cache request parsing and hit detection
444 class DCachePendingHit(Elaboratable
):
446 def __init__(self
, tlb_pte_way
, tlb_valid_way
, tlb_hit_way
,
447 cache_valid_idx
, cache_tag_set
,
452 self
.virt_mode
= Signal()
453 self
.is_hit
= Signal()
454 self
.tlb_hit
= Signal()
455 self
.hit_way
= Signal(WAY_BITS
)
456 self
.rel_match
= Signal()
457 self
.req_index
= Signal(INDEX_BITS
)
458 self
.reload_tag
= Signal(TAG_BITS
)
460 self
.tlb_hit_way
= tlb_hit_way
461 self
.tlb_pte_way
= tlb_pte_way
462 self
.tlb_valid_way
= tlb_valid_way
463 self
.cache_valid_idx
= cache_valid_idx
464 self
.cache_tag_set
= cache_tag_set
465 self
.req_addr
= req_addr
466 self
.hit_set
= hit_set
468 def elaborate(self
, platform
):
474 virt_mode
= self
.virt_mode
476 tlb_pte_way
= self
.tlb_pte_way
477 tlb_valid_way
= self
.tlb_valid_way
478 cache_valid_idx
= self
.cache_valid_idx
479 cache_tag_set
= self
.cache_tag_set
480 req_addr
= self
.req_addr
481 tlb_hit_way
= self
.tlb_hit_way
482 tlb_hit
= self
.tlb_hit
483 hit_set
= self
.hit_set
484 hit_way
= self
.hit_way
485 rel_match
= self
.rel_match
486 req_index
= self
.req_index
487 reload_tag
= self
.reload_tag
489 rel_matches
= Array(Signal() for i
in range(TLB_NUM_WAYS
))
490 hit_way_set
= HitWaySet()
492 # Test if pending request is a hit on any way
493 # In order to make timing in virtual mode,
494 # when we are using the TLB, we compare each
495 # way with each of the real addresses from each way of
496 # the TLB, and then decide later which match to use.
498 with m
.If(virt_mode
):
499 for j
in range(TLB_NUM_WAYS
):
500 s_tag
= Signal(TAG_BITS
, name
="s_tag%d" % j
)
502 s_pte
= Signal(TLB_PTE_BITS
)
503 s_ra
= Signal(REAL_ADDR_BITS
)
504 comb
+= s_pte
.eq(read_tlb_pte(j
, tlb_pte_way
))
505 comb
+= s_ra
.eq(Cat(req_addr
[0:TLB_LG_PGSZ
],
506 s_pte
[TLB_LG_PGSZ
:REAL_ADDR_BITS
]))
507 comb
+= s_tag
.eq(get_tag(s_ra
))
509 for i
in range(NUM_WAYS
):
510 is_tag_hit
= Signal()
511 comb
+= is_tag_hit
.eq(go
& cache_valid_idx
[i
] &
512 (read_tag(i
, cache_tag_set
) == s_tag
)
514 with m
.If(is_tag_hit
):
515 comb
+= hit_way_set
[j
].eq(i
)
517 comb
+= hit_set
[j
].eq(s_hit
)
518 with m
.If(s_tag
== reload_tag
):
519 comb
+= rel_matches
[j
].eq(1)
521 comb
+= is_hit
.eq(hit_set
[tlb_hit_way
])
522 comb
+= hit_way
.eq(hit_way_set
[tlb_hit_way
])
523 comb
+= rel_match
.eq(rel_matches
[tlb_hit_way
])
525 s_tag
= Signal(TAG_BITS
)
526 comb
+= s_tag
.eq(get_tag(req_addr
))
527 for i
in range(NUM_WAYS
):
528 is_tag_hit
= Signal()
529 comb
+= is_tag_hit
.eq(go
& cache_valid_idx
[i
] &
530 read_tag(i
, cache_tag_set
) == s_tag
)
531 with m
.If(is_tag_hit
):
532 comb
+= hit_way
.eq(i
)
534 with m
.If(s_tag
== reload_tag
):
535 comb
+= rel_match
.eq(1)
540 class DCache(Elaboratable
):
541 """Set associative dcache write-through
542 TODO (in no specific order):
543 * See list in icache.vhdl
544 * Complete load misses on the cycle when WB data comes instead of
545 at the end of line (this requires dealing with requests coming in
549 self
.d_in
= LoadStore1ToDCacheType()
550 self
.d_out
= DCacheToLoadStore1Type()
552 self
.m_in
= MMUToDCacheType()
553 self
.m_out
= DCacheToMMUType()
555 self
.stall_out
= Signal()
557 self
.wb_out
= WBMasterOut()
558 self
.wb_in
= WBSlaveOut()
560 self
.log_out
= Signal(20)
562 def stage_0(self
, m
, r0
, r1
, r0_full
):
563 """Latch the request in r0.req as long as we're not stalling
567 d_in
, d_out
, m_in
= self
.d_in
, self
.d_out
, self
.m_in
571 # TODO, this goes in unit tests and formal proofs
572 with m
.If(~
(d_in
.valid
& m_in
.valid
)):
573 #sync += Display("request collision loadstore vs MMU")
576 with m
.If(m_in
.valid
):
577 sync
+= r
.req
.valid
.eq(1)
578 sync
+= r
.req
.load
.eq(~
(m_in
.tlbie | m_in
.tlbld
))
579 sync
+= r
.req
.dcbz
.eq(0)
580 sync
+= r
.req
.nc
.eq(0)
581 sync
+= r
.req
.reserve
.eq(0)
582 sync
+= r
.req
.virt_mode
.eq(1)
583 sync
+= r
.req
.priv_mode
.eq(1)
584 sync
+= r
.req
.addr
.eq(m_in
.addr
)
585 sync
+= r
.req
.data
.eq(m_in
.pte
)
586 sync
+= r
.req
.byte_sel
.eq(~
0) # Const -1 sets all to 0b111....
587 sync
+= r
.tlbie
.eq(m_in
.tlbie
)
588 sync
+= r
.doall
.eq(m_in
.doall
)
589 sync
+= r
.tlbld
.eq(m_in
.tlbld
)
590 sync
+= r
.mmu_req
.eq(1)
592 sync
+= r
.req
.eq(d_in
)
593 sync
+= r
.tlbie
.eq(0)
594 sync
+= r
.doall
.eq(0)
595 sync
+= r
.tlbld
.eq(0)
596 sync
+= r
.mmu_req
.eq(0)
597 with m
.If(~
(r1
.full
& r0_full
)):
599 sync
+= r0_full
.eq(r
.req
.valid
)
601 def tlb_read(self
, m
, r0_stall
, tlb_valid_way
,
602 tlb_tag_way
, tlb_pte_way
, dtlb_valid_bits
,
603 dtlb_tags
, dtlb_ptes
):
605 Operates in the second cycle on the request latched in r0.req.
606 TLB updates write the entry at the end of the second cycle.
610 m_in
, d_in
= self
.m_in
, self
.d_in
612 index
= Signal(TLB_SET_BITS
)
613 addrbits
= Signal(TLB_SET_BITS
)
616 amax
= TLB_LG_PGSZ
+ TLB_SET_BITS
618 with m
.If(m_in
.valid
):
619 comb
+= addrbits
.eq(m_in
.addr
[amin
: amax
])
621 comb
+= addrbits
.eq(d_in
.addr
[amin
: amax
])
622 comb
+= index
.eq(addrbits
)
624 # If we have any op and the previous op isn't finished,
625 # then keep the same output for next cycle.
626 with m
.If(~r0_stall
):
627 sync
+= tlb_valid_way
.eq(dtlb_valid_bits
[index
])
628 sync
+= tlb_tag_way
.eq(dtlb_tags
[index
])
629 sync
+= tlb_pte_way
.eq(dtlb_ptes
[index
])
631 def maybe_tlb_plrus(self
, m
, r1
, tlb_plru_victim
):
632 """Generate TLB PLRUs
637 if TLB_NUM_WAYS
== 0:
639 for i
in range(TLB_SET_SIZE
):
641 tlb_plru
= PLRU(WAY_BITS
)
642 setattr(m
.submodules
, "maybe_plru_%d" % i
, tlb_plru
)
643 tlb_plru_acc_en
= Signal()
645 comb
+= tlb_plru_acc_en
.eq(r1
.tlb_hit
& (r1
.tlb_hit_index
== i
))
646 comb
+= tlb_plru
.acc_en
.eq(tlb_plru_acc_en
)
647 comb
+= tlb_plru
.acc
.eq(r1
.tlb_hit_way
)
648 comb
+= tlb_plru_victim
[i
].eq(tlb_plru
.lru_o
)
650 def tlb_search(self
, m
, tlb_req_index
, r0
, r0_valid
,
651 tlb_valid_way
, tlb_tag_way
, tlb_hit_way
,
652 tlb_pte_way
, pte
, tlb_hit
, valid_ra
, perm_attr
, ra
):
657 hitway
= Signal(TLB_WAY_BITS
)
659 eatag
= Signal(TLB_EA_TAG_BITS
)
661 TLB_LG_END
= TLB_LG_PGSZ
+ TLB_SET_BITS
662 comb
+= tlb_req_index
.eq(r0
.req
.addr
[TLB_LG_PGSZ
: TLB_LG_END
])
663 comb
+= eatag
.eq(r0
.req
.addr
[TLB_LG_END
: 64 ])
665 for i
in range(TLB_NUM_WAYS
):
666 is_tag_hit
= Signal()
667 comb
+= is_tag_hit
.eq(tlb_valid_way
[i
]
668 & read_tlb_tag(i
, tlb_tag_way
) == eatag
)
669 with m
.If(is_tag_hit
):
673 comb
+= tlb_hit
.eq(hit
& r0_valid
)
674 comb
+= tlb_hit_way
.eq(hitway
)
677 comb
+= pte
.eq(read_tlb_pte(hitway
, tlb_pte_way
))
680 comb
+= valid_ra
.eq(tlb_hit | ~r0
.req
.virt_mode
)
681 with m
.If(r0
.req
.virt_mode
):
682 comb
+= ra
.eq(Cat(Const(0, ROW_OFF_BITS
),
683 r0
.req
.addr
[ROW_OFF_BITS
:TLB_LG_PGSZ
],
684 pte
[TLB_LG_PGSZ
:REAL_ADDR_BITS
]))
685 comb
+= perm_attr
.eq(extract_perm_attr(pte
))
687 comb
+= ra
.eq(Cat(Const(0, ROW_OFF_BITS
),
688 r0
.req
.addr
[ROW_OFF_BITS
:REAL_ADDR_BITS
]))
690 comb
+= perm_attr
.reference
.eq(1)
691 comb
+= perm_attr
.changed
.eq(1)
692 comb
+= perm_attr
.priv
.eq(1)
693 comb
+= perm_attr
.nocache
.eq(0)
694 comb
+= perm_attr
.rd_perm
.eq(1)
695 comb
+= perm_attr
.wr_perm
.eq(1)
697 def tlb_update(self
, m
, r0_valid
, r0
, dtlb_valid_bits
, tlb_req_index
,
698 tlb_hit_way
, tlb_hit
, tlb_plru_victim
, tlb_tag_way
,
699 dtlb_tags
, tlb_pte_way
, dtlb_ptes
):
707 comb
+= tlbie
.eq(r0_valid
& r0
.tlbie
)
708 comb
+= tlbwe
.eq(r0_valid
& r0
.tlbld
)
710 m
.submodules
.tlb_update
= d
= DTLBUpdate()
711 with m
.If(tlbie
& r0
.doall
):
712 # clear all valid bits at once
713 for i
in range(TLB_SET_SIZE
):
714 sync
+= dtlb_valid_bits
[i
].eq(0)
715 with m
.If(d
.updated
):
716 sync
+= dtlb_tags
[tlb_req_index
].eq(d
.tb_out
)
717 sync
+= dtlb_ptes
[tlb_req_index
].eq(d
.pb_out
)
718 with m
.If(d
.v_updated
):
719 sync
+= dtlb_valid_bits
[tlb_req_index
].eq(d
.db_out
)
721 comb
+= d
.dv
.eq(dtlb_valid_bits
[tlb_req_index
])
723 comb
+= d
.tlbie
.eq(tlbie
)
724 comb
+= d
.tlbwe
.eq(tlbwe
)
725 comb
+= d
.doall
.eq(r0
.doall
)
726 comb
+= d
.tlb_hit
.eq(tlb_hit
)
727 comb
+= d
.tlb_hit_way
.eq(tlb_hit_way
)
728 comb
+= d
.tlb_tag_way
.eq(tlb_tag_way
)
729 comb
+= d
.tlb_pte_way
.eq(tlb_pte_way
)
730 comb
+= d
.tlb_req_index
.eq(tlb_req_index
)
733 comb
+= d
.repl_way
.eq(tlb_hit_way
)
735 comb
+= d
.repl_way
.eq(tlb_plru_victim
[tlb_req_index
])
736 comb
+= d
.eatag
.eq(r0
.req
.addr
[TLB_LG_PGSZ
+ TLB_SET_BITS
:64])
737 comb
+= d
.pte_data
.eq(r0
.req
.data
)
739 def maybe_plrus(self
, m
, r1
, plru_victim
):
745 if TLB_NUM_WAYS
== 0:
748 for i
in range(NUM_LINES
):
750 plru
= PLRU(WAY_BITS
)
751 setattr(m
.submodules
, "plru%d" % i
, plru
)
752 plru_acc_en
= Signal()
754 comb
+= plru_acc_en
.eq(r1
.cache_hit
& (r1
.hit_index
== i
))
755 comb
+= plru
.acc_en
.eq(plru_acc_en
)
756 comb
+= plru
.acc
.eq(r1
.hit_way
)
757 comb
+= plru_victim
[i
].eq(plru
.lru_o
)
759 def cache_tag_read(self
, m
, r0_stall
, req_index
, cache_tag_set
, cache_tags
):
760 """Cache tag RAM read port
764 m_in
, d_in
= self
.m_in
, self
.d_in
766 index
= Signal(INDEX_BITS
)
769 comb
+= index
.eq(req_index
)
770 with m
.Elif(m_in
.valid
):
771 comb
+= index
.eq(get_index(m_in
.addr
))
773 comb
+= index
.eq(get_index(d_in
.addr
))
774 sync
+= cache_tag_set
.eq(cache_tags
[index
])
776 def dcache_request(self
, m
, r0
, ra
, req_index
, req_row
, req_tag
,
777 r0_valid
, r1
, cache_valid_bits
, replace_way
,
778 use_forward1_next
, use_forward2_next
,
779 req_hit_way
, plru_victim
, rc_ok
, perm_attr
,
780 valid_ra
, perm_ok
, access_ok
, req_op
, req_go
,
782 tlb_hit
, tlb_hit_way
, tlb_valid_way
, cache_tag_set
,
783 cancel_store
, req_same_tag
, r0_stall
, early_req_row
):
784 """Cache request parsing and hit detection
789 m_in
, d_in
= self
.m_in
, self
.d_in
792 hit_way
= Signal(WAY_BITS
)
797 hit_set
= Array(Signal() for i
in range(TLB_NUM_WAYS
))
798 cache_valid_idx
= Signal(INDEX_BITS
)
800 # Extract line, row and tag from request
801 comb
+= req_index
.eq(get_index(r0
.req
.addr
))
802 comb
+= req_row
.eq(get_row(r0
.req
.addr
))
803 comb
+= req_tag
.eq(get_tag(ra
))
805 comb
+= go
.eq(r0_valid
& ~
(r0
.tlbie | r0
.tlbld
) & ~r1
.ls_error
)
806 comb
+= cache_valid_idx
.eq(cache_valid_bits
[req_index
])
808 m
.submodules
.dcache_pend
= dc
= DCachePendingHit(tlb_pte_way
,
809 tlb_valid_way
, tlb_hit_way
,
810 cache_valid_idx
, cache_tag_set
,
814 comb
+= dc
.tlb_hit
.eq(tlb_hit
)
815 comb
+= dc
.reload_tag
.eq(r1
.reload_tag
)
816 comb
+= dc
.virt_mode
.eq(r0
.req
.virt_mode
)
818 comb
+= dc
.req_index
.eq(req_index
)
819 comb
+= is_hit
.eq(dc
.is_hit
)
820 comb
+= hit_way
.eq(dc
.hit_way
)
821 comb
+= req_same_tag
.eq(dc
.rel_match
)
823 # See if the request matches the line currently being reloaded
824 with m
.If((r1
.state
== State
.RELOAD_WAIT_ACK
) &
825 (req_index
== r1
.store_index
) & req_same_tag
):
826 # For a store, consider this a hit even if the row isn't
827 # valid since it will be by the time we perform the store.
828 # For a load, check the appropriate row valid bit.
829 valid
= r1
.rows_valid
[req_row
% ROW_PER_LINE
]
830 comb
+= is_hit
.eq(~r0
.req
.load | valid
)
831 comb
+= hit_way
.eq(replace_way
)
833 # Whether to use forwarded data for a load or not
834 comb
+= use_forward1_next
.eq(0)
835 with m
.If((get_row(r1
.req
.real_addr
) == req_row
) &
836 (r1
.req
.hit_way
== hit_way
)):
837 # Only need to consider r1.write_bram here, since if we
838 # are writing refill data here, then we don't have a
839 # cache hit this cycle on the line being refilled.
840 # (There is the possibility that the load following the
841 # load miss that started the refill could be to the old
842 # contents of the victim line, since it is a couple of
843 # cycles after the refill starts before we see the updated
844 # cache tag. In that case we don't use the bypass.)
845 comb
+= use_forward1_next
.eq(r1
.write_bram
)
846 comb
+= use_forward2_next
.eq(0)
847 with m
.If((r1
.forward_row1
== req_row
) & (r1
.forward_way1
== hit_way
)):
848 comb
+= use_forward2_next
.eq(r1
.forward_valid1
)
850 # The way that matched on a hit
851 comb
+= req_hit_way
.eq(hit_way
)
853 # The way to replace on a miss
854 with m
.If(r1
.write_tag
):
855 comb
+= replace_way
.eq(plru_victim
[r1
.store_index
])
857 comb
+= replace_way
.eq(r1
.store_way
)
859 # work out whether we have permission for this access
860 # NB we don't yet implement AMR, thus no KUAP
861 comb
+= rc_ok
.eq(perm_attr
.reference
862 & (r0
.req
.load | perm_attr
.changed
)
864 comb
+= perm_ok
.eq((r0
.req
.priv_mode | ~perm_attr
.priv
)
866 |
(r0
.req
.load
& perm_attr
.rd_perm
)
868 comb
+= access_ok
.eq(valid_ra
& perm_ok
& rc_ok
)
869 # Combine the request and cache hit status to decide what
870 # operation needs to be done
871 comb
+= nc
.eq(r0
.req
.nc | perm_attr
.nocache
)
872 comb
+= op
.eq(Op
.OP_NONE
)
874 with m
.If(~access_ok
):
875 comb
+= op
.eq(Op
.OP_BAD
)
876 with m
.Elif(cancel_store
):
877 comb
+= op
.eq(Op
.OP_STCX_FAIL
)
879 comb
+= opsel
.eq(Cat(is_hit
, nc
, r0
.req
.load
))
880 with m
.Switch(opsel
):
882 comb
+= op
.eq(Op
.OP_LOAD_HIT
)
884 comb
+= op
.eq(Op
.OP_LOAD_MISS
)
886 comb
+= op
.eq(Op
.OP_LOAD_NC
)
888 comb
+= op
.eq(Op
.OP_STORE_HIT
)
890 comb
+= op
.eq(Op
.OP_STORE_MISS
)
892 comb
+= op
.eq(Op
.OP_STORE_MISS
)
894 comb
+= op
.eq(Op
.OP_BAD
)
896 comb
+= op
.eq(Op
.OP_BAD
)
898 comb
+= op
.eq(Op
.OP_NONE
)
899 comb
+= req_op
.eq(op
)
900 comb
+= req_go
.eq(go
)
902 # Version of the row number that is valid one cycle earlier
903 # in the cases where we need to read the cache data BRAM.
904 # If we're stalling then we need to keep reading the last
906 with m
.If(~r0_stall
):
907 with m
.If(m_in
.valid
):
908 comb
+= early_req_row
.eq(get_row(m_in
.addr
))
910 comb
+= early_req_row
.eq(get_row(d_in
.addr
))
912 comb
+= early_req_row
.eq(req_row
)
914 def reservation_comb(self
, m
, cancel_store
, set_rsrv
, clear_rsrv
,
915 r0_valid
, r0
, reservation
):
916 """Handle load-with-reservation and store-conditional instructions
921 with m
.If(r0_valid
& r0
.req
.reserve
):
923 # XXX generate alignment interrupt if address
924 # is not aligned XXX or if r0.req.nc = '1'
925 with m
.If(r0
.req
.load
):
926 comb
+= set_rsrv
.eq(1) # load with reservation
928 comb
+= clear_rsrv
.eq(1) # store conditional
929 with m
.If(~reservation
.valid | r0
.req
.addr
[LINE_OFF_BITS
:64]):
930 comb
+= cancel_store
.eq(1)
932 def reservation_reg(self
, m
, r0_valid
, access_ok
, set_rsrv
, clear_rsrv
,
938 with m
.If(r0_valid
& access_ok
):
939 with m
.If(clear_rsrv
):
940 sync
+= reservation
.valid
.eq(0)
941 with m
.Elif(set_rsrv
):
942 sync
+= reservation
.valid
.eq(1)
943 sync
+= reservation
.addr
.eq(r0
.req
.addr
[LINE_OFF_BITS
:64])
945 def writeback_control(self
, m
, r1
, cache_out
):
946 """Return data for loads & completion control logic
950 d_out
, m_out
= self
.d_out
, self
.m_out
952 data_out
= Signal(64)
953 data_fwd
= Signal(64)
955 # Use the bypass if are reading the row that was
956 # written 1 or 2 cycles ago, including for the
957 # slow_valid = 1 case (i.e. completing a load
958 # miss or a non-cacheable load).
959 with m
.If(r1
.use_forward1
):
960 comb
+= data_fwd
.eq(r1
.forward_data1
)
962 comb
+= data_fwd
.eq(r1
.forward_data2
)
964 comb
+= data_out
.eq(cache_out
[r1
.hit_way
])
967 with m
.If(r1
.forward_sel
[i
]):
968 dsel
= data_fwd
.word_select(i
, 8)
969 comb
+= data_out
.word_select(i
, 8).eq(dsel
)
971 comb
+= d_out
.valid
.eq(r1
.ls_valid
)
972 comb
+= d_out
.data
.eq(data_out
)
973 comb
+= d_out
.store_done
.eq(~r1
.stcx_fail
)
974 comb
+= d_out
.error
.eq(r1
.ls_error
)
975 comb
+= d_out
.cache_paradox
.eq(r1
.cache_paradox
)
978 comb
+= m_out
.done
.eq(r1
.mmu_done
)
979 comb
+= m_out
.err
.eq(r1
.mmu_error
)
980 comb
+= m_out
.data
.eq(data_out
)
982 # We have a valid load or store hit or we just completed
983 # a slow op such as a load miss, a NC load or a store
985 # Note: the load hit is delayed by one cycle. However it
986 # can still not collide with r.slow_valid (well unless I
987 # miscalculated) because slow_valid can only be set on a
988 # subsequent request and not on its first cycle (the state
989 # machine must have advanced), which makes slow_valid
990 # at least 2 cycles from the previous hit_load_valid.
992 # Sanity: Only one of these must be set in any given cycle
994 if False: # TODO: need Display to get this to work
995 assert (r1
.slow_valid
& r1
.stcx_fail
) != 1, \
996 "unexpected slow_valid collision with stcx_fail"
998 assert ((r1
.slow_valid | r1
.stcx_fail
) | r1
.hit_load_valid
) != 1, \
999 "unexpected hit_load_delayed collision with slow_valid"
1001 with m
.If(~r1
.mmu_req
):
1002 # Request came from loadstore1...
1003 # Load hit case is the standard path
1004 with m
.If(r1
.hit_load_valid
):
1005 #Display(f"completing load hit data={data_out}")
1008 # error cases complete without stalling
1009 with m
.If(r1
.ls_error
):
1010 # Display("completing ld/st with error")
1013 # Slow ops (load miss, NC, stores)
1014 with m
.If(r1
.slow_valid
):
1015 #Display(f"completing store or load miss data={data_out}")
1019 # Request came from MMU
1020 with m
.If(r1
.hit_load_valid
):
1021 # Display(f"completing load hit to MMU, data={m_out.data}")
1023 # error cases complete without stalling
1024 with m
.If(r1
.mmu_error
):
1025 #Display("combpleting MMU ld with error")
1028 # Slow ops (i.e. load miss)
1029 with m
.If(r1
.slow_valid
):
1030 #Display("completing MMU load miss, data={m_out.data}")
1033 def rams(self
, m
, r1
, early_req_row
, cache_out
, replace_way
):
1035 Generate a cache RAM for each way. This handles the normal
1036 reads, writes from reloads and the special store-hit update
1039 Note: the BRAMs have an extra read buffer, meaning the output
1040 is pipelined an extra cycle. This differs from the
1041 icache. The writeback logic needs to take that into
1042 account by using 1-cycle delayed signals for load hits.
1047 for i
in range(NUM_WAYS
):
1049 rd_addr
= Signal(ROW_BITS
)
1051 wr_addr
= Signal(ROW_BITS
)
1052 wr_data
= Signal(WB_DATA_BITS
)
1053 wr_sel
= Signal(ROW_SIZE
)
1054 wr_sel_m
= Signal(ROW_SIZE
)
1055 _d_out
= Signal(WB_DATA_BITS
)
1057 way
= CacheRam(ROW_BITS
, WB_DATA_BITS
, True)
1058 setattr(m
.submodules
, "cacheram_%d" % i
, way
)
1060 comb
+= way
.rd_en
.eq(do_read
)
1061 comb
+= way
.rd_addr
.eq(rd_addr
)
1062 comb
+= _d_out
.eq(way
.rd_data_o
)
1063 comb
+= way
.wr_sel
.eq(wr_sel_m
)
1064 comb
+= way
.wr_addr
.eq(wr_addr
)
1065 comb
+= way
.wr_data
.eq(wr_data
)
1068 comb
+= do_read
.eq(1)
1069 comb
+= rd_addr
.eq(early_req_row
)
1070 comb
+= cache_out
[i
].eq(_d_out
)
1074 # Defaults to wishbone read responses (cache refill)
1076 # For timing, the mux on wr_data/sel/addr is not
1077 # dependent on anything other than the current state.
1079 with m
.If(r1
.write_bram
):
1080 # Write store data to BRAM. This happens one
1081 # cycle after the store is in r0.
1082 comb
+= wr_data
.eq(r1
.req
.data
)
1083 comb
+= wr_sel
.eq(r1
.req
.byte_sel
)
1084 comb
+= wr_addr
.eq(get_row(r1
.req
.real_addr
))
1086 with m
.If(i
== r1
.req
.hit_way
):
1087 comb
+= do_write
.eq(1)
1089 # Otherwise, we might be doing a reload or a DCBZ
1091 comb
+= wr_data
.eq(0)
1093 comb
+= wr_data
.eq(wb_in
.dat
)
1094 comb
+= wr_addr
.eq(r1
.store_row
)
1095 comb
+= wr_sel
.eq(~
0) # all 1s
1097 with m
.If((r1
.state
== State
.RELOAD_WAIT_ACK
)
1098 & wb_in
.ack
& (replace_way
== i
)):
1099 comb
+= do_write
.eq(1)
1101 # Mask write selects with do_write since BRAM
1102 # doesn't have a global write-enable
1103 with m
.If(do_write
):
1104 comb
+= wr_sel_m
.eq(wr_sel
)
1106 # Cache hit synchronous machine for the easy case.
1107 # This handles load hits.
1108 # It also handles error cases (TLB miss, cache paradox)
1109 def dcache_fast_hit(self
, m
, req_op
, r0_valid
, r0
, r1
,
1110 req_hit_way
, req_index
, access_ok
,
1111 tlb_hit
, tlb_hit_way
, tlb_req_index
):
1116 with m
.If(req_op
!= Op
.OP_NONE
):
1117 #Display(f"op:{req_op} addr:{r0.req.addr} nc: {r0.req.nc}" \
1118 # f"idx:{req_index} tag:{req_tag} way: {req_hit_way}"
1122 with m
.If(r0_valid
):
1123 sync
+= r1
.mmu_req
.eq(r0
.mmu_req
)
1125 # Fast path for load/store hits.
1126 # Set signals for the writeback controls.
1127 sync
+= r1
.hit_way
.eq(req_hit_way
)
1128 sync
+= r1
.hit_index
.eq(req_index
)
1130 with m
.If(req_op
== Op
.OP_LOAD_HIT
):
1131 sync
+= r1
.hit_load_valid
.eq(1)
1133 sync
+= r1
.hit_load_valid
.eq(0)
1135 with m
.If((req_op
== Op
.OP_LOAD_HIT
) |
(req_op
== Op
.OP_STORE_HIT
)):
1136 sync
+= r1
.cache_hit
.eq(1)
1138 sync
+= r1
.cache_hit
.eq(0)
1140 with m
.If(req_op
== Op
.OP_BAD
):
1141 # Display(f"Signalling ld/st error valid_ra={valid_ra}"
1142 # f"rc_ok={rc_ok} perm_ok={perm_ok}"
1143 sync
+= r1
.ls_error
.eq(~r0
.mmu_req
)
1144 sync
+= r1
.mmu_error
.eq(r0
.mmu_req
)
1145 sync
+= r1
.cache_paradox
.eq(access_ok
)
1148 sync
+= r1
.ls_error
.eq(0)
1149 sync
+= r1
.mmu_error
.eq(0)
1150 sync
+= r1
.cache_paradox
.eq(0)
1152 with m
.If(req_op
== Op
.OP_STCX_FAIL
):
1155 sync
+= r1
.stcx_fail
.eq(0)
1157 # Record TLB hit information for updating TLB PLRU
1158 sync
+= r1
.tlb_hit
.eq(tlb_hit
)
1159 sync
+= r1
.tlb_hit_way
.eq(tlb_hit_way
)
1160 sync
+= r1
.tlb_hit_index
.eq(tlb_req_index
)
1162 # Memory accesses are handled by this state machine:
1164 # * Cache load miss/reload (in conjunction with "rams")
1165 # * Load hits for non-cachable forms
1166 # * Stores (the collision case is handled in "rams")
1168 # All wishbone requests generation is done here.
1169 # This machine operates at stage 1.
1170 def dcache_slow(self
, m
, r1
, use_forward1_next
, use_forward2_next
,
1171 cache_valid_bits
, r0
, replace_way
,
1172 req_hit_way
, req_same_tag
,
1173 r0_valid
, req_op
, cache_tag
, req_go
, ra
):
1179 req
= MemAccessRequest()
1181 adjust_acks
= Signal(3)
1182 stbs_done
= Signal()
1184 sync
+= r1
.use_forward1
.eq(use_forward1_next
)
1185 sync
+= r1
.forward_sel
.eq(0)
1187 with m
.If(use_forward1_next
):
1188 sync
+= r1
.forward_sel
.eq(r1
.req
.byte_sel
)
1189 with m
.Elif(use_forward2_next
):
1190 sync
+= r1
.forward_sel
.eq(r1
.forward_sel1
)
1192 sync
+= r1
.forward_data2
.eq(r1
.forward_data1
)
1193 with m
.If(r1
.write_bram
):
1194 sync
+= r1
.forward_data1
.eq(r1
.req
.data
)
1195 sync
+= r1
.forward_sel1
.eq(r1
.req
.byte_sel
)
1196 sync
+= r1
.forward_way1
.eq(r1
.req
.hit_way
)
1197 sync
+= r1
.forward_row1
.eq(get_row(r1
.req
.real_addr
))
1198 sync
+= r1
.forward_valid1
.eq(1)
1201 sync
+= r1
.forward_data1
.eq(0)
1203 sync
+= r1
.forward_data1
.eq(wb_in
.dat
)
1204 sync
+= r1
.forward_sel1
.eq(~
0) # all 1s
1205 sync
+= r1
.forward_way1
.eq(replace_way
)
1206 sync
+= r1
.forward_row1
.eq(r1
.store_row
)
1207 sync
+= r1
.forward_valid1
.eq(0)
1209 # One cycle pulses reset
1210 sync
+= r1
.slow_valid
.eq(0)
1211 sync
+= r1
.write_bram
.eq(0)
1212 sync
+= r1
.inc_acks
.eq(0)
1213 sync
+= r1
.dec_acks
.eq(0)
1215 sync
+= r1
.ls_valid
.eq(0)
1216 # complete tlbies and TLB loads in the third cycle
1217 sync
+= r1
.mmu_done
.eq(r0_valid
& (r0
.tlbie | r0
.tlbld
))
1219 with m
.If((req_op
== Op
.OP_LOAD_HIT
)
1220 |
(req_op
== Op
.OP_STCX_FAIL
)):
1221 with m
.If(~r0
.mmu_req
):
1222 sync
+= r1
.ls_valid
.eq(1)
1224 sync
+= r1
.mmu_done
.eq(1)
1226 with m
.If(r1
.write_tag
):
1227 # Store new tag in selected way
1228 for i
in range(NUM_WAYS
):
1229 with m
.If(i
== replace_way
):
1230 ct
= Signal(TAG_RAM_WIDTH
)
1231 comb
+= ct
.eq(cache_tag
[r1
.store_index
])
1232 comb
+= ct
.word_select(i
, TAG_WIDTH
).eq(r1
.reload_tag
)
1233 sync
+= cache_tag
[r1
.store_index
].eq(ct
)
1234 sync
+= r1
.store_way
.eq(replace_way
)
1235 sync
+= r1
.write_tag
.eq(0)
1237 # Take request from r1.req if there is one there,
1238 # else from req_op, ra, etc.
1240 comb
+= req
.eq(r1
.req
)
1242 comb
+= req
.op
.eq(req_op
)
1243 comb
+= req
.valid
.eq(req_go
)
1244 comb
+= req
.mmu_req
.eq(r0
.mmu_req
)
1245 comb
+= req
.dcbz
.eq(r0
.req
.dcbz
)
1246 comb
+= req
.real_addr
.eq(ra
)
1248 with m
.If(~r0
.req
.dcbz
):
1249 comb
+= req
.data
.eq(r0
.req
.data
)
1251 comb
+= req
.data
.eq(0)
1253 # Select all bytes for dcbz
1254 # and for cacheable loads
1255 with m
.If(r0
.req
.dcbz |
(r0
.req
.load
& ~r0
.req
.nc
)):
1256 comb
+= req
.byte_sel
.eq(~
0) # all 1s
1258 comb
+= req
.byte_sel
.eq(r0
.req
.byte_sel
)
1259 comb
+= req
.hit_way
.eq(req_hit_way
)
1260 comb
+= req
.same_tag
.eq(req_same_tag
)
1262 # Store the incoming request from r0,
1263 # if it is a slow request
1264 # Note that r1.full = 1 implies req_op = OP_NONE
1265 with m
.If((req_op
== Op
.OP_LOAD_MISS
)
1266 |
(req_op
== Op
.OP_LOAD_NC
)
1267 |
(req_op
== Op
.OP_STORE_MISS
)
1268 |
(req_op
== Op
.OP_STORE_HIT
)):
1269 sync
+= r1
.req
.eq(req
)
1270 sync
+= r1
.full
.eq(1)
1272 # Main state machine
1273 with m
.Switch(r1
.state
):
1275 with m
.Case(State
.IDLE
):
1276 # XXX check 'left downto. probably means len(r1.wb.adr)
1277 # r1.wb.adr <= req.real_addr(
1278 # r1.wb.adr'left downto 0
1280 sync
+= r1
.wb
.adr
.eq(req
.real_addr
)
1281 sync
+= r1
.wb
.sel
.eq(req
.byte_sel
)
1282 sync
+= r1
.wb
.dat
.eq(req
.data
)
1283 sync
+= r1
.dcbz
.eq(req
.dcbz
)
1285 # Keep track of our index and way
1286 # for subsequent stores.
1287 sync
+= r1
.store_index
.eq(get_index(req
.real_addr
))
1288 sync
+= r1
.store_row
.eq(get_row(req
.real_addr
))
1289 sync
+= r1
.end_row_ix
.eq(
1290 get_row_of_line(get_row(req
.real_addr
))
1292 sync
+= r1
.reload_tag
.eq(get_tag(req
.real_addr
))
1293 sync
+= r1
.req
.same_tag
.eq(1)
1295 with m
.If(req
.op
== Op
.OP_STORE_HIT
):
1296 sync
+= r1
.store_way
.eq(req
.hit_way
)
1298 # Reset per-row valid bits,
1299 # ready for handling OP_LOAD_MISS
1300 for i
in range(ROW_PER_LINE
):
1301 sync
+= r1
.rows_valid
[i
].eq(0)
1303 with m
.Switch(req
.op
):
1304 with m
.Case(Op
.OP_LOAD_HIT
):
1305 # stay in IDLE state
1308 with m
.Case(Op
.OP_LOAD_MISS
):
1309 #Display(f"cache miss real addr:" \
1310 # f"{req_real_addr}" \
1311 # f" idx:{get_index(req_real_addr)}" \
1312 # f" tag:{get_tag(req.real_addr)}")
1315 # Start the wishbone cycle
1316 sync
+= r1
.wb
.we
.eq(0)
1317 sync
+= r1
.wb
.cyc
.eq(1)
1318 sync
+= r1
.wb
.stb
.eq(1)
1320 # Track that we had one request sent
1321 sync
+= r1
.state
.eq(State
.RELOAD_WAIT_ACK
)
1322 sync
+= r1
.write_tag
.eq(1)
1324 with m
.Case(Op
.OP_LOAD_NC
):
1325 sync
+= r1
.wb
.cyc
.eq(1)
1326 sync
+= r1
.wb
.stb
.eq(1)
1327 sync
+= r1
.wb
.we
.eq(0)
1328 sync
+= r1
.state
.eq(State
.NC_LOAD_WAIT_ACK
)
1330 with m
.Case(Op
.OP_STORE_HIT
, Op
.OP_STORE_MISS
):
1331 with m
.If(~req
.dcbz
):
1332 sync
+= r1
.state
.eq(State
.STORE_WAIT_ACK
)
1333 sync
+= r1
.acks_pending
.eq(1)
1334 sync
+= r1
.full
.eq(0)
1335 sync
+= r1
.slow_valid
.eq(1)
1337 with m
.If(~req
.mmu_req
):
1338 sync
+= r1
.ls_valid
.eq(1)
1340 sync
+= r1
.mmu_done
.eq(1)
1342 with m
.If(req
.op
== Op
.OP_STORE_HIT
):
1343 sync
+= r1
.write_bram
.eq(1)
1345 sync
+= r1
.state
.eq(State
.RELOAD_WAIT_ACK
)
1347 with m
.If(req
.op
== Op
.OP_STORE_MISS
):
1348 sync
+= r1
.write_tag
.eq(1)
1350 sync
+= r1
.wb
.we
.eq(1)
1351 sync
+= r1
.wb
.cyc
.eq(1)
1352 sync
+= r1
.wb
.stb
.eq(1)
1354 # OP_NONE and OP_BAD do nothing
1355 # OP_BAD & OP_STCX_FAIL were
1356 # handled above already
1357 with m
.Case(Op
.OP_NONE
):
1359 with m
.Case(Op
.OP_BAD
):
1361 with m
.Case(Op
.OP_STCX_FAIL
):
1364 with m
.Case(State
.RELOAD_WAIT_ACK
):
1365 # Requests are all sent if stb is 0
1366 comb
+= stbs_done
.eq(~r1
.wb
.stb
)
1368 with m
.If(~wb_in
.stall
& ~stbs_done
):
1369 # That was the last word?
1370 # We are done sending.
1371 # Clear stb and set stbs_done
1372 # so we can handle an eventual
1373 # last ack on the same cycle.
1374 with m
.If(is_last_row_addr(
1375 r1
.wb
.adr
, r1
.end_row_ix
)):
1376 sync
+= r1
.wb
.stb
.eq(0)
1377 comb
+= stbs_done
.eq(0)
1379 # Calculate the next row address in the current cache line
1380 rarange
= r1
.wb
.adr
[ROW_OFF_BITS
: LINE_OFF_BITS
]
1381 sync
+= rarange
.eq(rarange
+ 1)
1383 # Incoming acks processing
1384 sync
+= r1
.forward_valid1
.eq(wb_in
.ack
)
1385 with m
.If(wb_in
.ack
):
1386 # XXX needs an Array bit-accessor here
1387 sync
+= r1
.rows_valid
[r1
.store_row
% ROW_PER_LINE
].eq(1)
1389 # If this is the data we were looking for,
1390 # we can complete the request next cycle.
1391 # Compare the whole address in case the
1392 # request in r1.req is not the one that
1393 # started this refill.
1394 with m
.If(r1
.full
& r1
.req
.same_tag
&
1395 ((r1
.dcbz
& r1
.req
.dcbz
) |
1396 (~r1
.dcbz
& (r1
.req
.op
== Op
.OP_LOAD_MISS
))) &
1397 (r1
.store_row
== get_row(r1
.req
.real_addr
))):
1398 sync
+= r1
.full
.eq(0)
1399 sync
+= r1
.slow_valid
.eq(1)
1400 with m
.If(~r1
.mmu_req
):
1401 sync
+= r1
.ls_valid
.eq(1)
1403 sync
+= r1
.mmu_done
.eq(1)
1404 sync
+= r1
.forward_sel
.eq(~
0) # all 1s
1405 sync
+= r1
.use_forward1
.eq(1)
1407 # Check for completion
1408 with m
.If(stbs_done
& is_last_row(r1
.store_row
,
1410 # Complete wishbone cycle
1411 sync
+= r1
.wb
.cyc
.eq(0)
1413 # Cache line is now valid
1414 cv
= Signal(INDEX_BITS
)
1415 sync
+= cv
.eq(cache_valid_bits
[r1
.store_index
])
1416 sync
+= cv
.bit_select(r1
.store_way
, 1).eq(1)
1417 sync
+= r1
.state
.eq(State
.IDLE
)
1419 # Increment store row counter
1420 sync
+= r1
.store_row
.eq(next_row(r1
.store_row
))
1422 with m
.Case(State
.STORE_WAIT_ACK
):
1423 comb
+= stbs_done
.eq(~r1
.wb
.stb
)
1424 comb
+= acks
.eq(r1
.acks_pending
)
1426 with m
.If(r1
.inc_acks
!= r1
.dec_acks
):
1427 with m
.If(r1
.inc_acks
):
1428 comb
+= adjust_acks
.eq(acks
+ 1)
1430 comb
+= adjust_acks
.eq(acks
- 1)
1432 comb
+= adjust_acks
.eq(acks
)
1434 sync
+= r1
.acks_pending
.eq(adjust_acks
)
1436 # Clear stb when slave accepted request
1437 with m
.If(~wb_in
.stall
):
1438 # See if there is another store waiting
1439 # to be done which is in the same real page.
1440 with m
.If(req
.valid
):
1441 ra
= req
.real_addr
[0:SET_SIZE_BITS
]
1442 sync
+= r1
.wb
.adr
[0:SET_SIZE_BITS
].eq(ra
)
1443 sync
+= r1
.wb
.dat
.eq(req
.data
)
1444 sync
+= r1
.wb
.sel
.eq(req
.byte_sel
)
1446 with m
.Elif((adjust_acks
< 7) & req
.same_tag
&
1447 ((req
.op
== Op
.OP_STORE_MISS
)
1448 |
(req
.op
== Op
.OP_STORE_HIT
))):
1449 sync
+= r1
.wb
.stb
.eq(1)
1450 comb
+= stbs_done
.eq(0)
1452 with m
.If(req
.op
== Op
.OP_STORE_HIT
):
1453 sync
+= r1
.write_bram
.eq(1)
1454 sync
+= r1
.full
.eq(0)
1455 sync
+= r1
.slow_valid
.eq(1)
1457 # Store requests never come from the MMU
1458 sync
+= r1
.ls_valid
.eq(1)
1459 comb
+= stbs_done
.eq(0)
1460 sync
+= r1
.inc_acks
.eq(1)
1462 sync
+= r1
.wb
.stb
.eq(0)
1463 comb
+= stbs_done
.eq(1)
1465 # Got ack ? See if complete.
1466 with m
.If(wb_in
.ack
):
1467 with m
.If(stbs_done
& (adjust_acks
== 1)):
1468 sync
+= r1
.state
.eq(State
.IDLE
)
1469 sync
+= r1
.wb
.cyc
.eq(0)
1470 sync
+= r1
.wb
.stb
.eq(0)
1471 sync
+= r1
.dec_acks
.eq(1)
1473 with m
.Case(State
.NC_LOAD_WAIT_ACK
):
1474 # Clear stb when slave accepted request
1475 with m
.If(~wb_in
.stall
):
1476 sync
+= r1
.wb
.stb
.eq(0)
1478 # Got ack ? complete.
1479 with m
.If(wb_in
.ack
):
1480 sync
+= r1
.state
.eq(State
.IDLE
)
1481 sync
+= r1
.full
.eq(0)
1482 sync
+= r1
.slow_valid
.eq(1)
1484 with m
.If(~r1
.mmu_req
):
1485 sync
+= r1
.ls_valid
.eq(1)
1487 sync
+= r1
.mmu_done
.eq(1)
1489 sync
+= r1
.forward_sel
.eq(~
0) # all 1s
1490 sync
+= r1
.use_forward1
.eq(1)
1491 sync
+= r1
.wb
.cyc
.eq(0)
1492 sync
+= r1
.wb
.stb
.eq(0)
1494 def dcache_log(self
, m
, r1
, valid_ra
, tlb_hit_way
, stall_out
):
1497 d_out
, wb_in
, log_out
= self
.d_out
, self
.wb_in
, self
.log_out
1499 sync
+= log_out
.eq(Cat(r1
.state
[:3], valid_ra
, tlb_hit_way
[:3],
1500 stall_out
, req_op
[:3], d_out
.valid
, d_out
.error
,
1501 r1
.wb
.cyc
, r1
.wb
.stb
, wb_in
.ack
, wb_in
.stall
,
1504 def elaborate(self
, platform
):
1509 # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
1510 cache_tags
= CacheTagArray()
1511 cache_tag_set
= Signal(TAG_RAM_WIDTH
)
1512 cache_valid_bits
= CacheValidBitsArray()
1514 # TODO attribute ram_style : string;
1515 # TODO attribute ram_style of cache_tags : signal is "distributed";
1517 """note: these are passed to nmigen.hdl.Memory as "attributes".
1518 don't know how, just that they are.
1520 dtlb_valid_bits
= TLBValidBitsArray()
1521 dtlb_tags
= TLBTagsArray()
1522 dtlb_ptes
= TLBPtesArray()
1523 # TODO attribute ram_style of
1524 # dtlb_tags : signal is "distributed";
1525 # TODO attribute ram_style of
1526 # dtlb_ptes : signal is "distributed";
1533 reservation
= Reservation()
1535 # Async signals on incoming request
1536 req_index
= Signal(INDEX_BITS
)
1537 req_row
= Signal(ROW_BITS
)
1538 req_hit_way
= Signal(WAY_BITS
)
1539 req_tag
= Signal(TAG_BITS
)
1541 req_data
= Signal(64)
1542 req_same_tag
= Signal()
1545 early_req_row
= Signal(ROW_BITS
)
1547 cancel_store
= Signal()
1549 clear_rsrv
= Signal()
1554 use_forward1_next
= Signal()
1555 use_forward2_next
= Signal()
1557 cache_out
= CacheRamOut()
1559 plru_victim
= PLRUOut()
1560 replace_way
= Signal(WAY_BITS
)
1562 # Wishbone read/write/cache write formatting signals
1566 tlb_tag_way
= Signal(TLB_TAG_WAY_BITS
)
1567 tlb_pte_way
= Signal(TLB_PTE_WAY_BITS
)
1568 tlb_valid_way
= Signal(TLB_NUM_WAYS
)
1569 tlb_req_index
= Signal(TLB_SET_BITS
)
1571 tlb_hit_way
= Signal(TLB_WAY_BITS
)
1572 pte
= Signal(TLB_PTE_BITS
)
1573 ra
= Signal(REAL_ADDR_BITS
)
1575 perm_attr
= PermAttr()
1578 access_ok
= Signal()
1580 tlb_plru_victim
= TLBPLRUOut()
1582 # we don't yet handle collisions between loadstore1 requests
1584 comb
+= self
.m_out
.stall
.eq(0)
1586 # Hold off the request in r0 when r1 has an uncompleted request
1587 comb
+= r0_stall
.eq(r0_full
& r1
.full
)
1588 comb
+= r0_valid
.eq(r0_full
& ~r1
.full
)
1589 comb
+= self
.stall_out
.eq(r0_stall
)
1591 # Wire up wishbone request latch out of stage 1
1592 comb
+= self
.wb_out
.eq(r1
.wb
)
1594 # call sub-functions putting everything together, using shared
1595 # signals established above
1596 self
.stage_0(m
, r0
, r1
, r0_full
)
1597 self
.tlb_read(m
, r0_stall
, tlb_valid_way
,
1598 tlb_tag_way
, tlb_pte_way
, dtlb_valid_bits
,
1599 dtlb_tags
, dtlb_ptes
)
1600 self
.tlb_search(m
, tlb_req_index
, r0
, r0_valid
,
1601 tlb_valid_way
, tlb_tag_way
, tlb_hit_way
,
1602 tlb_pte_way
, pte
, tlb_hit
, valid_ra
, perm_attr
, ra
)
1603 self
.tlb_update(m
, r0_valid
, r0
, dtlb_valid_bits
, tlb_req_index
,
1604 tlb_hit_way
, tlb_hit
, tlb_plru_victim
, tlb_tag_way
,
1605 dtlb_tags
, tlb_pte_way
, dtlb_ptes
)
1606 self
.maybe_plrus(m
, r1
, plru_victim
)
1607 self
.maybe_tlb_plrus(m
, r1
, tlb_plru_victim
)
1608 self
.cache_tag_read(m
, r0_stall
, req_index
, cache_tag_set
, cache_tags
)
1609 self
.dcache_request(m
, r0
, ra
, req_index
, req_row
, req_tag
,
1610 r0_valid
, r1
, cache_valid_bits
, replace_way
,
1611 use_forward1_next
, use_forward2_next
,
1612 req_hit_way
, plru_victim
, rc_ok
, perm_attr
,
1613 valid_ra
, perm_ok
, access_ok
, req_op
, req_go
,
1615 tlb_hit
, tlb_hit_way
, tlb_valid_way
, cache_tag_set
,
1616 cancel_store
, req_same_tag
, r0_stall
, early_req_row
)
1617 self
.reservation_comb(m
, cancel_store
, set_rsrv
, clear_rsrv
,
1618 r0_valid
, r0
, reservation
)
1619 self
.reservation_reg(m
, r0_valid
, access_ok
, set_rsrv
, clear_rsrv
,
1621 self
.writeback_control(m
, r1
, cache_out
)
1622 self
.rams(m
, r1
, early_req_row
, cache_out
, replace_way
)
1623 self
.dcache_fast_hit(m
, req_op
, r0_valid
, r0
, r1
,
1624 req_hit_way
, req_index
, access_ok
,
1625 tlb_hit
, tlb_hit_way
, tlb_req_index
)
1626 self
.dcache_slow(m
, r1
, use_forward1_next
, use_forward2_next
,
1627 cache_valid_bits
, r0
, replace_way
,
1628 req_hit_way
, req_same_tag
,
1629 r0_valid
, req_op
, cache_tags
, req_go
, ra
)
1630 #self.dcache_log(m, r1, valid_ra, tlb_hit_way, stall_out)
1637 # entity dcache_tb is
1640 # architecture behave of dcache_tb is
1641 # signal clk : std_ulogic;
1642 # signal rst : std_ulogic;
1644 # signal d_in : Loadstore1ToDcacheType;
1645 # signal d_out : DcacheToLoadstore1Type;
1647 # signal m_in : MmuToDcacheType;
1648 # signal m_out : DcacheToMmuType;
1650 # signal wb_bram_in : wishbone_master_out;
1651 # signal wb_bram_out : wishbone_slave_out;
1653 # constant clk_period : time := 10 ns;
1655 # dcache0: entity work.dcache
1668 # wishbone_out => wb_bram_in,
1669 # wishbone_in => wb_bram_out
1672 # -- BRAM Memory slave
1673 # bram0: entity work.wishbone_bram_wrapper
1675 # MEMORY_SIZE => 1024,
1676 # RAM_INIT_FILE => "icache_test.bin"
1681 # wishbone_in => wb_bram_in,
1682 # wishbone_out => wb_bram_out
1685 # clk_process: process
1688 # wait for clk_period/2;
1690 # wait for clk_period/2;
1693 # rst_process: process
1696 # wait for 2*clk_period;
1704 # d_in.valid <= '0';
1707 # d_in.addr <= (others => '0');
1708 # d_in.data <= (others => '0');
1709 # m_in.valid <= '0';
1710 # m_in.addr <= (others => '0');
1711 # m_in.pte <= (others => '0');
1713 # wait for 4*clk_period;
1714 # wait until rising_edge(clk);
1716 # -- Cacheable read of address 4
1719 # d_in.addr <= x"0000000000000004";
1720 # d_in.valid <= '1';
1721 # wait until rising_edge(clk);
1722 # d_in.valid <= '0';
1724 # wait until rising_edge(clk) and d_out.valid = '1';
1725 # assert d_out.data = x"0000000100000000"
1726 # report "data @" & to_hstring(d_in.addr) &
1727 # "=" & to_hstring(d_out.data) &
1728 # " expected 0000000100000000"
1730 # -- wait for clk_period;
1732 # -- Cacheable read of address 30
1735 # d_in.addr <= x"0000000000000030";
1736 # d_in.valid <= '1';
1737 # wait until rising_edge(clk);
1738 # d_in.valid <= '0';
1740 # wait until rising_edge(clk) and d_out.valid = '1';
1741 # assert d_out.data = x"0000000D0000000C"
1742 # report "data @" & to_hstring(d_in.addr) &
1743 # "=" & to_hstring(d_out.data) &
1744 # " expected 0000000D0000000C"
1747 # -- Non-cacheable read of address 100
1750 # d_in.addr <= x"0000000000000100";
1751 # d_in.valid <= '1';
1752 # wait until rising_edge(clk);
1753 # d_in.valid <= '0';
1754 # wait until rising_edge(clk) and d_out.valid = '1';
1755 # assert d_out.data = x"0000004100000040"
1756 # report "data @" & to_hstring(d_in.addr) &
1757 # "=" & to_hstring(d_out.data) &
1758 # " expected 0000004100000040"
1761 # wait until rising_edge(clk);
1762 # wait until rising_edge(clk);
1763 # wait until rising_edge(clk);
1764 # wait until rising_edge(clk);
def dcache_sim(dut):
    """Simulation process: drive three reads (two cacheable, one
    non-cacheable) through the DCache and check the returned data.

    Mirrors the VHDL dcache_tb stimulus.  NOTE(review): the explicit
    clock-step `yield` lines were lost in extraction and have been
    reconstructed from the wait-for-clk comments — confirm counts
    against upstream.
    """
    # clear inputs
    yield dut.d_in.valid.eq(0)
    yield dut.d_in.load.eq(0)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(0)   # fixed: was mistyped "adrr"
    yield dut.d_in.data.eq(0)
    yield dut.m_in.valid.eq(0)
    yield dut.m_in.addr.eq(0)
    yield dut.m_in.pte.eq(0)
    # wait 4 * clk_period
    for _ in range(4):
        yield
    # wait_until rising_edge(clk)
    yield

    # Cacheable read of address 4
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(Const(0x0000000000000004, 64))
    yield dut.d_in.valid.eq(1)
    # wait-until rising_edge(clk)
    yield
    yield dut.d_in.valid.eq(0)
    yield
    while not (yield dut.d_out.valid):
        yield
    # fixed: read the value with `yield`; the original compared the
    # Signal object itself, which cannot be evaluated in a process
    data = yield dut.d_out.data
    assert data == 0x0000000100000000, \
        f"data @4={data:x} expected 0000000100000000"

    # Cacheable read of address 30
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(0)
    yield dut.d_in.addr.eq(Const(0x0000000000000030, 64))
    yield dut.d_in.valid.eq(1)
    yield
    yield dut.d_in.valid.eq(0)
    yield
    while not (yield dut.d_out.valid):
        yield
    data = yield dut.d_out.data
    assert data == 0x0000000D0000000C, \
        f"data @30={data:x} expected 0000000D0000000C"

    # Non-cacheable read of address 100
    yield dut.d_in.load.eq(1)
    yield dut.d_in.nc.eq(1)
    yield dut.d_in.addr.eq(Const(0x0000000000000100, 64))
    yield dut.d_in.valid.eq(1)
    yield
    yield dut.d_in.valid.eq(0)
    yield
    while not (yield dut.d_out.valid):
        yield
    data = yield dut.d_out.data
    assert data == 0x0000004100000040, \
        f"data @100={data:x} expected 0000004100000040"

    # settle for a few cycles (matches the VHDL testbench tail)
    for _ in range(4):
        yield
def test_dcache():
    """Convert the DCache to RTLIL for inspection.

    NOTE(review): the `def` header, `dut = DCache()` and the
    `f.write(vl)` body were lost in extraction; reconstructed —
    confirm against upstream.
    """
    dut = DCache()
    vl = rtlil.convert(dut, ports=[])
    with open("test_dcache.il", "w") as f:
        f.write(vl)
    #run_simulation(dut, dcache_sim(), vcd_name='test_dcache.vcd')
1841 if __name__
== '__main__':