1 """DCache
2
3 based on Anton Blanchard microwatt dcache.vhdl
4
5 note that the microwatt dcache wishbone interface expects "stall".
6 for simplicity at the moment this is hard-coded to cyc & ~ack.
7 see WB4 spec, p84, section 5.2.1
8
9 IMPORTANT: for store, the data is sampled the cycle AFTER the "valid"
10 is raised. sigh
11
12 Links:
13
14 * https://libre-soc.org/3d_gpu/architecture/set_associative_cache.jpg
15 * https://bugs.libre-soc.org/show_bug.cgi?id=469
16
17 """
18
19 import sys
20
21 from nmutil.gtkw import write_gtkw
22
23 sys.setrecursionlimit(1000000)
24
25 from enum import Enum, unique
26
27 from nmigen import (Module, Signal, Elaboratable, Cat, Repl, Array, Const,
28 Record)
29 from nmutil.util import Display
30
31 from copy import deepcopy
32 from random import randint, seed
33
34 from nmigen_soc.wishbone.bus import Interface
35
36 from nmigen.cli import main
37 from nmutil.iocontrol import RecordObject
38 from nmigen.utils import log2_int
39 from soc.experiment.mem_types import (LoadStore1ToDCacheType,
40 DCacheToLoadStore1Type,
41 MMUToDCacheType,
42 DCacheToMMUType)
43
44 from soc.experiment.wb_types import (WB_ADDR_BITS, WB_DATA_BITS, WB_SEL_BITS,
45 WBAddrType, WBDataType, WBSelType,
46 WBMasterOut, WBSlaveOut,
47 WBMasterOutVector, WBSlaveOutVector,
48 WBIOMasterOut, WBIOSlaveOut)
49
50 from soc.experiment.cache_ram import CacheRam
51 #from soc.experiment.plru import PLRU
52 from nmutil.plru import PLRU
53
54 # for test
55 from soc.bus.sram import SRAM
56 from nmigen import Memory
57 from nmigen.cli import rtlil
58
59 # NOTE: to use cxxsim, export NMIGEN_SIM_MODE=cxxsim from the shell
60 # Also, check out the cxxsim nmigen branch, and latest yosys from git
61 from nmutil.sim_tmp_alternative import Simulator
62
63 from nmutil.util import wrap
64
65
# TODO: make these parameters of DCache at some point
LINE_SIZE = 64    # Line size in bytes
NUM_LINES = 16    # Number of lines in a set
NUM_WAYS = 4      # Number of ways
TLB_SET_SIZE = 64 # L1 DTLB entries per set
TLB_NUM_WAYS = 2  # L1 DTLB number of ways
TLB_LG_PGSZ = 12  # L1 DTLB log_2(page_size)
LOG_LENGTH = 0    # Non-zero to enable log data collection

# BRAM organisation: We never access more than
# WB_DATA_BITS at a time so to save
# resources we make the array only that wide, and
# use consecutive indices to make a cache "line"
#
# ROW_SIZE is the width in bytes of the BRAM
# (based on WB, so 64-bits)
ROW_SIZE = WB_DATA_BITS // 8

# ROW_PER_LINE is the number of rows (wishbone
# transactions) in a line
ROW_PER_LINE = LINE_SIZE // ROW_SIZE

# BRAM_ROWS is the number of rows in BRAM needed
# to represent the full dcache
BRAM_ROWS = NUM_LINES * ROW_PER_LINE
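
# Worked example with the defaults above (illustrative only):
# WB_DATA_BITS = 64 so ROW_SIZE = 64 // 8 = 8 bytes,
# ROW_PER_LINE = 64 // 8 = 8 wishbone beats per cache line, and
# BRAM_ROWS = 16 * 8 = 128 BRAM rows for the whole dcache.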

print ("ROW_SIZE", ROW_SIZE)
print ("ROW_PER_LINE", ROW_PER_LINE)
print ("BRAM_ROWS", BRAM_ROWS)
print ("NUM_WAYS", NUM_WAYS)

# Bit field counts in the address

# REAL_ADDR_BITS is the number of real address
# bits that we store
REAL_ADDR_BITS = 56

# ROW_BITS is the number of bits to select a row
ROW_BITS = log2_int(BRAM_ROWS)

# ROW_LINE_BITS is the number of bits to select
# a row within a line
ROW_LINE_BITS = log2_int(ROW_PER_LINE)

# LINE_OFF_BITS is the number of bits for
# the offset in a cache line
LINE_OFF_BITS = log2_int(LINE_SIZE)

# ROW_OFF_BITS is the number of bits for
# the offset in a row
ROW_OFF_BITS = log2_int(ROW_SIZE)

# INDEX_BITS is the number of bits to
# select a cache line
INDEX_BITS = log2_int(NUM_LINES)

# SET_SIZE_BITS is the log base 2 of the set size
SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS

# TAG_BITS is the number of bits of
# the tag part of the address
TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS

# TAG_WIDTH is the width in bits of each way of the tag RAM
TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)

# WAY_BITS is the number of bits to select a way
WAY_BITS = log2_int(NUM_WAYS)

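# Worked example of the derived widths with the defaults (illustrative
# only): ROW_BITS = 7, ROW_LINE_BITS = 3, LINE_OFF_BITS = 6,
# ROW_OFF_BITS = 3, INDEX_BITS = 4, SET_SIZE_BITS = 6 + 4 = 10,
# TAG_BITS = 56 - 10 = 46, TAG_WIDTH = 48 (46 rounded up to a whole
# number of bytes) and WAY_BITS = 2.
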
# Example of layout for 32 lines of 64 bytes:
layout = """\
  ..  tag    |index|  line  |
  ..         |   row   |    |
  ..         |     |---|    | ROW_LINE_BITS  (3)
  ..         |     |--- - --| LINE_OFF_BITS (6)
  ..         |         |- --| ROW_OFF_BITS  (3)
  ..         |----- ---|    | ROW_BITS      (8)
  ..         |-----|        | INDEX_BITS    (5)
  ..  --------|             | TAG_BITS      (45)
"""
print (layout)
print ("Dcache TAG %d IDX %d ROW_BITS %d ROFF %d LOFF %d RLB %d" % \
            (TAG_BITS, INDEX_BITS, ROW_BITS,
             ROW_OFF_BITS, LINE_OFF_BITS, ROW_LINE_BITS))
print ("index @: %d-%d" % (LINE_OFF_BITS, SET_SIZE_BITS))
print ("row @: %d-%d" % (ROW_OFF_BITS, SET_SIZE_BITS))
print ("tag @: %d-%d width %d" % (SET_SIZE_BITS, REAL_ADDR_BITS, TAG_WIDTH))

TAG_RAM_WIDTH = TAG_WIDTH * NUM_WAYS

print ("TAG_RAM_WIDTH", TAG_RAM_WIDTH)

def CacheTagArray():
    tag_layout = [('valid', NUM_WAYS), # one valid bit per way
                  ('tag', TAG_RAM_WIDTH),
                 ]
    return Array(Record(tag_layout, name="tag%d" % x) for x in range(NUM_LINES))

def RowPerLineValidArray():
    return Array(Signal(name="rows_valid%d" % x) \
                 for x in range(ROW_PER_LINE))

# L1 TLB
TLB_SET_BITS = log2_int(TLB_SET_SIZE)
TLB_WAY_BITS = log2_int(TLB_NUM_WAYS)
TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_SET_BITS)
TLB_TAG_WAY_BITS = TLB_NUM_WAYS * TLB_EA_TAG_BITS
TLB_PTE_BITS = 64
TLB_PTE_WAY_BITS = TLB_NUM_WAYS * TLB_PTE_BITS

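# Worked example for the TLB geometry above (illustrative only):
# TLB_SET_BITS = 6, TLB_WAY_BITS = 1,
# TLB_EA_TAG_BITS = 64 - (12 + 6) = 46,
# TLB_TAG_WAY_BITS = 2 * 46 = 92 and TLB_PTE_WAY_BITS = 2 * 64 = 128:
# each TLB set packs the tags and PTEs of both ways side by side.
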
def ispow2(x):
    return (1<<log2_int(x, False)) == x

assert (LINE_SIZE % ROW_SIZE) == 0, "LINE_SIZE not multiple of ROW_SIZE"
assert ispow2(LINE_SIZE), "LINE_SIZE not power of 2"
assert ispow2(NUM_LINES), "NUM_LINES not power of 2"
assert ispow2(ROW_PER_LINE), "ROW_PER_LINE not power of 2"
assert ROW_BITS == (INDEX_BITS + ROW_LINE_BITS), "geometry bits don't add up"
assert (LINE_OFF_BITS == ROW_OFF_BITS + ROW_LINE_BITS), \
        "geometry bits don't add up"
assert REAL_ADDR_BITS == (TAG_BITS + INDEX_BITS + LINE_OFF_BITS), \
        "geometry bits don't add up"
assert REAL_ADDR_BITS == (TAG_BITS + ROW_BITS + ROW_OFF_BITS), \
        "geometry bits don't add up"
assert 64 == WB_DATA_BITS, "Can't yet handle wb width that isn't 64-bits"
assert SET_SIZE_BITS <= TLB_LG_PGSZ, "Set indexed by virtual address"


def TLBTagEAArray():
    return Array(Signal(TLB_EA_TAG_BITS, name="tlbtagea%d" % x) \
                 for x in range (TLB_NUM_WAYS))

def TLBArray():
    tlb_layout = [('valid', TLB_NUM_WAYS), # one valid bit per TLB way
                  ('tag', TLB_TAG_WAY_BITS),
                  ('pte', TLB_PTE_WAY_BITS)
                 ]
    return Array(Record(tlb_layout, name="tlb%d" % x) \
                 for x in range(TLB_SET_SIZE))

def HitWaySet():
    return Array(Signal(WAY_BITS, name="hitway_%d" % x) \
                 for x in range(TLB_NUM_WAYS))

# Cache RAM interface
def CacheRamOut():
    return Array(Signal(WB_DATA_BITS, name="cache_out%d" % x) \
                 for x in range(NUM_WAYS))

# PLRU output interface
def PLRUOut():
    return Array(Signal(WAY_BITS, name="plru_out%d" % x) \
                 for x in range(NUM_LINES))

# TLB PLRU output interface
def TLBPLRUOut():
    return Array(Signal(TLB_WAY_BITS, name="tlbplru_out%d" % x) \
                 for x in range(TLB_SET_SIZE))

# Helper functions to decode incoming requests
#
# Return the cache line index (tag index) for an address
def get_index(addr):
    return addr[LINE_OFF_BITS:SET_SIZE_BITS]

# Return the cache row index (data memory) for an address
def get_row(addr):
    return addr[ROW_OFF_BITS:SET_SIZE_BITS]

# Return the index of a row within a line
def get_row_of_line(row):
    return row[:ROW_BITS][:ROW_LINE_BITS]

# Returns whether this is the last row of a line
def is_last_row_addr(addr, last):
    return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last

# Returns whether this is the last row of a line
def is_last_row(row, last):
    return get_row_of_line(row) == last

# Return the next row in the current cache line. We use a
# dedicated function in order to limit the size of the
# generated adder to be only the bits within a cache line
# (3 bits with default settings)
def next_row(row):
    row_v = row[0:ROW_LINE_BITS] + 1
    return Cat(row_v[:ROW_LINE_BITS], row[ROW_LINE_BITS:])
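
# For example, with ROW_LINE_BITS = 3, next_row(0b0010_111) gives
# 0b0010_000: only the low 3 bits pass through the adder (and wrap);
# the upper index bits are concatenated back unchanged.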

# Get the tag value from the address
def get_tag(addr):
    return addr[SET_SIZE_BITS:REAL_ADDR_BITS]
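
# Putting the decode helpers together, a real address splits up as
# follows with the default geometry (illustrative only):
#   addr[0:3]   offset of the byte within a row  (ROW_OFF_BITS)
#   addr[3:10]  row number in BRAM               (get_row)
#   addr[6:10]  cache line index                 (get_index)
#   addr[10:56] tag                              (get_tag)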

# Read a tag from a tag memory row
def read_tag(way, tagset):
    return tagset.word_select(way, TAG_WIDTH)[:TAG_BITS]

# Read a TLB tag from a TLB tag memory row
def read_tlb_tag(way, tags):
    return tags.word_select(way, TLB_EA_TAG_BITS)

# Write a TLB tag to a TLB tag memory row
def write_tlb_tag(way, tags, tag):
    return read_tlb_tag(way, tags).eq(tag)

# Read a PTE from a TLB PTE memory row
def read_tlb_pte(way, ptes):
    return ptes.word_select(way, TLB_PTE_BITS)

def write_tlb_pte(way, ptes, newpte):
    return read_tlb_pte(way, ptes).eq(newpte)
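
# word_select(way, width) picks the way'th width-bit slice of a packed
# row, so with the default TAG_WIDTH = 48, read_tag(2, tagset) selects
# tagset[96:144] and keeps the low TAG_BITS = 46 bits; the write helpers
# return .eq() assignments over the same slices. This is how all the
# ways of one set share a single flat signal.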


# Record for storing permission, attribute, etc. bits from a PTE
class PermAttr(RecordObject):
    def __init__(self, name=None):
        super().__init__(name=name)
        self.reference = Signal()
        self.changed = Signal()
        self.nocache = Signal()
        self.priv = Signal()
        self.rd_perm = Signal()
        self.wr_perm = Signal()


def extract_perm_attr(pte):
    # note: currently a stub - the PTE permission/attribute fields are
    # decoded directly in DCache.tlb_search rather than via this helper
    pa = PermAttr()
    return pa


# Type of operation on a "valid" input
@unique
class Op(Enum):
    OP_NONE = 0
    OP_BAD = 1           # NC cache hit, TLB miss, prot/RC failure
    OP_STCX_FAIL = 2     # conditional store w/o reservation
    OP_LOAD_HIT = 3      # Cache hit on load
    OP_LOAD_MISS = 4     # Load missing cache
    OP_LOAD_NC = 5       # Non-cachable load
    OP_STORE_HIT = 6     # Store hitting cache
    OP_STORE_MISS = 7    # Store missing cache


# Cache state machine
@unique
class State(Enum):
    IDLE = 0             # Normal load hit processing
    RELOAD_WAIT_ACK = 1  # Cache reload wait ack
    STORE_WAIT_ACK = 2   # Store wait ack
    NC_LOAD_WAIT_ACK = 3 # Non-cachable load wait ack


# Dcache operations:
#
# In order to make timing, we use the BRAMs with
# an output buffer, which means that the BRAM
# output is delayed by an extra cycle.
#
# Thus, the dcache has a 2-stage internal pipeline
# for cache hits with no stalls.
#
# All other operations are handled via stalling
# in the first stage.
#
# The second stage can thus complete a hit at the same
# time as the first stage emits a stall for a complex op.
#
# Stage 0 register, basically contains just the latched request

class RegStage0(RecordObject):
    def __init__(self, name=None):
        super().__init__(name=name)
        self.req = LoadStore1ToDCacheType(name="lsmem")
        self.tlbie = Signal() # indicates a tlbie request (from MMU)
        self.doall = Signal() # with tlbie, indicates flush whole TLB
        self.tlbld = Signal() # indicates a TLB load request (from MMU)
        self.mmu_req = Signal() # indicates source of request
        self.d_valid = Signal() # indicates req.data is valid now


class MemAccessRequest(RecordObject):
    def __init__(self, name=None):
        super().__init__(name=name)
        self.op = Signal(Op)
        self.valid = Signal()
        self.dcbz = Signal()
        self.real_addr = Signal(REAL_ADDR_BITS)
        self.data = Signal(64)
        self.byte_sel = Signal(8)
        self.hit_way = Signal(WAY_BITS)
        self.same_tag = Signal()
        self.mmu_req = Signal()


# First stage register, contains state for stage 1 of load hits
# and for the state machine used by all other operations
class RegStage1(RecordObject):
    def __init__(self, name=None):
        super().__init__(name=name)
        # Info about the request
        self.full = Signal() # have uncompleted request
        self.mmu_req = Signal() # request is from MMU
        self.req = MemAccessRequest(name="reqmem")

        # Cache hit state
        self.hit_way = Signal(WAY_BITS)
        self.hit_load_valid = Signal()
        self.hit_index = Signal(INDEX_BITS)
        self.cache_hit = Signal()

        # TLB hit state
        self.tlb_hit = Signal()
        self.tlb_hit_way = Signal(TLB_NUM_WAYS)
        self.tlb_hit_index = Signal(TLB_SET_BITS) # TLB set index of the hit

        # 2-stage data buffer for data forwarded from writes to reads
        self.forward_data1 = Signal(64)
        self.forward_data2 = Signal(64)
        self.forward_sel1 = Signal(8)
        self.forward_valid1 = Signal()
        self.forward_way1 = Signal(WAY_BITS)
        self.forward_row1 = Signal(ROW_BITS)
        self.use_forward1 = Signal()
        self.forward_sel = Signal(8)

        # Cache miss state (reload state machine)
        self.state = Signal(State)
        self.dcbz = Signal()
        self.write_bram = Signal()
        self.write_tag = Signal()
        self.slow_valid = Signal()
        self.wb = WBMasterOut("wb")
        self.reload_tag = Signal(TAG_BITS)
        self.store_way = Signal(WAY_BITS)
        self.store_row = Signal(ROW_BITS)
        self.store_index = Signal(INDEX_BITS)
        self.end_row_ix = Signal(ROW_LINE_BITS)
        self.rows_valid = RowPerLineValidArray()
        self.acks_pending = Signal(3)
        self.inc_acks = Signal()
        self.dec_acks = Signal()

        # Signals to complete (possibly with error)
        self.ls_valid = Signal()
        self.ls_error = Signal()
        self.mmu_done = Signal()
        self.mmu_error = Signal()
        self.cache_paradox = Signal()

        # Signal to complete a failed stcx.
        self.stcx_fail = Signal()


# Reservation information
class Reservation(RecordObject):
    def __init__(self):
        super().__init__()
        self.valid = Signal()
        self.addr = Signal(64-LINE_OFF_BITS)


class DTLBUpdate(Elaboratable):
    def __init__(self):
        self.tlbie = Signal()
        self.tlbwe = Signal()
        self.doall = Signal()
        self.updated = Signal()
        self.v_updated = Signal()
        self.tlb_hit = Signal()
        self.tlb_req_index = Signal(TLB_SET_BITS)

        self.tlb_hit_way = Signal(TLB_WAY_BITS)
        self.tlb_tag_way = Signal(TLB_TAG_WAY_BITS)
        self.tlb_pte_way = Signal(TLB_PTE_WAY_BITS)
        self.repl_way = Signal(TLB_WAY_BITS)
        self.eatag = Signal(TLB_EA_TAG_BITS)
        self.pte_data = Signal(TLB_PTE_BITS)

        self.dv = Signal(TLB_NUM_WAYS) # tlb_way_valids_t

        self.tb_out = Signal(TLB_TAG_WAY_BITS) # tlb_way_tags_t
        self.db_out = Signal(TLB_NUM_WAYS)     # tlb_way_valids_t
        self.pb_out = Signal(TLB_PTE_WAY_BITS) # tlb_way_ptes_t

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        tagset = Signal(TLB_TAG_WAY_BITS)
        pteset = Signal(TLB_PTE_WAY_BITS)

        tb_out, pb_out, db_out = self.tb_out, self.pb_out, self.db_out
        comb += db_out.eq(self.dv)

        with m.If(self.tlbie & self.doall):
            pass # clear all back in parent
        with m.Elif(self.tlbie):
            with m.If(self.tlb_hit):
                comb += db_out.bit_select(self.tlb_hit_way, 1).eq(0)
                comb += self.v_updated.eq(1)

        with m.Elif(self.tlbwe):

            comb += tagset.eq(self.tlb_tag_way)
            comb += write_tlb_tag(self.repl_way, tagset, self.eatag)
            comb += tb_out.eq(tagset)

            comb += pteset.eq(self.tlb_pte_way)
            comb += write_tlb_pte(self.repl_way, pteset, self.pte_data)
            comb += pb_out.eq(pteset)

            comb += db_out.bit_select(self.repl_way, 1).eq(1)

            comb += self.updated.eq(1)
            comb += self.v_updated.eq(1)

        return m


class DCachePendingHit(Elaboratable):

    def __init__(self, tlb_pte_way, tlb_valid_way, tlb_hit_way,
                 cache_i_validdx, cache_tag_set,
                 req_addr,
                 hit_set):

        self.go = Signal()
        self.virt_mode = Signal()
        self.is_hit = Signal()
        self.tlb_hit = Signal()
        self.hit_way = Signal(WAY_BITS)
        self.rel_match = Signal()
        self.req_index = Signal(INDEX_BITS)
        self.reload_tag = Signal(TAG_BITS)

        self.tlb_hit_way = tlb_hit_way
        self.tlb_pte_way = tlb_pte_way
        self.tlb_valid_way = tlb_valid_way
        self.cache_i_validdx = cache_i_validdx
        self.cache_tag_set = cache_tag_set
        self.req_addr = req_addr
        self.hit_set = hit_set

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        go = self.go
        virt_mode = self.virt_mode
        is_hit = self.is_hit
        tlb_pte_way = self.tlb_pte_way
        tlb_valid_way = self.tlb_valid_way
        cache_i_validdx = self.cache_i_validdx
        cache_tag_set = self.cache_tag_set
        req_addr = self.req_addr
        tlb_hit_way = self.tlb_hit_way
        tlb_hit = self.tlb_hit
        hit_set = self.hit_set
        hit_way = self.hit_way
        rel_match = self.rel_match
        req_index = self.req_index
        reload_tag = self.reload_tag

        rel_matches = Array(Signal(name="rel_matches_%d" % i) \
                            for i in range(TLB_NUM_WAYS))
        hit_way_set = HitWaySet()

        # Test if pending request is a hit on any way
        # In order to make timing in virtual mode,
        # when we are using the TLB, we compare each
        # way with each of the real addresses from each way of
        # the TLB, and then decide later which match to use.

        with m.If(virt_mode):
            for j in range(TLB_NUM_WAYS): # tlb_num_way_t
                s_tag = Signal(TAG_BITS, name="s_tag%d" % j)
                s_hit = Signal()
                s_pte = Signal(TLB_PTE_BITS)
                s_ra = Signal(REAL_ADDR_BITS)
                comb += s_pte.eq(read_tlb_pte(j, tlb_pte_way))
                comb += s_ra.eq(Cat(req_addr[0:TLB_LG_PGSZ],
                                    s_pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
                comb += s_tag.eq(get_tag(s_ra))

                for i in range(NUM_WAYS): # way_t
                    is_tag_hit = Signal(name="is_tag_hit_%d_%d" % (j, i))
                    comb += is_tag_hit.eq(go & cache_i_validdx[i] &
                                          (read_tag(i, cache_tag_set) == s_tag)
                                          & tlb_valid_way[j])
                    with m.If(is_tag_hit):
                        comb += hit_way_set[j].eq(i)
                        comb += s_hit.eq(1)
                comb += hit_set[j].eq(s_hit)
                with m.If(s_tag == reload_tag):
                    comb += rel_matches[j].eq(1)
            with m.If(tlb_hit):
                comb += is_hit.eq(hit_set[tlb_hit_way])
                comb += hit_way.eq(hit_way_set[tlb_hit_way])
                comb += rel_match.eq(rel_matches[tlb_hit_way])
        with m.Else():
            s_tag = Signal(TAG_BITS)
            comb += s_tag.eq(get_tag(req_addr))
            for i in range(NUM_WAYS): # way_t
                is_tag_hit = Signal(name="is_tag_hit_%d" % i)
                comb += is_tag_hit.eq(go & cache_i_validdx[i] &
                                      (read_tag(i, cache_tag_set) == s_tag))
                with m.If(is_tag_hit):
                    comb += hit_way.eq(i)
                    comb += is_hit.eq(1)
            with m.If(s_tag == reload_tag):
                comb += rel_match.eq(1)

        return m


class DCache(Elaboratable):
    """Set associative dcache write-through

    TODO (in no specific order):
    * See list in icache.vhdl
    * Complete load misses on the cycle when WB data comes instead of
      at the end of line (this requires dealing with requests coming in
      while not idle...)
    """
    def __init__(self):
        self.d_in = LoadStore1ToDCacheType("d_in")
        self.d_out = DCacheToLoadStore1Type("d_out")

        self.m_in = MMUToDCacheType("m_in")
        self.m_out = DCacheToMMUType("m_out")

        self.stall_out = Signal()

        # standard naming (wired to non-standard for compatibility)
        self.bus = Interface(addr_width=32,
                             data_width=64,
                             granularity=8,
                             features={'stall'},
                             alignment=0,
                             name="dcache")

        self.log_out = Signal(20)

    def stage_0(self, m, r0, r1, r0_full):
        """Latch the request in r0.req as long as we're not stalling
        """
        comb = m.d.comb
        sync = m.d.sync
        d_in, d_out, m_in = self.d_in, self.d_out, self.m_in

        r = RegStage0("stage0")

        # TODO, this goes in unit tests and formal proofs
        with m.If(d_in.valid & m_in.valid):
            sync += Display("request collision loadstore vs MMU")

        with m.If(m_in.valid):
            comb += r.req.valid.eq(1)
            comb += r.req.load.eq(~(m_in.tlbie | m_in.tlbld)) # no invalidate
            comb += r.req.dcbz.eq(0)
            comb += r.req.nc.eq(0)
            comb += r.req.reserve.eq(0)
            comb += r.req.virt_mode.eq(0)
            comb += r.req.priv_mode.eq(1)
            comb += r.req.addr.eq(m_in.addr)
            comb += r.req.data.eq(m_in.pte)
            comb += r.req.byte_sel.eq(~0) # Const -1 sets all to 0b111....
            comb += r.tlbie.eq(m_in.tlbie)
            comb += r.doall.eq(m_in.doall)
            comb += r.tlbld.eq(m_in.tlbld)
            comb += r.mmu_req.eq(1)
            m.d.sync += Display("    DCACHE req mmu addr %x pte %x ld %d",
                                m_in.addr, m_in.pte, r.req.load)

        with m.Else():
            comb += r.req.eq(d_in)
            comb += r.req.data.eq(0)
            comb += r.tlbie.eq(0)
            comb += r.doall.eq(0)
            comb += r.tlbld.eq(0)
            comb += r.mmu_req.eq(0)
        with m.If((~r1.full & ~d_in.hold) | ~r0_full):
            sync += r0.eq(r)
            sync += r0_full.eq(r.req.valid)
        # Sample data the cycle after a request comes in from loadstore1.
        # If another request has come in already then the data will get
        # put directly into req.data below.
        with m.If(r0.req.valid & ~r.req.valid & ~r0.d_valid &
                  ~r0.mmu_req):
            sync += r0.req.data.eq(d_in.data)
            sync += r0.d_valid.eq(1)
        with m.If(d_in.valid):
            m.d.sync += Display("    DCACHE req cache "
                                "virt %d addr %x data %x ld %d",
                                r.req.virt_mode, r.req.addr,
                                r.req.data, r.req.load)

    def tlb_read(self, m, r0_stall, tlb_valid_way,
                 tlb_tag_way, tlb_pte_way, dtlb):
        """TLB
        Operates in the second cycle on the request latched in r0.req.
        TLB updates write the entry at the end of the second cycle.
        """
        comb = m.d.comb
        sync = m.d.sync
        m_in, d_in = self.m_in, self.d_in

        index = Signal(TLB_SET_BITS)
        addrbits = Signal(TLB_SET_BITS)

        amin = TLB_LG_PGSZ
        amax = TLB_LG_PGSZ + TLB_SET_BITS

        with m.If(m_in.valid):
            comb += addrbits.eq(m_in.addr[amin : amax])
        with m.Else():
            comb += addrbits.eq(d_in.addr[amin : amax])
        comb += index.eq(addrbits)

        # If we have any op and the previous op isn't finished,
        # then keep the same output for next cycle.
        with m.If(~r0_stall):
            sync += tlb_valid_way.eq(dtlb[index].valid)
            sync += tlb_tag_way.eq(dtlb[index].tag)
            sync += tlb_pte_way.eq(dtlb[index].pte)

    def maybe_tlb_plrus(self, m, r1, tlb_plru_victim):
        """Generate TLB PLRUs
        """
        comb = m.d.comb
        sync = m.d.sync

        if TLB_NUM_WAYS == 0:
            return
        for i in range(TLB_SET_SIZE):
            # TLB PLRU interface
            tlb_plru = PLRU(TLB_WAY_BITS)
            setattr(m.submodules, "maybe_plru_%d" % i, tlb_plru)
            tlb_plru_acc_en = Signal()

            comb += tlb_plru_acc_en.eq(r1.tlb_hit & (r1.tlb_hit_index == i))
            comb += tlb_plru.acc_en.eq(tlb_plru_acc_en)
            comb += tlb_plru.acc_i.eq(r1.tlb_hit_way)
            comb += tlb_plru_victim[i].eq(tlb_plru.lru_o)

    def tlb_search(self, m, tlb_req_index, r0, r0_valid,
                   tlb_valid_way, tlb_tag_way, tlb_hit_way,
                   tlb_pte_way, pte, tlb_hit, valid_ra, perm_attr, ra):

        comb = m.d.comb

        hitway = Signal(TLB_WAY_BITS)
        hit = Signal()
        eatag = Signal(TLB_EA_TAG_BITS)

        TLB_LG_END = TLB_LG_PGSZ + TLB_SET_BITS
        comb += tlb_req_index.eq(r0.req.addr[TLB_LG_PGSZ : TLB_LG_END])
        comb += eatag.eq(r0.req.addr[TLB_LG_END : 64 ])

        for i in range(TLB_NUM_WAYS):
            is_tag_hit = Signal(name="is_tag_hit%d" % i)
            tlb_tag = Signal(TLB_EA_TAG_BITS, name="tlb_tag%d" % i)
            comb += tlb_tag.eq(read_tlb_tag(i, tlb_tag_way))
            comb += is_tag_hit.eq(tlb_valid_way[i] & (tlb_tag == eatag))
            with m.If(is_tag_hit):
                comb += hitway.eq(i)
                comb += hit.eq(1)

        comb += tlb_hit.eq(hit & r0_valid)
        comb += tlb_hit_way.eq(hitway)

        with m.If(tlb_hit):
            comb += pte.eq(read_tlb_pte(hitway, tlb_pte_way))
        comb += valid_ra.eq(tlb_hit | ~r0.req.virt_mode)

        with m.If(r0.req.virt_mode):
            comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
                              r0.req.addr[ROW_OFF_BITS:TLB_LG_PGSZ],
                              pte[TLB_LG_PGSZ:REAL_ADDR_BITS]))
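            # the bit positions below are a reading of the radix PTE
            # layout (R and C status bits, an ATT bit used here as
            # cache-inhibited, and the EAA privilege/read/write bits);
            # verify against the Power ISA spec rather than this note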
            comb += perm_attr.reference.eq(pte[8])
            comb += perm_attr.changed.eq(pte[7])
            comb += perm_attr.nocache.eq(pte[5])
            comb += perm_attr.priv.eq(pte[3])
            comb += perm_attr.rd_perm.eq(pte[2])
            comb += perm_attr.wr_perm.eq(pte[1])
        with m.Else():
            comb += ra.eq(Cat(Const(0, ROW_OFF_BITS),
                              r0.req.addr[ROW_OFF_BITS:REAL_ADDR_BITS]))
            comb += perm_attr.reference.eq(1)
            comb += perm_attr.changed.eq(1)
            comb += perm_attr.nocache.eq(0)
            comb += perm_attr.priv.eq(1)
            comb += perm_attr.rd_perm.eq(1)
            comb += perm_attr.wr_perm.eq(1)

        with m.If(valid_ra):
            m.d.sync += Display("DCACHE virt mode %d hit %d ra %x pte %x",
                                r0.req.virt_mode, tlb_hit, ra, pte)
            m.d.sync += Display("       perm ref=%d", perm_attr.reference)
            m.d.sync += Display("       perm chg=%d", perm_attr.changed)
            m.d.sync += Display("       perm noc=%d", perm_attr.nocache)
            m.d.sync += Display("       perm prv=%d", perm_attr.priv)
            m.d.sync += Display("       perm rdp=%d", perm_attr.rd_perm)
            m.d.sync += Display("       perm wrp=%d", perm_attr.wr_perm)

    def tlb_update(self, m, r0_valid, r0, dtlb, tlb_req_index,
                   tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_tag_way,
                   tlb_pte_way):

        comb = m.d.comb
        sync = m.d.sync

        tlbie = Signal()
        tlbwe = Signal()

        comb += tlbie.eq(r0_valid & r0.tlbie)
        comb += tlbwe.eq(r0_valid & r0.tlbld)

        m.submodules.tlb_update = d = DTLBUpdate()
        with m.If(tlbie & r0.doall):
            # clear all valid bits at once
            for i in range(TLB_SET_SIZE):
                sync += dtlb[i].valid.eq(0)
        with m.If(d.updated):
            sync += dtlb[tlb_req_index].tag.eq(d.tb_out)
            sync += dtlb[tlb_req_index].pte.eq(d.pb_out)
        with m.If(d.v_updated):
            sync += dtlb[tlb_req_index].valid.eq(d.db_out)

        comb += d.dv.eq(dtlb[tlb_req_index].valid)

        comb += d.tlbie.eq(tlbie)
        comb += d.tlbwe.eq(tlbwe)
        comb += d.doall.eq(r0.doall)
        comb += d.tlb_hit.eq(tlb_hit)
        comb += d.tlb_hit_way.eq(tlb_hit_way)
        comb += d.tlb_tag_way.eq(tlb_tag_way)
        comb += d.tlb_pte_way.eq(tlb_pte_way)
        comb += d.tlb_req_index.eq(tlb_req_index)

        with m.If(tlb_hit):
            comb += d.repl_way.eq(tlb_hit_way)
        with m.Else():
            comb += d.repl_way.eq(tlb_plru_victim[tlb_req_index])
        comb += d.eatag.eq(r0.req.addr[TLB_LG_PGSZ + TLB_SET_BITS:64])
        comb += d.pte_data.eq(r0.req.data)

    def maybe_plrus(self, m, r1, plru_victim):
        """Generate PLRUs
        """
        comb = m.d.comb
        sync = m.d.sync

        if NUM_WAYS == 0: # these are the cache-way PLRUs, not the TLB ones
            return

        for i in range(NUM_LINES):
            # PLRU interface
            plru = PLRU(WAY_BITS)
            setattr(m.submodules, "plru%d" % i, plru)
            plru_acc_en = Signal()

            comb += plru_acc_en.eq(r1.cache_hit & (r1.hit_index == i))
            comb += plru.acc_en.eq(plru_acc_en)
            comb += plru.acc_i.eq(r1.hit_way)
            comb += plru_victim[i].eq(plru.lru_o)

    def cache_tag_read(self, m, r0_stall, req_index, cache_tag_set, cache_tags):
        """Cache tag RAM read port
        """
        comb = m.d.comb
        sync = m.d.sync
        m_in, d_in = self.m_in, self.d_in

        index = Signal(INDEX_BITS)

        with m.If(r0_stall):
            comb += index.eq(req_index)
        with m.Elif(m_in.valid):
            comb += index.eq(get_index(m_in.addr))
        with m.Else():
            comb += index.eq(get_index(d_in.addr))
        sync += cache_tag_set.eq(cache_tags[index].tag)

    def dcache_request(self, m, r0, ra, req_index, req_row, req_tag,
                       r0_valid, r1, cache_tags, replace_way,
                       use_forward1_next, use_forward2_next,
                       req_hit_way, plru_victim, rc_ok, perm_attr,
                       valid_ra, perm_ok, access_ok, req_op, req_go,
                       tlb_pte_way,
                       tlb_hit, tlb_hit_way, tlb_valid_way, cache_tag_set,
                       cancel_store, req_same_tag, r0_stall, early_req_row):
        """Cache request parsing and hit detection
        """

        comb = m.d.comb
        m_in, d_in = self.m_in, self.d_in

        is_hit = Signal()
        hit_way = Signal(WAY_BITS)
        op = Signal(Op)
        opsel = Signal(3)
        go = Signal()
        nc = Signal()
        hit_set = Array(Signal(name="hit_set_%d" % i) \
                        for i in range(TLB_NUM_WAYS))
        cache_i_validdx = Signal(NUM_WAYS)

        # Extract line, row and tag from request
        comb += req_index.eq(get_index(r0.req.addr))
        comb += req_row.eq(get_row(r0.req.addr))
        comb += req_tag.eq(get_tag(ra))

        if False: # display on comb is a bit... busy.
            comb += Display("dcache_req addr:%x ra: %x idx: %x tag: %x row: %x",
                            r0.req.addr, ra, req_index, req_tag, req_row)

        comb += go.eq(r0_valid & ~(r0.tlbie | r0.tlbld) & ~r1.ls_error)
        comb += cache_i_validdx.eq(cache_tags[req_index].valid)

        m.submodules.dcache_pend = dc = DCachePendingHit(tlb_pte_way,
                                            tlb_valid_way, tlb_hit_way,
                                            cache_i_validdx, cache_tag_set,
                                            r0.req.addr,
                                            hit_set)

        comb += dc.tlb_hit.eq(tlb_hit)
        comb += dc.reload_tag.eq(r1.reload_tag)
        comb += dc.virt_mode.eq(r0.req.virt_mode)
        comb += dc.go.eq(go)
        comb += dc.req_index.eq(req_index)
        comb += is_hit.eq(dc.is_hit)
        comb += hit_way.eq(dc.hit_way)
        comb += req_same_tag.eq(dc.rel_match)

        # See if the request matches the line currently being reloaded
        with m.If((r1.state == State.RELOAD_WAIT_ACK) &
                  (req_index == r1.store_index) & req_same_tag):
            # For a store, consider this a hit even if the row isn't
            # valid since it will be by the time we perform the store.
            # For a load, check the appropriate row valid bit.
            rrow = Signal(ROW_LINE_BITS)
            comb += rrow.eq(req_row)
            valid = r1.rows_valid[rrow]
            comb += is_hit.eq((~r0.req.load) | valid)
            comb += hit_way.eq(replace_way)

        # Whether to use forwarded data for a load or not
        with m.If((get_row(r1.req.real_addr) == req_row) &
                  (r1.req.hit_way == hit_way)):
            # Only need to consider r1.write_bram here, since if we
            # are writing refill data here, then we don't have a
            # cache hit this cycle on the line being refilled.
            # (There is the possibility that the load following the
            # load miss that started the refill could be to the old
            # contents of the victim line, since it is a couple of
            # cycles after the refill starts before we see the updated
            # cache tag. In that case we don't use the bypass.)
            comb += use_forward1_next.eq(r1.write_bram)
        with m.If((r1.forward_row1 == req_row) & (r1.forward_way1 == hit_way)):
            comb += use_forward2_next.eq(r1.forward_valid1)

        # The way that matched on a hit
        comb += req_hit_way.eq(hit_way)

        # The way to replace on a miss
        with m.If(r1.write_tag):
            comb += replace_way.eq(plru_victim[r1.store_index])
        with m.Else():
            comb += replace_way.eq(r1.store_way)

        # work out whether we have permission for this access
        # NB we don't yet implement AMR, thus no KUAP
        comb += rc_ok.eq(perm_attr.reference
                         & (r0.req.load | perm_attr.changed))
        comb += perm_ok.eq((r0.req.priv_mode | (~perm_attr.priv)) &
                           (perm_attr.wr_perm |
                            (r0.req.load & perm_attr.rd_perm)))
        comb += access_ok.eq(valid_ra & perm_ok & rc_ok)
        # Combine the request and cache hit status to decide what
        # operation needs to be done
        comb += nc.eq(r0.req.nc | perm_attr.nocache)
        comb += op.eq(Op.OP_NONE)
        with m.If(go):
            with m.If(~access_ok):
                m.d.sync += Display("DCACHE access fail valid_ra=%d p=%d rc=%d",
                                    valid_ra, perm_ok, rc_ok)
                comb += op.eq(Op.OP_BAD)
            with m.Elif(cancel_store):
                m.d.sync += Display("DCACHE cancel store")
                comb += op.eq(Op.OP_STCX_FAIL)
            with m.Else():
                m.d.sync += Display("DCACHE valid_ra=%d nc=%d ld=%d",
                                    valid_ra, nc, r0.req.load)
                comb += opsel.eq(Cat(is_hit, nc, r0.req.load))
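                # opsel packs (LSB first) is_hit, nc, load; so e.g.
                # 0b101 means load=1, nc=0, hit=1: a cacheable load hit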
                with m.Switch(opsel):
                    with m.Case(0b101): comb += op.eq(Op.OP_LOAD_HIT)
                    with m.Case(0b100): comb += op.eq(Op.OP_LOAD_MISS)
                    with m.Case(0b110): comb += op.eq(Op.OP_LOAD_NC)
                    with m.Case(0b001): comb += op.eq(Op.OP_STORE_HIT)
                    with m.Case(0b000): comb += op.eq(Op.OP_STORE_MISS)
                    with m.Case(0b010): comb += op.eq(Op.OP_STORE_MISS)
                    with m.Case(0b011): comb += op.eq(Op.OP_BAD)
                    with m.Case(0b111): comb += op.eq(Op.OP_BAD)
        comb += req_op.eq(op)
        comb += req_go.eq(go)

        # Version of the row number that is valid one cycle earlier
        # in the cases where we need to read the cache data BRAM.
        # If we're stalling then we need to keep reading the last
        # row requested.
        with m.If(~r0_stall):
            with m.If(m_in.valid):
                comb += early_req_row.eq(get_row(m_in.addr))
            with m.Else():
                comb += early_req_row.eq(get_row(d_in.addr))
        with m.Else():
            comb += early_req_row.eq(req_row)

    def reservation_comb(self, m, cancel_store, set_rsrv, clear_rsrv,
                         r0_valid, r0, reservation):
        """Handle load-with-reservation and store-conditional instructions
        """
        comb = m.d.comb

        with m.If(r0_valid & r0.req.reserve):
            # XXX generate alignment interrupt if address
            # is not aligned XXX or if r0.req.nc = '1'
            with m.If(r0.req.load):
                comb += set_rsrv.eq(r0.req.atomic_last) # load with reservation
            with m.Else():
                comb += clear_rsrv.eq(r0.req.atomic_last) # store conditional
                with m.If((~reservation.valid) |
                          (r0.req.addr[LINE_OFF_BITS:64] != reservation.addr)):
                    comb += cancel_store.eq(1)

    def reservation_reg(self, m, r0_valid, access_ok, set_rsrv, clear_rsrv,
                        reservation, r0):

        comb = m.d.comb
        sync = m.d.sync

        with m.If(r0_valid & access_ok):
            with m.If(clear_rsrv):
                sync += reservation.valid.eq(0)
            with m.Elif(set_rsrv):
                sync += reservation.valid.eq(1)
                sync += reservation.addr.eq(r0.req.addr[LINE_OFF_BITS:64])

    def writeback_control(self, m, r1, cache_out_row):
        """Return data for loads & completion control logic
        """
        comb = m.d.comb
        sync = m.d.sync
        d_out, m_out = self.d_out, self.m_out

        data_out = Signal(64)
        data_fwd = Signal(64)

        # Use the bypass if we are reading the row that was
        # written 1 or 2 cycles ago, including for the
        # slow_valid = 1 case (i.e. completing a load
        # miss or a non-cacheable load).
        with m.If(r1.use_forward1):
            comb += data_fwd.eq(r1.forward_data1)
        with m.Else():
            comb += data_fwd.eq(r1.forward_data2)

        comb += data_out.eq(cache_out_row)

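        # Merge forwarded bytes over the BRAM read data one byte at a
        # time, as selected by forward_sel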
        for i in range(8):
            with m.If(r1.forward_sel[i]):
                dsel = data_fwd.word_select(i, 8)
                comb += data_out.word_select(i, 8).eq(dsel)

        comb += d_out.valid.eq(r1.ls_valid)
        comb += d_out.data.eq(data_out)
        comb += d_out.store_done.eq(~r1.stcx_fail)
        comb += d_out.error.eq(r1.ls_error)
        comb += d_out.cache_paradox.eq(r1.cache_paradox)

        # Outputs to MMU
        comb += m_out.done.eq(r1.mmu_done)
        comb += m_out.err.eq(r1.mmu_error)
        comb += m_out.data.eq(data_out)

        # We have a valid load or store hit or we just completed
        # a slow op such as a load miss, a NC load or a store
        #
        # Note: the load hit is delayed by one cycle. However it
        # can still not collide with r.slow_valid (well unless I
        # miscalculated) because slow_valid can only be set on a
        # subsequent request and not on its first cycle (the state
        # machine must have advanced), which makes slow_valid
        # at least 2 cycles from the previous hit_load_valid.

        # Sanity: Only one of these must be set in any given cycle

        if False: # TODO: need Display to get this to work
            assert (r1.slow_valid & r1.stcx_fail) != 1, \
                "unexpected slow_valid collision with stcx_fail"

            assert ((r1.slow_valid | r1.stcx_fail) | r1.hit_load_valid) != 1, \
                "unexpected hit_load_delayed collision with slow_valid"

        with m.If(~r1.mmu_req):
            # Request came from loadstore1...
            # Load hit case is the standard path
            with m.If(r1.hit_load_valid):
                sync += Display("completing load hit data=%x", data_out)

            # error cases complete without stalling
            with m.If(r1.ls_error):
                with m.If(r1.dcbz):
                    sync += Display("completing dcbz with error")
                with m.Else():
                    sync += Display("completing ld/st with error")

            # Slow ops (load miss, NC, stores)
            with m.If(r1.slow_valid):
                sync += Display("completing store or load miss adr=%x data=%x",
                                r1.req.real_addr, data_out)

        with m.Else():
            # Request came from MMU
            with m.If(r1.hit_load_valid):
                sync += Display("completing load hit to MMU, data=%x",
                                m_out.data)
            # error cases complete without stalling
            with m.If(r1.mmu_error):
                sync += Display("completing MMU ld with error")

            # Slow ops (i.e. load miss)
            with m.If(r1.slow_valid):
                sync += Display("completing MMU load miss, adr=%x data=%x",
                                r1.req.real_addr, m_out.data)

    def rams(self, m, r1, early_req_row, cache_out_row, replace_way):
        """rams
        Generate a cache RAM for each way. This handles the normal
        reads, writes from reloads and the special store-hit update
        path as well.

        Note: the BRAMs have an extra read buffer, meaning the output
        is pipelined an extra cycle. This differs from the
        icache. The writeback logic needs to take that into
        account by using 1-cycle delayed signals for load hits.
        """
        comb = m.d.comb
        bus = self.bus

        for i in range(NUM_WAYS):
            do_read = Signal(name="do_rd%d" % i)
            rd_addr = Signal(ROW_BITS, name="rd_addr_%d" % i)
            do_write = Signal(name="do_wr%d" % i)
            wr_addr = Signal(ROW_BITS, name="wr_addr_%d" % i)
            wr_data = Signal(WB_DATA_BITS, name="din_%d" % i)
            wr_sel = Signal(ROW_SIZE)
            wr_sel_m = Signal(ROW_SIZE)
            _d_out = Signal(WB_DATA_BITS, name="dout_%d" % i) # cache_row_t

            way = CacheRam(ROW_BITS, WB_DATA_BITS, ADD_BUF=True, ram_num=i)
            setattr(m.submodules, "cacheram_%d" % i, way)

            comb += way.rd_en.eq(do_read)
            comb += way.rd_addr.eq(rd_addr)
            comb += _d_out.eq(way.rd_data_o)
            comb += way.wr_sel.eq(wr_sel_m)
            comb += way.wr_addr.eq(wr_addr)
            comb += way.wr_data.eq(wr_data)

            # Cache hit reads
            comb += do_read.eq(1)
            comb += rd_addr.eq(early_req_row)
            with m.If(r1.hit_way == i):
                comb += cache_out_row.eq(_d_out)

            # Write mux:
            #
            # Defaults to wishbone read responses (cache refill)
            #
            # For timing, the mux on wr_data/sel/addr is not
            # dependent on anything other than the current state.

            with m.If(r1.write_bram):
                # Write store data to BRAM. This happens one
                # cycle after the store is in r0.
                comb += wr_data.eq(r1.req.data)
                comb += wr_sel.eq(r1.req.byte_sel)
                comb += wr_addr.eq(get_row(r1.req.real_addr))

                with m.If(i == r1.req.hit_way):
                    comb += do_write.eq(1)
            with m.Else():
                # Otherwise, we might be doing a reload or a DCBZ
                with m.If(r1.dcbz):
                    comb += wr_data.eq(0)
                with m.Else():
                    comb += wr_data.eq(bus.dat_r)
                comb += wr_addr.eq(r1.store_row)
                comb += wr_sel.eq(~0) # all 1s

                with m.If((r1.state == State.RELOAD_WAIT_ACK)
                          & bus.ack & (replace_way == i)):
                    comb += do_write.eq(1)

            # Mask write selects with do_write since BRAM
            # doesn't have a global write-enable
            with m.If(do_write):
                comb += wr_sel_m.eq(wr_sel)

    # Cache hit synchronous machine for the easy case.
    # This handles load hits.
    # It also handles error cases (TLB miss, cache paradox)
    def dcache_fast_hit(self, m, req_op, r0_valid, r0, r1,
                        req_hit_way, req_index, req_tag, access_ok,
                        tlb_hit, tlb_hit_way, tlb_req_index):

        comb = m.d.comb
        sync = m.d.sync

        with m.If(req_op != Op.OP_NONE):
            sync += Display("op:%d addr:%x nc: %d idx: %x tag: %x way: %x",
                            req_op, r0.req.addr, r0.req.nc,
                            req_index, req_tag, req_hit_way)

        with m.If(r0_valid):
            sync += r1.mmu_req.eq(r0.mmu_req)

        # Fast path for load/store hits.
        # Set signals for the writeback controls.
        sync += r1.hit_way.eq(req_hit_way)
        sync += r1.hit_index.eq(req_index)

        with m.If(req_op == Op.OP_LOAD_HIT):
            sync += r1.hit_load_valid.eq(1)
        with m.Else():
            sync += r1.hit_load_valid.eq(0)

        with m.If((req_op == Op.OP_LOAD_HIT) | (req_op == Op.OP_STORE_HIT)):
            sync += r1.cache_hit.eq(1)
        with m.Else():
            sync += r1.cache_hit.eq(0)

        with m.If(req_op == Op.OP_BAD):
            sync += Display("Signalling ld/st error "
                            "ls_error=%i mmu_error=%i cache_paradox=%i",
                            ~r0.mmu_req, r0.mmu_req, access_ok)
            sync += r1.ls_error.eq(~r0.mmu_req)
            sync += r1.mmu_error.eq(r0.mmu_req)
            sync += r1.cache_paradox.eq(access_ok)

        with m.Else():
            sync += r1.ls_error.eq(0)
            sync += r1.mmu_error.eq(0)
            sync += r1.cache_paradox.eq(0)

        with m.If(req_op == Op.OP_STCX_FAIL):
            sync += r1.stcx_fail.eq(1)
        with m.Else():
            sync += r1.stcx_fail.eq(0)

        # Record TLB hit information for updating TLB PLRU
        sync += r1.tlb_hit.eq(tlb_hit)
        sync += r1.tlb_hit_way.eq(tlb_hit_way)
        sync += r1.tlb_hit_index.eq(tlb_req_index)

    # Memory accesses are handled by this state machine:
    #
    #   * Cache load miss/reload (in conjunction with "rams")
    #   * Load hits for non-cachable forms
    #   * Stores (the collision case is handled in "rams")
    #
    # All wishbone requests generation is done here.
    # This machine operates at stage 1.
    def dcache_slow(self, m, r1, use_forward1_next, use_forward2_next,
                    r0, replace_way,
                    req_hit_way, req_same_tag,
                    r0_valid, req_op, cache_tags, req_go, ra):

        comb = m.d.comb
        sync = m.d.sync
        bus = self.bus
        d_in = self.d_in

        req = MemAccessRequest("mreq_ds")

        req_row = Signal(ROW_BITS)
        req_idx = Signal(INDEX_BITS)
        req_tag = Signal(TAG_BITS)
        comb += req_idx.eq(get_index(req.real_addr))
        comb += req_row.eq(get_row(req.real_addr))
        comb += req_tag.eq(get_tag(req.real_addr))

        sync += r1.use_forward1.eq(use_forward1_next)
        sync += r1.forward_sel.eq(0)

        with m.If(use_forward1_next):
            sync += r1.forward_sel.eq(r1.req.byte_sel)
        with m.Elif(use_forward2_next):
            sync += r1.forward_sel.eq(r1.forward_sel1)

        sync += r1.forward_data2.eq(r1.forward_data1)
        with m.If(r1.write_bram):
            sync += r1.forward_data1.eq(r1.req.data)
            sync += r1.forward_sel1.eq(r1.req.byte_sel)
            sync += r1.forward_way1.eq(r1.req.hit_way)
            sync += r1.forward_row1.eq(get_row(r1.req.real_addr))
            sync += r1.forward_valid1.eq(1)
        with m.Else():
            with m.If(r1.dcbz):
                sync += r1.forward_data1.eq(0)
            with m.Else():
                sync += r1.forward_data1.eq(bus.dat_r)
            sync += r1.forward_sel1.eq(~0) # all 1s
            sync += r1.forward_way1.eq(replace_way)
            sync += r1.forward_row1.eq(r1.store_row)
            sync += r1.forward_valid1.eq(0)

        # One cycle pulses reset
        sync += r1.slow_valid.eq(0)
        sync += r1.write_bram.eq(0)
        sync += r1.inc_acks.eq(0)
        sync += r1.dec_acks.eq(0)

        sync += r1.ls_valid.eq(0)
        # complete tlbies and TLB loads in the third cycle
        sync += r1.mmu_done.eq(r0_valid & (r0.tlbie | r0.tlbld))

        with m.If((req_op == Op.OP_LOAD_HIT) | (req_op == Op.OP_STCX_FAIL)):
            with m.If(~r0.mmu_req):
                sync += r1.ls_valid.eq(1)
            with m.Else():
                sync += r1.mmu_done.eq(1)

        with m.If(r1.write_tag):
            # Store new tag in selected way
            for i in range(NUM_WAYS):
                with m.If(i == replace_way):
                    ct = Signal(TAG_RAM_WIDTH)
                    comb += ct.eq(cache_tags[r1.store_index].tag)
                    """
                    TODO: check this
                    cache_tags(r1.store_index)((i + 1) * TAG_WIDTH - 1
                                               downto i * TAG_WIDTH) <=
                        (TAG_WIDTH - 1 downto TAG_BITS => '0') & r1.reload_tag;
                    """
                    comb += ct.word_select(i, TAG_WIDTH).eq(r1.reload_tag)
                    sync += cache_tags[r1.store_index].tag.eq(ct)
            sync += r1.store_way.eq(replace_way)
            sync += r1.write_tag.eq(0)

        # Take request from r1.req if there is one there,
        # else from req_op, ra, etc.
        with m.If(r1.full):
            comb += req.eq(r1.req)
        with m.Else():
            comb += req.op.eq(req_op)
            comb += req.valid.eq(req_go)
            comb += req.mmu_req.eq(r0.mmu_req)
            comb += req.dcbz.eq(r0.req.dcbz)
            comb += req.real_addr.eq(ra)

            with m.If(r0.req.dcbz):
                # force data to 0 for dcbz
                comb += req.data.eq(0)
            with m.Elif(r0.d_valid):
                comb += req.data.eq(r0.req.data)
            with m.Else():
                comb += req.data.eq(d_in.data)

            # Select all bytes for dcbz
            # and for cacheable loads
            with m.If(r0.req.dcbz | (r0.req.load & ~r0.req.nc)):
                comb += req.byte_sel.eq(~0) # all 1s
            with m.Else():
                comb += req.byte_sel.eq(r0.req.byte_sel)
            comb += req.hit_way.eq(req_hit_way)
            comb += req.same_tag.eq(req_same_tag)

        # Store the incoming request from r0,
        # if it is a slow request
        # Note that r1.full = 1 implies req_op = OP_NONE
        with m.If((req_op == Op.OP_LOAD_MISS)
                  | (req_op == Op.OP_LOAD_NC)
                  | (req_op == Op.OP_STORE_MISS)
                  | (req_op == Op.OP_STORE_HIT)):
            sync += r1.req.eq(req)
            sync += r1.full.eq(1)

        # Main state machine
        with m.Switch(r1.state):

            with m.Case(State.IDLE):
                sync += r1.wb.adr.eq(req.real_addr[ROW_LINE_BITS:])
                sync += r1.wb.sel.eq(req.byte_sel)
                sync += r1.wb.dat.eq(req.data)
                sync += r1.dcbz.eq(req.dcbz)

                # Keep track of our index and way
                # for subsequent stores.
                sync += r1.store_index.eq(req_idx)
                sync += r1.store_row.eq(req_row)
                sync += r1.end_row_ix.eq(get_row_of_line(req_row)-1)
                sync += r1.reload_tag.eq(req_tag)
                sync += r1.req.same_tag.eq(1)

                with m.If(req.op == Op.OP_STORE_HIT):
                    sync += r1.store_way.eq(req.hit_way)

                # Reset per-row valid bits,
                # ready for handling OP_LOAD_MISS
                for i in range(ROW_PER_LINE):
                    sync += r1.rows_valid[i].eq(0)

                with m.If(req_op != Op.OP_NONE):
                    sync += Display("cache op %d", req.op)

                with m.Switch(req.op):
                    with m.Case(Op.OP_LOAD_HIT):
                        # stay in IDLE state
                        pass

                    with m.Case(Op.OP_LOAD_MISS):
                        sync += Display("cache miss real addr: %x " \
                                        "idx: %x tag: %x",
                                        req.real_addr, req_row, req_tag)

                        # Start the wishbone cycle
                        sync += r1.wb.we.eq(0)
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)

                        # Track that we had one request sent
                        sync += r1.state.eq(State.RELOAD_WAIT_ACK)
                        sync += r1.write_tag.eq(1)

                    with m.Case(Op.OP_LOAD_NC):
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)
                        sync += r1.wb.we.eq(0)
                        sync += r1.state.eq(State.NC_LOAD_WAIT_ACK)

                    with m.Case(Op.OP_STORE_HIT, Op.OP_STORE_MISS):
                        with m.If(~req.dcbz):
                            sync += r1.state.eq(State.STORE_WAIT_ACK)
                            sync += r1.acks_pending.eq(1)
                            sync += r1.full.eq(0)
                            sync += r1.slow_valid.eq(1)

                            with m.If(~req.mmu_req):
                                sync += r1.ls_valid.eq(1)
                            with m.Else():
                                sync += r1.mmu_done.eq(1)

                            with m.If(req.op == Op.OP_STORE_HIT):
                                sync += r1.write_bram.eq(1)
                        with m.Else():
                            # dcbz is handled much like a load miss except
                            # that we are writing to memory instead of reading
                            sync += r1.state.eq(State.RELOAD_WAIT_ACK)

                            with m.If(req.op == Op.OP_STORE_MISS):
                                sync += r1.write_tag.eq(1)

                        sync += r1.wb.we.eq(1)
                        sync += r1.wb.cyc.eq(1)
                        sync += r1.wb.stb.eq(1)

                    # OP_NONE and OP_BAD do nothing
                    # OP_BAD & OP_STCX_FAIL were
                    # handled above already
                    with m.Case(Op.OP_NONE):
                        pass
                    with m.Case(Op.OP_BAD):
                        pass
                    with m.Case(Op.OP_STCX_FAIL):
                        pass

            with m.Case(State.RELOAD_WAIT_ACK):
                ld_stbs_done = Signal()
                # Requests are all sent if stb is 0
                comb += ld_stbs_done.eq(~r1.wb.stb)

                # If we are still sending requests, was one accepted?
                with m.If((~bus.stall) & r1.wb.stb):
                    # That was the last word? We are done sending.
                    # Clear stb and set ld_stbs_done so we can handle an
                    # eventual last ack on the same cycle.
                    # sigh - reconstruct wb adr with 3 extra 0s at front
                    wb_adr = Cat(Const(0, ROW_OFF_BITS), r1.wb.adr)
                    with m.If(is_last_row_addr(wb_adr, r1.end_row_ix)):
                        sync += r1.wb.stb.eq(0)
                        comb += ld_stbs_done.eq(1)

                    # Calculate the next row address in the current cache line
                    row = Signal(LINE_OFF_BITS-ROW_OFF_BITS)
                    comb += row.eq(r1.wb.adr)
                    sync += r1.wb.adr[:LINE_OFF_BITS-ROW_OFF_BITS].eq(row+1)

                # Incoming acks processing
                sync += r1.forward_valid1.eq(bus.ack)
                with m.If(bus.ack):
                    srow = Signal(ROW_LINE_BITS)
                    comb += srow.eq(r1.store_row)
                    sync += r1.rows_valid[srow].eq(1)

                    # If this is the data we were looking for,
                    # we can complete the request next cycle.
                    # Compare the whole address in case the
                    # request in r1.req is not the one that
                    # started this refill.
                    with m.If(req.valid & r1.req.same_tag &
                              ((r1.dcbz & r1.req.dcbz) |
                               (~r1.dcbz & (r1.req.op == Op.OP_LOAD_MISS))) &
                              (r1.store_row == get_row(req.real_addr))):
                        sync += r1.full.eq(0)
                        sync += r1.slow_valid.eq(1)
                        with m.If(~r1.mmu_req):
                            sync += r1.ls_valid.eq(1)
                        with m.Else():
                            sync += r1.mmu_done.eq(1)
                        sync += r1.forward_sel.eq(~0) # all 1s
                        sync += r1.use_forward1.eq(1)

                    # Check for completion
                    with m.If(ld_stbs_done & is_last_row(r1.store_row,
                                                         r1.end_row_ix)):
                        # Complete wishbone cycle
                        sync += r1.wb.cyc.eq(0)

                        # Cache line is now valid
                        cv = Signal(NUM_WAYS) # per-way valid bits
                        comb += cv.eq(cache_tags[r1.store_index].valid)
                        comb += cv.bit_select(r1.store_way, 1).eq(1)
                        sync += cache_tags[r1.store_index].valid.eq(cv)

                        sync += r1.state.eq(State.IDLE)
                        sync += Display("cache valid set %x "
                                        "idx %d way %d",
                                        cv, r1.store_index, r1.store_way)

                    # Increment store row counter
                    sync += r1.store_row.eq(next_row(r1.store_row))

            with m.Case(State.STORE_WAIT_ACK):
                st_stbs_done = Signal()
                acks = Signal(3)
                adjust_acks = Signal(3)

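                # Bookkeeping sketch: acks_pending counts stores sent but
                # not yet acked; inc_acks/dec_acks are the one-cycle
                # pulses set below, so the count rises as each new store
                # strobe goes out and falls as each ack returns, and the
                # wishbone cycle closes on the final ack with no strobes
                # left outstanding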
                comb += st_stbs_done.eq(~r1.wb.stb)
                comb += acks.eq(r1.acks_pending)

                with m.If(r1.inc_acks != r1.dec_acks):
                    with m.If(r1.inc_acks):
                        comb += adjust_acks.eq(acks + 1)
                    with m.Else():
                        comb += adjust_acks.eq(acks - 1)
                with m.Else():
                    comb += adjust_acks.eq(acks)

                sync += r1.acks_pending.eq(adjust_acks)

                # Clear stb when slave accepted request
                with m.If(~bus.stall):
                    # See if there is another store waiting
                    # to be done which is in the same real page.
                    with m.If(req.valid):
                        _ra = req.real_addr[ROW_LINE_BITS:SET_SIZE_BITS]
                        sync += r1.wb.adr[0:SET_SIZE_BITS].eq(_ra)
                        sync += r1.wb.dat.eq(req.data)
                        sync += r1.wb.sel.eq(req.byte_sel)

                    with m.If((adjust_acks < 7) & req.same_tag &
                              ((req.op == Op.OP_STORE_MISS)
                               | (req.op == Op.OP_STORE_HIT))):
                        sync += r1.wb.stb.eq(1)
                        comb += st_stbs_done.eq(0)

                        with m.If(req.op == Op.OP_STORE_HIT):
                            sync += r1.write_bram.eq(1)
                        sync += r1.full.eq(0)
                        sync += r1.slow_valid.eq(1)

                        # Store requests never come from the MMU
                        sync += r1.ls_valid.eq(1)
                        comb += st_stbs_done.eq(0)
                        sync += r1.inc_acks.eq(1)
                    with m.Else():
                        sync += r1.wb.stb.eq(0)
                        comb += st_stbs_done.eq(1)

                # Got ack ? See if complete.
                with m.If(bus.ack):
                    with m.If(st_stbs_done & (adjust_acks == 1)):
                        sync += r1.state.eq(State.IDLE)
                        sync += r1.wb.cyc.eq(0)
                        sync += r1.wb.stb.eq(0)
                    sync += r1.dec_acks.eq(1)

            with m.Case(State.NC_LOAD_WAIT_ACK):
                # Clear stb when slave accepted request
                with m.If(~bus.stall):
                    sync += r1.wb.stb.eq(0)

                # Got ack ? complete.
                with m.If(bus.ack):
                    sync += r1.state.eq(State.IDLE)
                    sync += r1.full.eq(0)
                    sync += r1.slow_valid.eq(1)

                    with m.If(~r1.mmu_req):
                        sync += r1.ls_valid.eq(1)
                    with m.Else():
                        sync += r1.mmu_done.eq(1)

                    sync += r1.forward_sel.eq(~0) # all 1s
                    sync += r1.use_forward1.eq(1)
                    sync += r1.wb.cyc.eq(0)
                    sync += r1.wb.stb.eq(0)

    def dcache_log(self, m, r1, valid_ra, tlb_hit_way, stall_out, req_op):
        # debug logging only: the call in elaborate below is commented out

        sync = m.d.sync
        d_out, bus, log_out = self.d_out, self.bus, self.log_out

        sync += log_out.eq(Cat(r1.state[:3], valid_ra, tlb_hit_way[:3],
                               stall_out, req_op[:3], d_out.valid, d_out.error,
                               r1.wb.cyc, r1.wb.stb, bus.ack, bus.stall,
                               r1.req.real_addr[3:6]))

    def elaborate(self, platform):

        m = Module()
        comb = m.d.comb
        d_in = self.d_in

        # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
        cache_tags = CacheTagArray()
        cache_tag_set = Signal(TAG_RAM_WIDTH)

        # TODO attribute ram_style : string;
        # TODO attribute ram_style of cache_tags : signal is "distributed";

        """note: these are passed to nmigen.hdl.Memory as "attributes".
           don't know how, just that they are.
        """
        dtlb = TLBArray()
        # TODO attribute ram_style of
        #  dtlb_tags : signal is "distributed";
        # TODO attribute ram_style of
        #  dtlb_ptes : signal is "distributed";

        r0 = RegStage0("r0")
        r0_full = Signal()

        r1 = RegStage1("r1")

        reservation = Reservation()

        # Async signals on incoming request
        req_index = Signal(INDEX_BITS)
        req_row = Signal(ROW_BITS)
        req_hit_way = Signal(WAY_BITS)
        req_tag = Signal(TAG_BITS)
        req_op = Signal(Op)
        req_data = Signal(64)
        req_same_tag = Signal()
        req_go = Signal()

        early_req_row = Signal(ROW_BITS)

        cancel_store = Signal()
        set_rsrv = Signal()
        clear_rsrv = Signal()

        r0_valid = Signal()
        r0_stall = Signal()

        use_forward1_next = Signal()
        use_forward2_next = Signal()

        cache_out_row = Signal(WB_DATA_BITS)

        plru_victim = PLRUOut()
        replace_way = Signal(WAY_BITS)

        # Wishbone read/write/cache write formatting signals
        bus_sel = Signal(8)

        # TLB signals
        tlb_tag_way = Signal(TLB_TAG_WAY_BITS)
        tlb_pte_way = Signal(TLB_PTE_WAY_BITS)
        tlb_valid_way = Signal(TLB_NUM_WAYS)
        tlb_req_index = Signal(TLB_SET_BITS)
        tlb_hit = Signal()
        tlb_hit_way = Signal(TLB_WAY_BITS)
        pte = Signal(TLB_PTE_BITS)
        ra = Signal(REAL_ADDR_BITS)
        valid_ra = Signal()
        perm_attr = PermAttr("dc_perms")
        rc_ok = Signal()
        perm_ok = Signal()
        access_ok = Signal()

        tlb_plru_victim = TLBPLRUOut()

        # we don't yet handle collisions between loadstore1 requests
        # and MMU requests
        comb += self.m_out.stall.eq(0)

        # Hold off the request in r0 when r1 has an uncompleted request
        comb += r0_stall.eq(r0_full & (r1.full | d_in.hold))
        comb += r0_valid.eq(r0_full & ~r1.full & ~d_in.hold)
        comb += self.stall_out.eq(r0_stall)


        # deal with litex not doing wishbone pipeline mode
        # XXX in wrong way. FIFOs are needed in the SRAM test
        # so that stb/ack match up. same thing done in icache.py
        comb += self.bus.stall.eq(self.bus.cyc & ~self.bus.ack)

        # Wire up wishbone request latch out of stage 1
        comb += self.bus.we.eq(r1.wb.we)
        comb += self.bus.adr.eq(r1.wb.adr)
        comb += self.bus.sel.eq(r1.wb.sel)
        comb += self.bus.stb.eq(r1.wb.stb)
        comb += self.bus.dat_w.eq(r1.wb.dat)
        comb += self.bus.cyc.eq(r1.wb.cyc)

        # call sub-functions putting everything together, using shared
        # signals established above
        self.stage_0(m, r0, r1, r0_full)
        self.tlb_read(m, r0_stall, tlb_valid_way,
                      tlb_tag_way, tlb_pte_way, dtlb)
        self.tlb_search(m, tlb_req_index, r0, r0_valid,
                        tlb_valid_way, tlb_tag_way, tlb_hit_way,
                        tlb_pte_way, pte, tlb_hit, valid_ra, perm_attr, ra)
        self.tlb_update(m, r0_valid, r0, dtlb, tlb_req_index,
                        tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_tag_way,
                        tlb_pte_way)
        self.maybe_plrus(m, r1, plru_victim)
        self.maybe_tlb_plrus(m, r1, tlb_plru_victim)
        self.cache_tag_read(m, r0_stall, req_index, cache_tag_set, cache_tags)
        self.dcache_request(m, r0, ra, req_index, req_row, req_tag,
                            r0_valid, r1, cache_tags, replace_way,
                            use_forward1_next, use_forward2_next,
                            req_hit_way, plru_victim, rc_ok, perm_attr,
                            valid_ra, perm_ok, access_ok, req_op, req_go,
                            tlb_pte_way,
                            tlb_hit, tlb_hit_way, tlb_valid_way, cache_tag_set,
                            cancel_store, req_same_tag, r0_stall, early_req_row)
        self.reservation_comb(m, cancel_store, set_rsrv, clear_rsrv,
                              r0_valid, r0, reservation)
        self.reservation_reg(m, r0_valid, access_ok, set_rsrv, clear_rsrv,
                             reservation, r0)
        self.writeback_control(m, r1, cache_out_row)
        self.rams(m, r1, early_req_row, cache_out_row, replace_way)
        self.dcache_fast_hit(m, req_op, r0_valid, r0, r1,
                             req_hit_way, req_index, req_tag, access_ok,
                             tlb_hit, tlb_hit_way, tlb_req_index)
        self.dcache_slow(m, r1, use_forward1_next, use_forward2_next,
                         r0, replace_way,
                         req_hit_way, req_same_tag,
                         r0_valid, req_op, cache_tags, req_go, ra)
        #self.dcache_log(m, r1, valid_ra, tlb_hit_way, self.stall_out, req_op)

        return m


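# A minimal end-to-end simulation sketch (illustrative only: the helper
# name and stimulus here are invented, and the SRAM wiring follows the
# pattern used by the unit tests elsewhere in soc). It backs the dcache
# wishbone bus with a small SRAM, issues one real-mode privileged load
# and prints the returned data. Treat it as a sketch of how d_in/d_out
# and the bus are driven, not as a definitive test.
def dcache_load_sketch():
    dut = DCache()
    mem = Memory(width=64, depth=16, init=range(16))
    sram = SRAM(memory=mem, granularity=8)

    m = Module()
    m.submodules.dcache = dut
    m.submodules.sram = sram

    # connect the dcache wishbone master to the SRAM slave
    m.d.comb += sram.bus.cyc.eq(dut.bus.cyc)
    m.d.comb += sram.bus.stb.eq(dut.bus.stb)
    m.d.comb += sram.bus.we.eq(dut.bus.we)
    m.d.comb += sram.bus.sel.eq(dut.bus.sel)
    m.d.comb += sram.bus.adr.eq(dut.bus.adr)
    m.d.comb += sram.bus.dat_w.eq(dut.bus.dat_w)
    m.d.comb += dut.bus.ack.eq(sram.bus.ack)
    m.d.comb += dut.bus.dat_r.eq(sram.bus.dat_r)

    def process():
        # issue a single real-mode load of the doubleword at 0x40
        # (priv_mode must be set: real mode reports priv in perm_attr)
        yield dut.d_in.priv_mode.eq(1)
        yield dut.d_in.load.eq(1)
        yield dut.d_in.byte_sel.eq(0xff)
        yield dut.d_in.addr.eq(0x40)
        yield dut.d_in.valid.eq(1)
        yield
        yield dut.d_in.valid.eq(0)
        # wait for the (miss, reload, complete) sequence to finish
        while not (yield dut.d_out.valid):
            yield
        print("load data %x" % (yield dut.d_out.data))

    sim = Simulator(m)
    sim.add_clock(1e-6)
    sim.add_sync_process(wrap(process()))
    sim.run()

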
if __name__ == '__main__':
    dut = DCache()
    vl = rtlil.convert(dut, ports=[])
    with open("test_dcache.il", "w") as f:
        f.write(vl)