request a flush of icache to clear the instruction-fault state
[soc.git] / src / soc / simple / issuer.py
1 """simple core issuer
2
3 not in any way intended for production use. this runs a FSM that:
4
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
9 * increments the PC
10 * does it all over again
11
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to at least provide something that can be further incrementally
15 improved.
16 """
17
18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
19 ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
20 from nmigen.cli import rtlil
21 from nmigen.cli import main
22 import sys
23
24 from nmutil.singlepipe import ControlBase
25 from soc.simple.core_data import FetchOutput, FetchInput
26
27 from nmigen.lib.coding import PriorityEncoder
28
29 from openpower.decoder.power_decoder import create_pdecode
30 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
31 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
32 from openpower.decoder.decode2execute1 import Data
33 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
34 SVP64PredMode)
35 from openpower.state import CoreState
36 from openpower.consts import (CR, SVP64CROffs)
37 from soc.experiment.testmem import TestMemory # test only for instructions
38 from soc.regfile.regfiles import StateRegs, FastRegs
39 from soc.simple.core import NonProductionCore
40 from soc.config.test.test_loadstore import TestMemPspec
41 from soc.config.ifetch import ConfigFetchUnit
42 from soc.debug.dmi import CoreDebug, DMIInterface
43 from soc.debug.jtag import JTAG
44 from soc.config.pinouts import get_pinspecs
45 from soc.interrupts.xics import XICS_ICP, XICS_ICS
46 from soc.bus.simple_gpio import SimpleGPIO
47 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
48 from soc.clock.select import ClockSelect
49 from soc.clock.dummypll import DummyPLL
50 from openpower.sv.svstate import SVSTATERec
51 from soc.experiment.icache import ICache
52
53 from nmutil.util import rising_edge
54
55
def get_insn(f_instr_o, pc):
    """Return the 32-bit instruction word addressed by ``pc``.

    A fetch output that is exactly 32 bits wide is handed back unchanged.
    Any other width is treated as holding several 32-bit words, and bit 2
    of ``pc`` is passed to ``word_select`` as the index of the word wanted.
    """
    is_single_word = f_instr_o.width == 32
    if not is_single_word:
        # wider fetch data: PC bit 2 chooses between the 32-bit words
        return f_instr_o.word_select(pc[2], 32)
    return f_instr_o
62
63 # gets state input or reads from state regfile
64
65
def state_get(m, core_rst, state_i, name, regfile, regnum):
    """Return a 64-bit value taken from ``state_i`` or from a regfile port.

    * m: the Module that the logic is added to
    * core_rst: while asserted, none of the logic below is active
    * state_i: override input: when state_i.ok is set, state_i.data is used
    * name: name given to the returned Signal (and prefix of its delay flag)
    * regfile: read port, driven through ``ren`` and read through ``o_data``
    * regnum: bit position that is set in regfile.ren to request the read

    returns the combinatorial 64-bit Signal carrying the selected value.
    """
    comb = m.d.comb
    sync = m.d.sync
    # the value being obtained (the helper is generic: "name" and "regnum"
    # decide which state register this actually is)
    res = Signal(64, reset_less=True, name=name)
    # registered copy of "no override was present", one clock later
    res_ok_delay = Signal(name="%s_ok_delay" % name)
    with m.If(~core_rst):
        sync += res_ok_delay.eq(~state_i.ok)
        with m.If(state_i.ok):
            # incoming override (start from pc_i)
            comb += res.eq(state_i.data)
        with m.Else():
            # otherwise request a read from the regfile...
            comb += regfile.ren.eq(1 << regnum)
        # ... but the regfile data is only picked up on a 1-clock delay
        with m.If(res_ok_delay):
            comb += res.eq(regfile.o_data)
    return res
84
85
def get_predint(m, mask, name):
    """Decode an SVP64 integer-predicate mask field into control signals.

    Mirrors the equivalent function in ISACaller, except that no INT
    register is read here: the result only describes "what needs to be
    done", i.e. which INT reg, whether it is shifted, whether it is
    bit-inverted.  ``name`` prefixes the names of the created Signals.

    Returns the tuple (regread, invert, unary, all1s):

    * regread is the GPR register number to be read
    * invert is set when the register value is to be inverted
    * unary is set when the register contents are to be used as 1<<r3
    * all1s is set when no mask is to be applied
    """
    comb = m.d.comb
    regread = Signal(5, name=name+"regread")
    invert = Signal(name=name+"invert")
    unary = Signal(name=name+"unary")
    all1s = Signal(name=name+"all1s")
    # for each predicate encoding: the signals it drives, and their values.
    # anything not listed for an encoding is simply left undriven there.
    decode_table = (
        (SVP64PredInt.ALWAYS, ((all1s, 1),)),  # use all ones
        (SVP64PredInt.R3_UNARY, ((regread, 3), (unary, 1))),  # 1<<r3
        (SVP64PredInt.R3, ((regread, 3),)),
        (SVP64PredInt.R3_N, ((regread, 3), (invert, 1))),
        (SVP64PredInt.R10, ((regread, 10),)),
        (SVP64PredInt.R10_N, ((regread, 10), (invert, 1))),
        (SVP64PredInt.R30, ((regread, 30),)),
        (SVP64PredInt.R30_N, ((regread, 30), (invert, 1))),
    )
    with m.Switch(mask):
        for encoding, drives in decode_table:
            with m.Case(encoding.value):
                for sig, value in drives:
                    comb += sig.eq(value)
    return regread, invert, unary, all1s
124
125
def get_predcr(m, mask, name):
    """Decode an SVP64 CR-predicate mask field into (bit index, invert).

    Same decoding as _get_predcr in ISACaller.  Each condition shares a
    CR field bit with its opposite (LT/GE, GT/LE, EQ/NE, SO/NS): the
    second of each pair selects the same bit with ``invert`` set.
    ``name`` prefixes the names of the two Signals that are returned.
    """
    comb = m.d.comb
    idx = Signal(2, name=name+"idx")
    invert = Signal(name=name+"crinvert")
    # (predicate encoding, CR field bit to test, invert the tested bit)
    decode_table = (
        (SVP64PredCR.LT, CR.LT, 0),
        (SVP64PredCR.GE, CR.LT, 1),
        (SVP64PredCR.GT, CR.GT, 0),
        (SVP64PredCR.LE, CR.GT, 1),
        (SVP64PredCR.EQ, CR.EQ, 0),
        (SVP64PredCR.NE, CR.EQ, 1),
        (SVP64PredCR.SO, CR.SO, 0),
        (SVP64PredCR.NS, CR.SO, 1),
    )
    with m.Switch(mask):
        for encoding, cr_bit, negate in decode_table:
            with m.Case(encoding.value):
                comb += idx.eq(cr_bit)
                comb += invert.eq(negate)
    return idx, invert
159
160
161 # Fetch Finite State Machine.
162 # WARNING: there are currently DriverConflicts but it's actually working.
163 # TODO, here: everything that is global in nature, information from the
164 # main TestIssuerInternal, needs to move to either ispec() or ospec().
165 # not only that: TestIssuerInternal.imem can entirely move into here
166 # because imem is only ever accessed inside the FetchFSM.
class FetchFSM(ControlBase):
    """Instruction-fetch stage: a ControlBase whose logic is an nmigen FSM.

    The incoming side (self.p) supplies the PC to fetch from; the outgoing
    side (self.n) is raised valid once the fetched opcode has been stored
    in pdecode2.dec.raw_opcode_in.

    NOTE(review): elaborate() reads self.svp64 and self.core_bigendian_i
    when svp64_en is set, but neither is assigned in __init__ below.
    presumably whoever creates the FetchFSM attaches them - confirm.
    """

    def __init__(self, allow_overlap, svp64_en, imem, core_rst,
                 pdecode2, cur_state,
                 dbg, core, svstate, nia, is_svp64_mode):
        """store references to the (shared) objects used by the FSM

        * allow_overlap: if set, INSN_READ drops back to IDLE whilst
          dbg.stopping_o is asserted
        * svp64_en: if set, logic for detecting SVP64 prefixes is generated
        * imem: instruction fetch unit (a_pc_i, a_i_valid, f_i_valid,
          f_busy_o and f_instr_o are used)
        * core_rst: while asserted, nia is cleared to zero
        * pdecode2: decoder which receives the raw opcode (and SVP64 RM)
        * cur_state: pc, svstate and msr are captured into this
        * dbg: debug interface (stopping_o is used)
        * core: the core (its state regfile and, if present, its icache)
        * svstate: value copied into cur_state.svstate when a fetch starts
        * nia: Signal set to the address of the following instruction
        * is_svp64_mode: Signal recording that a prefix was detected
        """
        self.allow_overlap = allow_overlap
        self.svp64_en = svp64_en
        self.imem = imem
        self.core_rst = core_rst
        self.pdecode2 = pdecode2
        self.cur_state = cur_state
        self.dbg = dbg
        self.core = core
        self.svstate = svstate
        self.nia = nia
        self.is_svp64_mode = is_svp64_mode

        # set up pipeline ControlBase and allocate i/o specs
        # (unusual: normally done by the Pipeline API)
        super().__init__(stage=self)
        self.p.i_data, self.n.o_data = self.new_specs(None)
        self.i, self.o = self.p.i_data, self.n.o_data

    # next 3 functions are Stage API Compliance
    def setup(self, m, i):
        """Stage API: nothing to set up"""
        pass

    def ispec(self):
        """Stage API: the input data specification is a FetchInput"""
        return FetchInput()

    def ospec(self):
        """Stage API: the output data specification is a FetchOutput"""
        return FetchOutput()

    def elaborate(self, platform):
        """fetch FSM

        this FSM performs fetch of raw instruction data, partial-decodes
        it 32-bit at a time to detect SVP64 prefixes, and will optionally
        read a 2nd 32-bit quantity if that occurs.

        states: IDLE -> INSN_READ -> (INSN_READ2 ->) INSN_READY -> IDLE
        """
        m = super().elaborate(platform)

        dbg = self.dbg
        core = self.core
        pc = self.i.pc
        svstate = self.svstate
        nia = self.nia
        is_svp64_mode = self.is_svp64_mode
        # ready/valid handshakes: "p" faces the PC supplier, "n" faces
        # the consumer of the fetched instruction
        fetch_pc_o_ready = self.p.o_ready
        fetch_pc_i_valid = self.p.i_valid
        fetch_insn_o_valid = self.n.o_valid
        fetch_insn_i_ready = self.n.i_ready

        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        cur_state = self.cur_state
        dec_opcode_o = pdecode2.dec.raw_opcode_in  # raw opcode

        # cleared when an MSR read is requested (IDLE), set again once the
        # result has been captured (INSN_READ)
        msr_read = Signal(reset=1)

        # also note instruction fetch failed
        # NOTE(review): flush_needed is not used again within this method
        if hasattr(core, "icache"):
            fetch_failed = core.icache.i_out.fetch_failed
            flush_needed = True
        else:
            fetch_failed = Const(0, 1)
            flush_needed = False

        # don't read msr every cycle
        staterf = self.core.regs.rf['state']
        state_r_msr = staterf.r_ports['msr']  # MSR rd

        # default: no MSR read requested (overridden in IDLE, below)
        comb += state_r_msr.ren.eq(0)

        with m.FSM(name='fetch_fsm'):

            # waiting (zzz)
            with m.State("IDLE"):
                with m.If(~dbg.stopping_o & ~fetch_failed):
                    comb += fetch_pc_o_ready.eq(1)
                with m.If(fetch_pc_i_valid & ~fetch_failed):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                    sync += cur_state.pc.eq(pc)
                    sync += cur_state.svstate.eq(svstate)  # and svstate

                    # initiate read of MSR.  arrives one clock later
                    comb += state_r_msr.ren.eq(1 << StateRegs.MSR)
                    sync += msr_read.eq(0)

                    m.next = "INSN_READ"  # move to "wait for bus" phase

            # dummy pause to find out why simulation is not keeping up
            with m.State("INSN_READ"):
                # elaboration-time choice: only honour the debug "stopping"
                # request here when overlap is allowed
                if self.allow_overlap:
                    stopping = dbg.stopping_o
                else:
                    stopping = Const(0)
                with m.If(stopping):
                    # stopping: jump back to idle
                    m.next = "IDLE"
                with m.Else():
                    # one cycle later, msr/sv read arrives.  valid only once.
                    with m.If(~msr_read):
                        sync += msr_read.eq(1)  # yeah don't read it again
                        sync += cur_state.msr.eq(state_r_msr.o_data)
                    with m.If(self.imem.f_busy_o & ~fetch_failed):  # zzz...
                        # busy but not fetch failed: stay in wait-read
                        comb += self.imem.a_i_valid.eq(1)
                        comb += self.imem.f_i_valid.eq(1)
                    with m.Else():
                        # not busy (or fetch failed!): instruction fetched
                        # when fetch failed, the instruction gets ignored
                        # by the decoder
                        insn = get_insn(self.imem.f_instr_o, cur_state.pc)
                        if self.svp64_en:
                            # NOTE(review): self.svp64 and
                            # self.core_bigendian_i are not set by
                            # FetchFSM.__init__ - confirm they are provided
                            svp64 = self.svp64
                            # decode the SVP64 prefix, if any
                            comb += svp64.raw_opcode_in.eq(insn)
                            comb += svp64.bigendian.eq(self.core_bigendian_i)
                            # pass the decoded prefix (if any) to PowerDecoder2
                            sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
                            sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
                            # remember whether this is a prefixed instruction,
                            # so the FSM can readily loop when VL==0
                            sync += is_svp64_mode.eq(svp64.is_svp64_mode)
                            # calculate the address of the following instruction
                            insn_size = Mux(svp64.is_svp64_mode, 8, 4)
                            sync += nia.eq(cur_state.pc + insn_size)
                            with m.If(~svp64.is_svp64_mode):
                                # with no prefix, store the instruction
                                # and hand it directly to the next FSM
                                sync += dec_opcode_o.eq(insn)
                                m.next = "INSN_READY"
                            with m.Else():
                                # fetch the rest of the instruction from memory
                                comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
                                comb += self.imem.a_i_valid.eq(1)
                                comb += self.imem.f_i_valid.eq(1)
                                m.next = "INSN_READ2"
                        else:
                            # not SVP64 - 32-bit only
                            sync += nia.eq(cur_state.pc + 4)
                            sync += dec_opcode_o.eq(insn)
                            m.next = "INSN_READY"

            # second 32-bit word of a prefixed (64-bit) instruction
            with m.State("INSN_READ2"):
                with m.If(self.imem.f_busy_o):  # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
                    sync += dec_opcode_o.eq(insn)
                    m.next = "INSN_READY"
                    # TODO: probably can start looking at pdecode2.rm_dec
                    # here or maybe even in INSN_READ state, if svp64_mode
                    # detected, in order to trigger - and wait for - the
                    # predicate reading.
                    # NOTE(review): pmode is assigned but not used, and the
                    # string literal after it is inert pseudo-code
                    if self.svp64_en:
                        pmode = pdecode2.rm_dec.predmode
                    """
                    if pmode != SVP64PredMode.ALWAYS.value:
                        fire predicate loading FSM and wait before
                        moving to INSN_READY
                    else:
                        sync += self.srcmask.eq(-1) # set to all 1s
                        sync += self.dstmask.eq(-1) # set to all 1s
                        m.next = "INSN_READY"
                    """

            with m.State("INSN_READY"):
                # hand over the instruction, to be decoded
                comb += fetch_insn_o_valid.eq(1)
                with m.If(fetch_insn_i_ready):
                    m.next = "IDLE"

        # whatever was done above, over-ride it if core reset is held
        with m.If(self.core_rst):
            sync += nia.eq(0)

        return m
355
356
357 class TestIssuerInternal(Elaboratable):
358 """TestIssuer - reads instructions from TestMemory and issues them
359
360 efficiency and speed is not the main goal here: functional correctness
361 and code clarity is. optimisations (which almost 100% interfere with
362 easy understanding) come later.
363 """
364
365 def __init__(self, pspec):
366
367 # test is SVP64 is to be enabled
368 self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
369
370 # and if regfiles are reduced
371 self.regreduce_en = (hasattr(pspec, "regreduce") and
372 (pspec.regreduce == True))
373
374 # and if overlap requested
375 self.allow_overlap = (hasattr(pspec, "allow_overlap") and
376 (pspec.allow_overlap == True))
377
378 # JTAG interface. add this right at the start because if it's
379 # added it *modifies* the pspec, by adding enable/disable signals
380 # for parts of the rest of the core
381 self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
382 self.dbg_domain = "sync" # sigh "dbgsunc" too problematic
383 # self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
384 if self.jtag_en:
385 # XXX MUST keep this up-to-date with litex, and
386 # soc-cocotb-sim, and err.. all needs sorting out, argh
387 subset = ['uart',
388 'mtwi',
389 'eint', 'gpio', 'mspi0',
390 # 'mspi1', - disabled for now
391 # 'pwm', 'sd0', - disabled for now
392 'sdr']
393 self.jtag = JTAG(get_pinspecs(subset=subset),
394 domain=self.dbg_domain)
395 # add signals to pspec to enable/disable icache and dcache
396 # (or data and intstruction wishbone if icache/dcache not included)
397 # https://bugs.libre-soc.org/show_bug.cgi?id=520
398 # TODO: do we actually care if these are not domain-synchronised?
399 # honestly probably not.
400 pspec.wb_icache_en = self.jtag.wb_icache_en
401 pspec.wb_dcache_en = self.jtag.wb_dcache_en
402 self.wb_sram_en = self.jtag.wb_sram_en
403 else:
404 self.wb_sram_en = Const(1)
405
406 # add 4k sram blocks?
407 self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
408 pspec.sram4x4kblock == True)
409 if self.sram4x4k:
410 self.sram4k = []
411 for i in range(4):
412 self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
413 # features={'err'}
414 ))
415
416 # add interrupt controller?
417 self.xics = hasattr(pspec, "xics") and pspec.xics == True
418 if self.xics:
419 self.xics_icp = XICS_ICP()
420 self.xics_ics = XICS_ICS()
421 self.int_level_i = self.xics_ics.int_level_i
422
423 # add GPIO peripheral?
424 self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
425 if self.gpio:
426 self.simple_gpio = SimpleGPIO()
427 self.gpio_o = self.simple_gpio.gpio_o
428
429 # main instruction core. suitable for prototyping / demo only
430 self.core = core = NonProductionCore(pspec)
431 self.core_rst = ResetSignal("coresync")
432
433 # instruction decoder. goes into Trap Record
434 #pdecode = create_pdecode()
435 self.cur_state = CoreState("cur") # current state (MSR/PC/SVSTATE)
436 self.pdecode2 = PowerDecode2(None, state=self.cur_state,
437 opkls=IssuerDecode2ToOperand,
438 svp64_en=self.svp64_en,
439 regreduce_en=self.regreduce_en)
440 pdecode = self.pdecode2.dec
441
442 if self.svp64_en:
443 self.svp64 = SVP64PrefixDecoder() # for decoding SVP64 prefix
444
445 # Test Instruction memory
446 if hasattr(core, "icache"):
447 # XXX BLECH! use pspec to transfer the I-Cache to ConfigFetchUnit
448 # truly dreadful. needs a huge reorg.
449 pspec.icache = core.icache
450 self.imem = ConfigFetchUnit(pspec).fu
451
452 # DMI interface
453 self.dbg = CoreDebug()
454
455 # instruction go/monitor
456 self.pc_o = Signal(64, reset_less=True)
457 self.pc_i = Data(64, "pc_i") # set "ok" to indicate "please change me"
458 self.svstate_i = Data(64, "svstate_i") # ditto
459 self.core_bigendian_i = Signal() # TODO: set based on MSR.LE
460 self.busy_o = Signal(reset_less=True)
461 self.memerr_o = Signal(reset_less=True)
462
463 # STATE regfile read /write ports for PC, MSR, SVSTATE
464 staterf = self.core.regs.rf['state']
465 self.state_r_pc = staterf.r_ports['cia'] # PC rd
466 self.state_w_pc = staterf.w_ports['d_wr1'] # PC wr
467 self.state_r_sv = staterf.r_ports['sv'] # SVSTATE rd
468 self.state_w_sv = staterf.w_ports['sv'] # SVSTATE wr
469
470 # DMI interface access
471 intrf = self.core.regs.rf['int']
472 crrf = self.core.regs.rf['cr']
473 xerrf = self.core.regs.rf['xer']
474 self.int_r = intrf.r_ports['dmi'] # INT read
475 self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
476 self.xer_r = xerrf.r_ports['full_xer'] # XER read
477
478 if self.svp64_en:
479 # for predication
480 self.int_pred = intrf.r_ports['pred'] # INT predicate read
481 self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read
482
483 # hack method of keeping an eye on whether branch/trap set the PC
484 self.state_nia = self.core.regs.rf['state'].w_ports['nia']
485 self.state_nia.wen.name = 'state_nia_wen'
486
487 # pulse to synchronize the simulator at instruction end
488 self.insn_done = Signal()
489
490 # indicate any instruction still outstanding, in execution
491 self.any_busy = Signal()
492
493 if self.svp64_en:
494 # store copies of predicate masks
495 self.srcmask = Signal(64)
496 self.dstmask = Signal(64)
497
    def fetch_predicate_fsm(self, m,
                            pred_insn_i_valid, pred_insn_o_ready,
                            pred_mask_o_valid, pred_mask_i_ready):
        """fetch_predicate_fsm - obtains (constructs in the case of CR)
        src/dest predicate masks

        https://bugs.libre-soc.org/show_bug.cgi?id=617
        the predicates can be read here, by using IntRegs r_ports['pred']
        or CRRegs r_ports['cr_pred'].  in the case of CRs it will have to
        be done through multiple reads, extracting one relevant at a time.
        later, a faster way would be to use the 32-bit-wide CR port but
        this is more complex decoding, here.  equivalent code used in
        ISACaller is "from openpower.decoder.isa.caller import get_predcr"

        note: this ENTIRE FSM is not to be called when svp64 is disabled

        handshake signals (all supplied by the caller):

        * pred_insn_i_valid: request to start fetching the masks
        * pred_insn_o_ready: driven high here while idle
        * pred_mask_o_valid: driven high here once srcmask/dstmask are set
        * pred_mask_i_ready: acknowledgement, returns this FSM to idle

        the results are left in self.srcmask and self.dstmask.
        """
        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        rm_dec = pdecode2.rm_dec  # SVP64RMModeDecode
        predmode = rm_dec.predmode
        srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
        cr_pred, int_pred = self.cr_pred, self.int_pred  # read regfiles
        # get src/dst step, so we can skip already used mask bits
        cur_state = self.cur_state
        srcstep = cur_state.svstate.srcstep
        dststep = cur_state.svstate.dststep
        cur_vl = cur_state.svstate.vl

        # decode predicates.  the same fields are decoded both ways (INT
        # and CR): predmode decides, below, which decoding gets used
        sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
        dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
        sidx, scrinvert = get_predcr(m, srcpred, 's')
        didx, dcrinvert = get_predcr(m, dstpred, 'd')

        # store fetched masks, for either intpred or crpred
        # when src/dst step is not zero, the skipped mask bits need to be
        # shifted-out, before actually storing them in src/dest mask
        new_srcmask = Signal(64, reset_less=True)
        new_dstmask = Signal(64, reset_less=True)

        with m.FSM(name="fetch_predicate"):

            with m.State("FETCH_PRED_IDLE"):
                comb += pred_insn_o_ready.eq(1)
                with m.If(pred_insn_i_valid):
                    with m.If(predmode == SVP64PredMode.INT):
                        # skip fetching destination mask register, when
                        # the destination predicate decodes as all-ones
                        with m.If(dall1s):
                            sync += new_dstmask.eq(-1)
                            # directly go to fetch source mask register
                            # guaranteed not to be zero (otherwise predmode
                            # would be SVP64PredMode.ALWAYS, not INT)
                            comb += int_pred.addr.eq(sregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_SRC_READ"
                        # fetch destination predicate register
                        with m.Else():
                            comb += int_pred.addr.eq(dregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_DST_READ"
                    with m.Elif(predmode == SVP64PredMode.CR):
                        # go fetch masks from the CR register file
                        sync += new_srcmask.eq(0)
                        sync += new_dstmask.eq(0)
                        m.next = "CR_READ"
                    with m.Else():
                        # neither INT nor CR: both masks are all-ones,
                        # and there is nothing to read or to shift
                        sync += self.srcmask.eq(-1)
                        sync += self.dstmask.eq(-1)
                        m.next = "FETCH_PRED_DONE"

            with m.State("INT_DST_READ"):
                # store destination mask (the read was requested in the
                # previous state)
                inv = Repl(dinvert, 64)
                with m.If(dunary):
                    # set selected mask bit for 1<<r3 mode
                    # (only the low 6 bits of the register are used)
                    dst_shift = Signal(range(64))
                    comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
                    sync += new_dstmask.eq(1 << dst_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
                # skip fetching source mask register, when the source
                # predicate decodes as all-ones
                with m.If(sall1s):
                    sync += new_srcmask.eq(-1)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                # fetch source predicate register
                with m.Else():
                    comb += int_pred.addr.eq(sregread)
                    comb += int_pred.ren.eq(1)
                    m.next = "INT_SRC_READ"

            with m.State("INT_SRC_READ"):
                # store source mask
                inv = Repl(sinvert, 64)
                with m.If(sunary):
                    # set selected mask bit for 1<<r3 mode
                    src_shift = Signal(range(64))
                    comb += src_shift.eq(self.int_pred.o_data & 0b111111)
                    sync += new_srcmask.eq(1 << src_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
                m.next = "FETCH_PRED_SHIFT_MASK"

            # fetch masks from the CR register file
            # implements the following loop:
            # idx, inv = get_predcr(mask)
            # mask = 0
            # for cr_idx in range(vl):
            #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
            #     if cr[idx] ^ inv:
            #         mask |= 1 << cr_idx
            # return mask
            with m.State("CR_READ"):
                # CR index to be read, which will be ready by the next cycle
                cr_idx = Signal.like(cur_vl, reset_less=True)
                # submit the read operation to the regfile
                with m.If(cr_idx != cur_vl):
                    # the CR read port is unary ...
                    # ren = 1 << cr_idx
                    # ... in MSB0 convention ...
                    # ren = 1 << (7 - cr_idx)
                    # ... and with an offset:
                    # ren = 1 << (7 - off - cr_idx)
                    idx = SVP64CROffs.CRPred + cr_idx
                    comb += cr_pred.ren.eq(1 << (7 - idx))
                    # signal data valid in the next cycle
                    # (cr_read is created here but, being an ordinary
                    # python variable, it is also used further down)
                    cr_read = Signal(reset_less=True)
                    sync += cr_read.eq(1)
                    # load the next index
                    sync += cr_idx.eq(cr_idx + 1)
                with m.Else():
                    # exit on loop end
                    sync += cr_read.eq(0)
                    sync += cr_idx.eq(0)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                with m.If(cr_read):
                    # compensate for the one cycle delay on the regfile
                    cur_cr_idx = Signal.like(cur_vl)
                    comb += cur_cr_idx.eq(cr_idx - 1)
                    # read the CR field, select the appropriate bit
                    cr_field = Signal(4)
                    scr_bit = Signal()
                    dcr_bit = Signal()
                    comb += cr_field.eq(cr_pred.o_data)
                    comb += scr_bit.eq(cr_field.bit_select(sidx, 1)
                                       ^ scrinvert)
                    comb += dcr_bit.eq(cr_field.bit_select(didx, 1)
                                       ^ dcrinvert)
                    # set the corresponding mask bit
                    bit_to_set = Signal.like(self.srcmask)
                    comb += bit_to_set.eq(1 << cur_cr_idx)
                    with m.If(scr_bit):
                        sync += new_srcmask.eq(new_srcmask | bit_to_set)
                    with m.If(dcr_bit):
                        sync += new_dstmask.eq(new_dstmask | bit_to_set)

            with m.State("FETCH_PRED_SHIFT_MASK"):
                # shift-out skipped mask bits
                sync += self.srcmask.eq(new_srcmask >> srcstep)
                sync += self.dstmask.eq(new_dstmask >> dststep)
                m.next = "FETCH_PRED_DONE"

            with m.State("FETCH_PRED_DONE"):
                # masks are available: wait for them to be accepted
                comb += pred_mask_o_valid.eq(1)
                with m.If(pred_mask_i_ready):
                    m.next = "FETCH_PRED_IDLE"
666
667 def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
668 dbg, core_rst, is_svp64_mode,
669 fetch_pc_o_ready, fetch_pc_i_valid,
670 fetch_insn_o_valid, fetch_insn_i_ready,
671 pred_insn_i_valid, pred_insn_o_ready,
672 pred_mask_o_valid, pred_mask_i_ready,
673 exec_insn_i_valid, exec_insn_o_ready,
674 exec_pc_o_valid, exec_pc_i_ready):
675 """issue FSM
676
677 decode / issue FSM. this interacts with the "fetch" FSM
678 through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
679 (outgoing). also interacts with the "execute" FSM
680 through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
681 (incoming).
682 SVP64 RM prefixes have already been set up by the
683 "fetch" phase, so execute is fairly straightforward.
684 """
685
686 comb = m.d.comb
687 sync = m.d.sync
688 pdecode2 = self.pdecode2
689 cur_state = self.cur_state
690
691 # temporaries
692 dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode
693
694 # for updating svstate (things like srcstep etc.)
695 update_svstate = Signal() # set this (below) if updating
696 new_svstate = SVSTATERec("new_svstate")
697 comb += new_svstate.eq(cur_state.svstate)
698
699 # precalculate srcstep+1 and dststep+1
700 cur_srcstep = cur_state.svstate.srcstep
701 cur_dststep = cur_state.svstate.dststep
702 next_srcstep = Signal.like(cur_srcstep)
703 next_dststep = Signal.like(cur_dststep)
704 comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
705 comb += next_dststep.eq(cur_state.svstate.dststep+1)
706
707 # note if an exception happened. in a pipelined or OoO design
708 # this needs to be accompanied by "shadowing" (or stalling)
709 exc_happened = self.core.o.exc_happened
710 # also note instruction fetch failed
711 if hasattr(core, "icache"):
712 fetch_failed = core.icache.i_out.fetch_failed
713 flush_needed = True
714 else:
715 fetch_failed = Const(0, 1)
716 flush_needed = False
717 # set to fault in decoder
718 # update (highest priority) instruction fault
719 rising_fetch_failed = rising_edge(m, fetch_failed)
720 with m.If(rising_fetch_failed):
721 sync += pdecode2.instr_fault.eq(1)
722
723 with m.FSM(name="issue_fsm"):
724
725 # sync with the "fetch" phase which is reading the instruction
726 # at this point, there is no instruction running, that
727 # could inadvertently update the PC.
728 with m.State("ISSUE_START"):
729 # reset instruction fault
730 sync += pdecode2.instr_fault.eq(0)
731 # wait on "core stop" release, before next fetch
732 # need to do this here, in case we are in a VL==0 loop
733 with m.If(~dbg.core_stop_o & ~core_rst):
734 comb += fetch_pc_i_valid.eq(1) # tell fetch to start
735 with m.If(fetch_pc_o_ready): # fetch acknowledged us
736 m.next = "INSN_WAIT"
737 with m.Else():
738 # tell core it's stopped, and acknowledge debug handshake
739 comb += dbg.core_stopped_i.eq(1)
740 # while stopped, allow updating the PC and SVSTATE
741 with m.If(self.pc_i.ok):
742 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
743 comb += self.state_w_pc.i_data.eq(self.pc_i.data)
744 sync += pc_changed.eq(1)
745 with m.If(self.svstate_i.ok):
746 comb += new_svstate.eq(self.svstate_i.data)
747 comb += update_svstate.eq(1)
748 sync += sv_changed.eq(1)
749
750 # wait for an instruction to arrive from Fetch
751 with m.State("INSN_WAIT"):
752 if self.allow_overlap:
753 stopping = dbg.stopping_o
754 else:
755 stopping = Const(0)
756 with m.If(stopping):
757 # stopping: jump back to idle
758 m.next = "ISSUE_START"
759 with m.Else():
760 comb += fetch_insn_i_ready.eq(1)
761 with m.If(fetch_insn_o_valid):
762 # loop into ISSUE_START if it's a SVP64 instruction
763 # and VL == 0. this because VL==0 is a for-loop
764 # from 0 to 0 i.e. always, always a NOP.
765 cur_vl = cur_state.svstate.vl
766 with m.If(is_svp64_mode & (cur_vl == 0)):
767 # update the PC before fetching the next instruction
768 # since we are in a VL==0 loop, no instruction was
769 # executed that we could be overwriting
770 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
771 comb += self.state_w_pc.i_data.eq(nia)
772 comb += self.insn_done.eq(1)
773 m.next = "ISSUE_START"
774 with m.Else():
775 if self.svp64_en:
776 m.next = "PRED_START" # fetching predicate
777 else:
778 m.next = "DECODE_SV" # skip predication
779
780 with m.State("PRED_START"):
781 comb += pred_insn_i_valid.eq(1) # tell fetch_pred to start
782 with m.If(pred_insn_o_ready): # fetch_pred acknowledged us
783 m.next = "MASK_WAIT"
784
785 with m.State("MASK_WAIT"):
786 comb += pred_mask_i_ready.eq(1) # ready to receive the masks
787 with m.If(pred_mask_o_valid): # predication masks are ready
788 m.next = "PRED_SKIP"
789
790 # skip zeros in predicate
791 with m.State("PRED_SKIP"):
792 with m.If(~is_svp64_mode):
793 m.next = "DECODE_SV" # nothing to do
794 with m.Else():
795 if self.svp64_en:
796 pred_src_zero = pdecode2.rm_dec.pred_sz
797 pred_dst_zero = pdecode2.rm_dec.pred_dz
798
799 # new srcstep, after skipping zeros
800 skip_srcstep = Signal.like(cur_srcstep)
801 # value to be added to the current srcstep
802 src_delta = Signal.like(cur_srcstep)
803 # add leading zeros to srcstep, if not in zero mode
804 with m.If(~pred_src_zero):
805 # priority encoder (count leading zeros)
806 # append guard bit, in case the mask is all zeros
807 pri_enc_src = PriorityEncoder(65)
808 m.submodules.pri_enc_src = pri_enc_src
809 comb += pri_enc_src.i.eq(Cat(self.srcmask,
810 Const(1, 1)))
811 comb += src_delta.eq(pri_enc_src.o)
812 # apply delta to srcstep
813 comb += skip_srcstep.eq(cur_srcstep + src_delta)
814 # shift-out all leading zeros from the mask
815 # plus the leading "one" bit
816 # TODO count leading zeros and shift-out the zero
817 # bits, in the same step, in hardware
818 sync += self.srcmask.eq(self.srcmask >> (src_delta+1))
819
820 # same as above, but for dststep
821 skip_dststep = Signal.like(cur_dststep)
822 dst_delta = Signal.like(cur_dststep)
823 with m.If(~pred_dst_zero):
824 pri_enc_dst = PriorityEncoder(65)
825 m.submodules.pri_enc_dst = pri_enc_dst
826 comb += pri_enc_dst.i.eq(Cat(self.dstmask,
827 Const(1, 1)))
828 comb += dst_delta.eq(pri_enc_dst.o)
829 comb += skip_dststep.eq(cur_dststep + dst_delta)
830 sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))
831
832 # TODO: initialize mask[VL]=1 to avoid passing past VL
833 with m.If((skip_srcstep >= cur_vl) |
834 (skip_dststep >= cur_vl)):
835 # end of VL loop. Update PC and reset src/dst step
836 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
837 comb += self.state_w_pc.i_data.eq(nia)
838 comb += new_svstate.srcstep.eq(0)
839 comb += new_svstate.dststep.eq(0)
840 comb += update_svstate.eq(1)
841 # synchronize with the simulator
842 comb += self.insn_done.eq(1)
843 # go back to Issue
844 m.next = "ISSUE_START"
845 with m.Else():
846 # update new src/dst step
847 comb += new_svstate.srcstep.eq(skip_srcstep)
848 comb += new_svstate.dststep.eq(skip_dststep)
849 comb += update_svstate.eq(1)
850 # proceed to Decode
851 m.next = "DECODE_SV"
852
853 # pass predicate mask bits through to satellite decoders
854 # TODO: for SIMD this will be *multiple* bits
855 sync += core.i.sv_pred_sm.eq(self.srcmask[0])
856 sync += core.i.sv_pred_dm.eq(self.dstmask[0])
857
858 # after src/dst step have been updated, we are ready
859 # to decode the instruction
860 with m.State("DECODE_SV"):
861 # decode the instruction
862 with m.If(~fetch_failed):
863 sync += pdecode2.instr_fault.eq(0)
864 sync += core.i.e.eq(pdecode2.e)
865 sync += core.i.state.eq(cur_state)
866 sync += core.i.raw_insn_i.eq(dec_opcode_i)
867 sync += core.i.bigendian_i.eq(self.core_bigendian_i)
868 if self.svp64_en:
869 sync += core.i.sv_rm.eq(pdecode2.sv_rm)
870 # set RA_OR_ZERO detection in satellite decoders
871 sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
872 # and svp64 detection
873 sync += core.i.is_svp64_mode.eq(is_svp64_mode)
874 # and svp64 bit-rev'd ldst mode
875 ldst_dec = pdecode2.use_svp64_ldst_dec
876 sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
877 # after decoding, reset any previous exception condition,
878 # allowing it to be set again during the next execution
879 sync += pdecode2.ldst_exc.eq(0)
880
881 m.next = "INSN_EXECUTE" # move to "execute"
882
883 # handshake with execution FSM, move to "wait" once acknowledged
884 with m.State("INSN_EXECUTE"):
885 comb += exec_insn_i_valid.eq(1) # trigger execute
886 with m.If(exec_insn_o_ready): # execute acknowledged us
887 m.next = "EXECUTE_WAIT"
888
889 with m.State("EXECUTE_WAIT"):
890 # wait on "core stop" release, at instruction end
891 # need to do this here, in case we are in a VL>1 loop
892 with m.If(~dbg.core_stop_o & ~core_rst):
893 comb += exec_pc_i_ready.eq(1)
894 # see https://bugs.libre-soc.org/show_bug.cgi?id=636
895 # the exception info needs to be blatted into
896 # pdecode.ldst_exc, and the instruction "re-run".
897 # when ldst_exc.happened is set, the PowerDecoder2
898 # reacts very differently: it re-writes the instruction
899 # with a "trap" (calls PowerDecoder2.trap()) which
900 # will *overwrite* whatever was requested and jump the
901 # PC to the exception address, as well as alter MSR.
902 # nothing else needs to be done other than to note
903 # the change of PC and MSR (and, later, SVSTATE)
904 with m.If(exc_happened):
905 mmu = core.fus.get_exc("mmu0")
906 ldst = core.fus.get_exc("ldst0")
907 with m.If(fetch_failed):
908 # instruction fetch: exception is from MMU
909 # reset instr_fault (highest priority)
910 sync += pdecode2.ldst_exc.eq(mmu)
911 sync += pdecode2.instr_fault.eq(0)
912 if flush_needed:
913 # request the icache to stop asserting "failed"
914 comb += core.icache.flush_in.eq(1)
915 with m.Else():
916 # otherwise assume it was a LDST exception
917 sync += pdecode2.ldst_exc.eq(ldst)
918
919 with m.If(exec_pc_o_valid):
920
921 # was this the last loop iteration?
922 is_last = Signal()
923 cur_vl = cur_state.svstate.vl
924 comb += is_last.eq(next_srcstep == cur_vl)
925
926 # return directly to Decode if Execute generated an
927 # exception.
928 with m.If(pdecode2.ldst_exc.happened):
929 m.next = "DECODE_SV"
930
931 # if either PC or SVSTATE were changed by the previous
932 # instruction, go directly back to Fetch, without
933 # updating either PC or SVSTATE
934 with m.Elif(pc_changed | sv_changed):
935 m.next = "ISSUE_START"
936
937 # also return to Fetch, when no output was a vector
938 # (regardless of SRCSTEP and VL), or when the last
939 # instruction was really the last one of the VL loop
940 with m.Elif((~pdecode2.loop_continue) | is_last):
941 # before going back to fetch, update the PC state
942 # register with the NIA.
943 # ok here we are not reading the branch unit.
944 # TODO: this just blithely overwrites whatever
945 # pipeline updated the PC
946 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
947 comb += self.state_w_pc.i_data.eq(nia)
948 # reset SRCSTEP before returning to Fetch
949 if self.svp64_en:
950 with m.If(pdecode2.loop_continue):
951 comb += new_svstate.srcstep.eq(0)
952 comb += new_svstate.dststep.eq(0)
953 comb += update_svstate.eq(1)
954 else:
955 comb += new_svstate.srcstep.eq(0)
956 comb += new_svstate.dststep.eq(0)
957 comb += update_svstate.eq(1)
958 m.next = "ISSUE_START"
959
960 # returning to Execute? then, first update SRCSTEP
961 with m.Else():
962 comb += new_svstate.srcstep.eq(next_srcstep)
963 comb += new_svstate.dststep.eq(next_dststep)
964 comb += update_svstate.eq(1)
965 # return to mask skip loop
966 m.next = "PRED_SKIP"
967
968 with m.Else():
969 comb += dbg.core_stopped_i.eq(1)
970 # while stopped, allow updating the PC and SVSTATE
971 with m.If(self.pc_i.ok):
972 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
973 comb += self.state_w_pc.i_data.eq(self.pc_i.data)
974 sync += pc_changed.eq(1)
975 with m.If(self.svstate_i.ok):
976 comb += new_svstate.eq(self.svstate_i.data)
977 comb += update_svstate.eq(1)
978 sync += sv_changed.eq(1)
979
980 # check if svstate needs updating: if so, write it to State Regfile
981 with m.If(update_svstate):
982 comb += self.state_w_sv.wen.eq(1 << StateRegs.SVSTATE)
983 comb += self.state_w_sv.i_data.eq(new_svstate)
984 sync += cur_state.svstate.eq(new_svstate) # for next clock
985
def execute_fsm(self, m, core, pc_changed, sv_changed,
                exec_insn_i_valid, exec_insn_o_ready,
                exec_pc_o_valid, exec_pc_i_ready):
    """Build the execute FSM ("exec_fsm") inside module `m`.

    The FSM talks to the "issue" FSM over two handshakes:
    exec_insn_i_valid / exec_insn_o_ready (incoming instruction) and
    exec_pc_o_valid / exec_pc_i_ready (outgoing completion).

    * INSN_START: exec_insn_o_ready is held high.  When
      exec_insn_i_valid arrives, the core's input-valid is raised and
      the pc_changed / sv_changed notes are cleared; the FSM only
      advances once core.p.o_ready shows the core accepted it.
    * INSN_ACTIVE: writes to PC or SVSTATE seen on self.state_nia.wen
      are noted in pc_changed / sv_changed.  Once the core is no longer
      busy, exec_pc_o_valid is raised and, when exec_pc_i_ready
      answers, the FSM goes back to INSN_START.

    SVP64 RM prefixes have already been set up by the "issue" phase,
    so nothing SVP64-specific is done here.
    """
    comb, sync = m.d.comb, m.d.sync
    exc_happened = self.pdecode2.ldst_exc.happened
    nia_wen = self.state_nia.wen

    # shorthand for the core-side handshake signals
    busy = core.n.o_data.busy_o      # core is busy
    issue_valid = core.p.i_valid     # instruction is valid

    with m.FSM(name="exec_fsm"):

        # idle: advertise readiness and wait for the issue FSM
        with m.State("INSN_START"):
            comb += exec_insn_o_ready.eq(1)
            with m.If(exec_insn_i_valid):
                comb += issue_valid.eq(1)  # hand the instruction over
                # forget any PC/SVSTATE change noted previously
                sync += [sv_changed.eq(0), pc_changed.eq(0)]
                with m.If(core.p.o_ready):  # core took the instruction
                    m.next = "INSN_ACTIVE"

        # instruction in flight: stay here until the core is idle again
        with m.State("INSN_ACTIVE"):
            # remember whether SVSTATE and/or PC were written
            with m.If(nia_wen & (1 << StateRegs.SVSTATE)):
                sync += sv_changed.eq(1)
            with m.If(nia_wen & (1 << StateRegs.PC)):
                sync += pc_changed.eq(1)
            with m.If(~busy):  # core has finished
                comb += exec_pc_o_valid.eq(1)
                with m.If(exec_pc_i_ready):
                    # insn_done is withheld when an exception was
                    # flagged: exceptions are implemented by feeding
                    # the exception info back into PowerDecoder2 and
                    # re-running the whole instruction, so reporting
                    # "done" now would make it look like two
                    # instructions (the failed LDST, then a TRAP).
                    with m.If(~exc_happened):
                        comb += self.insn_done.eq(1)
                    m.next = "INSN_START"  # ready for the next one
1040
def setup_peripherals(self, m):
    """Attach submodules, clock domains and reset wiring to module `m`.

    * the core, the decoder and the optional instruction memory, SRAM,
      XICS, GPIO and SVP64 blocks are added under the "coresync"
      domain; debug and (if enabled) JTAG are added under
      self.dbg_domain.
    * "por", "sync" and "coresync" clock domains are created, plus
      self.dbg_domain when it is not "sync".
    * the coresync reset is driven from the power-on delay counter,
      the debug core-reset request and the main reset.
    * busy_o, the decoder endianness and the LDST "go" signals are
      connected.
    """
    comb, sync = m.d.comb, m.d.sync

    # okaaaay so the debug module must be in coresync clock domain
    # but NOT its reset signal. to cope with this, set every single
    # submodule explicitly in coresync domain, debug and JTAG
    # in their own one but using *external* reset.
    csd = DomainRenamer("coresync")
    dbd = DomainRenamer(self.dbg_domain)

    m.submodules.core = core = csd(self.core)
    # this _so_ needs sorting out. ICache is added down inside
    # LoadStore1 and is already a submodule of LoadStore1
    if not isinstance(self.imem, ICache):
        m.submodules.imem = csd(self.imem)
    m.submodules.dbg = dbg = dbd(self.dbg)
    if self.jtag_en:
        m.submodules.jtag = jtag = dbd(self.jtag)
        # TODO: UART2GDB mux, here, from external pin
        # see https://bugs.libre-soc.org/show_bug.cgi?id=499
        sync += dbg.dmi.connect_to(jtag.dmi)

    cur_state = self.cur_state

    # 4x 4k SRAM blocks. these simply "exist", they get routed in litex
    if self.sram4x4k:
        for i, sram in enumerate(self.sram4k):
            m.submodules["sram4k_%d" % i] = csd(sram)
            comb += sram.enable.eq(self.wb_sram_en)

    # XICS interrupt handler
    if self.xics:
        m.submodules.xics_icp = icp = csd(self.xics_icp)
        m.submodules.xics_ics = ics = csd(self.xics_ics)
        comb += icp.ics_i.eq(ics.icp_o)  # connect ICS to ICP
        sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core

    # GPIO test peripheral
    if self.gpio:
        m.submodules.simple_gpio = csd(self.simple_gpio)

    # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
    # XXX causes litex ECP5 test to get wrong idea about input and output
    # (but works with verilator sim *sigh*)
    # if self.gpio and self.xics:
    #    comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])

    # instruction decoder
    # NOTE(review): the result of create_pdecode() is not used anywhere
    # in this method; the call is kept as-is - confirm it can be dropped.
    pdecode = create_pdecode()
    m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
    if self.svp64_en:
        m.submodules.svp64 = csd(self.svp64)

    # clock delay power-on reset
    cd_por = ClockDomain(reset_less=True)
    cd_sync = ClockDomain()
    core_sync = ClockDomain("coresync")
    m.domains += cd_por, cd_sync, core_sync
    if self.dbg_domain != "sync":
        dbg_sync = ClockDomain(self.dbg_domain)
        m.domains += dbg_sync

    # the counter starts at 3 and counts down to zero in the "por"
    # domain, which is clocked directly from the main clock
    ti_rst = Signal(reset_less=True)
    delay = Signal(range(4), reset=3)
    with m.If(delay != 0):
        m.d.por += delay.eq(delay - 1)
    comb += cd_por.clk.eq(ClockSignal())

    # power-on reset delay: core reset is the OR of "still counting
    # down", the debug reset request and the main reset.  the comparison
    # must be parenthesised: "|" binds tighter than "!=" in python, so
    # without the brackets the whole OR-expression would be compared
    # against delay instead.
    core_rst = ResetSignal("coresync")
    comb += ti_rst.eq((delay != 0) | dbg.core_rst_o | ResetSignal())
    comb += core_rst.eq(ti_rst)

    # debug clock is same as coresync, but reset is *main external*
    if self.dbg_domain != "sync":
        dbg_rst = ResetSignal(self.dbg_domain)
        comb += dbg_rst.eq(ResetSignal())

    # busy/halted signals from core
    core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o  # core is busy
    comb += self.busy_o.eq(core_busy_o)
    comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)

    # temporary hack: says "go" immediately for both address gen and ST
    ldst = core.fus.fus['ldst0']
    st_go_edge = rising_edge(m, ldst.st.rel_o)
    # link addr-go direct to rel
    m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)
    m.d.comb += ldst.st.go_i.eq(st_go_edge)  # link store-go to rising rel
1135
def elaborate(self, platform):
    """Assemble the issuer: peripherals, state read-out and the FSMs.

    Creates the Module, calls setup_peripherals, reads PC and SVSTATE
    through state_get, connects the debug state outputs, instantiates
    the Fetch FSM as a submodule and then adds the issue, (optional)
    predicate-fetch and execute FSMs, the DMI regfile read-out and the
    DEC/TB update FSM.  Returns the completed Module.
    """
    m = Module()
    comb = m.d.comb
    cur_state, pdecode2 = self.cur_state, self.pdecode2
    dbg, core = self.dbg, self.core

    # peripherals, submodules, clock domains and resets
    core_rst = self.core_rst
    self.setup_peripherals(m)

    # a core reset request clears the current state
    with m.If(core_rst):
        m.d.sync += cur_state.eq(0)

    # expose the current PC
    comb += self.pc_o.eq(cur_state.pc)
    pc_changed = Signal()  # notes a write to PC
    sv_changed = Signal()  # notes a write to SVSTATE

    # tell the outside world whether any FU is still executing
    comb += self.any_busy.eq(core.n.o_data.any_busy_o)

    # PC and SVSTATE come either from the incoming override or from
    # the state regfile
    # TODO: really should be doing MSR in the same way
    pc = state_get(m, core_rst, self.pc_i, "pc",
                   self.state_r_pc, StateRegs.PC)
    svstate = state_get(m, core_rst, self.svstate_i, "svstate",
                        self.state_r_sv, StateRegs.SVSTATE)

    # default: no PC write (the FSMs assert the write port when needed)
    comb += [
        self.state_w_pc.wen.eq(0),
        self.state_w_pc.i_data.eq(0),
    ]

    # address of the next instruction, in the absence of a branch
    # (depends on the instruction size)
    nia = Signal(64)

    # debug signals
    # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
    comb += [
        dbg.terminate_i.eq(core.o.core_terminate_o),
        dbg.state.pc.eq(pc),
        dbg.state.svstate.eq(svstate),
        dbg.state.msr.eq(cur_state.msr),
    ]

    # prefix mode, passed from Fetch to Issue so that the latter can
    # loop on VL==0
    is_svp64_mode = Signal()

    # handshake signals between the FSMs: fetch (32/64-bit), predicate
    # fetch/calculate (only when SVP64 is enabled), issue and execute.

    # into Fetch: start reading at the given PC
    fetch_pc_i_valid = Signal()
    fetch_pc_o_ready = Signal()

    # out of Fetch: instruction ready to be decoded / issued
    fetch_insn_o_valid = Signal()
    fetch_insn_i_ready = Signal()

    # into predicate fetch: decode and fetch the predicate
    pred_insn_i_valid = Signal()
    pred_insn_o_ready = Signal()

    # out of predicate fetch: masks delivered
    pred_mask_o_valid = Signal()
    pred_mask_i_ready = Signal()

    # issue -> execute: instruction to be executed
    exec_insn_i_valid = Signal()
    exec_insn_o_ready = Signal()

    # execute -> issue: PC/SVSTATE handed back
    exec_pc_o_valid = Signal()
    exec_pc_i_ready = Signal()

    # handshake groups, in the argument order the FSM builders expect
    fetch_hs = (fetch_pc_o_ready, fetch_pc_i_valid,
                fetch_insn_o_valid, fetch_insn_i_ready)
    pred_hs = (pred_insn_i_valid, pred_insn_o_ready,
               pred_mask_o_valid, pred_mask_i_ready)
    exec_hs = (exec_insn_i_valid, exec_insn_o_ready,
               exec_pc_o_valid, exec_pc_i_ready)

    # the FSMs here are perhaps unusual in that they detect conditions
    # then "hold" information, combinatorially, for the core
    # (as opposed to using sync - which would be on a clock's delay)
    # this includes the actual opcode, valid flags and so on.
    #
    # order is Fetch, predicate fetch, Issue, Execute.  the VL for-loop
    # lives in Issue.

    # Fetch FSM is a submodule with ready/valid ports on both sides
    fetch = FetchFSM(self.allow_overlap, self.svp64_en,
                     self.imem, core_rst, pdecode2, cur_state,
                     dbg, core, svstate, nia, is_svp64_mode)
    m.submodules.fetch = fetch
    comb += [
        fetch.p.i_data.pc.eq(pc),                   # incoming data
        fetch_pc_o_ready.eq(fetch.p.o_ready),       # incoming handshake
        fetch.p.i_valid.eq(fetch_pc_i_valid),
        fetch_insn_o_valid.eq(fetch.n.o_valid),     # outgoing handshake
        fetch.n.i_ready.eq(fetch_insn_i_ready),
    ]

    self.issue_fsm(m, core, pc_changed, sv_changed, nia,
                   dbg, core_rst, is_svp64_mode,
                   *fetch_hs, *pred_hs, *exec_hs)

    if self.svp64_en:
        self.fetch_predicate_fsm(m, *pred_hs)

    self.execute_fsm(m, core, pc_changed, sv_changed, *exec_hs)

    # not part of any FSM: regfile reads on demand from DMI
    self.do_dmi(m, dbg)

    # DEC and TB inc/dec FSM. copy of DEC is put into CoreState,
    # (which uses that in PowerDecoder2 to raise 0x900 exception)
    self.tb_dec_fsm(m, cur_state.dec)

    return m
1266
def do_dmi(self, m, dbg):
    """Serve DMI debug read requests for the INT, CR and XER regfiles.

    For each of dbg.d_gpr, dbg.d_cr and dbg.d_xer: while `req` is
    asserted the matching regfile read port is enabled, and one clock
    later (tracked by a registered copy of `req`) the port's o_data is
    driven onto `data` together with `ack`.

    Only reads are handled here.
    """
    comb = m.d.comb
    int_regfile = self.core.regs.rf['int']

    def reply_one_clock_later(request, rd_port, flag_name):
        # registered copy of req: the read data arrives one clock
        # after the port was enabled.  the explicit name keeps the
        # flag's generated signal name fixed.
        req_delayed = Signal(name=flag_name)
        m.d.sync += req_delayed.eq(request.req)
        with m.If(req_delayed):
            m.d.comb += request.data.eq(rd_port.o_data)
            m.d.comb += request.ack.eq(1)

    # INT regfile
    gpr_req = dbg.d_gpr
    with m.If(gpr_req.req):
        # TODO: error-check this
        # XXX should this be combinatorial? sync better?
        if int_regfile.unary:
            # unary-addressed regfile: read-enable is a one-hot mask
            comb += self.int_r.ren.eq(1 << gpr_req.addr)
        else:
            comb += self.int_r.addr.eq(gpr_req.addr)
            comb += self.int_r.ren.eq(1)
    reply_one_clock_later(gpr_req, self.int_r, "d_reg_delay")

    # CR: enable all eight read-enable bits
    cr_req = dbg.d_cr
    with m.If(cr_req.req):
        comb += self.cr_r.ren.eq(0b11111111)
    reply_one_clock_later(cr_req, self.cr_r, "d_cr_delay")

    # XER: enable all six read-enable bits
    xer_req = dbg.d_xer
    with m.If(xer_req.req):
        comb += self.xer_r.ren.eq(0b111111)
    reply_one_clock_later(xer_req, self.xer_r, "d_xer_delay")
1312
def tb_dec_fsm(self, m, spr_dec):
    """Add the FSM that alternately updates DEC and TB.

    The FSM cycles DEC_READ -> DEC_WRITE -> TB_READ -> TB_WRITE and
    back again, using the "issue" read and write ports of the "fast"
    regfile: DEC is written back decremented by one, TB incremented by
    one.  The new DEC value is also copied (sync) into `spr_dec`.

    note that the SPR pipeline could have written a new value to DEC,
    however the regfile has "passthrough" on it so this *should* be ok.

    see v3.0B p1097-1099 for Timer Resource and p1065 and p1076

    Returns `m`.
    """
    comb, sync = m.d.comb, m.d.sync
    fast_regs = self.core.regs.rf['fast']
    rd = fast_regs.r_ports['issue']  # shared DEC/TB read port
    wr = fast_regs.w_ports['issue']  # shared DEC/TB write port

    with m.FSM():

        # request a read of DEC
        with m.State("DEC_READ"):
            comb += [rd.addr.eq(FastRegs.DEC), rd.ren.eq(1)]
            m.next = "DEC_WRITE"

        # DEC read data is available (1 cycle later): write DEC-1 back
        with m.State("DEC_WRITE"):
            new_dec = Signal(64)
            # TODO: MSR.LPCR 32-bit decrement mode
            comb += [
                new_dec.eq(rd.o_data - 1),
                wr.addr.eq(FastRegs.DEC),
                wr.wen.eq(1),
                wr.i_data.eq(new_dec),
            ]
            sync += spr_dec.eq(new_dec)  # copy for the decoder
            m.next = "TB_READ"

        # request a read of TB
        with m.State("TB_READ"):
            comb += [rd.addr.eq(FastRegs.TB), rd.ren.eq(1)]
            m.next = "TB_WRITE"

        # TB read data is available: write TB+1 back
        with m.State("TB_WRITE"):
            new_tb = Signal(64)
            comb += [
                new_tb.eq(rd.o_data + 1),
                wr.addr.eq(FastRegs.TB),
                wr.wen.eq(1),
                wr.i_data.eq(new_tb),
            ]
            m.next = "DEC_READ"

    return m
1364
def __iter__(self):
    """Yield this object's ports one at a time.

    Order: the pc_i ports, pc_o, memerr_o, every port of the core,
    every port of the instruction memory, core_bigendian_i, busy_o.
    """
    for sig in self.pc_i.ports():
        yield sig
    yield self.pc_o
    yield self.memerr_o
    for sig in self.core.ports():
        yield sig
    for sig in self.imem.ports():
        yield sig
    yield self.core_bigendian_i
    yield self.busy_o
1373
def ports(self):
    """Return everything produced by iterating over self, as a list."""
    return [*self]
1376
def external_ports(self):
    """Return a new list of the ports to expose at the top level.

    Always included: the pc_i ports, pc_o, memerr_o, core_bigendian_i,
    busy_o, the fields of the instruction bus (imem.ibus) and of the
    data bus (core.l0.cmpi.wb_bus()).  JTAG's external ports are added
    when jtag_en is set, otherwise the DMI ports.  The SRAM, XICS
    (plus int_level_i) and GPIO (plus gpio_o) bus fields are added
    when the corresponding option is enabled.

    The list returned by pc_i.ports() is copied before being extended,
    so the caller gets a fresh list on every call and whatever object
    pc_i.ports() hands back is never modified.
    """
    # copy: "+=" / extend on the returned object would modify it in place
    ports = list(self.pc_i.ports())
    ports += [self.pc_o, self.memerr_o, self.core_bigendian_i,
              self.busy_o]

    if self.jtag_en:
        ports.extend(self.jtag.external_ports())
    else:
        # don't add DMI if JTAG is enabled
        ports.extend(self.dbg.dmi.ports())

    ports.extend(self.imem.ibus.fields.values())
    ports.extend(self.core.l0.cmpi.wb_bus().fields.values())

    if self.sram4x4k:
        for sram in self.sram4k:
            ports.extend(sram.bus.fields.values())

    if self.xics:
        ports.extend(self.xics_icp.bus.fields.values())
        ports.extend(self.xics_ics.bus.fields.values())
        ports.append(self.int_level_i)

    if self.gpio:
        ports.extend(self.simple_gpio.bus.fields.values())
        ports.append(self.gpio_o)

    return ports
1408
1409
class TestIssuer(Elaboratable):
    """Wrapper combining a TestIssuerInternal with a DummyPLL.

    The PLL-related signals only exist when pspec has a true `use_pll`
    attribute.  elaborate() drives the "coresync" clock (and the debug
    clock, when the debug domain is not "sync") from the main clock.
    """

    def __init__(self, pspec):
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL(instance=True)

        # PLL direct clock or not: a missing attribute counts as "no"
        self.pll_en = getattr(pspec, "use_pll", False)
        if self.pll_en:
            self.pll_test_o = Signal(reset_less=True)
            self.pll_vco_o = Signal(reset_less=True)
            self.clk_sel_i = Signal(2, reset_less=True)
            self.ref_clk = ClockSignal()  # can't rename it but that's ok
            self.pllclk_clk = ClockSignal("pllclk")

    def elaborate(self, platform):
        """Build the module: issuer, optional PLL, and clock hookup."""
        m = Module()
        comb = m.d.comb
        dbg_domain = self.ti.dbg_domain

        # TestIssuer nominally runs at main clock, actually it is
        # all combinatorial internally except for coresync'd components
        m.submodules.ti = self.ti

        if self.pll_en:
            pll = self.pll
            m.submodules.wrappll = pll

            # clock domain fed by the PLL output
            m.domains += ClockDomain("pllclk")
            comb += self.pllclk_clk.eq(pll.clk_pll_o)

            # wire up external 24mhz to PLL
            # comb += pll.clk_24_i.eq(self.ref_clk)
            # output 18 mhz PLL test signal, and analog oscillator out
            comb += [
                self.pll_test_o.eq(pll.pll_test_o),
                self.pll_vco_o.eq(pll.pll_vco_o),
                # input to pll clock selection
                pll.clk_sel_i.eq(self.clk_sel_i),
                # pllclk reset simply follows the main reset
                ResetSignal("pllclk").eq(ResetSignal()),
            ]

        # these domain objects are created but not registered with the
        # module: the corresponding "m.domains +=" lines are disabled
        cd_coresync = ClockDomain("coresync")
        # m.domains += cd_coresync
        if dbg_domain != 'sync':
            cd_dbgsync = ClockDomain("dbgsync")
            # m.domains += cd_dbgsync

        # XXX BYPASS PLL XXX
        # with the PLL enabled the core clock comes from ref_clk,
        # otherwise straight from the main clock
        intclk = ClockSignal("coresync")
        clk_source = self.ref_clk if self.pll_en else ClockSignal()
        comb += intclk.eq(clk_source)

        # debug clock runs at the coresync internal clock
        if dbg_domain != 'sync':
            comb += ClockSignal(dbg_domain).eq(intclk)

        return m

    def ports(self):
        """All issuer and PLL ports, plus main clock and reset."""
        return [*self.ti.ports(), *self.pll.ports(),
                ClockSignal(), ResetSignal()]

    def external_ports(self):
        """The issuer's external ports plus clock, reset and PLL pins.

        The list obtained from self.ti.external_ports() is extended in
        place and returned.
        """
        ports = self.ti.external_ports()
        ports.extend([ClockSignal(), ResetSignal()])
        if self.pll_en:
            ports.extend([
                self.clk_sel_i,
                self.pll.clk_24_i,
                self.pll_test_o,
                self.pll_vco_o,
                self.pllclk_clk,
                self.ref_clk,
            ])
        return ports
1497
1498
if __name__ == '__main__':
    # one Function Unit of each listed kind
    unit_names = ('alu', 'cr', 'branch', 'trap', 'logical',
                  'spr', 'div', 'mul', 'shiftrot')
    units = dict.fromkeys(unit_names, 1)
    pspec = TestMemPspec(
        ldst_ifacetype='bare_wb',
        imem_ifacetype='bare_wb',
        addr_wid=48,
        mask_wid=8,
        reg_wid=64,
        units=units,
    )
    dut = TestIssuer(pspec)
    vl = main(dut, ports=dut.ports(), name="test_issuer")

    # when run with no command-line arguments, also write out RTLIL
    # using the external port list
    if len(sys.argv) == 1:
        vl = rtlil.convert(dut, ports=dut.external_ports(),
                           name="test_issuer")
        with open("test_issuer.il", "w") as il_file:
            il_file.write(vl)