move alternative TestIssuerInternalInOrder to new file
[soc.git] src/soc/simple/issuer.py
1 """simple core issuer
2
3 not in any way intended for production use. this runs a FSM that:
4
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
9 * increments the PC
10 * does it all over again
11
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to provide something that can be further incrementally
15 improved.
16 """
17
18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
19 ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
20 from nmigen.cli import rtlil
21 from nmigen.cli import main
22 import sys
23
24 from nmutil.singlepipe import ControlBase
25 from soc.simple.core_data import FetchOutput, FetchInput
26
27 from nmigen.lib.coding import PriorityEncoder
28
29 from openpower.decoder.power_decoder import create_pdecode
30 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
31 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
32 from openpower.decoder.decode2execute1 import Data
33 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
34 SVP64PredMode)
35 from openpower.state import CoreState
36 from openpower.consts import (CR, SVP64CROffs)
37 from soc.experiment.testmem import TestMemory # test only for instructions
38 from soc.regfile.regfiles import StateRegs, FastRegs
39 from soc.simple.core import NonProductionCore
40 from soc.config.test.test_loadstore import TestMemPspec
41 from soc.config.ifetch import ConfigFetchUnit
42 from soc.debug.dmi import CoreDebug, DMIInterface
43 from soc.debug.jtag import JTAG
44 from soc.config.pinouts import get_pinspecs
45 from soc.interrupts.xics import XICS_ICP, XICS_ICS
46 from soc.bus.simple_gpio import SimpleGPIO
47 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
48 from soc.clock.select import ClockSelect
49 from soc.clock.dummypll import DummyPLL
50 from openpower.sv.svstate import SVSTATERec
51 from soc.experiment.icache import ICache
52
53 from nmutil.util import rising_edge
54
55
56 def get_insn(f_instr_o, pc):
57 if f_instr_o.width == 32:
58 return f_instr_o
59 else:
60 # 64-bit: bit 2 of pc decides which word to select
61 return f_instr_o.word_select(pc[2], 32)
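
# example (for explanation only): when f_instr_o is 64 bits wide it holds two
# 32-bit instruction words, and bit 2 of the PC picks between them:
#   get_insn(f_instr_o, 0x1000) selects f_instr_o[0:32]   (pc bit 2 clear)
#   get_insn(f_instr_o, 0x1004) selects f_instr_o[32:64]  (pc bit 2 set)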
62
63 # gets state input or reads from state regfile
64
65
66 def state_get(m, res, core_rst, state_i, name, regfile, regnum):
67 comb = m.d.comb
68 sync = m.d.sync
69 # read the {insert state variable here}
70 res_ok_delay = Signal(name="%s_ok_delay" % name)
71 with m.If(~core_rst):
72 sync += res_ok_delay.eq(~state_i.ok)
73 with m.If(state_i.ok):
74 # incoming override (start from pc_i)
75 comb += res.eq(state_i.data)
76 with m.Else():
77 # otherwise read StateRegs regfile for {insert state here}...
78 comb += regfile.ren.eq(1 << regnum)
79 # ... but on a 1-clock delay
80 with m.If(res_ok_delay):
81 comb += res.eq(regfile.o_data)
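
# example usage (mirroring TestIssuerBase.elaborate below): read the PC either
# from the incoming pc_i override or from the StateRegs regfile, the regfile
# value arriving on a 1-clock delay:
#   state_get(m, state.pc, core_rst, self.pc_i, "pc",
#             self.state_r_pc, StateRegs.PC)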
82
83
84 def get_predint(m, mask, name):
85 """decode SVP64 predicate integer mask field to reg number and invert
86 this is identical to the equivalent function in ISACaller except that
87 it doesn't read the INT directly, it just decodes "what needs to be done"
88 i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
89
90 * all1s is set to indicate that no mask is to be applied.
91 * regread indicates the GPR register number to be read
92 * invert is set to indicate that the register value is to be inverted
93 * unary indicates that the contents of the register select a single bit, 1<<r3
94 """
95 comb = m.d.comb
96 regread = Signal(5, name=name+"regread")
97 invert = Signal(name=name+"invert")
98 unary = Signal(name=name+"unary")
99 all1s = Signal(name=name+"all1s")
100 with m.Switch(mask):
101 with m.Case(SVP64PredInt.ALWAYS.value):
102 comb += all1s.eq(1) # use 0b1111 (all ones)
103 with m.Case(SVP64PredInt.R3_UNARY.value):
104 comb += regread.eq(3)
105 comb += unary.eq(1) # 1<<r3 - shift r3 (single bit)
106 with m.Case(SVP64PredInt.R3.value):
107 comb += regread.eq(3)
108 with m.Case(SVP64PredInt.R3_N.value):
109 comb += regread.eq(3)
110 comb += invert.eq(1)
111 with m.Case(SVP64PredInt.R10.value):
112 comb += regread.eq(10)
113 with m.Case(SVP64PredInt.R10_N.value):
114 comb += regread.eq(10)
115 comb += invert.eq(1)
116 with m.Case(SVP64PredInt.R30.value):
117 comb += regread.eq(30)
118 with m.Case(SVP64PredInt.R30_N.value):
119 comb += regread.eq(30)
120 comb += invert.eq(1)
121 return regread, invert, unary, all1s
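
# example (for explanation only): a mask field of SVP64PredInt.R3_N selects
# GPR r3 as the predicate source with bit-inversion requested, i.e. the
# returned signals settle to regread=3, invert=1, unary=0, all1s=0.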
122
123
124 def get_predcr(m, mask, name):
125 """decode SVP64 predicate CR to reg number field and invert status
126 this is identical to _get_predcr in ISACaller
127 """
128 comb = m.d.comb
129 idx = Signal(2, name=name+"idx")
130 invert = Signal(name=name+"crinvert")
131 with m.Switch(mask):
132 with m.Case(SVP64PredCR.LT.value):
133 comb += idx.eq(CR.LT)
134 comb += invert.eq(0)
135 with m.Case(SVP64PredCR.GE.value):
136 comb += idx.eq(CR.LT)
137 comb += invert.eq(1)
138 with m.Case(SVP64PredCR.GT.value):
139 comb += idx.eq(CR.GT)
140 comb += invert.eq(0)
141 with m.Case(SVP64PredCR.LE.value):
142 comb += idx.eq(CR.GT)
143 comb += invert.eq(1)
144 with m.Case(SVP64PredCR.EQ.value):
145 comb += idx.eq(CR.EQ)
146 comb += invert.eq(0)
147 with m.Case(SVP64PredCR.NE.value):
148 comb += idx.eq(CR.EQ)
149 comb += invert.eq(1)
150 with m.Case(SVP64PredCR.SO.value):
151 comb += idx.eq(CR.SO)
152 comb += invert.eq(0)
153 with m.Case(SVP64PredCR.NS.value):
154 comb += idx.eq(CR.SO)
155 comb += invert.eq(1)
156 return idx, invert
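
# example (for explanation only): SVP64PredCR.GE selects the same CR bit as
# LT but with the test inverted (idx=CR.LT, invert=1), whereas SVP64PredCR.EQ
# gives idx=CR.EQ, invert=0.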
157
158
159 class TestIssuerBase(Elaboratable):
160 """TestIssuerBase - common base class for Issuers
161
162 takes care of power-on reset, peripherals, debug, DEC/TB,
163 and gets PC/MSR/SVSTATE from the State Regfile etc.
164 """
165
166 def __init__(self, pspec):
167
168 # test if SVP64 is to be enabled
169 self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
170
171 # and if regfiles are reduced
172 self.regreduce_en = (hasattr(pspec, "regreduce") and
173 (pspec.regreduce == True))
174
175 # and if overlap requested
176 self.allow_overlap = (hasattr(pspec, "allow_overlap") and
177 (pspec.allow_overlap == True))
178
179 # JTAG interface. add this right at the start because if it's
180 # added it *modifies* the pspec, by adding enable/disable signals
181 # for parts of the rest of the core
182 self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
183 self.dbg_domain = "sync" # sigh "dbgsync" too problematic
184 # self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
185 if self.jtag_en:
186 # XXX MUST keep this up-to-date with litex, and
187 # soc-cocotb-sim, and err.. all needs sorting out, argh
188 subset = ['uart',
189 'mtwi',
190 'eint', 'gpio', 'mspi0',
191 # 'mspi1', - disabled for now
192 # 'pwm', 'sd0', - disabled for now
193 'sdr']
194 self.jtag = JTAG(get_pinspecs(subset=subset),
195 domain=self.dbg_domain)
196 # add signals to pspec to enable/disable icache and dcache
197 # (or data and instruction wishbone if icache/dcache not included)
198 # https://bugs.libre-soc.org/show_bug.cgi?id=520
199 # TODO: do we actually care if these are not domain-synchronised?
200 # honestly probably not.
201 pspec.wb_icache_en = self.jtag.wb_icache_en
202 pspec.wb_dcache_en = self.jtag.wb_dcache_en
203 self.wb_sram_en = self.jtag.wb_sram_en
204 else:
205 self.wb_sram_en = Const(1)
206
207 # add 4k sram blocks?
208 self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
209 pspec.sram4x4kblock == True)
210 if self.sram4x4k:
211 self.sram4k = []
212 for i in range(4):
213 self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
214 # features={'err'}
215 ))
216
217 # add interrupt controller?
218 self.xics = hasattr(pspec, "xics") and pspec.xics == True
219 if self.xics:
220 self.xics_icp = XICS_ICP()
221 self.xics_ics = XICS_ICS()
222 self.int_level_i = self.xics_ics.int_level_i
223
224 # add GPIO peripheral?
225 self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
226 if self.gpio:
227 self.simple_gpio = SimpleGPIO()
228 self.gpio_o = self.simple_gpio.gpio_o
229
230 # main instruction core. suitable for prototyping / demo only
231 self.core = core = NonProductionCore(pspec)
232 self.core_rst = ResetSignal("coresync")
233
234 # instruction decoder. goes into Trap Record
235 #pdecode = create_pdecode()
236 self.cur_state = CoreState("cur") # current state (MSR/PC/SVSTATE)
237 self.pdecode2 = PowerDecode2(None, state=self.cur_state,
238 opkls=IssuerDecode2ToOperand,
239 svp64_en=self.svp64_en,
240 regreduce_en=self.regreduce_en)
241 pdecode = self.pdecode2.dec
242
243 if self.svp64_en:
244 self.svp64 = SVP64PrefixDecoder() # for decoding SVP64 prefix
245
246 # Test Instruction memory
247 if hasattr(core, "icache"):
248 # XXX BLECH! use pspec to transfer the I-Cache to ConfigFetchUnit
249 # truly dreadful. needs a huge reorg.
250 pspec.icache = core.icache
251 self.imem = ConfigFetchUnit(pspec).fu
252
253 # DMI interface
254 self.dbg = CoreDebug()
255
256 # instruction go/monitor
257 self.pc_o = Signal(64, reset_less=True)
258 self.pc_i = Data(64, "pc_i") # set "ok" to indicate "please change me"
259 self.msr_i = Data(64, "msr_i") # set "ok" to indicate "please change me"
260 self.svstate_i = Data(64, "svstate_i") # ditto
261 self.core_bigendian_i = Signal() # TODO: set based on MSR.LE
262 self.busy_o = Signal(reset_less=True)
263 self.memerr_o = Signal(reset_less=True)
264
265 # STATE regfile read /write ports for PC, MSR, SVSTATE
266 staterf = self.core.regs.rf['state']
267 self.state_r_msr = staterf.r_ports['msr'] # MSR rd
268 self.state_r_pc = staterf.r_ports['cia'] # PC rd
269 self.state_r_sv = staterf.r_ports['sv'] # SVSTATE rd
270
271 self.state_w_msr = staterf.w_ports['msr'] # MSR wr
272 self.state_w_pc = staterf.w_ports['d_wr1'] # PC wr
273 self.state_w_sv = staterf.w_ports['sv'] # SVSTATE wr
274
275 # DMI interface access
276 intrf = self.core.regs.rf['int']
277 crrf = self.core.regs.rf['cr']
278 xerrf = self.core.regs.rf['xer']
279 self.int_r = intrf.r_ports['dmi'] # INT read
280 self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
281 self.xer_r = xerrf.r_ports['full_xer'] # XER read
282
283 if self.svp64_en:
284 # for predication
285 self.int_pred = intrf.r_ports['pred'] # INT predicate read
286 self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read
287
288 # hack method of keeping an eye on whether branch/trap set the PC
289 self.state_nia = self.core.regs.rf['state'].w_ports['nia']
290 self.state_nia.wen.name = 'state_nia_wen'
291
292 # pulse to synchronize the simulator at instruction end
293 self.insn_done = Signal()
294
295 # indicate any instruction still outstanding, in execution
296 self.any_busy = Signal()
297
298 if self.svp64_en:
299 # store copies of predicate masks
300 self.srcmask = Signal(64)
301 self.dstmask = Signal(64)
302
303 def setup_peripherals(self, m):
304 comb, sync = m.d.comb, m.d.sync
305
306 # okaaaay so the debug module must be in coresync clock domain
307 # but NOT its reset signal. to cope with this, set every single
308 # submodule explicitly in coresync domain, debug and JTAG
309 # in their own one but using *external* reset.
310 csd = DomainRenamer("coresync")
311 dbd = DomainRenamer(self.dbg_domain)
312
313 m.submodules.core = core = csd(self.core)
314 # this _so_ needs sorting out. ICache is added down inside
315 # LoadStore1 and is already a submodule of LoadStore1
316 if not isinstance(self.imem, ICache):
317 m.submodules.imem = imem = csd(self.imem)
318 m.submodules.dbg = dbg = dbd(self.dbg)
319 if self.jtag_en:
320 m.submodules.jtag = jtag = dbd(self.jtag)
321 # TODO: UART2GDB mux, here, from external pin
322 # see https://bugs.libre-soc.org/show_bug.cgi?id=499
323 sync += dbg.dmi.connect_to(jtag.dmi)
324
325 cur_state = self.cur_state
326
327 # 4x 4k SRAM blocks. these simply "exist", they get routed in litex
328 if self.sram4x4k:
329 for i, sram in enumerate(self.sram4k):
330 m.submodules["sram4k_%d" % i] = csd(sram)
331 comb += sram.enable.eq(self.wb_sram_en)
332
333 # XICS interrupt handler
334 if self.xics:
335 m.submodules.xics_icp = icp = csd(self.xics_icp)
336 m.submodules.xics_ics = ics = csd(self.xics_ics)
337 comb += icp.ics_i.eq(ics.icp_o) # connect ICS to ICP
338 sync += cur_state.eint.eq(icp.core_irq_o) # connect ICP to core
339
340 # GPIO test peripheral
341 if self.gpio:
342 m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)
343
344 # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
345 # XXX causes litex ECP5 test to get wrong idea about input and output
346 # (but works with verilator sim *sigh*)
347 # if self.gpio and self.xics:
348 # comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
349
350 # instruction decoder
351 pdecode = create_pdecode()
352 m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
353 if self.svp64_en:
354 m.submodules.svp64 = svp64 = csd(self.svp64)
355
356 # convenience
357 dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
358 intrf = self.core.regs.rf['int']
359
360 # clock delay power-on reset
361 cd_por = ClockDomain(reset_less=True)
362 cd_sync = ClockDomain()
363 core_sync = ClockDomain("coresync")
364 m.domains += cd_por, cd_sync, core_sync
365 if self.dbg_domain != "sync":
366 dbg_sync = ClockDomain(self.dbg_domain)
367 m.domains += dbg_sync
368
369 ti_rst = Signal(reset_less=True)
370 delay = Signal(range(4), reset=3)
371 with m.If(delay != 0):
372 m.d.por += delay.eq(delay - 1)
373 comb += cd_por.clk.eq(ClockSignal())
374
375 # power-on reset delay
376 core_rst = ResetSignal("coresync")
377 comb += ti_rst.eq((delay != 0) | dbg.core_rst_o | ResetSignal())
378 comb += core_rst.eq(ti_rst)
379
380 # debug clock is same as coresync, but reset is *main external*
381 if self.dbg_domain != "sync":
382 dbg_rst = ResetSignal(self.dbg_domain)
383 comb += dbg_rst.eq(ResetSignal())
384
385 # busy/halted signals from core
386 core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o # core is busy
387 comb += self.busy_o.eq(core_busy_o)
388 comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
389
390 # temporary hack: says "go" immediately for both address gen and ST
391 l0 = core.l0
392 ldst = core.fus.fus['ldst0']
393 st_go_edge = rising_edge(m, ldst.st.rel_o)
394 # link addr-go direct to rel
395 m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)
396 m.d.comb += ldst.st.go_i.eq(st_go_edge) # link store-go to rising rel
397
398 def do_dmi(self, m, dbg):
399 """deals with DMI debug requests
400
401 currently only provides read requests for the INT regfile, CR and XER
402 it will later also deal with *writing* to these regfiles.
403 """
404 comb = m.d.comb
405 sync = m.d.sync
406 dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
407 intrf = self.core.regs.rf['int']
408
409 with m.If(d_reg.req): # request for regfile access being made
410 # TODO: error-check this
411 # XXX should this be combinatorial? sync better?
412 if intrf.unary:
413 comb += self.int_r.ren.eq(1 << d_reg.addr)
414 else:
415 comb += self.int_r.addr.eq(d_reg.addr)
416 comb += self.int_r.ren.eq(1)
417 d_reg_delay = Signal()
418 sync += d_reg_delay.eq(d_reg.req)
419 with m.If(d_reg_delay):
420 # data arrives one clock later
421 comb += d_reg.data.eq(self.int_r.o_data)
422 comb += d_reg.ack.eq(1)
423
424 # sigh same thing for CR debug
425 with m.If(d_cr.req): # request for regfile access being made
426 comb += self.cr_r.ren.eq(0b11111111) # enable all
427 d_cr_delay = Signal()
428 sync += d_cr_delay.eq(d_cr.req)
429 with m.If(d_cr_delay):
430 # data arrives one clock later
431 comb += d_cr.data.eq(self.cr_r.o_data)
432 comb += d_cr.ack.eq(1)
433
434 # aaand XER...
435 with m.If(d_xer.req): # request for regfile access being made
436 comb += self.xer_r.ren.eq(0b111111) # enable all
437 d_xer_delay = Signal()
438 sync += d_xer_delay.eq(d_xer.req)
439 with m.If(d_xer_delay):
440 # data arrives one clock later
441 comb += d_xer.data.eq(self.xer_r.o_data)
442 comb += d_xer.ack.eq(1)
443
444 def tb_dec_fsm(self, m, spr_dec):
445 """tb_dec_fsm
446
447 this is a FSM for updating either dec or tb. it runs alternately
448 DEC, TB, DEC, TB. note that SPR pipeline could have written a new
449 value to DEC, however the regfile has "passthrough" on it so this
450 *should* be ok.
451
452 see v3.0B p1097-1099 for Timer Resources and p1065 and p1076
453 """
454
455 comb, sync = m.d.comb, m.d.sync
456 fast_rf = self.core.regs.rf['fast']
457 fast_r_dectb = fast_rf.r_ports['issue'] # DEC/TB
458 fast_w_dectb = fast_rf.w_ports['issue'] # DEC/TB
459
460 with m.FSM() as fsm:
461
462 # initiates read of current DEC
463 with m.State("DEC_READ"):
464 comb += fast_r_dectb.addr.eq(FastRegs.DEC)
465 comb += fast_r_dectb.ren.eq(1)
466 m.next = "DEC_WRITE"
467
468 # waits for DEC read to arrive (1 cycle), updates with new value
469 with m.State("DEC_WRITE"):
470 new_dec = Signal(64)
471 # TODO: MSR.LPCR 32-bit decrement mode
472 comb += new_dec.eq(fast_r_dectb.o_data - 1)
473 comb += fast_w_dectb.addr.eq(FastRegs.DEC)
474 comb += fast_w_dectb.wen.eq(1)
475 comb += fast_w_dectb.i_data.eq(new_dec)
476 sync += spr_dec.eq(new_dec) # copy into cur_state for decoder
477 m.next = "TB_READ"
478
479 # initiates read of current TB
480 with m.State("TB_READ"):
481 comb += fast_r_dectb.addr.eq(FastRegs.TB)
482 comb += fast_r_dectb.ren.eq(1)
483 m.next = "TB_WRITE"
484
485 # waits for read TB to arrive, initiates write of current TB
486 with m.State("TB_WRITE"):
487 new_tb = Signal(64)
488 comb += new_tb.eq(fast_r_dectb.o_data + 1)
489 comb += fast_w_dectb.addr.eq(FastRegs.TB)
490 comb += fast_w_dectb.wen.eq(1)
491 comb += fast_w_dectb.i_data.eq(new_tb)
492 m.next = "DEC_READ"
493
494 return m
495
496 def elaborate(self, platform):
497 m = Module()
498 # convenience
499 comb, sync = m.d.comb, m.d.sync
500 cur_state = self.cur_state
501 pdecode2 = self.pdecode2
502 dbg = self.dbg
503
504 # set up peripherals and core
505 core_rst = self.core_rst
506 self.setup_peripherals(m)
507
508 # reset current state if core reset requested
509 with m.If(core_rst):
510 m.d.sync += self.cur_state.eq(0)
511
512 # PC and instruction from I-Memory
513 comb += self.pc_o.eq(cur_state.pc)
514 self.pc_changed = Signal() # note write to PC
515 self.msr_changed = Signal() # note write to MSR
516 self.sv_changed = Signal() # note write to SVSTATE
517
518 # read state either from incoming override or from regfile
519 state = CoreState("get") # current state (MSR/PC/SVSTATE)
520 state_get(m, state.msr, core_rst, self.msr_i,
521 "msr", # read MSR
522 self.state_r_msr, StateRegs.MSR)
523 state_get(m, state.pc, core_rst, self.pc_i,
524 "pc", # read PC
525 self.state_r_pc, StateRegs.PC)
526 state_get(m, state.svstate, core_rst, self.svstate_i,
527 "svstate", # read SVSTATE
528 self.state_r_sv, StateRegs.SVSTATE)
529
530 # don't write pc every cycle
531 comb += self.state_w_pc.wen.eq(0)
532 comb += self.state_w_pc.i_data.eq(0)
533
534 # connect up debug state. note "combinatorially same" below,
535 # this is a bit naff, passing state over in the dbg class, but
536 # because it is combinatorial it achieves the desired goal
537 comb += dbg.state.eq(state)
538
539 # this bit doesn't have to be in the FSM: connect up to read
540 # regfiles on demand from DMI
541 self.do_dmi(m, dbg)
542
543 # DEC and TB inc/dec FSM. copy of DEC is put into CoreState,
544 # (which uses that in PowerDecoder2 to raise 0x900 exception)
545 self.tb_dec_fsm(m, cur_state.dec)
546
547 return m
548
549 def __iter__(self):
550 yield from self.pc_i.ports()
551 yield from self.msr_i.ports()
552 yield self.pc_o
553 yield self.memerr_o
554 yield from self.core.ports()
555 yield from self.imem.ports()
556 yield self.core_bigendian_i
557 yield self.busy_o
558
559 def ports(self):
560 return list(self)
561
562 def external_ports(self):
563 ports = self.pc_i.ports()
564 ports += self.msr_i.ports()
565 ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
566 ]
567
568 if self.jtag_en:
569 ports += list(self.jtag.external_ports())
570 else:
571 # JTAG disabled: add the DMI interface ports directly instead
572 ports += list(self.dbg.dmi.ports())
573
574 ports += list(self.imem.ibus.fields.values())
575 ports += list(self.core.l0.cmpi.wb_bus().fields.values())
576
577 if self.sram4x4k:
578 for sram in self.sram4k:
579 ports += list(sram.bus.fields.values())
580
581 if self.xics:
582 ports += list(self.xics_icp.bus.fields.values())
583 ports += list(self.xics_ics.bus.fields.values())
584 ports.append(self.int_level_i)
585
586 if self.gpio:
587 ports += list(self.simple_gpio.bus.fields.values())
588 ports.append(self.gpio_o)
589
590 return ports
591
594
595
596
597 # Fetch Finite State Machine.
598 # WARNING: there are currently DriverConflicts but it's actually working.
599 # TODO, here: everything that is global in nature, information from the
600 # main TestIssuerInternal, needs to move to either ispec() or ospec().
601 # not only that: TestIssuerInternal.imem can entirely move into here
602 # because imem is only ever accessed inside the FetchFSM.
603 class FetchFSM(ControlBase):
604 def __init__(self, allow_overlap, svp64_en, imem, core_rst,
605 pdecode2, cur_state,
606 dbg, core, svstate, nia, is_svp64_mode):
607 self.allow_overlap = allow_overlap
608 self.svp64_en = svp64_en
609 self.imem = imem
610 self.core_rst = core_rst
611 self.pdecode2 = pdecode2
612 self.cur_state = cur_state
613 self.dbg = dbg
614 self.core = core
615 self.svstate = svstate
616 self.nia = nia
617 self.is_svp64_mode = is_svp64_mode
618
619 # set up pipeline ControlBase and allocate i/o specs
620 # (unusual: normally done by the Pipeline API)
621 super().__init__(stage=self)
622 self.p.i_data, self.n.o_data = self.new_specs(None)
623 self.i, self.o = self.p.i_data, self.n.o_data
624
625 # next 3 functions are Stage API Compliance
626 def setup(self, m, i):
627 pass
628
629 def ispec(self):
630 return FetchInput()
631
632 def ospec(self):
633 return FetchOutput()
634
635 def elaborate(self, platform):
636 """fetch FSM
637
638 this FSM performs fetch of raw instruction data, partial-decodes
639 it 32-bit at a time to detect SVP64 prefixes, and will optionally
640 read a 2nd 32-bit quantity if that occurs.
641 """
642 m = super().elaborate(platform)
643
644 dbg = self.dbg
645 core = self.core
646 pc = self.i.pc
647 msr = self.i.msr
648 svstate = self.svstate
649 nia = self.nia
650 is_svp64_mode = self.is_svp64_mode
651 fetch_pc_o_ready = self.p.o_ready
652 fetch_pc_i_valid = self.p.i_valid
653 fetch_insn_o_valid = self.n.o_valid
654 fetch_insn_i_ready = self.n.i_ready
655
656 comb = m.d.comb
657 sync = m.d.sync
658 pdecode2 = self.pdecode2
659 cur_state = self.cur_state
660 dec_opcode_o = pdecode2.dec.raw_opcode_in # raw opcode
661
662 # also note instruction fetch failed
663 if hasattr(core, "icache"):
664 fetch_failed = core.icache.i_out.fetch_failed
665 flush_needed = True
666 else:
667 fetch_failed = Const(0, 1)
668 flush_needed = False
669
670 with m.FSM(name='fetch_fsm'):
671
672 # waiting (zzz)
673 with m.State("IDLE"):
674 with m.If(~dbg.stopping_o & ~fetch_failed):
675 comb += fetch_pc_o_ready.eq(1)
676 with m.If(fetch_pc_i_valid & ~fetch_failed):
677 # instruction allowed to go: start by reading the PC
678 # capture the PC and also drop it into Insn Memory
679 # we have joined a pair of combinatorial memory
680 # lookups together. this is Generally Bad.
681 comb += self.imem.a_pc_i.eq(pc)
682 comb += self.imem.a_i_valid.eq(1)
683 comb += self.imem.f_i_valid.eq(1)
684 sync += cur_state.pc.eq(pc)
685 sync += cur_state.svstate.eq(svstate) # and svstate
686 sync += cur_state.msr.eq(msr) # and msr
687
688 m.next = "INSN_READ" # move to "wait for bus" phase
689
690 # wait for the instruction read to complete (or for a stop request)
691 with m.State("INSN_READ"):
692 if self.allow_overlap:
693 stopping = dbg.stopping_o
694 else:
695 stopping = Const(0)
696 with m.If(stopping):
697 # stopping: jump back to idle
698 m.next = "IDLE"
699 with m.Else():
700 with m.If(self.imem.f_busy_o & ~fetch_failed): # zzz...
701 # busy but not fetch failed: stay in wait-read
702 comb += self.imem.a_i_valid.eq(1)
703 comb += self.imem.f_i_valid.eq(1)
704 with m.Else():
705 # not busy (or fetch failed!): instruction fetched
706 # when fetch failed, the instruction gets ignored
707 # by the decoder
708 insn = get_insn(self.imem.f_instr_o, cur_state.pc)
709 if self.svp64_en:
710 svp64 = self.svp64
711 # decode the SVP64 prefix, if any
712 comb += svp64.raw_opcode_in.eq(insn)
713 comb += svp64.bigendian.eq(self.core_bigendian_i)
714 # pass the decoded prefix (if any) to PowerDecoder2
715 sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
716 sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
717 # remember whether this is a prefixed instruction,
718 # so the FSM can readily loop when VL==0
719 sync += is_svp64_mode.eq(svp64.is_svp64_mode)
720 # calculate the address of the following instruction
721 insn_size = Mux(svp64.is_svp64_mode, 8, 4)
722 sync += nia.eq(cur_state.pc + insn_size)
723 with m.If(~svp64.is_svp64_mode):
724 # with no prefix, store the instruction
725 # and hand it directly to the next FSM
726 sync += dec_opcode_o.eq(insn)
727 m.next = "INSN_READY"
728 with m.Else():
729 # fetch the rest of the instruction from memory
730 comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
731 comb += self.imem.a_i_valid.eq(1)
732 comb += self.imem.f_i_valid.eq(1)
733 m.next = "INSN_READ2"
734 else:
735 # not SVP64 - 32-bit only
736 sync += nia.eq(cur_state.pc + 4)
737 sync += dec_opcode_o.eq(insn)
738 m.next = "INSN_READY"
739
740 with m.State("INSN_READ2"):
741 with m.If(self.imem.f_busy_o): # zzz...
742 # busy: stay in wait-read
743 comb += self.imem.a_i_valid.eq(1)
744 comb += self.imem.f_i_valid.eq(1)
745 with m.Else():
746 # not busy: instruction fetched
747 insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
748 sync += dec_opcode_o.eq(insn)
749 m.next = "INSN_READY"
750 # TODO: probably can start looking at pdecode2.rm_dec
751 # here or maybe even in INSN_READ state, if svp64_mode
752 # detected, in order to trigger - and wait for - the
753 # predicate reading.
754 if self.svp64_en:
755 pmode = pdecode2.rm_dec.predmode
756 """
757 if pmode != SVP64PredMode.ALWAYS.value:
758 fire predicate loading FSM and wait before
759 moving to INSN_READY
760 else:
761 sync += self.srcmask.eq(-1) # set to all 1s
762 sync += self.dstmask.eq(-1) # set to all 1s
763 m.next = "INSN_READY"
764 """
765
766 with m.State("INSN_READY"):
767 # hand over the instruction, to be decoded
768 comb += fetch_insn_o_valid.eq(1)
769 with m.If(fetch_insn_i_ready):
770 m.next = "IDLE"
771
772 # whatever was done above, over-ride it if core reset is held
773 with m.If(self.core_rst):
774 sync += nia.eq(0)
775
776 return m
777
778
779 class TestIssuerInternal(TestIssuerBase):
780 """TestIssuer - reads instructions from TestMemory and issues them
781
782 efficiency and speed is not the main goal here: functional correctness
783 and code clarity is. optimisations (which almost 100% interfere with
784 easy understanding) come later.
785 """
786
787 def fetch_predicate_fsm(self, m,
788 pred_insn_i_valid, pred_insn_o_ready,
789 pred_mask_o_valid, pred_mask_i_ready):
790 """fetch_predicate_fsm - obtains (constructs in the case of CR)
791 src/dest predicate masks
792
793 https://bugs.libre-soc.org/show_bug.cgi?id=617
794 the predicates can be read here, by using IntRegs r_ports['pred']
795 or CRRegs r_ports['pred']. in the case of CRs it will have to
796 be done through multiple reads, extracting one relevant at a time.
797 later, a faster way would be to use the 32-bit-wide CR port but
798 this is more complex decoding, here. equivalent code used in
799 ISACaller is "from openpower.decoder.isa.caller import get_predcr"
800
801 note: this ENTIRE FSM is not to be called when svp64 is disabled
802 """
803 comb = m.d.comb
804 sync = m.d.sync
805 pdecode2 = self.pdecode2
806 rm_dec = pdecode2.rm_dec # SVP64RMModeDecode
807 predmode = rm_dec.predmode
808 srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
809 cr_pred, int_pred = self.cr_pred, self.int_pred # read regfiles
810 # get src/dst step, so we can skip already used mask bits
811 cur_state = self.cur_state
812 srcstep = cur_state.svstate.srcstep
813 dststep = cur_state.svstate.dststep
814 cur_vl = cur_state.svstate.vl
815
816 # decode predicates
817 sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
818 dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
819 sidx, scrinvert = get_predcr(m, srcpred, 's')
820 didx, dcrinvert = get_predcr(m, dstpred, 'd')
821
822 # store fetched masks, for either intpred or crpred
823 # when src/dst step is not zero, the skipped mask bits need to be
824 # shifted-out, before actually storing them in src/dest mask
825 new_srcmask = Signal(64, reset_less=True)
826 new_dstmask = Signal(64, reset_less=True)
827
828 with m.FSM(name="fetch_predicate"):
829
830 with m.State("FETCH_PRED_IDLE"):
831 comb += pred_insn_o_ready.eq(1)
832 with m.If(pred_insn_i_valid):
833 with m.If(predmode == SVP64PredMode.INT):
834 # skip fetching destination mask register, when zero
835 with m.If(dall1s):
836 sync += new_dstmask.eq(-1)
837 # directly go to fetch source mask register
838 # guaranteed not to be zero (otherwise predmode
839 # would be SVP64PredMode.ALWAYS, not INT)
840 comb += int_pred.addr.eq(sregread)
841 comb += int_pred.ren.eq(1)
842 m.next = "INT_SRC_READ"
843 # fetch destination predicate register
844 with m.Else():
845 comb += int_pred.addr.eq(dregread)
846 comb += int_pred.ren.eq(1)
847 m.next = "INT_DST_READ"
848 with m.Elif(predmode == SVP64PredMode.CR):
849 # go fetch masks from the CR register file
850 sync += new_srcmask.eq(0)
851 sync += new_dstmask.eq(0)
852 m.next = "CR_READ"
853 with m.Else():
854 sync += self.srcmask.eq(-1)
855 sync += self.dstmask.eq(-1)
856 m.next = "FETCH_PRED_DONE"
857
858 with m.State("INT_DST_READ"):
859 # store destination mask
860 inv = Repl(dinvert, 64)
861 with m.If(dunary):
862 # set selected mask bit for 1<<r3 mode
863 dst_shift = Signal(range(64))
864 comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
865 sync += new_dstmask.eq(1 << dst_shift)
866 with m.Else():
867 # invert mask if requested
868 sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
869 # skip fetching source mask register, when zero
870 with m.If(sall1s):
871 sync += new_srcmask.eq(-1)
872 m.next = "FETCH_PRED_SHIFT_MASK"
873 # fetch source predicate register
874 with m.Else():
875 comb += int_pred.addr.eq(sregread)
876 comb += int_pred.ren.eq(1)
877 m.next = "INT_SRC_READ"
878
879 with m.State("INT_SRC_READ"):
880 # store source mask
881 inv = Repl(sinvert, 64)
882 with m.If(sunary):
883 # set selected mask bit for 1<<r3 mode
884 src_shift = Signal(range(64))
885 comb += src_shift.eq(self.int_pred.o_data & 0b111111)
886 sync += new_srcmask.eq(1 << src_shift)
887 with m.Else():
888 # invert mask if requested
889 sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
890 m.next = "FETCH_PRED_SHIFT_MASK"
891
892 # fetch masks from the CR register file
893 # implements the following loop:
894 # idx, inv = get_predcr(mask)
895 # mask = 0
896 # for cr_idx in range(vl):
897 # cr = crl[cr_idx + SVP64CROffs.CRPred] # takes one cycle
898 # if cr[idx] ^ inv:
899 # mask |= 1 << cr_idx
900 # return mask
901 with m.State("CR_READ"):
902 # CR index to be read, which will be ready by the next cycle
903 cr_idx = Signal.like(cur_vl, reset_less=True)
904 # submit the read operation to the regfile
905 with m.If(cr_idx != cur_vl):
906 # the CR read port is unary ...
907 # ren = 1 << cr_idx
908 # ... in MSB0 convention ...
909 # ren = 1 << (7 - cr_idx)
910 # ... and with an offset:
911 # ren = 1 << (7 - off - cr_idx)
912 idx = SVP64CROffs.CRPred + cr_idx
913 comb += cr_pred.ren.eq(1 << (7 - idx))
914 # signal data valid in the next cycle
915 cr_read = Signal(reset_less=True)
916 sync += cr_read.eq(1)
917 # load the next index
918 sync += cr_idx.eq(cr_idx + 1)
919 with m.Else():
920 # exit on loop end
921 sync += cr_read.eq(0)
922 sync += cr_idx.eq(0)
923 m.next = "FETCH_PRED_SHIFT_MASK"
924 with m.If(cr_read):
925 # compensate for the one cycle delay on the regfile
926 cur_cr_idx = Signal.like(cur_vl)
927 comb += cur_cr_idx.eq(cr_idx - 1)
928 # read the CR field, select the appropriate bit
929 cr_field = Signal(4)
930 scr_bit = Signal()
931 dcr_bit = Signal()
932 comb += cr_field.eq(cr_pred.o_data)
933 comb += scr_bit.eq(cr_field.bit_select(sidx, 1)
934 ^ scrinvert)
935 comb += dcr_bit.eq(cr_field.bit_select(didx, 1)
936 ^ dcrinvert)
937 # set the corresponding mask bit
938 bit_to_set = Signal.like(self.srcmask)
939 comb += bit_to_set.eq(1 << cur_cr_idx)
940 with m.If(scr_bit):
941 sync += new_srcmask.eq(new_srcmask | bit_to_set)
942 with m.If(dcr_bit):
943 sync += new_dstmask.eq(new_dstmask | bit_to_set)
944
945 with m.State("FETCH_PRED_SHIFT_MASK"):
946 # shift-out skipped mask bits
947 sync += self.srcmask.eq(new_srcmask >> srcstep)
948 sync += self.dstmask.eq(new_dstmask >> dststep)
949 m.next = "FETCH_PRED_DONE"
950
951 with m.State("FETCH_PRED_DONE"):
952 comb += pred_mask_o_valid.eq(1)
953 with m.If(pred_mask_i_ready):
954 m.next = "FETCH_PRED_IDLE"
955
956 def issue_fsm(self, m, core, nia,
957 dbg, core_rst, is_svp64_mode,
958 fetch_pc_o_ready, fetch_pc_i_valid,
959 fetch_insn_o_valid, fetch_insn_i_ready,
960 pred_insn_i_valid, pred_insn_o_ready,
961 pred_mask_o_valid, pred_mask_i_ready,
962 exec_insn_i_valid, exec_insn_o_ready,
963 exec_pc_o_valid, exec_pc_i_ready):
964 """issue FSM
965
966 decode / issue FSM. this interacts with the "fetch" FSM
967 through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
968 (outgoing). also interacts with the "execute" FSM
969 through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
970 (incoming).
971 SVP64 RM prefixes have already been set up by the
972 "fetch" phase, so execute is fairly straightforward.
973 """
974
975 comb = m.d.comb
976 sync = m.d.sync
977 pdecode2 = self.pdecode2
978 cur_state = self.cur_state
979
980 # temporaries
981 dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode
982
983 # for updating svstate (things like srcstep etc.)
984 update_svstate = Signal() # set this (below) if updating
985 new_svstate = SVSTATERec("new_svstate")
986 comb += new_svstate.eq(cur_state.svstate)
987
988 # precalculate srcstep+1 and dststep+1
989 cur_srcstep = cur_state.svstate.srcstep
990 cur_dststep = cur_state.svstate.dststep
991 next_srcstep = Signal.like(cur_srcstep)
992 next_dststep = Signal.like(cur_dststep)
993 comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
994 comb += next_dststep.eq(cur_state.svstate.dststep+1)
995
996 # note if an exception happened. in a pipelined or OoO design
997 # this needs to be accompanied by "shadowing" (or stalling)
998 exc_happened = self.core.o.exc_happened
999 # also note instruction fetch failed
1000 if hasattr(core, "icache"):
1001 fetch_failed = core.icache.i_out.fetch_failed
1002 flush_needed = True
1003 # set to fault in decoder
1004 # update (highest priority) instruction fault
1005 rising_fetch_failed = rising_edge(m, fetch_failed)
1006 with m.If(rising_fetch_failed):
1007 sync += pdecode2.instr_fault.eq(1)
1008 else:
1009 fetch_failed = Const(0, 1)
1010 flush_needed = False
1011
1012 with m.FSM(name="issue_fsm"):
1013
1014 # sync with the "fetch" phase which is reading the instruction
1015 # at this point, there is no instruction running, that
1016 # could inadvertently update the PC.
1017 with m.State("ISSUE_START"):
1018 # reset instruction fault
1019 sync += pdecode2.instr_fault.eq(0)
1020 # wait on "core stop" release, before next fetch
1021 # need to do this here, in case we are in a VL==0 loop
1022 with m.If(~dbg.core_stop_o & ~core_rst):
1023 comb += fetch_pc_i_valid.eq(1) # tell fetch to start
1024 with m.If(fetch_pc_o_ready): # fetch acknowledged us
1025 m.next = "INSN_WAIT"
1026 with m.Else():
1027 # tell core it's stopped, and acknowledge debug handshake
1028 comb += dbg.core_stopped_i.eq(1)
1029 # while stopped, allow updating the MSR, PC and SVSTATE
1030 with m.If(self.pc_i.ok):
1031 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1032 comb += self.state_w_pc.i_data.eq(self.pc_i.data)
1033 sync += self.pc_changed.eq(1)
1034 with m.If(self.msr_i.ok):
1035 comb += self.state_w_msr.wen.eq(1 << StateRegs.MSR)
1036 comb += self.state_w_msr.i_data.eq(self.msr_i.data)
1037 sync += self.msr_changed.eq(1)
1038 with m.If(self.svstate_i.ok):
1039 comb += new_svstate.eq(self.svstate_i.data)
1040 comb += update_svstate.eq(1)
1041 sync += self.sv_changed.eq(1)
1042
1043 # wait for an instruction to arrive from Fetch
1044 with m.State("INSN_WAIT"):
1045 if self.allow_overlap:
1046 stopping = dbg.stopping_o
1047 else:
1048 stopping = Const(0)
1049 with m.If(stopping):
1050 # stopping: jump back to idle
1051 m.next = "ISSUE_START"
1052 if flush_needed:
1053 # request the icache to stop asserting "failed"
1054 comb += core.icache.flush_in.eq(1)
1055 # stop instruction fault
1056 sync += pdecode2.instr_fault.eq(0)
1057 with m.Else():
1058 comb += fetch_insn_i_ready.eq(1)
1059 with m.If(fetch_insn_o_valid):
1060 # loop into ISSUE_START if it's a SVP64 instruction
1061 # and VL == 0. this because VL==0 is a for-loop
1062 # from 0 to 0 i.e. always, always a NOP.
1063 cur_vl = cur_state.svstate.vl
1064 with m.If(is_svp64_mode & (cur_vl == 0)):
1065 # update the PC before fetching the next instruction
1066 # since we are in a VL==0 loop, no instruction was
1067 # executed that we could be overwriting
1068 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1069 comb += self.state_w_pc.i_data.eq(nia)
1070 comb += self.insn_done.eq(1)
1071 m.next = "ISSUE_START"
1072 with m.Else():
1073 if self.svp64_en:
1074 m.next = "PRED_START" # fetching predicate
1075 else:
1076 m.next = "DECODE_SV" # skip predication
1077
1078 with m.State("PRED_START"):
1079 comb += pred_insn_i_valid.eq(1) # tell fetch_pred to start
1080 with m.If(pred_insn_o_ready): # fetch_pred acknowledged us
1081 m.next = "MASK_WAIT"
1082
1083 with m.State("MASK_WAIT"):
1084 comb += pred_mask_i_ready.eq(1) # ready to receive the masks
1085 with m.If(pred_mask_o_valid): # predication masks are ready
1086 m.next = "PRED_SKIP"
1087
1088 # skip zeros in predicate
1089 with m.State("PRED_SKIP"):
1090 with m.If(~is_svp64_mode):
1091 m.next = "DECODE_SV" # nothing to do
1092 with m.Else():
1093 if self.svp64_en:
1094 pred_src_zero = pdecode2.rm_dec.pred_sz
1095 pred_dst_zero = pdecode2.rm_dec.pred_dz
1096
1097 # new srcstep, after skipping zeros
1098 skip_srcstep = Signal.like(cur_srcstep)
1099 # value to be added to the current srcstep
1100 src_delta = Signal.like(cur_srcstep)
1101 # add leading zeros to srcstep, if not in zero mode
1102 with m.If(~pred_src_zero):
1103 # priority encoder (count leading zeros)
1104 # append guard bit, in case the mask is all zeros
1105 pri_enc_src = PriorityEncoder(65)
1106 m.submodules.pri_enc_src = pri_enc_src
1107 comb += pri_enc_src.i.eq(Cat(self.srcmask,
1108 Const(1, 1)))
1109 comb += src_delta.eq(pri_enc_src.o)
1110 # apply delta to srcstep
1111 comb += skip_srcstep.eq(cur_srcstep + src_delta)
1112 # shift-out all leading zeros from the mask
1113 # plus the leading "one" bit
1114 # TODO count leading zeros and shift-out the zero
1115 # bits, in the same step, in hardware
1116 sync += self.srcmask.eq(self.srcmask >> (src_delta+1))
1117
1118 # same as above, but for dststep
1119 skip_dststep = Signal.like(cur_dststep)
1120 dst_delta = Signal.like(cur_dststep)
1121 with m.If(~pred_dst_zero):
1122 pri_enc_dst = PriorityEncoder(65)
1123 m.submodules.pri_enc_dst = pri_enc_dst
1124 comb += pri_enc_dst.i.eq(Cat(self.dstmask,
1125 Const(1, 1)))
1126 comb += dst_delta.eq(pri_enc_dst.o)
1127 comb += skip_dststep.eq(cur_dststep + dst_delta)
1128 sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))
1129
1130 # TODO: initialize mask[VL]=1 to avoid passing past VL
1131 with m.If((skip_srcstep >= cur_vl) |
1132 (skip_dststep >= cur_vl)):
1133 # end of VL loop. Update PC and reset src/dst step
1134 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1135 comb += self.state_w_pc.i_data.eq(nia)
1136 comb += new_svstate.srcstep.eq(0)
1137 comb += new_svstate.dststep.eq(0)
1138 comb += update_svstate.eq(1)
1139 # synchronize with the simulator
1140 comb += self.insn_done.eq(1)
1141 # go back to Issue
1142 m.next = "ISSUE_START"
1143 with m.Else():
1144 # update new src/dst step
1145 comb += new_svstate.srcstep.eq(skip_srcstep)
1146 comb += new_svstate.dststep.eq(skip_dststep)
1147 comb += update_svstate.eq(1)
1148 # proceed to Decode
1149 m.next = "DECODE_SV"
1150
1151 # pass predicate mask bits through to satellite decoders
1152 # TODO: for SIMD this will be *multiple* bits
1153 sync += core.i.sv_pred_sm.eq(self.srcmask[0])
1154 sync += core.i.sv_pred_dm.eq(self.dstmask[0])
1155
1156 # after src/dst step have been updated, we are ready
1157 # to decode the instruction
1158 with m.State("DECODE_SV"):
1159 # decode the instruction
1160 with m.If(~fetch_failed):
1161 sync += pdecode2.instr_fault.eq(0)
1162 sync += core.i.e.eq(pdecode2.e)
1163 sync += core.i.state.eq(cur_state)
1164 sync += core.i.raw_insn_i.eq(dec_opcode_i)
1165 sync += core.i.bigendian_i.eq(self.core_bigendian_i)
1166 if self.svp64_en:
1167 sync += core.i.sv_rm.eq(pdecode2.sv_rm)
1168 # set RA_OR_ZERO detection in satellite decoders
1169 sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
1170 # and svp64 detection
1171 sync += core.i.is_svp64_mode.eq(is_svp64_mode)
1172 # and svp64 bit-rev'd ldst mode
1173 ldst_dec = pdecode2.use_svp64_ldst_dec
1174 sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
1175 # after decoding, reset any previous exception condition,
1176 # allowing it to be set again during the next execution
1177 sync += pdecode2.ldst_exc.eq(0)
1178
1179 m.next = "INSN_EXECUTE" # move to "execute"
1180
1181 # handshake with execution FSM, move to "wait" once acknowledged
1182 with m.State("INSN_EXECUTE"):
1183 comb += exec_insn_i_valid.eq(1) # trigger execute
1184 with m.If(exec_insn_o_ready): # execute acknowledged us
1185 m.next = "EXECUTE_WAIT"
1186
1187 with m.State("EXECUTE_WAIT"):
1188 # wait on "core stop" release, at instruction end
1189 # need to do this here, in case we are in a VL>1 loop
1190 with m.If(~dbg.core_stop_o & ~core_rst):
1191 comb += exec_pc_i_ready.eq(1)
1192 # see https://bugs.libre-soc.org/show_bug.cgi?id=636
1193 # the exception info needs to be blatted into
1194 # pdecode.ldst_exc, and the instruction "re-run".
1195 # when ldst_exc.happened is set, the PowerDecoder2
1196 # reacts very differently: it re-writes the instruction
1197 # with a "trap" (calls PowerDecoder2.trap()) which
1198 # will *overwrite* whatever was requested and jump the
1199 # PC to the exception address, as well as alter MSR.
1200 # nothing else needs to be done other than to note
1201 # the change of PC and MSR (and, later, SVSTATE)
1202 with m.If(exc_happened):
1203 mmu = core.fus.get_exc("mmu0")
1204 ldst = core.fus.get_exc("ldst0")
1205 if mmu is not None:
1206 with m.If(fetch_failed):
1207 # instruction fetch: exception is from MMU
1208 # reset instr_fault (highest priority)
1209 sync += pdecode2.ldst_exc.eq(mmu)
1210 sync += pdecode2.instr_fault.eq(0)
1211 if flush_needed:
1212 # request icache to stop asserting "failed"
1213 comb += core.icache.flush_in.eq(1)
1214 with m.If(~fetch_failed):
1215 # otherwise assume it was a LDST exception
1216 sync += pdecode2.ldst_exc.eq(ldst)
1217
1218 with m.If(exec_pc_o_valid):
1219
1220 # was this the last loop iteration?
1221 is_last = Signal()
1222 cur_vl = cur_state.svstate.vl
1223 comb += is_last.eq(next_srcstep == cur_vl)
1224
1225 # return directly to Decode if Execute generated an
1226 # exception.
1227 with m.If(pdecode2.ldst_exc.happened):
1228 m.next = "DECODE_SV"
1229
1230 # if MSR, PC or SVSTATE were changed by the previous
1231 # instruction, go directly back to Fetch, without
1232 # updating MSR, PC or SVSTATE
1233 with m.Elif(self.msr_changed | self.pc_changed |
1234 self.sv_changed):
1235 m.next = "ISSUE_START"
1236
1237 # also return to Fetch, when no output was a vector
1238 # (regardless of SRCSTEP and VL), or when the last
1239 # instruction was really the last one of the VL loop
1240 with m.Elif((~pdecode2.loop_continue) | is_last):
1241 # before going back to fetch, update the PC state
1242 # register with the NIA.
1243 # ok here we are not reading the branch unit.
1244 # TODO: this just blithely overwrites whatever
1245 # pipeline updated the PC
1246 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1247 comb += self.state_w_pc.i_data.eq(nia)
1248 # reset SRCSTEP before returning to Fetch
1249 if self.svp64_en:
1250 with m.If(pdecode2.loop_continue):
1251 comb += new_svstate.srcstep.eq(0)
1252 comb += new_svstate.dststep.eq(0)
1253 comb += update_svstate.eq(1)
1254 else:
1255 comb += new_svstate.srcstep.eq(0)
1256 comb += new_svstate.dststep.eq(0)
1257 comb += update_svstate.eq(1)
1258 m.next = "ISSUE_START"
1259
1260 # returning to Execute? then, first update SRCSTEP
1261 with m.Else():
1262 comb += new_svstate.srcstep.eq(next_srcstep)
1263 comb += new_svstate.dststep.eq(next_dststep)
1264 comb += update_svstate.eq(1)
1265 # return to mask skip loop
1266 m.next = "PRED_SKIP"
1267
1268 with m.Else():
1269 comb += dbg.core_stopped_i.eq(1)
1270 if flush_needed:
1271 # request the icache to stop asserting "failed"
1272 comb += core.icache.flush_in.eq(1)
1273 # stop instruction fault
1274 sync += pdecode2.instr_fault.eq(0)
1275 if flush_needed:
1276 # request the icache to stop asserting "failed"
1277 comb += core.icache.flush_in.eq(1)
1278 # stop instruction fault
1279 sync += pdecode2.instr_fault.eq(0)
1280 # while stopped, allow updating the MSR, PC and SVSTATE
1281 with m.If(self.msr_i.ok):
1282 comb += self.state_w_msr.wen.eq(1 << StateRegs.MSR)
1283 comb += self.state_w_msr.i_data.eq(self.msr_i.data)
1284 sync += self.msr_changed.eq(1)
1285 with m.If(self.pc_i.ok):
1286 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1287 comb += self.state_w_pc.i_data.eq(self.pc_i.data)
1288 sync += self.pc_changed.eq(1)
1289 with m.If(self.svstate_i.ok):
1290 comb += new_svstate.eq(self.svstate_i.data)
1291 comb += update_svstate.eq(1)
1292 sync += self.sv_changed.eq(1)
1293
1294 # check if svstate needs updating: if so, write it to State Regfile
1295 with m.If(update_svstate):
1296 comb += self.state_w_sv.wen.eq(1 << StateRegs.SVSTATE)
1297 comb += self.state_w_sv.i_data.eq(new_svstate)
1298 sync += cur_state.svstate.eq(new_svstate) # for next clock
1299
1300 def execute_fsm(self, m, core,
1301 exec_insn_i_valid, exec_insn_o_ready,
1302 exec_pc_o_valid, exec_pc_i_ready):
1303 """execute FSM
1304
1305 execute FSM. this interacts with the "issue" FSM
1306 through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
1307 (outgoing). SVP64 RM prefixes have already been set up by the
1308 "issue" phase, so execute is fairly straightforward.
1309 """
1310
1311 comb = m.d.comb
1312 sync = m.d.sync
1313 pdecode2 = self.pdecode2
1314
1315 # temporaries
1316 core_busy_o = core.n.o_data.busy_o # core is busy
1317 core_ivalid_i = core.p.i_valid # instruction is valid
1318
1319 if hasattr(core, "icache"):
1320 fetch_failed = core.icache.i_out.fetch_failed
1321 else:
1322 fetch_failed = Const(0, 1)
1323
1324 with m.FSM(name="exec_fsm"):
1325
1326 # waiting for instruction bus (stays there until not busy)
1327 with m.State("INSN_START"):
1328 comb += exec_insn_o_ready.eq(1)
1329 with m.If(exec_insn_i_valid):
1330 comb += core_ivalid_i.eq(1) # instruction is valid/issued
1331 sync += self.sv_changed.eq(0)
1332 sync += self.pc_changed.eq(0)
1333 sync += self.msr_changed.eq(0)
1334 with m.If(core.p.o_ready): # only move if accepted
1335 m.next = "INSN_ACTIVE" # move to "wait completion"
1336
1337 # instruction started: must wait till it finishes
1338 with m.State("INSN_ACTIVE"):
1339 # note changes to MSR, PC and SVSTATE
1340 # XXX oops, really must monitor *all* State Regfile write
1341 # ports looking for changes!
1342 with m.If(self.state_nia.wen & (1 << StateRegs.SVSTATE)):
1343 sync += self.sv_changed.eq(1)
1344 with m.If(self.state_nia.wen & (1 << StateRegs.MSR)):
1345 sync += self.msr_changed.eq(1)
1346 with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
1347 sync += self.pc_changed.eq(1)
1348 with m.If(~core_busy_o): # instruction done!
1349 comb += exec_pc_o_valid.eq(1)
1350 with m.If(exec_pc_i_ready):
1351 # when finished, indicate "done".
1352 # however, if there was an exception, the instruction
1353 # is *not* yet done. this is an implementation
1354 # detail: we choose to implement exceptions by
1355 # taking the exception information from the LDST
1356 # unit, putting that *back* into the PowerDecoder2,
1357 # and *re-running the entire instruction*.
1358 # if we erroneously indicate "done" here, it is as if
1359 # there were *TWO* instructions:
1360 # 1) the failed LDST 2) a TRAP.
1361 with m.If(~pdecode2.ldst_exc.happened &
1362 ~fetch_failed):
1363 comb += self.insn_done.eq(1)
1364 m.next = "INSN_START" # back to fetch
1365
1366 def elaborate(self, platform):
1367 m = super().elaborate(platform)
1368 # convenience
1369 comb, sync = m.d.comb, m.d.sync
1370 cur_state = self.cur_state
1371 pdecode2 = self.pdecode2
1372 dbg = self.dbg
1373 core = self.core
1374
1375 # set up peripherals and core
1376 core_rst = self.core_rst
1377
1378 # indicate to outside world if any FU is still executing
1379 comb += self.any_busy.eq(core.n.o_data.any_busy_o) # any FU executing
1380
1381 # address of the next instruction, in the absence of a branch
1382 # depends on the instruction size
1383 nia = Signal(64)
1384
1385 # connect up debug signals
1386 comb += dbg.terminate_i.eq(core.o.core_terminate_o)
1387
1388 # pass the prefix mode from Fetch to Issue, so the latter can loop
1389 # on VL==0
1390 is_svp64_mode = Signal()
1391
1392 # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
1393 # issue, decode/execute, now joined by "Predicate fetch/calculate".
1394 # these are the handshake signals between each
1395
1396 # fetch FSM can run as soon as the PC is valid
1397 fetch_pc_i_valid = Signal() # Execute tells Fetch "start next read"
1398 fetch_pc_o_ready = Signal() # Fetch Tells SVSTATE "proceed"
1399
1400 # fetch FSM hands over the instruction to be decoded / issued
1401 fetch_insn_o_valid = Signal()
1402 fetch_insn_i_ready = Signal()
1403
1404 # predicate fetch FSM decodes and fetches the predicate
1405 pred_insn_i_valid = Signal()
1406 pred_insn_o_ready = Signal()
1407
1408 # predicate fetch FSM delivers the masks
1409 pred_mask_o_valid = Signal()
1410 pred_mask_i_ready = Signal()
1411
1412 # issue FSM delivers the instruction to be executed
1413 exec_insn_i_valid = Signal()
1414 exec_insn_o_ready = Signal()
1415
1416 # execute FSM, hands over the PC/SVSTATE back to the issue FSM
1417 exec_pc_o_valid = Signal()
1418 exec_pc_i_ready = Signal()
1419
1420 # the FSMs here are perhaps unusual in that they detect conditions
1421 # then "hold" information, combinatorially, for the core
1422 # (as opposed to using sync - which would be on a clock's delay)
1423 # this includes the actual opcode, valid flags and so on.
1424
1425 # Fetch, then predicate fetch, then Issue, then Execute.
1426 # Issue is where the VL for-loop lives. the ready/valid
1427 # signalling is used to communicate between the four.
1428
1429 # set up Fetch FSM
1430 fetch = FetchFSM(self.allow_overlap, self.svp64_en,
1431 self.imem, core_rst, pdecode2, cur_state,
1432 dbg, core,
1433 dbg.state.svstate, # combinatorially same
1434 nia, is_svp64_mode)
1435 m.submodules.fetch = fetch
1436 # connect up in/out data to existing Signals
1437 comb += fetch.p.i_data.pc.eq(dbg.state.pc) # combinatorially same
1438 comb += fetch.p.i_data.msr.eq(dbg.state.msr) # combinatorially same
1439 # and the ready/valid signalling
1440 comb += fetch_pc_o_ready.eq(fetch.p.o_ready)
1441 comb += fetch.p.i_valid.eq(fetch_pc_i_valid)
1442 comb += fetch_insn_o_valid.eq(fetch.n.o_valid)
1443 comb += fetch.n.i_ready.eq(fetch_insn_i_ready)
1444
1445 self.issue_fsm(m, core, nia,
1446 dbg, core_rst, is_svp64_mode,
1447 fetch_pc_o_ready, fetch_pc_i_valid,
1448 fetch_insn_o_valid, fetch_insn_i_ready,
1449 pred_insn_i_valid, pred_insn_o_ready,
1450 pred_mask_o_valid, pred_mask_i_ready,
1451 exec_insn_i_valid, exec_insn_o_ready,
1452 exec_pc_o_valid, exec_pc_i_ready)
1453
1454 if self.svp64_en:
1455 self.fetch_predicate_fsm(m,
1456 pred_insn_i_valid, pred_insn_o_ready,
1457 pred_mask_o_valid, pred_mask_i_ready)
1458
1459 self.execute_fsm(m, core,
1460 exec_insn_i_valid, exec_insn_o_ready,
1461 exec_pc_o_valid, exec_pc_i_ready)
1462
1463 return m
1464
1465
1466 class TestIssuer(Elaboratable):
1467 def __init__(self, pspec):
1468 #self.ti = TestIssuerInternal(pspec)
1469 from soc.simple.inorder import TestIssuerInternalInOrder
1470 self.ti = TestIssuerInternalInOrder(pspec)
1471 self.pll = DummyPLL(instance=True)
1472
1473 # PLL direct clock or not
1474 self.pll_en = hasattr(pspec, "use_pll") and pspec.use_pll
1475 if self.pll_en:
1476 self.pll_test_o = Signal(reset_less=True)
1477 self.pll_vco_o = Signal(reset_less=True)
1478 self.clk_sel_i = Signal(2, reset_less=True)
1479 self.ref_clk = ClockSignal() # can't rename it but that's ok
1480 self.pllclk_clk = ClockSignal("pllclk")
1481
1482 def elaborate(self, platform):
1483 m = Module()
1484 comb = m.d.comb
1485
1486 # TestIssuer nominally runs at main clock, actually it is
1487 # all combinatorial internally except for coresync'd components
1488 m.submodules.ti = ti = self.ti
1489
1490 if self.pll_en:
1491 # ClockSelect runs at PLL output internal clock rate
1492 m.submodules.wrappll = pll = self.pll
1493
1494 # add clock domains from PLL
1495 cd_pll = ClockDomain("pllclk")
1496 m.domains += cd_pll
1497
1498 # PLL clock established. has the side-effect of running clksel
1499 # at the PLL's speed (see DomainRenamer("pllclk") above)
1500 pllclk = self.pllclk_clk
1501 comb += pllclk.eq(pll.clk_pll_o)
1502
1503 # wire up external 24 MHz to PLL
1504 #comb += pll.clk_24_i.eq(self.ref_clk)
1505 # output 18 MHz PLL test signal, and analog oscillator out
1506 comb += self.pll_test_o.eq(pll.pll_test_o)
1507 comb += self.pll_vco_o.eq(pll.pll_vco_o)
1508
1509 # input to pll clock selection
1510 comb += pll.clk_sel_i.eq(self.clk_sel_i)
1511
1512 # now wire up ResetSignals. don't mind them being in this domain
1513 pll_rst = ResetSignal("pllclk")
1514 comb += pll_rst.eq(ResetSignal())
1515
1516 # internal clock is set to selector clock-out. has the side-effect of
1517 # running TestIssuer at this speed (see DomainRenamer("intclk") above)
1518 # debug clock runs at coresync internal clock
1519 cd_coresync = ClockDomain("coresync")
1520 #m.domains += cd_coresync
1521 if self.ti.dbg_domain != 'sync':
1522 cd_dbgsync = ClockDomain("dbgsync")
1523 #m.domains += cd_dbgsync
1524 intclk = ClockSignal("coresync")
1525 dbgclk = ClockSignal(self.ti.dbg_domain)
1526 # XXX BYPASS PLL XXX
1527 # XXX BYPASS PLL XXX
1528 # XXX BYPASS PLL XXX
1529 if self.pll_en:
1530 comb += intclk.eq(self.ref_clk)
1531 else:
1532 comb += intclk.eq(ClockSignal())
1533 if self.ti.dbg_domain != 'sync':
1534 dbgclk = ClockSignal(self.ti.dbg_domain)
1535 comb += dbgclk.eq(intclk)
1536
1537 return m
1538
1539 def ports(self):
1540 return list(self.ti.ports()) + list(self.pll.ports()) + \
1541 [ClockSignal(), ResetSignal()]
1542
1543 def external_ports(self):
1544 ports = self.ti.external_ports()
1545 ports.append(ClockSignal())
1546 ports.append(ResetSignal())
1547 if self.pll_en:
1548 ports.append(self.clk_sel_i)
1549 ports.append(self.pll.clk_24_i)
1550 ports.append(self.pll_test_o)
1551 ports.append(self.pll_vco_o)
1552 ports.append(self.pllclk_clk)
1553 ports.append(self.ref_clk)
1554 return ports
1555
1556
1557 if __name__ == '__main__':
1558 units = {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
1559 'spr': 1,
1560 'div': 1,
1561 'mul': 1,
1562 'shiftrot': 1
1563 }
1564 pspec = TestMemPspec(ldst_ifacetype='bare_wb',
1565 imem_ifacetype='bare_wb',
1566 addr_wid=48,
1567 mask_wid=8,
1568 reg_wid=64,
1569 units=units)
1570 dut = TestIssuer(pspec)
1571 vl = main(dut, ports=dut.ports(), name="test_issuer")
1572
1573 if len(sys.argv) == 1:
1574 vl = rtlil.convert(dut, ports=dut.external_ports(), name="test_issuer")
1575 with open("test_issuer.il", "w") as f:
1576 f.write(vl)