add linux-5.7 unit test which showed a silly error:
[soc.git] / src / soc / simple / issuer.py
1 """simple core issuer
2
3 not in any way intended for production use. this runs a FSM that:
4
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
9 * increments the PC
10 * does it all over again
11
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to provide something that can be further incrementally
15 improved.
16 """
17
18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
19 ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
20 from nmigen.cli import rtlil
21 from nmigen.cli import main
22 import sys
23
24 from nmutil.singlepipe import ControlBase
25 from soc.simple.core_data import FetchOutput, FetchInput
26
27 from nmigen.lib.coding import PriorityEncoder
28
29 from openpower.decoder.power_decoder import create_pdecode
30 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
31 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
32 from openpower.decoder.decode2execute1 import Data
33 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
34 SVP64PredMode)
35 from openpower.state import CoreState
36 from openpower.consts import (CR, SVP64CROffs, MSR)
37 from soc.experiment.testmem import TestMemory # test only for instructions
38 from soc.regfile.regfiles import StateRegs, FastRegs
39 from soc.simple.core import NonProductionCore
40 from soc.config.test.test_loadstore import TestMemPspec
41 from soc.config.ifetch import ConfigFetchUnit
42 from soc.debug.dmi import CoreDebug, DMIInterface
43 from soc.debug.jtag import JTAG
44 from soc.config.pinouts import get_pinspecs
45 from soc.interrupts.xics import XICS_ICP, XICS_ICS
46 from soc.bus.simple_gpio import SimpleGPIO
47 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
48 from soc.clock.select import ClockSelect
49 from soc.clock.dummypll import DummyPLL
50 from openpower.sv.svstate import SVSTATERec
51 from soc.experiment.icache import ICache
52
53 from nmutil.util import rising_edge
54
55
def get_insn(f_instr_o, pc):
    """pick the 32-bit instruction out of the fetch unit's output

    * f_instr_o - fetch output.  when it is exactly 32 bits wide it is
                  returned unmodified
    * pc        - program counter.  only bit 2 is looked at, and only
                  when f_instr_o is not 32 bits wide
    """
    if f_instr_o.width != 32:
        # 64-bit: bit 2 of pc decides which word to select
        return f_instr_o.word_select(pc[2], 32)
    return f_instr_o
62
63 # gets state input or reads from state regfile
64
65
def state_get(m, res, core_rst, state_i, name, regfile, regnum):
    """drive res from an incoming override, or from a state regfile port

    * m        - Module being built (statements are added to comb and sync)
    * res      - destination for the state value
    * core_rst - nothing is driven while this is asserted
    * state_i  - override record: when state_i.ok is set, state_i.data
                 is copied straight to res
    * name     - used to name the internal "<name>_ok_delay" signal
    * regfile  - read port (ren / o_data) used when there is no override
    * regnum   - register number: converted to a unary read-enable
    """
    # records, one clock later, that no override was present (and
    # therefore that a regfile read was requested)
    res_ok_delay = Signal(name="%s_ok_delay" % name)
    with m.If(~core_rst):
        m.d.sync += res_ok_delay.eq(~state_i.ok)
        with m.If(state_i.ok):
            # override supplied from outside: use it directly
            m.d.comb += res.eq(state_i.data)
        with m.Else():
            # no override: request a read of the regfile entry...
            m.d.comb += regfile.ren.eq(1 << regnum)
        # ... the result of which is picked up on a 1-clock delay
        with m.If(res_ok_delay):
            m.d.comb += res.eq(regfile.o_data)
82
83
def get_predint(m, mask, name):
    """decode an SVP64 integer-predicate mask field into control signals

    mirrors the equivalent function in ISACaller, except that the INT
    register is not read here: only "what needs to be done" is decoded.

    returns a tuple (regread, invert, unary, all1s) of Signals:

    * regread - GPR register number that is to be read
    * invert  - set when the register value is to be bit-inverted
    * unary   - set when the register contents are to be used as 1<<r3
    * all1s   - set when no mask is to be applied (all ones)

    the Signal names are prefixed with the name argument.
    """
    regread = Signal(5, name=name+"regread")
    invert = Signal(name=name+"invert")
    unary = Signal(name=name+"unary")
    all1s = Signal(name=name+"all1s")

    # (predicate, GPR number or None, unary?, invert?)
    # a register number of None means "no register: mask is all ones"
    decode_table = (
        (SVP64PredInt.ALWAYS, None, False, False),
        (SVP64PredInt.R3_UNARY, 3, True, False),  # 1<<r3 (single bit)
        (SVP64PredInt.R3, 3, False, False),
        (SVP64PredInt.R3_N, 3, False, True),
        (SVP64PredInt.R10, 10, False, False),
        (SVP64PredInt.R10_N, 10, False, True),
        (SVP64PredInt.R30, 30, False, False),
        (SVP64PredInt.R30_N, 30, False, True),
    )

    with m.Switch(mask):
        for pred, regnum, is_unary, is_inverted in decode_table:
            with m.Case(pred.value):
                if regnum is None:
                    m.d.comb += all1s.eq(1)  # use 0b1111 (all ones)
                else:
                    m.d.comb += regread.eq(regnum)
                if is_unary:
                    m.d.comb += unary.eq(1)
                if is_inverted:
                    m.d.comb += invert.eq(1)

    return regread, invert, unary, all1s
122
123
def get_predcr(m, mask, name):
    """decode an SVP64 CR-predicate mask field into a CR bit and invert flag

    this mirrors _get_predcr in ISACaller.  returns a tuple (idx, invert):

    * idx    - 2-bit Signal selecting which bit of the CR field to test
    * invert - Signal, set when the tested bit is to be inverted

    the Signal names are prefixed with the name argument.
    """
    idx = Signal(2, name=name+"idx")
    invert = Signal(name=name+"crinvert")

    # each CR bit appears twice: once as-is, once inverted
    # (predicate, CR bit to test, invert)
    decode_table = (
        (SVP64PredCR.LT, CR.LT, 0),
        (SVP64PredCR.GE, CR.LT, 1),
        (SVP64PredCR.GT, CR.GT, 0),
        (SVP64PredCR.LE, CR.GT, 1),
        (SVP64PredCR.EQ, CR.EQ, 0),
        (SVP64PredCR.NE, CR.EQ, 1),
        (SVP64PredCR.SO, CR.SO, 0),
        (SVP64PredCR.NS, CR.SO, 1),
    )

    with m.Switch(mask):
        for pred, cr_bit, inverted in decode_table:
            with m.Case(pred.value):
                m.d.comb += idx.eq(cr_bit)
                m.d.comb += invert.eq(inverted)

    return idx, invert
157
158
class TestIssuerBase(Elaboratable):
    """TestIssuerBase - common base class for Issuers

    takes care of power-on reset, peripherals, debug, DEC/TB,
    and gets PC/MSR/SVSTATE from the State Regfile etc.
    """

    def __init__(self, pspec):
        """creates the core, decoder, debug interface and optional peripherals

        pspec is the parameter specification.  the following optional
        attributes are tested for (with hasattr): microwatt_compat, svp64,
        regreduce, allow_overlap, core_domain, debug, sram4x4kblock,
        xics and gpio.

        note that pspec is *modified*: wb_icache_en and wb_dcache_en are
        added when JTAG is enabled, and icache is added when the core
        has an "icache" attribute.
        """

        # test if microwatt compatibility is to be enabled
        self.microwatt_compat = (hasattr(pspec, "microwatt_compat") and
                                 (pspec.microwatt_compat == True))
        # not connected yet (microwatt)
        self.alt_reset = Signal(reset_less=True)

        # test if SVP64 is to be enabled
        self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)

        # and if regfiles are reduced
        self.regreduce_en = (hasattr(pspec, "regreduce") and
                             (pspec.regreduce == True))

        # and if overlap requested
        self.allow_overlap = (hasattr(pspec, "allow_overlap") and
                              (pspec.allow_overlap == True))

        # and get the core domain
        self.core_domain = "coresync"
        if (hasattr(pspec, "core_domain") and
                isinstance(pspec.core_domain, str)):
            self.core_domain = pspec.core_domain

        # JTAG interface.  add this right at the start because if it's
        # added it *modifies* the pspec, by adding enable/disable signals
        # for parts of the rest of the core
        self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
        # self.dbg_domain = "sync" # sigh "dbgsync" too problematic
        self.dbg_domain = "dbgsync"  # domain for DMI/JTAG clock
        if self.jtag_en:
            # XXX MUST keep this up-to-date with litex, and
            # soc-cocotb-sim, and err.. all needs sorting out, argh
            subset = ['uart',
                      'mtwi',
                      'eint', 'gpio', 'mspi0',
                      # 'mspi1', - disabled for now
                      # 'pwm', 'sd0', - disabled for now
                      'sdr']
            self.jtag = JTAG(get_pinspecs(subset=subset),
                             domain=self.dbg_domain)
            # add signals to pspec to enable/disable icache and dcache
            # (or data and instruction wishbone if icache/dcache not included)
            # https://bugs.libre-soc.org/show_bug.cgi?id=520
            # TODO: do we actually care if these are not domain-synchronised?
            # honestly probably not.
            pspec.wb_icache_en = self.jtag.wb_icache_en
            pspec.wb_dcache_en = self.jtag.wb_dcache_en
            self.wb_sram_en = self.jtag.wb_sram_en
        else:
            self.wb_sram_en = Const(1)

        # add 4k sram blocks?
        self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
                         pspec.sram4x4kblock == True)
        if self.sram4x4k:
            self.sram4k = []
            for i in range(4):
                self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
                                                    # features={'err'}
                                                    ))

        # add interrupt controller?
        self.xics = hasattr(pspec, "xics") and pspec.xics == True
        if self.xics:
            self.xics_icp = XICS_ICP()
            self.xics_ics = XICS_ICS()
            self.int_level_i = self.xics_ics.int_level_i
        else:
            self.ext_irq = Signal()

        # add GPIO peripheral?
        self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
        if self.gpio:
            self.simple_gpio = SimpleGPIO()
            self.gpio_o = self.simple_gpio.gpio_o

        # main instruction core.  suitable for prototyping / demo only
        self.core = core = NonProductionCore(pspec)
        self.core_rst = ResetSignal(self.core_domain)

        # instruction decoder.  goes into Trap Record
        #pdecode = create_pdecode()
        self.cur_state = CoreState("cur")  # current state (MSR/PC/SVSTATE)
        self.pdecode2 = PowerDecode2(None, state=self.cur_state,
                                     opkls=IssuerDecode2ToOperand,
                                     svp64_en=self.svp64_en,
                                     regreduce_en=self.regreduce_en)
        pdecode = self.pdecode2.dec

        if self.svp64_en:
            self.svp64 = SVP64PrefixDecoder()  # for decoding SVP64 prefix

        self.update_svstate = Signal()  # set this if updating svstate
        self.new_svstate = new_svstate = SVSTATERec("new_svstate")

        # Test Instruction memory
        if hasattr(core, "icache"):
            # XXX BLECH! use pspec to transfer the I-Cache to ConfigFetchUnit
            # truly dreadful.  needs a huge reorg.
            pspec.icache = core.icache
        self.imem = ConfigFetchUnit(pspec).fu

        # DMI interface
        self.dbg = CoreDebug()
        self.dbg_rst_i = Signal(reset_less=True)

        # instruction go/monitor
        self.pc_o = Signal(64, reset_less=True)
        # set "ok" to indicate "please change me"
        self.pc_i = Data(64, "pc_i")
        self.msr_i = Data(64, "msr_i")  # ditto
        self.svstate_i = Data(64, "svstate_i")  # ditto
        self.core_bigendian_i = Signal()  # TODO: set based on MSR.LE
        self.busy_o = Signal(reset_less=True)
        self.memerr_o = Signal(reset_less=True)

        # STATE regfile read /write ports for PC, MSR, SVSTATE
        staterf = self.core.regs.rf['state']
        self.state_r_msr = staterf.r_ports['msr']  # MSR rd
        self.state_r_pc = staterf.r_ports['cia']  # PC rd
        self.state_r_sv = staterf.r_ports['sv']  # SVSTATE rd

        self.state_w_msr = staterf.w_ports['d_wr2']  # MSR wr
        self.state_w_pc = staterf.w_ports['d_wr1']  # PC wr
        self.state_w_sv = staterf.w_ports['sv']  # SVSTATE wr

        # DMI interface access
        intrf = self.core.regs.rf['int']
        crrf = self.core.regs.rf['cr']
        xerrf = self.core.regs.rf['xer']
        self.int_r = intrf.r_ports['dmi']  # INT read
        self.cr_r = crrf.r_ports['full_cr_dbg']  # CR read
        self.xer_r = xerrf.r_ports['full_xer']  # XER read

        if self.svp64_en:
            # for predication
            self.int_pred = intrf.r_ports['pred']  # INT predicate read
            self.cr_pred = crrf.r_ports['cr_pred']  # CR predicate read

        # hack method of keeping an eye on whether branch/trap set the PC
        self.state_nia = self.core.regs.rf['state'].w_ports['nia']
        self.state_nia.wen.name = 'state_nia_wen'

        # pulse to synchronize the simulator at instruction end
        self.insn_done = Signal()

        # indicate any instruction still outstanding, in execution
        self.any_busy = Signal()

        if self.svp64_en:
            # store copies of predicate masks
            self.srcmask = Signal(64)
            self.dstmask = Signal(64)

        # sigh, the wishbone addresses are not wishbone-compliant in microwatt
        if self.microwatt_compat:
            self.ibus_adr = Signal(32, name='wishbone_insn_out.adr')
            self.dbus_adr = Signal(32, name='wishbone_data_out.adr')

        # add an output of the PC and instruction, and whether it was requested
        # this is for verilator debug purposes
        if self.microwatt_compat:
            self.nia = Signal(64)
            self.msr_o = Signal(64)
            self.nia_req = Signal(1)
            self.insn = Signal(32)

    def setup_peripherals(self, m):
        """adds submodules and clock domains to m, and wires up reset

        adds the core, instruction memory (unless it is an ICache),
        debug, JTAG, SRAM, XICS, GPIO and decoder submodules, creates the
        "por", "sync", core and debug clock domains, generates the
        power-on reset, and hooks up the busy and load/store "go" signals.
        """
        comb, sync = m.d.comb, m.d.sync

        # okaaaay so the debug module must be in coresync clock domain
        # but NOT its reset signal. to cope with this, set every single
        # submodule explicitly in coresync domain, debug and JTAG
        # in their own one but using *external* reset.
        csd = DomainRenamer(self.core_domain)
        dbd = DomainRenamer(self.dbg_domain)

        if self.microwatt_compat:
            m.submodules.core = core = self.core
        else:
            m.submodules.core = core = csd(self.core)
        # this _so_ needs sorting out.  ICache is added down inside
        # LoadStore1 and is already a submodule of LoadStore1
        if not isinstance(self.imem, ICache):
            m.submodules.imem = csd(self.imem)
        m.submodules.dbg = dbg = dbd(self.dbg)
        if self.jtag_en:
            m.submodules.jtag = jtag = dbd(self.jtag)
            # TODO: UART2GDB mux, here, from external pin
            # see https://bugs.libre-soc.org/show_bug.cgi?id=499
            sync += dbg.dmi.connect_to(jtag.dmi)

        # fixup the clocks in microwatt-compat mode (but leave resets alone
        # so that microwatt soc.vhdl can pull a reset on the core or DMI
        # can do it, just like in TestIssuer)
        if self.microwatt_compat:
            intclk = ClockSignal(self.core_domain)
            dbgclk = ClockSignal(self.dbg_domain)
            if self.core_domain != 'sync':
                comb += intclk.eq(ClockSignal())
            if self.dbg_domain != 'sync':
                comb += dbgclk.eq(ClockSignal())

        # drop the first 3 bits of the incoming wishbone addresses
        # this can go if using later versions of microwatt (not now)
        if self.microwatt_compat:
            ibus = self.imem.ibus
            dbus = self.core.l0.cmpi.wb_bus()
            comb += self.ibus_adr.eq(Cat(Const(0, 3), ibus.adr))
            comb += self.dbus_adr.eq(Cat(Const(0, 3), dbus.adr))

        cur_state = self.cur_state

        # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
        if self.sram4x4k:
            for i, sram in enumerate(self.sram4k):
                m.submodules["sram4k_%d" % i] = csd(sram)
                comb += sram.enable.eq(self.wb_sram_en)

        # XICS interrupt handler
        if self.xics:
            m.submodules.xics_icp = icp = csd(self.xics_icp)
            m.submodules.xics_ics = ics = csd(self.xics_ics)
            comb += icp.ics_i.eq(ics.icp_o)  # connect ICS to ICP
            sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core
        else:
            sync += cur_state.eint.eq(self.ext_irq)  # connect externally

        # GPIO test peripheral
        if self.gpio:
            m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)

        # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
        # XXX causes litex ECP5 test to get wrong idea about input and output
        # (but works with verilator sim *sigh*)
        # if self.gpio and self.xics:
        #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])

        # instruction decoder
        # NOTE(review): the result of create_pdecode() is never used in
        # this method - candidate for removal, confirm no side-effects
        pdecode = create_pdecode()
        m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
        if self.svp64_en:
            m.submodules.svp64 = svp64 = csd(self.svp64)

        # clock delay power-on reset
        # NOTE(review): the variable names cd_por / cd_sync presumably
        # matter here: the domain used below is m.d.por, so nmigen must
        # be deriving the domain name from the variable name.  do not
        # rename without passing an explicit name.
        cd_por = ClockDomain(reset_less=True)
        cd_sync = ClockDomain()
        m.domains += cd_por, cd_sync
        core_sync = ClockDomain(self.core_domain)
        if self.core_domain != "sync":
            m.domains += core_sync
        if self.dbg_domain != "sync":
            dbg_sync = ClockDomain(self.dbg_domain)
            m.domains += dbg_sync

        ti_rst = Signal(reset_less=True)
        delay = Signal(range(4), reset=3)
        with m.If(delay != 0):
            m.d.por += delay.eq(delay - 1)
        comb += cd_por.clk.eq(ClockSignal())

        # power-on reset delay.  the comparison MUST be in brackets:
        # in python "|" binds tighter than "!=", so without them the
        # expression is "delay != (0 | core_rst_o | ...)" which fails
        # to assert reset when delay == 1 and a reset is requested
        core_rst = ResetSignal(self.core_domain)
        if self.core_domain != "sync":
            comb += ti_rst.eq((delay != 0) | dbg.core_rst_o | ResetSignal())
            comb += core_rst.eq(ti_rst)
        else:
            with m.If((delay != 0) | dbg.core_rst_o):
                comb += core_rst.eq(1)

        # connect external reset signal to DMI Reset
        if self.dbg_domain != "sync":
            dbg_rst = ResetSignal(self.dbg_domain)
            comb += dbg_rst.eq(self.dbg_rst_i)

        # busy/halted signals from core
        core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o  # core is busy
        comb += self.busy_o.eq(core_busy_o)
        comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)

        # temporary hack: says "go" immediately for both address gen and ST
        ldst = core.fus.fus['ldst0']
        st_go_edge = rising_edge(m, ldst.st.rel_o)
        # link addr-go direct to rel
        m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)
        m.d.comb += ldst.st.go_i.eq(st_go_edge)  # link store-go to rising rel

    def do_dmi(self, m, dbg):
        """deals with DMI debug requests

        currently only provides read requests for the INT regfile, CR and XER
        it will later also deal with *writing* to these regfiles.

        for each of the three, a request enables the regfile read port,
        and the data plus an ack are returned one clock later.
        """
        comb = m.d.comb
        sync = m.d.sync
        dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
        intrf = self.core.regs.rf['int']

        with m.If(d_reg.req):  # request for regfile access being made
            # TODO: error-check this
            # XXX should this be combinatorial?  sync better?
            if intrf.unary:
                comb += self.int_r.ren.eq(1 << d_reg.addr)
            else:
                comb += self.int_r.addr.eq(d_reg.addr)
                comb += self.int_r.ren.eq(1)
        d_reg_delay = Signal()
        sync += d_reg_delay.eq(d_reg.req)
        with m.If(d_reg_delay):
            # data arrives one clock later
            comb += d_reg.data.eq(self.int_r.o_data)
            comb += d_reg.ack.eq(1)

        # sigh same thing for CR debug
        with m.If(d_cr.req):  # request for regfile access being made
            comb += self.cr_r.ren.eq(0b11111111)  # enable all
        d_cr_delay = Signal()
        sync += d_cr_delay.eq(d_cr.req)
        with m.If(d_cr_delay):
            # data arrives one clock later
            comb += d_cr.data.eq(self.cr_r.o_data)
            comb += d_cr.ack.eq(1)

        # aaand XER...
        with m.If(d_xer.req):  # request for regfile access being made
            comb += self.xer_r.ren.eq(0b111111)  # enable all
        d_xer_delay = Signal()
        sync += d_xer_delay.eq(d_xer.req)
        with m.If(d_xer_delay):
            # data arrives one clock later
            comb += d_xer.data.eq(self.xer_r.o_data)
            comb += d_xer.ack.eq(1)

    def tb_dec_fsm(self, m, spr_dec):
        """tb_dec_fsm

        this is a FSM for updating either dec or tb.  it runs alternately
        DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
        value to DEC, however the regfile has "passthrough" on it so this
        *should* be ok.

        spr_dec receives (on sync) a copy of each newly-written DEC value.
        returns m.

        see v3.0B p1097-1099 for Timer Resource and p1065 and p1076
        """

        comb, sync = m.d.comb, m.d.sync
        fast_rf = self.core.regs.rf['fast']
        fast_r_dectb = fast_rf.r_ports['issue']  # DEC/TB
        fast_w_dectb = fast_rf.w_ports['issue']  # DEC/TB

        with m.FSM() as fsm:

            # initiates read of current DEC
            with m.State("DEC_READ"):
                comb += fast_r_dectb.addr.eq(FastRegs.DEC)
                comb += fast_r_dectb.ren.eq(1)
                m.next = "DEC_WRITE"

            # waits for DEC read to arrive (1 cycle), updates with new value
            with m.State("DEC_WRITE"):
                new_dec = Signal(64)
                # TODO: MSR.LPCR 32-bit decrement mode
                comb += new_dec.eq(fast_r_dectb.o_data - 1)
                comb += fast_w_dectb.addr.eq(FastRegs.DEC)
                comb += fast_w_dectb.wen.eq(1)
                comb += fast_w_dectb.i_data.eq(new_dec)
                sync += spr_dec.eq(new_dec)  # copy into cur_state for decoder
                m.next = "TB_READ"

            # initiates read of current TB
            with m.State("TB_READ"):
                comb += fast_r_dectb.addr.eq(FastRegs.TB)
                comb += fast_r_dectb.ren.eq(1)
                m.next = "TB_WRITE"

            # waits for read TB to arrive, initiates write of current TB
            with m.State("TB_WRITE"):
                new_tb = Signal(64)
                comb += new_tb.eq(fast_r_dectb.o_data + 1)
                comb += fast_w_dectb.addr.eq(FastRegs.TB)
                comb += fast_w_dectb.wen.eq(1)
                comb += fast_w_dectb.i_data.eq(new_tb)
                m.next = "DEC_READ"

        return m

    def elaborate(self, platform):
        """builds and returns the Module common to all Issuers

        sets up peripherals and reset, the halted check against the DMI
        stop address, reading of MSR/PC/SVSTATE (override or regfile),
        DMI regfile reads, the DEC/TB FSM, and writing of MSR/PC/SVSTATE
        whilst the core is stopped.
        """
        m = Module()
        # convenience
        comb, sync = m.d.comb, m.d.sync
        cur_state = self.cur_state
        pdecode2 = self.pdecode2
        dbg = self.dbg

        # set up peripherals and core
        core_rst = self.core_rst
        self.setup_peripherals(m)

        # reset current state if core reset requested
        with m.If(core_rst):
            m.d.sync += self.cur_state.eq(0)

        # check halted condition: requested PC to execute matches DMI stop addr
        # and immediately stop. address of 0xffff_ffff_ffff_ffff can never
        # match
        halted = Signal()
        comb += halted.eq(dbg.stop_addr_o == dbg.state.pc)
        with m.If(halted):
            comb += dbg.core_stopped_i.eq(1)
            comb += dbg.terminate_i.eq(1)

        # PC and instruction from I-Memory
        comb += self.pc_o.eq(cur_state.pc)
        self.pc_changed = Signal()  # note write to PC
        self.msr_changed = Signal()  # note write to MSR
        self.sv_changed = Signal()  # note write to SVSTATE

        # read state either from incoming override or from regfile
        state = CoreState("get")  # current state (MSR/PC/SVSTATE)
        state_get(m, state.msr, core_rst, self.msr_i,
                  "msr",                  # read MSR
                  self.state_r_msr, StateRegs.MSR)
        state_get(m, state.pc, core_rst, self.pc_i,
                  "pc",                  # read PC
                  self.state_r_pc, StateRegs.PC)
        state_get(m, state.svstate, core_rst, self.svstate_i,
                  "svstate",   # read SVSTATE
                  self.state_r_sv, StateRegs.SVSTATE)

        # don't write pc every cycle
        comb += self.state_w_pc.wen.eq(0)
        comb += self.state_w_pc.i_data.eq(0)

        # connect up debug state.  note "combinatorially same" below,
        # this is a bit naff, passing state over in the dbg class, but
        # because it is combinatorial it achieves the desired goal
        comb += dbg.state.eq(state)

        # this bit doesn't have to be in the FSM: connect up to read
        # regfiles on demand from DMI
        self.do_dmi(m, dbg)

        # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
        # (which uses that in PowerDecoder2 to raise 0x900 exception)
        self.tb_dec_fsm(m, cur_state.dec)

        # while stopped, allow updating the MSR, PC and SVSTATE.
        # these are mainly for debugging purposes (including DMI/JTAG)
        with m.If(dbg.core_stopped_i):
            with m.If(self.pc_i.ok):
                comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                comb += self.state_w_pc.i_data.eq(self.pc_i.data)
                sync += self.pc_changed.eq(1)
            with m.If(self.msr_i.ok):
                comb += self.state_w_msr.wen.eq(1 << StateRegs.MSR)
                comb += self.state_w_msr.i_data.eq(self.msr_i.data)
                sync += self.msr_changed.eq(1)
            with m.If(self.svstate_i.ok | self.update_svstate):
                with m.If(self.svstate_i.ok):  # over-ride from external source
                    comb += self.new_svstate.eq(self.svstate_i.data)
                comb += self.state_w_sv.wen.eq(1 << StateRegs.SVSTATE)
                comb += self.state_w_sv.i_data.eq(self.new_svstate)
                sync += self.sv_changed.eq(1)

        # start renaming some of the ports to match microwatt
        if self.microwatt_compat:
            self.core.o.core_terminate_o.name = "terminated_out"
            # names of DMI interface
            self.dbg.dmi.addr_i.name = 'dmi_addr'
            self.dbg.dmi.din.name = 'dmi_din'
            self.dbg.dmi.dout.name = 'dmi_dout'
            self.dbg.dmi.req_i.name = 'dmi_req'
            self.dbg.dmi.we_i.name = 'dmi_wr'
            self.dbg.dmi.ack_o.name = 'dmi_ack'
            # wishbone instruction bus
            ibus = self.imem.ibus
            ibus.adr.name = 'wishbone_insn_out.adr'
            ibus.dat_w.name = 'wishbone_insn_out.dat'
            ibus.sel.name = 'wishbone_insn_out.sel'
            ibus.cyc.name = 'wishbone_insn_out.cyc'
            ibus.stb.name = 'wishbone_insn_out.stb'
            ibus.we.name = 'wishbone_insn_out.we'
            ibus.dat_r.name = 'wishbone_insn_in.dat'
            ibus.ack.name = 'wishbone_insn_in.ack'
            ibus.stall.name = 'wishbone_insn_in.stall'
            # wishbone data bus
            dbus = self.core.l0.cmpi.wb_bus()
            dbus.adr.name = 'wishbone_data_out.adr'
            dbus.dat_w.name = 'wishbone_data_out.dat'
            dbus.sel.name = 'wishbone_data_out.sel'
            dbus.cyc.name = 'wishbone_data_out.cyc'
            dbus.stb.name = 'wishbone_data_out.stb'
            dbus.we.name = 'wishbone_data_out.we'
            dbus.dat_r.name = 'wishbone_data_in.dat'
            dbus.ack.name = 'wishbone_data_in.ack'
            dbus.stall.name = 'wishbone_data_in.stall'

        return m

    def __iter__(self):
        """yields the pc/msr overrides, status signals, core and imem ports"""
        yield from self.pc_i.ports()
        yield from self.msr_i.ports()
        yield self.pc_o
        yield self.memerr_o
        yield from self.core.ports()
        yield from self.imem.ports()
        yield self.core_bigendian_i
        yield self.busy_o

    def ports(self):
        """returns everything yielded by __iter__, as a list"""
        return list(self)

    def external_ports(self):
        """returns the list of signals forming the top-level interface

        in microwatt-compatible mode a reduced, microwatt-named set is
        returned.  otherwise the list comprises the pc_i and msr_i
        overrides, status signals, JTAG (or DMI if JTAG is disabled),
        both wishbone buses, and whichever optional peripherals
        (SRAM, XICS or external irq, GPIO) were enabled.
        """
        if self.microwatt_compat:
            ports = [self.core.o.core_terminate_o,
                     self.ext_irq,
                     self.alt_reset,  # not connected yet
                     self.nia, self.insn, self.nia_req, self.msr_o,
                     ClockSignal(),
                     ResetSignal(),
                     ]
            ports += list(self.dbg.dmi.ports())
            # for dbus/ibus microwatt, exclude err bte and cti
            for name, sig in self.imem.ibus.fields.items():
                if name not in ['err', 'bte', 'cti', 'adr']:
                    ports.append(sig)
            for name, sig in self.core.l0.cmpi.wb_bus().fields.items():
                if name not in ['err', 'bte', 'cti', 'adr']:
                    ports.append(sig)
            # microwatt non-compliant with wishbone
            ports.append(self.ibus_adr)
            ports.append(self.dbus_adr)
            return ports

        # accumulate (not overwrite): both overrides must be included
        ports = list(self.pc_i.ports())
        ports += list(self.msr_i.ports())
        ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
                  ]

        if self.jtag_en:
            ports += list(self.jtag.external_ports())
        else:
            # don't add DMI if JTAG is enabled
            ports += list(self.dbg.dmi.ports())

        ports += list(self.imem.ibus.fields.values())
        ports += list(self.core.l0.cmpi.wb_bus().fields.values())

        if self.sram4x4k:
            for sram in self.sram4k:
                ports += list(sram.bus.fields.values())

        if self.xics:
            ports += list(self.xics_icp.bus.fields.values())
            ports += list(self.xics_ics.bus.fields.values())
            ports.append(self.int_level_i)
        else:
            ports.append(self.ext_irq)

        if self.gpio:
            ports += list(self.simple_gpio.bus.fields.values())
            ports.append(self.gpio_o)

        return ports
737
738
739 class TestIssuerInternal(TestIssuerBase):
740 """TestIssuer - reads instructions from TestMemory and issues them
741
742 efficiency and speed is not the main goal here: functional correctness
743 and code clarity is. optimisations (which almost 100% interfere with
744 easy understanding) come later.
745 """
746
def fetch_fsm(self, m, dbg, core, pc, msr, svstate, nia, is_svp64_mode,
              fetch_pc_o_ready, fetch_pc_i_valid,
              fetch_insn_o_valid, fetch_insn_i_ready):
    """fetch FSM

    fetches raw instruction data.  each 32-bit word is partially
    decoded to detect an SVP64 prefix, and if one is found a second
    32-bit word is read.

    states: IDLE -> INSN_READ -> (INSN_READ2 ->) INSN_READY -> IDLE

    * pc, msr, svstate  - state to fetch at: captured into cur_state
    * nia               - set (sync) to the address after the instruction
    * is_svp64_mode     - set (sync) when the instruction is prefixed
    * fetch_pc_o_ready / fetch_pc_i_valid     - incoming handshake
    * fetch_insn_o_valid / fetch_insn_i_ready - outgoing handshake
    """
    imem = self.imem
    pdecode2 = self.pdecode2
    cur_state = self.cur_state
    dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode
    has_icache = hasattr(core, "icache")

    # also note instruction fetch failed (only an icache can report it)
    if has_icache:
        fetch_failed = core.icache.i_out.fetch_failed
    else:
        fetch_failed = Const(0, 1)

    def request_fetch(addr=None):
        # assert the fetch request, optionally driving the address too
        if addr is not None:
            m.d.comb += imem.a_pc_i.eq(addr)
        m.d.comb += imem.a_i_valid.eq(1)
        m.d.comb += imem.f_i_valid.eq(1)

    def fetched_insn(addr):
        # blech, icache returns actual instruction, but the others
        # return raw memory from which a word has to be selected
        if has_icache:
            return imem.f_instr_o
        return get_insn(imem.f_instr_o, addr)

    # set priv / virt mode on I-Cache, sigh
    if isinstance(imem, ICache):
        m.d.comb += imem.i_in.priv_mode.eq(~msr[MSR.PR])
        m.d.comb += imem.i_in.virt_mode.eq(msr[MSR.IR])  # Instr. Redir (VM)

    with m.FSM(name='fetch_fsm'):

        # nothing happening: wait to be given a PC
        with m.State("IDLE"):
            # ready is raised when the fetch has not failed and
            # core_stop_o is clear (see dmi.py for how that is generated)
            with m.If(~fetch_failed & ~dbg.core_stop_o):
                m.d.comb += fetch_pc_o_ready.eq(1)
            with m.If(fetch_pc_i_valid & ~pdecode2.instr_fault
                      & ~dbg.core_stop_o):
                # go: drop the PC straight into Insn Memory.  this joins
                # a pair of combinatorial memory lookups together, which
                # is Generally Bad.
                request_fetch(pc)
                # capture the state that the instruction is fetched at
                m.d.sync += cur_state.pc.eq(pc)
                m.d.sync += cur_state.svstate.eq(svstate)
                m.d.sync += cur_state.msr.eq(msr)

                m.next = "INSN_READ"  # move to "wait for bus" phase

        # dummy pause to find out why simulation is not keeping up
        with m.State("INSN_READ"):
            # in "single-step" mode, checking dbg.stopping_o prevents
            # progress, so a fetch is allowed to proceed once started
            # (stopping = dbg.stopping_o was considered for allow_overlap)
            stopping = Const(0)
            with m.If(stopping):
                # stopping: jump back to idle
                m.next = "IDLE"
            with m.Else():
                with m.If(imem.f_busy_o &
                          ~pdecode2.instr_fault):  # zzz...
                    # busy, and no fault: keep the request up and wait
                    request_fetch(pc)
                with m.Else():
                    # not busy (or fetch failed!): instruction fetched.
                    # on a failed fetch the decoder ignores the instruction
                    insn = fetched_insn(cur_state.pc)
                    if not self.svp64_en:
                        # not SVP64 - 32-bit only
                        m.d.sync += nia.eq(cur_state.pc + 4)
                        m.d.sync += dec_opcode_i.eq(insn)
                        if self.microwatt_compat:
                            # for verilator debug purposes
                            m.d.comb += self.insn.eq(insn)
                            m.d.comb += self.nia.eq(cur_state.pc)
                            m.d.comb += self.msr_o.eq(cur_state.msr)
                            m.d.comb += self.nia_req.eq(1)
                        m.next = "INSN_READY"
                    else:
                        prefix_dec = self.svp64
                        # decode the SVP64 prefix, if any
                        m.d.comb += prefix_dec.raw_opcode_in.eq(insn)
                        m.d.comb += prefix_dec.bigendian.eq(
                            self.core_bigendian_i)
                        # pass the decoded prefix (if any) to PowerDecoder2
                        m.d.sync += pdecode2.sv_rm.eq(prefix_dec.svp64_rm)
                        m.d.sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
                        # remember whether this is a prefixed instruction,
                        # so the FSM can readily loop when VL==0
                        m.d.sync += is_svp64_mode.eq(
                            prefix_dec.is_svp64_mode)
                        # a prefixed instruction is 8 bytes, otherwise 4
                        step = Mux(prefix_dec.is_svp64_mode, 8, 4)
                        m.d.sync += nia.eq(cur_state.pc + step)
                        with m.If(~prefix_dec.is_svp64_mode):
                            # no prefix: store the instruction and
                            # hand it directly to the next FSM
                            m.d.sync += dec_opcode_i.eq(insn)
                            m.next = "INSN_READY"
                        with m.Else():
                            # prefixed: fetch the rest of the instruction
                            request_fetch(cur_state.pc + 4)
                            m.next = "INSN_READ2"

        with m.State("INSN_READ2"):
            with m.If(imem.f_busy_o):  # zzz...
                # busy: stay in wait-read
                request_fetch()
            with m.Else():
                # not busy: second word has arrived
                m.d.sync += dec_opcode_i.eq(fetched_insn(cur_state.pc+4))
                m.next = "INSN_READY"
                # TODO: probably can start looking at pdecode2.rm_dec
                # here or maybe even in INSN_READ state, if svp64_mode
                # detected, in order to trigger - and wait for - the
                # predicate reading.
                if self.svp64_en:
                    pmode = pdecode2.rm_dec.predmode
                # sketch of what is intended (not implemented):
                #   if pmode != SVP64PredMode.ALWAYS.value:
                #       fire predicate loading FSM and wait before
                #       moving to INSN_READY
                #   else:
                #       sync += self.srcmask.eq(-1) # set to all 1s
                #       sync += self.dstmask.eq(-1) # set to all 1s
                #       m.next = "INSN_READY"

        with m.State("INSN_READY"):
            # offer the instruction for decoding until it is accepted
            m.d.comb += fetch_insn_o_valid.eq(1)
            with m.If(fetch_insn_i_ready):
                m.next = "IDLE"
898
899
def fetch_predicate_fsm(self, m,
                        pred_insn_i_valid, pred_insn_o_ready,
                        pred_mask_o_valid, pred_mask_i_ready):
    """fetch_predicate_fsm - obtains (constructs in the case of CR)
    src/dest predicate masks

    https://bugs.libre-soc.org/show_bug.cgi?id=617
    the predicates can be read here, by using IntRegs r_ports['pred']
    or CRRegs r_ports['pred'].  in the case of CRs it will have to
    be done through multiple reads, extracting the relevant bit one
    CR field at a time.  later, a faster way would be to use the
    32-bit-wide CR port but this is more complex decoding, here.
    equivalent code used in ISACaller is
    "from openpower.decoder.isa.caller import get_predcr"

    note: this ENTIRE FSM is not to be called when svp64 is disabled

    arguments:

    * m                 - Module to which the "fetch_predicate" FSM
                          (and its comb/sync statements) is added
    * pred_insn_i_valid - request in: sampled in FETCH_PRED_IDLE,
                          starts a predicate fetch
    * pred_insn_o_ready - driven to 1 whilst in FETCH_PRED_IDLE
    * pred_mask_o_valid - driven to 1 whilst in FETCH_PRED_DONE
    * pred_mask_i_ready - acknowledge in: when set in FETCH_PRED_DONE,
                          the FSM returns to FETCH_PRED_IDLE

    results are stored (sync) in self.srcmask and self.dstmask.
    returns nothing.

    states: FETCH_PRED_IDLE, INT_DST_READ, INT_SRC_READ, CR_READ,
    FETCH_PRED_SHIFT_MASK, FETCH_PRED_DONE
    """
    comb = m.d.comb
    sync = m.d.sync
    pdecode2 = self.pdecode2
    rm_dec = pdecode2.rm_dec  # SVP64RMModeDecode
    predmode = rm_dec.predmode
    srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
    cr_pred, int_pred = self.cr_pred, self.int_pred   # read regfiles
    # get src/dst step, so we can skip already used mask bits
    cur_state = self.cur_state
    srcstep = cur_state.svstate.srcstep
    dststep = cur_state.svstate.dststep
    cur_vl = cur_state.svstate.vl

    # decode predicates
    # get_predint / get_predcr are defined elsewhere in this file.
    # as used below: *regread is the INT regfile address to read,
    # *invert is XORed (replicated) onto the value read, *unary selects
    # the "1 << value" form, and *all1s bypasses the register read
    # altogether.  sidx/didx select a bit within a 4-bit CR field and
    # *crinvert is XORed with that bit.
    sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
    dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
    sidx, scrinvert = get_predcr(m, srcpred, 's')
    didx, dcrinvert = get_predcr(m, dstpred, 'd')

    # store fetched masks, for either intpred or crpred
    # when src/dst step is not zero, the skipped mask bits need to be
    # shifted-out, before actually storing them in src/dest mask
    new_srcmask = Signal(64, reset_less=True)
    new_dstmask = Signal(64, reset_less=True)

    with m.FSM(name="fetch_predicate"):

        with m.State("FETCH_PRED_IDLE"):
            comb += pred_insn_o_ready.eq(1)
            with m.If(pred_insn_i_valid):
                with m.If(predmode == SVP64PredMode.INT):
                    # skip fetching destination mask register, when zero
                    with m.If(dall1s):
                        sync += new_dstmask.eq(-1)
                        # directly go to fetch source mask register
                        # guaranteed not to be zero (otherwise predmode
                        # would be SVP64PredMode.ALWAYS, not INT)
                        comb += int_pred.addr.eq(sregread)
                        comb += int_pred.ren.eq(1)
                        m.next = "INT_SRC_READ"
                    # fetch destination predicate register
                    with m.Else():
                        comb += int_pred.addr.eq(dregread)
                        comb += int_pred.ren.eq(1)
                        m.next = "INT_DST_READ"
                with m.Elif(predmode == SVP64PredMode.CR):
                    # go fetch masks from the CR register file
                    # (both masks are accumulated by OR-ing in CR_READ,
                    # so they start from zero)
                    sync += new_srcmask.eq(0)
                    sync += new_dstmask.eq(0)
                    m.next = "CR_READ"
                with m.Else():
                    # neither INT nor CR: all-ones masks, stored directly
                    # (bypasses FETCH_PRED_SHIFT_MASK)
                    sync += self.srcmask.eq(-1)
                    sync += self.dstmask.eq(-1)
                    m.next = "FETCH_PRED_DONE"

        with m.State("INT_DST_READ"):
            # store destination mask
            # (int_pred.o_data is consumed here: the read was requested
            # by the state that transitioned into this one)
            inv = Repl(dinvert, 64)
            with m.If(dunary):
                # set selected mask bit for 1<<r3 mode
                # only the low 6 bits of the register select the bit
                dst_shift = Signal(range(64))
                comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
                sync += new_dstmask.eq(1 << dst_shift)
            with m.Else():
                # invert mask if requested
                sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
            # skip fetching source mask register, when zero
            with m.If(sall1s):
                sync += new_srcmask.eq(-1)
                m.next = "FETCH_PRED_SHIFT_MASK"
            # fetch source predicate register
            with m.Else():
                comb += int_pred.addr.eq(sregread)
                comb += int_pred.ren.eq(1)
                m.next = "INT_SRC_READ"

        with m.State("INT_SRC_READ"):
            # store source mask
            inv = Repl(sinvert, 64)
            with m.If(sunary):
                # set selected mask bit for 1<<r3 mode
                # only the low 6 bits of the register select the bit
                src_shift = Signal(range(64))
                comb += src_shift.eq(self.int_pred.o_data & 0b111111)
                sync += new_srcmask.eq(1 << src_shift)
            with m.Else():
                # invert mask if requested
                sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
            m.next = "FETCH_PRED_SHIFT_MASK"

        # fetch masks from the CR register file
        # implements the following loop:
        # idx, inv = get_predcr(mask)
        # mask = 0
        # for cr_idx in range(vl):
        #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
        #     if cr[idx] ^ inv:
        #         mask |= 1 << cr_idx
        # return mask
        with m.State("CR_READ"):
            # CR index to be read, which will be ready by the next cycle
            cr_idx = Signal.like(cur_vl, reset_less=True)
            # submit the read operation to the regfile
            with m.If(cr_idx != cur_vl):
                # the CR read port is unary ...
                # ren = 1 << cr_idx
                # ... in MSB0 convention ...
                # ren = 1 << (7 - cr_idx)
                # ... and with an offset:
                # ren = 1 << (7 - off - cr_idx)
                idx = SVP64CROffs.CRPred + cr_idx
                comb += cr_pred.ren.eq(1 << (7 - idx))
                # signal data valid in the next cycle
                cr_read = Signal(reset_less=True)
                sync += cr_read.eq(1)
                # load the next index
                sync += cr_idx.eq(cr_idx + 1)
            with m.Else():
                # exit on loop end
                # (cr_read and cr_idx are cleared, ready for the next use)
                sync += cr_read.eq(0)
                sync += cr_idx.eq(0)
                m.next = "FETCH_PRED_SHIFT_MASK"
            # this check is independent of the If/Else above: it still
            # fires on the loop-exit cycle, to collect the last CR field
            with m.If(cr_read):
                # compensate for the one cycle delay on the regfile
                cur_cr_idx = Signal.like(cur_vl)
                comb += cur_cr_idx.eq(cr_idx - 1)
                # read the CR field, select the appropriate bit
                cr_field = Signal(4)
                scr_bit = Signal()
                dcr_bit = Signal()
                comb += cr_field.eq(cr_pred.o_data)
                comb += scr_bit.eq(cr_field.bit_select(sidx, 1)
                                   ^ scrinvert)
                comb += dcr_bit.eq(cr_field.bit_select(didx, 1)
                                   ^ dcrinvert)
                # set the corresponding mask bit
                bit_to_set = Signal.like(self.srcmask)
                comb += bit_to_set.eq(1 << cur_cr_idx)
                with m.If(scr_bit):
                    sync += new_srcmask.eq(new_srcmask | bit_to_set)
                with m.If(dcr_bit):
                    sync += new_dstmask.eq(new_dstmask | bit_to_set)

        with m.State("FETCH_PRED_SHIFT_MASK"):
            # shift-out skipped mask bits
            sync += self.srcmask.eq(new_srcmask >> srcstep)
            sync += self.dstmask.eq(new_dstmask >> dststep)
            m.next = "FETCH_PRED_DONE"

        with m.State("FETCH_PRED_DONE"):
            # masks are available: hold "valid" until acknowledged
            comb += pred_mask_o_valid.eq(1)
            with m.If(pred_mask_i_ready):
                m.next = "FETCH_PRED_IDLE"
1068
def issue_fsm(self, m, core, nia,
              dbg, core_rst, is_svp64_mode,
              fetch_pc_o_ready, fetch_pc_i_valid,
              fetch_insn_o_valid, fetch_insn_i_ready,
              pred_insn_i_valid, pred_insn_o_ready,
              pred_mask_o_valid, pred_mask_i_ready,
              exec_insn_i_valid, exec_insn_o_ready,
              exec_pc_o_valid, exec_pc_i_ready):
    """issue FSM

    decode / issue FSM.  this interacts with the "fetch" FSM
    through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
    (outgoing).  also interacts with the "execute" FSM
    through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
    (incoming).
    SVP64 RM prefixes have already been set up by the
    "fetch" phase, so execute is fairly straightforward.

    arguments:

    * m             - Module to which the "issue_fsm" FSM is added
    * core          - the core: its input record (core.i) is loaded in
                      DECODE_SV (and PRED_SKIP); core.fus and (if
                      present) core.icache are also accessed
    * nia           - next instruction address: written to the PC entry
                      of the State Regfile via self.state_w_pc
    * dbg           - debug interface: core_stop_o is read and
                      core_stopped_i is driven
    * core_rst      - whilst set, no fetch is requested from ISSUE_START
    * is_svp64_mode - set if the fetched instruction is SVP64-prefixed
    * fetch_pc_*, fetch_insn_* - handshake with the fetch FSM
    * pred_insn_*, pred_mask_* - handshake with the predicate-fetch FSM
    * exec_insn_*, exec_pc_*   - handshake with the execute FSM

    of each handshake pair, this function drives fetch_pc_i_valid,
    fetch_insn_i_ready, pred_insn_i_valid, pred_mask_i_ready,
    exec_insn_i_valid and exec_pc_i_ready, and only reads the others.

    states: ISSUE_START, INSN_WAIT, PRED_START, MASK_WAIT, PRED_SKIP,
    DECODE_SV, INSN_EXECUTE, EXECUTE_WAIT

    returns nothing.
    """

    comb = m.d.comb
    sync = m.d.sync
    pdecode2 = self.pdecode2
    cur_state = self.cur_state
    new_svstate = self.new_svstate

    # temporaries
    dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode

    # for updating svstate (things like srcstep etc.)
    # this is the default: the FSM states below override individual
    # fields, and raise self.update_svstate to have the result latched
    comb += new_svstate.eq(cur_state.svstate)

    # precalculate srcstep+1 and dststep+1
    cur_srcstep = cur_state.svstate.srcstep
    cur_dststep = cur_state.svstate.dststep
    next_srcstep = Signal.like(cur_srcstep)
    next_dststep = Signal.like(cur_dststep)
    comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
    comb += next_dststep.eq(cur_state.svstate.dststep+1)

    # note if an exception happened.  in a pipelined or OoO design
    # this needs to be accompanied by "shadowing" (or stalling)
    exc_happened = self.core.o.exc_happened
    # also note instruction fetch failed
    if hasattr(core, "icache"):
        fetch_failed = core.icache.i_out.fetch_failed
        flush_needed = True
        # set to fault in decoder
        # update (highest priority) instruction fault
        # (rising_edge is a helper defined elsewhere in this file)
        rising_fetch_failed = rising_edge(m, fetch_failed)
        with m.If(rising_fetch_failed):
            sync += pdecode2.instr_fault.eq(1)
    else:
        # no icache: a fetch can never be flagged as failed
        fetch_failed = Const(0, 1)
        flush_needed = False

    with m.FSM(name="issue_fsm"):

        # sync with the "fetch" phase which is reading the instruction
        # at this point, there is no instruction running, that
        # could inadvertently update the PC.
        with m.State("ISSUE_START"):
            # reset instruction fault
            sync += pdecode2.instr_fault.eq(0)
            # wait on "core stop" release, before next fetch
            # need to do this here, in case we are in a VL==0 loop
            with m.If(~dbg.core_stop_o & ~core_rst):
                comb += fetch_pc_i_valid.eq(1)  # tell fetch to start
                with m.If(fetch_pc_o_ready):   # fetch acknowledged us
                    m.next = "INSN_WAIT"
            with m.Else():
                # tell core it's stopped, and acknowledge debug handshake
                comb += dbg.core_stopped_i.eq(1)
                # while stopped, allow updating SVSTATE
                with m.If(self.svstate_i.ok):
                    comb += new_svstate.eq(self.svstate_i.data)
                    comb += self.update_svstate.eq(1)
                    sync += self.sv_changed.eq(1)

        # wait for an instruction to arrive from Fetch
        with m.State("INSN_WAIT"):
            # when using "single-step" mode, checking dbg.stopping_o
            # prevents progress.  allow issue to proceed once started
            # (stopping is tied to constant zero, so the "stopping"
            # branch below is currently never taken)
            stopping = Const(0)
            #if self.allow_overlap:
            #    stopping = dbg.stopping_o
            with m.If(stopping):
                # stopping: jump back to idle
                m.next = "ISSUE_START"
                if flush_needed:
                    # request the icache to stop asserting "failed"
                    comb += core.icache.flush_in.eq(1)
                # stop instruction fault
                sync += pdecode2.instr_fault.eq(0)
            with m.Else():
                comb += fetch_insn_i_ready.eq(1)
                with m.If(fetch_insn_o_valid):
                    # loop into ISSUE_START if it's a SVP64 instruction
                    # and VL == 0.  this because VL==0 is a for-loop
                    # from 0 to 0 i.e. always, always a NOP.
                    # (this python-level cur_vl is also what PRED_SKIP,
                    # further down, compares against)
                    cur_vl = cur_state.svstate.vl
                    with m.If(is_svp64_mode & (cur_vl == 0)):
                        # update the PC before fetching the next instruction
                        # since we are in a VL==0 loop, no instruction was
                        # executed that we could be overwriting
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.i_data.eq(nia)
                        comb += self.insn_done.eq(1)
                        m.next = "ISSUE_START"
                    with m.Else():
                        # build-time (python) choice, not a hardware mux
                        if self.svp64_en:
                            m.next = "PRED_START"  # fetching predicate
                        else:
                            m.next = "DECODE_SV"  # skip predication

        with m.State("PRED_START"):
            comb += pred_insn_i_valid.eq(1)  # tell fetch_pred to start
            with m.If(pred_insn_o_ready):  # fetch_pred acknowledged us
                m.next = "MASK_WAIT"

        with m.State("MASK_WAIT"):
            comb += pred_mask_i_ready.eq(1)  # ready to receive the masks
            with m.If(pred_mask_o_valid):  # predication masks are ready
                m.next = "PRED_SKIP"

        # skip zeros in predicate
        with m.State("PRED_SKIP"):
            with m.If(~is_svp64_mode):
                m.next = "DECODE_SV"  # nothing to do
            with m.Else():
                if self.svp64_en:
                    pred_src_zero = pdecode2.rm_dec.pred_sz
                    pred_dst_zero = pdecode2.rm_dec.pred_dz

                    # new srcstep, after skipping zeros
                    skip_srcstep = Signal.like(cur_srcstep)
                    # value to be added to the current srcstep
                    # (stays at zero when pred_src_zero is set)
                    src_delta = Signal.like(cur_srcstep)
                    # add leading zeros to srcstep, if not in zero mode
                    with m.If(~pred_src_zero):
                        # priority encoder (count leading zeros)
                        # append guard bit, in case the mask is all zeros
                        pri_enc_src = PriorityEncoder(65)
                        m.submodules.pri_enc_src = pri_enc_src
                        comb += pri_enc_src.i.eq(Cat(self.srcmask,
                                                     Const(1, 1)))
                        comb += src_delta.eq(pri_enc_src.o)
                    # apply delta to srcstep
                    comb += skip_srcstep.eq(cur_srcstep + src_delta)
                    # shift-out all leading zeros from the mask
                    # plus the leading "one" bit
                    # TODO count leading zeros and shift-out the zero
                    #      bits, in the same step, in hardware
                    sync += self.srcmask.eq(self.srcmask >> (src_delta+1))

                    # same as above, but for dststep
                    skip_dststep = Signal.like(cur_dststep)
                    dst_delta = Signal.like(cur_dststep)
                    with m.If(~pred_dst_zero):
                        pri_enc_dst = PriorityEncoder(65)
                        m.submodules.pri_enc_dst = pri_enc_dst
                        comb += pri_enc_dst.i.eq(Cat(self.dstmask,
                                                     Const(1, 1)))
                        comb += dst_delta.eq(pri_enc_dst.o)
                    comb += skip_dststep.eq(cur_dststep + dst_delta)
                    sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))

                    # TODO: initialize mask[VL]=1 to avoid passing past VL
                    with m.If((skip_srcstep >= cur_vl) |
                              (skip_dststep >= cur_vl)):
                        # end of VL loop. Update PC and reset src/dst step
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.i_data.eq(nia)
                        comb += new_svstate.srcstep.eq(0)
                        comb += new_svstate.dststep.eq(0)
                        comb += self.update_svstate.eq(1)
                        # synchronize with the simulator
                        comb += self.insn_done.eq(1)
                        # go back to Issue
                        m.next = "ISSUE_START"
                    with m.Else():
                        # update new src/dst step
                        comb += new_svstate.srcstep.eq(skip_srcstep)
                        comb += new_svstate.dststep.eq(skip_dststep)
                        comb += self.update_svstate.eq(1)
                        # proceed to Decode
                        m.next = "DECODE_SV"

                    # pass predicate mask bits through to satellite decoders
                    # TODO: for SIMD this will be *multiple* bits
                    sync += core.i.sv_pred_sm.eq(self.srcmask[0])
                    sync += core.i.sv_pred_dm.eq(self.dstmask[0])

        # after src/dst step have been updated, we are ready
        # to decode the instruction
        with m.State("DECODE_SV"):
            # decode the instruction
            with m.If(~fetch_failed):
                sync += pdecode2.instr_fault.eq(0)
            # latch decoder output and current state into the core input
            sync += core.i.e.eq(pdecode2.e)
            sync += core.i.state.eq(cur_state)
            sync += core.i.raw_insn_i.eq(dec_opcode_i)
            sync += core.i.bigendian_i.eq(self.core_bigendian_i)
            if self.svp64_en:
                sync += core.i.sv_rm.eq(pdecode2.sv_rm)
                # set RA_OR_ZERO detection in satellite decoders
                sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
                # and svp64 detection
                sync += core.i.is_svp64_mode.eq(is_svp64_mode)
                # and svp64 bit-rev'd ldst mode
                ldst_dec = pdecode2.use_svp64_ldst_dec
                sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
            # after decoding, reset any previous exception condition,
            # allowing it to be set again during the next execution
            sync += pdecode2.ldst_exc.eq(0)

            m.next = "INSN_EXECUTE"  # move to "execute"

        # handshake with execution FSM, move to "wait" once acknowledged
        with m.State("INSN_EXECUTE"):
            # when using "single-step" mode, checking dbg.stopping_o
            # prevents progress.  allow execute to proceed once started
            # (as in INSN_WAIT: constant zero, branch never taken)
            stopping = Const(0)
            #if self.allow_overlap:
            #    stopping = dbg.stopping_o
            with m.If(stopping):
                # stopping: jump back to idle
                m.next = "ISSUE_START"
                if flush_needed:
                    # request the icache to stop asserting "failed"
                    comb += core.icache.flush_in.eq(1)
                # stop instruction fault
                sync += pdecode2.instr_fault.eq(0)
            with m.Else():
                comb += exec_insn_i_valid.eq(1)  # trigger execute
                with m.If(exec_insn_o_ready):   # execute acknowledged us
                    m.next = "EXECUTE_WAIT"

        with m.State("EXECUTE_WAIT"):
            comb += exec_pc_i_ready.eq(1)
            # see https://bugs.libre-soc.org/show_bug.cgi?id=636
            # the exception info needs to be blatted into
            # pdecode.ldst_exc, and the instruction "re-run".
            # when ldst_exc.happened is set, the PowerDecoder2
            # reacts very differently: it re-writes the instruction
            # with a "trap" (calls PowerDecoder2.trap()) which
            # will *overwrite* whatever was requested and jump the
            # PC to the exception address, as well as alter MSR.
            # nothing else needs to be done other than to note
            # the change of PC and MSR (and, later, SVSTATE)
            with m.If(exc_happened):
                mmu = core.fus.get_exc("mmu0")
                ldst = core.fus.get_exc("ldst0")
                # python-level check: the fetch_failed case is only
                # generated when get_exc("mmu0") returned something
                if mmu is not None:
                    with m.If(fetch_failed):
                        # instruction fetch: exception is from MMU
                        # reset instr_fault (highest priority)
                        sync += pdecode2.ldst_exc.eq(mmu)
                        sync += pdecode2.instr_fault.eq(0)
                        if flush_needed:
                            # request icache to stop asserting "failed"
                            comb += core.icache.flush_in.eq(1)
                with m.If(~fetch_failed):
                    # otherwise assume it was a LDST exception
                    sync += pdecode2.ldst_exc.eq(ldst)

            with m.If(exec_pc_o_valid):

                # was this the last loop iteration?
                # (only srcstep+1 is compared against VL here)
                is_last = Signal()
                cur_vl = cur_state.svstate.vl
                comb += is_last.eq(next_srcstep == cur_vl)

                # the If/Elif chain below is in priority order
                with m.If(pdecode2.instr_fault):
                    # reset instruction fault, try again
                    sync += pdecode2.instr_fault.eq(0)
                    m.next = "ISSUE_START"

                # return directly to Decode if Execute generated an
                # exception.
                with m.Elif(pdecode2.ldst_exc.happened):
                    m.next = "DECODE_SV"

                # if MSR, PC or SVSTATE were changed by the previous
                # instruction, go directly back to Fetch, without
                # updating either MSR PC or SVSTATE
                with m.Elif(self.msr_changed | self.pc_changed |
                            self.sv_changed):
                    m.next = "ISSUE_START"

                # also return to Fetch, when no output was a vector
                # (regardless of SRCSTEP and VL), or when the last
                # instruction was really the last one of the VL loop
                with m.Elif((~pdecode2.loop_continue) | is_last):
                    # before going back to fetch, update the PC state
                    # register with the NIA.
                    # ok here we are not reading the branch unit.
                    # TODO: this just blithely overwrites whatever
                    #       pipeline updated the PC
                    comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                    comb += self.state_w_pc.i_data.eq(nia)
                    # reset SRCSTEP before returning to Fetch
                    # (with svp64 enabled, only when loop_continue is set;
                    # without svp64, unconditionally)
                    if self.svp64_en:
                        with m.If(pdecode2.loop_continue):
                            comb += new_svstate.srcstep.eq(0)
                            comb += new_svstate.dststep.eq(0)
                            comb += self.update_svstate.eq(1)
                    else:
                        comb += new_svstate.srcstep.eq(0)
                        comb += new_svstate.dststep.eq(0)
                        comb += self.update_svstate.eq(1)
                    m.next = "ISSUE_START"

                # returning to Execute? then, first update SRCSTEP
                with m.Else():
                    comb += new_svstate.srcstep.eq(next_srcstep)
                    comb += new_svstate.dststep.eq(next_dststep)
                    comb += self.update_svstate.eq(1)
                    # return to mask skip loop
                    m.next = "PRED_SKIP"


    # check if svstate needs updating: if so, write it to State Regfile
    # (this sits outside the FSM, so it applies whichever state raised
    # update_svstate)
    with m.If(self.update_svstate):
        sync += cur_state.svstate.eq(self.new_svstate)  # for next clock
1393
def execute_fsm(self, m, core,
                exec_insn_i_valid, exec_insn_o_ready,
                exec_pc_o_valid, exec_pc_i_ready):
    """execute FSM

    execute FSM. this interacts with the "issue" FSM
    through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
    (outgoing). SVP64 RM prefixes have already been set up by the
    "issue" phase, so execute is fairly straightforward.

    arguments:

    * m                 - Module to which the "exec_fsm" FSM is added
    * core              - the core: core.p (i_valid / o_ready) and
                          core.n.o_data.busy_o are used
    * exec_insn_i_valid - request in: an instruction is ready to run
    * exec_insn_o_ready - driven to 1 whilst in INSN_START
    * exec_pc_o_valid   - driven to 1 in INSN_ACTIVE once the core is
                          no longer busy
    * exec_pc_i_ready   - acknowledge in: lets the FSM return to
                          INSN_START

    also records, in self.sv_changed / self.msr_changed /
    self.pc_changed, writes seen on self.state_nia.wen whilst the
    instruction is active, and pulses self.insn_done on completion.

    states: INSN_START, INSN_ACTIVE

    returns nothing.
    """

    comb = m.d.comb
    sync = m.d.sync
    dbg = self.dbg
    pdecode2 = self.pdecode2

    # temporaries
    core_busy_o = core.n.o_data.busy_o  # core is busy
    core_ivalid_i = core.p.i_valid              # instruction is valid

    # NOTE(review): fetch_failed is set up here but not referenced
    # again within this function
    if hasattr(core, "icache"):
        fetch_failed = core.icache.i_out.fetch_failed
    else:
        fetch_failed = Const(0, 1)

    with m.FSM(name="exec_fsm"):

        # waiting for instruction bus (stays there until not busy)
        with m.State("INSN_START"):
            comb += exec_insn_o_ready.eq(1)
            with m.If(exec_insn_i_valid):
                comb += core_ivalid_i.eq(1)  # instruction is valid/issued
                # clear the "changed" flags ahead of the new instruction
                sync += self.sv_changed.eq(0)
                sync += self.pc_changed.eq(0)
                sync += self.msr_changed.eq(0)
                with m.If(core.p.o_ready):  # only move if accepted
                    m.next = "INSN_ACTIVE"  # move to "wait completion"

        # instruction started: must wait till it finishes
        with m.State("INSN_ACTIVE"):
            # note changes to MSR, PC and SVSTATE
            # XXX oops, really must monitor *all* State Regfile write
            # ports looking for changes!
            # (wen is tested one bit per State Regfile entry)
            with m.If(self.state_nia.wen & (1 << StateRegs.SVSTATE)):
                sync += self.sv_changed.eq(1)
            with m.If(self.state_nia.wen & (1 << StateRegs.MSR)):
                sync += self.msr_changed.eq(1)
            with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
                sync += self.pc_changed.eq(1)
            with m.If(~core_busy_o):  # instruction done!
                comb += exec_pc_o_valid.eq(1)
                with m.If(exec_pc_i_ready):
                    # when finished, indicate "done".
                    # however, if there was an exception, the instruction
                    # is *not* yet done.  this is an implementation
                    # detail: we choose to implement exceptions by
                    # taking the exception information from the LDST
                    # unit, putting that *back* into the PowerDecoder2,
                    # and *re-running the entire instruction*.
                    # if we erroneously indicate "done" here, it is as if
                    # there were *TWO* instructions:
                    # 1) the failed LDST 2) a TRAP.
                    with m.If(~pdecode2.ldst_exc.happened &
                              ~pdecode2.instr_fault):
                        comb += self.insn_done.eq(1)
                    m.next = "INSN_START"  # back to fetch
            # terminate returns directly to INSN_START
            # (checked independently of core_busy_o above)
            with m.If(dbg.terminate_i):
                # comb += self.insn_done.eq(1) - no because it's not
                m.next = "INSN_START"  # back to fetch
1464
def elaborate(self, platform):
    """builds on the parent class's Module: creates the handshake
    signals and instantiates the fetch, issue, (optionally) predicate
    fetch and execute FSMs.

    * platform - passed straight through to the parent's elaborate()

    returns the Module.
    """
    m = super().elaborate(platform)
    # convenience
    comb, sync = m.d.comb, m.d.sync
    cur_state = self.cur_state
    pdecode2 = self.pdecode2
    dbg = self.dbg
    core = self.core

    # set up peripherals and core
    core_rst = self.core_rst

    # indicate to outside world if any FU is still executing
    comb += self.any_busy.eq(core.n.o_data.any_busy_o)  # any FU executing

    # address of the next instruction, in the absence of a branch
    # depends on the instruction size
    nia = Signal(64)

    # connect up debug signals
    with m.If(core.o.core_terminate_o):
        comb += dbg.terminate_i.eq(1)

    # pass the prefix mode from Fetch to Issue, so the latter can loop
    # on VL==0
    is_svp64_mode = Signal()

    # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
    # issue, decode/execute, now joined by "Predicate fetch/calculate".
    # these are the handshake signals between each

    # fetch FSM can run as soon as the PC is valid
    fetch_pc_i_valid = Signal()  # Execute tells Fetch "start next read"
    fetch_pc_o_ready = Signal()  # Fetch Tells SVSTATE "proceed"

    # fetch FSM hands over the instruction to be decoded / issued
    fetch_insn_o_valid = Signal()
    fetch_insn_i_ready = Signal()

    # predicate fetch FSM decodes and fetches the predicate
    pred_insn_i_valid = Signal()
    pred_insn_o_ready = Signal()

    # predicate fetch FSM delivers the masks
    pred_mask_o_valid = Signal()
    pred_mask_i_ready = Signal()

    # issue FSM delivers the instruction to be executed
    exec_insn_i_valid = Signal()
    exec_insn_o_ready = Signal()

    # execute FSM, hands over the PC/SVSTATE back to the issue FSM
    exec_pc_o_valid = Signal()
    exec_pc_i_ready = Signal()

    # the FSMs here are perhaps unusual in that they detect conditions
    # then "hold" information, combinatorially, for the core
    # (as opposed to using sync - which would be on a clock's delay)
    # this includes the actual opcode, valid flags and so on.

    # Fetch, then predicate fetch, then Issue, then Execute.
    # Issue is where the VL for-loop lives.  the ready/valid
    # signalling is used to communicate between the four.

    self.fetch_fsm(m, dbg, core, dbg.state.pc, dbg.state.msr,
                   dbg.state.svstate, nia, is_svp64_mode,
                   fetch_pc_o_ready, fetch_pc_i_valid,
                   fetch_insn_o_valid, fetch_insn_i_ready)

    self.issue_fsm(m, core, nia,
                   dbg, core_rst, is_svp64_mode,
                   fetch_pc_o_ready, fetch_pc_i_valid,
                   fetch_insn_o_valid, fetch_insn_i_ready,
                   pred_insn_i_valid, pred_insn_o_ready,
                   pred_mask_o_valid, pred_mask_i_ready,
                   exec_insn_i_valid, exec_insn_o_ready,
                   exec_pc_o_valid, exec_pc_i_ready)

    # the predicate-fetch FSM only exists when SVP64 is enabled
    if self.svp64_en:
        self.fetch_predicate_fsm(m,
                                 pred_insn_i_valid, pred_insn_o_ready,
                                 pred_mask_o_valid, pred_mask_i_ready)

    self.execute_fsm(m, core,
                     exec_insn_i_valid, exec_insn_o_ready,
                     exec_pc_o_valid, exec_pc_i_ready)

    # whatever was done above, over-ride it if core reset is held
    # (must remain the last assignment to nia: later statements win)
    with m.If(core_rst):
        sync += nia.eq(0)

    return m
1557
1558
class TestIssuer(Elaboratable):
    """wrapper around TestIssuerInternal, plus a (dummy) PLL.

    takes care of connecting the core and debug clocks of the internal
    issuer to the main clock, and (when pspec.use_pll is set) of exposing
    the PLL test/selection signals.
    """

    def __init__(self, pspec):
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL(instance=True)

        self.dbg_rst_i = Signal(reset_less=True)

        # PLL direct clock or not (a pspec without "use_pll" means "not")
        self.pll_en = getattr(pspec, "use_pll", False)
        if not self.pll_en:
            return

        # these only exist when the PLL is in use
        self.pll_test_o = Signal(reset_less=True)
        self.pll_vco_o = Signal(reset_less=True)
        self.clk_sel_i = Signal(2, reset_less=True)
        self.ref_clk = ClockSignal()  # can't rename it but that's ok
        self.pllclk_clk = ClockSignal("pllclk")

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        # TestIssuer nominally runs at main clock, actually it is
        # all combinatorial internally except for coresync'd components
        ti = self.ti
        m.submodules.ti = ti

        if self.pll_en:
            # ClockSelect runs at PLL output internal clock rate
            pll = self.pll
            m.submodules.wrappll = pll

            # add clock domains from PLL
            m.domains += ClockDomain("pllclk")

            # (wiring the external 24mhz into the PLL is disabled:)
            #comb += pll.clk_24_i.eq(self.ref_clk)
            comb += [
                # PLL clock established.  has the side-effect of running
                # clksel at the PLL's speed
                self.pllclk_clk.eq(pll.clk_pll_o),
                # output 18 mhz PLL test signal, and analog oscillator out
                self.pll_test_o.eq(pll.pll_test_o),
                self.pll_vco_o.eq(pll.pll_vco_o),
                # input to pll clock selection
                pll.clk_sel_i.eq(self.clk_sel_i),
                # now wire up ResetSignals.  don't mind them being in
                # this domain
                ResetSignal("pllclk").eq(ResetSignal()),
            ]

        core_domain = ti.core_domain
        dbg_domain = ti.dbg_domain
        has_dbg_domain = dbg_domain != 'sync'

        # internal clock is set to selector clock-out.
        # debug clock runs at coresync internal clock
        if has_dbg_domain:
            # NOTE(review): created, but not added to m.domains here
            cd_dbgsync = ClockDomain("dbgsync")
        intclk = ClockSignal(core_domain)
        dbgclk = ClockSignal(dbg_domain)

        # XXX BYPASS PLL XXX: the reference clock drives the core directly
        if self.pll_en:
            comb += intclk.eq(self.ref_clk)
            assert core_domain != 'sync', \
                "cannot set core_domain to sync and use pll at the same time"
        elif core_domain != 'sync':
            comb += intclk.eq(ClockSignal())

        if has_dbg_domain:
            comb += dbgclk.eq(intclk)
        comb += ti.dbg_rst_i.eq(self.dbg_rst_i)

        return m

    def ports(self):
        """all ports: internal issuer, then PLL, then main clock/reset"""
        all_ports = list(self.ti.ports())
        all_ports.extend(self.pll.ports())
        all_ports.extend([ClockSignal(), ResetSignal()])
        return all_ports

    def external_ports(self):
        """external ports of the internal issuer, extended (in-place) with
        main clock/reset and, if the PLL is enabled, the PLL signals
        """
        ports = self.ti.external_ports()
        ports.extend([ClockSignal(), ResetSignal()])
        if self.pll_en:
            ports.extend([
                self.clk_sel_i,
                self.pll.clk_24_i,
                self.pll_test_o,
                self.pll_vco_o,
                self.pllclk_clk,
                self.ref_clk,
            ])
        return ports
1649
1650
if __name__ == '__main__':
    # one Function Unit of each of these kinds
    units = dict.fromkeys(['alu', 'cr', 'branch', 'trap', 'logical',
                           'spr',
                           'div',
                           'mul',
                           'shiftrot'], 1)
    pspec_kwargs = {
        'ldst_ifacetype': 'bare_wb',
        'imem_ifacetype': 'bare_wb',
        'addr_wid': 64,
        'mask_wid': 8,
        'reg_wid': 64,
        'units': units,
    }
    pspec = TestMemPspec(**pspec_kwargs)
    dut = TestIssuer(pspec)
    # let the nmigen command-line handler act on any arguments given
    vl = main(dut, ports=dut.ports(), name="test_issuer")

    # no arguments at all: write out the RTLIL file
    if len(sys.argv) == 1:
        vl = rtlil.convert(dut, ports=dut.external_ports(),
                           name="test_issuer")
        with open("test_issuer.il", "w") as il_file:
            il_file.write(vl)