src/soc/simple/issuer.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
  12 the purpose of this module is to verify the functional correctness
  13 of the Function Units in the absolute simplest and clearest possible
  14 way, and to at provide something that can be further incrementally
  15 improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
  19                     ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from nmutil.singlepipe import ControlBase
  25 from soc.simple.core_data import FetchOutput, FetchInput
  26
  27 from nmigen.lib.coding import PriorityEncoder
  28
  29 from openpower.decoder.power_decoder import create_pdecode
  30 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
  31 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
  32 from openpower.decoder.decode2execute1 import Data
  33 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
  34                                            SVP64PredMode)
  35 from openpower.state import CoreState
  36 from openpower.consts import (CR, SVP64CROffs)
  37 from soc.experiment.testmem import TestMemory  # test only for instructions
  38 from soc.regfile.regfiles import StateRegs, FastRegs
  39 from soc.simple.core import NonProductionCore
  40 from soc.config.test.test_loadstore import TestMemPspec
  41 from soc.config.ifetch import ConfigFetchUnit
  42 from soc.debug.dmi import CoreDebug, DMIInterface
  43 from soc.debug.jtag import JTAG
  44 from soc.config.pinouts import get_pinspecs
  45 from soc.interrupts.xics import XICS_ICP, XICS_ICS
  46 from soc.bus.simple_gpio import SimpleGPIO
  47 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
  48 from soc.clock.select import ClockSelect
  49 from soc.clock.dummypll import DummyPLL
  50 from openpower.sv.svstate import SVSTATERec
  51 from soc.experiment.icache import ICache
  52
  53 from nmutil.util import rising_edge
  54
  55
  56 def get_insn(f_instr_o, pc):
  57     if f_instr_o.width == 32:
  58         return f_instr_o
  59     else:
  60         # 64-bit: bit 2 of pc decides which word to select
  61         return f_instr_o.word_select(pc[2], 32)
  62
  63 # gets state input or reads from state regfile
  64
  65
  66 def state_get(m, res, core_rst, state_i, name, regfile, regnum):
  67     comb = m.d.comb
  68     sync = m.d.sync
  69     # read the {insert state variable here}
  70     res_ok_delay = Signal(name="%s_ok_delay" % name)
  71     with m.If(~core_rst):
  72         sync += res_ok_delay.eq(~state_i.ok)
  73         with m.If(state_i.ok):
  74             # incoming override (start from pc_i)
  75             comb += res.eq(state_i.data)
  76         with m.Else():
  77             # otherwise read StateRegs regfile for {insert state here}...
  78             comb += regfile.ren.eq(1 << regnum)
  79         # ... but on a 1-clock delay
  80         with m.If(res_ok_delay):
  81             comb += res.eq(regfile.o_data)
  82
  83
  84 def get_predint(m, mask, name):
  85     """decode SVP64 predicate integer mask field to reg number and invert
  86     this is identical to the equivalent function in ISACaller except that
  87     it doesn't read the INT directly, it just decodes "what needs to be done"
  88     i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
  89
  90     * all1s is set to indicate that no mask is to be applied.
  91     * regread indicates the GPR register number to be read
  92     * invert is set to indicate that the register value is to be inverted
  93     * unary indicates that the contents of the register is to be shifted 1<<r3
  94     """
  95     comb = m.d.comb
  96     regread = Signal(5, name=name+"regread")
  97     invert = Signal(name=name+"invert")
  98     unary = Signal(name=name+"unary")
  99     all1s = Signal(name=name+"all1s")
 100     with m.Switch(mask):
 101         with m.Case(SVP64PredInt.ALWAYS.value):
 102             comb += all1s.eq(1)      # use 0b1111 (all ones)
 103         with m.Case(SVP64PredInt.R3_UNARY.value):
 104             comb += regread.eq(3)
 105             comb += unary.eq(1)        # 1<<r3 - shift r3 (single bit)
 106         with m.Case(SVP64PredInt.R3.value):
 107             comb += regread.eq(3)
 108         with m.Case(SVP64PredInt.R3_N.value):
 109             comb += regread.eq(3)
 110             comb += invert.eq(1)
 111         with m.Case(SVP64PredInt.R10.value):
 112             comb += regread.eq(10)
 113         with m.Case(SVP64PredInt.R10_N.value):
 114             comb += regread.eq(10)
 115             comb += invert.eq(1)
 116         with m.Case(SVP64PredInt.R30.value):
 117             comb += regread.eq(30)
 118         with m.Case(SVP64PredInt.R30_N.value):
 119             comb += regread.eq(30)
 120             comb += invert.eq(1)
 121     return regread, invert, unary, all1s
 122
 123
 124 def get_predcr(m, mask, name):
 125     """decode SVP64 predicate CR to reg number field and invert status
 126     this is identical to _get_predcr in ISACaller
 127     """
 128     comb = m.d.comb
 129     idx = Signal(2, name=name+"idx")
 130     invert = Signal(name=name+"crinvert")
 131     with m.Switch(mask):
 132         with m.Case(SVP64PredCR.LT.value):
 133             comb += idx.eq(CR.LT)
 134             comb += invert.eq(0)
 135         with m.Case(SVP64PredCR.GE.value):
 136             comb += idx.eq(CR.LT)
 137             comb += invert.eq(1)
 138         with m.Case(SVP64PredCR.GT.value):
 139             comb += idx.eq(CR.GT)
 140             comb += invert.eq(0)
 141         with m.Case(SVP64PredCR.LE.value):
 142             comb += idx.eq(CR.GT)
 143             comb += invert.eq(1)
 144         with m.Case(SVP64PredCR.EQ.value):
 145             comb += idx.eq(CR.EQ)
 146             comb += invert.eq(0)
 147         with m.Case(SVP64PredCR.NE.value):
 148             comb += idx.eq(CR.EQ)
 149             comb += invert.eq(1)
 150         with m.Case(SVP64PredCR.SO.value):
 151             comb += idx.eq(CR.SO)
 152             comb += invert.eq(0)
 153         with m.Case(SVP64PredCR.NS.value):
 154             comb += idx.eq(CR.SO)
 155             comb += invert.eq(1)
 156     return idx, invert
 157
 158
 159 class TestIssuerBase(Elaboratable):
 160     """TestIssuerBase - common base class for Issuers
 161
 162     takes care of power-on reset, peripherals, debug, DEC/TB,
 163     and gets PC/MSR/SVSTATE from the State Regfile etc.
 164     """
 165
 166     def __init__(self, pspec):
 167
 168         # test is SVP64 is to be enabled
 169         self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
 170
 171         # and if regfiles are reduced
 172         self.regreduce_en = (hasattr(pspec, "regreduce") and
 173                              (pspec.regreduce == True))
 174
 175         # and if overlap requested
 176         self.allow_overlap = (hasattr(pspec, "allow_overlap") and
 177                               (pspec.allow_overlap == True))
 178
 179         # JTAG interface.  add this right at the start because if it's
 180         # added it *modifies* the pspec, by adding enable/disable signals
 181         # for parts of the rest of the core
 182         self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
 183         self.dbg_domain = "sync"  # sigh "dbgsunc" too problematic
 184         # self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
 185         if self.jtag_en:
 186             # XXX MUST keep this up-to-date with litex, and
 187             # soc-cocotb-sim, and err.. all needs sorting out, argh
 188             subset = ['uart',
 189                       'mtwi',
 190                       'eint', 'gpio', 'mspi0',
 191                       # 'mspi1', - disabled for now
 192                       # 'pwm', 'sd0', - disabled for now
 193                       'sdr']
 194             self.jtag = JTAG(get_pinspecs(subset=subset),
 195                              domain=self.dbg_domain)
 196             # add signals to pspec to enable/disable icache and dcache
 197             # (or data and intstruction wishbone if icache/dcache not included)
 198             # https://bugs.libre-soc.org/show_bug.cgi?id=520
 199             # TODO: do we actually care if these are not domain-synchronised?
 200             # honestly probably not.
 201             pspec.wb_icache_en = self.jtag.wb_icache_en
 202             pspec.wb_dcache_en = self.jtag.wb_dcache_en
 203             self.wb_sram_en = self.jtag.wb_sram_en
 204         else:
 205             self.wb_sram_en = Const(1)
 206
 207         # add 4k sram blocks?
 208         self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
 209                          pspec.sram4x4kblock == True)
 210         if self.sram4x4k:
 211             self.sram4k = []
 212             for i in range(4):
 213                 self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
 214                                                     # features={'err'}
 215                                                     ))
 216
 217         # add interrupt controller?
 218         self.xics = hasattr(pspec, "xics") and pspec.xics == True
 219         if self.xics:
 220             self.xics_icp = XICS_ICP()
 221             self.xics_ics = XICS_ICS()
 222             self.int_level_i = self.xics_ics.int_level_i
 223
 224         # add GPIO peripheral?
 225         self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
 226         if self.gpio:
 227             self.simple_gpio = SimpleGPIO()
 228             self.gpio_o = self.simple_gpio.gpio_o
 229
 230         # main instruction core.  suitable for prototyping / demo only
 231         self.core = core = NonProductionCore(pspec)
 232         self.core_rst = ResetSignal("coresync")
 233
 234         # instruction decoder.  goes into Trap Record
 235         #pdecode = create_pdecode()
 236         self.cur_state = CoreState("cur")  # current state (MSR/PC/SVSTATE)
 237         self.pdecode2 = PowerDecode2(None, state=self.cur_state,
 238                                      opkls=IssuerDecode2ToOperand,
 239                                      svp64_en=self.svp64_en,
 240                                      regreduce_en=self.regreduce_en)
 241         pdecode = self.pdecode2.dec
 242
 243         if self.svp64_en:
 244             self.svp64 = SVP64PrefixDecoder()  # for decoding SVP64 prefix
 245
 246         self.update_svstate = Signal()  # set this if updating svstate
 247         self.new_svstate = new_svstate = SVSTATERec("new_svstate")
 248
 249         # Test Instruction memory
 250         if hasattr(core, "icache"):
 251             # XXX BLECH! use pspec to transfer the I-Cache to ConfigFetchUnit
 252             # truly dreadful.  needs a huge reorg.
 253             pspec.icache = core.icache
 254         self.imem = ConfigFetchUnit(pspec).fu
 255
 256         # DMI interface
 257         self.dbg = CoreDebug()
 258
 259         # instruction go/monitor
 260         self.pc_o = Signal(64, reset_less=True)
 261         self.pc_i = Data(64, "pc_i")  # set "ok" to indicate "please change me"
 262         self.msr_i = Data(64, "msr_i") # set "ok" to indicate "please change me"
 263         self.svstate_i = Data(64, "svstate_i")  # ditto
 264         self.core_bigendian_i = Signal()  # TODO: set based on MSR.LE
 265         self.busy_o = Signal(reset_less=True)
 266         self.memerr_o = Signal(reset_less=True)
 267
 268         # STATE regfile read /write ports for PC, MSR, SVSTATE
 269         staterf = self.core.regs.rf['state']
 270         self.state_r_msr = staterf.r_ports['msr']  # MSR rd
 271         self.state_r_pc = staterf.r_ports['cia']  # PC rd
 272         self.state_r_sv = staterf.r_ports['sv']  # SVSTATE rd
 273
 274         self.state_w_msr = staterf.w_ports['msr']  # MSR wr
 275         self.state_w_pc = staterf.w_ports['d_wr1']  # PC wr
 276         self.state_w_sv = staterf.w_ports['sv']  # SVSTATE wr
 277
 278         # DMI interface access
 279         intrf = self.core.regs.rf['int']
 280         crrf = self.core.regs.rf['cr']
 281         xerrf = self.core.regs.rf['xer']
 282         self.int_r = intrf.r_ports['dmi']  # INT read
 283         self.cr_r = crrf.r_ports['full_cr_dbg']  # CR read
 284         self.xer_r = xerrf.r_ports['full_xer']  # XER read
 285
 286         if self.svp64_en:
 287             # for predication
 288             self.int_pred = intrf.r_ports['pred']  # INT predicate read
 289             self.cr_pred = crrf.r_ports['cr_pred']  # CR predicate read
 290
 291         # hack method of keeping an eye on whether branch/trap set the PC
 292         self.state_nia = self.core.regs.rf['state'].w_ports['nia']
 293         self.state_nia.wen.name = 'state_nia_wen'
 294
 295         # pulse to synchronize the simulator at instruction end
 296         self.insn_done = Signal()
 297
 298         # indicate any instruction still outstanding, in execution
 299         self.any_busy = Signal()
 300
 301         if self.svp64_en:
 302             # store copies of predicate masks
 303             self.srcmask = Signal(64)
 304             self.dstmask = Signal(64)
 305
 306     def setup_peripherals(self, m):
 307         comb, sync = m.d.comb, m.d.sync
 308
 309         # okaaaay so the debug module must be in coresync clock domain
 310         # but NOT its reset signal. to cope with this, set every single
 311         # submodule explicitly in coresync domain, debug and JTAG
 312         # in their own one but using *external* reset.
 313         csd = DomainRenamer("coresync")
 314         dbd = DomainRenamer(self.dbg_domain)
 315
 316         m.submodules.core = core = csd(self.core)
 317         # this _so_ needs sorting out.  ICache is added down inside
 318         # LoadStore1 and is already a submodule of LoadStore1
 319         if not isinstance(self.imem, ICache):
 320             m.submodules.imem = imem = csd(self.imem)
 321         m.submodules.dbg = dbg = dbd(self.dbg)
 322         if self.jtag_en:
 323             m.submodules.jtag = jtag = dbd(self.jtag)
 324             # TODO: UART2GDB mux, here, from external pin
 325             # see https://bugs.libre-soc.org/show_bug.cgi?id=499
 326             sync += dbg.dmi.connect_to(jtag.dmi)
 327
 328         cur_state = self.cur_state
 329
 330         # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
 331         if self.sram4x4k:
 332             for i, sram in enumerate(self.sram4k):
 333                 m.submodules["sram4k_%d" % i] = csd(sram)
 334                 comb += sram.enable.eq(self.wb_sram_en)
 335
 336         # XICS interrupt handler
 337         if self.xics:
 338             m.submodules.xics_icp = icp = csd(self.xics_icp)
 339             m.submodules.xics_ics = ics = csd(self.xics_ics)
 340             comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
 341             sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core
 342
 343         # GPIO test peripheral
 344         if self.gpio:
 345             m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)
 346
 347         # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
 348         # XXX causes litex ECP5 test to get wrong idea about input and output
 349         # (but works with verilator sim *sigh*)
 350         # if self.gpio and self.xics:
 351         #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
 352
 353         # instruction decoder
 354         pdecode = create_pdecode()
 355         m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
 356         if self.svp64_en:
 357             m.submodules.svp64 = svp64 = csd(self.svp64)
 358
 359         # convenience
 360         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
 361         intrf = self.core.regs.rf['int']
 362
 363         # clock delay power-on reset
 364         cd_por = ClockDomain(reset_less=True)
 365         cd_sync = ClockDomain()
 366         core_sync = ClockDomain("coresync")
 367         m.domains += cd_por, cd_sync, core_sync
 368         if self.dbg_domain != "sync":
 369             dbg_sync = ClockDomain(self.dbg_domain)
 370             m.domains += dbg_sync
 371
 372         ti_rst = Signal(reset_less=True)
 373         delay = Signal(range(4), reset=3)
 374         with m.If(delay != 0):
 375             m.d.por += delay.eq(delay - 1)
 376         comb += cd_por.clk.eq(ClockSignal())
 377
 378         # power-on reset delay
 379         core_rst = ResetSignal("coresync")
 380         comb += ti_rst.eq(delay != 0 | dbg.core_rst_o | ResetSignal())
 381         comb += core_rst.eq(ti_rst)
 382
 383         # debug clock is same as coresync, but reset is *main external*
 384         if self.dbg_domain != "sync":
 385             dbg_rst = ResetSignal(self.dbg_domain)
 386             comb += dbg_rst.eq(ResetSignal())
 387
 388         # busy/halted signals from core
 389         core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o  # core is busy
 390         comb += self.busy_o.eq(core_busy_o)
 391         comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
 392
 393         # temporary hack: says "go" immediately for both address gen and ST
 394         l0 = core.l0
 395         ldst = core.fus.fus['ldst0']
 396         st_go_edge = rising_edge(m, ldst.st.rel_o)
 397         # link addr-go direct to rel
 398         m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)
 399         m.d.comb += ldst.st.go_i.eq(st_go_edge)  # link store-go to rising rel
 400
 401     def do_dmi(self, m, dbg):
 402         """deals with DMI debug requests
 403
 404         currently only provides read requests for the INT regfile, CR and XER
 405         it will later also deal with *writing* to these regfiles.
 406         """
 407         comb = m.d.comb
 408         sync = m.d.sync
 409         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
 410         intrf = self.core.regs.rf['int']
 411
 412         with m.If(d_reg.req):  # request for regfile access being made
 413             # TODO: error-check this
 414             # XXX should this be combinatorial?  sync better?
 415             if intrf.unary:
 416                 comb += self.int_r.ren.eq(1 << d_reg.addr)
 417             else:
 418                 comb += self.int_r.addr.eq(d_reg.addr)
 419                 comb += self.int_r.ren.eq(1)
 420         d_reg_delay = Signal()
 421         sync += d_reg_delay.eq(d_reg.req)
 422         with m.If(d_reg_delay):
 423             # data arrives one clock later
 424             comb += d_reg.data.eq(self.int_r.o_data)
 425             comb += d_reg.ack.eq(1)
 426
 427         # sigh same thing for CR debug
 428         with m.If(d_cr.req):  # request for regfile access being made
 429             comb += self.cr_r.ren.eq(0b11111111)  # enable all
 430         d_cr_delay = Signal()
 431         sync += d_cr_delay.eq(d_cr.req)
 432         with m.If(d_cr_delay):
 433             # data arrives one clock later
 434             comb += d_cr.data.eq(self.cr_r.o_data)
 435             comb += d_cr.ack.eq(1)
 436
 437         # aaand XER...
 438         with m.If(d_xer.req):  # request for regfile access being made
 439             comb += self.xer_r.ren.eq(0b111111)  # enable all
 440         d_xer_delay = Signal()
 441         sync += d_xer_delay.eq(d_xer.req)
 442         with m.If(d_xer_delay):
 443             # data arrives one clock later
 444             comb += d_xer.data.eq(self.xer_r.o_data)
 445             comb += d_xer.ack.eq(1)
 446
 447     def tb_dec_fsm(self, m, spr_dec):
 448         """tb_dec_fsm
 449
 450         this is a FSM for updating either dec or tb.  it runs alternately
 451         DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
 452         value to DEC, however the regfile has "passthrough" on it so this
 453         *should* be ok.
 454
 455         see v3.0B p1097-1099 for Timeer Resource and p1065 and p1076
 456         """
 457
 458         comb, sync = m.d.comb, m.d.sync
 459         fast_rf = self.core.regs.rf['fast']
 460         fast_r_dectb = fast_rf.r_ports['issue']  # DEC/TB
 461         fast_w_dectb = fast_rf.w_ports['issue']  # DEC/TB
 462
 463         with m.FSM() as fsm:
 464
 465             # initiates read of current DEC
 466             with m.State("DEC_READ"):
 467                 comb += fast_r_dectb.addr.eq(FastRegs.DEC)
 468                 comb += fast_r_dectb.ren.eq(1)
 469                 m.next = "DEC_WRITE"
 470
 471             # waits for DEC read to arrive (1 cycle), updates with new value
 472             with m.State("DEC_WRITE"):
 473                 new_dec = Signal(64)
 474                 # TODO: MSR.LPCR 32-bit decrement mode
 475                 comb += new_dec.eq(fast_r_dectb.o_data - 1)
 476                 comb += fast_w_dectb.addr.eq(FastRegs.DEC)
 477                 comb += fast_w_dectb.wen.eq(1)
 478                 comb += fast_w_dectb.i_data.eq(new_dec)
 479                 sync += spr_dec.eq(new_dec)  # copy into cur_state for decoder
 480                 m.next = "TB_READ"
 481
 482             # initiates read of current TB
 483             with m.State("TB_READ"):
 484                 comb += fast_r_dectb.addr.eq(FastRegs.TB)
 485                 comb += fast_r_dectb.ren.eq(1)
 486                 m.next = "TB_WRITE"
 487
 488             # waits for read TB to arrive, initiates write of current TB
 489             with m.State("TB_WRITE"):
 490                 new_tb = Signal(64)
 491                 comb += new_tb.eq(fast_r_dectb.o_data + 1)
 492                 comb += fast_w_dectb.addr.eq(FastRegs.TB)
 493                 comb += fast_w_dectb.wen.eq(1)
 494                 comb += fast_w_dectb.i_data.eq(new_tb)
 495                 m.next = "DEC_READ"
 496
 497         return m
 498
 499     def elaborate(self, platform):
 500         m = Module()
 501         # convenience
 502         comb, sync = m.d.comb, m.d.sync
 503         cur_state = self.cur_state
 504         pdecode2 = self.pdecode2
 505         dbg = self.dbg
 506
 507         # set up peripherals and core
 508         core_rst = self.core_rst
 509         self.setup_peripherals(m)
 510
 511         # reset current state if core reset requested
 512         with m.If(core_rst):
 513             m.d.sync += self.cur_state.eq(0)
 514
 515         # PC and instruction from I-Memory
 516         comb += self.pc_o.eq(cur_state.pc)
 517         self.pc_changed = Signal()  # note write to PC
 518         self.msr_changed = Signal()  # note write to MSR
 519         self.sv_changed = Signal()  # note write to SVSTATE
 520
 521         # read state either from incoming override or from regfile
 522         state = CoreState("get")  # current state (MSR/PC/SVSTATE)
 523         state_get(m, state.msr, core_rst, self.msr_i,
 524                        "msr",                  # read MSR
 525                        self.state_r_msr, StateRegs.MSR)
 526         state_get(m, state.pc, core_rst, self.pc_i,
 527                        "pc",                  # read PC
 528                        self.state_r_pc, StateRegs.PC)
 529         state_get(m, state.svstate, core_rst, self.svstate_i,
 530                             "svstate",   # read SVSTATE
 531                             self.state_r_sv, StateRegs.SVSTATE)
 532
 533         # don't write pc every cycle
 534         comb += self.state_w_pc.wen.eq(0)
 535         comb += self.state_w_pc.i_data.eq(0)
 536
 537         # connect up debug state.  note "combinatorially same" below,
 538         # this is a bit naff, passing state over in the dbg class, but
 539         # because it is combinatorial it achieves the desired goal
 540         comb += dbg.state.eq(state)
 541
 542         # this bit doesn't have to be in the FSM: connect up to read
 543         # regfiles on demand from DMI
 544         self.do_dmi(m, dbg)
 545
 546         # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
 547         # (which uses that in PowerDecoder2 to raise 0x900 exception)
 548         self.tb_dec_fsm(m, cur_state.dec)
 549
 550         # while stopped, allow updating the MSR, PC and SVSTATE.
 551         # these are mainly for debugging purposes (including DMI/JTAG)
 552         with m.If(dbg.core_stopped_i):
 553             with m.If(self.pc_i.ok):
 554                 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 555                 comb += self.state_w_pc.i_data.eq(self.pc_i.data)
 556                 sync += self.pc_changed.eq(1)
 557             with m.If(self.msr_i.ok):
 558                 comb += self.state_w_msr.wen.eq(1 << StateRegs.MSR)
 559                 comb += self.state_w_msr.i_data.eq(self.msr_i.data)
 560                 sync += self.msr_changed.eq(1)
 561             with m.If(self.svstate_i.ok | self.update_svstate):
 562                 with m.If(self.svstate_i.ok): # over-ride from external source
 563                     comb += self.new_svstate.eq(self.svstate_i.data)
 564                 comb += self.state_w_sv.wen.eq(1 << StateRegs.SVSTATE)
 565                 comb += self.state_w_sv.i_data.eq(self.new_svstate)
 566                 sync += self.sv_changed.eq(1)
 567
 568         return m
 569
 570     def __iter__(self):
 571         yield from self.pc_i.ports()
 572         yield from self.msr_i.ports()
 573         yield self.pc_o
 574         yield self.memerr_o
 575         yield from self.core.ports()
 576         yield from self.imem.ports()
 577         yield self.core_bigendian_i
 578         yield self.busy_o
 579
 580     def ports(self):
 581         return list(self)
 582
 583     def external_ports(self):
 584         ports = self.pc_i.ports()
 585         ports = self.msr_i.ports()
 586         ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
 587                   ]
 588
 589         if self.jtag_en:
 590             ports += list(self.jtag.external_ports())
 591         else:
 592             # don't add DMI if JTAG is enabled
 593             ports += list(self.dbg.dmi.ports())
 594
 595         ports += list(self.imem.ibus.fields.values())
 596         ports += list(self.core.l0.cmpi.wb_bus().fields.values())
 597
 598         if self.sram4x4k:
 599             for sram in self.sram4k:
 600                 ports += list(sram.bus.fields.values())
 601
 602         if self.xics:
 603             ports += list(self.xics_icp.bus.fields.values())
 604             ports += list(self.xics_ics.bus.fields.values())
 605             ports.append(self.int_level_i)
 606
 607         if self.gpio:
 608             ports += list(self.simple_gpio.bus.fields.values())
 609             ports.append(self.gpio_o)
 610
 611         return ports
 612
 613     def ports(self):
 614         return list(self)
 615
 616
 617
 618 # Fetch Finite State Machine.
 619 # WARNING: there are currently DriverConflicts but it's actually working.
 620 # TODO, here: everything that is global in nature, information from the
 621 # main TestIssuerInternal, needs to move to either ispec() or ospec().
 622 # not only that: TestIssuerInternal.imem can entirely move into here
 623 # because imem is only ever accessed inside the FetchFSM.
 624 class FetchFSM(ControlBase):
 625     def __init__(self, allow_overlap, svp64_en, imem, core_rst,
 626                  pdecode2, cur_state,
 627                  dbg, core, svstate, nia, is_svp64_mode):
 628         self.allow_overlap = allow_overlap
 629         self.svp64_en = svp64_en
 630         self.imem = imem
 631         self.core_rst = core_rst
 632         self.pdecode2 = pdecode2
 633         self.cur_state = cur_state
 634         self.dbg = dbg
 635         self.core = core
 636         self.svstate = svstate
 637         self.nia = nia
 638         self.is_svp64_mode = is_svp64_mode
 639
 640         # set up pipeline ControlBase and allocate i/o specs
 641         # (unusual: normally done by the Pipeline API)
 642         super().__init__(stage=self)
 643         self.p.i_data, self.n.o_data = self.new_specs(None)
 644         self.i, self.o = self.p.i_data, self.n.o_data
 645
 646     # next 3 functions are Stage API Compliance
 647     def setup(self, m, i):
 648         pass
 649
 650     def ispec(self):
 651         return FetchInput()
 652
 653     def ospec(self):
 654         return FetchOutput()
 655
 656     def elaborate(self, platform):
 657         """fetch FSM
 658
 659         this FSM performs fetch of raw instruction data, partial-decodes
 660         it 32-bit at a time to detect SVP64 prefixes, and will optionally
 661         read a 2nd 32-bit quantity if that occurs.
 662         """
 663         m = super().elaborate(platform)
 664
 665         dbg = self.dbg
 666         core = self.core
 667         pc = self.i.pc
 668         msr = self.i.msr
 669         svstate = self.svstate
 670         nia = self.nia
 671         is_svp64_mode = self.is_svp64_mode
 672         fetch_pc_o_ready = self.p.o_ready
 673         fetch_pc_i_valid = self.p.i_valid
 674         fetch_insn_o_valid = self.n.o_valid
 675         fetch_insn_i_ready = self.n.i_ready
 676
 677         comb = m.d.comb
 678         sync = m.d.sync
 679         pdecode2 = self.pdecode2
 680         cur_state = self.cur_state
 681         dec_opcode_o = pdecode2.dec.raw_opcode_in  # raw opcode
 682
 683         # also note instruction fetch failed
 684         if hasattr(core, "icache"):
 685             fetch_failed = core.icache.i_out.fetch_failed
 686             flush_needed = True
 687         else:
 688             fetch_failed = Const(0, 1)
 689             flush_needed = False
 690
 691         with m.FSM(name='fetch_fsm'):
 692
 693             # waiting (zzz)
 694             with m.State("IDLE"):
 695                 with m.If(~dbg.stopping_o & ~fetch_failed):
 696                     comb += fetch_pc_o_ready.eq(1)
 697                 with m.If(fetch_pc_i_valid & ~fetch_failed):
 698                     # instruction allowed to go: start by reading the PC
 699                     # capture the PC and also drop it into Insn Memory
 700                     # we have joined a pair of combinatorial memory
 701                     # lookups together.  this is Generally Bad.
 702                     comb += self.imem.a_pc_i.eq(pc)
 703                     comb += self.imem.a_i_valid.eq(1)
 704                     comb += self.imem.f_i_valid.eq(1)
 705                     sync += cur_state.pc.eq(pc)
 706                     sync += cur_state.svstate.eq(svstate)  # and svstate
 707                     sync += cur_state.msr.eq(msr)  # and msr
 708
 709                     m.next = "INSN_READ"  # move to "wait for bus" phase
 710
 711             # dummy pause to find out why simulation is not keeping up
 712             with m.State("INSN_READ"):
 713                 if self.allow_overlap:
 714                     stopping = dbg.stopping_o
 715                 else:
 716                     stopping = Const(0)
 717                 with m.If(stopping):
 718                     # stopping: jump back to idle
 719                     m.next = "IDLE"
 720                 with m.Else():
 721                     with m.If(self.imem.f_busy_o & ~fetch_failed):  # zzz...
 722                         # busy but not fetch failed: stay in wait-read
 723                         comb += self.imem.a_i_valid.eq(1)
 724                         comb += self.imem.f_i_valid.eq(1)
 725                     with m.Else():
 726                         # not busy (or fetch failed!): instruction fetched
 727                         # when fetch failed, the instruction gets ignored
 728                         # by the decoder
 729                         insn = get_insn(self.imem.f_instr_o, cur_state.pc)
 730                         if self.svp64_en:
 731                             svp64 = self.svp64
 732                             # decode the SVP64 prefix, if any
 733                             comb += svp64.raw_opcode_in.eq(insn)
 734                             comb += svp64.bigendian.eq(self.core_bigendian_i)
 735                             # pass the decoded prefix (if any) to PowerDecoder2
 736                             sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
 737                             sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
 738                             # remember whether this is a prefixed instruction,
 739                             # so the FSM can readily loop when VL==0
 740                             sync += is_svp64_mode.eq(svp64.is_svp64_mode)
 741                             # calculate the address of the following instruction
 742                             insn_size = Mux(svp64.is_svp64_mode, 8, 4)
 743                             sync += nia.eq(cur_state.pc + insn_size)
 744                             with m.If(~svp64.is_svp64_mode):
 745                                 # with no prefix, store the instruction
 746                                 # and hand it directly to the next FSM
 747                                 sync += dec_opcode_o.eq(insn)
 748                                 m.next = "INSN_READY"
 749                             with m.Else():
 750                                 # fetch the rest of the instruction from memory
 751                                 comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
 752                                 comb += self.imem.a_i_valid.eq(1)
 753                                 comb += self.imem.f_i_valid.eq(1)
 754                                 m.next = "INSN_READ2"
 755                         else:
 756                             # not SVP64 - 32-bit only
 757                             sync += nia.eq(cur_state.pc + 4)
 758                             sync += dec_opcode_o.eq(insn)
 759                             m.next = "INSN_READY"
 760
 761             with m.State("INSN_READ2"):
 762                 with m.If(self.imem.f_busy_o):  # zzz...
 763                     # busy: stay in wait-read
 764                     comb += self.imem.a_i_valid.eq(1)
 765                     comb += self.imem.f_i_valid.eq(1)
 766                 with m.Else():
 767                     # not busy: instruction fetched
 768                     insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
 769                     sync += dec_opcode_o.eq(insn)
 770                     m.next = "INSN_READY"
 771                     # TODO: probably can start looking at pdecode2.rm_dec
 772                     # here or maybe even in INSN_READ state, if svp64_mode
 773                     # detected, in order to trigger - and wait for - the
 774                     # predicate reading.
 775                     if self.svp64_en:
 776                         pmode = pdecode2.rm_dec.predmode
 777                     """
 778                     if pmode != SVP64PredMode.ALWAYS.value:
 779                         fire predicate loading FSM and wait before
 780                         moving to INSN_READY
 781                     else:
 782                         sync += self.srcmask.eq(-1) # set to all 1s
 783                         sync += self.dstmask.eq(-1) # set to all 1s
 784                         m.next = "INSN_READY"
 785                     """
 786
 787             with m.State("INSN_READY"):
 788                 # hand over the instruction, to be decoded
 789                 comb += fetch_insn_o_valid.eq(1)
 790                 with m.If(fetch_insn_i_ready):
 791                     m.next = "IDLE"
 792
 793         # whatever was done above, over-ride it if core reset is held
 794         with m.If(self.core_rst):
 795             sync += nia.eq(0)
 796
 797         return m
 798
 799
 800 class TestIssuerInternal(TestIssuerBase):
 801     """TestIssuer - reads instructions from TestMemory and issues them
 802
 803     efficiency and speed is not the main goal here: functional correctness
 804     and code clarity is.  optimisations (which almost 100% interfere with
 805     easy understanding) come later.
 806     """
 807
 808     def fetch_predicate_fsm(self, m,
 809                             pred_insn_i_valid, pred_insn_o_ready,
 810                             pred_mask_o_valid, pred_mask_i_ready):
 811         """fetch_predicate_fsm - obtains (constructs in the case of CR)
 812            src/dest predicate masks
 813
 814         https://bugs.libre-soc.org/show_bug.cgi?id=617
 815         the predicates can be read here, by using IntRegs r_ports['pred']
 816         or CRRegs r_ports['pred'].  in the case of CRs it will have to
 817         be done through multiple reads, extracting one relevant at a time.
 818         later, a faster way would be to use the 32-bit-wide CR port but
 819         this is more complex decoding, here.  equivalent code used in
 820         ISACaller is "from openpower.decoder.isa.caller import get_predcr"
 821
 822         note: this ENTIRE FSM is not to be called when svp64 is disabled
 823         """
 824         comb = m.d.comb
 825         sync = m.d.sync
 826         pdecode2 = self.pdecode2
 827         rm_dec = pdecode2.rm_dec  # SVP64RMModeDecode
 828         predmode = rm_dec.predmode
 829         srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
 830         cr_pred, int_pred = self.cr_pred, self.int_pred   # read regfiles
 831         # get src/dst step, so we can skip already used mask bits
 832         cur_state = self.cur_state
 833         srcstep = cur_state.svstate.srcstep
 834         dststep = cur_state.svstate.dststep
 835         cur_vl = cur_state.svstate.vl
 836
 837         # decode predicates
 838         sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
 839         dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
 840         sidx, scrinvert = get_predcr(m, srcpred, 's')
 841         didx, dcrinvert = get_predcr(m, dstpred, 'd')
 842
 843         # store fetched masks, for either intpred or crpred
 844         # when src/dst step is not zero, the skipped mask bits need to be
 845         # shifted-out, before actually storing them in src/dest mask
 846         new_srcmask = Signal(64, reset_less=True)
 847         new_dstmask = Signal(64, reset_less=True)
 848
 849         with m.FSM(name="fetch_predicate"):
 850
 851             with m.State("FETCH_PRED_IDLE"):
 852                 comb += pred_insn_o_ready.eq(1)
 853                 with m.If(pred_insn_i_valid):
 854                     with m.If(predmode == SVP64PredMode.INT):
 855                         # skip fetching destination mask register, when zero
 856                         with m.If(dall1s):
 857                             sync += new_dstmask.eq(-1)
 858                             # directly go to fetch source mask register
 859                             # guaranteed not to be zero (otherwise predmode
 860                             # would be SVP64PredMode.ALWAYS, not INT)
 861                             comb += int_pred.addr.eq(sregread)
 862                             comb += int_pred.ren.eq(1)
 863                             m.next = "INT_SRC_READ"
 864                         # fetch destination predicate register
 865                         with m.Else():
 866                             comb += int_pred.addr.eq(dregread)
 867                             comb += int_pred.ren.eq(1)
 868                             m.next = "INT_DST_READ"
 869                     with m.Elif(predmode == SVP64PredMode.CR):
 870                         # go fetch masks from the CR register file
 871                         sync += new_srcmask.eq(0)
 872                         sync += new_dstmask.eq(0)
 873                         m.next = "CR_READ"
 874                     with m.Else():
 875                         sync += self.srcmask.eq(-1)
 876                         sync += self.dstmask.eq(-1)
 877                         m.next = "FETCH_PRED_DONE"
 878
 879             with m.State("INT_DST_READ"):
 880                 # store destination mask
 881                 inv = Repl(dinvert, 64)
 882                 with m.If(dunary):
 883                     # set selected mask bit for 1<<r3 mode
 884                     dst_shift = Signal(range(64))
 885                     comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
 886                     sync += new_dstmask.eq(1 << dst_shift)
 887                 with m.Else():
 888                     # invert mask if requested
 889                     sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
 890                 # skip fetching source mask register, when zero
 891                 with m.If(sall1s):
 892                     sync += new_srcmask.eq(-1)
 893                     m.next = "FETCH_PRED_SHIFT_MASK"
 894                 # fetch source predicate register
 895                 with m.Else():
 896                     comb += int_pred.addr.eq(sregread)
 897                     comb += int_pred.ren.eq(1)
 898                     m.next = "INT_SRC_READ"
 899
 900             with m.State("INT_SRC_READ"):
 901                 # store source mask
 902                 inv = Repl(sinvert, 64)
 903                 with m.If(sunary):
 904                     # set selected mask bit for 1<<r3 mode
 905                     src_shift = Signal(range(64))
 906                     comb += src_shift.eq(self.int_pred.o_data & 0b111111)
 907                     sync += new_srcmask.eq(1 << src_shift)
 908                 with m.Else():
 909                     # invert mask if requested
 910                     sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
 911                 m.next = "FETCH_PRED_SHIFT_MASK"
 912
 913             # fetch masks from the CR register file
 914             # implements the following loop:
 915             # idx, inv = get_predcr(mask)
 916             # mask = 0
 917             # for cr_idx in range(vl):
 918             #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
 919             #     if cr[idx] ^ inv:
 920             #         mask |= 1 << cr_idx
 921             # return mask
 922             with m.State("CR_READ"):
 923                 # CR index to be read, which will be ready by the next cycle
 924                 cr_idx = Signal.like(cur_vl, reset_less=True)
 925                 # submit the read operation to the regfile
 926                 with m.If(cr_idx != cur_vl):
 927                     # the CR read port is unary ...
 928                     # ren = 1 << cr_idx
 929                     # ... in MSB0 convention ...
 930                     # ren = 1 << (7 - cr_idx)
 931                     # ... and with an offset:
 932                     # ren = 1 << (7 - off - cr_idx)
 933                     idx = SVP64CROffs.CRPred + cr_idx
 934                     comb += cr_pred.ren.eq(1 << (7 - idx))
 935                     # signal data valid in the next cycle
 936                     cr_read = Signal(reset_less=True)
 937                     sync += cr_read.eq(1)
 938                     # load the next index
 939                     sync += cr_idx.eq(cr_idx + 1)
 940                 with m.Else():
 941                     # exit on loop end
 942                     sync += cr_read.eq(0)
 943                     sync += cr_idx.eq(0)
 944                     m.next = "FETCH_PRED_SHIFT_MASK"
 945                 with m.If(cr_read):
 946                     # compensate for the one cycle delay on the regfile
 947                     cur_cr_idx = Signal.like(cur_vl)
 948                     comb += cur_cr_idx.eq(cr_idx - 1)
 949                     # read the CR field, select the appropriate bit
 950                     cr_field = Signal(4)
 951                     scr_bit = Signal()
 952                     dcr_bit = Signal()
 953                     comb += cr_field.eq(cr_pred.o_data)
 954                     comb += scr_bit.eq(cr_field.bit_select(sidx, 1)
 955                                        ^ scrinvert)
 956                     comb += dcr_bit.eq(cr_field.bit_select(didx, 1)
 957                                        ^ dcrinvert)
 958                     # set the corresponding mask bit
 959                     bit_to_set = Signal.like(self.srcmask)
 960                     comb += bit_to_set.eq(1 << cur_cr_idx)
 961                     with m.If(scr_bit):
 962                         sync += new_srcmask.eq(new_srcmask | bit_to_set)
 963                     with m.If(dcr_bit):
 964                         sync += new_dstmask.eq(new_dstmask | bit_to_set)
 965
 966             with m.State("FETCH_PRED_SHIFT_MASK"):
 967                 # shift-out skipped mask bits
 968                 sync += self.srcmask.eq(new_srcmask >> srcstep)
 969                 sync += self.dstmask.eq(new_dstmask >> dststep)
 970                 m.next = "FETCH_PRED_DONE"
 971
 972             with m.State("FETCH_PRED_DONE"):
 973                 comb += pred_mask_o_valid.eq(1)
 974                 with m.If(pred_mask_i_ready):
 975                     m.next = "FETCH_PRED_IDLE"
 976
 977     def issue_fsm(self, m, core, nia,
 978                   dbg, core_rst, is_svp64_mode,
 979                   fetch_pc_o_ready, fetch_pc_i_valid,
 980                   fetch_insn_o_valid, fetch_insn_i_ready,
 981                   pred_insn_i_valid, pred_insn_o_ready,
 982                   pred_mask_o_valid, pred_mask_i_ready,
 983                   exec_insn_i_valid, exec_insn_o_ready,
 984                   exec_pc_o_valid, exec_pc_i_ready):
 985         """issue FSM
 986
 987         decode / issue FSM.  this interacts with the "fetch" FSM
 988         through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
 989         (outgoing). also interacts with the "execute" FSM
 990         through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
 991         (incoming).
 992         SVP64 RM prefixes have already been set up by the
 993         "fetch" phase, so execute is fairly straightforward.
 994         """
 995
 996         comb = m.d.comb
 997         sync = m.d.sync
 998         pdecode2 = self.pdecode2
 999         cur_state = self.cur_state
1000         new_svstate = self.new_svstate
1001
1002         # temporaries
1003         dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode
1004
1005         # for updating svstate (things like srcstep etc.)
1006         comb += new_svstate.eq(cur_state.svstate)
1007
1008         # precalculate srcstep+1 and dststep+1
1009         cur_srcstep = cur_state.svstate.srcstep
1010         cur_dststep = cur_state.svstate.dststep
1011         next_srcstep = Signal.like(cur_srcstep)
1012         next_dststep = Signal.like(cur_dststep)
1013         comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
1014         comb += next_dststep.eq(cur_state.svstate.dststep+1)
1015
1016         # note if an exception happened.  in a pipelined or OoO design
1017         # this needs to be accompanied by "shadowing" (or stalling)
1018         exc_happened = self.core.o.exc_happened
1019         # also note instruction fetch failed
1020         if hasattr(core, "icache"):
1021             fetch_failed = core.icache.i_out.fetch_failed
1022             flush_needed = True
1023             # set to fault in decoder
1024             # update (highest priority) instruction fault
1025             rising_fetch_failed = rising_edge(m, fetch_failed)
1026             with m.If(rising_fetch_failed):
1027                 sync += pdecode2.instr_fault.eq(1)
1028         else:
1029             fetch_failed = Const(0, 1)
1030             flush_needed = False
1031
1032         with m.FSM(name="issue_fsm"):
1033
1034             # sync with the "fetch" phase which is reading the instruction
1035             # at this point, there is no instruction running, that
1036             # could inadvertently update the PC.
1037             with m.State("ISSUE_START"):
1038                 # reset instruction fault
1039                 sync += pdecode2.instr_fault.eq(0)
1040                 # wait on "core stop" release, before next fetch
1041                 # need to do this here, in case we are in a VL==0 loop
1042                 with m.If(~dbg.core_stop_o & ~core_rst):
1043                     comb += fetch_pc_i_valid.eq(1)  # tell fetch to start
1044                     with m.If(fetch_pc_o_ready):   # fetch acknowledged us
1045                         m.next = "INSN_WAIT"
1046                 with m.Else():
1047                     # tell core it's stopped, and acknowledge debug handshake
1048                     comb += dbg.core_stopped_i.eq(1)
1049                     # while stopped, allow updating SVSTATE
1050                     with m.If(self.svstate_i.ok):
1051                         comb += new_svstate.eq(self.svstate_i.data)
1052                         comb += self.update_svstate.eq(1)
1053                         sync += self.sv_changed.eq(1)
1054
1055             # wait for an instruction to arrive from Fetch
1056             with m.State("INSN_WAIT"):
1057                 if self.allow_overlap:
1058                     stopping = dbg.stopping_o
1059                 else:
1060                     stopping = Const(0)
1061                 with m.If(stopping):
1062                     # stopping: jump back to idle
1063                     m.next = "ISSUE_START"
1064                     if flush_needed:
1065                         # request the icache to stop asserting "failed"
1066                         comb += core.icache.flush_in.eq(1)
1067                     # stop instruction fault
1068                     sync += pdecode2.instr_fault.eq(0)
1069                 with m.Else():
1070                     comb += fetch_insn_i_ready.eq(1)
1071                     with m.If(fetch_insn_o_valid):
1072                         # loop into ISSUE_START if it's a SVP64 instruction
1073                         # and VL == 0.  this because VL==0 is a for-loop
1074                         # from 0 to 0 i.e. always, always a NOP.
1075                         cur_vl = cur_state.svstate.vl
1076                         with m.If(is_svp64_mode & (cur_vl == 0)):
1077                             # update the PC before fetching the next instruction
1078                             # since we are in a VL==0 loop, no instruction was
1079                             # executed that we could be overwriting
1080                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1081                             comb += self.state_w_pc.i_data.eq(nia)
1082                             comb += self.insn_done.eq(1)
1083                             m.next = "ISSUE_START"
1084                         with m.Else():
1085                             if self.svp64_en:
1086                                 m.next = "PRED_START"  # fetching predicate
1087                             else:
1088                                 m.next = "DECODE_SV"  # skip predication
1089
1090             with m.State("PRED_START"):
1091                 comb += pred_insn_i_valid.eq(1)  # tell fetch_pred to start
1092                 with m.If(pred_insn_o_ready):  # fetch_pred acknowledged us
1093                     m.next = "MASK_WAIT"
1094
1095             with m.State("MASK_WAIT"):
1096                 comb += pred_mask_i_ready.eq(1)  # ready to receive the masks
1097                 with m.If(pred_mask_o_valid):  # predication masks are ready
1098                     m.next = "PRED_SKIP"
1099
1100             # skip zeros in predicate
1101             with m.State("PRED_SKIP"):
1102                 with m.If(~is_svp64_mode):
1103                     m.next = "DECODE_SV"  # nothing to do
1104                 with m.Else():
1105                     if self.svp64_en:
1106                         pred_src_zero = pdecode2.rm_dec.pred_sz
1107                         pred_dst_zero = pdecode2.rm_dec.pred_dz
1108
1109                         # new srcstep, after skipping zeros
1110                         skip_srcstep = Signal.like(cur_srcstep)
1111                         # value to be added to the current srcstep
1112                         src_delta = Signal.like(cur_srcstep)
1113                         # add leading zeros to srcstep, if not in zero mode
1114                         with m.If(~pred_src_zero):
1115                             # priority encoder (count leading zeros)
1116                             # append guard bit, in case the mask is all zeros
1117                             pri_enc_src = PriorityEncoder(65)
1118                             m.submodules.pri_enc_src = pri_enc_src
1119                             comb += pri_enc_src.i.eq(Cat(self.srcmask,
1120                                                          Const(1, 1)))
1121                             comb += src_delta.eq(pri_enc_src.o)
1122                         # apply delta to srcstep
1123                         comb += skip_srcstep.eq(cur_srcstep + src_delta)
1124                         # shift-out all leading zeros from the mask
1125                         # plus the leading "one" bit
1126                         # TODO count leading zeros and shift-out the zero
1127                         #      bits, in the same step, in hardware
1128                         sync += self.srcmask.eq(self.srcmask >> (src_delta+1))
1129
1130                         # same as above, but for dststep
1131                         skip_dststep = Signal.like(cur_dststep)
1132                         dst_delta = Signal.like(cur_dststep)
1133                         with m.If(~pred_dst_zero):
1134                             pri_enc_dst = PriorityEncoder(65)
1135                             m.submodules.pri_enc_dst = pri_enc_dst
1136                             comb += pri_enc_dst.i.eq(Cat(self.dstmask,
1137                                                          Const(1, 1)))
1138                             comb += dst_delta.eq(pri_enc_dst.o)
1139                         comb += skip_dststep.eq(cur_dststep + dst_delta)
1140                         sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))
1141
1142                         # TODO: initialize mask[VL]=1 to avoid passing past VL
1143                         with m.If((skip_srcstep >= cur_vl) |
1144                                   (skip_dststep >= cur_vl)):
1145                             # end of VL loop. Update PC and reset src/dst step
1146                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1147                             comb += self.state_w_pc.i_data.eq(nia)
1148                             comb += new_svstate.srcstep.eq(0)
1149                             comb += new_svstate.dststep.eq(0)
1150                             comb += self.update_svstate.eq(1)
1151                             # synchronize with the simulator
1152                             comb += self.insn_done.eq(1)
1153                             # go back to Issue
1154                             m.next = "ISSUE_START"
1155                         with m.Else():
1156                             # update new src/dst step
1157                             comb += new_svstate.srcstep.eq(skip_srcstep)
1158                             comb += new_svstate.dststep.eq(skip_dststep)
1159                             comb += self.update_svstate.eq(1)
1160                             # proceed to Decode
1161                             m.next = "DECODE_SV"
1162
1163                         # pass predicate mask bits through to satellite decoders
1164                         # TODO: for SIMD this will be *multiple* bits
1165                         sync += core.i.sv_pred_sm.eq(self.srcmask[0])
1166                         sync += core.i.sv_pred_dm.eq(self.dstmask[0])
1167
1168             # after src/dst step have been updated, we are ready
1169             # to decode the instruction
1170             with m.State("DECODE_SV"):
1171                 # decode the instruction
1172                 with m.If(~fetch_failed):
1173                     sync += pdecode2.instr_fault.eq(0)
1174                 sync += core.i.e.eq(pdecode2.e)
1175                 sync += core.i.state.eq(cur_state)
1176                 sync += core.i.raw_insn_i.eq(dec_opcode_i)
1177                 sync += core.i.bigendian_i.eq(self.core_bigendian_i)
1178                 if self.svp64_en:
1179                     sync += core.i.sv_rm.eq(pdecode2.sv_rm)
1180                     # set RA_OR_ZERO detection in satellite decoders
1181                     sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
1182                     # and svp64 detection
1183                     sync += core.i.is_svp64_mode.eq(is_svp64_mode)
1184                     # and svp64 bit-rev'd ldst mode
1185                     ldst_dec = pdecode2.use_svp64_ldst_dec
1186                     sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
1187                 # after decoding, reset any previous exception condition,
1188                 # allowing it to be set again during the next execution
1189                 sync += pdecode2.ldst_exc.eq(0)
1190
1191                 m.next = "INSN_EXECUTE"  # move to "execute"
1192
1193             # handshake with execution FSM, move to "wait" once acknowledged
1194             with m.State("INSN_EXECUTE"):
1195                 comb += exec_insn_i_valid.eq(1)  # trigger execute
1196                 with m.If(exec_insn_o_ready):   # execute acknowledged us
1197                     m.next = "EXECUTE_WAIT"
1198
1199             with m.State("EXECUTE_WAIT"):
1200                 # wait on "core stop" release, at instruction end
1201                 # need to do this here, in case we are in a VL>1 loop
1202                 with m.If(~dbg.core_stop_o & ~core_rst):
1203                     comb += exec_pc_i_ready.eq(1)
1204                     # see https://bugs.libre-soc.org/show_bug.cgi?id=636
1205                     # the exception info needs to be blatted into
1206                     # pdecode.ldst_exc, and the instruction "re-run".
1207                     # when ldst_exc.happened is set, the PowerDecoder2
1208                     # reacts very differently: it re-writes the instruction
1209                     # with a "trap" (calls PowerDecoder2.trap()) which
1210                     # will *overwrite* whatever was requested and jump the
1211                     # PC to the exception address, as well as alter MSR.
1212                     # nothing else needs to be done other than to note
1213                     # the change of PC and MSR (and, later, SVSTATE)
1214                     with m.If(exc_happened):
1215                         mmu = core.fus.get_exc("mmu0")
1216                         ldst = core.fus.get_exc("ldst0")
1217                         if mmu is not None:
1218                             with m.If(fetch_failed):
1219                                 # instruction fetch: exception is from MMU
1220                                 # reset instr_fault (highest priority)
1221                                 sync += pdecode2.ldst_exc.eq(mmu)
1222                                 sync += pdecode2.instr_fault.eq(0)
1223                                 if flush_needed:
1224                                     # request icache to stop asserting "failed"
1225                                     comb += core.icache.flush_in.eq(1)
1226                         with m.If(~fetch_failed):
1227                             # otherwise assume it was a LDST exception
1228                             sync += pdecode2.ldst_exc.eq(ldst)
1229
1230                     with m.If(exec_pc_o_valid):
1231
1232                         # was this the last loop iteration?
1233                         is_last = Signal()
1234                         cur_vl = cur_state.svstate.vl
1235                         comb += is_last.eq(next_srcstep == cur_vl)
1236
1237                         # return directly to Decode if Execute generated an
1238                         # exception.
1239                         with m.If(pdecode2.ldst_exc.happened):
1240                             m.next = "DECODE_SV"
1241
1242                         # if MSR, PC or SVSTATE were changed by the previous
1243                         # instruction, go directly back to Fetch, without
1244                         # updating either MSR PC or SVSTATE
1245                         with m.Elif(self.msr_changed | self.pc_changed |
1246                                     self.sv_changed):
1247                             m.next = "ISSUE_START"
1248
1249                         # also return to Fetch, when no output was a vector
1250                         # (regardless of SRCSTEP and VL), or when the last
1251                         # instruction was really the last one of the VL loop
1252                         with m.Elif((~pdecode2.loop_continue) | is_last):
1253                             # before going back to fetch, update the PC state
1254                             # register with the NIA.
1255                             # ok here we are not reading the branch unit.
1256                             # TODO: this just blithely overwrites whatever
1257                             #       pipeline updated the PC
1258                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1259                             comb += self.state_w_pc.i_data.eq(nia)
1260                             # reset SRCSTEP before returning to Fetch
1261                             if self.svp64_en:
1262                                 with m.If(pdecode2.loop_continue):
1263                                     comb += new_svstate.srcstep.eq(0)
1264                                     comb += new_svstate.dststep.eq(0)
1265                                     comb += self.update_svstate.eq(1)
1266                             else:
1267                                 comb += new_svstate.srcstep.eq(0)
1268                                 comb += new_svstate.dststep.eq(0)
1269                                 comb += self.update_svstate.eq(1)
1270                             m.next = "ISSUE_START"
1271
1272                         # returning to Execute? then, first update SRCSTEP
1273                         with m.Else():
1274                             comb += new_svstate.srcstep.eq(next_srcstep)
1275                             comb += new_svstate.dststep.eq(next_dststep)
1276                             comb += self.update_svstate.eq(1)
1277                             # return to mask skip loop
1278                             m.next = "PRED_SKIP"
1279
1280                 with m.Else():
1281                     comb += dbg.core_stopped_i.eq(1)
1282                     if flush_needed:
1283                         # request the icache to stop asserting "failed"
1284                         comb += core.icache.flush_in.eq(1)
1285                     # stop instruction fault
1286                     sync += pdecode2.instr_fault.eq(0)
1287                     if flush_needed:
1288                         # request the icache to stop asserting "failed"
1289                         comb += core.icache.flush_in.eq(1)
1290                     # stop instruction fault
1291                     sync += pdecode2.instr_fault.eq(0)
1292
1293         # check if svstate needs updating: if so, write it to State Regfile
1294         with m.If(self.update_svstate):
1295             sync += cur_state.svstate.eq(self.new_svstate)  # for next clock
1296
1297     def execute_fsm(self, m, core,
1298                     exec_insn_i_valid, exec_insn_o_ready,
1299                     exec_pc_o_valid, exec_pc_i_ready):
1300         """execute FSM
1301
1302         execute FSM. this interacts with the "issue" FSM
1303         through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
1304         (outgoing). SVP64 RM prefixes have already been set up by the
1305         "issue" phase, so execute is fairly straightforward.
1306         """
1307
1308         comb = m.d.comb
1309         sync = m.d.sync
1310         pdecode2 = self.pdecode2
1311
1312         # temporaries
1313         core_busy_o = core.n.o_data.busy_o  # core is busy
1314         core_ivalid_i = core.p.i_valid              # instruction is valid
1315
1316         if hasattr(core, "icache"):
1317             fetch_failed = core.icache.i_out.fetch_failed
1318         else:
1319             fetch_failed = Const(0, 1)
1320
1321         with m.FSM(name="exec_fsm"):
1322
1323             # waiting for instruction bus (stays there until not busy)
1324             with m.State("INSN_START"):
1325                 comb += exec_insn_o_ready.eq(1)
1326                 with m.If(exec_insn_i_valid):
1327                     comb += core_ivalid_i.eq(1)  # instruction is valid/issued
1328                     sync += self.sv_changed.eq(0)
1329                     sync += self.pc_changed.eq(0)
1330                     sync += self.msr_changed.eq(0)
1331                     with m.If(core.p.o_ready):  # only move if accepted
1332                         m.next = "INSN_ACTIVE"  # move to "wait completion"
1333
1334             # instruction started: must wait till it finishes
1335             with m.State("INSN_ACTIVE"):
1336                 # note changes to MSR, PC and SVSTATE
1337                 # XXX oops, really must monitor *all* State Regfile write
1338                 # ports looking for changes!
1339                 with m.If(self.state_nia.wen & (1 << StateRegs.SVSTATE)):
1340                     sync += self.sv_changed.eq(1)
1341                 with m.If(self.state_nia.wen & (1 << StateRegs.MSR)):
1342                     sync += self.msr_changed.eq(1)
1343                 with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
1344                     sync += self.pc_changed.eq(1)
1345                 with m.If(~core_busy_o):  # instruction done!
1346                     comb += exec_pc_o_valid.eq(1)
1347                     with m.If(exec_pc_i_ready):
1348                         # when finished, indicate "done".
1349                         # however, if there was an exception, the instruction
1350                         # is *not* yet done.  this is an implementation
1351                         # detail: we choose to implement exceptions by
1352                         # taking the exception information from the LDST
1353                         # unit, putting that *back* into the PowerDecoder2,
1354                         # and *re-running the entire instruction*.
1355                         # if we erroneously indicate "done" here, it is as if
1356                         # there were *TWO* instructions:
1357                         # 1) the failed LDST 2) a TRAP.
1358                         with m.If(~pdecode2.ldst_exc.happened &
1359                                   ~fetch_failed):
1360                             comb += self.insn_done.eq(1)
1361                         m.next = "INSN_START"  # back to fetch
1362
1363     def elaborate(self, platform):
1364         m = super().elaborate(platform)
1365         # convenience
1366         comb, sync = m.d.comb, m.d.sync
1367         cur_state = self.cur_state
1368         pdecode2 = self.pdecode2
1369         dbg = self.dbg
1370         core = self.core
1371
1372         # set up peripherals and core
1373         core_rst = self.core_rst
1374
1375         # indicate to outside world if any FU is still executing
1376         comb += self.any_busy.eq(core.n.o_data.any_busy_o)  # any FU executing
1377
1378         # address of the next instruction, in the absence of a branch
1379         # depends on the instruction size
1380         nia = Signal(64)
1381
1382         # connect up debug signals
1383         comb += dbg.terminate_i.eq(core.o.core_terminate_o)
1384
1385         # pass the prefix mode from Fetch to Issue, so the latter can loop
1386         # on VL==0
1387         is_svp64_mode = Signal()
1388
1389         # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
1390         # issue, decode/execute, now joined by "Predicate fetch/calculate".
1391         # these are the handshake signals between each
1392
1393         # fetch FSM can run as soon as the PC is valid
1394         fetch_pc_i_valid = Signal()  # Execute tells Fetch "start next read"
1395         fetch_pc_o_ready = Signal()  # Fetch Tells SVSTATE "proceed"
1396
1397         # fetch FSM hands over the instruction to be decoded / issued
1398         fetch_insn_o_valid = Signal()
1399         fetch_insn_i_ready = Signal()
1400
1401         # predicate fetch FSM decodes and fetches the predicate
1402         pred_insn_i_valid = Signal()
1403         pred_insn_o_ready = Signal()
1404
1405         # predicate fetch FSM delivers the masks
1406         pred_mask_o_valid = Signal()
1407         pred_mask_i_ready = Signal()
1408
1409         # issue FSM delivers the instruction to the be executed
1410         exec_insn_i_valid = Signal()
1411         exec_insn_o_ready = Signal()
1412
1413         # execute FSM, hands over the PC/SVSTATE back to the issue FSM
1414         exec_pc_o_valid = Signal()
1415         exec_pc_i_ready = Signal()
1416
1417         # the FSMs here are perhaps unusual in that they detect conditions
1418         # then "hold" information, combinatorially, for the core
1419         # (as opposed to using sync - which would be on a clock's delay)
1420         # this includes the actual opcode, valid flags and so on.
1421
1422         # Fetch, then predicate fetch, then Issue, then Execute.
1423         # Issue is where the VL for-loop # lives.  the ready/valid
1424         # signalling is used to communicate between the four.
1425
1426         # set up Fetch FSM
1427         fetch = FetchFSM(self.allow_overlap, self.svp64_en,
1428                          self.imem, core_rst, pdecode2, cur_state,
1429                          dbg, core,
1430                          dbg.state.svstate, # combinatorially same
1431                          nia, is_svp64_mode)
1432         m.submodules.fetch = fetch
1433         # connect up in/out data to existing Signals
1434         comb += fetch.p.i_data.pc.eq(dbg.state.pc)   # combinatorially same
1435         comb += fetch.p.i_data.msr.eq(dbg.state.msr) # combinatorially same
1436         # and the ready/valid signalling
1437         comb += fetch_pc_o_ready.eq(fetch.p.o_ready)
1438         comb += fetch.p.i_valid.eq(fetch_pc_i_valid)
1439         comb += fetch_insn_o_valid.eq(fetch.n.o_valid)
1440         comb += fetch.n.i_ready.eq(fetch_insn_i_ready)
1441
1442         self.issue_fsm(m, core, nia,
1443                        dbg, core_rst, is_svp64_mode,
1444                        fetch_pc_o_ready, fetch_pc_i_valid,
1445                        fetch_insn_o_valid, fetch_insn_i_ready,
1446                        pred_insn_i_valid, pred_insn_o_ready,
1447                        pred_mask_o_valid, pred_mask_i_ready,
1448                        exec_insn_i_valid, exec_insn_o_ready,
1449                        exec_pc_o_valid, exec_pc_i_ready)
1450
1451         if self.svp64_en:
1452             self.fetch_predicate_fsm(m,
1453                                      pred_insn_i_valid, pred_insn_o_ready,
1454                                      pred_mask_o_valid, pred_mask_i_ready)
1455
1456         self.execute_fsm(m, core,
1457                          exec_insn_i_valid, exec_insn_o_ready,
1458                          exec_pc_o_valid, exec_pc_i_ready)
1459
1460         return m
1461
1462
1463 class TestIssuer(Elaboratable):
1464     def __init__(self, pspec):
1465         self.ti = TestIssuerInternal(pspec)
1466         # XXX TODO: make this a command-line selectable option from pspec
1467         #from soc.simple.inorder import TestIssuerInternalInOrder
1468         #self.ti = TestIssuerInternalInOrder(pspec)
1469         self.pll = DummyPLL(instance=True)
1470
1471         # PLL direct clock or not
1472         self.pll_en = hasattr(pspec, "use_pll") and pspec.use_pll
1473         if self.pll_en:
1474             self.pll_test_o = Signal(reset_less=True)
1475             self.pll_vco_o = Signal(reset_less=True)
1476             self.clk_sel_i = Signal(2, reset_less=True)
1477             self.ref_clk = ClockSignal()  # can't rename it but that's ok
1478             self.pllclk_clk = ClockSignal("pllclk")
1479
1480     def elaborate(self, platform):
1481         m = Module()
1482         comb = m.d.comb
1483
1484         # TestIssuer nominally runs at main clock, actually it is
1485         # all combinatorial internally except for coresync'd components
1486         m.submodules.ti = ti = self.ti
1487
1488         if self.pll_en:
1489             # ClockSelect runs at PLL output internal clock rate
1490             m.submodules.wrappll = pll = self.pll
1491
1492             # add clock domains from PLL
1493             cd_pll = ClockDomain("pllclk")
1494             m.domains += cd_pll
1495
1496             # PLL clock established.  has the side-effect of running clklsel
1497             # at the PLL's speed (see DomainRenamer("pllclk") above)
1498             pllclk = self.pllclk_clk
1499             comb += pllclk.eq(pll.clk_pll_o)
1500
1501             # wire up external 24mhz to PLL
1502             #comb += pll.clk_24_i.eq(self.ref_clk)
1503             # output 18 mhz PLL test signal, and analog oscillator out
1504             comb += self.pll_test_o.eq(pll.pll_test_o)
1505             comb += self.pll_vco_o.eq(pll.pll_vco_o)
1506
1507             # input to pll clock selection
1508             comb += pll.clk_sel_i.eq(self.clk_sel_i)
1509
1510             # now wire up ResetSignals.  don't mind them being in this domain
1511             pll_rst = ResetSignal("pllclk")
1512             comb += pll_rst.eq(ResetSignal())
1513
1514         # internal clock is set to selector clock-out.  has the side-effect of
1515         # running TestIssuer at this speed (see DomainRenamer("intclk") above)
1516         # debug clock runs at coresync internal clock
1517         cd_coresync = ClockDomain("coresync")
1518         #m.domains += cd_coresync
1519         if self.ti.dbg_domain != 'sync':
1520             cd_dbgsync = ClockDomain("dbgsync")
1521             #m.domains += cd_dbgsync
1522         intclk = ClockSignal("coresync")
1523         dbgclk = ClockSignal(self.ti.dbg_domain)
1524         # XXX BYPASS PLL XXX
1525         # XXX BYPASS PLL XXX
1526         # XXX BYPASS PLL XXX
1527         if self.pll_en:
1528             comb += intclk.eq(self.ref_clk)
1529         else:
1530             comb += intclk.eq(ClockSignal())
1531         if self.ti.dbg_domain != 'sync':
1532             dbgclk = ClockSignal(self.ti.dbg_domain)
1533             comb += dbgclk.eq(intclk)
1534
1535         return m
1536
1537     def ports(self):
1538         return list(self.ti.ports()) + list(self.pll.ports()) + \
1539             [ClockSignal(), ResetSignal()]
1540
1541     def external_ports(self):
1542         ports = self.ti.external_ports()
1543         ports.append(ClockSignal())
1544         ports.append(ResetSignal())
1545         if self.pll_en:
1546             ports.append(self.clk_sel_i)
1547             ports.append(self.pll.clk_24_i)
1548             ports.append(self.pll_test_o)
1549             ports.append(self.pll_vco_o)
1550             ports.append(self.pllclk_clk)
1551             ports.append(self.ref_clk)
1552         return ports
1553
1554
1555 if __name__ == '__main__':
1556     units = {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
1557              'spr': 1,
1558              'div': 1,
1559              'mul': 1,
1560              'shiftrot': 1
1561              }
1562     pspec = TestMemPspec(ldst_ifacetype='bare_wb',
1563                          imem_ifacetype='bare_wb',
1564                          addr_wid=48,
1565                          mask_wid=8,
1566                          reg_wid=64,
1567                          units=units)
1568     dut = TestIssuer(pspec)
1569     vl = main(dut, ports=dut.ports(), name="test_issuer")
1570
1571     if len(sys.argv) == 1:
1572         vl = rtlil.convert(dut, ports=dut.external_ports(), name="test_issuer")
1573         with open("test_issuer.il", "w") as f:
1574             f.write(vl)