src/soc/simple/issuer.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
  12 the purpose of this module is to verify the functional correctness
  13 of the Function Units in the absolute simplest and clearest possible
  14 way, and to provide something that can be further incrementally
  15 improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
  19                     ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from nmutil.singlepipe import ControlBase
  25 from soc.simple.core_data import FetchOutput, FetchInput
  26
  27 from nmigen.lib.coding import PriorityEncoder
  28
  29 from openpower.decoder.power_decoder import create_pdecode
  30 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
  31 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
  32 from openpower.decoder.decode2execute1 import Data
  33 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
  34                                            SVP64PredMode)
  35 from openpower.state import CoreState
  36 from openpower.consts import (CR, SVP64CROffs, MSR)
  37 from soc.experiment.testmem import TestMemory  # test only for instructions
  38 from soc.regfile.regfiles import StateRegs, FastRegs
  39 from soc.simple.core import NonProductionCore
  40 from soc.config.test.test_loadstore import TestMemPspec
  41 from soc.config.ifetch import ConfigFetchUnit
  42 from soc.debug.dmi import CoreDebug, DMIInterface
  43 from soc.debug.jtag import JTAG
  44 from soc.config.pinouts import get_pinspecs
  45 from soc.interrupts.xics import XICS_ICP, XICS_ICS
  46 from soc.bus.simple_gpio import SimpleGPIO
  47 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
  48 from soc.clock.select import ClockSelect
  49 from soc.clock.dummypll import DummyPLL
  50 from openpower.sv.svstate import SVSTATERec
  51 from soc.experiment.icache import ICache
  52
  53 from nmutil.util import rising_edge
  54
  55
  56 def get_insn(f_instr_o, pc):
  57     if f_instr_o.width == 32:
  58         return f_instr_o
  59     else:
  60         # 64-bit: bit 2 of pc decides which word to select
  61         return f_instr_o.word_select(pc[2], 32)
  62
  63 # gets state input or reads from state regfile
  64
  65
  66 def state_get(m, res, core_rst, state_i, name, regfile, regnum):
  67     comb = m.d.comb
  68     sync = m.d.sync
  69     # read the {insert state variable here}
  70     res_ok_delay = Signal(name="%s_ok_delay" % name)
  71     with m.If(~core_rst):
  72         sync += res_ok_delay.eq(~state_i.ok)
  73         with m.If(state_i.ok):
  74             # incoming override (start from pc_i)
  75             comb += res.eq(state_i.data)
  76         with m.Else():
  77             # otherwise read StateRegs regfile for {insert state here}...
  78             comb += regfile.ren.eq(1 << regnum)
  79         # ... but on a 1-clock delay
  80         with m.If(res_ok_delay):
  81             comb += res.eq(regfile.o_data)
  82
  83
  84 def get_predint(m, mask, name):
  85     """decode SVP64 predicate integer mask field to reg number and invert
  86     this is identical to the equivalent function in ISACaller except that
  87     it doesn't read the INT directly, it just decodes "what needs to be done"
  88     i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
  89
  90     * all1s is set to indicate that no mask is to be applied.
  91     * regread indicates the GPR register number to be read
  92     * invert is set to indicate that the register value is to be inverted
  93     * unary indicates that the contents of the register is to be shifted 1<<r3
  94     """
  95     comb = m.d.comb
  96     regread = Signal(5, name=name+"regread")
  97     invert = Signal(name=name+"invert")
  98     unary = Signal(name=name+"unary")
  99     all1s = Signal(name=name+"all1s")
 100     with m.Switch(mask):
 101         with m.Case(SVP64PredInt.ALWAYS.value):
 102             comb += all1s.eq(1)      # use 0b1111 (all ones)
 103         with m.Case(SVP64PredInt.R3_UNARY.value):
 104             comb += regread.eq(3)
 105             comb += unary.eq(1)        # 1<<r3 - shift r3 (single bit)
 106         with m.Case(SVP64PredInt.R3.value):
 107             comb += regread.eq(3)
 108         with m.Case(SVP64PredInt.R3_N.value):
 109             comb += regread.eq(3)
 110             comb += invert.eq(1)
 111         with m.Case(SVP64PredInt.R10.value):
 112             comb += regread.eq(10)
 113         with m.Case(SVP64PredInt.R10_N.value):
 114             comb += regread.eq(10)
 115             comb += invert.eq(1)
 116         with m.Case(SVP64PredInt.R30.value):
 117             comb += regread.eq(30)
 118         with m.Case(SVP64PredInt.R30_N.value):
 119             comb += regread.eq(30)
 120             comb += invert.eq(1)
 121     return regread, invert, unary, all1s
 122
 123
 124 def get_predcr(m, mask, name):
 125     """decode SVP64 predicate CR to reg number field and invert status
 126     this is identical to _get_predcr in ISACaller
 127     """
 128     comb = m.d.comb
 129     idx = Signal(2, name=name+"idx")
 130     invert = Signal(name=name+"crinvert")
 131     with m.Switch(mask):
 132         with m.Case(SVP64PredCR.LT.value):
 133             comb += idx.eq(CR.LT)
 134             comb += invert.eq(0)
 135         with m.Case(SVP64PredCR.GE.value):
 136             comb += idx.eq(CR.LT)
 137             comb += invert.eq(1)
 138         with m.Case(SVP64PredCR.GT.value):
 139             comb += idx.eq(CR.GT)
 140             comb += invert.eq(0)
 141         with m.Case(SVP64PredCR.LE.value):
 142             comb += idx.eq(CR.GT)
 143             comb += invert.eq(1)
 144         with m.Case(SVP64PredCR.EQ.value):
 145             comb += idx.eq(CR.EQ)
 146             comb += invert.eq(0)
 147         with m.Case(SVP64PredCR.NE.value):
 148             comb += idx.eq(CR.EQ)
 149             comb += invert.eq(1)
 150         with m.Case(SVP64PredCR.SO.value):
 151             comb += idx.eq(CR.SO)
 152             comb += invert.eq(0)
 153         with m.Case(SVP64PredCR.NS.value):
 154             comb += idx.eq(CR.SO)
 155             comb += invert.eq(1)
 156     return idx, invert
 157
 158
 159 class TestIssuerBase(Elaboratable):
 160     """TestIssuerBase - common base class for Issuers
 161
 162     takes care of power-on reset, peripherals, debug, DEC/TB,
 163     and gets PC/MSR/SVSTATE from the State Regfile etc.
 164     """
 165
    def __init__(self, pspec):
        """Create the core, decoder, fetch unit, debug and optional peripherals.

        `pspec` is the configuration object.  The following optional
        attributes are consulted (each is treated as "off" when absent):
        svp64, regreduce, allow_overlap, debug ('jtag' enables JTAG),
        sram4x4kblock, xics and gpio.

        Note that `pspec` is also *modified* here: with JTAG enabled,
        wb_icache_en and wb_dcache_en are added to it, and when the core
        has an icache it is passed on as pspec.icache.  Both happen before
        the objects that read them are constructed, so the order of the
        statements below matters.
        """

        # test if SVP64 is to be enabled
        self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)

        # and if regfiles are reduced
        self.regreduce_en = (hasattr(pspec, "regreduce") and
                             (pspec.regreduce == True))

        # and if overlap requested
        self.allow_overlap = (hasattr(pspec, "allow_overlap") and
                              (pspec.allow_overlap == True))

        # JTAG interface.  add this right at the start because if it's
        # added it *modifies* the pspec, by adding enable/disable signals
        # for parts of the rest of the core
        self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
        self.dbg_domain = "sync"  # sigh "dbgsync" too problematic
        # self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
        if self.jtag_en:
            # XXX MUST keep this up-to-date with litex, and
            # soc-cocotb-sim, and err.. all needs sorting out, argh
            subset = ['uart',
                      'mtwi',
                      'eint', 'gpio', 'mspi0',
                      # 'mspi1', - disabled for now
                      # 'pwm', 'sd0', - disabled for now
                      'sdr']
            self.jtag = JTAG(get_pinspecs(subset=subset),
                             domain=self.dbg_domain)
            # add signals to pspec to enable/disable icache and dcache
            # (or data and instruction wishbone if icache/dcache not included)
            # https://bugs.libre-soc.org/show_bug.cgi?id=520
            # TODO: do we actually care if these are not domain-synchronised?
            # honestly probably not.
            pspec.wb_icache_en = self.jtag.wb_icache_en
            pspec.wb_dcache_en = self.jtag.wb_dcache_en
            self.wb_sram_en = self.jtag.wb_sram_en
        else:
            # without JTAG the SRAM enable is tied permanently high
            self.wb_sram_en = Const(1)

        # add 4k sram blocks?
        self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
                         pspec.sram4x4kblock == True)
        if self.sram4x4k:
            self.sram4k = []
            for i in range(4):
                self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
                                                    # features={'err'}
                                                    ))

        # add interrupt controller?
        self.xics = hasattr(pspec, "xics") and pspec.xics == True
        if self.xics:
            self.xics_icp = XICS_ICP()
            self.xics_ics = XICS_ICS()
            self.int_level_i = self.xics_ics.int_level_i

        # add GPIO peripheral?
        self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
        if self.gpio:
            self.simple_gpio = SimpleGPIO()
            self.gpio_o = self.simple_gpio.gpio_o

        # main instruction core.  suitable for prototyping / demo only
        self.core = core = NonProductionCore(pspec)
        self.core_rst = ResetSignal("coresync")

        # instruction decoder.  goes into Trap Record
        #pdecode = create_pdecode()
        self.cur_state = CoreState("cur")  # current state (MSR/PC/SVSTATE)
        self.pdecode2 = PowerDecode2(None, state=self.cur_state,
                                     opkls=IssuerDecode2ToOperand,
                                     svp64_en=self.svp64_en,
                                     regreduce_en=self.regreduce_en)
        # NOTE(review): `pdecode` is not used again in this method
        pdecode = self.pdecode2.dec

        if self.svp64_en:
            self.svp64 = SVP64PrefixDecoder()  # for decoding SVP64 prefix

        self.update_svstate = Signal()  # set this if updating svstate
        self.new_svstate = new_svstate = SVSTATERec("new_svstate")

        # Test Instruction memory
        if hasattr(core, "icache"):
            # XXX BLECH! use pspec to transfer the I-Cache to ConfigFetchUnit
            # truly dreadful.  needs a huge reorg.
            pspec.icache = core.icache
        self.imem = ConfigFetchUnit(pspec).fu

        # DMI interface
        self.dbg = CoreDebug()

        # instruction go/monitor
        self.pc_o = Signal(64, reset_less=True)
        self.pc_i = Data(64, "pc_i")  # set "ok" to indicate "please change me"
        self.msr_i = Data(64, "msr_i") # set "ok" to indicate "please change me"
        self.svstate_i = Data(64, "svstate_i")  # ditto
        self.core_bigendian_i = Signal()  # TODO: set based on MSR.LE
        self.busy_o = Signal(reset_less=True)
        self.memerr_o = Signal(reset_less=True)

        # STATE regfile read /write ports for PC, MSR, SVSTATE
        staterf = self.core.regs.rf['state']
        self.state_r_msr = staterf.r_ports['msr']  # MSR rd
        self.state_r_pc = staterf.r_ports['cia']  # PC rd
        self.state_r_sv = staterf.r_ports['sv']  # SVSTATE rd

        self.state_w_msr = staterf.w_ports['msr']  # MSR wr
        self.state_w_pc = staterf.w_ports['d_wr1']  # PC wr
        self.state_w_sv = staterf.w_ports['sv']  # SVSTATE wr

        # DMI interface access
        intrf = self.core.regs.rf['int']
        crrf = self.core.regs.rf['cr']
        xerrf = self.core.regs.rf['xer']
        self.int_r = intrf.r_ports['dmi']  # INT read
        self.cr_r = crrf.r_ports['full_cr_dbg']  # CR read
        self.xer_r = xerrf.r_ports['full_xer']  # XER read

        if self.svp64_en:
            # for predication
            self.int_pred = intrf.r_ports['pred']  # INT predicate read
            self.cr_pred = crrf.r_ports['cr_pred']  # CR predicate read

        # hack method of keeping an eye on whether branch/trap set the PC
        self.state_nia = self.core.regs.rf['state'].w_ports['nia']
        self.state_nia.wen.name = 'state_nia_wen'

        # pulse to synchronize the simulator at instruction end
        self.insn_done = Signal()

        # indicate any instruction still outstanding, in execution
        self.any_busy = Signal()

        if self.svp64_en:
            # store copies of predicate masks
            self.srcmask = Signal(64)
            self.dstmask = Signal(64)
 305
 306     def setup_peripherals(self, m):
 307         comb, sync = m.d.comb, m.d.sync
 308
 309         # okaaaay so the debug module must be in coresync clock domain
 310         # but NOT its reset signal. to cope with this, set every single
 311         # submodule explicitly in coresync domain, debug and JTAG
 312         # in their own one but using *external* reset.
 313         csd = DomainRenamer("coresync")
 314         dbd = DomainRenamer(self.dbg_domain)
 315
 316         m.submodules.core = core = csd(self.core)
 317         # this _so_ needs sorting out.  ICache is added down inside
 318         # LoadStore1 and is already a submodule of LoadStore1
 319         if not isinstance(self.imem, ICache):
 320             m.submodules.imem = imem = csd(self.imem)
 321         m.submodules.dbg = dbg = dbd(self.dbg)
 322         if self.jtag_en:
 323             m.submodules.jtag = jtag = dbd(self.jtag)
 324             # TODO: UART2GDB mux, here, from external pin
 325             # see https://bugs.libre-soc.org/show_bug.cgi?id=499
 326             sync += dbg.dmi.connect_to(jtag.dmi)
 327
 328         cur_state = self.cur_state
 329
 330         # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
 331         if self.sram4x4k:
 332             for i, sram in enumerate(self.sram4k):
 333                 m.submodules["sram4k_%d" % i] = csd(sram)
 334                 comb += sram.enable.eq(self.wb_sram_en)
 335
 336         # XICS interrupt handler
 337         if self.xics:
 338             m.submodules.xics_icp = icp = csd(self.xics_icp)
 339             m.submodules.xics_ics = ics = csd(self.xics_ics)
 340             comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
 341             sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core
 342
 343         # GPIO test peripheral
 344         if self.gpio:
 345             m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)
 346
 347         # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
 348         # XXX causes litex ECP5 test to get wrong idea about input and output
 349         # (but works with verilator sim *sigh*)
 350         # if self.gpio and self.xics:
 351         #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
 352
 353         # instruction decoder
 354         pdecode = create_pdecode()
 355         m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
 356         if self.svp64_en:
 357             m.submodules.svp64 = svp64 = csd(self.svp64)
 358
 359         # convenience
 360         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
 361         intrf = self.core.regs.rf['int']
 362
 363         # clock delay power-on reset
 364         cd_por = ClockDomain(reset_less=True)
 365         cd_sync = ClockDomain()
 366         core_sync = ClockDomain("coresync")
 367         m.domains += cd_por, cd_sync, core_sync
 368         if self.dbg_domain != "sync":
 369             dbg_sync = ClockDomain(self.dbg_domain)
 370             m.domains += dbg_sync
 371
 372         ti_rst = Signal(reset_less=True)
 373         delay = Signal(range(4), reset=3)
 374         with m.If(delay != 0):
 375             m.d.por += delay.eq(delay - 1)
 376         comb += cd_por.clk.eq(ClockSignal())
 377
 378         # power-on reset delay
 379         core_rst = ResetSignal("coresync")
 380         comb += ti_rst.eq(delay != 0 | dbg.core_rst_o | ResetSignal())
 381         comb += core_rst.eq(ti_rst)
 382
 383         # debug clock is same as coresync, but reset is *main external*
 384         if self.dbg_domain != "sync":
 385             dbg_rst = ResetSignal(self.dbg_domain)
 386             comb += dbg_rst.eq(ResetSignal())
 387
 388         # busy/halted signals from core
 389         core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o  # core is busy
 390         comb += self.busy_o.eq(core_busy_o)
 391         comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
 392
 393         # temporary hack: says "go" immediately for both address gen and ST
 394         l0 = core.l0
 395         ldst = core.fus.fus['ldst0']
 396         st_go_edge = rising_edge(m, ldst.st.rel_o)
 397         # link addr-go direct to rel
 398         m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)
 399         m.d.comb += ldst.st.go_i.eq(st_go_edge)  # link store-go to rising rel
 400
 401     def do_dmi(self, m, dbg):
 402         """deals with DMI debug requests
 403
 404         currently only provides read requests for the INT regfile, CR and XER
 405         it will later also deal with *writing* to these regfiles.
 406         """
 407         comb = m.d.comb
 408         sync = m.d.sync
 409         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
 410         intrf = self.core.regs.rf['int']
 411
 412         with m.If(d_reg.req):  # request for regfile access being made
 413             # TODO: error-check this
 414             # XXX should this be combinatorial?  sync better?
 415             if intrf.unary:
 416                 comb += self.int_r.ren.eq(1 << d_reg.addr)
 417             else:
 418                 comb += self.int_r.addr.eq(d_reg.addr)
 419                 comb += self.int_r.ren.eq(1)
 420         d_reg_delay = Signal()
 421         sync += d_reg_delay.eq(d_reg.req)
 422         with m.If(d_reg_delay):
 423             # data arrives one clock later
 424             comb += d_reg.data.eq(self.int_r.o_data)
 425             comb += d_reg.ack.eq(1)
 426
 427         # sigh same thing for CR debug
 428         with m.If(d_cr.req):  # request for regfile access being made
 429             comb += self.cr_r.ren.eq(0b11111111)  # enable all
 430         d_cr_delay = Signal()
 431         sync += d_cr_delay.eq(d_cr.req)
 432         with m.If(d_cr_delay):
 433             # data arrives one clock later
 434             comb += d_cr.data.eq(self.cr_r.o_data)
 435             comb += d_cr.ack.eq(1)
 436
 437         # aaand XER...
 438         with m.If(d_xer.req):  # request for regfile access being made
 439             comb += self.xer_r.ren.eq(0b111111)  # enable all
 440         d_xer_delay = Signal()
 441         sync += d_xer_delay.eq(d_xer.req)
 442         with m.If(d_xer_delay):
 443             # data arrives one clock later
 444             comb += d_xer.data.eq(self.xer_r.o_data)
 445             comb += d_xer.ack.eq(1)
 446
 447     def tb_dec_fsm(self, m, spr_dec):
 448         """tb_dec_fsm
 449
 450         this is a FSM for updating either dec or tb.  it runs alternately
 451         DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
 452         value to DEC, however the regfile has "passthrough" on it so this
 453         *should* be ok.
 454
 455         see v3.0B p1097-1099 for Timeer Resource and p1065 and p1076
 456         """
 457
 458         comb, sync = m.d.comb, m.d.sync
 459         fast_rf = self.core.regs.rf['fast']
 460         fast_r_dectb = fast_rf.r_ports['issue']  # DEC/TB
 461         fast_w_dectb = fast_rf.w_ports['issue']  # DEC/TB
 462
 463         with m.FSM() as fsm:
 464
 465             # initiates read of current DEC
 466             with m.State("DEC_READ"):
 467                 comb += fast_r_dectb.addr.eq(FastRegs.DEC)
 468                 comb += fast_r_dectb.ren.eq(1)
 469                 m.next = "DEC_WRITE"
 470
 471             # waits for DEC read to arrive (1 cycle), updates with new value
 472             with m.State("DEC_WRITE"):
 473                 new_dec = Signal(64)
 474                 # TODO: MSR.LPCR 32-bit decrement mode
 475                 comb += new_dec.eq(fast_r_dectb.o_data - 1)
 476                 comb += fast_w_dectb.addr.eq(FastRegs.DEC)
 477                 comb += fast_w_dectb.wen.eq(1)
 478                 comb += fast_w_dectb.i_data.eq(new_dec)
 479                 sync += spr_dec.eq(new_dec)  # copy into cur_state for decoder
 480                 m.next = "TB_READ"
 481
 482             # initiates read of current TB
 483             with m.State("TB_READ"):
 484                 comb += fast_r_dectb.addr.eq(FastRegs.TB)
 485                 comb += fast_r_dectb.ren.eq(1)
 486                 m.next = "TB_WRITE"
 487
 488             # waits for read TB to arrive, initiates write of current TB
 489             with m.State("TB_WRITE"):
 490                 new_tb = Signal(64)
 491                 comb += new_tb.eq(fast_r_dectb.o_data + 1)
 492                 comb += fast_w_dectb.addr.eq(FastRegs.TB)
 493                 comb += fast_w_dectb.wen.eq(1)
 494                 comb += fast_w_dectb.i_data.eq(new_tb)
 495                 m.next = "DEC_READ"
 496
 497         return m
 498
    def elaborate(self, platform):
        """Build the common issuer logic and return the new Module.

        sets up the peripherals and core, clears cur_state on core
        reset, reads MSR/PC/SVSTATE (from the *_i overrides or from the
        State regfile), passes that state to the debug module, hooks up
        DMI regfile reads and the DEC/TB FSM and, while the core is
        stopped, allows PC, MSR and SVSTATE to be written from the
        pc_i / msr_i / svstate_i inputs.

        several comb assignments below are defaults that are overridden
        by later, conditional assignments: statement order matters.
        """
        m = Module()
        # convenience
        comb, sync = m.d.comb, m.d.sync
        cur_state = self.cur_state
        pdecode2 = self.pdecode2
        dbg = self.dbg

        # set up peripherals and core
        core_rst = self.core_rst
        self.setup_peripherals(m)

        # reset current state if core reset requested
        with m.If(core_rst):
            m.d.sync += self.cur_state.eq(0)

        # PC and instruction from I-Memory
        comb += self.pc_o.eq(cur_state.pc)
        self.pc_changed = Signal()  # note write to PC
        self.msr_changed = Signal()  # note write to MSR
        self.sv_changed = Signal()  # note write to SVSTATE

        # read state either from incoming override or from regfile
        state = CoreState("get")  # current state (MSR/PC/SVSTATE)
        state_get(m, state.msr, core_rst, self.msr_i,
                       "msr",                  # read MSR
                       self.state_r_msr, StateRegs.MSR)
        state_get(m, state.pc, core_rst, self.pc_i,
                       "pc",                  # read PC
                       self.state_r_pc, StateRegs.PC)
        state_get(m, state.svstate, core_rst, self.svstate_i,
                            "svstate",   # read SVSTATE
                            self.state_r_sv, StateRegs.SVSTATE)

        # don't write pc every cycle.  these are defaults: the
        # "core stopped" block further down overrides them
        comb += self.state_w_pc.wen.eq(0)
        comb += self.state_w_pc.i_data.eq(0)

        # connect up debug state.  note "combinatorially same" below,
        # this is a bit naff, passing state over in the dbg class, but
        # because it is combinatorial it achieves the desired goal
        comb += dbg.state.eq(state)

        # this bit doesn't have to be in the FSM: connect up to read
        # regfiles on demand from DMI
        self.do_dmi(m, dbg)

        # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
        # (which uses that in PowerDecoder2 to raise 0x900 exception)
        self.tb_dec_fsm(m, cur_state.dec)

        # while stopped, allow updating the MSR, PC and SVSTATE.
        # these are mainly for debugging purposes (including DMI/JTAG)
        with m.If(dbg.core_stopped_i):
            with m.If(self.pc_i.ok):
                comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                comb += self.state_w_pc.i_data.eq(self.pc_i.data)
                sync += self.pc_changed.eq(1)
            with m.If(self.msr_i.ok):
                comb += self.state_w_msr.wen.eq(1 << StateRegs.MSR)
                comb += self.state_w_msr.i_data.eq(self.msr_i.data)
                sync += self.msr_changed.eq(1)
            with m.If(self.svstate_i.ok | self.update_svstate):
                with m.If(self.svstate_i.ok): # over-ride from external source
                    comb += self.new_svstate.eq(self.svstate_i.data)
                comb += self.state_w_sv.wen.eq(1 << StateRegs.SVSTATE)
                comb += self.state_w_sv.i_data.eq(self.new_svstate)
                sync += self.sv_changed.eq(1)

        return m
 569
 570     def __iter__(self):
 571         yield from self.pc_i.ports()
 572         yield from self.msr_i.ports()
 573         yield self.pc_o
 574         yield self.memerr_o
 575         yield from self.core.ports()
 576         yield from self.imem.ports()
 577         yield self.core_bigendian_i
 578         yield self.busy_o
 579
 580     def ports(self):
 581         return list(self)
 582
 583     def external_ports(self):
 584         ports = self.pc_i.ports()
 585         ports = self.msr_i.ports()
 586         ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
 587                   ]
 588
 589         if self.jtag_en:
 590             ports += list(self.jtag.external_ports())
 591         else:
 592             # don't add DMI if JTAG is enabled
 593             ports += list(self.dbg.dmi.ports())
 594
 595         ports += list(self.imem.ibus.fields.values())
 596         ports += list(self.core.l0.cmpi.wb_bus().fields.values())
 597
 598         if self.sram4x4k:
 599             for sram in self.sram4k:
 600                 ports += list(sram.bus.fields.values())
 601
 602         if self.xics:
 603             ports += list(self.xics_icp.bus.fields.values())
 604             ports += list(self.xics_ics.bus.fields.values())
 605             ports.append(self.int_level_i)
 606
 607         if self.gpio:
 608             ports += list(self.simple_gpio.bus.fields.values())
 609             ports.append(self.gpio_o)
 610
 611         return ports
 612
 613     def ports(self):
 614         return list(self)
 615
 616
 617
 618 # Fetch Finite State Machine.
 619 # WARNING: there are currently DriverConflicts but it's actually working.
 620 # TODO, here: everything that is global in nature, information from the
 621 # main TestIssuerInternal, needs to move to either ispec() or ospec().
 622 # not only that: TestIssuerInternal.imem can entirely move into here
 623 # because imem is only ever accessed inside the FetchFSM.
 624 class FetchFSM(ControlBase):
    def __init__(self, allow_overlap, svp64_en, imem, core_rst,
                 pdecode2, cur_state,
                 dbg, core, svstate, nia, is_svp64_mode):
        """Store the shared objects and set up the pipeline input/output.

        every argument is kept as an attribute of the same name for use
        in elaborate().  the input and output data (self.i / self.o) are
        then allocated through ControlBase, using this object as its own
        stage (see ispec and ospec).

        NOTE(review): elaborate() reads self.svp64 and
        self.core_bigendian_i when svp64_en is set, but neither is
        assigned here - confirm where they are expected to come from.
        """
        self.allow_overlap = allow_overlap
        self.svp64_en = svp64_en
        self.imem = imem
        self.core_rst = core_rst
        self.pdecode2 = pdecode2
        self.cur_state = cur_state
        self.dbg = dbg
        self.core = core
        self.svstate = svstate
        self.nia = nia
        self.is_svp64_mode = is_svp64_mode

        # set up pipeline ControlBase and allocate i/o specs
        # (unusual: normally done by the Pipeline API)
        super().__init__(stage=self)
        self.p.i_data, self.n.o_data = self.new_specs(None)
        # short aliases for the input and output data
        self.i, self.o = self.p.i_data, self.n.o_data
 645
 646     # next 3 functions are Stage API Compliance
    def setup(self, m, i):
        """Stage API hook: deliberately does nothing."""
        pass
 649
    def ispec(self):
        """Stage API hook: return a new FetchInput as the input data spec."""
        return FetchInput()
 652
    def ospec(self):
        """Stage API hook: return a new FetchOutput as the output data spec."""
        return FetchOutput()
 655
 656     def elaborate(self, platform):
 657         """fetch FSM
 658
 659         this FSM performs fetch of raw instruction data, partial-decodes
 660         it 32-bit at a time to detect SVP64 prefixes, and will optionally
 661         read a 2nd 32-bit quantity if that occurs.
 662         """
 663         m = super().elaborate(platform)
 664
 665         dbg = self.dbg
 666         core = self.core
 667         pc = self.i.pc
 668         msr = self.i.msr
 669         svstate = self.svstate
 670         nia = self.nia
 671         is_svp64_mode = self.is_svp64_mode
 672         fetch_pc_o_ready = self.p.o_ready
 673         fetch_pc_i_valid = self.p.i_valid
 674         fetch_insn_o_valid = self.n.o_valid
 675         fetch_insn_i_ready = self.n.i_ready
 676
 677         comb = m.d.comb
 678         sync = m.d.sync
 679         pdecode2 = self.pdecode2
 680         cur_state = self.cur_state
 681         dec_opcode_o = pdecode2.dec.raw_opcode_in  # raw opcode
 682
 683         # also note instruction fetch failed
 684         if hasattr(core, "icache"):
 685             fetch_failed = core.icache.i_out.fetch_failed
 686             flush_needed = True
 687         else:
 688             fetch_failed = Const(0, 1)
 689             flush_needed = False
 690
 691         # set priv / virt mode on I-Cache, sigh
 692         if isinstance(self.imem, ICache):
 693             comb += self.imem.i_in.priv_mode.eq(~msr[MSR.PR])
 694             comb += self.imem.i_in.virt_mode.eq(msr[MSR.DR])
 695
 696         with m.FSM(name='fetch_fsm'):
 697
 698             # waiting (zzz)
 699             with m.State("IDLE"):
 700                 with m.If(~dbg.stopping_o & ~fetch_failed):
 701                     comb += fetch_pc_o_ready.eq(1)
 702                 with m.If(fetch_pc_i_valid & ~fetch_failed):
 703                     # instruction allowed to go: start by reading the PC
 704                     # capture the PC and also drop it into Insn Memory
 705                     # we have joined a pair of combinatorial memory
 706                     # lookups together.  this is Generally Bad.
 707                     comb += self.imem.a_pc_i.eq(pc)
 708                     comb += self.imem.a_i_valid.eq(1)
 709                     comb += self.imem.f_i_valid.eq(1)
 710                     # transfer state to output
 711                     sync += cur_state.pc.eq(pc)
 712                     sync += cur_state.svstate.eq(svstate)  # and svstate
 713                     sync += cur_state.msr.eq(msr)  # and msr
 714
 715                     m.next = "INSN_READ"  # move to "wait for bus" phase
 716
 717             # dummy pause to find out why simulation is not keeping up
 718             with m.State("INSN_READ"):
 719                 if self.allow_overlap:
 720                     stopping = dbg.stopping_o
 721                 else:
 722                     stopping = Const(0)
 723                 with m.If(stopping):
 724                     # stopping: jump back to idle
 725                     m.next = "IDLE"
 726                 with m.Else():
 727                     with m.If(self.imem.f_busy_o & ~fetch_failed):  # zzz...
 728                         # busy but not fetch failed: stay in wait-read
 729                         comb += self.imem.a_i_valid.eq(1)
 730                         comb += self.imem.f_i_valid.eq(1)
 731                     with m.Else():
 732                         # not busy (or fetch failed!): instruction fetched
 733                         # when fetch failed, the instruction gets ignored
 734                         # by the decoder
 735                         insn = get_insn(self.imem.f_instr_o, cur_state.pc)
 736                         if self.svp64_en:
 737                             svp64 = self.svp64
 738                             # decode the SVP64 prefix, if any
 739                             comb += svp64.raw_opcode_in.eq(insn)
 740                             comb += svp64.bigendian.eq(self.core_bigendian_i)
 741                             # pass the decoded prefix (if any) to PowerDecoder2
 742                             sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
 743                             sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
 744                             # remember whether this is a prefixed instruction,
 745                             # so the FSM can readily loop when VL==0
 746                             sync += is_svp64_mode.eq(svp64.is_svp64_mode)
 747                             # calculate the address of the following instruction
 748                             insn_size = Mux(svp64.is_svp64_mode, 8, 4)
 749                             sync += nia.eq(cur_state.pc + insn_size)
 750                             with m.If(~svp64.is_svp64_mode):
 751                                 # with no prefix, store the instruction
 752                                 # and hand it directly to the next FSM
 753                                 sync += dec_opcode_o.eq(insn)
 754                                 m.next = "INSN_READY"
 755                             with m.Else():
 756                                 # fetch the rest of the instruction from memory
 757                                 comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
 758                                 comb += self.imem.a_i_valid.eq(1)
 759                                 comb += self.imem.f_i_valid.eq(1)
 760                                 m.next = "INSN_READ2"
 761                         else:
 762                             # not SVP64 - 32-bit only
 763                             sync += nia.eq(cur_state.pc + 4)
 764                             sync += dec_opcode_o.eq(insn)
 765                             m.next = "INSN_READY"
 766
 767             with m.State("INSN_READ2"):
 768                 with m.If(self.imem.f_busy_o):  # zzz...
 769                     # busy: stay in wait-read
 770                     comb += self.imem.a_i_valid.eq(1)
 771                     comb += self.imem.f_i_valid.eq(1)
 772                 with m.Else():
 773                     # not busy: instruction fetched
 774                     insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
 775                     sync += dec_opcode_o.eq(insn)
 776                     m.next = "INSN_READY"
 777                     # TODO: probably can start looking at pdecode2.rm_dec
 778                     # here or maybe even in INSN_READ state, if svp64_mode
 779                     # detected, in order to trigger - and wait for - the
 780                     # predicate reading.
 781                     if self.svp64_en:
 782                         pmode = pdecode2.rm_dec.predmode
 783                     """
 784                     if pmode != SVP64PredMode.ALWAYS.value:
 785                         fire predicate loading FSM and wait before
 786                         moving to INSN_READY
 787                     else:
 788                         sync += self.srcmask.eq(-1) # set to all 1s
 789                         sync += self.dstmask.eq(-1) # set to all 1s
 790                         m.next = "INSN_READY"
 791                     """
 792
 793             with m.State("INSN_READY"):
 794                 # hand over the instruction, to be decoded
 795                 comb += fetch_insn_o_valid.eq(1)
 796                 with m.If(fetch_insn_i_ready):
 797                     m.next = "IDLE"
 798
 799         # whatever was done above, over-ride it if core reset is held
 800         with m.If(self.core_rst):
 801             sync += nia.eq(0)
 802
 803         return m
 804
 805
 806 class TestIssuerInternal(TestIssuerBase):
 807     """TestIssuer - reads instructions from TestMemory and issues them
 808
 809     efficiency and speed is not the main goal here: functional correctness
 810     and code clarity is.  optimisations (which almost 100% interfere with
 811     easy understanding) come later.
 812     """
 813
    def fetch_predicate_fsm(self, m,
                            pred_insn_i_valid, pred_insn_o_ready,
                            pred_mask_o_valid, pred_mask_i_ready):
        """fetch_predicate_fsm - obtains (constructs in the case of CR)
           src/dest predicate masks

        https://bugs.libre-soc.org/show_bug.cgi?id=617
        the predicates can be read here, by using IntRegs r_ports['pred']
        or CRRegs r_ports['pred'].  in the case of CRs it will have to
        be done through multiple reads, extracting one relevant bit at a
        time.  later, a faster way would be to use the 32-bit-wide CR port
        but this is more complex decoding, here.  equivalent code used in
        ISACaller is "from openpower.decoder.isa.caller import get_predcr"

        adds a "fetch_predicate" FSM to module m.  depending on
        rm_dec.predmode the masks are produced as follows:

        * SVP64PredMode.INT: one read of self.int_pred per mask (the read
          is skipped when the "all 1s" flag for that mask is set).  the
          value read is either XORed with the invert flag, or (unary
          mode) replaced by a single bit at 1 << (lower 6 bits of value).
          the masks are then shifted right by srcstep / dststep.
        * SVP64PredMode.CR: one read of self.cr_pred per element, for
          element indices 0..VL-1, each contributing one mask bit.
          the masks are then shifted right by srcstep / dststep.
        * any other mode: both masks are set to all 1s, unshifted.

        the results are stored in self.srcmask and self.dstmask.

        handshake signals:

        * pred_insn_i_valid: read here.  request to start fetching masks
        * pred_insn_o_ready: driven here.  high while this FSM is idle
        * pred_mask_o_valid: driven here.  high once the masks are stored
        * pred_mask_i_ready: read here.  acknowledges the masks and
          returns this FSM to idle

        note: this ENTIRE FSM is not to be called when svp64 is disabled
        """
        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        rm_dec = pdecode2.rm_dec  # SVP64RMModeDecode
        predmode = rm_dec.predmode
        srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
        cr_pred, int_pred = self.cr_pred, self.int_pred   # read regfiles
        # get src/dst step, so we can skip already used mask bits
        cur_state = self.cur_state
        srcstep = cur_state.svstate.srcstep
        dststep = cur_state.svstate.dststep
        cur_vl = cur_state.svstate.vl

        # decode predicates.  get_predint / get_predcr are helpers defined
        # outside this method.  as used below:
        # * [sd]regread: address given to the int_pred read port
        # * [sd]invert:  XORed (replicated to 64 bits) with the value read
        # * [sd]unary:   selects the "1 << value" form of the mask
        # * [sd]all1s:   mask is all 1s, no register read is issued
        # * [sd]idx:     which bit of the 4-bit CR field to select
        # * [sd]crinvert: XORed with the selected CR bit
        sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
        dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
        sidx, scrinvert = get_predcr(m, srcpred, 's')
        didx, dcrinvert = get_predcr(m, dstpred, 'd')

        # store fetched masks, for either intpred or crpred
        # when src/dst step is not zero, the skipped mask bits need to be
        # shifted-out, before actually storing them in src/dest mask
        new_srcmask = Signal(64, reset_less=True)
        new_dstmask = Signal(64, reset_less=True)

        with m.FSM(name="fetch_predicate"):

            # idle: advertise readiness, and on request pick the path
            # (INT / CR / no predication) according to predmode
            with m.State("FETCH_PRED_IDLE"):
                comb += pred_insn_o_ready.eq(1)
                with m.If(pred_insn_i_valid):
                    with m.If(predmode == SVP64PredMode.INT):
                        # skip fetching destination mask register, when
                        # the destination predicate is "all 1s"
                        with m.If(dall1s):
                            sync += new_dstmask.eq(-1)
                            # directly go to fetch source mask register
                            # guaranteed not to be zero (otherwise predmode
                            # would be SVP64PredMode.ALWAYS, not INT)
                            comb += int_pred.addr.eq(sregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_SRC_READ"
                        # fetch destination predicate register
                        with m.Else():
                            comb += int_pred.addr.eq(dregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_DST_READ"
                    with m.Elif(predmode == SVP64PredMode.CR):
                        # go fetch masks from the CR register file
                        # both accumulators start empty: CR_READ ORs in
                        # one bit per element
                        sync += new_srcmask.eq(0)
                        sync += new_dstmask.eq(0)
                        m.next = "CR_READ"
                    with m.Else():
                        # no predication: all elements enabled.  the final
                        # masks are written directly, bypassing the
                        # FETCH_PRED_SHIFT_MASK state
                        sync += self.srcmask.eq(-1)
                        sync += self.dstmask.eq(-1)
                        m.next = "FETCH_PRED_DONE"

            # the read request for the destination predicate register was
            # issued in the previous state: its data is consumed here
            with m.State("INT_DST_READ"):
                # store destination mask
                inv = Repl(dinvert, 64)
                with m.If(dunary):
                    # set selected mask bit for 1<<r3 mode
                    # (only the lower 6 bits of the value are used)
                    dst_shift = Signal(range(64))
                    comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
                    sync += new_dstmask.eq(1 << dst_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
                # skip fetching source mask register, when the source
                # predicate is "all 1s"
                with m.If(sall1s):
                    sync += new_srcmask.eq(-1)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                # fetch source predicate register
                with m.Else():
                    comb += int_pred.addr.eq(sregread)
                    comb += int_pred.ren.eq(1)
                    m.next = "INT_SRC_READ"

            # the read request for the source predicate register was
            # issued in the previous state: its data is consumed here
            with m.State("INT_SRC_READ"):
                # store source mask
                inv = Repl(sinvert, 64)
                with m.If(sunary):
                    # set selected mask bit for 1<<r3 mode
                    # (only the lower 6 bits of the value are used)
                    src_shift = Signal(range(64))
                    comb += src_shift.eq(self.int_pred.o_data & 0b111111)
                    sync += new_srcmask.eq(1 << src_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
                m.next = "FETCH_PRED_SHIFT_MASK"

            # fetch masks from the CR register file
            # implements the following loop:
            # idx, inv = get_predcr(mask)
            # mask = 0
            # for cr_idx in range(vl):
            #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
            #     if cr[idx] ^ inv:
            #         mask |= 1 << cr_idx
            # return mask
            with m.State("CR_READ"):
                # CR index to be read, which will be ready by the next cycle
                cr_idx = Signal.like(cur_vl, reset_less=True)
                # submit the read operation to the regfile
                with m.If(cr_idx != cur_vl):
                    # the CR read port is unary ...
                    # ren = 1 << cr_idx
                    # ... in MSB0 convention ...
                    # ren = 1 << (7 - cr_idx)
                    # ... and with an offset:
                    # ren = 1 << (7 - off - cr_idx)
                    idx = SVP64CROffs.CRPred + cr_idx
                    comb += cr_pred.ren.eq(1 << (7 - idx))
                    # signal data valid in the next cycle
                    # (cr_read is an ordinary Python name: although it is
                    # created inside this "If", it is also used by the
                    # "Else" and by the second "If" further down)
                    cr_read = Signal(reset_less=True)
                    sync += cr_read.eq(1)
                    # load the next index
                    sync += cr_idx.eq(cr_idx + 1)
                with m.Else():
                    # exit on loop end
                    # cr_idx and cr_read are cleared, ready for the next
                    # time this state is entered
                    sync += cr_read.eq(0)
                    sync += cr_idx.eq(0)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                # this check is independent of the If/Else above, so the
                # data of the final read is still processed in the cycle
                # in which the loop exits
                with m.If(cr_read):
                    # compensate for the one cycle delay on the regfile
                    cur_cr_idx = Signal.like(cur_vl)
                    comb += cur_cr_idx.eq(cr_idx - 1)
                    # read the CR field, select the appropriate bit
                    cr_field = Signal(4)
                    scr_bit = Signal()
                    dcr_bit = Signal()
                    comb += cr_field.eq(cr_pred.o_data)
                    comb += scr_bit.eq(cr_field.bit_select(sidx, 1)
                                       ^ scrinvert)
                    comb += dcr_bit.eq(cr_field.bit_select(didx, 1)
                                       ^ dcrinvert)
                    # set the corresponding mask bit
                    # (the same CR field read serves both src and dst mask)
                    bit_to_set = Signal.like(self.srcmask)
                    comb += bit_to_set.eq(1 << cur_cr_idx)
                    with m.If(scr_bit):
                        sync += new_srcmask.eq(new_srcmask | bit_to_set)
                    with m.If(dcr_bit):
                        sync += new_dstmask.eq(new_dstmask | bit_to_set)

            # common exit for the INT and CR paths
            with m.State("FETCH_PRED_SHIFT_MASK"):
                # shift-out skipped mask bits
                # so that bit 0 of each mask corresponds to the current
                # srcstep / dststep
                sync += self.srcmask.eq(new_srcmask >> srcstep)
                sync += self.dstmask.eq(new_dstmask >> dststep)
                m.next = "FETCH_PRED_DONE"

            # masks are stored: hold "valid" until acknowledged
            with m.State("FETCH_PRED_DONE"):
                comb += pred_mask_o_valid.eq(1)
                with m.If(pred_mask_i_ready):
                    m.next = "FETCH_PRED_IDLE"
 982
    def issue_fsm(self, m, core, nia,
                  dbg, core_rst, is_svp64_mode,
                  fetch_pc_o_ready, fetch_pc_i_valid,
                  fetch_insn_o_valid, fetch_insn_i_ready,
                  pred_insn_i_valid, pred_insn_o_ready,
                  pred_mask_o_valid, pred_mask_i_ready,
                  exec_insn_i_valid, exec_insn_o_ready,
                  exec_pc_o_valid, exec_pc_i_ready):
        """issue FSM

        decode / issue FSM.  this interacts with the "fetch" FSM
        through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
        (outgoing). also interacts with the "execute" FSM
        through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
        (incoming), and with the "fetch predicate" FSM through
        pred_insn_ready/valid (outgoing) and pred_mask_ready/valid
        (incoming).
        SVP64 RM prefixes have already been set up by the
        "fetch" phase, so execute is fairly straightforward.

        states of the "issue_fsm" FSM added to module m:

        * ISSUE_START:  request a fetch, unless stopped or in reset.
                        while stopped, SVSTATE may be replaced from
                        self.svstate_i
        * INSN_WAIT:    wait for the fetched instruction.  an SVP64
                        instruction with VL==0 only writes nia to the PC
        * PRED_START:   request the predicate masks (only entered from
                        INSN_WAIT when self.svp64_en is set)
        * MASK_WAIT:    wait for the predicate masks
        * PRED_SKIP:    advance srcstep/dststep past zero mask bits
        * DECODE_SV:    latch the decoded instruction into core.i
        * INSN_EXECUTE: hand the instruction to the execute FSM
        * EXECUTE_WAIT: wait for completion, then pick the next state

        arguments:

        * m: module that the FSM and its statements are added to
        * core: its "i" fields are written in PRED_SKIP / DECODE_SV;
          core.fus and (if present) core.icache are also used
        * nia: value written to the PC (via self.state_w_pc) when an
          instruction, or a whole VL loop, is finished
        * dbg: core_stop_o (and stopping_o, if self.allow_overlap) are
          read; core_stopped_i is driven
        * core_rst: read.  treated like dbg.core_stop_o
        * is_svp64_mode: read.  set if the instruction is SVP64-prefixed
        * fetch_pc_i_valid, fetch_insn_i_ready, pred_insn_i_valid,
          pred_mask_i_ready, exec_insn_i_valid, exec_pc_i_ready:
          handshake signals driven here
        * fetch_pc_o_ready, fetch_insn_o_valid, pred_insn_o_ready,
          pred_mask_o_valid, exec_insn_o_ready, exec_pc_o_valid:
          handshake signals read here
        """

        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        cur_state = self.cur_state
        new_svstate = self.new_svstate

        # temporaries
        dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode

        # for updating svstate (things like srcstep etc.)
        # this is the default: the states below override individual
        # fields (or the whole of new_svstate) with later assignments
        comb += new_svstate.eq(cur_state.svstate)

        # precalculate srcstep+1 and dststep+1
        cur_srcstep = cur_state.svstate.srcstep
        cur_dststep = cur_state.svstate.dststep
        next_srcstep = Signal.like(cur_srcstep)
        next_dststep = Signal.like(cur_dststep)
        comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
        comb += next_dststep.eq(cur_state.svstate.dststep+1)

        # note if an exception happened.  in a pipelined or OoO design
        # this needs to be accompanied by "shadowing" (or stalling)
        exc_happened = self.core.o.exc_happened
        # also note instruction fetch failed
        # (flush_needed is a plain Python bool, evaluated at elaboration
        # time: it decides whether icache flush requests are emitted)
        if hasattr(core, "icache"):
            fetch_failed = core.icache.i_out.fetch_failed
            flush_needed = True
            # set to fault in decoder
            # update (highest priority) instruction fault
            rising_fetch_failed = rising_edge(m, fetch_failed)
            with m.If(rising_fetch_failed):
                sync += pdecode2.instr_fault.eq(1)
        else:
            fetch_failed = Const(0, 1)
            flush_needed = False

        with m.FSM(name="issue_fsm"):

            # sync with the "fetch" phase which is reading the instruction
            # at this point, there is no instruction running, that
            # could inadvertently update the PC.
            with m.State("ISSUE_START"):
                # reset instruction fault
                # NOTE(review): this comes after the rising-edge "set"
                # above, so presumably it wins if both happen in the same
                # cycle - confirm that this is intended
                sync += pdecode2.instr_fault.eq(0)
                # wait on "core stop" release, before next fetch
                # need to do this here, in case we are in a VL==0 loop
                with m.If(~dbg.core_stop_o & ~core_rst):
                    comb += fetch_pc_i_valid.eq(1)  # tell fetch to start
                    with m.If(fetch_pc_o_ready):   # fetch acknowledged us
                        m.next = "INSN_WAIT"
                with m.Else():
                    # tell core it's stopped, and acknowledge debug handshake
                    comb += dbg.core_stopped_i.eq(1)
                    # while stopped, allow updating SVSTATE
                    with m.If(self.svstate_i.ok):
                        comb += new_svstate.eq(self.svstate_i.data)
                        comb += self.update_svstate.eq(1)
                        sync += self.sv_changed.eq(1)

            # wait for an instruction to arrive from Fetch
            with m.State("INSN_WAIT"):
                # "stopping" can only abort the wait when overlap is
                # allowed, otherwise it is tied to constant zero
                if self.allow_overlap:
                    stopping = dbg.stopping_o
                else:
                    stopping = Const(0)
                with m.If(stopping):
                    # stopping: jump back to idle
                    m.next = "ISSUE_START"
                    if flush_needed:
                        # request the icache to stop asserting "failed"
                        comb += core.icache.flush_in.eq(1)
                    # stop instruction fault
                    sync += pdecode2.instr_fault.eq(0)
                with m.Else():
                    comb += fetch_insn_i_ready.eq(1)
                    with m.If(fetch_insn_o_valid):
                        # loop into ISSUE_START if it's a SVP64 instruction
                        # and VL == 0.  this because VL==0 is a for-loop
                        # from 0 to 0 i.e. always, always a NOP.
                        # (the Python name cur_vl bound here is also used
                        # by the PRED_SKIP state further down)
                        cur_vl = cur_state.svstate.vl
                        with m.If(is_svp64_mode & (cur_vl == 0)):
                            # update the PC before fetching the next instruction
                            # since we are in a VL==0 loop, no instruction was
                            # executed that we could be overwriting
                            comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                            comb += self.state_w_pc.i_data.eq(nia)
                            comb += self.insn_done.eq(1)
                            m.next = "ISSUE_START"
                        with m.Else():
                            if self.svp64_en:
                                m.next = "PRED_START"  # fetching predicate
                            else:
                                m.next = "DECODE_SV"  # skip predication

            # handshake with the predicate FSM: request the masks ...
            with m.State("PRED_START"):
                comb += pred_insn_i_valid.eq(1)  # tell fetch_pred to start
                with m.If(pred_insn_o_ready):  # fetch_pred acknowledged us
                    m.next = "MASK_WAIT"

            # ... and wait until they have been stored
            with m.State("MASK_WAIT"):
                comb += pred_mask_i_ready.eq(1)  # ready to receive the masks
                with m.If(pred_mask_o_valid):  # predication masks are ready
                    m.next = "PRED_SKIP"

            # skip zeros in predicate
            with m.State("PRED_SKIP"):
                with m.If(~is_svp64_mode):
                    m.next = "DECODE_SV"  # nothing to do
                with m.Else():
                    if self.svp64_en:
                        pred_src_zero = pdecode2.rm_dec.pred_sz
                        pred_dst_zero = pdecode2.rm_dec.pred_dz

                        # new srcstep, after skipping zeros
                        skip_srcstep = Signal.like(cur_srcstep)
                        # value to be added to the current srcstep
                        # (only assigned below when not in zero mode)
                        src_delta = Signal.like(cur_srcstep)
                        # add leading zeros to srcstep, if not in zero mode
                        with m.If(~pred_src_zero):
                            # priority encoder (count leading zeros)
                            # append guard bit, in case the mask is all zeros
                            pri_enc_src = PriorityEncoder(65)
                            m.submodules.pri_enc_src = pri_enc_src
                            comb += pri_enc_src.i.eq(Cat(self.srcmask,
                                                         Const(1, 1)))
                            comb += src_delta.eq(pri_enc_src.o)
                        # apply delta to srcstep
                        comb += skip_srcstep.eq(cur_srcstep + src_delta)
                        # shift-out all leading zeros from the mask
                        # plus the leading "one" bit
                        # TODO count leading zeros and shift-out the zero
                        #      bits, in the same step, in hardware
                        sync += self.srcmask.eq(self.srcmask >> (src_delta+1))

                        # same as above, but for dststep
                        skip_dststep = Signal.like(cur_dststep)
                        dst_delta = Signal.like(cur_dststep)
                        with m.If(~pred_dst_zero):
                            pri_enc_dst = PriorityEncoder(65)
                            m.submodules.pri_enc_dst = pri_enc_dst
                            comb += pri_enc_dst.i.eq(Cat(self.dstmask,
                                                         Const(1, 1)))
                            comb += dst_delta.eq(pri_enc_dst.o)
                        comb += skip_dststep.eq(cur_dststep + dst_delta)
                        sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))

                        # TODO: initialize mask[VL]=1 to avoid passing past VL
                        # (cur_vl is the Python name bound in the INSN_WAIT
                        # state above, i.e. cur_state.svstate.vl)
                        with m.If((skip_srcstep >= cur_vl) |
                                  (skip_dststep >= cur_vl)):
                            # end of VL loop. Update PC and reset src/dst step
                            comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                            comb += self.state_w_pc.i_data.eq(nia)
                            comb += new_svstate.srcstep.eq(0)
                            comb += new_svstate.dststep.eq(0)
                            comb += self.update_svstate.eq(1)
                            # synchronize with the simulator
                            comb += self.insn_done.eq(1)
                            # go back to Issue
                            m.next = "ISSUE_START"
                        with m.Else():
                            # update new src/dst step
                            comb += new_svstate.srcstep.eq(skip_srcstep)
                            comb += new_svstate.dststep.eq(skip_dststep)
                            comb += self.update_svstate.eq(1)
                            # proceed to Decode
                            m.next = "DECODE_SV"

                        # pass predicate mask bits through to satellite decoders
                        # TODO: for SIMD this will be *multiple* bits
                        # NOTE(review): bit 0 is sampled from the masks
                        # *before* the shifts registered above take effect
                        # - confirm this is the intended bit
                        sync += core.i.sv_pred_sm.eq(self.srcmask[0])
                        sync += core.i.sv_pred_dm.eq(self.dstmask[0])

            # after src/dst step have been updated, we are ready
            # to decode the instruction
            with m.State("DECODE_SV"):
                # decode the instruction
                # (the instruction fault is kept while fetch_failed is set)
                with m.If(~fetch_failed):
                    sync += pdecode2.instr_fault.eq(0)
                # latch decoder output and current state into the core input
                sync += core.i.e.eq(pdecode2.e)
                sync += core.i.state.eq(cur_state)
                sync += core.i.raw_insn_i.eq(dec_opcode_i)
                sync += core.i.bigendian_i.eq(self.core_bigendian_i)
                if self.svp64_en:
                    sync += core.i.sv_rm.eq(pdecode2.sv_rm)
                    # set RA_OR_ZERO detection in satellite decoders
                    sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
                    # and svp64 detection
                    sync += core.i.is_svp64_mode.eq(is_svp64_mode)
                    # and svp64 bit-rev'd ldst mode
                    ldst_dec = pdecode2.use_svp64_ldst_dec
                    sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
                # after decoding, reset any previous exception condition,
                # allowing it to be set again during the next execution
                sync += pdecode2.ldst_exc.eq(0)

                m.next = "INSN_EXECUTE"  # move to "execute"

            # handshake with execution FSM, move to "wait" once acknowledged
            with m.State("INSN_EXECUTE"):
                comb += exec_insn_i_valid.eq(1)  # trigger execute
                with m.If(exec_insn_o_ready):   # execute acknowledged us
                    m.next = "EXECUTE_WAIT"

            with m.State("EXECUTE_WAIT"):
                # wait on "core stop" release, at instruction end
                # need to do this here, in case we are in a VL>1 loop
                with m.If(~dbg.core_stop_o & ~core_rst):
                    comb += exec_pc_i_ready.eq(1)
                    # see https://bugs.libre-soc.org/show_bug.cgi?id=636
                    # the exception info needs to be blatted into
                    # pdecode.ldst_exc, and the instruction "re-run".
                    # when ldst_exc.happened is set, the PowerDecoder2
                    # reacts very differently: it re-writes the instruction
                    # with a "trap" (calls PowerDecoder2.trap()) which
                    # will *overwrite* whatever was requested and jump the
                    # PC to the exception address, as well as alter MSR.
                    # nothing else needs to be done other than to note
                    # the change of PC and MSR (and, later, SVSTATE)
                    with m.If(exc_happened):
                        mmu = core.fus.get_exc("mmu0")
                        ldst = core.fus.get_exc("ldst0")
                        # "mmu is not None" is a Python-level check, made
                        # at elaboration time
                        if mmu is not None:
                            with m.If(fetch_failed):
                                # instruction fetch: exception is from MMU
                                # reset instr_fault (highest priority)
                                sync += pdecode2.ldst_exc.eq(mmu)
                                sync += pdecode2.instr_fault.eq(0)
                                if flush_needed:
                                    # request icache to stop asserting "failed"
                                    comb += core.icache.flush_in.eq(1)
                        # NOTE(review): unlike mmu, ldst is not checked
                        # for None before being used - confirm that
                        # get_exc("ldst0") always returns a record
                        with m.If(~fetch_failed):
                            # otherwise assume it was a LDST exception
                            sync += pdecode2.ldst_exc.eq(ldst)

                    with m.If(exec_pc_o_valid):

                        # was this the last loop iteration?
                        # (only srcstep is compared against VL here,
                        # dststep is not)
                        is_last = Signal()
                        cur_vl = cur_state.svstate.vl
                        comb += is_last.eq(next_srcstep == cur_vl)

                        # return directly to Decode if Execute generated an
                        # exception.
                        with m.If(pdecode2.ldst_exc.happened):
                            m.next = "DECODE_SV"

                        # if MSR, PC or SVSTATE were changed by the previous
                        # instruction, go directly back to Fetch, without
                        # updating either MSR PC or SVSTATE
                        with m.Elif(self.msr_changed | self.pc_changed |
                                    self.sv_changed):
                            m.next = "ISSUE_START"

                        # also return to Fetch, when no output was a vector
                        # (regardless of SRCSTEP and VL), or when the last
                        # instruction was really the last one of the VL loop
                        with m.Elif((~pdecode2.loop_continue) | is_last):
                            # before going back to fetch, update the PC state
                            # register with the NIA.
                            # ok here we are not reading the branch unit.
                            # TODO: this just blithely overwrites whatever
                            #       pipeline updated the PC
                            comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                            comb += self.state_w_pc.i_data.eq(nia)
                            # reset SRCSTEP before returning to Fetch
                            # (with svp64 enabled: only if a loop was
                            # in progress; otherwise: unconditionally)
                            if self.svp64_en:
                                with m.If(pdecode2.loop_continue):
                                    comb += new_svstate.srcstep.eq(0)
                                    comb += new_svstate.dststep.eq(0)
                                    comb += self.update_svstate.eq(1)
                            else:
                                comb += new_svstate.srcstep.eq(0)
                                comb += new_svstate.dststep.eq(0)
                                comb += self.update_svstate.eq(1)
                            m.next = "ISSUE_START"

                        # returning to Execute? then, first update SRCSTEP
                        with m.Else():
                            comb += new_svstate.srcstep.eq(next_srcstep)
                            comb += new_svstate.dststep.eq(next_dststep)
                            comb += self.update_svstate.eq(1)
                            # return to mask skip loop
                            m.next = "PRED_SKIP"

                with m.Else():
                    # stopped (or in reset): acknowledge debug handshake
                    comb += dbg.core_stopped_i.eq(1)
                    if flush_needed:
                        # request the icache to stop asserting "failed"
                        comb += core.icache.flush_in.eq(1)
                    # stop instruction fault
                    sync += pdecode2.instr_fault.eq(0)

        # check if svstate needs updating: if so, latch it into cur_state
        # NOTE(review): only cur_state.svstate is written here; the write
        # to the State Regfile is presumably done elsewhere, also keyed
        # on self.update_svstate - confirm
        with m.If(self.update_svstate):
            sync += cur_state.svstate.eq(self.new_svstate)  # for next clock
1297
1298     def execute_fsm(self, m, core,
1299                     exec_insn_i_valid, exec_insn_o_ready,
1300                     exec_pc_o_valid, exec_pc_i_ready):
1301         """execute FSM
1302
1303         execute FSM. this interacts with the "issue" FSM
1304         through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
1305         (outgoing). SVP64 RM prefixes have already been set up by the
1306         "issue" phase, so execute is fairly straightforward.
1307         """
1308
1309         comb = m.d.comb
1310         sync = m.d.sync
1311         pdecode2 = self.pdecode2
1312
1313         # temporaries
1314         core_busy_o = core.n.o_data.busy_o  # core is busy
1315         core_ivalid_i = core.p.i_valid              # instruction is valid
1316
1317         if hasattr(core, "icache"):
1318             fetch_failed = core.icache.i_out.fetch_failed
1319         else:
1320             fetch_failed = Const(0, 1)
1321
1322         with m.FSM(name="exec_fsm"):
1323
1324             # waiting for instruction bus (stays there until not busy)
1325             with m.State("INSN_START"):
1326                 comb += exec_insn_o_ready.eq(1)
1327                 with m.If(exec_insn_i_valid):
1328                     comb += core_ivalid_i.eq(1)  # instruction is valid/issued
1329                     sync += self.sv_changed.eq(0)
1330                     sync += self.pc_changed.eq(0)
1331                     sync += self.msr_changed.eq(0)
1332                     with m.If(core.p.o_ready):  # only move if accepted
1333                         m.next = "INSN_ACTIVE"  # move to "wait completion"
1334
1335             # instruction started: must wait till it finishes
1336             with m.State("INSN_ACTIVE"):
1337                 # note changes to MSR, PC and SVSTATE
1338                 # XXX oops, really must monitor *all* State Regfile write
1339                 # ports looking for changes!
1340                 with m.If(self.state_nia.wen & (1 << StateRegs.SVSTATE)):
1341                     sync += self.sv_changed.eq(1)
1342                 with m.If(self.state_nia.wen & (1 << StateRegs.MSR)):
1343                     sync += self.msr_changed.eq(1)
1344                 with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
1345                     sync += self.pc_changed.eq(1)
1346                 with m.If(~core_busy_o):  # instruction done!
1347                     comb += exec_pc_o_valid.eq(1)
1348                     with m.If(exec_pc_i_ready):
1349                         # when finished, indicate "done".
1350                         # however, if there was an exception, the instruction
1351                         # is *not* yet done.  this is an implementation
1352                         # detail: we choose to implement exceptions by
1353                         # taking the exception information from the LDST
1354                         # unit, putting that *back* into the PowerDecoder2,
1355                         # and *re-running the entire instruction*.
1356                         # if we erroneously indicate "done" here, it is as if
1357                         # there were *TWO* instructions:
1358                         # 1) the failed LDST 2) a TRAP.
1359                         with m.If(~pdecode2.ldst_exc.happened &
1360                                   ~fetch_failed):
1361                             comb += self.insn_done.eq(1)
1362                         m.next = "INSN_START"  # back to fetch
1363
    def elaborate(self, platform):
        """create the module and wire the FSMs together

        starts from the Module returned by the parent class's elaborate,
        then:

        * drives self.any_busy and dbg.terminate_i from the core
        * creates the ready/valid handshake Signals shared by the FSMs
        * adds a FetchFSM as submodule "fetch" and connects its PC/MSR
          inputs and its ready/valid ports
        * builds the issue FSM, the predicate-fetch FSM (only when
          self.svp64_en is set) and the execute FSM

        returns the completed Module.
        """
        m = super().elaborate(platform)
        # convenience (note: sync is not used within this function)
        comb, sync = m.d.comb, m.d.sync
        cur_state = self.cur_state
        pdecode2 = self.pdecode2
        dbg = self.dbg
        core = self.core

        # set up peripherals and core
        core_rst = self.core_rst

        # indicate to outside world if any FU is still executing
        comb += self.any_busy.eq(core.n.o_data.any_busy_o)  # any FU executing

        # address of the next instruction, in the absence of a branch
        # depends on the instruction size
        # (shared between the Fetch FSM and the Issue FSM)
        nia = Signal(64)

        # connect up debug signals
        comb += dbg.terminate_i.eq(core.o.core_terminate_o)

        # pass the prefix mode from Fetch to Issue, so the latter can loop
        # on VL==0
        is_svp64_mode = Signal()

        # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
        # issue, decode/execute, now joined by "Predicate fetch/calculate".
        # these are the handshake signals between each

        # fetch FSM can run as soon as the PC is valid
        fetch_pc_i_valid = Signal()  # Execute tells Fetch "start next read"
        fetch_pc_o_ready = Signal()  # Fetch Tells SVSTATE "proceed"

        # fetch FSM hands over the instruction to be decoded / issued
        fetch_insn_o_valid = Signal()
        fetch_insn_i_ready = Signal()

        # predicate fetch FSM decodes and fetches the predicate
        pred_insn_i_valid = Signal()
        pred_insn_o_ready = Signal()

        # predicate fetch FSM delivers the masks
        pred_mask_o_valid = Signal()
        pred_mask_i_ready = Signal()

        # issue FSM delivers the instruction to be executed
        exec_insn_i_valid = Signal()
        exec_insn_o_ready = Signal()

        # execute FSM, hands over the PC/SVSTATE back to the issue FSM
        exec_pc_o_valid = Signal()
        exec_pc_i_ready = Signal()

        # the FSMs here are perhaps unusual in that they detect conditions
        # then "hold" information, combinatorially, for the core
        # (as opposed to using sync - which would be on a clock's delay)
        # this includes the actual opcode, valid flags and so on.

        # Fetch, then predicate fetch, then Issue, then Execute.
        # Issue is where the VL for-loop lives.  the ready/valid
        # signalling is used to communicate between the four.

        # set up Fetch FSM
        fetch = FetchFSM(self.allow_overlap, self.svp64_en,
                         self.imem, core_rst, pdecode2, cur_state,
                         dbg, core,
                         dbg.state.svstate, # combinatorially same
                         nia, is_svp64_mode)
        m.submodules.fetch = fetch
        # connect up in/out data to existing Signals
        comb += fetch.p.i_data.pc.eq(dbg.state.pc)   # combinatorially same
        comb += fetch.p.i_data.msr.eq(dbg.state.msr) # combinatorially same
        # and the ready/valid signalling
        comb += fetch_pc_o_ready.eq(fetch.p.o_ready)
        comb += fetch.p.i_valid.eq(fetch_pc_i_valid)
        comb += fetch_insn_o_valid.eq(fetch.n.o_valid)
        comb += fetch.n.i_ready.eq(fetch_insn_i_ready)

        # issue FSM: receives every handshake pair created above
        self.issue_fsm(m, core, nia,
                       dbg, core_rst, is_svp64_mode,
                       fetch_pc_o_ready, fetch_pc_i_valid,
                       fetch_insn_o_valid, fetch_insn_i_ready,
                       pred_insn_i_valid, pred_insn_o_ready,
                       pred_mask_o_valid, pred_mask_i_ready,
                       exec_insn_i_valid, exec_insn_o_ready,
                       exec_pc_o_valid, exec_pc_i_ready)

        # predicate fetch FSM is only built when SVP64 is enabled
        if self.svp64_en:
            self.fetch_predicate_fsm(m,
                                     pred_insn_i_valid, pred_insn_o_ready,
                                     pred_mask_o_valid, pred_mask_i_ready)

        # execute FSM: talks to the issue FSM through the exec_* pairs
        self.execute_fsm(m, core,
                         exec_insn_i_valid, exec_insn_o_ready,
                         exec_pc_o_valid, exec_pc_i_ready)

        return m
1462
1463
class TestIssuer(Elaboratable):
    """wrapper around TestIssuerInternal plus a DummyPLL

    the internal issuer is added as submodule "ti"; the "coresync" clock
    (and the debug clock, when the issuer's dbg_domain is not 'sync') is
    driven from here.  the PLL is only instantiated in the design when
    pspec.use_pll is set.
    """

    def __init__(self, pspec):
        self.ti = TestIssuerInternal(pspec)
        # XXX TODO: make this a command-line selectable option from pspec:
        #   from soc.simple.inorder import TestIssuerInternalInOrder
        #   self.ti = TestIssuerInternalInOrder(pspec)
        self.pll = DummyPLL(instance=True)

        # PLL direct clock or not (a missing attribute counts as "not")
        self.pll_en = getattr(pspec, "use_pll", False)
        if not self.pll_en:
            return

        # these only exist when the PLL is enabled
        self.pll_test_o = Signal(reset_less=True)
        self.pll_vco_o = Signal(reset_less=True)
        self.clk_sel_i = Signal(2, reset_less=True)
        self.ref_clk = ClockSignal()  # can't rename it but that's ok
        self.pllclk_clk = ClockSignal("pllclk")

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        # TestIssuer nominally runs at main clock, actually it is
        # all combinatorial internally except for coresync'd components
        m.submodules.ti = self.ti

        if self.pll_en:
            pll = self.pll
            m.submodules.wrappll = pll

            # "pllclk" domain, clocked directly by the PLL output
            m.domains += ClockDomain("pllclk")
            comb += self.pllclk_clk.eq(pll.clk_pll_o)

            # note: pll.clk_24_i (external 24mhz) is NOT driven from
            # ref_clk here.
            # output 18 mhz PLL test signal, and analog oscillator out
            comb += self.pll_test_o.eq(pll.pll_test_o)
            comb += self.pll_vco_o.eq(pll.pll_vco_o)

            # input to pll clock selection
            comb += pll.clk_sel_i.eq(self.clk_sel_i)

            # pllclk reset simply follows the default domain's reset
            comb += ResetSignal("pllclk").eq(ResetSignal())

        # does the debug interface have a clock domain of its own?
        separate_dbg = self.ti.dbg_domain != 'sync'

        # ClockDomain objects are created but (currently) not added
        # to m.domains
        cd_coresync = ClockDomain("coresync")
        if separate_dbg:
            cd_dbgsync = ClockDomain("dbgsync")

        # XXX BYPASS PLL XXX
        # coresync is clocked from ref_clk when the PLL is enabled (the
        # PLL output is not used for it), otherwise from the default clock
        core_clk = ClockSignal("coresync")
        clk_source = self.ref_clk if self.pll_en else ClockSignal()
        comb += core_clk.eq(clk_source)

        # debug clock runs at coresync internal clock
        if separate_dbg:
            comb += ClockSignal(self.ti.dbg_domain).eq(core_clk)

        return m

    def ports(self):
        # issuer ports, then PLL ports, then default clock and reset
        everything = list(self.ti.ports())
        everything.extend(self.pll.ports())
        everything.extend([ClockSignal(), ResetSignal()])
        return everything

    def external_ports(self):
        # start from the issuer's own list and add to it in place
        ports = self.ti.external_ports()
        ports.extend([ClockSignal(), ResetSignal()])
        if self.pll_en:
            ports.extend([self.clk_sel_i,
                          self.pll.clk_24_i,
                          self.pll_test_o,
                          self.pll_vco_o,
                          self.pllclk_clk,
                          self.ref_clk])
        return ports
1554
1555
if __name__ == '__main__':
    # exactly one Function Unit of each of these kinds
    units = dict.fromkeys(('alu', 'cr', 'branch', 'trap', 'logical',
                           'spr', 'div', 'mul', 'shiftrot'), 1)
    pspec = TestMemPspec(ldst_ifacetype='bare_wb',
                         imem_ifacetype='bare_wb',
                         addr_wid=48,
                         mask_wid=8,
                         reg_wid=64,
                         units=units)
    dut = TestIssuer(pspec)
    vl = main(dut, ports=dut.ports(), name="test_issuer")

    # run without any command-line arguments: also write out the RTLIL,
    # this time using only the external ports
    if len(sys.argv) == 1:
        vl = rtlil.convert(dut, ports=dut.external_ports(), name="test_issuer")
        with open("test_issuer.il", "w") as il_file:
            il_file.write(vl)