move alternative TestIssuerInternalInOrder to new file
[soc.git] src/soc/simple/issuer.py
1 """simple core issuer
2
3 not in any way intended for production use. this runs a FSM that:
4
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
9 * increments the PC
10 * does it all over again
11
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to provide something that can be further incrementally
15 improved.
16 """
17
18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
19 ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
20 from nmigen.cli import rtlil
21 from nmigen.cli import main
22 import sys
23
24 from nmutil.singlepipe import ControlBase
25 from soc.simple.core_data import FetchOutput, FetchInput
26
27 from nmigen.lib.coding import PriorityEncoder
28
29 from openpower.decoder.power_decoder import create_pdecode
30 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
31 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
32 from openpower.decoder.decode2execute1 import Data
33 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
34 SVP64PredMode)
35 from openpower.state import CoreState
36 from openpower.consts import (CR, SVP64CROffs)
37 from soc.experiment.testmem import TestMemory # test only for instructions
38 from soc.regfile.regfiles import StateRegs, FastRegs
39 from soc.simple.core import NonProductionCore
40 from soc.config.test.test_loadstore import TestMemPspec
41 from soc.config.ifetch import ConfigFetchUnit
42 from soc.debug.dmi import CoreDebug, DMIInterface
43 from soc.debug.jtag import JTAG
44 from soc.config.pinouts import get_pinspecs
45 from soc.interrupts.xics import XICS_ICP, XICS_ICS
46 from soc.bus.simple_gpio import SimpleGPIO
47 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
48 from soc.clock.select import ClockSelect
49 from soc.clock.dummypll import DummyPLL
50 from openpower.sv.svstate import SVSTATERec
51 from soc.experiment.icache import ICache
52
53 from nmutil.util import rising_edge
54
55
56 def get_insn(f_instr_o, pc):
57 if f_instr_o.width == 32:
58 return f_instr_o
59 else:
60 # 64-bit: bit 2 of pc decides which word to select
61 return f_instr_o.word_select(pc[2], 32)
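
# example (for explanation only): when f_instr_o is 64 bits wide it holds two
# 32-bit instruction words, and bit 2 of the PC picks between them:
#   get_insn(f_instr_o, 0x1000) selects f_instr_o[0:32]   (pc bit 2 clear)
#   get_insn(f_instr_o, 0x1004) selects f_instr_o[32:64]  (pc bit 2 set)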
62
63 # gets state input or reads from state regfile
64
65
66 def state_get(m, res, core_rst, state_i, name, regfile, regnum):
67 comb = m.d.comb
68 sync = m.d.sync
69 # read the {insert state variable here}
70 res_ok_delay = Signal(name="%s_ok_delay" % name)
71 with m.If(~core_rst):
72 sync += res_ok_delay.eq(~state_i.ok)
73 with m.If(state_i.ok):
74 # incoming override (start from pc_i)
75 comb += res.eq(state_i.data)
76 with m.Else():
77 # otherwise read StateRegs regfile for {insert state here}...
78 comb += regfile.ren.eq(1 << regnum)
79 # ... but on a 1-clock delay
80 with m.If(res_ok_delay):
81 comb += res.eq(regfile.o_data)
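
# example usage (mirroring TestIssuerBase.elaborate below): read the PC either
# from the incoming pc_i override or from the StateRegs regfile, the regfile
# value arriving on a 1-clock delay:
#   state_get(m, state.pc, core_rst, self.pc_i, "pc",
#             self.state_r_pc, StateRegs.PC)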
82
83
84 def get_predint(m, mask, name):
85 """decode SVP64 predicate integer mask field to reg number and invert
86 this is identical to the equivalent function in ISACaller except that
87 it doesn't read the INT directly, it just decodes "what needs to be done"
88 i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
89
90 * all1s is set to indicate that no mask is to be applied.
91 * regread indicates the GPR register number to be read
92 * invert is set to indicate that the register value is to be inverted
93 * unary indicates that the contents of the register select a single bit, 1<<r3
94 """
95 comb = m.d.comb
96 regread = Signal(5, name=name+"regread")
97 invert = Signal(name=name+"invert")
98 unary = Signal(name=name+"unary")
99 all1s = Signal(name=name+"all1s")
100 with m.Switch(mask):
101 with m.Case(SVP64PredInt.ALWAYS.value):
102 comb += all1s.eq(1) # use 0b1111 (all ones)
103 with m.Case(SVP64PredInt.R3_UNARY.value):
104 comb += regread.eq(3)
105 comb += unary.eq(1) # 1<<r3 - shift r3 (single bit)
106 with m.Case(SVP64PredInt.R3.value):
107 comb += regread.eq(3)
108 with m.Case(SVP64PredInt.R3_N.value):
109 comb += regread.eq(3)
110 comb += invert.eq(1)
111 with m.Case(SVP64PredInt.R10.value):
112 comb += regread.eq(10)
113 with m.Case(SVP64PredInt.R10_N.value):
114 comb += regread.eq(10)
115 comb += invert.eq(1)
116 with m.Case(SVP64PredInt.R30.value):
117 comb += regread.eq(30)
118 with m.Case(SVP64PredInt.R30_N.value):
119 comb += regread.eq(30)
120 comb += invert.eq(1)
121 return regread, invert, unary, all1s
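
# example (for explanation only): a mask field of SVP64PredInt.R3_N selects
# GPR r3 as the predicate source with bit-inversion requested, i.e. the
# returned signals settle to regread=3, invert=1, unary=0, all1s=0.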
122
123
124 def get_predcr(m, mask, name):
125 """decode SVP64 predicate CR to reg number field and invert status
126 this is identical to _get_predcr in ISACaller
127 """
128 comb = m.d.comb
129 idx = Signal(2, name=name+"idx")
130 invert = Signal(name=name+"crinvert")
131 with m.Switch(mask):
132 with m.Case(SVP64PredCR.LT.value):
133 comb += idx.eq(CR.LT)
134 comb += invert.eq(0)
135 with m.Case(SVP64PredCR.GE.value):
136 comb += idx.eq(CR.LT)
137 comb += invert.eq(1)
138 with m.Case(SVP64PredCR.GT.value):
139 comb += idx.eq(CR.GT)
140 comb += invert.eq(0)
141 with m.Case(SVP64PredCR.LE.value):
142 comb += idx.eq(CR.GT)
143 comb += invert.eq(1)
144 with m.Case(SVP64PredCR.EQ.value):
145 comb += idx.eq(CR.EQ)
146 comb += invert.eq(0)
147 with m.Case(SVP64PredCR.NE.value):
148 comb += idx.eq(CR.EQ)
149 comb += invert.eq(1)
150 with m.Case(SVP64PredCR.SO.value):
151 comb += idx.eq(CR.SO)
152 comb += invert.eq(0)
153 with m.Case(SVP64PredCR.NS.value):
154 comb += idx.eq(CR.SO)
155 comb += invert.eq(1)
156 return idx, invert
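
# example (for explanation only): SVP64PredCR.GE selects the same CR bit as
# LT but with the test inverted (idx=CR.LT, invert=1), whereas SVP64PredCR.EQ
# gives idx=CR.EQ, invert=0.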
157
158
159 class TestIssuerBase(Elaboratable):
160 """TestIssuerBase - common base class for Issuers
161
162 takes care of power-on reset, peripherals, debug, DEC/TB,
163 and gets PC/MSR/SVSTATE from the State Regfile etc.
164 """
165
166 def __init__(self, pspec):
167
168 # test if SVP64 is to be enabled
169 self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
170
171 # and if regfiles are reduced
172 self.regreduce_en = (hasattr(pspec, "regreduce") and
173 (pspec.regreduce == True))
174
175 # and if overlap requested
176 self.allow_overlap = (hasattr(pspec, "allow_overlap") and
177 (pspec.allow_overlap == True))
178
179 # JTAG interface. add this right at the start because if it's
180 # added it *modifies* the pspec, by adding enable/disable signals
181 # for parts of the rest of the core
182 self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
183 self.dbg_domain = "sync" # sigh "dbgsync" too problematic
184 # self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
185 if self.jtag_en:
186 # XXX MUST keep this up-to-date with litex, and
187 # soc-cocotb-sim, and err.. all needs sorting out, argh
188 subset = ['uart',
189 'mtwi',
190 'eint', 'gpio', 'mspi0',
191 # 'mspi1', - disabled for now
192 # 'pwm', 'sd0', - disabled for now
193 'sdr']
194 self.jtag = JTAG(get_pinspecs(subset=subset),
195 domain=self.dbg_domain)
196 # add signals to pspec to enable/disable icache and dcache
197 # (or data and instruction wishbone if icache/dcache not included)
198 # https://bugs.libre-soc.org/show_bug.cgi?id=520
199 # TODO: do we actually care if these are not domain-synchronised?
200 # honestly probably not.
201 pspec.wb_icache_en = self.jtag.wb_icache_en
202 pspec.wb_dcache_en = self.jtag.wb_dcache_en
203 self.wb_sram_en = self.jtag.wb_sram_en
204 else:
205 self.wb_sram_en = Const(1)
206
207 # add 4k sram blocks?
208 self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
209 pspec.sram4x4kblock == True)
210 if self.sram4x4k:
211 self.sram4k = []
212 for i in range(4):
213 self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
214 # features={'err'}
215 ))
216
217 # add interrupt controller?
218 self.xics = hasattr(pspec, "xics") and pspec.xics == True
219 if self.xics:
220 self.xics_icp = XICS_ICP()
221 self.xics_ics = XICS_ICS()
222 self.int_level_i = self.xics_ics.int_level_i
223
224 # add GPIO peripheral?
225 self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
226 if self.gpio:
227 self.simple_gpio = SimpleGPIO()
228 self.gpio_o = self.simple_gpio.gpio_o
229
230 # main instruction core. suitable for prototyping / demo only
231 self.core = core = NonProductionCore(pspec)
232 self.core_rst = ResetSignal("coresync")
233
234 # instruction decoder. goes into Trap Record
235 #pdecode = create_pdecode()
236 self.cur_state = CoreState("cur") # current state (MSR/PC/SVSTATE)
237 self.pdecode2 = PowerDecode2(None, state=self.cur_state,
238 opkls=IssuerDecode2ToOperand,
239 svp64_en=self.svp64_en,
240 regreduce_en=self.regreduce_en)
241 pdecode = self.pdecode2.dec
242
243 if self.svp64_en:
244 self.svp64 = SVP64PrefixDecoder() # for decoding SVP64 prefix
245
246 # Test Instruction memory
247 if hasattr(core, "icache"):
248 # XXX BLECH! use pspec to transfer the I-Cache to ConfigFetchUnit
249 # truly dreadful. needs a huge reorg.
250 pspec.icache = core.icache
251 self.imem = ConfigFetchUnit(pspec).fu
252
253 # DMI interface
254 self.dbg = CoreDebug()
255
256 # instruction go/monitor
257 self.pc_o = Signal(64, reset_less=True)
258 self.pc_i = Data(64, "pc_i") # set "ok" to indicate "please change me"
259 self.msr_i = Data(64, "msr_i") # set "ok" to indicate "please change me"
260 self.svstate_i = Data(64, "svstate_i") # ditto
261 self.core_bigendian_i = Signal() # TODO: set based on MSR.LE
262 self.busy_o = Signal(reset_less=True)
263 self.memerr_o = Signal(reset_less=True)
264
265 # STATE regfile read /write ports for PC, MSR, SVSTATE
266 staterf = self.core.regs.rf['state']
267 self.state_r_msr = staterf.r_ports['msr'] # MSR rd
268 self.state_r_pc = staterf.r_ports['cia'] # PC rd
269 self.state_r_sv = staterf.r_ports['sv'] # SVSTATE rd
270
271 self.state_w_msr = staterf.w_ports['msr'] # MSR wr
272 self.state_w_pc = staterf.w_ports['d_wr1'] # PC wr
273 self.state_w_sv = staterf.w_ports['sv'] # SVSTATE wr
274
275 # DMI interface access
276 intrf = self.core.regs.rf['int']
277 crrf = self.core.regs.rf['cr']
278 xerrf = self.core.regs.rf['xer']
279 self.int_r = intrf.r_ports['dmi'] # INT read
280 self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
281 self.xer_r = xerrf.r_ports['full_xer'] # XER read
282
283 if self.svp64_en:
284 # for predication
285 self.int_pred = intrf.r_ports['pred'] # INT predicate read
286 self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read
287
288 # hack method of keeping an eye on whether branch/trap set the PC
289 self.state_nia = self.core.regs.rf['state'].w_ports['nia']
290 self.state_nia.wen.name = 'state_nia_wen'
291
292 # pulse to synchronize the simulator at instruction end
293 self.insn_done = Signal()
294
295 # indicate any instruction still outstanding, in execution
296 self.any_busy = Signal()
297
298 if self.svp64_en:
299 # store copies of predicate masks
300 self.srcmask = Signal(64)
301 self.dstmask = Signal(64)
302
303 def setup_peripherals(self, m):
304 comb, sync = m.d.comb, m.d.sync
305
306 # okaaaay so the debug module must be in coresync clock domain
307 # but NOT its reset signal. to cope with this, set every single
308 # submodule explicitly in coresync domain, debug and JTAG
309 # in their own one but using *external* reset.
310 csd = DomainRenamer("coresync")
311 dbd = DomainRenamer(self.dbg_domain)
312
313 m.submodules.core = core = csd(self.core)
314 # this _so_ needs sorting out. ICache is added down inside
315 # LoadStore1 and is already a submodule of LoadStore1
316 if not isinstance(self.imem, ICache):
317 m.submodules.imem = imem = csd(self.imem)
318 m.submodules.dbg = dbg = dbd(self.dbg)
319 if self.jtag_en:
320 m.submodules.jtag = jtag = dbd(self.jtag)
321 # TODO: UART2GDB mux, here, from external pin
322 # see https://bugs.libre-soc.org/show_bug.cgi?id=499
323 sync += dbg.dmi.connect_to(jtag.dmi)
324
325 cur_state = self.cur_state
326
327 # 4x 4k SRAM blocks. these simply "exist", they get routed in litex
328 if self.sram4x4k:
329 for i, sram in enumerate(self.sram4k):
330 m.submodules["sram4k_%d" % i] = csd(sram)
331 comb += sram.enable.eq(self.wb_sram_en)
332
333 # XICS interrupt handler
334 if self.xics:
335 m.submodules.xics_icp = icp = csd(self.xics_icp)
336 m.submodules.xics_ics = ics = csd(self.xics_ics)
337 comb += icp.ics_i.eq(ics.icp_o) # connect ICS to ICP
338 sync += cur_state.eint.eq(icp.core_irq_o) # connect ICP to core
339
340 # GPIO test peripheral
341 if self.gpio:
342 m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)
343
344 # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
345 # XXX causes litex ECP5 test to get wrong idea about input and output
346 # (but works with verilator sim *sigh*)
347 # if self.gpio and self.xics:
348 # comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
349
350 # instruction decoder
351 pdecode = create_pdecode()
352 m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
353 if self.svp64_en:
354 m.submodules.svp64 = svp64 = csd(self.svp64)
355
356 # convenience
357 dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
358 intrf = self.core.regs.rf['int']
359
360 # clock delay power-on reset
361 cd_por = ClockDomain(reset_less=True)
362 cd_sync = ClockDomain()
363 core_sync = ClockDomain("coresync")
364 m.domains += cd_por, cd_sync, core_sync
365 if self.dbg_domain != "sync":
366 dbg_sync = ClockDomain(self.dbg_domain)
367 m.domains += dbg_sync
368
369 ti_rst = Signal(reset_less=True)
370 delay = Signal(range(4), reset=3)
371 with m.If(delay != 0):
372 m.d.por += delay.eq(delay - 1)
373 comb += cd_por.clk.eq(ClockSignal())
374
375 # power-on reset delay
376 core_rst = ResetSignal("coresync")
377 comb += ti_rst.eq((delay != 0) | dbg.core_rst_o | ResetSignal())
378 comb += core_rst.eq(ti_rst)
379
380 # debug clock is same as coresync, but reset is *main external*
381 if self.dbg_domain != "sync":
382 dbg_rst = ResetSignal(self.dbg_domain)
383 comb += dbg_rst.eq(ResetSignal())
384
385 # busy/halted signals from core
386 core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o # core is busy
387 comb += self.busy_o.eq(core_busy_o)
388 comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
389
390 # temporary hack: says "go" immediately for both address gen and ST
391 l0 = core.l0
392 ldst = core.fus.fus['ldst0']
393 st_go_edge = rising_edge(m, ldst.st.rel_o)
394 # link addr-go direct to rel
395 m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)
396 m.d.comb += ldst.st.go_i.eq(st_go_edge) # link store-go to rising rel
397
398 def do_dmi(self, m, dbg):
399 """deals with DMI debug requests
400
401 currently only provides read requests for the INT regfile, CR and XER
402 it will later also deal with *writing* to these regfiles.
403 """
404 comb = m.d.comb
405 sync = m.d.sync
406 dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
407 intrf = self.core.regs.rf['int']
408
409 with m.If(d_reg.req): # request for regfile access being made
410 # TODO: error-check this
411 # XXX should this be combinatorial? sync better?
412 if intrf.unary:
413 comb += self.int_r.ren.eq(1 << d_reg.addr)
414 else:
415 comb += self.int_r.addr.eq(d_reg.addr)
416 comb += self.int_r.ren.eq(1)
417 d_reg_delay = Signal()
418 sync += d_reg_delay.eq(d_reg.req)
419 with m.If(d_reg_delay):
420 # data arrives one clock later
421 comb += d_reg.data.eq(self.int_r.o_data)
422 comb += d_reg.ack.eq(1)
423
424 # sigh same thing for CR debug
425 with m.If(d_cr.req): # request for regfile access being made
426 comb += self.cr_r.ren.eq(0b11111111) # enable all
427 d_cr_delay = Signal()
428 sync += d_cr_delay.eq(d_cr.req)
429 with m.If(d_cr_delay):
430 # data arrives one clock later
431 comb += d_cr.data.eq(self.cr_r.o_data)
432 comb += d_cr.ack.eq(1)
433
434 # aaand XER...
435 with m.If(d_xer.req): # request for regfile access being made
436 comb += self.xer_r.ren.eq(0b111111) # enable all
437 d_xer_delay = Signal()
438 sync += d_xer_delay.eq(d_xer.req)
439 with m.If(d_xer_delay):
440 # data arrives one clock later
441 comb += d_xer.data.eq(self.xer_r.o_data)
442 comb += d_xer.ack.eq(1)
443
444 def tb_dec_fsm(self, m, spr_dec):
445 """tb_dec_fsm
446
447 this is a FSM for updating either dec or tb. it runs alternately
448 DEC, TB, DEC, TB. note that SPR pipeline could have written a new
449 value to DEC, however the regfile has "passthrough" on it so this
450 *should* be ok.
451
452 see v3.0B p1097-1099 for Timer Resources and p1065 and p1076
453 """
454
455 comb, sync = m.d.comb, m.d.sync
456 fast_rf = self.core.regs.rf['fast']
457 fast_r_dectb = fast_rf.r_ports['issue'] # DEC/TB
458 fast_w_dectb = fast_rf.w_ports['issue'] # DEC/TB
459
460 with m.FSM() as fsm:
461
462 # initiates read of current DEC
463 with m.State("DEC_READ"):
464 comb += fast_r_dectb.addr.eq(FastRegs.DEC)
465 comb += fast_r_dectb.ren.eq(1)
466 m.next = "DEC_WRITE"
467
468 # waits for DEC read to arrive (1 cycle), updates with new value
469 with m.State("DEC_WRITE"):
470 new_dec = Signal(64)
471 # TODO: MSR.LPCR 32-bit decrement mode
472 comb += new_dec.eq(fast_r_dectb.o_data - 1)
473 comb += fast_w_dectb.addr.eq(FastRegs.DEC)
474 comb += fast_w_dectb.wen.eq(1)
475 comb += fast_w_dectb.i_data.eq(new_dec)
476 sync += spr_dec.eq(new_dec) # copy into cur_state for decoder
477 m.next = "TB_READ"
478
479 # initiates read of current TB
480 with m.State("TB_READ"):
481 comb += fast_r_dectb.addr.eq(FastRegs.TB)
482 comb += fast_r_dectb.ren.eq(1)
483 m.next = "TB_WRITE"
484
485 # waits for read TB to arrive, initiates write of current TB
486 with m.State("TB_WRITE"):
487 new_tb = Signal(64)
488 comb += new_tb.eq(fast_r_dectb.o_data + 1)
489 comb += fast_w_dectb.addr.eq(FastRegs.TB)
490 comb += fast_w_dectb.wen.eq(1)
491 comb += fast_w_dectb.i_data.eq(new_tb)
492 m.next = "DEC_READ"
493
494 return m
495
496 def elaborate(self, platform):
497 m = Module()
498 # convenience
499 comb, sync = m.d.comb, m.d.sync
500 cur_state = self.cur_state
501 pdecode2 = self.pdecode2
502 dbg = self.dbg
503
504 # set up peripherals and core
505 core_rst = self.core_rst
506 self.setup_peripherals(m)
507
508 # reset current state if core reset requested
509 with m.If(core_rst):
510 m.d.sync += self.cur_state.eq(0)
511
512 # PC and instruction from I-Memory
513 comb += self.pc_o.eq(cur_state.pc)
514 self.pc_changed = Signal() # note write to PC
515 self.msr_changed = Signal() # note write to MSR
516 self.sv_changed = Signal() # note write to SVSTATE
517
518 # read state either from incoming override or from regfile
519 state = CoreState("get") # current state (MSR/PC/SVSTATE)
520 state_get(m, state.msr, core_rst, self.msr_i,
521 "msr", # read MSR
522 self.state_r_msr, StateRegs.MSR)
523 state_get(m, state.pc, core_rst, self.pc_i,
524 "pc", # read PC
525 self.state_r_pc, StateRegs.PC)
526 state_get(m, state.svstate, core_rst, self.svstate_i,
527 "svstate", # read SVSTATE
528 self.state_r_sv, StateRegs.SVSTATE)
529
530 # don't write pc every cycle
531 comb += self.state_w_pc.wen.eq(0)
532 comb += self.state_w_pc.i_data.eq(0)
533
534 # connect up debug state. note "combinatorially same" below,
535 # this is a bit naff, passing state over in the dbg class, but
536 # because it is combinatorial it achieves the desired goal
537 comb += dbg.state.eq(state)
538
539 # this bit doesn't have to be in the FSM: connect up to read
540 # regfiles on demand from DMI
541 self.do_dmi(m, dbg)
542
543 # DEC and TB inc/dec FSM. copy of DEC is put into CoreState,
544 # (which uses that in PowerDecoder2 to raise 0x900 exception)
545 self.tb_dec_fsm(m, cur_state.dec)
546
547 return m
548
549 def __iter__(self):
550 yield from self.pc_i.ports()
551 yield from self.msr_i.ports()
552 yield self.pc_o
553 yield self.memerr_o
554 yield from self.core.ports()
555 yield from self.imem.ports()
556 yield self.core_bigendian_i
557 yield self.busy_o
558
559 def ports(self):
560 return list(self)
561
562 def external_ports(self):
563 ports = self.pc_i.ports()
564 ports += self.msr_i.ports()
565 ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
566 ]
567
568 if self.jtag_en:
569 ports += list(self.jtag.external_ports())
570 else:
571 # JTAG disabled: add the DMI interface ports directly instead
572 ports += list(self.dbg.dmi.ports())
573
574 ports += list(self.imem.ibus.fields.values())
575 ports += list(self.core.l0.cmpi.wb_bus().fields.values())
576
577 if self.sram4x4k:
578 for sram in self.sram4k:
579 ports += list(sram.bus.fields.values())
580
581 if self.xics:
582 ports += list(self.xics_icp.bus.fields.values())
583 ports += list(self.xics_ics.bus.fields.values())
584 ports.append(self.int_level_i)
585
586 if self.gpio:
587 ports += list(self.simple_gpio.bus.fields.values())
588 ports.append(self.gpio_o)
589
590 return ports
591
594
595
596
597 # Fetch Finite State Machine.
598 # WARNING: there are currently DriverConflicts but it's actually working.
599 # TODO, here: everything that is global in nature, information from the
600 # main TestIssuerInternal, needs to move to either ispec() or ospec().
601 # not only that: TestIssuerInternal.imem can entirely move into here
602 # because imem is only ever accessed inside the FetchFSM.
603 class FetchFSM(ControlBase):
604 def __init__(self, allow_overlap, svp64_en, imem, core_rst,
605 pdecode2, cur_state,
606 dbg, core, svstate, nia, is_svp64_mode):
607 self.allow_overlap = allow_overlap
608 self.svp64_en = svp64_en
609 self.imem = imem
610 self.core_rst = core_rst
611 self.pdecode2 = pdecode2
612 self.cur_state = cur_state
613 self.dbg = dbg
614 self.core = core
615 self.svstate = svstate
616 self.nia = nia
617 self.is_svp64_mode = is_svp64_mode
618
619 # set up pipeline ControlBase and allocate i/o specs
620 # (unusual: normally done by the Pipeline API)
621 super().__init__(stage=self)
622 self.p.i_data, self.n.o_data = self.new_specs(None)
623 self.i, self.o = self.p.i_data, self.n.o_data
624
625 # next 3 functions are Stage API Compliance
626 def setup(self, m, i):
627 pass
628
629 def ispec(self):
630 return FetchInput()
631
632 def ospec(self):
633 return FetchOutput()
634
635 def elaborate(self, platform):
636 """fetch FSM
637
638 this FSM performs fetch of raw instruction data, partial-decodes
639 it 32-bit at a time to detect SVP64 prefixes, and will optionally
640 read a 2nd 32-bit quantity if that occurs.
641 """
642 m = super().elaborate(platform)
643
644 dbg = self.dbg
645 core = self.core
646 pc = self.i.pc
647 msr = self.i.msr
648 svstate = self.svstate
649 nia = self.nia
650 is_svp64_mode = self.is_svp64_mode
651 fetch_pc_o_ready = self.p.o_ready
652 fetch_pc_i_valid = self.p.i_valid
653 fetch_insn_o_valid = self.n.o_valid
654 fetch_insn_i_ready = self.n.i_ready
655
656 comb = m.d.comb
657 sync = m.d.sync
658 pdecode2 = self.pdecode2
659 cur_state = self.cur_state
660 dec_opcode_o = pdecode2.dec.raw_opcode_in # raw opcode
661
662 # also note instruction fetch failed
663 if hasattr(core, "icache"):
664 fetch_failed = core.icache.i_out.fetch_failed
665 flush_needed = True
666 else:
667 fetch_failed = Const(0, 1)
668 flush_needed = False
669
670 with m.FSM(name='fetch_fsm'):
671
672 # waiting (zzz)
673 with m.State("IDLE"):
674 with m.If(~dbg.stopping_o & ~fetch_failed):
675 comb += fetch_pc_o_ready.eq(1)
676 with m.If(fetch_pc_i_valid & ~fetch_failed):
677 # instruction allowed to go: start by reading the PC
678 # capture the PC and also drop it into Insn Memory
679 # we have joined a pair of combinatorial memory
680 # lookups together. this is Generally Bad.
681 comb += self.imem.a_pc_i.eq(pc)
682 comb += self.imem.a_i_valid.eq(1)
683 comb += self.imem.f_i_valid.eq(1)
684 sync += cur_state.pc.eq(pc)
685 sync += cur_state.svstate.eq(svstate) # and svstate
686 sync += cur_state.msr.eq(msr) # and msr
687
688 m.next = "INSN_READ" # move to "wait for bus" phase
689
690 # wait for the instruction read to complete (or for a stop request)
691 with m.State("INSN_READ"):
692 if self.allow_overlap:
693 stopping = dbg.stopping_o
694 else:
695 stopping = Const(0)
696 with m.If(stopping):
697 # stopping: jump back to idle
698 m.next = "IDLE"
699 with m.Else():
700 with m.If(self.imem.f_busy_o & ~fetch_failed): # zzz...
701 # busy but not fetch failed: stay in wait-read
702 comb += self.imem.a_i_valid.eq(1)
703 comb += self.imem.f_i_valid.eq(1)
704 with m.Else():
705 # not busy (or fetch failed!): instruction fetched
706 # when fetch failed, the instruction gets ignored
707 # by the decoder
708 insn = get_insn(self.imem.f_instr_o, cur_state.pc)
709 if self.svp64_en:
710 svp64 = self.svp64
711 # decode the SVP64 prefix, if any
712 comb += svp64.raw_opcode_in.eq(insn)
713 comb += svp64.bigendian.eq(self.core_bigendian_i)
714 # pass the decoded prefix (if any) to PowerDecoder2
715 sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
716 sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
717 # remember whether this is a prefixed instruction,
718 # so the FSM can readily loop when VL==0
719 sync += is_svp64_mode.eq(svp64.is_svp64_mode)
720 # calculate the address of the following instruction
721 insn_size = Mux(svp64.is_svp64_mode, 8, 4)
722 sync += nia.eq(cur_state.pc + insn_size)
723 with m.If(~svp64.is_svp64_mode):
724 # with no prefix, store the instruction
725 # and hand it directly to the next FSM
726 sync += dec_opcode_o.eq(insn)
727 m.next = "INSN_READY"
728 with m.Else():
729 # fetch the rest of the instruction from memory
730 comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
731 comb += self.imem.a_i_valid.eq(1)
732 comb += self.imem.f_i_valid.eq(1)
733 m.next = "INSN_READ2"
734 else:
735 # not SVP64 - 32-bit only
736 sync += nia.eq(cur_state.pc + 4)
737 sync += dec_opcode_o.eq(insn)
738 m.next = "INSN_READY"
739
740 with m.State("INSN_READ2"):
741 with m.If(self.imem.f_busy_o): # zzz...
742 # busy: stay in wait-read
743 comb += self.imem.a_i_valid.eq(1)
744 comb += self.imem.f_i_valid.eq(1)
745 with m.Else():
746 # not busy: instruction fetched
747 insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
748 sync += dec_opcode_o.eq(insn)
749 m.next = "INSN_READY"
750 # TODO: probably can start looking at pdecode2.rm_dec
751 # here or maybe even in INSN_READ state, if svp64_mode
752 # detected, in order to trigger - and wait for - the
753 # predicate reading.
754 if self.svp64_en:
755 pmode = pdecode2.rm_dec.predmode
756 """
757 if pmode != SVP64PredMode.ALWAYS.value:
758 fire predicate loading FSM and wait before
759 moving to INSN_READY
760 else:
761 sync += self.srcmask.eq(-1) # set to all 1s
762 sync += self.dstmask.eq(-1) # set to all 1s
763 m.next = "INSN_READY"
764 """
765
766 with m.State("INSN_READY"):
767 # hand over the instruction, to be decoded
768 comb += fetch_insn_o_valid.eq(1)
769 with m.If(fetch_insn_i_ready):
770 m.next = "IDLE"
771
772 # whatever was done above, over-ride it if core reset is held
773 with m.If(self.core_rst):
774 sync += nia.eq(0)
775
776 return m
777
778
779 class TestIssuerInternal(TestIssuerBase):
780 """TestIssuer - reads instructions from TestMemory and issues them
781
782 efficiency and speed is not the main goal here: functional correctness
783 and code clarity is. optimisations (which almost 100% interfere with
784 easy understanding) come later.
785 """
786
787 def fetch_predicate_fsm(self, m,
788 pred_insn_i_valid, pred_insn_o_ready,
789 pred_mask_o_valid, pred_mask_i_ready):
790 """fetch_predicate_fsm - obtains (constructs in the case of CR)
791 src/dest predicate masks
792
793 https://bugs.libre-soc.org/show_bug.cgi?id=617
794 the predicates can be read here, by using IntRegs r_ports['pred']
795 or CRRegs r_ports['pred']. in the case of CRs it will have to
796 be done through multiple reads, extracting one relevant at a time.
797 later, a faster way would be to use the 32-bit-wide CR port but
798 this is more complex decoding, here. equivalent code used in
799 ISACaller is "from openpower.decoder.isa.caller import get_predcr"
800
801 note: this ENTIRE FSM is not to be called when svp64 is disabled
802 """
803 comb = m.d.comb
804 sync = m.d.sync
805 pdecode2 = self.pdecode2
806 rm_dec = pdecode2.rm_dec # SVP64RMModeDecode
807 predmode = rm_dec.predmode
808 srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
809 cr_pred, int_pred = self.cr_pred, self.int_pred # read regfiles
810 # get src/dst step, so we can skip already used mask bits
811 cur_state = self.cur_state
812 srcstep = cur_state.svstate.srcstep
813 dststep = cur_state.svstate.dststep
814 cur_vl = cur_state.svstate.vl
815
816 # decode predicates
817 sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
818 dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
819 sidx, scrinvert = get_predcr(m, srcpred, 's')
820 didx, dcrinvert = get_predcr(m, dstpred, 'd')
821
822 # store fetched masks, for either intpred or crpred
823 # when src/dst step is not zero, the skipped mask bits need to be
824 # shifted-out, before actually storing them in src/dest mask
825 new_srcmask = Signal(64, reset_less=True)
826 new_dstmask = Signal(64, reset_less=True)
827
828 with m.FSM(name="fetch_predicate"):
829
830 with m.State("FETCH_PRED_IDLE"):
831 comb += pred_insn_o_ready.eq(1)
832 with m.If(pred_insn_i_valid):
833 with m.If(predmode == SVP64PredMode.INT):
834 # skip fetching destination mask register, when zero
835 with m.If(dall1s):
836 sync += new_dstmask.eq(-1)
837 # directly go to fetch source mask register
838 # guaranteed not to be zero (otherwise predmode
839 # would be SVP64PredMode.ALWAYS, not INT)
840 comb += int_pred.addr.eq(sregread)
841 comb += int_pred.ren.eq(1)
842 m.next = "INT_SRC_READ"
843 # fetch destination predicate register
844 with m.Else():
845 comb += int_pred.addr.eq(dregread)
846 comb += int_pred.ren.eq(1)
847 m.next = "INT_DST_READ"
848 with m.Elif(predmode == SVP64PredMode.CR):
849 # go fetch masks from the CR register file
850 sync += new_srcmask.eq(0)
851 sync += new_dstmask.eq(0)
852 m.next = "CR_READ"
853 with m.Else():
854 sync += self.srcmask.eq(-1)
855 sync += self.dstmask.eq(-1)
856 m.next = "FETCH_PRED_DONE"
857
858 with m.State("INT_DST_READ"):
859 # store destination mask
860 inv = Repl(dinvert, 64)
861 with m.If(dunary):
862 # set selected mask bit for 1<<r3 mode
863 dst_shift = Signal(range(64))
864 comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
865 sync += new_dstmask.eq(1 << dst_shift)
866 with m.Else():
867 # invert mask if requested
868 sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
869 # skip fetching source mask register, when zero
870 with m.If(sall1s):
871 sync += new_srcmask.eq(-1)
872 m.next = "FETCH_PRED_SHIFT_MASK"
873 # fetch source predicate register
874 with m.Else():
875 comb += int_pred.addr.eq(sregread)
876 comb += int_pred.ren.eq(1)
877 m.next = "INT_SRC_READ"
878
879 with m.State("INT_SRC_READ"):
880 # store source mask
881 inv = Repl(sinvert, 64)
882 with m.If(sunary):
883 # set selected mask bit for 1<<r3 mode
884 src_shift = Signal(range(64))
885 comb += src_shift.eq(self.int_pred.o_data & 0b111111)
886 sync += new_srcmask.eq(1 << src_shift)
887 with m.Else():
888 # invert mask if requested
889 sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
890 m.next = "FETCH_PRED_SHIFT_MASK"
891
892 # fetch masks from the CR register file
893 # implements the following loop:
894 # idx, inv = get_predcr(mask)
895 # mask = 0
896 # for cr_idx in range(vl):
897 # cr = crl[cr_idx + SVP64CROffs.CRPred] # takes one cycle
898 # if cr[idx] ^ inv:
899 # mask |= 1 << cr_idx
900 # return mask
901 with m.State("CR_READ"):
902 # CR index to be read, which will be ready by the next cycle
903 cr_idx = Signal.like(cur_vl, reset_less=True)
904 # submit the read operation to the regfile
905 with m.If(cr_idx != cur_vl):
906 # the CR read port is unary ...
907 # ren = 1 << cr_idx
908 # ... in MSB0 convention ...
909 # ren = 1 << (7 - cr_idx)
910 # ... and with an offset:
911 # ren = 1 << (7 - off - cr_idx)
912 idx = SVP64CROffs.CRPred + cr_idx
913 comb += cr_pred.ren.eq(1 << (7 - idx))
914 # signal data valid in the next cycle
915 cr_read = Signal(reset_less=True)
916 sync += cr_read.eq(1)
917 # load the next index
918 sync += cr_idx.eq(cr_idx + 1)
919 with m.Else():
920 # exit on loop end
921 sync += cr_read.eq(0)
922 sync += cr_idx.eq(0)
923 m.next = "FETCH_PRED_SHIFT_MASK"
924 with m.If(cr_read):
925 # compensate for the one cycle delay on the regfile
926 cur_cr_idx = Signal.like(cur_vl)
927 comb += cur_cr_idx.eq(cr_idx - 1)
928 # read the CR field, select the appropriate bit
929 cr_field = Signal(4)
930 scr_bit = Signal()
931 dcr_bit = Signal()
932 comb += cr_field.eq(cr_pred.o_data)
933 comb += scr_bit.eq(cr_field.bit_select(sidx, 1)
934 ^ scrinvert)
935 comb += dcr_bit.eq(cr_field.bit_select(didx, 1)
936 ^ dcrinvert)
937 # set the corresponding mask bit
938 bit_to_set = Signal.like(self.srcmask)
939 comb += bit_to_set.eq(1 << cur_cr_idx)
940 with m.If(scr_bit):
941 sync += new_srcmask.eq(new_srcmask | bit_to_set)
942 with m.If(dcr_bit):
943 sync += new_dstmask.eq(new_dstmask | bit_to_set)
944
945 with m.State("FETCH_PRED_SHIFT_MASK"):
946 # shift-out skipped mask bits
947 sync += self.srcmask.eq(new_srcmask >> srcstep)
948 sync += self.dstmask.eq(new_dstmask >> dststep)
949 m.next = "FETCH_PRED_DONE"
950
951 with m.State("FETCH_PRED_DONE"):
952 comb += pred_mask_o_valid.eq(1)
953 with m.If(pred_mask_i_ready):
954 m.next = "FETCH_PRED_IDLE"
955
956 def issue_fsm(self, m, core, nia,
957 dbg, core_rst, is_svp64_mode,
958 fetch_pc_o_ready, fetch_pc_i_valid,
959 fetch_insn_o_valid, fetch_insn_i_ready,
960 pred_insn_i_valid, pred_insn_o_ready,
961 pred_mask_o_valid, pred_mask_i_ready,
962 exec_insn_i_valid, exec_insn_o_ready,
963 exec_pc_o_valid, exec_pc_i_ready):
964 """issue FSM
965
966 decode / issue FSM. this interacts with the "fetch" FSM
967 through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
968 (outgoing). also interacts with the "execute" FSM
969 through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
970 (incoming).
971 SVP64 RM prefixes have already been set up by the
972 "fetch" phase, so execute is fairly straightforward.
973 """
974
975 comb = m.d.comb
976 sync = m.d.sync
977 pdecode2 = self.pdecode2
978 cur_state = self.cur_state
979
980 # temporaries
981 dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode
982
983 # for updating svstate (things like srcstep etc.)
984 update_svstate = Signal() # set this (below) if updating
985 new_svstate = SVSTATERec("new_svstate")
986 comb += new_svstate.eq(cur_state.svstate)
987
988 # precalculate srcstep+1 and dststep+1
989 cur_srcstep = cur_state.svstate.srcstep
990 cur_dststep = cur_state.svstate.dststep
991 next_srcstep = Signal.like(cur_srcstep)
992 next_dststep = Signal.like(cur_dststep)
993 comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
994 comb += next_dststep.eq(cur_state.svstate.dststep+1)
995
996 # note if an exception happened. in a pipelined or OoO design
997 # this needs to be accompanied by "shadowing" (or stalling)
998 exc_happened = self.core.o.exc_happened
999 # also note instruction fetch failed
1000 if hasattr(core, "icache"):
1001 fetch_failed = core.icache.i_out.fetch_failed
1002 flush_needed = True
1003 # set to fault in decoder
1004 # update (highest priority) instruction fault
1005 rising_fetch_failed = rising_edge(m, fetch_failed)
1006 with m.If(rising_fetch_failed):
1007 sync += pdecode2.instr_fault.eq(1)
1008 else:
1009 fetch_failed = Const(0, 1)
1010 flush_needed = False
1011
1012 with m.FSM(name="issue_fsm"):
1013
1014 # sync with the "fetch" phase which is reading the instruction
1015 # at this point, there is no instruction running, that
1016 # could inadvertently update the PC.
1017 with m.State("ISSUE_START"):
1018 # reset instruction fault
1019 sync += pdecode2.instr_fault.eq(0)
1020 # wait on "core stop" release, before next fetch
1021 # need to do this here, in case we are in a VL==0 loop
1022 with m.If(~dbg.core_stop_o & ~core_rst):
1023 comb += fetch_pc_i_valid.eq(1) # tell fetch to start
1024 with m.If(fetch_pc_o_ready): # fetch acknowledged us
1025 m.next = "INSN_WAIT"
1026 with m.Else():
1027 # tell core it's stopped, and acknowledge debug handshake
1028 comb += dbg.core_stopped_i.eq(1)
1029 # while stopped, allow updating the MSR, PC and SVSTATE
1030 with m.If(self.pc_i.ok):
1031 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1032 comb += self.state_w_pc.i_data.eq(self.pc_i.data)
1033 sync += self.pc_changed.eq(1)
1034 with m.If(self.msr_i.ok):
1035 comb += self.state_w_msr.wen.eq(1 << StateRegs.MSR)
1036 comb += self.state_w_msr.i_data.eq(self.msr_i.data)
1037 sync += self.msr_changed.eq(1)
1038 with m.If(self.svstate_i.ok):
1039 comb += new_svstate.eq(self.svstate_i.data)
1040 comb += update_svstate.eq(1)
1041 sync += self.sv_changed.eq(1)
1042
1043 # wait for an instruction to arrive from Fetch
1044 with m.State("INSN_WAIT"):
1045 if self.allow_overlap:
1046 stopping = dbg.stopping_o
1047 else:
1048 stopping = Const(0)
1049 with m.If(stopping):
1050 # stopping: jump back to idle
1051 m.next = "ISSUE_START"
1052 if flush_needed:
1053 # request the icache to stop asserting "failed"
1054 comb += core.icache.flush_in.eq(1)
1055 # stop instruction fault
1056 sync += pdecode2.instr_fault.eq(0)
1057 with m.Else():
1058 comb += fetch_insn_i_ready.eq(1)
1059 with m.If(fetch_insn_o_valid):
1060 # loop into ISSUE_START if it's a SVP64 instruction
1061 # and VL == 0. this because VL==0 is a for-loop
1062 # from 0 to 0 i.e. always, always a NOP.
1063 cur_vl = cur_state.svstate.vl
1064 with m.If(is_svp64_mode & (cur_vl == 0)):
1065 # update the PC before fetching the next instruction
1066 # since we are in a VL==0 loop, no instruction was
1067 # executed that we could be overwriting
1068 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1069 comb += self.state_w_pc.i_data.eq(nia)
1070 comb += self.insn_done.eq(1)
1071 m.next = "ISSUE_START"
1072 with m.Else():
1073 if self.svp64_en:
1074 m.next = "PRED_START" # fetching predicate
1075 else:
1076 m.next = "DECODE_SV" # skip predication
1077
1078 with m.State("PRED_START"):
1079 comb += pred_insn_i_valid.eq(1) # tell fetch_pred to start
1080 with m.If(pred_insn_o_ready): # fetch_pred acknowledged us
1081 m.next = "MASK_WAIT"
1082
1083 with m.State("MASK_WAIT"):
1084 comb += pred_mask_i_ready.eq(1) # ready to receive the masks
1085 with m.If(pred_mask_o_valid): # predication masks are ready
1086 m.next = "PRED_SKIP"
1087
1088 # skip zeros in predicate
1089 with m.State("PRED_SKIP"):
1090 with m.If(~is_svp64_mode):
1091 m.next = "DECODE_SV" # nothing to do
1092 with m.Else():
1093 if self.svp64_en:
1094 pred_src_zero = pdecode2.rm_dec.pred_sz
1095 pred_dst_zero = pdecode2.rm_dec.pred_dz
1096
1097 # new srcstep, after skipping zeros
1098 skip_srcstep = Signal.like(cur_srcstep)
1099 # value to be added to the current srcstep
1100 src_delta = Signal.like(cur_srcstep)
1101 # add leading zeros to srcstep, if not in zero mode
1102 with m.If(~pred_src_zero):
1103 # priority encoder (count leading zeros)
1104 # append guard bit, in case the mask is all zeros
1105 pri_enc_src = PriorityEncoder(65)
1106 m.submodules.pri_enc_src = pri_enc_src
1107 comb += pri_enc_src.i.eq(Cat(self.srcmask,
1108 Const(1, 1)))
1109 comb += src_delta.eq(pri_enc_src.o)
1110 # apply delta to srcstep
1111 comb += skip_srcstep.eq(cur_srcstep + src_delta)
1112 # shift-out all leading zeros from the mask
1113 # plus the leading "one" bit
1114 # TODO count leading zeros and shift-out the zero
1115 # bits, in the same step, in hardware
1116 sync += self.srcmask.eq(self.srcmask >> (src_delta+1))
1117
1118 # same as above, but for dststep
1119 skip_dststep = Signal.like(cur_dststep)
1120 dst_delta = Signal.like(cur_dststep)
1121 with m.If(~pred_dst_zero):
1122 pri_enc_dst = PriorityEncoder(65)
1123 m.submodules.pri_enc_dst = pri_enc_dst
1124 comb += pri_enc_dst.i.eq(Cat(self.dstmask,
1125 Const(1, 1)))
1126 comb += dst_delta.eq(pri_enc_dst.o)
1127 comb += skip_dststep.eq(cur_dststep + dst_delta)
1128 sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))
1129
1130 # TODO: initialize mask[VL]=1 to avoid passing past VL
1131 with m.If((skip_srcstep >= cur_vl) |
1132 (skip_dststep >= cur_vl)):
1133 # end of VL loop. Update PC and reset src/dst step
1134 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1135 comb += self.state_w_pc.i_data.eq(nia)
1136 comb += new_svstate.srcstep.eq(0)
1137 comb += new_svstate.dststep.eq(0)
1138 comb += update_svstate.eq(1)
1139 # synchronize with the simulator
1140 comb += self.insn_done.eq(1)
1141 # go back to Issue
1142 m.next = "ISSUE_START"
1143 with m.Else():
1144 # update new src/dst step
1145 comb += new_svstate.srcstep.eq(skip_srcstep)
1146 comb += new_svstate.dststep.eq(skip_dststep)
1147 comb += update_svstate.eq(1)
1148 # proceed to Decode
1149 m.next = "DECODE_SV"
1150
1151 # pass predicate mask bits through to satellite decoders
1152 # TODO: for SIMD this will be *multiple* bits
1153 sync += core.i.sv_pred_sm.eq(self.srcmask[0])
1154 sync += core.i.sv_pred_dm.eq(self.dstmask[0])
1155
1156 # after src/dst step have been updated, we are ready
1157 # to decode the instruction
1158 with m.State("DECODE_SV"):
1159 # decode the instruction
1160 with m.If(~fetch_failed):
1161 sync += pdecode2.instr_fault.eq(0)
1162 sync += core.i.e.eq(pdecode2.e)
1163 sync += core.i.state.eq(cur_state)
1164 sync += core.i.raw_insn_i.eq(dec_opcode_i)
1165 sync += core.i.bigendian_i.eq(self.core_bigendian_i)
1166 if self.svp64_en:
1167 sync += core.i.sv_rm.eq(pdecode2.sv_rm)
1168 # set RA_OR_ZERO detection in satellite decoders
1169 sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
1170 # and svp64 detection
1171 sync += core.i.is_svp64_mode.eq(is_svp64_mode)
1172 # and svp64 bit-rev'd ldst mode
1173 ldst_dec = pdecode2.use_svp64_ldst_dec
1174 sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
1175 # after decoding, reset any previous exception condition,
1176 # allowing it to be set again during the next execution
1177 sync += pdecode2.ldst_exc.eq(0)
1178
1179 m.next = "INSN_EXECUTE" # move to "execute"
1180
1181 # handshake with execution FSM, move to "wait" once acknowledged
1182 with m.State("INSN_EXECUTE"):
1183 comb += exec_insn_i_valid.eq(1) # trigger execute
1184 with m.If(exec_insn_o_ready): # execute acknowledged us
1185 m.next = "EXECUTE_WAIT"
1186
1187 with m.State("EXECUTE_WAIT"):
1188 # wait on "core stop" release, at instruction end
1189 # need to do this here, in case we are in a VL>1 loop
1190 with m.If(~dbg.core_stop_o & ~core_rst):
1191 comb += exec_pc_i_ready.eq(1)
1192 # see https://bugs.libre-soc.org/show_bug.cgi?id=636
1193 # the exception info needs to be blatted into
1194 # pdecode.ldst_exc, and the instruction "re-run".
1195 # when ldst_exc.happened is set, the PowerDecoder2
1196 # reacts very differently: it re-writes the instruction
1197 # with a "trap" (calls PowerDecoder2.trap()) which
1198 # will *overwrite* whatever was requested and jump the
1199 # PC to the exception address, as well as alter MSR.
1200 # nothing else needs to be done other than to note
1201 # the change of PC and MSR (and, later, SVSTATE)
1202 with m.If(exc_happened):
1203 mmu = core.fus.get_exc("mmu0")
1204 ldst = core.fus.get_exc("ldst0")
1205 if mmu is not None:
1206 with m.If(fetch_failed):
1207 # instruction fetch: exception is from MMU
1208 # reset instr_fault (highest priority)
1209 sync += pdecode2.ldst_exc.eq(mmu)
1210 sync += pdecode2.instr_fault.eq(0)
1211 if flush_needed:
1212 # request icache to stop asserting "failed"
1213 comb += core.icache.flush_in.eq(1)
1214 with m.If(~fetch_failed):
1215 # otherwise assume it was a LDST exception
1216 sync += pdecode2.ldst_exc.eq(ldst)
1217
1218 with m.If(exec_pc_o_valid):
1219
1220 # was this the last loop iteration?
1221 is_last = Signal()
1222 cur_vl = cur_state.svstate.vl
1223 comb += is_last.eq(next_srcstep == cur_vl)
1224
1225 # return directly to Decode if Execute generated an
1226 # exception.
1227 with m.If(pdecode2.ldst_exc.happened):
1228 m.next = "DECODE_SV"
1229
1230 # if MSR, PC or SVSTATE were changed by the previous
1231 # instruction, go directly back to Fetch, without
1232 # updating MSR, PC or SVSTATE
1233 with m.Elif(self.msr_changed | self.pc_changed |
1234 self.sv_changed):
1235 m.next = "ISSUE_START"
1236
1237 # also return to Fetch, when no output was a vector
1238 # (regardless of SRCSTEP and VL), or when the last
1239 # instruction was really the last one of the VL loop
1240 with m.Elif((~pdecode2.loop_continue) | is_last):
1241 # before going back to fetch, update the PC state
1242 # register with the NIA.
1243 # ok here we are not reading the branch unit.
1244 # TODO: this just blithely overwrites whatever
1245 # pipeline updated the PC
1246 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1247 comb += self.state_w_pc.i_data.eq(nia)
1248 # reset SRCSTEP before returning to Fetch
1249 if self.svp64_en:
1250 with m.If(pdecode2.loop_continue):
1251 comb += new_svstate.srcstep.eq(0)
1252 comb += new_svstate.dststep.eq(0)
1253 comb += update_svstate.eq(1)
1254 else:
1255 comb += new_svstate.srcstep.eq(0)
1256 comb += new_svstate.dststep.eq(0)
1257 comb += update_svstate.eq(1)
1258 m.next = "ISSUE_START"
1259
1260 # returning to Execute? then, first update SRCSTEP
1261 with m.Else():
1262 comb += new_svstate.srcstep.eq(next_srcstep)
1263 comb += new_svstate.dststep.eq(next_dststep)
1264 comb += update_svstate.eq(1)
1265 # return to mask skip loop
1266 m.next = "PRED_SKIP"
1267
1268 with m.Else():
1269 comb += dbg.core_stopped_i.eq(1)
1270 if flush_needed:
1271 # request the icache to stop asserting "failed"
1272 comb += core.icache.flush_in.eq(1)
1273 # stop instruction fault
1274 sync += pdecode2.instr_fault.eq(0)
1275 if flush_needed:
1276 # request the icache to stop asserting "failed"
1277 comb += core.icache.flush_in.eq(1)
1278 # stop instruction fault
1279 sync += pdecode2.instr_fault.eq(0)
1280 # while stopped, allow updating the MSR, PC and SVSTATE
1281 with m.If(self.msr_i.ok):
1282 comb += self.state_w_msr.wen.eq(1 << StateRegs.MSR)
1283 comb += self.state_w_msr.i_data.eq(self.msr_i.data)
1284 sync += self.msr_changed.eq(1)
1285 with m.If(self.pc_i.ok):
1286 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1287 comb += self.state_w_pc.i_data.eq(self.pc_i.data)
1288 sync += self.pc_changed.eq(1)
1289 with m.If(self.svstate_i.ok):
1290 comb += new_svstate.eq(self.svstate_i.data)
1291 comb += update_svstate.eq(1)
1292 sync += self.sv_changed.eq(1)
1293
1294 # check if svstate needs updating: if so, write it to State Regfile
1295 with m.If(update_svstate):
1296 comb += self.state_w_sv.wen.eq(1 << StateRegs.SVSTATE)
1297 comb += self.state_w_sv.i_data.eq(new_svstate)
1298 sync += cur_state.svstate.eq(new_svstate) # for next clock
1299
1300 def execute_fsm(self, m, core,
1301 exec_insn_i_valid, exec_insn_o_ready,
1302 exec_pc_o_valid, exec_pc_i_ready):
1303 """execute FSM
1304
1305 execute FSM. this interacts with the "issue" FSM
1306 through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
1307 (outgoing). SVP64 RM prefixes have already been set up by the
1308 "issue" phase, so execute is fairly straightforward.
1309 """
1310
1311 comb = m.d.comb
1312 sync = m.d.sync
1313 pdecode2 = self.pdecode2
1314
1315 # temporaries
1316 core_busy_o = core.n.o_data.busy_o # core is busy
1317 core_ivalid_i = core.p.i_valid # instruction is valid
1318
1319 if hasattr(core, "icache"):
1320 fetch_failed = core.icache.i_out.fetch_failed
1321 else:
1322 fetch_failed = Const(0, 1)
1323
1324 with m.FSM(name="exec_fsm"):
1325
1326 # waiting for instruction bus (stays there until not busy)
1327 with m.State("INSN_START"):
1328 comb += exec_insn_o_ready.eq(1)
1329 with m.If(exec_insn_i_valid):
1330 comb += core_ivalid_i.eq(1) # instruction is valid/issued
1331 sync += self.sv_changed.eq(0)
1332 sync += self.pc_changed.eq(0)
1333 sync += self.msr_changed.eq(0)
1334 with m.If(core.p.o_ready): # only move if accepted
1335 m.next = "INSN_ACTIVE" # move to "wait completion"
1336
1337 # instruction started: must wait till it finishes
1338 with m.State("INSN_ACTIVE"):
1339 # note changes to MSR, PC and SVSTATE
1340 # XXX oops, really must monitor *all* State Regfile write
1341 # ports looking for changes!
1342 with m.If(self.state_nia.wen & (1 << StateRegs.SVSTATE)):
1343 sync += self.sv_changed.eq(1)
1344 with m.If(self.state_nia.wen & (1 << StateRegs.MSR)):
1345 sync += self.msr_changed.eq(1)
1346 with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
1347 sync += self.pc_changed.eq(1)
1348 with m.If(~core_busy_o): # instruction done!
1349 comb += exec_pc_o_valid.eq(1)
1350 with m.If(exec_pc_i_ready):
1351 # when finished, indicate "done".
1352 # however, if there was an exception, the instruction
1353 # is *not* yet done. this is an implementation
1354 # detail: we choose to implement exceptions by
1355 # taking the exception information from the LDST
1356 # unit, putting that *back* into the PowerDecoder2,
1357 # and *re-running the entire instruction*.
1358 # if we erroneously indicate "done" here, it is as if
1359 # there were *TWO* instructions:
1360 # 1) the failed LDST 2) a TRAP.
1361 with m.If(~pdecode2.ldst_exc.happened &
1362 ~fetch_failed):
1363 comb += self.insn_done.eq(1)
1364 m.next = "INSN_START" # back to fetch
1365
1366 def elaborate(self, platform):
1367 m = super().elaborate(platform)
1368 # convenience
1369 comb, sync = m.d.comb, m.d.sync
1370 cur_state = self.cur_state
1371 pdecode2 = self.pdecode2
1372 dbg = self.dbg
1373 core = self.core
1374
1375 # set up peripherals and core
1376 core_rst = self.core_rst
1377
1378 # indicate to outside world if any FU is still executing
1379 comb += self.any_busy.eq(core.n.o_data.any_busy_o) # any FU executing
1380
1381 # address of the next instruction, in the absence of a branch
1382 # depends on the instruction size
1383 nia = Signal(64)
1384
1385 # connect up debug signals
1386 comb += dbg.terminate_i.eq(core.o.core_terminate_o)
1387
1388 # pass the prefix mode from Fetch to Issue, so the latter can loop
1389 # on VL==0
1390 is_svp64_mode = Signal()
1391
1392 # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
1393 # issue, decode/execute, now joined by "Predicate fetch/calculate".
1394 # these are the handshake signals between each
1395
1396 # fetch FSM can run as soon as the PC is valid
1397 fetch_pc_i_valid = Signal() # Execute tells Fetch "start next read"
1398 fetch_pc_o_ready = Signal() # Fetch Tells SVSTATE "proceed"
1399
1400 # fetch FSM hands over the instruction to be decoded / issued
1401 fetch_insn_o_valid = Signal()
1402 fetch_insn_i_ready = Signal()
1403
1404 # predicate fetch FSM decodes and fetches the predicate
1405 pred_insn_i_valid = Signal()
1406 pred_insn_o_ready = Signal()
1407
1408 # predicate fetch FSM delivers the masks
1409 pred_mask_o_valid = Signal()
1410 pred_mask_i_ready = Signal()
1411
1412 # issue FSM delivers the instruction to be executed
1413 exec_insn_i_valid = Signal()
1414 exec_insn_o_ready = Signal()
1415
1416 # execute FSM, hands over the PC/SVSTATE back to the issue FSM
1417 exec_pc_o_valid = Signal()
1418 exec_pc_i_ready = Signal()
1419
1420 # the FSMs here are perhaps unusual in that they detect conditions
1421 # then "hold" information, combinatorially, for the core
1422 # (as opposed to using sync - which would be on a clock's delay)
1423 # this includes the actual opcode, valid flags and so on.
1424
1425 # Fetch, then predicate fetch, then Issue, then Execute.
1426 # Issue is where the VL for-loop lives. the ready/valid
1427 # signalling is used to communicate between the four.
1428
1429 # set up Fetch FSM
1430 fetch = FetchFSM(self.allow_overlap, self.svp64_en,
1431 self.imem, core_rst, pdecode2, cur_state,
1432 dbg, core,
1433 dbg.state.svstate, # combinatorially same
1434 nia, is_svp64_mode)
1435 m.submodules.fetch = fetch
1436 # connect up in/out data to existing Signals
1437 comb += fetch.p.i_data.pc.eq(dbg.state.pc) # combinatorially same
1438 comb += fetch.p.i_data.msr.eq(dbg.state.msr) # combinatorially same
1439 # and the ready/valid signalling
1440 comb += fetch_pc_o_ready.eq(fetch.p.o_ready)
1441 comb += fetch.p.i_valid.eq(fetch_pc_i_valid)
1442 comb += fetch_insn_o_valid.eq(fetch.n.o_valid)
1443 comb += fetch.n.i_ready.eq(fetch_insn_i_ready)
1444
1445 self.issue_fsm(m, core, nia,
1446 dbg, core_rst, is_svp64_mode,
1447 fetch_pc_o_ready, fetch_pc_i_valid,
1448 fetch_insn_o_valid, fetch_insn_i_ready,
1449 pred_insn_i_valid, pred_insn_o_ready,
1450 pred_mask_o_valid, pred_mask_i_ready,
1451 exec_insn_i_valid, exec_insn_o_ready,
1452 exec_pc_o_valid, exec_pc_i_ready)
1453
1454 if self.svp64_en:
1455 self.fetch_predicate_fsm(m,
1456 pred_insn_i_valid, pred_insn_o_ready,
1457 pred_mask_o_valid, pred_mask_i_ready)
1458
1459 self.execute_fsm(m, core,
1460 exec_insn_i_valid, exec_insn_o_ready,
1461 exec_pc_o_valid, exec_pc_i_ready)
1462
1463 return m
1464
1465
1466 class TestIssuer(Elaboratable):
1467 def __init__(self, pspec):
1468 #self.ti = TestIssuerInternal(pspec)
1469 from soc.simple.inorder import TestIssuerInternalInOrder
1470 self.ti = TestIssuerInternalInOrder(pspec)
1471 self.pll = DummyPLL(instance=True)
1472
1473 # PLL direct clock or not
1474 self.pll_en = hasattr(pspec, "use_pll") and pspec.use_pll
1475 if self.pll_en:
1476 self.pll_test_o = Signal(reset_less=True)
1477 self.pll_vco_o = Signal(reset_less=True)
1478 self.clk_sel_i = Signal(2, reset_less=True)
1479 self.ref_clk = ClockSignal() # can't rename it but that's ok
1480 self.pllclk_clk = ClockSignal("pllclk")
1481
1482 def elaborate(self, platform):
1483 m = Module()
1484 comb = m.d.comb
1485
1486 # TestIssuer nominally runs at main clock, actually it is
1487 # all combinatorial internally except for coresync'd components
1488 m.submodules.ti = ti = self.ti
1489
1490 if self.pll_en:
1491 # ClockSelect runs at PLL output internal clock rate
1492 m.submodules.wrappll = pll = self.pll
1493
1494 # add clock domains from PLL
1495 cd_pll = ClockDomain("pllclk")
1496 m.domains += cd_pll
1497
1498 # PLL clock established. has the side-effect of running clksel
1499 # at the PLL's speed (see DomainRenamer("pllclk") above)
1500 pllclk = self.pllclk_clk
1501 comb += pllclk.eq(pll.clk_pll_o)
1502
1503 # wire up external 24 MHz to PLL
1504 #comb += pll.clk_24_i.eq(self.ref_clk)
1505 # output 18 MHz PLL test signal, and analog oscillator out
1506 comb += self.pll_test_o.eq(pll.pll_test_o)
1507 comb += self.pll_vco_o.eq(pll.pll_vco_o)
1508
1509 # input to pll clock selection
1510 comb += pll.clk_sel_i.eq(self.clk_sel_i)
1511
1512 # now wire up ResetSignals. don't mind them being in this domain
1513 pll_rst = ResetSignal("pllclk")
1514 comb += pll_rst.eq(ResetSignal())
1515
1516 # internal clock is set to selector clock-out. has the side-effect of
1517 # running TestIssuer at this speed (see DomainRenamer("intclk") above)
1518 # debug clock runs at coresync internal clock
1519 cd_coresync = ClockDomain("coresync")
1520 #m.domains += cd_coresync
1521 if self.ti.dbg_domain != 'sync':
1522 cd_dbgsync = ClockDomain("dbgsync")
1523 #m.domains += cd_dbgsync
1524 intclk = ClockSignal("coresync")
1525 dbgclk = ClockSignal(self.ti.dbg_domain)
1526 # XXX BYPASS PLL XXX
1527 # XXX BYPASS PLL XXX
1528 # XXX BYPASS PLL XXX
1529 if self.pll_en:
1530 comb += intclk.eq(self.ref_clk)
1531 else:
1532 comb += intclk.eq(ClockSignal())
1533 if self.ti.dbg_domain != 'sync':
1534 dbgclk = ClockSignal(self.ti.dbg_domain)
1535 comb += dbgclk.eq(intclk)
1536
1537 return m
1538
1539 def ports(self):
1540 return list(self.ti.ports()) + list(self.pll.ports()) + \
1541 [ClockSignal(), ResetSignal()]
1542
1543 def external_ports(self):
1544 ports = self.ti.external_ports()
1545 ports.append(ClockSignal())
1546 ports.append(ResetSignal())
1547 if self.pll_en:
1548 ports.append(self.clk_sel_i)
1549 ports.append(self.pll.clk_24_i)
1550 ports.append(self.pll_test_o)
1551 ports.append(self.pll_vco_o)
1552 ports.append(self.pllclk_clk)
1553 ports.append(self.ref_clk)
1554 return ports
1555
1556
1557 if __name__ == '__main__':
1558 units = {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
1559 'spr': 1,
1560 'div': 1,
1561 'mul': 1,
1562 'shiftrot': 1
1563 }
1564 pspec = TestMemPspec(ldst_ifacetype='bare_wb',
1565 imem_ifacetype='bare_wb',
1566 addr_wid=48,
1567 mask_wid=8,
1568 reg_wid=64,
1569 units=units)
1570 dut = TestIssuer(pspec)
1571 vl = main(dut, ports=dut.ports(), name="test_issuer")
1572
1573 if len(sys.argv) == 1:
1574 vl = rtlil.convert(dut, ports=dut.external_ports(), name="test_issuer")
1575 with open("test_issuer.il", "w") as f:
1576 f.write(vl)