src/soc/simple/issuer.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
  12 the purpose of this module is to verify the functional correctness
  13 of the Function Units in the absolute simplest and clearest possible
  14 way, and to at provide something that can be further incrementally
  15 improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
  19                     ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from nmutil.singlepipe import ControlBase
  25 from soc.simple.core_data import FetchOutput, FetchInput
  26
  27 from nmigen.lib.coding import PriorityEncoder
  28
  29 from openpower.decoder.power_decoder import create_pdecode
  30 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
  31 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
  32 from openpower.decoder.decode2execute1 import Data
  33 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
  34                                            SVP64PredMode)
  35 from openpower.state import CoreState
  36 from openpower.consts import (CR, SVP64CROffs, MSR)
  37 from soc.experiment.testmem import TestMemory  # test only for instructions
  38 from soc.regfile.regfiles import StateRegs, FastRegs
  39 from soc.simple.core import NonProductionCore
  40 from soc.config.test.test_loadstore import TestMemPspec
  41 from soc.config.ifetch import ConfigFetchUnit
  42 from soc.debug.dmi import CoreDebug, DMIInterface
  43 from soc.debug.jtag import JTAG
  44 from soc.config.pinouts import get_pinspecs
  45 from soc.interrupts.xics import XICS_ICP, XICS_ICS
  46 from soc.bus.simple_gpio import SimpleGPIO
  47 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
  48 from soc.clock.select import ClockSelect
  49 from soc.clock.dummypll import DummyPLL
  50 from openpower.sv.svstate import SVSTATERec
  51 from soc.experiment.icache import ICache
  52
  53 from nmutil.util import rising_edge
  54
  55
  56 def get_insn(f_instr_o, pc):
  57     if f_instr_o.width == 32:
  58         return f_instr_o
  59     else:
  60         # 64-bit: bit 2 of pc decides which word to select
  61         return f_instr_o.word_select(pc[2], 32)
  62
  63 # gets state input or reads from state regfile
  64
  65
  66 def state_get(m, res, core_rst, state_i, name, regfile, regnum):
  67     comb = m.d.comb
  68     sync = m.d.sync
  69     # read the {insert state variable here}
  70     res_ok_delay = Signal(name="%s_ok_delay" % name)
  71     with m.If(~core_rst):
  72         sync += res_ok_delay.eq(~state_i.ok)
  73         with m.If(state_i.ok):
  74             # incoming override (start from pc_i)
  75             comb += res.eq(state_i.data)
  76         with m.Else():
  77             # otherwise read StateRegs regfile for {insert state here}...
  78             comb += regfile.ren.eq(1 << regnum)
  79         # ... but on a 1-clock delay
  80         with m.If(res_ok_delay):
  81             comb += res.eq(regfile.o_data)
  82
  83
  84 def get_predint(m, mask, name):
  85     """decode SVP64 predicate integer mask field to reg number and invert
  86     this is identical to the equivalent function in ISACaller except that
  87     it doesn't read the INT directly, it just decodes "what needs to be done"
  88     i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
  89
  90     * all1s is set to indicate that no mask is to be applied.
  91     * regread indicates the GPR register number to be read
  92     * invert is set to indicate that the register value is to be inverted
  93     * unary indicates that the contents of the register is to be shifted 1<<r3
  94     """
  95     comb = m.d.comb
  96     regread = Signal(5, name=name+"regread")
  97     invert = Signal(name=name+"invert")
  98     unary = Signal(name=name+"unary")
  99     all1s = Signal(name=name+"all1s")
 100     with m.Switch(mask):
 101         with m.Case(SVP64PredInt.ALWAYS.value):
 102             comb += all1s.eq(1)      # use 0b1111 (all ones)
 103         with m.Case(SVP64PredInt.R3_UNARY.value):
 104             comb += regread.eq(3)
 105             comb += unary.eq(1)        # 1<<r3 - shift r3 (single bit)
 106         with m.Case(SVP64PredInt.R3.value):
 107             comb += regread.eq(3)
 108         with m.Case(SVP64PredInt.R3_N.value):
 109             comb += regread.eq(3)
 110             comb += invert.eq(1)
 111         with m.Case(SVP64PredInt.R10.value):
 112             comb += regread.eq(10)
 113         with m.Case(SVP64PredInt.R10_N.value):
 114             comb += regread.eq(10)
 115             comb += invert.eq(1)
 116         with m.Case(SVP64PredInt.R30.value):
 117             comb += regread.eq(30)
 118         with m.Case(SVP64PredInt.R30_N.value):
 119             comb += regread.eq(30)
 120             comb += invert.eq(1)
 121     return regread, invert, unary, all1s
 122
 123
 124 def get_predcr(m, mask, name):
 125     """decode SVP64 predicate CR to reg number field and invert status
 126     this is identical to _get_predcr in ISACaller
 127     """
 128     comb = m.d.comb
 129     idx = Signal(2, name=name+"idx")
 130     invert = Signal(name=name+"crinvert")
 131     with m.Switch(mask):
 132         with m.Case(SVP64PredCR.LT.value):
 133             comb += idx.eq(CR.LT)
 134             comb += invert.eq(0)
 135         with m.Case(SVP64PredCR.GE.value):
 136             comb += idx.eq(CR.LT)
 137             comb += invert.eq(1)
 138         with m.Case(SVP64PredCR.GT.value):
 139             comb += idx.eq(CR.GT)
 140             comb += invert.eq(0)
 141         with m.Case(SVP64PredCR.LE.value):
 142             comb += idx.eq(CR.GT)
 143             comb += invert.eq(1)
 144         with m.Case(SVP64PredCR.EQ.value):
 145             comb += idx.eq(CR.EQ)
 146             comb += invert.eq(0)
 147         with m.Case(SVP64PredCR.NE.value):
 148             comb += idx.eq(CR.EQ)
 149             comb += invert.eq(1)
 150         with m.Case(SVP64PredCR.SO.value):
 151             comb += idx.eq(CR.SO)
 152             comb += invert.eq(0)
 153         with m.Case(SVP64PredCR.NS.value):
 154             comb += idx.eq(CR.SO)
 155             comb += invert.eq(1)
 156     return idx, invert
 157
 158
 159 class TestIssuerBase(Elaboratable):
 160     """TestIssuerBase - common base class for Issuers
 161
 162     takes care of power-on reset, peripherals, debug, DEC/TB,
 163     and gets PC/MSR/SVSTATE from the State Regfile etc.
 164     """
 165
 166     def __init__(self, pspec):
 167
 168         # test is SVP64 is to be enabled
 169         self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
 170
 171         # and if regfiles are reduced
 172         self.regreduce_en = (hasattr(pspec, "regreduce") and
 173                              (pspec.regreduce == True))
 174
 175         # and if overlap requested
 176         self.allow_overlap = (hasattr(pspec, "allow_overlap") and
 177                               (pspec.allow_overlap == True))
 178
 179         # JTAG interface.  add this right at the start because if it's
 180         # added it *modifies* the pspec, by adding enable/disable signals
 181         # for parts of the rest of the core
 182         self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
 183         self.dbg_domain = "sync"  # sigh "dbgsunc" too problematic
 184         # self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
 185         if self.jtag_en:
 186             # XXX MUST keep this up-to-date with litex, and
 187             # soc-cocotb-sim, and err.. all needs sorting out, argh
 188             subset = ['uart',
 189                       'mtwi',
 190                       'eint', 'gpio', 'mspi0',
 191                       # 'mspi1', - disabled for now
 192                       # 'pwm', 'sd0', - disabled for now
 193                       'sdr']
 194             self.jtag = JTAG(get_pinspecs(subset=subset),
 195                              domain=self.dbg_domain)
 196             # add signals to pspec to enable/disable icache and dcache
 197             # (or data and intstruction wishbone if icache/dcache not included)
 198             # https://bugs.libre-soc.org/show_bug.cgi?id=520
 199             # TODO: do we actually care if these are not domain-synchronised?
 200             # honestly probably not.
 201             pspec.wb_icache_en = self.jtag.wb_icache_en
 202             pspec.wb_dcache_en = self.jtag.wb_dcache_en
 203             self.wb_sram_en = self.jtag.wb_sram_en
 204         else:
 205             self.wb_sram_en = Const(1)
 206
 207         # add 4k sram blocks?
 208         self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
 209                          pspec.sram4x4kblock == True)
 210         if self.sram4x4k:
 211             self.sram4k = []
 212             for i in range(4):
 213                 self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
 214                                                     # features={'err'}
 215                                                     ))
 216
 217         # add interrupt controller?
 218         self.xics = hasattr(pspec, "xics") and pspec.xics == True
 219         if self.xics:
 220             self.xics_icp = XICS_ICP()
 221             self.xics_ics = XICS_ICS()
 222             self.int_level_i = self.xics_ics.int_level_i
 223
 224         # add GPIO peripheral?
 225         self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
 226         if self.gpio:
 227             self.simple_gpio = SimpleGPIO()
 228             self.gpio_o = self.simple_gpio.gpio_o
 229
 230         # main instruction core.  suitable for prototyping / demo only
 231         self.core = core = NonProductionCore(pspec)
 232         self.core_rst = ResetSignal("coresync")
 233
 234         # instruction decoder.  goes into Trap Record
 235         #pdecode = create_pdecode()
 236         self.cur_state = CoreState("cur")  # current state (MSR/PC/SVSTATE)
 237         self.pdecode2 = PowerDecode2(None, state=self.cur_state,
 238                                      opkls=IssuerDecode2ToOperand,
 239                                      svp64_en=self.svp64_en,
 240                                      regreduce_en=self.regreduce_en)
 241         pdecode = self.pdecode2.dec
 242
 243         if self.svp64_en:
 244             self.svp64 = SVP64PrefixDecoder()  # for decoding SVP64 prefix
 245
 246         self.update_svstate = Signal()  # set this if updating svstate
 247         self.new_svstate = new_svstate = SVSTATERec("new_svstate")
 248
 249         # Test Instruction memory
 250         if hasattr(core, "icache"):
 251             # XXX BLECH! use pspec to transfer the I-Cache to ConfigFetchUnit
 252             # truly dreadful.  needs a huge reorg.
 253             pspec.icache = core.icache
 254         self.imem = ConfigFetchUnit(pspec).fu
 255
 256         # DMI interface
 257         self.dbg = CoreDebug()
 258
 259         # instruction go/monitor
 260         self.pc_o = Signal(64, reset_less=True)
 261         self.pc_i = Data(64, "pc_i")  # set "ok" to indicate "please change me"
 262         self.msr_i = Data(64, "msr_i") # set "ok" to indicate "please change me"
 263         self.svstate_i = Data(64, "svstate_i")  # ditto
 264         self.core_bigendian_i = Signal()  # TODO: set based on MSR.LE
 265         self.busy_o = Signal(reset_less=True)
 266         self.memerr_o = Signal(reset_less=True)
 267
 268         # STATE regfile read /write ports for PC, MSR, SVSTATE
 269         staterf = self.core.regs.rf['state']
 270         self.state_r_msr = staterf.r_ports['msr']  # MSR rd
 271         self.state_r_pc = staterf.r_ports['cia']  # PC rd
 272         self.state_r_sv = staterf.r_ports['sv']  # SVSTATE rd
 273
 274         self.state_w_msr = staterf.w_ports['msr']  # MSR wr
 275         self.state_w_pc = staterf.w_ports['d_wr1']  # PC wr
 276         self.state_w_sv = staterf.w_ports['sv']  # SVSTATE wr
 277
 278         # DMI interface access
 279         intrf = self.core.regs.rf['int']
 280         crrf = self.core.regs.rf['cr']
 281         xerrf = self.core.regs.rf['xer']
 282         self.int_r = intrf.r_ports['dmi']  # INT read
 283         self.cr_r = crrf.r_ports['full_cr_dbg']  # CR read
 284         self.xer_r = xerrf.r_ports['full_xer']  # XER read
 285
 286         if self.svp64_en:
 287             # for predication
 288             self.int_pred = intrf.r_ports['pred']  # INT predicate read
 289             self.cr_pred = crrf.r_ports['cr_pred']  # CR predicate read
 290
 291         # hack method of keeping an eye on whether branch/trap set the PC
 292         self.state_nia = self.core.regs.rf['state'].w_ports['nia']
 293         self.state_nia.wen.name = 'state_nia_wen'
 294
 295         # pulse to synchronize the simulator at instruction end
 296         self.insn_done = Signal()
 297
 298         # indicate any instruction still outstanding, in execution
 299         self.any_busy = Signal()
 300
 301         if self.svp64_en:
 302             # store copies of predicate masks
 303             self.srcmask = Signal(64)
 304             self.dstmask = Signal(64)
 305
 306     def setup_peripherals(self, m):
 307         comb, sync = m.d.comb, m.d.sync
 308
 309         # okaaaay so the debug module must be in coresync clock domain
 310         # but NOT its reset signal. to cope with this, set every single
 311         # submodule explicitly in coresync domain, debug and JTAG
 312         # in their own one but using *external* reset.
 313         csd = DomainRenamer("coresync")
 314         dbd = DomainRenamer(self.dbg_domain)
 315
 316         m.submodules.core = core = csd(self.core)
 317         # this _so_ needs sorting out.  ICache is added down inside
 318         # LoadStore1 and is already a submodule of LoadStore1
 319         if not isinstance(self.imem, ICache):
 320             m.submodules.imem = imem = csd(self.imem)
 321         m.submodules.dbg = dbg = dbd(self.dbg)
 322         if self.jtag_en:
 323             m.submodules.jtag = jtag = dbd(self.jtag)
 324             # TODO: UART2GDB mux, here, from external pin
 325             # see https://bugs.libre-soc.org/show_bug.cgi?id=499
 326             sync += dbg.dmi.connect_to(jtag.dmi)
 327
 328         cur_state = self.cur_state
 329
 330         # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
 331         if self.sram4x4k:
 332             for i, sram in enumerate(self.sram4k):
 333                 m.submodules["sram4k_%d" % i] = csd(sram)
 334                 comb += sram.enable.eq(self.wb_sram_en)
 335
 336         # XICS interrupt handler
 337         if self.xics:
 338             m.submodules.xics_icp = icp = csd(self.xics_icp)
 339             m.submodules.xics_ics = ics = csd(self.xics_ics)
 340             comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
 341             sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core
 342
 343         # GPIO test peripheral
 344         if self.gpio:
 345             m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)
 346
 347         # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
 348         # XXX causes litex ECP5 test to get wrong idea about input and output
 349         # (but works with verilator sim *sigh*)
 350         # if self.gpio and self.xics:
 351         #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
 352
 353         # instruction decoder
 354         pdecode = create_pdecode()
 355         m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
 356         if self.svp64_en:
 357             m.submodules.svp64 = svp64 = csd(self.svp64)
 358
 359         # convenience
 360         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
 361         intrf = self.core.regs.rf['int']
 362
 363         # clock delay power-on reset
 364         cd_por = ClockDomain(reset_less=True)
 365         cd_sync = ClockDomain()
 366         core_sync = ClockDomain("coresync")
 367         m.domains += cd_por, cd_sync, core_sync
 368         if self.dbg_domain != "sync":
 369             dbg_sync = ClockDomain(self.dbg_domain)
 370             m.domains += dbg_sync
 371
 372         ti_rst = Signal(reset_less=True)
 373         delay = Signal(range(4), reset=3)
 374         with m.If(delay != 0):
 375             m.d.por += delay.eq(delay - 1)
 376         comb += cd_por.clk.eq(ClockSignal())
 377
 378         # power-on reset delay
 379         core_rst = ResetSignal("coresync")
 380         comb += ti_rst.eq(delay != 0 | dbg.core_rst_o | ResetSignal())
 381         comb += core_rst.eq(ti_rst)
 382
 383         # debug clock is same as coresync, but reset is *main external*
 384         if self.dbg_domain != "sync":
 385             dbg_rst = ResetSignal(self.dbg_domain)
 386             comb += dbg_rst.eq(ResetSignal())
 387
 388         # busy/halted signals from core
 389         core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o  # core is busy
 390         comb += self.busy_o.eq(core_busy_o)
 391         comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
 392
 393         # temporary hack: says "go" immediately for both address gen and ST
 394         l0 = core.l0
 395         ldst = core.fus.fus['ldst0']
 396         st_go_edge = rising_edge(m, ldst.st.rel_o)
 397         # link addr-go direct to rel
 398         m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)
 399         m.d.comb += ldst.st.go_i.eq(st_go_edge)  # link store-go to rising rel
 400
 401     def do_dmi(self, m, dbg):
 402         """deals with DMI debug requests
 403
 404         currently only provides read requests for the INT regfile, CR and XER
 405         it will later also deal with *writing* to these regfiles.
 406         """
 407         comb = m.d.comb
 408         sync = m.d.sync
 409         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
 410         intrf = self.core.regs.rf['int']
 411
 412         with m.If(d_reg.req):  # request for regfile access being made
 413             # TODO: error-check this
 414             # XXX should this be combinatorial?  sync better?
 415             if intrf.unary:
 416                 comb += self.int_r.ren.eq(1 << d_reg.addr)
 417             else:
 418                 comb += self.int_r.addr.eq(d_reg.addr)
 419                 comb += self.int_r.ren.eq(1)
 420         d_reg_delay = Signal()
 421         sync += d_reg_delay.eq(d_reg.req)
 422         with m.If(d_reg_delay):
 423             # data arrives one clock later
 424             comb += d_reg.data.eq(self.int_r.o_data)
 425             comb += d_reg.ack.eq(1)
 426
 427         # sigh same thing for CR debug
 428         with m.If(d_cr.req):  # request for regfile access being made
 429             comb += self.cr_r.ren.eq(0b11111111)  # enable all
 430         d_cr_delay = Signal()
 431         sync += d_cr_delay.eq(d_cr.req)
 432         with m.If(d_cr_delay):
 433             # data arrives one clock later
 434             comb += d_cr.data.eq(self.cr_r.o_data)
 435             comb += d_cr.ack.eq(1)
 436
 437         # aaand XER...
 438         with m.If(d_xer.req):  # request for regfile access being made
 439             comb += self.xer_r.ren.eq(0b111111)  # enable all
 440         d_xer_delay = Signal()
 441         sync += d_xer_delay.eq(d_xer.req)
 442         with m.If(d_xer_delay):
 443             # data arrives one clock later
 444             comb += d_xer.data.eq(self.xer_r.o_data)
 445             comb += d_xer.ack.eq(1)
 446
 447     def tb_dec_fsm(self, m, spr_dec):
 448         """tb_dec_fsm
 449
 450         this is a FSM for updating either dec or tb.  it runs alternately
 451         DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
 452         value to DEC, however the regfile has "passthrough" on it so this
 453         *should* be ok.
 454
 455         see v3.0B p1097-1099 for Timeer Resource and p1065 and p1076
 456         """
 457
 458         comb, sync = m.d.comb, m.d.sync
 459         fast_rf = self.core.regs.rf['fast']
 460         fast_r_dectb = fast_rf.r_ports['issue']  # DEC/TB
 461         fast_w_dectb = fast_rf.w_ports['issue']  # DEC/TB
 462
 463         with m.FSM() as fsm:
 464
 465             # initiates read of current DEC
 466             with m.State("DEC_READ"):
 467                 comb += fast_r_dectb.addr.eq(FastRegs.DEC)
 468                 comb += fast_r_dectb.ren.eq(1)
 469                 m.next = "DEC_WRITE"
 470
 471             # waits for DEC read to arrive (1 cycle), updates with new value
 472             with m.State("DEC_WRITE"):
 473                 new_dec = Signal(64)
 474                 # TODO: MSR.LPCR 32-bit decrement mode
 475                 comb += new_dec.eq(fast_r_dectb.o_data - 1)
 476                 comb += fast_w_dectb.addr.eq(FastRegs.DEC)
 477                 comb += fast_w_dectb.wen.eq(1)
 478                 comb += fast_w_dectb.i_data.eq(new_dec)
 479                 sync += spr_dec.eq(new_dec)  # copy into cur_state for decoder
 480                 m.next = "TB_READ"
 481
 482             # initiates read of current TB
 483             with m.State("TB_READ"):
 484                 comb += fast_r_dectb.addr.eq(FastRegs.TB)
 485                 comb += fast_r_dectb.ren.eq(1)
 486                 m.next = "TB_WRITE"
 487
 488             # waits for read TB to arrive, initiates write of current TB
 489             with m.State("TB_WRITE"):
 490                 new_tb = Signal(64)
 491                 comb += new_tb.eq(fast_r_dectb.o_data + 1)
 492                 comb += fast_w_dectb.addr.eq(FastRegs.TB)
 493                 comb += fast_w_dectb.wen.eq(1)
 494                 comb += fast_w_dectb.i_data.eq(new_tb)
 495                 m.next = "DEC_READ"
 496
 497         return m
 498
 499     def elaborate(self, platform):
 500         m = Module()
 501         # convenience
 502         comb, sync = m.d.comb, m.d.sync
 503         cur_state = self.cur_state
 504         pdecode2 = self.pdecode2
 505         dbg = self.dbg
 506
 507         # set up peripherals and core
 508         core_rst = self.core_rst
 509         self.setup_peripherals(m)
 510
 511         # reset current state if core reset requested
 512         with m.If(core_rst):
 513             m.d.sync += self.cur_state.eq(0)
 514
 515         # PC and instruction from I-Memory
 516         comb += self.pc_o.eq(cur_state.pc)
 517         self.pc_changed = Signal()  # note write to PC
 518         self.msr_changed = Signal()  # note write to MSR
 519         self.sv_changed = Signal()  # note write to SVSTATE
 520
 521         # read state either from incoming override or from regfile
 522         state = CoreState("get")  # current state (MSR/PC/SVSTATE)
 523         state_get(m, state.msr, core_rst, self.msr_i,
 524                        "msr",                  # read MSR
 525                        self.state_r_msr, StateRegs.MSR)
 526         state_get(m, state.pc, core_rst, self.pc_i,
 527                        "pc",                  # read PC
 528                        self.state_r_pc, StateRegs.PC)
 529         state_get(m, state.svstate, core_rst, self.svstate_i,
 530                             "svstate",   # read SVSTATE
 531                             self.state_r_sv, StateRegs.SVSTATE)
 532
 533         # don't write pc every cycle
 534         comb += self.state_w_pc.wen.eq(0)
 535         comb += self.state_w_pc.i_data.eq(0)
 536
 537         # connect up debug state.  note "combinatorially same" below,
 538         # this is a bit naff, passing state over in the dbg class, but
 539         # because it is combinatorial it achieves the desired goal
 540         comb += dbg.state.eq(state)
 541
 542         # this bit doesn't have to be in the FSM: connect up to read
 543         # regfiles on demand from DMI
 544         self.do_dmi(m, dbg)
 545
 546         # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
 547         # (which uses that in PowerDecoder2 to raise 0x900 exception)
 548         self.tb_dec_fsm(m, cur_state.dec)
 549
 550         # while stopped, allow updating the MSR, PC and SVSTATE.
 551         # these are mainly for debugging purposes (including DMI/JTAG)
 552         with m.If(dbg.core_stopped_i):
 553             with m.If(self.pc_i.ok):
 554                 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 555                 comb += self.state_w_pc.i_data.eq(self.pc_i.data)
 556                 sync += self.pc_changed.eq(1)
 557             with m.If(self.msr_i.ok):
 558                 comb += self.state_w_msr.wen.eq(1 << StateRegs.MSR)
 559                 comb += self.state_w_msr.i_data.eq(self.msr_i.data)
 560                 sync += self.msr_changed.eq(1)
 561             with m.If(self.svstate_i.ok | self.update_svstate):
 562                 with m.If(self.svstate_i.ok): # over-ride from external source
 563                     comb += self.new_svstate.eq(self.svstate_i.data)
 564                 comb += self.state_w_sv.wen.eq(1 << StateRegs.SVSTATE)
 565                 comb += self.state_w_sv.i_data.eq(self.new_svstate)
 566                 sync += self.sv_changed.eq(1)
 567
 568         return m
 569
 570     def __iter__(self):
 571         yield from self.pc_i.ports()
 572         yield from self.msr_i.ports()
 573         yield self.pc_o
 574         yield self.memerr_o
 575         yield from self.core.ports()
 576         yield from self.imem.ports()
 577         yield self.core_bigendian_i
 578         yield self.busy_o
 579
 580     def ports(self):
 581         return list(self)
 582
 583     def external_ports(self):
 584         ports = self.pc_i.ports()
 585         ports = self.msr_i.ports()
 586         ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
 587                   ]
 588
 589         if self.jtag_en:
 590             ports += list(self.jtag.external_ports())
 591         else:
 592             # don't add DMI if JTAG is enabled
 593             ports += list(self.dbg.dmi.ports())
 594
 595         ports += list(self.imem.ibus.fields.values())
 596         ports += list(self.core.l0.cmpi.wb_bus().fields.values())
 597
 598         if self.sram4x4k:
 599             for sram in self.sram4k:
 600                 ports += list(sram.bus.fields.values())
 601
 602         if self.xics:
 603             ports += list(self.xics_icp.bus.fields.values())
 604             ports += list(self.xics_ics.bus.fields.values())
 605             ports.append(self.int_level_i)
 606
 607         if self.gpio:
 608             ports += list(self.simple_gpio.bus.fields.values())
 609             ports.append(self.gpio_o)
 610
 611         return ports
 612
 613     def ports(self):
 614         return list(self)
 615
 616
 617
 618 # Fetch Finite State Machine.
 619 # WARNING: there are currently DriverConflicts but it's actually working.
 620 # TODO, here: everything that is global in nature, information from the
 621 # main TestIssuerInternal, needs to move to either ispec() or ospec().
 622 # not only that: TestIssuerInternal.imem can entirely move into here
 623 # because imem is only ever accessed inside the FetchFSM.
 624 class FetchFSM(ControlBase):
 625     def __init__(self, allow_overlap, svp64_en, imem, core_rst,
 626                  pdecode2, cur_state,
 627                  dbg, core, svstate, nia, is_svp64_mode):
 628         self.allow_overlap = allow_overlap
 629         self.svp64_en = svp64_en
 630         self.imem = imem
 631         self.core_rst = core_rst
 632         self.pdecode2 = pdecode2
 633         self.cur_state = cur_state
 634         self.dbg = dbg
 635         self.core = core
 636         self.svstate = svstate
 637         self.nia = nia
 638         self.is_svp64_mode = is_svp64_mode
 639
 640         # set up pipeline ControlBase and allocate i/o specs
 641         # (unusual: normally done by the Pipeline API)
 642         super().__init__(stage=self)
 643         self.p.i_data, self.n.o_data = self.new_specs(None)
 644         self.i, self.o = self.p.i_data, self.n.o_data
 645
 646     # next 3 functions are Stage API Compliance
 647     def setup(self, m, i):
 648         pass
 649
 650     def ispec(self):
 651         return FetchInput()
 652
 653     def ospec(self):
 654         return FetchOutput()
 655
 656     def elaborate(self, platform):
 657         """fetch FSM
 658
 659         this FSM performs fetch of raw instruction data, partial-decodes
 660         it 32-bit at a time to detect SVP64 prefixes, and will optionally
 661         read a 2nd 32-bit quantity if that occurs.
 662         """
 663         m = super().elaborate(platform)
 664
 665         dbg = self.dbg
 666         core = self.core
 667         pc = self.i.pc
 668         msr = self.i.msr
 669         svstate = self.svstate
 670         nia = self.nia
 671         is_svp64_mode = self.is_svp64_mode
 672         fetch_pc_o_ready = self.p.o_ready
 673         fetch_pc_i_valid = self.p.i_valid
 674         fetch_insn_o_valid = self.n.o_valid
 675         fetch_insn_i_ready = self.n.i_ready
 676
 677         comb = m.d.comb
 678         sync = m.d.sync
 679         pdecode2 = self.pdecode2
 680         cur_state = self.cur_state
 681         dec_opcode_o = pdecode2.dec.raw_opcode_in  # raw opcode
 682
 683         # also note instruction fetch failed
 684         if hasattr(core, "icache"):
 685             fetch_failed = core.icache.i_out.fetch_failed
 686             flush_needed = True
 687         else:
 688             fetch_failed = Const(0, 1)
 689             flush_needed = False
 690
 691         # set priv / virt mode on I-Cache, sigh
 692         if isinstance(self.imem, ICache):
 693             comb += self.imem.i_in.priv_mode.eq(~msr[MSR.PR])
 694             comb += self.imem.i_in.virt_mode.eq(msr[MSR.DR])
 695
 696         with m.FSM(name='fetch_fsm'):
 697
 698             # waiting (zzz)
 699             with m.State("IDLE"):
 700                 with m.If(~dbg.stopping_o & ~fetch_failed):
 701                     comb += fetch_pc_o_ready.eq(1)
 702                 with m.If(fetch_pc_i_valid & ~pdecode2.instr_fault):
 703                     # instruction allowed to go: start by reading the PC
 704                     # capture the PC and also drop it into Insn Memory
 705                     # we have joined a pair of combinatorial memory
 706                     # lookups together.  this is Generally Bad.
 707                     comb += self.imem.a_pc_i.eq(pc)
 708                     comb += self.imem.a_i_valid.eq(1)
 709                     comb += self.imem.f_i_valid.eq(1)
 710                     # transfer state to output
 711                     sync += cur_state.pc.eq(pc)
 712                     sync += cur_state.svstate.eq(svstate)  # and svstate
 713                     sync += cur_state.msr.eq(msr)  # and msr
 714
 715                     m.next = "INSN_READ"  # move to "wait for bus" phase
 716
 717             # dummy pause to find out why simulation is not keeping up
 718             with m.State("INSN_READ"):
 719                 if self.allow_overlap:
 720                     stopping = dbg.stopping_o
 721                 else:
 722                     stopping = Const(0)
 723                 with m.If(stopping):
 724                     # stopping: jump back to idle
 725                     m.next = "IDLE"
 726                 with m.Else():
 727                     with m.If(self.imem.f_busy_o &
 728                               ~pdecode2.instr_fault):  # zzz...
 729                         # busy but not fetch failed: stay in wait-read
 730                         comb += self.imem.a_i_valid.eq(1)
 731                         comb += self.imem.f_i_valid.eq(1)
 732                     with m.Else():
 733                         # not busy (or fetch failed!): instruction fetched
 734                         # when fetch failed, the instruction gets ignored
 735                         # by the decoder
 736                         if hasattr(core, "icache"):
 737                             # blech, icache returns actual instruction
 738                             insn = self.imem.f_instr_o
 739                         else:
 740                             # but these return raw memory
 741                             insn = get_insn(self.imem.f_instr_o, cur_state.pc)
 742                         if self.svp64_en:
 743                             svp64 = self.svp64
 744                             # decode the SVP64 prefix, if any
 745                             comb += svp64.raw_opcode_in.eq(insn)
 746                             comb += svp64.bigendian.eq(self.core_bigendian_i)
 747                             # pass the decoded prefix (if any) to PowerDecoder2
 748                             sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
 749                             sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
 750                             # remember whether this is a prefixed instruction,
 751                             # so the FSM can readily loop when VL==0
 752                             sync += is_svp64_mode.eq(svp64.is_svp64_mode)
 753                             # calculate the address of the following instruction
 754                             insn_size = Mux(svp64.is_svp64_mode, 8, 4)
 755                             sync += nia.eq(cur_state.pc + insn_size)
 756                             with m.If(~svp64.is_svp64_mode):
 757                                 # with no prefix, store the instruction
 758                                 # and hand it directly to the next FSM
 759                                 sync += dec_opcode_o.eq(insn)
 760                                 m.next = "INSN_READY"
 761                             with m.Else():
 762                                 # fetch the rest of the instruction from memory
 763                                 comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
 764                                 comb += self.imem.a_i_valid.eq(1)
 765                                 comb += self.imem.f_i_valid.eq(1)
 766                                 m.next = "INSN_READ2"
 767                         else:
 768                             # not SVP64 - 32-bit only
 769                             sync += nia.eq(cur_state.pc + 4)
 770                             sync += dec_opcode_o.eq(insn)
 771                             m.next = "INSN_READY"
 772
 773             with m.State("INSN_READ2"):
 774                 with m.If(self.imem.f_busy_o):  # zzz...
 775                     # busy: stay in wait-read
 776                     comb += self.imem.a_i_valid.eq(1)
 777                     comb += self.imem.f_i_valid.eq(1)
 778                 with m.Else():
 779                     # not busy: instruction fetched
 780                     insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
 781                     sync += dec_opcode_o.eq(insn)
 782                     m.next = "INSN_READY"
 783                     # TODO: probably can start looking at pdecode2.rm_dec
 784                     # here or maybe even in INSN_READ state, if svp64_mode
 785                     # detected, in order to trigger - and wait for - the
 786                     # predicate reading.
 787                     if self.svp64_en:
 788                         pmode = pdecode2.rm_dec.predmode
 789                     """
 790                     if pmode != SVP64PredMode.ALWAYS.value:
 791                         fire predicate loading FSM and wait before
 792                         moving to INSN_READY
 793                     else:
 794                         sync += self.srcmask.eq(-1) # set to all 1s
 795                         sync += self.dstmask.eq(-1) # set to all 1s
 796                         m.next = "INSN_READY"
 797                     """
 798
 799             with m.State("INSN_READY"):
 800                 # hand over the instruction, to be decoded
 801                 comb += fetch_insn_o_valid.eq(1)
 802                 with m.If(fetch_insn_i_ready):
 803                     m.next = "IDLE"
 804
 805         # whatever was done above, over-ride it if core reset is held
 806         with m.If(self.core_rst):
 807             sync += nia.eq(0)
 808
 809         return m
 810
 811
 812 class TestIssuerInternal(TestIssuerBase):
 813     """TestIssuer - reads instructions from TestMemory and issues them
 814
 815     efficiency and speed is not the main goal here: functional correctness
 816     and code clarity is.  optimisations (which almost 100% interfere with
 817     easy understanding) come later.
 818     """
 819
 820     def fetch_predicate_fsm(self, m,
 821                             pred_insn_i_valid, pred_insn_o_ready,
 822                             pred_mask_o_valid, pred_mask_i_ready):
 823         """fetch_predicate_fsm - obtains (constructs in the case of CR)
 824            src/dest predicate masks
 825
 826         https://bugs.libre-soc.org/show_bug.cgi?id=617
 827         the predicates can be read here, by using IntRegs r_ports['pred']
 828         or CRRegs r_ports['pred'].  in the case of CRs it will have to
 829         be done through multiple reads, extracting one relevant at a time.
 830         later, a faster way would be to use the 32-bit-wide CR port but
 831         this is more complex decoding, here.  equivalent code used in
 832         ISACaller is "from openpower.decoder.isa.caller import get_predcr"
 833
 834         note: this ENTIRE FSM is not to be called when svp64 is disabled
 835         """
 836         comb = m.d.comb
 837         sync = m.d.sync
 838         pdecode2 = self.pdecode2
 839         rm_dec = pdecode2.rm_dec  # SVP64RMModeDecode
 840         predmode = rm_dec.predmode
 841         srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
 842         cr_pred, int_pred = self.cr_pred, self.int_pred   # read regfiles
 843         # get src/dst step, so we can skip already used mask bits
 844         cur_state = self.cur_state
 845         srcstep = cur_state.svstate.srcstep
 846         dststep = cur_state.svstate.dststep
 847         cur_vl = cur_state.svstate.vl
 848
 849         # decode predicates
 850         sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
 851         dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
 852         sidx, scrinvert = get_predcr(m, srcpred, 's')
 853         didx, dcrinvert = get_predcr(m, dstpred, 'd')
 854
 855         # store fetched masks, for either intpred or crpred
 856         # when src/dst step is not zero, the skipped mask bits need to be
 857         # shifted-out, before actually storing them in src/dest mask
 858         new_srcmask = Signal(64, reset_less=True)
 859         new_dstmask = Signal(64, reset_less=True)
 860
 861         with m.FSM(name="fetch_predicate"):
 862
 863             with m.State("FETCH_PRED_IDLE"):
 864                 comb += pred_insn_o_ready.eq(1)
 865                 with m.If(pred_insn_i_valid):
 866                     with m.If(predmode == SVP64PredMode.INT):
 867                         # skip fetching destination mask register, when zero
 868                         with m.If(dall1s):
 869                             sync += new_dstmask.eq(-1)
 870                             # directly go to fetch source mask register
 871                             # guaranteed not to be zero (otherwise predmode
 872                             # would be SVP64PredMode.ALWAYS, not INT)
 873                             comb += int_pred.addr.eq(sregread)
 874                             comb += int_pred.ren.eq(1)
 875                             m.next = "INT_SRC_READ"
 876                         # fetch destination predicate register
 877                         with m.Else():
 878                             comb += int_pred.addr.eq(dregread)
 879                             comb += int_pred.ren.eq(1)
 880                             m.next = "INT_DST_READ"
 881                     with m.Elif(predmode == SVP64PredMode.CR):
 882                         # go fetch masks from the CR register file
 883                         sync += new_srcmask.eq(0)
 884                         sync += new_dstmask.eq(0)
 885                         m.next = "CR_READ"
 886                     with m.Else():
 887                         sync += self.srcmask.eq(-1)
 888                         sync += self.dstmask.eq(-1)
 889                         m.next = "FETCH_PRED_DONE"
 890
 891             with m.State("INT_DST_READ"):
 892                 # store destination mask
 893                 inv = Repl(dinvert, 64)
 894                 with m.If(dunary):
 895                     # set selected mask bit for 1<<r3 mode
 896                     dst_shift = Signal(range(64))
 897                     comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
 898                     sync += new_dstmask.eq(1 << dst_shift)
 899                 with m.Else():
 900                     # invert mask if requested
 901                     sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
 902                 # skip fetching source mask register, when zero
 903                 with m.If(sall1s):
 904                     sync += new_srcmask.eq(-1)
 905                     m.next = "FETCH_PRED_SHIFT_MASK"
 906                 # fetch source predicate register
 907                 with m.Else():
 908                     comb += int_pred.addr.eq(sregread)
 909                     comb += int_pred.ren.eq(1)
 910                     m.next = "INT_SRC_READ"
 911
 912             with m.State("INT_SRC_READ"):
 913                 # store source mask
 914                 inv = Repl(sinvert, 64)
 915                 with m.If(sunary):
 916                     # set selected mask bit for 1<<r3 mode
 917                     src_shift = Signal(range(64))
 918                     comb += src_shift.eq(self.int_pred.o_data & 0b111111)
 919                     sync += new_srcmask.eq(1 << src_shift)
 920                 with m.Else():
 921                     # invert mask if requested
 922                     sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
 923                 m.next = "FETCH_PRED_SHIFT_MASK"
 924
 925             # fetch masks from the CR register file
 926             # implements the following loop:
 927             # idx, inv = get_predcr(mask)
 928             # mask = 0
 929             # for cr_idx in range(vl):
 930             #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
 931             #     if cr[idx] ^ inv:
 932             #         mask |= 1 << cr_idx
 933             # return mask
 934             with m.State("CR_READ"):
 935                 # CR index to be read, which will be ready by the next cycle
 936                 cr_idx = Signal.like(cur_vl, reset_less=True)
 937                 # submit the read operation to the regfile
 938                 with m.If(cr_idx != cur_vl):
 939                     # the CR read port is unary ...
 940                     # ren = 1 << cr_idx
 941                     # ... in MSB0 convention ...
 942                     # ren = 1 << (7 - cr_idx)
 943                     # ... and with an offset:
 944                     # ren = 1 << (7 - off - cr_idx)
 945                     idx = SVP64CROffs.CRPred + cr_idx
 946                     comb += cr_pred.ren.eq(1 << (7 - idx))
 947                     # signal data valid in the next cycle
 948                     cr_read = Signal(reset_less=True)
 949                     sync += cr_read.eq(1)
 950                     # load the next index
 951                     sync += cr_idx.eq(cr_idx + 1)
 952                 with m.Else():
 953                     # exit on loop end
 954                     sync += cr_read.eq(0)
 955                     sync += cr_idx.eq(0)
 956                     m.next = "FETCH_PRED_SHIFT_MASK"
 957                 with m.If(cr_read):
 958                     # compensate for the one cycle delay on the regfile
 959                     cur_cr_idx = Signal.like(cur_vl)
 960                     comb += cur_cr_idx.eq(cr_idx - 1)
 961                     # read the CR field, select the appropriate bit
 962                     cr_field = Signal(4)
 963                     scr_bit = Signal()
 964                     dcr_bit = Signal()
 965                     comb += cr_field.eq(cr_pred.o_data)
 966                     comb += scr_bit.eq(cr_field.bit_select(sidx, 1)
 967                                        ^ scrinvert)
 968                     comb += dcr_bit.eq(cr_field.bit_select(didx, 1)
 969                                        ^ dcrinvert)
 970                     # set the corresponding mask bit
 971                     bit_to_set = Signal.like(self.srcmask)
 972                     comb += bit_to_set.eq(1 << cur_cr_idx)
 973                     with m.If(scr_bit):
 974                         sync += new_srcmask.eq(new_srcmask | bit_to_set)
 975                     with m.If(dcr_bit):
 976                         sync += new_dstmask.eq(new_dstmask | bit_to_set)
 977
 978             with m.State("FETCH_PRED_SHIFT_MASK"):
 979                 # shift-out skipped mask bits
 980                 sync += self.srcmask.eq(new_srcmask >> srcstep)
 981                 sync += self.dstmask.eq(new_dstmask >> dststep)
 982                 m.next = "FETCH_PRED_DONE"
 983
 984             with m.State("FETCH_PRED_DONE"):
 985                 comb += pred_mask_o_valid.eq(1)
 986                 with m.If(pred_mask_i_ready):
 987                     m.next = "FETCH_PRED_IDLE"
 988
 989     def issue_fsm(self, m, core, nia,
 990                   dbg, core_rst, is_svp64_mode,
 991                   fetch_pc_o_ready, fetch_pc_i_valid,
 992                   fetch_insn_o_valid, fetch_insn_i_ready,
 993                   pred_insn_i_valid, pred_insn_o_ready,
 994                   pred_mask_o_valid, pred_mask_i_ready,
 995                   exec_insn_i_valid, exec_insn_o_ready,
 996                   exec_pc_o_valid, exec_pc_i_ready):
 997         """issue FSM
 998
 999         decode / issue FSM.  this interacts with the "fetch" FSM
1000         through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
1001         (outgoing). also interacts with the "execute" FSM
1002         through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
1003         (incoming).
1004         SVP64 RM prefixes have already been set up by the
1005         "fetch" phase, so execute is fairly straightforward.
1006         """
1007
1008         comb = m.d.comb
1009         sync = m.d.sync
1010         pdecode2 = self.pdecode2
1011         cur_state = self.cur_state
1012         new_svstate = self.new_svstate
1013
1014         # temporaries
1015         dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode
1016
1017         # for updating svstate (things like srcstep etc.)
1018         comb += new_svstate.eq(cur_state.svstate)
1019
1020         # precalculate srcstep+1 and dststep+1
1021         cur_srcstep = cur_state.svstate.srcstep
1022         cur_dststep = cur_state.svstate.dststep
1023         next_srcstep = Signal.like(cur_srcstep)
1024         next_dststep = Signal.like(cur_dststep)
1025         comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
1026         comb += next_dststep.eq(cur_state.svstate.dststep+1)
1027
1028         # note if an exception happened.  in a pipelined or OoO design
1029         # this needs to be accompanied by "shadowing" (or stalling)
1030         exc_happened = self.core.o.exc_happened
1031         # also note instruction fetch failed
1032         if hasattr(core, "icache"):
1033             fetch_failed = core.icache.i_out.fetch_failed
1034             flush_needed = True
1035             # set to fault in decoder
1036             # update (highest priority) instruction fault
1037             rising_fetch_failed = rising_edge(m, fetch_failed)
1038             with m.If(rising_fetch_failed):
1039                 sync += pdecode2.instr_fault.eq(1)
1040         else:
1041             fetch_failed = Const(0, 1)
1042             flush_needed = False
1043
1044         with m.FSM(name="issue_fsm"):
1045
1046             # sync with the "fetch" phase which is reading the instruction
1047             # at this point, there is no instruction running, that
1048             # could inadvertently update the PC.
1049             with m.State("ISSUE_START"):
1050                 # reset instruction fault
1051                 sync += pdecode2.instr_fault.eq(0)
1052                 # wait on "core stop" release, before next fetch
1053                 # need to do this here, in case we are in a VL==0 loop
1054                 with m.If(~dbg.core_stop_o & ~core_rst):
1055                     comb += fetch_pc_i_valid.eq(1)  # tell fetch to start
1056                     with m.If(fetch_pc_o_ready):   # fetch acknowledged us
1057                         m.next = "INSN_WAIT"
1058                 with m.Else():
1059                     # tell core it's stopped, and acknowledge debug handshake
1060                     comb += dbg.core_stopped_i.eq(1)
1061                     # while stopped, allow updating SVSTATE
1062                     with m.If(self.svstate_i.ok):
1063                         comb += new_svstate.eq(self.svstate_i.data)
1064                         comb += self.update_svstate.eq(1)
1065                         sync += self.sv_changed.eq(1)
1066
1067             # wait for an instruction to arrive from Fetch
1068             with m.State("INSN_WAIT"):
1069                 if self.allow_overlap:
1070                     stopping = dbg.stopping_o
1071                 else:
1072                     stopping = Const(0)
1073                 with m.If(stopping):
1074                     # stopping: jump back to idle
1075                     m.next = "ISSUE_START"
1076                     if flush_needed:
1077                         # request the icache to stop asserting "failed"
1078                         comb += core.icache.flush_in.eq(1)
1079                     # stop instruction fault
1080                     sync += pdecode2.instr_fault.eq(0)
1081                 with m.Else():
1082                     comb += fetch_insn_i_ready.eq(1)
1083                     with m.If(fetch_insn_o_valid):
1084                         # loop into ISSUE_START if it's a SVP64 instruction
1085                         # and VL == 0.  this because VL==0 is a for-loop
1086                         # from 0 to 0 i.e. always, always a NOP.
1087                         cur_vl = cur_state.svstate.vl
1088                         with m.If(is_svp64_mode & (cur_vl == 0)):
1089                             # update the PC before fetching the next instruction
1090                             # since we are in a VL==0 loop, no instruction was
1091                             # executed that we could be overwriting
1092                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1093                             comb += self.state_w_pc.i_data.eq(nia)
1094                             comb += self.insn_done.eq(1)
1095                             m.next = "ISSUE_START"
1096                         with m.Else():
1097                             if self.svp64_en:
1098                                 m.next = "PRED_START"  # fetching predicate
1099                             else:
1100                                 m.next = "DECODE_SV"  # skip predication
1101
1102             with m.State("PRED_START"):
1103                 comb += pred_insn_i_valid.eq(1)  # tell fetch_pred to start
1104                 with m.If(pred_insn_o_ready):  # fetch_pred acknowledged us
1105                     m.next = "MASK_WAIT"
1106
1107             with m.State("MASK_WAIT"):
1108                 comb += pred_mask_i_ready.eq(1)  # ready to receive the masks
1109                 with m.If(pred_mask_o_valid):  # predication masks are ready
1110                     m.next = "PRED_SKIP"
1111
1112             # skip zeros in predicate
1113             with m.State("PRED_SKIP"):
1114                 with m.If(~is_svp64_mode):
1115                     m.next = "DECODE_SV"  # nothing to do
1116                 with m.Else():
1117                     if self.svp64_en:
1118                         pred_src_zero = pdecode2.rm_dec.pred_sz
1119                         pred_dst_zero = pdecode2.rm_dec.pred_dz
1120
1121                         # new srcstep, after skipping zeros
1122                         skip_srcstep = Signal.like(cur_srcstep)
1123                         # value to be added to the current srcstep
1124                         src_delta = Signal.like(cur_srcstep)
1125                         # add leading zeros to srcstep, if not in zero mode
1126                         with m.If(~pred_src_zero):
1127                             # priority encoder (count leading zeros)
1128                             # append guard bit, in case the mask is all zeros
1129                             pri_enc_src = PriorityEncoder(65)
1130                             m.submodules.pri_enc_src = pri_enc_src
1131                             comb += pri_enc_src.i.eq(Cat(self.srcmask,
1132                                                          Const(1, 1)))
1133                             comb += src_delta.eq(pri_enc_src.o)
1134                         # apply delta to srcstep
1135                         comb += skip_srcstep.eq(cur_srcstep + src_delta)
1136                         # shift-out all leading zeros from the mask
1137                         # plus the leading "one" bit
1138                         # TODO count leading zeros and shift-out the zero
1139                         #      bits, in the same step, in hardware
1140                         sync += self.srcmask.eq(self.srcmask >> (src_delta+1))
1141
1142                         # same as above, but for dststep
1143                         skip_dststep = Signal.like(cur_dststep)
1144                         dst_delta = Signal.like(cur_dststep)
1145                         with m.If(~pred_dst_zero):
1146                             pri_enc_dst = PriorityEncoder(65)
1147                             m.submodules.pri_enc_dst = pri_enc_dst
1148                             comb += pri_enc_dst.i.eq(Cat(self.dstmask,
1149                                                          Const(1, 1)))
1150                             comb += dst_delta.eq(pri_enc_dst.o)
1151                         comb += skip_dststep.eq(cur_dststep + dst_delta)
1152                         sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))
1153
1154                         # TODO: initialize mask[VL]=1 to avoid passing past VL
1155                         with m.If((skip_srcstep >= cur_vl) |
1156                                   (skip_dststep >= cur_vl)):
1157                             # end of VL loop. Update PC and reset src/dst step
1158                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1159                             comb += self.state_w_pc.i_data.eq(nia)
1160                             comb += new_svstate.srcstep.eq(0)
1161                             comb += new_svstate.dststep.eq(0)
1162                             comb += self.update_svstate.eq(1)
1163                             # synchronize with the simulator
1164                             comb += self.insn_done.eq(1)
1165                             # go back to Issue
1166                             m.next = "ISSUE_START"
1167                         with m.Else():
1168                             # update new src/dst step
1169                             comb += new_svstate.srcstep.eq(skip_srcstep)
1170                             comb += new_svstate.dststep.eq(skip_dststep)
1171                             comb += self.update_svstate.eq(1)
1172                             # proceed to Decode
1173                             m.next = "DECODE_SV"
1174
1175                         # pass predicate mask bits through to satellite decoders
1176                         # TODO: for SIMD this will be *multiple* bits
1177                         sync += core.i.sv_pred_sm.eq(self.srcmask[0])
1178                         sync += core.i.sv_pred_dm.eq(self.dstmask[0])
1179
1180             # after src/dst step have been updated, we are ready
1181             # to decode the instruction
1182             with m.State("DECODE_SV"):
1183                 # decode the instruction
1184                 with m.If(~fetch_failed):
1185                     sync += pdecode2.instr_fault.eq(0)
1186                 sync += core.i.e.eq(pdecode2.e)
1187                 sync += core.i.state.eq(cur_state)
1188                 sync += core.i.raw_insn_i.eq(dec_opcode_i)
1189                 sync += core.i.bigendian_i.eq(self.core_bigendian_i)
1190                 if self.svp64_en:
1191                     sync += core.i.sv_rm.eq(pdecode2.sv_rm)
1192                     # set RA_OR_ZERO detection in satellite decoders
1193                     sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
1194                     # and svp64 detection
1195                     sync += core.i.is_svp64_mode.eq(is_svp64_mode)
1196                     # and svp64 bit-rev'd ldst mode
1197                     ldst_dec = pdecode2.use_svp64_ldst_dec
1198                     sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
1199                 # after decoding, reset any previous exception condition,
1200                 # allowing it to be set again during the next execution
1201                 sync += pdecode2.ldst_exc.eq(0)
1202
1203                 m.next = "INSN_EXECUTE"  # move to "execute"
1204
1205             # handshake with execution FSM, move to "wait" once acknowledged
1206             with m.State("INSN_EXECUTE"):
1207                 comb += exec_insn_i_valid.eq(1)  # trigger execute
1208                 with m.If(exec_insn_o_ready):   # execute acknowledged us
1209                     m.next = "EXECUTE_WAIT"
1210
1211             with m.State("EXECUTE_WAIT"):
1212                 # wait on "core stop" release, at instruction end
1213                 # need to do this here, in case we are in a VL>1 loop
1214                 with m.If(~dbg.core_stop_o & ~core_rst):
1215                     comb += exec_pc_i_ready.eq(1)
1216                     # see https://bugs.libre-soc.org/show_bug.cgi?id=636
1217                     # the exception info needs to be blatted into
1218                     # pdecode.ldst_exc, and the instruction "re-run".
1219                     # when ldst_exc.happened is set, the PowerDecoder2
1220                     # reacts very differently: it re-writes the instruction
1221                     # with a "trap" (calls PowerDecoder2.trap()) which
1222                     # will *overwrite* whatever was requested and jump the
1223                     # PC to the exception address, as well as alter MSR.
1224                     # nothing else needs to be done other than to note
1225                     # the change of PC and MSR (and, later, SVSTATE)
1226                     with m.If(exc_happened):
1227                         mmu = core.fus.get_exc("mmu0")
1228                         ldst = core.fus.get_exc("ldst0")
1229                         if mmu is not None:
1230                             with m.If(fetch_failed):
1231                                 # instruction fetch: exception is from MMU
1232                                 # reset instr_fault (highest priority)
1233                                 sync += pdecode2.ldst_exc.eq(mmu)
1234                                 sync += pdecode2.instr_fault.eq(0)
1235                                 if flush_needed:
1236                                     # request icache to stop asserting "failed"
1237                                     comb += core.icache.flush_in.eq(1)
1238                         with m.If(~fetch_failed):
1239                             # otherwise assume it was a LDST exception
1240                             sync += pdecode2.ldst_exc.eq(ldst)
1241
1242                     with m.If(exec_pc_o_valid):
1243
1244                         # was this the last loop iteration?
1245                         is_last = Signal()
1246                         cur_vl = cur_state.svstate.vl
1247                         comb += is_last.eq(next_srcstep == cur_vl)
1248
1249                         with m.If(pdecode2.instr_fault):
1250                             # reset instruction fault, try again
1251                             sync += pdecode2.instr_fault.eq(0)
1252                             m.next = "ISSUE_START"
1253
1254                         # return directly to Decode if Execute generated an
1255                         # exception.
1256                         with m.Elif(pdecode2.ldst_exc.happened):
1257                             m.next = "DECODE_SV"
1258
1259                         # if MSR, PC or SVSTATE were changed by the previous
1260                         # instruction, go directly back to Fetch, without
1261                         # updating either MSR PC or SVSTATE
1262                         with m.Elif(self.msr_changed | self.pc_changed |
1263                                     self.sv_changed):
1264                             m.next = "ISSUE_START"
1265
1266                         # also return to Fetch, when no output was a vector
1267                         # (regardless of SRCSTEP and VL), or when the last
1268                         # instruction was really the last one of the VL loop
1269                         with m.Elif((~pdecode2.loop_continue) | is_last):
1270                             # before going back to fetch, update the PC state
1271                             # register with the NIA.
1272                             # ok here we are not reading the branch unit.
1273                             # TODO: this just blithely overwrites whatever
1274                             #       pipeline updated the PC
1275                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1276                             comb += self.state_w_pc.i_data.eq(nia)
1277                             # reset SRCSTEP before returning to Fetch
1278                             if self.svp64_en:
1279                                 with m.If(pdecode2.loop_continue):
1280                                     comb += new_svstate.srcstep.eq(0)
1281                                     comb += new_svstate.dststep.eq(0)
1282                                     comb += self.update_svstate.eq(1)
1283                             else:
1284                                 comb += new_svstate.srcstep.eq(0)
1285                                 comb += new_svstate.dststep.eq(0)
1286                                 comb += self.update_svstate.eq(1)
1287                             m.next = "ISSUE_START"
1288
1289                         # returning to Execute? then, first update SRCSTEP
1290                         with m.Else():
1291                             comb += new_svstate.srcstep.eq(next_srcstep)
1292                             comb += new_svstate.dststep.eq(next_dststep)
1293                             comb += self.update_svstate.eq(1)
1294                             # return to mask skip loop
1295                             m.next = "PRED_SKIP"
1296
1297                 with m.Else():
1298                     comb += dbg.core_stopped_i.eq(1)
1299                     if flush_needed:
1300                         # request the icache to stop asserting "failed"
1301                         comb += core.icache.flush_in.eq(1)
1302                     # stop instruction fault
1303                     sync += pdecode2.instr_fault.eq(0)
1304
1305         # check if svstate needs updating: if so, write it to State Regfile
1306         with m.If(self.update_svstate):
1307             sync += cur_state.svstate.eq(self.new_svstate)  # for next clock
1308
1309     def execute_fsm(self, m, core,
1310                     exec_insn_i_valid, exec_insn_o_ready,
1311                     exec_pc_o_valid, exec_pc_i_ready):
1312         """execute FSM
1313
1314         execute FSM. this interacts with the "issue" FSM
1315         through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
1316         (outgoing). SVP64 RM prefixes have already been set up by the
1317         "issue" phase, so execute is fairly straightforward.
1318         """
1319
1320         comb = m.d.comb
1321         sync = m.d.sync
1322         pdecode2 = self.pdecode2
1323
1324         # temporaries
1325         core_busy_o = core.n.o_data.busy_o  # core is busy
1326         core_ivalid_i = core.p.i_valid              # instruction is valid
1327
1328         if hasattr(core, "icache"):
1329             fetch_failed = core.icache.i_out.fetch_failed
1330         else:
1331             fetch_failed = Const(0, 1)
1332
1333         with m.FSM(name="exec_fsm"):
1334
1335             # waiting for instruction bus (stays there until not busy)
1336             with m.State("INSN_START"):
1337                 comb += exec_insn_o_ready.eq(1)
1338                 with m.If(exec_insn_i_valid):
1339                     comb += core_ivalid_i.eq(1)  # instruction is valid/issued
1340                     sync += self.sv_changed.eq(0)
1341                     sync += self.pc_changed.eq(0)
1342                     sync += self.msr_changed.eq(0)
1343                     with m.If(core.p.o_ready):  # only move if accepted
1344                         m.next = "INSN_ACTIVE"  # move to "wait completion"
1345
1346             # instruction started: must wait till it finishes
1347             with m.State("INSN_ACTIVE"):
1348                 # note changes to MSR, PC and SVSTATE
1349                 # XXX oops, really must monitor *all* State Regfile write
1350                 # ports looking for changes!
1351                 with m.If(self.state_nia.wen & (1 << StateRegs.SVSTATE)):
1352                     sync += self.sv_changed.eq(1)
1353                 with m.If(self.state_nia.wen & (1 << StateRegs.MSR)):
1354                     sync += self.msr_changed.eq(1)
1355                 with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
1356                     sync += self.pc_changed.eq(1)
1357                 with m.If(~core_busy_o):  # instruction done!
1358                     comb += exec_pc_o_valid.eq(1)
1359                     with m.If(exec_pc_i_ready):
1360                         # when finished, indicate "done".
1361                         # however, if there was an exception, the instruction
1362                         # is *not* yet done.  this is an implementation
1363                         # detail: we choose to implement exceptions by
1364                         # taking the exception information from the LDST
1365                         # unit, putting that *back* into the PowerDecoder2,
1366                         # and *re-running the entire instruction*.
1367                         # if we erroneously indicate "done" here, it is as if
1368                         # there were *TWO* instructions:
1369                         # 1) the failed LDST 2) a TRAP.
1370                         with m.If(~pdecode2.ldst_exc.happened &
1371                                    ~pdecode2.instr_fault):
1372                             comb += self.insn_done.eq(1)
1373                         m.next = "INSN_START"  # back to fetch
1374
1375     def elaborate(self, platform):
1376         m = super().elaborate(platform)
1377         # convenience
1378         comb, sync = m.d.comb, m.d.sync
1379         cur_state = self.cur_state
1380         pdecode2 = self.pdecode2
1381         dbg = self.dbg
1382         core = self.core
1383
1384         # set up peripherals and core
1385         core_rst = self.core_rst
1386
1387         # indicate to outside world if any FU is still executing
1388         comb += self.any_busy.eq(core.n.o_data.any_busy_o)  # any FU executing
1389
1390         # address of the next instruction, in the absence of a branch
1391         # depends on the instruction size
1392         nia = Signal(64)
1393
1394         # connect up debug signals
1395         comb += dbg.terminate_i.eq(core.o.core_terminate_o)
1396
1397         # pass the prefix mode from Fetch to Issue, so the latter can loop
1398         # on VL==0
1399         is_svp64_mode = Signal()
1400
1401         # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
1402         # issue, decode/execute, now joined by "Predicate fetch/calculate".
1403         # these are the handshake signals between each
1404
1405         # fetch FSM can run as soon as the PC is valid
1406         fetch_pc_i_valid = Signal()  # Execute tells Fetch "start next read"
1407         fetch_pc_o_ready = Signal()  # Fetch Tells SVSTATE "proceed"
1408
1409         # fetch FSM hands over the instruction to be decoded / issued
1410         fetch_insn_o_valid = Signal()
1411         fetch_insn_i_ready = Signal()
1412
1413         # predicate fetch FSM decodes and fetches the predicate
1414         pred_insn_i_valid = Signal()
1415         pred_insn_o_ready = Signal()
1416
1417         # predicate fetch FSM delivers the masks
1418         pred_mask_o_valid = Signal()
1419         pred_mask_i_ready = Signal()
1420
1421         # issue FSM delivers the instruction to the be executed
1422         exec_insn_i_valid = Signal()
1423         exec_insn_o_ready = Signal()
1424
1425         # execute FSM, hands over the PC/SVSTATE back to the issue FSM
1426         exec_pc_o_valid = Signal()
1427         exec_pc_i_ready = Signal()
1428
1429         # the FSMs here are perhaps unusual in that they detect conditions
1430         # then "hold" information, combinatorially, for the core
1431         # (as opposed to using sync - which would be on a clock's delay)
1432         # this includes the actual opcode, valid flags and so on.
1433
1434         # Fetch, then predicate fetch, then Issue, then Execute.
1435         # Issue is where the VL for-loop # lives.  the ready/valid
1436         # signalling is used to communicate between the four.
1437
1438         # set up Fetch FSM
1439         fetch = FetchFSM(self.allow_overlap, self.svp64_en,
1440                          self.imem, core_rst, pdecode2, cur_state,
1441                          dbg, core,
1442                          dbg.state.svstate, # combinatorially same
1443                          nia, is_svp64_mode)
1444         m.submodules.fetch = fetch
1445         # connect up in/out data to existing Signals
1446         comb += fetch.p.i_data.pc.eq(dbg.state.pc)   # combinatorially same
1447         comb += fetch.p.i_data.msr.eq(dbg.state.msr) # combinatorially same
1448         # and the ready/valid signalling
1449         comb += fetch_pc_o_ready.eq(fetch.p.o_ready)
1450         comb += fetch.p.i_valid.eq(fetch_pc_i_valid)
1451         comb += fetch_insn_o_valid.eq(fetch.n.o_valid)
1452         comb += fetch.n.i_ready.eq(fetch_insn_i_ready)
1453
1454         self.issue_fsm(m, core, nia,
1455                        dbg, core_rst, is_svp64_mode,
1456                        fetch_pc_o_ready, fetch_pc_i_valid,
1457                        fetch_insn_o_valid, fetch_insn_i_ready,
1458                        pred_insn_i_valid, pred_insn_o_ready,
1459                        pred_mask_o_valid, pred_mask_i_ready,
1460                        exec_insn_i_valid, exec_insn_o_ready,
1461                        exec_pc_o_valid, exec_pc_i_ready)
1462
1463         if self.svp64_en:
1464             self.fetch_predicate_fsm(m,
1465                                      pred_insn_i_valid, pred_insn_o_ready,
1466                                      pred_mask_o_valid, pred_mask_i_ready)
1467
1468         self.execute_fsm(m, core,
1469                          exec_insn_i_valid, exec_insn_o_ready,
1470                          exec_pc_o_valid, exec_pc_i_ready)
1471
1472         return m
1473
1474
1475 class TestIssuer(Elaboratable):
1476     def __init__(self, pspec):
1477         self.ti = TestIssuerInternal(pspec)
1478         # XXX TODO: make this a command-line selectable option from pspec
1479         #from soc.simple.inorder import TestIssuerInternalInOrder
1480         #self.ti = TestIssuerInternalInOrder(pspec)
1481         self.pll = DummyPLL(instance=True)
1482
1483         # PLL direct clock or not
1484         self.pll_en = hasattr(pspec, "use_pll") and pspec.use_pll
1485         if self.pll_en:
1486             self.pll_test_o = Signal(reset_less=True)
1487             self.pll_vco_o = Signal(reset_less=True)
1488             self.clk_sel_i = Signal(2, reset_less=True)
1489             self.ref_clk = ClockSignal()  # can't rename it but that's ok
1490             self.pllclk_clk = ClockSignal("pllclk")
1491
1492     def elaborate(self, platform):
1493         m = Module()
1494         comb = m.d.comb
1495
1496         # TestIssuer nominally runs at main clock, actually it is
1497         # all combinatorial internally except for coresync'd components
1498         m.submodules.ti = ti = self.ti
1499
1500         if self.pll_en:
1501             # ClockSelect runs at PLL output internal clock rate
1502             m.submodules.wrappll = pll = self.pll
1503
1504             # add clock domains from PLL
1505             cd_pll = ClockDomain("pllclk")
1506             m.domains += cd_pll
1507
1508             # PLL clock established.  has the side-effect of running clklsel
1509             # at the PLL's speed (see DomainRenamer("pllclk") above)
1510             pllclk = self.pllclk_clk
1511             comb += pllclk.eq(pll.clk_pll_o)
1512
1513             # wire up external 24mhz to PLL
1514             #comb += pll.clk_24_i.eq(self.ref_clk)
1515             # output 18 mhz PLL test signal, and analog oscillator out
1516             comb += self.pll_test_o.eq(pll.pll_test_o)
1517             comb += self.pll_vco_o.eq(pll.pll_vco_o)
1518
1519             # input to pll clock selection
1520             comb += pll.clk_sel_i.eq(self.clk_sel_i)
1521
1522             # now wire up ResetSignals.  don't mind them being in this domain
1523             pll_rst = ResetSignal("pllclk")
1524             comb += pll_rst.eq(ResetSignal())
1525
1526         # internal clock is set to selector clock-out.  has the side-effect of
1527         # running TestIssuer at this speed (see DomainRenamer("intclk") above)
1528         # debug clock runs at coresync internal clock
1529         cd_coresync = ClockDomain("coresync")
1530         #m.domains += cd_coresync
1531         if self.ti.dbg_domain != 'sync':
1532             cd_dbgsync = ClockDomain("dbgsync")
1533             #m.domains += cd_dbgsync
1534         intclk = ClockSignal("coresync")
1535         dbgclk = ClockSignal(self.ti.dbg_domain)
1536         # XXX BYPASS PLL XXX
1537         # XXX BYPASS PLL XXX
1538         # XXX BYPASS PLL XXX
1539         if self.pll_en:
1540             comb += intclk.eq(self.ref_clk)
1541         else:
1542             comb += intclk.eq(ClockSignal())
1543         if self.ti.dbg_domain != 'sync':
1544             dbgclk = ClockSignal(self.ti.dbg_domain)
1545             comb += dbgclk.eq(intclk)
1546
1547         return m
1548
1549     def ports(self):
1550         return list(self.ti.ports()) + list(self.pll.ports()) + \
1551             [ClockSignal(), ResetSignal()]
1552
1553     def external_ports(self):
1554         ports = self.ti.external_ports()
1555         ports.append(ClockSignal())
1556         ports.append(ResetSignal())
1557         if self.pll_en:
1558             ports.append(self.clk_sel_i)
1559             ports.append(self.pll.clk_24_i)
1560             ports.append(self.pll_test_o)
1561             ports.append(self.pll_vco_o)
1562             ports.append(self.pllclk_clk)
1563             ports.append(self.ref_clk)
1564         return ports
1565
1566
1567 if __name__ == '__main__':
1568     units = {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
1569              'spr': 1,
1570              'div': 1,
1571              'mul': 1,
1572              'shiftrot': 1
1573              }
1574     pspec = TestMemPspec(ldst_ifacetype='bare_wb',
1575                          imem_ifacetype='bare_wb',
1576                          addr_wid=48,
1577                          mask_wid=8,
1578                          reg_wid=64,
1579                          units=units)
1580     dut = TestIssuer(pspec)
1581     vl = main(dut, ports=dut.ports(), name="test_issuer")
1582
1583     if len(sys.argv) == 1:
1584         vl = rtlil.convert(dut, ports=dut.external_ports(), name="test_issuer")
1585         with open("test_issuer.il", "w") as f:
1586             f.write(vl)