src/soc/simple/issuer.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
  12 the purpose of this module is to verify the functional correctness
  13 of the Function Units in the absolute simplest and clearest possible
  14 way, and to at provide something that can be further incrementally
  15 improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
  19                     ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from nmutil.singlepipe import ControlBase
  25 from soc.simple.core_data import FetchOutput, FetchInput
  26
  27 from nmigen.lib.coding import PriorityEncoder
  28
  29 from openpower.decoder.power_decoder import create_pdecode
  30 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
  31 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
  32 from openpower.decoder.decode2execute1 import Data
  33 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
  34                                            SVP64PredMode)
  35 from openpower.state import CoreState
  36 from openpower.consts import (CR, SVP64CROffs, MSR)
  37 from soc.experiment.testmem import TestMemory  # test only for instructions
  38 from soc.regfile.regfiles import StateRegs, FastRegs
  39 from soc.simple.core import NonProductionCore
  40 from soc.config.test.test_loadstore import TestMemPspec
  41 from soc.config.ifetch import ConfigFetchUnit
  42 from soc.debug.dmi import CoreDebug, DMIInterface
  43 from soc.debug.jtag import JTAG
  44 from soc.config.pinouts import get_pinspecs
  45 from soc.interrupts.xics import XICS_ICP, XICS_ICS
  46 from soc.bus.simple_gpio import SimpleGPIO
  47 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
  48 from soc.clock.select import ClockSelect
  49 from soc.clock.dummypll import DummyPLL
  50 from openpower.sv.svstate import SVSTATERec
  51 from soc.experiment.icache import ICache
  52
  53 from nmutil.util import rising_edge
  54
  55
  56 def get_insn(f_instr_o, pc):
  57     if f_instr_o.width == 32:
  58         return f_instr_o
  59     else:
  60         # 64-bit: bit 2 of pc decides which word to select
  61         return f_instr_o.word_select(pc[2], 32)
  62
  63 # gets state input or reads from state regfile
  64
  65
  66 def state_get(m, res, core_rst, state_i, name, regfile, regnum):
  67     comb = m.d.comb
  68     sync = m.d.sync
  69     # read the {insert state variable here}
  70     res_ok_delay = Signal(name="%s_ok_delay" % name)
  71     with m.If(~core_rst):
  72         sync += res_ok_delay.eq(~state_i.ok)
  73         with m.If(state_i.ok):
  74             # incoming override (start from pc_i)
  75             comb += res.eq(state_i.data)
  76         with m.Else():
  77             # otherwise read StateRegs regfile for {insert state here}...
  78             comb += regfile.ren.eq(1 << regnum)
  79         # ... but on a 1-clock delay
  80         with m.If(res_ok_delay):
  81             comb += res.eq(regfile.o_data)
  82
  83
  84 def get_predint(m, mask, name):
  85     """decode SVP64 predicate integer mask field to reg number and invert
  86     this is identical to the equivalent function in ISACaller except that
  87     it doesn't read the INT directly, it just decodes "what needs to be done"
  88     i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
  89
  90     * all1s is set to indicate that no mask is to be applied.
  91     * regread indicates the GPR register number to be read
  92     * invert is set to indicate that the register value is to be inverted
  93     * unary indicates that the contents of the register is to be shifted 1<<r3
  94     """
  95     comb = m.d.comb
  96     regread = Signal(5, name=name+"regread")
  97     invert = Signal(name=name+"invert")
  98     unary = Signal(name=name+"unary")
  99     all1s = Signal(name=name+"all1s")
 100     with m.Switch(mask):
 101         with m.Case(SVP64PredInt.ALWAYS.value):
 102             comb += all1s.eq(1)      # use 0b1111 (all ones)
 103         with m.Case(SVP64PredInt.R3_UNARY.value):
 104             comb += regread.eq(3)
 105             comb += unary.eq(1)        # 1<<r3 - shift r3 (single bit)
 106         with m.Case(SVP64PredInt.R3.value):
 107             comb += regread.eq(3)
 108         with m.Case(SVP64PredInt.R3_N.value):
 109             comb += regread.eq(3)
 110             comb += invert.eq(1)
 111         with m.Case(SVP64PredInt.R10.value):
 112             comb += regread.eq(10)
 113         with m.Case(SVP64PredInt.R10_N.value):
 114             comb += regread.eq(10)
 115             comb += invert.eq(1)
 116         with m.Case(SVP64PredInt.R30.value):
 117             comb += regread.eq(30)
 118         with m.Case(SVP64PredInt.R30_N.value):
 119             comb += regread.eq(30)
 120             comb += invert.eq(1)
 121     return regread, invert, unary, all1s
 122
 123
 124 def get_predcr(m, mask, name):
 125     """decode SVP64 predicate CR to reg number field and invert status
 126     this is identical to _get_predcr in ISACaller
 127     """
 128     comb = m.d.comb
 129     idx = Signal(2, name=name+"idx")
 130     invert = Signal(name=name+"crinvert")
 131     with m.Switch(mask):
 132         with m.Case(SVP64PredCR.LT.value):
 133             comb += idx.eq(CR.LT)
 134             comb += invert.eq(0)
 135         with m.Case(SVP64PredCR.GE.value):
 136             comb += idx.eq(CR.LT)
 137             comb += invert.eq(1)
 138         with m.Case(SVP64PredCR.GT.value):
 139             comb += idx.eq(CR.GT)
 140             comb += invert.eq(0)
 141         with m.Case(SVP64PredCR.LE.value):
 142             comb += idx.eq(CR.GT)
 143             comb += invert.eq(1)
 144         with m.Case(SVP64PredCR.EQ.value):
 145             comb += idx.eq(CR.EQ)
 146             comb += invert.eq(0)
 147         with m.Case(SVP64PredCR.NE.value):
 148             comb += idx.eq(CR.EQ)
 149             comb += invert.eq(1)
 150         with m.Case(SVP64PredCR.SO.value):
 151             comb += idx.eq(CR.SO)
 152             comb += invert.eq(0)
 153         with m.Case(SVP64PredCR.NS.value):
 154             comb += idx.eq(CR.SO)
 155             comb += invert.eq(1)
 156     return idx, invert
 157
 158
 159 class TestIssuerBase(Elaboratable):
 160     """TestIssuerBase - common base class for Issuers
 161
 162     takes care of power-on reset, peripherals, debug, DEC/TB,
 163     and gets PC/MSR/SVSTATE from the State Regfile etc.
 164     """
 165
 166     def __init__(self, pspec):
 167
 168         # test is SVP64 is to be enabled
 169         self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
 170
 171         # and if regfiles are reduced
 172         self.regreduce_en = (hasattr(pspec, "regreduce") and
 173                              (pspec.regreduce == True))
 174
 175         # and if overlap requested
 176         self.allow_overlap = (hasattr(pspec, "allow_overlap") and
 177                               (pspec.allow_overlap == True))
 178
 179         # JTAG interface.  add this right at the start because if it's
 180         # added it *modifies* the pspec, by adding enable/disable signals
 181         # for parts of the rest of the core
 182         self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
 183         self.dbg_domain = "sync"  # sigh "dbgsunc" too problematic
 184         # self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
 185         if self.jtag_en:
 186             # XXX MUST keep this up-to-date with litex, and
 187             # soc-cocotb-sim, and err.. all needs sorting out, argh
 188             subset = ['uart',
 189                       'mtwi',
 190                       'eint', 'gpio', 'mspi0',
 191                       # 'mspi1', - disabled for now
 192                       # 'pwm', 'sd0', - disabled for now
 193                       'sdr']
 194             self.jtag = JTAG(get_pinspecs(subset=subset),
 195                              domain=self.dbg_domain)
 196             # add signals to pspec to enable/disable icache and dcache
 197             # (or data and intstruction wishbone if icache/dcache not included)
 198             # https://bugs.libre-soc.org/show_bug.cgi?id=520
 199             # TODO: do we actually care if these are not domain-synchronised?
 200             # honestly probably not.
 201             pspec.wb_icache_en = self.jtag.wb_icache_en
 202             pspec.wb_dcache_en = self.jtag.wb_dcache_en
 203             self.wb_sram_en = self.jtag.wb_sram_en
 204         else:
 205             self.wb_sram_en = Const(1)
 206
 207         # add 4k sram blocks?
 208         self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
 209                          pspec.sram4x4kblock == True)
 210         if self.sram4x4k:
 211             self.sram4k = []
 212             for i in range(4):
 213                 self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
 214                                                     # features={'err'}
 215                                                     ))
 216
 217         # add interrupt controller?
 218         self.xics = hasattr(pspec, "xics") and pspec.xics == True
 219         if self.xics:
 220             self.xics_icp = XICS_ICP()
 221             self.xics_ics = XICS_ICS()
 222             self.int_level_i = self.xics_ics.int_level_i
 223
 224         # add GPIO peripheral?
 225         self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
 226         if self.gpio:
 227             self.simple_gpio = SimpleGPIO()
 228             self.gpio_o = self.simple_gpio.gpio_o
 229
 230         # main instruction core.  suitable for prototyping / demo only
 231         self.core = core = NonProductionCore(pspec)
 232         self.core_rst = ResetSignal("coresync")
 233
 234         # instruction decoder.  goes into Trap Record
 235         #pdecode = create_pdecode()
 236         self.cur_state = CoreState("cur")  # current state (MSR/PC/SVSTATE)
 237         self.pdecode2 = PowerDecode2(None, state=self.cur_state,
 238                                      opkls=IssuerDecode2ToOperand,
 239                                      svp64_en=self.svp64_en,
 240                                      regreduce_en=self.regreduce_en)
 241         pdecode = self.pdecode2.dec
 242
 243         if self.svp64_en:
 244             self.svp64 = SVP64PrefixDecoder()  # for decoding SVP64 prefix
 245
 246         self.update_svstate = Signal()  # set this if updating svstate
 247         self.new_svstate = new_svstate = SVSTATERec("new_svstate")
 248
 249         # Test Instruction memory
 250         if hasattr(core, "icache"):
 251             # XXX BLECH! use pspec to transfer the I-Cache to ConfigFetchUnit
 252             # truly dreadful.  needs a huge reorg.
 253             pspec.icache = core.icache
 254         self.imem = ConfigFetchUnit(pspec).fu
 255
 256         # DMI interface
 257         self.dbg = CoreDebug()
 258
 259         # instruction go/monitor
 260         self.pc_o = Signal(64, reset_less=True)
 261         self.pc_i = Data(64, "pc_i")  # set "ok" to indicate "please change me"
 262         self.msr_i = Data(64, "msr_i") # set "ok" to indicate "please change me"
 263         self.svstate_i = Data(64, "svstate_i")  # ditto
 264         self.core_bigendian_i = Signal()  # TODO: set based on MSR.LE
 265         self.busy_o = Signal(reset_less=True)
 266         self.memerr_o = Signal(reset_less=True)
 267
 268         # STATE regfile read /write ports for PC, MSR, SVSTATE
 269         staterf = self.core.regs.rf['state']
 270         self.state_r_msr = staterf.r_ports['msr']  # MSR rd
 271         self.state_r_pc = staterf.r_ports['cia']  # PC rd
 272         self.state_r_sv = staterf.r_ports['sv']  # SVSTATE rd
 273
 274         self.state_w_msr = staterf.w_ports['msr']  # MSR wr
 275         self.state_w_pc = staterf.w_ports['d_wr1']  # PC wr
 276         self.state_w_sv = staterf.w_ports['sv']  # SVSTATE wr
 277
 278         # DMI interface access
 279         intrf = self.core.regs.rf['int']
 280         crrf = self.core.regs.rf['cr']
 281         xerrf = self.core.regs.rf['xer']
 282         self.int_r = intrf.r_ports['dmi']  # INT read
 283         self.cr_r = crrf.r_ports['full_cr_dbg']  # CR read
 284         self.xer_r = xerrf.r_ports['full_xer']  # XER read
 285
 286         if self.svp64_en:
 287             # for predication
 288             self.int_pred = intrf.r_ports['pred']  # INT predicate read
 289             self.cr_pred = crrf.r_ports['cr_pred']  # CR predicate read
 290
 291         # hack method of keeping an eye on whether branch/trap set the PC
 292         self.state_nia = self.core.regs.rf['state'].w_ports['nia']
 293         self.state_nia.wen.name = 'state_nia_wen'
 294
 295         # pulse to synchronize the simulator at instruction end
 296         self.insn_done = Signal()
 297
 298         # indicate any instruction still outstanding, in execution
 299         self.any_busy = Signal()
 300
 301         if self.svp64_en:
 302             # store copies of predicate masks
 303             self.srcmask = Signal(64)
 304             self.dstmask = Signal(64)
 305
 306     def setup_peripherals(self, m):
 307         comb, sync = m.d.comb, m.d.sync
 308
 309         # okaaaay so the debug module must be in coresync clock domain
 310         # but NOT its reset signal. to cope with this, set every single
 311         # submodule explicitly in coresync domain, debug and JTAG
 312         # in their own one but using *external* reset.
 313         csd = DomainRenamer("coresync")
 314         dbd = DomainRenamer(self.dbg_domain)
 315
 316         m.submodules.core = core = csd(self.core)
 317         # this _so_ needs sorting out.  ICache is added down inside
 318         # LoadStore1 and is already a submodule of LoadStore1
 319         if not isinstance(self.imem, ICache):
 320             m.submodules.imem = imem = csd(self.imem)
 321         m.submodules.dbg = dbg = dbd(self.dbg)
 322         if self.jtag_en:
 323             m.submodules.jtag = jtag = dbd(self.jtag)
 324             # TODO: UART2GDB mux, here, from external pin
 325             # see https://bugs.libre-soc.org/show_bug.cgi?id=499
 326             sync += dbg.dmi.connect_to(jtag.dmi)
 327
 328         cur_state = self.cur_state
 329
 330         # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
 331         if self.sram4x4k:
 332             for i, sram in enumerate(self.sram4k):
 333                 m.submodules["sram4k_%d" % i] = csd(sram)
 334                 comb += sram.enable.eq(self.wb_sram_en)
 335
 336         # XICS interrupt handler
 337         if self.xics:
 338             m.submodules.xics_icp = icp = csd(self.xics_icp)
 339             m.submodules.xics_ics = ics = csd(self.xics_ics)
 340             comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
 341             sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core
 342
 343         # GPIO test peripheral
 344         if self.gpio:
 345             m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)
 346
 347         # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
 348         # XXX causes litex ECP5 test to get wrong idea about input and output
 349         # (but works with verilator sim *sigh*)
 350         # if self.gpio and self.xics:
 351         #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
 352
 353         # instruction decoder
 354         pdecode = create_pdecode()
 355         m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
 356         if self.svp64_en:
 357             m.submodules.svp64 = svp64 = csd(self.svp64)
 358
 359         # convenience
 360         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
 361         intrf = self.core.regs.rf['int']
 362
 363         # clock delay power-on reset
 364         cd_por = ClockDomain(reset_less=True)
 365         cd_sync = ClockDomain()
 366         core_sync = ClockDomain("coresync")
 367         m.domains += cd_por, cd_sync, core_sync
 368         if self.dbg_domain != "sync":
 369             dbg_sync = ClockDomain(self.dbg_domain)
 370             m.domains += dbg_sync
 371
 372         ti_rst = Signal(reset_less=True)
 373         delay = Signal(range(4), reset=3)
 374         with m.If(delay != 0):
 375             m.d.por += delay.eq(delay - 1)
 376         comb += cd_por.clk.eq(ClockSignal())
 377
 378         # power-on reset delay
 379         core_rst = ResetSignal("coresync")
 380         comb += ti_rst.eq(delay != 0 | dbg.core_rst_o | ResetSignal())
 381         comb += core_rst.eq(ti_rst)
 382
 383         # debug clock is same as coresync, but reset is *main external*
 384         if self.dbg_domain != "sync":
 385             dbg_rst = ResetSignal(self.dbg_domain)
 386             comb += dbg_rst.eq(ResetSignal())
 387
 388         # busy/halted signals from core
 389         core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o  # core is busy
 390         comb += self.busy_o.eq(core_busy_o)
 391         comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
 392
 393         # temporary hack: says "go" immediately for both address gen and ST
 394         l0 = core.l0
 395         ldst = core.fus.fus['ldst0']
 396         st_go_edge = rising_edge(m, ldst.st.rel_o)
 397         # link addr-go direct to rel
 398         m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)
 399         m.d.comb += ldst.st.go_i.eq(st_go_edge)  # link store-go to rising rel
 400
 401     def do_dmi(self, m, dbg):
 402         """deals with DMI debug requests
 403
 404         currently only provides read requests for the INT regfile, CR and XER
 405         it will later also deal with *writing* to these regfiles.
 406         """
 407         comb = m.d.comb
 408         sync = m.d.sync
 409         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
 410         intrf = self.core.regs.rf['int']
 411
 412         with m.If(d_reg.req):  # request for regfile access being made
 413             # TODO: error-check this
 414             # XXX should this be combinatorial?  sync better?
 415             if intrf.unary:
 416                 comb += self.int_r.ren.eq(1 << d_reg.addr)
 417             else:
 418                 comb += self.int_r.addr.eq(d_reg.addr)
 419                 comb += self.int_r.ren.eq(1)
 420         d_reg_delay = Signal()
 421         sync += d_reg_delay.eq(d_reg.req)
 422         with m.If(d_reg_delay):
 423             # data arrives one clock later
 424             comb += d_reg.data.eq(self.int_r.o_data)
 425             comb += d_reg.ack.eq(1)
 426
 427         # sigh same thing for CR debug
 428         with m.If(d_cr.req):  # request for regfile access being made
 429             comb += self.cr_r.ren.eq(0b11111111)  # enable all
 430         d_cr_delay = Signal()
 431         sync += d_cr_delay.eq(d_cr.req)
 432         with m.If(d_cr_delay):
 433             # data arrives one clock later
 434             comb += d_cr.data.eq(self.cr_r.o_data)
 435             comb += d_cr.ack.eq(1)
 436
 437         # aaand XER...
 438         with m.If(d_xer.req):  # request for regfile access being made
 439             comb += self.xer_r.ren.eq(0b111111)  # enable all
 440         d_xer_delay = Signal()
 441         sync += d_xer_delay.eq(d_xer.req)
 442         with m.If(d_xer_delay):
 443             # data arrives one clock later
 444             comb += d_xer.data.eq(self.xer_r.o_data)
 445             comb += d_xer.ack.eq(1)
 446
 447     def tb_dec_fsm(self, m, spr_dec):
 448         """tb_dec_fsm
 449
 450         this is a FSM for updating either dec or tb.  it runs alternately
 451         DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
 452         value to DEC, however the regfile has "passthrough" on it so this
 453         *should* be ok.
 454
 455         see v3.0B p1097-1099 for Timeer Resource and p1065 and p1076
 456         """
 457
 458         comb, sync = m.d.comb, m.d.sync
 459         fast_rf = self.core.regs.rf['fast']
 460         fast_r_dectb = fast_rf.r_ports['issue']  # DEC/TB
 461         fast_w_dectb = fast_rf.w_ports['issue']  # DEC/TB
 462
 463         with m.FSM() as fsm:
 464
 465             # initiates read of current DEC
 466             with m.State("DEC_READ"):
 467                 comb += fast_r_dectb.addr.eq(FastRegs.DEC)
 468                 comb += fast_r_dectb.ren.eq(1)
 469                 m.next = "DEC_WRITE"
 470
 471             # waits for DEC read to arrive (1 cycle), updates with new value
 472             with m.State("DEC_WRITE"):
 473                 new_dec = Signal(64)
 474                 # TODO: MSR.LPCR 32-bit decrement mode
 475                 comb += new_dec.eq(fast_r_dectb.o_data - 1)
 476                 comb += fast_w_dectb.addr.eq(FastRegs.DEC)
 477                 comb += fast_w_dectb.wen.eq(1)
 478                 comb += fast_w_dectb.i_data.eq(new_dec)
 479                 sync += spr_dec.eq(new_dec)  # copy into cur_state for decoder
 480                 m.next = "TB_READ"
 481
 482             # initiates read of current TB
 483             with m.State("TB_READ"):
 484                 comb += fast_r_dectb.addr.eq(FastRegs.TB)
 485                 comb += fast_r_dectb.ren.eq(1)
 486                 m.next = "TB_WRITE"
 487
 488             # waits for read TB to arrive, initiates write of current TB
 489             with m.State("TB_WRITE"):
 490                 new_tb = Signal(64)
 491                 comb += new_tb.eq(fast_r_dectb.o_data + 1)
 492                 comb += fast_w_dectb.addr.eq(FastRegs.TB)
 493                 comb += fast_w_dectb.wen.eq(1)
 494                 comb += fast_w_dectb.i_data.eq(new_tb)
 495                 m.next = "DEC_READ"
 496
 497         return m
 498
 499     def elaborate(self, platform):
 500         m = Module()
 501         # convenience
 502         comb, sync = m.d.comb, m.d.sync
 503         cur_state = self.cur_state
 504         pdecode2 = self.pdecode2
 505         dbg = self.dbg
 506
 507         # set up peripherals and core
 508         core_rst = self.core_rst
 509         self.setup_peripherals(m)
 510
 511         # reset current state if core reset requested
 512         with m.If(core_rst):
 513             m.d.sync += self.cur_state.eq(0)
 514
 515         # PC and instruction from I-Memory
 516         comb += self.pc_o.eq(cur_state.pc)
 517         self.pc_changed = Signal()  # note write to PC
 518         self.msr_changed = Signal()  # note write to MSR
 519         self.sv_changed = Signal()  # note write to SVSTATE
 520
 521         # read state either from incoming override or from regfile
 522         state = CoreState("get")  # current state (MSR/PC/SVSTATE)
 523         state_get(m, state.msr, core_rst, self.msr_i,
 524                        "msr",                  # read MSR
 525                        self.state_r_msr, StateRegs.MSR)
 526         state_get(m, state.pc, core_rst, self.pc_i,
 527                        "pc",                  # read PC
 528                        self.state_r_pc, StateRegs.PC)
 529         state_get(m, state.svstate, core_rst, self.svstate_i,
 530                             "svstate",   # read SVSTATE
 531                             self.state_r_sv, StateRegs.SVSTATE)
 532
 533         # don't write pc every cycle
 534         comb += self.state_w_pc.wen.eq(0)
 535         comb += self.state_w_pc.i_data.eq(0)
 536
 537         # connect up debug state.  note "combinatorially same" below,
 538         # this is a bit naff, passing state over in the dbg class, but
 539         # because it is combinatorial it achieves the desired goal
 540         comb += dbg.state.eq(state)
 541
 542         # this bit doesn't have to be in the FSM: connect up to read
 543         # regfiles on demand from DMI
 544         self.do_dmi(m, dbg)
 545
 546         # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
 547         # (which uses that in PowerDecoder2 to raise 0x900 exception)
 548         self.tb_dec_fsm(m, cur_state.dec)
 549
 550         # while stopped, allow updating the MSR, PC and SVSTATE.
 551         # these are mainly for debugging purposes (including DMI/JTAG)
 552         with m.If(dbg.core_stopped_i):
 553             with m.If(self.pc_i.ok):
 554                 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 555                 comb += self.state_w_pc.i_data.eq(self.pc_i.data)
 556                 sync += self.pc_changed.eq(1)
 557             with m.If(self.msr_i.ok):
 558                 comb += self.state_w_msr.wen.eq(1 << StateRegs.MSR)
 559                 comb += self.state_w_msr.i_data.eq(self.msr_i.data)
 560                 sync += self.msr_changed.eq(1)
 561             with m.If(self.svstate_i.ok | self.update_svstate):
 562                 with m.If(self.svstate_i.ok): # over-ride from external source
 563                     comb += self.new_svstate.eq(self.svstate_i.data)
 564                 comb += self.state_w_sv.wen.eq(1 << StateRegs.SVSTATE)
 565                 comb += self.state_w_sv.i_data.eq(self.new_svstate)
 566                 sync += self.sv_changed.eq(1)
 567
 568         return m
 569
 570     def __iter__(self):
 571         yield from self.pc_i.ports()
 572         yield from self.msr_i.ports()
 573         yield self.pc_o
 574         yield self.memerr_o
 575         yield from self.core.ports()
 576         yield from self.imem.ports()
 577         yield self.core_bigendian_i
 578         yield self.busy_o
 579
 580     def ports(self):
 581         return list(self)
 582
 583     def external_ports(self):
 584         ports = self.pc_i.ports()
 585         ports = self.msr_i.ports()
 586         ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
 587                   ]
 588
 589         if self.jtag_en:
 590             ports += list(self.jtag.external_ports())
 591         else:
 592             # don't add DMI if JTAG is enabled
 593             ports += list(self.dbg.dmi.ports())
 594
 595         ports += list(self.imem.ibus.fields.values())
 596         ports += list(self.core.l0.cmpi.wb_bus().fields.values())
 597
 598         if self.sram4x4k:
 599             for sram in self.sram4k:
 600                 ports += list(sram.bus.fields.values())
 601
 602         if self.xics:
 603             ports += list(self.xics_icp.bus.fields.values())
 604             ports += list(self.xics_ics.bus.fields.values())
 605             ports.append(self.int_level_i)
 606
 607         if self.gpio:
 608             ports += list(self.simple_gpio.bus.fields.values())
 609             ports.append(self.gpio_o)
 610
 611         return ports
 612
 613     def ports(self):
 614         return list(self)
 615
 616
 617
 618 # Fetch Finite State Machine.
 619 # WARNING: there are currently DriverConflicts but it's actually working.
 620 # TODO, here: everything that is global in nature, information from the
 621 # main TestIssuerInternal, needs to move to either ispec() or ospec().
 622 # not only that: TestIssuerInternal.imem can entirely move into here
 623 # because imem is only ever accessed inside the FetchFSM.
 624 class FetchFSM(ControlBase):
 625     def __init__(self, allow_overlap, svp64_en, imem, core_rst,
 626                  pdecode2, cur_state,
 627                  dbg, core, svstate, nia, is_svp64_mode):
 628         self.allow_overlap = allow_overlap
 629         self.svp64_en = svp64_en
 630         self.imem = imem
 631         self.core_rst = core_rst
 632         self.pdecode2 = pdecode2
 633         self.cur_state = cur_state
 634         self.dbg = dbg
 635         self.core = core
 636         self.svstate = svstate
 637         self.nia = nia
 638         self.is_svp64_mode = is_svp64_mode
 639
 640         # set up pipeline ControlBase and allocate i/o specs
 641         # (unusual: normally done by the Pipeline API)
 642         super().__init__(stage=self)
 643         self.p.i_data, self.n.o_data = self.new_specs(None)
 644         self.i, self.o = self.p.i_data, self.n.o_data
 645
 646     # next 3 functions are Stage API Compliance
 647     def setup(self, m, i):
 648         pass
 649
 650     def ispec(self):
 651         return FetchInput()
 652
 653     def ospec(self):
 654         return FetchOutput()
 655
 656     def elaborate(self, platform):
 657         """fetch FSM
 658
 659         this FSM performs fetch of raw instruction data, partial-decodes
 660         it 32-bit at a time to detect SVP64 prefixes, and will optionally
 661         read a 2nd 32-bit quantity if that occurs.
 662         """
 663         m = super().elaborate(platform)
 664
 665         dbg = self.dbg
 666         core = self.core
 667         pc = self.i.pc
 668         msr = self.i.msr
 669         svstate = self.svstate
 670         nia = self.nia
 671         is_svp64_mode = self.is_svp64_mode
 672         fetch_pc_o_ready = self.p.o_ready
 673         fetch_pc_i_valid = self.p.i_valid
 674         fetch_insn_o_valid = self.n.o_valid
 675         fetch_insn_i_ready = self.n.i_ready
 676
 677         comb = m.d.comb
 678         sync = m.d.sync
 679         pdecode2 = self.pdecode2
 680         cur_state = self.cur_state
 681         dec_opcode_o = pdecode2.dec.raw_opcode_in  # raw opcode
 682
 683         # also note instruction fetch failed
 684         if hasattr(core, "icache"):
 685             fetch_failed = core.icache.i_out.fetch_failed
 686             flush_needed = True
 687         else:
 688             fetch_failed = Const(0, 1)
 689             flush_needed = False
 690
 691         # set priv / virt mode on I-Cache, sigh
 692         if isinstance(self.imem, ICache):
 693             comb += self.imem.i_in.priv_mode.eq(~msr[MSR.PR])
 694             comb += self.imem.i_in.virt_mode.eq(msr[MSR.DR])
 695
 696         with m.FSM(name='fetch_fsm'):
 697
 698             # waiting (zzz)
 699             with m.State("IDLE"):
 700                 with m.If(~dbg.stopping_o & ~fetch_failed):
 701                     comb += fetch_pc_o_ready.eq(1)
 702                 with m.If(fetch_pc_i_valid & ~pdecode2.instr_fault):
 703                     # instruction allowed to go: start by reading the PC
 704                     # capture the PC and also drop it into Insn Memory
 705                     # we have joined a pair of combinatorial memory
 706                     # lookups together.  this is Generally Bad.
 707                     comb += self.imem.a_pc_i.eq(pc)
 708                     comb += self.imem.a_i_valid.eq(1)
 709                     comb += self.imem.f_i_valid.eq(1)
 710                     # transfer state to output
 711                     sync += cur_state.pc.eq(pc)
 712                     sync += cur_state.svstate.eq(svstate)  # and svstate
 713                     sync += cur_state.msr.eq(msr)  # and msr
 714
 715                     m.next = "INSN_READ"  # move to "wait for bus" phase
 716
 717             # dummy pause to find out why simulation is not keeping up
 718             with m.State("INSN_READ"):
 719                 if self.allow_overlap:
 720                     stopping = dbg.stopping_o
 721                 else:
 722                     stopping = Const(0)
 723                 with m.If(stopping):
 724                     # stopping: jump back to idle
 725                     m.next = "IDLE"
 726                 with m.Else():
 727                     with m.If(self.imem.f_busy_o &
 728                               ~pdecode2.instr_fault):  # zzz...
 729                         # busy but not fetch failed: stay in wait-read
 730                         comb += self.imem.a_i_valid.eq(1)
 731                         comb += self.imem.f_i_valid.eq(1)
 732                     with m.Else():
 733                         # not busy (or fetch failed!): instruction fetched
 734                         # when fetch failed, the instruction gets ignored
 735                         # by the decoder
 736                         insn = get_insn(self.imem.f_instr_o, cur_state.pc)
 737                         if self.svp64_en:
 738                             svp64 = self.svp64
 739                             # decode the SVP64 prefix, if any
 740                             comb += svp64.raw_opcode_in.eq(insn)
 741                             comb += svp64.bigendian.eq(self.core_bigendian_i)
 742                             # pass the decoded prefix (if any) to PowerDecoder2
 743                             sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
 744                             sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
 745                             # remember whether this is a prefixed instruction,
 746                             # so the FSM can readily loop when VL==0
 747                             sync += is_svp64_mode.eq(svp64.is_svp64_mode)
 748                             # calculate the address of the following instruction
 749                             insn_size = Mux(svp64.is_svp64_mode, 8, 4)
 750                             sync += nia.eq(cur_state.pc + insn_size)
 751                             with m.If(~svp64.is_svp64_mode):
 752                                 # with no prefix, store the instruction
 753                                 # and hand it directly to the next FSM
 754                                 sync += dec_opcode_o.eq(insn)
 755                                 m.next = "INSN_READY"
 756                             with m.Else():
 757                                 # fetch the rest of the instruction from memory
 758                                 comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
 759                                 comb += self.imem.a_i_valid.eq(1)
 760                                 comb += self.imem.f_i_valid.eq(1)
 761                                 m.next = "INSN_READ2"
 762                         else:
 763                             # not SVP64 - 32-bit only
 764                             sync += nia.eq(cur_state.pc + 4)
 765                             sync += dec_opcode_o.eq(insn)
 766                             m.next = "INSN_READY"
 767
 768             with m.State("INSN_READ2"):
 769                 with m.If(self.imem.f_busy_o):  # zzz...
 770                     # busy: stay in wait-read
 771                     comb += self.imem.a_i_valid.eq(1)
 772                     comb += self.imem.f_i_valid.eq(1)
 773                 with m.Else():
 774                     # not busy: instruction fetched
 775                     insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
 776                     sync += dec_opcode_o.eq(insn)
 777                     m.next = "INSN_READY"
 778                     # TODO: probably can start looking at pdecode2.rm_dec
 779                     # here or maybe even in INSN_READ state, if svp64_mode
 780                     # detected, in order to trigger - and wait for - the
 781                     # predicate reading.
 782                     if self.svp64_en:
 783                         pmode = pdecode2.rm_dec.predmode
 784                     """
 785                     if pmode != SVP64PredMode.ALWAYS.value:
 786                         fire predicate loading FSM and wait before
 787                         moving to INSN_READY
 788                     else:
 789                         sync += self.srcmask.eq(-1) # set to all 1s
 790                         sync += self.dstmask.eq(-1) # set to all 1s
 791                         m.next = "INSN_READY"
 792                     """
 793
 794             with m.State("INSN_READY"):
 795                 # hand over the instruction, to be decoded
 796                 comb += fetch_insn_o_valid.eq(1)
 797                 with m.If(fetch_insn_i_ready):
 798                     m.next = "IDLE"
 799
 800         # whatever was done above, over-ride it if core reset is held
 801         with m.If(self.core_rst):
 802             sync += nia.eq(0)
 803
 804         return m
 805
 806
 807 class TestIssuerInternal(TestIssuerBase):
 808     """TestIssuer - reads instructions from TestMemory and issues them
 809
 810     efficiency and speed is not the main goal here: functional correctness
 811     and code clarity is.  optimisations (which almost 100% interfere with
 812     easy understanding) come later.
 813     """
 814
 815     def fetch_predicate_fsm(self, m,
 816                             pred_insn_i_valid, pred_insn_o_ready,
 817                             pred_mask_o_valid, pred_mask_i_ready):
 818         """fetch_predicate_fsm - obtains (constructs in the case of CR)
 819            src/dest predicate masks
 820
 821         https://bugs.libre-soc.org/show_bug.cgi?id=617
 822         the predicates can be read here, by using IntRegs r_ports['pred']
 823         or CRRegs r_ports['pred'].  in the case of CRs it will have to
 824         be done through multiple reads, extracting one relevant at a time.
 825         later, a faster way would be to use the 32-bit-wide CR port but
 826         this is more complex decoding, here.  equivalent code used in
 827         ISACaller is "from openpower.decoder.isa.caller import get_predcr"
 828
 829         note: this ENTIRE FSM is not to be called when svp64 is disabled
 830         """
 831         comb = m.d.comb
 832         sync = m.d.sync
 833         pdecode2 = self.pdecode2
 834         rm_dec = pdecode2.rm_dec  # SVP64RMModeDecode
 835         predmode = rm_dec.predmode
 836         srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
 837         cr_pred, int_pred = self.cr_pred, self.int_pred   # read regfiles
 838         # get src/dst step, so we can skip already used mask bits
 839         cur_state = self.cur_state
 840         srcstep = cur_state.svstate.srcstep
 841         dststep = cur_state.svstate.dststep
 842         cur_vl = cur_state.svstate.vl
 843
 844         # decode predicates
 845         sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
 846         dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
 847         sidx, scrinvert = get_predcr(m, srcpred, 's')
 848         didx, dcrinvert = get_predcr(m, dstpred, 'd')
 849
 850         # store fetched masks, for either intpred or crpred
 851         # when src/dst step is not zero, the skipped mask bits need to be
 852         # shifted-out, before actually storing them in src/dest mask
 853         new_srcmask = Signal(64, reset_less=True)
 854         new_dstmask = Signal(64, reset_less=True)
 855
 856         with m.FSM(name="fetch_predicate"):
 857
 858             with m.State("FETCH_PRED_IDLE"):
 859                 comb += pred_insn_o_ready.eq(1)
 860                 with m.If(pred_insn_i_valid):
 861                     with m.If(predmode == SVP64PredMode.INT):
 862                         # skip fetching destination mask register, when zero
 863                         with m.If(dall1s):
 864                             sync += new_dstmask.eq(-1)
 865                             # directly go to fetch source mask register
 866                             # guaranteed not to be zero (otherwise predmode
 867                             # would be SVP64PredMode.ALWAYS, not INT)
 868                             comb += int_pred.addr.eq(sregread)
 869                             comb += int_pred.ren.eq(1)
 870                             m.next = "INT_SRC_READ"
 871                         # fetch destination predicate register
 872                         with m.Else():
 873                             comb += int_pred.addr.eq(dregread)
 874                             comb += int_pred.ren.eq(1)
 875                             m.next = "INT_DST_READ"
 876                     with m.Elif(predmode == SVP64PredMode.CR):
 877                         # go fetch masks from the CR register file
 878                         sync += new_srcmask.eq(0)
 879                         sync += new_dstmask.eq(0)
 880                         m.next = "CR_READ"
 881                     with m.Else():
 882                         sync += self.srcmask.eq(-1)
 883                         sync += self.dstmask.eq(-1)
 884                         m.next = "FETCH_PRED_DONE"
 885
 886             with m.State("INT_DST_READ"):
 887                 # store destination mask
 888                 inv = Repl(dinvert, 64)
 889                 with m.If(dunary):
 890                     # set selected mask bit for 1<<r3 mode
 891                     dst_shift = Signal(range(64))
 892                     comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
 893                     sync += new_dstmask.eq(1 << dst_shift)
 894                 with m.Else():
 895                     # invert mask if requested
 896                     sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
 897                 # skip fetching source mask register, when zero
 898                 with m.If(sall1s):
 899                     sync += new_srcmask.eq(-1)
 900                     m.next = "FETCH_PRED_SHIFT_MASK"
 901                 # fetch source predicate register
 902                 with m.Else():
 903                     comb += int_pred.addr.eq(sregread)
 904                     comb += int_pred.ren.eq(1)
 905                     m.next = "INT_SRC_READ"
 906
 907             with m.State("INT_SRC_READ"):
 908                 # store source mask
 909                 inv = Repl(sinvert, 64)
 910                 with m.If(sunary):
 911                     # set selected mask bit for 1<<r3 mode
 912                     src_shift = Signal(range(64))
 913                     comb += src_shift.eq(self.int_pred.o_data & 0b111111)
 914                     sync += new_srcmask.eq(1 << src_shift)
 915                 with m.Else():
 916                     # invert mask if requested
 917                     sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
 918                 m.next = "FETCH_PRED_SHIFT_MASK"
 919
 920             # fetch masks from the CR register file
 921             # implements the following loop:
 922             # idx, inv = get_predcr(mask)
 923             # mask = 0
 924             # for cr_idx in range(vl):
 925             #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
 926             #     if cr[idx] ^ inv:
 927             #         mask |= 1 << cr_idx
 928             # return mask
 929             with m.State("CR_READ"):
 930                 # CR index to be read, which will be ready by the next cycle
 931                 cr_idx = Signal.like(cur_vl, reset_less=True)
 932                 # submit the read operation to the regfile
 933                 with m.If(cr_idx != cur_vl):
 934                     # the CR read port is unary ...
 935                     # ren = 1 << cr_idx
 936                     # ... in MSB0 convention ...
 937                     # ren = 1 << (7 - cr_idx)
 938                     # ... and with an offset:
 939                     # ren = 1 << (7 - off - cr_idx)
 940                     idx = SVP64CROffs.CRPred + cr_idx
 941                     comb += cr_pred.ren.eq(1 << (7 - idx))
 942                     # signal data valid in the next cycle
 943                     cr_read = Signal(reset_less=True)
 944                     sync += cr_read.eq(1)
 945                     # load the next index
 946                     sync += cr_idx.eq(cr_idx + 1)
 947                 with m.Else():
 948                     # exit on loop end
 949                     sync += cr_read.eq(0)
 950                     sync += cr_idx.eq(0)
 951                     m.next = "FETCH_PRED_SHIFT_MASK"
 952                 with m.If(cr_read):
 953                     # compensate for the one cycle delay on the regfile
 954                     cur_cr_idx = Signal.like(cur_vl)
 955                     comb += cur_cr_idx.eq(cr_idx - 1)
 956                     # read the CR field, select the appropriate bit
 957                     cr_field = Signal(4)
 958                     scr_bit = Signal()
 959                     dcr_bit = Signal()
 960                     comb += cr_field.eq(cr_pred.o_data)
 961                     comb += scr_bit.eq(cr_field.bit_select(sidx, 1)
 962                                        ^ scrinvert)
 963                     comb += dcr_bit.eq(cr_field.bit_select(didx, 1)
 964                                        ^ dcrinvert)
 965                     # set the corresponding mask bit
 966                     bit_to_set = Signal.like(self.srcmask)
 967                     comb += bit_to_set.eq(1 << cur_cr_idx)
 968                     with m.If(scr_bit):
 969                         sync += new_srcmask.eq(new_srcmask | bit_to_set)
 970                     with m.If(dcr_bit):
 971                         sync += new_dstmask.eq(new_dstmask | bit_to_set)
 972
 973             with m.State("FETCH_PRED_SHIFT_MASK"):
 974                 # shift-out skipped mask bits
 975                 sync += self.srcmask.eq(new_srcmask >> srcstep)
 976                 sync += self.dstmask.eq(new_dstmask >> dststep)
 977                 m.next = "FETCH_PRED_DONE"
 978
 979             with m.State("FETCH_PRED_DONE"):
 980                 comb += pred_mask_o_valid.eq(1)
 981                 with m.If(pred_mask_i_ready):
 982                     m.next = "FETCH_PRED_IDLE"
 983
 984     def issue_fsm(self, m, core, nia,
 985                   dbg, core_rst, is_svp64_mode,
 986                   fetch_pc_o_ready, fetch_pc_i_valid,
 987                   fetch_insn_o_valid, fetch_insn_i_ready,
 988                   pred_insn_i_valid, pred_insn_o_ready,
 989                   pred_mask_o_valid, pred_mask_i_ready,
 990                   exec_insn_i_valid, exec_insn_o_ready,
 991                   exec_pc_o_valid, exec_pc_i_ready):
 992         """issue FSM
 993
 994         decode / issue FSM.  this interacts with the "fetch" FSM
 995         through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
 996         (outgoing). also interacts with the "execute" FSM
 997         through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
 998         (incoming).
 999         SVP64 RM prefixes have already been set up by the
1000         "fetch" phase, so execute is fairly straightforward.
1001         """
1002
1003         comb = m.d.comb
1004         sync = m.d.sync
1005         pdecode2 = self.pdecode2
1006         cur_state = self.cur_state
1007         new_svstate = self.new_svstate
1008
1009         # temporaries
1010         dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode
1011
1012         # for updating svstate (things like srcstep etc.)
1013         comb += new_svstate.eq(cur_state.svstate)
1014
1015         # precalculate srcstep+1 and dststep+1
1016         cur_srcstep = cur_state.svstate.srcstep
1017         cur_dststep = cur_state.svstate.dststep
1018         next_srcstep = Signal.like(cur_srcstep)
1019         next_dststep = Signal.like(cur_dststep)
1020         comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
1021         comb += next_dststep.eq(cur_state.svstate.dststep+1)
1022
1023         # note if an exception happened.  in a pipelined or OoO design
1024         # this needs to be accompanied by "shadowing" (or stalling)
1025         exc_happened = self.core.o.exc_happened
1026         # also note instruction fetch failed
1027         if hasattr(core, "icache"):
1028             fetch_failed = core.icache.i_out.fetch_failed
1029             flush_needed = True
1030             # set to fault in decoder
1031             # update (highest priority) instruction fault
1032             rising_fetch_failed = rising_edge(m, fetch_failed)
1033             with m.If(rising_fetch_failed):
1034                 sync += pdecode2.instr_fault.eq(1)
1035         else:
1036             fetch_failed = Const(0, 1)
1037             flush_needed = False
1038
1039         with m.FSM(name="issue_fsm"):
1040
1041             # sync with the "fetch" phase which is reading the instruction
1042             # at this point, there is no instruction running, that
1043             # could inadvertently update the PC.
1044             with m.State("ISSUE_START"):
1045                 # reset instruction fault
1046                 sync += pdecode2.instr_fault.eq(0)
1047                 # wait on "core stop" release, before next fetch
1048                 # need to do this here, in case we are in a VL==0 loop
1049                 with m.If(~dbg.core_stop_o & ~core_rst):
1050                     comb += fetch_pc_i_valid.eq(1)  # tell fetch to start
1051                     with m.If(fetch_pc_o_ready):   # fetch acknowledged us
1052                         m.next = "INSN_WAIT"
1053                 with m.Else():
1054                     # tell core it's stopped, and acknowledge debug handshake
1055                     comb += dbg.core_stopped_i.eq(1)
1056                     # while stopped, allow updating SVSTATE
1057                     with m.If(self.svstate_i.ok):
1058                         comb += new_svstate.eq(self.svstate_i.data)
1059                         comb += self.update_svstate.eq(1)
1060                         sync += self.sv_changed.eq(1)
1061
1062             # wait for an instruction to arrive from Fetch
1063             with m.State("INSN_WAIT"):
1064                 if self.allow_overlap:
1065                     stopping = dbg.stopping_o
1066                 else:
1067                     stopping = Const(0)
1068                 with m.If(stopping):
1069                     # stopping: jump back to idle
1070                     m.next = "ISSUE_START"
1071                     if flush_needed:
1072                         # request the icache to stop asserting "failed"
1073                         comb += core.icache.flush_in.eq(1)
1074                     # stop instruction fault
1075                     sync += pdecode2.instr_fault.eq(0)
1076                 with m.Else():
1077                     comb += fetch_insn_i_ready.eq(1)
1078                     with m.If(fetch_insn_o_valid):
1079                         # loop into ISSUE_START if it's a SVP64 instruction
1080                         # and VL == 0.  this because VL==0 is a for-loop
1081                         # from 0 to 0 i.e. always, always a NOP.
1082                         cur_vl = cur_state.svstate.vl
1083                         with m.If(is_svp64_mode & (cur_vl == 0)):
1084                             # update the PC before fetching the next instruction
1085                             # since we are in a VL==0 loop, no instruction was
1086                             # executed that we could be overwriting
1087                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1088                             comb += self.state_w_pc.i_data.eq(nia)
1089                             comb += self.insn_done.eq(1)
1090                             m.next = "ISSUE_START"
1091                         with m.Else():
1092                             if self.svp64_en:
1093                                 m.next = "PRED_START"  # fetching predicate
1094                             else:
1095                                 m.next = "DECODE_SV"  # skip predication
1096
1097             with m.State("PRED_START"):
1098                 comb += pred_insn_i_valid.eq(1)  # tell fetch_pred to start
1099                 with m.If(pred_insn_o_ready):  # fetch_pred acknowledged us
1100                     m.next = "MASK_WAIT"
1101
1102             with m.State("MASK_WAIT"):
1103                 comb += pred_mask_i_ready.eq(1)  # ready to receive the masks
1104                 with m.If(pred_mask_o_valid):  # predication masks are ready
1105                     m.next = "PRED_SKIP"
1106
1107             # skip zeros in predicate
1108             with m.State("PRED_SKIP"):
1109                 with m.If(~is_svp64_mode):
1110                     m.next = "DECODE_SV"  # nothing to do
1111                 with m.Else():
1112                     if self.svp64_en:
1113                         pred_src_zero = pdecode2.rm_dec.pred_sz
1114                         pred_dst_zero = pdecode2.rm_dec.pred_dz
1115
1116                         # new srcstep, after skipping zeros
1117                         skip_srcstep = Signal.like(cur_srcstep)
1118                         # value to be added to the current srcstep
1119                         src_delta = Signal.like(cur_srcstep)
1120                         # add leading zeros to srcstep, if not in zero mode
1121                         with m.If(~pred_src_zero):
1122                             # priority encoder (count leading zeros)
1123                             # append guard bit, in case the mask is all zeros
1124                             pri_enc_src = PriorityEncoder(65)
1125                             m.submodules.pri_enc_src = pri_enc_src
1126                             comb += pri_enc_src.i.eq(Cat(self.srcmask,
1127                                                          Const(1, 1)))
1128                             comb += src_delta.eq(pri_enc_src.o)
1129                         # apply delta to srcstep
1130                         comb += skip_srcstep.eq(cur_srcstep + src_delta)
1131                         # shift-out all leading zeros from the mask
1132                         # plus the leading "one" bit
1133                         # TODO count leading zeros and shift-out the zero
1134                         #      bits, in the same step, in hardware
1135                         sync += self.srcmask.eq(self.srcmask >> (src_delta+1))
1136
1137                         # same as above, but for dststep
1138                         skip_dststep = Signal.like(cur_dststep)
1139                         dst_delta = Signal.like(cur_dststep)
1140                         with m.If(~pred_dst_zero):
1141                             pri_enc_dst = PriorityEncoder(65)
1142                             m.submodules.pri_enc_dst = pri_enc_dst
1143                             comb += pri_enc_dst.i.eq(Cat(self.dstmask,
1144                                                          Const(1, 1)))
1145                             comb += dst_delta.eq(pri_enc_dst.o)
1146                         comb += skip_dststep.eq(cur_dststep + dst_delta)
1147                         sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))
1148
1149                         # TODO: initialize mask[VL]=1 to avoid passing past VL
1150                         with m.If((skip_srcstep >= cur_vl) |
1151                                   (skip_dststep >= cur_vl)):
1152                             # end of VL loop. Update PC and reset src/dst step
1153                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1154                             comb += self.state_w_pc.i_data.eq(nia)
1155                             comb += new_svstate.srcstep.eq(0)
1156                             comb += new_svstate.dststep.eq(0)
1157                             comb += self.update_svstate.eq(1)
1158                             # synchronize with the simulator
1159                             comb += self.insn_done.eq(1)
1160                             # go back to Issue
1161                             m.next = "ISSUE_START"
1162                         with m.Else():
1163                             # update new src/dst step
1164                             comb += new_svstate.srcstep.eq(skip_srcstep)
1165                             comb += new_svstate.dststep.eq(skip_dststep)
1166                             comb += self.update_svstate.eq(1)
1167                             # proceed to Decode
1168                             m.next = "DECODE_SV"
1169
1170                         # pass predicate mask bits through to satellite decoders
1171                         # TODO: for SIMD this will be *multiple* bits
1172                         sync += core.i.sv_pred_sm.eq(self.srcmask[0])
1173                         sync += core.i.sv_pred_dm.eq(self.dstmask[0])
1174
1175             # after src/dst step have been updated, we are ready
1176             # to decode the instruction
1177             with m.State("DECODE_SV"):
1178                 # decode the instruction
1179                 with m.If(~fetch_failed):
1180                     sync += pdecode2.instr_fault.eq(0)
1181                 sync += core.i.e.eq(pdecode2.e)
1182                 sync += core.i.state.eq(cur_state)
1183                 sync += core.i.raw_insn_i.eq(dec_opcode_i)
1184                 sync += core.i.bigendian_i.eq(self.core_bigendian_i)
1185                 if self.svp64_en:
1186                     sync += core.i.sv_rm.eq(pdecode2.sv_rm)
1187                     # set RA_OR_ZERO detection in satellite decoders
1188                     sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
1189                     # and svp64 detection
1190                     sync += core.i.is_svp64_mode.eq(is_svp64_mode)
1191                     # and svp64 bit-rev'd ldst mode
1192                     ldst_dec = pdecode2.use_svp64_ldst_dec
1193                     sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
1194                 # after decoding, reset any previous exception condition,
1195                 # allowing it to be set again during the next execution
1196                 sync += pdecode2.ldst_exc.eq(0)
1197
1198                 m.next = "INSN_EXECUTE"  # move to "execute"
1199
1200             # handshake with execution FSM, move to "wait" once acknowledged
1201             with m.State("INSN_EXECUTE"):
1202                 comb += exec_insn_i_valid.eq(1)  # trigger execute
1203                 with m.If(exec_insn_o_ready):   # execute acknowledged us
1204                     m.next = "EXECUTE_WAIT"
1205
1206             with m.State("EXECUTE_WAIT"):
1207                 # wait on "core stop" release, at instruction end
1208                 # need to do this here, in case we are in a VL>1 loop
1209                 with m.If(~dbg.core_stop_o & ~core_rst):
1210                     comb += exec_pc_i_ready.eq(1)
1211                     # see https://bugs.libre-soc.org/show_bug.cgi?id=636
1212                     # the exception info needs to be blatted into
1213                     # pdecode.ldst_exc, and the instruction "re-run".
1214                     # when ldst_exc.happened is set, the PowerDecoder2
1215                     # reacts very differently: it re-writes the instruction
1216                     # with a "trap" (calls PowerDecoder2.trap()) which
1217                     # will *overwrite* whatever was requested and jump the
1218                     # PC to the exception address, as well as alter MSR.
1219                     # nothing else needs to be done other than to note
1220                     # the change of PC and MSR (and, later, SVSTATE)
1221                     with m.If(exc_happened):
1222                         mmu = core.fus.get_exc("mmu0")
1223                         ldst = core.fus.get_exc("ldst0")
1224                         if mmu is not None:
1225                             with m.If(fetch_failed):
1226                                 # instruction fetch: exception is from MMU
1227                                 # reset instr_fault (highest priority)
1228                                 sync += pdecode2.ldst_exc.eq(mmu)
1229                                 sync += pdecode2.instr_fault.eq(0)
1230                                 if flush_needed:
1231                                     # request icache to stop asserting "failed"
1232                                     comb += core.icache.flush_in.eq(1)
1233                         with m.If(~fetch_failed):
1234                             # otherwise assume it was a LDST exception
1235                             sync += pdecode2.ldst_exc.eq(ldst)
1236
1237                     with m.If(exec_pc_o_valid):
1238
1239                         # was this the last loop iteration?
1240                         is_last = Signal()
1241                         cur_vl = cur_state.svstate.vl
1242                         comb += is_last.eq(next_srcstep == cur_vl)
1243
1244                         with m.If(pdecode2.instr_fault):
1245                             # reset instruction fault, try again
1246                             sync += pdecode2.instr_fault.eq(0)
1247                             m.next = "ISSUE_START"
1248
1249                         # return directly to Decode if Execute generated an
1250                         # exception.
1251                         with m.Elif(pdecode2.ldst_exc.happened):
1252                             m.next = "DECODE_SV"
1253
1254                         # if MSR, PC or SVSTATE were changed by the previous
1255                         # instruction, go directly back to Fetch, without
1256                         # updating either MSR PC or SVSTATE
1257                         with m.Elif(self.msr_changed | self.pc_changed |
1258                                     self.sv_changed):
1259                             m.next = "ISSUE_START"
1260
1261                         # also return to Fetch, when no output was a vector
1262                         # (regardless of SRCSTEP and VL), or when the last
1263                         # instruction was really the last one of the VL loop
1264                         with m.Elif((~pdecode2.loop_continue) | is_last):
1265                             # before going back to fetch, update the PC state
1266                             # register with the NIA.
1267                             # ok here we are not reading the branch unit.
1268                             # TODO: this just blithely overwrites whatever
1269                             #       pipeline updated the PC
1270                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1271                             comb += self.state_w_pc.i_data.eq(nia)
1272                             # reset SRCSTEP before returning to Fetch
1273                             if self.svp64_en:
1274                                 with m.If(pdecode2.loop_continue):
1275                                     comb += new_svstate.srcstep.eq(0)
1276                                     comb += new_svstate.dststep.eq(0)
1277                                     comb += self.update_svstate.eq(1)
1278                             else:
1279                                 comb += new_svstate.srcstep.eq(0)
1280                                 comb += new_svstate.dststep.eq(0)
1281                                 comb += self.update_svstate.eq(1)
1282                             m.next = "ISSUE_START"
1283
1284                         # returning to Execute? then, first update SRCSTEP
1285                         with m.Else():
1286                             comb += new_svstate.srcstep.eq(next_srcstep)
1287                             comb += new_svstate.dststep.eq(next_dststep)
1288                             comb += self.update_svstate.eq(1)
1289                             # return to mask skip loop
1290                             m.next = "PRED_SKIP"
1291
1292                 with m.Else():
1293                     comb += dbg.core_stopped_i.eq(1)
1294                     if flush_needed:
1295                         # request the icache to stop asserting "failed"
1296                         comb += core.icache.flush_in.eq(1)
1297                     # stop instruction fault
1298                     sync += pdecode2.instr_fault.eq(0)
1299
1300         # check if svstate needs updating: if so, write it to State Regfile
1301         with m.If(self.update_svstate):
1302             sync += cur_state.svstate.eq(self.new_svstate)  # for next clock
1303
1304     def execute_fsm(self, m, core,
1305                     exec_insn_i_valid, exec_insn_o_ready,
1306                     exec_pc_o_valid, exec_pc_i_ready):
1307         """execute FSM
1308
1309         execute FSM. this interacts with the "issue" FSM
1310         through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
1311         (outgoing). SVP64 RM prefixes have already been set up by the
1312         "issue" phase, so execute is fairly straightforward.
1313         """
1314
1315         comb = m.d.comb
1316         sync = m.d.sync
1317         pdecode2 = self.pdecode2
1318
1319         # temporaries
1320         core_busy_o = core.n.o_data.busy_o  # core is busy
1321         core_ivalid_i = core.p.i_valid              # instruction is valid
1322
1323         if hasattr(core, "icache"):
1324             fetch_failed = core.icache.i_out.fetch_failed
1325         else:
1326             fetch_failed = Const(0, 1)
1327
1328         with m.FSM(name="exec_fsm"):
1329
1330             # waiting for instruction bus (stays there until not busy)
1331             with m.State("INSN_START"):
1332                 comb += exec_insn_o_ready.eq(1)
1333                 with m.If(exec_insn_i_valid):
1334                     comb += core_ivalid_i.eq(1)  # instruction is valid/issued
1335                     sync += self.sv_changed.eq(0)
1336                     sync += self.pc_changed.eq(0)
1337                     sync += self.msr_changed.eq(0)
1338                     with m.If(core.p.o_ready):  # only move if accepted
1339                         m.next = "INSN_ACTIVE"  # move to "wait completion"
1340
1341             # instruction started: must wait till it finishes
1342             with m.State("INSN_ACTIVE"):
1343                 # note changes to MSR, PC and SVSTATE
1344                 # XXX oops, really must monitor *all* State Regfile write
1345                 # ports looking for changes!
1346                 with m.If(self.state_nia.wen & (1 << StateRegs.SVSTATE)):
1347                     sync += self.sv_changed.eq(1)
1348                 with m.If(self.state_nia.wen & (1 << StateRegs.MSR)):
1349                     sync += self.msr_changed.eq(1)
1350                 with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
1351                     sync += self.pc_changed.eq(1)
1352                 with m.If(~core_busy_o):  # instruction done!
1353                     comb += exec_pc_o_valid.eq(1)
1354                     with m.If(exec_pc_i_ready):
1355                         # when finished, indicate "done".
1356                         # however, if there was an exception, the instruction
1357                         # is *not* yet done.  this is an implementation
1358                         # detail: we choose to implement exceptions by
1359                         # taking the exception information from the LDST
1360                         # unit, putting that *back* into the PowerDecoder2,
1361                         # and *re-running the entire instruction*.
1362                         # if we erroneously indicate "done" here, it is as if
1363                         # there were *TWO* instructions:
1364                         # 1) the failed LDST 2) a TRAP.
1365                         with m.If(~pdecode2.ldst_exc.happened &
1366                                    ~pdecode2.instr_fault):
1367                             comb += self.insn_done.eq(1)
1368                         m.next = "INSN_START"  # back to fetch
1369
1370     def elaborate(self, platform):
1371         m = super().elaborate(platform)
1372         # convenience
1373         comb, sync = m.d.comb, m.d.sync
1374         cur_state = self.cur_state
1375         pdecode2 = self.pdecode2
1376         dbg = self.dbg
1377         core = self.core
1378
1379         # set up peripherals and core
1380         core_rst = self.core_rst
1381
1382         # indicate to outside world if any FU is still executing
1383         comb += self.any_busy.eq(core.n.o_data.any_busy_o)  # any FU executing
1384
1385         # address of the next instruction, in the absence of a branch
1386         # depends on the instruction size
1387         nia = Signal(64)
1388
1389         # connect up debug signals
1390         comb += dbg.terminate_i.eq(core.o.core_terminate_o)
1391
1392         # pass the prefix mode from Fetch to Issue, so the latter can loop
1393         # on VL==0
1394         is_svp64_mode = Signal()
1395
1396         # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
1397         # issue, decode/execute, now joined by "Predicate fetch/calculate".
1398         # these are the handshake signals between each
1399
1400         # fetch FSM can run as soon as the PC is valid
1401         fetch_pc_i_valid = Signal()  # Execute tells Fetch "start next read"
1402         fetch_pc_o_ready = Signal()  # Fetch Tells SVSTATE "proceed"
1403
1404         # fetch FSM hands over the instruction to be decoded / issued
1405         fetch_insn_o_valid = Signal()
1406         fetch_insn_i_ready = Signal()
1407
1408         # predicate fetch FSM decodes and fetches the predicate
1409         pred_insn_i_valid = Signal()
1410         pred_insn_o_ready = Signal()
1411
1412         # predicate fetch FSM delivers the masks
1413         pred_mask_o_valid = Signal()
1414         pred_mask_i_ready = Signal()
1415
1416         # issue FSM delivers the instruction to the be executed
1417         exec_insn_i_valid = Signal()
1418         exec_insn_o_ready = Signal()
1419
1420         # execute FSM, hands over the PC/SVSTATE back to the issue FSM
1421         exec_pc_o_valid = Signal()
1422         exec_pc_i_ready = Signal()
1423
1424         # the FSMs here are perhaps unusual in that they detect conditions
1425         # then "hold" information, combinatorially, for the core
1426         # (as opposed to using sync - which would be on a clock's delay)
1427         # this includes the actual opcode, valid flags and so on.
1428
1429         # Fetch, then predicate fetch, then Issue, then Execute.
1430         # Issue is where the VL for-loop # lives.  the ready/valid
1431         # signalling is used to communicate between the four.
1432
1433         # set up Fetch FSM
1434         fetch = FetchFSM(self.allow_overlap, self.svp64_en,
1435                          self.imem, core_rst, pdecode2, cur_state,
1436                          dbg, core,
1437                          dbg.state.svstate, # combinatorially same
1438                          nia, is_svp64_mode)
1439         m.submodules.fetch = fetch
1440         # connect up in/out data to existing Signals
1441         comb += fetch.p.i_data.pc.eq(dbg.state.pc)   # combinatorially same
1442         comb += fetch.p.i_data.msr.eq(dbg.state.msr) # combinatorially same
1443         # and the ready/valid signalling
1444         comb += fetch_pc_o_ready.eq(fetch.p.o_ready)
1445         comb += fetch.p.i_valid.eq(fetch_pc_i_valid)
1446         comb += fetch_insn_o_valid.eq(fetch.n.o_valid)
1447         comb += fetch.n.i_ready.eq(fetch_insn_i_ready)
1448
1449         self.issue_fsm(m, core, nia,
1450                        dbg, core_rst, is_svp64_mode,
1451                        fetch_pc_o_ready, fetch_pc_i_valid,
1452                        fetch_insn_o_valid, fetch_insn_i_ready,
1453                        pred_insn_i_valid, pred_insn_o_ready,
1454                        pred_mask_o_valid, pred_mask_i_ready,
1455                        exec_insn_i_valid, exec_insn_o_ready,
1456                        exec_pc_o_valid, exec_pc_i_ready)
1457
1458         if self.svp64_en:
1459             self.fetch_predicate_fsm(m,
1460                                      pred_insn_i_valid, pred_insn_o_ready,
1461                                      pred_mask_o_valid, pred_mask_i_ready)
1462
1463         self.execute_fsm(m, core,
1464                          exec_insn_i_valid, exec_insn_o_ready,
1465                          exec_pc_o_valid, exec_pc_i_ready)
1466
1467         return m
1468
1469
1470 class TestIssuer(Elaboratable):
1471     def __init__(self, pspec):
1472         self.ti = TestIssuerInternal(pspec)
1473         # XXX TODO: make this a command-line selectable option from pspec
1474         #from soc.simple.inorder import TestIssuerInternalInOrder
1475         #self.ti = TestIssuerInternalInOrder(pspec)
1476         self.pll = DummyPLL(instance=True)
1477
1478         # PLL direct clock or not
1479         self.pll_en = hasattr(pspec, "use_pll") and pspec.use_pll
1480         if self.pll_en:
1481             self.pll_test_o = Signal(reset_less=True)
1482             self.pll_vco_o = Signal(reset_less=True)
1483             self.clk_sel_i = Signal(2, reset_less=True)
1484             self.ref_clk = ClockSignal()  # can't rename it but that's ok
1485             self.pllclk_clk = ClockSignal("pllclk")
1486
1487     def elaborate(self, platform):
1488         m = Module()
1489         comb = m.d.comb
1490
1491         # TestIssuer nominally runs at main clock, actually it is
1492         # all combinatorial internally except for coresync'd components
1493         m.submodules.ti = ti = self.ti
1494
1495         if self.pll_en:
1496             # ClockSelect runs at PLL output internal clock rate
1497             m.submodules.wrappll = pll = self.pll
1498
1499             # add clock domains from PLL
1500             cd_pll = ClockDomain("pllclk")
1501             m.domains += cd_pll
1502
1503             # PLL clock established.  has the side-effect of running clklsel
1504             # at the PLL's speed (see DomainRenamer("pllclk") above)
1505             pllclk = self.pllclk_clk
1506             comb += pllclk.eq(pll.clk_pll_o)
1507
1508             # wire up external 24mhz to PLL
1509             #comb += pll.clk_24_i.eq(self.ref_clk)
1510             # output 18 mhz PLL test signal, and analog oscillator out
1511             comb += self.pll_test_o.eq(pll.pll_test_o)
1512             comb += self.pll_vco_o.eq(pll.pll_vco_o)
1513
1514             # input to pll clock selection
1515             comb += pll.clk_sel_i.eq(self.clk_sel_i)
1516
1517             # now wire up ResetSignals.  don't mind them being in this domain
1518             pll_rst = ResetSignal("pllclk")
1519             comb += pll_rst.eq(ResetSignal())
1520
1521         # internal clock is set to selector clock-out.  has the side-effect of
1522         # running TestIssuer at this speed (see DomainRenamer("intclk") above)
1523         # debug clock runs at coresync internal clock
1524         cd_coresync = ClockDomain("coresync")
1525         #m.domains += cd_coresync
1526         if self.ti.dbg_domain != 'sync':
1527             cd_dbgsync = ClockDomain("dbgsync")
1528             #m.domains += cd_dbgsync
1529         intclk = ClockSignal("coresync")
1530         dbgclk = ClockSignal(self.ti.dbg_domain)
1531         # XXX BYPASS PLL XXX
1532         # XXX BYPASS PLL XXX
1533         # XXX BYPASS PLL XXX
1534         if self.pll_en:
1535             comb += intclk.eq(self.ref_clk)
1536         else:
1537             comb += intclk.eq(ClockSignal())
1538         if self.ti.dbg_domain != 'sync':
1539             dbgclk = ClockSignal(self.ti.dbg_domain)
1540             comb += dbgclk.eq(intclk)
1541
1542         return m
1543
1544     def ports(self):
1545         return list(self.ti.ports()) + list(self.pll.ports()) + \
1546             [ClockSignal(), ResetSignal()]
1547
1548     def external_ports(self):
1549         ports = self.ti.external_ports()
1550         ports.append(ClockSignal())
1551         ports.append(ResetSignal())
1552         if self.pll_en:
1553             ports.append(self.clk_sel_i)
1554             ports.append(self.pll.clk_24_i)
1555             ports.append(self.pll_test_o)
1556             ports.append(self.pll_vco_o)
1557             ports.append(self.pllclk_clk)
1558             ports.append(self.ref_clk)
1559         return ports
1560
1561
1562 if __name__ == '__main__':
1563     units = {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
1564              'spr': 1,
1565              'div': 1,
1566              'mul': 1,
1567              'shiftrot': 1
1568              }
1569     pspec = TestMemPspec(ldst_ifacetype='bare_wb',
1570                          imem_ifacetype='bare_wb',
1571                          addr_wid=48,
1572                          mask_wid=8,
1573                          reg_wid=64,
1574                          units=units)
1575     dut = TestIssuer(pspec)
1576     vl = main(dut, ports=dut.ports(), name="test_issuer")
1577
1578     if len(sys.argv) == 1:
1579         vl = rtlil.convert(dut, ports=dut.external_ports(), name="test_issuer")
1580         with open("test_issuer.il", "w") as f:
1581             f.write(vl)