src/soc/simple/issuer.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
  12 the purpose of this module is to verify the functional correctness
  13 of the Function Units in the absolute simplest and clearest possible
  14 way, and to provide something that can be further incrementally
  15 improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
  19                     ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from nmutil.singlepipe import ControlBase
  25 from soc.simple.core_data import FetchOutput, FetchInput
  26
  27 from nmigen.lib.coding import PriorityEncoder
  28
  29 from openpower.decoder.power_decoder import create_pdecode
  30 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
  31 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
  32 from openpower.decoder.decode2execute1 import Data
  33 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
  34                                            SVP64PredMode)
  35 from openpower.state import CoreState
  36 from openpower.consts import (CR, SVP64CROffs)
  37 from soc.experiment.testmem import TestMemory  # test only for instructions
  38 from soc.regfile.regfiles import StateRegs, FastRegs
  39 from soc.simple.core import NonProductionCore
  40 from soc.config.test.test_loadstore import TestMemPspec
  41 from soc.config.ifetch import ConfigFetchUnit
  42 from soc.debug.dmi import CoreDebug, DMIInterface
  43 from soc.debug.jtag import JTAG
  44 from soc.config.pinouts import get_pinspecs
  45 from soc.interrupts.xics import XICS_ICP, XICS_ICS
  46 from soc.bus.simple_gpio import SimpleGPIO
  47 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
  48 from soc.clock.select import ClockSelect
  49 from soc.clock.dummypll import DummyPLL
  50 from openpower.sv.svstate import SVSTATERec
  51 from soc.experiment.icache import ICache
  52
  53 from nmutil.util import rising_edge
  54
  55
  56 def get_insn(f_instr_o, pc):
  57     if f_instr_o.width == 32:
  58         return f_instr_o
  59     else:
  60         # 64-bit: bit 2 of pc decides which word to select
  61         return f_instr_o.word_select(pc[2], 32)
  62
  63 # gets state input or reads from state regfile
  64
  65
  66 def state_get(m, core_rst, state_i, name, regfile, regnum):
  67     comb = m.d.comb
  68     sync = m.d.sync
  69     # read the PC
  70     res = Signal(64, reset_less=True, name=name)
  71     res_ok_delay = Signal(name="%s_ok_delay" % name)
  72     with m.If(~core_rst):
  73         sync += res_ok_delay.eq(~state_i.ok)
  74         with m.If(state_i.ok):
  75             # incoming override (start from pc_i)
  76             comb += res.eq(state_i.data)
  77         with m.Else():
  78             # otherwise read StateRegs regfile for PC...
  79             comb += regfile.ren.eq(1 << regnum)
  80         # ... but on a 1-clock delay
  81         with m.If(res_ok_delay):
  82             comb += res.eq(regfile.o_data)
  83     return res
  84
  85
  86 def get_predint(m, mask, name):
  87     """decode SVP64 predicate integer mask field to reg number and invert
  88     this is identical to the equivalent function in ISACaller except that
  89     it doesn't read the INT directly, it just decodes "what needs to be done"
  90     i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
  91
  92     * all1s is set to indicate that no mask is to be applied.
  93     * regread indicates the GPR register number to be read
  94     * invert is set to indicate that the register value is to be inverted
  95     * unary indicates that the contents of the register is to be shifted 1<<r3
  96     """
  97     comb = m.d.comb
  98     regread = Signal(5, name=name+"regread")
  99     invert = Signal(name=name+"invert")
 100     unary = Signal(name=name+"unary")
 101     all1s = Signal(name=name+"all1s")
 102     with m.Switch(mask):
 103         with m.Case(SVP64PredInt.ALWAYS.value):
 104             comb += all1s.eq(1)      # use 0b1111 (all ones)
 105         with m.Case(SVP64PredInt.R3_UNARY.value):
 106             comb += regread.eq(3)
 107             comb += unary.eq(1)        # 1<<r3 - shift r3 (single bit)
 108         with m.Case(SVP64PredInt.R3.value):
 109             comb += regread.eq(3)
 110         with m.Case(SVP64PredInt.R3_N.value):
 111             comb += regread.eq(3)
 112             comb += invert.eq(1)
 113         with m.Case(SVP64PredInt.R10.value):
 114             comb += regread.eq(10)
 115         with m.Case(SVP64PredInt.R10_N.value):
 116             comb += regread.eq(10)
 117             comb += invert.eq(1)
 118         with m.Case(SVP64PredInt.R30.value):
 119             comb += regread.eq(30)
 120         with m.Case(SVP64PredInt.R30_N.value):
 121             comb += regread.eq(30)
 122             comb += invert.eq(1)
 123     return regread, invert, unary, all1s
 124
 125
 126 def get_predcr(m, mask, name):
 127     """decode SVP64 predicate CR to reg number field and invert status
 128     this is identical to _get_predcr in ISACaller
 129     """
 130     comb = m.d.comb
 131     idx = Signal(2, name=name+"idx")
 132     invert = Signal(name=name+"crinvert")
 133     with m.Switch(mask):
 134         with m.Case(SVP64PredCR.LT.value):
 135             comb += idx.eq(CR.LT)
 136             comb += invert.eq(0)
 137         with m.Case(SVP64PredCR.GE.value):
 138             comb += idx.eq(CR.LT)
 139             comb += invert.eq(1)
 140         with m.Case(SVP64PredCR.GT.value):
 141             comb += idx.eq(CR.GT)
 142             comb += invert.eq(0)
 143         with m.Case(SVP64PredCR.LE.value):
 144             comb += idx.eq(CR.GT)
 145             comb += invert.eq(1)
 146         with m.Case(SVP64PredCR.EQ.value):
 147             comb += idx.eq(CR.EQ)
 148             comb += invert.eq(0)
 149         with m.Case(SVP64PredCR.NE.value):
 150             comb += idx.eq(CR.EQ)
 151             comb += invert.eq(1)
 152         with m.Case(SVP64PredCR.SO.value):
 153             comb += idx.eq(CR.SO)
 154             comb += invert.eq(0)
 155         with m.Case(SVP64PredCR.NS.value):
 156             comb += idx.eq(CR.SO)
 157             comb += invert.eq(1)
 158     return idx, invert
 159
 160
 161 # Fetch Finite State Machine.
 162 # WARNING: there are currently DriverConflicts but it's actually working.
 163 # TODO, here: everything that is global in nature, information from the
 164 # main TestIssuerInternal, needs to move to either ispec() or ospec().
 165 # not only that: TestIssuerInternal.imem can entirely move into here
 166 # because imem is only ever accessed inside the FetchFSM.
class FetchFSM(ControlBase):
    """Instruction-fetch state machine, wrapped as a ControlBase stage.

    The incoming side (self.p) supplies a PC, the outgoing side (self.n)
    signals that an instruction has been placed on the decoder's raw
    opcode input.  The FSM has four states: IDLE, INSN_READ, INSN_READ2
    (second word of an SVP64-prefixed instruction) and INSN_READY.

    NOTE(review): elaborate() reads self.svp64 and self.core_bigendian_i
    when svp64_en is set, but __init__ does not assign either of them.
    Unless the creator of this object sets them after construction (or
    ControlBase provides them), elaboration with svp64_en=True will raise
    AttributeError - TODO confirm against the caller.
    """

    def __init__(self, allow_overlap, svp64_en, imem, core_rst,
                 pdecode2, cur_state,
                 dbg, core, svstate, nia, is_svp64_mode):
        """Store references to the shared objects used by elaborate().

        allow_overlap: when true, INSN_READ drops back to IDLE if the
                       debug interface is stopping
        svp64_en:      when true, SVP64 prefix detection logic is built
        imem:          instruction fetch unit (a_pc_i, a_i_valid,
                       f_i_valid, f_busy_o, f_instr_o are used)
        core_rst:      core reset; clears nia while asserted
        pdecode2:      decoder whose raw opcode / sv_rm / is_svp64_mode
                       inputs are loaded by this FSM
        cur_state:     record into which pc, svstate and msr are latched
        dbg:           debug interface (stopping_o is used)
        core:          the core (its 'state' regfile MSR read port, and
                       icache if present, are used)
        svstate:       SVSTATE value latched alongside the PC
        nia:           Signal updated with the next instruction address
        is_svp64_mode: Signal recording whether a prefix was detected
        """
        self.allow_overlap = allow_overlap
        self.svp64_en = svp64_en
        self.imem = imem
        self.core_rst = core_rst
        self.pdecode2 = pdecode2
        self.cur_state = cur_state
        self.dbg = dbg
        self.core = core
        self.svstate = svstate
        self.nia = nia
        self.is_svp64_mode = is_svp64_mode

        # set up pipeline ControlBase and allocate i/o specs
        # (unusual: normally done by the Pipeline API)
        super().__init__(stage=self)
        self.p.i_data, self.n.o_data = self.new_specs(None)
        self.i, self.o = self.p.i_data, self.n.o_data

    # next 3 functions are Stage API Compliance
    def setup(self, m, i):
        """Stage API hook: nothing to set up."""
        pass

    def ispec(self):
        """Stage API hook: create the input data record (FetchInput)."""
        return FetchInput()

    def ospec(self):
        """Stage API hook: create the output data record (FetchOutput)."""
        return FetchOutput()

    def elaborate(self, platform):
        """fetch FSM

        this FSM performs fetch of raw instruction data, partial-decodes
        it 32-bit at a time to detect SVP64 prefixes, and will optionally
        read a 2nd 32-bit quantity if that occurs.

        returns the Module produced by ControlBase.elaborate with the
        fetch FSM added to it.
        """
        m = super().elaborate(platform)

        dbg = self.dbg
        core = self.core
        pc = self.i.pc
        svstate = self.svstate
        nia = self.nia
        is_svp64_mode = self.is_svp64_mode
        # local aliases for the ControlBase ready/valid handshake signals
        fetch_pc_o_ready = self.p.o_ready
        fetch_pc_i_valid = self.p.i_valid
        fetch_insn_o_valid = self.n.o_valid
        fetch_insn_i_ready = self.n.i_ready

        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        cur_state = self.cur_state
        dec_opcode_o = pdecode2.dec.raw_opcode_in  # raw opcode

        # set to 0 when an MSR read is requested, back to 1 once the
        # data has been captured (so the capture happens only once)
        msr_read = Signal(reset=1)

        # also note instruction fetch failed
        # (flush_needed is assigned here but not read again in this class)
        if hasattr(core, "icache"):
            fetch_failed = core.icache.i_out.fetch_failed
            flush_needed = True
        else:
            fetch_failed = Const(0, 1)
            flush_needed = False

        # don't read msr every cycle
        staterf = self.core.regs.rf['state']
        state_r_msr = staterf.r_ports['msr']  # MSR rd

        # default: MSR read port disabled (overridden in IDLE below)
        comb += state_r_msr.ren.eq(0)

        with m.FSM(name='fetch_fsm'):

            # waiting (zzz)
            with m.State("IDLE"):
                # only advertise readiness when not being stopped by the
                # debugger and no fetch failure is being reported
                with m.If(~dbg.stopping_o & ~fetch_failed):
                    comb += fetch_pc_o_ready.eq(1)
                with m.If(fetch_pc_i_valid & ~fetch_failed):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                    sync += cur_state.pc.eq(pc)
                    sync += cur_state.svstate.eq(svstate)  # and svstate

                    # initiate read of MSR. arrives one clock later
                    comb += state_r_msr.ren.eq(1 << StateRegs.MSR)
                    sync += msr_read.eq(0)

                    m.next = "INSN_READ"  # move to "wait for bus" phase

            # dummy pause to find out why simulation is not keeping up
            with m.State("INSN_READ"):
                # the debugger can only abort the read when overlap is
                # allowed: otherwise "stopping" is a constant zero
                if self.allow_overlap:
                    stopping = dbg.stopping_o
                else:
                    stopping = Const(0)
                with m.If(stopping):
                    # stopping: jump back to idle
                    m.next = "IDLE"
                with m.Else():
                    # one cycle later, msr/sv read arrives.  valid only once.
                    with m.If(~msr_read):
                        sync += msr_read.eq(1)  # yeah don't read it again
                        sync += cur_state.msr.eq(state_r_msr.o_data)
                    with m.If(self.imem.f_busy_o & ~fetch_failed):  # zzz...
                        # busy but not fetch failed: stay in wait-read
                        comb += self.imem.a_i_valid.eq(1)
                        comb += self.imem.f_i_valid.eq(1)
                    with m.Else():
                        # not busy (or fetch failed!): instruction fetched
                        # when fetch failed, the instruction gets ignored
                        # by the decoder
                        insn = get_insn(self.imem.f_instr_o, cur_state.pc)
                        if self.svp64_en:
                            # NOTE(review): self.svp64 and (below)
                            # self.core_bigendian_i are not assigned in
                            # __init__ - confirm who provides them
                            svp64 = self.svp64
                            # decode the SVP64 prefix, if any
                            comb += svp64.raw_opcode_in.eq(insn)
                            comb += svp64.bigendian.eq(self.core_bigendian_i)
                            # pass the decoded prefix (if any) to PowerDecoder2
                            sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
                            sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
                            # remember whether this is a prefixed instruction,
                            # so the FSM can readily loop when VL==0
                            sync += is_svp64_mode.eq(svp64.is_svp64_mode)
                            # calculate the address of the following instruction
                            insn_size = Mux(svp64.is_svp64_mode, 8, 4)
                            sync += nia.eq(cur_state.pc + insn_size)
                            with m.If(~svp64.is_svp64_mode):
                                # with no prefix, store the instruction
                                # and hand it directly to the next FSM
                                sync += dec_opcode_o.eq(insn)
                                m.next = "INSN_READY"
                            with m.Else():
                                # fetch the rest of the instruction from memory
                                comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
                                comb += self.imem.a_i_valid.eq(1)
                                comb += self.imem.f_i_valid.eq(1)
                                m.next = "INSN_READ2"
                        else:
                            # not SVP64 - 32-bit only
                            sync += nia.eq(cur_state.pc + 4)
                            sync += dec_opcode_o.eq(insn)
                            m.next = "INSN_READY"

            # second 32-bit word of a prefixed instruction (only entered
            # from INSN_READ when an SVP64 prefix was detected)
            with m.State("INSN_READ2"):
                with m.If(self.imem.f_busy_o):  # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
                    sync += dec_opcode_o.eq(insn)
                    m.next = "INSN_READY"
                    # TODO: probably can start looking at pdecode2.rm_dec
                    # here or maybe even in INSN_READ state, if svp64_mode
                    # detected, in order to trigger - and wait for - the
                    # predicate reading.
                    # (pmode is not used yet: the string literal that
                    # follows is a sketch of the intended logic, it is
                    # not executed as code)
                    if self.svp64_en:
                        pmode = pdecode2.rm_dec.predmode
                    """
                    if pmode != SVP64PredMode.ALWAYS.value:
                        fire predicate loading FSM and wait before
                        moving to INSN_READY
                    else:
                        sync += self.srcmask.eq(-1) # set to all 1s
                        sync += self.dstmask.eq(-1) # set to all 1s
                        m.next = "INSN_READY"
                    """

            with m.State("INSN_READY"):
                # hand over the instruction, to be decoded
                comb += fetch_insn_o_valid.eq(1)
                with m.If(fetch_insn_i_ready):
                    m.next = "IDLE"

        # whatever was done above, over-ride it if core reset is held
        with m.If(self.core_rst):
            sync += nia.eq(0)

        return m
 355
 356
 357 class TestIssuerInternal(Elaboratable):
 358     """TestIssuer - reads instructions from TestMemory and issues them
 359
 360     efficiency and speed is not the main goal here: functional correctness
 361     and code clarity is.  optimisations (which almost 100% interfere with
 362     easy understanding) come later.
 363     """
 364
    def __init__(self, pspec):
        """Create the core, decoder, fetch unit, debug interface and the
        optional peripherals selected by pspec.

        pspec: configuration object.  the attributes svp64, regreduce,
               allow_overlap, debug, sram4x4kblock, xics and gpio are all
               optional (tested with hasattr) and each one enables the
               corresponding feature.

        note that pspec is modified here: wb_icache_en / wb_dcache_en are
        added when JTAG is enabled, and icache is added when the core
        has an "icache" attribute.  the order of the statements below
        therefore matters.
        """

        # test if SVP64 is to be enabled
        self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)

        # and if regfiles are reduced
        self.regreduce_en = (hasattr(pspec, "regreduce") and
                             (pspec.regreduce == True))

        # and if overlap requested
        self.allow_overlap = (hasattr(pspec, "allow_overlap") and
                              (pspec.allow_overlap == True))

        # JTAG interface.  add this right at the start because if it's
        # added it *modifies* the pspec, by adding enable/disable signals
        # for parts of the rest of the core
        self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
        self.dbg_domain = "sync"  # sigh "dbgsync" too problematic
        # self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
        if self.jtag_en:
            # XXX MUST keep this up-to-date with litex, and
            # soc-cocotb-sim, and err.. all needs sorting out, argh
            subset = ['uart',
                      'mtwi',
                      'eint', 'gpio', 'mspi0',
                      # 'mspi1', - disabled for now
                      # 'pwm', 'sd0', - disabled for now
                      'sdr']
            self.jtag = JTAG(get_pinspecs(subset=subset),
                             domain=self.dbg_domain)
            # add signals to pspec to enable/disable icache and dcache
            # (or data and instruction wishbone if icache/dcache not included)
            # https://bugs.libre-soc.org/show_bug.cgi?id=520
            # TODO: do we actually care if these are not domain-synchronised?
            # honestly probably not.
            pspec.wb_icache_en = self.jtag.wb_icache_en
            pspec.wb_dcache_en = self.jtag.wb_dcache_en
            self.wb_sram_en = self.jtag.wb_sram_en
        else:
            # no JTAG: the sram enable is a constant 1
            self.wb_sram_en = Const(1)

        # add 4k sram blocks?
        self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
                         pspec.sram4x4kblock == True)
        if self.sram4x4k:
            # four blocks, named sram4k_0 .. sram4k_3
            self.sram4k = []
            for i in range(4):
                self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
                                                    # features={'err'}
                                                    ))

        # add interrupt controller?
        self.xics = hasattr(pspec, "xics") and pspec.xics == True
        if self.xics:
            self.xics_icp = XICS_ICP()
            self.xics_ics = XICS_ICS()
            # expose the ICS interrupt-level input on this object
            self.int_level_i = self.xics_ics.int_level_i

        # add GPIO peripheral?
        self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
        if self.gpio:
            self.simple_gpio = SimpleGPIO()
            # expose the GPIO output on this object
            self.gpio_o = self.simple_gpio.gpio_o

        # main instruction core.  suitable for prototyping / demo only
        self.core = core = NonProductionCore(pspec)
        self.core_rst = ResetSignal("coresync")

        # instruction decoder.  goes into Trap Record
        #pdecode = create_pdecode()
        self.cur_state = CoreState("cur")  # current state (MSR/PC/SVSTATE)
        self.pdecode2 = PowerDecode2(None, state=self.cur_state,
                                     opkls=IssuerDecode2ToOperand,
                                     svp64_en=self.svp64_en,
                                     regreduce_en=self.regreduce_en)
        # (pdecode is not used again within this method)
        pdecode = self.pdecode2.dec

        if self.svp64_en:
            self.svp64 = SVP64PrefixDecoder()  # for decoding SVP64 prefix

        # Test Instruction memory
        if hasattr(core, "icache"):
            # XXX BLECH! use pspec to transfer the I-Cache to ConfigFetchUnit
            # truly dreadful.  needs a huge reorg.
            pspec.icache = core.icache
        self.imem = ConfigFetchUnit(pspec).fu

        # DMI interface
        self.dbg = CoreDebug()

        # instruction go/monitor
        self.pc_o = Signal(64, reset_less=True)
        self.pc_i = Data(64, "pc_i")  # set "ok" to indicate "please change me"
        self.svstate_i = Data(64, "svstate_i")  # ditto
        self.core_bigendian_i = Signal()  # TODO: set based on MSR.LE
        self.busy_o = Signal(reset_less=True)
        self.memerr_o = Signal(reset_less=True)

        # STATE regfile read /write ports for PC, MSR, SVSTATE
        staterf = self.core.regs.rf['state']
        self.state_r_pc = staterf.r_ports['cia']  # PC rd
        self.state_w_pc = staterf.w_ports['d_wr1']  # PC wr
        self.state_r_sv = staterf.r_ports['sv']  # SVSTATE rd
        self.state_w_sv = staterf.w_ports['sv']  # SVSTATE wr

        # DMI interface access
        intrf = self.core.regs.rf['int']
        crrf = self.core.regs.rf['cr']
        xerrf = self.core.regs.rf['xer']
        self.int_r = intrf.r_ports['dmi']  # INT read
        self.cr_r = crrf.r_ports['full_cr_dbg']  # CR read
        self.xer_r = xerrf.r_ports['full_xer']  # XER read

        if self.svp64_en:
            # for predication
            self.int_pred = intrf.r_ports['pred']  # INT predicate read
            self.cr_pred = crrf.r_ports['cr_pred']  # CR predicate read

        # hack method of keeping an eye on whether branch/trap set the PC
        self.state_nia = self.core.regs.rf['state'].w_ports['nia']
        # give the write-enable an explicit name
        self.state_nia.wen.name = 'state_nia_wen'

        # pulse to synchronize the simulator at instruction end
        self.insn_done = Signal()

        # indicate any instruction still outstanding, in execution
        self.any_busy = Signal()

        if self.svp64_en:
            # store copies of predicate masks
            self.srcmask = Signal(64)
            self.dstmask = Signal(64)
 497
    def fetch_predicate_fsm(self, m,
                            pred_insn_i_valid, pred_insn_o_ready,
                            pred_mask_o_valid, pred_mask_i_ready):
        """fetch_predicate_fsm - obtains (constructs in the case of CR)
           src/dest predicate masks

        https://bugs.libre-soc.org/show_bug.cgi?id=617
        the predicates are read here through the regfile read ports held
        in self.int_pred (INT) and self.cr_pred (CR).  in the case of CRs
        it has to be done through multiple reads, extracting one relevant
        bit at a time.
        later, a faster way would be to use the 32-bit-wide CR port but
        this is more complex decoding, here.  equivalent code used in
        ISACaller is "from openpower.decoder.isa.caller import get_predcr"

        arguments:

        * m: Module that the FSM is added to
        * pred_insn_i_valid: (read) starts a fetch when set in the idle state
        * pred_insn_o_ready: (driven) set while the FSM is idle
        * pred_mask_o_valid: (driven) set once self.srcmask / self.dstmask
          have been loaded
        * pred_mask_i_ready: (read) acknowledges the masks, returning the
          FSM to idle

        note: this ENTIRE FSM is not to be called when svp64 is disabled
        """
        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        rm_dec = pdecode2.rm_dec  # SVP64RMModeDecode
        predmode = rm_dec.predmode
        srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
        cr_pred, int_pred = self.cr_pred, self.int_pred   # read regfiles
        # get src/dst step, so we can skip already used mask bits
        cur_state = self.cur_state
        srcstep = cur_state.svstate.srcstep
        dststep = cur_state.svstate.dststep
        cur_vl = cur_state.svstate.vl

        # decode predicates
        # (both the INT and the CR interpretation of each field are
        # decoded: predmode selects which of them is acted upon)
        sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
        dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
        sidx, scrinvert = get_predcr(m, srcpred, 's')
        didx, dcrinvert = get_predcr(m, dstpred, 'd')

        # store fetched masks, for either intpred or crpred
        # when src/dst step is not zero, the skipped mask bits need to be
        # shifted-out, before actually storing them in src/dest mask
        new_srcmask = Signal(64, reset_less=True)
        new_dstmask = Signal(64, reset_less=True)

        with m.FSM(name="fetch_predicate"):

            with m.State("FETCH_PRED_IDLE"):
                comb += pred_insn_o_ready.eq(1)
                with m.If(pred_insn_i_valid):
                    with m.If(predmode == SVP64PredMode.INT):
                        # skip fetching destination mask register, when zero
                        with m.If(dall1s):
                            sync += new_dstmask.eq(-1)
                            # directly go to fetch source mask register
                            # guaranteed not to be zero (otherwise predmode
                            # would be SVP64PredMode.ALWAYS, not INT)
                            comb += int_pred.addr.eq(sregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_SRC_READ"
                        # fetch destination predicate register
                        with m.Else():
                            comb += int_pred.addr.eq(dregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_DST_READ"
                    with m.Elif(predmode == SVP64PredMode.CR):
                        # go fetch masks from the CR register file
                        sync += new_srcmask.eq(0)
                        sync += new_dstmask.eq(0)
                        m.next = "CR_READ"
                    with m.Else():
                        # neither INT nor CR predication: both masks are
                        # all ones, and no shifting step is needed
                        sync += self.srcmask.eq(-1)
                        sync += self.dstmask.eq(-1)
                        m.next = "FETCH_PRED_DONE"

            # the read requested in FETCH_PRED_IDLE is consumed here
            with m.State("INT_DST_READ"):
                # store destination mask
                inv = Repl(dinvert, 64)
                with m.If(dunary):
                    # set selected mask bit for 1<<r3 mode
                    # (only the bottom 6 bits of the register are used)
                    dst_shift = Signal(range(64))
                    comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
                    sync += new_dstmask.eq(1 << dst_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
                # skip fetching source mask register, when zero
                with m.If(sall1s):
                    sync += new_srcmask.eq(-1)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                # fetch source predicate register
                with m.Else():
                    comb += int_pred.addr.eq(sregread)
                    comb += int_pred.ren.eq(1)
                    m.next = "INT_SRC_READ"

            # the read requested in the previous state is consumed here
            with m.State("INT_SRC_READ"):
                # store source mask
                inv = Repl(sinvert, 64)
                with m.If(sunary):
                    # set selected mask bit for 1<<r3 mode
                    # (only the bottom 6 bits of the register are used)
                    src_shift = Signal(range(64))
                    comb += src_shift.eq(self.int_pred.o_data & 0b111111)
                    sync += new_srcmask.eq(1 << src_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
                m.next = "FETCH_PRED_SHIFT_MASK"

            # fetch masks from the CR register file
            # implements the following loop:
            # idx, inv = get_predcr(mask)
            # mask = 0
            # for cr_idx in range(vl):
            #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
            #     if cr[idx] ^ inv:
            #         mask |= 1 << cr_idx
            # return mask
            with m.State("CR_READ"):
                # CR index to be read, which will be ready by the next cycle
                cr_idx = Signal.like(cur_vl, reset_less=True)
                # submit the read operation to the regfile
                with m.If(cr_idx != cur_vl):
                    # the CR read port is unary ...
                    # ren = 1 << cr_idx
                    # ... in MSB0 convention ...
                    # ren = 1 << (7 - cr_idx)
                    # ... and with an offset:
                    # ren = 1 << (7 - off - cr_idx)
                    idx = SVP64CROffs.CRPred + cr_idx
                    comb += cr_pred.ren.eq(1 << (7 - idx))
                    # signal data valid in the next cycle
                    cr_read = Signal(reset_less=True)
                    sync += cr_read.eq(1)
                    # load the next index
                    sync += cr_idx.eq(cr_idx + 1)
                with m.Else():
                    # exit on loop end
                    # (the data from the final read is still processed
                    # below in this same cycle, as cr_read is still set)
                    sync += cr_read.eq(0)
                    sync += cr_idx.eq(0)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                with m.If(cr_read):
                    # compensate for the one cycle delay on the regfile
                    cur_cr_idx = Signal.like(cur_vl)
                    comb += cur_cr_idx.eq(cr_idx - 1)
                    # read the CR field, select the appropriate bit
                    cr_field = Signal(4)
                    scr_bit = Signal()
                    dcr_bit = Signal()
                    comb += cr_field.eq(cr_pred.o_data)
                    comb += scr_bit.eq(cr_field.bit_select(sidx, 1)
                                       ^ scrinvert)
                    comb += dcr_bit.eq(cr_field.bit_select(didx, 1)
                                       ^ dcrinvert)
                    # set the corresponding mask bit
                    bit_to_set = Signal.like(self.srcmask)
                    comb += bit_to_set.eq(1 << cur_cr_idx)
                    with m.If(scr_bit):
                        sync += new_srcmask.eq(new_srcmask | bit_to_set)
                    with m.If(dcr_bit):
                        sync += new_dstmask.eq(new_dstmask | bit_to_set)

            with m.State("FETCH_PRED_SHIFT_MASK"):
                # shift-out skipped mask bits
                sync += self.srcmask.eq(new_srcmask >> srcstep)
                sync += self.dstmask.eq(new_dstmask >> dststep)
                m.next = "FETCH_PRED_DONE"

            with m.State("FETCH_PRED_DONE"):
                # masks are valid: wait here until they are accepted
                comb += pred_mask_o_valid.eq(1)
                with m.If(pred_mask_i_ready):
                    m.next = "FETCH_PRED_IDLE"
 666
 667     def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
 668                   dbg, core_rst, is_svp64_mode,
 669                   fetch_pc_o_ready, fetch_pc_i_valid,
 670                   fetch_insn_o_valid, fetch_insn_i_ready,
 671                   pred_insn_i_valid, pred_insn_o_ready,
 672                   pred_mask_o_valid, pred_mask_i_ready,
 673                   exec_insn_i_valid, exec_insn_o_ready,
 674                   exec_pc_o_valid, exec_pc_i_ready):
 675         """issue FSM
 676
 677         decode / issue FSM.  this interacts with the "fetch" FSM
 678         through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
 679         (outgoing). also interacts with the "execute" FSM
 680         through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
 681         (incoming).
 682         SVP64 RM prefixes have already been set up by the
 683         "fetch" phase, so execute is fairly straightforward.
 684         """
 685
 686         comb = m.d.comb
 687         sync = m.d.sync
 688         pdecode2 = self.pdecode2
 689         cur_state = self.cur_state
 690
 691         # temporaries
 692         dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode
 693
 694         # for updating svstate (things like srcstep etc.)
 695         update_svstate = Signal()  # set this (below) if updating
 696         new_svstate = SVSTATERec("new_svstate")
 697         comb += new_svstate.eq(cur_state.svstate)
 698
 699         # precalculate srcstep+1 and dststep+1
 700         cur_srcstep = cur_state.svstate.srcstep
 701         cur_dststep = cur_state.svstate.dststep
 702         next_srcstep = Signal.like(cur_srcstep)
 703         next_dststep = Signal.like(cur_dststep)
 704         comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
 705         comb += next_dststep.eq(cur_state.svstate.dststep+1)
 706
 707         # note if an exception happened.  in a pipelined or OoO design
 708         # this needs to be accompanied by "shadowing" (or stalling)
 709         exc_happened = self.core.o.exc_happened
 710         # also note instruction fetch failed
 711         if hasattr(core, "icache"):
 712             fetch_failed = core.icache.i_out.fetch_failed
 713             flush_needed = True
 714         else:
 715             fetch_failed = Const(0, 1)
 716             flush_needed = False
 717         # set to fault in decoder
 718         # update (highest priority) instruction fault
 719         rising_fetch_failed = rising_edge(m, fetch_failed)
 720         with m.If(rising_fetch_failed):
 721             sync += pdecode2.instr_fault.eq(1)
 722
 723         with m.FSM(name="issue_fsm"):
 724
 725             # sync with the "fetch" phase which is reading the instruction
 726             # at this point, there is no instruction running, that
 727             # could inadvertently update the PC.
 728             with m.State("ISSUE_START"):
 729                 # reset instruction fault
 730                 sync += pdecode2.instr_fault.eq(0)
 731                 # wait on "core stop" release, before next fetch
 732                 # need to do this here, in case we are in a VL==0 loop
 733                 with m.If(~dbg.core_stop_o & ~core_rst):
 734                     comb += fetch_pc_i_valid.eq(1)  # tell fetch to start
 735                     with m.If(fetch_pc_o_ready):   # fetch acknowledged us
 736                         m.next = "INSN_WAIT"
 737                 with m.Else():
 738                     # tell core it's stopped, and acknowledge debug handshake
 739                     comb += dbg.core_stopped_i.eq(1)
 740                     # while stopped, allow updating the PC and SVSTATE
 741                     with m.If(self.pc_i.ok):
 742                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 743                         comb += self.state_w_pc.i_data.eq(self.pc_i.data)
 744                         sync += pc_changed.eq(1)
 745                     with m.If(self.svstate_i.ok):
 746                         comb += new_svstate.eq(self.svstate_i.data)
 747                         comb += update_svstate.eq(1)
 748                         sync += sv_changed.eq(1)
 749
 750             # wait for an instruction to arrive from Fetch
 751             with m.State("INSN_WAIT"):
 752                 if self.allow_overlap:
 753                     stopping = dbg.stopping_o
 754                 else:
 755                     stopping = Const(0)
 756                 with m.If(stopping):
 757                     # stopping: jump back to idle
 758                     m.next = "ISSUE_START"
 759                     if flush_needed:
 760                         # request the icache to stop asserting "failed"
 761                         comb += core.icache.flush_in.eq(1)
 762                     # stop instruction fault
 763                     sync += pdecode2.instr_fault.eq(0)
 764                 with m.Else():
 765                     comb += fetch_insn_i_ready.eq(1)
 766                     with m.If(fetch_insn_o_valid):
 767                         # loop into ISSUE_START if it's a SVP64 instruction
 768                         # and VL == 0.  this because VL==0 is a for-loop
 769                         # from 0 to 0 i.e. always, always a NOP.
 770                         cur_vl = cur_state.svstate.vl
 771                         with m.If(is_svp64_mode & (cur_vl == 0)):
 772                             # update the PC before fetching the next instruction
 773                             # since we are in a VL==0 loop, no instruction was
 774                             # executed that we could be overwriting
 775                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 776                             comb += self.state_w_pc.i_data.eq(nia)
 777                             comb += self.insn_done.eq(1)
 778                             m.next = "ISSUE_START"
 779                         with m.Else():
 780                             if self.svp64_en:
 781                                 m.next = "PRED_START"  # fetching predicate
 782                             else:
 783                                 m.next = "DECODE_SV"  # skip predication
 784
 785             with m.State("PRED_START"):
 786                 comb += pred_insn_i_valid.eq(1)  # tell fetch_pred to start
 787                 with m.If(pred_insn_o_ready):  # fetch_pred acknowledged us
 788                     m.next = "MASK_WAIT"
 789
 790             with m.State("MASK_WAIT"):
 791                 comb += pred_mask_i_ready.eq(1)  # ready to receive the masks
 792                 with m.If(pred_mask_o_valid):  # predication masks are ready
 793                     m.next = "PRED_SKIP"
 794
 795             # skip zeros in predicate
 796             with m.State("PRED_SKIP"):
 797                 with m.If(~is_svp64_mode):
 798                     m.next = "DECODE_SV"  # nothing to do
 799                 with m.Else():
 800                     if self.svp64_en:
 801                         pred_src_zero = pdecode2.rm_dec.pred_sz
 802                         pred_dst_zero = pdecode2.rm_dec.pred_dz
 803
 804                         # new srcstep, after skipping zeros
 805                         skip_srcstep = Signal.like(cur_srcstep)
 806                         # value to be added to the current srcstep
 807                         src_delta = Signal.like(cur_srcstep)
 808                         # add leading zeros to srcstep, if not in zero mode
 809                         with m.If(~pred_src_zero):
 810                             # priority encoder (count leading zeros)
 811                             # append guard bit, in case the mask is all zeros
 812                             pri_enc_src = PriorityEncoder(65)
 813                             m.submodules.pri_enc_src = pri_enc_src
 814                             comb += pri_enc_src.i.eq(Cat(self.srcmask,
 815                                                          Const(1, 1)))
 816                             comb += src_delta.eq(pri_enc_src.o)
 817                         # apply delta to srcstep
 818                         comb += skip_srcstep.eq(cur_srcstep + src_delta)
 819                         # shift-out all leading zeros from the mask
 820                         # plus the leading "one" bit
 821                         # TODO count leading zeros and shift-out the zero
 822                         #      bits, in the same step, in hardware
 823                         sync += self.srcmask.eq(self.srcmask >> (src_delta+1))
 824
 825                         # same as above, but for dststep
 826                         skip_dststep = Signal.like(cur_dststep)
 827                         dst_delta = Signal.like(cur_dststep)
 828                         with m.If(~pred_dst_zero):
 829                             pri_enc_dst = PriorityEncoder(65)
 830                             m.submodules.pri_enc_dst = pri_enc_dst
 831                             comb += pri_enc_dst.i.eq(Cat(self.dstmask,
 832                                                          Const(1, 1)))
 833                             comb += dst_delta.eq(pri_enc_dst.o)
 834                         comb += skip_dststep.eq(cur_dststep + dst_delta)
 835                         sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))
 836
 837                         # TODO: initialize mask[VL]=1 to avoid passing past VL
 838                         with m.If((skip_srcstep >= cur_vl) |
 839                                   (skip_dststep >= cur_vl)):
 840                             # end of VL loop. Update PC and reset src/dst step
 841                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 842                             comb += self.state_w_pc.i_data.eq(nia)
 843                             comb += new_svstate.srcstep.eq(0)
 844                             comb += new_svstate.dststep.eq(0)
 845                             comb += update_svstate.eq(1)
 846                             # synchronize with the simulator
 847                             comb += self.insn_done.eq(1)
 848                             # go back to Issue
 849                             m.next = "ISSUE_START"
 850                         with m.Else():
 851                             # update new src/dst step
 852                             comb += new_svstate.srcstep.eq(skip_srcstep)
 853                             comb += new_svstate.dststep.eq(skip_dststep)
 854                             comb += update_svstate.eq(1)
 855                             # proceed to Decode
 856                             m.next = "DECODE_SV"
 857
 858                         # pass predicate mask bits through to satellite decoders
 859                         # TODO: for SIMD this will be *multiple* bits
 860                         sync += core.i.sv_pred_sm.eq(self.srcmask[0])
 861                         sync += core.i.sv_pred_dm.eq(self.dstmask[0])
 862
 863             # after src/dst step have been updated, we are ready
 864             # to decode the instruction
 865             with m.State("DECODE_SV"):
 866                 # decode the instruction
 867                 with m.If(~fetch_failed):
 868                     sync += pdecode2.instr_fault.eq(0)
 869                 sync += core.i.e.eq(pdecode2.e)
 870                 sync += core.i.state.eq(cur_state)
 871                 sync += core.i.raw_insn_i.eq(dec_opcode_i)
 872                 sync += core.i.bigendian_i.eq(self.core_bigendian_i)
 873                 if self.svp64_en:
 874                     sync += core.i.sv_rm.eq(pdecode2.sv_rm)
 875                     # set RA_OR_ZERO detection in satellite decoders
 876                     sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
 877                     # and svp64 detection
 878                     sync += core.i.is_svp64_mode.eq(is_svp64_mode)
 879                     # and svp64 bit-rev'd ldst mode
 880                     ldst_dec = pdecode2.use_svp64_ldst_dec
 881                     sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
 882                 # after decoding, reset any previous exception condition,
 883                 # allowing it to be set again during the next execution
 884                 sync += pdecode2.ldst_exc.eq(0)
 885
 886                 m.next = "INSN_EXECUTE"  # move to "execute"
 887
 888             # handshake with execution FSM, move to "wait" once acknowledged
 889             with m.State("INSN_EXECUTE"):
 890                 comb += exec_insn_i_valid.eq(1)  # trigger execute
 891                 with m.If(exec_insn_o_ready):   # execute acknowledged us
 892                     m.next = "EXECUTE_WAIT"
 893
 894             with m.State("EXECUTE_WAIT"):
 895                 # wait on "core stop" release, at instruction end
 896                 # need to do this here, in case we are in a VL>1 loop
 897                 with m.If(~dbg.core_stop_o & ~core_rst):
 898                     comb += exec_pc_i_ready.eq(1)
 899                     # see https://bugs.libre-soc.org/show_bug.cgi?id=636
 900                     # the exception info needs to be blatted into
 901                     # pdecode.ldst_exc, and the instruction "re-run".
 902                     # when ldst_exc.happened is set, the PowerDecoder2
 903                     # reacts very differently: it re-writes the instruction
 904                     # with a "trap" (calls PowerDecoder2.trap()) which
 905                     # will *overwrite* whatever was requested and jump the
 906                     # PC to the exception address, as well as alter MSR.
 907                     # nothing else needs to be done other than to note
 908                     # the change of PC and MSR (and, later, SVSTATE)
 909                     with m.If(exc_happened):
 910                         mmu = core.fus.get_exc("mmu0")
 911                         ldst = core.fus.get_exc("ldst0")
 912                         with m.If(fetch_failed):
 913                             # instruction fetch: exception is from MMU
 914                             # reset instr_fault (highest priority)
 915                             sync += pdecode2.ldst_exc.eq(mmu)
 916                             sync += pdecode2.instr_fault.eq(0)
 917                             if flush_needed:
 918                                 # request the icache to stop asserting "failed"
 919                                 comb += core.icache.flush_in.eq(1)
 920                         with m.Else():
 921                             # otherwise assume it was a LDST exception
 922                             sync += pdecode2.ldst_exc.eq(ldst)
 923
 924                     with m.If(exec_pc_o_valid):
 925
 926                         # was this the last loop iteration?
 927                         is_last = Signal()
 928                         cur_vl = cur_state.svstate.vl
 929                         comb += is_last.eq(next_srcstep == cur_vl)
 930
 931                         # return directly to Decode if Execute generated an
 932                         # exception.
 933                         with m.If(pdecode2.ldst_exc.happened):
 934                             m.next = "DECODE_SV"
 935
 936                         # if either PC or SVSTATE were changed by the previous
 937                         # instruction, go directly back to Fetch, without
 938                         # updating either PC or SVSTATE
 939                         with m.Elif(pc_changed | sv_changed):
 940                             m.next = "ISSUE_START"
 941
 942                         # also return to Fetch, when no output was a vector
 943                         # (regardless of SRCSTEP and VL), or when the last
 944                         # instruction was really the last one of the VL loop
 945                         with m.Elif((~pdecode2.loop_continue) | is_last):
 946                             # before going back to fetch, update the PC state
 947                             # register with the NIA.
 948                             # ok here we are not reading the branch unit.
 949                             # TODO: this just blithely overwrites whatever
 950                             #       pipeline updated the PC
 951                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 952                             comb += self.state_w_pc.i_data.eq(nia)
 953                             # reset SRCSTEP before returning to Fetch
 954                             if self.svp64_en:
 955                                 with m.If(pdecode2.loop_continue):
 956                                     comb += new_svstate.srcstep.eq(0)
 957                                     comb += new_svstate.dststep.eq(0)
 958                                     comb += update_svstate.eq(1)
 959                             else:
 960                                 comb += new_svstate.srcstep.eq(0)
 961                                 comb += new_svstate.dststep.eq(0)
 962                                 comb += update_svstate.eq(1)
 963                             m.next = "ISSUE_START"
 964
 965                         # returning to Execute? then, first update SRCSTEP
 966                         with m.Else():
 967                             comb += new_svstate.srcstep.eq(next_srcstep)
 968                             comb += new_svstate.dststep.eq(next_dststep)
 969                             comb += update_svstate.eq(1)
 970                             # return to mask skip loop
 971                             m.next = "PRED_SKIP"
 972
 973                 with m.Else():
 974                     comb += dbg.core_stopped_i.eq(1)
 975                     if flush_needed:
 976                         # request the icache to stop asserting "failed"
 977                         comb += core.icache.flush_in.eq(1)
 978                     # stop instruction fault
 979                     sync += pdecode2.instr_fault.eq(0)
 980                     if flush_needed:
 981                         # request the icache to stop asserting "failed"
 982                         comb += core.icache.flush_in.eq(1)
 983                     # stop instruction fault
 984                     sync += pdecode2.instr_fault.eq(0)
 985                     # while stopped, allow updating the PC and SVSTATE
 986                     with m.If(self.pc_i.ok):
 987                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 988                         comb += self.state_w_pc.i_data.eq(self.pc_i.data)
 989                         sync += pc_changed.eq(1)
 990                     with m.If(self.svstate_i.ok):
 991                         comb += new_svstate.eq(self.svstate_i.data)
 992                         comb += update_svstate.eq(1)
 993                         sync += sv_changed.eq(1)
 994
 995         # check if svstate needs updating: if so, write it to State Regfile
 996         with m.If(update_svstate):
 997             comb += self.state_w_sv.wen.eq(1 << StateRegs.SVSTATE)
 998             comb += self.state_w_sv.i_data.eq(new_svstate)
 999             sync += cur_state.svstate.eq(new_svstate)  # for next clock
1000
1001     def execute_fsm(self, m, core, pc_changed, sv_changed,
1002                     exec_insn_i_valid, exec_insn_o_ready,
1003                     exec_pc_o_valid, exec_pc_i_ready):
1004         """execute FSM
1005
1006         execute FSM. this interacts with the "issue" FSM
1007         through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
1008         (outgoing). SVP64 RM prefixes have already been set up by the
1009         "issue" phase, so execute is fairly straightforward.
1010         """
1011
1012         comb = m.d.comb
1013         sync = m.d.sync
1014         pdecode2 = self.pdecode2
1015
1016         # temporaries
1017         core_busy_o = core.n.o_data.busy_o  # core is busy
1018         core_ivalid_i = core.p.i_valid              # instruction is valid
1019
1020         if hasattr(core, "icache"):
1021             fetch_failed = core.icache.i_out.fetch_failed
1022         else:
1023             fetch_failed = Const(0, 1)
1024
1025         with m.FSM(name="exec_fsm"):
1026
1027             # waiting for instruction bus (stays there until not busy)
1028             with m.State("INSN_START"):
1029                 comb += exec_insn_o_ready.eq(1)
1030                 with m.If(exec_insn_i_valid):
1031                     comb += core_ivalid_i.eq(1)  # instruction is valid/issued
1032                     sync += sv_changed.eq(0)
1033                     sync += pc_changed.eq(0)
1034                     with m.If(core.p.o_ready):  # only move if accepted
1035                         m.next = "INSN_ACTIVE"  # move to "wait completion"
1036
1037             # instruction started: must wait till it finishes
1038             with m.State("INSN_ACTIVE"):
1039                 # note changes to PC and SVSTATE
1040                 with m.If(self.state_nia.wen & (1 << StateRegs.SVSTATE)):
1041                     sync += sv_changed.eq(1)
1042                 with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
1043                     sync += pc_changed.eq(1)
1044                 with m.If(~core_busy_o):  # instruction done!
1045                     comb += exec_pc_o_valid.eq(1)
1046                     with m.If(exec_pc_i_ready):
1047                         # when finished, indicate "done".
1048                         # however, if there was an exception, the instruction
1049                         # is *not* yet done.  this is an implementation
1050                         # detail: we choose to implement exceptions by
1051                         # taking the exception information from the LDST
1052                         # unit, putting that *back* into the PowerDecoder2,
1053                         # and *re-running the entire instruction*.
1054                         # if we erroneously indicate "done" here, it is as if
1055                         # there were *TWO* instructions:
1056                         # 1) the failed LDST 2) a TRAP.
1057                         with m.If(~pdecode2.ldst_exc.happened &
1058                                   ~fetch_failed):
1059                             comb += self.insn_done.eq(1)
1060                         m.next = "INSN_START"  # back to fetch
1061
1062     def setup_peripherals(self, m):
1063         comb, sync = m.d.comb, m.d.sync
1064
1065         # okaaaay so the debug module must be in coresync clock domain
1066         # but NOT its reset signal. to cope with this, set every single
1067         # submodule explicitly in coresync domain, debug and JTAG
1068         # in their own one but using *external* reset.
1069         csd = DomainRenamer("coresync")
1070         dbd = DomainRenamer(self.dbg_domain)
1071
1072         m.submodules.core = core = csd(self.core)
1073         # this _so_ needs sorting out.  ICache is added down inside
1074         # LoadStore1 and is already a submodule of LoadStore1
1075         if not isinstance(self.imem, ICache):
1076             m.submodules.imem = imem = csd(self.imem)
1077         m.submodules.dbg = dbg = dbd(self.dbg)
1078         if self.jtag_en:
1079             m.submodules.jtag = jtag = dbd(self.jtag)
1080             # TODO: UART2GDB mux, here, from external pin
1081             # see https://bugs.libre-soc.org/show_bug.cgi?id=499
1082             sync += dbg.dmi.connect_to(jtag.dmi)
1083
1084         cur_state = self.cur_state
1085
1086         # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
1087         if self.sram4x4k:
1088             for i, sram in enumerate(self.sram4k):
1089                 m.submodules["sram4k_%d" % i] = csd(sram)
1090                 comb += sram.enable.eq(self.wb_sram_en)
1091
1092         # XICS interrupt handler
1093         if self.xics:
1094             m.submodules.xics_icp = icp = csd(self.xics_icp)
1095             m.submodules.xics_ics = ics = csd(self.xics_ics)
1096             comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
1097             sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core
1098
1099         # GPIO test peripheral
1100         if self.gpio:
1101             m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)
1102
1103         # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
1104         # XXX causes litex ECP5 test to get wrong idea about input and output
1105         # (but works with verilator sim *sigh*)
1106         # if self.gpio and self.xics:
1107         #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
1108
1109         # instruction decoder
1110         pdecode = create_pdecode()
1111         m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
1112         if self.svp64_en:
1113             m.submodules.svp64 = svp64 = csd(self.svp64)
1114
1115         # convenience
1116         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
1117         intrf = self.core.regs.rf['int']
1118
1119         # clock delay power-on reset
1120         cd_por = ClockDomain(reset_less=True)
1121         cd_sync = ClockDomain()
1122         core_sync = ClockDomain("coresync")
1123         m.domains += cd_por, cd_sync, core_sync
1124         if self.dbg_domain != "sync":
1125             dbg_sync = ClockDomain(self.dbg_domain)
1126             m.domains += dbg_sync
1127
1128         ti_rst = Signal(reset_less=True)
1129         delay = Signal(range(4), reset=3)
1130         with m.If(delay != 0):
1131             m.d.por += delay.eq(delay - 1)
1132         comb += cd_por.clk.eq(ClockSignal())
1133
1134         # power-on reset delay
1135         core_rst = ResetSignal("coresync")
1136         comb += ti_rst.eq(delay != 0 | dbg.core_rst_o | ResetSignal())
1137         comb += core_rst.eq(ti_rst)
1138
1139         # debug clock is same as coresync, but reset is *main external*
1140         if self.dbg_domain != "sync":
1141             dbg_rst = ResetSignal(self.dbg_domain)
1142             comb += dbg_rst.eq(ResetSignal())
1143
1144         # busy/halted signals from core
1145         core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o  # core is busy
1146         comb += self.busy_o.eq(core_busy_o)
1147         comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
1148
1149         # temporary hack: says "go" immediately for both address gen and ST
1150         l0 = core.l0
1151         ldst = core.fus.fus['ldst0']
1152         st_go_edge = rising_edge(m, ldst.st.rel_o)
1153         # link addr-go direct to rel
1154         m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)
1155         m.d.comb += ldst.st.go_i.eq(st_go_edge)  # link store-go to rising rel
1156
    def elaborate(self, platform):
        """builds and returns the Module for the issuer

        sets up the peripherals and core, wires up PC/SVSTATE reading
        and the debug state, creates the ready/valid handshake signals,
        then adds the Fetch FSM (as a submodule) followed by the issue,
        predicate-fetch (only if svp64_en) and execute FSMs, the DMI
        regfile access logic and the DEC/TB FSM.

        platform is not used within this function.
        """
        m = Module()
        # convenience
        comb, sync = m.d.comb, m.d.sync
        cur_state = self.cur_state
        pdecode2 = self.pdecode2
        dbg = self.dbg
        core = self.core

        # set up peripherals and core
        core_rst = self.core_rst
        self.setup_peripherals(m)

        # reset current state if core reset requested
        with m.If(core_rst):
            m.d.sync += self.cur_state.eq(0)

        # PC and instruction from I-Memory
        comb += self.pc_o.eq(cur_state.pc)
        pc_changed = Signal()  # note write to PC
        sv_changed = Signal()  # note write to SVSTATE

        # indicate to outside world if any FU is still executing
        comb += self.any_busy.eq(core.n.o_data.any_busy_o)  # any FU executing

        # read state either from incoming override or from regfile
        # TODO: really should be doing MSR in the same way
        pc = state_get(m, core_rst, self.pc_i,
                       "pc",                  # read PC
                       self.state_r_pc, StateRegs.PC)
        svstate = state_get(m, core_rst, self.svstate_i,
                            "svstate",   # read SVSTATE
                            self.state_r_sv, StateRegs.SVSTATE)

        # don't write pc every cycle
        # (these are defaults: they must come *before* the FSMs below,
        # which add later assignments that override them when writing PC)
        comb += self.state_w_pc.wen.eq(0)
        comb += self.state_w_pc.i_data.eq(0)

        # address of the next instruction, in the absence of a branch
        # depends on the instruction size
        nia = Signal(64)

        # connect up debug signals
        # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
        comb += dbg.terminate_i.eq(core.o.core_terminate_o)
        comb += dbg.state.pc.eq(pc)
        comb += dbg.state.svstate.eq(svstate)
        comb += dbg.state.msr.eq(cur_state.msr)

        # pass the prefix mode from Fetch to Issue, so the latter can loop
        # on VL==0
        is_svp64_mode = Signal()

        # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
        # issue, decode/execute, now joined by "Predicate fetch/calculate".
        # these are the handshake signals between each

        # fetch FSM can run as soon as the PC is valid
        fetch_pc_i_valid = Signal()  # Execute tells Fetch "start next read"
        fetch_pc_o_ready = Signal()  # Fetch Tells SVSTATE "proceed"

        # fetch FSM hands over the instruction to be decoded / issued
        fetch_insn_o_valid = Signal()
        fetch_insn_i_ready = Signal()

        # predicate fetch FSM decodes and fetches the predicate
        pred_insn_i_valid = Signal()
        pred_insn_o_ready = Signal()

        # predicate fetch FSM delivers the masks
        pred_mask_o_valid = Signal()
        pred_mask_i_ready = Signal()

        # issue FSM delivers the instruction to be executed
        exec_insn_i_valid = Signal()
        exec_insn_o_ready = Signal()

        # execute FSM, hands over the PC/SVSTATE back to the issue FSM
        exec_pc_o_valid = Signal()
        exec_pc_i_ready = Signal()

        # the FSMs here are perhaps unusual in that they detect conditions
        # then "hold" information, combinatorially, for the core
        # (as opposed to using sync - which would be on a clock's delay)
        # this includes the actual opcode, valid flags and so on.

        # Fetch, then predicate fetch, then Issue, then Execute.
        # Issue is where the VL for-loop lives.  the ready/valid
        # signalling is used to communicate between the four.

        # set up Fetch FSM
        fetch = FetchFSM(self.allow_overlap, self.svp64_en,
                         self.imem, core_rst, pdecode2, cur_state,
                         dbg, core, svstate, nia, is_svp64_mode)
        m.submodules.fetch = fetch
        # connect up in/out data to existing Signals
        comb += fetch.p.i_data.pc.eq(pc)
        # and the ready/valid signalling
        comb += fetch_pc_o_ready.eq(fetch.p.o_ready)
        comb += fetch.p.i_valid.eq(fetch_pc_i_valid)
        comb += fetch_insn_o_valid.eq(fetch.n.o_valid)
        comb += fetch.n.i_ready.eq(fetch_insn_i_ready)

        # issue FSM: talks to fetch, predicate-fetch and execute
        self.issue_fsm(m, core, pc_changed, sv_changed, nia,
                       dbg, core_rst, is_svp64_mode,
                       fetch_pc_o_ready, fetch_pc_i_valid,
                       fetch_insn_o_valid, fetch_insn_i_ready,
                       pred_insn_i_valid, pred_insn_o_ready,
                       pred_mask_o_valid, pred_mask_i_ready,
                       exec_insn_i_valid, exec_insn_o_ready,
                       exec_pc_o_valid, exec_pc_i_ready)

        # predicate fetch FSM is only added when SVP64 is enabled
        if self.svp64_en:
            self.fetch_predicate_fsm(m,
                                     pred_insn_i_valid, pred_insn_o_ready,
                                     pred_mask_o_valid, pred_mask_i_ready)

        # execute FSM: talks to issue only
        self.execute_fsm(m, core, pc_changed, sv_changed,
                         exec_insn_i_valid, exec_insn_o_ready,
                         exec_pc_o_valid, exec_pc_i_ready)

        # this bit doesn't have to be in the FSM: connect up to read
        # regfiles on demand from DMI
        self.do_dmi(m, dbg)

        # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
        # (which uses that in PowerDecoder2 to raise 0x900 exception)
        self.tb_dec_fsm(m, cur_state.dec)

        return m
1287
1288     def do_dmi(self, m, dbg):
1289         """deals with DMI debug requests
1290
1291         currently only provides read requests for the INT regfile, CR and XER
1292         it will later also deal with *writing* to these regfiles.
1293         """
1294         comb = m.d.comb
1295         sync = m.d.sync
1296         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
1297         intrf = self.core.regs.rf['int']
1298
1299         with m.If(d_reg.req):  # request for regfile access being made
1300             # TODO: error-check this
1301             # XXX should this be combinatorial?  sync better?
1302             if intrf.unary:
1303                 comb += self.int_r.ren.eq(1 << d_reg.addr)
1304             else:
1305                 comb += self.int_r.addr.eq(d_reg.addr)
1306                 comb += self.int_r.ren.eq(1)
1307         d_reg_delay = Signal()
1308         sync += d_reg_delay.eq(d_reg.req)
1309         with m.If(d_reg_delay):
1310             # data arrives one clock later
1311             comb += d_reg.data.eq(self.int_r.o_data)
1312             comb += d_reg.ack.eq(1)
1313
1314         # sigh same thing for CR debug
1315         with m.If(d_cr.req):  # request for regfile access being made
1316             comb += self.cr_r.ren.eq(0b11111111)  # enable all
1317         d_cr_delay = Signal()
1318         sync += d_cr_delay.eq(d_cr.req)
1319         with m.If(d_cr_delay):
1320             # data arrives one clock later
1321             comb += d_cr.data.eq(self.cr_r.o_data)
1322             comb += d_cr.ack.eq(1)
1323
1324         # aaand XER...
1325         with m.If(d_xer.req):  # request for regfile access being made
1326             comb += self.xer_r.ren.eq(0b111111)  # enable all
1327         d_xer_delay = Signal()
1328         sync += d_xer_delay.eq(d_xer.req)
1329         with m.If(d_xer_delay):
1330             # data arrives one clock later
1331             comb += d_xer.data.eq(self.xer_r.o_data)
1332             comb += d_xer.ack.eq(1)
1333
1334     def tb_dec_fsm(self, m, spr_dec):
1335         """tb_dec_fsm
1336
1337         this is a FSM for updating either dec or tb.  it runs alternately
1338         DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
1339         value to DEC, however the regfile has "passthrough" on it so this
1340         *should* be ok.
1341
1342         see v3.0B p1097-1099 for Timeer Resource and p1065 and p1076
1343         """
1344
1345         comb, sync = m.d.comb, m.d.sync
1346         fast_rf = self.core.regs.rf['fast']
1347         fast_r_dectb = fast_rf.r_ports['issue']  # DEC/TB
1348         fast_w_dectb = fast_rf.w_ports['issue']  # DEC/TB
1349
1350         with m.FSM() as fsm:
1351
1352             # initiates read of current DEC
1353             with m.State("DEC_READ"):
1354                 comb += fast_r_dectb.addr.eq(FastRegs.DEC)
1355                 comb += fast_r_dectb.ren.eq(1)
1356                 m.next = "DEC_WRITE"
1357
1358             # waits for DEC read to arrive (1 cycle), updates with new value
1359             with m.State("DEC_WRITE"):
1360                 new_dec = Signal(64)
1361                 # TODO: MSR.LPCR 32-bit decrement mode
1362                 comb += new_dec.eq(fast_r_dectb.o_data - 1)
1363                 comb += fast_w_dectb.addr.eq(FastRegs.DEC)
1364                 comb += fast_w_dectb.wen.eq(1)
1365                 comb += fast_w_dectb.i_data.eq(new_dec)
1366                 sync += spr_dec.eq(new_dec)  # copy into cur_state for decoder
1367                 m.next = "TB_READ"
1368
1369             # initiates read of current TB
1370             with m.State("TB_READ"):
1371                 comb += fast_r_dectb.addr.eq(FastRegs.TB)
1372                 comb += fast_r_dectb.ren.eq(1)
1373                 m.next = "TB_WRITE"
1374
1375             # waits for read TB to arrive, initiates write of current TB
1376             with m.State("TB_WRITE"):
1377                 new_tb = Signal(64)
1378                 comb += new_tb.eq(fast_r_dectb.o_data + 1)
1379                 comb += fast_w_dectb.addr.eq(FastRegs.TB)
1380                 comb += fast_w_dectb.wen.eq(1)
1381                 comb += fast_w_dectb.i_data.eq(new_tb)
1382                 m.next = "DEC_READ"
1383
1384         return m
1385
1386     def __iter__(self):
1387         yield from self.pc_i.ports()
1388         yield self.pc_o
1389         yield self.memerr_o
1390         yield from self.core.ports()
1391         yield from self.imem.ports()
1392         yield self.core_bigendian_i
1393         yield self.busy_o
1394
1395     def ports(self):
1396         return list(self)
1397
1398     def external_ports(self):
1399         ports = self.pc_i.ports()
1400         ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
1401                   ]
1402
1403         if self.jtag_en:
1404             ports += list(self.jtag.external_ports())
1405         else:
1406             # don't add DMI if JTAG is enabled
1407             ports += list(self.dbg.dmi.ports())
1408
1409         ports += list(self.imem.ibus.fields.values())
1410         ports += list(self.core.l0.cmpi.wb_bus().fields.values())
1411
1412         if self.sram4x4k:
1413             for sram in self.sram4k:
1414                 ports += list(sram.bus.fields.values())
1415
1416         if self.xics:
1417             ports += list(self.xics_icp.bus.fields.values())
1418             ports += list(self.xics_ics.bus.fields.values())
1419             ports.append(self.int_level_i)
1420
1421         if self.gpio:
1422             ports += list(self.simple_gpio.bus.fields.values())
1423             ports.append(self.gpio_o)
1424
1425         return ports
1426
1427     def ports(self):
1428         return list(self)
1429
1430
class TestIssuer(Elaboratable):
    """wraps TestIssuerInternal and drives the clocks it runs from

    the "coresync" clock is driven combinatorially from the default
    clock and, when the debug domain is not "sync", the debug-domain
    clock is driven from "coresync" in turn.

    if pspec.use_pll is set, a PLL submodule and a "pllclk" clock domain
    are added as well, along with the PLL test/VCO outputs and the clock
    select input.  the PLL is currently *bypassed*: "coresync" is still
    driven from the reference (default) clock, not from the PLL output.
    """

    def __init__(self, pspec):
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL(instance=True)

        # PLL direct clock or not (a pspec without use_pll means "not")
        self.pll_en = getattr(pspec, "use_pll", False)
        if self.pll_en:
            self.pll_test_o = Signal(reset_less=True)
            self.pll_vco_o = Signal(reset_less=True)
            self.clk_sel_i = Signal(2, reset_less=True)
            self.ref_clk = ClockSignal()  # can't rename it but that's ok
            self.pllclk_clk = ClockSignal("pllclk")

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        # TestIssuer nominally runs at main clock, actually it is
        # all combinatorial internally except for coresync'd components
        m.submodules.ti = self.ti

        if self.pll_en:
            m.submodules.wrappll = pll = self.pll

            # add clock domain for the PLL output
            cd_pll = ClockDomain("pllclk")
            m.domains += cd_pll

            # the "pllclk" domain clock is the PLL's clock output
            pllclk = self.pllclk_clk
            comb += pllclk.eq(pll.clk_pll_o)

            # NOTE: pll.clk_24_i is deliberately not driven from ref_clk
            # here; it is listed by external_ports() instead.

            # output PLL test signal, and analog oscillator out
            comb += self.pll_test_o.eq(pll.pll_test_o)
            comb += self.pll_vco_o.eq(pll.pll_vco_o)

            # input to pll clock selection
            comb += pll.clk_sel_i.eq(self.clk_sel_i)

            # now wire up ResetSignals.  don't mind them being in this domain
            pll_rst = ResetSignal("pllclk")
            comb += pll_rst.eq(ResetSignal())

        # the "coresync" (and debug) domains are referred to by name only:
        # no ClockDomain is declared for them in this module.
        # NOTE(review): presumably they are declared inside
        # TestIssuerInternal - confirm.
        intclk = ClockSignal("coresync")

        # XXX BYPASS PLL XXX
        # with or without the PLL, the core clock comes from the
        # reference (default-domain) clock rather than from pll.clk_pll_o
        if self.pll_en:
            comb += intclk.eq(self.ref_clk)
        else:
            comb += intclk.eq(ClockSignal())

        # debug clock runs at coresync internal clock
        if self.ti.dbg_domain != 'sync':
            dbgclk = ClockSignal(self.ti.dbg_domain)
            comb += dbgclk.eq(intclk)

        return m

    def ports(self):
        """all ports of the wrapped issuer and the PLL, plus clock/reset"""
        return list(self.ti.ports()) + list(self.pll.ports()) + \
            [ClockSignal(), ResetSignal()]

    def external_ports(self):
        """external ports of the wrapped issuer, plus clock and reset

        when the PLL is enabled the PLL-related signals are added too.
        """
        # copy, so that the list returned by the wrapped issuer is left
        # untouched by the appends below
        ports = list(self.ti.external_ports())
        ports.append(ClockSignal())
        ports.append(ResetSignal())
        if self.pll_en:
            ports.append(self.clk_sel_i)
            ports.append(self.pll.clk_24_i)
            ports.append(self.pll_test_o)
            ports.append(self.pll_vco_o)
            ports.append(self.pllclk_clk)
            ports.append(self.ref_clk)
        return ports
1518
1519
if __name__ == '__main__':
    # every Function Unit name below is mapped to the value 1
    fu_names = ('alu', 'cr', 'branch', 'trap', 'logical',
                'spr', 'div', 'mul', 'shiftrot')
    units = dict.fromkeys(fu_names, 1)

    pspec_kwargs = {
        'ldst_ifacetype': 'bare_wb',
        'imem_ifacetype': 'bare_wb',
        'addr_wid': 48,
        'mask_wid': 8,
        'reg_wid': 64,
        'units': units,
    }
    pspec = TestMemPspec(**pspec_kwargs)
    dut = TestIssuer(pspec)

    # let the nmigen command-line front-end handle any arguments
    vl = main(dut, ports=dut.ports(), name="test_issuer")

    # no command-line arguments at all: write out RTLIL for the design,
    # using the external port list
    if len(sys.argv) == 1:
        vl = rtlil.convert(dut, ports=dut.external_ports(),
                           name="test_issuer")
        with open("test_issuer.il", "w") as f:
            f.write(vl)