src/soc/simple/issuer.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
  12 the purpose of this module is to verify the functional correctness
  13 of the Function Units in the absolute simplest and clearest possible
  14 way, and to provide something that can be further incrementally
  15 improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
  19                     ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from nmutil.singlepipe import ControlBase
  25 from soc.simple.core_data import FetchOutput, FetchInput
  26
  27 from nmigen.lib.coding import PriorityEncoder
  28
  29 from openpower.decoder.power_decoder import create_pdecode
  30 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
  31 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
  32 from openpower.decoder.decode2execute1 import Data
  33 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
  34                                            SVP64PredMode)
  35 from openpower.state import CoreState
  36 from openpower.consts import (CR, SVP64CROffs)
  37 from soc.experiment.testmem import TestMemory  # test only for instructions
  38 from soc.regfile.regfiles import StateRegs, FastRegs
  39 from soc.simple.core import NonProductionCore
  40 from soc.config.test.test_loadstore import TestMemPspec
  41 from soc.config.ifetch import ConfigFetchUnit
  42 from soc.debug.dmi import CoreDebug, DMIInterface
  43 from soc.debug.jtag import JTAG
  44 from soc.config.pinouts import get_pinspecs
  45 from soc.interrupts.xics import XICS_ICP, XICS_ICS
  46 from soc.bus.simple_gpio import SimpleGPIO
  47 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
  48 from soc.clock.select import ClockSelect
  49 from soc.clock.dummypll import DummyPLL
  50 from openpower.sv.svstate import SVSTATERec
  51 from soc.experiment.icache import ICache
  52
  53 from nmutil.util import rising_edge
  54
  55
  56 def get_insn(f_instr_o, pc):
  57     if f_instr_o.width == 32:
  58         return f_instr_o
  59     else:
  60         # 64-bit: bit 2 of pc decides which word to select
  61         return f_instr_o.word_select(pc[2], 32)
  62
  63 # gets state input or reads from state regfile
  64
  65
  66 def state_get(m, core_rst, state_i, name, regfile, regnum):
  67     comb = m.d.comb
  68     sync = m.d.sync
  69     # read the PC
  70     res = Signal(64, reset_less=True, name=name)
  71     res_ok_delay = Signal(name="%s_ok_delay" % name)
  72     with m.If(~core_rst):
  73         sync += res_ok_delay.eq(~state_i.ok)
  74         with m.If(state_i.ok):
  75             # incoming override (start from pc_i)
  76             comb += res.eq(state_i.data)
  77         with m.Else():
  78             # otherwise read StateRegs regfile for PC...
  79             comb += regfile.ren.eq(1 << regnum)
  80         # ... but on a 1-clock delay
  81         with m.If(res_ok_delay):
  82             comb += res.eq(regfile.o_data)
  83     return res
  84
  85
  86 def get_predint(m, mask, name):
  87     """decode SVP64 predicate integer mask field to reg number and invert
  88     this is identical to the equivalent function in ISACaller except that
  89     it doesn't read the INT directly, it just decodes "what needs to be done"
  90     i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
  91
  92     * all1s is set to indicate that no mask is to be applied.
  93     * regread indicates the GPR register number to be read
  94     * invert is set to indicate that the register value is to be inverted
  95     * unary indicates that the contents of the register is to be shifted 1<<r3
  96     """
  97     comb = m.d.comb
  98     regread = Signal(5, name=name+"regread")
  99     invert = Signal(name=name+"invert")
 100     unary = Signal(name=name+"unary")
 101     all1s = Signal(name=name+"all1s")
 102     with m.Switch(mask):
 103         with m.Case(SVP64PredInt.ALWAYS.value):
 104             comb += all1s.eq(1)      # use 0b1111 (all ones)
 105         with m.Case(SVP64PredInt.R3_UNARY.value):
 106             comb += regread.eq(3)
 107             comb += unary.eq(1)        # 1<<r3 - shift r3 (single bit)
 108         with m.Case(SVP64PredInt.R3.value):
 109             comb += regread.eq(3)
 110         with m.Case(SVP64PredInt.R3_N.value):
 111             comb += regread.eq(3)
 112             comb += invert.eq(1)
 113         with m.Case(SVP64PredInt.R10.value):
 114             comb += regread.eq(10)
 115         with m.Case(SVP64PredInt.R10_N.value):
 116             comb += regread.eq(10)
 117             comb += invert.eq(1)
 118         with m.Case(SVP64PredInt.R30.value):
 119             comb += regread.eq(30)
 120         with m.Case(SVP64PredInt.R30_N.value):
 121             comb += regread.eq(30)
 122             comb += invert.eq(1)
 123     return regread, invert, unary, all1s
 124
 125
 126 def get_predcr(m, mask, name):
 127     """decode SVP64 predicate CR to reg number field and invert status
 128     this is identical to _get_predcr in ISACaller
 129     """
 130     comb = m.d.comb
 131     idx = Signal(2, name=name+"idx")
 132     invert = Signal(name=name+"crinvert")
 133     with m.Switch(mask):
 134         with m.Case(SVP64PredCR.LT.value):
 135             comb += idx.eq(CR.LT)
 136             comb += invert.eq(0)
 137         with m.Case(SVP64PredCR.GE.value):
 138             comb += idx.eq(CR.LT)
 139             comb += invert.eq(1)
 140         with m.Case(SVP64PredCR.GT.value):
 141             comb += idx.eq(CR.GT)
 142             comb += invert.eq(0)
 143         with m.Case(SVP64PredCR.LE.value):
 144             comb += idx.eq(CR.GT)
 145             comb += invert.eq(1)
 146         with m.Case(SVP64PredCR.EQ.value):
 147             comb += idx.eq(CR.EQ)
 148             comb += invert.eq(0)
 149         with m.Case(SVP64PredCR.NE.value):
 150             comb += idx.eq(CR.EQ)
 151             comb += invert.eq(1)
 152         with m.Case(SVP64PredCR.SO.value):
 153             comb += idx.eq(CR.SO)
 154             comb += invert.eq(0)
 155         with m.Case(SVP64PredCR.NS.value):
 156             comb += idx.eq(CR.SO)
 157             comb += invert.eq(1)
 158     return idx, invert
 159
 160
 161 # Fetch Finite State Machine.
 162 # WARNING: there are currently DriverConflicts but it's actually working.
 163 # TODO, here: everything that is global in nature, information from the
 164 # main TestIssuerInternal, needs to move to either ispec() or ospec().
 165 # not only that: TestIssuerInternal.imem can entirely move into here
 166 # because imem is only ever accessed inside the FetchFSM.
 167 class FetchFSM(ControlBase):
 168     def __init__(self, allow_overlap, svp64_en, imem, core_rst,
 169                  pdecode2, cur_state,
 170                  dbg, core, svstate, nia, is_svp64_mode):
 171         self.allow_overlap = allow_overlap
 172         self.svp64_en = svp64_en
 173         self.imem = imem
 174         self.core_rst = core_rst
 175         self.pdecode2 = pdecode2
 176         self.cur_state = cur_state
 177         self.dbg = dbg
 178         self.core = core
 179         self.svstate = svstate
 180         self.nia = nia
 181         self.is_svp64_mode = is_svp64_mode
 182
 183         # set up pipeline ControlBase and allocate i/o specs
 184         # (unusual: normally done by the Pipeline API)
 185         super().__init__(stage=self)
 186         self.p.i_data, self.n.o_data = self.new_specs(None)
 187         self.i, self.o = self.p.i_data, self.n.o_data
 188
 189     # next 3 functions are Stage API Compliance
 190     def setup(self, m, i):
 191         pass
 192
 193     def ispec(self):
 194         return FetchInput()
 195
 196     def ospec(self):
 197         return FetchOutput()
 198
 199     def elaborate(self, platform):
 200         """fetch FSM
 201
 202         this FSM performs fetch of raw instruction data, partial-decodes
 203         it 32-bit at a time to detect SVP64 prefixes, and will optionally
 204         read a 2nd 32-bit quantity if that occurs.
 205         """
 206         m = super().elaborate(platform)
 207
 208         dbg = self.dbg
 209         core = self.core
 210         pc = self.i.pc
 211         svstate = self.svstate
 212         nia = self.nia
 213         is_svp64_mode = self.is_svp64_mode
 214         fetch_pc_o_ready = self.p.o_ready
 215         fetch_pc_i_valid = self.p.i_valid
 216         fetch_insn_o_valid = self.n.o_valid
 217         fetch_insn_i_ready = self.n.i_ready
 218
 219         comb = m.d.comb
 220         sync = m.d.sync
 221         pdecode2 = self.pdecode2
 222         cur_state = self.cur_state
 223         dec_opcode_o = pdecode2.dec.raw_opcode_in  # raw opcode
 224
 225         msr_read = Signal(reset=1)
 226
 227         # also note instruction fetch failed
 228         if hasattr(core, "icache"):
 229             fetch_failed = core.icache.i_out.fetch_failed
 230             flush_needed = True
 231         else:
 232             fetch_failed = Const(0, 1)
 233             flush_needed = False
 234
 235         # don't read msr every cycle
 236         staterf = self.core.regs.rf['state']
 237         state_r_msr = staterf.r_ports['msr']  # MSR rd
 238
 239         comb += state_r_msr.ren.eq(0)
 240
 241         with m.FSM(name='fetch_fsm'):
 242
 243             # waiting (zzz)
 244             with m.State("IDLE"):
 245                 with m.If(~dbg.stopping_o & ~fetch_failed):
 246                     comb += fetch_pc_o_ready.eq(1)
 247                 with m.If(fetch_pc_i_valid & ~fetch_failed):
 248                     # instruction allowed to go: start by reading the PC
 249                     # capture the PC and also drop it into Insn Memory
 250                     # we have joined a pair of combinatorial memory
 251                     # lookups together.  this is Generally Bad.
 252                     comb += self.imem.a_pc_i.eq(pc)
 253                     comb += self.imem.a_i_valid.eq(1)
 254                     comb += self.imem.f_i_valid.eq(1)
 255                     sync += cur_state.pc.eq(pc)
 256                     sync += cur_state.svstate.eq(svstate)  # and svstate
 257
 258                     # initiate read of MSR. arrives one clock later
 259                     comb += state_r_msr.ren.eq(1 << StateRegs.MSR)
 260                     sync += msr_read.eq(0)
 261
 262                     m.next = "INSN_READ"  # move to "wait for bus" phase
 263
 264             # dummy pause to find out why simulation is not keeping up
 265             with m.State("INSN_READ"):
 266                 if self.allow_overlap:
 267                     stopping = dbg.stopping_o
 268                 else:
 269                     stopping = Const(0)
 270                 with m.If(stopping):
 271                     # stopping: jump back to idle
 272                     m.next = "IDLE"
 273                 with m.Else():
 274                     # one cycle later, msr/sv read arrives.  valid only once.
 275                     with m.If(~msr_read):
 276                         sync += msr_read.eq(1)  # yeah don't read it again
 277                         sync += cur_state.msr.eq(state_r_msr.o_data)
 278                     with m.If(self.imem.f_busy_o & ~fetch_failed):  # zzz...
 279                         # busy but not fetch failed: stay in wait-read
 280                         comb += self.imem.a_i_valid.eq(1)
 281                         comb += self.imem.f_i_valid.eq(1)
 282                     with m.Else():
 283                         # not busy (or fetch failed!): instruction fetched
 284                         # when fetch failed, the instruction gets ignored
 285                         # by the decoder
 286                         insn = get_insn(self.imem.f_instr_o, cur_state.pc)
 287                         if self.svp64_en:
 288                             svp64 = self.svp64
 289                             # decode the SVP64 prefix, if any
 290                             comb += svp64.raw_opcode_in.eq(insn)
 291                             comb += svp64.bigendian.eq(self.core_bigendian_i)
 292                             # pass the decoded prefix (if any) to PowerDecoder2
 293                             sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
 294                             sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
 295                             # remember whether this is a prefixed instruction,
 296                             # so the FSM can readily loop when VL==0
 297                             sync += is_svp64_mode.eq(svp64.is_svp64_mode)
 298                             # calculate the address of the following instruction
 299                             insn_size = Mux(svp64.is_svp64_mode, 8, 4)
 300                             sync += nia.eq(cur_state.pc + insn_size)
 301                             with m.If(~svp64.is_svp64_mode):
 302                                 # with no prefix, store the instruction
 303                                 # and hand it directly to the next FSM
 304                                 sync += dec_opcode_o.eq(insn)
 305                                 m.next = "INSN_READY"
 306                             with m.Else():
 307                                 # fetch the rest of the instruction from memory
 308                                 comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
 309                                 comb += self.imem.a_i_valid.eq(1)
 310                                 comb += self.imem.f_i_valid.eq(1)
 311                                 m.next = "INSN_READ2"
 312                         else:
 313                             # not SVP64 - 32-bit only
 314                             sync += nia.eq(cur_state.pc + 4)
 315                             sync += dec_opcode_o.eq(insn)
 316                             m.next = "INSN_READY"
 317
 318             with m.State("INSN_READ2"):
 319                 with m.If(self.imem.f_busy_o):  # zzz...
 320                     # busy: stay in wait-read
 321                     comb += self.imem.a_i_valid.eq(1)
 322                     comb += self.imem.f_i_valid.eq(1)
 323                 with m.Else():
 324                     # not busy: instruction fetched
 325                     insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
 326                     sync += dec_opcode_o.eq(insn)
 327                     m.next = "INSN_READY"
 328                     # TODO: probably can start looking at pdecode2.rm_dec
 329                     # here or maybe even in INSN_READ state, if svp64_mode
 330                     # detected, in order to trigger - and wait for - the
 331                     # predicate reading.
 332                     if self.svp64_en:
 333                         pmode = pdecode2.rm_dec.predmode
 334                     """
 335                     if pmode != SVP64PredMode.ALWAYS.value:
 336                         fire predicate loading FSM and wait before
 337                         moving to INSN_READY
 338                     else:
 339                         sync += self.srcmask.eq(-1) # set to all 1s
 340                         sync += self.dstmask.eq(-1) # set to all 1s
 341                         m.next = "INSN_READY"
 342                     """
 343
 344             with m.State("INSN_READY"):
 345                 # hand over the instruction, to be decoded
 346                 comb += fetch_insn_o_valid.eq(1)
 347                 with m.If(fetch_insn_i_ready):
 348                     m.next = "IDLE"
 349
 350         # whatever was done above, over-ride it if core reset is held
 351         with m.If(self.core_rst):
 352             sync += nia.eq(0)
 353
 354         return m
 355
 356
 357 class TestIssuerInternal(Elaboratable):
 358     """TestIssuer - reads instructions from TestMemory and issues them
 359
 360     efficiency and speed is not the main goal here: functional correctness
 361     and code clarity is.  optimisations (which almost 100% interfere with
 362     easy understanding) come later.
 363     """
 364
 365     def __init__(self, pspec):
 366
 367         # test is SVP64 is to be enabled
 368         self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
 369
 370         # and if regfiles are reduced
 371         self.regreduce_en = (hasattr(pspec, "regreduce") and
 372                              (pspec.regreduce == True))
 373
 374         # and if overlap requested
 375         self.allow_overlap = (hasattr(pspec, "allow_overlap") and
 376                               (pspec.allow_overlap == True))
 377
 378         # JTAG interface.  add this right at the start because if it's
 379         # added it *modifies* the pspec, by adding enable/disable signals
 380         # for parts of the rest of the core
 381         self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
 382         self.dbg_domain = "sync"  # sigh "dbgsunc" too problematic
 383         # self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
 384         if self.jtag_en:
 385             # XXX MUST keep this up-to-date with litex, and
 386             # soc-cocotb-sim, and err.. all needs sorting out, argh
 387             subset = ['uart',
 388                       'mtwi',
 389                       'eint', 'gpio', 'mspi0',
 390                       # 'mspi1', - disabled for now
 391                       # 'pwm', 'sd0', - disabled for now
 392                       'sdr']
 393             self.jtag = JTAG(get_pinspecs(subset=subset),
 394                              domain=self.dbg_domain)
 395             # add signals to pspec to enable/disable icache and dcache
 396             # (or data and intstruction wishbone if icache/dcache not included)
 397             # https://bugs.libre-soc.org/show_bug.cgi?id=520
 398             # TODO: do we actually care if these are not domain-synchronised?
 399             # honestly probably not.
 400             pspec.wb_icache_en = self.jtag.wb_icache_en
 401             pspec.wb_dcache_en = self.jtag.wb_dcache_en
 402             self.wb_sram_en = self.jtag.wb_sram_en
 403         else:
 404             self.wb_sram_en = Const(1)
 405
 406         # add 4k sram blocks?
 407         self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
 408                          pspec.sram4x4kblock == True)
 409         if self.sram4x4k:
 410             self.sram4k = []
 411             for i in range(4):
 412                 self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
 413                                                     # features={'err'}
 414                                                     ))
 415
 416         # add interrupt controller?
 417         self.xics = hasattr(pspec, "xics") and pspec.xics == True
 418         if self.xics:
 419             self.xics_icp = XICS_ICP()
 420             self.xics_ics = XICS_ICS()
 421             self.int_level_i = self.xics_ics.int_level_i
 422
 423         # add GPIO peripheral?
 424         self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
 425         if self.gpio:
 426             self.simple_gpio = SimpleGPIO()
 427             self.gpio_o = self.simple_gpio.gpio_o
 428
 429         # main instruction core.  suitable for prototyping / demo only
 430         self.core = core = NonProductionCore(pspec)
 431         self.core_rst = ResetSignal("coresync")
 432
 433         # instruction decoder.  goes into Trap Record
 434         #pdecode = create_pdecode()
 435         self.cur_state = CoreState("cur")  # current state (MSR/PC/SVSTATE)
 436         self.pdecode2 = PowerDecode2(None, state=self.cur_state,
 437                                      opkls=IssuerDecode2ToOperand,
 438                                      svp64_en=self.svp64_en,
 439                                      regreduce_en=self.regreduce_en)
 440         pdecode = self.pdecode2.dec
 441
 442         if self.svp64_en:
 443             self.svp64 = SVP64PrefixDecoder()  # for decoding SVP64 prefix
 444
 445         # Test Instruction memory
 446         if hasattr(core, "icache"):
 447             # XXX BLECH! use pspec to transfer the I-Cache to ConfigFetchUnit
 448             # truly dreadful.  needs a huge reorg.
 449             pspec.icache = core.icache
 450         self.imem = ConfigFetchUnit(pspec).fu
 451
 452         # DMI interface
 453         self.dbg = CoreDebug()
 454
 455         # instruction go/monitor
 456         self.pc_o = Signal(64, reset_less=True)
 457         self.pc_i = Data(64, "pc_i")  # set "ok" to indicate "please change me"
 458         self.svstate_i = Data(64, "svstate_i")  # ditto
 459         self.core_bigendian_i = Signal()  # TODO: set based on MSR.LE
 460         self.busy_o = Signal(reset_less=True)
 461         self.memerr_o = Signal(reset_less=True)
 462
 463         # STATE regfile read /write ports for PC, MSR, SVSTATE
 464         staterf = self.core.regs.rf['state']
 465         self.state_r_pc = staterf.r_ports['cia']  # PC rd
 466         self.state_w_pc = staterf.w_ports['d_wr1']  # PC wr
 467         self.state_r_sv = staterf.r_ports['sv']  # SVSTATE rd
 468         self.state_w_sv = staterf.w_ports['sv']  # SVSTATE wr
 469
 470         # DMI interface access
 471         intrf = self.core.regs.rf['int']
 472         crrf = self.core.regs.rf['cr']
 473         xerrf = self.core.regs.rf['xer']
 474         self.int_r = intrf.r_ports['dmi']  # INT read
 475         self.cr_r = crrf.r_ports['full_cr_dbg']  # CR read
 476         self.xer_r = xerrf.r_ports['full_xer']  # XER read
 477
 478         if self.svp64_en:
 479             # for predication
 480             self.int_pred = intrf.r_ports['pred']  # INT predicate read
 481             self.cr_pred = crrf.r_ports['cr_pred']  # CR predicate read
 482
 483         # hack method of keeping an eye on whether branch/trap set the PC
 484         self.state_nia = self.core.regs.rf['state'].w_ports['nia']
 485         self.state_nia.wen.name = 'state_nia_wen'
 486
 487         # pulse to synchronize the simulator at instruction end
 488         self.insn_done = Signal()
 489
 490         # indicate any instruction still outstanding, in execution
 491         self.any_busy = Signal()
 492
 493         if self.svp64_en:
 494             # store copies of predicate masks
 495             self.srcmask = Signal(64)
 496             self.dstmask = Signal(64)
 497
 498     def fetch_predicate_fsm(self, m,
 499                             pred_insn_i_valid, pred_insn_o_ready,
 500                             pred_mask_o_valid, pred_mask_i_ready):
 501         """fetch_predicate_fsm - obtains (constructs in the case of CR)
 502            src/dest predicate masks
 503
 504         https://bugs.libre-soc.org/show_bug.cgi?id=617
 505         the predicates can be read here, by using IntRegs r_ports['pred']
 506         or CRRegs r_ports['pred'].  in the case of CRs it will have to
 507         be done through multiple reads, extracting one relevant at a time.
 508         later, a faster way would be to use the 32-bit-wide CR port but
 509         this is more complex decoding, here.  equivalent code used in
 510         ISACaller is "from openpower.decoder.isa.caller import get_predcr"
 511
 512         note: this ENTIRE FSM is not to be called when svp64 is disabled
 513         """
 514         comb = m.d.comb
 515         sync = m.d.sync
 516         pdecode2 = self.pdecode2
 517         rm_dec = pdecode2.rm_dec  # SVP64RMModeDecode
 518         predmode = rm_dec.predmode
 519         srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
 520         cr_pred, int_pred = self.cr_pred, self.int_pred   # read regfiles
 521         # get src/dst step, so we can skip already used mask bits
 522         cur_state = self.cur_state
 523         srcstep = cur_state.svstate.srcstep
 524         dststep = cur_state.svstate.dststep
 525         cur_vl = cur_state.svstate.vl
 526
 527         # decode predicates
 528         sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
 529         dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
 530         sidx, scrinvert = get_predcr(m, srcpred, 's')
 531         didx, dcrinvert = get_predcr(m, dstpred, 'd')
 532
 533         # store fetched masks, for either intpred or crpred
 534         # when src/dst step is not zero, the skipped mask bits need to be
 535         # shifted-out, before actually storing them in src/dest mask
 536         new_srcmask = Signal(64, reset_less=True)
 537         new_dstmask = Signal(64, reset_less=True)
 538
 539         with m.FSM(name="fetch_predicate"):
 540
 541             with m.State("FETCH_PRED_IDLE"):
 542                 comb += pred_insn_o_ready.eq(1)
 543                 with m.If(pred_insn_i_valid):
 544                     with m.If(predmode == SVP64PredMode.INT):
 545                         # skip fetching destination mask register, when zero
 546                         with m.If(dall1s):
 547                             sync += new_dstmask.eq(-1)
 548                             # directly go to fetch source mask register
 549                             # guaranteed not to be zero (otherwise predmode
 550                             # would be SVP64PredMode.ALWAYS, not INT)
 551                             comb += int_pred.addr.eq(sregread)
 552                             comb += int_pred.ren.eq(1)
 553                             m.next = "INT_SRC_READ"
 554                         # fetch destination predicate register
 555                         with m.Else():
 556                             comb += int_pred.addr.eq(dregread)
 557                             comb += int_pred.ren.eq(1)
 558                             m.next = "INT_DST_READ"
 559                     with m.Elif(predmode == SVP64PredMode.CR):
 560                         # go fetch masks from the CR register file
 561                         sync += new_srcmask.eq(0)
 562                         sync += new_dstmask.eq(0)
 563                         m.next = "CR_READ"
 564                     with m.Else():
 565                         sync += self.srcmask.eq(-1)
 566                         sync += self.dstmask.eq(-1)
 567                         m.next = "FETCH_PRED_DONE"
 568
 569             with m.State("INT_DST_READ"):
 570                 # store destination mask
 571                 inv = Repl(dinvert, 64)
 572                 with m.If(dunary):
 573                     # set selected mask bit for 1<<r3 mode
 574                     dst_shift = Signal(range(64))
 575                     comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
 576                     sync += new_dstmask.eq(1 << dst_shift)
 577                 with m.Else():
 578                     # invert mask if requested
 579                     sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
 580                 # skip fetching source mask register, when zero
 581                 with m.If(sall1s):
 582                     sync += new_srcmask.eq(-1)
 583                     m.next = "FETCH_PRED_SHIFT_MASK"
 584                 # fetch source predicate register
 585                 with m.Else():
 586                     comb += int_pred.addr.eq(sregread)
 587                     comb += int_pred.ren.eq(1)
 588                     m.next = "INT_SRC_READ"
 589
 590             with m.State("INT_SRC_READ"):
 591                 # store source mask
 592                 inv = Repl(sinvert, 64)
 593                 with m.If(sunary):
 594                     # set selected mask bit for 1<<r3 mode
 595                     src_shift = Signal(range(64))
 596                     comb += src_shift.eq(self.int_pred.o_data & 0b111111)
 597                     sync += new_srcmask.eq(1 << src_shift)
 598                 with m.Else():
 599                     # invert mask if requested
 600                     sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
 601                 m.next = "FETCH_PRED_SHIFT_MASK"
 602
 603             # fetch masks from the CR register file
 604             # implements the following loop:
 605             # idx, inv = get_predcr(mask)
 606             # mask = 0
 607             # for cr_idx in range(vl):
 608             #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
 609             #     if cr[idx] ^ inv:
 610             #         mask |= 1 << cr_idx
 611             # return mask
 612             with m.State("CR_READ"):
 613                 # CR index to be read, which will be ready by the next cycle
 614                 cr_idx = Signal.like(cur_vl, reset_less=True)
 615                 # submit the read operation to the regfile
 616                 with m.If(cr_idx != cur_vl):
 617                     # the CR read port is unary ...
 618                     # ren = 1 << cr_idx
 619                     # ... in MSB0 convention ...
 620                     # ren = 1 << (7 - cr_idx)
 621                     # ... and with an offset:
 622                     # ren = 1 << (7 - off - cr_idx)
 623                     idx = SVP64CROffs.CRPred + cr_idx
 624                     comb += cr_pred.ren.eq(1 << (7 - idx))
 625                     # signal data valid in the next cycle
 626                     cr_read = Signal(reset_less=True)
 627                     sync += cr_read.eq(1)
 628                     # load the next index
 629                     sync += cr_idx.eq(cr_idx + 1)
 630                 with m.Else():
 631                     # exit on loop end
 632                     sync += cr_read.eq(0)
 633                     sync += cr_idx.eq(0)
 634                     m.next = "FETCH_PRED_SHIFT_MASK"
 635                 with m.If(cr_read):
 636                     # compensate for the one cycle delay on the regfile
 637                     cur_cr_idx = Signal.like(cur_vl)
 638                     comb += cur_cr_idx.eq(cr_idx - 1)
 639                     # read the CR field, select the appropriate bit
 640                     cr_field = Signal(4)
 641                     scr_bit = Signal()
 642                     dcr_bit = Signal()
 643                     comb += cr_field.eq(cr_pred.o_data)
 644                     comb += scr_bit.eq(cr_field.bit_select(sidx, 1)
 645                                        ^ scrinvert)
 646                     comb += dcr_bit.eq(cr_field.bit_select(didx, 1)
 647                                        ^ dcrinvert)
 648                     # set the corresponding mask bit
 649                     bit_to_set = Signal.like(self.srcmask)
 650                     comb += bit_to_set.eq(1 << cur_cr_idx)
 651                     with m.If(scr_bit):
 652                         sync += new_srcmask.eq(new_srcmask | bit_to_set)
 653                     with m.If(dcr_bit):
 654                         sync += new_dstmask.eq(new_dstmask | bit_to_set)
 655
 656             with m.State("FETCH_PRED_SHIFT_MASK"):
 657                 # shift-out skipped mask bits
 658                 sync += self.srcmask.eq(new_srcmask >> srcstep)
 659                 sync += self.dstmask.eq(new_dstmask >> dststep)
 660                 m.next = "FETCH_PRED_DONE"
 661
 662             with m.State("FETCH_PRED_DONE"):
 663                 comb += pred_mask_o_valid.eq(1)
 664                 with m.If(pred_mask_i_ready):
 665                     m.next = "FETCH_PRED_IDLE"
 666
    def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
                  dbg, core_rst, is_svp64_mode,
                  fetch_pc_o_ready, fetch_pc_i_valid,
                  fetch_insn_o_valid, fetch_insn_i_ready,
                  pred_insn_i_valid, pred_insn_o_ready,
                  pred_mask_o_valid, pred_mask_i_ready,
                  exec_insn_i_valid, exec_insn_o_ready,
                  exec_pc_o_valid, exec_pc_i_ready):
        """issue FSM

        decode / issue FSM.  this interacts with the "fetch" FSM
        through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
        (outgoing). also interacts with the "execute" FSM
        through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
        (incoming).
        SVP64 RM prefixes have already been set up by the
        "fetch" phase, so execute is fairly straightforward.

        states (FSM name "issue_fsm"):

        * ISSUE_START   - clears pdecode2.instr_fault.  if neither
                          dbg.core_stop_o nor core_rst is set, raises
                          fetch_pc_i_valid and moves to INSN_WAIT once
                          fetch_pc_o_ready is seen.  otherwise reports
                          dbg.core_stopped_i and accepts PC / SVSTATE
                          overrides from self.pc_i / self.svstate_i.
        * INSN_WAIT     - raises fetch_insn_i_ready and waits for
                          fetch_insn_o_valid.  an SVP64 instruction with
                          VL==0 is treated as a NOP: nia is written to PC,
                          insn_done is raised, back to ISSUE_START.
                          otherwise on to PRED_START (svp64_en) or
                          DECODE_SV.
        * PRED_START    - raises pred_insn_i_valid until pred_insn_o_ready.
        * MASK_WAIT     - raises pred_mask_i_ready until pred_mask_o_valid.
        * PRED_SKIP     - advances srcstep/dststep past masked-out elements,
                          ending the VL loop if either reaches VL.
        * DECODE_SV     - latches decoder output and state into core.i,
                          clears pdecode2.ldst_exc.
        * INSN_EXECUTE  - raises exec_insn_i_valid until exec_insn_o_ready.
        * EXECUTE_WAIT  - waits for exec_pc_o_valid, then decides between
                          re-decode (exception), next fetch, or the next
                          element of the VL loop.

        arguments:

        * m                     - the Module being built
        * core                  - the core; core.i is written in PRED_SKIP
                                  and DECODE_SV
        * pc_changed/sv_changed - set here (sync) when PC/SVSTATE is
                                  overridden while stopped; read in
                                  EXECUTE_WAIT to skip the PC/SVSTATE update
        * nia                   - value written to the PC state register
                                  when moving on to the next instruction
        * dbg                   - debug interface (core_stop_o, stopping_o,
                                  core_stopped_i are used)
        * core_rst              - core reset; treated like a "stop" request
        * is_svp64_mode         - instruction currently held is SVP64
        * remaining arguments   - the ready/valid handshake Signals named
                                  above

        after the FSM, a write of new_svstate to the State Regfile (and to
        cur_state.svstate) is issued whenever update_svstate was raised.
        """

        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        cur_state = self.cur_state

        # temporaries
        dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode

        # for updating svstate (things like srcstep etc.)
        update_svstate = Signal()  # set this (below) if updating
        new_svstate = SVSTATERec("new_svstate")
        # default: new_svstate mirrors the current one.  individual fields
        # (or the whole record) are overridden by later comb statements
        # inside the FSM, so this statement must stay ahead of them.
        comb += new_svstate.eq(cur_state.svstate)

        # precalculate srcstep+1 and dststep+1
        cur_srcstep = cur_state.svstate.srcstep
        cur_dststep = cur_state.svstate.dststep
        next_srcstep = Signal.like(cur_srcstep)
        next_dststep = Signal.like(cur_dststep)
        comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
        comb += next_dststep.eq(cur_state.svstate.dststep+1)

        # note if an exception happened.  in a pipelined or OoO design
        # this needs to be accompanied by "shadowing" (or stalling)
        # (read from self.core, not from the "core" argument)
        exc_happened = self.core.o.exc_happened
        # also note instruction fetch failed
        # flush_needed is a plain python bool, decided at elaboration time:
        # it selects whether the icache flush statement is generated at all
        if hasattr(core, "icache"):
            fetch_failed = core.icache.i_out.fetch_failed
            flush_needed = True
        else:
            fetch_failed = Const(0, 1)
            flush_needed = False
        # set to fault in decoder
        # update (highest priority) instruction fault
        rising_fetch_failed = rising_edge(m, fetch_failed)
        with m.If(rising_fetch_failed):
            sync += pdecode2.instr_fault.eq(1)

        with m.FSM(name="issue_fsm"):

            # sync with the "fetch" phase which is reading the instruction
            # at this point, there is no instruction running, that
            # could inadvertently update the PC.
            with m.State("ISSUE_START"):
                # reset instruction fault
                sync += pdecode2.instr_fault.eq(0)
                # wait on "core stop" release, before next fetch
                # need to do this here, in case we are in a VL==0 loop
                with m.If(~dbg.core_stop_o & ~core_rst):
                    comb += fetch_pc_i_valid.eq(1)  # tell fetch to start
                    with m.If(fetch_pc_o_ready):   # fetch acknowledged us
                        m.next = "INSN_WAIT"
                with m.Else():
                    # tell core it's stopped, and acknowledge debug handshake
                    comb += dbg.core_stopped_i.eq(1)
                    # while stopped, allow updating the PC and SVSTATE
                    with m.If(self.pc_i.ok):
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.i_data.eq(self.pc_i.data)
                        sync += pc_changed.eq(1)
                    with m.If(self.svstate_i.ok):
                        comb += new_svstate.eq(self.svstate_i.data)
                        comb += update_svstate.eq(1)
                        sync += sv_changed.eq(1)

            # wait for an instruction to arrive from Fetch
            with m.State("INSN_WAIT"):
                # only honour dbg.stopping_o when overlap is allowed;
                # otherwise the "stopping" branch is a constant zero
                if self.allow_overlap:
                    stopping = dbg.stopping_o
                else:
                    stopping = Const(0)
                with m.If(stopping):
                    # stopping: jump back to idle
                    m.next = "ISSUE_START"
                with m.Else():
                    comb += fetch_insn_i_ready.eq(1)
                    with m.If(fetch_insn_o_valid):
                        # loop into ISSUE_START if it's a SVP64 instruction
                        # and VL == 0.  this because VL==0 is a for-loop
                        # from 0 to 0 i.e. always, always a NOP.
                        # NOTE: cur_vl is an ordinary python name, and is
                        # also relied upon by the PRED_SKIP state below
                        cur_vl = cur_state.svstate.vl
                        with m.If(is_svp64_mode & (cur_vl == 0)):
                            # update the PC before fetching the next instruction
                            # since we are in a VL==0 loop, no instruction was
                            # executed that we could be overwriting
                            comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                            comb += self.state_w_pc.i_data.eq(nia)
                            comb += self.insn_done.eq(1)
                            m.next = "ISSUE_START"
                        with m.Else():
                            if self.svp64_en:
                                m.next = "PRED_START"  # fetching predicate
                            else:
                                m.next = "DECODE_SV"  # skip predication

            with m.State("PRED_START"):
                comb += pred_insn_i_valid.eq(1)  # tell fetch_pred to start
                with m.If(pred_insn_o_ready):  # fetch_pred acknowledged us
                    m.next = "MASK_WAIT"

            with m.State("MASK_WAIT"):
                comb += pred_mask_i_ready.eq(1)  # ready to receive the masks
                with m.If(pred_mask_o_valid):  # predication masks are ready
                    m.next = "PRED_SKIP"

            # skip zeros in predicate
            with m.State("PRED_SKIP"):
                with m.If(~is_svp64_mode):
                    m.next = "DECODE_SV"  # nothing to do
                with m.Else():
                    # when svp64_en is False nothing is generated for
                    # this branch at all
                    if self.svp64_en:
                        pred_src_zero = pdecode2.rm_dec.pred_sz
                        pred_dst_zero = pdecode2.rm_dec.pred_dz

                        # new srcstep, after skipping zeros
                        skip_srcstep = Signal.like(cur_srcstep)
                        # value to be added to the current srcstep
                        src_delta = Signal.like(cur_srcstep)
                        # add leading zeros to srcstep, if not in zero mode
                        with m.If(~pred_src_zero):
                            # priority encoder (count leading zeros)
                            # append guard bit, in case the mask is all zeros
                            pri_enc_src = PriorityEncoder(65)
                            m.submodules.pri_enc_src = pri_enc_src
                            comb += pri_enc_src.i.eq(Cat(self.srcmask,
                                                         Const(1, 1)))
                            comb += src_delta.eq(pri_enc_src.o)
                        # apply delta to srcstep
                        comb += skip_srcstep.eq(cur_srcstep + src_delta)
                        # shift-out all leading zeros from the mask
                        # plus the leading "one" bit
                        # TODO count leading zeros and shift-out the zero
                        #      bits, in the same step, in hardware
                        sync += self.srcmask.eq(self.srcmask >> (src_delta+1))

                        # same as above, but for dststep
                        skip_dststep = Signal.like(cur_dststep)
                        dst_delta = Signal.like(cur_dststep)
                        with m.If(~pred_dst_zero):
                            pri_enc_dst = PriorityEncoder(65)
                            m.submodules.pri_enc_dst = pri_enc_dst
                            comb += pri_enc_dst.i.eq(Cat(self.dstmask,
                                                         Const(1, 1)))
                            comb += dst_delta.eq(pri_enc_dst.o)
                        comb += skip_dststep.eq(cur_dststep + dst_delta)
                        sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))

                        # TODO: initialize mask[VL]=1 to avoid passing past VL
                        # (cur_vl here is the name bound in INSN_WAIT above)
                        with m.If((skip_srcstep >= cur_vl) |
                                  (skip_dststep >= cur_vl)):
                            # end of VL loop. Update PC and reset src/dst step
                            comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                            comb += self.state_w_pc.i_data.eq(nia)
                            comb += new_svstate.srcstep.eq(0)
                            comb += new_svstate.dststep.eq(0)
                            comb += update_svstate.eq(1)
                            # synchronize with the simulator
                            comb += self.insn_done.eq(1)
                            # go back to Issue
                            m.next = "ISSUE_START"
                        with m.Else():
                            # update new src/dst step
                            comb += new_svstate.srcstep.eq(skip_srcstep)
                            comb += new_svstate.dststep.eq(skip_dststep)
                            comb += update_svstate.eq(1)
                            # proceed to Decode
                            m.next = "DECODE_SV"

                        # pass predicate mask bits through to satellite decoders
                        # TODO: for SIMD this will be *multiple* bits
                        sync += core.i.sv_pred_sm.eq(self.srcmask[0])
                        sync += core.i.sv_pred_dm.eq(self.dstmask[0])

            # after src/dst step have been updated, we are ready
            # to decode the instruction
            with m.State("DECODE_SV"):
                # decode the instruction
                # (instr_fault is only cleared while fetch_failed is low)
                with m.If(~fetch_failed):
                    sync += pdecode2.instr_fault.eq(0)
                # latch decoder output, state and raw opcode into the core
                sync += core.i.e.eq(pdecode2.e)
                sync += core.i.state.eq(cur_state)
                sync += core.i.raw_insn_i.eq(dec_opcode_i)
                sync += core.i.bigendian_i.eq(self.core_bigendian_i)
                if self.svp64_en:
                    sync += core.i.sv_rm.eq(pdecode2.sv_rm)
                    # set RA_OR_ZERO detection in satellite decoders
                    sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
                    # and svp64 detection
                    sync += core.i.is_svp64_mode.eq(is_svp64_mode)
                    # and svp64 bit-rev'd ldst mode
                    ldst_dec = pdecode2.use_svp64_ldst_dec
                    sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
                # after decoding, reset any previous exception condition,
                # allowing it to be set again during the next execution
                sync += pdecode2.ldst_exc.eq(0)

                m.next = "INSN_EXECUTE"  # move to "execute"

            # handshake with execution FSM, move to "wait" once acknowledged
            with m.State("INSN_EXECUTE"):
                comb += exec_insn_i_valid.eq(1)  # trigger execute
                with m.If(exec_insn_o_ready):   # execute acknowledged us
                    m.next = "EXECUTE_WAIT"

            with m.State("EXECUTE_WAIT"):
                # wait on "core stop" release, at instruction end
                # need to do this here, in case we are in a VL>1 loop
                with m.If(~dbg.core_stop_o & ~core_rst):
                    comb += exec_pc_i_ready.eq(1)
                    # see https://bugs.libre-soc.org/show_bug.cgi?id=636
                    # the exception info needs to be blatted into
                    # pdecode.ldst_exc, and the instruction "re-run".
                    # when ldst_exc.happened is set, the PowerDecoder2
                    # reacts very differently: it re-writes the instruction
                    # with a "trap" (calls PowerDecoder2.trap()) which
                    # will *overwrite* whatever was requested and jump the
                    # PC to the exception address, as well as alter MSR.
                    # nothing else needs to be done other than to note
                    # the change of PC and MSR (and, later, SVSTATE)
                    with m.If(exc_happened):
                        mmu = core.fus.get_exc("mmu0")
                        ldst = core.fus.get_exc("ldst0")
                        with m.If(fetch_failed):
                            # instruction fetch: exception is from MMU
                            # reset instr_fault (highest priority)
                            sync += pdecode2.ldst_exc.eq(mmu)
                            sync += pdecode2.instr_fault.eq(0)
                            if flush_needed:
                                # request the icache to stop asserting "failed"
                                comb += core.icache.flush_in.eq(1)
                        with m.Else():
                            # otherwise assume it was a LDST exception
                            sync += pdecode2.ldst_exc.eq(ldst)

                    with m.If(exec_pc_o_valid):

                        # was this the last loop iteration?
                        # (only next_srcstep is compared against VL here)
                        is_last = Signal()
                        cur_vl = cur_state.svstate.vl
                        comb += is_last.eq(next_srcstep == cur_vl)

                        # return directly to Decode if Execute generated an
                        # exception.
                        with m.If(pdecode2.ldst_exc.happened):
                            m.next = "DECODE_SV"

                        # if either PC or SVSTATE were changed by the previous
                        # instruction, go directly back to Fetch, without
                        # updating either PC or SVSTATE
                        with m.Elif(pc_changed | sv_changed):
                            m.next = "ISSUE_START"

                        # also return to Fetch, when no output was a vector
                        # (regardless of SRCSTEP and VL), or when the last
                        # instruction was really the last one of the VL loop
                        with m.Elif((~pdecode2.loop_continue) | is_last):
                            # before going back to fetch, update the PC state
                            # register with the NIA.
                            # ok here we are not reading the branch unit.
                            # TODO: this just blithely overwrites whatever
                            #       pipeline updated the PC
                            comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                            comb += self.state_w_pc.i_data.eq(nia)
                            # reset SRCSTEP before returning to Fetch
                            # with SVP64 enabled the reset is conditional on
                            # loop_continue; without it, it is unconditional
                            if self.svp64_en:
                                with m.If(pdecode2.loop_continue):
                                    comb += new_svstate.srcstep.eq(0)
                                    comb += new_svstate.dststep.eq(0)
                                    comb += update_svstate.eq(1)
                            else:
                                comb += new_svstate.srcstep.eq(0)
                                comb += new_svstate.dststep.eq(0)
                                comb += update_svstate.eq(1)
                            m.next = "ISSUE_START"

                        # returning to Execute? then, first update SRCSTEP
                        with m.Else():
                            comb += new_svstate.srcstep.eq(next_srcstep)
                            comb += new_svstate.dststep.eq(next_dststep)
                            comb += update_svstate.eq(1)
                            # return to mask skip loop
                            m.next = "PRED_SKIP"

                with m.Else():
                    # stopped (or in reset): same override handling as in
                    # the ISSUE_START state
                    comb += dbg.core_stopped_i.eq(1)
                    # while stopped, allow updating the PC and SVSTATE
                    with m.If(self.pc_i.ok):
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.i_data.eq(self.pc_i.data)
                        sync += pc_changed.eq(1)
                    with m.If(self.svstate_i.ok):
                        comb += new_svstate.eq(self.svstate_i.data)
                        comb += update_svstate.eq(1)
                        sync += sv_changed.eq(1)

        # check if svstate needs updating: if so, write it to State Regfile
        with m.If(update_svstate):
            comb += self.state_w_sv.wen.eq(1 << StateRegs.SVSTATE)
            comb += self.state_w_sv.i_data.eq(new_svstate)
            sync += cur_state.svstate.eq(new_svstate)  # for next clock
 985
 986     def execute_fsm(self, m, core, pc_changed, sv_changed,
 987                     exec_insn_i_valid, exec_insn_o_ready,
 988                     exec_pc_o_valid, exec_pc_i_ready):
 989         """execute FSM
 990
 991         execute FSM. this interacts with the "issue" FSM
 992         through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
 993         (outgoing). SVP64 RM prefixes have already been set up by the
 994         "issue" phase, so execute is fairly straightforward.
 995         """
 996
 997         comb = m.d.comb
 998         sync = m.d.sync
 999         pdecode2 = self.pdecode2
1000
1001         # temporaries
1002         core_busy_o = core.n.o_data.busy_o  # core is busy
1003         core_ivalid_i = core.p.i_valid              # instruction is valid
1004
1005         with m.FSM(name="exec_fsm"):
1006
1007             # waiting for instruction bus (stays there until not busy)
1008             with m.State("INSN_START"):
1009                 comb += exec_insn_o_ready.eq(1)
1010                 with m.If(exec_insn_i_valid):
1011                     comb += core_ivalid_i.eq(1)  # instruction is valid/issued
1012                     sync += sv_changed.eq(0)
1013                     sync += pc_changed.eq(0)
1014                     with m.If(core.p.o_ready):  # only move if accepted
1015                         m.next = "INSN_ACTIVE"  # move to "wait completion"
1016
1017             # instruction started: must wait till it finishes
1018             with m.State("INSN_ACTIVE"):
1019                 # note changes to PC and SVSTATE
1020                 with m.If(self.state_nia.wen & (1 << StateRegs.SVSTATE)):
1021                     sync += sv_changed.eq(1)
1022                 with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
1023                     sync += pc_changed.eq(1)
1024                 with m.If(~core_busy_o):  # instruction done!
1025                     comb += exec_pc_o_valid.eq(1)
1026                     with m.If(exec_pc_i_ready):
1027                         # when finished, indicate "done".
1028                         # however, if there was an exception, the instruction
1029                         # is *not* yet done.  this is an implementation
1030                         # detail: we choose to implement exceptions by
1031                         # taking the exception information from the LDST
1032                         # unit, putting that *back* into the PowerDecoder2,
1033                         # and *re-running the entire instruction*.
1034                         # if we erroneously indicate "done" here, it is as if
1035                         # there were *TWO* instructions:
1036                         # 1) the failed LDST 2) a TRAP.
1037                         with m.If(~pdecode2.ldst_exc.happened):
1038                             comb += self.insn_done.eq(1)
1039                         m.next = "INSN_START"  # back to fetch
1040
1041     def setup_peripherals(self, m):
1042         comb, sync = m.d.comb, m.d.sync
1043
1044         # okaaaay so the debug module must be in coresync clock domain
1045         # but NOT its reset signal. to cope with this, set every single
1046         # submodule explicitly in coresync domain, debug and JTAG
1047         # in their own one but using *external* reset.
1048         csd = DomainRenamer("coresync")
1049         dbd = DomainRenamer(self.dbg_domain)
1050
1051         m.submodules.core = core = csd(self.core)
1052         # this _so_ needs sorting out.  ICache is added down inside
1053         # LoadStore1 and is already a submodule of LoadStore1
1054         if not isinstance(self.imem, ICache):
1055             m.submodules.imem = imem = csd(self.imem)
1056         m.submodules.dbg = dbg = dbd(self.dbg)
1057         if self.jtag_en:
1058             m.submodules.jtag = jtag = dbd(self.jtag)
1059             # TODO: UART2GDB mux, here, from external pin
1060             # see https://bugs.libre-soc.org/show_bug.cgi?id=499
1061             sync += dbg.dmi.connect_to(jtag.dmi)
1062
1063         cur_state = self.cur_state
1064
1065         # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
1066         if self.sram4x4k:
1067             for i, sram in enumerate(self.sram4k):
1068                 m.submodules["sram4k_%d" % i] = csd(sram)
1069                 comb += sram.enable.eq(self.wb_sram_en)
1070
1071         # XICS interrupt handler
1072         if self.xics:
1073             m.submodules.xics_icp = icp = csd(self.xics_icp)
1074             m.submodules.xics_ics = ics = csd(self.xics_ics)
1075             comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
1076             sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core
1077
1078         # GPIO test peripheral
1079         if self.gpio:
1080             m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)
1081
1082         # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
1083         # XXX causes litex ECP5 test to get wrong idea about input and output
1084         # (but works with verilator sim *sigh*)
1085         # if self.gpio and self.xics:
1086         #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
1087
1088         # instruction decoder
1089         pdecode = create_pdecode()
1090         m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
1091         if self.svp64_en:
1092             m.submodules.svp64 = svp64 = csd(self.svp64)
1093
1094         # convenience
1095         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
1096         intrf = self.core.regs.rf['int']
1097
1098         # clock delay power-on reset
1099         cd_por = ClockDomain(reset_less=True)
1100         cd_sync = ClockDomain()
1101         core_sync = ClockDomain("coresync")
1102         m.domains += cd_por, cd_sync, core_sync
1103         if self.dbg_domain != "sync":
1104             dbg_sync = ClockDomain(self.dbg_domain)
1105             m.domains += dbg_sync
1106
1107         ti_rst = Signal(reset_less=True)
1108         delay = Signal(range(4), reset=3)
1109         with m.If(delay != 0):
1110             m.d.por += delay.eq(delay - 1)
1111         comb += cd_por.clk.eq(ClockSignal())
1112
1113         # power-on reset delay
1114         core_rst = ResetSignal("coresync")
1115         comb += ti_rst.eq(delay != 0 | dbg.core_rst_o | ResetSignal())
1116         comb += core_rst.eq(ti_rst)
1117
1118         # debug clock is same as coresync, but reset is *main external*
1119         if self.dbg_domain != "sync":
1120             dbg_rst = ResetSignal(self.dbg_domain)
1121             comb += dbg_rst.eq(ResetSignal())
1122
1123         # busy/halted signals from core
1124         core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o  # core is busy
1125         comb += self.busy_o.eq(core_busy_o)
1126         comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
1127
1128         # temporary hack: says "go" immediately for both address gen and ST
1129         l0 = core.l0
1130         ldst = core.fus.fus['ldst0']
1131         st_go_edge = rising_edge(m, ldst.st.rel_o)
1132         # link addr-go direct to rel
1133         m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)
1134         m.d.comb += ldst.st.go_i.eq(st_go_edge)  # link store-go to rising rel
1135
1136     def elaborate(self, platform):
1137         m = Module()
1138         # convenience
1139         comb, sync = m.d.comb, m.d.sync
1140         cur_state = self.cur_state
1141         pdecode2 = self.pdecode2
1142         dbg = self.dbg
1143         core = self.core
1144
1145         # set up peripherals and core
1146         core_rst = self.core_rst
1147         self.setup_peripherals(m)
1148
1149         # reset current state if core reset requested
1150         with m.If(core_rst):
1151             m.d.sync += self.cur_state.eq(0)
1152
1153         # PC and instruction from I-Memory
1154         comb += self.pc_o.eq(cur_state.pc)
1155         pc_changed = Signal()  # note write to PC
1156         sv_changed = Signal()  # note write to SVSTATE
1157
1158         # indicate to outside world if any FU is still executing
1159         comb += self.any_busy.eq(core.n.o_data.any_busy_o)  # any FU executing
1160
1161         # read state either from incoming override or from regfile
1162         # TODO: really should be doing MSR in the same way
1163         pc = state_get(m, core_rst, self.pc_i,
1164                        "pc",                  # read PC
1165                        self.state_r_pc, StateRegs.PC)
1166         svstate = state_get(m, core_rst, self.svstate_i,
1167                             "svstate",   # read SVSTATE
1168                             self.state_r_sv, StateRegs.SVSTATE)
1169
1170         # don't write pc every cycle
1171         comb += self.state_w_pc.wen.eq(0)
1172         comb += self.state_w_pc.i_data.eq(0)
1173
1174         # address of the next instruction, in the absence of a branch
1175         # depends on the instruction size
1176         nia = Signal(64)
1177
1178         # connect up debug signals
1179         # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
1180         comb += dbg.terminate_i.eq(core.o.core_terminate_o)
1181         comb += dbg.state.pc.eq(pc)
1182         comb += dbg.state.svstate.eq(svstate)
1183         comb += dbg.state.msr.eq(cur_state.msr)
1184
1185         # pass the prefix mode from Fetch to Issue, so the latter can loop
1186         # on VL==0
1187         is_svp64_mode = Signal()
1188
1189         # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
1190         # issue, decode/execute, now joined by "Predicate fetch/calculate".
1191         # these are the handshake signals between each
1192
1193         # fetch FSM can run as soon as the PC is valid
1194         fetch_pc_i_valid = Signal()  # Execute tells Fetch "start next read"
1195         fetch_pc_o_ready = Signal()  # Fetch Tells SVSTATE "proceed"
1196
1197         # fetch FSM hands over the instruction to be decoded / issued
1198         fetch_insn_o_valid = Signal()
1199         fetch_insn_i_ready = Signal()
1200
1201         # predicate fetch FSM decodes and fetches the predicate
1202         pred_insn_i_valid = Signal()
1203         pred_insn_o_ready = Signal()
1204
1205         # predicate fetch FSM delivers the masks
1206         pred_mask_o_valid = Signal()
1207         pred_mask_i_ready = Signal()
1208
1209         # issue FSM delivers the instruction to the be executed
1210         exec_insn_i_valid = Signal()
1211         exec_insn_o_ready = Signal()
1212
1213         # execute FSM, hands over the PC/SVSTATE back to the issue FSM
1214         exec_pc_o_valid = Signal()
1215         exec_pc_i_ready = Signal()
1216
1217         # the FSMs here are perhaps unusual in that they detect conditions
1218         # then "hold" information, combinatorially, for the core
1219         # (as opposed to using sync - which would be on a clock's delay)
1220         # this includes the actual opcode, valid flags and so on.
1221
1222         # Fetch, then predicate fetch, then Issue, then Execute.
1223         # Issue is where the VL for-loop # lives.  the ready/valid
1224         # signalling is used to communicate between the four.
1225
1226         # set up Fetch FSM
1227         fetch = FetchFSM(self.allow_overlap, self.svp64_en,
1228                          self.imem, core_rst, pdecode2, cur_state,
1229                          dbg, core, svstate, nia, is_svp64_mode)
1230         m.submodules.fetch = fetch
1231         # connect up in/out data to existing Signals
1232         comb += fetch.p.i_data.pc.eq(pc)
1233         # and the ready/valid signalling
1234         comb += fetch_pc_o_ready.eq(fetch.p.o_ready)
1235         comb += fetch.p.i_valid.eq(fetch_pc_i_valid)
1236         comb += fetch_insn_o_valid.eq(fetch.n.o_valid)
1237         comb += fetch.n.i_ready.eq(fetch_insn_i_ready)
1238
1239         self.issue_fsm(m, core, pc_changed, sv_changed, nia,
1240                        dbg, core_rst, is_svp64_mode,
1241                        fetch_pc_o_ready, fetch_pc_i_valid,
1242                        fetch_insn_o_valid, fetch_insn_i_ready,
1243                        pred_insn_i_valid, pred_insn_o_ready,
1244                        pred_mask_o_valid, pred_mask_i_ready,
1245                        exec_insn_i_valid, exec_insn_o_ready,
1246                        exec_pc_o_valid, exec_pc_i_ready)
1247
1248         if self.svp64_en:
1249             self.fetch_predicate_fsm(m,
1250                                      pred_insn_i_valid, pred_insn_o_ready,
1251                                      pred_mask_o_valid, pred_mask_i_ready)
1252
1253         self.execute_fsm(m, core, pc_changed, sv_changed,
1254                          exec_insn_i_valid, exec_insn_o_ready,
1255                          exec_pc_o_valid, exec_pc_i_ready)
1256
1257         # this bit doesn't have to be in the FSM: connect up to read
1258         # regfiles on demand from DMI
1259         self.do_dmi(m, dbg)
1260
1261         # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
1262         # (which uses that in PowerDecoder2 to raise 0x900 exception)
1263         self.tb_dec_fsm(m, cur_state.dec)
1264
1265         return m
1266
1267     def do_dmi(self, m, dbg):
1268         """deals with DMI debug requests
1269
1270         currently only provides read requests for the INT regfile, CR and XER
1271         it will later also deal with *writing* to these regfiles.
1272         """
1273         comb = m.d.comb
1274         sync = m.d.sync
1275         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
1276         intrf = self.core.regs.rf['int']
1277
1278         with m.If(d_reg.req):  # request for regfile access being made
1279             # TODO: error-check this
1280             # XXX should this be combinatorial?  sync better?
1281             if intrf.unary:
1282                 comb += self.int_r.ren.eq(1 << d_reg.addr)
1283             else:
1284                 comb += self.int_r.addr.eq(d_reg.addr)
1285                 comb += self.int_r.ren.eq(1)
1286         d_reg_delay = Signal()
1287         sync += d_reg_delay.eq(d_reg.req)
1288         with m.If(d_reg_delay):
1289             # data arrives one clock later
1290             comb += d_reg.data.eq(self.int_r.o_data)
1291             comb += d_reg.ack.eq(1)
1292
1293         # sigh same thing for CR debug
1294         with m.If(d_cr.req):  # request for regfile access being made
1295             comb += self.cr_r.ren.eq(0b11111111)  # enable all
1296         d_cr_delay = Signal()
1297         sync += d_cr_delay.eq(d_cr.req)
1298         with m.If(d_cr_delay):
1299             # data arrives one clock later
1300             comb += d_cr.data.eq(self.cr_r.o_data)
1301             comb += d_cr.ack.eq(1)
1302
1303         # aaand XER...
1304         with m.If(d_xer.req):  # request for regfile access being made
1305             comb += self.xer_r.ren.eq(0b111111)  # enable all
1306         d_xer_delay = Signal()
1307         sync += d_xer_delay.eq(d_xer.req)
1308         with m.If(d_xer_delay):
1309             # data arrives one clock later
1310             comb += d_xer.data.eq(self.xer_r.o_data)
1311             comb += d_xer.ack.eq(1)
1312
1313     def tb_dec_fsm(self, m, spr_dec):
1314         """tb_dec_fsm
1315
1316         this is a FSM for updating either dec or tb.  it runs alternately
1317         DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
1318         value to DEC, however the regfile has "passthrough" on it so this
1319         *should* be ok.
1320
1321         see v3.0B p1097-1099 for Timeer Resource and p1065 and p1076
1322         """
1323
1324         comb, sync = m.d.comb, m.d.sync
1325         fast_rf = self.core.regs.rf['fast']
1326         fast_r_dectb = fast_rf.r_ports['issue']  # DEC/TB
1327         fast_w_dectb = fast_rf.w_ports['issue']  # DEC/TB
1328
1329         with m.FSM() as fsm:
1330
1331             # initiates read of current DEC
1332             with m.State("DEC_READ"):
1333                 comb += fast_r_dectb.addr.eq(FastRegs.DEC)
1334                 comb += fast_r_dectb.ren.eq(1)
1335                 m.next = "DEC_WRITE"
1336
1337             # waits for DEC read to arrive (1 cycle), updates with new value
1338             with m.State("DEC_WRITE"):
1339                 new_dec = Signal(64)
1340                 # TODO: MSR.LPCR 32-bit decrement mode
1341                 comb += new_dec.eq(fast_r_dectb.o_data - 1)
1342                 comb += fast_w_dectb.addr.eq(FastRegs.DEC)
1343                 comb += fast_w_dectb.wen.eq(1)
1344                 comb += fast_w_dectb.i_data.eq(new_dec)
1345                 sync += spr_dec.eq(new_dec)  # copy into cur_state for decoder
1346                 m.next = "TB_READ"
1347
1348             # initiates read of current TB
1349             with m.State("TB_READ"):
1350                 comb += fast_r_dectb.addr.eq(FastRegs.TB)
1351                 comb += fast_r_dectb.ren.eq(1)
1352                 m.next = "TB_WRITE"
1353
1354             # waits for read TB to arrive, initiates write of current TB
1355             with m.State("TB_WRITE"):
1356                 new_tb = Signal(64)
1357                 comb += new_tb.eq(fast_r_dectb.o_data + 1)
1358                 comb += fast_w_dectb.addr.eq(FastRegs.TB)
1359                 comb += fast_w_dectb.wen.eq(1)
1360                 comb += fast_w_dectb.i_data.eq(new_tb)
1361                 m.next = "DEC_READ"
1362
1363         return m
1364
1365     def __iter__(self):
1366         yield from self.pc_i.ports()
1367         yield self.pc_o
1368         yield self.memerr_o
1369         yield from self.core.ports()
1370         yield from self.imem.ports()
1371         yield self.core_bigendian_i
1372         yield self.busy_o
1373
1374     def ports(self):
1375         return list(self)
1376
1377     def external_ports(self):
1378         ports = self.pc_i.ports()
1379         ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
1380                   ]
1381
1382         if self.jtag_en:
1383             ports += list(self.jtag.external_ports())
1384         else:
1385             # don't add DMI if JTAG is enabled
1386             ports += list(self.dbg.dmi.ports())
1387
1388         ports += list(self.imem.ibus.fields.values())
1389         ports += list(self.core.l0.cmpi.wb_bus().fields.values())
1390
1391         if self.sram4x4k:
1392             for sram in self.sram4k:
1393                 ports += list(sram.bus.fields.values())
1394
1395         if self.xics:
1396             ports += list(self.xics_icp.bus.fields.values())
1397             ports += list(self.xics_ics.bus.fields.values())
1398             ports.append(self.int_level_i)
1399
1400         if self.gpio:
1401             ports += list(self.simple_gpio.bus.fields.values())
1402             ports.append(self.gpio_o)
1403
1404         return ports
1405
1406     def ports(self):
1407         return list(self)
1408
1409
class TestIssuer(Elaboratable):
    """top-level wrapper: a TestIssuerInternal plus clock (and PLL) wiring

    pspec is handed straight to TestIssuerInternal.  if pspec has a
    true "use_pll" attribute the PLL wrapper is added as a submodule
    and a "pllclk" clock domain is created; if the attribute is false
    or missing altogether, no PLL logic is generated.
    """

    def __init__(self, pspec):
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL(instance=True)

        # PLL direct clock or not (a missing use_pll counts as "not")
        self.pll_en = getattr(pspec, "use_pll", False)
        if self.pll_en:
            self.pll_test_o = Signal(reset_less=True)
            self.pll_vco_o = Signal(reset_less=True)
            self.clk_sel_i = Signal(2, reset_less=True)
            self.ref_clk = ClockSignal()  # can't rename it but that's ok
            self.pllclk_clk = ClockSignal("pllclk")

    def elaborate(self, platform):
        """wires up the clocks of the "coresync" (and debug) domains

        * the coresync clock is driven from ref_clk when the PLL is
          enabled, otherwise from the default-domain clock
        * when ti.dbg_domain is anything other than 'sync', the clock
          of that domain is driven from the coresync clock
        * with the PLL enabled, the "pllclk" domain is additionally
          created, clocked from the PLL output and reset from the
          default-domain reset
        """
        m = Module()
        comb = m.d.comb

        # TestIssuer nominally runs at main clock, actually it is
        # all combinatorial internally except for coresync'd components
        m.submodules.ti = self.ti

        if self.pll_en:
            m.submodules.wrappll = pll = self.pll

            # add clock domain for the PLL, clocked by the PLL output
            cd_pll = ClockDomain("pllclk")
            m.domains += cd_pll
            comb += self.pllclk_clk.eq(pll.clk_pll_o)

            # wire up external 24mhz to PLL
            #comb += pll.clk_24_i.eq(self.ref_clk)
            # output 18 mhz PLL test signal, and analog oscillator out
            comb += self.pll_test_o.eq(pll.pll_test_o)
            comb += self.pll_vco_o.eq(pll.pll_vco_o)

            # input to pll clock selection
            comb += pll.clk_sel_i.eq(self.clk_sel_i)

            # now wire up ResetSignals.  don't mind them being in this domain
            comb += ResetSignal("pllclk").eq(ResetSignal())

        # NOTE: no ClockDomain for coresync (or for the debug domain) is
        # added to m.domains here: only their clock signals are driven.
        intclk = ClockSignal("coresync")

        # XXX BYPASS PLL XXX
        # even with the PLL enabled, coresync is clocked straight from
        # the reference clock rather than from the PLL output
        if self.pll_en:
            comb += intclk.eq(self.ref_clk)
        else:
            comb += intclk.eq(ClockSignal())

        # debug clock runs at coresync internal clock
        if self.ti.dbg_domain != 'sync':
            dbgclk = ClockSignal(self.ti.dbg_domain)
            comb += dbgclk.eq(intclk)

        return m

    def ports(self):
        """all ports of ti and of the PLL, then default-domain clock and reset"""
        return list(self.ti.ports()) + list(self.pll.ports()) + \
            [ClockSignal(), ResetSignal()]

    def external_ports(self):
        """external ports of ti, then clock and reset, then the PLL pins

        the PLL pins (clk_sel_i, pll.clk_24_i, pll_test_o, pll_vco_o,
        pllclk_clk and ref_clk) are only included when the PLL is enabled.
        """
        # copy, so that the list returned by ti is never extended in place
        ports = list(self.ti.external_ports())
        ports.append(ClockSignal())
        ports.append(ResetSignal())
        if self.pll_en:
            ports += [self.clk_sel_i,
                      self.pll.clk_24_i,
                      self.pll_test_o,
                      self.pll_vco_o,
                      self.pllclk_clk,
                      self.ref_clk]
        return ports
1497
1498
if __name__ == '__main__':
    # one of each Function Unit
    fu_names = ('alu', 'cr', 'branch', 'trap', 'logical',
                'spr', 'div', 'mul', 'shiftrot')
    units = {fu_name: 1 for fu_name in fu_names}

    # bare wishbone for both instruction and data memory
    pspec = TestMemPspec(
        ldst_ifacetype='bare_wb',
        imem_ifacetype='bare_wb',
        addr_wid=48,
        mask_wid=8,
        reg_wid=64,
        units=units,
    )
    dut = TestIssuer(pspec)

    # hand over to the nmigen command-line front-end
    main(dut, ports=dut.ports(), name="test_issuer")

    # run with no arguments at all: write RTLIL (external ports only)
    if len(sys.argv) == 1:
        il_text = rtlil.convert(dut, ports=dut.external_ports(),
                                name="test_issuer")
        with open("test_issuer.il", "w") as il_file:
            il_file.write(il_text)