src/soc/simple/issuer.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
  12 the purpose of this module is to verify the functional correctness
  13 of the Function Units in the absolute simplest and clearest possible
  14 way, and to at least provide something that can be further incrementally
  15 improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
  19                     ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from nmigen.lib.coding import PriorityEncoder
  25
  26 from openpower.decoder.power_decoder import create_pdecode
  27 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
  28 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
  29 from openpower.decoder.decode2execute1 import Data
  30 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
  31                                      SVP64PredMode)
  32 from openpower.state import CoreState
  33 from openpower.consts import (CR, SVP64CROffs)
  34 from soc.experiment.testmem import TestMemory # test only for instructions
  35 from soc.regfile.regfiles import StateRegs, FastRegs
  36 from soc.simple.core import NonProductionCore
  37 from soc.config.test.test_loadstore import TestMemPspec
  38 from soc.config.ifetch import ConfigFetchUnit
  39 from soc.debug.dmi import CoreDebug, DMIInterface
  40 from soc.debug.jtag import JTAG
  41 from soc.config.pinouts import get_pinspecs
  42 from soc.interrupts.xics import XICS_ICP, XICS_ICS
  43 from soc.bus.simple_gpio import SimpleGPIO
  44 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
  45 from soc.clock.select import ClockSelect
  46 from soc.clock.dummypll import DummyPLL
  47 from openpower.sv.svstate import SVSTATERec
  48
  49
  50 from nmutil.util import rising_edge
  51
  52 def get_insn(f_instr_o, pc):
  53     if f_instr_o.width == 32:
  54         return f_instr_o
  55     else:
  56         # 64-bit: bit 2 of pc decides which word to select
  57         return f_instr_o.word_select(pc[2], 32)
  58
  59 # gets state input or reads from state regfile
  60 def state_get(m, core_rst, state_i, name, regfile, regnum):
  61     comb = m.d.comb
  62     sync = m.d.sync
  63     # read the PC
  64     res = Signal(64, reset_less=True, name=name)
  65     res_ok_delay = Signal(name="%s_ok_delay" % name)
  66     with m.If(~core_rst):
  67         sync += res_ok_delay.eq(~state_i.ok)
  68         with m.If(state_i.ok):
  69             # incoming override (start from pc_i)
  70             comb += res.eq(state_i.data)
  71         with m.Else():
  72             # otherwise read StateRegs regfile for PC...
  73             comb += regfile.ren.eq(1<<regnum)
  74         # ... but on a 1-clock delay
  75         with m.If(res_ok_delay):
  76             comb += res.eq(regfile.o_data)
  77     return res
  78
  79 def get_predint(m, mask, name):
  80     """decode SVP64 predicate integer mask field to reg number and invert
  81     this is identical to the equivalent function in ISACaller except that
  82     it doesn't read the INT directly, it just decodes "what needs to be done"
  83     i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
  84
  85     * all1s is set to indicate that no mask is to be applied.
  86     * regread indicates the GPR register number to be read
  87     * invert is set to indicate that the register value is to be inverted
  88     * unary indicates that the contents of the register is to be shifted 1<<r3
  89     """
  90     comb = m.d.comb
  91     regread = Signal(5, name=name+"regread")
  92     invert = Signal(name=name+"invert")
  93     unary = Signal(name=name+"unary")
  94     all1s = Signal(name=name+"all1s")
  95     with m.Switch(mask):
  96         with m.Case(SVP64PredInt.ALWAYS.value):
  97             comb += all1s.eq(1)      # use 0b1111 (all ones)
  98         with m.Case(SVP64PredInt.R3_UNARY.value):
  99             comb += regread.eq(3)
 100             comb += unary.eq(1)        # 1<<r3 - shift r3 (single bit)
 101         with m.Case(SVP64PredInt.R3.value):
 102             comb += regread.eq(3)
 103         with m.Case(SVP64PredInt.R3_N.value):
 104             comb += regread.eq(3)
 105             comb += invert.eq(1)
 106         with m.Case(SVP64PredInt.R10.value):
 107             comb += regread.eq(10)
 108         with m.Case(SVP64PredInt.R10_N.value):
 109             comb += regread.eq(10)
 110             comb += invert.eq(1)
 111         with m.Case(SVP64PredInt.R30.value):
 112             comb += regread.eq(30)
 113         with m.Case(SVP64PredInt.R30_N.value):
 114             comb += regread.eq(30)
 115             comb += invert.eq(1)
 116     return regread, invert, unary, all1s
 117
 118 def get_predcr(m, mask, name):
 119     """decode SVP64 predicate CR to reg number field and invert status
 120     this is identical to _get_predcr in ISACaller
 121     """
 122     comb = m.d.comb
 123     idx = Signal(2, name=name+"idx")
 124     invert = Signal(name=name+"crinvert")
 125     with m.Switch(mask):
 126         with m.Case(SVP64PredCR.LT.value):
 127             comb += idx.eq(CR.LT)
 128             comb += invert.eq(0)
 129         with m.Case(SVP64PredCR.GE.value):
 130             comb += idx.eq(CR.LT)
 131             comb += invert.eq(1)
 132         with m.Case(SVP64PredCR.GT.value):
 133             comb += idx.eq(CR.GT)
 134             comb += invert.eq(0)
 135         with m.Case(SVP64PredCR.LE.value):
 136             comb += idx.eq(CR.GT)
 137             comb += invert.eq(1)
 138         with m.Case(SVP64PredCR.EQ.value):
 139             comb += idx.eq(CR.EQ)
 140             comb += invert.eq(0)
 141         with m.Case(SVP64PredCR.NE.value):
 142             comb += idx.eq(CR.EQ)
 143             comb += invert.eq(1)
 144         with m.Case(SVP64PredCR.SO.value):
 145             comb += idx.eq(CR.SO)
 146             comb += invert.eq(0)
 147         with m.Case(SVP64PredCR.NS.value):
 148             comb += idx.eq(CR.SO)
 149             comb += invert.eq(1)
 150     return idx, invert
 151
 152
 153 class TestIssuerInternal(Elaboratable):
 154     """TestIssuer - reads instructions from TestMemory and issues them
 155
 156     efficiency and speed is not the main goal here: functional correctness
 157     and code clarity is.  optimisations (which almost 100% interfere with
 158     easy understanding) come later.
 159     """
 160     def __init__(self, pspec):
 161
 162         # test is SVP64 is to be enabled
 163         self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
 164
 165         # and if regfiles are reduced
 166         self.regreduce_en = (hasattr(pspec, "regreduce") and
 167                                             (pspec.regreduce == True))
 168
 169         # and if overlap requested
 170         self.allow_overlap = (hasattr(pspec, "allow_overlap") and
 171                                             (pspec.allow_overlap == True))
 172
 173         # JTAG interface.  add this right at the start because if it's
 174         # added it *modifies* the pspec, by adding enable/disable signals
 175         # for parts of the rest of the core
 176         self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
 177         self.dbg_domain = "sync" # sigh "dbgsunc" too problematic
 178         #self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
 179         if self.jtag_en:
 180             # XXX MUST keep this up-to-date with litex, and
 181             # soc-cocotb-sim, and err.. all needs sorting out, argh
 182             subset = ['uart',
 183                       'mtwi',
 184                       'eint', 'gpio', 'mspi0',
 185                       # 'mspi1', - disabled for now
 186                       # 'pwm', 'sd0', - disabled for now
 187                        'sdr']
 188             self.jtag = JTAG(get_pinspecs(subset=subset),
 189                              domain=self.dbg_domain)
 190             # add signals to pspec to enable/disable icache and dcache
 191             # (or data and intstruction wishbone if icache/dcache not included)
 192             # https://bugs.libre-soc.org/show_bug.cgi?id=520
 193             # TODO: do we actually care if these are not domain-synchronised?
 194             # honestly probably not.
 195             pspec.wb_icache_en = self.jtag.wb_icache_en
 196             pspec.wb_dcache_en = self.jtag.wb_dcache_en
 197             self.wb_sram_en = self.jtag.wb_sram_en
 198         else:
 199             self.wb_sram_en = Const(1)
 200
 201         # add 4k sram blocks?
 202         self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
 203                          pspec.sram4x4kblock == True)
 204         if self.sram4x4k:
 205             self.sram4k = []
 206             for i in range(4):
 207                 self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
 208                                                     #features={'err'}
 209                                                     ))
 210
 211         # add interrupt controller?
 212         self.xics = hasattr(pspec, "xics") and pspec.xics == True
 213         if self.xics:
 214             self.xics_icp = XICS_ICP()
 215             self.xics_ics = XICS_ICS()
 216             self.int_level_i = self.xics_ics.int_level_i
 217
 218         # add GPIO peripheral?
 219         self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
 220         if self.gpio:
 221             self.simple_gpio = SimpleGPIO()
 222             self.gpio_o = self.simple_gpio.gpio_o
 223
 224         # main instruction core.  suitable for prototyping / demo only
 225         self.core = core = NonProductionCore(pspec)
 226         self.core_rst = ResetSignal("coresync")
 227
 228         # instruction decoder.  goes into Trap Record
 229         #pdecode = create_pdecode()
 230         self.cur_state = CoreState("cur") # current state (MSR/PC/SVSTATE)
 231         self.pdecode2 = PowerDecode2(None, state=self.cur_state,
 232                                      opkls=IssuerDecode2ToOperand,
 233                                      svp64_en=self.svp64_en,
 234                                      regreduce_en=self.regreduce_en)
 235         pdecode = self.pdecode2.dec
 236
 237         if self.svp64_en:
 238             self.svp64 = SVP64PrefixDecoder() # for decoding SVP64 prefix
 239
 240         # Test Instruction memory
 241         self.imem = ConfigFetchUnit(pspec).fu
 242
 243         # DMI interface
 244         self.dbg = CoreDebug()
 245
 246         # instruction go/monitor
 247         self.pc_o = Signal(64, reset_less=True)
 248         self.pc_i = Data(64, "pc_i") # set "ok" to indicate "please change me"
 249         self.svstate_i = Data(64, "svstate_i") # ditto
 250         self.core_bigendian_i = Signal() # TODO: set based on MSR.LE
 251         self.busy_o = Signal(reset_less=True)
 252         self.memerr_o = Signal(reset_less=True)
 253
 254         # STATE regfile read /write ports for PC, MSR, SVSTATE
 255         staterf = self.core.regs.rf['state']
 256         self.state_r_pc = staterf.r_ports['cia'] # PC rd
 257         self.state_w_pc = staterf.w_ports['d_wr1'] # PC wr
 258         self.state_r_msr = staterf.r_ports['msr'] # MSR rd
 259         self.state_r_sv = staterf.r_ports['sv'] # SVSTATE rd
 260         self.state_w_sv = staterf.w_ports['sv'] # SVSTATE wr
 261
 262         # DMI interface access
 263         intrf = self.core.regs.rf['int']
 264         crrf = self.core.regs.rf['cr']
 265         xerrf = self.core.regs.rf['xer']
 266         self.int_r = intrf.r_ports['dmi'] # INT read
 267         self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
 268         self.xer_r = xerrf.r_ports['full_xer'] # XER read
 269
 270         if self.svp64_en:
 271             # for predication
 272             self.int_pred = intrf.r_ports['pred'] # INT predicate read
 273             self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read
 274
 275         # hack method of keeping an eye on whether branch/trap set the PC
 276         self.state_nia = self.core.regs.rf['state'].w_ports['nia']
 277         self.state_nia.wen.name = 'state_nia_wen'
 278
 279         # pulse to synchronize the simulator at instruction end
 280         self.insn_done = Signal()
 281
 282         # indicate any instruction still outstanding, in execution
 283         self.any_busy = Signal()
 284
 285         if self.svp64_en:
 286             # store copies of predicate masks
 287             self.srcmask = Signal(64)
 288             self.dstmask = Signal(64)
 289
 290     def fetch_fsm(self, m, dbg, core, pc, svstate, nia, is_svp64_mode,
 291                         fetch_pc_o_ready, fetch_pc_i_valid,
 292                         fetch_insn_o_valid, fetch_insn_i_ready):
 293         """fetch FSM
 294
 295         this FSM performs fetch of raw instruction data, partial-decodes
 296         it 32-bit at a time to detect SVP64 prefixes, and will optionally
 297         read a 2nd 32-bit quantity if that occurs.
 298         """
 299         comb = m.d.comb
 300         sync = m.d.sync
 301         pdecode2 = self.pdecode2
 302         cur_state = self.cur_state
 303         dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode
 304
 305         msr_read = Signal(reset=1)
 306
 307         with m.FSM(name='fetch_fsm'):
 308
 309             # waiting (zzz)
 310             with m.State("IDLE"):
 311                 with m.If(~dbg.stopping_o):
 312                     comb += fetch_pc_o_ready.eq(1)
 313                 with m.If(fetch_pc_i_valid):
 314                     # instruction allowed to go: start by reading the PC
 315                     # capture the PC and also drop it into Insn Memory
 316                     # we have joined a pair of combinatorial memory
 317                     # lookups together.  this is Generally Bad.
 318                     comb += self.imem.a_pc_i.eq(pc)
 319                     comb += self.imem.a_i_valid.eq(1)
 320                     comb += self.imem.f_i_valid.eq(1)
 321                     sync += cur_state.pc.eq(pc)
 322                     sync += cur_state.svstate.eq(svstate) # and svstate
 323
 324                     # initiate read of MSR. arrives one clock later
 325                     comb += self.state_r_msr.ren.eq(1 << StateRegs.MSR)
 326                     sync += msr_read.eq(0)
 327
 328                     m.next = "INSN_READ"  # move to "wait for bus" phase
 329
 330             # dummy pause to find out why simulation is not keeping up
 331             with m.State("INSN_READ"):
 332                 if self.allow_overlap:
 333                     stopping = dbg.stopping_o
 334                 else:
 335                     stopping = Const(0)
 336                 with m.If(stopping):
 337                     # stopping: jump back to idle
 338                     m.next = "IDLE"
 339                 with m.Else():
 340                     # one cycle later, msr/sv read arrives.  valid only once.
 341                     with m.If(~msr_read):
 342                         sync += msr_read.eq(1) # yeah don't read it again
 343                         sync += cur_state.msr.eq(self.state_r_msr.o_data)
 344                     with m.If(self.imem.f_busy_o): # zzz...
 345                         # busy: stay in wait-read
 346                         comb += self.imem.a_i_valid.eq(1)
 347                         comb += self.imem.f_i_valid.eq(1)
 348                     with m.Else():
 349                         # not busy: instruction fetched
 350                         insn = get_insn(self.imem.f_instr_o, cur_state.pc)
 351                         if self.svp64_en:
 352                             svp64 = self.svp64
 353                             # decode the SVP64 prefix, if any
 354                             comb += svp64.raw_opcode_in.eq(insn)
 355                             comb += svp64.bigendian.eq(self.core_bigendian_i)
 356                             # pass the decoded prefix (if any) to PowerDecoder2
 357                             sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
 358                             sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
 359                             # remember whether this is a prefixed instruction,
 360                             # so the FSM can readily loop when VL==0
 361                             sync += is_svp64_mode.eq(svp64.is_svp64_mode)
 362                             # calculate the address of the following instruction
 363                             insn_size = Mux(svp64.is_svp64_mode, 8, 4)
 364                             sync += nia.eq(cur_state.pc + insn_size)
 365                             with m.If(~svp64.is_svp64_mode):
 366                                 # with no prefix, store the instruction
 367                                 # and hand it directly to the next FSM
 368                                 sync += dec_opcode_i.eq(insn)
 369                                 m.next = "INSN_READY"
 370                             with m.Else():
 371                                 # fetch the rest of the instruction from memory
 372                                 comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
 373                                 comb += self.imem.a_i_valid.eq(1)
 374                                 comb += self.imem.f_i_valid.eq(1)
 375                                 m.next = "INSN_READ2"
 376                         else:
 377                             # not SVP64 - 32-bit only
 378                             sync += nia.eq(cur_state.pc + 4)
 379                             sync += dec_opcode_i.eq(insn)
 380                             m.next = "INSN_READY"
 381
 382             with m.State("INSN_READ2"):
 383                 with m.If(self.imem.f_busy_o):  # zzz...
 384                     # busy: stay in wait-read
 385                     comb += self.imem.a_i_valid.eq(1)
 386                     comb += self.imem.f_i_valid.eq(1)
 387                 with m.Else():
 388                     # not busy: instruction fetched
 389                     insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
 390                     sync += dec_opcode_i.eq(insn)
 391                     m.next = "INSN_READY"
 392                     # TODO: probably can start looking at pdecode2.rm_dec
 393                     # here or maybe even in INSN_READ state, if svp64_mode
 394                     # detected, in order to trigger - and wait for - the
 395                     # predicate reading.
 396                     if self.svp64_en:
 397                         pmode = pdecode2.rm_dec.predmode
 398                     """
 399                     if pmode != SVP64PredMode.ALWAYS.value:
 400                         fire predicate loading FSM and wait before
 401                         moving to INSN_READY
 402                     else:
 403                         sync += self.srcmask.eq(-1) # set to all 1s
 404                         sync += self.dstmask.eq(-1) # set to all 1s
 405                         m.next = "INSN_READY"
 406                     """
 407
 408             with m.State("INSN_READY"):
 409                 # hand over the instruction, to be decoded
 410                 comb += fetch_insn_o_valid.eq(1)
 411                 with m.If(fetch_insn_i_ready):
 412                     m.next = "IDLE"
 413
 414     def fetch_predicate_fsm(self, m,
 415                             pred_insn_i_valid, pred_insn_o_ready,
 416                             pred_mask_o_valid, pred_mask_i_ready):
 417         """fetch_predicate_fsm - obtains (constructs in the case of CR)
 418            src/dest predicate masks
 419
 420         https://bugs.libre-soc.org/show_bug.cgi?id=617
 421         the predicates can be read here, by using IntRegs r_ports['pred']
 422         or CRRegs r_ports['pred'].  in the case of CRs it will have to
 423         be done through multiple reads, extracting one relevant at a time.
 424         later, a faster way would be to use the 32-bit-wide CR port but
 425         this is more complex decoding, here.  equivalent code used in
 426         ISACaller is "from openpower.decoder.isa.caller import get_predcr"
 427
 428         note: this ENTIRE FSM is not to be called when svp64 is disabled
 429         """
 430         comb = m.d.comb
 431         sync = m.d.sync
 432         pdecode2 = self.pdecode2
 433         rm_dec = pdecode2.rm_dec # SVP64RMModeDecode
 434         predmode = rm_dec.predmode
 435         srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
 436         cr_pred, int_pred = self.cr_pred, self.int_pred   # read regfiles
 437         # get src/dst step, so we can skip already used mask bits
 438         cur_state = self.cur_state
 439         srcstep = cur_state.svstate.srcstep
 440         dststep = cur_state.svstate.dststep
 441         cur_vl = cur_state.svstate.vl
 442
 443         # decode predicates
 444         sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
 445         dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
 446         sidx, scrinvert = get_predcr(m, srcpred, 's')
 447         didx, dcrinvert = get_predcr(m, dstpred, 'd')
 448
 449         # store fetched masks, for either intpred or crpred
 450         # when src/dst step is not zero, the skipped mask bits need to be
 451         # shifted-out, before actually storing them in src/dest mask
 452         new_srcmask = Signal(64, reset_less=True)
 453         new_dstmask = Signal(64, reset_less=True)
 454
 455         with m.FSM(name="fetch_predicate"):
 456
 457             with m.State("FETCH_PRED_IDLE"):
 458                 comb += pred_insn_o_ready.eq(1)
 459                 with m.If(pred_insn_i_valid):
 460                     with m.If(predmode == SVP64PredMode.INT):
 461                         # skip fetching destination mask register, when zero
 462                         with m.If(dall1s):
 463                             sync += new_dstmask.eq(-1)
 464                             # directly go to fetch source mask register
 465                             # guaranteed not to be zero (otherwise predmode
 466                             # would be SVP64PredMode.ALWAYS, not INT)
 467                             comb += int_pred.addr.eq(sregread)
 468                             comb += int_pred.ren.eq(1)
 469                             m.next = "INT_SRC_READ"
 470                         # fetch destination predicate register
 471                         with m.Else():
 472                             comb += int_pred.addr.eq(dregread)
 473                             comb += int_pred.ren.eq(1)
 474                             m.next = "INT_DST_READ"
 475                     with m.Elif(predmode == SVP64PredMode.CR):
 476                         # go fetch masks from the CR register file
 477                         sync += new_srcmask.eq(0)
 478                         sync += new_dstmask.eq(0)
 479                         m.next = "CR_READ"
 480                     with m.Else():
 481                         sync += self.srcmask.eq(-1)
 482                         sync += self.dstmask.eq(-1)
 483                         m.next = "FETCH_PRED_DONE"
 484
 485             with m.State("INT_DST_READ"):
 486                 # store destination mask
 487                 inv = Repl(dinvert, 64)
 488                 with m.If(dunary):
 489                     # set selected mask bit for 1<<r3 mode
 490                     dst_shift = Signal(range(64))
 491                     comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
 492                     sync += new_dstmask.eq(1 << dst_shift)
 493                 with m.Else():
 494                     # invert mask if requested
 495                     sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
 496                 # skip fetching source mask register, when zero
 497                 with m.If(sall1s):
 498                     sync += new_srcmask.eq(-1)
 499                     m.next = "FETCH_PRED_SHIFT_MASK"
 500                 # fetch source predicate register
 501                 with m.Else():
 502                     comb += int_pred.addr.eq(sregread)
 503                     comb += int_pred.ren.eq(1)
 504                     m.next = "INT_SRC_READ"
 505
 506             with m.State("INT_SRC_READ"):
 507                 # store source mask
 508                 inv = Repl(sinvert, 64)
 509                 with m.If(sunary):
 510                     # set selected mask bit for 1<<r3 mode
 511                     src_shift = Signal(range(64))
 512                     comb += src_shift.eq(self.int_pred.o_data & 0b111111)
 513                     sync += new_srcmask.eq(1 << src_shift)
 514                 with m.Else():
 515                     # invert mask if requested
 516                     sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
 517                 m.next = "FETCH_PRED_SHIFT_MASK"
 518
 519             # fetch masks from the CR register file
 520             # implements the following loop:
 521             # idx, inv = get_predcr(mask)
 522             # mask = 0
 523             # for cr_idx in range(vl):
 524             #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
 525             #     if cr[idx] ^ inv:
 526             #         mask |= 1 << cr_idx
 527             # return mask
 528             with m.State("CR_READ"):
 529                 # CR index to be read, which will be ready by the next cycle
 530                 cr_idx = Signal.like(cur_vl, reset_less=True)
 531                 # submit the read operation to the regfile
 532                 with m.If(cr_idx != cur_vl):
 533                     # the CR read port is unary ...
 534                     # ren = 1 << cr_idx
 535                     # ... in MSB0 convention ...
 536                     # ren = 1 << (7 - cr_idx)
 537                     # ... and with an offset:
 538                     # ren = 1 << (7 - off - cr_idx)
 539                     idx = SVP64CROffs.CRPred + cr_idx
 540                     comb += cr_pred.ren.eq(1 << (7 - idx))
 541                     # signal data valid in the next cycle
 542                     cr_read = Signal(reset_less=True)
 543                     sync += cr_read.eq(1)
 544                     # load the next index
 545                     sync += cr_idx.eq(cr_idx + 1)
 546                 with m.Else():
 547                     # exit on loop end
 548                     sync += cr_read.eq(0)
 549                     sync += cr_idx.eq(0)
 550                     m.next = "FETCH_PRED_SHIFT_MASK"
 551                 with m.If(cr_read):
 552                     # compensate for the one cycle delay on the regfile
 553                     cur_cr_idx = Signal.like(cur_vl)
 554                     comb += cur_cr_idx.eq(cr_idx - 1)
 555                     # read the CR field, select the appropriate bit
 556                     cr_field = Signal(4)
 557                     scr_bit = Signal()
 558                     dcr_bit = Signal()
 559                     comb += cr_field.eq(cr_pred.o_data)
 560                     comb += scr_bit.eq(cr_field.bit_select(sidx, 1) ^ scrinvert)
 561                     comb += dcr_bit.eq(cr_field.bit_select(didx, 1) ^ dcrinvert)
 562                     # set the corresponding mask bit
 563                     bit_to_set = Signal.like(self.srcmask)
 564                     comb += bit_to_set.eq(1 << cur_cr_idx)
 565                     with m.If(scr_bit):
 566                         sync += new_srcmask.eq(new_srcmask | bit_to_set)
 567                     with m.If(dcr_bit):
 568                         sync += new_dstmask.eq(new_dstmask | bit_to_set)
 569
 570             with m.State("FETCH_PRED_SHIFT_MASK"):
 571                 # shift-out skipped mask bits
 572                 sync += self.srcmask.eq(new_srcmask >> srcstep)
 573                 sync += self.dstmask.eq(new_dstmask >> dststep)
 574                 m.next = "FETCH_PRED_DONE"
 575
 576             with m.State("FETCH_PRED_DONE"):
 577                 comb += pred_mask_o_valid.eq(1)
 578                 with m.If(pred_mask_i_ready):
 579                     m.next = "FETCH_PRED_IDLE"
 580
 581     def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
 582                   dbg, core_rst, is_svp64_mode,
 583                   fetch_pc_o_ready, fetch_pc_i_valid,
 584                   fetch_insn_o_valid, fetch_insn_i_ready,
 585                   pred_insn_i_valid, pred_insn_o_ready,
 586                   pred_mask_o_valid, pred_mask_i_ready,
 587                   exec_insn_i_valid, exec_insn_o_ready,
 588                   exec_pc_o_valid, exec_pc_i_ready):
 589         """issue FSM
 590
 591         decode / issue FSM.  this interacts with the "fetch" FSM
 592         through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
 593         (outgoing). also interacts with the "execute" FSM
 594         through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
 595         (incoming).
 596         SVP64 RM prefixes have already been set up by the
 597         "fetch" phase, so execute is fairly straightforward.
 598         """
 599
 600         comb = m.d.comb
 601         sync = m.d.sync
 602         pdecode2 = self.pdecode2
 603         cur_state = self.cur_state
 604
 605         # temporaries
 606         dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode
 607
 608         # for updating svstate (things like srcstep etc.)
 609         update_svstate = Signal() # set this (below) if updating
 610         new_svstate = SVSTATERec("new_svstate")
 611         comb += new_svstate.eq(cur_state.svstate)
 612
 613         # precalculate srcstep+1 and dststep+1
 614         cur_srcstep = cur_state.svstate.srcstep
 615         cur_dststep = cur_state.svstate.dststep
 616         next_srcstep = Signal.like(cur_srcstep)
 617         next_dststep = Signal.like(cur_dststep)
 618         comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
 619         comb += next_dststep.eq(cur_state.svstate.dststep+1)
 620
 621         # note if an exception happened.  in a pipelined or OoO design
 622         # this needs to be accompanied by "shadowing" (or stalling)
 623         exc_happened = self.core.o.exc_happened
 624
 625         with m.FSM(name="issue_fsm"):
 626
 627             # sync with the "fetch" phase which is reading the instruction
 628             # at this point, there is no instruction running, that
 629             # could inadvertently update the PC.
 630             with m.State("ISSUE_START"):
 631                 # wait on "core stop" release, before next fetch
 632                 # need to do this here, in case we are in a VL==0 loop
 633                 with m.If(~dbg.core_stop_o & ~core_rst):
 634                     comb += fetch_pc_i_valid.eq(1) # tell fetch to start
 635                     with m.If(fetch_pc_o_ready):   # fetch acknowledged us
 636                         m.next = "INSN_WAIT"
 637                 with m.Else():
 638                     # tell core it's stopped, and acknowledge debug handshake
 639                     comb += dbg.core_stopped_i.eq(1)
 640                     # while stopped, allow updating the PC and SVSTATE
 641                     with m.If(self.pc_i.ok):
 642                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 643                         comb += self.state_w_pc.i_data.eq(self.pc_i.data)
 644                         sync += pc_changed.eq(1)
 645                     with m.If(self.svstate_i.ok):
 646                         comb += new_svstate.eq(self.svstate_i.data)
 647                         comb += update_svstate.eq(1)
 648                         sync += sv_changed.eq(1)
 649
 650             # wait for an instruction to arrive from Fetch
 651             with m.State("INSN_WAIT"):
 652                 comb += fetch_insn_i_ready.eq(1)
 653                 with m.If(fetch_insn_o_valid):
 654                     # loop into ISSUE_START if it's a SVP64 instruction
 655                     # and VL == 0.  this because VL==0 is a for-loop
 656                     # from 0 to 0 i.e. always, always a NOP.
 657                     cur_vl = cur_state.svstate.vl
 658                     with m.If(is_svp64_mode & (cur_vl == 0)):
 659                         # update the PC before fetching the next instruction
 660                         # since we are in a VL==0 loop, no instruction was
 661                         # executed that we could be overwriting
 662                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 663                         comb += self.state_w_pc.i_data.eq(nia)
 664                         comb += self.insn_done.eq(1)
 665                         m.next = "ISSUE_START"
 666                     with m.Else():
 667                         if self.svp64_en:
 668                             m.next = "PRED_START"  # start fetching predicate
 669                         else:
 670                             m.next = "DECODE_SV"  # skip predication
 671
 672             with m.State("PRED_START"):
 673                 comb += pred_insn_i_valid.eq(1)  # tell fetch_pred to start
 674                 with m.If(pred_insn_o_ready):  # fetch_pred acknowledged us
 675                     m.next = "MASK_WAIT"
 676
 677             with m.State("MASK_WAIT"):
 678                 comb += pred_mask_i_ready.eq(1) # ready to receive the masks
 679                 with m.If(pred_mask_o_valid): # predication masks are ready
 680                     m.next = "PRED_SKIP"
 681
 682             # skip zeros in predicate
 683             with m.State("PRED_SKIP"):
 684                 with m.If(~is_svp64_mode):
 685                     m.next = "DECODE_SV"  # nothing to do
 686                 with m.Else():
 687                     if self.svp64_en:
 688                         pred_src_zero = pdecode2.rm_dec.pred_sz
 689                         pred_dst_zero = pdecode2.rm_dec.pred_dz
 690
 691                         # new srcstep, after skipping zeros
 692                         skip_srcstep = Signal.like(cur_srcstep)
 693                         # value to be added to the current srcstep
 694                         src_delta = Signal.like(cur_srcstep)
 695                         # add leading zeros to srcstep, if not in zero mode
 696                         with m.If(~pred_src_zero):
 697                             # priority encoder (count leading zeros)
 698                             # append guard bit, in case the mask is all zeros
 699                             pri_enc_src = PriorityEncoder(65)
 700                             m.submodules.pri_enc_src = pri_enc_src
 701                             comb += pri_enc_src.i.eq(Cat(self.srcmask,
 702                                                          Const(1, 1)))
 703                             comb += src_delta.eq(pri_enc_src.o)
 704                         # apply delta to srcstep
 705                         comb += skip_srcstep.eq(cur_srcstep + src_delta)
 706                         # shift-out all leading zeros from the mask
 707                         # plus the leading "one" bit
 708                         # TODO count leading zeros and shift-out the zero
 709                         #      bits, in the same step, in hardware
 710                         sync += self.srcmask.eq(self.srcmask >> (src_delta+1))
 711
 712                         # same as above, but for dststep
 713                         skip_dststep = Signal.like(cur_dststep)
 714                         dst_delta = Signal.like(cur_dststep)
 715                         with m.If(~pred_dst_zero):
 716                             pri_enc_dst = PriorityEncoder(65)
 717                             m.submodules.pri_enc_dst = pri_enc_dst
 718                             comb += pri_enc_dst.i.eq(Cat(self.dstmask,
 719                                                          Const(1, 1)))
 720                             comb += dst_delta.eq(pri_enc_dst.o)
 721                         comb += skip_dststep.eq(cur_dststep + dst_delta)
 722                         sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))
 723
 724                         # TODO: initialize mask[VL]=1 to avoid passing past VL
 725                         with m.If((skip_srcstep >= cur_vl) |
 726                                   (skip_dststep >= cur_vl)):
 727                             # end of VL loop. Update PC and reset src/dst step
 728                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 729                             comb += self.state_w_pc.i_data.eq(nia)
 730                             comb += new_svstate.srcstep.eq(0)
 731                             comb += new_svstate.dststep.eq(0)
 732                             comb += update_svstate.eq(1)
 733                             # synchronize with the simulator
 734                             comb += self.insn_done.eq(1)
 735                             # go back to Issue
 736                             m.next = "ISSUE_START"
 737                         with m.Else():
 738                             # update new src/dst step
 739                             comb += new_svstate.srcstep.eq(skip_srcstep)
 740                             comb += new_svstate.dststep.eq(skip_dststep)
 741                             comb += update_svstate.eq(1)
 742                             # proceed to Decode
 743                             m.next = "DECODE_SV"
 744
 745                         # pass predicate mask bits through to satellite decoders
 746                         # TODO: for SIMD this will be *multiple* bits
 747                         sync += core.i.sv_pred_sm.eq(self.srcmask[0])
 748                         sync += core.i.sv_pred_dm.eq(self.dstmask[0])
 749
 750             # after src/dst step have been updated, we are ready
 751             # to decode the instruction
 752             with m.State("DECODE_SV"):
 753                 # decode the instruction
 754                 sync += core.i.e.eq(pdecode2.e)
 755                 sync += core.i.state.eq(cur_state)
 756                 sync += core.i.raw_insn_i.eq(dec_opcode_i)
 757                 sync += core.i.bigendian_i.eq(self.core_bigendian_i)
 758                 if self.svp64_en:
 759                     sync += core.i.sv_rm.eq(pdecode2.sv_rm)
 760                     # set RA_OR_ZERO detection in satellite decoders
 761                     sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
 762                     # and svp64 detection
 763                     sync += core.i.is_svp64_mode.eq(is_svp64_mode)
 764                     # and svp64 bit-rev'd ldst mode
 765                     ldst_dec = pdecode2.use_svp64_ldst_dec
 766                     sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
 767                 # after decoding, reset any previous exception condition,
 768                 # allowing it to be set again during the next execution
 769                 sync += pdecode2.ldst_exc.eq(0)
 770
 771                 m.next = "INSN_EXECUTE"  # move to "execute"
 772
 773             # handshake with execution FSM, move to "wait" once acknowledged
 774             with m.State("INSN_EXECUTE"):
 775                 comb += exec_insn_i_valid.eq(1) # trigger execute
 776                 with m.If(exec_insn_o_ready):   # execute acknowledged us
 777                     m.next = "EXECUTE_WAIT"
 778
 779             with m.State("EXECUTE_WAIT"):
 780                 # wait on "core stop" release, at instruction end
 781                 # need to do this here, in case we are in a VL>1 loop
 782                 with m.If(~dbg.core_stop_o & ~core_rst):
 783                     comb += exec_pc_i_ready.eq(1)
 784                     # see https://bugs.libre-soc.org/show_bug.cgi?id=636
 785                     # the exception info needs to be blatted into
 786                     # pdecode.ldst_exc, and the instruction "re-run".
 787                     # when ldst_exc.happened is set, the PowerDecoder2
 788                     # reacts very differently: it re-writes the instruction
 789                     # with a "trap" (calls PowerDecoder2.trap()) which
 790                     # will *overwrite* whatever was requested and jump the
 791                     # PC to the exception address, as well as alter MSR.
 792                     # nothing else needs to be done other than to note
 793                     # the change of PC and MSR (and, later, SVSTATE)
 794                     with m.If(exc_happened):
 795                         sync += pdecode2.ldst_exc.eq(core.fus.get_exc("ldst0"))
 796
 797                     with m.If(exec_pc_o_valid):
 798
 799                         # was this the last loop iteration?
 800                         is_last = Signal()
 801                         cur_vl = cur_state.svstate.vl
 802                         comb += is_last.eq(next_srcstep == cur_vl)
 803
 804                         # return directly to Decode if Execute generated an
 805                         # exception.
 806                         with m.If(pdecode2.ldst_exc.happened):
 807                             m.next = "DECODE_SV"
 808
 809                         # if either PC or SVSTATE were changed by the previous
 810                         # instruction, go directly back to Fetch, without
 811                         # updating either PC or SVSTATE
 812                         with m.Elif(pc_changed | sv_changed):
 813                             m.next = "ISSUE_START"
 814
 815                         # also return to Fetch, when no output was a vector
 816                         # (regardless of SRCSTEP and VL), or when the last
 817                         # instruction was really the last one of the VL loop
 818                         with m.Elif((~pdecode2.loop_continue) | is_last):
 819                             # before going back to fetch, update the PC state
 820                             # register with the NIA.
 821                             # ok here we are not reading the branch unit.
 822                             # TODO: this just blithely overwrites whatever
 823                             #       pipeline updated the PC
 824                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 825                             comb += self.state_w_pc.i_data.eq(nia)
 826                             # reset SRCSTEP before returning to Fetch
 827                             if self.svp64_en:
 828                                 with m.If(pdecode2.loop_continue):
 829                                     comb += new_svstate.srcstep.eq(0)
 830                                     comb += new_svstate.dststep.eq(0)
 831                                     comb += update_svstate.eq(1)
 832                             else:
 833                                 comb += new_svstate.srcstep.eq(0)
 834                                 comb += new_svstate.dststep.eq(0)
 835                                 comb += update_svstate.eq(1)
 836                             m.next = "ISSUE_START"
 837
 838                         # returning to Execute? then, first update SRCSTEP
 839                         with m.Else():
 840                             comb += new_svstate.srcstep.eq(next_srcstep)
 841                             comb += new_svstate.dststep.eq(next_dststep)
 842                             comb += update_svstate.eq(1)
 843                             # return to mask skip loop
 844                             m.next = "PRED_SKIP"
 845
 846                 with m.Else():
 847                     comb += dbg.core_stopped_i.eq(1)
 848                     # while stopped, allow updating the PC and SVSTATE
 849                     with m.If(self.pc_i.ok):
 850                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 851                         comb += self.state_w_pc.i_data.eq(self.pc_i.data)
 852                         sync += pc_changed.eq(1)
 853                     with m.If(self.svstate_i.ok):
 854                         comb += new_svstate.eq(self.svstate_i.data)
 855                         comb += update_svstate.eq(1)
 856                         sync += sv_changed.eq(1)
 857
 858         # check if svstate needs updating: if so, write it to State Regfile
 859         with m.If(update_svstate):
 860             comb += self.state_w_sv.wen.eq(1<<StateRegs.SVSTATE)
 861             comb += self.state_w_sv.i_data.eq(new_svstate)
 862             sync += cur_state.svstate.eq(new_svstate) # for next clock
 863
    def execute_fsm(self, m, core, pc_changed, sv_changed,
                    exec_insn_i_valid, exec_insn_o_ready,
                    exec_pc_o_valid, exec_pc_i_ready):
        """execute FSM

        execute FSM. this interacts with the "issue" FSM
        through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
        (outgoing). SVP64 RM prefixes have already been set up by the
        "issue" phase, so execute is fairly straightforward.

        arguments:

        * m: the Module to which the FSM ("exec_fsm") is added
        * core: the core; its p.i_valid, p.o_ready and n.o_data.busy_o
          signals are used here
        * pc_changed, sv_changed: flags cleared (sync) when an instruction
          is handed to the core, and set (sync) while it is active if
          self.state_nia.wen has the PC / SVSTATE bit set
        * exec_insn_i_valid / exec_insn_o_ready: incoming handshake.
          ready is driven high whenever the FSM is in INSN_START
        * exec_pc_o_valid / exec_pc_i_ready: outgoing handshake.
          valid is driven high in INSN_ACTIVE once the core is not busy

        two states: INSN_START (waiting to hand an instruction to the
        core) and INSN_ACTIVE (waiting for the core to finish).
        nothing is returned: statements are added to m.
        """

        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2

        # temporaries
        core_busy_o = core.n.o_data.busy_o # core is busy
        core_ivalid_i = core.p.i_valid              # instruction is valid

        with m.FSM(name="exec_fsm"):

            # waiting for instruction bus (stays there until not busy)
            with m.State("INSN_START"):
                # always ready to accept an instruction while in this state
                comb += exec_insn_o_ready.eq(1)
                with m.If(exec_insn_i_valid):
                    comb += core_ivalid_i.eq(1)  # instruction is valid/issued
                    # start the new instruction with both "changed" flags
                    # cleared: they are only set again in INSN_ACTIVE
                    sync += sv_changed.eq(0)
                    sync += pc_changed.eq(0)
                    with m.If(core.p.o_ready): # only move if accepted
                        m.next = "INSN_ACTIVE"  # move to "wait completion"

            # instruction started: must wait till it finishes
            with m.State("INSN_ACTIVE"):
                # note changes to PC and SVSTATE
                # (wen is tested against a one-hot mask for each register)
                with m.If(self.state_nia.wen & (1<<StateRegs.SVSTATE)):
                    sync += sv_changed.eq(1)
                with m.If(self.state_nia.wen & (1<<StateRegs.PC)):
                    sync += pc_changed.eq(1)
                with m.If(~core_busy_o): # instruction done!
                    comb += exec_pc_o_valid.eq(1)
                    with m.If(exec_pc_i_ready):
                        # when finished, indicate "done".
                        # however, if there was an exception, the instruction
                        # is *not* yet done.  this is an implementation
                        # detail: we choose to implement exceptions by
                        # taking the exception information from the LDST
                        # unit, putting that *back* into the PowerDecoder2,
                        # and *re-running the entire instruction*.
                        # if we erroneously indicate "done" here, it is as if
                        # there were *TWO* instructions:
                        # 1) the failed LDST 2) a TRAP.
                        with m.If(~pdecode2.ldst_exc.happened):
                            comb += self.insn_done.eq(1)
                        # return to INSN_START whether or not insn_done
                        # was raised
                        m.next = "INSN_START"  # back to fetch
 918
 919     def setup_peripherals(self, m):
 920         comb, sync = m.d.comb, m.d.sync
 921
 922         # okaaaay so the debug module must be in coresync clock domain
 923         # but NOT its reset signal. to cope with this, set every single
 924         # submodule explicitly in coresync domain, debug and JTAG
 925         # in their own one but using *external* reset.
 926         csd = DomainRenamer("coresync")
 927         dbd = DomainRenamer(self.dbg_domain)
 928
 929         m.submodules.core = core = csd(self.core)
 930         m.submodules.imem = imem = csd(self.imem)
 931         m.submodules.dbg = dbg = dbd(self.dbg)
 932         if self.jtag_en:
 933             m.submodules.jtag = jtag = dbd(self.jtag)
 934             # TODO: UART2GDB mux, here, from external pin
 935             # see https://bugs.libre-soc.org/show_bug.cgi?id=499
 936             sync += dbg.dmi.connect_to(jtag.dmi)
 937
 938         cur_state = self.cur_state
 939
 940         # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
 941         if self.sram4x4k:
 942             for i, sram in enumerate(self.sram4k):
 943                 m.submodules["sram4k_%d" % i] = csd(sram)
 944                 comb += sram.enable.eq(self.wb_sram_en)
 945
 946         # XICS interrupt handler
 947         if self.xics:
 948             m.submodules.xics_icp = icp = csd(self.xics_icp)
 949             m.submodules.xics_ics = ics = csd(self.xics_ics)
 950             comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
 951             sync += cur_state.eint.eq(icp.core_irq_o) # connect ICP to core
 952
 953         # GPIO test peripheral
 954         if self.gpio:
 955             m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)
 956
 957         # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
 958         # XXX causes litex ECP5 test to get wrong idea about input and output
 959         # (but works with verilator sim *sigh*)
 960         #if self.gpio and self.xics:
 961         #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
 962
 963         # instruction decoder
 964         pdecode = create_pdecode()
 965         m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
 966         if self.svp64_en:
 967             m.submodules.svp64 = svp64 = csd(self.svp64)
 968
 969         # convenience
 970         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
 971         intrf = self.core.regs.rf['int']
 972
 973         # clock delay power-on reset
 974         cd_por  = ClockDomain(reset_less=True)
 975         cd_sync = ClockDomain()
 976         core_sync = ClockDomain("coresync")
 977         m.domains += cd_por, cd_sync, core_sync
 978         if self.dbg_domain != "sync":
 979             dbg_sync = ClockDomain(self.dbg_domain)
 980             m.domains += dbg_sync
 981
 982         ti_rst = Signal(reset_less=True)
 983         delay = Signal(range(4), reset=3)
 984         with m.If(delay != 0):
 985             m.d.por += delay.eq(delay - 1)
 986         comb += cd_por.clk.eq(ClockSignal())
 987
 988         # power-on reset delay
 989         core_rst = ResetSignal("coresync")
 990         comb += ti_rst.eq(delay != 0 | dbg.core_rst_o | ResetSignal())
 991         comb += core_rst.eq(ti_rst)
 992
 993         # debug clock is same as coresync, but reset is *main external*
 994         if self.dbg_domain != "sync":
 995             dbg_rst = ResetSignal(self.dbg_domain)
 996             comb += dbg_rst.eq(ResetSignal())
 997
 998         # busy/halted signals from core
 999         core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o # core is busy
1000         comb += self.busy_o.eq(core_busy_o)
1001         comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
1002
1003         # temporary hack: says "go" immediately for both address gen and ST
1004         l0 = core.l0
1005         ldst = core.fus.fus['ldst0']
1006         st_go_edge = rising_edge(m, ldst.st.rel_o)
1007         m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o) # link addr-go direct to rel
1008         m.d.comb += ldst.st.go_i.eq(st_go_edge) # link store-go to rising rel
1009
    def elaborate(self, platform):
        """builds the issuer Module

        sets up the peripherals and core, reads PC and SVSTATE (via
        state_get), creates the ready/valid handshake signals and then
        adds the fetch, issue, (if svp64_en) predicate-fetch and execute
        FSMs, followed by the DMI read logic and the DEC/TB FSM.

        platform is not used within this method.  returns the Module.
        """
        m = Module()
        # convenience
        comb, sync = m.d.comb, m.d.sync
        cur_state = self.cur_state
        # NOTE(review): pdecode2 is not referenced again in this method
        pdecode2 = self.pdecode2
        dbg = self.dbg
        core = self.core

        # set up peripherals and core
        core_rst = self.core_rst
        self.setup_peripherals(m)

        # reset current state if core reset requested
        with m.If(core_rst):
            m.d.sync += self.cur_state.eq(0)

        # PC and instruction from I-Memory
        comb += self.pc_o.eq(cur_state.pc)
        pc_changed = Signal() # note write to PC
        sv_changed = Signal() # note write to SVSTATE

        # indicate to outside world if any FU is still executing
        comb += self.any_busy.eq(core.n.o_data.any_busy_o) # any FU executing

        # read state either from incoming override or from regfile
        # TODO: really should be doing MSR in the same way
        pc = state_get(m, core_rst, self.pc_i,
                            "pc",                  # read PC
                            self.state_r_pc, StateRegs.PC)
        svstate = state_get(m, core_rst, self.svstate_i,
                            "svstate",   # read SVSTATE
                            self.state_r_sv, StateRegs.SVSTATE)

        # don't write pc every cycle
        # (these are the defaults: they are added before the FSMs, whose
        # own assignments to state_w_pc come later)
        comb += self.state_w_pc.wen.eq(0)
        comb += self.state_w_pc.i_data.eq(0)

        # don't read msr every cycle
        comb += self.state_r_msr.ren.eq(0)

        # address of the next instruction, in the absence of a branch
        # depends on the instruction size
        nia = Signal(64)

        # connect up debug signals
        # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
        comb += dbg.terminate_i.eq(core.o.core_terminate_o)
        comb += dbg.state.pc.eq(pc)
        comb += dbg.state.svstate.eq(svstate)
        comb += dbg.state.msr.eq(cur_state.msr)

        # pass the prefix mode from Fetch to Issue, so the latter can loop
        # on VL==0
        is_svp64_mode = Signal()

        # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
        # issue, decode/execute, now joined by "Predicate fetch/calculate".
        # these are the handshake signals between each

        # fetch FSM can run as soon as the PC is valid
        fetch_pc_i_valid = Signal() # Execute tells Fetch "start next read"
        fetch_pc_o_ready = Signal() # Fetch Tells SVSTATE "proceed"

        # fetch FSM hands over the instruction to be decoded / issued
        fetch_insn_o_valid = Signal()
        fetch_insn_i_ready = Signal()

        # predicate fetch FSM decodes and fetches the predicate
        pred_insn_i_valid = Signal()
        pred_insn_o_ready = Signal()

        # predicate fetch FSM delivers the masks
        pred_mask_o_valid = Signal()
        pred_mask_i_ready = Signal()

        # issue FSM delivers the instruction to be executed
        exec_insn_i_valid = Signal()
        exec_insn_o_ready = Signal()

        # execute FSM, hands over the PC/SVSTATE back to the issue FSM
        exec_pc_o_valid = Signal()
        exec_pc_i_ready = Signal()

        # the FSMs here are perhaps unusual in that they detect conditions
        # then "hold" information, combinatorially, for the core
        # (as opposed to using sync - which would be on a clock's delay)
        # this includes the actual opcode, valid flags and so on.

        # Fetch, then predicate fetch, then Issue, then Execute.
        # Issue is where the VL for-loop # lives.  the ready/valid
        # signalling is used to communicate between the four.

        self.fetch_fsm(m, dbg, core, pc, svstate, nia, is_svp64_mode,
                       fetch_pc_o_ready, fetch_pc_i_valid,
                       fetch_insn_o_valid, fetch_insn_i_ready)

        self.issue_fsm(m, core, pc_changed, sv_changed, nia,
                       dbg, core_rst, is_svp64_mode,
                       fetch_pc_o_ready, fetch_pc_i_valid,
                       fetch_insn_o_valid, fetch_insn_i_ready,
                       pred_insn_i_valid, pred_insn_o_ready,
                       pred_mask_o_valid, pred_mask_i_ready,
                       exec_insn_i_valid, exec_insn_o_ready,
                       exec_pc_o_valid, exec_pc_i_ready)

        # the predicate-fetch FSM only exists when SVP64 is enabled
        if self.svp64_en:
            self.fetch_predicate_fsm(m,
                                     pred_insn_i_valid, pred_insn_o_ready,
                                     pred_mask_o_valid, pred_mask_i_ready)

        self.execute_fsm(m, core, pc_changed, sv_changed,
                         exec_insn_i_valid, exec_insn_o_ready,
                         exec_pc_o_valid, exec_pc_i_ready)

        # whatever was done above, over-ride it if core reset is held
        with m.If(core_rst):
            sync += nia.eq(0)

        # this bit doesn't have to be in the FSM: connect up to read
        # regfiles on demand from DMI
        self.do_dmi(m, dbg)

        # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
        # (which uses that in PowerDecoder2 to raise 0x900 exception)
        self.tb_dec_fsm(m, cur_state.dec)

        return m
1138
    def do_dmi(self, m, dbg):
        """deals with DMI debug requests

        currently only provides read requests for the INT regfile, CR and XER
        it will later also deal with *writing* to these regfiles.

        m is the Module to add statements to, dbg is the debug object
        providing the d_gpr, d_cr and d_xer request interfaces.  for each
        of the three, a request raises the read-enable of the matching
        regfile port (int_r, cr_r, xer_r) and, one clock later, the
        port's o_data is returned together with an ack.
        """
        comb = m.d.comb
        sync = m.d.sync
        # NOTE(review): dmi is unpacked here but not used in this method
        dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
        intrf = self.core.regs.rf['int']

        with m.If(d_reg.req): # request for regfile access being made
            # TODO: error-check this
            # XXX should this be combinatorial?  sync better?
            if intrf.unary:
                # unary regfile: read-enable is a one-hot of the address
                comb += self.int_r.ren.eq(1<<d_reg.addr)
            else:
                # otherwise: separate address plus a single enable bit
                comb += self.int_r.addr.eq(d_reg.addr)
                comb += self.int_r.ren.eq(1)
        # request delayed by one clock: used as the "data valid" indicator
        d_reg_delay  = Signal()
        sync += d_reg_delay.eq(d_reg.req)
        with m.If(d_reg_delay):
            # data arrives one clock later
            comb += d_reg.data.eq(self.int_r.o_data)
            comb += d_reg.ack.eq(1)

        # sigh same thing for CR debug
        with m.If(d_cr.req): # request for regfile access being made
            comb += self.cr_r.ren.eq(0b11111111) # enable all
        d_cr_delay  = Signal()
        sync += d_cr_delay.eq(d_cr.req)
        with m.If(d_cr_delay):
            # data arrives one clock later
            comb += d_cr.data.eq(self.cr_r.o_data)
            comb += d_cr.ack.eq(1)

        # aaand XER...
        with m.If(d_xer.req): # request for regfile access being made
            comb += self.xer_r.ren.eq(0b111111) # enable all
        d_xer_delay  = Signal()
        sync += d_xer_delay.eq(d_xer.req)
        with m.If(d_xer_delay):
            # data arrives one clock later
            comb += d_xer.data.eq(self.xer_r.o_data)
            comb += d_xer.ack.eq(1)
1184
    def tb_dec_fsm(self, m, spr_dec):
        """tb_dec_fsm

        this is a FSM for updating either dec or tb.  it runs alternately
        DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
        value to DEC, however the regfile has "passthrough" on it so this
        *should* be ok.

        see v3.0B p1097-1099 for Timer Resource and p1065 and p1076

        m is the Module to add the FSM to.  spr_dec receives (sync) a copy
        of each newly-written DEC value.  the four states cycle
        DEC_READ -> DEC_WRITE -> TB_READ -> TB_WRITE -> DEC_READ,
        unconditionally, using the 'issue' read and write ports of the
        'fast' regfile.  returns m (the caller in elaborate() ignores it).
        """

        comb, sync = m.d.comb, m.d.sync
        fast_rf = self.core.regs.rf['fast']
        fast_r_dectb = fast_rf.r_ports['issue'] # DEC/TB
        fast_w_dectb = fast_rf.w_ports['issue'] # DEC/TB

        # NOTE(review): the "fsm" binding is not used below
        with m.FSM() as fsm:

            # initiates read of current DEC
            with m.State("DEC_READ"):
                comb += fast_r_dectb.addr.eq(FastRegs.DEC)
                comb += fast_r_dectb.ren.eq(1)
                m.next = "DEC_WRITE"

            # waits for DEC read to arrive (1 cycle), updates with new value
            with m.State("DEC_WRITE"):
                new_dec = Signal(64)
                # TODO: MSR.LPCR 32-bit decrement mode
                comb += new_dec.eq(fast_r_dectb.o_data - 1)
                comb += fast_w_dectb.addr.eq(FastRegs.DEC)
                comb += fast_w_dectb.wen.eq(1)
                comb += fast_w_dectb.i_data.eq(new_dec)
                sync += spr_dec.eq(new_dec) # copy into cur_state for decoder
                m.next = "TB_READ"

            # initiates read of current TB
            with m.State("TB_READ"):
                comb += fast_r_dectb.addr.eq(FastRegs.TB)
                comb += fast_r_dectb.ren.eq(1)
                m.next = "TB_WRITE"

            # waits for read TB to arrive, initiates write of current TB
            with m.State("TB_WRITE"):
                new_tb = Signal(64)
                # TB counts up (DEC, above, counts down)
                comb += new_tb.eq(fast_r_dectb.o_data + 1)
                comb += fast_w_dectb.addr.eq(FastRegs.TB)
                comb += fast_w_dectb.wen.eq(1)
                comb += fast_w_dectb.i_data.eq(new_tb)
                m.next = "DEC_READ"

        return m
1236
1237     def __iter__(self):
1238         yield from self.pc_i.ports()
1239         yield self.pc_o
1240         yield self.memerr_o
1241         yield from self.core.ports()
1242         yield from self.imem.ports()
1243         yield self.core_bigendian_i
1244         yield self.busy_o
1245
1246     def ports(self):
1247         return list(self)
1248
1249     def external_ports(self):
1250         ports = self.pc_i.ports()
1251         ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
1252                 ]
1253
1254         if self.jtag_en:
1255             ports += list(self.jtag.external_ports())
1256         else:
1257             # don't add DMI if JTAG is enabled
1258             ports += list(self.dbg.dmi.ports())
1259
1260         ports += list(self.imem.ibus.fields.values())
1261         ports += list(self.core.l0.cmpi.wb_bus().fields.values())
1262
1263         if self.sram4x4k:
1264             for sram in self.sram4k:
1265                 ports += list(sram.bus.fields.values())
1266
1267         if self.xics:
1268             ports += list(self.xics_icp.bus.fields.values())
1269             ports += list(self.xics_ics.bus.fields.values())
1270             ports.append(self.int_level_i)
1271
1272         if self.gpio:
1273             ports += list(self.simple_gpio.bus.fields.values())
1274             ports.append(self.gpio_o)
1275
1276         return ports
1277
1278     def ports(self):
1279         return list(self)
1280
1281
class TestIssuer(Elaboratable):
    """Wrapper pairing a TestIssuerInternal with a DummyPLL and clock wiring.

    ``self.ti`` is the wrapped TestIssuerInternal and ``self.pll`` the
    DummyPLL instance.  The PLL is only added to the design, and the
    PLL-related signals only exist, when ``pspec.use_pll`` is present and
    truthy (recorded in ``self.pll_en``).
    """

    def __init__(self, pspec):
        """Build the inner issuer, the PLL and, if enabled, the PLL signals.

        pspec: parameter specification; passed unchanged to
               TestIssuerInternal.  Its optional ``use_pll`` attribute
               enables the PLL.
        """
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL(instance=True)

        # PLL direct clock or not: a missing attribute counts as "no PLL"
        self.pll_en = getattr(pspec, "use_pll", False)
        if not self.pll_en:
            return

        self.pll_test_o = Signal(reset_less=True)
        self.pll_vco_o = Signal(reset_less=True)
        self.clk_sel_i = Signal(2, reset_less=True)
        self.ref_clk = ClockSignal()  # can't rename it but that's ok
        self.pllclk_clk = ClockSignal("pllclk")

    def elaborate(self, platform):
        """Assemble the module: inner issuer, optional PLL, clock hookup."""
        m = Module()
        comb = m.d.comb
        use_pll = self.pll_en

        # TestIssuer nominally runs at main clock, actually it is
        # all combinatorial internally except for coresync'd components
        m.submodules.ti = self.ti

        if use_pll:
            # the PLL wrapper and the clock domain it drives
            pll = self.pll
            m.submodules.wrappll = pll
            m.domains += ClockDomain("pllclk")

            # PLL clock established: the "pllclk" domain clock follows
            # the PLL output
            comb += self.pllclk_clk.eq(pll.clk_pll_o)

            # wiring the external 24mhz reference into the PLL is
            # currently disabled:
            #comb += pll.clk_24_i.eq(self.ref_clk)

            # PLL test signal and analog oscillator outputs
            comb += self.pll_test_o.eq(pll.pll_test_o)
            comb += self.pll_vco_o.eq(pll.pll_vco_o)

            # clock-selection input of the PLL
            comb += pll.clk_sel_i.eq(self.clk_sel_i)

            # the "pllclk" domain shares the default domain's reset
            comb += ResetSignal("pllclk").eq(ResetSignal())

        # the debug logic only needs separate clock wiring when it is
        # not in the plain 'sync' domain
        separate_dbg = self.ti.dbg_domain != 'sync'

        # these domains are constructed but deliberately NOT registered
        # (adding them to m.domains is disabled)
        cd_coresync = ClockDomain("coresync")
        #m.domains += cd_coresync
        if separate_dbg:
            cd_dbgsync = ClockDomain("dbgsync")
            #m.domains += cd_dbgsync

        intclk = ClockSignal("coresync")
        dbgclk = ClockSignal(self.ti.dbg_domain)

        # XXX BYPASS PLL XXX
        # the PLL output is not used for the core clock: with the PLL
        # enabled the source is ref_clk, which is itself ClockSignal()
        clk_source = self.ref_clk if use_pll else ClockSignal()
        comb += intclk.eq(clk_source)

        # debug clock runs at the coresync internal clock
        if separate_dbg:
            comb += dbgclk.eq(intclk)

        return m

    def ports(self):
        """Return inner-issuer ports, PLL ports, then default clock/reset."""
        return [*self.ti.ports(), *self.pll.ports(),
                ClockSignal(), ResetSignal()]

    def external_ports(self):
        """Return the inner issuer's external ports plus clock/reset.

        When the PLL is enabled the PLL-related signals are appended too.
        """
        ports = self.ti.external_ports()
        ports.extend([ClockSignal(), ResetSignal()])
        if self.pll_en:
            ports.extend([
                self.clk_sel_i,
                self.pll.clk_24_i,
                self.pll_test_o,
                self.pll_vco_o,
                self.pllclk_clk,
                self.ref_clk,
            ])
        return ports
1369
1370
if __name__ == '__main__':
    # one Function Unit of each kind
    units = {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
             'spr': 1,
             'div': 1,
             'mul': 1,
             'shiftrot': 1,
             }
    pspec = TestMemPspec(ldst_ifacetype='bare_wb',
                         imem_ifacetype='bare_wb',
                         addr_wid=48,
                         mask_wid=8,
                         reg_wid=64,
                         units=units)
    dut = TestIssuer(pspec)

    # hand over to the nmigen command-line front-end.  its result was
    # previously stored in "vl" but never read, so it is no longer kept.
    main(dut, ports=dut.ports(), name="test_issuer")

    # when run without any command-line arguments, additionally write the
    # design (restricted to its external ports) out as RTLIL
    if len(sys.argv) == 1:
        il_text = rtlil.convert(dut, ports=dut.external_ports(),
                                name="test_issuer")
        with open("test_issuer.il", "w") as f:
            f.write(il_text)