src/soc/simple/issuer.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
  12 the purpose of this module is to verify the functional correctness
  13 of the Function Units in the absolute simplest and clearest possible
way, and to provide something that can be further incrementally
  15 improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
  19                     ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from nmigen.lib.coding import PriorityEncoder
  25
  26 from openpower.decoder.power_decoder import create_pdecode
  27 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
  28 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
  29 from openpower.decoder.decode2execute1 import Data
  30 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
  31                                      SVP64PredMode)
  32 from openpower.state import CoreState
  33 from openpower.consts import (CR, SVP64CROffs)
  34 from soc.experiment.testmem import TestMemory # test only for instructions
  35 from soc.regfile.regfiles import StateRegs, FastRegs
  36 from soc.simple.core import NonProductionCore
  37 from soc.config.test.test_loadstore import TestMemPspec
  38 from soc.config.ifetch import ConfigFetchUnit
  39 from soc.debug.dmi import CoreDebug, DMIInterface
  40 from soc.debug.jtag import JTAG
  41 from soc.config.pinouts import get_pinspecs
  42 from soc.interrupts.xics import XICS_ICP, XICS_ICS
  43 from soc.bus.simple_gpio import SimpleGPIO
  44 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
  45 from soc.clock.select import ClockSelect
  46 from soc.clock.dummypll import DummyPLL
  47 from openpower.sv.svstate import SVSTATERec
  48
  49
  50 from nmutil.util import rising_edge
  51
  52 def get_insn(f_instr_o, pc):
  53     if f_instr_o.width == 32:
  54         return f_instr_o
  55     else:
  56         # 64-bit: bit 2 of pc decides which word to select
  57         return f_instr_o.word_select(pc[2], 32)
  58
  59 # gets state input or reads from state regfile
  60 def state_get(m, core_rst, state_i, name, regfile, regnum):
  61     comb = m.d.comb
  62     sync = m.d.sync
  63     # read the PC
  64     res = Signal(64, reset_less=True, name=name)
  65     res_ok_delay = Signal(name="%s_ok_delay" % name)
  66     with m.If(~core_rst):
  67         sync += res_ok_delay.eq(~state_i.ok)
  68         with m.If(state_i.ok):
  69             # incoming override (start from pc_i)
  70             comb += res.eq(state_i.data)
  71         with m.Else():
  72             # otherwise read StateRegs regfile for PC...
  73             comb += regfile.ren.eq(1<<regnum)
  74         # ... but on a 1-clock delay
  75         with m.If(res_ok_delay):
  76             comb += res.eq(regfile.o_data)
  77     return res
  78
  79 def get_predint(m, mask, name):
  80     """decode SVP64 predicate integer mask field to reg number and invert
  81     this is identical to the equivalent function in ISACaller except that
  82     it doesn't read the INT directly, it just decodes "what needs to be done"
  83     i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
  84
  85     * all1s is set to indicate that no mask is to be applied.
  86     * regread indicates the GPR register number to be read
  87     * invert is set to indicate that the register value is to be inverted
  88     * unary indicates that the contents of the register is to be shifted 1<<r3
  89     """
  90     comb = m.d.comb
  91     regread = Signal(5, name=name+"regread")
  92     invert = Signal(name=name+"invert")
  93     unary = Signal(name=name+"unary")
  94     all1s = Signal(name=name+"all1s")
  95     with m.Switch(mask):
  96         with m.Case(SVP64PredInt.ALWAYS.value):
  97             comb += all1s.eq(1)      # use 0b1111 (all ones)
  98         with m.Case(SVP64PredInt.R3_UNARY.value):
  99             comb += regread.eq(3)
 100             comb += unary.eq(1)        # 1<<r3 - shift r3 (single bit)
 101         with m.Case(SVP64PredInt.R3.value):
 102             comb += regread.eq(3)
 103         with m.Case(SVP64PredInt.R3_N.value):
 104             comb += regread.eq(3)
 105             comb += invert.eq(1)
 106         with m.Case(SVP64PredInt.R10.value):
 107             comb += regread.eq(10)
 108         with m.Case(SVP64PredInt.R10_N.value):
 109             comb += regread.eq(10)
 110             comb += invert.eq(1)
 111         with m.Case(SVP64PredInt.R30.value):
 112             comb += regread.eq(30)
 113         with m.Case(SVP64PredInt.R30_N.value):
 114             comb += regread.eq(30)
 115             comb += invert.eq(1)
 116     return regread, invert, unary, all1s
 117
 118 def get_predcr(m, mask, name):
 119     """decode SVP64 predicate CR to reg number field and invert status
 120     this is identical to _get_predcr in ISACaller
 121     """
 122     comb = m.d.comb
 123     idx = Signal(2, name=name+"idx")
 124     invert = Signal(name=name+"crinvert")
 125     with m.Switch(mask):
 126         with m.Case(SVP64PredCR.LT.value):
 127             comb += idx.eq(CR.LT)
 128             comb += invert.eq(0)
 129         with m.Case(SVP64PredCR.GE.value):
 130             comb += idx.eq(CR.LT)
 131             comb += invert.eq(1)
 132         with m.Case(SVP64PredCR.GT.value):
 133             comb += idx.eq(CR.GT)
 134             comb += invert.eq(0)
 135         with m.Case(SVP64PredCR.LE.value):
 136             comb += idx.eq(CR.GT)
 137             comb += invert.eq(1)
 138         with m.Case(SVP64PredCR.EQ.value):
 139             comb += idx.eq(CR.EQ)
 140             comb += invert.eq(0)
 141         with m.Case(SVP64PredCR.NE.value):
 142             comb += idx.eq(CR.EQ)
 143             comb += invert.eq(1)
 144         with m.Case(SVP64PredCR.SO.value):
 145             comb += idx.eq(CR.SO)
 146             comb += invert.eq(0)
 147         with m.Case(SVP64PredCR.NS.value):
 148             comb += idx.eq(CR.SO)
 149             comb += invert.eq(1)
 150     return idx, invert
 151
 152
 153 class TestIssuerInternal(Elaboratable):
 154     """TestIssuer - reads instructions from TestMemory and issues them
 155
 156     efficiency and speed is not the main goal here: functional correctness
 157     and code clarity is.  optimisations (which almost 100% interfere with
 158     easy understanding) come later.
 159     """
 160     def __init__(self, pspec):
 161
 162         # test is SVP64 is to be enabled
 163         self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
 164
 165         # and if regfiles are reduced
 166         self.regreduce_en = (hasattr(pspec, "regreduce") and
 167                                             (pspec.regreduce == True))
 168
 169         # JTAG interface.  add this right at the start because if it's
 170         # added it *modifies* the pspec, by adding enable/disable signals
 171         # for parts of the rest of the core
 172         self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
 173         self.dbg_domain = "sync" # sigh "dbgsunc" too problematic
 174         #self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
 175         if self.jtag_en:
 176             # XXX MUST keep this up-to-date with litex, and
 177             # soc-cocotb-sim, and err.. all needs sorting out, argh
 178             subset = ['uart',
 179                       'mtwi',
 180                       'eint', 'gpio', 'mspi0',
 181                       # 'mspi1', - disabled for now
 182                       # 'pwm', 'sd0', - disabled for now
 183                        'sdr']
 184             self.jtag = JTAG(get_pinspecs(subset=subset),
 185                              domain=self.dbg_domain)
 186             # add signals to pspec to enable/disable icache and dcache
 187             # (or data and intstruction wishbone if icache/dcache not included)
 188             # https://bugs.libre-soc.org/show_bug.cgi?id=520
 189             # TODO: do we actually care if these are not domain-synchronised?
 190             # honestly probably not.
 191             pspec.wb_icache_en = self.jtag.wb_icache_en
 192             pspec.wb_dcache_en = self.jtag.wb_dcache_en
 193             self.wb_sram_en = self.jtag.wb_sram_en
 194         else:
 195             self.wb_sram_en = Const(1)
 196
 197         # add 4k sram blocks?
 198         self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
 199                          pspec.sram4x4kblock == True)
 200         if self.sram4x4k:
 201             self.sram4k = []
 202             for i in range(4):
 203                 self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
 204                                                     #features={'err'}
 205                                                     ))
 206
 207         # add interrupt controller?
 208         self.xics = hasattr(pspec, "xics") and pspec.xics == True
 209         if self.xics:
 210             self.xics_icp = XICS_ICP()
 211             self.xics_ics = XICS_ICS()
 212             self.int_level_i = self.xics_ics.int_level_i
 213
 214         # add GPIO peripheral?
 215         self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
 216         if self.gpio:
 217             self.simple_gpio = SimpleGPIO()
 218             self.gpio_o = self.simple_gpio.gpio_o
 219
 220         # main instruction core.  suitable for prototyping / demo only
 221         self.core = core = NonProductionCore(pspec)
 222         self.core_rst = ResetSignal("coresync")
 223
 224         # instruction decoder.  goes into Trap Record
 225         #pdecode = create_pdecode()
 226         self.cur_state = CoreState("cur") # current state (MSR/PC/SVSTATE)
 227         self.pdecode2 = PowerDecode2(None, state=self.cur_state,
 228                                      opkls=IssuerDecode2ToOperand,
 229                                      svp64_en=self.svp64_en,
 230                                      regreduce_en=self.regreduce_en)
 231         pdecode = self.pdecode2.dec
 232
 233         if self.svp64_en:
 234             self.svp64 = SVP64PrefixDecoder() # for decoding SVP64 prefix
 235
 236         # Test Instruction memory
 237         self.imem = ConfigFetchUnit(pspec).fu
 238
 239         # DMI interface
 240         self.dbg = CoreDebug()
 241
 242         # instruction go/monitor
 243         self.pc_o = Signal(64, reset_less=True)
 244         self.pc_i = Data(64, "pc_i") # set "ok" to indicate "please change me"
 245         self.svstate_i = Data(64, "svstate_i") # ditto
 246         self.core_bigendian_i = Signal() # TODO: set based on MSR.LE
 247         self.busy_o = Signal(reset_less=True)
 248         self.memerr_o = Signal(reset_less=True)
 249
 250         # STATE regfile read /write ports for PC, MSR, SVSTATE
 251         staterf = self.core.regs.rf['state']
 252         self.state_r_pc = staterf.r_ports['cia'] # PC rd
 253         self.state_w_pc = staterf.w_ports['d_wr1'] # PC wr
 254         self.state_r_msr = staterf.r_ports['msr'] # MSR rd
 255         self.state_r_sv = staterf.r_ports['sv'] # SVSTATE rd
 256         self.state_w_sv = staterf.w_ports['sv'] # SVSTATE wr
 257
 258         # DMI interface access
 259         intrf = self.core.regs.rf['int']
 260         crrf = self.core.regs.rf['cr']
 261         xerrf = self.core.regs.rf['xer']
 262         self.int_r = intrf.r_ports['dmi'] # INT read
 263         self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
 264         self.xer_r = xerrf.r_ports['full_xer'] # XER read
 265
 266         if self.svp64_en:
 267             # for predication
 268             self.int_pred = intrf.r_ports['pred'] # INT predicate read
 269             self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read
 270
 271         # hack method of keeping an eye on whether branch/trap set the PC
 272         self.state_nia = self.core.regs.rf['state'].w_ports['nia']
 273         self.state_nia.wen.name = 'state_nia_wen'
 274
 275         # pulse to synchronize the simulator at instruction end
 276         self.insn_done = Signal()
 277
 278         if self.svp64_en:
 279             # store copies of predicate masks
 280             self.srcmask = Signal(64)
 281             self.dstmask = Signal(64)
 282
    def fetch_fsm(self, m, core, pc, svstate, nia, is_svp64_mode,
                        fetch_pc_o_ready, fetch_pc_i_valid,
                        fetch_insn_o_valid, fetch_insn_i_ready):
        """fetch FSM

        this FSM performs fetch of raw instruction data, partial-decodes
        it 32-bit at a time to detect SVP64 prefixes, and will optionally
        read a 2nd 32-bit quantity if that occurs.

        states:

        * IDLE:       signals fetch_pc_o_ready.  when fetch_pc_i_valid is
                      seen, presents pc to the instruction memory, latches
                      pc and svstate into cur_state and requests the MSR
        * INSN_READ:  latches the MSR (once), waits for the memory to stop
                      being busy, then either stores the instruction or
                      (SVP64 prefix detected) requests the word at pc+4
        * INSN_READ2: waits for, then stores, the second 32-bit word
        * INSN_READY: signals fetch_insn_o_valid until fetch_insn_i_ready

        arguments:

        * m:  the Module that the FSM is added to
        * core: not used within this function
        * pc, svstate: values captured into cur_state at start of fetch
        * nia: written here with the address of the following instruction
        * is_svp64_mode: written here (only if svp64 is enabled) with
          whether a SVP64 prefix was detected
        * fetch_pc_o_ready / fetch_pc_i_valid: handshake that starts a fetch
        * fetch_insn_o_valid / fetch_insn_i_ready: handshake that hands the
          fetched instruction over
        """
        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        cur_state = self.cur_state
        dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode

        # flag: 0 means "MSR read was requested, data not yet captured"
        msr_read = Signal(reset=1)

        with m.FSM(name='fetch_fsm'):

            # waiting (zzz)
            with m.State("IDLE"):
                comb += fetch_pc_o_ready.eq(1)
                with m.If(fetch_pc_i_valid):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                    sync += cur_state.pc.eq(pc)
                    sync += cur_state.svstate.eq(svstate) # and svstate

                    # initiate read of MSR. arrives one clock later
                    comb += self.state_r_msr.ren.eq(1 << StateRegs.MSR)
                    sync += msr_read.eq(0)

                    m.next = "INSN_READ"  # move to "wait for bus" phase

            # dummy pause to find out why simulation is not keeping up
            with m.State("INSN_READ"):
                # one cycle later, msr/sv read arrives.  valid only once.
                with m.If(~msr_read):
                    sync += msr_read.eq(1) # yeah don't read it again
                    sync += cur_state.msr.eq(self.state_r_msr.o_data)
                with m.If(self.imem.f_busy_o): # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    insn = get_insn(self.imem.f_instr_o, cur_state.pc)
                    # note: plain python "if", decided when the hardware
                    # is built, not a hardware (m.If) condition
                    if self.svp64_en:
                        svp64 = self.svp64
                        # decode the SVP64 prefix, if any
                        comb += svp64.raw_opcode_in.eq(insn)
                        comb += svp64.bigendian.eq(self.core_bigendian_i)
                        # pass the decoded prefix (if any) to PowerDecoder2
                        sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
                        # NOTE(review): this copies the is_svp64_mode
                        # argument (only updated by the sync statement just
                        # below), not svp64.is_svp64_mode - confirm that
                        # this is what is intended
                        sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
                        # remember whether this is a prefixed instruction, so
                        # the FSM can readily loop when VL==0
                        sync += is_svp64_mode.eq(svp64.is_svp64_mode)
                        # calculate the address of the following instruction
                        # (prefixed instructions are 8 bytes, others 4)
                        insn_size = Mux(svp64.is_svp64_mode, 8, 4)
                        sync += nia.eq(cur_state.pc + insn_size)
                        with m.If(~svp64.is_svp64_mode):
                            # with no prefix, store the instruction
                            # and hand it directly to the next FSM
                            sync += dec_opcode_i.eq(insn)
                            m.next = "INSN_READY"
                        with m.Else():
                            # fetch the rest of the instruction from memory
                            comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
                            comb += self.imem.a_i_valid.eq(1)
                            comb += self.imem.f_i_valid.eq(1)
                            m.next = "INSN_READ2"
                    else:
                        # not SVP64 - 32-bit only
                        sync += nia.eq(cur_state.pc + 4)
                        sync += dec_opcode_i.eq(insn)
                        m.next = "INSN_READY"

            # second half of a prefixed instruction: the word at pc+4
            with m.State("INSN_READ2"):
                with m.If(self.imem.f_busy_o):  # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
                    sync += dec_opcode_i.eq(insn)
                    m.next = "INSN_READY"
                    # TODO: probably can start looking at pdecode2.rm_dec
                    # here or maybe even in INSN_READ state, if svp64_mode
                    # detected, in order to trigger - and wait for - the
                    # predicate reading.
                    # (pmode is not used yet: the string literal below is
                    # pseudo-code for the TODO and has no effect)
                    if self.svp64_en:
                        pmode = pdecode2.rm_dec.predmode
                    """
                    if pmode != SVP64PredMode.ALWAYS.value:
                        fire predicate loading FSM and wait before
                        moving to INSN_READY
                    else:
                        sync += self.srcmask.eq(-1) # set to all 1s
                        sync += self.dstmask.eq(-1) # set to all 1s
                        m.next = "INSN_READY"
                    """

            with m.State("INSN_READY"):
                # hand over the instruction, to be decoded
                comb += fetch_insn_o_valid.eq(1)
                with m.If(fetch_insn_i_ready):
                    m.next = "IDLE"

 397
    def fetch_predicate_fsm(self, m,
                            pred_insn_i_valid, pred_insn_o_ready,
                            pred_mask_o_valid, pred_mask_i_ready):
        """fetch_predicate_fsm - obtains (constructs in the case of CR)
           src/dest predicate masks

        https://bugs.libre-soc.org/show_bug.cgi?id=617
        the predicates can be read here, by using IntRegs r_ports['pred']
        or CRRegs r_ports['cr_pred'].  in the case of CRs it will have to
        be done through multiple reads, extracting one relevant at a time.
        later, a faster way would be to use the 32-bit-wide CR port but
        this is more complex decoding, here.  equivalent code used in
        ISACaller is "from openpower.decoder.isa.caller import get_predcr"

        note: this ENTIRE FSM is not to be called when svp64 is disabled

        states:

        * FETCH_PRED_IDLE: signals pred_insn_o_ready.  on pred_insn_i_valid
          picks the INT path, the CR path, or (any other predmode) sets
          both masks to all-ones and goes directly to FETCH_PRED_DONE
        * INT_DST_READ / INT_SRC_READ: capture the destination / source
          mask from the INT predicate read port
        * CR_READ: builds both masks one bit per CR field read
        * FETCH_PRED_SHIFT_MASK: stores the masks into self.srcmask and
          self.dstmask, shifted right by srcstep / dststep
        * FETCH_PRED_DONE: signals pred_mask_o_valid until pred_mask_i_ready

        arguments:

        * m: the Module that the FSM is added to
        * pred_insn_i_valid / pred_insn_o_ready: handshake that starts
          the predicate fetch
        * pred_mask_o_valid / pred_mask_i_ready: handshake that reports
          the masks as available
        """
        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        rm_dec = pdecode2.rm_dec # SVP64RMModeDecode
        predmode = rm_dec.predmode
        srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
        cr_pred, int_pred = self.cr_pred, self.int_pred   # read regfiles
        # get src/dst step, so we can skip already used mask bits
        cur_state = self.cur_state
        srcstep = cur_state.svstate.srcstep
        dststep = cur_state.svstate.dststep
        cur_vl = cur_state.svstate.vl

        # decode predicates.  both the INT and the CR interpretation of
        # srcpred/dstpred are decoded: predmode selects which one is used
        sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
        dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
        sidx, scrinvert = get_predcr(m, srcpred, 's')
        didx, dcrinvert = get_predcr(m, dstpred, 'd')

        # store fetched masks, for either intpred or crpred
        # when src/dst step is not zero, the skipped mask bits need to be
        # shifted-out, before actually storing them in src/dest mask
        new_srcmask = Signal(64, reset_less=True)
        new_dstmask = Signal(64, reset_less=True)

        with m.FSM(name="fetch_predicate"):

            with m.State("FETCH_PRED_IDLE"):
                comb += pred_insn_o_ready.eq(1)
                with m.If(pred_insn_i_valid):
                    with m.If(predmode == SVP64PredMode.INT):
                        # skip fetching destination mask register, when
                        # dstpred decodes as "all ones" (no mask applied)
                        with m.If(dall1s):
                            sync += new_dstmask.eq(-1)
                            # directly go to fetch source mask register
                            # guaranteed not to be zero (otherwise predmode
                            # would be SVP64PredMode.ALWAYS, not INT)
                            comb += int_pred.addr.eq(sregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_SRC_READ"
                        # fetch destination predicate register
                        with m.Else():
                            comb += int_pred.addr.eq(dregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_DST_READ"
                    with m.Elif(predmode == SVP64PredMode.CR):
                        # go fetch masks from the CR register file
                        sync += new_srcmask.eq(0)
                        sync += new_dstmask.eq(0)
                        m.next = "CR_READ"
                    with m.Else():
                        # neither INT nor CR: no predication, all elements
                        # enabled.  the shift-mask state is not needed
                        sync += self.srcmask.eq(-1)
                        sync += self.dstmask.eq(-1)
                        m.next = "FETCH_PRED_DONE"

            with m.State("INT_DST_READ"):
                # store destination mask
                inv = Repl(dinvert, 64)
                with m.If(dunary):
                    # set selected mask bit for 1<<r3 mode
                    # (only the bottom 6 bits of the register are used)
                    dst_shift = Signal(range(64))
                    comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
                    sync += new_dstmask.eq(1 << dst_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
                # skip fetching source mask register, when srcpred
                # decodes as "all ones" (no mask applied)
                with m.If(sall1s):
                    sync += new_srcmask.eq(-1)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                # fetch source predicate register
                with m.Else():
                    comb += int_pred.addr.eq(sregread)
                    comb += int_pred.ren.eq(1)
                    m.next = "INT_SRC_READ"

            with m.State("INT_SRC_READ"):
                # store source mask
                inv = Repl(sinvert, 64)
                with m.If(sunary):
                    # set selected mask bit for 1<<r3 mode
                    # (only the bottom 6 bits of the register are used)
                    src_shift = Signal(range(64))
                    comb += src_shift.eq(self.int_pred.o_data & 0b111111)
                    sync += new_srcmask.eq(1 << src_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
                m.next = "FETCH_PRED_SHIFT_MASK"

            # fetch masks from the CR register file
            # implements the following loop:
            # idx, inv = get_predcr(mask)
            # mask = 0
            # for cr_idx in range(vl):
            #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
            #     if cr[idx] ^ inv:
            #         mask |= 1 << cr_idx
            # return mask
            with m.State("CR_READ"):
                # CR index to be read, which will be ready by the next cycle
                cr_idx = Signal.like(cur_vl, reset_less=True)
                # submit the read operation to the regfile
                with m.If(cr_idx != cur_vl):
                    # the CR read port is unary ...
                    # ren = 1 << cr_idx
                    # ... in MSB0 convention ...
                    # ren = 1 << (7 - cr_idx)
                    # ... and with an offset:
                    # ren = 1 << (7 - off - cr_idx)
                    idx = SVP64CROffs.CRPred + cr_idx
                    comb += cr_pred.ren.eq(1 << (7 - idx))
                    # signal data valid in the next cycle
                    cr_read = Signal(reset_less=True)
                    sync += cr_read.eq(1)
                    # load the next index
                    sync += cr_idx.eq(cr_idx + 1)
                with m.Else():
                    # exit on loop end
                    sync += cr_read.eq(0)
                    sync += cr_idx.eq(0)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                # this is checked in both branches above, so the data from
                # the final read is still processed in the loop-exit cycle
                with m.If(cr_read):
                    # compensate for the one cycle delay on the regfile
                    cur_cr_idx = Signal.like(cur_vl)
                    comb += cur_cr_idx.eq(cr_idx - 1)
                    # read the CR field, select the appropriate bit
                    cr_field = Signal(4)
                    scr_bit = Signal()
                    dcr_bit = Signal()
                    comb += cr_field.eq(cr_pred.o_data)
                    comb += scr_bit.eq(cr_field.bit_select(sidx, 1) ^ scrinvert)
                    comb += dcr_bit.eq(cr_field.bit_select(didx, 1) ^ dcrinvert)
                    # set the corresponding mask bit
                    bit_to_set = Signal.like(self.srcmask)
                    comb += bit_to_set.eq(1 << cur_cr_idx)
                    with m.If(scr_bit):
                        sync += new_srcmask.eq(new_srcmask | bit_to_set)
                    with m.If(dcr_bit):
                        sync += new_dstmask.eq(new_dstmask | bit_to_set)

            with m.State("FETCH_PRED_SHIFT_MASK"):
                # shift-out skipped mask bits
                sync += self.srcmask.eq(new_srcmask >> srcstep)
                sync += self.dstmask.eq(new_dstmask >> dststep)
                m.next = "FETCH_PRED_DONE"

            with m.State("FETCH_PRED_DONE"):
                # masks are available: hold valid until they are accepted
                comb += pred_mask_o_valid.eq(1)
                with m.If(pred_mask_i_ready):
                    m.next = "FETCH_PRED_IDLE"

 564
 565     def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
 566                   dbg, core_rst, is_svp64_mode,
 567                   fetch_pc_o_ready, fetch_pc_i_valid,
 568                   fetch_insn_o_valid, fetch_insn_i_ready,
 569                   pred_insn_i_valid, pred_insn_o_ready,
 570                   pred_mask_o_valid, pred_mask_i_ready,
 571                   exec_insn_i_valid, exec_insn_o_ready,
 572                   exec_pc_o_valid, exec_pc_i_ready):
 573         """issue FSM
 574
 575         decode / issue FSM.  this interacts with the "fetch" FSM
 576         through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
 577         (outgoing). also interacts with the "execute" FSM
 578         through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
 579         (incoming).
 580         SVP64 RM prefixes have already been set up by the
 581         "fetch" phase, so execute is fairly straightforward.
 582         """
 583
 584         comb = m.d.comb
 585         sync = m.d.sync
 586         pdecode2 = self.pdecode2
 587         cur_state = self.cur_state
 588
 589         # temporaries
 590         dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode
 591
 592         # for updating svstate (things like srcstep etc.)
 593         update_svstate = Signal() # set this (below) if updating
 594         new_svstate = SVSTATERec("new_svstate")
 595         comb += new_svstate.eq(cur_state.svstate)
 596
 597         # precalculate srcstep+1 and dststep+1
 598         cur_srcstep = cur_state.svstate.srcstep
 599         cur_dststep = cur_state.svstate.dststep
 600         next_srcstep = Signal.like(cur_srcstep)
 601         next_dststep = Signal.like(cur_dststep)
 602         comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
 603         comb += next_dststep.eq(cur_state.svstate.dststep+1)
 604
 605         # note if an exception happened.  in a pipelined or OoO design
 606         # this needs to be accompanied by "shadowing" (or stalling)
 607         exc_happened = self.core.o.exc_happened
 608
 609         with m.FSM(name="issue_fsm"):
 610
 611             # sync with the "fetch" phase which is reading the instruction
 612             # at this point, there is no instruction running, that
 613             # could inadvertently update the PC.
 614             with m.State("ISSUE_START"):
 615                 # wait on "core stop" release, before next fetch
 616                 # need to do this here, in case we are in a VL==0 loop
 617                 with m.If(~dbg.core_stop_o & ~core_rst):
 618                     comb += fetch_pc_i_valid.eq(1) # tell fetch to start
 619                     with m.If(fetch_pc_o_ready):   # fetch acknowledged us
 620                         m.next = "INSN_WAIT"
 621                 with m.Else():
 622                     # tell core it's stopped, and acknowledge debug handshake
 623                     comb += dbg.core_stopped_i.eq(1)
 624                     # while stopped, allow updating the PC and SVSTATE
 625                     with m.If(self.pc_i.ok):
 626                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 627                         comb += self.state_w_pc.i_data.eq(self.pc_i.data)
 628                         sync += pc_changed.eq(1)
 629                     with m.If(self.svstate_i.ok):
 630                         comb += new_svstate.eq(self.svstate_i.data)
 631                         comb += update_svstate.eq(1)
 632                         sync += sv_changed.eq(1)
 633
 634             # wait for an instruction to arrive from Fetch
 635             with m.State("INSN_WAIT"):
 636                 comb += fetch_insn_i_ready.eq(1)
 637                 with m.If(fetch_insn_o_valid):
 638                     # loop into ISSUE_START if it's a SVP64 instruction
 639                     # and VL == 0.  this because VL==0 is a for-loop
 640                     # from 0 to 0 i.e. always, always a NOP.
 641                     cur_vl = cur_state.svstate.vl
 642                     with m.If(is_svp64_mode & (cur_vl == 0)):
 643                         # update the PC before fetching the next instruction
 644                         # since we are in a VL==0 loop, no instruction was
 645                         # executed that we could be overwriting
 646                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 647                         comb += self.state_w_pc.i_data.eq(nia)
 648                         comb += self.insn_done.eq(1)
 649                         m.next = "ISSUE_START"
 650                     with m.Else():
 651                         if self.svp64_en:
 652                             m.next = "PRED_START"  # start fetching predicate
 653                         else:
 654                             m.next = "DECODE_SV"  # skip predication
 655
 656             with m.State("PRED_START"):
 657                 comb += pred_insn_i_valid.eq(1)  # tell fetch_pred to start
 658                 with m.If(pred_insn_o_ready):  # fetch_pred acknowledged us
 659                     m.next = "MASK_WAIT"
 660
 661             with m.State("MASK_WAIT"):
 662                 comb += pred_mask_i_ready.eq(1) # ready to receive the masks
 663                 with m.If(pred_mask_o_valid): # predication masks are ready
 664                     m.next = "PRED_SKIP"
 665
 666             # skip zeros in predicate
 667             with m.State("PRED_SKIP"):
 668                 with m.If(~is_svp64_mode):
 669                     m.next = "DECODE_SV"  # nothing to do
 670                 with m.Else():
 671                     if self.svp64_en:
 672                         pred_src_zero = pdecode2.rm_dec.pred_sz
 673                         pred_dst_zero = pdecode2.rm_dec.pred_dz
 674
 675                         # new srcstep, after skipping zeros
 676                         skip_srcstep = Signal.like(cur_srcstep)
 677                         # value to be added to the current srcstep
 678                         src_delta = Signal.like(cur_srcstep)
 679                         # add leading zeros to srcstep, if not in zero mode
 680                         with m.If(~pred_src_zero):
 681                             # priority encoder (count leading zeros)
 682                             # append guard bit, in case the mask is all zeros
 683                             pri_enc_src = PriorityEncoder(65)
 684                             m.submodules.pri_enc_src = pri_enc_src
 685                             comb += pri_enc_src.i.eq(Cat(self.srcmask,
 686                                                          Const(1, 1)))
 687                             comb += src_delta.eq(pri_enc_src.o)
 688                         # apply delta to srcstep
 689                         comb += skip_srcstep.eq(cur_srcstep + src_delta)
 690                         # shift-out all leading zeros from the mask
 691                         # plus the leading "one" bit
 692                         # TODO count leading zeros and shift-out the zero
 693                         #      bits, in the same step, in hardware
 694                         sync += self.srcmask.eq(self.srcmask >> (src_delta+1))
 695
 696                         # same as above, but for dststep
 697                         skip_dststep = Signal.like(cur_dststep)
 698                         dst_delta = Signal.like(cur_dststep)
 699                         with m.If(~pred_dst_zero):
 700                             pri_enc_dst = PriorityEncoder(65)
 701                             m.submodules.pri_enc_dst = pri_enc_dst
 702                             comb += pri_enc_dst.i.eq(Cat(self.dstmask,
 703                                                          Const(1, 1)))
 704                             comb += dst_delta.eq(pri_enc_dst.o)
 705                         comb += skip_dststep.eq(cur_dststep + dst_delta)
 706                         sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))
 707
 708                         # TODO: initialize mask[VL]=1 to avoid passing past VL
 709                         with m.If((skip_srcstep >= cur_vl) |
 710                                   (skip_dststep >= cur_vl)):
 711                             # end of VL loop. Update PC and reset src/dst step
 712                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 713                             comb += self.state_w_pc.i_data.eq(nia)
 714                             comb += new_svstate.srcstep.eq(0)
 715                             comb += new_svstate.dststep.eq(0)
 716                             comb += update_svstate.eq(1)
 717                             # synchronize with the simulator
 718                             comb += self.insn_done.eq(1)
 719                             # go back to Issue
 720                             m.next = "ISSUE_START"
 721                         with m.Else():
 722                             # update new src/dst step
 723                             comb += new_svstate.srcstep.eq(skip_srcstep)
 724                             comb += new_svstate.dststep.eq(skip_dststep)
 725                             comb += update_svstate.eq(1)
 726                             # proceed to Decode
 727                             m.next = "DECODE_SV"
 728
 729                         # pass predicate mask bits through to satellite decoders
 730                         # TODO: for SIMD this will be *multiple* bits
 731                         sync += core.i.sv_pred_sm.eq(self.srcmask[0])
 732                         sync += core.i.sv_pred_dm.eq(self.dstmask[0])
 733
 734             # after src/dst step have been updated, we are ready
 735             # to decode the instruction
 736             with m.State("DECODE_SV"):
 737                 # decode the instruction
 738                 sync += core.i.e.eq(pdecode2.e)
 739                 sync += core.i.state.eq(cur_state)
 740                 sync += core.i.raw_insn_i.eq(dec_opcode_i)
 741                 sync += core.i.bigendian_i.eq(self.core_bigendian_i)
 742                 if self.svp64_en:
 743                     sync += core.i.sv_rm.eq(pdecode2.sv_rm)
 744                     # set RA_OR_ZERO detection in satellite decoders
 745                     sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
 746                     # and svp64 detection
 747                     sync += core.i.is_svp64_mode.eq(is_svp64_mode)
 748                     # and svp64 bit-rev'd ldst mode
 749                     ldst_dec = pdecode2.use_svp64_ldst_dec
 750                     sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
 751                 # after decoding, reset any previous exception condition,
 752                 # allowing it to be set again during the next execution
 753                 sync += pdecode2.ldst_exc.eq(0)
 754
 755                 m.next = "INSN_EXECUTE"  # move to "execute"
 756
 757             # handshake with execution FSM, move to "wait" once acknowledged
 758             with m.State("INSN_EXECUTE"):
 759                 comb += exec_insn_i_valid.eq(1) # trigger execute
 760                 with m.If(exec_insn_o_ready):   # execute acknowledged us
 761                     m.next = "EXECUTE_WAIT"
 762
 763             with m.State("EXECUTE_WAIT"):
 764                 # wait on "core stop" release, at instruction end
 765                 # need to do this here, in case we are in a VL>1 loop
 766                 with m.If(~dbg.core_stop_o & ~core_rst):
 767                     comb += exec_pc_i_ready.eq(1)
 768                     # see https://bugs.libre-soc.org/show_bug.cgi?id=636
 769                     # the exception info needs to be blatted into
 770                     # pdecode.ldst_exc, and the instruction "re-run".
 771                     # when ldst_exc.happened is set, the PowerDecoder2
 772                     # reacts very differently: it re-writes the instruction
 773                     # with a "trap" (calls PowerDecoder2.trap()) which
 774                     # will *overwrite* whatever was requested and jump the
 775                     # PC to the exception address, as well as alter MSR.
 776                     # nothing else needs to be done other than to note
 777                     # the change of PC and MSR (and, later, SVSTATE)
 778                     with m.If(exc_happened):
 779                         sync += pdecode2.ldst_exc.eq(core.fus.get_exc("ldst0"))
 780
 781                     with m.If(exec_pc_o_valid):
 782
 783                         # was this the last loop iteration?
 784                         is_last = Signal()
 785                         cur_vl = cur_state.svstate.vl
 786                         comb += is_last.eq(next_srcstep == cur_vl)
 787
 788                         # return directly to Decode if Execute generated an
 789                         # exception.
 790                         with m.If(pdecode2.ldst_exc.happened):
 791                             m.next = "DECODE_SV"
 792
 793                         # if either PC or SVSTATE were changed by the previous
 794                         # instruction, go directly back to Fetch, without
 795                         # updating either PC or SVSTATE
 796                         with m.Elif(pc_changed | sv_changed):
 797                             m.next = "ISSUE_START"
 798
 799                         # also return to Fetch, when no output was a vector
 800                         # (regardless of SRCSTEP and VL), or when the last
 801                         # instruction was really the last one of the VL loop
 802                         with m.Elif((~pdecode2.loop_continue) | is_last):
 803                             # before going back to fetch, update the PC state
 804                             # register with the NIA.
 805                             # ok here we are not reading the branch unit.
 806                             # TODO: this just blithely overwrites whatever
 807                             #       pipeline updated the PC
 808                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 809                             comb += self.state_w_pc.i_data.eq(nia)
 810                             # reset SRCSTEP before returning to Fetch
 811                             if self.svp64_en:
 812                                 with m.If(pdecode2.loop_continue):
 813                                     comb += new_svstate.srcstep.eq(0)
 814                                     comb += new_svstate.dststep.eq(0)
 815                                     comb += update_svstate.eq(1)
 816                             else:
 817                                 comb += new_svstate.srcstep.eq(0)
 818                                 comb += new_svstate.dststep.eq(0)
 819                                 comb += update_svstate.eq(1)
 820                             m.next = "ISSUE_START"
 821
 822                         # returning to Execute? then, first update SRCSTEP
 823                         with m.Else():
 824                             comb += new_svstate.srcstep.eq(next_srcstep)
 825                             comb += new_svstate.dststep.eq(next_dststep)
 826                             comb += update_svstate.eq(1)
 827                             # return to mask skip loop
 828                             m.next = "PRED_SKIP"
 829
 830                 with m.Else():
 831                     comb += dbg.core_stopped_i.eq(1)
 832                     # while stopped, allow updating the PC and SVSTATE
 833                     with m.If(self.pc_i.ok):
 834                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 835                         comb += self.state_w_pc.i_data.eq(self.pc_i.data)
 836                         sync += pc_changed.eq(1)
 837                     with m.If(self.svstate_i.ok):
 838                         comb += new_svstate.eq(self.svstate_i.data)
 839                         comb += update_svstate.eq(1)
 840                         sync += sv_changed.eq(1)
 841
 842         # check if svstate needs updating: if so, write it to State Regfile
 843         with m.If(update_svstate):
 844             comb += self.state_w_sv.wen.eq(1<<StateRegs.SVSTATE)
 845             comb += self.state_w_sv.i_data.eq(new_svstate)
 846             sync += cur_state.svstate.eq(new_svstate) # for next clock
 847
 848     def execute_fsm(self, m, core, pc_changed, sv_changed,
 849                     exec_insn_i_valid, exec_insn_o_ready,
 850                     exec_pc_o_valid, exec_pc_i_ready):
 851         """execute FSM
 852
 853         execute FSM. this interacts with the "issue" FSM
 854         through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
 855         (outgoing). SVP64 RM prefixes have already been set up by the
 856         "issue" phase, so execute is fairly straightforward.
 857         """
 858
 859         comb = m.d.comb
 860         sync = m.d.sync
 861         pdecode2 = self.pdecode2
 862
 863         # temporaries
 864         core_busy_o = ~core.p.o_ready                # core is busy
 865         core_ivalid_i = core.p.i_valid              # instruction is valid
 866         core_issue_i = core.i.issue_i               # instruction is issued
 867         insn_type = core.i.e.do.insn_type           # instruction MicroOp type
 868
 869         with m.FSM(name="exec_fsm"):
 870
 871             # waiting for instruction bus (stays there until not busy)
 872             with m.State("INSN_START"):
 873                 comb += exec_insn_o_ready.eq(1)
 874                 with m.If(exec_insn_i_valid):
 875                     comb += core_ivalid_i.eq(1)  # instruction is valid
 876                     comb += core_issue_i.eq(1)  # and issued
 877                     sync += sv_changed.eq(0)
 878                     sync += pc_changed.eq(0)
 879                     m.next = "INSN_ACTIVE"  # move to "wait completion"
 880
 881             # instruction started: must wait till it finishes
 882             with m.State("INSN_ACTIVE"):
 883                 with m.If(insn_type != MicrOp.OP_NOP):
 884                     comb += core_ivalid_i.eq(1) # instruction is valid
 885                 # note changes to PC and SVSTATE
 886                 with m.If(self.state_nia.wen & (1<<StateRegs.SVSTATE)):
 887                     sync += sv_changed.eq(1)
 888                 with m.If(self.state_nia.wen & (1<<StateRegs.PC)):
 889                     sync += pc_changed.eq(1)
 890                 with m.If(~core_busy_o): # instruction done!
 891                     comb += exec_pc_o_valid.eq(1)
 892                     with m.If(exec_pc_i_ready):
 893                         # when finished, indicate "done".
 894                         # however, if there was an exception, the instruction
 895                         # is *not* yet done.  this is an implementation
 896                         # detail: we choose to implement exceptions by
 897                         # taking the exception information from the LDST
 898                         # unit, putting that *back* into the PowerDecoder2,
 899                         # and *re-running the entire instruction*.
 900                         # if we erroneously indicate "done" here, it is as if
 901                         # there were *TWO* instructions:
 902                         # 1) the failed LDST 2) a TRAP.
 903                         with m.If(~pdecode2.ldst_exc.happened):
 904                             comb += self.insn_done.eq(1)
 905                         m.next = "INSN_START"  # back to fetch
 906
 907     def setup_peripherals(self, m):
 908         comb, sync = m.d.comb, m.d.sync
 909
 910         # okaaaay so the debug module must be in coresync clock domain
 911         # but NOT its reset signal. to cope with this, set every single
 912         # submodule explicitly in coresync domain, debug and JTAG
 913         # in their own one but using *external* reset.
 914         csd = DomainRenamer("coresync")
 915         dbd = DomainRenamer(self.dbg_domain)
 916
 917         m.submodules.core = core = csd(self.core)
 918         m.submodules.imem = imem = csd(self.imem)
 919         m.submodules.dbg = dbg = dbd(self.dbg)
 920         if self.jtag_en:
 921             m.submodules.jtag = jtag = dbd(self.jtag)
 922             # TODO: UART2GDB mux, here, from external pin
 923             # see https://bugs.libre-soc.org/show_bug.cgi?id=499
 924             sync += dbg.dmi.connect_to(jtag.dmi)
 925
 926         cur_state = self.cur_state
 927
 928         # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
 929         if self.sram4x4k:
 930             for i, sram in enumerate(self.sram4k):
 931                 m.submodules["sram4k_%d" % i] = csd(sram)
 932                 comb += sram.enable.eq(self.wb_sram_en)
 933
 934         # XICS interrupt handler
 935         if self.xics:
 936             m.submodules.xics_icp = icp = csd(self.xics_icp)
 937             m.submodules.xics_ics = ics = csd(self.xics_ics)
 938             comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
 939             sync += cur_state.eint.eq(icp.core_irq_o) # connect ICP to core
 940
 941         # GPIO test peripheral
 942         if self.gpio:
 943             m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)
 944
 945         # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
 946         # XXX causes litex ECP5 test to get wrong idea about input and output
 947         # (but works with verilator sim *sigh*)
 948         #if self.gpio and self.xics:
 949         #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
 950
 951         # instruction decoder
 952         pdecode = create_pdecode()
 953         m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
 954         if self.svp64_en:
 955             m.submodules.svp64 = svp64 = csd(self.svp64)
 956
 957         # convenience
 958         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
 959         intrf = self.core.regs.rf['int']
 960
 961         # clock delay power-on reset
 962         cd_por  = ClockDomain(reset_less=True)
 963         cd_sync = ClockDomain()
 964         core_sync = ClockDomain("coresync")
 965         m.domains += cd_por, cd_sync, core_sync
 966         if self.dbg_domain != "sync":
 967             dbg_sync = ClockDomain(self.dbg_domain)
 968             m.domains += dbg_sync
 969
 970         ti_rst = Signal(reset_less=True)
 971         delay = Signal(range(4), reset=3)
 972         with m.If(delay != 0):
 973             m.d.por += delay.eq(delay - 1)
 974         comb += cd_por.clk.eq(ClockSignal())
 975
 976         # power-on reset delay
 977         core_rst = ResetSignal("coresync")
 978         comb += ti_rst.eq(delay != 0 | dbg.core_rst_o | ResetSignal())
 979         comb += core_rst.eq(ti_rst)
 980
 981         # debug clock is same as coresync, but reset is *main external*
 982         if self.dbg_domain != "sync":
 983             dbg_rst = ResetSignal(self.dbg_domain)
 984             comb += dbg_rst.eq(ResetSignal())
 985
 986         # busy/halted signals from core
 987         core_busy_o = ~core.p.o_ready                # core is busy
 988         comb += self.busy_o.eq(core_busy_o)
 989         comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
 990
 991         # temporary hack: says "go" immediately for both address gen and ST
 992         l0 = core.l0
 993         ldst = core.fus.fus['ldst0']
 994         st_go_edge = rising_edge(m, ldst.st.rel_o)
 995         m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o) # link addr-go direct to rel
 996         m.d.comb += ldst.st.go_i.eq(st_go_edge) # link store-go to rising rel
 997
    def elaborate(self, platform):
        """Build and return the nmigen Module for the issuer.

        Sets up the peripherals and core, reads PC and SVSTATE (from an
        incoming override or from the State regfile), creates the
        ready/valid handshake signals and instantiates the fetch, issue,
        (optional) predicate-fetch and execute FSMs, then connects DMI
        debug reads and the DEC/TB update FSM.

        :param platform: not used by this method
        :returns: the constructed Module

        NOTE: statement order matters here.  The combinatorial defaults
        below are added before the FSMs so that the FSMs' later
        assignments to the same signals take priority.
        """
        m = Module()
        # convenience
        comb, sync = m.d.comb, m.d.sync
        cur_state = self.cur_state
        pdecode2 = self.pdecode2
        dbg = self.dbg
        core = self.core

        # set up peripherals and core
        core_rst = self.core_rst
        self.setup_peripherals(m)

        # reset current state if core reset requested
        with m.If(core_rst):
            m.d.sync += self.cur_state.eq(0)

        # PC and instruction from I-Memory
        comb += self.pc_o.eq(cur_state.pc)
        # both flags are handed to the issue and execute FSMs below
        pc_changed = Signal() # note write to PC
        sv_changed = Signal() # note write to SVSTATE

        # read state either from incoming override or from regfile
        # TODO: really should be doing MSR in the same way
        pc = state_get(m, core_rst, self.pc_i,
                            "pc",                  # read PC
                            self.state_r_pc, StateRegs.PC)
        svstate = state_get(m, core_rst, self.svstate_i,
                            "svstate",   # read SVSTATE
                            self.state_r_sv, StateRegs.SVSTATE)

        # don't write pc every cycle
        # (default values: the FSMs override these when they update the PC)
        comb += self.state_w_pc.wen.eq(0)
        comb += self.state_w_pc.i_data.eq(0)

        # don't read msr every cycle
        comb += self.state_r_msr.ren.eq(0)

        # address of the next instruction, in the absence of a branch
        # depends on the instruction size
        # (passed to both the fetch and the issue FSM)
        nia = Signal(64)

        # connect up debug signals
        # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
        comb += dbg.terminate_i.eq(core.o.core_terminate_o)
        comb += dbg.state.pc.eq(pc)
        comb += dbg.state.svstate.eq(svstate)
        comb += dbg.state.msr.eq(cur_state.msr)

        # pass the prefix mode from Fetch to Issue, so the latter can loop
        # on VL==0
        is_svp64_mode = Signal()

        # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
        # issue, decode/execute, now joined by "Predicate fetch/calculate".
        # these are the handshake signals between each

        # fetch FSM can run as soon as the PC is valid
        fetch_pc_i_valid = Signal() # Issue tells Fetch "start next read"
        fetch_pc_o_ready = Signal() # Fetch acknowledges; Issue may proceed

        # fetch FSM hands over the instruction to be decoded / issued
        fetch_insn_o_valid = Signal()
        fetch_insn_i_ready = Signal()

        # predicate fetch FSM decodes and fetches the predicate
        pred_insn_i_valid = Signal()
        pred_insn_o_ready = Signal()

        # predicate fetch FSM delivers the masks
        pred_mask_o_valid = Signal()
        pred_mask_i_ready = Signal()

        # issue FSM delivers the instruction to be executed
        exec_insn_i_valid = Signal()
        exec_insn_o_ready = Signal()

        # execute FSM, hands over the PC/SVSTATE back to the issue FSM
        exec_pc_o_valid = Signal()
        exec_pc_i_ready = Signal()

        # the FSMs here are perhaps unusual in that they detect conditions
        # then "hold" information, combinatorially, for the core
        # (as opposed to using sync - which would be on a clock's delay)
        # this includes the actual opcode, valid flags and so on.

        # Fetch, then predicate fetch, then Issue, then Execute.
        # Issue is where the VL for-loop lives.  the ready/valid
        # signalling is used to communicate between the four.

        self.fetch_fsm(m, core, pc, svstate, nia, is_svp64_mode,
                       fetch_pc_o_ready, fetch_pc_i_valid,
                       fetch_insn_o_valid, fetch_insn_i_ready)

        self.issue_fsm(m, core, pc_changed, sv_changed, nia,
                       dbg, core_rst, is_svp64_mode,
                       fetch_pc_o_ready, fetch_pc_i_valid,
                       fetch_insn_o_valid, fetch_insn_i_ready,
                       pred_insn_i_valid, pred_insn_o_ready,
                       pred_mask_o_valid, pred_mask_i_ready,
                       exec_insn_i_valid, exec_insn_o_ready,
                       exec_pc_o_valid, exec_pc_i_ready)

        # the predicate-fetch FSM only exists when SVP64 is enabled
        if self.svp64_en:
            self.fetch_predicate_fsm(m,
                                     pred_insn_i_valid, pred_insn_o_ready,
                                     pred_mask_o_valid, pred_mask_i_ready)

        self.execute_fsm(m, core, pc_changed, sv_changed,
                         exec_insn_i_valid, exec_insn_o_ready,
                         exec_pc_o_valid, exec_pc_i_ready)

        # whatever was done above, over-ride it if core reset is held
        with m.If(core_rst):
            sync += nia.eq(0)

        # this bit doesn't have to be in the FSM: connect up to read
        # regfiles on demand from DMI
        self.do_dmi(m, dbg)

        # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
        # (which uses that in PowerDecoder2 to raise 0x900 exception)
        self.tb_dec_fsm(m, cur_state.dec)

        return m
1123
1124     def do_dmi(self, m, dbg):
1125         """deals with DMI debug requests
1126
1127         currently only provides read requests for the INT regfile, CR and XER
1128         it will later also deal with *writing* to these regfiles.
1129         """
1130         comb = m.d.comb
1131         sync = m.d.sync
1132         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
1133         intrf = self.core.regs.rf['int']
1134
1135         with m.If(d_reg.req): # request for regfile access being made
1136             # TODO: error-check this
1137             # XXX should this be combinatorial?  sync better?
1138             if intrf.unary:
1139                 comb += self.int_r.ren.eq(1<<d_reg.addr)
1140             else:
1141                 comb += self.int_r.addr.eq(d_reg.addr)
1142                 comb += self.int_r.ren.eq(1)
1143         d_reg_delay  = Signal()
1144         sync += d_reg_delay.eq(d_reg.req)
1145         with m.If(d_reg_delay):
1146             # data arrives one clock later
1147             comb += d_reg.data.eq(self.int_r.o_data)
1148             comb += d_reg.ack.eq(1)
1149
1150         # sigh same thing for CR debug
1151         with m.If(d_cr.req): # request for regfile access being made
1152             comb += self.cr_r.ren.eq(0b11111111) # enable all
1153         d_cr_delay  = Signal()
1154         sync += d_cr_delay.eq(d_cr.req)
1155         with m.If(d_cr_delay):
1156             # data arrives one clock later
1157             comb += d_cr.data.eq(self.cr_r.o_data)
1158             comb += d_cr.ack.eq(1)
1159
1160         # aaand XER...
1161         with m.If(d_xer.req): # request for regfile access being made
1162             comb += self.xer_r.ren.eq(0b111111) # enable all
1163         d_xer_delay  = Signal()
1164         sync += d_xer_delay.eq(d_xer.req)
1165         with m.If(d_xer_delay):
1166             # data arrives one clock later
1167             comb += d_xer.data.eq(self.xer_r.o_data)
1168             comb += d_xer.ack.eq(1)
1169
1170     def tb_dec_fsm(self, m, spr_dec):
1171         """tb_dec_fsm
1172
1173         this is a FSM for updating either dec or tb.  it runs alternately
1174         DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
1175         value to DEC, however the regfile has "passthrough" on it so this
1176         *should* be ok.
1177
1178         see v3.0B p1097-1099 for Timeer Resource and p1065 and p1076
1179         """
1180
1181         comb, sync = m.d.comb, m.d.sync
1182         fast_rf = self.core.regs.rf['fast']
1183         fast_r_dectb = fast_rf.r_ports['issue'] # DEC/TB
1184         fast_w_dectb = fast_rf.w_ports['issue'] # DEC/TB
1185
1186         with m.FSM() as fsm:
1187
1188             # initiates read of current DEC
1189             with m.State("DEC_READ"):
1190                 comb += fast_r_dectb.addr.eq(FastRegs.DEC)
1191                 comb += fast_r_dectb.ren.eq(1)
1192                 m.next = "DEC_WRITE"
1193
1194             # waits for DEC read to arrive (1 cycle), updates with new value
1195             with m.State("DEC_WRITE"):
1196                 new_dec = Signal(64)
1197                 # TODO: MSR.LPCR 32-bit decrement mode
1198                 comb += new_dec.eq(fast_r_dectb.o_data - 1)
1199                 comb += fast_w_dectb.addr.eq(FastRegs.DEC)
1200                 comb += fast_w_dectb.wen.eq(1)
1201                 comb += fast_w_dectb.i_data.eq(new_dec)
1202                 sync += spr_dec.eq(new_dec) # copy into cur_state for decoder
1203                 m.next = "TB_READ"
1204
1205             # initiates read of current TB
1206             with m.State("TB_READ"):
1207                 comb += fast_r_dectb.addr.eq(FastRegs.TB)
1208                 comb += fast_r_dectb.ren.eq(1)
1209                 m.next = "TB_WRITE"
1210
1211             # waits for read TB to arrive, initiates write of current TB
1212             with m.State("TB_WRITE"):
1213                 new_tb = Signal(64)
1214                 comb += new_tb.eq(fast_r_dectb.o_data + 1)
1215                 comb += fast_w_dectb.addr.eq(FastRegs.TB)
1216                 comb += fast_w_dectb.wen.eq(1)
1217                 comb += fast_w_dectb.i_data.eq(new_tb)
1218                 m.next = "DEC_READ"
1219
1220         return m
1221
1222     def __iter__(self):
1223         yield from self.pc_i.ports()
1224         yield self.pc_o
1225         yield self.memerr_o
1226         yield from self.core.ports()
1227         yield from self.imem.ports()
1228         yield self.core_bigendian_i
1229         yield self.busy_o
1230
1231     def ports(self):
1232         return list(self)
1233
1234     def external_ports(self):
1235         ports = self.pc_i.ports()
1236         ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
1237                 ]
1238
1239         if self.jtag_en:
1240             ports += list(self.jtag.external_ports())
1241         else:
1242             # don't add DMI if JTAG is enabled
1243             ports += list(self.dbg.dmi.ports())
1244
1245         ports += list(self.imem.ibus.fields.values())
1246         ports += list(self.core.l0.cmpi.wb_bus().fields.values())
1247
1248         if self.sram4x4k:
1249             for sram in self.sram4k:
1250                 ports += list(sram.bus.fields.values())
1251
1252         if self.xics:
1253             ports += list(self.xics_icp.bus.fields.values())
1254             ports += list(self.xics_ics.bus.fields.values())
1255             ports.append(self.int_level_i)
1256
1257         if self.gpio:
1258             ports += list(self.simple_gpio.bus.fields.values())
1259             ports.append(self.gpio_o)
1260
1261         return ports
1262
    def ports(self):
        """Return everything yielded by iterating this object, as a list."""
        # NOTE(review): ports() is also defined earlier in this class with
        # the same behaviour; being the later definition, this is the one
        # that ends up bound on the class.
        return list(self)
1265
1266
class TestIssuer(Elaboratable):
    """Wrapper combining a TestIssuerInternal with a DummyPLL.

    ``self.ti`` is the TestIssuerInternal built from ``pspec`` and
    ``self.pll`` is a DummyPLL instance.  When ``pspec.use_pll`` exists
    and is true (``self.pll_en``), extra PLL-related signals are created
    and the PLL is added as a submodule; otherwise only the clock
    forwarding into the "coresync" (and optional debug) domain is set up.
    """

    def __init__(self, pspec):
        """Create the internal issuer and PLL.

        pspec: parameter specification, passed straight through to
               TestIssuerInternal.  An optional ``use_pll`` attribute
               enables the PLL wiring.
        """
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL(instance=True)

        # PLL direct clock or not (a missing attribute counts as "off")
        self.pll_en = getattr(pspec, "use_pll", False)
        if self.pll_en:
            self.pll_test_o = Signal(reset_less=True)
            self.pll_vco_o = Signal(reset_less=True)
            self.clk_sel_i = Signal(2, reset_less=True)
            self.ref_clk = ClockSignal()  # can't rename it but that's ok
            self.pllclk_clk = ClockSignal("pllclk")

    def elaborate(self, platform):
        """Wire up the issuer, the (optional) PLL and the clocks.

        Returns the nmigen Module containing the submodules and the
        combinatorial clock/reset connections.
        """
        m = Module()
        comb = m.d.comb

        # TestIssuer nominally runs at main clock, actually it is
        # all combinatorial internally except for coresync'd components
        m.submodules.ti = self.ti

        if self.pll_en:
            m.submodules.wrappll = pll = self.pll

            # add clock domain driven from the PLL
            cd_pll = ClockDomain("pllclk")
            m.domains += cd_pll

            # the "pllclk" domain clock is the PLL's clock output
            comb += self.pllclk_clk.eq(pll.clk_pll_o)

            # NOTE: the PLL's clk_24_i input is deliberately not driven
            # here (that wiring is disabled); it is exported instead by
            # external_ports().
            #comb += pll.clk_24_i.eq(self.ref_clk)

            # forward the PLL test signal and oscillator output
            comb += self.pll_test_o.eq(pll.pll_test_o)
            comb += self.pll_vco_o.eq(pll.pll_vco_o)

            # input to pll clock selection
            comb += pll.clk_sel_i.eq(self.clk_sel_i)

            # now wire up ResetSignals.  don't mind them being in this domain
            comb += ResetSignal("pllclk").eq(ResetSignal())

        # Only ClockSignal references to the "coresync" / debug domains are
        # needed here: this module never adds those domains to m.domains.
        # NOTE(review): presumably the domains themselves are declared
        # inside self.ti -- confirm.
        intclk = ClockSignal("coresync")

        # XXX BYPASS PLL XXX
        # even with the PLL enabled, the core clock is taken from ref_clk
        # (the default-domain clock) rather than from the PLL output.
        if self.pll_en:
            comb += intclk.eq(self.ref_clk)
        else:
            comb += intclk.eq(ClockSignal())

        # a separate debug domain (anything other than 'sync') runs at the
        # same clock as coresync
        if self.ti.dbg_domain != 'sync':
            dbgclk = ClockSignal(self.ti.dbg_domain)
            comb += dbgclk.eq(intclk)

        return m

    def ports(self):
        """Return the issuer ports, the PLL ports and the default-domain
        clock and reset signals, as one list.
        """
        return list(self.ti.ports()) + list(self.pll.ports()) + \
               [ClockSignal(), ResetSignal()]

    def external_ports(self):
        """Return the externally visible ports.

        These are the internal issuer's external ports followed by the
        default-domain clock and reset and, when the PLL is enabled, the
        PLL-related signals.
        """
        # copy, so the list handed back by the issuer is never modified
        ports = list(self.ti.external_ports())
        ports.append(ClockSignal())
        ports.append(ResetSignal())
        if self.pll_en:
            ports.append(self.clk_sel_i)
            ports.append(self.pll.clk_24_i)
            ports.append(self.pll_test_o)
            ports.append(self.pll_vco_o)
            ports.append(self.pllclk_clk)
            ports.append(self.ref_clk)
        return ports
1354
1355
if __name__ == '__main__':
    # script entry point: build a TestIssuer and hand it to the nmigen CLI;
    # when run without arguments, also write RTLIL to test_issuer.il

    # each listed unit name is given the value 1
    units = dict.fromkeys(('alu', 'cr', 'branch', 'trap', 'logical',
                           'spr', 'div', 'mul', 'shiftrot'), 1)
    pspec_args = {
        'ldst_ifacetype': 'bare_wb',
        'imem_ifacetype': 'bare_wb',
        'addr_wid': 48,
        'mask_wid': 8,
        'reg_wid': 64,
        'units': units,
    }
    pspec = TestMemPspec(**pspec_args)
    dut = TestIssuer(pspec)
    vl = main(dut, ports=dut.ports(), name="test_issuer")

    if len(sys.argv) == 1:
        # convert first, so the output file is only opened once the
        # RTLIL text has been generated successfully
        vl = rtlil.convert(dut, ports=dut.external_ports(),
                           name="test_issuer")
        with open("test_issuer.il", "w") as il_file:
            il_file.write(vl)