src/soc/simple/issuer.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
  12 the purpose of this module is to verify the functional correctness
  13 of the Function Units in the absolute simplest and clearest possible
  14 way, and to provide something that can be further incrementally
  15 improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
  19                     ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from nmigen.lib.coding import PriorityEncoder
  25
  26 from openpower.decoder.power_decoder import create_pdecode
  27 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
  28 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
  29 from openpower.decoder.decode2execute1 import Data
  30 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
  31                                      SVP64PredMode)
  32 from openpower.state import CoreState
  33 from openpower.consts import (CR, SVP64CROffs)
  34 from soc.experiment.testmem import TestMemory # test only for instructions
  35 from soc.regfile.regfiles import StateRegs, FastRegs
  36 from soc.simple.core import NonProductionCore
  37 from soc.config.test.test_loadstore import TestMemPspec
  38 from soc.config.ifetch import ConfigFetchUnit
  39 from soc.debug.dmi import CoreDebug, DMIInterface
  40 from soc.debug.jtag import JTAG
  41 from soc.config.pinouts import get_pinspecs
  42 from soc.interrupts.xics import XICS_ICP, XICS_ICS
  43 from soc.bus.simple_gpio import SimpleGPIO
  44 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
  45 from soc.clock.select import ClockSelect
  46 from soc.clock.dummypll import DummyPLL
  47 from openpower.sv.svstate import SVSTATERec
  48
  49
  50 from nmutil.util import rising_edge
  51
  52 def get_insn(f_instr_o, pc):
  53     if f_instr_o.width == 32:
  54         return f_instr_o
  55     else:
  56         # 64-bit: bit 2 of pc decides which word to select
  57         return f_instr_o.word_select(pc[2], 32)
  58
  59 # gets state input or reads from state regfile
  60 def state_get(m, core_rst, state_i, name, regfile, regnum):
  61     comb = m.d.comb
  62     sync = m.d.sync
  63     # read the PC
  64     res = Signal(64, reset_less=True, name=name)
  65     res_ok_delay = Signal(name="%s_ok_delay" % name)
  66     with m.If(~core_rst):
  67         sync += res_ok_delay.eq(~state_i.ok)
  68         with m.If(state_i.ok):
  69             # incoming override (start from pc_i)
  70             comb += res.eq(state_i.data)
  71         with m.Else():
  72             # otherwise read StateRegs regfile for PC...
  73             comb += regfile.ren.eq(1<<regnum)
  74         # ... but on a 1-clock delay
  75         with m.If(res_ok_delay):
  76             comb += res.eq(regfile.data_o)
  77     return res
  78
  79 def get_predint(m, mask, name):
  80     """decode SVP64 predicate integer mask field to reg number and invert
  81     this is identical to the equivalent function in ISACaller except that
  82     it doesn't read the INT directly, it just decodes "what needs to be done"
  83     i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
  84
  85     * all1s is set to indicate that no mask is to be applied.
  86     * regread indicates the GPR register number to be read
  87     * invert is set to indicate that the register value is to be inverted
  88     * unary indicates that the contents of the register is to be shifted 1<<r3
  89     """
  90     comb = m.d.comb
  91     regread = Signal(5, name=name+"regread")
  92     invert = Signal(name=name+"invert")
  93     unary = Signal(name=name+"unary")
  94     all1s = Signal(name=name+"all1s")
  95     with m.Switch(mask):
  96         with m.Case(SVP64PredInt.ALWAYS.value):
  97             comb += all1s.eq(1)      # use 0b1111 (all ones)
  98         with m.Case(SVP64PredInt.R3_UNARY.value):
  99             comb += regread.eq(3)
 100             comb += unary.eq(1)        # 1<<r3 - shift r3 (single bit)
 101         with m.Case(SVP64PredInt.R3.value):
 102             comb += regread.eq(3)
 103         with m.Case(SVP64PredInt.R3_N.value):
 104             comb += regread.eq(3)
 105             comb += invert.eq(1)
 106         with m.Case(SVP64PredInt.R10.value):
 107             comb += regread.eq(10)
 108         with m.Case(SVP64PredInt.R10_N.value):
 109             comb += regread.eq(10)
 110             comb += invert.eq(1)
 111         with m.Case(SVP64PredInt.R30.value):
 112             comb += regread.eq(30)
 113         with m.Case(SVP64PredInt.R30_N.value):
 114             comb += regread.eq(30)
 115             comb += invert.eq(1)
 116     return regread, invert, unary, all1s
 117
 118 def get_predcr(m, mask, name):
 119     """decode SVP64 predicate CR to reg number field and invert status
 120     this is identical to _get_predcr in ISACaller
 121     """
 122     comb = m.d.comb
 123     idx = Signal(2, name=name+"idx")
 124     invert = Signal(name=name+"crinvert")
 125     with m.Switch(mask):
 126         with m.Case(SVP64PredCR.LT.value):
 127             comb += idx.eq(CR.LT)
 128             comb += invert.eq(0)
 129         with m.Case(SVP64PredCR.GE.value):
 130             comb += idx.eq(CR.LT)
 131             comb += invert.eq(1)
 132         with m.Case(SVP64PredCR.GT.value):
 133             comb += idx.eq(CR.GT)
 134             comb += invert.eq(0)
 135         with m.Case(SVP64PredCR.LE.value):
 136             comb += idx.eq(CR.GT)
 137             comb += invert.eq(1)
 138         with m.Case(SVP64PredCR.EQ.value):
 139             comb += idx.eq(CR.EQ)
 140             comb += invert.eq(0)
 141         with m.Case(SVP64PredCR.NE.value):
 142             comb += idx.eq(CR.EQ)
 143             comb += invert.eq(1)
 144         with m.Case(SVP64PredCR.SO.value):
 145             comb += idx.eq(CR.SO)
 146             comb += invert.eq(0)
 147         with m.Case(SVP64PredCR.NS.value):
 148             comb += idx.eq(CR.SO)
 149             comb += invert.eq(1)
 150     return idx, invert
 151
 152
 153 class TestIssuerInternal(Elaboratable):
 154     """TestIssuer - reads instructions from TestMemory and issues them
 155
 156     efficiency and speed is not the main goal here: functional correctness
 157     and code clarity is.  optimisations (which almost 100% interfere with
 158     easy understanding) come later.
 159     """
 160     def __init__(self, pspec):
 161
 162         # test is SVP64 is to be enabled
 163         self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
 164
 165         # and if regfiles are reduced
 166         self.regreduce_en = (hasattr(pspec, "regreduce") and
 167                                             (pspec.regreduce == True))
 168
 169         # JTAG interface.  add this right at the start because if it's
 170         # added it *modifies* the pspec, by adding enable/disable signals
 171         # for parts of the rest of the core
 172         self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
 173         self.dbg_domain = "sync" # sigh "dbgsunc" too problematic
 174         #self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
 175         if self.jtag_en:
 176             # XXX MUST keep this up-to-date with litex, and
 177             # soc-cocotb-sim, and err.. all needs sorting out, argh
 178             subset = ['uart',
 179                       'mtwi',
 180                       'eint', 'gpio', 'mspi0',
 181                       # 'mspi1', - disabled for now
 182                       # 'pwm', 'sd0', - disabled for now
 183                        'sdr']
 184             self.jtag = JTAG(get_pinspecs(subset=subset),
 185                              domain=self.dbg_domain)
 186             # add signals to pspec to enable/disable icache and dcache
 187             # (or data and intstruction wishbone if icache/dcache not included)
 188             # https://bugs.libre-soc.org/show_bug.cgi?id=520
 189             # TODO: do we actually care if these are not domain-synchronised?
 190             # honestly probably not.
 191             pspec.wb_icache_en = self.jtag.wb_icache_en
 192             pspec.wb_dcache_en = self.jtag.wb_dcache_en
 193             self.wb_sram_en = self.jtag.wb_sram_en
 194         else:
 195             self.wb_sram_en = Const(1)
 196
 197         # add 4k sram blocks?
 198         self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
 199                          pspec.sram4x4kblock == True)
 200         if self.sram4x4k:
 201             self.sram4k = []
 202             for i in range(4):
 203                 self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
 204                                                     #features={'err'}
 205                                                     ))
 206
 207         # add interrupt controller?
 208         self.xics = hasattr(pspec, "xics") and pspec.xics == True
 209         if self.xics:
 210             self.xics_icp = XICS_ICP()
 211             self.xics_ics = XICS_ICS()
 212             self.int_level_i = self.xics_ics.int_level_i
 213
 214         # add GPIO peripheral?
 215         self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
 216         if self.gpio:
 217             self.simple_gpio = SimpleGPIO()
 218             self.gpio_o = self.simple_gpio.gpio_o
 219
 220         # main instruction core.  suitable for prototyping / demo only
 221         self.core = core = NonProductionCore(pspec)
 222         self.core_rst = ResetSignal("coresync")
 223
 224         # instruction decoder.  goes into Trap Record
 225         pdecode = create_pdecode()
 226         self.cur_state = CoreState("cur") # current state (MSR/PC/SVSTATE)
 227         self.pdecode2 = PowerDecode2(pdecode, state=self.cur_state,
 228                                      opkls=IssuerDecode2ToOperand,
 229                                      svp64_en=self.svp64_en,
 230                                      regreduce_en=self.regreduce_en)
 231         if self.svp64_en:
 232             self.svp64 = SVP64PrefixDecoder() # for decoding SVP64 prefix
 233
 234         # Test Instruction memory
 235         self.imem = ConfigFetchUnit(pspec).fu
 236
 237         # DMI interface
 238         self.dbg = CoreDebug()
 239
 240         # instruction go/monitor
 241         self.pc_o = Signal(64, reset_less=True)
 242         self.pc_i = Data(64, "pc_i") # set "ok" to indicate "please change me"
 243         self.svstate_i = Data(32, "svstate_i") # ditto
 244         self.core_bigendian_i = Signal() # TODO: set based on MSR.LE
 245         self.busy_o = Signal(reset_less=True)
 246         self.memerr_o = Signal(reset_less=True)
 247
 248         # STATE regfile read /write ports for PC, MSR, SVSTATE
 249         staterf = self.core.regs.rf['state']
 250         self.state_r_pc = staterf.r_ports['cia'] # PC rd
 251         self.state_w_pc = staterf.w_ports['d_wr1'] # PC wr
 252         self.state_r_msr = staterf.r_ports['msr'] # MSR rd
 253         self.state_r_sv = staterf.r_ports['sv'] # SVSTATE rd
 254         self.state_w_sv = staterf.w_ports['sv'] # SVSTATE wr
 255
 256         # DMI interface access
 257         intrf = self.core.regs.rf['int']
 258         crrf = self.core.regs.rf['cr']
 259         xerrf = self.core.regs.rf['xer']
 260         self.int_r = intrf.r_ports['dmi'] # INT read
 261         self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
 262         self.xer_r = xerrf.r_ports['full_xer'] # XER read
 263
 264         if self.svp64_en:
 265             # for predication
 266             self.int_pred = intrf.r_ports['pred'] # INT predicate read
 267             self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read
 268
 269         # hack method of keeping an eye on whether branch/trap set the PC
 270         self.state_nia = self.core.regs.rf['state'].w_ports['nia']
 271         self.state_nia.wen.name = 'state_nia_wen'
 272
 273         # pulse to synchronize the simulator at instruction end
 274         self.insn_done = Signal()
 275
 276         if self.svp64_en:
 277             # store copies of predicate masks
 278             self.srcmask = Signal(64)
 279             self.dstmask = Signal(64)
 280
    def fetch_fsm(self, m, core, pc, svstate, nia, is_svp64_mode,
                        fetch_pc_ready_o, fetch_pc_valid_i,
                        fetch_insn_valid_o, fetch_insn_ready_i):
        """fetch FSM

        this FSM performs fetch of raw instruction data, partial-decodes
        it 32-bit at a time to detect SVP64 prefixes, and will optionally
        read a 2nd 32-bit quantity if that occurs.

        arguments, as used by this method:

        * m                  - Module to which the FSM is added
        * core               - not used in this method
        * pc                 - address given to the instruction memory, and
                               captured into cur_state.pc
        * svstate            - captured into cur_state.svstate
        * nia                - written (sync) with cur_state.pc plus the
                               instruction size (4, or 8 if SVP64-prefixed)
        * is_svp64_mode      - written (sync) with the prefix decoder's
                               is_svp64_mode, only when svp64 is enabled
        * fetch_pc_ready_o   - driven high while in IDLE
        * fetch_pc_valid_i   - sampled in IDLE: starts a fetch
        * fetch_insn_valid_o - driven high while in INSN_READY
        * fetch_insn_ready_i - sampled in INSN_READY: returns to IDLE

        states: IDLE -> INSN_READ -> (INSN_READ2 ->) INSN_READY -> IDLE
        """
        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        cur_state = self.cur_state
        dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode

        # cleared when an MSR read is requested, set again once the
        # data has been captured (resets to 1: no read outstanding)
        msr_read = Signal(reset=1)

        with m.FSM(name='fetch_fsm'):

            # waiting (zzz)
            with m.State("IDLE"):
                comb += fetch_pc_ready_o.eq(1)
                with m.If(fetch_pc_valid_i):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                    sync += cur_state.pc.eq(pc)
                    sync += cur_state.svstate.eq(svstate) # and svstate

                    # initiate read of MSR. arrives one clock later
                    comb += self.state_r_msr.ren.eq(1 << StateRegs.MSR)
                    sync += msr_read.eq(0)

                    m.next = "INSN_READ"  # move to "wait for bus" phase

            # dummy pause to find out why simulation is not keeping up
            # (this state also captures the MSR and waits for the
            # instruction memory to stop being busy)
            with m.State("INSN_READ"):
                # one cycle later, msr/sv read arrives.  valid only once.
                with m.If(~msr_read):
                    sync += msr_read.eq(1) # yeah don't read it again
                    sync += cur_state.msr.eq(self.state_r_msr.data_o)
                with m.If(self.imem.f_busy_o): # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    insn = get_insn(self.imem.f_instr_o, cur_state.pc)
                    # NOTE: this is a python (elaboration-time) "if":
                    # only one of the two branches is ever generated
                    if self.svp64_en:
                        svp64 = self.svp64
                        # decode the SVP64 prefix, if any
                        comb += svp64.raw_opcode_in.eq(insn)
                        comb += svp64.bigendian.eq(self.core_bigendian_i)
                        # pass the decoded prefix (if any) to PowerDecoder2
                        sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
                        # remember whether this is a prefixed instruction, so
                        # the FSM can readily loop when VL==0
                        sync += is_svp64_mode.eq(svp64.is_svp64_mode)
                        # calculate the address of the following instruction
                        insn_size = Mux(svp64.is_svp64_mode, 8, 4)
                        sync += nia.eq(cur_state.pc + insn_size)
                        with m.If(~svp64.is_svp64_mode):
                            # with no prefix, store the instruction
                            # and hand it directly to the next FSM
                            sync += dec_opcode_i.eq(insn)
                            m.next = "INSN_READY"
                        with m.Else():
                            # fetch the rest of the instruction from memory
                            comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
                            comb += self.imem.a_valid_i.eq(1)
                            comb += self.imem.f_valid_i.eq(1)
                            m.next = "INSN_READ2"
                    else:
                        # not SVP64 - 32-bit only
                        sync += nia.eq(cur_state.pc + 4)
                        sync += dec_opcode_i.eq(insn)
                        m.next = "INSN_READY"

            # second 32-bit word of a prefixed instruction (only entered
            # from the svp64-enabled branch of INSN_READ)
            with m.State("INSN_READ2"):
                with m.If(self.imem.f_busy_o):  # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
                    sync += dec_opcode_i.eq(insn)
                    m.next = "INSN_READY"
                    # TODO: probably can start looking at pdecode2.rm_dec
                    # here or maybe even in INSN_READ state, if svp64_mode
                    # detected, in order to trigger - and wait for - the
                    # predicate reading.
                    # NOTE(review): pmode is assigned but not used, and the
                    # string below is a bare expression serving as a
                    # comment (pseudo-code for the TODO above)
                    if self.svp64_en:
                        pmode = pdecode2.rm_dec.predmode
                    """
                    if pmode != SVP64PredMode.ALWAYS.value:
                        fire predicate loading FSM and wait before
                        moving to INSN_READY
                    else:
                        sync += self.srcmask.eq(-1) # set to all 1s
                        sync += self.dstmask.eq(-1) # set to all 1s
                        m.next = "INSN_READY"
                    """

            with m.State("INSN_READY"):
                # hand over the instruction, to be decoded
                comb += fetch_insn_valid_o.eq(1)
                with m.If(fetch_insn_ready_i):
                    m.next = "IDLE"

 394
    def fetch_predicate_fsm(self, m,
                            pred_insn_valid_i, pred_insn_ready_o,
                            pred_mask_valid_o, pred_mask_ready_i):
        """fetch_predicate_fsm - obtains (constructs in the case of CR)
           src/dest predicate masks

        https://bugs.libre-soc.org/show_bug.cgi?id=617
        the predicates can be read here, by using IntRegs r_ports['pred']
        or CRRegs r_ports['cr_pred'].  in the case of CRs it will have to
        be done through multiple reads, extracting one relevant at a time.
        later, a faster way would be to use the 32-bit-wide CR port but
        this is more complex decoding, here.  equivalent code used in
        ISACaller is "from openpower.decoder.isa.caller import get_predcr"

        note: this ENTIRE FSM is not to be called when svp64 is disabled
        (self.int_pred, self.cr_pred, self.srcmask and self.dstmask only
        exist when svp64 is enabled)

        arguments, as used by this method:

        * m                 - Module to which the FSM is added
        * pred_insn_valid_i - sampled in FETCH_PRED_IDLE: starts a fetch
        * pred_insn_ready_o - driven high while in FETCH_PRED_IDLE
        * pred_mask_valid_o - driven high while in FETCH_PRED_DONE
        * pred_mask_ready_i - sampled in FETCH_PRED_DONE: returns to idle

        results are left in self.srcmask and self.dstmask
        """
        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        rm_dec = pdecode2.rm_dec # SVP64RMModeDecode
        predmode = rm_dec.predmode
        srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
        cr_pred, int_pred = self.cr_pred, self.int_pred   # read regfiles
        # get src/dst step, so we can skip already used mask bits
        cur_state = self.cur_state
        srcstep = cur_state.svstate.srcstep
        dststep = cur_state.svstate.dststep
        cur_vl = cur_state.svstate.vl

        # decode predicates (both the INT and the CR interpretation of
        # each field are decoded; predmode selects which one is used)
        sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
        dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
        sidx, scrinvert = get_predcr(m, srcpred, 's')
        didx, dcrinvert = get_predcr(m, dstpred, 'd')

        # store fetched masks, for either intpred or crpred
        # when src/dst step is not zero, the skipped mask bits need to be
        # shifted-out, before actually storing them in src/dest mask
        new_srcmask = Signal(64, reset_less=True)
        new_dstmask = Signal(64, reset_less=True)

        with m.FSM(name="fetch_predicate"):

            with m.State("FETCH_PRED_IDLE"):
                comb += pred_insn_ready_o.eq(1)
                with m.If(pred_insn_valid_i):
                    with m.If(predmode == SVP64PredMode.INT):
                        # skip fetching destination mask register, when
                        # the destination predicate is "always" (all 1s)
                        with m.If(dall1s):
                            sync += new_dstmask.eq(-1)
                            # directly go to fetch source mask register
                            # guaranteed not to be "always" (otherwise
                            # predmode would be SVP64PredMode.ALWAYS,
                            # not INT)
                            comb += int_pred.addr.eq(sregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_SRC_READ"
                        # fetch destination predicate register
                        with m.Else():
                            comb += int_pred.addr.eq(dregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_DST_READ"
                    with m.Elif(predmode == SVP64PredMode.CR):
                        # go fetch masks from the CR register file
                        # (masks are built up one bit at a time: start
                        # from all zeros)
                        sync += new_srcmask.eq(0)
                        sync += new_dstmask.eq(0)
                        m.next = "CR_READ"
                    with m.Else():
                        # no predication: both masks are all 1s and the
                        # shift stage is bypassed
                        sync += self.srcmask.eq(-1)
                        sync += self.dstmask.eq(-1)
                        m.next = "FETCH_PRED_DONE"

            with m.State("INT_DST_READ"):
                # store destination mask
                inv = Repl(dinvert, 64)
                with m.If(dunary):
                    # set selected mask bit for 1<<r3 mode
                    # (only the low 6 bits of the register are used)
                    dst_shift = Signal(range(64))
                    comb += dst_shift.eq(self.int_pred.data_o & 0b111111)
                    sync += new_dstmask.eq(1 << dst_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_dstmask.eq(self.int_pred.data_o ^ inv)
                # skip fetching source mask register, when the source
                # predicate is "always" (all 1s)
                with m.If(sall1s):
                    sync += new_srcmask.eq(-1)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                # fetch source predicate register
                with m.Else():
                    comb += int_pred.addr.eq(sregread)
                    comb += int_pred.ren.eq(1)
                    m.next = "INT_SRC_READ"

            with m.State("INT_SRC_READ"):
                # store source mask
                inv = Repl(sinvert, 64)
                with m.If(sunary):
                    # set selected mask bit for 1<<r3 mode
                    # (only the low 6 bits of the register are used)
                    src_shift = Signal(range(64))
                    comb += src_shift.eq(self.int_pred.data_o & 0b111111)
                    sync += new_srcmask.eq(1 << src_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_srcmask.eq(self.int_pred.data_o ^ inv)
                m.next = "FETCH_PRED_SHIFT_MASK"

            # fetch masks from the CR register file
            # implements the following loop:
            # idx, inv = get_predcr(mask)
            # mask = 0
            # for cr_idx in range(vl):
            #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
            #     if cr[idx] ^ inv:
            #         mask |= 1 << cr_idx
            # return mask
            with m.State("CR_READ"):
                # CR index to be read, which will be ready by the next cycle
                cr_idx = Signal.like(cur_vl, reset_less=True)
                # submit the read operation to the regfile
                with m.If(cr_idx != cur_vl):
                    # the CR read port is unary ...
                    # ren = 1 << cr_idx
                    # ... in MSB0 convention ...
                    # ren = 1 << (7 - cr_idx)
                    # ... and with an offset:
                    # ren = 1 << (7 - off - cr_idx)
                    idx = SVP64CROffs.CRPred + cr_idx
                    comb += cr_pred.ren.eq(1 << (7 - idx))
                    # signal data valid in the next cycle
                    # (cr_read is created here but, being a python
                    # variable, is also used by the blocks below)
                    cr_read = Signal(reset_less=True)
                    sync += cr_read.eq(1)
                    # load the next index
                    sync += cr_idx.eq(cr_idx + 1)
                with m.Else():
                    # exit on loop end
                    sync += cr_read.eq(0)
                    sync += cr_idx.eq(0)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                with m.If(cr_read):
                    # compensate for the one cycle delay on the regfile
                    cur_cr_idx = Signal.like(cur_vl)
                    comb += cur_cr_idx.eq(cr_idx - 1)
                    # read the CR field, select the appropriate bit
                    cr_field = Signal(4)
                    scr_bit = Signal()
                    dcr_bit = Signal()
                    comb += cr_field.eq(cr_pred.data_o)
                    comb += scr_bit.eq(cr_field.bit_select(sidx, 1) ^ scrinvert)
                    comb += dcr_bit.eq(cr_field.bit_select(didx, 1) ^ dcrinvert)
                    # set the corresponding mask bit
                    bit_to_set = Signal.like(self.srcmask)
                    comb += bit_to_set.eq(1 << cur_cr_idx)
                    with m.If(scr_bit):
                        sync += new_srcmask.eq(new_srcmask | bit_to_set)
                    with m.If(dcr_bit):
                        sync += new_dstmask.eq(new_dstmask | bit_to_set)

            with m.State("FETCH_PRED_SHIFT_MASK"):
                # shift-out skipped mask bits
                sync += self.srcmask.eq(new_srcmask >> srcstep)
                sync += self.dstmask.eq(new_dstmask >> dststep)
                m.next = "FETCH_PRED_DONE"

            with m.State("FETCH_PRED_DONE"):
                # masks are in self.srcmask / self.dstmask: wait for the
                # receiver to accept them
                comb += pred_mask_valid_o.eq(1)
                with m.If(pred_mask_ready_i):
                    m.next = "FETCH_PRED_IDLE"

 561
 562     def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
 563                   dbg, core_rst, is_svp64_mode,
 564                   fetch_pc_ready_o, fetch_pc_valid_i,
 565                   fetch_insn_valid_o, fetch_insn_ready_i,
 566                   pred_insn_valid_i, pred_insn_ready_o,
 567                   pred_mask_valid_o, pred_mask_ready_i,
 568                   exec_insn_valid_i, exec_insn_ready_o,
 569                   exec_pc_valid_o, exec_pc_ready_i):
 570         """issue FSM
 571
 572         decode / issue FSM.  this interacts with the "fetch" FSM
 573         through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
 574         (outgoing). also interacts with the "execute" FSM
 575         through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
 576         (incoming).
 577         SVP64 RM prefixes have already been set up by the
 578         "fetch" phase, so execute is fairly straightforward.
 579         """
 580
 581         comb = m.d.comb
 582         sync = m.d.sync
 583         pdecode2 = self.pdecode2
 584         cur_state = self.cur_state
 585
 586         # temporaries
 587         dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode
 588
 589         # for updating svstate (things like srcstep etc.)
 590         update_svstate = Signal() # set this (below) if updating
 591         new_svstate = SVSTATERec("new_svstate")
 592         comb += new_svstate.eq(cur_state.svstate)
 593
 594         # precalculate srcstep+1 and dststep+1
 595         cur_srcstep = cur_state.svstate.srcstep
 596         cur_dststep = cur_state.svstate.dststep
 597         next_srcstep = Signal.like(cur_srcstep)
 598         next_dststep = Signal.like(cur_dststep)
 599         comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
 600         comb += next_dststep.eq(cur_state.svstate.dststep+1)
 601
 602         # note if an exception happened.  in a pipelined or OoO design
 603         # this needs to be accompanied by "shadowing" (or stalling)
 604         el = []
 605         for exc in core.fus.excs.values():
 606             el.append(exc.happened)
 607         exc_happened = Signal()
 608         if len(el) > 0: # at least one exception
 609             comb += exc_happened.eq(Cat(*el).bool())
 610
 611         with m.FSM(name="issue_fsm"):
 612
 613             # sync with the "fetch" phase which is reading the instruction
 614             # at this point, there is no instruction running, that
 615             # could inadvertently update the PC.
 616             with m.State("ISSUE_START"):
 617                 # wait on "core stop" release, before next fetch
 618                 # need to do this here, in case we are in a VL==0 loop
 619                 with m.If(~dbg.core_stop_o & ~core_rst):
 620                     comb += fetch_pc_valid_i.eq(1) # tell fetch to start
 621                     with m.If(fetch_pc_ready_o):   # fetch acknowledged us
 622                         m.next = "INSN_WAIT"
 623                 with m.Else():
 624                     # tell core it's stopped, and acknowledge debug handshake
 625                     comb += dbg.core_stopped_i.eq(1)
 626                     # while stopped, allow updating the PC and SVSTATE
 627                     with m.If(self.pc_i.ok):
 628                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 629                         comb += self.state_w_pc.data_i.eq(self.pc_i.data)
 630                         sync += pc_changed.eq(1)
 631                     with m.If(self.svstate_i.ok):
 632                         comb += new_svstate.eq(self.svstate_i.data)
 633                         comb += update_svstate.eq(1)
 634                         sync += sv_changed.eq(1)
 635
 636             # wait for an instruction to arrive from Fetch
 637             with m.State("INSN_WAIT"):
 638                 comb += fetch_insn_ready_i.eq(1)
 639                 with m.If(fetch_insn_valid_o):
 640                     # loop into ISSUE_START if it's a SVP64 instruction
 641                     # and VL == 0.  this because VL==0 is a for-loop
 642                     # from 0 to 0 i.e. always, always a NOP.
 643                     cur_vl = cur_state.svstate.vl
 644                     with m.If(is_svp64_mode & (cur_vl == 0)):
 645                         # update the PC before fetching the next instruction
 646                         # since we are in a VL==0 loop, no instruction was
 647                         # executed that we could be overwriting
 648                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 649                         comb += self.state_w_pc.data_i.eq(nia)
 650                         comb += self.insn_done.eq(1)
 651                         m.next = "ISSUE_START"
 652                     with m.Else():
 653                         if self.svp64_en:
 654                             m.next = "PRED_START"  # start fetching predicate
 655                         else:
 656                             m.next = "DECODE_SV"  # skip predication
 657
 658             with m.State("PRED_START"):
 659                 comb += pred_insn_valid_i.eq(1)  # tell fetch_pred to start
 660                 with m.If(pred_insn_ready_o):  # fetch_pred acknowledged us
 661                     m.next = "MASK_WAIT"
 662
 663             with m.State("MASK_WAIT"):
 664                 comb += pred_mask_ready_i.eq(1) # ready to receive the masks
 665                 with m.If(pred_mask_valid_o): # predication masks are ready
 666                     m.next = "PRED_SKIP"
 667
 668             # skip zeros in predicate
 669             with m.State("PRED_SKIP"):
 670                 with m.If(~is_svp64_mode):
 671                     m.next = "DECODE_SV"  # nothing to do
 672                 with m.Else():
 673                     if self.svp64_en:
 674                         pred_src_zero = pdecode2.rm_dec.pred_sz
 675                         pred_dst_zero = pdecode2.rm_dec.pred_dz
 676
 677                         # new srcstep, after skipping zeros
 678                         skip_srcstep = Signal.like(cur_srcstep)
 679                         # value to be added to the current srcstep
 680                         src_delta = Signal.like(cur_srcstep)
 681                         # add leading zeros to srcstep, if not in zero mode
 682                         with m.If(~pred_src_zero):
 683                             # priority encoder (count leading zeros)
 684                             # append guard bit, in case the mask is all zeros
 685                             pri_enc_src = PriorityEncoder(65)
 686                             m.submodules.pri_enc_src = pri_enc_src
 687                             comb += pri_enc_src.i.eq(Cat(self.srcmask,
 688                                                          Const(1, 1)))
 689                             comb += src_delta.eq(pri_enc_src.o)
 690                         # apply delta to srcstep
 691                         comb += skip_srcstep.eq(cur_srcstep + src_delta)
 692                         # shift-out all leading zeros from the mask
 693                         # plus the leading "one" bit
 694                         # TODO count leading zeros and shift-out the zero
 695                         #      bits, in the same step, in hardware
 696                         sync += self.srcmask.eq(self.srcmask >> (src_delta+1))
 697
 698                         # same as above, but for dststep
 699                         skip_dststep = Signal.like(cur_dststep)
 700                         dst_delta = Signal.like(cur_dststep)
 701                         with m.If(~pred_dst_zero):
 702                             pri_enc_dst = PriorityEncoder(65)
 703                             m.submodules.pri_enc_dst = pri_enc_dst
 704                             comb += pri_enc_dst.i.eq(Cat(self.dstmask,
 705                                                          Const(1, 1)))
 706                             comb += dst_delta.eq(pri_enc_dst.o)
 707                         comb += skip_dststep.eq(cur_dststep + dst_delta)
 708                         sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))
 709
 710                         # TODO: initialize mask[VL]=1 to avoid passing past VL
 711                         with m.If((skip_srcstep >= cur_vl) |
 712                                   (skip_dststep >= cur_vl)):
 713                             # end of VL loop. Update PC and reset src/dst step
 714                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 715                             comb += self.state_w_pc.data_i.eq(nia)
 716                             comb += new_svstate.srcstep.eq(0)
 717                             comb += new_svstate.dststep.eq(0)
 718                             comb += update_svstate.eq(1)
 719                             # synchronize with the simulator
 720                             comb += self.insn_done.eq(1)
 721                             # go back to Issue
 722                             m.next = "ISSUE_START"
 723                         with m.Else():
 724                             # update new src/dst step
 725                             comb += new_svstate.srcstep.eq(skip_srcstep)
 726                             comb += new_svstate.dststep.eq(skip_dststep)
 727                             comb += update_svstate.eq(1)
 728                             # proceed to Decode
 729                             m.next = "DECODE_SV"
 730
 731                         # pass predicate mask bits through to satellite decoders
 732                         # TODO: for SIMD this will be *multiple* bits
 733                         sync += core.sv_pred_sm.eq(self.srcmask[0])
 734                         sync += core.sv_pred_dm.eq(self.dstmask[0])
 735
 736             # after src/dst step have been updated, we are ready
 737             # to decode the instruction
 738             with m.State("DECODE_SV"):
 739                 # decode the instruction
 740                 sync += core.e.eq(pdecode2.e)
 741                 sync += core.state.eq(cur_state)
 742                 sync += core.raw_insn_i.eq(dec_opcode_i)
 743                 sync += core.bigendian_i.eq(self.core_bigendian_i)
 744                 if self.svp64_en:
 745                     sync += core.sv_rm.eq(pdecode2.sv_rm)
 746                     # set RA_OR_ZERO detection in satellite decoders
 747                     sync += core.sv_a_nz.eq(pdecode2.sv_a_nz)
 748
 749                 m.next = "INSN_EXECUTE"  # move to "execute"
 750
 751             # handshake with execution FSM, move to "wait" once acknowledged
 752             with m.State("INSN_EXECUTE"):
 753                 comb += exec_insn_valid_i.eq(1) # trigger execute
 754                 with m.If(exec_insn_ready_o):   # execute acknowledged us
 755                     m.next = "EXECUTE_WAIT"
 756
 757             with m.State("EXECUTE_WAIT"):
 758                 # wait on "core stop" release, at instruction end
 759                 # need to do this here, in case we are in a VL>1 loop
 760                 with m.If(~dbg.core_stop_o & ~core_rst):
 761                     comb += exec_pc_ready_i.eq(1)
 762                     # see https://bugs.libre-soc.org/show_bug.cgi?id=636
 763                     #with m.If(exec_pc_valid_o & exc_happened):
 764                     #    probably something like this:
 765                     #    sync += pdecode2.ldst_exc.eq(core.fus.get_exc("ldst0")
 766                     # TODO: the exception info needs to be blatted
 767                     # into pdecode.ldst_exc, and the instruction "re-run".
 768                     # when ldst_exc.happened is set, the PowerDecoder2
 769                     # reacts very differently: it re-writes the instruction
 770                     # with a "trap" (calls PowerDecoder2.trap()) which
 771                     # will *overwrite* whatever was requested and jump the
 772                     # PC to the exception address, as well as alter MSR.
 773                     # nothing else needs to be done other than to note
 774                     # the change of PC and MSR (and, later, SVSTATE)
 775                     #with m.Elif(exec_pc_valid_o):
 776                     with m.If(exec_pc_valid_o): # replace with Elif (above)
 777
 778                         # was this the last loop iteration?
 779                         is_last = Signal()
 780                         cur_vl = cur_state.svstate.vl
 781                         comb += is_last.eq(next_srcstep == cur_vl)
 782
 783                         # if either PC or SVSTATE were changed by the previous
 784                         # instruction, go directly back to Fetch, without
 785                         # updating either PC or SVSTATE
 786                         with m.If(pc_changed | sv_changed):
 787                             m.next = "ISSUE_START"
 788
 789                         # also return to Fetch, when no output was a vector
 790                         # (regardless of SRCSTEP and VL), or when the last
 791                         # instruction was really the last one of the VL loop
 792                         with m.Elif((~pdecode2.loop_continue) | is_last):
 793                             # before going back to fetch, update the PC state
 794                             # register with the NIA.
 795                             # ok here we are not reading the branch unit.
 796                             # TODO: this just blithely overwrites whatever
 797                             #       pipeline updated the PC
 798                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 799                             comb += self.state_w_pc.data_i.eq(nia)
 800                             # reset SRCSTEP before returning to Fetch
 801                             if self.svp64_en:
 802                                 with m.If(pdecode2.loop_continue):
 803                                     comb += new_svstate.srcstep.eq(0)
 804                                     comb += new_svstate.dststep.eq(0)
 805                                     comb += update_svstate.eq(1)
 806                             else:
 807                                 comb += new_svstate.srcstep.eq(0)
 808                                 comb += new_svstate.dststep.eq(0)
 809                                 comb += update_svstate.eq(1)
 810                             m.next = "ISSUE_START"
 811
 812                         # returning to Execute? then, first update SRCSTEP
 813                         with m.Else():
 814                             comb += new_svstate.srcstep.eq(next_srcstep)
 815                             comb += new_svstate.dststep.eq(next_dststep)
 816                             comb += update_svstate.eq(1)
 817                             # return to mask skip loop
 818                             m.next = "PRED_SKIP"
 819
 820                 with m.Else():
 821                     comb += dbg.core_stopped_i.eq(1)
 822                     # while stopped, allow updating the PC and SVSTATE
 823                     with m.If(self.pc_i.ok):
 824                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 825                         comb += self.state_w_pc.data_i.eq(self.pc_i.data)
 826                         sync += pc_changed.eq(1)
 827                     with m.If(self.svstate_i.ok):
 828                         comb += new_svstate.eq(self.svstate_i.data)
 829                         comb += update_svstate.eq(1)
 830                         sync += sv_changed.eq(1)
 831
 832         # check if svstate needs updating: if so, write it to State Regfile
 833         with m.If(update_svstate):
 834             comb += self.state_w_sv.wen.eq(1<<StateRegs.SVSTATE)
 835             comb += self.state_w_sv.data_i.eq(new_svstate)
 836             sync += cur_state.svstate.eq(new_svstate) # for next clock
 837
 838     def execute_fsm(self, m, core, pc_changed, sv_changed,
 839                     exec_insn_valid_i, exec_insn_ready_o,
 840                     exec_pc_valid_o, exec_pc_ready_i):
 841         """execute FSM
 842
 843         execute FSM. this interacts with the "issue" FSM
 844         through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
 845         (outgoing). SVP64 RM prefixes have already been set up by the
 846         "issue" phase, so execute is fairly straightforward.
 847         """
             # arguments (as supplied by elaborate()):
             # * m: the Module that this FSM is added to
             # * core: the core being driven (ivalid_i / issue_i in, busy_o out)
             # * pc_changed, sv_changed: flags shared with the issue FSM.
             #   cleared here when an instruction is issued, set here when a
             #   write-enable for PC / SVSTATE is seen on self.state_nia
             # * exec_insn_valid_i / exec_insn_ready_o: issue -> execute handshake
             # * exec_pc_valid_o / exec_pc_ready_i: execute -> issue handshake
 848
 849         comb = m.d.comb
 850         sync = m.d.sync
             # NOTE(review): pdecode2 is not referenced again in this method
 851         pdecode2 = self.pdecode2
 852
 853         # temporaries
 854         core_busy_o = core.busy_o                 # core is busy
 855         core_ivalid_i = core.ivalid_i             # instruction is valid
 856         core_issue_i = core.issue_i               # instruction is issued
 857         insn_type = core.e.do.insn_type           # instruction MicroOp type
 858
 859         with m.FSM(name="exec_fsm"):
 860
 861             # waiting for instruction bus (stays there until not busy)
 862             with m.State("INSN_START"):
                     # "ready" is held high for as long as we sit in this state
 863                 comb += exec_insn_ready_o.eq(1)
 864                 with m.If(exec_insn_valid_i):
                         # both are comb assignments made only in this state,
                         # so issue_i is asserted only on the hand-over cycle
 865                     comb += core_ivalid_i.eq(1)  # instruction is valid
 866                     comb += core_issue_i.eq(1)  # and issued
                         # start each instruction with the "changed" flags clear
 867                     sync += sv_changed.eq(0)
 868                     sync += pc_changed.eq(0)
 869                     m.next = "INSN_ACTIVE"  # move to "wait completion"
 870
 871             # instruction started: must wait till it finishes
 872             with m.State("INSN_ACTIVE"):
                     # keep "valid" asserted unless the decoded op is OP_NOP
 873                 with m.If(insn_type != MicrOp.OP_NOP):
 874                     comb += core_ivalid_i.eq(1) # instruction is valid
 875                 # note changes to PC and SVSTATE
                     # (wen is tested against the bit for each StateRegs index.
                     #  presumably state_nia is the State regfile write port
                     #  used by the core - TODO confirm where it is declared)
 876                 with m.If(self.state_nia.wen & (1<<StateRegs.SVSTATE)):
 877                     sync += sv_changed.eq(1)
 878                 with m.If(self.state_nia.wen & (1<<StateRegs.PC)):
 879                     sync += pc_changed.eq(1)
 880                 with m.If(~core_busy_o): # instruction done!
                         # offer the result to the issue FSM, and only leave
                         # this state once the issue FSM has accepted it
 881                     comb += exec_pc_valid_o.eq(1)
 882                     with m.If(exec_pc_ready_i):
                             # insn_done is raised on the acceptance cycle only
 883                         comb += self.insn_done.eq(1)
 884                         m.next = "INSN_START"  # back to fetch
 885
 886     def setup_peripherals(self, m):
 887         comb, sync = m.d.comb, m.d.sync
 888
 889         # okaaaay so the debug module must be in coresync clock domain
 890         # but NOT its reset signal. to cope with this, set every single
 891         # submodule explicitly in coresync domain, debug and JTAG
 892         # in their own one but using *external* reset.
 893         csd = DomainRenamer("coresync")
 894         dbd = DomainRenamer(self.dbg_domain)
 895
 896         m.submodules.core = core = csd(self.core)
 897         m.submodules.imem = imem = csd(self.imem)
 898         m.submodules.dbg = dbg = dbd(self.dbg)
 899         if self.jtag_en:
 900             m.submodules.jtag = jtag = dbd(self.jtag)
 901             # TODO: UART2GDB mux, here, from external pin
 902             # see https://bugs.libre-soc.org/show_bug.cgi?id=499
 903             sync += dbg.dmi.connect_to(jtag.dmi)
 904
 905         cur_state = self.cur_state
 906
 907         # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
 908         if self.sram4x4k:
 909             for i, sram in enumerate(self.sram4k):
 910                 m.submodules["sram4k_%d" % i] = csd(sram)
 911                 comb += sram.enable.eq(self.wb_sram_en)
 912
 913         # XICS interrupt handler
 914         if self.xics:
 915             m.submodules.xics_icp = icp = csd(self.xics_icp)
 916             m.submodules.xics_ics = ics = csd(self.xics_ics)
 917             comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
 918             sync += cur_state.eint.eq(icp.core_irq_o) # connect ICP to core
 919
 920         # GPIO test peripheral
 921         if self.gpio:
 922             m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)
 923
 924         # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
 925         # XXX causes litex ECP5 test to get wrong idea about input and output
 926         # (but works with verilator sim *sigh*)
 927         #if self.gpio and self.xics:
 928         #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
 929
 930         # instruction decoder
 931         pdecode = create_pdecode()
 932         m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
 933         if self.svp64_en:
 934             m.submodules.svp64 = svp64 = csd(self.svp64)
 935
 936         # convenience
 937         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
 938         intrf = self.core.regs.rf['int']
 939
 940         # clock delay power-on reset
 941         cd_por  = ClockDomain(reset_less=True)
 942         cd_sync = ClockDomain()
 943         core_sync = ClockDomain("coresync")
 944         m.domains += cd_por, cd_sync, core_sync
 945         if self.dbg_domain != "sync":
 946             dbg_sync = ClockDomain(self.dbg_domain)
 947             m.domains += dbg_sync
 948
 949         ti_rst = Signal(reset_less=True)
 950         delay = Signal(range(4), reset=3)
 951         with m.If(delay != 0):
 952             m.d.por += delay.eq(delay - 1)
 953         comb += cd_por.clk.eq(ClockSignal())
 954
 955         # power-on reset delay
 956         core_rst = ResetSignal("coresync")
 957         comb += ti_rst.eq(delay != 0 | dbg.core_rst_o | ResetSignal())
 958         comb += core_rst.eq(ti_rst)
 959
 960         # debug clock is same as coresync, but reset is *main external*
 961         if self.dbg_domain != "sync":
 962             dbg_rst = ResetSignal(self.dbg_domain)
 963             comb += dbg_rst.eq(ResetSignal())
 964
 965         # busy/halted signals from core
 966         comb += self.busy_o.eq(core.busy_o)
 967         comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
 968
 969         # temporary hack: says "go" immediately for both address gen and ST
 970         l0 = core.l0
 971         ldst = core.fus.fus['ldst0']
 972         st_go_edge = rising_edge(m, ldst.st.rel_o)
 973         m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o) # link addr-go direct to rel
 974         m.d.comb += ldst.st.go_i.eq(st_go_edge) # link store-go to rising rel
 975
 976     def elaborate(self, platform):
             # builds and returns the Module for the issuer: sets up the
             # submodules and resets, declares the handshake signals, then
             # calls the fetch / issue / (predicate) / execute FSM builders,
             # the DMI read logic and the DEC/TB updater, all on the same m.
             # platform is not referenced in this method.
 977         m = Module()
 978         # convenience
 979         comb, sync = m.d.comb, m.d.sync
 980         cur_state = self.cur_state
 981         pdecode2 = self.pdecode2
 982         dbg = self.dbg
 983         core = self.core
 984
 985         # set up peripherals and core
             # NOTE(review): self.core_rst is read here, before the call to
             # setup_peripherals(); it is assigned outside the visible code
 986         core_rst = self.core_rst
 987         self.setup_peripherals(m)
 988
 989         # reset current state if core reset requested
 990         with m.If(core_rst):
 991             m.d.sync += self.cur_state.eq(0)
 992
 993         # PC and instruction from I-Memory
 994         comb += self.pc_o.eq(cur_state.pc)
             # these two flags are shared between the issue and execute FSMs
 995         pc_changed = Signal() # note write to PC
 996         sv_changed = Signal() # note write to SVSTATE
 997
 998         # read state either from incoming override or from regfile
 999         # TODO: really should be doing MSR in the same way
             # (state_get is a helper defined elsewhere in this file)
1000         pc = state_get(m, core_rst, self.pc_i,
1001                             "pc",                  # read PC
1002                             self.state_r_pc, StateRegs.PC)
1003         svstate = state_get(m, core_rst, self.svstate_i,
1004                             "svstate",   # read SVSTATE
1005                             self.state_r_sv, StateRegs.SVSTATE)
1006
1007         # don't write pc every cycle
             # these defaults are added first: the FSM code called further
             # down makes later assignments to the same signals, and in
             # nmigen the later assignment takes priority when active
1008         comb += self.state_w_pc.wen.eq(0)
1009         comb += self.state_w_pc.data_i.eq(0)
1010
1011         # don't read msr every cycle
1012         comb += self.state_r_msr.ren.eq(0)
1013
1014         # address of the next instruction, in the absence of a branch
1015         # depends on the instruction size
             # (declared here and handed to fetch_fsm and issue_fsm)
1016         nia = Signal(64)
1017
1018         # connect up debug signals
1019         # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
1020         comb += dbg.terminate_i.eq(core.core_terminate_o)
1021         comb += dbg.state.pc.eq(pc)
1022         comb += dbg.state.svstate.eq(svstate)
1023         comb += dbg.state.msr.eq(cur_state.msr)
1024
1025         # pass the prefix mode from Fetch to Issue, so the latter can loop
1026         # on VL==0
1027         is_svp64_mode = Signal()
1028
1029         # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
1030         # issue, decode/execute, now joined by "Predicate fetch/calculate".
1031         # these are the handshake signals between each
1032
1033         # fetch FSM can run as soon as the PC is valid
1034         fetch_pc_valid_i = Signal() # Execute tells Fetch "start next read"
1035         fetch_pc_ready_o = Signal() # Fetch Tells SVSTATE "proceed"
1036
1037         # fetch FSM hands over the instruction to be decoded / issued
1038         fetch_insn_valid_o = Signal()
1039         fetch_insn_ready_i = Signal()
1040
1041         # predicate fetch FSM decodes and fetches the predicate
1042         pred_insn_valid_i = Signal()
1043         pred_insn_ready_o = Signal()
1044
1045         # predicate fetch FSM delivers the masks
1046         pred_mask_valid_o = Signal()
1047         pred_mask_ready_i = Signal()
1048
1049         # issue FSM delivers the instruction to the be executed
1050         exec_insn_valid_i = Signal()
1051         exec_insn_ready_o = Signal()
1052
1053         # execute FSM, hands over the PC/SVSTATE back to the issue FSM
1054         exec_pc_valid_o = Signal()
1055         exec_pc_ready_i = Signal()
1056
1057         # the FSMs here are perhaps unusual in that they detect conditions
1058         # then "hold" information, combinatorially, for the core
1059         # (as opposed to using sync - which would be on a clock's delay)
1060         # this includes the actual opcode, valid flags and so on.
1061
1062         # Fetch, then predicate fetch, then Issue, then Execute.
1063         # Issue is where the VL for-loop # lives.  the ready/valid
1064         # signalling is used to communicate between the four.
1065
1066         self.fetch_fsm(m, core, pc, svstate, nia, is_svp64_mode,
1067                        fetch_pc_ready_o, fetch_pc_valid_i,
1068                        fetch_insn_valid_o, fetch_insn_ready_i)
1069
1070         self.issue_fsm(m, core, pc_changed, sv_changed, nia,
1071                        dbg, core_rst, is_svp64_mode,
1072                        fetch_pc_ready_o, fetch_pc_valid_i,
1073                        fetch_insn_valid_o, fetch_insn_ready_i,
1074                        pred_insn_valid_i, pred_insn_ready_o,
1075                        pred_mask_valid_o, pred_mask_ready_i,
1076                        exec_insn_valid_i, exec_insn_ready_o,
1077                        exec_pc_valid_o, exec_pc_ready_i)
1078
             # the predicate FSM only exists when SVP64 is enabled; the four
             # pred_* signals above are created regardless
1079         if self.svp64_en:
1080             self.fetch_predicate_fsm(m,
1081                                      pred_insn_valid_i, pred_insn_ready_o,
1082                                      pred_mask_valid_o, pred_mask_ready_i)
1083
1084         self.execute_fsm(m, core, pc_changed, sv_changed,
1085                          exec_insn_valid_i, exec_insn_ready_o,
1086                          exec_pc_valid_o, exec_pc_ready_i)
1087
1088         # whatever was done above, over-ride it if core reset is held
             # (placed after the FSM calls so that this assignment to nia
             #  is the last one added)
1089         with m.If(core_rst):
1090             sync += nia.eq(0)
1091
1092         # this bit doesn't have to be in the FSM: connect up to read
1093         # regfiles on demand from DMI
1094         self.do_dmi(m, dbg)
1095
1096         # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
1097         # (which uses that in PowerDecoder2 to raise 0x900 exception)
1098         self.tb_dec_fsm(m, cur_state.dec)
1099
1100         return m
1101
1102     def do_dmi(self, m, dbg):
1103         """deals with DMI debug requests
1104
1105         currently only provides read requests for the INT regfile, CR and XER
1106         it will later also deal with *writing* to these regfiles.
1107         """
             # m: the Module to add the logic to.  dbg: the debug module whose
             # d_gpr / d_cr / d_xer request interfaces are serviced here.
             # all three follow the same pattern: raise the read-enable in the
             # cycle that req is seen, then return data plus ack one cycle
             # later, using a one-clock delayed copy of req.
1108         comb = m.d.comb
1109         sync = m.d.sync
             # NOTE(review): dmi is unpacked here but not used in this method
1110         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
1111         intrf = self.core.regs.rf['int']
1112
1113         with m.If(d_reg.req): # request for regfile access being made
1114             # TODO: error-check this
1115             # XXX should this be combinatorial?  sync better?
                 # python-level choice made at build time: a unary regfile
                 # takes a one-hot read-enable, otherwise an address is
                 # given and a single enable bit is raised
1116             if intrf.unary:
1117                 comb += self.int_r.ren.eq(1<<d_reg.addr)
1118             else:
1119                 comb += self.int_r.addr.eq(d_reg.addr)
1120                 comb += self.int_r.ren.eq(1)
1121         d_reg_delay  = Signal()
1122         sync += d_reg_delay.eq(d_reg.req)
1123         with m.If(d_reg_delay):
1124             # data arrives one clock later
1125             comb += d_reg.data.eq(self.int_r.data_o)
1126             comb += d_reg.ack.eq(1)
1127
1128         # sigh same thing for CR debug
1129         with m.If(d_cr.req): # request for regfile access being made
1130             comb += self.cr_r.ren.eq(0b11111111) # enable all
1131         d_cr_delay  = Signal()
1132         sync += d_cr_delay.eq(d_cr.req)
1133         with m.If(d_cr_delay):
1134             # data arrives one clock later
1135             comb += d_cr.data.eq(self.cr_r.data_o)
1136             comb += d_cr.ack.eq(1)
1137
1138         # aaand XER...
1139         with m.If(d_xer.req): # request for regfile access being made
1140             comb += self.xer_r.ren.eq(0b111111) # enable all
1141         d_xer_delay  = Signal()
1142         sync += d_xer_delay.eq(d_xer.req)
1143         with m.If(d_xer_delay):
1144             # data arrives one clock later
1145             comb += d_xer.data.eq(self.xer_r.data_o)
1146             comb += d_xer.ack.eq(1)
1147
1148     def tb_dec_fsm(self, m, spr_dec):
1149         """tb_dec_fsm
1150
1151         this is a FSM for updating either dec or tb.  it runs alternately
1152         DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
1153         value to DEC, however the regfile has "passthrough" on it so this
1154         *should* be ok.
1155
1156         see v3.0B p1097-1099 for Timeer Resource and p1065 and p1076
1157         """
             # m: the Module to add the FSM to.
             # spr_dec: receives a registered copy of each new DEC value
             # (elaborate() passes cur_state.dec).
             # every state moves on unconditionally, so the four states repeat
             # in a fixed cycle and DEC and TB are each rewritten once per
             # four clocks of the FSM.
1158
1159         comb, sync = m.d.comb, m.d.sync
1160         fast_rf = self.core.regs.rf['fast']
             # one read port and one write port, both looked up under the
             # name 'issue', are shared between the DEC and TB accesses
1161         fast_r_dectb = fast_rf.r_ports['issue'] # DEC/TB
1162         fast_w_dectb = fast_rf.w_ports['issue'] # DEC/TB
1163
             # NOTE(review): the "fsm" name bound here is not used
1164         with m.FSM() as fsm:
1165
1166             # initiates read of current DEC
1167             with m.State("DEC_READ"):
1168                 comb += fast_r_dectb.addr.eq(FastRegs.DEC)
1169                 comb += fast_r_dectb.ren.eq(1)
1170                 m.next = "DEC_WRITE"
1171
1172             # waits for DEC read to arrive (1 cycle), updates with new value
1173             with m.State("DEC_WRITE"):
1174                 new_dec = Signal(64)
1175                 # TODO: MSR.LPCR 32-bit decrement mode
                     # DEC counts down: write back the value just read, minus 1
1176                 comb += new_dec.eq(fast_r_dectb.data_o - 1)
1177                 comb += fast_w_dectb.addr.eq(FastRegs.DEC)
1178                 comb += fast_w_dectb.wen.eq(1)
1179                 comb += fast_w_dectb.data_i.eq(new_dec)
1180                 sync += spr_dec.eq(new_dec) # copy into cur_state for decoder
1181                 m.next = "TB_READ"
1182
1183             # initiates read of current TB
1184             with m.State("TB_READ"):
1185                 comb += fast_r_dectb.addr.eq(FastRegs.TB)
1186                 comb += fast_r_dectb.ren.eq(1)
1187                 m.next = "TB_WRITE"
1188
1189             # waits for read TB to arrive, initiates write of current TB
1190             with m.State("TB_WRITE"):
1191                 new_tb = Signal(64)
                     # TB counts up: write back the value just read, plus 1
1192                 comb += new_tb.eq(fast_r_dectb.data_o + 1)
1193                 comb += fast_w_dectb.addr.eq(FastRegs.TB)
1194                 comb += fast_w_dectb.wen.eq(1)
1195                 comb += fast_w_dectb.data_i.eq(new_tb)
1196                 m.next = "DEC_READ"
1197
             # NOTE(review): the caller in elaborate() ignores this return value
1198         return m
1199
1200     def __iter__(self):
             # yields the signals that make up this object's port list, in a
             # fixed order; ports() simply collects this iterator into a list
1201         yield from self.pc_i.ports()
1202         yield self.pc_o
1203         yield self.memerr_o
1204         yield from self.core.ports()
1205         yield from self.imem.ports()
1206         yield self.core_bigendian_i
1207         yield self.busy_o
1208
1209     def ports(self):
             # returns everything yielded by __iter__ as a list.
             # NOTE(review): an identical ports() is defined again further
             # down in this class, and that later definition replaces this one
1210         return list(self)
1211
1212     def external_ports(self):
             # builds the list of top-level ports: the fixed PC / status
             # signals, then either the JTAG ports or the raw DMI ports, the
             # instruction and data bus fields, and finally the ports of
             # whichever optional peripherals (SRAM, XICS, GPIO) are enabled.
             # NOTE(review): the "+=" below extends, in place, whatever list
             # object pc_i.ports() returns - assumes that is a fresh list,
             # TODO confirm
1213         ports = self.pc_i.ports()
1214         ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
1215                 ]
1216
1217         if self.jtag_en:
1218             ports += list(self.jtag.external_ports())
1219         else:
1220             # don't add DMI if JTAG is enabled
1221             ports += list(self.dbg.dmi.ports())
1222
             # instruction bus first, then the bus returned by l0.cmpi.wb_bus()
1223         ports += list(self.imem.ibus.fields.values())
1224         ports += list(self.core.l0.cmpi.wb_bus().fields.values())
1225
1226         if self.sram4x4k:
1227             for sram in self.sram4k:
1228                 ports += list(sram.bus.fields.values())
1229
1230         if self.xics:
1231             ports += list(self.xics_icp.bus.fields.values())
1232             ports += list(self.xics_ics.bus.fields.values())
1233             ports.append(self.int_level_i)
1234
1235         if self.gpio:
1236             ports += list(self.simple_gpio.bus.fields.values())
1237             ports.append(self.gpio_o)
1238
1239         return ports
1240
1241     def ports(self):
             # returns everything yielded by __iter__ as a list.
             # NOTE(review): this repeats, word for word, the ports() defined
             # just before external_ports(); being the later definition in the
             # class body, this is the one that takes effect
1242         return list(self)
1243
1244
1245 class TestIssuer(Elaboratable):
         # wrapper around TestIssuerInternal (held in self.ti) that also owns a
         # DummyPLL instance and drives the "coresync" clock (and the debug
         # domain clock, when that is not "sync") from the main clock.
         # pspec: configuration object, forwarded unchanged to
         # TestIssuerInternal; only its optional "use_pll" attribute is read
         # here.
1246     def __init__(self, pspec):
1247         self.ti = TestIssuerInternal(pspec)
             # the PLL object is always created; it only becomes a submodule
             # in elaborate() when pll_en is set
1248         self.pll = DummyPLL(instance=True)
1249
1250         # PLL direct clock or not
1251         self.pll_en = hasattr(pspec, "use_pll") and pspec.use_pll
1252         if self.pll_en:
1253             self.pll_test_o = Signal(reset_less=True)
1254             self.pll_vco_o = Signal(reset_less=True)
1255             self.clk_sel_i = Signal(2, reset_less=True)
1256             self.ref_clk =  ClockSignal() # can't rename it but that's ok
1257             self.pllclk_clk = ClockSignal("pllclk")
1258
1259     def elaborate(self, platform):
             # returns a Module containing self.ti, the optional PLL wiring,
             # and the clock connections for coresync / the debug domain.
             # platform is not referenced.
1260         m = Module()
1261         comb = m.d.comb
1262
1263         # TestIssuer nominally runs at main clock, actually it is
1264         # all combinatorial internally except for coresync'd components
1265         m.submodules.ti = ti = self.ti
1266
1267         if self.pll_en:
1268             # ClockSelect runs at PLL output internal clock rate
1269             m.submodules.wrappll = pll = self.pll
1270
1271             # add clock domains from PLL
1272             cd_pll = ClockDomain("pllclk")
1273             m.domains += cd_pll
1274
1275             # PLL clock established.  has the side-effect of running clklsel
1276             # at the PLL's speed (see DomainRenamer("pllclk") above)
                 # NOTE(review): no DomainRenamer call appears in this class
1277             pllclk = self.pllclk_clk
1278             comb += pllclk.eq(pll.clk_pll_o)
1279
1280             # wire up external 24mhz to PLL
1281             #comb += pll.clk_24_i.eq(self.ref_clk)
1282             # output 18 mhz PLL test signal, and analog oscillator out
1283             comb += self.pll_test_o.eq(pll.pll_test_o)
1284             comb += self.pll_vco_o.eq(pll.pll_vco_o)
1285
1286             # input to pll clock selection
1287             comb += pll.clk_sel_i.eq(self.clk_sel_i)
1288
1289             # now wire up ResetSignals.  don't mind them being in this domain
1290             pll_rst = ResetSignal("pllclk")
1291             comb += pll_rst.eq(ResetSignal())
1292
1293         # internal clock is set to selector clock-out.  has the side-effect of
1294         # running TestIssuer at this speed (see DomainRenamer("intclk") above)
1295         # debug clock runs at coresync internal clock
             # NOTE(review): the two ClockDomain objects created below are
             # never added to m.domains (both "+=" lines are commented out)
1296         cd_coresync = ClockDomain("coresync")
1297         #m.domains += cd_coresync
1298         if self.ti.dbg_domain != 'sync':
1299             cd_dbgsync = ClockDomain("dbgsync")
1300             #m.domains += cd_dbgsync
1301         intclk = ClockSignal("coresync")
             # NOTE(review): dbgclk is assigned again, identically, inside
             # the "if" further down, which is the only place it is used
1302         dbgclk = ClockSignal(self.ti.dbg_domain)
1303         # XXX BYPASS PLL XXX
1304         # XXX BYPASS PLL XXX
1305         # XXX BYPASS PLL XXX
             # self.ref_clk is itself ClockSignal() (see __init__), so both
             # branches drive coresync from the default-domain clock; the
             # PLL output is not used as the core clock here
1306         if self.pll_en:
1307             comb += intclk.eq(self.ref_clk)
1308         else:
1309             comb += intclk.eq(ClockSignal())
1310         if self.ti.dbg_domain != 'sync':
1311             dbgclk = ClockSignal(self.ti.dbg_domain)
1312             comb += dbgclk.eq(intclk)
1313
1314         return m
1315
1316     def ports(self):
             # all ports of the inner issuer and of the PLL, plus the main
             # clock and reset
1317         return list(self.ti.ports()) + list(self.pll.ports()) + \
1318                [ClockSignal(), ResetSignal()]
1319
1320     def external_ports(self):
             # the inner issuer's external ports plus main clock and reset,
             # and the PLL-related signals when the PLL is enabled.
             # appends to the list returned by self.ti.external_ports()
1321         ports = self.ti.external_ports()
1322         ports.append(ClockSignal())
1323         ports.append(ResetSignal())
1324         if self.pll_en:
1325             ports.append(self.clk_sel_i)
1326             ports.append(self.pll.clk_24_i)
1327             ports.append(self.pll_test_o)
1328             ports.append(self.pll_vco_o)
1329             ports.append(self.pllclk_clk)
1330             ports.append(self.ref_clk)
1331         return ports
1332
1333
if __name__ == '__main__':
    # one Function Unit of each kind
    fu_names = ('alu', 'cr', 'branch', 'trap', 'logical',
                'spr', 'div', 'mul', 'shiftrot')
    units = dict.fromkeys(fu_names, 1)

    # bare wishbone for both the load/store and instruction interfaces
    pspec = TestMemPspec(
        ldst_ifacetype='bare_wb',
        imem_ifacetype='bare_wb',
        addr_wid=48,
        mask_wid=8,
        reg_wid=64,
        units=units,
    )
    dut = TestIssuer(pspec)

    # let the nmigen command-line front-end act on any arguments given
    vl = main(dut, ports=dut.ports(), name="test_issuer")

    # with no command-line arguments at all: convert to RTLIL, exposing
    # only the external ports, and save the result to test_issuer.il
    invoked_without_args = len(sys.argv) == 1
    if invoked_without_args:
        vl = rtlil.convert(dut, name="test_issuer",
                           ports=dut.external_ports())
        with open("test_issuer.il", "w") as f:
            f.write(vl)