src/soc/simple/issuer.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
  12 the purpose of this module is to verify the functional correctness
  13 of the Function Units in the absolute simplest and clearest possible
  14 way, and to provide something that can be further incrementally
  15 improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
  19                     ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from nmigen.lib.coding import PriorityEncoder
  25
  26 from openpower.decoder.power_decoder import create_pdecode
  27 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
  28 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
  29 from openpower.decoder.decode2execute1 import Data
  30 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
  31                                      SVP64PredMode)
  32 from openpower.state import CoreState
  33 from openpower.consts import (CR, SVP64CROffs)
  34 from soc.experiment.testmem import TestMemory # test only for instructions
  35 from soc.regfile.regfiles import StateRegs, FastRegs
  36 from soc.simple.core import NonProductionCore
  37 from soc.config.test.test_loadstore import TestMemPspec
  38 from soc.config.ifetch import ConfigFetchUnit
  39 from soc.debug.dmi import CoreDebug, DMIInterface
  40 from soc.debug.jtag import JTAG
  41 from soc.config.pinouts import get_pinspecs
  42 from soc.interrupts.xics import XICS_ICP, XICS_ICS
  43 from soc.bus.simple_gpio import SimpleGPIO
  44 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
  45 from soc.clock.select import ClockSelect
  46 from soc.clock.dummypll import DummyPLL
  47 from openpower.sv.svstate import SVSTATERec
  48
  49
  50 from nmutil.util import rising_edge
  51
  52 def get_insn(f_instr_o, pc):
  53     if f_instr_o.width == 32:
  54         return f_instr_o
  55     else:
  56         # 64-bit: bit 2 of pc decides which word to select
  57         return f_instr_o.word_select(pc[2], 32)
  58
  59 # gets state input or reads from state regfile
  60 def state_get(m, core_rst, state_i, name, regfile, regnum):
  61     comb = m.d.comb
  62     sync = m.d.sync
  63     # read the PC
  64     res = Signal(64, reset_less=True, name=name)
  65     res_ok_delay = Signal(name="%s_ok_delay" % name)
  66     with m.If(~core_rst):
  67         sync += res_ok_delay.eq(~state_i.ok)
  68         with m.If(state_i.ok):
  69             # incoming override (start from pc_i)
  70             comb += res.eq(state_i.data)
  71         with m.Else():
  72             # otherwise read StateRegs regfile for PC...
  73             comb += regfile.ren.eq(1<<regnum)
  74         # ... but on a 1-clock delay
  75         with m.If(res_ok_delay):
  76             comb += res.eq(regfile.data_o)
  77     return res
  78
  79 def get_predint(m, mask, name):
  80     """decode SVP64 predicate integer mask field to reg number and invert
  81     this is identical to the equivalent function in ISACaller except that
  82     it doesn't read the INT directly, it just decodes "what needs to be done"
  83     i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
  84
  85     * all1s is set to indicate that no mask is to be applied.
  86     * regread indicates the GPR register number to be read
  87     * invert is set to indicate that the register value is to be inverted
  88     * unary indicates that the contents of the register is to be shifted 1<<r3
  89     """
  90     comb = m.d.comb
  91     regread = Signal(5, name=name+"regread")
  92     invert = Signal(name=name+"invert")
  93     unary = Signal(name=name+"unary")
  94     all1s = Signal(name=name+"all1s")
  95     with m.Switch(mask):
  96         with m.Case(SVP64PredInt.ALWAYS.value):
  97             comb += all1s.eq(1)      # use 0b1111 (all ones)
  98         with m.Case(SVP64PredInt.R3_UNARY.value):
  99             comb += regread.eq(3)
 100             comb += unary.eq(1)        # 1<<r3 - shift r3 (single bit)
 101         with m.Case(SVP64PredInt.R3.value):
 102             comb += regread.eq(3)
 103         with m.Case(SVP64PredInt.R3_N.value):
 104             comb += regread.eq(3)
 105             comb += invert.eq(1)
 106         with m.Case(SVP64PredInt.R10.value):
 107             comb += regread.eq(10)
 108         with m.Case(SVP64PredInt.R10_N.value):
 109             comb += regread.eq(10)
 110             comb += invert.eq(1)
 111         with m.Case(SVP64PredInt.R30.value):
 112             comb += regread.eq(30)
 113         with m.Case(SVP64PredInt.R30_N.value):
 114             comb += regread.eq(30)
 115             comb += invert.eq(1)
 116     return regread, invert, unary, all1s
 117
 118 def get_predcr(m, mask, name):
 119     """decode SVP64 predicate CR to reg number field and invert status
 120     this is identical to _get_predcr in ISACaller
 121     """
 122     comb = m.d.comb
 123     idx = Signal(2, name=name+"idx")
 124     invert = Signal(name=name+"crinvert")
 125     with m.Switch(mask):
 126         with m.Case(SVP64PredCR.LT.value):
 127             comb += idx.eq(CR.LT)
 128             comb += invert.eq(0)
 129         with m.Case(SVP64PredCR.GE.value):
 130             comb += idx.eq(CR.LT)
 131             comb += invert.eq(1)
 132         with m.Case(SVP64PredCR.GT.value):
 133             comb += idx.eq(CR.GT)
 134             comb += invert.eq(0)
 135         with m.Case(SVP64PredCR.LE.value):
 136             comb += idx.eq(CR.GT)
 137             comb += invert.eq(1)
 138         with m.Case(SVP64PredCR.EQ.value):
 139             comb += idx.eq(CR.EQ)
 140             comb += invert.eq(0)
 141         with m.Case(SVP64PredCR.NE.value):
 142             comb += idx.eq(CR.EQ)
 143             comb += invert.eq(1)
 144         with m.Case(SVP64PredCR.SO.value):
 145             comb += idx.eq(CR.SO)
 146             comb += invert.eq(0)
 147         with m.Case(SVP64PredCR.NS.value):
 148             comb += idx.eq(CR.SO)
 149             comb += invert.eq(1)
 150     return idx, invert
 151
 152
 153 class TestIssuerInternal(Elaboratable):
 154     """TestIssuer - reads instructions from TestMemory and issues them
 155
 156     efficiency and speed is not the main goal here: functional correctness
 157     and code clarity is.  optimisations (which almost 100% interfere with
 158     easy understanding) come later.
 159     """
 160     def __init__(self, pspec):
 161
 162         # test is SVP64 is to be enabled
 163         self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
 164
 165         # and if regfiles are reduced
 166         self.regreduce_en = (hasattr(pspec, "regreduce") and
 167                                             (pspec.regreduce == True))
 168
 169         # JTAG interface.  add this right at the start because if it's
 170         # added it *modifies* the pspec, by adding enable/disable signals
 171         # for parts of the rest of the core
 172         self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
 173         self.dbg_domain = "sync" # sigh "dbgsunc" too problematic
 174         #self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
 175         if self.jtag_en:
 176             # XXX MUST keep this up-to-date with litex, and
 177             # soc-cocotb-sim, and err.. all needs sorting out, argh
 178             subset = ['uart',
 179                       'mtwi',
 180                       'eint', 'gpio', 'mspi0',
 181                       # 'mspi1', - disabled for now
 182                       # 'pwm', 'sd0', - disabled for now
 183                        'sdr']
 184             self.jtag = JTAG(get_pinspecs(subset=subset),
 185                              domain=self.dbg_domain)
 186             # add signals to pspec to enable/disable icache and dcache
 187             # (or data and intstruction wishbone if icache/dcache not included)
 188             # https://bugs.libre-soc.org/show_bug.cgi?id=520
 189             # TODO: do we actually care if these are not domain-synchronised?
 190             # honestly probably not.
 191             pspec.wb_icache_en = self.jtag.wb_icache_en
 192             pspec.wb_dcache_en = self.jtag.wb_dcache_en
 193             self.wb_sram_en = self.jtag.wb_sram_en
 194         else:
 195             self.wb_sram_en = Const(1)
 196
 197         # add 4k sram blocks?
 198         self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
 199                          pspec.sram4x4kblock == True)
 200         if self.sram4x4k:
 201             self.sram4k = []
 202             for i in range(4):
 203                 self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
 204                                                     #features={'err'}
 205                                                     ))
 206
 207         # add interrupt controller?
 208         self.xics = hasattr(pspec, "xics") and pspec.xics == True
 209         if self.xics:
 210             self.xics_icp = XICS_ICP()
 211             self.xics_ics = XICS_ICS()
 212             self.int_level_i = self.xics_ics.int_level_i
 213
 214         # add GPIO peripheral?
 215         self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
 216         if self.gpio:
 217             self.simple_gpio = SimpleGPIO()
 218             self.gpio_o = self.simple_gpio.gpio_o
 219
 220         # main instruction core.  suitable for prototyping / demo only
 221         self.core = core = NonProductionCore(pspec)
 222         self.core_rst = ResetSignal("coresync")
 223
 224         # instruction decoder.  goes into Trap Record
 225         pdecode = create_pdecode()
 226         self.cur_state = CoreState("cur") # current state (MSR/PC/SVSTATE)
 227         self.pdecode2 = PowerDecode2(pdecode, state=self.cur_state,
 228                                      opkls=IssuerDecode2ToOperand,
 229                                      svp64_en=self.svp64_en,
 230                                      regreduce_en=self.regreduce_en)
 231         if self.svp64_en:
 232             self.svp64 = SVP64PrefixDecoder() # for decoding SVP64 prefix
 233
 234         # Test Instruction memory
 235         self.imem = ConfigFetchUnit(pspec).fu
 236
 237         # DMI interface
 238         self.dbg = CoreDebug()
 239
 240         # instruction go/monitor
 241         self.pc_o = Signal(64, reset_less=True)
 242         self.pc_i = Data(64, "pc_i") # set "ok" to indicate "please change me"
 243         self.svstate_i = Data(32, "svstate_i") # ditto
 244         self.core_bigendian_i = Signal() # TODO: set based on MSR.LE
 245         self.busy_o = Signal(reset_less=True)
 246         self.memerr_o = Signal(reset_less=True)
 247
 248         # STATE regfile read /write ports for PC, MSR, SVSTATE
 249         staterf = self.core.regs.rf['state']
 250         self.state_r_pc = staterf.r_ports['cia'] # PC rd
 251         self.state_w_pc = staterf.w_ports['d_wr1'] # PC wr
 252         self.state_r_msr = staterf.r_ports['msr'] # MSR rd
 253         self.state_r_sv = staterf.r_ports['sv'] # SVSTATE rd
 254         self.state_w_sv = staterf.w_ports['sv'] # SVSTATE wr
 255
 256         # DMI interface access
 257         intrf = self.core.regs.rf['int']
 258         crrf = self.core.regs.rf['cr']
 259         xerrf = self.core.regs.rf['xer']
 260         self.int_r = intrf.r_ports['dmi'] # INT read
 261         self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
 262         self.xer_r = xerrf.r_ports['full_xer'] # XER read
 263
 264         if self.svp64_en:
 265             # for predication
 266             self.int_pred = intrf.r_ports['pred'] # INT predicate read
 267             self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read
 268
 269         # hack method of keeping an eye on whether branch/trap set the PC
 270         self.state_nia = self.core.regs.rf['state'].w_ports['nia']
 271         self.state_nia.wen.name = 'state_nia_wen'
 272
 273         # pulse to synchronize the simulator at instruction end
 274         self.insn_done = Signal()
 275
 276         if self.svp64_en:
 277             # store copies of predicate masks
 278             self.srcmask = Signal(64)
 279             self.dstmask = Signal(64)
 280
 281     def fetch_fsm(self, m, core, pc, svstate, nia, is_svp64_mode,
 282                         fetch_pc_ready_o, fetch_pc_valid_i,
 283                         fetch_insn_valid_o, fetch_insn_ready_i):
 284         """fetch FSM
 285
 286         this FSM performs fetch of raw instruction data, partial-decodes
 287         it 32-bit at a time to detect SVP64 prefixes, and will optionally
 288         read a 2nd 32-bit quantity if that occurs.
 289         """
 290         comb = m.d.comb
 291         sync = m.d.sync
 292         pdecode2 = self.pdecode2
 293         cur_state = self.cur_state
 294         dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode
 295
 296         msr_read = Signal(reset=1)
 297
 298         with m.FSM(name='fetch_fsm'):
 299
 300             # waiting (zzz)
 301             with m.State("IDLE"):
 302                 comb += fetch_pc_ready_o.eq(1)
 303                 with m.If(fetch_pc_valid_i):
 304                     # instruction allowed to go: start by reading the PC
 305                     # capture the PC and also drop it into Insn Memory
 306                     # we have joined a pair of combinatorial memory
 307                     # lookups together.  this is Generally Bad.
 308                     comb += self.imem.a_pc_i.eq(pc)
 309                     comb += self.imem.a_valid_i.eq(1)
 310                     comb += self.imem.f_valid_i.eq(1)
 311                     sync += cur_state.pc.eq(pc)
 312                     sync += cur_state.svstate.eq(svstate) # and svstate
 313
 314                     # initiate read of MSR. arrives one clock later
 315                     comb += self.state_r_msr.ren.eq(1 << StateRegs.MSR)
 316                     sync += msr_read.eq(0)
 317
 318                     m.next = "INSN_READ"  # move to "wait for bus" phase
 319
 320             # dummy pause to find out why simulation is not keeping up
 321             with m.State("INSN_READ"):
 322                 # one cycle later, msr/sv read arrives.  valid only once.
 323                 with m.If(~msr_read):
 324                     sync += msr_read.eq(1) # yeah don't read it again
 325                     sync += cur_state.msr.eq(self.state_r_msr.data_o)
 326                 with m.If(self.imem.f_busy_o): # zzz...
 327                     # busy: stay in wait-read
 328                     comb += self.imem.a_valid_i.eq(1)
 329                     comb += self.imem.f_valid_i.eq(1)
 330                 with m.Else():
 331                     # not busy: instruction fetched
 332                     insn = get_insn(self.imem.f_instr_o, cur_state.pc)
 333                     if self.svp64_en:
 334                         svp64 = self.svp64
 335                         # decode the SVP64 prefix, if any
 336                         comb += svp64.raw_opcode_in.eq(insn)
 337                         comb += svp64.bigendian.eq(self.core_bigendian_i)
 338                         # pass the decoded prefix (if any) to PowerDecoder2
 339                         sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
 340                         sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
 341                         # remember whether this is a prefixed instruction, so
 342                         # the FSM can readily loop when VL==0
 343                         sync += is_svp64_mode.eq(svp64.is_svp64_mode)
 344                         # calculate the address of the following instruction
 345                         insn_size = Mux(svp64.is_svp64_mode, 8, 4)
 346                         sync += nia.eq(cur_state.pc + insn_size)
 347                         with m.If(~svp64.is_svp64_mode):
 348                             # with no prefix, store the instruction
 349                             # and hand it directly to the next FSM
 350                             sync += dec_opcode_i.eq(insn)
 351                             m.next = "INSN_READY"
 352                         with m.Else():
 353                             # fetch the rest of the instruction from memory
 354                             comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
 355                             comb += self.imem.a_valid_i.eq(1)
 356                             comb += self.imem.f_valid_i.eq(1)
 357                             m.next = "INSN_READ2"
 358                     else:
 359                         # not SVP64 - 32-bit only
 360                         sync += nia.eq(cur_state.pc + 4)
 361                         sync += dec_opcode_i.eq(insn)
 362                         m.next = "INSN_READY"
 363
 364             with m.State("INSN_READ2"):
 365                 with m.If(self.imem.f_busy_o):  # zzz...
 366                     # busy: stay in wait-read
 367                     comb += self.imem.a_valid_i.eq(1)
 368                     comb += self.imem.f_valid_i.eq(1)
 369                 with m.Else():
 370                     # not busy: instruction fetched
 371                     insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
 372                     sync += dec_opcode_i.eq(insn)
 373                     m.next = "INSN_READY"
 374                     # TODO: probably can start looking at pdecode2.rm_dec
 375                     # here or maybe even in INSN_READ state, if svp64_mode
 376                     # detected, in order to trigger - and wait for - the
 377                     # predicate reading.
 378                     if self.svp64_en:
 379                         pmode = pdecode2.rm_dec.predmode
 380                     """
 381                     if pmode != SVP64PredMode.ALWAYS.value:
 382                         fire predicate loading FSM and wait before
 383                         moving to INSN_READY
 384                     else:
 385                         sync += self.srcmask.eq(-1) # set to all 1s
 386                         sync += self.dstmask.eq(-1) # set to all 1s
 387                         m.next = "INSN_READY"
 388                     """
 389
 390             with m.State("INSN_READY"):
 391                 # hand over the instruction, to be decoded
 392                 comb += fetch_insn_valid_o.eq(1)
 393                 with m.If(fetch_insn_ready_i):
 394                     m.next = "IDLE"
 395
    def fetch_predicate_fsm(self, m,
                            pred_insn_valid_i, pred_insn_ready_o,
                            pred_mask_valid_o, pred_mask_ready_i):
        """fetch_predicate_fsm - obtains (constructs in the case of CR)
           src/dest predicate masks

        https://bugs.libre-soc.org/show_bug.cgi?id=617
        the predicates are read here through self.int_pred and
        self.cr_pred (set up in __init__ from IntRegs r_ports['pred'] and
        CRRegs r_ports['cr_pred']).  in the case of CRs it has to
        be done through multiple reads, extracting one relevant at a time.
        later, a faster way would be to use the 32-bit-wide CR port but
        this is more complex decoding, here.  equivalent code used in
        ISACaller is "from openpower.decoder.isa.caller import get_predcr"

        * m: Module to which the FSM is added
        * pred_insn_valid_i / pred_insn_ready_o: handshake that starts a
          predicate fetch (ready is raised in FETCH_PRED_IDLE)
        * pred_mask_valid_o / pred_mask_ready_i: handshake that signals
          the masks are available (valid is raised in FETCH_PRED_DONE)

        the results are left in self.srcmask and self.dstmask.

        note: this ENTIRE FSM is not to be called when svp64 is disabled
        (self.int_pred, self.cr_pred, self.srcmask and self.dstmask are
        only created by __init__ when svp64_en is set)
        """
        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        rm_dec = pdecode2.rm_dec # SVP64RMModeDecode
        predmode = rm_dec.predmode
        srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
        cr_pred, int_pred = self.cr_pred, self.int_pred   # read regfiles
        # get src/dst step, so we can skip already used mask bits
        cur_state = self.cur_state
        srcstep = cur_state.svstate.srcstep
        dststep = cur_state.svstate.dststep
        cur_vl = cur_state.svstate.vl

        # decode predicates
        # (both the INT and the CR interpretation of each field are decoded
        # unconditionally; predmode selects which one is acted upon below)
        sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
        dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
        sidx, scrinvert = get_predcr(m, srcpred, 's')
        didx, dcrinvert = get_predcr(m, dstpred, 'd')

        # store fetched masks, for either intpred or crpred
        # when src/dst step is not zero, the skipped mask bits need to be
        # shifted-out, before actually storing them in src/dest mask
        new_srcmask = Signal(64, reset_less=True)
        new_dstmask = Signal(64, reset_less=True)

        with m.FSM(name="fetch_predicate"):

            with m.State("FETCH_PRED_IDLE"):
                comb += pred_insn_ready_o.eq(1)
                with m.If(pred_insn_valid_i):
                    with m.If(predmode == SVP64PredMode.INT):
                        # skip fetching destination mask register, when zero
                        # (dall1s is raised by get_predint for the ALWAYS
                        # encoding: no register read, mask is all ones)
                        with m.If(dall1s):
                            sync += new_dstmask.eq(-1)
                            # directly go to fetch source mask register
                            # guaranteed not to be zero (otherwise predmode
                            # would be SVP64PredMode.ALWAYS, not INT)
                            comb += int_pred.addr.eq(sregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_SRC_READ"
                        # fetch destination predicate register
                        with m.Else():
                            comb += int_pred.addr.eq(dregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_DST_READ"
                    with m.Elif(predmode == SVP64PredMode.CR):
                        # go fetch masks from the CR register file
                        # (masks start empty: CR_READ ORs bits into them)
                        sync += new_srcmask.eq(0)
                        sync += new_dstmask.eq(0)
                        m.next = "CR_READ"
                    with m.Else():
                        # neither INT nor CR: no predication, so write the
                        # all-ones masks directly and bypass the shift stage
                        sync += self.srcmask.eq(-1)
                        sync += self.dstmask.eq(-1)
                        m.next = "FETCH_PRED_DONE"

            # consumes the data of the read requested in FETCH_PRED_IDLE
            with m.State("INT_DST_READ"):
                # store destination mask
                inv = Repl(dinvert, 64)
                with m.If(dunary):
                    # set selected mask bit for 1<<r3 mode
                    # (self.int_pred is the same port as local int_pred)
                    dst_shift = Signal(range(64))
                    comb += dst_shift.eq(self.int_pred.data_o & 0b111111)
                    sync += new_dstmask.eq(1 << dst_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_dstmask.eq(self.int_pred.data_o ^ inv)
                # skip fetching source mask register, when zero
                with m.If(sall1s):
                    sync += new_srcmask.eq(-1)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                # fetch source predicate register
                with m.Else():
                    comb += int_pred.addr.eq(sregread)
                    comb += int_pred.ren.eq(1)
                    m.next = "INT_SRC_READ"

            # consumes the data of the source-register read requested in
            # the previous state (FETCH_PRED_IDLE or INT_DST_READ)
            with m.State("INT_SRC_READ"):
                # store source mask
                inv = Repl(sinvert, 64)
                with m.If(sunary):
                    # set selected mask bit for 1<<r3 mode
                    src_shift = Signal(range(64))
                    comb += src_shift.eq(self.int_pred.data_o & 0b111111)
                    sync += new_srcmask.eq(1 << src_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_srcmask.eq(self.int_pred.data_o ^ inv)
                m.next = "FETCH_PRED_SHIFT_MASK"

            # fetch masks from the CR register file
            # implements the following loop:
            # idx, inv = get_predcr(mask)
            # mask = 0
            # for cr_idx in range(vl):
            #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
            #     if cr[idx] ^ inv:
            #         mask |= 1 << cr_idx
            # return mask
            with m.State("CR_READ"):
                # CR index to be read, which will be ready by the next cycle
                cr_idx = Signal.like(cur_vl, reset_less=True)
                # submit the read operation to the regfile
                with m.If(cr_idx != cur_vl):
                    # the CR read port is unary ...
                    # ren = 1 << cr_idx
                    # ... in MSB0 convention ...
                    # ren = 1 << (7 - cr_idx)
                    # ... and with an offset:
                    # ren = 1 << (7 - off - cr_idx)
                    # NOTE(review): the "7 -" form appears to assume that
                    # CRPred + cr_idx never exceeds 7 - confirm for large VL
                    idx = SVP64CROffs.CRPred + cr_idx
                    comb += cr_pred.ren.eq(1 << (7 - idx))
                    # signal data valid in the next cycle
                    # (cr_read is a Python-level name: although created
                    # inside this If it is also used by the Else below and
                    # by the capture logic after it)
                    cr_read = Signal(reset_less=True)
                    sync += cr_read.eq(1)
                    # load the next index
                    sync += cr_idx.eq(cr_idx + 1)
                with m.Else():
                    # exit on loop end
                    sync += cr_read.eq(0)
                    sync += cr_idx.eq(0)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                # not part of the If/Else above: in the final (exit) cycle
                # cr_read still holds 1 from the previous cycle, so the
                # data of the last read submitted is captured as well
                with m.If(cr_read):
                    # compensate for the one cycle delay on the regfile
                    cur_cr_idx = Signal.like(cur_vl)
                    comb += cur_cr_idx.eq(cr_idx - 1)
                    # read the CR field, select the appropriate bit
                    cr_field = Signal(4)
                    scr_bit = Signal()
                    dcr_bit = Signal()
                    comb += cr_field.eq(cr_pred.data_o)
                    comb += scr_bit.eq(cr_field.bit_select(sidx, 1) ^ scrinvert)
                    comb += dcr_bit.eq(cr_field.bit_select(didx, 1) ^ dcrinvert)
                    # set the corresponding mask bit
                    bit_to_set = Signal.like(self.srcmask)
                    comb += bit_to_set.eq(1 << cur_cr_idx)
                    with m.If(scr_bit):
                        sync += new_srcmask.eq(new_srcmask | bit_to_set)
                    with m.If(dcr_bit):
                        sync += new_dstmask.eq(new_dstmask | bit_to_set)

            with m.State("FETCH_PRED_SHIFT_MASK"):
                # shift-out skipped mask bits
                sync += self.srcmask.eq(new_srcmask >> srcstep)
                sync += self.dstmask.eq(new_dstmask >> dststep)
                m.next = "FETCH_PRED_DONE"

            with m.State("FETCH_PRED_DONE"):
                # hold "valid" until the consumer accepts the masks
                comb += pred_mask_valid_o.eq(1)
                with m.If(pred_mask_ready_i):
                    m.next = "FETCH_PRED_IDLE"
 562
 563     def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
 564                   dbg, core_rst, is_svp64_mode,
 565                   fetch_pc_ready_o, fetch_pc_valid_i,
 566                   fetch_insn_valid_o, fetch_insn_ready_i,
 567                   pred_insn_valid_i, pred_insn_ready_o,
 568                   pred_mask_valid_o, pred_mask_ready_i,
 569                   exec_insn_valid_i, exec_insn_ready_o,
 570                   exec_pc_valid_o, exec_pc_ready_i):
 571         """issue FSM
 572
 573         decode / issue FSM.  this interacts with the "fetch" FSM
 574         through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
 575         (outgoing). also interacts with the "execute" FSM
 576         through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
 577         (incoming).
 578         SVP64 RM prefixes have already been set up by the
 579         "fetch" phase, so execute is fairly straightforward.
 580         """
 581
 582         comb = m.d.comb
 583         sync = m.d.sync
 584         pdecode2 = self.pdecode2
 585         cur_state = self.cur_state
 586
 587         # temporaries
 588         dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode
 589
 590         # for updating svstate (things like srcstep etc.)
 591         update_svstate = Signal() # set this (below) if updating
 592         new_svstate = SVSTATERec("new_svstate")
 593         comb += new_svstate.eq(cur_state.svstate)
 594
 595         # precalculate srcstep+1 and dststep+1
 596         cur_srcstep = cur_state.svstate.srcstep
 597         cur_dststep = cur_state.svstate.dststep
 598         next_srcstep = Signal.like(cur_srcstep)
 599         next_dststep = Signal.like(cur_dststep)
 600         comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
 601         comb += next_dststep.eq(cur_state.svstate.dststep+1)
 602
 603         # note if an exception happened.  in a pipelined or OoO design
 604         # this needs to be accompanied by "shadowing" (or stalling)
 605         el = []
 606         for exc in core.fus.excs.values():
 607             el.append(exc.happened)
 608         exc_happened = Signal()
 609         if len(el) > 0: # at least one exception
 610             comb += exc_happened.eq(Cat(*el).bool())
 611
 612         with m.FSM(name="issue_fsm"):
 613
 614             # sync with the "fetch" phase which is reading the instruction
 615             # at this point, there is no instruction running, that
 616             # could inadvertently update the PC.
 617             with m.State("ISSUE_START"):
 618                 # wait on "core stop" release, before next fetch
 619                 # need to do this here, in case we are in a VL==0 loop
 620                 with m.If(~dbg.core_stop_o & ~core_rst):
 621                     comb += fetch_pc_valid_i.eq(1) # tell fetch to start
 622                     with m.If(fetch_pc_ready_o):   # fetch acknowledged us
 623                         m.next = "INSN_WAIT"
 624                 with m.Else():
 625                     # tell core it's stopped, and acknowledge debug handshake
 626                     comb += dbg.core_stopped_i.eq(1)
 627                     # while stopped, allow updating the PC and SVSTATE
 628                     with m.If(self.pc_i.ok):
 629                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 630                         comb += self.state_w_pc.data_i.eq(self.pc_i.data)
 631                         sync += pc_changed.eq(1)
 632                     with m.If(self.svstate_i.ok):
 633                         comb += new_svstate.eq(self.svstate_i.data)
 634                         comb += update_svstate.eq(1)
 635                         sync += sv_changed.eq(1)
 636
 637             # wait for an instruction to arrive from Fetch
 638             with m.State("INSN_WAIT"):
 639                 comb += fetch_insn_ready_i.eq(1)
 640                 with m.If(fetch_insn_valid_o):
 641                     # loop into ISSUE_START if it's a SVP64 instruction
 642                     # and VL == 0.  this because VL==0 is a for-loop
 643                     # from 0 to 0 i.e. always, always a NOP.
 644                     cur_vl = cur_state.svstate.vl
 645                     with m.If(is_svp64_mode & (cur_vl == 0)):
 646                         # update the PC before fetching the next instruction
 647                         # since we are in a VL==0 loop, no instruction was
 648                         # executed that we could be overwriting
 649                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 650                         comb += self.state_w_pc.data_i.eq(nia)
 651                         comb += self.insn_done.eq(1)
 652                         m.next = "ISSUE_START"
 653                     with m.Else():
 654                         if self.svp64_en:
 655                             m.next = "PRED_START"  # start fetching predicate
 656                         else:
 657                             m.next = "DECODE_SV"  # skip predication
 658
 659             with m.State("PRED_START"):
 660                 comb += pred_insn_valid_i.eq(1)  # tell fetch_pred to start
 661                 with m.If(pred_insn_ready_o):  # fetch_pred acknowledged us
 662                     m.next = "MASK_WAIT"
 663
 664             with m.State("MASK_WAIT"):
 665                 comb += pred_mask_ready_i.eq(1) # ready to receive the masks
 666                 with m.If(pred_mask_valid_o): # predication masks are ready
 667                     m.next = "PRED_SKIP"
 668
 669             # skip zeros in predicate
 670             with m.State("PRED_SKIP"):
 671                 with m.If(~is_svp64_mode):
 672                     m.next = "DECODE_SV"  # nothing to do
 673                 with m.Else():
 674                     if self.svp64_en:
 675                         pred_src_zero = pdecode2.rm_dec.pred_sz
 676                         pred_dst_zero = pdecode2.rm_dec.pred_dz
 677
 678                         # new srcstep, after skipping zeros
 679                         skip_srcstep = Signal.like(cur_srcstep)
 680                         # value to be added to the current srcstep
 681                         src_delta = Signal.like(cur_srcstep)
 682                         # add leading zeros to srcstep, if not in zero mode
 683                         with m.If(~pred_src_zero):
 684                             # priority encoder (count leading zeros)
 685                             # append guard bit, in case the mask is all zeros
 686                             pri_enc_src = PriorityEncoder(65)
 687                             m.submodules.pri_enc_src = pri_enc_src
 688                             comb += pri_enc_src.i.eq(Cat(self.srcmask,
 689                                                          Const(1, 1)))
 690                             comb += src_delta.eq(pri_enc_src.o)
 691                         # apply delta to srcstep
 692                         comb += skip_srcstep.eq(cur_srcstep + src_delta)
 693                         # shift-out all leading zeros from the mask
 694                         # plus the leading "one" bit
 695                         # TODO count leading zeros and shift-out the zero
 696                         #      bits, in the same step, in hardware
 697                         sync += self.srcmask.eq(self.srcmask >> (src_delta+1))
 698
 699                         # same as above, but for dststep
 700                         skip_dststep = Signal.like(cur_dststep)
 701                         dst_delta = Signal.like(cur_dststep)
 702                         with m.If(~pred_dst_zero):
 703                             pri_enc_dst = PriorityEncoder(65)
 704                             m.submodules.pri_enc_dst = pri_enc_dst
 705                             comb += pri_enc_dst.i.eq(Cat(self.dstmask,
 706                                                          Const(1, 1)))
 707                             comb += dst_delta.eq(pri_enc_dst.o)
 708                         comb += skip_dststep.eq(cur_dststep + dst_delta)
 709                         sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))
 710
 711                         # TODO: initialize mask[VL]=1 to avoid passing past VL
 712                         with m.If((skip_srcstep >= cur_vl) |
 713                                   (skip_dststep >= cur_vl)):
 714                             # end of VL loop. Update PC and reset src/dst step
 715                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 716                             comb += self.state_w_pc.data_i.eq(nia)
 717                             comb += new_svstate.srcstep.eq(0)
 718                             comb += new_svstate.dststep.eq(0)
 719                             comb += update_svstate.eq(1)
 720                             # synchronize with the simulator
 721                             comb += self.insn_done.eq(1)
 722                             # go back to Issue
 723                             m.next = "ISSUE_START"
 724                         with m.Else():
 725                             # update new src/dst step
 726                             comb += new_svstate.srcstep.eq(skip_srcstep)
 727                             comb += new_svstate.dststep.eq(skip_dststep)
 728                             comb += update_svstate.eq(1)
 729                             # proceed to Decode
 730                             m.next = "DECODE_SV"
 731
 732                         # pass predicate mask bits through to satellite decoders
 733                         # TODO: for SIMD this will be *multiple* bits
 734                         sync += core.sv_pred_sm.eq(self.srcmask[0])
 735                         sync += core.sv_pred_dm.eq(self.dstmask[0])
 736
 737             # after src/dst step have been updated, we are ready
 738             # to decode the instruction
 739             with m.State("DECODE_SV"):
 740                 # decode the instruction
 741                 sync += core.e.eq(pdecode2.e)
 742                 sync += core.state.eq(cur_state)
 743                 sync += core.raw_insn_i.eq(dec_opcode_i)
 744                 sync += core.bigendian_i.eq(self.core_bigendian_i)
 745                 if self.svp64_en:
 746                     sync += core.sv_rm.eq(pdecode2.sv_rm)
 747                     # set RA_OR_ZERO detection in satellite decoders
 748                     sync += core.sv_a_nz.eq(pdecode2.sv_a_nz)
 749                     # and svp64 detection
 750                     sync += core.is_svp64_mode.eq(is_svp64_mode)
 751
 752                 m.next = "INSN_EXECUTE"  # move to "execute"
 753
 754             # handshake with execution FSM, move to "wait" once acknowledged
 755             with m.State("INSN_EXECUTE"):
 756                 comb += exec_insn_valid_i.eq(1) # trigger execute
 757                 with m.If(exec_insn_ready_o):   # execute acknowledged us
 758                     m.next = "EXECUTE_WAIT"
 759
 760             with m.State("EXECUTE_WAIT"):
 761                 # wait on "core stop" release, at instruction end
 762                 # need to do this here, in case we are in a VL>1 loop
 763                 with m.If(~dbg.core_stop_o & ~core_rst):
 764                     comb += exec_pc_ready_i.eq(1)
 765                     # see https://bugs.libre-soc.org/show_bug.cgi?id=636
 766                     #with m.If(exec_pc_valid_o & exc_happened):
 767                     #    probably something like this:
 768                     #    sync += pdecode2.ldst_exc.eq(core.fus.get_exc("ldst0")
 769                     # TODO: the exception info needs to be blatted
 770                     # into pdecode.ldst_exc, and the instruction "re-run".
 771                     # when ldst_exc.happened is set, the PowerDecoder2
 772                     # reacts very differently: it re-writes the instruction
 773                     # with a "trap" (calls PowerDecoder2.trap()) which
 774                     # will *overwrite* whatever was requested and jump the
 775                     # PC to the exception address, as well as alter MSR.
 776                     # nothing else needs to be done other than to note
 777                     # the change of PC and MSR (and, later, SVSTATE)
 778                     #with m.Elif(exec_pc_valid_o):
 779                     with m.If(exec_pc_valid_o): # replace with Elif (above)
 780
 781                         # was this the last loop iteration?
 782                         is_last = Signal()
 783                         cur_vl = cur_state.svstate.vl
 784                         comb += is_last.eq(next_srcstep == cur_vl)
 785
 786                         # if either PC or SVSTATE were changed by the previous
 787                         # instruction, go directly back to Fetch, without
 788                         # updating either PC or SVSTATE
 789                         with m.If(pc_changed | sv_changed):
 790                             m.next = "ISSUE_START"
 791
 792                         # also return to Fetch, when no output was a vector
 793                         # (regardless of SRCSTEP and VL), or when the last
 794                         # instruction was really the last one of the VL loop
 795                         with m.Elif((~pdecode2.loop_continue) | is_last):
 796                             # before going back to fetch, update the PC state
 797                             # register with the NIA.
 798                             # ok here we are not reading the branch unit.
 799                             # TODO: this just blithely overwrites whatever
 800                             #       pipeline updated the PC
 801                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 802                             comb += self.state_w_pc.data_i.eq(nia)
 803                             # reset SRCSTEP before returning to Fetch
 804                             if self.svp64_en:
 805                                 with m.If(pdecode2.loop_continue):
 806                                     comb += new_svstate.srcstep.eq(0)
 807                                     comb += new_svstate.dststep.eq(0)
 808                                     comb += update_svstate.eq(1)
 809                             else:
 810                                 comb += new_svstate.srcstep.eq(0)
 811                                 comb += new_svstate.dststep.eq(0)
 812                                 comb += update_svstate.eq(1)
 813                             m.next = "ISSUE_START"
 814
 815                         # returning to Execute? then, first update SRCSTEP
 816                         with m.Else():
 817                             comb += new_svstate.srcstep.eq(next_srcstep)
 818                             comb += new_svstate.dststep.eq(next_dststep)
 819                             comb += update_svstate.eq(1)
 820                             # return to mask skip loop
 821                             m.next = "PRED_SKIP"
 822
 823                 with m.Else():
 824                     comb += dbg.core_stopped_i.eq(1)
 825                     # while stopped, allow updating the PC and SVSTATE
 826                     with m.If(self.pc_i.ok):
 827                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 828                         comb += self.state_w_pc.data_i.eq(self.pc_i.data)
 829                         sync += pc_changed.eq(1)
 830                     with m.If(self.svstate_i.ok):
 831                         comb += new_svstate.eq(self.svstate_i.data)
 832                         comb += update_svstate.eq(1)
 833                         sync += sv_changed.eq(1)
 834
 835         # check if svstate needs updating: if so, write it to State Regfile
 836         with m.If(update_svstate):
 837             comb += self.state_w_sv.wen.eq(1<<StateRegs.SVSTATE)
 838             comb += self.state_w_sv.data_i.eq(new_svstate)
 839             sync += cur_state.svstate.eq(new_svstate) # for next clock
 840
 841     def execute_fsm(self, m, core, pc_changed, sv_changed,
 842                     exec_insn_valid_i, exec_insn_ready_o,
 843                     exec_pc_valid_o, exec_pc_ready_i):
 844         """execute FSM
 845
 846         execute FSM. this interacts with the "issue" FSM
 847         through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
 848         (outgoing). SVP64 RM prefixes have already been set up by the
 849         "issue" phase, so execute is fairly straightforward.
 850         """
 851
 852         comb = m.d.comb
 853         sync = m.d.sync
 854         pdecode2 = self.pdecode2
 855
 856         # temporaries
 857         core_busy_o = core.busy_o                 # core is busy
 858         core_ivalid_i = core.ivalid_i             # instruction is valid
 859         core_issue_i = core.issue_i               # instruction is issued
 860         insn_type = core.e.do.insn_type           # instruction MicroOp type
 861
 862         with m.FSM(name="exec_fsm"):
 863
 864             # waiting for instruction bus (stays there until not busy)
 865             with m.State("INSN_START"):
 866                 comb += exec_insn_ready_o.eq(1)
 867                 with m.If(exec_insn_valid_i):
 868                     comb += core_ivalid_i.eq(1)  # instruction is valid
 869                     comb += core_issue_i.eq(1)  # and issued
 870                     sync += sv_changed.eq(0)
 871                     sync += pc_changed.eq(0)
 872                     m.next = "INSN_ACTIVE"  # move to "wait completion"
 873
 874             # instruction started: must wait till it finishes
 875             with m.State("INSN_ACTIVE"):
 876                 with m.If(insn_type != MicrOp.OP_NOP):
 877                     comb += core_ivalid_i.eq(1) # instruction is valid
 878                 # note changes to PC and SVSTATE
 879                 with m.If(self.state_nia.wen & (1<<StateRegs.SVSTATE)):
 880                     sync += sv_changed.eq(1)
 881                 with m.If(self.state_nia.wen & (1<<StateRegs.PC)):
 882                     sync += pc_changed.eq(1)
 883                 with m.If(~core_busy_o): # instruction done!
 884                     comb += exec_pc_valid_o.eq(1)
 885                     with m.If(exec_pc_ready_i):
 886                         comb += self.insn_done.eq(1)
 887                         m.next = "INSN_START"  # back to fetch
 888
 889     def setup_peripherals(self, m):
 890         comb, sync = m.d.comb, m.d.sync
 891
 892         # okaaaay so the debug module must be in coresync clock domain
 893         # but NOT its reset signal. to cope with this, set every single
 894         # submodule explicitly in coresync domain, debug and JTAG
 895         # in their own one but using *external* reset.
 896         csd = DomainRenamer("coresync")
 897         dbd = DomainRenamer(self.dbg_domain)
 898
 899         m.submodules.core = core = csd(self.core)
 900         m.submodules.imem = imem = csd(self.imem)
 901         m.submodules.dbg = dbg = dbd(self.dbg)
 902         if self.jtag_en:
 903             m.submodules.jtag = jtag = dbd(self.jtag)
 904             # TODO: UART2GDB mux, here, from external pin
 905             # see https://bugs.libre-soc.org/show_bug.cgi?id=499
 906             sync += dbg.dmi.connect_to(jtag.dmi)
 907
 908         cur_state = self.cur_state
 909
 910         # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
 911         if self.sram4x4k:
 912             for i, sram in enumerate(self.sram4k):
 913                 m.submodules["sram4k_%d" % i] = csd(sram)
 914                 comb += sram.enable.eq(self.wb_sram_en)
 915
 916         # XICS interrupt handler
 917         if self.xics:
 918             m.submodules.xics_icp = icp = csd(self.xics_icp)
 919             m.submodules.xics_ics = ics = csd(self.xics_ics)
 920             comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
 921             sync += cur_state.eint.eq(icp.core_irq_o) # connect ICP to core
 922
 923         # GPIO test peripheral
 924         if self.gpio:
 925             m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)
 926
 927         # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
 928         # XXX causes litex ECP5 test to get wrong idea about input and output
 929         # (but works with verilator sim *sigh*)
 930         #if self.gpio and self.xics:
 931         #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
 932
 933         # instruction decoder
 934         pdecode = create_pdecode()
 935         m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
 936         if self.svp64_en:
 937             m.submodules.svp64 = svp64 = csd(self.svp64)
 938
 939         # convenience
 940         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
 941         intrf = self.core.regs.rf['int']
 942
 943         # clock delay power-on reset
 944         cd_por  = ClockDomain(reset_less=True)
 945         cd_sync = ClockDomain()
 946         core_sync = ClockDomain("coresync")
 947         m.domains += cd_por, cd_sync, core_sync
 948         if self.dbg_domain != "sync":
 949             dbg_sync = ClockDomain(self.dbg_domain)
 950             m.domains += dbg_sync
 951
 952         ti_rst = Signal(reset_less=True)
 953         delay = Signal(range(4), reset=3)
 954         with m.If(delay != 0):
 955             m.d.por += delay.eq(delay - 1)
 956         comb += cd_por.clk.eq(ClockSignal())
 957
 958         # power-on reset delay
 959         core_rst = ResetSignal("coresync")
 960         comb += ti_rst.eq(delay != 0 | dbg.core_rst_o | ResetSignal())
 961         comb += core_rst.eq(ti_rst)
 962
 963         # debug clock is same as coresync, but reset is *main external*
 964         if self.dbg_domain != "sync":
 965             dbg_rst = ResetSignal(self.dbg_domain)
 966             comb += dbg_rst.eq(ResetSignal())
 967
 968         # busy/halted signals from core
 969         comb += self.busy_o.eq(core.busy_o)
 970         comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
 971
 972         # temporary hack: says "go" immediately for both address gen and ST
 973         l0 = core.l0
 974         ldst = core.fus.fus['ldst0']
 975         st_go_edge = rising_edge(m, ldst.st.rel_o)
 976         m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o) # link addr-go direct to rel
 977         m.d.comb += ldst.st.go_i.eq(st_go_edge) # link store-go to rising rel
 978
    def elaborate(self, platform):
        """build the issuer: peripherals, state read-out and the FSMs

        sets up peripherals and the core, reads PC and SVSTATE (override
        or regfile), creates the ready/valid handshake signals and then
        instantiates the fetch, issue, (optional) predicate-fetch and
        execute FSMs, followed by the DMI regfile access logic and the
        DEC/TB FSM.

        NOTE: statement order matters here.  the default (zero) drives of
        state_w_pc and state_r_msr come *before* the FSMs are added, and
        the core-reset override of nia comes *after* them.

        :param platform: unused in this method
        :returns: the constructed Module
        """
        m = Module()
        # convenience
        comb, sync = m.d.comb, m.d.sync
        cur_state = self.cur_state
        pdecode2 = self.pdecode2
        dbg = self.dbg
        core = self.core

        # set up peripherals and core
        core_rst = self.core_rst
        self.setup_peripherals(m)

        # reset current state if core reset requested
        with m.If(core_rst):
            m.d.sync += self.cur_state.eq(0)

        # PC and instruction from I-Memory
        comb += self.pc_o.eq(cur_state.pc)
        pc_changed = Signal() # note write to PC
        sv_changed = Signal() # note write to SVSTATE

        # read state either from incoming override or from regfile
        # TODO: really should be doing MSR in the same way
        pc = state_get(m, core_rst, self.pc_i,
                            "pc",                  # read PC
                            self.state_r_pc, StateRegs.PC)
        svstate = state_get(m, core_rst, self.svstate_i,
                            "svstate",   # read SVSTATE
                            self.state_r_sv, StateRegs.SVSTATE)

        # don't write pc every cycle: these are defaults only, the issue
        # FSM drives state_w_pc in the states where it updates the PC
        comb += self.state_w_pc.wen.eq(0)
        comb += self.state_w_pc.data_i.eq(0)

        # don't read msr every cycle
        comb += self.state_r_msr.ren.eq(0)

        # address of the next instruction, in the absence of a branch
        # depends on the instruction size
        nia = Signal(64)

        # connect up debug signals
        # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
        comb += dbg.terminate_i.eq(core.core_terminate_o)
        comb += dbg.state.pc.eq(pc)
        comb += dbg.state.svstate.eq(svstate)
        comb += dbg.state.msr.eq(cur_state.msr)

        # pass the prefix mode from Fetch to Issue, so the latter can loop
        # on VL==0
        is_svp64_mode = Signal()

        # there are three FSMs - four if SVP64 is enabled: fetch (32/64-bit),
        # issue, decode/execute, now joined by "Predicate fetch/calculate".
        # these are the handshake signals between each

        # fetch FSM can run as soon as the PC is valid
        fetch_pc_valid_i = Signal() # Issue tells Fetch "start next read"
        fetch_pc_ready_o = Signal() # Fetch acknowledges Issue: "proceed"

        # fetch FSM hands over the instruction to be decoded / issued
        fetch_insn_valid_o = Signal()
        fetch_insn_ready_i = Signal()

        # predicate fetch FSM decodes and fetches the predicate
        pred_insn_valid_i = Signal()
        pred_insn_ready_o = Signal()

        # predicate fetch FSM delivers the masks
        pred_mask_valid_o = Signal()
        pred_mask_ready_i = Signal()

        # issue FSM delivers the instruction to be executed
        exec_insn_valid_i = Signal()
        exec_insn_ready_o = Signal()

        # execute FSM, hands over the PC/SVSTATE back to the issue FSM
        exec_pc_valid_o = Signal()
        exec_pc_ready_i = Signal()

        # the FSMs here are perhaps unusual in that they detect conditions
        # then "hold" information, combinatorially, for the core
        # (as opposed to using sync - which would be on a clock's delay)
        # this includes the actual opcode, valid flags and so on.

        # Fetch, then predicate fetch, then Issue, then Execute.
        # Issue is where the VL for-loop lives.  the ready/valid
        # signalling is used to communicate between the four.

        self.fetch_fsm(m, core, pc, svstate, nia, is_svp64_mode,
                       fetch_pc_ready_o, fetch_pc_valid_i,
                       fetch_insn_valid_o, fetch_insn_ready_i)

        self.issue_fsm(m, core, pc_changed, sv_changed, nia,
                       dbg, core_rst, is_svp64_mode,
                       fetch_pc_ready_o, fetch_pc_valid_i,
                       fetch_insn_valid_o, fetch_insn_ready_i,
                       pred_insn_valid_i, pred_insn_ready_o,
                       pred_mask_valid_o, pred_mask_ready_i,
                       exec_insn_valid_i, exec_insn_ready_o,
                       exec_pc_valid_o, exec_pc_ready_i)

        # the predicate-fetch FSM only exists when SVP64 is enabled
        if self.svp64_en:
            self.fetch_predicate_fsm(m,
                                     pred_insn_valid_i, pred_insn_ready_o,
                                     pred_mask_valid_o, pred_mask_ready_i)

        self.execute_fsm(m, core, pc_changed, sv_changed,
                         exec_insn_valid_i, exec_insn_ready_o,
                         exec_pc_valid_o, exec_pc_ready_i)

        # whatever was done above, over-ride it if core reset is held
        with m.If(core_rst):
            sync += nia.eq(0)

        # this bit doesn't have to be in the FSM: connect up to read
        # regfiles on demand from DMI
        self.do_dmi(m, dbg)

        # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
        # (which uses that in PowerDecoder2 to raise 0x900 exception)
        self.tb_dec_fsm(m, cur_state.dec)

        return m
1104
1105     def do_dmi(self, m, dbg):
1106         """deals with DMI debug requests
1107
1108         currently only provides read requests for the INT regfile, CR and XER
1109         it will later also deal with *writing* to these regfiles.
1110         """
1111         comb = m.d.comb
1112         sync = m.d.sync
1113         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
1114         intrf = self.core.regs.rf['int']
1115
1116         with m.If(d_reg.req): # request for regfile access being made
1117             # TODO: error-check this
1118             # XXX should this be combinatorial?  sync better?
1119             if intrf.unary:
1120                 comb += self.int_r.ren.eq(1<<d_reg.addr)
1121             else:
1122                 comb += self.int_r.addr.eq(d_reg.addr)
1123                 comb += self.int_r.ren.eq(1)
1124         d_reg_delay  = Signal()
1125         sync += d_reg_delay.eq(d_reg.req)
1126         with m.If(d_reg_delay):
1127             # data arrives one clock later
1128             comb += d_reg.data.eq(self.int_r.data_o)
1129             comb += d_reg.ack.eq(1)
1130
1131         # sigh same thing for CR debug
1132         with m.If(d_cr.req): # request for regfile access being made
1133             comb += self.cr_r.ren.eq(0b11111111) # enable all
1134         d_cr_delay  = Signal()
1135         sync += d_cr_delay.eq(d_cr.req)
1136         with m.If(d_cr_delay):
1137             # data arrives one clock later
1138             comb += d_cr.data.eq(self.cr_r.data_o)
1139             comb += d_cr.ack.eq(1)
1140
1141         # aaand XER...
1142         with m.If(d_xer.req): # request for regfile access being made
1143             comb += self.xer_r.ren.eq(0b111111) # enable all
1144         d_xer_delay  = Signal()
1145         sync += d_xer_delay.eq(d_xer.req)
1146         with m.If(d_xer_delay):
1147             # data arrives one clock later
1148             comb += d_xer.data.eq(self.xer_r.data_o)
1149             comb += d_xer.ack.eq(1)
1150
1151     def tb_dec_fsm(self, m, spr_dec):
1152         """tb_dec_fsm
1153
1154         this is a FSM for updating either dec or tb.  it runs alternately
1155         DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
1156         value to DEC, however the regfile has "passthrough" on it so this
1157         *should* be ok.
1158
1159         see v3.0B p1097-1099 for Timeer Resource and p1065 and p1076
1160         """
1161
1162         comb, sync = m.d.comb, m.d.sync
1163         fast_rf = self.core.regs.rf['fast']
1164         fast_r_dectb = fast_rf.r_ports['issue'] # DEC/TB
1165         fast_w_dectb = fast_rf.w_ports['issue'] # DEC/TB
1166
1167         with m.FSM() as fsm:
1168
1169             # initiates read of current DEC
1170             with m.State("DEC_READ"):
1171                 comb += fast_r_dectb.addr.eq(FastRegs.DEC)
1172                 comb += fast_r_dectb.ren.eq(1)
1173                 m.next = "DEC_WRITE"
1174
1175             # waits for DEC read to arrive (1 cycle), updates with new value
1176             with m.State("DEC_WRITE"):
1177                 new_dec = Signal(64)
1178                 # TODO: MSR.LPCR 32-bit decrement mode
1179                 comb += new_dec.eq(fast_r_dectb.data_o - 1)
1180                 comb += fast_w_dectb.addr.eq(FastRegs.DEC)
1181                 comb += fast_w_dectb.wen.eq(1)
1182                 comb += fast_w_dectb.data_i.eq(new_dec)
1183                 sync += spr_dec.eq(new_dec) # copy into cur_state for decoder
1184                 m.next = "TB_READ"
1185
1186             # initiates read of current TB
1187             with m.State("TB_READ"):
1188                 comb += fast_r_dectb.addr.eq(FastRegs.TB)
1189                 comb += fast_r_dectb.ren.eq(1)
1190                 m.next = "TB_WRITE"
1191
1192             # waits for read TB to arrive, initiates write of current TB
1193             with m.State("TB_WRITE"):
1194                 new_tb = Signal(64)
1195                 comb += new_tb.eq(fast_r_dectb.data_o + 1)
1196                 comb += fast_w_dectb.addr.eq(FastRegs.TB)
1197                 comb += fast_w_dectb.wen.eq(1)
1198                 comb += fast_w_dectb.data_i.eq(new_tb)
1199                 m.next = "DEC_READ"
1200
1201         return m
1202
1203     def __iter__(self):
1204         yield from self.pc_i.ports()
1205         yield self.pc_o
1206         yield self.memerr_o
1207         yield from self.core.ports()
1208         yield from self.imem.ports()
1209         yield self.core_bigendian_i
1210         yield self.busy_o
1211
1212     def ports(self):
1213         return list(self)
1214
1215     def external_ports(self):
1216         ports = self.pc_i.ports()
1217         ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
1218                 ]
1219
1220         if self.jtag_en:
1221             ports += list(self.jtag.external_ports())
1222         else:
1223             # don't add DMI if JTAG is enabled
1224             ports += list(self.dbg.dmi.ports())
1225
1226         ports += list(self.imem.ibus.fields.values())
1227         ports += list(self.core.l0.cmpi.wb_bus().fields.values())
1228
1229         if self.sram4x4k:
1230             for sram in self.sram4k:
1231                 ports += list(sram.bus.fields.values())
1232
1233         if self.xics:
1234             ports += list(self.xics_icp.bus.fields.values())
1235             ports += list(self.xics_ics.bus.fields.values())
1236             ports.append(self.int_level_i)
1237
1238         if self.gpio:
1239             ports += list(self.simple_gpio.bus.fields.values())
1240             ports.append(self.gpio_o)
1241
1242         return ports
1243
    def ports(self):
        """return everything yielded by iterating this object, as a list"""
        # NOTE(review): this is a second, identical definition of ports():
        # the same method already appears just before external_ports().
        # being the later of the two, this is the one that ends up bound;
        # one of them can be deleted.
        return list(self)
1246
1247
class TestIssuer(Elaboratable):
    """TestIssuerInternal plus top-level clock wiring and an optional PLL

    drives the "coresync" clock (and the debug-domain clock, when that
    domain is not "sync") from the main clock.  when pspec.use_pll is set,
    the PLL wrapper is also instantiated and its test/VCO outputs and
    clock-select input are brought out, although the core clock itself
    still comes from the reference clock (the PLL is bypassed).
    """

    def __init__(self, pspec):
        """create the internal issuer and the PLL wrapper

        :param pspec: parameter specification, passed to TestIssuerInternal.
                      an optional "use_pll" attribute enables the PLL ports
        """
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL(instance=True)

        # PLL direct clock or not (a missing attribute means "no PLL")
        self.pll_en = getattr(pspec, "use_pll", False)
        if self.pll_en:
            self.pll_test_o = Signal(reset_less=True)
            self.pll_vco_o = Signal(reset_less=True)
            self.clk_sel_i = Signal(2, reset_less=True)
            self.ref_clk =  ClockSignal() # can't rename it but that's ok
            self.pllclk_clk = ClockSignal("pllclk")

    def elaborate(self, platform):
        """wire up the internal issuer, the optional PLL and the clocks"""
        m = Module()
        comb = m.d.comb

        # TestIssuer nominally runs at main clock, actually it is
        # all combinatorial internally except for coresync'd components
        m.submodules.ti = self.ti

        if self.pll_en:
            m.submodules.wrappll = pll = self.pll

            # add clock domain for the PLL output
            cd_pll = ClockDomain("pllclk")
            m.domains += cd_pll

            # PLL clock established: "pllclk" is driven by the PLL output
            comb += self.pllclk_clk.eq(pll.clk_pll_o)

            # wire up external 24mhz to PLL (currently disabled)
            #comb += pll.clk_24_i.eq(self.ref_clk)
            # output 18 mhz PLL test signal, and analog oscillator out
            comb += self.pll_test_o.eq(pll.pll_test_o)
            comb += self.pll_vco_o.eq(pll.pll_vco_o)

            # input to pll clock selection
            comb += pll.clk_sel_i.eq(self.clk_sel_i)

            # now wire up ResetSignals.  don't mind them being in this domain
            pll_rst = ResetSignal("pllclk")
            comb += pll_rst.eq(ResetSignal())

        # the "coresync" and debug ClockDomains are deliberately NOT
        # declared here: only their clock signals are driven.
        intclk = ClockSignal("coresync")
        # XXX BYPASS PLL XXX
        # with or without the PLL, the core clock is the main clock
        # (self.ref_clk is ClockSignal() as well)
        if self.pll_en:
            comb += intclk.eq(self.ref_clk)
        else:
            comb += intclk.eq(ClockSignal())
        # debug clock runs at coresync internal clock
        if self.ti.dbg_domain != 'sync':
            dbgclk = ClockSignal(self.ti.dbg_domain)
            comb += dbgclk.eq(intclk)

        return m

    def ports(self):
        """return internal issuer ports, PLL ports, main clock and reset"""
        return list(self.ti.ports()) + list(self.pll.ports()) + \
               [ClockSignal(), ResetSignal()]

    def external_ports(self):
        """return top-level ports: issuer external ports, main clock and
        reset, plus the PLL-related signals when the PLL is enabled
        """
        ports = self.ti.external_ports()
        ports.append(ClockSignal())
        ports.append(ResetSignal())
        if self.pll_en:
            ports.append(self.clk_sel_i)
            ports.append(self.pll.clk_24_i)
            ports.append(self.pll_test_o)
            ports.append(self.pll_vco_o)
            ports.append(self.pllclk_clk)
            ports.append(self.ref_clk)
        return ports
1335
1336
if __name__ == '__main__':
    # one instance of each Function Unit
    units = dict.fromkeys(('alu', 'cr', 'branch', 'trap', 'logical',
                           'spr', 'div', 'mul', 'shiftrot'), 1)
    pspec = TestMemPspec(
        ldst_ifacetype='bare_wb',
        imem_ifacetype='bare_wb',
        addr_wid=48,
        mask_wid=8,
        reg_wid=64,
        units=units,
    )
    dut = TestIssuer(pspec)

    # hand over to the nmigen command-line front-end (uses all ports)
    vl = main(dut, ports=dut.ports(), name="test_issuer")

    # no command-line arguments given: additionally write out RTLIL,
    # this time restricted to the external ports
    if len(sys.argv) == 1:
        vl = rtlil.convert(dut, ports=dut.external_ports(),
                           name="test_issuer")
        with open("test_issuer.il", "w") as f:
            f.write(vl)