src/soc/simple/issuer.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
the purpose of this module is to verify the functional correctness
of the Function Units in the absolute simplest and clearest possible
way, and to provide something that can be further incrementally
improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
  19                     ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from nmigen.lib.coding import PriorityEncoder
  25
  26 from openpower.decoder.power_decoder import create_pdecode
  27 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
  28 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
  29 from openpower.decoder.decode2execute1 import Data
  30 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
  31                                      SVP64PredMode)
  32 from openpower.state import CoreState
  33 from openpower.consts import (CR, SVP64CROffs)
  34 from soc.experiment.testmem import TestMemory # test only for instructions
  35 from soc.regfile.regfiles import StateRegs, FastRegs
  36 from soc.simple.core import NonProductionCore
  37 from soc.config.test.test_loadstore import TestMemPspec
  38 from soc.config.ifetch import ConfigFetchUnit
  39 from soc.debug.dmi import CoreDebug, DMIInterface
  40 from soc.debug.jtag import JTAG
  41 from soc.config.pinouts import get_pinspecs
  42 from soc.interrupts.xics import XICS_ICP, XICS_ICS
  43 from soc.bus.simple_gpio import SimpleGPIO
  44 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
  45 from soc.clock.select import ClockSelect
  46 from soc.clock.dummypll import DummyPLL
  47 from openpower.sv.svstate import SVSTATERec
  48
  49
  50 from nmutil.util import rising_edge
  51
  52 def get_insn(f_instr_o, pc):
  53     if f_instr_o.width == 32:
  54         return f_instr_o
  55     else:
  56         # 64-bit: bit 2 of pc decides which word to select
  57         return f_instr_o.word_select(pc[2], 32)
  58
  59 # gets state input or reads from state regfile
  60 def state_get(m, core_rst, state_i, name, regfile, regnum):
  61     comb = m.d.comb
  62     sync = m.d.sync
  63     # read the PC
  64     res = Signal(64, reset_less=True, name=name)
  65     res_ok_delay = Signal(name="%s_ok_delay" % name)
  66     with m.If(~core_rst):
  67         sync += res_ok_delay.eq(~state_i.ok)
  68         with m.If(state_i.ok):
  69             # incoming override (start from pc_i)
  70             comb += res.eq(state_i.data)
  71         with m.Else():
  72             # otherwise read StateRegs regfile for PC...
  73             comb += regfile.ren.eq(1<<regnum)
  74         # ... but on a 1-clock delay
  75         with m.If(res_ok_delay):
  76             comb += res.eq(regfile.o_data)
  77     return res
  78
  79 def get_predint(m, mask, name):
  80     """decode SVP64 predicate integer mask field to reg number and invert
  81     this is identical to the equivalent function in ISACaller except that
  82     it doesn't read the INT directly, it just decodes "what needs to be done"
  83     i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
  84
  85     * all1s is set to indicate that no mask is to be applied.
  86     * regread indicates the GPR register number to be read
  87     * invert is set to indicate that the register value is to be inverted
  88     * unary indicates that the contents of the register is to be shifted 1<<r3
  89     """
  90     comb = m.d.comb
  91     regread = Signal(5, name=name+"regread")
  92     invert = Signal(name=name+"invert")
  93     unary = Signal(name=name+"unary")
  94     all1s = Signal(name=name+"all1s")
  95     with m.Switch(mask):
  96         with m.Case(SVP64PredInt.ALWAYS.value):
  97             comb += all1s.eq(1)      # use 0b1111 (all ones)
  98         with m.Case(SVP64PredInt.R3_UNARY.value):
  99             comb += regread.eq(3)
 100             comb += unary.eq(1)        # 1<<r3 - shift r3 (single bit)
 101         with m.Case(SVP64PredInt.R3.value):
 102             comb += regread.eq(3)
 103         with m.Case(SVP64PredInt.R3_N.value):
 104             comb += regread.eq(3)
 105             comb += invert.eq(1)
 106         with m.Case(SVP64PredInt.R10.value):
 107             comb += regread.eq(10)
 108         with m.Case(SVP64PredInt.R10_N.value):
 109             comb += regread.eq(10)
 110             comb += invert.eq(1)
 111         with m.Case(SVP64PredInt.R30.value):
 112             comb += regread.eq(30)
 113         with m.Case(SVP64PredInt.R30_N.value):
 114             comb += regread.eq(30)
 115             comb += invert.eq(1)
 116     return regread, invert, unary, all1s
 117
 118 def get_predcr(m, mask, name):
 119     """decode SVP64 predicate CR to reg number field and invert status
 120     this is identical to _get_predcr in ISACaller
 121     """
 122     comb = m.d.comb
 123     idx = Signal(2, name=name+"idx")
 124     invert = Signal(name=name+"crinvert")
 125     with m.Switch(mask):
 126         with m.Case(SVP64PredCR.LT.value):
 127             comb += idx.eq(CR.LT)
 128             comb += invert.eq(0)
 129         with m.Case(SVP64PredCR.GE.value):
 130             comb += idx.eq(CR.LT)
 131             comb += invert.eq(1)
 132         with m.Case(SVP64PredCR.GT.value):
 133             comb += idx.eq(CR.GT)
 134             comb += invert.eq(0)
 135         with m.Case(SVP64PredCR.LE.value):
 136             comb += idx.eq(CR.GT)
 137             comb += invert.eq(1)
 138         with m.Case(SVP64PredCR.EQ.value):
 139             comb += idx.eq(CR.EQ)
 140             comb += invert.eq(0)
 141         with m.Case(SVP64PredCR.NE.value):
 142             comb += idx.eq(CR.EQ)
 143             comb += invert.eq(1)
 144         with m.Case(SVP64PredCR.SO.value):
 145             comb += idx.eq(CR.SO)
 146             comb += invert.eq(0)
 147         with m.Case(SVP64PredCR.NS.value):
 148             comb += idx.eq(CR.SO)
 149             comb += invert.eq(1)
 150     return idx, invert
 151
 152
 153 class TestIssuerInternal(Elaboratable):
 154     """TestIssuer - reads instructions from TestMemory and issues them
 155
    efficiency and speed are not the main goals here: functional
    correctness and code clarity are.  optimisations (which almost 100%
    interfere with easy understanding) come later.
 159     """
 160     def __init__(self, pspec):
 161
 162         # test is SVP64 is to be enabled
 163         self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
 164
 165         # and if regfiles are reduced
 166         self.regreduce_en = (hasattr(pspec, "regreduce") and
 167                                             (pspec.regreduce == True))
 168
 169         # and if overlap requested
 170         self.allow_overlap = (hasattr(pspec, "allow_overlap") and
 171                                             (pspec.allow_overlap == True))
 172
 173         # JTAG interface.  add this right at the start because if it's
 174         # added it *modifies* the pspec, by adding enable/disable signals
 175         # for parts of the rest of the core
 176         self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
 177         self.dbg_domain = "sync" # sigh "dbgsunc" too problematic
 178         #self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
 179         if self.jtag_en:
 180             # XXX MUST keep this up-to-date with litex, and
 181             # soc-cocotb-sim, and err.. all needs sorting out, argh
 182             subset = ['uart',
 183                       'mtwi',
 184                       'eint', 'gpio', 'mspi0',
 185                       # 'mspi1', - disabled for now
 186                       # 'pwm', 'sd0', - disabled for now
 187                        'sdr']
 188             self.jtag = JTAG(get_pinspecs(subset=subset),
 189                              domain=self.dbg_domain)
 190             # add signals to pspec to enable/disable icache and dcache
 191             # (or data and intstruction wishbone if icache/dcache not included)
 192             # https://bugs.libre-soc.org/show_bug.cgi?id=520
 193             # TODO: do we actually care if these are not domain-synchronised?
 194             # honestly probably not.
 195             pspec.wb_icache_en = self.jtag.wb_icache_en
 196             pspec.wb_dcache_en = self.jtag.wb_dcache_en
 197             self.wb_sram_en = self.jtag.wb_sram_en
 198         else:
 199             self.wb_sram_en = Const(1)
 200
 201         # add 4k sram blocks?
 202         self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
 203                          pspec.sram4x4kblock == True)
 204         if self.sram4x4k:
 205             self.sram4k = []
 206             for i in range(4):
 207                 self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
 208                                                     #features={'err'}
 209                                                     ))
 210
 211         # add interrupt controller?
 212         self.xics = hasattr(pspec, "xics") and pspec.xics == True
 213         if self.xics:
 214             self.xics_icp = XICS_ICP()
 215             self.xics_ics = XICS_ICS()
 216             self.int_level_i = self.xics_ics.int_level_i
 217
 218         # add GPIO peripheral?
 219         self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
 220         if self.gpio:
 221             self.simple_gpio = SimpleGPIO()
 222             self.gpio_o = self.simple_gpio.gpio_o
 223
 224         # main instruction core.  suitable for prototyping / demo only
 225         self.core = core = NonProductionCore(pspec)
 226         self.core_rst = ResetSignal("coresync")
 227
 228         # instruction decoder.  goes into Trap Record
 229         #pdecode = create_pdecode()
 230         self.cur_state = CoreState("cur") # current state (MSR/PC/SVSTATE)
 231         self.pdecode2 = PowerDecode2(None, state=self.cur_state,
 232                                      opkls=IssuerDecode2ToOperand,
 233                                      svp64_en=self.svp64_en,
 234                                      regreduce_en=self.regreduce_en)
 235         pdecode = self.pdecode2.dec
 236
 237         if self.svp64_en:
 238             self.svp64 = SVP64PrefixDecoder() # for decoding SVP64 prefix
 239
 240         # Test Instruction memory
 241         self.imem = ConfigFetchUnit(pspec).fu
 242
 243         # DMI interface
 244         self.dbg = CoreDebug()
 245
 246         # instruction go/monitor
 247         self.pc_o = Signal(64, reset_less=True)
 248         self.pc_i = Data(64, "pc_i") # set "ok" to indicate "please change me"
 249         self.svstate_i = Data(64, "svstate_i") # ditto
 250         self.core_bigendian_i = Signal() # TODO: set based on MSR.LE
 251         self.busy_o = Signal(reset_less=True)
 252         self.memerr_o = Signal(reset_less=True)
 253
 254         # STATE regfile read /write ports for PC, MSR, SVSTATE
 255         staterf = self.core.regs.rf['state']
 256         self.state_r_pc = staterf.r_ports['cia'] # PC rd
 257         self.state_w_pc = staterf.w_ports['d_wr1'] # PC wr
 258         self.state_r_msr = staterf.r_ports['msr'] # MSR rd
 259         self.state_r_sv = staterf.r_ports['sv'] # SVSTATE rd
 260         self.state_w_sv = staterf.w_ports['sv'] # SVSTATE wr
 261
 262         # DMI interface access
 263         intrf = self.core.regs.rf['int']
 264         crrf = self.core.regs.rf['cr']
 265         xerrf = self.core.regs.rf['xer']
 266         self.int_r = intrf.r_ports['dmi'] # INT read
 267         self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
 268         self.xer_r = xerrf.r_ports['full_xer'] # XER read
 269
 270         if self.svp64_en:
 271             # for predication
 272             self.int_pred = intrf.r_ports['pred'] # INT predicate read
 273             self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read
 274
 275         # hack method of keeping an eye on whether branch/trap set the PC
 276         self.state_nia = self.core.regs.rf['state'].w_ports['nia']
 277         self.state_nia.wen.name = 'state_nia_wen'
 278
 279         # pulse to synchronize the simulator at instruction end
 280         self.insn_done = Signal()
 281
 282         # indicate any instruction still outstanding, in execution
 283         self.any_busy = Signal()
 284
 285         if self.svp64_en:
 286             # store copies of predicate masks
 287             self.srcmask = Signal(64)
 288             self.dstmask = Signal(64)
 289
    def fetch_fsm(self, m, dbg, core, pc, svstate, nia, is_svp64_mode,
                        fetch_pc_o_ready, fetch_pc_i_valid,
                        fetch_insn_o_valid, fetch_insn_i_ready):
        """fetch FSM

        this FSM performs fetch of raw instruction data, partial-decodes
        it 32-bit at a time to detect SVP64 prefixes, and will optionally
        read a 2nd 32-bit quantity if that occurs.

        states: IDLE -> INSN_READ [-> INSN_READ2] -> INSN_READY -> IDLE

        arguments:

        * m: Module to which the FSM is added
        * dbg: debug block; stopping_o is consulted
        * core: not used by this method
        * pc, svstate: values latched into cur_state when a fetch starts
        * nia: written with the address following the fetched instruction
          (pc+4, or pc+8 for an SVP64-prefixed instruction)
        * is_svp64_mode: written with whether a prefix was detected
        * fetch_pc_o_ready / fetch_pc_i_valid: handshake that starts a fetch
        * fetch_insn_o_valid / fetch_insn_i_ready: handshake that hands
          the fetched instruction over
        """
        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        cur_state = self.cur_state
        dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode

        # cleared when an MSR read is requested (IDLE) and set again once
        # the result has been captured (INSN_READ).  resets to 1, i.e.
        # "nothing outstanding"
        msr_read = Signal(reset=1)

        with m.FSM(name='fetch_fsm'):

            # waiting (zzz)
            with m.State("IDLE"):
                # only offer to accept a new PC when debug is not stopping
                with m.If(~dbg.stopping_o):
                    comb += fetch_pc_o_ready.eq(1)
                with m.If(fetch_pc_i_valid):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                    sync += cur_state.pc.eq(pc)
                    sync += cur_state.svstate.eq(svstate) # and svstate

                    # initiate read of MSR. arrives one clock later
                    comb += self.state_r_msr.ren.eq(1 << StateRegs.MSR)
                    sync += msr_read.eq(0)

                    m.next = "INSN_READ"  # move to "wait for bus" phase

            # wait for the instruction memory to deliver the first word
            with m.State("INSN_READ"):
                # aborting on debug-stop is only possible when overlap is
                # allowed: otherwise "stopping" is constant zero and the
                # first branch below is never taken
                if self.allow_overlap:
                    stopping = dbg.stopping_o
                else:
                    stopping = Const(0)
                with m.If(stopping):
                    # stopping: jump back to idle
                    m.next = "IDLE"
                with m.Else():
                    # one cycle later, msr/sv read arrives.  valid only once.
                    with m.If(~msr_read):
                        sync += msr_read.eq(1) # yeah don't read it again
                        sync += cur_state.msr.eq(self.state_r_msr.o_data)
                    with m.If(self.imem.f_busy_o): # zzz...
                        # busy: stay in wait-read
                        comb += self.imem.a_i_valid.eq(1)
                        comb += self.imem.f_i_valid.eq(1)
                    with m.Else():
                        # not busy: instruction fetched
                        insn = get_insn(self.imem.f_instr_o, cur_state.pc)
                        if self.svp64_en:
                            svp64 = self.svp64
                            # decode the SVP64 prefix, if any
                            comb += svp64.raw_opcode_in.eq(insn)
                            comb += svp64.bigendian.eq(self.core_bigendian_i)
                            # pass the decoded prefix (if any) to PowerDecoder2
                            sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
                            # NOTE(review): this is a sync assignment, so it
                            # loads the value is_svp64_mode holds *before*
                            # the update two lines below takes effect (not
                            # svp64.is_svp64_mode) - confirm this is intended
                            sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
                            # remember whether this is a prefixed instruction,
                            # so the FSM can readily loop when VL==0
                            sync += is_svp64_mode.eq(svp64.is_svp64_mode)
                            # calculate the address of the following instruction
                            insn_size = Mux(svp64.is_svp64_mode, 8, 4)
                            sync += nia.eq(cur_state.pc + insn_size)
                            with m.If(~svp64.is_svp64_mode):
                                # with no prefix, store the instruction
                                # and hand it directly to the next FSM
                                sync += dec_opcode_i.eq(insn)
                                m.next = "INSN_READY"
                            with m.Else():
                                # fetch the rest of the instruction from memory
                                comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
                                comb += self.imem.a_i_valid.eq(1)
                                comb += self.imem.f_i_valid.eq(1)
                                m.next = "INSN_READ2"
                        else:
                            # not SVP64 - 32-bit only
                            sync += nia.eq(cur_state.pc + 4)
                            sync += dec_opcode_i.eq(insn)
                            m.next = "INSN_READY"

            # wait for the second 32-bit word (the instruction that
            # follows an SVP64 prefix)
            with m.State("INSN_READ2"):
                with m.If(self.imem.f_busy_o):  # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
                    sync += dec_opcode_i.eq(insn)
                    m.next = "INSN_READY"
                    # TODO: probably can start looking at pdecode2.rm_dec
                    # here or maybe even in INSN_READ state, if svp64_mode
                    # detected, in order to trigger - and wait for - the
                    # predicate reading.
                    # (pmode is not used yet: the string literal below is
                    # an inert sketch of the intended logic, not live code)
                    if self.svp64_en:
                        pmode = pdecode2.rm_dec.predmode
                    """
                    if pmode != SVP64PredMode.ALWAYS.value:
                        fire predicate loading FSM and wait before
                        moving to INSN_READY
                    else:
                        sync += self.srcmask.eq(-1) # set to all 1s
                        sync += self.dstmask.eq(-1) # set to all 1s
                        m.next = "INSN_READY"
                    """

            with m.State("INSN_READY"):
                # hand over the instruction, to be decoded
                # (valid is held until the receiver signals ready)
                comb += fetch_insn_o_valid.eq(1)
                with m.If(fetch_insn_i_ready):
                    m.next = "IDLE"
 413
    def fetch_predicate_fsm(self, m,
                            pred_insn_i_valid, pred_insn_o_ready,
                            pred_mask_o_valid, pred_mask_i_ready):
        """fetch_predicate_fsm - obtains (constructs in the case of CR)
           src/dest predicate masks

        https://bugs.libre-soc.org/show_bug.cgi?id=617
        the predicates are read here through self.int_pred (IntRegs
        r_ports['pred']) or self.cr_pred (CRRegs r_ports['cr_pred']).
        in the case of CRs it is done through multiple reads, extracting
        one relevant bit at a time.
        later, a faster way would be to use the 32-bit-wide CR port but
        this is more complex decoding, here.  equivalent code used in
        ISACaller is "from openpower.decoder.isa.caller import get_predcr"

        the results are stored in self.srcmask and self.dstmask, already
        shifted right by the current srcstep / dststep.

        arguments:

        * m: Module to which the FSM is added
        * pred_insn_i_valid / pred_insn_o_ready: handshake that starts
          a predicate fetch
        * pred_mask_o_valid / pred_mask_i_ready: handshake that signals
          that the masks are available

        note: this ENTIRE FSM is not to be called when svp64 is disabled
        (self.cr_pred, self.int_pred, self.srcmask and self.dstmask are
        only created by __init__ when svp64_en is set)
        """
        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        rm_dec = pdecode2.rm_dec # SVP64RMModeDecode
        predmode = rm_dec.predmode
        srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
        cr_pred, int_pred = self.cr_pred, self.int_pred   # read regfiles
        # get src/dst step, so we can skip already used mask bits
        cur_state = self.cur_state
        srcstep = cur_state.svstate.srcstep
        dststep = cur_state.svstate.dststep
        cur_vl = cur_state.svstate.vl

        # decode predicates
        # (both the INT and the CR interpretation of each field are
        # decoded; predmode decides below which of them is acted on)
        sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
        dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
        sidx, scrinvert = get_predcr(m, srcpred, 's')
        didx, dcrinvert = get_predcr(m, dstpred, 'd')

        # store fetched masks, for either intpred or crpred
        # when src/dst step is not zero, the skipped mask bits need to be
        # shifted-out, before actually storing them in src/dest mask
        new_srcmask = Signal(64, reset_less=True)
        new_dstmask = Signal(64, reset_less=True)

        with m.FSM(name="fetch_predicate"):

            with m.State("FETCH_PRED_IDLE"):
                comb += pred_insn_o_ready.eq(1)
                with m.If(pred_insn_i_valid):
                    with m.If(predmode == SVP64PredMode.INT):
                        # skip fetching the destination mask register
                        # when the destination predicate decodes as
                        # "all ones" (dall1s)
                        with m.If(dall1s):
                            sync += new_dstmask.eq(-1)
                            # directly go to fetch source mask register
                            # guaranteed not to be zero (otherwise predmode
                            # would be SVP64PredMode.ALWAYS, not INT)
                            comb += int_pred.addr.eq(sregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_SRC_READ"
                        # fetch destination predicate register
                        with m.Else():
                            comb += int_pred.addr.eq(dregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_DST_READ"
                    with m.Elif(predmode == SVP64PredMode.CR):
                        # go fetch masks from the CR register file
                        # (masks are built up bit by bit, starting from 0)
                        sync += new_srcmask.eq(0)
                        sync += new_dstmask.eq(0)
                        m.next = "CR_READ"
                    with m.Else():
                        # neither INT nor CR predication: both masks are
                        # all ones, and no shifting step is needed
                        sync += self.srcmask.eq(-1)
                        sync += self.dstmask.eq(-1)
                        m.next = "FETCH_PRED_DONE"

            with m.State("INT_DST_READ"):
                # store destination mask
                # (the read was requested in the previous state)
                inv = Repl(dinvert, 64)
                with m.If(dunary):
                    # set selected mask bit for 1<<r3 mode
                    # (only the low 6 bits of the register are used)
                    dst_shift = Signal(range(64))
                    comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
                    sync += new_dstmask.eq(1 << dst_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
                # skip fetching the source mask register when the source
                # predicate decodes as "all ones" (sall1s)
                with m.If(sall1s):
                    sync += new_srcmask.eq(-1)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                # fetch source predicate register
                with m.Else():
                    comb += int_pred.addr.eq(sregread)
                    comb += int_pred.ren.eq(1)
                    m.next = "INT_SRC_READ"

            with m.State("INT_SRC_READ"):
                # store source mask
                # (the read was requested in the previous state)
                inv = Repl(sinvert, 64)
                with m.If(sunary):
                    # set selected mask bit for 1<<r3 mode
                    # (only the low 6 bits of the register are used)
                    src_shift = Signal(range(64))
                    comb += src_shift.eq(self.int_pred.o_data & 0b111111)
                    sync += new_srcmask.eq(1 << src_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
                m.next = "FETCH_PRED_SHIFT_MASK"

            # fetch masks from the CR register file
            # implements the following loop:
            # idx, inv = get_predcr(mask)
            # mask = 0
            # for cr_idx in range(vl):
            #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
            #     if cr[idx] ^ inv:
            #         mask |= 1 << cr_idx
            # return mask
            with m.State("CR_READ"):
                # CR index to be read, which will be ready by the next cycle
                cr_idx = Signal.like(cur_vl, reset_less=True)
                # submit the read operation to the regfile
                with m.If(cr_idx != cur_vl):
                    # the CR read port is unary ...
                    # ren = 1 << cr_idx
                    # ... in MSB0 convention ...
                    # ren = 1 << (7 - cr_idx)
                    # ... and with an offset:
                    # ren = 1 << (7 - off - cr_idx)
                    idx = SVP64CROffs.CRPred + cr_idx
                    comb += cr_pred.ren.eq(1 << (7 - idx))
                    # signal data valid in the next cycle
                    # (cr_read is created here but is also used in the
                    # Else branch and the If further down: python "with"
                    # blocks do not open a new scope)
                    cr_read = Signal(reset_less=True)
                    sync += cr_read.eq(1)
                    # load the next index
                    sync += cr_idx.eq(cr_idx + 1)
                with m.Else():
                    # exit on loop end
                    # (also clears the loop state, ready for the next use)
                    sync += cr_read.eq(0)
                    sync += cr_idx.eq(0)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                with m.If(cr_read):
                    # compensate for the one cycle delay on the regfile
                    cur_cr_idx = Signal.like(cur_vl)
                    comb += cur_cr_idx.eq(cr_idx - 1)
                    # read the CR field, select the appropriate bit
                    # (source and destination bits are extracted from the
                    # same CR field, each with its own index and invert)
                    cr_field = Signal(4)
                    scr_bit = Signal()
                    dcr_bit = Signal()
                    comb += cr_field.eq(cr_pred.o_data)
                    comb += scr_bit.eq(cr_field.bit_select(sidx, 1) ^ scrinvert)
                    comb += dcr_bit.eq(cr_field.bit_select(didx, 1) ^ dcrinvert)
                    # set the corresponding mask bit
                    bit_to_set = Signal.like(self.srcmask)
                    comb += bit_to_set.eq(1 << cur_cr_idx)
                    with m.If(scr_bit):
                        sync += new_srcmask.eq(new_srcmask | bit_to_set)
                    with m.If(dcr_bit):
                        sync += new_dstmask.eq(new_dstmask | bit_to_set)

            with m.State("FETCH_PRED_SHIFT_MASK"):
                # shift-out skipped mask bits
                sync += self.srcmask.eq(new_srcmask >> srcstep)
                sync += self.dstmask.eq(new_dstmask >> dststep)
                m.next = "FETCH_PRED_DONE"

            with m.State("FETCH_PRED_DONE"):
                # masks are available: hold valid until acknowledged
                comb += pred_mask_o_valid.eq(1)
                with m.If(pred_mask_i_ready):
                    m.next = "FETCH_PRED_IDLE"
 580
 581     def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
 582                   dbg, core_rst, is_svp64_mode,
 583                   fetch_pc_o_ready, fetch_pc_i_valid,
 584                   fetch_insn_o_valid, fetch_insn_i_ready,
 585                   pred_insn_i_valid, pred_insn_o_ready,
 586                   pred_mask_o_valid, pred_mask_i_ready,
 587                   exec_insn_i_valid, exec_insn_o_ready,
 588                   exec_pc_o_valid, exec_pc_i_ready):
 589         """issue FSM
 590
 591         decode / issue FSM.  this interacts with the "fetch" FSM
 592         through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
 593         (outgoing). also interacts with the "execute" FSM
 594         through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
 595         (incoming).
 596         SVP64 RM prefixes have already been set up by the
 597         "fetch" phase, so execute is fairly straightforward.
 598         """
 599
 600         comb = m.d.comb
 601         sync = m.d.sync
 602         pdecode2 = self.pdecode2
 603         cur_state = self.cur_state
 604
 605         # temporaries
 606         dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode
 607
 608         # for updating svstate (things like srcstep etc.)
 609         update_svstate = Signal() # set this (below) if updating
 610         new_svstate = SVSTATERec("new_svstate")
 611         comb += new_svstate.eq(cur_state.svstate)
 612
 613         # precalculate srcstep+1 and dststep+1
 614         cur_srcstep = cur_state.svstate.srcstep
 615         cur_dststep = cur_state.svstate.dststep
 616         next_srcstep = Signal.like(cur_srcstep)
 617         next_dststep = Signal.like(cur_dststep)
 618         comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
 619         comb += next_dststep.eq(cur_state.svstate.dststep+1)
 620
 621         # note if an exception happened.  in a pipelined or OoO design
 622         # this needs to be accompanied by "shadowing" (or stalling)
 623         exc_happened = self.core.o.exc_happened
 624
 625         with m.FSM(name="issue_fsm"):
 626
 627             # sync with the "fetch" phase which is reading the instruction
 628             # at this point, there is no instruction running, that
 629             # could inadvertently update the PC.
 630             with m.State("ISSUE_START"):
 631                 # wait on "core stop" release, before next fetch
 632                 # need to do this here, in case we are in a VL==0 loop
 633                 with m.If(~dbg.core_stop_o & ~core_rst):
 634                     comb += fetch_pc_i_valid.eq(1) # tell fetch to start
 635                     with m.If(fetch_pc_o_ready):   # fetch acknowledged us
 636                         m.next = "INSN_WAIT"
 637                 with m.Else():
 638                     # tell core it's stopped, and acknowledge debug handshake
 639                     comb += dbg.core_stopped_i.eq(1)
 640                     # while stopped, allow updating the PC and SVSTATE
 641                     with m.If(self.pc_i.ok):
 642                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 643                         comb += self.state_w_pc.i_data.eq(self.pc_i.data)
 644                         sync += pc_changed.eq(1)
 645                     with m.If(self.svstate_i.ok):
 646                         comb += new_svstate.eq(self.svstate_i.data)
 647                         comb += update_svstate.eq(1)
 648                         sync += sv_changed.eq(1)
 649
 650             # wait for an instruction to arrive from Fetch
 651             with m.State("INSN_WAIT"):
 652                 if self.allow_overlap:
 653                     stopping = dbg.stopping_o
 654                 else:
 655                     stopping = Const(0)
 656                 with m.If(stopping):
 657                     # stopping: jump back to idle
 658                     m.next = "ISSUE_START"
 659                 with m.Else():
 660                     comb += fetch_insn_i_ready.eq(1)
 661                     with m.If(fetch_insn_o_valid):
 662                         # loop into ISSUE_START if it's a SVP64 instruction
 663                         # and VL == 0.  this because VL==0 is a for-loop
 664                         # from 0 to 0 i.e. always, always a NOP.
 665                         cur_vl = cur_state.svstate.vl
 666                         with m.If(is_svp64_mode & (cur_vl == 0)):
 667                             # update the PC before fetching the next instruction
 668                             # since we are in a VL==0 loop, no instruction was
 669                             # executed that we could be overwriting
 670                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 671                             comb += self.state_w_pc.i_data.eq(nia)
 672                             comb += self.insn_done.eq(1)
 673                             m.next = "ISSUE_START"
 674                         with m.Else():
 675                             if self.svp64_en:
 676                                 m.next = "PRED_START"  # fetching predicate
 677                             else:
 678                                 m.next = "DECODE_SV"  # skip predication
 679
 680             with m.State("PRED_START"):
 681                 comb += pred_insn_i_valid.eq(1)  # tell fetch_pred to start
 682                 with m.If(pred_insn_o_ready):  # fetch_pred acknowledged us
 683                     m.next = "MASK_WAIT"
 684
 685             with m.State("MASK_WAIT"):
 686                 comb += pred_mask_i_ready.eq(1) # ready to receive the masks
 687                 with m.If(pred_mask_o_valid): # predication masks are ready
 688                     m.next = "PRED_SKIP"
 689
 690             # skip zeros in predicate
 691             with m.State("PRED_SKIP"):
 692                 with m.If(~is_svp64_mode):
 693                     m.next = "DECODE_SV"  # nothing to do
 694                 with m.Else():
 695                     if self.svp64_en:
 696                         pred_src_zero = pdecode2.rm_dec.pred_sz
 697                         pred_dst_zero = pdecode2.rm_dec.pred_dz
 698
 699                         # new srcstep, after skipping zeros
 700                         skip_srcstep = Signal.like(cur_srcstep)
 701                         # value to be added to the current srcstep
 702                         src_delta = Signal.like(cur_srcstep)
 703                         # add leading zeros to srcstep, if not in zero mode
 704                         with m.If(~pred_src_zero):
 705                             # priority encoder (count leading zeros)
 706                             # append guard bit, in case the mask is all zeros
 707                             pri_enc_src = PriorityEncoder(65)
 708                             m.submodules.pri_enc_src = pri_enc_src
 709                             comb += pri_enc_src.i.eq(Cat(self.srcmask,
 710                                                          Const(1, 1)))
 711                             comb += src_delta.eq(pri_enc_src.o)
 712                         # apply delta to srcstep
 713                         comb += skip_srcstep.eq(cur_srcstep + src_delta)
 714                         # shift-out all leading zeros from the mask
 715                         # plus the leading "one" bit
 716                         # TODO count leading zeros and shift-out the zero
 717                         #      bits, in the same step, in hardware
 718                         sync += self.srcmask.eq(self.srcmask >> (src_delta+1))
 719
 720                         # same as above, but for dststep
 721                         skip_dststep = Signal.like(cur_dststep)
 722                         dst_delta = Signal.like(cur_dststep)
 723                         with m.If(~pred_dst_zero):
 724                             pri_enc_dst = PriorityEncoder(65)
 725                             m.submodules.pri_enc_dst = pri_enc_dst
 726                             comb += pri_enc_dst.i.eq(Cat(self.dstmask,
 727                                                          Const(1, 1)))
 728                             comb += dst_delta.eq(pri_enc_dst.o)
 729                         comb += skip_dststep.eq(cur_dststep + dst_delta)
 730                         sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))
 731
 732                         # TODO: initialize mask[VL]=1 to avoid passing past VL
 733                         with m.If((skip_srcstep >= cur_vl) |
 734                                   (skip_dststep >= cur_vl)):
 735                             # end of VL loop. Update PC and reset src/dst step
 736                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 737                             comb += self.state_w_pc.i_data.eq(nia)
 738                             comb += new_svstate.srcstep.eq(0)
 739                             comb += new_svstate.dststep.eq(0)
 740                             comb += update_svstate.eq(1)
 741                             # synchronize with the simulator
 742                             comb += self.insn_done.eq(1)
 743                             # go back to Issue
 744                             m.next = "ISSUE_START"
 745                         with m.Else():
 746                             # update new src/dst step
 747                             comb += new_svstate.srcstep.eq(skip_srcstep)
 748                             comb += new_svstate.dststep.eq(skip_dststep)
 749                             comb += update_svstate.eq(1)
 750                             # proceed to Decode
 751                             m.next = "DECODE_SV"
 752
 753                         # pass predicate mask bits through to satellite decoders
 754                         # TODO: for SIMD this will be *multiple* bits
 755                         sync += core.i.sv_pred_sm.eq(self.srcmask[0])
 756                         sync += core.i.sv_pred_dm.eq(self.dstmask[0])
 757
 758             # after src/dst step have been updated, we are ready
 759             # to decode the instruction
 760             with m.State("DECODE_SV"):
 761                 # decode the instruction
 762                 sync += core.i.e.eq(pdecode2.e)
 763                 sync += core.i.state.eq(cur_state)
 764                 sync += core.i.raw_insn_i.eq(dec_opcode_i)
 765                 sync += core.i.bigendian_i.eq(self.core_bigendian_i)
 766                 if self.svp64_en:
 767                     sync += core.i.sv_rm.eq(pdecode2.sv_rm)
 768                     # set RA_OR_ZERO detection in satellite decoders
 769                     sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
 770                     # and svp64 detection
 771                     sync += core.i.is_svp64_mode.eq(is_svp64_mode)
 772                     # and svp64 bit-rev'd ldst mode
 773                     ldst_dec = pdecode2.use_svp64_ldst_dec
 774                     sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
 775                 # after decoding, reset any previous exception condition,
 776                 # allowing it to be set again during the next execution
 777                 sync += pdecode2.ldst_exc.eq(0)
 778
 779                 m.next = "INSN_EXECUTE"  # move to "execute"
 780
 781             # handshake with execution FSM, move to "wait" once acknowledged
 782             with m.State("INSN_EXECUTE"):
 783                 comb += exec_insn_i_valid.eq(1) # trigger execute
 784                 with m.If(exec_insn_o_ready):   # execute acknowledged us
 785                     m.next = "EXECUTE_WAIT"
 786
 787             with m.State("EXECUTE_WAIT"):
 788                 # wait on "core stop" release, at instruction end
 789                 # need to do this here, in case we are in a VL>1 loop
 790                 with m.If(~dbg.core_stop_o & ~core_rst):
 791                     comb += exec_pc_i_ready.eq(1)
 792                     # see https://bugs.libre-soc.org/show_bug.cgi?id=636
 793                     # the exception info needs to be blatted into
 794                     # pdecode.ldst_exc, and the instruction "re-run".
 795                     # when ldst_exc.happened is set, the PowerDecoder2
 796                     # reacts very differently: it re-writes the instruction
 797                     # with a "trap" (calls PowerDecoder2.trap()) which
 798                     # will *overwrite* whatever was requested and jump the
 799                     # PC to the exception address, as well as alter MSR.
 800                     # nothing else needs to be done other than to note
 801                     # the change of PC and MSR (and, later, SVSTATE)
 802                     with m.If(exc_happened):
 803                         sync += pdecode2.ldst_exc.eq(core.fus.get_exc("ldst0"))
 804
 805                     with m.If(exec_pc_o_valid):
 806
 807                         # was this the last loop iteration?
 808                         is_last = Signal()
 809                         cur_vl = cur_state.svstate.vl
 810                         comb += is_last.eq(next_srcstep == cur_vl)
 811
 812                         # return directly to Decode if Execute generated an
 813                         # exception.
 814                         with m.If(pdecode2.ldst_exc.happened):
 815                             m.next = "DECODE_SV"
 816
 817                         # if either PC or SVSTATE were changed by the previous
 818                         # instruction, go directly back to Fetch, without
 819                         # updating either PC or SVSTATE
 820                         with m.Elif(pc_changed | sv_changed):
 821                             m.next = "ISSUE_START"
 822
 823                         # also return to Fetch, when no output was a vector
 824                         # (regardless of SRCSTEP and VL), or when the last
 825                         # instruction was really the last one of the VL loop
 826                         with m.Elif((~pdecode2.loop_continue) | is_last):
 827                             # before going back to fetch, update the PC state
 828                             # register with the NIA.
 829                             # ok here we are not reading the branch unit.
 830                             # TODO: this just blithely overwrites whatever
 831                             #       pipeline updated the PC
 832                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 833                             comb += self.state_w_pc.i_data.eq(nia)
 834                             # reset SRCSTEP before returning to Fetch
 835                             if self.svp64_en:
 836                                 with m.If(pdecode2.loop_continue):
 837                                     comb += new_svstate.srcstep.eq(0)
 838                                     comb += new_svstate.dststep.eq(0)
 839                                     comb += update_svstate.eq(1)
 840                             else:
 841                                 comb += new_svstate.srcstep.eq(0)
 842                                 comb += new_svstate.dststep.eq(0)
 843                                 comb += update_svstate.eq(1)
 844                             m.next = "ISSUE_START"
 845
 846                         # returning to Execute? then, first update SRCSTEP
 847                         with m.Else():
 848                             comb += new_svstate.srcstep.eq(next_srcstep)
 849                             comb += new_svstate.dststep.eq(next_dststep)
 850                             comb += update_svstate.eq(1)
 851                             # return to mask skip loop
 852                             m.next = "PRED_SKIP"
 853
 854                 with m.Else():
 855                     comb += dbg.core_stopped_i.eq(1)
 856                     # while stopped, allow updating the PC and SVSTATE
 857                     with m.If(self.pc_i.ok):
 858                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 859                         comb += self.state_w_pc.i_data.eq(self.pc_i.data)
 860                         sync += pc_changed.eq(1)
 861                     with m.If(self.svstate_i.ok):
 862                         comb += new_svstate.eq(self.svstate_i.data)
 863                         comb += update_svstate.eq(1)
 864                         sync += sv_changed.eq(1)
 865
 866         # check if svstate needs updating: if so, write it to State Regfile
 867         with m.If(update_svstate):
 868             comb += self.state_w_sv.wen.eq(1<<StateRegs.SVSTATE)
 869             comb += self.state_w_sv.i_data.eq(new_svstate)
 870             sync += cur_state.svstate.eq(new_svstate) # for next clock
 871
 872     def execute_fsm(self, m, core, pc_changed, sv_changed,
 873                     exec_insn_i_valid, exec_insn_o_ready,
 874                     exec_pc_o_valid, exec_pc_i_ready):
 875         """execute FSM
 876
 877         execute FSM. this interacts with the "issue" FSM
 878         through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
 879         (outgoing). SVP64 RM prefixes have already been set up by the
 880         "issue" phase, so execute is fairly straightforward.
 881         """
 882
 883         comb = m.d.comb
 884         sync = m.d.sync
 885         pdecode2 = self.pdecode2
 886
 887         # temporaries
 888         core_busy_o = core.n.o_data.busy_o # core is busy
 889         core_ivalid_i = core.p.i_valid              # instruction is valid
 890
 891         with m.FSM(name="exec_fsm"):
 892
 893             # waiting for instruction bus (stays there until not busy)
 894             with m.State("INSN_START"):
 895                 comb += exec_insn_o_ready.eq(1)
 896                 with m.If(exec_insn_i_valid):
 897                     comb += core_ivalid_i.eq(1)  # instruction is valid/issued
 898                     sync += sv_changed.eq(0)
 899                     sync += pc_changed.eq(0)
 900                     with m.If(core.p.o_ready): # only move if accepted
 901                         m.next = "INSN_ACTIVE"  # move to "wait completion"
 902
 903             # instruction started: must wait till it finishes
 904             with m.State("INSN_ACTIVE"):
 905                 # note changes to PC and SVSTATE
 906                 with m.If(self.state_nia.wen & (1<<StateRegs.SVSTATE)):
 907                     sync += sv_changed.eq(1)
 908                 with m.If(self.state_nia.wen & (1<<StateRegs.PC)):
 909                     sync += pc_changed.eq(1)
 910                 with m.If(~core_busy_o): # instruction done!
 911                     comb += exec_pc_o_valid.eq(1)
 912                     with m.If(exec_pc_i_ready):
 913                         # when finished, indicate "done".
 914                         # however, if there was an exception, the instruction
 915                         # is *not* yet done.  this is an implementation
 916                         # detail: we choose to implement exceptions by
 917                         # taking the exception information from the LDST
 918                         # unit, putting that *back* into the PowerDecoder2,
 919                         # and *re-running the entire instruction*.
 920                         # if we erroneously indicate "done" here, it is as if
 921                         # there were *TWO* instructions:
 922                         # 1) the failed LDST 2) a TRAP.
 923                         with m.If(~pdecode2.ldst_exc.happened):
 924                             comb += self.insn_done.eq(1)
 925                         m.next = "INSN_START"  # back to fetch
 926
 927     def setup_peripherals(self, m):
 928         comb, sync = m.d.comb, m.d.sync
 929
 930         # okaaaay so the debug module must be in coresync clock domain
 931         # but NOT its reset signal. to cope with this, set every single
 932         # submodule explicitly in coresync domain, debug and JTAG
 933         # in their own one but using *external* reset.
 934         csd = DomainRenamer("coresync")
 935         dbd = DomainRenamer(self.dbg_domain)
 936
 937         m.submodules.core = core = csd(self.core)
 938         m.submodules.imem = imem = csd(self.imem)
 939         m.submodules.dbg = dbg = dbd(self.dbg)
 940         if self.jtag_en:
 941             m.submodules.jtag = jtag = dbd(self.jtag)
 942             # TODO: UART2GDB mux, here, from external pin
 943             # see https://bugs.libre-soc.org/show_bug.cgi?id=499
 944             sync += dbg.dmi.connect_to(jtag.dmi)
 945
 946         cur_state = self.cur_state
 947
 948         # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
 949         if self.sram4x4k:
 950             for i, sram in enumerate(self.sram4k):
 951                 m.submodules["sram4k_%d" % i] = csd(sram)
 952                 comb += sram.enable.eq(self.wb_sram_en)
 953
 954         # XICS interrupt handler
 955         if self.xics:
 956             m.submodules.xics_icp = icp = csd(self.xics_icp)
 957             m.submodules.xics_ics = ics = csd(self.xics_ics)
 958             comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
 959             sync += cur_state.eint.eq(icp.core_irq_o) # connect ICP to core
 960
 961         # GPIO test peripheral
 962         if self.gpio:
 963             m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)
 964
 965         # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
 966         # XXX causes litex ECP5 test to get wrong idea about input and output
 967         # (but works with verilator sim *sigh*)
 968         #if self.gpio and self.xics:
 969         #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
 970
 971         # instruction decoder
 972         pdecode = create_pdecode()
 973         m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
 974         if self.svp64_en:
 975             m.submodules.svp64 = svp64 = csd(self.svp64)
 976
 977         # convenience
 978         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
 979         intrf = self.core.regs.rf['int']
 980
 981         # clock delay power-on reset
 982         cd_por  = ClockDomain(reset_less=True)
 983         cd_sync = ClockDomain()
 984         core_sync = ClockDomain("coresync")
 985         m.domains += cd_por, cd_sync, core_sync
 986         if self.dbg_domain != "sync":
 987             dbg_sync = ClockDomain(self.dbg_domain)
 988             m.domains += dbg_sync
 989
 990         ti_rst = Signal(reset_less=True)
 991         delay = Signal(range(4), reset=3)
 992         with m.If(delay != 0):
 993             m.d.por += delay.eq(delay - 1)
 994         comb += cd_por.clk.eq(ClockSignal())
 995
 996         # power-on reset delay
 997         core_rst = ResetSignal("coresync")
 998         comb += ti_rst.eq(delay != 0 | dbg.core_rst_o | ResetSignal())
 999         comb += core_rst.eq(ti_rst)
1000
1001         # debug clock is same as coresync, but reset is *main external*
1002         if self.dbg_domain != "sync":
1003             dbg_rst = ResetSignal(self.dbg_domain)
1004             comb += dbg_rst.eq(ResetSignal())
1005
1006         # busy/halted signals from core
1007         core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o # core is busy
1008         comb += self.busy_o.eq(core_busy_o)
1009         comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
1010
1011         # temporary hack: says "go" immediately for both address gen and ST
1012         l0 = core.l0
1013         ldst = core.fus.fus['ldst0']
1014         st_go_edge = rising_edge(m, ldst.st.rel_o)
1015         m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o) # link addr-go direct to rel
1016         m.d.comb += ldst.st.go_i.eq(st_go_edge) # link store-go to rising rel
1017
1018     def elaborate(self, platform):
1019         m = Module()
1020         # convenience
1021         comb, sync = m.d.comb, m.d.sync
1022         cur_state = self.cur_state
1023         pdecode2 = self.pdecode2
1024         dbg = self.dbg
1025         core = self.core
1026
1027         # set up peripherals and core
1028         core_rst = self.core_rst
1029         self.setup_peripherals(m)
1030
1031         # reset current state if core reset requested
1032         with m.If(core_rst):
1033             m.d.sync += self.cur_state.eq(0)
1034
1035         # PC and instruction from I-Memory
1036         comb += self.pc_o.eq(cur_state.pc)
1037         pc_changed = Signal() # note write to PC
1038         sv_changed = Signal() # note write to SVSTATE
1039
1040         # indicate to outside world if any FU is still executing
1041         comb += self.any_busy.eq(core.n.o_data.any_busy_o) # any FU executing
1042
1043         # read state either from incoming override or from regfile
1044         # TODO: really should be doing MSR in the same way
1045         pc = state_get(m, core_rst, self.pc_i,
1046                             "pc",                  # read PC
1047                             self.state_r_pc, StateRegs.PC)
1048         svstate = state_get(m, core_rst, self.svstate_i,
1049                             "svstate",   # read SVSTATE
1050                             self.state_r_sv, StateRegs.SVSTATE)
1051
1052         # don't write pc every cycle
1053         comb += self.state_w_pc.wen.eq(0)
1054         comb += self.state_w_pc.i_data.eq(0)
1055
1056         # don't read msr every cycle
1057         comb += self.state_r_msr.ren.eq(0)
1058
1059         # address of the next instruction, in the absence of a branch
1060         # depends on the instruction size
1061         nia = Signal(64)
1062
1063         # connect up debug signals
1064         # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
1065         comb += dbg.terminate_i.eq(core.o.core_terminate_o)
1066         comb += dbg.state.pc.eq(pc)
1067         comb += dbg.state.svstate.eq(svstate)
1068         comb += dbg.state.msr.eq(cur_state.msr)
1069
1070         # pass the prefix mode from Fetch to Issue, so the latter can loop
1071         # on VL==0
1072         is_svp64_mode = Signal()
1073
1074         # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
1075         # issue, decode/execute, now joined by "Predicate fetch/calculate".
1076         # these are the handshake signals between each
1077
1078         # fetch FSM can run as soon as the PC is valid
1079         fetch_pc_i_valid = Signal() # Execute tells Fetch "start next read"
1080         fetch_pc_o_ready = Signal() # Fetch Tells SVSTATE "proceed"
1081
1082         # fetch FSM hands over the instruction to be decoded / issued
1083         fetch_insn_o_valid = Signal()
1084         fetch_insn_i_ready = Signal()
1085
1086         # predicate fetch FSM decodes and fetches the predicate
1087         pred_insn_i_valid = Signal()
1088         pred_insn_o_ready = Signal()
1089
1090         # predicate fetch FSM delivers the masks
1091         pred_mask_o_valid = Signal()
1092         pred_mask_i_ready = Signal()
1093
1094         # issue FSM delivers the instruction to the be executed
1095         exec_insn_i_valid = Signal()
1096         exec_insn_o_ready = Signal()
1097
1098         # execute FSM, hands over the PC/SVSTATE back to the issue FSM
1099         exec_pc_o_valid = Signal()
1100         exec_pc_i_ready = Signal()
1101
1102         # the FSMs here are perhaps unusual in that they detect conditions
1103         # then "hold" information, combinatorially, for the core
1104         # (as opposed to using sync - which would be on a clock's delay)
1105         # this includes the actual opcode, valid flags and so on.
1106
1107         # Fetch, then predicate fetch, then Issue, then Execute.
1108         # Issue is where the VL for-loop # lives.  the ready/valid
1109         # signalling is used to communicate between the four.
1110
1111         self.fetch_fsm(m, dbg, core, pc, svstate, nia, is_svp64_mode,
1112                        fetch_pc_o_ready, fetch_pc_i_valid,
1113                        fetch_insn_o_valid, fetch_insn_i_ready)
1114
1115         self.issue_fsm(m, core, pc_changed, sv_changed, nia,
1116                        dbg, core_rst, is_svp64_mode,
1117                        fetch_pc_o_ready, fetch_pc_i_valid,
1118                        fetch_insn_o_valid, fetch_insn_i_ready,
1119                        pred_insn_i_valid, pred_insn_o_ready,
1120                        pred_mask_o_valid, pred_mask_i_ready,
1121                        exec_insn_i_valid, exec_insn_o_ready,
1122                        exec_pc_o_valid, exec_pc_i_ready)
1123
1124         if self.svp64_en:
1125             self.fetch_predicate_fsm(m,
1126                                      pred_insn_i_valid, pred_insn_o_ready,
1127                                      pred_mask_o_valid, pred_mask_i_ready)
1128
1129         self.execute_fsm(m, core, pc_changed, sv_changed,
1130                          exec_insn_i_valid, exec_insn_o_ready,
1131                          exec_pc_o_valid, exec_pc_i_ready)
1132
1133         # whatever was done above, over-ride it if core reset is held
1134         with m.If(core_rst):
1135             sync += nia.eq(0)
1136
1137         # this bit doesn't have to be in the FSM: connect up to read
1138         # regfiles on demand from DMI
1139         self.do_dmi(m, dbg)
1140
1141         # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
1142         # (which uses that in PowerDecoder2 to raise 0x900 exception)
1143         self.tb_dec_fsm(m, cur_state.dec)
1144
1145         return m
1146
1147     def do_dmi(self, m, dbg):
1148         """deals with DMI debug requests
1149
1150         currently only provides read requests for the INT regfile, CR and XER
1151         it will later also deal with *writing* to these regfiles.
1152         """
1153         comb = m.d.comb
1154         sync = m.d.sync
1155         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
1156         intrf = self.core.regs.rf['int']
1157
1158         with m.If(d_reg.req): # request for regfile access being made
1159             # TODO: error-check this
1160             # XXX should this be combinatorial?  sync better?
1161             if intrf.unary:
1162                 comb += self.int_r.ren.eq(1<<d_reg.addr)
1163             else:
1164                 comb += self.int_r.addr.eq(d_reg.addr)
1165                 comb += self.int_r.ren.eq(1)
1166         d_reg_delay  = Signal()
1167         sync += d_reg_delay.eq(d_reg.req)
1168         with m.If(d_reg_delay):
1169             # data arrives one clock later
1170             comb += d_reg.data.eq(self.int_r.o_data)
1171             comb += d_reg.ack.eq(1)
1172
1173         # sigh same thing for CR debug
1174         with m.If(d_cr.req): # request for regfile access being made
1175             comb += self.cr_r.ren.eq(0b11111111) # enable all
1176         d_cr_delay  = Signal()
1177         sync += d_cr_delay.eq(d_cr.req)
1178         with m.If(d_cr_delay):
1179             # data arrives one clock later
1180             comb += d_cr.data.eq(self.cr_r.o_data)
1181             comb += d_cr.ack.eq(1)
1182
1183         # aaand XER...
1184         with m.If(d_xer.req): # request for regfile access being made
1185             comb += self.xer_r.ren.eq(0b111111) # enable all
1186         d_xer_delay  = Signal()
1187         sync += d_xer_delay.eq(d_xer.req)
1188         with m.If(d_xer_delay):
1189             # data arrives one clock later
1190             comb += d_xer.data.eq(self.xer_r.o_data)
1191             comb += d_xer.ack.eq(1)
1192
1193     def tb_dec_fsm(self, m, spr_dec):
1194         """tb_dec_fsm
1195
1196         this is a FSM for updating either dec or tb.  it runs alternately
1197         DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
1198         value to DEC, however the regfile has "passthrough" on it so this
1199         *should* be ok.
1200
1201         see v3.0B p1097-1099 for Timeer Resource and p1065 and p1076
1202         """
1203
1204         comb, sync = m.d.comb, m.d.sync
1205         fast_rf = self.core.regs.rf['fast']
1206         fast_r_dectb = fast_rf.r_ports['issue'] # DEC/TB
1207         fast_w_dectb = fast_rf.w_ports['issue'] # DEC/TB
1208
1209         with m.FSM() as fsm:
1210
1211             # initiates read of current DEC
1212             with m.State("DEC_READ"):
1213                 comb += fast_r_dectb.addr.eq(FastRegs.DEC)
1214                 comb += fast_r_dectb.ren.eq(1)
1215                 m.next = "DEC_WRITE"
1216
1217             # waits for DEC read to arrive (1 cycle), updates with new value
1218             with m.State("DEC_WRITE"):
1219                 new_dec = Signal(64)
1220                 # TODO: MSR.LPCR 32-bit decrement mode
1221                 comb += new_dec.eq(fast_r_dectb.o_data - 1)
1222                 comb += fast_w_dectb.addr.eq(FastRegs.DEC)
1223                 comb += fast_w_dectb.wen.eq(1)
1224                 comb += fast_w_dectb.i_data.eq(new_dec)
1225                 sync += spr_dec.eq(new_dec) # copy into cur_state for decoder
1226                 m.next = "TB_READ"
1227
1228             # initiates read of current TB
1229             with m.State("TB_READ"):
1230                 comb += fast_r_dectb.addr.eq(FastRegs.TB)
1231                 comb += fast_r_dectb.ren.eq(1)
1232                 m.next = "TB_WRITE"
1233
1234             # waits for read TB to arrive, initiates write of current TB
1235             with m.State("TB_WRITE"):
1236                 new_tb = Signal(64)
1237                 comb += new_tb.eq(fast_r_dectb.o_data + 1)
1238                 comb += fast_w_dectb.addr.eq(FastRegs.TB)
1239                 comb += fast_w_dectb.wen.eq(1)
1240                 comb += fast_w_dectb.i_data.eq(new_tb)
1241                 m.next = "DEC_READ"
1242
1243         return m
1244
1245     def __iter__(self):
1246         yield from self.pc_i.ports()
1247         yield self.pc_o
1248         yield self.memerr_o
1249         yield from self.core.ports()
1250         yield from self.imem.ports()
1251         yield self.core_bigendian_i
1252         yield self.busy_o
1253
1254     def ports(self):
1255         return list(self)
1256
1257     def external_ports(self):
1258         ports = self.pc_i.ports()
1259         ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
1260                 ]
1261
1262         if self.jtag_en:
1263             ports += list(self.jtag.external_ports())
1264         else:
1265             # don't add DMI if JTAG is enabled
1266             ports += list(self.dbg.dmi.ports())
1267
1268         ports += list(self.imem.ibus.fields.values())
1269         ports += list(self.core.l0.cmpi.wb_bus().fields.values())
1270
1271         if self.sram4x4k:
1272             for sram in self.sram4k:
1273                 ports += list(sram.bus.fields.values())
1274
1275         if self.xics:
1276             ports += list(self.xics_icp.bus.fields.values())
1277             ports += list(self.xics_ics.bus.fields.values())
1278             ports.append(self.int_level_i)
1279
1280         if self.gpio:
1281             ports += list(self.simple_gpio.bus.fields.values())
1282             ports.append(self.gpio_o)
1283
1284         return ports
1285
1286     def ports(self):
1287         return list(self)
1288
1289
class TestIssuer(Elaboratable):
    """Wrapper pairing a TestIssuerInternal with a PLL and clock wiring.

    ``pspec`` is handed unchanged to TestIssuerInternal.  When it carries a
    truthy ``use_pll`` attribute, the PLL is added as a submodule, a
    "pllclk" clock domain is created, and the PLL test / VCO outputs and
    the clock-select input are brought out as attributes of this object.

    In both configurations the "coresync" clock (and the debug clock, when
    the debug domain is not "sync") is driven from the clock of the default
    domain: the PLL output does not clock the core here.
    """

    def __init__(self, pspec):
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL(instance=True)

        # PLL direct clock or not: enabled only if pspec has a truthy use_pll
        self.pll_en = getattr(pspec, "use_pll", False)
        if self.pll_en:
            self.pll_test_o = Signal(reset_less=True)
            self.pll_vco_o = Signal(reset_less=True)
            self.clk_sel_i = Signal(2, reset_less=True)
            self.ref_clk = ClockSignal()  # can't rename it but that's ok
            self.pllclk_clk = ClockSignal("pllclk")

    def elaborate(self, platform):
        """Build the module: add the submodules and wire up clocks/resets."""
        m = Module()
        comb = m.d.comb

        # TestIssuer nominally runs at main clock, actually it is
        # all combinatorial internally except for coresync'd components
        m.submodules.ti = self.ti

        if self.pll_en:
            m.submodules.wrappll = pll = self.pll

            # add clock domain for the PLL output
            cd_pll = ClockDomain("pllclk")
            m.domains += cd_pll

            # the "pllclk" domain clock is the PLL clock output
            comb += self.pllclk_clk.eq(pll.clk_pll_o)

            # NOTE: the external reference clock is not wired to the PLL
            # input here (assignment below is disabled); pll.clk_24_i is
            # listed by external_ports() instead.
            #comb += pll.clk_24_i.eq(self.ref_clk)

            # output 18 mhz PLL test signal, and analog oscillator out
            comb += self.pll_test_o.eq(pll.pll_test_o)
            comb += self.pll_vco_o.eq(pll.pll_vco_o)

            # input to pll clock selection
            comb += pll.clk_sel_i.eq(self.clk_sel_i)

            # the pllclk domain shares the reset of the default domain
            comb += ResetSignal("pllclk").eq(ResetSignal())

        # NOTE: the "coresync" (and "dbgsync") clock domains are not
        # declared by this module; only their clock signals are driven.
        intclk = ClockSignal("coresync")

        # XXX BYPASS PLL XXX
        # even with the PLL enabled the internal clock is taken from
        # ref_clk (the default domain's clock), not from the PLL output
        if self.pll_en:
            comb += intclk.eq(self.ref_clk)
        else:
            comb += intclk.eq(ClockSignal())

        # debug clock runs at coresync internal clock
        if self.ti.dbg_domain != 'sync':
            dbgclk = ClockSignal(self.ti.dbg_domain)
            comb += dbgclk.eq(intclk)

        return m

    def ports(self):
        """Return the TestIssuerInternal ports, the PLL ports, then the
        default-domain clock and reset signals, as one list."""
        return list(self.ti.ports()) + list(self.pll.ports()) + \
               [ClockSignal(), ResetSignal()]

    def external_ports(self):
        """Return the externally visible ports as a new list.

        Starts from ``ti.external_ports()``, adds the default-domain clock
        and reset, and - only when the PLL is enabled - the PLL-related
        signals (clock select, 24 MHz input, test / VCO outputs, PLL clock
        and reference clock).
        """
        # copy so that the list returned by ti.external_ports() is untouched
        ports = list(self.ti.external_ports())
        ports += [ClockSignal(), ResetSignal()]
        if self.pll_en:
            ports += [self.clk_sel_i,
                      self.pll.clk_24_i,
                      self.pll_test_o,
                      self.pll_vco_o,
                      self.pllclk_clk,
                      self.ref_clk]
        return ports
1377
1378
if __name__ == '__main__':
    # request exactly one of each listed unit
    units = dict.fromkeys(('alu', 'cr', 'branch', 'trap', 'logical',
                           'spr', 'div', 'mul', 'shiftrot'), 1)
    pspec = TestMemPspec(
        ldst_ifacetype='bare_wb',
        imem_ifacetype='bare_wb',
        addr_wid=48,
        mask_wid=8,
        reg_wid=64,
        units=units,
    )
    dut = TestIssuer(pspec)

    # hand the design to the nmigen command-line driver
    vl = main(dut, ports=dut.ports(), name="test_issuer")

    # when run with no command-line arguments, also write the design
    # (using its external ports) out as RTLIL
    if len(sys.argv) == 1:
        vl = rtlil.convert(dut, ports=dut.external_ports(),
                           name="test_issuer")
        with open("test_issuer.il", "w") as il_file:
            il_file.write(vl)