src/soc/simple/issuer.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
  12 the purpose of this module is to verify the functional correctness
  13 of the Function Units in the absolute simplest and clearest possible
  14 way, and to at provide something that can be further incrementally
  15 improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
  19                     ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from nmutil.singlepipe import ControlBase
  25 from soc.simple.core_data import FetchOutput, FetchInput
  26
  27 from nmigen.lib.coding import PriorityEncoder
  28
  29 from openpower.decoder.power_decoder import create_pdecode
  30 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
  31 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
  32 from openpower.decoder.decode2execute1 import Data
  33 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
  34                                      SVP64PredMode)
  35 from openpower.state import CoreState
  36 from openpower.consts import (CR, SVP64CROffs)
  37 from soc.experiment.testmem import TestMemory # test only for instructions
  38 from soc.regfile.regfiles import StateRegs, FastRegs
  39 from soc.simple.core import NonProductionCore
  40 from soc.config.test.test_loadstore import TestMemPspec
  41 from soc.config.ifetch import ConfigFetchUnit
  42 from soc.debug.dmi import CoreDebug, DMIInterface
  43 from soc.debug.jtag import JTAG
  44 from soc.config.pinouts import get_pinspecs
  45 from soc.interrupts.xics import XICS_ICP, XICS_ICS
  46 from soc.bus.simple_gpio import SimpleGPIO
  47 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
  48 from soc.clock.select import ClockSelect
  49 from soc.clock.dummypll import DummyPLL
  50 from openpower.sv.svstate import SVSTATERec
  51
  52
  53 from nmutil.util import rising_edge
  54
  55 def get_insn(f_instr_o, pc):
  56     if f_instr_o.width == 32:
  57         return f_instr_o
  58     else:
  59         # 64-bit: bit 2 of pc decides which word to select
  60         return f_instr_o.word_select(pc[2], 32)
  61
  62 # gets state input or reads from state regfile
  63 def state_get(m, core_rst, state_i, name, regfile, regnum):
  64     comb = m.d.comb
  65     sync = m.d.sync
  66     # read the PC
  67     res = Signal(64, reset_less=True, name=name)
  68     res_ok_delay = Signal(name="%s_ok_delay" % name)
  69     with m.If(~core_rst):
  70         sync += res_ok_delay.eq(~state_i.ok)
  71         with m.If(state_i.ok):
  72             # incoming override (start from pc_i)
  73             comb += res.eq(state_i.data)
  74         with m.Else():
  75             # otherwise read StateRegs regfile for PC...
  76             comb += regfile.ren.eq(1<<regnum)
  77         # ... but on a 1-clock delay
  78         with m.If(res_ok_delay):
  79             comb += res.eq(regfile.o_data)
  80     return res
  81
  82
  83 def get_predint(m, mask, name):
  84     """decode SVP64 predicate integer mask field to reg number and invert
  85     this is identical to the equivalent function in ISACaller except that
  86     it doesn't read the INT directly, it just decodes "what needs to be done"
  87     i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
  88
  89     * all1s is set to indicate that no mask is to be applied.
  90     * regread indicates the GPR register number to be read
  91     * invert is set to indicate that the register value is to be inverted
  92     * unary indicates that the contents of the register is to be shifted 1<<r3
  93     """
  94     comb = m.d.comb
  95     regread = Signal(5, name=name+"regread")
  96     invert = Signal(name=name+"invert")
  97     unary = Signal(name=name+"unary")
  98     all1s = Signal(name=name+"all1s")
  99     with m.Switch(mask):
 100         with m.Case(SVP64PredInt.ALWAYS.value):
 101             comb += all1s.eq(1)      # use 0b1111 (all ones)
 102         with m.Case(SVP64PredInt.R3_UNARY.value):
 103             comb += regread.eq(3)
 104             comb += unary.eq(1)        # 1<<r3 - shift r3 (single bit)
 105         with m.Case(SVP64PredInt.R3.value):
 106             comb += regread.eq(3)
 107         with m.Case(SVP64PredInt.R3_N.value):
 108             comb += regread.eq(3)
 109             comb += invert.eq(1)
 110         with m.Case(SVP64PredInt.R10.value):
 111             comb += regread.eq(10)
 112         with m.Case(SVP64PredInt.R10_N.value):
 113             comb += regread.eq(10)
 114             comb += invert.eq(1)
 115         with m.Case(SVP64PredInt.R30.value):
 116             comb += regread.eq(30)
 117         with m.Case(SVP64PredInt.R30_N.value):
 118             comb += regread.eq(30)
 119             comb += invert.eq(1)
 120     return regread, invert, unary, all1s
 121
 122
 123 def get_predcr(m, mask, name):
 124     """decode SVP64 predicate CR to reg number field and invert status
 125     this is identical to _get_predcr in ISACaller
 126     """
 127     comb = m.d.comb
 128     idx = Signal(2, name=name+"idx")
 129     invert = Signal(name=name+"crinvert")
 130     with m.Switch(mask):
 131         with m.Case(SVP64PredCR.LT.value):
 132             comb += idx.eq(CR.LT)
 133             comb += invert.eq(0)
 134         with m.Case(SVP64PredCR.GE.value):
 135             comb += idx.eq(CR.LT)
 136             comb += invert.eq(1)
 137         with m.Case(SVP64PredCR.GT.value):
 138             comb += idx.eq(CR.GT)
 139             comb += invert.eq(0)
 140         with m.Case(SVP64PredCR.LE.value):
 141             comb += idx.eq(CR.GT)
 142             comb += invert.eq(1)
 143         with m.Case(SVP64PredCR.EQ.value):
 144             comb += idx.eq(CR.EQ)
 145             comb += invert.eq(0)
 146         with m.Case(SVP64PredCR.NE.value):
 147             comb += idx.eq(CR.EQ)
 148             comb += invert.eq(1)
 149         with m.Case(SVP64PredCR.SO.value):
 150             comb += idx.eq(CR.SO)
 151             comb += invert.eq(0)
 152         with m.Case(SVP64PredCR.NS.value):
 153             comb += idx.eq(CR.SO)
 154             comb += invert.eq(1)
 155     return idx, invert
 156
 157
 158 # Fetch Finite State Machine.
 159 # WARNING: there are currently DriverConflicts but it's actually working.
 160 class FetchFSM(ControlBase):
 161     def __init__(self, allow_overlap, svp64_en, imem, core_rst,
 162                        pdecode2, cur_state,
 163                        dbg, core, svstate, nia, is_svp64_mode):
 164         self.allow_overlap = allow_overlap
 165         self.svp64_en = svp64_en
 166         self.imem = imem
 167         self.core_rst = core_rst
 168         self.pdecode2 = pdecode2
 169         self.cur_state = cur_state
 170         self.dbg = dbg
 171         self.core = core
 172         self.svstate = svstate
 173         self.nia = nia
 174         self.is_svp64_mode = is_svp64_mode
 175
 176         # set up pipeline ControlBase and allocate i/o specs
 177         # (unusual: normally done by the Pipeline API)
 178         super().__init__(stage=self)
 179         self.p.i_data, self.n.o_data = self.new_specs(None)
 180         self.i, self.o = self.p.i_data, self.n.o_data
 181
 182     # next 3 functions are Stage API Compliance
 183     def setup(self, m, i):
 184         pass
 185
 186     def ispec(self):
 187         return FetchInput()
 188
 189     def ospec(self):
 190         return FetchOutput()
 191
 192     def elaborate(self, platform):
 193         """fetch FSM
 194
 195         this FSM performs fetch of raw instruction data, partial-decodes
 196         it 32-bit at a time to detect SVP64 prefixes, and will optionally
 197         read a 2nd 32-bit quantity if that occurs.
 198         """
 199         m = super().elaborate(platform)
 200
 201         dbg = self.dbg
 202         core = self.core,
 203         pc = self.i.pc
 204         svstate = self.svstate
 205         nia = self.nia
 206         is_svp64_mode = self.is_svp64_mode
 207         fetch_pc_o_ready = self.p.o_ready
 208         fetch_pc_i_valid = self.p.i_valid
 209         fetch_insn_o_valid = self.n.o_valid
 210         fetch_insn_i_ready = self.n.i_ready
 211
 212         comb = m.d.comb
 213         sync = m.d.sync
 214         pdecode2 = self.pdecode2
 215         cur_state = self.cur_state
 216         dec_opcode_o = pdecode2.dec.raw_opcode_in # raw opcode
 217
 218         msr_read = Signal(reset=1)
 219
 220         # don't read msr every cycle
 221         staterf = self.core.regs.rf['state']
 222         state_r_msr = staterf.r_ports['msr'] # MSR rd
 223
 224         comb += state_r_msr.ren.eq(0)
 225
 226         with m.FSM(name='fetch_fsm'):
 227
 228             # waiting (zzz)
 229             with m.State("IDLE"):
 230                 with m.If(~dbg.stopping_o):
 231                     comb += fetch_pc_o_ready.eq(1)
 232                 with m.If(fetch_pc_i_valid):
 233                     # instruction allowed to go: start by reading the PC
 234                     # capture the PC and also drop it into Insn Memory
 235                     # we have joined a pair of combinatorial memory
 236                     # lookups together.  this is Generally Bad.
 237                     comb += self.imem.a_pc_i.eq(pc)
 238                     comb += self.imem.a_i_valid.eq(1)
 239                     comb += self.imem.f_i_valid.eq(1)
 240                     sync += cur_state.pc.eq(pc)
 241                     sync += cur_state.svstate.eq(svstate) # and svstate
 242
 243                     # initiate read of MSR. arrives one clock later
 244                     comb += state_r_msr.ren.eq(1 << StateRegs.MSR)
 245                     sync += msr_read.eq(0)
 246
 247                     m.next = "INSN_READ"  # move to "wait for bus" phase
 248
 249             # dummy pause to find out why simulation is not keeping up
 250             with m.State("INSN_READ"):
 251                 if self.allow_overlap:
 252                     stopping = dbg.stopping_o
 253                 else:
 254                     stopping = Const(0)
 255                 with m.If(stopping):
 256                     # stopping: jump back to idle
 257                     m.next = "IDLE"
 258                 with m.Else():
 259                     # one cycle later, msr/sv read arrives.  valid only once.
 260                     with m.If(~msr_read):
 261                         sync += msr_read.eq(1) # yeah don't read it again
 262                         sync += cur_state.msr.eq(state_r_msr.o_data)
 263                     with m.If(self.imem.f_busy_o): # zzz...
 264                         # busy: stay in wait-read
 265                         comb += self.imem.a_i_valid.eq(1)
 266                         comb += self.imem.f_i_valid.eq(1)
 267                     with m.Else():
 268                         # not busy: instruction fetched
 269                         insn = get_insn(self.imem.f_instr_o, cur_state.pc)
 270                         if self.svp64_en:
 271                             svp64 = self.svp64
 272                             # decode the SVP64 prefix, if any
 273                             comb += svp64.raw_opcode_in.eq(insn)
 274                             comb += svp64.bigendian.eq(self.core_bigendian_i)
 275                             # pass the decoded prefix (if any) to PowerDecoder2
 276                             sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
 277                             sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
 278                             # remember whether this is a prefixed instruction,
 279                             # so the FSM can readily loop when VL==0
 280                             sync += is_svp64_mode.eq(svp64.is_svp64_mode)
 281                             # calculate the address of the following instruction
 282                             insn_size = Mux(svp64.is_svp64_mode, 8, 4)
 283                             sync += nia.eq(cur_state.pc + insn_size)
 284                             with m.If(~svp64.is_svp64_mode):
 285                                 # with no prefix, store the instruction
 286                                 # and hand it directly to the next FSM
 287                                 sync += dec_opcode_o.eq(insn)
 288                                 m.next = "INSN_READY"
 289                             with m.Else():
 290                                 # fetch the rest of the instruction from memory
 291                                 comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
 292                                 comb += self.imem.a_i_valid.eq(1)
 293                                 comb += self.imem.f_i_valid.eq(1)
 294                                 m.next = "INSN_READ2"
 295                         else:
 296                             # not SVP64 - 32-bit only
 297                             sync += nia.eq(cur_state.pc + 4)
 298                             sync += dec_opcode_o.eq(insn)
 299                             m.next = "INSN_READY"
 300
 301             with m.State("INSN_READ2"):
 302                 with m.If(self.imem.f_busy_o):  # zzz...
 303                     # busy: stay in wait-read
 304                     comb += self.imem.a_i_valid.eq(1)
 305                     comb += self.imem.f_i_valid.eq(1)
 306                 with m.Else():
 307                     # not busy: instruction fetched
 308                     insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
 309                     sync += dec_opcode_o.eq(insn)
 310                     m.next = "INSN_READY"
 311                     # TODO: probably can start looking at pdecode2.rm_dec
 312                     # here or maybe even in INSN_READ state, if svp64_mode
 313                     # detected, in order to trigger - and wait for - the
 314                     # predicate reading.
 315                     if self.svp64_en:
 316                         pmode = pdecode2.rm_dec.predmode
 317                     """
 318                     if pmode != SVP64PredMode.ALWAYS.value:
 319                         fire predicate loading FSM and wait before
 320                         moving to INSN_READY
 321                     else:
 322                         sync += self.srcmask.eq(-1) # set to all 1s
 323                         sync += self.dstmask.eq(-1) # set to all 1s
 324                         m.next = "INSN_READY"
 325                     """
 326
 327             with m.State("INSN_READY"):
 328                 # hand over the instruction, to be decoded
 329                 comb += fetch_insn_o_valid.eq(1)
 330                 with m.If(fetch_insn_i_ready):
 331                     m.next = "IDLE"
 332
 333         # whatever was done above, over-ride it if core reset is held
 334         with m.If(self.core_rst):
 335             sync += nia.eq(0)
 336
 337         return m
 338
 339
 340 class TestIssuerInternal(Elaboratable):
 341     """TestIssuer - reads instructions from TestMemory and issues them
 342
 343     efficiency and speed is not the main goal here: functional correctness
 344     and code clarity is.  optimisations (which almost 100% interfere with
 345     easy understanding) come later.
 346     """
 347     def __init__(self, pspec):
 348
 349         # test is SVP64 is to be enabled
 350         self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
 351
 352         # and if regfiles are reduced
 353         self.regreduce_en = (hasattr(pspec, "regreduce") and
 354                                             (pspec.regreduce == True))
 355
 356         # and if overlap requested
 357         self.allow_overlap = (hasattr(pspec, "allow_overlap") and
 358                                             (pspec.allow_overlap == True))
 359
 360         # JTAG interface.  add this right at the start because if it's
 361         # added it *modifies* the pspec, by adding enable/disable signals
 362         # for parts of the rest of the core
 363         self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
 364         self.dbg_domain = "sync" # sigh "dbgsunc" too problematic
 365         #self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
 366         if self.jtag_en:
 367             # XXX MUST keep this up-to-date with litex, and
 368             # soc-cocotb-sim, and err.. all needs sorting out, argh
 369             subset = ['uart',
 370                       'mtwi',
 371                       'eint', 'gpio', 'mspi0',
 372                       # 'mspi1', - disabled for now
 373                       # 'pwm', 'sd0', - disabled for now
 374                        'sdr']
 375             self.jtag = JTAG(get_pinspecs(subset=subset),
 376                              domain=self.dbg_domain)
 377             # add signals to pspec to enable/disable icache and dcache
 378             # (or data and intstruction wishbone if icache/dcache not included)
 379             # https://bugs.libre-soc.org/show_bug.cgi?id=520
 380             # TODO: do we actually care if these are not domain-synchronised?
 381             # honestly probably not.
 382             pspec.wb_icache_en = self.jtag.wb_icache_en
 383             pspec.wb_dcache_en = self.jtag.wb_dcache_en
 384             self.wb_sram_en = self.jtag.wb_sram_en
 385         else:
 386             self.wb_sram_en = Const(1)
 387
 388         # add 4k sram blocks?
 389         self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
 390                          pspec.sram4x4kblock == True)
 391         if self.sram4x4k:
 392             self.sram4k = []
 393             for i in range(4):
 394                 self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
 395                                                     #features={'err'}
 396                                                     ))
 397
 398         # add interrupt controller?
 399         self.xics = hasattr(pspec, "xics") and pspec.xics == True
 400         if self.xics:
 401             self.xics_icp = XICS_ICP()
 402             self.xics_ics = XICS_ICS()
 403             self.int_level_i = self.xics_ics.int_level_i
 404
 405         # add GPIO peripheral?
 406         self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
 407         if self.gpio:
 408             self.simple_gpio = SimpleGPIO()
 409             self.gpio_o = self.simple_gpio.gpio_o
 410
 411         # main instruction core.  suitable for prototyping / demo only
 412         self.core = core = NonProductionCore(pspec)
 413         self.core_rst = ResetSignal("coresync")
 414
 415         # instruction decoder.  goes into Trap Record
 416         #pdecode = create_pdecode()
 417         self.cur_state = CoreState("cur") # current state (MSR/PC/SVSTATE)
 418         self.pdecode2 = PowerDecode2(None, state=self.cur_state,
 419                                      opkls=IssuerDecode2ToOperand,
 420                                      svp64_en=self.svp64_en,
 421                                      regreduce_en=self.regreduce_en)
 422         pdecode = self.pdecode2.dec
 423
 424         if self.svp64_en:
 425             self.svp64 = SVP64PrefixDecoder() # for decoding SVP64 prefix
 426
 427         # Test Instruction memory
 428         self.imem = ConfigFetchUnit(pspec).fu
 429
 430         # DMI interface
 431         self.dbg = CoreDebug()
 432
 433         # instruction go/monitor
 434         self.pc_o = Signal(64, reset_less=True)
 435         self.pc_i = Data(64, "pc_i") # set "ok" to indicate "please change me"
 436         self.svstate_i = Data(64, "svstate_i") # ditto
 437         self.core_bigendian_i = Signal() # TODO: set based on MSR.LE
 438         self.busy_o = Signal(reset_less=True)
 439         self.memerr_o = Signal(reset_less=True)
 440
 441         # STATE regfile read /write ports for PC, MSR, SVSTATE
 442         staterf = self.core.regs.rf['state']
 443         self.state_r_pc = staterf.r_ports['cia'] # PC rd
 444         self.state_w_pc = staterf.w_ports['d_wr1'] # PC wr
 445         self.state_r_sv = staterf.r_ports['sv'] # SVSTATE rd
 446         self.state_w_sv = staterf.w_ports['sv'] # SVSTATE wr
 447
 448         # DMI interface access
 449         intrf = self.core.regs.rf['int']
 450         crrf = self.core.regs.rf['cr']
 451         xerrf = self.core.regs.rf['xer']
 452         self.int_r = intrf.r_ports['dmi'] # INT read
 453         self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
 454         self.xer_r = xerrf.r_ports['full_xer'] # XER read
 455
 456         if self.svp64_en:
 457             # for predication
 458             self.int_pred = intrf.r_ports['pred'] # INT predicate read
 459             self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read
 460
 461         # hack method of keeping an eye on whether branch/trap set the PC
 462         self.state_nia = self.core.regs.rf['state'].w_ports['nia']
 463         self.state_nia.wen.name = 'state_nia_wen'
 464
 465         # pulse to synchronize the simulator at instruction end
 466         self.insn_done = Signal()
 467
 468         # indicate any instruction still outstanding, in execution
 469         self.any_busy = Signal()
 470
 471         if self.svp64_en:
 472             # store copies of predicate masks
 473             self.srcmask = Signal(64)
 474             self.dstmask = Signal(64)
 475
 476     def fetch_predicate_fsm(self, m,
 477                             pred_insn_i_valid, pred_insn_o_ready,
 478                             pred_mask_o_valid, pred_mask_i_ready):
 479         """fetch_predicate_fsm - obtains (constructs in the case of CR)
 480            src/dest predicate masks
 481
 482         https://bugs.libre-soc.org/show_bug.cgi?id=617
 483         the predicates can be read here, by using IntRegs r_ports['pred']
 484         or CRRegs r_ports['pred'].  in the case of CRs it will have to
 485         be done through multiple reads, extracting one relevant at a time.
 486         later, a faster way would be to use the 32-bit-wide CR port but
 487         this is more complex decoding, here.  equivalent code used in
 488         ISACaller is "from openpower.decoder.isa.caller import get_predcr"
 489
 490         note: this ENTIRE FSM is not to be called when svp64 is disabled
 491         """
 492         comb = m.d.comb
 493         sync = m.d.sync
 494         pdecode2 = self.pdecode2
 495         rm_dec = pdecode2.rm_dec # SVP64RMModeDecode
 496         predmode = rm_dec.predmode
 497         srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
 498         cr_pred, int_pred = self.cr_pred, self.int_pred   # read regfiles
 499         # get src/dst step, so we can skip already used mask bits
 500         cur_state = self.cur_state
 501         srcstep = cur_state.svstate.srcstep
 502         dststep = cur_state.svstate.dststep
 503         cur_vl = cur_state.svstate.vl
 504
 505         # decode predicates
 506         sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
 507         dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
 508         sidx, scrinvert = get_predcr(m, srcpred, 's')
 509         didx, dcrinvert = get_predcr(m, dstpred, 'd')
 510
 511         # store fetched masks, for either intpred or crpred
 512         # when src/dst step is not zero, the skipped mask bits need to be
 513         # shifted-out, before actually storing them in src/dest mask
 514         new_srcmask = Signal(64, reset_less=True)
 515         new_dstmask = Signal(64, reset_less=True)
 516
 517         with m.FSM(name="fetch_predicate"):
 518
 519             with m.State("FETCH_PRED_IDLE"):
 520                 comb += pred_insn_o_ready.eq(1)
 521                 with m.If(pred_insn_i_valid):
 522                     with m.If(predmode == SVP64PredMode.INT):
 523                         # skip fetching destination mask register, when zero
 524                         with m.If(dall1s):
 525                             sync += new_dstmask.eq(-1)
 526                             # directly go to fetch source mask register
 527                             # guaranteed not to be zero (otherwise predmode
 528                             # would be SVP64PredMode.ALWAYS, not INT)
 529                             comb += int_pred.addr.eq(sregread)
 530                             comb += int_pred.ren.eq(1)
 531                             m.next = "INT_SRC_READ"
 532                         # fetch destination predicate register
 533                         with m.Else():
 534                             comb += int_pred.addr.eq(dregread)
 535                             comb += int_pred.ren.eq(1)
 536                             m.next = "INT_DST_READ"
 537                     with m.Elif(predmode == SVP64PredMode.CR):
 538                         # go fetch masks from the CR register file
 539                         sync += new_srcmask.eq(0)
 540                         sync += new_dstmask.eq(0)
 541                         m.next = "CR_READ"
 542                     with m.Else():
 543                         sync += self.srcmask.eq(-1)
 544                         sync += self.dstmask.eq(-1)
 545                         m.next = "FETCH_PRED_DONE"
 546
 547             with m.State("INT_DST_READ"):
 548                 # store destination mask
 549                 inv = Repl(dinvert, 64)
 550                 with m.If(dunary):
 551                     # set selected mask bit for 1<<r3 mode
 552                     dst_shift = Signal(range(64))
 553                     comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
 554                     sync += new_dstmask.eq(1 << dst_shift)
 555                 with m.Else():
 556                     # invert mask if requested
 557                     sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
 558                 # skip fetching source mask register, when zero
 559                 with m.If(sall1s):
 560                     sync += new_srcmask.eq(-1)
 561                     m.next = "FETCH_PRED_SHIFT_MASK"
 562                 # fetch source predicate register
 563                 with m.Else():
 564                     comb += int_pred.addr.eq(sregread)
 565                     comb += int_pred.ren.eq(1)
 566                     m.next = "INT_SRC_READ"
 567
 568             with m.State("INT_SRC_READ"):
 569                 # store source mask
 570                 inv = Repl(sinvert, 64)
 571                 with m.If(sunary):
 572                     # set selected mask bit for 1<<r3 mode
 573                     src_shift = Signal(range(64))
 574                     comb += src_shift.eq(self.int_pred.o_data & 0b111111)
 575                     sync += new_srcmask.eq(1 << src_shift)
 576                 with m.Else():
 577                     # invert mask if requested
 578                     sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
 579                 m.next = "FETCH_PRED_SHIFT_MASK"
 580
 581             # fetch masks from the CR register file
 582             # implements the following loop:
 583             # idx, inv = get_predcr(mask)
 584             # mask = 0
 585             # for cr_idx in range(vl):
 586             #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
 587             #     if cr[idx] ^ inv:
 588             #         mask |= 1 << cr_idx
 589             # return mask
 590             with m.State("CR_READ"):
 591                 # CR index to be read, which will be ready by the next cycle
 592                 cr_idx = Signal.like(cur_vl, reset_less=True)
 593                 # submit the read operation to the regfile
 594                 with m.If(cr_idx != cur_vl):
 595                     # the CR read port is unary ...
 596                     # ren = 1 << cr_idx
 597                     # ... in MSB0 convention ...
 598                     # ren = 1 << (7 - cr_idx)
 599                     # ... and with an offset:
 600                     # ren = 1 << (7 - off - cr_idx)
 601                     idx = SVP64CROffs.CRPred + cr_idx
 602                     comb += cr_pred.ren.eq(1 << (7 - idx))
 603                     # signal data valid in the next cycle
 604                     cr_read = Signal(reset_less=True)
 605                     sync += cr_read.eq(1)
 606                     # load the next index
 607                     sync += cr_idx.eq(cr_idx + 1)
 608                 with m.Else():
 609                     # exit on loop end
 610                     sync += cr_read.eq(0)
 611                     sync += cr_idx.eq(0)
 612                     m.next = "FETCH_PRED_SHIFT_MASK"
 613                 with m.If(cr_read):
 614                     # compensate for the one cycle delay on the regfile
 615                     cur_cr_idx = Signal.like(cur_vl)
 616                     comb += cur_cr_idx.eq(cr_idx - 1)
 617                     # read the CR field, select the appropriate bit
 618                     cr_field = Signal(4)
 619                     scr_bit = Signal()
 620                     dcr_bit = Signal()
 621                     comb += cr_field.eq(cr_pred.o_data)
 622                     comb += scr_bit.eq(cr_field.bit_select(sidx, 1) ^ scrinvert)
 623                     comb += dcr_bit.eq(cr_field.bit_select(didx, 1) ^ dcrinvert)
 624                     # set the corresponding mask bit
 625                     bit_to_set = Signal.like(self.srcmask)
 626                     comb += bit_to_set.eq(1 << cur_cr_idx)
 627                     with m.If(scr_bit):
 628                         sync += new_srcmask.eq(new_srcmask | bit_to_set)
 629                     with m.If(dcr_bit):
 630                         sync += new_dstmask.eq(new_dstmask | bit_to_set)
 631
 632             with m.State("FETCH_PRED_SHIFT_MASK"):
 633                 # shift-out skipped mask bits
 634                 sync += self.srcmask.eq(new_srcmask >> srcstep)
 635                 sync += self.dstmask.eq(new_dstmask >> dststep)
 636                 m.next = "FETCH_PRED_DONE"
 637
 638             with m.State("FETCH_PRED_DONE"):
 639                 comb += pred_mask_o_valid.eq(1)
 640                 with m.If(pred_mask_i_ready):
 641                     m.next = "FETCH_PRED_IDLE"
 642
 643     def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
 644                   dbg, core_rst, is_svp64_mode,
 645                   fetch_pc_o_ready, fetch_pc_i_valid,
 646                   fetch_insn_o_valid, fetch_insn_i_ready,
 647                   pred_insn_i_valid, pred_insn_o_ready,
 648                   pred_mask_o_valid, pred_mask_i_ready,
 649                   exec_insn_i_valid, exec_insn_o_ready,
 650                   exec_pc_o_valid, exec_pc_i_ready):
 651         """issue FSM
 652
 653         decode / issue FSM.  this interacts with the "fetch" FSM
 654         through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
 655         (outgoing). also interacts with the "execute" FSM
 656         through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
 657         (incoming).
 658         SVP64 RM prefixes have already been set up by the
 659         "fetch" phase, so execute is fairly straightforward.
 660         """
 661
 662         comb = m.d.comb
 663         sync = m.d.sync
 664         pdecode2 = self.pdecode2
 665         cur_state = self.cur_state
 666
 667         # temporaries
 668         dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode
 669
 670         # for updating svstate (things like srcstep etc.)
 671         update_svstate = Signal() # set this (below) if updating
 672         new_svstate = SVSTATERec("new_svstate")
 673         comb += new_svstate.eq(cur_state.svstate)
 674
 675         # precalculate srcstep+1 and dststep+1
 676         cur_srcstep = cur_state.svstate.srcstep
 677         cur_dststep = cur_state.svstate.dststep
 678         next_srcstep = Signal.like(cur_srcstep)
 679         next_dststep = Signal.like(cur_dststep)
 680         comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
 681         comb += next_dststep.eq(cur_state.svstate.dststep+1)
 682
 683         # note if an exception happened.  in a pipelined or OoO design
 684         # this needs to be accompanied by "shadowing" (or stalling)
 685         exc_happened = self.core.o.exc_happened
 686
 687         with m.FSM(name="issue_fsm"):
 688
 689             # sync with the "fetch" phase which is reading the instruction
 690             # at this point, there is no instruction running, that
 691             # could inadvertently update the PC.
 692             with m.State("ISSUE_START"):
 693                 # wait on "core stop" release, before next fetch
 694                 # need to do this here, in case we are in a VL==0 loop
 695                 with m.If(~dbg.core_stop_o & ~core_rst):
 696                     comb += fetch_pc_i_valid.eq(1) # tell fetch to start
 697                     with m.If(fetch_pc_o_ready):   # fetch acknowledged us
 698                         m.next = "INSN_WAIT"
 699                 with m.Else():
 700                     # tell core it's stopped, and acknowledge debug handshake
 701                     comb += dbg.core_stopped_i.eq(1)
 702                     # while stopped, allow updating the PC and SVSTATE
 703                     with m.If(self.pc_i.ok):
 704                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 705                         comb += self.state_w_pc.i_data.eq(self.pc_i.data)
 706                         sync += pc_changed.eq(1)
 707                     with m.If(self.svstate_i.ok):
 708                         comb += new_svstate.eq(self.svstate_i.data)
 709                         comb += update_svstate.eq(1)
 710                         sync += sv_changed.eq(1)
 711
 712             # wait for an instruction to arrive from Fetch
 713             with m.State("INSN_WAIT"):
 714                 if self.allow_overlap:
 715                     stopping = dbg.stopping_o
 716                 else:
 717                     stopping = Const(0)
 718                 with m.If(stopping):
 719                     # stopping: jump back to idle
 720                     m.next = "ISSUE_START"
 721                 with m.Else():
 722                     comb += fetch_insn_i_ready.eq(1)
 723                     with m.If(fetch_insn_o_valid):
 724                         # loop into ISSUE_START if it's a SVP64 instruction
 725                         # and VL == 0.  this because VL==0 is a for-loop
 726                         # from 0 to 0 i.e. always, always a NOP.
 727                         cur_vl = cur_state.svstate.vl
 728                         with m.If(is_svp64_mode & (cur_vl == 0)):
 729                             # update the PC before fetching the next instruction
 730                             # since we are in a VL==0 loop, no instruction was
 731                             # executed that we could be overwriting
 732                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 733                             comb += self.state_w_pc.i_data.eq(nia)
 734                             comb += self.insn_done.eq(1)
 735                             m.next = "ISSUE_START"
 736                         with m.Else():
 737                             if self.svp64_en:
 738                                 m.next = "PRED_START"  # fetching predicate
 739                             else:
 740                                 m.next = "DECODE_SV"  # skip predication
 741
 742             with m.State("PRED_START"):
 743                 comb += pred_insn_i_valid.eq(1)  # tell fetch_pred to start
 744                 with m.If(pred_insn_o_ready):  # fetch_pred acknowledged us
 745                     m.next = "MASK_WAIT"
 746
 747             with m.State("MASK_WAIT"):
 748                 comb += pred_mask_i_ready.eq(1) # ready to receive the masks
 749                 with m.If(pred_mask_o_valid): # predication masks are ready
 750                     m.next = "PRED_SKIP"
 751
 752             # skip zeros in predicate
 753             with m.State("PRED_SKIP"):
 754                 with m.If(~is_svp64_mode):
 755                     m.next = "DECODE_SV"  # nothing to do
 756                 with m.Else():
 757                     if self.svp64_en:
 758                         pred_src_zero = pdecode2.rm_dec.pred_sz
 759                         pred_dst_zero = pdecode2.rm_dec.pred_dz
 760
 761                         # new srcstep, after skipping zeros
 762                         skip_srcstep = Signal.like(cur_srcstep)
 763                         # value to be added to the current srcstep
 764                         src_delta = Signal.like(cur_srcstep)
 765                         # add leading zeros to srcstep, if not in zero mode
 766                         with m.If(~pred_src_zero):
 767                             # priority encoder (count leading zeros)
 768                             # append guard bit, in case the mask is all zeros
 769                             pri_enc_src = PriorityEncoder(65)
 770                             m.submodules.pri_enc_src = pri_enc_src
 771                             comb += pri_enc_src.i.eq(Cat(self.srcmask,
 772                                                          Const(1, 1)))
 773                             comb += src_delta.eq(pri_enc_src.o)
 774                         # apply delta to srcstep
 775                         comb += skip_srcstep.eq(cur_srcstep + src_delta)
 776                         # shift-out all leading zeros from the mask
 777                         # plus the leading "one" bit
 778                         # TODO count leading zeros and shift-out the zero
 779                         #      bits, in the same step, in hardware
 780                         sync += self.srcmask.eq(self.srcmask >> (src_delta+1))
 781
 782                         # same as above, but for dststep
 783                         skip_dststep = Signal.like(cur_dststep)
 784                         dst_delta = Signal.like(cur_dststep)
 785                         with m.If(~pred_dst_zero):
 786                             pri_enc_dst = PriorityEncoder(65)
 787                             m.submodules.pri_enc_dst = pri_enc_dst
 788                             comb += pri_enc_dst.i.eq(Cat(self.dstmask,
 789                                                          Const(1, 1)))
 790                             comb += dst_delta.eq(pri_enc_dst.o)
 791                         comb += skip_dststep.eq(cur_dststep + dst_delta)
 792                         sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))
 793
 794                         # TODO: initialize mask[VL]=1 to avoid passing past VL
 795                         with m.If((skip_srcstep >= cur_vl) |
 796                                   (skip_dststep >= cur_vl)):
 797                             # end of VL loop. Update PC and reset src/dst step
 798                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 799                             comb += self.state_w_pc.i_data.eq(nia)
 800                             comb += new_svstate.srcstep.eq(0)
 801                             comb += new_svstate.dststep.eq(0)
 802                             comb += update_svstate.eq(1)
 803                             # synchronize with the simulator
 804                             comb += self.insn_done.eq(1)
 805                             # go back to Issue
 806                             m.next = "ISSUE_START"
 807                         with m.Else():
 808                             # update new src/dst step
 809                             comb += new_svstate.srcstep.eq(skip_srcstep)
 810                             comb += new_svstate.dststep.eq(skip_dststep)
 811                             comb += update_svstate.eq(1)
 812                             # proceed to Decode
 813                             m.next = "DECODE_SV"
 814
 815                         # pass predicate mask bits through to satellite decoders
 816                         # TODO: for SIMD this will be *multiple* bits
 817                         sync += core.i.sv_pred_sm.eq(self.srcmask[0])
 818                         sync += core.i.sv_pred_dm.eq(self.dstmask[0])
 819
 820             # after src/dst step have been updated, we are ready
 821             # to decode the instruction
 822             with m.State("DECODE_SV"):
 823                 # decode the instruction
 824                 sync += core.i.e.eq(pdecode2.e)
 825                 sync += core.i.state.eq(cur_state)
 826                 sync += core.i.raw_insn_i.eq(dec_opcode_i)
 827                 sync += core.i.bigendian_i.eq(self.core_bigendian_i)
 828                 if self.svp64_en:
 829                     sync += core.i.sv_rm.eq(pdecode2.sv_rm)
 830                     # set RA_OR_ZERO detection in satellite decoders
 831                     sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
 832                     # and svp64 detection
 833                     sync += core.i.is_svp64_mode.eq(is_svp64_mode)
 834                     # and svp64 bit-rev'd ldst mode
 835                     ldst_dec = pdecode2.use_svp64_ldst_dec
 836                     sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
 837                 # after decoding, reset any previous exception condition,
 838                 # allowing it to be set again during the next execution
 839                 sync += pdecode2.ldst_exc.eq(0)
 840
 841                 m.next = "INSN_EXECUTE"  # move to "execute"
 842
 843             # handshake with execution FSM, move to "wait" once acknowledged
 844             with m.State("INSN_EXECUTE"):
 845                 comb += exec_insn_i_valid.eq(1) # trigger execute
 846                 with m.If(exec_insn_o_ready):   # execute acknowledged us
 847                     m.next = "EXECUTE_WAIT"
 848
 849             with m.State("EXECUTE_WAIT"):
 850                 # wait on "core stop" release, at instruction end
 851                 # need to do this here, in case we are in a VL>1 loop
 852                 with m.If(~dbg.core_stop_o & ~core_rst):
 853                     comb += exec_pc_i_ready.eq(1)
 854                     # see https://bugs.libre-soc.org/show_bug.cgi?id=636
 855                     # the exception info needs to be blatted into
 856                     # pdecode.ldst_exc, and the instruction "re-run".
 857                     # when ldst_exc.happened is set, the PowerDecoder2
 858                     # reacts very differently: it re-writes the instruction
 859                     # with a "trap" (calls PowerDecoder2.trap()) which
 860                     # will *overwrite* whatever was requested and jump the
 861                     # PC to the exception address, as well as alter MSR.
 862                     # nothing else needs to be done other than to note
 863                     # the change of PC and MSR (and, later, SVSTATE)
 864                     with m.If(exc_happened):
 865                         sync += pdecode2.ldst_exc.eq(core.fus.get_exc("ldst0"))
 866
 867                     with m.If(exec_pc_o_valid):
 868
 869                         # was this the last loop iteration?
 870                         is_last = Signal()
 871                         cur_vl = cur_state.svstate.vl
 872                         comb += is_last.eq(next_srcstep == cur_vl)
 873
 874                         # return directly to Decode if Execute generated an
 875                         # exception.
 876                         with m.If(pdecode2.ldst_exc.happened):
 877                             m.next = "DECODE_SV"
 878
 879                         # if either PC or SVSTATE were changed by the previous
 880                         # instruction, go directly back to Fetch, without
 881                         # updating either PC or SVSTATE
 882                         with m.Elif(pc_changed | sv_changed):
 883                             m.next = "ISSUE_START"
 884
 885                         # also return to Fetch, when no output was a vector
 886                         # (regardless of SRCSTEP and VL), or when the last
 887                         # instruction was really the last one of the VL loop
 888                         with m.Elif((~pdecode2.loop_continue) | is_last):
 889                             # before going back to fetch, update the PC state
 890                             # register with the NIA.
 891                             # ok here we are not reading the branch unit.
 892                             # TODO: this just blithely overwrites whatever
 893                             #       pipeline updated the PC
 894                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 895                             comb += self.state_w_pc.i_data.eq(nia)
 896                             # reset SRCSTEP before returning to Fetch
 897                             if self.svp64_en:
 898                                 with m.If(pdecode2.loop_continue):
 899                                     comb += new_svstate.srcstep.eq(0)
 900                                     comb += new_svstate.dststep.eq(0)
 901                                     comb += update_svstate.eq(1)
 902                             else:
 903                                 comb += new_svstate.srcstep.eq(0)
 904                                 comb += new_svstate.dststep.eq(0)
 905                                 comb += update_svstate.eq(1)
 906                             m.next = "ISSUE_START"
 907
 908                         # returning to Execute? then, first update SRCSTEP
 909                         with m.Else():
 910                             comb += new_svstate.srcstep.eq(next_srcstep)
 911                             comb += new_svstate.dststep.eq(next_dststep)
 912                             comb += update_svstate.eq(1)
 913                             # return to mask skip loop
 914                             m.next = "PRED_SKIP"
 915
 916                 with m.Else():
 917                     comb += dbg.core_stopped_i.eq(1)
 918                     # while stopped, allow updating the PC and SVSTATE
 919                     with m.If(self.pc_i.ok):
 920                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 921                         comb += self.state_w_pc.i_data.eq(self.pc_i.data)
 922                         sync += pc_changed.eq(1)
 923                     with m.If(self.svstate_i.ok):
 924                         comb += new_svstate.eq(self.svstate_i.data)
 925                         comb += update_svstate.eq(1)
 926                         sync += sv_changed.eq(1)
 927
 928         # check if svstate needs updating: if so, write it to State Regfile
 929         with m.If(update_svstate):
 930             comb += self.state_w_sv.wen.eq(1<<StateRegs.SVSTATE)
 931             comb += self.state_w_sv.i_data.eq(new_svstate)
 932             sync += cur_state.svstate.eq(new_svstate) # for next clock
 933
 934     def execute_fsm(self, m, core, pc_changed, sv_changed,
 935                     exec_insn_i_valid, exec_insn_o_ready,
 936                     exec_pc_o_valid, exec_pc_i_ready):
 937         """execute FSM
 938
 939         execute FSM. this interacts with the "issue" FSM
 940         through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
 941         (outgoing). SVP64 RM prefixes have already been set up by the
 942         "issue" phase, so execute is fairly straightforward.
 943         """
 944
 945         comb = m.d.comb
 946         sync = m.d.sync
 947         pdecode2 = self.pdecode2
 948
 949         # temporaries
 950         core_busy_o = core.n.o_data.busy_o # core is busy
 951         core_ivalid_i = core.p.i_valid              # instruction is valid
 952
 953         with m.FSM(name="exec_fsm"):
 954
 955             # waiting for instruction bus (stays there until not busy)
 956             with m.State("INSN_START"):
 957                 comb += exec_insn_o_ready.eq(1)
 958                 with m.If(exec_insn_i_valid):
 959                     comb += core_ivalid_i.eq(1)  # instruction is valid/issued
 960                     sync += sv_changed.eq(0)
 961                     sync += pc_changed.eq(0)
 962                     with m.If(core.p.o_ready): # only move if accepted
 963                         m.next = "INSN_ACTIVE"  # move to "wait completion"
 964
 965             # instruction started: must wait till it finishes
 966             with m.State("INSN_ACTIVE"):
 967                 # note changes to PC and SVSTATE
 968                 with m.If(self.state_nia.wen & (1<<StateRegs.SVSTATE)):
 969                     sync += sv_changed.eq(1)
 970                 with m.If(self.state_nia.wen & (1<<StateRegs.PC)):
 971                     sync += pc_changed.eq(1)
 972                 with m.If(~core_busy_o): # instruction done!
 973                     comb += exec_pc_o_valid.eq(1)
 974                     with m.If(exec_pc_i_ready):
 975                         # when finished, indicate "done".
 976                         # however, if there was an exception, the instruction
 977                         # is *not* yet done.  this is an implementation
 978                         # detail: we choose to implement exceptions by
 979                         # taking the exception information from the LDST
 980                         # unit, putting that *back* into the PowerDecoder2,
 981                         # and *re-running the entire instruction*.
 982                         # if we erroneously indicate "done" here, it is as if
 983                         # there were *TWO* instructions:
 984                         # 1) the failed LDST 2) a TRAP.
 985                         with m.If(~pdecode2.ldst_exc.happened):
 986                             comb += self.insn_done.eq(1)
 987                         m.next = "INSN_START"  # back to fetch
 988
 989     def setup_peripherals(self, m):
 990         comb, sync = m.d.comb, m.d.sync
 991
 992         # okaaaay so the debug module must be in coresync clock domain
 993         # but NOT its reset signal. to cope with this, set every single
 994         # submodule explicitly in coresync domain, debug and JTAG
 995         # in their own one but using *external* reset.
 996         csd = DomainRenamer("coresync")
 997         dbd = DomainRenamer(self.dbg_domain)
 998
 999         m.submodules.core = core = csd(self.core)
1000         m.submodules.imem = imem = csd(self.imem)
1001         m.submodules.dbg = dbg = dbd(self.dbg)
1002         if self.jtag_en:
1003             m.submodules.jtag = jtag = dbd(self.jtag)
1004             # TODO: UART2GDB mux, here, from external pin
1005             # see https://bugs.libre-soc.org/show_bug.cgi?id=499
1006             sync += dbg.dmi.connect_to(jtag.dmi)
1007
1008         cur_state = self.cur_state
1009
1010         # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
1011         if self.sram4x4k:
1012             for i, sram in enumerate(self.sram4k):
1013                 m.submodules["sram4k_%d" % i] = csd(sram)
1014                 comb += sram.enable.eq(self.wb_sram_en)
1015
1016         # XICS interrupt handler
1017         if self.xics:
1018             m.submodules.xics_icp = icp = csd(self.xics_icp)
1019             m.submodules.xics_ics = ics = csd(self.xics_ics)
1020             comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
1021             sync += cur_state.eint.eq(icp.core_irq_o) # connect ICP to core
1022
1023         # GPIO test peripheral
1024         if self.gpio:
1025             m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)
1026
1027         # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
1028         # XXX causes litex ECP5 test to get wrong idea about input and output
1029         # (but works with verilator sim *sigh*)
1030         #if self.gpio and self.xics:
1031         #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
1032
1033         # instruction decoder
1034         pdecode = create_pdecode()
1035         m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
1036         if self.svp64_en:
1037             m.submodules.svp64 = svp64 = csd(self.svp64)
1038
1039         # convenience
1040         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
1041         intrf = self.core.regs.rf['int']
1042
1043         # clock delay power-on reset
1044         cd_por  = ClockDomain(reset_less=True)
1045         cd_sync = ClockDomain()
1046         core_sync = ClockDomain("coresync")
1047         m.domains += cd_por, cd_sync, core_sync
1048         if self.dbg_domain != "sync":
1049             dbg_sync = ClockDomain(self.dbg_domain)
1050             m.domains += dbg_sync
1051
1052         ti_rst = Signal(reset_less=True)
1053         delay = Signal(range(4), reset=3)
1054         with m.If(delay != 0):
1055             m.d.por += delay.eq(delay - 1)
1056         comb += cd_por.clk.eq(ClockSignal())
1057
1058         # power-on reset delay
1059         core_rst = ResetSignal("coresync")
1060         comb += ti_rst.eq(delay != 0 | dbg.core_rst_o | ResetSignal())
1061         comb += core_rst.eq(ti_rst)
1062
1063         # debug clock is same as coresync, but reset is *main external*
1064         if self.dbg_domain != "sync":
1065             dbg_rst = ResetSignal(self.dbg_domain)
1066             comb += dbg_rst.eq(ResetSignal())
1067
1068         # busy/halted signals from core
1069         core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o # core is busy
1070         comb += self.busy_o.eq(core_busy_o)
1071         comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
1072
1073         # temporary hack: says "go" immediately for both address gen and ST
1074         l0 = core.l0
1075         ldst = core.fus.fus['ldst0']
1076         st_go_edge = rising_edge(m, ldst.st.rel_o)
1077         m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o) # link addr-go direct to rel
1078         m.d.comb += ldst.st.go_i.eq(st_go_edge) # link store-go to rising rel
1079
1080     def elaborate(self, platform):
1081         m = Module()
1082         # convenience
1083         comb, sync = m.d.comb, m.d.sync
1084         cur_state = self.cur_state
1085         pdecode2 = self.pdecode2
1086         dbg = self.dbg
1087         core = self.core
1088
1089         # set up peripherals and core
1090         core_rst = self.core_rst
1091         self.setup_peripherals(m)
1092
1093         # reset current state if core reset requested
1094         with m.If(core_rst):
1095             m.d.sync += self.cur_state.eq(0)
1096
1097         # PC and instruction from I-Memory
1098         comb += self.pc_o.eq(cur_state.pc)
1099         pc_changed = Signal() # note write to PC
1100         sv_changed = Signal() # note write to SVSTATE
1101
1102         # indicate to outside world if any FU is still executing
1103         comb += self.any_busy.eq(core.n.o_data.any_busy_o) # any FU executing
1104
1105         # read state either from incoming override or from regfile
1106         # TODO: really should be doing MSR in the same way
1107         pc = state_get(m, core_rst, self.pc_i,
1108                             "pc",                  # read PC
1109                             self.state_r_pc, StateRegs.PC)
1110         svstate = state_get(m, core_rst, self.svstate_i,
1111                             "svstate",   # read SVSTATE
1112                             self.state_r_sv, StateRegs.SVSTATE)
1113
1114         # don't write pc every cycle
1115         comb += self.state_w_pc.wen.eq(0)
1116         comb += self.state_w_pc.i_data.eq(0)
1117
1118         # address of the next instruction, in the absence of a branch
1119         # depends on the instruction size
1120         nia = Signal(64)
1121
1122         # connect up debug signals
1123         # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
1124         comb += dbg.terminate_i.eq(core.o.core_terminate_o)
1125         comb += dbg.state.pc.eq(pc)
1126         comb += dbg.state.svstate.eq(svstate)
1127         comb += dbg.state.msr.eq(cur_state.msr)
1128
1129         # pass the prefix mode from Fetch to Issue, so the latter can loop
1130         # on VL==0
1131         is_svp64_mode = Signal()
1132
1133         # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
1134         # issue, decode/execute, now joined by "Predicate fetch/calculate".
1135         # these are the handshake signals between each
1136
1137         # fetch FSM can run as soon as the PC is valid
1138         fetch_pc_i_valid = Signal() # Execute tells Fetch "start next read"
1139         fetch_pc_o_ready = Signal() # Fetch Tells SVSTATE "proceed"
1140
1141         # fetch FSM hands over the instruction to be decoded / issued
1142         fetch_insn_o_valid = Signal()
1143         fetch_insn_i_ready = Signal()
1144
1145         # predicate fetch FSM decodes and fetches the predicate
1146         pred_insn_i_valid = Signal()
1147         pred_insn_o_ready = Signal()
1148
1149         # predicate fetch FSM delivers the masks
1150         pred_mask_o_valid = Signal()
1151         pred_mask_i_ready = Signal()
1152
1153         # issue FSM delivers the instruction to the be executed
1154         exec_insn_i_valid = Signal()
1155         exec_insn_o_ready = Signal()
1156
1157         # execute FSM, hands over the PC/SVSTATE back to the issue FSM
1158         exec_pc_o_valid = Signal()
1159         exec_pc_i_ready = Signal()
1160
1161         # the FSMs here are perhaps unusual in that they detect conditions
1162         # then "hold" information, combinatorially, for the core
1163         # (as opposed to using sync - which would be on a clock's delay)
1164         # this includes the actual opcode, valid flags and so on.
1165
1166         # Fetch, then predicate fetch, then Issue, then Execute.
1167         # Issue is where the VL for-loop # lives.  the ready/valid
1168         # signalling is used to communicate between the four.
1169
1170         # set up Fetch FSM
1171         fetch = FetchFSM(self.allow_overlap, self.svp64_en,
1172                         self.imem, core_rst, pdecode2, cur_state,
1173                        dbg, core, svstate, nia, is_svp64_mode)
1174         m.submodules.fetch = fetch
1175         # connect up in/out data to existing Signals
1176         comb += fetch.p.i_data.pc.eq(pc)
1177         # and the ready/valid signalling
1178         comb += fetch_pc_o_ready.eq(fetch.p.o_ready)
1179         comb += fetch.p.i_valid.eq(fetch_pc_i_valid)
1180         comb += fetch_insn_o_valid.eq(fetch.n.o_valid)
1181         comb += fetch.n.i_ready.eq(fetch_insn_i_ready)
1182
1183         self.issue_fsm(m, core, pc_changed, sv_changed, nia,
1184                        dbg, core_rst, is_svp64_mode,
1185                        fetch_pc_o_ready, fetch_pc_i_valid,
1186                        fetch_insn_o_valid, fetch_insn_i_ready,
1187                        pred_insn_i_valid, pred_insn_o_ready,
1188                        pred_mask_o_valid, pred_mask_i_ready,
1189                        exec_insn_i_valid, exec_insn_o_ready,
1190                        exec_pc_o_valid, exec_pc_i_ready)
1191
1192         if self.svp64_en:
1193             self.fetch_predicate_fsm(m,
1194                                      pred_insn_i_valid, pred_insn_o_ready,
1195                                      pred_mask_o_valid, pred_mask_i_ready)
1196
1197         self.execute_fsm(m, core, pc_changed, sv_changed,
1198                          exec_insn_i_valid, exec_insn_o_ready,
1199                          exec_pc_o_valid, exec_pc_i_ready)
1200
1201         # this bit doesn't have to be in the FSM: connect up to read
1202         # regfiles on demand from DMI
1203         self.do_dmi(m, dbg)
1204
1205         # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
1206         # (which uses that in PowerDecoder2 to raise 0x900 exception)
1207         self.tb_dec_fsm(m, cur_state.dec)
1208
1209         return m
1210
1211     def do_dmi(self, m, dbg):
1212         """deals with DMI debug requests
1213
1214         currently only provides read requests for the INT regfile, CR and XER
1215         it will later also deal with *writing* to these regfiles.
1216         """
1217         comb = m.d.comb
1218         sync = m.d.sync
1219         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
1220         intrf = self.core.regs.rf['int']
1221
1222         with m.If(d_reg.req): # request for regfile access being made
1223             # TODO: error-check this
1224             # XXX should this be combinatorial?  sync better?
1225             if intrf.unary:
1226                 comb += self.int_r.ren.eq(1<<d_reg.addr)
1227             else:
1228                 comb += self.int_r.addr.eq(d_reg.addr)
1229                 comb += self.int_r.ren.eq(1)
1230         d_reg_delay  = Signal()
1231         sync += d_reg_delay.eq(d_reg.req)
1232         with m.If(d_reg_delay):
1233             # data arrives one clock later
1234             comb += d_reg.data.eq(self.int_r.o_data)
1235             comb += d_reg.ack.eq(1)
1236
1237         # sigh same thing for CR debug
1238         with m.If(d_cr.req): # request for regfile access being made
1239             comb += self.cr_r.ren.eq(0b11111111) # enable all
1240         d_cr_delay  = Signal()
1241         sync += d_cr_delay.eq(d_cr.req)
1242         with m.If(d_cr_delay):
1243             # data arrives one clock later
1244             comb += d_cr.data.eq(self.cr_r.o_data)
1245             comb += d_cr.ack.eq(1)
1246
1247         # aaand XER...
1248         with m.If(d_xer.req): # request for regfile access being made
1249             comb += self.xer_r.ren.eq(0b111111) # enable all
1250         d_xer_delay  = Signal()
1251         sync += d_xer_delay.eq(d_xer.req)
1252         with m.If(d_xer_delay):
1253             # data arrives one clock later
1254             comb += d_xer.data.eq(self.xer_r.o_data)
1255             comb += d_xer.ack.eq(1)
1256
1257     def tb_dec_fsm(self, m, spr_dec):
1258         """tb_dec_fsm
1259
1260         this is a FSM for updating either dec or tb.  it runs alternately
1261         DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
1262         value to DEC, however the regfile has "passthrough" on it so this
1263         *should* be ok.
1264
1265         see v3.0B p1097-1099 for Timeer Resource and p1065 and p1076
1266         """
1267
1268         comb, sync = m.d.comb, m.d.sync
1269         fast_rf = self.core.regs.rf['fast']
1270         fast_r_dectb = fast_rf.r_ports['issue'] # DEC/TB
1271         fast_w_dectb = fast_rf.w_ports['issue'] # DEC/TB
1272
1273         with m.FSM() as fsm:
1274
1275             # initiates read of current DEC
1276             with m.State("DEC_READ"):
1277                 comb += fast_r_dectb.addr.eq(FastRegs.DEC)
1278                 comb += fast_r_dectb.ren.eq(1)
1279                 m.next = "DEC_WRITE"
1280
1281             # waits for DEC read to arrive (1 cycle), updates with new value
1282             with m.State("DEC_WRITE"):
1283                 new_dec = Signal(64)
1284                 # TODO: MSR.LPCR 32-bit decrement mode
1285                 comb += new_dec.eq(fast_r_dectb.o_data - 1)
1286                 comb += fast_w_dectb.addr.eq(FastRegs.DEC)
1287                 comb += fast_w_dectb.wen.eq(1)
1288                 comb += fast_w_dectb.i_data.eq(new_dec)
1289                 sync += spr_dec.eq(new_dec) # copy into cur_state for decoder
1290                 m.next = "TB_READ"
1291
1292             # initiates read of current TB
1293             with m.State("TB_READ"):
1294                 comb += fast_r_dectb.addr.eq(FastRegs.TB)
1295                 comb += fast_r_dectb.ren.eq(1)
1296                 m.next = "TB_WRITE"
1297
1298             # waits for read TB to arrive, initiates write of current TB
1299             with m.State("TB_WRITE"):
1300                 new_tb = Signal(64)
1301                 comb += new_tb.eq(fast_r_dectb.o_data + 1)
1302                 comb += fast_w_dectb.addr.eq(FastRegs.TB)
1303                 comb += fast_w_dectb.wen.eq(1)
1304                 comb += fast_w_dectb.i_data.eq(new_tb)
1305                 m.next = "DEC_READ"
1306
1307         return m
1308
1309     def __iter__(self):
1310         yield from self.pc_i.ports()
1311         yield self.pc_o
1312         yield self.memerr_o
1313         yield from self.core.ports()
1314         yield from self.imem.ports()
1315         yield self.core_bigendian_i
1316         yield self.busy_o
1317
1318     def ports(self):
1319         return list(self)
1320
1321     def external_ports(self):
1322         ports = self.pc_i.ports()
1323         ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
1324                 ]
1325
1326         if self.jtag_en:
1327             ports += list(self.jtag.external_ports())
1328         else:
1329             # don't add DMI if JTAG is enabled
1330             ports += list(self.dbg.dmi.ports())
1331
1332         ports += list(self.imem.ibus.fields.values())
1333         ports += list(self.core.l0.cmpi.wb_bus().fields.values())
1334
1335         if self.sram4x4k:
1336             for sram in self.sram4k:
1337                 ports += list(sram.bus.fields.values())
1338
1339         if self.xics:
1340             ports += list(self.xics_icp.bus.fields.values())
1341             ports += list(self.xics_ics.bus.fields.values())
1342             ports.append(self.int_level_i)
1343
1344         if self.gpio:
1345             ports += list(self.simple_gpio.bus.fields.values())
1346             ports.append(self.gpio_o)
1347
1348         return ports
1349
1350     def ports(self):
1351         return list(self)
1352
1353
1354 class TestIssuer(Elaboratable):
1355     def __init__(self, pspec):
1356         self.ti = TestIssuerInternal(pspec)
1357         self.pll = DummyPLL(instance=True)
1358
1359         # PLL direct clock or not
1360         self.pll_en = hasattr(pspec, "use_pll") and pspec.use_pll
1361         if self.pll_en:
1362             self.pll_test_o = Signal(reset_less=True)
1363             self.pll_vco_o = Signal(reset_less=True)
1364             self.clk_sel_i = Signal(2, reset_less=True)
1365             self.ref_clk =  ClockSignal() # can't rename it but that's ok
1366             self.pllclk_clk = ClockSignal("pllclk")
1367
1368     def elaborate(self, platform):
1369         m = Module()
1370         comb = m.d.comb
1371
1372         # TestIssuer nominally runs at main clock, actually it is
1373         # all combinatorial internally except for coresync'd components
1374         m.submodules.ti = ti = self.ti
1375
1376         if self.pll_en:
1377             # ClockSelect runs at PLL output internal clock rate
1378             m.submodules.wrappll = pll = self.pll
1379
1380             # add clock domains from PLL
1381             cd_pll = ClockDomain("pllclk")
1382             m.domains += cd_pll
1383
1384             # PLL clock established.  has the side-effect of running clklsel
1385             # at the PLL's speed (see DomainRenamer("pllclk") above)
1386             pllclk = self.pllclk_clk
1387             comb += pllclk.eq(pll.clk_pll_o)
1388
1389             # wire up external 24mhz to PLL
1390             #comb += pll.clk_24_i.eq(self.ref_clk)
1391             # output 18 mhz PLL test signal, and analog oscillator out
1392             comb += self.pll_test_o.eq(pll.pll_test_o)
1393             comb += self.pll_vco_o.eq(pll.pll_vco_o)
1394
1395             # input to pll clock selection
1396             comb += pll.clk_sel_i.eq(self.clk_sel_i)
1397
1398             # now wire up ResetSignals.  don't mind them being in this domain
1399             pll_rst = ResetSignal("pllclk")
1400             comb += pll_rst.eq(ResetSignal())
1401
1402         # internal clock is set to selector clock-out.  has the side-effect of
1403         # running TestIssuer at this speed (see DomainRenamer("intclk") above)
1404         # debug clock runs at coresync internal clock
1405         cd_coresync = ClockDomain("coresync")
1406         #m.domains += cd_coresync
1407         if self.ti.dbg_domain != 'sync':
1408             cd_dbgsync = ClockDomain("dbgsync")
1409             #m.domains += cd_dbgsync
1410         intclk = ClockSignal("coresync")
1411         dbgclk = ClockSignal(self.ti.dbg_domain)
1412         # XXX BYPASS PLL XXX
1413         # XXX BYPASS PLL XXX
1414         # XXX BYPASS PLL XXX
1415         if self.pll_en:
1416             comb += intclk.eq(self.ref_clk)
1417         else:
1418             comb += intclk.eq(ClockSignal())
1419         if self.ti.dbg_domain != 'sync':
1420             dbgclk = ClockSignal(self.ti.dbg_domain)
1421             comb += dbgclk.eq(intclk)
1422
1423         return m
1424
1425     def ports(self):
1426         return list(self.ti.ports()) + list(self.pll.ports()) + \
1427                [ClockSignal(), ResetSignal()]
1428
1429     def external_ports(self):
1430         ports = self.ti.external_ports()
1431         ports.append(ClockSignal())
1432         ports.append(ResetSignal())
1433         if self.pll_en:
1434             ports.append(self.clk_sel_i)
1435             ports.append(self.pll.clk_24_i)
1436             ports.append(self.pll_test_o)
1437             ports.append(self.pll_vco_o)
1438             ports.append(self.pllclk_clk)
1439             ports.append(self.ref_clk)
1440         return ports
1441
1442
1443 if __name__ == '__main__':
1444     units = {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
1445              'spr': 1,
1446              'div': 1,
1447              'mul': 1,
1448              'shiftrot': 1
1449             }
1450     pspec = TestMemPspec(ldst_ifacetype='bare_wb',
1451                          imem_ifacetype='bare_wb',
1452                          addr_wid=48,
1453                          mask_wid=8,
1454                          reg_wid=64,
1455                          units=units)
1456     dut = TestIssuer(pspec)
1457     vl = main(dut, ports=dut.ports(), name="test_issuer")
1458
1459     if len(sys.argv) == 1:
1460         vl = rtlil.convert(dut, ports=dut.external_ports(), name="test_issuer")
1461         with open("test_issuer.il", "w") as f:
1462             f.write(vl)