src/soc/simple/inorder.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
  12 the purpose of this module is to verify the functional correctness
  13 of the Function Units in the absolute simplest and clearest possible
  14 way, and to at least provide something that can be further incrementally
  15 improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal,
  19                     Mux, Const, Repl, Cat)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from nmutil.singlepipe import ControlBase
  25 from soc.simple.core_data import FetchOutput, FetchInput
  26
  27 from openpower.consts import MSR
  28 from openpower.decoder.power_enums import MicrOp
  29 from openpower.state import CoreState
  30 from soc.regfile.regfiles import StateRegs
  31 from soc.config.test.test_loadstore import TestMemPspec
  32 from soc.experiment.icache import ICache
  33
  34 from nmutil.util import rising_edge
  35
  36 from soc.simple.issuer import TestIssuerBase
  37
  38 def get_insn(f_instr_o, pc):
  39     if f_instr_o.width == 32:
  40         return f_instr_o
  41     else:
  42         # 64-bit: bit 2 of pc decides which word to select
  43         return f_instr_o.word_select(pc[2], 32)
  44
  45
  46 # Fetch Finite State Machine.
  47 # WARNING: there are currently DriverConflicts but it's actually working.
  48 # TODO, here: everything that is global in nature, information from the
  49 # main TestIssuerInternal, needs to move to either ispec() or ospec().
  50 # not only that: TestIssuerInternal.imem can entirely move into here
  51 # because imem is only ever accessed inside the FetchFSM.
  52 class FetchFSM(ControlBase):
  53     def __init__(self, allow_overlap, imem, core_rst,
  54                  pdecode2, cur_state,
  55                  dbg, core, svstate, nia):
  56         self.allow_overlap = allow_overlap
  57         self.imem = imem
  58         self.core_rst = core_rst
  59         self.pdecode2 = pdecode2
  60         self.cur_state = cur_state
  61         self.dbg = dbg
  62         self.core = core
  63         self.svstate = svstate
  64         self.nia = nia
  65
  66         # set up pipeline ControlBase and allocate i/o specs
  67         # (unusual: normally done by the Pipeline API)
  68         super().__init__(stage=self)
  69         self.p.i_data, self.n.o_data = self.new_specs(None)
  70         self.i, self.o = self.p.i_data, self.n.o_data
  71
  72     # next 3 functions are Stage API Compliance
  73     def setup(self, m, i):
  74         pass
  75
  76     def ispec(self):
  77         return FetchInput()
  78
  79     def ospec(self):
  80         return FetchOutput()
  81
  82     def elaborate(self, platform):
  83         """fetch FSM
  84
  85         this FSM performs fetch of raw instruction data, partial-decodes
  86         it 32-bit at a time to detect SVP64 prefixes, and will optionally
  87         read a 2nd 32-bit quantity if that occurs.
  88         """
  89         m = super().elaborate(platform)
  90
  91         dbg = self.dbg
  92         core = self.core
  93         pc = self.i.pc
  94         msr = self.i.msr
  95         svstate = self.svstate
  96         nia = self.nia
  97         fetch_pc_o_ready = self.p.o_ready
  98         fetch_pc_i_valid = self.p.i_valid
  99         fetch_insn_o_valid = self.n.o_valid
 100         fetch_insn_i_ready = self.n.i_ready
 101
 102         comb = m.d.comb
 103         sync = m.d.sync
 104         pdecode2 = self.pdecode2
 105         cur_state = self.cur_state
 106         dec_opcode_o = pdecode2.dec.raw_opcode_in  # raw opcode
 107
 108         # also note instruction fetch failed
 109         if hasattr(core, "icache"):
 110             fetch_failed = core.icache.i_out.fetch_failed
 111             flush_needed = True
 112         else:
 113             fetch_failed = Const(0, 1)
 114             flush_needed = False
 115
 116         # set priv / virt mode on I-Cache, sigh
 117         if isinstance(self.imem, ICache):
 118             comb += self.imem.i_in.priv_mode.eq(~msr[MSR.PR])
 119             comb += self.imem.i_in.virt_mode.eq(msr[MSR.DR])
 120
 121         with m.FSM(name='fetch_fsm'):
 122
 123             # waiting (zzz)
 124             with m.State("IDLE"):
 125                 with m.If(~dbg.stopping_o & ~fetch_failed):
 126                     comb += fetch_pc_o_ready.eq(1)
 127                 with m.If(fetch_pc_i_valid & ~fetch_failed):
 128                     # instruction allowed to go: start by reading the PC
 129                     # capture the PC and also drop it into Insn Memory
 130                     # we have joined a pair of combinatorial memory
 131                     # lookups together.  this is Generally Bad.
 132                     comb += self.imem.a_pc_i.eq(pc)
 133                     comb += self.imem.a_i_valid.eq(1)
 134                     comb += self.imem.f_i_valid.eq(1)
 135                     sync += cur_state.pc.eq(pc)
 136                     sync += cur_state.svstate.eq(svstate)  # and svstate
 137                     sync += cur_state.msr.eq(msr)  # and msr
 138
 139                     m.next = "INSN_READ"  # move to "wait for bus" phase
 140
 141             # dummy pause to find out why simulation is not keeping up
 142             with m.State("INSN_READ"):
 143                 if self.allow_overlap:
 144                     stopping = dbg.stopping_o
 145                 else:
 146                     stopping = Const(0)
 147                 with m.If(stopping):
 148                     # stopping: jump back to idle
 149                     m.next = "IDLE"
 150                 with m.Else():
 151                     with m.If(self.imem.f_busy_o & ~fetch_failed):  # zzz...
 152                         # busy but not fetch failed: stay in wait-read
 153                         comb += self.imem.a_i_valid.eq(1)
 154                         comb += self.imem.f_i_valid.eq(1)
 155                     with m.Else():
 156                         # not busy (or fetch failed!): instruction fetched
 157                         # when fetch failed, the instruction gets ignored
 158                         # by the decoder
 159                         insn = get_insn(self.imem.f_instr_o, cur_state.pc)
 160                         # not SVP64 - 32-bit only
 161                         sync += nia.eq(cur_state.pc + 4)
 162                         sync += dec_opcode_o.eq(insn)
 163                         m.next = "INSN_READY"
 164
 165             with m.State("INSN_READY"):
 166                 # hand over the instruction, to be decoded
 167                 comb += fetch_insn_o_valid.eq(1)
 168                 with m.If(fetch_insn_i_ready):
 169                     m.next = "IDLE"
 170
 171         # whatever was done above, over-ride it if core reset is held
 172         with m.If(self.core_rst):
 173             sync += nia.eq(0)
 174
 175         return m
 176
 177
 178 class TestIssuerInternalInOrder(TestIssuerBase):
 179     """TestIssuer - reads instructions from TestMemory and issues them
 180
 181     efficiency and speed is not the main goal here: functional correctness
 182     and code clarity is.  optimisations (which almost 100% interfere with
 183     easy understanding) come later.
 184     """
 185
 186     def issue_fsm(self, m, core, nia,
 187                   dbg, core_rst,
 188                   fetch_pc_o_ready, fetch_pc_i_valid,
 189                   fetch_insn_o_valid, fetch_insn_i_ready,
 190                   exec_insn_i_valid, exec_insn_o_ready,
 191                   exec_pc_o_valid, exec_pc_i_ready):
 192         """issue FSM
 193
 194         decode / issue FSM.  this interacts with the "fetch" FSM
 195         through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
 196         (outgoing). also interacts with the "execute" FSM
 197         through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
 198         (incoming).
 199         SVP64 RM prefixes have already been set up by the
 200         "fetch" phase, so execute is fairly straightforward.
 201         """
 202
 203         comb = m.d.comb
 204         sync = m.d.sync
 205         pdecode2 = self.pdecode2
 206         cur_state = self.cur_state
 207
 208         # temporaries
 209         dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode
 210
 211         # note if an exception happened.  in a pipelined or OoO design
 212         # this needs to be accompanied by "shadowing" (or stalling)
 213         exc_happened = self.core.o.exc_happened
 214         # also note instruction fetch failed
 215         if hasattr(core, "icache"):
 216             fetch_failed = core.icache.i_out.fetch_failed
 217             flush_needed = True
 218             # set to fault in decoder
 219             # update (highest priority) instruction fault
 220             rising_fetch_failed = rising_edge(m, fetch_failed)
 221             with m.If(rising_fetch_failed):
 222                 sync += pdecode2.instr_fault.eq(1)
 223         else:
 224             fetch_failed = Const(0, 1)
 225             flush_needed = False
 226
 227         with m.FSM(name="issue_fsm"):
 228
 229             # sync with the "fetch" phase which is reading the instruction
 230             # at this point, there is no instruction running, that
 231             # could inadvertently update the PC.
 232             with m.State("ISSUE_START"):
 233                 # reset instruction fault
 234                 sync += pdecode2.instr_fault.eq(0)
 235                 # wait on "core stop" release, before next fetch
 236                 # need to do this here, in case we are in a VL==0 loop
 237                 with m.If(~dbg.core_stop_o & ~core_rst):
 238                     comb += fetch_pc_i_valid.eq(1)  # tell fetch to start
 239                     with m.If(fetch_pc_o_ready):   # fetch acknowledged us
 240                         m.next = "INSN_WAIT"
 241                 with m.Else():
 242                     # tell core it's stopped, and acknowledge debug handshake
 243                     comb += dbg.core_stopped_i.eq(1)
 244
 245             # wait for an instruction to arrive from Fetch
 246             with m.State("INSN_WAIT"):
 247                 if self.allow_overlap:
 248                     stopping = dbg.stopping_o
 249                 else:
 250                     stopping = Const(0)
 251                 with m.If(stopping):
 252                     # stopping: jump back to idle
 253                     m.next = "ISSUE_START"
 254                     if flush_needed:
 255                         # request the icache to stop asserting "failed"
 256                         comb += core.icache.flush_in.eq(1)
 257                     # stop instruction fault
 258                     sync += pdecode2.instr_fault.eq(0)
 259                 with m.Else():
 260                     comb += fetch_insn_i_ready.eq(1)
 261                     with m.If(fetch_insn_o_valid):
 262                         # loop into ISSUE_START if it's a SVP64 instruction
 263                         # and VL == 0.  this because VL==0 is a for-loop
 264                         # from 0 to 0 i.e. always, always a NOP.
 265                         m.next = "DECODE_SV"  # skip predication
 266
 267             # after src/dst step have been updated, we are ready
 268             # to decode the instruction
 269             with m.State("DECODE_SV"):
 270                 # decode the instruction
 271                 with m.If(~fetch_failed):
 272                     sync += pdecode2.instr_fault.eq(0)
 273                 sync += core.i.e.eq(pdecode2.e)
 274                 sync += core.i.state.eq(cur_state)
 275                 sync += core.i.raw_insn_i.eq(dec_opcode_i)
 276                 sync += core.i.bigendian_i.eq(self.core_bigendian_i)
 277                 # after decoding, reset any previous exception condition,
 278                 # allowing it to be set again during the next execution
 279                 sync += pdecode2.ldst_exc.eq(0)
 280
 281                 m.next = "INSN_EXECUTE"  # move to "execute"
 282
 283             # handshake with execution FSM, move to "wait" once acknowledged
 284             with m.State("INSN_EXECUTE"):
 285                 comb += exec_insn_i_valid.eq(1)  # trigger execute
 286                 with m.If(exec_insn_o_ready):   # execute acknowledged us
 287                     m.next = "EXECUTE_WAIT"
 288
 289             with m.State("EXECUTE_WAIT"):
 290                 # wait on "core stop" release, at instruction end
 291                 # need to do this here, in case we are in a VL>1 loop
 292                 with m.If(~dbg.core_stop_o & ~core_rst):
 293                     comb += exec_pc_i_ready.eq(1)
 294                     # see https://bugs.libre-soc.org/show_bug.cgi?id=636
 295                     # the exception info needs to be blatted into
 296                     # pdecode.ldst_exc, and the instruction "re-run".
 297                     # when ldst_exc.happened is set, the PowerDecoder2
 298                     # reacts very differently: it re-writes the instruction
 299                     # with a "trap" (calls PowerDecoder2.trap()) which
 300                     # will *overwrite* whatever was requested and jump the
 301                     # PC to the exception address, as well as alter MSR.
 302                     # nothing else needs to be done other than to note
 303                     # the change of PC and MSR (and, later, SVSTATE)
 304                     with m.If(exc_happened):
 305                         mmu = core.fus.get_exc("mmu0")
 306                         ldst = core.fus.get_exc("ldst0")
 307                         if mmu is not None:
 308                             with m.If(fetch_failed):
 309                                 # instruction fetch: exception is from MMU
 310                                 # reset instr_fault (highest priority)
 311                                 sync += pdecode2.ldst_exc.eq(mmu)
 312                                 sync += pdecode2.instr_fault.eq(0)
 313                                 if flush_needed:
 314                                     # request icache to stop asserting "failed"
 315                                     comb += core.icache.flush_in.eq(1)
 316                         with m.If(~fetch_failed):
 317                             # otherwise assume it was a LDST exception
 318                             sync += pdecode2.ldst_exc.eq(ldst)
 319
 320                     with m.If(exec_pc_o_valid):
 321
 322                         # return directly to Decode if Execute generated an
 323                         # exception.
 324                         with m.If(pdecode2.ldst_exc.happened):
 325                             m.next = "DECODE_SV"
 326
 327                         # if MSR, PC or SVSTATE were changed by the previous
 328                         # instruction, go directly back to Fetch, without
 329                         # updating either MSR PC or SVSTATE
 330                         with m.Elif(self.msr_changed | self.pc_changed |
 331                                     self.sv_changed):
 332                             m.next = "ISSUE_START"
 333
 334                         with m.Else():
 335                             # before going back to fetch, update the PC state
 336                             # register with the NIA.
 337                             # ok here we are not reading the branch unit.
 338                             # TODO: this just blithely overwrites whatever
 339                             #       pipeline updated the PC
 340                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 341                             comb += self.state_w_pc.i_data.eq(nia)
 342                             m.next = "ISSUE_START"
 343
 344                 with m.Else():
 345                     comb += dbg.core_stopped_i.eq(1)
 346                     if flush_needed:
 347                         # request the icache to stop asserting "failed"
 348                         comb += core.icache.flush_in.eq(1)
 349                     # stop instruction fault
 350                     sync += pdecode2.instr_fault.eq(0)
 351                     if flush_needed:
 352                         # request the icache to stop asserting "failed"
 353                         comb += core.icache.flush_in.eq(1)
 354                     # stop instruction fault
 355                     sync += pdecode2.instr_fault.eq(0)
 356
 357     def execute_fsm(self, m, core,
 358                     exec_insn_i_valid, exec_insn_o_ready,
 359                     exec_pc_o_valid, exec_pc_i_ready):
 360         """execute FSM
 361
 362         execute FSM. this interacts with the "issue" FSM
 363         through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
 364         (outgoing). SVP64 RM prefixes have already been set up by the
 365         "issue" phase, so execute is fairly straightforward.
 366         """
 367
 368         comb = m.d.comb
 369         sync = m.d.sync
 370         pdecode2 = self.pdecode2
 371
 372         # temporaries
 373         core_busy_o = core.n.o_data.busy_o  # core is busy
 374         core_ivalid_i = core.p.i_valid              # instruction is valid
 375
 376         if hasattr(core, "icache"):
 377             fetch_failed = core.icache.i_out.fetch_failed
 378         else:
 379             fetch_failed = Const(0, 1)
 380
 381         with m.FSM(name="exec_fsm"):
 382
 383             # waiting for instruction bus (stays there until not busy)
 384             with m.State("INSN_START"):
 385                 comb += exec_insn_o_ready.eq(1)
 386                 with m.If(exec_insn_i_valid):
 387                     comb += core_ivalid_i.eq(1)  # instruction is valid/issued
 388                     sync += self.sv_changed.eq(0)
 389                     sync += self.pc_changed.eq(0)
 390                     sync += self.msr_changed.eq(0)
 391                     with m.If(core.p.o_ready):  # only move if accepted
 392                         m.next = "INSN_ACTIVE"  # move to "wait completion"
 393
 394             # instruction started: must wait till it finishes
 395             with m.State("INSN_ACTIVE"):
 396                 # note changes to MSR, PC and SVSTATE
 397                 # XXX oops, really must monitor *all* State Regfile write
 398                 # ports looking for changes!
 399                 with m.If(self.state_nia.wen & (1 << StateRegs.SVSTATE)):
 400                     sync += self.sv_changed.eq(1)
 401                 with m.If(self.state_nia.wen & (1 << StateRegs.MSR)):
 402                     sync += self.msr_changed.eq(1)
 403                 with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
 404                     sync += self.pc_changed.eq(1)
 405                 with m.If(~core_busy_o):  # instruction done!
 406                     comb += exec_pc_o_valid.eq(1)
 407                     with m.If(exec_pc_i_ready):
 408                         # when finished, indicate "done".
 409                         # however, if there was an exception, the instruction
 410                         # is *not* yet done.  this is an implementation
 411                         # detail: we choose to implement exceptions by
 412                         # taking the exception information from the LDST
 413                         # unit, putting that *back* into the PowerDecoder2,
 414                         # and *re-running the entire instruction*.
 415                         # if we erroneously indicate "done" here, it is as if
 416                         # there were *TWO* instructions:
 417                         # 1) the failed LDST 2) a TRAP.
 418                         with m.If(~pdecode2.ldst_exc.happened &
 419                                   ~fetch_failed):
 420                             comb += self.insn_done.eq(1)
 421                         m.next = "INSN_START"  # back to fetch
 422
 423     def elaborate(self, platform):
 424         m = super().elaborate(platform)
 425         # convenience
 426         comb, sync = m.d.comb, m.d.sync
 427         cur_state = self.cur_state
 428         pdecode2 = self.pdecode2
 429         dbg = self.dbg
 430         core = self.core
 431
 432         # set up peripherals and core
 433         core_rst = self.core_rst
 434
 435         # indicate to outside world if any FU is still executing
 436         comb += self.any_busy.eq(core.n.o_data.any_busy_o)  # any FU executing
 437
 438         # address of the next instruction, in the absence of a branch
 439         # depends on the instruction size
 440         nia = Signal(64)
 441
 442         # connect up debug signals
 443         with m.If(core.o.core_terminate_o):
 444             comb += dbg.terminate_i.eq(1)
 445
 446         # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
 447         # issue, decode/execute, now joined by "Predicate fetch/calculate".
 448         # these are the handshake signals between each
 449
 450         # fetch FSM can run as soon as the PC is valid
 451         fetch_pc_i_valid = Signal()  # Execute tells Fetch "start next read"
 452         fetch_pc_o_ready = Signal()  # Fetch Tells SVSTATE "proceed"
 453
 454         # fetch FSM hands over the instruction to be decoded / issued
 455         fetch_insn_o_valid = Signal()
 456         fetch_insn_i_ready = Signal()
 457
 458         # issue FSM delivers the instruction to the be executed
 459         exec_insn_i_valid = Signal()
 460         exec_insn_o_ready = Signal()
 461
 462         # execute FSM, hands over the PC/SVSTATE back to the issue FSM
 463         exec_pc_o_valid = Signal()
 464         exec_pc_i_ready = Signal()
 465
 466         # the FSMs here are perhaps unusual in that they detect conditions
 467         # then "hold" information, combinatorially, for the core
 468         # (as opposed to using sync - which would be on a clock's delay)
 469         # this includes the actual opcode, valid flags and so on.
 470
 471         # Fetch, then predicate fetch, then Issue, then Execute.
 472         # Issue is where the VL for-loop # lives.  the ready/valid
 473         # signalling is used to communicate between the four.
 474
 475         # set up Fetch FSM
 476         fetch = FetchFSM(self.allow_overlap,
 477                          self.imem, core_rst, pdecode2, cur_state,
 478                          dbg, core,
 479                          dbg.state.svstate, # combinatorially same
 480                          nia)
 481         m.submodules.fetch = fetch
 482         # connect up in/out data to existing Signals
 483         comb += fetch.p.i_data.pc.eq(dbg.state.pc)   # combinatorially same
 484         comb += fetch.p.i_data.msr.eq(dbg.state.msr) # combinatorially same
 485         # and the ready/valid signalling
 486         comb += fetch_pc_o_ready.eq(fetch.p.o_ready)
 487         comb += fetch.p.i_valid.eq(fetch_pc_i_valid)
 488         comb += fetch_insn_o_valid.eq(fetch.n.o_valid)
 489         comb += fetch.n.i_ready.eq(fetch_insn_i_ready)
 490
 491         self.issue_fsm(m, core, nia,
 492                        dbg, core_rst,
 493                        fetch_pc_o_ready, fetch_pc_i_valid,
 494                        fetch_insn_o_valid, fetch_insn_i_ready,
 495                        exec_insn_i_valid, exec_insn_o_ready,
 496                        exec_pc_o_valid, exec_pc_i_ready)
 497
 498         self.execute_fsm(m, core,
 499                          exec_insn_i_valid, exec_insn_o_ready,
 500                          exec_pc_o_valid, exec_pc_i_ready)
 501
 502         return m
 503
 504
 505 # XXX TODO: update this
 506
 507 if __name__ == '__main__':
 508     units = {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
 509              'spr': 1,
 510              'div': 1,
 511              'mul': 1,
 512              'shiftrot': 1
 513              }
 514     pspec = TestMemPspec(ldst_ifacetype='bare_wb',
 515                          imem_ifacetype='bare_wb',
 516                          addr_wid=64,
 517                          mask_wid=8,
 518                          reg_wid=64,
 519                          units=units)
 520     dut = TestIssuer(pspec)
 521     vl = main(dut, ports=dut.ports(), name="test_issuer")
 522
 523     if len(sys.argv) == 1:
 524         vl = rtlil.convert(dut, ports=dut.external_ports(), name="test_issuer")
 525         with open("test_issuer.il", "w") as f:
 526             f.write(vl)