src/soc/simple/inorder.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
  12 the purpose of this module is to verify the functional correctness
  13 of the Function Units in the absolute simplest and clearest possible
   14 way, and to provide something that can be further incrementally
  15 improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal,
  19                     Mux, Const, Repl, Cat)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from nmutil.singlepipe import ControlBase
  25 from soc.simple.core_data import FetchOutput, FetchInput
  26
  27 from openpower.consts import MSR
  28 from openpower.decoder.power_enums import MicrOp
  29 from openpower.state import CoreState
  30 from soc.regfile.regfiles import StateRegs
  31 from soc.config.test.test_loadstore import TestMemPspec
  32 from soc.experiment.icache import ICache
  33
  34 from nmutil.util import rising_edge
  35
  36 from soc.simple.issuer import TestIssuerBase
  37
  38 def get_insn(f_instr_o, pc):
  39     if f_instr_o.width == 32:
  40         return f_instr_o
  41     else:
  42         # 64-bit: bit 2 of pc decides which word to select
  43         return f_instr_o.word_select(pc[2], 32)
  44
  45
  46 # Fetch Finite State Machine.
  47 # WARNING: there are currently DriverConflicts but it's actually working.
  48 # TODO, here: everything that is global in nature, information from the
  49 # main TestIssuerInternal, needs to move to either ispec() or ospec().
  50 # not only that: TestIssuerInternal.imem can entirely move into here
  51 # because imem is only ever accessed inside the FetchFSM.
class FetchFSM(ControlBase):
    """Instruction-fetch state machine, packaged as a ControlBase stage.

    A request arrives on the "previous" side (``self.p``: PC and MSR in
    ``self.i``).  The FSM presents the PC to ``imem``, waits for the read
    to finish, latches the 32-bit instruction into the decoder's raw
    opcode input and then raises ``self.n.o_valid`` until the consumer
    asserts ``self.n.i_ready``.

    Constructor arguments (as used by this class):

    * allow_overlap - Python bool, evaluated at elaboration time.  when
                      true, INSN_READ drops back to IDLE while
                      dbg.stopping_o is asserted
    * imem          - instruction memory.  a_pc_i, a_i_valid and
                      f_i_valid are driven; f_busy_o and f_instr_o are
                      read.  if it is an ICache, priv/virt mode are also
                      driven from the MSR
    * core_rst      - while asserted, nia is forced to zero
    * pdecode2      - decoder: dec.raw_opcode_in receives the instruction
    * cur_state     - pc, svstate and msr are captured here at fetch start
    * dbg           - debug interface: core_stopped_i, core_stop_o and
                      stopping_o are read
    * core          - only inspected for an "icache" attribute, which
                      supplies the fetch_failed signal
    * svstate       - copied into cur_state.svstate at fetch start
    * nia           - receives the next instruction address (pc + 4)
    """
    def __init__(self, allow_overlap, imem, core_rst,
                 pdecode2, cur_state,
                 dbg, core, svstate, nia):
        self.allow_overlap = allow_overlap
        self.imem = imem
        self.core_rst = core_rst
        self.pdecode2 = pdecode2
        self.cur_state = cur_state
        self.dbg = dbg
        self.core = core
        self.svstate = svstate
        self.nia = nia

        # set up pipeline ControlBase and allocate i/o specs
        # (unusual: normally done by the Pipeline API)
        # this object acts as its own "stage": ispec/ospec below are
        # presumably what new_specs() calls to build the data records
        super().__init__(stage=self)
        self.p.i_data, self.n.o_data = self.new_specs(None)
        # short aliases for the incoming / outgoing data records
        self.i, self.o = self.p.i_data, self.n.o_data

    # next 3 functions are Stage API Compliance
    def setup(self, m, i):
        """Stage API hook: nothing to set up for this stage."""
        pass

    def ispec(self):
        """Stage API: return a fresh input record (FetchInput)."""
        return FetchInput()

    def ospec(self):
        """Stage API: return a fresh output record (FetchOutput)."""
        return FetchOutput()

    def elaborate(self, platform):
        """fetch FSM

        this FSM performs fetch of raw instruction data, 32-bit only
        (no SVP64 prefix handling in this in-order variant).  states:

        * PRE_IDLE   - wait until the debug interface reports neither
                       core_stopped_i nor core_stop_o
        * IDLE       - advertise readiness; on a valid request present
                       the PC to imem and capture pc/svstate/msr
        * INSN_READ  - hold the read request while imem is busy, then
                       latch the instruction and nia = pc + 4
        * INSN_READY - assert o_valid until the consumer is ready

        returns the Module created by ControlBase.elaborate, with the
        FSM added to it.
        """
        m = super().elaborate(platform)

        dbg = self.dbg
        core = self.core
        pc = self.i.pc
        msr = self.i.msr
        svstate = self.svstate
        nia = self.nia
        # local names for the ControlBase ready/valid handshake signals
        fetch_pc_o_ready = self.p.o_ready
        fetch_pc_i_valid = self.p.i_valid
        fetch_insn_o_valid = self.n.o_valid
        fetch_insn_i_ready = self.n.i_ready

        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        cur_state = self.cur_state
        dec_opcode_o = pdecode2.dec.raw_opcode_in  # raw opcode

        # also note instruction fetch failed
        # NOTE(review): flush_needed is assigned here but never read
        # anywhere in this method
        if hasattr(core, "icache"):
            fetch_failed = core.icache.i_out.fetch_failed
            flush_needed = True
        else:
            # no icache on the core: fetch can never be reported as failed
            fetch_failed = Const(0, 1)
            flush_needed = False

        # set priv / virt mode on I-Cache, sigh
        if isinstance(self.imem, ICache):
            comb += self.imem.i_in.priv_mode.eq(~msr[MSR.PR])
            comb += self.imem.i_in.virt_mode.eq(msr[MSR.DR])

        with m.FSM(name='fetch_fsm'):

            # allow fetch to not run at startup due to I-Cache reset not
            # having time to settle.  power-on-reset holds dbg.core_stopped_i
            with m.State("PRE_IDLE"):
                with m.If(~dbg.core_stopped_i & ~dbg.core_stop_o):
                    m.next = "IDLE"

            # waiting (zzz)
            with m.State("IDLE"):
                # only advertise "ready for a PC" when the debugger is not
                # stopping the core and no fetch failure is outstanding
                with m.If(~dbg.stopping_o & ~fetch_failed):
                    comb += fetch_pc_o_ready.eq(1)
                with m.If(fetch_pc_i_valid & ~fetch_failed):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                    sync += cur_state.pc.eq(pc)
                    sync += cur_state.svstate.eq(svstate)  # and svstate
                    sync += cur_state.msr.eq(msr)  # and msr

                    m.next = "INSN_READ"  # move to "wait for bus" phase

            # wait for the instruction memory read to complete
            with m.State("INSN_READ"):
                # "stopping" is chosen at elaboration time: without
                # allow_overlap it is a constant zero, so the abort
                # branch below can never be taken
                if self.allow_overlap:
                    stopping = dbg.stopping_o
                else:
                    stopping = Const(0)
                with m.If(stopping):
                    # stopping: jump back to idle
                    m.next = "IDLE"
                with m.Else():
                    with m.If(self.imem.f_busy_o & ~fetch_failed):  # zzz...
                        # busy but not fetch failed: stay in wait-read
                        comb += self.imem.a_i_valid.eq(1)
                        comb += self.imem.f_i_valid.eq(1)
                    with m.Else():
                        # not busy (or fetch failed!): instruction fetched
                        # when fetch failed, the instruction gets ignored
                        # by the decoder
                        insn = get_insn(self.imem.f_instr_o, cur_state.pc)
                        # not SVP64 - 32-bit only
                        sync += nia.eq(cur_state.pc + 4)
                        sync += dec_opcode_o.eq(insn)
                        m.next = "INSN_READY"

            with m.State("INSN_READY"):
                # hand over the instruction, to be decoded
                comb += fetch_insn_o_valid.eq(1)
                with m.If(fetch_insn_i_ready):
                    m.next = "IDLE"

        # whatever was done above, over-ride it if core reset is held
        # (this statement comes after the FSM, so it takes priority over
        # the nia update made in INSN_READ)
        with m.If(self.core_rst):
            sync += nia.eq(0)

        return m
 182
 183
 184 class TestIssuerInternalInOrder(TestIssuerBase):
 185     """TestIssuer - reads instructions from TestMemory and issues them
 186
 187     efficiency and speed is not the main goal here: functional correctness
 188     and code clarity is.  optimisations (which almost 100% interfere with
 189     easy understanding) come later.
 190     """
 191
 192     def issue_fsm(self, m, core, nia,
 193                   dbg, core_rst,
 194                   fetch_pc_o_ready, fetch_pc_i_valid,
 195                   fetch_insn_o_valid, fetch_insn_i_ready,
 196                   exec_insn_i_valid, exec_insn_o_ready,
 197                   exec_pc_o_valid, exec_pc_i_ready):
 198         """issue FSM
 199
 200         decode / issue FSM.  this interacts with the "fetch" FSM
 201         through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
 202         (outgoing). also interacts with the "execute" FSM
 203         through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
 204         (incoming).
 205         SVP64 RM prefixes have already been set up by the
 206         "fetch" phase, so execute is fairly straightforward.
 207         """
 208
 209         comb = m.d.comb
 210         sync = m.d.sync
 211         pdecode2 = self.pdecode2
 212         cur_state = self.cur_state
 213
 214         # temporaries
 215         dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode
 216
 217         # note if an exception happened.  in a pipelined or OoO design
 218         # this needs to be accompanied by "shadowing" (or stalling)
 219         exc_happened = self.core.o.exc_happened
 220         # also note instruction fetch failed
 221         if hasattr(core, "icache"):
 222             fetch_failed = core.icache.i_out.fetch_failed
 223             flush_needed = True
 224             # set to fault in decoder
 225             # update (highest priority) instruction fault
 226             rising_fetch_failed = rising_edge(m, fetch_failed)
 227             with m.If(rising_fetch_failed):
 228                 sync += pdecode2.instr_fault.eq(1)
 229         else:
 230             fetch_failed = Const(0, 1)
 231             flush_needed = False
 232
 233         with m.FSM(name="issue_fsm"):
 234
 235             # sync with the "fetch" phase which is reading the instruction
 236             # at this point, there is no instruction running, that
 237             # could inadvertently update the PC.
 238             with m.State("ISSUE_START"):
 239                 # reset instruction fault
 240                 sync += pdecode2.instr_fault.eq(0)
 241                 # wait on "core stop" release, before next fetch
 242                 # need to do this here, in case we are in a VL==0 loop
 243                 with m.If(~dbg.core_stop_o & ~core_rst):
 244                     comb += fetch_pc_i_valid.eq(1)  # tell fetch to start
 245                     with m.If(fetch_pc_o_ready):   # fetch acknowledged us
 246                         m.next = "INSN_WAIT"
 247                 with m.Else():
 248                     # tell core it's stopped, and acknowledge debug handshake
 249                     comb += dbg.core_stopped_i.eq(1)
 250
 251             # wait for an instruction to arrive from Fetch
 252             with m.State("INSN_WAIT"):
 253                 if self.allow_overlap:
 254                     stopping = dbg.stopping_o
 255                 else:
 256                     stopping = Const(0)
 257                 with m.If(stopping):
 258                     # stopping: jump back to idle
 259                     m.next = "ISSUE_START"
 260                     if flush_needed:
 261                         # request the icache to stop asserting "failed"
 262                         comb += core.icache.flush_in.eq(1)
 263                     # stop instruction fault
 264                     sync += pdecode2.instr_fault.eq(0)
 265                 with m.Else():
 266                     comb += fetch_insn_i_ready.eq(1)
 267                     with m.If(fetch_insn_o_valid):
 268                         # loop into ISSUE_START if it's a SVP64 instruction
 269                         # and VL == 0.  this because VL==0 is a for-loop
 270                         # from 0 to 0 i.e. always, always a NOP.
 271                         m.next = "DECODE_SV"  # skip predication
 272
 273             # after src/dst step have been updated, we are ready
 274             # to decode the instruction
 275             with m.State("DECODE_SV"):
 276                 # decode the instruction
 277                 with m.If(~fetch_failed):
 278                     sync += pdecode2.instr_fault.eq(0)
 279                 sync += core.i.e.eq(pdecode2.e)
 280                 sync += core.i.state.eq(cur_state)
 281                 sync += core.i.raw_insn_i.eq(dec_opcode_i)
 282                 sync += core.i.bigendian_i.eq(self.core_bigendian_i)
 283                 # after decoding, reset any previous exception condition,
 284                 # allowing it to be set again during the next execution
 285                 sync += pdecode2.ldst_exc.eq(0)
 286
 287                 m.next = "INSN_EXECUTE"  # move to "execute"
 288
 289             # handshake with execution FSM, move to "wait" once acknowledged
 290             with m.State("INSN_EXECUTE"):
 291                 comb += exec_insn_i_valid.eq(1)  # trigger execute
 292                 with m.If(exec_insn_o_ready):   # execute acknowledged us
 293                     m.next = "EXECUTE_WAIT"
 294
 295             with m.State("EXECUTE_WAIT"):
 296                 # wait on "core stop" release, at instruction end
 297                 # need to do this here, in case we are in a VL>1 loop
 298                 with m.If(~dbg.core_stop_o & ~core_rst):
 299                     comb += exec_pc_i_ready.eq(1)
 300                     # see https://bugs.libre-soc.org/show_bug.cgi?id=636
 301                     # the exception info needs to be blatted into
 302                     # pdecode.ldst_exc, and the instruction "re-run".
 303                     # when ldst_exc.happened is set, the PowerDecoder2
 304                     # reacts very differently: it re-writes the instruction
 305                     # with a "trap" (calls PowerDecoder2.trap()) which
 306                     # will *overwrite* whatever was requested and jump the
 307                     # PC to the exception address, as well as alter MSR.
 308                     # nothing else needs to be done other than to note
 309                     # the change of PC and MSR (and, later, SVSTATE)
 310                     with m.If(exc_happened):
 311                         mmu = core.fus.get_exc("mmu0")
 312                         ldst = core.fus.get_exc("ldst0")
 313                         if mmu is not None:
 314                             with m.If(fetch_failed):
 315                                 # instruction fetch: exception is from MMU
 316                                 # reset instr_fault (highest priority)
 317                                 sync += pdecode2.ldst_exc.eq(mmu)
 318                                 sync += pdecode2.instr_fault.eq(0)
 319                                 if flush_needed:
 320                                     # request icache to stop asserting "failed"
 321                                     comb += core.icache.flush_in.eq(1)
 322                         with m.If(~fetch_failed):
 323                             # otherwise assume it was a LDST exception
 324                             sync += pdecode2.ldst_exc.eq(ldst)
 325
 326                     with m.If(exec_pc_o_valid):
 327
 328                         # return directly to Decode if Execute generated an
 329                         # exception.
 330                         with m.If(pdecode2.ldst_exc.happened):
 331                             m.next = "DECODE_SV"
 332
 333                         # if MSR, PC or SVSTATE were changed by the previous
 334                         # instruction, go directly back to Fetch, without
 335                         # updating either MSR PC or SVSTATE
 336                         with m.Elif(self.msr_changed | self.pc_changed |
 337                                     self.sv_changed):
 338                             m.next = "ISSUE_START"
 339
 340                         with m.Else():
 341                             # before going back to fetch, update the PC state
 342                             # register with the NIA.
 343                             # ok here we are not reading the branch unit.
 344                             # TODO: this just blithely overwrites whatever
 345                             #       pipeline updated the PC
 346                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 347                             comb += self.state_w_pc.i_data.eq(nia)
 348                             m.next = "ISSUE_START"
 349
 350                 with m.Else():
 351                     comb += dbg.core_stopped_i.eq(1)
 352                     if flush_needed:
 353                         # request the icache to stop asserting "failed"
 354                         comb += core.icache.flush_in.eq(1)
 355                     # stop instruction fault
 356                     sync += pdecode2.instr_fault.eq(0)
 357                     if flush_needed:
 358                         # request the icache to stop asserting "failed"
 359                         comb += core.icache.flush_in.eq(1)
 360                     # stop instruction fault
 361                     sync += pdecode2.instr_fault.eq(0)
 362
 363     def execute_fsm(self, m, core,
 364                     exec_insn_i_valid, exec_insn_o_ready,
 365                     exec_pc_o_valid, exec_pc_i_ready):
 366         """execute FSM
 367
 368         execute FSM. this interacts with the "issue" FSM
 369         through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
 370         (outgoing). SVP64 RM prefixes have already been set up by the
 371         "issue" phase, so execute is fairly straightforward.
 372         """
 373
 374         comb = m.d.comb
 375         sync = m.d.sync
 376         pdecode2 = self.pdecode2
 377
 378         # temporaries
 379         core_busy_o = core.n.o_data.busy_o  # core is busy
 380         core_ivalid_i = core.p.i_valid              # instruction is valid
 381
 382         if hasattr(core, "icache"):
 383             fetch_failed = core.icache.i_out.fetch_failed
 384         else:
 385             fetch_failed = Const(0, 1)
 386
 387         with m.FSM(name="exec_fsm"):
 388
 389             # waiting for instruction bus (stays there until not busy)
 390             with m.State("INSN_START"):
 391                 comb += exec_insn_o_ready.eq(1)
 392                 with m.If(exec_insn_i_valid):
 393                     comb += core_ivalid_i.eq(1)  # instruction is valid/issued
 394                     sync += self.sv_changed.eq(0)
 395                     sync += self.pc_changed.eq(0)
 396                     sync += self.msr_changed.eq(0)
 397                     with m.If(core.p.o_ready):  # only move if accepted
 398                         m.next = "INSN_ACTIVE"  # move to "wait completion"
 399
 400             # instruction started: must wait till it finishes
 401             with m.State("INSN_ACTIVE"):
 402                 # note changes to MSR, PC and SVSTATE
 403                 # XXX oops, really must monitor *all* State Regfile write
 404                 # ports looking for changes!
 405                 with m.If(self.state_nia.wen & (1 << StateRegs.SVSTATE)):
 406                     sync += self.sv_changed.eq(1)
 407                 with m.If(self.state_nia.wen & (1 << StateRegs.MSR)):
 408                     sync += self.msr_changed.eq(1)
 409                 with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
 410                     sync += self.pc_changed.eq(1)
 411                 with m.If(~core_busy_o):  # instruction done!
 412                     comb += exec_pc_o_valid.eq(1)
 413                     with m.If(exec_pc_i_ready):
 414                         # when finished, indicate "done".
 415                         # however, if there was an exception, the instruction
 416                         # is *not* yet done.  this is an implementation
 417                         # detail: we choose to implement exceptions by
 418                         # taking the exception information from the LDST
 419                         # unit, putting that *back* into the PowerDecoder2,
 420                         # and *re-running the entire instruction*.
 421                         # if we erroneously indicate "done" here, it is as if
 422                         # there were *TWO* instructions:
 423                         # 1) the failed LDST 2) a TRAP.
 424                         with m.If(~pdecode2.ldst_exc.happened &
 425                                   ~fetch_failed):
 426                             comb += self.insn_done.eq(1)
 427                         m.next = "INSN_START"  # back to fetch
 428
 429     def elaborate(self, platform):
 430         m = super().elaborate(platform)
 431         # convenience
 432         comb, sync = m.d.comb, m.d.sync
 433         cur_state = self.cur_state
 434         pdecode2 = self.pdecode2
 435         dbg = self.dbg
 436         core = self.core
 437
 438         # set up peripherals and core
 439         core_rst = self.core_rst
 440
 441         # indicate to outside world if any FU is still executing
 442         comb += self.any_busy.eq(core.n.o_data.any_busy_o)  # any FU executing
 443
 444         # address of the next instruction, in the absence of a branch
 445         # depends on the instruction size
 446         nia = Signal(64)
 447
 448         # connect up debug signals
 449         with m.If(core.o.core_terminate_o):
 450             comb += dbg.terminate_i.eq(1)
 451
 452         # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
 453         # issue, decode/execute, now joined by "Predicate fetch/calculate".
 454         # these are the handshake signals between each
 455
 456         # fetch FSM can run as soon as the PC is valid
 457         fetch_pc_i_valid = Signal()  # Execute tells Fetch "start next read"
 458         fetch_pc_o_ready = Signal()  # Fetch Tells SVSTATE "proceed"
 459
 460         # fetch FSM hands over the instruction to be decoded / issued
 461         fetch_insn_o_valid = Signal()
 462         fetch_insn_i_ready = Signal()
 463
 464         # issue FSM delivers the instruction to the be executed
 465         exec_insn_i_valid = Signal()
 466         exec_insn_o_ready = Signal()
 467
 468         # execute FSM, hands over the PC/SVSTATE back to the issue FSM
 469         exec_pc_o_valid = Signal()
 470         exec_pc_i_ready = Signal()
 471
 472         # the FSMs here are perhaps unusual in that they detect conditions
 473         # then "hold" information, combinatorially, for the core
 474         # (as opposed to using sync - which would be on a clock's delay)
 475         # this includes the actual opcode, valid flags and so on.
 476
 477         # Fetch, then predicate fetch, then Issue, then Execute.
 478         # Issue is where the VL for-loop # lives.  the ready/valid
 479         # signalling is used to communicate between the four.
 480
 481         # set up Fetch FSM
 482         fetch = FetchFSM(self.allow_overlap,
 483                          self.imem, core_rst, pdecode2, cur_state,
 484                          dbg, core,
 485                          dbg.state.svstate, # combinatorially same
 486                          nia)
 487         m.submodules.fetch = fetch
 488         # connect up in/out data to existing Signals
 489         comb += fetch.p.i_data.pc.eq(dbg.state.pc)   # combinatorially same
 490         comb += fetch.p.i_data.msr.eq(dbg.state.msr) # combinatorially same
 491         # and the ready/valid signalling
 492         comb += fetch_pc_o_ready.eq(fetch.p.o_ready)
 493         comb += fetch.p.i_valid.eq(fetch_pc_i_valid)
 494         comb += fetch_insn_o_valid.eq(fetch.n.o_valid)
 495         comb += fetch.n.i_ready.eq(fetch_insn_i_ready)
 496
 497         self.issue_fsm(m, core, nia,
 498                        dbg, core_rst,
 499                        fetch_pc_o_ready, fetch_pc_i_valid,
 500                        fetch_insn_o_valid, fetch_insn_i_ready,
 501                        exec_insn_i_valid, exec_insn_o_ready,
 502                        exec_pc_o_valid, exec_pc_i_ready)
 503
 504         self.execute_fsm(m, core,
 505                          exec_insn_i_valid, exec_insn_o_ready,
 506                          exec_pc_o_valid, exec_pc_i_ready)
 507
 508         return m
 509
 510
 511 # XXX TODO: update this
 512
 513 if __name__ == '__main__':
 514     units = {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
 515              'spr': 1,
 516              'div': 1,
 517              'mul': 1,
 518              'shiftrot': 1
 519              }
 520     pspec = TestMemPspec(ldst_ifacetype='bare_wb',
 521                          imem_ifacetype='bare_wb',
 522                          addr_wid=64,
 523                          mask_wid=8,
 524                          reg_wid=64,
 525                          units=units)
 526     dut = TestIssuer(pspec)
 527     vl = main(dut, ports=dut.ports(), name="test_issuer")
 528
 529     if len(sys.argv) == 1:
 530         vl = rtlil.convert(dut, ports=dut.external_ports(), name="test_issuer")
 531         with open("test_issuer.il", "w") as f:
 532             f.write(vl)