3 not in any way intended for production use. this runs a FSM that:
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
10 * does it all over again
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to at least provide something that can be further incrementally
18 from nmigen
import (Elaboratable
, Module
, Signal
,
19 Mux
, Const
, Repl
, Cat
)
20 from nmigen
.cli
import rtlil
21 from nmigen
.cli
import main
24 from nmutil
.singlepipe
import ControlBase
25 from soc
.simple
.core_data
import FetchOutput
, FetchInput
27 from openpower
.consts
import MSR
28 from openpower
.decoder
.power_enums
import MicrOp
29 from openpower
.state
import CoreState
30 from soc
.regfile
.regfiles
import StateRegs
31 from soc
.config
.test
.test_loadstore
import TestMemPspec
32 from soc
.experiment
.icache
import ICache
34 from nmutil
.util
import rising_edge
36 from soc
.simple
.issuer
import TestIssuerBase
def get_insn(f_instr_o, pc):
    """Return the 32-bit instruction word for ``pc`` from a fetch result.

    f_instr_o: the value produced by instruction memory.  Only its
               ``width`` attribute and ``word_select`` method are used.
    pc:        the program counter; only bit 2 is examined.

    When the fetch port is exactly 32 bits wide it already holds a single
    instruction and is returned unchanged.  Otherwise the port is treated
    as holding more than one 32-bit word, and bit 2 of the PC picks which
    word is returned.
    """
    if f_instr_o.width == 32:
        # 32-bit fetch port: the fetched data *is* the instruction
        return f_instr_o
    # 64-bit: bit 2 of pc decides which word to select
    return f_instr_o.word_select(pc[2], 32)
46 # Fetch Finite State Machine.
47 # WARNING: there are currently DriverConflicts but it's actually working.
48 # TODO, here: everything that is global in nature, information from the
49 # main TestIssuerInternal, needs to move to either ispec() or ospec().
50 # not only that: TestIssuerInternal.imem can entirely move into here
51 # because imem is only ever accessed inside the FetchFSM.
52 class FetchFSM(ControlBase
):
53 def __init__(self
, allow_overlap
, imem
, core_rst
,
55 dbg
, core
, svstate
, nia
):
56 self
.allow_overlap
= allow_overlap
58 self
.core_rst
= core_rst
59 self
.pdecode2
= pdecode2
60 self
.cur_state
= cur_state
63 self
.svstate
= svstate
66 # set up pipeline ControlBase and allocate i/o specs
67 # (unusual: normally done by the Pipeline API)
68 super().__init
__(stage
=self
)
69 self
.p
.i_data
, self
.n
.o_data
= self
.new_specs(None)
70 self
.i
, self
.o
= self
.p
.i_data
, self
.n
.o_data
72 # next 3 functions are Stage API Compliance
73 def setup(self
, m
, i
):
82 def elaborate(self
, platform
):
85 this FSM performs fetch of raw instruction data, partial-decodes
86 it 32-bit at a time to detect SVP64 prefixes, and will optionally
87 read a 2nd 32-bit quantity if that occurs.
89 m
= super().elaborate(platform
)
95 svstate
= self
.svstate
97 fetch_pc_o_ready
= self
.p
.o_ready
98 fetch_pc_i_valid
= self
.p
.i_valid
99 fetch_insn_o_valid
= self
.n
.o_valid
100 fetch_insn_i_ready
= self
.n
.i_ready
104 pdecode2
= self
.pdecode2
105 cur_state
= self
.cur_state
106 dec_opcode_o
= pdecode2
.dec
.raw_opcode_in
# raw opcode
108 # also note instruction fetch failed
109 if hasattr(core
, "icache"):
110 fetch_failed
= core
.icache
.i_out
.fetch_failed
113 fetch_failed
= Const(0, 1)
116 # set priv / virt mode on I-Cache, sigh
117 if isinstance(self
.imem
, ICache
):
118 comb
+= self
.imem
.i_in
.priv_mode
.eq(~msr
[MSR
.PR
])
119 comb
+= self
.imem
.i_in
.virt_mode
.eq(msr
[MSR
.DR
])
121 with m
.FSM(name
='fetch_fsm'):
123 # allow fetch to not run at startup due to I-Cache reset not
124 # having time to settle. power-on-reset holds dbg.core_stopped_i
125 with m
.State("PRE_IDLE"):
126 with m
.If(~dbg
.core_stopped_i
& ~dbg
.core_stop_o
):
130 with m
.State("IDLE"):
131 with m
.If(~dbg
.stopping_o
& ~fetch_failed
):
132 comb
+= fetch_pc_o_ready
.eq(1)
133 with m
.If(fetch_pc_i_valid
& ~fetch_failed
):
134 # instruction allowed to go: start by reading the PC
135 # capture the PC and also drop it into Insn Memory
136 # we have joined a pair of combinatorial memory
137 # lookups together. this is Generally Bad.
138 comb
+= self
.imem
.a_pc_i
.eq(pc
)
139 comb
+= self
.imem
.a_i_valid
.eq(1)
140 comb
+= self
.imem
.f_i_valid
.eq(1)
141 sync
+= cur_state
.pc
.eq(pc
)
142 sync
+= cur_state
.svstate
.eq(svstate
) # and svstate
143 sync
+= cur_state
.msr
.eq(msr
) # and msr
145 m
.next
= "INSN_READ" # move to "wait for bus" phase
147 # dummy pause to find out why simulation is not keeping up
148 with m
.State("INSN_READ"):
149 if self
.allow_overlap
:
150 stopping
= dbg
.stopping_o
154 # stopping: jump back to idle
157 with m
.If(self
.imem
.f_busy_o
& ~fetch_failed
): # zzz...
158 # busy but not fetch failed: stay in wait-read
159 comb
+= self
.imem
.a_i_valid
.eq(1)
160 comb
+= self
.imem
.f_i_valid
.eq(1)
162 # not busy (or fetch failed!): instruction fetched
163 # when fetch failed, the instruction gets ignored
165 insn
= get_insn(self
.imem
.f_instr_o
, cur_state
.pc
)
166 # not SVP64 - 32-bit only
167 sync
+= nia
.eq(cur_state
.pc
+ 4)
168 sync
+= dec_opcode_o
.eq(insn
)
169 m
.next
= "INSN_READY"
171 with m
.State("INSN_READY"):
172 # hand over the instruction, to be decoded
173 comb
+= fetch_insn_o_valid
.eq(1)
174 with m
.If(fetch_insn_i_ready
):
177 # whatever was done above, over-ride it if core reset is held
178 with m
.If(self
.core_rst
):
184 class TestIssuerInternalInOrder(TestIssuerBase
):
185 """TestIssuer - reads instructions from TestMemory and issues them
187 efficiency and speed is not the main goal here: functional correctness
188 and code clarity is. optimisations (which almost 100% interfere with
189 easy understanding) come later.
192 def issue_fsm(self
, m
, core
, nia
,
194 fetch_pc_o_ready
, fetch_pc_i_valid
,
195 fetch_insn_o_valid
, fetch_insn_i_ready
,
196 exec_insn_i_valid
, exec_insn_o_ready
,
197 exec_pc_o_valid
, exec_pc_i_ready
):
200 decode / issue FSM. this interacts with the "fetch" FSM
201 through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
202 (outgoing). also interacts with the "execute" FSM
203 through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
205 SVP64 RM prefixes have already been set up by the
206 "fetch" phase, so execute is fairly straightforward.
211 pdecode2
= self
.pdecode2
212 cur_state
= self
.cur_state
215 dec_opcode_i
= pdecode2
.dec
.raw_opcode_in
# raw opcode
217 # note if an exception happened. in a pipelined or OoO design
218 # this needs to be accompanied by "shadowing" (or stalling)
219 exc_happened
= self
.core
.o
.exc_happened
220 # also note instruction fetch failed
221 if hasattr(core
, "icache"):
222 fetch_failed
= core
.icache
.i_out
.fetch_failed
224 # set to fault in decoder
225 # update (highest priority) instruction fault
226 rising_fetch_failed
= rising_edge(m
, fetch_failed
)
227 with m
.If(rising_fetch_failed
):
228 sync
+= pdecode2
.instr_fault
.eq(1)
230 fetch_failed
= Const(0, 1)
233 with m
.FSM(name
="issue_fsm"):
235 # sync with the "fetch" phase which is reading the instruction
236 # at this point, there is no instruction running, that
237 # could inadvertently update the PC.
238 with m
.State("ISSUE_START"):
239 # reset instruction fault
240 sync
+= pdecode2
.instr_fault
.eq(0)
241 # wait on "core stop" release, before next fetch
242 # need to do this here, in case we are in a VL==0 loop
243 with m
.If(~dbg
.core_stop_o
& ~core_rst
):
244 comb
+= fetch_pc_i_valid
.eq(1) # tell fetch to start
245 with m
.If(fetch_pc_o_ready
): # fetch acknowledged us
248 # tell core it's stopped, and acknowledge debug handshake
249 comb
+= dbg
.core_stopped_i
.eq(1)
251 # wait for an instruction to arrive from Fetch
252 with m
.State("INSN_WAIT"):
253 if self
.allow_overlap
:
254 stopping
= dbg
.stopping_o
258 # stopping: jump back to idle
259 m
.next
= "ISSUE_START"
261 # request the icache to stop asserting "failed"
262 comb
+= core
.icache
.flush_in
.eq(1)
263 # stop instruction fault
264 sync
+= pdecode2
.instr_fault
.eq(0)
266 comb
+= fetch_insn_i_ready
.eq(1)
267 with m
.If(fetch_insn_o_valid
):
268 # loop into ISSUE_START if it's a SVP64 instruction
269 # and VL == 0. this because VL==0 is a for-loop
270 # from 0 to 0 i.e. always, always a NOP.
271 m
.next
= "DECODE_SV" # skip predication
273 # after src/dst step have been updated, we are ready
274 # to decode the instruction
275 with m
.State("DECODE_SV"):
276 # decode the instruction
277 with m
.If(~fetch_failed
):
278 sync
+= pdecode2
.instr_fault
.eq(0)
279 sync
+= core
.i
.e
.eq(pdecode2
.e
)
280 sync
+= core
.i
.state
.eq(cur_state
)
281 sync
+= core
.i
.raw_insn_i
.eq(dec_opcode_i
)
282 sync
+= core
.i
.bigendian_i
.eq(self
.core_bigendian_i
)
283 # after decoding, reset any previous exception condition,
284 # allowing it to be set again during the next execution
285 sync
+= pdecode2
.ldst_exc
.eq(0)
287 m
.next
= "INSN_EXECUTE" # move to "execute"
289 # handshake with execution FSM, move to "wait" once acknowledged
290 with m
.State("INSN_EXECUTE"):
291 comb
+= exec_insn_i_valid
.eq(1) # trigger execute
292 with m
.If(exec_insn_o_ready
): # execute acknowledged us
293 m
.next
= "EXECUTE_WAIT"
295 with m
.State("EXECUTE_WAIT"):
296 # wait on "core stop" release, at instruction end
297 # need to do this here, in case we are in a VL>1 loop
298 with m
.If(~dbg
.core_stop_o
& ~core_rst
):
299 comb
+= exec_pc_i_ready
.eq(1)
300 # see https://bugs.libre-soc.org/show_bug.cgi?id=636
301 # the exception info needs to be blatted into
302 # pdecode.ldst_exc, and the instruction "re-run".
303 # when ldst_exc.happened is set, the PowerDecoder2
304 # reacts very differently: it re-writes the instruction
305 # with a "trap" (calls PowerDecoder2.trap()) which
306 # will *overwrite* whatever was requested and jump the
307 # PC to the exception address, as well as alter MSR.
308 # nothing else needs to be done other than to note
309 # the change of PC and MSR (and, later, SVSTATE)
310 with m
.If(exc_happened
):
311 mmu
= core
.fus
.get_exc("mmu0")
312 ldst
= core
.fus
.get_exc("ldst0")
314 with m
.If(fetch_failed
):
315 # instruction fetch: exception is from MMU
316 # reset instr_fault (highest priority)
317 sync
+= pdecode2
.ldst_exc
.eq(mmu
)
318 sync
+= pdecode2
.instr_fault
.eq(0)
320 # request icache to stop asserting "failed"
321 comb
+= core
.icache
.flush_in
.eq(1)
322 with m
.If(~fetch_failed
):
323 # otherwise assume it was a LDST exception
324 sync
+= pdecode2
.ldst_exc
.eq(ldst
)
326 with m
.If(exec_pc_o_valid
):
328 # return directly to Decode if Execute generated an
330 with m
.If(pdecode2
.ldst_exc
.happened
):
333 # if MSR, PC or SVSTATE were changed by the previous
334 # instruction, go directly back to Fetch, without
335 # updating either MSR PC or SVSTATE
336 with m
.Elif(self
.msr_changed | self
.pc_changed |
338 m
.next
= "ISSUE_START"
341 # before going back to fetch, update the PC state
342 # register with the NIA.
343 # ok here we are not reading the branch unit.
344 # TODO: this just blithely overwrites whatever
345 # pipeline updated the PC
346 comb
+= self
.state_w_pc
.wen
.eq(1 << StateRegs
.PC
)
347 comb
+= self
.state_w_pc
.i_data
.eq(nia
)
348 m
.next
= "ISSUE_START"
351 comb
+= dbg
.core_stopped_i
.eq(1)
353 # request the icache to stop asserting "failed"
354 comb
+= core
.icache
.flush_in
.eq(1)
355 # stop instruction fault
356 sync
+= pdecode2
.instr_fault
.eq(0)
358 # request the icache to stop asserting "failed"
359 comb
+= core
.icache
.flush_in
.eq(1)
360 # stop instruction fault
361 sync
+= pdecode2
.instr_fault
.eq(0)
363 def execute_fsm(self
, m
, core
,
364 exec_insn_i_valid
, exec_insn_o_ready
,
365 exec_pc_o_valid
, exec_pc_i_ready
):
368 execute FSM. this interacts with the "issue" FSM
369 through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
370 (outgoing). SVP64 RM prefixes have already been set up by the
371 "issue" phase, so execute is fairly straightforward.
376 pdecode2
= self
.pdecode2
379 core_busy_o
= core
.n
.o_data
.busy_o
# core is busy
380 core_ivalid_i
= core
.p
.i_valid
# instruction is valid
382 if hasattr(core
, "icache"):
383 fetch_failed
= core
.icache
.i_out
.fetch_failed
385 fetch_failed
= Const(0, 1)
387 with m
.FSM(name
="exec_fsm"):
389 # waiting for instruction bus (stays there until not busy)
390 with m
.State("INSN_START"):
391 comb
+= exec_insn_o_ready
.eq(1)
392 with m
.If(exec_insn_i_valid
):
393 comb
+= core_ivalid_i
.eq(1) # instruction is valid/issued
394 sync
+= self
.sv_changed
.eq(0)
395 sync
+= self
.pc_changed
.eq(0)
396 sync
+= self
.msr_changed
.eq(0)
397 with m
.If(core
.p
.o_ready
): # only move if accepted
398 m
.next
= "INSN_ACTIVE" # move to "wait completion"
400 # instruction started: must wait till it finishes
401 with m
.State("INSN_ACTIVE"):
402 # note changes to MSR, PC and SVSTATE
403 # XXX oops, really must monitor *all* State Regfile write
404 # ports looking for changes!
405 with m
.If(self
.state_nia
.wen
& (1 << StateRegs
.SVSTATE
)):
406 sync
+= self
.sv_changed
.eq(1)
407 with m
.If(self
.state_nia
.wen
& (1 << StateRegs
.MSR
)):
408 sync
+= self
.msr_changed
.eq(1)
409 with m
.If(self
.state_nia
.wen
& (1 << StateRegs
.PC
)):
410 sync
+= self
.pc_changed
.eq(1)
411 with m
.If(~core_busy_o
): # instruction done!
412 comb
+= exec_pc_o_valid
.eq(1)
413 with m
.If(exec_pc_i_ready
):
414 # when finished, indicate "done".
415 # however, if there was an exception, the instruction
416 # is *not* yet done. this is an implementation
417 # detail: we choose to implement exceptions by
418 # taking the exception information from the LDST
419 # unit, putting that *back* into the PowerDecoder2,
420 # and *re-running the entire instruction*.
421 # if we erroneously indicate "done" here, it is as if
422 # there were *TWO* instructions:
423 # 1) the failed LDST 2) a TRAP.
424 with m
.If(~pdecode2
.ldst_exc
.happened
&
426 comb
+= self
.insn_done
.eq(1)
427 m
.next
= "INSN_START" # back to fetch
429 def elaborate(self
, platform
):
430 m
= super().elaborate(platform
)
432 comb
, sync
= m
.d
.comb
, m
.d
.sync
433 cur_state
= self
.cur_state
434 pdecode2
= self
.pdecode2
438 # set up peripherals and core
439 core_rst
= self
.core_rst
441 # indicate to outside world if any FU is still executing
442 comb
+= self
.any_busy
.eq(core
.n
.o_data
.any_busy_o
) # any FU executing
444 # address of the next instruction, in the absence of a branch
445 # depends on the instruction size
448 # connect up debug signals
449 with m
.If(core
.o
.core_terminate_o
):
450 comb
+= dbg
.terminate_i
.eq(1)
452 # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
453 # issue, decode/execute, now joined by "Predicate fetch/calculate".
454 # these are the handshake signals between each
456 # fetch FSM can run as soon as the PC is valid
457 fetch_pc_i_valid
= Signal() # Execute tells Fetch "start next read"
458 fetch_pc_o_ready
= Signal() # Fetch Tells SVSTATE "proceed"
460 # fetch FSM hands over the instruction to be decoded / issued
461 fetch_insn_o_valid
= Signal()
462 fetch_insn_i_ready
= Signal()
464 # issue FSM delivers the instruction to be executed
465 exec_insn_i_valid
= Signal()
466 exec_insn_o_ready
= Signal()
468 # execute FSM, hands over the PC/SVSTATE back to the issue FSM
469 exec_pc_o_valid
= Signal()
470 exec_pc_i_ready
= Signal()
472 # the FSMs here are perhaps unusual in that they detect conditions
473 # then "hold" information, combinatorially, for the core
474 # (as opposed to using sync - which would be on a clock's delay)
475 # this includes the actual opcode, valid flags and so on.
477 # Fetch, then predicate fetch, then Issue, then Execute.
478 # Issue is where the VL for-loop lives. the ready/valid
479 # signalling is used to communicate between the four.
482 fetch
= FetchFSM(self
.allow_overlap
,
483 self
.imem
, core_rst
, pdecode2
, cur_state
,
485 dbg
.state
.svstate
, # combinatorially same
487 m
.submodules
.fetch
= fetch
488 # connect up in/out data to existing Signals
489 comb
+= fetch
.p
.i_data
.pc
.eq(dbg
.state
.pc
) # combinatorially same
490 comb
+= fetch
.p
.i_data
.msr
.eq(dbg
.state
.msr
) # combinatorially same
491 # and the ready/valid signalling
492 comb
+= fetch_pc_o_ready
.eq(fetch
.p
.o_ready
)
493 comb
+= fetch
.p
.i_valid
.eq(fetch_pc_i_valid
)
494 comb
+= fetch_insn_o_valid
.eq(fetch
.n
.o_valid
)
495 comb
+= fetch
.n
.i_ready
.eq(fetch_insn_i_ready
)
497 self
.issue_fsm(m
, core
, nia
,
499 fetch_pc_o_ready
, fetch_pc_i_valid
,
500 fetch_insn_o_valid
, fetch_insn_i_ready
,
501 exec_insn_i_valid
, exec_insn_o_ready
,
502 exec_pc_o_valid
, exec_pc_i_ready
)
504 self
.execute_fsm(m
, core
,
505 exec_insn_i_valid
, exec_insn_o_ready
,
506 exec_pc_o_valid
, exec_pc_i_ready
)
511 # XXX TODO: update this
513 if __name__
== '__main__':
514 units
= {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
520 pspec
= TestMemPspec(ldst_ifacetype
='bare_wb',
521 imem_ifacetype
='bare_wb',
526 dut
= TestIssuer(pspec
)
527 vl
= main(dut
, ports
=dut
.ports(), name
="test_issuer")
529 if len(sys
.argv
) == 1:
530 vl
= rtlil
.convert(dut
, ports
=dut
.external_ports(), name
="test_issuer")
531 with
open("test_issuer.il", "w") as f
: