8174fc5c0d067ee719744aeca721bf7df73a8d8c
3 not in any way intended for production use. this runs a FSM that:
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
10 * does it all over again
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to at least provide something that can be further incrementally
18 from nmigen
import (Elaboratable
, Module
, Signal
,
19 Mux
, Const
, Repl
, Cat
)
20 from nmigen
.cli
import rtlil
21 from nmigen
.cli
import main
24 from nmutil
.singlepipe
import ControlBase
25 from soc
.simple
.core_data
import FetchOutput
, FetchInput
27 from openpower
.consts
import MSR
28 from openpower
.decoder
.power_enums
import MicrOp
29 from openpower
.state
import CoreState
30 from soc
.regfile
.regfiles
import StateRegs
31 from soc
.config
.test
.test_loadstore
import TestMemPspec
32 from soc
.experiment
.icache
import ICache
34 from nmutil
.util
import rising_edge
36 from soc
.simple
.issuer
import TestIssuerBase
38 def get_insn(f_instr_o
, pc
):
39 if f_instr_o
.width
== 32:
42 # 64-bit: bit 2 of pc decides which word to select
43 return f_instr_o
.word_select(pc
[2], 32)
46 # Fetch Finite State Machine.
47 # WARNING: there are currently DriverConflicts but it's actually working.
48 # TODO, here: everything that is global in nature, information from the
49 # main TestIssuerInternal, needs to move to either ispec() or ospec().
50 # not only that: TestIssuerInternal.imem can entirely move into here
51 # because imem is only ever accessed inside the FetchFSM.
52 class FetchFSM(ControlBase
):
53 def __init__(self
, allow_overlap
, imem
, core_rst
,
55 dbg
, core
, svstate
, nia
):
56 self
.allow_overlap
= allow_overlap
58 self
.core_rst
= core_rst
59 self
.pdecode2
= pdecode2
60 self
.cur_state
= cur_state
63 self
.svstate
= svstate
66 # set up pipeline ControlBase and allocate i/o specs
67 # (unusual: normally done by the Pipeline API)
68 super().__init
__(stage
=self
)
69 self
.p
.i_data
, self
.n
.o_data
= self
.new_specs(None)
70 self
.i
, self
.o
= self
.p
.i_data
, self
.n
.o_data
72 # next 3 functions are Stage API Compliance
73 def setup(self
, m
, i
):
82 def elaborate(self
, platform
):
85 this FSM performs fetch of raw instruction data, partial-decodes
86 it 32-bit at a time to detect SVP64 prefixes, and will optionally
87 read a 2nd 32-bit quantity if that occurs.
89 m
= super().elaborate(platform
)
95 svstate
= self
.svstate
97 fetch_pc_o_ready
= self
.p
.o_ready
98 fetch_pc_i_valid
= self
.p
.i_valid
99 fetch_insn_o_valid
= self
.n
.o_valid
100 fetch_insn_i_ready
= self
.n
.i_ready
104 pdecode2
= self
.pdecode2
105 cur_state
= self
.cur_state
106 dec_opcode_o
= pdecode2
.dec
.raw_opcode_in
# raw opcode
108 # also note instruction fetch failed
109 if hasattr(core
, "icache"):
110 fetch_failed
= core
.icache
.i_out
.fetch_failed
113 fetch_failed
= Const(0, 1)
116 # set priv / virt mode on I-Cache, sigh
117 if isinstance(self
.imem
, ICache
):
118 comb
+= self
.imem
.i_in
.priv_mode
.eq(~msr
[MSR
.PR
])
119 comb
+= self
.imem
.i_in
.virt_mode
.eq(msr
[MSR
.DR
])
121 with m
.FSM(name
='fetch_fsm'):
124 with m
.State("IDLE"):
125 with m
.If(~dbg
.stopping_o
& ~fetch_failed
):
126 comb
+= fetch_pc_o_ready
.eq(1)
127 with m
.If(fetch_pc_i_valid
& ~fetch_failed
):
128 # instruction allowed to go: start by reading the PC
129 # capture the PC and also drop it into Insn Memory
130 # we have joined a pair of combinatorial memory
131 # lookups together. this is Generally Bad.
132 comb
+= self
.imem
.a_pc_i
.eq(pc
)
133 comb
+= self
.imem
.a_i_valid
.eq(1)
134 comb
+= self
.imem
.f_i_valid
.eq(1)
135 sync
+= cur_state
.pc
.eq(pc
)
136 sync
+= cur_state
.svstate
.eq(svstate
) # and svstate
137 sync
+= cur_state
.msr
.eq(msr
) # and msr
139 m
.next
= "INSN_READ" # move to "wait for bus" phase
141 # dummy pause to find out why simulation is not keeping up
142 with m
.State("INSN_READ"):
143 if self
.allow_overlap
:
144 stopping
= dbg
.stopping_o
148 # stopping: jump back to idle
151 with m
.If(self
.imem
.f_busy_o
& ~fetch_failed
): # zzz...
152 # busy but not fetch failed: stay in wait-read
153 comb
+= self
.imem
.a_i_valid
.eq(1)
154 comb
+= self
.imem
.f_i_valid
.eq(1)
156 # not busy (or fetch failed!): instruction fetched
157 # when fetch failed, the instruction gets ignored
159 insn
= get_insn(self
.imem
.f_instr_o
, cur_state
.pc
)
160 # not SVP64 - 32-bit only
161 sync
+= nia
.eq(cur_state
.pc
+ 4)
162 sync
+= dec_opcode_o
.eq(insn
)
163 m
.next
= "INSN_READY"
165 with m
.State("INSN_READY"):
166 # hand over the instruction, to be decoded
167 comb
+= fetch_insn_o_valid
.eq(1)
168 with m
.If(fetch_insn_i_ready
):
171 # whatever was done above, over-ride it if core reset is held
172 with m
.If(self
.core_rst
):
178 class TestIssuerInternalInOrder(TestIssuerBase
):
179 """TestIssuer - reads instructions from TestMemory and issues them
181 efficiency and speed are not the main goals here: functional correctness
182 and code clarity are. optimisations (which almost 100% interfere with
183 easy understanding) come later.
186 def issue_fsm(self
, m
, core
, nia
,
188 fetch_pc_o_ready
, fetch_pc_i_valid
,
189 fetch_insn_o_valid
, fetch_insn_i_ready
,
190 exec_insn_i_valid
, exec_insn_o_ready
,
191 exec_pc_o_valid
, exec_pc_i_ready
):
194 decode / issue FSM. this interacts with the "fetch" FSM
195 through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
196 (outgoing). also interacts with the "execute" FSM
197 through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
199 SVP64 RM prefixes have already been set up by the
200 "fetch" phase, so execute is fairly straightforward.
205 pdecode2
= self
.pdecode2
206 cur_state
= self
.cur_state
209 dec_opcode_i
= pdecode2
.dec
.raw_opcode_in
# raw opcode
211 # note if an exception happened. in a pipelined or OoO design
212 # this needs to be accompanied by "shadowing" (or stalling)
213 exc_happened
= self
.core
.o
.exc_happened
214 # also note instruction fetch failed
215 if hasattr(core
, "icache"):
216 fetch_failed
= core
.icache
.i_out
.fetch_failed
218 # set to fault in decoder
219 # update (highest priority) instruction fault
220 rising_fetch_failed
= rising_edge(m
, fetch_failed
)
221 with m
.If(rising_fetch_failed
):
222 sync
+= pdecode2
.instr_fault
.eq(1)
224 fetch_failed
= Const(0, 1)
227 with m
.FSM(name
="issue_fsm"):
229 # sync with the "fetch" phase which is reading the instruction
230 # at this point, there is no instruction running, that
231 # could inadvertently update the PC.
232 with m
.State("ISSUE_START"):
233 # reset instruction fault
234 sync
+= pdecode2
.instr_fault
.eq(0)
235 # wait on "core stop" release, before next fetch
236 # need to do this here, in case we are in a VL==0 loop
237 with m
.If(~dbg
.core_stop_o
& ~core_rst
):
238 comb
+= fetch_pc_i_valid
.eq(1) # tell fetch to start
239 with m
.If(fetch_pc_o_ready
): # fetch acknowledged us
242 # tell core it's stopped, and acknowledge debug handshake
243 comb
+= dbg
.core_stopped_i
.eq(1)
245 # wait for an instruction to arrive from Fetch
246 with m
.State("INSN_WAIT"):
247 if self
.allow_overlap
:
248 stopping
= dbg
.stopping_o
252 # stopping: jump back to idle
253 m
.next
= "ISSUE_START"
255 # request the icache to stop asserting "failed"
256 comb
+= core
.icache
.flush_in
.eq(1)
257 # stop instruction fault
258 sync
+= pdecode2
.instr_fault
.eq(0)
260 comb
+= fetch_insn_i_ready
.eq(1)
261 with m
.If(fetch_insn_o_valid
):
262 # loop into ISSUE_START if it's a SVP64 instruction
263 # and VL == 0. this because VL==0 is a for-loop
264 # from 0 to 0 i.e. always, always a NOP.
265 m
.next
= "DECODE_SV" # skip predication
267 # after src/dst step have been updated, we are ready
268 # to decode the instruction
269 with m
.State("DECODE_SV"):
270 # decode the instruction
271 with m
.If(~fetch_failed
):
272 sync
+= pdecode2
.instr_fault
.eq(0)
273 sync
+= core
.i
.e
.eq(pdecode2
.e
)
274 sync
+= core
.i
.state
.eq(cur_state
)
275 sync
+= core
.i
.raw_insn_i
.eq(dec_opcode_i
)
276 sync
+= core
.i
.bigendian_i
.eq(self
.core_bigendian_i
)
277 # after decoding, reset any previous exception condition,
278 # allowing it to be set again during the next execution
279 sync
+= pdecode2
.ldst_exc
.eq(0)
281 m
.next
= "INSN_EXECUTE" # move to "execute"
283 # handshake with execution FSM, move to "wait" once acknowledged
284 with m
.State("INSN_EXECUTE"):
285 comb
+= exec_insn_i_valid
.eq(1) # trigger execute
286 with m
.If(exec_insn_o_ready
): # execute acknowledged us
287 m
.next
= "EXECUTE_WAIT"
289 with m
.State("EXECUTE_WAIT"):
290 # wait on "core stop" release, at instruction end
291 # need to do this here, in case we are in a VL>1 loop
292 with m
.If(~dbg
.core_stop_o
& ~core_rst
):
293 comb
+= exec_pc_i_ready
.eq(1)
294 # see https://bugs.libre-soc.org/show_bug.cgi?id=636
295 # the exception info needs to be blatted into
296 # pdecode.ldst_exc, and the instruction "re-run".
297 # when ldst_exc.happened is set, the PowerDecoder2
298 # reacts very differently: it re-writes the instruction
299 # with a "trap" (calls PowerDecoder2.trap()) which
300 # will *overwrite* whatever was requested and jump the
301 # PC to the exception address, as well as alter MSR.
302 # nothing else needs to be done other than to note
303 # the change of PC and MSR (and, later, SVSTATE)
304 with m
.If(exc_happened
):
305 mmu
= core
.fus
.get_exc("mmu0")
306 ldst
= core
.fus
.get_exc("ldst0")
308 with m
.If(fetch_failed
):
309 # instruction fetch: exception is from MMU
310 # reset instr_fault (highest priority)
311 sync
+= pdecode2
.ldst_exc
.eq(mmu
)
312 sync
+= pdecode2
.instr_fault
.eq(0)
314 # request icache to stop asserting "failed"
315 comb
+= core
.icache
.flush_in
.eq(1)
316 with m
.If(~fetch_failed
):
317 # otherwise assume it was a LDST exception
318 sync
+= pdecode2
.ldst_exc
.eq(ldst
)
320 with m
.If(exec_pc_o_valid
):
322 # return directly to Decode if Execute generated an
324 with m
.If(pdecode2
.ldst_exc
.happened
):
327 # if MSR, PC or SVSTATE were changed by the previous
328 # instruction, go directly back to Fetch, without
329 # updating any of MSR, PC or SVSTATE
330 with m
.Elif(self
.msr_changed | self
.pc_changed |
332 m
.next
= "ISSUE_START"
335 # before going back to fetch, update the PC state
336 # register with the NIA.
337 # ok here we are not reading the branch unit.
338 # TODO: this just blithely overwrites whatever
339 # pipeline updated the PC
340 comb
+= self
.state_w_pc
.wen
.eq(1 << StateRegs
.PC
)
341 comb
+= self
.state_w_pc
.i_data
.eq(nia
)
342 m
.next
= "ISSUE_START"
345 comb
+= dbg
.core_stopped_i
.eq(1)
347 # request the icache to stop asserting "failed"
348 comb
+= core
.icache
.flush_in
.eq(1)
349 # stop instruction fault
350 sync
+= pdecode2
.instr_fault
.eq(0)
352 # request the icache to stop asserting "failed"
353 comb
+= core
.icache
.flush_in
.eq(1)
354 # stop instruction fault
355 sync
+= pdecode2
.instr_fault
.eq(0)
357 def execute_fsm(self
, m
, core
,
358 exec_insn_i_valid
, exec_insn_o_ready
,
359 exec_pc_o_valid
, exec_pc_i_ready
):
362 execute FSM. this interacts with the "issue" FSM
363 through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
364 (outgoing). SVP64 RM prefixes have already been set up by the
365 "issue" phase, so execute is fairly straightforward.
370 pdecode2
= self
.pdecode2
373 core_busy_o
= core
.n
.o_data
.busy_o
# core is busy
374 core_ivalid_i
= core
.p
.i_valid
# instruction is valid
376 if hasattr(core
, "icache"):
377 fetch_failed
= core
.icache
.i_out
.fetch_failed
379 fetch_failed
= Const(0, 1)
381 with m
.FSM(name
="exec_fsm"):
383 # waiting for instruction bus (stays there until not busy)
384 with m
.State("INSN_START"):
385 comb
+= exec_insn_o_ready
.eq(1)
386 with m
.If(exec_insn_i_valid
):
387 comb
+= core_ivalid_i
.eq(1) # instruction is valid/issued
388 sync
+= self
.sv_changed
.eq(0)
389 sync
+= self
.pc_changed
.eq(0)
390 sync
+= self
.msr_changed
.eq(0)
391 with m
.If(core
.p
.o_ready
): # only move if accepted
392 m
.next
= "INSN_ACTIVE" # move to "wait completion"
394 # instruction started: must wait till it finishes
395 with m
.State("INSN_ACTIVE"):
396 # note changes to MSR, PC and SVSTATE
397 # XXX oops, really must monitor *all* State Regfile write
398 # ports looking for changes!
399 with m
.If(self
.state_nia
.wen
& (1 << StateRegs
.SVSTATE
)):
400 sync
+= self
.sv_changed
.eq(1)
401 with m
.If(self
.state_nia
.wen
& (1 << StateRegs
.MSR
)):
402 sync
+= self
.msr_changed
.eq(1)
403 with m
.If(self
.state_nia
.wen
& (1 << StateRegs
.PC
)):
404 sync
+= self
.pc_changed
.eq(1)
405 with m
.If(~core_busy_o
): # instruction done!
406 comb
+= exec_pc_o_valid
.eq(1)
407 with m
.If(exec_pc_i_ready
):
408 # when finished, indicate "done".
409 # however, if there was an exception, the instruction
410 # is *not* yet done. this is an implementation
411 # detail: we choose to implement exceptions by
412 # taking the exception information from the LDST
413 # unit, putting that *back* into the PowerDecoder2,
414 # and *re-running the entire instruction*.
415 # if we erroneously indicate "done" here, it is as if
416 # there were *TWO* instructions:
417 # 1) the failed LDST 2) a TRAP.
418 with m
.If(~pdecode2
.ldst_exc
.happened
&
420 comb
+= self
.insn_done
.eq(1)
421 m
.next
= "INSN_START" # back to fetch
423 def elaborate(self
, platform
):
424 m
= super().elaborate(platform
)
426 comb
, sync
= m
.d
.comb
, m
.d
.sync
427 cur_state
= self
.cur_state
428 pdecode2
= self
.pdecode2
432 # set up peripherals and core
433 core_rst
= self
.core_rst
435 # indicate to outside world if any FU is still executing
436 comb
+= self
.any_busy
.eq(core
.n
.o_data
.any_busy_o
) # any FU executing
438 # address of the next instruction, in the absence of a branch
439 # depends on the instruction size
442 # connect up debug signals
443 with m
.If(core
.o
.core_terminate_o
):
444 comb
+= dbg
.terminate_i
.eq(1)
446 # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
447 # issue, decode/execute, now joined by "Predicate fetch/calculate".
448 # these are the handshake signals between each
450 # fetch FSM can run as soon as the PC is valid
451 fetch_pc_i_valid
= Signal() # Execute tells Fetch "start next read"
452 fetch_pc_o_ready
= Signal() # Fetch Tells SVSTATE "proceed"
454 # fetch FSM hands over the instruction to be decoded / issued
455 fetch_insn_o_valid
= Signal()
456 fetch_insn_i_ready
= Signal()
458 # issue FSM delivers the instruction to be executed
459 exec_insn_i_valid
= Signal()
460 exec_insn_o_ready
= Signal()
462 # execute FSM, hands over the PC/SVSTATE back to the issue FSM
463 exec_pc_o_valid
= Signal()
464 exec_pc_i_ready
= Signal()
466 # the FSMs here are perhaps unusual in that they detect conditions
467 # then "hold" information, combinatorially, for the core
468 # (as opposed to using sync - which would be on a clock's delay)
469 # this includes the actual opcode, valid flags and so on.
471 # Fetch, then predicate fetch, then Issue, then Execute.
472 # Issue is where the VL for-loop lives. the ready/valid
473 # signalling is used to communicate between the four.
476 fetch
= FetchFSM(self
.allow_overlap
,
477 self
.imem
, core_rst
, pdecode2
, cur_state
,
479 dbg
.state
.svstate
, # combinatorially same
481 m
.submodules
.fetch
= fetch
482 # connect up in/out data to existing Signals
483 comb
+= fetch
.p
.i_data
.pc
.eq(dbg
.state
.pc
) # combinatorially same
484 comb
+= fetch
.p
.i_data
.msr
.eq(dbg
.state
.msr
) # combinatorially same
485 # and the ready/valid signalling
486 comb
+= fetch_pc_o_ready
.eq(fetch
.p
.o_ready
)
487 comb
+= fetch
.p
.i_valid
.eq(fetch_pc_i_valid
)
488 comb
+= fetch_insn_o_valid
.eq(fetch
.n
.o_valid
)
489 comb
+= fetch
.n
.i_ready
.eq(fetch_insn_i_ready
)
491 self
.issue_fsm(m
, core
, nia
,
493 fetch_pc_o_ready
, fetch_pc_i_valid
,
494 fetch_insn_o_valid
, fetch_insn_i_ready
,
495 exec_insn_i_valid
, exec_insn_o_ready
,
496 exec_pc_o_valid
, exec_pc_i_ready
)
498 self
.execute_fsm(m
, core
,
499 exec_insn_i_valid
, exec_insn_o_ready
,
500 exec_pc_o_valid
, exec_pc_i_ready
)
505 # XXX TODO: update this
507 if __name__
== '__main__':
508 units
= {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
514 pspec
= TestMemPspec(ldst_ifacetype
='bare_wb',
515 imem_ifacetype
='bare_wb',
520 dut
= TestIssuer(pspec
)
521 vl
= main(dut
, ports
=dut
.ports(), name
="test_issuer")
523 if len(sys
.argv
) == 1:
524 vl
= rtlil
.convert(dut
, ports
=dut
.external_ports(), name
="test_issuer")
525 with
open("test_issuer.il", "w") as f
: