Copy the startup delay from issuer.py to inorder.py
[soc.git] / src / soc / simple / inorder.py
1 """simple core issuer
2
3 not in any way intended for production use. this runs a FSM that:
4
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
9 * increments the PC
10 * does it all over again
11
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to provide something that can be further incrementally
15 improved.
16 """
17
18 from nmigen import (Elaboratable, Module, Signal,
19 Mux, Const, Repl, Cat)
20 from nmigen.cli import rtlil
21 from nmigen.cli import main
22 import sys
23
24 from nmutil.singlepipe import ControlBase
25 from soc.simple.core_data import FetchOutput, FetchInput
26
27 from openpower.consts import MSR
28 from openpower.decoder.power_enums import MicrOp
29 from openpower.state import CoreState
30 from soc.regfile.regfiles import StateRegs
31 from soc.config.test.test_loadstore import TestMemPspec
32 from soc.experiment.icache import ICache
33
34 from nmutil.util import rising_edge
35
36 from soc.simple.issuer import TestIssuerBase
37
def get_insn(f_instr_o, pc):
    """Return the 32-bit instruction word held in the fetch output.

    f_instr_o is the data returned by the instruction memory and pc is the
    address it was fetched from.  A 32-bit wide f_instr_o is returned
    unchanged; for any other width (the 64-bit case) one 32-bit word is
    picked out with word_select, indexed by bit 2 of pc.
    """
    if f_instr_o.width != 32:
        # wider than one instruction: pc bit 2 says which word we want
        return f_instr_o.word_select(pc[2], 32)
    return f_instr_o
44
45
46 # Fetch Finite State Machine.
47 # WARNING: there are currently DriverConflicts but it's actually working.
48 # TODO, here: everything that is global in nature, information from the
49 # main TestIssuerInternal, needs to move to either ispec() or ospec().
50 # not only that: TestIssuerInternal.imem can entirely move into here
51 # because imem is only ever accessed inside the FetchFSM.
class FetchFSM(ControlBase):
    """Instruction-fetch FSM, packaged as a ControlBase pipeline stage.

    The stage's input data is a FetchInput (elaborate() reads its pc and
    msr fields) and its output data is a FetchOutput.  The ready/valid
    handshakes on the "p" (previous) and "n" (next) sides are driven
    directly from the FSM states built in elaborate().
    """

    def __init__(self, allow_overlap, imem, core_rst,
                 pdecode2, cur_state,
                 dbg, core, svstate, nia):
        # references supplied by the caller; stored as-is and only used
        # later, from elaborate()
        self.allow_overlap = allow_overlap
        self.imem = imem
        self.core_rst = core_rst
        self.pdecode2 = pdecode2
        self.cur_state = cur_state
        self.dbg = dbg
        self.core = core
        self.svstate = svstate
        self.nia = nia

        # set up pipeline ControlBase and allocate i/o specs
        # (unusual: normally done by the Pipeline API)
        super().__init__(stage=self)
        self.p.i_data, self.n.o_data = self.new_specs(None)
        # short-hand aliases for the input and output data
        self.i, self.o = self.p.i_data, self.n.o_data

    # next 3 functions are Stage API Compliance
    def setup(self, m, i):
        # nothing to set up: all logic is created in elaborate()
        pass

    def ispec(self):
        # input data format of this stage
        return FetchInput()

    def ospec(self):
        # output data format of this stage
        return FetchOutput()

    def elaborate(self, platform):
        """fetch FSM

        this FSM performs fetch of raw instruction data.  this version is
        32-bit only (no SVP64 prefix handling): one word is read from
        imem at the requested PC, latched into the decoder's raw opcode
        input, and nia is set to PC+4.

        states: PRE_IDLE (startup delay) -> IDLE (wait for a PC) ->
        INSN_READ (wait for imem) -> INSN_READY (hand over) -> IDLE
        """
        m = super().elaborate(platform)

        dbg = self.dbg
        core = self.core
        pc = self.i.pc
        msr = self.i.msr
        svstate = self.svstate
        nia = self.nia
        # handshake with whoever supplies the PC (previous side) ...
        fetch_pc_o_ready = self.p.o_ready
        fetch_pc_i_valid = self.p.i_valid
        # ... and with whoever consumes the instruction (next side)
        fetch_insn_o_valid = self.n.o_valid
        fetch_insn_i_ready = self.n.i_ready

        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        cur_state = self.cur_state
        dec_opcode_o = pdecode2.dec.raw_opcode_in  # raw opcode

        # also note instruction fetch failed
        # (only a core with an "icache" attribute can report this;
        # otherwise it is tied to constant zero)
        # NOTE(review): flush_needed is assigned but not used in this method
        if hasattr(core, "icache"):
            fetch_failed = core.icache.i_out.fetch_failed
            flush_needed = True
        else:
            fetch_failed = Const(0, 1)
            flush_needed = False

        # set priv / virt mode on I-Cache, sigh
        if isinstance(self.imem, ICache):
            comb += self.imem.i_in.priv_mode.eq(~msr[MSR.PR])
            comb += self.imem.i_in.virt_mode.eq(msr[MSR.DR])

        with m.FSM(name='fetch_fsm'):

            # allow fetch to not run at startup due to I-Cache reset not
            # having time to settle. power-on-reset holds dbg.core_stopped_i
            with m.State("PRE_IDLE"):
                with m.If(~dbg.core_stopped_i & ~dbg.core_stop_o):
                    m.next = "IDLE"

            # waiting (zzz)
            with m.State("IDLE"):
                # only advertise "ready for a PC" when not stopping and
                # no fetch failure is being reported
                with m.If(~dbg.stopping_o & ~fetch_failed):
                    comb += fetch_pc_o_ready.eq(1)
                with m.If(fetch_pc_i_valid & ~fetch_failed):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                    sync += cur_state.pc.eq(pc)
                    sync += cur_state.svstate.eq(svstate)  # and svstate
                    sync += cur_state.msr.eq(msr)  # and msr

                    m.next = "INSN_READ"  # move to "wait for bus" phase

            # dummy pause to find out why simulation is not keeping up
            with m.State("INSN_READ"):
                # "stopping" can only abort the read when overlap is
                # allowed; otherwise it is a constant zero (never aborts)
                if self.allow_overlap:
                    stopping = dbg.stopping_o
                else:
                    stopping = Const(0)
                with m.If(stopping):
                    # stopping: jump back to idle
                    m.next = "IDLE"
                with m.Else():
                    with m.If(self.imem.f_busy_o & ~fetch_failed):  # zzz...
                        # busy but not fetch failed: stay in wait-read
                        comb += self.imem.a_i_valid.eq(1)
                        comb += self.imem.f_i_valid.eq(1)
                    with m.Else():
                        # not busy (or fetch failed!): instruction fetched
                        # when fetch failed, the instruction gets ignored
                        # by the decoder
                        insn = get_insn(self.imem.f_instr_o, cur_state.pc)
                        # not SVP64 - 32-bit only
                        sync += nia.eq(cur_state.pc + 4)
                        sync += dec_opcode_o.eq(insn)
                        m.next = "INSN_READY"

            with m.State("INSN_READY"):
                # hand over the instruction, to be decoded
                comb += fetch_insn_o_valid.eq(1)
                with m.If(fetch_insn_i_ready):
                    m.next = "IDLE"

        # whatever was done above, over-ride it if core reset is held
        with m.If(self.core_rst):
            sync += nia.eq(0)

        return m
182
183
class TestIssuerInternalInOrder(TestIssuerBase):
    """TestIssuer - reads instructions from TestMemory and issues them

    efficiency and speed is not the main goal here: functional correctness
    and code clarity is.  optimisations (which almost 100% interfere with
    easy understanding) come later.

    elaborate() wires up three cooperating FSMs: FetchFSM (a submodule),
    the issue FSM (issue_fsm) and the execute FSM (execute_fsm), linked
    by ready/valid handshake signals.
    """

    def issue_fsm(self, m, core, nia,
                  dbg, core_rst,
                  fetch_pc_o_ready, fetch_pc_i_valid,
                  fetch_insn_o_valid, fetch_insn_i_ready,
                  exec_insn_i_valid, exec_insn_o_ready,
                  exec_pc_o_valid, exec_pc_i_ready):
        """issue FSM

        decode / issue FSM.  this interacts with the "fetch" FSM
        through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
        (outgoing).  also interacts with the "execute" FSM
        through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
        (incoming).
        SVP64 RM prefixes have already been set up by the
        "fetch" phase, so execute is fairly straightforward.

        states: ISSUE_START -> INSN_WAIT -> DECODE_SV -> INSN_EXECUTE ->
        EXECUTE_WAIT, then back to ISSUE_START (or to DECODE_SV when an
        exception was recorded in pdecode2.ldst_exc).

        the FSM is added to module m; nothing is returned.
        """

        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        cur_state = self.cur_state

        # temporaries
        dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode

        # note if an exception happened.  in a pipelined or OoO design
        # this needs to be accompanied by "shadowing" (or stalling)
        exc_happened = self.core.o.exc_happened
        # also note instruction fetch failed
        if hasattr(core, "icache"):
            fetch_failed = core.icache.i_out.fetch_failed
            flush_needed = True
            # set to fault in decoder
            # update (highest priority) instruction fault
            rising_fetch_failed = rising_edge(m, fetch_failed)
            with m.If(rising_fetch_failed):
                sync += pdecode2.instr_fault.eq(1)
        else:
            # no icache: fetch can never be reported as failed
            fetch_failed = Const(0, 1)
            flush_needed = False

        with m.FSM(name="issue_fsm"):

            # sync with the "fetch" phase which is reading the instruction
            # at this point, there is no instruction running, that
            # could inadvertently update the PC.
            with m.State("ISSUE_START"):
                # reset instruction fault
                sync += pdecode2.instr_fault.eq(0)
                # wait on "core stop" release, before next fetch
                # need to do this here, in case we are in a VL==0 loop
                with m.If(~dbg.core_stop_o & ~core_rst):
                    comb += fetch_pc_i_valid.eq(1)  # tell fetch to start
                    with m.If(fetch_pc_o_ready):   # fetch acknowledged us
                        m.next = "INSN_WAIT"
                with m.Else():
                    # tell core it's stopped, and acknowledge debug handshake
                    comb += dbg.core_stopped_i.eq(1)

            # wait for an instruction to arrive from Fetch
            with m.State("INSN_WAIT"):
                # "stopping" can only abort the wait when overlap is
                # allowed; otherwise it is a constant zero (never aborts)
                if self.allow_overlap:
                    stopping = dbg.stopping_o
                else:
                    stopping = Const(0)
                with m.If(stopping):
                    # stopping: jump back to idle
                    m.next = "ISSUE_START"
                    if flush_needed:
                        # request the icache to stop asserting "failed"
                        comb += core.icache.flush_in.eq(1)
                    # stop instruction fault
                    sync += pdecode2.instr_fault.eq(0)
                with m.Else():
                    comb += fetch_insn_i_ready.eq(1)
                    with m.If(fetch_insn_o_valid):
                        # loop into ISSUE_START if it's a SVP64 instruction
                        # and VL == 0.  this because VL==0 is a for-loop
                        # from 0 to 0 i.e. always, always a NOP.
                        # NOTE(review): the comment above does not match
                        # the code here, which unconditionally moves on
                        # to DECODE_SV.
                        m.next = "DECODE_SV"  # skip predication

            # after src/dst step have been updated, we are ready
            # to decode the instruction
            with m.State("DECODE_SV"):
                # decode the instruction
                with m.If(~fetch_failed):
                    sync += pdecode2.instr_fault.eq(0)
                # latch the decoded operation, current state, raw opcode
                # and endianness into the core's input
                sync += core.i.e.eq(pdecode2.e)
                sync += core.i.state.eq(cur_state)
                sync += core.i.raw_insn_i.eq(dec_opcode_i)
                sync += core.i.bigendian_i.eq(self.core_bigendian_i)
                # after decoding, reset any previous exception condition,
                # allowing it to be set again during the next execution
                sync += pdecode2.ldst_exc.eq(0)

                m.next = "INSN_EXECUTE"  # move to "execute"

            # handshake with execution FSM, move to "wait" once acknowledged
            with m.State("INSN_EXECUTE"):
                comb += exec_insn_i_valid.eq(1)  # trigger execute
                with m.If(exec_insn_o_ready):   # execute acknowledged us
                    m.next = "EXECUTE_WAIT"

            with m.State("EXECUTE_WAIT"):
                # wait on "core stop" release, at instruction end
                # need to do this here, in case we are in a VL>1 loop
                with m.If(~dbg.core_stop_o & ~core_rst):
                    comb += exec_pc_i_ready.eq(1)
                    # see https://bugs.libre-soc.org/show_bug.cgi?id=636
                    # the exception info needs to be blatted into
                    # pdecode.ldst_exc, and the instruction "re-run".
                    # when ldst_exc.happened is set, the PowerDecoder2
                    # reacts very differently: it re-writes the instruction
                    # with a "trap" (calls PowerDecoder2.trap()) which
                    # will *overwrite* whatever was requested and jump the
                    # PC to the exception address, as well as alter MSR.
                    # nothing else needs to be done other than to note
                    # the change of PC and MSR (and, later, SVSTATE)
                    with m.If(exc_happened):
                        mmu = core.fus.get_exc("mmu0")
                        ldst = core.fus.get_exc("ldst0")
                        if mmu is not None:
                            with m.If(fetch_failed):
                                # instruction fetch: exception is from MMU
                                # reset instr_fault (highest priority)
                                sync += pdecode2.ldst_exc.eq(mmu)
                                sync += pdecode2.instr_fault.eq(0)
                                if flush_needed:
                                    # request icache to stop asserting "failed"
                                    comb += core.icache.flush_in.eq(1)
                        with m.If(~fetch_failed):
                            # otherwise assume it was a LDST exception
                            sync += pdecode2.ldst_exc.eq(ldst)

                    with m.If(exec_pc_o_valid):

                        # return directly to Decode if Execute generated an
                        # exception.
                        with m.If(pdecode2.ldst_exc.happened):
                            m.next = "DECODE_SV"

                        # if MSR, PC or SVSTATE were changed by the previous
                        # instruction, go directly back to Fetch, without
                        # updating either MSR PC or SVSTATE
                        with m.Elif(self.msr_changed | self.pc_changed |
                                    self.sv_changed):
                            m.next = "ISSUE_START"

                        with m.Else():
                            # before going back to fetch, update the PC state
                            # register with the NIA.
                            # ok here we are not reading the branch unit.
                            # TODO: this just blithely overwrites whatever
                            #       pipeline updated the PC
                            comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                            comb += self.state_w_pc.i_data.eq(nia)
                            m.next = "ISSUE_START"

                with m.Else():
                    comb += dbg.core_stopped_i.eq(1)
                    # NOTE(review): the flush request and instr_fault reset
                    # below appear twice in a row; the second copy looks
                    # redundant (same assignments) - confirm and remove.
                    if flush_needed:
                        # request the icache to stop asserting "failed"
                        comb += core.icache.flush_in.eq(1)
                    # stop instruction fault
                    sync += pdecode2.instr_fault.eq(0)
                    if flush_needed:
                        # request the icache to stop asserting "failed"
                        comb += core.icache.flush_in.eq(1)
                    # stop instruction fault
                    sync += pdecode2.instr_fault.eq(0)

    def execute_fsm(self, m, core,
                    exec_insn_i_valid, exec_insn_o_ready,
                    exec_pc_o_valid, exec_pc_i_ready):
        """execute FSM

        execute FSM.  this interacts with the "issue" FSM
        through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
        (outgoing).  SVP64 RM prefixes have already been set up by the
        "issue" phase, so execute is fairly straightforward.

        states: INSN_START (offer the instruction to the core) ->
        INSN_ACTIVE (wait for the core to stop being busy) -> INSN_START.

        the FSM is added to module m; nothing is returned.
        """

        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2

        # temporaries
        core_busy_o = core.n.o_data.busy_o  # core is busy
        core_ivalid_i = core.p.i_valid              # instruction is valid

        # fetch failure can only be reported by a core with an icache
        if hasattr(core, "icache"):
            fetch_failed = core.icache.i_out.fetch_failed
        else:
            fetch_failed = Const(0, 1)

        with m.FSM(name="exec_fsm"):

            # waiting for instruction bus (stays there until not busy)
            with m.State("INSN_START"):
                comb += exec_insn_o_ready.eq(1)
                with m.If(exec_insn_i_valid):
                    comb += core_ivalid_i.eq(1)  # instruction is valid/issued
                    # clear the "state register was written" flags
                    # before the new instruction runs
                    sync += self.sv_changed.eq(0)
                    sync += self.pc_changed.eq(0)
                    sync += self.msr_changed.eq(0)
                    with m.If(core.p.o_ready):  # only move if accepted
                        m.next = "INSN_ACTIVE"  # move to "wait completion"

            # instruction started: must wait till it finishes
            with m.State("INSN_ACTIVE"):
                # note changes to MSR, PC and SVSTATE
                # XXX oops, really must monitor *all* State Regfile write
                # ports looking for changes!
                with m.If(self.state_nia.wen & (1 << StateRegs.SVSTATE)):
                    sync += self.sv_changed.eq(1)
                with m.If(self.state_nia.wen & (1 << StateRegs.MSR)):
                    sync += self.msr_changed.eq(1)
                with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
                    sync += self.pc_changed.eq(1)
                with m.If(~core_busy_o):  # instruction done!
                    comb += exec_pc_o_valid.eq(1)
                    with m.If(exec_pc_i_ready):
                        # when finished, indicate "done".
                        # however, if there was an exception, the instruction
                        # is *not* yet done.  this is an implementation
                        # detail: we choose to implement exceptions by
                        # taking the exception information from the LDST
                        # unit, putting that *back* into the PowerDecoder2,
                        # and *re-running the entire instruction*.
                        # if we erroneously indicate "done" here, it is as if
                        # there were *TWO* instructions:
                        # 1) the failed LDST 2) a TRAP.
                        with m.If(~pdecode2.ldst_exc.happened &
                                  ~fetch_failed):
                            comb += self.insn_done.eq(1)
                        m.next = "INSN_START"  # back to fetch

    def elaborate(self, platform):
        """build the issuer: base-class logic plus fetch/issue/execute FSMs

        creates the handshake signals between the FSMs, instantiates
        FetchFSM as submodule "fetch", then adds the issue and execute
        FSMs to the same module.  returns the module.
        """
        m = super().elaborate(platform)
        # convenience
        # NOTE(review): sync is unpacked here but not used in this method
        comb, sync = m.d.comb, m.d.sync
        cur_state = self.cur_state
        pdecode2 = self.pdecode2
        dbg = self.dbg
        core = self.core

        # set up peripherals and core
        core_rst = self.core_rst

        # indicate to outside world if any FU is still executing
        comb += self.any_busy.eq(core.n.o_data.any_busy_o)  # any FU executing

        # address of the next instruction, in the absence of a branch
        # depends on the instruction size
        nia = Signal(64)

        # connect up debug signals
        with m.If(core.o.core_terminate_o):
            comb += dbg.terminate_i.eq(1)

        # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
        # issue, decode/execute, now joined by "Predicate fetch/calculate".
        # these are the handshake signals between each
        # NOTE(review): only three FSMs (fetch, issue, execute) are
        # actually created in this method.

        # fetch FSM can run as soon as the PC is valid
        fetch_pc_i_valid = Signal()  # Execute tells Fetch "start next read"
        fetch_pc_o_ready = Signal()  # Fetch Tells SVSTATE "proceed"

        # fetch FSM hands over the instruction to be decoded / issued
        fetch_insn_o_valid = Signal()
        fetch_insn_i_ready = Signal()

        # issue FSM delivers the instruction to be executed
        exec_insn_i_valid = Signal()
        exec_insn_o_ready = Signal()

        # execute FSM, hands over the PC/SVSTATE back to the issue FSM
        exec_pc_o_valid = Signal()
        exec_pc_i_ready = Signal()

        # the FSMs here are perhaps unusual in that they detect conditions
        # then "hold" information, combinatorially, for the core
        # (as opposed to using sync - which would be on a clock's delay)
        # this includes the actual opcode, valid flags and so on.

        # Fetch, then predicate fetch, then Issue, then Execute.
        # Issue is where the VL for-loop lives.  the ready/valid
        # signalling is used to communicate between the four.

        # set up Fetch FSM
        fetch = FetchFSM(self.allow_overlap,
                         self.imem, core_rst, pdecode2, cur_state,
                         dbg, core,
                         dbg.state.svstate,  # combinatorially same
                         nia)
        m.submodules.fetch = fetch
        # connect up in/out data to existing Signals
        comb += fetch.p.i_data.pc.eq(dbg.state.pc)   # combinatorially same
        comb += fetch.p.i_data.msr.eq(dbg.state.msr)  # combinatorially same
        # and the ready/valid signalling
        comb += fetch_pc_o_ready.eq(fetch.p.o_ready)
        comb += fetch.p.i_valid.eq(fetch_pc_i_valid)
        comb += fetch_insn_o_valid.eq(fetch.n.o_valid)
        comb += fetch.n.i_ready.eq(fetch_insn_i_ready)

        self.issue_fsm(m, core, nia,
                       dbg, core_rst,
                       fetch_pc_o_ready, fetch_pc_i_valid,
                       fetch_insn_o_valid, fetch_insn_i_ready,
                       exec_insn_i_valid, exec_insn_o_ready,
                       exec_pc_o_valid, exec_pc_i_ready)

        self.execute_fsm(m, core,
                         exec_insn_i_valid, exec_insn_o_ready,
                         exec_pc_o_valid, exec_pc_i_ready)

        return m
509
510
# Stand-alone entry point: builds an in-order issuer with one of each
# listed Function Unit, runs the nmigen command-line front-end, and (when
# invoked with no arguments) also writes the design out as RTLIL.
#
# This used to instantiate `TestIssuer`, a name that is neither defined
# nor imported in this module, so the script failed with NameError.  It
# now builds the issuer class defined in this file.
# NOTE(review): assumes ports() and external_ports() are provided by
# TestIssuerBase - TODO confirm against soc.simple.issuer.

if __name__ == '__main__':
    units = {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
             'spr': 1,
             'div': 1,
             'mul': 1,
             'shiftrot': 1
             }
    pspec = TestMemPspec(ldst_ifacetype='bare_wb',
                         imem_ifacetype='bare_wb',
                         addr_wid=64,
                         mask_wid=8,
                         reg_wid=64,
                         units=units)
    dut = TestIssuerInternalInOrder(pspec)
    # the result of main() was never read before being overwritten below,
    # so it is no longer stored
    main(dut, ports=dut.ports(), name="test_issuer")

    # no command-line arguments: additionally dump RTLIL to a file
    if len(sys.argv) == 1:
        vl = rtlil.convert(dut, ports=dut.external_ports(),
                           name="test_issuer")
        with open("test_issuer.il", "w") as f:
            f.write(vl)