3 not in any way intended for production use. this runs a FSM that:
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
10 * does it all over again
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to provide something that can be further incrementally
18 from nmigen
import (Elaboratable
, Module
, Signal
, ClockSignal
, ResetSignal
,
19 ClockDomain
, DomainRenamer
, Mux
, Const
, Repl
, Cat
)
20 from nmigen
.cli
import rtlil
21 from nmigen
.cli
import main
24 from nmutil
.singlepipe
import ControlBase
25 from soc
.simple
.core_data
import FetchOutput
, FetchInput
27 from nmigen
.lib
.coding
import PriorityEncoder
29 from openpower
.decoder
.power_decoder
import create_pdecode
30 from openpower
.decoder
.power_decoder2
import PowerDecode2
, SVP64PrefixDecoder
31 from openpower
.decoder
.decode2execute1
import IssuerDecode2ToOperand
32 from openpower
.decoder
.decode2execute1
import Data
33 from openpower
.decoder
.power_enums
import (MicrOp
, SVP64PredInt
, SVP64PredCR
,
35 from openpower
.state
import CoreState
36 from openpower
.consts
import (CR
, SVP64CROffs
)
37 from soc
.experiment
.testmem
import TestMemory
# test only for instructions
38 from soc
.regfile
.regfiles
import StateRegs
, FastRegs
39 from soc
.simple
.core
import NonProductionCore
40 from soc
.config
.test
.test_loadstore
import TestMemPspec
41 from soc
.config
.ifetch
import ConfigFetchUnit
42 from soc
.debug
.dmi
import CoreDebug
, DMIInterface
43 from soc
.debug
.jtag
import JTAG
44 from soc
.config
.pinouts
import get_pinspecs
45 from soc
.interrupts
.xics
import XICS_ICP
, XICS_ICS
46 from soc
.bus
.simple_gpio
import SimpleGPIO
47 from soc
.bus
.SPBlock512W64B8W
import SPBlock512W64B8W
48 from soc
.clock
.select
import ClockSelect
49 from soc
.clock
.dummypll
import DummyPLL
50 from openpower
.sv
.svstate
import SVSTATERec
51 from soc
.experiment
.icache
import ICache
53 from nmutil
.util
import rising_edge
def get_insn(f_instr_o, pc):
    """Select the 32-bit instruction word from the Fetch Unit output.

    f_instr_o -- instruction-data output signal from the Fetch Unit,
                 either 32 or 64 bits wide
    pc        -- current Program Counter value

    When the fetch output is 32 bits wide it already contains exactly one
    instruction, so it is returned as-is.  When it is 64 bits wide, bit 2
    of the PC selects which 32-bit half holds the instruction at that
    address.
    """
    if f_instr_o.width == 32:
        return f_instr_o
    else:
        # 64-bit: bit 2 of pc decides which word to select
        return f_instr_o.word_select(pc[2], 32)
# gets state input or reads from state regfile
def state_get(m, res, core_rst, state_i, name, regfile, regnum):
    """Read one state value (PC / MSR / SVSTATE), with override.

    m        -- nmigen Module being elaborated
    res      -- Signal to receive the resulting state value
    core_rst -- core reset signal; while asserted, nothing is read
    state_i  -- incoming Data record; when state_i.ok is set its .data
                overrides the regfile contents ("please change me")
    name     -- base name used for internally-created signals
    regfile  -- StateRegs read port to consult when no override is given
    regnum   -- register number within the State regfile

    NOTE(review): reconstructed from a lossy extraction — the ~core_rst
    guard and m.Else() nesting follow the original line numbering; confirm
    against upstream before relying on exact structure.
    """
    comb = m.d.comb
    sync = m.d.sync
    # read the {insert state variable here}
    res_ok_delay = Signal(name="%s_ok_delay" % name)
    with m.If(~core_rst):
        # remember (1 clock later) whether an override was present
        sync += res_ok_delay.eq(~state_i.ok)
        with m.If(state_i.ok):
            # incoming override (start from pc_i)
            comb += res.eq(state_i.data)
        with m.Else():
            # otherwise read StateRegs regfile for {insert state here}...
            comb += regfile.ren.eq(1 << regnum)
            # ... but on a 1-clock delay
            with m.If(res_ok_delay):
                comb += res.eq(regfile.o_data)
def get_predint(m, mask, name):
    """decode SVP64 predicate integer mask field to reg number and invert

    this is identical to the equivalent function in ISACaller except that
    it doesn't read the INT directly, it just decodes "what needs to be done"
    i.e. which INT reg, whether it is shifted and whether it is bit-inverted.

    * all1s is set to indicate that no mask is to be applied.
    * regread indicates the GPR register number to be read
    * invert is set to indicate that the register value is to be inverted
    * unary indicates that the contents of the register is to be shifted 1<<r3
    """
    comb = m.d.comb
    regread = Signal(5, name=name+"regread")
    invert = Signal(name=name+"invert")
    unary = Signal(name=name+"unary")
    all1s = Signal(name=name+"all1s")
    with m.Switch(mask):
        with m.Case(SVP64PredInt.ALWAYS.value):
            comb += all1s.eq(1)  # use 0b1111 (all ones)
        with m.Case(SVP64PredInt.R3_UNARY.value):
            comb += regread.eq(3)
            comb += unary.eq(1)  # 1<<r3 - shift r3 (single bit)
        with m.Case(SVP64PredInt.R3.value):
            comb += regread.eq(3)
        with m.Case(SVP64PredInt.R3_N.value):
            comb += regread.eq(3)
            comb += invert.eq(1)  # ~r3: bit-inverted mask
        with m.Case(SVP64PredInt.R10.value):
            comb += regread.eq(10)
        with m.Case(SVP64PredInt.R10_N.value):
            comb += regread.eq(10)
            comb += invert.eq(1)  # ~r10: bit-inverted mask
        with m.Case(SVP64PredInt.R30.value):
            comb += regread.eq(30)
        with m.Case(SVP64PredInt.R30_N.value):
            comb += regread.eq(30)
            comb += invert.eq(1)  # ~r30: bit-inverted mask
    return regread, invert, unary, all1s
def get_predcr(m, mask, name):
    """decode SVP64 predicate CR to reg number field and invert status

    this is identical to _get_predcr in ISACaller

    returns (idx, invert) where idx selects which bit of each 4-bit CR
    field to test (LT/GT/EQ/SO) and invert indicates the bit is to be
    tested for 0 rather than 1 (GE/LE/NE/NS are the complements of
    LT/GT/EQ/SO respectively).

    NOTE(review): the invert assignments and the return statement were
    absent from the extraction and are reconstructed from the CR-bit
    complement pairing — confirm against upstream.
    """
    comb = m.d.comb
    idx = Signal(2, name=name+"idx")
    invert = Signal(name=name+"crinvert")
    with m.Switch(mask):
        with m.Case(SVP64PredCR.LT.value):
            comb += idx.eq(CR.LT)
            comb += invert.eq(0)
        with m.Case(SVP64PredCR.GE.value):
            # GE is "not LT": same CR bit, inverted sense
            comb += idx.eq(CR.LT)
            comb += invert.eq(1)
        with m.Case(SVP64PredCR.GT.value):
            comb += idx.eq(CR.GT)
            comb += invert.eq(0)
        with m.Case(SVP64PredCR.LE.value):
            # LE is "not GT"
            comb += idx.eq(CR.GT)
            comb += invert.eq(1)
        with m.Case(SVP64PredCR.EQ.value):
            comb += idx.eq(CR.EQ)
            comb += invert.eq(0)
        with m.Case(SVP64PredCR.NE.value):
            # NE is "not EQ"
            comb += idx.eq(CR.EQ)
            comb += invert.eq(1)
        with m.Case(SVP64PredCR.SO.value):
            comb += idx.eq(CR.SO)
            comb += invert.eq(0)
        with m.Case(SVP64PredCR.NS.value):
            # NS is "not SO"
            comb += idx.eq(CR.SO)
            comb += invert.eq(1)
    return idx, invert
159 class TestIssuerBase(Elaboratable
):
160 """TestIssuerBase - common base class for Issuers
162 takes care of power-on reset, peripherals, debug, DEC/TB,
163 and gets PC/MSR/SVSTATE from the State Regfile etc.
166 def __init__(self
, pspec
):
168 # test is SVP64 is to be enabled
169 self
.svp64_en
= hasattr(pspec
, "svp64") and (pspec
.svp64
== True)
171 # and if regfiles are reduced
172 self
.regreduce_en
= (hasattr(pspec
, "regreduce") and
173 (pspec
.regreduce
== True))
175 # and if overlap requested
176 self
.allow_overlap
= (hasattr(pspec
, "allow_overlap") and
177 (pspec
.allow_overlap
== True))
179 # JTAG interface. add this right at the start because if it's
180 # added it *modifies* the pspec, by adding enable/disable signals
181 # for parts of the rest of the core
182 self
.jtag_en
= hasattr(pspec
, "debug") and pspec
.debug
== 'jtag'
183 self
.dbg_domain
= "sync" # sigh "dbgsunc" too problematic
184 # self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
186 # XXX MUST keep this up-to-date with litex, and
187 # soc-cocotb-sim, and err.. all needs sorting out, argh
190 'eint', 'gpio', 'mspi0',
191 # 'mspi1', - disabled for now
192 # 'pwm', 'sd0', - disabled for now
194 self
.jtag
= JTAG(get_pinspecs(subset
=subset
),
195 domain
=self
.dbg_domain
)
196 # add signals to pspec to enable/disable icache and dcache
197 # (or data and instruction wishbone if icache/dcache not included)
198 # https://bugs.libre-soc.org/show_bug.cgi?id=520
199 # TODO: do we actually care if these are not domain-synchronised?
200 # honestly probably not.
201 pspec
.wb_icache_en
= self
.jtag
.wb_icache_en
202 pspec
.wb_dcache_en
= self
.jtag
.wb_dcache_en
203 self
.wb_sram_en
= self
.jtag
.wb_sram_en
205 self
.wb_sram_en
= Const(1)
207 # add 4k sram blocks?
208 self
.sram4x4k
= (hasattr(pspec
, "sram4x4kblock") and
209 pspec
.sram4x4kblock
== True)
213 self
.sram4k
.append(SPBlock512W64B8W(name
="sram4k_%d" % i
,
217 # add interrupt controller?
218 self
.xics
= hasattr(pspec
, "xics") and pspec
.xics
== True
220 self
.xics_icp
= XICS_ICP()
221 self
.xics_ics
= XICS_ICS()
222 self
.int_level_i
= self
.xics_ics
.int_level_i
224 # add GPIO peripheral?
225 self
.gpio
= hasattr(pspec
, "gpio") and pspec
.gpio
== True
227 self
.simple_gpio
= SimpleGPIO()
228 self
.gpio_o
= self
.simple_gpio
.gpio_o
230 # main instruction core. suitable for prototyping / demo only
231 self
.core
= core
= NonProductionCore(pspec
)
232 self
.core_rst
= ResetSignal("coresync")
234 # instruction decoder. goes into Trap Record
235 #pdecode = create_pdecode()
236 self
.cur_state
= CoreState("cur") # current state (MSR/PC/SVSTATE)
237 self
.pdecode2
= PowerDecode2(None, state
=self
.cur_state
,
238 opkls
=IssuerDecode2ToOperand
,
239 svp64_en
=self
.svp64_en
,
240 regreduce_en
=self
.regreduce_en
)
241 pdecode
= self
.pdecode2
.dec
244 self
.svp64
= SVP64PrefixDecoder() # for decoding SVP64 prefix
246 # Test Instruction memory
247 if hasattr(core
, "icache"):
248 # XXX BLECH! use pspec to transfer the I-Cache to ConfigFetchUnit
249 # truly dreadful. needs a huge reorg.
250 pspec
.icache
= core
.icache
251 self
.imem
= ConfigFetchUnit(pspec
).fu
254 self
.dbg
= CoreDebug()
256 # instruction go/monitor
257 self
.pc_o
= Signal(64, reset_less
=True)
258 self
.pc_i
= Data(64, "pc_i") # set "ok" to indicate "please change me"
259 self
.msr_i
= Data(64, "msr_i") # set "ok" to indicate "please change me"
260 self
.svstate_i
= Data(64, "svstate_i") # ditto
261 self
.core_bigendian_i
= Signal() # TODO: set based on MSR.LE
262 self
.busy_o
= Signal(reset_less
=True)
263 self
.memerr_o
= Signal(reset_less
=True)
265 # STATE regfile read /write ports for PC, MSR, SVSTATE
266 staterf
= self
.core
.regs
.rf
['state']
267 self
.state_r_msr
= staterf
.r_ports
['msr'] # MSR rd
268 self
.state_r_pc
= staterf
.r_ports
['cia'] # PC rd
269 self
.state_r_sv
= staterf
.r_ports
['sv'] # SVSTATE rd
271 self
.state_w_msr
= staterf
.w_ports
['msr'] # MSR wr
272 self
.state_w_pc
= staterf
.w_ports
['d_wr1'] # PC wr
273 self
.state_w_sv
= staterf
.w_ports
['sv'] # SVSTATE wr
275 # DMI interface access
276 intrf
= self
.core
.regs
.rf
['int']
277 crrf
= self
.core
.regs
.rf
['cr']
278 xerrf
= self
.core
.regs
.rf
['xer']
279 self
.int_r
= intrf
.r_ports
['dmi'] # INT read
280 self
.cr_r
= crrf
.r_ports
['full_cr_dbg'] # CR read
281 self
.xer_r
= xerrf
.r_ports
['full_xer'] # XER read
285 self
.int_pred
= intrf
.r_ports
['pred'] # INT predicate read
286 self
.cr_pred
= crrf
.r_ports
['cr_pred'] # CR predicate read
288 # hack method of keeping an eye on whether branch/trap set the PC
289 self
.state_nia
= self
.core
.regs
.rf
['state'].w_ports
['nia']
290 self
.state_nia
.wen
.name
= 'state_nia_wen'
292 # pulse to synchronize the simulator at instruction end
293 self
.insn_done
= Signal()
295 # indicate any instruction still outstanding, in execution
296 self
.any_busy
= Signal()
299 # store copies of predicate masks
300 self
.srcmask
= Signal(64)
301 self
.dstmask
= Signal(64)
303 def setup_peripherals(self
, m
):
304 comb
, sync
= m
.d
.comb
, m
.d
.sync
306 # okaaaay so the debug module must be in coresync clock domain
307 # but NOT its reset signal. to cope with this, set every single
308 # submodule explicitly in coresync domain, debug and JTAG
309 # in their own one but using *external* reset.
310 csd
= DomainRenamer("coresync")
311 dbd
= DomainRenamer(self
.dbg_domain
)
313 m
.submodules
.core
= core
= csd(self
.core
)
314 # this _so_ needs sorting out. ICache is added down inside
315 # LoadStore1 and is already a submodule of LoadStore1
316 if not isinstance(self
.imem
, ICache
):
317 m
.submodules
.imem
= imem
= csd(self
.imem
)
318 m
.submodules
.dbg
= dbg
= dbd(self
.dbg
)
320 m
.submodules
.jtag
= jtag
= dbd(self
.jtag
)
321 # TODO: UART2GDB mux, here, from external pin
322 # see https://bugs.libre-soc.org/show_bug.cgi?id=499
323 sync
+= dbg
.dmi
.connect_to(jtag
.dmi
)
325 cur_state
= self
.cur_state
327 # 4x 4k SRAM blocks. these simply "exist", they get routed in litex
329 for i
, sram
in enumerate(self
.sram4k
):
330 m
.submodules
["sram4k_%d" % i
] = csd(sram
)
331 comb
+= sram
.enable
.eq(self
.wb_sram_en
)
333 # XICS interrupt handler
335 m
.submodules
.xics_icp
= icp
= csd(self
.xics_icp
)
336 m
.submodules
.xics_ics
= ics
= csd(self
.xics_ics
)
337 comb
+= icp
.ics_i
.eq(ics
.icp_o
) # connect ICS to ICP
338 sync
+= cur_state
.eint
.eq(icp
.core_irq_o
) # connect ICP to core
340 # GPIO test peripheral
342 m
.submodules
.simple_gpio
= simple_gpio
= csd(self
.simple_gpio
)
344 # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
345 # XXX causes litex ECP5 test to get wrong idea about input and output
346 # (but works with verilator sim *sigh*)
347 # if self.gpio and self.xics:
348 # comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
350 # instruction decoder
351 pdecode
= create_pdecode()
352 m
.submodules
.dec2
= pdecode2
= csd(self
.pdecode2
)
354 m
.submodules
.svp64
= svp64
= csd(self
.svp64
)
357 dmi
, d_reg
, d_cr
, d_xer
, = dbg
.dmi
, dbg
.d_gpr
, dbg
.d_cr
, dbg
.d_xer
358 intrf
= self
.core
.regs
.rf
['int']
360 # clock delay power-on reset
361 cd_por
= ClockDomain(reset_less
=True)
362 cd_sync
= ClockDomain()
363 core_sync
= ClockDomain("coresync")
364 m
.domains
+= cd_por
, cd_sync
, core_sync
365 if self
.dbg_domain
!= "sync":
366 dbg_sync
= ClockDomain(self
.dbg_domain
)
367 m
.domains
+= dbg_sync
369 ti_rst
= Signal(reset_less
=True)
370 delay
= Signal(range(4), reset
=3)
371 with m
.If(delay
!= 0):
372 m
.d
.por
+= delay
.eq(delay
- 1)
373 comb
+= cd_por
.clk
.eq(ClockSignal())
375 # power-on reset delay
376 core_rst
= ResetSignal("coresync")
377 comb
+= ti_rst
.eq(delay
!= 0 | dbg
.core_rst_o |
ResetSignal())
378 comb
+= core_rst
.eq(ti_rst
)
380 # debug clock is same as coresync, but reset is *main external*
381 if self
.dbg_domain
!= "sync":
382 dbg_rst
= ResetSignal(self
.dbg_domain
)
383 comb
+= dbg_rst
.eq(ResetSignal())
385 # busy/halted signals from core
386 core_busy_o
= ~core
.p
.o_ready | core
.n
.o_data
.busy_o
# core is busy
387 comb
+= self
.busy_o
.eq(core_busy_o
)
388 comb
+= pdecode2
.dec
.bigendian
.eq(self
.core_bigendian_i
)
390 # temporary hack: says "go" immediately for both address gen and ST
392 ldst
= core
.fus
.fus
['ldst0']
393 st_go_edge
= rising_edge(m
, ldst
.st
.rel_o
)
394 # link addr-go direct to rel
395 m
.d
.comb
+= ldst
.ad
.go_i
.eq(ldst
.ad
.rel_o
)
396 m
.d
.comb
+= ldst
.st
.go_i
.eq(st_go_edge
) # link store-go to rising rel
398 def do_dmi(self
, m
, dbg
):
399 """deals with DMI debug requests
401 currently only provides read requests for the INT regfile, CR and XER
402 it will later also deal with *writing* to these regfiles.
406 dmi
, d_reg
, d_cr
, d_xer
, = dbg
.dmi
, dbg
.d_gpr
, dbg
.d_cr
, dbg
.d_xer
407 intrf
= self
.core
.regs
.rf
['int']
409 with m
.If(d_reg
.req
): # request for regfile access being made
410 # TODO: error-check this
411 # XXX should this be combinatorial? sync better?
413 comb
+= self
.int_r
.ren
.eq(1 << d_reg
.addr
)
415 comb
+= self
.int_r
.addr
.eq(d_reg
.addr
)
416 comb
+= self
.int_r
.ren
.eq(1)
417 d_reg_delay
= Signal()
418 sync
+= d_reg_delay
.eq(d_reg
.req
)
419 with m
.If(d_reg_delay
):
420 # data arrives one clock later
421 comb
+= d_reg
.data
.eq(self
.int_r
.o_data
)
422 comb
+= d_reg
.ack
.eq(1)
424 # sigh same thing for CR debug
425 with m
.If(d_cr
.req
): # request for regfile access being made
426 comb
+= self
.cr_r
.ren
.eq(0b11111111) # enable all
427 d_cr_delay
= Signal()
428 sync
+= d_cr_delay
.eq(d_cr
.req
)
429 with m
.If(d_cr_delay
):
430 # data arrives one clock later
431 comb
+= d_cr
.data
.eq(self
.cr_r
.o_data
)
432 comb
+= d_cr
.ack
.eq(1)
435 with m
.If(d_xer
.req
): # request for regfile access being made
436 comb
+= self
.xer_r
.ren
.eq(0b111111) # enable all
437 d_xer_delay
= Signal()
438 sync
+= d_xer_delay
.eq(d_xer
.req
)
439 with m
.If(d_xer_delay
):
440 # data arrives one clock later
441 comb
+= d_xer
.data
.eq(self
.xer_r
.o_data
)
442 comb
+= d_xer
.ack
.eq(1)
444 def tb_dec_fsm(self
, m
, spr_dec
):
447 this is a FSM for updating either dec or tb. it runs alternately
448 DEC, TB, DEC, TB. note that SPR pipeline could have written a new
449 value to DEC, however the regfile has "passthrough" on it so this
452 see v3.0B p1097-1099 for Timer Resource and p1065 and p1076
455 comb
, sync
= m
.d
.comb
, m
.d
.sync
456 fast_rf
= self
.core
.regs
.rf
['fast']
457 fast_r_dectb
= fast_rf
.r_ports
['issue'] # DEC/TB
458 fast_w_dectb
= fast_rf
.w_ports
['issue'] # DEC/TB
462 # initiates read of current DEC
463 with m
.State("DEC_READ"):
464 comb
+= fast_r_dectb
.addr
.eq(FastRegs
.DEC
)
465 comb
+= fast_r_dectb
.ren
.eq(1)
468 # waits for DEC read to arrive (1 cycle), updates with new value
469 with m
.State("DEC_WRITE"):
471 # TODO: MSR.LPCR 32-bit decrement mode
472 comb
+= new_dec
.eq(fast_r_dectb
.o_data
- 1)
473 comb
+= fast_w_dectb
.addr
.eq(FastRegs
.DEC
)
474 comb
+= fast_w_dectb
.wen
.eq(1)
475 comb
+= fast_w_dectb
.i_data
.eq(new_dec
)
476 sync
+= spr_dec
.eq(new_dec
) # copy into cur_state for decoder
479 # initiates read of current TB
480 with m
.State("TB_READ"):
481 comb
+= fast_r_dectb
.addr
.eq(FastRegs
.TB
)
482 comb
+= fast_r_dectb
.ren
.eq(1)
485 # waits for read TB to arrive, initiates write of current TB
486 with m
.State("TB_WRITE"):
488 comb
+= new_tb
.eq(fast_r_dectb
.o_data
+ 1)
489 comb
+= fast_w_dectb
.addr
.eq(FastRegs
.TB
)
490 comb
+= fast_w_dectb
.wen
.eq(1)
491 comb
+= fast_w_dectb
.i_data
.eq(new_tb
)
496 def elaborate(self
, platform
):
499 comb
, sync
= m
.d
.comb
, m
.d
.sync
500 cur_state
= self
.cur_state
501 pdecode2
= self
.pdecode2
504 # set up peripherals and core
505 core_rst
= self
.core_rst
506 self
.setup_peripherals(m
)
508 # reset current state if core reset requested
510 m
.d
.sync
+= self
.cur_state
.eq(0)
512 # PC and instruction from I-Memory
513 comb
+= self
.pc_o
.eq(cur_state
.pc
)
514 self
.pc_changed
= Signal() # note write to PC
515 self
.msr_changed
= Signal() # note write to MSR
516 self
.sv_changed
= Signal() # note write to SVSTATE
518 # read state either from incoming override or from regfile
519 state
= CoreState("get") # current state (MSR/PC/SVSTATE)
520 state_get(m
, state
.msr
, core_rst
, self
.msr_i
,
522 self
.state_r_msr
, StateRegs
.MSR
)
523 state_get(m
, state
.pc
, core_rst
, self
.pc_i
,
525 self
.state_r_pc
, StateRegs
.PC
)
526 state_get(m
, state
.svstate
, core_rst
, self
.svstate_i
,
527 "svstate", # read SVSTATE
528 self
.state_r_sv
, StateRegs
.SVSTATE
)
530 # don't write pc every cycle
531 comb
+= self
.state_w_pc
.wen
.eq(0)
532 comb
+= self
.state_w_pc
.i_data
.eq(0)
534 # connect up debug state. note "combinatorially same" below,
535 # this is a bit naff, passing state over in the dbg class, but
536 # because it is combinatorial it achieves the desired goal
537 comb
+= dbg
.state
.eq(state
)
539 # this bit doesn't have to be in the FSM: connect up to read
540 # regfiles on demand from DMI
543 # DEC and TB inc/dec FSM. copy of DEC is put into CoreState,
544 # (which uses that in PowerDecoder2 to raise 0x900 exception)
545 self
.tb_dec_fsm(m
, cur_state
.dec
)
550 yield from self
.pc_i
.ports()
551 yield from self
.msr_i
.ports()
554 yield from self
.core
.ports()
555 yield from self
.imem
.ports()
556 yield self
.core_bigendian_i
562 def external_ports(self
):
563 ports
= self
.pc_i
.ports()
564 ports
= self
.msr_i
.ports()
565 ports
+= [self
.pc_o
, self
.memerr_o
, self
.core_bigendian_i
, self
.busy_o
,
569 ports
+= list(self
.jtag
.external_ports())
571 # don't add DMI if JTAG is enabled
572 ports
+= list(self
.dbg
.dmi
.ports())
574 ports
+= list(self
.imem
.ibus
.fields
.values())
575 ports
+= list(self
.core
.l0
.cmpi
.wb_bus().fields
.values())
578 for sram
in self
.sram4k
:
579 ports
+= list(sram
.bus
.fields
.values())
582 ports
+= list(self
.xics_icp
.bus
.fields
.values())
583 ports
+= list(self
.xics_ics
.bus
.fields
.values())
584 ports
.append(self
.int_level_i
)
587 ports
+= list(self
.simple_gpio
.bus
.fields
.values())
588 ports
.append(self
.gpio_o
)
597 # Fetch Finite State Machine.
598 # WARNING: there are currently DriverConflicts but it's actually working.
599 # TODO, here: everything that is global in nature, information from the
600 # main TestIssuerInternal, needs to move to either ispec() or ospec().
601 # not only that: TestIssuerInternal.imem can entirely move into here
602 # because imem is only ever accessed inside the FetchFSM.
603 class FetchFSM(ControlBase
):
604 def __init__(self
, allow_overlap
, svp64_en
, imem
, core_rst
,
606 dbg
, core
, svstate
, nia
, is_svp64_mode
):
607 self
.allow_overlap
= allow_overlap
608 self
.svp64_en
= svp64_en
610 self
.core_rst
= core_rst
611 self
.pdecode2
= pdecode2
612 self
.cur_state
= cur_state
615 self
.svstate
= svstate
617 self
.is_svp64_mode
= is_svp64_mode
619 # set up pipeline ControlBase and allocate i/o specs
620 # (unusual: normally done by the Pipeline API)
621 super().__init
__(stage
=self
)
622 self
.p
.i_data
, self
.n
.o_data
= self
.new_specs(None)
623 self
.i
, self
.o
= self
.p
.i_data
, self
.n
.o_data
625 # next 3 functions are Stage API Compliance
626 def setup(self
, m
, i
):
635 def elaborate(self
, platform
):
638 this FSM performs fetch of raw instruction data, partial-decodes
639 it 32-bit at a time to detect SVP64 prefixes, and will optionally
640 read a 2nd 32-bit quantity if that occurs.
642 m
= super().elaborate(platform
)
648 svstate
= self
.svstate
650 is_svp64_mode
= self
.is_svp64_mode
651 fetch_pc_o_ready
= self
.p
.o_ready
652 fetch_pc_i_valid
= self
.p
.i_valid
653 fetch_insn_o_valid
= self
.n
.o_valid
654 fetch_insn_i_ready
= self
.n
.i_ready
658 pdecode2
= self
.pdecode2
659 cur_state
= self
.cur_state
660 dec_opcode_o
= pdecode2
.dec
.raw_opcode_in
# raw opcode
662 # also note instruction fetch failed
663 if hasattr(core
, "icache"):
664 fetch_failed
= core
.icache
.i_out
.fetch_failed
667 fetch_failed
= Const(0, 1)
670 with m
.FSM(name
='fetch_fsm'):
673 with m
.State("IDLE"):
674 with m
.If(~dbg
.stopping_o
& ~fetch_failed
):
675 comb
+= fetch_pc_o_ready
.eq(1)
676 with m
.If(fetch_pc_i_valid
& ~fetch_failed
):
677 # instruction allowed to go: start by reading the PC
678 # capture the PC and also drop it into Insn Memory
679 # we have joined a pair of combinatorial memory
680 # lookups together. this is Generally Bad.
681 comb
+= self
.imem
.a_pc_i
.eq(pc
)
682 comb
+= self
.imem
.a_i_valid
.eq(1)
683 comb
+= self
.imem
.f_i_valid
.eq(1)
684 sync
+= cur_state
.pc
.eq(pc
)
685 sync
+= cur_state
.svstate
.eq(svstate
) # and svstate
686 sync
+= cur_state
.msr
.eq(msr
) # and msr
688 m
.next
= "INSN_READ" # move to "wait for bus" phase
690 # dummy pause to find out why simulation is not keeping up
691 with m
.State("INSN_READ"):
692 if self
.allow_overlap
:
693 stopping
= dbg
.stopping_o
697 # stopping: jump back to idle
700 with m
.If(self
.imem
.f_busy_o
& ~fetch_failed
): # zzz...
701 # busy but not fetch failed: stay in wait-read
702 comb
+= self
.imem
.a_i_valid
.eq(1)
703 comb
+= self
.imem
.f_i_valid
.eq(1)
705 # not busy (or fetch failed!): instruction fetched
706 # when fetch failed, the instruction gets ignored
708 insn
= get_insn(self
.imem
.f_instr_o
, cur_state
.pc
)
711 # decode the SVP64 prefix, if any
712 comb
+= svp64
.raw_opcode_in
.eq(insn
)
713 comb
+= svp64
.bigendian
.eq(self
.core_bigendian_i
)
714 # pass the decoded prefix (if any) to PowerDecoder2
715 sync
+= pdecode2
.sv_rm
.eq(svp64
.svp64_rm
)
716 sync
+= pdecode2
.is_svp64_mode
.eq(is_svp64_mode
)
717 # remember whether this is a prefixed instruction,
718 # so the FSM can readily loop when VL==0
719 sync
+= is_svp64_mode
.eq(svp64
.is_svp64_mode
)
720 # calculate the address of the following instruction
721 insn_size
= Mux(svp64
.is_svp64_mode
, 8, 4)
722 sync
+= nia
.eq(cur_state
.pc
+ insn_size
)
723 with m
.If(~svp64
.is_svp64_mode
):
724 # with no prefix, store the instruction
725 # and hand it directly to the next FSM
726 sync
+= dec_opcode_o
.eq(insn
)
727 m
.next
= "INSN_READY"
729 # fetch the rest of the instruction from memory
730 comb
+= self
.imem
.a_pc_i
.eq(cur_state
.pc
+ 4)
731 comb
+= self
.imem
.a_i_valid
.eq(1)
732 comb
+= self
.imem
.f_i_valid
.eq(1)
733 m
.next
= "INSN_READ2"
735 # not SVP64 - 32-bit only
736 sync
+= nia
.eq(cur_state
.pc
+ 4)
737 sync
+= dec_opcode_o
.eq(insn
)
738 m
.next
= "INSN_READY"
740 with m
.State("INSN_READ2"):
741 with m
.If(self
.imem
.f_busy_o
): # zzz...
742 # busy: stay in wait-read
743 comb
+= self
.imem
.a_i_valid
.eq(1)
744 comb
+= self
.imem
.f_i_valid
.eq(1)
746 # not busy: instruction fetched
747 insn
= get_insn(self
.imem
.f_instr_o
, cur_state
.pc
+4)
748 sync
+= dec_opcode_o
.eq(insn
)
749 m
.next
= "INSN_READY"
750 # TODO: probably can start looking at pdecode2.rm_dec
751 # here or maybe even in INSN_READ state, if svp64_mode
752 # detected, in order to trigger - and wait for - the
755 pmode
= pdecode2
.rm_dec
.predmode
757 if pmode != SVP64PredMode.ALWAYS.value:
758 fire predicate loading FSM and wait before
761 sync += self.srcmask.eq(-1) # set to all 1s
762 sync += self.dstmask.eq(-1) # set to all 1s
763 m.next = "INSN_READY"
766 with m
.State("INSN_READY"):
767 # hand over the instruction, to be decoded
768 comb
+= fetch_insn_o_valid
.eq(1)
769 with m
.If(fetch_insn_i_ready
):
772 # whatever was done above, over-ride it if core reset is held
773 with m
.If(self
.core_rst
):
779 class TestIssuerInternal(TestIssuerBase
):
780 """TestIssuer - reads instructions from TestMemory and issues them
782 efficiency and speed is not the main goal here: functional correctness
783 and code clarity is. optimisations (which almost 100% interfere with
784 easy understanding) come later.
787 def fetch_predicate_fsm(self
, m
,
788 pred_insn_i_valid
, pred_insn_o_ready
,
789 pred_mask_o_valid
, pred_mask_i_ready
):
790 """fetch_predicate_fsm - obtains (constructs in the case of CR)
791 src/dest predicate masks
793 https://bugs.libre-soc.org/show_bug.cgi?id=617
794 the predicates can be read here, by using IntRegs r_ports['pred']
795 or CRRegs r_ports['pred']. in the case of CRs it will have to
796 be done through multiple reads, extracting one relevant at a time.
797 later, a faster way would be to use the 32-bit-wide CR port but
798 this is more complex decoding, here. equivalent code used in
799 ISACaller is "from openpower.decoder.isa.caller import get_predcr"
801 note: this ENTIRE FSM is not to be called when svp64 is disabled
805 pdecode2
= self
.pdecode2
806 rm_dec
= pdecode2
.rm_dec
# SVP64RMModeDecode
807 predmode
= rm_dec
.predmode
808 srcpred
, dstpred
= rm_dec
.srcpred
, rm_dec
.dstpred
809 cr_pred
, int_pred
= self
.cr_pred
, self
.int_pred
# read regfiles
810 # get src/dst step, so we can skip already used mask bits
811 cur_state
= self
.cur_state
812 srcstep
= cur_state
.svstate
.srcstep
813 dststep
= cur_state
.svstate
.dststep
814 cur_vl
= cur_state
.svstate
.vl
817 sregread
, sinvert
, sunary
, sall1s
= get_predint(m
, srcpred
, 's')
818 dregread
, dinvert
, dunary
, dall1s
= get_predint(m
, dstpred
, 'd')
819 sidx
, scrinvert
= get_predcr(m
, srcpred
, 's')
820 didx
, dcrinvert
= get_predcr(m
, dstpred
, 'd')
822 # store fetched masks, for either intpred or crpred
823 # when src/dst step is not zero, the skipped mask bits need to be
824 # shifted-out, before actually storing them in src/dest mask
825 new_srcmask
= Signal(64, reset_less
=True)
826 new_dstmask
= Signal(64, reset_less
=True)
828 with m
.FSM(name
="fetch_predicate"):
830 with m
.State("FETCH_PRED_IDLE"):
831 comb
+= pred_insn_o_ready
.eq(1)
832 with m
.If(pred_insn_i_valid
):
833 with m
.If(predmode
== SVP64PredMode
.INT
):
834 # skip fetching destination mask register, when zero
836 sync
+= new_dstmask
.eq(-1)
837 # directly go to fetch source mask register
838 # guaranteed not to be zero (otherwise predmode
839 # would be SVP64PredMode.ALWAYS, not INT)
840 comb
+= int_pred
.addr
.eq(sregread
)
841 comb
+= int_pred
.ren
.eq(1)
842 m
.next
= "INT_SRC_READ"
843 # fetch destination predicate register
845 comb
+= int_pred
.addr
.eq(dregread
)
846 comb
+= int_pred
.ren
.eq(1)
847 m
.next
= "INT_DST_READ"
848 with m
.Elif(predmode
== SVP64PredMode
.CR
):
849 # go fetch masks from the CR register file
850 sync
+= new_srcmask
.eq(0)
851 sync
+= new_dstmask
.eq(0)
854 sync
+= self
.srcmask
.eq(-1)
855 sync
+= self
.dstmask
.eq(-1)
856 m
.next
= "FETCH_PRED_DONE"
858 with m
.State("INT_DST_READ"):
859 # store destination mask
860 inv
= Repl(dinvert
, 64)
862 # set selected mask bit for 1<<r3 mode
863 dst_shift
= Signal(range(64))
864 comb
+= dst_shift
.eq(self
.int_pred
.o_data
& 0b111111)
865 sync
+= new_dstmask
.eq(1 << dst_shift
)
867 # invert mask if requested
868 sync
+= new_dstmask
.eq(self
.int_pred
.o_data ^ inv
)
869 # skip fetching source mask register, when zero
871 sync
+= new_srcmask
.eq(-1)
872 m
.next
= "FETCH_PRED_SHIFT_MASK"
873 # fetch source predicate register
875 comb
+= int_pred
.addr
.eq(sregread
)
876 comb
+= int_pred
.ren
.eq(1)
877 m
.next
= "INT_SRC_READ"
879 with m
.State("INT_SRC_READ"):
881 inv
= Repl(sinvert
, 64)
883 # set selected mask bit for 1<<r3 mode
884 src_shift
= Signal(range(64))
885 comb
+= src_shift
.eq(self
.int_pred
.o_data
& 0b111111)
886 sync
+= new_srcmask
.eq(1 << src_shift
)
888 # invert mask if requested
889 sync
+= new_srcmask
.eq(self
.int_pred
.o_data ^ inv
)
890 m
.next
= "FETCH_PRED_SHIFT_MASK"
892 # fetch masks from the CR register file
893 # implements the following loop:
894 # idx, inv = get_predcr(mask)
896 # for cr_idx in range(vl):
897 # cr = crl[cr_idx + SVP64CROffs.CRPred] # takes one cycle
899 # mask |= 1 << cr_idx
901 with m
.State("CR_READ"):
902 # CR index to be read, which will be ready by the next cycle
903 cr_idx
= Signal
.like(cur_vl
, reset_less
=True)
904 # submit the read operation to the regfile
905 with m
.If(cr_idx
!= cur_vl
):
906 # the CR read port is unary ...
908 # ... in MSB0 convention ...
909 # ren = 1 << (7 - cr_idx)
910 # ... and with an offset:
911 # ren = 1 << (7 - off - cr_idx)
912 idx
= SVP64CROffs
.CRPred
+ cr_idx
913 comb
+= cr_pred
.ren
.eq(1 << (7 - idx
))
914 # signal data valid in the next cycle
915 cr_read
= Signal(reset_less
=True)
916 sync
+= cr_read
.eq(1)
917 # load the next index
918 sync
+= cr_idx
.eq(cr_idx
+ 1)
921 sync
+= cr_read
.eq(0)
923 m
.next
= "FETCH_PRED_SHIFT_MASK"
925 # compensate for the one cycle delay on the regfile
926 cur_cr_idx
= Signal
.like(cur_vl
)
927 comb
+= cur_cr_idx
.eq(cr_idx
- 1)
928 # read the CR field, select the appropriate bit
932 comb
+= cr_field
.eq(cr_pred
.o_data
)
933 comb
+= scr_bit
.eq(cr_field
.bit_select(sidx
, 1)
935 comb
+= dcr_bit
.eq(cr_field
.bit_select(didx
, 1)
937 # set the corresponding mask bit
938 bit_to_set
= Signal
.like(self
.srcmask
)
939 comb
+= bit_to_set
.eq(1 << cur_cr_idx
)
941 sync
+= new_srcmask
.eq(new_srcmask | bit_to_set
)
943 sync
+= new_dstmask
.eq(new_dstmask | bit_to_set
)
945 with m
.State("FETCH_PRED_SHIFT_MASK"):
946 # shift-out skipped mask bits
947 sync
+= self
.srcmask
.eq(new_srcmask
>> srcstep
)
948 sync
+= self
.dstmask
.eq(new_dstmask
>> dststep
)
949 m
.next
= "FETCH_PRED_DONE"
951 with m
.State("FETCH_PRED_DONE"):
952 comb
+= pred_mask_o_valid
.eq(1)
953 with m
.If(pred_mask_i_ready
):
954 m
.next
= "FETCH_PRED_IDLE"
def issue_fsm(self, m, core, nia,
              dbg, core_rst, is_svp64_mode,
              fetch_pc_o_ready, fetch_pc_i_valid,
              fetch_insn_o_valid, fetch_insn_i_ready,
              pred_insn_i_valid, pred_insn_o_ready,
              pred_mask_o_valid, pred_mask_i_ready,
              exec_insn_i_valid, exec_insn_o_ready,
              exec_pc_o_valid, exec_pc_i_ready):
    """decode / issue FSM.

    this interacts with the "fetch" FSM through fetch_insn_ready/valid
    (incoming) and fetch_pc_ready/valid (outgoing). also interacts with
    the "execute" FSM through exec_insn_ready/valid (outgoing) and
    exec_pc_ready/valid (incoming).
    SVP64 RM prefixes have already been set up by the "fetch" phase,
    so execute is fairly straightforward.

    NOTE(review): reconstructed from a whitespace-mangled extraction.
    a small number of source lines (else-branches, call continuations,
    `if flush_needed:` guards) were lost in extraction and restored
    from context -- verify against the upstream file.
    """
    comb, sync = m.d.comb, m.d.sync
    pdecode2 = self.pdecode2
    cur_state = self.cur_state

    dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode

    # for updating svstate (things like srcstep etc.)
    update_svstate = Signal()  # set this (below) if updating
    new_svstate = SVSTATERec("new_svstate")
    comb += new_svstate.eq(cur_state.svstate)

    # precalculate srcstep+1 and dststep+1
    cur_srcstep = cur_state.svstate.srcstep
    cur_dststep = cur_state.svstate.dststep
    next_srcstep = Signal.like(cur_srcstep)
    next_dststep = Signal.like(cur_dststep)
    comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
    comb += next_dststep.eq(cur_state.svstate.dststep+1)

    # note if an exception happened. in a pipelined or OoO design
    # this needs to be accompanied by "shadowing" (or stalling)
    exc_happened = self.core.o.exc_happened
    # also note instruction fetch failed
    if hasattr(core, "icache"):
        fetch_failed = core.icache.i_out.fetch_failed
        flush_needed = True
        # set to fault in decoder
        # update (highest priority) instruction fault
        rising_fetch_failed = rising_edge(m, fetch_failed)
        with m.If(rising_fetch_failed):
            sync += pdecode2.instr_fault.eq(1)
    else:
        fetch_failed = Const(0, 1)
        flush_needed = False

    with m.FSM(name="issue_fsm"):

        # sync with the "fetch" phase which is reading the instruction
        # at this point, there is no instruction running, that
        # could inadvertently update the PC.
        with m.State("ISSUE_START"):
            # reset instruction fault
            sync += pdecode2.instr_fault.eq(0)
            # wait on "core stop" release, before next fetch
            # need to do this here, in case we are in a VL==0 loop
            with m.If(~dbg.core_stop_o & ~core_rst):
                comb += fetch_pc_i_valid.eq(1)  # tell fetch to start
                with m.If(fetch_pc_o_ready):  # fetch acknowledged us
                    m.next = "INSN_WAIT"
            with m.Else():
                # tell core it's stopped, and acknowledge debug handshake
                comb += dbg.core_stopped_i.eq(1)
                # while stopped, allow updating the MSR, PC and SVSTATE
                with m.If(self.pc_i.ok):
                    comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                    comb += self.state_w_pc.i_data.eq(self.pc_i.data)
                    sync += self.pc_changed.eq(1)
                with m.If(self.msr_i.ok):
                    comb += self.state_w_msr.wen.eq(1 << StateRegs.MSR)
                    comb += self.state_w_msr.i_data.eq(self.msr_i.data)
                    sync += self.msr_changed.eq(1)
                with m.If(self.svstate_i.ok):
                    comb += new_svstate.eq(self.svstate_i.data)
                    comb += update_svstate.eq(1)
                    sync += self.sv_changed.eq(1)

        # wait for an instruction to arrive from Fetch
        with m.State("INSN_WAIT"):
            if self.allow_overlap:
                stopping = dbg.stopping_o
            else:
                stopping = Const(0)
            with m.If(stopping):
                # stopping: jump back to idle
                m.next = "ISSUE_START"
                if flush_needed:
                    # request the icache to stop asserting "failed"
                    comb += core.icache.flush_in.eq(1)
                # stop instruction fault
                sync += pdecode2.instr_fault.eq(0)
            with m.Else():
                comb += fetch_insn_i_ready.eq(1)
                with m.If(fetch_insn_o_valid):
                    # loop into ISSUE_START if it's a SVP64 instruction
                    # and VL == 0. this because VL==0 is a for-loop
                    # from 0 to 0 i.e. always, always a NOP.
                    cur_vl = cur_state.svstate.vl
                    with m.If(is_svp64_mode & (cur_vl == 0)):
                        # update the PC before fetching the next instruction
                        # since we are in a VL==0 loop, no instruction was
                        # executed that we could be overwriting
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.i_data.eq(nia)
                        comb += self.insn_done.eq(1)
                        m.next = "ISSUE_START"
                    with m.Else():
                        if self.svp64_en:
                            m.next = "PRED_START"  # fetching predicate
                        else:
                            m.next = "DECODE_SV"  # skip predication

        with m.State("PRED_START"):
            comb += pred_insn_i_valid.eq(1)  # tell fetch_pred to start
            with m.If(pred_insn_o_ready):  # fetch_pred acknowledged us
                m.next = "MASK_WAIT"

        with m.State("MASK_WAIT"):
            comb += pred_mask_i_ready.eq(1)  # ready to receive the masks
            with m.If(pred_mask_o_valid):  # predication masks are ready
                m.next = "PRED_SKIP"

        # skip zeros in predicate
        with m.State("PRED_SKIP"):
            with m.If(~is_svp64_mode):
                m.next = "DECODE_SV"  # nothing to do
            with m.Else():
                pred_src_zero = pdecode2.rm_dec.pred_sz
                pred_dst_zero = pdecode2.rm_dec.pred_dz

                # new srcstep, after skipping zeros
                skip_srcstep = Signal.like(cur_srcstep)
                # value to be added to the current srcstep
                src_delta = Signal.like(cur_srcstep)
                # add leading zeros to srcstep, if not in zero mode
                with m.If(~pred_src_zero):
                    # priority encoder (count leading zeros)
                    # append guard bit, in case the mask is all zeros
                    pri_enc_src = PriorityEncoder(65)
                    m.submodules.pri_enc_src = pri_enc_src
                    comb += pri_enc_src.i.eq(Cat(self.srcmask,
                                                 Const(1, 1)))
                    comb += src_delta.eq(pri_enc_src.o)
                # apply delta to srcstep
                comb += skip_srcstep.eq(cur_srcstep + src_delta)
                # shift-out all leading zeros from the mask
                # plus the leading "one" bit
                # TODO count leading zeros and shift-out the zero
                #      bits, in the same step, in hardware
                sync += self.srcmask.eq(self.srcmask >> (src_delta+1))

                # same as above, but for dststep
                skip_dststep = Signal.like(cur_dststep)
                dst_delta = Signal.like(cur_dststep)
                with m.If(~pred_dst_zero):
                    pri_enc_dst = PriorityEncoder(65)
                    m.submodules.pri_enc_dst = pri_enc_dst
                    comb += pri_enc_dst.i.eq(Cat(self.dstmask,
                                                 Const(1, 1)))
                    comb += dst_delta.eq(pri_enc_dst.o)
                comb += skip_dststep.eq(cur_dststep + dst_delta)
                sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))

                # TODO: initialize mask[VL]=1 to avoid passing past VL
                with m.If((skip_srcstep >= cur_vl) |
                          (skip_dststep >= cur_vl)):
                    # end of VL loop. Update PC and reset src/dst step
                    comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                    comb += self.state_w_pc.i_data.eq(nia)
                    comb += new_svstate.srcstep.eq(0)
                    comb += new_svstate.dststep.eq(0)
                    comb += update_svstate.eq(1)
                    # synchronize with the simulator
                    comb += self.insn_done.eq(1)
                    # go back to Issue
                    m.next = "ISSUE_START"
                with m.Else():
                    # update new src/dst step
                    comb += new_svstate.srcstep.eq(skip_srcstep)
                    comb += new_svstate.dststep.eq(skip_dststep)
                    comb += update_svstate.eq(1)
                    # proceed to Decode
                    m.next = "DECODE_SV"

                # pass predicate mask bits through to satellite decoders
                # TODO: for SIMD this will be *multiple* bits
                sync += core.i.sv_pred_sm.eq(self.srcmask[0])
                sync += core.i.sv_pred_dm.eq(self.dstmask[0])

        # after src/dst step have been updated, we are ready
        # to decode the instruction
        with m.State("DECODE_SV"):
            # decode the instruction
            with m.If(~fetch_failed):
                sync += pdecode2.instr_fault.eq(0)
            sync += core.i.e.eq(pdecode2.e)
            sync += core.i.state.eq(cur_state)
            sync += core.i.raw_insn_i.eq(dec_opcode_i)
            sync += core.i.bigendian_i.eq(self.core_bigendian_i)
            if self.svp64_en:
                sync += core.i.sv_rm.eq(pdecode2.sv_rm)
                # set RA_OR_ZERO detection in satellite decoders
                sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
                # and svp64 detection
                sync += core.i.is_svp64_mode.eq(is_svp64_mode)
                # and svp64 bit-rev'd ldst mode
                ldst_dec = pdecode2.use_svp64_ldst_dec
                sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
            # after decoding, reset any previous exception condition,
            # allowing it to be set again during the next execution
            sync += pdecode2.ldst_exc.eq(0)

            m.next = "INSN_EXECUTE"  # move to "execute"

        # handshake with execution FSM, move to "wait" once acknowledged
        with m.State("INSN_EXECUTE"):
            comb += exec_insn_i_valid.eq(1)  # trigger execute
            with m.If(exec_insn_o_ready):  # execute acknowledged us
                m.next = "EXECUTE_WAIT"

        with m.State("EXECUTE_WAIT"):
            # wait on "core stop" release, at instruction end
            # need to do this here, in case we are in a VL>1 loop
            with m.If(~dbg.core_stop_o & ~core_rst):
                comb += exec_pc_i_ready.eq(1)
                # see https://bugs.libre-soc.org/show_bug.cgi?id=636
                # the exception info needs to be blatted into
                # pdecode.ldst_exc, and the instruction "re-run".
                # when ldst_exc.happened is set, the PowerDecoder2
                # reacts very differently: it re-writes the instruction
                # with a "trap" (calls PowerDecoder2.trap()) which
                # will *overwrite* whatever was requested and jump the
                # PC to the exception address, as well as alter MSR.
                # nothing else needs to be done other than to note
                # the change of PC and MSR (and, later, SVSTATE)
                with m.If(exc_happened):
                    mmu = core.fus.get_exc("mmu0")
                    ldst = core.fus.get_exc("ldst0")

                    with m.If(fetch_failed):
                        # instruction fetch: exception is from MMU
                        # reset instr_fault (highest priority)
                        sync += pdecode2.ldst_exc.eq(mmu)
                        sync += pdecode2.instr_fault.eq(0)
                        if flush_needed:
                            # request icache to stop asserting "failed"
                            comb += core.icache.flush_in.eq(1)
                    with m.If(~fetch_failed):
                        # otherwise assume it was a LDST exception
                        sync += pdecode2.ldst_exc.eq(ldst)

                with m.If(exec_pc_o_valid):

                    # was this the last loop iteration?
                    is_last = Signal()
                    cur_vl = cur_state.svstate.vl
                    comb += is_last.eq(next_srcstep == cur_vl)

                    # return directly to Decode if Execute generated an
                    # exception.
                    with m.If(pdecode2.ldst_exc.happened):
                        m.next = "DECODE_SV"

                    # if MSR, PC or SVSTATE were changed by the previous
                    # instruction, go directly back to Fetch, without
                    # updating either MSR PC or SVSTATE
                    with m.Elif(self.msr_changed | self.pc_changed |
                                self.sv_changed):
                        m.next = "ISSUE_START"

                    # also return to Fetch, when no output was a vector
                    # (regardless of SRCSTEP and VL), or when the last
                    # instruction was really the last one of the VL loop
                    with m.Elif((~pdecode2.loop_continue) | is_last):
                        # before going back to fetch, update the PC state
                        # register with the NIA.
                        # ok here we are not reading the branch unit.
                        # TODO: this just blithely overwrites whatever
                        #       pipeline updated the PC
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.i_data.eq(nia)
                        # reset SRCSTEP before returning to Fetch
                        if self.svp64_en:
                            with m.If(pdecode2.loop_continue):
                                comb += new_svstate.srcstep.eq(0)
                                comb += new_svstate.dststep.eq(0)
                                comb += update_svstate.eq(1)
                        else:
                            comb += new_svstate.srcstep.eq(0)
                            comb += new_svstate.dststep.eq(0)
                            comb += update_svstate.eq(1)
                        m.next = "ISSUE_START"

                    # returning to Execute? then, first update SRCSTEP
                    with m.Else():
                        comb += new_svstate.srcstep.eq(next_srcstep)
                        comb += new_svstate.dststep.eq(next_dststep)
                        comb += update_svstate.eq(1)
                        # return to mask skip loop
                        m.next = "PRED_SKIP"

            with m.Else():
                comb += dbg.core_stopped_i.eq(1)
                # NOTE(review): the extracted source shows this
                # flush/instr_fault pair twice in succession; both are
                # reproduced here, guarded as in the surrounding code.
                if flush_needed:
                    # request the icache to stop asserting "failed"
                    comb += core.icache.flush_in.eq(1)
                # stop instruction fault
                sync += pdecode2.instr_fault.eq(0)
                if flush_needed:
                    # request the icache to stop asserting "failed"
                    comb += core.icache.flush_in.eq(1)
                # stop instruction fault
                sync += pdecode2.instr_fault.eq(0)
                # while stopped, allow updating the MSR, PC and SVSTATE
                with m.If(self.msr_i.ok):
                    comb += self.state_w_msr.wen.eq(1 << StateRegs.MSR)
                    comb += self.state_w_msr.i_data.eq(self.msr_i.data)
                    sync += self.msr_changed.eq(1)
                with m.If(self.pc_i.ok):
                    comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                    comb += self.state_w_pc.i_data.eq(self.pc_i.data)
                    sync += self.pc_changed.eq(1)
                with m.If(self.svstate_i.ok):
                    comb += new_svstate.eq(self.svstate_i.data)
                    comb += update_svstate.eq(1)
                    sync += self.sv_changed.eq(1)

    # check if svstate needs updating: if so, write it to State Regfile
    with m.If(update_svstate):
        comb += self.state_w_sv.wen.eq(1 << StateRegs.SVSTATE)
        comb += self.state_w_sv.i_data.eq(new_svstate)
        sync += cur_state.svstate.eq(new_svstate)  # for next clock
def execute_fsm(self, m, core,
                exec_insn_i_valid, exec_insn_o_ready,
                exec_pc_o_valid, exec_pc_i_ready):
    """execute FSM.

    this interacts with the "issue" FSM through exec_insn_ready/valid
    (incoming) and exec_pc_ready/valid (outgoing). SVP64 RM prefixes
    have already been set up by the "issue" phase, so execute is
    fairly straightforward.

    NOTE(review): reconstructed from a whitespace-mangled extraction;
    the `else:` branch of the icache check and one continuation line
    were restored from context -- verify against the upstream file.
    """
    comb, sync = m.d.comb, m.d.sync
    pdecode2 = self.pdecode2

    # temporaries
    core_busy_o = core.n.o_data.busy_o  # core is busy
    core_ivalid_i = core.p.i_valid  # instruction is valid

    if hasattr(core, "icache"):
        fetch_failed = core.icache.i_out.fetch_failed
    else:
        fetch_failed = Const(0, 1)

    with m.FSM(name="exec_fsm"):

        # waiting for instruction bus (stays there until not busy)
        with m.State("INSN_START"):
            comb += exec_insn_o_ready.eq(1)
            with m.If(exec_insn_i_valid):
                comb += core_ivalid_i.eq(1)  # instruction is valid/issued
                sync += self.sv_changed.eq(0)
                sync += self.pc_changed.eq(0)
                sync += self.msr_changed.eq(0)
                with m.If(core.p.o_ready):  # only move if accepted
                    m.next = "INSN_ACTIVE"  # move to "wait completion"

        # instruction started: must wait till it finishes
        with m.State("INSN_ACTIVE"):
            # note changes to MSR, PC and SVSTATE
            # XXX oops, really must monitor *all* State Regfile write
            # ports looking for changes!
            with m.If(self.state_nia.wen & (1 << StateRegs.SVSTATE)):
                sync += self.sv_changed.eq(1)
            with m.If(self.state_nia.wen & (1 << StateRegs.MSR)):
                sync += self.msr_changed.eq(1)
            with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
                sync += self.pc_changed.eq(1)
            with m.If(~core_busy_o):  # instruction done!
                comb += exec_pc_o_valid.eq(1)
                with m.If(exec_pc_i_ready):
                    # when finished, indicate "done".
                    # however, if there was an exception, the instruction
                    # is *not* yet done. this is an implementation
                    # detail: we choose to implement exceptions by
                    # taking the exception information from the LDST
                    # unit, putting that *back* into the PowerDecoder2,
                    # and *re-running the entire instruction*.
                    # if we erroneously indicate "done" here, it is as if
                    # there were *TWO* instructions:
                    # 1) the failed LDST 2) a TRAP.
                    with m.If(~pdecode2.ldst_exc.happened &
                              ~fetch_failed):
                        comb += self.insn_done.eq(1)
                    m.next = "INSN_START"  # back to fetch
def elaborate(self, platform):
    """build the four FSMs (fetch, predicate-fetch, issue, execute) and
    wire up their ready/valid handshakes and debug connections.

    NOTE(review): reconstructed from a whitespace-mangled extraction.
    several lines were lost in extraction (the `dbg`/`core` locals,
    `nia = Signal(64)`, two FetchFSM argument lines, the `if
    self.svp64_en:` guard and `return m`) and were restored from
    context -- verify against the upstream file.
    """
    m = super().elaborate(platform)

    # convenience
    comb, sync = m.d.comb, m.d.sync
    cur_state = self.cur_state
    pdecode2 = self.pdecode2
    dbg = self.dbg      # NOTE(review): reconstructed -- confirm
    core = self.core    # NOTE(review): reconstructed -- confirm

    # set up peripherals and core
    core_rst = self.core_rst

    # indicate to outside world if any FU is still executing
    comb += self.any_busy.eq(core.n.o_data.any_busy_o)  # any FU executing

    # address of the next instruction, in the absence of a branch
    # depends on the instruction size
    nia = Signal(64)  # NOTE(review): reconstructed -- confirm width

    # connect up debug signals
    comb += dbg.terminate_i.eq(core.o.core_terminate_o)

    # pass the prefix mode from Fetch to Issue, so the latter can loop
    # on VL==0
    is_svp64_mode = Signal()

    # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
    # issue, decode/execute, now joined by "Predicate fetch/calculate".
    # these are the handshake signals between each

    # fetch FSM can run as soon as the PC is valid
    fetch_pc_i_valid = Signal()  # Execute tells Fetch "start next read"
    fetch_pc_o_ready = Signal()  # Fetch Tells SVSTATE "proceed"

    # fetch FSM hands over the instruction to be decoded / issued
    fetch_insn_o_valid = Signal()
    fetch_insn_i_ready = Signal()

    # predicate fetch FSM decodes and fetches the predicate
    pred_insn_i_valid = Signal()
    pred_insn_o_ready = Signal()

    # predicate fetch FSM delivers the masks
    pred_mask_o_valid = Signal()
    pred_mask_i_ready = Signal()

    # issue FSM delivers the instruction to the be executed
    exec_insn_i_valid = Signal()
    exec_insn_o_ready = Signal()

    # execute FSM, hands over the PC/SVSTATE back to the issue FSM
    exec_pc_o_valid = Signal()
    exec_pc_i_ready = Signal()

    # the FSMs here are perhaps unusual in that they detect conditions
    # then "hold" information, combinatorially, for the core
    # (as opposed to using sync - which would be on a clock's delay)
    # this includes the actual opcode, valid flags and so on.

    # Fetch, then predicate fetch, then Issue, then Execute.
    # Issue is where the VL for-loop # lives. the ready/valid
    # signalling is used to communicate between the four.

    # set up Fetch FSM
    fetch = FetchFSM(self.allow_overlap, self.svp64_en,
                     self.imem, core_rst, pdecode2, cur_state,
                     dbg, core,  # NOTE(review): reconstructed args
                     dbg.state.svstate,  # combinatorially same
                     nia, is_svp64_mode)  # NOTE(review): reconstructed
    m.submodules.fetch = fetch
    # connect up in/out data to existing Signals
    comb += fetch.p.i_data.pc.eq(dbg.state.pc)  # combinatorially same
    comb += fetch.p.i_data.msr.eq(dbg.state.msr)  # combinatorially same
    # and the ready/valid signalling
    comb += fetch_pc_o_ready.eq(fetch.p.o_ready)
    comb += fetch.p.i_valid.eq(fetch_pc_i_valid)
    comb += fetch_insn_o_valid.eq(fetch.n.o_valid)
    comb += fetch.n.i_ready.eq(fetch_insn_i_ready)

    self.issue_fsm(m, core, nia,
                   dbg, core_rst, is_svp64_mode,
                   fetch_pc_o_ready, fetch_pc_i_valid,
                   fetch_insn_o_valid, fetch_insn_i_ready,
                   pred_insn_i_valid, pred_insn_o_ready,
                   pred_mask_o_valid, pred_mask_i_ready,
                   exec_insn_i_valid, exec_insn_o_ready,
                   exec_pc_o_valid, exec_pc_i_ready)

    if self.svp64_en:
        self.fetch_predicate_fsm(m,
                                 pred_insn_i_valid, pred_insn_o_ready,
                                 pred_mask_o_valid, pred_mask_i_ready)

    self.execute_fsm(m, core,
                     exec_insn_i_valid, exec_insn_o_ready,
                     exec_pc_o_valid, exec_pc_i_ready)

    return m
class TestIssuer(Elaboratable):
    """wrapper around the internal (in-order) issuer, optionally adding
    a DummyPLL and the associated clock-domain / clock-selection wiring.

    NOTE(review): reconstructed from a whitespace-mangled extraction.
    the `if self.pll_en:` guards, `m = Module()`, `m.domains += cd_pll`
    and `return ports`/`return m` lines were lost in extraction and
    restored from context -- verify against the upstream file.
    """

    def __init__(self, pspec):
        #self.ti = TestIssuerInternal(pspec)
        from soc.simple.inorder import TestIssuerInternalInOrder
        self.ti = TestIssuerInternalInOrder(pspec)
        self.pll = DummyPLL(instance=True)

        # PLL direct clock or not
        self.pll_en = hasattr(pspec, "use_pll") and pspec.use_pll
        if self.pll_en:  # NOTE(review): reconstructed guard -- confirm
            self.pll_test_o = Signal(reset_less=True)
            self.pll_vco_o = Signal(reset_less=True)
            self.clk_sel_i = Signal(2, reset_less=True)
            self.ref_clk = ClockSignal()  # can't rename it but that's ok
            self.pllclk_clk = ClockSignal("pllclk")

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        # TestIssuer nominally runs at main clock, actually it is
        # all combinatorial internally except for coresync'd components
        m.submodules.ti = ti = self.ti

        if self.pll_en:
            # ClockSelect runs at PLL output internal clock rate
            m.submodules.wrappll = pll = self.pll

            # add clock domains from PLL
            cd_pll = ClockDomain("pllclk")
            m.domains += cd_pll

            # PLL clock established. has the side-effect of running clklsel
            # at the PLL's speed (see DomainRenamer("pllclk") above)
            pllclk = self.pllclk_clk
            comb += pllclk.eq(pll.clk_pll_o)

            # wire up external 24mhz to PLL
            #comb += pll.clk_24_i.eq(self.ref_clk)
            # output 18 mhz PLL test signal, and analog oscillator out
            comb += self.pll_test_o.eq(pll.pll_test_o)
            comb += self.pll_vco_o.eq(pll.pll_vco_o)

            # input to pll clock selection
            comb += pll.clk_sel_i.eq(self.clk_sel_i)

            # now wire up ResetSignals. don't mind them being in this domain
            pll_rst = ResetSignal("pllclk")
            comb += pll_rst.eq(ResetSignal())

        # internal clock is set to selector clock-out. has the side-effect of
        # running TestIssuer at this speed (see DomainRenamer("intclk") above)
        # debug clock runs at coresync internal clock
        cd_coresync = ClockDomain("coresync")
        #m.domains += cd_coresync
        if self.ti.dbg_domain != 'sync':
            cd_dbgsync = ClockDomain("dbgsync")
            #m.domains += cd_dbgsync
        intclk = ClockSignal("coresync")
        dbgclk = ClockSignal(self.ti.dbg_domain)
        # XXX BYPASS PLL XXX
        # XXX BYPASS PLL XXX
        # XXX BYPASS PLL XXX
        if self.pll_en:  # NOTE(review): reconstructed guard -- confirm
            comb += intclk.eq(self.ref_clk)
        else:
            comb += intclk.eq(ClockSignal())
        if self.ti.dbg_domain != 'sync':
            dbgclk = ClockSignal(self.ti.dbg_domain)
            comb += dbgclk.eq(intclk)

        return m

    def ports(self):
        return list(self.ti.ports()) + list(self.pll.ports()) + \
            [ClockSignal(), ResetSignal()]

    def external_ports(self):
        ports = self.ti.external_ports()
        ports.append(ClockSignal())
        ports.append(ResetSignal())
        if self.pll_en:  # NOTE(review): reconstructed guard -- confirm
            ports.append(self.clk_sel_i)
            ports.append(self.pll.clk_24_i)
            ports.append(self.pll_test_o)
            ports.append(self.pll_vco_o)
            ports.append(self.pllclk_clk)
            ports.append(self.ref_clk)
        return ports
1557 if __name__
== '__main__':
1558 units
= {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
1564 pspec
= TestMemPspec(ldst_ifacetype
='bare_wb',
1565 imem_ifacetype
='bare_wb',
1570 dut
= TestIssuer(pspec
)
1571 vl
= main(dut
, ports
=dut
.ports(), name
="test_issuer")
1573 if len(sys
.argv
) == 1:
1574 vl
= rtlil
.convert(dut
, ports
=dut
.external_ports(), name
="test_issuer")
1575 with
open("test_issuer.il", "w") as f
: