3 not in any way intended for production use. this runs a FSM that:
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
10 * does it all over again
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to at least provide something that can be further incrementally
18 from nmigen
import (Elaboratable
, Module
, Signal
, ClockSignal
, ResetSignal
,
19 ClockDomain
, DomainRenamer
, Mux
, Const
, Repl
, Cat
)
20 from nmigen
.cli
import rtlil
21 from nmigen
.cli
import main
24 from nmutil
.singlepipe
import ControlBase
25 from soc
.simple
.core_data
import FetchOutput
, FetchInput
27 from nmigen
.lib
.coding
import PriorityEncoder
29 from openpower
.decoder
.power_decoder
import create_pdecode
30 from openpower
.decoder
.power_decoder2
import PowerDecode2
, SVP64PrefixDecoder
31 from openpower
.decoder
.decode2execute1
import IssuerDecode2ToOperand
32 from openpower
.decoder
.decode2execute1
import Data
33 from openpower
.decoder
.power_enums
import (MicrOp
, SVP64PredInt
, SVP64PredCR
,
35 from openpower
.state
import CoreState
36 from openpower
.consts
import (CR
, SVP64CROffs
, MSR
)
37 from soc
.experiment
.testmem
import TestMemory
# test only for instructions
38 from soc
.regfile
.regfiles
import StateRegs
, FastRegs
39 from soc
.simple
.core
import NonProductionCore
40 from soc
.config
.test
.test_loadstore
import TestMemPspec
41 from soc
.config
.ifetch
import ConfigFetchUnit
42 from soc
.debug
.dmi
import CoreDebug
, DMIInterface
43 from soc
.debug
.jtag
import JTAG
44 from soc
.config
.pinouts
import get_pinspecs
45 from soc
.interrupts
.xics
import XICS_ICP
, XICS_ICS
46 from soc
.bus
.simple_gpio
import SimpleGPIO
47 from soc
.bus
.SPBlock512W64B8W
import SPBlock512W64B8W
48 from soc
.clock
.select
import ClockSelect
49 from soc
.clock
.dummypll
import DummyPLL
50 from openpower
.sv
.svstate
import SVSTATERec
51 from soc
.experiment
.icache
import ICache
53 from nmutil
.util
import rising_edge
def get_insn(f_instr_o, pc):
    """Pick the 32-bit instruction word out of a fetch-unit output.

    f_instr_o: fetch output; must provide ``width`` and ``word_select``.
    pc:        program counter; only bit 2 is looked at, and only when
               the fetch output is wider than one instruction.

    Returns f_instr_o itself when it is already 32 bits wide, otherwise
    the 32-bit word of f_instr_o selected by pc[2].
    """
    if f_instr_o.width == 32:
        # already exactly one instruction wide: nothing to select
        return f_instr_o
    # 64-bit: bit 2 of pc decides which word to select
    return f_instr_o.word_select(pc[2], 32)
63 # gets state input or reads from state regfile
def state_get(m, res, core_rst, state_i, name, regfile, regnum):
    """Drive `res` from an incoming override or from the state regfile.

    m:        Module that the generated logic is added to
    res:      signal that receives the state value
    core_rst: core reset
    state_i:  incoming override record (`ok` / `data`)
    name:     prefix used to name the internal delay signal
    regfile:  regfile read port (`ren` / `o_data`)
    regnum:   register number; the read-enable is unary (1 << regnum)

    when state_i.ok is set, `res` takes state_i.data in the same cycle.
    otherwise a regfile read is requested and `res` takes regfile.o_data
    one clock later.
    """
    comb = m.d.comb
    sync = m.d.sync
    # read the {insert state variable here}
    res_ok_delay = Signal(name="%s_ok_delay" % name)
    # NOTE(review): holding off the delay latch while the core is in reset
    # is reconstructed from the otherwise-unused core_rst parameter - confirm
    with m.If(~core_rst):
        sync += res_ok_delay.eq(~state_i.ok)
    with m.If(state_i.ok):
        # incoming override (start from pc_i)
        comb += res.eq(state_i.data)
    with m.Else():
        # otherwise read StateRegs regfile for {insert state here}...
        comb += regfile.ren.eq(1 << regnum)
    # ... but on a 1-clock delay
    with m.If(res_ok_delay):
        comb += res.eq(regfile.o_data)
def get_predint(m, mask, name):
    """decode SVP64 predicate integer mask field to reg number and invert

    this is identical to the equivalent function in ISACaller except that
    it doesn't read the INT directly, it just decodes "what needs to be done"
    i.e. which INT reg, whether it is shifted and whether it is bit-inverted.

    m:    Module that the decode logic is added to
    mask: predicate mask field, compared against SVP64PredInt values
    name: prefix for the names of the created signals

    returns (regread, invert, unary, all1s):

    * all1s is set to indicate that no mask is to be applied.
    * regread indicates the GPR register number to be read
    * invert is set to indicate that the register value is to be inverted
    * unary indicates that the contents of the register is to be shifted 1<<r3
    """
    comb = m.d.comb
    regread = Signal(5, name=name+"regread")
    invert = Signal(name=name+"invert")
    unary = Signal(name=name+"unary")
    all1s = Signal(name=name+"all1s")

    # (predicate encoding, GPR to read, bit-invert?, 1<<reg shift?)
    decode = (
        (SVP64PredInt.R3_UNARY, 3, False, True),
        (SVP64PredInt.R3, 3, False, False),
        (SVP64PredInt.R3_N, 3, True, False),
        (SVP64PredInt.R10, 10, False, False),
        (SVP64PredInt.R10_N, 10, True, False),
        (SVP64PredInt.R30, 30, False, False),
        (SVP64PredInt.R30_N, 30, True, False),
    )

    with m.Switch(mask):
        with m.Case(SVP64PredInt.ALWAYS.value):
            comb += all1s.eq(1)      # use 0b1111 (all ones)
        for pred, regnum, inverted, shifted in decode:
            with m.Case(pred.value):
                comb += regread.eq(regnum)
                if inverted:
                    # the "_N" encodings use the complement of the register
                    comb += invert.eq(1)
                if shifted:
                    comb += unary.eq(1)  # 1<<r3 - shift r3 (single bit)
    return regread, invert, unary, all1s
def get_predcr(m, mask, name):
    """decode SVP64 predicate CR to reg number field and invert status

    this is identical to _get_predcr in ISACaller

    m:    Module that the decode logic is added to
    mask: predicate mask field, compared against SVP64PredCR values
    name: prefix for the names of the created signals

    returns (idx, invert): idx is the bit position within a CR field
    (one of CR.LT / CR.GT / CR.EQ / CR.SO), invert tells the caller to
    complement the bit that was read.
    """
    comb = m.d.comb
    idx = Signal(2, name=name+"idx")
    invert = Signal(name=name+"crinvert")

    # each condition and its complement test the same CR bit.
    # NOTE(review): polarity (GE/LE/NE/NS being the inverted forms) is
    # reconstructed from the pairing of the cases - confirm vs ISACaller
    decode = (
        (SVP64PredCR.LT, CR.LT, 0),
        (SVP64PredCR.GE, CR.LT, 1),
        (SVP64PredCR.GT, CR.GT, 0),
        (SVP64PredCR.LE, CR.GT, 1),
        (SVP64PredCR.EQ, CR.EQ, 0),
        (SVP64PredCR.NE, CR.EQ, 1),
        (SVP64PredCR.SO, CR.SO, 0),
        (SVP64PredCR.NS, CR.SO, 1),
    )

    with m.Switch(mask):
        for pred, bit, inverted in decode:
            with m.Case(pred.value):
                comb += idx.eq(bit)
                comb += invert.eq(inverted)
    return idx, invert
159 class TestIssuerBase(Elaboratable
):
160 """TestIssuerBase - common base class for Issuers
162 takes care of power-on reset, peripherals, debug, DEC/TB,
163 and gets PC/MSR/SVSTATE from the State Regfile etc.
166 def __init__(self
, pspec
):
168 # test if microwatt compatibility is to be enabled
169 self
.microwatt_compat
= (hasattr(pspec
, "microwatt_compat") and
170 (pspec
.microwatt_compat
== True))
171 self
.alt_reset
= Signal(reset_less
=True) # not connected yet (microwatt)
173 # test is SVP64 is to be enabled
174 self
.svp64_en
= hasattr(pspec
, "svp64") and (pspec
.svp64
== True)
176 # and if regfiles are reduced
177 self
.regreduce_en
= (hasattr(pspec
, "regreduce") and
178 (pspec
.regreduce
== True))
180 # and if overlap requested
181 self
.allow_overlap
= (hasattr(pspec
, "allow_overlap") and
182 (pspec
.allow_overlap
== True))
184 # and get the core domain
185 self
.core_domain
= "coresync"
186 if (hasattr(pspec
, "core_domain") and
187 isinstance(pspec
.core_domain
, str)):
188 self
.core_domain
= pspec
.core_domain
190 # JTAG interface. add this right at the start because if it's
191 # added it *modifies* the pspec, by adding enable/disable signals
192 # for parts of the rest of the core
193 self
.jtag_en
= hasattr(pspec
, "debug") and pspec
.debug
== 'jtag'
194 #self.dbg_domain = "sync" # sigh "dbgsunc" too problematic
195 self
.dbg_domain
= "dbgsync" # domain for DMI/JTAG clock
197 # XXX MUST keep this up-to-date with litex, and
198 # soc-cocotb-sim, and err.. all needs sorting out, argh
201 'eint', 'gpio', 'mspi0',
202 # 'mspi1', - disabled for now
203 # 'pwm', 'sd0', - disabled for now
205 self
.jtag
= JTAG(get_pinspecs(subset
=subset
),
206 domain
=self
.dbg_domain
)
207 # add signals to pspec to enable/disable icache and dcache
208 # (or data and intstruction wishbone if icache/dcache not included)
209 # https://bugs.libre-soc.org/show_bug.cgi?id=520
210 # TODO: do we actually care if these are not domain-synchronised?
211 # honestly probably not.
212 pspec
.wb_icache_en
= self
.jtag
.wb_icache_en
213 pspec
.wb_dcache_en
= self
.jtag
.wb_dcache_en
214 self
.wb_sram_en
= self
.jtag
.wb_sram_en
216 self
.wb_sram_en
= Const(1)
218 # add 4k sram blocks?
219 self
.sram4x4k
= (hasattr(pspec
, "sram4x4kblock") and
220 pspec
.sram4x4kblock
== True)
224 self
.sram4k
.append(SPBlock512W64B8W(name
="sram4k_%d" % i
,
228 # add interrupt controller?
229 self
.xics
= hasattr(pspec
, "xics") and pspec
.xics
== True
231 self
.xics_icp
= XICS_ICP()
232 self
.xics_ics
= XICS_ICS()
233 self
.int_level_i
= self
.xics_ics
.int_level_i
235 self
.ext_irq
= Signal()
237 # add GPIO peripheral?
238 self
.gpio
= hasattr(pspec
, "gpio") and pspec
.gpio
== True
240 self
.simple_gpio
= SimpleGPIO()
241 self
.gpio_o
= self
.simple_gpio
.gpio_o
243 # main instruction core. suitable for prototyping / demo only
244 self
.core
= core
= NonProductionCore(pspec
)
245 self
.core_rst
= ResetSignal(self
.core_domain
)
247 # instruction decoder. goes into Trap Record
248 #pdecode = create_pdecode()
249 self
.cur_state
= CoreState("cur") # current state (MSR/PC/SVSTATE)
250 self
.pdecode2
= PowerDecode2(None, state
=self
.cur_state
,
251 opkls
=IssuerDecode2ToOperand
,
252 svp64_en
=self
.svp64_en
,
253 regreduce_en
=self
.regreduce_en
)
254 pdecode
= self
.pdecode2
.dec
257 self
.svp64
= SVP64PrefixDecoder() # for decoding SVP64 prefix
259 self
.update_svstate
= Signal() # set this if updating svstate
260 self
.new_svstate
= new_svstate
= SVSTATERec("new_svstate")
262 # Test Instruction memory
263 if hasattr(core
, "icache"):
264 # XXX BLECH! use pspec to transfer the I-Cache to ConfigFetchUnit
265 # truly dreadful. needs a huge reorg.
266 pspec
.icache
= core
.icache
267 self
.imem
= ConfigFetchUnit(pspec
).fu
270 self
.dbg
= CoreDebug()
271 self
.dbg_rst_i
= Signal(reset_less
=True)
273 # instruction go/monitor
274 self
.pc_o
= Signal(64, reset_less
=True)
275 self
.pc_i
= Data(64, "pc_i") # set "ok" to indicate "please change me"
276 self
.msr_i
= Data(64, "msr_i") # set "ok" to indicate "please change me"
277 self
.svstate_i
= Data(64, "svstate_i") # ditto
278 self
.core_bigendian_i
= Signal() # TODO: set based on MSR.LE
279 self
.busy_o
= Signal(reset_less
=True)
280 self
.memerr_o
= Signal(reset_less
=True)
282 # STATE regfile read /write ports for PC, MSR, SVSTATE
283 staterf
= self
.core
.regs
.rf
['state']
284 self
.state_r_msr
= staterf
.r_ports
['msr'] # MSR rd
285 self
.state_r_pc
= staterf
.r_ports
['cia'] # PC rd
286 self
.state_r_sv
= staterf
.r_ports
['sv'] # SVSTATE rd
288 self
.state_w_msr
= staterf
.w_ports
['d_wr2'] # MSR wr
289 self
.state_w_pc
= staterf
.w_ports
['d_wr1'] # PC wr
290 self
.state_w_sv
= staterf
.w_ports
['sv'] # SVSTATE wr
292 # DMI interface access
293 intrf
= self
.core
.regs
.rf
['int']
294 crrf
= self
.core
.regs
.rf
['cr']
295 xerrf
= self
.core
.regs
.rf
['xer']
296 self
.int_r
= intrf
.r_ports
['dmi'] # INT read
297 self
.cr_r
= crrf
.r_ports
['full_cr_dbg'] # CR read
298 self
.xer_r
= xerrf
.r_ports
['full_xer'] # XER read
302 self
.int_pred
= intrf
.r_ports
['pred'] # INT predicate read
303 self
.cr_pred
= crrf
.r_ports
['cr_pred'] # CR predicate read
305 # hack method of keeping an eye on whether branch/trap set the PC
306 self
.state_nia
= self
.core
.regs
.rf
['state'].w_ports
['nia']
307 self
.state_nia
.wen
.name
= 'state_nia_wen'
309 # pulse to synchronize the simulator at instruction end
310 self
.insn_done
= Signal()
312 # indicate any instruction still outstanding, in execution
313 self
.any_busy
= Signal()
316 # store copies of predicate masks
317 self
.srcmask
= Signal(64)
318 self
.dstmask
= Signal(64)
320 # sigh, the wishbone addresses are not wishbone-compliant in microwatt
321 if self
.microwatt_compat
:
322 self
.ibus_adr
= Signal(32, name
='wishbone_insn_out.adr')
323 self
.dbus_adr
= Signal(32, name
='wishbone_data_out.adr')
325 # add an output of the PC and instruction, and whether it was requested
326 # this is for verilator debug purposes
327 if self
.microwatt_compat
:
328 self
.nia
= Signal(64)
329 self
.msr_o
= Signal(64)
330 self
.nia_req
= Signal(1)
331 self
.insn
= Signal(32)
def setup_peripherals(self, m):
    """Add the core, debug and peripheral submodules and wire up resets.

    m: Module being elaborated.  submodules and clock domains are added
       to it, and combinatorial/sync statements appended.

    covers: core / imem / debug (and JTAG) submodules, microwatt clock
    and wishbone-address fixups, the optional SRAM, XICS and GPIO
    peripherals, the decoder, the power-on-reset delay, the core and
    debug resets, the busy output, and the LD/ST "go" hack.
    """
    comb, sync = m.d.comb, m.d.sync

    # okaaaay so the debug module must be in coresync clock domain
    # but NOT its reset signal. to cope with this, set every single
    # submodule explicitly in coresync domain, debug and JTAG
    # in their own one but using *external* reset.
    csd = DomainRenamer(self.core_domain)
    dbd = DomainRenamer(self.dbg_domain)

    if self.microwatt_compat:
        m.submodules.core = core = self.core
    else:
        m.submodules.core = core = csd(self.core)
    # this _so_ needs sorting out. ICache is added down inside
    # LoadStore1 and is already a submodule of LoadStore1
    if not isinstance(self.imem, ICache):
        m.submodules.imem = csd(self.imem)
    m.submodules.dbg = dbg = dbd(self.dbg)
    if self.jtag_en:
        m.submodules.jtag = jtag = dbd(self.jtag)
        # TODO: UART2GDB mux, here, from external pin
        # see https://bugs.libre-soc.org/show_bug.cgi?id=499
        sync += dbg.dmi.connect_to(jtag.dmi)

    # fixup the clocks in microwatt-compat mode (but leave resets alone
    # so that microwatt soc.vhdl can pull a reset on the core or DMI
    # can do it, just like in TestIssuer)
    if self.microwatt_compat:
        intclk = ClockSignal(self.core_domain)
        dbgclk = ClockSignal(self.dbg_domain)
        if self.core_domain != 'sync':
            comb += intclk.eq(ClockSignal())
        if self.dbg_domain != 'sync':
            comb += dbgclk.eq(ClockSignal())

    # drop the first 3 bits of the incoming wishbone addresses
    # this can go if using later versions of microwatt (not now)
    if self.microwatt_compat:
        ibus = self.imem.ibus
        dbus = self.core.l0.cmpi.wb_bus()
        comb += self.ibus_adr.eq(Cat(Const(0, 3), ibus.adr))
        comb += self.dbus_adr.eq(Cat(Const(0, 3), dbus.adr))

    cur_state = self.cur_state

    # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
    if self.sram4x4k:
        for i, sram in enumerate(self.sram4k):
            m.submodules["sram4k_%d" % i] = csd(sram)
            comb += sram.enable.eq(self.wb_sram_en)

    # XICS interrupt handler
    if self.xics:
        m.submodules.xics_icp = icp = csd(self.xics_icp)
        m.submodules.xics_ics = ics = csd(self.xics_ics)
        comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
        sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core
    else:
        sync += cur_state.eint.eq(self.ext_irq)  # connect externally

    # GPIO test peripheral
    if self.gpio:
        m.submodules.simple_gpio = csd(self.simple_gpio)

    # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
    # XXX causes litex ECP5 test to get wrong idea about input and output
    # (but works with verilator sim *sigh*)
    # if self.gpio and self.xics:
    #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])

    # instruction decoder
    # NOTE(review): the result of create_pdecode() is not used in this
    # method; the call is kept as-is in case construction matters - confirm
    pdecode = create_pdecode()
    m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
    if self.svp64_en:
        m.submodules.svp64 = csd(self.svp64)

    # clock delay power-on reset
    cd_por = ClockDomain(reset_less=True)
    cd_sync = ClockDomain()
    m.domains += cd_por, cd_sync
    core_sync = ClockDomain(self.core_domain)
    if self.core_domain != "sync":
        m.domains += core_sync
    if self.dbg_domain != "sync":
        dbg_sync = ClockDomain(self.dbg_domain)
        m.domains += dbg_sync

    ti_rst = Signal(reset_less=True)
    delay = Signal(range(4), reset=3)
    # explicit comparison: python's "|" binds tighter than "!=", so
    # "delay != 0 | x" would compare delay against (0 | x) instead
    por_active = (delay != 0)
    with m.If(por_active):
        m.d.por += delay.eq(delay - 1)
    comb += cd_por.clk.eq(ClockSignal())

    # power-on reset delay
    core_rst = ResetSignal(self.core_domain)
    if self.core_domain != "sync":
        comb += ti_rst.eq(por_active | dbg.core_rst_o | ResetSignal())
        comb += core_rst.eq(ti_rst)
    else:
        with m.If(por_active | dbg.core_rst_o):
            comb += core_rst.eq(1)

    # connect external reset signal to DMI Reset
    if self.dbg_domain != "sync":
        dbg_rst = ResetSignal(self.dbg_domain)
        comb += dbg_rst.eq(self.dbg_rst_i)

    # busy/halted signals from core
    core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o  # core is busy
    comb += self.busy_o.eq(core_busy_o)
    comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)

    # temporary hack: says "go" immediately for both address gen and ST
    ldst = core.fus.fus['ldst0']
    st_go_edge = rising_edge(m, ldst.st.rel_o)
    # link addr-go direct to rel
    m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)
    m.d.comb += ldst.st.go_i.eq(st_go_edge)  # link store-go to rising rel
def do_dmi(self, m, dbg):
    """deals with DMI debug requests

    currently only provides read requests for the INT regfile, CR and XER
    it will later also deal with *writing* to these regfiles.

    m:   Module that the logic is added to
    dbg: debug module providing the d_gpr / d_cr / d_xer request ports

    each request raises the regfile read-enable in the same cycle; data
    and ack are presented to the requester one clock later.
    """
    comb, sync = m.d.comb, m.d.sync
    d_reg, d_cr, d_xer = dbg.d_gpr, dbg.d_cr, dbg.d_xer
    intrf = self.core.regs.rf['int']

    with m.If(d_reg.req):  # request for regfile access being made
        # TODO: error-check this
        # XXX should this be combinatorial?  sync better?
        # NOTE(review): choosing between the two addressing forms on
        # intrf.unary is reconstructed (both forms were present, and
        # intrf was fetched but unused) - confirm
        if intrf.unary:
            comb += self.int_r.ren.eq(1 << d_reg.addr)
        else:
            comb += self.int_r.addr.eq(d_reg.addr)
            comb += self.int_r.ren.eq(1)
    d_reg_delay = Signal()
    sync += d_reg_delay.eq(d_reg.req)
    with m.If(d_reg_delay):
        # data arrives one clock later
        comb += d_reg.data.eq(self.int_r.o_data)
        comb += d_reg.ack.eq(1)

    # sigh same thing for CR debug, and again for XER: these two
    # differ only in the port used and the width of the read-enable
    whole_regfile = (
        (d_cr, self.cr_r, 0b11111111, "d_cr_delay"),
        (d_xer, self.xer_r, 0b111111, "d_xer_delay"),
    )
    for req_port, rf_port, enable_all, delay_name in whole_regfile:
        with m.If(req_port.req):  # request for regfile access being made
            comb += rf_port.ren.eq(enable_all)  # enable all
        req_delay = Signal(name=delay_name)
        sync += req_delay.eq(req_port.req)
        with m.If(req_delay):
            # data arrives one clock later
            comb += req_port.data.eq(rf_port.o_data)
            comb += req_port.ack.eq(1)
def tb_dec_fsm(self, m, spr_dec):
    """tb_dec_fsm

    this is a FSM for updating either dec or tb.  it runs alternately
    DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
    value to DEC, however the regfile has "passthrough" on it
    (NOTE(review): presumably that makes the read here see the new
    value - confirm).

    see v3.0B p1097-1099 for Timer Resource and p1065 and p1076

    m:       Module that the FSM is added to
    spr_dec: signal that receives a copy of each newly written DEC value

    returns the FSM object.
    """
    comb, sync = m.d.comb, m.d.sync
    fast_rf = self.core.regs.rf['fast']
    fast_r_dectb = fast_rf.r_ports['issue']  # DEC/TB
    fast_w_dectb = fast_rf.w_ports['issue']  # DEC/TB

    with m.FSM() as fsm:

        # initiates read of current DEC
        with m.State("DEC_READ"):
            comb += fast_r_dectb.addr.eq(FastRegs.DEC)
            comb += fast_r_dectb.ren.eq(1)
            m.next = "DEC_WRITE"

        # waits for DEC read to arrive (1 cycle), updates with new value
        with m.State("DEC_WRITE"):
            new_dec = Signal(64)
            # TODO: MSR.LPCR 32-bit decrement mode
            comb += new_dec.eq(fast_r_dectb.o_data - 1)
            comb += fast_w_dectb.addr.eq(FastRegs.DEC)
            comb += fast_w_dectb.wen.eq(1)
            comb += fast_w_dectb.i_data.eq(new_dec)
            sync += spr_dec.eq(new_dec)  # copy into cur_state for decoder
            m.next = "TB_READ"

        # initiates read of current TB
        with m.State("TB_READ"):
            comb += fast_r_dectb.addr.eq(FastRegs.TB)
            comb += fast_r_dectb.ren.eq(1)
            m.next = "TB_WRITE"

        # waits for read TB to arrive, initiates write of current TB
        with m.State("TB_WRITE"):
            new_tb = Signal(64)
            comb += new_tb.eq(fast_r_dectb.o_data + 1)
            comb += fast_w_dectb.addr.eq(FastRegs.TB)
            comb += fast_w_dectb.wen.eq(1)
            comb += fast_w_dectb.i_data.eq(new_tb)
            m.next = "DEC_READ"

    return fsm
def elaborate(self, platform):
    """Build the common issuer logic and return the Module.

    platform: not used in this method.

    sets up peripherals and core, resets cur_state on core reset, raises
    stopped/terminate to the debug module when the PC matches the DMI stop
    address, reads MSR/PC/SVSTATE (override or regfile), connects the
    DMI regfile reads and the DEC/TB FSM, allows MSR/PC/SVSTATE to be
    written while the core is stopped, and (microwatt mode) renames
    ports.
    """
    m = Module()
    # convenience
    comb, sync = m.d.comb, m.d.sync
    cur_state = self.cur_state
    dbg = self.dbg

    # set up peripherals and core
    core_rst = self.core_rst
    self.setup_peripherals(m)

    # reset current state if core reset requested
    with m.If(core_rst):
        m.d.sync += self.cur_state.eq(0)

    # check halted condition: requested PC to execute matches DMI stop addr
    # and immediately stop. address of 0xffff_ffff_ffff_ffff can never
    # match (NOTE(review): end of this sentence reconstructed - confirm)
    halted = Signal()
    comb += halted.eq(dbg.stop_addr_o == dbg.state.pc)
    with m.If(halted):
        comb += dbg.core_stopped_i.eq(1)
        comb += dbg.terminate_i.eq(1)

    # PC and instruction from I-Memory
    comb += self.pc_o.eq(cur_state.pc)
    self.pc_changed = Signal()  # note write to PC
    self.msr_changed = Signal()  # note write to MSR
    self.sv_changed = Signal()  # note write to SVSTATE

    # read state either from incoming override or from regfile
    state = CoreState("get")  # current state (MSR/PC/SVSTATE)
    state_get(m, state.msr, core_rst, self.msr_i,
              "msr",                  # read MSR
              self.state_r_msr, StateRegs.MSR)
    state_get(m, state.pc, core_rst, self.pc_i,
              "pc",                  # read PC
              self.state_r_pc, StateRegs.PC)
    state_get(m, state.svstate, core_rst, self.svstate_i,
              "svstate",   # read SVSTATE
              self.state_r_sv, StateRegs.SVSTATE)

    # don't write pc every cycle
    comb += self.state_w_pc.wen.eq(0)
    comb += self.state_w_pc.i_data.eq(0)

    # connect up debug state.  note "combinatorially same" below,
    # this is a bit naff, passing state over in the dbg class, but
    # because it is combinatorial it achieves the desired goal
    comb += dbg.state.eq(state)

    # this bit doesn't have to be in the FSM: connect up to read
    # regfiles on demand from DMI
    self.do_dmi(m, dbg)

    # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
    # (which uses that in PowerDecoder2 to raise 0x900 exception)
    self.tb_dec_fsm(m, cur_state.dec)

    # while stopped, allow updating the MSR, PC and SVSTATE.
    # these are mainly for debugging purposes (including DMI/JTAG)
    with m.If(dbg.core_stopped_i):
        with m.If(self.pc_i.ok):
            comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
            comb += self.state_w_pc.i_data.eq(self.pc_i.data)
            sync += self.pc_changed.eq(1)
        with m.If(self.msr_i.ok):
            comb += self.state_w_msr.wen.eq(1 << StateRegs.MSR)
            comb += self.state_w_msr.i_data.eq(self.msr_i.data)
            sync += self.msr_changed.eq(1)
        with m.If(self.svstate_i.ok | self.update_svstate):
            with m.If(self.svstate_i.ok):  # over-ride from external source
                comb += self.new_svstate.eq(self.svstate_i.data)
            comb += self.state_w_sv.wen.eq(1 << StateRegs.SVSTATE)
            comb += self.state_w_sv.i_data.eq(self.new_svstate)
            sync += self.sv_changed.eq(1)

    # start renaming some of the ports to match microwatt
    if self.microwatt_compat:
        self.core.o.core_terminate_o.name = "terminated_out"
        # names of DMI interface
        dmi = self.dbg.dmi
        dmi.addr_i.name = 'dmi_addr'
        dmi.din.name = 'dmi_din'
        dmi.dout.name = 'dmi_dout'
        dmi.req_i.name = 'dmi_req'
        dmi.we_i.name = 'dmi_wr'
        dmi.ack_o.name = 'dmi_ack'
        # wishbone instruction and data buses share one naming scheme:
        # (bus field, suffix used in the microwatt port name)
        wb_out = (('adr', 'adr'), ('dat_w', 'dat'), ('sel', 'sel'),
                  ('cyc', 'cyc'), ('stb', 'stb'), ('we', 'we'))
        wb_in = (('dat_r', 'dat'), ('ack', 'ack'), ('stall', 'stall'))
        buses = (('insn', self.imem.ibus),
                 ('data', self.core.l0.cmpi.wb_bus()))
        for kind, bus in buses:
            for field, suffix in wb_out:
                getattr(bus, field).name = \
                    'wishbone_%s_out.%s' % (kind, suffix)
            for field, suffix in wb_in:
                getattr(bus, field).name = \
                    'wishbone_%s_in.%s' % (kind, suffix)

    return m
670 yield from self
.pc_i
.ports()
671 yield from self
.msr_i
.ports()
674 yield from self
.core
.ports()
675 yield from self
.imem
.ports()
676 yield self
.core_bigendian_i
def external_ports(self):
    """Return the list of signals to expose as top-level ports.

    in microwatt-compatible mode a reduced, renamed set is returned.
    otherwise the PC and MSR override ports, status outputs, debug
    (JTAG or DMI) ports, both wishbone buses and the ports of whichever
    optional peripherals (SRAM, XICS, GPIO) are enabled.
    """
    if self.microwatt_compat:
        # NOTE(review): ext_irq and the clock/reset entries of this list
        # are reconstructed - confirm against the microwatt top-level
        ports = [self.core.o.core_terminate_o,
                 self.ext_irq,
                 self.alt_reset,  # not connected yet
                 self.nia, self.insn, self.nia_req, self.msr_o,
                 ClockSignal(),
                 ResetSignal(),
                 ]
        ports += list(self.dbg.dmi.ports())
        # for dbus/ibus microwatt, exclude err btw and cti
        # (adr is excluded too: replaced by ibus_adr/dbus_adr below)
        skipped = ('err', 'bte', 'cti', 'adr')
        for bus in (self.imem.ibus, self.core.l0.cmpi.wb_bus()):
            ports += [sig for name, sig in bus.fields.items()
                      if name not in skipped]
        # microwatt non-compliant with wishbone
        ports.append(self.ibus_adr)
        ports.append(self.dbus_adr)
        return ports

    # copy, so that extending the list cannot modify whatever list
    # pc_i.ports() handed back
    ports = list(self.pc_i.ports())
    # extend rather than re-assign: the PC override ports must be kept
    ports += self.msr_i.ports()
    ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
              ]

    if self.jtag_en:
        ports += list(self.jtag.external_ports())
    else:
        # don't add DMI if JTAG is enabled
        ports += list(self.dbg.dmi.ports())

    ports += list(self.imem.ibus.fields.values())
    ports += list(self.core.l0.cmpi.wb_bus().fields.values())

    if self.sram4x4k:
        for sram in self.sram4k:
            ports += list(sram.bus.fields.values())

    if self.xics:
        ports += list(self.xics_icp.bus.fields.values())
        ports += list(self.xics_ics.bus.fields.values())
        ports.append(self.int_level_i)
    else:
        ports.append(self.ext_irq)

    if self.gpio:
        ports += list(self.simple_gpio.bus.fields.values())
        ports.append(self.gpio_o)

    return ports
739 class TestIssuerInternal(TestIssuerBase
):
740 """TestIssuer - reads instructions from TestMemory and issues them
742 efficiency and speed is not the main goal here: functional correctness
743 and code clarity is. optimisations (which almost 100% interfere with
744 easy understanding) come later.
747 def fetch_fsm(self
, m
, dbg
, core
, pc
, msr
, svstate
, nia
, is_svp64_mode
,
748 fetch_pc_o_ready
, fetch_pc_i_valid
,
749 fetch_insn_o_valid
, fetch_insn_i_ready
):
752 this FSM performs fetch of raw instruction data, partial-decodes
753 it 32-bit at a time to detect SVP64 prefixes, and will optionally
754 read a 2nd 32-bit quantity if that occurs.
758 pdecode2
= self
.pdecode2
759 cur_state
= self
.cur_state
760 dec_opcode_i
= pdecode2
.dec
.raw_opcode_in
# raw opcode
762 # also note instruction fetch failed
763 if hasattr(core
, "icache"):
764 fetch_failed
= core
.icache
.i_out
.fetch_failed
767 fetch_failed
= Const(0, 1)
770 # set priv / virt mode on I-Cache, sigh
771 if isinstance(self
.imem
, ICache
):
772 comb
+= self
.imem
.i_in
.priv_mode
.eq(~msr
[MSR
.PR
])
773 comb
+= self
.imem
.i_in
.virt_mode
.eq(msr
[MSR
.IR
]) # Instr. Redir (VM)
775 with m
.FSM(name
='fetch_fsm'):
778 with m
.State("IDLE"):
779 # fetch allowed if not failed and stopped but not stepping
780 # (see dmi.py for how core_stop_o is generated)
781 with m
.If(~fetch_failed
& ~dbg
.core_stop_o
):
782 comb
+= fetch_pc_o_ready
.eq(1)
783 with m
.If(fetch_pc_i_valid
& ~pdecode2
.instr_fault
785 # instruction allowed to go: start by reading the PC
786 # capture the PC and also drop it into Insn Memory
787 # we have joined a pair of combinatorial memory
788 # lookups together. this is Generally Bad.
789 comb
+= self
.imem
.a_pc_i
.eq(pc
)
790 comb
+= self
.imem
.a_i_valid
.eq(1)
791 comb
+= self
.imem
.f_i_valid
.eq(1)
792 # transfer state to output
793 sync
+= cur_state
.pc
.eq(pc
)
794 sync
+= cur_state
.svstate
.eq(svstate
) # and svstate
795 sync
+= cur_state
.msr
.eq(msr
) # and msr
797 m
.next
= "INSN_READ" # move to "wait for bus" phase
799 # dummy pause to find out why simulation is not keeping up
800 with m
.State("INSN_READ"):
801 # when using "single-step" mode, checking dbg.stopping_o
802 # prevents progress. allow fetch to proceed once started
804 #if self.allow_overlap:
805 # stopping = dbg.stopping_o
807 # stopping: jump back to idle
810 with m
.If(self
.imem
.f_busy_o
&
811 ~pdecode2
.instr_fault
): # zzz...
812 # busy but not fetch failed: stay in wait-read
813 comb
+= self
.imem
.a_pc_i
.eq(pc
)
814 comb
+= self
.imem
.a_i_valid
.eq(1)
815 comb
+= self
.imem
.f_i_valid
.eq(1)
817 # not busy (or fetch failed!): instruction fetched
818 # when fetch failed, the instruction gets ignored
820 if hasattr(core
, "icache"):
821 # blech, icache returns actual instruction
822 insn
= self
.imem
.f_instr_o
824 # but these return raw memory
825 insn
= get_insn(self
.imem
.f_instr_o
, cur_state
.pc
)
828 # decode the SVP64 prefix, if any
829 comb
+= svp64
.raw_opcode_in
.eq(insn
)
830 comb
+= svp64
.bigendian
.eq(self
.core_bigendian_i
)
831 # pass the decoded prefix (if any) to PowerDecoder2
832 sync
+= pdecode2
.sv_rm
.eq(svp64
.svp64_rm
)
833 sync
+= pdecode2
.is_svp64_mode
.eq(is_svp64_mode
)
834 # remember whether this is a prefixed instruction,
835 # so the FSM can readily loop when VL==0
836 sync
+= is_svp64_mode
.eq(svp64
.is_svp64_mode
)
837 # calculate the address of the following instruction
838 insn_size
= Mux(svp64
.is_svp64_mode
, 8, 4)
839 sync
+= nia
.eq(cur_state
.pc
+ insn_size
)
840 with m
.If(~svp64
.is_svp64_mode
):
841 # with no prefix, store the instruction
842 # and hand it directly to the next FSM
843 sync
+= dec_opcode_i
.eq(insn
)
844 m
.next
= "INSN_READY"
846 # fetch the rest of the instruction from memory
847 comb
+= self
.imem
.a_pc_i
.eq(cur_state
.pc
+ 4)
848 comb
+= self
.imem
.a_i_valid
.eq(1)
849 comb
+= self
.imem
.f_i_valid
.eq(1)
850 m
.next
= "INSN_READ2"
852 # not SVP64 - 32-bit only
853 sync
+= nia
.eq(cur_state
.pc
+ 4)
854 sync
+= dec_opcode_i
.eq(insn
)
855 if self
.microwatt_compat
:
856 # for verilator debug purposes
857 comb
+= self
.insn
.eq(insn
)
858 comb
+= self
.nia
.eq(cur_state
.pc
)
859 comb
+= self
.msr_o
.eq(cur_state
.msr
)
860 comb
+= self
.nia_req
.eq(1)
861 m
.next
= "INSN_READY"
863 with m
.State("INSN_READ2"):
864 with m
.If(self
.imem
.f_busy_o
): # zzz...
865 # busy: stay in wait-read
866 comb
+= self
.imem
.a_i_valid
.eq(1)
867 comb
+= self
.imem
.f_i_valid
.eq(1)
869 # not busy: instruction fetched
870 if hasattr(core
, "icache"):
871 # blech, icache returns actual instruction
872 insn
= self
.imem
.f_instr_o
874 insn
= get_insn(self
.imem
.f_instr_o
, cur_state
.pc
+4)
875 sync
+= dec_opcode_i
.eq(insn
)
876 m
.next
= "INSN_READY"
877 # TODO: probably can start looking at pdecode2.rm_dec
878 # here or maybe even in INSN_READ state, if svp64_mode
879 # detected, in order to trigger - and wait for - the
882 pmode
= pdecode2
.rm_dec
.predmode
884 if pmode != SVP64PredMode.ALWAYS.value:
885 fire predicate loading FSM and wait before
888 sync += self.srcmask.eq(-1) # set to all 1s
889 sync += self.dstmask.eq(-1) # set to all 1s
890 m.next = "INSN_READY"
893 with m
.State("INSN_READY"):
894 # hand over the instruction, to be decoded
895 comb
+= fetch_insn_o_valid
.eq(1)
896 with m
.If(fetch_insn_i_ready
):
def fetch_predicate_fsm(self, m,
                        pred_insn_i_valid, pred_insn_o_ready,
                        pred_mask_o_valid, pred_mask_i_ready):
    """fetch_predicate_fsm - obtains (constructs in the case of CR)
       src/dest predicate masks

    https://bugs.libre-soc.org/show_bug.cgi?id=617
    the predicates can be read here, by using IntRegs r_ports['pred']
    or CRRegs r_ports['pred'].  in the case of CRs it will have to
    be done through multiple reads, extracting one relevant at a time.
    later, a faster way would be to use the 32-bit-wide CR port but
    this is more complex decoding, here.  equivalent code used in
    ISACaller is "from openpower.decoder.isa.caller import get_predcr"

    note: this ENTIRE FSM is not to be called when svp64 is disabled

    Arguments:

    * m: the Module to which the FSM and its statements are added
    * pred_insn_i_valid: input, request to start fetching the masks
    * pred_insn_o_ready: output, asserted while idle (request accepted)
    * pred_mask_o_valid: output, asserted once self.srcmask and
      self.dstmask hold the final masks
    * pred_mask_i_ready: input, consumer has taken the masks

    Nothing is returned: the results are left in self.srcmask and
    self.dstmask.
    """
    comb = m.d.comb
    sync = m.d.sync
    pdecode2 = self.pdecode2
    rm_dec = pdecode2.rm_dec  # SVP64RMModeDecode
    predmode = rm_dec.predmode
    srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
    cr_pred, int_pred = self.cr_pred, self.int_pred   # read regfiles
    # get src/dst step, so we can skip already used mask bits
    cur_state = self.cur_state
    srcstep = cur_state.svstate.srcstep
    dststep = cur_state.svstate.dststep
    cur_vl = cur_state.svstate.vl

    # decode predicates
    sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
    dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
    sidx, scrinvert = get_predcr(m, srcpred, 's')
    didx, dcrinvert = get_predcr(m, dstpred, 'd')

    # store fetched masks, for either intpred or crpred
    # when src/dst step is not zero, the skipped mask bits need to be
    # shifted-out, before actually storing them in src/dest mask
    new_srcmask = Signal(64, reset_less=True)
    new_dstmask = Signal(64, reset_less=True)

    # flags "CR regfile data is valid this cycle".  declared up-front
    # because both arms of the CR_READ loop condition drive it.
    cr_read = Signal(reset_less=True)

    with m.FSM(name="fetch_predicate"):

        with m.State("FETCH_PRED_IDLE"):
            comb += pred_insn_o_ready.eq(1)
            with m.If(pred_insn_i_valid):
                with m.If(predmode == SVP64PredMode.INT):
                    # skip fetching destination mask register, when zero
                    with m.If(dall1s):
                        sync += new_dstmask.eq(-1)
                        # directly go to fetch source mask register
                        # guaranteed not to be zero (otherwise predmode
                        # would be SVP64PredMode.ALWAYS, not INT)
                        comb += int_pred.addr.eq(sregread)
                        comb += int_pred.ren.eq(1)
                        m.next = "INT_SRC_READ"
                    # fetch destination predicate register
                    with m.Else():
                        comb += int_pred.addr.eq(dregread)
                        comb += int_pred.ren.eq(1)
                        m.next = "INT_DST_READ"
                with m.Elif(predmode == SVP64PredMode.CR):
                    # go fetch masks from the CR register file
                    sync += new_srcmask.eq(0)
                    sync += new_dstmask.eq(0)
                    m.next = "CR_READ"
                with m.Else():
                    # no predication: both masks are all-ones
                    sync += self.srcmask.eq(-1)
                    sync += self.dstmask.eq(-1)
                    m.next = "FETCH_PRED_DONE"

        with m.State("INT_DST_READ"):
            # store destination mask
            inv = Repl(dinvert, 64)
            with m.If(dunary):
                # set selected mask bit for 1<<r3 mode
                dst_shift = Signal(range(64))
                comb += dst_shift.eq(int_pred.o_data & 0b111111)
                sync += new_dstmask.eq(1 << dst_shift)
            with m.Else():
                # invert mask if requested
                sync += new_dstmask.eq(int_pred.o_data ^ inv)
            # skip fetching source mask register, when zero
            with m.If(sall1s):
                sync += new_srcmask.eq(-1)
                m.next = "FETCH_PRED_SHIFT_MASK"
            # fetch source predicate register
            with m.Else():
                comb += int_pred.addr.eq(sregread)
                comb += int_pred.ren.eq(1)
                m.next = "INT_SRC_READ"

        with m.State("INT_SRC_READ"):
            # store source mask
            inv = Repl(sinvert, 64)
            with m.If(sunary):
                # set selected mask bit for 1<<r3 mode
                src_shift = Signal(range(64))
                comb += src_shift.eq(int_pred.o_data & 0b111111)
                sync += new_srcmask.eq(1 << src_shift)
            with m.Else():
                # invert mask if requested
                sync += new_srcmask.eq(int_pred.o_data ^ inv)
            m.next = "FETCH_PRED_SHIFT_MASK"

        # fetch masks from the CR register file
        # implements the following loop:
        # idx, inv = get_predcr(mask)
        # mask = 0
        # for cr_idx in range(vl):
        #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
        #     if cr[idx] ^ inv:
        #         mask |= 1 << cr_idx
        # return mask
        with m.State("CR_READ"):
            # CR index to be read, which will be ready by the next cycle
            cr_idx = Signal.like(cur_vl, reset_less=True)
            # submit the read operation to the regfile
            with m.If(cr_idx != cur_vl):
                # the CR read port is unary ...
                # ren = 1 << cr_idx
                # ... in MSB0 convention ...
                # ren = 1 << (7 - cr_idx)
                # ... and with an offset:
                # ren = 1 << (7 - off - cr_idx)
                idx = SVP64CROffs.CRPred + cr_idx
                comb += cr_pred.ren.eq(1 << (7 - idx))
                # signal data valid in the next cycle
                sync += cr_read.eq(1)
                # load the next index
                sync += cr_idx.eq(cr_idx + 1)
            with m.Else():
                # exit on loop end
                sync += cr_read.eq(0)
                sync += cr_idx.eq(0)
                m.next = "FETCH_PRED_SHIFT_MASK"
            with m.If(cr_read):
                # compensate for the one cycle delay on the regfile
                cur_cr_idx = Signal.like(cur_vl)
                comb += cur_cr_idx.eq(cr_idx - 1)
                # read the CR field, select the appropriate bit
                cr_field = Signal(4)
                scr_bit = Signal()
                dcr_bit = Signal()
                comb += cr_field.eq(cr_pred.o_data)
                comb += scr_bit.eq(cr_field.bit_select(sidx, 1)
                                   ^ scrinvert)
                comb += dcr_bit.eq(cr_field.bit_select(didx, 1)
                                   ^ dcrinvert)
                # set the corresponding mask bit
                bit_to_set = Signal.like(self.srcmask)
                comb += bit_to_set.eq(1 << cur_cr_idx)
                with m.If(scr_bit):
                    sync += new_srcmask.eq(new_srcmask | bit_to_set)
                with m.If(dcr_bit):
                    sync += new_dstmask.eq(new_dstmask | bit_to_set)

        with m.State("FETCH_PRED_SHIFT_MASK"):
            # shift-out skipped mask bits
            sync += self.srcmask.eq(new_srcmask >> srcstep)
            sync += self.dstmask.eq(new_dstmask >> dststep)
            m.next = "FETCH_PRED_DONE"

        with m.State("FETCH_PRED_DONE"):
            comb += pred_mask_o_valid.eq(1)
            with m.If(pred_mask_i_ready):
                m.next = "FETCH_PRED_IDLE"
def issue_fsm(self, m, core, nia,
              dbg, core_rst, is_svp64_mode,
              fetch_pc_o_ready, fetch_pc_i_valid,
              fetch_insn_o_valid, fetch_insn_i_ready,
              pred_insn_i_valid, pred_insn_o_ready,
              pred_mask_o_valid, pred_mask_i_ready,
              exec_insn_i_valid, exec_insn_o_ready,
              exec_pc_o_valid, exec_pc_i_ready):
    """issue FSM

    decode / issue FSM.  this interacts with the "fetch" FSM
    through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
    (outgoing). also interacts with the "execute" FSM
    through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
    (incoming).
    SVP64 RM prefixes have already been set up by the
    "fetch" phase, so execute is fairly straightforward.

    Arguments:

    * m: the Module to which the FSM and its statements are added
    * core: the core being issued to (core.i is loaded in DECODE_SV)
    * nia: next instruction address, written to the PC state register
      when an instruction (or a whole VL loop) completes
    * dbg: debug interface (core_stop_o / core_stopped_i handshake)
    * core_rst: core reset, holds the FSM in ISSUE_START
    * is_svp64_mode: set by fetch when the instruction is SVP64-prefixed
    * fetch_pc_* / fetch_insn_*: ready/valid handshakes with fetch FSM
    * pred_insn_* / pred_mask_*: ready/valid handshakes with the
      predicate-fetch FSM
    * exec_insn_* / exec_pc_*: ready/valid handshakes with execute FSM
    """

    comb = m.d.comb
    sync = m.d.sync
    pdecode2 = self.pdecode2
    cur_state = self.cur_state
    new_svstate = self.new_svstate

    # temporaries
    dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode

    # for updating svstate (things like srcstep etc.)
    comb += new_svstate.eq(cur_state.svstate)

    # precalculate srcstep+1 and dststep+1
    cur_srcstep = cur_state.svstate.srcstep
    cur_dststep = cur_state.svstate.dststep
    next_srcstep = Signal.like(cur_srcstep)
    next_dststep = Signal.like(cur_dststep)
    comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
    comb += next_dststep.eq(cur_state.svstate.dststep+1)

    # current vector length: used by INSN_WAIT, PRED_SKIP and EXECUTE_WAIT
    cur_vl = cur_state.svstate.vl

    # note if an exception happened.  in a pipelined or OoO design
    # this needs to be accompanied by "shadowing" (or stalling)
    exc_happened = self.core.o.exc_happened
    # also note instruction fetch failed
    if hasattr(core, "icache"):
        fetch_failed = core.icache.i_out.fetch_failed
        flush_needed = True
        # set to fault in decoder
        # update (highest priority) instruction fault
        rising_fetch_failed = rising_edge(m, fetch_failed)
        with m.If(rising_fetch_failed):
            sync += pdecode2.instr_fault.eq(1)
    else:
        fetch_failed = Const(0, 1)
        flush_needed = False

    with m.FSM(name="issue_fsm"):

        # sync with the "fetch" phase which is reading the instruction
        # at this point, there is no instruction running, that
        # could inadvertently update the PC.
        with m.State("ISSUE_START"):
            # reset instruction fault
            sync += pdecode2.instr_fault.eq(0)
            # wait on "core stop" release, before next fetch
            # need to do this here, in case we are in a VL==0 loop
            with m.If(~dbg.core_stop_o & ~core_rst):
                comb += fetch_pc_i_valid.eq(1)  # tell fetch to start
                with m.If(fetch_pc_o_ready):   # fetch acknowledged us
                    m.next = "INSN_WAIT"
            with m.Else():
                # tell core it's stopped, and acknowledge debug handshake
                comb += dbg.core_stopped_i.eq(1)
                # while stopped, allow updating SVSTATE
                with m.If(self.svstate_i.ok):
                    comb += new_svstate.eq(self.svstate_i.data)
                    comb += self.update_svstate.eq(1)
                    sync += self.sv_changed.eq(1)

        # wait for an instruction to arrive from Fetch
        with m.State("INSN_WAIT"):
            # when using "single-step" mode, checking dbg.stopping_o
            # prevents progress.  allow issue to proceed once started
            stopping = Const(0)
            # if self.allow_overlap:
            #    stopping = dbg.stopping_o
            with m.If(stopping):
                # stopping: jump back to idle
                m.next = "ISSUE_START"
                if flush_needed:
                    # request the icache to stop asserting "failed"
                    comb += core.icache.flush_in.eq(1)
                # stop instruction fault
                sync += pdecode2.instr_fault.eq(0)
            with m.Else():
                comb += fetch_insn_i_ready.eq(1)
                with m.If(fetch_insn_o_valid):
                    # loop into ISSUE_START if it's a SVP64 instruction
                    # and VL == 0.  this because VL==0 is a for-loop
                    # from 0 to 0 i.e. always, always a NOP.
                    with m.If(is_svp64_mode & (cur_vl == 0)):
                        # update the PC before fetching the next instruction
                        # since we are in a VL==0 loop, no instruction was
                        # executed that we could be overwriting
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.i_data.eq(nia)
                        comb += self.insn_done.eq(1)
                        m.next = "ISSUE_START"
                    with m.Else():
                        if self.svp64_en:
                            m.next = "PRED_START"  # fetching predicate
                        else:
                            m.next = "DECODE_SV"  # skip predication

        with m.State("PRED_START"):
            comb += pred_insn_i_valid.eq(1)  # tell fetch_pred to start
            with m.If(pred_insn_o_ready):  # fetch_pred acknowledged us
                m.next = "MASK_WAIT"

        with m.State("MASK_WAIT"):
            comb += pred_mask_i_ready.eq(1)  # ready to receive the masks
            with m.If(pred_mask_o_valid):  # predication masks are ready
                m.next = "PRED_SKIP"

        # skip zeros in predicate
        with m.State("PRED_SKIP"):
            with m.If(~is_svp64_mode):
                m.next = "DECODE_SV"  # nothing to do
            with m.Else():
                if self.svp64_en:
                    pred_src_zero = pdecode2.rm_dec.pred_sz
                    pred_dst_zero = pdecode2.rm_dec.pred_dz

                    # new srcstep, after skipping zeros
                    skip_srcstep = Signal.like(cur_srcstep)
                    # value to be added to the current srcstep
                    src_delta = Signal.like(cur_srcstep)
                    # add leading zeros to srcstep, if not in zero mode
                    with m.If(~pred_src_zero):
                        # priority encoder (count leading zeros)
                        # append guard bit, in case the mask is all zeros
                        pri_enc_src = PriorityEncoder(65)
                        m.submodules.pri_enc_src = pri_enc_src
                        comb += pri_enc_src.i.eq(Cat(self.srcmask,
                                                     Const(1, 1)))
                        comb += src_delta.eq(pri_enc_src.o)
                    # apply delta to srcstep
                    comb += skip_srcstep.eq(cur_srcstep + src_delta)
                    # shift-out all leading zeros from the mask
                    # plus the leading "one" bit
                    # TODO count leading zeros and shift-out the zero
                    #      bits, in the same step, in hardware
                    sync += self.srcmask.eq(self.srcmask >> (src_delta+1))

                    # same as above, but for dststep
                    skip_dststep = Signal.like(cur_dststep)
                    dst_delta = Signal.like(cur_dststep)
                    with m.If(~pred_dst_zero):
                        pri_enc_dst = PriorityEncoder(65)
                        m.submodules.pri_enc_dst = pri_enc_dst
                        comb += pri_enc_dst.i.eq(Cat(self.dstmask,
                                                     Const(1, 1)))
                        comb += dst_delta.eq(pri_enc_dst.o)
                    comb += skip_dststep.eq(cur_dststep + dst_delta)
                    sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))

                    # TODO: initialize mask[VL]=1 to avoid passing past VL
                    with m.If((skip_srcstep >= cur_vl) |
                              (skip_dststep >= cur_vl)):
                        # end of VL loop. Update PC and reset src/dst step
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.i_data.eq(nia)
                        comb += new_svstate.srcstep.eq(0)
                        comb += new_svstate.dststep.eq(0)
                        comb += self.update_svstate.eq(1)
                        # synchronize with the simulator
                        comb += self.insn_done.eq(1)
                        # go back to Issue
                        m.next = "ISSUE_START"
                    with m.Else():
                        # update new src/dst step
                        comb += new_svstate.srcstep.eq(skip_srcstep)
                        comb += new_svstate.dststep.eq(skip_dststep)
                        comb += self.update_svstate.eq(1)
                        # proceed to Decode
                        m.next = "DECODE_SV"

                    # pass predicate mask bits through to satellite decoders
                    # TODO: for SIMD this will be *multiple* bits
                    sync += core.i.sv_pred_sm.eq(self.srcmask[0])
                    sync += core.i.sv_pred_dm.eq(self.dstmask[0])

        # after src/dst step have been updated, we are ready
        # to decode the instruction
        with m.State("DECODE_SV"):
            # decode the instruction
            with m.If(~fetch_failed):
                sync += pdecode2.instr_fault.eq(0)
            sync += core.i.e.eq(pdecode2.e)
            sync += core.i.state.eq(cur_state)
            sync += core.i.raw_insn_i.eq(dec_opcode_i)
            sync += core.i.bigendian_i.eq(self.core_bigendian_i)
            if self.svp64_en:
                sync += core.i.sv_rm.eq(pdecode2.sv_rm)
                # set RA_OR_ZERO detection in satellite decoders
                sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
                # and svp64 detection
                sync += core.i.is_svp64_mode.eq(is_svp64_mode)
                # and svp64 bit-rev'd ldst mode
                ldst_dec = pdecode2.use_svp64_ldst_dec
                sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
            # after decoding, reset any previous exception condition,
            # allowing it to be set again during the next execution
            sync += pdecode2.ldst_exc.eq(0)

            m.next = "INSN_EXECUTE"  # move to "execute"

        # handshake with execution FSM, move to "wait" once acknowledged
        with m.State("INSN_EXECUTE"):
            # when using "single-step" mode, checking dbg.stopping_o
            # prevents progress.  allow execute to proceed once started
            stopping = Const(0)
            # if self.allow_overlap:
            #    stopping = dbg.stopping_o
            with m.If(stopping):
                # stopping: jump back to idle
                m.next = "ISSUE_START"
                if flush_needed:
                    # request the icache to stop asserting "failed"
                    comb += core.icache.flush_in.eq(1)
                # stop instruction fault
                sync += pdecode2.instr_fault.eq(0)
            with m.Else():
                comb += exec_insn_i_valid.eq(1)  # trigger execute
                with m.If(exec_insn_o_ready):   # execute acknowledged us
                    m.next = "EXECUTE_WAIT"

        with m.State("EXECUTE_WAIT"):
            comb += exec_pc_i_ready.eq(1)
            # see https://bugs.libre-soc.org/show_bug.cgi?id=636
            # the exception info needs to be blatted into
            # pdecode.ldst_exc, and the instruction "re-run".
            # when ldst_exc.happened is set, the PowerDecoder2
            # reacts very differently: it re-writes the instruction
            # with a "trap" (calls PowerDecoder2.trap()) which
            # will *overwrite* whatever was requested and jump the
            # PC to the exception address, as well as alter MSR.
            # nothing else needs to be done other than to note
            # the change of PC and MSR (and, later, SVSTATE)
            with m.If(exc_happened):
                mmu = core.fus.get_exc("mmu0")
                ldst = core.fus.get_exc("ldst0")
                # NOTE(review): guard presumes get_exc returns None when
                # no "mmu0" Function Unit is configured - confirm
                if mmu is not None:
                    with m.If(fetch_failed):
                        # instruction fetch: exception is from MMU
                        # reset instr_fault (highest priority)
                        sync += pdecode2.ldst_exc.eq(mmu)
                        sync += pdecode2.instr_fault.eq(0)
                        if flush_needed:
                            # request icache to stop asserting "failed"
                            comb += core.icache.flush_in.eq(1)
                with m.If(~fetch_failed):
                    # otherwise assume it was a LDST exception
                    sync += pdecode2.ldst_exc.eq(ldst)

            with m.If(exec_pc_o_valid):

                # was this the last loop iteration?
                is_last = Signal()
                comb += is_last.eq(next_srcstep == cur_vl)

                with m.If(pdecode2.instr_fault):
                    # reset instruction fault, try again
                    sync += pdecode2.instr_fault.eq(0)
                    m.next = "ISSUE_START"

                # return directly to Decode if Execute generated an
                # exception.
                with m.Elif(pdecode2.ldst_exc.happened):
                    m.next = "DECODE_SV"

                # if MSR, PC or SVSTATE were changed by the previous
                # instruction, go directly back to Fetch, without
                # updating either MSR PC or SVSTATE
                with m.Elif(self.msr_changed | self.pc_changed |
                            self.sv_changed):
                    m.next = "ISSUE_START"

                # also return to Fetch, when no output was a vector
                # (regardless of SRCSTEP and VL), or when the last
                # instruction was really the last one of the VL loop
                with m.Elif((~pdecode2.loop_continue) | is_last):
                    # before going back to fetch, update the PC state
                    # register with the NIA.
                    # ok here we are not reading the branch unit.
                    # TODO: this just blithely overwrites whatever
                    #       pipeline updated the PC
                    comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                    comb += self.state_w_pc.i_data.eq(nia)
                    # reset SRCSTEP before returning to Fetch
                    if self.svp64_en:
                        with m.If(pdecode2.loop_continue):
                            comb += new_svstate.srcstep.eq(0)
                            comb += new_svstate.dststep.eq(0)
                            comb += self.update_svstate.eq(1)
                    else:
                        comb += new_svstate.srcstep.eq(0)
                        comb += new_svstate.dststep.eq(0)
                        comb += self.update_svstate.eq(1)
                    m.next = "ISSUE_START"

                # returning to Execute? then, first update SRCSTEP
                with m.Else():
                    comb += new_svstate.srcstep.eq(next_srcstep)
                    comb += new_svstate.dststep.eq(next_dststep)
                    comb += self.update_svstate.eq(1)
                    # return to mask skip loop
                    m.next = "PRED_SKIP"

    # check if svstate needs updating: if so, write it to State Regfile
    with m.If(self.update_svstate):
        sync += cur_state.svstate.eq(self.new_svstate)  # for next clock
def execute_fsm(self, m, core,
                exec_insn_i_valid, exec_insn_o_ready,
                exec_pc_o_valid, exec_pc_i_ready):
    """execute FSM

    execute FSM. this interacts with the "issue" FSM
    through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
    (outgoing). SVP64 RM prefixes have already been set up by the
    "issue" phase, so execute is fairly straightforward.

    Arguments:

    * m: the Module to which the FSM and its statements are added
    * core: the core executing the instruction (core.p / core.n are
      its input and output handshake interfaces)
    * exec_insn_i_valid: input, issue FSM has an instruction ready
    * exec_insn_o_ready: output, asserted while in INSN_START
    * exec_pc_o_valid: output, asserted once the core is no longer busy
    * exec_pc_i_ready: input, issue FSM is waiting for completion
    """

    comb = m.d.comb
    sync = m.d.sync
    dbg = self.dbg
    pdecode2 = self.pdecode2

    # temporaries
    core_busy_o = core.n.o_data.busy_o  # core is busy
    core_ivalid_i = core.p.i_valid      # instruction is valid

    with m.FSM(name="exec_fsm"):

        # waiting for instruction bus (stays there until not busy)
        with m.State("INSN_START"):
            comb += exec_insn_o_ready.eq(1)
            with m.If(exec_insn_i_valid):
                comb += core_ivalid_i.eq(1)  # instruction is valid/issued
                # clear the "state register was written" trackers so
                # that they only reflect the instruction now starting
                sync += self.sv_changed.eq(0)
                sync += self.pc_changed.eq(0)
                sync += self.msr_changed.eq(0)
                with m.If(core.p.o_ready):  # only move if accepted
                    m.next = "INSN_ACTIVE"  # move to "wait completion"

        # instruction started: must wait till it finishes
        with m.State("INSN_ACTIVE"):
            # note changes to MSR, PC and SVSTATE
            # XXX oops, really must monitor *all* State Regfile write
            # ports looking for changes!
            with m.If(self.state_nia.wen & (1 << StateRegs.SVSTATE)):
                sync += self.sv_changed.eq(1)
            with m.If(self.state_nia.wen & (1 << StateRegs.MSR)):
                sync += self.msr_changed.eq(1)
            with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
                sync += self.pc_changed.eq(1)
            with m.If(~core_busy_o):  # instruction done!
                comb += exec_pc_o_valid.eq(1)
                with m.If(exec_pc_i_ready):
                    # when finished, indicate "done".
                    # however, if there was an exception, the instruction
                    # is *not* yet done.  this is an implementation
                    # detail: we choose to implement exceptions by
                    # taking the exception information from the LDST
                    # unit, putting that *back* into the PowerDecoder2,
                    # and *re-running the entire instruction*.
                    # if we erroneously indicate "done" here, it is as if
                    # there were *TWO* instructions:
                    # 1) the failed LDST 2) a TRAP.
                    with m.If(~pdecode2.ldst_exc.happened &
                              ~pdecode2.instr_fault):
                        comb += self.insn_done.eq(1)
                    m.next = "INSN_START"  # back to fetch
            # terminate returns directly to INSN_START
            with m.If(dbg.terminate_i):
                # comb += self.insn_done.eq(1) - no because it's not
                m.next = "INSN_START"  # back to fetch
def elaborate(self, platform):
    """Build the issuer: wire up the core and debug signals, create the
    inter-FSM handshake signals, and instantiate the fetch, issue,
    predicate-fetch (only if self.svp64_en) and execute FSMs.

    Arguments:

    * platform: passed straight through to the parent elaborate()

    Returns the Module produced by the parent class, with the FSMs added.
    """
    m = super().elaborate(platform)
    # convenience
    comb, sync = m.d.comb, m.d.sync
    dbg = self.dbg
    core = self.core

    # set up peripherals and core
    core_rst = self.core_rst

    # indicate to outside world if any FU is still executing
    comb += self.any_busy.eq(core.n.o_data.any_busy_o)  # any FU executing

    # address of the next instruction, in the absence of a branch
    # depends on the instruction size
    nia = Signal(64)

    # connect up debug signals
    with m.If(core.o.core_terminate_o):
        comb += dbg.terminate_i.eq(1)

    # pass the prefix mode from Fetch to Issue, so the latter can loop
    # on VL==0
    is_svp64_mode = Signal()

    # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
    # issue, decode/execute, now joined by "Predicate fetch/calculate".
    # these are the handshake signals between each

    # fetch FSM can run as soon as the PC is valid
    fetch_pc_i_valid = Signal()  # Execute tells Fetch "start next read"
    fetch_pc_o_ready = Signal()  # Fetch Tells SVSTATE "proceed"

    # fetch FSM hands over the instruction to be decoded / issued
    fetch_insn_o_valid = Signal()
    fetch_insn_i_ready = Signal()

    # predicate fetch FSM decodes and fetches the predicate
    pred_insn_i_valid = Signal()
    pred_insn_o_ready = Signal()

    # predicate fetch FSM delivers the masks
    pred_mask_o_valid = Signal()
    pred_mask_i_ready = Signal()

    # issue FSM delivers the instruction to the be executed
    exec_insn_i_valid = Signal()
    exec_insn_o_ready = Signal()

    # execute FSM, hands over the PC/SVSTATE back to the issue FSM
    exec_pc_o_valid = Signal()
    exec_pc_i_ready = Signal()

    # the FSMs here are perhaps unusual in that they detect conditions
    # then "hold" information, combinatorially, for the core
    # (as opposed to using sync - which would be on a clock's delay)
    # this includes the actual opcode, valid flags and so on.

    # Fetch, then predicate fetch, then Issue, then Execute.
    # Issue is where the VL for-loop # lives.  the ready/valid
    # signalling is used to communicate between the four.

    self.fetch_fsm(m, dbg, core, dbg.state.pc, dbg.state.msr,
                   dbg.state.svstate, nia, is_svp64_mode,
                   fetch_pc_o_ready, fetch_pc_i_valid,
                   fetch_insn_o_valid, fetch_insn_i_ready)

    self.issue_fsm(m, core, nia,
                   dbg, core_rst, is_svp64_mode,
                   fetch_pc_o_ready, fetch_pc_i_valid,
                   fetch_insn_o_valid, fetch_insn_i_ready,
                   pred_insn_i_valid, pred_insn_o_ready,
                   pred_mask_o_valid, pred_mask_i_ready,
                   exec_insn_i_valid, exec_insn_o_ready,
                   exec_pc_o_valid, exec_pc_i_ready)

    # the predicate-fetch FSM only exists when SVP64 is enabled
    if self.svp64_en:
        self.fetch_predicate_fsm(m,
                                 pred_insn_i_valid, pred_insn_o_ready,
                                 pred_mask_o_valid, pred_mask_i_ready)

    self.execute_fsm(m, core,
                     exec_insn_i_valid, exec_insn_o_ready,
                     exec_pc_o_valid, exec_pc_i_ready)

    # whatever was done above, over-ride it if core reset is held
    with m.If(core_rst):
        sync += nia.eq(0)

    return m
class TestIssuer(Elaboratable):
    """Top-level wrapper around TestIssuerInternal which additionally
    sets up the clocks: an optional PLL (enabled when pspec.use_pll is
    set) plus the core and debug clock domains.
    """

    def __init__(self, pspec):
        """pspec: the configuration object.  if it has a true "use_pll"
        attribute the PLL-related signals are created as well.
        """
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL(instance=True)

        self.dbg_rst_i = Signal(reset_less=True)

        # PLL direct clock or not
        self.pll_en = getattr(pspec, "use_pll", False)
        if self.pll_en:
            self.pll_test_o = Signal(reset_less=True)
            self.pll_vco_o = Signal(reset_less=True)
            self.clk_sel_i = Signal(2, reset_less=True)
            self.ref_clk = ClockSignal()  # can't rename it but that's ok
            self.pllclk_clk = ClockSignal("pllclk")

    def elaborate(self, platform):
        """Instantiate the issuer (and, if enabled, the PLL) and connect
        the core / debug clocks and the debug reset.  Returns the Module.
        """
        m = Module()
        comb = m.d.comb

        # TestIssuer nominally runs at main clock, actually it is
        # all combinatorial internally except for coresync'd components
        m.submodules.ti = ti = self.ti

        if self.pll_en:
            # ClockSelect runs at PLL output internal clock rate
            m.submodules.wrappll = pll = self.pll

            # add clock domains from PLL
            cd_pll = ClockDomain("pllclk")
            m.domains += cd_pll

            # PLL clock established.  has the side-effect of running clklsel
            # at the PLL's speed (see DomainRenamer("pllclk") above)
            pllclk = self.pllclk_clk
            comb += pllclk.eq(pll.clk_pll_o)

            # wire up external 24mhz to PLL
            # comb += pll.clk_24_i.eq(self.ref_clk)
            # output 18 mhz PLL test signal, and analog oscillator out
            comb += self.pll_test_o.eq(pll.pll_test_o)
            comb += self.pll_vco_o.eq(pll.pll_vco_o)

            # input to pll clock selection
            comb += pll.clk_sel_i.eq(self.clk_sel_i)

            # now wire up ResetSignals.  don't mind them being in this domain
            pll_rst = ResetSignal("pllclk")
            comb += pll_rst.eq(ResetSignal())

        # internal clock is set to selector clock-out.  has the side-effect of
        # running TestIssuer at this speed (see DomainRenamer("intclk") above)
        # debug clock runs at coresync internal clock
        if self.ti.dbg_domain != 'sync':
            # NOTE(review): this domain object is created but never added
            # to m.domains here - confirm whether that is intentional
            cd_dbgsync = ClockDomain("dbgsync")
        intclk = ClockSignal(self.ti.core_domain)
        dbgclk = ClockSignal(self.ti.dbg_domain)
        # XXX BYPASS PLL XXX
        # XXX BYPASS PLL XXX
        # XXX BYPASS PLL XXX
        if self.pll_en:
            comb += intclk.eq(self.ref_clk)
            assert self.ti.core_domain != 'sync', \
                "cannot set core_domain to sync and use pll at the same time"
        else:
            if self.ti.core_domain != 'sync':
                comb += intclk.eq(ClockSignal())
        if self.ti.dbg_domain != 'sync':
            dbgclk = ClockSignal(self.ti.dbg_domain)
            comb += dbgclk.eq(intclk)
        comb += self.ti.dbg_rst_i.eq(self.dbg_rst_i)

        return m

    def ports(self):
        """All ports of the issuer and the PLL, plus main clock and reset."""
        return list(self.ti.ports()) + list(self.pll.ports()) + \
            [ClockSignal(), ResetSignal()]

    def external_ports(self):
        """External ports of the issuer plus main clock and reset, and,
        when the PLL is enabled, the PLL control / test / clock signals.
        """
        ports = self.ti.external_ports()
        ports.append(ClockSignal())
        ports.append(ResetSignal())
        if self.pll_en:
            ports.append(self.clk_sel_i)
            ports.append(self.pll.clk_24_i)
            ports.append(self.pll_test_o)
            ports.append(self.pll_vco_o)
            ports.append(self.pllclk_clk)
            ports.append(self.ref_clk)
        return ports
1651 if __name__
== '__main__':
1652 units
= {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
1658 pspec
= TestMemPspec(ldst_ifacetype
='bare_wb',
1659 imem_ifacetype
='bare_wb',
1664 dut
= TestIssuer(pspec
)
1665 vl
= main(dut
, ports
=dut
.ports(), name
="test_issuer")
1667 if len(sys
.argv
) == 1:
1668 vl
= rtlil
.convert(dut
, ports
=dut
.external_ports(), name
="test_issuer")
1669 with
open("test_issuer.il", "w") as f
: