make FetchFSM take PC as an input in its ispec
[soc.git] / src / soc / simple / issuer.py
1 """simple core issuer
2
3 not in any way intended for production use. this runs a FSM that:
4
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
9 * increments the PC
10 * does it all over again
11
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to at least provide something that can be further incrementally
15 improved.
16 """
17
18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
19 ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
20 from nmigen.cli import rtlil
21 from nmigen.cli import main
22 import sys
23
24 from nmutil.singlepipe import ControlBase
25 from soc.simple.core_data import FetchOutput, FetchInput
26
27 from nmigen.lib.coding import PriorityEncoder
28
29 from openpower.decoder.power_decoder import create_pdecode
30 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
31 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
32 from openpower.decoder.decode2execute1 import Data
33 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
34 SVP64PredMode)
35 from openpower.state import CoreState
36 from openpower.consts import (CR, SVP64CROffs)
37 from soc.experiment.testmem import TestMemory # test only for instructions
38 from soc.regfile.regfiles import StateRegs, FastRegs
39 from soc.simple.core import NonProductionCore
40 from soc.config.test.test_loadstore import TestMemPspec
41 from soc.config.ifetch import ConfigFetchUnit
42 from soc.debug.dmi import CoreDebug, DMIInterface
43 from soc.debug.jtag import JTAG
44 from soc.config.pinouts import get_pinspecs
45 from soc.interrupts.xics import XICS_ICP, XICS_ICS
46 from soc.bus.simple_gpio import SimpleGPIO
47 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
48 from soc.clock.select import ClockSelect
49 from soc.clock.dummypll import DummyPLL
50 from openpower.sv.svstate import SVSTATERec
51
52
53 from nmutil.util import rising_edge
54
def get_insn(f_instr_o, pc):
    """Pick the 32-bit instruction word out of the fetch-unit output.

    A 32-bit wide ``f_instr_o`` is handed back untouched.  Any other
    width is treated as a 64-bit fetch, in which case bit 2 of ``pc``
    decides which 32-bit word is selected.
    """
    if f_instr_o.width != 32:
        # 64-bit: bit 2 of pc decides which word to select
        return f_instr_o.word_select(pc[2], 32)
    return f_instr_o
61
62 # gets state input or reads from state regfile
def state_get(m, core_rst, state_i, name, regfile, regnum):
    """Produce a 64-bit signal carrying a state value (PC, SVSTATE, ...).

    * when ``state_i.ok`` is set, ``state_i.data`` is used as an override
    * otherwise a read of entry ``regnum`` is requested on ``regfile``
      (``ren`` is a unary enable) and ``regfile.o_data`` is picked up
      once the registered ``<name>_ok_delay`` flag is set, i.e. one
      clock after ``state_i.ok`` was seen low

    nothing is driven while ``core_rst`` is asserted.  returns the signal.
    """
    value = Signal(64, reset_less=True, name=name)
    regfile_valid = Signal(name="%s_ok_delay" % name)
    with m.If(~core_rst):
        # remember, for the next clock, whether the regfile was being read
        m.d.sync += regfile_valid.eq(~state_i.ok)
        with m.If(state_i.ok):
            # incoming override (start from pc_i)
            m.d.comb += value.eq(state_i.data)
        with m.Else():
            # otherwise read the state regfile ...
            m.d.comb += regfile.ren.eq(1 << regnum)
        # ... whose data only arrives on a 1-clock delay
        with m.If(regfile_valid):
            m.d.comb += value.eq(regfile.o_data)
    return value
81
82
def get_predint(m, mask, name):
    """decode an SVP64 integer-predicate mask field into "what to do"

    equivalent to the corresponding function in ISACaller, except that the
    INT register is not read here: only the decision of which register to
    read and how to post-process it is produced.

    returns a tuple (regread, invert, unary, all1s) of signals:

    * regread - the GPR register number to be read
    * invert  - set when the register value is to be bit-inverted
    * unary   - set when the register contents are to be used as 1<<r3
    * all1s   - set when no mask is to be applied
    """
    regread = Signal(5, name=name+"regread")
    invert = Signal(name=name+"invert")
    unary = Signal(name=name+"unary")
    all1s = Signal(name=name+"all1s")

    # one row per predicate: (predicate, GPR number, invert, unary, all1s)
    # a GPR number of None means "no register is read"
    decode_table = (
        (SVP64PredInt.ALWAYS,   None, False, False, True),
        (SVP64PredInt.R3_UNARY, 3,    False, True,  False),
        (SVP64PredInt.R3,       3,    False, False, False),
        (SVP64PredInt.R3_N,     3,    True,  False, False),
        (SVP64PredInt.R10,      10,   False, False, False),
        (SVP64PredInt.R10_N,    10,   True,  False, False),
        (SVP64PredInt.R30,      30,   False, False, False),
        (SVP64PredInt.R30_N,    30,   True,  False, False),
    )

    with m.Switch(mask):
        for pred, gpr, do_invert, do_unary, no_mask in decode_table:
            with m.Case(pred.value):
                # only the signals that differ from their reset value
                # (zero) are driven in each case
                if no_mask:
                    m.d.comb += all1s.eq(1)  # use 0b1111 (all ones)
                if gpr is not None:
                    m.d.comb += regread.eq(gpr)
                if do_unary:
                    m.d.comb += unary.eq(1)  # 1<<r3 - shift r3 (single bit)
                if do_invert:
                    m.d.comb += invert.eq(1)
    return regread, invert, unary, all1s
121
122
def get_predcr(m, mask, name):
    """decode an SVP64 CR-predicate mask field into CR bit index and invert

    mirrors _get_predcr in ISACaller.  returns a tuple (idx, invert):

    * idx    - which bit of the CR field is to be tested
    * invert - set when the tested bit is to be inverted
    """
    idx = Signal(2, name=name+"idx")
    invert = Signal(name=name+"crinvert")

    # each CR bit is paired with its negated condition:
    # (predicate, CR bit tested, invert)
    decode_table = (
        (SVP64PredCR.LT, CR.LT, 0),
        (SVP64PredCR.GE, CR.LT, 1),
        (SVP64PredCR.GT, CR.GT, 0),
        (SVP64PredCR.LE, CR.GT, 1),
        (SVP64PredCR.EQ, CR.EQ, 0),
        (SVP64PredCR.NE, CR.EQ, 1),
        (SVP64PredCR.SO, CR.SO, 0),
        (SVP64PredCR.NS, CR.SO, 1),
    )

    with m.Switch(mask):
        for pred, cr_bit, negate in decode_table:
            with m.Case(pred.value):
                m.d.comb += idx.eq(cr_bit)
                m.d.comb += invert.eq(negate)
    return idx, invert
156
157
158 # Fetch Finite State Machine.
159 # WARNING: there are currently DriverConflicts but it's actually working.
class FetchFSM(ControlBase):
    """Instruction-fetch FSM with a ready/valid interface on both sides.

    The PC to fetch from arrives as ``self.i.pc`` (the ``FetchInput`` ispec)
    under the ``self.p`` handshake; completion of the fetch is signalled
    through the ``self.n`` handshake.  The fetched opcode itself is written
    into ``pdecode2.dec.raw_opcode_in``, and the address of the following
    instruction into ``nia``.

    States: IDLE -> INSN_READ -> (INSN_READ2, SVP64 prefix only) ->
    INSN_READY -> IDLE.
    """

    def __init__(self, allow_overlap, svp64_en, imem, core_rst,
                 pdecode2, cur_state,
                 dbg, core, svstate, nia, is_svp64_mode):
        """store the shared objects/signals the FSM reads and drives

        * allow_overlap - python bool: when set, ``dbg.stopping_o`` aborts
          an in-progress INSN_READ back to IDLE
        * svp64_en      - python bool: include SVP64 prefix handling
        * imem          - instruction fetch unit (a_pc_i, a_i_valid,
          f_i_valid, f_busy_o, f_instr_o are used)
        * core_rst      - core reset: forces ``nia`` to zero while held
        * pdecode2      - decoder receiving the raw opcode (and SVP64 RM)
        * cur_state     - record updated with the captured pc/svstate/msr
        * dbg           - debug interface (``stopping_o`` is used)
        * core          - core, used to reach the 'state' regfile MSR port
        * svstate       - SVSTATE value captured alongside the PC
        * nia           - signal written with the next instruction address
        * is_svp64_mode - signal recording that a SVP64 prefix was seen
        """
        self.allow_overlap = allow_overlap
        self.svp64_en = svp64_en
        self.imem = imem
        self.core_rst = core_rst
        self.pdecode2 = pdecode2
        self.cur_state = cur_state
        self.dbg = dbg
        self.core = core
        self.svstate = svstate
        self.nia = nia
        self.is_svp64_mode = is_svp64_mode

        # set up pipeline ControlBase and allocate i/o specs
        # (unusual: normally done by the Pipeline API)
        super().__init__(stage=self)
        self.p.i_data, self.n.o_data = self.new_specs(None)
        self.i, self.o = self.p.i_data, self.n.o_data

    # next 3 functions are Stage API Compliance
    def setup(self, m, i):
        """Stage API: nothing to set up, all logic lives in elaborate"""
        pass

    def ispec(self):
        """Stage API: input data specification (carries the PC)"""
        return FetchInput()

    def ospec(self):
        """Stage API: output data specification"""
        return FetchOutput()

    def elaborate(self, platform):
        """fetch FSM

        this FSM performs fetch of raw instruction data, partial-decodes
        it 32-bit at a time to detect SVP64 prefixes, and will optionally
        read a 2nd 32-bit quantity if that occurs.
        """
        m = super().elaborate(platform)

        dbg = self.dbg
        # (this used to read "self.core," - the stray trailing comma bound
        # a 1-tuple rather than the core itself)
        core = self.core
        pc = self.i.pc
        svstate = self.svstate
        nia = self.nia
        is_svp64_mode = self.is_svp64_mode
        fetch_pc_o_ready = self.p.o_ready
        fetch_pc_i_valid = self.p.i_valid
        fetch_insn_o_valid = self.n.o_valid
        fetch_insn_i_ready = self.n.i_ready

        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        cur_state = self.cur_state
        dec_opcode_o = pdecode2.dec.raw_opcode_in  # raw opcode

        # set to 1 once the MSR has been captured for the current fetch
        msr_read = Signal(reset=1)

        # don't read msr every cycle
        staterf = core.regs.rf['state']
        state_r_msr = staterf.r_ports['msr']  # MSR rd

        comb += state_r_msr.ren.eq(0)

        with m.FSM(name='fetch_fsm'):

            # waiting (zzz)
            with m.State("IDLE"):
                with m.If(~dbg.stopping_o):
                    comb += fetch_pc_o_ready.eq(1)
                with m.If(fetch_pc_i_valid):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                    sync += cur_state.pc.eq(pc)
                    sync += cur_state.svstate.eq(svstate)  # and svstate

                    # initiate read of MSR. arrives one clock later
                    comb += state_r_msr.ren.eq(1 << StateRegs.MSR)
                    sync += msr_read.eq(0)

                    m.next = "INSN_READ"  # move to "wait for bus" phase

            # dummy pause to find out why simulation is not keeping up
            with m.State("INSN_READ"):
                if self.allow_overlap:
                    stopping = dbg.stopping_o
                else:
                    stopping = Const(0)
                with m.If(stopping):
                    # stopping: jump back to idle
                    m.next = "IDLE"
                with m.Else():
                    # one cycle later, msr/sv read arrives.  valid only once.
                    with m.If(~msr_read):
                        sync += msr_read.eq(1)  # yeah don't read it again
                        sync += cur_state.msr.eq(state_r_msr.o_data)
                    with m.If(self.imem.f_busy_o):  # zzz...
                        # busy: stay in wait-read
                        comb += self.imem.a_i_valid.eq(1)
                        comb += self.imem.f_i_valid.eq(1)
                    with m.Else():
                        # not busy: instruction fetched
                        insn = get_insn(self.imem.f_instr_o, cur_state.pc)
                        if self.svp64_en:
                            # NOTE(review): neither self.svp64 nor
                            # self.core_bigendian_i is assigned in
                            # __init__ above; unless something else sets
                            # them, svp64_en=True raises AttributeError
                            # here - confirm how these get supplied
                            svp64 = self.svp64
                            # decode the SVP64 prefix, if any
                            comb += svp64.raw_opcode_in.eq(insn)
                            comb += svp64.bigendian.eq(self.core_bigendian_i)
                            # pass the decoded prefix (if any) to PowerDecoder2
                            sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
                            sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
                            # remember whether this is a prefixed instruction,
                            # so the FSM can readily loop when VL==0
                            sync += is_svp64_mode.eq(svp64.is_svp64_mode)
                            # calculate the address of the following instruction
                            insn_size = Mux(svp64.is_svp64_mode, 8, 4)
                            sync += nia.eq(cur_state.pc + insn_size)
                            with m.If(~svp64.is_svp64_mode):
                                # with no prefix, store the instruction
                                # and hand it directly to the next FSM
                                sync += dec_opcode_o.eq(insn)
                                m.next = "INSN_READY"
                            with m.Else():
                                # fetch the rest of the instruction from memory
                                comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
                                comb += self.imem.a_i_valid.eq(1)
                                comb += self.imem.f_i_valid.eq(1)
                                m.next = "INSN_READ2"
                        else:
                            # not SVP64 - 32-bit only
                            sync += nia.eq(cur_state.pc + 4)
                            sync += dec_opcode_o.eq(insn)
                            m.next = "INSN_READY"

            # second half of a prefixed (64-bit) instruction
            with m.State("INSN_READ2"):
                with m.If(self.imem.f_busy_o):  # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
                    sync += dec_opcode_o.eq(insn)
                    m.next = "INSN_READY"
                    # TODO: probably can start looking at pdecode2.rm_dec
                    # here or maybe even in INSN_READ state, if svp64_mode
                    # detected, in order to trigger - and wait for - the
                    # predicate reading.
                    if self.svp64_en:
                        pmode = pdecode2.rm_dec.predmode
                        # sketch of the intended (not yet implemented) logic:
                        #
                        # if pmode != SVP64PredMode.ALWAYS.value:
                        #     fire predicate loading FSM and wait before
                        #     moving to INSN_READY
                        # else:
                        #     sync += self.srcmask.eq(-1) # set to all 1s
                        #     sync += self.dstmask.eq(-1) # set to all 1s
                        #     m.next = "INSN_READY"

            with m.State("INSN_READY"):
                # hand over the instruction, to be decoded
                comb += fetch_insn_o_valid.eq(1)
                with m.If(fetch_insn_i_ready):
                    m.next = "IDLE"

        # whatever was done above, over-ride it if core reset is held
        with m.If(self.core_rst):
            sync += nia.eq(0)

        return m
338
339
340 class TestIssuerInternal(Elaboratable):
341 """TestIssuer - reads instructions from TestMemory and issues them
342
343 efficiency and speed is not the main goal here: functional correctness
344 and code clarity is. optimisations (which almost 100% interfere with
345 easy understanding) come later.
346 """
def __init__(self, pspec):
    """build the issuer's sub-blocks and interface signals from pspec

    optional features (svp64, regreduce, allow_overlap, jtag debug,
    4k sram blocks, xics, gpio) are switched on by attributes of pspec.
    note that enabling JTAG *adds* attributes to pspec, which is why it
    is handled before the core is constructed from that same pspec.
    """

    def enabled(option):
        # an option is on only if pspec has it *and* it compares == True
        return hasattr(pspec, option) and getattr(pspec, option) == True

    # test if SVP64 is to be enabled, if regfiles are reduced,
    # and if overlap is requested
    self.svp64_en = enabled("svp64")
    self.regreduce_en = enabled("regreduce")
    self.allow_overlap = enabled("allow_overlap")

    # JTAG interface.  add this right at the start because if it's
    # added it *modifies* the pspec, by adding enable/disable signals
    # for parts of the rest of the core
    self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
    # a separate "dbgsync" domain for the DMI/JTAG clock proved too
    # problematic, so the debug logic lives in "sync"
    self.dbg_domain = "sync"
    if not self.jtag_en:
        self.wb_sram_en = Const(1)
    else:
        # XXX MUST keep this up-to-date with litex, and
        # soc-cocotb-sim, and err.. all needs sorting out, argh
        # ('mspi1', 'pwm' and 'sd0' are disabled for now)
        pin_subset = ['uart', 'mtwi', 'eint', 'gpio', 'mspi0', 'sdr']
        self.jtag = JTAG(get_pinspecs(subset=pin_subset),
                         domain=self.dbg_domain)
        # add signals to pspec to enable/disable icache and dcache
        # (or data and instruction wishbone if icache/dcache not included)
        # https://bugs.libre-soc.org/show_bug.cgi?id=520
        # TODO: do we actually care if these are not domain-synchronised?
        # honestly probably not.
        pspec.wb_icache_en = self.jtag.wb_icache_en
        pspec.wb_dcache_en = self.jtag.wb_dcache_en
        self.wb_sram_en = self.jtag.wb_sram_en

    # add 4k sram blocks?  (the 'err' feature is not requested)
    self.sram4x4k = enabled("sram4x4kblock")
    if self.sram4x4k:
        self.sram4k = [SPBlock512W64B8W(name="sram4k_%d" % blk)
                       for blk in range(4)]

    # add interrupt controller?
    self.xics = enabled("xics")
    if self.xics:
        self.xics_icp = XICS_ICP()
        self.xics_ics = XICS_ICS()
        self.int_level_i = self.xics_ics.int_level_i

    # add GPIO peripheral?
    self.gpio = enabled("gpio")
    if self.gpio:
        self.simple_gpio = SimpleGPIO()
        self.gpio_o = self.simple_gpio.gpio_o

    # main instruction core.  suitable for prototyping / demo only
    self.core = core = NonProductionCore(pspec)
    self.core_rst = ResetSignal("coresync")

    # instruction decoder.  goes into Trap Record
    self.cur_state = CoreState("cur")  # current state (MSR/PC/SVSTATE)
    self.pdecode2 = PowerDecode2(None, state=self.cur_state,
                                 opkls=IssuerDecode2ToOperand,
                                 svp64_en=self.svp64_en,
                                 regreduce_en=self.regreduce_en)
    pdecode = self.pdecode2.dec

    if self.svp64_en:
        self.svp64 = SVP64PrefixDecoder()  # for decoding SVP64 prefix

    # Test Instruction memory
    self.imem = ConfigFetchUnit(pspec).fu

    # DMI interface
    self.dbg = CoreDebug()

    # instruction go/monitor
    self.pc_o = Signal(64, reset_less=True)
    self.pc_i = Data(64, "pc_i")  # set "ok" to indicate "please change me"
    self.svstate_i = Data(64, "svstate_i")  # ditto
    self.core_bigendian_i = Signal()  # TODO: set based on MSR.LE
    self.busy_o = Signal(reset_less=True)
    self.memerr_o = Signal(reset_less=True)

    # all regfile ports below come out of the core's register files
    regfiles = core.regs.rf
    staterf = regfiles['state']
    intrf = regfiles['int']
    crrf = regfiles['cr']
    xerrf = regfiles['xer']

    # STATE regfile read /write ports for PC, MSR, SVSTATE
    self.state_r_pc = staterf.r_ports['cia']  # PC rd
    self.state_w_pc = staterf.w_ports['d_wr1']  # PC wr
    self.state_r_sv = staterf.r_ports['sv']  # SVSTATE rd
    self.state_w_sv = staterf.w_ports['sv']  # SVSTATE wr

    # DMI interface access
    self.int_r = intrf.r_ports['dmi']  # INT read
    self.cr_r = crrf.r_ports['full_cr_dbg']  # CR read
    self.xer_r = xerrf.r_ports['full_xer']  # XER read

    if self.svp64_en:
        # for predication
        self.int_pred = intrf.r_ports['pred']  # INT predicate read
        self.cr_pred = crrf.r_ports['cr_pred']  # CR predicate read

    # hack method of keeping an eye on whether branch/trap set the PC
    self.state_nia = staterf.w_ports['nia']
    self.state_nia.wen.name = 'state_nia_wen'

    # pulse to synchronize the simulator at instruction end
    self.insn_done = Signal()

    # indicate any instruction still outstanding, in execution
    self.any_busy = Signal()

    if self.svp64_en:
        # store copies of predicate masks
        self.srcmask = Signal(64)
        self.dstmask = Signal(64)
475
def fetch_predicate_fsm(self, m,
                        pred_insn_i_valid, pred_insn_o_ready,
                        pred_mask_o_valid, pred_mask_i_ready):
    """fetch_predicate_fsm - obtains (constructs in the case of CR)
    src/dest predicate masks

    https://bugs.libre-soc.org/show_bug.cgi?id=617
    the predicates are read here through the regfile ports held in
    self.int_pred (integer predicates) and self.cr_pred (CR predicates).
    in the case of CRs it has to be done through multiple reads,
    extracting one relevant bit at a time.  later, a faster way would be
    to use the 32-bit-wide CR port but this is more complex decoding,
    here.  equivalent code used in ISACaller is
    "from openpower.decoder.isa.caller import get_predcr"

    handshake signals (all driven/read combinatorially):

    * pred_insn_o_ready - driven to 1 while in FETCH_PRED_IDLE
    * pred_insn_i_valid - starts a mask fetch when seen in FETCH_PRED_IDLE
    * pred_mask_o_valid - driven to 1 in FETCH_PRED_DONE: masks are ready
    * pred_mask_i_ready - acknowledges the masks, returning to idle

    results are left in self.srcmask and self.dstmask.

    note: this ENTIRE FSM is not to be called when svp64 is disabled
    """
    comb = m.d.comb
    sync = m.d.sync
    pdecode2 = self.pdecode2
    rm_dec = pdecode2.rm_dec  # SVP64RMModeDecode
    predmode = rm_dec.predmode
    srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
    cr_pred, int_pred = self.cr_pred, self.int_pred  # read regfiles
    # get src/dst step, so we can skip already used mask bits
    cur_state = self.cur_state
    srcstep = cur_state.svstate.srcstep
    dststep = cur_state.svstate.dststep
    cur_vl = cur_state.svstate.vl

    # decode predicates: both the INT and the CR interpretation of each
    # predicate field are decoded, predmode selects which one is used
    sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
    dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
    sidx, scrinvert = get_predcr(m, srcpred, 's')
    didx, dcrinvert = get_predcr(m, dstpred, 'd')

    # store fetched masks, for either intpred or crpred
    # when src/dst step is not zero, the skipped mask bits need to be
    # shifted-out, before actually storing them in src/dest mask
    new_srcmask = Signal(64, reset_less=True)
    new_dstmask = Signal(64, reset_less=True)

    with m.FSM(name="fetch_predicate"):

        with m.State("FETCH_PRED_IDLE"):
            comb += pred_insn_o_ready.eq(1)
            with m.If(pred_insn_i_valid):
                with m.If(predmode == SVP64PredMode.INT):
                    # skip fetching destination mask register, when zero
                    with m.If(dall1s):
                        sync += new_dstmask.eq(-1)
                        # directly go to fetch source mask register
                        # guaranteed not to be zero (otherwise predmode
                        # would be SVP64PredMode.ALWAYS, not INT)
                        comb += int_pred.addr.eq(sregread)
                        comb += int_pred.ren.eq(1)
                        m.next = "INT_SRC_READ"
                    # fetch destination predicate register
                    with m.Else():
                        comb += int_pred.addr.eq(dregread)
                        comb += int_pred.ren.eq(1)
                        m.next = "INT_DST_READ"
                with m.Elif(predmode == SVP64PredMode.CR):
                    # go fetch masks from the CR register file
                    sync += new_srcmask.eq(0)
                    sync += new_dstmask.eq(0)
                    m.next = "CR_READ"
                with m.Else():
                    # neither INT nor CR predication: masks are all ones,
                    # stored directly (no shifting step needed)
                    sync += self.srcmask.eq(-1)
                    sync += self.dstmask.eq(-1)
                    m.next = "FETCH_PRED_DONE"

        with m.State("INT_DST_READ"):
            # store destination mask (read was requested in the
            # previous state, its data is picked up here)
            inv = Repl(dinvert, 64)
            with m.If(dunary):
                # set selected mask bit for 1<<r3 mode
                dst_shift = Signal(range(64))
                comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
                sync += new_dstmask.eq(1 << dst_shift)
            with m.Else():
                # invert mask if requested
                sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
            # skip fetching source mask register, when zero
            with m.If(sall1s):
                sync += new_srcmask.eq(-1)
                m.next = "FETCH_PRED_SHIFT_MASK"
            # fetch source predicate register
            with m.Else():
                comb += int_pred.addr.eq(sregread)
                comb += int_pred.ren.eq(1)
                m.next = "INT_SRC_READ"

        with m.State("INT_SRC_READ"):
            # store source mask
            inv = Repl(sinvert, 64)
            with m.If(sunary):
                # set selected mask bit for 1<<r3 mode
                src_shift = Signal(range(64))
                comb += src_shift.eq(self.int_pred.o_data & 0b111111)
                sync += new_srcmask.eq(1 << src_shift)
            with m.Else():
                # invert mask if requested
                sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
            m.next = "FETCH_PRED_SHIFT_MASK"

        # fetch masks from the CR register file
        # implements the following loop:
        # idx, inv = get_predcr(mask)
        # mask = 0
        # for cr_idx in range(vl):
        #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
        #     if cr[idx] ^ inv:
        #         mask |= 1 << cr_idx
        # return mask
        with m.State("CR_READ"):
            # CR index to be read, which will be ready by the next cycle
            cr_idx = Signal.like(cur_vl, reset_less=True)
            # submit the read operation to the regfile
            with m.If(cr_idx != cur_vl):
                # the CR read port is unary ...
                # ren = 1 << cr_idx
                # ... in MSB0 convention ...
                # ren = 1 << (7 - cr_idx)
                # ... and with an offset:
                # ren = 1 << (7 - off - cr_idx)
                idx = SVP64CROffs.CRPred + cr_idx
                comb += cr_pred.ren.eq(1 << (7 - idx))
                # signal data valid in the next cycle
                cr_read = Signal(reset_less=True)
                sync += cr_read.eq(1)
                # load the next index
                sync += cr_idx.eq(cr_idx + 1)
            with m.Else():
                # exit on loop end
                sync += cr_read.eq(0)
                sync += cr_idx.eq(0)
                m.next = "FETCH_PRED_SHIFT_MASK"
            # (cr_read is the python name bound in the If-branch above;
            # both branches refer to the same signal)
            with m.If(cr_read):
                # compensate for the one cycle delay on the regfile
                cur_cr_idx = Signal.like(cur_vl)
                comb += cur_cr_idx.eq(cr_idx - 1)
                # read the CR field, select the appropriate bit
                cr_field = Signal(4)
                scr_bit = Signal()
                dcr_bit = Signal()
                comb += cr_field.eq(cr_pred.o_data)
                comb += scr_bit.eq(cr_field.bit_select(sidx, 1) ^ scrinvert)
                comb += dcr_bit.eq(cr_field.bit_select(didx, 1) ^ dcrinvert)
                # set the corresponding mask bit
                bit_to_set = Signal.like(self.srcmask)
                comb += bit_to_set.eq(1 << cur_cr_idx)
                with m.If(scr_bit):
                    sync += new_srcmask.eq(new_srcmask | bit_to_set)
                with m.If(dcr_bit):
                    sync += new_dstmask.eq(new_dstmask | bit_to_set)

        with m.State("FETCH_PRED_SHIFT_MASK"):
            # shift-out skipped mask bits
            sync += self.srcmask.eq(new_srcmask >> srcstep)
            sync += self.dstmask.eq(new_dstmask >> dststep)
            m.next = "FETCH_PRED_DONE"

        with m.State("FETCH_PRED_DONE"):
            # hold "masks valid" until the receiver acknowledges
            comb += pred_mask_o_valid.eq(1)
            with m.If(pred_mask_i_ready):
                m.next = "FETCH_PRED_IDLE"
642
def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
              dbg, core_rst, is_svp64_mode,
              fetch_pc_o_ready, fetch_pc_i_valid,
              fetch_insn_o_valid, fetch_insn_i_ready,
              pred_insn_i_valid, pred_insn_o_ready,
              pred_mask_o_valid, pred_mask_i_ready,
              exec_insn_i_valid, exec_insn_o_ready,
              exec_pc_o_valid, exec_pc_i_ready):
    """issue FSM

    decode / issue FSM.  this interacts with the "fetch" FSM
    through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
    (outgoing).  also interacts with the "execute" FSM
    through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
    (incoming).
    SVP64 RM prefixes have already been set up by the
    "fetch" phase, so execute is fairly straightforward.

    other arguments, as used below:

    * core          - its input record (core.i) is loaded in DECODE_SV
      and PRED_SKIP; core.fus supplies the "ldst0" exception record
    * pc_changed / sv_changed - set here when the PC / SVSTATE is
      overridden (via self.pc_i / self.svstate_i) while stopped, and
      tested at instruction end to skip the PC / SVSTATE update
    * nia           - value written to the PC state register when
      moving on to the next instruction
    * dbg           - core_stop_o is tested, core_stopped_i is driven
    * core_rst      - treated like a "stop" request
    * is_svp64_mode - whether the current instruction is SVP64-prefixed
    * pred_*        - handshake with the predicate-fetch FSM

    states: ISSUE_START -> INSN_WAIT -> [PRED_START -> MASK_WAIT ->
    PRED_SKIP ->] DECODE_SV -> INSN_EXECUTE -> EXECUTE_WAIT, then back
    to ISSUE_START, PRED_SKIP (next vector element) or DECODE_SV
    (re-run after a load/store exception).
    """

    comb = m.d.comb
    sync = m.d.sync
    pdecode2 = self.pdecode2
    cur_state = self.cur_state

    # temporaries
    dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode

    # for updating svstate (things like srcstep etc.)
    update_svstate = Signal()  # set this (below) if updating
    new_svstate = SVSTATERec("new_svstate")
    # default: unchanged.  individual fields are overridden below
    comb += new_svstate.eq(cur_state.svstate)

    # precalculate srcstep+1 and dststep+1
    cur_srcstep = cur_state.svstate.srcstep
    cur_dststep = cur_state.svstate.dststep
    next_srcstep = Signal.like(cur_srcstep)
    next_dststep = Signal.like(cur_dststep)
    comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
    comb += next_dststep.eq(cur_state.svstate.dststep+1)

    # note if an exception happened.  in a pipelined or OoO design
    # this needs to be accompanied by "shadowing" (or stalling)
    exc_happened = self.core.o.exc_happened

    with m.FSM(name="issue_fsm"):

        # sync with the "fetch" phase which is reading the instruction
        # at this point, there is no instruction running, that
        # could inadvertently update the PC.
        with m.State("ISSUE_START"):
            # wait on "core stop" release, before next fetch
            # need to do this here, in case we are in a VL==0 loop
            with m.If(~dbg.core_stop_o & ~core_rst):
                comb += fetch_pc_i_valid.eq(1)  # tell fetch to start
                with m.If(fetch_pc_o_ready):   # fetch acknowledged us
                    m.next = "INSN_WAIT"
            with m.Else():
                # tell core it's stopped, and acknowledge debug handshake
                comb += dbg.core_stopped_i.eq(1)
                # while stopped, allow updating the PC and SVSTATE
                with m.If(self.pc_i.ok):
                    comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                    comb += self.state_w_pc.i_data.eq(self.pc_i.data)
                    sync += pc_changed.eq(1)
                with m.If(self.svstate_i.ok):
                    comb += new_svstate.eq(self.svstate_i.data)
                    comb += update_svstate.eq(1)
                    sync += sv_changed.eq(1)

        # wait for an instruction to arrive from Fetch
        with m.State("INSN_WAIT"):
            if self.allow_overlap:
                stopping = dbg.stopping_o
            else:
                stopping = Const(0)
            with m.If(stopping):
                # stopping: jump back to idle
                m.next = "ISSUE_START"
            with m.Else():
                comb += fetch_insn_i_ready.eq(1)
                with m.If(fetch_insn_o_valid):
                    # loop into ISSUE_START if it's a SVP64 instruction
                    # and VL == 0.  this because VL==0 is a for-loop
                    # from 0 to 0 i.e. always, always a NOP.
                    cur_vl = cur_state.svstate.vl
                    with m.If(is_svp64_mode & (cur_vl == 0)):
                        # update the PC before fetching the next instruction
                        # since we are in a VL==0 loop, no instruction was
                        # executed that we could be overwriting
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.i_data.eq(nia)
                        comb += self.insn_done.eq(1)
                        m.next = "ISSUE_START"
                    with m.Else():
                        if self.svp64_en:
                            m.next = "PRED_START"  # fetching predicate
                        else:
                            m.next = "DECODE_SV"  # skip predication

        with m.State("PRED_START"):
            comb += pred_insn_i_valid.eq(1)  # tell fetch_pred to start
            with m.If(pred_insn_o_ready):  # fetch_pred acknowledged us
                m.next = "MASK_WAIT"

        with m.State("MASK_WAIT"):
            comb += pred_mask_i_ready.eq(1)  # ready to receive the masks
            with m.If(pred_mask_o_valid):  # predication masks are ready
                m.next = "PRED_SKIP"

        # skip zeros in predicate
        with m.State("PRED_SKIP"):
            with m.If(~is_svp64_mode):
                m.next = "DECODE_SV"  # nothing to do
            with m.Else():
                if self.svp64_en:
                    pred_src_zero = pdecode2.rm_dec.pred_sz
                    pred_dst_zero = pdecode2.rm_dec.pred_dz

                    # new srcstep, after skipping zeros
                    skip_srcstep = Signal.like(cur_srcstep)
                    # value to be added to the current srcstep
                    src_delta = Signal.like(cur_srcstep)
                    # add leading zeros to srcstep, if not in zero mode
                    with m.If(~pred_src_zero):
                        # priority encoder (count leading zeros)
                        # append guard bit, in case the mask is all zeros
                        pri_enc_src = PriorityEncoder(65)
                        m.submodules.pri_enc_src = pri_enc_src
                        comb += pri_enc_src.i.eq(Cat(self.srcmask,
                                                     Const(1, 1)))
                        comb += src_delta.eq(pri_enc_src.o)
                    # apply delta to srcstep
                    comb += skip_srcstep.eq(cur_srcstep + src_delta)
                    # shift-out all leading zeros from the mask
                    # plus the leading "one" bit
                    # TODO count leading zeros and shift-out the zero
                    #      bits, in the same step, in hardware
                    sync += self.srcmask.eq(self.srcmask >> (src_delta+1))

                    # same as above, but for dststep
                    skip_dststep = Signal.like(cur_dststep)
                    dst_delta = Signal.like(cur_dststep)
                    with m.If(~pred_dst_zero):
                        pri_enc_dst = PriorityEncoder(65)
                        m.submodules.pri_enc_dst = pri_enc_dst
                        comb += pri_enc_dst.i.eq(Cat(self.dstmask,
                                                     Const(1, 1)))
                        comb += dst_delta.eq(pri_enc_dst.o)
                    comb += skip_dststep.eq(cur_dststep + dst_delta)
                    sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))

                    # TODO: initialize mask[VL]=1 to avoid passing past VL
                    # (cur_vl is the python name bound in the INSN_WAIT
                    # state above: it refers to cur_state.svstate.vl)
                    with m.If((skip_srcstep >= cur_vl) |
                              (skip_dststep >= cur_vl)):
                        # end of VL loop. Update PC and reset src/dst step
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.i_data.eq(nia)
                        comb += new_svstate.srcstep.eq(0)
                        comb += new_svstate.dststep.eq(0)
                        comb += update_svstate.eq(1)
                        # synchronize with the simulator
                        comb += self.insn_done.eq(1)
                        # go back to Issue
                        m.next = "ISSUE_START"
                    with m.Else():
                        # update new src/dst step
                        comb += new_svstate.srcstep.eq(skip_srcstep)
                        comb += new_svstate.dststep.eq(skip_dststep)
                        comb += update_svstate.eq(1)
                        # proceed to Decode
                        m.next = "DECODE_SV"

                    # pass predicate mask bits through to satellite decoders
                    # TODO: for SIMD this will be *multiple* bits
                    sync += core.i.sv_pred_sm.eq(self.srcmask[0])
                    sync += core.i.sv_pred_dm.eq(self.dstmask[0])

        # after src/dst step have been updated, we are ready
        # to decode the instruction
        with m.State("DECODE_SV"):
            # decode the instruction
            sync += core.i.e.eq(pdecode2.e)
            sync += core.i.state.eq(cur_state)
            sync += core.i.raw_insn_i.eq(dec_opcode_i)
            sync += core.i.bigendian_i.eq(self.core_bigendian_i)
            if self.svp64_en:
                sync += core.i.sv_rm.eq(pdecode2.sv_rm)
                # set RA_OR_ZERO detection in satellite decoders
                sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
                # and svp64 detection
                sync += core.i.is_svp64_mode.eq(is_svp64_mode)
                # and svp64 bit-rev'd ldst mode
                ldst_dec = pdecode2.use_svp64_ldst_dec
                sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
            # after decoding, reset any previous exception condition,
            # allowing it to be set again during the next execution
            sync += pdecode2.ldst_exc.eq(0)

            m.next = "INSN_EXECUTE"  # move to "execute"

        # handshake with execution FSM, move to "wait" once acknowledged
        with m.State("INSN_EXECUTE"):
            comb += exec_insn_i_valid.eq(1)  # trigger execute
            with m.If(exec_insn_o_ready):   # execute acknowledged us
                m.next = "EXECUTE_WAIT"

        with m.State("EXECUTE_WAIT"):
            # wait on "core stop" release, at instruction end
            # need to do this here, in case we are in a VL>1 loop
            with m.If(~dbg.core_stop_o & ~core_rst):
                comb += exec_pc_i_ready.eq(1)
                # see https://bugs.libre-soc.org/show_bug.cgi?id=636
                # the exception info needs to be blatted into
                # pdecode.ldst_exc, and the instruction "re-run".
                # when ldst_exc.happened is set, the PowerDecoder2
                # reacts very differently: it re-writes the instruction
                # with a "trap" (calls PowerDecoder2.trap()) which
                # will *overwrite* whatever was requested and jump the
                # PC to the exception address, as well as alter MSR.
                # nothing else needs to be done other than to note
                # the change of PC and MSR (and, later, SVSTATE)
                with m.If(exc_happened):
                    sync += pdecode2.ldst_exc.eq(core.fus.get_exc("ldst0"))

                with m.If(exec_pc_o_valid):

                    # was this the last loop iteration?
                    # (only srcstep is compared against VL here)
                    is_last = Signal()
                    cur_vl = cur_state.svstate.vl
                    comb += is_last.eq(next_srcstep == cur_vl)

                    # return directly to Decode if Execute generated an
                    # exception.
                    with m.If(pdecode2.ldst_exc.happened):
                        m.next = "DECODE_SV"

                    # if either PC or SVSTATE were changed by the previous
                    # instruction, go directly back to Fetch, without
                    # updating either PC or SVSTATE
                    with m.Elif(pc_changed | sv_changed):
                        m.next = "ISSUE_START"

                    # also return to Fetch, when no output was a vector
                    # (regardless of SRCSTEP and VL), or when the last
                    # instruction was really the last one of the VL loop
                    with m.Elif((~pdecode2.loop_continue) | is_last):
                        # before going back to fetch, update the PC state
                        # register with the NIA.
                        # ok here we are not reading the branch unit.
                        # TODO: this just blithely overwrites whatever
                        #       pipeline updated the PC
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.i_data.eq(nia)
                        # reset SRCSTEP before returning to Fetch
                        if self.svp64_en:
                            with m.If(pdecode2.loop_continue):
                                comb += new_svstate.srcstep.eq(0)
                                comb += new_svstate.dststep.eq(0)
                                comb += update_svstate.eq(1)
                        else:
                            comb += new_svstate.srcstep.eq(0)
                            comb += new_svstate.dststep.eq(0)
                            comb += update_svstate.eq(1)
                        m.next = "ISSUE_START"

                    # returning to Execute? then, first update SRCSTEP
                    with m.Else():
                        comb += new_svstate.srcstep.eq(next_srcstep)
                        comb += new_svstate.dststep.eq(next_dststep)
                        comb += update_svstate.eq(1)
                        # return to mask skip loop
                        m.next = "PRED_SKIP"

            with m.Else():
                # stopped (or in reset): acknowledge the debug handshake
                comb += dbg.core_stopped_i.eq(1)
                # while stopped, allow updating the PC and SVSTATE
                with m.If(self.pc_i.ok):
                    comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                    comb += self.state_w_pc.i_data.eq(self.pc_i.data)
                    sync += pc_changed.eq(1)
                with m.If(self.svstate_i.ok):
                    comb += new_svstate.eq(self.svstate_i.data)
                    comb += update_svstate.eq(1)
                    sync += sv_changed.eq(1)

    # check if svstate needs updating: if so, write it to State Regfile
    with m.If(update_svstate):
        comb += self.state_w_sv.wen.eq(1<<StateRegs.SVSTATE)
        comb += self.state_w_sv.i_data.eq(new_svstate)
        sync += cur_state.svstate.eq(new_svstate)  # for next clock
933
def execute_fsm(self, m, core, pc_changed, sv_changed,
                exec_insn_i_valid, exec_insn_o_ready,
                exec_pc_o_valid, exec_pc_i_ready):
    """execute FSM

    execute FSM. this interacts with the "issue" FSM
    through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
    (outgoing). SVP64 RM prefixes have already been set up by the
    "issue" phase, so execute is fairly straightforward.

    two states are added to ``m`` under the FSM name "exec_fsm":

    * INSN_START: advertises exec_insn_o_ready.  when exec_insn_i_valid
      is seen, core.p.i_valid is raised and pc_changed / sv_changed are
      cleared; the FSM only advances once core.p.o_ready is also set.
    * INSN_ACTIVE: sets pc_changed / sv_changed when the corresponding
      bit of self.state_nia.wen is set.  once core.n.o_data.busy_o is
      low, exec_pc_o_valid is raised; when exec_pc_i_ready answers, the
      FSM goes back to INSN_START, raising self.insn_done unless
      pdecode2.ldst_exc.happened is set.

    arguments:

    * m: the module being built (its comb / sync domains and FSM
      support are used)
    * core: supplies p.i_valid, p.o_ready and n.o_data.busy_o
    * pc_changed, sv_changed: flags cleared at issue and set when a
      write to PC / SVSTATE is observed during execution
    * exec_insn_i_valid, exec_insn_o_ready: handshake, issue -> execute
    * exec_pc_o_valid, exec_pc_i_ready: handshake, execute -> issue

    returns nothing: the only effect is the statements added to ``m``.
    """

    comb = m.d.comb
    sync = m.d.sync
    pdecode2 = self.pdecode2

    # temporaries
    core_busy_o = core.n.o_data.busy_o  # core is busy
    core_ivalid_i = core.p.i_valid  # instruction is valid

    with m.FSM(name="exec_fsm"):

        # waiting for instruction bus (stays there until not busy)
        with m.State("INSN_START"):
            comb += exec_insn_o_ready.eq(1)
            with m.If(exec_insn_i_valid):
                comb += core_ivalid_i.eq(1)  # instruction is valid/issued
                # a new instruction is starting: forget any PC/SVSTATE
                # write noted for the previous one
                sync += sv_changed.eq(0)
                sync += pc_changed.eq(0)
                with m.If(core.p.o_ready):  # only move if accepted
                    m.next = "INSN_ACTIVE"  # move to "wait completion"

        # instruction started: must wait till it finishes
        with m.State("INSN_ACTIVE"):
            # note changes to PC and SVSTATE
            with m.If(self.state_nia.wen & (1<<StateRegs.SVSTATE)):
                sync += sv_changed.eq(1)
            with m.If(self.state_nia.wen & (1<<StateRegs.PC)):
                sync += pc_changed.eq(1)
            with m.If(~core_busy_o):  # instruction done!
                comb += exec_pc_o_valid.eq(1)
                with m.If(exec_pc_i_ready):
                    # when finished, indicate "done".
                    # however, if there was an exception, the instruction
                    # is *not* yet done. this is an implementation
                    # detail: we choose to implement exceptions by
                    # taking the exception information from the LDST
                    # unit, putting that *back* into the PowerDecoder2,
                    # and *re-running the entire instruction*.
                    # if we erroneously indicate "done" here, it is as if
                    # there were *TWO* instructions:
                    # 1) the failed LDST 2) a TRAP.
                    with m.If(~pdecode2.ldst_exc.happened):
                        comb += self.insn_done.eq(1)
                    # the state change happens whether or not insn_done
                    # was raised
                    m.next = "INSN_START"  # back to fetch
988
def setup_peripherals(self, m):
    """add the core, memories, debug and peripherals to module ``m``

    registers the submodules (core, imem, dbg and - when enabled - jtag,
    4k SRAMs, XICS, GPIO, SVP64 prefix decoder), declares the "por",
    "sync", "coresync" and (if different from "sync") debug clock
    domains, generates the power-on / core reset, and wires up the
    busy, endianness and LD/ST "go" signals.

    arguments:

    * m: the module being built

    returns nothing: the only effect is what gets added to ``m``.
    """
    comb, sync = m.d.comb, m.d.sync

    # okaaaay so the debug module must be in coresync clock domain
    # but NOT its reset signal. to cope with this, set every single
    # submodule explicitly in coresync domain, debug and JTAG
    # in their own one but using *external* reset.
    csd = DomainRenamer("coresync")
    dbd = DomainRenamer(self.dbg_domain)

    m.submodules.core = core = csd(self.core)
    m.submodules.imem = imem = csd(self.imem)
    m.submodules.dbg = dbg = dbd(self.dbg)
    if self.jtag_en:
        m.submodules.jtag = jtag = dbd(self.jtag)
        # TODO: UART2GDB mux, here, from external pin
        # see https://bugs.libre-soc.org/show_bug.cgi?id=499
        sync += dbg.dmi.connect_to(jtag.dmi)

    cur_state = self.cur_state

    # 4x 4k SRAM blocks. these simply "exist", they get routed in litex
    if self.sram4x4k:
        for i, sram in enumerate(self.sram4k):
            m.submodules["sram4k_%d" % i] = csd(sram)
            comb += sram.enable.eq(self.wb_sram_en)

    # XICS interrupt handler
    if self.xics:
        m.submodules.xics_icp = icp = csd(self.xics_icp)
        m.submodules.xics_ics = ics = csd(self.xics_ics)
        comb += icp.ics_i.eq(ics.icp_o)  # connect ICS to ICP
        sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core

    # GPIO test peripheral
    if self.gpio:
        m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)

    # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
    # XXX causes litex ECP5 test to get wrong idea about input and output
    # (but works with verilator sim *sigh*)
    #if self.gpio and self.xics:
    #    comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])

    # instruction decoder
    # NOTE(review): pdecode is created but never used in this method;
    # kept in case create_pdecode() is relied on elsewhere - confirm
    pdecode = create_pdecode()
    m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
    if self.svp64_en:
        m.submodules.svp64 = svp64 = csd(self.svp64)

    # convenience
    dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
    intrf = self.core.regs.rf['int']

    # clock delay power-on reset
    # the two unnamed domains take their names ("por", "sync") from the
    # variables they are assigned to, minus the cd_ prefix: m.d.por
    # below depends on this, so do not rename cd_por / cd_sync
    cd_por = ClockDomain(reset_less=True)
    cd_sync = ClockDomain()
    core_sync = ClockDomain("coresync")
    m.domains += cd_por, cd_sync, core_sync
    if self.dbg_domain != "sync":
        dbg_sync = ClockDomain(self.dbg_domain)
        m.domains += dbg_sync

    ti_rst = Signal(reset_less=True)
    # counts down 3, 2, 1, 0 after power-on and then stays at zero
    delay = Signal(range(4), reset=3)
    with m.If(delay != 0):
        m.d.por += delay.eq(delay - 1)
    comb += cd_por.clk.eq(ClockSignal())

    # power-on reset delay
    # the core is held in reset while the power-on countdown is still
    # running, OR the debug module requests a core reset, OR the main
    # reset is asserted.  the parentheses are essential: "|" binds
    # tighter than "!=" in python, so without them the expression is
    # delay != (0 | core_rst_o | reset), which is not a reset at all.
    core_rst = ResetSignal("coresync")
    comb += ti_rst.eq((delay != 0) | dbg.core_rst_o | ResetSignal())
    comb += core_rst.eq(ti_rst)

    # debug clock is same as coresync, but reset is *main external*
    if self.dbg_domain != "sync":
        dbg_rst = ResetSignal(self.dbg_domain)
        comb += dbg_rst.eq(ResetSignal())

    # busy/halted signals from core
    core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o  # core is busy
    comb += self.busy_o.eq(core_busy_o)
    comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)

    # temporary hack: says "go" immediately for both address gen and ST
    l0 = core.l0
    ldst = core.fus.fus['ldst0']
    st_go_edge = rising_edge(m, ldst.st.rel_o)
    m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)  # link addr-go direct to rel
    m.d.comb += ldst.st.go_i.eq(st_go_edge)  # link store-go to rising rel
1079
def elaborate(self, platform):
    """build the complete issuer: peripherals, state access and FSMs

    creates a new Module, calls setup_peripherals(), wires the PC /
    SVSTATE readers and the debug state outputs, declares the
    ready/valid handshake signals, then instantiates the fetch FSM
    (as a FetchFSM submodule) and adds the issue, predicate-fetch
    (only when self.svp64_en), execute, DMI and DEC/TB logic.

    arguments:

    * platform: not used in this method

    returns the populated Module.
    """
    m = Module()
    # convenience
    comb, sync = m.d.comb, m.d.sync
    cur_state = self.cur_state
    pdecode2 = self.pdecode2
    dbg = self.dbg
    core = self.core

    # set up peripherals and core
    core_rst = self.core_rst
    self.setup_peripherals(m)

    # reset current state if core reset requested
    with m.If(core_rst):
        m.d.sync += self.cur_state.eq(0)

    # PC and instruction from I-Memory
    comb += self.pc_o.eq(cur_state.pc)
    pc_changed = Signal()  # note write to PC
    sv_changed = Signal()  # note write to SVSTATE

    # indicate to outside world if any FU is still executing
    comb += self.any_busy.eq(core.n.o_data.any_busy_o)  # any FU executing

    # read state either from incoming override or from regfile
    # TODO: really should be doing MSR in the same way
    pc = state_get(m, core_rst, self.pc_i,
                   "pc",  # read PC
                   self.state_r_pc, StateRegs.PC)
    svstate = state_get(m, core_rst, self.svstate_i,
                        "svstate",  # read SVSTATE
                        self.state_r_sv, StateRegs.SVSTATE)

    # don't write pc every cycle
    comb += self.state_w_pc.wen.eq(0)
    comb += self.state_w_pc.i_data.eq(0)

    # address of the next instruction, in the absence of a branch
    # depends on the instruction size
    nia = Signal(64)

    # connect up debug signals
    # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
    comb += dbg.terminate_i.eq(core.o.core_terminate_o)
    comb += dbg.state.pc.eq(pc)
    comb += dbg.state.svstate.eq(svstate)
    comb += dbg.state.msr.eq(cur_state.msr)

    # pass the prefix mode from Fetch to Issue, so the latter can loop
    # on VL==0
    is_svp64_mode = Signal()

    # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
    # issue, decode/execute, now joined by "Predicate fetch/calculate".
    # these are the handshake signals between each

    # fetch FSM can run as soon as the PC is valid
    fetch_pc_i_valid = Signal()  # Execute tells Fetch "start next read"
    fetch_pc_o_ready = Signal()  # Fetch Tells SVSTATE "proceed"

    # fetch FSM hands over the instruction to be decoded / issued
    fetch_insn_o_valid = Signal()
    fetch_insn_i_ready = Signal()

    # predicate fetch FSM decodes and fetches the predicate
    pred_insn_i_valid = Signal()
    pred_insn_o_ready = Signal()

    # predicate fetch FSM delivers the masks
    pred_mask_o_valid = Signal()
    pred_mask_i_ready = Signal()

    # issue FSM delivers the instruction to be executed
    exec_insn_i_valid = Signal()
    exec_insn_o_ready = Signal()

    # execute FSM, hands over the PC/SVSTATE back to the issue FSM
    exec_pc_o_valid = Signal()
    exec_pc_i_ready = Signal()

    # the FSMs here are perhaps unusual in that they detect conditions
    # then "hold" information, combinatorially, for the core
    # (as opposed to using sync - which would be on a clock's delay)
    # this includes the actual opcode, valid flags and so on.

    # Fetch, then predicate fetch, then Issue, then Execute.
    # Issue is where the VL for-loop lives. the ready/valid
    # signalling is used to communicate between the four.

    # set up Fetch FSM
    fetch = FetchFSM(self.allow_overlap, self.svp64_en,
                     self.imem, core_rst, pdecode2, cur_state,
                     dbg, core, svstate, nia, is_svp64_mode)
    m.submodules.fetch = fetch
    # connect up in/out data to existing Signals
    comb += fetch.p.i_data.pc.eq(pc)
    # and the ready/valid signalling
    comb += fetch_pc_o_ready.eq(fetch.p.o_ready)
    comb += fetch.p.i_valid.eq(fetch_pc_i_valid)
    comb += fetch_insn_o_valid.eq(fetch.n.o_valid)
    comb += fetch.n.i_ready.eq(fetch_insn_i_ready)

    self.issue_fsm(m, core, pc_changed, sv_changed, nia,
                   dbg, core_rst, is_svp64_mode,
                   fetch_pc_o_ready, fetch_pc_i_valid,
                   fetch_insn_o_valid, fetch_insn_i_ready,
                   pred_insn_i_valid, pred_insn_o_ready,
                   pred_mask_o_valid, pred_mask_i_ready,
                   exec_insn_i_valid, exec_insn_o_ready,
                   exec_pc_o_valid, exec_pc_i_ready)

    # the predicate FSM only exists when SVP64 support is compiled in
    if self.svp64_en:
        self.fetch_predicate_fsm(m,
                                 pred_insn_i_valid, pred_insn_o_ready,
                                 pred_mask_o_valid, pred_mask_i_ready)

    self.execute_fsm(m, core, pc_changed, sv_changed,
                     exec_insn_i_valid, exec_insn_o_ready,
                     exec_pc_o_valid, exec_pc_i_ready)

    # this bit doesn't have to be in the FSM: connect up to read
    # regfiles on demand from DMI
    self.do_dmi(m, dbg)

    # DEC and TB inc/dec FSM. copy of DEC is put into CoreState,
    # (which uses that in PowerDecoder2 to raise 0x900 exception)
    self.tb_dec_fsm(m, cur_state.dec)

    return m
1210
def do_dmi(self, m, dbg):
    """deals with DMI debug requests

    currently only provides read requests for the INT regfile, CR and XER
    it will later also deal with *writing* to these regfiles.

    the same pattern is used for each of the three: while the request
    line is high the regfile read-enable is driven, and one clock later
    (via a registered copy of the request) the regfile output is passed
    back together with an acknowledge.

    arguments:

    * m: the module being built
    * dbg: debug module supplying dmi, d_gpr, d_cr and d_xer

    returns nothing: the only effect is the statements added to ``m``.
    """
    comb = m.d.comb
    sync = m.d.sync
    dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
    intrf = self.core.regs.rf['int']

    with m.If(d_reg.req):  # request for regfile access being made
        # TODO: error-check this
        # XXX should this be combinatorial? sync better?
        # python-level choice, made once at elaboration time: a unary
        # regfile takes a one-hot read-enable, otherwise an address
        # plus a single enable bit
        if intrf.unary:
            comb += self.int_r.ren.eq(1<<d_reg.addr)
        else:
            comb += self.int_r.addr.eq(d_reg.addr)
            comb += self.int_r.ren.eq(1)
    # registered copy of the request, i.e. the request one clock ago
    d_reg_delay = Signal()
    sync += d_reg_delay.eq(d_reg.req)
    with m.If(d_reg_delay):
        # data arrives one clock later
        comb += d_reg.data.eq(self.int_r.o_data)
        comb += d_reg.ack.eq(1)

    # sigh same thing for CR debug
    with m.If(d_cr.req):  # request for regfile access being made
        comb += self.cr_r.ren.eq(0b11111111)  # enable all
    d_cr_delay = Signal()
    sync += d_cr_delay.eq(d_cr.req)
    with m.If(d_cr_delay):
        # data arrives one clock later
        comb += d_cr.data.eq(self.cr_r.o_data)
        comb += d_cr.ack.eq(1)

    # aaand XER...
    with m.If(d_xer.req):  # request for regfile access being made
        comb += self.xer_r.ren.eq(0b111111)  # enable all
    d_xer_delay = Signal()
    sync += d_xer_delay.eq(d_xer.req)
    with m.If(d_xer_delay):
        # data arrives one clock later
        comb += d_xer.data.eq(self.xer_r.o_data)
        comb += d_xer.ack.eq(1)
1256
def tb_dec_fsm(self, m, spr_dec):
    """tb_dec_fsm

    this is a FSM for updating either dec or tb. it runs alternately
    DEC, TB, DEC, TB. note that SPR pipeline could have written a new
    value to DEC, however the regfile has "passthrough" on it so this
    *should* be ok.

    see v3.0B p1097-1099 for Timer Resource and p1065 and p1076

    the FSM cycles unconditionally through four states, one per clock:
    DEC_READ, DEC_WRITE (DEC minus one), TB_READ, TB_WRITE (TB plus one),
    using the 'issue' read and write ports of the 'fast' regfile.

    arguments:

    * m: the module being built
    * spr_dec: receives (on the sync domain) a copy of each new DEC value

    returns ``m``.
    """

    comb, sync = m.d.comb, m.d.sync
    fast_rf = self.core.regs.rf['fast']
    fast_r_dectb = fast_rf.r_ports['issue']  # DEC/TB
    fast_w_dectb = fast_rf.w_ports['issue']  # DEC/TB

    with m.FSM() as fsm:

        # initiates read of current DEC
        with m.State("DEC_READ"):
            comb += fast_r_dectb.addr.eq(FastRegs.DEC)
            comb += fast_r_dectb.ren.eq(1)
            m.next = "DEC_WRITE"

        # waits for DEC read to arrive (1 cycle), updates with new value
        with m.State("DEC_WRITE"):
            new_dec = Signal(64)
            # TODO: MSR.LPCR 32-bit decrement mode
            comb += new_dec.eq(fast_r_dectb.o_data - 1)
            comb += fast_w_dectb.addr.eq(FastRegs.DEC)
            comb += fast_w_dectb.wen.eq(1)
            comb += fast_w_dectb.i_data.eq(new_dec)
            sync += spr_dec.eq(new_dec)  # copy into cur_state for decoder
            m.next = "TB_READ"

        # initiates read of current TB
        with m.State("TB_READ"):
            comb += fast_r_dectb.addr.eq(FastRegs.TB)
            comb += fast_r_dectb.ren.eq(1)
            m.next = "TB_WRITE"

        # waits for read TB to arrive, initiates write of current TB
        with m.State("TB_WRITE"):
            new_tb = Signal(64)
            comb += new_tb.eq(fast_r_dectb.o_data + 1)
            comb += fast_w_dectb.addr.eq(FastRegs.TB)
            comb += fast_w_dectb.wen.eq(1)
            comb += fast_w_dectb.i_data.eq(new_tb)
            m.next = "DEC_READ"

    return m
1308
def __iter__(self):
    """yield every port of the issuer, one at a time

    order: the pc_i ports, pc_o, memerr_o, the core ports, the
    instruction-memory ports, core_bigendian_i and finally busy_o.
    """
    for port in self.pc_i.ports():
        yield port
    yield self.pc_o
    yield self.memerr_o
    for port in self.core.ports():
        yield port
    for port in self.imem.ports():
        yield port
    yield self.core_bigendian_i
    yield self.busy_o
1317
def ports(self):
    """return a new list holding everything that iterating self yields"""
    return [*self]
1320
def external_ports(self):
    """return the list of ports exposed at the top level

    starts from the list returned by self.pc_i.ports() (which is
    extended in place) and appends, in order: pc_o, memerr_o,
    core_bigendian_i, busy_o; the JTAG external ports or else the DMI
    ports; the instruction and data bus fields; then the fields of each
    optional peripheral that is enabled (4k SRAMs, XICS plus
    int_level_i, GPIO plus gpio_o).
    """
    result = self.pc_i.ports()
    result.extend([self.pc_o, self.memerr_o,
                   self.core_bigendian_i, self.busy_o])

    # debug access: JTAG when enabled, otherwise DMI directly
    # (don't add DMI if JTAG is enabled)
    if not self.jtag_en:
        result.extend(self.dbg.dmi.ports())
    else:
        result.extend(self.jtag.external_ports())

    # instruction bus, then data bus
    result.extend(self.imem.ibus.fields.values())
    result.extend(self.core.l0.cmpi.wb_bus().fields.values())

    if self.sram4x4k:
        for block in self.sram4k:
            result.extend(block.bus.fields.values())

    if self.xics:
        for controller in (self.xics_icp, self.xics_ics):
            result.extend(controller.bus.fields.values())
        result.append(self.int_level_i)

    if self.gpio:
        result.extend(self.simple_gpio.bus.fields.values())
        result.append(self.gpio_o)

    return result
1349
def ports(self):
    """return a new list holding everything that iterating self yields

    NOTE(review): a method with this name and the same body is defined
    earlier in the file; this later definition is the one that takes
    effect if both sit in the same class.
    """
    return [port for port in self]
1352
1353
class TestIssuer(Elaboratable):
    """top-level wrapper: TestIssuerInternal plus clock (and PLL) wiring

    holds a TestIssuerInternal (self.ti) and a DummyPLL (self.pll).
    when pspec.use_pll is present and true, the PLL is added as a
    submodule and its test / VCO outputs and clock-select input are
    exposed.  in all cases the "coresync" clock (and the debug clock,
    if that domain is not "sync") is driven from here.
    """

    def __init__(self, pspec):
        """create the internal issuer and the PLL wrapper

        * pspec: configuration object, passed on to TestIssuerInternal.
          its optional use_pll attribute enables the PLL signals.
        """
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL(instance=True)

        # PLL direct clock or not
        self.pll_en = hasattr(pspec, "use_pll") and pspec.use_pll
        if self.pll_en:
            # these attributes only exist when the PLL is enabled
            self.pll_test_o = Signal(reset_less=True)
            self.pll_vco_o = Signal(reset_less=True)
            self.clk_sel_i = Signal(2, reset_less=True)
            self.ref_clk = ClockSignal()  # can't rename it but that's ok
            self.pllclk_clk = ClockSignal("pllclk")

    def elaborate(self, platform):
        """wire up the issuer, the optional PLL, and the clocks

        * platform: not used in this method

        returns the populated Module.
        """
        m = Module()
        comb = m.d.comb

        # TestIssuer nominally runs at main clock, actually it is
        # all combinatorial internally except for coresync'd components
        m.submodules.ti = self.ti

        if self.pll_en:
            m.submodules.wrappll = pll = self.pll

            # add clock domains from PLL
            cd_pll = ClockDomain("pllclk")
            m.domains += cd_pll

            # the "pllclk" domain is clocked by the PLL output
            pllclk = self.pllclk_clk
            comb += pllclk.eq(pll.clk_pll_o)

            # wire up external 24mhz to PLL (currently disabled)
            #comb += pll.clk_24_i.eq(self.ref_clk)
            # output 18 mhz PLL test signal, and analog oscillator out
            comb += self.pll_test_o.eq(pll.pll_test_o)
            comb += self.pll_vco_o.eq(pll.pll_vco_o)

            # input to pll clock selection
            comb += pll.clk_sel_i.eq(self.clk_sel_i)

            # now wire up ResetSignals. don't mind them being in this domain
            pll_rst = ResetSignal("pllclk")
            comb += pll_rst.eq(ResetSignal())

        # no clock domain is declared here for "coresync" or for the
        # debug domain: only their clock signals are driven.
        # (presumably the domains themselves are declared inside
        # self.ti - confirm against TestIssuerInternal)
        intclk = ClockSignal("coresync")

        # XXX BYPASS PLL XXX
        # even with the PLL enabled, the core clock is taken from the
        # reference clock, not from the PLL output
        if self.pll_en:
            comb += intclk.eq(self.ref_clk)
        else:
            comb += intclk.eq(ClockSignal())

        # debug clock runs at coresync internal clock
        if self.ti.dbg_domain != 'sync':
            dbgclk = ClockSignal(self.ti.dbg_domain)
            comb += dbgclk.eq(intclk)

        return m

    def ports(self):
        """return the issuer ports, the PLL ports, then main clock and reset"""
        return list(self.ti.ports()) + list(self.pll.ports()) + \
            [ClockSignal(), ResetSignal()]

    def external_ports(self):
        """return the top-level ports

        the list returned by self.ti.external_ports() is extended in
        place with the main clock and reset and, when the PLL is
        enabled, the PLL-related signals.
        """
        ports = self.ti.external_ports()
        ports.append(ClockSignal())
        ports.append(ResetSignal())
        if self.pll_en:
            ports.extend([self.clk_sel_i,
                          self.pll.clk_24_i,
                          self.pll_test_o,
                          self.pll_vco_o,
                          self.pllclk_clk,
                          self.ref_clk])
        return ports
1441
1442
if __name__ == '__main__':
    # one Function Unit of each kind
    units = dict.fromkeys(('alu', 'cr', 'branch', 'trap', 'logical',
                           'spr', 'div', 'mul', 'shiftrot'), 1)
    pspec = TestMemPspec(
        ldst_ifacetype='bare_wb', imem_ifacetype='bare_wb',
        addr_wid=48, mask_wid=8, reg_wid=64,
        units=units,
    )
    dut = TestIssuer(pspec)

    # hand over to the nmigen command-line front-end first
    vl = main(dut, ports=dut.ports(), name="test_issuer")

    # with no command-line arguments at all, additionally write out
    # the RTLIL of the design, using the external port list
    if len(sys.argv) == 1:
        vl = rtlil.convert(dut, name="test_issuer",
                           ports=dut.external_ports())
        with open("test_issuer.il", "w") as f:
            f.write(vl)