use p.i_valid in core instead of explicit signal ivalid_i
[soc.git] / src / soc / simple / issuer.py
1 """simple core issuer
2
3 not in any way intended for production use. this runs a FSM that:
4
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
9 * increments the PC
10 * does it all over again
11
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to provide something that can be further incrementally
15 improved.
16 """
17
18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
19 ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
20 from nmigen.cli import rtlil
21 from nmigen.cli import main
22 import sys
23
24 from nmigen.lib.coding import PriorityEncoder
25
26 from openpower.decoder.power_decoder import create_pdecode
27 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
28 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
29 from openpower.decoder.decode2execute1 import Data
30 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
31 SVP64PredMode)
32 from openpower.state import CoreState
33 from openpower.consts import (CR, SVP64CROffs)
34 from soc.experiment.testmem import TestMemory # test only for instructions
35 from soc.regfile.regfiles import StateRegs, FastRegs
36 from soc.simple.core import NonProductionCore
37 from soc.config.test.test_loadstore import TestMemPspec
38 from soc.config.ifetch import ConfigFetchUnit
39 from soc.debug.dmi import CoreDebug, DMIInterface
40 from soc.debug.jtag import JTAG
41 from soc.config.pinouts import get_pinspecs
42 from soc.interrupts.xics import XICS_ICP, XICS_ICS
43 from soc.bus.simple_gpio import SimpleGPIO
44 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
45 from soc.clock.select import ClockSelect
46 from soc.clock.dummypll import DummyPLL
47 from openpower.sv.svstate import SVSTATERec
48
49
50 from nmutil.util import rising_edge
51
def get_insn(f_instr_o, pc):
    """pick the 32-bit instruction word out of the fetched data

    * f_instr_o: the data returned by the instruction fetch. if it is
      exactly 32 bits wide it is handed back untouched.
    * pc: address of the instruction. only bit 2 is looked at, and only
      when f_instr_o is not 32 bits wide.

    returns f_instr_o itself, or the 32-bit word of it selected by pc[2]
    """
    if f_instr_o.width != 32:
        # wider (64-bit) fetch data holds two instruction words:
        # bit 2 of the pc says which of the two is wanted
        return f_instr_o.word_select(pc[2], 32)
    return f_instr_o
58
59 # gets state input or reads from state regfile
def state_get(m, core_rst, state_i, name, regfile, regnum):
    """obtain a 64-bit state value: incoming override, or regfile read

    * m: Module that the statements are added to
    * core_rst: nothing at all is done whilst this is asserted
    * state_i: override. when state_i.ok is set, state_i.data is used
    * name: name given to the returned Signal (and, with "_ok_delay"
      appended, to the internal delay flag)
    * regfile: read port. ren is driven with 1<<regnum, and o_data is
      picked up one clock later
    * regnum: index of the register, used unary (1<<regnum) on ren

    returns the 64-bit (reset-less) Signal carrying the value
    """
    value = Signal(64, reset_less=True, name=name)
    # remembers, for one clock, that no override was present (which is
    # when the regfile read is requested)
    read_pending = Signal(name="%s_ok_delay" % name)

    with m.If(~core_rst):
        m.d.sync += read_pending.eq(~state_i.ok)
        with m.If(state_i.ok):
            # an override is present: pass it straight through
            m.d.comb += value.eq(state_i.data)
        with m.Else():
            # no override: request a read of the register instead...
            m.d.comb += regfile.ren.eq(1<<regnum)
        # ... the data from which is collected on a 1-clock delay.
        # this assignment comes last, so it is the one that wins
        with m.If(read_pending):
            m.d.comb += value.eq(regfile.o_data)
    return value
78
def get_predint(m, mask, name):
    """decode an SVP64 integer-predicate mask field into "what to do"

    same job as the equivalent function in ISACaller, except that no
    INT register is read here: the result only says which register is
    needed and how its contents are to be treated.

    * m: Module that the decode logic is added to
    * mask: the predicate mask field (switched on SVP64PredInt values)
    * name: prefix for the names of the four Signals created

    returns (regread, invert, unary, all1s):

    * regread: number of the GPR to be read (3, 10 or 30)
    * invert: set when the register value is to be bit-inverted
    * unary: set when the mask is a single bit, 1<<r3
    * all1s: set when no mask is to be applied (all ones)
    """
    regread = Signal(5, name=name+"regread")
    invert = Signal(name=name+"invert")
    unary = Signal(name=name+"unary")
    all1s = Signal(name=name+"all1s")

    # one row per encoding: the signals to drive, and with what.
    # anything not listed in a row is left at its default (zero)
    decode_table = (
        (SVP64PredInt.ALWAYS, ((all1s, 1),)),  # 0b1111 (all ones)
        (SVP64PredInt.R3_UNARY, ((regread, 3), (unary, 1))),  # 1<<r3
        (SVP64PredInt.R3, ((regread, 3),)),
        (SVP64PredInt.R3_N, ((regread, 3), (invert, 1))),
        (SVP64PredInt.R10, ((regread, 10),)),
        (SVP64PredInt.R10_N, ((regread, 10), (invert, 1))),
        (SVP64PredInt.R30, ((regread, 30),)),
        (SVP64PredInt.R30_N, ((regread, 30), (invert, 1))),
    )
    with m.Switch(mask):
        for encoding, drives in decode_table:
            with m.Case(encoding.value):
                for sig, val in drives:
                    m.d.comb += sig.eq(val)
    return regread, invert, unary, all1s
117
def get_predcr(m, mask, name):
    """decode an SVP64 CR-predicate mask field to CR bit index + invert

    same job as _get_predcr in ISACaller.

    * m: Module that the decode logic is added to
    * mask: the predicate mask field (switched on SVP64PredCR values)
    * name: prefix for the names of the two Signals created

    returns (idx, invert): idx is the bit within a CR field to test
    (CR.LT, CR.GT, CR.EQ or CR.SO) and invert says whether the tested
    bit is to be inverted (set for GE, LE, NE and NS).
    """
    idx = Signal(2, name=name+"idx")
    invert = Signal(name=name+"crinvert")

    # each CR bit comes as a pair: the plain test and its inverse
    decode_table = (
        (SVP64PredCR.LT, CR.LT, 0),
        (SVP64PredCR.GE, CR.LT, 1),
        (SVP64PredCR.GT, CR.GT, 0),
        (SVP64PredCR.LE, CR.GT, 1),
        (SVP64PredCR.EQ, CR.EQ, 0),
        (SVP64PredCR.NE, CR.EQ, 1),
        (SVP64PredCR.SO, CR.SO, 0),
        (SVP64PredCR.NS, CR.SO, 1),
    )
    with m.Switch(mask):
        for encoding, cr_bit, inverted in decode_table:
            with m.Case(encoding.value):
                m.d.comb += idx.eq(cr_bit)
                m.d.comb += invert.eq(inverted)
    return idx, invert
151
152
153 class TestIssuerInternal(Elaboratable):
154 """TestIssuer - reads instructions from TestMemory and issues them
155
156 efficiency and speed are not the main goal here: functional correctness
157 and code clarity are. optimisations (which almost 100% interfere with
158 easy understanding) come later.
159 """
def __init__(self, pspec):
    """create the core, decoder, fetch unit, debug and peripherals

    pspec is the configuration object. each of the following attributes
    is optional (a missing attribute means "disabled"):

    * svp64 == True         - SVP64 prefix decoding and predication
    * regreduce == True     - passed to PowerDecode2 as regreduce_en
    * debug == 'jtag'       - adds a JTAG instance
    * sram4x4kblock == True - adds four SPBlock512W64B8W instances
    * xics == True          - adds XICS_ICP and XICS_ICS
    * gpio == True          - adds a SimpleGPIO

    note: when JTAG is enabled, pspec itself is modified (wb_icache_en
    and wb_dcache_en are added) *before* it is handed on to
    NonProductionCore and ConfigFetchUnit.
    """
    def enabled(option, wanted=True):
        # an option is on only if pspec has it *and* it equals "wanted"
        return hasattr(pspec, option) and getattr(pspec, option) == wanted

    # is SVP64 to be enabled?
    self.svp64_en = enabled("svp64")

    # are the regfiles reduced?
    self.regreduce_en = enabled("regreduce")

    # JTAG interface. this has to come right at the start because, if
    # present, it *modifies* the pspec: enable/disable signals for
    # parts of the rest of the core get added to it
    self.jtag_en = enabled("debug", 'jtag')
    # domain for the DMI/JTAG clock. a separate "dbgsync" domain
    # proved too problematic (sigh), so "sync" it is
    self.dbg_domain = "sync"
    if not self.jtag_en:
        self.wb_sram_en = Const(1)
    else:
        # XXX MUST keep this up-to-date with litex, and
        # soc-cocotb-sim, and err.. all needs sorting out, argh
        # (mspi1, pwm and sd0 are disabled for now)
        pinset = ['uart', 'mtwi', 'eint', 'gpio', 'mspi0', 'sdr']
        self.jtag = JTAG(get_pinspecs(subset=pinset),
                         domain=self.dbg_domain)
        # add signals to pspec to enable/disable icache and dcache
        # (or data and instruction wishbone if icache/dcache not
        # included): https://bugs.libre-soc.org/show_bug.cgi?id=520
        # TODO: do we actually care if these are not
        # domain-synchronised? honestly probably not.
        pspec.wb_icache_en = self.jtag.wb_icache_en
        pspec.wb_dcache_en = self.jtag.wb_dcache_en
        self.wb_sram_en = self.jtag.wb_sram_en

    # add 4k sram blocks?
    self.sram4x4k = enabled("sram4x4kblock")
    if self.sram4x4k:
        self.sram4k = []
        for blk in range(4):
            # (the 'err' feature is deliberately not requested)
            self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % blk))

    # add interrupt controller?
    self.xics = enabled("xics")
    if self.xics:
        self.xics_icp = XICS_ICP()
        self.xics_ics = XICS_ICS()
        self.int_level_i = self.xics_ics.int_level_i

    # add GPIO peripheral?
    self.gpio = enabled("gpio")
    if self.gpio:
        self.simple_gpio = SimpleGPIO()
        self.gpio_o = self.simple_gpio.gpio_o

    # main instruction core. suitable for prototyping / demo only
    self.core = NonProductionCore(pspec)
    self.core_rst = ResetSignal("coresync")

    # instruction decoder. goes into Trap Record
    self.cur_state = CoreState("cur") # current state (MSR/PC/SVSTATE)
    self.pdecode2 = PowerDecode2(None, state=self.cur_state,
                                 opkls=IssuerDecode2ToOperand,
                                 svp64_en=self.svp64_en,
                                 regreduce_en=self.regreduce_en)

    if self.svp64_en:
        self.svp64 = SVP64PrefixDecoder() # for decoding SVP64 prefix

    # Test Instruction memory
    self.imem = ConfigFetchUnit(pspec).fu

    # DMI interface
    self.dbg = CoreDebug()

    # instruction go/monitor
    self.pc_o = Signal(64, reset_less=True)
    self.pc_i = Data(64, "pc_i") # set "ok" to indicate "please change me"
    self.svstate_i = Data(64, "svstate_i") # ditto
    self.core_bigendian_i = Signal() # TODO: set based on MSR.LE
    self.busy_o = Signal(reset_less=True)
    self.memerr_o = Signal(reset_less=True)

    regfiles = self.core.regs.rf

    # STATE regfile read /write ports for PC, MSR, SVSTATE
    staterf = regfiles['state']
    self.state_r_pc = staterf.r_ports['cia'] # PC rd
    self.state_w_pc = staterf.w_ports['d_wr1'] # PC wr
    self.state_r_msr = staterf.r_ports['msr'] # MSR rd
    self.state_r_sv = staterf.r_ports['sv'] # SVSTATE rd
    self.state_w_sv = staterf.w_ports['sv'] # SVSTATE wr

    # DMI interface access
    intrf = regfiles['int']
    crrf = regfiles['cr']
    xerrf = regfiles['xer']
    self.int_r = intrf.r_ports['dmi'] # INT read
    self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
    self.xer_r = xerrf.r_ports['full_xer'] # XER read

    if self.svp64_en:
        # for predication
        self.int_pred = intrf.r_ports['pred'] # INT predicate read
        self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read

    # hack method of keeping an eye on whether branch/trap set the PC
    self.state_nia = staterf.w_ports['nia']
    self.state_nia.wen.name = 'state_nia_wen'

    # pulse to synchronize the simulator at instruction end
    self.insn_done = Signal()

    if self.svp64_en:
        # store copies of predicate masks
        self.srcmask = Signal(64)
        self.dstmask = Signal(64)
282
def fetch_fsm(self, m, core, pc, svstate, nia, is_svp64_mode,
              fetch_pc_o_ready, fetch_pc_i_valid,
              fetch_insn_o_valid, fetch_insn_i_ready):
    """fetch FSM

    this FSM performs fetch of raw instruction data, partial-decodes
    it 32-bit at a time to detect SVP64 prefixes, and will optionally
    read a 2nd 32-bit quantity if that occurs.

    states: IDLE -> INSN_READ (-> INSN_READ2, prefixed only) ->
    INSN_READY -> IDLE

    * m: Module that the FSM is added to
    * core: not referenced by this method
    * pc: address to fetch from. latched into cur_state.pc and put on
      the instruction memory address when a fetch starts
    * svstate: latched into cur_state.svstate at the same moment
    * nia: (written here) address of the following instruction:
      cur_state.pc + 4, or + 8 when an SVP64 prefix was found
    * is_svp64_mode: (written here, svp64 only) whether the prefix
      decoder identified the instruction as SVP64-prefixed
    * fetch_pc_o_ready: (driven here) high whilst in IDLE
    * fetch_pc_i_valid: request to begin a fetch, sampled in IDLE
    * fetch_insn_o_valid: (driven here) high whilst in INSN_READY
    * fetch_insn_i_ready: acknowledgement, sampled in INSN_READY

    the fetched opcode itself is stored (sync) into
    pdecode2.dec.raw_opcode_in, and the MSR into cur_state.msr.
    """
    comb = m.d.comb
    sync = m.d.sync
    pdecode2 = self.pdecode2
    cur_state = self.cur_state
    dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode

    # low for exactly the period between requesting the MSR (in IDLE)
    # and capturing it (in INSN_READ). resets to 1: "nothing pending"
    msr_read = Signal(reset=1)

    with m.FSM(name='fetch_fsm'):

        # waiting (zzz)
        with m.State("IDLE"):
            comb += fetch_pc_o_ready.eq(1)
            with m.If(fetch_pc_i_valid):
                # instruction allowed to go: start by reading the PC
                # capture the PC and also drop it into Insn Memory
                # we have joined a pair of combinatorial memory
                # lookups together. this is Generally Bad.
                comb += self.imem.a_pc_i.eq(pc)
                comb += self.imem.a_i_valid.eq(1)
                comb += self.imem.f_i_valid.eq(1)
                sync += cur_state.pc.eq(pc)
                sync += cur_state.svstate.eq(svstate) # and svstate

                # initiate read of MSR. arrives one clock later
                comb += self.state_r_msr.ren.eq(1 << StateRegs.MSR)
                sync += msr_read.eq(0)

                m.next = "INSN_READ" # move to "wait for bus" phase

        # wait for the instruction memory to deliver the (first) word.
        # (original note: dummy pause to find out why simulation is
        # not keeping up)
        with m.State("INSN_READ"):
            # one cycle later, msr/sv read arrives. valid only once.
            with m.If(~msr_read):
                sync += msr_read.eq(1) # yeah don't read it again
                sync += cur_state.msr.eq(self.state_r_msr.o_data)
            with m.If(self.imem.f_busy_o): # zzz...
                # busy: stay in wait-read
                comb += self.imem.a_i_valid.eq(1)
                comb += self.imem.f_i_valid.eq(1)
            with m.Else():
                # not busy: instruction fetched
                insn = get_insn(self.imem.f_instr_o, cur_state.pc)
                # python-level choice: the prefix-detection hardware
                # is only generated when SVP64 is enabled
                if self.svp64_en:
                    svp64 = self.svp64
                    # decode the SVP64 prefix, if any
                    comb += svp64.raw_opcode_in.eq(insn)
                    comb += svp64.bigendian.eq(self.core_bigendian_i)
                    # pass the decoded prefix (if any) to PowerDecoder2
                    sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
                    sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
                    # remember whether this is a prefixed instruction, so
                    # the FSM can readily loop when VL==0
                    sync += is_svp64_mode.eq(svp64.is_svp64_mode)
                    # calculate the address of the following instruction
                    insn_size = Mux(svp64.is_svp64_mode, 8, 4)
                    sync += nia.eq(cur_state.pc + insn_size)
                    with m.If(~svp64.is_svp64_mode):
                        # with no prefix, store the instruction
                        # and hand it directly to the next FSM
                        sync += dec_opcode_i.eq(insn)
                        m.next = "INSN_READY"
                    with m.Else():
                        # fetch the rest of the instruction from memory
                        comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
                        comb += self.imem.a_i_valid.eq(1)
                        comb += self.imem.f_i_valid.eq(1)
                        m.next = "INSN_READ2"
                else:
                    # not SVP64 - 32-bit only
                    sync += nia.eq(cur_state.pc + 4)
                    sync += dec_opcode_i.eq(insn)
                    m.next = "INSN_READY"

        # second half of a prefixed instruction: the word at pc+4
        with m.State("INSN_READ2"):
            with m.If(self.imem.f_busy_o): # zzz...
                # busy: stay in wait-read
                comb += self.imem.a_i_valid.eq(1)
                comb += self.imem.f_i_valid.eq(1)
            with m.Else():
                # not busy: instruction fetched
                insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
                sync += dec_opcode_i.eq(insn)
                m.next = "INSN_READY"
                # TODO: probably can start looking at pdecode2.rm_dec
                # here or maybe even in INSN_READ state, if svp64_mode
                # detected, in order to trigger - and wait for - the
                # predicate reading.
                # (pmode is not used yet: the string below is
                # pseudo-code for that TODO and generates nothing)
                if self.svp64_en:
                    pmode = pdecode2.rm_dec.predmode
                    """
                    if pmode != SVP64PredMode.ALWAYS.value:
                    fire predicate loading FSM and wait before
                    moving to INSN_READY
                    else:
                    sync += self.srcmask.eq(-1) # set to all 1s
                    sync += self.dstmask.eq(-1) # set to all 1s
                    m.next = "INSN_READY"
                    """

        with m.State("INSN_READY"):
            # hand over the instruction, to be decoded
            comb += fetch_insn_o_valid.eq(1)
            with m.If(fetch_insn_i_ready):
                m.next = "IDLE"
397
def fetch_predicate_fsm(self, m,
                        pred_insn_i_valid, pred_insn_o_ready,
                        pred_mask_o_valid, pred_mask_i_ready):
    """fetch_predicate_fsm - obtains (constructs in the case of CR)
    src/dest predicate masks

    https://bugs.libre-soc.org/show_bug.cgi?id=617
    the predicates are read here through self.int_pred (INT regfile)
    or self.cr_pred (CR regfile). in the case of CRs it has to
    be done through multiple reads, extracting one relevant at a time.
    later, a faster way would be to use the 32-bit-wide CR port but
    this is more complex decoding, here. equivalent code used in
    ISACaller is "from openpower.decoder.isa.caller import get_predcr"

    note: this ENTIRE FSM is not to be called when svp64 is disabled

    * m: Module that the FSM is added to
    * pred_insn_i_valid: request to fetch, sampled in FETCH_PRED_IDLE
    * pred_insn_o_ready: (driven here) high whilst in FETCH_PRED_IDLE
    * pred_mask_o_valid: (driven here) high whilst in FETCH_PRED_DONE
    * pred_mask_i_ready: acknowledgement, sampled in FETCH_PRED_DONE

    the results are left in self.srcmask and self.dstmask, already
    shifted down by the current srcstep / dststep (or all-ones when
    the predicate mode is neither INT nor CR).
    """
    comb = m.d.comb
    sync = m.d.sync
    pdecode2 = self.pdecode2
    rm_dec = pdecode2.rm_dec # SVP64RMModeDecode
    predmode = rm_dec.predmode
    srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
    cr_pred, int_pred = self.cr_pred, self.int_pred # read regfiles
    # get src/dst step, so we can skip already used mask bits
    cur_state = self.cur_state
    srcstep = cur_state.svstate.srcstep
    dststep = cur_state.svstate.dststep
    cur_vl = cur_state.svstate.vl

    # decode predicates. both the INT and the CR interpretation of
    # srcpred/dstpred are decoded: predmode selects which gets used
    sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
    dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
    sidx, scrinvert = get_predcr(m, srcpred, 's')
    didx, dcrinvert = get_predcr(m, dstpred, 'd')

    # store fetched masks, for either intpred or crpred
    # when src/dst step is not zero, the skipped mask bits need to be
    # shifted-out, before actually storing them in src/dest mask
    new_srcmask = Signal(64, reset_less=True)
    new_dstmask = Signal(64, reset_less=True)

    with m.FSM(name="fetch_predicate"):

        with m.State("FETCH_PRED_IDLE"):
            comb += pred_insn_o_ready.eq(1)
            with m.If(pred_insn_i_valid):
                with m.If(predmode == SVP64PredMode.INT):
                    # skip fetching destination mask register, when zero
                    with m.If(dall1s):
                        sync += new_dstmask.eq(-1)
                        # directly go to fetch source mask register
                        # guaranteed not to be zero (otherwise predmode
                        # would be SVP64PredMode.ALWAYS, not INT)
                        comb += int_pred.addr.eq(sregread)
                        comb += int_pred.ren.eq(1)
                        m.next = "INT_SRC_READ"
                    # fetch destination predicate register
                    with m.Else():
                        comb += int_pred.addr.eq(dregread)
                        comb += int_pred.ren.eq(1)
                        m.next = "INT_DST_READ"
                with m.Elif(predmode == SVP64PredMode.CR):
                    # go fetch masks from the CR register file
                    sync += new_srcmask.eq(0)
                    sync += new_dstmask.eq(0)
                    m.next = "CR_READ"
                with m.Else():
                    # neither INT nor CR: no predication, so both
                    # masks are all-ones and there is nothing to read
                    sync += self.srcmask.eq(-1)
                    sync += self.dstmask.eq(-1)
                    m.next = "FETCH_PRED_DONE"

        # the read requested for the destination register is collected
        # here (and, if needed, the source register read is requested)
        with m.State("INT_DST_READ"):
            # store destination mask
            inv = Repl(dinvert, 64)
            with m.If(dunary):
                # set selected mask bit for 1<<r3 mode
                # (only the low 6 bits of the register are used)
                dst_shift = Signal(range(64))
                comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
                sync += new_dstmask.eq(1 << dst_shift)
            with m.Else():
                # invert mask if requested
                sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
            # skip fetching source mask register, when zero
            with m.If(sall1s):
                sync += new_srcmask.eq(-1)
                m.next = "FETCH_PRED_SHIFT_MASK"
            # fetch source predicate register
            with m.Else():
                comb += int_pred.addr.eq(sregread)
                comb += int_pred.ren.eq(1)
                m.next = "INT_SRC_READ"

        with m.State("INT_SRC_READ"):
            # store source mask
            inv = Repl(sinvert, 64)
            with m.If(sunary):
                # set selected mask bit for 1<<r3 mode
                src_shift = Signal(range(64))
                comb += src_shift.eq(self.int_pred.o_data & 0b111111)
                sync += new_srcmask.eq(1 << src_shift)
            with m.Else():
                # invert mask if requested
                sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
            m.next = "FETCH_PRED_SHIFT_MASK"

        # fetch masks from the CR register file
        # implements the following loop:
        # idx, inv = get_predcr(mask)
        # mask = 0
        # for cr_idx in range(vl):
        #     cr = crl[cr_idx + SVP64CROffs.CRPred] # takes one cycle
        #     if cr[idx] ^ inv:
        #         mask |= 1 << cr_idx
        # return mask
        with m.State("CR_READ"):
            # CR index to be read, which will be ready by the next cycle
            cr_idx = Signal.like(cur_vl, reset_less=True)
            # submit the read operation to the regfile
            with m.If(cr_idx != cur_vl):
                # the CR read port is unary ...
                # ren = 1 << cr_idx
                # ... in MSB0 convention ...
                # ren = 1 << (7 - cr_idx)
                # ... and with an offset:
                # ren = 1 << (7 - off - cr_idx)
                idx = SVP64CROffs.CRPred + cr_idx
                comb += cr_pred.ren.eq(1 << (7 - idx))
                # signal data valid in the next cycle
                cr_read = Signal(reset_less=True)
                sync += cr_read.eq(1)
                # load the next index
                sync += cr_idx.eq(cr_idx + 1)
            with m.Else():
                # exit on loop end
                sync += cr_read.eq(0)
                sync += cr_idx.eq(0)
                m.next = "FETCH_PRED_SHIFT_MASK"
            # deliberately a separate If (not part of the chain above):
            # the data from the final read still has to be collected
            # in the cycle in which the loop exits
            with m.If(cr_read):
                # compensate for the one cycle delay on the regfile
                cur_cr_idx = Signal.like(cur_vl)
                comb += cur_cr_idx.eq(cr_idx - 1)
                # read the CR field, select the appropriate bit
                cr_field = Signal(4)
                scr_bit = Signal()
                dcr_bit = Signal()
                comb += cr_field.eq(cr_pred.o_data)
                comb += scr_bit.eq(cr_field.bit_select(sidx, 1) ^ scrinvert)
                comb += dcr_bit.eq(cr_field.bit_select(didx, 1) ^ dcrinvert)
                # set the corresponding mask bit
                bit_to_set = Signal.like(self.srcmask)
                comb += bit_to_set.eq(1 << cur_cr_idx)
                with m.If(scr_bit):
                    sync += new_srcmask.eq(new_srcmask | bit_to_set)
                with m.If(dcr_bit):
                    sync += new_dstmask.eq(new_dstmask | bit_to_set)

        with m.State("FETCH_PRED_SHIFT_MASK"):
            # shift-out skipped mask bits
            sync += self.srcmask.eq(new_srcmask >> srcstep)
            sync += self.dstmask.eq(new_dstmask >> dststep)
            m.next = "FETCH_PRED_DONE"

        # masks are in place: tell the requester, wait to be acknowledged
        with m.State("FETCH_PRED_DONE"):
            comb += pred_mask_o_valid.eq(1)
            with m.If(pred_mask_i_ready):
                m.next = "FETCH_PRED_IDLE"
564
def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
              dbg, core_rst, is_svp64_mode,
              fetch_pc_o_ready, fetch_pc_i_valid,
              fetch_insn_o_valid, fetch_insn_i_ready,
              pred_insn_i_valid, pred_insn_o_ready,
              pred_mask_o_valid, pred_mask_i_ready,
              exec_insn_i_valid, exec_insn_o_ready,
              exec_pc_o_valid, exec_pc_i_ready):
    """issue FSM

    decode / issue FSM. this interacts with the "fetch" FSM
    through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
    (outgoing). also interacts with the "execute" FSM
    through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
    (incoming).
    SVP64 RM prefixes have already been set up by the
    "fetch" phase, so execute is fairly straightforward.

    states: ISSUE_START -> INSN_WAIT -> (PRED_START -> MASK_WAIT ->
    PRED_SKIP, svp64 only) -> DECODE_SV -> INSN_EXECUTE ->
    EXECUTE_WAIT -> ISSUE_START (or PRED_SKIP, to loop on the next
    element, or DECODE_SV, to re-run after an exception)

    * m: Module that the FSM is added to
    * core: the core. core.i is loaded with the decoded instruction,
      and core.fus is consulted for exceptions
    * pc_changed, sv_changed: flags noting that PC / SVSTATE was
      altered. set here when altered through pc_i / svstate_i whilst
      stopped; read in EXECUTE_WAIT to suppress the PC update
    * nia: address of the next instruction, written to the PC
      register when an instruction (or VL loop) completes
    * dbg: debug interface: core_stop_o is obeyed, and core_stopped_i
      is raised whilst stopped
    * core_rst: treated exactly like a stop request
    * is_svp64_mode: whether the fetched instruction is SVP64-prefixed
    * fetch_pc_i_valid (driven) / fetch_pc_o_ready: start a fetch
    * fetch_insn_i_ready (driven) / fetch_insn_o_valid: receive insn
    * pred_insn_i_valid (driven) / pred_insn_o_ready: start the
      predicate fetch
    * pred_mask_i_ready (driven) / pred_mask_o_valid: receive masks
    * exec_insn_i_valid (driven) / exec_insn_o_ready: start execute
    * exec_pc_i_ready (driven) / exec_pc_o_valid: execute completed
    """

    comb = m.d.comb
    sync = m.d.sync
    pdecode2 = self.pdecode2
    cur_state = self.cur_state

    # temporaries
    dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode

    # for updating svstate (things like srcstep etc.)
    # new_svstate defaults to the current svstate: the states below
    # override just the fields they want to change, then raise
    # update_svstate to have the result written (see end of function)
    update_svstate = Signal() # set this (below) if updating
    new_svstate = SVSTATERec("new_svstate")
    comb += new_svstate.eq(cur_state.svstate)

    # precalculate srcstep+1 and dststep+1
    cur_srcstep = cur_state.svstate.srcstep
    cur_dststep = cur_state.svstate.dststep
    next_srcstep = Signal.like(cur_srcstep)
    next_dststep = Signal.like(cur_dststep)
    comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
    comb += next_dststep.eq(cur_state.svstate.dststep+1)

    # note if an exception happened. in a pipelined or OoO design
    # this needs to be accompanied by "shadowing" (or stalling)
    el = []
    for exc in core.fus.excs.values():
        el.append(exc.happened)
    exc_happened = Signal()
    if len(el) > 0: # at least one exception
        comb += exc_happened.eq(Cat(*el).bool())

    with m.FSM(name="issue_fsm"):

        # sync with the "fetch" phase which is reading the instruction
        # at this point, there is no instruction running, that
        # could inadvertently update the PC.
        with m.State("ISSUE_START"):
            # wait on "core stop" release, before next fetch
            # need to do this here, in case we are in a VL==0 loop
            with m.If(~dbg.core_stop_o & ~core_rst):
                comb += fetch_pc_i_valid.eq(1) # tell fetch to start
                with m.If(fetch_pc_o_ready): # fetch acknowledged us
                    m.next = "INSN_WAIT"
            with m.Else():
                # tell core it's stopped, and acknowledge debug handshake
                comb += dbg.core_stopped_i.eq(1)
                # while stopped, allow updating the PC and SVSTATE
                with m.If(self.pc_i.ok):
                    comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                    comb += self.state_w_pc.i_data.eq(self.pc_i.data)
                    sync += pc_changed.eq(1)
                with m.If(self.svstate_i.ok):
                    comb += new_svstate.eq(self.svstate_i.data)
                    comb += update_svstate.eq(1)
                    sync += sv_changed.eq(1)

        # wait for an instruction to arrive from Fetch
        with m.State("INSN_WAIT"):
            comb += fetch_insn_i_ready.eq(1)
            with m.If(fetch_insn_o_valid):
                # loop into ISSUE_START if it's a SVP64 instruction
                # and VL == 0. this because VL==0 is a for-loop
                # from 0 to 0 i.e. always, always a NOP.
                cur_vl = cur_state.svstate.vl
                with m.If(is_svp64_mode & (cur_vl == 0)):
                    # update the PC before fetching the next instruction
                    # since we are in a VL==0 loop, no instruction was
                    # executed that we could be overwriting
                    comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                    comb += self.state_w_pc.i_data.eq(nia)
                    comb += self.insn_done.eq(1)
                    m.next = "ISSUE_START"
                with m.Else():
                    # python-level choice, made once at elaboration
                    if self.svp64_en:
                        m.next = "PRED_START" # start fetching predicate
                    else:
                        m.next = "DECODE_SV" # skip predication

        with m.State("PRED_START"):
            comb += pred_insn_i_valid.eq(1) # tell fetch_pred to start
            with m.If(pred_insn_o_ready): # fetch_pred acknowledged us
                m.next = "MASK_WAIT"

        with m.State("MASK_WAIT"):
            comb += pred_mask_i_ready.eq(1) # ready to receive the masks
            with m.If(pred_mask_o_valid): # predication masks are ready
                m.next = "PRED_SKIP"

        # skip zeros in predicate
        with m.State("PRED_SKIP"):
            with m.If(~is_svp64_mode):
                m.next = "DECODE_SV" # nothing to do
            with m.Else():
                if self.svp64_en:
                    pred_src_zero = pdecode2.rm_dec.pred_sz
                    pred_dst_zero = pdecode2.rm_dec.pred_dz

                    # new srcstep, after skipping zeros
                    skip_srcstep = Signal.like(cur_srcstep)
                    # value to be added to the current srcstep
                    src_delta = Signal.like(cur_srcstep)
                    # add leading zeros to srcstep, if not in zero mode
                    with m.If(~pred_src_zero):
                        # priority encoder (count leading zeros)
                        # append guard bit, in case the mask is all zeros
                        pri_enc_src = PriorityEncoder(65)
                        m.submodules.pri_enc_src = pri_enc_src
                        comb += pri_enc_src.i.eq(Cat(self.srcmask,
                                                     Const(1, 1)))
                        comb += src_delta.eq(pri_enc_src.o)
                    # apply delta to srcstep
                    comb += skip_srcstep.eq(cur_srcstep + src_delta)
                    # shift-out all leading zeros from the mask
                    # plus the leading "one" bit
                    # TODO count leading zeros and shift-out the zero
                    # bits, in the same step, in hardware
                    sync += self.srcmask.eq(self.srcmask >> (src_delta+1))

                    # same as above, but for dststep
                    skip_dststep = Signal.like(cur_dststep)
                    dst_delta = Signal.like(cur_dststep)
                    with m.If(~pred_dst_zero):
                        pri_enc_dst = PriorityEncoder(65)
                        m.submodules.pri_enc_dst = pri_enc_dst
                        comb += pri_enc_dst.i.eq(Cat(self.dstmask,
                                                     Const(1, 1)))
                        comb += dst_delta.eq(pri_enc_dst.o)
                    comb += skip_dststep.eq(cur_dststep + dst_delta)
                    sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))

                    # TODO: initialize mask[VL]=1 to avoid passing past VL
                    # (cur_vl is the python name bound in INSN_WAIT)
                    with m.If((skip_srcstep >= cur_vl) |
                              (skip_dststep >= cur_vl)):
                        # end of VL loop. Update PC and reset src/dst step
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.i_data.eq(nia)
                        comb += new_svstate.srcstep.eq(0)
                        comb += new_svstate.dststep.eq(0)
                        comb += update_svstate.eq(1)
                        # synchronize with the simulator
                        comb += self.insn_done.eq(1)
                        # go back to Issue
                        m.next = "ISSUE_START"
                    with m.Else():
                        # update new src/dst step
                        comb += new_svstate.srcstep.eq(skip_srcstep)
                        comb += new_svstate.dststep.eq(skip_dststep)
                        comb += update_svstate.eq(1)
                        # proceed to Decode
                        m.next = "DECODE_SV"

                    # pass predicate mask bits through to satellite decoders
                    # TODO: for SIMD this will be *multiple* bits
                    # NOTE(review): kept under "if self.svp64_en" because
                    # self.srcmask / self.dstmask are used - confirm
                    # the intended nesting level
                    sync += core.i.sv_pred_sm.eq(self.srcmask[0])
                    sync += core.i.sv_pred_dm.eq(self.dstmask[0])

        # after src/dst step have been updated, we are ready
        # to decode the instruction
        with m.State("DECODE_SV"):
            # decode the instruction
            # (everything is registered: core.i holds it from the
            # next clock, which is when INSN_EXECUTE is entered)
            sync += core.i.e.eq(pdecode2.e)
            sync += core.i.state.eq(cur_state)
            sync += core.i.raw_insn_i.eq(dec_opcode_i)
            sync += core.i.bigendian_i.eq(self.core_bigendian_i)
            if self.svp64_en:
                sync += core.i.sv_rm.eq(pdecode2.sv_rm)
                # set RA_OR_ZERO detection in satellite decoders
                sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
                # and svp64 detection
                sync += core.i.is_svp64_mode.eq(is_svp64_mode)
                # and svp64 bit-rev'd ldst mode
                ldst_dec = pdecode2.use_svp64_ldst_dec
                sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
            # after decoding, reset any previous exception condition,
            # allowing it to be set again during the next execution
            sync += pdecode2.ldst_exc.eq(0)

            m.next = "INSN_EXECUTE" # move to "execute"

        # handshake with execution FSM, move to "wait" once acknowledged
        with m.State("INSN_EXECUTE"):
            comb += exec_insn_i_valid.eq(1) # trigger execute
            with m.If(exec_insn_o_ready): # execute acknowledged us
                m.next = "EXECUTE_WAIT"

        with m.State("EXECUTE_WAIT"):
            # wait on "core stop" release, at instruction end
            # need to do this here, in case we are in a VL>1 loop
            with m.If(~dbg.core_stop_o & ~core_rst):
                comb += exec_pc_i_ready.eq(1)
                # see https://bugs.libre-soc.org/show_bug.cgi?id=636
                # the exception info needs to be blatted into
                # pdecode.ldst_exc, and the instruction "re-run".
                # when ldst_exc.happened is set, the PowerDecoder2
                # reacts very differently: it re-writes the instruction
                # with a "trap" (calls PowerDecoder2.trap()) which
                # will *overwrite* whatever was requested and jump the
                # PC to the exception address, as well as alter MSR.
                # nothing else needs to be done other than to note
                # the change of PC and MSR (and, later, SVSTATE)
                with m.If(exc_happened):
                    sync += pdecode2.ldst_exc.eq(core.fus.get_exc("ldst0"))

                with m.If(exec_pc_o_valid):

                    # was this the last loop iteration?
                    is_last = Signal()
                    cur_vl = cur_state.svstate.vl
                    comb += is_last.eq(next_srcstep == cur_vl)

                    # return directly to Decode if Execute generated an
                    # exception.
                    with m.If(pdecode2.ldst_exc.happened):
                        m.next = "DECODE_SV"

                    # if either PC or SVSTATE were changed by the previous
                    # instruction, go directly back to Fetch, without
                    # updating either PC or SVSTATE
                    with m.Elif(pc_changed | sv_changed):
                        m.next = "ISSUE_START"

                    # also return to Fetch, when no output was a vector
                    # (regardless of SRCSTEP and VL), or when the last
                    # instruction was really the last one of the VL loop
                    with m.Elif((~pdecode2.loop_continue) | is_last):
                        # before going back to fetch, update the PC state
                        # register with the NIA.
                        # ok here we are not reading the branch unit.
                        # TODO: this just blithely overwrites whatever
                        # pipeline updated the PC
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.i_data.eq(nia)
                        # reset SRCSTEP before returning to Fetch
                        if self.svp64_en:
                            with m.If(pdecode2.loop_continue):
                                comb += new_svstate.srcstep.eq(0)
                                comb += new_svstate.dststep.eq(0)
                                comb += update_svstate.eq(1)
                        else:
                            comb += new_svstate.srcstep.eq(0)
                            comb += new_svstate.dststep.eq(0)
                            comb += update_svstate.eq(1)
                        m.next = "ISSUE_START"

                    # returning to Execute? then, first update SRCSTEP
                    with m.Else():
                        comb += new_svstate.srcstep.eq(next_srcstep)
                        comb += new_svstate.dststep.eq(next_dststep)
                        comb += update_svstate.eq(1)
                        # return to mask skip loop
                        m.next = "PRED_SKIP"

            with m.Else():
                # stopped (or in reset): same handling as in ISSUE_START
                comb += dbg.core_stopped_i.eq(1)
                # while stopped, allow updating the PC and SVSTATE
                with m.If(self.pc_i.ok):
                    comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                    comb += self.state_w_pc.i_data.eq(self.pc_i.data)
                    sync += pc_changed.eq(1)
                with m.If(self.svstate_i.ok):
                    comb += new_svstate.eq(self.svstate_i.data)
                    comb += update_svstate.eq(1)
                    sync += sv_changed.eq(1)

    # check if svstate needs updating: if so, write it to State Regfile
    # (outside of the FSM: applies whichever state raised the flag)
    with m.If(update_svstate):
        comb += self.state_w_sv.wen.eq(1<<StateRegs.SVSTATE)
        comb += self.state_w_sv.i_data.eq(new_svstate)
        sync += cur_state.svstate.eq(new_svstate) # for next clock
852
def execute_fsm(self, m, core, pc_changed, sv_changed,
                exec_insn_i_valid, exec_insn_o_ready,
                exec_pc_o_valid, exec_pc_i_ready):
    """execute FSM

    hands one (already decoded) instruction to the core and waits for
    it to finish. talks to the "issue" FSM through the exec_insn
    ready/valid pair (incoming) and the exec_pc ready/valid pair
    (outgoing). SVP64 RM prefixes were dealt with by the "issue"
    phase, so there is little left to do here.

    * m: Module that the FSM is added to
    * core: the core. p.i_valid and i.issue_i are driven; p.o_ready
      and i.e.do.insn_type are read
    * pc_changed, sv_changed: cleared when an instruction is started,
      then set if self.state_nia.wen shows the PC / SVSTATE being
      written whilst the instruction is active
    * exec_insn_i_valid: request to execute, sampled in INSN_START
    * exec_insn_o_ready: (driven here) high whilst in INSN_START
    * exec_pc_o_valid: (driven here) high once the core is not busy
    * exec_pc_i_ready: acknowledgement of completion
    """
    ldst_exc = self.pdecode2.ldst_exc
    nia_wen = self.state_nia.wen        # write-enables of the nia port
    busy = ~core.p.o_ready              # core is busy
    micro_op = core.i.e.do.insn_type    # instruction MicroOp type

    with m.FSM(name="exec_fsm"):

        # idle: remain here until Issue hands over an instruction
        with m.State("INSN_START"):
            m.d.comb += exec_insn_o_ready.eq(1)
            with m.If(exec_insn_i_valid):
                # mark the instruction as both valid and issued...
                m.d.comb += core.p.i_valid.eq(1)
                m.d.comb += core.i.issue_i.eq(1)
                # ... and start with a clean "nothing changed" slate
                m.d.sync += sv_changed.eq(0)
                m.d.sync += pc_changed.eq(0)
                m.next = "INSN_ACTIVE" # move to "wait completion"

        # the instruction is running: nothing to do but wait
        with m.State("INSN_ACTIVE"):
            # valid is held up for everything except a NOP
            with m.If(micro_op != MicrOp.OP_NOP):
                m.d.comb += core.p.i_valid.eq(1)
            # keep a note of any write to SVSTATE or to the PC
            with m.If(nia_wen & (1<<StateRegs.SVSTATE)):
                m.d.sync += sv_changed.eq(1)
            with m.If(nia_wen & (1<<StateRegs.PC)):
                m.d.sync += pc_changed.eq(1)
            with m.If(~busy): # instruction done!
                m.d.comb += exec_pc_o_valid.eq(1)
                with m.If(exec_pc_i_ready):
                    # "done" is only signalled when no exception is
                    # recorded. exceptions are implemented by feeding
                    # the information from the LDST unit *back* into
                    # PowerDecoder2 and *re-running the instruction*:
                    # were "done" to be raised here as well it would
                    # look like *TWO* instructions had been executed:
                    # 1) the failed LDST 2) a TRAP.
                    with m.If(~ldst_exc.happened):
                        m.d.comb += self.insn_done.eq(1)
                    m.next = "INSN_START" # back to fetch
911
def setup_peripherals(self, m):
    """add the core, memories, debug and optional peripherals to m

    registers the submodules (core, imem, dbg and - depending on the
    jtag_en / sram4x4k / xics / gpio / svp64_en flags - JTAG, SRAMs,
    XICS, GPIO and the SVP64 prefix decoder), creates the "por",
    "sync", "coresync" (and, if different from "sync", debug) clock
    domains, wires up the power-on-reset delay and the core reset,
    and applies the temporary LD/ST "go" hack.

    m: the Module being elaborated.  nothing is returned.
    """
    comb, sync = m.d.comb, m.d.sync

    # okaaaay so the debug module must be in coresync clock domain
    # but NOT its reset signal. to cope with this, set every single
    # submodule explicitly in coresync domain, debug and JTAG
    # in their own one but using *external* reset.
    csd = DomainRenamer("coresync")
    dbd = DomainRenamer(self.dbg_domain)

    m.submodules.core = core = csd(self.core)
    m.submodules.imem = imem = csd(self.imem)
    m.submodules.dbg = dbg = dbd(self.dbg)
    if self.jtag_en:
        m.submodules.jtag = jtag = dbd(self.jtag)
        # TODO: UART2GDB mux, here, from external pin
        # see https://bugs.libre-soc.org/show_bug.cgi?id=499
        sync += dbg.dmi.connect_to(jtag.dmi)

    cur_state = self.cur_state

    # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
    if self.sram4x4k:
        for i, sram in enumerate(self.sram4k):
            m.submodules["sram4k_%d" % i] = csd(sram)
            comb += sram.enable.eq(self.wb_sram_en)

    # XICS interrupt handler
    if self.xics:
        m.submodules.xics_icp = icp = csd(self.xics_icp)
        m.submodules.xics_ics = ics = csd(self.xics_ics)
        comb += icp.ics_i.eq(ics.icp_o)            # connect ICS to ICP
        sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core

    # GPIO test peripheral
    if self.gpio:
        m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)

    # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
    # XXX causes litex ECP5 test to get wrong idea about input and output
    # (but works with verilator sim *sigh*)
    #if self.gpio and self.xics:
    #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])

    # instruction decoder
    # NOTE(review): the result of create_pdecode() is not used anywhere
    # in this method; the call is kept in case it is relied upon for
    # side-effects - confirm, then remove.
    pdecode = create_pdecode()
    m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
    if self.svp64_en:
        m.submodules.svp64 = svp64 = csd(self.svp64)

    # clock delay power-on reset
    cd_por = ClockDomain(reset_less=True)
    cd_sync = ClockDomain()
    core_sync = ClockDomain("coresync")
    m.domains += cd_por, cd_sync, core_sync
    if self.dbg_domain != "sync":
        dbg_sync = ClockDomain(self.dbg_domain)
        m.domains += dbg_sync

    # "delay" counts down from 3 to 0 in the por domain, then stops
    ti_rst = Signal(reset_less=True)
    delay = Signal(range(4), reset=3)
    with m.If(delay != 0):
        m.d.por += delay.eq(delay - 1)
    comb += cd_por.clk.eq(ClockSignal())

    # power-on reset delay: hold the core in reset while the delay is
    # still counting, OR when debug requests it, OR on external reset.
    # the brackets round the comparison are essential: in python "|"
    # binds more tightly than "!=", so without them the expression
    # becomes delay != (0 | core_rst_o | reset)
    core_rst = ResetSignal("coresync")
    comb += ti_rst.eq((delay != 0) | dbg.core_rst_o | ResetSignal())
    comb += core_rst.eq(ti_rst)

    # debug clock is same as coresync, but reset is *main external*
    if self.dbg_domain != "sync":
        dbg_rst = ResetSignal(self.dbg_domain)
        comb += dbg_rst.eq(ResetSignal())

    # busy/halted signals from core
    core_busy_o = ~core.p.o_ready  # core is busy
    comb += self.busy_o.eq(core_busy_o)
    comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)

    # temporary hack: says "go" immediately for both address gen and ST
    ldst = core.fus.fus['ldst0']
    st_go_edge = rising_edge(m, ldst.st.rel_o)
    m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)  # link addr-go direct to rel
    m.d.comb += ldst.st.go_i.eq(st_go_edge)     # link store-go to rising rel
1002
def elaborate(self, platform):
    """construct the issuer: peripherals, state access and the FSMs

    creates the Module, calls setup_peripherals, reads PC and SVSTATE
    through state_get, declares the ready/valid handshake signals and
    then instantiates the fetch, issue, (SVP64-only) predicate-fetch
    and execute FSMs, followed by the DMI regfile access logic and
    the DEC/TB update FSM.  returns the Module.
    """
    m = Module()
    comb, sync = m.d.comb, m.d.sync
    cur_state, dbg, core = self.cur_state, self.dbg, self.core

    # peripherals and core first
    core_rst = self.core_rst
    self.setup_peripherals(m)

    # a requested core reset zeroes the current state
    with m.If(core_rst):
        sync += cur_state.eq(0)

    # PC output, and flags recording writes to PC / SVSTATE
    comb += self.pc_o.eq(cur_state.pc)
    pc_changed = Signal()
    sv_changed = Signal()

    # PC and SVSTATE: taken from the incoming override or from the
    # state regfile.  TODO: really should be doing MSR in the same way
    pc = state_get(m, core_rst, self.pc_i, "pc",
                   self.state_r_pc, StateRegs.PC)
    svstate = state_get(m, core_rst, self.svstate_i, "svstate",
                        self.state_r_sv, StateRegs.SVSTATE)

    # defaults: no PC write and no MSR read, unless something below
    # says otherwise
    comb += [self.state_w_pc.wen.eq(0),
             self.state_w_pc.i_data.eq(0)]
    comb += self.state_r_msr.ren.eq(0)

    # next-instruction address in the absence of a branch (how far it
    # is from the PC depends on the instruction size)
    nia = Signal(64)

    # debug hookup
    # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
    comb += [dbg.terminate_i.eq(core.o.core_terminate_o),
             dbg.state.pc.eq(pc),
             dbg.state.svstate.eq(svstate),
             dbg.state.msr.eq(cur_state.msr)]

    # prefix mode, handed from Fetch to Issue so that Issue can loop
    # on VL==0
    is_svp64_mode = Signal()

    # handshake signals between the FSMs: fetch (32/64-bit), issue,
    # decode/execute and - with SVP64 enabled - predicate
    # fetch/calculate.  (each Signal is deliberately assigned on its
    # own line, so that it picks up the name of its variable)

    # Fetch can start as soon as the PC is valid
    fetch_pc_i_valid = Signal()  # Execute tells Fetch "start next read"
    fetch_pc_o_ready = Signal()  # Fetch Tells SVSTATE "proceed"

    # Fetch -> decode/issue: the instruction
    fetch_insn_o_valid = Signal()
    fetch_insn_i_ready = Signal()

    # request to the predicate fetch FSM
    pred_insn_i_valid = Signal()
    pred_insn_o_ready = Signal()

    # predicate fetch FSM -> masks
    pred_mask_o_valid = Signal()
    pred_mask_i_ready = Signal()

    # Issue -> Execute: the instruction to be executed
    exec_insn_i_valid = Signal()
    exec_insn_o_ready = Signal()

    # Execute -> Issue: PC/SVSTATE handed back
    exec_pc_o_valid = Signal()
    exec_pc_i_ready = Signal()

    # the FSMs are perhaps unusual: they detect conditions and then
    # "hold" information for the core combinatorially (actual opcode,
    # valid flags and so on) rather than through sync, which would
    # cost a clock of delay.
    #
    # order: Fetch, predicate fetch, Issue, Execute.  the VL for-loop
    # lives in Issue; ready/valid signalling links all four.

    self.fetch_fsm(m, core, pc, svstate, nia, is_svp64_mode,
                   fetch_pc_o_ready, fetch_pc_i_valid,
                   fetch_insn_o_valid, fetch_insn_i_ready)

    self.issue_fsm(m, core, pc_changed, sv_changed, nia,
                   dbg, core_rst, is_svp64_mode,
                   fetch_pc_o_ready, fetch_pc_i_valid,
                   fetch_insn_o_valid, fetch_insn_i_ready,
                   pred_insn_i_valid, pred_insn_o_ready,
                   pred_mask_o_valid, pred_mask_i_ready,
                   exec_insn_i_valid, exec_insn_o_ready,
                   exec_pc_o_valid, exec_pc_i_ready)

    if self.svp64_en:
        self.fetch_predicate_fsm(m,
                                 pred_insn_i_valid, pred_insn_o_ready,
                                 pred_mask_o_valid, pred_mask_i_ready)

    self.execute_fsm(m, core, pc_changed, sv_changed,
                     exec_insn_i_valid, exec_insn_o_ready,
                     exec_pc_o_valid, exec_pc_i_ready)

    # added after the FSMs so that it over-rides whatever they did
    # while core reset is held
    with m.If(core_rst):
        sync += nia.eq(0)

    # regfile reads on demand from DMI: not part of any FSM
    self.do_dmi(m, dbg)

    # DEC and TB inc/dec FSM.  a copy of DEC goes into CoreState
    # (PowerDecoder2 uses that to raise the 0x900 exception)
    self.tb_dec_fsm(m, cur_state.dec)

    return m
1128
def do_dmi(self, m, dbg):
    """service DMI debug read requests

    handles read requests for the INT regfile, CR and XER only
    (*writing* to these regfiles is to be added later).  each of the
    three follows the same pattern: the read-enable is raised while
    the request is active, and data plus ack are presented when the
    one-clock-delayed copy of the request is set.
    """
    comb, sync = m.d.comb, m.d.sync
    d_reg, d_cr, d_xer = dbg.d_gpr, dbg.d_cr, dbg.d_xer
    intrf = self.core.regs.rf['int']

    # --- INT regfile ---
    with m.If(d_reg.req):
        # TODO: error-check this
        # XXX should this be combinatorial?  sync better?
        if intrf.unary:
            # unary regfile: read-enable is a one-hot register select
            comb += self.int_r.ren.eq(1 << d_reg.addr)
        else:
            # otherwise: binary address plus a single enable bit
            comb += [self.int_r.addr.eq(d_reg.addr),
                     self.int_r.ren.eq(1)]
    d_reg_delay = Signal()
    sync += d_reg_delay.eq(d_reg.req)
    with m.If(d_reg_delay):
        # the data shows up one clock after the request
        comb += [d_reg.data.eq(self.int_r.o_data),
                 d_reg.ack.eq(1)]

    # --- CR: same again, with all eight enable bits set ---
    with m.If(d_cr.req):
        comb += self.cr_r.ren.eq(0b11111111)
    d_cr_delay = Signal()
    sync += d_cr_delay.eq(d_cr.req)
    with m.If(d_cr_delay):
        # the data shows up one clock after the request
        comb += [d_cr.data.eq(self.cr_r.o_data),
                 d_cr.ack.eq(1)]

    # --- XER: and again, with all six enable bits set ---
    with m.If(d_xer.req):
        comb += self.xer_r.ren.eq(0b111111)
    d_xer_delay = Signal()
    sync += d_xer_delay.eq(d_xer.req)
    with m.If(d_xer_delay):
        # the data shows up one clock after the request
        comb += [d_xer.data.eq(self.xer_r.o_data),
                 d_xer.ack.eq(1)]
1174
def tb_dec_fsm(self, m, spr_dec):
    """tb_dec_fsm

    FSM that updates DEC and TB in turn: read DEC, write DEC-1,
    read TB, write TB+1, and round again.  the SPR pipeline could
    have written a new value to DEC in the meantime, however the
    regfile has "passthrough" on it so this *should* be ok.

    spr_dec receives (via sync) a copy of each newly-written DEC.

    see v3.0B p1097-1099 for Timer Resource and p1065 and p1076
    """
    comb, sync = m.d.comb, m.d.sync
    fast = self.core.regs.rf['fast']
    rd_port = fast.r_ports['issue']  # shared by DEC and TB reads
    wr_port = fast.w_ports['issue']  # shared by DEC and TB writes

    with m.FSM():

        # kick off the read of DEC
        with m.State("DEC_READ"):
            comb += [rd_port.addr.eq(FastRegs.DEC),
                     rd_port.ren.eq(1)]
            m.next = "DEC_WRITE"

        # DEC read data is used here (1 cycle later): write back DEC-1
        with m.State("DEC_WRITE"):
            new_dec = Signal(64)
            # TODO: MSR.LPCR 32-bit decrement mode
            comb += new_dec.eq(rd_port.o_data - 1)
            comb += [wr_port.addr.eq(FastRegs.DEC),
                     wr_port.wen.eq(1),
                     wr_port.i_data.eq(new_dec)]
            # keep a copy in cur_state, for the decoder
            sync += spr_dec.eq(new_dec)
            m.next = "TB_READ"

        # kick off the read of TB
        with m.State("TB_READ"):
            comb += [rd_port.addr.eq(FastRegs.TB),
                     rd_port.ren.eq(1)]
            m.next = "TB_WRITE"

        # TB read data is used here: write back TB+1
        with m.State("TB_WRITE"):
            new_tb = Signal(64)
            comb += new_tb.eq(rd_port.o_data + 1)
            comb += [wr_port.addr.eq(FastRegs.TB),
                     wr_port.wen.eq(1),
                     wr_port.i_data.eq(new_tb)]
            m.next = "DEC_READ"

    return m
1226
def __iter__(self):
    """yield the ports: pc_i, pc_o, memerr_o, core, imem, endian, busy"""
    yield from self.pc_i.ports()
    yield from (self.pc_o, self.memerr_o)
    # everything the core and the instruction memory expose
    for unit in (self.core, self.imem):
        yield from unit.ports()
    yield from (self.core_bigendian_i, self.busy_o)
1235
def ports(self):
    """return, as a list, everything that iterating over self yields"""
    return [*self]
1238
def external_ports(self):
    """return the list of ports to expose at the top level

    always included: pc_i's ports, pc_o, memerr_o, core_bigendian_i,
    busy_o, the instruction bus fields and the fields of the bus
    returned by core.l0.cmpi.wb_bus().  JTAG's external ports are
    added when jtag_en is set, otherwise the DMI ports.  the
    sram4x4k, xics and gpio flags add the matching bus fields (plus
    int_level_i for xics and gpio_o for gpio).

    a new list is built on every call: the list returned by
    pc_i.ports() is copied, never extended in place.
    """
    # copy first: "+=" on the returned object itself would modify
    # pc_i's own list (if it keeps one) and fail outright on a tuple
    ports = list(self.pc_i.ports())
    ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o]

    if self.jtag_en:
        ports.extend(self.jtag.external_ports())
    else:
        # DMI is only added when JTAG is *not* enabled
        ports.extend(self.dbg.dmi.ports())

    ports.extend(self.imem.ibus.fields.values())
    ports.extend(self.core.l0.cmpi.wb_bus().fields.values())

    if self.sram4x4k:
        for sram in self.sram4k:
            ports.extend(sram.bus.fields.values())

    if self.xics:
        ports.extend(self.xics_icp.bus.fields.values())
        ports.extend(self.xics_ics.bus.fields.values())
        ports.append(self.int_level_i)

    if self.gpio:
        ports.extend(self.simple_gpio.bus.fields.values())
        ports.append(self.gpio_o)

    return ports
1267
def ports(self):
    """return, as a list, everything that iterating over self yields"""
    # NOTE(review): an identical ports() is already defined a little
    # further up (just before external_ports).  if both are in the same
    # class, this later definition is the one that takes effect.
    return [port for port in self]
1270
1271
class TestIssuer(Elaboratable):
    """wrapper around TestIssuerInternal that adds a DummyPLL and
    drives the "coresync" (and debug) clocks.
    """

    def __init__(self, pspec):
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL(instance=True)

        # PLL direct clock or not: off when pspec has no use_pll at all
        self.pll_en = getattr(pspec, "use_pll", False)
        if not self.pll_en:
            return

        # PLL-related signals only exist when the PLL is enabled
        self.pll_test_o = Signal(reset_less=True)
        self.pll_vco_o = Signal(reset_less=True)
        self.clk_sel_i = Signal(2, reset_less=True)
        self.ref_clk = ClockSignal()  # can't rename it but that's ok
        self.pllclk_clk = ClockSignal("pllclk")

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        dbg_domain = self.ti.dbg_domain
        separate_dbg = dbg_domain != 'sync'

        # TestIssuer nominally runs at main clock, actually it is
        # all combinatorial internally except for coresync'd components
        m.submodules.ti = ti = self.ti

        if self.pll_en:
            # ClockSelect runs at PLL output internal clock rate
            m.submodules.wrappll = pll = self.pll

            # the PLL brings its own clock domain
            m.domains += ClockDomain("pllclk")

            # PLL clock established.  has the side-effect of running
            # clklsel at the PLL's speed (see DomainRenamer("pllclk") above)
            comb += self.pllclk_clk.eq(pll.clk_pll_o)

            # wire up external 24mhz to PLL
            #comb += pll.clk_24_i.eq(self.ref_clk)
            # output 18 mhz PLL test signal, and analog oscillator out
            comb += [self.pll_test_o.eq(pll.pll_test_o),
                     self.pll_vco_o.eq(pll.pll_vco_o)]

            # input to pll clock selection
            comb += pll.clk_sel_i.eq(self.clk_sel_i)

            # now wire up ResetSignals.  don't mind them being in this domain
            comb += ResetSignal("pllclk").eq(ResetSignal())

        # internal clock is set to selector clock-out.  has the side-effect
        # of running TestIssuer at this speed (see DomainRenamer("intclk")
        # above).  debug clock runs at coresync internal clock.
        # (these ClockDomain objects are created but not added to m.domains)
        cd_coresync = ClockDomain("coresync")
        #m.domains += cd_coresync
        if separate_dbg:
            cd_dbgsync = ClockDomain("dbgsync")
            #m.domains += cd_dbgsync
        intclk = ClockSignal("coresync")
        dbgclk = ClockSignal(dbg_domain)

        # XXX BYPASS PLL XXX
        # XXX BYPASS PLL XXX
        # XXX BYPASS PLL XXX
        clk_source = self.ref_clk if self.pll_en else ClockSignal()
        comb += intclk.eq(clk_source)
        if separate_dbg:
            comb += dbgclk.eq(intclk)

        return m

    def ports(self):
        return [*self.ti.ports(), *self.pll.ports(),
                ClockSignal(), ResetSignal()]

    def external_ports(self):
        ports = self.ti.external_ports()
        ports.extend([ClockSignal(), ResetSignal()])
        if self.pll_en:
            ports.extend([self.clk_sel_i,
                          self.pll.clk_24_i,
                          self.pll_test_o,
                          self.pll_vco_o,
                          self.pllclk_clk,
                          self.ref_clk])
        return ports
1359
1360
if __name__ == '__main__':
    # one Function Unit of each kind
    fu_names = ('alu', 'cr', 'branch', 'trap', 'logical',
                'spr', 'div', 'mul', 'shiftrot')
    units = dict.fromkeys(fu_names, 1)

    # bare wishbone for both the data and the instruction side
    pspec = TestMemPspec(ldst_ifacetype='bare_wb',
                         imem_ifacetype='bare_wb',
                         addr_wid=48,
                         mask_wid=8,
                         reg_wid=64,
                         units=units)
    dut = TestIssuer(pspec)
    vl = main(dut, ports=dut.ports(), name="test_issuer")

    # no command-line arguments at all: write out RTLIL
    if len(sys.argv) == 1:
        vl = rtlil.convert(dut, ports=dut.external_ports(),
                           name="test_issuer")
        with open("test_issuer.il", "w") as il_file:
            il_file.write(vl)