add comments on most likely place to put predicate mask read-firing
[soc.git] / src / soc / simple / issuer.py
1 """simple core issuer
2
3 not in any way intended for production use. this runs a FSM that:
4
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
9 * increments the PC
10 * does it all over again
11
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to provide something that can be further incrementally
15 improved.
16 """
17
18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
19 ClockDomain, DomainRenamer, Mux, Const)
20 from nmigen.cli import rtlil
21 from nmigen.cli import main
22 import sys
23
24 from soc.decoder.power_decoder import create_pdecode
25 from soc.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
26 from soc.decoder.decode2execute1 import IssuerDecode2ToOperand
27 from soc.decoder.decode2execute1 import Data
28 from soc.experiment.testmem import TestMemory # test only for instructions
29 from soc.regfile.regfiles import StateRegs, FastRegs
30 from soc.simple.core import NonProductionCore
31 from soc.config.test.test_loadstore import TestMemPspec
32 from soc.config.ifetch import ConfigFetchUnit
33 from soc.decoder.power_enums import MicrOp
34 from soc.debug.dmi import CoreDebug, DMIInterface
35 from soc.debug.jtag import JTAG
36 from soc.config.pinouts import get_pinspecs
37 from soc.config.state import CoreState
38 from soc.interrupts.xics import XICS_ICP, XICS_ICS
39 from soc.bus.simple_gpio import SimpleGPIO
40 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
41 from soc.clock.select import ClockSelect
42 from soc.clock.dummypll import DummyPLL
43 from soc.sv.svstate import SVSTATERec
44
45
46 from nmutil.util import rising_edge
47
def get_insn(f_instr_o, pc):
    """Pick the 32-bit instruction word out of the fetch unit's output.

    f_instr_o: fetched data; its ``width`` attribute decides the path.
    pc: address of the instruction; only bit 2 is looked at, and only
        when the fetched data is not 32 bits wide.

    Returns f_instr_o itself when it is 32 bits wide, otherwise the
    32-bit word of f_instr_o selected by pc[2].
    """
    if f_instr_o.width != 32:
        # 64-bit: bit 2 of pc decides which word to select
        return f_instr_o.word_select(pc[2], 32)
    return f_instr_o
54
55
class TestIssuerInternal(Elaboratable):
    """TestIssuer - reads instructions from TestMemory and issues them

    efficiency and speed is not the main goal here: functional correctness is.

    the work is split across three FSMs (fetch_fsm, issue_fsm, execute_fsm)
    which are wired together in elaborate() with ready/valid handshakes.
    """
def __init__(self, pspec):
    """Create the core, decoder, fetch unit, debug block and whichever
    optional peripherals are requested through attributes of pspec.

    pspec attributes consulted here (all optional): svp64, debug,
    sram4x4kblock, xics, gpio.  when JTAG is enabled, pspec is
    *modified* (wb_icache_en / wb_dcache_en are added to it) before
    it is handed on to the core and the fetch unit.
    """

    # test if SVP64 is to be enabled
    self.svp64_en = getattr(pspec, "svp64", False) == True

    # JTAG interface.  add this right at the start because if it's
    # added it *modifies* the pspec, by adding enable/disable signals
    # for parts of the rest of the core
    self.jtag_en = getattr(pspec, "debug", None) == 'jtag'
    if not self.jtag_en:
        self.wb_sram_en = Const(1)
    else:
        pin_subset = {'uart', 'mtwi', 'eint', 'gpio', 'mspi0', 'mspi1',
                      'pwm', 'sd0', 'sdr'}
        self.jtag = JTAG(get_pinspecs(subset=pin_subset))
        # add signals to pspec to enable/disable icache and dcache
        # (or data and instruction wishbone if icache/dcache not included)
        # https://bugs.libre-soc.org/show_bug.cgi?id=520
        # TODO: do we actually care if these are not domain-synchronised?
        # honestly probably not.
        pspec.wb_icache_en = self.jtag.wb_icache_en
        pspec.wb_dcache_en = self.jtag.wb_dcache_en
        self.wb_sram_en = self.jtag.wb_sram_en

    # add 4k sram blocks?
    self.sram4x4k = getattr(pspec, "sram4x4kblock", False) == True
    if self.sram4x4k:
        self.sram4k = [SPBlock512W64B8W(name="sram4k_%d" % idx,
                                        features={'err'})
                       for idx in range(4)]

    # add interrupt controller?
    self.xics = getattr(pspec, "xics", False) == True
    if self.xics:
        self.xics_icp = XICS_ICP()
        self.xics_ics = XICS_ICS()
        self.int_level_i = self.xics_ics.int_level_i

    # add GPIO peripheral?
    self.gpio = getattr(pspec, "gpio", False) == True
    if self.gpio:
        self.simple_gpio = SimpleGPIO()
        self.gpio_o = self.simple_gpio.gpio_o

    # main instruction core
    self.core = NonProductionCore(pspec)

    # instruction decoder.  goes into Trap Record
    # cur_state holds the current state (MSR/PC/EINT/SVSTATE)
    self.cur_state = CoreState("cur")
    self.pdecode2 = PowerDecode2(create_pdecode(),
                                 state=self.cur_state,
                                 opkls=IssuerDecode2ToOperand,
                                 svp64_en=self.svp64_en)
    if self.svp64_en:
        # for decoding the SVP64 prefix
        self.svp64 = SVP64PrefixDecoder()

    # Test Instruction memory
    self.imem = ConfigFetchUnit(pspec).fu
    # one-row cache of instruction read
    self.iline = Signal(64)  # one instruction line
    self.iprev_adr = Signal(64)  # previous address: if different, do read

    # DMI interface
    self.dbg = CoreDebug()

    # instruction go/monitor
    self.pc_o = Signal(64, reset_less=True)
    self.pc_i = Data(64, "pc_i")  # set "ok" to indicate "please change me"
    self.svstate_i = Data(32, "svstate_i")  # ditto
    self.core_bigendian_i = Signal()
    self.busy_o = Signal(reset_less=True)
    self.memerr_o = Signal(reset_less=True)

    regfiles = self.core.regs.rf

    # STATE regfile read /write ports for PC, MSR, SVSTATE
    state_rf = regfiles['state']
    self.state_r_pc = state_rf.r_ports['cia']  # PC rd
    self.state_w_pc = state_rf.w_ports['d_wr1']  # PC wr
    self.state_r_msr = state_rf.r_ports['msr']  # MSR rd
    self.state_r_sv = state_rf.r_ports['sv']  # SVSTATE rd
    self.state_w_sv = state_rf.w_ports['sv']  # SVSTATE wr

    # DMI interface access
    self.int_r = regfiles['int'].r_ports['dmi']  # INT read
    self.cr_r = regfiles['cr'].r_ports['full_cr_dbg']  # CR read
    self.xer_r = regfiles['xer'].r_ports['full_xer']  # XER read

    # hack method of keeping an eye on whether branch/trap set the PC
    self.state_nia = state_rf.w_ports['nia']
    self.state_nia.wen.name = 'state_nia_wen'

    # pulse to synchronize the simulator at instruction end
    self.insn_done = Signal()

    if self.svp64_en:
        # store copies of predicate masks
        self.srcmask = Signal(64)
        self.dstmask = Signal(64)
163
def fetch_fsm(self, m, core, pc, svstate, nia, is_svp64_mode,
              fetch_pc_ready_o, fetch_pc_valid_i,
              fetch_insn_valid_o, fetch_insn_ready_i):
    """fetch FSM
    this FSM performs fetch of raw instruction data, partial-decodes
    it 32-bit at a time to detect SVP64 prefixes, and will optionally
    read a 2nd 32-bit quantity if that occurs.

    states: IDLE -> INSN_READ [-> INSN_READ2] -> INSN_READY -> IDLE

    m: Module that the FSM and its statements are added to
    core: not used by this method
    pc, svstate: latched into cur_state when leaving IDLE; pc is also
        presented to the instruction memory as the fetch address
    nia: registered with cur_state.pc + 4 (or + 8 for an SVP64 prefix)
    is_svp64_mode: registered copy of the prefix decoder's
        is_svp64_mode (only driven when self.svp64_en is set)
    fetch_pc_ready_o / fetch_pc_valid_i: handshake that starts a fetch
        (ready is asserted whilst in IDLE)
    fetch_insn_valid_o / fetch_insn_ready_i: handshake that hands the
        instruction over (valid is asserted whilst in INSN_READY)
    """
    comb = m.d.comb
    sync = m.d.sync
    pdecode2 = self.pdecode2
    cur_state = self.cur_state
    dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode

    # 0 whilst an MSR read is outstanding, 1 once it has been captured
    msr_read = Signal(reset=1)

    with m.FSM(name='fetch_fsm'):

        # waiting (zzz)
        with m.State("IDLE"):
            comb += fetch_pc_ready_o.eq(1)
            with m.If(fetch_pc_valid_i):
                # instruction allowed to go: start by reading the PC
                # capture the PC and also drop it into Insn Memory
                # we have joined a pair of combinatorial memory
                # lookups together.  this is Generally Bad.
                comb += self.imem.a_pc_i.eq(pc)
                comb += self.imem.a_valid_i.eq(1)
                comb += self.imem.f_valid_i.eq(1)
                sync += cur_state.pc.eq(pc)
                sync += cur_state.svstate.eq(svstate) # and svstate

                # initiate read of MSR.  arrives one clock later
                comb += self.state_r_msr.ren.eq(1 << StateRegs.MSR)
                sync += msr_read.eq(0)

                m.next = "INSN_READ" # move to "wait for bus" phase

        # dummy pause to find out why simulation is not keeping up
        with m.State("INSN_READ"):
            # one cycle later, msr/sv read arrives.  valid only once.
            with m.If(~msr_read):
                sync += msr_read.eq(1) # yeah don't read it again
                sync += cur_state.msr.eq(self.state_r_msr.data_o)
            with m.If(self.imem.f_busy_o): # zzz...
                # busy: stay in wait-read
                comb += self.imem.a_valid_i.eq(1)
                comb += self.imem.f_valid_i.eq(1)
            with m.Else():
                # not busy: instruction fetched
                insn = get_insn(self.imem.f_instr_o, cur_state.pc)
                # NOTE: plain python "if": decided at elaboration time,
                # not in hardware
                if self.svp64_en:
                    svp64 = self.svp64
                    # decode the SVP64 prefix, if any
                    comb += svp64.raw_opcode_in.eq(insn)
                    comb += svp64.bigendian.eq(self.core_bigendian_i)
                    # pass the decoded prefix (if any) to PowerDecoder2
                    sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
                    # remember whether this is a prefixed instruction, so
                    # the FSM can readily loop when VL==0
                    sync += is_svp64_mode.eq(svp64.is_svp64_mode)
                    # calculate the address of the following instruction
                    insn_size = Mux(svp64.is_svp64_mode, 8, 4)
                    sync += nia.eq(cur_state.pc + insn_size)
                    with m.If(~svp64.is_svp64_mode):
                        # with no prefix, store the instruction
                        # and hand it directly to the next FSM
                        sync += dec_opcode_i.eq(insn)
                        m.next = "INSN_READY"
                    with m.Else():
                        # fetch the rest of the instruction from memory
                        comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
                        comb += self.imem.a_valid_i.eq(1)
                        comb += self.imem.f_valid_i.eq(1)
                        m.next = "INSN_READ2"
                else:
                    # not SVP64 - 32-bit only
                    sync += nia.eq(cur_state.pc + 4)
                    sync += dec_opcode_i.eq(insn)
                    m.next = "INSN_READY"

        # second half of a prefixed (64-bit) instruction
        with m.State("INSN_READ2"):
            with m.If(self.imem.f_busy_o): # zzz...
                # busy: stay in wait-read
                comb += self.imem.a_valid_i.eq(1)
                comb += self.imem.f_valid_i.eq(1)
            with m.Else():
                # not busy: instruction fetched
                insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
                sync += dec_opcode_i.eq(insn)
                m.next = "INSN_READY"
                # TODO: probably can start looking at pdecode2.rm_dec
                # here (or maybe even in INSN_READ state, if svp64_mode
                # detected), in order to trigger - and wait for - the
                # predicate reading.

        with m.State("INSN_READY"):
            # hand over the instruction, to be decoded
            comb += fetch_insn_valid_o.eq(1)
            with m.If(fetch_insn_ready_i):
                m.next = "IDLE"
265
def fetch_predicate_fsm(self, m, core, TODO):
    """fetch_predicate_fsm - obtains (constructs in the case of CR)
    src/dest predicate masks

    https://bugs.libre-soc.org/show_bug.cgi?id=617
    the predicates can be read here, by using IntRegs r_ports['pred']
    or CRRegs r_ports['pred'].  in the case of CRs it will have to
    be done through multiple reads, extracting one relevant at a time.
    later, a faster way would be to use the 32-bit-wide CR port but
    this is more complex decoding, here.

    placeholder: at present this only looks up the decoder fields
    that the eventual FSM will need, and adds nothing to m.
    """
    comb, sync = m.d.comb, m.d.sync
    rm_dec = self.pdecode2.rm_dec  # SVP64RMModeDecode
    predmode = rm_dec.predmode
    srcpred = rm_dec.srcpred
    dstpred = rm_dec.dstpred
283
def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
              dbg, core_rst, is_svp64_mode,
              fetch_pc_ready_o, fetch_pc_valid_i,
              fetch_insn_valid_o, fetch_insn_ready_i,
              exec_insn_valid_i, exec_insn_ready_o,
              exec_pc_valid_o, exec_pc_ready_i):
    """issue FSM

    decode / issue FSM.  this interacts with the "fetch" FSM
    through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
    (outgoing).  also interacts with the "execute" FSM
    through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
    (incoming).
    SVP64 RM prefixes have already been set up by the
    "fetch" phase, so execute is fairly straightforward.

    states: ISSUE_START -> INSN_WAIT -> INSN_EXECUTE -> EXECUTE_WAIT
    then either back to ISSUE_START, or (to run the next element of a
    vector loop) DECODE_SV -> INSN_EXECUTE.

    m: Module that the FSM and its statements are added to
    core: receives the decoded instruction (e, state, raw_insn_i,
        bigendian_i, sv_a_nz) and the core_stopped_i indication
    pc_changed, sv_changed: read in EXECUTE_WAIT to decide whether the
        PC/SVSTATE may be overwritten; set here when pc_i/svstate_i are
        applied whilst stopped
    nia: value written to the PC state register when moving on
    dbg: supplies core_stop_o, receives core_stopped_i
    core_rst: whilst set, treated the same as a debug stop request
    is_svp64_mode: used together with VL==0 to skip execution entirely
    """

    comb = m.d.comb
    sync = m.d.sync
    pdecode2 = self.pdecode2
    cur_state = self.cur_state

    # temporaries
    dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode

    # for updating svstate (things like srcstep etc.)
    update_svstate = Signal() # set this (below) if updating
    new_svstate = SVSTATERec("new_svstate")
    # default: unchanged.  individual fields are overridden below.
    comb += new_svstate.eq(cur_state.svstate)

    with m.FSM(name="issue_fsm"):

        # sync with the "fetch" phase which is reading the instruction
        # at this point, there is no instruction running, that
        # could inadvertently update the PC.
        with m.State("ISSUE_START"):
            # wait on "core stop" release, before next fetch
            # need to do this here, in case we are in a VL==0 loop
            with m.If(~dbg.core_stop_o & ~core_rst):
                comb += fetch_pc_valid_i.eq(1) # tell fetch to start
                with m.If(fetch_pc_ready_o): # fetch acknowledged us
                    m.next = "INSN_WAIT"
            with m.Else():
                # tell core it's stopped, and acknowledge debug handshake
                comb += core.core_stopped_i.eq(1)
                comb += dbg.core_stopped_i.eq(1)
                # while stopped, allow updating the PC and SVSTATE
                with m.If(self.pc_i.ok):
                    comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                    comb += self.state_w_pc.data_i.eq(self.pc_i.data)
                    sync += pc_changed.eq(1)
                with m.If(self.svstate_i.ok):
                    comb += new_svstate.eq(self.svstate_i.data)
                    comb += update_svstate.eq(1)
                    sync += sv_changed.eq(1)

        # decode the instruction when it arrives
        with m.State("INSN_WAIT"):
            comb += fetch_insn_ready_i.eq(1)
            with m.If(fetch_insn_valid_o):
                # decode the instruction
                sync += core.e.eq(pdecode2.e)
                sync += core.state.eq(cur_state)
                sync += core.raw_insn_i.eq(dec_opcode_i)
                sync += core.bigendian_i.eq(self.core_bigendian_i)
                # set RA_OR_ZERO detection in satellite decoders
                sync += core.sv_a_nz.eq(pdecode2.sv_a_nz)
                # loop into ISSUE_START if it's a SVP64 instruction
                # and VL == 0.  this because VL==0 is a for-loop
                # from 0 to 0 i.e. always, always a NOP.
                cur_vl = cur_state.svstate.vl
                with m.If(is_svp64_mode & (cur_vl == 0)):
                    # update the PC before fetching the next instruction
                    # since we are in a VL==0 loop, no instruction was
                    # executed that we could be overwriting
                    comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                    comb += self.state_w_pc.data_i.eq(nia)
                    comb += self.insn_done.eq(1)
                    m.next = "ISSUE_START"
                with m.Else():
                    m.next = "INSN_EXECUTE" # move to "execute"

        # handshake with execution FSM, move to "wait" once acknowledged
        with m.State("INSN_EXECUTE"):
            comb += exec_insn_valid_i.eq(1) # trigger execute
            with m.If(exec_insn_ready_o): # execute acknowledged us
                m.next = "EXECUTE_WAIT"

        with m.State("EXECUTE_WAIT"):
            # wait on "core stop" release, at instruction end
            # need to do this here, in case we are in a VL>1 loop
            with m.If(~dbg.core_stop_o & ~core_rst):
                comb += exec_pc_ready_i.eq(1)
                with m.If(exec_pc_valid_o):
                    # precalculate srcstep+1 and dststep+1
                    # TODO these need to "skip" over predicated-out src/dst
                    # https://bugs.libre-soc.org/show_bug.cgi?id=617#c3
                    # but still without exceeding VL in either case
                    next_srcstep = Signal.like(cur_state.svstate.srcstep)
                    next_dststep = Signal.like(cur_state.svstate.dststep)
                    comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
                    comb += next_dststep.eq(cur_state.svstate.dststep+1)

                    # was this the last loop iteration?
                    # (only srcstep is compared against VL here)
                    is_last = Signal()
                    cur_vl = cur_state.svstate.vl
                    comb += is_last.eq(next_srcstep == cur_vl)

                    # if either PC or SVSTATE were changed by the previous
                    # instruction, go directly back to Fetch, without
                    # updating either PC or SVSTATE
                    with m.If(pc_changed | sv_changed):
                        m.next = "ISSUE_START"

                    # also return to Fetch, when no output was a vector
                    # (regardless of SRCSTEP and VL), or when the last
                    # instruction was really the last one of the VL loop
                    with m.Elif((~pdecode2.loop_continue) | is_last):
                        # before going back to fetch, update the PC state
                        # register with the NIA.
                        # ok here we are not reading the branch unit.
                        # TODO: this just blithely overwrites whatever
                        #       pipeline updated the PC
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.data_i.eq(nia)
                        # reset SRCSTEP before returning to Fetch
                        with m.If(pdecode2.loop_continue):
                            comb += new_svstate.srcstep.eq(0)
                            comb += new_svstate.dststep.eq(0)
                            comb += update_svstate.eq(1)
                        m.next = "ISSUE_START"

                    # returning to Execute? then, first update SRCSTEP
                    with m.Else():
                        comb += new_svstate.srcstep.eq(next_srcstep)
                        comb += new_svstate.dststep.eq(next_dststep)
                        comb += update_svstate.eq(1)
                        m.next = "DECODE_SV"

            with m.Else():
                # same "stopped" handling as in ISSUE_START
                comb += core.core_stopped_i.eq(1)
                comb += dbg.core_stopped_i.eq(1)
                # while stopped, allow updating the PC and SVSTATE
                with m.If(self.pc_i.ok):
                    comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                    comb += self.state_w_pc.data_i.eq(self.pc_i.data)
                    sync += pc_changed.eq(1)
                with m.If(self.svstate_i.ok):
                    comb += new_svstate.eq(self.svstate_i.data)
                    comb += update_svstate.eq(1)
                    sync += sv_changed.eq(1)

        # need to decode the instruction again, after updating SRCSTEP
        # in the previous state.
        # mostly a copy of INSN_WAIT, but without the actual wait
        # (and without re-latching core.raw_insn_i)
        with m.State("DECODE_SV"):
            # decode the instruction
            sync += core.e.eq(pdecode2.e)
            sync += core.state.eq(cur_state)
            sync += core.bigendian_i.eq(self.core_bigendian_i)
            sync += core.sv_a_nz.eq(pdecode2.sv_a_nz)
            m.next = "INSN_EXECUTE" # move to "execute"

    # check if svstate needs updating: if so, write it to State Regfile
    with m.If(update_svstate):
        comb += self.state_w_sv.wen.eq(1<<StateRegs.SVSTATE)
        comb += self.state_w_sv.data_i.eq(new_svstate)
        sync += cur_state.svstate.eq(new_svstate) # for next clock
452
def execute_fsm(self, m, core, pc_changed, sv_changed,
                exec_insn_valid_i, exec_insn_ready_o,
                exec_pc_valid_o, exec_pc_ready_i):
    """execute FSM

    execute FSM.  this interacts with the "issue" FSM
    through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
    (outgoing).  SVP64 RM prefixes have already been set up by the
    "issue" phase, so execute is fairly straightforward.

    states: INSN_START <-> INSN_ACTIVE

    m: Module that the FSM and its statements are added to
    core: driven through ivalid_i / issue_i, monitored through busy_o
    pc_changed, sv_changed: cleared when an instruction is started,
        then set if the state regfile 'nia' write port is seen writing
        the PC / SVSTATE whilst the instruction is active
    """

    comb = m.d.comb
    sync = m.d.sync
    pdecode2 = self.pdecode2  # NOTE(review): not used in this method

    # temporaries
    core_busy_o = core.busy_o # core is busy
    core_ivalid_i = core.ivalid_i # instruction is valid
    core_issue_i = core.issue_i # instruction is issued
    insn_type = core.e.do.insn_type # instruction MicroOp type

    with m.FSM(name="exec_fsm"):

        # waiting for instruction bus (stays there until not busy)
        with m.State("INSN_START"):
            comb += exec_insn_ready_o.eq(1)
            with m.If(exec_insn_valid_i):
                comb += core_ivalid_i.eq(1) # instruction is valid
                comb += core_issue_i.eq(1) # and issued
                sync += sv_changed.eq(0)
                sync += pc_changed.eq(0)
                m.next = "INSN_ACTIVE" # move to "wait completion"

        # instruction started: must wait till it finishes
        with m.State("INSN_ACTIVE"):
            with m.If(insn_type != MicrOp.OP_NOP):
                comb += core_ivalid_i.eq(1) # instruction is valid
            # note changes to PC and SVSTATE
            with m.If(self.state_nia.wen & (1<<StateRegs.SVSTATE)):
                sync += sv_changed.eq(1)
            with m.If(self.state_nia.wen & (1<<StateRegs.PC)):
                sync += pc_changed.eq(1)
            with m.If(~core_busy_o): # instruction done!
                comb += exec_pc_valid_o.eq(1)
                with m.If(exec_pc_ready_i):
                    comb += self.insn_done.eq(1)
                    m.next = "INSN_START" # back to fetch
500
def elaborate(self, platform):
    """Build the issuer: add the submodules, set up the clock domains
    and power-on reset, then wire up the fetch / issue / execute FSMs,
    the DMI regfile access and the DEC/TB updater.

    platform: not used here.
    Returns the constructed Module.
    """
    m = Module()
    comb, sync = m.d.comb, m.d.sync

    m.submodules.core = core = DomainRenamer("coresync")(self.core)
    m.submodules.imem = self.imem
    m.submodules.dbg = dbg = self.dbg
    if self.jtag_en:
        m.submodules.jtag = jtag = self.jtag
        # TODO: UART2GDB mux, here, from external pin
        # see https://bugs.libre-soc.org/show_bug.cgi?id=499
        sync += dbg.dmi.connect_to(jtag.dmi)

    cur_state = self.cur_state

    # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
    if self.sram4x4k:
        for i, sram in enumerate(self.sram4k):
            m.submodules["sram4k_%d" % i] = sram
            comb += sram.enable.eq(self.wb_sram_en)

    # XICS interrupt handler
    if self.xics:
        m.submodules.xics_icp = icp = self.xics_icp
        m.submodules.xics_ics = ics = self.xics_ics
        comb += icp.ics_i.eq(ics.icp_o) # connect ICS to ICP
        sync += cur_state.eint.eq(icp.core_irq_o) # connect ICP to core

    # GPIO test peripheral
    if self.gpio:
        m.submodules.simple_gpio = self.simple_gpio

    # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
    # XXX causes litex ECP5 test to get wrong idea about input and output
    # (but works with verilator sim *sigh*)
    #if self.gpio and self.xics:
    #   comb += self.int_level_i[15].eq(self.simple_gpio.gpio_o[0])

    # instruction decoder (created in __init__)
    m.submodules.dec2 = pdecode2 = self.pdecode2
    if self.svp64_en:
        m.submodules.svp64 = self.svp64

    # clock delay power-on reset
    # (the cd_ prefix matters: unnamed ClockDomains take their name
    #  from the variable they are assigned to)
    cd_por = ClockDomain(reset_less=True)
    cd_sync = ClockDomain()
    core_sync = ClockDomain("coresync")
    m.domains += cd_por, cd_sync, core_sync

    ti_rst = Signal(reset_less=True)
    delay = Signal(range(4), reset=3)
    with m.If(delay != 0):
        m.d.por += delay.eq(delay - 1)
    comb += cd_por.clk.eq(ClockSignal())

    # power-on reset delay
    core_rst = ResetSignal("coresync")
    # the parentheses are essential: python's "|" binds tighter than
    # "!=", so without them this is "delay != (0 | core_rst_o | rst)"
    comb += ti_rst.eq((delay != 0) | dbg.core_rst_o | ResetSignal())
    comb += core_rst.eq(ti_rst)

    # busy/halted signals from core
    comb += self.busy_o.eq(core.busy_o)
    comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)

    # temporary hack: says "go" immediately for both address gen and ST
    ldst = core.fus.fus['ldst0']
    st_go_edge = rising_edge(m, ldst.st.rel_o)
    m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o) # link addr-go direct to rel
    m.d.comb += ldst.st.go_i.eq(st_go_edge) # link store-go to rising rel

    # PC and instruction from I-Memory
    comb += self.pc_o.eq(cur_state.pc)
    pc_changed = Signal() # note write to PC
    sv_changed = Signal() # note write to SVSTATE

    # read the PC
    pc = Signal(64, reset_less=True)
    pc_ok_delay = Signal()
    sync += pc_ok_delay.eq(~self.pc_i.ok)
    with m.If(self.pc_i.ok):
        # incoming override (start from pc_i)
        comb += pc.eq(self.pc_i.data)
    with m.Else():
        # otherwise read StateRegs regfile for PC...
        comb += self.state_r_pc.ren.eq(1<<StateRegs.PC)
        # ... but on a 1-clock delay
        with m.If(pc_ok_delay):
            comb += pc.eq(self.state_r_pc.data_o)

    # read svstate
    svstate = Signal(64, reset_less=True)
    svstate_ok_delay = Signal()
    sync += svstate_ok_delay.eq(~self.svstate_i.ok)
    with m.If(self.svstate_i.ok):
        # incoming override (start from svstate_i)
        comb += svstate.eq(self.svstate_i.data)
    with m.Else():
        # otherwise read StateRegs regfile for SVSTATE...
        comb += self.state_r_sv.ren.eq(1 << StateRegs.SVSTATE)
        # ... but on a 1-clock delay
        with m.If(svstate_ok_delay):
            comb += svstate.eq(self.state_r_sv.data_o)

    # don't write pc every cycle
    comb += self.state_w_pc.wen.eq(0)
    comb += self.state_w_pc.data_i.eq(0)

    # don't read msr every cycle
    comb += self.state_r_msr.ren.eq(0)

    # address of the next instruction, in the absence of a branch
    # depends on the instruction size
    nia = Signal(64, reset_less=True)

    # connect up debug signals
    # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
    comb += dbg.terminate_i.eq(core.core_terminate_o)
    comb += dbg.state.pc.eq(pc)
    comb += dbg.state.svstate.eq(svstate)
    comb += dbg.state.msr.eq(cur_state.msr)

    # pass the prefix mode from Fetch to Issue, so the latter can loop
    # on VL==0
    is_svp64_mode = Signal()

    # there are *THREE* FSMs, fetch (32/64-bit) issue, decode/execute.
    # these are the handshake signals between fetch and decode/execute

    # fetch FSM can run as soon as the PC is valid
    fetch_pc_valid_i = Signal() # Execute tells Fetch "start next read"
    fetch_pc_ready_o = Signal() # Fetch Tells SVSTATE "proceed"

    # fetch FSM hands over the instruction to be decoded / issued
    fetch_insn_valid_o = Signal()
    fetch_insn_ready_i = Signal()

    # issue FSM delivers the instruction to be executed
    exec_insn_valid_i = Signal()
    exec_insn_ready_o = Signal()

    # execute FSM, hands over the PC/SVSTATE back to the issue FSM
    exec_pc_valid_o = Signal()
    exec_pc_ready_i = Signal()

    # the FSMs here are perhaps unusual in that they detect conditions
    # then "hold" information, combinatorially, for the core
    # (as opposed to using sync - which would be on a clock's delay)
    # this includes the actual opcode, valid flags and so on.

    # Fetch, then Issue, then Execute.  Issue is where the VL for-loop
    # lives.  the ready/valid signalling is used to communicate between
    # the three.

    self.fetch_fsm(m, core, pc, svstate, nia, is_svp64_mode,
                   fetch_pc_ready_o, fetch_pc_valid_i,
                   fetch_insn_valid_o, fetch_insn_ready_i)

    self.issue_fsm(m, core, pc_changed, sv_changed, nia,
                   dbg, core_rst, is_svp64_mode,
                   fetch_pc_ready_o, fetch_pc_valid_i,
                   fetch_insn_valid_o, fetch_insn_ready_i,
                   exec_insn_valid_i, exec_insn_ready_o,
                   exec_pc_valid_o, exec_pc_ready_i)

    self.execute_fsm(m, core, pc_changed, sv_changed,
                     exec_insn_valid_i, exec_insn_ready_o,
                     exec_pc_valid_o, exec_pc_ready_i)

    # this bit doesn't have to be in the FSM: connect up to read
    # regfiles on demand from DMI
    self.do_dmi(m, dbg)

    # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
    # (which uses that in PowerDecoder2 to raise 0x900 exception)
    self.tb_dec_fsm(m, cur_state.dec)

    return m
684
def do_dmi(self, m, dbg):
    """Serve DMI debug requests for reading the INT, CR and XER regfiles.

    for each of the three request interfaces (dbg.d_gpr, dbg.d_cr,
    dbg.d_xer): whilst req is set the matching regfile read port is
    enabled, and one clock after req the port's data_o is presented on
    the interface together with ack.

    m: Module that the statements are added to
    dbg: debug block providing the d_gpr / d_cr / d_xer interfaces
    """
    comb = m.d.comb
    sync = m.d.sync
    dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
    intrf = self.core.regs.rf['int']

    with m.If(d_reg.req): # request for regfile access being made
        # TODO: error-check this
        # XXX should this be combinatorial?  sync better?
        # python "if": the regfile addressing style (unary or binary)
        # is fixed at elaboration time
        if intrf.unary:
            comb += self.int_r.ren.eq(1<<d_reg.addr)
        else:
            comb += self.int_r.addr.eq(d_reg.addr)
            comb += self.int_r.ren.eq(1)
    d_reg_delay = Signal()
    sync += d_reg_delay.eq(d_reg.req)
    with m.If(d_reg_delay):
        # data arrives one clock later
        comb += d_reg.data.eq(self.int_r.data_o)
        comb += d_reg.ack.eq(1)

    # sigh same thing for CR debug
    with m.If(d_cr.req): # request for regfile access being made
        comb += self.cr_r.ren.eq(0b11111111) # enable all
    d_cr_delay = Signal()
    sync += d_cr_delay.eq(d_cr.req)
    with m.If(d_cr_delay):
        # data arrives one clock later
        comb += d_cr.data.eq(self.cr_r.data_o)
        comb += d_cr.ack.eq(1)

    # aaand XER...
    with m.If(d_xer.req): # request for regfile access being made
        comb += self.xer_r.ren.eq(0b111111) # enable all
    d_xer_delay = Signal()
    sync += d_xer_delay.eq(d_xer.req)
    with m.If(d_xer_delay):
        # data arrives one clock later
        comb += d_xer.data.eq(self.xer_r.data_o)
        comb += d_xer.ack.eq(1)
725
def tb_dec_fsm(self, m, spr_dec):
    """tb_dec_fsm

    this is a FSM for updating either dec or tb.  it runs alternately
    DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
    value to DEC, however the regfile has "passthrough" on it so this
    *should* be ok.

    see v3.0B p1097-1099 for Timer Resource and p1065 and p1076

    the cycle is DEC_READ -> DEC_WRITE -> TB_READ -> TB_WRITE, so each
    of DEC and TB is updated once every four clocks: DEC is decremented
    by one, TB is incremented by one.

    m: Module that the FSM is added to
    spr_dec: receives (sync) a copy of each newly-written DEC value
    Returns m.
    """

    comb, sync = m.d.comb, m.d.sync
    fast_rf = self.core.regs.rf['fast']
    fast_r_dectb = fast_rf.r_ports['issue'] # DEC/TB
    fast_w_dectb = fast_rf.w_ports['issue'] # DEC/TB

    with m.FSM() as fsm:

        # initiates read of current DEC
        with m.State("DEC_READ"):
            comb += fast_r_dectb.addr.eq(FastRegs.DEC)
            comb += fast_r_dectb.ren.eq(1)
            m.next = "DEC_WRITE"

        # waits for DEC read to arrive (1 cycle), updates with new value
        with m.State("DEC_WRITE"):
            new_dec = Signal(64)
            # TODO: MSR.LPCR 32-bit decrement mode
            comb += new_dec.eq(fast_r_dectb.data_o - 1)
            comb += fast_w_dectb.addr.eq(FastRegs.DEC)
            comb += fast_w_dectb.wen.eq(1)
            comb += fast_w_dectb.data_i.eq(new_dec)
            sync += spr_dec.eq(new_dec) # copy into cur_state for decoder
            m.next = "TB_READ"

        # initiates read of current TB
        with m.State("TB_READ"):
            comb += fast_r_dectb.addr.eq(FastRegs.TB)
            comb += fast_r_dectb.ren.eq(1)
            m.next = "TB_WRITE"

        # waits for read TB to arrive, initiates write of current TB
        with m.State("TB_WRITE"):
            new_tb = Signal(64)
            comb += new_tb.eq(fast_r_dectb.data_o + 1)
            comb += fast_w_dectb.addr.eq(FastRegs.TB)
            comb += fast_w_dectb.wen.eq(1)
            comb += fast_w_dectb.data_i.eq(new_tb)
            m.next = "DEC_READ"

    return m
777
def __iter__(self):
    """Yield the ports of the issuer: the pc_i ports, pc_o, memerr_o,
    the core's ports, the instruction memory's ports, then
    core_bigendian_i and busy_o (in that order).
    """
    for port in self.pc_i.ports():
        yield port
    yield self.pc_o
    yield self.memerr_o
    for sub in (self.core, self.imem):
        for port in sub.ports():
            yield port
    yield self.core_bigendian_i
    yield self.busy_o
786
def ports(self):
    """Return, as a list, every port yielded by iterating over self."""
    return list(self)

def external_ports(self):
    """Return the list of ports to expose at the top level.

    always included: the pc_i ports, pc_o, memerr_o, core_bigendian_i,
    busy_o, the instruction bus fields and the load/store slave bus
    fields.  the JTAG external ports are added when JTAG is enabled,
    otherwise the DMI ports are added instead.  the SRAM, XICS and GPIO
    bus fields (plus int_level_i / gpio_o) are added when the
    respective option is enabled.
    """
    # take a copy: the list handed back by pc_i.ports() must not be
    # extended in-place
    ports = list(self.pc_i.ports())
    ports.extend([self.pc_o, self.memerr_o,
                  self.core_bigendian_i, self.busy_o])

    if self.jtag_en:
        ports.extend(self.jtag.external_ports())
    else:
        # don't add DMI if JTAG is enabled
        ports.extend(self.dbg.dmi.ports())

    ports.extend(self.imem.ibus.fields.values())
    ports.extend(self.core.l0.cmpi.lsmem.lsi.slavebus.fields.values())

    if self.sram4x4k:
        for sram in self.sram4k:
            ports.extend(sram.bus.fields.values())

    if self.xics:
        ports.extend(self.xics_icp.bus.fields.values())
        ports.extend(self.xics_ics.bus.fields.values())
        ports.append(self.int_level_i)

    if self.gpio:
        ports.extend(self.simple_gpio.bus.fields.values())
        ports.append(self.gpio_o)

    return ports
821
822
class TestIssuer(Elaboratable):
    """Top-level wrapper: a TestIssuerInternal plus clock selection.

    the "coresync" clock is driven either from the PLL output (when
    pspec.use_pll is set) or directly from the default clock.
    """
    def __init__(self, pspec):
        """pspec is handed to TestIssuerInternal; its optional use_pll
        attribute selects whether the PLL drives the core clock.
        """
        self.ti = TestIssuerInternal(pspec)

        self.pll = DummyPLL()

        # PLL direct clock or not
        self.pll_en = getattr(pspec, "use_pll", False)
        if self.pll_en:
            self.pll_18_o = Signal(reset_less=True)

    def elaborate(self, platform):
        """Add the issuer (and, if enabled, the PLL) as submodules and
        drive the "coresync" clock.  platform is not used.
        """
        m = Module()
        comb = m.d.comb

        # TestIssuer runs at direct clock
        m.submodules.ti = self.ti

        if self.pll_en:
            m.submodules.pll = pll = self.pll

            # add clock domain for the PLL output
            m.domains += ClockDomain("pllclk")

            # PLL clock established: "pllclk" runs at the PLL's speed
            comb += ClockSignal("pllclk").eq(pll.clk_pll_o)

            # wire up external 24mhz to PLL
            comb += pll.clk_24_i.eq(ClockSignal())

            # output 18 mhz PLL test signal
            comb += self.pll_18_o.eq(pll.pll_18_o)

            # now wire up ResetSignals.  don't mind them being in this domain
            comb += ResetSignal("pllclk").eq(ResetSignal())

        # internal ("coresync") clock is set to the selected clock.  the
        # domain itself is not created here: it is expected to come from
        # the "ti" submodule
        core_clk = ClockSignal("coresync")
        if self.pll_en:
            comb += core_clk.eq(pll.clk_pll_o)
        else:
            comb += core_clk.eq(ClockSignal())

        return m

    def ports(self):
        """All ports: the issuer's, the PLL's, then default clock/reset."""
        return list(self.ti.ports()) + list(self.pll.ports()) + \
               [ClockSignal(), ResetSignal()]

    def external_ports(self):
        """Top-level ports: the issuer's external ports, the default
        clock and reset and, when the PLL is enabled, the PLL's
        clk_sel_i, the 18 MHz test output and pll_lck_o.
        """
        ports = self.ti.external_ports()
        ports.append(ClockSignal())
        ports.append(ResetSignal())
        if self.pll_en:
            ports.append(self.pll.clk_sel_i)
            ports.append(self.pll_18_o)
            ports.append(self.pll.pll_lck_o)
        return ports
888
889
if __name__ == '__main__':
    # one of each Function Unit
    units = dict.fromkeys(('alu', 'cr', 'branch', 'trap', 'logical',
                           'spr', 'div', 'mul', 'shiftrot'), 1)
    pspec = TestMemPspec(ldst_ifacetype='bare_wb',
                         imem_ifacetype='bare_wb',
                         addr_wid=48,
                         mask_wid=8,
                         reg_wid=64,
                         units=units)
    dut = TestIssuer(pspec)
    # hand over to the nmigen command-line front-end
    vl = main(dut, ports=dut.ports(), name="test_issuer")

    # with no command-line arguments, also write out the RTLIL
    # (external ports only)
    if len(sys.argv) == 1:
        vl = rtlil.convert(dut, ports=dut.external_ports(),
                           name="test_issuer")
        with open("test_issuer.il", "w") as il_file:
            il_file.write(vl)