eec9bc5007ed5fa180503b4f9e880bf3f90723fd
[soc.git] / src / soc / simple / issuer.py
1 """simple core issuer
2
3 not in any way intended for production use. this runs a FSM that:
4
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
9 * increments the PC
10 * does it all over again
11
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to at least provide something that can be further incrementally
15 improved.
16 """
17
18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
19 ClockDomain, DomainRenamer)
20 from nmigen.cli import rtlil
21 from nmigen.cli import main
22 import sys
23
24 from soc.decoder.power_decoder import create_pdecode
25 from soc.decoder.power_decoder2 import PowerDecode2
26 from soc.decoder.decode2execute1 import Data
27 from soc.experiment.testmem import TestMemory # test only for instructions
28 from soc.regfile.regfiles import StateRegs, FastRegs
29 from soc.simple.core import NonProductionCore
30 from soc.config.test.test_loadstore import TestMemPspec
31 from soc.config.ifetch import ConfigFetchUnit
32 from soc.decoder.power_enums import MicrOp
33 from soc.debug.dmi import CoreDebug, DMIInterface
34 from soc.config.state import CoreState
35 from soc.interrupts.xics import XICS_ICP, XICS_ICS
36 from soc.bus.simple_gpio import SimpleGPIO
37
38 from nmutil.util import rising_edge
39
40
class TestIssuer(Elaboratable):
    """TestIssuer - reads instructions from TestMemory and issues them

    efficiency and speed is not the main goal here: functional correctness is.
    """
    def __init__(self, pspec):
        """create the core, decoder, instruction memory and debug interface

        pspec: parameter specification object.  it is handed on to
               NonProductionCore and ConfigFetchUnit.  two optional
               attributes are examined here: "xics" (adds an XICS ICP/ICS
               pair) and "gpio" (adds a SimpleGPIO peripheral).  each
               option is enabled only if the attribute exists and
               compares equal to True.
        """

        # add interrupt controller?
        self.xics = hasattr(pspec, "xics") and pspec.xics == True
        if self.xics:
            self.xics_icp = XICS_ICP()
            self.xics_ics = XICS_ICS()
            self.int_level_i = self.xics_ics.int_level_i

        # add GPIO peripheral?
        self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
        if self.gpio:
            self.simple_gpio = SimpleGPIO()
            self.gpio_o = self.simple_gpio.gpio_o

        # main instruction core
        self.core = NonProductionCore(pspec)

        # instruction decoder
        pdecode = create_pdecode()
        self.cur_state = CoreState("cur")  # current state (MSR/PC/EINT)
        self.pdecode2 = PowerDecode2(pdecode, state=self.cur_state)

        # Test Instruction memory
        self.imem = ConfigFetchUnit(pspec).fu
        # one-row cache of instruction read
        self.iline = Signal(64)  # one instruction line
        self.iprev_adr = Signal(64)  # previous address: if different, do read

        # DMI interface
        self.dbg = CoreDebug()

        # instruction go/monitor
        self.pc_o = Signal(64, reset_less=True)
        self.pc_i = Data(64, "pc_i")  # set "ok" to indicate "please change me"
        self.core_bigendian_i = Signal()
        self.busy_o = Signal(reset_less=True)
        self.memerr_o = Signal(reset_less=True)

        # STATE regfile read/write ports for PC and MSR
        staterf = self.core.regs.rf['state']
        self.state_r_pc = staterf.r_ports['cia']  # PC rd
        self.state_w_pc = staterf.w_ports['d_wr1']  # PC wr
        self.state_r_msr = staterf.r_ports['msr']  # MSR rd

        # DMI interface access
        intrf = self.core.regs.rf['int']
        crrf = self.core.regs.rf['cr']
        xerrf = self.core.regs.rf['xer']
        self.int_r = intrf.r_ports['dmi']  # INT read
        self.cr_r = crrf.r_ports['full_cr_dbg']  # CR read
        self.xer_r = xerrf.r_ports['full_xer']  # XER read

        # hack method of keeping an eye on whether branch/trap set the PC
        self.state_nia = self.core.regs.rf['state'].w_ports['nia']
        self.state_nia.wen.name = 'state_nia_wen'

    def elaborate(self, platform):
        """build the issuer Module

        connects up, in order: the core (renamed into the "coresync"
        domain), instruction memory and debug interface; the optional
        XICS and GPIO peripherals; the power-on-reset delay counter;
        the PC selection logic; the fetch/issue FSM; the DMI regfile
        read-back paths; and finally the DEC/TB FSM (see tb_dec_fsm).

        returns the Module.
        """
        m = Module()
        comb, sync = m.d.comb, m.d.sync

        m.submodules.core = core = DomainRenamer("coresync")(self.core)
        m.submodules.imem = imem = self.imem
        m.submodules.dbg = dbg = self.dbg

        cur_state = self.cur_state

        # XICS interrupt handler
        if self.xics:
            m.submodules.xics_icp = icp = self.xics_icp
            m.submodules.xics_ics = ics = self.xics_ics
            comb += icp.ics_i.eq(ics.icp_o)  # connect ICS to ICP
            sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core

        # GPIO test peripheral
        if self.gpio:
            m.submodules.simple_gpio = simple_gpio = self.simple_gpio

        # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
        if self.gpio and self.xics:
            comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])

        # instruction decoder (created in __init__: do not create another)
        m.submodules.dec2 = pdecode2 = self.pdecode2

        # convenience
        d_reg, d_cr, d_xer = dbg.d_gpr, dbg.d_cr, dbg.d_xer
        intrf = self.core.regs.rf['int']

        # clock delay power-on reset
        cd_por = ClockDomain(reset_less=True)
        cd_sync = ClockDomain()
        core_sync = ClockDomain("coresync")
        m.domains += cd_por, cd_sync, core_sync

        delay = Signal(range(4), reset=3)
        with m.If(delay != 0):
            m.d.por += delay.eq(delay - 1)
        comb += cd_por.clk.eq(ClockSignal())
        comb += core_sync.clk.eq(ClockSignal())
        # power-on reset delay.  the brackets matter: in python "|" binds
        # tighter than "!=", so without them this would be evaluated as
        # delay != (0 | dbg.core_rst_o)
        comb += core.core_reset_i.eq((delay != 0) | dbg.core_rst_o)

        # busy/halted signals from core
        comb += self.busy_o.eq(core.busy_o)
        comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)

        # temporary hack: says "go" immediately for both address gen and ST
        ldst = core.fus.fus['ldst0']
        st_go_edge = rising_edge(m, ldst.st.rel_o)
        m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)  # link addr-go direct to rel
        m.d.comb += ldst.st.go_i.eq(st_go_edge)  # link store-go to rising rel

        # PC and instruction from I-Memory
        pc_changed = Signal()  # note write to PC
        comb += self.pc_o.eq(cur_state.pc)
        ilatch = Signal(32)

        # next instruction (+4 on current)
        nia = Signal(64, reset_less=True)
        comb += nia.eq(cur_state.pc + 4)

        # read the PC
        pc = Signal(64, reset_less=True)
        pc_ok_delay = Signal()
        sync += pc_ok_delay.eq(~self.pc_i.ok)
        with m.If(self.pc_i.ok):
            # incoming override (start from pc_i)
            comb += pc.eq(self.pc_i.data)
        with m.Else():
            # otherwise read StateRegs regfile for PC...
            comb += self.state_r_pc.ren.eq(1 << StateRegs.PC)
        # ... but on a 1-clock delay
        with m.If(pc_ok_delay):
            comb += pc.eq(self.state_r_pc.data_o)

        # don't write pc every cycle
        comb += self.state_w_pc.wen.eq(0)
        comb += self.state_w_pc.data_i.eq(0)

        # don't read msr every cycle
        comb += self.state_r_msr.ren.eq(0)
        msr_read = Signal(reset=1)

        # connect up debug signals
        # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
        comb += dbg.terminate_i.eq(core.core_terminate_o)
        comb += dbg.state.pc.eq(pc)
        #comb += dbg.state.pc.eq(cur_state.pc)
        comb += dbg.state.msr.eq(cur_state.msr)

        # temporaries
        core_busy_o = core.busy_o  # core is busy
        core_ivalid_i = core.ivalid_i  # instruction is valid
        core_issue_i = core.issue_i  # instruction is issued
        dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode

        insn_type = core.e.do.insn_type

        # actually use a nmigen FSM for the first time (w00t)
        # this FSM is perhaps unusual in that it detects conditions
        # then "holds" information, combinatorially, for the core
        # (as opposed to using sync - which would be on a clock's delay)
        # this includes the actual opcode, valid flags and so on.
        with m.FSM() as fsm:

            # waiting (zzz)
            with m.State("IDLE"):
                sync += pc_changed.eq(0)
                sync += core.e.eq(0)
                sync += core.raw_insn_i.eq(0)
                sync += core.bigendian_i.eq(0)
                with m.If(~dbg.core_stop_o & ~core.core_reset_i):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                    sync += cur_state.pc.eq(pc)

                    # initiate read of MSR.  arrives one clock later
                    comb += self.state_r_msr.ren.eq(1 << StateRegs.MSR)
                    sync += msr_read.eq(0)

                    m.next = "INSN_READ"  # move to "wait for bus" phase
                with m.Else():
                    comb += core.core_stopped_i.eq(1)
                    comb += dbg.core_stopped_i.eq(1)

            # waiting for instruction bus (stays there until not busy)
            with m.State("INSN_READ"):
                # one cycle later, msr read arrives.  valid only once.
                with m.If(~msr_read):
                    sync += msr_read.eq(1)  # yeah don't read it again
                    sync += cur_state.msr.eq(self.state_r_msr.data_o)
                with m.If(self.imem.f_busy_o):  # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    f_instr_o = self.imem.f_instr_o
                    if f_instr_o.width == 32:
                        insn = f_instr_o
                    else:
                        # wider fetch: PC bit 2 picks the 32-bit word
                        insn = f_instr_o.word_select(cur_state.pc[2], 32)
                    comb += dec_opcode_i.eq(insn)  # actual opcode
                    sync += core.e.eq(pdecode2.e)
                    # also drop PC and MSR into decode "state"
                    sync += core.state.eq(cur_state)
                    sync += core.raw_insn_i.eq(dec_opcode_i)
                    sync += core.bigendian_i.eq(self.core_bigendian_i)
                    sync += ilatch.eq(insn)  # latch current insn
                    m.next = "INSN_START"  # move to "start"

            # instruction fetched and decoded: issue it (one cycle only)
            with m.State("INSN_START"):
                comb += core_ivalid_i.eq(1)  # instruction is valid
                comb += core_issue_i.eq(1)  # and issued

                m.next = "INSN_ACTIVE"  # move to "wait completion"

            # instruction started: must wait till it finishes
            with m.State("INSN_ACTIVE"):
                with m.If(insn_type != MicrOp.OP_NOP):
                    comb += core_ivalid_i.eq(1)  # instruction is valid
                with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
                    sync += pc_changed.eq(1)
                with m.If(~core_busy_o):  # instruction done!
                    # ok here we are not reading the branch unit.  TODO
                    # this just blithely overwrites whatever pipeline
                    # updated the PC
                    with m.If(~pc_changed):
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.data_i.eq(nia)
                    sync += core.e.eq(0)
                    sync += core.raw_insn_i.eq(0)
                    sync += core.bigendian_i.eq(0)
                    m.next = "IDLE"  # back to idle

        # this bit doesn't have to be in the FSM: connect up to read
        # regfiles on demand from DMI
        with m.If(d_reg.req):  # request for regfile access being made
            # TODO: error-check this
            # XXX should this be combinatorial?  sync better?
            if intrf.unary:
                comb += self.int_r.ren.eq(1 << d_reg.addr)
            else:
                comb += self.int_r.addr.eq(d_reg.addr)
                comb += self.int_r.ren.eq(1)
        d_reg_delay = Signal()
        sync += d_reg_delay.eq(d_reg.req)
        with m.If(d_reg_delay):
            # data arrives one clock later
            comb += d_reg.data.eq(self.int_r.data_o)
            comb += d_reg.ack.eq(1)

        # sigh same thing for CR debug
        with m.If(d_cr.req):  # request for regfile access being made
            comb += self.cr_r.ren.eq(0b11111111)  # enable all
        d_cr_delay = Signal()
        sync += d_cr_delay.eq(d_cr.req)
        with m.If(d_cr_delay):
            # data arrives one clock later
            comb += d_cr.data.eq(self.cr_r.data_o)
            comb += d_cr.ack.eq(1)

        # aaand XER...
        with m.If(d_xer.req):  # request for regfile access being made
            comb += self.xer_r.ren.eq(0b111111)  # enable all
        d_xer_delay = Signal()
        sync += d_xer_delay.eq(d_xer.req)
        with m.If(d_xer_delay):
            # data arrives one clock later
            comb += d_xer.data.eq(self.xer_r.data_o)
            comb += d_xer.ack.eq(1)

        # DEC and TB inc/dec FSM
        self.tb_dec_fsm(m, cur_state.dec)

        return m

    def tb_dec_fsm(self, m, spr_dec):
        """tb_dec_fsm

        this is a FSM for updating either dec or tb.  it runs alternately
        DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
        value to DEC, however the regfile has "passthrough" on it so this
        *should* be ok.

        m:       the Module to which the FSM is added
        spr_dec: signal that receives a (sync) copy of each new DEC value

        returns m.

        see v3.0B p1097-1099 for Timer Resource and p1065 and p1076
        """

        comb, sync = m.d.comb, m.d.sync
        fast_rf = self.core.regs.rf['fast']
        fast_r_dectb = fast_rf.r_ports['issue']  # DEC/TB
        fast_w_dectb = fast_rf.w_ports['issue']  # DEC/TB

        with m.FSM() as fsm:

            # initiates read of current DEC
            with m.State("DEC_READ"):
                comb += fast_r_dectb.addr.eq(FastRegs.DEC)
                comb += fast_r_dectb.ren.eq(1)
                m.next = "DEC_WRITE"

            # waits for DEC read to arrive (1 cycle), updates with new value
            with m.State("DEC_WRITE"):
                new_dec = Signal(64)
                # TODO: MSR.LPCR 32-bit decrement mode
                comb += new_dec.eq(fast_r_dectb.data_o - 1)
                comb += fast_w_dectb.addr.eq(FastRegs.DEC)
                comb += fast_w_dectb.wen.eq(1)
                comb += fast_w_dectb.data_i.eq(new_dec)
                sync += spr_dec.eq(new_dec)  # copy into cur_state for decoder
                m.next = "TB_READ"

            # initiates read of current TB
            with m.State("TB_READ"):
                comb += fast_r_dectb.addr.eq(FastRegs.TB)
                comb += fast_r_dectb.ren.eq(1)
                m.next = "TB_WRITE"

            # waits for read TB to arrive, initiates write of current TB
            with m.State("TB_WRITE"):
                new_tb = Signal(64)
                comb += new_tb.eq(fast_r_dectb.data_o + 1)
                comb += fast_w_dectb.addr.eq(FastRegs.TB)
                comb += fast_w_dectb.wen.eq(1)
                comb += fast_w_dectb.data_i.eq(new_tb)
                m.next = "DEC_READ"

        return m

    def __iter__(self):
        """yields the issuer's own signals plus the core and imem ports"""
        yield from self.pc_i.ports()
        yield self.pc_o
        yield self.memerr_o
        yield from self.core.ports()
        yield from self.imem.ports()
        yield self.core_bigendian_i
        yield self.busy_o

    def ports(self):
        """returns everything yielded by __iter__, as a list"""
        return list(self)

    def external_ports(self):
        """returns the list of ports handed to rtlil.convert

        comprises: pc_i, pc_o, memerr_o, core_bigendian_i, busy_o, clock
        and reset, the DMI ports, the instruction bus and load/store
        slave bus fields, and (only if enabled) the XICS bus fields plus
        int_level_i, and the GPIO bus fields plus gpio_o.
        """
        # take a copy: the list is extended in-place below
        ports = list(self.pc_i.ports())
        ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
                  ClockSignal(), ResetSignal(),
                  ]
        ports += list(self.dbg.dmi.ports())
        ports += list(self.imem.ibus.fields.values())
        ports += list(self.core.l0.cmpi.lsmem.lsi.slavebus.fields.values())

        if self.xics:
            ports += list(self.xics_icp.bus.fields.values())
            ports += list(self.xics_ics.bus.fields.values())
            ports.append(self.int_level_i)

        if self.gpio:
            ports += list(self.simple_gpio.bus.fields.values())
            ports.append(self.gpio_o)

        return ports
418
419
if __name__ == '__main__':
    # one Function Unit of each kind (same key order as before)
    fu_names = ('alu', 'cr', 'branch', 'trap', 'logical',
                'spr', 'div', 'mul', 'shiftrot')
    units = dict.fromkeys(fu_names, 1)

    # bare wishbone interfaces for both load/store and instruction fetch
    pspec = TestMemPspec(
        ldst_ifacetype='bare_wb',
        imem_ifacetype='bare_wb',
        addr_wid=48,
        mask_wid=8,
        reg_wid=64,
        units=units,
    )
    dut = TestIssuer(pspec)

    # let the nmigen command-line handler process any arguments given
    vl = main(dut, ports=dut.ports(), name="test_issuer")

    # no arguments at all: write out RTLIL using the external port list
    no_args_given = len(sys.argv) == 1
    if no_args_given:
        vl = rtlil.convert(dut, ports=dut.external_ports(),
                           name="test_issuer")
        with open("test_issuer.il", "w") as f:
            f.write(vl)