add an explicit PowerDecoder.is_svp64_mode flag to help with detection
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 """
21
22 from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
23 from nmigen.cli import rtlil
24
25 from openpower.decoder.power_decoder2 import PowerDecodeSubset
26 from openpower.decoder.power_regspec_map import regspec_decode_read
27 from openpower.decoder.power_regspec_map import regspec_decode_write
28 from openpower.sv.svp64 import SVP64Rec
29
30 from nmutil.picker import PriorityPicker
31 from nmutil.util import treereduce
32
33 from soc.fu.compunits.compunits import AllFunctionUnits
34 from soc.regfile.regfiles import RegFiles
35 from openpower.decoder.decode2execute1 import Decode2ToExecute1Type
36 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
37 from openpower.decoder.power_decoder2 import get_rdflags
38 from openpower.decoder.decode2execute1 import Data
39 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
40 from soc.config.test.test_loadstore import TestMemPspec
41 from openpower.decoder.power_enums import MicrOp
42 from soc.config.state import CoreState
43
44 import operator
45
46 from nmutil.util import rising_edge
47
48
def ortreereduce(tree, attr="data_o"):
    """Reduce a list of objects down to a single parallel-ORed value.

    The attribute named by *attr* (default ``"data_o"``) is fetched from
    every item of *tree*, and the results are combined with ``|`` by
    ``treereduce``.
    """
    def fetch(item):
        # pick the requested attribute off each leaf of the tree
        return getattr(item, attr)

    return treereduce(tree, operator.or_, fetch)
53
54
def ortreereduce_sig(tree):
    """Reduce a list of values down to a single parallel-ORed value.

    Unlike ``ortreereduce`` the items of *tree* are ORed together
    directly (no attribute lookup is performed on them).
    """
    def passthrough(item):
        # the items themselves are the operands
        return item

    return treereduce(tree, operator.or_, passthrough)
57
58
def sort_fuspecs(fuspecs):
    """Return the ``(regname, fspec)`` pairs of *fuspecs* as a list, with
    the "full" register declarations placed first.

    Every port whose name starts with ``"full"`` comes ahead of all the
    other ports; within each of the two groups the dictionary's own
    iteration order is kept.
    """
    pairs = list(fuspecs.items())
    full_ports = [pair for pair in pairs if pair[0].startswith("full")]
    other_ports = [pair for pair in pairs if not pair[0].startswith("full")]
    return full_ports + other_ports
69
70
class NonProductionCore(Elaboratable):
    """Simple core: Function Units wired directly to the Register Files.

    Built from a *pspec* configuration object.  The constructor creates
    a single ``TstL0CacheBuffer`` for memory access, the set of Function
    Units (``AllFunctionUnits``), the register files (``RegFiles``), the
    main decoded-instruction record ``self.e`` and one subsetted decoder
    (``PowerDecodeSubset``) for every Function Unit except TRAP.

    ``elaborate`` then connects the decoders, routes the instruction to
    the Function Unit selected by ``self.e.do.fn_unit``, and creates one
    ``PriorityPicker`` per regfile read port and per regfile write port
    to arbitrate between the Function Units sharing that port.
    """

    def __init__(self, pspec):
        self.pspec = pspec

        # test if SVP64 is to be enabled.  compared against True so that
        # only a value equal to True switches the feature on.
        self.svp64_en = getattr(pspec, "svp64", False) == True

        # test to see if regfile ports should be reduced
        self.regreduce_en = getattr(pspec, "regreduce", False) == True

        # single LD/ST funnel for memory access
        self.l0 = l0 = TstL0CacheBuffer(pspec, n_units=1)
        pi = l0.l0.dports[0]

        # function units (only one each)
        # only include mmu if enabled in pspec
        self.fus = AllFunctionUnits(pspec, pilist=[pi])

        # link LoadStore1 into MMU
        mmu = self.fus.get_fu('mmu0')
        print ("core pspec", pspec.ldst_ifacetype)
        print ("core mmu", mmu)
        print ("core lsmem.lsi", l0.cmpi.lsmem.lsi)
        if mmu is not None:
            mmu.alu.set_ldst_interface(l0.cmpi.lsmem.lsi)

        # register files (yes plural)
        self.regs = RegFiles(pspec)

        # instruction decoder - needs a Trap-capable Record (captures EINT etc.)
        self.e = Decode2ToExecute1Type("core", opkls=IssuerDecode2ToOperand,
                                       regreduce_en=self.regreduce_en)

        # SVP64 RA_OR_ZERO needs to know if the relevant EXTRA2/3 field is zero
        self.sv_a_nz = Signal()

        # state and raw instruction (and SVP64 ReMap fields)
        self.state = CoreState("core")
        self.raw_insn_i = Signal(32)  # raw instruction
        self.bigendian_i = Signal()  # bigendian - TODO, set by MSR.BE
        if self.svp64_en:
            self.sv_rm = SVP64Rec(name="core_svp64_rm")  # SVP64 RM field
            self.is_svp64_mode = Signal()  # set if SVP64 mode is enabled
            self.sv_pred_sm = Signal()  # TODO: SIMD width
            self.sv_pred_dm = Signal()  # TODO: SIMD width

        # issue/valid/busy signalling
        self.ivalid_i = Signal(reset_less=True)  # instruction is valid
        self.issue_i = Signal(reset_less=True)
        self.busy_o = Signal(name="corebusy_o", reset_less=True)

        # start/stop and terminated signalling
        self.core_terminate_o = Signal(reset=0)  # indicates stopped

        # create per-FU instruction decoders (subsetted)
        self.decoders = {}
        self.des = {}

        # name of the TRAP function unit.  stays None when the configured
        # set of Function Units has no TRAP unit, so that elaborate() can
        # test for it instead of failing with AttributeError.
        self.trapunit = None

        for funame, fu in self.fus.fus.items():
            f_name = fu.fnunit.name
            opkls = fu.opsubsetkls
            if f_name == 'TRAP':
                # TRAP decoder is the *main* decoder
                self.trapunit = funame
                continue
            self.decoders[funame] = PowerDecodeSubset(
                None, opkls, f_name,
                final=True,
                state=self.state,
                svp64_en=self.svp64_en,
                regreduce_en=self.regreduce_en)
            self.des[funame] = self.decoders[funame].do

        if "mmu0" in self.decoders:
            self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"]

    def elaborate(self, platform):
        """Build and return the nmigen Module for the core.

        When ``pspec.nocore`` equals True only a dummy toggling signal is
        created (see comment below); otherwise the submodules are added,
        the per-FU decoders are fed the raw instruction, and the
        instruction, read-port and write-port connections are made.
        """
        m = Module()
        # for testing purposes, to cut down on build time in coriolis2
        if getattr(self.pspec, "nocore", False) == True:
            x = Signal()  # dummy signal
            m.d.sync += x.eq(~x)
            return m
        comb = m.d.comb

        m.submodules.fus = self.fus
        m.submodules.l0 = self.l0
        self.regs.elaborate_into(m, platform)

        # connect decoders
        for k, v in self.decoders.items():
            setattr(m.submodules, "dec_%s" % v.fn_name, v)
            comb += v.dec.raw_opcode_in.eq(self.raw_insn_i)
            comb += v.dec.bigendian.eq(self.bigendian_i)
            # sigh due to SVP64 RA_OR_ZERO detection connect these too
            comb += v.sv_a_nz.eq(self.sv_a_nz)
            if self.svp64_en:
                comb += v.pred_sm.eq(self.sv_pred_sm)
                comb += v.pred_dm.eq(self.sv_pred_dm)
                if k != self.trapunit:
                    comb += v.sv_rm.eq(self.sv_rm)  # pass through SVP64 ReMap
                    comb += v.is_svp64_mode.eq(self.is_svp64_mode)

        # ssh, cheat: trap uses the main decoder because of the rewriting
        # (skipped when there is no TRAP function unit at all)
        if self.trapunit is not None:
            self.des[self.trapunit] = self.e.do

        # connect up Function Units, then read/write ports
        fu_bitdict = self.connect_instruction(m)
        self.connect_rdports(m, fu_bitdict)
        self.connect_wrports(m, fu_bitdict)

        return m

    def connect_instruction(self, m):
        """connect_instruction

        uses decoded (from PowerOp) function unit information from CSV files
        to ascertain which Function Unit should deal with the current
        instruction.

        some (such as OP_ATTN, OP_NOP) are dealt with here, including
        ignoring it and halting the processor.  OP_NOP is a bit annoying
        because the issuer expects busy flag still to be raised then lowered.
        (this requires a fake counter to be set).

        returns a dictionary mapping each Function Unit name to its
        one-bit "enable" slice, for use by connect_rdports/connect_wrports.
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus

        # enable-signals for each FU, get one bit for each FU (by name)
        fu_enable = Signal(len(fus), reset_less=True)
        fu_bitdict = {}
        for i, funame in enumerate(fus.keys()):
            fu_bitdict[funame] = fu_enable[i]

        # enable the required Function Unit based on the opcode decode
        # note: this *only* works correctly for simple core when one and
        # *only* one FU is allocated per instruction
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            enable = Signal(name="en_%s" % funame, reset_less=True)
            comb += enable.eq((self.e.do.fn_unit & fnunit).bool())
            comb += fu_bitdict[funame].eq(enable)

        # sigh - need a NOP counter
        counter = Signal(2)
        with m.If(counter != 0):
            sync += counter.eq(counter - 1)
            comb += self.busy_o.eq(1)

        with m.If(self.ivalid_i):  # run only when valid
            with m.Switch(self.e.do.insn_type):
                # check for ATTN: halt if true
                with m.Case(MicrOp.OP_ATTN):
                    sync += self.core_terminate_o.eq(1)

                with m.Case(MicrOp.OP_NOP):
                    sync += counter.eq(2)
                    comb += self.busy_o.eq(1)

                with m.Default():
                    # connect up instructions.  only one enabled at a time
                    for funame, fu in fus.items():
                        do = self.des[funame]
                        enable = fu_bitdict[funame]

                        # run this FunctionUnit if enabled
                        # route op, issue, busy, read flags and mask to FU
                        with m.If(enable):
                            # operand comes from the *local* decoder
                            comb += fu.oper_i.eq_from(do)
                            comb += fu.issue_i.eq(self.issue_i)
                            comb += self.busy_o.eq(fu.busy_o)
                            # rdmask, which is for registers, needs to come
                            # from the *main* decoder
                            rdmask = get_rdflags(self.e, fu)
                            comb += fu.rdmaskn.eq(~rdmask)

        return fu_bitdict

    def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec):
        """Connect every Function Unit that reads one regfile read port.

        * m          - the Module being built
        * fu_bitdict - FU name -> enable bit (from connect_instruction)
        * rdpickers  - dict-of-dicts; the PriorityPicker created here is
                       stored at ``rdpickers[regfile][regname]``
        * regfile    - regfile name (lower-cased to index ``regs.rf``)
        * regname    - name of the read port within that regfile
        * fspec      - one ``(rdflag, read, write, wid, fuspec)`` tuple, or
                       a list of them when several ports have been merged;
                       ``fuspec`` is a list of ``(funame, fu, idx)``
        """
        comb, sync = m.d.comb, m.d.sync
        regs = self.regs

        rpidx = regname

        # select the required read port.  these are pre-defined sizes
        rfile = regs.rf[regfile.lower()]
        rport = rfile.r_ports[rpidx]
        print("read regfile", rpidx, regfile, regs.rf.keys(),
              rfile, rfile.unary)

        # normalise to a list so merged and un-merged ports share one path
        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        rdflags = []
        pplen = 0
        reads = []
        ppoffs = []
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, _, _, fuspec) = fspec
            print ("fpsec", i, fspec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)
            name = "rdflag_%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name=name, reset_less=True)
            comb += rdflag.eq(rf)
            rdflags.append(rdflag)
            reads.append(read)

        print ("pplen", pplen)

        # create a priority picker to manage this port
        rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
        setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)

        rens = []
        addrs = []
        for i, fspec in enumerate(fspecs):
            fuspec = fspec[4]
            # connect up the FU req/go signals, and the reg-read to the FU
            # and create a Read Broadcast Bus
            for pi, (funame, fu, idx) in enumerate(fuspec):
                pi += ppoffs[i]

                # connect request-read to picker input, and output to go-rd
                fu_active = fu_bitdict[funame]
                name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
                addr_en = Signal.like(reads[i], name="addr_en_"+name)
                pick = Signal(name="pick_"+name)  # picker input
                rp = Signal(name="rp_"+name)  # picker output
                delay_pick = Signal(name="dp_"+name)  # read-enable "underway"

                # exclude any currently-enabled read-request (mask out active)
                comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
                                ~delay_pick)
                comb += rdpick.i[pi].eq(pick)
                comb += fu.go_rd_i[idx].eq(delay_pick)  # pass in *delayed* pick

                # if picked, select read-port "reg select" number to port
                comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
                sync += delay_pick.eq(rp)  # delayed "pick"
                comb += addr_en.eq(Mux(rp, reads[i], 0))

                # the read-enable happens combinatorially (see mux-bus below)
                # but it results in the data coming out on a one-cycle delay.
                if rfile.unary:
                    rens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    rens.append(rp)

                # use the *delayed* pick signal to put requested data onto bus
                with m.If(delay_pick):
                    # connect regfile port to input, creating fan-out Bus
                    src = fu.src_i[idx]
                    print("reg connect widths",
                          regfile, regname, pi, funame,
                          src.shape(), rport.data_o.shape())
                    # all FUs connect to same port
                    comb += src.eq(rport.data_o)

        # or-reduce the muxed read signals
        if rfile.unary:
            # for unary-addressed
            comb += rport.ren.eq(ortreereduce_sig(rens))
        else:
            # for binary-addressed
            comb += rport.addr.eq(ortreereduce_sig(addrs))
            comb += rport.ren.eq(Cat(*rens).bool())
            print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    def connect_rdports(self, m, fu_bitdict):
        """connect read ports

        orders the read regspecs into a dict-of-dicts, by regfile, by
        regport name, then connects all FUs that want that regport by
        way of a PriorityPicker.
        """
        # dictionary of lists of regfile read ports
        byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

        # okaay, now we need a PriorityPicker per regfile per regfile port
        # loootta pickers... peter piper picked a pack of pickled peppers...
        rdpickers = {}
        for regfile in byregfiles_rd:
            fuspecs = byregfiles_rdspec[regfile]
            rdpickers[regfile] = {}

            # argh.  an experiment to merge RA and RB in the INT regfile
            # (we have too many read/write ports)
            if self.regreduce_en:
                if regfile == 'INT':
                    fuspecs['rabc'] = [fuspecs.pop('rb')]
                    fuspecs['rabc'].append(fuspecs.pop('rc'))
                    fuspecs['rabc'].append(fuspecs.pop('ra'))
                if regfile == 'FAST':
                    fuspecs['fast1'] = [fuspecs.pop('fast1')]
                    if 'fast2' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast2'))
                    if 'fast3' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast3'))

            # for each named regfile port, connect up all FUs to that port
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print("connect rd", regname, fspec)
                self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
                                    regname, fspec)

    def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname, fspec):
        """Connect every Function Unit that writes one regfile write port.

        Arguments mirror connect_rdport: the PriorityPicker created here
        is stored at ``wrpickers[regfile][regname]`` and *fspec* is one
        ``(rdflag, read, write, wid, fuspec)`` tuple or a list of them.
        The latched outputs of all the Function Units are ORed together
        onto the port's ``data_i`` (the "Write Broadcast Bus").
        """
        comb = m.d.comb
        regs = self.regs

        print("connect wr", regname, fspec)
        rpidx = regname

        # select the required write port.  these are pre-defined sizes
        print(regfile, regs.rf.keys())
        rfile = regs.rf[regfile.lower()]
        wport = rfile.w_ports[rpidx]

        # normalise to a list so merged and un-merged ports share one path
        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        pplen = 0
        ppoffs = []
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            fuspec = fspec[4]
            print ("fpsec", i, fspec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)

        # create a priority picker to manage this port
        wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
        setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)

        wsigs = []
        wens = []
        addrs = []
        for i, fspec in enumerate(fspecs):
            # connect up the FU req/go signals and the reg-read to the FU
            # these are arbitrated by Data.ok signals
            (_, _, write, _, fuspec) = fspec
            for pi, (funame, fu, idx) in enumerate(fuspec):
                pi += ppoffs[i]

                # write-request comes from dest.ok
                dest = fu.get_out(idx)
                fu_dest_latch = fu.get_fu_out(idx)  # latched output
                name = "wrflag_%s_%s_%d" % (funame, regname, idx)
                wrflag = Signal(name=name, reset_less=True)
                # note: wrflag is driven but is currently not part of the
                # pick expression below (the "& wrflag" term is disabled)
                comb += wrflag.eq(dest.ok & fu.busy_o)

                # connect request-write to picker input, and output to go-wr
                fu_active = fu_bitdict[funame]
                pick = fu.wr.rel_o[idx] & fu_active  # & wrflag
                comb += wrpick.i[pi].eq(pick)
                # create a single-pulse go write from the picker output
                wr_pick = Signal()
                comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
                comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))

                # connect the regspec write "reg select" number to this port
                # only if one FU actually requests (and is granted) the port
                # will the write-enable be activated
                addr_en = Signal.like(write)
                wp = Signal()
                # wr_pick already includes wrpick.en_o: the second AND is
                # redundant but harmless, and is kept as-is
                comb += wp.eq(wr_pick & wrpick.en_o)
                comb += addr_en.eq(Mux(wp, write, 0))
                if rfile.unary:
                    wens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    wens.append(wp)

                # connect regfile port to input
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      dest.shape(), wport.data_i.shape())
                wsigs.append(fu_dest_latch)

        # here is where we create the Write Broadcast Bus. simple, eh?
        comb += wport.data_i.eq(ortreereduce_sig(wsigs))
        if rfile.unary:
            # for unary-addressed
            comb += wport.wen.eq(ortreereduce_sig(wens))
        else:
            # for binary-addressed
            comb += wport.addr.eq(ortreereduce_sig(addrs))
            comb += wport.wen.eq(ortreereduce_sig(wens))

    def connect_wrports(self, m, fu_bitdict):
        """connect write ports

        orders the write regspecs into a dict-of-dicts, by regfile,
        by regport name, then connects all FUs that want that regport
        by way of a PriorityPicker.

        note that the write-port wen, write-port data, and go_wr_i all need to
        be on the exact same clock cycle.

        NOTE(review): this docstring used to state that, because of a
        combinatorial loop bug, these all use sync.  connect_wrport as
        written drives wen, addr and data_i from comb, and go_wr_i from
        rising_edge(m, wr_pick) - confirm the timing is as intended.
        """
        # dictionary of lists of regfile write ports
        byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

        # same for write ports.
        # BLECH!  complex code-duplication! BLECH!
        wrpickers = {}
        for regfile in byregfiles_wr:
            fuspecs = byregfiles_wrspec[regfile]
            wrpickers[regfile] = {}

            if self.regreduce_en:
                # argh, more port-merging
                if regfile == 'INT':
                    fuspecs['o'] = [fuspecs.pop('o')]
                    fuspecs['o'].append(fuspecs.pop('o1'))
                if regfile == 'FAST':
                    fuspecs['fast1'] = [fuspecs.pop('fast1')]
                    if 'fast2' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast2'))
                    if 'fast3' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast3'))

            for (regname, fspec) in sort_fuspecs(fuspecs):
                self.connect_wrport(m, fu_bitdict, wrpickers,
                                    regfile, regname, fspec)

    def get_byregfiles(self, readmode):
        """Group the Function Unit register ports by regfile.

        *readmode* True walks each FU's source operands (``n_src``,
        ``get_in_spec``); False walks its destinations (``n_dst``,
        ``get_out_spec``).

        returns a tuple ``(byregfiles, byregfiles_spec)``:

        * ``byregfiles[regfile][idx]`` is a list of ``(funame, fu, idx)``
        * ``byregfiles_spec[regfile][regname]`` is a tuple
          ``(rdflag, read, write, wid, fuspecs)`` where ``fuspecs`` is the
          list of ``(funame, fu, idx)`` using that named port.  in read
          mode ``write`` is None; in write mode ``rdflag`` and ``read``
          are None.

        the result is also printed, for convenience.
        """
        mode = "read" if readmode else "write"
        fus = self.fus.fus
        e = self.e  # decoded instruction to execute

        # dictionary of lists of regfile ports
        byregfiles = {}
        byregfiles_spec = {}
        for (funame, fu) in fus.items():
            print("%s ports for %s" % (mode, funame))
            for idx in range(fu.n_src if readmode else fu.n_dst):
                if readmode:
                    (regfile, regname, wid) = fu.get_in_spec(idx)
                else:
                    (regfile, regname, wid) = fu.get_out_spec(idx)
                print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
                if readmode:
                    rdflag, read = regspec_decode_read(e, regfile, regname)
                    write = None
                else:
                    rdflag, read = None, None
                    # first item of the decode result is not needed here
                    _, write = regspec_decode_write(e, regfile, regname)
                if regfile not in byregfiles:
                    byregfiles[regfile] = {}
                    byregfiles_spec[regfile] = {}
                if regname not in byregfiles_spec[regfile]:
                    byregfiles_spec[regfile][regname] = \
                        (rdflag, read, write, wid, [])
                # here we start to create "lanes"
                if idx not in byregfiles[regfile]:
                    byregfiles[regfile][idx] = []
                fuspec = (funame, fu, idx)
                byregfiles[regfile][idx].append(fuspec)
                byregfiles_spec[regfile][regname][4].append(fuspec)

        # ok just print that out, for convenience
        for regfile in byregfiles:
            print("regfile %s ports:" % mode, regfile)
            fuspecs = byregfiles_spec[regfile]
            for regname, fspec in fuspecs.items():
                [rdflag, read, write, wid, fuspec] = fspec
                print(" rf %s port %s lane: %s" % (mode, regfile, regname))
                print(" %s" % regname, wid, read, write, rdflag)
                for (funame, fu, idx) in fuspec:
                    fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                    print(" ", funame, fu, idx, fusig)
                    print()

        return byregfiles, byregfiles_spec

    def __iter__(self):
        """Yield the ports of the Function Units, the decoded-instruction
        record and the L0 cache buffer, in that order."""
        yield from self.fus.ports()
        yield from self.e.ports()
        yield from self.l0.ports()
        # TODO: regs

    def ports(self):
        """Return all ports (see __iter__) as a list."""
        return list(self)
576
577
if __name__ == '__main__':
    # stand-alone build: create a core from a test pspec and dump it
    # as RTLIL into test_core.il
    pspec = TestMemPspec(
        ldst_ifacetype='testpi',
        imem_ifacetype='',
        addr_wid=48,
        mask_wid=8,
        reg_wid=64,
    )
    dut = NonProductionCore(pspec)
    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_core.il", "w") as outfile:
        outfile.write(rtlil_text)