add linux-5.7 unit test which showed a silly error:
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 (update: actually this is being added now:
21 https://bugs.libre-soc.org/show_bug.cgi?id=737)
22 """
23
24 from nmigen import (Elaboratable, Module, Signal, ResetSignal, Cat, Mux,
25 Const)
26 from nmigen.cli import rtlil
27
28 from openpower.decoder.power_decoder2 import PowerDecodeSubset
29 from openpower.decoder.power_regspec_map import regspec_decode
30 from openpower.sv.svp64 import SVP64Rec
31
32 from nmutil.picker import PriorityPicker
33 from nmutil.util import treereduce
34 from nmutil.singlepipe import ControlBase
35
36 from soc.fu.compunits.compunits import AllFunctionUnits, LDSTFunctionUnit
37 from soc.regfile.regfiles import RegFiles
38 from openpower.decoder.power_decoder2 import get_rdflags
39 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
40 from soc.config.test.test_loadstore import TestMemPspec
41 from openpower.decoder.power_enums import MicrOp, Function
42 from soc.simple.core_data import CoreInput, CoreOutput
43
44 from collections import defaultdict, namedtuple
45 import operator
46
47 from nmutil.util import rising_edge
48
49 FUSpec = namedtuple("FUSpec", ["funame", "fu", "idx"])
50 ByRegSpec = namedtuple("ByRegSpec", ["okflag", "regport", "wid", "specs"])
51
52 # helper function for reducing a list of signals down to a parallel
53 # ORed single signal.
def ortreereduce(tree, attr="o_data"):
    """OR together, as a balanced tree, one attribute of every item in *tree*.

    tree: the list of objects to reduce.
    attr: name of the attribute fetched (with getattr) from each object
          before it is ORed with the others.  defaults to "o_data".
    """
    def fetch(item):
        # pull the requested attribute out of each leaf of the tree
        return getattr(item, attr)

    return treereduce(tree, operator.or_, fetch)
56
57
def ortreereduce_sig(tree):
    """OR together, as a balanced tree, the items of *tree* themselves.

    unlike ortreereduce no attribute is fetched: each item is used as-is.
    """
    def identity(item):
        # leaves are already the things to be ORed
        return item

    return treereduce(tree, operator.or_, identity)
60
61
62 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the (regname, fspec) pairs of *fuspecs* with "full" regs first.

    entries whose name starts with "full" come first, followed by all the
    others.  within each of the two groups the dictionary's own ordering
    is kept (sorted() is stable, and the key is just a two-valued flag).
    """
    def not_full(item):
        # False sorts before True, so "full*" names end up at the front
        return not item[0].startswith("full")

    return sorted(fuspecs.items(), key=not_full)
72
73
74 # a hazard bitvector "remap" function which returns an AST expression
75 # that remaps read/write hazard regfile port numbers to either a full
76 # bitvector or a reduced subset one. SPR for example is reduced to a
77 # single bit.
78 # CRITICALLY-IMPORTANT NOTE: these bitvectors *have* to match up per
79 # regfile! therefore the remapping is per regfile, *NOT* per regfile
80 # port and certainly not based on whether it is a read port or write port.
81 # note that any reductions here will result in degraded performance due
82 # to conflicts, but at least it keeps the hazard matrix sizes down to "sane"
def bitvector_remap(regfile, rfile, port):
    """Remap a regfile port number to a hazard-bitvector expression.

    regfile: upper-case regfile name ('CR', 'XER', 'SVSTATE', 'FAST',
             'SPR' or 'INT').
    rfile:   the regfile object.  only its "unary" attribute is looked at,
             and only for FAST, SPR and INT.
    port:    the port number (or AST expression) to be remapped.

    returns *port* unchanged when the regfile is already unary-addressed,
    "1 << port" when it is binary-addressed, and None for any regfile name
    not listed above.
    """
    # CR (8 bits at the moment, no SVP64), XER (3 bits) and SVSTATE are
    # treated as already unary: no remap, and rfile is never inspected.
    if regfile in ('CR', 'XER', 'SVSTATE'):
        return port
    # FAST (9 entries), SPR and INT may be either unary or binary, so the
    # regfile itself has to be asked.  note that despite what earlier
    # comments claimed, SPR is *not* reduced to a single bit here: it is
    # handled exactly like FAST and INT.
    if regfile in ('FAST', 'SPR', 'INT'):
        if rfile.unary:
            return port
        return 1 << port
    # unrecognised regfile: there is no remapping defined for it
    return None
113
114
115 # derive from ControlBase rather than have a separate Stage instance,
116 # this is simpler to do
117 class NonProductionCore(ControlBase):
def __init__(self, pspec):
    """create the core's sub-components from the parameter spec

    pspec: parameter object.  the optional attributes "svp64",
           "regreduce", "allow_overlap", "core_type" and "msr_reset"
           are tested with hasattr before use.  "ldst_ifacetype" is
           read unconditionally (it is printed).

    sets up, in this order: the ControlBase parent (with this object as
    its own stage), the L0 cache buffer, the Function Units, the register
    files, the input/output data specs, and one PowerDecodeSubset
    "satellite" decoder for every Function Unit except TRAP.
    """
    self.pspec = pspec

    # test if SVP64 is to be enabled
    self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)

    # test to see if regfile ports should be reduced
    self.regreduce_en = (hasattr(pspec, "regreduce") and
                         (pspec.regreduce == True))

    # test to see if overlapping of instructions is allowed
    # (not normally enabled for TestIssuer FSM but useful for checking
    # the bitvector hazard detection, before doing In-Order)
    self.allow_overlap = (hasattr(pspec, "allow_overlap") and
                          (pspec.allow_overlap == True))

    # test core type
    # hazard vectors are only created when overlap is allowed
    self.make_hazard_vecs = self.allow_overlap
    self.core_type = "fsm"
    if hasattr(pspec, "core_type"):
        self.core_type = pspec.core_type

    # this object acts as its own Stage (see setup/ispec/ospec)
    super().__init__(stage=self)

    # single LD/ST funnel for memory access
    self.l0 = l0 = TstL0CacheBuffer(pspec, n_units=1)
    pi = l0.l0.dports[0]

    # function units (only one each)
    # only include mmu if enabled in pspec
    self.fus = AllFunctionUnits(pspec, pilist=[pi])

    # link LoadStore1 into MMU
    mmu = self.fus.get_fu('mmu0')
    ldst0 = self.fus.get_fu('ldst0')
    print ("core pspec", pspec.ldst_ifacetype)
    print ("core mmu", mmu)
    # note: self.icache only exists when there is an mmu0 Function Unit
    if mmu is not None:
        lsi = l0.cmpi.lsmem.lsi # a LoadStore1 Interface object
        print ("core lsmem.lsi", lsi)
        mmu.alu.set_ldst_interface(lsi)
        # urr store I-Cache in core so it is easier to get at
        self.icache = lsi.icache

    # MSR reset value: zero unless pspec.msr_reset is present and an int
    self.msr_at_reset = 0x0
    if hasattr(pspec, "msr_reset") and isinstance(pspec.msr_reset, int):
        self.msr_at_reset = pspec.msr_reset
    state_resets = [0x0,               # PC at reset
                    self.msr_at_reset, # MSR at reset
                    0x0]               # SVSTATE at reset

    # register files (yes plural)
    self.regs = RegFiles(pspec, make_hazard_vecs=self.make_hazard_vecs,
                         state_resets=state_resets)

    # set up input and output: unusual requirement to set data directly
    # (due to the way that the core is set up in a different domain,
    # see TestIssuer.setup_peripherals
    self.p.i_data, self.n.o_data = self.new_specs(None)
    self.i, self.o = self.p.i_data, self.n.o_data

    # actual internal input data used (captured)
    self.ireg = self.ispec()

    # create per-FU instruction decoders (subsetted).  these "satellite"
    # decoders reduce wire fan-out from the one (main) PowerDecoder2
    # (used directly by the trap unit) to the *twelve* (or more)
    # Function Units.  we can either have 32 wires (the instruction)
    # to each, or we can have well over a 200 wire fan-out (to 12
    # ALUs). it's an easy choice to make.
    self.decoders = {}
    self.des = {}

    # eep, these should be *per FU* i.e. for FunctionUnitBaseMulti
    # they should be shared (put into the ALU *once*).

    for funame, fu in self.fus.fus.items():
        f_name = fu.fnunit.name
        fnunit = fu.fnunit.value
        opkls = fu.opsubsetkls
        if f_name == 'TRAP':
            # TRAP decoder is the *main* decoder
            # (so no satellite decoder is created: just note the FU name.
            # self.trapunit is only set if a TRAP Function Unit exists)
            self.trapunit = funame
            continue
        assert funame not in self.decoders
        self.decoders[funame] = PowerDecodeSubset(None, opkls, f_name,
                                                  final=True,
                                                  state=self.ireg.state,
                                                  svp64_en=self.svp64_en,
                                                  regreduce_en=self.regreduce_en)
        self.des[funame] = self.decoders[funame].do
        print ("create decoder subset", funame, opkls, self.des[funame])

    # create per-Function Unit write-after-write hazard signals
    # yes, really, this should have been added in ReservationStations
    # but hey.
    for funame, fu in self.fus.fus.items():
        fu._waw_hazard = Signal(name="waw_%s" % funame)

    # share the SPR decoder with the MMU if it exists
    # (this requires that a "spr0" decoder was also created above)
    if "mmu0" in self.decoders:
        self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"]
220
221 # next 3 functions are Stage API Compliance
222 def setup(self, m, i):
223 pass
224
def ispec(self):
    """Stage API compliance: create a fresh CoreInput for this core.

    the CoreInput is given the pspec plus the SVP64 and regfile-reduction
    enable flags worked out in the constructor.
    """
    args = (self.pspec, self.svp64_en, self.regreduce_en)
    return CoreInput(*args)
227
def ospec(self):
    """Stage API compliance: create a fresh (argument-less) CoreOutput."""
    output = CoreOutput()
    return output
230
231 # elaborate function to create HDL
def elaborate(self, platform):
    """create the HDL for the core

    starts from the parent class's Module, adds the Function Units, the
    L0 cache buffer and the register files, then wires up the satellite
    decoders, instruction issue, regfile read ports and regfile write
    ports.  returns the Module.
    """
    m = super().elaborate(platform)

    # for testing purposes, to cut down on build time in coriolis2:
    # with pspec.nocore set, the whole core is one toggling dummy signal
    if getattr(self.pspec, "nocore", None) == True:
        x = Signal() # dummy signal
        m.d.sync += x.eq(~x)
        return m

    comb = m.d.comb

    # sub-modules first, then the register files
    m.submodules.fus = self.fus
    m.submodules.l0 = self.l0
    self.regs.elaborate_into(m, platform)

    # amalgamate the per-FU write-hazards into a single top-level Signal
    self.waw_hazard = Signal()
    whaz = [fu._waw_hazard for fu in self.fus.fus.values()]
    comb += self.waw_hazard.eq(Cat(*whaz).bool())

    # connect decoders
    self.connect_satellite_decoders(m)

    # ssh, cheat: trap uses the main decoder because of the rewriting
    self.des[self.trapunit] = self.ireg.e.do

    # connect up Function Units, then read/write ports, and hazard conflict
    self.issue_conflict = Signal()
    fu_bitdict, fu_selected = self.connect_instruction(m)
    raw_hazard = self.connect_rdports(m, fu_bitdict, fu_selected)
    self.connect_wrports(m, fu_bitdict, fu_selected)
    # the read-after-write hazard only matters when overlap is allowed
    if self.allow_overlap:
        comb += self.issue_conflict.eq(raw_hazard)

    # note if an exception happened.  in a pipelined or OoO design
    # this needs to be accompanied by "shadowing" (or stalling)
    el = [exc.happened for exc in self.fus.excs.values()]
    if el: # at least one exception
        comb += self.o.exc_happened.eq(Cat(*el).bool())

    return m
278
def connect_satellite_decoders(self, m):
    """add every satellite decoder as a submodule and feed it from ireg

    each decoder always gets the raw instruction, the endianness and
    sv_a_nz.  the SVP64 signals are only connected when svp64_en is set:
    the predicate modes go to every decoder, the RM and svp64-mode flag
    to all but the trap unit's, and use_svp64_ldst_dec only to decoders
    whose name starts with "ldst".
    """
    comb = m.d.comb
    for funame, decoder in self.decoders.items():
        # connect each satellite decoder and give it the instruction.
        # as subset decoders this massively reduces wire fanout given
        # the large number of ALUs
        m.submodules["dec_%s" % funame] = decoder
        comb += decoder.dec.raw_opcode_in.eq(self.ireg.raw_insn_i)
        comb += decoder.dec.bigendian.eq(self.ireg.bigendian_i)
        # sigh due to SVP64 RA_OR_ZERO detection connect these too
        comb += decoder.sv_a_nz.eq(self.ireg.sv_a_nz)

        if self.svp64_en:
            comb += decoder.pred_sm.eq(self.ireg.sv_pred_sm)
            comb += decoder.pred_dm.eq(self.ireg.sv_pred_dm)

            if funame != self.trapunit:
                # pass through SVP64 RM
                comb += decoder.sv_rm.eq(self.ireg.sv_rm)
                comb += decoder.is_svp64_mode.eq(self.ireg.is_svp64_mode)

                # only the LDST PowerDecodeSubset *actually* needs to
                # know to use the alternative decoder.  this is all
                # a terrible hack
                if funame.lower().startswith("ldst"):
                    comb += decoder.use_svp64_ldst_dec.eq(
                        self.ireg.use_svp64_ldst_dec)
304
def connect_instruction(self, m):
    """connect_instruction

    uses decoded (from PowerOp) function unit information from CSV files
    to ascertain which Function Unit should deal with the current
    instruction.

    some (such as OP_ATTN, OP_NOP) are dealt with here, including
    ignoring it and halting the processor.  OP_NOP is a bit annoying
    because the issuer expects busy flag still to be raised then lowered.
    (this requires a fake counter to be set).

    returns a tuple of two dictionaries, both keyed by Function Unit name:
    * fu_bitdict: the "issue enable" bit for that FU (the picker output)
    * fu_selected: "busy or being issued" for that FU (fu.busy_o | pick)
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus

    # indicate if core is busy
    busy_o = self.o.busy_o
    any_busy_o = self.o.any_busy_o

    # connect up temporary copy of incoming instruction. the FSM will
    # either blat the incoming instruction (if valid) into self.ireg
    # or if the instruction could not be delivered, keep dropping the
    # latched copy into ireg
    ilatch = self.ispec()
    self.instr_active = Signal()

    # enable/busy-signals for each FU, get one bit for each FU (by name)
    fu_enable = Signal(len(fus), reset_less=True)
    fu_busy = Signal(len(fus), reset_less=True)
    fu_bitdict = {}
    fu_selected = {}
    for i, funame in enumerate(fus.keys()):
        fu_bitdict[funame] = fu_enable[i]
        fu_selected[funame] = fu_busy[i]

    # identify function units and create a list by fnunit so that
    # PriorityPickers can be created for selecting one of them that
    # isn't busy at the time the incoming instruction needs passing on
    by_fnunit = defaultdict(list)
    for fname, member in Function.__members__.items():
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            if member.value & fnunit: # this FU handles this type of op
                by_fnunit[fname].append((funame, fu)) # add by Function

    # ok now just print out the list of FUs by Function, because we can
    for fname, fu_list in by_fnunit.items():
        print ("FUs by type", fname, fu_list)

    # now create a PriorityPicker per FU-type such that only one
    # non-busy FU will be picked
    issue_pps = {} # (not used anywhere below)
    fu_found = Signal() # take a note if no Function Unit was available
    for fname, fu_list in by_fnunit.items():
        i_pp = PriorityPicker(len(fu_list))
        m.submodules['i_pp_%s' % fname] = i_pp
        i_l = []
        for i, (funame, fu) in enumerate(fu_list):
            # match the decoded instruction (e.do.fn_unit) against the
            # "capability" of this FU, gate that by whether that FU is
            # busy, and drop that into the PriorityPicker.
            # this will give us an output of the first available *non-busy*
            # Function Unit (Reservation Station) capable of handling this
            # instruction.
            fnunit = fu.fnunit.value
            en_req = Signal(name="issue_en_%s" % funame, reset_less=True)
            fnmatch = (self.ireg.e.do.fn_unit & fnunit).bool()
            comb += en_req.eq(fnmatch & ~fu.busy_o &
                              self.instr_active)
            i_l.append(en_req) # store in list for doing the Cat-trick
            # picker output, gated by enable: store in fu_bitdict
            po = Signal(name="o_issue_pick_"+funame) # picker output
            comb += po.eq(i_pp.o[i] & i_pp.en_o)
            comb += fu_bitdict[funame].eq(po)
            comb += fu_selected[funame].eq(fu.busy_o | po)
            # if we don't do this, then when there are no FUs available,
            # the "p.o_ready" signal will go back "ok we accepted this
            # instruction" which of course isn't true.
            with m.If(i_pp.en_o):
                comb += fu_found.eq(1)
        # for each input, Cat them together and drop them into the picker
        comb += i_pp.i.eq(Cat(*i_l))

    # rdmask, which is for registers needs to come from the *main* decoder
    for funame, fu in fus.items():
        rdmask = get_rdflags(m, self.ireg.e, fu)
        comb += fu.rdmaskn.eq(~rdmask)

    # sigh - need a NOP counter
    # (busy is held high while it counts down to zero)
    counter = Signal(2)
    with m.If(counter != 0):
        sync += counter.eq(counter - 1)
        comb += busy_o.eq(1)

    # default to reading from incoming instruction: may be overridden
    # by copy from latch when "waiting"
    comb += self.ireg.eq(self.i)
    # always say "ready" except if overridden
    comb += self.p.o_ready.eq(1)

    # two states: READY takes the instruction straight from the input,
    # WAITING replays the latched copy until it can be issued
    with m.FSM():
        with m.State("READY"):
            with m.If(self.p.i_valid): # run only when valid
                with m.Switch(self.ireg.e.do.insn_type):
                    # check for ATTN: halt if true
                    with m.Case(MicrOp.OP_ATTN):
                        m.d.sync += self.o.core_terminate_o.eq(1)

                    # fake NOP - this isn't really used (Issuer detects NOP)
                    with m.Case(MicrOp.OP_NOP):
                        sync += counter.eq(2)
                        comb += busy_o.eq(1)

                    with m.Default():
                        comb += self.instr_active.eq(1)
                        comb += self.p.o_ready.eq(0)
                        # connect instructions. only one enabled at a time
                        for funame, fu in fus.items():
                            do = self.des[funame]
                            enable = fu_bitdict[funame]

                            # run this FunctionUnit if enabled route op,
                            # issue, busy, read flags and mask to FU
                            with m.If(enable):
                                # operand comes from the *local* decoder
                                # do not actually issue, though, if there
                                # is a waw hazard. decoder has to still
                                # be asserted in order to detect that, tho
                                comb += fu.oper_i.eq_from(do)
                                if funame == 'mmu0':
                                    # URRR this is truly dreadful.
                                    # OP_FETCH_FAILED is a "fake" op.
                                    # no instruction creates it.  OP_TRAP
                                    # uses the *main* decoder: this is
                                    # a *Satellite* decoder that reacts
                                    # on *insn_in*... not fake ops. gaah.
                                    main_op = self.ireg.e.do
                                    with m.If(main_op.insn_type ==
                                              MicrOp.OP_FETCH_FAILED):
                                        comb += fu.oper_i.insn_type.eq(
                                            MicrOp.OP_FETCH_FAILED)
                                        comb += fu.oper_i.fn_unit.eq(
                                            Function.MMU)
                                # issue when valid (and no write-hazard)
                                comb += fu.issue_i.eq(~self.waw_hazard)
                                # instruction ok, indicate ready
                                comb += self.p.o_ready.eq(1)

                        if self.allow_overlap:
                            # no FU picked, or a write-after-write hazard:
                            # hold on to the instruction and retry it
                            with m.If(~fu_found | self.waw_hazard):
                                # latch copy of instruction
                                sync += ilatch.eq(self.i)
                                comb += self.p.o_ready.eq(1) # accept
                                comb += busy_o.eq(1)
                                m.next = "WAITING"

        with m.State("WAITING"):
            comb += self.instr_active.eq(1)
            comb += self.p.o_ready.eq(0)
            comb += busy_o.eq(1)
            # using copy of instruction, keep waiting until an FU is free
            comb += self.ireg.eq(ilatch)
            with m.If(fu_found): # wait for conflict to clear
                # connect instructions. only one enabled at a time
                for funame, fu in fus.items():
                    do = self.des[funame]
                    enable = fu_bitdict[funame]

                    # run this FunctionUnit if enabled route op,
                    # issue, busy, read flags and mask to FU
                    with m.If(enable):
                        # operand comes from the *local* decoder,
                        # which is asserted even if not issued,
                        # so that WaW-detection can check for hazards.
                        # only if the waw hazard is clear does the
                        # instruction actually get issued
                        comb += fu.oper_i.eq_from(do)
                        # issue when valid
                        comb += fu.issue_i.eq(~self.waw_hazard)
                        with m.If(~self.waw_hazard):
                            comb += self.p.o_ready.eq(1)
                            comb += busy_o.eq(0)
                            m.next = "READY"

    print ("core: overlap allowed", self.allow_overlap)
    # true when any FU is busy (including the cycle where it is perhaps
    # to be issued - because that's what fu_busy is)
    comb += any_busy_o.eq(fu_busy.bool())
    if not self.allow_overlap:
        # for simple non-overlap, if any instruction is busy, set
        # busy output for core.
        comb += busy_o.eq(any_busy_o)
    else:
        # sigh deal with a fun situation that needs to be investigated
        # and resolved
        with m.If(self.issue_conflict):
            comb += busy_o.eq(1)
        # make sure that LDST, SPR, MMU, Branch and Trap all say "busy"
        # and do not allow overlap.  these are all the ones that
        # are non-forward-progressing: exceptions etc. that otherwise
        # change CoreState for some reason (MSR, PC, SVSTATE)
        for funame, fu in fus.items():
            if (funame.lower().startswith('ldst') or
                funame.lower().startswith('branch') or
                funame.lower().startswith('mmu') or
                funame.lower().startswith('spr') or
                funame.lower().startswith('trap')):
                with m.If(fu.busy_o):
                    comb += busy_o.eq(1)

    # return both the function unit "enable" dict as well as the "busy".
    # the "busy-or-issued" can be passed in to the Read/Write port
    # connecters to give them permission to request access to regfiles
    return fu_bitdict, fu_selected
519
def connect_rdport(self, m, fu_bitdict, fu_selected,
                   rdpickers, regfile, regname, fspec):
    """connect one regfile read port to all Function Units that use it

    m:           the Module being built
    fu_bitdict:  per-FU "issue enable" signals, by FU name
    fu_selected: per-FU "busy or being issued" signals, by FU name
    rdpickers:   dict-of-dicts. the PriorityPicker created here is stored
                 in rdpickers[regfile][regname]
    regfile:     regfile name (lower-cased to look up regs.rf / regs.wv)
    regname:     name of the read port within that regfile
    fspec:       either one spec or a list of specs.  each must have
                 okflag, regport, wid and specs attributes (the fields of
                 ByRegSpec), where specs is a list of items with funame,
                 fu and idx attributes (the fields of FUSpec)

    returns Const(0) when hazard vectors are disabled, otherwise a Signal
    that is set when any of the registers being read has its write-hazard
    bit set.
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus
    regs = self.regs

    rpidx = regname

    # select the required read port.  these are pre-defined sizes
    rfile = regs.rf[regfile.lower()]
    rport = rfile.r_ports[rpidx]
    print("read regfile", rpidx, regfile, regs.rf.keys(),
          rfile, rfile.unary)

    # for checking if the read port has an outstanding write
    # (wvchk is only defined when hazard vectors are enabled: every
    # later use of it is guarded by the same flag)
    if self.make_hazard_vecs:
        wv = regs.wv[regfile.lower()]
        wvchk = wv.q_int # write-vec bit-level hazard check

    # if a hazard is detected on this read port, simply blithely block
    # every FU from reading on it.  this is complete overkill but very
    # simple for now.
    hazard_detected = Signal(name="raw_%s_%s" % (regfile, rpidx))

    # normalise: a single spec is treated as a one-entry list
    fspecs = fspec
    if not isinstance(fspecs, list):
        fspecs = [fspecs]

    # first pass: one read-ok flag per spec, plus the offset of each
    # spec's first FU within the (shared) picker
    rdflags = []
    pplen = 0
    ppoffs = []
    for i, fspec in enumerate(fspecs):
        # get the regfile specs for this regfile port
        print ("fpsec", i, fspec, len(fspec.specs))
        name = "%s_%s_%d" % (regfile, regname, i)
        ppoffs.append(pplen) # record offset for picker
        pplen += len(fspec.specs)
        rdflag = Signal(name="rdflag_"+name, reset_less=True)
        comb += rdflag.eq(fspec.okflag)
        rdflags.append(rdflag)

    print ("pplen", pplen)

    # create a priority picker to manage this port
    rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
    m.submodules["rdpick_%s_%s" % (regfile, rpidx)] = rdpick

    rens = []
    addrs = []
    wvens = []

    # second pass: wire up every FU of every spec
    for i, fspec in enumerate(fspecs):
        (rf, _read, wid, fuspecs) = \
            (fspec.okflag, fspec.regport, fspec.wid, fspec.specs)
        # connect up the FU req/go signals, and the reg-read to the FU
        # and create a Read Broadcast Bus
        for pi, fuspec in enumerate(fspec.specs):
            (funame, fu, idx) = (fuspec.funame, fuspec.fu, fuspec.idx)
            pi += ppoffs[i] # index into the shared picker
            name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
            fu_active = fu_selected[funame]
            fu_issued = fu_bitdict[funame]

            # get (or set up) a latched copy of read register number
            # and (sigh) also the read-ok flag
            # TODO: use nmutil latchregister
            rhname = "%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name="rdflag_%s_%s" % (funame, rhname),
                            reset_less=True)
            if rhname not in fu.rf_latches:
                rfl = Signal(name="rdflag_latch_%s_%s" % (funame, rhname))
                fu.rf_latches[rhname] = rfl
                with m.If(fu.issue_i):
                    sync += rfl.eq(rdflags[i])
            else:
                rfl = fu.rf_latches[rhname]

            # now the register port
            rname = "%s_%s_%s_%d" % (funame, regfile, regname, pi)
            read = Signal.like(_read, name="read_"+rname)
            if rname not in fu.rd_latches:
                rdl = Signal.like(_read, name="rdlatch_"+rname)
                fu.rd_latches[rname] = rdl
                with m.If(fu.issue_i):
                    sync += rdl.eq(_read)
            else:
                rdl = fu.rd_latches[rname]

            # make the read immediately available on issue cycle
            # after the read cycle, otherwise use the latched copy.
            # this captures the regport and okflag on issue
            with m.If(fu.issue_i):
                comb += read.eq(_read)
                comb += rdflag.eq(rdflags[i])
            with m.Else():
                comb += read.eq(rdl)
                comb += rdflag.eq(rfl)

            # connect request-read to picker input, and output to go-rd
            addr_en = Signal.like(read, name="addr_en_"+name)
            pick = Signal(name="pick_"+name)     # picker input
            rp = Signal(name="rp_"+name)         # picker output
            delay_pick = Signal(name="dp_"+name) # read-enable "underway"
            rhazard = Signal(name="rhaz_"+name)

            # exclude any currently-enabled read-request (mask out active)
            # entirely block anything hazarded from being picked
            comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflag &
                            ~delay_pick & ~rhazard)
            comb += rdpick.i[pi].eq(pick)
            comb += fu.go_rd_i[idx].eq(delay_pick) # pass in *delayed* pick

            # if picked, select read-port "reg select" number to port
            comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
            sync += delay_pick.eq(rp) # delayed "pick"
            comb += addr_en.eq(Mux(rp, read, 0))

            # the read-enable happens combinatorially (see mux-bus below)
            # but it results in the data coming out on a one-cycle delay.
            if rfile.unary:
                rens.append(addr_en)
            else:
                addrs.append(addr_en)
                rens.append(rp)

            # use the *delayed* pick signal to put requested data onto bus
            with m.If(delay_pick):
                # connect regfile port to input, creating fan-out Bus
                src = fu.src_i[idx]
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      src.shape(), rport.o_data.shape())
                # all FUs connect to same port
                comb += src.eq(rport.o_data)

            if not self.make_hazard_vecs:
                continue

            # read the write-hazard bitvector (wv) for any bit that is
            # set for the register this FU wants to read
            wvchk_en = Signal(len(wvchk), name="wv_chk_addr_en_"+name)
            issue_active = Signal(name="rd_iactive_"+name)
            # XXX combinatorial loop here
            comb += issue_active.eq(fu_active & rdflag)
            with m.If(issue_active):
                if rfile.unary:
                    comb += wvchk_en.eq(read)
                else:
                    comb += wvchk_en.eq(1<<read)
            # if FU is busy (which doesn't get set at the same time as
            # issue) and no hazard was detected, clear wvchk_en (i.e.
            # stop checking for hazards).  there is a loop here, but it's
            # via a DFF, so is ok. some linters may complain, but hey.
            with m.If(fu.busy_o & ~rhazard):
                comb += wvchk_en.eq(0)

            # read-hazard is ANDed with (filtered by) what is actually
            # being requested.
            comb += rhazard.eq((wvchk & wvchk_en).bool())

            wvens.append(wvchk_en)

    # or-reduce the muxed read signals
    if rfile.unary:
        # for unary-addressed
        comb += rport.ren.eq(ortreereduce_sig(rens))
    else:
        # for binary-addressed
        comb += rport.addr.eq(ortreereduce_sig(addrs))
        comb += rport.ren.eq(Cat(*rens).bool())
        print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    if not self.make_hazard_vecs:
        return Const(0) # declare "no hazards"

    # enable the read bitvectors for this issued instruction
    # and return whether any write-hazard bit is set
    # NOTE(review): "name" here is whatever the last loop iteration
    # above left behind, not something specific to this port
    wvchk_and = Signal(len(wvchk), name="wv_chk_"+name)
    comb += wvchk_and.eq(wvchk & ortreereduce_sig(wvens))
    comb += hazard_detected.eq(wvchk_and.bool())
    return hazard_detected
700
def connect_rdports(self, m, fu_bitdict, fu_selected):
    """connect read ports

    takes the read regspecs as a dict-of-dicts (by regfile, then by
    regport name) and hands each regport in turn to connect_rdport,
    which connects all FUs wanting that regport via a PriorityPicker.

    returns a single expression that is true when any of the per-port
    hazard results returned by connect_rdport is set.
    """
    # one hazard result per connected port, collated for the return value
    rd_hazard = []

    # okaay, now we need a PriorityPicker per regfile per regfile port
    # loootta pickers... peter piper picked a pack of pickled peppers...
    rdpickers = {}

    # dictionary of lists of regfile read ports
    byregfiles_rdspec = self.get_byregfiles(m, True)

    for regfile, fuspecs in byregfiles_rdspec.items():
        rdpickers[regfile] = {}

        # argh.  an experiment to merge RA and RB in the INT regfile
        # (we have too many read/write ports)
        if self.regreduce_en:
            if regfile == 'INT':
                # rb, rc and ra (in that order) all share one "rabc" port
                merged = [fuspecs.pop('rb')]
                fuspecs['rabc'] = merged
                for portname in ('rc', 'ra'):
                    merged.append(fuspecs.pop(portname))
            elif regfile == 'FAST':
                # fast1 always exists; fast2 and fast3 are folded into
                # it only when they are present
                merged = [fuspecs.pop('fast1')]
                fuspecs['fast1'] = merged
                for portname in ('fast2', 'fast3'):
                    if portname in fuspecs:
                        merged.append(fuspecs.pop(portname))

        # for each named regfile port, connect up all FUs to that port
        # also return (and collate) hazard detection)
        for regname, fspec in sort_fuspecs(fuspecs):
            print("connect rd", regname, fspec)
            rd_hazard.append(
                self.connect_rdport(m, fu_bitdict, fu_selected,
                                    rdpickers, regfile,
                                    regname, fspec))

    return Cat(*rd_hazard).bool()
746
def make_hazards(self, m, regfile, rfile, wvclr, wvset,
                 funame, regname, idx,
                 addr_en, wp, fu, fu_active, wrflag, write,
                 fu_wrok):
    """make_hazards: a setter and a clearer for the regfile write ports

    setter is at issue time (using PowerDecoder2 regfile write numbers)
    clearer is at regfile write time (when FU has said what to write to)

    there is *one* unusual case here which has to be dealt with:
    when the Function Unit does *NOT* request a write to the regfile
    (has its data.ok bit CLEARED).  this is perfectly legitimate.
    and a royal pain.

    wvclr and wvset are only used here for their width (and printed).
    rfile.unary selects between passing a register number through as-is
    and converting it with "1 << number".

    returns (wvaddr_en, wviaddr_en): the hazard-clear enable bits and
    the hazard-set enable bits for this FU/port.
    """
    comb, sync = m.d.comb, m.d.sync
    name = "%s_%s_%d" % (funame, regname, idx)

    # connect up the bitvector write hazard.  unlike the
    # regfile writeports, a ONE must be written to the corresponding
    # bit of the hazard bitvector (to indicate the existence of
    # the hazard)

    # the detection of what shall be written to is based
    # on *issue*.  it is delayed by 1 cycle so that instructions
    # "addi 5,5,0x2" do not cause combinatorial loops due to
    # fake-dependency on *themselves*.  this will totally fail
    # spectacularly when doing multi-issue
    print ("write vector (for regread)", regfile, wvset)
    wviaddr_en = Signal(len(wvset), name="wv_issue_addr_en_"+name)
    issue_active = Signal(name="iactive_"+name)
    # registered (sync), which is what gives the 1-cycle delay
    sync += issue_active.eq(fu.issue_i & fu_active & wrflag)
    with m.If(issue_active):
        if rfile.unary:
            comb += wviaddr_en.eq(write)
        else:
            comb += wviaddr_en.eq(1<<write)

    # deal with write vector clear: this kicks in when the regfile
    # is written to, and clears the corresponding bitvector entry
    print ("write vector", regfile, wvclr)
    wvaddr_en = Signal(len(wvclr), name="wvaddr_en_"+name)
    if rfile.unary:
        comb += wvaddr_en.eq(addr_en)
    else:
        with m.If(wp):
            comb += wvaddr_en.eq(1<<addr_en)

    # XXX ASSUME that LDSTFunctionUnit always sets the data it intends to
    # this may NOT be the case when an exception occurs
    # (so LD/ST units skip the "never going to write" handling below)
    if isinstance(fu, LDSTFunctionUnit):
        return wvaddr_en, wviaddr_en

    # okaaay, this is preparation for the awkward case.
    # * latch a copy of wrflag when issue goes high.
    # * when the fu_wrok (data.ok) flag is NOT set,
    #   but the FU is done, the FU is NEVER going to write
    #   so the bitvector has to be cleared.
    latch_wrflag = Signal(name="latch_wrflag_"+name)
    with m.If(~fu.busy_o):
        sync += latch_wrflag.eq(0)
    # placed after the clear above, so that issue takes priority
    with m.If(fu.issue_i & fu_active):
        sync += latch_wrflag.eq(wrflag)
    with m.If(fu.alu_done_o & latch_wrflag & ~fu_wrok):
        if rfile.unary:
            comb += wvaddr_en.eq(write) # addr_en gated with wp, don't use
        else:
            comb += wvaddr_en.eq(1<<addr_en) # binary addr_en not gated

    return wvaddr_en, wviaddr_en
816
def connect_wrport(self, m, fu_bitdict, fu_selected,
                   wrpickers, regfile, regname, fspec):
    """connect all Function Units that write to one regfile write port

    creates the "Write Broadcast Bus" for write port regname of regfile:
    a PriorityPicker arbitrates between the Function Units requesting
    the port, and the data, address and write-enable of every FU lane
    are OR-reduced onto the port.

    * m           - Module under construction. comb/sync statements and
                    the picker submodule are added to it
    * fu_bitdict  - dict, keyed by FU name, of the FU "enable" signals
                    (used to latch the write register number)
    * fu_selected - dict, keyed by FU name, of the signals that gate
                    each FU's write-request into the picker
    * wrpickers   - dict-of-dicts. wrpickers[regfile][regname] is set
                    to the PriorityPicker created here
    * regfile     - regfile name (lower-cased to index regs.rf / regs.wv)
    * regname     - name of the write port within that regfile
    * fspec       - one regspec record, or a list of them when several
                    ports have been merged onto this one. each record
                    provides okflag, regport and specs (the FU lanes)

    returns (wvclren, wvseten): lists, one entry per FU lane, of the
    write-hazard bitvector clear-enables and set-enables. both lists
    are empty when self.make_hazard_vecs is False.
    """
    comb, sync = m.d.comb, m.d.sync
    regs = self.regs

    rpidx = regname

    # select the required write port.  these are pre-defined sizes
    rfile = regs.rf[regfile.lower()]
    wport = rfile.w_ports[rpidx]

    print("connect wr", regname, "unary", rfile.unary, fspec)
    print(regfile, regs.rf.keys())

    # select the write-protection hazard vector.  note that this still
    # requires to WRITE to the hazard bitvector!  read-requests need
    # to RAISE the bitvector (set it to 1), which, duh, requires a WRITE
    if self.make_hazard_vecs:
        wv = regs.wv[regfile.lower()]
        wvset = wv.s  # write-vec bit-level hazard ctrl
        wvclr = wv.r  # write-vec bit-level hazard ctrl
        wvchk = wv.q  # write-after-write hazard check

    # a merged port arrives as a list of records: always work on a list
    fspecs = fspec if isinstance(fspec, list) else [fspec]

    # first pass: work out, per record, the offset of its first lane
    # in the (single, shared) picker and create its write-flag signal
    pplen = 0
    ppoffs = []
    wrflags = []
    for i, spec in enumerate(fspecs):
        # get the regfile specs for this regfile port
        wf, fuspecs = spec.okflag, spec.specs
        print("fpsec", i, "wrflag", wf, spec, len(fuspecs))
        ppoffs.append(pplen)  # record offset for picker
        pplen += len(fuspecs)

        name = "%s_%s_%d" % (regfile, regname, i)
        wrflag = Signal(name="wr_flag_"+name)
        if wf is not None:
            comb += wrflag.eq(wf)
        else:
            # no write-active indicator for this port: flag held at zero
            comb += wrflag.eq(0)
        wrflags.append(wrflag)

    # create a priority picker to manage this port
    wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
    m.submodules["wrpick_%s_%s" % (regfile, rpidx)] = wrpick

    wsigs = []
    wens = []
    wvseten = []
    wvclren = []
    # wvens = [] - not needed: reading of writevec is permanently held hi
    addrs = []
    for i, spec in enumerate(fspecs):
        # connect up the FU req/go signals and the reg-read to the FU
        # these are arbitrated by Data.ok signals
        wf, _write = spec.okflag, spec.regport
        for pi, fuspec in enumerate(spec.specs):
            (funame, fu, idx) = (fuspec.funame, fuspec.fu, fuspec.idx)
            fu_requested = fu_bitdict[funame]
            pi += ppoffs[i]  # lane index within the shared picker
            # get (or set up) a write-latched copy of write register number
            rname = "%s_%s_%s_%d" % (funame, regfile, regname, idx)
            write = Signal.like(_write, name="write_"+rname)
            if rname not in fu.wr_latches:
                wrl = Signal.like(_write, name="wrlatch_"+rname)
                fu.wr_latches[rname] = write
                # do not depend on fu.issue_i here, it creates a
                # combinatorial loop on waw checking. using the FU
                # "enable" bitdict entry for this FU is sufficient,
                # because the PowerDecoder2 read/write nums are
                # valid continuously when the instruction is valid
                with m.If(fu_requested):
                    sync += wrl.eq(_write)
                    comb += write.eq(_write)
                with m.Else():
                    comb += write.eq(wrl)
            else:
                write = fu.wr_latches[rname]

            # write-request comes from dest.ok
            dest = fu.get_out(idx)
            fu_dest_latch = fu.get_fu_out(idx)  # latched output
            name = "%s_%s_%d" % (funame, regname, idx)
            fu_wrok = Signal(name="fu_wrok_"+name, reset_less=True)
            comb += fu_wrok.eq(dest.ok & fu.busy_o)

            # connect request-write to picker input, and output to go-wr
            fu_active = fu_selected[funame]
            pick = fu.wr.rel_o[idx] & fu_active
            comb += wrpick.i[pi].eq(pick)
            # create a single-pulse go write from the picker output
            wr_pick = Signal(name="wpick_%s_%s_%d" % (funame, regname, idx))
            comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
            comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))

            # connect the regspec write "reg select" number to this port
            # only if one FU actually requests (and is granted) the port
            # will the write-enable be activated
            wname = "waddr_en_%s_%s_%d" % (funame, regname, idx)
            addr_en = Signal.like(write, name=wname)
            # NOTE(review): this Signal is unnamed, so nmigen presumably
            # derives its name from the variable name: keep it as "wp"
            wp = Signal()
            comb += wp.eq(wr_pick & wrpick.en_o)
            comb += addr_en.eq(Mux(wp, write, 0))
            if rfile.unary:
                # unary regfile: the (gated) address *is* the write-enable
                wens.append(addr_en)
            else:
                # binary regfile: separate address and write-enable
                addrs.append(addr_en)
                wens.append(wp)

            # connect regfile port to input
            print("reg connect widths",
                  regfile, regname, pi, funame,
                  dest.shape(), wport.i_data.shape())
            wsigs.append(fu_dest_latch)

            # now connect up the bitvector write hazard
            if not self.make_hazard_vecs:
                continue
            res = self.make_hazards(m, regfile, rfile, wvclr, wvset,
                                    funame, regname, idx,
                                    addr_en, wp, fu, fu_active,
                                    wrflags[i], write, fu_wrok)
            wvaddr_en, wv_issue_en = res
            wvclren.append(wvaddr_en)    # set only: no data => clear bit
            wvseten.append(wv_issue_en)  # set data same as enable

            # read the write-hazard bitvector (wv) for any bit that is
            # set at the register this FU wants to write to: that is a
            # write-after-write hazard
            wvchk_en = Signal(len(wvchk), name="waw_chk_addr_en_"+name)
            issue_active = Signal(name="waw_iactive_"+name)
            whazard = Signal(name="whaz_"+name)
            if wf is None:
                # XXX EEK! STATE regfile (branch) does not have an
                # write-active indicator in regspec_decode_write()
                print("XXX FIXME waw_iactive", issue_active,
                      fu_requested, wf)
            else:
                # check bits from the incoming instruction.  note (back
                # in connect_instruction) that the decoder is held for
                # us to be able to do this, here... *without* issue being
                # held HI.  we MUST NOT gate this with fu.issue_i or
                # with fu_bitdict "enable": it would create a loop
                comb += issue_active.eq(wf)
            with m.If(issue_active):
                if rfile.unary:
                    comb += wvchk_en.eq(write)
                else:
                    comb += wvchk_en.eq(1<<write)
            # if FU is busy (which doesn't get set at the same time as
            # issue) and no hazard was detected, clear wvchk_en (i.e.
            # stop checking for hazards).  there is a loop here, but it's
            # via a DFF, so is ok. some linters may complain, but hey.
            # (this must stay *after* the m.If above: it overrides it)
            with m.If(fu.busy_o & ~whazard):
                comb += wvchk_en.eq(0)

            # write-hazard is ANDed with (filtered by) what is actually
            # being requested.  the wvchk data is on a one-clock delay,
            # and wvchk_en comes directly from the main decoder
            comb += whazard.eq((wvchk & wvchk_en).bool())
            with m.If(whazard):
                comb += fu._waw_hazard.eq(1)

            # wvens.append(wvchk_en)

    # here is where we create the Write Broadcast Bus. simple, eh?
    comb += wport.i_data.eq(ortreereduce_sig(wsigs))
    if rfile.unary:
        # for unary-addressed
        comb += wport.wen.eq(ortreereduce_sig(wens))
    else:
        # for binary-addressed
        comb += wport.addr.eq(ortreereduce_sig(addrs))
        comb += wport.wen.eq(ortreereduce_sig(wens))

    if not self.make_hazard_vecs:
        return [], []

    # return these here rather than set wvclr/wvset directly,
    # because there may be more than one write-port to a given
    # regfile.  example: XER has a write-port for SO, CA, and OV
    # and the *last one added* of those would overwrite the other
    # two.  solution: have connect_wrports collate all the
    # or-tree-reduced bitvector set/clear requests and drop them
    # in as a single "thing".  this can only be done because the
    # set/get is an unary bitvector.
    print("make write-vecs", regfile, regname, wvset, wvclr)
    return (wvclren,  # clear (regfile write)
            wvseten)  # set (issue time)
1014
def connect_wrports(self, m, fu_bitdict, fu_selected):
    """connect write ports

    orders the write regspecs into a dict-of-dicts, by regfile,
    by regport name, then connects all FUs that want that regport
    by way of a PriorityPicker (one connect_wrport call per port).

    when self.regreduce_en is set, write ports are merged first:
    INT "o1" onto "o", and FAST "fast2"/"fast3" onto "fast1".

    when self.make_hazard_vecs is set, the write-hazard set/clear
    requests returned by connect_wrport are collated per regfile,
    OR-reduced, and connected to that regfile's hazard bitvector.

    * m           - Module under construction
    * fu_bitdict  - dict by FU name, passed through to connect_wrport
    * fu_selected - dict by FU name, passed through to connect_wrport

    note that the write-port wen, write-port data, and go_wr_i all need to
    be on the exact same clock cycle.
    NOTE(review): this docstring used to say "as there is a combinatorial
    loop bug at the moment, these all use sync", but this function itself
    only adds comb statements - confirm whether that remark is stale.
    """
    comb = m.d.comb
    regs = self.regs
    # dictionary of lists of regfile write ports
    byregfiles_wrspec = self.get_byregfiles(m, False)

    # same for write ports.
    # BLECH!  complex code-duplication! BLECH!
    wrpickers = {}
    wvclrers = defaultdict(list)
    wvseters = defaultdict(list)
    for regfile, fuspecs in byregfiles_wrspec.items():
        wrpickers[regfile] = {}

        if self.regreduce_en:
            # argh, more port-merging.  a merged port is represented
            # as a *list* of regspec records under the surviving name
            if regfile == 'INT':
                fuspecs['o'] = [fuspecs.pop('o')]
                # only merge "o1" if some FU actually has one: treat it
                # as optional, exactly like fast2/fast3 below
                if 'o1' in fuspecs:
                    fuspecs['o'].append(fuspecs.pop('o1'))
            if regfile == 'FAST':
                fuspecs['fast1'] = [fuspecs.pop('fast1')]
                if 'fast2' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast2'))
                if 'fast3' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast3'))

        # collate these and record them by regfile because there
        # are sometimes more write-ports per regfile
        for (regname, fspec) in sort_fuspecs(fuspecs):
            wvclren, wvseten = self.connect_wrport(m,
                                                   fu_bitdict, fu_selected,
                                                   wrpickers,
                                                   regfile, regname, fspec)
            wvclrers[regfile.lower()] += wvclren
            wvseters[regfile.lower()] += wvseten

    if not self.make_hazard_vecs:
        return

    # for write-vectors: reduce the clr-ers and set-ers down to
    # a single set of bits.  otherwise if there are two write
    # ports (on some regfiles), the last one doing comb += on
    # the reg.wv[regfile] instance "wins" (and all others are ignored,
    # whoops).  if there was only one write-port per wv regfile this would
    # not be an issue.
    for regfile, wvclren in wvclrers.items():
        wv = regs.wv[regfile]
        wvset = wv.s  # write-vec bit-level hazard ctrl
        wvclr = wv.r  # write-vec bit-level hazard ctrl
        wvseten = wvseters[regfile]
        comb += wvclr.eq(ortreereduce_sig(wvclren))  # clear (regfile write)
        comb += wvset.eq(ortreereduce_sig(wvseten))  # set (issue time)
1079
def get_byregfiles(self, m, readmode):
    """group the regfile port requirements of all Function Units

    for every Function Unit in self.fus.fus, and for each of its source
    operands (readmode True) or destination operands (readmode False),
    obtains the regfile and port name it needs (fu.get_io_spec) and the
    matching decoder information (regspec_decode on self.ireg.e).

    side-effect: (re)creates the empty latch dictionaries on each FU,
    rd_latches and rf_latches in read mode, wr_latches in write mode.
    everything collected is also printed, for convenience.

    * m        - Module, passed through to regspec_decode
    * readmode - True to collect read ports, False for write ports

    returns a dict-of-dicts: first key is the regfile name, second key
    the regfile port name, value a ByRegSpec(okflag, regport, wid, specs)
    where specs is the list of FUSpec(funame, fu, idx) "lanes" that
    contend for that port.
    """
    mode = "read" if readmode else "write"
    fus = self.fus.fus
    e = self.ireg.e  # decoded instruction to execute

    # dictionary of dictionaries of lists/tuples of regfile ports.
    # first key: regfile.  second key: regfile port name
    byregfiles_spec = defaultdict(dict)

    for (funame, fu) in fus.items():
        # create in each FU a receptacle for the read/write register
        # hazard numbers (and okflags for read).  to be latched in
        # connect_rd/write_ports
        if readmode:
            fu.rd_latches = {}  # read reg number latches
            fu.rf_latches = {}  # read flag latches
        else:
            fu.wr_latches = {}

        # construct regfile specs: read uses inspec, write outspec
        print("%s ports for %s" % (mode, funame))
        for idx in range(fu.n_src if readmode else fu.n_dst):
            (regfile, regname, wid) = fu.get_io_spec(readmode, idx)
            print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))

            # the PowerDecoder2 (main one, not the satellites) contains
            # the decoded regfile numbers. obtain these now
            decinfo = regspec_decode(m, readmode, e, regfile, regname)
            okflag, regport = decinfo.okflag, decinfo.regport

            # construct the dictionary of regspec information by regfile.
            # the first FU seen for a port provides okflag/regport/wid
            if regname not in byregfiles_spec[regfile]:
                byregfiles_spec[regfile][regname] = \
                    ByRegSpec(okflag, regport, wid, [])

            # here we start to create "lanes" where each Function Unit
            # requiring access to a given [single-contended resource]
            # regfile port is appended to a list, so that PriorityPickers
            # can be created to give uncontested access to it
            fuspec = FUSpec(funame, fu, idx)
            byregfiles_spec[regfile][regname].specs.append(fuspec)

    # ok just print that all out, for convenience
    for regfile, portspecs in byregfiles_spec.items():
        print("regfile %s ports:" % mode, regfile)
        for regname, fspec in portspecs.items():
            [okflag, regport, wid, lanes] = fspec
            print(" rf %s port %s lane: %s" % (mode, regfile, regname))
            print(" %s" % regname, wid, okflag, regport)
            for (funame, fu, idx) in lanes:
                fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                print(" ", funame, fu.__class__.__name__, idx, fusig)
                # NOTE(review): nesting level of this blank-line print
                # is assumed (innermost loop) - confirm
                print()

    return byregfiles_spec
1137
def __iter__(self):
    """iterate over the ports of this core

    yields, in this order, everything returned by self.fus.ports(),
    then self.i.e.ports(), then self.l0.ports().
    """
    for fu_port in self.fus.ports():
        yield fu_port
    for e_port in self.i.e.ports():
        yield e_port
    for l0_port in self.l0.ports():
        yield l0_port
    # TODO: regs
1143
def ports(self):
    """return a new list holding everything that iterating self yields"""
    return [*self]
1146
1147
if __name__ == '__main__':
    # stand-alone run: build a core from a test memory pspec, convert
    # it with rtlil.convert and write the result to test_core.il
    pspec_args = dict(ldst_ifacetype='testpi',
                      imem_ifacetype='',
                      addr_wid=64,
                      allow_overlap=True,
                      mask_wid=8,
                      reg_wid=64)
    core = NonProductionCore(TestMemPspec(**pspec_args))
    il_text = rtlil.convert(core, ports=core.ports())
    with open("test_core.il", "w") as il_file:
        il_file.write(il_text)