set up a temporary copy of CoreInput
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 """
21
22 from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
23 from nmigen.cli import rtlil
24
25 from openpower.decoder.power_decoder2 import PowerDecodeSubset
26 from openpower.decoder.power_regspec_map import regspec_decode_read
27 from openpower.decoder.power_regspec_map import regspec_decode_write
28 from openpower.sv.svp64 import SVP64Rec
29
30 from nmutil.picker import PriorityPicker
31 from nmutil.util import treereduce
32 from nmutil.singlepipe import ControlBase
33
34 from soc.fu.compunits.compunits import AllFunctionUnits, LDSTFunctionUnit
35 from soc.regfile.regfiles import RegFiles
36 from openpower.decoder.power_decoder2 import get_rdflags
37 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
38 from soc.config.test.test_loadstore import TestMemPspec
39 from openpower.decoder.power_enums import MicrOp, Function
40 from soc.simple.core_data import CoreInput, CoreOutput
41
42 from collections import defaultdict
43 import operator
44
45 from nmutil.util import rising_edge
46
47
# helper function for reducing a list of objects down to a single
# parallel-ORed value, taking the same named attribute from each object.
def ortreereduce(tree, attr="o_data"):
    """OR-reduce attribute `attr` of every item in `tree` via treereduce.

    tree: sequence of objects, each providing the attribute named `attr`.
    attr: name of the attribute to fetch from each item (default "o_data").
    Returns whatever treereduce produces when combining with operator.or_.
    """
    def fetch(item):
        return getattr(item, attr)

    return treereduce(tree, operator.or_, fetch)
52
53
def ortreereduce_sig(tree):
    """OR-reduce the items of `tree` themselves (no attribute lookup)."""
    def identity(item):
        return item

    return treereduce(tree, operator.or_, identity)
56
57
# helper function to place "full" register declarations first
def sort_fuspecs(fuspecs):
    """Return fuspecs' (regname, fspec) pairs with "full*" names first.

    fuspecs: dict mapping regname (str) to its spec.
    Returns a list of (regname, fspec) tuples.  Within the "full" group and
    within the remaining group the dict's own iteration order is kept
    (sorted() is stable, and False sorts before True).
    """
    def is_not_full(pair):
        return not pair[0].startswith("full")

    return sorted(fuspecs.items(), key=is_not_full)
68
69
# derive from ControlBase rather than have a separate Stage instance,
# this is simpler to do
class NonProductionCore(ControlBase):
    """Simple (non-production) core: Function Units plus Register Files.

    Groups the register read/write requirements of every Function Unit by
    regfile and regfile port, and gives each port a PriorityPicker-managed
    broadcast bus.  The class acts as its own Stage (see setup / ispec /
    ospec) and is passed as `stage=self` to ControlBase.

    pspec attributes read here (all optional except ldst_ifacetype, which
    is printed unconditionally): svp64, regreduce, allow_overlap,
    core_type, nocore.
    """

    @staticmethod
    def _pspec_flag(pspec, name):
        """Return True only if pspec has attribute `name` equal to True.

        Deliberately keeps the "== True" comparison (rather than plain
        truthiness) so that behaviour matches the original option tests.
        """
        return getattr(pspec, name, False) == True

    def __init__(self, pspec):
        self.pspec = pspec

        # test if SVP64 is to be enabled
        self.svp64_en = self._pspec_flag(pspec, "svp64")

        # test to see if regfile ports should be reduced
        self.regreduce_en = self._pspec_flag(pspec, "regreduce")

        # test to see if overlapping of instructions is allowed
        # (not normally enabled for TestIssuer FSM but useful for checking
        # the bitvector hazard detection, before doing In-Order)
        self.allow_overlap = self._pspec_flag(pspec, "allow_overlap")

        # test core type
        self.make_hazard_vecs = True
        self.core_type = getattr(pspec, "core_type", "fsm")

        super().__init__(stage=self)

        # single LD/ST funnel for memory access
        self.l0 = l0 = TstL0CacheBuffer(pspec, n_units=1)
        pi = l0.l0.dports[0]

        # function units (only one each)
        # only include mmu if enabled in pspec
        self.fus = AllFunctionUnits(pspec, pilist=[pi])

        # link LoadStore1 into MMU
        mmu = self.fus.get_fu('mmu0')
        print("core pspec", pspec.ldst_ifacetype)
        print("core mmu", mmu)
        if mmu is not None:
            print("core lsmem.lsi", l0.cmpi.lsmem.lsi)
            mmu.alu.set_ldst_interface(l0.cmpi.lsmem.lsi)

        # register files (yes plural)
        self.regs = RegFiles(pspec, make_hazard_vecs=self.make_hazard_vecs)

        # set up input and output: unusual requirement to set data directly
        # (due to the way that the core is set up in a different domain,
        # see TestIssuer.setup_peripherals)
        self.p.i_data, self.n.o_data = self.new_specs(None)
        self.i, self.o = self.p.i_data, self.n.o_data

        # actual internal input data used (captured)
        self.ireg = self.ispec()

        # create per-FU instruction decoders (subsetted).  these "satellite"
        # decoders reduce wire fan-out from the one (main) PowerDecoder2
        # (used directly by the trap unit) to the *twelve* (or more)
        # Function Units.  we can either have 32 wires (the instruction)
        # to each, or we can have well over a 200 wire fan-out (to 12
        # ALUs).  it's an easy choice to make.
        self.decoders = {}
        self.des = {}

        for funame, fu in self.fus.fus.items():
            f_name = fu.fnunit.name
            opkls = fu.opsubsetkls
            if f_name == 'TRAP':
                # TRAP decoder is the *main* decoder: no satellite needed
                self.trapunit = funame
                continue
            self.decoders[funame] = PowerDecodeSubset(
                None, opkls, f_name,
                final=True,
                state=self.ireg.state,
                svp64_en=self.svp64_en,
                regreduce_en=self.regreduce_en)
            self.des[funame] = self.decoders[funame].do

        # share the SPR decoder with the MMU if it exists
        if "mmu0" in self.decoders:
            self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"]

    # next 3 functions are Stage API Compliance
    def setup(self, m, i):
        """Stage API hook: nothing to set up."""
        pass

    def ispec(self):
        """Stage API: create a fresh CoreInput record."""
        return CoreInput(self.pspec, self.svp64_en, self.regreduce_en)

    def ospec(self):
        """Stage API: create a fresh CoreOutput record."""
        return CoreOutput()

    # elaborate function to create HDL
    def elaborate(self, platform):
        """Build the HDL: FUs, L0 buffer, regfiles, decoders and ports.

        Returns the Module produced by ControlBase.elaborate with everything
        added to it.  If pspec.nocore is True only a dummy toggling signal
        is generated.
        """
        m = super().elaborate(platform)

        # for testing purposes, to cut down on build time in coriolis2
        if self._pspec_flag(self.pspec, "nocore"):
            x = Signal()  # dummy signal
            m.d.sync += x.eq(~x)
            return m
        comb = m.d.comb

        m.submodules.fus = self.fus
        m.submodules.l0 = self.l0
        self.regs.elaborate_into(m, platform)

        # connect up temporary copy of incoming instruction
        print("connect ireg, i", self.ireg, self.i)
        comb += self.ireg.eq(self.i)

        # connect decoders
        self.connect_satellite_decoders(m)

        # ssh, cheat: trap uses the main decoder because of the rewriting
        self.des[self.trapunit] = self.ireg.e.do

        # connect up Function Units, then read/write ports, and hazard
        # conflict.  note that it is the "busy-or-issued" dict (fu_selected)
        # which is handed to the read/write port connecters.
        issue_conflict = Signal()
        fu_bitdict, fu_selected = self.connect_instruction(m, issue_conflict)
        raw_hazard = self.connect_rdports(m, fu_selected)
        self.connect_wrports(m, fu_selected)
        comb += issue_conflict.eq(raw_hazard)

        # note if an exception happened.  in a pipelined or OoO design
        # this needs to be accompanied by "shadowing" (or stalling)
        el = [exc.happened for exc in self.fus.excs.values()]
        if el:  # at least one exception source
            comb += self.o.exc_happened.eq(Cat(*el).bool())

        return m

    def connect_satellite_decoders(self, m):
        """Add every satellite decoder as a submodule and feed it from ireg.

        Each decoder receives the raw instruction and endianness, sv_a_nz
        and, when SVP64 is enabled, the predicate modes plus (for non-trap
        units) the SVP64 RM and mode flags.
        """
        comb = m.d.comb
        for k, v in self.decoders.items():
            # connect each satellite decoder and give it the instruction.
            # as subset decoders this massively reduces wire fanout given
            # the large number of ALUs
            setattr(m.submodules, "dec_%s" % v.fn_name, v)
            comb += v.dec.raw_opcode_in.eq(self.ireg.raw_insn_i)
            comb += v.dec.bigendian.eq(self.ireg.bigendian_i)
            # sigh due to SVP64 RA_OR_ZERO detection connect these too
            comb += v.sv_a_nz.eq(self.ireg.sv_a_nz)
            if self.svp64_en:
                comb += v.pred_sm.eq(self.ireg.sv_pred_sm)
                comb += v.pred_dm.eq(self.ireg.sv_pred_dm)
                if k != self.trapunit:
                    # pass through SVP64 RM
                    comb += v.sv_rm.eq(self.ireg.sv_rm)
                    comb += v.is_svp64_mode.eq(self.ireg.is_svp64_mode)
                    # only the LDST PowerDecodeSubset *actually* needs to
                    # know to use the alternative decoder.  this is all
                    # a terrible hack
                    if k.lower().startswith("ldst"):
                        comb += v.use_svp64_ldst_dec.eq(
                            self.ireg.use_svp64_ldst_dec)

    def connect_instruction(self, m, issue_conflict):
        """connect_instruction

        uses decoded (from PowerOp) function unit information from CSV files
        to ascertain which Function Unit should deal with the current
        instruction.

        some (such as OP_ATTN, OP_NOP) are dealt with here, including
        ignoring it and halting the processor.  OP_NOP is a bit annoying
        because the issuer expects busy flag still to be raised then lowered.
        (this requires a fake counter to be set).

        m: Module being built.
        issue_conflict: Signal which, when set, stops an FU being "found".

        Returns (fu_bitdict, fu_selected): dicts keyed by FU name giving,
        respectively, the one-bit "issue enable" and the one-bit
        "busy-or-issued" signal for that FU.
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus

        # indicate if core is busy
        busy_o = self.o.busy_o

        # enable/busy-signals for each FU, get one bit for each FU (by name)
        # (variable names matter here: nmigen derives the Signal names
        # from the assignment target when no name= is given)
        fu_enable = Signal(len(fus), reset_less=True)
        fu_busy = Signal(len(fus), reset_less=True)
        fu_bitdict = {}
        fu_selected = {}
        for i, funame in enumerate(fus.keys()):
            fu_bitdict[funame] = fu_enable[i]
            fu_selected[funame] = fu_busy[i]

        # identify function units and create a list by fnunit so that
        # PriorityPickers can be created for selecting one of them that
        # isn't busy at the time the incoming instruction needs passing on
        by_fnunit = defaultdict(list)
        for fname, member in Function.__members__.items():
            for funame, fu in fus.items():
                fnunit = fu.fnunit.value
                if member.value & fnunit:  # this FU handles this type of op
                    by_fnunit[fname].append((funame, fu))  # add by Function

        # ok now just print out the list of FUs by Function, because we can
        for fname, fu_list in by_fnunit.items():
            print("FUs by type", fname, fu_list)

        # now create a PriorityPicker per FU-type such that only one
        # non-busy FU will be picked
        fu_found = Signal()  # take a note if no Function Unit was available
        for fname, fu_list in by_fnunit.items():
            i_pp = PriorityPicker(len(fu_list))
            m.submodules['i_pp_%s' % fname] = i_pp
            i_l = []
            for i, (funame, fu) in enumerate(fu_list):
                # match the decoded instruction (e.do.fn_unit) against the
                # "capability" of this FU, gate that by whether that FU is
                # busy, and drop that into the PriorityPicker.
                # this will give us an output of the first available
                # *non-busy* Function Unit (Reservation Station) capable
                # of handling this instruction.
                fnunit = fu.fnunit.value
                en_req = Signal(name="issue_en_%s" % funame, reset_less=True)
                fnmatch = (self.ireg.e.do.fn_unit & fnunit).bool()
                comb += en_req.eq(fnmatch & ~fu.busy_o & self.p.i_valid)
                i_l.append(en_req)  # store in list for doing the Cat-trick
                # picker output, gated by enable: store in fu_bitdict
                po = Signal(name="o_issue_pick_"+funame)  # picker output
                comb += po.eq(i_pp.o[i] & i_pp.en_o)
                comb += fu_bitdict[funame].eq(po)
                comb += fu_selected[funame].eq(fu.busy_o | po)
                # if we don't do this, then when there are no FUs available,
                # the "p.o_ready" signal will go back "ok we accepted this
                # instruction" which of course isn't true.
                with m.If(~issue_conflict & i_pp.en_o):
                    comb += fu_found.eq(1)
            # for each input, Cat them together and drop them into the picker
            comb += i_pp.i.eq(Cat(*i_l))

        # sigh - need a NOP counter
        counter = Signal(2)
        with m.If(counter != 0):
            sync += counter.eq(counter - 1)
            comb += busy_o.eq(1)

        with m.If(self.p.i_valid):  # run only when valid
            with m.Switch(self.ireg.e.do.insn_type):
                # check for ATTN: halt if true
                with m.Case(MicrOp.OP_ATTN):
                    m.d.sync += self.o.core_terminate_o.eq(1)

                # fake NOP - this isn't really used (Issuer detects NOP)
                with m.Case(MicrOp.OP_NOP):
                    sync += counter.eq(2)
                    comb += busy_o.eq(1)

                with m.Default():
                    # connect up instructions.  only one enabled at a time
                    for funame, fu in fus.items():
                        do = self.des[funame]
                        enable = fu_bitdict[funame]

                        # run this FunctionUnit if enabled
                        # route op, issue, busy, read flags and mask to FU
                        with m.If(enable):
                            # operand comes from the *local* decoder
                            comb += fu.oper_i.eq_from(do)
                            # issue when input valid
                            comb += fu.issue_i.eq(1)
                            # rdmask, which is for registers, needs to come
                            # from the *main* decoder
                            rdmask = get_rdflags(self.ireg.e, fu)
                            comb += fu.rdmaskn.eq(~rdmask)

        # NOTE: this unconditional assignment comes after the conditional
        # busy_o assignments above, so statement order is significant.
        print("core: overlap allowed", self.allow_overlap)
        if not self.allow_overlap:
            # for simple non-overlap, if any instruction is busy, set
            # busy output for core.
            busys = [fu.busy_o for fu in fus.values()]
            comb += busy_o.eq(Cat(*busys).bool())
        else:
            # for the overlap case, only set busy if an FU is not found,
            # and an FU will not be found if the write hazards are blocked
            comb += busy_o.eq(~fu_found | issue_conflict)

        # ready/valid signalling.  if busy, means refuse incoming issue.
        # also, if there was no fu found we must not send back a valid
        # indicator.  BUT, of course, when there is no instruction
        # we must ignore the fu_found flag, otherwise o_ready will never
        # be set when everything is idle
        comb += self.p.o_ready.eq(fu_found | ~self.p.i_valid)

        # return both the function unit "enable" dict as well as the "busy".
        # the "busy-or-issued" can be passed in to the Read/Write port
        # connecters to give them permission to request access to regfiles
        return fu_bitdict, fu_selected

    def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname,
                       fspec):
        """Connect one regfile read port to every FU that reads from it.

        m: Module being built.
        fu_bitdict: dict of per-FU one-bit "active" signals, by FU name.
        rdpickers: dict-of-dicts; the picker created here is stored at
                   rdpickers[regfile][regname].
        regfile, regname: regfile name and read-port name.
        fspec: a single 6-tuple (rdflag, wrflag, read, write, wid, fuspec)
               or a list of them (merged ports).

        Returns a one-bit value that is nonzero when the write-hazard
        bitvector reports a pending write on a register being read, or
        Const(0) when hazard vectors are disabled.
        """
        comb, sync = m.d.comb, m.d.sync
        regs = self.regs

        rpidx = regname

        # select the required read port.  these are pre-defined sizes
        rfile = regs.rf[regfile.lower()]
        rport = rfile.r_ports[rpidx]
        print("read regfile", rpidx, regfile, regs.rf.keys(),
              rfile, rfile.unary)

        # for checking if the read port has an outstanding write
        if self.make_hazard_vecs:
            wv = regs.wv[regfile.lower()]
            wvchk = wv.r_ports["issue"]  # write-vec bit-level hazard check

        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        rdflags = []
        pplen = 0
        reads = []
        ppoffs = []
        for i, spec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, wf, read, write, wid, fuspec) = spec
            print("fpsec", i, spec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)
            name = "rdflag_%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name=name, reset_less=True)
            comb += rdflag.eq(rf)
            rdflags.append(rdflag)
            reads.append(read)

        print("pplen", pplen)

        # create a priority picker to manage this port
        rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
        setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)

        rens = []
        addrs = []
        wvens = []

        for i, spec in enumerate(fspecs):
            (rf, wf, read, write, wid, fuspec) = spec
            # connect up the FU req/go signals, and the reg-read to the FU
            # and create a Read Broadcast Bus
            for pi, (funame, fu, idx) in enumerate(fuspec):
                pi += ppoffs[i]

                # connect request-read to picker input, and output to go-rd
                fu_active = fu_bitdict[funame]
                name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
                addr_en = Signal.like(reads[i], name="addr_en_"+name)
                pick = Signal(name="pick_"+name)  # picker input
                rp = Signal(name="rp_"+name)  # picker output
                # read-enable "underway"
                delay_pick = Signal(name="dp_"+name)

                # exclude any currently-enabled read-request (mask out
                # active)
                comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
                                ~delay_pick)
                comb += rdpick.i[pi].eq(pick)
                # pass in *delayed* pick
                comb += fu.go_rd_i[idx].eq(delay_pick)

                # if picked, select read-port "reg select" number to port
                comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
                sync += delay_pick.eq(rp)  # delayed "pick"
                comb += addr_en.eq(Mux(rp, reads[i], 0))

                # the read-enable happens combinatorially (see mux-bus
                # below) but it results in the data coming out on a
                # one-cycle delay.
                if rfile.unary:
                    rens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    rens.append(rp)

                # use the *delayed* pick signal to put requested data
                # onto bus
                with m.If(delay_pick):
                    # connect regfile port to input, creating fan-out Bus
                    src = fu.src_i[idx]
                    print("reg connect widths",
                          regfile, regname, pi, funame,
                          src.shape(), rport.o_data.shape())
                    # all FUs connect to same port
                    comb += src.eq(rport.o_data)

                if not self.make_hazard_vecs:
                    continue

                # read the write-hazard bitvector (wv) for any bit that
                # corresponds to a register read by the issued instruction
                wvchk_en = Signal(len(wvchk.ren),
                                  name="wv_chk_addr_en_"+name)
                issue_active = Signal(name="rd_iactive_"+name)
                comb += issue_active.eq(fu.issue_i & rdflags[i])
                with m.If(issue_active):
                    if rfile.unary:
                        comb += wvchk_en.eq(reads[i])
                    else:
                        comb += wvchk_en.eq(1 << reads[i])
                wvens.append(wvchk_en)

        # or-reduce the muxed read signals
        if rfile.unary:
            # for unary-addressed
            comb += rport.ren.eq(ortreereduce_sig(rens))
        else:
            # for binary-addressed
            comb += rport.addr.eq(ortreereduce_sig(addrs))
            comb += rport.ren.eq(Cat(*rens).bool())
            print("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

        if not self.make_hazard_vecs:
            # Const is not among this file's module-level nmigen imports:
            # import it here so that this path does not raise NameError.
            from nmigen import Const
            return Const(0)  # declare "no hazards"

        # enable the read bitvectors for this issued instruction
        # and return whether any write-hazard bit is set
        comb += wvchk.ren.eq(ortreereduce_sig(wvens))
        hazard_detected = Signal(name="raw_%s_%s" % (regfile, rpidx))
        comb += hazard_detected.eq(wvchk.o_data.bool())
        return hazard_detected

    def connect_rdports(self, m, fu_bitdict):
        """connect read ports

        orders the read regspecs into a dict-of-dicts, by regfile, by
        regport name, then connects all FUs that want that regport by
        way of a PriorityPicker.

        Returns a single-bit expression: the OR of the hazard indications
        returned by connect_rdport for every port.
        """
        rd_hazard = []

        # dictionary of lists of regfile read ports
        byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

        # okaay, now we need a PriorityPicker per regfile per regfile port
        # loootta pickers... peter piper picked a pack of pickled peppers...
        rdpickers = {}
        for regfile in byregfiles_rd:
            fuspecs = byregfiles_rdspec[regfile]
            rdpickers[regfile] = {}

            # argh.  an experiment to merge RA and RB in the INT regfile
            # (we have too many read/write ports)
            if self.regreduce_en:
                if regfile == 'INT':
                    fuspecs['rabc'] = [fuspecs.pop('rb')]
                    fuspecs['rabc'].append(fuspecs.pop('rc'))
                    fuspecs['rabc'].append(fuspecs.pop('ra'))
                if regfile == 'FAST':
                    fuspecs['fast1'] = [fuspecs.pop('fast1')]
                    if 'fast2' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast2'))
                    if 'fast3' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast3'))

            # for each named regfile port, connect up all FUs to that port
            # also return (and collate) hazard detection
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print("connect rd", regname, fspec)
                rh = self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
                                         regname, fspec)
                rd_hazard.append(rh)

        return Cat(*rd_hazard).bool()

    def make_hazards(self, m, regfile, rfile, wvclr, wvset,
                     funame, regname, idx,
                     addr_en, wp, fu, fu_active, wrflag, write,
                     fu_wrok):
        """make_hazards: a setter and a clearer for the regfile write ports

        setter is at issue time (using PowerDecoder2 regfile write numbers)
        clearer is at regfile write time (when FU has said what to write to)

        there is *one* unusual case here which has to be dealt with:
        when the Function Unit does *NOT* request a write to the regfile
        (has its data.ok bit CLEARED).  this is perfectly legitimate.
        and a royal pain.

        Returns (wvaddr_en, wviaddr_en): the clear-enable and the
        set-enable (issue-time) signals for the write-hazard bitvector.
        """
        comb, sync = m.d.comb, m.d.sync
        name = "%s_%s_%d" % (funame, regname, idx)

        # connect up the bitvector write hazard.  unlike the
        # regfile writeports, a ONE must be written to the corresponding
        # bit of the hazard bitvector (to indicate the existence of
        # the hazard)

        # the detection of what shall be written to is based
        # on *issue*
        print("write vector (for regread)", regfile, wvset)
        wviaddr_en = Signal(len(wvset.wen), name="wv_issue_addr_en_"+name)
        issue_active = Signal(name="iactive_"+name)
        comb += issue_active.eq(fu.issue_i & fu_active & wrflag)
        with m.If(issue_active):
            if rfile.unary:
                comb += wviaddr_en.eq(write)
            else:
                comb += wviaddr_en.eq(1 << write)

        # deal with write vector clear: this kicks in when the regfile
        # is written to, and clears the corresponding bitvector entry
        print("write vector", regfile, wvclr)
        wvaddr_en = Signal(len(wvclr.wen), name="wvaddr_en_"+name)
        if rfile.unary:
            comb += wvaddr_en.eq(addr_en)
        else:
            with m.If(wp):
                comb += wvaddr_en.eq(1 << addr_en)

        # XXX ASSUME that LDSTFunctionUnit always sets the data it intends
        # to.  this may NOT be the case when an exception occurs
        if isinstance(fu, LDSTFunctionUnit):
            return wvaddr_en, wviaddr_en

        # okaaay, this is preparation for the awkward case.
        # * latch a copy of wrflag when issue goes high.
        # * when the fu_wrok (data.ok) flag is NOT set,
        #   but the FU is done, the FU is NEVER going to write
        #   so the bitvector has to be cleared.
        latch_wrflag = Signal(name="latch_wrflag_"+name)
        with m.If(~fu.busy_o):
            sync += latch_wrflag.eq(0)
        with m.If(fu.issue_i & fu_active):
            sync += latch_wrflag.eq(wrflag)
        with m.If(fu.alu_done_o & latch_wrflag & ~fu_wrok):
            if rfile.unary:
                # addr_en gated with wp, don't use
                comb += wvaddr_en.eq(write)
            else:
                # NOTE(review): the original comment here says "binary
                # addr_en not gated", but connect_wrport assigns
                # addr_en = Mux(wp, write, 0) for unary and binary alike.
                # if wp is low in this case addr_en is 0 and bit 0 is
                # cleared.  possibly this should be (1 << write), as in
                # the unary branch - confirm before changing.
                comb += wvaddr_en.eq(1 << addr_en)

        return wvaddr_en, wviaddr_en

    def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname,
                       fspec):
        """Connect one regfile write port to every FU that writes to it.

        m: Module being built.
        fu_bitdict: dict of per-FU one-bit "active" signals, by FU name.
        wrpickers: dict-of-dicts; the picker created here is stored at
                   wrpickers[regfile][regname].
        regfile, regname: regfile name and write-port name.
        fspec: a single 6-tuple (rdflag, wrflag, read, write, wid, fuspec)
               or a list of them (merged ports).

        Also drives the "set" and "clr" ports of the write-hazard
        bitvector when hazard vectors are enabled.  Returns None.
        """
        comb = m.d.comb
        regs = self.regs

        rpidx = regname

        # select the required write port.  these are pre-defined sizes
        rfile = regs.rf[regfile.lower()]
        wport = rfile.w_ports[rpidx]

        print("connect wr", regname, "unary", rfile.unary, fspec)
        print(regfile, regs.rf.keys())

        # select the write-protection hazard vector.  note that this still
        # requires to WRITE to the hazard bitvector!  read-requests need
        # to RAISE the bitvector (set it to 1), which, duh, requires a WRITE
        if self.make_hazard_vecs:
            wv = regs.wv[regfile.lower()]
            wvset = wv.w_ports["set"]  # write-vec bit-level hazard ctrl
            wvclr = wv.w_ports["clr"]  # write-vec bit-level hazard ctrl

        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        pplen = 0
        ppoffs = []
        rdflags = []
        wrflags = []
        for i, spec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, wf, read, write, wid, fuspec) = spec
            print("fpsec", i, "wrflag", wf, spec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)

            name = "%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name="rd_flag_"+name)
            wrflag = Signal(name="wr_flag_"+name)
            if rf is not None:
                comb += rdflag.eq(rf)
            else:
                comb += rdflag.eq(0)
            if wf is not None:
                comb += wrflag.eq(wf)
            else:
                comb += wrflag.eq(0)
            rdflags.append(rdflag)
            wrflags.append(wrflag)

        # create a priority picker to manage this port
        wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
        setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)

        wsigs = []
        wens = []
        wvsets = []
        wvseten = []
        wvclren = []
        addrs = []
        for i, spec in enumerate(fspecs):
            # connect up the FU req/go signals and the reg-read to the FU
            # these are arbitrated by Data.ok signals
            (rf, wf, read, _write, wid, fuspec) = spec
            wrname = "write_%s_%s_%d" % (regfile, regname, i)
            write = Signal.like(_write, name=wrname)
            comb += write.eq(_write)
            for pi, (funame, fu, idx) in enumerate(fuspec):
                pi += ppoffs[i]

                # write-request comes from dest.ok
                dest = fu.get_out(idx)
                fu_dest_latch = fu.get_fu_out(idx)  # latched output
                name = "fu_wrok_%s_%s_%d" % (funame, regname, idx)
                fu_wrok = Signal(name=name, reset_less=True)
                comb += fu_wrok.eq(dest.ok & fu.busy_o)

                # connect request-write to picker input, and output
                # to go-wr
                fu_active = fu_bitdict[funame]
                pick = fu.wr.rel_o[idx] & fu_active
                comb += wrpick.i[pi].eq(pick)
                # create a single-pulse go write from the picker output
                wr_pick = Signal(name="wpick_%s_%s_%d" %
                                 (funame, regname, idx))
                comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
                comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))

                # connect the regspec write "reg select" number to this
                # port.  only if one FU actually requests (and is granted)
                # the port will the write-enable be activated
                wname = "waddr_en_%s_%s_%d" % (funame, regname, idx)
                addr_en = Signal.like(write, name=wname)
                # un-named Signal: keep the variable called "wp", nmigen
                # takes the signal name from it
                wp = Signal()
                comb += wp.eq(wr_pick & wrpick.en_o)
                comb += addr_en.eq(Mux(wp, write, 0))
                if rfile.unary:
                    wens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    wens.append(wp)

                # connect regfile port to input
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      dest.shape(), wport.i_data.shape())
                wsigs.append(fu_dest_latch)

                # now connect up the bitvector write hazard
                if not self.make_hazard_vecs:
                    continue
                res = self.make_hazards(m, regfile, rfile, wvclr, wvset,
                                        funame, regname, idx,
                                        addr_en, wp, fu, fu_active,
                                        wrflags[i], write, fu_wrok)
                wvaddr_en, wv_issue_en = res
                wvclren.append(wvaddr_en)    # set only: no data => clear bit
                wvseten.append(wv_issue_en)  # set data same as enable
                wvsets.append(wv_issue_en)   # because enable needs a 1

        # here is where we create the Write Broadcast Bus.  simple, eh?
        comb += wport.i_data.eq(ortreereduce_sig(wsigs))
        if rfile.unary:
            # for unary-addressed
            comb += wport.wen.eq(ortreereduce_sig(wens))
        else:
            # for binary-addressed
            comb += wport.addr.eq(ortreereduce_sig(addrs))
            comb += wport.wen.eq(ortreereduce_sig(wens))

        if not self.make_hazard_vecs:
            return

        # for write-vectors
        # clear (regfile write)
        comb += wvclr.wen.eq(ortreereduce_sig(wvclren))
        # set (issue time)
        comb += wvset.wen.eq(ortreereduce_sig(wvseten))
        comb += wvset.i_data.eq(ortreereduce_sig(wvsets))

    def connect_wrports(self, m, fu_bitdict):
        """connect write ports

        orders the write regspecs into a dict-of-dicts, by regfile,
        by regport name, then connects all FUs that want that regport
        by way of a PriorityPicker.

        note that the write-port wen, write-port data, and go_wr_i all need
        to be on the exact same clock cycle.  as there is a combinatorial
        loop bug at the moment, these all use sync.
        """
        # dictionary of lists of regfile write ports
        byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

        # same for write ports.
        # BLECH!  complex code-duplication!  BLECH!
        wrpickers = {}
        for regfile in byregfiles_wr:
            fuspecs = byregfiles_wrspec[regfile]
            wrpickers[regfile] = {}

            if self.regreduce_en:
                # argh, more port-merging
                if regfile == 'INT':
                    fuspecs['o'] = [fuspecs.pop('o')]
                    fuspecs['o'].append(fuspecs.pop('o1'))
                if regfile == 'FAST':
                    fuspecs['fast1'] = [fuspecs.pop('fast1')]
                    if 'fast2' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast2'))
                    if 'fast3' in fuspecs:
                        fuspecs['fast1'].append(fuspecs.pop('fast3'))

            for (regname, fspec) in sort_fuspecs(fuspecs):
                self.connect_wrport(m, fu_bitdict, wrpickers,
                                    regfile, regname, fspec)

    def get_byregfiles(self, readmode):
        """Group every FU's register ports by regfile.

        readmode: True to collect FU source (read) ports, False to collect
                  destination (write) ports.

        Returns (byregfiles, byregfiles_spec):
        * byregfiles[regfile][idx] is a list of (funame, fu, idx) tuples
        * byregfiles_spec[regfile][regname] is the 6-tuple
          (rdflag, wrport, read, write, wid, fuspec-list), where the
          read-side or write-side pair is None depending on readmode.
        """
        mode = "read" if readmode else "write"
        fus = self.fus.fus
        e = self.ireg.e  # decoded instruction to execute

        # dictionary of lists of regfile ports
        byregfiles = {}
        byregfiles_spec = {}
        for (funame, fu) in fus.items():
            print("%s ports for %s" % (mode, funame))
            for idx in range(fu.n_src if readmode else fu.n_dst):
                if readmode:
                    (regfile, regname, wid) = fu.get_in_spec(idx)
                else:
                    (regfile, regname, wid) = fu.get_out_spec(idx)
                print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
                if readmode:
                    rdflag, read = regspec_decode_read(e, regfile, regname)
                    wrport, write = None, None
                else:
                    rdflag, read = None, None
                    wrport, write = regspec_decode_write(e, regfile,
                                                         regname)
                if regfile not in byregfiles:
                    byregfiles[regfile] = {}
                    byregfiles_spec[regfile] = {}
                # the first FU to name a port fixes that port's spec
                if regname not in byregfiles_spec[regfile]:
                    byregfiles_spec[regfile][regname] = \
                        (rdflag, wrport, read, write, wid, [])
                # here we start to create "lanes"
                if idx not in byregfiles[regfile]:
                    byregfiles[regfile][idx] = []
                fuspec = (funame, fu, idx)
                byregfiles[regfile][idx].append(fuspec)
                byregfiles_spec[regfile][regname][5].append(fuspec)

        # ok just print that out, for convenience
        for regfile in byregfiles:
            print("regfile %s ports:" % mode, regfile)
            fuspecs = byregfiles_spec[regfile]
            for regname, fspec in fuspecs.items():
                [rdflag, wrflag, read, write, wid, fuspec] = fspec
                print(" rf %s port %s lane: %s" % (mode, regfile, regname))
                print(" %s" % regname, wid, read, write, rdflag, wrflag)
                for (funame, fu, idx) in fuspec:
                    fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                    print(" ", funame, fu.__class__.__name__, idx, fusig)
                    print()

        return byregfiles, byregfiles_spec

    def __iter__(self):
        """Yield the ports of the FUs, the decoded input and the L0 buffer."""
        yield from self.fus.ports()
        yield from self.i.e.ports()
        yield from self.l0.ports()
        # TODO: regs

    def ports(self):
        """Return all ports (see __iter__) as a list."""
        return list(self)
839
840
if __name__ == '__main__':
    # build a core from a test parameter-spec and dump it as RTLIL
    pspec_kwargs = dict(ldst_ifacetype='testpi',
                        imem_ifacetype='',
                        addr_wid=48,
                        mask_wid=8,
                        reg_wid=64)
    core = NonProductionCore(TestMemPspec(**pspec_kwargs))
    rtlil_text = rtlil.convert(core, ports=core.ports())
    with open("test_core.il", "w") as il_file:
        il_file.write(rtlil_text)