"""not in any way intended for production use.  connects up FunctionUnits to
Register Files in a brain-dead fashion that permits one and only one
Function Unit to be operational at a time.

the principle here is to take the Function Units, analyse their regspecs,
and turn their requirements for access to register file read/write ports
into groupings by Register File and Register File Port name.

under each grouping - by regfile/port - a list of Function Units that
need to connect to that port is created.  as these ports are a contended
resource, a "Broadcast Bus" per read/write port is then also created,
with access to it managed by a PriorityPicker.

the brain-dead part of this module is that regfile read/write hazards
are *not* analysed, even in cases where there would be no conflict of
access.  consequently it is safer to wait for the Function Unit to
complete before allowing a new instruction to proceed.
"""
import operator

from nmigen import Elaboratable, Module, Signal
from nmigen.cli import rtlil

from nmutil.picker import PriorityPicker
from nmutil.util import treereduce

from soc.fu.compunits.compunits import AllFunctionUnits
from soc.regfile.regfiles import RegFiles
from soc.decoder.power_decoder import create_pdecode
from soc.decoder.power_decoder2 import PowerDecode2
from soc.decoder.decode2execute1 import Data
from soc.experiment.l0_cache import TstL0CacheBuffer  # test only
from soc.config.test.test_loadstore import TestMemPspec
from soc.decoder.power_enums import InternalOp
def ortreereduce(tree, attr="data_o"):
    """OR together one attribute of every item in *tree*.

    helper function for reducing a list of objects down to a single
    ORed value: the attribute named by *attr* is fetched from each
    item, and the results are combined with operator.or_ by
    treereduce.

    * tree: the items to reduce
    * attr: name of the attribute to fetch from each item
            (defaults to "data_o")
    """
    return treereduce(tree, operator.or_, operator.attrgetter(attr))
def ortreereduce_sig(tree):
    """OR together every item in *tree*.

    same reduction as ortreereduce, except that the items themselves
    (rather than an attribute of each item) are combined with
    operator.or_ by treereduce.
    """
    return treereduce(tree, operator.or_, lambda x: x)
def sort_fuspecs(fuspecs):
    """Return the (regname, fspec) pairs of *fuspecs*, "full" regs first.

    helper function to place full regs declarations first: every entry
    whose register name starts with "full" comes before all the other
    entries.  within each of the two groups the dictionary's own
    iteration order is kept.

    * fuspecs: dictionary of fspec, keyed by register (port) name

    returns a new list of (regname, fspec) tuples; *fuspecs* itself is
    not modified.
    """
    full_regs = []
    other_regs = []
    for regname, fspec in fuspecs.items():
        if regname.startswith("full"):
            full_regs.append((regname, fspec))
        else:
            other_regs.append((regname, fspec))
    return full_regs + other_regs
class NonProductionCore(Elaboratable):
    """core that connects Function Units to Register Files.

    every regfile read port and write port needed by the Function Units
    is shared between them, with a PriorityPicker per port deciding
    which Function Unit gets access.  the Function Unit to run is
    selected from the decoded instruction's fn_unit field.
    """

    def __init__(self, pspec):
        """create the sub-blocks and the core's control signals.

        * pspec: parameter specification, passed through unmodified to
                 TstL0CacheBuffer and AllFunctionUnits
        """
        # single LD/ST funnel for memory access
        self.l0 = TstL0CacheBuffer(pspec, n_units=1)
        pi = self.l0.l0.dports[0]

        # function units (only one each)
        self.fus = AllFunctionUnits(pspec, pilist=[pi])

        # register files (yes plural)
        self.regs = RegFiles()

        # instruction decoder
        pdecode = create_pdecode()
        self.pdecode2 = PowerDecode2(pdecode)

        # issue/valid/busy signalling
        self.ivalid_i = self.pdecode2.valid  # instruction is valid
        self.issue_i = Signal(reset_less=True)
        self.busy_o = Signal(name="corebusy_o", reset_less=True)

        # instruction input: aliases of the decoder's own signals
        self.bigendian_i = self.pdecode2.dec.bigendian
        self.raw_opcode_i = self.pdecode2.dec.raw_opcode_in

        # start/stop and terminated signalling
        self.core_start_i = Signal(reset_less=True)
        self.core_stop_i = Signal(reset_less=True)
        self.core_terminated_o = Signal(reset=1)  # indicates stopped

    def elaborate(self, platform):
        """build the core: submodules, start/stop state, then port wiring.

        returns the nmigen Module.
        """
        m = Module()

        m.submodules.pdecode2 = self.pdecode2
        m.submodules.fus = self.fus
        m.submodules.l0 = self.l0
        self.regs.elaborate_into(m, platform)

        # core start/stopped state
        core_stopped = Signal(reset=1)  # begins in stopped state

        # start/stop signalling.  stop is tested after start, so when
        # both are asserted in the same cycle the stop assignment wins.
        with m.If(self.core_start_i):
            m.d.sync += core_stopped.eq(0)
        with m.If(self.core_stop_i):
            m.d.sync += core_stopped.eq(1)
        m.d.comb += self.core_terminated_o.eq(core_stopped)

        # connect up Function Units, then read/write ports
        fu_bitdict = self.connect_instruction(m, core_stopped)
        self.connect_rdports(m, fu_bitdict)
        self.connect_wrports(m, fu_bitdict)

        return m

    def connect_instruction(self, m, core_stopped):
        """connect the decoded instruction to the Function Units.

        creates one enable bit per Function Unit, handles the ATTN (halt)
        and NOP special cases, and routes operand/issue/busy/read-mask
        signals to whichever Function Unit is enabled.

        * m:            the Module being built
        * core_stopped: Signal which is set when the core is halted

        returns a dictionary of enable bits, keyed by Function Unit name.
        """
        comb, sync = m.d.comb, m.d.sync
        # NOTE(review): assumes AllFunctionUnits keeps its units in a
        # dictionary attribute named "fus", keyed by name - confirm
        fus = self.fus.fus
        dec2 = self.pdecode2

        # enable-signals for each FU, get one bit for each FU (by name)
        fu_enable = Signal(len(fus), reset_less=True)
        fu_bitdict = {}
        for i, funame in enumerate(fus.keys()):
            fu_bitdict[funame] = fu_enable[i]

        # only run when allowed and when instruction is valid
        can_run = Signal(reset_less=True)
        comb += can_run.eq(self.ivalid_i & ~core_stopped)

        # enable the required Function Unit based on the opcode decode
        # note: this *only* works correctly for simple core when one and
        # *only* one FU is allocated per instruction
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            enable = Signal(name="en_%s" % funame, reset_less=True)
            comb += enable.eq((dec2.e.do.fn_unit & fnunit).bool() & can_run)
            comb += fu_bitdict[funame].eq(enable)

        # sigh - need a NOP counter
        # NOTE(review): 2 bits wide is assumed, being the minimum needed
        # to hold the value 2 loaded on a NOP below - confirm
        counter = Signal(2)
        with m.If(counter != 0):
            sync += counter.eq(counter - 1)
            comb += self.busy_o.eq(counter != 0)

        # check for ATTN: halt if true
        with m.If(self.ivalid_i &
                  (dec2.e.do.insn_type == InternalOp.OP_ATTN)):
            sync += core_stopped.eq(1)

        with m.Elif(can_run & (dec2.e.do.insn_type == InternalOp.OP_NOP)):
            sync += counter.eq(2)
            comb += self.busy_o.eq(1)

        # NOTE(review): the FU routing is assumed to be the final arm of
        # the ATTN/NOP chain, so that neither is issued to a Function
        # Unit - confirm
        with m.Else():
            # connect up instructions.  only one is enabled at any given time
            for funame, fu in fus.items():
                enable = fu_bitdict[funame]

                # run this FunctionUnit if enabled, except if the instruction
                # is "attn" in which case we HALT.
                with m.If(enable):
                    # route operand, issue, busy, read flags and mask to FU
                    comb += fu.oper_i.eq_from_execute1(dec2.e)
                    comb += fu.issue_i.eq(self.issue_i)
                    comb += self.busy_o.eq(fu.busy_o)
                    rdmask = dec2.rdflags(fu)
                    comb += fu.rdmaskn.eq(~rdmask)

        return fu_bitdict

    def connect_rdports(self, m, fu_bitdict):
        """connect read ports

        orders the read regspecs into a dict-of-dicts, by regfile, by
        regport name, then connects all FUs that want that regport by
        way of a PriorityPicker.

        * m:          the Module being built
        * fu_bitdict: enable bits keyed by Function Unit name, as
                      returned by connect_instruction
        """
        comb = m.d.comb
        regs = self.regs

        # dictionary of lists of regfile read ports
        byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

        # okaay, now we need a PriorityPicker per regfile per regfile port
        # loootta pickers... peter piper picked a pack of pickled peppers...
        rdpickers = {}
        for regfile in byregfiles_rd:
            fuspecs = byregfiles_rdspec[regfile]
            rdpickers[regfile] = {}

            # for each named regfile port, connect up all FUs to that port
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print("connect rd", regname, fspec)
                # NOTE(review): the read port is assumed to be looked up
                # by the regspec's register name - confirm
                rpidx = regname

                # get the regfile specs for this regfile port
                (rf, read, write, wid, fuspec) = fspec
                name = "rdflag_%s_%s" % (regfile, regname)
                rdflag = Signal(name=name, reset_less=True)
                comb += rdflag.eq(rf)

                # select the required read port.  these are pre-defined sizes
                print(rpidx, regfile, regs.rf.keys())
                rport = regs.rf[regfile.lower()].r_ports[rpidx]

                # create a priority picker to manage this port
                rdpick = PriorityPicker(len(fuspec))
                rdpickers[regfile][rpidx] = rdpick
                setattr(m.submodules,
                        "rdpick_%s_%s" % (regfile, rpidx), rdpick)

                # connect the regspec "reg select" number to this port
                with m.If(rdpick.en_o):
                    comb += rport.ren.eq(read)

                # connect up the FU req/go signals, and the reg-read to the FU
                # and create a Read Broadcast Bus
                for pidx, (funame, fu, idx) in enumerate(fuspec):
                    src = fu.src_i[idx]

                    # connect request-read to picker input, and output to go-rd
                    fu_active = fu_bitdict[funame]
                    pick = fu.rd_rel_o[idx] & fu_active & rdflag
                    comb += rdpick.i[pidx].eq(pick)
                    comb += fu.go_rd_i[idx].eq(rdpick.o[pidx])

                    # connect regfile port to input, creating a Broadcast Bus
                    print("reg connect widths",
                          regfile, regname, pidx, funame,
                          src.shape(), rport.data_o.shape())
                    comb += src.eq(rport.data_o)  # all FUs connect to same port

    def connect_wrports(self, m, fu_bitdict):
        """connect write ports

        orders the write regspecs into a dict-of-dicts, by regfile,
        by regport name, then connects all FUs that want that regport
        by way of a PriorityPicker.

        note that the write-port wen, write-port data, and go_wr_i all need
        to be on the exact same clock cycle.

        NOTE(review): an earlier version of this note said that, due to a
        combinatorial loop bug, these all use sync.  every assignment
        below is in the comb domain - confirm which is intended.

        * m:          the Module being built
        * fu_bitdict: enable bits keyed by Function Unit name, as
                      returned by connect_instruction
        """
        comb = m.d.comb
        regs = self.regs

        # dictionary of lists of regfile write ports
        byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

        # same for write ports.
        # BLECH!  complex code-duplication! BLECH!
        wrpickers = {}
        for regfile in byregfiles_wr:
            fuspecs = byregfiles_wrspec[regfile]
            wrpickers[regfile] = {}
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print("connect wr", regname, fspec)
                # NOTE(review): the write port is assumed to be looked up
                # by the regspec's register name - confirm
                rpidx = regname

                # get the regfile specs for this regfile port
                (rf, read, write, wid, fuspec) = fspec

                # select the required write port.  these are pre-defined sizes
                print(regfile, regs.rf.keys())
                wport = regs.rf[regfile.lower()].w_ports[rpidx]

                # create a priority picker to manage this port
                wrpick = PriorityPicker(len(fuspec))
                wrpickers[regfile][rpidx] = wrpick
                setattr(m.submodules,
                        "wrpick_%s_%s" % (regfile, rpidx), wrpick)

                # connect the regspec write "reg select" number to this port
                # only if one FU actually requests (and is granted) the port
                # will the write-enable be activated
                with m.If(wrpick.en_o):
                    comb += wport.wen.eq(write)
                with m.Else():
                    comb += wport.wen.eq(0)

                # connect up the FU req/go signals and the reg-read to the FU
                # these are arbitrated by Data.ok signals
                wsigs = []
                for pidx, (funame, fu, idx) in enumerate(fuspec):
                    # write-request comes from dest.ok
                    dest = fu.get_out(idx)
                    fu_dest_latch = fu.get_fu_out(idx)  # latched output
                    name = "wrflag_%s_%s_%d" % (funame, regname, idx)
                    wrflag = Signal(name=name, reset_less=True)
                    comb += wrflag.eq(dest.ok & fu.busy_o)

                    # connect request-write to picker input, and output to
                    # go-wr.  wrflag is deliberately not part of the request.
                    fu_active = fu_bitdict[funame]
                    pick = fu.wr.rel[idx] & fu_active  # & wrflag
                    comb += wrpick.i[pidx].eq(pick)
                    comb += fu.go_wr_i[idx].eq(wrpick.o[pidx] & wrpick.en_o)

                    # connect regfile port to input
                    print("reg connect widths",
                          regfile, regname, pidx, funame,
                          dest.shape(), wport.data_i.shape())
                    wsigs.append(fu_dest_latch)

                # here is where we create the Write Broadcast Bus. simple, eh?
                comb += wport.data_i.eq(ortreereduce_sig(wsigs))

    def get_byregfiles(self, readmode):
        """group the Function Units' regspecs by register file.

        * readmode: True to collect source (read) specs, False to collect
                    destination (write) specs

        returns a tuple of two dictionaries, both keyed by regfile name:

        * byregfiles[regfile][idx] is a list of (funame, fu, idx)
        * byregfiles_spec[regfile][regname] is a list
          [rdflag, read, write, wid, fuspecs] where fuspecs is a list
          of (funame, fu, idx)

        everything collected is also printed, for convenience.
        """
        mode = "read" if readmode else "write"
        dec2 = self.pdecode2
        # NOTE(review): assumes AllFunctionUnits keeps its units in a
        # dictionary attribute named "fus", keyed by name - confirm
        fus = self.fus.fus

        # dictionary of lists of regfile ports
        byregfiles = {}
        byregfiles_spec = {}
        for (funame, fu) in fus.items():
            print("%s ports for %s" % (mode, funame))
            for idx in range(fu.n_src if readmode else fu.n_dst):
                if readmode:
                    (regfile, regname, wid) = fu.get_in_spec(idx)
                else:
                    (regfile, regname, wid) = fu.get_out_spec(idx)
                print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
                if readmode:
                    rdflag, read = dec2.regspecmap_read(regfile, regname)
                    write = None  # no write-select in read mode
                else:
                    rdflag, read = None, None
                    _, write = dec2.regspecmap_write(regfile, regname)

                lanes = byregfiles.setdefault(regfile, {})
                specs = byregfiles_spec.setdefault(regfile, {})
                # only the first FU to name a port sets that port's spec
                if regname not in specs:
                    specs[regname] = [rdflag, read, write, wid, []]

                # here we start to create "lanes"
                fuspec = (funame, fu, idx)
                lanes.setdefault(idx, []).append(fuspec)
                specs[regname][4].append(fuspec)

        # ok just print that out, for convenience
        for regfile in byregfiles:
            print("regfile %s ports:" % mode, regfile)
            fuspecs = byregfiles_spec[regfile]
            for regname, fspec in fuspecs.items():
                [rdflag, read, write, wid, fuspec] = fspec
                print(" rf %s port %s lane: %s" % (mode, regfile, regname))
                print(" %s" % regname, wid, read, write, rdflag)
                for (funame, fu, idx) in fuspec:
                    fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                    print(" ", funame, fu, idx, fusig)

        return byregfiles, byregfiles_spec

    def ports(self):
        """yield the ports of the Function Units, decoder and L0 buffer."""
        yield from self.fus.ports()
        yield from self.pdecode2.ports()
        yield from self.l0.ports()
363 if __name__
== '__main__':
364 pspec
= TestMemPspec(ldst_ifacetype
='testpi',
369 dut
= NonProductionCore(pspec
)
370 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
371 with
open("test_core.il", "w") as f
: