format code
[nmutil.git] / src / nmutil / concurrentunit.py
1 """ concurrent unit from mitch alsup augmentations to 6600 scoreboard
2
3 This work is funded through NLnet under Grant 2019-02-012
4
5 License: LGPLv3+
6
7
8 * data fans in
9 * data goes through a pipeline
10 * results fan back out.
11
12 the output data format has to have a member "muxid", which is used
13 as the array index on fan-out
14
15 Associated bugreports:
16
17 * https://bugs.libre-soc.org/show_bug.cgi?id=538
18 """
19
20 from math import log
21 from nmigen import Module, Elaboratable, Signal, Cat
22 from nmigen.asserts import Assert
23 from nmigen.lib.coding import PriorityEncoder
24 from nmigen.cli import main, verilog
25
26 from nmutil.singlepipe import PassThroughStage
27 from nmutil.multipipe import CombMuxOutPipe
28 from nmutil.multipipe import PriorityCombMuxInPipe
29 from nmutil.iocontrol import NextControl, PrevControl
30 from nmutil import nmoperator
31
32
def num_bits(n):
    """Return floor(log2(n)), i.e. the index of the highest set bit of n.

    Integer arguments are handled exactly using ``int.bit_length``: the
    floating-point formula ``int(log(n) / log(2))`` is subject to rounding
    and can be off by one for large values close to a power of two.

    Non-integer arguments fall back to the logarithm formula.

    Raises ValueError if an integer n is not strictly positive (math.log
    raises the same exception type for non-positive input).
    """
    if isinstance(n, int):
        if n <= 0:
            raise ValueError("math domain error")
        return n.bit_length() - 1
    return int(log(n) / log(2))
35
36
class PipeContext:
    """ per-packet pipeline context: a "muxid" plus an operator ("op")
    """

    def __init__(self, pspec):
        """ builds a pipeline context from a pipeline specification.

            pspec supplies:

            * id_wid - width of the muxid Signal
            * op_wid - width of op, when op is a plain Signal
            * opkls  - the class to instantiate (as opkls(pspec)) to act
                       as the "operator", or None to get a plain Signal.
                       instances must have an "eq" function.
        """
        self.id_wid = pspec.id_wid
        self.op_wid = pspec.op_wid
        # NOTE: keep these as plain "self.x = Signal(...)" assignments
        self.muxid = Signal(self.id_wid, reset_less=True)  # RS multiplex ID
        op_class = pspec.opkls
        if op_class is not None:
            self.op = op_class(pspec)
        else:
            self.op = Signal(self.op_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying context i into self
        """
        # don't forget to update matches if you add fields later.
        return [self.muxid.eq(i.muxid), self.op.eq(i.op)]

    def matches(self, another):
        """
        Returns a list of Assert()s validating that this context
        matches the other context (field by field: muxid, then op).
        """
        # I couldn't figure a clean way of overloading the == operator.
        same_muxid = Assert(self.muxid == another.muxid)
        same_op = Assert(self.op == another.op)
        return [same_muxid, same_op]

    def __iter__(self):
        """ iterates over the fields in order: muxid, then op
        """
        yield self.muxid
        yield self.op

    def ports(self):
        """ returns muxid followed by op's own ports() when op provides
            that method, otherwise simply [muxid, op]
        """
        if not hasattr(self.op, "ports"):
            return list(self)
        return [self.muxid] + self.op.ports()
81
82
class InMuxPipe(PriorityCombMuxInPipe):
    """ fan-in pipe: a PriorityCombMuxInPipe with num_rows inputs (p_len)
        wrapped around a PassThroughStage built from iospecfn
    """

    def __init__(self, num_rows, iospecfn, maskwid=0):
        self.num_rows = num_rows
        PriorityCombMuxInPipe.__init__(self, PassThroughStage(iospecfn),
                                       p_len=num_rows, maskwid=maskwid)
89
90
class MuxOutPipe(CombMuxOutPipe):
    """ fan-out pipe: a CombMuxOutPipe with num_rows outputs (n_len)
        wrapped around a PassThroughStage built from iospecfn
    """

    def __init__(self, num_rows, iospecfn, maskwid=0):
        self.num_rows = num_rows
        CombMuxOutPipe.__init__(self, PassThroughStage(iospecfn),
                                n_len=num_rows, maskwid=maskwid)
97
98
class ALUProxy:
    """ALUProxy: a stand-in that makes one shared ALU look like a
    dedicated ALU.  It bundles the (shared) alu together with one
    previous-stage port (p) and one next-stage port (n), so that the
    single ALU sandwiched between fan-in and fan-out can be presented
    as multiple concurrent ALUs.
    """

    def __init__(self, alu, p, n):
        self.alu, self.p, self.n = alu, p, n
109
110
class ReservationStations(Elaboratable):
    """ Reservation-Station pipeline

        Input:

        * num_rows       - number of input and output Reservation Stations
        * maskwid        - passed through to the fan-in and fan-out pipes
        * feedback_width - None (default) for no feedback.  otherwise,
                           rows feedback_width..num_rows-1 have their
                           outputs connected back to their own inputs

        Requires: the addition of an "alu" object, from which ispec and ospec
                  are taken, and inpipe and outpipe are connected to it

        * fan-in on inputs (an array of BaseData: a,b,mid)
        * ALU pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, num_rows, maskwid=0, feedback_width=None):
        self.num_rows = nr = num_rows
        self.feedback_width = feedback_width
        self.inpipe = InMuxPipe(nr, self.i_specfn, maskwid)    # fan-in
        self.outpipe = MuxOutPipe(nr, self.o_specfn, maskwid)  # fan-out

        self.p = self.inpipe.p   # kinda annoying,
        self.n = self.outpipe.n  # use pipe in/out as this class in/out
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def setup_pseudoalus(self):
        """setup_pseudoalus: establishes a suite of pseudo-alus
        that look to all pipeline-intents-and-purposes just like the original
        """
        # one proxy per row: the shared ALU plus that row's in/out ports
        self.pseudoalus = [ALUProxy(self.alu, p, n)
                           for p, n in zip(self.p[:self.num_rows],
                                           self.n[:self.num_rows])] \
            if isinstance(self.p, list) and isinstance(self.n, list) \
            else [ALUProxy(self.alu, self.p[i], self.n[i])
                  for i in range(self.num_rows)]

    def elaborate(self, platform):
        """ wires fan-in -> ALU -> fan-out and, if feedback_width is set,
            loops the upper-numbered outputs back to their inputs
        """
        m = Module()
        m.submodules.inpipe = self.inpipe
        m.submodules.alu = self.alu
        m.submodules.outpipe = self.outpipe

        m.d.comb += self.inpipe.n.connect_to_next(self.alu.p)
        m.d.comb += self.alu.connect_to_next(self.outpipe)

        if self.feedback_width is None:
            return m

        # connect all outputs above the feedback width back to their inputs
        # (hence, feedback).  pipeline stages are then expected to *modify*
        # the muxid (with care) in order to use the "upper numbered" RSes
        # for storing partially-completed results.  micro-coding, basically

        for i in range(self.feedback_width, self.num_rows):
            # connect_to_next only *returns* the assignments: they have to
            # be added to the module or the feedback path does not exist
            m.d.comb += self.outpipe.n[i].connect_to_next(self.inpipe.p[i])

        return m

    def ports(self):
        """ returns the fan-in ports followed by the fan-out ports
        """
        return self._ports

    def i_specfn(self):
        """ input data specification: taken from the ALU
        """
        return self.alu.ispec()

    def o_specfn(self):
        """ output data specification: taken from the ALU
        """
        return self.alu.ospec()
174
175
class ReservationStations2(Elaboratable):
    """ Reservation-Station pipeline.  Manages an ALU and makes it look like
        there are multiple of them, presenting the same ready/valid API

        Input:

        :alu:      - an ALU to be "managed" by these ReservationStations
        :num_rows: - number of input and output Reservation Stations
        :alu_name: - submodule name to give the ALU (defaults to "alu")

        Note that the ALU data (in and out specs) right the way down the
        entire chain *must* have a "muxid" data member.  this is picked
        up and used to route data correctly from input RS to output RS.

        It is the responsibility of the USER of the ReservationStations
        class to correctly set that muxid in each data packet to the
        correct constant.  this could change in future.

        FAILING TO SET THE MUXID IS GUARANTEED TO RESULT IN CORRUPTED DATA.
    """

    def __init__(self, alu, num_rows, alu_name=None):
        if alu_name is None:
            alu_name = "alu"
        self.num_rows = num_rows
        self.p = []
        self.n = []
        self.alu = alu
        self.alu_name = alu_name
        # create prev and next ready/valid and add replica of ALU data specs
        for i in range(num_rows):
            suffix = "_%d" % i
            p = PrevControl(name=suffix)
            n = NextControl(name=suffix)
            p.i_data, n.o_data = self.alu.new_specs("rs_%d" % i)
            self.p.append(p)
            self.n.append(n)

        self.pipe = self  # for Arbiter to select the incoming prevcontrols

        # set up pseudo-alus that look like a standard pipeline
        self.pseudoalus = [ALUProxy(self.alu, p, n)
                           for p, n in zip(self.p, self.n)]

    def __iter__(self):
        """ yields everything from all prev controls, then all next controls
        """
        for p in self.p:
            yield from p
        for n in self.n:
            yield from n

    def ports(self):
        return list(self)

    def elaborate(self, platform):
        """ builds one 4-state FSM per Reservation Station:

            ACCEPTING -> (ACCEPTED ->) WAITOUT -> (SENDON ->) ACCEPTING

            the bracketed states are skipped when data can be passed
            straight through combinatorially.
        """
        m = Module()
        pe = PriorityEncoder(self.num_rows)  # input priority picker
        m.submodules[self.alu_name] = self.alu
        m.submodules.selector = pe
        for i, (p, n) in enumerate(zip(self.p, self.n)):
            m.submodules["rs_p_%d" % i] = p
            m.submodules["rs_n_%d" % i] = n

        # Priority picker for one RS
        self.active = Signal()
        self.m_id = Signal.like(pe.o)

        # ReservationStation status information, progressively updated in FSM
        rsvd = Signal(self.num_rows)  # indicates RS data in flight
        sent = Signal(self.num_rows)  # sent indicates data in pipeline
        wait = Signal(self.num_rows)  # the outputs are waiting for accept

        # pick first reserved ReservationStation with data not already
        # sent into the ALU
        m.d.comb += pe.i.eq(rsvd & ~sent)
        m.d.comb += self.active.eq(~pe.n)  # encoder active (one input valid)
        m.d.comb += self.m_id.eq(pe.o)     # output one active input

        # mux in and mux out ids.  note that all data *must* have a muxid
        mid = self.m_id                    # input mux selector
        o_muxid = self.alu.n.o_data.muxid  # output mux selector

        # technically speaking this could be set permanently "HI".
        # when all the ReservationStations outputs are waiting,
        # the ALU cannot obviously accept any more data.  as the
        # ALU is effectively "decoupled" from (managed by) the RSes,
        # as long as there is sufficient RS allocation this should not
        # be necessary, i.e. at no time should the ALU be given more inputs
        # than there are outputs to accept (!) but just in case...
        m.d.comb += self.alu.n.i_ready.eq(~wait.all())

        #####
        # one FSM per Reservation Station, covering input and output side
        #####

        # first, establish input: select one input to pass data to (p_mux)
        for i in range(self.num_rows):
            i_buf, o_buf = self.alu.new_specs("buf%d" % i)  # buffers
            with m.FSM():
                # indicate ready to accept data, and accept it if incoming
                # BUT, if there is an opportunity to send on immediately
                # to the ALU, take it early (combinatorial)
                with m.State("ACCEPTING%d" % i):
                    m.d.comb += self.p[i].o_ready.eq(1)  # ready indicator
                    with m.If(self.p[i].i_valid):  # valid data incoming
                        m.d.sync += rsvd[i].eq(1)  # now reserved
                        # a unique opportunity: the picker selected us
                        # *and* the ALU happens to be free
                        with m.If((mid == i) & self.alu.p.o_ready):
                            # transfer
                            m.d.comb += self.alu.p.i_valid.eq(1)
                            m.d.comb += nmoperator.eq(self.alu.p.i_data,
                                                      self.p[i].i_data)
                            m.d.sync += sent[i].eq(1)  # now sent
                            m.next = "WAITOUT%d" % i  # move to "wait output"
                        with m.Else():
                            # nope: not picked, or ALU wasn't free.
                            # o_ready was raised, so the sender considers
                            # the data accepted: it *must* be captured in
                            # every such case.  try next cycle(s)
                            m.d.sync += nmoperator.eq(i_buf, self.p[i].i_data)
                            m.next = "ACCEPTED%d" % i  # move to "accepted"

                # now try to deliver to the ALU, but only if we are "picked"
                with m.State("ACCEPTED%d" % i):
                    with m.If(mid == i):  # picker selected us
                        with m.If(self.alu.p.o_ready):  # ALU can accept
                            m.d.comb += self.alu.p.i_valid.eq(1)  # transfer
                            m.d.comb += nmoperator.eq(self.alu.p.i_data, i_buf)
                            m.d.sync += sent[i].eq(1)  # now sent
                            m.next = "WAITOUT%d" % i  # move to "wait output"

                # waiting for output to appear on the ALU, take a copy
                # BUT, again, if there is an opportunity to send on
                # immediately, take it (combinatorial)
                with m.State("WAITOUT%d" % i):
                    with m.If(o_muxid == i):  # when ALU output matches our RS
                        with m.If(self.alu.n.o_valid):  # ALU output is valid
                            # second unique opportunity: the RS is ready
                            with m.If(self.n[i].i_ready):  # ready to receive
                                m.d.comb += self.n[i].o_valid.eq(1)  # valid
                                m.d.comb += nmoperator.eq(self.n[i].o_data,
                                                          self.alu.n.o_data)
                                m.d.sync += wait[i].eq(0)  # clear waiting
                                m.d.sync += sent[i].eq(0)  # and sending
                                m.d.sync += rsvd[i].eq(0)  # and reserved
                                m.next = "ACCEPTING%d" % i  # back to "accepting"
                            with m.Else():
                                # nope.  RS wasn't ready.  try next cycles
                                m.d.sync += wait[i].eq(1)  # now waiting
                                m.d.sync += nmoperator.eq(o_buf,
                                                          self.alu.n.o_data)
                                m.next = "SENDON%d" % i  # move to "send data on"

                # waiting for "ready" indicator on RS output: deliver it
                with m.State("SENDON%d" % i):
                    with m.If(self.n[i].i_ready):  # user is ready to receive
                        m.d.comb += self.n[i].o_valid.eq(1)  # indicate valid
                        m.d.comb += nmoperator.eq(self.n[i].o_data, o_buf)
                        m.d.sync += wait[i].eq(0)  # clear waiting
                        m.d.sync += sent[i].eq(0)  # and sending
                        m.d.sync += rsvd[i].eq(0)  # and reserved
                        m.next = "ACCEPTING%d" % i  # and back to "accepting"

        return m