add abstract Stage classes, rename PipelineBase to ControlBase
[ieee754fpu.git] / src / add / example_buf_pipe.py
1 """ Pipeline and BufferedPipeline implementation, conforming to the same API.
2
3 eq:
4 --
5
6 a strategically very important function that is identical in function
7 to nmigen's Signal.eq function, except it may take objects, or a list
8 of objects, or a tuple of objects, and where objects may also be
9 Records.
10
11 Stage API:
12 ---------
13
14 stage requires compliance with a strict API that may be
15 implemented in several means, including as a static class.
16 the methods of a stage instance must be as follows:
17
18 * ispec() - Input data format specification
19 returns an object or a list or tuple of objects, or
20 a Record, each object having an "eq" function which
21 takes responsibility for copying by assignment all
22 sub-objects
23 * ospec() - Output data format specification
24 requirements as for ispec
25 * process(m, i) - Processes an ispec-formatted object
26 returns a combinatorial block of a result that
27 may be assigned to the output, by way of the "eq"
28 function
29 * setup(m, i) - Optional function for setting up submodules
30 may be used for more complex stages, to link
31 the input (i) to submodules. must take responsibility
32 for adding those submodules to the module (m).
33 the submodules must be combinatorial blocks and
34 must have their inputs and output linked combinatorially.
35
36 StageChain:
37 ----------
38
39 A useful combinatorial wrapper around stages that chains them together
40 and then presents a Stage-API-conformant interface.
41
42 UnbufferedPipeline:
43 ------------------
44
45 A simple stalling clock-synchronised pipeline that has no buffering
46 (unlike BufferedPipeline). A stall anywhere along the line will
47 result in a stall back-propagating down the entire chain.
48
49 The BufferedPipeline by contrast will buffer incoming data, allowing
50 previous stages one clock cycle's grace before also having to stall.
51
52 BufferedPipeline:
53 ----------------
54
55 nmigen implementation of buffered pipeline stage, based on zipcpu:
56 https://zipcpu.com/blog/2017/08/14/strategies-for-pipelining.html
57
58 this module requires quite a bit of thought to understand how it works
59 (and why it is needed in the first place). reading the above is
60 *strongly* recommended.
61
62 unlike john dawson's IEEE754 FPU STB/ACK signalling, which requires
63 the STB / ACK signals to raise and lower (on separate clocks) before
64 data may proceed (thus only allowing one piece of data to proceed
65 on *ALTERNATE* cycles), the signalling here is a true pipeline
66 where data will flow on *every* clock when the conditions are right.
67
68 input acceptance conditions are when:
69 * incoming previous-stage strobe (p.i_valid) is HIGH
70 * outgoing previous-stage ready (p.o_ready) is HIGH
71
72 output transmission conditions are when:
73 * outgoing next-stage strobe (n.o_valid) is HIGH
74 * incoming next-stage ready (n.i_ready) is HIGH
75
76 the tricky bit is when the input has valid data and the output is not
77 ready to accept it. if it wasn't for the clock synchronisation, it
78 would be possible to tell the input "hey don't send that data, we're
79 not ready". unfortunately, it's not possible to "change the past":
80 the previous stage *has no choice* but to pass on its data.
81
82 therefore, the incoming data *must* be accepted - and stored: that
83 is the responsibility / contract that this stage *must* accept.
84 on the same clock, it's possible to tell the input that it must
85 not send any more data. this is the "stall" condition.
86
87 we now effectively have *two* possible pieces of data to "choose" from:
88 the buffered data, and the incoming data. the decision as to which
89 to process and output is based on whether we are in "stall" or not.
90 i.e. when the next stage is no longer ready, the output comes from
91 the buffer if a stall had previously occurred, otherwise it comes
92 direct from processing the input.
93
94 this allows us to respect a synchronous "travelling STB" with what
95 dan calls a "buffered handshake".
96
97 it's quite a complex state machine!
98 """
99
100 from nmigen import Signal, Cat, Const, Mux, Module
101 from nmigen.cli import verilog, rtlil
102 from nmigen.hdl.rec import Record, Layout
103
104 from abc import ABCMeta, abstractmethod
105 from collections.abc import Sequence
106
107
class PrevControl:
    """ handshake signals on the boundary with the *previous* stage
        (one in each direction)

        * i_valid: input from the previous stage: its data is valid.
                   may be several bits wide, in which case every bit
                   has to be set before the data counts as "valid".
        * o_ready: output back to the previous stage: this stage is
                   able to accept data
        * i_data : the incoming data. NOT created here: whoever uses
                   this class is expected to add it.
    """

    def __init__(self, i_width=1):
        """ i_width: width in bits of the i_valid signal
        """
        self.i_valid = Signal(i_width, name="p_i_valid")  # prev   >>in  self
        self.o_ready = Signal(name="p_o_ready")           # prev   <<out self

    def connect_in(self, prev):
        """ links this stage to an input *source* (another PrevControl
            style object): valid and data are copied in from prev, ready
            is copied back out to it.  returns the list of assignments.

            not for stage-to-stage connections!
        """
        assignments = [self.i_valid.eq(prev.i_valid)]
        assignments.append(prev.o_ready.eq(self.o_ready))
        # eq() hands back a list: it is kept nested, as one element
        assignments.append(eq(self.i_data, prev.i_data))
        return assignments

    def i_valid_logic(self):
        """ returns the expression meaning "the input is valid"
        """
        width = len(self.i_valid)
        if width <= 1:
            # single-bit i_valid: the signal is its own answer
            return self.i_valid
        # multi-bit i_valid: only an all-1s pattern counts as valid
        ones = Const(-1, (len(self.i_valid), False))
        return self.i_valid == ones
137
138
class NextControl:
    """ handshake signals on the boundary with the *next* stage
        (one in each direction)

        * o_valid: output to the next stage: this stage's data is valid
        * i_ready: input from the next stage: it is able to accept data
        * o_data : the outgoing data. NOT created here: whoever uses
                   this class is expected to add it.
    """

    def __init__(self):
        self.o_valid = Signal(name="n_o_valid")  # self out>>  next
        self.i_ready = Signal(name="n_i_ready")  # self <<in   next

    def connect_to_next(self, nxt):
        """ stage-to-stage link, where nxt is the following stage's
            PrevControl-style object: valid and data are driven *into*
            nxt, ready is read back *from* nxt.
            returns the list of assignments.
        """
        assignments = [nxt.i_valid.eq(self.o_valid)]
        assignments.append(self.i_ready.eq(nxt.o_ready))
        # eq() hands back a list: it is kept nested, as one element
        assignments.append(eq(nxt.i_data, self.o_data))
        return assignments

    def connect_out(self, nxt):
        """ links this stage's output side to another NextControl-style
            object: valid and data are copied out to nxt, ready is
            copied in from it.  returns the list of assignments.

            not for stage-to-stage connections!
        """
        assignments = [nxt.o_valid.eq(self.o_valid)]
        assignments.append(self.i_ready.eq(nxt.i_ready))
        assignments.append(eq(nxt.o_data, self.o_data))
        return assignments
166
167
def eq(o, i):
    """ makes signals equal: a helper routine which identifies if it is being
        passed a list (or tuple) of objects, or signals, or Records, and calls
        the objects' eq function.

        complex objects (classes) can be used: they must follow the
        convention of having an eq member function, which takes the
        responsibility of further calling eq and returning a list of
        eq assignments

        Record is a special (unusual, recursive) case, where the input may be
        specified as a dictionary (which may contain further dictionaries,
        recursively), where the field names of the dictionary must match
        the Record's field spec.  Alternatively, an object with the same
        member names as the Record may be assigned: it does not have to
        *be* a Record.

        Arguments:
        * o: destination - one object, or a Sequence of objects
        * i: source, with the same structure as o.  when both are
             sequences they are paired up with zip(), so surplus items
             on the longer side are silently ignored.

        Returns a flat list of the assignments produced.
    """
    if not isinstance(o, Sequence):
        o, i = [o], [i]
    res = []
    for ao, ai in zip(o, i):
        if not isinstance(ao, Record):
            # ordinary object: its own eq() does the work.  that eq() may
            # return a single assignment or a sequence of them.
            rres = ao.eq(ai)
            if not isinstance(rres, Sequence):
                rres = [rres]
            res += rres
            continue
        # Record destination: recurse, one field at a time
        for field_name, field_shape, _ in ao.layout:
            # for a nested layout, a source that has a "fields" mapping
            # is looked up through it.  a dictionary or plain object has
            # no "fields", and used to fail right here with an
            # AttributeError: such a source is now looked up directly.
            if isinstance(field_shape, Layout) and hasattr(ai, "fields"):
                val = ai.fields
            else:
                val = ai
            if hasattr(val, field_name):  # attribute-style source
                val = getattr(val, field_name)
            else:
                val = val[field_name]  # dictionary-style specification
            res += eq(ao.fields[field_name], val)
    return res
208
209
class StageCls(metaclass=ABCMeta):
    """ Class-based "Stage" API: derive from this, then instantiate.
        see "Stage API" above.
    """

    @abstractmethod
    def ispec(self):
        """ REQUIRED: input data format specification
        """

    @abstractmethod
    def ospec(self):
        """ REQUIRED: output data format specification
        """

    # setup(self, m, i) is OPTIONAL, so it is deliberately not declared
    # here (not even as an abstract method).

    @abstractmethod
    def process(self, i):
        """ REQUIRED: processes an ispec-formatted object
        """
222
223
class Stage(metaclass=ABCMeta):
    """ Static "Stage" API: derive from this and use the derived class
        itself, no instantiation needed.  see "Stage API" above
    """

    @staticmethod
    @abstractmethod
    def ispec():
        """ input data format specification
        """

    @staticmethod
    @abstractmethod
    def ospec():
        """ output data format specification
        """

    # setup(m, i) is optional, so it is deliberately not declared here
    # (not even as a static abstract method).

    @staticmethod
    @abstractmethod
    def process(i):
        """ processes an ispec-formatted object
        """
243
244
class StageChain(StageCls):
    """ takes a list of stages and links them, via their input and
        output specs, into one combinatorial chain that itself follows
        the Stage API.

        * the chain's input is the input of the first stage
        * each stage's output is assigned to the input of the stage
          that follows it
        * the chain's output is the output of the last stage

        setup() does all of the linking.  process() only hands back the
        output object that setup() recorded, so setup() has to have been
        called first.
    """
    def __init__(self, chain):
        self.chain = chain

    def ispec(self):
        """ input spec of the whole chain: that of its first stage
        """
        first = self.chain[0]
        return first.ispec()

    def ospec(self):
        """ output spec of the whole chain: that of its last stage
        """
        last = self.chain[-1]
        return last.ospec()

    def setup(self, m, i):
        """ wires every stage into module m, starting from input i, and
            records the final output object as self.o
        """
        final_idx = len(self.chain) - 1
        for idx, stage in enumerate(self.chain):
            if hasattr(stage, "setup"):
                stage.setup(m, i)                # stage may add submodules
            o = stage.ospec()                    # fresh output for this stage
            m.d.comb += eq(o, stage.process(i))  # processed input goes to "o"
            if idx == final_idx:
                continue                         # nothing follows the last one
            ni = self.chain[idx + 1].ispec()     # input object of next stage
            m.d.comb += eq(ni, o)                # this output feeds that input
            i = ni                               # ...and is processed next loop
        self.o = o                               # output of the last stage

    def process(self, i):
        """ returns the output recorded by setup().  i is not used: the
            input was already wired in by setup().
        """
        return self.o
276 def process(self, i):
277 return self.o
278
279
280 class ControlBase:
281 """ Common functions for Pipeline API
282 """
283 def __init__(self, stage=None, in_multi=None):
284 """ pass in a "stage" which may be either a static class or a class
285 instance, which has four functions (one optional):
286 * ispec: returns input signals according to the input specification
287 * ispec: returns output signals to the output specification
288 * process: takes an input instance and returns processed data
289 * setup: performs any module linkage if the stage uses one.
290
291 User must also:
292 * add i_data member to PrevControl and
293 * add o_data member to NextControl
294 """
295 self.stage = stage
296
297 # set up input and output IO ACK (prev/next ready/valid)
298 self.p = PrevControl(in_multi)
299 self.n = NextControl()
300
301 def connect_to_next(self, nxt):
302 """ helper function to connect to the next stage data/valid/ready.
303 """
304 return self.n.connect_to_next(nxt.p)
305
306 def connect_in(self, prev):
307 """ helper function to connect stage to an input source. do not
308 use to connect stage-to-stage!
309 """
310 return self.p.connect_in(prev.p)
311
312 def connect_out(self, nxt):
313 """ helper function to connect stage to an output source. do not
314 use to connect stage-to-stage!
315 """
316 return self.n.connect_out(nxt.n)
317
318 def set_input(self, i):
319 """ helper function to set the input data
320 """
321 return eq(self.p.i_data, i)
322
323 def ports(self):
324 return [self.p.i_valid, self.n.i_ready,
325 self.n.o_valid, self.p.o_ready,
326 self.p.i_data, self.n.o_data # XXX need flattening!
327 ]
328
329
class BufferedPipeline(ControlBase):
    """ buffered pipeline stage.  data and strobe signals travel in sync.
        if ever the input is ready and the output is not, processed data
        is stored in a temporary register.

        Argument: stage.  see Stage API above

        stage-1   p.i_valid >>in   stage   n.o_valid out>>   stage+1
        stage-1   p.o_ready <<out  stage   n.i_ready <<in    stage+1
        stage-1   p.i_data  >>in   stage   n.o_data  out>>   stage+1
                              |             |
                            process --->----^
                              |             |
                              +-- r_data ->-+

        input data p.i_data is read (only), is processed and goes into an
        intermediate result store ("result" in elaborate()).  this is
        updated combinatorially.

        in a non-stall condition, the intermediate result will go into the
        output ("update output").  however if ever there is a stall, the
        copy held in r_data ("update buffer") stops following the input
        and keeps the last processed value.

        when the non-stall condition is released, r_data is the first
        to be transferred to the output ("flush buffer"), and the stall
        condition cleared.

        on the next cycle (as long as stall is not raised again) the
        input may begin to be processed and transferred directly to output.

    """
    def __init__(self, stage):
        """ stage: Stage-API object (class or instance).  its ispec() and
            ospec() create the p.i_data and n.o_data members.
        """
        ControlBase.__init__(self)
        self.stage = stage

        # set up the input and output data
        self.p.i_data = stage.ispec() # input type
        self.n.o_data = stage.ospec() # output type

    def elaborate(self, platform):
        """ builds and returns the Module implementing the buffered
            handshake.  platform is not used here.
        """
        m = Module()

        # both have the output format: "result" is assigned combinatorially
        # below, r_data synchronously (it is the buffer)
        result = self.stage.ospec()
        r_data = self.stage.ospec()
        if hasattr(self.stage, "setup"):
            self.stage.setup(m, self.p.i_data)

        # establish some combinatorial temporaries
        o_n_validn = Signal(reset_less=True) # inverse of n.o_valid
        i_p_valid_o_p_ready = Signal(reset_less=True) # input valid AND we're ready
        p_i_valid = Signal(reset_less=True) # "input is valid" (all bits set)
        m.d.comb += [p_i_valid.eq(self.p.i_valid_logic()),
                     o_n_validn.eq(~self.n.o_valid),
                     i_p_valid_o_p_ready.eq(p_i_valid & self.p.o_ready),
        ]

        # store result of processing in combinatorial temporary
        m.d.comb += eq(result, self.stage.process(self.p.i_data))

        # if not in stall condition, update the temporary register
        with m.If(self.p.o_ready): # not stalled
            m.d.sync += eq(r_data, result) # update buffer

        with m.If(self.n.i_ready): # next stage is ready
            with m.If(self.p.o_ready): # not stalled
                # nothing in buffer: send (processed) input direct to output
                m.d.sync += [self.n.o_valid.eq(p_i_valid),
                             eq(self.n.o_data, result), # update output
                            ]
            with m.Else(): # p.o_ready is false, and something is in buffer.
                # Flush the [already processed] buffer to the output port.
                m.d.sync += [self.n.o_valid.eq(1),      # declare reg empty
                             eq(self.n.o_data, r_data), # flush buffer
                             self.p.o_ready.eq(1),      # clear stall condition
                            ]
                # ignore input, since p.o_ready is also false.

        # (n.i_ready) is false here: next stage is NOT ready
        with m.Elif(o_n_validn): # ...but the output holds nothing valid yet
            m.d.sync += [self.n.o_valid.eq(p_i_valid),
                         self.p.o_ready.eq(1), # Keep the buffer empty
                         eq(self.n.o_data, result), # set output data
                        ]

        # (n.i_ready) false and (n.o_valid) true:
        with m.Elif(i_p_valid_o_p_ready):
            # next stage is not ready, the output is still occupied, and a
            # valid input is arriving while we signal ready.  both terms
            # below are true in this branch, so p.o_ready goes low: stall.
            m.d.sync += self.p.o_ready.eq(~(p_i_valid & self.n.o_valid))

        return m
417
418 return m
419
420
class ExampleAddStage(StageCls):
    """ example stage for the buffered pipeline, written in the
        class-instance style: adds two 16-bit inputs together
    """

    def ispec(self):
        """ input format: a tuple of two 16-bit signals
        """
        return (Signal(16), Signal(16))

    def ospec(self):
        """ output format: one 16-bit signal, which receives the sum
        """
        return Signal(16)

    def process(self, i):
        """ returns the sum of the first two items of the input
        """
        first = i[0]
        second = i[1]
        return first + second
440
441
class ExampleBufPipeAdd(BufferedPipeline):
    """ example: a buffered pipeline wrapped around a stage *instance*
        (ExampleAddStage)
    """

    def __init__(self):
        # class-instance style: the stage object is created right here
        BufferedPipeline.__init__(self, ExampleAddStage())
449
450
class ExampleStage(Stage):
    """ an example of how to use the buffered pipeline, in a static class
        fashion: the class itself is passed around as the stage, and is
        never instantiated.

        the three methods are marked @staticmethod, matching their
        declaration in Stage.  calling them on the class behaves exactly
        as before; calling them on an instance now works as well.
    """

    @staticmethod
    def ispec():
        """ input format: one 16-bit signal
        """
        return Signal(16, name="example_input_signal")

    @staticmethod
    def ospec():
        """ output format: one 16-bit signal
        """
        return Signal(16, name="example_output_signal")

    @staticmethod
    def process(i):
        """ process the input data and returns it (adds 1)
        """
        return i + 1
466
467
class ExampleStageCls(StageCls):
    """ example stage written in the class-instance style (an object of
        this class has to be created): adds 1 to a 16-bit input
    """

    def ispec(self):
        """ input format: one 16-bit signal
        """
        i_sig = Signal(16, name="example_input_signal")
        return i_sig

    def ospec(self):
        """ output format: one 16-bit signal
        """
        o_sig = Signal(16, name="example_output_signal")
        return o_sig

    def process(self, i):
        """ returns the input plus one
        """
        incremented = i + 1
        return incremented
483
484
class ExampleBufPipe(BufferedPipeline):
    """ example: a buffered pipeline wrapped around the static-class
        stage ExampleStage
    """

    def __init__(self):
        # static style: the stage class itself is passed, not an instance
        BufferedPipeline.__init__(self, stage=ExampleStage)
491
492
class UnbufferedPipeline(ControlBase):
    """ A simple pipeline stage with single-clock synchronisation
        and two-way valid/ready synchronised signalling.

        Note that a stall in one stage will result in the entire pipeline
        chain stalling.

        Also that unlike BufferedPipeline, the valid/ready signalling does NOT
        travel synchronously with the data: the valid/ready signalling
        combines in a *combinatorial* fashion.  Therefore, a long pipeline
        chain will lengthen propagation delays.

        Argument: stage.  see Stage API, above

        stage-1   p.i_valid >>in   stage   n.o_valid out>>   stage+1
        stage-1   p.o_ready <<out  stage   n.i_ready <<in    stage+1
        stage-1   p.i_data  >>in   stage   n.o_data  out>>   stage+1
                              |             |
                            r_data        result
                              |             |
                              +--process ->-+

        Attributes:
        -----------
        p.i_data : StageInput, shaped according to ispec
            The pipeline input
        n.o_data : StageOutput, shaped according to ospec
            The pipeline output
        r_data : input_shape according to ispec
            A temporary (buffered) copy of a prior (valid) input.
            This is HELD if the output is not ready.  It is updated
            SYNCHRONOUSLY.
        result: output_shape according to ospec
            The output of the combinatorial logic.  it is updated
            COMBINATORIALLY (no clock dependence).
    """

    def __init__(self, stage):
        """ stage: Stage-API object (class or instance).  its ispec() and
            ospec() create the p.i_data and n.o_data members.
        """
        ControlBase.__init__(self)
        self.stage = stage
        self._data_valid = Signal() # r_data currently holds valid data

        # set up the input and output data
        self.p.i_data = stage.ispec() # input type
        self.n.o_data = stage.ospec() # output type

    def elaborate(self, platform):
        """ builds and returns the Module implementing the unbuffered
            handshake.  platform is not used here.
        """
        m = Module()

        r_data = self.stage.ispec() # input type
        result = self.stage.ospec() # output data
        if hasattr(self.stage, "setup"):
            # note: the stage is linked to the *registered* input copy
            self.stage.setup(m, r_data)

        p_i_valid = Signal(reset_less=True) # "input is valid" (all bits set)
        m.d.comb += p_i_valid.eq(self.p.i_valid_logic())
        # processing is combinatorial, and works on the registered input
        m.d.comb += eq(result, self.stage.process(r_data))
        # output is valid exactly when valid data is being held
        m.d.comb += self.n.o_valid.eq(self._data_valid)
        # ready for input when nothing is held, or the next stage is ready
        m.d.comb += self.p.o_ready.eq(~self._data_valid | self.n.i_ready)
        # data is valid next cycle if a valid input is arriving, or if
        # held data is not being taken by the next stage
        m.d.sync += self._data_valid.eq(p_i_valid | \
                                        (~self.n.i_ready & self._data_valid))
        # NOTE(review): this tests the raw p.i_valid, not p_i_valid: for
        # a multi-bit i_valid the two differ - confirm this is intended.
        with m.If(self.p.i_valid & self.p.o_ready):
            m.d.sync += eq(r_data, self.p.i_data) # latch the input
        m.d.comb += eq(self.n.o_data, result)
        return m
558
559
class ExamplePipeline(UnbufferedPipeline):
    """ example: an unbuffered pipeline wrapped around the static-class
        stage ExampleStage
    """

    def __init__(self):
        # static style: the stage class itself is passed, not an instance
        UnbufferedPipeline.__init__(self, stage=ExampleStage)
566
567
if __name__ == '__main__':
    # one RTLIL dump per example pipeline, produced in this order
    for make_dut, il_path in ((ExampleBufPipe, "test_bufpipe.il"),
                              (ExamplePipeline, "test_combpipe.il")):
        dut = make_dut()
        vl = rtlil.convert(dut, ports=dut.ports())
        with open(il_path, "w") as f:
            f.write(vl)