1 """ Pipeline and BufferedPipeline implementation, conforming to the same API.
6 a strategically very important function that is identical in function
7 to nmigen's Signal.eq function, except it may take objects, or a list
8 of objects, or a tuple of objects, and where objects may also be
stage requires compliance with a strict API that may be
implemented in several ways, including as a static class.
16 the methods of a stage instance must be as follows:
18 * ispec() - Input data format specification
19 returns an object or a list or tuple of objects, or
20 a Record, each object having an "eq" function which
21 takes responsibility for copying by assignment all
* ospec() - Output data format specification
requirements as for ispec
* process(i) - Processes an ispec-formatted object
26 returns a combinatorial block of a result that
27 may be assigned to the output, by way of the "eq"
29 * setup(m, i) - Optional function for setting up submodules
30 may be used for more complex stages, to link
31 the input (i) to submodules. must take responsibility
32 for adding those submodules to the module (m).
33 the submodules must be combinatorial blocks and
34 must have their inputs and output linked combinatorially.
39 A useful combinatorial wrapper around stages that chains them together
40 and then presents a Stage-API-conformant interface.
45 A simple stalling clock-synchronised pipeline that has no buffering
46 (unlike BufferedPipeline). A stall anywhere along the line will
47 result in a stall back-propagating down the entire chain.
49 The BufferedPipeline by contrast will buffer incoming data, allowing
50 previous stages one clock cycle's grace before also having to stall.
55 nmigen implementation of buffered pipeline stage, based on zipcpu:
56 https://zipcpu.com/blog/2017/08/14/strategies-for-pipelining.html
58 this module requires quite a bit of thought to understand how it works
59 (and why it is needed in the first place). reading the above is
60 *strongly* recommended.
62 unlike john dawson's IEEE754 FPU STB/ACK signalling, which requires
63 the STB / ACK signals to raise and lower (on separate clocks) before
data may proceed (thus only allowing one piece of data to proceed
65 on *ALTERNATE* cycles), the signalling here is a true pipeline
66 where data will flow on *every* clock when the conditions are right.
68 input acceptance conditions are when:
69 * incoming previous-stage strobe (p.i_valid) is HIGH
* outgoing previous-stage ready (p.o_ready) is HIGH
72 output transmission conditions are when:
73 * outgoing next-stage strobe (n.o_valid) is HIGH
* incoming next-stage ready (n.i_ready) is HIGH
76 the tricky bit is when the input has valid data and the output is not
77 ready to accept it. if it wasn't for the clock synchronisation, it
78 would be possible to tell the input "hey don't send that data, we're
79 not ready". unfortunately, it's not possible to "change the past":
80 the previous stage *has no choice* but to pass on its data.
82 therefore, the incoming data *must* be accepted - and stored: that
83 is the responsibility / contract that this stage *must* accept.
84 on the same clock, it's possible to tell the input that it must
85 not send any more data. this is the "stall" condition.
87 we now effectively have *two* possible pieces of data to "choose" from:
88 the buffered data, and the incoming data. the decision as to which
89 to process and output is based on whether we are in "stall" or not.
90 i.e. when the next stage is no longer ready, the output comes from
91 the buffer if a stall had previously occurred, otherwise it comes
92 direct from processing the input.
94 this allows us to respect a synchronous "travelling STB" with what
95 dan calls a "buffered handshake".
97 it's quite a complex state machine!
100 from nmigen
import Signal
, Cat
, Const
, Mux
, Module
101 from nmigen
.cli
import verilog
, rtlil
102 from nmigen
.hdl
.rec
import Record
, Layout
104 from abc
import ABCMeta
, abstractmethod
105 from collections
.abc
import Sequence
109 """ contains signals that come *from* the previous stage (both in and out)
110 * i_valid: previous stage indicating all incoming data is valid.
111 may be a multi-bit signal, where all bits are required
112 to be asserted to indicate "valid".
* o_ready: output to previous stage indicating readiness to accept data
114 * i_data : an input - added by the user of this class
def __init__(self, i_width=1):
    """ create the two handshake signals that face the previous stage.

        * i_width: passed as the first argument of the i_valid Signal
                   (defaults to 1)

        sets self.i_valid (Signal named "p_i_valid") and
        self.o_ready (Signal named "p_o_ready").
    """
    valid_in = Signal(i_width, name="p_i_valid")   # prev   >>in  self
    ready_out = Signal(name="p_o_ready")           # prev   <<out self
    self.i_valid = valid_in
    self.o_ready = ready_out
def connect_in(self, prev):
    """ helper function to connect this stage to an input source.
        do not use to connect stage-to-stage!

        returns a three-entry list, in this order:
        * self.i_valid assigned from prev.i_valid
        * prev.o_ready assigned from self.o_ready
        * the result of eq(self.i_data, prev.i_data)
    """
    valid_link = self.i_valid.eq(prev.i_valid)
    ready_link = prev.o_ready.eq(self.o_ready)
    data_link = eq(self.i_data, prev.i_data)
    return [valid_link, ready_link, data_link]
130 def i_valid_logic(self
):
131 vlen
= len(self
.i_valid
)
132 if vlen
> 1: # multi-bit case: valid only when i_valid is all 1s
133 all1s
= Const(-1, (len(self
.i_valid
), False))
134 return self
.i_valid
== all1s
135 # single-bit i_valid case
140 """ contains the signals that go *to* the next stage (both in and out)
141 * o_valid: output indicating to next stage that data is valid
142 * i_ready: input from next stage indicating that it can accept data
143 * o_data : an output - added by the user of this class
146 self
.o_valid
= Signal(name
="n_o_valid") # self out>> next
147 self
.i_ready
= Signal(name
="n_i_ready") # self <<in next
def connect_to_next(self, nxt):
    """ helper function to connect to the next stage data/valid/ready.
        data/valid is passed *TO* nxt, and ready comes *IN* from nxt.

        returns a three-entry list, in this order:
        * nxt.i_valid assigned from self.o_valid
        * self.i_ready assigned from nxt.o_ready
        * the result of eq(nxt.i_data, self.o_data)
    """
    valid_fwd = nxt.i_valid.eq(self.o_valid)
    ready_back = self.i_ready.eq(nxt.o_ready)
    data_fwd = eq(nxt.i_data, self.o_data)
    return [valid_fwd, ready_back, data_fwd]
def connect_out(self, nxt):
    """ helper function to connect this stage to an output sink.
        do not use to connect stage-to-stage!

        returns a three-entry list, in this order:
        * nxt.o_valid assigned from self.o_valid
        * self.i_ready assigned from nxt.i_ready
        * the result of eq(nxt.o_data, self.o_data)
    """
    valid_out = nxt.o_valid.eq(self.o_valid)
    ready_in = self.i_ready.eq(nxt.i_ready)
    data_out = eq(nxt.o_data, self.o_data)
    return [valid_out, ready_in, data_out]
169 """ makes signals equal: a helper routine which identifies if it is being
170 passed a list (or tuple) of objects, or signals, or Records, and calls
171 the objects' eq function.
173 complex objects (classes) can be used: they must follow the
174 convention of having an eq member function, which takes the
175 responsibility of further calling eq and returning a list of
178 Record is a special (unusual, recursive) case, where the input may be
179 specified as a dictionary (which may contain further dictionaries,
180 recursively), where the field names of the dictionary must match
181 the Record's field spec. Alternatively, an object with the same
182 member names as the Record may be assigned: it does not have to
185 if not isinstance(o
, Sequence
):
188 for (ao
, ai
) in zip(o
, i
):
189 #print ("eq", ao, ai)
190 if isinstance(ao
, Record
):
191 for idx
, (field_name
, field_shape
, _
) in enumerate(ao
.layout
):
192 if isinstance(field_shape
, Layout
):
196 if hasattr(val
, field_name
): # check for attribute
197 val
= getattr(val
, field_name
)
199 val
= val
[field_name
] # dictionary-style specification
200 rres
= eq(ao
.fields
[field_name
], val
)
204 if not isinstance(rres
, Sequence
):
210 class StageCls(metaclass
=ABCMeta
):
211 """ Class-based "Stage" API. requires instantiation (after derivation)
212 see "Stage API" above.
215 def ispec(self
): pass # REQUIRED
217 def ospec(self
): pass # REQUIRED
219 #def setup(self, m, i): pass # OPTIONAL
221 def process(self
, i
): pass # REQUIRED
224 class Stage(metaclass
=ABCMeta
):
225 """ Static "Stage" API. does not require instantiation (after derivation)
226 see "Stage API" above
238 #def setup(m, i): pass
245 class StageChain(StageCls
):
246 """ pass in a list of stages, and they will automatically be
247 chained together via their input and output specs into a
250 * input to this class will be the input of the first stage
251 * output of first stage goes into input of second
252 * output of second goes into input into third (etc. etc.)
253 * the output of this class will be the output of the last stage
255 def __init__(self
, chain
):
259 return self
.chain
[0].ispec()
262 return self
.chain
[-1].ospec()
264 def setup(self
, m
, i
):
265 for (idx
, c
) in enumerate(self
.chain
):
266 if hasattr(c
, "setup"):
267 c
.setup(m
, i
) # stage may have some module stuff
268 o
= self
.chain
[idx
].ospec() # only the last assignment survives
269 m
.d
.comb
+= eq(o
, c
.process(i
)) # process input into "o"
270 if idx
!= len(self
.chain
)-1:
271 ni
= self
.chain
[idx
+1].ispec() # becomes new input on next loop
272 m
.d
.comb
+= eq(ni
, o
) # assign output to next input
274 self
.o
= o
# last loop is the output
276 def process(self
, i
):
281 """ Common functions for Pipeline API
283 def __init__(self
, stage
=None, in_multi
=None):
284 """ pass in a "stage" which may be either a static class or a class
285 instance, which has four functions (one optional):
286 * ispec: returns input signals according to the input specification
* ospec: returns output signals according to the output specification
288 * process: takes an input instance and returns processed data
289 * setup: performs any module linkage if the stage uses one.
292 * add i_data member to PrevControl and
293 * add o_data member to NextControl
297 # set up input and output IO ACK (prev/next ready/valid)
298 self
.p
= PrevControl(in_multi
)
299 self
.n
= NextControl()
def connect_to_next(self, nxt):
    """ helper function to connect to the next stage data/valid/ready.

        hands nxt.p to self.n.connect_to_next and returns whatever
        that call returns.
    """
    downstream_port = nxt.p
    return self.n.connect_to_next(downstream_port)
def connect_in(self, prev):
    """ helper function to connect stage to an input source.  do not
        use to connect stage-to-stage!

        hands prev.p to self.p.connect_in and returns whatever that
        call returns.
    """
    source_port = prev.p
    return self.p.connect_in(source_port)
def connect_out(self, nxt):
    """ helper function to connect stage to an output sink.  do not
        use to connect stage-to-stage!

        hands nxt.n to self.n.connect_out and returns whatever that
        call returns.
    """
    sink_port = nxt.n
    return self.n.connect_out(sink_port)
def set_input(self, i):
    """ helper function to set the input data.

        returns the result of eq(self.p.i_data, i), i.e. the
        assignment(s) of i onto this stage's input data.
    """
    input_port = self.p.i_data
    return eq(input_port, i)
324 return [self
.p
.i_valid
, self
.n
.i_ready
,
325 self
.n
.o_valid
, self
.p
.o_ready
,
326 self
.p
.i_data
, self
.n
.o_data
# XXX need flattening!
330 class BufferedPipeline(ControlBase
):
331 """ buffered pipeline stage. data and strobe signals travel in sync.
332 if ever the input is ready and the output is not, processed data
333 is stored in a temporary register.
335 Argument: stage. see Stage API above
337 stage-1 p.i_valid >>in stage n.o_valid out>> stage+1
338 stage-1 p.o_ready <<out stage n.i_ready <<in stage+1
339 stage-1 p.i_data >>in stage n.o_data out>> stage+1
345 input data p.i_data is read (only), is processed and goes into an
346 intermediate result store [process()]. this is updated combinatorially.
348 in a non-stall condition, the intermediate result will go into the
349 output (update_output). however if ever there is a stall, it goes
350 into r_data instead [update_buffer()].
352 when the non-stall condition is released, r_data is the first
353 to be transferred to the output [flush_buffer()], and the stall
356 on the next cycle (as long as stall is not raised again) the
357 input may begin to be processed and transferred directly to output.
360 def __init__(self
, stage
):
361 ControlBase
.__init
__(self
)
364 # set up the input and output data
365 self
.p
.i_data
= stage
.ispec() # input type
366 self
.n
.o_data
= stage
.ospec()
368 def elaborate(self
, platform
):
371 result
= self
.stage
.ospec()
372 r_data
= self
.stage
.ospec()
373 if hasattr(self
.stage
, "setup"):
374 self
.stage
.setup(m
, self
.p
.i_data
)
376 # establish some combinatorial temporaries
377 o_n_validn
= Signal(reset_less
=True)
378 i_p_valid_o_p_ready
= Signal(reset_less
=True)
379 p_i_valid
= Signal(reset_less
=True)
380 m
.d
.comb
+= [p_i_valid
.eq(self
.p
.i_valid_logic()),
381 o_n_validn
.eq(~self
.n
.o_valid
),
382 i_p_valid_o_p_ready
.eq(p_i_valid
& self
.p
.o_ready
),
385 # store result of processing in combinatorial temporary
386 m
.d
.comb
+= eq(result
, self
.stage
.process(self
.p
.i_data
))
388 # if not in stall condition, update the temporary register
389 with m
.If(self
.p
.o_ready
): # not stalled
390 m
.d
.sync
+= eq(r_data
, result
) # update buffer
392 with m
.If(self
.n
.i_ready
): # next stage is ready
393 with m
.If(self
.p
.o_ready
): # not stalled
394 # nothing in buffer: send (processed) input direct to output
395 m
.d
.sync
+= [self
.n
.o_valid
.eq(p_i_valid
),
396 eq(self
.n
.o_data
, result
), # update output
398 with m
.Else(): # p.o_ready is false, and something is in buffer.
399 # Flush the [already processed] buffer to the output port.
400 m
.d
.sync
+= [self
.n
.o_valid
.eq(1), # declare reg empty
401 eq(self
.n
.o_data
, r_data
), # flush buffer
402 self
.p
.o_ready
.eq(1), # clear stall condition
404 # ignore input, since p.o_ready is also false.
# (n.i_ready) is false here: next stage is NOT ready
407 with m
.Elif(o_n_validn
): # next stage being told "ready"
408 m
.d
.sync
+= [self
.n
.o_valid
.eq(p_i_valid
),
409 self
.p
.o_ready
.eq(1), # Keep the buffer empty
410 eq(self
.n
.o_data
, result
), # set output data
413 # (n.i_ready) false and (n.o_valid) true:
414 with m
.Elif(i_p_valid_o_p_ready
):
# If next stage is *not* ready, and not stalled yet, accept input
416 m
.d
.sync
+= self
.p
.o_ready
.eq(~
(p_i_valid
& self
.n
.o_valid
))
421 class ExampleAddStage(StageCls
):
422 """ an example of how to use the buffered pipeline, as a class instance
426 """ returns a tuple of input signals which will be the incoming data
428 return (Signal(16), Signal(16))
431 """ returns an output signal which will happen to contain the sum
436 def process(self
, i
):
437 """ process the input data (sums the values in the tuple) and returns it
442 class ExampleBufPipeAdd(BufferedPipeline
):
443 """ an example of how to use the buffered pipeline, using a class instance
447 addstage
= ExampleAddStage()
448 BufferedPipeline
.__init
__(self
, addstage
)
451 class ExampleStage(Stage
):
452 """ an example of how to use the buffered pipeline, in a static class
457 return Signal(16, name
="example_input_signal")
460 return Signal(16, name
="example_output_signal")
463 """ process the input data and returns it (adds 1)
468 class ExampleStageCls(StageCls
):
""" an example of how to use the buffered pipeline, as a class instance
474 return Signal(16, name
="example_input_signal")
477 return Signal(16, name
="example_output_signal")
479 def process(self
, i
):
480 """ process the input data and returns it (adds 1)
485 class ExampleBufPipe(BufferedPipeline
):
486 """ an example of how to use the buffered pipeline.
490 BufferedPipeline
.__init
__(self
, ExampleStage
)
493 class UnbufferedPipeline(ControlBase
):
494 """ A simple pipeline stage with single-clock synchronisation
495 and two-way valid/ready synchronised signalling.
497 Note that a stall in one stage will result in the entire pipeline
500 Also that unlike BufferedPipeline, the valid/ready signalling does NOT
501 travel synchronously with the data: the valid/ready signalling
502 combines in a *combinatorial* fashion. Therefore, a long pipeline
503 chain will lengthen propagation delays.
505 Argument: stage. see Stage API, above
507 stage-1 p.i_valid >>in stage n.o_valid out>> stage+1
508 stage-1 p.o_ready <<out stage n.i_ready <<in stage+1
509 stage-1 p.i_data >>in stage n.o_data out>> stage+1
517 p.i_data : StageInput, shaped according to ispec
n.o_data : StageOutput, shaped according to ospec
521 r_data : input_shape according to ispec
522 A temporary (buffered) copy of a prior (valid) input.
523 This is HELD if the output is not ready. It is updated
525 result: output_shape according to ospec
526 The output of the combinatorial logic. it is updated
527 COMBINATORIALLY (no clock dependence).
530 def __init__(self
, stage
):
531 ControlBase
.__init
__(self
)
533 self
._data
_valid
= Signal()
535 # set up the input and output data
536 self
.p
.i_data
= stage
.ispec() # input type
537 self
.n
.o_data
= stage
.ospec() # output type
539 def elaborate(self
, platform
):
542 r_data
= self
.stage
.ispec() # input type
543 result
= self
.stage
.ospec() # output data
544 if hasattr(self
.stage
, "setup"):
545 self
.stage
.setup(m
, r_data
)
547 p_i_valid
= Signal(reset_less
=True)
548 m
.d
.comb
+= p_i_valid
.eq(self
.p
.i_valid_logic())
549 m
.d
.comb
+= eq(result
, self
.stage
.process(r_data
))
550 m
.d
.comb
+= self
.n
.o_valid
.eq(self
._data
_valid
)
551 m
.d
.comb
+= self
.p
.o_ready
.eq(~self
._data
_valid | self
.n
.i_ready
)
552 m
.d
.sync
+= self
._data
_valid
.eq(p_i_valid | \
553 (~self
.n
.i_ready
& self
._data
_valid
))
554 with m
.If(self
.p
.i_valid
& self
.p
.o_ready
):
555 m
.d
.sync
+= eq(r_data
, self
.p
.i_data
)
556 m
.d
.comb
+= eq(self
.n
.o_data
, result
)
560 class ExamplePipeline(UnbufferedPipeline
):
561 """ an example of how to use the combinatorial pipeline.
565 UnbufferedPipeline
.__init
__(self
, ExampleStage
)
568 if __name__
== '__main__':
569 dut
= ExampleBufPipe()
570 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
571 with
open("test_bufpipe.il", "w") as f
:
574 dut
= ExamplePipeline()
575 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
576 with
open("test_combpipe.il", "w") as f
: