redo module imports
[ieee754fpu.git] / src / add / singlepipe.py
1 """ Pipeline and BufferedHandshake implementation, conforming to the same API.
2 For multi-input and multi-output variants, see multipipe.
3
4 Associated development bugs:
5 * http://bugs.libre-riscv.org/show_bug.cgi?id=64
6 * http://bugs.libre-riscv.org/show_bug.cgi?id=57
7
8 Important: see Stage API (iocontrol.py) in combination with below
9
10 RecordBasedStage:
11 ----------------
12
13 A convenience class that takes an input shape, output shape, a
14 "processing" function and an optional "setup" function. Honestly
15 though, there's not much more effort to just... create a class
16 that returns a couple of Records (see ExampleAddRecordStage in
17 examples).
18
19 PassThroughStage:
20 ----------------
21
22 A convenience class that takes a single function as a parameter,
23 that is chain-called to create the exact same input and output spec.
24 It has a process() function that simply returns its input.
25
26 Instances of this class are completely redundant if handed to
27 StageChain, however when passed to UnbufferedPipeline they
28 can be used to introduce a single clock delay.
29
30 ControlBase:
31 -----------
32
33 The base class for pipelines. Contains previous and next ready/valid/data.
34 Also has an extremely useful "connect" function that can be used to
35 connect a chain of pipelines and present the exact same prev/next
36 ready/valid/data API.
37
38 Note: pipelines basically do not become pipelines as such until
39 handed to a derivative of ControlBase. ControlBase itself is *not*
40 strictly considered a pipeline class. Wishbone and AXI4 (master or
41 slave) could be derived from ControlBase, for example.
42 UnbufferedPipeline:
43 ------------------
44
45 A simple stalling clock-synchronised pipeline that has no buffering
46 (unlike BufferedHandshake). Data flows on *every* clock cycle when
47 the conditions are right (this is nominally when the input is valid
48 and the output is ready).
49
50 A stall anywhere along the line will result in a stall back-propagating
51 down the entire chain. The BufferedHandshake by contrast will buffer
52 incoming data, allowing previous stages one clock cycle's grace before
53 also having to stall.
54
55 An advantage of the UnbufferedPipeline over the Buffered one is
56 that the amount of logic needed (number of gates) is greatly
57 reduced (no second set of buffers basically)
58
59 The disadvantage of the UnbufferedPipeline is that the valid/ready
60 logic, if chained together, is *combinatorial*, resulting in
61 progressively larger gate delay.
62
63 PassThroughHandshake:
64 ------------------
65
66 A Control class that introduces a single clock delay, passing its
67 data through unaltered. Unlike RegisterPipeline (which relies
68 on UnbufferedPipeline and PassThroughStage) it handles ready/valid
69 itself.
70
71 RegisterPipeline:
72 ----------------
73
74 A convenience class that, because UnbufferedPipeline introduces a single
75 clock delay, when its stage is a PassThroughStage, it results in a Pipeline
76 stage that, duh, delays its (unmodified) input by one clock cycle.
77
78 BufferedHandshake:
79 ----------------
80
81 nmigen implementation of buffered pipeline stage, based on zipcpu:
82 https://zipcpu.com/blog/2017/08/14/strategies-for-pipelining.html
83
84 this module requires quite a bit of thought to understand how it works
85 (and why it is needed in the first place). reading the above is
86 *strongly* recommended.
87
88 unlike john dawson's IEEE754 FPU STB/ACK signalling, which requires
89 the STB / ACK signals to raise and lower (on separate clocks) before
90 data may proceed (thus only allowing one piece of data to proceed
91 on *ALTERNATE* cycles), the signalling here is a true pipeline
92 where data will flow on *every* clock when the conditions are right.
93
94 input acceptance conditions are when:
95 * incoming previous-stage strobe (p.valid_i) is HIGH
96 * outgoing previous-stage ready (p.ready_o) is LOW
97
98 output transmission conditions are when:
99 * outgoing next-stage strobe (n.valid_o) is HIGH
100 * outgoing next-stage ready (n.ready_i) is LOW
101
102 the tricky bit is when the input has valid data and the output is not
103 ready to accept it. if it wasn't for the clock synchronisation, it
104 would be possible to tell the input "hey don't send that data, we're
105 not ready". unfortunately, it's not possible to "change the past":
106 the previous stage *has no choice* but to pass on its data.
107
108 therefore, the incoming data *must* be accepted - and stored: that
109 is the responsibility / contract that this stage *must* accept.
110 on the same clock, it's possible to tell the input that it must
111 not send any more data. this is the "stall" condition.
112
113 we now effectively have *two* possible pieces of data to "choose" from:
114 the buffered data, and the incoming data. the decision as to which
115 to process and output is based on whether we are in "stall" or not.
116 i.e. when the next stage is no longer ready, the output comes from
117 the buffer if a stall had previously occurred, otherwise it comes
118 direct from processing the input.
119
120 this allows us to respect a synchronous "travelling STB" with what
121 dan calls a "buffered handshake".
122
123 it's quite a complex state machine!
124
125 SimpleHandshake
126 ---------------
127
128 Synchronised pipeline, based on:
129 https://github.com/ZipCPU/dbgbus/blob/master/hexbus/rtl/hbdeword.v
130 """
131
132 from nmigen import Signal, Mux, Module, Elaboratable
133 from nmigen.cli import verilog, rtlil
134 from nmigen.lib.fifo import SyncFIFO, SyncFIFOBuffered
135 from nmigen.hdl.ast import ArrayProxy
136 from nmigen.hdl.rec import Record
137
138 from queue import Queue
139 import inspect
140
141 import nmoperator
142 from iocontrol import (PrevControl, NextControl, Object, RecordObject)
143 from stageapi import (_spec, StageCls, Stage,
144 StageChain, StageHelper)
145
146
class RecordBasedStage(Stage):
    """ convenience class which provides a Records-based layout.
        honestly it's a lot easier just to create a direct Records-based
        class (see ExampleAddRecordStage)

        Arguments:

        * :in_shape:  layout handed to Record() to build the input spec
        * :out_shape: layout handed to Record() to build the output spec
        * :processfn: callable taking the input data; its return value is
                      what process() returns
        * :setupfn:   optional callable taking (m, i).  when left as None,
                      setup() does nothing and returns None.
    """
    def __init__(self, in_shape, out_shape, processfn, setupfn=None):
        self.in_shape = in_shape
        self.out_shape = out_shape
        self.__process = processfn
        self.__setup = setupfn

    def ispec(self):
        """ returns a new Record created from in_shape
        """
        return Record(self.in_shape)

    def ospec(self):
        """ returns a new Record created from out_shape
        """
        return Record(self.out_shape)

    def process(self, i):
        """ hands the input to the user-supplied processing function
        """
        return self.__process(i)

    def setup(self, m, i):
        """ calls the user-supplied setup function, if one was given
        """
        # setupfn is optional (defaults to None): nothing to call in that case
        if self.__setup is None:
            return None
        return self.__setup(m, i)
161
162
class PassThroughStage(StageCls):
    """ a stage whose input data spec and output data spec are produced by
        the very same function, and which defines no processing of its own.

        use this basically to explicitly make any data spec Stage-compliant.
        (many APIs would potentially use a static "wrap" method in e.g.
         StageCls to achieve a similar effect)

        Argument:

        * :iospecfn: zero-argument callable.  it is called afresh on every
                     ispec() and every ospec() request.
    """
    def __init__(self, iospecfn):
        self.iospecfn = iospecfn

    def ispec(self):
        """ input spec: whatever iospecfn() returns
        """
        spec = self.iospecfn()
        return spec

    def ospec(self):
        """ output spec: whatever iospecfn() returns (same function as ispec)
        """
        spec = self.iospecfn()
        return spec
174
175
class ControlBase(StageHelper, Elaboratable):
    """ Shared machinery for the Pipeline API.  Note: a "pipeline stage"
        only exists (conceptually) once a ControlBase derivative has been
        handed a Stage (combinatorial block)

        NOTE: because ControlBase inherits from StageHelper it is, by
        accident, compliant with the Stage API.  Calling those Stage
        functions directly *BYPASSES* the ready/valid signalling of a
        ControlBase instance, which clearly should not be done without
        a really, really good reason.
    """
    def __init__(self, stage=None, in_multi=None, stage_ctl=False):
        """ Sets up ready/valid/data towards the previous and next stages

            * p: contains ready/valid to the previous stage
            * n: contains ready/valid to the next stage

            Except when calling ControlBase.connect(), user must also:
            * add data_i member to PrevControl (p) and
            * add data_o member to NextControl (n)
            Calling ControlBase._new_data is a good way to do that.
        """
        StageHelper.__init__(self, stage)

        # input and output IO ACK (prev/next ready/valid)
        self.p = PrevControl(in_multi, stage_ctl)
        self.n = NextControl(stage_ctl)

        # without a stage there is no spec from which to allocate data
        if stage is None:
            return
        self._new_data(self, self, "data")

    def _new_data(self, p, n, name):
        """ (re)allocates self.p.data_i from p's stage input spec and
            self.n.data_o from n's stage output spec.  name is the prefix
            of the two allocated objects ("<name>_i" and "<name>_o").
        """
        in_name = "%s_i" % name
        out_name = "%s_o" % name
        self.p.data_i = _spec(p.stage.ispec, in_name)
        self.n.data_o = _spec(n.stage.ospec, out_name)

    @property
    def data_r(self):
        """ the incoming data (p.data_i) after being run through process()
        """
        data_i = self.p.data_i
        return self.process(data_i)

    def connect_to_next(self, nxt):
        """ links our "next" side (n) to the "previous" side (p) of nxt:
            data/valid/ready.  returns whatever n.connect_to_next returns.
        """
        return self.n.connect_to_next(nxt.p)

    def _connect_in(self, prev):
        """ internal helper: links our p to the p of prev (an input source).
            do not use to connect stage-to-stage!
        """
        return self.p._connect_in(prev.p)

    def _connect_out(self, nxt):
        """ internal helper: links our n to the n of nxt (an output source).
            do not use to connect stage-to-stage!
        """
        return self.n._connect_out(nxt.n)

    def connect(self, pipechain):
        """ joins a chain (list) of Pipeline instances to one another and
            then attaches both ends of the chain to this ControlBase:

                in <----> self <---> out
                       |             ^
                       v             |
                    [pipe1, pipe2, pipe3, pipe4]
                       |    ^  |  ^  |     ^
                       v    |  v  |  v     |
                     out---in out--in out---in

            Allocation of data_i/data_o is taken care of as well, by
            looking up the data spec at each end of the pipechain.  i.e.
            it is NOT necessary to allocate self.p.data_i or self.n.data_o
            manually: this is handled AUTOMATICALLY, here.

            Basically this function is the direct equivalent of StageChain,
            except that unlike StageChain, the Pipeline logic is followed.

            Just as StageChain presents an object that conforms to the
            Stage API from a list of objects that also conform to the
            Stage API, an object that calls this Pipeline connect function
            has the exact same pipeline API as the list of pipeline objects
            it is called with.

            Thus it becomes possible to build up larger chains recursively.
            More complex chains (multi-input, multi-output) will have to be
            done manually.

            Argument:

            * :pipechain: - a sequence of ControlBase-derived classes
                            (must be one or more in length)

            Returns:

            * a list of eq assignments that will need to be added in
              an elaborate() to m.d.comb
        """
        assert len(pipechain) > 0, "pipechain must be non-zero length"
        eqs = []  # every assignment statement gathered along the way

        # inter-chain links: each pipe's n goes to its successor's p
        for earlier, later in zip(pipechain[:-1], pipechain[1:]):
            eqs.extend(earlier.connect_to_next(later))

        # the two ends of the chain are attached to ourselves
        front, end = pipechain[0], pipechain[-1]
        self._new_data(front, end, "chain")  # NOTE: REPLACES existing data
        eqs.extend(front._connect_in(self))  # front p to our p
        eqs.extend(end._connect_out(self))   # end n to our n

        return eqs

    def set_input(self, i):
        """ helper (used in unit tests): returns the assignment of i
            to the input data, p.data_i
        """
        return nmoperator.eq(self.p.data_i, i)

    def __iter__(self):
        # everything p yields (ready/valid, and data too), then ditto for n
        for ctl in (self.p, self.n):
            yield from ctl

    def ports(self):
        """ list of everything that iterating over this object yields
        """
        return list(self)

    def elaborate(self, platform):
        """ creates the Module, adds p and n as submodules and runs the
            stage setup.  also handles the case where the stage has
            dynamic ready/valid functions (stage_ctl set).
        """
        m = Module()
        m.submodules.p = self.p
        m.submodules.n = self.n

        self.setup(m, self.p.data_i)

        if self.p.stage_ctl:
            # intercept the previous (outgoing) "ready", combine it with
            # the stage's own ready
            m.d.comb += self.p.s_ready_o.eq(self.p._ready_o &
                                            self.stage.d_ready)

            # intercept the next (incoming) "ready", combine it with
            # the stage's data valid
            sdv = self.stage.d_valid(self.n.ready_i)
            m.d.comb += self.n.d_valid.eq(self.n.ready_i & sdv)

        return m
324
325
class BufferedHandshake(ControlBase):
    """ buffered pipeline stage.  data and strobe signals travel in sync.
        if ever the input is ready and the output is not, processed data
        is shunted in a temporary register.

        NOTE: further down this file a second class, also named
        BufferedHandshake (derived from FIFOControl), is defined.  after
        the module has loaded, the module-level name refers to that one.

        Argument: stage.  see Stage API above

        stage-1   p.valid_i >>in   stage   n.valid_o out>>   stage+1
        stage-1   p.ready_o <<out  stage   n.ready_i <<in    stage+1
        stage-1   p.data_i  >>in   stage   n.data_o  out>>   stage+1
                              |             |
                            process --->----^
                              |             |
                              +-- r_data ->-+

        input data p.data_i is read (only), is processed and goes into an
        intermediate result store [process()].  this is updated combinatorially.

        in a non-stall condition, the intermediate result will go into the
        output (update_output).  however if ever there is a stall, it goes
        into r_data instead [update_buffer()].

        when the non-stall condition is released, r_data is the first
        to be transferred to the output [flush_buffer()], and the stall
        condition cleared.

        on the next cycle (as long as stall is not raised again) the
        input may begin to be processed and transferred directly to output.

        (update_output, update_buffer and flush_buffer are not functions
         in this class: they name the three conditional sections of
         elaborate() below.)
    """

    def elaborate(self, platform):
        """ builds the buffered handshake logic on top of the Module
            returned by ControlBase.elaborate.  returns that Module
            (also kept as self.m).
        """
        self.m = ControlBase.elaborate(self, platform)

        # result: combinatorial copy of the processed input
        # r_data: the (sync) buffer register.  both are shaped by ospec.
        result = _spec(self.stage.ospec, "r_tmp")
        r_data = _spec(self.stage.ospec, "r_data")

        # establish some combinatorial temporaries
        #   p_valid_i  : p.valid_i_test
        #   o_n_validn : NOT n.valid_o
        #   n_ready_i  : n.ready_i_test
        #   nir_por    : n_ready_i AND p._ready_o
        #   nir_por_n  : n_ready_i AND NOT p._ready_o
        #   nir_novn   : n_ready_i OR  NOT n.valid_o
        #   nirn_novn  : NOT n_ready_i AND NOT n.valid_o
        #   npnn       : nir_por OR nirn_novn
        #   por_pivn   : p._ready_o AND NOT p_valid_i
        o_n_validn = Signal(reset_less=True)
        n_ready_i = Signal(reset_less=True, name="n_i_rdy_data")
        nir_por = Signal(reset_less=True)
        nir_por_n = Signal(reset_less=True)
        p_valid_i = Signal(reset_less=True)
        nir_novn = Signal(reset_less=True)
        nirn_novn = Signal(reset_less=True)
        por_pivn = Signal(reset_less=True)
        npnn = Signal(reset_less=True)
        self.m.d.comb += [p_valid_i.eq(self.p.valid_i_test),
                     o_n_validn.eq(~self.n.valid_o),
                     n_ready_i.eq(self.n.ready_i_test),
                     nir_por.eq(n_ready_i & self.p._ready_o),
                     nir_por_n.eq(n_ready_i & ~self.p._ready_o),
                     nir_novn.eq(n_ready_i | o_n_validn),
                     nirn_novn.eq(~n_ready_i & o_n_validn),
                     npnn.eq(nir_por | nirn_novn),
                     por_pivn.eq(self.p._ready_o & ~p_valid_i)
        ]

        # store result of processing in combinatorial temporary
        self.m.d.comb += nmoperator.eq(result, self.data_r)

        # if not in stall condition, update the temporary register
        # [this is the "update_buffer" step of the class description]
        with self.m.If(self.p.ready_o): # not stalled
            self.m.d.sync += nmoperator.eq(r_data, result) # update buffer

        # data pass-through conditions
        # [the "update_output" step: output is loaded from the processed input]
        with self.m.If(npnn):
            data_o = self._postprocess(result) # XXX TBD, does nothing right now
            self.m.d.sync += [self.n.valid_o.eq(p_valid_i), # valid if p_valid
                              nmoperator.eq(self.n.data_o, data_o), # update out
                             ]
        # buffer flush conditions (NOTE: can override data passthru conditions)
        # [the "flush_buffer" step]
        with self.m.If(nir_por_n): # next is ready, p._ready_o is low
            # Flush the [already processed] buffer to the output port.
            data_o = self._postprocess(r_data) # XXX TBD, does nothing right now
            self.m.d.sync += [self.n.valid_o.eq(1),  # reg empty
                              nmoperator.eq(self.n.data_o, data_o), # flush
                             ]
        # output ready conditions: (next ready OR output not valid) OR
        # (we were ready AND nothing valid arrived)
        self.m.d.sync += self.p._ready_o.eq(nir_novn | por_pivn)

        return self.m
407
408
class SimpleHandshake(ControlBase):
    """ simple handshake control.  data and strobe signals travel in sync.
        implements the protocol used by Wishbone and AXI4.

        Argument: stage.  see Stage API above

        stage-1   p.valid_i >>in   stage   n.valid_o out>>   stage+1
        stage-1   p.ready_o <<out  stage   n.ready_i <<in    stage+1
        stage-1   p.data_i  >>in   stage   n.data_o  out>>   stage+1
                              |             |
                              +--process->--^

        Truth Table

        Inputs   Temporary  Output Data
        -------  ---------- -----  ----
        P P N N  PiV& ~NiR&  N P
        i o i o  PoR  NoV    o o
        V R R V              V R

        -------   -    -     - -
        0 0 0 0   0    0    >0 0    reg
        0 0 0 1   0    1    >1 0    reg
        0 0 1 0   0    0     0 1    process(data_i)
        0 0 1 1   0    0     0 1    process(data_i)
        -------   -    -     - -
        0 1 0 0   0    0    >0 0    reg
        0 1 0 1   0    1    >1 0    reg
        0 1 1 0   0    0     0 1    process(data_i)
        0 1 1 1   0    0     0 1    process(data_i)
        -------   -    -     - -
        1 0 0 0   0    0    >0 0    reg
        1 0 0 1   0    1    >1 0    reg
        1 0 1 0   0    0     0 1    process(data_i)
        1 0 1 1   0    0     0 1    process(data_i)
        -------   -    -     - -
        1 1 0 0   1    0     1 0    process(data_i)
        1 1 0 1   1    1     1 0    process(data_i)
        1 1 1 0   1    0     1 1    process(data_i)
        1 1 1 1   1    0     1 1    process(data_i)
        -------   -    -     - -
    """

    def elaborate(self, platform):
        """ builds the handshake logic on top of the Module returned by
            ControlBase.elaborate.  returns that Module (also kept as self.m).
        """
        self.m = m = ControlBase.elaborate(self, platform)

        # r_busy is the registered "output holds valid data" flag:
        # n.valid_o is driven straight from it (see below)
        r_busy = Signal()
        result = _spec(self.stage.ospec, "r_tmp")

        # establish some combinatorial temporaries
        #   p_valid_i           : p.valid_i_test
        #   n_ready_i           : n.ready_i_test
        #   p_valid_i_p_ready_o : p_valid_i AND p.ready_o
        n_ready_i = Signal(reset_less=True, name="n_i_rdy_data")
        p_valid_i_p_ready_o = Signal(reset_less=True)
        p_valid_i = Signal(reset_less=True)
        m.d.comb += [p_valid_i.eq(self.p.valid_i_test),
                     n_ready_i.eq(self.n.ready_i_test),
                     p_valid_i_p_ready_o.eq(p_valid_i & self.p.ready_o),
        ]

        # store result of processing in combinatorial temporary
        m.d.comb += nmoperator.eq(result, self.data_r)

        # previous valid and ready
        with m.If(p_valid_i_p_ready_o):
            data_o = self._postprocess(result) # XXX TBD, does nothing right now
            m.d.sync += [r_busy.eq(1),      # output valid
                         nmoperator.eq(self.n.data_o, data_o), # update output
                        ]
        # previous invalid or not ready, however next is accepting
        with m.Elif(n_ready_i):
            data_o = self._postprocess(result) # XXX TBD, does nothing right now
            m.d.sync += [nmoperator.eq(self.n.data_o, data_o)]
            # TODO: could still send data here (if there was any)
            #m.d.sync += self.n.valid_o.eq(0) # ...so set output invalid
            m.d.sync += r_busy.eq(0) # ...so set output invalid

        # output valid is (combinatorially) the busy flag
        m.d.comb += self.n.valid_o.eq(r_busy)
        # if next is ready, so is previous
        m.d.comb += self.p._ready_o.eq(n_ready_i)

        return self.m
488
489
class UnbufferedPipeline(ControlBase):
    """ A simple pipeline stage with single-clock synchronisation
        and two-way valid/ready synchronised signalling.

        Note that a stall in one stage will result in the entire pipeline
        chain stalling.

        Also that unlike BufferedHandshake, the valid/ready signalling does NOT
        travel synchronously with the data: the valid/ready signalling
        combines in a *combinatorial* fashion.  Therefore, a long pipeline
        chain will lengthen propagation delays.

        NOTE: further down this file a second class, also named
        UnbufferedPipeline (derived from FIFOControl), is defined.  after
        the module has loaded, the module-level name refers to that one.
        RegisterPipeline, whose class statement comes before the
        redefinition, has *this* class as its base.

        Argument: stage.  see Stage API, above

        stage-1   p.valid_i >>in   stage   n.valid_o out>>   stage+1
        stage-1   p.ready_o <<out  stage   n.ready_i <<in    stage+1
        stage-1   p.data_i  >>in   stage   n.data_o  out>>   stage+1
                              |             |
                            r_data        result
                              |             |
                              +--process ->-+

        Attributes:
        -----------
        p.data_i : StageInput, shaped according to ispec
            The pipeline input
        n.data_o : StageOutput, shaped according to ospec
            The pipeline output
        r_data : shaped according to ospec
            A registered copy of the processed input (data_r).  It is
            updated SYNCHRONOUSLY, and only when pv (see elaborate) is set;
            otherwise it is HELD.  n.data_o is driven combinatorially
            from it.
        result: (appears in the diagram only)
            elaborate() below creates no signal of this name: the
            processed input is assigned directly into r_data.

        Truth Table

        Inputs  Temp  Output  Data
        -------   -   -----   ----
        P P N N ~NiR&  N P
        i o i o  NoV   o o
        V R R V        V R

        -------   -    - -
        0 0 0 0   0    0 1    reg
        0 0 0 1   1    1 0    reg
        0 0 1 0   0    0 1    reg
        0 0 1 1   0    0 1    reg
        -------   -    - -
        0 1 0 0   0    0 1    reg
        0 1 0 1   1    1 0    reg
        0 1 1 0   0    0 1    reg
        0 1 1 1   0    0 1    reg
        -------   -    - -
        1 0 0 0   0    1 1    reg
        1 0 0 1   1    1 0    reg
        1 0 1 0   0    1 1    reg
        1 0 1 1   0    1 1    reg
        -------   -    - -
        1 1 0 0   0    1 1    process(data_i)
        1 1 0 1   1    1 0    process(data_i)
        1 1 1 0   0    1 1    process(data_i)
        1 1 1 1   0    1 1    process(data_i)
        -------   -    - -

        Note: PoR is *NOT* involved in the above decision-making.
    """

    def elaborate(self, platform):
        """ builds the pipeline logic on top of the Module returned by
            ControlBase.elaborate.  returns that Module (also kept as self.m).
        """
        self.m = m = ControlBase.elaborate(self, platform)

        data_valid = Signal() # is data valid or not
        r_data = _spec(self.stage.ospec, "r_tmp") # output type

        # some temporaries
        #   p_valid_i : p.valid_i_test
        #   pv        : p.valid_i AND p.ready_o
        #   buf_full  : NOT n.ready_i_test AND data_valid
        # NOTE(review): pv is built from self.p.valid_i directly, whereas
        # p_valid_i uses self.p.valid_i_test - confirm this is intended.
        p_valid_i = Signal(reset_less=True)
        pv = Signal(reset_less=True)
        buf_full = Signal(reset_less=True)
        m.d.comb += p_valid_i.eq(self.p.valid_i_test)
        m.d.comb += pv.eq(self.p.valid_i & self.p.ready_o)
        m.d.comb += buf_full.eq(~self.n.ready_i_test & data_valid)

        # output is valid when we hold valid data; we are ready for more
        # when we hold nothing, or when next is taking what we hold
        m.d.comb += self.n.valid_o.eq(data_valid)
        m.d.comb += self.p._ready_o.eq(~data_valid | self.n.ready_i_test)
        m.d.sync += data_valid.eq(p_valid_i | buf_full)

        # capture the processed input only on pv
        with m.If(pv):
            m.d.sync += nmoperator.eq(r_data, self.data_r)
        data_o = self._postprocess(r_data) # XXX TBD, does nothing right now
        m.d.comb += nmoperator.eq(self.n.data_o, data_o)

        return self.m
583
class UnbufferedPipeline2(ControlBase):
    """ A simple pipeline stage with single-clock synchronisation
        and two-way valid/ready synchronised signalling.

        Note that a stall in one stage will result in the entire pipeline
        chain stalling.

        Also that unlike BufferedHandshake, the valid/ready signalling does NOT
        travel synchronously with the data: the valid/ready signalling
        combines in a *combinatorial* fashion.  Therefore, a long pipeline
        chain will lengthen propagation delays.

        Argument: stage.  see Stage API, above

        stage-1   p.valid_i >>in   stage   n.valid_o out>>   stage+1
        stage-1   p.ready_o <<out  stage   n.ready_i <<in    stage+1
        stage-1   p.data_i  >>in   stage   n.data_o  out>>   stage+1
                              |             |    |
                              +- process-> buf <-+

        Attributes:
        -----------
        p.data_i : StageInput, shaped according to ispec
            The pipeline input
        n.data_o : StageOutput, shaped according to ospec
            The pipeline output.  Driven combinatorially: from buf when
            buf_full is set, otherwise from the processed input.
        buf : output_shape according to ospec
            A temporary (buffered) copy of a valid output
            This is HELD if the output is not ready.  It is updated
            SYNCHRONOUSLY.

        Inputs  Temp  Output  Data
        -------   -   -----
        P P N N ~NiR&  N P   (buf_full)
        i o i o  NoV   o o
        V R R V        V R

        -------   -    - -
        0 0 0 0   0    0 1   process(data_i)
        0 0 0 1   1    1 0   reg (odata, unchanged)
        0 0 1 0   0    0 1   process(data_i)
        0 0 1 1   0    0 1   process(data_i)
        -------   -    - -
        0 1 0 0   0    0 1   process(data_i)
        0 1 0 1   1    1 0   reg (odata, unchanged)
        0 1 1 0   0    0 1   process(data_i)
        0 1 1 1   0    0 1   process(data_i)
        -------   -    - -
        1 0 0 0   0    1 1   process(data_i)
        1 0 0 1   1    1 0   reg (odata, unchanged)
        1 0 1 0   0    1 1   process(data_i)
        1 0 1 1   0    1 1   process(data_i)
        -------   -    - -
        1 1 0 0   0    1 1   process(data_i)
        1 1 0 1   1    1 0   reg (odata, unchanged)
        1 1 1 0   0    1 1   process(data_i)
        1 1 1 1   0    1 1   process(data_i)
        -------   -    - -

        Note: PoR is *NOT* involved in the above decision-making.
    """

    def elaborate(self, platform):
        """ builds the pipeline logic on top of the Module returned by
            ControlBase.elaborate.  returns that Module (also kept as self.m).
        """
        self.m = m = ControlBase.elaborate(self, platform)

        buf_full = Signal() # is data valid or not
        buf = _spec(self.stage.ospec, "r_tmp") # output type

        # some temporaries
        p_valid_i = Signal(reset_less=True)
        m.d.comb += p_valid_i.eq(self.p.valid_i_test)

        # output is valid if the buffer holds something or input is valid;
        # we are ready for input only while the buffer is empty
        m.d.comb += self.n.valid_o.eq(buf_full | p_valid_i)
        m.d.comb += self.p._ready_o.eq(~buf_full)
        # buffer is full next cycle if a valid output was not accepted
        m.d.sync += buf_full.eq(~self.n.ready_i_test & self.n.valid_o)

        # output comes from the buffer when full, else from processed input
        data_o = Mux(buf_full, buf, self.data_r)
        data_o = self._postprocess(data_o) # XXX TBD, does nothing right now
        m.d.comb += nmoperator.eq(self.n.data_o, data_o)
        # the buffer tracks the output on every clock
        m.d.sync += nmoperator.eq(buf, self.n.data_o)

        return self.m
665
666
class PassThroughHandshake(ControlBase):
    """ A control block that delays by one clock cycle.

        NOTE: further down this file a second class, also named
        PassThroughHandshake (derived from FIFOControl), is defined.  after
        the module has loaded, the module-level name refers to that one.

        Inputs   Temporary          Output Data
        -------   ------------------  ----- ----
        P P N N   PiV& PiV| NiR| pvr   N P  (pvr)
        i o i o   PoR  ~PoR ~NoV       o o
        V R R V                        V R

        -------   -    -    -   -      - -
        0 0 0 0   0    1    1   0      1 1   odata (unchanged)
        0 0 0 1   0    1    0   0      1 0   odata (unchanged)
        0 0 1 0   0    1    1   0      1 1   odata (unchanged)
        0 0 1 1   0    1    1   0      1 1   odata (unchanged)
        -------   -    -    -   -      - -
        0 1 0 0   0    0    1   0      0 1   odata (unchanged)
        0 1 0 1   0    0    0   0      0 0   odata (unchanged)
        0 1 1 0   0    0    1   0      0 1   odata (unchanged)
        0 1 1 1   0    0    1   0      0 1   odata (unchanged)
        -------   -    -    -   -      - -
        1 0 0 0   0    1    1   1      1 1   process(in)
        1 0 0 1   0    1    0   0      1 0   odata (unchanged)
        1 0 1 0   0    1    1   1      1 1   process(in)
        1 0 1 1   0    1    1   1      1 1   process(in)
        -------   -    -    -   -      - -
        1 1 0 0   1    1    1   1      1 1   process(in)
        1 1 0 1   1    1    0   0      1 0   odata (unchanged)
        1 1 1 0   1    1    1   1      1 1   process(in)
        1 1 1 1   1    1    1   1      1 1   process(in)
        -------   -    -    -   -      - -

    """

    def elaborate(self, platform):
        """ builds the one-clock-delay logic on top of the Module returned
            by ControlBase.elaborate.  returns that Module (also kept as
            self.m).
        """
        self.m = m = ControlBase.elaborate(self, platform)

        r_data = _spec(self.stage.ospec, "r_tmp") # output type

        # temporaries
        #   p_valid_i : p.valid_i_test
        #   pvr       : p_valid_i AND p.ready_o
        p_valid_i = Signal(reset_less=True)
        pvr = Signal(reset_less=True)
        m.d.comb += p_valid_i.eq(self.p.valid_i_test)
        m.d.comb += pvr.eq(p_valid_i & self.p.ready_o)

        # NOTE(review): the other ControlBase handshake classes in this
        # file assign to self.p._ready_o; here (as in FIFOControl) it is
        # self.p.ready_o that is assigned - confirm this is intended.
        m.d.comb += self.p.ready_o.eq(~self.n.valid_o | self.n.ready_i_test)
        m.d.sync += self.n.valid_o.eq(p_valid_i | ~self.p.ready_o)

        # register takes the processed input on pvr, else keeps its value
        odata = Mux(pvr, self.data_r, r_data)
        m.d.sync += nmoperator.eq(r_data, odata)
        # from here on the name r_data refers to the post-processed value
        r_data = self._postprocess(r_data) # XXX TBD, does nothing right now
        m.d.comb += nmoperator.eq(self.n.data_o, r_data)

        return m
720
721
class RegisterPipeline(UnbufferedPipeline):
    """ A pipeline stage that delays by one clock cycle, creating a
        sync'd latch out of data_o and valid_o as an indirect byproduct
        of using PassThroughStage

        Argument:

        * :iospecfn: zero-argument callable handed to PassThroughStage,
                     which uses it for both the input and the output spec.
    """
    def __init__(self, iospecfn):
        # use super() instead of naming UnbufferedPipeline explicitly.
        # the base class of this class is fixed when this class statement
        # runs, but the module-level *name* UnbufferedPipeline is rebound
        # further down the file (to a FIFOControl subclass).  looking the
        # name up at call time would therefore run the __init__ of a class
        # that this one does not inherit from.
        super().__init__(PassThroughStage(iospecfn))
729
730
class FIFOControl(ControlBase):
    """ FIFO Control.  Stores data in a FIFO which, coincidentally,
        happens to have the same valid/ready signalling as the Stage API.

        data_i -> fifo.din -> FIFO -> fifo.dout -> data_o
    """
    def __init__(self, depth, stage, in_multi=None, stage_ctl=False,
                       fwft=True, buffered=False, pipe=False):
        """ FIFO Control

            * :depth: number of entries in the FIFO
            * :stage: data processing block
            * :fwft: first word fall-thru mode (non-fwft introduces delay)
            * :buffered: use buffered FIFO (introduces extra cycle delay)
            * :pipe: passed on as the pipe argument of Queue (only used
                     when buffered is not set)

            in_multi and stage_ctl are handed to ControlBase unchanged.

            NOTE 1: FPGAs may have trouble with the defaults for SyncFIFO
                    (fwft=True, buffered=False).  XXX TODO: fix this by
                    using Queue in all cases instead.

            data is processed (and located) as follows:

            self.p  self.stage temp    fn temp  fn  temp  fp   self.n
            data_i->process()->result->cat->din.FIFO.dout->cat(data_o)

            yes, really: cat produces a Cat() which can be assigned to.
            this is how the FIFO gets de-catted without needing a de-cat
            function
        """

        assert not (fwft and buffered), "buffered cannot do fwft"
        self.fwft = fwft
        self.buffered = buffered
        self.pipe = pipe
        # a buffered FIFO is given one entry more than was asked for
        self.fdepth = depth + 1 if buffered else depth
        ControlBase.__init__(self, stage, in_multi, stage_ctl)

    def elaborate(self, platform):
        """ creates the FIFO and wires it between p and n, on top of the
            Module returned by ControlBase.elaborate.  returns that Module
            (also kept as self.m).
        """
        self.m = m = ControlBase.elaborate(self, platform)

        # the FIFO carries one signal, exactly as wide as data_o
        (width, _) = nmoperator.shape(self.n.data_o)
        if self.buffered:
            store = SyncFIFOBuffered(width, self.fdepth)
        else:
            store = Queue(width, self.fdepth,
                          fwft=self.fwft, pipe=self.pipe)
        m.submodules.fifo = store

        # combinatorial temporary holding the processed input
        result = _spec(self.stage.ospec, "r_temp")
        m.d.comb += nmoperator.eq(result, self.data_r)

        # previous side: rdy/valid/data, with the processed data cat'ed
        # into the FIFO input.
        # NOTE: cannot do the PrevControl-looking trick because
        # of need to process the data.  shaaaame....
        m.d.comb += store.we.eq(self.p.valid_i_test)
        m.d.comb += self.p.ready_o.eq(store.writable)
        m.d.comb += nmoperator.eq(store.din, nmoperator.cat(result))

        # next side: rdy/valid (data follows further below)
        handshake = [self.n.valid_o.eq(store.readable),
                     store.re.eq(self.n.ready_i_test)]
        if not (self.fwft or self.buffered):
            # neither fwft nor buffered: next ready/valid are registered
            m.d.sync += handshake
        else:
            # fwft or buffered: combinatorial on next ready/valid
            m.d.comb += handshake

        # assigning to the cat of data_o is what "de-cats" the FIFO output
        unpack = nmoperator.cat(self.n.data_o).eq(store.dout)
        unpack = self._postprocess(unpack) # XXX TBD, does nothing right now
        m.d.comb += unpack

        return m
805
806
# aka "RegStage".
class UnbufferedPipeline(FIFOControl):
    """ FIFOControl with one entry, fwft enabled and pipe disabled.

        NOTE: this rebinds the module-level name UnbufferedPipeline,
        which is also used by a ControlBase-derived class earlier on.
    """
    def __init__(self, stage, in_multi=None, stage_ctl=False):
        FIFOControl.__init__(self, depth=1, stage=stage,
                                   in_multi=in_multi, stage_ctl=stage_ctl,
                                   fwft=True, pipe=False)
812
# aka "BreakReadyStage" XXX had to set fwft=True to get it to work
class PassThroughHandshake(FIFOControl):
    """ FIFOControl with one entry, fwft enabled and pipe enabled.

        NOTE: this rebinds the module-level name PassThroughHandshake,
        which is also used by a ControlBase-derived class earlier on.
    """
    def __init__(self, stage, in_multi=None, stage_ctl=False):
        FIFOControl.__init__(self, depth=1, stage=stage,
                                   in_multi=in_multi, stage_ctl=stage_ctl,
                                   fwft=True, pipe=True)
818
# this is *probably* BufferedHandshake, although test #997 now succeeds.
class BufferedHandshake(FIFOControl):
    """ FIFOControl with two entries, fwft enabled and pipe disabled.

        NOTE: this rebinds the module-level name BufferedHandshake,
        which is also used by a ControlBase-derived class earlier on.
    """
    def __init__(self, stage, in_multi=None, stage_ctl=False):
        FIFOControl.__init__(self, depth=2, stage=stage,
                                   in_multi=in_multi, stage_ctl=stage_ctl,
                                   fwft=True, pipe=False)
824
825
826 """
827 # this is *probably* SimpleHandshake (note: memory cell size=0)
828 class SimpleHandshake(FIFOControl):
829 def __init__(self, stage, in_multi=None, stage_ctl=False):
830 FIFOControl.__init__(self, 0, stage, in_multi, stage_ctl,
831 fwft=True, pipe=False)
832 """