X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fadd%2Fsinglepipe.py;h=68b62e432d4fc6022b99361c3358d01638414fce;hb=6bff1a997f3846872cf489c24b5c01426c4dc97c;hp=bb1426a038610dfb107c8a4e42ec8347f49cfe3d;hpb=3ad3978aaa9ee09487fe808631e40911425f5aab;p=ieee754fpu.git diff --git a/src/add/singlepipe.py b/src/add/singlepipe.py index bb1426a0..68b62e43 100644 --- a/src/add/singlepipe.py +++ b/src/add/singlepipe.py @@ -1,50 +1,10 @@ -""" Pipeline and BufferedPipeline implementation, conforming to the same API. - For multi-input and multi-output variants, see multipipe. - - eq: - -- - - a strategically very important function that is identical in function - to nmigen's Signal.eq function, except it may take objects, or a list - of objects, or a tuple of objects, and where objects may also be - Records. - - Stage API: - --------- - - stage requires compliance with a strict API that may be - implemented in several means, including as a static class. - the methods of a stage instance must be as follows: - - * ispec() - Input data format specification - returns an object or a list or tuple of objects, or - a Record, each object having an "eq" function which - takes responsibility for copying by assignment all - sub-objects - * ospec() - Output data format specification - requirements as for ospec - * process(m, i) - Processes an ispec-formatted object - returns a combinatorial block of a result that - may be assigned to the output, by way of the "eq" - function - * setup(m, i) - Optional function for setting up submodules - may be used for more complex stages, to link - the input (i) to submodules. must take responsibility - for adding those submodules to the module (m). - the submodules must be combinatorial blocks and - must have their inputs and output linked combinatorially. 
- - Both StageCls (for use with non-static classes) and Stage (for use - by static classes) are abstract classes from which, for convenience - and as a courtesy to other developers, anything conforming to the - Stage API may *choose* to derive. - - StageChain: - ---------- - - A useful combinatorial wrapper around stages that chains them together - and then presents a Stage-API-conformant interface. By presenting - the same API as the stages it wraps, it can clearly be used recursively. +""" Pipeline API. For multi-input and multi-output variants, see multipipe. + + Associated development bugs: + * http://bugs.libre-riscv.org/show_bug.cgi?id=64 + * http://bugs.libre-riscv.org/show_bug.cgi?id=57 + + Important: see Stage API (stageapi.py) in combination with below RecordBasedStage: ---------------- @@ -74,16 +34,20 @@ connect a chain of pipelines and present the exact same prev/next ready/valid/data API. + Note: pipelines basically do not become pipelines as such until + handed to a derivative of ControlBase. ControlBase itself is *not* + strictly considered a pipeline class. Wishbone and AXI4 (master or + slave) could be derived from ControlBase, for example. UnbufferedPipeline: ------------------ A simple stalling clock-synchronised pipeline that has no buffering - (unlike BufferedPipeline). Data flows on *every* clock cycle when + (unlike BufferedHandshake). Data flows on *every* clock cycle when the conditions are right (this is nominally when the input is valid and the output is ready). A stall anywhere along the line will result in a stall back-propagating - down the entire chain. The BufferedPipeline by contrast will buffer + down the entire chain. The BufferedHandshake by contrast will buffer incoming data, allowing previous stages one clock cycle's grace before also having to stall. @@ -95,6 +59,14 @@ logic, if chained together, is *combinatorial*, resulting in progressively larger gate delay. 
+ PassThroughHandshake: + ------------------ + + A Control class that introduces a single clock delay, passing its + data through unaltered. Unlike RegisterPipeline (which relies + on UnbufferedPipeline and PassThroughStage) it handles ready/valid + itself. + RegisterPipeline: ---------------- @@ -102,7 +74,7 @@ clock delay, when its stage is a PassThroughStage, it results in a Pipeline stage that, duh, delays its (unmodified) input by one clock cycle. - BufferedPipeline: + BufferedHandshake: ---------------- nmigen implementation of buffered pipeline stage, based on zipcpu: @@ -119,12 +91,12 @@ where data will flow on *every* clock when the conditions are right. input acceptance conditions are when: - * incoming previous-stage strobe (p.i_valid) is HIGH - * outgoing previous-stage ready (p.o_ready) is LOW + * incoming previous-stage strobe (p.valid_i) is HIGH + * outgoing previous-stage ready (p.ready_o) is LOW output transmission conditions are when: - * outgoing next-stage strobe (n.o_valid) is HIGH - * outgoing next-stage ready (n.i_ready) is LOW + * outgoing next-stage strobe (n.valid_o) is HIGH + * outgoing next-stage ready (n.ready_i) is LOW the tricky bit is when the input has valid data and the output is not ready to accept it. if it wasn't for the clock synchronisation, it @@ -149,223 +121,24 @@ it's quite a complex state machine! 
- BufferedPipeline2 - ----------------- + SimpleHandshake + --------------- - Synchronised pipeline - - Based on: + Synchronised pipeline, Based on: https://github.com/ZipCPU/dbgbus/blob/master/hexbus/rtl/hbdeword.v """ -from nmigen import Signal, Cat, Const, Mux, Module, Value +from nmigen import Signal, Mux, Module, Elaboratable from nmigen.cli import verilog, rtlil -from nmigen.hdl.ast import ArrayProxy -from nmigen.hdl.rec import Record, Layout - -from abc import ABCMeta, abstractmethod -from collections.abc import Sequence - - -class PrevControl: - """ contains signals that come *from* the previous stage (both in and out) - * i_valid: previous stage indicating all incoming data is valid. - may be a multi-bit signal, where all bits are required - to be asserted to indicate "valid". - * o_ready: output to next stage indicating readiness to accept data - * i_data : an input - added by the user of this class - """ - - def __init__(self, i_width=1, stage_ctl=False): - self.stage_ctl = stage_ctl - self.i_valid = Signal(i_width, name="p_i_valid") # prev >>in self - self._o_ready = Signal(name="p_o_ready") # prev < 1: - # multi-bit case: valid only when i_valid is all 1s - all1s = Const(-1, (len(self.i_valid), False)) - i_valid = (self.i_valid == all1s) - else: - # single-bit i_valid case - i_valid = self.i_valid - - # when stage indicates not ready, incoming data - # must "appear" to be not ready too - if self.stage_ctl: - i_valid = i_valid & self.s_o_ready - - return i_valid - - -class NextControl: - """ contains the signals that go *to* the next stage (both in and out) - * o_valid: output indicating to next stage that data is valid - * i_ready: input from next stage indicating that it can accept data - * o_data : an output - added by the user of this class - """ - def __init__(self, stage_ctl=False): - self.stage_ctl = stage_ctl - self.o_valid = Signal(name="n_o_valid") # self out>> next - self.i_ready = Signal(name="n_i_ready") # self < 0, "pipechain must be 
non-zero length" + assert self.stage is None, "do not use connect with a stage" eqs = [] # collated list of assignment statements # connect inter-chain for i in range(len(pipechain)-1): - pipe1 = pipechain[i] - pipe2 = pipechain[i+1] - eqs += pipe1.connect_to_next(pipe2) - - # connect front of chain to ourselves - front = pipechain[0] - self.p.i_data = front.stage.ispec() - eqs += front._connect_in(self) - - # connect end of chain to ourselves - end = pipechain[-1] - self.n.o_data = end.stage.ospec() - eqs += end._connect_out(self) + pipe1 = pipechain[i] # earlier + pipe2 = pipechain[i+1] # later (by 1) + eqs += pipe1.connect_to_next(pipe2) # earlier n to later p + + # connect front and back of chain to ourselves + front = pipechain[0] # first in chain + end = pipechain[-1] # last in chain + self.set_specs(front, end) # sets up ispec/ospec functions + self._new_data("chain") # NOTE: REPLACES existing data + eqs += front._connect_in(self) # front p to our p + eqs += end._connect_out(self) # end n to our n return eqs def set_input(self, i): - """ helper function to set the input data + """ helper function to set the input data (used in unit tests) """ - return eq(self.p.i_data, i) + return nmoperator.eq(self.p.data_i, i) + + def __iter__(self): + yield from self.p # yields ready/valid/data (data also gets yielded) + yield from self.n # ditto def ports(self): - res = [self.p.i_valid, self.n.i_ready, - self.n.o_valid, self.p.o_ready, - ] - if hasattr(self.p.i_data, "ports"): - res += self.p.i_data.ports() - else: - res += self.p.i_data - if hasattr(self.n.o_data, "ports"): - res += self.n.o_data.ports() - else: - res += self.n.o_data - return res + return list(self) - def _elaborate(self, platform): + def elaborate(self, platform): """ handles case where stage has dynamic ready/valid functions """ m = Module() + m.submodules.p = self.p + m.submodules.n = self.n + + self.setup(m, self.p.data_i) + if not self.p.stage_ctl: return m # intercept the previous (outgoing) 
"ready", combine with stage ready - m.d.comb += self.p.s_o_ready.eq(self.p._o_ready & self.stage.d_ready) + m.d.comb += self.p.s_ready_o.eq(self.p._ready_o & self.stage.d_ready) # intercept the next (incoming) "ready" and combine it with data valid - sdv = self.stage.d_valid(self.n.i_ready) - m.d.comb += self.n.d_valid.eq(self.n.i_ready & sdv) + sdv = self.stage.d_valid(self.n.ready_i) + m.d.comb += self.n.d_valid.eq(self.n.ready_i & sdv) return m -class BufferedPipeline(ControlBase): +class BufferedHandshake(ControlBase): """ buffered pipeline stage. data and strobe signals travel in sync. if ever the input is ready and the output is not, processed data is shunted in a temporary register. Argument: stage. see Stage API above - stage-1 p.i_valid >>in stage n.o_valid out>> stage+1 - stage-1 p.o_ready <>in stage n.o_data out>> stage+1 + stage-1 p.valid_i >>in stage n.valid_o out>> stage+1 + stage-1 p.ready_o <>in stage n.data_o out>> stage+1 | | process --->----^ | | +-- r_data ->-+ - input data p.i_data is read (only), is processed and goes into an + input data p.data_i is read (only), is processed and goes into an intermediate result store [process()]. this is updated combinatorially. in a non-stall condition, the intermediate result will go into the @@ -575,147 +348,138 @@ class BufferedPipeline(ControlBase): on the next cycle (as long as stall is not raised again) the input may begin to be processed and transferred directly to output. 
- """ - def __init__(self, stage, stage_ctl=False): - ControlBase.__init__(self, stage_ctl=stage_ctl) - self.stage = stage - - # set up the input and output data - self.p.i_data = stage.ispec() # input type - self.n.o_data = stage.ospec() def elaborate(self, platform): + self.m = ControlBase.elaborate(self, platform) - self.m = ControlBase._elaborate(self, platform) - - result = self.stage.ospec() - r_data = self.stage.ospec() - if hasattr(self.stage, "setup"): - self.stage.setup(self.m, self.p.i_data) + result = _spec(self.stage.ospec, "r_tmp") + r_data = _spec(self.stage.ospec, "r_data") # establish some combinatorial temporaries o_n_validn = Signal(reset_less=True) - n_i_ready = Signal(reset_less=True, name="n_i_rdy_data") - i_p_valid_o_p_ready = Signal(reset_less=True) - p_i_valid = Signal(reset_less=True) - self.m.d.comb += [p_i_valid.eq(self.p.i_valid_test), - o_n_validn.eq(~self.n.o_valid), - i_p_valid_o_p_ready.eq(p_i_valid & self.p.o_ready), - n_i_ready.eq(self.n.i_ready_test), + n_ready_i = Signal(reset_less=True, name="n_i_rdy_data") + nir_por = Signal(reset_less=True) + nir_por_n = Signal(reset_less=True) + p_valid_i = Signal(reset_less=True) + nir_novn = Signal(reset_less=True) + nirn_novn = Signal(reset_less=True) + por_pivn = Signal(reset_less=True) + npnn = Signal(reset_less=True) + self.m.d.comb += [p_valid_i.eq(self.p.valid_i_test), + o_n_validn.eq(~self.n.valid_o), + n_ready_i.eq(self.n.ready_i_test), + nir_por.eq(n_ready_i & self.p._ready_o), + nir_por_n.eq(n_ready_i & ~self.p._ready_o), + nir_novn.eq(n_ready_i | o_n_validn), + nirn_novn.eq(~n_ready_i & o_n_validn), + npnn.eq(nir_por | nirn_novn), + por_pivn.eq(self.p._ready_o & ~p_valid_i) ] # store result of processing in combinatorial temporary - self.m.d.comb += eq(result, self.stage.process(self.p.i_data)) + self.m.d.comb += nmoperator.eq(result, self.data_r) # if not in stall condition, update the temporary register - with self.m.If(self.p.o_ready): # not stalled - self.m.d.sync += 
eq(r_data, result) # update buffer - - with self.m.If(n_i_ready): # next stage is ready - with self.m.If(self.p._o_ready): # not stalled - # nothing in buffer: send (processed) input direct to output - self.m.d.sync += [self.n.o_valid.eq(p_i_valid), - eq(self.n.o_data, result), # update output - ] - with self.m.Else(): # p.o_ready is false, and data in buffer - # Flush the [already processed] buffer to the output port. - self.m.d.sync += [self.n.o_valid.eq(1), # reg empty - eq(self.n.o_data, r_data), # flush buffer - self.p._o_ready.eq(1), # clear stall - ] - # ignore input, since p.o_ready is also false. - - # (n.i_ready) is false here: next stage is ready - with self.m.Elif(o_n_validn): # next stage being told "ready" - self.m.d.sync += [self.n.o_valid.eq(p_i_valid), - self.p._o_ready.eq(1), # Keep the buffer empty - eq(self.n.o_data, result), # set output data - ] - - # (n.i_ready) false and (n.o_valid) true: - with self.m.Elif(i_p_valid_o_p_ready): - # If next stage *is* ready, and not stalled yet, accept input - self.m.d.sync += self.p._o_ready.eq(~(p_i_valid & self.n.o_valid)) + with self.m.If(self.p.ready_o): # not stalled + self.m.d.sync += nmoperator.eq(r_data, result) # update buffer + + # data pass-through conditions + with self.m.If(npnn): + data_o = self._postprocess(result) # XXX TBD, does nothing right now + self.m.d.sync += [self.n.valid_o.eq(p_valid_i), # valid if p_valid + nmoperator.eq(self.n.data_o, data_o), # update out + ] + # buffer flush conditions (NOTE: can override data passthru conditions) + with self.m.If(nir_por_n): # not stalled + # Flush the [already processed] buffer to the output port. + data_o = self._postprocess(r_data) # XXX TBD, does nothing right now + self.m.d.sync += [self.n.valid_o.eq(1), # reg empty + nmoperator.eq(self.n.data_o, data_o), # flush + ] + # output ready conditions + self.m.d.sync += self.p._ready_o.eq(nir_novn | por_pivn) return self.m -class BufferedPipeline2(ControlBase): - """ buffered pipeline stage. 
data and strobe signals travel in sync. - if ever the input is ready and the output is not, processed data - is shunted in a temporary register. +class SimpleHandshake(ControlBase): + """ simple handshake control. data and strobe signals travel in sync. + implements the protocol used by Wishbone and AXI4. Argument: stage. see Stage API above - stage-1 p.i_valid >>in stage n.o_valid out>> stage+1 - stage-1 p.o_ready <<out stage n.i_ready <<in stage+1 - stage-1 p.i_data >>in stage n.o_data out>> stage+1 + stage-1 p.valid_i >>in stage n.valid_o out>> stage+1 + stage-1 p.ready_o <<out stage n.ready_i <<in stage+1 + stage-1 p.data_i >>in stage n.data_o out>> stage+1 | | - process --->----^ - | | - +-- r_data ->-+ - - input data p.i_data is read (only), is processed and goes into an - intermediate result store [process()]. this is updated combinatorially. - - in a non-stall condition, the intermediate result will go into the - output (update_output). however if ever there is a stall, it goes - into r_data instead [update_buffer()]. - - when the non-stall condition is released, r_data is the first - to be transferred to the output [flush_buffer()], and the stall - condition cleared. - - on the next cycle (as long as stall is not raised again) the - input may begin to be processed and transferred directly to output. 
- + +--process->--^ + Truth Table + + Inputs Temporary Output Data + ------- ---------- ----- ---- + P P N N PiV& ~NiR& N P + i o i o PoR NoV o o + V R R V V R + + ------- - - - - + 0 0 0 0 0 0 >0 0 reg + 0 0 0 1 0 1 >1 0 reg + 0 0 1 0 0 0 0 1 process(data_i) + 0 0 1 1 0 0 0 1 process(data_i) + ------- - - - - + 0 1 0 0 0 0 >0 0 reg + 0 1 0 1 0 1 >1 0 reg + 0 1 1 0 0 0 0 1 process(data_i) + 0 1 1 1 0 0 0 1 process(data_i) + ------- - - - - + 1 0 0 0 0 0 >0 0 reg + 1 0 0 1 0 1 >1 0 reg + 1 0 1 0 0 0 0 1 process(data_i) + 1 0 1 1 0 0 0 1 process(data_i) + ------- - - - - + 1 1 0 0 1 0 1 0 process(data_i) + 1 1 0 1 1 1 1 0 process(data_i) + 1 1 1 0 1 0 1 1 process(data_i) + 1 1 1 1 1 0 1 1 process(data_i) + ------- - - - - """ - def __init__(self, stage, stage_ctl=False): - ControlBase.__init__(self, stage_ctl=stage_ctl) - self.stage = stage - - # set up the input and output data - self.p.i_data = stage.ispec() # input type - self.n.o_data = stage.ospec() def elaborate(self, platform): - - self.m = ControlBase._elaborate(self, platform) + self.m = m = ControlBase.elaborate(self, platform) r_busy = Signal() - result = self.stage.ospec() - if hasattr(self.stage, "setup"): - self.stage.setup(self.m, self.p.i_data) + result = _spec(self.stage.ospec, "r_tmp") # establish some combinatorial temporaries - n_i_ready = Signal(reset_less=True, name="n_i_rdy_data") - p_i_valid_p_o_ready = Signal(reset_less=True) - p_i_valid = Signal(reset_less=True) - self.m.d.comb += [p_i_valid.eq(self.p.i_valid_test), - n_i_ready.eq(self.n.i_ready_test), - p_i_valid_p_o_ready.eq(p_i_valid & self.p.o_ready), + n_ready_i = Signal(reset_less=True, name="n_i_rdy_data") + p_valid_i_p_ready_o = Signal(reset_less=True) + p_valid_i = Signal(reset_less=True) + m.d.comb += [p_valid_i.eq(self.p.valid_i_test), + n_ready_i.eq(self.n.ready_i_test), + p_valid_i_p_ready_o.eq(p_valid_i & self.p.ready_o), ] # store result of processing in combinatorial temporary - self.m.d.comb += eq(result, 
self.stage.process(self.p.i_data)) + m.d.comb += nmoperator.eq(result, self.data_r) # previous valid and ready - with self.m.If(p_i_valid_p_o_ready): - self.m.d.sync += [r_busy.eq(1), # output valid - #self.n.o_valid.eq(1), # output valid - eq(self.n.o_data, result), # update output - ] + with m.If(p_valid_i_p_ready_o): + data_o = self._postprocess(result) # XXX TBD, does nothing right now + m.d.sync += [r_busy.eq(1), # output valid + nmoperator.eq(self.n.data_o, data_o), # update output + ] # previous invalid or not ready, however next is accepting - with self.m.Elif(n_i_ready): - self.m.d.sync += [ eq(self.n.o_data, result)] + with m.Elif(n_ready_i): + data_o = self._postprocess(result) # XXX TBD, does nothing right now + m.d.sync += [nmoperator.eq(self.n.data_o, data_o)] # TODO: could still send data here (if there was any) - #self.m.d.sync += self.n.o_valid.eq(0) # ...so set output invalid - self.m.d.sync += r_busy.eq(0) # ...so set output invalid + #m.d.sync += self.n.valid_o.eq(0) # ...so set output invalid + m.d.sync += r_busy.eq(0) # ...so set output invalid - self.m.d.comb += self.n.o_valid.eq(r_busy) + m.d.comb += self.n.valid_o.eq(r_busy) # if next is ready, so is previous - self.m.d.comb += self.p._o_ready.eq(n_i_ready) + m.d.comb += self.p._ready_o.eq(n_ready_i) return self.m @@ -727,16 +491,16 @@ class UnbufferedPipeline(ControlBase): Note that a stall in one stage will result in the entire pipeline chain stalling. - Also that unlike BufferedPipeline, the valid/ready signalling does NOT + Also that unlike BufferedHandshake, the valid/ready signalling does NOT travel synchronously with the data: the valid/ready signalling combines in a *combinatorial* fashion. Therefore, a long pipeline chain will lengthen propagation delays. Argument: stage. 
see Stage API, above - stage-1 p.i_valid >>in stage n.o_valid out>> stage+1 - stage-1 p.o_ready <<out stage n.i_ready <<in stage+1 - stage-1 p.i_data >>in stage n.o_data out>> stage+1 + stage-1 p.valid_i >>in stage n.valid_o out>> stage+1 + stage-1 p.ready_o <<out stage n.ready_i <<in stage+1 + stage-1 p.data_i >>in stage n.data_o out>> stage+1 | | r_data result | | @@ -744,9 +508,9 @@ class UnbufferedPipeline(ControlBase): Attributes: ----------- - p.i_data : StageInput, shaped according to ispec + p.data_i : StageInput, shaped according to ispec The pipeline input - p.o_data : StageOutput, shaped according to ospec + p.data_o : StageOutput, shaped according to ospec The pipeline output r_data : input_shape according to ispec A temporary (buffered) copy of a prior (valid) input. @@ -755,39 +519,64 @@ class UnbufferedPipeline(ControlBase): result: output_shape according to ospec The output of the combinatorial logic. it is updated COMBINATORIALLY (no clock dependence). - """ - - def __init__(self, stage, stage_ctl=False): - ControlBase.__init__(self, stage_ctl=stage_ctl) - self.stage = stage - # set up the input and output data - self.p.i_data = stage.ispec() # input type - self.n.o_data = stage.ospec() # output type + Truth Table + + Inputs Temp Output Data + ------- - ----- ---- + P P N N ~NiR& N P + i o i o NoV o o + V R R V V R + + ------- - - - + 0 0 0 0 0 0 1 reg + 0 0 0 1 1 1 0 reg + 0 0 1 0 0 0 1 reg + 0 0 1 1 0 0 1 reg + ------- - - - + 0 1 0 0 0 0 1 reg + 0 1 0 1 1 1 0 reg + 0 1 1 0 0 0 1 reg + 0 1 1 1 0 0 1 reg + ------- - - - + 1 0 0 0 0 1 1 reg + 1 0 0 1 1 1 0 reg + 1 0 1 0 0 1 1 reg + 1 0 1 1 0 1 1 reg + ------- - - - + 1 1 0 0 0 1 1 process(data_i) + 1 1 0 1 1 1 0 process(data_i) + 1 1 1 0 0 1 1 process(data_i) + 1 1 1 1 0 1 1 process(data_i) + ------- - - - + + Note: PoR is *NOT* involved in the above decision-making. 
+ """ def elaborate(self, platform): - self.m = ControlBase._elaborate(self, platform) + self.m = m = ControlBase.elaborate(self, platform) data_valid = Signal() # is data valid or not - r_data = self.stage.ispec() # input type - if hasattr(self.stage, "setup"): - self.stage.setup(self.m, r_data) + r_data = _spec(self.stage.ospec, "r_tmp") # output type # some temporaries - p_i_valid = Signal(reset_less=True) + p_valid_i = Signal(reset_less=True) pv = Signal(reset_less=True) - self.m.d.comb += p_i_valid.eq(self.p.i_valid_test) - self.m.d.comb += pv.eq(self.p.i_valid & self.p.o_ready) - - self.m.d.comb += self.n.o_valid.eq(data_valid) - self.m.d.comb += self.p._o_ready.eq(~data_valid | self.n.i_ready_test) - self.m.d.sync += data_valid.eq(p_i_valid | \ - (~self.n.i_ready_test & data_valid)) - with self.m.If(pv): - self.m.d.sync += eq(r_data, self.p.i_data) - self.m.d.comb += eq(self.n.o_data, self.stage.process(r_data)) - return self.m + buf_full = Signal(reset_less=True) + m.d.comb += p_valid_i.eq(self.p.valid_i_test) + m.d.comb += pv.eq(self.p.valid_i & self.p.ready_o) + m.d.comb += buf_full.eq(~self.n.ready_i_test & data_valid) + + m.d.comb += self.n.valid_o.eq(data_valid) + m.d.comb += self.p._ready_o.eq(~data_valid | self.n.ready_i_test) + m.d.sync += data_valid.eq(p_valid_i | buf_full) + with m.If(pv): + m.d.sync += nmoperator.eq(r_data, self.data_r) + data_o = self._postprocess(r_data) # XXX TBD, does nothing right now + m.d.comb += nmoperator.eq(self.n.data_o, data_o) + + return self.m class UnbufferedPipeline2(ControlBase): """ A simple pipeline stage with single-clock synchronisation @@ -796,81 +585,245 @@ class UnbufferedPipeline2(ControlBase): Note that a stall in one stage will result in the entire pipeline chain stalling. 
- Also that unlike BufferedPipeline, the valid/ready signalling does NOT + Also that unlike BufferedHandshake, the valid/ready signalling does NOT travel synchronously with the data: the valid/ready signalling combines in a *combinatorial* fashion. Therefore, a long pipeline chain will lengthen propagation delays. Argument: stage. see Stage API, above - stage-1 p.i_valid >>in stage n.o_valid out>> stage+1 - stage-1 p.o_ready <<out stage n.i_ready <<in stage+1 - stage-1 p.i_data >>in stage n.o_data out>> stage+1 - | | - r_data result - | | - +--process ->-+ - + stage-1 p.valid_i >>in stage n.valid_o out>> stage+1 + stage-1 p.ready_o <<out stage n.ready_i <<in stage+1 + stage-1 p.data_i >>in stage n.data_o out>> stage+1 + | | | + +- process-> buf <-+ Attributes: ----------- - p.i_data : StageInput, shaped according to ispec + p.data_i : StageInput, shaped according to ispec The pipeline input - p.o_data : StageOutput, shaped according to ospec + p.data_o : StageOutput, shaped according to ospec The pipeline output buf : output_shape according to ospec A temporary (buffered) copy of a valid output This is HELD if the output is not ready. It is updated SYNCHRONOUSLY. 
- """ - def __init__(self, stage, stage_ctl=False): - ControlBase.__init__(self, stage_ctl=stage_ctl) - self.stage = stage - - # set up the input and output data - self.p.i_data = stage.ispec() # input type - self.n.o_data = stage.ospec() # output type + Inputs Temp Output Data + ------- - ----- + P P N N ~NiR& N P (buf_full) + i o i o NoV o o + V R R V V R + + ------- - - - + 0 0 0 0 0 0 1 process(data_i) + 0 0 0 1 1 1 0 reg (odata, unchanged) + 0 0 1 0 0 0 1 process(data_i) + 0 0 1 1 0 0 1 process(data_i) + ------- - - - + 0 1 0 0 0 0 1 process(data_i) + 0 1 0 1 1 1 0 reg (odata, unchanged) + 0 1 1 0 0 0 1 process(data_i) + 0 1 1 1 0 0 1 process(data_i) + ------- - - - + 1 0 0 0 0 1 1 process(data_i) + 1 0 0 1 1 1 0 reg (odata, unchanged) + 1 0 1 0 0 1 1 process(data_i) + 1 0 1 1 0 1 1 process(data_i) + ------- - - - + 1 1 0 0 0 1 1 process(data_i) + 1 1 0 1 1 1 0 reg (odata, unchanged) + 1 1 1 0 0 1 1 process(data_i) + 1 1 1 1 0 1 1 process(data_i) + ------- - - - + + Note: PoR is *NOT* involved in the above decision-making. 
+ """ def elaborate(self, platform): - self.m = ControlBase._elaborate(self, platform) + self.m = m = ControlBase.elaborate(self, platform) buf_full = Signal() # is data valid or not - buf = self.stage.ospec() # output type - if hasattr(self.stage, "setup"): - self.stage.setup(self.m, self.p.i_data) + buf = _spec(self.stage.ospec, "r_tmp") # output type # some temporaries - p_i_valid = Signal(reset_less=True) - self.m.d.comb += p_i_valid.eq(self.p.i_valid_test) + p_valid_i = Signal(reset_less=True) + m.d.comb += p_valid_i.eq(self.p.valid_i_test) - self.m.d.comb += self.n.o_valid.eq(buf_full | p_i_valid) - self.m.d.comb += self.p._o_ready.eq(~buf_full) - self.m.d.sync += buf_full.eq(~self.n.i_ready_test & \ - (p_i_valid | buf_full)) + m.d.comb += self.n.valid_o.eq(buf_full | p_valid_i) + m.d.comb += self.p._ready_o.eq(~buf_full) + m.d.sync += buf_full.eq(~self.n.ready_i_test & self.n.valid_o) - odata = Mux(buf_full, buf, self.stage.process(self.p.i_data)) - self.m.d.comb += eq(self.n.o_data, odata) - self.m.d.sync += eq(buf, self.n.o_data) + data_o = Mux(buf_full, buf, self.data_r) + data_o = self._postprocess(data_o) # XXX TBD, does nothing right now + m.d.comb += nmoperator.eq(self.n.data_o, data_o) + m.d.sync += nmoperator.eq(buf, self.n.data_o) return self.m -class PassThroughStage(StageCls): - """ a pass-through stage which has its input data spec equal to its output, - and "passes through" its data from input to output. +class PassThroughHandshake(ControlBase): + """ A control block that delays by one clock cycle. 
+ + Inputs Temporary Output Data + ------- ------------------ ----- ---- + P P N N PiV& PiV| NiR| pvr N P (pvr) + i o i o PoR ~PoR ~NoV o o + V R R V V R + + ------- - - - - - - + 0 0 0 0 0 1 1 0 1 1 odata (unchanged) + 0 0 0 1 0 1 0 0 1 0 odata (unchanged) + 0 0 1 0 0 1 1 0 1 1 odata (unchanged) + 0 0 1 1 0 1 1 0 1 1 odata (unchanged) + ------- - - - - - - + 0 1 0 0 0 0 1 0 0 1 odata (unchanged) + 0 1 0 1 0 0 0 0 0 0 odata (unchanged) + 0 1 1 0 0 0 1 0 0 1 odata (unchanged) + 0 1 1 1 0 0 1 0 0 1 odata (unchanged) + ------- - - - - - - + 1 0 0 0 0 1 1 1 1 1 process(in) + 1 0 0 1 0 1 0 0 1 0 odata (unchanged) + 1 0 1 0 0 1 1 1 1 1 process(in) + 1 0 1 1 0 1 1 1 1 1 process(in) + ------- - - - - - - + 1 1 0 0 1 1 1 1 1 1 process(in) + 1 1 0 1 1 1 0 0 1 0 odata (unchanged) + 1 1 1 0 1 1 1 1 1 1 process(in) + 1 1 1 1 1 1 1 1 1 1 process(in) + ------- - - - - - - + """ - def __init__(self, iospecfn): - self.iospecfn = iospecfn - def ispec(self): return self.iospecfn() - def ospec(self): return self.iospecfn() - def process(self, i): return i + + def elaborate(self, platform): + self.m = m = ControlBase.elaborate(self, platform) + + r_data = _spec(self.stage.ospec, "r_tmp") # output type + + # temporaries + p_valid_i = Signal(reset_less=True) + pvr = Signal(reset_less=True) + m.d.comb += p_valid_i.eq(self.p.valid_i_test) + m.d.comb += pvr.eq(p_valid_i & self.p.ready_o) + + m.d.comb += self.p.ready_o.eq(~self.n.valid_o | self.n.ready_i_test) + m.d.sync += self.n.valid_o.eq(p_valid_i | ~self.p.ready_o) + + odata = Mux(pvr, self.data_r, r_data) + m.d.sync += nmoperator.eq(r_data, odata) + r_data = self._postprocess(r_data) # XXX TBD, does nothing right now + m.d.comb += nmoperator.eq(self.n.data_o, r_data) + + return m class RegisterPipeline(UnbufferedPipeline): """ A pipeline stage that delays by one clock cycle, creating a - sync'd latch out of o_data and o_valid as an indirect byproduct + sync'd latch out of data_o and valid_o as an indirect byproduct of using 
PassThroughStage """ def __init__(self, iospecfn): UnbufferedPipeline.__init__(self, PassThroughStage(iospecfn)) + +class FIFOControl(ControlBase): + """ FIFO Control. Uses Queue to store data, coincidentally + happens to have same valid/ready signalling as Stage API. + + data_i -> fifo.din -> FIFO -> fifo.dout -> data_o + """ + def __init__(self, depth, stage, in_multi=None, stage_ctl=False, + fwft=True, pipe=False): + """ FIFO Control + + * :depth: number of entries in the FIFO + * :stage: data processing block + * :fwft: first word fall-thru mode (non-fwft introduces delay) + * :pipe: specifies pipe mode. + + when fwft = True it indicates that transfers may occur + combinatorially through stage processing in the same clock cycle. + This requires that the Stage be a Mealy FSM: + https://en.wikipedia.org/wiki/Mealy_machine + + when fwft = False it indicates that all output signals are + produced only from internal registers or memory, i.e. that the + Stage is a Moore FSM: + https://en.wikipedia.org/wiki/Moore_machine + + data is processed (and located) as follows: + + self.p self.stage temp fn temp fn temp fp self.n + data_i->process()->result->cat->din.FIFO.dout->cat(data_o) + + yes, really: cat produces a Cat() which can be assigned to. + this is how the FIFO gets de-catted without needing a de-cat + function + """ + self.fwft = fwft + self.pipe = pipe + self.fdepth = depth + ControlBase.__init__(self, stage, in_multi, stage_ctl) + + def elaborate(self, platform): + self.m = m = ControlBase.elaborate(self, platform) + + # make a FIFO with a signal of equal width to the data_o. 
+ (fwidth, _) = nmoperator.shape(self.n.data_o) + fifo = Queue(fwidth, self.fdepth, fwft=self.fwft, pipe=self.pipe) + m.submodules.fifo = fifo + + def processfn(data_i): + # store result of processing in combinatorial temporary + result = _spec(self.stage.ospec, "r_temp") + m.d.comb += nmoperator.eq(result, self.process(data_i)) + return nmoperator.cat(result) + + ## prev: make the FIFO (Queue object) "look" like a PrevControl... + m.submodules.fp = fp = PrevControl() + fp.valid_i, fp._ready_o, fp.data_i = fifo.we, fifo.writable, fifo.din + m.d.comb += fp._connect_in(self.p, fn=processfn) + + # next: make the FIFO (Queue object) "look" like a NextControl... + m.submodules.fn = fn = NextControl() + fn.valid_o, fn.ready_i, fn.data_o = fifo.readable, fifo.re, fifo.dout + connections = fn._connect_out(self.n, fn=nmoperator.cat) + + # ok ok so we can't just do the ready/valid eqs straight: + # first 2 from connections are the ready/valid, 3rd is data. + if self.fwft: + m.d.comb += connections[:2] # combinatorial on next ready/valid + else: + m.d.sync += connections[:2] # non-fwft mode needs sync + data_o = connections[2] # get the data + data_o = self._postprocess(data_o) # XXX TBD, does nothing right now + m.d.comb += data_o + + return m + + +# aka "RegStage". +class UnbufferedPipeline(FIFOControl): + def __init__(self, stage, in_multi=None, stage_ctl=False): + FIFOControl.__init__(self, 1, stage, in_multi, stage_ctl, + fwft=True, pipe=False) + +# aka "BreakReadyStage" XXX had to set fwft=True to get it to work +class PassThroughHandshake(FIFOControl): + def __init__(self, stage, in_multi=None, stage_ctl=False): + FIFOControl.__init__(self, 1, stage, in_multi, stage_ctl, + fwft=True, pipe=True) + +# this is *probably* BufferedHandshake, although test #997 now succeeds. 
+class BufferedHandshake(FIFOControl): + def __init__(self, stage, in_multi=None, stage_ctl=False): + FIFOControl.__init__(self, 2, stage, in_multi, stage_ctl, + fwft=True, pipe=False) + + +""" +# this is *probably* SimpleHandshake (note: memory cell size=0) +class SimpleHandshake(FIFOControl): + def __init__(self, stage, in_multi=None, stage_ctl=False): + FIFOControl.__init__(self, 0, stage, in_multi, stage_ctl, + fwft=True, pipe=False) +"""