X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fadd%2Fexample_buf_pipe.py;h=b83d5035cc01c7ec338c1589359323b213e3099f;hb=f3945d1317aa236b752c1e286801caa2c3a07703;hp=717504bcd37db33a2bce90e6168f5e64019e8674;hpb=c60a4997aa35ebc32e121d401af06d3bfee9c5c3;p=ieee754fpu.git diff --git a/src/add/example_buf_pipe.py b/src/add/example_buf_pipe.py index 717504bc..b83d5035 100644 --- a/src/add/example_buf_pipe.py +++ b/src/add/example_buf_pipe.py @@ -12,12 +12,12 @@ where data will flow on *every* clock when the conditions are right. input acceptance conditions are when: - * incoming previous-stage strobe (i_p_stb) is HIGH - * outgoing previous-stage busy (o_p_busy) is LOW + * incoming previous-stage strobe (p.i_valid) is HIGH + * outgoing previous-stage ready (p.o_ready) is HIGH output transmission conditions are when: - * outgoing next-stage strobe (o_n_stb) is HIGH - * outgoing next-stage busy (i_n_busy) is LOW + * outgoing next-stage strobe (n.o_valid) is HIGH + * incoming next-stage ready (n.i_ready) is HIGH the tricky bit is when the input has valid data and the output is not ready to accept it. if it wasn't for the clock synchronisation, it @@ -25,149 +25,434 @@ not ready". unfortunately, it's not possible to "change the past": the previous stage *has no choice* but to pass on its data. - therefore, the incoming data *must* be accepted - and stored. + therefore, the incoming data *must* be accepted - and stored: that + is the responsibility / contract that this stage *must* accept. on the same clock, it's possible to tell the input that it must not send any more data. this is the "stall" condition. we now effectively have *two* possible pieces of data to "choose" from: the buffered data, and the incoming data. the decision as to which to process and output is based on whether we are in "stall" or not. - i.e. when the next stage is no longer busy, the output comes from + i.e. 
when the next stage is ready again, the output comes from the buffer if a stall had previously occurred, otherwise it comes direct from processing the input. + this allows us to respect a synchronous "travelling STB" with what + dan calls a "buffered handshake". + it's quite a complex state machine! """ from nmigen import Signal, Cat, Const, Mux, Module -from nmigen.compat.sim import run_simulation from nmigen.cli import verilog, rtlil +from nmigen.hdl.rec import Record, Layout + +from collections.abc import Sequence + + +class PrevControl: + """ contains signals that come *from* the previous stage (both in and out) + * i_valid: previous stage indicating all incoming data is valid. + may be a multi-bit signal, where all bits are required + to be asserted to indicate "valid". + * o_ready: output to previous stage indicating readiness to accept data + * i_data : an input - added by the user of this class + """ + + def __init__(self, i_width=1): + self.i_valid = Signal(i_width, name="p_i_valid") # prev >>in self + self.o_ready = Signal(name="p_o_ready") # prev < 1: # multi-bit case: valid only when i_valid is all 1s + all1s = Const(-1, (len(self.i_valid), False)) + return self.i_valid == all1s + # single-bit i_valid case + return self.i_valid + + +class NextControl: + """ contains the signals that go *to* the next stage (both in and out) + * o_valid: output indicating to next stage that data is valid + * i_ready: input from next stage indicating that it can accept data + * o_data : an output - added by the user of this class + """ + def __init__(self): + self.o_valid = Signal(name="n_o_valid") # self out>> next + self.i_ready = Signal(name="n_i_ready") # self <>in stage o_n_stb out>> stage+1 - stage-1 o_p_busy <>in stage o_data out>> stage+1 + def connect_out(self, nxt): + """ helper function to connect stage to an output source. do not + use to connect stage-to-stage! 
+ """ + return self.n.connect_out(nxt.n) + + def set_input(self, i): + """ helper function to set the input data + """ + return eq(self.p.i_data, i) + + def ports(self): + return [self.p.i_valid, self.n.i_ready, + self.n.o_valid, self.p.o_ready, + self.p.i_data, self.n.o_data # XXX need flattening! + ] + + +class BufferedPipeline(PipelineBase): + """ buffered pipeline stage. data and strobe signals travel in sync. + if ever the input is ready and the output is not, processed data + is stored in a temporary register. + + stage-1 p.i_valid >>in stage n.o_valid out>> stage+1 + stage-1 p.o_ready <>in stage n.o_data out>> stage+1 | | - +-------> process + process --->----^ | | - +-- r_data ---+ + +-- r_data ->-+ + + input data p.i_data is read (only), is processed and goes into an + intermediate result store [process()]. this is updated combinatorially. + + in a non-stall condition, the intermediate result will go into the + output (update_output). however if ever there is a stall, it goes + into r_data instead [update_buffer()]. + + when the non-stall condition is released, r_data is the first + to be transferred to the output [flush_buffer()], and the stall + condition cleared. + + on the next cycle (as long as stall is not raised again) the + input may begin to be processed and transferred directly to output. 
""" + def __init__(self, stage): + PipelineBase.__init__(self, stage) + + # set up the input and output data + self.p.i_data = stage.ispec() # input type + self.n.o_data = stage.ospec() + + def elaborate(self, platform): + m = Module() + + result = self.stage.ospec() + r_data = self.stage.ospec() + if hasattr(self.stage, "setup"): + self.stage.setup(m, self.p.i_data) + + # establish some combinatorial temporaries + o_n_validn = Signal(reset_less=True) + i_p_valid_o_p_ready = Signal(reset_less=True) + p_i_valid = Signal(reset_less=True) + m.d.comb += [p_i_valid.eq(self.p.i_valid_logic()), + o_n_validn.eq(~self.n.o_valid), + i_p_valid_o_p_ready.eq(p_i_valid & self.p.o_ready), + ] + + # store result of processing in combinatorial temporary + #with m.If(self.p.i_valid): # input is valid: process it + m.d.comb += eq(result, self.stage.process(self.p.i_data)) + # if not in stall condition, update the temporary register + with m.If(self.p.o_ready): # not stalled + m.d.sync += eq(r_data, result) # update buffer + + #with m.If(self.p.i_rst): # reset + # m.d.sync += self.n.o_valid.eq(0) + # m.d.sync += self.p.o_ready.eq(0) + with m.If(self.n.i_ready): # next stage is ready + with m.If(self.p.o_ready): # not stalled + # nothing in buffer: send (processed) input direct to output + m.d.sync += [self.n.o_valid.eq(p_i_valid), + eq(self.n.o_data, result), # update output + ] + with m.Else(): # p.o_ready is false, and something is in buffer. + # Flush the [already processed] buffer to the output port. + m.d.sync += [self.n.o_valid.eq(1), + eq(self.n.o_data, r_data), # flush buffer + # clear stall condition, declare register empty. + self.p.o_ready.eq(1), + ] + # ignore input, since p.o_ready is also false. 
+ + # (n.i_ready) is false here: next stage is not ready + with m.Elif(o_n_validn): # next stage being told "ready" + m.d.sync += [self.n.o_valid.eq(p_i_valid), + self.p.o_ready.eq(1), # Keep the buffer empty + # set the output data (from comb result) + eq(self.n.o_data, result), + ] + # (n.i_ready) false and (n.o_valid) true: + with m.Elif(i_p_valid_o_p_ready): + # If next stage *is not* ready, and not stalled yet, accept input + m.d.sync += self.p.o_ready.eq(~(p_i_valid & self.n.o_valid)) + + return m + + +class ExampleAddStage: + """ an example of how to use the buffered pipeline, as a class instance + """ + + def ispec(self): + """ returns a tuple of input signals which will be the incoming data + """ + return (Signal(16), Signal(16)) + + def ospec(self): + """ returns an output signal which will happen to contain the sum + of the two inputs + """ + return Signal(16) + + def process(self, i): + """ process the input data (sums the values in the tuple) and returns it + """ + return i[0] + i[1] + + +class ExampleBufPipeAdd(BufferedPipeline): + """ an example of how to use the buffered pipeline, using a class instance + """ + def __init__(self): - # input - #self.i_p_rst = Signal() # >>in - comes in from PREVIOUS stage - self.i_p_stb = Signal() # >>in - comes in from PREVIOUS stage - self.i_n_busy = Signal() # in<< - comes in from the NEXT stage - self.i_data = Signal(32) # >>in - comes in from the PREVIOUS stage - #self.i_rst = Signal() + addstage = ExampleAddStage() + BufferedPipeline.__init__(self, addstage) + + +class ExampleStage: + """ an example of how to use the buffered pipeline, in a static class + fashion + """ - # buffered - self.r_data = Signal(32) + def ispec(): + return Signal(16, name="example_input_signal") - # output - self.o_n_stb = Signal() # out>> - goes out to the NEXT stage - self.o_p_busy = Signal() # <> - goes out to the NEXT stage + def ospec(): + return Signal(16, name="example_output_signal") - def pre_process(self, d_in): - return d_in | 
0xf0000 + def process(i): + """ process the input data and returns it (adds 1) + """ + return i + 1 - def process(self, d_in): - return d_in + 1 + +class ExampleStageCls: + """ an example of how to use the buffered pipeline, in a static class + fashion + """ + + def ispec(self): + return Signal(16, name="example_input_signal") + + def ospec(self): + return Signal(16, name="example_output_signal") + + def process(self, i): + """ process the input data and returns it (adds 1) + """ + return i + 1 + + +class ExampleBufPipe(BufferedPipeline): + """ an example of how to use the buffered pipeline. + """ + + def __init__(self): + BufferedPipeline.__init__(self, ExampleStage) + + +class CombPipe(PipelineBase): + """A simple pipeline stage containing combinational logic that can execute + completely in one clock cycle. + + Attributes: + ----------- + input : StageInput + The pipeline input + output : StageOutput + The pipeline output + r_data : Signal, input_shape + A temporary (buffered) copy of a prior (valid) input + result: Signal, output_shape + The output of the combinatorial logic + """ + + def __init__(self, stage): + PipelineBase.__init__(self, stage) + self._data_valid = Signal() + + # set up the input and output data + self.p.i_data = stage.ispec() # input type + self.n.o_data = stage.ospec() # output type def elaborate(self, platform): m = Module() - o_p_busyn = Signal(reset_less=True) - o_n_stbn = Signal(reset_less=True) - i_n_busyn = Signal(reset_less=True) - i_p_stb_o_p_busyn = Signal(reset_less=True) - m.d.comb += i_n_busyn.eq(~self.i_n_busy) - m.d.comb += o_n_stbn.eq(~self.o_n_stb) - m.d.comb += o_p_busyn.eq(~self.o_p_busy) - m.d.comb += i_p_stb_o_p_busyn.eq(self.i_p_stb & o_p_busyn) - - result = Signal(32) - m.d.comb += result.eq(self.process(self.i_data)) - with m.If(o_p_busyn): # not stalled - m.d.sync += self.r_data.eq(result) - - #with m.If(self.i_p_rst): # reset - # m.d.sync += self.o_n_stb.eq(0) - # m.d.sync += self.o_p_busy.eq(0) - with 
m.If(i_n_busyn): # next stage is not busy - with m.If(o_p_busyn): # not stalled - # nothing in buffer: send input direct to output - m.d.sync += self.o_n_stb.eq(self.i_p_stb) - m.d.sync += self.o_data.eq(result) - with m.Else(): # o_p_busy is true, and something is in our buffer. - # Flush the [already processed] buffer to the output port. - m.d.sync += self.o_n_stb.eq(1) - m.d.sync += self.o_data.eq(self.r_data) - # ignore input, since o_p_busy is also true. - # also clear stall condition, declare register to be empty. - m.d.sync += self.o_p_busy.eq(0) - - # (i_n_busy) is true here: next stage is busy - with m.Elif(o_n_stbn): # next stage being told "not busy" - m.d.sync += self.o_n_stb.eq(self.i_p_stb) - m.d.sync += self.o_p_busy.eq(0) # Keep the buffer empty - # Apply the logic to the input data, and set the output data - m.d.sync += self.o_data.eq(result) - - # (i_n_busy) and (o_n_stb) both true: - with m.Elif(i_p_stb_o_p_busyn): - # If next stage *is* busy, and not stalled yet, accept input - m.d.sync += self.o_p_busy.eq(self.i_p_stb & self.o_n_stb) - - with m.If(o_p_busyn): # not stalled - # turns out that from all of the above conditions, just - # always put result into buffer if not busy - m.d.sync += self.r_data.eq(result) + r_data = self.stage.ispec() # input type + result = self.stage.ospec() # output data + if hasattr(self.stage, "setup"): + self.stage.setup(m, r_data) + p_i_valid = Signal(reset_less=True) + m.d.comb += p_i_valid.eq(self.p.i_valid_logic()) + m.d.comb += eq(result, self.stage.process(r_data)) + m.d.comb += self.n.o_valid.eq(self._data_valid) + m.d.comb += self.p.o_ready.eq(~self._data_valid | self.n.i_ready) + m.d.sync += self._data_valid.eq(p_i_valid | \ + (~self.n.i_ready & self._data_valid)) + with m.If(self.p.i_valid & self.p.o_ready): + m.d.sync += eq(r_data, self.p.i_data) + m.d.comb += eq(self.n.o_data, result) return m - def ports(self): - return [self.i_p_stb, self.i_n_busy, self.i_data, - self.r_data, - self.o_n_stb, 
self.o_p_busy, self.o_data - ] +class ExampleCombPipe(CombPipe): + """ an example of how to use the combinatorial pipeline. + """ -def testbench(dut): - #yield dut.i_p_rst.eq(1) - yield dut.i_n_busy.eq(1) - yield dut.o_p_busy.eq(1) - yield - yield - #yield dut.i_p_rst.eq(0) - yield dut.i_n_busy.eq(0) - yield dut.i_data.eq(5) - yield dut.i_p_stb.eq(1) - yield - yield dut.i_data.eq(7) - yield - yield dut.i_data.eq(2) - yield - yield dut.i_n_busy.eq(1) - yield dut.i_data.eq(9) - yield - yield dut.i_p_stb.eq(0) - yield dut.i_data.eq(12) - yield - yield dut.i_data.eq(32) - yield dut.i_n_busy.eq(0) - yield - yield - yield - yield + def __init__(self): + CombPipe.__init__(self, ExampleStage) if __name__ == '__main__': - dut = BufPipe() + dut = ExampleBufPipe() vl = rtlil.convert(dut, ports=dut.ports()) with open("test_bufpipe.il", "w") as f: f.write(vl) - run_simulation(dut, testbench(dut), vcd_name="test_bufpipe.vcd") + dut = ExampleCombPipe() + vl = rtlil.convert(dut, ports=dut.ports()) + with open("test_combpipe.il", "w") as f: + f.write(vl)