X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fadd%2Fexample_buf_pipe.py;h=45bed19301a5c46b3213c8ae0217405182f6a215;hb=6ba0c1a42bfc8362af281aaae60858f05acc991b;hp=20e91e4e877948f72000b5ac789b053f82d21f4d;hpb=e1336d2ad072dc6661c9af1b0460a69ff1bf588f;p=ieee754fpu.git diff --git a/src/add/example_buf_pipe.py b/src/add/example_buf_pipe.py index 20e91e4e..45bed193 100644 --- a/src/add/example_buf_pipe.py +++ b/src/add/example_buf_pipe.py @@ -1,98 +1,267 @@ +""" nmigen implementation of buffered pipeline stage, based on zipcpu: + https://zipcpu.com/blog/2017/08/14/strategies-for-pipelining.html + + this module requires quite a bit of thought to understand how it works + (and why it is needed in the first place). reading the above is + *strongly* recommended. + + unlike john dawson's IEEE754 FPU STB/ACK signalling, which requires + the STB / ACK signals to raise and lower (on separate clocks) before + data may proceed (thus only allowing one piece of data to proceed + on *ALTERNATE* cycles), the signalling here is a true pipeline + where data will flow on *every* clock when the conditions are right. + + input acceptance conditions are when: + * incoming previous-stage strobe (i.p_valid) is HIGH + * outgoing previous-stage ready (o.p_ready) is LOW + + output transmission conditions are when: + * outgoing next-stage strobe (o.n_valid) is HIGH + * incoming next-stage ready (i.n_ready) is LOW + + the tricky bit is when the input has valid data and the output is not + ready to accept it. if it wasn't for the clock synchronisation, it + would be possible to tell the input "hey don't send that data, we're + not ready". unfortunately, it's not possible to "change the past": + the previous stage *has no choice* but to pass on its data. + + therefore, the incoming data *must* be accepted - and stored: that + is the responsibility / contract that this stage *must* accept. + on the same clock, it's possible to tell the input that it must + not send any more data. 
this is the "stall" condition. + + we now effectively have *two* possible pieces of data to "choose" from: + the buffered data, and the incoming data. the decision as to which + to process and output is based on whether we are in "stall" or not. + i.e. when the next stage is no longer ready, the output comes from + the buffer if a stall had previously occurred, otherwise it comes + direct from processing the input. + + this allows us to respect a synchronous "travelling STB" with what + dan calls a "buffered handshake". + + it's quite a complex state machine! +""" + from nmigen import Signal, Cat, Const, Mux, Module -from nmigen.compat.sim import run_simulation from nmigen.cli import verilog, rtlil +from collections.abc import Sequence + + +class IOAckIn: -class BufPipe: def __init__(self): - # input - self.i_p_stb = Signal() # >>in - comes in from PREVIOUS stage - self.i_n_busy = Signal() # in<< - comes in from the NEXT stage - self.i_data = Signal(32) # >>in - comes in from the PREVIOUS stage - #self.i_rst = Signal() + self.p_valid = Signal() # >>in - comes in from PREVIOUS stage + self.n_ready = Signal() # in<< - comes in from the NEXT stage + + +class IOAckOut: + + def __init__(self): + self.n_valid = Signal() # out>> - goes out to the NEXT stage + self.p_ready = Signal() # <>in stage o.n_valid out>> stage+1 + stage-1 o.p_ready <>in stage o.data out>> stage+1 + | | + process --->----^ + | | + +-- r_data ->-+ + + input data i_data is read (only), is processed and goes into an + intermediate result store [process()]. this is updated combinatorially. + + in a non-stall condition, the intermediate result will go into the + output (update_output). however if ever there is a stall, it goes + into r_data instead [update_buffer()]. + + when the non-stall condition is released, r_data is the first + to be transferred to the output [flush_buffer()], and the stall + condition cleared. 
+ + on the next cycle (as long as stall is not raised again) the + input may begin to be processed and transferred directly to output. + """ + def __init__(self, stage): + """ pass in a "stage" which may be either a static class or a class + instance, which has three functions: + * ispec: returns input signals according to the input specification + * ospec: returns output signals according to the output specification + * process: takes an input instance and returns processed data - # buffered - self.r_data = Signal(32) + i_data -> process() -> result --> o.data + | ^ + | | + +-> r_data -+ + """ + # input: strobe comes in from previous stage, ready comes in from next + self.i = IOAckIn() + #self.i.p_valid = Signal() # >>in - comes in from PREVIOUS stage + #self.i.n_ready = Signal() # in<< - comes in from the NEXT stage - # output - self.o_n_stb = Signal() # out>> - goes out to the NEXT stage - self.o_p_busy = Signal() # <> - goes out to the NEXT stage + # output: strobe goes out to next stage, ready goes out to previous + self.o = IOAckOut() + #self.o.n_valid = Signal() # out>> - goes out to the NEXT stage + #self.o.p_ready = Signal() # <