From 9de2c40d3c1051650dd6f29b2ea5a0bd4e67b366 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Wed, 13 Mar 2019 07:26:23 +0000 Subject: [PATCH] add 2 stage buffered pipeline unit test, reduce to 16-bit to make vcd clearer --- src/add/example_buf_pipe.py | 8 +-- src/add/test_buf_pipe.py | 129 ++++++++++++++++++++++++------------ 2 files changed, 92 insertions(+), 45 deletions(-) diff --git a/src/add/example_buf_pipe.py b/src/add/example_buf_pipe.py index fa52f3c7..5faee787 100644 --- a/src/add/example_buf_pipe.py +++ b/src/add/example_buf_pipe.py @@ -58,16 +58,16 @@ class BufPipe: #self.i_p_rst = Signal() # >>in - comes in from PREVIOUS stage self.i_p_stb = Signal() # >>in - comes in from PREVIOUS stage self.i_n_busy = Signal() # in<< - comes in from the NEXT stage - self.i_data = Signal(32) # >>in - comes in from the PREVIOUS stage + self.i_data = Signal(16) # >>in - comes in from the PREVIOUS stage #self.i_rst = Signal() # buffered - self.r_data = Signal(32) + self.r_data = Signal(16) # output self.o_n_stb = Signal() # out>> - goes out to the NEXT stage self.o_p_busy = Signal() # <<out - goes out to the PREVIOUS stage - self.o_data = Signal(32) # out>> - goes out to the NEXT stage + self.o_data = Signal(16) # out>> - goes out to the NEXT stage def pre_process(self, d_in): return d_in | 0xf0000 @@ -90,7 +90,7 @@ class BufPipe: ] # store result of processing in combinatorial temporary - result = Signal(32) + result = Signal(16) with m.If(self.i_p_stb): # input is valid: process it m.d.comb += result.eq(self.process(self.i_data)) with m.If(o_p_busyn): # not stalled diff --git a/src/add/test_buf_pipe.py b/src/add/test_buf_pipe.py index 66f0b4cc..32c7aa1a 100644 --- a/src/add/test_buf_pipe.py +++ b/src/add/test_buf_pipe.py @@ -1,44 +1,4 @@ -""" nmigen implementation of buffered pipeline stage, based on zipcpu: - https://zipcpu.com/blog/2017/08/14/strategies-for-pipelining.html - - this module requires quite a bit of thought to understand how it works - (and why it is needed in the first place). 
reading the above is - *strongly* recommended. - - unlike john dawson's IEEE754 FPU STB/ACK signalling, which requires - the STB / ACK signals to raise and lower (on separate clocks) before - data may proceeed (thus only allowing one piece of data to proceed - on *ALTERNATE* cycles), the signalling here is a true pipeline - where data will flow on *every* clock when the conditions are right. - - input acceptance conditions are when: - * incoming previous-stage strobe (i_p_stb) is HIGH - * outgoing previous-stage busy (o_p_busy) is LOW - - output transmission conditions are when: - * outgoing next-stage strobe (o_n_stb) is HIGH - * outgoing next-stage busy (i_n_busy) is LOW - - the tricky bit is when the input has valid data and the output is not - ready to accept it. if it wasn't for the clock synchronisation, it - would be possible to tell the input "hey don't send that data, we're - not ready". unfortunately, it's not possible to "change the past": - the previous stage *has no choice* but to pass on its data. - - therefore, the incoming data *must* be accepted - and stored. - on the same clock, it's possible to tell the input that it must - not send any more data. this is the "stall" condition. - - we now effectively have *two* possible pieces of data to "choose" from: - the buffered data, and the incoming data. the decision as to which - to process and output is based on whether we are in "stall" or not. - i.e. when the next stage is no longer busy, the output comes from - the buffer if a stall had previously occurred, otherwise it comes - direct from processing the input. - - it's quite a complex state machine! 
-""" - +from nmigen import Module, Signal from nmigen.compat.sim import run_simulation from example_buf_pipe import BufPipe @@ -66,7 +26,44 @@ def testbench(dut): yield dut.i_data.eq(2) yield + yield dut.i_n_busy.eq(1) # begin going into "stall" (next stage says busy) + yield dut.i_data.eq(9) + yield + yield dut.i_p_stb.eq(0) + yield dut.i_data.eq(12) + yield + yield dut.i_data.eq(32) + yield dut.i_n_busy.eq(0) + yield + yield from check_o_n_stb(dut, 1) # buffer still needs to output + yield + yield from check_o_n_stb(dut, 1) # buffer still needs to output + yield + yield from check_o_n_stb(dut, 0) # buffer outputted, *now* we're done. + yield + + +def testbench2(dut): + #yield dut.i_p_rst.eq(1) yield dut.i_n_busy.eq(1) + #yield dut.o_p_busy.eq(1) + yield + yield + #yield dut.i_p_rst.eq(0) + yield dut.i_n_busy.eq(0) + yield dut.i_data.eq(5) + yield dut.i_p_stb.eq(1) + yield + + yield dut.i_data.eq(7) + yield from check_o_n_stb(dut, 0) # effects of i_p_stb delayed 2 clocks + yield + yield from check_o_n_stb(dut, 0) # effects of i_p_stb delayed 2 clocks + + yield dut.i_data.eq(2) + yield + yield from check_o_n_stb(dut, 1) # ok *now* i_p_stb effect is felt + yield dut.i_n_busy.eq(1) # begin going into "stall" (next stage says busy) yield dut.i_data.eq(9) yield yield dut.i_p_stb.eq(0) @@ -79,11 +76,61 @@ def testbench(dut): yield yield from check_o_n_stb(dut, 1) # buffer still needs to output yield + yield from check_o_n_stb(dut, 1) # buffer still needs to output + yield yield from check_o_n_stb(dut, 0) # buffer outputted, *now* we're done. 
yield + yield + yield + +class BufPipe2: + """ + connect these: ------|---------------| + v v + i_p_stb >>in pipe1 o_n_stb out>> i_p_stb >>in pipe2 + o_p_busy <<out pipe1 i_n_busy in<< o_p_busy <<out pipe2 + i_data >>in pipe1 o_data out>> i_data >>in pipe2 + """ + def __init__(self): + self.pipe1 = BufPipe() + self.pipe2 = BufPipe() + + # input + self.i_p_stb = Signal() # >>in - comes in from PREVIOUS stage + self.i_n_busy = Signal() # in<< - comes in from the NEXT stage + self.i_data = Signal(32) # >>in - comes in from the PREVIOUS stage + + # output + self.o_n_stb = Signal() # out>> - goes out to the NEXT stage + self.o_p_busy = Signal() # <<out - goes out to the PREVIOUS stage + self.o_data = Signal(32) # out>> - goes out to the NEXT stage + + def elaborate(self, platform): + m = Module() + m.submodules.pipe1 = self.pipe1 + m.submodules.pipe2 = self.pipe2 + + # connect inter-pipe input/output stb/busy/data + m.d.comb += self.pipe2.i_p_stb.eq(self.pipe1.o_n_stb) + m.d.comb += self.pipe1.i_n_busy.eq(self.pipe2.o_p_busy) + m.d.comb += self.pipe2.i_data.eq(self.pipe1.o_data) + + # inputs/outputs to the module: pipe1 connections here (LHS) + m.d.comb += self.pipe1.i_p_stb.eq(self.i_p_stb) + m.d.comb += self.o_p_busy.eq(self.pipe1.o_p_busy) + m.d.comb += self.pipe1.i_data.eq(self.i_data) + + # now pipe2 connections (RHS) + m.d.comb += self.o_n_stb.eq(self.pipe2.o_n_stb) + m.d.comb += self.pipe2.i_n_busy.eq(self.i_n_busy) + m.d.comb += self.o_data.eq(self.pipe2.o_data) + + return m if __name__ == '__main__': dut = BufPipe() run_simulation(dut, testbench(dut), vcd_name="test_bufpipe.vcd") + dut = BufPipe2() + run_simulation(dut, testbench2(dut), vcd_name="test_bufpipe2.vcd") -- 2.30.2