X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fadd%2Ftest_buf_pipe.py;h=b84a66042607551e122d3569d1f5e94f12aa27dc;hb=25357c032b55274ce620a331ecc1dc0874f5fdac;hp=c46685acb9f0651005ecbf52ec35f82659219bec;hpb=14559d0d0edaee06af261a04ed0a33a5bd1e0479;p=ieee754fpu.git diff --git a/src/add/test_buf_pipe.py b/src/add/test_buf_pipe.py index c46685ac..b84a6604 100644 --- a/src/add/test_buf_pipe.py +++ b/src/add/test_buf_pipe.py @@ -1,78 +1,348 @@ -""" nmigen implementation of buffered pipeline stage, based on zipcpu: - https://zipcpu.com/blog/2017/08/14/strategies-for-pipelining.html - - this module requires quite a bit of thought to understand how it works - (and why it is needed in the first place). reading the above is - *strongly* recommended. - - unlike john dawson's IEEE754 FPU STB/ACK signalling, which requires - the STB / ACK signals to raise and lower (on separate clocks) before - data may proceeed (thus only allowing one piece of data to proceed - on *ALTERNATE* cycles), the signalling here is a true pipeline - where data will flow on *every* clock when the conditions are right. - - input acceptance conditions are when: - * incoming previous-stage strobe (i_p_stb) is HIGH - * outgoing previous-stage busy (o_p_busy) is LOW - - output transmission conditions are when: - * outgoing next-stage strobe (o_n_stb) is HIGH - * outgoing next-stage busy (i_n_busy) is LOW - - the tricky bit is when the input has valid data and the output is not - ready to accept it. if it wasn't for the clock synchronisation, it - would be possible to tell the input "hey don't send that data, we're - not ready". unfortunately, it's not possible to "change the past": - the previous stage *has no choice* but to pass on its data. - - therefore, the incoming data *must* be accepted - and stored. - on the same clock, it's possible to tell the input that it must - not send any more data. this is the "stall" condition. - - we now effectively have *two* possible pieces of data to "choose" from: - the buffered data, and the incoming data. the decision as to which - to process and output is based on whether we are in "stall" or not. - i.e. when the next stage is no longer busy, the output comes from - the buffer if a stall had previously occurred, otherwise it comes - direct from processing the input. - - it's quite a complex state machine! -""" - +from nmigen import Module, Signal, Mux from nmigen.compat.sim import run_simulation -from example_buf_pipe import BufPipe +from example_buf_pipe import ExampleBufPipe, ExampleBufPipeAdd +from example_buf_pipe import ExampleCombPipe, CombPipe +from example_buf_pipe import PrevControl, NextControl +from random import randint + + +def check_o_n_valid(dut, val): + o_n_valid = yield dut.n.o_valid + assert o_n_valid == val def testbench(dut): #yield dut.i_p_rst.eq(1) - yield dut.i_n_busy.eq(1) - yield dut.o_p_busy.eq(1) + yield dut.n.i_ready.eq(0) + yield dut.p.o_ready.eq(0) yield yield #yield dut.i_p_rst.eq(0) - yield dut.i_n_busy.eq(0) - yield dut.i_data.eq(5) - yield dut.i_p_stb.eq(1) + yield dut.n.i_ready.eq(1) + yield dut.p.i_data.eq(5) + yield dut.p.i_valid.eq(1) + yield + + yield dut.p.i_data.eq(7) + yield from check_o_n_valid(dut, 0) # effects of i_p_valid delayed + yield + yield from check_o_n_valid(dut, 1) # ok *now* i_p_valid effect is felt + + yield dut.p.i_data.eq(2) + yield + yield dut.n.i_ready.eq(0) # begin going into "stall" (next stage says ready) + yield dut.p.i_data.eq(9) yield - yield dut.i_data.eq(7) + yield dut.p.i_valid.eq(0) + yield dut.p.i_data.eq(12) yield - yield dut.i_data.eq(2) + yield dut.p.i_data.eq(32) + yield dut.n.i_ready.eq(1) yield - yield dut.i_n_busy.eq(1) - yield dut.i_data.eq(9) + yield from check_o_n_valid(dut, 1) # buffer still needs to output yield - yield dut.i_p_stb.eq(0) - yield dut.i_data.eq(12) + yield from check_o_n_valid(dut, 1) # buffer still needs to output yield - yield dut.i_data.eq(32) - yield dut.i_n_busy.eq(0) + yield from check_o_n_valid(dut, 0) # buffer outputted, *now* we're done. yield + + +def testbench2(dut): + #yield dut.p.i_rst.eq(1) + yield dut.n.i_ready.eq(0) + #yield dut.p.o_ready.eq(0) + yield + yield + #yield dut.p.i_rst.eq(0) + yield dut.n.i_ready.eq(1) + yield dut.p.i_data.eq(5) + yield dut.p.i_valid.eq(1) + yield + + yield dut.p.i_data.eq(7) + yield from check_o_n_valid(dut, 0) # effects of i_p_valid delayed 2 clocks + yield + yield from check_o_n_valid(dut, 0) # effects of i_p_valid delayed 2 clocks + + yield dut.p.i_data.eq(2) yield + yield from check_o_n_valid(dut, 1) # ok *now* i_p_valid effect is felt + yield dut.n.i_ready.eq(0) # begin going into "stall" (next stage says ready) + yield dut.p.i_data.eq(9) yield + yield dut.p.i_valid.eq(0) + yield dut.p.i_data.eq(12) yield + yield dut.p.i_data.eq(32) + yield dut.n.i_ready.eq(1) + yield + yield from check_o_n_valid(dut, 1) # buffer still needs to output + yield + yield from check_o_n_valid(dut, 1) # buffer still needs to output + yield + yield from check_o_n_valid(dut, 1) # buffer still needs to output + yield + yield from check_o_n_valid(dut, 0) # buffer outputted, *now* we're done. + yield + yield + yield + + +class Test3: + def __init__(self, dut, resultfn): + self.dut = dut + self.resultfn = resultfn + self.data = [] + for i in range(num_tests): + #data.append(randint(0, 1<<16-1)) + self.data.append(i+1) + self.i = 0 + self.o = 0 + + def send(self): + while self.o != len(self.data): + send_range = randint(0, 3) + for j in range(randint(1,10)): + if send_range == 0: + send = True + else: + send = randint(0, send_range) != 0 + o_p_ready = yield self.dut.p.o_ready + if not o_p_ready: + yield + continue + if send and self.i != len(self.data): + yield self.dut.p.i_valid.eq(1) + yield self.dut.p.i_data.eq(self.data[self.i]) + self.i += 1 + else: + yield self.dut.p.i_valid.eq(0) + yield + + def rcv(self): + while self.o != len(self.data): + stall_range = randint(0, 3) + for j in range(randint(1,10)): + stall = randint(0, stall_range) != 0 + yield self.dut.n.i_ready.eq(stall) + yield + o_n_valid = yield self.dut.n.o_valid + i_n_ready = yield self.dut.n.i_ready + if not o_n_valid or not i_n_ready: + continue + o_data = yield self.dut.n.o_data + self.resultfn(o_data, self.data[self.o], self.i, self.o) + self.o += 1 + if self.o == len(self.data): + break + +def test3_resultfn(o_data, expected, i, o): + assert o_data == expected + 1, \ + "%d-%d data %x not match %x\n" \ + % (i, o, o_data, expected) + +class Test5: + def __init__(self, dut, resultfn): + self.dut = dut + self.resultfn = resultfn + self.data = [] + for i in range(num_tests): + self.data.append((randint(0, 1<<16-1), randint(0, 1<<16-1))) + self.i = 0 + self.o = 0 + + def send(self): + while self.o != len(self.data): + send_range = randint(0, 3) + for j in range(randint(1,10)): + if send_range == 0: + send = True + else: + send = randint(0, send_range) != 0 + o_p_ready = yield self.dut.p.o_ready + if not o_p_ready: + yield + continue + if send and self.i != len(self.data): + yield self.dut.p.i_valid.eq(1) + for v in self.dut.set_input(self.data[self.i]): + yield v + self.i += 1 + else: + yield self.dut.p.i_valid.eq(0) + yield + + def rcv(self): + while self.o != len(self.data): + stall_range = randint(0, 3) + for j in range(randint(1,10)): + stall = randint(0, stall_range) != 0 + yield self.dut.n.i_ready.eq(stall) + yield + o_n_valid = yield self.dut.n.o_valid + i_n_ready = yield self.dut.n.i_ready + if not o_n_valid or not i_n_ready: + continue + o_data = yield self.dut.n.o_data + self.resultfn(o_data, self.data[self.o], self.i, self.o) + self.o += 1 + if self.o == len(self.data): + break +def test5_resultfn(o_data, expected, i, o): + res = expected[0] + expected[1] + assert o_data == res, \ + "%d-%d data %x not match %s\n" \ + % (i, o, o_data, repr(expected)) + +def testbench4(dut): + data = [] + for i in range(num_tests): + #data.append(randint(0, 1<<16-1)) + data.append(i+1) + i = 0 + o = 0 + while True: + stall = randint(0, 3) != 0 + send = randint(0, 5) != 0 + yield dut.n.i_ready.eq(stall) + o_p_ready = yield dut.p.o_ready + if o_p_ready: + if send and i != len(data): + yield dut.p.i_valid.eq(1) + yield dut.p.i_data.eq(data[i]) + i += 1 + else: + yield dut.p.i_valid.eq(0) + yield + o_n_valid = yield dut.n.o_valid + i_n_ready = yield dut.n.i_ready + if o_n_valid and i_n_ready: + o_data = yield dut.n.o_data + assert o_data == data[o] + 2, "%d-%d data %x not match %x\n" \ + % (i, o, o_data, data[o]) + o += 1 + if o == len(data): + break + + +class ExampleBufPipe2: + """ + connect these: ------|---------------| + v v + i_p_valid >>in pipe1 o_n_valid out>> i_p_valid >>in pipe2 + o_p_ready <>in pipe1 p_i_data out>> n_o_data >>in pipe2 + """ + def __init__(self): + self.pipe1 = ExampleBufPipe() + self.pipe2 = ExampleBufPipe() + + # input + self.p = PrevControl() + self.p.i_data = Signal(32) # >>in - comes in from the PREVIOUS stage + + # output + self.n = NextControl() + self.n.o_data = Signal(32) # out>> - goes out to the NEXT stage + + def elaborate(self, platform): + m = Module() + m.submodules.pipe1 = self.pipe1 + m.submodules.pipe2 = self.pipe2 + + # connect inter-pipe input/output valid/ready/data + m.d.comb += self.pipe1.connect_to_next(self.pipe2) + + # inputs/outputs to the module: pipe1 connections here (LHS) + m.d.comb += self.pipe1.connect_in(self) + + # now pipe2 connections (RHS) + m.d.comb += self.pipe2.connect_out(self) + + return m + +class SetLessThan: + def __init__(self, width, signed): + self.src1 = Signal((width, signed)) + self.src2 = Signal((width, signed)) + self.output = Signal(width) + + def elaborate(self, platform): + m = Module() + m.d.comb += self.output.eq(Mux(self.src1 < self.src2, 1, 0)) + return m + + +class LTStage: + def __init__(self): + self.slt = SetLessThan(16, True) + + def ispec(self): + return (Signal(16), Signal(16)) + + def ospec(self): + return Signal(16) + + def setup(self, m, i): + self.o = Signal(16) + m.submodules.slt = self.slt + m.d.comb += self.slt.src1.eq(i[0]) + m.d.comb += self.slt.src2.eq(i[1]) + m.d.comb += self.o.eq(self.slt.output) + + def process(self, i): + return self.o + + +class ExampleLTCombPipe(CombPipe): + """ an example of how to use the combinatorial pipeline. + """ + + def __init__(self): + stage = LTStage() + CombPipe.__init__(self, stage) + + +def test6_resultfn(o_data, expected, i, o): + res = 1 if expected[0] < expected[1] else 0 + assert o_data == res, \ + "%d-%d data %x not match %s\n" \ + % (i, o, o_data, repr(expected)) + + +num_tests = 1000 if __name__ == '__main__': - dut = BufPipe() + print ("test 1") + dut = ExampleBufPipe() run_simulation(dut, testbench(dut), vcd_name="test_bufpipe.vcd") + print ("test 2") + dut = ExampleBufPipe2() + run_simulation(dut, testbench2(dut), vcd_name="test_bufpipe2.vcd") + + print ("test 3") + dut = ExampleBufPipe() + test = Test3(dut, test3_resultfn) + run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpipe3.vcd") + + print ("test 3.5") + dut = ExampleCombPipe() + test = Test3(dut, test3_resultfn) + run_simulation(dut, [test.send, test.rcv], vcd_name="test_combpipe3.vcd") + + print ("test 4") + dut = ExampleBufPipe2() + run_simulation(dut, testbench4(dut), vcd_name="test_bufpipe4.vcd") + + print ("test 5") + dut = ExampleBufPipeAdd() + test = Test5(dut, test5_resultfn) + run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpipe5.vcd") + + print ("test 6") + dut = ExampleLTCombPipe() + test = Test5(dut, test6_resultfn) + run_simulation(dut, [test.send, test.rcv], vcd_name="test_ltcomb6.vcd") +