"""Simulation tests for the example pipeline classes in example_buf_pipe.

Every test drives a device-under-test (``dut``) through its ``p``
(previous-stage) and ``n`` (next-stage) valid/ready/data ports using the
nmigen compat simulator.  Running this file as a script executes all of
the tests in turn; each one writes a VCD trace and some additionally dump
the design as RTLIL.
"""

from nmigen import Module, Signal, Mux
from nmigen.hdl.rec import Record
from nmigen.compat.sim import run_simulation
from nmigen.cli import verilog, rtlil

from example_buf_pipe import ExampleBufPipe, ExampleBufPipeAdd
from example_buf_pipe import ExampleCombPipe, CombPipe, ExampleStageCls
from example_buf_pipe import PrevControl, NextControl, BufferedPipeline
from example_buf_pipe import StageChain

from random import randint


# Inclusive upper bounds for randomly generated operands.
#
# These used to be spelled ``1<<16-1`` and ``1<<16-2``.  In Python ``-``
# binds tighter than ``<<``, so those expressions evaluate to ``1 << 15``
# and ``1 << 14``, NOT to ``(1 << 16) - 1`` / ``(1 << 16) - 2``.  The values
# are written out explicitly so the real range is visible.
# NOTE(review): ``(1 << 16) - 1`` may have been the intent, but widening the
# range lets the sum of two operands exceed 16 bits -- confirm against the
# output width of the stages under test before changing these.
_OPERAND_MAX = 1 << 15
_CHAIN_OPERAND_MAX = 1 << 14


def check_o_n_valid(dut, val):
    """Read ``dut.n.o_valid`` from the simulator and assert it equals val."""
    o_n_valid = yield dut.n.o_valid
    assert o_n_valid == val


def testbench(dut):
    """Hand-written stimulus for a single buffered pipeline stage.

    Feeds a short fixed sequence of values, forces a stall by dropping
    ``n.i_ready`` and checks ``n.o_valid`` at fixed clock offsets.
    """
    yield dut.n.i_ready.eq(0)
    yield dut.p.o_ready.eq(0)
    yield
    yield
    yield dut.n.i_ready.eq(1)
    yield dut.p.i_data.eq(5)
    yield dut.p.i_valid.eq(1)
    yield

    yield dut.p.i_data.eq(7)
    yield from check_o_n_valid(dut, 0)  # effects of i_p_valid delayed
    yield
    yield from check_o_n_valid(dut, 1)  # ok *now* i_p_valid effect is felt

    yield dut.p.i_data.eq(2)
    yield
    # begin going into "stall" (next stage says NOT ready)
    yield dut.n.i_ready.eq(0)
    yield dut.p.i_data.eq(9)
    yield
    yield dut.p.i_valid.eq(0)
    yield dut.p.i_data.eq(12)
    yield
    yield dut.p.i_data.eq(32)
    yield dut.n.i_ready.eq(1)
    yield
    yield from check_o_n_valid(dut, 1)  # buffer still needs to output
    yield
    yield from check_o_n_valid(dut, 1)  # buffer still needs to output
    yield
    yield from check_o_n_valid(dut, 0)  # buffer outputted, *now* we're done.
    yield


def testbench2(dut):
    """Same stimulus as testbench(), with checks shifted for two stages.

    Used with ExampleBufPipe2, so ``n.o_valid`` is expected one clock later
    than in testbench() and one extra buffered value has to drain.
    """
    yield dut.n.i_ready.eq(0)
    yield
    yield
    yield dut.n.i_ready.eq(1)
    yield dut.p.i_data.eq(5)
    yield dut.p.i_valid.eq(1)
    yield

    yield dut.p.i_data.eq(7)
    yield from check_o_n_valid(dut, 0)  # effects of i_p_valid delayed 2 clocks
    yield
    yield from check_o_n_valid(dut, 0)  # effects of i_p_valid delayed 2 clocks

    yield dut.p.i_data.eq(2)
    yield
    yield from check_o_n_valid(dut, 1)  # ok *now* i_p_valid effect is felt
    # begin going into "stall" (next stage says NOT ready)
    yield dut.n.i_ready.eq(0)
    yield dut.p.i_data.eq(9)
    yield
    yield dut.p.i_valid.eq(0)
    yield dut.p.i_data.eq(12)
    yield
    yield dut.p.i_data.eq(32)
    yield dut.n.i_ready.eq(1)
    yield
    yield from check_o_n_valid(dut, 1)  # buffer still needs to output
    yield
    yield from check_o_n_valid(dut, 1)  # buffer still needs to output
    yield
    yield from check_o_n_valid(dut, 1)  # buffer still needs to output
    yield
    yield from check_o_n_valid(dut, 0)  # buffer outputted, *now* we're done.
    yield
    yield
    yield


class Test3:
    """Randomised send/receive test using the values 1..num_tests.

    ``send`` and ``rcv`` are generator functions intended to be run as two
    concurrent simulation processes.  ``self.i`` counts values sent and
    ``self.o`` counts values received; both loops stop once every value
    has been received.
    """

    def __init__(self, dut, resultfn):
        """Store the dut and the checker, and build the input sequence.

        resultfn is called as ``resultfn(o_data, expected, i, o)`` for each
        value received.
        """
        self.dut = dut
        self.resultfn = resultfn
        self.data = [idx + 1 for idx in range(num_tests)]
        self.i = 0
        self.o = 0

    def send(self):
        """Drive ``p.i_valid`` / ``p.i_data`` in random-length bursts."""
        while self.o != len(self.data):
            # send_range 0 means "always send" for this burst; larger
            # values introduce a 1-in-(send_range+1) chance of idling.
            send_range = randint(0, 3)
            for _ in range(randint(1, 10)):
                send = send_range == 0 or randint(0, send_range) != 0
                o_p_ready = yield self.dut.p.o_ready
                if not o_p_ready:
                    # dut is not accepting input: leave the inputs alone
                    yield
                    continue
                if send and self.i != len(self.data):
                    yield self.dut.p.i_valid.eq(1)
                    yield self.dut.p.i_data.eq(self.data[self.i])
                    self.i += 1
                else:
                    yield self.dut.p.i_valid.eq(0)
                yield

    def rcv(self):
        """Toggle ``n.i_ready`` at random and check each value received."""
        while self.o != len(self.data):
            # ready_range 0 means "never ready" for this burst
            ready_range = randint(0, 3)
            for _ in range(randint(1, 10)):
                ready = randint(0, ready_range) != 0
                yield self.dut.n.i_ready.eq(ready)
                yield
                o_n_valid = yield self.dut.n.o_valid
                i_n_ready = yield self.dut.n.i_ready
                # data only counts as transferred when both are high
                if not o_n_valid or not i_n_ready:
                    continue
                o_data = yield self.dut.n.o_data
                self.resultfn(o_data, self.data[self.o], self.i, self.o)
                self.o += 1
                if self.o == len(self.data):
                    break


def test3_resultfn(o_data, expected, i, o):
    """Check that the output is the input value plus one."""
    assert o_data == expected + 1, \
        "%d-%d data %x not match %x\n" \
        % (i, o, o_data, expected)


def data_dict():
    """Return num_tests random ``{'src1': ..., 'src2': ...}`` dicts."""
    return [{'src1': randint(0, _OPERAND_MAX),
             'src2': randint(0, _OPERAND_MAX)}
            for _ in range(num_tests)]


class Test5:
    """Randomised send/receive test for duts with structured data.

    Works like Test3, except that input is applied through
    ``dut.set_input()`` and a Record output is read back field by field
    into a dict.
    """

    def __init__(self, dut, resultfn, data=None):
        """Store the dut and checker; use data, or random 2-tuples.

        A falsy ``data`` (None or an empty sequence) selects num_tests
        random ``(a, b)`` tuples.
        """
        self.dut = dut
        self.resultfn = resultfn
        if data:
            self.data = data
        else:
            self.data = [(randint(0, _OPERAND_MAX), randint(0, _OPERAND_MAX))
                         for _ in range(num_tests)]
        self.i = 0
        self.o = 0

    def send(self):
        """Drive ``p.i_valid`` and the dut inputs in random-length bursts."""
        while self.o != len(self.data):
            # send_range 0 means "always send" for this burst
            send_range = randint(0, 3)
            for _ in range(randint(1, 10)):
                send = send_range == 0 or randint(0, send_range) != 0
                o_p_ready = yield self.dut.p.o_ready
                if not o_p_ready:
                    # dut is not accepting input: leave the inputs alone
                    yield
                    continue
                if send and self.i != len(self.data):
                    yield self.dut.p.i_valid.eq(1)
                    # set_input yields the assignments for this item
                    for assignment in self.dut.set_input(self.data[self.i]):
                        yield assignment
                    self.i += 1
                else:
                    yield self.dut.p.i_valid.eq(0)
                yield

    def rcv(self):
        """Toggle ``n.i_ready`` at random and check each value received."""
        while self.o != len(self.data):
            # ready_range 0 means "never ready" for this burst
            ready_range = randint(0, 3)
            for _ in range(randint(1, 10)):
                ready = randint(0, ready_range) != 0
                yield self.dut.n.i_ready.eq(ready)
                yield
                o_n_valid = yield self.dut.n.o_valid
                i_n_ready = yield self.dut.n.i_ready
                # data only counts as transferred when both are high
                if not o_n_valid or not i_n_ready:
                    continue
                out = self.dut.n.o_data
                if isinstance(out, Record):
                    # read every field of the record into a plain dict
                    o_data = {}
                    for name, field in out.fields.items():
                        o_data[name] = yield field
                else:
                    o_data = yield out
                self.resultfn(o_data, self.data[self.o], self.i, self.o)
                self.o += 1
                if self.o == len(self.data):
                    break


def test5_resultfn(o_data, expected, i, o):
    """Check that the output is the sum of the two input operands."""
    res = expected[0] + expected[1]
    assert o_data == res, \
        "%d-%d data %x not match %s\n" \
        % (i, o, o_data, repr(expected))


def testbench4(dut):
    """Single-process randomised test expecting input + 2 at the output.

    Sends the values 1..num_tests with random gaps while randomly
    toggling ``n.i_ready``, and checks every value that is transferred.
    """
    data = [idx + 1 for idx in range(num_tests)]
    i = 0
    o = 0
    while True:
        ready = randint(0, 3) != 0
        send = randint(0, 5) != 0
        yield dut.n.i_ready.eq(ready)
        o_p_ready = yield dut.p.o_ready
        if o_p_ready:
            if send and i != len(data):
                yield dut.p.i_valid.eq(1)
                yield dut.p.i_data.eq(data[i])
                i += 1
            else:
                yield dut.p.i_valid.eq(0)
        yield
        o_n_valid = yield dut.n.o_valid
        i_n_ready = yield dut.n.i_ready
        if o_n_valid and i_n_ready:
            o_data = yield dut.n.o_data
            assert o_data == data[o] + 2, "%d-%d data %x not match %x\n" \
                % (i, o, o_data, data[o])
            o += 1
            if o == len(data):
                break


class ExampleBufPipe2:
    """Two ExampleBufPipe instances connected back to back.

    pipe1 is connected to pipe2 with ``connect_to_next``; this module's
    own ``p`` port is wired to pipe1 with ``connect_in`` and its ``n``
    port to pipe2 with ``connect_out``.
    """

    def __init__(self):
        self.pipe1 = ExampleBufPipe()
        self.pipe2 = ExampleBufPipe()

        # input
        self.p = PrevControl()
        self.p.i_data = Signal(32)  # >>in - comes in from the PREVIOUS stage

        # output
        self.n = NextControl()
        self.n.o_data = Signal(32)  # out>> - goes out to the NEXT stage

    def elaborate(self, platform):
        """Build the module: both pipes plus the three sets of connections."""
        m = Module()
        m.submodules.pipe1 = self.pipe1
        m.submodules.pipe2 = self.pipe2

        # connect inter-pipe input/output valid/ready/data
        m.d.comb += self.pipe1.connect_to_next(self.pipe2)

        # inputs/outputs to the module: pipe1 connections here (LHS)
        m.d.comb += self.pipe1.connect_in(self)

        # now pipe2 connections (RHS)
        m.d.comb += self.pipe2.connect_out(self)

        return m


class ExampleBufPipeChain2(BufferedPipeline):
    """ connects two stages together as a *single* combinatorial stage.
    """

    def __init__(self):
        stage1 = ExampleStageCls()
        stage2 = ExampleStageCls()
        combined = StageChain([stage1, stage2])
        BufferedPipeline.__init__(self, combined)


def data_chain2():
    """Return num_tests random integers for the two-stage chain test."""
    return [randint(0, _CHAIN_OPERAND_MAX) for _ in range(num_tests)]


def test9_resultfn(o_data, expected, i, o):
    """Check that the output is the input value plus two."""
    res = expected + 2
    assert o_data == res, \
        "%d-%d data %x not match %s\n" \
        % (i, o, o_data, repr(expected))


class SetLessThan:
    """Combinatorial compare: output is 1 when src1 < src2, otherwise 0."""

    def __init__(self, width, signed):
        self.src1 = Signal((width, signed))
        self.src2 = Signal((width, signed))
        self.output = Signal(width)

    def elaborate(self, platform):
        m = Module()
        m.d.comb += self.output.eq(Mux(self.src1 < self.src2, 1, 0))
        return m


class LTStage:
    """Pipeline stage wrapping a 16-bit SetLessThan built with signed=True."""

    def __init__(self):
        self.slt = SetLessThan(16, True)

    def ispec(self):
        """Return the input format: a tuple of two 16-bit signals."""
        return (Signal(16), Signal(16))

    def ospec(self):
        """Return the output format: one 16-bit signal."""
        return Signal(16)

    def setup(self, m, i):
        """Add the comparator to m and wire it between i and self.o."""
        self.o = Signal(16)
        m.submodules.slt = self.slt
        m.d.comb += self.slt.src1.eq(i[0])
        m.d.comb += self.slt.src2.eq(i[1])
        m.d.comb += self.o.eq(self.slt.output)

    def process(self, i):
        """Return the output signal created by setup(); i is not used."""
        return self.o


class ExampleLTCombPipe(CombPipe):
    """ an example of how to use the combinatorial pipeline.
    """

    def __init__(self):
        stage = LTStage()
        CombPipe.__init__(self, stage)


def test6_resultfn(o_data, expected, i, o):
    """Check that the output is 1 if expected[0] < expected[1], else 0.

    NOTE(review): LTStage builds its comparator with signed=True, while
    this check compares the raw non-negative operands.  The two can only
    disagree for an operand with bit 15 set (32768 with the current data
    generator) -- confirm whether that case is intended to be covered.
    """
    res = 1 if expected[0] < expected[1] else 0
    assert o_data == res, \
        "%d-%d data %x not match %s\n" \
        % (i, o, o_data, repr(expected))


class ExampleAddRecordStage:
    """ example use of a Record
    """

    record_spec = [('src1', 16), ('src2', 16)]

    def ispec(self):
        """ returns a Record (fields src1, src2) which will be the
            incoming data
        """
        return Record(self.record_spec)

    def ospec(self):
        """ returns a Record with the same layout as the input
        """
        return Record(self.record_spec)

    def process(self, i):
        """ process the input data (adds one to each field) and returns
            the results as a dict keyed by field name
        """
        return {'src1': i.src1 + 1,
                'src2': i.src2 + 1}


class ExampleAddRecordPipe(CombPipe):
    """ an example of how to use the combinatorial pipeline.
    """

    def __init__(self):
        stage = ExampleAddRecordStage()
        CombPipe.__init__(self, stage)


def test7_resultfn(o_data, expected, i, o):
    """Check that both record fields came out incremented by one."""
    res = (expected['src1'] + 1, expected['src2'] + 1)
    assert o_data['src1'] == res[0] and o_data['src2'] == res[1], \
        "%d-%d data %s not match %s\n" \
        % (i, o, repr(o_data), repr(expected))


class Example2OpClass:
    """ an example of a class used to store 2 operands.
        requires an eq function, to conform with the pipeline stage API
    """

    def __init__(self):
        self.op1 = Signal(16)
        self.op2 = Signal(16)

    def eq(self, i):
        """ returns the list of assignments copying i.op1/i.op2 into self
        """
        return [self.op1.eq(i.op1), self.op2.eq(i.op2)]


class ExampleAddClassStage:
    """ an example of how to use the buffered pipeline, as a class instance
    """

    def ispec(self):
        """ returns an instance of an Example2OpClass.
        """
        return Example2OpClass()

    def ospec(self):
        """ returns an output signal which will happen to contain the sum
            of the two inputs
        """
        return Signal(16)

    def process(self, i):
        """ process the input data (sums op1 and op2) and returns it
        """
        return i.op1 + i.op2


class ExampleBufPipeAddClass(BufferedPipeline):
    """ an example of how to use the buffered pipeline, using a class instance
    """

    def __init__(self):
        addstage = ExampleAddClassStage()
        BufferedPipeline.__init__(self, addstage)


class TestInputAdd:
    """ the eq function, called by set_input, needs an incoming object
        that conforms to the Example2OpClass.eq function requirements
        easiest way to do that is to create a class that has the exact
        same member layout (self.op1, self.op2) as Example2OpClass
    """

    def __init__(self, op1, op2):
        self.op1 = op1
        self.op2 = op2

    def __repr__(self):
        # shown in test8_resultfn's failure message; without this the
        # message would only contain the default object address
        return "TestInputAdd(%r, %r)" % (self.op1, self.op2)


def test8_resultfn(o_data, expected, i, o):
    """Check that the output is op1 + op2 of the TestInputAdd sent."""
    res = expected.op1 + expected.op2  # these are a TestInputAdd instance
    assert o_data == res, \
        "%d-%d data %x not match %s\n" \
        % (i, o, o_data, repr(expected))


def data_2op():
    """Return num_tests TestInputAdd instances with random operands."""
    return [TestInputAdd(randint(0, _OPERAND_MAX), randint(0, _OPERAND_MAX))
            for _ in range(num_tests)]


def _handshake_ports(dut):
    """Return the four valid/ready handshake signals of dut as a list."""
    return [dut.p.i_valid, dut.n.i_ready,
            dut.n.o_valid, dut.p.o_ready]


def _write_rtlil(dut, ports, filename):
    """Convert dut to RTLIL with the given ports and write it to filename."""
    # convert first, so that no file is created if conversion fails
    il = rtlil.convert(dut, ports=ports)
    with open(filename, "w") as f:
        f.write(il)


# number of values pushed through the dut by each randomised test
num_tests = 100

if __name__ == '__main__':
    print("test 1")
    dut = ExampleBufPipe()
    run_simulation(dut, testbench(dut), vcd_name="test_bufpipe.vcd")

    print("test 2")
    dut = ExampleBufPipe2()
    run_simulation(dut, testbench2(dut), vcd_name="test_bufpipe2.vcd")

    print("test 3")
    dut = ExampleBufPipe()
    test = Test3(dut, test3_resultfn)
    run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpipe3.vcd")

    print("test 3.5")
    dut = ExampleCombPipe()
    test = Test3(dut, test3_resultfn)
    run_simulation(dut, [test.send, test.rcv], vcd_name="test_combpipe3.vcd")

    print("test 4")
    dut = ExampleBufPipe2()
    run_simulation(dut, testbench4(dut), vcd_name="test_bufpipe4.vcd")

    print("test 5")
    dut = ExampleBufPipeAdd()
    test = Test5(dut, test5_resultfn)
    run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpipe5.vcd")

    print("test 6")
    dut = ExampleLTCombPipe()
    test = Test5(dut, test6_resultfn)
    run_simulation(dut, [test.send, test.rcv], vcd_name="test_ltcomb6.vcd")

    ports = _handshake_ports(dut) + list(dut.p.i_data) + [dut.n.o_data]
    _write_rtlil(dut, ports, "test_ltcomb_pipe.il")

    print("test 7")
    dut = ExampleAddRecordPipe()
    data = data_dict()
    test = Test5(dut, test7_resultfn, data=data)
    run_simulation(dut, [test.send, test.rcv], vcd_name="test_addrecord.vcd")

    ports = _handshake_ports(dut) + [
        dut.p.i_data.src1, dut.p.i_data.src2,
        dut.n.o_data.src1, dut.n.o_data.src2,
    ]
    _write_rtlil(dut, ports, "test_recordcomb_pipe.il")

    print("test 8")
    dut = ExampleBufPipeAddClass()
    data = data_2op()
    test = Test5(dut, test8_resultfn, data=data)
    run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpipe8.vcd")

    print("test 9")
    dut = ExampleBufPipeChain2()
    # the RTLIL dump is done before the simulation for this dut
    ports = _handshake_ports(dut) + [dut.p.i_data] + [dut.n.o_data]
    _write_rtlil(dut, ports, "test_bufpipechain2.il")

    data = data_chain2()
    test = Test5(dut, test9_resultfn, data=data)
    run_simulation(dut, [test.send, test.rcv],
                   vcd_name="test_bufpipechain2.vcd")