output less-than test to ilang

[ieee754fpu.git] / src / add / test_buf_pipe.py
diff --git a/src/add/test_buf_pipe.py b/src/add/test_buf_pipe.py

index c46685acb9f0651005ecbf52ec35f82659219bec..e32e2d322bc87cab445f677d0067376ce0c6b5aa 100644 (file)
--- a/src/add/test_buf_pipe.py
+++ b/src/add/test_buf_pipe.py
@@ -1,78 +1,357 @@
-""" nmigen implementation of buffered pipeline stage, based on zipcpu:
-    https://zipcpu.com/blog/2017/08/14/strategies-for-pipelining.html
-
-    this module requires quite a bit of thought to understand how it works
-    (and why it is needed in the first place).  reading the above is
-    *strongly* recommended.
-
-    unlike john dawson's IEEE754 FPU STB/ACK signalling, which requires
-    the STB / ACK signals to raise and lower (on separate clocks) before
-    data may proceeed (thus only allowing one piece of data to proceed
-    on *ALTERNATE* cycles), the signalling here is a true pipeline
-    where data will flow on *every* clock when the conditions are right.
-
-    input acceptance conditions are when:
-        * incoming previous-stage strobe (i_p_stb) is HIGH
-        * outgoing previous-stage busy   (o_p_busy) is LOW
-
-    output transmission conditions are when:
-        * outgoing next-stage strobe (o_n_stb) is HIGH
-        * outgoing next-stage busy   (i_n_busy) is LOW
-
-    the tricky bit is when the input has valid data and the output is not
-    ready to accept it.  if it wasn't for the clock synchronisation, it
-    would be possible to tell the input "hey don't send that data, we're
-    not ready".  unfortunately, it's not possible to "change the past":
-    the previous stage *has no choice* but to pass on its data.
-
-    therefore, the incoming data *must* be accepted - and stored.
-    on the same clock, it's possible to tell the input that it must
-    not send any more data.  this is the "stall" condition.
-
-    we now effectively have *two* possible pieces of data to "choose" from:
-    the buffered data, and the incoming data.  the decision as to which
-    to process and output is based on whether we are in "stall" or not.
-    i.e. when the next stage is no longer busy, the output comes from
-    the buffer if a stall had previously occurred, otherwise it comes
-    direct from processing the input.
-
-    it's quite a complex state machine!
-"""
-
+from nmigen import Module, Signal, Mux
  from nmigen.compat.sim import run_simulation
-from example_buf_pipe import BufPipe
+from nmigen.cli import verilog, rtlil
+
+from example_buf_pipe import ExampleBufPipe, ExampleBufPipeAdd
+from example_buf_pipe import ExampleCombPipe, CombPipe
+from example_buf_pipe import PrevControl, NextControl
+from random import randint
+
+
+def check_o_n_valid(dut, val):
+    o_n_valid = yield dut.n.o_valid
+    assert o_n_valid == val
  
  
  def testbench(dut):
      #yield dut.i_p_rst.eq(1)
-    yield dut.i_n_busy.eq(1)
-    yield dut.o_p_busy.eq(1)
+    yield dut.n.i_ready.eq(0)
+    yield dut.p.o_ready.eq(0)
      yield
      yield
      #yield dut.i_p_rst.eq(0)
-    yield dut.i_n_busy.eq(0)
-    yield dut.i_data.eq(5)
-    yield dut.i_p_stb.eq(1)
+    yield dut.n.i_ready.eq(1)
+    yield dut.p.i_data.eq(5)
+    yield dut.p.i_valid.eq(1)
+    yield
+
+    yield dut.p.i_data.eq(7)
+    yield from check_o_n_valid(dut, 0) # effects of i_p_valid delayed
+    yield
+    yield from check_o_n_valid(dut, 1) # ok *now* i_p_valid effect is felt
+
+    yield dut.p.i_data.eq(2)
+    yield
+    yield dut.n.i_ready.eq(0) # begin going into "stall" (next stage not ready)
+    yield dut.p.i_data.eq(9)
+    yield
+    yield dut.p.i_valid.eq(0)
+    yield dut.p.i_data.eq(12)
+    yield
+    yield dut.p.i_data.eq(32)
+    yield dut.n.i_ready.eq(1)
+    yield
+    yield from check_o_n_valid(dut, 1) # buffer still needs to output
+    yield
+    yield from check_o_n_valid(dut, 1) # buffer still needs to output
+    yield
+    yield from check_o_n_valid(dut, 0) # buffer outputted, *now* we're done.
+    yield
+
+
+def testbench2(dut):
+    #yield dut.p.i_rst.eq(1)
+    yield dut.n.i_ready.eq(0)
+    #yield dut.p.o_ready.eq(0)
+    yield
+    yield
+    #yield dut.p.i_rst.eq(0)
+    yield dut.n.i_ready.eq(1)
+    yield dut.p.i_data.eq(5)
+    yield dut.p.i_valid.eq(1)
+    yield
+
+    yield dut.p.i_data.eq(7)
+    yield from check_o_n_valid(dut, 0) # effects of i_p_valid delayed 2 clocks
+    yield
+    yield from check_o_n_valid(dut, 0) # effects of i_p_valid delayed 2 clocks
+
+    yield dut.p.i_data.eq(2)
+    yield
+    yield from check_o_n_valid(dut, 1) # ok *now* i_p_valid effect is felt
+    yield dut.n.i_ready.eq(0) # begin going into "stall" (next stage not ready)
+    yield dut.p.i_data.eq(9)
      yield
-    yield dut.i_data.eq(7)
+    yield dut.p.i_valid.eq(0)
+    yield dut.p.i_data.eq(12)
      yield
-    yield dut.i_data.eq(2)
+    yield dut.p.i_data.eq(32)
+    yield dut.n.i_ready.eq(1)
      yield
-    yield dut.i_n_busy.eq(1)
-    yield dut.i_data.eq(9)
+    yield from check_o_n_valid(dut, 1) # buffer still needs to output
      yield
-    yield dut.i_p_stb.eq(0)
-    yield dut.i_data.eq(12)
+    yield from check_o_n_valid(dut, 1) # buffer still needs to output
      yield
-    yield dut.i_data.eq(32)
-    yield dut.i_n_busy.eq(0)
+    yield from check_o_n_valid(dut, 1) # buffer still needs to output
      yield
+    yield from check_o_n_valid(dut, 0) # buffer outputted, *now* we're done.
      yield
      yield
      yield
  
  
+class Test3:
+    def __init__(self, dut, resultfn):
+        self.dut = dut
+        self.resultfn = resultfn
+        self.data = []
+        for i in range(num_tests):
+            #data.append(randint(0, 1<<16-1))
+            self.data.append(i+1)
+        self.i = 0
+        self.o = 0
+
+    def send(self):
+        while self.o != len(self.data):
+            send_range = randint(0, 3)
+            for j in range(randint(1,10)):
+                if send_range == 0:
+                    send = True
+                else:
+                    send = randint(0, send_range) != 0
+                o_p_ready = yield self.dut.p.o_ready
+                if not o_p_ready:
+                    yield
+                    continue
+                if send and self.i != len(self.data):
+                    yield self.dut.p.i_valid.eq(1)
+                    yield self.dut.p.i_data.eq(self.data[self.i])
+                    self.i += 1
+                else:
+                    yield self.dut.p.i_valid.eq(0)
+                yield
+
+    def rcv(self):
+        while self.o != len(self.data):
+            stall_range = randint(0, 3)
+            for j in range(randint(1,10)):
+                stall = randint(0, stall_range) != 0
+                yield self.dut.n.i_ready.eq(stall)
+                yield
+                o_n_valid = yield self.dut.n.o_valid
+                i_n_ready = yield self.dut.n.i_ready
+                if not o_n_valid or not i_n_ready:
+                    continue
+                o_data = yield self.dut.n.o_data
+                self.resultfn(o_data, self.data[self.o], self.i, self.o)
+                self.o += 1
+                if self.o == len(self.data):
+                    break
+
+def test3_resultfn(o_data, expected, i, o):
+    assert o_data == expected + 1, \
+                "%d-%d data %x not match %x\n" \
+                % (i, o, o_data, expected)
+
+class Test5:
+    def __init__(self, dut, resultfn):
+        self.dut = dut
+        self.resultfn = resultfn
+        self.data = []
+        for i in range(num_tests):
+            self.data.append((randint(0, 1<<16-1), randint(0, 1<<16-1)))
+        self.i = 0
+        self.o = 0
+
+    def send(self):
+        while self.o != len(self.data):
+            send_range = randint(0, 3)
+            for j in range(randint(1,10)):
+                if send_range == 0:
+                    send = True
+                else:
+                    send = randint(0, send_range) != 0
+                o_p_ready = yield self.dut.p.o_ready
+                if not o_p_ready:
+                    yield
+                    continue
+                if send and self.i != len(self.data):
+                    yield self.dut.p.i_valid.eq(1)
+                    for v in self.dut.set_input(self.data[self.i]):
+                        yield v
+                    self.i += 1
+                else:
+                    yield self.dut.p.i_valid.eq(0)
+                yield
+
+    def rcv(self):
+        while self.o != len(self.data):
+            stall_range = randint(0, 3)
+            for j in range(randint(1,10)):
+                stall = randint(0, stall_range) != 0
+                yield self.dut.n.i_ready.eq(stall)
+                yield
+                o_n_valid = yield self.dut.n.o_valid
+                i_n_ready = yield self.dut.n.i_ready
+                if not o_n_valid or not i_n_ready:
+                    continue
+                o_data = yield self.dut.n.o_data
+                self.resultfn(o_data, self.data[self.o], self.i, self.o)
+                self.o += 1
+                if self.o == len(self.data):
+                    break
+
+def test5_resultfn(o_data, expected, i, o):
+    res = expected[0] + expected[1]
+    assert o_data == res, \
+                "%d-%d data %x not match %s\n" \
+                % (i, o, o_data, repr(expected))
+
+def testbench4(dut):
+    data = []
+    for i in range(num_tests):
+        #data.append(randint(0, 1<<16-1))
+        data.append(i+1)
+    i = 0
+    o = 0
+    while True:
+        stall = randint(0, 3) != 0
+        send = randint(0, 5) != 0
+        yield dut.n.i_ready.eq(stall)
+        o_p_ready = yield dut.p.o_ready
+        if o_p_ready:
+            if send and i != len(data):
+                yield dut.p.i_valid.eq(1)
+                yield dut.p.i_data.eq(data[i])
+                i += 1
+            else:
+                yield dut.p.i_valid.eq(0)
+        yield
+        o_n_valid = yield dut.n.o_valid
+        i_n_ready = yield dut.n.i_ready
+        if o_n_valid and i_n_ready:
+            o_data = yield dut.n.o_data
+            assert o_data == data[o] + 2, "%d-%d data %x not match %x\n" \
+                                        % (i, o, o_data, data[o])
+            o += 1
+            if o == len(data):
+                break
+
+
+class ExampleBufPipe2:
+    """
+        connect these:  ------|---------------|
+                              v               v
+        i_p_valid >>in  pipe1 o_n_valid out>> i_p_valid >>in  pipe2
+        o_p_ready <<out pipe1 i_n_ready <<in  o_p_ready <<out pipe2
+        p_i_data  >>in  pipe1 n_o_data  out>> p_i_data  >>in  pipe2
+    """
+    def __init__(self):
+        self.pipe1 = ExampleBufPipe()
+        self.pipe2 = ExampleBufPipe()
+
+        # input
+        self.p = PrevControl()
+        self.p.i_data = Signal(32) # >>in - comes in from the PREVIOUS stage
+
+        # output
+        self.n = NextControl()
+        self.n.o_data = Signal(32) # out>> - goes out to the NEXT stage
+
+    def elaborate(self, platform):
+        m = Module()
+        m.submodules.pipe1 = self.pipe1
+        m.submodules.pipe2 = self.pipe2
+
+        # connect inter-pipe input/output valid/ready/data
+        m.d.comb += self.pipe1.connect_to_next(self.pipe2)
+
+        # inputs/outputs to the module: pipe1 connections here (LHS)
+        m.d.comb += self.pipe1.connect_in(self)
+
+        # now pipe2 connections (RHS)
+        m.d.comb += self.pipe2.connect_out(self)
+
+        return m
+
+class SetLessThan:
+    def __init__(self, width, signed):
+        self.src1 = Signal((width, signed))
+        self.src2 = Signal((width, signed))
+        self.output = Signal(width)
+
+    def elaborate(self, platform):
+        m = Module()
+        m.d.comb += self.output.eq(Mux(self.src1 < self.src2, 1, 0))
+        return m
+
+
+class LTStage:
+    def __init__(self):
+        self.slt = SetLessThan(16, True)
+
+    def ispec(self):
+        return (Signal(16), Signal(16))
+
+    def ospec(self):
+        return Signal(16)
+
+    def setup(self, m, i):
+        self.o = Signal(16)
+        m.submodules.slt = self.slt
+        m.d.comb += self.slt.src1.eq(i[0])
+        m.d.comb += self.slt.src2.eq(i[1])
+        m.d.comb += self.o.eq(self.slt.output)
+
+    def process(self, i):
+        return self.o
+
+
+class ExampleLTCombPipe(CombPipe):
+    """ an example of how to use the combinatorial pipeline.
+    """
+
+    def __init__(self):
+        stage = LTStage()
+        CombPipe.__init__(self, stage)
+
+
+def test6_resultfn(o_data, expected, i, o):
+    res = 1 if expected[0] < expected[1] else 0
+    assert o_data == res, \
+                "%d-%d data %x not match %s\n" \
+                % (i, o, o_data, repr(expected))
+
+
+num_tests = 1000
+
  if __name__ == '__main__':
-    dut = BufPipe()
+    print ("test 1")
+    dut = ExampleBufPipe()
      run_simulation(dut, testbench(dut), vcd_name="test_bufpipe.vcd")
  
+    print ("test 2")
+    dut = ExampleBufPipe2()
+    run_simulation(dut, testbench2(dut), vcd_name="test_bufpipe2.vcd")
+
+    print ("test 3")
+    dut = ExampleBufPipe()
+    test = Test3(dut, test3_resultfn)
+    run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpipe3.vcd")
+
+    print ("test 3.5")
+    dut = ExampleCombPipe()
+    test = Test3(dut, test3_resultfn)
+    run_simulation(dut, [test.send, test.rcv], vcd_name="test_combpipe3.vcd")
+
+    print ("test 4")
+    dut = ExampleBufPipe2()
+    run_simulation(dut, testbench4(dut), vcd_name="test_bufpipe4.vcd")
+
+    print ("test 5")
+    dut = ExampleBufPipeAdd()
+    test = Test5(dut, test5_resultfn)
+    run_simulation(dut, [test.send, test.rcv], vcd_name="test_bufpipe5.vcd")
+
+    print ("test 6")
+    dut = ExampleLTCombPipe()
+    test = Test5(dut, test6_resultfn)
+    run_simulation(dut, [test.send, test.rcv], vcd_name="test_ltcomb6.vcd")
+
+    ports = [dut.p.i_valid, dut.n.i_ready,
+             dut.n.o_valid, dut.p.o_ready] + \
+             list(dut.p.i_data) + [dut.n.o_data]
+    vl = rtlil.convert(dut, ports=ports)
+    with open("test_ltcomb_pipe.il", "w") as f:
+        f.write(vl)
+