From: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Date: Thu, 2 May 2019 13:08:55 +0000 (+0100)
Subject: more code-shuffling into subdirs
X-Git-Tag: ls180-24jan2020~1081
X-Git-Url: https://git.libre-soc.org/?p=ieee754fpu.git;a=commitdiff_plain;h=e71ebd7c7df6fed881f1a5cea15ae1d7b022cd28;hp=c413b537ad80d8392a19975561b18063992a1939

more code-shuffling into subdirs
---

diff --git a/src/ieee754/add/fmul.py b/src/ieee754/add/fmul.py
deleted file mode 100644
index abe6f613..00000000
--- a/src/ieee754/add/fmul.py
+++ /dev/null
@@ -1,172 +0,0 @@
-from nmigen import Module, Signal, Cat, Mux, Array, Const
-from nmigen.cli import main, verilog
-
-from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPState
-from fpcommon.getop import FPGetOp
-from nmutil.singlepipe import eq
-
-
-class FPMUL(FPBase):
-
-    def __init__(self, width):
-        FPBase.__init__(self)
-        self.width = width
-
-        self.in_a  = FPOp(width)
-        self.in_b  = FPOp(width)
-        self.out_z = FPOp(width)
-
-        self.states = []
-
-    def add_state(self, state):
-        self.states.append(state)
-        return state
-
-    def elaborate(self, platform=None):
-        """ creates the HDL code-fragment for FPMUL
-        """
-        m = Module()
-
-        # Latches
-        a = FPNumIn(None, self.width, False)
-        b = FPNumIn(None, self.width, False)
-        z = FPNumOut(self.width, False)
-
-        mw = (z.m_width)*2 - 1 + 3 # sticky/round/guard bits + (2*mant) - 1
-        product = Signal(mw)
-
-        of = Overflow()
-        m.submodules.of = of
-        m.submodules.a = a
-        m.submodules.b = b
-        m.submodules.z = z
-
-        m.d.comb += a.v.eq(self.in_a.v)
-        m.d.comb += b.v.eq(self.in_b.v)
-
-        with m.FSM() as fsm:
-
-            # ******
-            # gets operand a
-
-            with m.State("get_a"):
-                res = self.get_op(m, self.in_a, a, "get_b")
-                m.d.sync += eq([a, self.in_a.ack], res)
-
-            # ******
-            # gets operand b
-
-            with m.State("get_b"):
-                res = self.get_op(m, self.in_b, b, "special_cases")
-                m.d.sync += eq([b, self.in_b.ack], res)
-
-            # ******
-            # special cases
-
-            with m.State("special_cases"):
-                #if a or b is NaN return NaN
-                with m.If(a.is_nan | b.is_nan):
-                    m.next = "put_z"
-                    m.d.sync += z.nan(1)
-                #if a is inf return inf
-                with m.Elif(a.is_inf):
-                    m.next = "put_z"
-                    m.d.sync += z.inf(a.s ^ b.s)
-                    #if b is zero return NaN
-                    with m.If(b.is_zero):
-                        m.d.sync += z.nan(1)
-                #if b is inf return inf
-                with m.Elif(b.is_inf):
-                    m.next = "put_z"
-                    m.d.sync += z.inf(a.s ^ b.s)
-                    #if a is zero return NaN
-                    with m.If(a.is_zero):
-                        m.next = "put_z"
-                        m.d.sync += z.nan(1)
-                #if a is zero return zero
-                with m.Elif(a.is_zero):
-                    m.next = "put_z"
-                    m.d.sync += z.zero(a.s ^ b.s)
-                #if b is zero return zero
-                with m.Elif(b.is_zero):
-                    m.next = "put_z"
-                    m.d.sync += z.zero(a.s ^ b.s)
-                # Denormalised Number checks
-                with m.Else():
-                    m.next = "normalise_a"
-                    self.denormalise(m, a)
-                    self.denormalise(m, b)
-
-            # ******
-            # normalise_a
-
-            with m.State("normalise_a"):
-                self.op_normalise(m, a, "normalise_b")
-
-            # ******
-            # normalise_b
-
-            with m.State("normalise_b"):
-                self.op_normalise(m, b, "multiply_0")
-
-            #multiply_0
-            with m.State("multiply_0"):
-                m.next = "multiply_1"
-                m.d.sync += [
-                   z.s.eq(a.s ^ b.s),
-                   z.e.eq(a.e + b.e + 1),
-                   product.eq(a.m * b.m * 4)
-                ]
-
-            #multiply_1
-            with m.State("multiply_1"):
-                mw = z.m_width
-                m.next = "normalise_1"
-                m.d.sync += [
-                z.m.eq(product[mw+2:]),
-                of.guard.eq(product[mw+1]),
-                of.round_bit.eq(product[mw]),
-                of.sticky.eq(product[0:mw] != 0)
-            ]
-
-            # ******
-            # First stage of normalisation.
-            with m.State("normalise_1"):
-                self.normalise_1(m, z, of, "normalise_2")
-
-            # ******
-            # Second stage of normalisation.
-
-            with m.State("normalise_2"):
-                self.normalise_2(m, z, of, "round")
-
-            # ******
-            # rounding stage
-
-            with m.State("round"):
-                self.roundz(m, z, of.roundz)
-                m.next = "corrections"
-
-            # ******
-            # correction stage
-
-            with m.State("corrections"):
-                self.corrections(m, z, "pack")
-
-            # ******
-            # pack stage
-            with m.State("pack"):
-                self.pack(m, z, "put_z")
-
-            # ******
-            # put_z stage
-
-            with m.State("put_z"):
-                self.put_z(m, z, self.out_z, "get_a")
-
-        return m
-
-
-if __name__ == "__main__":
-    alu = FPMUL(width=32)
-    main(alu, ports=alu.in_a.ports() + alu.in_b.ports() + alu.out_z.ports())
diff --git a/src/ieee754/add/nmigen_div_experiment.py b/src/ieee754/add/nmigen_div_experiment.py
deleted file mode 100644
index a19decd5..00000000
--- a/src/ieee754/add/nmigen_div_experiment.py
+++ /dev/null
@@ -1,246 +0,0 @@
-# IEEE Floating Point Divider (Single Precision)
-# Copyright (C) Jonathan P Dawson 2013
-# 2013-12-12
-
-from nmigen import Module, Signal, Const, Cat
-from nmigen.cli import main, verilog
-
-from fpbase import FPNumIn, FPNumOut, FPOpIn, FPOpOut, Overflow, FPBase, FPState
-from nmutil.singlepipe import eq
-
-class Div:
-    def __init__(self, width):
-        self.width = width
-        self.quot = Signal(width)  # quotient
-        self.dor = Signal(width)   # divisor
-        self.dend = Signal(width)  # dividend
-        self.rem = Signal(width)   # remainder
-        self.count = Signal(7)     # loop count
-
-        self.czero = Const(0, width)
-
-    def reset(self, m):
-        m.d.sync += [
-            self.quot.eq(self.czero),
-            self.rem.eq(self.czero),
-            self.count.eq(Const(0, 7))
-        ]
-
-
-class FPDIV(FPBase):
-
-    def __init__(self, width):
-        FPBase.__init__(self)
-        self.width = width
-
-        self.in_a  = FPOpIn(width)
-        self.in_b  = FPOpIn(width)
-        self.out_z = FPOpOut(width)
-
-        self.states = []
-
-    def add_state(self, state):
-        self.states.append(state)
-        return state
-
-    def elaborate(self, platform=None):
-        """ creates the HDL code-fragment for FPDiv
-        """
-        m = Module()
-
-        # Latches
-        a = FPNumIn(None, self.width, False)
-        b = FPNumIn(None, self.width, False)
-        z = FPNumOut(self.width, False)
-
-        div = Div(a.m_width*2 + 3) # double the mantissa width plus g/r/sticky
-
-        of = Overflow()
-        m.submodules.in_a = a
-        m.submodules.in_b = b
-        m.submodules.z = z
-        m.submodules.of = of
-
-        m.d.comb += a.v.eq(self.in_a.v)
-        m.d.comb += b.v.eq(self.in_b.v)
-
-        with m.FSM() as fsm:
-
-            # ******
-            # gets operand a
-
-            with m.State("get_a"):
-                res = self.get_op(m, self.in_a, a, "get_b")
-                m.d.sync += eq([a, self.in_a.ready_o], res)
-
-            # ******
-            # gets operand b
-
-            with m.State("get_b"):
-                res = self.get_op(m, self.in_b, b, "special_cases")
-                m.d.sync += eq([b, self.in_b.ready_o], res)
-
-            # ******
-            # special cases: NaNs, infs, zeros, denormalised
-            # NOTE: some of these are unique to div.  see "Special Operations"
-            # https://steve.hollasch.net/cgindex/coding/ieeefloat.html
-
-            with m.State("special_cases"):
-
-                # if a is NaN or b is NaN return NaN
-                with m.If(a.is_nan | b.is_nan):
-                    m.next = "put_z"
-                    m.d.sync += z.nan(1)
-
-                # if a is Inf and b is Inf return NaN
-                with m.Elif(a.is_inf & b.is_inf):
-                    m.next = "put_z"
-                    m.d.sync += z.nan(1)
-
-                # if a is inf return inf (or NaN if b is zero)
-                with m.Elif(a.is_inf):
-                    m.next = "put_z"
-                    m.d.sync += z.inf(a.s ^ b.s)
-
-                # if b is inf return zero
-                with m.Elif(b.is_inf):
-                    m.next = "put_z"
-                    m.d.sync += z.zero(a.s ^ b.s)
-
-                # if a is zero return zero (or NaN if b is zero)
-                with m.Elif(a.is_zero):
-                    m.next = "put_z"
-                    # if b is zero return NaN
-                    with m.If(b.is_zero):
-                        m.d.sync += z.nan(1)
-                    with m.Else():
-                        m.d.sync += z.zero(a.s ^ b.s)
-
-                # if b is zero return Inf
-                with m.Elif(b.is_zero):
-                    m.next = "put_z"
-                    m.d.sync += z.inf(a.s ^ b.s)
-
-                # Denormalised Number checks
-                with m.Else():
-                    m.next = "normalise_a"
-                    self.denormalise(m, a)
-                    self.denormalise(m, b)
-
-            # ******
-            # normalise_a
-
-            with m.State("normalise_a"):
-                self.op_normalise(m, a, "normalise_b")
-
-            # ******
-            # normalise_b
-
-            with m.State("normalise_b"):
-                self.op_normalise(m, b, "divide_0")
-
-            # ******
-            # First stage of divide.  initialise state
-
-            with m.State("divide_0"):
-                m.next = "divide_1"
-                m.d.sync += [
-                    z.s.eq(a.s ^ b.s), # sign
-                    z.e.eq(a.e - b.e), # exponent
-                    div.dend.eq(a.m<<(a.m_width+3)), # 3 bits for g/r/sticky
-                    div.dor.eq(b.m),
-                ]
-                div.reset(m)
-
-            # ******
-            # Second stage of divide.
-
-            with m.State("divide_1"):
-                m.next = "divide_2"
-                m.d.sync += [
-                    div.quot.eq(div.quot << 1),
-                    div.rem.eq(Cat(div.dend[-1], div.rem[0:])),
-                    div.dend.eq(div.dend << 1),
-                ]
-
-            # ******
-            # Third stage of divide.
-            # This stage ends by jumping out to divide_3
-            # However it defaults to jumping to divide_1 (which comes back here)
-
-            with m.State("divide_2"):
-                with m.If(div.rem >= div.dor):
-                    m.d.sync += [
-                        div.quot[0].eq(1),
-                        div.rem.eq(div.rem - div.dor),
-                    ]
-                with m.If(div.count == div.width-2):
-                    m.next = "divide_3"
-                with m.Else():
-                    m.next = "divide_1"
-                    m.d.sync += [
-                        div.count.eq(div.count + 1),
-                    ]
-
-            # ******
-            # Fourth stage of divide.
-
-            with m.State("divide_3"):
-                m.next = "normalise_1"
-                m.d.sync += [
-                    z.m.eq(div.quot[3:]),
-                    of.guard.eq(div.quot[2]),
-                    of.round_bit.eq(div.quot[1]),
-                    of.sticky.eq(div.quot[0] | (div.rem != 0))
-                ]
-
-            # ******
-            # First stage of normalisation.
-
-            with m.State("normalise_1"):
-                self.normalise_1(m, z, of, "normalise_2")
-
-            # ******
-            # Second stage of normalisation.
-
-            with m.State("normalise_2"):
-                self.normalise_2(m, z, of, "round")
-
-            # ******
-            # rounding stage
-
-            with m.State("round"):
-                self.roundz(m, z, of.roundz)
-                m.next = "corrections"
-
-            # ******
-            # correction stage
-
-            with m.State("corrections"):
-                self.corrections(m, z, "pack")
-
-            # ******
-            # pack stage
-
-            with m.State("pack"):
-                self.pack(m, z, "put_z")
-
-            # ******
-            # put_z stage
-
-            with m.State("put_z"):
-                self.put_z(m, z, self.out_z, "get_a")
-
-        return m
-
-
-if __name__ == "__main__":
-    alu = FPDIV(width=32)
-    main(alu, ports=alu.in_a.ports() + alu.in_b.ports() + alu.out_z.ports())
-
-
-    # works... but don't use, just do "python fname.py convert -t v"
-    #print (verilog.convert(alu, ports=[
-    #                        ports=alu.in_a.ports() + \
-    #                              alu.in_b.ports() + \
-    #                              alu.out_z.ports())
diff --git a/src/ieee754/add/test_div.py b/src/ieee754/add/test_div.py
deleted file mode 100644
index 3f192338..00000000
--- a/src/ieee754/add/test_div.py
+++ /dev/null
@@ -1,47 +0,0 @@
-import sys
-from random import randint
-from random import seed
-from operator import truediv
-
-from nmigen import Module, Signal
-from nmigen.compat.sim import run_simulation
-
-from nmigen_div_experiment import FPDIV
-
-from unit_test_single import (get_mantissa, get_exponent, get_sign, is_nan,
-                                is_inf, is_pos_inf, is_neg_inf,
-                                match, get_case, check_case, run_test,
-                                run_edge_cases, run_corner_cases)
-
-
-def testbench(dut):
-    yield from check_case(dut, 0x80000000, 0x00000000, 0xffc00000)
-    yield from check_case(dut, 0x00000000, 0x80000000, 0xffc00000)
-    yield from check_case(dut, 0x0002b017, 0xff3807ab, 0x80000000)
-    yield from check_case(dut, 0x40000000, 0x3F800000, 0x40000000)
-    yield from check_case(dut, 0x3F800000, 0x40000000, 0x3F000000)
-    yield from check_case(dut, 0x3F800000, 0x40400000, 0x3EAAAAAB)
-    yield from check_case(dut, 0x40400000, 0x41F80000, 0x3DC6318C)
-    yield from check_case(dut, 0x41F9EB4D, 0x429A4C70, 0x3ECF52B2)
-    yield from check_case(dut, 0x7F7FFFFE, 0x70033181, 0x4EF9C4C8)
-    yield from check_case(dut, 0x7F7FFFFE, 0x70000001, 0x4EFFFFFC)
-    yield from check_case(dut, 0x7F7FFCFF, 0x70200201, 0x4ECCC7D5)
-    yield from check_case(dut, 0x70200201, 0x7F7FFCFF, 0x302003E2)
-
-    count = 0
-
-    #regression tests
-    stimulus_a = [0xbf9b1e94, 0x34082401, 0x5e8ef81, 0x5c75da81, 0x2b017]
-    stimulus_b = [0xc038ed3a, 0xb328cd45, 0x114f3db, 0x2f642a39, 0xff3807ab]
-    yield from run_test(dut, stimulus_a, stimulus_b, truediv, get_case)
-    count += len(stimulus_a)
-    print (count, "vectors passed")
-
-    yield from run_corner_cases(dut, count, truediv, get_case)
-    yield from run_edge_cases(dut, count, truediv, get_case)
-
-
-if __name__ == '__main__':
-    dut = FPDIV(width=32)
-    run_simulation(dut, testbench(dut), vcd_name="test_div.vcd")
-
diff --git a/src/ieee754/add/test_div64.py b/src/ieee754/add/test_div64.py
deleted file mode 100644
index 5a9daf23..00000000
--- a/src/ieee754/add/test_div64.py
+++ /dev/null
@@ -1,67 +0,0 @@
-from nmigen import Module, Signal
-from nmigen.compat.sim import run_simulation
-
-from nmigen_div_experiment import FPDIV
-
-class ORGate:
-    def __init__(self):
-        self.a = Signal()
-        self.b = Signal()
-        self.x = Signal()
-
-    def elaborate(self, platform=None):
-
-        m = Module()
-        m.d.comb += self.x.eq(self.a | self.b)
-
-        return m
-
-def check_case(dut, a, b, z):
-    yield dut.in_a.v.eq(a)
-    yield dut.in_a.stb.eq(1)
-    yield
-    yield
-    a_ack = (yield dut.in_a.ack)
-    assert a_ack == 0
-    yield dut.in_b.v.eq(b)
-    yield dut.in_b.stb.eq(1)
-    b_ack = (yield dut.in_b.ack)
-    assert b_ack == 0
-
-    while True:
-        yield
-        out_z_stb = (yield dut.out_z.stb)
-        if not out_z_stb:
-            continue
-        yield dut.in_a.stb.eq(0)
-        yield dut.in_b.stb.eq(0)
-        yield dut.out_z.ack.eq(1)
-        yield
-        yield dut.out_z.ack.eq(0)
-        yield
-        yield
-        break
-
-    out_z = yield dut.out_z.v
-    assert out_z == z, "Output z 0x%x not equal to expected 0x%x" % (out_z, z)
-
-def testbench(dut):
-    yield from check_case(dut, 0x4008000000000000, 0x3FF0000000000000,
-                               0x4008000000000000)
-    yield from check_case(dut, 0x3FF0000000000000, 0x4008000000000000,
-                               0x3FD5555555555555)
-
-    if False:
-        yield from check_case(dut, 0x3F800000, 0x40000000, 0x3F000000)
-        yield from check_case(dut, 0x3F800000, 0x40400000, 0x3EAAAAAB)
-        yield from check_case(dut, 0x40400000, 0x41F80000, 0x3DC6318C)
-        yield from check_case(dut, 0x41F9EB4D, 0x429A4C70, 0x3ECF52B2)
-        yield from check_case(dut, 0x7F7FFFFE, 0x70033181, 0x4EF9C4C8)
-        yield from check_case(dut, 0x7F7FFFFE, 0x70000001, 0x4EFFFFFC)
-        yield from check_case(dut, 0x7F7FFCFF, 0x70200201, 0x4ECCC7D5)
-        yield from check_case(dut, 0x70200201, 0x7F7FFCFF, 0x302003E2)
-
-if __name__ == '__main__':
-    dut = FPDIV(width=64)
-    run_simulation(dut, testbench(dut), vcd_name="test_div64.vcd")
-
diff --git a/src/ieee754/add/test_fpadd_pipe.py b/src/ieee754/add/test_fpadd_pipe.py
deleted file mode 100644
index df25e55f..00000000
--- a/src/ieee754/add/test_fpadd_pipe.py
+++ /dev/null
@@ -1,126 +0,0 @@
-""" key strategic example showing how to do multi-input fan-in into a
-    multi-stage pipeline, then multi-output fanout.
-
-    the multiplex ID from the fan-in is passed in to the pipeline, preserved,
-    and used as a routing ID on the fanout.
-"""
-
-from random import randint
-from math import log
-from nmigen import Module, Signal, Cat, Value
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-
-from nmigen_add_experiment import (FPADDMuxInOut,)
-
-from sfpy import Float32
-
-class InputTest:
-    def __init__(self, dut):
-        self.dut = dut
-        self.di = {}
-        self.do = {}
-        self.tlen = 10
-        self.width = 32
-        for mid in range(dut.num_rows):
-            self.di[mid] = {}
-            self.do[mid] = []
-            for i in range(self.tlen):
-                op1 = randint(0, (1<<self.width)-1)
-                op2 = randint(0, (1<<self.width)-1)
-                #op1 = 0x40900000
-                #op2 = 0x40200000
-                res = Float32(op1) + Float32(op2)
-                self.di[mid][i] = (op1, op2)
-                self.do[mid].append(res.bits)
-
-    def send(self, mid):
-        for i in range(self.tlen):
-            op1, op2 = self.di[mid][i]
-            rs = dut.p[mid]
-            yield rs.valid_i.eq(1)
-            yield rs.data_i.a.eq(op1)
-            yield rs.data_i.b.eq(op2)
-            yield rs.data_i.mid.eq(mid)
-            yield
-            o_p_ready = yield rs.ready_o
-            while not o_p_ready:
-                yield
-                o_p_ready = yield rs.ready_o
-
-            fop1 = Float32(op1)
-            fop2 = Float32(op2)
-            res = fop1 + fop2
-            print ("send", mid, i, hex(op1), hex(op2), hex(res.bits),
-                           fop1, fop2, res)
-
-            yield rs.valid_i.eq(0)
-            # wait random period of time before queueing another value
-            for i in range(randint(0, 3)):
-                yield
-
-        yield rs.valid_i.eq(0)
-        yield
-
-        print ("send ended", mid)
-
-        ## wait random period of time before queueing another value
-        #for i in range(randint(0, 3)):
-        #    yield
-
-        #send_range = randint(0, 3)
-        #if send_range == 0:
-        #    send = True
-        #else:
-        #    send = randint(0, send_range) != 0
-
-    def rcv(self, mid):
-        while True:
-            #stall_range = randint(0, 3)
-            #for j in range(randint(1,10)):
-            #    stall = randint(0, stall_range) != 0
-            #    yield self.dut.n[0].ready_i.eq(stall)
-            #    yield
-            n = self.dut.n[mid]
-            yield n.ready_i.eq(1)
-            yield
-            o_n_valid = yield n.valid_o
-            i_n_ready = yield n.ready_i
-            if not o_n_valid or not i_n_ready:
-                continue
-
-            out_mid = yield n.data_o.mid
-            out_z = yield n.data_o.z
-
-            out_i = 0
-
-            print ("recv", out_mid, hex(out_z), "expected",
-                        hex(self.do[mid][out_i] ))
-
-            # see if this output has occurred already, delete it if it has
-            assert mid == out_mid, "out_mid %d not correct %d" % (out_mid, mid)
-            assert self.do[mid][out_i] == out_z
-            del self.do[mid][out_i]
-
-            # check if there's any more outputs
-            if len(self.do[mid]) == 0:
-                break
-        print ("recv ended", mid)
-
-
-
-if __name__ == '__main__':
-    dut = FPADDMuxInOut(32, 4)
-    vl = rtlil.convert(dut, ports=dut.ports())
-    with open("test_fpadd_pipe.il", "w") as f:
-        f.write(vl)
-    #run_simulation(dut, testbench(dut), vcd_name="test_inputgroup.vcd")
-
-    test = InputTest(dut)
-    run_simulation(dut, [test.rcv(1), test.rcv(0),
-                         test.rcv(3), test.rcv(2),
-                         test.send(0), test.send(1),
-                         test.send(3), test.send(2),
-                        ],
-                   vcd_name="test_fpadd_pipe.vcd")
-
diff --git a/src/ieee754/add/test_mul.py b/src/ieee754/add/test_mul.py
deleted file mode 100644
index 21d82528..00000000
--- a/src/ieee754/add/test_mul.py
+++ /dev/null
@@ -1,39 +0,0 @@
-import sys
-from random import randint
-from random import seed
-from operator import mul
-
-from nmigen import Module, Signal
-from nmigen.compat.sim import run_simulation
-
-from fmul import FPMUL
-
-from unit_test_single import (get_mantissa, get_exponent, get_sign, is_nan,
-                                is_inf, is_pos_inf, is_neg_inf,
-                                match, get_case, check_case, run_test,
-                                run_edge_cases, run_corner_cases)
-
-
-def testbench(dut):
-    yield from check_case(dut, 0x40000000, 0x40000000, 0x40800000)
-    yield from check_case(dut, 0x41400000, 0x40A00000, 0x42700000)
-
-    count = 0
-
-    #regression tests
-    stimulus_a = [0xba57711a, 0xbf9b1e94, 0x34082401, 0x5e8ef81,
-                  0x5c75da81, 0x2b017]
-    stimulus_b = [0xee1818c5, 0xc038ed3a, 0xb328cd45, 0x114f3db,
-                  0x2f642a39, 0xff3807ab]
-    yield from run_test(dut, stimulus_a, stimulus_b, mul, get_case)
-    count += len(stimulus_a)
-    print (count, "vectors passed")
-
-    yield from run_corner_cases(dut, count, mul, get_case)
-    yield from run_edge_cases(dut, count, mul, get_case)
-
-
-if __name__ == '__main__':
-    dut = FPMUL(width=32)
-    run_simulation(dut, testbench(dut), vcd_name="test_mul.vcd")
-
diff --git a/src/ieee754/add/test_mul64.py b/src/ieee754/add/test_mul64.py
deleted file mode 100644
index 81c5b5a4..00000000
--- a/src/ieee754/add/test_mul64.py
+++ /dev/null
@@ -1,37 +0,0 @@
-from nmigen import Module, Signal
-from nmigen.compat.sim import run_simulation
-from operator import mul
-
-from fmul import FPMUL
-
-import sys
-import atexit
-from random import randint
-from random import seed
-
-from unit_test_double import (get_mantissa, get_exponent, get_sign, is_nan,
-                                is_inf, is_pos_inf, is_neg_inf,
-                                match, get_case, check_case, run_test,
-                                run_edge_cases, run_corner_cases)
-
-
-def testbench(dut):
-    yield from check_case(dut, 0, 0, 0)
-
-    count = 0
-
-    #regression tests
-    stimulus_a = [0xff80000000000000, 0x3351099a0528e138]
-    stimulus_b = [0x7f80000000000000, 0xd651a9a9986af2b5]
-    yield from run_test(dut, stimulus_a, stimulus_b, mul)
-    count += len(stimulus_a)
-    print (count, "vectors passed")
-
-    yield from run_corner_cases(dut, count, mul)
-    yield from run_edge_cases(dut, count, mul)
-
-
-if __name__ == '__main__':
-    dut = FPMUL(width=64)
-    run_simulation(dut, testbench(dut), vcd_name="test_mul64.vcd")
-
diff --git a/src/ieee754/fpadd/test/test_fpadd_pipe.py b/src/ieee754/fpadd/test/test_fpadd_pipe.py
new file mode 100644
index 00000000..df25e55f
--- /dev/null
+++ b/src/ieee754/fpadd/test/test_fpadd_pipe.py
@@ -0,0 +1,126 @@
+""" key strategic example showing how to do multi-input fan-in into a
+    multi-stage pipeline, then multi-output fanout.
+
+    the multiplex ID from the fan-in is passed in to the pipeline, preserved,
+    and used as a routing ID on the fanout.
+"""
+
+from random import randint
+from math import log
+from nmigen import Module, Signal, Cat, Value
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+
+from nmigen_add_experiment import (FPADDMuxInOut,)
+
+from sfpy import Float32
+
+class InputTest:
+    """ queues random FP32 add operands for each mux row of the dut and
+        checks the results coming out of the matching output row.
+        di[mid][i] is the i'th (op1, op2) pair for row "mid"; do[mid] is
+        the list of result bit-patterns still awaited on that row.
+    """
+    def __init__(self, dut):
+        self.dut = dut
+        self.di = {}
+        self.do = {}
+        self.tlen = 10
+        self.width = 32
+        for mid in range(dut.num_rows):
+            self.di[mid] = {}
+            self.do[mid] = []
+            for i in range(self.tlen):
+                op1 = randint(0, (1<<self.width)-1)
+                op2 = randint(0, (1<<self.width)-1)
+                res = Float32(op1) + Float32(op2)
+                self.di[mid][i] = (op1, op2)
+                self.do[mid].append(res.bits)
+
+    def send(self, mid):
+        """ simulation process: queues every operand pair for row "mid"
+            on input port dut.p[mid], holding valid_i high until the
+            port reports ready_o.
+        """
+        # use the dut given to the constructor, not a module-level global,
+        # so that the class also works when imported by another test.
+        rs = self.dut.p[mid]
+        for i in range(self.tlen):
+            op1, op2 = self.di[mid][i]
+            yield rs.valid_i.eq(1)
+            yield rs.data_i.a.eq(op1)
+            yield rs.data_i.b.eq(op2)
+            yield rs.data_i.mid.eq(mid)
+            yield
+            o_p_ready = yield rs.ready_o
+            while not o_p_ready:
+                yield
+                o_p_ready = yield rs.ready_o
+
+            fop1 = Float32(op1)
+            fop2 = Float32(op2)
+            res = fop1 + fop2
+            print ("send", mid, i, hex(op1), hex(op2), hex(res.bits),
+                           fop1, fop2, res)
+
+            yield rs.valid_i.eq(0)
+            # wait random period of time before queueing another value
+            for _ in range(randint(0, 3)):
+                yield
+
+        yield rs.valid_i.eq(0)
+        yield
+
+        print ("send ended", mid)
+
+    def rcv(self, mid):
+        """ simulation process: accepts results from output port
+            dut.n[mid] until every expected value for row "mid" has been
+            seen, asserting on routing ID and value as each one arrives.
+        """
+        n = self.dut.n[mid]
+        while True:
+            # ready_i is always asserted: no back-pressure (random stalls)
+            # is currently applied on the receiving side.
+            yield n.ready_i.eq(1)
+            yield
+            o_n_valid = yield n.valid_o
+            i_n_ready = yield n.ready_i
+            if not o_n_valid or not i_n_ready:
+                continue
+
+            out_mid = yield n.data_o.mid
+            out_z = yield n.data_o.z
+
+            out_i = 0 # results must come back in the order they were sent
+
+            print ("recv", out_mid, hex(out_z), "expected",
+                        hex(self.do[mid][out_i] ))
+
+            # check against the oldest outstanding result, then retire it
+            assert mid == out_mid, "out_mid %d not correct %d" % (out_mid, mid)
+            assert self.do[mid][out_i] == out_z
+            del self.do[mid][out_i]
+
+            # check if there's any more outputs
+            if len(self.do[mid]) == 0:
+                break
+        print ("recv ended", mid)
+
+
+
+if __name__ == '__main__':
+    # write the RTLIL for the dut out to a file before simulating it
+    dut = FPADDMuxInOut(32, 4)
+    il_text = rtlil.convert(dut, ports=dut.ports())
+    with open("test_fpadd_pipe.il", "w") as il_file:
+        il_file.write(il_text)
+
+    # one receiver and one sender process per row, listed in the
+    # order in which they are handed to the simulator
+    test = InputTest(dut)
+    processes = [test.rcv(row) for row in (1, 0, 3, 2)]
+    processes += [test.send(row) for row in (0, 1, 3, 2)]
+    run_simulation(dut, processes,
+                   vcd_name="test_fpadd_pipe.vcd")
+
diff --git a/src/ieee754/fpdiv/__init__.py b/src/ieee754/fpdiv/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/ieee754/fpdiv/nmigen_div_experiment.py b/src/ieee754/fpdiv/nmigen_div_experiment.py
new file mode 100644
index 00000000..a19decd5
--- /dev/null
+++ b/src/ieee754/fpdiv/nmigen_div_experiment.py
@@ -0,0 +1,246 @@
+# IEEE Floating Point Divider (Single Precision)
+# Copyright (C) Jonathan P Dawson 2013
+# 2013-12-12
+
+from nmigen import Module, Signal, Const, Cat
+from nmigen.cli import main, verilog
+
+from fpbase import FPNumIn, FPNumOut, FPOpIn, FPOpOut, Overflow, FPBase, FPState
+from nmutil.singlepipe import eq
+
+class Div:  # bundle of signals used by FPDIV's divide_0..divide_3 states
+    def __init__(self, width):
+        self.width = width
+        self.quot = Signal(width)  # quotient
+        self.dor = Signal(width)   # divisor
+        self.dend = Signal(width)  # dividend
+        self.rem = Signal(width)   # remainder
+        self.count = Signal(7)     # loop count (7 bits wide)
+
+        self.czero = Const(0, width)  # width-wide zero used by reset()
+
+    def reset(self, m):  # sync-clear quot, rem, count; dend/dor untouched
+        m.d.sync += [
+            self.quot.eq(self.czero),
+            self.rem.eq(self.czero),
+            self.count.eq(Const(0, 7))
+        ]
+
+
+class FPDIV(FPBase):  # FSM-based floating-point divider (z = a / b)
+
+    def __init__(self, width):
+        FPBase.__init__(self)
+        self.width = width
+
+        self.in_a  = FPOpIn(width)
+        self.in_b  = FPOpIn(width)
+        self.out_z = FPOpOut(width)
+
+        self.states = []  # appended to by add_state()
+
+    def add_state(self, state):
+        self.states.append(state)
+        return state
+
+    def elaborate(self, platform=None):
+        """ creates the HDL code-fragment for FPDiv
+        """
+        m = Module()
+
+        # Latches
+        a = FPNumIn(None, self.width, False)
+        b = FPNumIn(None, self.width, False)
+        z = FPNumOut(self.width, False)
+
+        div = Div(a.m_width*2 + 3) # double the mantissa width plus g/r/sticky
+
+        of = Overflow()
+        m.submodules.in_a = a
+        m.submodules.in_b = b
+        m.submodules.z = z
+        m.submodules.of = of
+
+        m.d.comb += a.v.eq(self.in_a.v)
+        m.d.comb += b.v.eq(self.in_b.v)
+
+        with m.FSM() as fsm:
+
+            # ******
+            # gets operand a
+
+            with m.State("get_a"):
+                res = self.get_op(m, self.in_a, a, "get_b")
+                m.d.sync += eq([a, self.in_a.ready_o], res)
+
+            # ******
+            # gets operand b
+
+            with m.State("get_b"):
+                res = self.get_op(m, self.in_b, b, "special_cases")
+                m.d.sync += eq([b, self.in_b.ready_o], res)
+
+            # ******
+            # special cases: NaNs, infs, zeros, denormalised
+            # NOTE: some of these are unique to div.  see "Special Operations"
+            # https://steve.hollasch.net/cgindex/coding/ieeefloat.html
+
+            with m.State("special_cases"):
+
+                # if a is NaN or b is NaN return NaN
+                with m.If(a.is_nan | b.is_nan):
+                    m.next = "put_z"
+                    m.d.sync += z.nan(1)
+
+                # if a is Inf and b is Inf return NaN
+                with m.Elif(a.is_inf & b.is_inf):
+                    m.next = "put_z"
+                    m.d.sync += z.nan(1)
+
+                # if a is inf return inf (b is neither NaN nor inf here)
+                with m.Elif(a.is_inf):
+                    m.next = "put_z"
+                    m.d.sync += z.inf(a.s ^ b.s)
+
+                # if b is inf return zero
+                with m.Elif(b.is_inf):
+                    m.next = "put_z"
+                    m.d.sync += z.zero(a.s ^ b.s)
+
+                # if a is zero return zero (or NaN if b is zero)
+                with m.Elif(a.is_zero):
+                    m.next = "put_z"
+                    # if b is zero return NaN
+                    with m.If(b.is_zero):
+                        m.d.sync += z.nan(1)
+                    with m.Else():
+                        m.d.sync += z.zero(a.s ^ b.s)
+
+                # if b is zero return Inf
+                with m.Elif(b.is_zero):
+                    m.next = "put_z"
+                    m.d.sync += z.inf(a.s ^ b.s)
+
+                # Denormalised Number checks
+                with m.Else():
+                    m.next = "normalise_a"
+                    self.denormalise(m, a)
+                    self.denormalise(m, b)
+
+            # ******
+            # normalise_a
+
+            with m.State("normalise_a"):
+                self.op_normalise(m, a, "normalise_b")
+
+            # ******
+            # normalise_b
+
+            with m.State("normalise_b"):
+                self.op_normalise(m, b, "divide_0")
+
+            # ******
+            # First stage of divide.  initialise state
+
+            with m.State("divide_0"):
+                m.next = "divide_1"
+                m.d.sync += [
+                    z.s.eq(a.s ^ b.s), # sign
+                    z.e.eq(a.e - b.e), # exponent
+                    div.dend.eq(a.m<<(a.m_width+3)), # 3 bits for g/r/sticky
+                    div.dor.eq(b.m),
+                ]
+                div.reset(m)
+
+            # ******
+            # Second stage of divide: shift quot/dend left, dend MSB -> rem LSB.
+
+            with m.State("divide_1"):
+                m.next = "divide_2"
+                m.d.sync += [
+                    div.quot.eq(div.quot << 1),
+                    div.rem.eq(Cat(div.dend[-1], div.rem[0:])),
+                    div.dend.eq(div.dend << 1),
+                ]
+
+            # ******
+            # Third stage of divide: if rem >= dor, subtract and set quot[0].
+            # This stage ends by jumping out to divide_3
+            # However it defaults to jumping to divide_1 (which comes back here)
+
+            with m.State("divide_2"):
+                with m.If(div.rem >= div.dor):
+                    m.d.sync += [
+                        div.quot[0].eq(1),
+                        div.rem.eq(div.rem - div.dor),
+                    ]
+                with m.If(div.count == div.width-2):  # final iteration
+                    m.next = "divide_3"
+                with m.Else():
+                    m.next = "divide_1"
+                    m.d.sync += [
+                        div.count.eq(div.count + 1),
+                    ]
+
+            # ******
+            # Fourth stage of divide: quot -> mantissa + guard/round/sticky bits.
+
+            with m.State("divide_3"):
+                m.next = "normalise_1"
+                m.d.sync += [
+                    z.m.eq(div.quot[3:]),
+                    of.guard.eq(div.quot[2]),
+                    of.round_bit.eq(div.quot[1]),
+                    of.sticky.eq(div.quot[0] | (div.rem != 0))
+                ]
+
+            # ******
+            # First stage of normalisation.
+
+            with m.State("normalise_1"):
+                self.normalise_1(m, z, of, "normalise_2")
+
+            # ******
+            # Second stage of normalisation.
+
+            with m.State("normalise_2"):
+                self.normalise_2(m, z, of, "round")
+
+            # ******
+            # rounding stage
+
+            with m.State("round"):
+                self.roundz(m, z, of.roundz)
+                m.next = "corrections"
+
+            # ******
+            # correction stage
+
+            with m.State("corrections"):
+                self.corrections(m, z, "pack")
+
+            # ******
+            # pack stage
+
+            with m.State("pack"):
+                self.pack(m, z, "put_z")
+
+            # ******
+            # put_z stage: hands z to out_z, then loops back to get_a
+
+            with m.State("put_z"):
+                self.put_z(m, z, self.out_z, "get_a")
+
+        return m
+
+
+if __name__ == "__main__":
+    alu = FPDIV(width=32)  # 32-bit wide divider
+    main(alu, ports=alu.in_a.ports() + alu.in_b.ports() + alu.out_z.ports())
+
+
+    # disabled alternative (malformed as written: "ports=[ ports=...");
+    # prefer "python fname.py convert -t v" instead
+    #                        ports=alu.in_a.ports() + \
+    #                              alu.in_b.ports() + \
+    #                              alu.out_z.ports())
diff --git a/src/ieee754/fpdiv/test/test_div.py b/src/ieee754/fpdiv/test/test_div.py
new file mode 100644
index 00000000..3f192338
--- /dev/null
+++ b/src/ieee754/fpdiv/test/test_div.py
@@ -0,0 +1,47 @@
+import sys
+from random import randint
+from random import seed
+from operator import truediv
+
+from nmigen import Module, Signal
+from nmigen.compat.sim import run_simulation
+
+from nmigen_div_experiment import FPDIV
+
+from unit_test_single import (get_mantissa, get_exponent, get_sign, is_nan,
+                                is_inf, is_pos_inf, is_neg_inf,
+                                match, get_case, check_case, run_test,
+                                run_edge_cases, run_corner_cases)
+
+
+def testbench(dut):  # fixed vectors, then regression/corner/edge runs
+    yield from check_case(dut, 0x80000000, 0x00000000, 0xffc00000)  # -0/+0
+    yield from check_case(dut, 0x00000000, 0x80000000, 0xffc00000)  # +0/-0
+    yield from check_case(dut, 0x0002b017, 0xff3807ab, 0x80000000)
+    yield from check_case(dut, 0x40000000, 0x3F800000, 0x40000000)  # 2.0/1.0
+    yield from check_case(dut, 0x3F800000, 0x40000000, 0x3F000000)  # 1.0/2.0
+    yield from check_case(dut, 0x3F800000, 0x40400000, 0x3EAAAAAB)  # 1.0/3.0
+    yield from check_case(dut, 0x40400000, 0x41F80000, 0x3DC6318C)
+    yield from check_case(dut, 0x41F9EB4D, 0x429A4C70, 0x3ECF52B2)
+    yield from check_case(dut, 0x7F7FFFFE, 0x70033181, 0x4EF9C4C8)
+    yield from check_case(dut, 0x7F7FFFFE, 0x70000001, 0x4EFFFFFC)
+    yield from check_case(dut, 0x7F7FFCFF, 0x70200201, 0x4ECCC7D5)
+    yield from check_case(dut, 0x70200201, 0x7F7FFCFF, 0x302003E2)
+
+    count = 0  # running total of vectors, passed on to the helpers below
+
+    # regression tests: stimulus_a[i] is divided by stimulus_b[i]
+    stimulus_a = [0xbf9b1e94, 0x34082401, 0x5e8ef81, 0x5c75da81, 0x2b017]
+    stimulus_b = [0xc038ed3a, 0xb328cd45, 0x114f3db, 0x2f642a39, 0xff3807ab]
+    yield from run_test(dut, stimulus_a, stimulus_b, truediv, get_case)
+    count += len(stimulus_a)
+    print (count, "vectors passed")
+
+    yield from run_corner_cases(dut, count, truediv, get_case)
+    yield from run_edge_cases(dut, count, truediv, get_case)
+
+
+if __name__ == '__main__':
+    dut = FPDIV(width=32)  # 32-bit divider under test
+    run_simulation(dut, testbench(dut), vcd_name="test_div.vcd")
+
diff --git a/src/ieee754/fpdiv/test/test_div64.py b/src/ieee754/fpdiv/test/test_div64.py
new file mode 100644
index 00000000..5a9daf23
--- /dev/null
+++ b/src/ieee754/fpdiv/test/test_div64.py
@@ -0,0 +1,67 @@
+from nmigen import Module, Signal
+from nmigen.compat.sim import run_simulation
+
+from nmigen_div_experiment import FPDIV
+
+class ORGate:  # combinational x = a | b; not referenced by the tests below
+    def __init__(self):
+        self.a = Signal()
+        self.b = Signal()
+        self.x = Signal()
+
+    def elaborate(self, platform=None):
+
+        m = Module()
+        m.d.comb += self.x.eq(self.a | self.b)  # x follows a OR b
+
+        return m
+
+def check_case(dut, a, b, z):  # feed a and b to dut, assert output == z
+    yield dut.in_a.v.eq(a)
+    yield dut.in_a.stb.eq(1)
+    yield
+    yield
+    a_ack = (yield dut.in_a.ack)
+    assert a_ack == 0
+    yield dut.in_b.v.eq(b)
+    yield dut.in_b.stb.eq(1)
+    b_ack = (yield dut.in_b.ack)
+    assert b_ack == 0
+    # NOTE(review): FPDIV assigns in_a.ready_o; confirm stb/ack still exist.
+    while True:  # loop until out_z.stb reads non-zero
+        yield
+        out_z_stb = (yield dut.out_z.stb)
+        if not out_z_stb:
+            continue
+        yield dut.in_a.stb.eq(0)
+        yield dut.in_b.stb.eq(0)
+        yield dut.out_z.ack.eq(1)  # raise ack, lowered again after a yield
+        yield
+        yield dut.out_z.ack.eq(0)
+        yield
+        yield
+        break
+
+    out_z = yield dut.out_z.v
+    assert out_z == z, "Output z 0x%x not equal to expected 0x%x" % (out_z, z)
+
+def testbench(dut):  # two 64-bit divide vectors; 32-bit ones are disabled
+    yield from check_case(dut, 0x4008000000000000, 0x3FF0000000000000,
+                               0x4008000000000000)  # 3.0 / 1.0 == 3.0
+    yield from check_case(dut, 0x3FF0000000000000, 0x4008000000000000,
+                               0x3FD5555555555555)  # 1.0 / 3.0
+
+    if False:  # never runs: these are 32-bit wide vectors
+        yield from check_case(dut, 0x3F800000, 0x40000000, 0x3F000000)
+        yield from check_case(dut, 0x3F800000, 0x40400000, 0x3EAAAAAB)
+        yield from check_case(dut, 0x40400000, 0x41F80000, 0x3DC6318C)
+        yield from check_case(dut, 0x41F9EB4D, 0x429A4C70, 0x3ECF52B2)
+        yield from check_case(dut, 0x7F7FFFFE, 0x70033181, 0x4EF9C4C8)
+        yield from check_case(dut, 0x7F7FFFFE, 0x70000001, 0x4EFFFFFC)
+        yield from check_case(dut, 0x7F7FFCFF, 0x70200201, 0x4ECCC7D5)
+        yield from check_case(dut, 0x70200201, 0x7F7FFCFF, 0x302003E2)
+
+if __name__ == '__main__':
+    dut = FPDIV(width=64)  # 64-bit divider under test
+    run_simulation(dut, testbench(dut), vcd_name="test_div64.vcd")
+
diff --git a/src/ieee754/fpmul/__init__.py b/src/ieee754/fpmul/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/ieee754/fpmul/fmul.py b/src/ieee754/fpmul/fmul.py
new file mode 100644
index 00000000..abe6f613
--- /dev/null
+++ b/src/ieee754/fpmul/fmul.py
@@ -0,0 +1,172 @@
+from nmigen import Module, Signal, Cat, Mux, Array, Const
+from nmigen.cli import main, verilog
+
+from fpbase import FPNumIn, FPNumOut, FPOp, Overflow, FPBase, FPState
+from fpcommon.getop import FPGetOp
+from nmutil.singlepipe import eq
+
+
+class FPMUL(FPBase):  # FSM-based floating-point multiplier (z = a * b)
+
+    def __init__(self, width):
+        FPBase.__init__(self)
+        self.width = width
+
+        self.in_a  = FPOp(width)  # NOTE(review): FPDIV uses FPOpIn - check
+        self.in_b  = FPOp(width)
+        self.out_z = FPOp(width)
+
+        self.states = []  # appended to by add_state()
+
+    def add_state(self, state):
+        self.states.append(state)
+        return state
+
+    def elaborate(self, platform=None):
+        """ creates the HDL code-fragment for FPMUL
+        """
+        m = Module()
+
+        # Latches
+        a = FPNumIn(None, self.width, False)
+        b = FPNumIn(None, self.width, False)
+        z = FPNumOut(self.width, False)
+
+        mw = (z.m_width)*2 - 1 + 3 # sticky/round/guard bits + (2*mant) - 1
+        product = Signal(mw)
+
+        of = Overflow()
+        m.submodules.of = of
+        m.submodules.a = a
+        m.submodules.b = b
+        m.submodules.z = z
+
+        m.d.comb += a.v.eq(self.in_a.v)
+        m.d.comb += b.v.eq(self.in_b.v)
+
+        with m.FSM() as fsm:
+
+            # ******
+            # gets operand a
+
+            with m.State("get_a"):
+                res = self.get_op(m, self.in_a, a, "get_b")
+                m.d.sync += eq([a, self.in_a.ack], res)
+
+            # ******
+            # gets operand b
+
+            with m.State("get_b"):
+                res = self.get_op(m, self.in_b, b, "special_cases")
+                m.d.sync += eq([b, self.in_b.ack], res)
+
+            # ******
+            # special cases
+
+            with m.State("special_cases"):
+                # if a or b is NaN return NaN
+                with m.If(a.is_nan | b.is_nan):
+                    m.next = "put_z"
+                    m.d.sync += z.nan(1)
+                # if a is inf return inf
+                with m.Elif(a.is_inf):
+                    m.next = "put_z"
+                    m.d.sync += z.inf(a.s ^ b.s)
+                    # if b is zero return NaN (overrides z.inf above)
+                    with m.If(b.is_zero):
+                        m.d.sync += z.nan(1)
+                # if b is inf return inf
+                with m.Elif(b.is_inf):
+                    m.next = "put_z"
+                    m.d.sync += z.inf(a.s ^ b.s)
+                    # if a is zero return NaN (overrides z.inf above)
+                    with m.If(a.is_zero):
+                        m.next = "put_z"  # already set above
+                        m.d.sync += z.nan(1)
+                # if a is zero return zero
+                with m.Elif(a.is_zero):
+                    m.next = "put_z"
+                    m.d.sync += z.zero(a.s ^ b.s)
+                # if b is zero return zero
+                with m.Elif(b.is_zero):
+                    m.next = "put_z"
+                    m.d.sync += z.zero(a.s ^ b.s)
+                # Denormalised Number checks
+                with m.Else():
+                    m.next = "normalise_a"
+                    self.denormalise(m, a)
+                    self.denormalise(m, b)
+
+            # ******
+            # normalise_a
+
+            with m.State("normalise_a"):
+                self.op_normalise(m, a, "normalise_b")
+
+            # ******
+            # normalise_b
+
+            with m.State("normalise_b"):
+                self.op_normalise(m, b, "multiply_0")
+
+            # multiply_0: latch sign, exponent and the raw mantissa product
+            with m.State("multiply_0"):
+                m.next = "multiply_1"
+                m.d.sync += [
+                   z.s.eq(a.s ^ b.s),
+                   z.e.eq(a.e + b.e + 1),
+                   product.eq(a.m * b.m * 4)  # *4 == << 2
+                ]
+
+            # multiply_1: slice product into mantissa + guard/round/sticky
+            with m.State("multiply_1"):
+                mw = z.m_width  # NB: rebinds mw; product is already sized
+                m.next = "normalise_1"
+                m.d.sync += [
+                z.m.eq(product[mw+2:]),
+                of.guard.eq(product[mw+1]),
+                of.round_bit.eq(product[mw]),
+                of.sticky.eq(product[0:mw] != 0)
+            ]
+
+            # ******
+            # First stage of normalisation.
+            with m.State("normalise_1"):
+                self.normalise_1(m, z, of, "normalise_2")
+
+            # ******
+            # Second stage of normalisation.
+
+            with m.State("normalise_2"):
+                self.normalise_2(m, z, of, "round")
+
+            # ******
+            # rounding stage
+
+            with m.State("round"):
+                self.roundz(m, z, of.roundz)
+                m.next = "corrections"
+
+            # ******
+            # correction stage
+
+            with m.State("corrections"):
+                self.corrections(m, z, "pack")
+
+            # ******
+            # pack stage
+            with m.State("pack"):
+                self.pack(m, z, "put_z")
+
+            # ******
+            # put_z stage: hands z to out_z, then loops back to get_a
+
+            with m.State("put_z"):
+                self.put_z(m, z, self.out_z, "get_a")
+
+        return m
+
+
+if __name__ == "__main__":
+    alu = FPMUL(width=32)  # 32-bit wide multiplier
+    main(alu, ports=alu.in_a.ports() + alu.in_b.ports() + alu.out_z.ports())
diff --git a/src/ieee754/fpmul/test/test_mul.py b/src/ieee754/fpmul/test/test_mul.py
new file mode 100644
index 00000000..21d82528
--- /dev/null
+++ b/src/ieee754/fpmul/test/test_mul.py
@@ -0,0 +1,39 @@
+import sys
+from random import randint
+from random import seed
+from operator import mul
+
+from nmigen import Module, Signal
+from nmigen.compat.sim import run_simulation
+
+from fmul import FPMUL
+
+from unit_test_single import (get_mantissa, get_exponent, get_sign, is_nan,
+                                is_inf, is_pos_inf, is_neg_inf,
+                                match, get_case, check_case, run_test,
+                                run_edge_cases, run_corner_cases)
+
+
+def testbench(dut):  # fixed vectors, then regression/corner/edge runs
+    yield from check_case(dut, 0x40000000, 0x40000000, 0x40800000)  # 2*2=4
+    yield from check_case(dut, 0x41400000, 0x40A00000, 0x42700000)  # 12*5=60
+
+    count = 0  # running total of vectors, passed on to the helpers below
+
+    # regression tests: stimulus_a[i] is multiplied by stimulus_b[i]
+    stimulus_a = [0xba57711a, 0xbf9b1e94, 0x34082401, 0x5e8ef81,
+                  0x5c75da81, 0x2b017]
+    stimulus_b = [0xee1818c5, 0xc038ed3a, 0xb328cd45, 0x114f3db,
+                  0x2f642a39, 0xff3807ab]
+    yield from run_test(dut, stimulus_a, stimulus_b, mul, get_case)
+    count += len(stimulus_a)
+    print (count, "vectors passed")
+
+    yield from run_corner_cases(dut, count, mul, get_case)
+    yield from run_edge_cases(dut, count, mul, get_case)
+
+
+if __name__ == '__main__':
+    dut = FPMUL(width=32)  # 32-bit multiplier under test
+    run_simulation(dut, testbench(dut), vcd_name="test_mul.vcd")
+
diff --git a/src/ieee754/fpmul/test/test_mul64.py b/src/ieee754/fpmul/test/test_mul64.py
new file mode 100644
index 00000000..81c5b5a4
--- /dev/null
+++ b/src/ieee754/fpmul/test/test_mul64.py
@@ -0,0 +1,37 @@
+from nmigen import Module, Signal
+from nmigen.compat.sim import run_simulation
+from operator import mul
+
+from fmul import FPMUL
+
+import sys
+import atexit
+from random import randint
+from random import seed
+
+from unit_test_double import (get_mantissa, get_exponent, get_sign, is_nan,
+                                is_inf, is_pos_inf, is_neg_inf,
+                                match, get_case, check_case, run_test,
+                                run_edge_cases, run_corner_cases)
+
+
+def testbench(dut):  # 64-bit multiply: one fixed vector, then helper runs
+    yield from check_case(dut, 0, 0, 0)
+
+    count = 0  # running total of vectors, passed on to the helpers below
+
+    # regression tests (NOTE(review): first pair is finite as doubles)
+    stimulus_a = [0xff80000000000000, 0x3351099a0528e138]
+    stimulus_b = [0x7f80000000000000, 0xd651a9a9986af2b5]
+    yield from run_test(dut, stimulus_a, stimulus_b, mul)
+    count += len(stimulus_a)
+    print (count, "vectors passed")
+    # NOTE(review): test_mul.py also passes get_case to these - confirm.
+    yield from run_corner_cases(dut, count, mul)
+    yield from run_edge_cases(dut, count, mul)
+
+
+if __name__ == '__main__':
+    dut = FPMUL(width=64)  # 64-bit multiplier under test
+    run_simulation(dut, testbench(dut), vcd_name="test_mul64.vcd")
+