add docstring: Mul8_16_32_64 is intended only for testing
diff --git a/src/ieee754/part_mul_add/multiply.py b/src/ieee754/part_mul_add/multiply.py
index 4c6b570ce4474008c4fc590110afb1719cb818cb..215d18c6a1aacce049dd74c6331437c2e74e5853 100644
--- a/src/ieee754/part_mul_add/multiply.py
+++ b/src/ieee754/part_mul_add/multiply.py
@@ -8,6 +8,8 @@ from abc import ABCMeta, abstractmethod
 from nmigen.cli import main
 from functools import reduce
 from operator import or_
+from ieee754.pipeline import PipelineSpec
+from nmutil.pipemodbase import PipeModBase
 
 
 class PartitionPoints(dict):
@@ -308,7 +310,7 @@ class AddReduceData:
     def __init__(self, part_pts, n_inputs, output_width, n_parts):
         self.part_ops = [Signal(2, name=f"part_ops_{i}", reset_less=True)
                           for i in range(n_parts)]
-        self.terms = [Signal(output_width, name=f"inputs_{i}",
+        self.terms = [Signal(output_width, name=f"terms_{i}",
                               reset_less=True)
                         for i in range(n_inputs)]
         self.part_pts = part_pts.like()
@@ -342,20 +344,22 @@ class FinalReduceData:
         return self.eq_from(rhs.part_pts, rhs.output, rhs.part_ops)
 
 
-class FinalAdd(Elaboratable):
+class FinalAdd(PipeModBase):
     """ Final stage of add reduce
     """
 
-    def __init__(self, n_inputs, output_width, n_parts, partition_points):
-        self.output_width = output_width
+    def __init__(self, pspec, lidx, n_inputs, partition_points,
+                       partition_step=1):
+        self.lidx = lidx
+        self.partition_step = partition_step
+        self.output_width = pspec.width * 2
         self.n_inputs = n_inputs
-        self.n_parts = n_parts
+        self.n_parts = pspec.n_parts
         self.partition_points = PartitionPoints(partition_points)
-        if not self.partition_points.fits_in_width(output_width):
+        if not self.partition_points.fits_in_width(self.output_width):
             raise ValueError("partition_points doesn't fit in output_width")
 
-        self.i = self.ispec()
-        self.o = self.ospec()
+        super().__init__(pspec, "finaladd")
 
     def ispec(self):
         return AddReduceData(self.partition_points, self.n_inputs,
@@ -381,7 +385,7 @@ class FinalAdd(Elaboratable):
             # base case for adding 2 inputs
             assert self.n_inputs == 2
             adder = PartitionedAdder(output_width,
-                                     self.i.part_pts, 2)
+                                     self.i.part_pts, self.partition_step)
             m.submodules.final_adder = adder
             m.d.comb += adder.a.eq(self.i.terms[0])
             m.d.comb += adder.b.eq(self.i.terms[1])
@@ -394,7 +398,7 @@ class FinalAdd(Elaboratable):
         return m
 
 
-class AddReduceSingle(Elaboratable):
+class AddReduceSingle(PipeModBase):
     """Add list of numbers together.
 
     :attribute inputs: input ``Signal``s to be summed. Modification not
@@ -406,25 +410,27 @@ class AddReduceSingle(Elaboratable):
         supported, except for by ``Signal.eq``.
     """
 
-    def __init__(self, n_inputs, output_width, n_parts, partition_points):
+    def __init__(self, pspec, lidx, n_inputs, partition_points,
+                       partition_step=1):
         """Create an ``AddReduce``.
 
         :param inputs: input ``Signal``s to be summed.
         :param output_width: bit-width of ``output``.
         :param partition_points: the input partition points.
         """
+        self.lidx = lidx
+        self.partition_step = partition_step
         self.n_inputs = n_inputs
-        self.n_parts = n_parts
-        self.output_width = output_width
+        self.n_parts = pspec.n_parts
+        self.output_width = pspec.width * 2
         self.partition_points = PartitionPoints(partition_points)
-        if not self.partition_points.fits_in_width(output_width):
+        if not self.partition_points.fits_in_width(self.output_width):
             raise ValueError("partition_points doesn't fit in output_width")
 
         self.groups = AddReduceSingle.full_adder_groups(n_inputs)
         self.n_terms = AddReduceSingle.calc_n_inputs(n_inputs, self.groups)
 
-        self.i = self.ispec()
-        self.o = self.ospec()
+        super().__init__(pspec, "addreduce_%d" % lidx)
 
     def ispec(self):
         return AddReduceData(self.partition_points, self.n_inputs,
@@ -516,7 +522,8 @@ class AddReduceSingle(Elaboratable):
         part_mask = Signal(self.output_width, reset_less=True)
 
         # get partition points as a mask
-        mask = self.i.part_pts.as_mask(self.output_width, mul=2)
+        mask = self.i.part_pts.as_mask(self.output_width,
+                                       mul=self.partition_step)
         m.d.comb += part_mask.eq(mask)
 
         # add and link the intermediate term modules
@@ -532,7 +539,7 @@ class AddReduceSingle(Elaboratable):
 
 
 class AddReduceInternal:
-    """Recursively Add list of numbers together.
+    """Iteratively Add list of numbers together.
 
     :attribute inputs: input ``Signal``s to be summed. Modification not
         supported, except for by ``Signal.eq``.
@@ -543,18 +550,18 @@ class AddReduceInternal:
         supported, except for by ``Signal.eq``.
     """
 
-    def __init__(self, i, output_width):
+    def __init__(self, pspec, n_inputs, part_pts, partition_step=1):
         """Create an ``AddReduce``.
 
         :param inputs: input ``Signal``s to be summed.
         :param output_width: bit-width of ``output``.
         :param partition_points: the input partition points.
         """
-        self.i = i
-        self.inputs = i.terms
-        self.part_ops = i.part_ops
-        self.output_width = output_width
-        self.partition_points = i.part_pts
+        self.pspec = pspec
+        self.n_inputs = n_inputs
+        self.output_width = pspec.width * 2
+        self.partition_points = part_pts
+        self.partition_step = partition_step
 
         self.create_levels()
 
@@ -563,24 +570,22 @@ class AddReduceInternal:
 
         mods = []
         partition_points = self.partition_points
-        part_ops = self.part_ops
-        n_parts = len(part_ops)
-        inputs = self.inputs
-        ilen = len(inputs)
+        ilen = self.n_inputs
         while True:
-            groups = AddReduceSingle.full_adder_groups(len(inputs))
+            groups = AddReduceSingle.full_adder_groups(ilen)
             if len(groups) == 0:
                 break
-            next_level = AddReduceSingle(ilen, self.output_width, n_parts,
-                                         partition_points)
+            lidx = len(mods)
+            next_level = AddReduceSingle(self.pspec, lidx, ilen,
+                                         partition_points,
+                                         self.partition_step)
             mods.append(next_level)
             partition_points = next_level.i.part_pts
-            inputs = next_level.o.terms
-            ilen = len(inputs)
-            part_ops = next_level.i.part_ops
+            ilen = len(next_level.o.terms)
 
-        next_level = FinalAdd(ilen, self.output_width, n_parts,
-                              partition_points)
+        lidx = len(mods)
+        next_level = FinalAdd(self.pspec, lidx, ilen,
+                              partition_points, self.partition_step)
         mods.append(next_level)
 
         self.levels = mods
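
The "Iteratively Add" docstring and the create_levels() loop above describe a carry-save style reduction: each AddReduceSingle level compresses groups of three terms into two (a bitwise sum plus a shifted carry), and the loop repeats until only two terms are left for FinalAdd. A minimal software model of that idea, purely illustrative (reduce_terms is not part of this file, and it ignores partitioning and fixed signal widths):

    def reduce_terms(terms):
        # one pass of the while loop = one AddReduceSingle level
        while len(terms) > 2:
            nxt = []
            full = len(terms) - len(terms) % 3
            for i in range(0, full, 3):
                a, b, c = terms[i:i + 3]
                nxt.append(a ^ b ^ c)                           # per-bit sum
                nxt.append(((a & b) | (b & c) | (a & c)) << 1)  # per-bit carry
            nxt.extend(terms[full:])   # leftover terms pass through unchanged
            terms = nxt
        return sum(terms)              # the last two terms go to "FinalAdd"

    assert reduce_terms([3, 5, 7, 11, 13]) == 3 + 5 + 7 + 11 + 13
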
@@ -599,7 +604,7 @@ class AddReduce(AddReduceInternal, Elaboratable):
     """
 
     def __init__(self, inputs, output_width, register_levels, part_pts,
-                       part_ops):
+                       part_ops, partition_step=1):
         """Create an ``AddReduce``.
 
         :param inputs: input ``Signal``s to be summed.
@@ -614,7 +619,8 @@ class AddReduce(AddReduceInternal, Elaboratable):
         n_parts = len(part_ops)
         self.i = AddReduceData(part_pts, len(inputs),
                              output_width, n_parts)
-        AddReduceInternal.__init__(self, self.i, output_width)
+        AddReduceInternal.__init__(self, pspec, n_inputs, part_pts,
+                                   partition_step)
         self.o = FinalReduceData(part_pts, output_width, n_parts)
         self.register_levels = register_levels
 
@@ -864,7 +870,7 @@ class Part(Elaboratable):
         the extra terms - as separate terms - are then thrown at the
         AddReduce alongside the multiplication part-results.
     """
-    def __init__(self, part_pts, width, n_parts, n_levels, pbwid):
+    def __init__(self, part_pts, width, n_parts, pbwid):
 
         self.pbwid = pbwid
         self.part_pts = part_pts
@@ -966,21 +972,21 @@ class IntermediateOut(Elaboratable):
         return m
 
 
-class FinalOut(Elaboratable):
+class FinalOut(PipeModBase):
     """ selects the final output based on the partitioning.
 
         each byte is selectable independently, i.e. it is possible
         that some partitions requested 8-bit computation whilst others
         requested 16 or 32 bit.
     """
-    def __init__(self, output_width, n_parts, part_pts):
+    def __init__(self, pspec, part_pts):
+
         self.part_pts = part_pts
-        self.output_width = output_width
-        self.n_parts = n_parts
-        self.out_wid = output_width//2
+        self.output_width = pspec.width * 2
+        self.n_parts = pspec.n_parts
+        self.out_wid = pspec.width
 
-        self.i = self.ispec()
-        self.o = self.ospec()
+        super().__init__(pspec, "finalout")
 
     def ispec(self):
         return IntermediateData(self.part_pts, self.output_width, self.n_parts)
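
The FinalOut docstring above says each byte of the result is selected independently, according to the width requested by the partition that covers it. As a rough conceptual model only (select_output and its arguments are invented for illustration and do not match the module's real signals):

    def select_output(results_by_size, size_of_byte):
        # results_by_size: candidate 64-bit results, one per partition width
        # size_of_byte: for each of the 8 output bytes, the width (8, 16,
        #               32 or 64) requested by the partition covering it
        out = 0
        for byte in range(8):
            src = results_by_size[size_of_byte[byte]]
            out |= ((src >> (8 * byte)) & 0xff) << (8 * byte)
        return out
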
@@ -1147,26 +1153,17 @@ class OutputData:
                 self.output.eq(rhs.output)]
 
 
                 self.output.eq(rhs.output)]
 
 
-class AllTerms(Elaboratable):
+class AllTerms(PipeModBase):
     """Set of terms to be added together
     """
 
-    def __init__(self, n_inputs, output_width, n_parts, register_levels):
-        """Create an ``AddReduce``.
-
-        :param inputs: input ``Signal``s to be summed.
-        :param output_width: bit-width of ``output``.
-        :param register_levels: List of nesting levels that should have
-            pipeline registers.
-        :param partition_points: the input partition points.
+    def __init__(self, pspec, n_inputs):
+        """Create an ``AllTerms``.
         """
         """
-        self.register_levels = register_levels
         self.n_inputs = n_inputs
-        self.n_parts = n_parts
-        self.output_width = output_width
-
-        self.i = self.ispec()
-        self.o = self.ospec()
+        self.n_parts = pspec.n_parts
+        self.output_width = pspec.width * 2
+        super().__init__(pspec, "allterms")
 
     def ispec(self):
         return InputData()
@@ -1197,11 +1194,10 @@ class AllTerms(Elaboratable):
             setattr(m.submodules, "signs%d" % i, s)
             m.d.comb += s.part_ops.eq(self.i.part_ops[i])
 
-        n_levels = len(self.register_levels)+1
-        m.submodules.part_8 = part_8 = Part(eps, 128, 8, n_levels, 8)
-        m.submodules.part_16 = part_16 = Part(eps, 128, 4, n_levels, 8)
-        m.submodules.part_32 = part_32 = Part(eps, 128, 2, n_levels, 8)
-        m.submodules.part_64 = part_64 = Part(eps, 128, 1, n_levels, 8)
+        m.submodules.part_8 = part_8 = Part(eps, 128, 8, 8)
+        m.submodules.part_16 = part_16 = Part(eps, 128, 4, 8)
+        m.submodules.part_32 = part_32 = Part(eps, 128, 2, 8)
+        m.submodules.part_64 = part_64 = Part(eps, 128, 1, 8)
         nat_l, nbt_l, nla_l, nlb_l = [], [], [], []
         for mod in [part_8, part_16, part_32, part_64]:
             m.d.comb += mod.a.eq(self.i.a)
@@ -1254,17 +1250,16 @@ class AllTerms(Elaboratable):
         return m
 
 
-class Intermediates(Elaboratable):
+class Intermediates(PipeModBase):
     """ Intermediate output modules
     """
 
-    def __init__(self, output_width, n_parts, part_pts):
+    def __init__(self, pspec, part_pts):
         self.part_pts = part_pts
-        self.output_width = output_width
-        self.n_parts = n_parts
+        self.output_width = pspec.width * 2
+        self.n_parts = pspec.n_parts
 
-        self.i = self.ispec()
-        self.o = self.ospec()
+        super().__init__(pspec, "intermediates")
 
     def ispec(self):
         return FinalReduceData(self.part_pts, self.output_width, self.n_parts)
@@ -1317,6 +1312,8 @@ class Intermediates(Elaboratable):
 class Mul8_16_32_64(Elaboratable):
     """Signed/Unsigned 8/16/32/64-bit partitioned integer multiplier.
 
+    XXX NOTE: this class is intended for unit test purposes ONLY.
+
     Supports partitioning into any combination of 8, 16, 32, and 64-bit
     partitions on naturally-aligned boundaries. Supports the operation being
     set for each partition independently.
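
As a worked example of the behaviour described above (assuming the plain low-half multiply operation in every partition): splitting the 64-bit operands at bit 32 gives two independent 32-bit multiplies, and the low half of each product lands in the matching partition of the result.

    a = (7 << 32) | 100           # two 32-bit partitions: [7, 100]
    b = (3 << 32) | 25            # two 32-bit partitions: [3, 25]
    lo = (100 * 25) & 0xffffffff  # 0x000009c4
    hi = (7 * 3) & 0xffffffff     # 0x00000015
    expected = (hi << 32) | lo    # partitioned result: 0x00000015000009c4
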
@@ -1347,6 +1344,11 @@ class Mul8_16_32_64(Elaboratable):
             flip-flops are to be inserted.
         """
 
+        self.id_wid = 0 # num_bits(num_rows)
+        self.op_wid = 0
+        self.pspec = PipelineSpec(64, self.id_wid, self.op_wid, n_ops=3)
+        self.pspec.n_parts = 8
+
         # parameter(s)
         self.register_levels = list(register_levels)
 
@@ -1375,29 +1377,32 @@ class Mul8_16_32_64(Elaboratable):
         part_pts = self.part_pts
 
         n_inputs = 64 + 4
-        n_parts = 8
-        t = AllTerms(n_inputs, 128, n_parts, self.register_levels)
-        m.submodules.allterms = t
-        m.d.comb += t.i.eq(self.i)
+        t = AllTerms(self.pspec, n_inputs)
+        t.setup(m, self.i)
 
         terms = t.o.terms
 
-        add_reduce = AddReduce(terms,
-                               128,
-                               self.register_levels,
-                               t.o.part_pts,
-                               t.o.part_ops)
+        at = AddReduceInternal(self.pspec, n_inputs, part_pts, partition_step=2)
 
-        m.submodules.add_reduce = add_reduce
+        i = t.o
+        for idx in range(len(at.levels)):
+            mcur = at.levels[idx]
+            mcur.setup(m, i)
+            o = mcur.ospec()
+            if idx in self.register_levels:
+                m.d.sync += o.eq(mcur.process(i))
+            else:
+                m.d.comb += o.eq(mcur.process(i))
+            i = o # for next loop
 
-        interm = Intermediates(128, 8, part_pts)
-        m.submodules.intermediates = interm
-        m.d.comb += interm.i.eq(add_reduce.o)
+        interm = Intermediates(self.pspec, part_pts)
+        interm.setup(m, i)
+        o = interm.process(interm.i)
 
         # final output
-        m.submodules.finalout = finalout = FinalOut(128, 8, part_pts)
-        m.d.comb += finalout.i.eq(interm.o)
-        m.d.comb += self.o.eq(finalout.o)
+        finalout = FinalOut(self.pspec, part_pts)
+        finalout.setup(m, o)
+        m.d.comb += self.o.eq(finalout.process(o))
 
         return m
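
The rewritten elaborate() above replaces the single AddReduce submodule with an explicit walk over AddReduceInternal.levels, inserting a pipeline register only at the indices listed in register_levels. A generic sketch of that pattern (simplified nmigen, not tied to this file's classes; stage stands for any function returning a Value):

    from nmigen import Signal

    def chain_levels(m, stages, first_i, register_levels):
        i = first_i
        for idx, stage in enumerate(stages):
            o = Signal.like(i, name="level_%d" % idx)
            if idx in register_levels:
                m.d.sync += o.eq(stage(i))   # registered: adds a cycle of latency
            else:
                m.d.comb += o.eq(stage(i))   # combinational: settles same cycle
            i = o                            # this level's output feeds the next
        return i
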