remove use of AddReduce, use AddReduceInternal instead
[ieee754fpu.git] / src / ieee754 / part_mul_add / multiply.py
index b5706014a3b404fbfa72978a46910bfe337d16a9..2c828c187f2747bde3285df64599636719e3be72 100644 (file)
@@ -408,13 +408,15 @@ class AddReduceSingle(Elaboratable):
         supported, except for by ``Signal.eq``.
     """
 
-    def __init__(self, n_inputs, output_width, n_parts, partition_points):
+    def __init__(self, n_inputs, output_width, n_parts, partition_points,
+                       partition_step=1):
         """Create an ``AddReduce``.
 
         :param inputs: input ``Signal``s to be summed.
         :param output_width: bit-width of ``output``.
         :param partition_points: the input partition points.
         """
+        self.partition_step = partition_step
         self.n_inputs = n_inputs
         self.n_parts = n_parts
         self.output_width = output_width
@@ -518,7 +520,8 @@ class AddReduceSingle(Elaboratable):
         part_mask = Signal(self.output_width, reset_less=True)
 
         # get partition points as a mask
-        mask = self.i.part_pts.as_mask(self.output_width, mul=2)
+        mask = self.i.part_pts.as_mask(self.output_width,
+                                       mul=self.partition_step)
         m.d.comb += part_mask.eq(mask)
 
         # add and link the intermediate term modules
@@ -575,7 +578,8 @@ class AddReduceInternal:
             if len(groups) == 0:
                 break
             next_level = AddReduceSingle(ilen, self.output_width, n_parts,
-                                         partition_points)
+                                         partition_points,
+                                         self.partition_step)
             mods.append(next_level)
             partition_points = next_level.i.part_pts
             inputs = next_level.o.terms
@@ -1385,18 +1389,21 @@ class Mul8_16_32_64(Elaboratable):
 
         terms = t.o.terms
 
-        add_reduce = AddReduce(terms,
-                               128,
-                               self.register_levels,
-                               t.o.part_pts,
-                               t.o.part_ops,
-                               partition_step=2)
+        at = AddReduceInternal(t.o, 128, partition_step=2)
 
-        m.submodules.add_reduce = add_reduce
+        i = at.i
+        for idx in range(len(at.levels)):
+            mcur = at.levels[idx]
+            setattr(m.submodules, "addreduce_%d" % idx, mcur)
+            if idx in self.register_levels:
+                m.d.sync += mcur.i.eq(i)
+            else:
+                m.d.comb += mcur.i.eq(i)
+            i = mcur.o # for next loop
 
         interm = Intermediates(128, 8, part_pts)
         m.submodules.intermediates = interm
-        m.d.comb += interm.i.eq(add_reduce.o)
+        m.d.comb += interm.i.eq(i)
 
         # final output
         m.submodules.finalout = finalout = FinalOut(128, 8, part_pts)