run tests in parallel

[ieee754fpu.git] / src / ieee754 / part_mul_add / adder.py
diff --git a/src/ieee754/part_mul_add/adder.py b/src/ieee754/part_mul_add/adder.py

index 2e504368c1c664a3b51b553338ca4231d0431d3b..e1849b4d25fc5ec4fc4473cc02b32f49dd5912b2 100644 (file)
--- a/src/ieee754/part_mul_add/adder.py
+++ b/src/ieee754/part_mul_add/adder.py
@@ -6,16 +6,10 @@ See:
  * https://libre-riscv.org/3d_gpu/architecture/dynamic_simd/add/
  """
  
-from nmigen import Signal, Module, Value, Elaboratable, Cat, C, Mux, Repl
-from nmigen.hdl.ast import Assign
-from abc import ABCMeta, abstractmethod
-from nmigen.cli import main
-from functools import reduce
-from operator import or_
-from ieee754.pipeline import PipelineSpec
-from nmutil.pipemodbase import PipeModBase
+from nmigen import Signal, Module, Elaboratable, Cat
  
  from ieee754.part_mul_add.partpoints import PartitionPoints
+from ieee754.part_cmp.ripple import MoveMSBDown
  
  
  class FullAdder(Elaboratable):
@@ -119,6 +113,7 @@ class PartitionedAdder(Elaboratable):
      to the next bit.  Then the final output *removes* the extra bits from
      the result.
  
+    In the case of no carry:
      partition: .... P... P... P... P... (32 bits)
      a        : .... .... .... .... .... (32 bits)
      b        : .... .... .... .... .... (32 bits)
@@ -127,17 +122,39 @@ class PartitionedAdder(Elaboratable):
      exp-o    : ....xN...xN...xN...xN... (32+4 bits - x to be discarded)
      o        : .... N... N... N... N... (32 bits - x ignored, N is carry-over)
  
+    However, with carry the behavior is a little different:
      partition:      p    p    p    p      (4 bits)
-    carry-in :           c    c    c    c (4 bits)
-    C = c & P:           C    C    C    c (4 bits)
-    I = P=>c :           I    I    I    I (4 bits)
+    carry-in :      c    c    c    c    c (5 bits)
+    C = c & P:      C    C    C    C    c (5 bits)
+    I = P=>c :      I    I    I    I    c (5 bits)
      a        :  AAAA AAAA AAAA AAAA AAAA  (32 bits)
      b        :  BBBB BBBB BBBB BBBB BBBB  (32 bits)
-    exp-a    : 0AAAApAAAACAAAACAAAACAAAAc (32+4 bits, P=1 if no partition)
-    exp-b    : 0BBBB0BBBBIBBBBIBBBBIBBBBI (32 bits plus 4 zeros)
-    exp-o    : o....oN...oN...oN...oN...x (32+4 bits - x to be discarded)
+    exp-a    : 0AAAACAAAACAAAACAAAACAAAAc (32+4+2 bits, P=1 if no partition)
+    exp-b    : 0BBBBIBBBBIBBBBIBBBBIBBBBc (32+2 bits plus 4 zeros)
+    exp-o    : o....oN...oN...oN...oN...x (32+4+2 bits - x to be discarded)
      o        :  .... N... N... N... N... (32 bits - x ignored, N is carry-over)
-    carry-out:      o    o    o    o      (4 bits)
+    carry-out: o    o    o    o    o      (5 bits)
+
+    A couple of differences should be noted:
+     - The expanded a/b/o have 2 extra bits added to them. These bits
+       allow the carry-in for the least significant partition to be
+       injected, and the carry out for the most significant partition
+       to be extracted.
+     - The partition bits P and 0 in the first example have been
+       replaced with bits C and I. Bits C and I are set to 1 when
+       there is a partition and a carry-in at that position. This has
+       the effect of creating a carry at that position in the expanded
+       adder, while preventing carries from the previous partition
+       from propagating through to the next. These bits are also used
+       to extract the carry-out information for each partition, as
+       when there is a carry out in a partition, the next most
+       significant partition bit will be set to 1.
+
+    Additionally, the carry-out bits must be rearranged before being
+    output to move the most significant carry bit for each partition
+    into the least significant bit for that partition, as well as to
+    ignore the other carry bits in that partition. This is
+    accomplished by the MoveMSBDown module.
  
      :attribute width: the bit width of the input and output. Read-only.
      :attribute a: the first input to the adder
@@ -176,13 +193,17 @@ class PartitionedAdder(Elaboratable):
          """Elaborate this module."""
          m = Module()
          comb = m.d.comb
+
+        carry_tmp = Signal(self.carry_out.width)
+        m.submodules.ripple = ripple = MoveMSBDown(self.carry_out.width)
+
          expanded_a = Signal(self._expanded_width, reset_less=True)
          expanded_b = Signal(self._expanded_width, reset_less=True)
          expanded_o = Signal(self._expanded_width, reset_less=True)
  
          expanded_index = 0
          # store bits in a list, use Cat later.  graphviz is much cleaner
-        al, bl, ol, cl, ea, eb, eo, co = [],[],[],[],[],[],[],[]
+        al, bl, ol, cl, ea, eb, eo, co = [], [], [], [], [], [], [], []
  
          # partition points are "breaks" (extra zeros or 1s) in what would
          # otherwise be a massive long add.  when the "break" points are 0,
@@ -201,20 +222,22 @@ class PartitionedAdder(Elaboratable):
          expanded_index += 1
  
          for i in range(self.width):
-            pi = i/self.pmul # double the range of the partition point test
+            pi = i/self.pmul  # double the range of the partition point test
              if pi.is_integer() and pi in self.part_pts:
-                # add extra bit set to 0 + 0 for enabled partition points
+                # add extra bit set to carry + carry for enabled
+                # partition points
                  a_bit = Signal(name="a_bit_%d" % i, reset_less=True)
-                carry_in = self.carry_in[carry_bit] # convenience
+                carry_in = self.carry_in[carry_bit]  # convenience
                  m.d.comb += a_bit.eq(self.part_pts[pi].implies(carry_in))
+
                  # and 1 + 0 for disabled partition points
                  ea.append(expanded_a[expanded_index])
-                al.append(a_bit) # add extra bit in a
+                al.append(a_bit)  # add extra bit in a
                  eb.append(expanded_b[expanded_index])
-                bl.append(carry_in & self.part_pts[pi]) # yes, add a zero
+                bl.append(carry_in & self.part_pts[pi])  # carry bit
                  co.append(expanded_o[expanded_index])
-                cl.append(self.carry_out[carry_bit-1])
-                expanded_index += 1 # skip the extra point.  NOT in the output
+                cl.append(carry_tmp[carry_bit-1])
+                expanded_index += 1  # skip the extra point.  NOT in the output
                  carry_bit += 1
              ea.append(expanded_a[expanded_index])
              eb.append(expanded_b[expanded_index])
@@ -225,8 +248,8 @@ class PartitionedAdder(Elaboratable):
              expanded_index += 1
          al.append(0)
          bl.append(0)
-        co.append(expanded_o[expanded_index])
-        cl.append(self.carry_out[carry_bit-1])
+        co.append(expanded_o[-1])
+        cl.append(carry_tmp[carry_bit-1])
  
          # combine above using Cat
          comb += Cat(*ea).eq(Cat(*al))
@@ -238,6 +261,13 @@ class PartitionedAdder(Elaboratable):
          # special hardware on FPGAs
          comb += expanded_o.eq(expanded_a + expanded_b)
  
-        return m
-
+        # ok now we have the carry-out, however because it's the MSB it's
+        # in the wrong position in the output as far as putting it into
+        # a chain of adds (or other operations).  therefore we need to
+        # "ripple" carry-out down to the same position that carry-in is
+        # in [the LSB of each partition].
+        comb += ripple.results_in.eq(carry_tmp)
+        comb += ripple.gates.eq(self.part_pts.as_sig())
+        m.d.sync += self.carry_out.eq(ripple.output)
  
+        return m