src/ieee754/part_mul_add/adder.py

   1 # SPDX-License-Identifier: LGPL-2.1-or-later
   2 # See Notices.txt for copyright information
   3 """Partitioned Integer Addition.
   4
   5 See:
   6 * https://libre-riscv.org/3d_gpu/architecture/dynamic_simd/add/
   7 """
   8
   9 from nmigen import Signal, Module, Value, Elaboratable, Cat, C, Mux, Repl
  10 from nmigen.hdl.ast import Assign
  11 from abc import ABCMeta, abstractmethod
  12 from nmigen.cli import main
  13 from functools import reduce
  14 from operator import or_
  15 from ieee754.pipeline import PipelineSpec
  16 from nmutil.pipemodbase import PipeModBase
  17
  18 from ieee754.part_mul_add.partpoints import PartitionPoints
  19
  20
  21 class FullAdder(Elaboratable):
  22     """Full Adder.
  23
  24     :attribute in0: the first input
  25     :attribute in1: the second input
  26     :attribute in2: the third input
  27     :attribute sum: the sum output
  28     :attribute carry: the carry output
  29
  30     Rather than do individual full adders (and have an array of them,
  31     which would be very slow to simulate), this module can specify the
  32     bit width of the inputs and outputs: in effect it performs multiple
  33     Full 3-2 Add operations "in parallel".
  34     """
  35
  36     def __init__(self, width):
  37         """Create a ``FullAdder``.
  38
  39         :param width: the bit width of the input and output
  40         """
  41         self.in0 = Signal(width, reset_less=True)
  42         self.in1 = Signal(width, reset_less=True)
  43         self.in2 = Signal(width, reset_less=True)
  44         self.sum = Signal(width, reset_less=True)
  45         self.carry = Signal(width, reset_less=True)
  46
  47     def elaborate(self, platform):
  48         """Elaborate this module."""
  49         m = Module()
  50         comb = m.d.comb
  51         comb += self.sum.eq(self.in0 ^ self.in1 ^ self.in2)
  52         comb += self.carry.eq((self.in0 & self.in1)
  53                               | (self.in1 & self.in2)
  54                               | (self.in2 & self.in0))
  55         return m
  56
  57
  58 class MaskedFullAdder(Elaboratable):
  59     """Masked Full Adder.
  60
  61     :attribute mask: the carry partition mask
  62     :attribute in0: the first input
  63     :attribute in1: the second input
  64     :attribute in2: the third input
  65     :attribute sum: the sum output
  66     :attribute mcarry: the masked carry output
  67
  68     FullAdders are always used with a "mask" on the output.  To keep
  69     the graphviz "clean", this class performs the masking here rather
  70     than inside a large for-loop.
  71
  72     See the following discussion as to why this is no longer derived
  73     from FullAdder.  Each carry is shifted here *before* being ANDed
  74     with the mask, so that an AOI cell may be used (which is more
  75     gate-efficient)
  76     https://en.wikipedia.org/wiki/AND-OR-Invert
  77     https://groups.google.com/d/msg/comp.arch/fcq-GLQqvas/vTxmcA0QAgAJ
  78     """
  79
  80     def __init__(self, width):
  81         """Create a ``MaskedFullAdder``.
  82
  83         :param width: the bit width of the input and output
  84         """
  85         self.width = width
  86         self.mask = Signal(width, reset_less=True)
  87         self.mcarry = Signal(width, reset_less=True)
  88         self.in0 = Signal(width, reset_less=True)
  89         self.in1 = Signal(width, reset_less=True)
  90         self.in2 = Signal(width, reset_less=True)
  91         self.sum = Signal(width, reset_less=True)
  92
  93     def elaborate(self, platform):
  94         """Elaborate this module."""
  95         m = Module()
  96         comb = m.d.comb
  97         s1 = Signal(self.width, reset_less=True)
  98         s2 = Signal(self.width, reset_less=True)
  99         s3 = Signal(self.width, reset_less=True)
 100         c1 = Signal(self.width, reset_less=True)
 101         c2 = Signal(self.width, reset_less=True)
 102         c3 = Signal(self.width, reset_less=True)
 103         comb += self.sum.eq(self.in0 ^ self.in1 ^ self.in2)
 104         comb += s1.eq(Cat(0, self.in0))
 105         comb += s2.eq(Cat(0, self.in1))
 106         comb += s3.eq(Cat(0, self.in2))
 107         comb += c1.eq(s1 & s2 & self.mask)
 108         comb += c2.eq(s2 & s3 & self.mask)
 109         comb += c3.eq(s3 & s1 & self.mask)
 110         comb += self.mcarry.eq(c1 | c2 | c3)
 111         return m
 112
 113
class PartitionedAdder(Elaboratable):
    """Partitioned Adder.

    Performs the final add.  The partition points are included in the
    actual add (in one of the operands only), which causes a carry over
    to the next bit.  Then the final output *removes* the extra bits from
    the result.

    Basic scheme (carry-in / carry-out left out):

    partition: .... P... P... P... P... (32 bits)
    a        : .... .... .... .... .... (32 bits)
    b        : .... .... .... .... .... (32 bits)
    exp-a    : ....P....P....P....P.... (32+4 bits, P=1 if no partition)
    exp-b    : ....0....0....0....0.... (32 bits plus 4 zeros)
    exp-o    : ....xN...xN...xN...xN... (32+4 bits - x to be discarded)
    o        : .... N... N... N... N... (32 bits - x ignored, N is carry-over)

    Scheme as implemented, with a carry-in and a carry-out per partition:

    partition:      p    p    p    p      (4 bits)
    carry-in :           c    c    c    c (4 bits)
    C = c & P:           C    C    C    c (4 bits)
    I = P=>c :           I    I    I    I (4 bits)
    a        :  AAAA AAAA AAAA AAAA AAAA  (32 bits)
    b        :  BBBB BBBB BBBB BBBB BBBB  (32 bits)
    exp-a    : 0AAAApAAAACAAAACAAAACAAAAc (32+4 bits, P=1 if no partition)
    exp-b    : 0BBBB0BBBBIBBBBIBBBBIBBBBI (32 bits plus 4 zeros)
    exp-o    : o....oN...oN...oN...oN...x (32+4 bits - x to be discarded)
    o        :  .... N... N... N... N... (32 bits - x ignored, N is carry-over)
    carry-out:      o    o    o    o      (4 bits)

    Note that ``elaborate`` places the ``I`` (implies) term in the
    expanded ``a`` operand and the ``C`` (and) term in the expanded ``b``
    operand, i.e. the other way round from the diagram.  As the two
    operands are only ever added together this makes no difference to
    the result.

    :attribute width: the bit width of the input and output. Read-only.
    :attribute pmul: the ``partition_step`` given to the constructor
    :attribute a: the first input to the adder
    :attribute b: the second input to the adder
    :attribute carry_in: the carry inputs.  Bit 0 is fed in below bit 0 of
        ``a``/``b``; each following bit is used at the next partition
        point that ``elaborate`` inserts.
    :attribute carry_out: the carry outputs, taken from the extra bit
        inserted at each partition point and, for the last one used,
        from the bit above the most significant bit of the sum.
    :attribute output: the sum output
    :attribute part_pts: the input partition points. Modification not
        supported, except for by ``Signal.eq``.
    """

    def __init__(self, width, part_pts, partition_step=1):
        """Create a ``PartitionedAdder``.

        :param width: the bit width of the input and output
        :param part_pts: the input partition points
        :param partition_step: a multiplier (typically double) step
                               which in-place "expands" the partition points
        :raises ValueError: if ``part_pts`` does not fit in ``width``
                            (as reported by ``fits_in_width``)
        """
        self.width = width
        self.pmul = partition_step
        self.part_pts = PartitionPoints(part_pts)
        self.a = Signal(width, reset_less=True)
        self.b = Signal(width, reset_less=True)
        # carry_in and carry_out are both sized by get_max_partition_count
        # (presumably one bit per partition - confirm in partpoints.py)
        self.carry_in = Signal(self.part_pts.get_max_partition_count(width))
        self.carry_out = Signal(self.part_pts.get_max_partition_count(width))
        self.output = Signal(width, reset_less=True)
        if not self.part_pts.fits_in_width(width):
            raise ValueError("partition_points doesn't fit in width")
        # width of the internal "expanded" operands: one bit per data bit,
        # one extra bit per partition point below ``width``, plus two more
        # (elaborate uses them for the carry-in slot below bit 0 and the
        # carry-out bit above the top data bit).
        # NOTE(review): this tests ``i in part_pts`` whereas elaborate tests
        # ``i/pmul in part_pts``; for partition_step != 1 the two counts may
        # differ - confirm this width is always large enough.
        expanded_width = 2
        for i in range(self.width):
            if i in self.part_pts:
                expanded_width += 1
            expanded_width += 1
        self._expanded_width = expanded_width

    def elaborate(self, platform):
        """Elaborate this module.

        Interleaves the bits of ``a`` and ``b`` with the extra
        partition-point bits to form two expanded operands, adds those
        with a single ``+``, then routes the data bits of the expanded
        result to ``output`` and the extra bits to ``carry_out``.

        :param platform: not used by this method
        :returns: the ``Module`` containing the combinatorial logic
        """
        m = Module()
        comb = m.d.comb
        expanded_a = Signal(self._expanded_width, reset_less=True)
        expanded_b = Signal(self._expanded_width, reset_less=True)
        expanded_o = Signal(self._expanded_width, reset_less=True)

        # next bit position to be used in the expanded signals
        expanded_index = 0
        # store bits in a list, use Cat later.  graphviz is much cleaner
        #   ea / al : bits of expanded_a and the values assigned to them
        #   eb / bl : bits of expanded_b and the values assigned to them
        #   ol / eo : bits of self.output and the expanded_o bits they take
        #   cl / co : bits of self.carry_out and the expanded_o bits they take
        # each pair is filled in lock-step so that the Cat()s line up.
        al, bl, ol, cl, ea, eb, eo, co = [],[],[],[],[],[],[],[]

        # partition points are "breaks" (extra zeros or 1s) in what would
        # otherwise be a massive long add.  when the "break" points are 0,
        # whatever is in it (in the output) is discarded.  however when
        # there is a "1", it causes a roll-over carry to the *next* bit.
        # we still ignore the "break" bit in the [intermediate] output,
        # however by that time we've got the effect that we wanted: the
        # carry has been carried *over* the break point.

        # lowest expanded bit: carry_in[0] is placed in *both* operands.
        # adding a bit to itself produces that bit as the carry into the
        # next position, which is where a[0] + b[0] is added.  the sum bit
        # at this position is not routed anywhere (neither eo nor co).
        carry_bit = 0
        al.append(self.carry_in[carry_bit])
        bl.append(self.carry_in[carry_bit])
        ea.append(expanded_a[expanded_index])
        eb.append(expanded_b[expanded_index])
        carry_bit += 1
        expanded_index += 1

        for i in range(self.width):
            # true division gives a float: only bit positions that are a
            # whole multiple of pmul can match a partition point
            pi = i/self.pmul # double the range of the partition point test
            if pi.is_integer() and pi in self.part_pts:
                # an extra bit is inserted below data bit i.
                # operand a gets "partition point implies carry-in"
                # (presumably ~P | c - implies() is not defined in this
                # file, confirm in partpoints.py)
                a_bit = Signal()
                carry_in = self.carry_in[carry_bit] # carry-in for this point
                m.d.comb += a_bit.eq(self.part_pts[pi].implies(carry_in))
                # operand b gets "carry-in AND partition point"
                ea.append(expanded_a[expanded_index])
                al.append(a_bit) # add extra bit in a
                eb.append(expanded_b[expanded_index])
                bl.append(carry_in & self.part_pts[pi]) # extra bit in b
                # the sum bit at the inserted position goes to carry_out,
                # one index behind the carry_in used here
                co.append(expanded_o[expanded_index])
                cl.append(self.carry_out[carry_bit-1])
                expanded_index += 1 # skip the extra point.  NOT in the output
                carry_bit += 1
            # ordinary data bit i of a, b and output
            ea.append(expanded_a[expanded_index])
            eb.append(expanded_b[expanded_index])
            eo.append(expanded_o[expanded_index])
            al.append(self.a[i])
            bl.append(self.b[i])
            ol.append(self.output[i])
            expanded_index += 1
        # a zero on top of both operand value lists.
        # NOTE(review): there is no matching ea/eb entry, so Cat(*al) and
        # Cat(*bl) are one bit wider than Cat(*ea) and Cat(*eb); presumably
        # the assignment truncates the extra zero - confirm.
        al.append(0)
        bl.append(0)
        # the bit above the top data bit of the sum is the final carry-out
        co.append(expanded_o[expanded_index])
        cl.append(self.carry_out[carry_bit-1])

        # combine above using Cat
        comb += Cat(*ea).eq(Cat(*al))
        comb += Cat(*eb).eq(Cat(*bl))
        comb += Cat(*ol).eq(Cat(*eo))
        comb += Cat(*cl).eq(Cat(*co))

        # use only one addition to take advantage of look-ahead carry and
        # special hardware on FPGAs
        comb += expanded_o.eq(expanded_a + expanded_b)

        return m
 242
 243