src/ieee754/part_mul_add/adder.py

   1 # SPDX-License-Identifier: LGPL-2.1-or-later
   2 # See Notices.txt for copyright information
   3 """Partitioned Integer Addition.
   4
   5 See:
   6 * https://libre-riscv.org/3d_gpu/architecture/dynamic_simd/add/
   7 """
   8
   9 from nmigen import Signal, Module, Value, Elaboratable, Cat, C, Mux, Repl
  10 from nmigen.hdl.ast import Assign
  11 from abc import ABCMeta, abstractmethod
  12 from nmigen.cli import main
  13 from functools import reduce
  14 from operator import or_
  15 from ieee754.pipeline import PipelineSpec
  16 from nmutil.pipemodbase import PipeModBase
  17
  18 from ieee754.part_mul_add.partpoints import PartitionPoints
  19
  20
  21 class FullAdder(Elaboratable):
  22     """Full Adder.
  23
  24     :attribute in0: the first input
  25     :attribute in1: the second input
  26     :attribute in2: the third input
  27     :attribute sum: the sum output
  28     :attribute carry: the carry output
  29
  30     Rather than do individual full adders (and have an array of them,
  31     which would be very slow to simulate), this module can specify the
  32     bit width of the inputs and outputs: in effect it performs multiple
  33     Full 3-2 Add operations "in parallel".
  34     """
  35
  36     def __init__(self, width):
  37         """Create a ``FullAdder``.
  38
  39         :param width: the bit width of the input and output
  40         """
  41         self.in0 = Signal(width, reset_less=True)
  42         self.in1 = Signal(width, reset_less=True)
  43         self.in2 = Signal(width, reset_less=True)
  44         self.sum = Signal(width, reset_less=True)
  45         self.carry = Signal(width, reset_less=True)
  46
  47     def elaborate(self, platform):
  48         """Elaborate this module."""
  49         m = Module()
  50         comb = m.d.comb
  51         comb += self.sum.eq(self.in0 ^ self.in1 ^ self.in2)
  52         comb += self.carry.eq((self.in0 & self.in1)
  53                               | (self.in1 & self.in2)
  54                               | (self.in2 & self.in0))
  55         return m
  56
  57
  58 class MaskedFullAdder(Elaboratable):
  59     """Masked Full Adder.
  60
  61     :attribute mask: the carry partition mask
  62     :attribute in0: the first input
  63     :attribute in1: the second input
  64     :attribute in2: the third input
  65     :attribute sum: the sum output
  66     :attribute mcarry: the masked carry output
  67
  68     FullAdders are always used with a "mask" on the output.  To keep
  69     the graphviz "clean", this class performs the masking here rather
  70     than inside a large for-loop.
  71
  72     See the following discussion as to why this is no longer derived
  73     from FullAdder.  Each carry is shifted here *before* being ANDed
  74     with the mask, so that an AOI cell may be used (which is more
  75     gate-efficient)
  76     https://en.wikipedia.org/wiki/AND-OR-Invert
  77     https://groups.google.com/d/msg/comp.arch/fcq-GLQqvas/vTxmcA0QAgAJ
  78     """
  79
  80     def __init__(self, width):
  81         """Create a ``MaskedFullAdder``.
  82
  83         :param width: the bit width of the input and output
  84         """
  85         self.width = width
  86         self.mask = Signal(width, reset_less=True)
  87         self.mcarry = Signal(width, reset_less=True)
  88         self.in0 = Signal(width, reset_less=True)
  89         self.in1 = Signal(width, reset_less=True)
  90         self.in2 = Signal(width, reset_less=True)
  91         self.sum = Signal(width, reset_less=True)
  92
  93     def elaborate(self, platform):
  94         """Elaborate this module."""
  95         m = Module()
  96         comb = m.d.comb
  97         s1 = Signal(self.width, reset_less=True)
  98         s2 = Signal(self.width, reset_less=True)
  99         s3 = Signal(self.width, reset_less=True)
 100         c1 = Signal(self.width, reset_less=True)
 101         c2 = Signal(self.width, reset_less=True)
 102         c3 = Signal(self.width, reset_less=True)
 103         comb += self.sum.eq(self.in0 ^ self.in1 ^ self.in2)
 104         comb += s1.eq(Cat(0, self.in0))
 105         comb += s2.eq(Cat(0, self.in1))
 106         comb += s3.eq(Cat(0, self.in2))
 107         comb += c1.eq(s1 & s2 & self.mask)
 108         comb += c2.eq(s2 & s3 & self.mask)
 109         comb += c3.eq(s3 & s1 & self.mask)
 110         comb += self.mcarry.eq(c1 | c2 | c3)
 111         return m
 112
 113
 114 class PartitionedAdder(Elaboratable):
 115     """Partitioned Adder.
 116
 117     Performs the final add.  The partition points are included in the
 118     actual add (in one of the operands only), which causes a carry over
 119     to the next bit.  Then the final output *removes* the extra bits from
 120     the result.
 121
 122     partition: .... P... P... P... P... (32 bits)
 123     a        : .... .... .... .... .... (32 bits)
 124     b        : .... .... .... .... .... (32 bits)
 125     exp-a    : ....P....P....P....P.... (32+4 bits, P=1 if no partition)
 126     exp-b    : ....0....0....0....0.... (32 bits plus 4 zeros)
 127     exp-o    : ....xN...xN...xN...xN... (32+4 bits - x to be discarded)
 128     o        : .... N... N... N... N... (32 bits - x ignored, N is carry-over)
 129
 130     :attribute width: the bit width of the input and output. Read-only.
 131     :attribute a: the first input to the adder
 132     :attribute b: the second input to the adder
 133     :attribute output: the sum output
 134     :attribute partition_points: the input partition points. Modification not
 135         supported, except for by ``Signal.eq``.
 136     """
 137
 138     def __init__(self, width, partition_points, partition_step=1):
 139         """Create a ``PartitionedAdder``.
 140
 141         :param width: the bit width of the input and output
 142         :param partition_points: the input partition points
 143         :param partition_step: a multiplier (typically double) step
 144                                which in-place "expands" the partition points
 145         """
 146         self.width = width
 147         self.pmul = partition_step
 148         self.a = Signal(width, reset_less=True)
 149         self.b = Signal(width, reset_less=True)
 150         self.output = Signal(width, reset_less=True)
 151         self.partition_points = PartitionPoints(partition_points)
 152         if not self.partition_points.fits_in_width(width):
 153             raise ValueError("partition_points doesn't fit in width")
 154         expanded_width = 0
 155         for i in range(self.width):
 156             if i in self.partition_points:
 157                 expanded_width += 1
 158             expanded_width += 1
 159         self._expanded_width = expanded_width
 160
 161     def elaborate(self, platform):
 162         """Elaborate this module."""
 163         m = Module()
 164         comb = m.d.comb
 165         expanded_a = Signal(self._expanded_width, reset_less=True)
 166         expanded_b = Signal(self._expanded_width, reset_less=True)
 167         expanded_o = Signal(self._expanded_width, reset_less=True)
 168
 169         expanded_index = 0
 170         # store bits in a list, use Cat later.  graphviz is much cleaner
 171         al, bl, ol, ea, eb, eo = [],[],[],[],[],[]
 172
 173         # partition points are "breaks" (extra zeros or 1s) in what would
 174         # otherwise be a massive long add.  when the "break" points are 0,
 175         # whatever is in it (in the output) is discarded.  however when
 176         # there is a "1", it causes a roll-over carry to the *next* bit.
 177         # we still ignore the "break" bit in the [intermediate] output,
 178         # however by that time we've got the effect that we wanted: the
 179         # carry has been carried *over* the break point.
 180
 181         for i in range(self.width):
 182             pi = i/self.pmul # double the range of the partition point test
 183             if pi.is_integer() and pi in self.partition_points:
 184                 # add extra bit set to 0 + 0 for enabled partition points
 185                 # and 1 + 0 for disabled partition points
 186                 ea.append(expanded_a[expanded_index])
 187                 al.append(~self.partition_points[pi]) # add extra bit in a
 188                 eb.append(expanded_b[expanded_index])
 189                 bl.append(C(0)) # yes, add a zero
 190                 expanded_index += 1 # skip the extra point.  NOT in the output
 191             ea.append(expanded_a[expanded_index])
 192             eb.append(expanded_b[expanded_index])
 193             eo.append(expanded_o[expanded_index])
 194             al.append(self.a[i])
 195             bl.append(self.b[i])
 196             ol.append(self.output[i])
 197             expanded_index += 1
 198
 199         # combine above using Cat
 200         comb += Cat(*ea).eq(Cat(*al))
 201         comb += Cat(*eb).eq(Cat(*bl))
 202         comb += Cat(*ol).eq(Cat(*eo))
 203
 204         # use only one addition to take advantage of look-ahead carry and
 205         # special hardware on FPGAs
 206         comb += expanded_o.eq(expanded_a + expanded_b)
 207
 208         return m
 209
 210