1 # SPDX-License-Identifier: LGPL-2.1-or-later
2 # See Notices.txt for copyright information
"""Partitioned Integer Addition.

See:
* https://libre-riscv.org/3d_gpu/architecture/dynamic_simd/add/
"""
9 from nmigen
import Signal
, Module
, Value
, Elaboratable
, Cat
, C
, Mux
, Repl
10 from nmigen
.hdl
.ast
import Assign
11 from abc
import ABCMeta
, abstractmethod
12 from nmigen
.cli
import main
13 from functools
import reduce
14 from operator
import or_
15 from ieee754
.pipeline
import PipelineSpec
16 from nmutil
.pipemodbase
import PipeModBase
18 from ieee754
.part_mul_add
.partpoints
import PartitionPoints
class FullAdder(Elaboratable):
    """Full Adder.

    :attribute in0: the first input
    :attribute in1: the second input
    :attribute in2: the third input
    :attribute sum: the sum output
    :attribute carry: the carry output

    Rather than do individual full adders (and have an array of them,
    which would be very slow to simulate), this module can specify the
    bit width of the inputs and outputs: in effect it performs multiple
    Full 3-2 Add operations "in parallel".
    """

    def __init__(self, width):
        """Create a ``FullAdder``.

        :param width: the bit width of the input and output
        """
        self.in0 = Signal(width, reset_less=True)
        self.in1 = Signal(width, reset_less=True)
        self.in2 = Signal(width, reset_less=True)
        self.sum = Signal(width, reset_less=True)
        self.carry = Signal(width, reset_less=True)

    def elaborate(self, platform):
        """Elaborate this module.

        :param platform: the nmigen platform (not used here)
        :returns: a ``Module`` holding the combinatorial sum/carry logic
        """
        m = Module()
        comb = m.d.comb

        # bitwise 3-input XOR: each bit position is an independent sum
        comb += self.sum.eq(self.in0 ^ self.in1 ^ self.in2)
        # bitwise majority function: carry is set when at least two of
        # the three inputs are set.  note that the carry is NOT shifted
        # here: it stays in the same bit position as its inputs.
        comb += self.carry.eq((self.in0 & self.in1)
                              | (self.in1 & self.in2)
                              | (self.in2 & self.in0))
        return m
class MaskedFullAdder(Elaboratable):
    """Masked Full Adder.

    :attribute mask: the carry partition mask
    :attribute in0: the first input
    :attribute in1: the second input
    :attribute in2: the third input
    :attribute sum: the sum output
    :attribute mcarry: the masked carry output

    FullAdders are always used with a "mask" on the output.  To keep
    the graphviz "clean", this class performs the masking here rather
    than inside a large for-loop.

    See the following discussion as to why this is no longer derived
    from FullAdder.  Each carry is shifted here *before* being ANDed
    with the mask, so that an AOI cell may be used (which is more
    gate-efficient)

    https://en.wikipedia.org/wiki/AND-OR-Invert
    https://groups.google.com/d/msg/comp.arch/fcq-GLQqvas/vTxmcA0QAgAJ
    """

    def __init__(self, width):
        """Create a ``MaskedFullAdder``.

        :param width: the bit width of the input and output
        """
        # elaborate() sizes its intermediate signals from this
        self.width = width
        self.mask = Signal(width, reset_less=True)
        self.mcarry = Signal(width, reset_less=True)
        self.in0 = Signal(width, reset_less=True)
        self.in1 = Signal(width, reset_less=True)
        self.in2 = Signal(width, reset_less=True)
        self.sum = Signal(width, reset_less=True)

    def elaborate(self, platform):
        """Elaborate this module.

        :param platform: the nmigen platform (not used here)
        :returns: a ``Module`` holding the sum and masked-carry logic
        """
        m = Module()
        comb = m.d.comb

        # intermediates: sN are the inputs shifted up by one bit,
        # cN are the three masked pair-wise AND terms of the carry.
        s1 = Signal(self.width, reset_less=True)
        s2 = Signal(self.width, reset_less=True)
        s3 = Signal(self.width, reset_less=True)
        c1 = Signal(self.width, reset_less=True)
        c2 = Signal(self.width, reset_less=True)
        c3 = Signal(self.width, reset_less=True)

        # the sum is an ordinary bitwise 3-input XOR
        comb += self.sum.eq(self.in0 ^ self.in1 ^ self.in2)

        # shift each input left by one (a zero enters at bit 0; the top
        # bit is dropped because the destination is only "width" wide)
        comb += s1.eq(Cat(0, self.in0))
        comb += s2.eq(Cat(0, self.in1))
        comb += s3.eq(Cat(0, self.in2))

        # mask is applied to each AND term of the (already shifted)
        # majority function, rather than to the final OR result
        comb += c1.eq(s1 & s2 & self.mask)
        comb += c2.eq(s2 & s3 & self.mask)
        comb += c3.eq(s3 & s1 & self.mask)
        comb += self.mcarry.eq(c1 | c2 | c3)
        return m
class PartitionedAdder(Elaboratable):
    """Partitioned Adder.

    Performs the final add.  The partition points are included in the
    actual add (in one of the operands only), which causes a carry over
    to the next bit.  Then the final output *removes* the extra bits from
    the result.

    Without carry-in / carry-out:

    partition: .... P... P... P... P... (32 bits)
    a        : .... .... .... .... .... (32 bits)
    b        : .... .... .... .... .... (32 bits)
    exp-a    : ....P....P....P....P.... (32+4 bits, P=1 if no partition)
    exp-b    : ....0....0....0....0.... (32 bits plus 4 zeros)
    exp-o    : ....xN...xN...xN...xN... (32+4 bits - x to be discarded)
    o        : .... N... N... N... N... (32 bits - x ignored, N is carry-over)

    With carry-in / carry-out:

    partition:      p    p    p    p      (4 bits)
    carry-in :      c    c    c    c      (4 bits)
    C = c & P:      C    C    C    c      (4 bits)
    I = P=>c :      I    I    I    I      (4 bits)
    a        :  AAAA AAAA AAAA AAAA AAAA  (32 bits)
    b        :  BBBB BBBB BBBB BBBB BBBB  (32 bits)
    exp-a    : 0AAAApAAAACAAAACAAAACAAAAc (32+4 bits, P=1 if no partition)
    exp-b    : 0BBBB0BBBBIBBBBIBBBBIBBBBI (32 bits plus 4 zeros)
    exp-o    : o....oN...oN...oN...oN...x (32+4 bits - x to be discarded)
    o        :  .... N... N... N... N...  (32 bits - x ignored, N is carry-over)
    carry-out: o    o    o    o           (4 bits)

    :attribute width: the bit width of the input and output. Read-only.
    :attribute a: the first input to the adder
    :attribute b: the second input to the adder
    :attribute output: the sum output
    :attribute part_pts: the input partition points. Modification not
        supported, except for by ``Signal.eq``.
    """

    def __init__(self, width, part_pts, partition_step=1):
        """Create a ``PartitionedAdder``.

        :param width: the bit width of the input and output
        :param part_pts: the input partition points
        :param partition_step: a multiplier (typically double) step
                               which in-place "expands" the partition points
        :raises ValueError: if the partition points do not fit in ``width``
        """
        self.width = width
        self.pmul = partition_step
        self.part_pts = PartitionPoints(part_pts)
        self.a = Signal(width, reset_less=True)
        self.b = Signal(width, reset_less=True)
        self.carry_in = Signal(self.part_pts.get_max_partition_count(width))
        self.carry_out = Signal(self.part_pts.get_max_partition_count(width))
        self.output = Signal(width, reset_less=True)
        if not self.part_pts.fits_in_width(width):
            raise ValueError("partition_points doesn't fit in width")

        # size of the single "long" add performed in elaborate():
        # one bit per data bit, one extra bit per partition point, plus
        # two more: the carry-in bit below bit 0 and the carry-out bit
        # above the top data bit.
        expanded_width = 2
        for i in range(self.width):
            if i in self.part_pts:
                expanded_width += 1
            expanded_width += 1
        self._expanded_width = expanded_width

    def elaborate(self, platform):
        """Elaborate this module.

        :param platform: the nmigen platform (not used here)
        :returns: a ``Module`` performing the partitioned add as one
                  wide addition
        """
        m = Module()
        comb = m.d.comb
        expanded_a = Signal(self._expanded_width, reset_less=True)
        expanded_b = Signal(self._expanded_width, reset_less=True)
        expanded_o = Signal(self._expanded_width, reset_less=True)

        expanded_index = 0
        # store bits in a list, use Cat later.  graphviz is much cleaner
        al, bl, ol, cl, ea, eb, eo, co = [], [], [], [], [], [], [], []

        # partition points are "breaks" (extra zeros or 1s) in what would
        # otherwise be a massive long add.  when the "break" points are 0,
        # whatever is in it (in the output) is discarded.  however when
        # there is a "1", it causes a roll-over carry to the *next* bit.
        # we still ignore the "break" bit in the [intermediate] output,
        # however by that time we've got the effect that we wanted: the
        # carry has been carried *over* the break point.

        # lowest expanded bit: the first carry-in is placed in *both*
        # operands, so that c + c generates a carry of c into data bit 0.
        carry_bit = 0
        al.append(self.carry_in[carry_bit])
        bl.append(self.carry_in[carry_bit])
        ea.append(expanded_a[expanded_index])
        eb.append(expanded_b[expanded_index])
        carry_bit += 1
        expanded_index += 1

        for i in range(self.width):
            pi = i/self.pmul  # double the range of the partition point test
            if pi.is_integer() and pi in self.part_pts:
                # insert one extra bit at this partition point.
                # enabled partition point: both operands get this
                # partition's carry-in (so 0 + 0 when it is clear), which
                # stops the lower carry and injects the carry-in instead.
                a_bit = Signal(name="a_bit_%d" % i, reset_less=True)
                carry_in = self.carry_in[carry_bit]  # convenience
                m.d.comb += a_bit.eq(self.part_pts[pi].implies(carry_in))
                # disabled partition point: 1 + 0, so that a carry from
                # below rolls straight over the extra bit.
                ea.append(expanded_a[expanded_index])
                al.append(a_bit)  # add extra bit in a
                eb.append(expanded_b[expanded_index])
                bl.append(carry_in & self.part_pts[pi])  # zero if disabled
                # the extra bit of the result is the carry-out of the
                # partition just below this point
                co.append(expanded_o[expanded_index])
                cl.append(self.carry_out[carry_bit-1])
                expanded_index += 1  # skip the extra point.  NOT in the output
                carry_bit += 1
            # ordinary data bit: wire a/b in, and the result bit out
            ea.append(expanded_a[expanded_index])
            eb.append(expanded_b[expanded_index])
            eo.append(expanded_o[expanded_index])
            al.append(self.a[i])
            bl.append(self.b[i])
            ol.append(self.output[i])
            expanded_index += 1

        # top expanded bit: never driven in expanded_a / expanded_b (so
        # it stays at its reset value), which leaves the result bit
        # holding the carry-out of the last partition.
        co.append(expanded_o[expanded_index])
        cl.append(self.carry_out[carry_bit-1])

        # combine above using Cat
        comb += Cat(*ea).eq(Cat(*al))
        comb += Cat(*eb).eq(Cat(*bl))
        comb += Cat(*ol).eq(Cat(*eo))
        comb += Cat(*cl).eq(Cat(*co))

        # use only one addition to take advantage of look-ahead carry and
        # special hardware on FPGAs
        comb += expanded_o.eq(expanded_a + expanded_b)
        return m