X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fieee754%2Fpart_mul_add%2Fadder.py;h=e1849b4d25fc5ec4fc4473cc02b32f49dd5912b2;hb=77f150cff440bed025e2487b6f0fcda9f529290b;hp=c2e75257340702050943d948774a4085e391072a;hpb=8c4baccb7201d450840015f27f05ff53095e6c82;p=ieee754fpu.git diff --git a/src/ieee754/part_mul_add/adder.py b/src/ieee754/part_mul_add/adder.py index c2e75257..e1849b4d 100644 --- a/src/ieee754/part_mul_add/adder.py +++ b/src/ieee754/part_mul_add/adder.py @@ -6,16 +6,10 @@ See: * https://libre-riscv.org/3d_gpu/architecture/dynamic_simd/add/ """ -from nmigen import Signal, Module, Value, Elaboratable, Cat, C, Mux, Repl -from nmigen.hdl.ast import Assign -from abc import ABCMeta, abstractmethod -from nmigen.cli import main -from functools import reduce -from operator import or_ -from ieee754.pipeline import PipelineSpec -from nmutil.pipemodbase import PipeModBase +from nmigen import Signal, Module, Elaboratable, Cat from ieee754.part_mul_add.partpoints import PartitionPoints +from ieee754.part_cmp.ripple import MoveMSBDown class FullAdder(Elaboratable): @@ -119,6 +113,7 @@ class PartitionedAdder(Elaboratable): to the next bit. Then the final output *removes* the extra bits from the result. + In the case of no carry: partition: .... P... P... P... P... (32 bits) a : .... .... .... .... .... (32 bits) b : .... .... .... .... .... (32 bits) @@ -127,17 +122,39 @@ class PartitionedAdder(Elaboratable): exp-o : ....xN...xN...xN...xN... (32+4 bits - x to be discarded) o : .... N... N... N... N... 
(32 bits - x ignored, N is carry-over) + However, with carry the behavior is a little different: partition: p p p p (4 bits) - carry-in : c c c c (4 bits) - C = c & P: C C C c (4 bits) - I = P=>c : I I I I (4 bits) + carry-in : c c c c c (5 bits) + C = c & P: C C C C c (5 bits) + I = P=>c : I I I I c (5 bits) a : AAAA AAAA AAAA AAAA AAAA (32 bits) b : BBBB BBBB BBBB BBBB BBBB (32 bits) - exp-a : 0AAAApAAAACAAAACAAAACAAAAc (32+4 bits, P=1 if no partition) - exp-b : 0BBBB0BBBBIBBBBIBBBBIBBBBI (32 bits plus 4 zeros) - exp-o : o....oN...oN...oN...oN...x (32+4 bits - x to be discarded) + exp-a : 0AAAACAAAACAAAACAAAACAAAAc (32+4+2 bits, P=1 if no partition) + exp-b : 0BBBBIBBBBIBBBBIBBBBIBBBBc (32+2 bits plus 4 zeros) + exp-o : o....oN...oN...oN...oN...x (32+4+2 bits - x to be discarded) o : .... N... N... N... N... (32 bits - x ignored, N is carry-over) - carry-out: o o o o (4 bits) + carry-out: o o o o o (5 bits) + + A couple of differences should be noted: + - The expanded a/b/o have 2 extra bits added to them. These bits + allow the carry-in for the least significant partition to be + injected, and the carry out for the most significant partition + to be extracted. + - The partition bits P and 0 in the first example have been + replaced with bits C and I. Bits C and I are set to 1 when + there is a partition and a carry-in at that position. This has + the effect of creating a carry at that position in the expanded + adder, while preventing carries from the previous partition + from propagating through to the next. These bits are also used + to extract the carry-out information for each partition, as + when there is a carry out in a partition, the next most + significant partition bit will be set to 1. + + Additionally, the carry-out bits must be rearranged before being + output to move the most significant carry bit for each partition + into the least significant bit for that partition, as well as to + ignore the other carry bits in that partition. 
This is + accomplished by the MoveMSBDown module :attribute width: the bit width of the input and output. Read-only. :attribute a: the first input to the adder @@ -176,13 +193,17 @@ class PartitionedAdder(Elaboratable): """Elaborate this module.""" m = Module() comb = m.d.comb + + carry_tmp = Signal(self.carry_out.width) + m.submodules.ripple = ripple = MoveMSBDown(self.carry_out.width) + expanded_a = Signal(self._expanded_width, reset_less=True) expanded_b = Signal(self._expanded_width, reset_less=True) expanded_o = Signal(self._expanded_width, reset_less=True) expanded_index = 0 # store bits in a list, use Cat later. graphviz is much cleaner - al, bl, ol, cl, ea, eb, eo, co = [],[],[],[],[],[],[],[] + al, bl, ol, cl, ea, eb, eo, co = [], [], [], [], [], [], [], [] # partition points are "breaks" (extra zeros or 1s) in what would # otherwise be a massive long add. when the "break" points are 0, @@ -201,20 +222,22 @@ class PartitionedAdder(Elaboratable): expanded_index += 1 for i in range(self.width): - pi = i/self.pmul # double the range of the partition point test + pi = i/self.pmul # double the range of the partition point test if pi.is_integer() and pi in self.part_pts: - # add extra bit set to 0 + 0 for enabled partition points - a_bit = Signal() - carry_in = self.carry_in[carry_bit] # convenience + # add extra bit set to carry + carry for enabled + # partition points + a_bit = Signal(name="a_bit_%d" % i, reset_less=True) + carry_in = self.carry_in[carry_bit] # convenience m.d.comb += a_bit.eq(self.part_pts[pi].implies(carry_in)) + # and 1 + 0 for disabled partition points ea.append(expanded_a[expanded_index]) - al.append(a_bit) # add extra bit in a + al.append(a_bit) # add extra bit in a eb.append(expanded_b[expanded_index]) - bl.append(carry_in & self.part_pts[pi]) # yes, add a zero + bl.append(carry_in & self.part_pts[pi]) # carry bit co.append(expanded_o[expanded_index]) - cl.append(self.carry_out[carry_bit-1]) - expanded_index += 1 # skip the extra point. 
NOT in the output + cl.append(carry_tmp[carry_bit-1]) + expanded_index += 1 # skip the extra point. NOT in the output carry_bit += 1 ea.append(expanded_a[expanded_index]) eb.append(expanded_b[expanded_index]) @@ -225,8 +248,8 @@ class PartitionedAdder(Elaboratable): expanded_index += 1 al.append(0) bl.append(0) - co.append(expanded_o[expanded_index]) - cl.append(self.carry_out[carry_bit-1]) + co.append(expanded_o[-1]) + cl.append(carry_tmp[carry_bit-1]) # combine above using Cat comb += Cat(*ea).eq(Cat(*al)) @@ -238,6 +261,13 @@ class PartitionedAdder(Elaboratable): # special hardware on FPGAs comb += expanded_o.eq(expanded_a + expanded_b) - return m - + # ok now we have the carry-out, however because it's the MSB it's + # in the wrong position in the output as far as putting it into + # a chain of adds (or other operations). therefore we need to + # "ripple" carry-out down to the same position that carry-in is + # in [the LSB of each partition]. + comb += ripple.results_in.eq(carry_tmp) + comb += ripple.gates.eq(self.part_pts.as_sig()) + m.d.sync += self.carry_out.eq(ripple.output) + return m