X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fieee754%2Fpart_mul_add%2Fmultiply.py;h=215d18c6a1aacce049dd74c6331437c2e74e5853;hb=674602ad56ad774971c0ce95a878028b65dc176b;hp=f94f95fcb473436dbaced2e40cad3ed0dc9fd572;hpb=ba31508267c2a1cc2e7913da5acff4669122b786;p=ieee754fpu.git diff --git a/src/ieee754/part_mul_add/multiply.py b/src/ieee754/part_mul_add/multiply.py index f94f95fc..215d18c6 100644 --- a/src/ieee754/part_mul_add/multiply.py +++ b/src/ieee754/part_mul_add/multiply.py @@ -8,6 +8,8 @@ from abc import ABCMeta, abstractmethod from nmigen.cli import main from functools import reduce from operator import or_ +from ieee754.pipeline import PipelineSpec +from nmutil.pipemodbase import PipeModBase class PartitionPoints(dict): @@ -308,7 +310,7 @@ class AddReduceData: def __init__(self, part_pts, n_inputs, output_width, n_parts): self.part_ops = [Signal(2, name=f"part_ops_{i}", reset_less=True) for i in range(n_parts)] - self.terms = [Signal(output_width, name=f"inputs_{i}", + self.terms = [Signal(output_width, name=f"terms_{i}", reset_less=True) for i in range(n_inputs)] self.part_pts = part_pts.like() @@ -342,23 +344,22 @@ class FinalReduceData: return self.eq_from(rhs.part_pts, rhs.output, rhs.part_ops) -class FinalAdd(Elaboratable): +class FinalAdd(PipeModBase): """ Final stage of add reduce """ - def __init__(self, lidx, n_inputs, output_width, n_parts, partition_points, + def __init__(self, pspec, lidx, n_inputs, partition_points, partition_step=1): self.lidx = lidx self.partition_step = partition_step - self.output_width = output_width + self.output_width = pspec.width * 2 self.n_inputs = n_inputs - self.n_parts = n_parts + self.n_parts = pspec.n_parts self.partition_points = PartitionPoints(partition_points) - if not self.partition_points.fits_in_width(output_width): + if not self.partition_points.fits_in_width(self.output_width): raise ValueError("partition_points doesn't fit in output_width") - self.i = self.ispec() - self.o = self.ospec() + super().__init__(pspec, "finaladd") def ispec(self): return AddReduceData(self.partition_points, self.n_inputs, @@ -368,13 +369,6 @@ class FinalAdd(Elaboratable): return FinalReduceData(self.partition_points, self.output_width, self.n_parts) - def setup(self, m, i): - m.submodules.finaladd = self - m.d.comb += self.i.eq(i) - - def process(self, i): - return self.o - def elaborate(self, platform): """Elaborate this module.""" m = Module() @@ -404,7 +398,7 @@ class FinalAdd(Elaboratable): return m -class AddReduceSingle(Elaboratable): +class AddReduceSingle(PipeModBase): """Add list of numbers together. :attribute inputs: input ``Signal``s to be summed. Modification not @@ -416,7 +410,7 @@ class AddReduceSingle(Elaboratable): supported, except for by ``Signal.eq``. """ - def __init__(self, lidx, n_inputs, output_width, n_parts, partition_points, + def __init__(self, pspec, lidx, n_inputs, partition_points, partition_step=1): """Create an ``AddReduce``. 
@@ -427,17 +421,16 @@ class AddReduceSingle(Elaboratable): self.lidx = lidx self.partition_step = partition_step self.n_inputs = n_inputs - self.n_parts = n_parts - self.output_width = output_width + self.n_parts = pspec.n_parts + self.output_width = pspec.width * 2 self.partition_points = PartitionPoints(partition_points) - if not self.partition_points.fits_in_width(output_width): + if not self.partition_points.fits_in_width(self.output_width): raise ValueError("partition_points doesn't fit in output_width") self.groups = AddReduceSingle.full_adder_groups(n_inputs) self.n_terms = AddReduceSingle.calc_n_inputs(n_inputs, self.groups) - self.i = self.ispec() - self.o = self.ospec() + super().__init__(pspec, "addreduce_%d" % lidx) def ispec(self): return AddReduceData(self.partition_points, self.n_inputs, @@ -447,13 +440,6 @@ class AddReduceSingle(Elaboratable): return AddReduceData(self.partition_points, self.n_terms, self.output_width, self.n_parts) - def setup(self, m, i): - setattr(m.submodules, "addreduce_%d" % self.lidx, self) - m.d.comb += self.i.eq(i) - - def process(self, i): - return self.o - @staticmethod def calc_n_inputs(n_inputs, groups): retval = len(groups)*2 @@ -553,7 +539,7 @@ class AddReduceSingle(Elaboratable): class AddReduceInternal: - """Recursively Add list of numbers together. + """Iteratively Add list of numbers together. :attribute inputs: input ``Signal``s to be summed. Modification not supported, except for by ``Signal.eq``. @@ -564,18 +550,17 @@ class AddReduceInternal: supported, except for by ``Signal.eq``. """ - def __init__(self, i, output_width, partition_step=1): + def __init__(self, pspec, n_inputs, part_pts, partition_step=1): """Create an ``AddReduce``. :param inputs: input ``Signal``s to be summed. :param output_width: bit-width of ``output``. :param partition_points: the input partition points. 
""" - self.i = i - self.inputs = i.terms - self.part_ops = i.part_ops - self.output_width = output_width - self.partition_points = i.part_pts + self.pspec = pspec + self.n_inputs = n_inputs + self.output_width = pspec.width * 2 + self.partition_points = part_pts self.partition_step = partition_step self.create_levels() @@ -585,26 +570,21 @@ class AddReduceInternal: mods = [] partition_points = self.partition_points - part_ops = self.part_ops - n_parts = len(part_ops) - inputs = self.inputs - ilen = len(inputs) + ilen = self.n_inputs while True: - groups = AddReduceSingle.full_adder_groups(len(inputs)) + groups = AddReduceSingle.full_adder_groups(ilen) if len(groups) == 0: break lidx = len(mods) - next_level = AddReduceSingle(lidx, ilen, self.output_width, n_parts, + next_level = AddReduceSingle(self.pspec, lidx, ilen, partition_points, self.partition_step) mods.append(next_level) partition_points = next_level.i.part_pts - inputs = next_level.o.terms - ilen = len(inputs) - part_ops = next_level.i.part_ops + ilen = len(next_level.o.terms) lidx = len(mods) - next_level = FinalAdd(lidx, ilen, self.output_width, n_parts, + next_level = FinalAdd(self.pspec, lidx, ilen, partition_points, self.partition_step) mods.append(next_level) @@ -639,7 +619,8 @@ class AddReduce(AddReduceInternal, Elaboratable): n_parts = len(part_ops) self.i = AddReduceData(part_pts, len(inputs), output_width, n_parts) - AddReduceInternal.__init__(self, self.i, output_width, partition_step) + AddReduceInternal.__init__(self, pspec, n_inputs, part_pts, + partition_step) self.o = FinalReduceData(part_pts, output_width, n_parts) self.register_levels = register_levels @@ -991,21 +972,21 @@ class IntermediateOut(Elaboratable): return m -class FinalOut(Elaboratable): +class FinalOut(PipeModBase): """ selects the final output based on the partitioning. each byte is selectable independently, i.e. it is possible that some partitions requested 8-bit computation whilst others requested 16 or 32 bit. """ - def __init__(self, output_width, n_parts, part_pts): + def __init__(self, pspec, part_pts): + self.part_pts = part_pts - self.output_width = output_width - self.n_parts = n_parts - self.out_wid = output_width//2 + self.output_width = pspec.width * 2 + self.n_parts = pspec.n_parts + self.out_wid = pspec.width - self.i = self.ispec() - self.o = self.ospec() + super().__init__(pspec, "finalout") def ispec(self): return IntermediateData(self.part_pts, self.output_width, self.n_parts) @@ -1013,13 +994,6 @@ class FinalOut(Elaboratable): def ospec(self): return OutputData() - def setup(self, m, i): - m.submodules.finalout = self - m.d.comb += self.i.eq(i) - - def process(self, i): - return self.o - def elaborate(self, platform): m = Module() @@ -1179,32 +1153,17 @@ class OutputData: self.output.eq(rhs.output)] -class AllTerms(Elaboratable): +class AllTerms(PipeModBase): """Set of terms to be added together """ - def __init__(self, n_inputs, output_width, n_parts): - """Create an ``AddReduce``. - - :param inputs: input ``Signal``s to be summed. - :param output_width: bit-width of ``output``. - :param register_levels: List of nesting levels that should have - pipeline registers. - :param partition_points: the input partition points. + def __init__(self, pspec, n_inputs): + """Create an ``AllTerms``. 
""" self.n_inputs = n_inputs - self.n_parts = n_parts - self.output_width = output_width - - self.i = self.ispec() - self.o = self.ospec() - - def setup(self, m, i): - m.submodules.allterms = self - m.d.comb += self.i.eq(i) - - def process(self, i): - return self.o + self.n_parts = pspec.n_parts + self.output_width = pspec.width * 2 + super().__init__(pspec, "allterms") def ispec(self): return InputData() @@ -1291,17 +1250,16 @@ class AllTerms(Elaboratable): return m -class Intermediates(Elaboratable): +class Intermediates(PipeModBase): """ Intermediate output modules """ - def __init__(self, output_width, n_parts, part_pts): + def __init__(self, pspec, part_pts): self.part_pts = part_pts - self.output_width = output_width - self.n_parts = n_parts + self.output_width = pspec.width * 2 + self.n_parts = pspec.n_parts - self.i = self.ispec() - self.o = self.ospec() + super().__init__(pspec, "intermediates") def ispec(self): return FinalReduceData(self.part_pts, self.output_width, self.n_parts) @@ -1309,13 +1267,6 @@ class Intermediates(Elaboratable): def ospec(self): return IntermediateData(self.part_pts, self.output_width, self.n_parts) - def setup(self, m, i): - m.submodules.intermediates = self - m.d.comb += self.i.eq(i) - - def process(self, i): - return self.o - def elaborate(self, platform): m = Module() @@ -1361,6 +1312,8 @@ class Intermediates(Elaboratable): class Mul8_16_32_64(Elaboratable): """Signed/Unsigned 8/16/32/64-bit partitioned integer multiplier. + XXX NOTE: this class is intended for unit test purposes ONLY. + Supports partitioning into any combination of 8, 16, 32, and 64-bit partitions on naturally-aligned boundaries. Supports the operation being set for each partition independently. @@ -1391,6 +1344,11 @@ class Mul8_16_32_64(Elaboratable): flip-flops are to be inserted. """ + self.id_wid = 0 # num_bits(num_rows) + self.op_wid = 0 + self.pspec = PipelineSpec(64, self.id_wid, self.op_wid, n_ops=3) + self.pspec.n_parts = 8 + # parameter(s) self.register_levels = list(register_levels) @@ -1419,15 +1377,14 @@ class Mul8_16_32_64(Elaboratable): part_pts = self.part_pts n_inputs = 64 + 4 - n_parts = 8 - t = AllTerms(n_inputs, 128, n_parts) + t = AllTerms(self.pspec, n_inputs) t.setup(m, self.i) terms = t.o.terms - at = AddReduceInternal(t.process(self.i), 128, partition_step=2) + at = AddReduceInternal(self.pspec, n_inputs, part_pts, partition_step=2) - i = at.i + i = t.o for idx in range(len(at.levels)): mcur = at.levels[idx] mcur.setup(m, i) @@ -1438,12 +1395,12 @@ class Mul8_16_32_64(Elaboratable): m.d.comb += o.eq(mcur.process(i)) i = o # for next loop - interm = Intermediates(128, 8, part_pts) + interm = Intermediates(self.pspec, part_pts) interm.setup(m, i) o = interm.process(interm.i) # final output - finalout = FinalOut(128, 8, part_pts) + finalout = FinalOut(self.pspec, part_pts) finalout.setup(m, o) m.d.comb += self.o.eq(finalout.process(o))