X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fieee754%2Fpart_mul_add%2Fmultiply.py;h=215d18c6a1aacce049dd74c6331437c2e74e5853;hb=674602ad56ad774971c0ce95a878028b65dc176b;hp=2c828c187f2747bde3285df64599636719e3be72;hpb=a5a060d10873d4ae26ba656fa9bfdda96a429d4e;p=ieee754fpu.git diff --git a/src/ieee754/part_mul_add/multiply.py b/src/ieee754/part_mul_add/multiply.py index 2c828c18..215d18c6 100644 --- a/src/ieee754/part_mul_add/multiply.py +++ b/src/ieee754/part_mul_add/multiply.py @@ -8,6 +8,8 @@ from abc import ABCMeta, abstractmethod from nmigen.cli import main from functools import reduce from operator import or_ +from ieee754.pipeline import PipelineSpec +from nmutil.pipemodbase import PipeModBase class PartitionPoints(dict): @@ -308,7 +310,7 @@ class AddReduceData: def __init__(self, part_pts, n_inputs, output_width, n_parts): self.part_ops = [Signal(2, name=f"part_ops_{i}", reset_less=True) for i in range(n_parts)] - self.terms = [Signal(output_width, name=f"inputs_{i}", + self.terms = [Signal(output_width, name=f"terms_{i}", reset_less=True) for i in range(n_inputs)] self.part_pts = part_pts.like() @@ -342,22 +344,22 @@ class FinalReduceData: return self.eq_from(rhs.part_pts, rhs.output, rhs.part_ops) -class FinalAdd(Elaboratable): +class FinalAdd(PipeModBase): """ Final stage of add reduce """ - def __init__(self, n_inputs, output_width, n_parts, partition_points, + def __init__(self, pspec, lidx, n_inputs, partition_points, partition_step=1): + self.lidx = lidx self.partition_step = partition_step - self.output_width = output_width + self.output_width = pspec.width * 2 self.n_inputs = n_inputs - self.n_parts = n_parts + self.n_parts = pspec.n_parts self.partition_points = PartitionPoints(partition_points) - if not self.partition_points.fits_in_width(output_width): + if not self.partition_points.fits_in_width(self.output_width): raise ValueError("partition_points doesn't fit in output_width") - self.i = self.ispec() - self.o = self.ospec() + super().__init__(pspec, "finaladd") def ispec(self): return AddReduceData(self.partition_points, self.n_inputs, @@ -396,7 +398,7 @@ class FinalAdd(Elaboratable): return m -class AddReduceSingle(Elaboratable): +class AddReduceSingle(PipeModBase): """Add list of numbers together. :attribute inputs: input ``Signal``s to be summed. Modification not @@ -408,7 +410,7 @@ class AddReduceSingle(Elaboratable): supported, except for by ``Signal.eq``. """ - def __init__(self, n_inputs, output_width, n_parts, partition_points, + def __init__(self, pspec, lidx, n_inputs, partition_points, partition_step=1): """Create an ``AddReduce``. @@ -416,19 +418,19 @@ class AddReduceSingle(Elaboratable): :param output_width: bit-width of ``output``. :param partition_points: the input partition points. """ + self.lidx = lidx self.partition_step = partition_step self.n_inputs = n_inputs - self.n_parts = n_parts - self.output_width = output_width + self.n_parts = pspec.n_parts + self.output_width = pspec.width * 2 self.partition_points = PartitionPoints(partition_points) - if not self.partition_points.fits_in_width(output_width): + if not self.partition_points.fits_in_width(self.output_width): raise ValueError("partition_points doesn't fit in output_width") self.groups = AddReduceSingle.full_adder_groups(n_inputs) self.n_terms = AddReduceSingle.calc_n_inputs(n_inputs, self.groups) - self.i = self.ispec() - self.o = self.ospec() + super().__init__(pspec, "addreduce_%d" % lidx) def ispec(self): return AddReduceData(self.partition_points, self.n_inputs, @@ -537,7 +539,7 @@ class AddReduceSingle(Elaboratable): class AddReduceInternal: - """Recursively Add list of numbers together. + """Iteratively Add list of numbers together. :attribute inputs: input ``Signal``s to be summed. Modification not supported, except for by ``Signal.eq``. @@ -548,18 +550,17 @@ class AddReduceInternal: supported, except for by ``Signal.eq``. """ - def __init__(self, i, output_width, partition_step=1): + def __init__(self, pspec, n_inputs, part_pts, partition_step=1): """Create an ``AddReduce``. :param inputs: input ``Signal``s to be summed. :param output_width: bit-width of ``output``. :param partition_points: the input partition points. """ - self.i = i - self.inputs = i.terms - self.part_ops = i.part_ops - self.output_width = output_width - self.partition_points = i.part_pts + self.pspec = pspec + self.n_inputs = n_inputs + self.output_width = pspec.width * 2 + self.partition_points = part_pts self.partition_step = partition_step self.create_levels() @@ -569,24 +570,21 @@ class AddReduceInternal: mods = [] partition_points = self.partition_points - part_ops = self.part_ops - n_parts = len(part_ops) - inputs = self.inputs - ilen = len(inputs) + ilen = self.n_inputs while True: - groups = AddReduceSingle.full_adder_groups(len(inputs)) + groups = AddReduceSingle.full_adder_groups(ilen) if len(groups) == 0: break - next_level = AddReduceSingle(ilen, self.output_width, n_parts, + lidx = len(mods) + next_level = AddReduceSingle(self.pspec, lidx, ilen, partition_points, self.partition_step) mods.append(next_level) partition_points = next_level.i.part_pts - inputs = next_level.o.terms - ilen = len(inputs) - part_ops = next_level.i.part_ops + ilen = len(next_level.o.terms) - next_level = FinalAdd(ilen, self.output_width, n_parts, + lidx = len(mods) + next_level = FinalAdd(self.pspec, lidx, ilen, partition_points, self.partition_step) mods.append(next_level) @@ -621,7 +619,8 @@ class AddReduce(AddReduceInternal, Elaboratable): n_parts = len(part_ops) self.i = AddReduceData(part_pts, len(inputs), output_width, n_parts) - AddReduceInternal.__init__(self, self.i, output_width, partition_step) + AddReduceInternal.__init__(self, pspec, n_inputs, part_pts, + partition_step) self.o = FinalReduceData(part_pts, output_width, n_parts) self.register_levels = register_levels @@ -871,7 +870,7 @@ class Part(Elaboratable): the extra terms - as separate terms - are then thrown at the AddReduce alongside the multiplication part-results. """ - def __init__(self, part_pts, width, n_parts, n_levels, pbwid): + def __init__(self, part_pts, width, n_parts, pbwid): self.pbwid = pbwid self.part_pts = part_pts @@ -973,21 +972,21 @@ class IntermediateOut(Elaboratable): return m -class FinalOut(Elaboratable): +class FinalOut(PipeModBase): """ selects the final output based on the partitioning. each byte is selectable independently, i.e. it is possible that some partitions requested 8-bit computation whilst others requested 16 or 32 bit. """ - def __init__(self, output_width, n_parts, part_pts): + def __init__(self, pspec, part_pts): + self.part_pts = part_pts - self.output_width = output_width - self.n_parts = n_parts - self.out_wid = output_width//2 + self.output_width = pspec.width * 2 + self.n_parts = pspec.n_parts + self.out_wid = pspec.width - self.i = self.ispec() - self.o = self.ospec() + super().__init__(pspec, "finalout") def ispec(self): return IntermediateData(self.part_pts, self.output_width, self.n_parts) @@ -1154,26 +1153,17 @@ class OutputData: self.output.eq(rhs.output)] -class AllTerms(Elaboratable): +class AllTerms(PipeModBase): """Set of terms to be added together """ - def __init__(self, n_inputs, output_width, n_parts, register_levels): - """Create an ``AddReduce``. - - :param inputs: input ``Signal``s to be summed. - :param output_width: bit-width of ``output``. - :param register_levels: List of nesting levels that should have - pipeline registers. - :param partition_points: the input partition points. + def __init__(self, pspec, n_inputs): + """Create an ``AllTerms``. """ - self.register_levels = register_levels self.n_inputs = n_inputs - self.n_parts = n_parts - self.output_width = output_width - - self.i = self.ispec() - self.o = self.ospec() + self.n_parts = pspec.n_parts + self.output_width = pspec.width * 2 + super().__init__(pspec, "allterms") def ispec(self): return InputData() @@ -1204,11 +1194,10 @@ class AllTerms(Elaboratable): setattr(m.submodules, "signs%d" % i, s) m.d.comb += s.part_ops.eq(self.i.part_ops[i]) - n_levels = len(self.register_levels)+1 - m.submodules.part_8 = part_8 = Part(eps, 128, 8, n_levels, 8) - m.submodules.part_16 = part_16 = Part(eps, 128, 4, n_levels, 8) - m.submodules.part_32 = part_32 = Part(eps, 128, 2, n_levels, 8) - m.submodules.part_64 = part_64 = Part(eps, 128, 1, n_levels, 8) + m.submodules.part_8 = part_8 = Part(eps, 128, 8, 8) + m.submodules.part_16 = part_16 = Part(eps, 128, 4, 8) + m.submodules.part_32 = part_32 = Part(eps, 128, 2, 8) + m.submodules.part_64 = part_64 = Part(eps, 128, 1, 8) nat_l, nbt_l, nla_l, nlb_l = [], [], [], [] for mod in [part_8, part_16, part_32, part_64]: m.d.comb += mod.a.eq(self.i.a) @@ -1261,17 +1250,16 @@ class AllTerms(Elaboratable): return m -class Intermediates(Elaboratable): +class Intermediates(PipeModBase): """ Intermediate output modules """ - def __init__(self, output_width, n_parts, part_pts): + def __init__(self, pspec, part_pts): self.part_pts = part_pts - self.output_width = output_width - self.n_parts = n_parts + self.output_width = pspec.width * 2 + self.n_parts = pspec.n_parts - self.i = self.ispec() - self.o = self.ospec() + super().__init__(pspec, "intermediates") def ispec(self): return FinalReduceData(self.part_pts, self.output_width, self.n_parts) @@ -1324,6 +1312,8 @@ class Intermediates(Elaboratable): class Mul8_16_32_64(Elaboratable): """Signed/Unsigned 8/16/32/64-bit partitioned integer multiplier. + XXX NOTE: this class is intended for unit test purposes ONLY. + Supports partitioning into any combination of 8, 16, 32, and 64-bit partitions on naturally-aligned boundaries. Supports the operation being set for each partition independently. @@ -1354,6 +1344,11 @@ class Mul8_16_32_64(Elaboratable): flip-flops are to be inserted. """ + self.id_wid = 0 # num_bits(num_rows) + self.op_wid = 0 + self.pspec = PipelineSpec(64, self.id_wid, self.op_wid, n_ops=3) + self.pspec.n_parts = 8 + # parameter(s) self.register_levels = list(register_levels) @@ -1382,33 +1377,32 @@ class Mul8_16_32_64(Elaboratable): part_pts = self.part_pts n_inputs = 64 + 4 - n_parts = 8 - t = AllTerms(n_inputs, 128, n_parts, self.register_levels) - m.submodules.allterms = t - m.d.comb += t.i.eq(self.i) + t = AllTerms(self.pspec, n_inputs) + t.setup(m, self.i) terms = t.o.terms - at = AddReduceInternal(t.o, 128, partition_step=2) + at = AddReduceInternal(self.pspec, n_inputs, part_pts, partition_step=2) - i = at.i + i = t.o for idx in range(len(at.levels)): mcur = at.levels[idx] - setattr(m.submodules, "addreduce_%d" % idx, mcur) + mcur.setup(m, i) + o = mcur.ospec() if idx in self.register_levels: - m.d.sync += mcur.i.eq(i) + m.d.sync += o.eq(mcur.process(i)) else: - m.d.comb += mcur.i.eq(i) - i = mcur.o # for next loop + m.d.comb += o.eq(mcur.process(i)) + i = o # for next loop - interm = Intermediates(128, 8, part_pts) - m.submodules.intermediates = interm - m.d.comb += interm.i.eq(i) + interm = Intermediates(self.pspec, part_pts) + interm.setup(m, i) + o = interm.process(interm.i) # final output - m.submodules.finalout = finalout = FinalOut(128, 8, part_pts) - m.d.comb += finalout.i.eq(interm.o) - m.d.comb += self.o.eq(finalout.o) + finalout = FinalOut(self.pspec, part_pts) + finalout.setup(m, o) + m.d.comb += self.o.eq(finalout.process(o)) return m