change goldschmidt_div_sqrt to use nmutil.plain_data rather than dataclasses

[soc.git] / src / soc / fu / div / experiment / goldschmidt_div_sqrt.py
diff --git a/src/soc/fu/div/experiment/goldschmidt_div_sqrt.py b/src/soc/fu/div/experiment/goldschmidt_div_sqrt.py

index ea0ddda0763c6847336589ec38cc8b5426013ba5..3f7c2480742d6913859461da120099385f99d18a 100644 (file)
--- a/src/soc/fu/div/experiment/goldschmidt_div_sqrt.py
+++ b/src/soc/fu/div/experiment/goldschmidt_div_sqrt.py
@@ -5,17 +5,17 @@
  # of Horizon 2020 EU Programme 957073.
  
  from collections import defaultdict
-from dataclasses import dataclass, field, fields, replace
  import logging
  import math
  import enum
  from fractions import Fraction
  from types import FunctionType
  from functools import lru_cache
-from nmigen.hdl.ast import Signal, unsigned, Mux, signed
+from nmigen.hdl.ast import Signal, unsigned, signed, Const
  from nmigen.hdl.dsl import Module, Elaboratable
  from nmigen.hdl.mem import Memory
  from nmutil.clz import CLZ
+from nmutil.plain_data import plain_data, fields, replace
  
  try:
      from functools import cached_property
@@ -65,13 +65,13 @@ class RoundDir(enum.Enum):
      ERROR_IF_INEXACT = enum.auto()
  
  
-@dataclass(frozen=True)
+@plain_data(frozen=True, eq=False, repr=False)
  class FixedPoint:
-    bits: int
-    frac_wid: int
+    __slots__ = "bits", "frac_wid"
  
-    def __post_init__(self):
-        # called by the autogenerated __init__
+    def __init__(self, bits, frac_wid):
+        self.bits = bits
+        self.frac_wid = frac_wid
          assert isinstance(self.bits, int)
          assert isinstance(self.frac_wid, int) and self.frac_wid >= 0
  
@@ -332,43 +332,47 @@ def _assert_accuracy(condition, msg="not accurate enough"):
      raise ParamsNotAccurateEnough(msg)
  
  
-@dataclass(frozen=True, unsafe_hash=True)
+@plain_data(frozen=True, unsafe_hash=True)
  class GoldschmidtDivParamsBase:
      """parameters for a Goldschmidt division algorithm, excluding derived
      parameters.
      """
  
-    io_width: int
-    """bit-width of the input divisor and the result.
-    the input numerator is `2 * io_width`-bits wide.
-    """
+    __slots__ = ("io_width", "extra_precision", "table_addr_bits",
+                 "table_data_bits", "iter_count")
+
+    def __init__(self, io_width, extra_precision, table_addr_bits,
+                 table_data_bits, iter_count):
+        assert isinstance(io_width, int)
+        assert isinstance(extra_precision, int)
+        assert isinstance(table_addr_bits, int)
+        assert isinstance(table_data_bits, int)
+        assert isinstance(iter_count, int)
+        self.io_width = io_width
+        """bit-width of the input divisor and the result.
+        the input numerator is `2 * io_width`-bits wide.
+        """
  
-    extra_precision: int
-    """number of bits of additional precision used inside the algorithm."""
+        self.extra_precision = extra_precision
+        """number of bits of additional precision used inside the algorithm."""
  
-    table_addr_bits: int
-    """the number of address bits used in the lookup-table."""
+        self.table_addr_bits = table_addr_bits
+        """the number of address bits used in the lookup-table."""
  
-    table_data_bits: int
-    """the number of data bits used in the lookup-table."""
+        self.table_data_bits = table_data_bits
+        """the number of data bits used in the lookup-table."""
  
-    iter_count: int
-    """the total number of iterations of the division algorithm's loop"""
+        self.iter_count = iter_count
+        """the total number of iterations of the division algorithm's loop"""
  
  
-@dataclass(frozen=True, unsafe_hash=True)
+@plain_data(frozen=True, unsafe_hash=True)
  class GoldschmidtDivParams(GoldschmidtDivParamsBase):
      """parameters for a Goldschmidt division algorithm.
      Use `GoldschmidtDivParams.get` to find a efficient set of parameters.
      """
  
-    # tuple to be immutable, repr=False so repr() works for debugging even when
-    # __post_init__ hasn't finished running yet
-    table: "tuple[FixedPoint, ...]" = field(init=False, repr=False)
-    """the lookup-table"""
-
-    ops: "tuple[GoldschmidtDivOp, ...]" = field(init=False, repr=False)
-    """the operations needed to perform the goldschmidt division algorithm."""
+    __slots__ = "table", "ops"
  
      def _shrink_bound(self, bound, round_dir):
          """prevent fractions from having huge numerators/denominators by
@@ -445,8 +449,13 @@ class GoldschmidtDivParams(GoldschmidtDivParamsBase):
          # we round down
          return min_value
  
-    def __post_init__(self):
-        # called by the autogenerated __init__
+    def __init__(self, io_width, extra_precision, table_addr_bits,
+                 table_data_bits, iter_count):
+        super().__init__(io_width=io_width,
+                         extra_precision=extra_precision,
+                         table_addr_bits=table_addr_bits,
+                         table_data_bits=table_data_bits,
+                         iter_count=iter_count)
          _assert_accuracy(self.io_width >= 1, "io_width out of range")
          _assert_accuracy(self.extra_precision >= 0,
                           "extra_precision out of range")
@@ -460,9 +469,14 @@ class GoldschmidtDivParams(GoldschmidtDivParamsBase):
              table.append(FixedPoint.with_frac_wid(self.table_exact_value(addr),
                                                    self.table_data_bits,
                                                    RoundDir.DOWN))
-        # we have to use object.__setattr__ since frozen=True
-        object.__setattr__(self, "table", tuple(table))
-        object.__setattr__(self, "ops", tuple(self.__make_ops()))
+
+        self.table = tuple(table)
+        """ the lookup-table.
+        type: tuple[FixedPoint, ...]
+        """
+
+        self.ops = tuple(self.__make_ops())
+        "the operations needed to perform the goldschmidt division algorithm."
  
      @property
      def expanded_width(self):
@@ -694,14 +708,9 @@ class GoldschmidtDivParams(GoldschmidtDivParamsBase):
      def max_n_shift(self):
          """ maximum value of `state.n_shift`.
          """
-        # input numerator is `2*io_width`-bits
-        max_n = (1 << (self.io_width * 2)) - 1
-        max_n_shift = 0
-        # normalize so 1 <= n < 2
-        while max_n >= 2:
-            max_n >>= 1
-            max_n_shift += 1
-        return max_n_shift
+        # numerator must be less than `denominator << self.io_width`, so
+        # `n_shift` is at most `self.io_width`
+        return self.io_width
  
      @cached_property
      def n_hat(self):
@@ -769,13 +778,14 @@ class GoldschmidtDivParams(GoldschmidtDivParamsBase):
          max_rel_error = (2 * i) * self.n_hat + power
  
          min_a_over_b = Fraction(1, 2)
-        max_a_over_b = Fraction(2)
-        max_allowed_abs_error = max_a_over_b / (1 << self.max_n_shift)
-        max_allowed_rel_error = max_allowed_abs_error / min_a_over_b
+        min_abs_error_for_correctness = min_a_over_b / (1 << self.max_n_shift)
+        min_rel_error_for_correctness = (min_abs_error_for_correctness
+                                         / min_a_over_b)
  
-        _assert_accuracy(max_rel_error < max_allowed_rel_error,
-                         f"not accurate enough: max_rel_error={max_rel_error}"
-                         f" max_allowed_rel_error={max_allowed_rel_error}")
+        _assert_accuracy(
+            max_rel_error < min_rel_error_for_correctness,
+            f"not accurate enough: max_rel_error={max_rel_error}"
+            f" min_rel_error_for_correctness={min_rel_error_for_correctness}")
  
          yield GoldschmidtDivOp.CalcResult
  
@@ -804,11 +814,9 @@ class GoldschmidtDivParams(GoldschmidtDivParamsBase):
      @lru_cache(maxsize=1 << 16)
      def __cached_new(base_params):
          assert isinstance(base_params, GoldschmidtDivParamsBase)
-        # can't use dataclasses.asdict, since it's recursive and will also give
-        # child class fields too, which we don't want.
          kwargs = {}
          for field in fields(GoldschmidtDivParamsBase):
-            kwargs[field.name] = getattr(base_params, field.name)
+            kwargs[field] = getattr(base_params, field)
          try:
              return GoldschmidtDivParams(**kwargs), None
          except ParamsNotAccurateEnough as e:
@@ -937,7 +945,9 @@ class GoldschmidtDivParams(GoldschmidtDivParamsBase):
              else:
                  break
  
-        return cached_new(params)
+        retval = cached_new(params)
+        assert isinstance(retval, GoldschmidtDivParams)
+        return retval
  
  
  def clz(v, wid):
@@ -971,7 +981,8 @@ class GoldschmidtDivOp(enum.Enum):
              state.n_shift = 0
              # normalize so 1 <= n < 2
              while state.n >= 2:
-                state.n = (state.n * 0.5).to_frac_wid(expanded_width)
+                state.n = (state.n * 0.5).to_frac_wid(expanded_width,
+                                                      round_dir=RoundDir.DOWN)
                  state.n_shift += 1
          elif self == GoldschmidtDivOp.FEqTableLookup:
              # compute initial f by table lookup
@@ -979,6 +990,8 @@ class GoldschmidtDivOp(enum.Enum):
              d_m_1 = d_m_1.to_frac_wid(table_addr_bits, RoundDir.DOWN)
              assert 0 <= d_m_1.bits < (1 << params.table_addr_bits)
              state.f = params.table[d_m_1.bits]
+            state.f = state.f.to_frac_wid(expanded_width,
+                                          round_dir=RoundDir.DOWN)
          elif self == GoldschmidtDivOp.MulNByF:
              assert state.f is not None
              n = state.n * state.f
@@ -1003,7 +1016,6 @@ class GoldschmidtDivOp(enum.Enum):
              assert False, f"unimplemented GoldschmidtDivOp: {self}"
  
      def gen_hdl(self, params, state, sync_rom):
-        # FIXME: finish getting hdl/simulation to work
          """generate the hdl for this operation.
  
          arguments:
@@ -1018,8 +1030,6 @@ class GoldschmidtDivOp(enum.Enum):
          assert isinstance(params, GoldschmidtDivParams)
          assert isinstance(state, GoldschmidtDivHDLState)
          m = state.m
-        expanded_width = params.expanded_width
-        table_addr_bits = params.table_addr_bits
          if self == GoldschmidtDivOp.Normalize:
              # normalize so 1 <= d < 2
              assert state.d.width == params.io_width
@@ -1029,27 +1039,41 @@ class GoldschmidtDivOp(enum.Enum):
              m.d.comb += d_leading_zeros.sig_in.eq(state.d)
              d_shift_out = Signal.like(state.d)
              m.d.comb += d_shift_out.eq(state.d << d_leading_zeros.lz)
-            state.d = Signal(params.n_d_f_total_wid)
-            m.d.comb += state.d.eq(d_shift_out << (params.extra_precision
-                                                   + params.n_d_f_int_wid))
+            d = Signal(params.n_d_f_total_wid)
+            m.d.comb += d.eq((d_shift_out << (1 + params.expanded_width))
+                             >> state.d.width)
  
              # normalize so 1 <= n < 2
              n_leading_zeros = CLZ(2 * params.io_width)
              m.submodules.n_leading_zeros = n_leading_zeros
              m.d.comb += n_leading_zeros.sig_in.eq(state.n)
-            n_shift_s_v = (params.io_width + d_leading_zeros.lz
+            signed_zero = Const(0, signed(1))  # force subtraction to be signed
+            n_shift_s_v = (params.io_width + signed_zero + d_leading_zeros.lz
                             - n_leading_zeros.lz)
              n_shift_s = Signal.like(n_shift_s_v)
-            state.n_shift = Signal(d_leading_zeros.lz.width)
+            n_shift_n_lz_out = Signal.like(state.n)
+            n_shift_d_lz_out = Signal.like(state.n << d_leading_zeros.lz)
              m.d.comb += [
                  n_shift_s.eq(n_shift_s_v),
-                state.n_shift.eq(Mux(n_shift_s < 0, 0, n_shift_s)),
+                n_shift_d_lz_out.eq(state.n << d_leading_zeros.lz),
+                n_shift_n_lz_out.eq(state.n << n_leading_zeros.lz),
              ]
+            state.n_shift = Signal(d_leading_zeros.lz.width)
              n = Signal(params.n_d_f_total_wid)
-            shifted_n = state.n << state.n_shift
-            fixed_shift = params.expanded_width - state.n.width
-            m.d.comb += n.eq(shifted_n << fixed_shift)
+            with m.If(n_shift_s < 0):
+                m.d.comb += [
+                    state.n_shift.eq(0),
+                    n.eq((n_shift_d_lz_out << (1 + params.expanded_width))
+                         >> state.d.width),
+                ]
+            with m.Else():
+                m.d.comb += [
+                    state.n_shift.eq(n_shift_s),
+                    n.eq((n_shift_n_lz_out << (1 + params.expanded_width))
+                         >> state.n.width),
+                ]
              state.n = n
+            state.d = d
          elif self == GoldschmidtDivOp.FEqTableLookup:
              assert state.d.width == params.n_d_f_total_wid, "invalid d width"
              # compute initial f by table lookup
@@ -1058,7 +1082,7 @@ class GoldschmidtDivOp(enum.Enum):
              table_width = 1 + params.table_data_bits
              table = Memory(width=table_width, depth=len(params.table),
                             init=[i.bits for i in params.table])
-            addr = state.d[:-params.n_d_f_int_wid][-table_addr_bits:]
+            addr = state.d[:-params.n_d_f_int_wid][-params.table_addr_bits:]
              if sync_rom:
                  table_read = table.read_port()
                  m.d.comb += table_read.addr.eq(addr)
@@ -1068,8 +1092,7 @@ class GoldschmidtDivOp(enum.Enum):
                  m.d.comb += table_read.addr.eq(addr)
              m.submodules.table_read = table_read
              state.f = Signal(params.n_d_f_int_wid + params.expanded_width)
-            data_shift = (table_width - params.table_data_bits
-                          + params.expanded_width)
+            data_shift = params.expanded_width - params.table_data_bits
              m.d.comb += state.f.eq(table_read.data << data_shift)
          elif self == GoldschmidtDivOp.MulNByF:
              assert state.n.width == params.n_d_f_total_wid, "invalid n width"
@@ -1083,7 +1106,13 @@ class GoldschmidtDivOp(enum.Enum):
              assert state.f is not None
              assert state.f.width == params.n_d_f_total_wid, "invalid f width"
              d = Signal.like(state.d)
-            m.d.comb += d.eq((state.d * state.f) >> params.expanded_width)
+            d_times_f = Signal.like(state.d * state.f)
+            m.d.comb += [
+                d_times_f.eq(state.d * state.f),
+                # round the multiplication up
+                d.eq((d_times_f >> params.expanded_width)
+                     + (d_times_f[:params.expanded_width] != 0)),
+            ]
              state.d = d
          elif self == GoldschmidtDivOp.FEq2MinusD:
              assert state.d.width == params.n_d_f_total_wid, "invalid d width"
@@ -1122,36 +1151,61 @@ class GoldschmidtDivOp(enum.Enum):
              assert False, f"unimplemented GoldschmidtDivOp: {self}"
  
  
-@dataclass
+@plain_data(repr=False)
  class GoldschmidtDivState:
-    orig_n: int
-    """original numerator"""
-
-    orig_d: int
-    """original denominator"""
-
-    n: FixedPoint
-    """numerator -- N_prime[i] in the paper's algorithm 2"""
-
-    d: FixedPoint
-    """denominator -- D_prime[i] in the paper's algorithm 2"""
-
-    f: "FixedPoint | None" = None
-    """current factor -- F_prime[i] in the paper's algorithm 2"""
-
-    quotient: "int | None" = None
-    """final quotient"""
-
-    remainder: "int | None" = None
-    """final remainder"""
+    __slots__ = ("orig_n", "orig_d", "n", "d",
+                 "f", "quotient", "remainder", "n_shift")
+
+    def __init__(self, orig_n, orig_d, n, d,
+                 f=None, quotient=None, remainder=None, n_shift=None):
+        assert isinstance(orig_n, int)
+        assert isinstance(orig_d, int)
+        assert isinstance(n, FixedPoint)
+        assert isinstance(d, FixedPoint)
+        assert f is None or isinstance(f, FixedPoint)
+        assert quotient is None or isinstance(quotient, int)
+        assert remainder is None or isinstance(remainder, int)
+        assert n_shift is None or isinstance(n_shift, int)
+        self.orig_n = orig_n
+        """original numerator"""
+
+        self.orig_d = orig_d
+        """original denominator"""
+
+        self.n = n
+        """numerator -- N_prime[i] in the paper's algorithm 2"""
+
+        self.d = d
+        """denominator -- D_prime[i] in the paper's algorithm 2"""
+
+        self.f = f
+        """current factor -- F_prime[i] in the paper's algorithm 2"""
+
+        self.quotient = quotient
+        """final quotient"""
+
+        self.remainder = remainder
+        """final remainder"""
+
+        self.n_shift = n_shift
+        """amount the numerator needs to be left-shifted at the end of the
+        algorithm.
+        """
  
-    n_shift: "int | None" = None
-    """amount the numerator needs to be left-shifted at the end of the
-    algorithm.
-    """
+    def __repr__(self):
+        fields_str = []
+        for field in fields(GoldschmidtDivState):
+            value = getattr(self, field)
+            if value is None:
+                continue
+            if isinstance(value, int) and field != "n_shift":
+                fields_str.append(f"{field}={hex(value)}")
+            else:
+                fields_str.append(f"{field}={value!r}")
+        return f"GoldschmidtDivState({', '.join(fields_str)})"
  
  
-def goldschmidt_div(n, d, params):
+def goldschmidt_div(n, d, params, trace=lambda state: None):
      """ Goldschmidt division algorithm.
  
          based on:
@@ -1168,6 +1222,9 @@ def goldschmidt_div(n, d, params):
              denominator. a `width`-bit unsigned integer. must not be zero.
          width: int
              the bit-width of the inputs/outputs. must be a positive integer.
+        trace: Function[[GoldschmidtDivState], None]
+            called with the initial state and the state after executing each
+            operation in `params.ops`.
  
          returns: tuple[int, int]
              the quotient and remainder. a tuple of two `width`-bit unsigned
@@ -1187,8 +1244,10 @@ def goldschmidt_div(n, d, params):
          d=FixedPoint(d, params.io_width),
      )
  
+    trace(state)
      for op in params.ops:
          op.run(params, state)
+        trace(state)
  
      assert state.quotient is not None
      assert state.remainder is not None
@@ -1196,45 +1255,55 @@ def goldschmidt_div(n, d, params):
      return state.quotient, state.remainder
  
  
-@dataclass(eq=False)
+@plain_data(eq=False)
  class GoldschmidtDivHDLState:
-    m: Module
-    """The HDL Module"""
+    __slots__ = ("m", "orig_n", "orig_d", "n", "d",
+                 "f", "quotient", "remainder", "n_shift")
  
-    orig_n: Signal
-    """original numerator"""
+    __signal_name_prefix = "state_"
  
-    orig_d: Signal
-    """original denominator"""
+    def __init__(self, m, orig_n, orig_d, n, d,
+                 f=None, quotient=None, remainder=None, n_shift=None):
+        assert isinstance(m, Module)
+        assert isinstance(orig_n, Signal)
+        assert isinstance(orig_d, Signal)
+        assert isinstance(n, Signal)
+        assert isinstance(d, Signal)
+        assert f is None or isinstance(f, Signal)
+        assert quotient is None or isinstance(quotient, Signal)
+        assert remainder is None or isinstance(remainder, Signal)
+        assert n_shift is None or isinstance(n_shift, Signal)
  
-    n: Signal
-    """numerator -- N_prime[i] in the paper's algorithm 2"""
+        self.m = m
+        """The HDL Module"""
  
-    d: Signal
-    """denominator -- D_prime[i] in the paper's algorithm 2"""
+        self.orig_n = orig_n
+        """original numerator"""
  
-    f: "Signal | None" = None
-    """current factor -- F_prime[i] in the paper's algorithm 2"""
+        self.orig_d = orig_d
+        """original denominator"""
  
-    quotient: "Signal | None" = None
-    """final quotient"""
+        self.n = n
+        """numerator -- N_prime[i] in the paper's algorithm 2"""
  
-    remainder: "Signal | None" = None
-    """final remainder"""
+        self.d = d
+        """denominator -- D_prime[i] in the paper's algorithm 2"""
  
-    n_shift: "Signal | None" = None
-    """amount the numerator needs to be left-shifted at the end of the
-    algorithm.
-    """
+        self.f = f
+        """current factor -- F_prime[i] in the paper's algorithm 2"""
+
+        self.quotient = quotient
+        """final quotient"""
  
-    old_signals: "defaultdict[str, list[Signal]]" = field(repr=False,
-                                                          init=False)
+        self.remainder = remainder
+        """final remainder"""
  
-    __signal_name_prefix: "str" = field(default="state_", repr=False,
-                                        init=False)
+        self.n_shift = n_shift
+        """amount the numerator needs to be left-shifted at the end of the
+        algorithm.
+        """
  
-    def __post_init__(self):
-        # called by the autogenerated __init__
+        # old_signals must be set last
          self.old_signals = defaultdict(list)
  
      def __setattr__(self, name, value):
@@ -1259,21 +1328,20 @@ class GoldschmidtDivHDLState:
          old_prefix = self.__signal_name_prefix
          try:
              for field in fields(GoldschmidtDivHDLState):
-                if field.name.startswith("_") or field.name == "m":
+                if field.startswith("_") or field == "m":
                      continue
-                old_sig = getattr(self, field.name, None)
+                old_sig = getattr(self, field, None)
                  if old_sig is None:
                      continue
                  assert isinstance(old_sig, Signal)
                  new_sig = Signal.like(old_sig)
-                setattr(self, field.name, new_sig)
+                setattr(self, field, new_sig)
                  self.m.d.sync += new_sig.eq(old_sig)
          finally:
              self.__signal_name_prefix = old_prefix
  
  
  class GoldschmidtDivHDL(Elaboratable):
-    # FIXME: finish getting hdl/simulation to work
      """ Goldschmidt division algorithm.
  
          based on:
@@ -1303,6 +1371,9 @@ class GoldschmidtDivHDL(Elaboratable):
              output quotient. only valid when `n < (d << params.io_width)`.
          r: Signal(unsigned(params.io_width))
              output remainder. only valid when `n < (d << params.io_width)`.
+        trace: list[GoldschmidtDivHDLState]
+            list of the initial state and the state after executing each
+            operation in `params.ops`.
      """
  
      @property
@@ -1321,15 +1392,16 @@ class GoldschmidtDivHDL(Elaboratable):
          self.q = Signal(unsigned(params.io_width))
          self.r = Signal(unsigned(params.io_width))
  
-    def elaborate(self, platform):
-        m = Module()
+        # in constructor so we get trace without needing to call elaborate
          state = GoldschmidtDivHDLState(
-            m=m,
+            m=Module(),
              orig_n=self.n,
              orig_d=self.d,
              n=self.n,
              d=self.d)
  
+        self.trace = [replace(state)]
+
          # copy and reverse
          pipe_reg_indexes = list(reversed(self.pipe_reg_indexes))
  
@@ -1339,14 +1411,19 @@ class GoldschmidtDivHDL(Elaboratable):
                  pipe_reg_indexes.pop()
                  state.insert_pipeline_register()
              op.gen_hdl(self.params, state, self.sync_rom)
+            self.trace.append(replace(state))
  
          while len(pipe_reg_indexes) > 0:
              pipe_reg_indexes.pop()
              state.insert_pipeline_register()
  
-        m.d.comb += self.q.eq(state.quotient)
-        m.d.comb += self.r.eq(state.remainder)
-        return m
+        state.m.d.comb += [
+            self.q.eq(state.quotient),
+            self.r.eq(state.remainder),
+        ]
+
+    def elaborate(self, platform):
+        return self.trace[0].m
  
  
  GOLDSCHMIDT_SQRT_RSQRT_TABLE_ADDR_INT_WID = 2