add FPFormat.get_exponent_value to get an unbiased exponent corrected for subnormals

[ieee754fpu.git] / src / ieee754 / fpcommon / fpbase.py
diff --git a/src/ieee754/fpcommon/fpbase.py b/src/ieee754/fpcommon/fpbase.py

index b8deab021f2f8080270b71a656bb7ebf7e20f030..84768edc158e2a15323f2baef6984ca2f6f874e6 100644 (file)
--- a/src/ieee754/fpcommon/fpbase.py
+++ b/src/ieee754/fpcommon/fpbase.py
@@ -1,22 +1,239 @@
-# IEEE Floating Point Adder (Single Precision)
-# Copyright (C) Jonathan P Dawson 2013
-# 2013-12-12
+"""IEEE754 Floating Point Library
  
-from nmigen import Signal, Cat, Const, Mux, Module, Elaboratable
-from math import log
+Copyright (C) 2019 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
+Copyright (C) 2019,2022 Jacob Lifshay <programmerjake@gmail.com>
+
+"""
+
+
+from nmigen import (Signal, Cat, Const, Mux, Module, Elaboratable, Array,
+                    Value, Shape, signed, unsigned)
+from nmigen.utils import bits_for
  from operator import or_
  from functools import reduce
  
  from nmutil.singlepipe import PrevControl, NextControl
  from nmutil.pipeline import ObjectProxy
+import unittest
  import math
+import enum
+
+try:
+    from nmigen.hdl.smtlib2 import RoundingModeEnum
+    _HAVE_SMTLIB2 = True
+except ImportError:
+    _HAVE_SMTLIB2 = False
+
+# value so FPRoundingMode.to_smtlib2 can detect when no default is supplied
+_raise_err = object()
+
+
+class FPRoundingMode(enum.Enum):
+    # matches the FPSCR.RN field values, but includes some extra
+    # values (>= 0b100) used in miscellaneous instructions.
+
+    # naming matches smtlib2 names, doc strings are the OpenPower ISA
+    # specification's names (v3.1 section 7.3.2.6 --
+    # matches values in section 4.3.6).
+    RNE = 0b00
+    """Round to Nearest Even
+
+    Rounds to the nearest representable floating-point number, ties are
+    rounded to the number with the even mantissa. Treats +-Infinity as if
+    it were a normalized floating-point number when deciding which number
+    is closer when rounding. See IEEE754 spec. for details.
+    """
+
+    ROUND_NEAREST_TIES_TO_EVEN = RNE
+    DEFAULT = RNE
+
+    RTZ = 0b01
+    """Round towards Zero
+
+    If the result is exactly representable as a floating-point number, return
+    that, otherwise return the nearest representable floating-point value
+    with magnitude smaller than the exact answer.
+    """
+
+    ROUND_TOWARDS_ZERO = RTZ
+
+    RTP = 0b10
+    """Round towards +Infinity
+
+    If the result is exactly representable as a floating-point number, return
+    that, otherwise return the nearest representable floating-point value
+    that is numerically greater than the exact answer. This can round up to
+    +Infinity.
+    """
+
+    ROUND_TOWARDS_POSITIVE = RTP
+
+    RTN = 0b11
+    """Round towards -Infinity
+
+    If the result is exactly representable as a floating-point number, return
+    that, otherwise return the nearest representable floating-point value
+    that is numerically less than the exact answer. This can round down to
+    -Infinity.
+    """
+
+    ROUND_TOWARDS_NEGATIVE = RTN
+
+    RNA = 0b100
+    """Round to Nearest Away
+
+    Rounds to the nearest representable floating-point number, ties are
+    rounded to the number with the maximum magnitude. Treats +-Infinity as if
+    it were a normalized floating-point number when deciding which number
+    is closer when rounding. See IEEE754 spec. for details.
+    """
+
+    ROUND_NEAREST_TIES_TO_AWAY = RNA
+
+    RTOP = 0b101
+    """Round to Odd, unsigned zeros are Positive
+
+    Not in smtlib2.
+
+    If the result is exactly representable as a floating-point number, return
+    that, otherwise return the nearest representable floating-point value
+    that has an odd mantissa.
+
+    If the result is zero but with otherwise undetermined sign
+    (e.g. `1.0 - 1.0`), the sign is positive.
+
+    This rounding mode is used for instructions with Round To Odd enabled,
+    and `FPSCR.RN != RTN`.
+
+    This is useful to avoid double-rounding errors when doing arithmetic in a
+    larger type (e.g. f128) but where the answer should be a smaller type
+    (e.g. f80).
+    """
+
+    ROUND_TO_ODD_UNSIGNED_ZEROS_ARE_POSITIVE = RTOP
+
+    RTON = 0b110
+    """Round to Odd, unsigned zeros are Negative
+
+    Not in smtlib2.
+
+    If the result is exactly representable as a floating-point number, return
+    that, otherwise return the nearest representable floating-point value
+    that has an odd mantissa.
+
+    If the result is zero but with otherwise undetermined sign
+    (e.g. `1.0 - 1.0`), the sign is negative.
+
+    This rounding mode is used for instructions with Round To Odd enabled,
+    and `FPSCR.RN == RTN`.
+
+    This is useful to avoid double-rounding errors when doing arithmetic in a
+    larger type (e.g. f128) but where the answer should be a smaller type
+    (e.g. f80).
+    """
+
+    ROUND_TO_ODD_UNSIGNED_ZEROS_ARE_NEGATIVE = RTON
+
+    @staticmethod
+    def make_array(f):
+        """build an `Array` indexed by rounding-mode value: entry
+        `rm.value` holds `f(rm)` for each rounding mode `rm` (iterating the
+        enum visits each distinct member once, aliases are not repeated).
+        """
+        entries = [None] * len(FPRoundingMode)
+        for mode in FPRoundingMode:
+            entries[mode.value] = f(mode)
+        return Array(entries)
+
+    def overflow_rounds_to_inf(self, sign):
+        """returns true if an overflow should round to `inf`,
+        false if it should round to `max_normal`.
+
+        `sign` may be an nmigen `Value` or a plain python value; for the
+        directed modes (RTP/RTN) the result is derived from `sign`.
+        """
+        if isinstance(sign, Value):
+            not_sign = ~sign
+        else:
+            not_sign = not sign
+        # one entry per rounding mode
+        rounds_to_inf = {
+            FPRoundingMode.RNE: True,
+            FPRoundingMode.RTZ: False,
+            FPRoundingMode.RTP: not_sign,
+            FPRoundingMode.RTN: sign,
+            FPRoundingMode.RNA: True,
+            FPRoundingMode.RTOP: False,
+            FPRoundingMode.RTON: False,
+        }
+        return rounds_to_inf[self]
+
+    def underflow_rounds_to_zero(self, sign):
+        """returns true if an underflow should round to `zero`,
+        false if it should round to `min_denormal`.
+
+        `sign` may be an nmigen `Value` or a plain python value; for the
+        directed modes (RTP/RTN) the result is derived from `sign`.
+        """
+        if isinstance(sign, Value):
+            not_sign = ~sign
+        else:
+            not_sign = not sign
+        # one entry per rounding mode
+        rounds_to_zero = {
+            FPRoundingMode.RNE: True,
+            FPRoundingMode.RTZ: True,
+            FPRoundingMode.RTP: sign,
+            FPRoundingMode.RTN: not_sign,
+            FPRoundingMode.RNA: True,
+            FPRoundingMode.RTOP: False,
+            FPRoundingMode.RTON: False,
+        }
+        return rounds_to_zero[self]
+
+    def zero_sign(self):
+        """which sign an exact zero result should have when it isn't
+        otherwise determined, e.g. for `1.0 - 1.0`.
+
+        True (negative) only for RTN and RTON, False for every other mode.
+        """
+        return self in (FPRoundingMode.RTN, FPRoundingMode.RTON)
+
+    if _HAVE_SMTLIB2:
+        def to_smtlib2(self, default=_raise_err):
+            """return the corresponding smtlib2 rounding mode for `self`. If
+            there is no corresponding smtlib2 rounding mode, then return
+            `default` if specified, else raise `ValueError`.
+            """
+            if self in (FPRoundingMode.RTOP, FPRoundingMode.RTON):
+                # the round-to-odd modes have no smtlib2 equivalent
+                if default is _raise_err:
+                    raise ValueError(
+                        "no corresponding smtlib2 rounding mode", self)
+                return default
+            # the remaining modes (RNE, RTZ, RTP, RTN, RNA) use the same
+            # member names as the smtlib2 enum
+            return getattr(RoundingModeEnum, self.name)
+
+
  
  
  class FPFormat:
      """ Class describing binary floating-point formats based on IEEE 754.
  
-    :attribute exponent_width: the number of bits in the exponent field.
-    :attribute mantissa_width: the number of bits stored in the mantissa
+    :attribute e_width: the number of bits in the exponent field.
+    :attribute m_width: the number of bits stored in the mantissa
          field.
      :attribute has_int_bit: if the FP format has an explicit integer bit (like
          the x87 80-bit format). The bit is considered part of the mantissa.
@@ -25,13 +242,13 @@ class FPFormat:
      """
  
      def __init__(self,
-                 exponent_width,
-                 mantissa_width,
+                 e_width,
+                 m_width,
                   has_int_bit=False,
                   has_sign=True):
          """ Create ``FPFormat`` instance. """
-        self.exponent_width = exponent_width
-        self.mantissa_width = mantissa_width
+        self.e_width = e_width
+        self.m_width = m_width
          self.has_int_bit = has_int_bit
          self.has_sign = has_sign
  
@@ -39,8 +256,8 @@ class FPFormat:
          """ Check for equality. """
          if not isinstance(other, FPFormat):
              return NotImplemented
-        return (self.exponent_width == other.exponent_width
-                and self.mantissa_width == other.mantissa_width
+        return (self.e_width == other.e_width
+                and self.m_width == other.m_width
                  and self.has_int_bit == other.has_int_bit
                  and self.has_sign == other.has_sign)
  
@@ -51,8 +268,6 @@ class FPFormat:
          :param width: bit-width of requested format.
          :returns: the requested ``FPFormat`` instance.
          """
-        if not instanceof(width, int):
-            raise TypeError()
          if width == 16:
              return FPFormat(5, 10)
          if width == 32:
@@ -64,8 +279,8 @@ class FPFormat:
          if width > 128 and width % 32 == 0:
              if width > 1000000:  # arbitrary upper limit
                  raise ValueError("width too big")
-            exponent_width = round(4 * math.log2(width)) - 13
-            return FPFormat(exponent_width, width - 1 - exponent_width)
+            e_width = round(4 * math.log2(width)) - 13
+            return FPFormat(e_width, width - 1 - e_width)
          raise ValueError("width must be the bit-width of a valid IEEE"
                           " 754-2008 binary format")
  
@@ -76,24 +291,149 @@ class FPFormat:
                  return f"FPFormat.standard({self.width})"
          except ValueError:
              pass
-        retval = f"FPFormat({self.exponent_width}, {self.mantissa_width}"
+        retval = f"FPFormat({self.e_width}, {self.m_width}"
          if self.has_int_bit is not False:
              retval += f", {self.has_int_bit}"
          if self.has_sign is not True:
              retval += f", {self.has_sign}"
          return retval + ")"
  
+    def get_sign_field(self, x):
+        """ returns the sign bit of its input number, x, by shifting the
+            exponent and mantissa fields out of the way
+            (assumes FPFormat is set to signed - has_sign=True)
+        """
+        sign_pos = self.e_width + self.m_width
+        return x >> sign_pos
+
+    def get_exponent_field(self, x):
+        """ returns the raw (still biased) exponent field of its input
+        number, x: the mantissa is shifted out and the result is masked to
+        the exponent width
+        """
+        above_mantissa = x >> self.m_width
+        return above_mantissa & self.exponent_inf_nan
+
+    def get_exponent(self, x):
+        """ returns the exponent of its input number, x, with the bias
+        subtracted (no correction is applied for subnormals)
+        """
+        field = self.get_exponent_field(x)
+        is_unsigned_value = (isinstance(field, Value)
+                             and not field.shape().signed)
+        if is_unsigned_value:
+            # or-ing with a signed zero makes the expression signed without
+            # changing its value, since exponents can be negative
+            field = field | Const(0, signed(1))
+        return field - self.exponent_bias
+
+    def get_exponent_value(self, x):
+        """ returns the unbiased exponent of its input number, x, adjusted
+        for the mathematically correct subnormal exponent: one is added
+        when the exponent field equals the denormal/zero encoding.
+        """
+        field = self.get_exponent_field(x)
+        is_unsigned_value = (isinstance(field, Value)
+                             and not field.shape().signed)
+        if is_unsigned_value:
+            # or-ing with a signed zero makes the expression signed without
+            # changing its value, since exponents can be negative
+            field = field | Const(0, signed(1))
+        subnormal_adjust = field == self.exponent_denormal_zero
+        return field + subnormal_adjust - self.exponent_bias
+
+    def get_mantissa_field(self, x):
+        """ returns the stored mantissa bits of its input number, x
+        (everything above the mantissa width is masked off)
+        """
+        mask = self.mantissa_mask
+        return x & mask
+
+    def get_mantissa_value(self, x):
+        """ returns the mantissa of its input number, x, but with the
+        implicit bit, if any, made explicit.

+        formats with an explicit integer bit return the mantissa field
+        unchanged; otherwise the implicit bit (set when the exponent field
+        is not the denormal/zero encoding) is placed just above the
+        fraction bits.
+        """
+        mantissa_field = self.get_mantissa_field(x)
+        if self.has_int_bit:
+            return mantissa_field
+        is_normal = (self.get_exponent_field(x)
+                     != self.exponent_denormal_zero)
+        return (is_normal << self.fraction_width) | mantissa_field
+
+    def is_zero(self, x):
+        """ returns true if x is +/- zero
+        (exponent at the denormal/zero value and mantissa field zero)
+        """
+        exp_is_sub = self.get_exponent(x) == self.e_sub
+        mantissa_is_zero = self.get_mantissa_field(x) == 0
+        return exp_is_sub & mantissa_is_zero
+
+    def is_subnormal(self, x):
+        """ returns true if x is subnormal
+        (exponent at the denormal/zero value and mantissa field nonzero)
+        """
+        exp_is_sub = self.get_exponent(x) == self.e_sub
+        mantissa_nonzero = self.get_mantissa_field(x) != 0
+        return exp_is_sub & mantissa_nonzero
+
+    def is_inf(self, x):
+        """ returns true if x is infinite
+        (exponent at its maximum and mantissa field zero)
+        """
+        exp_is_max = self.get_exponent(x) == self.e_max
+        mantissa_is_zero = self.get_mantissa_field(x) == 0
+        return exp_is_max & mantissa_is_zero
+
+    def is_nan(self, x):
+        """ returns true if x is a nan (quiet or signalling):
+        exponent at its maximum and mantissa field nonzero
+        """
+        exp_is_max = self.get_exponent(x) == self.e_max
+        mantissa_nonzero = self.get_mantissa_field(x) != 0
+        return exp_is_max & mantissa_nonzero
+
+    def is_quiet_nan(self, x):
+        """ returns true if x is a quiet nan: exponent at its maximum,
+        mantissa field nonzero, and the top mantissa bit set
+        """
+        quiet_bit = 1 << (self.m_width - 1)
+        exp_is_max = self.get_exponent(x) == self.e_max
+        mantissa_nonzero = self.get_mantissa_field(x) != 0
+        quiet_bit_set = (self.get_mantissa_field(x) & quiet_bit) != 0
+        return exp_is_max & mantissa_nonzero & quiet_bit_set
+
+    def to_quiet_nan(self, x):
+        """ converts `x` to a quiet NaN by setting every exponent bit and
+        the top mantissa bit; all other bits of `x` are kept """
+        quiet_bit = 1 << (self.m_width - 1)
+        forced_bits = quiet_bit | self.exponent_mask
+        return x | forced_bits
+
+    def quiet_nan(self, sign=0):
+        """ return the default quiet NaN with sign `sign`
+        (a signed zero converted with `to_quiet_nan`) """
+        signed_zero = self.zero(sign)
+        return self.to_quiet_nan(signed_zero)
+
+    def zero(self, sign=0):
+        """ return zero with sign `sign`
+        (any nonzero `sign` sets the sign bit) """
+        sign_pos = self.e_width + self.m_width
+        is_negative = sign != 0
+        return is_negative << sign_pos
+
+    def inf(self, sign=0):
+        """ return infinity with sign `sign`
+        (a signed zero with every exponent bit set) """
+        signed_zero = self.zero(sign)
+        return signed_zero | self.exponent_mask
+
+    def is_nan_signaling(self, x):
+        """ returns true if x is a signalling nan: exponent at its maximum,
+        mantissa field nonzero, and the top mantissa bit clear
+        """
+        highbit = 1 << (self.m_width - 1)
+        # every comparison is parenthesised: `==` binds more loosely than
+        # `&`, so an unparenthesised trailing `== 0` would be applied to the
+        # whole and-ed expression and report every non-NaN value (e.g. 1.0)
+        # as a signalling nan.
+        return (self.get_exponent(x) == self.e_max) & \
+            (self.get_mantissa_field(x) != 0) & \
+            ((self.get_mantissa_field(x) & highbit) == 0)
+
      @property
      def width(self):
          """ Get the total number of bits in the FP format. """
-        return self.has_sign + self.exponent_width + self.mantissa_width
+        return self.has_sign + self.e_width + self.m_width
+
+    @property
+    def mantissa_mask(self):
+        """ Get a mask with the low `m_width` bits set """
+        bit_above_mantissa = 1 << self.m_width
+        return bit_above_mantissa - 1
+
+    @property
+    def exponent_mask(self):
+        """ Get a mask covering the exponent field (an all-ones exponent
+        shifted up past the mantissa) """
+        all_ones_exponent = self.exponent_inf_nan
+        return all_ones_exponent << self.m_width
  
      @property
      def exponent_inf_nan(self):
          """ Get the value of the exponent field designating infinity/NaN. """
-        return (1 << self.exponent_width) - 1
+        return (1 << self.e_width) - 1
+
+    @property
+    def e_max(self):
+        """ get the maximum exponent (minus bias), i.e. the unbiased value
+        of the infinity/NaN exponent encoding
+        """
+        biased_max = self.exponent_inf_nan
+        return biased_max - self.exponent_bias
  
      @property
+    def e_sub(self):
+        """ get the exponent (minus bias) of the denormal/zero encoding
+        """
+        return self.exponent_denormal_zero - self.exponent_bias
+    @property
      def exponent_denormal_zero(self):
          """ Get the value of the exponent field designating denormal/zero. """
          return 0
@@ -111,19 +451,102 @@ class FPFormat:
      @property
      def exponent_bias(self):
          """ Get the exponent bias. """
-        return (1 << (self.exponent_width - 1)) - 1
+        return (1 << (self.e_width - 1)) - 1
  
      @property
      def fraction_width(self):
          """ Get the number of mantissa bits that are fraction bits. """
-        return self.mantissa_width - self.has_int_bit
+        return self.m_width - self.has_int_bit
  
  
-class MultiShiftR:
+class TestFPFormat(unittest.TestCase):
+    """ very quick test for FPFormat: checks field extraction and value
+    classification against bit patterns produced by sfpy
+    """
+
+    def test_fpformat_fp64(self):
+        f64 = FPFormat.standard(64)
+        from sfpy import Float64
+
+        # exponent of 1.0 and 2.0
+        one = Float64(1.0).bits
+        print(hex(one))
+        self.assertEqual(f64.get_exponent(one), 0)
+
+        two = Float64(2.0).bits
+        print(hex(two))
+        self.assertEqual(f64.get_exponent(two), 1)
+
+        # mantissa and sign of +1.5
+        pos = Float64(1.5).bits
+        mantissa = f64.get_mantissa_field(pos)
+        print(hex(pos), hex(mantissa))
+        self.assertEqual(mantissa, 0x8000000000000)
+
+        sign = f64.get_sign_field(pos)
+        print(hex(pos), hex(sign))
+        self.assertEqual(sign, 0)
+
+        # sign of -1.5
+        neg = Float64(-1.5).bits
+        sign = f64.get_sign_field(neg)
+        print(hex(neg), hex(sign))
+        self.assertEqual(sign, 1)
+
+    def test_fpformat_fp32(self):
+        f32 = FPFormat.standard(32)
+        from sfpy import Float32
+
+        def show(bits, label, flag):
+            # same diagnostic line for every classification check
+            print(hex(bits), label, f32.get_exponent(bits), f32.e_max,
+                  f32.get_mantissa_field(bits), flag)
+
+        # exponent of 1.0 and 2.0
+        one = Float32(1.0).bits
+        print(hex(one))
+        self.assertEqual(f32.get_exponent(one), 0)
+
+        two = Float32(2.0).bits
+        print(hex(two))
+        self.assertEqual(f32.get_exponent(two), 1)
+
+        # mantissa of 1.5
+        x = Float32(1.5).bits
+        mantissa = f32.get_mantissa_field(x)
+        print(hex(x), hex(mantissa))
+        self.assertEqual(mantissa, 0x400000)
+
+        # NaN test
+        x = Float32(-1.0).sqrt().bits
+        flag = f32.is_nan(x)
+        show(x, "nan", flag)
+        self.assertEqual(flag, True)
+
+        # Inf test
+        x = (Float32(1e36) * Float32(1e36) * Float32(1e36)).bits
+        flag = f32.is_inf(x)
+        show(x, "inf", flag)
+        self.assertEqual(flag, True)
+
+        # subnormal
+        x = Float32(1e-41).bits
+        flag = f32.is_subnormal(x)
+        show(x, "sub", flag)
+        self.assertEqual(flag, True)
+
+        # zero is not subnormal
+        x = Float32(0.0).bits
+        flag = f32.is_subnormal(x)
+        show(x, "sub", flag)
+        self.assertEqual(flag, False)
+
+        # zero
+        flag = f32.is_zero(x)
+        show(x, "zero", flag)
+        self.assertEqual(flag, True)
+
+
+class MultiShiftR(Elaboratable):
  
      def __init__(self, width):
          self.width = width
-        self.smax = int(log(width) / log(2))
+        self.smax = bits_for(width - 1)
          self.i = Signal(width, reset_less=True)
          self.s = Signal(self.smax, reset_less=True)
          self.o = Signal(width, reset_less=True)
@@ -147,7 +570,7 @@ class MultiShift:
  
      def __init__(self, width):
          self.width = width
-        self.smax = int(log(width) / log(2))
+        self.smax = bits_for(width - 1)
  
      def lshift(self, op, s):
          res = op << s
@@ -159,10 +582,24 @@ class MultiShift:
  
  
  class FPNumBaseRecord:
-    """ Floating-point Base Number Class
+    """ Floating-point Base Number Class.
+
+    This class is designed to be passed around in other data structures
+    (between pipelines and between stages).  Its "friend" is FPNumBase,
+    which is a *module*.  The reason for the discernment is because
+    nmigen modules that are not added to submodules results in the
+    irritating "Elaboration" warning.  Despite not *needing* FPNumBase
+    in many cases to be added as a submodule (because it is just data)
+    this was not possible to solve without splitting out the data from
+    the module.
      """
  
-    def __init__(self, width, m_extra=True, e_extra=False):
+    def __init__(self, width, m_extra=True, e_extra=False, name=None):
+        if name is None:
+            name = ""
+            # assert false, "missing name"
+        else:
+            name += "_"
          self.width = width
          m_width = {16: 11, 32: 24, 64: 53}[width]  # 1 extra bit (overflow)
          e_width = {16: 7,  32: 10, 64: 13}[width]  # 2 extra bits (overflow)
@@ -186,10 +623,12 @@ class FPNumBaseRecord:
          self.e_start = self.rmw
          self.e_end = self.rmw + self.e_width - 2  # for decoding
  
-        self.v = Signal(width, reset_less=True)      # Latched copy of value
-        self.m = Signal(m_width, reset_less=True)    # Mantissa
-        self.e = Signal((e_width, True), reset_less=True)  # exp+2 bits, signed
-        self.s = Signal(reset_less=True)           # Sign bit
+        self.v = Signal(width, reset_less=True,
+                        name=name+"v")  # Latched copy of value
+        self.m = Signal(m_width, reset_less=True, name=name+"m")  # Mantissa
+        self.e = Signal(signed(e_width),
+                        reset_less=True, name=name+"e")  # exp+2 bits, signed
+        self.s = Signal(reset_less=True, name=name+"s")  # Sign bit
  
          self.fp = self
          self.drop_in(self)
@@ -212,14 +651,14 @@ class FPNumBaseRecord:
          e_max = self.e_max
          e_width = self.e_width
  
-        self.mzero = Const(0, (m_width, False))
+        self.mzero = Const(0, unsigned(m_width))
          m_msb = 1 << (self.m_width-2)
-        self.msb1 = Const(m_msb, (m_width, False))
-        self.m1s = Const(-1, (m_width, False))
-        self.P128 = Const(e_max, (e_width, True))
-        self.P127 = Const(e_max-1, (e_width, True))
-        self.N127 = Const(-(e_max-1), (e_width, True))
-        self.N126 = Const(-(e_max-2), (e_width, True))
+        self.msb1 = Const(m_msb, unsigned(m_width))
+        self.m1s = Const(-1, unsigned(m_width))
+        self.P128 = Const(e_max, signed(e_width))
+        self.P127 = Const(e_max-1, signed(e_width))
+        self.N127 = Const(-(e_max-1), signed(e_width))
+        self.N126 = Const(-(e_max-2), signed(e_width))
  
      def create(self, s, e, m):
          """ creates a value from sign / exponent / mantissa
@@ -247,9 +686,21 @@ class FPNumBaseRecord:
      def nan(self, s):
          return self.create(*self._nan(s))
  
+    def quieted_nan(self, other):
+        """ creates a NaN from `other`: keeps other's sign, uses exponent
+        `P128`, and takes the low `e_start` bits of `other.v` as mantissa
+        with the top one of those bits forced to 1.

+        `other` must be an FPNumBaseRecord of the same width.
+        """
+        assert isinstance(other, FPNumBaseRecord)
+        assert self.width == other.width
+        forced_bit = 1 << (self.e_start - 1)
+        mantissa = other.v[0:self.e_start] | forced_bit
+        return self.create(other.s, self.fp.P128, mantissa)
+
      def inf(self, s):
          return self.create(*self._inf(s))
  
+    def max_normal(self, s):
+        """ creates a value with sign `s`, exponent `P127` and an
+        all-ones mantissa
+        """
+        all_ones = ~0
+        return self.create(s, self.fp.P127, all_ones)
+
+    def min_denormal(self, s):
+        """ creates a value with sign `s`, exponent `N127` and a
+        mantissa of 1
+        """
+        lowest_bit = 1
+        return self.create(s, self.fp.N127, lowest_bit)
+
      def zero(self, s):
          return self.create(*self._zero(s))
  
@@ -296,8 +747,9 @@ class FPNumBase(FPNumBaseRecord, Elaboratable):
          self.is_overflowed = Signal(reset_less=True)
          self.is_denormalised = Signal(reset_less=True)
          self.exp_128 = Signal(reset_less=True)
-        self.exp_sub_n126 = Signal((e_width, True), reset_less=True)
+        self.exp_sub_n126 = Signal(signed(e_width), reset_less=True)
          self.exp_lt_n126 = Signal(reset_less=True)
+        self.exp_zero = Signal(reset_less=True)
          self.exp_gt_n126 = Signal(reset_less=True)
          self.exp_gt127 = Signal(reset_less=True)
          self.exp_n127 = Signal(reset_less=True)
@@ -316,6 +768,7 @@ class FPNumBase(FPNumBaseRecord, Elaboratable):
          m.d.comb += self.exp_sub_n126.eq(self.e - self.fp.N126)
          m.d.comb += self.exp_gt_n126.eq(self.exp_sub_n126 > 0)
          m.d.comb += self.exp_lt_n126.eq(self.exp_sub_n126 < 0)
+        m.d.comb += self.exp_zero.eq(self.e == 0)
          m.d.comb += self.exp_gt127.eq(self.e > self.fp.P127)
          m.d.comb += self.exp_n127.eq(self.e == self.fp.N127)
          m.d.comb += self.exp_n126.eq(self.e == self.fp.N126)
@@ -337,6 +790,8 @@ class FPNumBase(FPNumBaseRecord, Elaboratable):
          return self.exp_gt127
  
      def _is_denormalised(self):
+        # XXX NOT to be used for "official" quiet NaN tests!
+        # particularly when the MSB has been extended
          return (self.exp_n126) & (self.m_msbzero)
  
  
@@ -369,8 +824,8 @@ class MultiShiftRMerge(Elaboratable):
  
      def __init__(self, width, s_max=None):
          if s_max is None:
-            s_max = int(log(width) / log(2))
-        self.smax = s_max
+            s_max = bits_for(width - 1)
+        self.smax = Shape.cast(s_max)
          self.m = Signal(width, reset_less=True)
          self.inp = Signal(width, reset_less=True)
          self.diff = Signal(s_max, reset_less=True)
@@ -383,8 +838,9 @@ class MultiShiftRMerge(Elaboratable):
          m_mask = Signal(self.width, reset_less=True)
          smask = Signal(self.width, reset_less=True)
          stickybit = Signal(reset_less=True)
-        maxslen = Signal(self.smax, reset_less=True)
-        maxsleni = Signal(self.smax, reset_less=True)
+        # XXX GRR frickin nuisance https://github.com/nmigen/nmigen/issues/302
+        maxslen = Signal(self.smax.width, reset_less=True)
+        maxsleni = Signal(self.smax.width, reset_less=True)
  
          sm = MultiShift(self.width-1)
          m0s = Const(0, self.width-1)
@@ -695,7 +1151,13 @@ class FPOpOut(NextControl):
                  ]
  
  
-class Overflow:  # (Elaboratable):
+class Overflow:
+    # TODO: change FFLAGS to be FPSCR's status flags
+    FFLAGS_NV = Const(1<<4, 5) # invalid operation
+    FFLAGS_DZ = Const(1<<3, 5) # divide by zero
+    FFLAGS_OF = Const(1<<2, 5) # overflow
+    FFLAGS_UF = Const(1<<1, 5) # underflow
+    FFLAGS_NX = Const(1<<0, 5) # inexact
      def __init__(self, name=None):
          if name is None:
              name = ""
@@ -703,6 +1165,14 @@ class Overflow:  # (Elaboratable):
          self.round_bit = Signal(reset_less=True, name=name+"round")  # tot[1]
          self.sticky = Signal(reset_less=True, name=name+"sticky")   # tot[0]
          self.m0 = Signal(reset_less=True, name=name+"m0")  # mantissa bit 0
+        self.fpflags = Signal(5, reset_less=True, name=name+"fflags")
+
+        self.sign = Signal(reset_less=True, name=name+"sign")
+        """sign bit -- 1 means negative, 0 means positive"""
+
+        self.rm = Signal(FPRoundingMode, name=name+"rm",
+                         reset=FPRoundingMode.DEFAULT)
+        """rounding mode"""
  
          #self.roundz = Signal(reset_less=True)
  
@@ -711,17 +1181,84 @@ class Overflow:  # (Elaboratable):
          yield self.round_bit
          yield self.sticky
          yield self.m0
+        yield self.fpflags
+        yield self.sign
+        yield self.rm
  
      def eq(self, inp):
          return [self.guard.eq(inp.guard),
                  self.round_bit.eq(inp.round_bit),
                  self.sticky.eq(inp.sticky),
-                self.m0.eq(inp.m0)]
+                self.m0.eq(inp.m0),
+                self.fpflags.eq(inp.fpflags),
+                self.sign.eq(inp.sign),
+                self.rm.eq(inp.rm)]
  
      @property
-    def roundz(self):
+    def roundz_rne(self):
+        """true if the mantissa should be rounded up for `rm == RNE`
+
+        assumes the rounding mode is `ROUND_NEAREST_TIES_TO_EVEN`
+        """
          return self.guard & (self.round_bit | self.sticky | self.m0)
  
+    @property
+    def _any_discarded(self):
+        """true if any of the guard, round or sticky bits is set
+
+        private helper shared by the directed and round-to-odd modes
+        """
+        return self.guard | self.round_bit | self.sticky
+
+    @property
+    def roundz_rna(self):
+        """true if the mantissa should be rounded up for `rm == RNA`
+
+        assumes the rounding mode is `ROUND_NEAREST_TIES_TO_AWAY`;
+        depends on the guard bit alone
+        """
+        return self.guard
+
+    @property
+    def roundz_rtn(self):
+        """true if the mantissa should be rounded up for `rm == RTN`
+
+        assumes the rounding mode is `ROUND_TOWARDS_NEGATIVE`;
+        only rounds up when the sign bit is set and any discarded bit is set
+        """
+        return self.sign & self._any_discarded
+
+    @property
+    def roundz_rto(self):
+        """true if the mantissa should be rounded up for `rm in (RTOP, RTON)`
+
+        assumes the rounding mode is `ROUND_TO_ODD_UNSIGNED_ZEROS_ARE_POSITIVE`
+        or `ROUND_TO_ODD_UNSIGNED_ZEROS_ARE_NEGATIVE`;
+        only rounds up when mantissa bit 0 is clear and any discarded bit
+        is set
+        """
+        return ~self.m0 & self._any_discarded
+
+    @property
+    def roundz_rtp(self):
+        """true if the mantissa should be rounded up for `rm == RTP`
+
+        assumes the rounding mode is `ROUND_TOWARDS_POSITIVE`;
+        only rounds up when the sign bit is clear and any discarded bit
+        is set
+        """
+        return ~self.sign & self._any_discarded
+
+    @property
+    def roundz_rtz(self):
+        """true if the mantissa should be rounded up for `rm == RTZ`
+
+        assumes the rounding mode is `ROUND_TOWARDS_ZERO`;
+        never rounds up
+        """
+        return False
+
+    @property
+    def roundz(self):
+        """true if the mantissa should be rounded up for the current rounding
+        mode `self.rm`
+
+        builds one round-up expression per rounding mode and selects the
+        entry for `self.rm` from an Array indexed by rounding-mode value
+        """
+        per_mode = {
+            FPRoundingMode.RNA: self.roundz_rna,
+            FPRoundingMode.RNE: self.roundz_rne,
+            FPRoundingMode.RTN: self.roundz_rtn,
+            FPRoundingMode.RTOP: self.roundz_rto,
+            FPRoundingMode.RTON: self.roundz_rto,
+            FPRoundingMode.RTP: self.roundz_rtp,
+            FPRoundingMode.RTZ: self.roundz_rtz,
+        }
+        choices = FPRoundingMode.make_array(per_mode.__getitem__)
+        return choices[self.rm]
+
  
  class OverflowMod(Elaboratable, Overflow):
      def __init__(self, name=None):
@@ -739,7 +1276,7 @@ class OverflowMod(Elaboratable, Overflow):
  
      def elaborate(self, platform):
          m = Module()
-        m.d.comb += self.roundz_out.eq(self.roundz)
+        m.d.comb += self.roundz_out.eq(self.roundz) # roundz is a property
          return m
  
  
@@ -903,3 +1440,7 @@ class FPID:
      def idsync(self, m):
          if self.id_wid is not None:
              m.d.sync += self.out_mid.eq(self.in_mid)
+
+
+if __name__ == '__main__':
+    unittest.main()