softfloat/internals.h

   1
   2 /*** UPDATE COMMENTS. ***/
   3
   4 #include "softfloat_types.h"
   5
   6 union ui32_f32 { uint32_t ui; float32_t f; };
   7 union ui64_f64 { uint64_t ui; float64_t f; };
   8 #ifdef LITTLEENDIAN
   9 union ui128_f128 { uint64_t ui0, ui64; float128_t f; };
  10 #else
  11 union ui128_f128 { uint64_t ui64, ui0; float128_t f; };
  12 #endif
  13
  14 enum {
  15     softfloat_mulAdd_subC    = 1,
  16     softfloat_mulAdd_subProd = 2
  17 };
  18
  19 uint_fast32_t
  20  softfloat_roundPackToUI32( bool, uint_fast64_t, int_fast8_t, bool );
  21 uint_fast64_t
  22  softfloat_roundPackToUI64(
  23      bool, uint_fast64_t, uint_fast64_t, int_fast8_t, bool );
  24 /*----------------------------------------------------------------------------
  25 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
  26 | and 7, and returns the properly rounded 32-bit integer corresponding to the
  27 | input.  If `zSign' is 1, the input is negated before being converted to an
  28 | integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
  29 | is simply rounded to an integer, with the inexact exception raised if the
  30 | input cannot be represented exactly as an integer.  However, if the fixed-
  31 | point input is too large, the invalid exception is raised and the largest
  32 | positive or negative integer is returned.
  33 *----------------------------------------------------------------------------*/
  34 int_fast32_t
  35  softfloat_roundPackToI32( bool, uint_fast64_t, int_fast8_t, bool );
  36 /*----------------------------------------------------------------------------
  37 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
  38 | `absZ1', with binary point between bits 63 and 64 (between the input words),
  39 | and returns the properly rounded 64-bit integer corresponding to the input.
  40 | If `zSign' is 1, the input is negated before being converted to an integer.
  41 | Ordinarily, the fixed-point input is simply rounded to an integer, with
  42 | the inexact exception raised if the input cannot be represented exactly as
  43 | an integer.  However, if the fixed-point input is too large, the invalid
  44 | exception is raised and the largest positive or negative integer is
  45 | returned.
  46 *----------------------------------------------------------------------------*/
  47 int_fast64_t
  48  softfloat_roundPackToI64(
  49      bool, uint_fast64_t, uint_fast64_t, int_fast8_t, bool );
  50
  51 /*----------------------------------------------------------------------------
  52 | Returns 1 if the single-precision floating-point value `a' is a NaN;
  53 | otherwise, returns 0.
  54 *----------------------------------------------------------------------------*/
  55 #define isNaNF32UI( ui ) (0xFF000000<(uint32_t)((uint_fast32_t)(ui)<<1))
  56 /*----------------------------------------------------------------------------
  57 | Returns the sign bit of the single-precision floating-point value `a'.
  58 *----------------------------------------------------------------------------*/
  59 #define signF32UI( a ) ((bool)((uint32_t)(a)>>31))
  60 /*----------------------------------------------------------------------------
  61 | Returns the exponent bits of the single-precision floating-point value `a'.
  62 *----------------------------------------------------------------------------*/
  63 #define expF32UI( a ) ((int_fast16_t)((a)>>23)&0xFF)
  64 /*----------------------------------------------------------------------------
  65 | Returns the fraction bits of the single-precision floating-point value `a'.
  66 *----------------------------------------------------------------------------*/
  67 #define fracF32UI( a ) ((a)&0x007FFFFF)
  68 /*----------------------------------------------------------------------------
  69 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
  70 | single-precision floating-point value, returning the result.  After being
  71 | shifted into the proper positions, the three fields are simply added
  72 | together to form the result.  This means that any integer portion of `zSig'
  73 | will be added into the exponent.  Since a properly normalized significand
  74 | will have an integer portion equal to 1, the `zExp' input should be 1 less
  75 | than the desired result exponent whenever `zSig' is a complete, normalized
  76 | significand.
  77 *----------------------------------------------------------------------------*/
  78 #define packToF32UI( sign, exp, sig ) (((uint32_t)(sign)<<31)+((uint32_t)(exp)<<23)+(sig))
  79
  80 /*----------------------------------------------------------------------------
  81 | Normalizes the subnormal single-precision floating-point value represented
  82 | by the denormalized significand `aSig'.  The normalized exponent and
  83 | significand are stored at the locations pointed to by `zExpPtr' and
  84 | `zSigPtr', respectively.
  85 *----------------------------------------------------------------------------*/
  86 struct exp16_sig32 { int_fast16_t exp; uint_fast32_t sig; };
  87 struct exp16_sig32 softfloat_normSubnormalF32Sig( uint_fast32_t );
  88
  89 /*----------------------------------------------------------------------------
  90 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
  91 | and significand `zSig', and returns the proper single-precision floating-
  92 | point value corresponding to the abstract input.  Ordinarily, the abstract
  93 | value is simply rounded and packed into the single-precision format, with
  94 | the inexact exception raised if the abstract input cannot be represented
  95 | exactly.  However, if the abstract value is too large, the overflow and
  96 | inexact exceptions are raised and an infinity or maximal finite value is
  97 | returned.  If the abstract value is too small, the input value is rounded to
  98 | a subnormal number, and the underflow and inexact exceptions are raised if
  99 | the abstract input cannot be represented exactly as a subnormal single-
 100 | precision floating-point number.
 101 |     The input significand `zSig' has its binary point between bits 30
 102 | and 29, which is 7 bits to the left of the usual location.  This shifted
 103 | significand must be normalized or smaller.  If `zSig' is not normalized,
 104 | `zExp' must be 0; in that case, the result returned is a subnormal number,
 105 | and it must not require rounding.  In the usual case that `zSig' is
 106 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
 107 | The handling of underflow and overflow follows the IEC/IEEE Standard for
 108 | Binary Floating-Point Arithmetic.
 109 *----------------------------------------------------------------------------*/
 110 float32_t softfloat_roundPackToF32( bool, int_fast16_t, uint_fast32_t );
 111 /*----------------------------------------------------------------------------
 112 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 113 | and significand `zSig', and returns the proper single-precision floating-
 114 | point value corresponding to the abstract input.  This routine is just like
 115 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
 116 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
 117 | floating-point exponent.
 118 *----------------------------------------------------------------------------*/
 119 float32_t softfloat_normRoundPackToF32( bool, int_fast16_t, uint_fast32_t );
 120
 121 /*----------------------------------------------------------------------------
 122 | Returns the result of adding the absolute values of the single-precision
 123 | floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
 124 | before being returned.  `zSign' is ignored if the result is a NaN.
 125 | The addition is performed according to the IEC/IEEE Standard for Binary
 126 | Floating-Point Arithmetic.
 127 *----------------------------------------------------------------------------*/
 128 float32_t softfloat_addMagsF32( uint_fast32_t, uint_fast32_t, bool );
 129 /*----------------------------------------------------------------------------
 130 | Returns the result of subtracting the absolute values of the single-
 131 | precision floating-point values `a' and `b'.  If `zSign' is 1, the
 132 | difference is negated before being returned.  `zSign' is ignored if the
 133 | result is a NaN.  The subtraction is performed according to the IEC/IEEE
 134 | Standard for Binary Floating-Point Arithmetic.
 135 *----------------------------------------------------------------------------*/
 136 float32_t softfloat_subMagsF32( uint_fast32_t, uint_fast32_t, bool );
 137 /*----------------------------------------------------------------------------
 138 *----------------------------------------------------------------------------*/
 139 float32_t
 140  softfloat_mulAddF32( int, uint_fast32_t, uint_fast32_t, uint_fast32_t );
 141
 142 /*----------------------------------------------------------------------------
 143 | Returns 1 if the double-precision floating-point value `a' is a NaN;
 144 | otherwise, returns 0.
 145 *----------------------------------------------------------------------------*/
 146 #define isNaNF64UI( ui ) (UINT64_C(0xFFE0000000000000)<(uint64_t)((uint_fast64_t)(ui)<<1))
 147 /*----------------------------------------------------------------------------
 148 | Returns the sign bit of the double-precision floating-point value `a'.
 149 *----------------------------------------------------------------------------*/
 150 #define signF64UI( a ) ((bool)((uint64_t)(a)>>63))
 151 /*----------------------------------------------------------------------------
 152 | Returns the exponent bits of the double-precision floating-point value `a'.
 153 *----------------------------------------------------------------------------*/
 154 #define expF64UI( a ) ((int_fast16_t)((a)>>52)&0x7FF)
 155 /*----------------------------------------------------------------------------
 156 | Returns the fraction bits of the double-precision floating-point value `a'.
 157 *----------------------------------------------------------------------------*/
 158 #define fracF64UI( a ) ((a)&UINT64_C(0x000FFFFFFFFFFFFF))
 159 /*----------------------------------------------------------------------------
 160 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
 161 | double-precision floating-point value, returning the result.  After being
 162 | shifted into the proper positions, the three fields are simply added
 163 | together to form the result.  This means that any integer portion of `zSig'
 164 | will be added into the exponent.  Since a properly normalized significand
 165 | will have an integer portion equal to 1, the `zExp' input should be 1 less
 166 | than the desired result exponent whenever `zSig' is a complete, normalized
 167 | significand.
 168 *----------------------------------------------------------------------------*/
 169 #define packToF64UI( sign, exp, sig ) (((uint64_t)(sign)<<63)+((uint64_t)(exp)<<52)+(sig))
 170
 171 /*----------------------------------------------------------------------------
 172 | Normalizes the subnormal double-precision floating-point value represented
 173 | by the denormalized significand `aSig'.  The normalized exponent and
 174 | significand are stored at the locations pointed to by `zExpPtr' and
 175 | `zSigPtr', respectively.
 176 *----------------------------------------------------------------------------*/
 177 struct exp16_sig64 { int_fast16_t exp; uint_fast64_t sig; };
 178 struct exp16_sig64 softfloat_normSubnormalF64Sig( uint_fast64_t );
 179
 180 /*----------------------------------------------------------------------------
 181 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 182 | and significand `zSig', and returns the proper double-precision floating-
 183 | point value corresponding to the abstract input.  Ordinarily, the abstract
 184 | value is simply rounded and packed into the double-precision format, with
 185 | the inexact exception raised if the abstract input cannot be represented
 186 | exactly.  However, if the abstract value is too large, the overflow and
 187 | inexact exceptions are raised and an infinity or maximal finite value is
 188 | returned.  If the abstract value is too small, the input value is rounded
 189 | to a subnormal number, and the underflow and inexact exceptions are raised
 190 | if the abstract input cannot be represented exactly as a subnormal double-
 191 | precision floating-point number.
 192 |     The input significand `zSig' has its binary point between bits 62
 193 | and 61, which is 10 bits to the left of the usual location.  This shifted
 194 | significand must be normalized or smaller.  If `zSig' is not normalized,
 195 | `zExp' must be 0; in that case, the result returned is a subnormal number,
 196 | and it must not require rounding.  In the usual case that `zSig' is
 197 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
 198 | The handling of underflow and overflow follows the IEC/IEEE Standard for
 199 | Binary Floating-Point Arithmetic.
 200 *----------------------------------------------------------------------------*/
 201 float64_t softfloat_roundPackToF64( bool, int_fast16_t, uint_fast64_t );
 202 /*----------------------------------------------------------------------------
 203 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 204 | and significand `zSig', and returns the proper double-precision floating-
 205 | point value corresponding to the abstract input.  This routine is just like
 206 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
 207 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
 208 | floating-point exponent.
 209 *----------------------------------------------------------------------------*/
 210 float64_t softfloat_normRoundPackToF64( bool, int_fast16_t, uint_fast64_t );
 211
 212 /*----------------------------------------------------------------------------
 213 | Returns the result of adding the absolute values of the double-precision
 214 | floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
 215 | before being returned.  `zSign' is ignored if the result is a NaN.
 216 | The addition is performed according to the IEC/IEEE Standard for Binary
 217 | Floating-Point Arithmetic.
 218 *----------------------------------------------------------------------------*/
 219 float64_t softfloat_addMagsF64( uint_fast64_t, uint_fast64_t, bool );
 220 /*----------------------------------------------------------------------------
 221 | Returns the result of subtracting the absolute values of the double-
 222 | precision floating-point values `a' and `b'.  If `zSign' is 1, the
 223 | difference is negated before being returned.  `zSign' is ignored if the
 224 | result is a NaN.  The subtraction is performed according to the IEC/IEEE
 225 | Standard for Binary Floating-Point Arithmetic.
 226 *----------------------------------------------------------------------------*/
 227 float64_t softfloat_subMagsF64( uint_fast64_t, uint_fast64_t, bool );
 228 /*----------------------------------------------------------------------------
 229 *----------------------------------------------------------------------------*/
 230 float64_t
 231  softfloat_mulAddF64( int, uint_fast64_t, uint_fast64_t, uint_fast64_t );
 232