softfloat/softfloat-specialize

   1
   2 /*============================================================================
   3
   4 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
   5 Arithmetic Package, Release 2b.
   6
   7 Written by John R. Hauser.  This work was made possible in part by the
   8 International Computer Science Institute, located at Suite 600, 1947 Center
   9 Street, Berkeley, California 94704.  Funding was partially provided by the
  10 National Science Foundation under grant MIP-9311980.  The original version
  11 of this code was written as part of a project to build a fixed-point vector
  12 processor in collaboration with the University of California at Berkeley,
  13 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  14 is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
  15 arithmetic/SoftFloat.html'.
  16
  17 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
  18 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
  19 RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
  20 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
  21 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
  22 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
  23 INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
  24 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
  25
  26 Derivative works are acceptable, even for commercial purposes, so long as
  27 (1) the source code for the derivative work includes prominent notice that
  28 the work is derivative, and (2) the source code includes prominent notice with
  29 these four paragraphs for those parts of this code that are retained.
  30
  31 =============================================================================*/
  32
  33 /*----------------------------------------------------------------------------
  34 | Underflow tininess-detection mode, statically initialized to default value.
  35 | (The declaration in `softfloat.h' must match the `int8' type here.)
  36 *----------------------------------------------------------------------------*/
  37 int8 float_detect_tininess = float_tininess_before_rounding;
  38
  39 /*----------------------------------------------------------------------------
  40 | Raises the exceptions specified by `flags'.  Floating-point traps can be
  41 | defined here if desired.  It is currently not possible for such a trap
  42 | to substitute a result value.  If traps are not implemented, this routine
  43 | should be simply `float_exception_flags |= flags;'.
  44 *----------------------------------------------------------------------------*/
  45
  46 void float_raise( int8 flags )
  47 {
  48
  49     float_exception_flags |= flags;
  50
  51 }
  52
  53 /*----------------------------------------------------------------------------
  54 | Internal canonical NaN format.
  55 *----------------------------------------------------------------------------*/
  56 typedef struct {
  57     flag sign;
  58     bits64 high, low;
  59 } commonNaNT;
  60
  61 /*----------------------------------------------------------------------------
  62 | The pattern for a default generated single-precision NaN.
  63 *----------------------------------------------------------------------------*/
  64 #define float32_default_nan 0x7FFFFFFF
  65
  66 /*----------------------------------------------------------------------------
  67 | Returns 1 if the single-precision floating-point value `a' is a NaN;
  68 | otherwise returns 0.
  69 *----------------------------------------------------------------------------*/
  70
  71 flag float32_is_nan( float32 a )
  72 {
  73
  74     return ( 0xFF000000 < (bits32) ( a<<1 ) );
  75
  76 }
  77
  78 /*----------------------------------------------------------------------------
  79 | Returns 1 if the single-precision floating-point value `a' is a signaling
  80 | NaN; otherwise returns 0.
  81 *----------------------------------------------------------------------------*/
  82
  83 flag float32_is_signaling_nan( float32 a )
  84 {
  85
  86     return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF );
  87
  88 }
  89
  90 /*----------------------------------------------------------------------------
  91 | Returns the result of converting the single-precision floating-point NaN
  92 | `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
  93 | exception is raised.
  94 *----------------------------------------------------------------------------*/
  95
  96 static commonNaNT float32ToCommonNaN( float32 a )
  97 {
  98     commonNaNT z;
  99
 100     if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
 101     z.sign = a>>31;
 102     z.low = 0;
 103     z.high = ( (bits64) a )<<41;
 104     return z;
 105
 106 }
 107
 108 /*----------------------------------------------------------------------------
 109 | Returns the result of converting the canonical NaN `a' to the single-
 110 | precision floating-point format.
 111 *----------------------------------------------------------------------------*/
 112
 113 static float32 commonNaNToFloat32( commonNaNT a )
 114 {
 115
 116     return ( ( (bits32) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>41 );
 117
 118 }
 119
 120 /*----------------------------------------------------------------------------
 121 | Takes two single-precision floating-point values `a' and `b', one of which
 122 | is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
 123 | signaling NaN, the invalid exception is raised.
 124 *----------------------------------------------------------------------------*/
 125
 126 static float32 propagateFloat32NaN( float32 a, float32 b )
 127 {
 128     flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
 129
 130     aIsNaN = float32_is_nan( a );
 131     aIsSignalingNaN = float32_is_signaling_nan( a );
 132     bIsNaN = float32_is_nan( b );
 133     bIsSignalingNaN = float32_is_signaling_nan( b );
 134     a |= 0x00400000;
 135     b |= 0x00400000;
 136     if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
 137     return bIsSignalingNaN ? b : aIsSignalingNaN ? a : bIsNaN ? b : a;
 138
 139 }
 140
 141 /*----------------------------------------------------------------------------
 142 | The pattern for a default generated double-precision NaN.
 143 *----------------------------------------------------------------------------*/
 144 #define float64_default_nan LIT64( 0x7FFFFFFFFFFFFFFF )
 145
 146 /*----------------------------------------------------------------------------
 147 | Returns 1 if the double-precision floating-point value `a' is a NaN;
 148 | otherwise returns 0.
 149 *----------------------------------------------------------------------------*/
 150
 151 flag float64_is_nan( float64 a )
 152 {
 153
 154     return ( LIT64( 0xFFE0000000000000 ) < (bits64) ( a<<1 ) );
 155
 156 }
 157
 158 /*----------------------------------------------------------------------------
 159 | Returns 1 if the double-precision floating-point value `a' is a signaling
 160 | NaN; otherwise returns 0.
 161 *----------------------------------------------------------------------------*/
 162
 163 flag float64_is_signaling_nan( float64 a )
 164 {
 165
 166     return
 167            ( ( ( a>>51 ) & 0xFFF ) == 0xFFE )
 168         && ( a & LIT64( 0x0007FFFFFFFFFFFF ) );
 169
 170 }
 171
 172 /*----------------------------------------------------------------------------
 173 | Returns the result of converting the double-precision floating-point NaN
 174 | `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
 175 | exception is raised.
 176 *----------------------------------------------------------------------------*/
 177
 178 static commonNaNT float64ToCommonNaN( float64 a )
 179 {
 180     commonNaNT z;
 181
 182     if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
 183     z.sign = a>>63;
 184     z.low = 0;
 185     z.high = a<<12;
 186     return z;
 187
 188 }
 189
 190 /*----------------------------------------------------------------------------
 191 | Returns the result of converting the canonical NaN `a' to the double-
 192 | precision floating-point format.
 193 *----------------------------------------------------------------------------*/
 194
 195 static float64 commonNaNToFloat64( commonNaNT a )
 196 {
 197
 198     return
 199           ( ( (bits64) a.sign )<<63 )
 200         | LIT64( 0x7FF8000000000000 )
 201         | ( a.high>>12 );
 202
 203 }
 204
 205 /*----------------------------------------------------------------------------
 206 | Takes two double-precision floating-point values `a' and `b', one of which
 207 | is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
 208 | signaling NaN, the invalid exception is raised.
 209 *----------------------------------------------------------------------------*/
 210
 211 static float64 propagateFloat64NaN( float64 a, float64 b )
 212 {
 213     flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
 214
 215     aIsNaN = float64_is_nan( a );
 216     aIsSignalingNaN = float64_is_signaling_nan( a );
 217     bIsNaN = float64_is_nan( b );
 218     bIsSignalingNaN = float64_is_signaling_nan( b );
 219     a |= LIT64( 0x0008000000000000 );
 220     b |= LIT64( 0x0008000000000000 );
 221     if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
 222     return bIsSignalingNaN ? b : aIsSignalingNaN ? a : bIsNaN ? b : a;
 223
 224 }
 225
 226 #ifdef FLOATX80
 227
 228 /*----------------------------------------------------------------------------
 229 | The pattern for a default generated extended double-precision NaN.  The
 230 | `high' and `low' values hold the most- and least-significant bits,
 231 | respectively.
 232 *----------------------------------------------------------------------------*/
 233 #define floatx80_default_nan_high 0x7FFF
 234 #define floatx80_default_nan_low  LIT64( 0xFFFFFFFFFFFFFFFF )
 235
 236 /*----------------------------------------------------------------------------
 237 | Returns 1 if the extended double-precision floating-point value `a' is a
 238 | NaN; otherwise returns 0.
 239 *----------------------------------------------------------------------------*/
 240
 241 flag floatx80_is_nan( floatx80 a )
 242 {
 243
 244     return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 );
 245
 246 }
 247
 248 /*----------------------------------------------------------------------------
 249 | Returns 1 if the extended double-precision floating-point value `a' is a
 250 | signaling NaN; otherwise returns 0.
 251 *----------------------------------------------------------------------------*/
 252
 253 flag floatx80_is_signaling_nan( floatx80 a )
 254 {
 255     bits64 aLow;
 256
 257     aLow = a.low & ~ LIT64( 0x4000000000000000 );
 258     return
 259            ( ( a.high & 0x7FFF ) == 0x7FFF )
 260         && (bits64) ( aLow<<1 )
 261         && ( a.low == aLow );
 262
 263 }
 264
 265 /*----------------------------------------------------------------------------
 266 | Returns the result of converting the extended double-precision floating-
 267 | point NaN `a' to the canonical NaN format.  If `a' is a signaling NaN, the
 268 | invalid exception is raised.
 269 *----------------------------------------------------------------------------*/
 270
 271 static commonNaNT floatx80ToCommonNaN( floatx80 a )
 272 {
 273     commonNaNT z;
 274
 275     if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
 276     z.sign = a.high>>15;
 277     z.low = 0;
 278     z.high = a.low<<1;
 279     return z;
 280
 281 }
 282
 283 /*----------------------------------------------------------------------------
 284 | Returns the result of converting the canonical NaN `a' to the extended
 285 | double-precision floating-point format.
 286 *----------------------------------------------------------------------------*/
 287
 288 static floatx80 commonNaNToFloatx80( commonNaNT a )
 289 {
 290     floatx80 z;
 291
 292     z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 );
 293     z.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF;
 294     return z;
 295
 296 }
 297
 298 /*----------------------------------------------------------------------------
 299 | Takes two extended double-precision floating-point values `a' and `b', one
 300 | of which is a NaN, and returns the appropriate NaN result.  If either `a' or
 301 | `b' is a signaling NaN, the invalid exception is raised.
 302 *----------------------------------------------------------------------------*/
 303
 304 static floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b )
 305 {
 306     flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
 307
 308     aIsNaN = floatx80_is_nan( a );
 309     aIsSignalingNaN = floatx80_is_signaling_nan( a );
 310     bIsNaN = floatx80_is_nan( b );
 311     bIsSignalingNaN = floatx80_is_signaling_nan( b );
 312     a.low |= LIT64( 0xC000000000000000 );
 313     b.low |= LIT64( 0xC000000000000000 );
 314     if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
 315     return bIsSignalingNaN ? b : aIsSignalingNaN ? a : bIsNaN ? b : a;
 316
 317 }
 318
 319 #endif
 320
 321 #ifdef FLOAT128
 322
 323 /*----------------------------------------------------------------------------
 324 | The pattern for a default generated quadruple-precision NaN.  The `high' and
 325 | `low' values hold the most- and least-significant bits, respectively.
 326 *----------------------------------------------------------------------------*/
 327 #define float128_default_nan_high LIT64( 0x7FFFFFFFFFFFFFFF )
 328 #define float128_default_nan_low  LIT64( 0xFFFFFFFFFFFFFFFF )
 329
 330 /*----------------------------------------------------------------------------
 331 | Returns 1 if the quadruple-precision floating-point value `a' is a NaN;
 332 | otherwise returns 0.
 333 *----------------------------------------------------------------------------*/
 334
 335 flag float128_is_nan( float128 a )
 336 {
 337
 338     return
 339            ( LIT64( 0xFFFE000000000000 ) <= (bits64) ( a.high<<1 ) )
 340         && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) );
 341
 342 }
 343
 344 /*----------------------------------------------------------------------------
 345 | Returns 1 if the quadruple-precision floating-point value `a' is a
 346 | signaling NaN; otherwise returns 0.
 347 *----------------------------------------------------------------------------*/
 348
 349 flag float128_is_signaling_nan( float128 a )
 350 {
 351
 352     return
 353            ( ( ( a.high>>47 ) & 0xFFFF ) == 0xFFFE )
 354         && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) );
 355
 356 }
 357
 358 /*----------------------------------------------------------------------------
 359 | Returns the result of converting the quadruple-precision floating-point NaN
 360 | `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
 361 | exception is raised.
 362 *----------------------------------------------------------------------------*/
 363
 364 static commonNaNT float128ToCommonNaN( float128 a )
 365 {
 366     commonNaNT z;
 367
 368     if ( float128_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
 369     z.sign = a.high>>63;
 370     shortShift128Left( a.high, a.low, 16, &z.high, &z.low );
 371     return z;
 372
 373 }
 374
 375 /*----------------------------------------------------------------------------
 376 | Returns the result of converting the canonical NaN `a' to the quadruple-
 377 | precision floating-point format.
 378 *----------------------------------------------------------------------------*/
 379
 380 static float128 commonNaNToFloat128( commonNaNT a )
 381 {
 382     float128 z;
 383
 384     shift128Right( a.high, a.low, 16, &z.high, &z.low );
 385     z.high |= ( ( (bits64) a.sign )<<63 ) | LIT64( 0x7FFF800000000000 );
 386     return z;
 387
 388 }
 389
 390 /*----------------------------------------------------------------------------
 391 | Takes two quadruple-precision floating-point values `a' and `b', one of
 392 | which is a NaN, and returns the appropriate NaN result.  If either `a' or
 393 | `b' is a signaling NaN, the invalid exception is raised.
 394 *----------------------------------------------------------------------------*/
 395
 396 static float128 propagateFloat128NaN( float128 a, float128 b )
 397 {
 398     flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
 399
 400     aIsNaN = float128_is_nan( a );
 401     aIsSignalingNaN = float128_is_signaling_nan( a );
 402     bIsNaN = float128_is_nan( b );
 403     bIsSignalingNaN = float128_is_signaling_nan( b );
 404     a.high |= LIT64( 0x0000800000000000 );
 405     b.high |= LIT64( 0x0000800000000000 );
 406     if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
 407     return bIsSignalingNaN ? b : aIsSignalingNaN ? a : bIsNaN ? b : a;
 408
 409 }
 410
 411 #endif
 412