5e6fd76a2c55d1761dbc4630cc8ec96294a02d1d
[riscv-isa-sim.git] / softfloat / internals.h
1
2 /*** UPDATE COMMENTS. ***/
3
4 #include "softfloat_types.h"
5
6 union ui32_f32 { uint32_t ui; float32_t f; };
7 union ui64_f64 { uint64_t ui; float64_t f; };
8 #ifdef LITTLEENDIAN
9 union ui128_f128 { uint64_t ui0, ui64; float128_t f; };
10 #else
11 union ui128_f128 { uint64_t ui64, ui0; float128_t f; };
12 #endif
13
/*----------------------------------------------------------------------------
| Integer constants with the values 1 and 2.
| NOTE(review): presumably option values for the leading `int' argument of
| `softfloat_mulAddF32' / `softfloat_mulAddF64' -- confirm against the
| definitions of those functions.
*----------------------------------------------------------------------------*/
enum {
    softfloat_mulAdd_subC    = 1,
    softfloat_mulAdd_subProd = 2
};
18
/*----------------------------------------------------------------------------
| Declaration only; the definition is not part of this header.
| Takes (bool, uint_fast64_t, int_fast8_t, bool) and returns a
| `uint_fast32_t'.
| NOTE(review): presumably the unsigned counterpart of
| `softfloat_roundPackToI32' -- confirm the meaning of each argument against
| the definition.
*----------------------------------------------------------------------------*/
uint_fast32_t softfloat_roundPackToUI32(bool, uint_fast64_t, int_fast8_t, bool);
/*----------------------------------------------------------------------------
| Declaration only; the definition is not part of this header.
| Takes (bool, uint_fast64_t, uint_fast64_t, int_fast8_t, bool) and returns
| a `uint_fast64_t'.
| NOTE(review): presumably the unsigned counterpart of
| `softfloat_roundPackToI64' -- confirm the meaning of each argument against
| the definition.
*----------------------------------------------------------------------------*/
uint_fast64_t softfloat_roundPackToUI64(
    bool, uint_fast64_t, uint_fast64_t, int_fast8_t, bool);
/*----------------------------------------------------------------------------
| Rounds a 64-bit fixed-point magnitude (`absZ', binary point between bits 6
| and 7, bit 63 required to be zero) to a 32-bit integer and returns it.  When
| the sign flag (`zSign') is 1 the value is negated before conversion.
| Normally the input is simply rounded, raising the inexact exception when it
| is not already an integer.  If the input is too large, the invalid
| exception is raised instead and the largest positive or negative integer is
| returned.
| NOTE(review): the names `zSign' and `absZ' come from the earlier
| description; the prototype leaves its parameters unnamed and has two
| further arguments (int_fast8_t, bool) that the description does not cover
| -- confirm against the definition.
*----------------------------------------------------------------------------*/
int_fast32_t softfloat_roundPackToI32(bool, uint_fast64_t, int_fast8_t, bool);
/*----------------------------------------------------------------------------
| Rounds a 128-bit fixed-point magnitude to a 64-bit integer and returns it.
| The magnitude is the concatenation of two 64-bit words (`absZ0', `absZ1')
| with the binary point between bits 63 and 64, i.e. between the two words.
| When the sign flag (`zSign') is 1 the value is negated before conversion.
| Normally the input is simply rounded, raising the inexact exception when it
| is not already an integer.  If the input is too large, the invalid
| exception is raised instead and the largest positive or negative integer is
| returned.
| NOTE(review): the names `zSign', `absZ0' and `absZ1' come from the earlier
| description; the prototype leaves its parameters unnamed and has two
| further arguments (int_fast8_t, bool) that the description does not cover
| -- confirm against the definition.
*----------------------------------------------------------------------------*/
int_fast64_t softfloat_roundPackToI64(
    bool, uint_fast64_t, uint_fast64_t, int_fast8_t, bool);
50
/*----------------------------------------------------------------------------
| Evaluates to 1 when the single-precision bit pattern `ui' encodes a NaN,
| and to 0 otherwise.  The pattern is shifted left by one to discard the sign
| bit and truncated to 32 bits; anything strictly above 0xFF000000 (all
| exponent bits set, fraction zero) is a NaN.
*----------------------------------------------------------------------------*/
#define isNaNF32UI( ui ) \
    ((uint32_t) ((uint_fast32_t) (ui) << 1) > 0xFF000000)
/*----------------------------------------------------------------------------
| Extracts the sign (bit 31) of the single-precision bit pattern `a' as a
| `bool'.
*----------------------------------------------------------------------------*/
#define signF32UI( a ) ((bool) (((uint32_t) (a) >> 31) != 0))
/*----------------------------------------------------------------------------
| Extracts the 8 exponent bits (bits 30..23) of the single-precision bit
| pattern `a'.  The shifted value is converted to `int_fast16_t' first and
| masked with 0xFF afterwards, which drops the sign bit.
*----------------------------------------------------------------------------*/
#define expF32UI( a ) (((int_fast16_t) ((a) >> 23)) & 0xFF)
/*----------------------------------------------------------------------------
| Extracts the 23 fraction bits (bits 22..0) of the single-precision bit
| pattern `a'.
*----------------------------------------------------------------------------*/
#define fracF32UI( a ) (0x007FFFFF & (a))
/*----------------------------------------------------------------------------
| Assembles a single-precision bit pattern from `sign', `exp' and `sig'.
| `sign' is shifted to bit 31, `exp' to bit 23, and the three pieces are
| ADDED rather than OR-ed.  Any integer portion of `sig' (bits at or above
| bit 23) therefore carries into the exponent field.  Because a properly
| normalized significand has an integer portion of 1, `exp' should be 1 less
| than the desired result exponent whenever `sig' is a complete, normalized
| significand.
*----------------------------------------------------------------------------*/
#define packToF32UI( sign, exp, sig ) \
    (((uint32_t) (sign) << 31) + ((uint32_t) (exp) << 23) + (sig))
79
/*----------------------------------------------------------------------------
| Normalizes the subnormal single-precision value represented by the
| denormalized significand passed as the only argument.  The normalized
| exponent and significand are returned together, by value, in a
| `struct exp16_sig32' (members `exp' and `sig').  The prototype has no
| pointer out-parameters.
*----------------------------------------------------------------------------*/
struct exp16_sig32 {
    int_fast16_t  exp;
    uint_fast32_t sig;
};
struct exp16_sig32 softfloat_normSubnormalF32Sig(uint_fast32_t);
88
89 /*----------------------------------------------------------------------------
90 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
91 | and significand `zSig', and returns the proper single-precision floating-
92 | point value corresponding to the abstract input. Ordinarily, the abstract
93 | value is simply rounded and packed into the single-precision format, with
94 | the inexact exception raised if the abstract input cannot be represented
95 | exactly. However, if the abstract value is too large, the overflow and
96 | inexact exceptions are raised and an infinity or maximal finite value is
97 | returned. If the abstract value is too small, the input value is rounded to
98 | a subnormal number, and the underflow and inexact exceptions are raised if
99 | the abstract input cannot be represented exactly as a subnormal single-
100 | precision floating-point number.
101 | The input significand `zSig' has its binary point between bits 30
102 | and 29, which is 7 bits to the left of the usual location. This shifted
103 | significand must be normalized or smaller. If `zSig' is not normalized,
104 | `zExp' must be 0; in that case, the result returned is a subnormal number,
105 | and it must not require rounding. In the usual case that `zSig' is
106 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
107 | The handling of underflow and overflow follows the IEC/IEEE Standard for
108 | Binary Floating-Point Arithmetic.
109 *----------------------------------------------------------------------------*/
110 float32_t softfloat_roundPackToF32( bool, int_fast16_t, uint_fast32_t );
111 /*----------------------------------------------------------------------------
112 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
113 | and significand `zSig', and returns the proper single-precision floating-
114 | point value corresponding to the abstract input. This routine is just like
115 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
116 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
117 | floating-point exponent.
118 *----------------------------------------------------------------------------*/
119 float32_t softfloat_normRoundPackToF32( bool, int_fast16_t, uint_fast32_t );
120
121 /*----------------------------------------------------------------------------
122 | Returns the result of adding the absolute values of the single-precision
123 | floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
124 | before being returned. `zSign' is ignored if the result is a NaN.
125 | The addition is performed according to the IEC/IEEE Standard for Binary
126 | Floating-Point Arithmetic.
127 *----------------------------------------------------------------------------*/
128 float32_t softfloat_addMagsF32( uint_fast32_t, uint_fast32_t, bool );
129 /*----------------------------------------------------------------------------
130 | Returns the result of subtracting the absolute values of the single-
131 | precision floating-point values `a' and `b'. If `zSign' is 1, the
132 | difference is negated before being returned. `zSign' is ignored if the
133 | result is a NaN. The subtraction is performed according to the IEC/IEEE
134 | Standard for Binary Floating-Point Arithmetic.
135 *----------------------------------------------------------------------------*/
136 float32_t softfloat_subMagsF32( uint_fast32_t, uint_fast32_t, bool );
137 /*----------------------------------------------------------------------------
138 *----------------------------------------------------------------------------*/
139 float32_t
140 softfloat_mulAddF32( int, uint_fast32_t, uint_fast32_t, uint_fast32_t );
141
/*----------------------------------------------------------------------------
| Evaluates to 1 when the double-precision bit pattern `ui' encodes a NaN,
| and to 0 otherwise.  The pattern is shifted left by one to discard the sign
| bit and truncated to 64 bits; anything strictly above 0xFFE0000000000000
| (all exponent bits set, fraction zero) is a NaN.
*----------------------------------------------------------------------------*/
#define isNaNF64UI( ui ) \
    ((uint64_t) ((uint_fast64_t) (ui) << 1) > UINT64_C( 0xFFE0000000000000 ))
/*----------------------------------------------------------------------------
| Extracts the sign (bit 63) of the double-precision bit pattern `a' as a
| `bool'.
*----------------------------------------------------------------------------*/
#define signF64UI( a ) ((bool) (((uint64_t) (a) >> 63) != 0))
/*----------------------------------------------------------------------------
| Extracts the 11 exponent bits (bits 62..52) of the double-precision bit
| pattern `a'.  The shifted value is converted to `int_fast16_t' first and
| masked with 0x7FF afterwards, which drops the sign bit.
*----------------------------------------------------------------------------*/
#define expF64UI( a ) (((int_fast16_t) ((a) >> 52)) & 0x7FF)
/*----------------------------------------------------------------------------
| Extracts the 52 fraction bits (bits 51..0) of the double-precision bit
| pattern `a'.
*----------------------------------------------------------------------------*/
#define fracF64UI( a ) (UINT64_C( 0x000FFFFFFFFFFFFF ) & (a))
/*----------------------------------------------------------------------------
| Assembles a double-precision bit pattern from `sign', `exp' and `sig'.
| `sign' is shifted to bit 63, `exp' to bit 52, and the three pieces are
| ADDED rather than OR-ed.  Any integer portion of `sig' (bits at or above
| bit 52) therefore carries into the exponent field.  Because a properly
| normalized significand has an integer portion of 1, `exp' should be 1 less
| than the desired result exponent whenever `sig' is a complete, normalized
| significand.
*----------------------------------------------------------------------------*/
#define packToF64UI( sign, exp, sig ) \
    (((uint64_t) (sign) << 63) + ((uint64_t) (exp) << 52) + (sig))
170
/*----------------------------------------------------------------------------
| Normalizes the subnormal double-precision value represented by the
| denormalized significand passed as the only argument.  The normalized
| exponent and significand are returned together, by value, in a
| `struct exp16_sig64' (members `exp' and `sig').  The prototype has no
| pointer out-parameters.
*----------------------------------------------------------------------------*/
struct exp16_sig64 {
    int_fast16_t  exp;
    uint_fast64_t sig;
};
struct exp16_sig64 softfloat_normSubnormalF64Sig(uint_fast64_t);
179
180 /*----------------------------------------------------------------------------
181 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
182 | and significand `zSig', and returns the proper double-precision floating-
183 | point value corresponding to the abstract input. Ordinarily, the abstract
184 | value is simply rounded and packed into the double-precision format, with
185 | the inexact exception raised if the abstract input cannot be represented
186 | exactly. However, if the abstract value is too large, the overflow and
187 | inexact exceptions are raised and an infinity or maximal finite value is
188 | returned. If the abstract value is too small, the input value is rounded
189 | to a subnormal number, and the underflow and inexact exceptions are raised
190 | if the abstract input cannot be represented exactly as a subnormal double-
191 | precision floating-point number.
192 | The input significand `zSig' has its binary point between bits 62
193 | and 61, which is 10 bits to the left of the usual location. This shifted
194 | significand must be normalized or smaller. If `zSig' is not normalized,
195 | `zExp' must be 0; in that case, the result returned is a subnormal number,
196 | and it must not require rounding. In the usual case that `zSig' is
197 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
198 | The handling of underflow and overflow follows the IEC/IEEE Standard for
199 | Binary Floating-Point Arithmetic.
200 *----------------------------------------------------------------------------*/
201 float64_t softfloat_roundPackToF64( bool, int_fast16_t, uint_fast64_t );
202 /*----------------------------------------------------------------------------
203 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
204 | and significand `zSig', and returns the proper double-precision floating-
205 | point value corresponding to the abstract input. This routine is just like
206 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
207 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
208 | floating-point exponent.
209 *----------------------------------------------------------------------------*/
210 float64_t softfloat_normRoundPackToF64( bool, int_fast16_t, uint_fast64_t );
211
212 /*----------------------------------------------------------------------------
213 | Returns the result of adding the absolute values of the double-precision
214 | floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
215 | before being returned. `zSign' is ignored if the result is a NaN.
216 | The addition is performed according to the IEC/IEEE Standard for Binary
217 | Floating-Point Arithmetic.
218 *----------------------------------------------------------------------------*/
219 float64_t softfloat_addMagsF64( uint_fast64_t, uint_fast64_t, bool );
220 /*----------------------------------------------------------------------------
221 | Returns the result of subtracting the absolute values of the double-
222 | precision floating-point values `a' and `b'. If `zSign' is 1, the
223 | difference is negated before being returned. `zSign' is ignored if the
224 | result is a NaN. The subtraction is performed according to the IEC/IEEE
225 | Standard for Binary Floating-Point Arithmetic.
226 *----------------------------------------------------------------------------*/
227 float64_t softfloat_subMagsF64( uint_fast64_t, uint_fast64_t, bool );
228 /*----------------------------------------------------------------------------
229 *----------------------------------------------------------------------------*/
230 float64_t
231 softfloat_mulAddF64( int, uint_fast64_t, uint_fast64_t, uint_fast64_t );
232