/*============================================================================

This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
Package.

Written by John R. Hauser. This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704. Funding was partially provided by the
National Science Foundation under grant MIP-9311980. The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) the source code for the derivative work includes prominent notice that
the work is derivative, and (2) the source code includes prominent notice with
these four paragraphs for those parts of this code that are retained.

=============================================================================*/
/*----------------------------------------------------------------------------
| The macro `FLOATX80' must be defined to enable the extended double-precision
| floating-point format `floatx80'. If this macro is not defined, the
| `floatx80' type will not be defined, and none of the functions that either
| input or output the `floatx80' type will be defined. The same applies to
| the `FLOAT128' macro and the quadruple-precision format `float128'.
*----------------------------------------------------------------------------*/
/*----------------------------------------------------------------------------
| Software IEC/IEEE floating-point types.
|
| `float32' and `float64' are plain unsigned integer typedefs, so values of
| these types are passed to and returned from the routines below by value,
| exactly like ordinary integers.
| NOTE(review): this presumably relies on `unsigned int' being 32 bits wide
| and `unsigned long long' being 64 bits wide -- confirm for each target.
*----------------------------------------------------------------------------*/
typedef unsigned int float32;
typedef unsigned long long float64;
/* NOTE(review): this member declaration has no enclosing `struct { ... }'
   visible here, and it is preceded by a stray number token.  It presumably
   belongs to the `floatx80' type described in the FLOATX80 comment near the
   top of this file; the surrounding lines must be restored from the original
   header -- TODO confirm. */
51 unsigned long long low
;
/* NOTE(review): these two members have no enclosing `struct { ... }' visible
   here, and they are preceded by a stray number token.  They presumably
   belong to the `float128' type described in the FLOATX80/FLOAT128 comment
   near the top of this file; the surrounding lines must be restored from the
   original header -- TODO confirm. */
56 unsigned long long high
, low
;
/*----------------------------------------------------------------------------
| Software IEC/IEEE floating-point underflow tininess-detection mode.
|
| This is a declaration only; the variable itself has to be defined in
| exactly one source file of the program.
*----------------------------------------------------------------------------*/
extern int float_detect_tininess;
/*----------------------------------------------------------------------------
| Named values for the underflow tininess-detection mode.
| NOTE(review): presumably these are the values meant to be stored in
| `float_detect_tininess' -- confirm against the implementation.
*----------------------------------------------------------------------------*/
enum {
    float_tininess_after_rounding  = 0,
    float_tininess_before_rounding = 1
};
/*----------------------------------------------------------------------------
| Software IEC/IEEE floating-point rounding mode.
|
| This is a declaration only; the variable itself has to be defined in
| exactly one source file of the program.
*----------------------------------------------------------------------------*/
extern int float_rounding_mode;
/*----------------------------------------------------------------------------
| Named values for the rounding mode.
| NOTE(review): presumably these are the values meant to be stored in
| `float_rounding_mode' -- confirm against the implementation.
| NOTE(review): the enumerator list this was recovered from ended with a
| comma, which suggests that further rounding modes followed and were lost.
| Restore any missing enumerators from the original header before relying
| on this list being complete.
*----------------------------------------------------------------------------*/
enum {
    float_round_nearest_even = 0,
    float_round_to_zero      = 1
};
/*----------------------------------------------------------------------------
| Software IEC/IEEE floating-point exception flags.
|
| This is a declaration only; the variable itself has to be defined in
| exactly one source file of the program.
*----------------------------------------------------------------------------*/
extern int float_exception_flags;
/*----------------------------------------------------------------------------
| Bit values of the individual exception flags.  Every flag is a distinct
| power of two, so any combination of them fits in a single `int' mask.
| NOTE(review): presumably these are the bits kept in `float_exception_flags'
| -- confirm against the implementation.
*----------------------------------------------------------------------------*/
enum {
    float_flag_inexact   =  1,
    float_flag_divbyzero =  2,
    float_flag_underflow =  4,
    float_flag_overflow  =  8,
    float_flag_invalid   = 16
};
/*----------------------------------------------------------------------------
| Routine to raise any or all of the software IEC/IEEE floating-point
| exception flags.
|
| Takes a single `int' argument and returns nothing.
| NOTE(review): the argument is presumably a mask built from the
| `float_flag_*' values -- confirm against the implementation.
*----------------------------------------------------------------------------*/
void float_raise( int );
98 /*----------------------------------------------------------------------------
99 | Software IEC/IEEE integer-to-floating-point conversion routines.
100 *----------------------------------------------------------------------------*/
101 float32
int32_to_float32( int );
102 float64
int32_to_float64( int );
104 floatx80
int32_to_floatx80( int );
107 float128
int32_to_float128( int );
109 float32
int64_to_float32( long long );
110 float64
int64_to_float64( long long );
112 floatx80
int64_to_floatx80( long long );
115 float128
int64_to_float128( long long );
118 /*----------------------------------------------------------------------------
119 | Software IEC/IEEE single-precision conversion routines.
120 *----------------------------------------------------------------------------*/
121 int float32_to_int32( float32
);
122 int float32_to_int32_round_to_zero( float32
);
123 long long float32_to_int64( float32
);
124 long long float32_to_int64_round_to_zero( float32
);
125 float64
float32_to_float64( float32
);
127 floatx80
float32_to_floatx80( float32
);
130 float128
float32_to_float128( float32
);
133 /*----------------------------------------------------------------------------
134 | Software IEC/IEEE single-precision operations.
135 *----------------------------------------------------------------------------*/
136 float32
float32_round_to_int( float32
);
137 float32
float32_add( float32
, float32
);
138 float32
float32_sub( float32
, float32
);
139 float32
float32_mul( float32
, float32
);
140 float32
float32_div( float32
, float32
);
141 float32
float32_rem( float32
, float32
);
142 float32
float32_sqrt( float32
);
143 int float32_eq( float32
, float32
);
144 int float32_le( float32
, float32
);
145 int float32_lt( float32
, float32
);
146 int float32_eq_signaling( float32
, float32
);
147 int float32_le_quiet( float32
, float32
);
148 int float32_lt_quiet( float32
, float32
);
149 int float32_is_signaling_nan( float32
);
151 /*----------------------------------------------------------------------------
152 | Software IEC/IEEE double-precision conversion routines.
153 *----------------------------------------------------------------------------*/
154 int float64_to_int32( float64
);
155 int float64_to_int32_round_to_zero( float64
);
156 long long float64_to_int64( float64
);
157 long long float64_to_int64_round_to_zero( float64
);
158 float32
float64_to_float32( float64
);
160 floatx80
float64_to_floatx80( float64
);
163 float128
float64_to_float128( float64
);
166 /*----------------------------------------------------------------------------
167 | Software IEC/IEEE double-precision operations.
168 *----------------------------------------------------------------------------*/
169 float64
float64_round_to_int( float64
);
170 float64
float64_add( float64
, float64
);
171 float64
float64_sub( float64
, float64
);
172 float64
float64_mul( float64
, float64
);
173 float64
float64_div( float64
, float64
);
174 float64
float64_rem( float64
, float64
);
175 float64
float64_sqrt( float64
);
176 int float64_eq( float64
, float64
);
177 int float64_le( float64
, float64
);
178 int float64_lt( float64
, float64
);
179 int float64_eq_signaling( float64
, float64
);
180 int float64_le_quiet( float64
, float64
);
181 int float64_lt_quiet( float64
, float64
);
182 int float64_is_signaling_nan( float64
);
#ifdef FLOATX80

/*----------------------------------------------------------------------------
| Software IEC/IEEE extended double-precision conversion routines.
|
| Every routine takes one `floatx80' by value.  The `_to_int32' routines
| return `int', the `_to_int64' routines return `long long', and the
| remaining ones return the floating-point type named after `_to_'.
| The whole group is only declared when `FLOATX80' is defined; the
| `float128' conversion additionally requires `FLOAT128'.
*----------------------------------------------------------------------------*/
int floatx80_to_int32( floatx80 );
int floatx80_to_int32_round_to_zero( floatx80 );
long long floatx80_to_int64( floatx80 );
long long floatx80_to_int64_round_to_zero( floatx80 );
float32 floatx80_to_float32( floatx80 );
float64 floatx80_to_float64( floatx80 );
#ifdef FLOAT128
float128 floatx80_to_float128( floatx80 );
#endif

#endif /* FLOATX80 */
#ifdef FLOATX80

/*----------------------------------------------------------------------------
| Software IEC/IEEE extended double-precision rounding precision.  Valid
| values are 32, 64, and 80.
|
| This is a declaration only, and it is only visible when `FLOATX80' is
| defined; the variable itself has to be defined in exactly one source file.
*----------------------------------------------------------------------------*/
extern int floatx80_rounding_precision;

#endif /* FLOATX80 */
#ifdef FLOATX80

/*----------------------------------------------------------------------------
| Software IEC/IEEE extended double-precision operations.
|
| All operands are `floatx80' values passed by value.  The first group
| returns a `floatx80' result; the comparison routines and
| `floatx80_is_signaling_nan' return `int'.  The whole group is only declared
| when `FLOATX80' is defined.
*----------------------------------------------------------------------------*/
floatx80 floatx80_round_to_int( floatx80 );
floatx80 floatx80_add( floatx80, floatx80 );
floatx80 floatx80_sub( floatx80, floatx80 );
floatx80 floatx80_mul( floatx80, floatx80 );
floatx80 floatx80_div( floatx80, floatx80 );
floatx80 floatx80_rem( floatx80, floatx80 );
floatx80 floatx80_sqrt( floatx80 );

int floatx80_eq( floatx80, floatx80 );
int floatx80_le( floatx80, floatx80 );
int floatx80_lt( floatx80, floatx80 );
int floatx80_eq_signaling( floatx80, floatx80 );
int floatx80_le_quiet( floatx80, floatx80 );
int floatx80_lt_quiet( floatx80, floatx80 );
int floatx80_is_signaling_nan( floatx80 );

#endif /* FLOATX80 */
#ifdef FLOAT128

/*----------------------------------------------------------------------------
| Software IEC/IEEE quadruple-precision conversion routines.
|
| Every routine takes one `float128' by value.  The `_to_int32' routines
| return `int', the `_to_int64' routines return `long long', and the
| remaining ones return the floating-point type named after `_to_'.
| The whole group is only declared when `FLOAT128' is defined; the
| `floatx80' conversion additionally requires `FLOATX80'.
*----------------------------------------------------------------------------*/
int float128_to_int32( float128 );
int float128_to_int32_round_to_zero( float128 );
long long float128_to_int64( float128 );
long long float128_to_int64_round_to_zero( float128 );
float32 float128_to_float32( float128 );
float64 float128_to_float64( float128 );
#ifdef FLOATX80
floatx80 float128_to_floatx80( float128 );
#endif

#endif /* FLOAT128 */
#ifdef FLOAT128

/*----------------------------------------------------------------------------
| Software IEC/IEEE quadruple-precision operations.
|
| All operands are `float128' values passed by value.  The first group
| returns a `float128' result; the comparison routines and
| `float128_is_signaling_nan' return `int'.  The whole group is only declared
| when `FLOAT128' is defined.
*----------------------------------------------------------------------------*/
float128 float128_round_to_int( float128 );
float128 float128_add( float128, float128 );
float128 float128_sub( float128, float128 );
float128 float128_mul( float128, float128 );
float128 float128_div( float128, float128 );
float128 float128_rem( float128, float128 );
float128 float128_sqrt( float128 );

int float128_eq( float128, float128 );
int float128_le( float128, float128 );
int float128_lt( float128, float128 );
int float128_eq_signaling( float128, float128 );
int float128_le_quiet( float128, float128 );
int float128_lt_quiet( float128, float128 );
int float128_is_signaling_nan( float128 );

#endif /* FLOAT128 */