cavatools: initialize repository
[cavatools.git] / softfloat / source / include / softfloat.h
1
2 /*============================================================================
3
4 This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic
5 Package, Release 3e, by John R. Hauser.
6
7 Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
8 University of California. All rights reserved.
9
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 1. Redistributions of source code must retain the above copyright notice,
14 this list of conditions, and the following disclaimer.
15
16 2. Redistributions in binary form must reproduce the above copyright notice,
17 this list of conditions, and the following disclaimer in the documentation
18 and/or other materials provided with the distribution.
19
20 3. Neither the name of the University nor the names of its contributors may
21 be used to endorse or promote products derived from this software without
22 specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
25 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
26 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
27 DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
28 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
31 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34
35 =============================================================================*/
36
37
38 /*============================================================================
39 | Note: If SoftFloat is made available as a general library for programs to
40 | use, it is strongly recommended that a platform-specific version of this
41 | header, "softfloat.h", be created that folds in "softfloat_types.h" and that
42 | eliminates all dependencies on compile-time macros.
43 *============================================================================*/
44
45
46 #ifndef softfloat_h
47 #define softfloat_h 1
48
49 #include <stdbool.h>
50 #include <stdint.h>
51 #include "softfloat_types.h"
52
/*----------------------------------------------------------------------------
| Fallback definition of 'THREAD_LOCAL'.  If the macro has not been defined
| before this point, it is defined here with an empty expansion, so that the
| 'extern THREAD_LOCAL ...' variable declarations in this header reduce to
| plain 'extern' declarations.
| NOTE(review): presumably a build that wants per-thread state defines
| 'THREAD_LOCAL' to a thread-storage specifier before including this header;
| confirm against the build configuration.
*----------------------------------------------------------------------------*/
53 #ifndef THREAD_LOCAL
54 #define THREAD_LOCAL
55 #endif
56
57 /*----------------------------------------------------------------------------
58 | Software floating-point underflow tininess-detection mode.
| The variable is only declared here ('extern'); this header does not define
| it.  The anonymous enum that follows supplies two mode constants, with
| values 0 and 1, presumably the values meant to be stored in the variable.
59 *----------------------------------------------------------------------------*/
60 extern THREAD_LOCAL uint_fast8_t softfloat_detectTininess;
61 enum {
62 softfloat_tininess_beforeRounding = 0,
63 softfloat_tininess_afterRounding = 1
64 };
65
66 /*----------------------------------------------------------------------------
67 | Software floating-point rounding mode. (Mode "odd" is supported only if
68 | SoftFloat is compiled with macro 'SOFTFLOAT_ROUND_ODD' defined.)
| The variable is only declared here ('extern'); this header does not define
| it.  The mode constants below are numbered 0 through 4 plus 6; the value 5
| is not assigned to any mode in this header.  Note that the constant
| 'softfloat_round_odd' itself is declared unconditionally here, whether or
| not 'SOFTFLOAT_ROUND_ODD' is defined.
69 *----------------------------------------------------------------------------*/
70 extern THREAD_LOCAL uint_fast8_t softfloat_roundingMode;
71 enum {
72 softfloat_round_near_even = 0,
73 softfloat_round_minMag = 1,
74 softfloat_round_min = 2,
75 softfloat_round_max = 3,
76 softfloat_round_near_maxMag = 4,
77 softfloat_round_odd = 6
78 };
79
80 /*----------------------------------------------------------------------------
81 | Software floating-point exception flags.
| The variable is only declared here ('extern'); this header does not define
| it.  Each flag constant below is a distinct single bit (1, 2, 4, 8, 16), so
| any combination of the five flags fits in the one 'uint_fast8_t' variable.
82 *----------------------------------------------------------------------------*/
83 extern THREAD_LOCAL uint_fast8_t softfloat_exceptionFlags;
84 enum {
85 softfloat_flag_inexact = 1,
86 softfloat_flag_underflow = 2,
87 softfloat_flag_overflow = 4,
88 softfloat_flag_infinite = 8,
89 softfloat_flag_invalid = 16
90 };
91
92 /*----------------------------------------------------------------------------
93 | Routine to raise any or all of the software floating-point exception flags.
| Takes a single 'uint_fast8_t' argument and returns nothing.
| NOTE(review): presumably the argument is a bitwise OR of the
| 'softfloat_flag_*' constants; confirm against the implementation.
94 *----------------------------------------------------------------------------*/
95 void softfloat_raiseFlags( uint_fast8_t );
96
97 /*----------------------------------------------------------------------------
98 | Integer-to-floating-point conversion routines.
| As the prototypes show, the name prefix identifies the integer argument
| type ('ui32'/'ui64' take 'uint32_t'/'uint64_t'; 'i32'/'i64' take
| 'int32_t'/'int64_t') and the name suffix identifies the floating-point
| result type.
| The 'extF80' and 'f128' variants that return their result by value are
| declared only when 'SOFTFLOAT_FAST_INT64' is defined.  The 'extF80M' and
| 'f128M' variants are always declared; they return 'void' and take an
| additional non-const pointer argument (presumably the destination of the
| result).
99 *----------------------------------------------------------------------------*/
100 float16_t ui32_to_f16( uint32_t );
101 float32_t ui32_to_f32( uint32_t );
102 float64_t ui32_to_f64( uint32_t );
103 #ifdef SOFTFLOAT_FAST_INT64
104 extFloat80_t ui32_to_extF80( uint32_t );
105 float128_t ui32_to_f128( uint32_t );
106 #endif
107 void ui32_to_extF80M( uint32_t, extFloat80_t * );
108 void ui32_to_f128M( uint32_t, float128_t * );
109 float16_t ui64_to_f16( uint64_t );
110 float32_t ui64_to_f32( uint64_t );
111 float64_t ui64_to_f64( uint64_t );
112 #ifdef SOFTFLOAT_FAST_INT64
113 extFloat80_t ui64_to_extF80( uint64_t );
114 float128_t ui64_to_f128( uint64_t );
115 #endif
116 void ui64_to_extF80M( uint64_t, extFloat80_t * );
117 void ui64_to_f128M( uint64_t, float128_t * );
118 float16_t i32_to_f16( int32_t );
119 float32_t i32_to_f32( int32_t );
120 float64_t i32_to_f64( int32_t );
121 #ifdef SOFTFLOAT_FAST_INT64
122 extFloat80_t i32_to_extF80( int32_t );
123 float128_t i32_to_f128( int32_t );
124 #endif
125 void i32_to_extF80M( int32_t, extFloat80_t * );
126 void i32_to_f128M( int32_t, float128_t * );
127 float16_t i64_to_f16( int64_t );
128 float32_t i64_to_f32( int64_t );
129 float64_t i64_to_f64( int64_t );
130 #ifdef SOFTFLOAT_FAST_INT64
131 extFloat80_t i64_to_extF80( int64_t );
132 float128_t i64_to_f128( int64_t );
133 #endif
134 void i64_to_extF80M( int64_t, extFloat80_t * );
135 void i64_to_f128M( int64_t, float128_t * );
136
137 /*----------------------------------------------------------------------------
138 | 16-bit (half-precision) floating-point operations.
| All operands of type 'float16_t' are passed by value.
| The conversions to integer take a 'uint_fast8_t' and a 'bool' after the
| operand; the '_r_minMag' forms take only the 'bool'.
| NOTE(review): presumably the 'uint_fast8_t' is one of the
| 'softfloat_round_*' modes and the 'bool' controls reporting of inexact
| results; confirm against the SoftFloat documentation.
| Conversions that return 'extFloat80_t' or 'float128_t' by value are
| declared only when 'SOFTFLOAT_FAST_INT64' is defined; the '...M' forms that
| take a non-const result pointer are always declared.
| The comparison routines and 'f16_isSignalingNaN' return 'bool'.
139 *----------------------------------------------------------------------------*/
140 uint_fast32_t f16_to_ui32( float16_t, uint_fast8_t, bool );
141 uint_fast64_t f16_to_ui64( float16_t, uint_fast8_t, bool );
142 int_fast32_t f16_to_i32( float16_t, uint_fast8_t, bool );
143 int_fast64_t f16_to_i64( float16_t, uint_fast8_t, bool );
144 uint_fast32_t f16_to_ui32_r_minMag( float16_t, bool );
145 uint_fast64_t f16_to_ui64_r_minMag( float16_t, bool );
146 int_fast32_t f16_to_i32_r_minMag( float16_t, bool );
147 int_fast64_t f16_to_i64_r_minMag( float16_t, bool );
148 float32_t f16_to_f32( float16_t );
149 float64_t f16_to_f64( float16_t );
150 #ifdef SOFTFLOAT_FAST_INT64
151 extFloat80_t f16_to_extF80( float16_t );
152 float128_t f16_to_f128( float16_t );
153 #endif
154 void f16_to_extF80M( float16_t, extFloat80_t * );
155 void f16_to_f128M( float16_t, float128_t * );
156 float16_t f16_roundToInt( float16_t, uint_fast8_t, bool );
157 float16_t f16_add( float16_t, float16_t );
158 float16_t f16_sub( float16_t, float16_t );
159 float16_t f16_mul( float16_t, float16_t );
160 float16_t f16_mulAdd( float16_t, float16_t, float16_t );
161 float16_t f16_div( float16_t, float16_t );
162 float16_t f16_rem( float16_t, float16_t );
163 float16_t f16_sqrt( float16_t );
164 bool f16_eq( float16_t, float16_t );
165 bool f16_le( float16_t, float16_t );
166 bool f16_lt( float16_t, float16_t );
167 bool f16_eq_signaling( float16_t, float16_t );
168 bool f16_le_quiet( float16_t, float16_t );
169 bool f16_lt_quiet( float16_t, float16_t );
170 bool f16_isSignalingNaN( float16_t );
171
172 /*----------------------------------------------------------------------------
173 | 32-bit (single-precision) floating-point operations.
| All operands of type 'float32_t' are passed by value.
| The conversions to integer take a 'uint_fast8_t' and a 'bool' after the
| operand; the '_r_minMag' forms take only the 'bool'.
| NOTE(review): presumably the 'uint_fast8_t' is one of the
| 'softfloat_round_*' modes and the 'bool' controls reporting of inexact
| results; confirm against the SoftFloat documentation.
| Conversions that return 'extFloat80_t' or 'float128_t' by value are
| declared only when 'SOFTFLOAT_FAST_INT64' is defined; the '...M' forms that
| take a non-const result pointer are always declared.
| The comparison routines and 'f32_isSignalingNaN' return 'bool'.
174 *----------------------------------------------------------------------------*/
175 uint_fast32_t f32_to_ui32( float32_t, uint_fast8_t, bool );
176 uint_fast64_t f32_to_ui64( float32_t, uint_fast8_t, bool );
177 int_fast32_t f32_to_i32( float32_t, uint_fast8_t, bool );
178 int_fast64_t f32_to_i64( float32_t, uint_fast8_t, bool );
179 uint_fast32_t f32_to_ui32_r_minMag( float32_t, bool );
180 uint_fast64_t f32_to_ui64_r_minMag( float32_t, bool );
181 int_fast32_t f32_to_i32_r_minMag( float32_t, bool );
182 int_fast64_t f32_to_i64_r_minMag( float32_t, bool );
183 float16_t f32_to_f16( float32_t );
184 float64_t f32_to_f64( float32_t );
185 #ifdef SOFTFLOAT_FAST_INT64
186 extFloat80_t f32_to_extF80( float32_t );
187 float128_t f32_to_f128( float32_t );
188 #endif
189 void f32_to_extF80M( float32_t, extFloat80_t * );
190 void f32_to_f128M( float32_t, float128_t * );
191 float32_t f32_roundToInt( float32_t, uint_fast8_t, bool );
192 float32_t f32_add( float32_t, float32_t );
193 float32_t f32_sub( float32_t, float32_t );
194 float32_t f32_mul( float32_t, float32_t );
195 float32_t f32_mulAdd( float32_t, float32_t, float32_t );
196 float32_t f32_div( float32_t, float32_t );
197 float32_t f32_rem( float32_t, float32_t );
198 float32_t f32_sqrt( float32_t );
199 bool f32_eq( float32_t, float32_t );
200 bool f32_le( float32_t, float32_t );
201 bool f32_lt( float32_t, float32_t );
202 bool f32_eq_signaling( float32_t, float32_t );
203 bool f32_le_quiet( float32_t, float32_t );
204 bool f32_lt_quiet( float32_t, float32_t );
205 bool f32_isSignalingNaN( float32_t );
206
207 /*----------------------------------------------------------------------------
208 | 64-bit (double-precision) floating-point operations.
| All operands of type 'float64_t' are passed by value.
| The conversions to integer take a 'uint_fast8_t' and a 'bool' after the
| operand; the '_r_minMag' forms take only the 'bool'.
| NOTE(review): presumably the 'uint_fast8_t' is one of the
| 'softfloat_round_*' modes and the 'bool' controls reporting of inexact
| results; confirm against the SoftFloat documentation.
| Conversions that return 'extFloat80_t' or 'float128_t' by value are
| declared only when 'SOFTFLOAT_FAST_INT64' is defined; the '...M' forms that
| take a non-const result pointer are always declared.
| The comparison routines and 'f64_isSignalingNaN' return 'bool'.
209 *----------------------------------------------------------------------------*/
210 uint_fast32_t f64_to_ui32( float64_t, uint_fast8_t, bool );
211 uint_fast64_t f64_to_ui64( float64_t, uint_fast8_t, bool );
212 int_fast32_t f64_to_i32( float64_t, uint_fast8_t, bool );
213 int_fast64_t f64_to_i64( float64_t, uint_fast8_t, bool );
214 uint_fast32_t f64_to_ui32_r_minMag( float64_t, bool );
215 uint_fast64_t f64_to_ui64_r_minMag( float64_t, bool );
216 int_fast32_t f64_to_i32_r_minMag( float64_t, bool );
217 int_fast64_t f64_to_i64_r_minMag( float64_t, bool );
218 float16_t f64_to_f16( float64_t );
219 float32_t f64_to_f32( float64_t );
220 #ifdef SOFTFLOAT_FAST_INT64
221 extFloat80_t f64_to_extF80( float64_t );
222 float128_t f64_to_f128( float64_t );
223 #endif
224 void f64_to_extF80M( float64_t, extFloat80_t * );
225 void f64_to_f128M( float64_t, float128_t * );
226 float64_t f64_roundToInt( float64_t, uint_fast8_t, bool );
227 float64_t f64_add( float64_t, float64_t );
228 float64_t f64_sub( float64_t, float64_t );
229 float64_t f64_mul( float64_t, float64_t );
230 float64_t f64_mulAdd( float64_t, float64_t, float64_t );
231 float64_t f64_div( float64_t, float64_t );
232 float64_t f64_rem( float64_t, float64_t );
233 float64_t f64_sqrt( float64_t );
234 bool f64_eq( float64_t, float64_t );
235 bool f64_le( float64_t, float64_t );
236 bool f64_lt( float64_t, float64_t );
237 bool f64_eq_signaling( float64_t, float64_t );
238 bool f64_le_quiet( float64_t, float64_t );
239 bool f64_lt_quiet( float64_t, float64_t );
240 bool f64_isSignalingNaN( float64_t );
241
242 /*----------------------------------------------------------------------------
243 | Rounding precision for 80-bit extended double-precision floating-point.
244 | Valid values are 32, 64, and 80.
| The variable is only declared here ('extern'); this header does not define
| it.  The declaration is unconditional: unlike the by-value 'extF80_*'
| routines, it is not gated on 'SOFTFLOAT_FAST_INT64'.
245 *----------------------------------------------------------------------------*/
246 extern THREAD_LOCAL uint_fast8_t extF80_roundingPrecision;
247
248 /*----------------------------------------------------------------------------
249 | 80-bit extended double-precision floating-point operations.
| Two parallel interfaces are declared:
|  - 'extF80_*' routines pass and return 'extFloat80_t' by value and are
|    declared only when 'SOFTFLOAT_FAST_INT64' is defined.
|  - 'extF80M_*' routines are always declared.  They take their operands as
|    'const extFloat80_t *'; those that yield an 'extFloat80_t' or
|    'float128_t' return 'void' and take a trailing non-const pointer
|    (presumably the destination of the result), while conversions to
|    integer, 'float16_t', 'float32_t', and 'float64_t' and the comparison
|    routines return their result by value.
| No 'mulAdd' routine is declared for this format in either interface.
| NOTE(review): in the integer conversions and 'roundToInt', presumably the
| 'uint_fast8_t' is one of the 'softfloat_round_*' modes and the 'bool'
| controls reporting of inexact results; confirm against the SoftFloat
| documentation.
250 *----------------------------------------------------------------------------*/
251 #ifdef SOFTFLOAT_FAST_INT64
252 uint_fast32_t extF80_to_ui32( extFloat80_t, uint_fast8_t, bool );
253 uint_fast64_t extF80_to_ui64( extFloat80_t, uint_fast8_t, bool );
254 int_fast32_t extF80_to_i32( extFloat80_t, uint_fast8_t, bool );
255 int_fast64_t extF80_to_i64( extFloat80_t, uint_fast8_t, bool );
256 uint_fast32_t extF80_to_ui32_r_minMag( extFloat80_t, bool );
257 uint_fast64_t extF80_to_ui64_r_minMag( extFloat80_t, bool );
258 int_fast32_t extF80_to_i32_r_minMag( extFloat80_t, bool );
259 int_fast64_t extF80_to_i64_r_minMag( extFloat80_t, bool );
260 float16_t extF80_to_f16( extFloat80_t );
261 float32_t extF80_to_f32( extFloat80_t );
262 float64_t extF80_to_f64( extFloat80_t );
263 float128_t extF80_to_f128( extFloat80_t );
264 extFloat80_t extF80_roundToInt( extFloat80_t, uint_fast8_t, bool );
265 extFloat80_t extF80_add( extFloat80_t, extFloat80_t );
266 extFloat80_t extF80_sub( extFloat80_t, extFloat80_t );
267 extFloat80_t extF80_mul( extFloat80_t, extFloat80_t );
268 extFloat80_t extF80_div( extFloat80_t, extFloat80_t );
269 extFloat80_t extF80_rem( extFloat80_t, extFloat80_t );
270 extFloat80_t extF80_sqrt( extFloat80_t );
271 bool extF80_eq( extFloat80_t, extFloat80_t );
272 bool extF80_le( extFloat80_t, extFloat80_t );
273 bool extF80_lt( extFloat80_t, extFloat80_t );
274 bool extF80_eq_signaling( extFloat80_t, extFloat80_t );
275 bool extF80_le_quiet( extFloat80_t, extFloat80_t );
276 bool extF80_lt_quiet( extFloat80_t, extFloat80_t );
277 bool extF80_isSignalingNaN( extFloat80_t );
278 #endif
279 uint_fast32_t extF80M_to_ui32( const extFloat80_t *, uint_fast8_t, bool );
280 uint_fast64_t extF80M_to_ui64( const extFloat80_t *, uint_fast8_t, bool );
281 int_fast32_t extF80M_to_i32( const extFloat80_t *, uint_fast8_t, bool );
282 int_fast64_t extF80M_to_i64( const extFloat80_t *, uint_fast8_t, bool );
283 uint_fast32_t extF80M_to_ui32_r_minMag( const extFloat80_t *, bool );
284 uint_fast64_t extF80M_to_ui64_r_minMag( const extFloat80_t *, bool );
285 int_fast32_t extF80M_to_i32_r_minMag( const extFloat80_t *, bool );
286 int_fast64_t extF80M_to_i64_r_minMag( const extFloat80_t *, bool );
287 float16_t extF80M_to_f16( const extFloat80_t * );
288 float32_t extF80M_to_f32( const extFloat80_t * );
289 float64_t extF80M_to_f64( const extFloat80_t * );
290 void extF80M_to_f128M( const extFloat80_t *, float128_t * );
291 void
292 extF80M_roundToInt(
293 const extFloat80_t *, uint_fast8_t, bool, extFloat80_t * );
294 void extF80M_add( const extFloat80_t *, const extFloat80_t *, extFloat80_t * );
295 void extF80M_sub( const extFloat80_t *, const extFloat80_t *, extFloat80_t * );
296 void extF80M_mul( const extFloat80_t *, const extFloat80_t *, extFloat80_t * );
297 void extF80M_div( const extFloat80_t *, const extFloat80_t *, extFloat80_t * );
298 void extF80M_rem( const extFloat80_t *, const extFloat80_t *, extFloat80_t * );
299 void extF80M_sqrt( const extFloat80_t *, extFloat80_t * );
300 bool extF80M_eq( const extFloat80_t *, const extFloat80_t * );
301 bool extF80M_le( const extFloat80_t *, const extFloat80_t * );
302 bool extF80M_lt( const extFloat80_t *, const extFloat80_t * );
303 bool extF80M_eq_signaling( const extFloat80_t *, const extFloat80_t * );
304 bool extF80M_le_quiet( const extFloat80_t *, const extFloat80_t * );
305 bool extF80M_lt_quiet( const extFloat80_t *, const extFloat80_t * );
306 bool extF80M_isSignalingNaN( const extFloat80_t * );
307
308 /*----------------------------------------------------------------------------
309 | 128-bit (quadruple-precision) floating-point operations.
| Two parallel interfaces are declared:
|  - 'f128_*' routines pass and return 'float128_t' by value and are
|    declared only when 'SOFTFLOAT_FAST_INT64' is defined.
|  - 'f128M_*' routines are always declared.  They take their operands as
|    'const float128_t *'; those that yield a 'float128_t' or 'extFloat80_t'
|    return 'void' and take a trailing non-const pointer (presumably the
|    destination of the result), while conversions to integer, 'float16_t',
|    'float32_t', and 'float64_t' and the comparison routines return their
|    result by value.
| A three-operand 'mulAdd' routine is declared in both interfaces.
| NOTE(review): in the integer conversions and 'roundToInt', presumably the
| 'uint_fast8_t' is one of the 'softfloat_round_*' modes and the 'bool'
| controls reporting of inexact results; confirm against the SoftFloat
| documentation.
310 *----------------------------------------------------------------------------*/
311 #ifdef SOFTFLOAT_FAST_INT64
312 uint_fast32_t f128_to_ui32( float128_t, uint_fast8_t, bool );
313 uint_fast64_t f128_to_ui64( float128_t, uint_fast8_t, bool );
314 int_fast32_t f128_to_i32( float128_t, uint_fast8_t, bool );
315 int_fast64_t f128_to_i64( float128_t, uint_fast8_t, bool );
316 uint_fast32_t f128_to_ui32_r_minMag( float128_t, bool );
317 uint_fast64_t f128_to_ui64_r_minMag( float128_t, bool );
318 int_fast32_t f128_to_i32_r_minMag( float128_t, bool );
319 int_fast64_t f128_to_i64_r_minMag( float128_t, bool );
320 float16_t f128_to_f16( float128_t );
321 float32_t f128_to_f32( float128_t );
322 float64_t f128_to_f64( float128_t );
323 extFloat80_t f128_to_extF80( float128_t );
324 float128_t f128_roundToInt( float128_t, uint_fast8_t, bool );
325 float128_t f128_add( float128_t, float128_t );
326 float128_t f128_sub( float128_t, float128_t );
327 float128_t f128_mul( float128_t, float128_t );
328 float128_t f128_mulAdd( float128_t, float128_t, float128_t );
329 float128_t f128_div( float128_t, float128_t );
330 float128_t f128_rem( float128_t, float128_t );
331 float128_t f128_sqrt( float128_t );
332 bool f128_eq( float128_t, float128_t );
333 bool f128_le( float128_t, float128_t );
334 bool f128_lt( float128_t, float128_t );
335 bool f128_eq_signaling( float128_t, float128_t );
336 bool f128_le_quiet( float128_t, float128_t );
337 bool f128_lt_quiet( float128_t, float128_t );
338 bool f128_isSignalingNaN( float128_t );
339 #endif
340 uint_fast32_t f128M_to_ui32( const float128_t *, uint_fast8_t, bool );
341 uint_fast64_t f128M_to_ui64( const float128_t *, uint_fast8_t, bool );
342 int_fast32_t f128M_to_i32( const float128_t *, uint_fast8_t, bool );
343 int_fast64_t f128M_to_i64( const float128_t *, uint_fast8_t, bool );
344 uint_fast32_t f128M_to_ui32_r_minMag( const float128_t *, bool );
345 uint_fast64_t f128M_to_ui64_r_minMag( const float128_t *, bool );
346 int_fast32_t f128M_to_i32_r_minMag( const float128_t *, bool );
347 int_fast64_t f128M_to_i64_r_minMag( const float128_t *, bool );
348 float16_t f128M_to_f16( const float128_t * );
349 float32_t f128M_to_f32( const float128_t * );
350 float64_t f128M_to_f64( const float128_t * );
351 void f128M_to_extF80M( const float128_t *, extFloat80_t * );
352 void f128M_roundToInt( const float128_t *, uint_fast8_t, bool, float128_t * );
353 void f128M_add( const float128_t *, const float128_t *, float128_t * );
354 void f128M_sub( const float128_t *, const float128_t *, float128_t * );
355 void f128M_mul( const float128_t *, const float128_t *, float128_t * );
356 void
357 f128M_mulAdd(
358 const float128_t *, const float128_t *, const float128_t *, float128_t *
359 );
360 void f128M_div( const float128_t *, const float128_t *, float128_t * );
361 void f128M_rem( const float128_t *, const float128_t *, float128_t * );
362 void f128M_sqrt( const float128_t *, float128_t * );
363 bool f128M_eq( const float128_t *, const float128_t * );
364 bool f128M_le( const float128_t *, const float128_t * );
365 bool f128M_lt( const float128_t *, const float128_t * );
366 bool f128M_eq_signaling( const float128_t *, const float128_t * );
367 bool f128M_le_quiet( const float128_t *, const float128_t * );
368 bool f128M_lt_quiet( const float128_t *, const float128_t * );
369 bool f128M_isSignalingNaN( const float128_t * );
370
371 #endif
372