cavatools: initialize repository
[cavatools.git] / softfloat / source / f128M_sqrt.c
1
2 /*============================================================================
3
4 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
5 Package, Release 3e, by John R. Hauser.
6
7 Copyright 2011, 2012, 2013, 2014, 2017 The Regents of the University of
8 California. All rights reserved.
9
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 1. Redistributions of source code must retain the above copyright notice,
14 this list of conditions, and the following disclaimer.
15
16 2. Redistributions in binary form must reproduce the above copyright notice,
17 this list of conditions, and the following disclaimer in the documentation
18 and/or other materials provided with the distribution.
19
20 3. Neither the name of the University nor the names of its contributors may
21 be used to endorse or promote products derived from this software without
22 specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
25 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
26 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
27 DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
28 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
31 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34
35 =============================================================================*/
36
37 #include <stdbool.h>
38 #include <stdint.h>
39 #include "platform.h"
40 #include "internals.h"
41 #include "specialize.h"
42 #include "softfloat.h"
43
44 #ifdef SOFTFLOAT_FAST_INT64
45
/*----------------------------------------------------------------------------
| Pointer-based 128-bit square root, as built when SOFTFLOAT_FAST_INT64 is
| defined.  The value at `aPtr' is passed by value to `f128_sqrt' and the
| value it returns is stored through `zPtr'.  Nothing else is done here; all
| special-case handling is whatever `f128_sqrt' performs.
*----------------------------------------------------------------------------*/
46 void f128M_sqrt( const float128_t *aPtr, float128_t *zPtr )
47 {
48
49 *zPtr = f128_sqrt( *aPtr );
50
51 }
52
53 #else
54
/*----------------------------------------------------------------------------
| Square root of the 128-bit floating-point value at `aPtr', stored to
| `zPtr'.  This variant is built when SOFTFLOAT_FAST_INT64 is not defined and
| handles both operands as arrays of 32-bit words.
|   An input whose exponent field is 0x7FFF with a nonzero fraction is handed
| to `softfloat_propagateNaNF128M'.  An input whose exponent field is 0x7FFF
| with a zero fraction is copied to the result when its sign bit is clear.
| An input for which `softfloat_shiftNormSigF128M' returns -128 is copied to
| the result unchanged, whatever its sign.  Every other input with the sign
| bit set goes to `softfloat_invalidF128M'.  Otherwise the root is built up
| in pieces (`qs[2]', `qs[1]', `qs[0]', then a final `q'), assembled into the
| 160-bit value `y', and passed to `softfloat_roundPackMToF128M' with sign 0.
*----------------------------------------------------------------------------*/
55 void f128M_sqrt( const float128_t *aPtr, float128_t *zPtr )
56 {
57 const uint32_t *aWPtr;
58 uint32_t *zWPtr;
59 uint32_t uiA96;
60 bool signA;
61 int32_t rawExpA;
62 uint32_t rem[6];
63 int32_t expA, expZ;
64 uint64_t rem64;
65 uint32_t sig32A, recipSqrt32, sig32Z, qs[3], q;
66 uint64_t sig64Z;
67 uint32_t term[5];
68 uint64_t x64;
69 uint32_t y[5], rem32;
70
71 /*------------------------------------------------------------------------
| Reinterpret both operands as arrays of 32-bit words.
72 *------------------------------------------------------------------------*/
73 aWPtr = (const uint32_t *) aPtr;
74 zWPtr = (uint32_t *) zPtr;
75 /*------------------------------------------------------------------------
| Fetch the word selected by `indexWordHi( 4 )' and pull the sign and the
| raw exponent field out of it.
76 *------------------------------------------------------------------------*/
77 uiA96 = aWPtr[indexWordHi( 4 )];
78 signA = signF128UI96( uiA96 );
79 rawExpA = expF128UI96( uiA96 );
80 /*------------------------------------------------------------------------
| Exponent field of 0x7FFF:  a nonzero fraction in any of the four words
| goes to NaN propagation; otherwise the input is copied through when its
| sign bit is clear and is treated as invalid when its sign bit is set.
81 *------------------------------------------------------------------------*/
82 if ( rawExpA == 0x7FFF ) {
83 if (
84 fracF128UI96( uiA96 )
85 || (aWPtr[indexWord( 4, 2 )] | aWPtr[indexWord( 4, 1 )]
86 | aWPtr[indexWord( 4, 0 )])
87 ) {
88 softfloat_propagateNaNF128M( aWPtr, 0, zWPtr );
89 return;
90 }
91 if ( ! signA ) goto copyA;
92 goto invalid;
93 }
94 /*------------------------------------------------------------------------
| Normalize the significand into `rem', with a shift count of 13 or 12
| chosen by the low bit of the raw exponent field.  A return value of -128
| sends the input to `copyA' before the sign is examined (presumably the
| helper's marker for a zero operand -- TODO confirm against
| `softfloat_shiftNormSigF128M').  Any remaining input with the sign bit
| set is invalid.
95 *------------------------------------------------------------------------*/
96 expA = softfloat_shiftNormSigF128M( aWPtr, 13 - (rawExpA & 1), rem );
97 if ( expA == -128 ) goto copyA;
98 if ( signA ) goto invalid;
99 /*------------------------------------------------------------------------
| Halve the unbiased exponent to get the result exponent `expZ', reduce
| `expA' to its low bit, and form the first root estimate `sig32Z' from the
| two words of `rem' at positions 3 and 2 and an approximate reciprocal
| square root.
100 | (`sig32Z' is guaranteed to be a lower bound on the square root of
101 | `sig32A', which makes `sig32Z' also a lower bound on the square root of
102 | `sigA'.)
103 *------------------------------------------------------------------------*/
104 expZ = ((expA - 0x3FFF)>>1) + 0x3FFE;
105 expA &= 1;
106 rem64 = (uint64_t) rem[indexWord( 4, 3 )]<<32 | rem[indexWord( 4, 2 )];
107 if ( expA ) {
/* NOTE(review): with a raw exponent field of zero, the shift count used */
/* above was 13 (low bit of the raw field is 0) even though the returned */
/* exponent is odd; `rem' and `rem64' are shifted right one more bit     */
/* here, presumably to compensate -- confirm against the helper.         */
108 if ( ! rawExpA ) {
109 softfloat_shortShiftRight128M( rem, 1, rem );
110 rem64 >>= 1;
111 }
112 sig32A = rem64>>29;
113 } else {
114 sig32A = rem64>>30;
115 }
116 recipSqrt32 = softfloat_approxRecipSqrt32_1( expA, sig32A );
/* Upper 32 bits of the 64-bit product; halved when the exponent is odd. */
117 sig32Z = ((uint64_t) sig32A * recipSqrt32)>>32;
118 if ( expA ) sig32Z >>= 1;
/* Record the first piece of the root and remove its square from the */
/* running remainder, writing the result back into `rem'.            */
119 qs[2] = sig32Z;
120 rem64 -= (uint64_t) sig32Z * sig32Z;
121 rem[indexWord( 4, 3 )] = rem64>>32;
122 rem[indexWord( 4, 2 )] = rem64;
123 /*------------------------------------------------------------------------
| Estimate the next piece `q' from the remainder and `recipSqrt32', and
| combine it with `sig32Z' into the 64-bit partial root `sig64Z'.  Words 3
| and 0 of `term' are zeroed; words 2 and 1 are filled in inside the loop.
124 *------------------------------------------------------------------------*/
125 q = ((uint32_t) (rem64>>2) * (uint64_t) recipSqrt32)>>32;
126 sig64Z = ((uint64_t) sig32Z<<32) + ((uint64_t) q<<3);
127 term[indexWord( 4, 3 )] = 0;
128 term[indexWord( 4, 0 )] = 0;
129 /*------------------------------------------------------------------------
130 | (Repeating this loop is a rare occurrence.)
131 *------------------------------------------------------------------------*/
132 for (;;) {
133 x64 = ((uint64_t) sig32Z<<32) + sig64Z;
134 term[indexWord( 4, 2 )] = x64>>32;
135 term[indexWord( 4, 1 )] = x64;
136 softfloat_remStep128MBy32( rem, 29, term, q, y );
137 rem32 = y[indexWord( 4, 3 )];
/* Top bit of the new remainder's word 3 clear: accept `q'.  Otherwise */
/* step `q' down by one (and `sig64Z' by 1<<3 to match) and retry.      */
138 if ( ! (rem32 & 0x80000000) ) break;
139 --q;
140 sig64Z -= 1<<3;
141 }
142 qs[1] = q;
143 rem64 = (uint64_t) rem32<<32 | y[indexWord( 4, 2 )];
144 /*------------------------------------------------------------------------
| Estimate the third piece of the root from the new remainder (now held in
| `y'); `recipSqrt32' is added to the estimate when `rem64' has any bit set
| at position 34 or above.  `sig64Z' is doubled for this step.
145 *------------------------------------------------------------------------*/
146 q = ((uint32_t) (rem64>>2) * (uint64_t) recipSqrt32)>>32;
147 if ( rem64>>34 ) q += recipSqrt32;
148 sig64Z <<= 1;
149 /*------------------------------------------------------------------------
150 | (Repeating this loop is a rare occurrence.)
151 *------------------------------------------------------------------------*/
152 for (;;) {
153 x64 = sig64Z + (q>>26);
154 term[indexWord( 4, 2 )] = x64>>32;
155 term[indexWord( 4, 1 )] = x64;
156 term[indexWord( 4, 0 )] = q<<6;
/* The remainder step reads `y' and writes into the four-word window of */
/* the six-word `rem' selected by `indexMultiwordHi( 6, 4 )'.           */
157 softfloat_remStep128MBy32(
158 y, 29, term, q, &rem[indexMultiwordHi( 6, 4 )] );
159 rem32 = rem[indexWordHi( 6 )];
/* As in the previous loop: retry with `q' one smaller while the top bit */
/* of the new remainder is set.                                          */
160 if ( ! (rem32 & 0x80000000) ) break;
161 --q;
162 }
163 qs[0] = q;
164 rem64 = (uint64_t) rem32<<32 | rem[indexWord( 6, 4 )];
165 /*------------------------------------------------------------------------
| Estimate the last piece (2 is added to the raw estimate) and assemble the
| five-word value `y' from `q'<<27, `qs[0]'<<24, `qs[1]'<<21 and
| `qs[2]'<<18, carrying the upper 32 bits of each 64-bit sum into the next
| word.
166 *------------------------------------------------------------------------*/
167 q = (((uint32_t) (rem64>>2) * (uint64_t) recipSqrt32)>>32) + 2;
168 if ( rem64>>34 ) q += recipSqrt32;
169 x64 = (uint64_t) q<<27;
170 y[indexWord( 5, 0 )] = x64;
171 x64 = ((uint64_t) qs[0]<<24) + (x64>>32);
172 y[indexWord( 5, 1 )] = x64;
173 x64 = ((uint64_t) qs[1]<<21) + (x64>>32);
174 y[indexWord( 5, 2 )] = x64;
175 x64 = ((uint64_t) qs[2]<<18) + (x64>>32);
176 y[indexWord( 5, 3 )] = x64;
177 y[indexWord( 5, 4 )] = x64>>32;
178 /*------------------------------------------------------------------------
| Only when the low four bits of `q' are 2 or less is a further remainder
| computed:  the low two bits of `q' are cleared, the low word of `y' is
| rebuilt from it, and a 160-bit remainder step is run in place on the low
| five words of `rem'.  If the resulting remainder has its top bit set,
| `softfloat_sub1X160M' is applied to `y'; otherwise, if any remainder word
| is nonzero, the low bit of `y' is set.
| (NOTE(review): the low bit looks like a sticky/inexact marker consumed by
| `softfloat_roundPackMToF128M' -- confirm.)
179 *------------------------------------------------------------------------*/
180 if ( (q & 0xF) <= 2 ) {
181 q &= ~3;
182 y[indexWordLo( 5 )] = q<<27;
183 term[indexWord( 5, 4 )] = 0;
184 term[indexWord( 5, 3 )] = 0;
185 term[indexWord( 5, 2 )] = 0;
186 term[indexWord( 5, 1 )] = q>>6;
187 term[indexWord( 5, 0 )] = q<<26;
/* `term' is overwritten with `y' minus the value just stored in `term'. */
188 softfloat_sub160M( y, term, term );
189 rem[indexWord( 6, 1 )] = 0;
190 rem[indexWord( 6, 0 )] = 0;
191 softfloat_remStep160MBy32(
192 &rem[indexMultiwordLo( 6, 5 )],
193 14,
194 term,
195 q,
196 &rem[indexMultiwordLo( 6, 5 )]
197 );
198 rem32 = rem[indexWord( 6, 4 )];
199 if ( rem32 & 0x80000000 ) {
200 softfloat_sub1X160M( y );
201 } else {
202 if (
203 rem32 || rem[indexWord( 6, 0 )] || rem[indexWord( 6, 1 )]
204 || (rem[indexWord( 6, 3 )] | rem[indexWord( 6, 2 )])
205 ) {
206 y[indexWordLo( 5 )] |= 1;
207 }
208 }
209 }
/* The result sign is always passed as 0; inputs with the sign bit set */
/* have already left through `invalid' or `copyA'.                     */
210 softfloat_roundPackMToF128M( 0, expZ, y, zWPtr );
211 return;
212 /*------------------------------------------------------------------------
| Invalid operand:  the result is produced by `softfloat_invalidF128M'.
213 *------------------------------------------------------------------------*/
214 invalid:
215 softfloat_invalidF128M( zWPtr );
216 return;
217 /*------------------------------------------------------------------------
| Result equals the input:  copy all four 32-bit words, reusing the
| already-loaded high word `uiA96'.
218 *------------------------------------------------------------------------*/
219 copyA:
220 zWPtr[indexWordHi( 4 )] = uiA96;
221 zWPtr[indexWord( 4, 2 )] = aWPtr[indexWord( 4, 2 )];
222 zWPtr[indexWord( 4, 1 )] = aWPtr[indexWord( 4, 1 )];
223 zWPtr[indexWord( 4, 0 )] = aWPtr[indexWord( 4, 0 )];
224
225 }
226
227 #endif
228