2 /*============================================================================
4 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
5 Package, Release 3e, by John R. Hauser.
7 Copyright 2011, 2012, 2013, 2014, 2017 The Regents of the University of
8 California. All rights reserved.
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
13 1. Redistributions of source code must retain the above copyright notice,
14 this list of conditions, and the following disclaimer.
16 2. Redistributions in binary form must reproduce the above copyright notice,
17 this list of conditions, and the following disclaimer in the documentation
18 and/or other materials provided with the distribution.
20 3. Neither the name of the University nor the names of its contributors may
21 be used to endorse or promote products derived from this software without
22 specific prior written permission.
24 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
25 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
26 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
27 DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
28 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
31 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 =============================================================================*/
40 #include "internals.h"
41 #include "specialize.h"
42 #include "softfloat.h"
44 #ifdef SOFTFLOAT_FAST_INT64
/* Variant of f128M_sqrt that follows the `#ifdef SOFTFLOAT_FAST_INT64' line:
   it reads the operand through `aPtr', hands it by value to f128_sqrt, and
   stores the returned value through `zPtr'.
   NOTE(review): the braces of this definition are not visible in this
   listing. */
46 void f128M_sqrt( const float128_t
*aPtr
, float128_t
*zPtr
)
49 *zPtr
= f128_sqrt( *aPtr
);
/* Second variant of f128M_sqrt (the one that does not delegate to f128_sqrt).
   It treats the operand at `aPtr' and the result at `zPtr' as arrays of
   32-bit words and builds the result from a 32-bit reciprocal-square-root
   estimate that is refined in several remainder steps.
   NOTE(review): this listing is fragmentary -- braces, most declarations,
   loop headers, the `invalid'/`copyA' labels, and some statements and call
   arguments are not visible -- so the comments below describe only the
   statements that are actually shown. */
55 void f128M_sqrt( const float128_t
*aPtr
, float128_t
*zPtr
)
57 const uint32_t *aWPtr
;
/* 32-bit working values.  `qs' is read further down when the words of `y'
   are assembled; the statements that fill it are not visible here. */
65 uint32_t sig32A
, recipSqrt32
, sig32Z
, qs
[3], q
;
71 /*------------------------------------------------------------------------
72 *------------------------------------------------------------------------*/
/* View the operand and the destination as arrays of 32-bit words. */
73 aWPtr
= (const uint32_t *) aPtr
;
74 zWPtr
= (uint32_t *) zPtr
;
75 /*------------------------------------------------------------------------
76 *------------------------------------------------------------------------*/
/* Take the word selected by indexWordHi( 4 ) and extract the sign and the
   raw exponent field from it. */
77 uiA96
= aWPtr
[indexWordHi( 4 )];
78 signA
= signF128UI96( uiA96
);
79 rawExpA
= expF128UI96( uiA96
);
80 /*------------------------------------------------------------------------
81 *------------------------------------------------------------------------*/
/* Raw exponent 0x7FFF is handled on its own.  The visible part of the test
   ORs together the three remaining operand words; when it succeeds the
   result comes from softfloat_propagateNaNF128M.  Otherwise an operand with
   a clear sign goes to `copyA'.
   NOTE(review): the first part of that test is not visible here. */
82 if ( rawExpA
== 0x7FFF ) {
85 || (aWPtr
[indexWord( 4, 2 )] | aWPtr
[indexWord( 4, 1 )]
86 | aWPtr
[indexWord( 4, 0 )])
88 softfloat_propagateNaNF128M( aWPtr
, 0, zWPtr
);
91 if ( ! signA
) goto copyA
;
94 /*------------------------------------------------------------------------
95 *------------------------------------------------------------------------*/
/* Normalize the significand into `rem'.  The shift count is 13 or 12
   depending on the low bit of the raw exponent.  A returned exponent of
   -128 sends control to `copyA' (presumably the "operand is zero" signal --
   confirm against softfloat_shiftNormSigF128M).  Any operand that gets past
   that check with its sign set is routed to `invalid'. */
96 expA
= softfloat_shiftNormSigF128M( aWPtr
, 13 - (rawExpA
& 1), rem
);
97 if ( expA
== -128 ) goto copyA
;
98 if ( signA
) goto invalid
;
99 /*------------------------------------------------------------------------
100 | (`sig32Z' is guaranteed to be a lower bound on the square root of
101 | `sig32A', which makes `sig32Z' also a lower bound on the square root of
103 *------------------------------------------------------------------------*/
/* NOTE(review): the sentence in the comment above is cut off in this
   listing; its final line is missing. */
/* Result exponent: halve (expA - 0x3FFF) and add 0x3FFE. */
104 expZ
= ((expA
- 0x3FFF)>>1) + 0x3FFE;
/* Combine words 3 and 2 of `rem' into one 64-bit value. */
106 rem64
= (uint64_t) rem
[indexWord( 4, 3 )]<<32 | rem
[indexWord( 4, 2 )];
/* Shift `rem' right by one bit in place.
   NOTE(review): any condition guarding this call is not visible here. */
109 softfloat_shortShiftRight128M( rem
, 1, rem
);
/* First 32-bit estimate: multiply `sig32A' by an approximate reciprocal
   square root and keep the upper 32 bits of the 64-bit product; the estimate
   is halved when `expA' is nonzero.
   NOTE(review): the assignment to `sig32A' is not visible here. */
116 recipSqrt32
= softfloat_approxRecipSqrt32_1( expA
, sig32A
);
117 sig32Z
= ((uint64_t) sig32A
* recipSqrt32
)>>32;
118 if ( expA
) sig32Z
>>= 1;
/* Subtract the square of the estimate from the 64-bit remainder and write
   the difference back into words 3 and 2 of `rem'. */
120 rem64
-= (uint64_t) sig32Z
* sig32Z
;
121 rem
[indexWord( 4, 3 )] = rem64
>>32;
122 rem
[indexWord( 4, 2 )] = rem64
;
123 /*------------------------------------------------------------------------
124 *------------------------------------------------------------------------*/
/* Estimate the next quotient value `q' from the remainder and the
   reciprocal, then form the 64-bit root `sig64Z' as `sig32Z' in the upper
   half plus `q' shifted left by 3. */
125 q
= ((uint32_t) (rem64
>>2) * (uint64_t) recipSqrt32
)>>32;
126 sig64Z
= ((uint64_t) sig32Z
<<32) + ((uint64_t) q
<<3);
/* Words 3 and 0 of `term' are cleared here; words 2 and 1 are filled in
   below. */
127 term
[indexWord( 4, 3 )] = 0;
128 term
[indexWord( 4, 0 )] = 0;
129 /*------------------------------------------------------------------------
130 | (Repeating this loop is a rare occurrence.)
131 *------------------------------------------------------------------------*/
/* Remainder step from `rem' into `y' using `term' and `q'.  The `break' is
   taken when the top bit of the resulting high word is clear.
   NOTE(review): the loop header and the statements executed when the top
   bit is set are not visible here. */
133 x64
= ((uint64_t) sig32Z
<<32) + sig64Z
;
134 term
[indexWord( 4, 2 )] = x64
>>32;
135 term
[indexWord( 4, 1 )] = x64
;
136 softfloat_remStep128MBy32( rem
, 29, term
, q
, y
);
137 rem32
= y
[indexWord( 4, 3 )];
138 if ( ! (rem32
& 0x80000000) ) break;
/* Rebuild the 64-bit remainder from `rem32' and word 2 of `y'. */
143 rem64
= (uint64_t) rem32
<<32 | y
[indexWord( 4, 2 )];
144 /*------------------------------------------------------------------------
145 *------------------------------------------------------------------------*/
/* Next quotient estimate.  The multiplication uses only the low 32 bits of
   rem64>>2, so when rem64 has bits at position 34 or above `recipSqrt32' is
   added to `q' separately. */
146 q
= ((uint32_t) (rem64
>>2) * (uint64_t) recipSqrt32
)>>32;
147 if ( rem64
>>34 ) q
+= recipSqrt32
;
149 /*------------------------------------------------------------------------
150 | (Repeating this loop is a rare occurrence.)
151 *------------------------------------------------------------------------*/
/* Second remainder step, from `y' into the part of the six-word `rem'
   selected by indexMultiwordHi( 6, 4 ).  As before, the `break' is taken
   when the top bit of the high word is clear.
   NOTE(review): the loop header and the statements executed when the top
   bit is set are not visible here. */
153 x64
= sig64Z
+ (q
>>26);
154 term
[indexWord( 4, 2 )] = x64
>>32;
155 term
[indexWord( 4, 1 )] = x64
;
156 term
[indexWord( 4, 0 )] = q
<<6;
157 softfloat_remStep128MBy32(
158 y
, 29, term
, q
, &rem
[indexMultiwordHi( 6, 4 )] );
159 rem32
= rem
[indexWordHi( 6 )];
160 if ( ! (rem32
& 0x80000000) ) break;
/* Rebuild the 64-bit remainder from `rem32' and word 4 of `rem'. */
164 rem64
= (uint64_t) rem32
<<32 | rem
[indexWord( 6, 4 )];
165 /*------------------------------------------------------------------------
166 *------------------------------------------------------------------------*/
/* Last quotient estimate; this one has 2 added to it. */
167 q
= (((uint32_t) (rem64
>>2) * (uint64_t) recipSqrt32
)>>32) + 2;
168 if ( rem64
>>34 ) q
+= recipSqrt32
;
/* Assemble the five words of `y' from q<<27, qs[0]<<24, qs[1]<<21 and
   qs[2]<<18.  After each word is stored, the upper 32 bits of `x64' are
   carried into the sum for the next word. */
169 x64
= (uint64_t) q
<<27;
170 y
[indexWord( 5, 0 )] = x64
;
171 x64
= ((uint64_t) qs
[0]<<24) + (x64
>>32);
172 y
[indexWord( 5, 1 )] = x64
;
173 x64
= ((uint64_t) qs
[1]<<21) + (x64
>>32);
174 y
[indexWord( 5, 2 )] = x64
;
175 x64
= ((uint64_t) qs
[2]<<18) + (x64
>>32);
176 y
[indexWord( 5, 3 )] = x64
;
177 y
[indexWord( 5, 4 )] = x64
>>32;
178 /*------------------------------------------------------------------------
179 *------------------------------------------------------------------------*/
/* Extra work is done only when the low four bits of `q' are 0, 1 or 2.
   The low word of `y' is rewritten, a five-word `term' is built from `q'
   and replaced by y - term, and a final 160-bit remainder step is run.
   NOTE(review): one statement between the `if' and the store to `y', and
   the middle arguments of softfloat_remStep160MBy32, are not visible
   here. */
180 if ( (q
& 0xF) <= 2 ) {
182 y
[indexWordLo( 5 )] = q
<<27;
183 term
[indexWord( 5, 4 )] = 0;
184 term
[indexWord( 5, 3 )] = 0;
185 term
[indexWord( 5, 2 )] = 0;
186 term
[indexWord( 5, 1 )] = q
>>6;
187 term
[indexWord( 5, 0 )] = q
<<26;
188 softfloat_sub160M( y
, term
, term
);
189 rem
[indexWord( 6, 1 )] = 0;
190 rem
[indexWord( 6, 0 )] = 0;
191 softfloat_remStep160MBy32(
192 &rem
[indexMultiwordLo( 6, 5 )],
196 &rem
[indexMultiwordLo( 6, 5 )]
/* If the top bit of word 4 of the remainder is set, softfloat_sub1X160M is
   applied to `y'. */
198 rem32
= rem
[indexWord( 6, 4 )];
199 if ( rem32
& 0x80000000 ) {
200 softfloat_sub1X160M( y
);
/* The visible test ORs together all remainder words; it guards the store
   that sets the low bit of `y' (this looks like a sticky bit for rounding --
   TODO confirm).
   NOTE(review): the `if'/`else' keywords around this test are not visible
   here. */
203 rem32
|| rem
[indexWord( 6, 0 )] || rem
[indexWord( 6, 1 )]
204 || (rem
[indexWord( 6, 3 )] | rem
[indexWord( 6, 2 )])
206 y
[indexWordLo( 5 )] |= 1;
/* Round and pack `y' with sign argument 0 and exponent `expZ' into the
   destination words. */
210 softfloat_roundPackMToF128M( 0, expZ
, y
, zWPtr
);
212 /*------------------------------------------------------------------------
213 *------------------------------------------------------------------------*/
/* Presumably the target of `goto invalid' (the label itself is not visible
   here): the destination is filled in by softfloat_invalidF128M. */
215 softfloat_invalidF128M( zWPtr
);
217 /*------------------------------------------------------------------------
218 *------------------------------------------------------------------------*/
/* Presumably the target of `goto copyA' (the label itself is not visible
   here): the operand is copied to the destination word by word, using the
   already-loaded `uiA96' for the high word. */
220 zWPtr
[indexWordHi( 4 )] = uiA96
;
221 zWPtr
[indexWord( 4, 2 )] = aWPtr
[indexWord( 4, 2 )];
222 zWPtr
[indexWord( 4, 1 )] = aWPtr
[indexWord( 4, 1 )];
223 zWPtr
[indexWord( 4, 0 )] = aWPtr
[indexWord( 4, 0 )];