initial commit
[glibc.git] / sysdeps / ieee754 / dbl-64 / e_lgamma_r.c
1 /* @(#)er_lgamma.c 5.1 93/09/24 */
2 /*
3 * ====================================================
4 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
5 *
6 * Developed at SunPro, a Sun Microsystems, Inc. business.
7 * Permission to use, copy, modify, and distribute this
8 * software is freely granted, provided that this notice
9 * is preserved.
10 * ====================================================
11 */
12
13 /* __ieee754_lgamma_r(x, signgamp)
14 * Reentrant version of the logarithm of the Gamma function
15 * with user provide pointer for the sign of Gamma(x).
16 *
17 * Method:
18 * 1. Argument Reduction for 0 < x <= 8
19 * Since gamma(1+s)=s*gamma(s), for x in [0,8], we may
20 * reduce x to a number in [1.5,2.5] by
21 * lgamma(1+s) = log(s) + lgamma(s)
22 * for example,
23 * lgamma(7.3) = log(6.3) + lgamma(6.3)
24 * = log(6.3*5.3) + lgamma(5.3)
25 * = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3)
26 * 2. Polynomial approximation of lgamma around its
27 * minimun ymin=1.461632144968362245 to maintain monotonicity.
28 * On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use
29 * Let z = x-ymin;
30 * lgamma(x) = -1.214862905358496078218 + z^2*poly(z)
31 * where
32 * poly(z) is a 14 degree polynomial.
33 * 2. Rational approximation in the primary interval [2,3]
34 * We use the following approximation:
35 * s = x-2.0;
36 * lgamma(x) = 0.5*s + s*P(s)/Q(s)
37 * with accuracy
38 * |P/Q - (lgamma(x)-0.5s)| < 2**-61.71
39 * Our algorithms are based on the following observation
40 *
41 * zeta(2)-1 2 zeta(3)-1 3
42 * lgamma(2+s) = s*(1-Euler) + --------- * s - --------- * s + ...
43 * 2 3
44 *
45 * where Euler = 0.5771... is the Euler constant, which is very
46 * close to 0.5.
47 *
48 * 3. For x>=8, we have
49 * lgamma(x)~(x-0.5)log(x)-x+0.5*log(2pi)+1/(12x)-1/(360x**3)+....
50 * (better formula:
51 * lgamma(x)~(x-0.5)*(log(x)-1)-.5*(log(2pi)-1) + ...)
52 * Let z = 1/x, then we approximation
53 * f(z) = lgamma(x) - (x-0.5)(log(x)-1)
54 * by
55 * 3 5 11
56 * w = w0 + w1*z + w2*z + w3*z + ... + w6*z
57 * where
58 * |w - f(z)| < 2**-58.74
59 *
60 * 4. For negative x, since (G is gamma function)
61 * -x*G(-x)*G(x) = pi/sin(pi*x),
62 * we have
63 * G(x) = pi/(sin(pi*x)*(-x)*G(-x))
64 * since G(-x) is positive, sign(G(x)) = sign(sin(pi*x)) for x<0
65 * Hence, for x<0, signgam = sign(sin(pi*x)) and
66 * lgamma(x) = log(|Gamma(x)|)
67 * = log(pi/(|x*sin(pi*x)|)) - lgamma(-x);
68 * Note: one should avoid compute pi*(-x) directly in the
69 * computation of sin(pi*(-x)).
70 *
71 * 5. Special Cases
72 * lgamma(2+s) ~ s*(1-Euler) for tiny s
73 * lgamma(1)=lgamma(2)=0
74 * lgamma(x) ~ -log(x) for tiny x
75 * lgamma(0) = lgamma(inf) = inf
76 * lgamma(-integer) = +-inf
77 *
78 */
79
80 #include <math.h>
81 #include <math-narrow-eval.h>
82 #include <math_private.h>
83 #include <libc-diag.h>
84 #include <libm-alias-finite.h>
85
86 static const double
87 two52= 4.50359962737049600000e+15, /* 0x43300000, 0x00000000 */
88 half= 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
89 one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
90 pi = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */
91 a0 = 7.72156649015328655494e-02, /* 0x3FB3C467, 0xE37DB0C8 */
92 a1 = 3.22467033424113591611e-01, /* 0x3FD4A34C, 0xC4A60FAD */
93 a2 = 6.73523010531292681824e-02, /* 0x3FB13E00, 0x1A5562A7 */
94 a3 = 2.05808084325167332806e-02, /* 0x3F951322, 0xAC92547B */
95 a4 = 7.38555086081402883957e-03, /* 0x3F7E404F, 0xB68FEFE8 */
96 a5 = 2.89051383673415629091e-03, /* 0x3F67ADD8, 0xCCB7926B */
97 a6 = 1.19270763183362067845e-03, /* 0x3F538A94, 0x116F3F5D */
98 a7 = 5.10069792153511336608e-04, /* 0x3F40B6C6, 0x89B99C00 */
99 a8 = 2.20862790713908385557e-04, /* 0x3F2CF2EC, 0xED10E54D */
100 a9 = 1.08011567247583939954e-04, /* 0x3F1C5088, 0x987DFB07 */
101 a10 = 2.52144565451257326939e-05, /* 0x3EFA7074, 0x428CFA52 */
102 a11 = 4.48640949618915160150e-05, /* 0x3F07858E, 0x90A45837 */
103 tc = 1.46163214496836224576e+00, /* 0x3FF762D8, 0x6356BE3F */
104 tf = -1.21486290535849611461e-01, /* 0xBFBF19B9, 0xBCC38A42 */
105 /* tt = -(tail of tf) */
106 tt = -3.63867699703950536541e-18, /* 0xBC50C7CA, 0xA48A971F */
107 t0 = 4.83836122723810047042e-01, /* 0x3FDEF72B, 0xC8EE38A2 */
108 t1 = -1.47587722994593911752e-01, /* 0xBFC2E427, 0x8DC6C509 */
109 t2 = 6.46249402391333854778e-02, /* 0x3FB08B42, 0x94D5419B */
110 t3 = -3.27885410759859649565e-02, /* 0xBFA0C9A8, 0xDF35B713 */
111 t4 = 1.79706750811820387126e-02, /* 0x3F9266E7, 0x970AF9EC */
112 t5 = -1.03142241298341437450e-02, /* 0xBF851F9F, 0xBA91EC6A */
113 t6 = 6.10053870246291332635e-03, /* 0x3F78FCE0, 0xE370E344 */
114 t7 = -3.68452016781138256760e-03, /* 0xBF6E2EFF, 0xB3E914D7 */
115 t8 = 2.25964780900612472250e-03, /* 0x3F6282D3, 0x2E15C915 */
116 t9 = -1.40346469989232843813e-03, /* 0xBF56FE8E, 0xBF2D1AF1 */
117 t10 = 8.81081882437654011382e-04, /* 0x3F4CDF0C, 0xEF61A8E9 */
118 t11 = -5.38595305356740546715e-04, /* 0xBF41A610, 0x9C73E0EC */
119 t12 = 3.15632070903625950361e-04, /* 0x3F34AF6D, 0x6C0EBBF7 */
120 t13 = -3.12754168375120860518e-04, /* 0xBF347F24, 0xECC38C38 */
121 t14 = 3.35529192635519073543e-04, /* 0x3F35FD3E, 0xE8C2D3F4 */
122 u0 = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */
123 u1 = 6.32827064025093366517e-01, /* 0x3FE4401E, 0x8B005DFF */
124 u2 = 1.45492250137234768737e+00, /* 0x3FF7475C, 0xD119BD6F */
125 u3 = 9.77717527963372745603e-01, /* 0x3FEF4976, 0x44EA8450 */
126 u4 = 2.28963728064692451092e-01, /* 0x3FCD4EAE, 0xF6010924 */
127 u5 = 1.33810918536787660377e-02, /* 0x3F8B678B, 0xBF2BAB09 */
128 v1 = 2.45597793713041134822e+00, /* 0x4003A5D7, 0xC2BD619C */
129 v2 = 2.12848976379893395361e+00, /* 0x40010725, 0xA42B18F5 */
130 v3 = 7.69285150456672783825e-01, /* 0x3FE89DFB, 0xE45050AF */
131 v4 = 1.04222645593369134254e-01, /* 0x3FBAAE55, 0xD6537C88 */
132 v5 = 3.21709242282423911810e-03, /* 0x3F6A5ABB, 0x57D0CF61 */
133 s0 = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */
134 s1 = 2.14982415960608852501e-01, /* 0x3FCB848B, 0x36E20878 */
135 s2 = 3.25778796408930981787e-01, /* 0x3FD4D98F, 0x4F139F59 */
136 s3 = 1.46350472652464452805e-01, /* 0x3FC2BB9C, 0xBEE5F2F7 */
137 s4 = 2.66422703033638609560e-02, /* 0x3F9B481C, 0x7E939961 */
138 s5 = 1.84028451407337715652e-03, /* 0x3F5E26B6, 0x7368F239 */
139 s6 = 3.19475326584100867617e-05, /* 0x3F00BFEC, 0xDD17E945 */
140 r1 = 1.39200533467621045958e+00, /* 0x3FF645A7, 0x62C4AB74 */
141 r2 = 7.21935547567138069525e-01, /* 0x3FE71A18, 0x93D3DCDC */
142 r3 = 1.71933865632803078993e-01, /* 0x3FC601ED, 0xCCFBDF27 */
143 r4 = 1.86459191715652901344e-02, /* 0x3F9317EA, 0x742ED475 */
144 r5 = 7.77942496381893596434e-04, /* 0x3F497DDA, 0xCA41A95B */
145 r6 = 7.32668430744625636189e-06, /* 0x3EDEBAF7, 0xA5B38140 */
146 w0 = 4.18938533204672725052e-01, /* 0x3FDACFE3, 0x90C97D69 */
147 w1 = 8.33333333333329678849e-02, /* 0x3FB55555, 0x5555553B */
148 w2 = -2.77777777728775536470e-03, /* 0xBF66C16C, 0x16B02E5C */
149 w3 = 7.93650558643019558500e-04, /* 0x3F4A019F, 0x98CF38B6 */
150 w4 = -5.95187557450339963135e-04, /* 0xBF4380CB, 0x8C0FE741 */
151 w5 = 8.36339918996282139126e-04, /* 0x3F4B67BA, 0x4CDAD5D1 */
152 w6 = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */
153
154 static const double zero= 0.00000000000000000000e+00;
155
156 static double
157 sin_pi(double x)
158 {
159 double y,z;
160 int n,ix;
161
162 GET_HIGH_WORD(ix,x);
163 ix &= 0x7fffffff;
164
165 if(ix<0x3fd00000) return __sin(pi*x);
166 y = -x; /* x is assume negative */
167
168 /*
169 * argument reduction, make sure inexact flag not raised if input
170 * is an integer
171 */
172 z = floor(y);
173 if(z!=y) { /* inexact anyway */
174 y *= 0.5;
175 y = 2.0*(y - floor(y)); /* y = |x| mod 2.0 */
176 n = (int) (y*4.0);
177 } else {
178 if(ix>=0x43400000) {
179 y = zero; n = 0; /* y must be even */
180 } else {
181 if(ix<0x43300000) z = y+two52; /* exact */
182 GET_LOW_WORD(n,z);
183 n &= 1;
184 y = n;
185 n<<= 2;
186 }
187 }
188 switch (n) {
189 case 0: y = __sin(pi*y); break;
190 case 1:
191 case 2: y = __cos(pi*(0.5-y)); break;
192 case 3:
193 case 4: y = __sin(pi*(one-y)); break;
194 case 5:
195 case 6: y = -__cos(pi*(y-1.5)); break;
196 default: y = __sin(pi*(y-2.0)); break;
197 }
198 return -y;
199 }
200
201
202 double
203 __ieee754_lgamma_r(double x, int *signgamp)
204 {
205 double t,y,z,nadj,p,p1,p2,p3,q,r,w;
206 int i,hx,lx,ix;
207
208 EXTRACT_WORDS(hx,lx,x);
209
210 /* purge off +-inf, NaN, +-0, and negative arguments */
211 *signgamp = 1;
212 ix = hx&0x7fffffff;
213 if(__builtin_expect(ix>=0x7ff00000, 0)) return x*x;
214 if(__builtin_expect((ix|lx)==0, 0))
215 {
216 if (hx < 0)
217 *signgamp = -1;
218 return one/fabs(x);
219 }
220 if(__builtin_expect(ix<0x3b900000, 0)) {
221 /* |x|<2**-70, return -log(|x|) */
222 if(hx<0) {
223 *signgamp = -1;
224 return -__ieee754_log(-x);
225 } else return -__ieee754_log(x);
226 }
227 if(hx<0) {
228 if(__builtin_expect(ix>=0x43300000, 0))
229 /* |x|>=2**52, must be -integer */
230 return fabs (x)/zero;
231 if (x < -2.0 && x > -28.0)
232 return __lgamma_neg (x, signgamp);
233 t = sin_pi(x);
234 if(t==zero) return one/fabsf(t); /* -integer */
235 nadj = __ieee754_log(pi/fabs(t*x));
236 if(t<zero) *signgamp = -1;
237 x = -x;
238 }
239
240 /* purge off 1 and 2 */
241 if((((ix-0x3ff00000)|lx)==0)||(((ix-0x40000000)|lx)==0)) r = 0;
242 /* for x < 2.0 */
243 else if(ix<0x40000000) {
244 if(ix<=0x3feccccc) { /* lgamma(x) = lgamma(x+1)-log(x) */
245 r = -__ieee754_log(x);
246 if(ix>=0x3FE76944) {y = one-x; i= 0;}
247 else if(ix>=0x3FCDA661) {y= x-(tc-one); i=1;}
248 else {y = x; i=2;}
249 } else {
250 r = zero;
251 if(ix>=0x3FFBB4C3) {y=2.0-x;i=0;} /* [1.7316,2] */
252 else if(ix>=0x3FF3B4C4) {y=x-tc;i=1;} /* [1.23,1.73] */
253 else {y=x-one;i=2;}
254 }
255 switch(i) {
256 case 0:
257 z = y*y;
258 p1 = a0+z*(a2+z*(a4+z*(a6+z*(a8+z*a10))));
259 p2 = z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*a11)))));
260 p = y*p1+p2;
261 r += (p-0.5*y); break;
262 case 1:
263 z = y*y;
264 w = z*y;
265 p1 = t0+w*(t3+w*(t6+w*(t9 +w*t12))); /* parallel comp */
266 p2 = t1+w*(t4+w*(t7+w*(t10+w*t13)));
267 p3 = t2+w*(t5+w*(t8+w*(t11+w*t14)));
268 p = z*p1-(tt-w*(p2+y*p3));
269 r += (tf + p); break;
270 case 2:
271 p1 = y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*u5)))));
272 p2 = one+y*(v1+y*(v2+y*(v3+y*(v4+y*v5))));
273 r += (-0.5*y + p1/p2);
274 }
275 }
276 else if(ix<0x40200000) { /* x < 8.0 */
277 i = (int)x;
278 t = zero;
279 y = x-(double)i;
280 p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6))))));
281 q = one+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6)))));
282 r = half*y+p/q;
283 z = one; /* lgamma(1+s) = log(s) + lgamma(s) */
284 switch(i) {
285 case 7: z *= (y+6.0); /* FALLTHRU */
286 case 6: z *= (y+5.0); /* FALLTHRU */
287 case 5: z *= (y+4.0); /* FALLTHRU */
288 case 4: z *= (y+3.0); /* FALLTHRU */
289 case 3: z *= (y+2.0); /* FALLTHRU */
290 r += __ieee754_log(z); break;
291 }
292 /* 8.0 <= x < 2**58 */
293 } else if (ix < 0x43900000) {
294 t = __ieee754_log(x);
295 z = one/x;
296 y = z*z;
297 w = w0+z*(w1+y*(w2+y*(w3+y*(w4+y*(w5+y*w6)))));
298 r = (x-half)*(t-one)+w;
299 } else
300 /* 2**58 <= x <= inf */
301 r = math_narrow_eval (x*(__ieee754_log(x)-one));
302 /* NADJ is set for negative arguments but not otherwise,
303 resulting in warnings that it may be used uninitialized
304 although in the cases where it is used it has always been
305 set. */
306 DIAG_PUSH_NEEDS_COMMENT;
307 DIAG_IGNORE_NEEDS_COMMENT (4.9, "-Wmaybe-uninitialized");
308 if(hx<0) r = nadj - r;
309 DIAG_POP_NEEDS_COMMENT;
310 return r;
311 }
312 libm_alias_finite (__ieee754_lgamma_r, __lgamma_r)