initial commit
[glibc.git] / sysdeps / ia64 / fpu / w_tgammaf_compat.S
1 .file "tgammaf.s"
2
3
4 // Copyright (c) 2001 - 2005, Intel Corporation
5 // All rights reserved.
6 //
7 //
8 // Redistribution and use in source and binary forms, with or without
9 // modification, are permitted provided that the following conditions are
10 // met:
11 //
12 // * Redistributions of source code must retain the above copyright
13 // notice, this list of conditions and the following disclaimer.
14 //
15 // * Redistributions in binary form must reproduce the above copyright
16 // notice, this list of conditions and the following disclaimer in the
17 // documentation and/or other materials provided with the distribution.
18 //
19 // * The name of Intel Corporation may not be used to endorse or promote
20 // products derived from this software without specific prior written
21 // permission.
22
23 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,INCLUDING,BUT NOT
25 // LIMITED TO,THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
27 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT,INDIRECT,INCIDENTAL,SPECIAL,
28 // EXEMPLARY,OR CONSEQUENTIAL DAMAGES (INCLUDING,BUT NOT LIMITED TO,
29 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR
30 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
31 // OF LIABILITY,WHETHER IN CONTRACT,STRICT LIABILITY OR TORT (INCLUDING
32 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 // SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 //
35 // Intel Corporation is the author of this code,and requests that all
36 // problem reports or change requests be submitted to it directly at
37 // http://www.intel.com/software/products/opensource/libraries/num.htm.
38 //
39 //*********************************************************************
40 //
41 // History:
42 // 11/30/01 Initial version
43 // 05/20/02 Cleaned up namespace and sf0 syntax
44 // 02/10/03 Reordered header: .section, .global, .proc, .align
45 // 04/04/03 Changed error codes for overflow and negative integers
46 // 04/10/03 Changed code for overflow near zero handling
47 // 12/16/03 Fixed parameter passing to/from error handling routine
48 // 03/31/05 Reformatted delimiters between data tables
49 //
50 //*********************************************************************
51 //
52 //*********************************************************************
53 //
54 // Function: tgammaf(x) computes the principle value of the GAMMA
55 // function of x.
56 //
57 //*********************************************************************
58 //
59 // Resources Used:
60 //
61 // Floating-Point Registers: f8-f15
62 // f33-f75
63 //
64 // General Purpose Registers:
65 // r8-r11
66 // r14-r29
67 // r32-r36
68 // r37-r40 (Used to pass arguments to error handling routine)
69 //
70 // Predicate Registers: p6-p15
71 //
72 //*********************************************************************
73 //
74 // IEEE Special Conditions:
75 //
76 // tgammaf(+inf) = +inf
77 // tgammaf(-inf) = QNaN
78 // tgammaf(+/-0) = +/-inf
79 // tgammaf(x<0, x - integer) = QNaN
80 // tgammaf(SNaN) = QNaN
81 // tgammaf(QNaN) = QNaN
82 //
83 //*********************************************************************
84 //
85 // Overview
86 //
87 // The method consists of three cases.
88 //
89 // If 2 <= x < OVERFLOW_BOUNDARY use case tgamma_regular;
90 // else if 0 < x < 2 use case tgamma_from_0_to_2;
91 // else if -(i+1) < x < -i, i = 0...43 use case tgamma_negatives;
92 //
93 // Case 2 <= x < OVERFLOW_BOUNDARY
94 // -------------------------------
95 // Here we use algorithm based on the recursive formula
96 // GAMMA(x+1) = x*GAMMA(x). For that we subdivide interval
97 // [2; OVERFLOW_BOUNDARY] into intervals [8*n; 8*(n+1)] and
98 // approximate GAMMA(x) by polynomial of 22th degree on each
99 // [8*n; 8*n+1], recursive formula is used to expand GAMMA(x)
100 // to [8*n; 8*n+1]. In other words we need to find n, i and r
101 // such that x = 8 * n + i + r where n and i are integer numbers
102 // and r is fractional part of x. So GAMMA(x) = GAMMA(8*n+i+r) =
103 // = (x-1)*(x-2)*...*(x-i)*GAMMA(x-i) =
104 // = (x-1)*(x-2)*...*(x-i)*GAMMA(8*n+r) ~
105 // ~ (x-1)*(x-2)*...*(x-i)*P12n(r).
106 //
107 // Step 1: Reduction
108 // -----------------
109 // N = [x] with truncate
110 // r = x - N, note 0 <= r < 1
111 //
112 // n = N & ~0xF - index of table that contains coefficient of
113 // polynomial approximation
114 // i = N & 0xF - is used in recursive formula
115 //
116 //
117 // Step 2: Approximation
118 // ---------------------
119 // We use factorized minimax approximation polynomials
120 // P12n(r) = A12*(r^2+C01(n)*r+C00(n))*
121 // *(r^2+C11(n)*r+C10(n))*...*(r^2+C51(n)*r+C50(n))
122 //
123 // Step 3: Recursion
124 // -----------------
125 // In case when i > 0 we need to multiply P12n(r) by product
126 // R(i,x)=(x-1)*(x-2)*...*(x-i). To reduce number of fp-instructions
127 // we can calculate R as follow:
128 // R(i,x) = ((x-1)*(x-2))*((x-3)*(x-4))*...*((x-(i-1))*(x-i)) if i is
129 // even or R = ((x-1)*(x-2))*((x-3)*(x-4))*...*((x-(i-2))*(x-(i-1)))*
130 // *(i-1) if i is odd. In both cases we need to calculate
131 // R2(i,x) = (x^2-3*x+2)*(x^2-7*x+12)*...*(x^2+x+2*j*(2*j-1)) =
132 // = ((x^2-x)+2*(1-x))*((x^2-x)+6*(2-x))*...*((x^2-x)+2*(2*j-1)*(j-x)) =
133 // = (RA+2*RB)*(RA+6*(1-RB))*...*(RA+2*(2*j-1)*(j-1+RB))
134 // where j = 1..[i/2], RA = x^2-x, RB = 1-x.
135 //
136 // Step 4: Reconstruction
137 // ----------------------
138 // Reconstruction is just simple multiplication i.e.
139 // GAMMA(x) = P12n(r)*R(i,x)
140 //
141 // Case 0 < x < 2
142 // --------------
143 // To calculate GAMMA(x) on this interval we do following
144 // if 1.0 <= x < 1.25 than GAMMA(x) = P7(x-1)
145 // if 1.25 <= x < 1.5 than GAMMA(x) = P7(x-x_min) where
146 // x_min is point of local minimum on [1; 2] interval.
147 // if 1.5 <= x < 1.75 than GAMMA(x) = P7(x-1.5)
148 // if 1.75 <= x < 2.0 than GAMMA(x) = P7(x-1.5)
149 // and
150 // if 0 < x < 1 than GAMMA(x) = GAMMA(x+1)/x
151 //
152 // Case -(i+1) < x < -i, i = 0...43
153 // ----------------------------------
154 // Here we use the fact that GAMMA(-x) = PI/(x*GAMMA(x)*sin(PI*x)) and
155 // so we need to calculate GAMMA(x), sin(PI*x)/PI. Calculation of
156 // GAMMA(x) is described above.
157 //
158 // Step 1: Reduction
159 // -----------------
160 // Note that period of sin(PI*x) is 2 and range reduction for
161 // sin(PI*x) is like to range reduction for GAMMA(x)
162 // i.e rs = x - round(x) and |rs| <= 0.5.
163 //
164 // Step 2: Approximation
165 // ---------------------
166 // To approximate sin(PI*x)/PI = sin(PI*(2*n+rs))/PI =
167 // = (-1)^n*sin(PI*rs)/PI Taylor series is used.
168 // sin(PI*rs)/PI ~ S17(rs).
169 //
170 // Step 3: Division
171 // ----------------
172 // To calculate 1/x and 1/(GAMMA(x)*S12(rs)) we use frcpa
173 // instruction with following Newton-Raphson interations.
174 //
175 //
176 //*********************************************************************
177
178 GR_ad_Data = r8
179 GR_TAG = r8
180 GR_SignExp = r9
181 GR_Sig = r10
182 GR_ArgNz = r10
183 GR_RqDeg = r11
184
185 GR_NanBound = r14
186 GR_ExpOf025 = r15
187 GR_ExpOf05 = r16
188 GR_ad_Co = r17
189 GR_ad_Ce = r18
190 GR_TblOffs = r19
191 GR_Arg = r20
192 GR_Exp2Ind = r21
193 GR_TblOffsMask = r21
194 GR_Offs = r22
195 GR_OvfNzBound = r23
196 GR_ZeroResBound = r24
197 GR_ad_SinO = r25
198 GR_ad_SinE = r26
199 GR_Correction = r27
200 GR_Tbl12Offs = r28
201 GR_NzBound = r28
202 GR_ExpOf1 = r29
203 GR_fpsr = r29
204
205 GR_SAVE_B0 = r33
206 GR_SAVE_PFS = r34
207 GR_SAVE_GP = r35
208 GR_SAVE_SP = r36
209
210 GR_Parameter_X = r37
211 GR_Parameter_Y = r38
212 GR_Parameter_RESULT = r39
213 GR_Parameter_TAG = r40
214
215
216 FR_X = f10
217 FR_Y = f1
218 FR_RESULT = f8
219
220 FR_iXt = f11
221 FR_Xt = f12
222 FR_r = f13
223 FR_r2 = f14
224 FR_r4 = f15
225
226 FR_C01 = f33
227 FR_A7 = f33
228 FR_C11 = f34
229 FR_A6 = f34
230 FR_C21 = f35
231 FR_A5 = f35
232 FR_C31 = f36
233 FR_A4 = f36
234 FR_C41 = f37
235 FR_A3 = f37
236 FR_C51 = f38
237 FR_A2 = f38
238
239 FR_C00 = f39
240 FR_A1 = f39
241 FR_C10 = f40
242 FR_A0 = f40
243 FR_C20 = f41
244 FR_C30 = f42
245 FR_C40 = f43
246 FR_C50 = f44
247 FR_An = f45
248 FR_OvfBound = f46
249 FR_InvAn = f47
250
251 FR_Multplr = f48
252 FR_NormX = f49
253 FR_X2mX = f50
254 FR_1mX = f51
255 FR_Rq0 = f51
256 FR_Rq1 = f52
257 FR_Rq2 = f53
258 FR_Rq3 = f54
259
260 FR_Rcp0 = f55
261 FR_Rcp1 = f56
262 FR_Rcp2 = f57
263
264 FR_InvNormX1 = f58
265 FR_InvNormX2 = f59
266
267 FR_rs = f60
268 FR_rs2 = f61
269
270 FR_LocalMin = f62
271 FR_10 = f63
272
273 FR_05 = f64
274
275 FR_S32 = f65
276 FR_S31 = f66
277 FR_S01 = f67
278 FR_S11 = f68
279 FR_S21 = f69
280 FR_S00 = f70
281 FR_S10 = f71
282 FR_S20 = f72
283
284 FR_GAMMA = f73
285 FR_2 = f74
286 FR_6 = f75
287
288
289
290
291 // Data tables
292 //==============================================================
293 RODATA
294 .align 16
295 LOCAL_OBJECT_START(tgammaf_data)
296 data8 0x3FDD8B618D5AF8FE // local minimum (0.461632144968362356785)
297 data8 0x4024000000000000 // 10.0
298 data8 0x3E90FC992FF39E13 // S32
299 data8 0xBEC144B2760626E2 // S31
300 //
301 //[2; 8)
302 data8 0x4009EFD1BA0CB3B4 // C01
303 data8 0x3FFFB35378FF4822 // C11
304 data8 0xC01032270413B896 // C41
305 data8 0xC01F171A4C0D6827 // C51
306 data8 0x40148F8E197396AC // C20
307 data8 0x401C601959F1249C // C30
308 data8 0x3EE21AD881741977 // An
309 data8 0x4041852200000000 // overflow boundary (35.04010009765625)
310 data8 0x3FD9CE68F695B198 // C21
311 data8 0xBFF8C30AC900DA03 // C31
312 data8 0x400E17D2F0535C02 // C00
313 data8 0x4010689240F7FAC8 // C10
314 data8 0x402563147DDCCF8D // C40
315 data8 0x4033406D0480A21C // C50
316 //
317 //[8; 16)
318 data8 0x4006222BAE0B793B // C01
319 data8 0x4002452733473EDA // C11
320 data8 0xC0010EF3326FDDB3 // C41
321 data8 0xC01492B817F99C0F // C51
322 data8 0x40099C905A249B75 // C20
323 data8 0x4012B972AE0E533D // C30
324 data8 0x3FE6F6DB91D0D4CC // An
325 data8 0x4041852200000000 // overflow boundary
326 data8 0x3FF545828F7B73C5 // C21
327 data8 0xBFBBD210578764DF // C31
328 data8 0x4000542098F53CFC // C00
329 data8 0x40032C1309AD6C81 // C10
330 data8 0x401D7331E19BD2E1 // C40
331 data8 0x402A06807295EF57 // C50
332 //
333 //[16; 24)
334 data8 0x4000131002867596 // C01
335 data8 0x3FFAA362D5D1B6F2 // C11
336 data8 0xBFFCB6985697DB6D // C41
337 data8 0xC0115BEE3BFC3B3B // C51
338 data8 0x3FFE62FF83456F73 // C20
339 data8 0x4007E33478A114C4 // C30
340 data8 0x41E9B2B73795ED57 // An
341 data8 0x4041852200000000 // overflow boundary
342 data8 0x3FEEB1F345BC2769 // C21
343 data8 0xBFC3BBE6E7F3316F // C31
344 data8 0x3FF14E07DA5E9983 // C00
345 data8 0x3FF53B76BF81E2C0 // C10
346 data8 0x4014051E0269A3DC // C40
347 data8 0x40229D4227468EDB // C50
348 //
349 //[24; 32)
350 data8 0x3FFAF7BD498384DE // C01
351 data8 0x3FF62AD8B4D1C3D2 // C11
352 data8 0xBFFABCADCD004C32 // C41
353 data8 0xC00FADE97C097EC9 // C51
354 data8 0x3FF6DA9ED737707E // C20
355 data8 0x4002A29E9E0C782C // C30
356 data8 0x44329D5B5167C6C3 // An
357 data8 0x4041852200000000 // overflow boundary
358 data8 0x3FE8943CBBB4B727 // C21
359 data8 0xBFCB39D466E11756 // C31
360 data8 0x3FE879AF3243D8C1 // C00
361 data8 0x3FEEC7DEBB14CE1E // C10
362 data8 0x401017B79BA80BCB // C40
363 data8 0x401E941DC3C4DE80 // C50
364 //
365 //[32; 40)
366 data8 0x3FF7ECB3A0E8FE5C // C01
367 data8 0x3FF3815A8516316B // C11
368 data8 0xBFF9ABD8FCC000C3 // C41
369 data8 0xC00DD89969A4195B // C51
370 data8 0x3FF2E43139CBF563 // C20
371 data8 0x3FFF96DC3474A606 // C30
372 data8 0x46AFF4CA9B0DDDF0 // An
373 data8 0x4041852200000000 // overflow boundary
374 data8 0x3FE4CE76DA1B5783 // C21
375 data8 0xBFD0524DB460BC4E // C31
376 data8 0x3FE35852DF14E200 // C00
377 data8 0x3FE8C7610359F642 // C10
378 data8 0x400BCF750EC16173 // C40
379 data8 0x401AC14E02EA701C // C50
380 //
381 //[40; 48)
382 data8 0x3FF5DCE4D8193097 // C01
383 data8 0x3FF1B0D8C4974FFA // C11
384 data8 0xBFF8FB450194CAEA // C41
385 data8 0xC00C9658E030A6C4 // C51
386 data8 0x3FF068851118AB46 // C20
387 data8 0x3FFBF7C7BB46BF7D // C30
388 data8 0x3FF0000000000000 // An
389 data8 0x4041852200000000 // overflow boundary
390 data8 0x3FE231DEB11D847A // C21
391 data8 0xBFD251ECAFD7E935 // C31
392 data8 0x3FE0368AE288F6BF // C00
393 data8 0x3FE513AE4215A70C // C10
394 data8 0x4008F960F7141B8B // C40
395 data8 0x40183BA08134397B // C50
396 //
397 //[1.0; 1.25)
398 data8 0xBFD9909648921868 // A7
399 data8 0x3FE96FFEEEA8520F // A6
400 data8 0xBFED0800D93449B8 // A3
401 data8 0x3FEFA648D144911C // A2
402 data8 0xBFEE3720F7720B4D // A5
403 data8 0x3FEF4857A010CA3B // A4
404 data8 0xBFE2788CCD545AA4 // A1
405 data8 0x3FEFFFFFFFE9209E // A0
406 //
407 //[1.25; 1.5)
408 data8 0xBFB421236426936C // A7
409 data8 0x3FAF237514F36691 // A6
410 data8 0xBFC0BADE710A10B9 // A3
411 data8 0x3FDB6C5465BBEF1F // A2
412 data8 0xBFB7E7F83A546EBE // A5
413 data8 0x3FC496A01A545163 // A4
414 data8 0xBDEE86A39D8452EB // A1
415 data8 0x3FEC56DC82A39AA2 // A0
416 //
417 //[1.5; 1.75)
418 data8 0xBF94730B51795867 // A7
419 data8 0x3FBF4203E3816C7B // A6
420 data8 0xBFE85B427DBD23E4 // A3
421 data8 0x3FEE65557AB26771 // A2
422 data8 0xBFD59D31BE3AB42A // A5
423 data8 0x3FE3C90CC8F09147 // A4
424 data8 0xBFE245971DF735B8 // A1
425 data8 0x3FEFFC613AE7FBC8 // A0
426 //
427 //[1.75; 2.0)
428 data8 0xBF7746A85137617E // A7
429 data8 0x3FA96E37D09735F3 // A6
430 data8 0xBFE3C24AC40AC0BB // A3
431 data8 0x3FEC56A80A977CA5 // A2
432 data8 0xBFC6F0E707560916 // A5
433 data8 0x3FDB262D949175BE // A4
434 data8 0xBFE1C1AEDFB25495 // A1
435 data8 0x3FEFEE1E644B2022 // A0
436 //
437 // sin(pi*x)/pi
438 data8 0xC026FB0D377656CC // S01
439 data8 0x3FFFB15F95A22324 // S11
440 data8 0x406CE58F4A41C6E7 // S10
441 data8 0x404453786302C61E // S20
442 data8 0xC023D59A47DBFCD3 // S21
443 data8 0x405541D7ABECEFCA // S00
444 //
445 // 1/An for [40; 48)
446 data8 0xCAA7576DE621FCD5, 0x3F68
447 LOCAL_OBJECT_END(tgammaf_data)
448
449 //==============================================================
450 // Code
451 //==============================================================
452
453 .section .text
454 GLOBAL_LIBM_ENTRY(tgammaf)
455 { .mfi
456 getf.exp GR_SignExp = f8
457 fma.s1 FR_NormX = f8,f1,f0
458 addl GR_ad_Data = @ltoff(tgammaf_data), gp
459 }
460 { .mfi
461 mov GR_ExpOf05 = 0xFFFE
462 fcvt.fx.trunc.s1 FR_iXt = f8 // [x]
463 mov GR_Offs = 0 // 2 <= x < 8
464 };;
465 { .mfi
466 getf.d GR_Arg = f8
467 fcmp.lt.s1 p14,p15 = f8,f0
468 mov GR_Tbl12Offs = 0
469 }
470 { .mfi
471 setf.exp FR_05 = GR_ExpOf05
472 fma.s1 FR_2 = f1,f1,f1 // 2
473 mov GR_Correction = 0
474 };;
475 { .mfi
476 ld8 GR_ad_Data = [GR_ad_Data]
477 fclass.m p10,p0 = f8,0x1E7 // is x NaTVal, NaN, +/-0 or +/-INF?
478 tbit.z p12,p13 = GR_SignExp,16 // p13 if |x| >= 2
479 }
480 { .mfi
481 mov GR_ExpOf1 = 0xFFFF
482 fcvt.fx.s1 FR_rs = f8 // round(x)
483 and GR_Exp2Ind = 7,GR_SignExp
484 };;
485 .pred.rel "mutex",p14,p15
486 { .mfi
487 (p15) cmp.eq.unc p11,p0 = GR_ExpOf1,GR_SignExp // p11 if 1 <= x < 2
488 (p14) fma.s1 FR_1mX = f1,f1,f8 // 1 - |x|
489 mov GR_Sig = 0 // if |x| < 2
490 }
491 { .mfi
492 (p13) cmp.eq.unc p7,p0 = 2,GR_Exp2Ind
493 (p15) fms.s1 FR_1mX = f1,f1,f8 // 1 - |x|
494 (p13) cmp.eq.unc p8,p0 = 3,GR_Exp2Ind
495 };;
496 .pred.rel "mutex",p7,p8
497 { .mfi
498 (p7) mov GR_Offs = 0x7 // 8 <= |x| < 16
499 nop.f 0
500 (p8) tbit.z.unc p0,p6 = GR_Arg,51
501 }
502 { .mib
503 (p13) cmp.lt.unc p9,p0 = 3,GR_Exp2Ind
504 (p8) mov GR_Offs = 0xE // 16 <= |x| < 32
505 // jump if x is NaTVal, NaN, +/-0 or +/-INF?
506 (p10) br.cond.spnt tgammaf_spec_args
507 };;
508 .pred.rel "mutex",p14,p15
509 .pred.rel "mutex",p6,p9
510 { .mfi
511 (p9) mov GR_Offs = 0x1C // 32 <= |x|
512 (p14) fma.s1 FR_X2mX = FR_NormX,FR_NormX,FR_NormX // x^2-|x|
513 (p9) tbit.z.unc p0,p8 = GR_Arg,50
514 }
515 { .mfi
516 ldfpd FR_LocalMin,FR_10 = [GR_ad_Data],16
517 (p15) fms.s1 FR_X2mX = FR_NormX,FR_NormX,FR_NormX // x^2-|x|
518 (p6) add GR_Offs = 0x7,GR_Offs // 24 <= x < 32
519 };;
520 .pred.rel "mutex",p8,p12
521 { .mfi
522 add GR_ad_Ce = 0x50,GR_ad_Data
523 (p15) fcmp.lt.unc.s1 p10,p0 = f8,f1 // p10 if 0 <= x < 1
524 mov GR_OvfNzBound = 2
525 }
526 { .mib
527 ldfpd FR_S32,FR_S31 = [GR_ad_Data],16
528 (p8) add GR_Offs = 0x7,GR_Offs // 40 <= |x|
529 // jump if 1 <= x < 2
530 (p11) br.cond.spnt tgammaf_from_1_to_2
531 };;
532 { .mfi
533 shladd GR_ad_Ce = GR_Offs,4,GR_ad_Ce
534 fcvt.xf FR_Xt = FR_iXt // [x]
535 (p13) cmp.eq.unc p7,p0 = r0,GR_Offs // p7 if 2 <= |x| < 8
536 }
537 { .mfi
538 shladd GR_ad_Co = GR_Offs,4,GR_ad_Data
539 fma.s1 FR_6 = FR_2,FR_2,FR_2
540 mov GR_ExpOf05 = 0x7FC
541 };;
542 { .mfi
543 (p13) getf.sig GR_Sig = FR_iXt // if |x| >= 2
544 frcpa.s1 FR_Rcp0,p0 = f1,FR_NormX
545 (p10) shr GR_Arg = GR_Arg,51
546 }
547 { .mib
548 ldfpd FR_C01,FR_C11 = [GR_ad_Co],16
549 (p7) mov GR_Correction = 2
550 // jump if 0 < x < 1
551 (p10) br.cond.spnt tgammaf_from_0_to_1
552 };;
553 { .mfi
554 ldfpd FR_C21,FR_C31 = [GR_ad_Ce],16
555 fma.s1 FR_Rq2 = f1,f1,FR_1mX // 2 - |x|
556 (p14) sub GR_Correction = r0,GR_Correction
557 }
558 { .mfi
559 ldfpd FR_C41,FR_C51 = [GR_ad_Co],16
560 (p14) fcvt.xf FR_rs = FR_rs
561 (p14) add GR_ad_SinO = 0x3A0,GR_ad_Data
562 };;
563 .pred.rel "mutex",p14,p15
564 { .mfi
565 ldfpd FR_C00,FR_C10 = [GR_ad_Ce],16
566 nop.f 0
567 (p14) sub GR_Sig = GR_Correction,GR_Sig
568 }
569 { .mfi
570 ldfpd FR_C20,FR_C30 = [GR_ad_Co],16
571 fma.s1 FR_Rq1 = FR_1mX,FR_2,FR_X2mX // (x-1)*(x-2)
572 (p15) sub GR_Sig = GR_Sig,GR_Correction
573 };;
574 { .mfi
575 (p14) ldfpd FR_S01,FR_S11 = [GR_ad_SinO],16
576 fma.s1 FR_Rq3 = FR_2,f1,FR_1mX // 3 - |x|
577 and GR_RqDeg = 0x6,GR_Sig
578 }
579 { .mfi
580 ldfpd FR_C40,FR_C50 = [GR_ad_Ce],16
581 (p14) fma.d.s0 FR_X = f0,f0,f8 // set deno flag
582 mov GR_NanBound = 0x30016 // -2^23
583 };;
584 .pred.rel "mutex",p14,p15
585 { .mfi
586 (p14) add GR_ad_SinE = 0x3C0,GR_ad_Data
587 (p15) fms.s1 FR_r = FR_NormX,f1,FR_Xt // r = x - [x]
588 cmp.eq p8,p0 = 2,GR_RqDeg
589 }
590 { .mfi
591 ldfpd FR_An,FR_OvfBound = [GR_ad_Co]
592 (p14) fms.s1 FR_r = FR_Xt,f1,FR_NormX // r = |x - [x]|
593 cmp.eq p9,p0 = 4,GR_RqDeg
594 };;
595 .pred.rel "mutex",p8,p9
596 { .mfi
597 (p14) ldfpd FR_S21,FR_S00 = [GR_ad_SinE],16
598 (p8) fma.s1 FR_Rq0 = FR_2,f1,FR_1mX // (3-x)
599 tbit.z p0,p6 = GR_Sig,0
600 }
601 { .mfi
602 (p14) ldfpd FR_S10,FR_S20 = [GR_ad_SinO],16
603 (p9) fma.s1 FR_Rq0 = FR_2,FR_2,FR_1mX // (5-x)
604 cmp.eq p10,p0 = 6,GR_RqDeg
605 };;
606 { .mfi
607 (p14) getf.s GR_Arg = f8
608 (p14) fcmp.eq.unc.s1 p13,p0 = FR_NormX,FR_Xt
609 (p14) mov GR_ZeroResBound = 0xC22C // -43
610 }
611 { .mfi
612 (p14) ldfe FR_InvAn = [GR_ad_SinE]
613 (p10) fma.s1 FR_Rq0 = FR_6,f1,FR_1mX // (7-x)
614 cmp.eq p7,p0 = r0,GR_RqDeg
615 };;
616 { .mfi
617 (p14) cmp.ge.unc p11,p0 = GR_SignExp,GR_NanBound
618 fma.s1 FR_Rq2 = FR_Rq2,FR_6,FR_X2mX // (x-3)*(x-4)
619 (p14) shl GR_ZeroResBound = GR_ZeroResBound,16
620 }
621 { .mfb
622 (p14) mov GR_OvfNzBound = 0x802
623 (p14) fms.s1 FR_rs = FR_rs,f1,FR_NormX // rs = round(x) - x
624 // jump if x < -2^23 i.e. x is negative integer
625 (p11) br.cond.spnt tgammaf_singularity
626 };;
627 { .mfi
628 nop.m 0
629 (p7) fma.s1 FR_Rq1 = f0,f0,f1
630 (p14) shl GR_OvfNzBound = GR_OvfNzBound,20
631 }
632 { .mfb
633 nop.m 0
634 fma.s1 FR_Rq3 = FR_Rq3,FR_10,FR_X2mX // (x-5)*(x-6)
635 // jump if x is negative integer such that -2^23 < x < 0
636 (p13) br.cond.spnt tgammaf_singularity
637 };;
638 { .mfi
639 nop.m 0
640 fma.s1 FR_C01 = FR_C01,f1,FR_r
641 (p14) mov GR_ExpOf05 = 0xFFFE
642 }
643 { .mfi
644 (p14) cmp.eq.unc p7,p0 = GR_Arg,GR_OvfNzBound
645 fma.s1 FR_C11 = FR_C11,f1,FR_r
646 (p14) cmp.ltu.unc p11,p0 = GR_Arg,GR_OvfNzBound
647 };;
648 { .mfi
649 nop.m 0
650 fma.s1 FR_C21 = FR_C21,f1,FR_r
651 (p14) cmp.ltu.unc p9,p0 = GR_ZeroResBound,GR_Arg
652 }
653 { .mfb
654 nop.m 0
655 fma.s1 FR_C31 = FR_C31,f1,FR_r
656 // jump if argument is close to 0 negative
657 (p11) br.cond.spnt tgammaf_overflow
658 };;
659 { .mfi
660 nop.m 0
661 fma.s1 FR_C41 = FR_C41,f1,FR_r
662 nop.i 0
663 }
664 { .mfb
665 nop.m 0
666 fma.s1 FR_C51 = FR_C51,f1,FR_r
667 // jump if x is negative noninteger such that -2^23 < x < -43
668 (p9) br.cond.spnt tgammaf_underflow
669 };;
670 { .mfi
671 nop.m 0
672 (p14) fma.s1 FR_rs2 = FR_rs,FR_rs,f0
673 nop.i 0
674 }
675 { .mfb
676 nop.m 0
677 (p14) fma.s1 FR_S01 = FR_rs,FR_rs,FR_S01
678 // jump if argument is 0x80200000
679 (p7) br.cond.spnt tgammaf_overflow_near0_bound
680 };;
681 { .mfi
682 nop.m 0
683 (p6) fnma.s1 FR_Rq1 = FR_Rq1,FR_Rq0,f0
684 nop.i 0
685 }
686 { .mfi
687 nop.m 0
688 (p10) fma.s1 FR_Rq2 = FR_Rq2,FR_Rq3,f0
689 and GR_Sig = 0x7,GR_Sig
690 };;
691 { .mfi
692 nop.m 0
693 fma.s1 FR_C01 = FR_C01,FR_r,FR_C00
694 nop.i 0
695 }
696 { .mfi
697 nop.m 0
698 fma.s1 FR_C11 = FR_C11,FR_r,FR_C10
699 cmp.eq p6,p7 = r0,GR_Sig // p6 if |x| from one of base intervals
700 };;
701 { .mfi
702 nop.m 0
703 fma.s1 FR_C21 = FR_C21,FR_r,FR_C20
704 nop.i 0
705 }
706 { .mfi
707 nop.m 0
708 fma.s1 FR_C31 = FR_C31,FR_r,FR_C30
709 (p7) cmp.lt.unc p9,p0 = 2,GR_RqDeg
710 };;
711 { .mfi
712 nop.m 0
713 (p14) fma.s1 FR_S11 = FR_rs,FR_rs,FR_S11
714 nop.i 0
715 }
716 { .mfi
717 nop.m 0
718 (p14) fma.s1 FR_S21 = FR_rs,FR_rs,FR_S21
719 nop.i 0
720 };;
721 { .mfi
722 nop.m 0
723 fma.s1 FR_C41 = FR_C41,FR_r,FR_C40
724 nop.i 0
725 }
726 { .mfi
727 nop.m 0
728 (p14) fma.s1 FR_S32 = FR_rs2,FR_S32,FR_S31
729 nop.i 0
730 };;
731 { .mfi
732 nop.m 0
733 (p9) fma.s1 FR_Rq1 = FR_Rq1,FR_Rq2,f0
734 nop.i 0
735 }
736 { .mfi
737 nop.m 0
738 fma.s1 FR_C51 = FR_C51,FR_r,FR_C50
739 nop.i 0
740 };;
741 { .mfi
742 (p14) getf.exp GR_SignExp = FR_rs
743 fma.s1 FR_C01 = FR_C01,FR_C11,f0
744 nop.i 0
745 }
746 { .mfi
747 nop.m 0
748 (p14) fma.s1 FR_S01 = FR_S01,FR_rs2,FR_S00
749 nop.i 0
750 };;
751 { .mfi
752 nop.m 0
753 fma.s1 FR_C21 = FR_C21,FR_C31,f0
754 nop.i 0
755 }
756 { .mfi
757 nop.m 0
758 // NR-iteration
759 (p14) fnma.s1 FR_InvNormX1 = FR_Rcp0,FR_NormX,f1
760 nop.i 0
761 };;
762 { .mfi
763 nop.m 0
764 (p14) fma.s1 FR_S11 = FR_S11,FR_rs2,FR_S10
765 (p14) tbit.z.unc p11,p12 = GR_SignExp,17
766 }
767 { .mfi
768 nop.m 0
769 (p14) fma.s1 FR_S21 = FR_S21,FR_rs2,FR_S20
770 nop.i 0
771 };;
772 { .mfi
773 nop.m 0
774 (p15) fcmp.lt.unc.s1 p0,p13 = FR_NormX,FR_OvfBound
775 nop.i 0
776 }
777 { .mfi
778 nop.m 0
779 (p14) fma.s1 FR_S32 = FR_rs2,FR_S32,f0
780 nop.i 0
781 };;
782 { .mfi
783 nop.m 0
784 fma.s1 FR_C41 = FR_C41,FR_C51,f0
785 nop.i 0
786 }
787 { .mfi
788 nop.m 0
789 (p7) fma.s1 FR_An = FR_Rq1,FR_An,f0
790 nop.i 0
791 };;
792 { .mfb
793 nop.m 0
794 nop.f 0
795 // jump if x > 35.04010009765625
796 (p13) br.cond.spnt tgammaf_overflow
797 };;
798 { .mfi
799 nop.m 0
800 // NR-iteration
801 (p14) fma.s1 FR_InvNormX1 = FR_Rcp0,FR_InvNormX1,FR_Rcp0
802 nop.i 0
803 };;
804 { .mfi
805 nop.m 0
806 (p14) fma.s1 FR_S01 = FR_S01,FR_S11,f0
807 nop.i 0
808 };;
809 { .mfi
810 nop.m 0
811 (p14) fma.s1 FR_S21 = FR_S21,FR_S32,f0
812 nop.i 0
813 };;
814 { .mfi
815 (p14) getf.exp GR_SignExp = FR_NormX
816 fma.s1 FR_C01 = FR_C01,FR_C21,f0
817 nop.i 0
818 }
819 { .mfi
820 nop.m 0
821 fma.s1 FR_C41 = FR_C41,FR_An,f0
822 (p14) mov GR_ExpOf1 = 0x2FFFF
823 };;
824 { .mfi
825 nop.m 0
826 // NR-iteration
827 (p14) fnma.s1 FR_InvNormX2 = FR_InvNormX1,FR_NormX,f1
828 nop.i 0
829 };;
830 .pred.rel "mutex",p11,p12
831 { .mfi
832 nop.m 0
833 (p12) fnma.s1 FR_S01 = FR_S01,FR_S21,f0
834 nop.i 0
835 }
836 { .mfi
837 nop.m 0
838 (p11) fma.s1 FR_S01 = FR_S01,FR_S21,f0
839 nop.i 0
840 };;
841
842 { .mfi
843 nop.m 0
844 (p14) fma.s1 FR_GAMMA = FR_C01,FR_C41,f0
845 (p14) tbit.z.unc p6,p7 = GR_Sig,0
846 }
847 { .mfb
848 nop.m 0
849 (p15) fma.s.s0 f8 = FR_C01,FR_C41,f0
850 (p15) br.ret.spnt b0 // exit for positives
851 };;
852 .pred.rel "mutex",p11,p12
853 { .mfi
854 nop.m 0
855 (p12) fms.s1 FR_S01 = FR_rs,FR_S01,FR_rs
856 nop.i 0
857 }
858 { .mfi
859 nop.m 0
860 (p11) fma.s1 FR_S01 = FR_rs,FR_S01,FR_rs
861 nop.i 0
862 };;
863 { .mfi
864 nop.m 0
865 // NR-iteration
866 fma.s1 FR_InvNormX2 = FR_InvNormX1,FR_InvNormX2,FR_InvNormX1
867 cmp.eq p10,p0 = 0x23,GR_Offs
868 };;
869 .pred.rel "mutex",p6,p7
870 { .mfi
871 nop.m 0
872 (p6) fma.s1 FR_GAMMA = FR_S01,FR_GAMMA,f0
873 cmp.gtu p8,p0 = GR_SignExp,GR_ExpOf1
874 }
875 { .mfi
876 nop.m 0
877 (p7) fnma.s1 FR_GAMMA = FR_S01,FR_GAMMA,f0
878 cmp.eq p9,p0 = GR_SignExp,GR_ExpOf1
879 };;
880 { .mfi
881 nop.m 0
882 // NR-iteration
883 fnma.s1 FR_InvNormX1 = FR_InvNormX2,FR_NormX,f1
884 nop.i 0
885 }
886 { .mfi
887 nop.m 0
888 (p10) fma.s1 FR_InvNormX2 = FR_InvNormX2,FR_InvAn,f0
889 nop.i 0
890 };;
891 { .mfi
892 nop.m 0
893 frcpa.s1 FR_Rcp0,p0 = f1,FR_GAMMA
894 nop.i 0
895 };;
896 { .mfi
897 nop.m 0
898 fms.s1 FR_Multplr = FR_NormX,f1,f1 // x - 1
899 nop.i 0
900 };;
901 { .mfi
902 nop.m 0
903 // NR-iteration
904 fnma.s1 FR_Rcp1 = FR_Rcp0,FR_GAMMA,f1
905 nop.i 0
906 };;
907 .pred.rel "mutex",p8,p9
908 { .mfi
909 nop.m 0
910 // 1/x or 1/(An*x)
911 (p8) fma.s1 FR_Multplr = FR_InvNormX2,FR_InvNormX1,FR_InvNormX2
912 nop.i 0
913 }
914 { .mfi
915 nop.m 0
916 (p9) fma.s1 FR_Multplr = f1,f1,f0
917 nop.i 0
918 };;
919 { .mfi
920 nop.m 0
921 // NR-iteration
922 fma.s1 FR_Rcp1 = FR_Rcp0,FR_Rcp1,FR_Rcp0
923 nop.i 0
924 };;
925 { .mfi
926 nop.m 0
927 // NR-iteration
928 fnma.s1 FR_Rcp2 = FR_Rcp1,FR_GAMMA,f1
929 nop.i 0
930 }
931 { .mfi
932 nop.m 0
933 // NR-iteration
934 fma.s1 FR_Rcp1 = FR_Rcp1,FR_Multplr,f0
935 nop.i 0
936 };;
937 { .mfb
938 nop.m 0
939 fma.s.s0 f8 = FR_Rcp1,FR_Rcp2,FR_Rcp1
940 br.ret.sptk b0
941 };;
942
943 // here if 0 < x < 1
944 //--------------------------------------------------------------------
945 .align 32
946 tgammaf_from_0_to_1:
947 { .mfi
948 cmp.lt p7,p0 = GR_Arg,GR_ExpOf05
949 // NR-iteration
950 fnma.s1 FR_Rcp1 = FR_Rcp0,FR_NormX,f1
951 cmp.eq p8,p0 = GR_Arg,GR_ExpOf05
952 }
953 { .mfi
954 cmp.gt p9,p0 = GR_Arg,GR_ExpOf05
955 fma.s1 FR_r = f0,f0,FR_NormX // reduced arg for (0;1)
956 mov GR_ExpOf025 = 0x7FA
957 };;
958 { .mfi
959 getf.s GR_ArgNz = f8
960 fma.d.s0 FR_X = f0,f0,f8 // set deno flag
961 shl GR_OvfNzBound = GR_OvfNzBound,20
962 }
963 { .mfi
964 (p8) mov GR_Tbl12Offs = 0x80 // 0.5 <= x < 0.75
965 nop.f 0
966 (p7) cmp.ge.unc p6,p0 = GR_Arg,GR_ExpOf025
967 };;
968 .pred.rel "mutex",p6,p9
969 { .mfi
970 (p9) mov GR_Tbl12Offs = 0xC0 // 0.75 <= x < 1
971 nop.f 0
972 (p6) mov GR_Tbl12Offs = 0x40 // 0.25 <= x < 0.5
973 }
974 { .mfi
975 add GR_ad_Ce = 0x2C0,GR_ad_Data
976 nop.f 0
977 add GR_ad_Co = 0x2A0,GR_ad_Data
978 };;
979 { .mfi
980 add GR_ad_Co = GR_ad_Co,GR_Tbl12Offs
981 nop.f 0
982 cmp.lt p12,p0 = GR_ArgNz,GR_OvfNzBound
983 }
984 { .mib
985 add GR_ad_Ce = GR_ad_Ce,GR_Tbl12Offs
986 cmp.eq p7,p0 = GR_ArgNz,GR_OvfNzBound
987 // jump if argument is 0x00200000
988 (p7) br.cond.spnt tgammaf_overflow_near0_bound
989 };;
990 { .mmb
991 ldfpd FR_A7,FR_A6 = [GR_ad_Co],16
992 ldfpd FR_A5,FR_A4 = [GR_ad_Ce],16
993 // jump if argument is close to 0 positive
994 (p12) br.cond.spnt tgammaf_overflow
995 };;
996 { .mfi
997 ldfpd FR_A3,FR_A2 = [GR_ad_Co],16
998 // NR-iteration
999 fma.s1 FR_Rcp1 = FR_Rcp0,FR_Rcp1,FR_Rcp0
1000 nop.i 0
1001 }
1002 { .mfb
1003 ldfpd FR_A1,FR_A0 = [GR_ad_Ce],16
1004 nop.f 0
1005 br.cond.sptk tgamma_from_0_to_2
1006 };;
1007
1008 // here if 1 < x < 2
1009 //--------------------------------------------------------------------
1010 .align 32
1011 tgammaf_from_1_to_2:
1012 { .mfi
1013 add GR_ad_Co = 0x2A0,GR_ad_Data
1014 fms.s1 FR_r = f0,f0,FR_1mX
1015 shr GR_TblOffs = GR_Arg,47
1016 }
1017 { .mfi
1018 add GR_ad_Ce = 0x2C0,GR_ad_Data
1019 nop.f 0
1020 mov GR_TblOffsMask = 0x18
1021 };;
1022 { .mfi
1023 nop.m 0
1024 nop.f 0
1025 and GR_TblOffs = GR_TblOffs,GR_TblOffsMask
1026 };;
1027 { .mfi
1028 shladd GR_ad_Co = GR_TblOffs,3,GR_ad_Co
1029 nop.f 0
1030 nop.i 0
1031 }
1032 { .mfi
1033 shladd GR_ad_Ce = GR_TblOffs,3,GR_ad_Ce
1034 nop.f 0
1035 cmp.eq p6,p7 = 8,GR_TblOffs
1036 };;
1037 { .mmi
1038 ldfpd FR_A7,FR_A6 = [GR_ad_Co],16
1039 ldfpd FR_A5,FR_A4 = [GR_ad_Ce],16
1040 nop.i 0
1041 };;
1042 { .mmi
1043 ldfpd FR_A3,FR_A2 = [GR_ad_Co],16
1044 ldfpd FR_A1,FR_A0 = [GR_ad_Ce],16
1045 nop.i 0
1046 };;
1047
1048 .align 32
1049 tgamma_from_0_to_2:
1050 { .mfi
1051 nop.m 0
1052 (p6) fms.s1 FR_r = FR_r,f1,FR_LocalMin
1053 nop.i 0
1054 };;
1055 { .mfi
1056 nop.m 0
1057 // NR-iteration
1058 (p10) fnma.s1 FR_Rcp2 = FR_Rcp1,FR_NormX,f1
1059 nop.i 0
1060 };;
1061 { .mfi
1062 nop.m 0
1063 fms.s1 FR_r2 = FR_r,FR_r,f0
1064 nop.i 0
1065 };;
1066 { .mfi
1067 nop.m 0
1068 fma.s1 FR_A7 = FR_A7,FR_r,FR_A6
1069 nop.i 0
1070 }
1071 { .mfi
1072 nop.m 0
1073 fma.s1 FR_A5 = FR_A5,FR_r,FR_A4
1074 nop.i 0
1075 };;
1076 { .mfi
1077 nop.m 0
1078 fma.s1 FR_A3 = FR_A3,FR_r,FR_A2
1079 nop.i 0
1080 }
1081 { .mfi
1082 nop.m 0
1083 fma.s1 FR_A1 = FR_A1,FR_r,FR_A0
1084 nop.i 0
1085 };;
1086 { .mfi
1087 nop.m 0
1088 // NR-iteration
1089 (p10) fma.s1 FR_Rcp2 = FR_Rcp1,FR_Rcp2,FR_Rcp1
1090 nop.i 0
1091 };;
1092 { .mfi
1093 nop.m 0
1094 fma.s1 FR_A7 = FR_A7,FR_r2,FR_A5
1095 nop.i 0
1096 }
1097 { .mfi
1098 nop.m 0
1099 fma.s1 FR_r4 = FR_r2,FR_r2,f0
1100 nop.i 0
1101 };;
1102 { .mfi
1103 nop.m 0
1104 fma.s1 FR_A3 = FR_A3,FR_r2,FR_A1
1105 nop.i 0
1106 };;
1107 { .mfi
1108 nop.m 0
1109 (p10) fma.s1 FR_GAMMA = FR_A7,FR_r4,FR_A3
1110 nop.i 0
1111 }
1112 { .mfi
1113 nop.m 0
1114 (p11) fma.s.s0 f8 = FR_A7,FR_r4,FR_A3
1115 nop.i 0
1116 };;
1117 { .mfb
1118 nop.m 0
1119 (p10) fma.s.s0 f8 = FR_GAMMA,FR_Rcp2,f0
1120 br.ret.sptk b0
1121 };;
1122
1123
1124 // overflow
1125 //--------------------------------------------------------------------
1126 .align 32
1127 tgammaf_overflow_near0_bound:
1128 .pred.rel "mutex",p14,p15
1129 { .mfi
1130 mov GR_fpsr = ar.fpsr
1131 nop.f 0
1132 (p15) mov r8 = 0x7f8
1133 }
1134 { .mfi
1135 nop.m 0
1136 nop.f 0
1137 (p14) mov r8 = 0xff8
1138 };;
1139 { .mfi
1140 nop.m 0
1141 nop.f 0
1142 shl r8 = r8,20
1143 };;
1144 { .mfi
1145 sub r8 = r8,r0,1
1146 nop.f 0
1147 extr.u GR_fpsr = GR_fpsr,10,2 // rounding mode
1148 };;
1149 .pred.rel "mutex",p14,p15
1150 { .mfi
1151 // set p8 to 0 in case of overflow and to 1 otherwise
1152 // for negative arg:
1153 // no overflow if rounding mode either Z or +Inf, i.e.
1154 // GR_fpsr > 1
1155 (p14) cmp.lt p8,p0 = 1,GR_fpsr
1156 nop.f 0
1157 // for positive arg:
1158 // no overflow if rounding mode either Z or -Inf, i.e.
1159 // (GR_fpsr & 1) == 0
1160 (p15) tbit.z p0,p8 = GR_fpsr,0
1161 };;
1162 { .mib
1163 (p8) setf.s f8 = r8 // set result to 0x7f7fffff without
1164 // OVERFLOW flag raising
1165 nop.i 0
1166 (p8) br.ret.sptk b0
1167 };;
1168
1169 .align 32
1170 tgammaf_overflow:
1171 { .mfi
1172 nop.m 0
1173 nop.f 0
1174 mov r8 = 0x1FFFE
1175 };;
1176 { .mfi
1177 setf.exp f9 = r8
1178 fmerge.s FR_X = f8,f8
1179 nop.i 0
1180 };;
1181 .pred.rel "mutex",p14,p15
1182 { .mfi
1183 nop.m 0
1184 (p14) fnma.s.s0 f8 = f9,f9,f0 // set I,O and -INF result
1185 mov GR_TAG = 261 // overflow
1186 }
1187 { .mfb
1188 nop.m 0
1189 (p15) fma.s.s0 f8 = f9,f9,f0 // set I,O and +INF result
1190 br.cond.sptk tgammaf_libm_err
1191 };;
1192
1193 // x is negative integer or +/-0
1194 //--------------------------------------------------------------------
1195 .align 32
1196 tgammaf_singularity:
1197 { .mfi
1198 nop.m 0
1199 fmerge.s FR_X = f8,f8
1200 mov GR_TAG = 262 // negative
1201 }
1202 { .mfb
1203 nop.m 0
1204 frcpa.s0 f8,p0 = f0,f0
1205 br.cond.sptk tgammaf_libm_err
1206 };;
1207 // x is negative noninteger with big absolute value
1208 //--------------------------------------------------------------------
1209 .align 32
1210 tgammaf_underflow:
1211 { .mfi
1212 mov r8 = 0x00001
1213 nop.f 0
1214 tbit.z p6,p7 = GR_Sig,0
1215 };;
1216 { .mfi
1217 setf.exp f9 = r8
1218 nop.f 0
1219 nop.i 0
1220 };;
1221 .pred.rel "mutex",p6,p7
1222 { .mfi
1223 nop.m 0
1224 (p6) fms.s.s0 f8 = f9,f9,f9
1225 nop.i 0
1226 }
1227 { .mfb
1228 nop.m 0
1229 (p7) fma.s.s0 f8 = f9,f9,f9
1230 br.ret.sptk b0
1231 };;
1232
1233 // x for natval, nan, +/-inf or +/-0
1234 //--------------------------------------------------------------------
1235 .align 32
1236 tgammaf_spec_args:
1237 { .mfi
1238 nop.m 0
1239 fclass.m p6,p0 = f8,0x1E1 // Test x for natval, nan, +inf
1240 nop.i 0
1241 };;
1242 { .mfi
1243 nop.m 0
1244 fclass.m p7,p8 = f8,0x7 // +/-0
1245 nop.i 0
1246 };;
1247 { .mfi
1248 nop.m 0
1249 fmerge.s FR_X = f8,f8
1250 nop.i 0
1251 }
1252 { .mfb
1253 nop.m 0
1254 (p6) fma.s.s0 f8 = f8,f1,f8
1255 (p6) br.ret.spnt b0
1256 };;
1257 .pred.rel "mutex",p7,p8
1258 { .mfi
1259 (p7) mov GR_TAG = 262 // negative
1260 (p7) frcpa.s0 f8,p0 = f1,f8
1261 nop.i 0
1262 }
1263 { .mib
1264 nop.m 0
1265 nop.i 0
1266 (p8) br.cond.spnt tgammaf_singularity
1267 };;
1268
1269 .align 32
1270 tgammaf_libm_err:
1271 { .mfi
1272 alloc r32 = ar.pfs,1,4,4,0
1273 nop.f 0
1274 mov GR_Parameter_TAG = GR_TAG
1275 };;
1276
1277 GLOBAL_LIBM_END(tgammaf)
1278 libm_alias_float_other (tgamma, tgamma)
1279
1280 LOCAL_LIBM_ENTRY(__libm_error_region)
1281 .prologue
1282 { .mfi
1283 add GR_Parameter_Y=-32,sp // Parameter 2 value
1284 nop.f 0
1285 .save ar.pfs,GR_SAVE_PFS
1286 mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
1287 }
1288 { .mfi
1289 .fframe 64
1290 add sp=-64,sp // Create new stack
1291 nop.f 0
1292 mov GR_SAVE_GP=gp // Save gp
1293 };;
1294 { .mmi
1295 stfs [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack
1296 add GR_Parameter_X = 16,sp // Parameter 1 address
1297 .save b0, GR_SAVE_B0
1298 mov GR_SAVE_B0=b0 // Save b0
1299 };;
1300 .body
1301 { .mib
1302 stfs [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
1303 add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
1304 nop.b 0
1305 }
1306 { .mib
1307 stfs [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
1308 add GR_Parameter_Y = -16,GR_Parameter_Y
1309 br.call.sptk b0=__libm_error_support# // Call error handling function
1310 };;
1311 { .mmi
1312 nop.m 0
1313 nop.m 0
1314 add GR_Parameter_RESULT = 48,sp
1315 };;
1316 { .mmi
1317 ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
1318 .restore sp
1319 add sp = 64,sp // Restore stack pointer
1320 mov b0 = GR_SAVE_B0 // Restore return address
1321 };;
1322 { .mib
1323 mov gp = GR_SAVE_GP // Restore gp
1324 mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
1325 br.ret.sptk b0 // Return
1326 };;
1327
1328 LOCAL_LIBM_END(__libm_error_region)
1329 .type __libm_error_support#,@function
1330 .global __libm_error_support#