4 // Copyright (c) 2000 - 2002, Intel Corporation
5 // All rights reserved.
8 // Redistribution and use in source and binary forms, with or without
9 // modification, are permitted provided that the following conditions are
12 // * Redistributions of source code must retain the above copyright
13 // notice, this list of conditions and the following disclaimer.
15 // * Redistributions in binary form must reproduce the above copyright
16 // notice, this list of conditions and the following disclaimer in the
17 // documentation and/or other materials provided with the distribution.
19 // * The name of Intel Corporation may not be used to endorse or promote
20 // products derived from this software without specific prior written
23 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
27 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
28 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
29 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
31 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
32 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 // Intel Corporation is the author of this code, and requests that all
36 // problem reports or change requests be submitted to it directly at
37 // http://www.intel.com/software/products/opensource/libraries/num.htm.
40 //==============================================================
41 // 02/02/00 Initial version
42 // 04/04/00 Unwind support added
43 // 08/15/00 Bundle added after call to __libm_error_support to properly
44 // set [the previously overwritten] GR_Parameter_RESULT.
45 // 10/12/00 Update to set denormal operand and underflow flags
46 // 01/22/01 Fixed to set inexact flag for small args. Fixed incorrect
47 // call to __libm_error_support for 710.476 < x < 11357.2166.
48 // 05/02/01 Reworked to improve speed of all paths
49 // 05/20/02 Cleaned up namespace and sf0 syntax
50 // 12/04/02 Improved performance
53 //==============================================================
54 // long double = sinhl(long double)
55 // input floating point f8
56 // output floating point f8
59 //==============================================================
62 // predicate registers used:
64 // floating-point registers used:
65 // f9 -> f15; f32 -> f90;
66 // f8 has input, then output
68 // Overview of operation
69 //==============================================================
70 // There are seven paths
71 // 1. 0 < |x| < 0.25 SINH_BY_POLY
72 // 2. 0.25 <=|x| < 32 SINH_BY_TBL
73 // 3. 32 <= |x| < 11357.21655 SINH_BY_EXP (merged path with SINH_BY_TBL)
74 // 4. |x| >= 11357.21655 SINH_HUGE
75 // 5. x=0 Done with early exit
76 // 6. x=inf,nan Done with early exit
77 // 7. x=denormal SINH_DENORM
79 // For double extended we get overflow for x >= 400c b174 ddc0 31ae c0ea
83 // 1. SINH_BY_POLY 0 < |x| < 0.25
85 // Evaluate sinh(x) by a 13th order polynomial
86 // Care is take for the order of multiplication; and P_1 is not exactly 1/3!,
87 // P_2 is not exactly 1/5!, etc.
88 // sinh(x) = sign * (series(e^x) - series(e^-x))/2
89 // = sign * (ax + ax^3/3! + ax^5/5! + ax^7/7! + ax^9/9! + ax^11/11!
91 // = sign * (ax + ax * ( ax^2 * (1/3! + ax^4 * (1/7! + ax^4*1/11!)) )
92 // + ax * ( ax^4 * (1/5! + ax^4 * (1/9! + ax^4*1/13!)) ))
93 // = sign * (ax + ax*p_odd + (ax*p_even))
94 // = sign * (ax + Y_lo)
95 // sinh(x) = sign * (Y_hi + Y_lo)
98 // 2. SINH_BY_TBL 0.25 <= |x| < 32.0
100 // sinh(x) = sinh(B+R)
101 // = sinh(B)cosh(R) + cosh(B)sinh(R)
103 // ax = |x| = M*log2/64 + R
106 // We will calculate M and get N as (M-j)/64
107 // The division is a shift.
108 // exp(B) = exp(N*log2 + j*log2/64)
109 // = 2^N * 2^(j*log2/64)
110 // sinh(B) = 1/2(e^B -e^-B)
111 // = 1/2(2^N * 2^(j*log2/64) - 2^-N * 2^(-j*log2/64))
112 // sinh(B) = (2^(N-1) * 2^(j*log2/64) - 2^(-N-1) * 2^(-j*log2/64))
113 // cosh(B) = (2^(N-1) * 2^(j*log2/64) + 2^(-N-1) * 2^(-j*log2/64))
114 // 2^(j*log2/64) is stored as Tjhi + Tjlo , j= -32,....,32
115 // Tjhi is double-extended (80-bit) and Tjlo is single(32-bit)
117 // R = ax - M*log2/64
118 // R = ax - M*log2_by_64_hi - M*log2_by_64_lo
119 // exp(R) = 1 + R +R^2(1/2! + R(1/3! + R(1/4! + ... + R(1/n!)...)
120 // = 1 + p_odd + p_even
121 // where the p_even uses the A coefficients and the p_even uses
122 // the B coefficients
124 // So sinh(R) = 1 + p_odd + p_even -(1 -p_odd -p_even)/2 = p_odd
125 // cosh(R) = 1 + p_even
126 // sinh(B) = S_hi + S_lo
128 // sinh(x) = sinh(B)cosh(R) + cosh(B)sinh(R)
130 // 3. SINH_BY_EXP 32.0 <= |x| < 11357.21655 ( 400c b174 ddc0 31ae c0ea )
132 // Can approximate result by exp(x)/2 in this region.
134 // Y_lo = Tjhi * (p_odd + p_even) + Tjlo
135 // sinh(x) = Y_hi + Y_lo
137 // 4. SINH_HUGE |x| >= 11357.21655 ( 400c b174 ddc0 31ae c0ea )
139 // Set error tag and call error support
143 //==============================================================
168 r_signexp_sgnx_0_5 = r28
181 GR_Parameter_RESULT = r39
182 GR_Parameter_TAG = r40
223 f_INV_LN2_2TO63 = f57
226 f_smlst_oflow_input = f60
266 //==============================================================
268 // DO NOT CHANGE ORDER OF THESE TABLES
272 LOCAL_OBJECT_START(sinh_arg_reduction)
273 // data8 0xB8AA3B295C17F0BC, 0x00004005 // 64/log2 -- signif loaded with setf
274 data8 0xB17217F7D1000000, 0x00003FF8 // log2/64 high part
275 data8 0xCF79ABC9E3B39804, 0x00003FD0 // log2/64 low part
276 data8 0xb174ddc031aec0ea, 0x0000400c // Smallest x to overflow (11357.21655)
277 LOCAL_OBJECT_END(sinh_arg_reduction)
279 LOCAL_OBJECT_START(sinh_p_table)
280 data8 0xB08AF9AE78C1239F, 0x00003FDE // P6
281 data8 0xB8EF1D28926D8891, 0x00003FEC // P4
282 data8 0x8888888888888412, 0x00003FF8 // P2
283 data8 0xD732377688025BE9, 0x00003FE5 // P5
284 data8 0xD00D00D00D4D39F2, 0x00003FF2 // P3
285 data8 0xAAAAAAAAAAAAAAAB, 0x00003FFC // P1
286 LOCAL_OBJECT_END(sinh_p_table)
288 LOCAL_OBJECT_START(sinh_ab_table)
289 data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC // A1
290 data8 0x88888888884ECDD5, 0x00003FF8 // A2
291 data8 0xD00D0C6DCC26A86B, 0x00003FF2 // A3
292 data8 0x8000000000000002, 0x00003FFE // B1
293 data8 0xAAAAAAAAAA402C77, 0x00003FFA // B2
294 data8 0xB60B6CC96BDB144D, 0x00003FF5 // B3
295 LOCAL_OBJECT_END(sinh_ab_table)
297 LOCAL_OBJECT_START(sinh_j_hi_table)
298 data8 0xB504F333F9DE6484, 0x00003FFE
299 data8 0xB6FD91E328D17791, 0x00003FFE
300 data8 0xB8FBAF4762FB9EE9, 0x00003FFE
301 data8 0xBAFF5AB2133E45FB, 0x00003FFE
302 data8 0xBD08A39F580C36BF, 0x00003FFE
303 data8 0xBF1799B67A731083, 0x00003FFE
304 data8 0xC12C4CCA66709456, 0x00003FFE
305 data8 0xC346CCDA24976407, 0x00003FFE
306 data8 0xC5672A115506DADD, 0x00003FFE
307 data8 0xC78D74C8ABB9B15D, 0x00003FFE
308 data8 0xC9B9BD866E2F27A3, 0x00003FFE
309 data8 0xCBEC14FEF2727C5D, 0x00003FFE
310 data8 0xCE248C151F8480E4, 0x00003FFE
311 data8 0xD06333DAEF2B2595, 0x00003FFE
312 data8 0xD2A81D91F12AE45A, 0x00003FFE
313 data8 0xD4F35AABCFEDFA1F, 0x00003FFE
314 data8 0xD744FCCAD69D6AF4, 0x00003FFE
315 data8 0xD99D15C278AFD7B6, 0x00003FFE
316 data8 0xDBFBB797DAF23755, 0x00003FFE
317 data8 0xDE60F4825E0E9124, 0x00003FFE
318 data8 0xE0CCDEEC2A94E111, 0x00003FFE
319 data8 0xE33F8972BE8A5A51, 0x00003FFE
320 data8 0xE5B906E77C8348A8, 0x00003FFE
321 data8 0xE8396A503C4BDC68, 0x00003FFE
322 data8 0xEAC0C6E7DD24392F, 0x00003FFE
323 data8 0xED4F301ED9942B84, 0x00003FFE
324 data8 0xEFE4B99BDCDAF5CB, 0x00003FFE
325 data8 0xF281773C59FFB13A, 0x00003FFE
326 data8 0xF5257D152486CC2C, 0x00003FFE
327 data8 0xF7D0DF730AD13BB9, 0x00003FFE
328 data8 0xFA83B2DB722A033A, 0x00003FFE
329 data8 0xFD3E0C0CF486C175, 0x00003FFE
330 data8 0x8000000000000000, 0x00003FFF // Center of table
331 data8 0x8164D1F3BC030773, 0x00003FFF
332 data8 0x82CD8698AC2BA1D7, 0x00003FFF
333 data8 0x843A28C3ACDE4046, 0x00003FFF
334 data8 0x85AAC367CC487B15, 0x00003FFF
335 data8 0x871F61969E8D1010, 0x00003FFF
336 data8 0x88980E8092DA8527, 0x00003FFF
337 data8 0x8A14D575496EFD9A, 0x00003FFF
338 data8 0x8B95C1E3EA8BD6E7, 0x00003FFF
339 data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF
340 data8 0x8EA4398B45CD53C0, 0x00003FFF
341 data8 0x9031DC431466B1DC, 0x00003FFF
342 data8 0x91C3D373AB11C336, 0x00003FFF
343 data8 0x935A2B2F13E6E92C, 0x00003FFF
344 data8 0x94F4EFA8FEF70961, 0x00003FFF
345 data8 0x96942D3720185A00, 0x00003FFF
346 data8 0x9837F0518DB8A96F, 0x00003FFF
347 data8 0x99E0459320B7FA65, 0x00003FFF
348 data8 0x9B8D39B9D54E5539, 0x00003FFF
349 data8 0x9D3ED9A72CFFB751, 0x00003FFF
350 data8 0x9EF5326091A111AE, 0x00003FFF
351 data8 0xA0B0510FB9714FC2, 0x00003FFF
352 data8 0xA27043030C496819, 0x00003FFF
353 data8 0xA43515AE09E6809E, 0x00003FFF
354 data8 0xA5FED6A9B15138EA, 0x00003FFF
355 data8 0xA7CD93B4E965356A, 0x00003FFF
356 data8 0xA9A15AB4EA7C0EF8, 0x00003FFF
357 data8 0xAB7A39B5A93ED337, 0x00003FFF
358 data8 0xAD583EEA42A14AC6, 0x00003FFF
359 data8 0xAF3B78AD690A4375, 0x00003FFF
360 data8 0xB123F581D2AC2590, 0x00003FFF
361 data8 0xB311C412A9112489, 0x00003FFF
362 data8 0xB504F333F9DE6484, 0x00003FFF
363 LOCAL_OBJECT_END(sinh_j_hi_table)
365 LOCAL_OBJECT_START(sinh_j_lo_table)
398 data4 0x00000000 // Center of table
431 LOCAL_OBJECT_END(sinh_j_lo_table)
435 GLOBAL_IEEE754_ENTRY(sinhl)
438 getf.exp r_signexp_x = f8 // Get signexp of x, must redo if unorm
439 movl r_sig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2
442 addl r_ad1 = @ltoff(sinh_arg_reduction), gp
443 movl r_rshf_2to57 = 0x4778000000000000 // 1.10000 2^(63+57)
449 fmerge.s f_ABS_X = f0,f8
450 mov r_exp_0_25 = 0x0fffd // Form exponent for 0.25
454 fnorm.s1 f_NORM_X = f8
455 mov r_exp_2tom57 = 0xffff-57
460 setf.d f_RSHF_2TO57 = r_rshf_2to57 // Form const 1.100 * 2^120
461 fclass.m p10,p0 = f8, 0x0b // Test for denorm
462 mov r_exp_mask = 0x1ffff
465 setf.sig f_INV_LN2_2TO63 = r_sig_inv_ln2 // Form 1/ln2 * 2^63
466 movl r_rshf = 0x43e8000000000000 // 1.1000 2^63 for right shift
472 fclass.m p7,p0 = f8, 0x07 // Test if x=0
476 setf.exp f_2TOM57 = r_exp_2tom57 // Form 2^-57 for scaling
478 add r_ad3 = 0x90, r_ad1 // Point to ab_table
483 setf.d f_RSHF = r_rshf // Form right shift const 1.100 * 2^63
484 fclass.m p6,p0 = f8, 0xe3 // Test if x nan, inf
485 add r_ad4 = 0x2f0, r_ad1 // Point to j_hi_table midpoint
488 add r_ad2e = 0x20, r_ad1 // Point to p_table
490 (p10) br.cond.spnt SINH_DENORM // Branch if x denorm
494 // Common path -- return here from SINH_DENORM if x is unnorm
497 ldfe f_smlst_oflow_input = [r_ad2e],16
499 add r_ad5 = 0x580, r_ad1 // Point to j_lo_table midpoint
502 ldfe f_log2by64_hi = [r_ad1],16
503 and r_exp_x = r_exp_mask, r_signexp_x
504 (p7) br.ret.spnt b0 // Exit if x=0
508 // Get the A coefficients for SINH_BY_TBL
510 ldfe f_A1 = [r_ad3],16
511 fcmp.lt.s1 p8,p9 = f8,f0 // Test for x<0
512 cmp.lt p7,p0 = r_exp_x, r_exp_0_25 // Test x < 0.25
515 add r_ad2o = 0x30, r_ad2e // Point to p_table odd coeffs
516 (p6) fma.s0 f8 = f8,f1,f0 // Result for x nan, inf
517 (p6) br.ret.spnt b0 // Exit for x nan, inf
521 // Calculate X2 = ax*ax for SINH_BY_POLY
523 ldfe f_log2by64_lo = [r_ad1],16
528 ldfe f_A2 = [r_ad3],16
529 fma.s1 f_X2 = f_NORM_X, f_NORM_X, f0
530 (p7) br.cond.spnt SINH_BY_POLY
534 // Here if |x| >= 0.25
536 // ******************************************************
537 // STEP 1 (TBL and EXP) - Argument reduction
538 // ******************************************************
539 // Get the following constants.
545 // We want 2^(N-1) and 2^(-N-1). So bias N-1 and -N-1 and
546 // put them in an exponent.
547 // f_spos = 2^(N-1) and f_sneg = 2^(-N-1)
548 // 0xffff + (N-1) = 0xffff +N -1
549 // 0xffff - (N +1) = 0xffff -N -1
552 // Calculate M and keep it as integer and floating point.
553 // M = round-to-integer(x*Inv_log2by64)
554 // f_M = M = truncate(ax/(log2/64))
555 // Put the integer representation of M in r_M
556 // and the floating point representation of M in f_M
558 // Get the remaining A,B coefficients
560 ldfe f_A3 = [r_ad3],16
566 .pred.rel "mutex",p8,p9
567 // Use constant (1.100*2^(63-6)) to get rounded M into rightmost significand
568 // |x| * 64 * 1/ln2 * 2^(63-6) + 1.1000 * 2^(63+(63-6))
570 (p8) mov r_signexp_sgnx_0_5 = 0x2fffe // signexp of -0.5
571 fma.s1 f_M_temp = f_ABS_X, f_INV_LN2_2TO63, f_RSHF_2TO57
572 (p9) mov r_signexp_sgnx_0_5 = 0x0fffe // signexp of +0.5
576 // Test for |x| >= overflow limit
578 ldfe f_B1 = [r_ad3],16
579 fcmp.ge.s1 p6,p0 = f_ABS_X, f_smlst_oflow_input
585 ldfe f_B2 = [r_ad3],16
587 mov r_exp_32 = 0x10004
591 // Subtract RSHF constant to get rounded M as a floating point value
592 // M_temp * 2^(63-6) - 2^63
594 ldfe f_B3 = [r_ad3],16
595 fms.s1 f_M = f_M_temp, f_2TOM57, f_RSHF
596 (p6) br.cond.spnt SINH_HUGE // Branch if result will overflow
601 getf.sig r_M = f_M_temp
603 cmp.ge p7,p6 = r_exp_x, r_exp_32 // Test if x >= 32
607 // Calculate j. j is the signed extension of the six lsb of M. It
608 // has a range of -32 thru 31.
611 // ax - M*log2by64_hi
612 // R = (ax - M*log2by64_hi) - M*log2by64_lo
616 fnma.s1 f_R_temp = f_M, f_log2by64_hi, f_ABS_X
623 shl r_jshf = r_j, 0x2 // Shift j so can sign extend it
631 shr r_j = r_jshf, 0x2 // Now j has range -32 to 31
637 shladd r_ad_J_hi = r_j, 4, r_ad4 // pointer to Tjhi
638 sub r_Mmj = r_M, r_j // M-j
639 sub r_mj = r0, r_j // Form -j
643 // The TBL and EXP branches are merged and predicated
644 // If TBL, p6 true, 0.25 <= |x| < 32
645 // If EXP, p7 true, 32 <= |x| < overflow_limit
649 ldfe f_Tjhi = [r_ad_J_hi]
650 fnma.s1 f_R = f_M, f_log2by64_lo, f_R_temp
651 shr r_N = r_Mmj, 0x6 // N = (M-j)/64
654 shladd r_ad_mJ_hi = r_mj, 4, r_ad4 // pointer to Tmjhi
656 shladd r_ad_mJ_lo = r_mj, 2, r_ad5 // pointer to Tmjlo
661 sub r_2mNm1 = r_signexp_sgnx_0_5, r_N // signexp sgnx*2^(-N-1)
663 shladd r_ad_J_lo = r_j, 2, r_ad5 // pointer to Tjlo
666 ldfe f_Tmjhi = [r_ad_mJ_hi]
668 add r_2Nm1 = r_signexp_sgnx_0_5, r_N // signexp sgnx*2^(N-1)
673 ldfs f_Tmjlo = [r_ad_mJ_lo]
674 setf.exp f_sneg = r_2mNm1 // Form sgnx * 2^(-N-1)
680 ldfs f_Tjlo = [r_ad_J_lo]
681 setf.exp f_spos = r_2Nm1 // Form sgnx * 2^(N-1)
686 // ******************************************************
687 // STEP 2 (TBL and EXP)
688 // ******************************************************
689 // Calculate Rsquared and Rcubed in preparation for p_even and p_odd
694 fma.s1 f_Rsq = f_R, f_R, f0
701 // B_1 + Rsq * (B_2 + Rsq *B_3)
702 // p_even = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
705 fma.s1 f_peven_temp1 = f_Rsq, f_B3, f_B2
710 // A_1 + Rsq * (A_2 + Rsq *A_3)
711 // podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
714 fma.s1 f_podd_temp1 = f_Rsq, f_A3, f_A2
721 fma.s1 f_Rcub = f_Rsq, f_R, f0
728 // Calculate S_hi and S_lo, and C_hi
729 // SC_hi_temp = sneg * Tmjhi
730 // S_hi = spos * Tjhi - SC_hi_temp
731 // S_hi = spos * Tjhi - (sneg * Tmjhi)
732 // C_hi = spos * Tjhi + SC_hi_temp
733 // C_hi = spos * Tjhi + (sneg * Tmjhi)
737 (p6) fma.s1 f_SC_hi_temp = f_sneg, f_Tmjhi, f0
743 // S_lo_temp3 = sneg * Tmjlo
744 // S_lo_temp4 = spos * Tjlo - S_lo_temp3
745 // S_lo_temp4 = spos * Tjlo -(sneg * Tmjlo)
748 (p6) fma.s1 f_S_lo_temp3 = f_sneg, f_Tmjlo, f0
755 fma.s1 f_peven_temp2 = f_Rsq, f_peven_temp1, f_B1
760 fma.s1 f_podd_temp2 = f_Rsq, f_podd_temp1, f_A1
766 // Compute sgnx * 2^(N-1) * Tjhi and sgnx * 2^(N-1) * Tjlo
769 (p7) fma.s1 f_Tjhi_spos = f_Tjhi, f_spos, f0
774 (p7) fma.s1 f_Tjlo_spos = f_Tjlo, f_spos, f0
781 (p6) fms.s1 f_S_hi = f_spos, f_Tjhi, f_SC_hi_temp
788 (p6) fma.s1 f_C_hi = f_spos, f_Tjhi, f_SC_hi_temp
793 (p6) fms.s1 f_S_lo_temp4 = f_spos, f_Tjlo, f_S_lo_temp3
800 fma.s1 f_peven = f_Rsq, f_peven_temp2, f0
805 fma.s1 f_podd = f_podd_temp2, f_Rcub, f_R
811 // S_lo_temp1 = spos * Tjhi - S_hi
812 // S_lo_temp2 = -sneg * Tmjlo + S_lo_temp1
813 // S_lo_temp2 = -sneg * Tmjlo + (spos * Tjhi - S_hi)
817 (p6) fms.s1 f_S_lo_temp1 = f_spos, f_Tjhi, f_S_hi
824 (p6) fnma.s1 f_S_lo_temp2 = f_sneg, f_Tmjhi, f_S_lo_temp1
830 // Y_hi = sgnx * 2^(N-1) * Tjhi
831 // Y_lo = sgnx * 2^(N-1) * Tjhi * (p_odd + p_even) + sgnx * 2^(N-1) * Tjlo
834 (p7) fma.s1 f_Y_lo_temp = f_peven, f1, f_podd
840 // S_lo = S_lo_temp4 + S_lo_temp2
843 (p6) fma.s1 f_S_lo = f_S_lo_temp4, f1, f_S_lo_temp2
850 // Y_lo = C_hi*p_odd + (S_hi*p_even + S_lo)
853 (p6) fma.s1 f_Y_lo_temp = f_S_hi, f_peven, f_S_lo
860 (p7) fma.s1 f_Y_lo = f_Tjhi_spos, f_Y_lo_temp, f_Tjlo_spos
865 // Dummy multiply to generate inexact
868 fmpy.s0 f_tmp = f_B2, f_B2
873 (p6) fma.s1 f_Y_lo = f_C_hi, f_podd, f_Y_lo_temp
878 // f8 = answer = Y_hi + Y_lo
881 (p7) fma.s0 f8 = f_Y_lo, f1, f_Tjhi_spos
886 // f8 = answer = Y_hi + Y_lo
889 (p6) fma.s0 f8 = f_Y_lo, f1, f_S_hi
890 br.ret.sptk b0 // Exit for SINH_BY_TBL and SINH_BY_EXP
895 // Here if 0 < |x| < 0.25
898 ldfe f_P6 = [r_ad2e],16
899 ldfe f_P5 = [r_ad2o],16
905 ldfe f_P4 = [r_ad2e],16
906 ldfe f_P3 = [r_ad2o],16
912 ldfe f_P2 = [r_ad2e],16
913 ldfe f_P1 = [r_ad2o],16
920 fma.s1 f_X3 = f_NORM_X, f_X2, f0
925 fma.s1 f_X4 = f_X2, f_X2, f0
932 fma.s1 f_poly65 = f_X2, f_P6, f_P5
937 fma.s1 f_poly43 = f_X2, f_P4, f_P3
944 fma.s1 f_poly21 = f_X2, f_P2, f_P1
951 fma.s1 f_poly6543 = f_X4, f_poly65, f_poly43
958 fma.s1 f_poly6to1 = f_X4, f_poly6543, f_poly21
963 // Dummy multiply to generate inexact
966 fmpy.s0 f_tmp = f_P6, f_P6
971 fma.s0 f8 = f_poly6to1, f_X3, f_NORM_X
972 br.ret.sptk b0 // Exit SINH_BY_POLY
977 // Here if x denorm or unorm
979 // Determine if x really a denorm and not a unorm
981 getf.exp r_signexp_x = f_NORM_X
982 mov r_exp_denorm = 0x0c001 // Real denorms have exp < this
983 fmerge.s f_ABS_X = f0, f_NORM_X
989 fcmp.eq.s0 p10,p0 = f8, f0 // Set denorm flag
994 // Set p8 if really a denorm
996 and r_exp_x = r_exp_mask, r_signexp_x
998 cmp.lt p8,p9 = r_exp_x, r_exp_denorm
1003 // Identify denormal operands.
1006 (p8) fcmp.ge.unc.s1 p6,p7 = f8, f0 // Test sign of denorm
1007 (p9) br.cond.sptk SINH_COMMON // Return to main path if x unorm
1013 (p6) fma.s0 f8 = f8,f8,f8 // If x +denorm, result=x+x^2
1018 (p7) fnma.s0 f8 = f8,f8,f8 // If x -denorm, result=x-x^2
1019 br.ret.sptk b0 // Exit if x denorm
1024 // Here if |x| >= overflow limit
1026 // for SINH_HUGE, put 24000 in exponent; take sign from input
1028 mov r_exp_huge = 0x15dbf
1030 setf.exp f_huge = r_exp_huge
1035 .pred.rel "mutex",p8,p9
1037 alloc r32 = ar.pfs,0,5,4,0
1038 (p8) fnma.s1 f_signed_hi_lo = f_huge, f1, f1
1043 (p9) fma.s1 f_signed_hi_lo = f_huge, f1, f1
1050 fma.s0 f_pre_result = f_signed_hi_lo, f_huge, f0
1051 mov GR_Parameter_TAG = 126
1055 GLOBAL_IEEE754_END(sinhl)
1056 libm_alias_ldouble_other (__sinh, sinh)
1059 LOCAL_LIBM_ENTRY(__libm_error_region)
1063 add GR_Parameter_Y=-32,sp // Parameter 2 value
1065 .save ar.pfs,GR_SAVE_PFS
1066 mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
1070 add sp=-64,sp // Create new stack
1072 mov GR_SAVE_GP=gp // Save gp
1076 stfe [GR_Parameter_Y] = f0,16 // STORE Parameter 2 on stack
1077 add GR_Parameter_X = 16,sp // Parameter 1 address
1078 .save b0, GR_SAVE_B0
1079 mov GR_SAVE_B0=b0 // Save b0
1084 stfe [GR_Parameter_X] = f8 // STORE Parameter 1 on stack
1085 add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
1089 stfe [GR_Parameter_Y] = f_pre_result // STORE Parameter 3 on stack
1090 add GR_Parameter_Y = -16,GR_Parameter_Y
1091 br.call.sptk b0=__libm_error_support# // Call error handling function
1095 add GR_Parameter_RESULT = 48,sp
1101 ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
1103 add sp = 64,sp // Restore stack pointer
1104 mov b0 = GR_SAVE_B0 // Restore return address
1108 mov gp = GR_SAVE_GP // Restore gp
1109 mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
1110 br.ret.sptk b0 // Return
1113 LOCAL_LIBM_END(__libm_error_region)
1116 .type __libm_error_support#,@function
1117 .global __libm_error_support#