src/algorithms/trig_pi.rs

   1 use crate::{
   2     f16::F16,
   3     prim::{PrimFloat, PrimSInt, PrimUInt},
   4     traits::{Compare, Context, ConvertFrom, ConvertTo, Float, Make, Select},
   5 };
   6
mod consts {
    // the literals below are written with 50 significant digits, which is more
    // than `f64` can represent
    #![allow(clippy::excessive_precision)]
    // NOTE(review): presumably needed because not every coefficient is used by
    // the kernels (e.g. the highest-order ones) -- confirm
    #![allow(dead_code)]

    // Naming: `SINPI_KERNEL_TAYLOR_<n>` / `COSPI_KERNEL_TAYLOR_<n>` is the
    // coefficient of `x^n` in the corresponding series.

    /// coefficients of taylor series for `sin(pi * x)` centered at `0`
    /// generated using:
    /// ```maxima,text
    /// fpprec:50$
    /// sinpi: bfloat(taylor(sin(%pi*x),x,0,19))$
    /// for i: 1 step 2 thru 19 do
    ///     printf(true, "pub(crate) const SINPI_KERNEL_TAYLOR_~d: f64 = ~a;~%", i, ssubst("e", "b", string(coeff(sinpi, x, i))))$
    /// ```
    pub(crate) const SINPI_KERNEL_TAYLOR_1: f64 =
        3.1415926535897932384626433832795028841971693993751e0;
    pub(crate) const SINPI_KERNEL_TAYLOR_3: f64 =
        -5.1677127800499700292460525111835658670375480943142e0;
    pub(crate) const SINPI_KERNEL_TAYLOR_5: f64 =
        2.550164039877345443856177583695296720669172555234e0;
    pub(crate) const SINPI_KERNEL_TAYLOR_7: f64 =
        -5.9926452932079207688773938354604004601536358636814e-1;
    pub(crate) const SINPI_KERNEL_TAYLOR_9: f64 =
        8.2145886611128228798802365523698344807837460797753e-2;
    pub(crate) const SINPI_KERNEL_TAYLOR_11: f64 =
        -7.370430945714350777259089957290781501211638236021e-3;
    pub(crate) const SINPI_KERNEL_TAYLOR_13: f64 =
        4.6630280576761256442062891447027174382819981361599e-4;
    pub(crate) const SINPI_KERNEL_TAYLOR_15: f64 =
        -2.1915353447830215827384652057094188859248708765956e-5;
    pub(crate) const SINPI_KERNEL_TAYLOR_17: f64 =
        7.9520540014755127847832068624575890327682459384282e-7;
    pub(crate) const SINPI_KERNEL_TAYLOR_19: f64 =
        -2.2948428997269873110203872385571587856074785581088e-8;

    /// coefficients of taylor series for `cos(pi * x)` centered at `0`
    /// generated using:
    /// ```maxima,text
    /// fpprec:50$
    /// cospi: bfloat(taylor(cos(%pi*x),x,0,18))$
    /// for i: 0 step 2 thru 18 do
    ///     printf(true, "pub(crate) const COSPI_KERNEL_TAYLOR_~d: f64 = ~a;~%", i, ssubst("e", "b", string(coeff(cospi, x, i))))$
    /// ```
    pub(crate) const COSPI_KERNEL_TAYLOR_0: f64 = 1.0e0;
    pub(crate) const COSPI_KERNEL_TAYLOR_2: f64 =
        -4.9348022005446793094172454999380755676568497036204e0;
    pub(crate) const COSPI_KERNEL_TAYLOR_4: f64 =
        4.0587121264167682181850138620293796354053160696952e0;
    pub(crate) const COSPI_KERNEL_TAYLOR_6: f64 =
        -1.3352627688545894958753047828505831928711354556681e0;
    pub(crate) const COSPI_KERNEL_TAYLOR_8: f64 =
        2.3533063035889320454187935277546542154506893530856e-1;
    pub(crate) const COSPI_KERNEL_TAYLOR_10: f64 =
        -2.5806891390014060012598294252898849657186441048147e-2;
    pub(crate) const COSPI_KERNEL_TAYLOR_12: f64 =
        1.9295743094039230479033455636859576401684718150003e-3;
    pub(crate) const COSPI_KERNEL_TAYLOR_14: f64 =
        -1.0463810492484570711801672835223932761029733149091e-4;
    pub(crate) const COSPI_KERNEL_TAYLOR_16: f64 =
        4.3030695870329470072978237149669233008960901556009e-6;
    pub(crate) const COSPI_KERNEL_TAYLOR_18: f64 =
        -1.387895246221377211446808750399309343777037849978e-7;
}
  68
  69 /// computes `sin(pi * x)` for `-0.25 <= x <= 0.25`
  70 /// not guaranteed to give correct sign for zero result
  71 /// has an error of up to 2ULP
  72 pub fn sin_pi_kernel_f16<Ctx: Context>(ctx: Ctx, x: Ctx::VecF16) -> Ctx::VecF16 {
  73     let x_sq = x * x;
  74     let mut v: Ctx::VecF16 = ctx.make(consts::SINPI_KERNEL_TAYLOR_5.to());
  75     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_3.to()));
  76     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_1.to()));
  77     v * x
  78 }
  79
  80 /// computes `cos(pi * x)` for `-0.25 <= x <= 0.25`
  81 /// has an error of up to 2ULP
  82 pub fn cos_pi_kernel_f16<Ctx: Context>(ctx: Ctx, x: Ctx::VecF16) -> Ctx::VecF16 {
  83     let x_sq = x * x;
  84     let mut v: Ctx::VecF16 = ctx.make(consts::COSPI_KERNEL_TAYLOR_4.to());
  85     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_2.to()));
  86     v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_0.to()))
  87 }
  88
  89 /// computes `sin(pi * x)` for `-0.25 <= x <= 0.25`
  90 /// not guaranteed to give correct sign for zero result
  91 /// has an error of up to 2ULP
  92 pub fn sin_pi_kernel_f32<Ctx: Context>(ctx: Ctx, x: Ctx::VecF32) -> Ctx::VecF32 {
  93     let x_sq = x * x;
  94     let mut v: Ctx::VecF32 = ctx.make(consts::SINPI_KERNEL_TAYLOR_9.to());
  95     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_7.to()));
  96     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_5.to()));
  97     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_3.to()));
  98     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_1.to()));
  99     v * x
 100 }
 101
 102 /// computes `cos(pi * x)` for `-0.25 <= x <= 0.25`
 103 /// has an error of up to 2ULP
 104 pub fn cos_pi_kernel_f32<Ctx: Context>(ctx: Ctx, x: Ctx::VecF32) -> Ctx::VecF32 {
 105     let x_sq = x * x;
 106     let mut v: Ctx::VecF32 = ctx.make(consts::COSPI_KERNEL_TAYLOR_8.to());
 107     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_6.to()));
 108     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_4.to()));
 109     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_2.to()));
 110     v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_0.to()))
 111 }
 112
 113 /// computes `sin(pi * x)` for `-0.25 <= x <= 0.25`
 114 /// not guaranteed to give correct sign for zero result
 115 /// has an error of up to 2ULP
 116 pub fn sin_pi_kernel_f64<Ctx: Context>(ctx: Ctx, x: Ctx::VecF64) -> Ctx::VecF64 {
 117     let x_sq = x * x;
 118     let mut v: Ctx::VecF64 = ctx.make(consts::SINPI_KERNEL_TAYLOR_15.to());
 119     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_13.to()));
 120     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_11.to()));
 121     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_9.to()));
 122     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_7.to()));
 123     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_5.to()));
 124     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_3.to()));
 125     v = v.mul_add_fast(x_sq, ctx.make(consts::SINPI_KERNEL_TAYLOR_1.to()));
 126     v * x
 127 }
 128
 129 /// computes `cos(pi * x)` for `-0.25 <= x <= 0.25`
 130 /// has an error of up to 2ULP
 131 pub fn cos_pi_kernel_f64<Ctx: Context>(ctx: Ctx, x: Ctx::VecF64) -> Ctx::VecF64 {
 132     let x_sq = x * x;
 133     let mut v: Ctx::VecF64 = ctx.make(consts::COSPI_KERNEL_TAYLOR_16.to());
 134     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_14.to()));
 135     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_12.to()));
 136     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_10.to()));
 137     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_8.to()));
 138     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_6.to()));
 139     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_4.to()));
 140     v = v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_2.to()));
 141     v.mul_add_fast(x_sq, ctx.make(consts::COSPI_KERNEL_TAYLOR_0.to()))
 142 }
 143
 144 /// computes `(sin(pi * x), cos(pi * x))`
 145 /// not guaranteed to give correct sign for zero results
 146 /// inherits error from `sin_pi_kernel` and `cos_pi_kernel`
 147 pub fn sin_cos_pi_impl<
 148     Ctx: Context,
 149     VecF: Float<PrimFloat = PrimF> + Make<Context = Ctx>,
 150     PrimF: PrimFloat<BitsType = PrimU>,
 151     PrimU: PrimUInt,
 152     SinPiKernel: FnOnce(Ctx, VecF) -> VecF,
 153     CosPiKernel: FnOnce(Ctx, VecF) -> VecF,
 154 >(
 155     ctx: Ctx,
 156     x: VecF,
 157     sin_pi_kernel: SinPiKernel,
 158     cos_pi_kernel: CosPiKernel,
 159 ) -> (VecF, VecF) {
 160     let two_f: VecF = ctx.make(2.0.to());
 161     let one_half: VecF = ctx.make(0.5.to());
 162     let max_contiguous_integer: VecF =
 163         ctx.make((PrimU::cvt_from(1) << (PrimF::MANTISSA_FIELD_WIDTH + 1.to())).to());
 164     // if `x` is finite and bigger than `max_contiguous_integer`, then x is an even integer
 165     let in_range = x.abs().lt(max_contiguous_integer); // use `lt` so nans are counted as out-of-range
 166     let is_finite = x.is_finite();
 167     let nan: VecF = ctx.make(f32::NAN.to());
 168     let zero_f: VecF = ctx.make(0.to());
 169     let one_f: VecF = ctx.make(1.to());
 170     let zero_i: VecF::SignedBitsType = ctx.make(0.to());
 171     let one_i: VecF::SignedBitsType = ctx.make(1.to());
 172     let two_i: VecF::SignedBitsType = ctx.make(2.to());
 173     let out_of_range_sin = is_finite.select(zero_f, nan);
 174     let out_of_range_cos = is_finite.select(one_f, nan);
 175     let xi = (x * two_f).round();
 176     let xk = x - xi * one_half;
 177     let sk = sin_pi_kernel(ctx, xk);
 178     let ck = cos_pi_kernel(ctx, xk);
 179     let xi = VecF::SignedBitsType::cvt_from(xi);
 180     let bit_0_clear = (xi & one_i).eq(zero_i);
 181     let st = bit_0_clear.select(sk, ck);
 182     let ct = bit_0_clear.select(ck, sk);
 183     let s = (xi & two_i).eq(zero_i).select(st, -st);
 184     let c = ((xi + one_i) & two_i).eq(zero_i).select(ct, -ct);
 185     (
 186         in_range.select(s, out_of_range_sin),
 187         in_range.select(c, out_of_range_cos),
 188     )
 189 }
 190
 191 /// computes `(sin(pi * x), cos(pi * x))`
 192 /// not guaranteed to give correct sign for zero results
 193 /// has an error of up to 2ULP
 194 pub fn sin_cos_pi_f16<Ctx: Context>(ctx: Ctx, x: Ctx::VecF16) -> (Ctx::VecF16, Ctx::VecF16) {
 195     sin_cos_pi_impl(ctx, x, sin_pi_kernel_f16, cos_pi_kernel_f16)
 196 }
 197
 198 /// computes `sin(pi * x)`
 199 /// not guaranteed to give correct sign for zero results
 200 /// has an error of up to 2ULP
 201 pub fn sin_pi_f16<Ctx: Context>(ctx: Ctx, x: Ctx::VecF16) -> Ctx::VecF16 {
 202     sin_cos_pi_f16(ctx, x).0
 203 }
 204
 205 /// computes `cos(pi * x)`
 206 /// not guaranteed to give correct sign for zero results
 207 /// has an error of up to 2ULP
 208 pub fn cos_pi_f16<Ctx: Context>(ctx: Ctx, x: Ctx::VecF16) -> Ctx::VecF16 {
 209     sin_cos_pi_f16(ctx, x).1
 210 }
 211
 212 /// computes `(sin(pi * x), cos(pi * x))`
 213 /// not guaranteed to give correct sign for zero results
 214 /// has an error of up to 2ULP
 215 pub fn sin_cos_pi_f32<Ctx: Context>(ctx: Ctx, x: Ctx::VecF32) -> (Ctx::VecF32, Ctx::VecF32) {
 216     sin_cos_pi_impl(ctx, x, sin_pi_kernel_f32, cos_pi_kernel_f32)
 217 }
 218
 219 /// computes `sin(pi * x)`
 220 /// not guaranteed to give correct sign for zero results
 221 /// has an error of up to 2ULP
 222 pub fn sin_pi_f32<Ctx: Context>(ctx: Ctx, x: Ctx::VecF32) -> Ctx::VecF32 {
 223     sin_cos_pi_f32(ctx, x).0
 224 }
 225
 226 /// computes `cos(pi * x)`
 227 /// not guaranteed to give correct sign for zero results
 228 /// has an error of up to 2ULP
 229 pub fn cos_pi_f32<Ctx: Context>(ctx: Ctx, x: Ctx::VecF32) -> Ctx::VecF32 {
 230     sin_cos_pi_f32(ctx, x).1
 231 }
 232
 233 /// computes `(sin(pi * x), cos(pi * x))`
 234 /// not guaranteed to give correct sign for zero results
 235 /// has an error of up to 2ULP
 236 pub fn sin_cos_pi_f64<Ctx: Context>(ctx: Ctx, x: Ctx::VecF64) -> (Ctx::VecF64, Ctx::VecF64) {
 237     sin_cos_pi_impl(ctx, x, sin_pi_kernel_f64, cos_pi_kernel_f64)
 238 }
 239
 240 /// computes `sin(pi * x)`
 241 /// not guaranteed to give correct sign for zero results
 242 /// has an error of up to 2ULP
 243 pub fn sin_pi_f64<Ctx: Context>(ctx: Ctx, x: Ctx::VecF64) -> Ctx::VecF64 {
 244     sin_cos_pi_f64(ctx, x).0
 245 }
 246
 247 /// computes `cos(pi * x)`
 248 /// not guaranteed to give correct sign for zero results
 249 /// has an error of up to 2ULP
 250 pub fn cos_pi_f64<Ctx: Context>(ctx: Ctx, x: Ctx::VecF64) -> Ctx::VecF64 {
 251     sin_cos_pi_f64(ctx, x).1
 252 }
 253
 254 #[cfg(test)]
 255 mod tests {
 256     use super::*;
 257     use crate::{
 258         f16::F16,
 259         scalar::{Scalar, Value},
 260     };
 261     use std::f64;
 262
    /// argument passed by `check_ulp` to its `is_ok` callback when the result
    /// and the expected value differ and neither of them is a nan
    struct CheckUlpCallbackArg<F, I> {
        /// absolute difference between the bit patterns of `expected` and `result`
        distance_in_ulp: I,
        /// input value that was fed to the function under test
        x: F,
        /// reference value, converted to the tested float type
        expected: F,
        /// value returned by the function under test
        result: F,
    }
 269
 270     #[track_caller]
 271     fn check_ulp<T: PrimFloat>(
 272         x: T,
 273         is_ok: impl Fn(CheckUlpCallbackArg<T, u64>) -> bool,
 274         fn_f16: impl Fn(T) -> T,
 275         fn_reference: impl Fn(f64) -> f64,
 276     ) {
 277         let x_f64: f64 = x.to();
 278         let expected_f64 = fn_reference(x_f64);
 279         let expected: T = expected_f64.to();
 280         let result = fn_f16(x);
 281         if result == expected {
 282             return;
 283         }
 284         if result.is_nan() && expected.is_nan() {
 285             return;
 286         }
 287         let expected_bits: i64 = expected.to_bits().to();
 288         let result_bits: i64 = result.to_bits().to();
 289         let distance_in_ulp = (expected_bits - result_bits).unsigned_abs();
 290         if !result.is_nan()
 291             && !expected.is_nan()
 292             && is_ok(CheckUlpCallbackArg {
 293                 distance_in_ulp,
 294                 x,
 295                 expected,
 296                 result,
 297             })
 298         {
 299             return;
 300         }
 301         panic!(
 302             "error is too big: \
 303                 x = {x:?} {x_bits:#X}, \
 304                 result = {result:?} {result_bits:#X}, \
 305                 expected = {expected:?} {expected_bits:#X}, \
 306                 distance_in_ulp = {distance_in_ulp}",
 307             x = x,
 308             x_bits = x.to_bits(),
 309             result = result,
 310             result_bits = result.to_bits(),
 311             expected = expected,
 312             expected_bits = expected.to_bits(),
 313             distance_in_ulp = distance_in_ulp,
 314         );
 315     }
 316
 317     #[test]
 318     #[cfg_attr(
 319         not(feature = "f16"),
 320         should_panic(expected = "f16 feature is not enabled")
 321     )]
 322     fn test_sin_pi_kernel_f16() {
 323         let check = |x| {
 324             check_ulp(
 325                 x,
 326                 |arg| arg.distance_in_ulp <= if arg.expected == 0.to() { 0 } else { 2 },
 327                 |x| sin_pi_kernel_f16(Scalar, Value(x)).0,
 328                 |x| (f64::consts::PI * x).sin(),
 329             )
 330         };
 331         let quarter = F16::to_bits(0.25f32.to());
 332         for bits in (0..=quarter).rev() {
 333             check(F16::from_bits(bits));
 334             check(-F16::from_bits(bits));
 335         }
 336     }
 337
 338     #[test]
 339     #[cfg_attr(
 340         not(feature = "f16"),
 341         should_panic(expected = "f16 feature is not enabled")
 342     )]
 343     fn test_cos_pi_kernel_f16() {
 344         let check = |x| {
 345             check_ulp(
 346                 x,
 347                 |arg| arg.distance_in_ulp <= 2 && arg.result <= 1.to(),
 348                 |x| cos_pi_kernel_f16(Scalar, Value(x)).0,
 349                 |x| (f64::consts::PI * x).cos(),
 350             )
 351         };
 352         let quarter = F16::to_bits(0.25f32.to());
 353         for bits in (0..=quarter).rev() {
 354             check(F16::from_bits(bits));
 355             check(-F16::from_bits(bits));
 356         }
 357     }
 358
 359     #[test]
 360     #[cfg(feature = "full_tests")]
 361     fn test_sin_pi_kernel_f32() {
 362         let check = |x| {
 363             check_ulp(
 364                 x,
 365                 |arg| arg.distance_in_ulp <= if arg.expected == 0. { 0 } else { 2 },
 366                 |x| sin_pi_kernel_f32(Scalar, Value(x)).0,
 367                 |x| (f64::consts::PI * x).sin(),
 368             )
 369         };
 370         let quarter = 0.25f32.to_bits();
 371         for bits in (0..=quarter).rev() {
 372             check(f32::from_bits(bits));
 373             check(-f32::from_bits(bits));
 374         }
 375     }
 376
 377     #[test]
 378     #[cfg(feature = "full_tests")]
 379     fn test_cos_pi_kernel_f32() {
 380         let check = |x| {
 381             check_ulp(
 382                 x,
 383                 |arg| arg.distance_in_ulp <= 2 && arg.result <= 1.,
 384                 |x| cos_pi_kernel_f32(Scalar, Value(x)).0,
 385                 |x| (f64::consts::PI * x).cos(),
 386             )
 387         };
 388         let quarter = 0.25f32.to_bits();
 389         for bits in (0..=quarter).rev() {
 390             check(f32::from_bits(bits));
 391             check(-f32::from_bits(bits));
 392         }
 393     }
 394
 395     #[test]
 396     #[cfg(feature = "full_tests")]
 397     fn test_sin_pi_kernel_f64() {
 398         let check = |x| {
 399             check_ulp(
 400                 x,
 401                 sin_cos_pi_check_ulp_callback,
 402                 |x| sin_pi_kernel_f64(Scalar, Value(x)).0,
 403                 |x| reference_sin_cos_pi_f64(x).0,
 404             )
 405         };
 406         let quarter = 0.25f32.to_bits();
 407         for bits in (0..=quarter).rev().step_by(1 << 5) {
 408             check(f32::from_bits(bits) as f64);
 409             check(-f32::from_bits(bits) as f64);
 410         }
 411     }
 412
 413     #[test]
 414     #[cfg(feature = "full_tests")]
 415     fn test_cos_pi_kernel_f64() {
 416         let check = |x| {
 417             check_ulp(
 418                 x,
 419                 sin_cos_pi_check_ulp_callback,
 420                 |x| cos_pi_kernel_f64(Scalar, Value(x)).0,
 421                 |x| reference_sin_cos_pi_f64(x).1,
 422             )
 423         };
 424         let quarter = 0.25f32.to_bits();
 425         for bits in (0..=quarter).rev().step_by(1 << 5) {
 426             check(f32::from_bits(bits) as f64);
 427             check(-f32::from_bits(bits) as f64);
 428         }
 429     }
 430
 431     fn sin_cos_pi_check_ulp_callback<F: PrimFloat>(arg: CheckUlpCallbackArg<F, u64>) -> bool {
 432         if arg.x % 0.5.to() == 0.0.to() {
 433             arg.distance_in_ulp == 0
 434         } else {
 435             arg.distance_in_ulp <= 2 && arg.result.abs() <= 1.to()
 436         }
 437     }
 438
 439     #[test]
 440     #[cfg_attr(
 441         not(feature = "f16"),
 442         should_panic(expected = "f16 feature is not enabled")
 443     )]
 444     fn test_sin_pi_f16() {
 445         for bits in 0..=u16::MAX {
 446             check_ulp(
 447                 F16::from_bits(bits),
 448                 sin_cos_pi_check_ulp_callback,
 449                 |x| sin_pi_f16(Scalar, Value(x)).0,
 450                 |x| (f64::consts::PI * x).sin(),
 451             );
 452         }
 453     }
 454
 455     #[test]
 456     #[cfg_attr(
 457         not(feature = "f16"),
 458         should_panic(expected = "f16 feature is not enabled")
 459     )]
 460     fn test_cos_pi_f16() {
 461         for bits in 0..=u16::MAX {
 462             check_ulp(
 463                 F16::from_bits(bits),
 464                 sin_cos_pi_check_ulp_callback,
 465                 |x| cos_pi_f16(Scalar, Value(x)).0,
 466                 |x| (f64::consts::PI * x).cos(),
 467             );
 468         }
 469     }
 470
 471     fn reference_sin_cos_pi_f32(mut v: f64) -> (f64, f64) {
 472         if !v.is_finite() {
 473             return (f64::NAN, f64::NAN);
 474         }
 475         v %= 2.0;
 476         if v >= 1.0 {
 477             v -= 2.0;
 478         } else if v <= -1.0 {
 479             v += 2.0;
 480         }
 481         v *= 2.0;
 482         let part = v.round() as i32;
 483         v -= part as f64;
 484         v *= f64::consts::PI / 2.0;
 485         let (sin, cos) = v.sin_cos();
 486         match part {
 487             0 => (sin, cos),
 488             1 => (cos, -sin),
 489             2 => (-sin, -cos),
 490             -2 => (-sin, -cos),
 491             -1 => (-cos, sin),
 492             _ => panic!("not implemented: part={}", part),
 493         }
 494     }
 495
 496     fn reference_sin_cos_pi_f64(mut v: f64) -> (f64, f64) {
 497         use az::Cast;
 498         use rug::{float::Constant, Float};
 499         if !v.is_finite() {
 500             return (f64::NAN, f64::NAN);
 501         }
 502         v %= 2.0;
 503         if v >= 1.0 {
 504             v -= 2.0;
 505         } else if v <= -1.0 {
 506             v += 2.0;
 507         }
 508         v *= 2.0;
 509         let part = v.round() as i32;
 510         v -= part as f64;
 511         let precision = 100;
 512         let mut v = Float::with_val(precision, v);
 513         let pi = Float::with_val(precision, Constant::Pi);
 514         let pi_2 = pi / 2;
 515         v *= &pi_2;
 516         let cos = pi_2; // just a temp var, value is ignored
 517         let (sin, cos) = v.sin_cos(cos);
 518         let sin: f64 = sin.cast();
 519         let cos: f64 = cos.cast();
 520         match part {
 521             0 => (sin, cos),
 522             1 => (cos, -sin),
 523             2 => (-sin, -cos),
 524             -2 => (-sin, -cos),
 525             -1 => (-cos, sin),
 526             _ => panic!("not implemented: part={}", part),
 527         }
 528     }
 529
 530     macro_rules! test_reference_sin_cos_pi_test_cases {
 531         ($case:expr, $ty:ident) => {
 532             $case($ty::NAN, $ty::NAN, $ty::NAN);
 533             $case($ty::INFINITY, $ty::NAN, $ty::NAN);
 534             $case(-$ty::INFINITY, $ty::NAN, $ty::NAN);
 535             $case(-4., 0., 1.);
 536             $case(
 537                 -3.875,
 538                 0.38268343236508977172845998403039886676134456248563,
 539                 0.92387953251128675612818318939678828682241662586364,
 540             );
 541             $case(
 542                 -3.75,
 543                 0.70710678118654752440084436210484903928483593768847,
 544                 0.70710678118654752440084436210484903928483593768847,
 545             );
 546             $case(
 547                 -3.625,
 548                 0.92387953251128675612818318939678828682241662586364,
 549                 0.38268343236508977172845998403039886676134456248563,
 550             );
 551             $case(-3.5, 1., -0.);
 552             $case(
 553                 -3.375,
 554                 0.92387953251128675612818318939678828682241662586364,
 555                 -0.38268343236508977172845998403039886676134456248563,
 556             );
 557             $case(
 558                 -3.25,
 559                 0.70710678118654752440084436210484903928483593768847,
 560                 -0.70710678118654752440084436210484903928483593768847,
 561             );
 562             $case(
 563                 -3.125,
 564                 0.38268343236508977172845998403039886676134456248563,
 565                 -0.92387953251128675612818318939678828682241662586364,
 566             );
 567             $case(-3., -0., -1.);
 568             $case(
 569                 -2.875,
 570                 -0.38268343236508977172845998403039886676134456248563,
 571                 -0.92387953251128675612818318939678828682241662586364,
 572             );
 573             $case(
 574                 -2.75,
 575                 -0.70710678118654752440084436210484903928483593768847,
 576                 -0.70710678118654752440084436210484903928483593768847,
 577             );
 578             $case(
 579                 -2.625,
 580                 -0.92387953251128675612818318939678828682241662586364,
 581                 -0.38268343236508977172845998403039886676134456248563,
 582             );
 583             $case(-2.5, -1., 0.);
 584             $case(
 585                 -2.375,
 586                 -0.92387953251128675612818318939678828682241662586364,
 587                 0.38268343236508977172845998403039886676134456248563,
 588             );
 589             $case(
 590                 -2.25,
 591                 -0.70710678118654752440084436210484903928483593768847,
 592                 0.70710678118654752440084436210484903928483593768847,
 593             );
 594             $case(
 595                 -2.125,
 596                 -0.38268343236508977172845998403039886676134456248563,
 597                 0.92387953251128675612818318939678828682241662586364,
 598             );
 599             $case(-2., 0., 1.);
 600             $case(
 601                 -1.875,
 602                 0.38268343236508977172845998403039886676134456248563,
 603                 0.92387953251128675612818318939678828682241662586364,
 604             );
 605             $case(
 606                 -1.75,
 607                 0.70710678118654752440084436210484903928483593768847,
 608                 0.70710678118654752440084436210484903928483593768847,
 609             );
 610             $case(
 611                 -1.625,
 612                 0.92387953251128675612818318939678828682241662586364,
 613                 0.38268343236508977172845998403039886676134456248563,
 614             );
 615             $case(-1.5, 1., -0.);
 616             $case(
 617                 -1.375,
 618                 0.92387953251128675612818318939678828682241662586364,
 619                 -0.38268343236508977172845998403039886676134456248563,
 620             );
 621             $case(
 622                 -1.25,
 623                 0.70710678118654752440084436210484903928483593768847,
 624                 -0.70710678118654752440084436210484903928483593768847,
 625             );
 626             $case(
 627                 -1.125,
 628                 0.38268343236508977172845998403039886676134456248563,
 629                 -0.92387953251128675612818318939678828682241662586364,
 630             );
 631             $case(-1., -0., -1.);
 632             $case(
 633                 -0.875,
 634                 -0.38268343236508977172845998403039886676134456248563,
 635                 -0.92387953251128675612818318939678828682241662586364,
 636             );
 637             $case(
 638                 -0.75,
 639                 -0.70710678118654752440084436210484903928483593768847,
 640                 -0.70710678118654752440084436210484903928483593768847,
 641             );
 642             $case(
 643                 -0.625,
 644                 -0.92387953251128675612818318939678828682241662586364,
 645                 -0.38268343236508977172845998403039886676134456248563,
 646             );
 647             $case(-0.5, -1., 0.);
 648             $case(
 649                 -0.375,
 650                 -0.92387953251128675612818318939678828682241662586364,
 651                 0.38268343236508977172845998403039886676134456248563,
 652             );
 653             $case(
 654                 -0.25,
 655                 -0.70710678118654752440084436210484903928483593768847,
 656                 0.70710678118654752440084436210484903928483593768847,
 657             );
 658             $case(
 659                 -0.125,
 660                 -0.38268343236508977172845998403039886676134456248563,
 661                 0.92387953251128675612818318939678828682241662586364,
 662             );
 663             $case(0., 0., 1.);
 664             $case(
 665                 0.125,
 666                 0.38268343236508977172845998403039886676134456248563,
 667                 0.92387953251128675612818318939678828682241662586364,
 668             );
 669             $case(
 670                 0.25,
 671                 0.70710678118654752440084436210484903928483593768847,
 672                 0.70710678118654752440084436210484903928483593768847,
 673             );
 674             $case(
 675                 0.375,
 676                 0.92387953251128675612818318939678828682241662586364,
 677                 0.38268343236508977172845998403039886676134456248563,
 678             );
 679             $case(0.5, 1., 0.);
 680             $case(
 681                 0.625,
 682                 0.92387953251128675612818318939678828682241662586364,
 683                 -0.38268343236508977172845998403039886676134456248563,
 684             );
 685             $case(
 686                 0.75,
 687                 0.70710678118654752440084436210484903928483593768847,
 688                 -0.70710678118654752440084436210484903928483593768847,
 689             );
 690             $case(
 691                 0.875,
 692                 0.38268343236508977172845998403039886676134456248563,
 693                 -0.92387953251128675612818318939678828682241662586364,
 694             );
 695             $case(1., 0., -1.);
 696             $case(
 697                 1.125,
 698                 -0.38268343236508977172845998403039886676134456248563,
 699                 -0.92387953251128675612818318939678828682241662586364,
 700             );
 701             $case(
 702                 1.25,
 703                 -0.70710678118654752440084436210484903928483593768847,
 704                 -0.70710678118654752440084436210484903928483593768847,
 705             );
 706             $case(
 707                 1.375,
 708                 -0.92387953251128675612818318939678828682241662586364,
 709                 -0.38268343236508977172845998403039886676134456248563,
 710             );
 711             $case(1.5, -1., -0.);
 712             $case(
 713                 1.625,
 714                 -0.92387953251128675612818318939678828682241662586364,
 715                 0.38268343236508977172845998403039886676134456248563,
 716             );
 717             $case(
 718                 1.75,
 719                 -0.70710678118654752440084436210484903928483593768847,
 720                 0.70710678118654752440084436210484903928483593768847,
 721             );
 722             $case(
 723                 1.875,
 724                 -0.38268343236508977172845998403039886676134456248563,
 725                 0.92387953251128675612818318939678828682241662586364,
 726             );
 727             $case(2., -0., 1.);
 728             $case(
 729                 2.125,
 730                 0.38268343236508977172845998403039886676134456248563,
 731                 0.92387953251128675612818318939678828682241662586364,
 732             );
 733             $case(
 734                 2.25,
 735                 0.70710678118654752440084436210484903928483593768847,
 736                 0.70710678118654752440084436210484903928483593768847,
 737             );
 738             $case(
 739                 2.375,
 740                 0.92387953251128675612818318939678828682241662586364,
 741                 0.38268343236508977172845998403039886676134456248563,
 742             );
 743             $case(2.5, 1., 0.);
 744             $case(
 745                 2.625,
 746                 0.92387953251128675612818318939678828682241662586364,
 747                 -0.38268343236508977172845998403039886676134456248563,
 748             );
 749             $case(
 750                 2.75,
 751                 0.70710678118654752440084436210484903928483593768847,
 752                 -0.70710678118654752440084436210484903928483593768847,
 753             );
 754             $case(
 755                 2.875,
 756                 0.38268343236508977172845998403039886676134456248563,
 757                 -0.92387953251128675612818318939678828682241662586364,
 758             );
 759             $case(3., 0., -1.);
 760             $case(
 761                 3.125,
 762                 -0.38268343236508977172845998403039886676134456248563,
 763                 -0.92387953251128675612818318939678828682241662586364,
 764             );
 765             $case(
 766                 3.25,
 767                 -0.70710678118654752440084436210484903928483593768847,
 768                 -0.70710678118654752440084436210484903928483593768847,
 769             );
 770             $case(
 771                 3.375,
 772                 -0.92387953251128675612818318939678828682241662586364,
 773                 -0.38268343236508977172845998403039886676134456248563,
 774             );
 775             $case(3.5, -1., -0.);
 776             $case(
 777                 3.625,
 778                 -0.92387953251128675612818318939678828682241662586364,
 779                 0.38268343236508977172845998403039886676134456248563,
 780             );
 781             $case(
 782                 3.75,
 783                 -0.70710678118654752440084436210484903928483593768847,
 784                 0.70710678118654752440084436210484903928483593768847,
 785             );
 786             $case(
 787                 3.875,
 788                 -0.38268343236508977172845998403039886676134456248563,
 789                 0.92387953251128675612818318939678828682241662586364,
 790             );
 791             $case(4., -0., 1.);
 792         };
 793     }
 794
 795     /// Checks `reference_sin_cos_pi_f32` against the shared `(x, sin, cos)` case table.
 796     #[test]
 797     fn test_reference_sin_cos_pi_f32() {
 798         /// Two finite values match when they differ by less than `1e-6`;
 799         /// otherwise they must compare equal or both be NaN.
 800         fn approx_same(got: f32, want: f32) -> bool {
 801             match (got.is_finite(), want.is_finite()) {
 802                 (true, true) => (got - want).abs() < 1e-6,
 803                 _ => got == want || (got.is_nan() && want.is_nan()),
 804             }
 805         }
 806         #[track_caller]
 807         fn case(x: f32, expected_sin: f32, expected_cos: f32) {
 808             let (ref_sin, ref_cos) = reference_sin_cos_pi_f32(x as f64);
 809             let sin_ok = approx_same(ref_sin as f32, expected_sin);
 810             let cos_ok = approx_same(ref_cos as f32, expected_cos);
 811             assert!(
 812                 sin_ok && cos_ok,
 813                 "case failed: x={x}, expected_sin={expected_sin}, expected_cos={expected_cos}, ref_sin={ref_sin}, ref_cos={ref_cos}",
 814                 x = x, expected_sin = expected_sin, expected_cos = expected_cos,
 815                 ref_sin = ref_sin, ref_cos = ref_cos,
 816             );
 817         }
 818         test_reference_sin_cos_pi_test_cases!(case, f32);
 819     }
 820
 821     /// Checks `reference_sin_cos_pi_f64` against the shared `(x, sin, cos)` case table,
 822     /// requiring every result to match its expected value exactly.
 823     #[test]
 824     fn test_reference_sin_cos_pi_f64() {
 825         /// Exact `==` comparison, except that two NaNs also count as matching.
 826         /// Because it is `==`-based, `0.` and `-0.` are not told apart, so the
 827         /// signed zeros in the case table are only checked by magnitude.
 828         fn same(got: f64, want: f64) -> bool {
 829             got == want || (got.is_nan() && want.is_nan())
 830         }
 831         #[track_caller]
 832         fn case(x: f64, expected_sin: f64, expected_cos: f64) {
 833             let (ref_sin, ref_cos) = reference_sin_cos_pi_f64(x);
 834             let sin_ok = same(ref_sin, expected_sin);
 835             let cos_ok = same(ref_cos, expected_cos);
 836             assert!(
 837                 sin_ok && cos_ok,
 838                 "case failed: x={x}, expected_sin={expected_sin}, expected_cos={expected_cos}, ref_sin={ref_sin}, ref_cos={ref_cos}",
 839                 x = x, expected_sin = expected_sin, expected_cos = expected_cos,
 840                 ref_sin = ref_sin, ref_cos = ref_cos,
 841             );
 842         }
 843         test_reference_sin_cos_pi_test_cases!(case, f64);
 844     }
 845
 846     #[test]
 847     #[cfg(feature = "full_tests")]
 848     fn test_sin_pi_f32() {
 849         (0..=u32::MAX).map(f32::from_bits).for_each(|input| { // every `f32` bit pattern
 850             check_ulp(
 851                 input,
 852                 sin_cos_pi_check_ulp_callback,
 853                 |v| sin_pi_f32(Scalar, Value(v)).0, // implementation under test
 854                 |v| reference_sin_cos_pi_f32(v).0, // reference sine (`.0` of the pair)
 855             );
 856         });
 857     }
 858
 859     #[test]
 860     #[cfg(feature = "full_tests")]
 861     fn test_cos_pi_f32() {
 862         (0..=u32::MAX).map(f32::from_bits).for_each(|input| { // every `f32` bit pattern
 863             check_ulp(
 864                 input,
 865                 sin_cos_pi_check_ulp_callback,
 866                 |v| cos_pi_f32(Scalar, Value(v)).0, // implementation under test
 867                 |v| reference_sin_cos_pi_f32(v).1, // reference cosine (`.1` of the pair)
 868             );
 869         });
 870     }
 871
 872     #[test]
 873     #[cfg(feature = "full_tests")]
 874     fn test_sin_pi_f64() {
 875         (0..=u32::MAX).step_by(1 << 7).for_each(|bits| { // every 128th `f32` bit pattern
 876             check_ulp(
 877                 f64::from(f32::from_bits(bits)), // widened losslessly to `f64`
 878                 sin_cos_pi_check_ulp_callback,
 879                 |v| sin_pi_f64(Scalar, Value(v)).0, // implementation under test
 880                 |v| reference_sin_cos_pi_f64(v).0, // reference sine (`.0` of the pair)
 881             );
 882         });
 883     }
 884
 885     #[test]
 886     #[cfg(feature = "full_tests")]
 887     fn test_cos_pi_f64() {
 888         (0..=u32::MAX).step_by(1 << 7).for_each(|bits| { // every 128th `f32` bit pattern
 889             check_ulp(
 890                 f64::from(f32::from_bits(bits)), // widened losslessly to `f64`
 891                 sin_cos_pi_check_ulp_callback,
 892                 |v| cos_pi_f64(Scalar, Value(v)).0, // implementation under test
 893                 |v| reference_sin_cos_pi_f64(v).1, // reference cosine (`.1` of the pair)
 894             );
 895         });
 896     }
 897 }