diff options
| author | Trevor Gross <tmgross@umich.edu> | 2025-04-29 20:52:54 +0000 |
|---|---|---|
| committer | Trevor Gross <t.gross35@gmail.com> | 2025-04-29 19:09:50 -0400 |
| commit | caf337d46742aca1705c3254e8d45d7238a8435c (patch) | |
| tree | 3b70b0470898c6c7cda46c301d60e20f15c3d210 | |
| parent | 6d83a3226f1401d777da0a91019caf2c3191b685 (diff) | |
| download | rust-caf337d46742aca1705c3254e8d45d7238a8435c.tar.gz rust-caf337d46742aca1705c3254e8d45d7238a8435c.zip | |
Refactor the fma modules
Move implementations to `generic/` like the other functions. This also allows us to combine the `fma` and `fma_wide` modules.
6 files changed, 179 insertions, 175 deletions
diff --git a/library/compiler-builtins/etc/function-definitions.json b/library/compiler-builtins/etc/function-definitions.json index 3e33343c464..9e5774eaf0a 100644 --- a/library/compiler-builtins/etc/function-definitions.json +++ b/library/compiler-builtins/etc/function-definitions.json @@ -350,7 +350,7 @@ "fmaf": { "sources": [ "libm/src/math/arch/aarch64.rs", - "libm/src/math/fma_wide.rs" + "libm/src/math/fma.rs" ], "type": "f32" }, diff --git a/library/compiler-builtins/libm/src/math/fma.rs b/library/compiler-builtins/libm/src/math/fma.rs new file mode 100644 index 00000000000..78f0f8992ea --- /dev/null +++ b/library/compiler-builtins/libm/src/math/fma.rs @@ -0,0 +1,165 @@ +/* SPDX-License-Identifier: MIT */ +/* origin: musl src/math/fma.c, fmaf.c Ported to generic Rust algorithm in 2025, TG. */ + +use super::generic; +use crate::support::Round; + +// Placeholder so we can have `fmaf16` in the `Float` trait. +#[allow(unused)] +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 { + unimplemented!() +} + +/// Floating multiply add (f32) +/// +/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaf(x: f32, y: f32, z: f32) -> f32 { + select_implementation! { + name: fmaf, + use_arch: all(target_arch = "aarch64", target_feature = "neon"), + args: x, y, z, + } + + generic::fma_wide_round(x, y, z, Round::Nearest).val +} + +/// Fused multiply add (f64) +/// +/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fma(x: f64, y: f64, z: f64) -> f64 { + select_implementation! { + name: fma, + use_arch: all(target_arch = "aarch64", target_feature = "neon"), + args: x, y, z, + } + + generic::fma_round(x, y, z, Round::Nearest).val +} + +/// Fused multiply add (f128) +/// +/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 { + generic::fma_round(x, y, z, Round::Nearest).val +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::{CastFrom, CastInto, Float, FpResult, HInt, MinInt, Round, Status}; + + /// Test the generic `fma_round` algorithm for a given float. + fn spec_test<F>(f: impl Fn(F, F, F) -> F) + where + F: Float, + F: CastFrom<F::SignedInt>, + F: CastFrom<i8>, + F::Int: HInt, + u32: CastInto<F::Int>, + { + let x = F::from_bits(F::Int::ONE); + let y = F::from_bits(F::Int::ONE); + let z = F::ZERO; + + // 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result of + // fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the + // exact result" + assert_biteq!(f(x, y, z), F::ZERO); + assert_biteq!(f(x, -y, z), F::NEG_ZERO); + assert_biteq!(f(-x, y, z), F::NEG_ZERO); + assert_biteq!(f(-x, -y, z), F::ZERO); + } + + #[test] + fn spec_test_f32() { + spec_test::<f32>(fmaf); + + // Also do a small check that the non-widening version works for f32 (this should ideally + // get tested some more). + spec_test::<f32>(|x, y, z| generic::fma_round(x, y, z, Round::Nearest).val); + } + + #[test] + fn spec_test_f64() { + spec_test::<f64>(fma); + + let expect_underflow = [ + ( + hf64!("0x1.0p-1070"), + hf64!("0x1.0p-1070"), + hf64!("0x1.ffffffffffffp-1023"), + hf64!("0x0.ffffffffffff8p-1022"), + ), + ( + // FIXME: we raise underflow but this should only be inexact (based on C and + // `rustc_apfloat`). + hf64!("0x1.0p-1070"), + hf64!("0x1.0p-1070"), + hf64!("-0x1.0p-1022"), + hf64!("-0x1.0p-1022"), + ), + ]; + + for (x, y, z, res) in expect_underflow { + let FpResult { val, status } = generic::fma_round(x, y, z, Round::Nearest); + assert_biteq!(val, res); + assert_eq!(status, Status::UNDERFLOW); + } + } + + #[test] + #[cfg(f128_enabled)] + fn spec_test_f128() { + spec_test::<f128>(fmaf128); + } + + #[test] + fn issue_263() { + let a = f32::from_bits(1266679807); + let b = f32::from_bits(1300234242); + let c = f32::from_bits(1115553792); + let expected = f32::from_bits(1501560833); + assert_eq!(fmaf(a, b, c), expected); + } + + #[test] + fn fma_segfault() { + // These two inputs cause fma to segfault on release due to overflow: + assert_eq!( + fma( + -0.0000000000000002220446049250313, + -0.0000000000000002220446049250313, + -0.0000000000000002220446049250313 + ), + -0.00000000000000022204460492503126, + ); + + let result = fma(-0.992, -0.992, -0.992); + //force rounding to storage format on x87 to prevent superious errors. + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let result = force_eval!(result); + assert_eq!(result, -0.007936000000000007,); + } + + #[test] + fn fma_sbb() { + assert_eq!( + fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN), + -3991680619069439e277 + ); + } + + #[test] + fn fma_underflow() { + assert_eq!( + fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320), + 0.0, + ); + } +} diff --git a/library/compiler-builtins/libm/src/math/generic/fma.rs b/library/compiler-builtins/libm/src/math/generic/fma.rs index 8856e63f596..aaf459d1b61 100644 --- a/library/compiler-builtins/libm/src/math/generic/fma.rs +++ b/library/compiler-builtins/libm/src/math/generic/fma.rs @@ -1,31 +1,9 @@ /* SPDX-License-Identifier: MIT */ /* origin: musl src/math/fma.c. Ported to generic Rust algorithm in 2025, TG. */ -use super::support::{DInt, FpResult, HInt, IntTy, Round, Status}; -use super::{CastFrom, CastInto, Float, Int, MinInt}; - -/// Fused multiply add (f64) -/// -/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fma(x: f64, y: f64, z: f64) -> f64 { - select_implementation! { - name: fma, - use_arch: all(target_arch = "aarch64", target_feature = "neon"), - args: x, y, z, - } - - fma_round(x, y, z, Round::Nearest).val -} - -/// Fused multiply add (f128) -/// -/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). -#[cfg(f128_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 { - fma_round(x, y, z, Round::Nearest).val -} +use crate::support::{ + CastFrom, CastInto, DInt, Float, FpResult, HInt, Int, IntTy, MinInt, Round, Status, +}; /// Fused multiply-add that works when there is not a larger float size available. Computes /// `(x * y) + z`. @@ -234,7 +212,7 @@ where } // Use our exponent to scale the final value. - FpResult::new(super::generic::scalbn(r, e), status) + FpResult::new(super::scalbn(r, e), status) } /// Representation of `F` that has handled subnormals. @@ -298,106 +276,3 @@ impl<F: Float> Norm<F> { self.e > Self::ZERO_INF_NAN as i32 } } - -#[cfg(test)] -mod tests { - use super::*; - - /// Test the generic `fma_round` algorithm for a given float. - fn spec_test<F>() - where - F: Float, - F: CastFrom<F::SignedInt>, - F: CastFrom<i8>, - F::Int: HInt, - u32: CastInto<F::Int>, - { - let x = F::from_bits(F::Int::ONE); - let y = F::from_bits(F::Int::ONE); - let z = F::ZERO; - - let fma = |x, y, z| fma_round(x, y, z, Round::Nearest).val; - - // 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result of - // fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the - // exact result" - assert_biteq!(fma(x, y, z), F::ZERO); - assert_biteq!(fma(x, -y, z), F::NEG_ZERO); - assert_biteq!(fma(-x, y, z), F::NEG_ZERO); - assert_biteq!(fma(-x, -y, z), F::ZERO); - } - - #[test] - fn spec_test_f32() { - spec_test::<f32>(); - } - - #[test] - fn spec_test_f64() { - spec_test::<f64>(); - - let expect_underflow = [ - ( - hf64!("0x1.0p-1070"), - hf64!("0x1.0p-1070"), - hf64!("0x1.ffffffffffffp-1023"), - hf64!("0x0.ffffffffffff8p-1022"), - ), - ( - // FIXME: we raise underflow but this should only be inexact (based on C and - // `rustc_apfloat`). - hf64!("0x1.0p-1070"), - hf64!("0x1.0p-1070"), - hf64!("-0x1.0p-1022"), - hf64!("-0x1.0p-1022"), - ), - ]; - - for (x, y, z, res) in expect_underflow { - let FpResult { val, status } = fma_round(x, y, z, Round::Nearest); - assert_biteq!(val, res); - assert_eq!(status, Status::UNDERFLOW); - } - } - - #[test] - #[cfg(f128_enabled)] - fn spec_test_f128() { - spec_test::<f128>(); - } - - #[test] - fn fma_segfault() { - // These two inputs cause fma to segfault on release due to overflow: - assert_eq!( - fma( - -0.0000000000000002220446049250313, - -0.0000000000000002220446049250313, - -0.0000000000000002220446049250313 - ), - -0.00000000000000022204460492503126, - ); - - let result = fma(-0.992, -0.992, -0.992); - //force rounding to storage format on x87 to prevent superious errors. - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] - let result = force_eval!(result); - assert_eq!(result, -0.007936000000000007,); - } - - #[test] - fn fma_sbb() { - assert_eq!( - fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN), - -3991680619069439e277 - ); - } - - #[test] - fn fma_underflow() { - assert_eq!( - fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320), - 0.0, - ); - } -} diff --git a/library/compiler-builtins/libm/src/math/generic/fma_wide.rs b/library/compiler-builtins/libm/src/math/generic/fma_wide.rs index f268c2f144c..a2ef59d3e3d 100644 --- a/library/compiler-builtins/libm/src/math/generic/fma_wide.rs +++ b/library/compiler-builtins/libm/src/math/generic/fma_wide.rs @@ -1,30 +1,6 @@ -/* SPDX-License-Identifier: MIT */ -/* origin: musl src/math/fmaf.c. Ported to generic Rust algorithm in 2025, TG. */ - -use super::support::{FpResult, IntTy, Round, Status}; -use super::{CastFrom, CastInto, DFloat, Float, HFloat, MinInt}; - -// Placeholder so we can have `fmaf16` in the `Float` trait. -#[allow(unused)] -#[cfg(f16_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 { - unimplemented!() -} - -/// Floating multiply add (f32) -/// -/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fmaf(x: f32, y: f32, z: f32) -> f32 { - select_implementation! { - name: fmaf, - use_arch: all(target_arch = "aarch64", target_feature = "neon"), - args: x, y, z, - } - - fma_wide_round(x, y, z, Round::Nearest).val -} +use crate::support::{ + CastFrom, CastInto, DFloat, Float, FpResult, HFloat, IntTy, MinInt, Round, Status, +}; /// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`, /// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding. @@ -95,17 +71,3 @@ where FpResult::ok(B::from_bits(ui).narrow()) } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn issue_263() { - let a = f32::from_bits(1266679807); - let b = f32::from_bits(1300234242); - let c = f32::from_bits(1115553792); - let expected = f32::from_bits(1501560833); - assert_eq!(fmaf(a, b, c), expected); - } -} diff --git a/library/compiler-builtins/libm/src/math/generic/mod.rs b/library/compiler-builtins/libm/src/math/generic/mod.rs index 35846351a6e..9d497a03f54 100644 --- a/library/compiler-builtins/libm/src/math/generic/mod.rs +++ b/library/compiler-builtins/libm/src/math/generic/mod.rs @@ -6,6 +6,8 @@ mod copysign; mod fabs; mod fdim; mod floor; +mod fma; +mod fma_wide; mod fmax; mod fmaximum; mod fmaximum_num; @@ -24,6 +26,8 @@ pub use copysign::copysign; pub use fabs::fabs; pub use fdim::fdim; pub use floor::floor; +pub use fma::fma_round; +pub use fma_wide::fma_wide_round; pub use fmax::fmax; pub use fmaximum::fmaximum; pub use fmaximum_num::fmaximum_num; diff --git a/library/compiler-builtins/libm/src/math/mod.rs b/library/compiler-builtins/libm/src/math/mod.rs index 949c18b4000..ce9b8fc58bb 100644 --- a/library/compiler-builtins/libm/src/math/mod.rs +++ b/library/compiler-builtins/libm/src/math/mod.rs @@ -159,7 +159,6 @@ mod fabs; mod fdim; mod floor; mod fma; -mod fma_wide; mod fmin_fmax; mod fminimum_fmaximum; mod fminimum_fmaximum_num; @@ -254,8 +253,7 @@ pub use self::expm1f::expm1f; pub use self::fabs::{fabs, fabsf}; pub use self::fdim::{fdim, fdimf}; pub use self::floor::{floor, floorf}; -pub use self::fma::fma; -pub use self::fma_wide::fmaf; +pub use self::fma::{fma, fmaf}; pub use self::fmin_fmax::{fmax, fmaxf, fmin, fminf}; pub use self::fminimum_fmaximum::{fmaximum, fmaximumf, fminimum, fminimumf}; pub use self::fminimum_fmaximum_num::{fmaximum_num, fmaximum_numf, fminimum_num, fminimum_numf}; @@ -336,7 +334,7 @@ cfg_if! { // verify-sorted-end #[allow(unused_imports)] - pub(crate) use self::fma_wide::fmaf16; + pub(crate) use self::fma::fmaf16; } } |
