diff options
| author | Trevor Gross <tmgross@umich.edu> | 2025-04-29 22:16:41 +0000 |
|---|---|---|
| committer | Trevor Gross <t.gross35@gmail.com> | 2025-05-03 14:17:49 -0400 |
| commit | 8995ac0448312c94f85ffe3ec92e44b85834aead (patch) | |
| tree | c8f0572c2ce445cfb27f0597642bff3bee2e141e /library/compiler-builtins/libm/src/math/fma.rs | |
| parent | 4a1dc96652ea5cbd7e0273df19c828324699f4db (diff) | |
| download | rust-8995ac0448312c94f85ffe3ec92e44b85834aead.tar.gz rust-8995ac0448312c94f85ffe3ec92e44b85834aead.zip | |
Use runtime feature detection for fma routines on x86
Get performance closer to the glibc implementations by adding assembly fma routines, with runtime feature detection so they are used even if not compiled with `+fma` (as the distributed standard library is often not). Glibc uses ifuncs; this implementation instead stores a function pointer in an atomic. The results of CPU feature detection are also cached, in order to avoid repeating the startup cost in calls to different functions. The feature detection code is a slightly simplified version of `std-detect`. Musl sources were used as a reference [1].

Fixes: https://github.com/rust-lang/rust/issues/140452 once synced

[1]: https://github.com/bminor/musl/blob/c47ad25ea3b484e10326f933e927c0bc8cded3da/src/math/x32/fma.c
Diffstat (limited to 'library/compiler-builtins/libm/src/math/fma.rs')
| -rw-r--r-- | library/compiler-builtins/libm/src/math/fma.rs | 10 |
1 files changed, 8 insertions, 2 deletions
diff --git a/library/compiler-builtins/libm/src/math/fma.rs b/library/compiler-builtins/libm/src/math/fma.rs
index 78f0f8992ea..5bf473cfe06 100644
--- a/library/compiler-builtins/libm/src/math/fma.rs
+++ b/library/compiler-builtins/libm/src/math/fma.rs
@@ -19,7 +19,10 @@ pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 {
 pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
     select_implementation! {
         name: fmaf,
-        use_arch: all(target_arch = "aarch64", target_feature = "neon"),
+        use_arch: any(
+            all(target_arch = "aarch64", target_feature = "neon"),
+            target_feature = "sse2",
+        ),
         args: x, y, z,
     }
 
@@ -33,7 +36,10 @@ pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
 pub fn fma(x: f64, y: f64, z: f64) -> f64 {
     select_implementation! {
         name: fma,
-        use_arch: all(target_arch = "aarch64", target_feature = "neon"),
+        use_arch: any(
+            all(target_arch = "aarch64", target_feature = "neon"),
+            target_feature = "sse2",
+        ),
         args: x, y, z,
     }
 
