about summary refs log tree commit diff
path: root/library/compiler-builtins/libm/src/math/fma.rs
diff options
context:
space:
mode:
author: Trevor Gross <tmgross@umich.edu> 2025-04-29 22:16:41 +0000
committer: Trevor Gross <t.gross35@gmail.com> 2025-05-03 14:17:49 -0400
commit: 8995ac0448312c94f85ffe3ec92e44b85834aead (patch)
tree: c8f0572c2ce445cfb27f0597642bff3bee2e141e /library/compiler-builtins/libm/src/math/fma.rs
parent: 4a1dc96652ea5cbd7e0273df19c828324699f4db (diff)
download: rust-8995ac0448312c94f85ffe3ec92e44b85834aead.tar.gz
rust-8995ac0448312c94f85ffe3ec92e44b85834aead.zip
Use runtime feature detection for fma routines on x86
Get performance closer to the glibc implementations by adding assembly
fma routines, with runtime feature detection so they are used even if
not compiled with `+fma` (as the distributed standard library is often
not). Glibc uses ifuncs; this implementation instead stores a function
pointer in an atomic.

The detected CPU flags are also cached so that the startup cost of
detection is not repeated in calls to different functions. The feature
detection code is a slightly simplified version of `std-detect`.

Musl sources were used as a reference [1].

Fixes: https://github.com/rust-lang/rust/issues/140452 once synced

[1]: https://github.com/bminor/musl/blob/c47ad25ea3b484e10326f933e927c0bc8cded3da/src/math/x32/fma.c
Diffstat (limited to 'library/compiler-builtins/libm/src/math/fma.rs')
-rw-r--r-- library/compiler-builtins/libm/src/math/fma.rs 10
1 file changed, 8 insertions, 2 deletions
diff --git a/library/compiler-builtins/libm/src/math/fma.rs b/library/compiler-builtins/libm/src/math/fma.rs
index 78f0f8992ea..5bf473cfe06 100644
--- a/library/compiler-builtins/libm/src/math/fma.rs
+++ b/library/compiler-builtins/libm/src/math/fma.rs
@@ -19,7 +19,10 @@ pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 {
 pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
     select_implementation! {
         name: fmaf,
-        use_arch: all(target_arch = "aarch64", target_feature = "neon"),
+        use_arch: any(
+            all(target_arch = "aarch64", target_feature = "neon"),
+            target_feature = "sse2",
+        ),
         args: x, y, z,
     }
 
@@ -33,7 +36,10 @@ pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
 pub fn fma(x: f64, y: f64, z: f64) -> f64 {
     select_implementation! {
         name: fma,
-        use_arch: all(target_arch = "aarch64", target_feature = "neon"),
+        use_arch: any(
+            all(target_arch = "aarch64", target_feature = "neon"),
+            target_feature = "sse2",
+        ),
         args: x, y, z,
     }