about summary refs log tree commit diff
diff options
context:
space:
mode:
author Trevor Gross <tmgross@umich.edu> 2025-04-09 01:56:14 +0000
committer Trevor Gross <t.gross35@gmail.com> 2025-04-08 22:04:58 -0500
commit 4e5cbbeda169377e5025a979105d0dac31bf560a (patch)
tree f2120e4a7e8ee746a1530193ea110afc8360e34b
parent 725759602a1ca7bca8f2acf1b2cb13e7cb7fa01d (diff)
download rust-4e5cbbeda169377e5025a979105d0dac31bf560a.tar.gz
rust-4e5cbbeda169377e5025a979105d0dac31bf560a.zip
Replace calls to `core::arch` intrinsics with assembly
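Some backends may replace calls to the `core::arch` intrinsics with
multiple calls to `sqrt` [1]. Because these functions are themselves the
implementations of `sqrt` and `sqrtf`, such a call becomes recursive.
Help mitigate this by replacing the intrinsic calls with inline assembly.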

Results in the same assembly as the current implementation when built
with optimizations.

[1]: https://github.com/rust-lang/compiler-builtins/issues/649
-rw-r--r-- library/compiler-builtins/libm/src/math/arch/i686.rs | 35
1 files changed, 20 insertions, 15 deletions
diff --git a/library/compiler-builtins/libm/src/math/arch/i686.rs b/library/compiler-builtins/libm/src/math/arch/i686.rs
index ad54d8b61d7..3e1d19bfab6 100644
--- a/library/compiler-builtins/libm/src/math/arch/i686.rs
+++ b/library/compiler-builtins/libm/src/math/arch/i686.rs
@@ -1,22 +1,27 @@
 //! Architecture-specific support for x86-32 and x86-64 with SSE2
 
-#[cfg(target_arch = "x86")]
-use core::arch::x86::*;
-#[cfg(target_arch = "x86_64")]
-use core::arch::x86_64::*;
-
-pub fn sqrtf(x: f32) -> f32 {
+pub fn sqrtf(mut x: f32) -> f32 {
+    // SAFETY: `sqrtss` is part of `sse2`, which this module is gated behind. It has no memory
+    // access or side effects.
     unsafe {
-        let m = _mm_set_ss(x);
-        let m_sqrt = _mm_sqrt_ss(m);
-        _mm_cvtss_f32(m_sqrt)
-    }
+        core::arch::asm!(
+            "sqrtss {x}, {x}",
+            x = inout(xmm_reg) x,
+            options(nostack, nomem, pure),
+        )
+    };
+    x
 }
 
-pub fn sqrt(x: f64) -> f64 {
+pub fn sqrt(mut x: f64) -> f64 {
+    // SAFETY: `sqrtsd` is part of `sse2`, which this module is gated behind. It has no memory
+    // access or side effects.
     unsafe {
-        let m = _mm_set_sd(x);
-        let m_sqrt = _mm_sqrt_pd(m);
-        _mm_cvtsd_f64(m_sqrt)
-    }
+        core::arch::asm!(
+            "sqrtsd {x}, {x}",
+            x = inout(xmm_reg) x,
+            options(nostack, nomem, pure),
+        )
+    };
+    x
 }