about summary refs log tree commit diff
diff options
context:
space:
mode:
authorFolkert de Vries <folkert@folkertdev.nl>2025-07-27 23:27:40 +0200
committerGitHub <noreply@github.com>2025-07-27 17:27:40 -0400
commit9c683d3487d8966dad182bc7ad2524bf0bb6d797 (patch)
treebf6ad0e319007035018ca281d3b0a7b19ef49da0
parentc061e73d9ff3fa07dcb005a40453e124302bdeb8 (diff)
downloadrust-9c683d3487d8966dad182bc7ad2524bf0bb6d797.tar.gz
rust-9c683d3487d8966dad182bc7ad2524bf0bb6d797.zip
Implement `floor` and `ceil` in assembly on `i586`
Fixes: https://github.com/rust-lang/compiler-builtins/issues/837

The assembly is based on

- https://github.com/NetBSD/src/blob/20433927938987dd64c8f6aa46904b7aca3fa39e/lib/libm/arch/i387/s_floor.S
- https://github.com/NetBSD/src/blob/20433927938987dd64c8f6aa46904b7aca3fa39e/lib/libm/arch/i387/s_ceil.S

Both files state:

    /*
     * Written by J.T. Conklin <jtc@NetBSD.org>.
     * Public domain.
     */

Since both sources are in the public domain, I believe we're good in terms of licensing.
-rw-r--r--library/compiler-builtins/libm-test/src/precision.rs22
-rw-r--r--library/compiler-builtins/libm/src/math/arch/i586.rs85
2 files changed, 55 insertions, 52 deletions
diff --git a/library/compiler-builtins/libm-test/src/precision.rs b/library/compiler-builtins/libm-test/src/precision.rs
index 32825b15d47..3fb8c1b3710 100644
--- a/library/compiler-builtins/libm-test/src/precision.rs
+++ b/library/compiler-builtins/libm-test/src/precision.rs
@@ -272,18 +272,6 @@ impl MaybeOverride<(f32,)> for SpecialCase {
 impl MaybeOverride<(f64,)> for SpecialCase {
     fn check_float<F: Float>(input: (f64,), actual: F, expected: F, ctx: &CheckCtx) -> CheckAction {
         if cfg!(x86_no_sse)
-            && ctx.base_name == BaseName::Ceil
-            && ctx.basis == CheckBasis::Musl
-            && input.0 < 0.0
-            && input.0 > -1.0
-            && expected == F::ZERO
-            && actual == F::ZERO
-        {
-            // musl returns -0.0, we return +0.0
-            return XFAIL("i586 ceil signed zero");
-        }
-
-        if cfg!(x86_no_sse)
             && (ctx.base_name == BaseName::Rint || ctx.base_name == BaseName::Roundeven)
             && (expected - actual).abs() <= F::ONE
             && (expected - actual).abs() > F::ZERO
@@ -293,16 +281,6 @@ impl MaybeOverride<(f64,)> for SpecialCase {
         }
 
         if cfg!(x86_no_sse)
-            && (ctx.fn_ident == Identifier::Ceil || ctx.fn_ident == Identifier::Floor)
-            && expected.eq_repr(F::NEG_ZERO)
-            && actual.eq_repr(F::ZERO)
-        {
-            // FIXME: the x87 implementations do not keep the distinction between -0.0 and 0.0.
-            // See https://github.com/rust-lang/libm/pull/404#issuecomment-2572399955
-            return XFAIL("i586 ceil/floor signed zero");
-        }
-
-        if cfg!(x86_no_sse)
             && (ctx.fn_ident == Identifier::Exp10 || ctx.fn_ident == Identifier::Exp2)
         {
             // FIXME: i586 has very imprecise results with ULP > u32::MAX for these
diff --git a/library/compiler-builtins/libm/src/math/arch/i586.rs b/library/compiler-builtins/libm/src/math/arch/i586.rs
index f92b9a2af71..b9a66762063 100644
--- a/library/compiler-builtins/libm/src/math/arch/i586.rs
+++ b/library/compiler-builtins/libm/src/math/arch/i586.rs
@@ -1,37 +1,62 @@
 //! Architecture-specific support for x86-32 without SSE2
+//!
+//! We use an alternative implementation on x86 because the
+//! main implementation fails with the x87 FPU used by
+//! Debian i386, probably due to excess-precision issues.
+//!
+//! See https://github.com/rust-lang/compiler-builtins/pull/976 for discussion on why these
+//! functions are implemented in this way.
 
-use super::super::fabs;
-
-/// Use an alternative implementation on x86, because the
-/// main implementation fails with the x87 FPU used by
-/// debian i386, probably due to excess precision issues.
-/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219.
-pub fn ceil(x: f64) -> f64 {
-    if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() {
-        let truncated = x as i64 as f64;
-        if truncated < x {
-            return truncated + 1.0;
-        } else {
-            return truncated;
-        }
-    } else {
-        return x;
+pub fn ceil(mut x: f64) -> f64 {
+    unsafe {
+        core::arch::asm!(
+            "fld qword ptr [{x}]",
+            // Save the FPU control word, using `x` as scratch space.
+            "fstcw [{x}]",
+            // Set rounding control to 0b10 (+∞).
+            "mov word ptr [{x} + 2], 0x0b7f",
+            "fldcw [{x} + 2]",
+            // Round.
+            "frndint",
+            // Restore FPU control word.
+            "fldcw [{x}]",
+            // Save rounded value to memory.
+            "fstp qword ptr [{x}]",
+            x = in(reg) &mut x,
+            // The x87 FPU stack is used, so all of its registers must be marked as clobbered.
+            out("st(0)") _, out("st(1)") _,
+            out("st(2)") _, out("st(3)") _,
+            out("st(4)") _, out("st(5)") _,
+            out("st(6)") _, out("st(7)") _,
+            options(nostack),
+        );
     }
+    x
 }
 
-/// Use an alternative implementation on x86, because the
-/// main implementation fails with the x87 FPU used by
-/// debian i386, probably due to excess precision issues.
-/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219.
-pub fn floor(x: f64) -> f64 {
-    if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() {
-        let truncated = x as i64 as f64;
-        if truncated > x {
-            return truncated - 1.0;
-        } else {
-            return truncated;
-        }
-    } else {
-        return x;
+pub fn floor(mut x: f64) -> f64 {
+    unsafe {
+        core::arch::asm!(
+            "fld qword ptr [{x}]",
+            // Save the FPU control word, using `x` as scratch space.
+            "fstcw [{x}]",
+            // Set rounding control to 0b01 (-∞).
+            "mov word ptr [{x} + 2], 0x077f",
+            "fldcw [{x} + 2]",
+            // Round.
+            "frndint",
+            // Restore FPU control word.
+            "fldcw [{x}]",
+            // Save rounded value to memory.
+            "fstp qword ptr [{x}]",
+            x = in(reg) &mut x,
+            // The x87 FPU stack is used, so all of its registers must be marked as clobbered.
+            out("st(0)") _, out("st(1)") _,
+            out("st(2)") _, out("st(3)") _,
+            out("st(4)") _, out("st(5)") _,
+            out("st(6)") _, out("st(7)") _,
+            options(nostack),
+        );
     }
+    x
 }