about summary refs log tree commit diff
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2024-07-12 20:36:43 +0000
committer bors <bors@rust-lang.org> 2024-07-12 20:36:43 +0000
commit c6727fc9b5c64cefa7263486497ee95e529bd0f8 (patch)
tree 97a13f2f6b89c85feb20239b77a14958fea7083c
parent 62c068feeafd1f4abbf87243d69cf8862e4dd277 (diff)
parent cae9d480bfe930a678fe2744082fc6ef1d957f63 (diff)
download rust-c6727fc9b5c64cefa7263486497ee95e529bd0f8.tar.gz
rust-c6727fc9b5c64cefa7263486497ee95e529bd0f8.zip
Auto merge of #123351 - beetrees:x86-ret-snan-rust, r=nikic,workingjubilee
Ensure floats are returned losslessly by the Rust ABI on 32-bit x86

Solves #115567 for the (default) `"Rust"` ABI. When compiling for 32-bit x86, this PR changes the `"Rust"` ABI to return floats indirectly instead of in x87 registers (with the exception of single `f32`s, which this PR returns in general purpose registers as they are small enough to fit in one). No change is made to the `"C"` ABI as that ABI requires x87 register usage and therefore will need a different solution.
-rw-r--r-- compiler/rustc_ty_utils/src/abi.rs 34
-rw-r--r-- src/doc/rustc/src/platform-support.md 6
-rw-r--r-- tests/assembly/x86-return-float.rs 328
-rw-r--r-- tests/codegen/float/f128.rs 11
-rw-r--r-- tests/codegen/float/f16.rs 11
-rw-r--r-- tests/codegen/issues/issue-32031.rs 10
-rw-r--r-- tests/codegen/union-abi.rs 10
-rw-r--r-- tests/ui/abi/numbers-arithmetic/return-float.rs 61
8 files changed, 461 insertions, 10 deletions
diff --git a/compiler/rustc_ty_utils/src/abi.rs b/compiler/rustc_ty_utils/src/abi.rs
index 4eb7b58bff9..1dced9cf7cd 100644
--- a/compiler/rustc_ty_utils/src/abi.rs
+++ b/compiler/rustc_ty_utils/src/abi.rs
@@ -743,6 +743,40 @@ fn fn_abi_adjust_for_abi<'tcx>(
                 return;
             }
 
+            // Avoid returning floats in x87 registers on x86 as loading and storing from x87
+            // registers will quiet signalling NaNs.
+            if cx.tcx.sess.target.arch == "x86"
+                && arg_idx.is_none()
+                // Intrinsics themselves are not actual "real" functions, so there's no need to
+                // change their ABIs.
+                && abi != SpecAbi::RustIntrinsic
+            {
+                match arg.layout.abi {
+                    // Handle similar to the way arguments with an `Abi::Aggregate` abi are handled
+                    // below, by returning arguments up to the size of a pointer (32 bits on x86)
+                    // cast to an appropriately sized integer.
+                    Abi::Scalar(s) if s.primitive() == Float(F32) => {
+                        // Same size as a pointer, return in a register.
+                        arg.cast_to(Reg::i32());
+                        return;
+                    }
+                    Abi::Scalar(s) if s.primitive() == Float(F64) => {
+                        // Larger than a pointer, return indirectly.
+                        arg.make_indirect();
+                        return;
+                    }
+                    Abi::ScalarPair(s1, s2)
+                        if matches!(s1.primitive(), Float(F32 | F64))
+                            || matches!(s2.primitive(), Float(F32 | F64)) =>
+                    {
+                        // Larger than a pointer, return indirectly.
+                        arg.make_indirect();
+                        return;
+                    }
+                    _ => {}
+                };
+            }
+
             match arg.layout.abi {
                 Abi::Aggregate { .. } => {}
 
diff --git a/src/doc/rustc/src/platform-support.md b/src/doc/rustc/src/platform-support.md
index f5cd4bd217a..370dbed50fa 100644
--- a/src/doc/rustc/src/platform-support.md
+++ b/src/doc/rustc/src/platform-support.md
@@ -41,10 +41,10 @@ target | notes
 `x86_64-pc-windows-msvc` | 64-bit MSVC (Windows 10+, Windows Server 2016+)
 `x86_64-unknown-linux-gnu` | 64-bit Linux (kernel 3.2+, glibc 2.17+)
 
-[^x86_32-floats-return-ABI]: Due to limitations of the C ABI, floating-point support on `i686` targets is non-compliant: floating-point return values are passed via an x87 register, so NaN payload bits can be lost. See [issue #114479][x86-32-float-issue].
+[^x86_32-floats-return-ABI]: Due to limitations of the C ABI, floating-point support on `i686` targets is non-compliant: floating-point return values are passed via an x87 register, so NaN payload bits can be lost. Functions with the default Rust ABI are not affected. See [issue #115567][x86-32-float-return-issue].
 
 [77071]: https://github.com/rust-lang/rust/issues/77071
-[x86-32-float-issue]: https://github.com/rust-lang/rust/issues/114479
+[x86-32-float-return-issue]: https://github.com/rust-lang/rust/issues/115567
 
 ## Tier 1
 
@@ -209,6 +209,8 @@ target | std | notes
 
 [^x86_32-floats-x87]: Floating-point support on `i586` targets is non-compliant: the `x87` registers and instructions used for these targets do not provide IEEE-754-compliant behavior, in particular when it comes to rounding and NaN payload bits. See [issue #114479][x86-32-float-issue].
 
+[x86-32-float-issue]: https://github.com/rust-lang/rust/issues/114479
+
 [wasi-rename]: https://github.com/rust-lang/compiler-team/issues/607
 
 [Fortanix ABI]: https://edp.fortanix.com/
diff --git a/tests/assembly/x86-return-float.rs b/tests/assembly/x86-return-float.rs
new file mode 100644
index 00000000000..c4a2c1ad44e
--- /dev/null
+++ b/tests/assembly/x86-return-float.rs
@@ -0,0 +1,328 @@
+//@ assembly-output: emit-asm
+//@ only-x86
+// FIXME(#114479): LLVM miscompiles loading and storing `f32` and `f64` when SSE is disabled.
+// There's no compiletest directive to ignore a test on i586 only, so just always explicitly enable
+// SSE2.
+// Use the same target CPU as `i686` so that LLVM orders the instructions in the same order.
+//@ compile-flags: -Ctarget-feature=+sse2 -Ctarget-cpu=pentium4
+// Force frame pointers to make ASM more consistent between targets
+//@ compile-flags: -O -C force-frame-pointers
+//@ filecheck-flags: --implicit-check-not fld --implicit-check-not fst
+//@ revisions: unix windows
+//@[unix] ignore-windows
+//@[windows] only-windows
+
+#![crate_type = "lib"]
+#![feature(f16, f128)]
+
+// Tests that returning `f32` and `f64` with the "Rust" ABI on 32-bit x86 doesn't use the x87
+// floating point stack, as loading and storing `f32`s and `f64`s to and from the x87 stack quietens
+// signalling NaNs.
+
+// Returning individual floats
+
+// CHECK-LABEL: return_f32:
+#[no_mangle]
+pub fn return_f32(x: f32) -> f32 {
+    // CHECK: movl {{.*}}(%ebp), %eax
+    // CHECK-NOT: ax
+    // CHECK: retl
+    x
+}
+
+// CHECK-LABEL: return_f64:
+#[no_mangle]
+pub fn return_f64(x: f64) -> f64 {
+    // CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
+    // CHECK-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL:.*]]
+    // CHECK-NEXT: movsd %[[VAL]], (%[[PTR]])
+    // CHECK: retl
+    x
+}
+
+// Returning scalar pairs containing floats
+
+// CHECK-LABEL: return_f32_f32:
+#[no_mangle]
+pub fn return_f32_f32(x: (f32, f32)) -> (f32, f32) {
+    // CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
+    // CHECK-NEXT: movss [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
+    // CHECK-NEXT: movss [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
+    // CHECK-NEXT: movss %[[VAL1]], (%[[PTR]])
+    // CHECK-NEXT: movss %[[VAL2]], 4(%[[PTR]])
+    // CHECK: retl
+    x
+}
+
+// CHECK-LABEL: return_f64_f64:
+#[no_mangle]
+pub fn return_f64_f64(x: (f64, f64)) -> (f64, f64) {
+    // CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
+    // CHECK-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
+    // CHECK-NEXT: movsd [[#%d,OFFSET+12]](%ebp), %[[VAL2:.*]]
+    // CHECK-NEXT: movsd %[[VAL1]], (%[[PTR]])
+    // CHECK-NEXT: movsd %[[VAL2]], 8(%[[PTR]])
+    // CHECK: retl
+    x
+}
+
+// CHECK-LABEL: return_f32_f64:
+#[no_mangle]
+pub fn return_f32_f64(x: (f32, f64)) -> (f32, f64) {
+    // CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
+    // CHECK-NEXT: movss [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
+    // CHECK-NEXT: movsd [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
+    // CHECK-NEXT: movss %[[VAL1]], (%[[PTR]])
+    // CHECK-NEXT: movsd %[[VAL2]], {{4|8}}(%[[PTR]])
+    // CHECK: retl
+    x
+}
+
+// CHECK-LABEL: return_f64_f32:
+#[no_mangle]
+pub fn return_f64_f32(x: (f64, f32)) -> (f64, f32) {
+    // CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
+    // CHECK-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
+    // CHECK-NEXT: movss [[#%d,OFFSET+12]](%ebp), %[[VAL2:.*]]
+    // CHECK-NEXT: movsd %[[VAL1]], (%[[PTR]])
+    // CHECK-NEXT: movss %[[VAL2]], 8(%[[PTR]])
+    // CHECK: retl
+    x
+}
+
+// CHECK-LABEL: return_f32_other:
+#[no_mangle]
+pub fn return_f32_other(x: (f32, usize)) -> (f32, usize) {
+    // CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
+    // CHECK-NEXT: movss [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
+    // CHECK-NEXT: movl [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
+    // CHECK-NEXT: movss %[[VAL1]], (%[[PTR]])
+    // CHECK-NEXT: movl %[[VAL2]], 4(%[[PTR]])
+    // CHECK: retl
+    x
+}
+
+// CHECK-LABEL: return_f64_other:
+#[no_mangle]
+pub fn return_f64_other(x: (f64, usize)) -> (f64, usize) {
+    // CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
+    // CHECK-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
+    // CHECK-NEXT: movl [[#%d,OFFSET+12]](%ebp), %[[VAL2:.*]]
+    // CHECK-NEXT: movsd %[[VAL1]], (%[[PTR]])
+    // CHECK-NEXT: movl %[[VAL2]], 8(%[[PTR]])
+    // CHECK: retl
+    x
+}
+
+// CHECK-LABEL: return_other_f32:
+#[no_mangle]
+pub fn return_other_f32(x: (usize, f32)) -> (usize, f32) {
+    // CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
+    // CHECK-NEXT: movl [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
+    // CHECK-NEXT: movss [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
+    // CHECK-NEXT: movl %[[VAL1]], (%[[PTR]])
+    // CHECK-NEXT: movss %[[VAL2]], 4(%[[PTR]])
+    // CHECK: retl
+    x
+}
+
+// CHECK-LABEL: return_other_f64:
+#[no_mangle]
+pub fn return_other_f64(x: (usize, f64)) -> (usize, f64) {
+    // CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
+    // CHECK-NEXT: movl [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
+    // CHECK-NEXT: movsd [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
+    // CHECK-NEXT: movl %[[VAL1]], (%[[PTR]])
+    // CHECK-NEXT: movsd %[[VAL2]], {{4|8}}(%[[PTR]])
+    // CHECK: retl
+    x
+}
+
+// Calling functions returning floats
+
+// CHECK-LABEL: call_f32:
+#[no_mangle]
+pub unsafe fn call_f32(x: &mut f32) {
+    extern "Rust" {
+        fn get_f32() -> f32;
+    }
+    // CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
+    // CHECK: calll {{()|_}}get_f32
+    // CHECK-NEXT: movl %eax, (%[[PTR]])
+    *x = get_f32();
+}
+
+// CHECK-LABEL: call_f64:
+#[no_mangle]
+pub unsafe fn call_f64(x: &mut f64) {
+    extern "Rust" {
+        fn get_f64() -> f64;
+    }
+    // CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
+    // CHECK: calll {{()|_}}get_f64
+    // CHECK: movsd {{.*}}(%{{ebp|esp}}), %[[VAL:.*]]
+    // CHECK-NEXT: movsd %[[VAL:.*]], (%[[PTR]])
+    *x = get_f64();
+}
+
+// Calling functions returning scalar pairs containing floats
+
+// CHECK-LABEL: call_f32_f32:
+#[no_mangle]
+pub unsafe fn call_f32_f32(x: &mut (f32, f32)) {
+    extern "Rust" {
+        fn get_f32_f32() -> (f32, f32);
+    }
+    // CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
+    // CHECK: calll {{()|_}}get_f32_f32
+    // CHECK: movss [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
+    // CHECK-NEXT: movss [[#%d,OFFSET+4]](%ebp), %[[VAL2:.*]]
+    // CHECK-NEXT: movss %[[VAL1]], (%[[PTR]])
+    // CHECK-NEXT: movss %[[VAL2]], 4(%[[PTR]])
+    *x = get_f32_f32();
+}
+
+// CHECK-LABEL: call_f64_f64:
+#[no_mangle]
+pub unsafe fn call_f64_f64(x: &mut (f64, f64)) {
+    extern "Rust" {
+        fn get_f64_f64() -> (f64, f64);
+    }
+    // CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
+    // CHECK: calll {{()|_}}get_f64_f64
+    // unix: movsd [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
+    // unix-NEXT: movsd [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
+    // windows: movsd (%esp), %[[VAL1:.*]]
+    // windows-NEXT: movsd 8(%esp), %[[VAL2:.*]]
+    // CHECK-NEXT: movsd %[[VAL1]], (%[[PTR]])
+    // CHECK-NEXT: movsd %[[VAL2]], 8(%[[PTR]])
+    *x = get_f64_f64();
+}
+
+// CHECK-LABEL: call_f32_f64:
+#[no_mangle]
+pub unsafe fn call_f32_f64(x: &mut (f32, f64)) {
+    extern "Rust" {
+        fn get_f32_f64() -> (f32, f64);
+    }
+    // CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
+    // CHECK: calll {{()|_}}get_f32_f64
+    // unix: movss [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
+    // unix-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL2:.*]]
+    // windows: movss (%esp), %[[VAL1:.*]]
+    // windows-NEXT: movsd 8(%esp), %[[VAL2:.*]]
+    // CHECK-NEXT: movss %[[VAL1]], (%[[PTR]])
+    // unix-NEXT: movsd %[[VAL2]], 4(%[[PTR]])
+    // windows-NEXT: movsd %[[VAL2]], 8(%[[PTR]])
+    *x = get_f32_f64();
+}
+
+// CHECK-LABEL: call_f64_f32:
+#[no_mangle]
+pub unsafe fn call_f64_f32(x: &mut (f64, f32)) {
+    extern "Rust" {
+        fn get_f64_f32() -> (f64, f32);
+    }
+    // CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
+    // CHECK: calll {{()|_}}get_f64_f32
+    // unix: movsd [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
+    // unix-NEXT: movss [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
+    // windows: movsd (%esp), %[[VAL1:.*]]
+    // windows-NEXT: movss 8(%esp), %[[VAL2:.*]]
+    // CHECK-NEXT: movsd %[[VAL1]], (%[[PTR]])
+    // CHECK-NEXT: movss %[[VAL2]], 8(%[[PTR]])
+    *x = get_f64_f32();
+}
+
+// CHECK-LABEL: call_f32_other:
+#[no_mangle]
+pub unsafe fn call_f32_other(x: &mut (f32, usize)) {
+    extern "Rust" {
+        fn get_f32_other() -> (f32, usize);
+    }
+    // CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
+    // CHECK: calll {{()|_}}get_f32_other
+    // CHECK: movss [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
+    // CHECK-NEXT: movl [[#%d,OFFSET+4]](%ebp), %[[VAL2:.*]]
+    // CHECK-NEXT: movss %[[VAL1]], (%[[PTR]])
+    // CHECK-NEXT: movl %[[VAL2]], 4(%[[PTR]])
+    *x = get_f32_other();
+}
+
+// CHECK-LABEL: call_f64_other:
+#[no_mangle]
+pub unsafe fn call_f64_other(x: &mut (f64, usize)) {
+    extern "Rust" {
+        fn get_f64_other() -> (f64, usize);
+    }
+    // CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
+    // CHECK: calll {{()|_}}get_f64_other
+    // unix: movsd [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
+    // unix-NEXT: movl [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
+    // windows: movsd (%esp), %[[VAL1:.*]]
+    // windows-NEXT: movl 8(%esp), %[[VAL2:.*]]
+    // CHECK-NEXT: movsd %[[VAL1]], (%[[PTR]])
+    // CHECK-NEXT: movl %[[VAL2]], 8(%[[PTR]])
+    *x = get_f64_other();
+}
+
+// CHECK-LABEL: call_other_f32:
+#[no_mangle]
+pub unsafe fn call_other_f32(x: &mut (usize, f32)) {
+    extern "Rust" {
+        fn get_other_f32() -> (usize, f32);
+    }
+    // CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
+    // CHECK: calll {{()|_}}get_other_f32
+    // CHECK: movl [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
+    // CHECK-NEXT: movss [[#%d,OFFSET+4]](%ebp), %[[VAL2:.*]]
+    // CHECK-NEXT: movl %[[VAL1]], (%[[PTR]])
+    // CHECK-NEXT: movss %[[VAL2]], 4(%[[PTR]])
+    *x = get_other_f32();
+}
+
+// CHECK-LABEL: call_other_f64:
+#[no_mangle]
+pub unsafe fn call_other_f64(x: &mut (usize, f64)) {
+    extern "Rust" {
+        fn get_other_f64() -> (usize, f64);
+    }
+    // CHECK: movl {{.*}}(%ebp), %[[PTR:.*]]
+    // CHECK: calll {{()|_}}get_other_f64
+    // unix: movl [[#%d,OFFSET:]](%ebp), %[[VAL1:.*]]
+    // unix-NEXT: movsd [[#%d,OFFSET+4]](%ebp), %[[VAL2:.*]]
+    // windows: movl (%esp), %[[VAL1:.*]]
+    // windows-NEXT: movsd 8(%esp), %[[VAL2:.*]]
+    // CHECK-NEXT: movl %[[VAL1]], (%[[PTR]])
+    // unix-NEXT: movsd %[[VAL2]], 4(%[[PTR]])
+    // windows-NEXT: movsd %[[VAL2]], 8(%[[PTR]])
+    *x = get_other_f64();
+}
+
+// The "C" ABI for `f16` and `f128` on x86 has never used the x87 floating point stack. Do some
+// basic checks to ensure this remains the case for the "Rust" ABI.
+
+// CHECK-LABEL: return_f16:
+#[no_mangle]
+pub fn return_f16(x: f16) -> f16 {
+    // CHECK: pinsrw $0, {{.*}}(%ebp), %xmm0
+    // CHECK-NOT: xmm0
+    // CHECK: retl
+    x
+}
+
+// CHECK-LABEL: return_f128:
+#[no_mangle]
+pub fn return_f128(x: f128) -> f128 {
+    // CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
+    // CHECK-NEXT: movl [[#%d,OFFSET+16]](%ebp), %[[VAL4:.*]]
+    // CHECK-NEXT: movl [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
+    // CHECK-NEXT: movl [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
+    // CHECK-NEXT: movl [[#%d,OFFSET+12]](%ebp), %[[VAL3:.*]]
+    // CHECK-NEXT: movl %[[VAL4:.*]] 12(%[[PTR]])
+    // CHECK-NEXT: movl %[[VAL3:.*]] 8(%[[PTR]])
+    // CHECK-NEXT: movl %[[VAL2:.*]] 4(%[[PTR]])
+    // CHECK-NEXT: movl %[[VAL1:.*]] (%[[PTR]])
+    // CHECK: retl
+    x
+}
diff --git a/tests/codegen/float/f128.rs b/tests/codegen/float/f128.rs
index 32c5be1ec65..80b572fbbc9 100644
--- a/tests/codegen/float/f128.rs
+++ b/tests/codegen/float/f128.rs
@@ -1,3 +1,8 @@
+// 32-bit x86 returns `f32` and `f64` differently to avoid the x87 stack.
+//@ revisions: x86 other
+//@[x86] only-x86
+//@[other] ignore-x86
+
 // Verify that our intrinsics generate the correct LLVM calls for f128
 
 #![crate_type = "lib"]
@@ -138,14 +143,16 @@ pub fn f128_as_f16(a: f128) -> f16 {
     a as f16
 }
 
-// CHECK-LABEL: float @f128_as_f32(
+// other-LABEL: float @f128_as_f32(
+// x86-LABEL: i32 @f128_as_f32(
 #[no_mangle]
 pub fn f128_as_f32(a: f128) -> f32 {
     // CHECK: fptrunc fp128 %{{.+}} to float
     a as f32
 }
 
-// CHECK-LABEL: double @f128_as_f64(
+// other-LABEL: double @f128_as_f64(
+// x86-LABEL: void @f128_as_f64(
 #[no_mangle]
 pub fn f128_as_f64(a: f128) -> f64 {
     // CHECK: fptrunc fp128 %{{.+}} to double
diff --git a/tests/codegen/float/f16.rs b/tests/codegen/float/f16.rs
index 96daac869c2..2910d7d3e92 100644
--- a/tests/codegen/float/f16.rs
+++ b/tests/codegen/float/f16.rs
@@ -1,3 +1,8 @@
+// 32-bit x86 returns `f32` and `f64` differently to avoid the x87 stack.
+//@ revisions: x86 other
+//@[x86] only-x86
+//@[other] ignore-x86
+
 // Verify that our intrinsics generate the correct LLVM calls for f16
 
 #![crate_type = "lib"]
@@ -140,14 +145,16 @@ pub fn f16_as_self(a: f16) -> f16 {
     a as f16
 }
 
-// CHECK-LABEL: float @f16_as_f32(
+// other-LABEL: float @f16_as_f32(
+// x86-LABEL: i32 @f16_as_f32(
 #[no_mangle]
 pub fn f16_as_f32(a: f16) -> f32 {
     // CHECK: fpext half %{{.+}} to float
     a as f32
 }
 
-// CHECK-LABEL: double @f16_as_f64(
+// other-LABEL: double @f16_as_f64(
+// x86-LABEL: void @f16_as_f64(
 #[no_mangle]
 pub fn f16_as_f64(a: f16) -> f64 {
     // CHECK: fpext half %{{.+}} to double
diff --git a/tests/codegen/issues/issue-32031.rs b/tests/codegen/issues/issue-32031.rs
index 9693c414a67..4d6895166f1 100644
--- a/tests/codegen/issues/issue-32031.rs
+++ b/tests/codegen/issues/issue-32031.rs
@@ -1,11 +1,16 @@
 //@ compile-flags: -C no-prepopulate-passes -Copt-level=0
+// 32-bit x86 returns `f32` and `f64` differently to avoid the x87 stack.
+//@ revisions: x86 other
+//@[x86] only-x86
+//@[other] ignore-x86
 
 #![crate_type = "lib"]
 
 #[no_mangle]
 pub struct F32(f32);
 
-// CHECK: define{{.*}}float @add_newtype_f32(float %a, float %b)
+// other: define{{.*}}float @add_newtype_f32(float %a, float %b)
+// x86: define{{.*}}i32 @add_newtype_f32(float %a, float %b)
 #[inline(never)]
 #[no_mangle]
 pub fn add_newtype_f32(a: F32, b: F32) -> F32 {
@@ -15,7 +20,8 @@ pub fn add_newtype_f32(a: F32, b: F32) -> F32 {
 #[no_mangle]
 pub struct F64(f64);
 
-// CHECK: define{{.*}}double @add_newtype_f64(double %a, double %b)
+// other: define{{.*}}double @add_newtype_f64(double %a, double %b)
+// x86: define{{.*}}void @add_newtype_f64(ptr{{.*}}sret([8 x i8]){{.*}}%_0, double %a, double %b)
 #[inline(never)]
 #[no_mangle]
 pub fn add_newtype_f64(a: F64, b: F64) -> F64 {
diff --git a/tests/codegen/union-abi.rs b/tests/codegen/union-abi.rs
index 9e02fa9ff35..08015014456 100644
--- a/tests/codegen/union-abi.rs
+++ b/tests/codegen/union-abi.rs
@@ -1,5 +1,9 @@
 //@ ignore-emscripten vectors passed directly
 //@ compile-flags: -O -C no-prepopulate-passes
+// 32-bit x86 returns `f32` differently to avoid the x87 stack.
+//@ revisions: x86 other
+//@[x86] only-x86
+//@[other] ignore-x86
 
 // This test that using union forward the abi of the inner type, as
 // discussed in #54668
@@ -67,7 +71,8 @@ pub union UnionF32 {
     a: f32,
 }
 
-// CHECK: define {{(dso_local )?}}float @test_UnionF32(float %_1)
+// other: define {{(dso_local )?}}float @test_UnionF32(float %_1)
+// x86: define {{(dso_local )?}}i32 @test_UnionF32(float %_1)
 #[no_mangle]
 pub fn test_UnionF32(_: UnionF32) -> UnionF32 {
     loop {}
@@ -78,7 +83,8 @@ pub union UnionF32F32 {
     b: f32,
 }
 
-// CHECK: define {{(dso_local )?}}float @test_UnionF32F32(float %_1)
+// other: define {{(dso_local )?}}float @test_UnionF32F32(float %_1)
+// x86: define {{(dso_local )?}}i32 @test_UnionF32F32(float %_1)
 #[no_mangle]
 pub fn test_UnionF32F32(_: UnionF32F32) -> UnionF32F32 {
     loop {}
diff --git a/tests/ui/abi/numbers-arithmetic/return-float.rs b/tests/ui/abi/numbers-arithmetic/return-float.rs
new file mode 100644
index 00000000000..66a6d66911d
--- /dev/null
+++ b/tests/ui/abi/numbers-arithmetic/return-float.rs
@@ -0,0 +1,61 @@
+//@ run-pass
+//@ compile-flags: -Copt-level=0
+
+// Test that floats (in particular signalling NaNs) are losslessly returned from functions.
+
+fn main() {
+    // FIXME(#114479): LLVM miscompiles loading and storing `f32` and `f64` when SSE is disabled on
+    // x86.
+    if cfg!(not(all(target_arch = "x86", not(target_feature = "sse2")))) {
+        let bits_f32 = std::hint::black_box([
+            4.2_f32.to_bits(),
+            f32::INFINITY.to_bits(),
+            f32::NEG_INFINITY.to_bits(),
+            f32::NAN.to_bits(),
+            // These two masks cover all the mantissa bits. One of them is a signalling NaN, the
+            // other is quiet.
+            // Similar to the masks in `test_float_bits_conv` in library/std/src/f32/tests.rs
+            f32::NAN.to_bits() ^ 0x002A_AAAA,
+            f32::NAN.to_bits() ^ 0x0055_5555,
+            // Same as above but with the sign bit flipped.
+            f32::NAN.to_bits() ^ 0x802A_AAAA,
+            f32::NAN.to_bits() ^ 0x8055_5555,
+        ]);
+        for bits in bits_f32 {
+            assert_eq!(identity(f32::from_bits(bits)).to_bits(), bits);
+            // Test types that are returned as scalar pairs.
+            assert_eq!(identity((f32::from_bits(bits), 42)).0.to_bits(), bits);
+            assert_eq!(identity((42, f32::from_bits(bits))).1.to_bits(), bits);
+            let (a, b) = identity((f32::from_bits(bits), f32::from_bits(bits)));
+            assert_eq!((a.to_bits(), b.to_bits()), (bits, bits));
+        }
+
+        let bits_f64 = std::hint::black_box([
+            4.2_f64.to_bits(),
+            f64::INFINITY.to_bits(),
+            f64::NEG_INFINITY.to_bits(),
+            f64::NAN.to_bits(),
+            // These two masks cover all the mantissa bits. One of them is a signalling NaN, the
+            // other is quiet.
+            // Similar to the masks in `test_float_bits_conv` in library/std/src/f64/tests.rs
+            f64::NAN.to_bits() ^ 0x000A_AAAA_AAAA_AAAA,
+            f64::NAN.to_bits() ^ 0x0005_5555_5555_5555,
+            // Same as above but with the sign bit flipped.
+            f64::NAN.to_bits() ^ 0x800A_AAAA_AAAA_AAAA,
+            f64::NAN.to_bits() ^ 0x8005_5555_5555_5555,
+        ]);
+        for bits in bits_f64 {
+            assert_eq!(identity(f64::from_bits(bits)).to_bits(), bits);
+            // Test types that are returned as scalar pairs.
+            assert_eq!(identity((f64::from_bits(bits), 42)).0.to_bits(), bits);
+            assert_eq!(identity((42, f64::from_bits(bits))).1.to_bits(), bits);
+            let (a, b) = identity((f64::from_bits(bits), f64::from_bits(bits)));
+            assert_eq!((a.to_bits(), b.to_bits()), (bits, bits));
+        }
+    }
+}
+
+#[inline(never)]
+fn identity<T>(x: T) -> T {
+    x
+}