about summary refs log tree commit diff
diff options
context:
space:
mode:
authorJubilee <workingjubilee@gmail.com>2024-09-15 23:51:25 -0700
committerGitHub <noreply@github.com>2024-09-15 23:51:25 -0700
commit68758c05604f0ae4dfbe25f61e70f1a3db083c29 (patch)
treea5a0b0691b27a39e3bcf701bb4d8bdfc14817607
parent0151cbe6e8ca1027f5482c98a1b93d70fbd9a50b (diff)
parentab8c2025279077fcfb0992c455bb8c173b2d9e53 (diff)
downloadrust-68758c05604f0ae4dfbe25f61e70f1a3db083c29.tar.gz
rust-68758c05604f0ae4dfbe25f61e70f1a3db083c29.zip
Rollup merge of #130325 - workingjubilee:plus-minus-zero-redux, r=RalfJung,jieyouxu
Use -0.0 in `intrinsics::simd::reduce_add_unordered`

-0.0 is the actual neutral additive float, not +0.0, and this matters to codegen.

try-job: aarch64-gnu
-rw-r--r--compiler/rustc_codegen_llvm/src/intrinsic.rs4
-rw-r--r--tests/assembly/simd/reduce-fadd-unordered.rs29
2 files changed, 31 insertions, 2 deletions
diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index 9705dd506b9..15c519dfcb4 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -2066,14 +2066,14 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         };
     }
 
-    arith_red!(simd_reduce_add_ordered: vector_reduce_add, vector_reduce_fadd, true, add, 0.0);
+    arith_red!(simd_reduce_add_ordered: vector_reduce_add, vector_reduce_fadd, true, add, -0.0);
     arith_red!(simd_reduce_mul_ordered: vector_reduce_mul, vector_reduce_fmul, true, mul, 1.0);
     arith_red!(
         simd_reduce_add_unordered: vector_reduce_add,
         vector_reduce_fadd_reassoc,
         false,
         add,
-        0.0
+        -0.0
     );
     arith_red!(
         simd_reduce_mul_unordered: vector_reduce_mul,
diff --git a/tests/assembly/simd/reduce-fadd-unordered.rs b/tests/assembly/simd/reduce-fadd-unordered.rs
new file mode 100644
index 00000000000..fa9ce6bd35e
--- /dev/null
+++ b/tests/assembly/simd/reduce-fadd-unordered.rs
@@ -0,0 +1,29 @@
+//@ revisions: x86_64 aarch64
+//@ assembly-output: emit-asm
+//@ compile-flags: --crate-type=lib -O
+//@[aarch64] only-aarch64
+//@[x86_64] only-x86_64
+//@[x86_64] compile-flags: -Ctarget-feature=+sse3
+#![feature(portable_simd)]
+#![feature(core_intrinsics)]
+use std::intrinsics::simd as intrinsics;
+use std::simd::*;
+// Regression test for https://github.com/rust-lang/rust/issues/130028
+// This intrinsic produces much worse code if you use +0.0 instead of -0.0 because
+// +0.0 isn't as easy to algebraically reassociate, even using LLVM's reassoc attribute!
+// It would emit about an extra fadd, depending on the architecture.
+
+// CHECK-LABEL: reduce_fadd_negative_zero
+pub unsafe fn reduce_fadd_negative_zero(v: f32x4) -> f32 {
+    // x86_64: addps
+    // x86_64-NEXT: movshdup
+    // x86_64-NEXT: addss
+    // x86_64-NOT: xorps
+
+    // aarch64: faddp
+    // aarch64-NEXT: faddp
+
+    // CHECK-NOT: {{f?}}add{{p?s*}}
+    // CHECK: ret
+    intrinsics::simd_reduce_add_unordered(v)
+}