about summary refs log tree commit diff
diff options
context:
space:
mode:
authorCaleb Zulawski <caleb.zulawski@gmail.com>2024-11-23 14:31:20 -0500
committerCaleb Zulawski <caleb.zulawski@gmail.com>2024-11-23 14:39:42 -0500
commite73e9f9af202206372d1fc0bbe215aad376c0482 (patch)
tree2f4eb84b60c28c3405cf9023818e8f12810cae45
parent826b673412d2f579e7865b6f26bc9771c6d3b097 (diff)
downloadrust-e73e9f9af202206372d1fc0bbe215aad376c0482.tar.gz
rust-e73e9f9af202206372d1fc0bbe215aad376c0482.zip
Add simd_relaxed_fma intrinsic
-rw-r--r--compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs3
-rw-r--r--compiler/rustc_codegen_gcc/src/intrinsic/simd.rs1
-rw-r--r--compiler/rustc_codegen_llvm/src/intrinsic.rs2
-rw-r--r--compiler/rustc_hir_analysis/src/check/intrinsic.rs4
-rw-r--r--compiler/rustc_span/src/symbol.rs1
-rw-r--r--library/core/src/intrinsics/simd.rs10
-rw-r--r--tests/ui/simd/intrinsic/float-math-pass.rs4
7 files changed, 23 insertions, 2 deletions
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
index f787b8a6fd9..e0ebe30752a 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
@@ -415,7 +415,8 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
             });
         }
 
-        sym::simd_fma => {
+        // FIXME: simd_relaxed_fma doesn't relax to non-fused multiply-add
+        sym::simd_fma | sym::simd_relaxed_fma => {
             intrinsic_args!(fx, args => (a, b, c); intrinsic);
 
             if !a.layout().ty.is_simd() {
diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs
index 604678a9af4..79d1a06dd46 100644
--- a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs
@@ -772,6 +772,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
             sym::simd_flog => "log",
             sym::simd_floor => "floor",
             sym::simd_fma => "fma",
+            sym::simd_relaxed_fma => "fma", // FIXME: this should relax to non-fused multiply-add when necessary
             sym::simd_fpowi => "__builtin_powi",
             sym::simd_fpow => "pow",
             sym::simd_fsin => "sin",
diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index da7f94e8cf7..d8b055137b3 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -1534,6 +1534,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             sym::simd_flog => ("log", bx.type_func(&[vec_ty], vec_ty)),
             sym::simd_floor => ("floor", bx.type_func(&[vec_ty], vec_ty)),
             sym::simd_fma => ("fma", bx.type_func(&[vec_ty, vec_ty, vec_ty], vec_ty)),
+            sym::simd_relaxed_fma => ("fmuladd", bx.type_func(&[vec_ty, vec_ty, vec_ty], vec_ty)),
             sym::simd_fpowi => ("powi", bx.type_func(&[vec_ty, bx.type_i32()], vec_ty)),
             sym::simd_fpow => ("pow", bx.type_func(&[vec_ty, vec_ty], vec_ty)),
             sym::simd_fsin => ("sin", bx.type_func(&[vec_ty], vec_ty)),
@@ -1572,6 +1573,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             | sym::simd_fpowi
             | sym::simd_fsin
             | sym::simd_fsqrt
+            | sym::simd_relaxed_fma
             | sym::simd_round
             | sym::simd_trunc
     ) {
diff --git a/compiler/rustc_hir_analysis/src/check/intrinsic.rs b/compiler/rustc_hir_analysis/src/check/intrinsic.rs
index 3e33120901f..7434bbf180b 100644
--- a/compiler/rustc_hir_analysis/src/check/intrinsic.rs
+++ b/compiler/rustc_hir_analysis/src/check/intrinsic.rs
@@ -641,7 +641,9 @@ pub fn check_intrinsic_type(
             | sym::simd_round
             | sym::simd_trunc => (1, 0, vec![param(0)], param(0)),
             sym::simd_fpowi => (1, 0, vec![param(0), tcx.types.i32], param(0)),
-            sym::simd_fma => (1, 0, vec![param(0), param(0), param(0)], param(0)),
+            sym::simd_fma | sym::simd_relaxed_fma => {
+                (1, 0, vec![param(0), param(0), param(0)], param(0))
+            }
             sym::simd_gather => (3, 0, vec![param(0), param(1), param(2)], param(0)),
             sym::simd_masked_load => (3, 0, vec![param(0), param(1), param(2)], param(2)),
             sym::simd_masked_store => (3, 0, vec![param(0), param(1), param(2)], tcx.types.unit),
diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs
index 3d0ec2afa2b..17b6c5d41d2 100644
--- a/compiler/rustc_span/src/symbol.rs
+++ b/compiler/rustc_span/src/symbol.rs
@@ -1840,6 +1840,7 @@ symbols! {
         simd_reduce_mul_unordered,
         simd_reduce_or,
         simd_reduce_xor,
+        simd_relaxed_fma,
         simd_rem,
         simd_round,
         simd_saturating_add,
diff --git a/library/core/src/intrinsics/simd.rs b/library/core/src/intrinsics/simd.rs
index 5ddca9c4dce..945bbe34df3 100644
--- a/library/core/src/intrinsics/simd.rs
+++ b/library/core/src/intrinsics/simd.rs
@@ -612,6 +612,16 @@ extern "rust-intrinsic" {
     #[rustc_nounwind]
     pub fn simd_fma<T>(x: T, y: T, z: T) -> T;
 
+    /// Computes `(x*y) + z` for each element, with unspecified rounding.
+    ///
+    /// This may be equivalent to `simd_fma`, or it may relax to rounding each
+    /// operation if that's more efficient.
+    ///
+    /// `T` must be a vector of floats.
+    #[cfg(not(bootstrap))]
+    #[rustc_nounwind]
+    pub fn simd_relaxed_fma<T>(x: T, y: T, z: T) -> T;
+
     // Computes the sine of each element.
     ///
     /// `T` must be a vector of floats.
diff --git a/tests/ui/simd/intrinsic/float-math-pass.rs b/tests/ui/simd/intrinsic/float-math-pass.rs
index 9b14d410acb..24b9941133e 100644
--- a/tests/ui/simd/intrinsic/float-math-pass.rs
+++ b/tests/ui/simd/intrinsic/float-math-pass.rs
@@ -23,6 +23,7 @@ extern "rust-intrinsic" {
     fn simd_fexp<T>(x: T) -> T;
     fn simd_fexp2<T>(x: T) -> T;
     fn simd_fma<T>(x: T, y: T, z: T) -> T;
+    fn simd_relaxed_fma<T>(x: T, y: T, z: T) -> T;
     fn simd_flog<T>(x: T) -> T;
     fn simd_flog10<T>(x: T) -> T;
     fn simd_flog2<T>(x: T) -> T;
@@ -77,6 +78,9 @@ fn main() {
         let r = simd_fma(x, h, h);
         assert_approx_eq!(x, r);
 
+        let r = simd_relaxed_fma(x, h, h);
+        assert_approx_eq!(x, r);
+
         let r = simd_fsqrt(x);
         assert_approx_eq!(x, r);