about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbjorn3 <bjorn3@users.noreply.github.com>2022-03-25 20:42:58 +0100
committerbjorn3 <bjorn3@users.noreply.github.com>2022-03-25 20:42:58 +0100
commit11007c02f70130cdc70b98f0909e5c150a2751a6 (patch)
tree49cc2c670ef21603890d8a4e782e76f08139f134
parent3c030e2425bb1fdb165ac87797076072ec991970 (diff)
downloadrust-11007c02f70130cdc70b98f0909e5c150a2751a6.tar.gz
rust-11007c02f70130cdc70b98f0909e5c150a2751a6.zip
Use fma(f) libm function for simd_fma intrinsic
-rw-r--r--patches/0001-portable-simd-Disable-unsupported-tests.patch20
-rw-r--r--src/intrinsics/simd.rs21
2 files changed, 11 insertions, 30 deletions
diff --git a/patches/0001-portable-simd-Disable-unsupported-tests.patch b/patches/0001-portable-simd-Disable-unsupported-tests.patch
index b952b21e6e6..54e13b090ab 100644
--- a/patches/0001-portable-simd-Disable-unsupported-tests.patch
+++ b/patches/0001-portable-simd-Disable-unsupported-tests.patch
@@ -102,26 +102,6 @@ index 6a8ecd3..68fcb49 100644
          }
      }
  }
-diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
-index 31b7ee2..bd04b3c 100644
---- a/crates/core_simd/tests/ops_macros.rs
-+++ b/crates/core_simd/tests/ops_macros.rs
-@@ -604,6 +606,7 @@ macro_rules! impl_float_tests {
-                         )
-                     }
- 
-+                    /*
-                     fn mul_add<const LANES: usize>() {
-                         test_helpers::test_ternary_elementwise(
-                             &Vector::<LANES>::mul_add,
-@@ -611,6 +614,7 @@ macro_rules! impl_float_tests {
-                             &|_, _, _| true,
-                         )
-                     }
-+                    */
-                 }
-             }
-         }
 -- 
 2.26.2.7.g19db9cfb68
 
diff --git a/src/intrinsics/simd.rs b/src/intrinsics/simd.rs
index 25755f8c7ec..d1ca9edf2e0 100644
--- a/src/intrinsics/simd.rs
+++ b/src/intrinsics/simd.rs
@@ -322,20 +322,21 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
             }
             assert_eq!(a.layout(), b.layout());
             assert_eq!(a.layout(), c.layout());
-            let layout = a.layout();
+            assert_eq!(a.layout(), ret.layout());
 
-            let (lane_count, _lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
-            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
-            assert_eq!(lane_count, ret_lane_count);
-            let ret_lane_layout = fx.layout_of(ret_lane_ty);
+            let layout = a.layout();
+            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
 
             for lane in 0..lane_count {
-                let a_lane = a.value_lane(fx, lane).load_scalar(fx);
-                let b_lane = b.value_lane(fx, lane).load_scalar(fx);
-                let c_lane = c.value_lane(fx, lane).load_scalar(fx);
+                let a_lane = a.value_lane(fx, lane);
+                let b_lane = b.value_lane(fx, lane);
+                let c_lane = c.value_lane(fx, lane);
 
-                let mul_lane = fx.bcx.ins().fmul(a_lane, b_lane);
-                let res_lane = CValue::by_val(fx.bcx.ins().fadd(mul_lane, c_lane), ret_lane_layout);
+                let res_lane = match lane_ty.kind() {
+                    ty::Float(FloatTy::F32) => fx.easy_call("fmaf", &[a_lane, b_lane, c_lane], lane_ty),
+                    ty::Float(FloatTy::F64) => fx.easy_call("fma", &[a_lane, b_lane, c_lane], lane_ty),
+                    _ => unreachable!(),
+                };
 
                 ret.place_lane(fx, lane).write_cvalue(fx, res_lane);
             }