Merge pull request #1189 from bjorn3/stdsimd_fixes

Improve stdsimd support
author: bjorn3 <bjorn3@users.noreply.github.com> 2021-07-26 11:09:13 +0200
committer: GitHub <noreply@github.com> 2021-07-26 11:09:13 +0200
commit: 1f7080246562a7dd3c3dee006d1fc7754029c8bc (patch)
tree: 7035b0cbcbf7bc59ee72b128c9b71032325ffe54 /src
parent: 356360836e128e1d1eb11caf6ff5186efb211960 (diff)
parent: 581e38b97c8431be06e74a5c8c22c8e1e78cf368 (diff)
download: rust-1f7080246562a7dd3c3dee006d1fc7754029c8bc.tar.gz
rust-1f7080246562a7dd3c3dee006d1fc7754029c8bc.zip
3 files changed, 256 insertions, 36 deletions
diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs
index 42483230fdc..17c79645eb2 100644
--- a/src/intrinsics/mod.rs
+++ b/src/intrinsics/mod.rs
@@ -175,12 +175,11 @@ fn simd_for_each_lane<'tcx>(
     assert_eq!(lane_count, ret_lane_count);
 
     for lane_idx in 0..lane_count {
-        let lane_idx = mir::Field::new(lane_idx.try_into().unwrap());
-        let lane = val.value_field(fx, lane_idx).load_scalar(fx);
+        let lane = val.value_lane(fx, lane_idx).load_scalar(fx);
 
         let res_lane = f(fx, lane_layout, ret_lane_layout, lane);
 
-        ret.place_field(fx, lane_idx).write_cvalue(fx, res_lane);
+        ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane);
     }
 }
 
@@ -206,20 +205,20 @@ fn simd_pair_for_each_lane<'tcx>(
     let ret_lane_layout = fx.layout_of(ret_lane_ty);
     assert_eq!(lane_count, ret_lane_count);
 
-    for lane in 0..lane_count {
-        let lane = mir::Field::new(lane.try_into().unwrap());
-        let x_lane = x.value_field(fx, lane).load_scalar(fx);
-        let y_lane = y.value_field(fx, lane).load_scalar(fx);
+    for lane_idx in 0..lane_count {
+        let x_lane = x.value_lane(fx, lane_idx).load_scalar(fx);
+        let y_lane = y.value_lane(fx, lane_idx).load_scalar(fx);
 
         let res_lane = f(fx, lane_layout, ret_lane_layout, x_lane, y_lane);
 
-        ret.place_field(fx, lane).write_cvalue(fx, res_lane);
+        ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane);
     }
 }
 
 fn simd_reduce<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     val: CValue<'tcx>,
+    acc: Option<Value>,
     ret: CPlace<'tcx>,
     f: impl Fn(&mut FunctionCx<'_, '_, 'tcx>, TyAndLayout<'tcx>, Value, Value) -> Value,
 ) {
@@ -227,16 +226,17 @@ fn simd_reduce<'tcx>(
     let lane_layout = fx.layout_of(lane_ty);
     assert_eq!(lane_layout, ret.layout());
 
-    let mut res_val = val.value_field(fx, mir::Field::new(0)).load_scalar(fx);
-    for lane_idx in 1..lane_count {
-        let lane =
-            val.value_field(fx, mir::Field::new(lane_idx.try_into().unwrap())).load_scalar(fx);
+    let (mut res_val, start_lane) =
+        if let Some(acc) = acc { (acc, 0) } else { (val.value_lane(fx, 0).load_scalar(fx), 1) };
+    for lane_idx in start_lane..lane_count {
+        let lane = val.value_lane(fx, lane_idx).load_scalar(fx);
         res_val = f(fx, lane_layout, res_val, lane);
     }
     let res = CValue::by_val(res_val, lane_layout);
     ret.write_cvalue(fx, res);
 }
 
+// FIXME move all uses to `simd_reduce`
 fn simd_reduce_bool<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     val: CValue<'tcx>,
@@ -246,14 +246,18 @@ fn simd_reduce_bool<'tcx>(
     let (lane_count, _lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
     assert!(ret.layout().ty.is_bool());
 
-    let res_val = val.value_field(fx, mir::Field::new(0)).load_scalar(fx);
+    let res_val = val.value_lane(fx, 0).load_scalar(fx);
     let mut res_val = fx.bcx.ins().band_imm(res_val, 1); // mask to boolean
     for lane_idx in 1..lane_count {
-        let lane =
-            val.value_field(fx, mir::Field::new(lane_idx.try_into().unwrap())).load_scalar(fx);
+        let lane = val.value_lane(fx, lane_idx).load_scalar(fx);
         let lane = fx.bcx.ins().band_imm(lane, 1); // mask to boolean
         res_val = f(fx, res_val, lane);
     }
+    let res_val = if fx.bcx.func.dfg.value_type(res_val) != types::I8 {
+        fx.bcx.ins().ireduce(types::I8, res_val)
+    } else {
+        res_val
+    };
     let res = CValue::by_val(res_val, ret.layout());
     ret.write_cvalue(fx, res);
 }
@@ -288,7 +292,11 @@ macro simd_cmp {
         if let Some(vector_ty) = vector_ty {
             let x = $x.load_scalar($fx);
             let y = $y.load_scalar($fx);
-            let val = $fx.bcx.ins().icmp(IntCC::$cc, x, y);
+            let val = if vector_ty.lane_type().is_float() {
+                $fx.bcx.ins().fcmp(FloatCC::$cc_f, x, y)
+            } else {
+                $fx.bcx.ins().icmp(IntCC::$cc, x, y)
+            };
 
             // HACK This depends on the fact that icmp for vectors represents bools as 0 and !0, not 0 and 1.
             let val = $fx.bcx.ins().raw_bitcast(vector_ty, val);
diff --git a/src/intrinsics/simd.rs b/src/intrinsics/simd.rs
index c2f469fa021..37906ab472d 100644
--- a/src/intrinsics/simd.rs
+++ b/src/intrinsics/simd.rs
@@ -108,11 +108,11 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
 
             for (out_idx, in_idx) in indexes.into_iter().enumerate() {
                 let in_lane = if u64::from(in_idx) < lane_count {
-                    x.value_field(fx, mir::Field::new(in_idx.into()))
+                    x.value_lane(fx, in_idx.into())
                 } else {
-                    y.value_field(fx, mir::Field::new(usize::from(in_idx) - usize::try_from(lane_count).unwrap()))
+                    y.value_lane(fx, u64::from(in_idx) - lane_count)
                 };
-                let out_lane = ret.place_field(fx, mir::Field::new(out_idx));
+                let out_lane = ret.place_lane(fx, u64::try_from(out_idx).unwrap());
                 out_lane.write_cvalue(fx, in_lane);
             }
         };
@@ -163,10 +163,38 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
                 fx.tcx.sess.span_fatal(fx.mir.span, &format!("[simd_extract] idx {} >= lane_count {}", idx, lane_count));
             }
 
-            let ret_lane = v.value_field(fx, mir::Field::new(idx.try_into().unwrap()));
+            let ret_lane = v.value_lane(fx, idx.try_into().unwrap());
             ret.write_cvalue(fx, ret_lane);
         };
 
+        simd_neg, (c a) {
+            validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+            simd_for_each_lane(fx, a, ret, |fx, lane_layout, ret_lane_layout, lane| {
+                let ret_lane = match lane_layout.ty.kind() {
+                    ty::Int(_) => fx.bcx.ins().ineg(lane),
+                    ty::Float(_) => fx.bcx.ins().fneg(lane),
+                    _ => unreachable!(),
+                };
+                CValue::by_val(ret_lane, ret_lane_layout)
+            });
+        };
+
+        simd_fabs, (c a) {
+            validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| {
+                let ret_lane = fx.bcx.ins().fabs(lane);
+                CValue::by_val(ret_lane, ret_lane_layout)
+            });
+        };
+
+        simd_fsqrt, (c a) {
+            validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| {
+                let ret_lane = fx.bcx.ins().sqrt(lane);
+                CValue::by_val(ret_lane, ret_lane_layout)
+            });
+        };
+
         simd_add, (c x, c y) {
             validate_simd_type!(fx, intrinsic, span, x.layout().ty);
             simd_int_flt_binop!(fx, iadd|fadd(x, y) -> ret);
@@ -183,6 +211,29 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
             validate_simd_type!(fx, intrinsic, span, x.layout().ty);
             simd_int_flt_binop!(fx, udiv|sdiv|fdiv(x, y) -> ret);
         };
+        simd_rem, (c x, c y) {
+            validate_simd_type!(fx, intrinsic, span, x.layout().ty);
+            simd_pair_for_each_lane(fx, x, y, ret, |fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
+                let res_lane = match lane_layout.ty.kind() {
+                    ty::Uint(_) => fx.bcx.ins().urem(x_lane, y_lane),
+                    ty::Int(_) => fx.bcx.ins().srem(x_lane, y_lane),
+                    ty::Float(FloatTy::F32) => fx.lib_call(
+                        "fmodf",
+                        vec![AbiParam::new(types::F32), AbiParam::new(types::F32)],
+                        vec![AbiParam::new(types::F32)],
+                        &[x_lane, y_lane],
+                    )[0],
+                    ty::Float(FloatTy::F64) => fx.lib_call(
+                        "fmod",
+                        vec![AbiParam::new(types::F64), AbiParam::new(types::F64)],
+                        vec![AbiParam::new(types::F64)],
+                        &[x_lane, y_lane],
+                    )[0],
+                    _ => unreachable!("{:?}", lane_layout.ty),
+                };
+                CValue::by_val(res_lane, ret_lane_layout)
+            });
+        };
         simd_shl, (c x, c y) {
             validate_simd_type!(fx, intrinsic, span, x.layout().ty);
             simd_int_binop!(fx, ishl(x, y) -> ret);
@@ -216,15 +267,14 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
             let ret_lane_layout = fx.layout_of(ret_lane_ty);
 
             for lane in 0..lane_count {
-                let lane = mir::Field::new(lane.try_into().unwrap());
-                let a_lane = a.value_field(fx, lane).load_scalar(fx);
-                let b_lane = b.value_field(fx, lane).load_scalar(fx);
-                let c_lane = c.value_field(fx, lane).load_scalar(fx);
+                let a_lane = a.value_lane(fx, lane).load_scalar(fx);
+                let b_lane = b.value_lane(fx, lane).load_scalar(fx);
+                let c_lane = c.value_lane(fx, lane).load_scalar(fx);
 
                 let mul_lane = fx.bcx.ins().fmul(a_lane, b_lane);
                 let res_lane = CValue::by_val(fx.bcx.ins().fadd(mul_lane, c_lane), ret_lane_layout);
 
-                ret.place_field(fx, lane).write_cvalue(fx, res_lane);
+                ret.place_lane(fx, lane).write_cvalue(fx, res_lane);
             }
         };
 
@@ -237,9 +287,52 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
             simd_flt_binop!(fx, fmax(x, y) -> ret);
         };
 
-        simd_reduce_add_ordered | simd_reduce_add_unordered, (c v) {
+        simd_round, (c a) {
+            validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+            simd_for_each_lane(fx, a, ret, |fx, lane_layout, ret_lane_layout, lane| {
+                let res_lane = match lane_layout.ty.kind() {
+                    ty::Float(FloatTy::F32) => fx.lib_call(
+                        "roundf",
+                        vec![AbiParam::new(types::F32)],
+                        vec![AbiParam::new(types::F32)],
+                        &[lane],
+                    )[0],
+                    ty::Float(FloatTy::F64) => fx.lib_call(
+                        "round",
+                        vec![AbiParam::new(types::F64)],
+                        vec![AbiParam::new(types::F64)],
+                        &[lane],
+                    )[0],
+                    _ => unreachable!("{:?}", lane_layout.ty),
+                };
+                CValue::by_val(res_lane, ret_lane_layout)
+            });
+        };
+        simd_ceil, (c a) {
+            validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| {
+                let ret_lane = fx.bcx.ins().ceil(lane);
+                CValue::by_val(ret_lane, ret_lane_layout)
+            });
+        };
+        simd_floor, (c a) {
+            validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| {
+                let ret_lane = fx.bcx.ins().floor(lane);
+                CValue::by_val(ret_lane, ret_lane_layout)
+            });
+        };
+        simd_trunc, (c a) {
+            validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| {
+                let ret_lane = fx.bcx.ins().trunc(lane);
+                CValue::by_val(ret_lane, ret_lane_layout)
+            });
+        };
+
+        simd_reduce_add_ordered | simd_reduce_add_unordered, (c v, v acc) {
             validate_simd_type!(fx, intrinsic, span, v.layout().ty);
-            simd_reduce(fx, v, ret, |fx, lane_layout, a, b| {
+            simd_reduce(fx, v, Some(acc), ret, |fx, lane_layout, a, b| {
                 if lane_layout.ty.is_floating_point() {
                     fx.bcx.ins().fadd(a, b)
                 } else {
@@ -248,9 +341,9 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
             });
         };
 
-        simd_reduce_mul_ordered | simd_reduce_mul_unordered, (c v) {
+        simd_reduce_mul_ordered | simd_reduce_mul_unordered, (c v, v acc) {
             validate_simd_type!(fx, intrinsic, span, v.layout().ty);
-            simd_reduce(fx, v, ret, |fx, lane_layout, a, b| {
+            simd_reduce(fx, v, Some(acc), ret, |fx, lane_layout, a, b| {
                 if lane_layout.ty.is_floating_point() {
                     fx.bcx.ins().fmul(a, b)
                 } else {
@@ -269,13 +362,68 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
             simd_reduce_bool(fx, v, ret, |fx, a, b| fx.bcx.ins().bor(a, b));
         };
 
-        // simd_fabs
-        // simd_saturating_add
+        simd_reduce_and, (c v) {
+            validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+            simd_reduce(fx, v, None, ret, |fx, _layout, a, b| fx.bcx.ins().band(a, b));
+        };
+
+        simd_reduce_or, (c v) {
+            validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+            simd_reduce(fx, v, None, ret, |fx, _layout, a, b| fx.bcx.ins().bor(a, b));
+        };
+
+        simd_reduce_xor, (c v) {
+            validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+            simd_reduce(fx, v, None, ret, |fx, _layout, a, b| fx.bcx.ins().bxor(a, b));
+        };
+
+        simd_reduce_min, (c v) {
+            validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+            simd_reduce(fx, v, None, ret, |fx, layout, a, b| {
+                let lt = fx.bcx.ins().icmp(if layout.ty.is_signed() {
+                    IntCC::SignedLessThan
+                } else {
+                    IntCC::UnsignedLessThan
+                }, a, b);
+                fx.bcx.ins().select(lt, a, b)
+            });
+        };
+
+        simd_reduce_max, (c v) {
+            validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+            simd_reduce(fx, v, None, ret, |fx, layout, a, b| {
+                let gt = fx.bcx.ins().icmp(if layout.ty.is_signed() {
+                    IntCC::SignedGreaterThan
+                } else {
+                    IntCC::UnsignedGreaterThan
+                }, a, b);
+                fx.bcx.ins().select(gt, a, b)
+            });
+        };
+
+        simd_select, (c m, c a, c b) {
+            validate_simd_type!(fx, intrinsic, span, m.layout().ty);
+            validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+            assert_eq!(a.layout(), b.layout());
+
+            let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
+            let lane_layout = fx.layout_of(lane_ty);
+
+            for lane in 0..lane_count {
+                let m_lane = m.value_lane(fx, lane).load_scalar(fx);
+                let a_lane = a.value_lane(fx, lane).load_scalar(fx);
+                let b_lane = b.value_lane(fx, lane).load_scalar(fx);
+
+                let m_lane = fx.bcx.ins().icmp_imm(IntCC::Equal, m_lane, 0);
+                let res_lane = CValue::by_val(fx.bcx.ins().select(m_lane, b_lane, a_lane), lane_layout);
+
+                ret.place_lane(fx, lane).write_cvalue(fx, res_lane);
+            }
+        };
+
+        // simd_saturating_*
         // simd_bitmask
-        // simd_select
-        // simd_rem
-        // simd_neg
-        // simd_trunc
-        // simd_floor
+        // simd_scatter
+        // simd_gather
     }
 }
diff --git a/src/value_and_place.rs b/src/value_and_place.rs
index ae8ccc626b4..b1e00360d56 100644
--- a/src/value_and_place.rs
+++ b/src/value_and_place.rs
@@ -206,6 +206,38 @@ impl<'tcx> CValue<'tcx> {
         }
     }
 
+    /// Like [`CValue::value_field`] except handling ADTs containing a single array field in a way
+    /// such that you can access individual lanes.
+    pub(crate) fn value_lane(
+        self,
+        fx: &mut FunctionCx<'_, '_, 'tcx>,
+        lane_idx: u64,
+    ) -> CValue<'tcx> {
+        let layout = self.1;
+        assert!(layout.ty.is_simd());
+        let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+        let lane_layout = fx.layout_of(lane_ty);
+        assert!(lane_idx < lane_count);
+        match self.0 {
+            CValueInner::ByVal(val) => match layout.abi {
+                Abi::Vector { element: _, count: _ } => {
+                    assert!(lane_count <= u8::MAX.into(), "SIMD type with more than 255 lanes???");
+                    let lane_idx = u8::try_from(lane_idx).unwrap();
+                    let lane = fx.bcx.ins().extractlane(val, lane_idx);
+                    CValue::by_val(lane, lane_layout)
+                }
+                _ => unreachable!("value_lane for ByVal with abi {:?}", layout.abi),
+            },
+            CValueInner::ByValPair(_, _) => unreachable!(),
+            CValueInner::ByRef(ptr, None) => {
+                let field_offset = lane_layout.size * lane_idx;
+                let field_ptr = ptr.offset_i64(fx, i64::try_from(field_offset.bytes()).unwrap());
+                CValue::by_ref(field_ptr, lane_layout)
+            }
+            CValueInner::ByRef(_, Some(_)) => unreachable!(),
+        }
+    }
+
     pub(crate) fn unsize_value(self, fx: &mut FunctionCx<'_, '_, 'tcx>, dest: CPlace<'tcx>) {
         crate::unsize::coerce_unsized_into(fx, self, dest);
     }
@@ -610,6 +642,38 @@ impl<'tcx> CPlace<'tcx> {
         }
     }
 
+    /// Like [`CPlace::place_field`] except handling ADTs containing a single array field in a way
+    /// such that you can access individual lanes.
+    pub(crate) fn place_lane(
+        self,
+        fx: &mut FunctionCx<'_, '_, 'tcx>,
+        lane_idx: u64,
+    ) -> CPlace<'tcx> {
+        let layout = self.layout();
+        assert!(layout.ty.is_simd());
+        let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+        let lane_layout = fx.layout_of(lane_ty);
+        assert!(lane_idx < lane_count);
+
+        match self.inner {
+            CPlaceInner::Var(local, var) => {
+                assert!(matches!(layout.abi, Abi::Vector { .. }));
+                CPlace {
+                    inner: CPlaceInner::VarLane(local, var, lane_idx.try_into().unwrap()),
+                    layout: lane_layout,
+                }
+            }
+            CPlaceInner::VarPair(_, _, _) => unreachable!(),
+            CPlaceInner::VarLane(_, _, _) => unreachable!(),
+            CPlaceInner::Addr(ptr, None) => {
+                let field_offset = lane_layout.size * lane_idx;
+                let field_ptr = ptr.offset_i64(fx, i64::try_from(field_offset.bytes()).unwrap());
+                CPlace::for_ptr(field_ptr, lane_layout)
+            }
+            CPlaceInner::Addr(_, Some(_)) => unreachable!(),
+        }
+    }
+
     pub(crate) fn place_index(
         self,
         fx: &mut FunctionCx<'_, '_, 'tcx>,
author	bjorn3 <bjorn3@users.noreply.github.com>	2021-07-26 11:09:13 +0200
committer	GitHub <noreply@github.com>	2021-07-26 11:09:13 +0200
commit	1f7080246562a7dd3c3dee006d1fc7754029c8bc (patch)
tree	7035b0cbcbf7bc59ee72b128c9b71032325ffe54 /src
parent	356360836e128e1d1eb11caf6ff5186efb211960 (diff)
parent	581e38b97c8431be06e74a5c8c22c8e1e78cf368 (diff)
download	rust-1f7080246562a7dd3c3dee006d1fc7754029c8bc.tar.gz rust-1f7080246562a7dd3c3dee006d1fc7754029c8bc.zip