diff options
| author | bjorn3 <bjorn3@users.noreply.github.com> | 2021-07-26 11:09:13 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-07-26 11:09:13 +0200 |
| commit | 1f7080246562a7dd3c3dee006d1fc7754029c8bc (patch) | |
| tree | 7035b0cbcbf7bc59ee72b128c9b71032325ffe54 /src | |
| parent | 356360836e128e1d1eb11caf6ff5186efb211960 (diff) | |
| parent | 581e38b97c8431be06e74a5c8c22c8e1e78cf368 (diff) | |
| download | rust-1f7080246562a7dd3c3dee006d1fc7754029c8bc.tar.gz rust-1f7080246562a7dd3c3dee006d1fc7754029c8bc.zip | |
Merge pull request #1189 from bjorn3/stdsimd_fixes
Improve stdsimd support
Diffstat (limited to 'src')
| -rw-r--r-- | src/intrinsics/mod.rs | 40 | ||||
| -rw-r--r-- | src/intrinsics/simd.rs | 188 | ||||
| -rw-r--r-- | src/value_and_place.rs | 64 |
3 files changed, 256 insertions, 36 deletions
diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs index 42483230fdc..17c79645eb2 100644 --- a/src/intrinsics/mod.rs +++ b/src/intrinsics/mod.rs @@ -175,12 +175,11 @@ fn simd_for_each_lane<'tcx>( assert_eq!(lane_count, ret_lane_count); for lane_idx in 0..lane_count { - let lane_idx = mir::Field::new(lane_idx.try_into().unwrap()); - let lane = val.value_field(fx, lane_idx).load_scalar(fx); + let lane = val.value_lane(fx, lane_idx).load_scalar(fx); let res_lane = f(fx, lane_layout, ret_lane_layout, lane); - ret.place_field(fx, lane_idx).write_cvalue(fx, res_lane); + ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane); } } @@ -206,20 +205,20 @@ fn simd_pair_for_each_lane<'tcx>( let ret_lane_layout = fx.layout_of(ret_lane_ty); assert_eq!(lane_count, ret_lane_count); - for lane in 0..lane_count { - let lane = mir::Field::new(lane.try_into().unwrap()); - let x_lane = x.value_field(fx, lane).load_scalar(fx); - let y_lane = y.value_field(fx, lane).load_scalar(fx); + for lane_idx in 0..lane_count { + let x_lane = x.value_lane(fx, lane_idx).load_scalar(fx); + let y_lane = y.value_lane(fx, lane_idx).load_scalar(fx); let res_lane = f(fx, lane_layout, ret_lane_layout, x_lane, y_lane); - ret.place_field(fx, lane).write_cvalue(fx, res_lane); + ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane); } } fn simd_reduce<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, val: CValue<'tcx>, + acc: Option<Value>, ret: CPlace<'tcx>, f: impl Fn(&mut FunctionCx<'_, '_, 'tcx>, TyAndLayout<'tcx>, Value, Value) -> Value, ) { @@ -227,16 +226,17 @@ fn simd_reduce<'tcx>( let lane_layout = fx.layout_of(lane_ty); assert_eq!(lane_layout, ret.layout()); - let mut res_val = val.value_field(fx, mir::Field::new(0)).load_scalar(fx); - for lane_idx in 1..lane_count { - let lane = - val.value_field(fx, mir::Field::new(lane_idx.try_into().unwrap())).load_scalar(fx); + let (mut res_val, start_lane) = + if let Some(acc) = acc { (acc, 0) } else { (val.value_lane(fx, 0).load_scalar(fx), 1) }; + for lane_idx in start_lane..lane_count { + let lane = val.value_lane(fx, lane_idx).load_scalar(fx); res_val = f(fx, lane_layout, res_val, lane); } let res = CValue::by_val(res_val, lane_layout); ret.write_cvalue(fx, res); } +// FIXME move all uses to `simd_reduce` fn simd_reduce_bool<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, val: CValue<'tcx>, @@ -246,14 +246,18 @@ fn simd_reduce_bool<'tcx>( let (lane_count, _lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx); assert!(ret.layout().ty.is_bool()); - let res_val = val.value_field(fx, mir::Field::new(0)).load_scalar(fx); + let res_val = val.value_lane(fx, 0).load_scalar(fx); let mut res_val = fx.bcx.ins().band_imm(res_val, 1); // mask to boolean for lane_idx in 1..lane_count { - let lane = - val.value_field(fx, mir::Field::new(lane_idx.try_into().unwrap())).load_scalar(fx); + let lane = val.value_lane(fx, lane_idx).load_scalar(fx); let lane = fx.bcx.ins().band_imm(lane, 1); // mask to boolean res_val = f(fx, res_val, lane); } + let res_val = if fx.bcx.func.dfg.value_type(res_val) != types::I8 { + fx.bcx.ins().ireduce(types::I8, res_val) + } else { + res_val + }; let res = CValue::by_val(res_val, ret.layout()); ret.write_cvalue(fx, res); } @@ -288,7 +292,11 @@ macro simd_cmp { if let Some(vector_ty) = vector_ty { let x = $x.load_scalar($fx); let y = $y.load_scalar($fx); - let val = $fx.bcx.ins().icmp(IntCC::$cc, x, y); + let val = if vector_ty.lane_type().is_float() { + $fx.bcx.ins().fcmp(FloatCC::$cc_f, x, y) + } else { + $fx.bcx.ins().icmp(IntCC::$cc, x, y) + }; // HACK This depends on the fact that icmp for vectors represents bools as 0 and !0, not 0 and 1. let val = $fx.bcx.ins().raw_bitcast(vector_ty, val); diff --git a/src/intrinsics/simd.rs b/src/intrinsics/simd.rs index c2f469fa021..37906ab472d 100644 --- a/src/intrinsics/simd.rs +++ b/src/intrinsics/simd.rs @@ -108,11 +108,11 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( for (out_idx, in_idx) in indexes.into_iter().enumerate() { let in_lane = if u64::from(in_idx) < lane_count { - x.value_field(fx, mir::Field::new(in_idx.into())) + x.value_lane(fx, in_idx.into()) } else { - y.value_field(fx, mir::Field::new(usize::from(in_idx) - usize::try_from(lane_count).unwrap())) + y.value_lane(fx, u64::from(in_idx) - lane_count) }; - let out_lane = ret.place_field(fx, mir::Field::new(out_idx)); + let out_lane = ret.place_lane(fx, u64::try_from(out_idx).unwrap()); out_lane.write_cvalue(fx, in_lane); } }; @@ -163,10 +163,38 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( fx.tcx.sess.span_fatal(fx.mir.span, &format!("[simd_extract] idx {} >= lane_count {}", idx, lane_count)); } - let ret_lane = v.value_field(fx, mir::Field::new(idx.try_into().unwrap())); + let ret_lane = v.value_lane(fx, idx.try_into().unwrap()); ret.write_cvalue(fx, ret_lane); }; + simd_neg, (c a) { + validate_simd_type!(fx, intrinsic, span, a.layout().ty); + simd_for_each_lane(fx, a, ret, |fx, lane_layout, ret_lane_layout, lane| { + let ret_lane = match lane_layout.ty.kind() { + ty::Int(_) => fx.bcx.ins().ineg(lane), + ty::Float(_) => fx.bcx.ins().fneg(lane), + _ => unreachable!(), + }; + CValue::by_val(ret_lane, ret_lane_layout) + }); + }; + + simd_fabs, (c a) { + validate_simd_type!(fx, intrinsic, span, a.layout().ty); + simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| { + let ret_lane = fx.bcx.ins().fabs(lane); + CValue::by_val(ret_lane, ret_lane_layout) + }); + }; + + simd_fsqrt, (c a) { + validate_simd_type!(fx, intrinsic, span, a.layout().ty); + simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| { + let ret_lane = fx.bcx.ins().sqrt(lane); + CValue::by_val(ret_lane, ret_lane_layout) + }); + }; + simd_add, (c x, c y) { validate_simd_type!(fx, intrinsic, span, x.layout().ty); simd_int_flt_binop!(fx, iadd|fadd(x, y) -> ret); @@ -183,6 +211,29 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( validate_simd_type!(fx, intrinsic, span, x.layout().ty); simd_int_flt_binop!(fx, udiv|sdiv|fdiv(x, y) -> ret); }; + simd_rem, (c x, c y) { + validate_simd_type!(fx, intrinsic, span, x.layout().ty); + simd_pair_for_each_lane(fx, x, y, ret, |fx, lane_layout, ret_lane_layout, x_lane, y_lane| { + let res_lane = match lane_layout.ty.kind() { + ty::Uint(_) => fx.bcx.ins().urem(x_lane, y_lane), + ty::Int(_) => fx.bcx.ins().srem(x_lane, y_lane), + ty::Float(FloatTy::F32) => fx.lib_call( + "fmodf", + vec![AbiParam::new(types::F32), AbiParam::new(types::F32)], + vec![AbiParam::new(types::F32)], + &[x_lane, y_lane], + )[0], + ty::Float(FloatTy::F64) => fx.lib_call( + "fmod", + vec![AbiParam::new(types::F64), AbiParam::new(types::F64)], + vec![AbiParam::new(types::F64)], + &[x_lane, y_lane], + )[0], + _ => unreachable!("{:?}", lane_layout.ty), + }; + CValue::by_val(res_lane, ret_lane_layout) + }); + }; simd_shl, (c x, c y) { validate_simd_type!(fx, intrinsic, span, x.layout().ty); simd_int_binop!(fx, ishl(x, y) -> ret); @@ -216,15 +267,14 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( let ret_lane_layout = fx.layout_of(ret_lane_ty); for lane in 0..lane_count { - let lane = mir::Field::new(lane.try_into().unwrap()); - let a_lane = a.value_field(fx, lane).load_scalar(fx); - let b_lane = b.value_field(fx, lane).load_scalar(fx); - let c_lane = c.value_field(fx, lane).load_scalar(fx); + let a_lane = a.value_lane(fx, lane).load_scalar(fx); + let b_lane = b.value_lane(fx, lane).load_scalar(fx); + let c_lane = c.value_lane(fx, lane).load_scalar(fx); let mul_lane = fx.bcx.ins().fmul(a_lane, b_lane); let res_lane = CValue::by_val(fx.bcx.ins().fadd(mul_lane, c_lane), ret_lane_layout); - ret.place_field(fx, lane).write_cvalue(fx, res_lane); + ret.place_lane(fx, lane).write_cvalue(fx, res_lane); } }; @@ -237,9 +287,52 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( simd_flt_binop!(fx, fmax(x, y) -> ret); }; - simd_reduce_add_ordered | simd_reduce_add_unordered, (c v) { + simd_round, (c a) { + validate_simd_type!(fx, intrinsic, span, a.layout().ty); + simd_for_each_lane(fx, a, ret, |fx, lane_layout, ret_lane_layout, lane| { + let res_lane = match lane_layout.ty.kind() { + ty::Float(FloatTy::F32) => fx.lib_call( + "roundf", + vec![AbiParam::new(types::F32)], + vec![AbiParam::new(types::F32)], + &[lane], + )[0], + ty::Float(FloatTy::F64) => fx.lib_call( + "round", + vec![AbiParam::new(types::F64)], + vec![AbiParam::new(types::F64)], + &[lane], + )[0], + _ => unreachable!("{:?}", lane_layout.ty), + }; + CValue::by_val(res_lane, ret_lane_layout) + }); + }; + simd_ceil, (c a) { + validate_simd_type!(fx, intrinsic, span, a.layout().ty); + simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| { + let ret_lane = fx.bcx.ins().ceil(lane); + CValue::by_val(ret_lane, ret_lane_layout) + }); + }; + simd_floor, (c a) { + validate_simd_type!(fx, intrinsic, span, a.layout().ty); + simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| { + let ret_lane = fx.bcx.ins().floor(lane); + CValue::by_val(ret_lane, ret_lane_layout) + }); + }; + simd_trunc, (c a) { + validate_simd_type!(fx, intrinsic, span, a.layout().ty); + simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| { + let ret_lane = fx.bcx.ins().trunc(lane); + CValue::by_val(ret_lane, ret_lane_layout) + }); + }; + + simd_reduce_add_ordered | simd_reduce_add_unordered, (c v, v acc) { validate_simd_type!(fx, intrinsic, span, v.layout().ty); - simd_reduce(fx, v, ret, |fx, lane_layout, a, b| { + simd_reduce(fx, v, Some(acc), ret, |fx, lane_layout, a, b| { if lane_layout.ty.is_floating_point() { fx.bcx.ins().fadd(a, b) } else { @@ -248,9 +341,9 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( }); }; - simd_reduce_mul_ordered | simd_reduce_mul_unordered, (c v) { + simd_reduce_mul_ordered | simd_reduce_mul_unordered, (c v, v acc) { validate_simd_type!(fx, intrinsic, span, v.layout().ty); - simd_reduce(fx, v, ret, |fx, lane_layout, a, b| { + simd_reduce(fx, v, Some(acc), ret, |fx, lane_layout, a, b| { if lane_layout.ty.is_floating_point() { fx.bcx.ins().fmul(a, b) } else { @@ -269,13 +362,68 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( simd_reduce_bool(fx, v, ret, |fx, a, b| fx.bcx.ins().bor(a, b)); }; - // simd_fabs - // simd_saturating_add + simd_reduce_and, (c v) { + validate_simd_type!(fx, intrinsic, span, v.layout().ty); + simd_reduce(fx, v, None, ret, |fx, _layout, a, b| fx.bcx.ins().band(a, b)); + }; + + simd_reduce_or, (c v) { + validate_simd_type!(fx, intrinsic, span, v.layout().ty); + simd_reduce(fx, v, None, ret, |fx, _layout, a, b| fx.bcx.ins().bor(a, b)); + }; + + simd_reduce_xor, (c v) { + validate_simd_type!(fx, intrinsic, span, v.layout().ty); + simd_reduce(fx, v, None, ret, |fx, _layout, a, b| fx.bcx.ins().bxor(a, b)); + }; + + simd_reduce_min, (c v) { + validate_simd_type!(fx, intrinsic, span, v.layout().ty); + simd_reduce(fx, v, None, ret, |fx, layout, a, b| { + let lt = fx.bcx.ins().icmp(if layout.ty.is_signed() { + IntCC::SignedLessThan + } else { + IntCC::UnsignedLessThan + }, a, b); + fx.bcx.ins().select(lt, a, b) + }); + }; + + simd_reduce_max, (c v) { + validate_simd_type!(fx, intrinsic, span, v.layout().ty); + simd_reduce(fx, v, None, ret, |fx, layout, a, b| { + let gt = fx.bcx.ins().icmp(if layout.ty.is_signed() { + IntCC::SignedGreaterThan + } else { + IntCC::UnsignedGreaterThan + }, a, b); + fx.bcx.ins().select(gt, a, b) + }); + }; + + simd_select, (c m, c a, c b) { + validate_simd_type!(fx, intrinsic, span, m.layout().ty); + validate_simd_type!(fx, intrinsic, span, a.layout().ty); + assert_eq!(a.layout(), b.layout()); + + let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx); + let lane_layout = fx.layout_of(lane_ty); + + for lane in 0..lane_count { + let m_lane = m.value_lane(fx, lane).load_scalar(fx); + let a_lane = a.value_lane(fx, lane).load_scalar(fx); + let b_lane = b.value_lane(fx, lane).load_scalar(fx); + + let m_lane = fx.bcx.ins().icmp_imm(IntCC::Equal, m_lane, 0); + let res_lane = CValue::by_val(fx.bcx.ins().select(m_lane, b_lane, a_lane), lane_layout); + + ret.place_lane(fx, lane).write_cvalue(fx, res_lane); + } + }; + + // simd_saturating_* // simd_bitmask - // simd_select - // simd_rem - // simd_neg - // simd_trunc - // simd_floor + // simd_scatter + // simd_gather } } diff --git a/src/value_and_place.rs b/src/value_and_place.rs index ae8ccc626b4..b1e00360d56 100644 --- a/src/value_and_place.rs +++ b/src/value_and_place.rs @@ -206,6 +206,38 @@ impl<'tcx> CValue<'tcx> { } } + /// Like [`CValue::value_field`] except handling ADTs containing a single array field in a way + /// such that you can access individual lanes. + pub(crate) fn value_lane( + self, + fx: &mut FunctionCx<'_, '_, 'tcx>, + lane_idx: u64, + ) -> CValue<'tcx> { + let layout = self.1; + assert!(layout.ty.is_simd()); + let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + let lane_layout = fx.layout_of(lane_ty); + assert!(lane_idx < lane_count); + match self.0 { + CValueInner::ByVal(val) => match layout.abi { + Abi::Vector { element: _, count: _ } => { + assert!(lane_count <= u8::MAX.into(), "SIMD type with more than 255 lanes???"); + let lane_idx = u8::try_from(lane_idx).unwrap(); + let lane = fx.bcx.ins().extractlane(val, lane_idx); + CValue::by_val(lane, lane_layout) + } + _ => unreachable!("value_lane for ByVal with abi {:?}", layout.abi), + }, + CValueInner::ByValPair(_, _) => unreachable!(), + CValueInner::ByRef(ptr, None) => { + let field_offset = lane_layout.size * lane_idx; + let field_ptr = ptr.offset_i64(fx, i64::try_from(field_offset.bytes()).unwrap()); + CValue::by_ref(field_ptr, lane_layout) + } + CValueInner::ByRef(_, Some(_)) => unreachable!(), + } + } + pub(crate) fn unsize_value(self, fx: &mut FunctionCx<'_, '_, 'tcx>, dest: CPlace<'tcx>) { crate::unsize::coerce_unsized_into(fx, self, dest); } @@ -610,6 +642,38 @@ impl<'tcx> CPlace<'tcx> { } } + /// Like [`CPlace::place_field`] except handling ADTs containing a single array field in a way + /// such that you can access individual lanes. + pub(crate) fn place_lane( + self, + fx: &mut FunctionCx<'_, '_, 'tcx>, + lane_idx: u64, + ) -> CPlace<'tcx> { + let layout = self.layout(); + assert!(layout.ty.is_simd()); + let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + let lane_layout = fx.layout_of(lane_ty); + assert!(lane_idx < lane_count); + + match self.inner { + CPlaceInner::Var(local, var) => { + assert!(matches!(layout.abi, Abi::Vector { .. })); + CPlace { + inner: CPlaceInner::VarLane(local, var, lane_idx.try_into().unwrap()), + layout: lane_layout, + } + } + CPlaceInner::VarPair(_, _, _) => unreachable!(), + CPlaceInner::VarLane(_, _, _) => unreachable!(), + CPlaceInner::Addr(ptr, None) => { + let field_offset = lane_layout.size * lane_idx; + let field_ptr = ptr.offset_i64(fx, i64::try_from(field_offset.bytes()).unwrap()); + CPlace::for_ptr(field_ptr, lane_layout) + } + CPlaceInner::Addr(_, Some(_)) => unreachable!(), + } + } + pub(crate) fn place_index( self, fx: &mut FunctionCx<'_, '_, 'tcx>, |
