diff options
| author | Scott McMurray <scottmcm@users.noreply.github.com> | 2021-05-30 10:25:41 -0700 |
|---|---|---|
| committer | Scott McMurray <scottmcm@users.noreply.github.com> | 2021-07-08 14:55:54 -0700 |
| commit | 2456495a260827217d3c612d6c577c2f165c61eb (patch) | |
| tree | e85ffd47a15f93a5b6f0a6324bb8b747659a2448 | |
| parent | d05eafae2fcc05bd64ab094a1352a5c16df3106e (diff) | |
| download | rust-2456495a260827217d3c612d6c577c2f165c61eb.tar.gz rust-2456495a260827217d3c612d6c577c2f165c61eb.zip | |
Stop generating `alloca`s+`memcmp` for simple array equality
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/context.rs | 5 | ||||
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/intrinsic.rs | 26 | ||||
| -rw-r--r-- | compiler/rustc_mir/src/interpret/intrinsics.rs | 18 | ||||
| -rw-r--r-- | compiler/rustc_span/src/symbol.rs | 1 | ||||
| -rw-r--r-- | compiler/rustc_typeck/src/check/intrinsic.rs | 8 | ||||
| -rw-r--r-- | library/core/src/array/equality.rs | 53 | ||||
| -rw-r--r-- | library/core/src/intrinsics.rs | 16 | ||||
| -rw-r--r-- | src/test/codegen/array-equality.rs | 36 | ||||
| -rw-r--r-- | src/test/codegen/slice-ref-equality.rs | 19 | ||||
| -rw-r--r-- | src/test/ui/intrinsics/intrinsic-raw_eq-const-padding.rs | 12 | ||||
| -rw-r--r-- | src/test/ui/intrinsics/intrinsic-raw_eq-const-padding.stderr | 21 | ||||
| -rw-r--r-- | src/test/ui/intrinsics/intrinsic-raw_eq-const.rs | 27 |
12 files changed, 238 insertions, 4 deletions
diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs index f662887abf8..d1aecd32e2f 100644 --- a/compiler/rustc_codegen_llvm/src/context.rs +++ b/compiler/rustc_codegen_llvm/src/context.rs @@ -500,6 +500,7 @@ impl CodegenCx<'b, 'tcx> { let t_i32 = self.type_i32(); let t_i64 = self.type_i64(); let t_i128 = self.type_i128(); + let t_isize = self.type_isize(); let t_f32 = self.type_f32(); let t_f64 = self.type_f64(); @@ -712,6 +713,10 @@ impl CodegenCx<'b, 'tcx> { ifn!("llvm.assume", fn(i1) -> void); ifn!("llvm.prefetch", fn(i8p, t_i32, t_i32, t_i32) -> void); + // This isn't an "LLVM intrinsic", but LLVM's optimization passes + // recognize it like one and we assume it exists in `core::slice::cmp` + ifn!("memcmp", fn(i8p, i8p, t_isize) -> t_i32); + // variadic intrinsics ifn!("llvm.va_start", fn(i8p) -> void); ifn!("llvm.va_end", fn(i8p) -> void); diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs index 1fb201eda6b..615295e96e1 100644 --- a/compiler/rustc_codegen_llvm/src/intrinsic.rs +++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs @@ -296,6 +296,32 @@ impl IntrinsicCallMethods<'tcx> for Builder<'a, 'll, 'tcx> { } } + sym::raw_eq => { + let tp_ty = substs.type_at(0); + let (size, align) = self.size_and_align_of(tp_ty); + let a = args[0].immediate(); + let b = args[1].immediate(); + if size.bytes() == 0 { + self.const_bool(true) + } else if size > self.data_layout().pointer_size * 4 { + let i8p_ty = self.type_i8p(); + let a_ptr = self.bitcast(a, i8p_ty); + let b_ptr = self.bitcast(b, i8p_ty); + let n = self.const_usize(size.bytes()); + let llfn = self.get_intrinsic("memcmp"); + let cmp = self.call(llfn, &[a_ptr, b_ptr, n], None); + self.icmp(IntPredicate::IntEQ, cmp, self.const_i32(0)) + } else { + let integer_ty = self.type_ix(size.bits()); + let ptr_ty = self.type_ptr_to(integer_ty); + let a_ptr = self.bitcast(a, ptr_ty); + let a_val = self.load(a_ptr, align); + let b_ptr = self.bitcast(b, ptr_ty); + let b_val = self.load(b_ptr, align); + self.icmp(IntPredicate::IntEQ, a_val, b_val) + } + } + _ if name_str.starts_with("simd_") => { match generic_simd_intrinsic(self, name, callee_ty, args, ret_ty, llret_ty, span) { Ok(llval) => llval, diff --git a/compiler/rustc_mir/src/interpret/intrinsics.rs b/compiler/rustc_mir/src/interpret/intrinsics.rs index 4e4166dad50..5dd679b8912 100644 --- a/compiler/rustc_mir/src/interpret/intrinsics.rs +++ b/compiler/rustc_mir/src/interpret/intrinsics.rs @@ -472,6 +472,10 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { throw_ub_format!("`assume` intrinsic called with `false`"); } } + sym::raw_eq => { + let result = self.raw_eq_intrinsic(&args[0], &args[1])?; + self.write_scalar(result, dest)?; + } _ => return Ok(false), } @@ -559,4 +563,18 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { self.memory.copy(src, align, dst, align, size, nonoverlapping) } + + pub(crate) fn raw_eq_intrinsic( + &mut self, + lhs: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::PointerTag>, + rhs: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::PointerTag>, + ) -> InterpResult<'tcx, Scalar<M::PointerTag>> { + let layout = self.layout_of(lhs.layout.ty.builtin_deref(true).unwrap().ty)?; + + let lhs = self.read_scalar(lhs)?.check_init()?; + let rhs = self.read_scalar(rhs)?.check_init()?; + let lhs_bytes = self.memory.read_bytes(lhs, layout.size)?; + let rhs_bytes = self.memory.read_bytes(rhs, layout.size)?; + Ok(Scalar::Int((lhs_bytes == rhs_bytes).into())) + } } diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index edb97d70517..3ab32fe418d 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -933,6 +933,7 @@ symbols! { quote, range_inclusive_new, raw_dylib, + raw_eq, raw_identifiers, raw_ref_op, re_rebalance_coherence, diff --git a/compiler/rustc_typeck/src/check/intrinsic.rs b/compiler/rustc_typeck/src/check/intrinsic.rs index 882d5d54b7c..18ccaf79d32 100644 --- a/compiler/rustc_typeck/src/check/intrinsic.rs +++ b/compiler/rustc_typeck/src/check/intrinsic.rs @@ -380,6 +380,14 @@ pub fn check_intrinsic_type(tcx: TyCtxt<'_>, it: &hir::ForeignItem<'_>) { sym::nontemporal_store => (1, vec![tcx.mk_mut_ptr(param(0)), param(0)], tcx.mk_unit()), + sym::raw_eq => { + let param_count = if intrinsic_name == sym::raw_eq { 2 } else { 1 }; + let br = ty::BoundRegion { var: ty::BoundVar::from_u32(0), kind: ty::BrAnon(0) }; + let param_ty = + tcx.mk_imm_ref(tcx.mk_region(ty::ReLateBound(ty::INNERMOST, br)), param(0)); + (1, vec![param_ty; param_count], tcx.types.bool) + } + other => { tcx.sess.emit_err(UnrecognizedIntrinsicFunction { span: it.span, name: other }); return; diff --git a/library/core/src/array/equality.rs b/library/core/src/array/equality.rs index dcd78e7a245..6d66b9e2f27 100644 --- a/library/core/src/array/equality.rs +++ b/library/core/src/array/equality.rs @@ -5,11 +5,11 @@ where { #[inline] fn eq(&self, other: &[B; N]) -> bool { - self[..] == other[..] + SpecArrayEq::spec_eq(self, other) } #[inline] fn ne(&self, other: &[B; N]) -> bool { - self[..] != other[..] + SpecArrayEq::spec_ne(self, other) } } @@ -109,3 +109,52 @@ where #[stable(feature = "rust1", since = "1.0.0")] impl<T: Eq, const N: usize> Eq for [T; N] {} + +trait SpecArrayEq<Other, const N: usize>: Sized { + fn spec_eq(a: &[Self; N], b: &[Other; N]) -> bool; + fn spec_ne(a: &[Self; N], b: &[Other; N]) -> bool; +} + +impl<T: PartialEq<Other>, Other, const N: usize> SpecArrayEq<Other, N> for T { + default fn spec_eq(a: &[Self; N], b: &[Other; N]) -> bool { + a[..] == b[..] + } + default fn spec_ne(a: &[Self; N], b: &[Other; N]) -> bool { + a[..] != b[..] + } +} + +impl<T: PartialEq<U> + IsRawEqComparable<U>, U, const N: usize> SpecArrayEq<U, N> for T { + #[cfg(bootstrap)] + fn spec_eq(a: &[T; N], b: &[U; N]) -> bool { + a[..] == b[..] + } + #[cfg(not(bootstrap))] + fn spec_eq(a: &[T; N], b: &[U; N]) -> bool { + // SAFETY: This is why `IsRawEqComparable` is an `unsafe trait`. + unsafe { + let b = &*b.as_ptr().cast::<[T; N]>(); + crate::intrinsics::raw_eq(a, b) + } + } + fn spec_ne(a: &[T; N], b: &[U; N]) -> bool { + !Self::spec_eq(a, b) + } +} + +/// `U` exists on here mostly because `min_specialization` didn't let me +/// repeat the `T` type parameter in the above specialization, so instead +/// the `T == U` constraint comes from the impls on this. +/// # Safety +/// - Neither `Self` nor `U` has any padding. +/// - `Self` and `U` have the same layout. +/// - `Self: PartialEq<U>` is byte-wise (this means no floats, among other things) +#[rustc_specialization_trait] +unsafe trait IsRawEqComparable<U> {} + +macro_rules! is_raw_comparable { + ($($t:ty),+) => {$( + unsafe impl IsRawEqComparable<$t> for $t {} + )+}; +} +is_raw_comparable!(bool, char, u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize); diff --git a/library/core/src/intrinsics.rs b/library/core/src/intrinsics.rs index c5a4bbd3208..7d2c278aa05 100644 --- a/library/core/src/intrinsics.rs +++ b/library/core/src/intrinsics.rs @@ -1913,6 +1913,22 @@ extern "rust-intrinsic" { /// Allocate at compile time. Should not be called at runtime. #[rustc_const_unstable(feature = "const_heap", issue = "79597")] pub fn const_allocate(size: usize, align: usize) -> *mut u8; + + /// Determines whether the raw bytes of the two values are equal. + /// + /// The is particularly handy for arrays, since it allows things like just + /// comparing `i96`s instead of forcing `alloca`s for `[6 x i16]`. + /// + /// Above some backend-decided threshold this will emit calls to `memcmp`, + /// like slice equality does, instead of causing massive code size. + /// + /// # Safety + /// + /// This doesn't take into account padding, so if `T` has padding + /// the result will be `undef`, which cannot be exposed to safe code. + #[cfg(not(bootstrap))] + #[rustc_const_unstable(feature = "const_intrinsic_raw_eq", issue = "none")] + pub fn raw_eq<T>(a: &T, b: &T) -> bool; } // Some functions are defined here because they accidentally got made diff --git a/src/test/codegen/array-equality.rs b/src/test/codegen/array-equality.rs new file mode 100644 index 00000000000..6a9fb5c8f86 --- /dev/null +++ b/src/test/codegen/array-equality.rs @@ -0,0 +1,36 @@ +// compile-flags: -O +// only-x86_64 + +#![crate_type = "lib"] + +// CHECK-LABEL: @array_eq_value +#[no_mangle] +pub fn array_eq_value(a: [u16; 6], b: [u16; 6]) -> bool { + // CHECK-NEXT: start: + // CHECK-NEXT: %2 = icmp eq i96 %0, %1 + // CHECK-NEXT: ret i1 %2 + a == b +} + +// CHECK-LABEL: @array_eq_ref +#[no_mangle] +pub fn array_eq_ref(a: &[u16; 6], b: &[u16; 6]) -> bool { + // CHECK: start: + // CHECK: load i96, i96* %{{.+}}, align 2 + // CHECK: load i96, i96* %{{.+}}, align 2 + // CHECK: icmp eq i96 + // CHECK-NEXT: ret + a == b +} + +// CHECK-LABEL: @array_eq_long +#[no_mangle] +pub fn array_eq_long(a: &[u16; 1234], b: &[u16; 1234]) -> bool { + // CHECK-NEXT: start: + // CHECK-NEXT: bitcast + // CHECK-NEXT: bitcast + // CHECK-NEXT: %[[CMP:.+]] = tail call i32 @{{bcmp|memcmp}}(i8* nonnull dereferenceable(2468) %{{.+}}, i8* nonnull dereferenceable(2468) %{{.+}}, i64 2468) + // CHECK-NEXT: %[[EQ:.+]] = icmp eq i32 %[[CMP]], 0 + // CHECK-NEXT: ret i1 %[[EQ]] + a == b +} diff --git a/src/test/codegen/slice-ref-equality.rs b/src/test/codegen/slice-ref-equality.rs index acc7879e7b1..1f99ac7342b 100644 --- a/src/test/codegen/slice-ref-equality.rs +++ b/src/test/codegen/slice-ref-equality.rs @@ -2,15 +2,30 @@ #![crate_type = "lib"] -// #71602: check that slice equality just generates a single bcmp +// #71602 reported a simple array comparison just generating a loop. +// This was originally fixed by ensuring it generates a single bcmp, +// but we now generate it as a load instead. `is_zero_slice` was +// tweaked to still test the case of comparison against a slice, +// and `is_zero_array` tests the new array-specific behaviour. // CHECK-LABEL: @is_zero_slice #[no_mangle] pub fn is_zero_slice(data: &[u8; 4]) -> bool { - // CHECK: start: + // CHECK: : // CHECK-NEXT: %{{.+}} = getelementptr {{.+}} // CHECK-NEXT: %[[BCMP:.+]] = tail call i32 @{{bcmp|memcmp}}({{.+}}) // CHECK-NEXT: %[[EQ:.+]] = icmp eq i32 %[[BCMP]], 0 // CHECK-NEXT: ret i1 %[[EQ]] + &data[..] == [0; 4] +} + +// CHECK-LABEL: @is_zero_array +#[no_mangle] +pub fn is_zero_array(data: &[u8; 4]) -> bool { + // CHECK: start: + // CHECK-NEXT: %[[PTR:.+]] = bitcast [4 x i8]* {{.+}} to i32* + // CHECK-NEXT: %[[LOAD:.+]] = load i32, i32* %[[PTR]], align 1 + // CHECK-NEXT: %[[EQ:.+]] = icmp eq i32 %[[LOAD]], 0 + // CHECK-NEXT: ret i1 %[[EQ]] *data == [0; 4] } diff --git a/src/test/ui/intrinsics/intrinsic-raw_eq-const-padding.rs b/src/test/ui/intrinsics/intrinsic-raw_eq-const-padding.rs new file mode 100644 index 00000000000..ec1c47cfaea --- /dev/null +++ b/src/test/ui/intrinsics/intrinsic-raw_eq-const-padding.rs @@ -0,0 +1,12 @@ +#![feature(core_intrinsics)] +#![feature(const_intrinsic_raw_eq)] +#![deny(const_err)] + +const BAD_RAW_EQ_CALL: bool = unsafe { + std::intrinsics::raw_eq(&(1_u8, 2_u16), &(1_u8, 2_u16)) +//~^ ERROR any use of this value will cause an error +//~| WARNING this was previously accepted by the compiler but is being phased out +}; + +pub fn main() { +} diff --git a/src/test/ui/intrinsics/intrinsic-raw_eq-const-padding.stderr b/src/test/ui/intrinsics/intrinsic-raw_eq-const-padding.stderr new file mode 100644 index 00000000000..74df99a69d1 --- /dev/null +++ b/src/test/ui/intrinsics/intrinsic-raw_eq-const-padding.stderr @@ -0,0 +1,21 @@ +error: any use of this value will cause an error + --> $DIR/intrinsic-raw_eq-const-padding.rs:6:5 + | +LL | / const BAD_RAW_EQ_CALL: bool = unsafe { +LL | | std::intrinsics::raw_eq(&(1_u8, 2_u16), &(1_u8, 2_u16)) + | | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ reading 4 bytes of memory starting at alloc2, but 1 byte is uninitialized starting at alloc2+0x1, and this operation requires initialized memory +LL | | +LL | | +LL | | }; + | |__- + | +note: the lint level is defined here + --> $DIR/intrinsic-raw_eq-const-padding.rs:3:9 + | +LL | #![deny(const_err)] + | ^^^^^^^^^ + = warning: this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release! + = note: for more information, see issue #71800 <https://github.com/rust-lang/rust/issues/71800> + +error: aborting due to previous error + diff --git a/src/test/ui/intrinsics/intrinsic-raw_eq-const.rs b/src/test/ui/intrinsics/intrinsic-raw_eq-const.rs new file mode 100644 index 00000000000..8ea95467302 --- /dev/null +++ b/src/test/ui/intrinsics/intrinsic-raw_eq-const.rs @@ -0,0 +1,27 @@ +// run-pass + +#![feature(core_intrinsics)] +#![feature(const_intrinsic_raw_eq)] +#![deny(const_err)] + +pub fn main() { + use std::intrinsics::raw_eq; + + const RAW_EQ_I32_TRUE: bool = unsafe { raw_eq(&42_i32, &42) }; + assert!(RAW_EQ_I32_TRUE); + + const RAW_EQ_I32_FALSE: bool = unsafe { raw_eq(&4_i32, &2) }; + assert!(!RAW_EQ_I32_FALSE); + + const RAW_EQ_CHAR_TRUE: bool = unsafe { raw_eq(&'a', &'a') }; + assert!(RAW_EQ_CHAR_TRUE); + + const RAW_EQ_CHAR_FALSE: bool = unsafe { raw_eq(&'a', &'A') }; + assert!(!RAW_EQ_CHAR_FALSE); + + const RAW_EQ_ARRAY_TRUE: bool = unsafe { raw_eq(&[13_u8, 42], &[13, 42]) }; + assert!(RAW_EQ_ARRAY_TRUE); + + const RAW_EQ_ARRAY_FALSE: bool = unsafe { raw_eq(&[13_u8, 42], &[42, 13]) }; + assert!(!RAW_EQ_ARRAY_FALSE); +} |
