about summary refs log tree commit diff
path: root/compiler/rustc_codegen_llvm/src
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2021-07-09 09:16:27 +0000
committerbors <bors@rust-lang.org>2021-07-09 09:16:27 +0000
commitee86f96ba176f598d64dc9f3bb7e074d5b8b86b6 (patch)
tree31e632ae31b54ae25e9323bb51fe507e31d140ed /compiler/rustc_codegen_llvm/src
parent95fb1315217976ff4c268bb03c9b4132f0dfa9fd (diff)
parentd0644947a3b093bfc09452ecd14291cea5dd8f14 (diff)
downloadrust-ee86f96ba176f598d64dc9f3bb7e074d5b8b86b6.tar.gz
rust-ee86f96ba176f598d64dc9f3bb7e074d5b8b86b6.zip
Auto merge of #85828 - scottmcm:raw-eq, r=oli-obk
Stop generating `alloca`s & `memcmp` for simple short array equality

Example:
```rust
pub fn demo(x: [u16; 6], y: [u16; 6]) -> bool { x == y }
```

Before:
```llvm
define zeroext i1 `@_ZN10playground4demo17h48537f7eac23948fE(i96` %0, i96 %1) unnamed_addr #0 {
start:
  %y = alloca [6 x i16], align 8
  %x = alloca [6 x i16], align 8
  %.0..sroa_cast = bitcast [6 x i16]* %x to i96*
  store i96 %0, i96* %.0..sroa_cast, align 8
  %.0..sroa_cast3 = bitcast [6 x i16]* %y to i96*
  store i96 %1, i96* %.0..sroa_cast3, align 8
  %_11.i.i.i = bitcast [6 x i16]* %x to i8*
  %_14.i.i.i = bitcast [6 x i16]* %y to i8*
  %bcmp.i.i.i = call i32 `@bcmp(i8*` nonnull dereferenceable(12) %_11.i.i.i, i8* nonnull dereferenceable(12) %_14.i.i.i, i64 12) #2, !alias.scope !2
  %2 = icmp eq i32 %bcmp.i.i.i, 0
  ret i1 %2
}
```
```x86
playground::demo: # `@playground::demo`
	sub	rsp, 32
	mov	qword ptr [rsp], rdi
	mov	dword ptr [rsp + 8], esi
	mov	qword ptr [rsp + 16], rdx
	mov	dword ptr [rsp + 24], ecx
	xor	rdi, rdx
	xor	esi, ecx
	or	rsi, rdi
	sete	al
	add	rsp, 32
	ret
```

After:
```llvm
define zeroext i1 `@_ZN4mini4demo17h7a8994aaa314c981E(i96` %0, i96 %1) unnamed_addr #0 {
start:
  %2 = icmp eq i96 %0, %1
  ret i1 %2
}
```
```x86
_ZN4mini4demo17h7a8994aaa314c981E:
	xor	rcx, r8
	xor	edx, r9d
	or	rdx, rcx
	sete	al
	ret
```
Diffstat (limited to 'compiler/rustc_codegen_llvm/src')
-rw-r--r--compiler/rustc_codegen_llvm/src/context.rs5
-rw-r--r--compiler/rustc_codegen_llvm/src/intrinsic.rs38
2 files changed, 43 insertions, 0 deletions
diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs
index f662887abf8..d1aecd32e2f 100644
--- a/compiler/rustc_codegen_llvm/src/context.rs
+++ b/compiler/rustc_codegen_llvm/src/context.rs
@@ -500,6 +500,7 @@ impl CodegenCx<'b, 'tcx> {
         let t_i32 = self.type_i32();
         let t_i64 = self.type_i64();
         let t_i128 = self.type_i128();
+        let t_isize = self.type_isize();
         let t_f32 = self.type_f32();
         let t_f64 = self.type_f64();
 
@@ -712,6 +713,10 @@ impl CodegenCx<'b, 'tcx> {
         ifn!("llvm.assume", fn(i1) -> void);
         ifn!("llvm.prefetch", fn(i8p, t_i32, t_i32, t_i32) -> void);
 
+        // This isn't an "LLVM intrinsic", but LLVM's optimization passes
+        // recognize it like one and we assume it exists in `core::slice::cmp`
+        ifn!("memcmp", fn(i8p, i8p, t_isize) -> t_i32);
+
         // variadic intrinsics
         ifn!("llvm.va_start", fn(i8p) -> void);
         ifn!("llvm.va_end", fn(i8p) -> void);
diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index 1fb201eda6b..9a968659e2f 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -296,6 +296,44 @@ impl IntrinsicCallMethods<'tcx> for Builder<'a, 'll, 'tcx> {
                 }
             }
 
+            sym::raw_eq => {
+                use abi::Abi::*;
+                let tp_ty = substs.type_at(0);
+                let layout = self.layout_of(tp_ty).layout;
+                let use_integer_compare = match layout.abi {
+                    Scalar(_) | ScalarPair(_, _) => true,
+                    Uninhabited | Vector { .. } => false,
+                    Aggregate { .. } => {
+                        // For rusty ABIs, small aggregates are actually passed
+                        // as `RegKind::Integer` (see `FnAbi::adjust_for_abi`),
+                        // so we re-use that same threshold here.
+                        layout.size <= self.data_layout().pointer_size * 2
+                    }
+                };
+
+                let a = args[0].immediate();
+                let b = args[1].immediate();
+                if layout.size.bytes() == 0 {
+                    self.const_bool(true)
+                } else if use_integer_compare {
+                    let integer_ty = self.type_ix(layout.size.bits());
+                    let ptr_ty = self.type_ptr_to(integer_ty);
+                    let a_ptr = self.bitcast(a, ptr_ty);
+                    let a_val = self.load(a_ptr, layout.align.abi);
+                    let b_ptr = self.bitcast(b, ptr_ty);
+                    let b_val = self.load(b_ptr, layout.align.abi);
+                    self.icmp(IntPredicate::IntEQ, a_val, b_val)
+                } else {
+                    let i8p_ty = self.type_i8p();
+                    let a_ptr = self.bitcast(a, i8p_ty);
+                    let b_ptr = self.bitcast(b, i8p_ty);
+                    let n = self.const_usize(layout.size.bytes());
+                    let llfn = self.get_intrinsic("memcmp");
+                    let cmp = self.call(llfn, &[a_ptr, b_ptr, n], None);
+                    self.icmp(IntPredicate::IntEQ, cmp, self.const_i32(0))
+                }
+            }
+
             _ if name_str.starts_with("simd_") => {
                 match generic_simd_intrinsic(self, name, callee_ty, args, ret_ty, llret_ty, span) {
                     Ok(llval) => llval,