diff options
| author | bors <bors@rust-lang.org> | 2021-07-09 09:16:27 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2021-07-09 09:16:27 +0000 |
| commit | ee86f96ba176f598d64dc9f3bb7e074d5b8b86b6 (patch) | |
| tree | 31e632ae31b54ae25e9323bb51fe507e31d140ed /compiler/rustc_codegen_llvm/src | |
| parent | 95fb1315217976ff4c268bb03c9b4132f0dfa9fd (diff) | |
| parent | d0644947a3b093bfc09452ecd14291cea5dd8f14 (diff) | |
| download | rust-ee86f96ba176f598d64dc9f3bb7e074d5b8b86b6.tar.gz rust-ee86f96ba176f598d64dc9f3bb7e074d5b8b86b6.zip | |
Auto merge of #85828 - scottmcm:raw-eq, r=oli-obk
Stop generating `alloca`s & `memcmp` for simple short array equality
Example:
```rust
pub fn demo(x: [u16; 6], y: [u16; 6]) -> bool { x == y }
```
Before:
```llvm
define zeroext i1 `@_ZN10playground4demo17h48537f7eac23948fE(i96` %0, i96 %1) unnamed_addr #0 {
start:
%y = alloca [6 x i16], align 8
%x = alloca [6 x i16], align 8
%.0..sroa_cast = bitcast [6 x i16]* %x to i96*
store i96 %0, i96* %.0..sroa_cast, align 8
%.0..sroa_cast3 = bitcast [6 x i16]* %y to i96*
store i96 %1, i96* %.0..sroa_cast3, align 8
%_11.i.i.i = bitcast [6 x i16]* %x to i8*
%_14.i.i.i = bitcast [6 x i16]* %y to i8*
%bcmp.i.i.i = call i32 `@bcmp(i8*` nonnull dereferenceable(12) %_11.i.i.i, i8* nonnull dereferenceable(12) %_14.i.i.i, i64 12) #2, !alias.scope !2
%2 = icmp eq i32 %bcmp.i.i.i, 0
ret i1 %2
}
```
```x86
playground::demo: # `@playground::demo`
sub rsp, 32
mov qword ptr [rsp], rdi
mov dword ptr [rsp + 8], esi
mov qword ptr [rsp + 16], rdx
mov dword ptr [rsp + 24], ecx
xor rdi, rdx
xor esi, ecx
or rsi, rdi
sete al
add rsp, 32
ret
```
After:
```llvm
define zeroext i1 `@_ZN4mini4demo17h7a8994aaa314c981E(i96` %0, i96 %1) unnamed_addr #0 {
start:
%2 = icmp eq i96 %0, %1
ret i1 %2
}
```
```x86
_ZN4mini4demo17h7a8994aaa314c981E:
xor rcx, r8
xor edx, r9d
or rdx, rcx
sete al
ret
```
Diffstat (limited to 'compiler/rustc_codegen_llvm/src')
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/context.rs | 5 | ||||
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/intrinsic.rs | 38 |
2 files changed, 43 insertions, 0 deletions
diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs index f662887abf8..d1aecd32e2f 100644 --- a/compiler/rustc_codegen_llvm/src/context.rs +++ b/compiler/rustc_codegen_llvm/src/context.rs @@ -500,6 +500,7 @@ impl CodegenCx<'b, 'tcx> { let t_i32 = self.type_i32(); let t_i64 = self.type_i64(); let t_i128 = self.type_i128(); + let t_isize = self.type_isize(); let t_f32 = self.type_f32(); let t_f64 = self.type_f64(); @@ -712,6 +713,10 @@ impl CodegenCx<'b, 'tcx> { ifn!("llvm.assume", fn(i1) -> void); ifn!("llvm.prefetch", fn(i8p, t_i32, t_i32, t_i32) -> void); + // This isn't an "LLVM intrinsic", but LLVM's optimization passes + // recognize it like one and we assume it exists in `core::slice::cmp` + ifn!("memcmp", fn(i8p, i8p, t_isize) -> t_i32); + // variadic intrinsics ifn!("llvm.va_start", fn(i8p) -> void); ifn!("llvm.va_end", fn(i8p) -> void); diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs index 1fb201eda6b..9a968659e2f 100644 --- a/compiler/rustc_codegen_llvm/src/intrinsic.rs +++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs @@ -296,6 +296,44 @@ impl IntrinsicCallMethods<'tcx> for Builder<'a, 'll, 'tcx> { } } + sym::raw_eq => { + use abi::Abi::*; + let tp_ty = substs.type_at(0); + let layout = self.layout_of(tp_ty).layout; + let use_integer_compare = match layout.abi { + Scalar(_) | ScalarPair(_, _) => true, + Uninhabited | Vector { .. } => false, + Aggregate { .. } => { + // For rusty ABIs, small aggregates are actually passed + // as `RegKind::Integer` (see `FnAbi::adjust_for_abi`), + // so we re-use that same threshold here. + layout.size <= self.data_layout().pointer_size * 2 + } + }; + + let a = args[0].immediate(); + let b = args[1].immediate(); + if layout.size.bytes() == 0 { + self.const_bool(true) + } else if use_integer_compare { + let integer_ty = self.type_ix(layout.size.bits()); + let ptr_ty = self.type_ptr_to(integer_ty); + let a_ptr = self.bitcast(a, ptr_ty); + let a_val = self.load(a_ptr, layout.align.abi); + let b_ptr = self.bitcast(b, ptr_ty); + let b_val = self.load(b_ptr, layout.align.abi); + self.icmp(IntPredicate::IntEQ, a_val, b_val) + } else { + let i8p_ty = self.type_i8p(); + let a_ptr = self.bitcast(a, i8p_ty); + let b_ptr = self.bitcast(b, i8p_ty); + let n = self.const_usize(layout.size.bytes()); + let llfn = self.get_intrinsic("memcmp"); + let cmp = self.call(llfn, &[a_ptr, b_ptr, n], None); + self.icmp(IntPredicate::IntEQ, cmp, self.const_i32(0)) + } + } + _ if name_str.starts_with("simd_") => { match generic_simd_intrinsic(self, name, callee_ty, args, ret_ty, llret_ty, span) { Ok(llval) => llval, |
