diff options
| author | bors <bors@rust-lang.org> | 2021-07-09 09:16:27 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2021-07-09 09:16:27 +0000 |
| commit | ee86f96ba176f598d64dc9f3bb7e074d5b8b86b6 (patch) | |
| tree | 31e632ae31b54ae25e9323bb51fe507e31d140ed /compiler/rustc_codegen_cranelift | |
| parent | 95fb1315217976ff4c268bb03c9b4132f0dfa9fd (diff) | |
| parent | d0644947a3b093bfc09452ecd14291cea5dd8f14 (diff) | |
| download | rust-ee86f96ba176f598d64dc9f3bb7e074d5b8b86b6.tar.gz rust-ee86f96ba176f598d64dc9f3bb7e074d5b8b86b6.zip | |
Auto merge of #85828 - scottmcm:raw-eq, r=oli-obk
Stop generating `alloca`s & `memcmp` for simple short array equality
Example:
```rust
pub fn demo(x: [u16; 6], y: [u16; 6]) -> bool { x == y }
```
Before:
```llvm
define zeroext i1 `@_ZN10playground4demo17h48537f7eac23948fE(i96` %0, i96 %1) unnamed_addr #0 {
start:
%y = alloca [6 x i16], align 8
%x = alloca [6 x i16], align 8
%.0..sroa_cast = bitcast [6 x i16]* %x to i96*
store i96 %0, i96* %.0..sroa_cast, align 8
%.0..sroa_cast3 = bitcast [6 x i16]* %y to i96*
store i96 %1, i96* %.0..sroa_cast3, align 8
%_11.i.i.i = bitcast [6 x i16]* %x to i8*
%_14.i.i.i = bitcast [6 x i16]* %y to i8*
%bcmp.i.i.i = call i32 `@bcmp(i8*` nonnull dereferenceable(12) %_11.i.i.i, i8* nonnull dereferenceable(12) %_14.i.i.i, i64 12) #2, !alias.scope !2
%2 = icmp eq i32 %bcmp.i.i.i, 0
ret i1 %2
}
```
```x86
playground::demo: # `@playground::demo`
sub rsp, 32
mov qword ptr [rsp], rdi
mov dword ptr [rsp + 8], esi
mov qword ptr [rsp + 16], rdx
mov dword ptr [rsp + 24], ecx
xor rdi, rdx
xor esi, ecx
or rsi, rdi
sete al
add rsp, 32
ret
```
After:
```llvm
define zeroext i1 `@_ZN4mini4demo17h7a8994aaa314c981E(i96` %0, i96 %1) unnamed_addr #0 {
start:
%2 = icmp eq i96 %0, %1
ret i1 %2
}
```
```x86
_ZN4mini4demo17h7a8994aaa314c981E:
xor rcx, r8
xor edx, r9d
or rdx, rcx
sete al
ret
```
Diffstat (limited to 'compiler/rustc_codegen_cranelift')
| -rw-r--r-- | compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs | 34 | ||||
| -rw-r--r-- | compiler/rustc_codegen_cranelift/src/value_and_place.rs | 4 |
2 files changed, 38 insertions, 0 deletions
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs index 52896fc7127..3979886e10c 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs @@ -1115,6 +1115,40 @@ pub(crate) fn codegen_intrinsic_call<'tcx>( ); ret.write_cvalue(fx, CValue::by_val(res, ret.layout())); }; + + raw_eq, <T>(v lhs_ref, v rhs_ref) { + fn type_by_size(size: Size) -> Option<Type> { + Type::int(size.bits().try_into().ok()?) + } + + let size = fx.layout_of(T).layout.size; + let is_eq_value = + if size == Size::ZERO { + // No bytes means they're trivially equal + fx.bcx.ins().iconst(types::I8, 1) + } else if let Some(clty) = type_by_size(size) { + // Can't use `trusted` for these loads; they could be unaligned. + let mut flags = MemFlags::new(); + flags.set_notrap(); + let lhs_val = fx.bcx.ins().load(clty, flags, lhs_ref, 0); + let rhs_val = fx.bcx.ins().load(clty, flags, rhs_ref, 0); + let eq = fx.bcx.ins().icmp(IntCC::Equal, lhs_val, rhs_val); + fx.bcx.ins().bint(types::I8, eq) + } else { + // Just call `memcmp` (like slices do in core) when the + // size is too large or it's not a power-of-two. + let ptr_ty = pointer_ty(fx.tcx); + let signed_bytes = i64::try_from(size.bytes()).unwrap(); + let bytes_val = fx.bcx.ins().iconst(ptr_ty, signed_bytes); + let params = vec![AbiParam::new(ptr_ty); 3]; + let returns = vec![AbiParam::new(types::I32)]; + let args = &[lhs_ref, rhs_ref, bytes_val]; + let cmp = fx.lib_call("memcmp", params, returns, args)[0]; + let eq = fx.bcx.ins().icmp_imm(IntCC::Equal, cmp, 0); + fx.bcx.ins().bint(types::I8, eq) + }; + ret.write_cvalue(fx, CValue::by_val(is_eq_value, ret.layout())); + }; } if let Some((_, dest)) = destination { diff --git a/compiler/rustc_codegen_cranelift/src/value_and_place.rs b/compiler/rustc_codegen_cranelift/src/value_and_place.rs index 171f39805f8..ae8ccc626b4 100644 --- a/compiler/rustc_codegen_cranelift/src/value_and_place.rs +++ b/compiler/rustc_codegen_cranelift/src/value_and_place.rs @@ -453,6 +453,10 @@ impl<'tcx> CPlace<'tcx> { ptr.store(fx, data, MemFlags::trusted()); ptr.load(fx, dst_ty, MemFlags::trusted()) } + + // `CValue`s should never contain SSA-only types, so if you ended + // up here having seen an error like `B1 -> I8`, then before + // calling `write_cvalue` you need to add a `bint` instruction. _ => unreachable!("write_cvalue_transmute: {:?} -> {:?}", src_ty, dst_ty), }; //fx.bcx.set_val_label(data, cranelift_codegen::ir::ValueLabel::new(var.index())); |
