about summary refs log tree commit diff
path: root/compiler/rustc_codegen_cranelift
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2021-07-09 09:16:27 +0000
committerbors <bors@rust-lang.org>2021-07-09 09:16:27 +0000
commitee86f96ba176f598d64dc9f3bb7e074d5b8b86b6 (patch)
tree31e632ae31b54ae25e9323bb51fe507e31d140ed /compiler/rustc_codegen_cranelift
parent95fb1315217976ff4c268bb03c9b4132f0dfa9fd (diff)
parentd0644947a3b093bfc09452ecd14291cea5dd8f14 (diff)
downloadrust-ee86f96ba176f598d64dc9f3bb7e074d5b8b86b6.tar.gz
rust-ee86f96ba176f598d64dc9f3bb7e074d5b8b86b6.zip
Auto merge of #85828 - scottmcm:raw-eq, r=oli-obk
Stop generating `alloca`s & `memcmp` for simple short array equality

Example:
```rust
pub fn demo(x: [u16; 6], y: [u16; 6]) -> bool { x == y }
```

Before:
```llvm
define zeroext i1 `@_ZN10playground4demo17h48537f7eac23948fE(i96` %0, i96 %1) unnamed_addr #0 {
start:
  %y = alloca [6 x i16], align 8
  %x = alloca [6 x i16], align 8
  %.0..sroa_cast = bitcast [6 x i16]* %x to i96*
  store i96 %0, i96* %.0..sroa_cast, align 8
  %.0..sroa_cast3 = bitcast [6 x i16]* %y to i96*
  store i96 %1, i96* %.0..sroa_cast3, align 8
  %_11.i.i.i = bitcast [6 x i16]* %x to i8*
  %_14.i.i.i = bitcast [6 x i16]* %y to i8*
  %bcmp.i.i.i = call i32 `@bcmp(i8*` nonnull dereferenceable(12) %_11.i.i.i, i8* nonnull dereferenceable(12) %_14.i.i.i, i64 12) #2, !alias.scope !2
  %2 = icmp eq i32 %bcmp.i.i.i, 0
  ret i1 %2
}
```
```x86
playground::demo: # `@playground::demo`
	sub	rsp, 32
	mov	qword ptr [rsp], rdi
	mov	dword ptr [rsp + 8], esi
	mov	qword ptr [rsp + 16], rdx
	mov	dword ptr [rsp + 24], ecx
	xor	rdi, rdx
	xor	esi, ecx
	or	rsi, rdi
	sete	al
	add	rsp, 32
	ret
```

After:
```llvm
define zeroext i1 `@_ZN4mini4demo17h7a8994aaa314c981E(i96` %0, i96 %1) unnamed_addr #0 {
start:
  %2 = icmp eq i96 %0, %1
  ret i1 %2
}
```
```x86
_ZN4mini4demo17h7a8994aaa314c981E:
	xor	rcx, r8
	xor	edx, r9d
	or	rdx, rcx
	sete	al
	ret
```
Diffstat (limited to 'compiler/rustc_codegen_cranelift')
-rw-r--r--compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs34
-rw-r--r--compiler/rustc_codegen_cranelift/src/value_and_place.rs4
2 files changed, 38 insertions, 0 deletions
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
index 52896fc7127..3979886e10c 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
@@ -1115,6 +1115,40 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
             );
             ret.write_cvalue(fx, CValue::by_val(res, ret.layout()));
         };
+
+        raw_eq, <T>(v lhs_ref, v rhs_ref) {
+            fn type_by_size(size: Size) -> Option<Type> {
+                Type::int(size.bits().try_into().ok()?)
+            }
+
+            let size = fx.layout_of(T).layout.size;
+            let is_eq_value =
+                if size == Size::ZERO {
+                    // No bytes means they're trivially equal
+                    fx.bcx.ins().iconst(types::I8, 1)
+                } else if let Some(clty) = type_by_size(size) {
+                    // Can't use `trusted` for these loads; they could be unaligned.
+                    let mut flags = MemFlags::new();
+                    flags.set_notrap();
+                    let lhs_val = fx.bcx.ins().load(clty, flags, lhs_ref, 0);
+                    let rhs_val = fx.bcx.ins().load(clty, flags, rhs_ref, 0);
+                    let eq = fx.bcx.ins().icmp(IntCC::Equal, lhs_val, rhs_val);
+                    fx.bcx.ins().bint(types::I8, eq)
+                } else {
+                    // Just call `memcmp` (like slices do in core) when the
+                    // size is too large or it's not a power-of-two.
+                    let ptr_ty = pointer_ty(fx.tcx);
+                    let signed_bytes = i64::try_from(size.bytes()).unwrap();
+                    let bytes_val = fx.bcx.ins().iconst(ptr_ty, signed_bytes);
+                    let params = vec![AbiParam::new(ptr_ty); 3];
+                    let returns = vec![AbiParam::new(types::I32)];
+                    let args = &[lhs_ref, rhs_ref, bytes_val];
+                    let cmp = fx.lib_call("memcmp", params, returns, args)[0];
+                    let eq = fx.bcx.ins().icmp_imm(IntCC::Equal, cmp, 0);
+                    fx.bcx.ins().bint(types::I8, eq)
+                };
+            ret.write_cvalue(fx, CValue::by_val(is_eq_value, ret.layout()));
+        };
     }
 
     if let Some((_, dest)) = destination {
diff --git a/compiler/rustc_codegen_cranelift/src/value_and_place.rs b/compiler/rustc_codegen_cranelift/src/value_and_place.rs
index 171f39805f8..ae8ccc626b4 100644
--- a/compiler/rustc_codegen_cranelift/src/value_and_place.rs
+++ b/compiler/rustc_codegen_cranelift/src/value_and_place.rs
@@ -453,6 +453,10 @@ impl<'tcx> CPlace<'tcx> {
                     ptr.store(fx, data, MemFlags::trusted());
                     ptr.load(fx, dst_ty, MemFlags::trusted())
                 }
+
+                // `CValue`s should never contain SSA-only types, so if you ended
+                // up here having seen an error like `B1 -> I8`, then before
+                // calling `write_cvalue` you need to add a `bint` instruction.
                 _ => unreachable!("write_cvalue_transmute: {:?} -> {:?}", src_ty, dst_ty),
             };
             //fx.bcx.set_val_label(data, cranelift_codegen::ir::ValueLabel::new(var.index()));