about summary refs log tree commit diff
path: root/src/test/codegen
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2021-12-17 19:17:29 +0000
committerbors <bors@rust-lang.org>2021-12-17 19:17:29 +0000
commit7abab1efb21617ba6845fa86328dffa16cfcf1dc (patch)
treeab4bcc70374e8b52eadd3280497a97b5a3598c0c /src/test/codegen
parent34dc0d0f249a33fda18755991b4e73ad786d2b19 (diff)
parenta0b96902e4c622d40c7186fc0c7ba13efc1fc912 (diff)
downloadrust-7abab1efb21617ba6845fa86328dffa16cfcf1dc.tar.gz
rust-7abab1efb21617ba6845fa86328dffa16cfcf1dc.zip
Auto merge of #91838 - scottmcm:array-slice-eq-via-arrays-not-slices, r=dtolnay
Do array-slice equality via array equality, rather than always via slices

~~Draft because it needs a rebase after #91766 eventually gets through bors.~~

This enables the optimizations from #85828 to be used for array-to-slice comparisons too, not just array-to-array.

For example, <https://play.rust-lang.org/?version=nightly&mode=release&edition=2021&gist=5f9ba69b3d5825a782f897c830d3a6aa>
```rust
pub fn demo(x: &[u8], y: [u8; 4]) -> bool {
    *x == y
}
```
Currently writes the array to stack for no reason:
```nasm
	sub	rsp, 4
	mov	dword ptr [rsp], edx
	cmp	rsi, 4
	jne	.LBB0_1
	mov	eax, dword ptr [rdi]
	cmp	eax, dword ptr [rsp]
	sete	al
	add	rsp, 4
	ret

.LBB0_1:
	xor	eax, eax
	add	rsp, 4
	ret
```
Whereas with the change in this PR it just compares it directly:
```nasm
	cmp	rsi, 4
	jne	.LBB1_1
	cmp	dword ptr [rdi], edx
	sete	al
	ret

.LBB1_1:
	xor	eax, eax
	ret
```
Diffstat (limited to 'src/test/codegen')
-rw-r--r--src/test/codegen/slice-ref-equality.rs19
1 files changed, 16 insertions, 3 deletions
diff --git a/src/test/codegen/slice-ref-equality.rs b/src/test/codegen/slice-ref-equality.rs
index 1f99ac7342b..c06554ecdec 100644
--- a/src/test/codegen/slice-ref-equality.rs
+++ b/src/test/codegen/slice-ref-equality.rs
@@ -4,18 +4,31 @@
 
 // #71602 reported a simple array comparison just generating a loop.
 // This was originally fixed by ensuring it generates a single bcmp,
-// but we now generate it as a load instead. `is_zero_slice` was
+// but we now generate it as a load+icmp instead. `is_zero_slice` was
 // tweaked to still test the case of comparison against a slice,
 // and `is_zero_array` tests the new array-specific behaviour.
+// The optimization was then extended to short slice-to-array comparisons,
+// so the first test here now has a long slice to still get the bcmp.
 
-// CHECK-LABEL: @is_zero_slice
+// CHECK-LABEL: @is_zero_slice_long
 #[no_mangle]
-pub fn is_zero_slice(data: &[u8; 4]) -> bool {
+pub fn is_zero_slice_long(data: &[u8; 456]) -> bool {
     // CHECK: :
     // CHECK-NEXT: %{{.+}} = getelementptr {{.+}}
     // CHECK-NEXT: %[[BCMP:.+]] = tail call i32 @{{bcmp|memcmp}}({{.+}})
     // CHECK-NEXT: %[[EQ:.+]] = icmp eq i32 %[[BCMP]], 0
     // CHECK-NEXT: ret i1 %[[EQ]]
+    &data[..] == [0; 456]
+}
+
+// CHECK-LABEL: @is_zero_slice_short
+#[no_mangle]
+pub fn is_zero_slice_short(data: &[u8; 4]) -> bool {
+    // CHECK: :
+    // CHECK-NEXT: %[[PTR:.+]] = bitcast [4 x i8]* {{.+}} to i32*
+    // CHECK-NEXT: %[[LOAD:.+]] = load i32, i32* %[[PTR]], align 1
+    // CHECK-NEXT: %[[EQ:.+]] = icmp eq i32 %[[LOAD]], 0
+    // CHECK-NEXT: ret i1 %[[EQ]]
     &data[..] == [0; 4]
 }