| author | bors <bors@rust-lang.org> | 2021-12-17 19:17:29 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2021-12-17 19:17:29 +0000 |
| commit | 7abab1efb21617ba6845fa86328dffa16cfcf1dc (patch) | |
| tree | ab4bcc70374e8b52eadd3280497a97b5a3598c0c /src/test/codegen | |
| parent | 34dc0d0f249a33fda18755991b4e73ad786d2b19 (diff) | |
| parent | a0b96902e4c622d40c7186fc0c7ba13efc1fc912 (diff) | |
Auto merge of #91838 - scottmcm:array-slice-eq-via-arrays-not-slices, r=dtolnay
Do array-slice equality via array equality, rather than always via slices
~~Draft because it needs a rebase after #91766 eventually gets through bors.~~
This enables the optimizations from #85828 to be used for array-to-slice comparisons too, not just array-to-array.
For example, <https://play.rust-lang.org/?version=nightly&mode=release&edition=2021&gist=5f9ba69b3d5825a782f897c830d3a6aa>
```rust
pub fn demo(x: &[u8], y: [u8; 4]) -> bool {
*x == y
}
```
Currently, this writes the array to the stack for no reason:
```nasm
sub rsp, 4
mov dword ptr [rsp], edx
cmp rsi, 4
jne .LBB0_1
mov eax, dword ptr [rdi]
cmp eax, dword ptr [rsp]
sete al
add rsp, 4
ret
.LBB0_1:
xor eax, eax
add rsp, 4
ret
```
Whereas with the change in this PR, it compares the value directly:
```nasm
cmp rsi, 4
jne .LBB1_1
cmp dword ptr [rdi], edx
sete al
ret
.LBB1_1:
xor eax, eax
ret
```
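To illustrate the idea in the PR title (routing array–slice equality through array equality), here is a minimal, hedged sketch rather than the actual `core` implementation: when the slice's length matches the array's, the slice is reinterpreted as an array reference and compared via the fixed-length array path, which LLVM can lower to a single load+icmp for small sizes. The helper name `slice_eq_array` is invented for this example.

```rust
// Sketch only (not the real core impl): forward a slice-vs-array comparison
// to the array comparison once the length check passes.
fn slice_eq_array<T: PartialEq, const N: usize>(s: &[T], a: &[T; N]) -> bool {
    match <&[T; N]>::try_from(s) {
        Ok(s) => s == a,  // lengths match: compare as fixed-size arrays
        Err(_) => false,  // length mismatch: never equal
    }
}

fn main() {
    assert!(slice_eq_array(&[0u8, 0, 0, 0][..], &[0u8; 4]));
    assert!(!slice_eq_array(&[0u8, 0, 0][..], &[0u8; 4]));
}
```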
Diffstat (limited to 'src/test/codegen')
| -rw-r--r-- | src/test/codegen/slice-ref-equality.rs | 19 |
1 file changed, 16 insertions(+), 3 deletions(-)
```diff
diff --git a/src/test/codegen/slice-ref-equality.rs b/src/test/codegen/slice-ref-equality.rs
index 1f99ac7342b..c06554ecdec 100644
--- a/src/test/codegen/slice-ref-equality.rs
+++ b/src/test/codegen/slice-ref-equality.rs
@@ -4,18 +4,31 @@
 
 // #71602 reported a simple array comparison just generating a loop.
 // This was originally fixed by ensuring it generates a single bcmp,
-// but we now generate it as a load instead. `is_zero_slice` was
+// but we now generate it as a load+icmp instead. `is_zero_slice` was
 // tweaked to still test the case of comparison against a slice,
 // and `is_zero_array` tests the new array-specific behaviour.
+// The optimization was then extended to short slice-to-array comparisons,
+// so the first test here now has a long slice to still get the bcmp.
 
-// CHECK-LABEL: @is_zero_slice
+// CHECK-LABEL: @is_zero_slice_long
 #[no_mangle]
-pub fn is_zero_slice(data: &[u8; 4]) -> bool {
+pub fn is_zero_slice_long(data: &[u8; 456]) -> bool {
     // CHECK: :
     // CHECK-NEXT: %{{.+}} = getelementptr {{.+}}
     // CHECK-NEXT: %[[BCMP:.+]] = tail call i32 @{{bcmp|memcmp}}({{.+}})
     // CHECK-NEXT: %[[EQ:.+]] = icmp eq i32 %[[BCMP]], 0
     // CHECK-NEXT: ret i1 %[[EQ]]
+    &data[..] == [0; 456]
+}
+
+// CHECK-LABEL: @is_zero_slice_short
+#[no_mangle]
+pub fn is_zero_slice_short(data: &[u8; 4]) -> bool {
+    // CHECK: :
+    // CHECK-NEXT: %[[PTR:.+]] = bitcast [4 x i8]* {{.+}} to i32*
+    // CHECK-NEXT: %[[LOAD:.+]] = load i32, i32* %[[PTR]], align 1
+    // CHECK-NEXT: %[[EQ:.+]] = icmp eq i32 %[[LOAD]], 0
+    // CHECK-NEXT: ret i1 %[[EQ]]
     &data[..] == [0; 4]
 }
 
```
