about summary refs log tree commit diff
path: root/src/test/codegen
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2021-11-11 09:13:22 +0000
committerbors <bors@rust-lang.org>2021-11-11 09:13:22 +0000
commit62efba8a050c64249dab942951bb28f710208bc8 (patch)
tree7c82fb540c251a5495feb58954789fcab2ffbf95 /src/test/codegen
parent1d34cb4efccb6ec47c2eb478e4cc900936dfe387 (diff)
parent5b115fcb6839b94a14fb77acdd738281b4bb3fae (diff)
downloadrust-62efba8a050c64249dab942951bb28f710208bc8.tar.gz
rust-62efba8a050c64249dab942951bb28f710208bc8.zip
Auto merge of #90755 - scottmcm:spec-array-clone, r=jackh726
Specialize array cloning for Copy types

Because after PR 86041, the optimizer no longer load-merges at the LLVM IR level, which might be part of the perf loss.  (I'll run perf and see if this makes a difference.)

Also I added a codegen test so this hopefully won't regress in future -- it passes on stable and with my change here, but not on the 2021-11-09 nightly.

Example on current nightly: <https://play.rust-lang.org/?version=nightly&mode=release&edition=2021&gist=1f52d46fb8fc3ca3ac9f097390085ffa>
```rust
type T = u8;
const N: usize = 3;

pub fn demo_clone(x: &[T; N]) -> [T; N] {
    x.clone()
}

pub fn demo_copy(x: &[T; N]) -> [T; N] {
    *x
}
```
```llvm-ir
; playground::demo_clone
; Function Attrs: mustprogress nofree nosync nounwind nonlazybind uwtable willreturn
define i24 `@_ZN10playground10demo_clone17h98a4f11453d1a753E([3` x i8]* noalias nocapture readonly align 1 dereferenceable(3) %x) unnamed_addr #0 personality i32 (i32, i32, i64, %"unwind::libunwind::_Unwind_Exception"*, %"unwind::libunwind::_Unwind_Context"*)* `@rust_eh_personality` {
start:
  %0 = getelementptr [3 x i8], [3 x i8]* %x, i64 0, i64 0
  %1 = getelementptr inbounds [3 x i8], [3 x i8]* %x, i64 0, i64 1
  %.val.i.i.i.i.i.i.i.i.i = load i8, i8* %0, align 1, !alias.scope !2, !noalias !9
  %2 = getelementptr inbounds [3 x i8], [3 x i8]* %x, i64 0, i64 2
  %.val.i.i.i.i.i.1.i.i.i.i = load i8, i8* %1, align 1, !alias.scope !2, !noalias !20
  %.val.i.i.i.i.i.2.i.i.i.i = load i8, i8* %2, align 1, !alias.scope !2, !noalias !23
  %array.sroa.6.0.insert.ext.i.i.i.i = zext i8 %.val.i.i.i.i.i.2.i.i.i.i to i32
  %array.sroa.6.0.insert.shift.i.i.i.i = shl nuw nsw i32 %array.sroa.6.0.insert.ext.i.i.i.i, 16
  %array.sroa.5.0.insert.ext.i.i.i.i = zext i8 %.val.i.i.i.i.i.1.i.i.i.i to i32
  %array.sroa.5.0.insert.shift.i.i.i.i = shl nuw nsw i32 %array.sroa.5.0.insert.ext.i.i.i.i, 8
  %array.sroa.0.0.insert.ext.i.i.i.i = zext i8 %.val.i.i.i.i.i.i.i.i.i to i32
  %array.sroa.5.0.insert.insert.i.i.i.i = or i32 %array.sroa.5.0.insert.shift.i.i.i.i, %array.sroa.0.0.insert.ext.i.i.i.i
  %array.sroa.0.0.insert.insert.i.i.i.i = or i32 %array.sroa.5.0.insert.insert.i.i.i.i, %array.sroa.6.0.insert.shift.i.i.i.i
  %.sroa.4.0.extract.trunc.i.i.i.i = trunc i32 %array.sroa.0.0.insert.insert.i.i.i.i to i24
  ret i24 %.sroa.4.0.extract.trunc.i.i.i.i
}

; playground::demo_copy
; Function Attrs: mustprogress nofree norecurse nosync nounwind nonlazybind readonly uwtable willreturn
define i24 `@_ZN10playground9demo_copy17h7817453f9291d746E([3` x i8]* noalias nocapture readonly align 1 dereferenceable(3) %x) unnamed_addr #1 {
start:
  %.sroa.0.0..sroa_cast = bitcast [3 x i8]* %x to i24*
  %.sroa.0.0.copyload = load i24, i24* %.sroa.0.0..sroa_cast, align 1
  ret i24 %.sroa.0.0.copyload
}
```
Diffstat (limited to 'src/test/codegen')
-rw-r--r--src/test/codegen/array-clone.rs15
1 files changed, 15 insertions, 0 deletions
diff --git a/src/test/codegen/array-clone.rs b/src/test/codegen/array-clone.rs
new file mode 100644
index 00000000000..0d42963bcd2
--- /dev/null
+++ b/src/test/codegen/array-clone.rs
@@ -0,0 +1,15 @@
+// compile-flags: -O
+
+#![crate_type = "lib"]
+
+// CHECK-LABEL: @array_clone
+#[no_mangle]
+pub fn array_clone(a: &[u8; 2]) -> [u8; 2] {
+    // CHECK-NOT: getelementptr
+    // CHECK-NOT: load i8
+    // CHECK-NOT: zext
+    // CHECK-NOT: shl
+    // CHECK: load i16
+    // CHECK-NEXT: ret i16
+    a.clone()
+}