about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2023-06-15 09:38:53 +0000
committerbors <bors@rust-lang.org>2023-06-15 09:38:53 +0000
commit4996b56ba9647d149265d03dcbd9ab837af3a1bb (patch)
treec4d332653e4415c2e03fd3c7ec643bd04da87171
parent5a65be815211a059b08ee3b786583308377372fa (diff)
parentd90508f76130a8a9aaa68521ac1b0e35ea9a236e (diff)
downloadrust-4996b56ba9647d149265d03dcbd9ab837af3a1bb.tar.gz
rust-4996b56ba9647d149265d03dcbd9ab837af3a1bb.zip
Auto merge of #106343 - the8472:slice-iter-fold, r=scottmcm
optimize slice::Iter::fold

Fixes 2 of 4 cases from #106288

```
OLD: test slice::fold_to_last                                           ... bench:         248 ns/iter (+/- 3)
NEW: test slice::fold_to_last                                           ... bench:           0 ns/iter (+/- 0)
```
-rw-r--r--library/core/benches/slice.rs9
-rw-r--r--library/core/src/slice/iter/macros.rs33
-rw-r--r--tests/codegen/slice-iter-fold.rs14
-rw-r--r--tests/codegen/vec-shrink-panik.rs8
4 files changed, 56 insertions, 8 deletions
diff --git a/library/core/benches/slice.rs b/library/core/benches/slice.rs
index 9b86a0ca97c..3bfb35e684e 100644
--- a/library/core/benches/slice.rs
+++ b/library/core/benches/slice.rs
@@ -1,3 +1,4 @@
+use core::ptr::NonNull;
 use test::black_box;
 use test::Bencher;
 
@@ -162,3 +163,11 @@ fn fill_byte_sized(b: &mut Bencher) {
         black_box(slice.fill(black_box(NewType(42))));
     });
 }
+
+// Tests the ability of the compiler to recognize that only the last slice item is needed
+// based on issue #106288
+#[bench]
+fn fold_to_last(b: &mut Bencher) {
+    let slice: &[i32] = &[0; 1024];
+    b.iter(|| black_box(slice).iter().fold(None, |_, r| Some(NonNull::from(r))));
+}
diff --git a/library/core/src/slice/iter/macros.rs b/library/core/src/slice/iter/macros.rs
index 3462c0e020a..96a145e22ed 100644
--- a/library/core/src/slice/iter/macros.rs
+++ b/library/core/src/slice/iter/macros.rs
@@ -191,6 +191,39 @@ macro_rules! iterator {
                 self.next_back()
             }
 
+            #[inline]
+            fn fold<B, F>(self, init: B, mut f: F) -> B
+                where
+                    F: FnMut(B, Self::Item) -> B,
+            {
+                // this implementation consists of the following optimizations compared to the
+                // default implementation:
+                // - do-while loop, as is llvm's preferred loop shape,
+                //   see https://releases.llvm.org/16.0.0/docs/LoopTerminology.html#more-canonical-loops
+                // - bumps an index instead of a pointer since the latter case inhibits
+                //   some optimizations, see #111603
+                // - avoids Option wrapping/matching
+                if is_empty!(self) {
+                    return init;
+                }
+                let mut acc = init;
+                let mut i = 0;
+                let len = len!(self);
+                loop {
+                    // SAFETY: the loop iterates `i in 0..len`, which always is in bounds of
+                    // the slice allocation
+                    acc = f(acc, unsafe { & $( $mut_ )? *self.ptr.add(i).as_ptr() });
+                    // SAFETY: `i` can't overflow since it'll only reach usize::MAX if the
+                    // slice had that length, in which case we'll break out of the loop
+                    // after the increment
+                    i = unsafe { i.unchecked_add(1) };
+                    if i == len {
+                        break;
+                    }
+                }
+                acc
+            }
+
             // We override the default implementation, which uses `try_fold`,
             // because this simple implementation generates less LLVM IR and is
             // faster to compile.
diff --git a/tests/codegen/slice-iter-fold.rs b/tests/codegen/slice-iter-fold.rs
new file mode 100644
index 00000000000..9391c176130
--- /dev/null
+++ b/tests/codegen/slice-iter-fold.rs
@@ -0,0 +1,14 @@
+// ignore-debug: the debug assertions get in the way
+// compile-flags: -O
+// min-llvm-version: 16
+#![crate_type = "lib"]
+
+// CHECK-LABEL: @slice_fold_to_last
+#[no_mangle]
+pub fn slice_fold_to_last(slice: &[i32]) -> Option<&i32> {
+    // CHECK-NOT: loop
+    // CHECK-NOT: br
+    // CHECK-NOT: call
+    // CHECK: ret
+    slice.iter().fold(None, |_, i| Some(i))
+}
diff --git a/tests/codegen/vec-shrink-panik.rs b/tests/codegen/vec-shrink-panik.rs
index 606d68ff3ab..14fef4e2cd5 100644
--- a/tests/codegen/vec-shrink-panik.rs
+++ b/tests/codegen/vec-shrink-panik.rs
@@ -38,14 +38,6 @@ pub fn issue71861(vec: Vec<u32>) -> Box<[u32]> {
 #[no_mangle]
 pub fn issue75636<'a>(iter: &[&'a str]) -> Box<[&'a str]> {
     // CHECK-NOT: panic
-
-    // Call to panic_cannot_unwind in case of double-panic is expected,
-    // on LLVM 16 and older, but other panics are not.
-    // old: filter
-    // old-NEXT: ; call core::panicking::panic_cannot_unwind
-    // old-NEXT: panic_cannot_unwind
-
-    // CHECK-NOT: panic
     iter.iter().copied().collect()
 }