about summary refs log tree commit diff
path: root/library/alloc/src/vec
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2021-10-03 06:24:06 +0000
committerbors <bors@rust-lang.org>2021-10-03 06:24:06 +0000
commitc24c9067eec3aec8dd2013d24f6cd0dff3ecec4c (patch)
treeb62248940bca22a87b7914bab51600d62b5dd246 /library/alloc/src/vec
parent77f1e504a953efbbd59673a75c3cd530d5b3c530 (diff)
parent3839ca9953e47ad91751fdfffa10c9ae8a210e3c (diff)
downloadrust-c24c9067eec3aec8dd2013d24f6cd0dff3ecec4c.tar.gz
rust-c24c9067eec3aec8dd2013d24f6cd0dff3ecec4c.zip
Auto merge of #88060 - TennyZhuang:optimize-vec-retain, r=dtolnay
Optimize unnecessary check in Vec::retain

The function `vec::Vec::retain` only have two stages:

1. Nothing was deleted.
2. Some elements were deleted.

Here is an unnecessary check `if g.deleted_cnt > 0` in the loop, and it's difficult for compiler to optimize it. I split the loop into two stages manully and keep the code clean using const generics.

I write a special but common bench case for this optimization. I call retain on vec but keep all elements.

Before and after this optimization:

```
test vec::bench_retain_whole_100000                      ... bench:      84,803 ns/iter (+/- 17,314)
```

```
test vec::bench_retain_whole_100000                      ... bench:      42,638 ns/iter (+/- 16,910)
```

The result is expected, there are two `if`s before the optimization and one `if` after.
Diffstat (limited to 'library/alloc/src/vec')
-rw-r--r--library/alloc/src/vec/mod.rs27
1 files changed, 24 insertions, 3 deletions
diff --git a/library/alloc/src/vec/mod.rs b/library/alloc/src/vec/mod.rs
index bdfd40a952d..f3a30d09825 100644
--- a/library/alloc/src/vec/mod.rs
+++ b/library/alloc/src/vec/mod.rs
@@ -1488,7 +1488,15 @@ impl<T, A: Allocator> Vec<T, A> {
 
         let mut g = BackshiftOnDrop { v: self, processed_len: 0, deleted_cnt: 0, original_len };
 
-        while g.processed_len < original_len {
+        // process_one return a bool indicates whether the processing element should be retained.
+        #[inline(always)]
+        fn process_one<F, T, A: Allocator, const DELETED: bool>(
+            f: &mut F,
+            g: &mut BackshiftOnDrop<'_, T, A>,
+        ) -> bool
+        where
+            F: FnMut(&T) -> bool,
+        {
             // SAFETY: Unchecked element must be valid.
             let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.processed_len) };
             if !f(cur) {
@@ -1498,9 +1506,9 @@ impl<T, A: Allocator> Vec<T, A> {
                 // SAFETY: We never touch this element again after dropped.
                 unsafe { ptr::drop_in_place(cur) };
                 // We already advanced the counter.
-                continue;
+                return false;
             }
-            if g.deleted_cnt > 0 {
+            if DELETED {
                 // SAFETY: `deleted_cnt` > 0, so the hole slot must not overlap with current element.
                 // We use copy for move, and never touch this element again.
                 unsafe {
@@ -1509,6 +1517,19 @@ impl<T, A: Allocator> Vec<T, A> {
                 }
             }
             g.processed_len += 1;
+            return true;
+        }
+
+        // Stage 1: Nothing was deleted.
+        while g.processed_len != original_len {
+            if !process_one::<F, T, A, false>(&mut f, &mut g) {
+                break;
+            }
+        }
+
+        // Stage 2: Some elements were deleted.
+        while g.processed_len != original_len {
+            process_one::<F, T, A, true>(&mut f, &mut g);
         }
 
         // All item are processed. This can be optimized to `set_len` by LLVM.