about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2021-08-21 23:35:54 +0000
committerbors <bors@rust-lang.org>2021-08-21 23:35:54 +0000
commit9faa714154dbc03faa174a7d4f72d6bbbfd61f7c (patch)
tree1777530d1b9e9c53d01d6025aee0e8f4800763d2
parentd3e2578c31688619ddc0a10ddf8543bf4ebcba5b (diff)
parente32f4c06d34ecb4b55b678bbff35d8a77f81cf16 (diff)
downloadrust-9faa714154dbc03faa174a7d4f72d6bbbfd61f7c.tar.gz
rust-9faa714154dbc03faa174a7d4f72d6bbbfd61f7c.zip
Auto merge of #88075 - Xuanwo:vec_deque_retain, r=dtolnay
Optimize unnecessary check in VecDeque::retain

This pr is highly inspired by https://github.com/rust-lang/rust/pull/88060 which shared the same idea: we can split the `for` loop into stages so that we can remove unnecessary checks like `del > 0`.

## Benchmarks

Before

```rust
test collections::vec_deque::tests::bench_retain_half_10000  ... bench:     290,125 ns/iter (+/- 8,717)
test collections::vec_deque::tests::bench_retain_odd_10000   ... bench:     291,588 ns/iter (+/- 9,621)
test collections::vec_deque::tests::bench_retain_whole_10000 ... bench:     287,426 ns/iter (+/- 9,009)
```

After

```rust
test collections::vec_deque::tests::bench_retain_half_10000  ... bench:     243,940 ns/iter (+/- 8,563)
test collections::vec_deque::tests::bench_retain_odd_10000   ... bench:     242,768 ns/iter (+/- 3,903)
test collections::vec_deque::tests::bench_retain_whole_10000 ... bench:     202,926 ns/iter (+/- 6,332)
```

Based on the current benchmark, this PR will improve the perf of `VecDeque::retain` by around 16%. For special cases, the improvement will be up to 30%.

Signed-off-by: Xuanwo <github@xuanwo.io>
-rw-r--r--library/alloc/src/collections/vec_deque/mod.rs32
-rw-r--r--library/alloc/src/collections/vec_deque/tests.rs33
2 files changed, 57 insertions, 8 deletions
diff --git a/library/alloc/src/collections/vec_deque/mod.rs b/library/alloc/src/collections/vec_deque/mod.rs
index 9a2205420a1..e4b28204158 100644
--- a/library/alloc/src/collections/vec_deque/mod.rs
+++ b/library/alloc/src/collections/vec_deque/mod.rs
@@ -2129,16 +2129,32 @@ impl<T, A: Allocator> VecDeque<T, A> {
         F: FnMut(&T) -> bool,
     {
         let len = self.len();
-        let mut del = 0;
-        for i in 0..len {
-            if !f(&self[i]) {
-                del += 1;
-            } else if del > 0 {
-                self.swap(i - del, i);
+        let mut idx = 0;
+        let mut cur = 0;
+
+        // Stage 1: All values are retained.
+        while cur < len {
+            if !f(&self[cur]) {
+                cur += 1;
+                break;
             }
+            cur += 1;
+            idx += 1;
         }
-        if del > 0 {
-            self.truncate(len - del);
+        // Stage 2: Swap retained value into current idx.
+        while cur < len {
+            if !f(&self[cur]) {
+                cur += 1;
+                continue;
+            }
+
+            self.swap(idx, cur);
+            cur += 1;
+            idx += 1;
+        }
+        // Stage 3: Trancate all values after idx.
+        if cur != idx {
+            self.truncate(idx);
         }
     }
 
diff --git a/library/alloc/src/collections/vec_deque/tests.rs b/library/alloc/src/collections/vec_deque/tests.rs
index 6116cfe1d01..56257e43462 100644
--- a/library/alloc/src/collections/vec_deque/tests.rs
+++ b/library/alloc/src/collections/vec_deque/tests.rs
@@ -42,6 +42,39 @@ fn bench_pop_back_100(b: &mut test::Bencher) {
 
 #[bench]
 #[cfg_attr(miri, ignore)] // isolated Miri does not support benchmarks
+fn bench_retain_whole_10000(b: &mut test::Bencher) {
+    let v = (1..100000).collect::<VecDeque<u32>>();
+
+    b.iter(|| {
+        let mut v = v.clone();
+        v.retain(|x| *x > 0)
+    })
+}
+
+#[bench]
+#[cfg_attr(miri, ignore)] // isolated Miri does not support benchmarks
+fn bench_retain_odd_10000(b: &mut test::Bencher) {
+    let v = (1..100000).collect::<VecDeque<u32>>();
+
+    b.iter(|| {
+        let mut v = v.clone();
+        v.retain(|x| x & 1 == 0)
+    })
+}
+
+#[bench]
+#[cfg_attr(miri, ignore)] // isolated Miri does not support benchmarks
+fn bench_retain_half_10000(b: &mut test::Bencher) {
+    let v = (1..100000).collect::<VecDeque<u32>>();
+
+    b.iter(|| {
+        let mut v = v.clone();
+        v.retain(|x| *x > 50000)
+    })
+}
+
+#[bench]
+#[cfg_attr(miri, ignore)] // isolated Miri does not support benchmarks
 fn bench_pop_front_100(b: &mut test::Bencher) {
     let mut deq = VecDeque::<i32>::with_capacity(101);