diff options
| author | bors <bors@rust-lang.org> | 2021-08-21 23:35:54 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2021-08-21 23:35:54 +0000 |
| commit | 9faa714154dbc03faa174a7d4f72d6bbbfd61f7c (patch) | |
| tree | 1777530d1b9e9c53d01d6025aee0e8f4800763d2 | |
| parent | d3e2578c31688619ddc0a10ddf8543bf4ebcba5b (diff) | |
| parent | e32f4c06d34ecb4b55b678bbff35d8a77f81cf16 (diff) | |
| download | rust-9faa714154dbc03faa174a7d4f72d6bbbfd61f7c.tar.gz rust-9faa714154dbc03faa174a7d4f72d6bbbfd61f7c.zip | |
Auto merge of #88075 - Xuanwo:vec_deque_retain, r=dtolnay
Optimize unnecessary check in VecDeque::retain This pr is highly inspired by https://github.com/rust-lang/rust/pull/88060 which shared the same idea: we can split the `for` loop into stages so that we can remove unnecessary checks like `del > 0`. ## Benchmarks Before ```rust test collections::vec_deque::tests::bench_retain_half_10000 ... bench: 290,125 ns/iter (+/- 8,717) test collections::vec_deque::tests::bench_retain_odd_10000 ... bench: 291,588 ns/iter (+/- 9,621) test collections::vec_deque::tests::bench_retain_whole_10000 ... bench: 287,426 ns/iter (+/- 9,009) ``` After ```rust test collections::vec_deque::tests::bench_retain_half_10000 ... bench: 243,940 ns/iter (+/- 8,563) test collections::vec_deque::tests::bench_retain_odd_10000 ... bench: 242,768 ns/iter (+/- 3,903) test collections::vec_deque::tests::bench_retain_whole_10000 ... bench: 202,926 ns/iter (+/- 6,332) ``` Based on the current benchmark, this PR will improve the perf of `VecDeque::retain` by around 16%. For special cases, the improvement will be up to 30%. Signed-off-by: Xuanwo <github@xuanwo.io>
| -rw-r--r-- | library/alloc/src/collections/vec_deque/mod.rs | 32 | ||||
| -rw-r--r-- | library/alloc/src/collections/vec_deque/tests.rs | 33 |
2 files changed, 57 insertions, 8 deletions
diff --git a/library/alloc/src/collections/vec_deque/mod.rs b/library/alloc/src/collections/vec_deque/mod.rs index 9a2205420a1..e4b28204158 100644 --- a/library/alloc/src/collections/vec_deque/mod.rs +++ b/library/alloc/src/collections/vec_deque/mod.rs @@ -2129,16 +2129,32 @@ impl<T, A: Allocator> VecDeque<T, A> { F: FnMut(&T) -> bool, { let len = self.len(); - let mut del = 0; - for i in 0..len { - if !f(&self[i]) { - del += 1; - } else if del > 0 { - self.swap(i - del, i); + let mut idx = 0; + let mut cur = 0; + + // Stage 1: All values are retained. + while cur < len { + if !f(&self[cur]) { + cur += 1; + break; } + cur += 1; + idx += 1; } - if del > 0 { - self.truncate(len - del); + // Stage 2: Swap retained value into current idx. + while cur < len { + if !f(&self[cur]) { + cur += 1; + continue; + } + + self.swap(idx, cur); + cur += 1; + idx += 1; + } + // Stage 3: Trancate all values after idx. + if cur != idx { + self.truncate(idx); } } diff --git a/library/alloc/src/collections/vec_deque/tests.rs b/library/alloc/src/collections/vec_deque/tests.rs index 6116cfe1d01..56257e43462 100644 --- a/library/alloc/src/collections/vec_deque/tests.rs +++ b/library/alloc/src/collections/vec_deque/tests.rs @@ -42,6 +42,39 @@ fn bench_pop_back_100(b: &mut test::Bencher) { #[bench] #[cfg_attr(miri, ignore)] // isolated Miri does not support benchmarks +fn bench_retain_whole_10000(b: &mut test::Bencher) { + let v = (1..100000).collect::<VecDeque<u32>>(); + + b.iter(|| { + let mut v = v.clone(); + v.retain(|x| *x > 0) + }) +} + +#[bench] +#[cfg_attr(miri, ignore)] // isolated Miri does not support benchmarks +fn bench_retain_odd_10000(b: &mut test::Bencher) { + let v = (1..100000).collect::<VecDeque<u32>>(); + + b.iter(|| { + let mut v = v.clone(); + v.retain(|x| x & 1 == 0) + }) +} + +#[bench] +#[cfg_attr(miri, ignore)] // isolated Miri does not support benchmarks +fn bench_retain_half_10000(b: &mut test::Bencher) { + let v = (1..100000).collect::<VecDeque<u32>>(); + + b.iter(|| { + let mut v = v.clone(); + v.retain(|x| *x > 50000) + }) +} + +#[bench] +#[cfg_attr(miri, ignore)] // isolated Miri does not support benchmarks fn bench_pop_front_100(b: &mut test::Bencher) { let mut deq = VecDeque::<i32>::with_capacity(101); |
