diff options
| author | bors <bors@rust-lang.org> | 2021-12-16 07:58:36 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2021-12-16 07:58:36 +0000 |
| commit | a090c8659c3be0cbc7dc93c4b2c11a9cdbf8b980 (patch) | |
| tree | 51187e7cb6ee1c308e65f0ea8494c212091fa39f | |
| parent | 9e1aff82e696c0edb568656ae6b509a9ab4d6c92 (diff) | |
| parent | 67180ef056b76f5d90c2164895adbe88fa056332 (diff) | |
| download | rust-a090c8659c3be0cbc7dc93c4b2c11a9cdbf8b980.tar.gz rust-a090c8659c3be0cbc7dc93c4b2c11a9cdbf8b980.zip | |
Auto merge of #91527 - the8472:retain-opt, r=dtolnay
Optimize `vec::retain` performance This simply moves the loops into the inner function which leads to better results. ``` old: test vec::bench_retain_100000 ... bench: 203,828 ns/iter (+/- 2,101) test vec::bench_retain_iter_100000 ... bench: 63,324 ns/iter (+/- 12,305) test vec::bench_retain_whole_100000 ... bench: 42,989 ns/iter (+/- 291) new: test vec::bench_retain_100000 ... bench: 42,180 ns/iter (+/- 451) test vec::bench_retain_iter_100000 ... bench: 65,167 ns/iter (+/- 11,971) test vec::bench_retain_whole_100000 ... bench: 33,736 ns/iter (+/- 12,404) ``` Measured on x86_64-unknown-linux-gnu, Zen2 Fixes #91497
| -rw-r--r-- | library/alloc/benches/vec.rs | 19 | ||||
| -rw-r--r-- | library/alloc/src/vec/mod.rs | 61 |
2 files changed, 46 insertions, 34 deletions
diff --git a/library/alloc/benches/vec.rs b/library/alloc/benches/vec.rs index 8e1d374b5d4..0da4886278e 100644 --- a/library/alloc/benches/vec.rs +++ b/library/alloc/benches/vec.rs @@ -733,11 +733,26 @@ fn bench_flat_map_collect(b: &mut Bencher) { b.iter(|| v.iter().flat_map(|color| color.rotate_left(8).to_be_bytes()).collect::<Vec<_>>()); } +/// Reference benchmark that `retain` has to compete with. +#[bench] +fn bench_retain_iter_100000(b: &mut Bencher) { + let mut v = Vec::with_capacity(100000); + + b.iter(|| { + let mut tmp = std::mem::take(&mut v); + tmp.clear(); + tmp.extend(black_box(1..=100000)); + v = tmp.into_iter().filter(|x| x & 1 == 0).collect(); + }); +} + #[bench] fn bench_retain_100000(b: &mut Bencher) { - let v = (1..=100000).collect::<Vec<u32>>(); + let mut v = Vec::with_capacity(100000); + b.iter(|| { - let mut v = v.clone(); + v.clear(); + v.extend(black_box(1..=100000)); v.retain(|x| x & 1 == 0) }); } diff --git a/library/alloc/src/vec/mod.rs b/library/alloc/src/vec/mod.rs index b6b11b75c99..d24b4bdffde 100644 --- a/library/alloc/src/vec/mod.rs +++ b/library/alloc/src/vec/mod.rs @@ -1520,49 +1520,46 @@ impl<T, A: Allocator> Vec<T, A> { let mut g = BackshiftOnDrop { v: self, processed_len: 0, deleted_cnt: 0, original_len }; - // process_one return a bool indicates whether the processing element should be retained. - #[inline(always)] - fn process_one<F, T, A: Allocator, const DELETED: bool>( + fn process_loop<F, T, A: Allocator, const DELETED: bool>( + original_len: usize, f: &mut F, g: &mut BackshiftOnDrop<'_, T, A>, - ) -> bool - where + ) where F: FnMut(&mut T) -> bool, { - // SAFETY: Unchecked element must be valid. - let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.processed_len) }; - if !f(cur) { - // Advance early to avoid double drop if `drop_in_place` panicked. - g.processed_len += 1; - g.deleted_cnt += 1; - // SAFETY: We never touch this element again after dropped. - unsafe { ptr::drop_in_place(cur) }; - // We already advanced the counter. - return false; - } - if DELETED { - // SAFETY: `deleted_cnt` > 0, so the hole slot must not overlap with current element. - // We use copy for move, and never touch this element again. - unsafe { - let hole_slot = g.v.as_mut_ptr().add(g.processed_len - g.deleted_cnt); - ptr::copy_nonoverlapping(cur, hole_slot, 1); + while g.processed_len != original_len { + // SAFETY: Unchecked element must be valid. + let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.processed_len) }; + if !f(cur) { + // Advance early to avoid double drop if `drop_in_place` panicked. + g.processed_len += 1; + g.deleted_cnt += 1; + // SAFETY: We never touch this element again after dropped. + unsafe { ptr::drop_in_place(cur) }; + // We already advanced the counter. + if DELETED { + continue; + } else { + break; + } + } + if DELETED { + // SAFETY: `deleted_cnt` > 0, so the hole slot must not overlap with current element. + // We use copy for move, and never touch this element again. + unsafe { + let hole_slot = g.v.as_mut_ptr().add(g.processed_len - g.deleted_cnt); + ptr::copy_nonoverlapping(cur, hole_slot, 1); + } } + g.processed_len += 1; } - g.processed_len += 1; - return true; } // Stage 1: Nothing was deleted. - while g.processed_len != original_len { - if !process_one::<F, T, A, false>(&mut f, &mut g) { - break; - } - } + process_loop::<F, T, A, false>(original_len, &mut f, &mut g); // Stage 2: Some elements were deleted. - while g.processed_len != original_len { - process_one::<F, T, A, true>(&mut f, &mut g); - } + process_loop::<F, T, A, true>(original_len, &mut f, &mut g); // All item are processed. This can be optimized to `set_len` by LLVM. drop(g); |
