diff options
| author | bors <bors@rust-lang.org> | 2021-02-11 04:40:57 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2021-02-11 04:40:57 +0000 |
| commit | 1efd8049837aec86f21da1486e8a459a7efd7806 (patch) | |
| tree | 8f8c3ebfed31260b12470da6431a96231e39f7aa /library/alloc/src/vec | |
| parent | 9ce7268bcfc17265bd05e4c08713d170d39618ad (diff) | |
| parent | 2a11c57fb04d68c261dc14f6cf1aa3f8f14f0a2e (diff) | |
| download | rust-1efd8049837aec86f21da1486e8a459a7efd7806.tar.gz rust-1efd8049837aec86f21da1486e8a459a7efd7806.zip | |
Auto merge of #81126 - oxalica:retain-early-drop, r=m-ou-se
Optimize Vec::retain Use `copy_non_overlapping` instead of `swap` to reduce memory writes, like what we've done in #44355 and `String::retain`. #48065 already tried to do this optimization but it is reverted in #67300 due to bad codegen of `DrainFilter::drop`. This PR re-implement the drop-then-move approach. I did a [benchmark](https://gist.github.com/oxalica/3360eec9376f22533fcecff02798b698) on small-no-drop, small-need-drop, large-no-drop elements with different predicate functions. It turns out that the new implementation is >20% faster in average for almost all cases. Only 2/24 cases are slower by 3% and 5%. See the link above for more detail. I think regression in may-panic cases is due to drop-guard preventing some optimization. If it's permitted to leak elements when predicate function of element's `drop` panic, the new implementation should be almost always faster than current one. I'm not sure if we should leak on panic, since there is indeed an issue (#52267) complains about it before.
Diffstat (limited to 'library/alloc/src/vec')
| -rw-r--r-- | library/alloc/src/vec/mod.rs | 75 |
1 files changed, 64 insertions, 11 deletions
diff --git a/library/alloc/src/vec/mod.rs b/library/alloc/src/vec/mod.rs index c7b4c98041b..b40c1a8c57a 100644 --- a/library/alloc/src/vec/mod.rs +++ b/library/alloc/src/vec/mod.rs @@ -1399,22 +1399,75 @@ impl<T, A: Allocator> Vec<T, A> { where F: FnMut(&T) -> bool, { - let len = self.len(); - let mut del = 0; - { - let v = &mut **self; + let original_len = self.len(); + // Avoid double drop if the drop guard is not executed, + // since we may make some holes during the process. + unsafe { self.set_len(0) }; + + // Vec: [Kept, Kept, Hole, Hole, Hole, Hole, Unchecked, Unchecked] + // |<- processed len ->| ^- next to check + // |<- deleted cnt ->| + // |<- original_len ->| + // Kept: Elements which predicate returns true on. + // Hole: Moved or dropped element slot. + // Unchecked: Unchecked valid elements. + // + // This drop guard will be invoked when predicate or `drop` of element panicked. + // It shifts unchecked elements to cover holes and `set_len` to the correct length. + // In cases when predicate and `drop` never panick, it will be optimized out. + struct BackshiftOnDrop<'a, T, A: Allocator> { + v: &'a mut Vec<T, A>, + processed_len: usize, + deleted_cnt: usize, + original_len: usize, + } - for i in 0..len { - if !f(&v[i]) { - del += 1; - } else if del > 0 { - v.swap(i - del, i); + impl<T, A: Allocator> Drop for BackshiftOnDrop<'_, T, A> { + fn drop(&mut self) { + if self.deleted_cnt > 0 { + // SAFETY: Trailing unchecked items must be valid since we never touch them. + unsafe { + ptr::copy( + self.v.as_ptr().add(self.processed_len), + self.v.as_mut_ptr().add(self.processed_len - self.deleted_cnt), + self.original_len - self.processed_len, + ); + } + } + // SAFETY: After filling holes, all items are in contiguous memory. + unsafe { + self.v.set_len(self.original_len - self.deleted_cnt); } } } - if del > 0 { - self.truncate(len - del); + + let mut g = BackshiftOnDrop { v: self, processed_len: 0, deleted_cnt: 0, original_len }; + + while g.processed_len < original_len { + // SAFETY: Unchecked element must be valid. + let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.processed_len) }; + if !f(cur) { + // Advance early to avoid double drop if `drop_in_place` panicked. + g.processed_len += 1; + g.deleted_cnt += 1; + // SAFETY: We never touch this element again after dropped. + unsafe { ptr::drop_in_place(cur) }; + // We already advanced the counter. + continue; + } + if g.deleted_cnt > 0 { + // SAFETY: `deleted_cnt` > 0, so the hole slot must not overlap with current element. + // We use copy for move, and never touch this element again. + unsafe { + let hole_slot = g.v.as_mut_ptr().add(g.processed_len - g.deleted_cnt); + ptr::copy_nonoverlapping(cur, hole_slot, 1); + } + } + g.processed_len += 1; } + + // All item are processed. This can be optimized to `set_len` by LLVM. + drop(g); } /// Removes all but the first of consecutive elements in the vector that resolve to the same |
