about summary refs log tree commit diff
path: root/library/alloc/src/vec
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2021-02-11 04:40:57 +0000
committerbors <bors@rust-lang.org>2021-02-11 04:40:57 +0000
commit1efd8049837aec86f21da1486e8a459a7efd7806 (patch)
tree8f8c3ebfed31260b12470da6431a96231e39f7aa /library/alloc/src/vec
parent9ce7268bcfc17265bd05e4c08713d170d39618ad (diff)
parent2a11c57fb04d68c261dc14f6cf1aa3f8f14f0a2e (diff)
downloadrust-1efd8049837aec86f21da1486e8a459a7efd7806.tar.gz
rust-1efd8049837aec86f21da1486e8a459a7efd7806.zip
Auto merge of #81126 - oxalica:retain-early-drop, r=m-ou-se
Optimize Vec::retain

Use `copy_non_overlapping` instead of `swap` to reduce memory writes, like what we've done in #44355 and `String::retain`.
#48065 already tried to do this optimization but it is reverted in #67300 due to bad codegen of `DrainFilter::drop`.

This PR re-implement the drop-then-move approach. I did a [benchmark](https://gist.github.com/oxalica/3360eec9376f22533fcecff02798b698) on small-no-drop, small-need-drop, large-no-drop elements with different predicate functions. It turns out that the new implementation is >20% faster in average for almost all cases. Only 2/24 cases are slower by 3% and 5%. See the link above for more detail.

I think regression in may-panic cases is due to drop-guard preventing some optimization. If it's permitted to leak elements when predicate function of element's `drop` panic, the new implementation should be almost always faster than current one.
I'm not sure if we should leak on panic, since there is indeed an issue (#52267) complains about it before.
Diffstat (limited to 'library/alloc/src/vec')
-rw-r--r--library/alloc/src/vec/mod.rs75
1 files changed, 64 insertions, 11 deletions
diff --git a/library/alloc/src/vec/mod.rs b/library/alloc/src/vec/mod.rs
index c7b4c98041b..b40c1a8c57a 100644
--- a/library/alloc/src/vec/mod.rs
+++ b/library/alloc/src/vec/mod.rs
@@ -1399,22 +1399,75 @@ impl<T, A: Allocator> Vec<T, A> {
     where
         F: FnMut(&T) -> bool,
     {
-        let len = self.len();
-        let mut del = 0;
-        {
-            let v = &mut **self;
+        let original_len = self.len();
+        // Avoid double drop if the drop guard is not executed,
+        // since we may make some holes during the process.
+        unsafe { self.set_len(0) };
+
+        // Vec: [Kept, Kept, Hole, Hole, Hole, Hole, Unchecked, Unchecked]
+        //      |<-              processed len   ->| ^- next to check
+        //                  |<-  deleted cnt     ->|
+        //      |<-              original_len                          ->|
+        // Kept: Elements which predicate returns true on.
+        // Hole: Moved or dropped element slot.
+        // Unchecked: Unchecked valid elements.
+        //
+        // This drop guard will be invoked when predicate or `drop` of element panicked.
+        // It shifts unchecked elements to cover holes and `set_len` to the correct length.
+        // In cases when predicate and `drop` never panick, it will be optimized out.
+        struct BackshiftOnDrop<'a, T, A: Allocator> {
+            v: &'a mut Vec<T, A>,
+            processed_len: usize,
+            deleted_cnt: usize,
+            original_len: usize,
+        }
 
-            for i in 0..len {
-                if !f(&v[i]) {
-                    del += 1;
-                } else if del > 0 {
-                    v.swap(i - del, i);
+        impl<T, A: Allocator> Drop for BackshiftOnDrop<'_, T, A> {
+            fn drop(&mut self) {
+                if self.deleted_cnt > 0 {
+                    // SAFETY: Trailing unchecked items must be valid since we never touch them.
+                    unsafe {
+                        ptr::copy(
+                            self.v.as_ptr().add(self.processed_len),
+                            self.v.as_mut_ptr().add(self.processed_len - self.deleted_cnt),
+                            self.original_len - self.processed_len,
+                        );
+                    }
+                }
+                // SAFETY: After filling holes, all items are in contiguous memory.
+                unsafe {
+                    self.v.set_len(self.original_len - self.deleted_cnt);
                 }
             }
         }
-        if del > 0 {
-            self.truncate(len - del);
+
+        let mut g = BackshiftOnDrop { v: self, processed_len: 0, deleted_cnt: 0, original_len };
+
+        while g.processed_len < original_len {
+            // SAFETY: Unchecked element must be valid.
+            let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.processed_len) };
+            if !f(cur) {
+                // Advance early to avoid double drop if `drop_in_place` panicked.
+                g.processed_len += 1;
+                g.deleted_cnt += 1;
+                // SAFETY: We never touch this element again after dropped.
+                unsafe { ptr::drop_in_place(cur) };
+                // We already advanced the counter.
+                continue;
+            }
+            if g.deleted_cnt > 0 {
+                // SAFETY: `deleted_cnt` > 0, so the hole slot must not overlap with current element.
+                // We use copy for move, and never touch this element again.
+                unsafe {
+                    let hole_slot = g.v.as_mut_ptr().add(g.processed_len - g.deleted_cnt);
+                    ptr::copy_nonoverlapping(cur, hole_slot, 1);
+                }
+            }
+            g.processed_len += 1;
         }
+
+        // All item are processed. This can be optimized to `set_len` by LLVM.
+        drop(g);
     }
 
     /// Removes all but the first of consecutive elements in the vector that resolve to the same