author     Dylan DPC <dylan.dpc@gmail.com>  2021-03-18 00:28:04 +0100
committer  GitHub <noreply@github.com>      2021-03-18 00:28:04 +0100
commit     90797ef008a2004e70ff0106c756f24ea63ab236 (patch)
tree       e8cb8a5d1ee1b6ef5fc05df529e620062ebf1598 /library/alloc/src
parent     36f1f04f18b89ba4a999bcfd6584663fd6fc1c5d (diff)
parent     b0092bc995fa3e6633c3aaa1d0a56006ab7ad1e3 (diff)
Rollup merge of #82191 - Soveu:dedup, r=nagisa
Vec::dedup_by optimization

`Vec::dedup_by` now drops duplicate items in place as it goes through them.
In my benchmarks it is around 10% faster when `T` is small, with no major regression otherwise.

I used `ptr::copy` instead of a conditional `ptr::copy_nonoverlapping`, because the latter had some weird performance issues on my Ryzen laptop (it was 50% slower there than on an Intel/Sandy Bridge laptop).
It would be good if someone could reproduce these results.
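
For context, here is a minimal usage sketch (not part of this PR) of the semantics the optimization has to preserve: `dedup_by` removes consecutive duplicates, keeping the first item of each run, and calls `same_bucket` with the candidate element and the most recently kept one:

```rust
fn main() {
    let mut v = vec!["foo", "FOO", "bar", "Bar", "bar", "baz"];
    // `a` is the candidate being checked, `b` the last element kept so far;
    // returning `true` marks `a` as a duplicate to be dropped.
    v.dedup_by(|a, b| a.eq_ignore_ascii_case(b));
    // Only consecutive duplicates are removed; the first of each run survives.
    assert_eq!(v, ["foo", "bar", "baz"]);
}
```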
Diffstat (limited to 'library/alloc/src')
-rw-r--r--  library/alloc/src/vec/mod.rs  95
1 file changed, 89 insertions(+), 6 deletions(-)
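
For reference, the conditional `copy_nonoverlapping` alternative mentioned in the description would look roughly like this; a sketch under assumed pointer-validity preconditions, with a hypothetical helper name, not code from the PR:

```rust
use std::ptr;

/// Hypothetical helper: move one item from `src` to `dst`, branching so that
/// `copy_nonoverlapping` is only called when the two slots are distinct.
///
/// SAFETY: both pointers must be valid for a read/write of one `T`, and the
/// value at `src` must not be used again afterwards.
unsafe fn move_one<T>(src: *const T, dst: *mut T) {
    if src != dst as *const T {
        // Distinct slots: for a count of 1 the regions cannot overlap.
        unsafe { ptr::copy_nonoverlapping(src, dst, 1) };
    }
    // If `src == dst`, the item is already in place and nothing needs to move.
}
```

The PR instead issues an unconditional `ptr::copy`, which handles the `src == dst` case without a branch; per the description above, the branchy variant benchmarked worse on at least one machine.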
diff --git a/library/alloc/src/vec/mod.rs b/library/alloc/src/vec/mod.rs
index 9731a8e1d1d..135279874bb 100644
--- a/library/alloc/src/vec/mod.rs
+++ b/library/alloc/src/vec/mod.rs
@@ -1512,15 +1512,98 @@ impl<T, A: Allocator> Vec<T, A> {
     /// assert_eq!(vec, ["foo", "bar", "baz", "bar"]);
     /// ```
     #[stable(feature = "dedup_by", since = "1.16.0")]
-    pub fn dedup_by<F>(&mut self, same_bucket: F)
+    pub fn dedup_by<F>(&mut self, mut same_bucket: F)
     where
         F: FnMut(&mut T, &mut T) -> bool,
     {
-        let len = {
-            let (dedup, _) = self.as_mut_slice().partition_dedup_by(same_bucket);
-            dedup.len()
-        };
-        self.truncate(len);
+        let len = self.len();
+        if len <= 1 {
+            return;
+        }
+
+        /* INVARIANT: vec.len() > read >= write > write-1 >= 0 */
+        struct FillGapOnDrop<'a, T, A: core::alloc::Allocator> {
+            /* Offset of the element we want to check if it is a duplicate */
+            read: usize,
+
+            /* Offset of the place where we want to place the non-duplicate
+             * when we find it. */
+            write: usize,
+
+            /* The Vec that would need correction if `same_bucket` panicked */
+            vec: &'a mut Vec<T, A>,
+        }
+
+        impl<'a, T, A: core::alloc::Allocator> Drop for FillGapOnDrop<'a, T, A> {
+            fn drop(&mut self) {
+                /* This code gets executed when `same_bucket` panics */
+
+                /* SAFETY: invariant guarantees that `read - write`
+                 * and `len - read` never overflow and that the copy is always
+                 * in-bounds. */
+                unsafe {
+                    let ptr = self.vec.as_mut_ptr();
+                    let len = self.vec.len();
+
+                    /* How many items were left when `same_bucket` panicked.
+                     * Basically vec[read..].len() */
+                    let items_left = len.wrapping_sub(self.read);
+
+                    /* Pointer to first item in vec[write..write+items_left] slice */
+                    let dropped_ptr = ptr.add(self.write);
+                    /* Pointer to first item in vec[read..] slice */
+                    let valid_ptr = ptr.add(self.read);
+
+                    /* Copy `vec[read..]` to `vec[write..write+items_left]`.
+                     * The slices can overlap, so `copy_nonoverlapping` cannot be used */
+                    ptr::copy(valid_ptr, dropped_ptr, items_left);
+
+                    /* How many items have already been dropped.
+                     * Basically vec[write..read].len() */
+                    let dropped = self.read.wrapping_sub(self.write);
+
+                    self.vec.set_len(len - dropped);
+                }
+            }
+        }
+
+        let mut gap = FillGapOnDrop { read: 1, write: 1, vec: self };
+        let ptr = gap.vec.as_mut_ptr();
+
+        /* Drop items while going through the Vec; it should be more efficient
+         * than doing slice partition_dedup + truncate */
+
+        /* SAFETY: Because of the invariant, read_ptr, prev_ptr and write_ptr
+         * are always in-bounds and read_ptr never aliases prev_ptr */
+        unsafe {
+            while gap.read < len {
+                let read_ptr = ptr.add(gap.read);
+                let prev_ptr = ptr.add(gap.write.wrapping_sub(1));
+
+                if same_bucket(&mut *read_ptr, &mut *prev_ptr) {
+                    /* We have found a duplicate, drop it in-place */
+                    ptr::drop_in_place(read_ptr);
+                } else {
+                    let write_ptr = ptr.add(gap.write);
+
+                    /* Because `read_ptr` can be equal to `write_ptr`, we either
+                     * have to use `copy` or a conditional `copy_nonoverlapping`.
+                     * The first option appears to be faster. */
+                    ptr::copy(read_ptr, write_ptr, 1);
+
+                    /* We have filled that place, so go further */
+                    gap.write += 1;
+                }
+
+                gap.read += 1;
+            }
+
+            /* Technically we could let `gap` clean up with its Drop, but
+             * when `same_bucket` is guaranteed not to panic, this bloats
+             * the codegen a little, so we just do it manually */
+            gap.vec.set_len(gap.write);
+            mem::forget(gap);
+        }
     }
 
     /// Appends an element to the back of a collection.
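
As an aside, the `FillGapOnDrop` guard above is an instance of the general drop-guard pattern: do the risky work behind a guard whose `Drop` restores the container's invariants during unwinding, and `mem::forget` the guard on the success path. A minimal, self-contained sketch of that pattern (all names hypothetical, not from the PR):

```rust
use std::panic::{catch_unwind, AssertUnwindSafe};

/// Hypothetical guard: if dropped (e.g. while unwinding from a panic),
/// it truncates the Vec back to a prefix known to be fully valid.
struct TruncateOnDrop<'a> {
    vec: &'a mut Vec<i32>,
    valid_len: usize,
}

impl Drop for TruncateOnDrop<'_> {
    fn drop(&mut self) {
        // Runs even when the code holding the guard panics.
        self.vec.truncate(self.valid_len);
    }
}

fn main() {
    let mut v = vec![1, 2, 3, 4];
    let result = catch_unwind(AssertUnwindSafe(|| {
        let guard = TruncateOnDrop { valid_len: 2, vec: &mut v };
        guard.vec.push(5); // some in-progress mutation...
        panic!("simulated `same_bucket` panic");
        // On the success path we would `mem::forget(guard)` instead,
        // just as `dedup_by` does after its loop finishes.
    }));
    assert!(result.is_err());
    // The guard's Drop ran during unwinding and restored a valid state.
    assert_eq!(v, [1, 2]);
}
```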