about summary refs log tree commit diff
path: root/library/alloc
diff options
context:
space:
mode:
authorDylan DPC <dylan.dpc@gmail.com>2021-03-18 00:28:04 +0100
committerGitHub <noreply@github.com>2021-03-18 00:28:04 +0100
commit90797ef008a2004e70ff0106c756f24ea63ab236 (patch)
treee8cb8a5d1ee1b6ef5fc05df529e620062ebf1598 /library/alloc
parent36f1f04f18b89ba4a999bcfd6584663fd6fc1c5d (diff)
parentb0092bc995fa3e6633c3aaa1d0a56006ab7ad1e3 (diff)
downloadrust-90797ef008a2004e70ff0106c756f24ea63ab236.tar.gz
rust-90797ef008a2004e70ff0106c756f24ea63ab236.zip
Rollup merge of #82191 - Soveu:dedup, r=nagisa
Vec::dedup_by optimization

Now `Vec::dedup_by` drops items in-place as it goes through them.
From my benchmarks, it is around 10% faster when T is small, with no major regression when otherwise.

I used `ptr::copy` instead of conditional `ptr::copy_nonoverlapping`, because the latter had some weird performance issues on my ryzen laptop (it was 50% slower on it than on intel/sandybridge laptop)
It would be good if someone was able to reproduce these results.
Diffstat (limited to 'library/alloc')
-rw-r--r--library/alloc/benches/lib.rs1
-rw-r--r--library/alloc/benches/vec.rs89
-rw-r--r--library/alloc/src/vec/mod.rs95
-rw-r--r--library/alloc/tests/lib.rs1
-rw-r--r--library/alloc/tests/vec.rs126
5 files changed, 306 insertions, 6 deletions
diff --git a/library/alloc/benches/lib.rs b/library/alloc/benches/lib.rs
index 32edb86d101..38a8f65f169 100644
--- a/library/alloc/benches/lib.rs
+++ b/library/alloc/benches/lib.rs
@@ -4,6 +4,7 @@
 #![feature(btree_drain_filter)]
 #![feature(map_first_last)]
 #![feature(repr_simd)]
+#![feature(slice_partition_dedup)]
 #![feature(test)]
 
 extern crate test;
diff --git a/library/alloc/benches/vec.rs b/library/alloc/benches/vec.rs
index 89893b6209c..73eb353f6e7 100644
--- a/library/alloc/benches/vec.rs
+++ b/library/alloc/benches/vec.rs
@@ -671,3 +671,92 @@ fn bench_map_fast(b: &mut Bencher) {
     let data = black_box([(0, 0); LEN]);
     b.iter(|| map_fast(&data));
 }
+
+fn random_sorted_fill(mut seed: u32, buf: &mut [u32]) {
+    let mask = if buf.len() < 8192 {
+        0xFF
+    } else if buf.len() < 200_000 {
+        0xFFFF
+    } else {
+        0xFFFF_FFFF
+    };
+
+    for item in buf.iter_mut() {
+        seed ^= seed << 13;
+        seed ^= seed >> 17;
+        seed ^= seed << 5;
+
+        *item = seed & mask;
+    }
+
+    buf.sort();
+}
+
+fn bench_vec_dedup_old(b: &mut Bencher, sz: usize) {
+    let mut template = vec![0u32; sz];
+    b.bytes = std::mem::size_of_val(template.as_slice()) as u64;
+    random_sorted_fill(0x43, &mut template);
+
+    let mut vec = template.clone();
+    b.iter(|| {
+        let len = {
+            let (dedup, _) = vec.partition_dedup();
+            dedup.len()
+        };
+        vec.truncate(len);
+
+        black_box(vec.first());
+        vec.clear();
+        vec.extend_from_slice(&template);
+    });
+}
+
+fn bench_vec_dedup_new(b: &mut Bencher, sz: usize) {
+    let mut template = vec![0u32; sz];
+    b.bytes = std::mem::size_of_val(template.as_slice()) as u64;
+    random_sorted_fill(0x43, &mut template);
+
+    let mut vec = template.clone();
+    b.iter(|| {
+        vec.dedup();
+        black_box(vec.first());
+        vec.clear();
+        vec.extend_from_slice(&template);
+    });
+}
+
+#[bench]
+fn bench_dedup_old_100(b: &mut Bencher) {
+    bench_vec_dedup_old(b, 100);
+}
+#[bench]
+fn bench_dedup_new_100(b: &mut Bencher) {
+    bench_vec_dedup_new(b, 100);
+}
+
+#[bench]
+fn bench_dedup_old_1000(b: &mut Bencher) {
+    bench_vec_dedup_old(b, 1000);
+}
+#[bench]
+fn bench_dedup_new_1000(b: &mut Bencher) {
+    bench_vec_dedup_new(b, 1000);
+}
+
+#[bench]
+fn bench_dedup_old_10000(b: &mut Bencher) {
+    bench_vec_dedup_old(b, 10000);
+}
+#[bench]
+fn bench_dedup_new_10000(b: &mut Bencher) {
+    bench_vec_dedup_new(b, 10000);
+}
+
+#[bench]
+fn bench_dedup_old_100000(b: &mut Bencher) {
+    bench_vec_dedup_old(b, 100000);
+}
+#[bench]
+fn bench_dedup_new_100000(b: &mut Bencher) {
+    bench_vec_dedup_new(b, 100000);
+}
diff --git a/library/alloc/src/vec/mod.rs b/library/alloc/src/vec/mod.rs
index 9731a8e1d1d..135279874bb 100644
--- a/library/alloc/src/vec/mod.rs
+++ b/library/alloc/src/vec/mod.rs
@@ -1512,15 +1512,98 @@ impl<T, A: Allocator> Vec<T, A> {
     /// assert_eq!(vec, ["foo", "bar", "baz", "bar"]);
     /// ```
     #[stable(feature = "dedup_by", since = "1.16.0")]
-    pub fn dedup_by<F>(&mut self, same_bucket: F)
+    pub fn dedup_by<F>(&mut self, mut same_bucket: F)
     where
         F: FnMut(&mut T, &mut T) -> bool,
     {
-        let len = {
-            let (dedup, _) = self.as_mut_slice().partition_dedup_by(same_bucket);
-            dedup.len()
-        };
-        self.truncate(len);
+        let len = self.len();
+        if len <= 1 {
+            return;
+        }
+
+        /* INVARIANT: vec.len() > read >= write > write-1 >= 0 */
+        struct FillGapOnDrop<'a, T, A: core::alloc::Allocator> {
+            /* Offset of the element we want to check if it is duplicate */
+            read: usize,
+
+            /* Offset of the place where we want to place the non-duplicate
+             * when we find it. */
+            write: usize,
+
+            /* The Vec that would need correction if `same_bucket` panicked */
+            vec: &'a mut Vec<T, A>,
+        }
+
+        impl<'a, T, A: core::alloc::Allocator> Drop for FillGapOnDrop<'a, T, A> {
+            fn drop(&mut self) {
+                /* This code gets executed when `same_bucket` panics */
+
+                /* SAFETY: invariant guarantees that `read - write`
+                 * and `len - read` never overflow and that the copy is always
+                 * in-bounds. */
+                unsafe {
+                    let ptr = self.vec.as_mut_ptr();
+                    let len = self.vec.len();
+
+                    /* How many items were left when `same_bucket` paniced.
+                     * Basically vec[read..].len() */
+                    let items_left = len.wrapping_sub(self.read);
+
+                    /* Pointer to first item in vec[write..write+items_left] slice */
+                    let dropped_ptr = ptr.add(self.write);
+                    /* Pointer to first item in vec[read..] slice */
+                    let valid_ptr = ptr.add(self.read);
+
+                    /* Copy `vec[read..]` to `vec[write..write+items_left]`.
+                     * The slices can overlap, so `copy_nonoverlapping` cannot be used */
+                    ptr::copy(valid_ptr, dropped_ptr, items_left);
+
+                    /* How many items have been already dropped
+                     * Basically vec[read..write].len() */
+                    let dropped = self.read.wrapping_sub(self.write);
+
+                    self.vec.set_len(len - dropped);
+                }
+            }
+        }
+
+        let mut gap = FillGapOnDrop { read: 1, write: 1, vec: self };
+        let ptr = gap.vec.as_mut_ptr();
+
+        /* Drop items while going through Vec, it should be more efficient than
+         * doing slice partition_dedup + truncate */
+
+        /* SAFETY: Because of the invariant, read_ptr, prev_ptr and write_ptr
+         * are always in-bounds and read_ptr never aliases prev_ptr */
+        unsafe {
+            while gap.read < len {
+                let read_ptr = ptr.add(gap.read);
+                let prev_ptr = ptr.add(gap.write.wrapping_sub(1));
+
+                if same_bucket(&mut *read_ptr, &mut *prev_ptr) {
+                    /* We have found duplicate, drop it in-place */
+                    ptr::drop_in_place(read_ptr);
+                } else {
+                    let write_ptr = ptr.add(gap.write);
+
+                    /* Because `read_ptr` can be equal to `write_ptr`, we either
+                     * have to use `copy` or conditional `copy_nonoverlapping`.
+                     * Looks like the first option is faster. */
+                    ptr::copy(read_ptr, write_ptr, 1);
+
+                    /* We have filled that place, so go further */
+                    gap.write += 1;
+                }
+
+                gap.read += 1;
+            }
+
+            /* Technically we could let `gap` clean up with its Drop, but
+             * when `same_bucket` is guaranteed to not panic, this bloats a little
+             * the codegen, so we just do it manually */
+            gap.vec.set_len(gap.write);
+            mem::forget(gap);
+        }
     }
 
     /// Appends an element to the back of a collection.
diff --git a/library/alloc/tests/lib.rs b/library/alloc/tests/lib.rs
index 799499b9b77..11673ed8262 100644
--- a/library/alloc/tests/lib.rs
+++ b/library/alloc/tests/lib.rs
@@ -19,6 +19,7 @@
 #![feature(int_bits_const)]
 #![feature(vecdeque_binary_search)]
 #![feature(slice_group_by)]
+#![feature(slice_partition_dedup)]
 #![feature(vec_extend_from_within)]
 #![feature(vec_spare_capacity)]
 
diff --git a/library/alloc/tests/vec.rs b/library/alloc/tests/vec.rs
index 1ba2315ca73..c142536cd2d 100644
--- a/library/alloc/tests/vec.rs
+++ b/library/alloc/tests/vec.rs
@@ -2102,6 +2102,132 @@ fn test_extend_from_within() {
     assert_eq!(v, ["a", "b", "c", "b", "c", "a", "b"]);
 }
 
+#[test]
+fn test_vec_dedup_by() {
+    let mut vec: Vec<i32> = vec![1, -1, 2, 3, 1, -5, 5, -2, 2];
+
+    vec.dedup_by(|a, b| a.abs() == b.abs());
+
+    assert_eq!(vec, [1, 2, 3, 1, -5, -2]);
+}
+
+#[test]
+fn test_vec_dedup_empty() {
+    let mut vec: Vec<i32> = Vec::new();
+
+    vec.dedup();
+
+    assert_eq!(vec, []);
+}
+
+#[test]
+fn test_vec_dedup_one() {
+    let mut vec = vec![12i32];
+
+    vec.dedup();
+
+    assert_eq!(vec, [12]);
+}
+
+#[test]
+fn test_vec_dedup_multiple_ident() {
+    let mut vec = vec![12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11];
+
+    vec.dedup();
+
+    assert_eq!(vec, [12, 11]);
+}
+
+#[test]
+fn test_vec_dedup_partialeq() {
+    #[derive(Debug)]
+    struct Foo(i32, i32);
+
+    impl PartialEq for Foo {
+        fn eq(&self, other: &Foo) -> bool {
+            self.0 == other.0
+        }
+    }
+
+    let mut vec = vec![Foo(0, 1), Foo(0, 5), Foo(1, 7), Foo(1, 9)];
+
+    vec.dedup();
+    assert_eq!(vec, [Foo(0, 1), Foo(1, 7)]);
+}
+
+#[test]
+fn test_vec_dedup() {
+    let mut vec: Vec<bool> = Vec::with_capacity(8);
+    let mut template = vec.clone();
+
+    for x in 0u8..255u8 {
+        vec.clear();
+        template.clear();
+
+        let iter = (0..8).map(move |bit| (x >> bit) & 1 == 1);
+        vec.extend(iter);
+        template.extend_from_slice(&vec);
+
+        let (dedup, _) = template.partition_dedup();
+        vec.dedup();
+
+        assert_eq!(vec, dedup);
+    }
+}
+
+#[test]
+fn test_vec_dedup_panicking() {
+    #[derive(Debug)]
+    struct Panic {
+        drop_counter: &'static AtomicU32,
+        value: bool,
+        index: usize,
+    }
+
+    impl PartialEq for Panic {
+        fn eq(&self, other: &Self) -> bool {
+            self.value == other.value
+        }
+    }
+
+    impl Drop for Panic {
+        fn drop(&mut self) {
+            let x = self.drop_counter.fetch_add(1, Ordering::SeqCst);
+            assert!(x != 4);
+        }
+    }
+
+    static DROP_COUNTER: AtomicU32 = AtomicU32::new(0);
+    let expected = [
+        Panic { drop_counter: &DROP_COUNTER, value: false, index: 0 },
+        Panic { drop_counter: &DROP_COUNTER, value: false, index: 5 },
+        Panic { drop_counter: &DROP_COUNTER, value: true, index: 6 },
+        Panic { drop_counter: &DROP_COUNTER, value: true, index: 7 },
+    ];
+    let mut vec = vec![
+        Panic { drop_counter: &DROP_COUNTER, value: false, index: 0 },
+        // these elements get deduplicated
+        Panic { drop_counter: &DROP_COUNTER, value: false, index: 1 },
+        Panic { drop_counter: &DROP_COUNTER, value: false, index: 2 },
+        Panic { drop_counter: &DROP_COUNTER, value: false, index: 3 },
+        Panic { drop_counter: &DROP_COUNTER, value: false, index: 4 },
+        // here it panics
+        Panic { drop_counter: &DROP_COUNTER, value: false, index: 5 },
+        Panic { drop_counter: &DROP_COUNTER, value: true, index: 6 },
+        Panic { drop_counter: &DROP_COUNTER, value: true, index: 7 },
+    ];
+
+    let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
+        vec.dedup();
+    }));
+
+    let ok = vec.iter().zip(expected.iter()).all(|(x, y)| x.index == y.index);
+
+    if !ok {
+        panic!("expected: {:?}\ngot: {:?}\n", expected, vec);
+    }
+}
+
 // Regression test for issue #82533
 #[test]
 fn test_extend_from_within_panicing_clone() {