| author | Dylan DPC <dylan.dpc@gmail.com> | 2021-03-18 00:28:04 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-03-18 00:28:04 +0100 |
| commit | 90797ef008a2004e70ff0106c756f24ea63ab236 (patch) | |
| tree | e8cb8a5d1ee1b6ef5fc05df529e620062ebf1598 /library/alloc/src | |
| parent | 36f1f04f18b89ba4a999bcfd6584663fd6fc1c5d (diff) | |
| parent | b0092bc995fa3e6633c3aaa1d0a56006ab7ad1e3 (diff) | |
Rollup merge of #82191 - Soveu:dedup, r=nagisa
Vec::dedup_by optimization

Now `Vec::dedup_by` drops items in place as it goes through them. From my benchmarks, it is around 10% faster when `T` is small, with no major regression otherwise. I used `ptr::copy` instead of a conditional `ptr::copy_nonoverlapping` because the latter had some weird performance issues on my Ryzen laptop (it was 50% slower there than on an Intel/Sandy Bridge laptop). It would be good if someone were able to reproduce these results.
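The ~10% figure above comes from the author's own benchmarks. As a rough local sanity check (a sketch only: the workload is made up, and the clone inside the timed loop adds allocation noise), one can time `Vec::dedup`, which forwards to `dedup_by` with `==` as the predicate, over a vector of small elements with runs of consecutive duplicates:

```rust
use std::time::Instant;

fn main() {
    // Hypothetical workload: 1-byte elements in runs of 10 consecutive
    // duplicates, roughly the "small T" case described above.
    let data: Vec<u8> = (0..1_000_000u32).map(|i| (i / 10) as u8).collect();

    let iterations = 100u32;
    let mut kept = 0usize;
    let start = Instant::now();
    for _ in 0..iterations {
        let mut v = data.clone();
        // dedup() forwards to dedup_by() with `==` as the predicate.
        v.dedup();
        kept += v.len(); // use the result so the work is not optimized away
    }
    let elapsed = start.elapsed();
    println!("kept {} elements in total, {:?} per iteration", kept, elapsed / iterations);
}
```

Comparing this loop on toolchains before and after the change is one way to check the claim; a dedicated benchmark harness that keeps the clone outside the measured region would give more stable numbers.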
Diffstat (limited to 'library/alloc/src')
| -rw-r--r-- | library/alloc/src/vec/mod.rs | 95 |
1 file changed, 89 insertions, 6 deletions
```diff
diff --git a/library/alloc/src/vec/mod.rs b/library/alloc/src/vec/mod.rs
index 9731a8e1d1d..135279874bb 100644
--- a/library/alloc/src/vec/mod.rs
+++ b/library/alloc/src/vec/mod.rs
@@ -1512,15 +1512,98 @@ impl<T, A: Allocator> Vec<T, A> {
     /// assert_eq!(vec, ["foo", "bar", "baz", "bar"]);
     /// ```
     #[stable(feature = "dedup_by", since = "1.16.0")]
-    pub fn dedup_by<F>(&mut self, same_bucket: F)
+    pub fn dedup_by<F>(&mut self, mut same_bucket: F)
     where
         F: FnMut(&mut T, &mut T) -> bool,
     {
-        let len = {
-            let (dedup, _) = self.as_mut_slice().partition_dedup_by(same_bucket);
-            dedup.len()
-        };
-        self.truncate(len);
+        let len = self.len();
+        if len <= 1 {
+            return;
+        }
+
+        /* INVARIANT: vec.len() > read >= write > write-1 >= 0 */
+        struct FillGapOnDrop<'a, T, A: core::alloc::Allocator> {
+            /* Offset of the element we want to check if it is duplicate */
+            read: usize,
+
+            /* Offset of the place where we want to place the non-duplicate
+             * when we find it. */
+            write: usize,
+
+            /* The Vec that would need correction if `same_bucket` panicked */
+            vec: &'a mut Vec<T, A>,
+        }
+
+        impl<'a, T, A: core::alloc::Allocator> Drop for FillGapOnDrop<'a, T, A> {
+            fn drop(&mut self) {
+                /* This code gets executed when `same_bucket` panics */
+
+                /* SAFETY: invariant guarantees that `read - write`
+                 * and `len - read` never overflow and that the copy is always
+                 * in-bounds. */
+                unsafe {
+                    let ptr = self.vec.as_mut_ptr();
+                    let len = self.vec.len();
+
+                    /* How many items were left when `same_bucket` paniced.
+                     * Basically vec[read..].len() */
+                    let items_left = len.wrapping_sub(self.read);
+
+                    /* Pointer to first item in vec[write..write+items_left] slice */
+                    let dropped_ptr = ptr.add(self.write);
+                    /* Pointer to first item in vec[read..] slice */
+                    let valid_ptr = ptr.add(self.read);
+
+                    /* Copy `vec[read..]` to `vec[write..write+items_left]`.
+                     * The slices can overlap, so `copy_nonoverlapping` cannot be used */
+                    ptr::copy(valid_ptr, dropped_ptr, items_left);
+
+                    /* How many items have been already dropped
+                     * Basically vec[read..write].len() */
+                    let dropped = self.read.wrapping_sub(self.write);
+
+                    self.vec.set_len(len - dropped);
+                }
+            }
+        }
+
+        let mut gap = FillGapOnDrop { read: 1, write: 1, vec: self };
+        let ptr = gap.vec.as_mut_ptr();
+
+        /* Drop items while going through Vec, it should be more efficient than
+         * doing slice partition_dedup + truncate */
+
+        /* SAFETY: Because of the invariant, read_ptr, prev_ptr and write_ptr
+         * are always in-bounds and read_ptr never aliases prev_ptr */
+        unsafe {
+            while gap.read < len {
+                let read_ptr = ptr.add(gap.read);
+                let prev_ptr = ptr.add(gap.write.wrapping_sub(1));
+
+                if same_bucket(&mut *read_ptr, &mut *prev_ptr) {
+                    /* We have found duplicate, drop it in-place */
+                    ptr::drop_in_place(read_ptr);
+                } else {
+                    let write_ptr = ptr.add(gap.write);
+
+                    /* Because `read_ptr` can be equal to `write_ptr`, we either
+                     * have to use `copy` or conditional `copy_nonoverlapping`.
+                     * Looks like the first option is faster. */
+                    ptr::copy(read_ptr, write_ptr, 1);
+
+                    /* We have filled that place, so go further */
+                    gap.write += 1;
+                }
+
+                gap.read += 1;
+            }
+
+            /* Technically we could let `gap` clean up with its Drop, but
+             * when `same_bucket` is guaranteed to not panic, this bloats a little
+             * the codegen, so we just do it manually */
+            gap.vec.set_len(gap.write);
+            mem::forget(gap);
+        }
     }
 
     /// Appends an element to the back of a collection.
```
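The `FillGapOnDrop` guard only matters when `same_bucket` panics partway through the scan: its `Drop` copies the not-yet-examined tail down over the gap and shortens the length accordingly, so the vector is left in a valid, partially deduplicated state and nothing is dropped twice when the `Vec` itself is later dropped. A small caller-side sketch of both the normal behaviour and the panic case (the predicate, element values, and panic trigger are made up for illustration):

```rust
use std::panic::{catch_unwind, AssertUnwindSafe};

fn main() {
    // Normal use: dedup_by with a custom predicate (case-insensitive here),
    // mirroring the doc example visible in the diff context above.
    let mut names = vec!["foo", "FOO", "bar", "Bar", "baz"];
    names.dedup_by(|a, b| a.eq_ignore_ascii_case(b));
    assert_eq!(names, ["foo", "bar", "baz"]);

    // Panic case: the predicate blows up on its third call, after one
    // duplicate has already been dropped in place.
    let mut v = vec![
        String::from("a"),
        String::from("a"),
        String::from("b"),
        String::from("b"),
        String::from("c"),
    ];
    let mut calls = 0;
    let result = catch_unwind(AssertUnwindSafe(|| {
        v.dedup_by(|a, b| {
            calls += 1;
            if calls == 3 {
                panic!("boom");
            }
            a == b
        });
    }));
    assert!(result.is_err());

    // With this implementation, the guard's Drop has shifted the unexamined
    // tail down over the gap and fixed the length: the duplicate "a" is gone,
    // every remaining String is still valid, and dropping `v` later is sound.
    assert_eq!(v, ["a", "b", "b", "c"]);
}
```

Without the guard, unwinding out of `same_bucket` would leave already-dropped slots inside the vector's length, and dropping the `Vec` afterwards would double-drop them; that is the scenario the extra `Drop` bookkeeping exists to prevent.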
