From 1825810a898842b4660fd684cea66906b7e32500 Mon Sep 17 00:00:00 2001 From: Soveu Date: Tue, 16 Feb 2021 18:48:42 +0100 Subject: Vec::dedup optimization --- library/alloc/src/vec/mod.rs | 50 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 44 insertions(+), 6 deletions(-) (limited to 'library/alloc/src/vec/mod.rs') diff --git a/library/alloc/src/vec/mod.rs b/library/alloc/src/vec/mod.rs index b6166617789..88f9f624aba 100644 --- a/library/alloc/src/vec/mod.rs +++ b/library/alloc/src/vec/mod.rs @@ -1514,15 +1514,53 @@ impl Vec { /// assert_eq!(vec, ["foo", "bar", "baz", "bar"]); /// ``` #[stable(feature = "dedup_by", since = "1.16.0")] - pub fn dedup_by(&mut self, same_bucket: F) + pub fn dedup_by(&mut self, mut same_bucket: F) where F: FnMut(&mut T, &mut T) -> bool, { - let len = { - let (dedup, _) = self.as_mut_slice().partition_dedup_by(same_bucket); - dedup.len() - }; - self.truncate(len); + let len = self.len(); + if len <= 1 { + return; + } + + let ptr = self.as_mut_ptr(); + /* Offset of the element we want to check if it is duplicate */ + let mut read: usize = 1; + /* Offset of the place where we want to place the non-duplicate + * when we find it. */ + let mut write: usize = 1; + + /* Drop items while going through Vec, it should be more efficient than + * doing slice partition_dedup + truncate */ + + /* INVARIANT: len > read >= write > write-1 >= 0 + * SAFETY: Because of the invariant, read_ptr, prev_ptr and write_ptr + * are always in-bounds and read_ptr never aliases prev_ptr */ + unsafe { + while read < len { + let read_ptr = ptr.add(read); + let prev_ptr = ptr.add(write.wrapping_sub(1)); + + if same_bucket(&mut *read_ptr, &mut *prev_ptr) { + /* We have found duplicate, drop it in-place */ + ptr::drop_in_place(read_ptr); + } else { + let write_ptr = ptr.add(write); + + /* Looks like doing just `copy` can be faster than + * conditional `copy_nonoverlapping` */ + ptr::copy(read_ptr, write_ptr, 1); + + /* We have filled that place, so go further */ + write += 1; + } + + read += 1; + } + + /* `write` items are inside vec, rest is already dropped */ + self.set_len(write); + } } /// Appends an element to the back of a collection. -- cgit 1.4.1-3-g733a5 From c114894b90971ad7c6743ca5961f276cae1e2b27 Mon Sep 17 00:00:00 2001 From: Soveu Date: Wed, 17 Feb 2021 17:21:12 +0100 Subject: Vec::dedup optimization - panic gracefully --- library/alloc/src/vec/mod.rs | 81 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 65 insertions(+), 16 deletions(-) (limited to 'library/alloc/src/vec/mod.rs') diff --git a/library/alloc/src/vec/mod.rs b/library/alloc/src/vec/mod.rs index 88f9f624aba..e65adb6c77e 100644 --- a/library/alloc/src/vec/mod.rs +++ b/library/alloc/src/vec/mod.rs @@ -1523,43 +1523,92 @@ impl Vec { return; } - let ptr = self.as_mut_ptr(); - /* Offset of the element we want to check if it is duplicate */ - let mut read: usize = 1; - /* Offset of the place where we want to place the non-duplicate - * when we find it. */ - let mut write: usize = 1; + /* INVARIANT: vec.len() > read >= write > write-1 >= 0 */ + struct FillGapOnDrop<'a, T, A: core::alloc::Allocator> { + /* Offset of the element we want to check if it is duplicate */ + read: usize, + + /* Offset of the place where we want to place the non-duplicate + * when we find it. */ + write: usize, + + /* The Vec that would need correction if `same_bucket` panicked */ + vec: &'a mut Vec, + } + + impl<'a, T, A: core::alloc::Allocator> Drop for FillGapOnDrop<'a, T, A> { + fn drop(&mut self) { + /* This code gets executed either at the end of `dedup_by` or + * when `same_bucket` panics */ + + /* SAFETY (if finishing successfully): self.read == len, so + * no data is copied and length is set correctly */ + + /* SAFETY (if panicing): invariant guarantees that `read - write` + * and `len - read` never overflow and that the copy is always + * in-bounds. */ + unsafe { + let ptr = self.vec.as_mut_ptr(); + let len = self.vec.len(); + + /* How many items were left when `same_bucket` paniced. + * Basically vec[read..].len() */ + let items_left = len - self.read; + + /* Pointer to first item in vec[write..write+items_left] slice */ + let dropped_ptr = ptr.add(self.write); + /* Pointer to first item in vec[read..] slice */ + let valid_ptr = ptr.add(self.read); + + /* Copy `vec[read..]` to `vec[write..write+items_left]`. + * The slices can overlap, so `copy_nonoverlapping` cannot be used */ + ptr::copy(valid_ptr, dropped_ptr, items_left); + + /* How many items have been already dropped + * Basically vec[read..write].len() */ + let dropped = self.read - self.write; + + self.vec.set_len(len - dropped); + } + } + } + + let mut gap = FillGapOnDrop { read: 1, write: 1, vec: self }; + + let ptr = gap.vec.as_mut_ptr(); /* Drop items while going through Vec, it should be more efficient than * doing slice partition_dedup + truncate */ - /* INVARIANT: len > read >= write > write-1 >= 0 - * SAFETY: Because of the invariant, read_ptr, prev_ptr and write_ptr + /* SAFETY: Because of the invariant, read_ptr, prev_ptr and write_ptr * are always in-bounds and read_ptr never aliases prev_ptr */ unsafe { - while read < len { - let read_ptr = ptr.add(read); - let prev_ptr = ptr.add(write.wrapping_sub(1)); + while gap.read < len { + let read_ptr = ptr.add(gap.read); + let prev_ptr = ptr.add(gap.write.wrapping_sub(1)); if same_bucket(&mut *read_ptr, &mut *prev_ptr) { /* We have found duplicate, drop it in-place */ ptr::drop_in_place(read_ptr); } else { - let write_ptr = ptr.add(write); + let write_ptr = ptr.add(gap.write); /* Looks like doing just `copy` can be faster than * conditional `copy_nonoverlapping` */ ptr::copy(read_ptr, write_ptr, 1); /* We have filled that place, so go further */ - write += 1; + gap.write += 1; } - read += 1; + gap.read += 1; } - /* `write` items are inside vec, rest is already dropped */ - self.set_len(write); + /* Technically we could let `gap` clean up with its Drop, but + * when `same_bucket` is guaranteed to not panic, this bloats a little + * the codegen, so we just do it manually */ + gap.vec.set_len(gap.write); + mem::forget(gap); } } -- cgit 1.4.1-3-g733a5 From afdbc9ece176ccac7b1d156efcb397d089d88b5a Mon Sep 17 00:00:00 2001 From: Soveu Date: Mon, 15 Mar 2021 20:36:29 +0100 Subject: Vec::dedup optimization - finishing polishes --- library/alloc/src/vec/mod.rs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'library/alloc/src/vec/mod.rs') diff --git a/library/alloc/src/vec/mod.rs b/library/alloc/src/vec/mod.rs index e65adb6c77e..19198d4eeef 100644 --- a/library/alloc/src/vec/mod.rs +++ b/library/alloc/src/vec/mod.rs @@ -1538,13 +1538,9 @@ impl Vec { impl<'a, T, A: core::alloc::Allocator> Drop for FillGapOnDrop<'a, T, A> { fn drop(&mut self) { - /* This code gets executed either at the end of `dedup_by` or - * when `same_bucket` panics */ + /* This code gets executed when `same_bucket` panics */ - /* SAFETY (if finishing successfully): self.read == len, so - * no data is copied and length is set correctly */ - - /* SAFETY (if panicing): invariant guarantees that `read - write` + /* SAFETY: invariant guarantees that `read - write` * and `len - read` never overflow and that the copy is always * in-bounds. */ unsafe { @@ -1553,7 +1549,7 @@ impl Vec { /* How many items were left when `same_bucket` paniced. * Basically vec[read..].len() */ - let items_left = len - self.read; + let items_left = len.wrapping_sub(self.read); /* Pointer to first item in vec[write..write+items_left] slice */ let dropped_ptr = ptr.add(self.write); @@ -1566,7 +1562,7 @@ impl Vec { /* How many items have been already dropped * Basically vec[read..write].len() */ - let dropped = self.read - self.write; + let dropped = self.read.wrapping_sub(self.write); self.vec.set_len(len - dropped); } @@ -1574,7 +1570,6 @@ impl Vec { } let mut gap = FillGapOnDrop { read: 1, write: 1, vec: self }; - let ptr = gap.vec.as_mut_ptr(); /* Drop items while going through Vec, it should be more efficient than @@ -1593,8 +1588,9 @@ impl Vec { } else { let write_ptr = ptr.add(gap.write); - /* Looks like doing just `copy` can be faster than - * conditional `copy_nonoverlapping` */ + /* Because `read_ptr` can be equal to `write_ptr`, we either + * have to use `copy` or conditional `copy_nonoverlapping`. + * Looks like the first option is faster. */ ptr::copy(read_ptr, write_ptr, 1); /* We have filled that place, so go further */ -- cgit 1.4.1-3-g733a5