about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2021-10-15 12:51:31 +0000
committerbors <bors@rust-lang.org>2021-10-15 12:51:31 +0000
commitaf9b508e1d6c83a8f0e6f5c0b2b75598aa37ed27 (patch)
tree2c4a3a50fe67e2710c33a76ad6fa7ae409d664a1
parent1dafe6d1c328d2f0580763e8438a227e490deb10 (diff)
parentcd773c358793beaae4688b4bbb60d793509f7cc0 (diff)
downloadrust-af9b508e1d6c83a8f0e6f5c0b2b75598aa37ed27.tar.gz
rust-af9b508e1d6c83a8f0e6f5c0b2b75598aa37ed27.zip
Auto merge of #88717 - tabokie:vecdeque-fast-append, r=m-ou-se
Optimize VecDeque::append

Optimize `VecDeque::append` to do unsafe copy rather than iterating through each element.

On my `Intel(R) Xeon(R) CPU E5-2630 v4 @ 2.20GHz`, the benchmark shows 37% improvements:
```
Master:
custom-bench vec_deque_append 583164 ns/iter
custom-bench vec_deque_append 550040 ns/iter

Patched:
custom-bench vec_deque_append 349204 ns/iter
custom-bench vec_deque_append 368164 ns/iter
```

Additional notes on the context: this is the third attempt to implement a non-trivial version of `VecDeque::append`, the last two are reverted due to unsoundness or regression, see:
- https://github.com/rust-lang/rust/pull/52553, reverted in https://github.com/rust-lang/rust/pull/53571
- https://github.com/rust-lang/rust/pull/53564, reverted in https://github.com/rust-lang/rust/pull/54851

Both cases are covered by existing tests.

Signed-off-by: tabokie <xy.tao@outlook.com>
-rw-r--r--library/alloc/src/collections/vec_deque/mod.rs32
1 files changed, 30 insertions, 2 deletions
diff --git a/library/alloc/src/collections/vec_deque/mod.rs b/library/alloc/src/collections/vec_deque/mod.rs
index 09ae1f7eebd..c890ff4ac5e 100644
--- a/library/alloc/src/collections/vec_deque/mod.rs
+++ b/library/alloc/src/collections/vec_deque/mod.rs
@@ -418,6 +418,25 @@ impl<T, A: Allocator> VecDeque<T, A> {
         }
     }
 
+    /// Copies all values from `src` to `dst`, wrapping around if needed.
+    /// Assumes capacity is sufficient.
+    #[inline]
+    unsafe fn copy_slice(&mut self, dst: usize, src: &[T]) {
+        debug_assert!(src.len() <= self.cap());
+        let head_room = self.cap() - dst;
+        if src.len() <= head_room {
+            unsafe {
+                ptr::copy_nonoverlapping(src.as_ptr(), self.ptr().add(dst), src.len());
+            }
+        } else {
+            let (left, right) = src.split_at(head_room);
+            unsafe {
+                ptr::copy_nonoverlapping(left.as_ptr(), self.ptr().add(dst), left.len());
+                ptr::copy_nonoverlapping(right.as_ptr(), self.ptr(), right.len());
+            }
+        }
+    }
+
     /// Frobs the head and tail sections around to handle the fact that we
     /// just reallocated. Unsafe because it trusts old_capacity.
     #[inline]
@@ -2081,8 +2100,17 @@ impl<T, A: Allocator> VecDeque<T, A> {
     #[inline]
     #[stable(feature = "append", since = "1.4.0")]
     pub fn append(&mut self, other: &mut Self) {
-        // naive impl
-        self.extend(other.drain(..));
+        self.reserve(other.len());
+        unsafe {
+            let (left, right) = other.as_slices();
+            self.copy_slice(self.head, left);
+            self.copy_slice(self.wrap_add(self.head, left.len()), right);
+        }
+        // SAFETY: Update pointers after copying to avoid leaving doppelganger
+        // in case of panics.
+        self.head = self.wrap_add(self.head, other.len());
+        // Silently drop values in `other`.
+        other.tail = other.head;
     }
 
     /// Retains only the elements specified by the predicate.