about summary refs log tree commit diff
path: root/src/liballoc
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2018-08-18 08:56:12 +0000
committerbors <bors@rust-lang.org>2018-08-18 08:56:12 +0000
commitd5b6b95aef94169b5dbe4dbb1357d4bab1fc9800 (patch)
tree3e481559470fb4d3e90f4a5f5b59067fbd3c1208 /src/liballoc
parent6b1ff19af36f7bbf1974579ec1b9bf2c8ccd595e (diff)
parentb063bd4616bf2f27b814f39f0e452efd171fd539 (diff)
downloadrust-d5b6b95aef94169b5dbe4dbb1357d4bab1fc9800.tar.gz
rust-d5b6b95aef94169b5dbe4dbb1357d4bab1fc9800.zip
Auto merge of #52553 - Pazzaz:vecdeque-append, r=SimonSapin
Non-naive implementation of `VecDeque.append`

Replaces the old, simple implementation with a more manual (and **unsafe** 😱) one. I've added 1 more test and verified that it covers all 6 code paths in the function.

This new implementation was about 60% faster than the old naive one when I tried benchmarking it.
Diffstat (limited to 'src/liballoc')
-rw-r--r--src/liballoc/Cargo.toml5
-rw-r--r--src/liballoc/benches/vec_deque_append.rs48
-rw-r--r--src/liballoc/collections/vec_deque.rs161
-rw-r--r--src/liballoc/tests/vec_deque.rs101
4 files changed, 313 insertions, 2 deletions
diff --git a/src/liballoc/Cargo.toml b/src/liballoc/Cargo.toml
index ada21e04b30..1dad323769a 100644
--- a/src/liballoc/Cargo.toml
+++ b/src/liballoc/Cargo.toml
@@ -23,3 +23,8 @@ path = "../liballoc/tests/lib.rs"
 [[bench]]
 name = "collectionsbenches"
 path = "../liballoc/benches/lib.rs"
+
+[[bench]]
+name = "vec_deque_append_bench"
+path = "../liballoc/benches/vec_deque_append.rs"
+harness = false
diff --git a/src/liballoc/benches/vec_deque_append.rs b/src/liballoc/benches/vec_deque_append.rs
new file mode 100644
index 00000000000..bd335651137
--- /dev/null
+++ b/src/liballoc/benches/vec_deque_append.rs
@@ -0,0 +1,48 @@
+// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#![feature(duration_as_u128)]
+use std::{collections::VecDeque, time::Instant};
+
+const VECDEQUE_LEN: i32 = 100000;
+const WARMUP_N: usize = 100;
+const BENCH_N: usize = 1000;
+
+fn main() {
+    let a: VecDeque<i32> = (0..VECDEQUE_LEN).collect();
+    let b: VecDeque<i32> = (0..VECDEQUE_LEN).collect();
+
+    for _ in 0..WARMUP_N {
+        let mut c = a.clone();
+        let mut d = b.clone();
+        c.append(&mut d);
+    }
+
+    let mut durations = Vec::with_capacity(BENCH_N);
+
+    for _ in 0..BENCH_N {
+        let mut c = a.clone();
+        let mut d = b.clone();
+        let before = Instant::now();
+        c.append(&mut d);
+        let after = Instant::now();
+        durations.push(after.duration_since(before));
+    }
+
+    let l = durations.len();
+    durations.sort();
+
+    assert!(BENCH_N % 2 == 0);
+    let median = (durations[(l / 2) - 1] + durations[l / 2]) / 2;
+    println!(
+        "\ncustom-bench vec_deque_append {:?} ns/iter\n",
+        median.as_nanos()
+    );
+}
diff --git a/src/liballoc/collections/vec_deque.rs b/src/liballoc/collections/vec_deque.rs
index 7787102ba82..0f759bb8f0b 100644
--- a/src/liballoc/collections/vec_deque.rs
+++ b/src/liballoc/collections/vec_deque.rs
@@ -202,6 +202,23 @@ impl<T> VecDeque<T> {
                                  len);
     }
 
+    /// Returns a pair of slices which contain the contents of the buffer not used by the VecDeque.
+    #[inline]
+    unsafe fn unused_as_mut_slices<'a>(&'a mut self) -> (&'a mut [T], &'a mut [T]) {
+        let head = self.head;
+        let tail = self.tail;
+        let buf = self.buffer_as_mut_slice();
+        if head != tail {
+            // In buf, head..tail contains the VecDeque and tail..head is unused.
+            // So calling `ring_slices` with tail and head swapped returns unused slices.
+            RingSlices::ring_slices(buf, tail, head)
+        } else {
+            // Swapping doesn't help when head == tail.
+            let (before, after) = buf.split_at_mut(head);
+            (after, before)
+        }
+    }
+
     /// Copies a potentially wrapping block of memory len long from src to dest.
     /// (abs(dst - src) + len) must be no larger than cap() (There must be at
     /// most one continuous overlapping region between src and dest).
@@ -1834,8 +1851,148 @@ impl<T> VecDeque<T> {
     #[inline]
     #[stable(feature = "append", since = "1.4.0")]
     pub fn append(&mut self, other: &mut Self) {
-        // naive impl
-        self.extend(other.drain(..));
+        // Copies all values from `src_slice` to the start of `dst_slice`.
+        unsafe fn copy_whole_slice<T>(src_slice: &[T], dst_slice: &mut [T]) {
+            let len = src_slice.len();
+            ptr::copy_nonoverlapping(src_slice.as_ptr(), dst_slice[..len].as_mut_ptr(), len);
+        }
+
+        let src_total = other.len();
+
+        // Guarantees there is space in `self` for `other`.
+        self.reserve(src_total);
+
+        self.head = {
+            let original_head = self.head;
+
+            // The goal is to copy all values from `other` into `self`. To avoid any
+            // mismatch, all valid values in `other` are retrieved...
+            let (src_high, src_low) = other.as_slices();
+            // and unoccupied parts of self are retrieved.
+            let (dst_high, dst_low) = unsafe { self.unused_as_mut_slices() };
+
+            // Then all that is needed is to copy all values from
+            // src (src_high and src_low) to dst (dst_high and dst_low).
+            //
+            // other [o o o . . . . . o o o o]
+            //       [5 6 7]         [1 2 3 4]
+            //       src_low         src_high
+            //
+            // self  [. . . . . . o o o o . .]
+            //       [3 4 5 6 7 .]       [1 2]
+            //       dst_low             dst_high
+            //
+            // Values are not copied one by one but as slices in `copy_whole_slice`.
+            // What slices are used depends on various properties of src and dst.
+            // There are 6 cases in total:
+            //     1. `src` is contiguous and fits in dst_high
+            //     2. `src` is contiguous and does not fit in dst_high
+            //     3. `src` is discontiguous and fits in dst_high
+            //     4. `src` is discontiguous and does not fit in dst_high
+            //        + src_high is smaller than dst_high
+            //     5. `src` is discontiguous and does not fit in dst_high
+            //        + dst_high is smaller than src_high
+            //     6. `src` is discontiguous and does not fit in dst_high
+            //        + dst_high is the same size as src_high
+            let src_contiguous = src_low.is_empty();
+            let dst_high_fits_src = dst_high.len() >= src_total;
+            match (src_contiguous, dst_high_fits_src) {
+                (true, true) => {
+                    // 1.
+                    // other [. . . o o o . . . . . .]
+                    //       []    [1 1 1]
+                    //
+                    // self  [. o o o o o . . . . . .]
+                    //       [.]         [1 1 1 . . .]
+
+                    unsafe {
+                        copy_whole_slice(src_high, dst_high);
+                    }
+                    original_head + src_total
+                }
+                (true, false) => {
+                    // 2.
+                    // other [. . . o o o o o . . . .]
+                    //       []    [1 1 2 2 2]
+                    //
+                    // self  [. . . . . . . o o o . .]
+                    //       [2 2 2 . . . .]     [1 1]
+
+                    let (src_1, src_2) = src_high.split_at(dst_high.len());
+                    unsafe {
+                        copy_whole_slice(src_1, dst_high);
+                        copy_whole_slice(src_2, dst_low);
+                    }
+                    src_total - dst_high.len()
+                }
+                (false, true) => {
+                    // 3.
+                    // other [o o . . . . . . . o o o]
+                    //       [2 2]             [1 1 1]
+                    //
+                    // self  [. o o . . . . . . . . .]
+                    //       [.]   [1 1 1 2 2 . . . .]
+
+                    let (dst_1, dst_2) = dst_high.split_at_mut(src_high.len());
+                    unsafe {
+                        copy_whole_slice(src_high, dst_1);
+                        copy_whole_slice(src_low, dst_2);
+                    }
+                    original_head + src_total
+                }
+                (false, false) => {
+                    if src_high.len() < dst_high.len() {
+                        // 4.
+                        // other [o o o . . . . . . o o o]
+                        //       [2 3 3]           [1 1 1]
+                        //
+                        // self  [. . . . . . o o . . . .]
+                        //       [3 3 . . . .]   [1 1 1 2]
+
+                        let (dst_1, dst_2) = dst_high.split_at_mut(src_high.len());
+                        let (src_2, src_3) = src_low.split_at(dst_2.len());
+                        unsafe {
+                            copy_whole_slice(src_high, dst_1);
+                            copy_whole_slice(src_2, dst_2);
+                            copy_whole_slice(src_3, dst_low);
+                        }
+                        src_3.len()
+                    } else if src_high.len() > dst_high.len() {
+                        // 5.
+                        // other [o o o . . . . . o o o o]
+                        //       [3 3 3]         [1 1 2 2]
+                        //
+                        // self  [. . . . . . o o o o . .]
+                        //       [2 2 3 3 3 .]       [1 1]
+
+                        let (src_1, src_2) = src_high.split_at(dst_high.len());
+                        let (dst_2, dst_3) = dst_low.split_at_mut(src_2.len());
+                        unsafe {
+                            copy_whole_slice(src_1, dst_high);
+                            copy_whole_slice(src_2, dst_2);
+                            copy_whole_slice(src_low, dst_3);
+                        }
+                        dst_2.len() + src_low.len()
+                    } else {
+                        // 6.
+                        // other [o o . . . . . . . o o o]
+                        //       [2 2]             [1 1 1]
+                        //
+                        // self  [. . . . . . . o o . . .]
+                        //       [2 2 . . . . .]   [1 1 1]
+
+                        unsafe {
+                            copy_whole_slice(src_high, dst_high);
+                            copy_whole_slice(src_low, dst_low);
+                        }
+                        src_low.len()
+                    }
+                }
+            }
+        };
+
+        // Some values now exist in both `other` and `self` but are made inaccessible in `other`.
+        other.tail = other.head;
     }
 
     /// Retains only the elements specified by the predicate.
diff --git a/src/liballoc/tests/vec_deque.rs b/src/liballoc/tests/vec_deque.rs
index 4d55584e2f4..3ea6c87a651 100644
--- a/src/liballoc/tests/vec_deque.rs
+++ b/src/liballoc/tests/vec_deque.rs
@@ -929,6 +929,107 @@ fn test_append() {
 }
 
 #[test]
+fn test_append_permutations() {
+    fn construct_vec_deque(
+        push_back: usize,
+        pop_back: usize,
+        push_front: usize,
+        pop_front: usize,
+    ) -> VecDeque<usize> {
+        let mut out = VecDeque::new();
+        for a in 0..push_back {
+            out.push_back(a);
+        }
+        for b in 0..push_front {
+            out.push_front(push_back + b);
+        }
+        for _ in 0..pop_back {
+            out.pop_back();
+        }
+        for _ in 0..pop_front {
+            out.pop_front();
+        }
+        out
+    }
+
+    const MAX: usize = 5;
+
+    // Many different permutations of both the `VecDeque` getting appended to
+    // and the one getting appended are generated to check `append`.
+    // This ensures all 6 code paths of `append` are tested.
+    for src_push_back in 0..MAX {
+        for src_push_front in 0..MAX {
+            // doesn't pop more values than are pushed
+            for src_pop_back in 0..(src_push_back + src_push_front) {
+                for src_pop_front in 0..(src_push_back + src_push_front - src_pop_back) {
+
+                    let src = construct_vec_deque(
+                        src_push_back,
+                        src_pop_back,
+                        src_push_front,
+                        src_pop_front,
+                    );
+
+                    for dst_push_back in 0..MAX {
+                        for dst_push_front in 0..MAX {
+                            for dst_pop_back in 0..(dst_push_back + dst_push_front) {
+                                for dst_pop_front
+                                    in 0..(dst_push_back + dst_push_front - dst_pop_back)
+                                {
+                                    let mut dst = construct_vec_deque(
+                                        dst_push_back,
+                                        dst_pop_back,
+                                        dst_push_front,
+                                        dst_pop_front,
+                                    );
+                                    let mut src = src.clone();
+
+                                    // Assert that appending `src` to `dst` gives the same order
+                                    // of values as iterating over both in sequence.
+                                    let correct = dst
+                                        .iter()
+                                        .chain(src.iter())
+                                        .cloned()
+                                        .collect::<Vec<usize>>();
+                                    dst.append(&mut src);
+                                    assert_eq!(dst, correct);
+                                    assert!(src.is_empty());
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+struct DropCounter<'a> {
+    count: &'a mut u32,
+}
+
+impl<'a> Drop for DropCounter<'a> {
+    fn drop(&mut self) {
+        *self.count += 1;
+    }
+}
+
+#[test]
+fn test_append_double_drop() {
+    let (mut count_a, mut count_b) = (0, 0);
+    {
+        let mut a = VecDeque::new();
+        let mut b = VecDeque::new();
+        a.push_back(DropCounter { count: &mut count_a });
+        b.push_back(DropCounter { count: &mut count_b });
+
+        a.append(&mut b);
+    }
+    assert_eq!(count_a, 1);
+    assert_eq!(count_b, 1);
+}
+
+#[test]
 fn test_retain() {
     let mut buf = VecDeque::new();
     buf.extend(1..5);