about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2020-09-15 05:01:17 +0000
committerbors <bors@rust-lang.org>2020-09-15 05:01:17 +0000
commit6cae28165f0450fd3f100374b26841e458b8cfef (patch)
tree4c9c1a60afdbda1db11b24a2ce292b4f685c265f
parent715e9340a1006e37aed381e320ddf30311c2c2a6 (diff)
parent79aa9b15d7403ce2dc40b525a1d16e6c4ad1973c (diff)
downloadrust-6cae28165f0450fd3f100374b26841e458b8cfef.tar.gz
rust-6cae28165f0450fd3f100374b26841e458b8cfef.zip
Auto merge of #76682 - richkadel:vec-take, r=Mark-Simulacrum
Optimize behavior of vec.split_off(0) (take all)

Optimization improvement to `split_off()` so the performance meets the
intuitively expected behavior when `at == 0`, avoiding the current behavior
of copying the entire vector.

The change honors documented behavior that the original vector's
"previous capacity unchanged".

This improvement better supports the pattern for building and flushing a
buffer of elements, such as the following:

```rust
    let mut vec = Vec::new();
    loop {
        vec.push(something);
        if condition_is_met {
            process(vec.split_off(0));
        }
    }
```

`Option` wrapping is the first alternative I thought of, but is much
less obvious and more verbose:

```rust
    let mut capacity = 1;
    let mut vec: Option<Vec<Stuff>> = None;
    loop {
        vec.get_or_insert_with(|| Vec::with_capacity(capacity)).push(something);
        if condition_is_met {
            capacity = vec.capacity();
            process(vec.take().unwrap());
        }
    }
```

Directly using `mem::replace()` (instead of  calling`split_off()`) could work,
but `mem::replace()` is a more advanced tool for Rust developers, and in
this case, I believe developers would assume the standard library should
be sufficient for the purpose described here.

The benefit of the approach to this change is it does not change the
existing API contract, but improves the peformance of `split_off(0)` for
`Vec`, `String` (which delegates `split_off()` to `Vec`), and any other
existing use cases.

This change adds tests to validate the behavior of `split_off()` with
regard to capacity, as originally documented, and confirm that behavior
still holds, when `at == 0`.

The change is an implementation detail, and does not require a
documentation change, but documenting the new behavior as part of its
API contract may benefit future users.

(Let me know if I should make that documentation update.)

Note, for future consideration:

I think it would be helpful to introduce an additional method to `Vec`
(if not also to `String`):

```
    pub fn take_all(&mut self) -> Self {
        self.split_off(0)
    }
```

This would make it more clear how `Vec` supports the pattern, and make
it easier to find, since the behavior is similar to other `take()`
methods in the Rust standard library.

r? `@wesleywiser`
FYI: `@tmandry`
-rw-r--r--library/alloc/src/vec.rs5
-rw-r--r--library/alloc/tests/string.rs4
-rw-r--r--library/alloc/tests/vec.rs14
3 files changed, 23 insertions, 0 deletions
diff --git a/library/alloc/src/vec.rs b/library/alloc/src/vec.rs
index baa6c0919de..cb4c1c20abc 100644
--- a/library/alloc/src/vec.rs
+++ b/library/alloc/src/vec.rs
@@ -1410,6 +1410,11 @@ impl<T> Vec<T> {
             assert_failed(at, self.len());
         }
 
+        if at == 0 {
+            // the new vector can take over the original buffer and avoid the copy
+            return mem::replace(self, Vec::with_capacity(self.capacity()));
+        }
+
         let other_len = self.len - at;
         let mut other = Vec::with_capacity(other_len);
 
diff --git a/library/alloc/tests/string.rs b/library/alloc/tests/string.rs
index 6059bec8c5a..f7f78046d08 100644
--- a/library/alloc/tests/string.rs
+++ b/library/alloc/tests/string.rs
@@ -278,17 +278,21 @@ fn test_split_off_mid_char() {
 #[test]
 fn test_split_off_ascii() {
     let mut ab = String::from("ABCD");
+    let orig_capacity = ab.capacity();
     let cd = ab.split_off(2);
     assert_eq!(ab, "AB");
     assert_eq!(cd, "CD");
+    assert_eq!(ab.capacity(), orig_capacity);
 }
 
 #[test]
 fn test_split_off_unicode() {
     let mut nihon = String::from("日本語");
+    let orig_capacity = nihon.capacity();
     let go = nihon.split_off("日本".len());
     assert_eq!(nihon, "日本");
     assert_eq!(go, "語");
+    assert_eq!(nihon.capacity(), orig_capacity);
 }
 
 #[test]
diff --git a/library/alloc/tests/vec.rs b/library/alloc/tests/vec.rs
index 5f7a9399453..608a3c9bdf7 100644
--- a/library/alloc/tests/vec.rs
+++ b/library/alloc/tests/vec.rs
@@ -772,9 +772,23 @@ fn test_append() {
 #[test]
 fn test_split_off() {
     let mut vec = vec![1, 2, 3, 4, 5, 6];
+    let orig_capacity = vec.capacity();
     let vec2 = vec.split_off(4);
     assert_eq!(vec, [1, 2, 3, 4]);
     assert_eq!(vec2, [5, 6]);
+    assert_eq!(vec.capacity(), orig_capacity);
+}
+
+#[test]
+fn test_split_off_take_all() {
+    let mut vec = vec![1, 2, 3, 4, 5, 6];
+    let orig_ptr = vec.as_ptr();
+    let orig_capacity = vec.capacity();
+    let vec2 = vec.split_off(0);
+    assert_eq!(vec, []);
+    assert_eq!(vec2, [1, 2, 3, 4, 5, 6]);
+    assert_eq!(vec.capacity(), orig_capacity);
+    assert_eq!(vec2.as_ptr(), orig_ptr);
 }
 
 #[test]