about summary refs log tree commit diff
path: root/src/liballoc
diff options
context:
space:
mode:
Diffstat (limited to 'src/liballoc')
-rw-r--r--src/liballoc/slice.rs16
-rw-r--r--src/liballoc/str.rs126
-rw-r--r--src/liballoc/tests/slice.rs9
-rw-r--r--src/liballoc/tests/str.rs13
4 files changed, 122 insertions, 42 deletions
diff --git a/src/liballoc/slice.rs b/src/liballoc/slice.rs
index 161493f3892..90bc2f9769c 100644
--- a/src/liballoc/slice.rs
+++ b/src/liballoc/slice.rs
@@ -566,15 +566,17 @@ impl<T: Clone, V: Borrow<[T]>> SliceConcatExt<T> for [V] {
     }
 
     fn join(&self, sep: &T) -> Vec<T> {
+        let mut iter = self.iter();
+        let first = match iter.next() {
+            Some(first) => first,
+            None => return vec![],
+        };
         let size = self.iter().fold(0, |acc, v| acc + v.borrow().len());
         let mut result = Vec::with_capacity(size + self.len());
-        let mut first = true;
-        for v in self {
-            if first {
-                first = false
-            } else {
-                result.push(sep.clone())
-            }
+        result.extend_from_slice(first.borrow());
+
+        for v in iter {
+            result.push(sep.clone());
             result.extend_from_slice(v.borrow())
         }
         result
diff --git a/src/liballoc/str.rs b/src/liballoc/str.rs
index 823e56b64e3..32ca8d1fa5e 100644
--- a/src/liballoc/str.rs
+++ b/src/liballoc/str.rs
@@ -86,52 +86,108 @@ impl<S: Borrow<str>> SliceConcatExt<str> for [S] {
     type Output = String;
 
     fn concat(&self) -> String {
-        if self.is_empty() {
-            return String::new();
-        }
-
-        // `len` calculation may overflow but push_str will check boundaries
-        let len = self.iter().map(|s| s.borrow().len()).sum();
-        let mut result = String::with_capacity(len);
-
-        for s in self {
-            result.push_str(s.borrow())
-        }
-
-        result
+        self.join("")
     }
 
     fn join(&self, sep: &str) -> String {
-        if self.is_empty() {
-            return String::new();
-        }
-
-        // concat is faster
-        if sep.is_empty() {
-            return self.concat();
+        unsafe {
+            String::from_utf8_unchecked( join_generic_copy(self, sep.as_bytes()) )
         }
+    }
 
-        // this is wrong without the guarantee that `self` is non-empty
-        // `len` calculation may overflow but push_str but will check boundaries
-        let len = sep.len() * (self.len() - 1) +
-                  self.iter().map(|s| s.borrow().len()).sum::<usize>();
-        let mut result = String::with_capacity(len);
-        let mut first = true;
+    fn connect(&self, sep: &str) -> String {
+        self.join(sep)
+    }
+}
 
-        for s in self {
-            if first {
-                first = false;
-            } else {
-                result.push_str(sep);
+macro_rules! spezialize_for_lengths {
+    ($separator:expr, $target:expr, $iter:expr; $($num:expr),*) => {
+        let mut target = $target;
+        let iter = $iter;
+        let sep_bytes = $separator;
+        match $separator.len() {
+            $(
+                // loops with hardcoded sizes run much faster
+                // specialize the cases with small separator lengths
+                $num => {
+                    for s in iter {
+                        copy_slice_and_advance!(target, sep_bytes);
+                        copy_slice_and_advance!(target, s.borrow().as_ref());
+                    }
+                },
+            )*
+            _ => {
+                // arbitrary non-zero size fallback
+                for s in iter {
+                    copy_slice_and_advance!(target, sep_bytes);
+                    copy_slice_and_advance!(target, s.borrow().as_ref());
+                }
             }
-            result.push_str(s.borrow());
         }
-        result
+    };
+}
+
+macro_rules! copy_slice_and_advance {
+    ($target:expr, $bytes:expr) => {
+        let len = $bytes.len();
+        let (head, tail) = {$target}.split_at_mut(len);
+        head.copy_from_slice($bytes);
+        $target = tail;
     }
+}
 
-    fn connect(&self, sep: &str) -> String {
-        self.join(sep)
+// Optimized join implementation that works for both Vec<T> (T: Copy) and String's inner vec
+// Currently (2018-05-13) there is a bug with type inference and specialization (see issue #36262)
+// For this reason SliceConcatExt<T> is not specialized for T: Copy and SliceConcatExt<str> is the
+// only user of this function. It is left in place for the time when that is fixed.
+//
+// the bounds for String-join are S: Borrow<str> and for Vec-join Borrow<[T]>
+// [T] and str both impl AsRef<[T]> for some T
+// => s.borrow().as_ref() and we always have slices
+fn join_generic_copy<B, T, S>(slice: &[S], sep: &[T]) -> Vec<T>
+where
+    T: Copy,
+    B: AsRef<[T]> + ?Sized,
+    S: Borrow<B>,
+{
+    let sep_len = sep.len();
+    let mut iter = slice.iter();
+
+    // the first slice is the only one without a separator preceding it
+    let first = match iter.next() {
+        Some(first) => first,
+        None => return vec![],
+    };
+
+    // compute the exact total length of the joined Vec
+    // if the `len` calculation overflows, we'll panic
+    // we would have run out of memory anyway and the rest of the function requires
+    // the entire Vec pre-allocated for safety
+    let len =  sep_len.checked_mul(iter.len()).and_then(|n| {
+            slice.iter()
+                .map(|s| s.borrow().as_ref().len())
+                .try_fold(n, usize::checked_add)
+        }).expect("attempt to join into collection with len > usize::MAX");
+
+    // crucial for safety
+    let mut result = Vec::with_capacity(len);
+    assert!(result.capacity() >= len);
+
+    result.extend_from_slice(first.borrow().as_ref());
+
+    unsafe {
+        {
+            let pos = result.len();
+            let target = result.get_unchecked_mut(pos..len);
+
+            // copy separator and slices over without bounds checks
+            // generate loops with hardcoded offsets for small separators
+            // massive improvements possible (~ x2)
+            spezialize_for_lengths!(sep, target, iter; 0, 1, 2, 3, 4);
+        }
+        result.set_len(len);
     }
+    result
 }
 
 #[stable(feature = "rust1", since = "1.0.0")]
diff --git a/src/liballoc/tests/slice.rs b/src/liballoc/tests/slice.rs
index 6fd0b33f02a..3b7eec38609 100644
--- a/src/liballoc/tests/slice.rs
+++ b/src/liballoc/tests/slice.rs
@@ -610,6 +610,15 @@ fn test_join() {
 }
 
 #[test]
+fn test_join_nocopy() {
+    let v: [String; 0] = [];
+    assert_eq!(v.join(","), "");
+    assert_eq!(["a".to_string(), "ab".into()].join(","), "a,ab");
+    assert_eq!(["a".to_string(), "ab".into(), "abc".into()].join(","), "a,ab,abc");
+    assert_eq!(["a".to_string(), "ab".into(), "".into()].join(","), "a,ab,");
+}
+
+#[test]
 fn test_insert() {
     let mut a = vec![1, 2, 4];
     a.insert(2, 3);
diff --git a/src/liballoc/tests/str.rs b/src/liballoc/tests/str.rs
index d11bf5dc3e9..03d295d16e6 100644
--- a/src/liballoc/tests/str.rs
+++ b/src/liballoc/tests/str.rs
@@ -162,6 +162,19 @@ fn test_join_for_different_lengths() {
     test_join!("-a-bc", ["", "a", "bc"], "-");
 }
 
+// join has fast paths for small separators up to 4 bytes
+// this tests the slow paths.
+#[test]
+fn test_join_for_different_lengths_with_long_separator() {
+    assert_eq!("~~~~~".len(), 15);
+
+    let empty: &[&str] = &[];
+    test_join!("", empty, "~~~~~");
+    test_join!("a", ["a"], "~~~~~");
+    test_join!("a~~~~~b", ["a", "b"], "~~~~~");
+    test_join!("~~~~~a~~~~~bc", ["", "a", "bc"], "~~~~~");
+}
+
 #[test]
 fn test_unsafe_slice() {
     assert_eq!("ab", unsafe {"abc".slice_unchecked(0, 2)});