Add str::split_ascii_whitespace.

author: Clar Charr <clar@charr.xyz> 2018-05-05 00:33:20 -0400
committer: Clar Charr <clar@charr.xyz> 2018-06-27 17:54:27 -0400
commit: b5cee029a55cd35fcdad52acb294a04ebff1f341 (patch)
tree: 038c8066457355be5193c16c4efe8e9a2bc3af21 /src/libcore
parent: 23b55161ab4cb6d4bf868ac575bd174ca2de0ffa (diff)
download: rust-b5cee029a55cd35fcdad52acb294a04ebff1f341.tar.gz
rust-b5cee029a55cd35fcdad52acb294a04ebff1f341.zip
1 files changed, 155 insertions, 4 deletions
diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs
index 42fb1bc238b..5ae2f6349e5 100644
--- a/src/libcore/str/mod.rs
+++ b/src/libcore/str/mod.rs
@@ -21,7 +21,7 @@ use char;
 use fmt;
 use iter::{Map, Cloned, FusedIterator, TrustedLen, Filter};
 use iter_private::TrustedRandomAccess;
-use slice::{self, SliceIndex};
+use slice::{self, SliceIndex, Split as SliceSplit};
 use mem;
 
 pub mod pattern;
@@ -2722,7 +2722,10 @@ impl str {
     /// the original string slice, separated by any amount of whitespace.
     ///
     /// 'Whitespace' is defined according to the terms of the Unicode Derived
-    /// Core Property `White_Space`.
+    /// Core Property `White_Space`. If you only want to split on ASCII whitespace
+    /// instead, use [`split_ascii_whitespace`].
+    ///
+    /// [`split_ascii_whitespace`]: #method.split_ascii_whitespace
     ///
     /// # Examples
     ///
@@ -2756,6 +2759,53 @@ impl str {
         SplitWhitespace { inner: self.split(IsWhitespace).filter(IsNotEmpty) }
     }
 
+    /// Splits a string slice by ASCII whitespace.
+    ///
+    /// The iterator returned will return string slices that are sub-slices of
+    /// the original string slice, separated by any amount of ASCII whitespace.
+    ///
+    /// To split by Unicode `White_Space` instead, use [`split_whitespace`].
+    ///
+    /// [`split_whitespace`]: #method.split_whitespace
+    ///
+    /// # Examples
+    ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// #![feature(split_ascii_whitespace)]
+    /// let mut iter = "A few words".split_ascii_whitespace();
+    ///
+    /// assert_eq!(Some("A"), iter.next());
+    /// assert_eq!(Some("few"), iter.next());
+    /// assert_eq!(Some("words"), iter.next());
+    ///
+    /// assert_eq!(None, iter.next());
+    /// ```
+    ///
+    /// All kinds of ASCII whitespace are considered:
+    ///
+    /// ```
+    /// #![feature(split_ascii_whitespace)]
+    /// let mut iter = " Mary   had\ta little  \n\t lamb".split_ascii_whitespace();
+    /// assert_eq!(Some("Mary"), iter.next());
+    /// assert_eq!(Some("had"), iter.next());
+    /// assert_eq!(Some("a"), iter.next());
+    /// assert_eq!(Some("little"), iter.next());
+    /// assert_eq!(Some("lamb"), iter.next());
+    /// assert_eq!(None, iter.next());
+    /// ```
+    #[unstable(feature = "split_ascii_whitespace", issue = "48656")]
+    #[inline]
+    pub fn split_ascii_whitespace(&self) -> SplitAsciiWhitespace {
+        let inner = self
+            .as_bytes()
+            .split(IsAsciiWhitespace)
+            .filter(IsNotEmpty)
+            .map(UnsafeBytesToStr);
+        SplitAsciiWhitespace { inner }
+    }
+
     /// An iterator over the lines of a string, as string slices.
     ///
     /// Lines are ended with either a newline (`\n`) or a carriage return with
@@ -3895,6 +3945,20 @@ pub struct SplitWhitespace<'a> {
     inner: Filter<Split<'a, IsWhitespace>, IsNotEmpty>,
 }
 
+/// An iterator over the non-ASCII-whitespace substrings of a string,
+/// separated by any amount of ASCII whitespace.
+///
+/// This struct is created by the [`split_ascii_whitespace`] method on [`str`].
+/// See its documentation for more.
+///
+/// [`split_ascii_whitespace`]: ../../std/primitive.str.html#method.split_ascii_whitespace
+/// [`str`]: ../../std/primitive.str.html
+#[unstable(feature = "split_ascii_whitespace", issue = "48656")]
+#[derive(Clone, Debug)]
+pub struct SplitAsciiWhitespace<'a> {
+    inner: Map<Filter<SliceSplit<'a, u8, IsAsciiWhitespace>, IsNotEmpty>, UnsafeBytesToStr>,
+}
+
 #[derive(Clone)]
 struct IsWhitespace;
 
@@ -3915,36 +3979,97 @@ impl FnMut<(char, )> for IsWhitespace {
 }
 
 #[derive(Clone)]
+struct IsAsciiWhitespace;
+
+impl<'a> FnOnce<(&'a u8, )> for IsAsciiWhitespace {
+    type Output = bool;
+
+    #[inline]
+    extern "rust-call" fn call_once(mut self, arg: (&u8, )) -> bool {
+        self.call_mut(arg)
+    }
+}
+
+impl<'a> FnMut<(&'a u8, )> for IsAsciiWhitespace {
+    #[inline]
+    extern "rust-call" fn call_mut(&mut self, arg: (&u8, )) -> bool {
+        arg.0.is_ascii_whitespace()
+    }
+}
+
+#[derive(Clone)]
 struct IsNotEmpty;
 
 impl<'a, 'b> FnOnce<(&'a &'b str, )> for IsNotEmpty {
     type Output = bool;
 
     #[inline]
-    extern "rust-call" fn call_once(mut self, arg: (&&str, )) -> bool {
+    extern "rust-call" fn call_once(mut self, arg: (&'a &'b str, )) -> bool {
         self.call_mut(arg)
     }
 }
 
 impl<'a, 'b> FnMut<(&'a &'b str, )> for IsNotEmpty {
     #[inline]
-    extern "rust-call" fn call_mut(&mut self, arg: (&&str, )) -> bool {
+    extern "rust-call" fn call_mut(&mut self, arg: (&'a &'b str, )) -> bool {
+        !arg.0.is_empty()
+    }
+}
+
+impl<'a, 'b> FnOnce<(&'a &'b [u8], )> for IsNotEmpty {
+    type Output = bool;
+
+    #[inline]
+    extern "rust-call" fn call_once(mut self, arg: (&'a &'b [u8], )) -> bool {
+        self.call_mut(arg)
+    }
+}
+
+impl<'a, 'b> FnMut<(&'a &'b [u8], )> for IsNotEmpty {
+    #[inline]
+    extern "rust-call" fn call_mut(&mut self, arg: (&'a &'b [u8], )) -> bool {
         !arg.0.is_empty()
     }
 }
 
+#[derive(Clone)]
+struct UnsafeBytesToStr;
+
+impl<'a> FnOnce<(&'a [u8], )> for UnsafeBytesToStr {
+    type Output = &'a str;
+
+    #[inline]
+    extern "rust-call" fn call_once(mut self, arg: (&'a [u8], )) -> &'a str {
+        self.call_mut(arg)
+    }
+}
+
+impl<'a> FnMut<(&'a [u8], )> for UnsafeBytesToStr {
+    #[inline]
+    extern "rust-call" fn call_mut(&mut self, arg: (&'a [u8], )) -> &'a str {
+        unsafe { from_utf8_unchecked(arg.0) }
+    }
+}
+
 
 #[stable(feature = "split_whitespace", since = "1.1.0")]
 impl<'a> Iterator for SplitWhitespace<'a> {
     type Item = &'a str;
 
+    #[inline]
     fn next(&mut self) -> Option<&'a str> {
         self.inner.next()
     }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.inner.size_hint()
+    }
 }
 
 #[stable(feature = "split_whitespace", since = "1.1.0")]
 impl<'a> DoubleEndedIterator for SplitWhitespace<'a> {
+    #[inline]
     fn next_back(&mut self) -> Option<&'a str> {
         self.inner.next_back()
     }
@@ -3953,6 +4078,32 @@ impl<'a> DoubleEndedIterator for SplitWhitespace<'a> {
 #[stable(feature = "fused", since = "1.26.0")]
 impl<'a> FusedIterator for SplitWhitespace<'a> {}
 
+#[unstable(feature = "split_ascii_whitespace", issue = "48656")]
+impl<'a> Iterator for SplitAsciiWhitespace<'a> {
+    type Item = &'a str;
+
+    #[inline]
+    fn next(&mut self) -> Option<&'a str> {
+        self.inner.next()
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.inner.size_hint()
+    }
+}
+
+#[unstable(feature = "split_ascii_whitespace", issue = "48656")]
+impl<'a> DoubleEndedIterator for SplitAsciiWhitespace<'a> {
+    #[inline]
+    fn next_back(&mut self) -> Option<&'a str> {
+        self.inner.next_back()
+    }
+}
+
+#[unstable(feature = "split_ascii_whitespace", issue = "48656")]
+impl<'a> FusedIterator for SplitAsciiWhitespace<'a> {}
+
 /// An iterator of [`u16`] over the string encoded as UTF-16.
 ///
 /// [`u16`]: ../../std/primitive.u16.html
author	Clar Charr <clar@charr.xyz>	2018-05-05 00:33:20 -0400
committer	Clar Charr <clar@charr.xyz>	2018-06-27 17:54:27 -0400
commit	b5cee029a55cd35fcdad52acb294a04ebff1f341 (patch)
tree	038c8066457355be5193c16c4efe8e9a2bc3af21 /src/libcore
parent	23b55161ab4cb6d4bf868ac575bd174ca2de0ffa (diff)
download	rust-b5cee029a55cd35fcdad52acb294a04ebff1f341.tar.gz rust-b5cee029a55cd35fcdad52acb294a04ebff1f341.zip