diff options
| author | Pyry Kontio <pyry.kontio@drasa.eu> | 2019-12-16 04:05:25 +0900 |
|---|---|---|
| committer | Pyry Kontio <pyry.kontio@drasa.eu> | 2020-02-09 23:48:52 +0900 |
| commit | 86bf96291d8222dbec9e07461404ecb189be0a98 (patch) | |
| tree | 3271b1a4a02101eafcb4c01007b0825ea43b0d6a /src | |
| parent | 8647aa1a2ce279f8ec7cc5252d10b8cb9ea504eb (diff) | |
| download | rust-86bf96291d8222dbec9e07461404ecb189be0a98.tar.gz rust-86bf96291d8222dbec9e07461404ecb189be0a98.zip | |
Implement split_inclusive for slice and str, an splitting iterator that includes the matched part in the iterated substrings as a terminator.
Diffstat (limited to 'src')
| -rw-r--r-- | src/liballoc/tests/lib.rs | 1 | ||||
| -rw-r--r-- | src/liballoc/tests/slice.rs | 20 | ||||
| -rw-r--r-- | src/liballoc/tests/str.rs | 18 | ||||
| -rw-r--r-- | src/libcore/slice/mod.rs | 261 | ||||
| -rw-r--r-- | src/libcore/str/mod.rs | 123 |
5 files changed, 422 insertions, 1 deletions
diff --git a/src/liballoc/tests/lib.rs b/src/liballoc/tests/lib.rs index c1ae67a1a33..ea75f8903c3 100644 --- a/src/liballoc/tests/lib.rs +++ b/src/liballoc/tests/lib.rs @@ -12,6 +12,7 @@ #![feature(binary_heap_into_iter_sorted)] #![feature(binary_heap_drain_sorted)] #![feature(vec_remove_item)] +#![feature(split_inclusive)] use std::collections::hash_map::DefaultHasher; use std::hash::{Hash, Hasher}; diff --git a/src/liballoc/tests/slice.rs b/src/liballoc/tests/slice.rs index 51ddb5e7a4e..9e903b16a26 100644 --- a/src/liballoc/tests/slice.rs +++ b/src/liballoc/tests/slice.rs @@ -852,6 +852,26 @@ fn test_splitator() { } #[test] +fn test_splitator_inclusive() { + let xs = &[1, 2, 3, 4, 5]; + + let splits: &[&[_]] = &[&[1, 2], &[3, 4], &[5]]; + assert_eq!(xs.split_inclusive(|x| *x % 2 == 0).collect::<Vec<_>>(), splits); + let splits: &[&[_]] = &[&[1], &[2, 3, 4, 5]]; + assert_eq!(xs.split_inclusive(|x| *x == 1).collect::<Vec<_>>(), splits); + let splits: &[&[_]] = &[&[1, 2, 3, 4, 5], &[]]; + assert_eq!(xs.split_inclusive(|x| *x == 5).collect::<Vec<_>>(), splits); + let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]]; + assert_eq!(xs.split_inclusive(|x| *x == 10).collect::<Vec<_>>(), splits); + let splits: &[&[_]] = &[&[1], &[2], &[3], &[4], &[5], &[]]; + assert_eq!(xs.split_inclusive(|_| true).collect::<Vec<&[i32]>>(), splits); + + let xs: &[i32] = &[]; + let splits: &[&[i32]] = &[&[]]; + assert_eq!(xs.split_inclusive(|x| *x == 5).collect::<Vec<&[i32]>>(), splits); +} + +#[test] fn test_splitnator() { let xs = &[1, 2, 3, 4, 5]; diff --git a/src/liballoc/tests/str.rs b/src/liballoc/tests/str.rs index d3c72615696..fc2fcb6e569 100644 --- a/src/liballoc/tests/str.rs +++ b/src/liballoc/tests/str.rs @@ -1248,6 +1248,24 @@ fn test_split_char_iterator_no_trailing() { } #[test] +fn test_split_char_iterator_inclusive() { + let data = "\nMäry häd ä little lämb\nLittle lämb\n"; + + let split: Vec<&str> = data.split_inclusive('\n').collect(); + assert_eq!(split, ["\n", "Märy häd ä little lämb\n", "Little lämb\n", ""]); + + let uppercase_separated = "SheePSharKTurtlECaT"; + let mut first_char = true; + let split: Vec<&str> = uppercase_separated.split_inclusive(|c: char| { + let split = !first_char && c.is_uppercase(); + first_char = split; + split + }).collect(); + assert_eq!(split, ["SheeP", "SharK", "TurtlE", "CaT", ""]); +} + + +#[test] fn test_rsplit() { let data = "\nMäry häd ä little lämb\nLittle lämb\n"; diff --git a/src/libcore/slice/mod.rs b/src/libcore/slice/mod.rs index 9b4d2015732..202ffc390e4 100644 --- a/src/libcore/slice/mod.rs +++ b/src/libcore/slice/mod.rs @@ -1156,6 +1156,72 @@ impl<T> [T] { } /// Returns an iterator over subslices separated by elements that match + /// `pred`. The matched element is contained in the end of the previous + /// subslice as a terminator. + /// + /// # Examples + /// + /// ``` + /// #![feature(split_inclusive)] + /// let slice = [10, 40, 33, 20]; + /// let mut iter = slice.split_inclusive(|num| num % 3 == 0); + /// + /// assert_eq!(iter.next().unwrap(), &[10, 40, 33]); + /// assert_eq!(iter.next().unwrap(), &[20]); + /// assert!(iter.next().is_none()); + /// ``` + /// + /// If the first element is matched, an empty slice will be the first item + /// returned by the iterator. Similarly, if the last element in the slice + /// is matched, an empty slice will be the last item returned by the + /// iterator: + /// + /// ``` + /// #![feature(split_inclusive)] + /// let slice = [10, 40, 33]; + /// let mut iter = slice.split_inclusive(|num| num % 3 == 0); + /// + /// assert_eq!(iter.next().unwrap(), &[10, 40, 33]); + /// assert_eq!(iter.next().unwrap(), &[]); + /// assert!(iter.next().is_none()); + /// ``` + #[unstable(feature = "split_inclusive", issue = "0")] + #[inline] + pub fn split_inclusive<F>(&self, pred: F) -> SplitInclusive<'_, T, F> + where F: FnMut(&T) -> bool + { + SplitInclusive { + v: self, + pred, + finished: false + } + } + + /// Returns an iterator over mutable subslices separated by elements that + /// match `pred`. The matched element is contained in the previous + /// subslice as a terminator. + /// + /// # Examples + /// + /// ``` + /// #![feature(split_inclusive)] + /// let mut v = [10, 40, 30, 20, 60, 50]; + /// + /// for group in v.split_inclusive_mut(|num| *num % 3 == 0) { + /// let terminator_idx = group.len()-1; + /// group[terminator_idx] = 1; + /// } + /// assert_eq!(v, [10, 40, 1, 20, 1, 1]); + /// ``` + #[unstable(feature = "split_inclusive", issue = "0")] + #[inline] + pub fn split_inclusive_mut<F>(&mut self, pred: F) -> SplitInclusiveMut<'_, T, F> + where F: FnMut(&T) -> bool + { + SplitInclusiveMut { v: self, pred, finished: false } + } + + /// Returns an iterator over subslices separated by elements that match /// `pred`, starting at the end of the slice and working backwards. /// The matched element is not contained in the subslices. /// @@ -3675,7 +3741,100 @@ where #[stable(feature = "fused", since = "1.26.0")] impl<T, P> FusedIterator for Split<'_, T, P> where P: FnMut(&T) -> bool {} -/// An iterator over the subslices of the vector which are separated +/// An iterator over subslices separated by elements that match a predicate +/// function. Unlike `Split`, it contains the matched part as a terminator +/// of the subslice. +/// +/// This struct is created by the [`split_inclusive`] method on [slices]. +/// +/// [`split_inclusive`]: ../../std/primitive.slice.html#method.split_inclusive +/// [slices]: ../../std/primitive.slice.html +#[unstable(feature = "split_inclusive", issue = "0")] +pub struct SplitInclusive<'a, T:'a, P> where P: FnMut(&T) -> bool { + v: &'a [T], + pred: P, + finished: bool +} + +#[unstable(feature = "split_inclusive", issue = "0")] +impl<T: fmt::Debug, P> fmt::Debug for SplitInclusive<'_, T, P> where P: FnMut(&T) -> bool { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("SplitInclusive") + .field("v", &self.v) + .field("finished", &self.finished) + .finish() + } +} + +// FIXME(#26925) Remove in favor of `#[derive(Clone)]` +#[unstable(feature = "split_inclusive", issue = "0")] +impl<T, P> Clone for SplitInclusive<'_, T, P> where P: Clone + FnMut(&T) -> bool { + fn clone(&self) -> Self { + SplitInclusive { + v: self.v, + pred: self.pred.clone(), + finished: self.finished, + } + } +} + +#[unstable(feature = "split_inclusive", issue = "0")] +impl<'a, T, P> Iterator for SplitInclusive<'a, T, P> where P: FnMut(&T) -> bool { + type Item = &'a [T]; + + #[inline] + fn next(&mut self) -> Option<&'a [T]> { + if self.finished { return None; } + + match self.v.iter().position(|x| (self.pred)(x)) { + None => self.finish(), + Some(idx) => { + let ret = Some(&self.v[..idx + 1]); + self.v = &self.v[idx + 1..]; + ret + } + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + if self.finished { + (0, Some(0)) + } else { + (1, Some(self.v.len() + 1)) + } + } +} + +#[unstable(feature = "split_inclusive", issue = "0")] +impl<'a, T, P> DoubleEndedIterator for SplitInclusive<'a, T, P> where P: FnMut(&T) -> bool { + #[inline] + fn next_back(&mut self) -> Option<&'a [T]> { + if self.finished { return None; } + + match self.v.iter().rposition(|x| (self.pred)(x)) { + None => self.finish(), + Some(idx) => { + let ret = Some(&self.v[idx + 1..]); + self.v = &self.v[..idx]; + ret + } + } + } +} + +#[unstable(feature = "split_inclusive", issue = "0")] +impl<'a, T, P> SplitIter for SplitInclusive<'a, T, P> where P: FnMut(&T) -> bool { + #[inline] + fn finish(&mut self) -> Option<&'a [T]> { + if self.finished { None } else { self.finished = true; Some(self.v) } + } +} + +#[unstable(feature = "split_inclusive", issue = "0")] +impl<T, P> FusedIterator for SplitInclusive<'_, T, P> where P: FnMut(&T) -> bool {} + +/// An iterator over the mutable subslices of the vector which are separated /// by elements that match `pred`. /// /// This struct is created by the [`split_mut`] method on [slices]. @@ -3789,6 +3948,106 @@ where #[stable(feature = "fused", since = "1.26.0")] impl<T, P> FusedIterator for SplitMut<'_, T, P> where P: FnMut(&T) -> bool {} +/// An iterator over the mutable subslices of the vector which are separated +/// by elements that match `pred`. Unlike `SplitMut`, it contains the matched +/// parts in the ends of the subslices. +/// +/// This struct is created by the [`split_inclusive_mut`] method on [slices]. +/// +/// [`split_inclusive_mut`]: ../../std/primitive.slice.html#method.split_inclusive_mut +/// [slices]: ../../std/primitive.slice.html +#[unstable(feature = "split_inclusive", issue = "0")] +pub struct SplitInclusiveMut<'a, T:'a, P> where P: FnMut(&T) -> bool { + v: &'a mut [T], + pred: P, + finished: bool +} + +#[unstable(feature = "split_inclusive", issue = "0")] +impl<T: fmt::Debug, P> fmt::Debug for SplitInclusiveMut<'_, T, P> where P: FnMut(&T) -> bool { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("SplitInclusiveMut") + .field("v", &self.v) + .field("finished", &self.finished) + .finish() + } +} + +#[unstable(feature = "split_inclusive", issue = "0")] +impl<'a, T, P> SplitIter for SplitInclusiveMut<'a, T, P> where P: FnMut(&T) -> bool { + #[inline] + fn finish(&mut self) -> Option<&'a mut [T]> { + if self.finished { + None + } else { + self.finished = true; + Some(mem::replace(&mut self.v, &mut [])) + } + } +} + +#[unstable(feature = "split_inclusive", issue = "0")] +impl<'a, T, P> Iterator for SplitInclusiveMut<'a, T, P> where P: FnMut(&T) -> bool { + type Item = &'a mut [T]; + + #[inline] + fn next(&mut self) -> Option<&'a mut [T]> { + if self.finished { return None; } + + let idx_opt = { // work around borrowck limitations + let pred = &mut self.pred; + self.v.iter().position(|x| (*pred)(x)) + }; + match idx_opt { + None => self.finish(), + Some(idx) => { + let tmp = mem::replace(&mut self.v, &mut []); + let (head, tail) = tmp.split_at_mut(idx+1); + self.v = tail; + Some(head) + } + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + if self.finished { + (0, Some(0)) + } else { + // if the predicate doesn't match anything, we yield one slice + // if it matches every element, we yield len+1 empty slices. + (1, Some(self.v.len() + 1)) + } + } +} + +#[unstable(feature = "split_inclusive", issue = "0")] +impl<'a, T, P> DoubleEndedIterator for SplitInclusiveMut<'a, T, P> where + P: FnMut(&T) -> bool, +{ + #[inline] + fn next_back(&mut self) -> Option<&'a mut [T]> { + if self.finished { return None; } + + let idx_opt = { // work around borrowck limitations + let pred = &mut self.pred; + self.v.iter().rposition(|x| (*pred)(x)) + }; + match idx_opt { + None => self.finish(), + Some(idx) => { + let tmp = mem::replace(&mut self.v, &mut []); + let (head, tail) = tmp.split_at_mut(idx+1); + self.v = head; + Some(tail) + } + } + } +} + +#[unstable(feature = "split_inclusive", issue = "0")] +impl<T, P> FusedIterator for SplitInclusiveMut<'_, T, P> where P: FnMut(&T) -> bool {} + /// An iterator over subslices separated by elements that match a predicate /// function, starting from the end of the slice. /// diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs index 5a7cddd4041..3f7defa912b 100644 --- a/src/libcore/str/mod.rs +++ b/src/libcore/str/mod.rs @@ -1133,6 +1133,21 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> { } #[inline] + fn next_inclusive(&mut self) -> Option<&'a str> { + if self.finished { return None } + + let haystack = self.matcher.haystack(); + match self.matcher.next_match() { + Some((_, b)) => unsafe { + let elt = haystack.get_unchecked(self.start..b); + self.start = b; + Some(elt) + }, + None => self.get_end(), + } + } + + #[inline] fn next_back(&mut self) -> Option<&'a str> where P::Searcher: ReverseSearcher<'a>, @@ -1168,6 +1183,34 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> { }, } } + + #[inline] + fn next_back_inclusive(&mut self) -> Option<&'a str> + where P::Searcher: ReverseSearcher<'a> + { + if self.finished { return None } + + if !self.allow_trailing_empty { + self.allow_trailing_empty = true; + match self.next_back() { + Some(elt) if !elt.is_empty() => return Some(elt), + _ => if self.finished { return None } + } + } + + let haystack = self.matcher.haystack(); + match self.matcher.next_match_back() { + Some((_, b)) => unsafe { + let elt = haystack.get_unchecked(b..self.end); + self.end = b; + Some(elt) + }, + None => unsafe { + self.finished = true; + Some(haystack.get_unchecked(self.start..self.end)) + }, + } + } } generate_pattern_iterators! { @@ -3212,6 +3255,31 @@ impl str { }) } + /// An iterator over substrings of this string slice, separated by + /// characters matched by a pattern. Differs from the iterator produced by + /// `split` in that `split_inclusive` leaves the matched part as the + /// terminator of the substring. + /// + /// # Examples + /// + /// ``` + /// #![feature(split_inclusive)] + /// let v: Vec<&str> = "Mary had a little lamb\nlittle lamb\nlittle lamb." + /// .split_inclusive('\n').collect(); + /// assert_eq!(v, ["Mary had a little lamb\n", "little lamb\n", "little lamb."]); + /// ``` + #[unstable(feature = "split_inclusive", issue = "0")] + #[inline] + pub fn split_inclusive<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitInclusive<'a, P> { + SplitInclusive(SplitInternal { + start: 0, + end: self.len(), + matcher: pat.into_searcher(self), + allow_trailing_empty: true, + finished: false, + }) + } + /// An iterator over substrings of the given string slice, separated by /// characters matched by a pattern and yielded in reverse order. /// @@ -4405,6 +4473,19 @@ pub struct SplitAsciiWhitespace<'a> { inner: Map<Filter<SliceSplit<'a, u8, IsAsciiWhitespace>, BytesIsNotEmpty>, UnsafeBytesToStr>, } +/// An iterator over the substrings of a string, +/// terminated by a substring matching to a predicate function +/// Unlike `Split`, it contains the matched part as a terminator +/// of the subslice. +/// +/// This struct is created by the [`split_inclusive`] method on [`str`]. +/// See its documentation for more. +/// +/// [`split_inclusive`]: ../../std/primitive.str.html#method.split_inclusive +/// [`str`]: ../../std/primitive.str.html +#[unstable(feature = "split_inclusive", issue = "0")] +pub struct SplitInclusive<'a, P: Pattern<'a>>(SplitInternal<'a, P>); + impl_fn_for_zst! { #[derive(Clone)] struct IsWhitespace impl Fn = |c: char| -> bool { @@ -4433,6 +4514,8 @@ impl_fn_for_zst! { }; } + + #[stable(feature = "split_whitespace", since = "1.1.0")] impl<'a> Iterator for SplitWhitespace<'a> { type Item = &'a str; @@ -4495,6 +4578,46 @@ impl<'a> DoubleEndedIterator for SplitAsciiWhitespace<'a> { #[stable(feature = "split_ascii_whitespace", since = "1.34.0")] impl FusedIterator for SplitAsciiWhitespace<'_> {} +#[unstable(feature = "split_inclusive", issue = "0")] +impl<'a, P: Pattern<'a>> Iterator for SplitInclusive<'a, P> { + type Item = &'a str; + + #[inline] + fn next(&mut self) -> Option<&'a str> { + self.0.next_inclusive() + } +} + +#[unstable(feature = "split_inclusive", issue = "0")] +impl<'a, P: Pattern<'a, Searcher: fmt::Debug>> fmt::Debug for SplitInclusive<'a, P> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("SplitInclusive") + .field("0", &self.0) + .finish() + } +} + +// FIXME(#26925) Remove in favor of `#[derive(Clone)]` +#[unstable(feature = "split_inclusive", issue = "0")] +impl<'a, P: Pattern<'a, Searcher: Clone>> Clone for SplitInclusive<'a, P> { + fn clone(&self) -> Self { + SplitInclusive(self.0.clone()) + } +} + +#[unstable(feature = "split_inclusive", issue = "0")] +impl<'a, P: Pattern<'a, Searcher: ReverseSearcher<'a>>> DoubleEndedIterator + for SplitInclusive<'a, P> +{ + #[inline] + fn next_back(&mut self) -> Option<&'a str> { + self.0.next_back_inclusive() + } +} + +#[unstable(feature = "split_inclusive", issue = "0")] +impl<'a, P: Pattern<'a>> FusedIterator for SplitInclusive<'a, P> {} + /// An iterator of [`u16`] over the string encoded as UTF-16. /// /// [`u16`]: ../../std/primitive.u16.html |
