Auto merge of #67330 - golddranks:split_inclusive, r=kodraus

Implement split_inclusive for slice and str # Overview * Implement `split_inclusive` for `slice` and `str` and `split_inclusive_mut` for `slice` * `split_inclusive` is a substring/subslice splitting iterator that includes the matched part in the iterated substrings as a terminator. * EDIT: The behaviour has now changed, as per @KodrAus 's input, to the same semantics with the `split_terminator` function. I updated the examples below. * Two examples below: ```Rust let data = "\nMäry häd ä little lämb\nLittle lämb\n"; let split: Vec<&str> = data.split_inclusive('\n').collect(); assert_eq!(split, ["\n", "Märy häd ä little lämb\n", "Little lämb\n"]); ``` ```Rust let uppercase_separated = "SheePSharKTurtlECaT"; let mut first_char = true; let split: Vec<&str> = uppercase_separated.split_inclusive(|c: char| { let split = !first_char && c.is_uppercase(); first_char = split; split }).collect(); assert_eq!(split, ["SheeP", "SharK", "TurtlE", "CaT"]); ``` # Justification for the API * I was surprised to find that stdlib currently only has splitting iterators that leave out the matched part. In my experience, wanting to leave a substring terminator as a part of the substring is a pretty common usecase. * This API is strictly more expressive than the standard `split` API: it's easy to get the behaviour of `split` by mapping a subslicing operation that drops the terminator. On the other hand it's impossible to derive this behaviour from `split` without using hacky and brittle `unsafe` code. The normal way to achieve this functionality would be implementing the iterator yourself. * Especially when dealing with mutable slices, the only way currently is to use `split_at_mut`. This API provides an ergonomic alternative that plays to the strengths of the iterating capabilities of Rust. (Using `split_at_mut` iteratively used to be a real pain before NLL, fortunately the situation is a bit better now.) # Discussion items * <s>Does it make sense to mimic `split_terminator` in that the final empty slice would be left off in case of the string/slice ending with a terminator? It might do, as this use case is naturally geared towards considering the matching part as a terminator instead of a separator.</s> * EDIT: The behaviour was changed to mimic `split_terminator`. * Does it make sense to have `split_inclusive_mut` for `&mut str`?
author: bors <bors@rust-lang.org> 2020-02-22 03:54:50 +0000
committer: bors <bors@rust-lang.org> 2020-02-22 03:54:50 +0000
commit: 87e494c4cdf3f4f39d25ca008173f80688b8eb3d (patch)
tree: 9fe72e67de8fbb771a2a7c7105397adf59ba70d6 /src/liballoc/tests
parent: d735ede6ebfbdf6355d2857912b169bcecef3307 (diff)
parent: 5c9dc57cb587761561e85574c821a6f9c0c7cc67 (diff)
download: rust-87e494c4cdf3f4f39d25ca008173f80688b8eb3d.tar.gz
rust-87e494c4cdf3f4f39d25ca008173f80688b8eb3d.zip
3 files changed, 124 insertions, 0 deletions
diff --git a/src/liballoc/tests/lib.rs b/src/liballoc/tests/lib.rs
index c1ae67a1a33..ea75f8903c3 100644
--- a/src/liballoc/tests/lib.rs
+++ b/src/liballoc/tests/lib.rs
@@ -12,6 +12,7 @@
 #![feature(binary_heap_into_iter_sorted)]
 #![feature(binary_heap_drain_sorted)]
 #![feature(vec_remove_item)]
+#![feature(split_inclusive)]
 
 use std::collections::hash_map::DefaultHasher;
 use std::hash::{Hash, Hasher};
diff --git a/src/liballoc/tests/slice.rs b/src/liballoc/tests/slice.rs
index 51ddb5e7a4e..3d6b4bff5e0 100644
--- a/src/liballoc/tests/slice.rs
+++ b/src/liballoc/tests/slice.rs
@@ -852,6 +852,86 @@ fn test_splitator() {
 }
 
 #[test]
+fn test_splitator_inclusive() {
+    let xs = &[1, 2, 3, 4, 5];
+
+    let splits: &[&[_]] = &[&[1, 2], &[3, 4], &[5]];
+    assert_eq!(xs.split_inclusive(|x| *x % 2 == 0).collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[1], &[2, 3, 4, 5]];
+    assert_eq!(xs.split_inclusive(|x| *x == 1).collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]];
+    assert_eq!(xs.split_inclusive(|x| *x == 5).collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]];
+    assert_eq!(xs.split_inclusive(|x| *x == 10).collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[1], &[2], &[3], &[4], &[5]];
+    assert_eq!(xs.split_inclusive(|_| true).collect::<Vec<&[i32]>>(), splits);
+
+    let xs: &[i32] = &[];
+    let splits: &[&[i32]] = &[&[]];
+    assert_eq!(xs.split_inclusive(|x| *x == 5).collect::<Vec<&[i32]>>(), splits);
+}
+
+#[test]
+fn test_splitator_inclusive_reverse() {
+    let xs = &[1, 2, 3, 4, 5];
+
+    let splits: &[&[_]] = &[&[5], &[3, 4], &[1, 2]];
+    assert_eq!(xs.split_inclusive(|x| *x % 2 == 0).rev().collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[2, 3, 4, 5], &[1]];
+    assert_eq!(xs.split_inclusive(|x| *x == 1).rev().collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]];
+    assert_eq!(xs.split_inclusive(|x| *x == 5).rev().collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]];
+    assert_eq!(xs.split_inclusive(|x| *x == 10).rev().collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[5], &[4], &[3], &[2], &[1]];
+    assert_eq!(xs.split_inclusive(|_| true).rev().collect::<Vec<_>>(), splits);
+
+    let xs: &[i32] = &[];
+    let splits: &[&[i32]] = &[&[]];
+    assert_eq!(xs.split_inclusive(|x| *x == 5).rev().collect::<Vec<_>>(), splits);
+}
+
+#[test]
+fn test_splitator_mut_inclusive() {
+    let xs = &mut [1, 2, 3, 4, 5];
+
+    let splits: &[&[_]] = &[&[1, 2], &[3, 4], &[5]];
+    assert_eq!(xs.split_inclusive_mut(|x| *x % 2 == 0).collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[1], &[2, 3, 4, 5]];
+    assert_eq!(xs.split_inclusive_mut(|x| *x == 1).collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]];
+    assert_eq!(xs.split_inclusive_mut(|x| *x == 5).collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]];
+    assert_eq!(xs.split_inclusive_mut(|x| *x == 10).collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[1], &[2], &[3], &[4], &[5]];
+    assert_eq!(xs.split_inclusive_mut(|_| true).collect::<Vec<_>>(), splits);
+
+    let xs: &mut [i32] = &mut [];
+    let splits: &[&[i32]] = &[&[]];
+    assert_eq!(xs.split_inclusive_mut(|x| *x == 5).collect::<Vec<_>>(), splits);
+}
+
+#[test]
+fn test_splitator_mut_inclusive_reverse() {
+    let xs = &mut [1, 2, 3, 4, 5];
+
+    let splits: &[&[_]] = &[&[5], &[3, 4], &[1, 2]];
+    assert_eq!(xs.split_inclusive_mut(|x| *x % 2 == 0).rev().collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[2, 3, 4, 5], &[1]];
+    assert_eq!(xs.split_inclusive_mut(|x| *x == 1).rev().collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]];
+    assert_eq!(xs.split_inclusive_mut(|x| *x == 5).rev().collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]];
+    assert_eq!(xs.split_inclusive_mut(|x| *x == 10).rev().collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[5], &[4], &[3], &[2], &[1]];
+    assert_eq!(xs.split_inclusive_mut(|_| true).rev().collect::<Vec<_>>(), splits);
+
+    let xs: &mut [i32] = &mut [];
+    let splits: &[&[i32]] = &[&[]];
+    assert_eq!(xs.split_inclusive_mut(|x| *x == 5).rev().collect::<Vec<_>>(), splits);
+}
+
+#[test]
 fn test_splitnator() {
     let xs = &[1, 2, 3, 4, 5];
 
diff --git a/src/liballoc/tests/str.rs b/src/liballoc/tests/str.rs
index d3c72615696..b703df6f3cb 100644
--- a/src/liballoc/tests/str.rs
+++ b/src/liballoc/tests/str.rs
@@ -1248,6 +1248,49 @@ fn test_split_char_iterator_no_trailing() {
 }
 
 #[test]
+fn test_split_char_iterator_inclusive() {
+    let data = "\nMäry häd ä little lämb\nLittle lämb\n";
+
+    let split: Vec<&str> = data.split_inclusive('\n').collect();
+    assert_eq!(split, ["\n", "Märy häd ä little lämb\n", "Little lämb\n"]);
+
+    let uppercase_separated = "SheePSharKTurtlECaT";
+    let mut first_char = true;
+    let split: Vec<&str> = uppercase_separated
+        .split_inclusive(|c: char| {
+            let split = !first_char && c.is_uppercase();
+            first_char = split;
+            split
+        })
+        .collect();
+    assert_eq!(split, ["SheeP", "SharK", "TurtlE", "CaT"]);
+}
+
+#[test]
+fn test_split_char_iterator_inclusive_rev() {
+    let data = "\nMäry häd ä little lämb\nLittle lämb\n";
+
+    let split: Vec<&str> = data.split_inclusive('\n').rev().collect();
+    assert_eq!(split, ["Little lämb\n", "Märy häd ä little lämb\n", "\n"]);
+
+    // Note that the predicate is stateful and thus dependent
+    // on the iteration order.
+    // (A different predicate is needed for reverse iterator vs normal iterator.)
+    // Not sure if anything can be done though.
+    let uppercase_separated = "SheePSharKTurtlECaT";
+    let mut term_char = true;
+    let split: Vec<&str> = uppercase_separated
+        .split_inclusive(|c: char| {
+            let split = term_char && c.is_uppercase();
+            term_char = c.is_uppercase();
+            split
+        })
+        .rev()
+        .collect();
+    assert_eq!(split, ["CaT", "TurtlE", "SharK", "SheeP"]);
+}
+
+#[test]
 fn test_rsplit() {
     let data = "\nMäry häd ä little lämb\nLittle lämb\n";
author	bors <bors@rust-lang.org>	2020-02-22 03:54:50 +0000
committer	bors <bors@rust-lang.org>	2020-02-22 03:54:50 +0000
commit	87e494c4cdf3f4f39d25ca008173f80688b8eb3d (patch)
tree	9fe72e67de8fbb771a2a7c7105397adf59ba70d6 /src/liballoc/tests
parent	d735ede6ebfbdf6355d2857912b169bcecef3307 (diff)
parent	5c9dc57cb587761561e85574c821a6f9c0c7cc67 (diff)
download	rust-87e494c4cdf3f4f39d25ca008173f80688b8eb3d.tar.gz rust-87e494c4cdf3f4f39d25ca008173f80688b8eb3d.zip