about summary refs log tree commit diff
path: root/src/liballoc/tests
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2020-02-22 03:54:50 +0000
committerbors <bors@rust-lang.org>2020-02-22 03:54:50 +0000
commit87e494c4cdf3f4f39d25ca008173f80688b8eb3d (patch)
tree9fe72e67de8fbb771a2a7c7105397adf59ba70d6 /src/liballoc/tests
parentd735ede6ebfbdf6355d2857912b169bcecef3307 (diff)
parent5c9dc57cb587761561e85574c821a6f9c0c7cc67 (diff)
downloadrust-87e494c4cdf3f4f39d25ca008173f80688b8eb3d.tar.gz
rust-87e494c4cdf3f4f39d25ca008173f80688b8eb3d.zip
Auto merge of #67330 - golddranks:split_inclusive, r=kodraus
Implement split_inclusive for slice and str

# Overview
* Implement `split_inclusive` for `slice` and `str` and `split_inclusive_mut` for `slice`
* `split_inclusive` is a substring/subslice splitting iterator that includes the matched part in the iterated substrings as a terminator.
* EDIT: The behaviour has now changed, as per @KodrAus 's input, to the same semantics with the `split_terminator` function. I updated the examples below.
* Two examples below:
```Rust
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";
    let split: Vec<&str> = data.split_inclusive('\n').collect();
    assert_eq!(split, ["\n", "Märy häd ä little lämb\n", "Little lämb\n"]);
```

```Rust
    let uppercase_separated = "SheePSharKTurtlECaT";
    let mut first_char = true;
    let split: Vec<&str> = uppercase_separated.split_inclusive(|c: char| {
        let split = !first_char && c.is_uppercase();
        first_char = split;
        split
    }).collect();
    assert_eq!(split, ["SheeP", "SharK", "TurtlE", "CaT"]);
```

# Justification for the API
* I was surprised to find that stdlib currently only has splitting iterators that leave out the matched part. In my experience, wanting to leave a substring terminator as a part of the substring is a pretty common usecase.
* This API is strictly more expressive than the standard `split` API: it's easy to get the behaviour of `split` by mapping a subslicing operation that drops the terminator. On the other hand it's impossible to derive this behaviour from `split` without using hacky and brittle `unsafe` code. The normal way to achieve this functionality would be implementing the iterator yourself.
* Especially when dealing with mutable slices, the only way currently is to use `split_at_mut`. This API provides an ergonomic alternative that plays to the strengths of the iterating capabilities of Rust. (Using `split_at_mut` iteratively used to be a real pain before NLL, fortunately the situation is a bit better now.)

# Discussion items
* <s>Does it make sense to mimic `split_terminator` in that the final empty slice would be left off in case of the string/slice ending with a terminator? It might do, as this use case is naturally geared towards considering the matching part as a terminator instead of a separator.</s>
  * EDIT: The behaviour was changed to mimic `split_terminator`.
* Does it make sense to have `split_inclusive_mut` for `&mut str`?
Diffstat (limited to 'src/liballoc/tests')
-rw-r--r--src/liballoc/tests/lib.rs1
-rw-r--r--src/liballoc/tests/slice.rs80
-rw-r--r--src/liballoc/tests/str.rs43
3 files changed, 124 insertions, 0 deletions
diff --git a/src/liballoc/tests/lib.rs b/src/liballoc/tests/lib.rs
index c1ae67a1a33..ea75f8903c3 100644
--- a/src/liballoc/tests/lib.rs
+++ b/src/liballoc/tests/lib.rs
@@ -12,6 +12,7 @@
 #![feature(binary_heap_into_iter_sorted)]
 #![feature(binary_heap_drain_sorted)]
 #![feature(vec_remove_item)]
+#![feature(split_inclusive)]
 
 use std::collections::hash_map::DefaultHasher;
 use std::hash::{Hash, Hasher};
diff --git a/src/liballoc/tests/slice.rs b/src/liballoc/tests/slice.rs
index 51ddb5e7a4e..3d6b4bff5e0 100644
--- a/src/liballoc/tests/slice.rs
+++ b/src/liballoc/tests/slice.rs
@@ -852,6 +852,86 @@ fn test_splitator() {
 }
 
 #[test]
+fn test_splitator_inclusive() {
+    let xs = &[1, 2, 3, 4, 5];
+
+    let splits: &[&[_]] = &[&[1, 2], &[3, 4], &[5]];
+    assert_eq!(xs.split_inclusive(|x| *x % 2 == 0).collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[1], &[2, 3, 4, 5]];
+    assert_eq!(xs.split_inclusive(|x| *x == 1).collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]];
+    assert_eq!(xs.split_inclusive(|x| *x == 5).collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]];
+    assert_eq!(xs.split_inclusive(|x| *x == 10).collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[1], &[2], &[3], &[4], &[5]];
+    assert_eq!(xs.split_inclusive(|_| true).collect::<Vec<&[i32]>>(), splits);
+
+    let xs: &[i32] = &[];
+    let splits: &[&[i32]] = &[&[]];
+    assert_eq!(xs.split_inclusive(|x| *x == 5).collect::<Vec<&[i32]>>(), splits);
+}
+
+#[test]
+fn test_splitator_inclusive_reverse() {
+    let xs = &[1, 2, 3, 4, 5];
+
+    let splits: &[&[_]] = &[&[5], &[3, 4], &[1, 2]];
+    assert_eq!(xs.split_inclusive(|x| *x % 2 == 0).rev().collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[2, 3, 4, 5], &[1]];
+    assert_eq!(xs.split_inclusive(|x| *x == 1).rev().collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]];
+    assert_eq!(xs.split_inclusive(|x| *x == 5).rev().collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]];
+    assert_eq!(xs.split_inclusive(|x| *x == 10).rev().collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[5], &[4], &[3], &[2], &[1]];
+    assert_eq!(xs.split_inclusive(|_| true).rev().collect::<Vec<_>>(), splits);
+
+    let xs: &[i32] = &[];
+    let splits: &[&[i32]] = &[&[]];
+    assert_eq!(xs.split_inclusive(|x| *x == 5).rev().collect::<Vec<_>>(), splits);
+}
+
+#[test]
+fn test_splitator_mut_inclusive() {
+    let xs = &mut [1, 2, 3, 4, 5];
+
+    let splits: &[&[_]] = &[&[1, 2], &[3, 4], &[5]];
+    assert_eq!(xs.split_inclusive_mut(|x| *x % 2 == 0).collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[1], &[2, 3, 4, 5]];
+    assert_eq!(xs.split_inclusive_mut(|x| *x == 1).collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]];
+    assert_eq!(xs.split_inclusive_mut(|x| *x == 5).collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]];
+    assert_eq!(xs.split_inclusive_mut(|x| *x == 10).collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[1], &[2], &[3], &[4], &[5]];
+    assert_eq!(xs.split_inclusive_mut(|_| true).collect::<Vec<_>>(), splits);
+
+    let xs: &mut [i32] = &mut [];
+    let splits: &[&[i32]] = &[&[]];
+    assert_eq!(xs.split_inclusive_mut(|x| *x == 5).collect::<Vec<_>>(), splits);
+}
+
+#[test]
+fn test_splitator_mut_inclusive_reverse() {
+    let xs = &mut [1, 2, 3, 4, 5];
+
+    let splits: &[&[_]] = &[&[5], &[3, 4], &[1, 2]];
+    assert_eq!(xs.split_inclusive_mut(|x| *x % 2 == 0).rev().collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[2, 3, 4, 5], &[1]];
+    assert_eq!(xs.split_inclusive_mut(|x| *x == 1).rev().collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]];
+    assert_eq!(xs.split_inclusive_mut(|x| *x == 5).rev().collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]];
+    assert_eq!(xs.split_inclusive_mut(|x| *x == 10).rev().collect::<Vec<_>>(), splits);
+    let splits: &[&[_]] = &[&[5], &[4], &[3], &[2], &[1]];
+    assert_eq!(xs.split_inclusive_mut(|_| true).rev().collect::<Vec<_>>(), splits);
+
+    let xs: &mut [i32] = &mut [];
+    let splits: &[&[i32]] = &[&[]];
+    assert_eq!(xs.split_inclusive_mut(|x| *x == 5).rev().collect::<Vec<_>>(), splits);
+}
+
+#[test]
 fn test_splitnator() {
     let xs = &[1, 2, 3, 4, 5];
 
diff --git a/src/liballoc/tests/str.rs b/src/liballoc/tests/str.rs
index d3c72615696..b703df6f3cb 100644
--- a/src/liballoc/tests/str.rs
+++ b/src/liballoc/tests/str.rs
@@ -1248,6 +1248,49 @@ fn test_split_char_iterator_no_trailing() {
 }
 
 #[test]
+fn test_split_char_iterator_inclusive() {
+    let data = "\nMäry häd ä little lämb\nLittle lämb\n";
+
+    let split: Vec<&str> = data.split_inclusive('\n').collect();
+    assert_eq!(split, ["\n", "Märy häd ä little lämb\n", "Little lämb\n"]);
+
+    let uppercase_separated = "SheePSharKTurtlECaT";
+    let mut first_char = true;
+    let split: Vec<&str> = uppercase_separated
+        .split_inclusive(|c: char| {
+            let split = !first_char && c.is_uppercase();
+            first_char = split;
+            split
+        })
+        .collect();
+    assert_eq!(split, ["SheeP", "SharK", "TurtlE", "CaT"]);
+}
+
+#[test]
+fn test_split_char_iterator_inclusive_rev() {
+    let data = "\nMäry häd ä little lämb\nLittle lämb\n";
+
+    let split: Vec<&str> = data.split_inclusive('\n').rev().collect();
+    assert_eq!(split, ["Little lämb\n", "Märy häd ä little lämb\n", "\n"]);
+
+    // Note that the predicate is stateful and thus dependent
+    // on the iteration order.
+    // (A different predicate is needed for reverse iterator vs normal iterator.)
+    // Not sure if anything can be done though.
+    let uppercase_separated = "SheePSharKTurtlECaT";
+    let mut term_char = true;
+    let split: Vec<&str> = uppercase_separated
+        .split_inclusive(|c: char| {
+            let split = term_char && c.is_uppercase();
+            term_char = c.is_uppercase();
+            split
+        })
+        .rev()
+        .collect();
+    assert_eq!(split, ["CaT", "TurtlE", "SharK", "SheeP"]);
+}
+
+#[test]
 fn test_rsplit() {
     let data = "\nMäry häd ä little lämb\nLittle lämb\n";