diff options
| author | bors <bors@rust-lang.org> | 2020-02-22 03:54:50 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2020-02-22 03:54:50 +0000 |
| commit | 87e494c4cdf3f4f39d25ca008173f80688b8eb3d (patch) | |
| tree | 9fe72e67de8fbb771a2a7c7105397adf59ba70d6 /src/liballoc/tests | |
| parent | d735ede6ebfbdf6355d2857912b169bcecef3307 (diff) | |
| parent | 5c9dc57cb587761561e85574c821a6f9c0c7cc67 (diff) | |
| download | rust-87e494c4cdf3f4f39d25ca008173f80688b8eb3d.tar.gz rust-87e494c4cdf3f4f39d25ca008173f80688b8eb3d.zip | |
Auto merge of #67330 - golddranks:split_inclusive, r=kodraus
Implement split_inclusive for slice and str
# Overview
* Implement `split_inclusive` for `slice` and `str` and `split_inclusive_mut` for `slice`
* `split_inclusive` is a substring/subslice splitting iterator that includes the matched part in the iterated substrings as a terminator.
* EDIT: The behaviour has now changed, as per @KodrAus 's input, to the same semantics with the `split_terminator` function. I updated the examples below.
* Two examples below:
```Rust
let data = "\nMäry häd ä little lämb\nLittle lämb\n";
let split: Vec<&str> = data.split_inclusive('\n').collect();
assert_eq!(split, ["\n", "Märy häd ä little lämb\n", "Little lämb\n"]);
```
```Rust
let uppercase_separated = "SheePSharKTurtlECaT";
let mut first_char = true;
let split: Vec<&str> = uppercase_separated.split_inclusive(|c: char| {
let split = !first_char && c.is_uppercase();
first_char = split;
split
}).collect();
assert_eq!(split, ["SheeP", "SharK", "TurtlE", "CaT"]);
```
# Justification for the API
* I was surprised to find that stdlib currently only has splitting iterators that leave out the matched part. In my experience, wanting to leave a substring terminator as a part of the substring is a pretty common usecase.
* This API is strictly more expressive than the standard `split` API: it's easy to get the behaviour of `split` by mapping a subslicing operation that drops the terminator. On the other hand it's impossible to derive this behaviour from `split` without using hacky and brittle `unsafe` code. The normal way to achieve this functionality would be implementing the iterator yourself.
* Especially when dealing with mutable slices, the only way currently is to use `split_at_mut`. This API provides an ergonomic alternative that plays to the strengths of the iterating capabilities of Rust. (Using `split_at_mut` iteratively used to be a real pain before NLL, fortunately the situation is a bit better now.)
# Discussion items
* <s>Does it make sense to mimic `split_terminator` in that the final empty slice would be left off in case of the string/slice ending with a terminator? It might do, as this use case is naturally geared towards considering the matching part as a terminator instead of a separator.</s>
* EDIT: The behaviour was changed to mimic `split_terminator`.
* Does it make sense to have `split_inclusive_mut` for `&mut str`?
Diffstat (limited to 'src/liballoc/tests')
| -rw-r--r-- | src/liballoc/tests/lib.rs | 1 | ||||
| -rw-r--r-- | src/liballoc/tests/slice.rs | 80 | ||||
| -rw-r--r-- | src/liballoc/tests/str.rs | 43 |
3 files changed, 124 insertions, 0 deletions
diff --git a/src/liballoc/tests/lib.rs b/src/liballoc/tests/lib.rs index c1ae67a1a33..ea75f8903c3 100644 --- a/src/liballoc/tests/lib.rs +++ b/src/liballoc/tests/lib.rs @@ -12,6 +12,7 @@ #![feature(binary_heap_into_iter_sorted)] #![feature(binary_heap_drain_sorted)] #![feature(vec_remove_item)] +#![feature(split_inclusive)] use std::collections::hash_map::DefaultHasher; use std::hash::{Hash, Hasher}; diff --git a/src/liballoc/tests/slice.rs b/src/liballoc/tests/slice.rs index 51ddb5e7a4e..3d6b4bff5e0 100644 --- a/src/liballoc/tests/slice.rs +++ b/src/liballoc/tests/slice.rs @@ -852,6 +852,86 @@ fn test_splitator() { } #[test] +fn test_splitator_inclusive() { + let xs = &[1, 2, 3, 4, 5]; + + let splits: &[&[_]] = &[&[1, 2], &[3, 4], &[5]]; + assert_eq!(xs.split_inclusive(|x| *x % 2 == 0).collect::<Vec<_>>(), splits); + let splits: &[&[_]] = &[&[1], &[2, 3, 4, 5]]; + assert_eq!(xs.split_inclusive(|x| *x == 1).collect::<Vec<_>>(), splits); + let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]]; + assert_eq!(xs.split_inclusive(|x| *x == 5).collect::<Vec<_>>(), splits); + let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]]; + assert_eq!(xs.split_inclusive(|x| *x == 10).collect::<Vec<_>>(), splits); + let splits: &[&[_]] = &[&[1], &[2], &[3], &[4], &[5]]; + assert_eq!(xs.split_inclusive(|_| true).collect::<Vec<&[i32]>>(), splits); + + let xs: &[i32] = &[]; + let splits: &[&[i32]] = &[&[]]; + assert_eq!(xs.split_inclusive(|x| *x == 5).collect::<Vec<&[i32]>>(), splits); +} + +#[test] +fn test_splitator_inclusive_reverse() { + let xs = &[1, 2, 3, 4, 5]; + + let splits: &[&[_]] = &[&[5], &[3, 4], &[1, 2]]; + assert_eq!(xs.split_inclusive(|x| *x % 2 == 0).rev().collect::<Vec<_>>(), splits); + let splits: &[&[_]] = &[&[2, 3, 4, 5], &[1]]; + assert_eq!(xs.split_inclusive(|x| *x == 1).rev().collect::<Vec<_>>(), splits); + let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]]; + assert_eq!(xs.split_inclusive(|x| *x == 5).rev().collect::<Vec<_>>(), splits); + let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]]; + assert_eq!(xs.split_inclusive(|x| *x == 10).rev().collect::<Vec<_>>(), splits); + let splits: &[&[_]] = &[&[5], &[4], &[3], &[2], &[1]]; + assert_eq!(xs.split_inclusive(|_| true).rev().collect::<Vec<_>>(), splits); + + let xs: &[i32] = &[]; + let splits: &[&[i32]] = &[&[]]; + assert_eq!(xs.split_inclusive(|x| *x == 5).rev().collect::<Vec<_>>(), splits); +} + +#[test] +fn test_splitator_mut_inclusive() { + let xs = &mut [1, 2, 3, 4, 5]; + + let splits: &[&[_]] = &[&[1, 2], &[3, 4], &[5]]; + assert_eq!(xs.split_inclusive_mut(|x| *x % 2 == 0).collect::<Vec<_>>(), splits); + let splits: &[&[_]] = &[&[1], &[2, 3, 4, 5]]; + assert_eq!(xs.split_inclusive_mut(|x| *x == 1).collect::<Vec<_>>(), splits); + let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]]; + assert_eq!(xs.split_inclusive_mut(|x| *x == 5).collect::<Vec<_>>(), splits); + let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]]; + assert_eq!(xs.split_inclusive_mut(|x| *x == 10).collect::<Vec<_>>(), splits); + let splits: &[&[_]] = &[&[1], &[2], &[3], &[4], &[5]]; + assert_eq!(xs.split_inclusive_mut(|_| true).collect::<Vec<_>>(), splits); + + let xs: &mut [i32] = &mut []; + let splits: &[&[i32]] = &[&[]]; + assert_eq!(xs.split_inclusive_mut(|x| *x == 5).collect::<Vec<_>>(), splits); +} + +#[test] +fn test_splitator_mut_inclusive_reverse() { + let xs = &mut [1, 2, 3, 4, 5]; + + let splits: &[&[_]] = &[&[5], &[3, 4], &[1, 2]]; + assert_eq!(xs.split_inclusive_mut(|x| *x % 2 == 0).rev().collect::<Vec<_>>(), splits); + let splits: &[&[_]] = &[&[2, 3, 4, 5], &[1]]; + assert_eq!(xs.split_inclusive_mut(|x| *x == 1).rev().collect::<Vec<_>>(), splits); + let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]]; + assert_eq!(xs.split_inclusive_mut(|x| *x == 5).rev().collect::<Vec<_>>(), splits); + let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]]; + assert_eq!(xs.split_inclusive_mut(|x| *x == 10).rev().collect::<Vec<_>>(), splits); + let splits: &[&[_]] = &[&[5], &[4], &[3], &[2], &[1]]; + assert_eq!(xs.split_inclusive_mut(|_| true).rev().collect::<Vec<_>>(), splits); + + let xs: &mut [i32] = &mut []; + let splits: &[&[i32]] = &[&[]]; + assert_eq!(xs.split_inclusive_mut(|x| *x == 5).rev().collect::<Vec<_>>(), splits); +} + +#[test] fn test_splitnator() { let xs = &[1, 2, 3, 4, 5]; diff --git a/src/liballoc/tests/str.rs b/src/liballoc/tests/str.rs index d3c72615696..b703df6f3cb 100644 --- a/src/liballoc/tests/str.rs +++ b/src/liballoc/tests/str.rs @@ -1248,6 +1248,49 @@ fn test_split_char_iterator_no_trailing() { } #[test] +fn test_split_char_iterator_inclusive() { + let data = "\nMäry häd ä little lämb\nLittle lämb\n"; + + let split: Vec<&str> = data.split_inclusive('\n').collect(); + assert_eq!(split, ["\n", "Märy häd ä little lämb\n", "Little lämb\n"]); + + let uppercase_separated = "SheePSharKTurtlECaT"; + let mut first_char = true; + let split: Vec<&str> = uppercase_separated + .split_inclusive(|c: char| { + let split = !first_char && c.is_uppercase(); + first_char = split; + split + }) + .collect(); + assert_eq!(split, ["SheeP", "SharK", "TurtlE", "CaT"]); +} + +#[test] +fn test_split_char_iterator_inclusive_rev() { + let data = "\nMäry häd ä little lämb\nLittle lämb\n"; + + let split: Vec<&str> = data.split_inclusive('\n').rev().collect(); + assert_eq!(split, ["Little lämb\n", "Märy häd ä little lämb\n", "\n"]); + + // Note that the predicate is stateful and thus dependent + // on the iteration order. + // (A different predicate is needed for reverse iterator vs normal iterator.) + // Not sure if anything can be done though. + let uppercase_separated = "SheePSharKTurtlECaT"; + let mut term_char = true; + let split: Vec<&str> = uppercase_separated + .split_inclusive(|c: char| { + let split = term_char && c.is_uppercase(); + term_char = c.is_uppercase(); + split + }) + .rev() + .collect(); + assert_eq!(split, ["CaT", "TurtlE", "SharK", "SheeP"]); +} + +#[test] fn test_rsplit() { let data = "\nMäry häd ä little lämb\nLittle lämb\n"; |
