diff options
| author | Konrad Borowski <konrad@borowski.pw> | 2022-08-09 07:12:15 +0000 |
|---|---|---|
| committer | Konrad Borowski <konrad@borowski.pw> | 2022-10-06 16:05:38 +0000 |
| commit | cef81dcd0a9503ff8a8f915c43688a78c1c11d83 (patch) | |
| tree | 44cc2a7f596826fb188e4d3eb025f95d6dc2b26c | |
| parent | 0d8a0c56fee71dac218eb949817669ab8bb00c5e (diff) | |
| download | rust-cef81dcd0a9503ff8a8f915c43688a78c1c11d83.tar.gz rust-cef81dcd0a9503ff8a8f915c43688a78c1c11d83.zip | |
Fix handling of trailing bare CR in str::lines
Previously "bare\r" was split into ["bare"] even though the documentation said that only LF and CRLF count as newlines.

This fix is a behavioural change, even though it brings the behaviour into line with the documentation, and into line with that of `std::io::BufRead::lines()`.

This is an alternative to #91051, which proposes to document rather than fix the behaviour.

Fixes #94435.

Co-authored-by: Ian Jackson <ijackson@chiark.greenend.org.uk>
| -rw-r--r-- | library/alloc/tests/str.rs | 26 | ||||
| -rw-r--r-- | library/core/src/str/iter.rs | 2 | ||||
| -rw-r--r-- | library/core/src/str/mod.rs | 8 |
3 files changed, 24 insertions, 12 deletions
diff --git a/library/alloc/tests/str.rs b/library/alloc/tests/str.rs index e30329aa1cb..aa767d5691a 100644 --- a/library/alloc/tests/str.rs +++ b/library/alloc/tests/str.rs @@ -1499,13 +1499,25 @@ fn test_split_whitespace() { #[test] fn test_lines() { - let data = "\nMäry häd ä little lämb\n\r\nLittle lämb\n"; - let lines: Vec<&str> = data.lines().collect(); - assert_eq!(lines, ["", "Märy häd ä little lämb", "", "Little lämb"]); - - let data = "\r\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n - let lines: Vec<&str> = data.lines().collect(); - assert_eq!(lines, ["", "Märy häd ä little lämb", "", "Little lämb"]); + fn t(data: &str, expected: &[&str]) { + let lines: Vec<&str> = data.lines().collect(); + assert_eq!(lines, expected); + } + t("", &[]); + t("\n", &[""]); + t("\n2nd", &["", "2nd"]); + t("\r\n", &[""]); + t("bare\r", &["bare\r"]); + t("bare\rcr", &["bare\rcr"]); + t("Text\n\r", &["Text", "\r"]); + t( + "\nMäry häd ä little lämb\n\r\nLittle lämb\n", + &["", "Märy häd ä little lämb", "", "Little lämb"], + ); + t( + "\r\nMäry häd ä little lämb\n\nLittle lämb", + &["", "Märy häd ä little lämb", "", "Little lämb"], + ); } #[test] diff --git a/library/core/src/str/iter.rs b/library/core/src/str/iter.rs index 660a3b091b3..579f5806b1c 100644 --- a/library/core/src/str/iter.rs +++ b/library/core/src/str/iter.rs @@ -1091,7 +1091,7 @@ generate_pattern_iterators! 
{ #[stable(feature = "rust1", since = "1.0.0")] #[must_use = "iterators are lazy and do nothing unless consumed"] #[derive(Clone, Debug)] -pub struct Lines<'a>(pub(super) Map<SplitTerminator<'a, char>, LinesMap>); +pub struct Lines<'a>(pub(super) Map<SplitInclusive<'a, char>, LinesMap>); #[stable(feature = "rust1", since = "1.0.0")] impl<'a> Iterator for Lines<'a> { diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs index 434c1598ee4..49478a72f0e 100644 --- a/library/core/src/str/mod.rs +++ b/library/core/src/str/mod.rs @@ -997,7 +997,7 @@ impl str { #[stable(feature = "rust1", since = "1.0.0")] #[inline] pub fn lines(&self) -> Lines<'_> { - Lines(self.split_terminator('\n').map(LinesMap)) + Lines(self.split_inclusive('\n').map(LinesMap)) } /// An iterator over the lines of a string. @@ -2591,9 +2591,9 @@ impl_fn_for_zst! { /// A nameable, cloneable fn type #[derive(Clone)] struct LinesMap impl<'a> Fn = |line: &'a str| -> &'a str { - let l = line.len(); - if l > 0 && line.as_bytes()[l - 1] == b'\r' { &line[0 .. l - 1] } - else { line } + let Some(line) = line.strip_suffix('\n') else { return line }; + let Some(line) = line.strip_suffix('\r') else { return line }; + line }; #[derive(Clone)] |
