about summary refs log tree commit diff
diff options
context:
space:
mode:
authorKonrad Borowski <konrad@borowski.pw>2022-08-09 07:12:15 +0000
committerKonrad Borowski <konrad@borowski.pw>2022-10-06 16:05:38 +0000
commitcef81dcd0a9503ff8a8f915c43688a78c1c11d83 (patch)
tree44cc2a7f596826fb188e4d3eb025f95d6dc2b26c
parent0d8a0c56fee71dac218eb949817669ab8bb00c5e (diff)
downloadrust-cef81dcd0a9503ff8a8f915c43688a78c1c11d83.tar.gz
rust-cef81dcd0a9503ff8a8f915c43688a78c1c11d83.zip
Fix handling of trailing bare CR in str::lines
Previously "bare\r" was split into ["bare"] even though the
documentation said that only LF and CRLF count as newlines.

This fix is a behavioural change, even though it brings the behaviour
into line with the documentation, and into line with that of
`std::io::BufRead::lines()`.

This is an alternative to #91051, which proposes to document rather
than fix the behaviour.

Fixes #94435.

Co-authored-by: Ian Jackson <ijackson@chiark.greenend.org.uk>
-rw-r--r--library/alloc/tests/str.rs26
-rw-r--r--library/core/src/str/iter.rs2
-rw-r--r--library/core/src/str/mod.rs8
3 files changed, 24 insertions, 12 deletions
diff --git a/library/alloc/tests/str.rs b/library/alloc/tests/str.rs
index e30329aa1cb..aa767d5691a 100644
--- a/library/alloc/tests/str.rs
+++ b/library/alloc/tests/str.rs
@@ -1499,13 +1499,25 @@ fn test_split_whitespace() {
 
 #[test]
 fn test_lines() {
-    let data = "\nMäry häd ä little lämb\n\r\nLittle lämb\n";
-    let lines: Vec<&str> = data.lines().collect();
-    assert_eq!(lines, ["", "Märy häd ä little lämb", "", "Little lämb"]);
-
-    let data = "\r\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
-    let lines: Vec<&str> = data.lines().collect();
-    assert_eq!(lines, ["", "Märy häd ä little lämb", "", "Little lämb"]);
+    fn t(data: &str, expected: &[&str]) {
+        let lines: Vec<&str> = data.lines().collect();
+        assert_eq!(lines, expected);
+    }
+    t("", &[]);
+    t("\n", &[""]);
+    t("\n2nd", &["", "2nd"]);
+    t("\r\n", &[""]);
+    t("bare\r", &["bare\r"]);
+    t("bare\rcr", &["bare\rcr"]);
+    t("Text\n\r", &["Text", "\r"]);
+    t(
+        "\nMäry häd ä little lämb\n\r\nLittle lämb\n",
+        &["", "Märy häd ä little lämb", "", "Little lämb"],
+    );
+    t(
+        "\r\nMäry häd ä little lämb\n\nLittle lämb",
+        &["", "Märy häd ä little lämb", "", "Little lämb"],
+    );
 }
 
 #[test]
diff --git a/library/core/src/str/iter.rs b/library/core/src/str/iter.rs
index 660a3b091b3..579f5806b1c 100644
--- a/library/core/src/str/iter.rs
+++ b/library/core/src/str/iter.rs
@@ -1091,7 +1091,7 @@ generate_pattern_iterators! {
 #[stable(feature = "rust1", since = "1.0.0")]
 #[must_use = "iterators are lazy and do nothing unless consumed"]
 #[derive(Clone, Debug)]
-pub struct Lines<'a>(pub(super) Map<SplitTerminator<'a, char>, LinesMap>);
+pub struct Lines<'a>(pub(super) Map<SplitInclusive<'a, char>, LinesMap>);
 
 #[stable(feature = "rust1", since = "1.0.0")]
 impl<'a> Iterator for Lines<'a> {
diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs
index 434c1598ee4..49478a72f0e 100644
--- a/library/core/src/str/mod.rs
+++ b/library/core/src/str/mod.rs
@@ -997,7 +997,7 @@ impl str {
     #[stable(feature = "rust1", since = "1.0.0")]
     #[inline]
     pub fn lines(&self) -> Lines<'_> {
-        Lines(self.split_terminator('\n').map(LinesMap))
+        Lines(self.split_inclusive('\n').map(LinesMap))
     }
 
     /// An iterator over the lines of a string.
@@ -2591,9 +2591,9 @@ impl_fn_for_zst! {
     /// A nameable, cloneable fn type
     #[derive(Clone)]
     struct LinesMap impl<'a> Fn = |line: &'a str| -> &'a str {
-        let l = line.len();
-        if l > 0 && line.as_bytes()[l - 1] == b'\r' { &line[0 .. l - 1] }
-        else { line }
+        let Some(line) = line.strip_suffix('\n') else { return line };
+        let Some(line) = line.strip_suffix('\r') else { return line };
+        line
     };
 
     #[derive(Clone)]