about summary refs log tree commit diff
path: root/src/liballoc
diff options
context:
space:
mode:
author: bors <bors@rust-lang.org> 2018-05-21 23:26:32 +0000
committer: bors <bors@rust-lang.org> 2018-05-21 23:26:32 +0000
commit: 65a16c000720f286c91747b91e6d9caa7b1bb63c (patch)
tree: dac2c0cbe77971b03cd52bc44f4d20ae9f0b7bcf /src/liballoc
parent: cb20f68d0f9e1c1bf49df22991b5a0d1a43146e8 (diff)
parent: b6539372e9d4dddb1c9f8c894f23bd4c3e8d9489 (diff)
download: rust-65a16c000720f286c91747b91e6d9caa7b1bb63c.tar.gz
rust-65a16c000720f286c91747b91e6d9caa7b1bb63c.zip
Auto merge of #49283 - varkor:combining-chars-escape_debug, r=SimonSapin
Escape combining characters in char::Debug

Although combining characters are technically printable, they make little sense to print on their own with `Debug`: it'd be better to escape them like non-printable characters.

This is a breaking change, but since `escape_debug` is rarely used, and almost certainly used primarily for debugging, I imagine this is an acceptable change.
Resolves #41922.

r? @alexcrichton
cc @clarcharr
Diffstat (limited to 'src/liballoc')
-rw-r--r-- src/liballoc/str.rs 11
-rw-r--r-- src/liballoc/tests/str.rs 7
2 files changed, 17 insertions, 1 deletions
diff --git a/src/liballoc/str.rs b/src/liballoc/str.rs
index c10c0a69433..823e56b64e3 100644
--- a/src/liballoc/str.rs
+++ b/src/liballoc/str.rs
@@ -372,12 +372,21 @@ impl str {
 
     /// Escapes each char in `s` with [`char::escape_debug`].
     ///
+    /// Note: only extended grapheme codepoints that begin the string will be
+    /// escaped.
+    ///
     /// [`char::escape_debug`]: primitive.char.html#method.escape_debug
     #[unstable(feature = "str_escape",
                reason = "return type may change to be an iterator",
                issue = "27791")]
     pub fn escape_debug(&self) -> String {
-        self.chars().flat_map(|c| c.escape_debug()).collect()
+        let mut string = String::with_capacity(self.len());
+        let mut chars = self.chars();
+        if let Some(first) = chars.next() {
+            string.extend(first.escape_debug_ext(true))
+        }
+        string.extend(chars.flat_map(|c| c.escape_debug_ext(false)));
+        string
     }
 
     /// Escapes each char in `s` with [`char::escape_default`].
diff --git a/src/liballoc/tests/str.rs b/src/liballoc/tests/str.rs
index 1a47e5433ea..d11bf5dc3e9 100644
--- a/src/liballoc/tests/str.rs
+++ b/src/liballoc/tests/str.rs
@@ -989,6 +989,12 @@ fn test_escape_unicode() {
 
 #[test]
 fn test_escape_debug() {
+    // Note that there are subtleties with the number of backslashes
+    // on the left- and right-hand sides. In particular, Unicode code points
+    // are usually escaped with two backslashes on the right-hand side, as
+    // they are escaped. However, when the character is unescaped (e.g. for
+    // printable characters), only a single backslash appears (as the character
+    // itself appears in the debug string).
     assert_eq!("abc".escape_debug(), "abc");
     assert_eq!("a c".escape_debug(), "a c");
     assert_eq!("éèê".escape_debug(), "éèê");
@@ -999,6 +1005,7 @@ fn test_escape_debug() {
     assert_eq!("\u{10000}\u{10ffff}".escape_debug(), "\u{10000}\\u{10ffff}");
     assert_eq!("ab\u{200b}".escape_debug(), "ab\\u{200b}");
     assert_eq!("\u{10d4ea}\r".escape_debug(), "\\u{10d4ea}\\r");
+    assert_eq!("\u{301}a\u{301}bé\u{e000}".escape_debug(), "\\u{301}a\u{301}bé\\u{e000}");
 }
 
 #[test]