From 8c89e7f3d58ff110aa4de64aef8ef29f78ebf456 Mon Sep 17 00:00:00 2001 From: varkor Date: Wed, 16 May 2018 23:20:22 +0100 Subject: Make {char, str}::escape_debug and impl Debug for {char, str} consistent --- src/liballoc/tests/str.rs | 1 + 1 file changed, 1 insertion(+) (limited to 'src/liballoc') diff --git a/src/liballoc/tests/str.rs b/src/liballoc/tests/str.rs index 1a47e5433ea..2f38c8b3ae2 100644 --- a/src/liballoc/tests/str.rs +++ b/src/liballoc/tests/str.rs @@ -999,6 +999,7 @@ fn test_escape_debug() { assert_eq!("\u{10000}\u{10ffff}".escape_debug(), "\u{10000}\\u{10ffff}"); assert_eq!("ab\u{200b}".escape_debug(), "ab\\u{200b}"); assert_eq!("\u{10d4ea}\r".escape_debug(), "\\u{10d4ea}\\r"); + assert_eq!("\u{301}a\u{301}bé\u{e000}".escape_debug(), "\\u{301}a\\u{301}bé\\u{e000}"); } #[test] -- cgit 1.4.1-3-g733a5 From c51f00280205d476651ff9f9a46cff6645b411a2 Mon Sep 17 00:00:00 2001 From: varkor Date: Thu, 17 May 2018 10:45:34 +0100 Subject: Only escape extended grapheme characters in the first position --- src/liballoc/str.rs | 5 ++++- src/liballoc/tests/str.rs | 2 +- src/libcore/char/methods.rs | 34 ++++++++++++++++++++++------------ src/libcore/tests/char.rs | 2 +- 4 files changed, 28 insertions(+), 15 deletions(-) (limited to 'src/liballoc') diff --git a/src/liballoc/str.rs b/src/liballoc/str.rs index c10c0a69433..8af14d3c698 100644 --- a/src/liballoc/str.rs +++ b/src/liballoc/str.rs @@ -372,12 +372,15 @@ impl str { /// Escapes each char in `s` with [`char::escape_debug`]. /// + /// Note: only extended grapheme codepoints that begin the string will be + /// escaped. + /// /// [`char::escape_debug`]: primitive.char.html#method.escape_debug #[unstable(feature = "str_escape", reason = "return type may change to be an iterator", issue = "27791")] pub fn escape_debug(&self) -> String { - self.chars().flat_map(|c| c.escape_debug()).collect() + self.chars().enumerate().flat_map(|(i, c)| c.escape_debug_ext(i == 0)).collect() } /// Escapes each char in `s` with [`char::escape_default`]. diff --git a/src/liballoc/tests/str.rs b/src/liballoc/tests/str.rs index 2f38c8b3ae2..84c97abcbc2 100644 --- a/src/liballoc/tests/str.rs +++ b/src/liballoc/tests/str.rs @@ -999,7 +999,7 @@ fn test_escape_debug() { assert_eq!("\u{10000}\u{10ffff}".escape_debug(), "\u{10000}\\u{10ffff}"); assert_eq!("ab\u{200b}".escape_debug(), "ab\\u{200b}"); assert_eq!("\u{10d4ea}\r".escape_debug(), "\\u{10d4ea}\\r"); - assert_eq!("\u{301}a\u{301}bé\u{e000}".escape_debug(), "\\u{301}a\\u{301}bé\\u{e000}"); + assert_eq!("\u{301}a\u{301}bé\u{e000}".escape_debug(), "\\u{301}a\u{301}bé\\u{e000}"); } #[test] diff --git a/src/libcore/char/methods.rs b/src/libcore/char/methods.rs index bf7772492e5..f6b201fe06d 100644 --- a/src/libcore/char/methods.rs +++ b/src/libcore/char/methods.rs @@ -187,6 +187,27 @@ impl char { } } + /// An extended version of `escape_debug` that optionally permits escaping + /// Extended Grapheme codepoints. This allows us to format characters like + /// nonspacing marks better when they're at the start of a string. + #[doc(hidden)] + #[unstable(feature = "str_internals", issue = "0")] + #[inline] + pub fn escape_debug_ext(self, escape_grapheme_extended: bool) -> EscapeDebug { + let init_state = match self { + '\t' => EscapeDefaultState::Backslash('t'), + '\r' => EscapeDefaultState::Backslash('r'), + '\n' => EscapeDefaultState::Backslash('n'), + '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self), + _ if escape_grapheme_extended && self.is_grapheme_extended() => { + EscapeDefaultState::Unicode(self.escape_unicode()) + } + _ if is_printable(self) => EscapeDefaultState::Char(self), + _ => EscapeDefaultState::Unicode(self.escape_unicode()), + }; + EscapeDebug(EscapeDefault { state: init_state }) + } + /// Returns an iterator that yields the literal escape code of a character /// as `char`s. /// @@ -224,18 +245,7 @@ impl char { #[stable(feature = "char_escape_debug", since = "1.20.0")] #[inline] pub fn escape_debug(self) -> EscapeDebug { - let init_state = match self { - '\t' => EscapeDefaultState::Backslash('t'), - '\r' => EscapeDefaultState::Backslash('r'), - '\n' => EscapeDefaultState::Backslash('n'), - '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self), - _ if self.is_grapheme_extended() => { - EscapeDefaultState::Unicode(self.escape_unicode()) - } - _ if is_printable(self) => EscapeDefaultState::Char(self), - _ => EscapeDefaultState::Unicode(self.escape_unicode()), - }; - EscapeDebug(EscapeDefault { state: init_state }) + self.escape_debug_ext(true) } /// Returns an iterator that yields the literal escape code of a character diff --git a/src/libcore/tests/char.rs b/src/libcore/tests/char.rs index d19e3b52769..d2a9ed75be6 100644 --- a/src/libcore/tests/char.rs +++ b/src/libcore/tests/char.rs @@ -181,7 +181,7 @@ fn test_escape_debug() { assert_eq!(string('\u{ff}'), "\u{ff}"); assert_eq!(string('\u{11b}'), "\u{11b}"); assert_eq!(string('\u{1d4b6}'), "\u{1d4b6}"); - assert_eq!(string('\u{301}'), "'\\u{301}'"); // combining character + assert_eq!(string('\u{301}'), "\\u{301}"); // combining character assert_eq!(string('\u{200b}'),"\\u{200b}"); // zero width space assert_eq!(string('\u{e000}'), "\\u{e000}"); // private use 1 assert_eq!(string('\u{100000}'), "\\u{100000}"); // private use 2 -- cgit 1.4.1-3-g733a5 From 2fa22effb6e1dd3b3e2e587ec5fcabefe2eb3443 Mon Sep 17 00:00:00 2001 From: varkor Date: Mon, 21 May 2018 18:57:49 +0100 Subject: Avoid counting characters and add explanatory comment to test --- src/liballoc/str.rs | 8 +++++++- src/liballoc/tests/str.rs | 6 ++++++ src/libcore/unicode/unicode.py | 2 +- 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'src/liballoc') diff --git a/src/liballoc/str.rs b/src/liballoc/str.rs index 8af14d3c698..823e56b64e3 100644 --- a/src/liballoc/str.rs +++ b/src/liballoc/str.rs @@ -380,7 +380,13 @@ impl str { reason = "return type may change to be an iterator", issue = "27791")] pub fn escape_debug(&self) -> String { - self.chars().enumerate().flat_map(|(i, c)| c.escape_debug_ext(i == 0)).collect() + let mut string = String::with_capacity(self.len()); + let mut chars = self.chars(); + if let Some(first) = chars.next() { + string.extend(first.escape_debug_ext(true)) + } + string.extend(chars.flat_map(|c| c.escape_debug_ext(false))); + string } /// Escapes each char in `s` with [`char::escape_default`]. diff --git a/src/liballoc/tests/str.rs b/src/liballoc/tests/str.rs index 84c97abcbc2..d11bf5dc3e9 100644 --- a/src/liballoc/tests/str.rs +++ b/src/liballoc/tests/str.rs @@ -989,6 +989,12 @@ fn test_escape_unicode() { #[test] fn test_escape_debug() { + // Note that there are subtleties with the number of backslashes + // on the left- and right-hand sides. In particular, Unicode code points + // are usually escaped with two backslashes on the right-hand side, as + // they are escaped. However, when the character is unescaped (e.g. for + // printable characters), only a single backslash appears (as the character + // itself appears in the debug string). assert_eq!("abc".escape_debug(), "abc"); assert_eq!("a c".escape_debug(), "a c"); assert_eq!("éèê".escape_debug(), "éèê"); diff --git a/src/libcore/unicode/unicode.py b/src/libcore/unicode/unicode.py index 1da5878c4c6..07f873b13c0 100755 --- a/src/libcore/unicode/unicode.py +++ b/src/libcore/unicode/unicode.py @@ -21,7 +21,7 @@ # - UnicodeData.txt # # Since this should not require frequent updates, we just store this -# out-of-line and check the unicode.py file into git. +# out-of-line and check the tables.rs file into git. import fileinput, re, os, sys, operator, math, datetime -- cgit 1.4.1-3-g733a5