diff options
| author | bors <bors@rust-lang.org> | 2024-05-24 12:23:00 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2024-05-24 12:23:00 +0000 |
| commit | 213ad10c8f0fc275648552366275dc4e07f97462 (patch) | |
| tree | f8fd81abfddb7485310347dc95022726627cc10e | |
| parent | 464987730ab36e3456ba57c53965372989382deb (diff) | |
| parent | 004100c222638c980b6509aba0ed4990181fa5dc (diff) | |
| download | rust-213ad10c8f0fc275648552366275dc4e07f97462.tar.gz rust-213ad10c8f0fc275648552366275dc4e07f97462.zip | |
Auto merge of #121150 - Swatinem:debug-ascii-str, r=joboet
Add a fast path to `Debug` for ASCII `&str`. Instead of going through the `EscapeDebug` machinery, we can just skip over ASCII chars that don’t need any escaping. --- This is an alternative / a companion to https://github.com/rust-lang/rust/pull/121138. The other PR adds the fast path deep within `EscapeDebug`, whereas this one skips as early as possible.
| -rw-r--r-- | library/core/benches/str/debug.rs | 4 | ||||
| -rw-r--r-- | library/core/src/fmt/mod.rs | 59 |
2 files changed, 43 insertions, 20 deletions
diff --git a/library/core/benches/str/debug.rs b/library/core/benches/str/debug.rs index 7c72228f0fb..cb91169eed8 100644 --- a/library/core/benches/str/debug.rs +++ b/library/core/benches/str/debug.rs @@ -44,7 +44,7 @@ fn ascii_escapes(b: &mut Bencher) { assert_fmt( s, r#""some\tmore\tascii\ttext\nthis time with some \"escapes\", also 64 byte""#, - 21, + 15, ); b.iter(|| { black_box(format!("{:?}", black_box(s))); @@ -72,7 +72,7 @@ fn mostly_unicode(b: &mut Bencher) { #[bench] fn mixed(b: &mut Bencher) { let s = "\"❤️\"\n\"hűha ez betű\"\n\"кириллических букв\"."; - assert_fmt(s, r#""\"❤\u{fe0f}\"\n\"hűha ez betű\"\n\"кириллических букв\".""#, 36); + assert_fmt(s, r#""\"❤\u{fe0f}\"\n\"hűha ez betű\"\n\"кириллических букв\".""#, 21); b.iter(|| { black_box(format!("{:?}", black_box(s))); }); diff --git a/library/core/src/fmt/mod.rs b/library/core/src/fmt/mod.rs index 60a27863413..1324fb6e056 100644 --- a/library/core/src/fmt/mod.rs +++ b/library/core/src/fmt/mod.rs @@ -2402,23 +2402,47 @@ impl Display for bool { impl Debug for str { fn fmt(&self, f: &mut Formatter<'_>) -> Result { f.write_char('"')?; - let mut from = 0; - for (i, c) in self.char_indices() { - let esc = c.escape_debug_ext(EscapeDebugExtArgs { - escape_grapheme_extended: true, - escape_single_quote: false, - escape_double_quote: true, - }); - // If char needs escaping, flush backlog so far and write, else skip - if esc.len() != 1 { - f.write_str(&self[from..i])?; - for c in esc { - f.write_char(c)?; + + // substring we know is printable + let mut printable_range = 0..0; + + fn needs_escape(b: u8) -> bool { + b > 0x7E || b < 0x20 || b == b'\\' || b == b'"' + } + + // the loop here first skips over runs of printable ASCII as a fast path. + // other chars (unicode, or ASCII that needs escaping) are then handled per-`char`. 
+ let mut rest = self; + while rest.len() > 0 { + let Some(non_printable_start) = rest.as_bytes().iter().position(|&b| needs_escape(b)) + else { + printable_range.end += rest.len(); + break; + }; + + printable_range.end += non_printable_start; + // SAFETY: the position was derived from an iterator, so is known to be within bounds, and at a char boundary + rest = unsafe { rest.get_unchecked(non_printable_start..) }; + + let mut chars = rest.chars(); + if let Some(c) = chars.next() { + let esc = c.escape_debug_ext(EscapeDebugExtArgs { + escape_grapheme_extended: true, + escape_single_quote: false, + escape_double_quote: true, + }); + if esc.len() != 1 { + f.write_str(&self[printable_range.clone()])?; + Display::fmt(&esc, f)?; + printable_range.start = printable_range.end + c.len_utf8(); } - from = i + c.len_utf8(); + printable_range.end += c.len_utf8(); } + rest = chars.as_str(); } - f.write_str(&self[from..])?; + + f.write_str(&self[printable_range])?; + f.write_char('"') } } @@ -2434,13 +2458,12 @@ impl Display for str { impl Debug for char { fn fmt(&self, f: &mut Formatter<'_>) -> Result { f.write_char('\'')?; - for c in self.escape_debug_ext(EscapeDebugExtArgs { + let esc = self.escape_debug_ext(EscapeDebugExtArgs { escape_grapheme_extended: true, escape_single_quote: true, escape_double_quote: false, - }) { - f.write_char(c)? - } + }); + Display::fmt(&esc, f)?; f.write_char('\'') } } |
