about summary refs log tree commit diff
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2024-05-24 12:23:00 +0000
committer bors <bors@rust-lang.org> 2024-05-24 12:23:00 +0000
commit 213ad10c8f0fc275648552366275dc4e07f97462 (patch)
tree f8fd81abfddb7485310347dc95022726627cc10e
parent 464987730ab36e3456ba57c53965372989382deb (diff)
parent 004100c222638c980b6509aba0ed4990181fa5dc (diff)
download rust-213ad10c8f0fc275648552366275dc4e07f97462.tar.gz
download rust-213ad10c8f0fc275648552366275dc4e07f97462.zip
Auto merge of #121150 - Swatinem:debug-ascii-str, r=joboet
Add a fast-path to `Debug` ASCII `&str`

Instead of going through the `EscapeDebug` machinery, we can just skip over ASCII chars that don’t need any escaping.

---

This is an alternative / a companion to https://github.com/rust-lang/rust/pull/121138.

The other PR is adding the fast path deep within `EscapeDebug`, whereas this skips as early as possible.
-rw-r--r-- library/core/benches/str/debug.rs 4
-rw-r--r-- library/core/src/fmt/mod.rs 59
2 files changed, 43 insertions, 20 deletions
diff --git a/library/core/benches/str/debug.rs b/library/core/benches/str/debug.rs
index 7c72228f0fb..cb91169eed8 100644
--- a/library/core/benches/str/debug.rs
+++ b/library/core/benches/str/debug.rs
@@ -44,7 +44,7 @@ fn ascii_escapes(b: &mut Bencher) {
     assert_fmt(
         s,
         r#""some\tmore\tascii\ttext\nthis time with some \"escapes\", also 64 byte""#,
-        21,
+        15,
     );
     b.iter(|| {
         black_box(format!("{:?}", black_box(s)));
@@ -72,7 +72,7 @@ fn mostly_unicode(b: &mut Bencher) {
 #[bench]
 fn mixed(b: &mut Bencher) {
     let s = "\"❤️\"\n\"hűha ez betű\"\n\"кириллических букв\".";
-    assert_fmt(s, r#""\"❤\u{fe0f}\"\n\"hűha ez betű\"\n\"кириллических букв\".""#, 36);
+    assert_fmt(s, r#""\"❤\u{fe0f}\"\n\"hűha ez betű\"\n\"кириллических букв\".""#, 21);
     b.iter(|| {
         black_box(format!("{:?}", black_box(s)));
     });
diff --git a/library/core/src/fmt/mod.rs b/library/core/src/fmt/mod.rs
index 60a27863413..1324fb6e056 100644
--- a/library/core/src/fmt/mod.rs
+++ b/library/core/src/fmt/mod.rs
@@ -2402,23 +2402,47 @@ impl Display for bool {
 impl Debug for str {
     fn fmt(&self, f: &mut Formatter<'_>) -> Result {
         f.write_char('"')?;
-        let mut from = 0;
-        for (i, c) in self.char_indices() {
-            let esc = c.escape_debug_ext(EscapeDebugExtArgs {
-                escape_grapheme_extended: true,
-                escape_single_quote: false,
-                escape_double_quote: true,
-            });
-            // If char needs escaping, flush backlog so far and write, else skip
-            if esc.len() != 1 {
-                f.write_str(&self[from..i])?;
-                for c in esc {
-                    f.write_char(c)?;
+
+        // substring we know is printable
+        let mut printable_range = 0..0;
+
+        fn needs_escape(b: u8) -> bool {
+            b > 0x7E || b < 0x20 || b == b'\\' || b == b'"'
+        }
+
+        // the loop here first skips over runs of printable ASCII as a fast path.
+        // other chars (unicode, or ASCII that needs escaping) are then handled per-`char`.
+        let mut rest = self;
+        while rest.len() > 0 {
+            let Some(non_printable_start) = rest.as_bytes().iter().position(|&b| needs_escape(b))
+            else {
+                printable_range.end += rest.len();
+                break;
+            };
+
+            printable_range.end += non_printable_start;
+            // SAFETY: the position was derived from an iterator, so is known to be within bounds, and at a char boundary
+            rest = unsafe { rest.get_unchecked(non_printable_start..) };
+
+            let mut chars = rest.chars();
+            if let Some(c) = chars.next() {
+                let esc = c.escape_debug_ext(EscapeDebugExtArgs {
+                    escape_grapheme_extended: true,
+                    escape_single_quote: false,
+                    escape_double_quote: true,
+                });
+                if esc.len() != 1 {
+                    f.write_str(&self[printable_range.clone()])?;
+                    Display::fmt(&esc, f)?;
+                    printable_range.start = printable_range.end + c.len_utf8();
                 }
-                from = i + c.len_utf8();
+                printable_range.end += c.len_utf8();
             }
+            rest = chars.as_str();
         }
-        f.write_str(&self[from..])?;
+
+        f.write_str(&self[printable_range])?;
+
         f.write_char('"')
     }
 }
@@ -2434,13 +2458,12 @@ impl Display for str {
 impl Debug for char {
     fn fmt(&self, f: &mut Formatter<'_>) -> Result {
         f.write_char('\'')?;
-        for c in self.escape_debug_ext(EscapeDebugExtArgs {
+        let esc = self.escape_debug_ext(EscapeDebugExtArgs {
             escape_grapheme_extended: true,
             escape_single_quote: true,
             escape_double_quote: false,
-        }) {
-            f.write_char(c)?
-        }
+        });
+        Display::fmt(&esc, f)?;
         f.write_char('\'')
     }
 }