about summary refs log tree commit diff
diff options
context:
space:
mode:
author León Orell Valerian Liehr <me@fmease.dev> 2024-05-15 14:21:37 +0200
committer GitHub <noreply@github.com> 2024-05-15 14:21:37 +0200
commit 3873a74f8a581dd6a3d0dfb6a67fc5469d6fca5f (patch)
tree 045d565b8b7e11897da508a1a830570663c8670b
parent a71c3ffce9ca505af27f43cd3bad7606a72e3ec8 (diff)
parent 4edf12d33e0aa67a82e6587e94469f3761cb1f53 (diff)
download rust-3873a74f8a581dd6a3d0dfb6a67fc5469d6fca5f.tar.gz
rust-3873a74f8a581dd6a3d0dfb6a67fc5469d6fca5f.zip
Rollup merge of #124307 - reitermarkus:escape-debug-size-hint-inline, r=joboet
Optimize character escaping.

Allow optimization of panicking branch in `EscapeDebug`, see https://github.com/rust-lang/rust/pull/121805.

r? `@joboet`
-rw-r--r-- library/core/src/ascii.rs 14
-rw-r--r-- library/core/src/char/methods.rs 8
-rw-r--r-- library/core/src/char/mod.rs 44
-rw-r--r-- library/core/src/escape.rs 139
4 files changed, 126 insertions, 79 deletions
diff --git a/library/core/src/ascii.rs b/library/core/src/ascii.rs
index c29e5565d51..e9f4d0f93ed 100644
--- a/library/core/src/ascii.rs
+++ b/library/core/src/ascii.rs
@@ -91,17 +91,21 @@ pub struct EscapeDefault(escape::EscapeIterInner<4>);
 /// ```
 #[stable(feature = "rust1", since = "1.0.0")]
 pub fn escape_default(c: u8) -> EscapeDefault {
-    let mut data = [Char::Null; 4];
-    let range = escape::escape_ascii_into(&mut data, c);
-    EscapeDefault(escape::EscapeIterInner::new(data, range))
+    EscapeDefault::new(c)
 }
 
 impl EscapeDefault {
+    #[inline]
+    pub(crate) const fn new(c: u8) -> Self {
+        Self(escape::EscapeIterInner::ascii(c))
+    }
+
+    #[inline]
     pub(crate) fn empty() -> Self {
-        let data = [Char::Null; 4];
-        EscapeDefault(escape::EscapeIterInner::new(data, 0..0))
+        Self(escape::EscapeIterInner::empty())
     }
 
+    #[inline]
     pub(crate) fn as_str(&self) -> &str {
         self.0.as_str()
     }
diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs
index a93b94867ce..458be49fb15 100644
--- a/library/core/src/char/methods.rs
+++ b/library/core/src/char/methods.rs
@@ -449,10 +449,10 @@ impl char {
             '\"' if args.escape_double_quote => EscapeDebug::backslash(ascii::Char::QuotationMark),
             '\'' if args.escape_single_quote => EscapeDebug::backslash(ascii::Char::Apostrophe),
             _ if args.escape_grapheme_extended && self.is_grapheme_extended() => {
-                EscapeDebug::from_unicode(self.escape_unicode())
+                EscapeDebug::unicode(self)
             }
             _ if is_printable(self) => EscapeDebug::printable(self),
-            _ => EscapeDebug::from_unicode(self.escape_unicode()),
+            _ => EscapeDebug::unicode(self),
         }
     }
 
@@ -555,9 +555,9 @@ impl char {
             '\t' => EscapeDefault::backslash(ascii::Char::SmallT),
             '\r' => EscapeDefault::backslash(ascii::Char::SmallR),
             '\n' => EscapeDefault::backslash(ascii::Char::SmallN),
-            '\\' | '\'' | '"' => EscapeDefault::backslash(self.as_ascii().unwrap()),
+            '\\' | '\'' | '\"' => EscapeDefault::backslash(self.as_ascii().unwrap()),
             '\x20'..='\x7e' => EscapeDefault::printable(self.as_ascii().unwrap()),
-            _ => EscapeDefault::from_unicode(self.escape_unicode()),
+            _ => EscapeDefault::unicode(self),
         }
     }
 
diff --git a/library/core/src/char/mod.rs b/library/core/src/char/mod.rs
index a860c7c6aaa..f3683fe3f9c 100644
--- a/library/core/src/char/mod.rs
+++ b/library/core/src/char/mod.rs
@@ -152,10 +152,9 @@ pub const fn from_digit(num: u32, radix: u32) -> Option<char> {
 pub struct EscapeUnicode(escape::EscapeIterInner<10>);
 
 impl EscapeUnicode {
-    fn new(chr: char) -> Self {
-        let mut data = [ascii::Char::Null; 10];
-        let range = escape::escape_unicode_into(&mut data, chr);
-        Self(escape::EscapeIterInner::new(data, range))
+    #[inline]
+    const fn new(c: char) -> Self {
+        Self(escape::EscapeIterInner::unicode(c))
     }
 }
 
@@ -219,18 +218,19 @@ impl fmt::Display for EscapeUnicode {
 pub struct EscapeDefault(escape::EscapeIterInner<10>);
 
 impl EscapeDefault {
-    fn printable(chr: ascii::Char) -> Self {
-        let data = [chr];
-        Self(escape::EscapeIterInner::from_array(data))
+    #[inline]
+    const fn printable(c: ascii::Char) -> Self {
+        Self(escape::EscapeIterInner::ascii(c.to_u8()))
     }
 
-    fn backslash(chr: ascii::Char) -> Self {
-        let data = [ascii::Char::ReverseSolidus, chr];
-        Self(escape::EscapeIterInner::from_array(data))
+    #[inline]
+    const fn backslash(c: ascii::Char) -> Self {
+        Self(escape::EscapeIterInner::backslash(c))
     }
 
-    fn from_unicode(esc: EscapeUnicode) -> Self {
-        Self(esc.0)
+    #[inline]
+    const fn unicode(c: char) -> Self {
+        Self(escape::EscapeIterInner::unicode(c))
     }
 }
 
@@ -304,23 +304,24 @@ enum EscapeDebugInner {
 }
 
 impl EscapeDebug {
-    fn printable(chr: char) -> Self {
+    #[inline]
+    const fn printable(chr: char) -> Self {
         Self(EscapeDebugInner::Char(chr))
     }
 
-    fn backslash(chr: ascii::Char) -> Self {
-        let data = [ascii::Char::ReverseSolidus, chr];
-        let iter = escape::EscapeIterInner::from_array(data);
-        Self(EscapeDebugInner::Bytes(iter))
+    #[inline]
+    const fn backslash(c: ascii::Char) -> Self {
+        Self(EscapeDebugInner::Bytes(escape::EscapeIterInner::backslash(c)))
     }
 
-    fn from_unicode(esc: EscapeUnicode) -> Self {
-        Self(EscapeDebugInner::Bytes(esc.0))
+    #[inline]
+    const fn unicode(c: char) -> Self {
+        Self(EscapeDebugInner::Bytes(escape::EscapeIterInner::unicode(c)))
     }
 
+    #[inline]
     fn clear(&mut self) {
-        let bytes = escape::EscapeIterInner::from_array([]);
-        self.0 = EscapeDebugInner::Bytes(bytes);
+        self.0 = EscapeDebugInner::Bytes(escape::EscapeIterInner::empty());
     }
 }
 
@@ -339,6 +340,7 @@ impl Iterator for EscapeDebug {
         }
     }
 
+    #[inline]
     fn size_hint(&self) -> (usize, Option<usize>) {
         let n = self.len();
         (n, Some(n))
diff --git a/library/core/src/escape.rs b/library/core/src/escape.rs
index 143e277283e..f6ec30b9f79 100644
--- a/library/core/src/escape.rs
+++ b/library/core/src/escape.rs
@@ -6,56 +6,79 @@ use crate::ops::Range;
 
 const HEX_DIGITS: [ascii::Char; 16] = *b"0123456789abcdef".as_ascii().unwrap();
 
-/// Escapes a byte into provided buffer; returns length of escaped
-/// representation.
-pub(crate) fn escape_ascii_into(output: &mut [ascii::Char; 4], byte: u8) -> Range<u8> {
-    #[inline]
-    fn backslash(a: ascii::Char) -> ([ascii::Char; 4], u8) {
-        ([ascii::Char::ReverseSolidus, a, ascii::Char::Null, ascii::Char::Null], 2)
-    }
+#[inline]
+const fn backslash<const N: usize>(a: ascii::Char) -> ([ascii::Char; N], Range<u8>) {
+    const { assert!(N >= 2) };
+
+    let mut output = [ascii::Char::Null; N];
+
+    output[0] = ascii::Char::ReverseSolidus;
+    output[1] = a;
+
+    (output, 0..2)
+}
 
-    let (data, len) = match byte {
+/// Escapes an ASCII character.
+///
+/// Returns a buffer and the range within it that holds the escaped representation.
+const fn escape_ascii<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) {
+    const { assert!(N >= 4) };
+
+    match byte {
         b'\t' => backslash(ascii::Char::SmallT),
         b'\r' => backslash(ascii::Char::SmallR),
         b'\n' => backslash(ascii::Char::SmallN),
         b'\\' => backslash(ascii::Char::ReverseSolidus),
         b'\'' => backslash(ascii::Char::Apostrophe),
         b'\"' => backslash(ascii::Char::QuotationMark),
-        _ => {
-            if let Some(a) = byte.as_ascii()
+        byte => {
+            let mut output = [ascii::Char::Null; N];
+
+            if let Some(c) = byte.as_ascii()
                 && !byte.is_ascii_control()
             {
-                ([a, ascii::Char::Null, ascii::Char::Null, ascii::Char::Null], 1)
+                output[0] = c;
+                (output, 0..1)
             } else {
-                let hi = HEX_DIGITS[usize::from(byte >> 4)];
-                let lo = HEX_DIGITS[usize::from(byte & 0xf)];
-                ([ascii::Char::ReverseSolidus, ascii::Char::SmallX, hi, lo], 4)
+                let hi = HEX_DIGITS[(byte >> 4) as usize];
+                let lo = HEX_DIGITS[(byte & 0xf) as usize];
+
+                output[0] = ascii::Char::ReverseSolidus;
+                output[1] = ascii::Char::SmallX;
+                output[2] = hi;
+                output[3] = lo;
+
+                (output, 0..4)
             }
         }
-    };
-    *output = data;
-    0..len
+    }
 }
 
-/// Escapes a character into provided buffer using `\u{NNNN}` representation.
-pub(crate) fn escape_unicode_into(output: &mut [ascii::Char; 10], ch: char) -> Range<u8> {
+/// Escapes a character into its `\u{NNNN}` representation.
+///
+/// Returns a buffer and the range within it that holds the escaped representation.
+const fn escape_unicode<const N: usize>(c: char) -> ([ascii::Char; N], Range<u8>) {
+    const { assert!(N >= 10 && N < u8::MAX as usize) };
+
+    let c = u32::from(c);
+
+    // OR-ing `1` ensures that for `c == 0` the code computes that
+    // one digit should be printed.
+    let start = (c | 1).leading_zeros() as usize / 4 - 2;
+
+    let mut output = [ascii::Char::Null; N];
+    output[3] = HEX_DIGITS[((c >> 20) & 15) as usize];
+    output[4] = HEX_DIGITS[((c >> 16) & 15) as usize];
+    output[5] = HEX_DIGITS[((c >> 12) & 15) as usize];
+    output[6] = HEX_DIGITS[((c >> 8) & 15) as usize];
+    output[7] = HEX_DIGITS[((c >> 4) & 15) as usize];
+    output[8] = HEX_DIGITS[((c >> 0) & 15) as usize];
     output[9] = ascii::Char::RightCurlyBracket;
+    output[start + 0] = ascii::Char::ReverseSolidus;
+    output[start + 1] = ascii::Char::SmallU;
+    output[start + 2] = ascii::Char::LeftCurlyBracket;
 
-    let ch = ch as u32;
-    output[3] = HEX_DIGITS[((ch >> 20) & 15) as usize];
-    output[4] = HEX_DIGITS[((ch >> 16) & 15) as usize];
-    output[5] = HEX_DIGITS[((ch >> 12) & 15) as usize];
-    output[6] = HEX_DIGITS[((ch >> 8) & 15) as usize];
-    output[7] = HEX_DIGITS[((ch >> 4) & 15) as usize];
-    output[8] = HEX_DIGITS[((ch >> 0) & 15) as usize];
-
-    // or-ing 1 ensures that for ch==0 the code computes that one digit should
-    // be printed.
-    let start = (ch | 1).leading_zeros() as usize / 4 - 2;
-    const UNICODE_ESCAPE_PREFIX: &[ascii::Char; 3] = b"\\u{".as_ascii().unwrap();
-    output[start..][..3].copy_from_slice(UNICODE_ESCAPE_PREFIX);
-
-    (start as u8)..10
+    (output, (start as u8)..(N as u8))
 }
 
 /// An iterator over an fixed-size array.
@@ -65,45 +88,63 @@ pub(crate) fn escape_unicode_into(output: &mut [ascii::Char; 10], ch: char) -> R
 #[derive(Clone, Debug)]
 pub(crate) struct EscapeIterInner<const N: usize> {
     // The element type ensures this is always ASCII, and thus also valid UTF-8.
-    pub(crate) data: [ascii::Char; N],
+    data: [ascii::Char; N],
 
-    // Invariant: alive.start <= alive.end <= N.
-    pub(crate) alive: Range<u8>,
+    // Invariant: `alive.start <= alive.end <= N`
+    alive: Range<u8>,
 }
 
 impl<const N: usize> EscapeIterInner<N> {
-    pub fn new(data: [ascii::Char; N], alive: Range<u8>) -> Self {
-        const { assert!(N < 256) };
-        debug_assert!(alive.start <= alive.end && usize::from(alive.end) <= N, "{alive:?}");
-        Self { data, alive }
+    pub const fn backslash(c: ascii::Char) -> Self {
+        let (data, range) = backslash(c);
+        Self { data, alive: range }
+    }
+
+    pub const fn ascii(c: u8) -> Self {
+        let (data, range) = escape_ascii(c);
+        Self { data, alive: range }
     }
 
-    pub fn from_array<const M: usize>(array: [ascii::Char; M]) -> Self {
-        const { assert!(M <= N) };
+    pub const fn unicode(c: char) -> Self {
+        let (data, range) = escape_unicode(c);
+        Self { data, alive: range }
+    }
 
-        let mut data = [ascii::Char::Null; N];
-        data[..M].copy_from_slice(&array);
-        Self::new(data, 0..M as u8)
+    #[inline]
+    pub const fn empty() -> Self {
+        Self { data: [ascii::Char::Null; N], alive: 0..0 }
     }
 
+    #[inline]
     pub fn as_ascii(&self) -> &[ascii::Char] {
-        &self.data[usize::from(self.alive.start)..usize::from(self.alive.end)]
+        // SAFETY: `self.alive` is guaranteed to be a valid range for indexing `self.data`.
+        unsafe {
+            self.data.get_unchecked(usize::from(self.alive.start)..usize::from(self.alive.end))
+        }
     }
 
+    #[inline]
     pub fn as_str(&self) -> &str {
         self.as_ascii().as_str()
     }
 
+    #[inline]
     pub fn len(&self) -> usize {
         usize::from(self.alive.end - self.alive.start)
     }
 
     pub fn next(&mut self) -> Option<u8> {
-        self.alive.next().map(|i| self.data[usize::from(i)].to_u8())
+        let i = self.alive.next()?;
+
+        // SAFETY: `i` is guaranteed to be a valid index for `self.data`.
+        unsafe { Some(self.data.get_unchecked(usize::from(i)).to_u8()) }
     }
 
     pub fn next_back(&mut self) -> Option<u8> {
-        self.alive.next_back().map(|i| self.data[usize::from(i)].to_u8())
+        let i = self.alive.next_back()?;
+
+        // SAFETY: `i` is guaranteed to be a valid index for `self.data`.
+        unsafe { Some(self.data.get_unchecked(usize::from(i)).to_u8()) }
     }
 
     pub fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {