about summary refs log tree commit diff
path: root/library/core/src
diff options
context:
space:
mode:
author: Markus Reiter <me@reitermark.us> 2025-03-08 20:29:17 +0100
committer: Markus Reiter <me@reitermark.us> 2025-06-15 22:08:41 +0200
commit: ce457e1c2628e41a02bc2dcdc44346e7c7d0a9e2 (patch)
tree: b11b833747d2e3f612f5927530a4333c0fae0cbf /library/core/src
parent: 49a8ba06848fa8f282fe9055b4178350970bb0ce (diff)
download: rust-ce457e1c2628e41a02bc2dcdc44346e7c7d0a9e2.tar.gz
download: rust-ce457e1c2628e41a02bc2dcdc44346e7c7d0a9e2.zip
Get rid of `EscapeDebugInner`.
Diffstat (limited to 'library/core/src')
-rw-r--r-- library/core/src/ascii.rs 16
-rw-r--r-- library/core/src/char/mod.rs 62
-rw-r--r-- library/core/src/escape.rs 222
-rw-r--r-- library/core/src/slice/ascii.rs 2
4 files changed, 210 insertions, 92 deletions
diff --git a/library/core/src/ascii.rs b/library/core/src/ascii.rs
index 5b3711b4071..d3c6c046e71 100644
--- a/library/core/src/ascii.rs
+++ b/library/core/src/ascii.rs
@@ -9,9 +9,10 @@
 
 #![stable(feature = "core_ascii", since = "1.26.0")]
 
+use crate::escape::{AlwaysEscaped, EscapeIterInner};
+use crate::fmt;
 use crate::iter::FusedIterator;
 use crate::num::NonZero;
-use crate::{escape, fmt};
 
 mod ascii_char;
 #[unstable(feature = "ascii_char", issue = "110998")]
@@ -24,7 +25,7 @@ pub use ascii_char::AsciiChar as Char;
 #[must_use = "iterators are lazy and do nothing unless consumed"]
 #[stable(feature = "rust1", since = "1.0.0")]
 #[derive(Clone)]
-pub struct EscapeDefault(escape::EscapeIterInner<4>);
+pub struct EscapeDefault(EscapeIterInner<4, AlwaysEscaped>);
 
 /// Returns an iterator that produces an escaped version of a `u8`.
 ///
@@ -96,17 +97,12 @@ pub fn escape_default(c: u8) -> EscapeDefault {
 impl EscapeDefault {
     #[inline]
     pub(crate) const fn new(c: u8) -> Self {
-        Self(escape::EscapeIterInner::ascii(c))
+        Self(EscapeIterInner::ascii(c))
     }
 
     #[inline]
     pub(crate) fn empty() -> Self {
-        Self(escape::EscapeIterInner::empty())
-    }
-
-    #[inline]
-    pub(crate) fn as_str(&self) -> &str {
-        self.0.as_str()
+        Self(EscapeIterInner::empty())
     }
 }
 
@@ -168,7 +164,7 @@ impl FusedIterator for EscapeDefault {}
 #[stable(feature = "ascii_escape_display", since = "1.39.0")]
 impl fmt::Display for EscapeDefault {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.write_str(self.0.as_str())
+        fmt::Display::fmt(&self.0, f)
     }
 }
 
diff --git a/library/core/src/char/mod.rs b/library/core/src/char/mod.rs
index 5b9f0e2143f..82a3f6f916b 100644
--- a/library/core/src/char/mod.rs
+++ b/library/core/src/char/mod.rs
@@ -44,7 +44,7 @@ pub use self::methods::{encode_utf8_raw, encode_utf8_raw_unchecked}; // perma-un
 use crate::ascii;
 pub(crate) use self::methods::EscapeDebugExtArgs;
 use crate::error::Error;
-use crate::escape;
+use crate::escape::{AlwaysEscaped, EscapeIterInner, MaybeEscaped};
 use crate::fmt::{self, Write};
 use crate::iter::{FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce};
 use crate::num::NonZero;
@@ -161,12 +161,12 @@ pub const fn from_digit(num: u32, radix: u32) -> Option<char> {
 /// [`escape_unicode`]: char::escape_unicode
 #[derive(Clone, Debug)]
 #[stable(feature = "rust1", since = "1.0.0")]
-pub struct EscapeUnicode(escape::EscapeIterInner<10>);
+pub struct EscapeUnicode(EscapeIterInner<10, AlwaysEscaped>);
 
 impl EscapeUnicode {
     #[inline]
     const fn new(c: char) -> Self {
-        Self(escape::EscapeIterInner::unicode(c))
+        Self(EscapeIterInner::unicode(c))
     }
 }
 
@@ -215,7 +215,7 @@ impl FusedIterator for EscapeUnicode {}
 #[stable(feature = "char_struct_display", since = "1.16.0")]
 impl fmt::Display for EscapeUnicode {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.write_str(self.0.as_str())
+        fmt::Display::fmt(&self.0, f)
     }
 }
 
@@ -227,22 +227,22 @@ impl fmt::Display for EscapeUnicode {
 /// [`escape_default`]: char::escape_default
 #[derive(Clone, Debug)]
 #[stable(feature = "rust1", since = "1.0.0")]
-pub struct EscapeDefault(escape::EscapeIterInner<10>);
+pub struct EscapeDefault(EscapeIterInner<10, AlwaysEscaped>);
 
 impl EscapeDefault {
     #[inline]
     const fn printable(c: ascii::Char) -> Self {
-        Self(escape::EscapeIterInner::ascii(c.to_u8()))
+        Self(EscapeIterInner::ascii(c.to_u8()))
     }
 
     #[inline]
     const fn backslash(c: ascii::Char) -> Self {
-        Self(escape::EscapeIterInner::backslash(c))
+        Self(EscapeIterInner::backslash(c))
     }
 
     #[inline]
     const fn unicode(c: char) -> Self {
-        Self(escape::EscapeIterInner::unicode(c))
+        Self(EscapeIterInner::unicode(c))
     }
 }
 
@@ -290,8 +290,9 @@ impl FusedIterator for EscapeDefault {}
 
 #[stable(feature = "char_struct_display", since = "1.16.0")]
 impl fmt::Display for EscapeDefault {
+    #[inline]
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.write_str(self.0.as_str())
+        fmt::Display::fmt(&self.0, f)
     }
 }
 
@@ -303,37 +304,22 @@ impl fmt::Display for EscapeDefault {
 /// [`escape_debug`]: char::escape_debug
 #[stable(feature = "char_escape_debug", since = "1.20.0")]
 #[derive(Clone, Debug)]
-pub struct EscapeDebug(EscapeDebugInner);
-
-#[derive(Clone, Debug)]
-// Note: It’s possible to manually encode the EscapeDebugInner inside of
-// EscapeIterInner (e.g. with alive=254..255 indicating that data[0..4] holds
-// a char) which would likely result in a more optimised code.  For now we use
-// the option easier to implement.
-enum EscapeDebugInner {
-    Bytes(escape::EscapeIterInner<10>),
-    Char(char),
-}
+pub struct EscapeDebug(EscapeIterInner<10, MaybeEscaped>);
 
 impl EscapeDebug {
     #[inline]
     const fn printable(chr: char) -> Self {
-        Self(EscapeDebugInner::Char(chr))
+        Self(EscapeIterInner::printable(chr))
     }
 
     #[inline]
     const fn backslash(c: ascii::Char) -> Self {
-        Self(EscapeDebugInner::Bytes(escape::EscapeIterInner::backslash(c)))
+        Self(EscapeIterInner::backslash(c))
     }
 
     #[inline]
     const fn unicode(c: char) -> Self {
-        Self(EscapeDebugInner::Bytes(escape::EscapeIterInner::unicode(c)))
-    }
-
-    #[inline]
-    fn clear(&mut self) {
-        self.0 = EscapeDebugInner::Bytes(escape::EscapeIterInner::empty());
+        Self(EscapeIterInner::unicode(c))
     }
 }
 
@@ -343,13 +329,7 @@ impl Iterator for EscapeDebug {
 
     #[inline]
     fn next(&mut self) -> Option<char> {
-        match self.0 {
-            EscapeDebugInner::Bytes(ref mut bytes) => bytes.next().map(char::from),
-            EscapeDebugInner::Char(chr) => {
-                self.clear();
-                Some(chr)
-            }
-        }
+        self.0.next()
     }
 
     #[inline]
@@ -367,10 +347,7 @@ impl Iterator for EscapeDebug {
 #[stable(feature = "char_escape_debug", since = "1.20.0")]
 impl ExactSizeIterator for EscapeDebug {
     fn len(&self) -> usize {
-        match &self.0 {
-            EscapeDebugInner::Bytes(bytes) => bytes.len(),
-            EscapeDebugInner::Char(_) => 1,
-        }
+        self.0.len()
     }
 }
 
@@ -379,11 +356,9 @@ impl FusedIterator for EscapeDebug {}
 
 #[stable(feature = "char_escape_debug", since = "1.20.0")]
 impl fmt::Display for EscapeDebug {
+    #[inline]
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match &self.0 {
-            EscapeDebugInner::Bytes(bytes) => f.write_str(bytes.as_str()),
-            EscapeDebugInner::Char(chr) => f.write_char(*chr),
-        }
+        fmt::Display::fmt(&self.0, f)
     }
 }
 
@@ -480,6 +455,7 @@ macro_rules! casemappingiter_impls {
 
         #[stable(feature = "char_struct_display", since = "1.16.0")]
         impl fmt::Display for $ITER_NAME {
+            #[inline]
             fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                 fmt::Display::fmt(&self.0, f)
             }
diff --git a/library/core/src/escape.rs b/library/core/src/escape.rs
index 0c3329f676e..f459c582708 100644
--- a/library/core/src/escape.rs
+++ b/library/core/src/escape.rs
@@ -1,11 +1,16 @@
 //! Helper code for character escaping.
 
 use crate::ascii;
+use crate::fmt::{self, Write};
+use crate::marker::PhantomData;
 use crate::num::NonZero;
 use crate::ops::Range;
 
 const HEX_DIGITS: [ascii::Char; 16] = *b"0123456789abcdef".as_ascii().unwrap();
 
+/// Escapes a character with `\x` representation.
+///
+/// Returns a buffer with the escaped representation and its corresponding range.
 #[inline]
 const fn backslash<const N: usize>(a: ascii::Char) -> ([ascii::Char; N], Range<u8>) {
     const { assert!(N >= 2) };
@@ -18,6 +23,9 @@ const fn backslash<const N: usize>(a: ascii::Char) -> ([ascii::Char; N], Range<u
     (output, 0..2)
 }
 
+/// Escapes a character with `\xNN` representation.
+///
+/// Returns a buffer with the escaped representation and its corresponding range.
 #[inline]
 const fn hex_escape<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) {
     const { assert!(N >= 4) };
@@ -35,6 +43,7 @@ const fn hex_escape<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) {
     (output, 0..4)
 }
 
+/// Returns a buffer with the verbatim character and its corresponding range.
 #[inline]
 const fn verbatim<const N: usize>(a: ascii::Char) -> ([ascii::Char; N], Range<u8>) {
     const { assert!(N >= 1) };
@@ -48,7 +57,7 @@ const fn verbatim<const N: usize>(a: ascii::Char) -> ([ascii::Char; N], Range<u8
 
 /// Escapes an ASCII character.
 ///
-/// Returns a buffer and the length of the escaped representation.
+/// Returns a buffer with the escaped representation and its corresponding range.
 const fn escape_ascii<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) {
     const { assert!(N >= 4) };
 
@@ -122,9 +131,9 @@ const fn escape_ascii<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>)
     }
 }
 
-/// Escapes a character `\u{NNNN}` representation.
+/// Escapes a character with `\u{NNNN}` representation.
 ///
-/// Returns a buffer and the length of the escaped representation.
+/// Returns a buffer with the escaped representation and its corresponding range.
 const fn escape_unicode<const N: usize>(c: char) -> ([ascii::Char; N], Range<u8>) {
     const { assert!(N >= 10 && N < u8::MAX as usize) };
 
@@ -149,77 +158,214 @@ const fn escape_unicode<const N: usize>(c: char) -> ([ascii::Char; N], Range<u8>
     (output, (start as u8)..(N as u8))
 }
 
-/// An iterator over an fixed-size array.
-///
-/// This is essentially equivalent to array’s IntoIter except that indexes are
-/// limited to u8 to reduce size of the structure.
-#[derive(Clone, Debug)]
-pub(crate) struct EscapeIterInner<const N: usize> {
-    // The element type ensures this is always ASCII, and thus also valid UTF-8.
-    data: [ascii::Char; N],
-
-    // Invariant: `alive.start <= alive.end <= N`
+#[derive(Clone, Copy)]
+union MaybeEscapedCharacter<const N: usize> {
+    pub escape_seq: [ascii::Char; N],
+    pub literal: char,
+}
+
+/// Marker type to indicate that the character is always escaped,
+/// used to optimize the iterator implementation.
+#[derive(Clone, Copy)]
+#[non_exhaustive]
+pub(crate) struct AlwaysEscaped;
+
+/// Marker type to indicate that the character may be escaped,
+/// used to optimize the iterator implementation.
+#[derive(Clone, Copy)]
+#[non_exhaustive]
+pub(crate) struct MaybeEscaped;
+
+/// An iterator over a possibly escaped character.
+#[derive(Clone)]
+pub(crate) struct EscapeIterInner<const N: usize, ESCAPING> {
+    // Invariant:
+    //
+    // If `alive.end <= Self::LITERAL_ESCAPE_START`, `data` must contain
+    // printable ASCII characters in the `alive` range of its `escape_seq` variant.
+    //
+    // If `alive.end > Self::LITERAL_ESCAPE_START`, `data` must contain a
+    // `char` in its `literal` variant, and the `alive` range must have a
+    // length of at most `1`.
+    data: MaybeEscapedCharacter<N>,
     alive: Range<u8>,
+    escaping: PhantomData<ESCAPING>,
 }
 
-impl<const N: usize> EscapeIterInner<N> {
+impl<const N: usize, ESCAPING> EscapeIterInner<N, ESCAPING> {
+    const LITERAL_ESCAPE_START: u8 = 128;
+
+    /// # Safety
+    ///
+    /// `data.escape_seq` must contain an escape sequence in the range given by `alive`.
+    #[inline]
+    const unsafe fn new(data: MaybeEscapedCharacter<N>, alive: Range<u8>) -> Self {
+        // Longer escape sequences are not useful given `alive.end` is at most
+        // `Self::LITERAL_ESCAPE_START`.
+        const { assert!(N < Self::LITERAL_ESCAPE_START as usize) };
+
+        // Check bounds, which implicitly also checks the invariant
+        // `alive.end <= Self::LITERAL_ESCAPE_START`.
+        debug_assert!(alive.end <= (N + 1) as u8);
+
+        Self { data, alive, escaping: PhantomData }
+    }
+
     pub(crate) const fn backslash(c: ascii::Char) -> Self {
-        let (data, range) = backslash(c);
-        Self { data, alive: range }
+        let (escape_seq, alive) = backslash(c);
+        // SAFETY: `escape_seq` contains an escape sequence in the range given by `alive`.
+        unsafe { Self::new(MaybeEscapedCharacter { escape_seq }, alive) }
     }
 
     pub(crate) const fn ascii(c: u8) -> Self {
-        let (data, range) = escape_ascii(c);
-        Self { data, alive: range }
+        let (escape_seq, alive) = escape_ascii(c);
+        // SAFETY: `escape_seq` contains an escape sequence in the range given by `alive`.
+        unsafe { Self::new(MaybeEscapedCharacter { escape_seq }, alive) }
     }
 
     pub(crate) const fn unicode(c: char) -> Self {
-        let (data, range) = escape_unicode(c);
-        Self { data, alive: range }
+        let (escape_seq, alive) = escape_unicode(c);
+        // SAFETY: `escape_seq` contains an escape sequence in the range given by `alive`.
+        unsafe { Self::new(MaybeEscapedCharacter { escape_seq }, alive) }
     }
 
     #[inline]
     pub(crate) const fn empty() -> Self {
-        Self { data: [ascii::Char::Null; N], alive: 0..0 }
+        // SAFETY: `0..0` ensures an empty escape sequence.
+        unsafe { Self::new(MaybeEscapedCharacter { escape_seq: [ascii::Char::Null; N] }, 0..0) }
     }
 
     #[inline]
-    pub(crate) fn as_ascii(&self) -> &[ascii::Char] {
-        // SAFETY: `self.alive` is guaranteed to be a valid range for indexing `self.data`.
-        unsafe {
-            self.data.get_unchecked(usize::from(self.alive.start)..usize::from(self.alive.end))
-        }
+    pub(crate) fn len(&self) -> usize {
+        usize::from(self.alive.end - self.alive.start)
     }
 
     #[inline]
-    pub(crate) fn as_str(&self) -> &str {
-        self.as_ascii().as_str()
+    pub(crate) fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
+        self.alive.advance_by(n)
     }
 
     #[inline]
-    pub(crate) fn len(&self) -> usize {
-        usize::from(self.alive.end - self.alive.start)
+    pub(crate) fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
+        self.alive.advance_back_by(n)
+    }
+
+    /// Returns a `char` if `self.data` contains one in its `literal` variant.
+    #[inline]
+    const fn to_char(&self) -> Option<char> {
+        if self.alive.end > Self::LITERAL_ESCAPE_START {
+            // SAFETY: We just checked that `self.data` contains a `char` in
+            //         its `literal` variant.
+            return Some(unsafe { self.data.literal });
+        }
+
+        None
     }
 
+    /// Returns the printable ASCII characters in the `escape_seq` variant of `self.data`
+    /// as a string.
+    ///
+    /// # Safety
+    ///
+    /// - `self.data` must contain printable ASCII characters in its `escape_seq` variant.
+    /// - `self.alive` must be a valid range for `self.data.escape_seq`.
+    #[inline]
+    unsafe fn to_str_unchecked(&self) -> &str {
+        debug_assert!(self.alive.end <= Self::LITERAL_ESCAPE_START);
+
+        // SAFETY: The caller guarantees `self.data` contains printable ASCII
+        //         characters in its `escape_seq` variant, and `self.alive` is
+        //         a valid range for `self.data.escape_seq`.
+        unsafe {
+            self.data
+                .escape_seq
+                .get_unchecked(usize::from(self.alive.start)..usize::from(self.alive.end))
+                .as_str()
+        }
+    }
+}
+
+impl<const N: usize> EscapeIterInner<N, AlwaysEscaped> {
     pub(crate) fn next(&mut self) -> Option<u8> {
         let i = self.alive.next()?;
 
-        // SAFETY: `i` is guaranteed to be a valid index for `self.data`.
-        unsafe { Some(self.data.get_unchecked(usize::from(i)).to_u8()) }
+        // SAFETY: The `AlwaysEscaped` marker guarantees that `self.data`
+        //         contains printable ASCII characters in its `escape_seq`
+        //         variant, and `i` is guaranteed to be a valid index for
+        //         `self.data.escape_seq`.
+        unsafe { Some(self.data.escape_seq.get_unchecked(usize::from(i)).to_u8()) }
     }
 
     pub(crate) fn next_back(&mut self) -> Option<u8> {
         let i = self.alive.next_back()?;
 
-        // SAFETY: `i` is guaranteed to be a valid index for `self.data`.
-        unsafe { Some(self.data.get_unchecked(usize::from(i)).to_u8()) }
+        // SAFETY: The `AlwaysEscaped` marker guarantees that `self.data`
+        //         contains printable ASCII characters in its `escape_seq`
+        //         variant, and `i` is guaranteed to be a valid index for
+        //         `self.data.escape_seq`.
+        unsafe { Some(self.data.escape_seq.get_unchecked(usize::from(i)).to_u8()) }
     }
+}
 
-    pub(crate) fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
-        self.alive.advance_by(n)
+impl<const N: usize> EscapeIterInner<N, MaybeEscaped> {
+    // This is the only way to create any `EscapeIterInner` containing a `char` in
+    // the `literal` variant of its `self.data`, meaning the `AlwaysEscaped` marker
+    // guarantees that `self.data` contains printable ASCII characters in its
+    // `escape_seq` variant.
+    pub(crate) const fn printable(c: char) -> Self {
+        Self {
+            data: MaybeEscapedCharacter { literal: c },
+            // Uphold the invariant `alive.end > Self::LITERAL_ESCAPE_START`, and ensure
+            // `len` behaves correctly for iterating through one character literal.
+            alive: Self::LITERAL_ESCAPE_START..(Self::LITERAL_ESCAPE_START + 1),
+            escaping: PhantomData,
+        }
     }
 
-    pub(crate) fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
-        self.alive.advance_back_by(n)
+    pub(crate) fn next(&mut self) -> Option<char> {
+        let i = self.alive.next()?;
+
+        if let Some(c) = self.to_char() {
+            return Some(c);
+        }
+
+        // SAFETY: At this point, `self.data` must contain printable ASCII
+        //         characters in its `escape_seq` variant, and `i` is
+        //         guaranteed to be a valid index for `self.data.escape_seq`.
+        Some(char::from(unsafe { self.data.escape_seq.get_unchecked(usize::from(i)).to_u8() }))
+    }
+}
+
+impl<const N: usize> fmt::Display for EscapeIterInner<N, AlwaysEscaped> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // SAFETY: The `AlwaysEscaped` marker guarantees that `self.data`
+        //         contains printable ASCII chars, and `self.alive` is
+        //         guaranteed to be a valid range for `self.data`.
+        f.write_str(unsafe { self.to_str_unchecked() })
+    }
+}
+
+impl<const N: usize> fmt::Display for EscapeIterInner<N, MaybeEscaped> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        if let Some(c) = self.to_char() {
+            return f.write_char(c);
+        }
+
+        // SAFETY: At this point, `self.data` must contain printable ASCII
+        //         characters in its `escape_seq` variant, and `self.alive`
+        //         is guaranteed to be a valid range for `self.data`.
+        f.write_str(unsafe { self.to_str_unchecked() })
+    }
+}
+
+impl<const N: usize> fmt::Debug for EscapeIterInner<N, AlwaysEscaped> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_tuple("EscapeIterInner").field(&format_args!("'{}'", self)).finish()
+    }
+}
+
+impl<const N: usize> fmt::Debug for EscapeIterInner<N, MaybeEscaped> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_tuple("EscapeIterInner").field(&format_args!("'{}'", self)).finish()
     }
 }
diff --git a/library/core/src/slice/ascii.rs b/library/core/src/slice/ascii.rs
index b4d9a1b1ca4..181ae82959c 100644
--- a/library/core/src/slice/ascii.rs
+++ b/library/core/src/slice/ascii.rs
@@ -308,7 +308,7 @@ impl<'a> fmt::Display for EscapeAscii<'a> {
 
             if let Some(&b) = bytes.first() {
                 // guaranteed to be non-empty, better to write it as a str
-                f.write_str(ascii::escape_default(b).as_str())?;
+                fmt::Display::fmt(&ascii::escape_default(b), f)?;
                 bytes = &bytes[1..];
             }
         }