diff options
| author | Thalia Archibald <thalia@archibald.dev> | 2025-02-05 14:03:50 -0800 |
|---|---|---|
| committer | Thalia Archibald <thalia@archibald.dev> | 2025-02-16 14:43:31 -0800 |
| commit | 09dc38f23b4aa759d65d2fb940781f080445f685 (patch) | |
| tree | 4389fa05730db14eb44b13560d8dfa2e760e7649 | |
| parent | 5bc62314547c7639484481f62f218156697cfef0 (diff) | |
| download | rust-09dc38f23b4aa759d65d2fb940781f080445f685.tar.gz rust-09dc38f23b4aa759d65d2fb940781f080445f685.zip | |
Improve WTF-8 comments
| -rw-r--r-- | library/std/src/sys_common/wtf8.rs | 38 |
1 files changed, 23 insertions, 15 deletions
diff --git a/library/std/src/sys_common/wtf8.rs b/library/std/src/sys_common/wtf8.rs index 6c60d901ee9..0d327189d12 100644 --- a/library/std/src/sys_common/wtf8.rs +++ b/library/std/src/sys_common/wtf8.rs @@ -156,9 +156,12 @@ impl ops::DerefMut for Wtf8Buf { } } -/// Format the string with double quotes, -/// and surrogates as `\u` followed by four hexadecimal digits. -/// Example: `"a\u{D800}"` for a string with code points [U+0061, U+D800] +/// Formats the string in double quotes, with characters escaped according to +/// [`char::escape_debug`] and unpaired surrogates represented as `\u{xxxx}`, +/// where each `x` is a hexadecimal digit. +/// +/// For example, the code units [U+0061, U+D800, U+000A] are formatted as +/// `"a\u{D800}\n"`. impl fmt::Debug for Wtf8Buf { #[inline] fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -181,7 +184,7 @@ impl Wtf8Buf { /// Creates a WTF-8 string from a WTF-8 byte vec. /// - /// Since the byte vec is not checked for valid WTF-8, this functions is + /// Since the byte vec is not checked for valid WTF-8, this function is /// marked unsafe. #[inline] pub unsafe fn from_bytes_unchecked(value: Vec<u8>) -> Wtf8Buf { @@ -237,8 +240,9 @@ impl Wtf8Buf { string } - /// Copied from String::push + /// Appends the given `char` to the end of this string. /// This does **not** include the WTF-8 concatenation check or `is_known_utf8` check. + /// Copied from String::push. fn push_code_point_unchecked(&mut self, code_point: CodePoint) { let mut bytes = [0; 4]; let bytes = encode_utf8_raw(code_point.value, &mut bytes); @@ -264,16 +268,16 @@ impl Wtf8Buf { /// /// # Panics /// - /// Panics if the new capacity overflows `usize`. + /// Panics if the new capacity exceeds `isize::MAX` bytes. #[inline] pub fn reserve(&mut self, additional: usize) { self.bytes.reserve(additional) } - /// Tries to reserve capacity for at least `additional` more length units - /// in the given `Wtf8Buf`. The `Wtf8Buf` may reserve more space to avoid - /// frequent reallocations. After calling `try_reserve`, capacity will be - /// greater than or equal to `self.len() + additional`. Does nothing if + /// Tries to reserve capacity for at least `additional` more bytes to be + /// inserted in the given `Wtf8Buf`. The `Wtf8Buf` may reserve more space to + /// avoid frequent reallocations. After calling `try_reserve`, capacity will + /// be greater than or equal to `self.len() + additional`. Does nothing if /// capacity is already sufficient. This method preserves the contents even /// if an error occurs. /// @@ -291,8 +295,8 @@ impl Wtf8Buf { self.bytes.reserve_exact(additional) } - /// Tries to reserve the minimum capacity for exactly `additional` - /// length units in the given `Wtf8Buf`. After calling + /// Tries to reserve the minimum capacity for exactly `additional` more + /// bytes to be inserted in the given `Wtf8Buf`. After calling /// `try_reserve_exact`, capacity will be greater than or equal to /// `self.len() + additional` if it returns `Ok(())`. /// Does nothing if the capacity is already sufficient. @@ -450,6 +454,8 @@ impl Wtf8Buf { match self.next_surrogate(pos) { Some((surrogate_pos, _)) => { pos = surrogate_pos + 3; + // Surrogates and the replacement character are all 3 bytes, + // so they can substituted in-place. self.bytes[surrogate_pos..pos] .copy_from_slice(UTF8_REPLACEMENT_CHARACTER.as_bytes()); } @@ -535,9 +541,9 @@ impl AsInner<[u8]> for Wtf8 { } } -/// Format the slice with double quotes, -/// and surrogates as `\u` followed by four hexadecimal digits. -/// Example: `"a\u{D800}"` for a slice with code points [U+0061, U+D800] +/// Formats the string in double quotes, with characters escaped according to +/// [`char::escape_debug`] and unpaired surrogates represented as `\u{xxxx}`, +/// where each `x` is a hexadecimal digit. impl fmt::Debug for Wtf8 { fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { fn write_str_escaped(f: &mut fmt::Formatter<'_>, s: &str) -> fmt::Result { @@ -562,6 +568,8 @@ impl fmt::Debug for Wtf8 { } } +/// Formats the string with unpaired surrogates substituted with the replacement +/// character, U+FFFD. impl fmt::Display for Wtf8 { fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { let wtf8_bytes = &self.bytes; |
