about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Thalia Archibald <thalia@archibald.dev> 2025-02-05 14:03:50 -0800
committer: Thalia Archibald <thalia@archibald.dev> 2025-02-16 14:43:31 -0800
commit: 09dc38f23b4aa759d65d2fb940781f080445f685 (patch)
tree: 4389fa05730db14eb44b13560d8dfa2e760e7649
parent: 5bc62314547c7639484481f62f218156697cfef0 (diff)
download: rust-09dc38f23b4aa759d65d2fb940781f080445f685.tar.gz
download: rust-09dc38f23b4aa759d65d2fb940781f080445f685.zip
Improve WTF-8 comments
-rw-r--r-- library/std/src/sys_common/wtf8.rs 38
1 files changed, 23 insertions, 15 deletions
diff --git a/library/std/src/sys_common/wtf8.rs b/library/std/src/sys_common/wtf8.rs
index 6c60d901ee9..0d327189d12 100644
--- a/library/std/src/sys_common/wtf8.rs
+++ b/library/std/src/sys_common/wtf8.rs
@@ -156,9 +156,12 @@ impl ops::DerefMut for Wtf8Buf {
     }
 }
 
-/// Format the string with double quotes,
-/// and surrogates as `\u` followed by four hexadecimal digits.
-/// Example: `"a\u{D800}"` for a string with code points [U+0061, U+D800]
+/// Formats the string in double quotes, with characters escaped according to
+/// [`char::escape_debug`] and unpaired surrogates represented as `\u{xxxx}`,
+/// where each `x` is a hexadecimal digit.
+///
+/// For example, the code points [U+0061, U+D800, U+000A] are formatted as
+/// `"a\u{D800}\n"`.
 impl fmt::Debug for Wtf8Buf {
     #[inline]
     fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
@@ -181,7 +184,7 @@ impl Wtf8Buf {
 
     /// Creates a WTF-8 string from a WTF-8 byte vec.
     ///
-    /// Since the byte vec is not checked for valid WTF-8, this functions is
+    /// Since the byte vec is not checked for valid WTF-8, this function is
     /// marked unsafe.
     #[inline]
     pub unsafe fn from_bytes_unchecked(value: Vec<u8>) -> Wtf8Buf {
@@ -237,8 +240,9 @@ impl Wtf8Buf {
         string
     }
 
-    /// Copied from String::push
+    /// Appends the given code point to the end of this string.
     /// This does **not** include the WTF-8 concatenation check or `is_known_utf8` check.
+    /// Copied from String::push.
     fn push_code_point_unchecked(&mut self, code_point: CodePoint) {
         let mut bytes = [0; 4];
         let bytes = encode_utf8_raw(code_point.value, &mut bytes);
@@ -264,16 +268,16 @@ impl Wtf8Buf {
     ///
     /// # Panics
     ///
-    /// Panics if the new capacity overflows `usize`.
+    /// Panics if the new capacity exceeds `isize::MAX` bytes.
     #[inline]
     pub fn reserve(&mut self, additional: usize) {
         self.bytes.reserve(additional)
     }
 
-    /// Tries to reserve capacity for at least `additional` more length units
-    /// in the given `Wtf8Buf`. The `Wtf8Buf` may reserve more space to avoid
-    /// frequent reallocations. After calling `try_reserve`, capacity will be
-    /// greater than or equal to `self.len() + additional`. Does nothing if
+    /// Tries to reserve capacity for at least `additional` more bytes to be
+    /// inserted in the given `Wtf8Buf`. The `Wtf8Buf` may reserve more space to
+    /// avoid frequent reallocations. After calling `try_reserve`, capacity will
+    /// be greater than or equal to `self.len() + additional`. Does nothing if
     /// capacity is already sufficient. This method preserves the contents even
     /// if an error occurs.
     ///
@@ -291,8 +295,8 @@ impl Wtf8Buf {
         self.bytes.reserve_exact(additional)
     }
 
-    /// Tries to reserve the minimum capacity for exactly `additional`
-    /// length units in the given `Wtf8Buf`. After calling
+    /// Tries to reserve the minimum capacity for exactly `additional` more
+    /// bytes to be inserted in the given `Wtf8Buf`. After calling
     /// `try_reserve_exact`, capacity will be greater than or equal to
     /// `self.len() + additional` if it returns `Ok(())`.
     /// Does nothing if the capacity is already sufficient.
@@ -450,6 +454,8 @@ impl Wtf8Buf {
             match self.next_surrogate(pos) {
                 Some((surrogate_pos, _)) => {
                     pos = surrogate_pos + 3;
+                    // Surrogates and the replacement character are all 3 bytes,
+                    // so they can be substituted in-place.
                     self.bytes[surrogate_pos..pos]
                         .copy_from_slice(UTF8_REPLACEMENT_CHARACTER.as_bytes());
                 }
@@ -535,9 +541,9 @@ impl AsInner<[u8]> for Wtf8 {
     }
 }
 
-/// Format the slice with double quotes,
-/// and surrogates as `\u` followed by four hexadecimal digits.
-/// Example: `"a\u{D800}"` for a slice with code points [U+0061, U+D800]
+/// Formats the string in double quotes, with characters escaped according to
+/// [`char::escape_debug`] and unpaired surrogates represented as `\u{xxxx}`,
+/// where each `x` is a hexadecimal digit.
 impl fmt::Debug for Wtf8 {
     fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
         fn write_str_escaped(f: &mut fmt::Formatter<'_>, s: &str) -> fmt::Result {
@@ -562,6 +568,8 @@ impl fmt::Debug for Wtf8 {
     }
 }
 
+/// Formats the string with unpaired surrogates substituted with the replacement
+/// character, U+FFFD.
 impl fmt::Display for Wtf8 {
     fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
         let wtf8_bytes = &self.bytes;