about summary refs log tree commit diff
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2024-04-26 17:41:24 +0000
committer bors <bors@rust-lang.org> 2024-04-26 17:41:24 +0000
commit 4d570eea025a19564429eb52b34ec34e14659f55 (patch)
tree a265caa89ea72bd1b354e734fe629ed4f88e90a8
parent 1b3a32958bb54bde45e693217e8f7469459e5865 (diff)
parent 61cf00464e311b3dcf1a47b342c57978b1f40d88 (diff)
download rust-4d570eea025a19564429eb52b34ec34e14659f55.tar.gz
rust-4d570eea025a19564429eb52b34ec34e14659f55.zip
Auto merge of #123909 - dtolnay:utf8chunks, r=joboet
Stabilize `Utf8Chunks`

Pending FCP in https://github.com/rust-lang/rust/issues/99543.

This PR includes the proposed modification in https://github.com/rust-lang/libs-team/issues/190 as agreed in https://github.com/rust-lang/rust/issues/99543#issuecomment-2050406568.
-rw-r--r-- library/alloc/src/lib.rs 1
-rw-r--r-- library/alloc/src/str.rs 2
-rw-r--r-- library/alloc/src/string.rs 4
-rw-r--r-- library/core/src/str/lossy.rs 74
-rw-r--r-- library/core/src/str/mod.rs 2
-rw-r--r-- library/core/tests/lib.rs 1
-rw-r--r-- library/core/tests/str_lossy.rs 6
-rw-r--r-- library/std/src/lib.rs 1
-rw-r--r-- library/std/src/sys/os_str/bytes.rs 6
9 files changed, 57 insertions, 40 deletions
diff --git a/library/alloc/src/lib.rs b/library/alloc/src/lib.rs
index b417513aaa2..2bd8fca7e01 100644
--- a/library/alloc/src/lib.rs
+++ b/library/alloc/src/lib.rs
@@ -161,7 +161,6 @@
 #![feature(tuple_trait)]
 #![feature(unicode_internals)]
 #![feature(unsize)]
-#![feature(utf8_chunks)]
 #![feature(vec_pop_if)]
 // tidy-alphabetical-end
 //
diff --git a/library/alloc/src/str.rs b/library/alloc/src/str.rs
index ade114678b7..d88639c4092 100644
--- a/library/alloc/src/str.rs
+++ b/library/alloc/src/str.rs
@@ -53,7 +53,7 @@ pub use core::str::{RSplit, Split};
 pub use core::str::{RSplitN, SplitN};
 #[stable(feature = "rust1", since = "1.0.0")]
 pub use core::str::{RSplitTerminator, SplitTerminator};
-#[unstable(feature = "utf8_chunks", issue = "99543")]
+#[stable(feature = "utf8_chunks", since = "CURRENT_RUSTC_VERSION")]
 pub use core::str::{Utf8Chunk, Utf8Chunks};
 
 /// Note: `str` in `Concat<str>` is not meaningful here.
diff --git a/library/alloc/src/string.rs b/library/alloc/src/string.rs
index 082af1447ec..7c9f13e30ff 100644
--- a/library/alloc/src/string.rs
+++ b/library/alloc/src/string.rs
@@ -58,8 +58,6 @@ use core::ops::{self, Range, RangeBounds};
 use core::ptr;
 use core::slice;
 use core::str::pattern::Pattern;
-#[cfg(not(no_global_oom_handling))]
-use core::str::Utf8Chunks;
 
 #[cfg(not(no_global_oom_handling))]
 use crate::borrow::{Cow, ToOwned};
@@ -633,7 +631,7 @@ impl String {
     #[cfg(not(no_global_oom_handling))]
     #[stable(feature = "rust1", since = "1.0.0")]
     pub fn from_utf8_lossy(v: &[u8]) -> Cow<'_, str> {
-        let mut iter = Utf8Chunks::new(v);
+        let mut iter = v.utf8_chunks();
 
         let first_valid = if let Some(chunk) = iter.next() {
             let valid = chunk.valid();
diff --git a/library/core/src/str/lossy.rs b/library/core/src/str/lossy.rs
index 59f873d1268..f8ecf1f3a7c 100644
--- a/library/core/src/str/lossy.rs
+++ b/library/core/src/str/lossy.rs
@@ -6,6 +6,46 @@ use crate::iter::FusedIterator;
 use super::from_utf8_unchecked;
 use super::validations::utf8_char_width;
 
+impl [u8] {
+    /// Creates an iterator over the contiguous valid UTF-8 ranges of this
+    /// slice, and the non-UTF-8 fragments in between.
+    ///
+    /// # Examples
+    ///
+    /// This function formats arbitrary but mostly-UTF-8 bytes into Rust source
+    /// code in the form of a C-string literal (`c"..."`).
+    ///
+    /// ```
+    /// use std::fmt::Write as _;
+    ///
+    /// pub fn cstr_literal(bytes: &[u8]) -> String {
+    ///     let mut repr = String::new();
+    ///     repr.push_str("c\"");
+    ///     for chunk in bytes.utf8_chunks() {
+    ///         for ch in chunk.valid().chars() {
+    ///             // Escapes \0, \t, \r, \n, \\, \', \", and uses \u{...} for non-printable characters.
+    ///             write!(repr, "{}", ch.escape_debug()).unwrap();
+    ///         }
+    ///         for byte in chunk.invalid() {
+    ///             write!(repr, "\\x{:02X}", byte).unwrap();
+    ///         }
+    ///     }
+    ///     repr.push('"');
+    ///     repr
+    /// }
+    ///
+    /// fn main() {
+    ///     let lit = cstr_literal(b"\xferris the \xf0\x9f\xa6\x80\x07");
+    ///     let expected = stringify!(c"\xFErris the 🦀\u{7}");
+    ///     assert_eq!(lit, expected);
+    /// }
+    /// ```
+    #[stable(feature = "utf8_chunks", since = "CURRENT_RUSTC_VERSION")]
+    pub fn utf8_chunks(&self) -> Utf8Chunks<'_> {
+        Utf8Chunks { source: self }
+    }
+}
+
 /// An item returned by the [`Utf8Chunks`] iterator.
 ///
 /// A `Utf8Chunk` stores a sequence of [`u8`] up to the first broken character
@@ -14,15 +54,11 @@ use super::validations::utf8_char_width;
 /// # Examples
 ///
 /// ```
-/// #![feature(utf8_chunks)]
-///
-/// use std::str::Utf8Chunks;
-///
 /// // An invalid UTF-8 string
 /// let bytes = b"foo\xF1\x80bar";
 ///
 /// // Decode the first `Utf8Chunk`
-/// let chunk = Utf8Chunks::new(bytes).next().unwrap();
+/// let chunk = bytes.utf8_chunks().next().unwrap();
 ///
 /// // The first three characters are valid UTF-8
 /// assert_eq!("foo", chunk.valid());
@@ -30,7 +66,7 @@ use super::validations::utf8_char_width;
 /// // The fourth character is broken
 /// assert_eq!(b"\xF1\x80", chunk.invalid());
 /// ```
-#[unstable(feature = "utf8_chunks", issue = "99543")]
+#[stable(feature = "utf8_chunks", since = "CURRENT_RUSTC_VERSION")]
 #[derive(Clone, Debug, PartialEq, Eq)]
 pub struct Utf8Chunk<'a> {
     valid: &'a str,
@@ -43,7 +79,7 @@ impl<'a> Utf8Chunk<'a> {
     /// This substring can be empty at the start of the string or between
     /// broken UTF-8 characters.
     #[must_use]
-    #[unstable(feature = "utf8_chunks", issue = "99543")]
+    #[stable(feature = "utf8_chunks", since = "CURRENT_RUSTC_VERSION")]
     pub fn valid(&self) -> &'a str {
         self.valid
     }
@@ -63,7 +99,7 @@ impl<'a> Utf8Chunk<'a> {
     /// [`valid`]: Self::valid
     /// [`U+FFFD REPLACEMENT CHARACTER`]: crate::char::REPLACEMENT_CHARACTER
     #[must_use]
-    #[unstable(feature = "utf8_chunks", issue = "99543")]
+    #[stable(feature = "utf8_chunks", since = "CURRENT_RUSTC_VERSION")]
     pub fn invalid(&self) -> &'a [u8] {
         self.invalid
     }
@@ -78,7 +114,7 @@ impl fmt::Debug for Debug<'_> {
     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
         f.write_char('"')?;
 
-        for chunk in Utf8Chunks::new(self.0) {
+        for chunk in self.0.utf8_chunks() {
             // Valid part.
             // Here we partially parse UTF-8 again which is suboptimal.
             {
@@ -123,12 +159,8 @@ impl fmt::Debug for Debug<'_> {
 /// [`String::from_utf8_lossy`] without allocating heap memory:
 ///
 /// ```
-/// #![feature(utf8_chunks)]
-///
-/// use std::str::Utf8Chunks;
-///
 /// fn from_utf8_lossy<F>(input: &[u8], mut push: F) where F: FnMut(&str) {
-///     for chunk in Utf8Chunks::new(input) {
+///     for chunk in input.utf8_chunks() {
 ///         push(chunk.valid());
 ///
 ///         if !chunk.invalid().is_empty() {
@@ -140,19 +172,13 @@ impl fmt::Debug for Debug<'_> {
 ///
 /// [`String::from_utf8_lossy`]: ../../std/string/struct.String.html#method.from_utf8_lossy
 #[must_use = "iterators are lazy and do nothing unless consumed"]
-#[unstable(feature = "utf8_chunks", issue = "99543")]
+#[stable(feature = "utf8_chunks", since = "CURRENT_RUSTC_VERSION")]
 #[derive(Clone)]
 pub struct Utf8Chunks<'a> {
     source: &'a [u8],
 }
 
 impl<'a> Utf8Chunks<'a> {
-    /// Creates a new iterator to decode the bytes.
-    #[unstable(feature = "utf8_chunks", issue = "99543")]
-    pub fn new(bytes: &'a [u8]) -> Self {
-        Self { source: bytes }
-    }
-
     #[doc(hidden)]
     #[unstable(feature = "str_internals", issue = "none")]
     pub fn debug(&self) -> Debug<'_> {
@@ -160,7 +186,7 @@ impl<'a> Utf8Chunks<'a> {
     }
 }
 
-#[unstable(feature = "utf8_chunks", issue = "99543")]
+#[stable(feature = "utf8_chunks", since = "CURRENT_RUSTC_VERSION")]
 impl<'a> Iterator for Utf8Chunks<'a> {
     type Item = Utf8Chunk<'a>;
 
@@ -259,10 +285,10 @@ impl<'a> Iterator for Utf8Chunks<'a> {
     }
 }
 
-#[unstable(feature = "utf8_chunks", issue = "99543")]
+#[stable(feature = "utf8_chunks", since = "CURRENT_RUSTC_VERSION")]
 impl FusedIterator for Utf8Chunks<'_> {}
 
-#[unstable(feature = "utf8_chunks", issue = "99543")]
+#[stable(feature = "utf8_chunks", since = "CURRENT_RUSTC_VERSION")]
 impl fmt::Debug for Utf8Chunks<'_> {
     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
         f.debug_struct("Utf8Chunks").field("source", &self.debug()).finish()
diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs
index 61a60456145..3313da9dce7 100644
--- a/library/core/src/str/mod.rs
+++ b/library/core/src/str/mod.rs
@@ -24,7 +24,7 @@ use crate::slice::{self, SliceIndex};
 pub mod pattern;
 
 mod lossy;
-#[unstable(feature = "utf8_chunks", issue = "99543")]
+#[stable(feature = "utf8_chunks", since = "CURRENT_RUSTC_VERSION")]
 pub use lossy::{Utf8Chunk, Utf8Chunks};
 
 #[stable(feature = "rust1", since = "1.0.0")]
diff --git a/library/core/tests/lib.rs b/library/core/tests/lib.rs
index d6e705a37a7..8a35fdd1857 100644
--- a/library/core/tests/lib.rs
+++ b/library/core/tests/lib.rs
@@ -117,7 +117,6 @@
 #![feature(error_generic_member_access)]
 #![feature(error_in_core)]
 #![feature(trait_upcasting)]
-#![feature(utf8_chunks)]
 #![feature(is_ascii_octdigit)]
 #![feature(get_many_mut)]
 #![feature(iter_map_windows)]
diff --git a/library/core/tests/str_lossy.rs b/library/core/tests/str_lossy.rs
index 9d3f0b65fdb..6e70ea3e285 100644
--- a/library/core/tests/str_lossy.rs
+++ b/library/core/tests/str_lossy.rs
@@ -1,10 +1,8 @@
-use core::str::Utf8Chunks;
-
 #[test]
 fn chunks() {
     macro_rules! assert_chunks {
         ( $string:expr, $(($valid:expr, $invalid:expr)),* $(,)? ) => {{
-            let mut iter = Utf8Chunks::new($string);
+            let mut iter = $string.utf8_chunks();
             $(
                 let chunk = iter.next().expect("missing chunk");
                 assert_eq!($valid, chunk.valid());
@@ -79,7 +77,7 @@ fn debug() {
         "\"Hello\\xC0\\x80 There\\xE6\\x83 Goodbye\\u{10d4ea}\"",
         &format!(
             "{:?}",
-            Utf8Chunks::new(b"Hello\xC0\x80 There\xE6\x83 Goodbye\xf4\x8d\x93\xaa").debug(),
+            b"Hello\xC0\x80 There\xE6\x83 Goodbye\xf4\x8d\x93\xaa".utf8_chunks().debug(),
         ),
     );
 }
diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs
index aa908f0499f..3d4f1872f38 100644
--- a/library/std/src/lib.rs
+++ b/library/std/src/lib.rs
@@ -314,7 +314,6 @@
 #![feature(thread_local)]
 #![feature(try_blocks)]
 #![feature(type_alias_impl_trait)]
-#![feature(utf8_chunks)]
 // tidy-alphabetical-end
 //
 // Library features (core):
diff --git a/library/std/src/sys/os_str/bytes.rs b/library/std/src/sys/os_str/bytes.rs
index 4ca3f1cd185..9be02bc191e 100644
--- a/library/std/src/sys/os_str/bytes.rs
+++ b/library/std/src/sys/os_str/bytes.rs
@@ -11,8 +11,6 @@ use crate::str;
 use crate::sync::Arc;
 use crate::sys_common::{AsInner, IntoInner};
 
-use core::str::Utf8Chunks;
-
 #[cfg(test)]
 mod tests;
 
@@ -29,7 +27,7 @@ pub struct Slice {
 
 impl fmt::Debug for Slice {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        fmt::Debug::fmt(&Utf8Chunks::new(&self.inner).debug(), f)
+        fmt::Debug::fmt(&self.inner.utf8_chunks().debug(), f)
     }
 }
 
@@ -41,7 +39,7 @@ impl fmt::Display for Slice {
             return "".fmt(f);
         }
 
-        for chunk in Utf8Chunks::new(&self.inner) {
+        for chunk in self.inner.utf8_chunks() {
             let valid = chunk.valid();
             // If we successfully decoded the whole chunk as a valid string then
             // we can return a direct formatting of the string which will also