about summary refs log tree commit diff
path: root/library/std/src
diff options
context:
space:
mode:
authorMatthias Krüger <matthias.krueger@famsik.de>2022-08-20 19:32:07 +0200
committerGitHub <noreply@github.com>2022-08-20 19:32:07 +0200
commitd49906519be1e071f771b1126f13d41e8106a24e (patch)
treefba3afb7236211e2b40b2064720cac75ddabe0a1 /library/std/src
parent2be85b0d03d4650a3f6acec6322faeee3556d5fd (diff)
parente8ee0b7b2b9f004c51f7a10eeff0fbfe79af28d8 (diff)
downloadrust-d49906519be1e071f771b1126f13d41e8106a24e.tar.gz
rust-d49906519be1e071f771b1126f13d41e8106a24e.zip
Rollup merge of #99544 - dylni:expose-utf8lossy, r=Mark-Simulacrum
Expose `Utf8Lossy` as `Utf8Chunks`

This PR changes the feature for `Utf8Lossy` from `str_internals` to `utf8_lossy` and improves the API. This is done to eventually expose the API as stable.

Proposal: rust-lang/libs-team#54
Tracking Issue: #99543
Diffstat (limited to 'library/std/src')
-rw-r--r--library/std/src/lib.rs1
-rw-r--r--library/std/src/sys/unix/os_str.rs40
-rw-r--r--library/std/src/sys/unix/os_str/tests.rs8
3 files changed, 32 insertions, 17 deletions
diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs
index a8d6645794a..8627be85422 100644
--- a/library/std/src/lib.rs
+++ b/library/std/src/lib.rs
@@ -258,6 +258,7 @@
 #![feature(staged_api)]
 #![feature(thread_local)]
 #![feature(try_blocks)]
+#![feature(utf8_chunks)]
 //
 // Library features (core):
 #![feature(array_error_internals)]
diff --git a/library/std/src/sys/unix/os_str.rs b/library/std/src/sys/unix/os_str.rs
index ccbc182240c..017e2af29d4 100644
--- a/library/std/src/sys/unix/os_str.rs
+++ b/library/std/src/sys/unix/os_str.rs
@@ -11,7 +11,7 @@ use crate::str;
 use crate::sync::Arc;
 use crate::sys_common::{AsInner, IntoInner};
 
-use core::str::lossy::{Utf8Lossy, Utf8LossyChunk};
+use core::str::Utf8Chunks;
 
 #[cfg(test)]
 #[path = "../unix/os_str/tests.rs"]
@@ -29,26 +29,32 @@ pub struct Slice {
 }
 
 impl fmt::Debug for Slice {
-    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
-        // Writes out a valid unicode string with the correct escape sequences
-
-        formatter.write_str("\"")?;
-        for Utf8LossyChunk { valid, broken } in Utf8Lossy::from_bytes(&self.inner).chunks() {
-            for c in valid.chars().flat_map(|c| c.escape_debug()) {
-                formatter.write_char(c)?
-            }
-
-            for b in broken {
-                write!(formatter, "\\x{:02X}", b)?;
-            }
-        }
-        formatter.write_str("\"")
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::Debug::fmt(&Utf8Chunks::new(&self.inner).debug(), f)
     }
 }
 
 impl fmt::Display for Slice {
-    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
-        fmt::Display::fmt(&Utf8Lossy::from_bytes(&self.inner), formatter)
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // If we're the empty string then our iterator won't actually yield
+        // anything, so perform the formatting manually
+        if self.inner.is_empty() {
+            return "".fmt(f);
+        }
+
+        for chunk in Utf8Chunks::new(&self.inner) {
+            let valid = chunk.valid();
+            // If we successfully decoded the whole chunk as a valid string then
+            // we can return a direct formatting of the string which will also
+            // respect various formatting flags if possible.
+            if chunk.invalid().is_empty() {
+                return valid.fmt(f);
+            }
+
+            f.write_str(valid)?;
+            f.write_char(char::REPLACEMENT_CHARACTER)?;
+        }
+        Ok(())
     }
 }
 
diff --git a/library/std/src/sys/unix/os_str/tests.rs b/library/std/src/sys/unix/os_str/tests.rs
index 213277f01f2..22ba0c92350 100644
--- a/library/std/src/sys/unix/os_str/tests.rs
+++ b/library/std/src/sys/unix/os_str/tests.rs
@@ -8,3 +8,11 @@ fn slice_debug_output() {
 
     assert_eq!(output, expected);
 }
+
+#[test]
+fn display() {
+    assert_eq!(
+        "Hello\u{FFFD}\u{FFFD} There\u{FFFD} Goodbye",
+        Slice::from_u8_slice(b"Hello\xC0\x80 There\xE6\x83 Goodbye").to_string(),
+    );
+}