about summary refs log tree commit diff
path: root/src/libstd
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2017-12-18 02:54:11 +0000
committer bors <bors@rust-lang.org> 2017-12-18 02:54:11 +0000
commit a3a7203e2c9ed30a501da86f3fa1f9efe707ac94 (patch)
tree 862a6ca6053469cdac26ec03a14c02076945fd5a /src/libstd
parent dc39c31699a83313edf2ac096d0bf3cef871b705 (diff)
parent 8fac7d95bc2429ff2156bf1afcf8972f92cd6afd (diff)
download rust-a3a7203e2c9ed30a501da86f3fa1f9efe707ac94.tar.gz
rust-a3a7203e2c9ed30a501da86f3fa1f9efe707ac94.zip
Auto merge of #46798 - Diggsey:debug-osstr, r=dtolnay
Add lossless `Debug` implementation for Unix `OsStr`s

Fixes #22766

Invalid UTF-8 byte sequences are replaced with `\xFF`-style escape codes, while valid UTF-8 goes through the normal `Debug` implementation.

This is necessarily different from the Windows `Debug` implementation, which uses `\u{xxxx}`-style escape sequences for unpaired surrogates, but the two implementations are consistent in that both are lossless and both display invalid sequences in the way most similar to existing language syntax.

r? @dtolnay
Diffstat (limited to 'src/libstd')
-rw-r--r-- src/libstd/sys/redox/os_str.rs 3
-rw-r--r-- src/libstd/sys/unix/os_str.rs 3
-rw-r--r-- src/libstd/sys/wasm/os_str.rs 3
-rw-r--r-- src/libstd/sys_common/bytestring.rs 56
-rw-r--r-- src/libstd/sys_common/mod.rs 1
5 files changed, 63 insertions, 3 deletions
diff --git a/src/libstd/sys/redox/os_str.rs b/src/libstd/sys/redox/os_str.rs
index 5c40d42fa0a..655bfdb9167 100644
--- a/src/libstd/sys/redox/os_str.rs
+++ b/src/libstd/sys/redox/os_str.rs
@@ -18,6 +18,7 @@ use mem;
 use rc::Rc;
 use sync::Arc;
 use sys_common::{AsInner, IntoInner};
+use sys_common::bytestring::debug_fmt_bytestring;
 use std_unicode::lossy::Utf8Lossy;
 
 #[derive(Clone, Hash)]
@@ -31,7 +32,7 @@ pub struct Slice {
 
 impl fmt::Debug for Slice {
     fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
-        fmt::Debug::fmt(&Utf8Lossy::from_bytes(&self.inner), formatter)
+        debug_fmt_bytestring(&self.inner, formatter)
     }
 }
 
diff --git a/src/libstd/sys/unix/os_str.rs b/src/libstd/sys/unix/os_str.rs
index a27e76a0e3b..e0349387998 100644
--- a/src/libstd/sys/unix/os_str.rs
+++ b/src/libstd/sys/unix/os_str.rs
@@ -18,6 +18,7 @@ use mem;
 use rc::Rc;
 use sync::Arc;
 use sys_common::{AsInner, IntoInner};
+use sys_common::bytestring::debug_fmt_bytestring;
 use std_unicode::lossy::Utf8Lossy;
 
 #[derive(Clone, Hash)]
@@ -31,7 +32,7 @@ pub struct Slice {
 
 impl fmt::Debug for Slice {
     fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
-        fmt::Debug::fmt(&Utf8Lossy::from_bytes(&self.inner), formatter)
+        debug_fmt_bytestring(&self.inner, formatter)
     }
 }
 
diff --git a/src/libstd/sys/wasm/os_str.rs b/src/libstd/sys/wasm/os_str.rs
index 0e64b5bc6b8..543c22ebe18 100644
--- a/src/libstd/sys/wasm/os_str.rs
+++ b/src/libstd/sys/wasm/os_str.rs
@@ -18,6 +18,7 @@ use mem;
 use rc::Rc;
 use sync::Arc;
 use sys_common::{AsInner, IntoInner};
+use sys_common::bytestring::debug_fmt_bytestring;
 use std_unicode::lossy::Utf8Lossy;
 
 #[derive(Clone, Hash)]
@@ -31,7 +32,7 @@ pub struct Slice {
 
 impl fmt::Debug for Slice {
     fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
-        fmt::Debug::fmt(&Utf8Lossy::from_bytes(&self.inner), formatter)
+        debug_fmt_bytestring(&self.inner, formatter)
     }
 }
 
diff --git a/src/libstd/sys_common/bytestring.rs b/src/libstd/sys_common/bytestring.rs
new file mode 100644
index 00000000000..eb9cad09915
--- /dev/null
+++ b/src/libstd/sys_common/bytestring.rs
@@ -0,0 +1,56 @@
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#![allow(dead_code)]
+
+use fmt::{Formatter, Result, Write};
+use std_unicode::lossy::{Utf8Lossy, Utf8LossyChunk};
+
+pub fn debug_fmt_bytestring(slice: &[u8], f: &mut Formatter) -> Result {
+    // Writes out a valid unicode string with the correct escape sequences
+    fn write_str_escaped(f: &mut Formatter, s: &str) -> Result {
+        for c in s.chars().flat_map(|c| c.escape_debug()) {
+            f.write_char(c)?
+        }
+        Ok(())
+    }
+
+    f.write_str("\"")?;
+    for Utf8LossyChunk { valid, broken } in Utf8Lossy::from_bytes(slice).chunks() {
+        write_str_escaped(f, valid)?;
+        for b in broken {
+            write!(f, "\\x{:02X}", b)?;
+        }
+    }
+    f.write_str("\"")
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use fmt::{Formatter, Result, Debug};
+
+    #[test]
+    fn smoke() {
+        struct Helper<'a>(&'a [u8]);
+
+        impl<'a> Debug for Helper<'a> {
+            fn fmt(&self, f: &mut Formatter) -> Result {
+                debug_fmt_bytestring(self.0, f)
+            }
+        }
+
+        let input =      b"\xF0hello,\tworld";
+        let expected = r#""\xF0hello,\tworld""#;
+        let output = format!("{:?}", Helper(input));
+
+        assert!(output == expected);
+    }
+}
diff --git a/src/libstd/sys_common/mod.rs b/src/libstd/sys_common/mod.rs
index 14e5697b94e..5c4d7b52754 100644
--- a/src/libstd/sys_common/mod.rs
+++ b/src/libstd/sys_common/mod.rs
@@ -43,6 +43,7 @@ pub mod thread_info;
 pub mod thread_local;
 pub mod util;
 pub mod wtf8;
+pub mod bytestring;
 
 cfg_if! {
     if #[cfg(any(target_os = "redox", target_os = "l4re"))] {