diff options
| author | bors <bors@rust-lang.org> | 2017-12-18 02:54:11 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2017-12-18 02:54:11 +0000 |
| commit | a3a7203e2c9ed30a501da86f3fa1f9efe707ac94 (patch) | |
| tree | 862a6ca6053469cdac26ec03a14c02076945fd5a /src/libstd | |
| parent | dc39c31699a83313edf2ac096d0bf3cef871b705 (diff) | |
| parent | 8fac7d95bc2429ff2156bf1afcf8972f92cd6afd (diff) | |
| download | rust-a3a7203e2c9ed30a501da86f3fa1f9efe707ac94.tar.gz rust-a3a7203e2c9ed30a501da86f3fa1f9efe707ac94.zip | |
Auto merge of #46798 - Diggsey:debug-osstr, r=dtolnay
Add lossless debug implementation for unix OsStrs
Fixes #22766
Invalid UTF-8 byte sequences are replaced with `\xFF`-style escape codes, while valid UTF-8 goes through the normal `Debug` implementation.
This is necessarily different from the Windows `Debug` implementation, which uses `\u{xxxx}`-style escape sequences for unpaired surrogates, but the two implementations are consistent in that both are lossless and both display invalid sequences in the way most similar to existing language syntax.
r? @dtolnay
Diffstat (limited to 'src/libstd')
| -rw-r--r-- | src/libstd/sys/redox/os_str.rs | 3 | ||||
| -rw-r--r-- | src/libstd/sys/unix/os_str.rs | 3 | ||||
| -rw-r--r-- | src/libstd/sys/wasm/os_str.rs | 3 | ||||
| -rw-r--r-- | src/libstd/sys_common/bytestring.rs | 56 | ||||
| -rw-r--r-- | src/libstd/sys_common/mod.rs | 1 |
5 files changed, 63 insertions, 3 deletions
diff --git a/src/libstd/sys/redox/os_str.rs b/src/libstd/sys/redox/os_str.rs index 5c40d42fa0a..655bfdb9167 100644 --- a/src/libstd/sys/redox/os_str.rs +++ b/src/libstd/sys/redox/os_str.rs @@ -18,6 +18,7 @@ use mem; use rc::Rc; use sync::Arc; use sys_common::{AsInner, IntoInner}; +use sys_common::bytestring::debug_fmt_bytestring; use std_unicode::lossy::Utf8Lossy; #[derive(Clone, Hash)] @@ -31,7 +32,7 @@ pub struct Slice { impl fmt::Debug for Slice { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - fmt::Debug::fmt(&Utf8Lossy::from_bytes(&self.inner), formatter) + debug_fmt_bytestring(&self.inner, formatter) } } diff --git a/src/libstd/sys/unix/os_str.rs b/src/libstd/sys/unix/os_str.rs index a27e76a0e3b..e0349387998 100644 --- a/src/libstd/sys/unix/os_str.rs +++ b/src/libstd/sys/unix/os_str.rs @@ -18,6 +18,7 @@ use mem; use rc::Rc; use sync::Arc; use sys_common::{AsInner, IntoInner}; +use sys_common::bytestring::debug_fmt_bytestring; use std_unicode::lossy::Utf8Lossy; #[derive(Clone, Hash)] @@ -31,7 +32,7 @@ pub struct Slice { impl fmt::Debug for Slice { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - fmt::Debug::fmt(&Utf8Lossy::from_bytes(&self.inner), formatter) + debug_fmt_bytestring(&self.inner, formatter) } } diff --git a/src/libstd/sys/wasm/os_str.rs b/src/libstd/sys/wasm/os_str.rs index 0e64b5bc6b8..543c22ebe18 100644 --- a/src/libstd/sys/wasm/os_str.rs +++ b/src/libstd/sys/wasm/os_str.rs @@ -18,6 +18,7 @@ use mem; use rc::Rc; use sync::Arc; use sys_common::{AsInner, IntoInner}; +use sys_common::bytestring::debug_fmt_bytestring; use std_unicode::lossy::Utf8Lossy; #[derive(Clone, Hash)] @@ -31,7 +32,7 @@ pub struct Slice { impl fmt::Debug for Slice { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - fmt::Debug::fmt(&Utf8Lossy::from_bytes(&self.inner), formatter) + debug_fmt_bytestring(&self.inner, formatter) } } diff --git a/src/libstd/sys_common/bytestring.rs b/src/libstd/sys_common/bytestring.rs new file mode 
100644 index 00000000000..eb9cad09915 --- /dev/null +++ b/src/libstd/sys_common/bytestring.rs @@ -0,0 +1,56 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![allow(dead_code)] + +use fmt::{Formatter, Result, Write}; +use std_unicode::lossy::{Utf8Lossy, Utf8LossyChunk}; + +pub fn debug_fmt_bytestring(slice: &[u8], f: &mut Formatter) -> Result { + // Writes out a valid unicode string with the correct escape sequences + fn write_str_escaped(f: &mut Formatter, s: &str) -> Result { + for c in s.chars().flat_map(|c| c.escape_debug()) { + f.write_char(c)? + } + Ok(()) + } + + f.write_str("\"")?; + for Utf8LossyChunk { valid, broken } in Utf8Lossy::from_bytes(slice).chunks() { + write_str_escaped(f, valid)?; + for b in broken { + write!(f, "\\x{:02X}", b)?; + } + } + f.write_str("\"") +} + +#[cfg(test)] +mod tests { + use super::*; + use fmt::{Formatter, Result, Debug}; + + #[test] + fn smoke() { + struct Helper<'a>(&'a [u8]); + + impl<'a> Debug for Helper<'a> { + fn fmt(&self, f: &mut Formatter) -> Result { + debug_fmt_bytestring(self.0, f) + } + } + + let input = b"\xF0hello,\tworld"; + let expected = r#""\xF0hello,\tworld""#; + let output = format!("{:?}", Helper(input)); + + assert!(output == expected); + } +} diff --git a/src/libstd/sys_common/mod.rs b/src/libstd/sys_common/mod.rs index 14e5697b94e..5c4d7b52754 100644 --- a/src/libstd/sys_common/mod.rs +++ b/src/libstd/sys_common/mod.rs @@ -43,6 +43,7 @@ pub mod thread_info; pub mod thread_local; pub mod util; pub mod wtf8; +pub mod bytestring; cfg_if! 
{ if #[cfg(any(target_os = "redox", target_os = "l4re"))] { |
