diff options
| author | Stepan Koltsov <stepan.koltsov@gmail.com> | 2017-06-12 22:21:53 +0300 |
|---|---|---|
| committer | Stepan Koltsov <stepan.koltsov@gmail.com> | 2017-06-15 20:42:37 +0100 |
| commit | ac96fd77874bb7968b7d82bed5a410d3941bc4c8 (patch) | |
| tree | 57bf556685e5b2c29ef59dd13fb3b45177b66054 /src/libstd/sys_common | |
| parent | ea149b8571d538fc8bb2117e46161d442aef48a4 (diff) | |
| download | rust-ac96fd77874bb7968b7d82bed5a410d3941bc4c8.tar.gz rust-ac96fd77874bb7968b7d82bed5a410d3941bc4c8.zip | |
Avoid allocations in Debug for os_str
Fixes #38879
Diffstat (limited to 'src/libstd/sys_common')
| -rw-r--r-- | src/libstd/sys_common/wtf8.rs | 46 |
1 file changed, 42 insertions, 4 deletions
```diff
diff --git a/src/libstd/sys_common/wtf8.rs b/src/libstd/sys_common/wtf8.rs
index df5e4ef1d88..4e4a6e77d12 100644
--- a/src/libstd/sys_common/wtf8.rs
+++ b/src/libstd/sys_common/wtf8.rs
@@ -39,7 +39,7 @@ use slice;
 use str;
 use sys_common::AsInner;
 
-const UTF8_REPLACEMENT_CHARACTER: &'static [u8] = b"\xEF\xBF\xBD";
+const UTF8_REPLACEMENT_CHARACTER: &'static str = "\u{FFFD}";
 
 /// A Unicode code point: from U+0000 to U+10FFFF.
 ///
@@ -339,7 +339,7 @@ impl Wtf8Buf {
                 Some((surrogate_pos, _)) => {
                     pos = surrogate_pos + 3;
                     self.bytes[surrogate_pos..pos]
-                        .copy_from_slice(UTF8_REPLACEMENT_CHARACTER);
+                        .copy_from_slice(UTF8_REPLACEMENT_CHARACTER.as_bytes());
                 },
                 None => return unsafe { String::from_utf8_unchecked(self.bytes) }
             }
@@ -438,6 +438,30 @@ impl fmt::Debug for Wtf8 {
     }
 }
 
+impl fmt::Display for Wtf8 {
+    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        let wtf8_bytes = &self.bytes;
+        let mut pos = 0;
+        loop {
+            match self.next_surrogate(pos) {
+                Some((surrogate_pos, _)) => {
+                    formatter.write_str(unsafe {
+                        str::from_utf8_unchecked(&wtf8_bytes[pos .. surrogate_pos])
+                    })?;
+                    formatter.write_str(UTF8_REPLACEMENT_CHARACTER)?;
+                    pos = surrogate_pos + 3;
+                },
+                None => {
+                    formatter.write_str(unsafe {
+                        str::from_utf8_unchecked(&wtf8_bytes[pos..])
+                    })?;
+                    return Ok(());
+                }
+            }
+        }
+    }
+}
+
 impl Wtf8 {
     /// Creates a WTF-8 slice from a UTF-8 `&str` slice.
     ///
@@ -516,13 +540,13 @@ impl Wtf8 {
         let wtf8_bytes = &self.bytes;
         let mut utf8_bytes = Vec::with_capacity(self.len());
         utf8_bytes.extend_from_slice(&wtf8_bytes[..surrogate_pos]);
-        utf8_bytes.extend_from_slice(UTF8_REPLACEMENT_CHARACTER);
+        utf8_bytes.extend_from_slice(UTF8_REPLACEMENT_CHARACTER.as_bytes());
         let mut pos = surrogate_pos + 3;
         loop {
             match self.next_surrogate(pos) {
                 Some((surrogate_pos, _)) => {
                     utf8_bytes.extend_from_slice(&wtf8_bytes[pos .. surrogate_pos]);
-                    utf8_bytes.extend_from_slice(UTF8_REPLACEMENT_CHARACTER);
+                    utf8_bytes.extend_from_slice(UTF8_REPLACEMENT_CHARACTER.as_bytes());
                     pos = surrogate_pos + 3;
                 },
                 None => {
@@ -1201,6 +1225,20 @@ mod tests {
     }
 
     #[test]
+    fn wtf8_display() {
+        fn d(b: &[u8]) -> String {
+            format!("{}", &unsafe { Wtf8::from_bytes_unchecked(b) })
+        }
+
+        assert_eq!("", d("".as_bytes()));
+        assert_eq!("aé 💩", d("aé 💩".as_bytes()));
+
+        let mut string = Wtf8Buf::from_str("aé 💩");
+        string.push(CodePoint::from_u32(0xD800).unwrap());
+        assert_eq!("aé 💩�", d(string.as_inner()));
+    }
+
+    #[test]
     fn wtf8_encode_wide() {
         let mut string = Wtf8Buf::from_str("aé ");
         string.push(CodePoint::from_u32(0xD83D).unwrap());
```
