diff options
Diffstat (limited to 'library/std/src/sys_common/wtf8.rs')
| -rw-r--r-- | library/std/src/sys_common/wtf8.rs | 43 |
1 files changed, 31 insertions, 12 deletions
diff --git a/library/std/src/sys_common/wtf8.rs b/library/std/src/sys_common/wtf8.rs index dd53767d452..67db5ebd89c 100644 --- a/library/std/src/sys_common/wtf8.rs +++ b/library/std/src/sys_common/wtf8.rs @@ -18,10 +18,10 @@ #[cfg(test)] mod tests; +use core::char::{encode_utf16_raw, encode_utf8_raw}; use core::str::next_code_point; use crate::borrow::Cow; -use crate::char; use crate::collections::TryReserveError; use crate::fmt; use crate::hash::{Hash, Hasher}; @@ -182,6 +182,15 @@ impl Wtf8Buf { Wtf8Buf { bytes: Vec::with_capacity(capacity), is_known_utf8: true } } + /// Creates a WTF-8 string from a WTF-8 byte vec. + /// + /// Since the byte vec is not checked for valid WTF-8, this functions is + /// marked unsafe. + #[inline] + pub unsafe fn from_bytes_unchecked(value: Vec<u8>) -> Wtf8Buf { + Wtf8Buf { bytes: value, is_known_utf8: false } + } + /// Creates a WTF-8 string from a UTF-8 `String`. /// /// This takes ownership of the `String` and does not copy. @@ -235,7 +244,7 @@ impl Wtf8Buf { /// This does **not** include the WTF-8 concatenation check or `is_known_utf8` check. fn push_code_point_unchecked(&mut self, code_point: CodePoint) { let mut bytes = [0; 4]; - let bytes = char::encode_utf8_raw(code_point.value, &mut bytes); + let bytes = encode_utf8_raw(code_point.value, &mut bytes); self.bytes.extend_from_slice(bytes) } @@ -402,6 +411,12 @@ impl Wtf8Buf { self.bytes.truncate(new_len) } + /// Consumes the WTF-8 string and tries to convert it to a vec of bytes. + #[inline] + pub fn into_bytes(self) -> Vec<u8> { + self.bytes + } + /// Consumes the WTF-8 string and tries to convert it to UTF-8. /// /// This does not copy the data. @@ -444,6 +459,7 @@ impl Wtf8Buf { /// Converts this `Wtf8Buf` into a boxed `Wtf8`. #[inline] pub fn into_box(self) -> Box<Wtf8> { + // SAFETY: relies on `Wtf8` being `repr(transparent)`. unsafe { mem::transmute(self.bytes.into_boxed_slice()) } } @@ -496,11 +512,13 @@ impl Extend<CodePoint> for Wtf8Buf { /// Similar to `&str`, but can additionally contain surrogate code points /// if they’re not in a surrogate pair. #[derive(Eq, Ord, PartialEq, PartialOrd)] +#[repr(transparent)] pub struct Wtf8 { bytes: [u8], } impl AsInner<[u8]> for Wtf8 { + #[inline] fn as_inner(&self) -> &[u8] { &self.bytes } @@ -569,7 +587,7 @@ impl Wtf8 { /// Since the byte slice is not checked for valid WTF-8, this functions is /// marked unsafe. #[inline] - unsafe fn from_bytes_unchecked(value: &[u8]) -> &Wtf8 { + pub unsafe fn from_bytes_unchecked(value: &[u8]) -> &Wtf8 { mem::transmute(value) } @@ -594,7 +612,7 @@ impl Wtf8 { } /// Returns the code point at `position` if it is in the ASCII range, - /// or `b'\xFF' otherwise. + /// or `b'\xFF'` otherwise. /// /// # Panics /// @@ -613,19 +631,20 @@ impl Wtf8 { Wtf8CodePoints { bytes: self.bytes.iter() } } + /// Access raw bytes of WTF-8 data + #[inline] + pub fn as_bytes(&self) -> &[u8] { + &self.bytes + } + /// Tries to convert the string to UTF-8 and return a `&str` slice. /// /// Returns `None` if the string contains surrogates. /// /// This does not copy the data. #[inline] - pub fn as_str(&self) -> Option<&str> { - // Well-formed WTF-8 is also well-formed UTF-8 - // if and only if it contains no surrogate. - match self.next_surrogate(0) { - None => Some(unsafe { str::from_utf8_unchecked(&self.bytes) }), - Some(_) => None, - } + pub fn as_str(&self) -> Result<&str, str::Utf8Error> { + str::from_utf8(&self.bytes) } /// Creates an owned `Wtf8Buf` from a borrowed `Wtf8`. @@ -939,7 +958,7 @@ impl<'a> Iterator for EncodeWide<'a> { let mut buf = [0; 2]; self.code_points.next().map(|code_point| { - let n = char::encode_utf16_raw(code_point.value, &mut buf).len(); + let n = encode_utf16_raw(code_point.value, &mut buf).len(); if n == 2 { self.extra = buf[1]; } |
