about summary refs log tree commit diff
path: root/library/std/src/sys_common/wtf8.rs
diff options
context:
space:
mode:
Diffstat (limited to 'library/std/src/sys_common/wtf8.rs')
-rw-r--r--library/std/src/sys_common/wtf8.rs43
1 files changed, 31 insertions, 12 deletions
diff --git a/library/std/src/sys_common/wtf8.rs b/library/std/src/sys_common/wtf8.rs
index dd53767d452..67db5ebd89c 100644
--- a/library/std/src/sys_common/wtf8.rs
+++ b/library/std/src/sys_common/wtf8.rs
@@ -18,10 +18,10 @@
 #[cfg(test)]
 mod tests;
 
+use core::char::{encode_utf16_raw, encode_utf8_raw};
 use core::str::next_code_point;
 
 use crate::borrow::Cow;
-use crate::char;
 use crate::collections::TryReserveError;
 use crate::fmt;
 use crate::hash::{Hash, Hasher};
@@ -182,6 +182,15 @@ impl Wtf8Buf {
         Wtf8Buf { bytes: Vec::with_capacity(capacity), is_known_utf8: true }
     }
 
+    /// Creates a WTF-8 string from a WTF-8 byte vec.
+    ///
+    /// Since the byte vec is not checked for valid WTF-8, this functions is
+    /// marked unsafe.
+    #[inline]
+    pub unsafe fn from_bytes_unchecked(value: Vec<u8>) -> Wtf8Buf {
+        Wtf8Buf { bytes: value, is_known_utf8: false }
+    }
+
     /// Creates a WTF-8 string from a UTF-8 `String`.
     ///
     /// This takes ownership of the `String` and does not copy.
@@ -235,7 +244,7 @@ impl Wtf8Buf {
     /// This does **not** include the WTF-8 concatenation check or `is_known_utf8` check.
     fn push_code_point_unchecked(&mut self, code_point: CodePoint) {
         let mut bytes = [0; 4];
-        let bytes = char::encode_utf8_raw(code_point.value, &mut bytes);
+        let bytes = encode_utf8_raw(code_point.value, &mut bytes);
         self.bytes.extend_from_slice(bytes)
     }
 
@@ -402,6 +411,12 @@ impl Wtf8Buf {
         self.bytes.truncate(new_len)
     }
 
+    /// Consumes the WTF-8 string and tries to convert it to a vec of bytes.
+    #[inline]
+    pub fn into_bytes(self) -> Vec<u8> {
+        self.bytes
+    }
+
     /// Consumes the WTF-8 string and tries to convert it to UTF-8.
     ///
     /// This does not copy the data.
@@ -444,6 +459,7 @@ impl Wtf8Buf {
     /// Converts this `Wtf8Buf` into a boxed `Wtf8`.
     #[inline]
     pub fn into_box(self) -> Box<Wtf8> {
+        // SAFETY: relies on `Wtf8` being `repr(transparent)`.
         unsafe { mem::transmute(self.bytes.into_boxed_slice()) }
     }
 
@@ -496,11 +512,13 @@ impl Extend<CodePoint> for Wtf8Buf {
 /// Similar to `&str`, but can additionally contain surrogate code points
 /// if they’re not in a surrogate pair.
 #[derive(Eq, Ord, PartialEq, PartialOrd)]
+#[repr(transparent)]
 pub struct Wtf8 {
     bytes: [u8],
 }
 
 impl AsInner<[u8]> for Wtf8 {
+    #[inline]
     fn as_inner(&self) -> &[u8] {
         &self.bytes
     }
@@ -569,7 +587,7 @@ impl Wtf8 {
     /// Since the byte slice is not checked for valid WTF-8, this functions is
     /// marked unsafe.
     #[inline]
-    unsafe fn from_bytes_unchecked(value: &[u8]) -> &Wtf8 {
+    pub unsafe fn from_bytes_unchecked(value: &[u8]) -> &Wtf8 {
         mem::transmute(value)
     }
 
@@ -594,7 +612,7 @@ impl Wtf8 {
     }
 
     /// Returns the code point at `position` if it is in the ASCII range,
-    /// or `b'\xFF' otherwise.
+    /// or `b'\xFF'` otherwise.
     ///
     /// # Panics
     ///
@@ -613,19 +631,20 @@ impl Wtf8 {
         Wtf8CodePoints { bytes: self.bytes.iter() }
     }
 
+    /// Access raw bytes of WTF-8 data
+    #[inline]
+    pub fn as_bytes(&self) -> &[u8] {
+        &self.bytes
+    }
+
     /// Tries to convert the string to UTF-8 and return a `&str` slice.
     ///
     /// Returns `None` if the string contains surrogates.
     ///
     /// This does not copy the data.
     #[inline]
-    pub fn as_str(&self) -> Option<&str> {
-        // Well-formed WTF-8 is also well-formed UTF-8
-        // if and only if it contains no surrogate.
-        match self.next_surrogate(0) {
-            None => Some(unsafe { str::from_utf8_unchecked(&self.bytes) }),
-            Some(_) => None,
-        }
+    pub fn as_str(&self) -> Result<&str, str::Utf8Error> {
+        str::from_utf8(&self.bytes)
     }
 
     /// Creates an owned `Wtf8Buf` from a borrowed `Wtf8`.
@@ -939,7 +958,7 @@ impl<'a> Iterator for EncodeWide<'a> {
 
         let mut buf = [0; 2];
         self.code_points.next().map(|code_point| {
-            let n = char::encode_utf16_raw(code_point.value, &mut buf).len();
+            let n = encode_utf16_raw(code_point.value, &mut buf).len();
             if n == 2 {
                 self.extra = buf[1];
             }