also expose and use encode_utf16_raw for wtf8

author: Ralf Jung <post@ralfj.de> 2020-05-30 12:08:55 +0200
committer: Ralf Jung <post@ralfj.de> 2020-05-30 12:11:21 +0200
commit: 9c627c33dde998cfe42bcde07e1c5692370daf63 (patch)
tree: 59cd9bb446c94b2a8003d4a18df605b99553c2e1
parent: 3182cdf9baf8ed9e8ae24f4742ee5d3d01c2b54a (diff)
download: rust-9c627c33dde998cfe42bcde07e1c5692370daf63.tar.gz
rust-9c627c33dde998cfe42bcde07e1c5692370daf63.zip
3 files changed, 40 insertions, 24 deletions
diff --git a/src/libcore/char/methods.rs b/src/libcore/char/methods.rs
index 112e7e38e41..b1b3c70efb1 100644
--- a/src/libcore/char/methods.rs
+++ b/src/libcore/char/methods.rs
@@ -701,28 +701,7 @@ impl char {
     #[stable(feature = "unicode_encode_char", since = "1.15.0")]
     #[inline]
     pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
-        let mut code = self as u32;
-        // SAFETY: each arm checks whether there are enough bits to write into
-        unsafe {
-            if (code & 0xFFFF) == code && !dst.is_empty() {
-                // The BMP falls through (assuming non-surrogate, as it should)
-                *dst.get_unchecked_mut(0) = code as u16;
-                slice::from_raw_parts_mut(dst.as_mut_ptr(), 1)
-            } else if dst.len() >= 2 {
-                // Supplementary planes break into surrogates.
-                code -= 0x1_0000;
-                *dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16);
-                *dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF);
-                slice::from_raw_parts_mut(dst.as_mut_ptr(), 2)
-            } else {
-                panic!(
-                    "encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
-                    from_u32_unchecked(code).len_utf16(),
-                    code,
-                    dst.len(),
-                )
-            }
-        }
+        encode_utf16_raw(self as u32, dst)
     }
 
     /// Returns `true` if this `char` has the `Alphabetic` property.
@@ -1692,3 +1671,39 @@ pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut str {
     // SAFETY: We just wrote UTF-8 content in, so converting to str is fine.
     unsafe { from_utf8_unchecked_mut(&mut dst[..len]) }
 }
+
+/// Encodes a raw u32 value as UTF-16 into the provided `u16` buffer,
+/// and then returns the subslice of the buffer that contains the encoded character.
+///
+/// Unlike `char::encode_utf16`, this function also accepts code points in the surrogate range.
+///
+/// # Panics
+///
+/// Panics if the buffer is not large enough.
+/// A buffer of length 2 is large enough to encode any `char`.
+#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
+#[doc(hidden)]
+#[inline]
+pub fn encode_utf16_raw(mut code: u32, dst: &mut [u16]) -> &mut [u16] {
+    // SAFETY: each arm checks that `dst` is long enough before its unchecked writes
+    unsafe {
+        if (code & 0xFFFF) == code && !dst.is_empty() {
+            // Values that fit in 16 bits are stored as a single unit, unchanged.
+            *dst.get_unchecked_mut(0) = code as u16;
+            slice::from_raw_parts_mut(dst.as_mut_ptr(), 1)
+        } else if dst.len() >= 2 {
+            // Supplementary planes break into surrogates.
+            code -= 0x1_0000;
+            *dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16);
+            *dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF);
+            slice::from_raw_parts_mut(dst.as_mut_ptr(), 2)
+        } else {
+            panic!(
+                "encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
+                from_u32_unchecked(code).len_utf16(), // NOTE(review): UB for surrogate `code`?
+                code,
+                dst.len(),
+            )
+        }
+    }
+}
diff --git a/src/libcore/char/mod.rs b/src/libcore/char/mod.rs
index 40b429b7496..1b4e906e4e4 100644
--- a/src/libcore/char/mod.rs
+++ b/src/libcore/char/mod.rs
@@ -39,6 +39,8 @@ pub use crate::unicode::UNICODE_VERSION;
 
 // perma-unstable re-exports
 #[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
+pub use self::methods::encode_utf16_raw;
+#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
 pub use self::methods::encode_utf8_raw;
 
 use crate::fmt::{self, Write};
diff --git a/src/libstd/sys_common/wtf8.rs b/src/libstd/sys_common/wtf8.rs
index 90bbf4afd1a..9f589c93ae5 100644
--- a/src/libstd/sys_common/wtf8.rs
+++ b/src/libstd/sys_common/wtf8.rs
@@ -828,8 +828,7 @@ impl<'a> Iterator for EncodeWide<'a> {
 
         let mut buf = [0; 2];
         self.code_points.next().map(|code_point| {
-            let c = unsafe { char::from_u32_unchecked(code_point.value) };
-            let n = c.encode_utf16(&mut buf).len();
+            let n = char::encode_utf16_raw(code_point.value, &mut buf).len();
             if n == 2 {
                 self.extra = buf[1];
             }
author	Ralf Jung <post@ralfj.de>	2020-05-30 12:08:55 +0200
committer	Ralf Jung <post@ralfj.de>	2020-05-30 12:11:21 +0200
commit	9c627c33dde998cfe42bcde07e1c5692370daf63 (patch)
tree	59cd9bb446c94b2a8003d4a18df605b99553c2e1
parent	3182cdf9baf8ed9e8ae24f4742ee5d3d01c2b54a (diff)
download	rust-9c627c33dde998cfe42bcde07e1c5692370daf63.tar.gz rust-9c627c33dde998cfe42bcde07e1c5692370daf63.zip