diff options
| author | Ralf Jung <post@ralfj.de> | 2020-05-30 12:08:55 +0200 |
|---|---|---|
| committer | Ralf Jung <post@ralfj.de> | 2020-05-30 12:11:21 +0200 |
| commit | 9c627c33dde998cfe42bcde07e1c5692370daf63 (patch) | |
| tree | 59cd9bb446c94b2a8003d4a18df605b99553c2e1 | |
| parent | 3182cdf9baf8ed9e8ae24f4742ee5d3d01c2b54a (diff) | |
| download | rust-9c627c33dde998cfe42bcde07e1c5692370daf63.tar.gz rust-9c627c33dde998cfe42bcde07e1c5692370daf63.zip | |
also expose and use encode_utf16_raw for wtf8
| -rw-r--r-- | src/libcore/char/methods.rs | 59 | ||||
| -rw-r--r-- | src/libcore/char/mod.rs | 2 | ||||
| -rw-r--r-- | src/libstd/sys_common/wtf8.rs | 3 |
3 files changed, 40 insertions, 24 deletions
diff --git a/src/libcore/char/methods.rs b/src/libcore/char/methods.rs index 112e7e38e41..b1b3c70efb1 100644 --- a/src/libcore/char/methods.rs +++ b/src/libcore/char/methods.rs @@ -701,28 +701,7 @@ impl char { #[stable(feature = "unicode_encode_char", since = "1.15.0")] #[inline] pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] { - let mut code = self as u32; - // SAFETY: each arm checks whether there are enough bits to write into - unsafe { - if (code & 0xFFFF) == code && !dst.is_empty() { - // The BMP falls through (assuming non-surrogate, as it should) - *dst.get_unchecked_mut(0) = code as u16; - slice::from_raw_parts_mut(dst.as_mut_ptr(), 1) - } else if dst.len() >= 2 { - // Supplementary planes break into surrogates. - code -= 0x1_0000; - *dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16); - *dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF); - slice::from_raw_parts_mut(dst.as_mut_ptr(), 2) - } else { - panic!( - "encode_utf16: need {} units to encode U+{:X}, but the buffer has {}", - from_u32_unchecked(code).len_utf16(), - code, - dst.len(), - ) - } - } + encode_utf16_raw(self as u32, dst) } /// Returns `true` if this `char` has the `Alphabetic` property. @@ -1692,3 +1671,39 @@ pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut str { // SAFETY: We just wrote UTF-8 content in, so converting to str is fine. unsafe { from_utf8_unchecked_mut(&mut dst[..len]) } } + +/// Encodes a raw u32 value as UTF-16 into the provided `u16` buffer, +/// and then returns the subslice of the buffer that contains the encoded character. +/// +/// Unlike `char::encode_utf16`, this method can be called on codepoints in the surrogate range. +/// +/// # Panics +/// +/// Panics if the buffer is not large enough. +/// A buffer of length 2 is large enough to encode any `char`. +#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")] +#[doc(hidden)] +#[inline] +pub fn encode_utf16_raw(mut code: u32, dst: &mut [u16]) -> &mut [u16] { + // SAFETY: each arm checks whether there are enough bits to write into + unsafe { + if (code & 0xFFFF) == code && !dst.is_empty() { + // The BMP falls through (assuming non-surrogate, as it should) + *dst.get_unchecked_mut(0) = code as u16; + slice::from_raw_parts_mut(dst.as_mut_ptr(), 1) + } else if dst.len() >= 2 { + // Supplementary planes break into surrogates. + code -= 0x1_0000; + *dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16); + *dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF); + slice::from_raw_parts_mut(dst.as_mut_ptr(), 2) + } else { + panic!( + "encode_utf16: need {} units to encode U+{:X}, but the buffer has {}", + from_u32_unchecked(code).len_utf16(), + code, + dst.len(), + ) + } + } +} diff --git a/src/libcore/char/mod.rs b/src/libcore/char/mod.rs index 40b429b7496..1b4e906e4e4 100644 --- a/src/libcore/char/mod.rs +++ b/src/libcore/char/mod.rs @@ -39,6 +39,8 @@ pub use crate::unicode::UNICODE_VERSION; // perma-unstable re-exports #[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")] +pub use self::methods::encode_utf16_raw; +#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")] pub use self::methods::encode_utf8_raw; use crate::fmt::{self, Write}; diff --git a/src/libstd/sys_common/wtf8.rs b/src/libstd/sys_common/wtf8.rs index 90bbf4afd1a..9f589c93ae5 100644 --- a/src/libstd/sys_common/wtf8.rs +++ b/src/libstd/sys_common/wtf8.rs @@ -828,8 +828,7 @@ impl<'a> Iterator for EncodeWide<'a> { let mut buf = [0; 2]; self.code_points.next().map(|code_point| { - let c = unsafe { char::from_u32_unchecked(code_point.value) }; - let n = c.encode_utf16(&mut buf).len(); + let n = char::encode_utf16_raw(code_point.value, &mut buf).len(); if n == 2 { self.extra = buf[1]; } |
