diff options
| author | bors <bors@rust-lang.org> | 2024-09-19 04:17:04 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2024-09-19 04:17:04 +0000 |
| commit | f8192ba0d00bbbc58a10c45823ba010e98ea4474 (patch) | |
| tree | de557973c439fe882636fe3d843e40666a6fdf52 | |
| parent | df7f77811c8806f85522a38878c57fde221138c9 (diff) | |
| parent | fb475e47594d089f1b670009ffcd38cea1544fb3 (diff) | |
| download | rust-f8192ba0d00bbbc58a10c45823ba010e98ea4474.tar.gz rust-f8192ba0d00bbbc58a10c45823ba010e98ea4474.zip | |
Auto merge of #130511 - bjoernager:const-char-encode-utf8, r=dtolnay
Support `char::encode_utf8` in const scenarios. This PR implements [`rust-lang/rfcs#3696`](https://github.com/rust-lang/rfcs/pull/3696/). This assumes [`const_slice_from_raw_parts_mut`](https://github.com/rust-lang/rust/issues/67456/).
| -rw-r--r-- | library/core/src/char/methods.rs | 32 | ||||
| -rw-r--r-- | library/core/src/lib.rs | 1 |
2 files changed, 15 insertions, 18 deletions
~~~diff
diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs
index bc5c7c32490..fcaa91184d3 100644
--- a/library/core/src/char/methods.rs
+++ b/library/core/src/char/methods.rs
@@ -672,8 +672,9 @@ impl char {
     /// 'ß'.encode_utf8(&mut b);
     /// ```
     #[stable(feature = "unicode_encode_char", since = "1.15.0")]
+    #[rustc_const_unstable(feature = "const_char_encode_utf8", issue = "130512")]
     #[inline]
-    pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
+    pub const fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
         // SAFETY: `char` is not a surrogate, so this is valid UTF-8.
         unsafe { from_utf8_unchecked_mut(encode_utf8_raw(self as u32, dst)) }
     }
@@ -1735,14 +1736,11 @@ impl EscapeDebugExtArgs {
 
 #[inline]
 const fn len_utf8(code: u32) -> usize {
-    if code < MAX_ONE_B {
-        1
-    } else if code < MAX_TWO_B {
-        2
-    } else if code < MAX_THREE_B {
-        3
-    } else {
-        4
+    match code {
+        ..MAX_ONE_B => 1,
+        ..MAX_TWO_B => 2,
+        ..MAX_THREE_B => 3,
+        _ => 4,
     }
 }
 
@@ -1760,11 +1758,12 @@ const fn len_utf8(code: u32) -> usize {
 /// Panics if the buffer is not large enough.
 /// A buffer of length four is large enough to encode any `char`.
 #[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
+#[rustc_const_unstable(feature = "const_char_encode_utf8", issue = "130512")]
 #[doc(hidden)]
 #[inline]
-pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
+pub const fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
     let len = len_utf8(code);
-    match (len, &mut dst[..]) {
+    match (len, &mut *dst) {
         (1, [a, ..]) => {
             *a = code as u8;
         }
@@ -1783,14 +1782,11 @@ pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
             *c = (code >> 6 & 0x3F) as u8 | TAG_CONT;
             *d = (code & 0x3F) as u8 | TAG_CONT;
         }
-        _ => panic!(
-            "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
-            len,
-            code,
-            dst.len(),
-        ),
+        // Note that we cannot format in constant expressions.
+        _ => panic!("encode_utf8: buffer does not have enough bytes to encode code point"),
     };
-    &mut dst[..len]
+    // SAFETY: `<&mut [u8]>::as_mut_ptr` is guaranteed to return a valid pointer and `len` has been tested to be within bounds.
+    unsafe { slice::from_raw_parts_mut(dst.as_mut_ptr(), len) }
 }
 
 /// Encodes a raw u32 value as UTF-16 into the provided `u16` buffer,
diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs
index 348ccebea3b..759d017eb2f 100644
--- a/library/core/src/lib.rs
+++ b/library/core/src/lib.rs
@@ -119,6 +119,7 @@
 #![feature(const_bigint_helper_methods)]
 #![feature(const_black_box)]
 #![feature(const_cell_into_inner)]
+#![feature(const_char_encode_utf8)]
 #![feature(const_eval_select)]
 #![feature(const_exact_div)]
 #![feature(const_float_classify)]
~~~
