about summary refs log tree commit diff
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2024-09-19 04:17:04 +0000
committer bors <bors@rust-lang.org> 2024-09-19 04:17:04 +0000
commit f8192ba0d00bbbc58a10c45823ba010e98ea4474 (patch)
tree de557973c439fe882636fe3d843e40666a6fdf52
parent df7f77811c8806f85522a38878c57fde221138c9 (diff)
parent fb475e47594d089f1b670009ffcd38cea1544fb3 (diff)
download rust-f8192ba0d00bbbc58a10c45823ba010e98ea4474.tar.gz
download rust-f8192ba0d00bbbc58a10c45823ba010e98ea4474.zip
Auto merge of #130511 - bjoernager:const-char-encode-utf8, r=dtolnay
Support `char::encode_utf8` in const scenarios.

This PR implements [`rust-lang/rfcs#3696`](https://github.com/rust-lang/rfcs/pull/3696/).

This assumes [`const_slice_from_raw_parts_mut`](https://github.com/rust-lang/rust/issues/67456/).
-rw-r--r-- library/core/src/char/methods.rs 32
-rw-r--r-- library/core/src/lib.rs 1
2 files changed, 15 insertions, 18 deletions
diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs
index bc5c7c32490..fcaa91184d3 100644
--- a/library/core/src/char/methods.rs
+++ b/library/core/src/char/methods.rs
@@ -672,8 +672,9 @@ impl char {
     /// 'ß'.encode_utf8(&mut b);
     /// ```
     #[stable(feature = "unicode_encode_char", since = "1.15.0")]
+    #[rustc_const_unstable(feature = "const_char_encode_utf8", issue = "130512")]
     #[inline]
-    pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
+    pub const fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
         // SAFETY: `char` is not a surrogate, so this is valid UTF-8.
         unsafe { from_utf8_unchecked_mut(encode_utf8_raw(self as u32, dst)) }
     }
@@ -1735,14 +1736,11 @@ impl EscapeDebugExtArgs {
 
 #[inline]
 const fn len_utf8(code: u32) -> usize {
-    if code < MAX_ONE_B {
-        1
-    } else if code < MAX_TWO_B {
-        2
-    } else if code < MAX_THREE_B {
-        3
-    } else {
-        4
+    match code {
+        ..MAX_ONE_B => 1,
+        ..MAX_TWO_B => 2,
+        ..MAX_THREE_B => 3,
+        _ => 4,
     }
 }
 
@@ -1760,11 +1758,12 @@ const fn len_utf8(code: u32) -> usize {
 /// Panics if the buffer is not large enough.
 /// A buffer of length four is large enough to encode any `char`.
 #[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
+#[rustc_const_unstable(feature = "const_char_encode_utf8", issue = "130512")]
 #[doc(hidden)]
 #[inline]
-pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
+pub const fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
     let len = len_utf8(code);
-    match (len, &mut dst[..]) {
+    match (len, &mut *dst) {
         (1, [a, ..]) => {
             *a = code as u8;
         }
@@ -1783,14 +1782,11 @@ pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
             *c = (code >> 6 & 0x3F) as u8 | TAG_CONT;
             *d = (code & 0x3F) as u8 | TAG_CONT;
         }
-        _ => panic!(
-            "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
-            len,
-            code,
-            dst.len(),
-        ),
+        // Note that we cannot format in constant expressions.
+        _ => panic!("encode_utf8: buffer does not have enough bytes to encode code point"),
     };
-    &mut dst[..len]
+    // SAFETY: `<&mut [u8]>::as_mut_ptr` is guaranteed to return a valid pointer and `len` has been tested to be within bounds.
+    unsafe { slice::from_raw_parts_mut(dst.as_mut_ptr(), len) }
 }
 
 /// Encodes a raw u32 value as UTF-16 into the provided `u16` buffer,
diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs
index 348ccebea3b..759d017eb2f 100644
--- a/library/core/src/lib.rs
+++ b/library/core/src/lib.rs
@@ -119,6 +119,7 @@
 #![feature(const_bigint_helper_methods)]
 #![feature(const_black_box)]
 #![feature(const_cell_into_inner)]
+#![feature(const_char_encode_utf8)]
 #![feature(const_eval_select)]
 #![feature(const_exact_div)]
 #![feature(const_float_classify)]