Rollup merge of #67569 - Mark-Simulacrum:opt-char-encode, r=oli-obk

Clean up unsafety in char::encode_utf8. This originally started as an attempt to allow LLVM to optimize through encode_utf8 to detect the try_encode_utf8 case (#52579, #52580), but due to a typo, my conclusion that my optimizations were successful was incorrect. Furthermore, as far as I can tell, this optimization is probably just not possible with LLVM today. This [code](https://rust.godbolt.org/z/JggRj4) compiles down to a long series of compares; notably, two identical series of compares. That essentially means that LLVM is currently unable to see that these two `if`s are identical and can therefore be merged, and so it cannot go on to realize that no value of the `if` condition can result in a call to `please_delete`. As such, for now, we do not attempt to specifically optimize for that case.
author: Mazdak Farrokhzad <twingoow@gmail.com> 2019-12-24 04:40:02 +0100
committer: GitHub <noreply@github.com> 2019-12-24 04:40:02 +0100
commit: 20d5df91f460196c5720b22afbd0c07bd5bcf22b (patch)
tree: 143d6204474c0150d47fe7a97f848cfe13c838b9
parent: 75b27ef59cd0eb95b50d0cde14b05e0079b3ebe9 (diff)
parent: df4d490038c37e441065890fa27ed2ce0bdf83e6 (diff)
download: rust-20d5df91f460196c5720b22afbd0c07bd5bcf22b.tar.gz
rust-20d5df91f460196c5720b22afbd0c07bd5bcf22b.zip
2 files changed, 30 insertions, 30 deletions
diff --git a/src/libcore/char/methods.rs b/src/libcore/char/methods.rs
index bb6d6db57d2..fe5d16862a6 100644
--- a/src/libcore/char/methods.rs
+++ b/src/libcore/char/methods.rs
@@ -434,36 +434,35 @@ impl char {
     #[inline]
     pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
         let code = self as u32;
-        // SAFETY: each arm checks the size of the slice and only uses `get_unchecked` unsafe ops
-        unsafe {
-            let len = if code < MAX_ONE_B && !dst.is_empty() {
-                *dst.get_unchecked_mut(0) = code as u8;
-                1
-            } else if code < MAX_TWO_B && dst.len() >= 2 {
-                *dst.get_unchecked_mut(0) = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
-                *dst.get_unchecked_mut(1) = (code & 0x3F) as u8 | TAG_CONT;
-                2
-            } else if code < MAX_THREE_B && dst.len() >= 3 {
-                *dst.get_unchecked_mut(0) = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
-                *dst.get_unchecked_mut(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
-                *dst.get_unchecked_mut(2) = (code & 0x3F) as u8 | TAG_CONT;
-                3
-            } else if dst.len() >= 4 {
-                *dst.get_unchecked_mut(0) = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
-                *dst.get_unchecked_mut(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT;
-                *dst.get_unchecked_mut(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
-                *dst.get_unchecked_mut(3) = (code & 0x3F) as u8 | TAG_CONT;
-                4
-            } else {
-                panic!(
-                    "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
-                    from_u32_unchecked(code).len_utf8(),
-                    code,
-                    dst.len(),
-                )
-            };
-            from_utf8_unchecked_mut(dst.get_unchecked_mut(..len))
-        }
+        let len = self.len_utf8();
+        match (len, &mut dst[..]) {
+            (1, [a, ..]) => {
+                *a = code as u8;
+            }
+            (2, [a, b, ..]) => {
+                *a = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
+                *b = (code & 0x3F) as u8 | TAG_CONT;
+            }
+            (3, [a, b, c, ..]) => {
+                *a = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
+                *b = (code >> 6 & 0x3F) as u8 | TAG_CONT;
+                *c = (code & 0x3F) as u8 | TAG_CONT;
+            }
+            (4, [a, b, c, d, ..]) => {
+                *a = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
+                *b = (code >> 12 & 0x3F) as u8 | TAG_CONT;
+                *c = (code >> 6 & 0x3F) as u8 | TAG_CONT;
+                *d = (code & 0x3F) as u8 | TAG_CONT;
+            }
+            _ => panic!(
+                "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
+                len,
+                code,
+                dst.len(),
+            ),
+        };
+        // SAFETY: We just wrote UTF-8 content in, so converting to str is fine.
+        unsafe { from_utf8_unchecked_mut(&mut dst[..len]) }
     }
 
     /// Encodes this character as UTF-16 into the provided `u16` buffer,
diff --git a/src/libcore/lib.rs b/src/libcore/lib.rs
index d12aebb87b9..7d11dd2800f 100644
--- a/src/libcore/lib.rs
+++ b/src/libcore/lib.rs
@@ -129,6 +129,7 @@
 #![feature(associated_type_bounds)]
 #![feature(const_type_id)]
 #![feature(const_caller_location)]
+#![feature(slice_patterns)]
 
 #[prelude_import]
 #[allow(unused)]
author	Mazdak Farrokhzad <twingoow@gmail.com>	2019-12-24 04:40:02 +0100
committer	GitHub <noreply@github.com>	2019-12-24 04:40:02 +0100
commit	20d5df91f460196c5720b22afbd0c07bd5bcf22b (patch)
tree	143d6204474c0150d47fe7a97f848cfe13c838b9
parent	75b27ef59cd0eb95b50d0cde14b05e0079b3ebe9 (diff)
parent	df4d490038c37e441065890fa27ed2ce0bdf83e6 (diff)
download	rust-20d5df91f460196c5720b22afbd0c07bd5bcf22b.tar.gz rust-20d5df91f460196c5720b22afbd0c07bd5bcf22b.zip