about summary refs log tree commit diff
diff options
context:
space:
mode:
author Karl Meakin <karl.meakin@arm.com> 2025-08-16 23:45:45 +0100
committer Karl Meakin <karl.meakin@arm.com> 2025-08-17 01:22:48 +0100
commit c9ce45cb1e11408c2f0c7cdc89f22bf54406eb20 (patch)
tree bde92f6a34029096e8697847a1cf88730c674340
parent cd7cbe818e4a66d46fe2df993d1b8518eba8a5cd (diff)
download rust-c9ce45cb1e11408c2f0c7cdc89f22bf54406eb20.tar.gz
download rust-c9ce45cb1e11408c2f0c7cdc89f22bf54406eb20.zip
Optimize `char::encode_utf8`
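Save a few instructions in `encode_utf8_raw_unchecked` by performing
manual common subexpression elimination (CSE): the continuation bytes
shared by the 2-, 3- and 4-byte encodings are computed once up front.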
-rw-r--r-- library/core/src/char/methods.rs 47
1 files changed, 26 insertions, 21 deletions
diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs
index 7ee0962721f..985e669c92d 100644
--- a/library/core/src/char/methods.rs
+++ b/library/core/src/char/methods.rs
@@ -1872,28 +1872,33 @@ pub const unsafe fn encode_utf8_raw_unchecked(code: u32, dst: *mut u8) {
     // SAFETY: The caller must guarantee that the buffer pointed to by `dst`
     // is at least `len` bytes long.
     unsafe {
-        match len {
-            1 => {
-                *dst = code as u8;
-            }
-            2 => {
-                *dst = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
-                *dst.add(1) = (code & 0x3F) as u8 | TAG_CONT;
-            }
-            3 => {
-                *dst = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
-                *dst.add(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
-                *dst.add(2) = (code & 0x3F) as u8 | TAG_CONT;
-            }
-            4 => {
-                *dst = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
-                *dst.add(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT;
-                *dst.add(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
-                *dst.add(3) = (code & 0x3F) as u8 | TAG_CONT;
-            }
-            // SAFETY: `char` always takes between 1 and 4 bytes to encode in UTF-8.
-            _ => crate::hint::unreachable_unchecked(),
+        if len == 1 {
+            *dst = code as u8;
+            return;
+        }
+
+        let last1 = (code >> 0 & 0x3F) as u8 | TAG_CONT;
+        let last2 = (code >> 6 & 0x3F) as u8 | TAG_CONT;
+        let last3 = (code >> 12 & 0x3F) as u8 | TAG_CONT;
+        let last4 = (code >> 18 & 0x3F) as u8 | TAG_FOUR_B;
+
+        if len == 2 {
+            *dst = last2 | TAG_TWO_B;
+            *dst.add(1) = last1;
+            return;
         }
+
+        if len == 3 {
+            *dst = last3 | TAG_THREE_B;
+            *dst.add(1) = last2;
+            *dst.add(2) = last1;
+            return;
+        }
+
+        *dst = last4;
+        *dst.add(1) = last3;
+        *dst.add(2) = last2;
+        *dst.add(3) = last1;
     }
 }