diff options
| author | Karl Meakin <karl.meakin@arm.com> | 2025-08-16 23:45:45 +0100 |
|---|---|---|
| committer | Karl Meakin <karl.meakin@arm.com> | 2025-08-17 01:22:48 +0100 |
| commit | c9ce45cb1e11408c2f0c7cdc89f22bf54406eb20 (patch) | |
| tree | bde92f6a34029096e8697847a1cf88730c674340 | |
| parent | cd7cbe818e4a66d46fe2df993d1b8518eba8a5cd (diff) | |
| download | rust-c9ce45cb1e11408c2f0c7cdc89f22bf54406eb20.tar.gz rust-c9ce45cb1e11408c2f0c7cdc89f22bf54406eb20.zip | |
Optimize `char::encode_utf8`
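Save a few instructions in `encode_utf8_raw_unchecked` by performing manual common subexpression elimination (CSE): the continuation bytes shared by the 2-, 3- and 4-byte encodings are computed once up front instead of separately in each length branch.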
| -rw-r--r-- | library/core/src/char/methods.rs | 47 |
1 file changed, 26 insertions, 21 deletions
diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs index 7ee0962721f..985e669c92d 100644 --- a/library/core/src/char/methods.rs +++ b/library/core/src/char/methods.rs @@ -1872,28 +1872,33 @@ pub const unsafe fn encode_utf8_raw_unchecked(code: u32, dst: *mut u8) { // SAFETY: The caller must guarantee that the buffer pointed to by `dst` // is at least `len` bytes long. unsafe { - match len { - 1 => { - *dst = code as u8; - } - 2 => { - *dst = (code >> 6 & 0x1F) as u8 | TAG_TWO_B; - *dst.add(1) = (code & 0x3F) as u8 | TAG_CONT; - } - 3 => { - *dst = (code >> 12 & 0x0F) as u8 | TAG_THREE_B; - *dst.add(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT; - *dst.add(2) = (code & 0x3F) as u8 | TAG_CONT; - } - 4 => { - *dst = (code >> 18 & 0x07) as u8 | TAG_FOUR_B; - *dst.add(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT; - *dst.add(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT; - *dst.add(3) = (code & 0x3F) as u8 | TAG_CONT; - } - // SAFETY: `char` always takes between 1 and 4 bytes to encode in UTF-8. - _ => crate::hint::unreachable_unchecked(), + if len == 1 { + *dst = code as u8; + return; + } + + let last1 = (code >> 0 & 0x3F) as u8 | TAG_CONT; + let last2 = (code >> 6 & 0x3F) as u8 | TAG_CONT; + let last3 = (code >> 12 & 0x3F) as u8 | TAG_CONT; + let last4 = (code >> 18 & 0x3F) as u8 | TAG_FOUR_B; + + if len == 2 { + *dst = last2 | TAG_TWO_B; + *dst.add(1) = last1; + return; } + + if len == 3 { + *dst = last3 | TAG_THREE_B; + *dst.add(1) = last2; + *dst.add(2) = last1; + return; + } + + *dst = last4; + *dst.add(1) = last3; + *dst.add(2) = last2; + *dst.add(3) = last1; } } |
