about summary refs log tree commit diff
diff options
context:
space:
mode:
authorMichal Nazarewicz <mina86@mina86.com>2022-12-23 13:38:03 +0100
committerMichal Nazarewicz <mina86@mina86.com>2022-12-23 14:15:33 +0100
commit28162ad97024f470e107dd224f07f1d6ca0fa642 (patch)
tree258c81d3a756329db87a00554044b23e78f574f0
parent5e656baf8bc832d3b77a1e49373352b3b2685fc0 (diff)
downloadrust-28162ad97024f470e107dd224f07f1d6ca0fa642.tar.gz
rust-28162ad97024f470e107dd224f07f1d6ca0fa642.zip
char: µoptimise UTF-16 surrogates decoding
According to Godbolt¹, on x86_64 using a bitwise AND produces slightly
better code than using subtraction.  Readability of both is pretty
much equivalent, so we might just as well use the shorter option.

¹ https://rust.godbolt.org/z/9jM3ejbMx
-rw-r--r--library/core/src/char/decode.rs2
-rw-r--r--library/core/tests/char.rs4
2 files changed, 5 insertions, 1 deletions
diff --git a/library/core/src/char/decode.rs b/library/core/src/char/decode.rs
index 11f1c30f6d5..eeb08803040 100644
--- a/library/core/src/char/decode.rs
+++ b/library/core/src/char/decode.rs
@@ -67,7 +67,7 @@ impl<I: Iterator<Item = u16>> Iterator for DecodeUtf16<I> {
             }
 
             // all ok, so lets decode it.
-            let c = (((u - 0xD800) as u32) << 10 | (u2 - 0xDC00) as u32) + 0x1_0000;
+            let c = (((u & 0x3ff) as u32) << 10 | (u2 & 0x3ff) as u32) + 0x1_0000;
             // SAFETY: we checked that it's a legal unicode value
             Some(Ok(unsafe { from_u32_unchecked(c) }))
         }
diff --git a/library/core/tests/char.rs b/library/core/tests/char.rs
index 8542e5c70d4..ac0b2ca168b 100644
--- a/library/core/tests/char.rs
+++ b/library/core/tests/char.rs
@@ -306,6 +306,10 @@ fn test_decode_utf16() {
     }
     check(&[0xD800, 0x41, 0x42], &[Err(0xD800), Ok('A'), Ok('B')]);
     check(&[0xD800, 0], &[Err(0xD800), Ok('\0')]);
+    check(&[0xD800], &[Err(0xD800)]);
+    check(&[0xD840, 0xDC00], &[Ok('\u{20000}')]);
+    check(&[0xD840, 0xD840, 0xDC00], &[Err(0xD840), Ok('\u{20000}')]);
+    check(&[0xDC00, 0xD840], &[Err(0xDC00), Err(0xD840)]);
 }
 
 #[test]