about summary refs log tree commit diff
diff options
context:
space:
mode:
authorSimon Sapin <simon.sapin@exyr.org>2019-03-19 00:50:26 +0100
committerSimon Sapin <simon.sapin@exyr.org>2019-03-19 00:50:26 +0100
commit0ad91f73d92c3b8d3978f8f54c04b8efe3d2e673 (patch)
tree806084ebc5277243acbb738fd570642947445412
parent4a3241a815e951dfa87e9be7133e8265bc945e57 (diff)
downloadrust-0ad91f73d92c3b8d3978f8f54c04b8efe3d2e673.tar.gz
rust-0ad91f73d92c3b8d3978f8f54c04b8efe3d2e673.zip
Simplify u8::to_ascii_{upp,low}ercase while keeping it fast
-rw-r--r--src/libcore/benches/ascii.rs24
-rw-r--r--src/libcore/num/mod.rs46
2 files changed, 27 insertions, 43 deletions
diff --git a/src/libcore/benches/ascii.rs b/src/libcore/benches/ascii.rs
index ce36027394a..89e67cca4b7 100644
--- a/src/libcore/benches/ascii.rs
+++ b/src/libcore/benches/ascii.rs
@@ -1,4 +1,26 @@
-// See comments in `u8::to_ascii_uppercase` in `src/libcore/num/mod.rs`.
+// Lower-case ASCII 'a' is the first byte that has its highest bit set
+// after wrap-adding 0x1F:
+//
+//     b'a' + 0x1F == 0x80 == 0b1000_0000
+//     b'z' + 0x1F == 0x99 == 0b1001_1001
+//
+// Lower-case ASCII 'z' is the last byte that has its highest bit unset
+// after wrap-adding 0x05:
+//
+//     b'a' + 0x05 == 0x66 == 0b0110_0110
+//     b'z' + 0x05 == 0x7F == 0b0111_1111
+//
+// … except for 0xFB to 0xFF, but those are in the range of bytes
+// that have the highest bit unset again after adding 0x1F.
+//
+// So `(byte + 0x1f) & !(byte + 5)` has its highest bit set
+// iff `byte` is a lower-case ASCII letter.
+//
+// Lower-case ASCII letters all have the 0x20 bit set.
+// (Two positions right of 0x80, the highest bit.)
+// Unsetting that bit produces the same letter, in upper-case.
+//
+// Therefore:
 fn branchless_to_ascii_upper_case(byte: u8) -> u8 {
     byte &
     !(
diff --git a/src/libcore/num/mod.rs b/src/libcore/num/mod.rs
index 64469a4b7e4..3fcae6b94b0 100644
--- a/src/libcore/num/mod.rs
+++ b/src/libcore/num/mod.rs
@@ -3794,39 +3794,8 @@ impl u8 {
     #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
     #[inline]
     pub fn to_ascii_uppercase(&self) -> u8 {
-        // See benchmarks in src/libcore/benches/ascii_case.rs
-
-        // Lower-case ASCII 'a' is the first byte that has its highest bit set
-        // after wrap-adding 0x1F:
-        //
-        //     b'a' + 0x1F == 0x80 == 0b1000_0000
-        //     b'z' + 0x1F == 0x98 == 0b10011000
-        //
-        // Lower-case ASCII 'z' is the last byte that has its highest bit unset
-        // after wrap-adding 0x05:
-        //
-        //     b'a' + 0x05 == 0x66 == 0b0110_0110
-        //     b'z' + 0x05 == 0x7F == 0b0111_1111
-        //
-        // … except for 0xFB to 0xFF, but those are in the range of bytes
-        // that have the highest bit unset again after adding 0x1F.
-        //
-        // So `(byte + 0x1f) & !(byte + 5)` has its highest bit set
-        // iff `byte` is a lower-case ASCII letter.
-        //
-        // Lower-case ASCII letters all have the 0x20 bit set.
-        // (Two positions right of 0x80, the highest bit.)
-        // Unsetting that bit produces the same letter, in upper-case.
-        //
-        // Therefore:
-        *self &
-        !(
-            (
-                self.wrapping_add(0x1f) &
-                !self.wrapping_add(0x05) &
-                0x80
-            ) >> 2
-        )
+        // Unset the fifth bit (0x20) if this is a lowercase letter
+        *self & !((self.is_ascii_lowercase() as u8) << 5)
     }
 
     /// Makes a copy of the value in its ASCII lower case equivalent.
@@ -3848,15 +3817,8 @@ impl u8 {
     #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
     #[inline]
     pub fn to_ascii_lowercase(&self) -> u8 {
-        // See comments in to_ascii_uppercase above.
-        *self |
-        (
-            (
-                self.wrapping_add(0x3f) &
-                !self.wrapping_add(0x25) &
-                0x80
-            ) >> 2
-        )
+        // Set the fifth bit (0x20) if this is an uppercase letter
+        *self | ((self.is_ascii_uppercase() as u8) << 5)
     }
 
     /// Checks that two values are an ASCII case-insensitive match.