about summary refs log tree commit diff
diff options
context:
space:
mode:
authorDylan DPC <dylan.dpc@gmail.com>2021-02-23 02:51:47 +0100
committerGitHub <noreply@github.com>2021-02-23 02:51:47 +0100
commit4af965e73262190137a86b44fc2f75da7fb54eab (patch)
tree8b4fe6abf3e045534c80c7771b860f77b6eb1337
parent5d90e89c36468350b9636614b0f9dbf64a4aef80 (diff)
parent33d8b0456876181883f8d97997a3a0a6e9ff652f (diff)
downloadrust-4af965e73262190137a86b44fc2f75da7fb54eab.tar.gz
rust-4af965e73262190137a86b44fc2f75da7fb54eab.zip
Rollup merge of #81837 - gilescope:to_ascii_speedups, r=dtolnay
Slight perf improvement on char::to_ascii_lowercase

`char::to_ascii_lowercase()` was first checking whether the char is ASCII and then checking whether it is in the right range. Instead, this proposes checking only once (I think this removes a compare and a shift in the process: [godbolt](https://godbolt.org/z/e5Tora)).

before:
```
        test char::methods::bench_to_ascii_lowercase                    ... bench:      11,196 ns/iter (+/- 632)
        test char::methods::bench_to_ascii_uppercase                    ... bench:      11,656 ns/iter (+/- 671)
```
after:
```
         test char::methods::bench_to_ascii_lowercase                    ... bench:       9,612 ns/iter (+/- 979)
         test char::methods::bench_to_ascii_uppercase                    ... bench:       8,241 ns/iter (+/- 701)
```

(calling u8::to_ascii_lowercase and letting that flip the 5th bit is also an option, but it's more instructions. I'm thinking for things around ascii and char we want to be as efficient as possible.)
-rw-r--r--library/core/benches/ascii.rs6
-rw-r--r--library/core/benches/char/methods.rs10
-rw-r--r--library/core/src/char/methods.rs12
-rw-r--r--library/core/src/num/mod.rs13
4 files changed, 35 insertions, 6 deletions
diff --git a/library/core/benches/ascii.rs b/library/core/benches/ascii.rs
index bc59c378609..64938745a4a 100644
--- a/library/core/benches/ascii.rs
+++ b/library/core/benches/ascii.rs
@@ -66,6 +66,8 @@ macro_rules! benches {
 use test::black_box;
 use test::Bencher;
 
+const ASCII_CASE_MASK: u8 = 0b0010_0000;
+
 benches! {
     fn case00_alloc_only(_bytes: &mut [u8]) {}
 
@@ -204,7 +206,7 @@ benches! {
             }
         }
         for byte in bytes {
-            *byte &= !((is_ascii_lowercase(*byte) as u8) << 5)
+            *byte &= !((is_ascii_lowercase(*byte) as u8) * ASCII_CASE_MASK)
         }
     }
 
@@ -216,7 +218,7 @@ benches! {
             }
         }
         for byte in bytes {
-            *byte -= (is_ascii_lowercase(*byte) as u8) << 5
+            *byte -= (is_ascii_lowercase(*byte) as u8) * ASCII_CASE_MASK
         }
     }
 
diff --git a/library/core/benches/char/methods.rs b/library/core/benches/char/methods.rs
index a9a08a4d762..de4b63030fa 100644
--- a/library/core/benches/char/methods.rs
+++ b/library/core/benches/char/methods.rs
@@ -35,3 +35,13 @@ fn bench_to_digit_radix_var(b: &mut Bencher) {
             .min()
     })
 }
+
+#[bench]
+fn bench_to_ascii_uppercase(b: &mut Bencher) {
+    b.iter(|| CHARS.iter().cycle().take(10_000).map(|c| c.to_ascii_uppercase()).min())
+}
+
+#[bench]
+fn bench_to_ascii_lowercase(b: &mut Bencher) {
+    b.iter(|| CHARS.iter().cycle().take(10_000).map(|c| c.to_ascii_lowercase()).min())
+}
diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs
index 64ae7db0d9b..4390342134d 100644
--- a/library/core/src/char/methods.rs
+++ b/library/core/src/char/methods.rs
@@ -1088,7 +1088,11 @@ impl char {
     #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
     #[inline]
     pub fn to_ascii_uppercase(&self) -> char {
-        if self.is_ascii() { (*self as u8).to_ascii_uppercase() as char } else { *self }
+        if self.is_ascii_lowercase() {
+            (*self as u8).ascii_change_case_unchecked() as char
+        } else {
+            *self
+        }
     }
 
     /// Makes a copy of the value in its ASCII lower case equivalent.
@@ -1116,7 +1120,11 @@ impl char {
     #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
     #[inline]
     pub fn to_ascii_lowercase(&self) -> char {
-        if self.is_ascii() { (*self as u8).to_ascii_lowercase() as char } else { *self }
+        if self.is_ascii_uppercase() {
+            (*self as u8).ascii_change_case_unchecked() as char
+        } else {
+            *self
+        }
     }
 
     /// Checks that two values are an ASCII case-insensitive match.
diff --git a/library/core/src/num/mod.rs b/library/core/src/num/mod.rs
index 6bdfa18fa43..c13f000a736 100644
--- a/library/core/src/num/mod.rs
+++ b/library/core/src/num/mod.rs
@@ -152,6 +152,9 @@ impl isize {
      usize_isize_to_xe_bytes_doc!(), usize_isize_from_xe_bytes_doc!() }
 }
 
+/// If the 6th bit is set, the ASCII letter is lower case.
+const ASCII_CASE_MASK: u8 = 0b0010_0000;
+
 #[lang = "u8"]
 impl u8 {
     uint_impl! { u8, u8, 8, 255, 2, "0x82", "0xa", "0x12", "0x12", "0x48", "[0x12]",
@@ -195,7 +198,7 @@ impl u8 {
     #[inline]
     pub fn to_ascii_uppercase(&self) -> u8 {
         // Unset the fifth bit if this is a lowercase letter
-        *self & !((self.is_ascii_lowercase() as u8) << 5)
+        *self & !((self.is_ascii_lowercase() as u8) * ASCII_CASE_MASK)
     }
 
     /// Makes a copy of the value in its ASCII lower case equivalent.
@@ -218,7 +221,13 @@ impl u8 {
     #[inline]
     pub fn to_ascii_lowercase(&self) -> u8 {
         // Set the fifth bit if this is an uppercase letter
-        *self | ((self.is_ascii_uppercase() as u8) << 5)
+        *self | (self.is_ascii_uppercase() as u8 * ASCII_CASE_MASK)
+    }
+
+    /// Flips the ASCII case bit. Assumes self is an ASCII letter.
+    #[inline]
+    pub(crate) fn ascii_change_case_unchecked(&self) -> u8 {
+        *self ^ ASCII_CASE_MASK
     }
 
     /// Checks that two values are an ASCII case-insensitive match.