about summary refs log tree commit diff
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2021-03-17 11:17:18 +0000
committer bors <bors@rust-lang.org> 2021-03-17 11:17:18 +0000
commit 0ce0fedb67fa66d50aa819ef8b12f1d89eb22d7d (patch)
tree 410e1e03c785d954f3894c78b4eb1f1aa9f30818
parent 2c7490379d7e8854192c176039cfabf6acefe7ef (diff)
parent 229fdf839a348ddbdd2c6cf411767a565394a8ae (diff)
download rust-0ce0fedb67fa66d50aa819ef8b12f1d89eb22d7d.tar.gz
rust-0ce0fedb67fa66d50aa819ef8b12f1d89eb22d7d.zip
Auto merge of #81358 - mcastorina:to-upper-lower-speed, r=joshtriplett
Add a check for ASCII characters in to_upper and to_lower

This extra check has better performance. See discussion here:
https://internals.rust-lang.org/t/to-upper-speed/13896

Thanks to `@gilescope` for helping discover and test this.
-rw-r--r-- library/core/benches/char/methods.rs 30
-rw-r--r-- library/core/src/unicode/unicode_data.rs 20
2 files changed, 44 insertions, 6 deletions
diff --git a/library/core/benches/char/methods.rs b/library/core/benches/char/methods.rs
index de4b63030fa..9408f83c32f 100644
--- a/library/core/benches/char/methods.rs
+++ b/library/core/benches/char/methods.rs
@@ -45,3 +45,33 @@ fn bench_to_ascii_uppercase(b: &mut Bencher) {
 fn bench_to_ascii_lowercase(b: &mut Bencher) {
     b.iter(|| CHARS.iter().cycle().take(10_000).map(|c| c.to_ascii_lowercase()).min())
 }
+
+#[bench]
+fn bench_ascii_mix_to_uppercase(b: &mut Bencher) {
+    b.iter(|| (0..=255).cycle().take(10_000).map(|b| char::from(b).to_uppercase()).count())
+}
+
+#[bench]
+fn bench_ascii_mix_to_lowercase(b: &mut Bencher) {
+    b.iter(|| (0..=255).cycle().take(10_000).map(|b| char::from(b).to_lowercase()).count())
+}
+
+#[bench]
+fn bench_ascii_char_to_uppercase(b: &mut Bencher) {
+    b.iter(|| (0..=127).cycle().take(10_000).map(|b| char::from(b).to_uppercase()).count())
+}
+
+#[bench]
+fn bench_ascii_char_to_lowercase(b: &mut Bencher) {
+    b.iter(|| (0..=127).cycle().take(10_000).map(|b| char::from(b).to_lowercase()).count())
+}
+
+#[bench]
+fn bench_non_ascii_char_to_uppercase(b: &mut Bencher) {
+    b.iter(|| (128..=255).cycle().take(10_000).map(|b| char::from(b).to_uppercase()).count())
+}
+
+#[bench]
+fn bench_non_ascii_char_to_lowercase(b: &mut Bencher) {
+    b.iter(|| (128..=255).cycle().take(10_000).map(|b| char::from(b).to_lowercase()).count())
+}
diff --git a/library/core/src/unicode/unicode_data.rs b/library/core/src/unicode/unicode_data.rs
index 9c92a8ba28a..16803bf2e83 100644
--- a/library/core/src/unicode/unicode_data.rs
+++ b/library/core/src/unicode/unicode_data.rs
@@ -549,16 +549,24 @@ pub mod white_space {
 #[rustfmt::skip]
 pub mod conversions {
     pub fn to_lower(c: char) -> [char; 3] {
-        match bsearch_case_table(c, LOWERCASE_TABLE) {
-            None => [c, '\0', '\0'],
-            Some(index) => LOWERCASE_TABLE[index].1,
+        if c.is_ascii() {
+            [(c as u8).to_ascii_lowercase() as char, '\0', '\0']
+        } else {
+            match bsearch_case_table(c, LOWERCASE_TABLE) {
+                None => [c, '\0', '\0'],
+                Some(index) => LOWERCASE_TABLE[index].1,
+            }
         }
     }
 
     pub fn to_upper(c: char) -> [char; 3] {
-        match bsearch_case_table(c, UPPERCASE_TABLE) {
-            None => [c, '\0', '\0'],
-            Some(index) => UPPERCASE_TABLE[index].1,
+        if c.is_ascii() {
+            [(c as u8).to_ascii_uppercase() as char, '\0', '\0']
+        } else {
+            match bsearch_case_table(c, UPPERCASE_TABLE) {
+                None => [c, '\0', '\0'],
+                Some(index) => UPPERCASE_TABLE[index].1,
+            }
         }
     }