diff options
| author | bors <bors@rust-lang.org> | 2021-03-17 11:17:18 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2021-03-17 11:17:18 +0000 |
| commit | 0ce0fedb67fa66d50aa819ef8b12f1d89eb22d7d (patch) | |
| tree | 410e1e03c785d954f3894c78b4eb1f1aa9f30818 | |
| parent | 2c7490379d7e8854192c176039cfabf6acefe7ef (diff) | |
| parent | 229fdf839a348ddbdd2c6cf411767a565394a8ae (diff) | |
| download | rust-0ce0fedb67fa66d50aa819ef8b12f1d89eb22d7d.tar.gz rust-0ce0fedb67fa66d50aa819ef8b12f1d89eb22d7d.zip | |
Auto merge of #81358 - mcastorina:to-upper-lower-speed, r=joshtriplett
Add a check for ASCII characters in to_upper and to_lower This extra check has better performance. See discussion here: https://internals.rust-lang.org/t/to-upper-speed/13896 Thanks to `@gilescope` for helping discover and test this.
| -rw-r--r-- | library/core/benches/char/methods.rs | 30 | ||||
| -rw-r--r-- | library/core/src/unicode/unicode_data.rs | 20 |
2 files changed, 44 insertions, 6 deletions
diff --git a/library/core/benches/char/methods.rs b/library/core/benches/char/methods.rs index de4b63030fa..9408f83c32f 100644 --- a/library/core/benches/char/methods.rs +++ b/library/core/benches/char/methods.rs @@ -45,3 +45,33 @@ fn bench_to_ascii_uppercase(b: &mut Bencher) { fn bench_to_ascii_lowercase(b: &mut Bencher) { b.iter(|| CHARS.iter().cycle().take(10_000).map(|c| c.to_ascii_lowercase()).min()) } + +#[bench] +fn bench_ascii_mix_to_uppercase(b: &mut Bencher) { + b.iter(|| (0..=255).cycle().take(10_000).map(|b| char::from(b).to_uppercase()).count()) +} + +#[bench] +fn bench_ascii_mix_to_lowercase(b: &mut Bencher) { + b.iter(|| (0..=255).cycle().take(10_000).map(|b| char::from(b).to_lowercase()).count()) +} + +#[bench] +fn bench_ascii_char_to_uppercase(b: &mut Bencher) { + b.iter(|| (0..=127).cycle().take(10_000).map(|b| char::from(b).to_uppercase()).count()) +} + +#[bench] +fn bench_ascii_char_to_lowercase(b: &mut Bencher) { + b.iter(|| (0..=127).cycle().take(10_000).map(|b| char::from(b).to_lowercase()).count()) +} + +#[bench] +fn bench_non_ascii_char_to_uppercase(b: &mut Bencher) { + b.iter(|| (128..=255).cycle().take(10_000).map(|b| char::from(b).to_uppercase()).count()) +} + +#[bench] +fn bench_non_ascii_char_to_lowercase(b: &mut Bencher) { + b.iter(|| (128..=255).cycle().take(10_000).map(|b| char::from(b).to_lowercase()).count()) +} diff --git a/library/core/src/unicode/unicode_data.rs b/library/core/src/unicode/unicode_data.rs index 9c92a8ba28a..16803bf2e83 100644 --- a/library/core/src/unicode/unicode_data.rs +++ b/library/core/src/unicode/unicode_data.rs @@ -549,16 +549,24 @@ pub mod white_space { #[rustfmt::skip] pub mod conversions { pub fn to_lower(c: char) -> [char; 3] { - match bsearch_case_table(c, LOWERCASE_TABLE) { - None => [c, '\0', '\0'], - Some(index) => LOWERCASE_TABLE[index].1, + if c.is_ascii() { + [(c as u8).to_ascii_lowercase() as char, '\0', '\0'] + } else { + match bsearch_case_table(c, LOWERCASE_TABLE) { + None => [c, '\0', '\0'], + Some(index) => LOWERCASE_TABLE[index].1, + } } } pub fn to_upper(c: char) -> [char; 3] { - match bsearch_case_table(c, UPPERCASE_TABLE) { - None => [c, '\0', '\0'], - Some(index) => UPPERCASE_TABLE[index].1, + if c.is_ascii() { + [(c as u8).to_ascii_uppercase() as char, '\0', '\0'] + } else { + match bsearch_case_table(c, UPPERCASE_TABLE) { + None => [c, '\0', '\0'], + Some(index) => UPPERCASE_TABLE[index].1, + } } } |
