about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2021-06-10 23:14:11 +0000
committerbors <bors@rust-lang.org>2021-06-10 23:14:11 +0000
commit46ad16b70f16795f419eb04b823f4c6485867b32 (patch)
tree0775edb869a31b73e3f242c9a8cbd60fc18145c2
parent16e18395ce33ca1ebfe60a591fb2f9317a75d822 (diff)
parent9c3d81e186c36091b1609db88cbec961c2fc49ab (diff)
downloadrust-46ad16b70f16795f419eb04b823f4c6485867b32.tar.gz
rust-46ad16b70f16795f419eb04b823f4c6485867b32.zip
Auto merge of #85630 - gilescope:to_digit_speedup3, r=nagisa
to_digit simplification (less jumps)

I just realised we might be able to make use of the fact that changing case in ascii is easy to help simplify to_digit some more.

It looks a bit cleaner and it looks like it's less jumps and there's less instructions in the generated assembly:

https://godbolt.org/z/84Erh5dhz

The benchmarks don't really tell me much. Maybe a slight improvement on the var radix.

Before:
```
test char::methods::bench_to_digit_radix_10                     ... bench:      53,819 ns/iter (+/- 8,314)
test char::methods::bench_to_digit_radix_16                     ... bench:      57,265 ns/iter (+/- 10,730)
test char::methods::bench_to_digit_radix_2                      ... bench:      55,077 ns/iter (+/- 5,431)
test char::methods::bench_to_digit_radix_36                     ... bench:      56,549 ns/iter (+/- 3,248)
test char::methods::bench_to_digit_radix_var                    ... bench:      43,848 ns/iter (+/- 3,189)

test char::methods::bench_to_digit_radix_10                     ... bench:      51,707 ns/iter (+/- 10,946)
test char::methods::bench_to_digit_radix_16                     ... bench:      52,835 ns/iter (+/- 2,689)
test char::methods::bench_to_digit_radix_2                      ... bench:      51,012 ns/iter (+/- 2,746)
test char::methods::bench_to_digit_radix_36                     ... bench:      53,210 ns/iter (+/- 8,645)
test char::methods::bench_to_digit_radix_var                    ... bench:      40,386 ns/iter (+/- 4,711)

test char::methods::bench_to_digit_radix_10                     ... bench:      54,088 ns/iter (+/- 5,677)
test char::methods::bench_to_digit_radix_16                     ... bench:      55,972 ns/iter (+/- 17,229)
test char::methods::bench_to_digit_radix_2                      ... bench:      52,083 ns/iter (+/- 2,425)
test char::methods::bench_to_digit_radix_36                     ... bench:      54,132 ns/iter (+/- 1,548)
test char::methods::bench_to_digit_radix_var                    ... bench:      41,250 ns/iter (+/- 5,299)
```
After:
```
test char::methods::bench_to_digit_radix_10                     ... bench:      48,907 ns/iter (+/- 19,449)
test char::methods::bench_to_digit_radix_16                     ... bench:      52,673 ns/iter (+/- 8,122)
test char::methods::bench_to_digit_radix_2                      ... bench:      48,509 ns/iter (+/- 2,885)
test char::methods::bench_to_digit_radix_36                     ... bench:      50,526 ns/iter (+/- 4,610)
test char::methods::bench_to_digit_radix_var                    ... bench:      38,618 ns/iter (+/- 3,180)

test char::methods::bench_to_digit_radix_10                     ... bench:      54,202 ns/iter (+/- 6,994)
test char::methods::bench_to_digit_radix_16                     ... bench:      56,585 ns/iter (+/- 8,448)
test char::methods::bench_to_digit_radix_2                      ... bench:      50,548 ns/iter (+/- 1,674)
test char::methods::bench_to_digit_radix_36                     ... bench:      52,749 ns/iter (+/- 2,576)
test char::methods::bench_to_digit_radix_var                    ... bench:      40,215 ns/iter (+/- 3,327)

test char::methods::bench_to_digit_radix_10                     ... bench:      50,233 ns/iter (+/- 22,272)
test char::methods::bench_to_digit_radix_16                     ... bench:      50,841 ns/iter (+/- 19,981)
test char::methods::bench_to_digit_radix_2                      ... bench:      50,386 ns/iter (+/- 4,555)
test char::methods::bench_to_digit_radix_36                     ... bench:      52,369 ns/iter (+/- 2,737)
test char::methods::bench_to_digit_radix_var                    ... bench:      40,417 ns/iter (+/- 2,766)
```

I removed the likely as it resulted in a few less instructions. (It's not been in there long - I added it in the last to_digit iteration).
-rw-r--r--library/core/src/char/methods.rs24
-rw-r--r--library/core/src/lib.rs1
-rw-r--r--library/core/tests/char.rs12
3 files changed, 21 insertions, 16 deletions
diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs
index 2da3d6a72fb..80d0890551f 100644
--- a/library/core/src/char/methods.rs
+++ b/library/core/src/char/methods.rs
@@ -1,6 +1,5 @@
 //! impl char {}
 
-use crate::intrinsics::likely;
 use crate::slice;
 use crate::str::from_utf8_unchecked_mut;
 use crate::unicode::printable::is_printable;
@@ -332,21 +331,16 @@ impl char {
     #[inline]
     pub fn to_digit(self, radix: u32) -> Option<u32> {
         assert!(radix <= 36, "to_digit: radix is too high (maximum 36)");
-        // the code is split up here to improve execution speed for cases where
-        // the `radix` is constant and 10 or smaller
-        let val = if likely(radix <= 10) {
-            // If not a digit, a number greater than radix will be created.
-            (self as u32).wrapping_sub('0' as u32)
-        } else {
-            match self {
-                '0'..='9' => self as u32 - '0' as u32,
-                'a'..='z' => self as u32 - 'a' as u32 + 10,
-                'A'..='Z' => self as u32 - 'A' as u32 + 10,
-                _ => return None,
+        // If not a digit, a number greater than radix will be created.
+        let mut digit = (self as u32).wrapping_sub('0' as u32);
+        if radix > 10 {
+            if digit < 10 {
+                return Some(digit);
             }
-        };
-
-        if val < radix { Some(val) } else { None }
+            // Force the 6th bit to be set to ensure ascii is lower case.
+            digit = (self as u32 | 0b10_0000).wrapping_sub('a' as u32).saturating_add(10);
+        }
+        (digit < radix).then_some(digit)
     }
 
     /// Returns an iterator that yields the hexadecimal Unicode escape of a
diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs
index d4e4c5b0d3e..41a4eab3c85 100644
--- a/library/core/src/lib.rs
+++ b/library/core/src/lib.rs
@@ -65,6 +65,7 @@
 #![feature(allow_internal_unstable)]
 #![feature(arbitrary_self_types)]
 #![feature(asm)]
+#![feature(bool_to_option)]
 #![feature(cfg_target_has_atomic)]
 #![feature(const_heap)]
 #![feature(const_alloc_layout)]
diff --git a/library/core/tests/char.rs b/library/core/tests/char.rs
index c16f54081ce..51eca1e05d3 100644
--- a/library/core/tests/char.rs
+++ b/library/core/tests/char.rs
@@ -67,10 +67,20 @@ fn test_to_digit() {
     assert_eq!('A'.to_digit(16), Some(10));
     assert_eq!('b'.to_digit(16), Some(11));
     assert_eq!('B'.to_digit(16), Some(11));
+    assert_eq!('A'.to_digit(36), Some(10));
     assert_eq!('z'.to_digit(36), Some(35));
     assert_eq!('Z'.to_digit(36), Some(35));
-    assert_eq!(' '.to_digit(10), None);
+    assert_eq!('['.to_digit(36), None);
+    assert_eq!('`'.to_digit(36), None);
+    assert_eq!('{'.to_digit(36), None);
     assert_eq!('$'.to_digit(36), None);
+    assert_eq!('@'.to_digit(16), None);
+    assert_eq!('G'.to_digit(16), None);
+    assert_eq!('g'.to_digit(16), None);
+    assert_eq!(' '.to_digit(10), None);
+    assert_eq!('/'.to_digit(10), None);
+    assert_eq!(':'.to_digit(10), None);
+    assert_eq!(':'.to_digit(11), None);
 }
 
 #[test]