about summary refs log tree commit diff
diff options
context:
space:
mode:
authorDylan DPC <dylan.dpc@gmail.com>2020-02-12 14:21:05 +0100
committerGitHub <noreply@github.com>2020-02-12 14:21:05 +0100
commit79ebf53cbb659175e863cdc4ca6341ee934376fd (patch)
treee7323c3be168e687fe658ed3c8c6b0f6353588b3
parentcd5441faf4e56d136d7c05d5eb55b4a41396edaf (diff)
parent4e7aeaf1b52ee66ede1adb104fcd08b0463eb0f9 (diff)
downloadrust-79ebf53cbb659175e863cdc4ca6341ee934376fd.tar.gz
rust-79ebf53cbb659175e863cdc4ca6341ee934376fd.zip
Rollup merge of #67585 - ranma42:fix/char-is-ascii-codegen, r=Amanieu
Improve `char::is_ascii_*` codegen

This PR is an attempt to fix https://github.com/rust-lang/rust/issues/65127

A couple of warnings:
 1. the generated code might be further improved (in LLVM and/or MIR) by emitting better comparison sequences; in particular, this would improve the performance of "complex" checks such as those in `is_ascii_punctuation`
 2. the second commit is currently marked "DO NOT MERGE", because it regresses SIMD on `u8` slices; this could likely be fixed by improving the computation/usage of demanded bits in LLVM

An alternative approach to remove the code duplication might be the use of macros, but currently most of the duplication is actually in the doc comments, so maybe just keeping the redundancy could be ok
-rw-r--r--src/libcore/char/methods.rs50
1 files changed, 40 insertions, 10 deletions
diff --git a/src/libcore/char/methods.rs b/src/libcore/char/methods.rs
index 2b52c48cb90..302400744e2 100644
--- a/src/libcore/char/methods.rs
+++ b/src/libcore/char/methods.rs
@@ -1075,7 +1075,10 @@ impl char {
     #[rustc_const_unstable(feature = "const_ascii_ctype_on_intrinsics", issue = "68983")]
     #[inline]
     pub const fn is_ascii_alphabetic(&self) -> bool {
-        self.is_ascii() && (*self as u8).is_ascii_alphabetic()
+        match *self {
+            'A'..='Z' | 'a'..='z' => true,
+            _ => false,
+        }
     }
 
     /// Checks if the value is an ASCII uppercase character:
@@ -1108,7 +1111,10 @@ impl char {
     #[rustc_const_unstable(feature = "const_ascii_ctype_on_intrinsics", issue = "68983")]
     #[inline]
     pub const fn is_ascii_uppercase(&self) -> bool {
-        self.is_ascii() && (*self as u8).is_ascii_uppercase()
+        match *self {
+            'A'..='Z' => true,
+            _ => false,
+        }
     }
 
     /// Checks if the value is an ASCII lowercase character:
@@ -1141,7 +1147,10 @@ impl char {
     #[rustc_const_unstable(feature = "const_ascii_ctype_on_intrinsics", issue = "68983")]
     #[inline]
     pub const fn is_ascii_lowercase(&self) -> bool {
-        self.is_ascii() && (*self as u8).is_ascii_lowercase()
+        match *self {
+            'a'..='z' => true,
+            _ => false,
+        }
     }
 
     /// Checks if the value is an ASCII alphanumeric character:
@@ -1177,7 +1186,10 @@ impl char {
     #[rustc_const_unstable(feature = "const_ascii_ctype_on_intrinsics", issue = "68983")]
     #[inline]
     pub const fn is_ascii_alphanumeric(&self) -> bool {
-        self.is_ascii() && (*self as u8).is_ascii_alphanumeric()
+        match *self {
+            '0'..='9' | 'A'..='Z' | 'a'..='z' => true,
+            _ => false,
+        }
     }
 
     /// Checks if the value is an ASCII decimal digit:
@@ -1210,7 +1222,10 @@ impl char {
     #[rustc_const_unstable(feature = "const_ascii_ctype_on_intrinsics", issue = "68983")]
     #[inline]
     pub const fn is_ascii_digit(&self) -> bool {
-        self.is_ascii() && (*self as u8).is_ascii_digit()
+        match *self {
+            '0'..='9' => true,
+            _ => false,
+        }
     }
 
     /// Checks if the value is an ASCII hexadecimal digit:
@@ -1246,7 +1261,10 @@ impl char {
     #[rustc_const_unstable(feature = "const_ascii_ctype_on_intrinsics", issue = "68983")]
     #[inline]
     pub const fn is_ascii_hexdigit(&self) -> bool {
-        self.is_ascii() && (*self as u8).is_ascii_hexdigit()
+        match *self {
+            '0'..='9' | 'A'..='F' | 'a'..='f' => true,
+            _ => false,
+        }
     }
 
     /// Checks if the value is an ASCII punctuation character:
@@ -1283,7 +1301,10 @@ impl char {
     #[rustc_const_unstable(feature = "const_ascii_ctype_on_intrinsics", issue = "68983")]
     #[inline]
     pub const fn is_ascii_punctuation(&self) -> bool {
-        self.is_ascii() && (*self as u8).is_ascii_punctuation()
+        match *self {
+            '!'..='/' | ':'..='@' | '['..='`' | '{'..='~' => true,
+            _ => false,
+        }
     }
 
     /// Checks if the value is an ASCII graphic character:
@@ -1316,7 +1337,10 @@ impl char {
     #[rustc_const_unstable(feature = "const_ascii_ctype_on_intrinsics", issue = "68983")]
     #[inline]
     pub const fn is_ascii_graphic(&self) -> bool {
-        self.is_ascii() && (*self as u8).is_ascii_graphic()
+        match *self {
+            '!'..='~' => true,
+            _ => false,
+        }
     }
 
     /// Checks if the value is an ASCII whitespace character:
@@ -1366,7 +1390,10 @@ impl char {
     #[rustc_const_unstable(feature = "const_ascii_ctype_on_intrinsics", issue = "68983")]
     #[inline]
     pub const fn is_ascii_whitespace(&self) -> bool {
-        self.is_ascii() && (*self as u8).is_ascii_whitespace()
+        match *self {
+            '\t' | '\n' | '\x0C' | '\r' | ' ' => true,
+            _ => false,
+        }
     }
 
     /// Checks if the value is an ASCII control character:
@@ -1401,6 +1428,9 @@ impl char {
     #[rustc_const_unstable(feature = "const_ascii_ctype_on_intrinsics", issue = "68983")]
     #[inline]
     pub const fn is_ascii_control(&self) -> bool {
-        self.is_ascii() && (*self as u8).is_ascii_control()
+        match *self {
+            '\0'..='\x1F' | '\x7F' => true,
+            _ => false,
+        }
     }
 }