about summary refs log tree commit diff
path: root/src/libsyntax
diff options
context:
space:
mode:
authorMazdak Farrokhzad <twingoow@gmail.com>2019-09-05 12:11:04 +0200
committerGitHub <noreply@github.com>2019-09-05 12:11:04 +0200
commita8d4e4f435a3287989ac1b20f86b87f6fca5e09b (patch)
tree69b3a040b81a76b51ee36ce5000249971020c7c4 /src/libsyntax
parenta24f636e60a5da57ab641d800ac5952bbde98b65 (diff)
parent206fe8e1c37d55d0bf3a82baaa23eb5fb148880b (diff)
downloadrust-a8d4e4f435a3287989ac1b20f86b87f6fca5e09b.tar.gz
rust-a8d4e4f435a3287989ac1b20f86b87f6fca5e09b.zip
Rollup merge of #62848 - matklad:xid-unicode, r=petrochenkov
Use unicode-xid crate instead of libcore

This PR proposes to remove `char::is_xid_start` and `char::is_xid_continue` functions from `libcore` and use `unicode_xid` crate from crates.io (note that this crate is already present in rust-lang/rust's Cargo.lock).

Reasons to do this:

* removing rustc-binary-specific stuff from libcore
* making sure that, across the ecosystem, there's a single definition of what rust identifier is (`unicode-xid` has almost 10 million downs, as a `proc_macro2` dependency)
* making it easier to share `rustc_lexer` crate with rust-analyzer: no need to `#[cfg]` if we are building as a part of the compiler

Reasons not to do this:

* increased maintenance burden: we'll need to upgrade unicode version both in libcore and in unicode-xid. However, this shouldn't be a too heavy burden: just running `./unicode.py` after new unicode version. I (@matklad) am ready to be a t-compiler side maintainer of unicode-xid. Moreover, given that xid-unicode is an important dependency of syn, *someone* needs to maintain it anyway.
* xid-unicode implementation is significantly slower. It uses a more compact table with binary search, instead of a trie. However, this shouldn't matter in practice, because we have fast-path for ascii anyway, and code size savings is a plus. Moreover, in #59706 not using libcore turned out to be *faster*, presumably beacause checking for whitespace with match is even faster.

<details>

<summary>old description</summary>

Followup to #59706

r? @eddyb

Note that this doesn't actually remove tables from libcore, to avoid conflict with https://github.com/rust-lang/rust/pull/62641.

cc https://github.com/unicode-rs/unicode-xid/pull/11

</details>
Diffstat (limited to 'src/libsyntax')
-rw-r--r--src/libsyntax/ext/proc_macro_server.rs3
-rw-r--r--src/libsyntax/tests.rs14
2 files changed, 6 insertions, 11 deletions
diff --git a/src/libsyntax/ext/proc_macro_server.rs b/src/libsyntax/ext/proc_macro_server.rs
index 1a26b17dac7..544ec789d80 100644
--- a/src/libsyntax/ext/proc_macro_server.rs
+++ b/src/libsyntax/ext/proc_macro_server.rs
@@ -322,8 +322,7 @@ impl Ident {
     fn is_valid(string: &str) -> bool {
         let mut chars = string.chars();
         if let Some(start) = chars.next() {
-            (start == '_' || start.is_xid_start())
-                && chars.all(|cont| cont == '_' || cont.is_xid_continue())
+            rustc_lexer::is_id_start(start) && chars.all(rustc_lexer::is_id_continue)
         } else {
             false
         }
diff --git a/src/libsyntax/tests.rs b/src/libsyntax/tests.rs
index c472212bc20..9b90b31f2d2 100644
--- a/src/libsyntax/tests.rs
+++ b/src/libsyntax/tests.rs
@@ -63,7 +63,7 @@ crate fn matches_codepattern(a : &str, b : &str) -> bool {
             (None, None) => return true,
             (None, _) => return false,
             (Some(&a), None) => {
-                if is_pattern_whitespace(a) {
+                if rustc_lexer::is_whitespace(a) {
                     break // trailing whitespace check is out of loop for borrowck
                 } else {
                     return false
@@ -72,11 +72,11 @@ crate fn matches_codepattern(a : &str, b : &str) -> bool {
             (Some(&a), Some(&b)) => (a, b)
         };
 
-        if is_pattern_whitespace(a) && is_pattern_whitespace(b) {
+        if rustc_lexer::is_whitespace(a) && rustc_lexer::is_whitespace(b) {
             // skip whitespace for a and b
             scan_for_non_ws_or_end(&mut a_iter);
             scan_for_non_ws_or_end(&mut b_iter);
-        } else if is_pattern_whitespace(a) {
+        } else if rustc_lexer::is_whitespace(a) {
             // skip whitespace for a
             scan_for_non_ws_or_end(&mut a_iter);
         } else if a == b {
@@ -88,20 +88,16 @@ crate fn matches_codepattern(a : &str, b : &str) -> bool {
     }
 
     // check if a has *only* trailing whitespace
-    a_iter.all(is_pattern_whitespace)
+    a_iter.all(rustc_lexer::is_whitespace)
 }
 
 /// Advances the given peekable `Iterator` until it reaches a non-whitespace character
 fn scan_for_non_ws_or_end<I: Iterator<Item = char>>(iter: &mut Peekable<I>) {
-    while iter.peek().copied().map(|c| is_pattern_whitespace(c)) == Some(true) {
+    while iter.peek().copied().map(|c| rustc_lexer::is_whitespace(c)) == Some(true) {
         iter.next();
     }
 }
 
-fn is_pattern_whitespace(c: char) -> bool {
-    rustc_lexer::character_properties::is_whitespace(c)
-}
-
 /// Identify a position in the text by the Nth occurrence of a string.
 struct Position {
     string: &'static str,