about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Charles Lew <crlf0710@gmail.com> 2023-07-29 08:47:21 +0800
committer: Charles Lew <crlf0710@gmail.com> 2023-07-29 08:47:21 +0800
commit: bca79a26d80147e3bcf87d6d5e95ff4a303d7eda (patch)
tree: e4d1c5026d797790077612c3477259fb64de4dc2
parent: 04abc370b9f3855b28172b65a7f7d5a433f41412 (diff)
download: rust-bca79a26d80147e3bcf87d6d5e95ff4a303d7eda.tar.gz
download: rust-bca79a26d80147e3bcf87d6d5e95ff4a303d7eda.zip
Update lexer emoji diagnostics to Unicode 15.0
-rw-r--r-- Cargo.lock | 49
-rw-r--r-- compiler/rustc_lexer/Cargo.toml | 6
-rw-r--r-- compiler/rustc_lexer/src/lib.rs | 11
-rw-r--r-- src/tools/tidy/src/deps.rs | 6
-rw-r--r-- tests/ui/lexer/lex-emoji-identifiers.rs | 6
-rw-r--r-- tests/ui/lexer/lex-emoji-identifiers.stderr | 34
6 files changed, 36 insertions, 76 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 45959c039e7..56390d1fb4f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3785,7 +3785,7 @@ name = "rustc_lexer"
 version = "0.1.0"
 dependencies = [
  "expect-test",
- "unic-emoji-char",
+ "unicode-properties",
  "unicode-xid",
 ]
 
@@ -5446,38 +5446,6 @@ dependencies = [
 ]
 
 [[package]]
-name = "unic-char-property"
-version = "0.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a8c57a407d9b6fa02b4795eb81c5b6652060a15a7903ea981f3d723e6c0be221"
-dependencies = [
- "unic-char-range",
-]
-
-[[package]]
-name = "unic-char-range"
-version = "0.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0398022d5f700414f6b899e10b8348231abf9173fa93144cbc1a43b9793c1fbc"
-
-[[package]]
-name = "unic-common"
-version = "0.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc"
-
-[[package]]
-name = "unic-emoji-char"
-version = "0.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0b07221e68897210270a38bde4babb655869637af0f69407f96053a34f76494d"
-dependencies = [
- "unic-char-property",
- "unic-char-range",
- "unic-ucd-version",
-]
-
-[[package]]
 name = "unic-langid"
 version = "0.9.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -5521,15 +5489,6 @@ dependencies = [
 ]
 
 [[package]]
-name = "unic-ucd-version"
-version = "0.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "96bd2f2237fe450fcd0a1d2f5f4e91711124f7857ba2e964247776ebeeb7b0c4"
-dependencies = [
- "unic-common",
-]
-
-[[package]]
 name = "unicase"
 version = "2.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -5567,6 +5526,12 @@ dependencies = [
 ]
 
 [[package]]
+name = "unicode-properties"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c7f91c8b21fbbaa18853c3d0801c78f4fc94cdb976699bb03e832e75f7fd22f0"
+
+[[package]]
 name = "unicode-script"
 version = "0.5.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/compiler/rustc_lexer/Cargo.toml b/compiler/rustc_lexer/Cargo.toml
index 23294dc2e1b..2211ac1c8a7 100644
--- a/compiler/rustc_lexer/Cargo.toml
+++ b/compiler/rustc_lexer/Cargo.toml
@@ -16,7 +16,11 @@ Rust lexer used by rustc. No stability guarantees are provided.
 # Note that this crate purposefully does not depend on other rustc crates
 [dependencies]
 unicode-xid = "0.2.0"
-unic-emoji-char = "0.9.0"
+
+[dependencies.unicode-properties]
+version = "0.1.0"
+default-features = false
+features = ["emoji"]
 
 [dev-dependencies]
 expect-test = "1.4.0"
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index d511d2b1280..43dfd34a6ff 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -34,6 +34,7 @@ pub use crate::cursor::Cursor;
 use self::LiteralKind::*;
 use self::TokenKind::*;
 use crate::cursor::EOF_CHAR;
+use unicode_properties::UnicodeEmoji;
 
 /// Parsed token.
 /// It doesn't contain information about data that has been parsed,
@@ -428,9 +429,7 @@ impl Cursor<'_> {
                 Literal { kind, suffix_start }
             }
             // Identifier starting with an emoji. Only lexed for graceful error recovery.
-            c if !c.is_ascii() && unic_emoji_char::is_emoji(c) => {
-                self.fake_ident_or_unknown_prefix()
-            }
+            c if !c.is_ascii() && c.is_emoji_char() => self.fake_ident_or_unknown_prefix(),
             _ => Unknown,
         };
         let res = Token::new(token_kind, self.pos_within_token());
@@ -514,9 +513,7 @@ impl Cursor<'_> {
         // we see a prefix here, it is definitely an unknown prefix.
         match self.first() {
             '#' | '"' | '\'' => UnknownPrefix,
-            c if !c.is_ascii() && unic_emoji_char::is_emoji(c) => {
-                self.fake_ident_or_unknown_prefix()
-            }
+            c if !c.is_ascii() && c.is_emoji_char() => self.fake_ident_or_unknown_prefix(),
             _ => Ident,
         }
     }
@@ -525,7 +522,7 @@ impl Cursor<'_> {
         // Start is already eaten, eat the rest of identifier.
         self.eat_while(|c| {
             unicode_xid::UnicodeXID::is_xid_continue(c)
-                || (!c.is_ascii() && unic_emoji_char::is_emoji(c))
+                || (!c.is_ascii() && c.is_emoji_char())
                 || c == '\u{200d}'
         });
         // Known prefixes must have been handled earlier. So if
diff --git a/src/tools/tidy/src/deps.rs b/src/tools/tidy/src/deps.rs
index 57cbfe68be4..a015c36d7eb 100644
--- a/src/tools/tidy/src/deps.rs
+++ b/src/tools/tidy/src/deps.rs
@@ -270,18 +270,14 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[
     "twox-hash",
     "type-map",
     "typenum",
-    "unic-char-property",
-    "unic-char-range",
-    "unic-common",
-    "unic-emoji-char",
     "unic-langid",
     "unic-langid-impl",
     "unic-langid-macros",
     "unic-langid-macros-impl",
-    "unic-ucd-version",
     "unicase",
     "unicode-ident",
     "unicode-normalization",
+    "unicode-properties",
     "unicode-script",
     "unicode-security",
     "unicode-width",
diff --git a/tests/ui/lexer/lex-emoji-identifiers.rs b/tests/ui/lexer/lex-emoji-identifiers.rs
index 91b5929c0fe..decf2f00587 100644
--- a/tests/ui/lexer/lex-emoji-identifiers.rs
+++ b/tests/ui/lexer/lex-emoji-identifiers.rs
@@ -1,9 +1,7 @@
 fn invalid_emoji_usages() {
     let arrow↔️ = "basic emoji"; //~ ERROR: identifiers cannot contain emoji
-    // FIXME
-    let planet🪐 = "basic emoji"; //~ ERROR: unknown start of token
-    // FIXME
-    let wireless🛜 = "basic emoji"; //~ ERROR: unknown start of token
+    let planet🪐 = "basic emoji"; //~ ERROR: identifiers cannot contain emoji
+    let wireless🛜 = "basic emoji"; //~ ERROR: identifiers cannot contain emoji
     // FIXME
     let key1️⃣ = "keycap sequence"; //~ ERROR: unknown start of token
                                     //~^ WARN: identifier contains uncommon Unicode codepoints
diff --git a/tests/ui/lexer/lex-emoji-identifiers.stderr b/tests/ui/lexer/lex-emoji-identifiers.stderr
index 6237c5d0236..747825fa2a9 100644
--- a/tests/ui/lexer/lex-emoji-identifiers.stderr
+++ b/tests/ui/lexer/lex-emoji-identifiers.stderr
@@ -1,17 +1,5 @@
-error: unknown start of token: \u{1fa90}
-  --> $DIR/lex-emoji-identifiers.rs:4:15
-   |
-LL |     let planet🪐 = "basic emoji";
-   |               ^^
-
-error: unknown start of token: \u{1f6dc}
-  --> $DIR/lex-emoji-identifiers.rs:6:17
-   |
-LL |     let wireless🛜 = "basic emoji";
-   |                 ^^
-
 error: unknown start of token: \u{20e3}
-  --> $DIR/lex-emoji-identifiers.rs:8:14
+  --> $DIR/lex-emoji-identifiers.rs:6:14
    |
 LL |     let key1️⃣ = "keycap sequence";
    |             ^
@@ -22,26 +10,38 @@ error: identifiers cannot contain emoji: `arrow↔️`
 LL |     let arrow↔️ = "basic emoji";
    |         ^^^^^^
 
+error: identifiers cannot contain emoji: `planet🪐`
+  --> $DIR/lex-emoji-identifiers.rs:3:9
+   |
+LL |     let planet🪐 = "basic emoji";
+   |         ^^^^^^^^
+
+error: identifiers cannot contain emoji: `wireless🛜`
+  --> $DIR/lex-emoji-identifiers.rs:4:9
+   |
+LL |     let wireless🛜 = "basic emoji";
+   |         ^^^^^^^^^^
+
 error: identifiers cannot contain emoji: `flag🇺🇳`
-  --> $DIR/lex-emoji-identifiers.rs:10:9
+  --> $DIR/lex-emoji-identifiers.rs:8:9
    |
 LL |     let flag🇺🇳 = "flag sequence";
    |         ^^^^^^
 
 error: identifiers cannot contain emoji: `wales🏴`
-  --> $DIR/lex-emoji-identifiers.rs:11:9
+  --> $DIR/lex-emoji-identifiers.rs:9:9
    |
 LL |     let wales🏴 = "tag sequence";
    |         ^^^^^^^
 
 error: identifiers cannot contain emoji: `folded🙏🏿`
-  --> $DIR/lex-emoji-identifiers.rs:12:9
+  --> $DIR/lex-emoji-identifiers.rs:10:9
    |
 LL |     let folded🙏🏿 = "modifier sequence";
    |         ^^^^^^^^^^
 
 warning: identifier contains uncommon Unicode codepoints
-  --> $DIR/lex-emoji-identifiers.rs:8:9
+  --> $DIR/lex-emoji-identifiers.rs:6:9
    |
 LL |     let key1️⃣ = "keycap sequence";
    |         ^^^^