about summary refs log tree commit diff
path: root/compiler
diff options
context:
space:
mode:
authorMatthias Krüger <matthias.krueger@famsik.de>2023-07-31 22:51:15 +0200
committerGitHub <noreply@github.com>2023-07-31 22:51:15 +0200
commit57c57a555bc21b490532be5b43bdf00227f5beaa (patch)
tree45a9504d6e647672e38834301ca753d14c0325fa /compiler
parent7c6942a11bb49c7847fb565d38dc9318c6ac5117 (diff)
parentbca79a26d80147e3bcf87d6d5e95ff4a303d7eda (diff)
downloadrust-57c57a555bc21b490532be5b43bdf00227f5beaa.tar.gz
rust-57c57a555bc21b490532be5b43bdf00227f5beaa.zip
Rollup merge of #114193 - crlf0710:lexer_unicode15, r=Manishearth
Update lexer emoji diagnostics to Unicode 15.0

This replaces the `unic-emoji-char` dep tree (which hasn't been updated for a while) with `unicode-properties` crate which contains Unicode 15.0 data.

Improves diagnostics for added emoji characters in recent years. (See tests).

cc #101840

cc ``@Manishearth``
Diffstat (limited to 'compiler')
-rw-r--r--compiler/rustc_lexer/Cargo.toml6
-rw-r--r--compiler/rustc_lexer/src/lib.rs11
2 files changed, 9 insertions, 8 deletions
diff --git a/compiler/rustc_lexer/Cargo.toml b/compiler/rustc_lexer/Cargo.toml
index 23294dc2e1b..2211ac1c8a7 100644
--- a/compiler/rustc_lexer/Cargo.toml
+++ b/compiler/rustc_lexer/Cargo.toml
@@ -16,7 +16,11 @@ Rust lexer used by rustc. No stability guarantees are provided.
 # Note that this crate purposefully does not depend on other rustc crates
 [dependencies]
 unicode-xid = "0.2.0"
-unic-emoji-char = "0.9.0"
+
+[dependencies.unicode-properties]
+version = "0.1.0"
+default-features = false
+features = ["emoji"]
 
 [dev-dependencies]
 expect-test = "1.4.0"
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index d511d2b1280..43dfd34a6ff 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -34,6 +34,7 @@ pub use crate::cursor::Cursor;
 use self::LiteralKind::*;
 use self::TokenKind::*;
 use crate::cursor::EOF_CHAR;
+use unicode_properties::UnicodeEmoji;
 
 /// Parsed token.
 /// It doesn't contain information about data that has been parsed,
@@ -428,9 +429,7 @@ impl Cursor<'_> {
                 Literal { kind, suffix_start }
             }
             // Identifier starting with an emoji. Only lexed for graceful error recovery.
-            c if !c.is_ascii() && unic_emoji_char::is_emoji(c) => {
-                self.fake_ident_or_unknown_prefix()
-            }
+            c if !c.is_ascii() && c.is_emoji_char() => self.fake_ident_or_unknown_prefix(),
             _ => Unknown,
         };
         let res = Token::new(token_kind, self.pos_within_token());
@@ -514,9 +513,7 @@ impl Cursor<'_> {
         // we see a prefix here, it is definitely an unknown prefix.
         match self.first() {
             '#' | '"' | '\'' => UnknownPrefix,
-            c if !c.is_ascii() && unic_emoji_char::is_emoji(c) => {
-                self.fake_ident_or_unknown_prefix()
-            }
+            c if !c.is_ascii() && c.is_emoji_char() => self.fake_ident_or_unknown_prefix(),
             _ => Ident,
         }
     }
@@ -525,7 +522,7 @@ impl Cursor<'_> {
         // Start is already eaten, eat the rest of identifier.
         self.eat_while(|c| {
             unicode_xid::UnicodeXID::is_xid_continue(c)
-                || (!c.is_ascii() && unic_emoji_char::is_emoji(c))
+                || (!c.is_ascii() && c.is_emoji_char())
                 || c == '\u{200d}'
         });
         // Known prefixes must have been handled earlier. So if