diff options
| author | Matthias Krüger <matthias.krueger@famsik.de> | 2023-07-31 22:51:15 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-07-31 22:51:15 +0200 |
| commit | 57c57a555bc21b490532be5b43bdf00227f5beaa (patch) | |
| tree | 45a9504d6e647672e38834301ca753d14c0325fa /compiler | |
| parent | 7c6942a11bb49c7847fb565d38dc9318c6ac5117 (diff) | |
| parent | bca79a26d80147e3bcf87d6d5e95ff4a303d7eda (diff) | |
| download | rust-57c57a555bc21b490532be5b43bdf00227f5beaa.tar.gz rust-57c57a555bc21b490532be5b43bdf00227f5beaa.zip | |
Rollup merge of #114193 - crlf0710:lexer_unicode15, r=Manishearth
Update lexer emoji diagnostics to Unicode 15.0

This replaces the `unic-emoji-char` dep tree (which hasn't been updated for a while) with the `unicode-properties` crate, which contains Unicode 15.0 data. This improves diagnostics for emoji characters added in recent years (see tests).

cc #101840
cc @Manishearth
Diffstat (limited to 'compiler')
| -rw-r--r-- | compiler/rustc_lexer/Cargo.toml | 6 |
| -rw-r--r-- | compiler/rustc_lexer/src/lib.rs | 11 |
2 files changed, 9 insertions, 8 deletions
diff --git a/compiler/rustc_lexer/Cargo.toml b/compiler/rustc_lexer/Cargo.toml index 23294dc2e1b..2211ac1c8a7 100644 --- a/compiler/rustc_lexer/Cargo.toml +++ b/compiler/rustc_lexer/Cargo.toml @@ -16,7 +16,11 @@ Rust lexer used by rustc. No stability guarantees are provided. # Note that this crate purposefully does not depend on other rustc crates [dependencies] unicode-xid = "0.2.0" -unic-emoji-char = "0.9.0" + +[dependencies.unicode-properties] +version = "0.1.0" +default-features = false +features = ["emoji"] [dev-dependencies] expect-test = "1.4.0" diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index d511d2b1280..43dfd34a6ff 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -34,6 +34,7 @@ pub use crate::cursor::Cursor; use self::LiteralKind::*; use self::TokenKind::*; use crate::cursor::EOF_CHAR; +use unicode_properties::UnicodeEmoji; /// Parsed token. /// It doesn't contain information about data that has been parsed, @@ -428,9 +429,7 @@ impl Cursor<'_> { Literal { kind, suffix_start } } // Identifier starting with an emoji. Only lexed for graceful error recovery. - c if !c.is_ascii() && unic_emoji_char::is_emoji(c) => { - self.fake_ident_or_unknown_prefix() - } + c if !c.is_ascii() && c.is_emoji_char() => self.fake_ident_or_unknown_prefix(), _ => Unknown, }; let res = Token::new(token_kind, self.pos_within_token()); @@ -514,9 +513,7 @@ impl Cursor<'_> { // we see a prefix here, it is definitely an unknown prefix. match self.first() { '#' | '"' | '\'' => UnknownPrefix, - c if !c.is_ascii() && unic_emoji_char::is_emoji(c) => { - self.fake_ident_or_unknown_prefix() - } + c if !c.is_ascii() && c.is_emoji_char() => self.fake_ident_or_unknown_prefix(), _ => Ident, } } @@ -525,7 +522,7 @@ impl Cursor<'_> { // Start is already eaten, eat the rest of identifier. 
self.eat_while(|c| { unicode_xid::UnicodeXID::is_xid_continue(c) - || (!c.is_ascii() && unic_emoji_char::is_emoji(c)) + || (!c.is_ascii() && c.is_emoji_char()) || c == '\u{200d}' }); // Known prefixes must have been handled earlier. So if |
