From 19821ad23474a3d056feac94f11569841764eb87 Mon Sep 17 00:00:00 2001 From: Esteban Küber Date: Wed, 10 Apr 2024 21:12:45 +0000 Subject: Properly handle emojis as literal prefix in macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do not accept the following ```rust macro_rules! lexes {($($_:tt)*) => {}} lexes!(🐛"foo"); ``` Before, invalid emoji identifiers were gated during parsing instead of lexing in all cases, but this didn't account for macro expansion of literal prefixes. Fix #123696. --- compiler/rustc_parse/src/lexer/mod.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'compiler/rustc_parse/src') diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 69b48bf0aff..85c4c74e1e9 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -205,6 +205,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> { self.ident(start) } rustc_lexer::TokenKind::InvalidIdent + | rustc_lexer::TokenKind::InvalidPrefix // Do not recover an identifier with emoji if the codepoint is a confusable // with a recoverable substitution token, like `➖`. if !UNICODE_ARRAY @@ -302,7 +303,9 @@ impl<'psess, 'src> StringReader<'psess, 'src> { rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret), rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent), - rustc_lexer::TokenKind::Unknown | rustc_lexer::TokenKind::InvalidIdent => { + rustc_lexer::TokenKind::Unknown + | rustc_lexer::TokenKind::InvalidIdent + | rustc_lexer::TokenKind::InvalidPrefix => { // Don't emit diagnostics for sequences of the same invalid token if swallow_next_invalid > 0 { swallow_next_invalid -= 1; -- cgit 1.4.1-3-g733a5