about summary refs log tree commit diff
path: root/compiler/rustc_lexer
diff options
context:
space:
mode:
authorEsteban Küber <esteban@kuber.com.ar>2024-04-10 21:12:45 +0000
committerEsteban Küber <esteban@kuber.com.ar>2024-04-10 23:19:27 +0000
commit19821ad23474a3d056feac94f11569841764eb87 (patch)
tree112ef36f05e4193a49e95a22d1e2adda911b0ac2 /compiler/rustc_lexer
parente78913baef70895c966f0456ad16086a6a9aa37b (diff)
downloadrust-19821ad23474a3d056feac94f11569841764eb87.tar.gz
rust-19821ad23474a3d056feac94f11569841764eb87.zip
Properly handle emojis as literal prefix in macros
Do not accept the following

```rust
macro_rules! lexes {($($_:tt)*) => {}}
lexes!(🐛"foo");
```

Before, invalid emoji identifiers were gated during parsing instead of lexing in all cases, but this didn't account for macro expansion of literal prefixes.

Fix #123696.
Diffstat (limited to 'compiler/rustc_lexer')
-rw-r--r--compiler/rustc_lexer/src/lib.rs6
1 files changed, 5 insertions, 1 deletions
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index ca84e930c24..83fff98bad5 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -88,6 +88,10 @@ pub enum TokenKind {
     /// tokens.
     UnknownPrefix,
 
+    /// Similar to the above, but *always* an error on every edition. This is used
+    /// for emoji identifier recovery, as those are not meant to be ever accepted.
+    InvalidPrefix,
+
     /// Examples: `12u8`, `1.0e-40`, `b"123"`. Note that `_` is an invalid
     /// suffix, but may be present here on string and float literals. Users of
     /// this type will need to check for and reject that case.
@@ -528,7 +532,7 @@ impl Cursor<'_> {
         // Known prefixes must have been handled earlier. So if
         // we see a prefix here, it is definitely an unknown prefix.
         match self.first() {
-            '#' | '"' | '\'' => UnknownPrefix,
+            '#' | '"' | '\'' => InvalidPrefix,
             _ => InvalidIdent,
         }
     }