about summary refs log tree commit diff
path: root/compiler/rustc_lexer/src
diff options
context:
space:
mode:
author: Nicholas Nethercote <n.nethercote@gmail.com> 2024-11-19 15:55:34 +1100
committer: Nicholas Nethercote <n.nethercote@gmail.com> 2024-11-19 18:06:22 +1100
commit: e9a0c3c98c5640070e15a3cb38860a7268c1dca2 (patch)
tree: 742f8f8a71c208ec8fce000b9d9b8bf2661a5b7e /compiler/rustc_lexer/src
parent: 2c7c3697dbaac3e0599aa0e7cd3581822caec17b (diff)
download: rust-e9a0c3c98c5640070e15a3cb38860a7268c1dca2.tar.gz
download: rust-e9a0c3c98c5640070e15a3cb38860a7268c1dca2.zip
Remove `TokenKind::InvalidPrefix`.
It was added in #123752 to handle some cases involving emoji, but it
isn't necessary because it's always treated the same as
`TokenKind::InvalidIdent`. This commit removes it, which makes things a
little simpler.
Diffstat (limited to 'compiler/rustc_lexer/src')
-rw-r--r-- compiler/rustc_lexer/src/lib.rs 21
1 file changed, 8 insertions, 13 deletions
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index c01dad810c4..bcb103957ba 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -99,10 +99,6 @@ pub enum TokenKind {
     /// several tokens: `'r` and `#` and `foo`.
     RawLifetime,
 
-    /// Similar to the above, but *always* an error on every edition. This is used
-    /// for emoji identifier recovery, as those are not meant to be ever accepted.
-    InvalidPrefix,
-
     /// Guarded string literal prefix: `#"` or `##`.
     ///
     /// Used for reserving "guarded strings" (RFC 3598) in edition 2024.
@@ -466,7 +462,7 @@ impl Cursor<'_> {
                 Literal { kind, suffix_start }
             }
             // Identifier starting with an emoji. Only lexed for graceful error recovery.
-            c if !c.is_ascii() && c.is_emoji_char() => self.invalid_ident_or_prefix(),
+            c if !c.is_ascii() && c.is_emoji_char() => self.invalid_ident(),
             _ => Unknown,
         };
         let res = Token::new(token_kind, self.pos_within_token());
@@ -550,23 +546,22 @@ impl Cursor<'_> {
         // we see a prefix here, it is definitely an unknown prefix.
         match self.first() {
             '#' | '"' | '\'' => UnknownPrefix,
-            c if !c.is_ascii() && c.is_emoji_char() => self.invalid_ident_or_prefix(),
+            c if !c.is_ascii() && c.is_emoji_char() => self.invalid_ident(),
             _ => Ident,
         }
     }
 
-    fn invalid_ident_or_prefix(&mut self) -> TokenKind {
+    fn invalid_ident(&mut self) -> TokenKind {
         // Start is already eaten, eat the rest of identifier.
         self.eat_while(|c| {
             const ZERO_WIDTH_JOINER: char = '\u{200d}';
             is_id_continue(c) || (!c.is_ascii() && c.is_emoji_char()) || c == ZERO_WIDTH_JOINER
         });
-        // Known prefixes must have been handled earlier. So if
-        // we see a prefix here, it is definitely an unknown prefix.
-        match self.first() {
-            '#' | '"' | '\'' => InvalidPrefix,
-            _ => InvalidIdent,
-        }
+        // An invalid identifier followed by '#' or '"' or '\'' could be
+        // interpreted as an invalid literal prefix. We don't bother doing that
+        // because the treatment of invalid identifiers and invalid prefixes
+        // would be the same.
+        InvalidIdent
     }
 
     fn c_or_byte_string(