Rollup merge of #126452 - compiler-errors:raw-lifetimes, r=spastorino

Implement raw lifetimes and labels (`'r#ident`) This PR does two things: 1. Reserve lifetime prefixes, e.g. `'prefix#lt` in edition 2021. 2. Implements raw lifetimes, e.g. `'r#async` in edition 2021. This PR additionally extends the `keyword_idents_2024` lint to also check lifetimes. cc `@traviscross` r? parser
author: Matthias Krüger <matthias.krueger@famsik.de> 2024-09-07 23:30:10 +0200
committer: GitHub <noreply@github.com> 2024-09-07 23:30:10 +0200
commit: ccf3f6e59d554c84ad654d04f03bc781497a743c (patch)
tree: 15c7e56db5931f9ae24cfec6f1939521eb11a49b /compiler/rustc_parse/src
parent: ec867f03bcd6c39156ef13eb5f85bf4fb924ca29 (diff)
parent: b6a86bee87a8f54e132bb276fadc738c1afc1ef6 (diff)
download: rust-ccf3f6e59d554c84ad654d04f03bc781497a743c.tar.gz
rust-ccf3f6e59d554c84ad654d04f03bc781497a743c.zip
6 files changed, 82 insertions, 35 deletions
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index f30939093c2..b7232ff21ca 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -13,7 +13,6 @@ use rustc_session::lint::builtin::{
 };
 use rustc_session::lint::BuiltinLintDiag;
 use rustc_session::parse::ParseSess;
-use rustc_span::edition::Edition;
 use rustc_span::symbol::Symbol;
 use rustc_span::{BytePos, Pos, Span};
 use tracing::debug;
@@ -188,9 +187,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
                     preceded_by_whitespace = true;
                     continue;
                 }
-                rustc_lexer::TokenKind::Ident => {
-                    self.ident(start)
-                }
+                rustc_lexer::TokenKind::Ident => self.ident(start),
                 rustc_lexer::TokenKind::RawIdent => {
                     let sym = nfc_normalize(self.str_from(start + BytePos(2)));
                     let span = self.mk_sp(start, self.pos);
@@ -205,20 +202,31 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
                     self.report_unknown_prefix(start);
                     self.ident(start)
                 }
-                rustc_lexer::TokenKind::InvalidIdent
-                | rustc_lexer::TokenKind::InvalidPrefix
+                rustc_lexer::TokenKind::UnknownPrefixLifetime => {
+                    self.report_unknown_prefix(start);
+                    // Include the leading `'` in the real identifier, for macro
+                    // expansion purposes. See #12512 for the gory details of why
+                    // this is necessary.
+                    let lifetime_name = self.str_from(start);
+                    self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1)));
+                    let ident = Symbol::intern(lifetime_name);
+                    token::Lifetime(ident, IdentIsRaw::No)
+                }
+                rustc_lexer::TokenKind::InvalidIdent | rustc_lexer::TokenKind::InvalidPrefix
                     // Do not recover an identifier with emoji if the codepoint is a confusable
                     // with a recoverable substitution token, like `➖`.
-                    if !UNICODE_ARRAY
-                        .iter()
-                        .any(|&(c, _, _)| {
-                            let sym = self.str_from(start);
-                            sym.chars().count() == 1 && c == sym.chars().next().unwrap()
-                        }) =>
+                    if !UNICODE_ARRAY.iter().any(|&(c, _, _)| {
+                        let sym = self.str_from(start);
+                        sym.chars().count() == 1 && c == sym.chars().next().unwrap()
+                    }) =>
                 {
                     let sym = nfc_normalize(self.str_from(start));
                     let span = self.mk_sp(start, self.pos);
-                    self.psess.bad_unicode_identifiers.borrow_mut().entry(sym).or_default()
+                    self.psess
+                        .bad_unicode_identifiers
+                        .borrow_mut()
+                        .entry(sym)
+                        .or_default()
                         .push(span);
                     token::Ident(sym, IdentIsRaw::No)
                 }
@@ -249,9 +257,9 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
                     let suffix = if suffix_start < self.pos {
                         let string = self.str_from(suffix_start);
                         if string == "_" {
-                            self
-                                .dcx()
-                                .emit_err(errors::UnderscoreLiteralSuffix { span: self.mk_sp(suffix_start, self.pos) });
+                            self.dcx().emit_err(errors::UnderscoreLiteralSuffix {
+                                span: self.mk_sp(suffix_start, self.pos),
+                            });
                             None
                         } else {
                             Some(Symbol::intern(string))
@@ -269,12 +277,47 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
                     self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1)));
                     if starts_with_number {
                         let span = self.mk_sp(start, self.pos);
-                        self.dcx().struct_err("lifetimes cannot start with a number")
+                        self.dcx()
+                            .struct_err("lifetimes cannot start with a number")
                             .with_span(span)
                             .stash(span, StashKey::LifetimeIsChar);
                     }
                     let ident = Symbol::intern(lifetime_name);
-                    token::Lifetime(ident)
+                    token::Lifetime(ident, IdentIsRaw::No)
+                }
+                rustc_lexer::TokenKind::RawLifetime => {
+                    self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1)));
+
+                    let ident_start = start + BytePos(3);
+                    let prefix_span = self.mk_sp(start, ident_start);
+
+                    if prefix_span.at_least_rust_2021() {
+                        let lifetime_name_without_tick = self.str_from(ident_start);
+                        // Put the `'` back onto the lifetime name.
+                        let mut lifetime_name = String::with_capacity(lifetime_name_without_tick.len() + 1);
+                        lifetime_name.push('\'');
+                        lifetime_name += lifetime_name_without_tick;
+                        let sym = Symbol::intern(&lifetime_name);
+
+                        token::Lifetime(sym, IdentIsRaw::Yes)
+                    } else {
+                        // Otherwise, this should be parsed like `'r`. Warn about it though.
+                        self.psess.buffer_lint(
+                            RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX,
+                            prefix_span,
+                            ast::CRATE_NODE_ID,
+                            BuiltinLintDiag::RawPrefix(prefix_span),
+                        );
+
+                        // Reset the state so we just lex the `'r`.
+                        let lt_start = start + BytePos(2);
+                        self.pos = lt_start;
+                        self.cursor = Cursor::new(&str_before[2 as usize..]);
+
+                        let lifetime_name = self.str_from(start);
+                        let ident = Symbol::intern(lifetime_name);
+                        token::Lifetime(ident, IdentIsRaw::No)
+                    }
                 }
                 rustc_lexer::TokenKind::Semi => token::Semi,
                 rustc_lexer::TokenKind::Comma => token::Comma,
@@ -331,16 +374,19 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
                     // first remove compound tokens like `<<` from `rustc_lexer`, and then add
                     // fancier error recovery to it, as there will be less overall work to do this
                     // way.
-                    let (token, sugg) = unicode_chars::check_for_substitution(self, start, c, repeats+1);
+                    let (token, sugg) =
+                        unicode_chars::check_for_substitution(self, start, c, repeats + 1);
                     self.dcx().emit_err(errors::UnknownTokenStart {
                         span: self.mk_sp(start, self.pos + Pos::from_usize(repeats * c.len_utf8())),
                         escaped: escaped_char(c),
                         sugg,
-                        null: if c == '\x00' {Some(errors::UnknownTokenNull)} else {None},
+                        null: if c == '\x00' { Some(errors::UnknownTokenNull) } else { None },
                         repeat: if repeats > 0 {
                             swallow_next_invalid = repeats;
                             Some(errors::UnknownTokenRepeat { repeats })
-                        } else {None}
+                        } else {
+                            None
+                        },
                     });
 
                     if let Some(token) = token {
@@ -699,7 +745,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
 
         let expn_data = prefix_span.ctxt().outer_expn_data();
 
-        if expn_data.edition >= Edition::Edition2021 {
+        if expn_data.edition.at_least_rust_2021() {
             // In Rust 2021, this is a hard error.
             let sugg = if prefix == "rb" {
                 Some(errors::UnknownPrefixSugg::UseBr(prefix_span))
diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs
index 84684e808d9..ecc4cd96faf 100644
--- a/compiler/rustc_parse/src/parser/expr.rs
+++ b/compiler/rustc_parse/src/parser/expr.rs
@@ -2050,7 +2050,7 @@ impl<'a> Parser<'a> {
         };
         // On an error path, eagerly consider a lifetime to be an unclosed character lit, if that
         // makes sense.
-        if let Some(ident) = self.token.lifetime()
+        if let Some((ident, IdentIsRaw::No)) = self.token.lifetime()
             && could_be_unclosed_char_literal(ident)
         {
             let lt = self.expect_lifetime();
@@ -2925,9 +2925,9 @@ impl<'a> Parser<'a> {
     }
 
     pub(crate) fn eat_label(&mut self) -> Option<Label> {
-        if let Some(ident) = self.token.lifetime() {
+        if let Some((ident, is_raw)) = self.token.lifetime() {
             // Disallow `'fn`, but with a better error message than `expect_lifetime`.
-            if ident.without_first_quote().is_reserved() {
+            if matches!(is_raw, IdentIsRaw::No) && ident.without_first_quote().is_reserved() {
                 self.dcx().emit_err(errors::InvalidLabel { span: ident.span, name: ident.name });
             }
 
diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs
index 61e3fa2e6c5..b90c41d9163 100644
--- a/compiler/rustc_parse/src/parser/mod.rs
+++ b/compiler/rustc_parse/src/parser/mod.rs
@@ -1666,7 +1666,7 @@ enum FlatToken {
 pub enum ParseNtResult {
     Tt(TokenTree),
     Ident(Ident, IdentIsRaw),
-    Lifetime(Ident),
+    Lifetime(Ident, IdentIsRaw),
 
     /// This case will eventually be removed, along with `Token::Interpolate`.
     Nt(Lrc<Nonterminal>),
diff --git a/compiler/rustc_parse/src/parser/nonterminal.rs b/compiler/rustc_parse/src/parser/nonterminal.rs
index e66d0df012b..44b169c881e 100644
--- a/compiler/rustc_parse/src/parser/nonterminal.rs
+++ b/compiler/rustc_parse/src/parser/nonterminal.rs
@@ -88,7 +88,7 @@ impl<'a> Parser<'a> {
             },
             NonterminalKind::Pat(pat_kind) => token.can_begin_pattern(pat_kind),
             NonterminalKind::Lifetime => match &token.kind {
-                token::Lifetime(_) | token::NtLifetime(..) => true,
+                token::Lifetime(..) | token::NtLifetime(..) => true,
                 _ => false,
             },
             NonterminalKind::TT | NonterminalKind::Item | NonterminalKind::Stmt => {
@@ -171,9 +171,9 @@ impl<'a> Parser<'a> {
             NonterminalKind::Lifetime => {
                 // We want to keep `'keyword` parsing, just like `keyword` is still
                 // an ident for nonterminal purposes.
-                return if let Some(ident) = self.token.lifetime() {
+                return if let Some((ident, is_raw)) = self.token.lifetime() {
                     self.bump();
-                    Ok(ParseNtResult::Lifetime(ident))
+                    Ok(ParseNtResult::Lifetime(ident, is_raw))
                 } else {
                     Err(self.dcx().create_err(UnexpectedNonterminal::Lifetime {
                         span: self.token.span,
diff --git a/compiler/rustc_parse/src/parser/pat.rs b/compiler/rustc_parse/src/parser/pat.rs
index f87b5649654..cbd35ffdfa9 100644
--- a/compiler/rustc_parse/src/parser/pat.rs
+++ b/compiler/rustc_parse/src/parser/pat.rs
@@ -1,6 +1,6 @@
 use rustc_ast::mut_visit::{walk_pat, MutVisitor};
 use rustc_ast::ptr::P;
-use rustc_ast::token::{self, BinOpToken, Delimiter, Token};
+use rustc_ast::token::{self, BinOpToken, Delimiter, IdentIsRaw, Token};
 use rustc_ast::{
     self as ast, AttrVec, BindingMode, ByRef, Expr, ExprKind, MacCall, Mutability, Pat, PatField,
     PatFieldsRest, PatKind, Path, QSelf, RangeEnd, RangeSyntax,
@@ -548,7 +548,7 @@ impl<'a> Parser<'a> {
                     None => PatKind::Path(qself, path),
                 }
             }
-        } else if let Some(lt) = self.token.lifetime()
+        } else if let Some((lt, IdentIsRaw::No)) = self.token.lifetime()
             // In pattern position, we're totally fine with using "next token isn't colon"
             // as a heuristic. We could probably just always try to recover if it's a lifetime,
             // because we never have `'a: label {}` in a pattern position anyways, but it does
@@ -689,7 +689,7 @@ impl<'a> Parser<'a> {
     /// Parse `&pat` / `&mut pat`.
     fn parse_pat_deref(&mut self, expected: Option<Expected>) -> PResult<'a, PatKind> {
         self.expect_and()?;
-        if let Some(lifetime) = self.token.lifetime() {
+        if let Some((lifetime, _)) = self.token.lifetime() {
             self.bump(); // `'a`
 
             self.dcx().emit_err(UnexpectedLifetimeInPattern {
diff --git a/compiler/rustc_parse/src/parser/ty.rs b/compiler/rustc_parse/src/parser/ty.rs
index 52f0b1c1b04..dd1cc75c7ff 100644
--- a/compiler/rustc_parse/src/parser/ty.rs
+++ b/compiler/rustc_parse/src/parser/ty.rs
@@ -1,5 +1,5 @@
 use rustc_ast::ptr::P;
-use rustc_ast::token::{self, BinOpToken, Delimiter, Token, TokenKind};
+use rustc_ast::token::{self, BinOpToken, Delimiter, IdentIsRaw, Token, TokenKind};
 use rustc_ast::util::case::Case;
 use rustc_ast::{
     self as ast, BareFnTy, BoundAsyncness, BoundConstness, BoundPolarity, FnRetTy, GenericBound,
@@ -1285,8 +1285,9 @@ impl<'a> Parser<'a> {
 
     /// Parses a single lifetime `'a` or panics.
     pub(super) fn expect_lifetime(&mut self) -> Lifetime {
-        if let Some(ident) = self.token.lifetime() {
-            if ident.without_first_quote().is_reserved()
+        if let Some((ident, is_raw)) = self.token.lifetime() {
+            if matches!(is_raw, IdentIsRaw::No)
+                && ident.without_first_quote().is_reserved()
                 && ![kw::UnderscoreLifetime, kw::StaticLifetime].contains(&ident.name)
             {
                 self.dcx().emit_err(errors::KeywordLifetime { span: ident.span });
author	Matthias Krüger <matthias.krueger@famsik.de>	2024-09-07 23:30:10 +0200
committer	GitHub <noreply@github.com>	2024-09-07 23:30:10 +0200
commit	ccf3f6e59d554c84ad654d04f03bc781497a743c (patch)
tree	15c7e56db5931f9ae24cfec6f1939521eb11a49b /compiler/rustc_parse/src
parent	ec867f03bcd6c39156ef13eb5f85bf4fb924ca29 (diff)
parent	b6a86bee87a8f54e132bb276fadc738c1afc1ef6 (diff)
download	rust-ccf3f6e59d554c84ad654d04f03bc781497a743c.tar.gz rust-ccf3f6e59d554c84ad654d04f03bc781497a743c.zip