diff options
| author | Matthias Krüger <matthias.krueger@famsik.de> | 2024-09-07 23:30:10 +0200 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-09-07 23:30:10 +0200 | 
| commit | ccf3f6e59d554c84ad654d04f03bc781497a743c (patch) | |
| tree | 15c7e56db5931f9ae24cfec6f1939521eb11a49b /compiler/rustc_parse/src | |
| parent | ec867f03bcd6c39156ef13eb5f85bf4fb924ca29 (diff) | |
| parent | b6a86bee87a8f54e132bb276fadc738c1afc1ef6 (diff) | |
| download | rust-ccf3f6e59d554c84ad654d04f03bc781497a743c.tar.gz rust-ccf3f6e59d554c84ad654d04f03bc781497a743c.zip | |
Rollup merge of #126452 - compiler-errors:raw-lifetimes, r=spastorino
Implement raw lifetimes and labels (`'r#ident`) This PR does two things: 1. Reserve lifetime prefixes, e.g. `'prefix#lt` in edition 2021. 2. Implements raw lifetimes, e.g. `'r#async` in edition 2021. This PR additionally extends the `keyword_idents_2024` lint to also check lifetimes. cc `@traviscross` r? parser
Diffstat (limited to 'compiler/rustc_parse/src')
| -rw-r--r-- | compiler/rustc_parse/src/lexer/mod.rs | 90 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/expr.rs | 6 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/mod.rs | 2 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/nonterminal.rs | 6 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/pat.rs | 6 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/ty.rs | 7 | 
6 files changed, 82 insertions, 35 deletions
| diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index f30939093c2..b7232ff21ca 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -13,7 +13,6 @@ use rustc_session::lint::builtin::{ }; use rustc_session::lint::BuiltinLintDiag; use rustc_session::parse::ParseSess; -use rustc_span::edition::Edition; use rustc_span::symbol::Symbol; use rustc_span::{BytePos, Pos, Span}; use tracing::debug; @@ -188,9 +187,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> { preceded_by_whitespace = true; continue; } - rustc_lexer::TokenKind::Ident => { - self.ident(start) - } + rustc_lexer::TokenKind::Ident => self.ident(start), rustc_lexer::TokenKind::RawIdent => { let sym = nfc_normalize(self.str_from(start + BytePos(2))); let span = self.mk_sp(start, self.pos); @@ -205,20 +202,31 @@ impl<'psess, 'src> StringReader<'psess, 'src> { self.report_unknown_prefix(start); self.ident(start) } - rustc_lexer::TokenKind::InvalidIdent - | rustc_lexer::TokenKind::InvalidPrefix + rustc_lexer::TokenKind::UnknownPrefixLifetime => { + self.report_unknown_prefix(start); + // Include the leading `'` in the real identifier, for macro + // expansion purposes. See #12512 for the gory details of why + // this is necessary. + let lifetime_name = self.str_from(start); + self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1))); + let ident = Symbol::intern(lifetime_name); + token::Lifetime(ident, IdentIsRaw::No) + } + rustc_lexer::TokenKind::InvalidIdent | rustc_lexer::TokenKind::InvalidPrefix // Do not recover an identifier with emoji if the codepoint is a confusable // with a recoverable substitution token, like `➖`. - if !UNICODE_ARRAY - .iter() - .any(|&(c, _, _)| { - let sym = self.str_from(start); - sym.chars().count() == 1 && c == sym.chars().next().unwrap() - }) => + if !UNICODE_ARRAY.iter().any(|&(c, _, _)| { + let sym = self.str_from(start); + sym.chars().count() == 1 && c == sym.chars().next().unwrap() + }) => { let sym = nfc_normalize(self.str_from(start)); let span = self.mk_sp(start, self.pos); - self.psess.bad_unicode_identifiers.borrow_mut().entry(sym).or_default() + self.psess + .bad_unicode_identifiers + .borrow_mut() + .entry(sym) + .or_default() .push(span); token::Ident(sym, IdentIsRaw::No) } @@ -249,9 +257,9 @@ impl<'psess, 'src> StringReader<'psess, 'src> { let suffix = if suffix_start < self.pos { let string = self.str_from(suffix_start); if string == "_" { - self - .dcx() - .emit_err(errors::UnderscoreLiteralSuffix { span: self.mk_sp(suffix_start, self.pos) }); + self.dcx().emit_err(errors::UnderscoreLiteralSuffix { + span: self.mk_sp(suffix_start, self.pos), + }); None } else { Some(Symbol::intern(string)) @@ -269,12 +277,47 @@ impl<'psess, 'src> StringReader<'psess, 'src> { self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1))); if starts_with_number { let span = self.mk_sp(start, self.pos); - self.dcx().struct_err("lifetimes cannot start with a number") + self.dcx() + .struct_err("lifetimes cannot start with a number") .with_span(span) .stash(span, StashKey::LifetimeIsChar); } let ident = Symbol::intern(lifetime_name); - token::Lifetime(ident) + token::Lifetime(ident, IdentIsRaw::No) + } + rustc_lexer::TokenKind::RawLifetime => { + self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1))); + + let ident_start = start + BytePos(3); + let prefix_span = self.mk_sp(start, ident_start); + + if prefix_span.at_least_rust_2021() { + let lifetime_name_without_tick = self.str_from(ident_start); + // Put the `'` back onto the lifetime name. + let mut lifetime_name = String::with_capacity(lifetime_name_without_tick.len() + 1); + lifetime_name.push('\''); + lifetime_name += lifetime_name_without_tick; + let sym = Symbol::intern(&lifetime_name); + + token::Lifetime(sym, IdentIsRaw::Yes) + } else { + // Otherwise, this should be parsed like `'r`. Warn about it though. + self.psess.buffer_lint( + RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, + prefix_span, + ast::CRATE_NODE_ID, + BuiltinLintDiag::RawPrefix(prefix_span), + ); + + // Reset the state so we just lex the `'r`. + let lt_start = start + BytePos(2); + self.pos = lt_start; + self.cursor = Cursor::new(&str_before[2 as usize..]); + + let lifetime_name = self.str_from(start); + let ident = Symbol::intern(lifetime_name); + token::Lifetime(ident, IdentIsRaw::No) + } } rustc_lexer::TokenKind::Semi => token::Semi, rustc_lexer::TokenKind::Comma => token::Comma, @@ -331,16 +374,19 @@ impl<'psess, 'src> StringReader<'psess, 'src> { // first remove compound tokens like `<<` from `rustc_lexer`, and then add // fancier error recovery to it, as there will be less overall work to do this // way. - let (token, sugg) = unicode_chars::check_for_substitution(self, start, c, repeats+1); + let (token, sugg) = + unicode_chars::check_for_substitution(self, start, c, repeats + 1); self.dcx().emit_err(errors::UnknownTokenStart { span: self.mk_sp(start, self.pos + Pos::from_usize(repeats * c.len_utf8())), escaped: escaped_char(c), sugg, - null: if c == '\x00' {Some(errors::UnknownTokenNull)} else {None}, + null: if c == '\x00' { Some(errors::UnknownTokenNull) } else { None }, repeat: if repeats > 0 { swallow_next_invalid = repeats; Some(errors::UnknownTokenRepeat { repeats }) - } else {None} + } else { + None + }, }); if let Some(token) = token { @@ -699,7 +745,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> { let expn_data = prefix_span.ctxt().outer_expn_data(); - if expn_data.edition >= Edition::Edition2021 { + if expn_data.edition.at_least_rust_2021() { // In Rust 2021, this is a hard error. let sugg = if prefix == "rb" { Some(errors::UnknownPrefixSugg::UseBr(prefix_span)) diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index 84684e808d9..ecc4cd96faf 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -2050,7 +2050,7 @@ impl<'a> Parser<'a> { }; // On an error path, eagerly consider a lifetime to be an unclosed character lit, if that // makes sense. - if let Some(ident) = self.token.lifetime() + if let Some((ident, IdentIsRaw::No)) = self.token.lifetime() && could_be_unclosed_char_literal(ident) { let lt = self.expect_lifetime(); @@ -2925,9 +2925,9 @@ impl<'a> Parser<'a> { } pub(crate) fn eat_label(&mut self) -> Option<Label> { - if let Some(ident) = self.token.lifetime() { + if let Some((ident, is_raw)) = self.token.lifetime() { // Disallow `'fn`, but with a better error message than `expect_lifetime`. - if ident.without_first_quote().is_reserved() { + if matches!(is_raw, IdentIsRaw::No) && ident.without_first_quote().is_reserved() { self.dcx().emit_err(errors::InvalidLabel { span: ident.span, name: ident.name }); } diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 61e3fa2e6c5..b90c41d9163 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -1666,7 +1666,7 @@ enum FlatToken { pub enum ParseNtResult { Tt(TokenTree), Ident(Ident, IdentIsRaw), - Lifetime(Ident), + Lifetime(Ident, IdentIsRaw), /// This case will eventually be removed, along with `Token::Interpolate`. Nt(Lrc<Nonterminal>), diff --git a/compiler/rustc_parse/src/parser/nonterminal.rs b/compiler/rustc_parse/src/parser/nonterminal.rs index e66d0df012b..44b169c881e 100644 --- a/compiler/rustc_parse/src/parser/nonterminal.rs +++ b/compiler/rustc_parse/src/parser/nonterminal.rs @@ -88,7 +88,7 @@ impl<'a> Parser<'a> { }, NonterminalKind::Pat(pat_kind) => token.can_begin_pattern(pat_kind), NonterminalKind::Lifetime => match &token.kind { - token::Lifetime(_) | token::NtLifetime(..) => true, + token::Lifetime(..) | token::NtLifetime(..) => true, _ => false, }, NonterminalKind::TT | NonterminalKind::Item | NonterminalKind::Stmt => { @@ -171,9 +171,9 @@ impl<'a> Parser<'a> { NonterminalKind::Lifetime => { // We want to keep `'keyword` parsing, just like `keyword` is still // an ident for nonterminal purposes. - return if let Some(ident) = self.token.lifetime() { + return if let Some((ident, is_raw)) = self.token.lifetime() { self.bump(); - Ok(ParseNtResult::Lifetime(ident)) + Ok(ParseNtResult::Lifetime(ident, is_raw)) } else { Err(self.dcx().create_err(UnexpectedNonterminal::Lifetime { span: self.token.span, diff --git a/compiler/rustc_parse/src/parser/pat.rs b/compiler/rustc_parse/src/parser/pat.rs index f87b5649654..cbd35ffdfa9 100644 --- a/compiler/rustc_parse/src/parser/pat.rs +++ b/compiler/rustc_parse/src/parser/pat.rs @@ -1,6 +1,6 @@ use rustc_ast::mut_visit::{walk_pat, MutVisitor}; use rustc_ast::ptr::P; -use rustc_ast::token::{self, BinOpToken, Delimiter, Token}; +use rustc_ast::token::{self, BinOpToken, Delimiter, IdentIsRaw, Token}; use rustc_ast::{ self as ast, AttrVec, BindingMode, ByRef, Expr, ExprKind, MacCall, Mutability, Pat, PatField, PatFieldsRest, PatKind, Path, QSelf, RangeEnd, RangeSyntax, @@ -548,7 +548,7 @@ impl<'a> Parser<'a> { None => PatKind::Path(qself, path), } } - } else if let Some(lt) = self.token.lifetime() + } else if let Some((lt, IdentIsRaw::No)) = self.token.lifetime() // In pattern position, we're totally fine with using "next token isn't colon" // as a heuristic. We could probably just always try to recover if it's a lifetime, // because we never have `'a: label {}` in a pattern position anyways, but it does @@ -689,7 +689,7 @@ impl<'a> Parser<'a> { /// Parse `&pat` / `&mut pat`. fn parse_pat_deref(&mut self, expected: Option<Expected>) -> PResult<'a, PatKind> { self.expect_and()?; - if let Some(lifetime) = self.token.lifetime() { + if let Some((lifetime, _)) = self.token.lifetime() { self.bump(); // `'a` self.dcx().emit_err(UnexpectedLifetimeInPattern { diff --git a/compiler/rustc_parse/src/parser/ty.rs b/compiler/rustc_parse/src/parser/ty.rs index 52f0b1c1b04..dd1cc75c7ff 100644 --- a/compiler/rustc_parse/src/parser/ty.rs +++ b/compiler/rustc_parse/src/parser/ty.rs @@ -1,5 +1,5 @@ use rustc_ast::ptr::P; -use rustc_ast::token::{self, BinOpToken, Delimiter, Token, TokenKind}; +use rustc_ast::token::{self, BinOpToken, Delimiter, IdentIsRaw, Token, TokenKind}; use rustc_ast::util::case::Case; use rustc_ast::{ self as ast, BareFnTy, BoundAsyncness, BoundConstness, BoundPolarity, FnRetTy, GenericBound, @@ -1285,8 +1285,9 @@ impl<'a> Parser<'a> { /// Parses a single lifetime `'a` or panics. pub(super) fn expect_lifetime(&mut self) -> Lifetime { - if let Some(ident) = self.token.lifetime() { - if ident.without_first_quote().is_reserved() + if let Some((ident, is_raw)) = self.token.lifetime() { + if matches!(is_raw, IdentIsRaw::No) + && ident.without_first_quote().is_reserved() && ![kw::UnderscoreLifetime, kw::StaticLifetime].contains(&ident.name) { self.dcx().emit_err(errors::KeywordLifetime { span: ident.span }); | 
